NingG +

Redis 汇总梳理(转)

NOTE: 这个是我们 Redis 讨论小组中,一个很牛的同事(xkniu)梳理的,特别细致、有价值,在我这儿也转一份,留着纪念。

数据结构

SDS 字符串类型

Preview

typedef char *sds;

struct __attribute__ ((__packed__)) sdshdr64 {
    uint64_t len; /* used */
    uint64_t alloc; /* excluding the header and null terminator */
    unsigned char flags; /* 3 lsb of type, 5 unused bits */
    char buf[];
};

Redis 操作的时候,根据 char 的指针,定位到 sdshdr 结构体的地址,从而获取其他信息来操作 SDS。所以 redis 中定义 char 指针的别名为 SDS。

Summary

链表

Preview

typedef struct listNode {
    struct listNode *prev;
    struct listNode *next;
    void *value;
} listNode;

typedef struct listIter {
    listNode *next;
    int direction;
} listIter;

typedef struct list {
    listNode *head;
    listNode *tail;
    void *(*dup)(void *ptr);
    void (*free)(void *ptr);
    int (*match)(void *ptr, void *key);
    unsigned long len;
} list;

Summary

字典

Preview

typedef struct dictEntry {
    void *key;
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
        double d;
    } v;
    struct dictEntry *next;
} dictEntry;

/* This is our hash table structure. Every dictionary has two of this as we
 * implement incremental rehashing, for the old to the new table. */
typedef struct dictht {
    dictEntry **table;
    unsigned long size;
    unsigned long sizemask;
    unsigned long used;
} dictht;

typedef struct dict {
    dictType *type;
    void *privdata;
    dictht ht[2];
    long rehashidx; /* rehashing not in progress if rehashidx == -1 */
    int iterators; /* number of iterators currently running */
} dict;

Summary

跳跃表 skiplist

Preview

/* ZSETs use a specialized version of Skiplists */
typedef struct zskiplistNode {
    robj *obj;
    double score;
    struct zskiplistNode *backward;
    struct zskiplistLevel {
        struct zskiplistNode *forward;
        unsigned int span;
    } level[];
} zskiplistNode;

typedef struct zskiplist {
    struct zskiplistNode *header, *tail;
    unsigned long length;
    int level;
} zskiplist;

typedef struct zset {
    dict *dict;
    zskiplist *zsl;
} zset;

Summary

整数集合 intset

Preview

// 总共有三种 encoding
/* Note that these encodings are ordered, so:
 * INTSET_ENC_INT16 < INTSET_ENC_INT32 < INTSET_ENC_INT64. */
#define INTSET_ENC_INT16 (sizeof(int16_t))
#define INTSET_ENC_INT32 (sizeof(int32_t))
#define INTSET_ENC_INT64 (sizeof(int64_t))

typedef struct intset {
    uint32_t encoding;
    uint32_t length;
    int8_t contents[];
} intset;

Summary

压缩列表 ziplist

Preview

zlbytes | zltail | zllen | entry1 | entry2 | ... | entryN | zlend

Summary

对象

Preview

typedef struct redisObject {
    unsigned type:4;
    unsigned encoding:4;
    unsigned lru:LRU_BITS; /* lru time (relative to server.lruclock) */
    int refcount;
    void *ptr;
} robj;

Summary

单机数据库

数据库

Preview

struct redisServer {
    redisDb *db;				/* Array of dbs */
    int dbnum;                      /* Total number of configured DBs */
}

typedef struct redisDb {
    dict *dict;                 /* The keyspace for this DB */
    dict *expires;              /* Timeout of keys with a timeout set */
    dict *blocking_keys;        /* Keys with clients waiting for data (BLPOP) */
    dict *ready_keys;           /* Blocked keys that received a PUSH */
    dict *watched_keys;         /* WATCHED keys for MULTI/EXEC CAS */
    struct evictionPoolEntry *eviction_pool;    /* Eviction pool of keys */
    int id;                     /* Database ID */
    long long avg_ttl;          /* Average TTL, just for stats */
} redisDb;

Summary

持久化

Preview

struct redisServer {
    long long dirty;                /* Changes to DB from the last save */
    time_t lastsave;                /* Unix time of last successful save */
	
	sds aof_buf;      /* AOF buffer, written before entering the event loop */
}

Summary

事件循环(aeEvent 框架)

Preview

void aeMain(aeEventLoop *eventLoop) {
    eventLoop->stop = 0;
    while (!eventLoop->stop) {
        if (eventLoop->beforesleep != NULL)
            eventLoop->beforesleep(eventLoop);
        aeProcessEvents(eventLoop, AE_ALL_EVENTS);
    }
}

Summary

客户端/服务器

Summary

多机数据库

复制

Summary

Sentinel

Sentinel 只是一个 HA 方案,是非集群下,单机 Redis 来实现高可用的。在 Redis cluster 中,已经自带了 master 选举等流程,不需要 sentinel 参与。

Summary

集群

Summary

节点心跳和gossip消息

每一秒,通常一个节点将ping 几个随机节点,这样ping的数据包的总数量(和接收的pong包)是一个恒定的量,无论集群中节点的数量。

但是每个节点可确保ping通,ping或pong不超过一半NODE_TIMEOUT。前NODE_TIMEOUT已过,节点也尝试重新与另一个节点的TCP链接,节点不相信因为当前TCP链接,是不可达的。

信息的交换量大于O(N),NODE_TIMEOUT设置为一个小的数字,但节点的数量(N)是非常大的,因为每个节点将尝试 ping,如果配置信息在NODE_TIMEOUT一半的时间没有更新。

例如,NODE_TIMEOUT设置为60秒的100个节点集群,每个节点会尝试发送99 ping每30秒,那么每秒3.3个ping,即乘以100个节点是每秒330个ping。

有一些方法可以使用已经通过交换的Redis集群的gossip信息,以减少交换的消息的数量。例如,我们可以ping那些一半NODE_TIMEOUT内“可能的失败”状态的节点,然后每秒ping几个包到那些工作的节点。然而,在现实世界中,设置非常小的NODE_TIMEOUT的大型集群可靠地工作,将在未来作为大型集群实际部署测试。

结论:

NODE_TIMEOUT/2 的时间内,每个节点会发出 n-1 个 PING 命令,收到 n-1 个 PONG 响应;

  1. NODE_TIMEOUT/2 时间内,PING 和 PONG 心跳命令的数量:n x (n-1) x 2= 2n^2
  2. Redis 集群中,节点数量大时,耗费较多的网络带宽;
  3. Redis 集群,因为使用 gossip 协议,进行心跳检测,所以,谨慎设计集群规模;
  4. Redis 集群规模过大时,可以采用分级策略,划分为多个隔离的 Redis 集群;

独立功能

发布订阅

Preview

    dict *pubsub_channels;  /* Map channels to list of subscribed clients */
    list *pubsub_patterns;  /* A list of pubsub_patterns */

Summary

Preview

typedef struct client {

    multiState mstate;      /* MULTI/EXEC state */
} client;

typedef struct multiState {
    multiCmd *commands;     /* Array of MULTI commands */
    int count;              /* Total number of MULTI commands */
    int minreplicas;        /* MINREPLICAS for synchronous replication */
    time_t minreplicas_timeout; /* MINREPLICAS timeout as unixtime. */
} multiState;

/* Redis database representation. There are multiple databases identified
* by integers from 0 (the default database) up to the max configured
* database. The database number is the 'id' field in the structure. */
typedef struct redisDb {
    dict *dict;                 /* The keyspace for this DB */
    dict *expires;              /* Timeout of keys with a timeout set */
    dict *blocking_keys;        /* Keys with clients waiting for data (BLPOP) */
    dict *ready_keys;           /* Blocked keys that received a PUSH */
    dict *watched_keys;         /* WATCHED keys for MULTI/EXEC CAS */
    struct evictionPoolEntry *eviction_pool;    /* Eviction pool of keys */
    int id;                     /* Database ID */
    long long avg_ttl;          /* Average TTL, just for stats */
} redisDb;

Summary

二进制数组

二进制位统计算法:variable-precision SWAR 算法(就是 MIT HAKMEM 中看到的归并思想)

监视器 monitor

Preview

typedef struct redisServer {

    list *slaves, *monitors;    /* List of slaves and MONITORs */
} redisServer;

Summary

慢查询 slowlog

记录执行时长超过指定时间的命令请求。

Preview

typedef struct redisServer {

    list *slowlog;                  /* SLOWLOG list of commands */
    long long slowlog_entry_id;     /* SLOWLOG current entry ID */
    long long slowlog_log_slower_than; /* SLOWLOG time limit (to get logged) */
    unsigned long slowlog_max_len;     /* SLOWLOG max number of items logged */
} redisServer;

typedef struct slowlogEntry {
    robj **argv;
    int argc;
    long long id;       /* Unique entry identifier. */
    long long duration; /* Time spent by the query, in nanoseconds. */
    time_t time;        /* Unix time at which the query was executed. */
} slowlogEntry;

Summary

Lua 脚本

Summary

参考链接

Top