[sheepdog] [PATCH v2 2/5] sheep: add disk information into sd_node
Robin Dong
robin.k.dong at gmail.com
Wed May 7 12:25:39 CEST 2014
From: Robin Dong <sanbai at taobao.com>
Add disk information to sd_node so that it is sent to the
cluster shared region (corosync/zookeeper, etc.) and is
visible to all nodes in the cluster.
With disk information, the size of sd_node grows to 592 bytes (80 + 16 * 32)
and the max size of zookeeper is 983080 bytes, so SD_MAX_NODES becomes
983080 / (592 * 2) = 830.
Signed-off-by: Robin Dong <sanbai at taobao.com>
---
dog/cluster.c | 4 +++
include/internal_proto.h | 18 ++++++++++++--
include/sheep.h | 5 ++++
sheep/group.c | 9 ++++++-
sheep/md.c | 64 +++++++++++++++++++++++++++++++-----------------
sheep/sheep_priv.h | 24 ++++++++++++++++++
6 files changed, 98 insertions(+), 26 deletions(-)
diff --git a/dog/cluster.c b/dog/cluster.c
index 4af1e7c..7c5bc87 100644
--- a/dog/cluster.c
+++ b/dog/cluster.c
@@ -123,6 +123,10 @@ static int cluster_format(int argc, char **argv)
if (cluster_cmd_data.strict)
hdr.cluster.flags |= SD_CLUSTER_FLAG_STRICT;
+#ifdef HAVE_DISKVNODES
+ hdr.cluster.flags |= SD_CLUSTER_FLAG_DISKMODE;
+#endif
+
printf("using backend %s store\n", store_name);
ret = dog_exec_req(&sd_nid, &hdr, store_name);
if (ret < 0)
diff --git a/include/internal_proto.h b/include/internal_proto.h
index 628bde7..488d6d9 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -42,7 +42,6 @@
* Currently, only zookeeper driver support SD_MAX_NODES nodes because
* its message buffer size is large enough to hold nodes[SD_MAX_NODES].
*/
-#define SD_MAX_NODES 6144
#define SD_DEFAULT_VNODES 128
/*
@@ -149,7 +148,13 @@ struct node_id {
uint8_t pad[4];
};
-#define SD_NODE_SIZE 80
+/*
+ * Per-disk record carried in sd_node.disks[].
+ * A disk_id of zero is treated as an unused slot by update_node_size().
+ */
+struct disk_info {
+ uint64_t disk_id; /* update_node_disks() stores sd_hash() of the disk path */
+ uint64_t disk_space; /* update_node_disks() copies this from disk->space */
+};
+
+#define DISK_MAX 32
+#define WEIGHT_MIN (1ULL << 32) /* 4G */
struct sd_node {
struct rb_node rb;
@@ -157,6 +162,15 @@ struct sd_node {
uint16_t nr_vnodes;
uint32_t zone;
uint64_t space;
+#ifdef HAVE_DISKVNODES
+ #define SD_MAX_NODES 830
+ #define SD_NODE_SIZE (80 + sizeof(struct disk_info) * DISK_MAX)
+ struct disk_info disks[DISK_MAX];
+#else
+ #define SD_MAX_NODES 6144
+ #define SD_NODE_SIZE 80
+ struct disk_info disks[0];
+#endif
};
/*
diff --git a/include/sheep.h b/include/sheep.h
index f7f5c48..ea376cc 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -276,4 +276,9 @@ static inline struct sd_node *str_to_node(const char *str, struct sd_node *id)
return id;
}
+/* Tell whether SD_CLUSTER_FLAG_DISKMODE is set in the given cluster info. */
+static inline bool is_cluster_diskmode(const struct cluster_info *cinfo)
+{
+	const bool diskmode = (cinfo->flags & SD_CLUSTER_FLAG_DISKMODE) > 0;
+
+	return diskmode;
+}
+
#endif
diff --git a/sheep/group.c b/sheep/group.c
index 4114dfb..1e861bd 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -965,7 +965,12 @@ static void update_node_size(struct sd_node *node)
if (unlikely(!n))
panic("can't find %s", node_to_str(node));
n->space = node->space;
-
+ if (is_cluster_diskmode(&sys->cinfo)) {
+ memset(n->disks, 0, sizeof(struct disk_info) * DISK_MAX);
+ for (int i = 0; i < DISK_MAX; i++)
+ if (node->disks[i].disk_id)
+ n->disks[i] = node->disks[i];
+ }
put_vnode_info(cur_vinfo);
}
@@ -1026,6 +1031,8 @@ int create_cluster(int port, int64_t zone, int nr_vnodes,
sys->this_node.space = sys->disk_space;
+ update_node_disks();
+
sys->cinfo.epoch = get_latest_epoch();
if (sys->cinfo.epoch) {
sys->cinfo.nr_nodes = epoch_log_read(sys->cinfo.epoch,
diff --git a/sheep/md.c b/sheep/md.c
index 8154ee6..54d2f8c 100644
--- a/sheep/md.c
+++ b/sheep/md.c
@@ -17,27 +17,7 @@
#define NONE_EXIST_PATH "/all/disks/are/broken/,ps/əʌo7/!"
-struct disk {
- struct rb_node rb;
- char path[PATH_MAX];
- uint64_t space;
-};
-
-struct vdisk {
- struct rb_node rb;
- struct disk *disk;
- uint64_t hash;
-};
-
-struct md {
- struct rb_root vroot;
- struct rb_root root;
- struct sd_rw_lock lock;
- uint64_t space;
- uint32_t nr_disks;
-};
-
-static struct md md = {
+struct md md = {
.vroot = RB_ROOT,
.root = RB_ROOT,
.lock = SD_RW_LOCK_INITIALIZER,
@@ -87,7 +67,7 @@ static struct vdisk *oid_to_vdisk(uint64_t oid)
return hval_to_vdisk(sd_hash_oid(oid));
}
-static void create_vdisks(struct disk *disk)
+static void create_vdisks(const struct disk *disk)
{
uint64_t hval = sd_hash(disk->path, strlen(disk->path));
int nr = vdisk_number(disk);
@@ -751,6 +731,42 @@ static inline void md_del_disk(const char *path)
md_remove_disk(disk);
}
+#ifdef HAVE_DISKVNODES
+/*
+ * Publish the local disks in sys->this_node.disks[] and rebuild the vdisks
+ * of every disk in md.root.
+ *
+ * Does nothing when sys is not set up yet.  sd_node only has room for
+ * DISK_MAX entries, so disks beyond that are left out of the published
+ * table; their vdisks are still rebuilt.
+ */
+void update_node_disks(void)
+{
+	const struct disk *disk;
+	int nr = 0;
+	bool had_vdisks;
+
+	if (!sys)
+		return;
+
+	memset(sys->this_node.disks, 0, sizeof(struct disk_info) * DISK_MAX);
+	sd_read_lock(&md.lock);
+	rb_for_each_entry(disk, &md.root, rb) {
+		/* disks[] holds DISK_MAX entries; never write past its end */
+		if (nr >= DISK_MAX)
+			break;
+		sys->this_node.disks[nr].disk_id =
+			sd_hash(disk->path, strlen(disk->path));
+		sys->this_node.disks[nr].disk_space = disk->space;
+		nr++;
+	}
+	sd_rw_unlock(&md.lock);
+
+	sd_write_lock(&md.lock);
+	/* Sample vroot under the lock so it cannot change before the rebuild */
+	had_vdisks = !RB_EMPTY_ROOT(&md.vroot);
+	rb_for_each_entry(disk, &md.root, rb) {
+		if (had_vdisks)
+			remove_vdisks(disk);
+		create_vdisks(disk);
+	}
+	sd_rw_unlock(&md.lock);
+}
+#else
+/* Without HAVE_DISKVNODES there is no per-disk table to publish. */
+void update_node_disks(void)
+{
+}
+#endif
+
static int do_plug_unplug(char *disks, bool plug)
{
const char *path;
@@ -776,8 +792,10 @@ static int do_plug_unplug(char *disks, bool plug)
out:
sd_rw_unlock(&md.lock);
- if (ret == SD_RES_SUCCESS)
+ if (ret == SD_RES_SUCCESS) {
+ update_node_disks();
kick_recover();
+ }
return ret;
}
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index fddd641..693171c 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -164,6 +164,30 @@ struct system_info {
struct sd_stat stat;
};
+/* One local disk; update_node_disks() walks these through md.root. */
+struct disk {
+ struct rb_node rb; /* links the disk into md.root */
+ char path[PATH_MAX]; /* hashed with sd_hash() to derive the disk id and vdisk hashes */
+ uint64_t space; /* published as disk_info.disk_space */
+};
+
+/* A virtual disk entry created for a struct disk by create_vdisks(). */
+struct vdisk {
+ struct rb_node rb; /* NOTE(review): presumably links into md.vroot - confirm in md.c */
+ const struct disk *disk; /* the disk this vdisk belongs to; not modified through here */
+ uint64_t hash;
+};
+
+/*
+ * State of the local multi-disk layer; the single instance `md` is defined
+ * in sheep/md.c and declared extern below.
+ */
+struct md {
+ struct rb_root vroot; /* checked with RB_EMPTY_ROOT() before vdisks are rebuilt */
+ struct rb_root root; /* tree of struct disk, iterated via the rb member */
+ struct sd_rw_lock lock; /* taken for read/write around walks of root */
+ uint64_t space; /* NOTE(review): presumably the summed disk space - confirm in md.c */
+ uint32_t nr_disks; /* NOTE(review): presumably the number of entries in root - confirm */
+};
+
+extern struct md md;
+
+void update_node_disks(void);
+
struct siocb {
uint32_t epoch;
void *buf;
--
1.7.12.4
More information about the sheepdog
mailing list