[sheepdog] [PATCH 5/5] dog/vdi: rework 'vdi object location' and 'vdi track'
Liu Yuan
namei.unix at gmail.com
Thu Feb 27 06:48:08 CET 2014
Add SD_OP_EXIST, an operation that checks whether an object exists on a specific node.
Signed-off-by: Liu Yuan <namei.unix at gmail.com>
---
dog/vdi.c | 275 +++++++++++++----------------------------------
include/internal_proto.h | 1 +
sheep/ops.c | 20 ++++
sheep/recovery.c | 17 +--
sheep/sheep_priv.h | 1 +
5 files changed, 106 insertions(+), 208 deletions(-)
diff --git a/dog/vdi.c b/dog/vdi.c
index debed0a..8a8f089 100644
--- a/dog/vdi.c
+++ b/dog/vdi.c
@@ -210,122 +210,23 @@ static void print_vdi_graph(uint32_t vid, const char *name, const char *tag,
}
-static void vdi_info_filler(uint32_t vid, const char *name, const char *tag,
- uint32_t snapid, uint32_t flags,
- const struct sd_inode *i, void *data)
-{
- struct get_vdi_info *info = data;
-
- if (info->name) {
- if (info->tag && info->tag[0]) {
- if (!strcmp(name, info->name) &&
- !strcmp(tag, info->tag)) {
- info->vid = vid;
- info->nr_copies = i->nr_copies;
- info->copy_policy = i->copy_policy;
- }
- } else if (info->snapid) {
- if (!strcmp(name, info->name) &&
- snapid == info->snapid) {
- info->vid = vid;
- info->nr_copies = i->nr_copies;
- info->copy_policy = i->copy_policy;
- }
- } else {
- if (!strcmp(name, info->name)) {
- info->vid = vid;
- info->nr_copies = i->nr_copies;
- info->copy_policy = i->copy_policy;
- }
- }
- }
-}
-
-typedef int (*obj_parser_func_t)(const char *sheep, uint64_t oid,
- struct sd_rsp *rsp, char *buf, void *data);
-
-static int do_print_obj(const char *sheep, uint64_t oid, struct sd_rsp *rsp,
- char *buf, void *data)
-{
- switch (rsp->result) {
- case SD_RES_SUCCESS:
- printf("%s has the object\n", sheep);
- break;
- case SD_RES_NO_OBJ:
- printf("%s doesn't have the object\n", sheep);
- break;
- case SD_RES_OLD_NODE_VER:
- case SD_RES_NEW_NODE_VER:
- sd_err("The node list has changed: please try again");
- break;
- default:
- sd_err("%s: hit an unexpected error (%s)", sheep,
- sd_strerror(rsp->result));
- break;
- }
-
- return 0;
-}
-
-struct obj_info_filler_info {
- bool success;
- uint64_t data_oid;
- unsigned idx;
-};
-
-static int obj_info_filler(const char *sheep, uint64_t oid, struct sd_rsp *rsp,
- char *buf, void *data)
+static void for_each_node_print(uint64_t oid)
{
- struct obj_info_filler_info *info = data;
- struct sd_inode *inode = (struct sd_inode *)buf;
- uint32_t vdi_id;
-
- switch (rsp->result) {
- case SD_RES_SUCCESS:
- if (info->success)
- break;
- info->success = true;
- vdi_id = sd_inode_get_vid(inode, info->idx);
- if (vdi_id) {
- info->data_oid = vid_to_data_oid(vdi_id, info->idx);
- return 1;
- }
- break;
- case SD_RES_NO_OBJ:
- break;
- case SD_RES_OLD_NODE_VER:
- case SD_RES_NEW_NODE_VER:
- sd_err("The node list has changed: please try again");
- break;
- default:
- sd_err("%s: hit an unexpected error (%s)", sheep,
- sd_strerror(rsp->result));
- break;
- }
-
- return 0;
-}
-
-static void parse_objs(uint64_t oid, obj_parser_func_t func, void *data,
- size_t size)
-{
- int ret, cb_ret;
+ int ret;
struct sd_node *n;
- char *buf;
+ const char *sheep;
- buf = xzalloc(size);
rb_for_each_entry(n, &sd_nroot, rb) {
struct sd_req hdr;
struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
- sd_init_req(&hdr, SD_OP_READ_PEER);
- hdr.data_length = size;
+ sd_init_req(&hdr, SD_OP_EXIST);
+ hdr.data_length = 0;
hdr.flags = 0;
hdr.epoch = sd_epoch;
hdr.obj.oid = oid;
- hdr.obj.ec_index = SD_MAX_COPIES + 1; /* Ignore index */
- ret = dog_exec_req(&n->nid, &hdr, buf);
+ ret = dog_exec_req(&n->nid, &hdr, NULL);
if (ret < 0)
continue;
switch (rsp->result) {
@@ -333,16 +234,26 @@ static void parse_objs(uint64_t oid, obj_parser_func_t func, void *data,
continue;
}
- cb_ret = func(addr_to_str(n->nid.addr, n->nid.port),
- oid, rsp, buf, data);
- if (cb_ret)
+ sheep = addr_to_str(n->nid.addr, n->nid.port);
+ switch (rsp->result) {
+ case SD_RES_SUCCESS:
+ printf("%s has the object\n", sheep);
+ break;
+ case SD_RES_NO_OBJ:
+ printf("%s doesn't have the object\n", sheep);
break;
+ case SD_RES_OLD_NODE_VER:
+ case SD_RES_NEW_NODE_VER:
+ sd_err("The node list has changed: please try again");
+ break;
+ default:
+ sd_err("%s: hit an unexpected error (%s)", sheep,
+ sd_strerror(rsp->result));
+ break;
+ }
}
-
- free(buf);
}
-
static int vdi_list(int argc, char **argv)
{
const char *vdiname = argv[optind];
@@ -901,67 +812,52 @@ static int vdi_object_map(int argc, char **argv)
+/*
+ * Print which nodes hold an object of the named VDI: the inode object when
+ * idx is ~0, otherwise the data object referenced at that index.
+ * Returns EXIT_SUCCESS, EXIT_FAILURE for an out-of-range index, or the
+ * read_vdi_obj() status when the inode cannot be read.
+ */
static int vdi_object_location(int argc, char **argv)
{
const char *vdiname = argv[optind];
- uint64_t idx = vdi_cmd_data.index;
- struct get_vdi_info info;
- uint32_t vid;
- size_t size;
-
- memset(&info, 0, sizeof(info));
- info.name = vdiname;
- info.tag = vdi_cmd_data.snapshot_tag;
- info.vid = 0;
- info.snapid = vdi_cmd_data.snapshot_id;
-
- if (parse_vdi(vdi_info_filler, SD_INODE_HEADER_SIZE, &info) < 0)
- return EXIT_SYSFAIL;
+ uint64_t idx = vdi_cmd_data.index, oid;
+ struct sd_inode *inode = xmalloc(sizeof(*inode));
+ uint32_t vid, vdi_id;
+ int ret;
- vid = info.vid;
- if (vid == 0) {
- sd_err("VDI not found");
- return EXIT_MISSING;
+ ret = read_vdi_obj(vdiname, vdi_cmd_data.snapshot_id,
+ vdi_cmd_data.snapshot_tag, NULL, inode,
+ SD_INODE_SIZE);
+ if (ret != EXIT_SUCCESS) {
+ sd_err("FATAL: no inode objects");
+ goto out; /* 'out' frees inode; a bare return here leaked it */
}
+ vid = inode->vdi_id;
if (idx == ~0) {
- printf("Looking for the inode object 0x%" PRIx32 " with %d nodes\n\n",
+ printf("Looking for the inode object 0x%" PRIx32 " with %d"
+ " nodes\n\n",
vid, sd_nodes_nr);
- parse_objs(vid_to_vdi_oid(vid), do_print_obj, NULL,
- SD_INODE_SIZE);
- } else {
- struct obj_info_filler_info oid_info = {0};
+ for_each_node_print(vid_to_vdi_oid(vid));
+ ret = EXIT_SUCCESS;
+ goto out;
+ }
- oid_info.success = false;
- oid_info.idx = idx;
+ if (idx >= MAX_DATA_OBJS) {
+ printf("The offset is too large!\n");
+ ret = EXIT_FAILURE;
+ goto out;
+ }
- if (idx >= MAX_DATA_OBJS) {
- printf("The offset is too large!\n");
- exit(EXIT_FAILURE);
- }
+ vdi_id = sd_inode_get_vid(inode, idx);
+ oid = vid_to_data_oid(vdi_id, idx);
+ if (vdi_id) {
+ printf("Looking for the object 0x%" PRIx64
+ " (vid 0x%" PRIx32 " idx %"PRIu64
+ ", %u copies) with %d nodes\n\n",
+ oid, vid, idx, inode->nr_copies, sd_nodes_nr);
- size = get_store_objsize(info.copy_policy,
- vid_to_data_oid(vid, 0));
- parse_objs(vid_to_vdi_oid(vid), obj_info_filler, &oid_info,
- size);
-
- if (oid_info.success) {
- if (oid_info.data_oid) {
- printf("Looking for the object 0x%" PRIx64
- " (vid 0x%" PRIx32 " idx %"PRIu64
- ", %u copies) with %d nodes\n\n",
- oid_info.data_oid, vid, idx,
- info.nr_copies, sd_nodes_nr);
-
- parse_objs(oid_info.data_oid, do_print_obj,
- NULL, size);
- } else
- printf("The inode object 0x%" PRIx32 " idx"
- " %"PRIu64" is not allocated\n",
- vid, idx);
- } else
- sd_err("Failed to read the inode object 0x%" PRIx32,
- vid);
- }
+ for_each_node_print(oid);
+ } else
+ printf("The inode object 0x%" PRIx32 " idx"
+ " %"PRIu64" is not allocated\n",
+ vid, idx);
- return EXIT_SUCCESS;
+out:
+ free(inode);
+ return ret;
}
static int do_track_object(uint64_t oid, uint8_t nr_copies)
@@ -1032,59 +928,37 @@ static int vdi_track(int argc, char **argv)
{
const char *vdiname = argv[optind];
unsigned idx = vdi_cmd_data.index;
- struct get_vdi_info info;
- struct obj_info_filler_info oid_info = {0};
- uint32_t vid;
uint8_t nr_copies;
uint64_t oid = vdi_cmd_data.oid;
+ struct sd_inode *inode = xmalloc(sizeof(*inode));
+ uint32_t vid, vdi_id;
+ int ret;
- memset(&info, 0, sizeof(info));
- info.name = vdiname;
- info.tag = vdi_cmd_data.snapshot_tag;
- info.vid = 0;
- info.snapid = vdi_cmd_data.snapshot_id;
-
- if (parse_vdi(vdi_info_filler, SD_INODE_HEADER_SIZE, &info) < 0)
- return EXIT_SYSFAIL;
-
- vid = info.vid;
- nr_copies = info.nr_copies;
- if (vid == 0) {
- sd_err("VDI not found");
- return EXIT_MISSING;
+ ret = read_vdi_obj(vdiname, vdi_cmd_data.snapshot_id,
+ vdi_cmd_data.snapshot_tag, NULL, inode,
+ SD_INODE_SIZE);
+ if (ret != EXIT_SUCCESS) {
+ sd_err("FATAL: no inode objects");
+ return ret;
}
+ vid = inode->vdi_id;
+ nr_copies = inode->nr_copies;
if (!oid) {
if (idx == ~0) {
printf("Tracking the inode object 0x%" PRIx32
" with %d nodes\n", vid, sd_nodes_nr);
+ free(inode);
return do_track_object(vid_to_vdi_oid(vid), nr_copies);
}
- oid_info.success = false;
- oid_info.idx = idx;
-
if (idx >= MAX_DATA_OBJS) {
printf("The offset is too large!\n");
goto err;
}
- parse_objs(vid_to_vdi_oid(vid), obj_info_filler, &oid_info,
- get_store_objsize(info.copy_policy,
- vid_to_data_oid(vid, 0)));
-
- if (!oid_info.success) {
- sd_err("Failed to read the inode object 0x%" PRIx32,
- vid);
- goto err;
- }
- if (!oid_info.data_oid) {
- printf("The inode object 0x%"PRIx32
- " idx %u is not allocated\n", vid, idx);
- goto err;
- }
-
- oid = oid_info.data_oid;
+ vdi_id = sd_inode_get_vid(inode, idx);
+ oid = vid_to_data_oid(vdi_id, idx);
printf("Tracking the object 0x%" PRIx64
" (the inode vid 0x%" PRIx32 " idx %u)"
@@ -1094,9 +968,10 @@ static int vdi_track(int argc, char **argv)
" (the inode vid 0x%" PRIx32 ")"
" with %d nodes\n", oid, vid, sd_nodes_nr);
+ free(inode);
return do_track_object(oid, nr_copies);
-
err:
+ free(inode);
return EXIT_FAILURE;
}
diff --git a/include/internal_proto.h b/include/internal_proto.h
index 460264c..ace4ac5 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -99,6 +99,7 @@
#define SD_OP_SET_LOGLEVEL 0xBA
#define SD_OP_NFS_CREATE 0xBB
#define SD_OP_NFS_DELETE 0xBC
+#define SD_OP_EXIST 0xBD
/* internal flags for hdr.flags, must be above 0x80 */
#define SD_FLAG_CMD_RECOVERY 0x0080
diff --git a/sheep/ops.c b/sheep/ops.c
index 0e9bc82..ca00a18 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -1015,6 +1015,19 @@ static int local_set_loglevel(struct request *req)
return SD_RES_SUCCESS;
}
+/* Report whether the object named in the request exists in the local store. */
+static int local_oid_exist(struct request *req)
+{
+ uint64_t oid = req->rq.obj.oid;
+ uint8_t idx = local_ec_index(req->vinfo, oid);
+
+ /* an erasure-coded object for which no local index was found */
+ if (is_erasure_oid(oid) && idx == SD_MAX_COPIES)
+ return SD_RES_NO_OBJ;
+
+ return sd_store->exist(oid, idx) ? SD_RES_SUCCESS : SD_RES_NO_OBJ;
+}
+
#ifdef HAVE_NFS
static int local_nfs_create(struct request *req)
@@ -1336,6 +1349,13 @@ static struct sd_op_template sd_ops[] = {
.process_work = local_set_loglevel,
},
+ [SD_OP_EXIST] = {
+ .name = "EXIST",
+ .type = SD_OP_TYPE_LOCAL,
+ .force = true,
+ .process_work = local_oid_exist,
+ },
+
#ifdef HAVE_NFS
[SD_OP_NFS_CREATE] = {
.name = "NFS_CREATE",
diff --git a/sheep/recovery.c b/sheep/recovery.c
index 1e5f1af..859375d 100644
--- a/sheep/recovery.c
+++ b/sheep/recovery.c
@@ -445,19 +445,20 @@ out:
return lost;
}
-static uint8_t local_node_copy_index(struct vnode_info *vinfo, uint64_t oid)
+/*
+ * Return the index idx, below min(copy number of the VDI, vinfo->nr_zones),
+ * at which oid_to_node() yields the local node. Returns SD_MAX_COPIES when
+ * @oid is not an erasure oid or when no such index is found.
+ */
+uint8_t local_ec_index(struct vnode_info *vinfo, uint64_t oid)
{
- int idx;
+ int idx, m;
if (!is_erasure_oid(oid))
- return 0; /* no need to proceed */
+ return SD_MAX_COPIES;
- for (idx = 0; idx < vinfo->nr_zones; idx++) {
+ /* the bound is unused on the early-return path, so look it up only here */
+ m = min(get_vdi_copy_number(oid_to_vid(oid)), vinfo->nr_zones);
+ for (idx = 0; idx < m; idx++) {
const struct sd_node *n = oid_to_node(oid, &vinfo->vroot, idx);
if (node_is_local(n))
return idx;
}
- panic("can't get valid index for %"PRIx64, oid);
+ sd_debug("can't get valid index for %"PRIx64, oid);
+ return SD_MAX_COPIES;
}
/*
@@ -486,7 +487,7 @@ static int recover_erasure_object(struct recovery_obj_work *row)
uint8_t idx;
int ret = -1;
- idx = local_node_copy_index(cur, oid);
+ idx = local_ec_index(cur, oid);
buf = read_erasure_object(oid, idx, row);
if (!buf && !row->stop)
buf = rebuild_erasure_object(oid, idx, row);
@@ -530,7 +531,7 @@ static void recover_object_work(struct work *work)
struct vnode_info *cur = rw->cur_vinfo;
int ret, epoch;
- if (sd_store->exist(oid, local_node_copy_index(cur, oid))) {
+ if (sd_store->exist(oid, local_ec_index(cur, oid))) {
sd_debug("the object is already recovered");
return;
}
@@ -585,7 +586,7 @@ main_fn bool oid_in_recovery(uint64_t oid)
return false;
cur = rinfo->cur_vinfo;
- if (sd_store->exist(oid, local_node_copy_index(cur, oid))) {
+ if (sd_store->exist(oid, local_ec_index(cur, oid))) {
sd_debug("the object %" PRIx64 " is already recoverd", oid);
return false;
}
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index f9c5062..3737f5a 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -436,6 +436,7 @@ int gateway_write_obj(struct request *req);
int gateway_create_and_write_obj(struct request *req);
int gateway_remove_obj(struct request *req);
bool is_erasure_oid(uint64_t oid);
+uint8_t local_ec_index(struct vnode_info *vinfo, uint64_t oid);
/* object_cache */
--
1.8.1.2
More information about the sheepdog
mailing list