[Sheepdog] [PATCH 2/5] sheep: setup node_list_entry before starting object recovery
MORITA Kazutaka
morita.kazutaka at lab.ntt.co.jp
Thu Sep 1 11:46:41 CEST 2011
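This avoids redundant disk accesses and simplifies the recovery code: the node and vnode lists are read from the epoch log once in __start_recovery() and cached in struct recovery_work, instead of being re-read for every object in recover_one().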
Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
sheep/store.c | 106 ++++++++++++++++++++++++---------------------------------
1 files changed, 45 insertions(+), 61 deletions(-)
diff --git a/sheep/store.c b/sheep/store.c
index d251d21..2a491f2 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -1108,6 +1108,15 @@ struct recovery_work {
int nr_blocking;
int count;
uint64_t *oids;
+
+ int old_nr_nodes;
+ struct sheepdog_node_list_entry old_nodes[SD_MAX_NODES];
+ int cur_nr_nodes;
+ struct sheepdog_node_list_entry cur_nodes[SD_MAX_NODES];
+ int old_nr_vnodes;
+ struct sheepdog_vnode_list_entry old_vnodes[SD_MAX_VNODES];
+ int cur_nr_vnodes;
+ struct sheepdog_vnode_list_entry cur_vnodes[SD_MAX_VNODES];
};
static LIST_HEAD(recovery_work_list);
@@ -1400,9 +1409,6 @@ static void recover_one(struct work *work, int idx)
char *buf = NULL;
int ret;
uint64_t oid = rw->oids[rw->done];
- struct sheepdog_node_list_entry *old_nodes, *cur_nodes;
- struct sheepdog_vnode_list_entry *old_vnodes, *cur_vnodes;
- int old_nr_nodes, cur_nr_nodes, old_nr_vnodes, cur_nr_vnodes;
int old_copies, cur_copies;
uint32_t epoch = rw->epoch;
int i, copy_idx = 0, cur_idx = -1;
@@ -1410,15 +1416,6 @@ static void recover_one(struct work *work, int idx)
eprintf("%"PRIu32" %"PRIu32", %16"PRIx64"\n", rw->done, rw->count, oid);
- old_nodes = malloc(sizeof(*old_nodes) * SD_MAX_NODES);
- cur_nodes = malloc(sizeof(*cur_nodes) * SD_MAX_NODES);
- old_vnodes = malloc(sizeof(*old_vnodes) * SD_MAX_VNODES);
- cur_vnodes = malloc(sizeof(*cur_vnodes) * SD_MAX_VNODES);
- if (!old_nodes || !cur_nodes || !old_vnodes || !cur_vnodes) {
- eprintf("oom\n");
- goto out;
- }
-
fd = ob_open(epoch, oid, 0, &ret);
if (fd != -1) {
/* the object is already recovered */
@@ -1435,37 +1432,18 @@ static void recover_one(struct work *work, int idx)
else
buf = malloc(SD_DATA_OBJ_SIZE);
- cur_nr_nodes = epoch_log_read(epoch, (char *)cur_nodes,
- sizeof(*cur_nodes) * SD_MAX_NODES);
- if (cur_nr_nodes <= 0) {
- eprintf("failed to read current epoch, %"PRIu32"\n", epoch);
- goto out;
- }
- cur_nr_nodes /= sizeof(*cur_nodes);
-
- old_nr_nodes = epoch_log_read(epoch - 1, (char *)old_nodes,
- sizeof(*old_nodes) * SD_MAX_NODES);
- if (old_nr_nodes <= 0) {
- eprintf("failed to read previous epoch, %"PRIu32"\n", epoch - 1);
- goto fail;
- }
- old_nr_nodes /= sizeof(*old_nodes);
-
- old_nr_vnodes = nodes_to_vnodes(old_nodes, old_nr_nodes, old_vnodes);
- cur_nr_vnodes = nodes_to_vnodes(cur_nodes, cur_nr_nodes, cur_vnodes);
-
if (!sys->nr_sobjs)
goto fail;
- cur_idx = obj_to_sheep(cur_vnodes, cur_nr_vnodes, oid, 0);
+ cur_idx = obj_to_sheep(rw->cur_vnodes, rw->cur_nr_vnodes, oid, 0);
- old_copies = get_max_copies(old_nodes, old_nr_nodes);
- cur_copies = get_max_copies(cur_nodes, cur_nr_nodes);
+ old_copies = get_max_copies(rw->old_nodes, rw->old_nr_nodes);
+ cur_copies = get_max_copies(rw->cur_nodes, rw->cur_nr_nodes);
copy_idx = -1;
for (i = 0; i < cur_copies; i++) {
- int n = obj_to_sheep(cur_vnodes, cur_nr_vnodes, oid, i);
- if (is_myself(cur_vnodes[n].addr, cur_vnodes[n].port)) {
+ int n = obj_to_sheep(rw->cur_vnodes, rw->cur_nr_vnodes, oid, i);
+ if (is_myself(rw->cur_vnodes[n].addr, rw->cur_vnodes[n].port)) {
copy_idx = i;
break;
}
@@ -1475,10 +1453,11 @@ static void recover_one(struct work *work, int idx)
goto out;
}
- dprintf("%"PRIu32", %"PRIu32", %"PRIu32"\n", cur_idx, cur_nr_nodes, copy_idx);
+ dprintf("%"PRIu32", %"PRIu32", %"PRIu32"\n", cur_idx, rw->cur_nr_nodes,
+ copy_idx);
- ret = __recover_one(rw, old_vnodes, old_nr_vnodes, old_copies,
- cur_vnodes, cur_nr_vnodes, cur_copies,
+ ret = __recover_one(rw, rw->old_vnodes, rw->old_nr_vnodes, old_copies,
+ rw->cur_vnodes, rw->cur_nr_vnodes, cur_copies,
cur_idx, copy_idx, epoch, epoch - 1, oid,
buf, SD_DATA_OBJ_SIZE);
if (ret == 0)
@@ -1487,8 +1466,8 @@ static void recover_one(struct work *work, int idx)
for (i = 0; i < cur_copies; i++) {
if (i == copy_idx)
continue;
- ret = __recover_one(rw, old_vnodes, old_nr_vnodes, old_copies,
- cur_vnodes, cur_nr_vnodes, cur_copies, cur_idx, i,
+ ret = __recover_one(rw, rw->old_vnodes, rw->old_nr_vnodes, old_copies,
+ rw->cur_vnodes, rw->cur_nr_vnodes, cur_copies, cur_idx, i,
epoch, epoch - 1, oid, buf, SD_DATA_OBJ_SIZE);
if (ret == 0)
goto out;
@@ -1496,10 +1475,6 @@ static void recover_one(struct work *work, int idx)
fail:
eprintf("failed to recover object %"PRIx64"\n", oid);
out:
- free(old_nodes);
- free(cur_nodes);
- free(old_vnodes);
- free(cur_vnodes);
free(buf);
}
@@ -1785,34 +1760,43 @@ static void __start_recovery(struct work *work, int idx)
{
struct recovery_work *rw = container_of(work, struct recovery_work, work);
uint32_t epoch = rw->epoch;
- struct sheepdog_node_list_entry old_entry[SD_MAX_NODES],
- cur_entry[SD_MAX_NODES];
- int old_nr, cur_nr, nr_objs;
+ int nr_objs;
int fd;
char path[PATH_MAX], tmp_path[PATH_MAX];
int ret;
dprintf("%u\n", epoch);
- cur_nr = epoch_log_read(epoch, (char *)cur_entry, sizeof(cur_entry));
- if (cur_nr <= 0) {
- eprintf("failed to read epoch log, %"PRIu32"\n", epoch);
- goto fail;
- }
- cur_nr /= sizeof(struct sheepdog_node_list_entry);
+ if (rw->cur_nr_nodes == 0) {
+ /* setup node list and virtual node list */
+ rw->cur_nr_nodes = epoch_log_read(epoch, (char *)rw->cur_nodes,
+ sizeof(rw->cur_nodes));
+ if (rw->cur_nr_nodes <= 0) {
+ eprintf("failed to read epoch log, %"PRIu32"\n", epoch);
+ goto fail;
+ }
+ rw->cur_nr_nodes /= sizeof(struct sheepdog_node_list_entry);
- old_nr = epoch_log_read(epoch - 1, (char *)old_entry, sizeof(old_entry));
- if (old_nr <= 0) {
- eprintf("failed to read epoch log, %"PRIu32"\n", epoch - 1);
- goto fail;
+ rw->old_nr_nodes = epoch_log_read(epoch - 1, (char *)rw->old_nodes,
+ sizeof(rw->old_nodes));
+ if (rw->old_nr_nodes <= 0) {
+ eprintf("failed to read epoch log, %"PRIu32"\n", epoch - 1);
+ goto fail;
+ }
+ rw->old_nr_nodes /= sizeof(struct sheepdog_node_list_entry);
+
+ rw->old_nr_vnodes = nodes_to_vnodes(rw->old_nodes, rw->old_nr_nodes,
+ rw->old_vnodes);
+ rw->cur_nr_vnodes = nodes_to_vnodes(rw->cur_nodes, rw->cur_nr_nodes,
+ rw->cur_vnodes);
}
- old_nr /= sizeof(struct sheepdog_node_list_entry);
if (!sys->nr_sobjs)
goto fail;
- nr_objs = get_max_copies(cur_entry, cur_nr);
+ nr_objs = get_max_copies(rw->cur_nodes, rw->cur_nr_nodes);
- if (fill_obj_list(rw, old_entry, old_nr, cur_entry, cur_nr, nr_objs) != 0) {
+ if (fill_obj_list(rw, rw->old_nodes, rw->old_nr_nodes, rw->cur_nodes,
+ rw->cur_nr_nodes, nr_objs) != 0) {
eprintf("fatal recovery error\n");
goto fail;
}
--
1.7.2.5
More information about the sheepdog mailing list