[Sheepdog] [PATCH] sheep: fix I/O blocking problem during object recovery
MORITA Kazutaka
morita.kazutaka at lab.ntt.co.jp
Sat Jan 1 09:32:51 CET 2011
Currently, Sheepdog blocks I/O operations on objects that have not
been recovered yet. This patch recovers such objects first, which
reduces the time that I/O is blocked.
Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
sheep/store.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++----------
1 files changed, 50 insertions(+), 11 deletions(-)
diff --git a/sheep/store.c b/sheep/store.c
index e93f44d..abc2b30 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -1021,7 +1021,8 @@ struct recovery_work {
};
static LIST_HEAD(recovery_work_list);
-static int recovering;
+static struct recovery_work *recovering_work;
+static uint64_t blocking_oid;
static int find_tgt_node(struct sheepdog_node_list_entry *old_entry, int old_nr, int old_idx,
struct sheepdog_node_list_entry *cur_entry, int cur_nr, int cur_idx,
@@ -1238,9 +1239,20 @@ static void recover_one(struct work *work, int idx)
int old_nr, cur_nr;
uint32_t epoch = rw->epoch;
int i, my_idx = -1, copy_idx = 0, cur_idx = -1;
+ int fd;
eprintf("%"PRIu32" %"PRIu32", %16"PRIx64"\n", rw->done, rw->count, oid);
+ if (blocking_oid)
+ oid = blocking_oid; /* recover the blocked object first */
+
+ fd = ob_open(epoch, oid, 0, &ret);
+ if (fd != -1) {
+ /* the object is already recovered */
+ close(fd);
+ goto out;
+ }
+
if (is_data_obj(oid))
buf = malloc(SD_DATA_OBJ_SIZE);
else
@@ -1341,9 +1353,36 @@ int is_recoverying_oid(uint64_t oid)
{
uint64_t hval = fnv_64a_buf(&oid, sizeof(uint64_t), FNV1A_64_INIT);
uint64_t recovering_hval = fnv_64a_buf(&recovering_oid, sizeof(uint64_t), FNV1A_64_INIT);
+ struct recovery_work *rw = recovering_work;
+ int ret, fd;
+
+ if (oid == 0)
+ return 0;
+
+ if (!rw)
+ return 0; /* there is no thread working for object recovery */
+
+ if (before(rw->epoch, sys->epoch))
+ return 1;
+
+ fd = ob_open(sys->epoch, oid, 0, &ret);
+ if (fd != -1) {
+ dprintf("the object %lx is already recoverd\n", oid);
+ close(fd);
+ return 0;
+ }
+
+ if (recovering_hval <= hval) {
+ if (bsearch(&oid, ((uint64_t *)rw->buf) + rw->done,
+ rw->count - rw->done, sizeof(oid), obj_cmp)) {
+ dprintf("recover the object %lx first\n", oid);
+ blocking_oid = oid;
+ return 1;
+ }
+ }
- return before(sys->recovered_epoch, sys->epoch - 1) ||
- (sys->recovered_epoch == sys->epoch - 1 && recovering_hval <= hval);
+ dprintf("the object %lx is not found\n", oid);
+ return 0;
}
static void recover_done(struct work *work, int idx)
@@ -1351,10 +1390,6 @@ static void recover_done(struct work *work, int idx)
struct recovery_work *rw = container_of(work, struct recovery_work, work);
uint64_t oid = *(((uint64_t *)rw->buf) + rw->done);
- recovering_oid = 0;
-
- resume_pending_requests();
-
if (rw->retry && list_empty(&recovery_work_list)) {
rw->retry = 0;
@@ -1364,6 +1399,8 @@ static void recover_done(struct work *work, int idx)
return;
}
+ blocking_oid = 0;
+
if (rw->done < rw->count && list_empty(&recovery_work_list)) {
rw->work.fn = recover_one;
@@ -1372,12 +1409,14 @@ static void recover_done(struct work *work, int idx)
return;
}
recovering_oid = oid;
+ resume_pending_requests();
queue_work(&rw->work);
return;
}
dprintf("recovery done, %"PRIu32"\n", rw->epoch);
- recovering = 0;
+ recovering_oid = 0;
+ recovering_work = NULL;
sys->recovered_epoch = rw->epoch;
resume_pending_requests();
@@ -1391,7 +1430,7 @@ static void recover_done(struct work *work, int idx)
list_del(&rw->rw_siblings);
- recovering = 1;
+ recovering_work = rw;
queue_work(&rw->work);
}
}
@@ -1582,10 +1621,10 @@ int start_recovery(uint32_t epoch)
rw->work.fn = __start_recovery;
rw->work.done = recover_done;
- if (recovering)
+ if (recovering_work != NULL)
list_add_tail(&rw->rw_siblings, &recovery_work_list);
else {
- recovering = 1;
+ recovering_work = rw;
queue_work(&rw->work);
}
--
1.5.6.5
More information about the sheepdog
mailing list