[Sheepdog] [PATCH 03/14] fix I/O accesses to multiple unrecovered objects
MORITA Kazutaka
morita.kazutaka at lab.ntt.co.jp
Sat May 14 09:03:48 CEST 2011
If clients access unrecovered objects, Sheepdog should recover them
first. This fixes a bug which occurs when the number of such objects
are more than one.
Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
sheep/store.c | 47 +++++++++++++++++++++++++++++------------------
1 files changed, 29 insertions(+), 18 deletions(-)
diff --git a/sheep/store.c b/sheep/store.c
index 9f04ccc..c229679 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -1054,13 +1054,13 @@ struct recovery_work {
struct work work;
struct list_head rw_siblings;
+ int nr_blocking;
int count;
uint64_t *oids;
};
static LIST_HEAD(recovery_work_list);
static struct recovery_work *recovering_work;
-static uint64_t blocking_oid;
/*
* find_tgt_node - find the node from which we should recover objects
@@ -1334,9 +1334,6 @@ static void recover_one(struct work *work, int idx)
eprintf("%"PRIu32" %"PRIu32", %16"PRIx64"\n", rw->done, rw->count, oid);
- if (blocking_oid)
- oid = blocking_oid; /* recover the blocked object first */
-
fd = ob_open(epoch, oid, 0, &ret);
if (fd != -1) {
/* the object is already recovered */
@@ -1417,7 +1414,6 @@ out:
}
static struct recovery_work *suspended_recovery_work;
-static uint64_t recovering_oid;
static void __start_recovery(struct work *work, int idx);
@@ -1431,7 +1427,6 @@ static void recover_timer(void *data)
return;
}
- recovering_oid = oid;
queue_work(&rw->work);
}
@@ -1450,16 +1445,15 @@ void resume_recovery_work(void)
return;
suspended_recovery_work = NULL;
- recovering_oid = oid;
queue_work(&rw->work);
}
int is_recoverying_oid(uint64_t oid)
{
uint64_t hval = fnv_64a_buf(&oid, sizeof(uint64_t), FNV1A_64_INIT);
- uint64_t recovering_hval = fnv_64a_buf(&recovering_oid, sizeof(uint64_t), FNV1A_64_INIT);
+ uint64_t min_hval;
struct recovery_work *rw = recovering_work;
- int ret, fd;
+ int ret, fd, i;
if (oid == 0)
return 0;
@@ -1467,6 +1461,8 @@ int is_recoverying_oid(uint64_t oid)
if (!rw)
return 0; /* there is no thread working for object recovery */
+ min_hval = fnv_64a_buf(&rw->oids[rw->done + rw->nr_blocking], sizeof(uint64_t), FNV1A_64_INIT);
+
if (before(rw->epoch, sys->epoch))
return 1;
@@ -1480,11 +1476,27 @@ int is_recoverying_oid(uint64_t oid)
return 0;
}
- if (recovering_hval <= hval) {
- if (bsearch(&oid, rw->oids + rw->done,
- rw->count - rw->done, sizeof(oid), obj_cmp)) {
+ /* the first 'rw->nr_blocking' objects were already scheduled to be done earlier */
+ for (i = 0; i < rw->nr_blocking; i++)
+ if (rw->oids[rw->done + i] == oid)
+ return 1;
+
+ if (min_hval <= hval) {
+ uint64_t *p;
+ p = bsearch(&oid, rw->oids + rw->done + rw->nr_blocking,
+ rw->count - rw->done - rw->nr_blocking, sizeof(oid), obj_cmp);
+ if (p) {
dprintf("recover the object %" PRIx64 " first\n", oid);
- blocking_oid = oid;
+ if (rw->nr_blocking == 0)
+ rw->nr_blocking = 1; /* the first oid may be processed now */
+ if (p > rw->oids + rw->done + rw->nr_blocking) {
+ /* this object should be recovered earlier */
+ memmove(rw->oids + rw->done + rw->nr_blocking + 1,
+ rw->oids + rw->done + rw->nr_blocking,
+ sizeof(uint64_t) * (p - (rw->oids + rw->done + rw->nr_blocking)));
+ rw->oids[rw->done + rw->nr_blocking] = oid;
+ rw->nr_blocking++;
+ }
return 1;
}
}
@@ -1500,8 +1512,11 @@ static void recover_done(struct work *work, int idx)
if (rw->state == RW_INIT)
rw->state = RW_RUN;
- else if (!rw->retry)
+ else if (!rw->retry) {
rw->done++;
+ if (rw->nr_blocking > 0)
+ rw->nr_blocking--;
+ }
oid = rw->oids[rw->done];
@@ -1514,8 +1529,6 @@ static void recover_done(struct work *work, int idx)
return;
}
- blocking_oid = 0;
-
if (rw->done < rw->count && list_empty(&recovery_work_list)) {
rw->work.fn = recover_one;
@@ -1523,14 +1536,12 @@ static void recover_done(struct work *work, int idx)
suspended_recovery_work = rw;
return;
}
- recovering_oid = oid;
resume_pending_requests();
queue_work(&rw->work);
return;
}
dprintf("recovery done, %"PRIu32"\n", rw->epoch);
- recovering_oid = 0;
recovering_work = NULL;
sys->recovered_epoch = rw->epoch;
--
1.5.6.5
More information about the sheepdog
mailing list