[sheepdog] [PATCH V2 00/11] INTRODUCE
MORITA Kazutaka
morita.kazutaka at lab.ntt.co.jp
Mon Aug 20 20:03:39 CEST 2012
At Mon, 20 Aug 2012 23:34:10 +0800,
Yunkai Zhang wrote:
>
> In fact, I have thought about this method, but we would face nearly the same problem:
>
> After a sheep joins back, it should know which objects are dirty, and
> should do the cleanup work (because old versions of objects remain in
> its working directory). This method does not seem to save any steps, but
> will do extra recovery work.
Can you give me a concrete example?
I created a really naive patch to disable object recovery with my
idea:
==
diff --git a/sheep/recovery.c b/sheep/recovery.c
index 5164aa7..8bf032f 100644
--- a/sheep/recovery.c
+++ b/sheep/recovery.c
@@ -35,6 +35,7 @@ struct recovery_work {
uint64_t *oids;
uint64_t *prio_oids;
int nr_prio_oids;
+ int nr_scheduled_oids;
struct vnode_info *old_vinfo;
struct vnode_info *cur_vinfo;
@@ -269,9 +270,6 @@ static inline void prepare_schedule_oid(uint64_t oid)
oid);
return;
}
- /* The oid is currently being recovered */
- if (rw->oids[rw->done] == oid)
- return;
rw->nr_prio_oids++;
rw->prio_oids = xrealloc(rw->prio_oids,
rw->nr_prio_oids * sizeof(uint64_t));
@@ -399,9 +397,31 @@ static inline void finish_schedule_oids(struct recovery_work *rw)
done:
free(rw->prio_oids);
rw->prio_oids = NULL;
+ rw->nr_scheduled_oids += rw->nr_prio_oids;
rw->nr_prio_oids = 0;
}
+static struct timer recovery_timer;
+
+static void recover_next_object(void *arg)
+{
+ struct recovery_work *rw = arg;
+
+ if (rw->nr_prio_oids)
+ finish_schedule_oids(rw);
+
+ if (rw->done < rw->nr_scheduled_oids) {
+ /* Try recover next object */
+ queue_work(sys->recovery_wqueue, &rw->work);
+ return;
+ }
+
+ /* There is no objects to be recovered. Try again later */
+ recovery_timer.callback = recover_next_object;
+ recovery_timer.data = rw;
+ add_timer(&recovery_timer, 1); /* FIXME */
+}
+
static void recover_object_main(struct work *work)
{
struct recovery_work *rw = container_of(work, struct recovery_work,
@@ -425,11 +445,7 @@ static void recover_object_main(struct work *work)
resume_wait_obj_requests(rw->oids[rw->done++]);
if (rw->done < rw->count) {
- if (rw->nr_prio_oids)
- finish_schedule_oids(rw);
-
- /* Try recover next object */
- queue_work(sys->recovery_wqueue, &rw->work);
+ recover_next_object(rw);
return;
}
@@ -458,7 +474,7 @@ static void finish_object_list(struct work *work)
resume_wait_recovery_requests();
rw->work.fn = recover_object_work;
rw->work.done = recover_object_main;
- queue_work(sys->recovery_wqueue, &rw->work);
+ recover_next_object(rw);
return;
}
==
I ran the following test, and object recovery was disabled correctly
for both the join and leave cases.
==
#!/bin/bash
for i in 0 1 2 3; do
./sheep/sheep /store/$i -z $i -p 700$i -c local
done
sleep 1
./collie/collie cluster format
./collie/collie vdi create test 4G
echo " * objects will be created on node[0-2] *"
md5sum /store/[0,1,2,3]/obj/807c2b2500000000
pkill -f "./sheep/sheep /store/1"
sleep 3
echo " * recovery doesn't start until the object is touched *"
md5sum /store/[0,2,3]/obj/807c2b2500000000
./collie/collie vdi snapshot test # invoke recovery of the vdi object
echo " * the object is recovered *"
md5sum /store/[0,2,3]/obj/807c2b2500000000
./sheep/sheep /store/1 -z 1 -p 7001 -c local
sleep 3
echo " * recovery doesn't start until the object is touched *"
md5sum /store/[0,1,2,3]/obj/807c2b2500000000
./collie/collie vdi list -p 7001 # invoke recovery of the vdi object
echo " * the object is recovered *"
md5sum /store/[0,1,2,3]/obj/807c2b2500000000
==
[Output]
using backend farm store
* objects will be created on node[0-2] *
701e77eab6002c9a48f7ba72c8d9bfe9 /store/0/obj/807c2b2500000000
701e77eab6002c9a48f7ba72c8d9bfe9 /store/1/obj/807c2b2500000000
701e77eab6002c9a48f7ba72c8d9bfe9 /store/2/obj/807c2b2500000000
* recovery doesn't start until the object is touched *
701e77eab6002c9a48f7ba72c8d9bfe9 /store/0/obj/807c2b2500000000
701e77eab6002c9a48f7ba72c8d9bfe9 /store/2/obj/807c2b2500000000
* the object is recovered *
3c3bf0d865363fd0d1f1d5c7aa044dcd /store/0/obj/807c2b2500000000
3c3bf0d865363fd0d1f1d5c7aa044dcd /store/2/obj/807c2b2500000000
3c3bf0d865363fd0d1f1d5c7aa044dcd /store/3/obj/807c2b2500000000
* recovery doesn't start until the object is touched *
3c3bf0d865363fd0d1f1d5c7aa044dcd /store/0/obj/807c2b2500000000
3c3bf0d865363fd0d1f1d5c7aa044dcd /store/2/obj/807c2b2500000000
Name Id Size Used Shared Creation time VDI id Tag
s test 1 4.0 GB 0.0 MB 0.0 MB 2012-08-21 02:49 7c2b25
test 2 4.0 GB 0.0 MB 0.0 MB 2012-08-21 02:49 7c2b26
* the object is recovered *
3c3bf0d865363fd0d1f1d5c7aa044dcd /store/0/obj/807c2b2500000000
3c3bf0d865363fd0d1f1d5c7aa044dcd /store/1/obj/807c2b2500000000
3c3bf0d865363fd0d1f1d5c7aa044dcd /store/2/obj/807c2b2500000000
I couldn't read an old object at all.
Thanks,
Kazutaka
More information about the sheepdog
mailing list