[sheepdog] [PATCH] fix a bug where read_object() fails in recovery
levin li
levin108 at gmail.com
Wed Jun 20 13:09:52 CEST 2012
From: levin li <xingke.lwp at taobao.com>
read_object() calls forward_read_obj_req() to get the object, but that
call may fail with the result SD_RES_OLD_NODE_VER, and read_object() does
nothing to handle this error. This patch fixes the problem by making
read_object() wait, when it gets SD_RES_OLD_NODE_VER, until the epoch
changes; the waiting thread is then woken up through a condition
variable and retries the forwarded read.
Signed-off-by: levin li <xingke.lwp at taobao.com>
---
sheep/sdnet.c | 5 +++++
sheep/sheep.c | 4 ++++
sheep/sheep_priv.h | 2 ++
sheep/store.c | 8 ++++++++
4 files changed, 19 insertions(+)
diff --git a/sheep/sdnet.c b/sheep/sdnet.c
index f7eb6c8..054e2b1 100644
--- a/sheep/sdnet.c
+++ b/sheep/sdnet.c
@@ -13,6 +13,7 @@
#include <stdlib.h>
#include <unistd.h>
#include <netdb.h>
+#include <pthread.h>
#include <arpa/inet.h>
#include <netinet/tcp.h>
#include <sys/epoll.h>
@@ -242,6 +243,10 @@ void resume_wait_epoch_requests(void)
struct request *req, *t;
LIST_HEAD(pending_list);
+ pthread_mutex_lock(&sys->rw_object_mutex);
+ pthread_cond_broadcast(&sys->rw_object_cond);
+ pthread_mutex_unlock(&sys->rw_object_mutex);
+
list_splice_init(&sys->wait_rw_queue, &pending_list);
list_for_each_entry_safe(req, t, &pending_list, request_list) {
diff --git a/sheep/sheep.c b/sheep/sheep.c
index a2cd43e..9b95a92 100644
--- a/sheep/sheep.c
+++ b/sheep/sheep.c
@@ -17,6 +17,7 @@
#include <unistd.h>
#include <time.h>
#include <signal.h>
+#include <pthread.h>
#include <linux/limits.h>
#include <sys/syslog.h>
@@ -275,6 +276,9 @@ int main(int argc, char **argv)
exit(1);
}
+ pthread_mutex_init(&sys->rw_object_mutex, NULL);
+ pthread_cond_init(&sys->rw_object_cond, NULL);
+
sys->gateway_wqueue = init_work_queue("gateway", nr_gateway_worker);
sys->io_wqueue = init_work_queue("io", nr_io_worker);
sys->recovery_wqueue = init_work_queue("recovery", 1);
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 7a86533..b62de77 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -130,6 +130,8 @@ struct cluster_info {
int nr_copies;
+ pthread_mutex_t rw_object_mutex;
+ pthread_cond_t rw_object_cond;
struct list_head wait_rw_queue;
struct list_head wait_obj_queue;
int nr_outstanding_reqs;
diff --git a/sheep/store.c b/sheep/store.c
index 61f822c..a83f00e 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -590,6 +590,14 @@ forward_read:
read_req.vnodes = vnodes;
ret = forward_read_obj_req(&read_req);
+
+ if (SD_RES_OLD_NODE_VER == ret) {
+ pthread_mutex_lock(&sys->rw_object_mutex);
+ pthread_cond_wait(&sys->rw_object_cond, &sys->rw_object_mutex);
+ pthread_mutex_unlock(&sys->rw_object_mutex);
+ goto forward_read;
+ }
+
if (ret != SD_RES_SUCCESS)
eprintf("failed to forward read object %x\n", ret);
--
1.7.10
More information about the sheepdog
mailing list