[sheepdog-users] Stability problems with kvm using a remote sheepdog volume
Christoph Hellwig
hch at infradead.org
Mon Jun 25 18:53:16 CEST 2012
On Tue, Jun 12, 2012 at 02:04:28PM +0900, MORITA Kazutaka wrote:
> I updated the qemu tree; can you try again? I also recommend
> updating your sheepdog code to the latest version, because a fatal
> network I/O problem was fixed last week.
I don't think "sheepdog: create all aio_reqs before sending I/Os"
is a good idea - the only thing we need to do is delay the call that
frees the acb. Something like the untested patch below:
Is there any chance of getting the fixes submitted upstream ASAP? I have
some other changes, and life would be easier if I could work against
upstream directly.
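
For illustration, here is a minimal, self-contained sketch of the counting
scheme the patch switches to; the structure and function names below
(my_aiocb, drop_ref, submit_all, issue_request) are made up for the example
and are not the actual qemu/sheepdog identifiers:

/*
 * Minimal sketch of the pending-request counter.  The names here are
 * illustrative only, not the real qemu/sheepdog types.
 */
#include <stdio.h>

struct my_aiocb {
    int nr_pending;   /* outstanding requests plus one submission reference */
    int ret;
};

static void finish_aiocb(struct my_aiocb *acb)
{
    /* In qemu this would re-enter the coroutine and release the acb. */
    printf("aiocb complete, ret = %d\n", acb->ret);
}

/* Completion path: drop one reference, finish when the last one is gone. */
static void drop_ref(struct my_aiocb *acb)
{
    if (--acb->nr_pending == 0) {
        finish_aiocb(acb);
    }
}

/* Stand-in for an asynchronous submit; here every request completes
 * immediately, which is the worst case for the submission loop below. */
static void issue_request(struct my_aiocb *acb)
{
    drop_ref(acb);
}

static void submit_all(struct my_aiocb *acb, int nr_requests)
{
    /*
     * Hold an extra reference across the submission loop so that a
     * request completing early cannot drive nr_pending to zero and
     * free the acb while we are still using it.
     */
    acb->nr_pending++;

    for (int i = 0; i < nr_requests; i++) {
        acb->nr_pending++;
        issue_request(acb);
    }

    drop_ref(acb);   /* drop the submission reference */
}

int main(void)
{
    struct my_aiocb acb = { .nr_pending = 0, .ret = 0 };

    submit_all(&acb, 3);
    return 0;
}

Without the extra reference, the first immediately-completing request would
already drive nr_pending to zero and finalize the acb while submit_all() is
still in its loop - exactly the premature free the extra increment in
sd_co_rw_vector() guards against.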
Index: qemu/block/sheepdog.c
===================================================================
--- qemu.orig/block/sheepdog.c 2012-06-22 11:57:53.093286240 +0200
+++ qemu/block/sheepdog.c 2012-06-25 18:41:18.837311516 +0200
@@ -283,8 +283,7 @@ struct SheepdogAIOCB {
void (*aio_done_func)(SheepdogAIOCB *);
int canceled;
-
- QLIST_HEAD(aioreq_head, AIOReq) aioreq_head;
+ int nr_pending;
};
typedef struct BDRVSheepdogState {
@@ -388,19 +387,19 @@ static inline AIOReq *alloc_aio_req(BDRV
QLIST_INSERT_HEAD(&s->outstanding_aio_head, aio_req,
outstanding_aio_siblings);
- QLIST_INSERT_HEAD(&acb->aioreq_head, aio_req, aioreq_siblings);
+ acb->nr_pending++;
return aio_req;
}
-static inline int free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req)
+static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req)
{
SheepdogAIOCB *acb = aio_req->aiocb;
+
QLIST_REMOVE(aio_req, outstanding_aio_siblings);
- QLIST_REMOVE(aio_req, aioreq_siblings);
g_free(aio_req);
- return !QLIST_EMPTY(&acb->aioreq_head);
+ acb->nr_pending--;
}
static void coroutine_fn sd_finish_aiocb(SheepdogAIOCB *acb)
@@ -446,7 +445,7 @@ static SheepdogAIOCB *sd_aio_setup(Block
acb->canceled = 0;
acb->coroutine = qemu_coroutine_self();
acb->ret = 0;
- QLIST_INIT(&acb->aioreq_head);
+ acb->nr_pending = 0;
return acb;
}
@@ -642,7 +641,7 @@ static void coroutine_fn send_pending_re
if (ret < 0) {
error_report("add_aio_request is failed");
free_aio_req(s, aio_req);
- if (QLIST_EMPTY(&acb->aioreq_head)) {
+ if (!acb->nr_pending) {
sd_finish_aiocb(acb);
}
}
@@ -663,7 +662,6 @@ static void coroutine_fn aio_read_respon
int ret;
AIOReq *aio_req = NULL;
SheepdogAIOCB *acb;
- int rest;
unsigned long idx;
if (QLIST_EMPTY(&s->outstanding_aio_head)) {
@@ -734,8 +732,8 @@ static void coroutine_fn aio_read_respon
error_report("%s", sd_strerror(rsp.result));
}
- rest = free_aio_req(s, aio_req);
- if (!rest) {
+ free_aio_req(s, aio_req);
+ if (!acb->nr_pending) {
/*
* We've finished all requests which belong to the AIOCB, so
* we can switch back to sd_co_readv/writev now.
@@ -1547,6 +1545,12 @@ static int coroutine_fn sd_co_rw_vector(
}
}
+ /*
+ * Make sure we don't free the aiocb before we are done with all requests.
+ * This additional reference is dropped at the end of this function.
+ */
+ acb->nr_pending++;
+
while (done != total) {
uint8_t flags = 0;
uint64_t old_oid = 0;
@@ -1615,7 +1619,8 @@ static int coroutine_fn sd_co_rw_vector(
done += len;
}
out:
- if (QLIST_EMPTY(&acb->aioreq_head)) {
+ if (!--acb->nr_pending) {
+ acb->aio_done_func(acb);
return acb->ret;
}
return 1;
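
For context, this is roughly how the coroutine callers (sd_co_readv/writev)
consume sd_co_rw_vector()'s return value; it is a simplified sketch, not the
verbatim qemu code:

/* Simplified caller sketch (not the verbatim qemu code): a return value
 * <= 0 means the acb already completed, 1 means requests are still
 * pending and the coroutine must yield until aio_read_response()
 * re-enters it after the last free_aio_req()/nr_pending drop. */
static int coroutine_fn sd_co_writev_sketch(SheepdogAIOCB *acb)
{
    int ret = sd_co_rw_vector(acb);

    if (ret <= 0) {
        qemu_aio_release(acb);     /* finished or failed synchronously */
        return ret;
    }

    qemu_coroutine_yield();        /* woken up when nr_pending hits zero */
    return acb->ret;
}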