[Sheepdog] [PATCH RFC v2] Introduce block driver claim and release hooks

Chris Webb chris at arachsys.com
Thu Nov 26 19:07:01 CET 2009


During live migration, block drivers with exclusive locking behaviour [such as
Sheepdog: http://www.osrg.net/sheepdog/] are problematic, as both source and
destination need to have the device open simultaneously. However, the lock is
only required while the vm is running, and at most one vm is running at each
stage of migration. This patch introduces bdrv_claim and bdrv_release hooks
which can be used to claim and release the lock on vm start and stop, allowing
Sheepdog-backed guests to migrate.

This functionality could also be more generally useful. For example, it would
be possible to take fcntl() locks on qcow2 files, preventing corruption from
two qemu processes concurrently modifying qcow2 metadata. Doing this in
bdrv_open() is not possible as it would prevent live migration of guests
backed by qcow2 files on a shared filesystem.
---
 block.c     |  24 ++++++++++++++++++++++++
 block.h     |   2 ++
 block_int.h |   2 ++
 monitor.c   |   4 +++-
 qemu-kvm.c  |   2 ++
 qemu-nbd.c  |   3 ++-
 vl.c        |   7 ++++++-
 7 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/block.c b/block.c
--- a/block.c
+++ b/block.c
@@ -475,6 +475,42 @@
     return 0;
 }
 
+/* Take the driver-level exclusive lock on every open block device.
+ * Returns 0 on success.  If any driver refuses the claim, the locks
+ * already taken are rolled back and -1 is returned, leaving no device
+ * claimed. */
+int bdrv_claim_all(void)
+{
+    BlockDriverState *bs, *undo;
+
+    for (bs = bdrv_first; bs != NULL; bs = bs->next) {
+        if (bs->drv && bs->drv->bdrv_claim && bs->drv->bdrv_claim(bs) < 0) {
+            /* Release only the devices claimed so far; a blanket
+             * bdrv_release_all() would also release devices that were
+             * never claimed. */
+            for (undo = bdrv_first; undo != bs; undo = undo->next) {
+                if (undo->drv && undo->drv->bdrv_release) {
+                    undo->drv->bdrv_release(undo);
+                }
+            }
+            return -1;
+        }
+    }
+    return 0;
+}
+
+/* Drop the driver-level exclusive lock on every open block device. */
+void bdrv_release_all(void)
+{
+    BlockDriverState *bs;
+
+    for (bs = bdrv_first; bs != NULL; bs = bs->next) {
+        if (bs->drv && bs->drv->bdrv_release) {
+            bs->drv->bdrv_release(bs);
+        }
+    }
+}
+
 void bdrv_close(BlockDriverState *bs)
 {
     if (bs->drv) {
diff --git a/block.h b/block.h
--- a/block.h
+++ b/block.h
@@ -58,6 +58,8 @@
 int bdrv_open(BlockDriverState *bs, const char *filename, int flags);
 int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
                BlockDriver *drv);
+int bdrv_claim_all(void); /* take driver locks; 0 on success, -1 on failure */
+void bdrv_release_all(void); /* drop driver locks taken by bdrv_claim_all() */
 void bdrv_close(BlockDriverState *bs);
 int bdrv_check(BlockDriverState *bs);
 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
diff --git a/block_int.h b/block_int.h
--- a/block_int.h
+++ b/block_int.h
@@ -51,6 +51,8 @@
     int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
     int (*bdrv_probe_device)(const char *filename);
     int (*bdrv_open)(BlockDriverState *bs, const char *filename, int flags);
+    int (*bdrv_claim)(BlockDriverState *bs); /* optional: take exclusive lock */
+    void (*bdrv_release)(BlockDriverState *bs); /* optional: drop the lock */
     int (*bdrv_read)(BlockDriverState *bs, int64_t sector_num,
                      uint8_t *buf, int nb_sectors);
     int (*bdrv_write)(BlockDriverState *bs, int64_t sector_num,
diff --git a/monitor.c b/monitor.c
--- a/monitor.c
+++ b/monitor.c
@@ -456,7 +456,12 @@
 
 static void do_quit(Monitor *mon, const QDict *qdict)
 {
-	bdrv_close_all();
+    /* Release driver-level locks (held only while the vm runs) before
+     * closing the block devices. */
+    if (vm_running) {
+        bdrv_release_all();
+    }
+    bdrv_close_all();
     exit(0);
 }
 
diff --git a/qemu-kvm.c b/qemu-kvm.c
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -2215,6 +2215,9 @@
         }
     }
 
+    if (vm_running) {
+        bdrv_release_all();
+    }
     bdrv_close_all();
 
     pause_all_threads();
diff --git a/qemu-nbd.c b/qemu-nbd.c
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -331,7 +331,7 @@
     if (bs == NULL)
         return 1;
 
-    if (bdrv_open(bs, argv[optind], flags) == -1)
+    if (bdrv_open(bs, argv[optind], flags) < 0 || bdrv_claim_all() < 0)
         return 1;
 
     fd_size = bs->total_sectors * 512;
@@ -470,6 +470,7 @@
     qemu_free(data);
 
     close(sharing_fds[0]);
+    bdrv_release_all(); /* drop driver locks before closing the image */
     bdrv_close(bs);
     qemu_free(sharing_fds);
     if (socket)
diff --git a/vl.c b/vl.c
--- a/vl.c
+++ b/vl.c
@@ -3219,7 +3219,13 @@
 
 void vm_start(void)
 {
     if (!vm_running) {
+        /* Refuse to start if a block driver's exclusive lock cannot be
+         * taken, e.g. the image is still claimed by a migration peer. */
+        if (bdrv_claim_all() < 0) {
+            fprintf(stderr, "vm_start: could not claim block devices\n");
+            return;
+        }
         cpu_enable_ticks();
         vm_running = 1;
         vm_state_notify(1, 0);
@@ -3293,6 +3293,7 @@
         vm_running = 0;
         pause_all_vcpus();
         vm_state_notify(0, reason);
+        bdrv_release_all(); /* locks are held only while the vm runs */
     }
 }
 
@@ -4178,6 +4179,13 @@
         if ((r = qemu_vmstop_requested()))
             vm_stop(r);
     }
+
+    /* If the vm is still running when the main loop exits, drop the
+     * driver-level locks before closing the block devices. */
+    if (vm_running) {
+        bdrv_release_all();
+    }
+    bdrv_close_all();
     pause_all_vcpus();
 }
 





More information about the sheepdog mailing list