[Sheepdog] [PATCH 5/5] remove sheepdog_node_list_entry
FUJITA Tomonori
fujita.tomonori at lab.ntt.co.jp
Tue Jan 26 05:32:10 CET 2010
A local collie forwards all requests, so qemu no longer needs to know
the node information.
Signed-off-by: FUJITA Tomonori <fujita.tomonori at lab.ntt.co.jp>
---
block/sheepdog.c | 333 ++++++++++++++----------------------------------------
1 files changed, 87 insertions(+), 246 deletions(-)
diff --git a/block/sheepdog.c b/block/sheepdog.c
index e353756..6a45cfa 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -267,20 +267,9 @@ struct bdrv_sd_state {
char *name;
int is_current;
- struct sd_aiostate aio_state_array[FD_SETSIZE];
+ struct sd_aiostate aio_state_array[1];
};
-struct sheepdog_node_list_entry {
- uint64_t id;
- uint8_t addr[16];
- uint16_t port;
- uint16_t pad;
-};
-
-static uint32_t s_epoch;
-static int nr_nodes;
-static struct sheepdog_node_list_entry *node_list_entries;
-
static const char * sd_strerror(int err)
{
int i;
@@ -333,15 +322,6 @@ static inline int after(uint32_t seq1, uint32_t seq2)
return (int32_t)(seq2 - seq1) < 0;
}
-static void set_hostname(char *name, size_t len,
- struct sheepdog_node_list_entry *e)
-{
- /* TODO: ipv6 */
-
- snprintf(name, len, "%d.%d.%d.%d",
- e->addr[12], e->addr[13], e->addr[14], e->addr[15]);
-}
-
static inline uint64_t oid_to_ino(uint64_t inode_oid)
{
return (inode_oid >> 18) & ((1ULL << 37) - 1);
@@ -377,24 +357,6 @@ static inline uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval)
return hval;
}
-static inline int obj_to_sheep(struct sheepdog_node_list_entry *entries,
- int nr_entries, uint64_t oid, int idx)
-{
- uint64_t id;
- int i;
- struct sheepdog_node_list_entry *e = entries, *n;
-
- id = fnv_64a_buf(&oid, sizeof(oid), FNV1A_64_INIT);
-
- for (i = 0; i < nr_entries - 1; i++, e++) {
- n = e + 1;
- if (id > e->id && id <= n->id)
- break;
- }
-
- return (i + 1 + idx) % nr_entries;
-}
-
static inline struct aio_req *alloc_aio_req(struct sd_aiostate *s,
struct sd_aiocb *acb)
{
@@ -508,12 +470,14 @@ static int sd_schedule_bh(QEMUBHFunc *cb, struct sd_aiocb *acb)
return 0;
}
-static int connect_to_vost(char *name, int port)
+static int connect_to_vost(void)
{
char buf[64];
char hbuf[NI_MAXHOST], sbuf[NI_MAXSERV];
+ char name[] = "localhost";
int fd, ret;
struct addrinfo hints, *res, *res0;
+ int port = DOG_PORT;
memset(&hints, 0, sizeof(hints));
snprintf(buf, sizeof(buf), "%d", port);
@@ -723,74 +687,6 @@ static void retry_aiocb(struct bdrv_sd_state *s)
}
}
-static int get_node_list(void *buf, unsigned int size, unsigned int *epoch)
-{
- int fd, ret;
- unsigned int wlen;
- struct sd_node_req hdr;
- struct sd_node_rsp *rsp = (struct sd_node_rsp *)&hdr;
- char host[] = "localhost";
-
- fd = connect_to_vost(host, DOG_PORT);
- if (fd < 0)
- return -1;
-
- memset(&hdr, 0, sizeof(hdr));
- hdr.opcode = SD_OP_GET_NODE_LIST;
- hdr.data_length = size;
- hdr.epoch = *epoch;
-
- wlen = 0;
-
- ret = do_req(fd, (struct sd_req *)&hdr, buf, &wlen, &size);
- if (ret) {
- ret = -1;
- goto out;
- }
-
- if (!size) {
- ret = 0;
- goto out;
- }
-
- ret = rsp->nr_nodes;
- *epoch = rsp->epoch;
-out:
- close(fd);
-
- return ret;
-}
-
-static int update_node_list(struct bdrv_sd_state *s)
-{
- char *buf;
- int ret;
- unsigned int size, epoch = s_epoch;
-
- size = FD_SETSIZE * sizeof(struct sheepdog_node_list_entry);
-
- buf = malloc(size);
- if (!buf)
- return -ENOMEM;
-
- ret = get_node_list(buf, size, &epoch);
-
- if (ret <= 0)
- goto out;
-
- memcpy(node_list_entries, buf, size);
- nr_nodes = ret;
- s_epoch = epoch;
-
- if (s && s_epoch != epoch)
- retry_aiocb(s);
-
-out:
- free(buf);
-
- return ret;
-}
-
static void aio_read_response(void *opaque)
{
struct sd_obj_req hdr;
@@ -904,7 +800,9 @@ static void aio_read_response(void *opaque)
return; /* TODO: update node list and resend request */
new_node_list:
- update_node_list(s->s);
+ eprintf("\n");
+ exit(1);
+
return;
}
@@ -939,38 +837,36 @@ static int set_nodelay(int fd)
return ret;
}
-static int get_sheep_fd(struct bdrv_sd_state *s, uint16_t idx, int *cached)
+static int get_sheep_fd(struct bdrv_sd_state *s)
{
int ret, fd;
- char name[128];
-
- if (s->aio_state_array[idx].fd != -1) {
- *cached = 1;
- return s->aio_state_array[idx].fd;
- } else
- *cached = 0;
- set_hostname(name, sizeof(name), &node_list_entries[idx]);
+ if (s->aio_state_array[0].fd != -1)
+ return s->aio_state_array[0].fd;
- fd = connect_to_vost(name, node_list_entries[idx].port);
- if (fd < 0)
+ fd = connect_to_vost();
+ if (fd < 0) {
+ eprintf("%m\n");
return -1;
+ }
ret = set_nonblocking(fd);
if (ret) {
+ eprintf("%m\n");
close(fd);
return -1;
}
ret = set_nodelay(fd);
if (ret) {
+ eprintf("%m\n");
close(fd);
return -1;
}
qemu_aio_set_fd_handler(fd, aio_read_response, NULL, aio_flush_request,
- NULL, &s->aio_state_array[idx]);
- s->aio_state_array[idx].fd = fd;
+ NULL, &s->aio_state_array[0]);
+ s->aio_state_array[0].fd = fd;
return fd;
}
@@ -1002,16 +898,14 @@ static int parse_vdiname(const char *filename, char *vdi, int vdi_len,
}
static int find_vdi_name(struct bdrv_sd_state *s, char *filename, uint64_t tag,
- uint64_t *oid, int for_snapshot, int *current,
- unsigned int *epoch)
+ uint64_t *oid, int for_snapshot, int *current)
{
int ret, fd;
struct sd_vdi_req hdr;
struct sd_vdi_rsp *rsp = (struct sd_vdi_rsp *)&hdr;
- char hostname[] = "localhost";
unsigned int wlen, rlen = 0;
- fd = connect_to_vost(hostname, DOG_PORT);
+ fd = connect_to_vost();
if (fd < 0)
return -1;
@@ -1021,7 +915,6 @@ static int find_vdi_name(struct bdrv_sd_state *s, char *filename, uint64_t tag,
hdr.data_length = wlen;
hdr.tag = tag;
hdr.flags = SD_FLAG_CMD_WRITE;
- hdr.epoch = s_epoch;
ret = do_req(fd, (struct sd_req *)&hdr, filename, &wlen, &rlen);
if (ret) {
@@ -1037,7 +930,6 @@ static int find_vdi_name(struct bdrv_sd_state *s, char *filename, uint64_t tag,
*oid = rsp->oid;
s->is_current = rsp->flags & SD_VDI_RSP_FLAG_CURRENT;
- *epoch = rsp->epoch;
ret = 0;
out:
close(fd);
@@ -1048,110 +940,80 @@ static int add_aio_request(struct bdrv_sd_state *s, struct sd_aiocb *acb,
uint64_t oid, void *data,
unsigned int datalen, uint64_t offset, uint8_t flags,
uint64_t old_oid, int create, int write,
- unsigned int iov_offset,
- struct sheepdog_node_list_entry *e, int nr)
+ unsigned int iov_offset)
{
- int i = 0;
int nr_copies = s->inode.nr_copies;
+ struct sd_obj_req hdr;
+ int fd;
+ unsigned int wlen;
+ int ret;
+ struct aio_req *aio_req;
if (!nr_copies)
eprintf("bug\n");
- if (!e) {
- e = node_list_entries;
- nr = nr_nodes;
- }
-
- if (!write)
- nr_copies = 1;
-
- {
- struct sd_obj_req hdr;
- int cached, sidx, fd;
- unsigned int wlen;
- int ret;
- struct aio_req *aio_req;
-
- memset(&hdr, 0, sizeof(hdr));
- hdr.epoch = s_epoch;
-
- if (!write) {
- wlen = 0;
- hdr.opcode = SD_OP_READ_OBJ;
- hdr.flags = flags;
- } else if (create) {
- wlen = datalen;
- hdr.opcode = SD_OP_CREATE_AND_WRITE_OBJ;
- hdr.flags = SD_FLAG_CMD_WRITE | flags;
- } else {
- wlen = datalen;
- hdr.opcode = SD_OP_WRITE_OBJ;
- hdr.flags = SD_FLAG_CMD_WRITE | flags;
- }
+ memset(&hdr, 0, sizeof(hdr));
- hdr.oid = oid;
- hdr.cow_oid = old_oid;
- hdr.copies = s->inode.nr_copies;
+ if (!write) {
+ wlen = 0;
+ hdr.opcode = SD_OP_READ_OBJ;
+ hdr.flags = flags;
+ } else if (create) {
+ wlen = datalen;
+ hdr.opcode = SD_OP_CREATE_AND_WRITE_OBJ;
+ hdr.flags = SD_FLAG_CMD_WRITE | flags;
+ } else {
+ wlen = datalen;
+ hdr.opcode = SD_OP_WRITE_OBJ;
+ hdr.flags = SD_FLAG_CMD_WRITE | flags;
+ }
- hdr.data_length = datalen;
- hdr.offset = offset;
+ hdr.oid = oid;
+ hdr.cow_oid = old_oid;
+ hdr.copies = s->inode.nr_copies;
- /*
- * In the case of read, we try a different sheep for
- * retry.
- */
- if (write)
- sidx = obj_to_sheep(e, nr, oid, i);
- else
- sidx = obj_to_sheep(e, nr, oid, acb->retries);
+ hdr.data_length = datalen;
+ hdr.offset = offset;
- fd = get_sheep_fd(s, sidx, &cached);
- if (fd < 0)
- return -EIO;
+ fd = get_sheep_fd(s);
+ if (fd < 0)
+ return -EIO;
- struct sd_aiostate *aio_state = &s->aio_state_array[sidx];
- aio_req = alloc_aio_req(aio_state, acb);
- if (!aio_req) {
- eprintf("too many requests\n");
- return -ENOMEM;
- }
- aio_req->iov_offset = iov_offset;
- hdr.id = get_id_from_req(aio_state, aio_req);
+ struct sd_aiostate *aio_state = &s->aio_state_array[0];
+ aio_req = alloc_aio_req(aio_state, acb);
+ if (!aio_req) {
+ eprintf("too many requests\n");
+ return -ENOMEM;
+ }
+ aio_req->iov_offset = iov_offset;
+ hdr.id = get_id_from_req(aio_state, aio_req);
- ret = send_req(fd, (struct sd_req *)&hdr, data, &wlen);
- if (ret) {
- free_aio_req(aio_state, aio_req);
- return -EIO;
- }
+ ret = send_req(fd, (struct sd_req *)&hdr, data, &wlen);
+ if (ret) {
+ free_aio_req(aio_state, aio_req);
+ return -EIO;
}
return 0;
}
-static int read_vdi_obj(char *buf, uint64_t oid,
- struct sheepdog_node_list_entry *e, int nr, int *copies)
+static int read_vdi_obj(char *buf, uint64_t oid, int *copies)
{
struct sd_obj_req hdr;
struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr;
unsigned int wlen, rlen;
- int ret, fd, sidx, i = 0;
- char host[128];
+ int ret, fd, i = 0;
wlen = 0;
rlen = SD_INODE_SIZE;
memset(&hdr, 0, sizeof(hdr));
- hdr.epoch = s_epoch;
hdr.opcode = SD_OP_READ_OBJ;
hdr.oid = oid;
hdr.data_length = rlen;
- sidx = obj_to_sheep(e, nr, oid, i);
-
- set_hostname(host, sizeof(host), &e[sidx]);
-
- fd = connect_to_vost(host, e[sidx].port);
+ fd = connect_to_vost();
if (fd < 0) {
eprintf("failed to connect to a sheep, %d\n", i);
return -1;
@@ -1179,13 +1041,12 @@ static int read_vdi_obj(char *buf, uint64_t oid,
/* TODO: error cleanups */
static int sd_open(BlockDriverState *bs, const char *filename, int flags)
{
- int nr, ret, i, j;
+ int ret, i, j;
uint64_t oid = 0;
struct bdrv_sd_state *s = bs->opaque;
char vdi[256];
uint64_t tag;
int for_snapshot = 0, dummy;
- unsigned int epoch;
char *buf;
buf = malloc(SD_INODE_SIZE);
@@ -1194,7 +1055,7 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
return -1;
}
- for (i = 0; i < FD_SETSIZE; i++) {
+ for (i = 0; i < ARRAY_SIZE(s->aio_state_array); i++) {
struct sd_aiostate *aio_state = &s->aio_state_array[i];
for (j = 0; j < MAX_AIO_REQS; j++) {
aio_state->aio_req_free[j] = &aio_state->aio_req_list[j];
@@ -1208,22 +1069,18 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
if (strstart(filename, "sheepdog:", NULL))
for_snapshot = 1;
- nr = update_node_list(s);
- if (nr < 0 || !nr)
- goto out;
-
memset(vdi, 0, sizeof(vdi));
if (parse_vdiname(filename, vdi, sizeof(vdi), &tag) < 0)
goto out;
- ret = find_vdi_name(s, vdi, tag, &oid, for_snapshot, &s->is_current, &epoch);
+ ret = find_vdi_name(s, vdi, tag, &oid, for_snapshot, &s->is_current);
if (ret)
goto out;
if (!s->is_current)
eprintf("%" PRIx64 " non current inode was open.\n", oid);
- ret = read_vdi_obj(buf, oid, node_list_entries, nr_nodes, &dummy);
+ ret = read_vdi_obj(buf, oid, &dummy);
if (ret)
goto out;
@@ -1247,11 +1104,10 @@ static int do_sd_create(char *filename, char *tag, int64_t total_sectors,
struct sd_vdi_req hdr;
struct sd_vdi_rsp *rsp = (struct sd_vdi_rsp *)&hdr;
int fd, ret;
- char hostname[] = "localhost";
unsigned int wlen, rlen = 0;
char buf[SD_MAX_VDI_LEN * 2];
- fd = connect_to_vost(hostname, DOG_PORT);
+ fd = connect_to_vost();
if (fd < 0)
return -1;
@@ -1262,7 +1118,6 @@ static int do_sd_create(char *filename, char *tag, int64_t total_sectors,
memset(&hdr, 0, sizeof(hdr));
hdr.opcode = SD_OP_NEW_VDI;
- hdr.epoch = s_epoch;
hdr.base_oid = base_oid;
wlen = SD_MAX_VDI_LEN;
@@ -1295,7 +1150,7 @@ static int do_sd_create(char *filename, char *tag, int64_t total_sectors,
static int sd_create(const char *filename, QEMUOptionParameter *options)
{
- int nr, ret;
+ int ret;
uint64_t oid = 0;
int64_t total_sectors = 0;
char *backing_file = NULL;
@@ -1309,16 +1164,10 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
options++;
}
- /* needs to set up s_epoch */
- nr = update_node_list(NULL);
- if (nr < 0 || !nr)
- return -1;
-
if (backing_file) {
BlockDriverState bs;
char vdi[256];
uint64_t tag;
- unsigned int dummy;
memset(&bs, 0, sizeof(bs));
@@ -1337,7 +1186,7 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
if (tag == CURRENT_VDI_ID)
return -1;
- ret = find_vdi_name(bs.opaque, vdi, tag, &oid, 1, NULL, &dummy);
+ ret = find_vdi_name(bs.opaque, vdi, tag, &oid, 1, NULL);
struct bdrv_sd_state *s = bs.opaque;
if (ret || s->is_current)
return -1;
@@ -1361,12 +1210,11 @@ static int sd_claim(BlockDriverState *bs)
int ret, fd;
struct sd_vdi_req hdr;
struct sd_vdi_rsp *rsp = (struct sd_vdi_rsp *)&hdr;
- char hostname[] = "localhost";
unsigned int wlen, rlen = 0;
eprintf("%s\n", s->name);
- fd = connect_to_vost(hostname, DOG_PORT);
+ fd = connect_to_vost();
if (fd < 0)
return -1;
@@ -1376,7 +1224,6 @@ static int sd_claim(BlockDriverState *bs)
hdr.data_length = wlen;
hdr.tag = CURRENT_VDI_ID;
hdr.flags = SD_FLAG_CMD_WRITE;
- hdr.epoch = s_epoch;
ret = do_req(fd, (struct sd_req *)&hdr, s->name, &wlen, &rlen);
if (ret) {
@@ -1401,13 +1248,12 @@ static void sd_release(BlockDriverState *bs)
struct bdrv_sd_state *s = bs->opaque;
struct sd_vdi_req hdr;
struct sd_vdi_rsp *rsp = (struct sd_vdi_rsp *)&hdr;
- char hostname[] = "localhost";
unsigned int wlen, rlen = 0;
int fd, ret;
eprintf("%s\n", s->name);
- fd = connect_to_vost(hostname, DOG_PORT);
+ fd = connect_to_vost();
if (fd < 0)
return;
@@ -1416,7 +1262,6 @@ static void sd_release(BlockDriverState *bs)
hdr.opcode = SD_OP_RELEASE_VDI;
wlen = strlen(s->name) + 1;
hdr.data_length = wlen;
- hdr.epoch = s_epoch;
hdr.flags = SD_FLAG_CMD_WRITE;
ret = do_req(fd, (struct sd_req *)&hdr, s->name, &wlen, &rlen);
@@ -1433,9 +1278,13 @@ static inline int nr_chunks(struct sd_aiocb *acb)
(acb->sector_num * 512 / CHUNK_SIZE) + 1;
}
+/* FIXME */
+#define reset_all_aios(s) \
+ { eprintf(""); exit(1); }
+
static void sd_write_done(struct sd_aiocb *acb)
{
- int i, ret, inode_dirty = 0, epoch_dirty = 0;
+ int i, ret, inode_dirty = 0;
unsigned long idx = acb->sector_num * 512 / CHUNK_SIZE;
struct bdrv_sd_state *s = acb->common.bs->opaque;
@@ -1452,12 +1301,12 @@ static void sd_write_done(struct sd_aiocb *acb)
ret = add_aio_request(s, acb, s->inode.oid, &s->inode,
sizeof(s->inode),
- 0, 0, 0, 0, 1, 0, NULL, 0);
+ 0, 0, 0, 0, 1, 0);
if (ret)
goto new_node_list;
}
- if (!inode_dirty && !epoch_dirty)
+ if (!inode_dirty)
sd_finish_aiocb(acb);
else {
acb->aio_done_func = sd_finish_aiocb;
@@ -1466,7 +1315,7 @@ static void sd_write_done(struct sd_aiocb *acb)
return;
new_node_list:
- update_node_list(s);
+ reset_all_aios(s);
}
static int sd_create_branch(struct bdrv_sd_state *s)
@@ -1490,7 +1339,7 @@ static int sd_create_branch(struct bdrv_sd_state *s)
copies = s->inode.nr_copies;
- ret = read_vdi_obj(buf, oid, node_list_entries, nr_nodes, &copies);
+ ret = read_vdi_obj(buf, oid, &copies);
if (ret < 0)
goto out;
@@ -1576,7 +1425,7 @@ static void sd_write_bh_cb(void *p)
}
ret = add_aio_request(s, acb, oid, buf + done, len, offset, flags, old_oid,
- create, 1, 0, NULL, 0);
+ create, 1, 0);
if (ret < 0) {
eprintf("may be add_aio_request is faled\n");
@@ -1602,7 +1451,7 @@ abort:
sd_schedule_bh(sd_aio_bh_cb, acb);
return;
new_node_list:
- update_node_list(s);
+ reset_all_aios(s);
}
static BlockDriverAIOCB *sd_aio_writev(BlockDriverState *bs,
@@ -1639,7 +1488,7 @@ static void sd_read_done(struct sd_aiocb *acb)
if (oid) {
ret = add_aio_request(s, acb, oid, NULL, len, offset, 0, 0,
- 0, 0, done, NULL, 0);
+ 0, 0, done);
if (ret)
goto new_node_list;
}
@@ -1651,7 +1500,7 @@ static void sd_read_done(struct sd_aiocb *acb)
}
return;
new_node_list:
- update_node_list(s);
+ reset_all_aios(s);
}
static void sd_readv_bh_cb(void *p)
@@ -1794,7 +1643,6 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
char name[SD_MAX_VDI_LEN];
QEMUSnapshotInfo *sn_tab = NULL;
unsigned wlen, rlen;
- char hostname[] = "localhost";
int found = 0;
struct sd_inode inode;
@@ -1805,7 +1653,7 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
memset(name, 0, sizeof(name));
snprintf(name, sizeof(name), "%s", s->name);
- fd = connect_to_vost(hostname, DOG_PORT);
+ fd = connect_to_vost();
if (fd < 0)
goto out;
@@ -1816,7 +1664,6 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
req.opcode = SD_OP_SO_READ_VDIS;
req.data_length = rlen;
- req.epoch = s_epoch;
ret = do_req(fd, (struct sd_req *)&req, vi, &wlen, &rlen);
@@ -1841,8 +1688,7 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
if (strcmp(vi[i].name, s->name) || !vi[i].id)
continue;
- ret = read_vdi_obj((char *)&inode, vi[i].oid, node_list_entries,
- nr_nodes, &copies);
+ ret = read_vdi_obj((char *)&inode, vi[i].oid, &copies);
if (ret)
continue;
@@ -1897,11 +1743,6 @@ BlockDriver bdrv_sheepdog = {
static void bdrv_sheepdog_init(void)
{
- int size = FD_SETSIZE * sizeof(struct sheepdog_node_list_entry);
-
- node_list_entries = malloc(size);
- memset(node_list_entries, 0, size);
-
bdrv_register(&bdrv_sheepdog);
}
block_init(bdrv_sheepdog_init);
--
1.5.6.5
More information about the sheepdog
mailing list