[Sheepdog] [PATCH 2/7] sheep: avoid using large stack areas
MORITA Kazutaka
morita.kazutaka at lab.ntt.co.jp
Thu Aug 4 10:40:56 CEST 2011
Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
sheep/store.c | 120 +++++++++++++++++++++++++++++++++++++++-----------------
sheep/vdi.c | 103 ++++++++++++++++++++++++++++++++++++-------------
2 files changed, 159 insertions(+), 64 deletions(-)
diff --git a/sheep/store.c b/sheep/store.c
index cdf6a94..9318eed 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -519,10 +519,14 @@ int update_epoch_store(uint32_t epoch)
int write_object_local(uint64_t oid, char *data, unsigned int datalen,
uint64_t offset, int copies, uint32_t epoch, int create)
{
- struct request req;
- struct sd_obj_req *hdr = (struct sd_obj_req *)&req.rq;
+ int ret;
+ struct request *req;
+ struct sd_obj_req *hdr;
- memset(&req, 0, sizeof(req));
+ req = zalloc(sizeof(*req));
+ if (!req)
+ return SD_RES_NO_MEM;
+ hdr = (struct sd_obj_req *)&req->rq;
hdr->oid = oid;
if (create)
@@ -533,20 +537,29 @@ int write_object_local(uint64_t oid, char *data, unsigned int datalen,
hdr->flags = SD_FLAG_CMD_WRITE;
hdr->offset = offset;
hdr->data_length = datalen;
- req.data = data;
+ req->data = data;
+
+ ret = store_queue_request_local(req, epoch);
- return store_queue_request_local(&req, epoch);
+ free(req);
+
+ return ret;
}
int read_object_local(uint64_t oid, char *data, unsigned int datalen,
uint64_t offset, int copies, uint32_t epoch)
{
int ret;
- struct request req;
- struct sd_obj_req *hdr = (struct sd_obj_req *)&req.rq;
- struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&req.rp;
+ struct request *req;
+ struct sd_obj_req *hdr;
+ struct sd_obj_rsp *rsp;
+ unsigned int rsp_data_length;
- memset(&req, 0, sizeof(req));
+ req = zalloc(sizeof(*req));
+ if (!req)
+ return -SD_RES_NO_MEM;
+ hdr = (struct sd_obj_req *)&req->rq;
+ rsp = (struct sd_obj_rsp *)&req->rp;
hdr->oid = oid;
hdr->opcode = SD_OP_READ_OBJ;
@@ -554,17 +567,20 @@ int read_object_local(uint64_t oid, char *data, unsigned int datalen,
hdr->flags = 0;
hdr->offset = offset;
hdr->data_length = datalen;
- req.data = data;
+ req->data = data;
- ret = store_queue_request_local(&req, epoch);
+ ret = store_queue_request_local(req, epoch);
+
+ rsp_data_length = rsp->data_length;
+ free(req);
if (ret != 0)
return -ret;
- if (rsp->data_length != datalen)
+ if (rsp_data_length != datalen)
return -SD_RES_EIO;
- return rsp->data_length;
+ return rsp_data_length;
}
static int store_queue_request_local(struct request *req, uint32_t epoch)
@@ -1195,12 +1211,19 @@ static int __recover_one(struct recovery_work *rw,
char name[128];
unsigned wlen = 0, rlen;
int fd, ret;
- struct sheepdog_vnode_list_entry old_entry[SD_MAX_VNODES],
- cur_entry[SD_MAX_VNODES], next_entry[SD_MAX_VNODES];
+ struct sheepdog_vnode_list_entry *old_entry, *cur_entry, *next_entry;
int next_nr, next_copies;
int tgt_idx = -1;
int old_idx;
+ old_entry = malloc(sizeof(*old_entry) * SD_MAX_VNODES);
+ cur_entry = malloc(sizeof(*cur_entry) * SD_MAX_VNODES);
+ next_entry = malloc(sizeof(*next_entry) * SD_MAX_VNODES);
+ if (!old_entry || !cur_entry || !next_entry) {
+ eprintf("oom\n");
+ goto err;
+ }
+
memcpy(old_entry, _old_entry, sizeof(*old_entry) * old_nr);
memcpy(cur_entry, _cur_entry, sizeof(*cur_entry) * cur_nr);
next:
@@ -1211,7 +1234,7 @@ next:
cur_entry, cur_nr, cur_idx, cur_copies, copy_idx);
if (tgt_idx < 0) {
eprintf("cannot find target node, %"PRIx64"\n", oid);
- return -1;
+ goto err;
}
e = old_entry + tgt_idx;
@@ -1224,13 +1247,13 @@ next:
epoch, oid);
dprintf("link from %s to %s\n", old, new);
if (link(old, new) == 0)
- return 0;
+ goto out;
if (errno == ENOENT) {
next_nr = epoch_log_read(tgt_epoch - 1, buf, buf_len);
if (next_nr <= 0) {
eprintf("no previous epoch, %"PRIu32"\n", tgt_epoch - 1);
- return -1;
+ goto err;
}
next_nr /= sizeof(struct sheepdog_node_list_entry);
next_copies = get_max_copies((struct sheepdog_node_list_entry *)buf,
@@ -1241,7 +1264,7 @@ next:
}
eprintf("cannot recover from local, %s, %s\n", old, new);
- return -1;
+ goto err;
}
addr_to_str(name, sizeof(name), e->addr, 0);
@@ -1249,7 +1272,7 @@ next:
fd = connect_to(name, e->port);
if (fd < 0) {
eprintf("failed to connect to %s:%"PRIu32"\n", name, e->port);
- return -1;
+ goto err;
}
if (is_vdi_obj(oid))
@@ -1273,7 +1296,7 @@ next:
if (ret < 0) {
eprintf("%"PRIu32"\n", rsp->result);
- return -1;
+ goto err;
}
rsp = (struct sd_obj_rsp *)&hdr;
@@ -1290,20 +1313,20 @@ next:
fd = open(tmp_path, flags, def_fmode);
if (fd < 0) {
eprintf("failed to open %s, %s\n", tmp_path, strerror(errno));
- return -1;
+ goto err;
}
ret = write(fd, buf, rlen);
if (ret != rlen) {
eprintf("failed to write object\n");
- return -1;
+ goto err;
}
ret = fsetxattr(fd, ANAME_COPIES, &rsp->copies,
sizeof(rsp->copies), 0);
if (ret) {
eprintf("couldn't set xattr\n");
- return -1;
+ goto err;
}
close(fd);
@@ -1312,22 +1335,22 @@ next:
ret = rename(tmp_path, path);
if (ret < 0) {
eprintf("failed to rename %s to %s, %m\n", tmp_path, path);
- return -1;
+ goto err;
}
dprintf("recovered oid %"PRIx64" to epoch %"PRIu32"\n", oid, epoch);
- return 0;
+ goto out;
}
if (rsp->result == SD_RES_NEW_NODE_VER || rsp->result == SD_RES_OLD_NODE_VER
|| rsp->result == SD_RES_NETWORK_ERROR) {
eprintf("try again, %"PRIu32", %"PRIx64"\n", rsp->result, oid);
rw->retry = 1;
- return 0;
+ goto out;
}
if (rsp->result != SD_RES_NO_OBJ || rsp->data_length == 0) {
eprintf("%"PRIu32"\n", rsp->result);
- return -1;
+ goto err;
}
next_nr = rsp->data_length / sizeof(struct sheepdog_node_list_entry);
next_copies = get_max_copies((struct sheepdog_node_list_entry *)buf, next_nr);
@@ -1340,7 +1363,7 @@ not_found:
break;
if (copy_idx == old_copies) {
eprintf("bug: cannot find the proper copy_idx\n");
- return -1;
+ goto err;
}
dprintf("%"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32"\n", rsp->result, rsp->data_length, tgt_idx,
@@ -1356,6 +1379,16 @@ not_found:
tgt_epoch--;
goto next;
+out:
+ free(old_entry);
+ free(cur_entry);
+ free(next_entry);
+ return 0;
+err:
+ free(old_entry);
+ free(cur_entry);
+ free(next_entry);
+ return -1;
}
static void recover_one(struct work *work, int idx)
@@ -1364,10 +1397,8 @@ static void recover_one(struct work *work, int idx)
char *buf = NULL;
int ret;
uint64_t oid = rw->oids[rw->done];
- struct sheepdog_node_list_entry old_nodes[SD_MAX_NODES];
- struct sheepdog_node_list_entry cur_nodes[SD_MAX_NODES];
- struct sheepdog_vnode_list_entry old_vnodes[SD_MAX_VNODES];
- struct sheepdog_vnode_list_entry cur_vnodes[SD_MAX_VNODES];
+ struct sheepdog_node_list_entry *old_nodes, *cur_nodes;
+ struct sheepdog_vnode_list_entry *old_vnodes, *cur_vnodes;
int old_nr_nodes, cur_nr_nodes, old_nr_vnodes, cur_nr_vnodes;
int old_copies, cur_copies;
uint32_t epoch = rw->epoch;
@@ -1376,6 +1407,15 @@ static void recover_one(struct work *work, int idx)
eprintf("%"PRIu32" %"PRIu32", %16"PRIx64"\n", rw->done, rw->count, oid);
+ old_nodes = malloc(sizeof(*old_nodes) * SD_MAX_NODES);
+ cur_nodes = malloc(sizeof(*cur_nodes) * SD_MAX_NODES);
+ old_vnodes = malloc(sizeof(*old_vnodes) * SD_MAX_VNODES);
+ cur_vnodes = malloc(sizeof(*cur_vnodes) * SD_MAX_VNODES);
+ if (!old_nodes || !cur_nodes || !old_vnodes || !cur_vnodes) {
+ eprintf("oom\n");
+ goto out;
+ }
+
fd = ob_open(epoch, oid, 0, &ret);
if (fd != -1) {
/* the object is already recovered */
@@ -1451,8 +1491,11 @@ static void recover_one(struct work *work, int idx)
fail:
eprintf("failed to recover object %"PRIx64"\n", oid);
out:
- if (buf)
- free(buf);
+ free(old_nodes);
+ free(cur_nodes);
+ free(old_vnodes);
+ free(cur_vnodes);
+ free(buf);
}
static struct recovery_work *suspended_recovery_work;
@@ -1685,11 +1728,12 @@ static int fill_obj_list(struct recovery_work *rw,
int i, j;
uint8_t *buf = NULL;
size_t buf_size = SD_DATA_OBJ_SIZE; /* FIXME */
- struct sheepdog_vnode_list_entry vnodes[SD_MAX_VNODES];
+ struct sheepdog_vnode_list_entry *vnodes;
int nr_vnodes, retry_cnt = 0;
+ vnodes = malloc(sizeof(*vnodes) * SD_MAX_VNODES);
buf = malloc(buf_size);
- if (!buf)
+ if (!buf || !vnodes)
goto fail;
nr_vnodes = nodes_to_vnodes(cur_entry, cur_nr, vnodes);
@@ -1722,9 +1766,11 @@ static int fill_obj_list(struct recovery_work *rw,
rw->count, (uint64_t *)buf, nr, nr_objs);
}
+ free(vnodes);
free(buf);
return 0;
fail:
+ free(vnodes);
free(buf);
rw->retry = 1;
return -1;
diff --git a/sheep/vdi.c b/sheep/vdi.c
index 3655005..4291416 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -21,17 +21,25 @@ static int create_vdi_obj(uint32_t epoch, char *name, uint32_t new_vid, uint64_t
uint32_t base_vid, uint32_t cur_vid, uint32_t copies,
uint32_t snapid, int is_snapshot)
{
- struct sheepdog_vnode_list_entry entries[SD_MAX_VNODES];
+ struct sheepdog_vnode_list_entry *entries;
/* we are not called concurrently */
struct sheepdog_inode *new = NULL, *base = NULL, *cur = NULL;
struct timeval tv;
int ret, nr_vnodes, nr_zones;
unsigned long block_size = SD_DATA_OBJ_SIZE;
+ entries = malloc(sizeof(*entries) * SD_MAX_VNODES);
+ if (!entries) {
+ eprintf("oom\n");
+ ret = SD_RES_NO_MEM;
+ goto out;
+ }
+
new = zalloc(sizeof(*new));
if (!new) {
eprintf("oom\n");
- return SD_RES_NO_MEM;
+ ret = SD_RES_NO_MEM;
+ goto out;
}
if (base_vid) {
@@ -141,6 +149,7 @@ static int create_vdi_obj(uint32_t epoch, char *name, uint32_t new_vid, uint64_t
if (ret != 0)
ret = SD_RES_VDI_WRITE;
out:
+ free(entries);
free(new);
free(cur);
free(base);
@@ -152,14 +161,15 @@ static int find_first_vdi(uint32_t epoch, unsigned long start, unsigned long end
unsigned long *deleted_nr, uint32_t *next_snap,
unsigned int *nr_copies)
{
- struct sheepdog_vnode_list_entry entries[SD_MAX_VNODES];
+ struct sheepdog_vnode_list_entry *entries;
struct sheepdog_inode *inode = NULL;
unsigned long i;
int nr_vnodes, nr_zones, nr_reqs;
int ret, vdi_found = 0;
+ entries = malloc(sizeof(*entries) * SD_MAX_VNODES);
inode = malloc(SD_INODE_HEADER_SIZE);
- if (!inode) {
+ if (!inode || !entries) {
eprintf("oom\n");
ret = SD_RES_NO_MEM;
goto out;
@@ -207,6 +217,7 @@ static int find_first_vdi(uint32_t epoch, unsigned long start, unsigned long end
ret = SD_RES_NO_VDI;
out:
free(inode);
+ free(entries);
return ret;
}
@@ -342,12 +353,13 @@ int del_vdi(uint32_t epoch, char *data, int data_len, uint32_t *vid,
uint32_t dummy0;
unsigned long dummy1, dummy2;
int ret;
- struct sheepdog_vnode_list_entry entries[SD_MAX_VNODES];
+ struct sheepdog_vnode_list_entry *entries;
int nr_vnodes, nr_zones, nr_reqs;
struct sheepdog_inode *inode = NULL;
inode = malloc(SD_INODE_HEADER_SIZE);
- if (!inode) {
+ entries = malloc(sizeof(*entries) * SD_MAX_VNODES);
+ if (!inode || !entries) {
eprintf("oom\n");
ret = SD_RES_NO_MEM;
goto out;
@@ -393,6 +405,7 @@ int del_vdi(uint32_t epoch, char *data, int data_len, uint32_t *vid,
ret = start_deletion(*vid, epoch);
out:
free(inode);
+ free(entries);
return ret;
}
@@ -427,15 +440,16 @@ static void delete_one(struct work *work, int idx)
{
struct deletion_work *dw = container_of(work, struct deletion_work, work);
uint32_t vdi_id = *(((uint32_t *)dw->buf) + dw->count - dw->done - 1);
- struct sheepdog_vnode_list_entry entries[SD_MAX_VNODES];
+ struct sheepdog_vnode_list_entry *entries;
int nr_vnodes, nr_zones;
int ret, i;
struct sheepdog_inode *inode = NULL;
eprintf("%d %d, %16x\n", dw->done, dw->count, vdi_id);
+ entries = malloc(sizeof(*entries) * SD_MAX_VNODES);
inode = malloc(sizeof(*inode));
- if (!inode) {
+ if (!inode || !entries) {
eprintf("oom\n");
goto out;
}
@@ -465,6 +479,7 @@ static void delete_one(struct work *work, int idx)
inode->nr_copies);
}
out:
+ free(entries);
free(inode);
}
@@ -576,19 +591,27 @@ out:
int start_deletion(uint32_t vid, uint32_t epoch)
{
- struct deletion_work *dw;
- struct sheepdog_vnode_list_entry entries[SD_MAX_VNODES];
+ struct deletion_work *dw = NULL;
+ struct sheepdog_vnode_list_entry *entries;
int nr_vnodes, nr_zones, ret;
uint32_t root_vid;
+ entries = malloc(sizeof(*entries) * SD_MAX_VNODES);
+ if (!entries) {
+ eprintf("oom\n");
+ ret = SD_RES_NO_MEM;
+ goto err;
+ }
dw = zalloc(sizeof(struct deletion_work));
- if (!dw)
- return SD_RES_NO_MEM;
+ if (!dw) {
+ ret = SD_RES_NO_MEM;
+ goto err;
+ }
dw->buf = zalloc(1 << 20); /* FIXME: handle larger buffer */
if (!dw->buf) {
- free(dw);
- return SD_RES_NO_MEM;
+ ret = SD_RES_NO_MEM;
+ goto err;
}
dw->count = 0;
@@ -601,8 +624,10 @@ int start_deletion(uint32_t vid, uint32_t epoch)
get_ordered_sd_vnode_list(entries, &nr_vnodes, &nr_zones);
root_vid = get_vdi_root(entries, nr_vnodes, nr_zones, dw->epoch, dw->vid);
- if (!root_vid)
- return SD_RES_EIO;
+ if (!root_vid) {
+ ret = SD_RES_EIO;
+ goto err;
+ }
ret = fill_vdi_list(dw, entries, nr_vnodes, nr_zones, root_vid);
if (ret)
@@ -611,30 +636,48 @@ int start_deletion(uint32_t vid, uint32_t epoch)
dprintf("%d\n", dw->count);
if (dw->count == 0)
- return SD_RES_SUCCESS;
+ goto out;
if (!list_empty(&deletion_work_list)) {
list_add_tail(&dw->dw_siblings, &deletion_work_list);
- return SD_RES_SUCCESS;
+ goto out;
}
list_add_tail(&dw->dw_siblings, &deletion_work_list);
queue_work(&dw->work);
+out:
+ free(entries);
return SD_RES_SUCCESS;
+err:
+ free(entries);
+ if (dw)
+ free(dw->buf);
+ free(dw);
+
+ return ret;
}
int get_vdi_attr(uint32_t epoch, char *data, int data_len, uint32_t vid,
uint32_t *attrid, int copies, int creat, int excl)
{
- struct sheepdog_vnode_list_entry entries[SD_MAX_VNODES];
+ struct sheepdog_vnode_list_entry *entries;
char attr_buf[SD_ATTR_HEADER_SIZE];
uint64_t oid;
uint32_t end;
int ret, nr_zones, nr_vnodes;
- if (data_len != SD_ATTR_HEADER_SIZE)
- return SD_RES_INVALID_PARMS;
+ entries = malloc(sizeof(*entries) * SD_MAX_VNODES);
+ if (!entries) {
+ eprintf("oom\n");
+ ret = SD_RES_NO_MEM;
+ goto out;
+ }
+
+ if (data_len != SD_ATTR_HEADER_SIZE) {
+ ret = SD_RES_INVALID_PARMS;
+ goto out;
+ }
get_ordered_sd_vnode_list(entries, &nr_vnodes, &nr_zones);
@@ -651,9 +694,10 @@ int get_vdi_attr(uint32_t epoch, char *data, int data_len, uint32_t vid,
ret = write_object(entries, nr_vnodes, nr_zones, epoch, oid, data,
data_len, 0, copies, 1);
if (ret)
- return SD_RES_EIO;
-
- return SD_RES_SUCCESS;
+ ret = SD_RES_EIO;
+ else
+ ret = SD_RES_SUCCESS;
+ goto out;
}
if (ret < 0)
@@ -661,14 +705,19 @@ int get_vdi_attr(uint32_t epoch, char *data, int data_len, uint32_t vid,
if (memcmp(attr_buf, data, sizeof(attr_buf)) == 0) {
if (excl)
- return SD_RES_VDI_EXIST;
+ ret = SD_RES_VDI_EXIST;
else
- return SD_RES_SUCCESS;
+ ret = SD_RES_SUCCESS;
+ goto out;
}
(*attrid)++;
}
dprintf("there is no space for new vdis\n");
- return SD_RES_FULL_VDI;
+ ret = SD_RES_FULL_VDI;
+out:
+ free(entries);
+
+ return ret;
}
--
1.7.2.5
More information about the sheepdog
mailing list