[Sheepdog] [PATCH 2/7] sheep: fix the use of large stack areas

MORITA Kazutaka morita.kazutaka at lab.ntt.co.jp
Thu Aug 4 10:40:56 CEST 2011


Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 sheep/store.c |  120 +++++++++++++++++++++++++++++++++++++++-----------------
 sheep/vdi.c   |  103 ++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 159 insertions(+), 64 deletions(-)

diff --git a/sheep/store.c b/sheep/store.c
index cdf6a94..9318eed 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -519,10 +519,14 @@ int update_epoch_store(uint32_t epoch)
 int write_object_local(uint64_t oid, char *data, unsigned int datalen,
 		       uint64_t offset, int copies, uint32_t epoch, int create)
 {
-	struct request req;
-	struct sd_obj_req *hdr = (struct sd_obj_req *)&req.rq;
+	int ret;
+	struct request *req;
+	struct sd_obj_req *hdr;
 
-	memset(&req, 0, sizeof(req));
+	req = zalloc(sizeof(*req));
+	if (!req)
+		return SD_RES_NO_MEM;
+	hdr = (struct sd_obj_req *)&req->rq;
 
 	hdr->oid = oid;
 	if (create)
@@ -533,20 +537,29 @@ int write_object_local(uint64_t oid, char *data, unsigned int datalen,
 	hdr->flags = SD_FLAG_CMD_WRITE;
 	hdr->offset = offset;
 	hdr->data_length = datalen;
-	req.data = data;
+	req->data = data;
+
+	ret = store_queue_request_local(req, epoch);
 
-	return store_queue_request_local(&req, epoch);
+	free(req);
+
+	return ret;
 }
 
 int read_object_local(uint64_t oid, char *data, unsigned int datalen,
 		      uint64_t offset, int copies, uint32_t epoch)
 {
 	int ret;
-	struct request req;
-	struct sd_obj_req *hdr = (struct sd_obj_req *)&req.rq;
-	struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&req.rp;
+	struct request *req;
+	struct sd_obj_req *hdr;
+	struct sd_obj_rsp *rsp;
+	unsigned int rsp_data_length;
 
-	memset(&req, 0, sizeof(req));
+	req = zalloc(sizeof(*req));
+	if (!req)
+		return -SD_RES_NO_MEM;
+	hdr = (struct sd_obj_req *)&req->rq;
+	rsp = (struct sd_obj_rsp *)&req->rp;
 
 	hdr->oid = oid;
 	hdr->opcode = SD_OP_READ_OBJ;
@@ -554,17 +567,20 @@ int read_object_local(uint64_t oid, char *data, unsigned int datalen,
 	hdr->flags = 0;
 	hdr->offset = offset;
 	hdr->data_length = datalen;
-	req.data = data;
+	req->data = data;
 
-	ret = store_queue_request_local(&req, epoch);
+	ret = store_queue_request_local(req, epoch);
+
+	rsp_data_length = rsp->data_length;
+	free(req);
 
 	if (ret != 0)
 		return -ret;
 
-	if (rsp->data_length != datalen)
+	if (rsp_data_length != datalen)
 		return -SD_RES_EIO;
 
-	return rsp->data_length;
+	return rsp_data_length;
 }
 
 static int store_queue_request_local(struct request *req, uint32_t epoch)
@@ -1195,12 +1211,19 @@ static int __recover_one(struct recovery_work *rw,
 	char name[128];
 	unsigned wlen = 0, rlen;
 	int fd, ret;
-	struct sheepdog_vnode_list_entry old_entry[SD_MAX_VNODES],
-		cur_entry[SD_MAX_VNODES], next_entry[SD_MAX_VNODES];
+	struct sheepdog_vnode_list_entry *old_entry, *cur_entry, *next_entry;
 	int next_nr, next_copies;
 	int tgt_idx = -1;
 	int old_idx;
 
+	old_entry = malloc(sizeof(*old_entry) * SD_MAX_VNODES);
+	cur_entry = malloc(sizeof(*cur_entry) * SD_MAX_VNODES);
+	next_entry = malloc(sizeof(*next_entry) * SD_MAX_VNODES);
+	if (!old_entry || !cur_entry || !next_entry) {
+		eprintf("oom\n");
+		goto err;
+	}
+
 	memcpy(old_entry, _old_entry, sizeof(*old_entry) * old_nr);
 	memcpy(cur_entry, _cur_entry, sizeof(*cur_entry) * cur_nr);
 next:
@@ -1211,7 +1234,7 @@ next:
 				cur_entry, cur_nr, cur_idx, cur_copies, copy_idx);
 	if (tgt_idx < 0) {
 		eprintf("cannot find target node, %"PRIx64"\n", oid);
-		return -1;
+		goto err;
 	}
 	e = old_entry + tgt_idx;
 
@@ -1224,13 +1247,13 @@ next:
 			 epoch, oid);
 		dprintf("link from %s to %s\n", old, new);
 		if (link(old, new) == 0)
-			return 0;
+			goto out;
 
 		if (errno == ENOENT) {
 			next_nr = epoch_log_read(tgt_epoch - 1, buf, buf_len);
 			if (next_nr <= 0) {
 				eprintf("no previous epoch, %"PRIu32"\n", tgt_epoch - 1);
-				return -1;
+				goto err;
 			}
 			next_nr /= sizeof(struct sheepdog_node_list_entry);
 			next_copies = get_max_copies((struct sheepdog_node_list_entry *)buf,
@@ -1241,7 +1264,7 @@ next:
 		}
 
 		eprintf("cannot recover from local, %s, %s\n", old, new);
-		return -1;
+		goto err;
 	}
 
 	addr_to_str(name, sizeof(name), e->addr, 0);
@@ -1249,7 +1272,7 @@ next:
 	fd = connect_to(name, e->port);
 	if (fd < 0) {
 		eprintf("failed to connect to %s:%"PRIu32"\n", name, e->port);
-		return -1;
+		goto err;
 	}
 
 	if (is_vdi_obj(oid))
@@ -1273,7 +1296,7 @@ next:
 
 	if (ret < 0) {
 		eprintf("%"PRIu32"\n", rsp->result);
-		return -1;
+		goto err;
 	}
 
 	rsp = (struct sd_obj_rsp *)&hdr;
@@ -1290,20 +1313,20 @@ next:
 		fd = open(tmp_path, flags, def_fmode);
 		if (fd < 0) {
 			eprintf("failed to open %s, %s\n", tmp_path, strerror(errno));
-			return -1;
+			goto err;
 		}
 
 		ret = write(fd, buf, rlen);
 		if (ret != rlen) {
 			eprintf("failed to write object\n");
-			return -1;
+			goto err;
 		}
 
 		ret = fsetxattr(fd, ANAME_COPIES, &rsp->copies,
 				sizeof(rsp->copies), 0);
 		if (ret) {
 			eprintf("couldn't set xattr\n");
-			return -1;
+			goto err;
 		}
 
 		close(fd);
@@ -1312,22 +1335,22 @@ next:
 		ret = rename(tmp_path, path);
 		if (ret < 0) {
 			eprintf("failed to rename %s to %s, %m\n", tmp_path, path);
-			return -1;
+			goto err;
 		}
 		dprintf("recovered oid %"PRIx64" to epoch %"PRIu32"\n", oid, epoch);
-		return 0;
+		goto out;
 	}
 
 	if (rsp->result == SD_RES_NEW_NODE_VER || rsp->result == SD_RES_OLD_NODE_VER
 	    || rsp->result == SD_RES_NETWORK_ERROR) {
 		eprintf("try again, %"PRIu32", %"PRIx64"\n", rsp->result, oid);
 		rw->retry = 1;
-		return 0;
+		goto out;
 	}
 
 	if (rsp->result != SD_RES_NO_OBJ || rsp->data_length == 0) {
 		eprintf("%"PRIu32"\n", rsp->result);
-		return -1;
+		goto err;
 	}
 	next_nr = rsp->data_length / sizeof(struct sheepdog_node_list_entry);
 	next_copies = get_max_copies((struct sheepdog_node_list_entry *)buf, next_nr);
@@ -1340,7 +1363,7 @@ not_found:
 			break;
 	if (copy_idx == old_copies) {
 		eprintf("bug: cannot find the proper copy_idx\n");
-		return -1;
+		goto err;
 	}
 
 	dprintf("%"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32"\n", rsp->result, rsp->data_length, tgt_idx,
@@ -1356,6 +1379,16 @@ not_found:
 
 	tgt_epoch--;
 	goto next;
+out:
+	free(old_entry);
+	free(cur_entry);
+	free(next_entry);
+	return 0;
+err:
+	free(old_entry);
+	free(cur_entry);
+	free(next_entry);
+	return -1;
 }
 
 static void recover_one(struct work *work, int idx)
@@ -1364,10 +1397,8 @@ static void recover_one(struct work *work, int idx)
 	char *buf = NULL;
 	int ret;
 	uint64_t oid = rw->oids[rw->done];
-	struct sheepdog_node_list_entry old_nodes[SD_MAX_NODES];
-	struct sheepdog_node_list_entry cur_nodes[SD_MAX_NODES];
-	struct sheepdog_vnode_list_entry old_vnodes[SD_MAX_VNODES];
-	struct sheepdog_vnode_list_entry cur_vnodes[SD_MAX_VNODES];
+	struct sheepdog_node_list_entry *old_nodes, *cur_nodes;
+	struct sheepdog_vnode_list_entry *old_vnodes, *cur_vnodes;
 	int old_nr_nodes, cur_nr_nodes, old_nr_vnodes, cur_nr_vnodes;
 	int old_copies, cur_copies;
 	uint32_t epoch = rw->epoch;
@@ -1376,6 +1407,15 @@ static void recover_one(struct work *work, int idx)
 
 	eprintf("%"PRIu32" %"PRIu32", %16"PRIx64"\n", rw->done, rw->count, oid);
 
+	old_nodes = malloc(sizeof(*old_nodes) * SD_MAX_NODES);
+	cur_nodes = malloc(sizeof(*cur_nodes) * SD_MAX_NODES);
+	old_vnodes = malloc(sizeof(*old_vnodes) * SD_MAX_VNODES);
+	cur_vnodes = malloc(sizeof(*cur_vnodes) * SD_MAX_VNODES);
+	if (!old_nodes || !cur_nodes || !old_vnodes || !cur_vnodes) {
+		eprintf("oom\n");
+		goto out;
+	}
+
 	fd = ob_open(epoch, oid, 0, &ret);
 	if (fd != -1) {
 		/* the object is already recovered */
@@ -1451,8 +1491,11 @@ static void recover_one(struct work *work, int idx)
 fail:
 	eprintf("failed to recover object %"PRIx64"\n", oid);
 out:
-	if (buf)
-		free(buf);
+	free(old_nodes);
+	free(cur_nodes);
+	free(old_vnodes);
+	free(cur_vnodes);
+	free(buf);
 }
 
 static struct recovery_work *suspended_recovery_work;
@@ -1685,11 +1728,12 @@ static int fill_obj_list(struct recovery_work *rw,
 	int i, j;
 	uint8_t *buf = NULL;
 	size_t buf_size = SD_DATA_OBJ_SIZE; /* FIXME */
-	struct sheepdog_vnode_list_entry vnodes[SD_MAX_VNODES];
+	struct sheepdog_vnode_list_entry *vnodes;
 	int nr_vnodes, retry_cnt = 0;
 
+	vnodes = malloc(sizeof(*vnodes) * SD_MAX_VNODES);
 	buf = malloc(buf_size);
-	if (!buf)
+	if (!buf || !vnodes)
 		goto fail;
 
 	nr_vnodes = nodes_to_vnodes(cur_entry, cur_nr, vnodes);
@@ -1722,9 +1766,11 @@ static int fill_obj_list(struct recovery_work *rw,
 					  rw->count, (uint64_t *)buf, nr, nr_objs);
 	}
 
+	free(vnodes);
 	free(buf);
 	return 0;
 fail:
+	free(vnodes);
 	free(buf);
 	rw->retry = 1;
 	return -1;
diff --git a/sheep/vdi.c b/sheep/vdi.c
index 3655005..4291416 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -21,17 +21,25 @@ static int create_vdi_obj(uint32_t epoch, char *name, uint32_t new_vid, uint64_t
 			  uint32_t base_vid, uint32_t cur_vid, uint32_t copies,
 			  uint32_t snapid, int is_snapshot)
 {
-	struct sheepdog_vnode_list_entry entries[SD_MAX_VNODES];
+	struct sheepdog_vnode_list_entry *entries;
 	/* we are not called concurrently */
 	struct sheepdog_inode *new = NULL, *base = NULL, *cur = NULL;
 	struct timeval tv;
 	int ret, nr_vnodes, nr_zones;
 	unsigned long block_size = SD_DATA_OBJ_SIZE;
 
+	entries = malloc(sizeof(*entries) * SD_MAX_VNODES);
+	if (!entries) {
+		eprintf("oom\n");
+		ret = SD_RES_NO_MEM;
+		goto out;
+	}
+
 	new = zalloc(sizeof(*new));
 	if (!new) {
 		eprintf("oom\n");
-		return SD_RES_NO_MEM;
+		ret = SD_RES_NO_MEM;
+		goto out;
 	}
 
 	if (base_vid) {
@@ -141,6 +149,7 @@ static int create_vdi_obj(uint32_t epoch, char *name, uint32_t new_vid, uint64_t
 	if (ret != 0)
 		ret = SD_RES_VDI_WRITE;
 out:
+	free(entries);
 	free(new);
 	free(cur);
 	free(base);
@@ -152,14 +161,15 @@ static int find_first_vdi(uint32_t epoch, unsigned long start, unsigned long end
 			  unsigned long *deleted_nr, uint32_t *next_snap,
 			  unsigned int *nr_copies)
 {
-	struct sheepdog_vnode_list_entry entries[SD_MAX_VNODES];
+	struct sheepdog_vnode_list_entry *entries;
 	struct sheepdog_inode *inode = NULL;
 	unsigned long i;
 	int nr_vnodes, nr_zones, nr_reqs;
 	int ret, vdi_found = 0;
 
+	entries = malloc(sizeof(*entries) * SD_MAX_VNODES);
 	inode = malloc(SD_INODE_HEADER_SIZE);
-	if (!inode) {
+	if (!inode || !entries) {
 		eprintf("oom\n");
 		ret = SD_RES_NO_MEM;
 		goto out;
@@ -207,6 +217,7 @@ static int find_first_vdi(uint32_t epoch, unsigned long start, unsigned long end
 		ret = SD_RES_NO_VDI;
 out:
 	free(inode);
+	free(entries);
 
 	return ret;
 }
@@ -342,12 +353,13 @@ int del_vdi(uint32_t epoch, char *data, int data_len, uint32_t *vid,
 	uint32_t dummy0;
 	unsigned long dummy1, dummy2;
 	int ret;
-	struct sheepdog_vnode_list_entry entries[SD_MAX_VNODES];
+	struct sheepdog_vnode_list_entry *entries;
 	int nr_vnodes, nr_zones, nr_reqs;
 	struct sheepdog_inode *inode = NULL;
 
 	inode = malloc(SD_INODE_HEADER_SIZE);
-	if (!inode) {
+	entries = malloc(sizeof(*entries) * SD_MAX_VNODES);
+	if (!inode || !entries) {
 		eprintf("oom\n");
 		ret = SD_RES_NO_MEM;
 		goto out;
@@ -393,6 +405,7 @@ int del_vdi(uint32_t epoch, char *data, int data_len, uint32_t *vid,
 	ret = start_deletion(*vid, epoch);
 out:
 	free(inode);
+	free(entries);
 
 	return ret;
 }
@@ -427,15 +440,16 @@ static void delete_one(struct work *work, int idx)
 {
 	struct deletion_work *dw = container_of(work, struct deletion_work, work);
 	uint32_t vdi_id = *(((uint32_t *)dw->buf) + dw->count - dw->done - 1);
-	struct sheepdog_vnode_list_entry entries[SD_MAX_VNODES];
+	struct sheepdog_vnode_list_entry *entries;
 	int nr_vnodes, nr_zones;
 	int ret, i;
 	struct sheepdog_inode *inode = NULL;
 
 	eprintf("%d %d, %16x\n", dw->done, dw->count, vdi_id);
 
+	entries = malloc(sizeof(*entries) * SD_MAX_VNODES);
 	inode = malloc(sizeof(*inode));
-	if (!inode) {
+	if (!inode || !entries) {
 		eprintf("oom\n");
 		goto out;
 	}
@@ -465,6 +479,7 @@ static void delete_one(struct work *work, int idx)
 			      inode->nr_copies);
 	}
 out:
+	free(entries);
 	free(inode);
 }
 
@@ -576,19 +591,27 @@ out:
 
 int start_deletion(uint32_t vid, uint32_t epoch)
 {
-	struct deletion_work *dw;
-	struct sheepdog_vnode_list_entry entries[SD_MAX_VNODES];
+	struct deletion_work *dw = NULL;
+	struct sheepdog_vnode_list_entry *entries;
 	int nr_vnodes, nr_zones, ret;
 	uint32_t root_vid;
 
+	entries = malloc(sizeof(*entries) * SD_MAX_VNODES);
+	if (!entries) {
+		eprintf("oom\n");
+		ret = SD_RES_NO_MEM;
+		goto err;
+	}
 	dw = zalloc(sizeof(struct deletion_work));
-	if (!dw)
-		return SD_RES_NO_MEM;
+	if (!dw) {
+		ret = SD_RES_NO_MEM;
+		goto err;
+	}
 
 	dw->buf = zalloc(1 << 20); /* FIXME: handle larger buffer */
 	if (!dw->buf) {
-		free(dw);
-		return SD_RES_NO_MEM;
+		ret = SD_RES_NO_MEM;
+		goto err;
 	}
 
 	dw->count = 0;
@@ -601,8 +624,10 @@ int start_deletion(uint32_t vid, uint32_t epoch)
 	get_ordered_sd_vnode_list(entries, &nr_vnodes, &nr_zones);
 
 	root_vid = get_vdi_root(entries, nr_vnodes, nr_zones, dw->epoch, dw->vid);
-	if (!root_vid)
-		return SD_RES_EIO;
+	if (!root_vid) {
+		ret = SD_RES_EIO;
+		goto err;
+	}
 
 	ret = fill_vdi_list(dw, entries, nr_vnodes, nr_zones, root_vid);
 	if (ret)
@@ -611,30 +636,48 @@ int start_deletion(uint32_t vid, uint32_t epoch)
 	dprintf("%d\n", dw->count);
 
 	if (dw->count == 0)
-		return SD_RES_SUCCESS;
+		goto out;
 
 	if (!list_empty(&deletion_work_list)) {
 		list_add_tail(&dw->dw_siblings, &deletion_work_list);
-		return SD_RES_SUCCESS;
+		goto out;
 	}
 
 	list_add_tail(&dw->dw_siblings, &deletion_work_list);
 	queue_work(&dw->work);
+out:
+	free(entries);
 
 	return SD_RES_SUCCESS;
+err:
+	free(entries);
+	if (dw)
+		free(dw->buf);
+	free(dw);
+
+	return ret;
 }
 
 int get_vdi_attr(uint32_t epoch, char *data, int data_len, uint32_t vid,
 		 uint32_t *attrid, int copies, int creat, int excl)
 {
-	struct sheepdog_vnode_list_entry entries[SD_MAX_VNODES];
+	struct sheepdog_vnode_list_entry *entries;
 	char attr_buf[SD_ATTR_HEADER_SIZE];
 	uint64_t oid;
 	uint32_t end;
 	int ret, nr_zones, nr_vnodes;
 
-	if (data_len != SD_ATTR_HEADER_SIZE)
-		return SD_RES_INVALID_PARMS;
+	entries = malloc(sizeof(*entries) * SD_MAX_VNODES);
+	if (!entries) {
+		eprintf("oom\n");
+		ret = SD_RES_NO_MEM;
+		goto out;
+	}
+
+	if (data_len != SD_ATTR_HEADER_SIZE) {
+		ret = SD_RES_INVALID_PARMS;
+		goto out;
+	}
 
 	get_ordered_sd_vnode_list(entries, &nr_vnodes, &nr_zones);
 
@@ -651,9 +694,10 @@ int get_vdi_attr(uint32_t epoch, char *data, int data_len, uint32_t vid,
 			ret = write_object(entries, nr_vnodes, nr_zones, epoch, oid, data,
 					   data_len, 0, copies, 1);
 			if (ret)
-				return SD_RES_EIO;
-
-			return SD_RES_SUCCESS;
+				ret = SD_RES_EIO;
+			else
+				ret = SD_RES_SUCCESS;
+			goto out;
 		}
 
 		if (ret < 0)
@@ -661,14 +705,19 @@ int get_vdi_attr(uint32_t epoch, char *data, int data_len, uint32_t vid,
 
 		if (memcmp(attr_buf, data, sizeof(attr_buf)) == 0) {
 			if (excl)
-				return SD_RES_VDI_EXIST;
+				ret = SD_RES_VDI_EXIST;
 			else
-				return SD_RES_SUCCESS;
+				ret = SD_RES_SUCCESS;
+			goto out;
 		}
 
 		(*attrid)++;
 	}
 
 	dprintf("there is no space for new vdis\n");
-	return SD_RES_FULL_VDI;
+	ret = SD_RES_FULL_VDI;
+out:
+	free(entries);
+
+	return ret;
 }
-- 
1.7.2.5




More information about the sheepdog mailing list