[sheepdog] [PATCH v1, RFC] sheep: writeback cache semantics in backend store

Wed Aug 29 01:21:36 CEST 2012

At Wed, 29 Aug 2012 01:56:10 +0900,
Hitoshi Mitake wrote:
> 
> v1: differences from v0 are,
> * check syncfs() in configure script
> * send SD_OP_FLUSH_PEER to all sheeps

Please add a scissors line so that we can remove the change log from
the commit log easily.

>  
> +static int flush_all_node(struct request *req)
> +{
> +	int i, ret, err_ret, epfd, waiting, cnt;
> +	struct sd_node *s;
> +	struct node_id *node_sent[SD_MAX_NODES];
> +	struct sockfd *sfd, *sfd_sent[SD_MAX_NODES];
> +	struct sd_req hdr;
> +	struct vnode_info *vinfo = req->vinfo;
> +	struct epoll_event ev;
> +
> +	err_ret = SD_RES_SUCCESS;
> +
> +	epfd = epoll_create(SD_MAX_NODES);
> +	if (epfd == -1) {
> +		eprintf("failed to create epoll file descriptor");
> +		return SD_RES_EIO;
> +	}
> +
> +	sd_init_req(&hdr, SD_OP_FLUSH_PEER);
> +
> +	bzero(&ev, sizeof(struct epoll_event));
> +	ev.events = EPOLLIN;
> +
> +	for (waiting = 0, i = 0; i < vinfo->nr_nodes; i++) {
> +		unsigned int wlen = 0;
> +
> +		s = &vinfo->nodes[i];
> +
> +		if (node_is_local(s)) {
> +			_peer_flush();
> +			continue;
> +		}
> +
> +		sfd = sheep_get_sockfd(&s->nid);
> +		if (!sfd) {
> +			err_ret = SD_RES_NETWORK_ERROR;
> +			goto put_sockfd;
> +		}
> +
> +		node_sent[waiting] = &s->nid;
> +		sfd_sent[waiting] = sfd;
> +
> +		ret = send_req(sfd->fd, &hdr, NULL, &wlen);
> +		if (ret) {
> +			eprintf("failed at send_req()");
> +			sheep_del_sockfd(&s->nid, sfd);
> +			err_ret = SD_RES_NETWORK_ERROR;
> +			goto put_sockfd;
> +		}
> +
> +		ev.data.fd = sfd->fd;
> +		if (epoll_ctl(epfd, EPOLL_CTL_ADD, sfd->fd, &ev) == -1) {
> +			eprintf("failed at epoll_ctl(), errno: %s", strerror(errno));
> +			err_ret = SD_RES_EIO;
> +			goto put_sockfd;
> +		}
> +
> +		waiting++;
> +	}
> +
> +	cnt = waiting;
> +	while (cnt) {
> +		struct epoll_event ev_nodes[SD_MAX_NODES];
> +
> +		bzero(ev_nodes, sizeof(struct epoll_event) * cnt);
> +
> +		ret = epoll_wait(epfd, ev_nodes, cnt, -1);
> +		if (ret == -1) {
> +			eprintf("failed at epoll_wait(), errno: %s", strerror(errno));
> +			err_ret = SD_RES_EIO;
> +			break;
> +		}
> +
> +		cnt -= ret;
> +
> +		for (i = 0; i < ret; i++) {
> +			struct sd_rsp rsp;
> +
> +			if (do_read(ev_nodes[i].data.fd, &rsp, sizeof(struct sd_rsp))) {
> +				eprintf("failed to receive response from node");
> +				err_ret = SD_RES_NETWORK_ERROR;
> +				goto put_sockfd;
> +			}
> +		}
> +	}
> +
> +put_sockfd:
> +	for (i = 0; i < waiting; i++)
> +		sheep_put_sockfd(node_sent[i], sfd_sent[i]);
> +
> +	close(epfd);
> +
> +	return err_ret;
> +}
> +

This code looks like a reimplementation of gateway_forward_request.  How
about making SD_OP_FLUSH_VDI a gateway operation, and modifying
gateway_forward_request so that it can forward a request to all nodes?

> @@ -904,6 +1017,31 @@ out:
>  	return ret;
>  }
>  
> +int _peer_flush(void)
> +{
> +	int fd;
> +
> +	fd = open(obj_path, O_RDONLY);
> +	if (fd < 0) {
> +		eprintf("error at open() %s, %s\n", obj_path, strerror(errno));
> +		return SD_RES_NO_OBJ;
> +	}
> +
> +	if (syncfs(fd)) {
> +		eprintf("error at syncfs(), %s\n", strerror(errno));
> +		return SD_RES_EIO;
> +	}

Add a new sync interface to the storage driver and call it here, because
how to flush data is up to the underlying storage driver.

> diff --git a/sheep/sheep.c b/sheep/sheep.c
> index 10c0501..77e8d7c 100644
> --- a/sheep/sheep.c
> +++ b/sheep/sheep.c
> @@ -52,10 +52,11 @@ static struct option const long_options[] = {
>  	{"enable-cache", required_argument, NULL, 'w'},
>  	{"zone", required_argument, NULL, 'z'},
>  	{"pidfile", required_argument, NULL, 'P'},
> +	{"writeback", no_argument, NULL, 'W'},
>  	{NULL, 0, NULL, 0},
>  };

Adding a 'writeback' option is a bit confusing.  Should we extend the
'-w' option so that we can specify the types of caches we want to enable?

Thanks,

Kazutaka