[sheepdog] [PATCH v1, RFC] sheep: writeback cache semantics in backend store
Hitoshi Mitake
h.mitake at gmail.com
Wed Aug 29 18:17:24 CEST 2012
On Wed, Aug 29, 2012 at 12:36 PM, Liu Yuan <namei.unix at gmail.com> wrote:
> On 08/29/2012 07:21 AM, MORITA Kazutaka wrote:
>> At Wed, 29 Aug 2012 01:56:10 +0900,
>> Hitoshi Mitake wrote:
>>>
>>> v1: differences from v0 are,
>>> * check syncfs() in configure script
>>> * send SD_OP_FLUSH_PEER to all sheeps
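
As background for the syncfs() item: syncfs() needs _GNU_SOURCE and is
only available with glibc >= 2.14, so configure has to probe for it.
A minimal sketch of the kind of test program such a check would try to
compile and link (an illustration only, not the actual configure.ac hunk):

#define _GNU_SOURCE
#include <unistd.h>

/* if this compiles and links, syncfs() is usable */
int main(void)
{
	return syncfs(0);
}

If the probe fails, only a coarser fallback such as plain sync(), which
flushes every mounted filesystem, is possible.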
>>
>> Please add a scissors line so that we can remove the change log from
>> the commit log easily.
>>
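A scissors line is any line consisting mainly of ">8" or "8<" marks and
dashes; "git am --scissors" discards everything above it. Separated that
way, the mail body would look like:

    v1: differences from v0 are,
    * check syncfs() in configure script
    * send SD_OP_FLUSH_PEER to all sheeps

    -- >8 --
    sheep: writeback cache semantics in backend store
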
>>>
>>> +static int flush_all_node(struct request *req)
>>> +{
>>> +	int i, ret, err_ret, epfd, waiting, cnt;
>>> +	struct sd_node *s;
>>> +	struct node_id *node_sent[SD_MAX_NODES];
>>> +	struct sockfd *sfd, *sfd_sent[SD_MAX_NODES];
>>> +	struct sd_req hdr;
>>> +	struct vnode_info *vinfo = req->vinfo;
>>> +	struct epoll_event ev;
>>> +
>>> +	err_ret = SD_RES_SUCCESS;
>>> +
>>> +	epfd = epoll_create(SD_MAX_NODES);
>>> +	if (epfd == -1) {
>>> +		eprintf("failed to create epoll file descriptor, errno: %s\n",
>>> +			strerror(errno));
>>> +		return SD_RES_EIO;
>>> +	}
>>> +
>>> +	sd_init_req(&hdr, SD_OP_FLUSH_PEER);
>>> +
>>> +	bzero(&ev, sizeof(struct epoll_event));
>>> +	ev.events = EPOLLIN;
>>> +
>>> +	for (waiting = 0, i = 0; i < vinfo->nr_nodes; i++) {
>>> +		unsigned int wlen = 0;
>>> +
>>> +		s = &vinfo->nodes[i];
>>> +
>>> +		if (node_is_local(s)) {
>>> +			_peer_flush();
>>> +			continue;
>>> +		}
>>> +
>>> +		sfd = sheep_get_sockfd(&s->nid);
>>> +		if (!sfd) {
>>> +			err_ret = SD_RES_NETWORK_ERROR;
>>> +			goto put_sockfd;
>>> +		}
>>> +
>>> +		node_sent[waiting] = &s->nid;
>>> +		sfd_sent[waiting] = sfd;
>>> +
>>> +		ret = send_req(sfd->fd, &hdr, NULL, &wlen);
>>> +		if (ret) {
>>> +			eprintf("failed at send_req()\n");
>>> +			sheep_del_sockfd(&s->nid, sfd);
>>> +			err_ret = SD_RES_NETWORK_ERROR;
>>> +			goto put_sockfd;
>>> +		}
>>> +
>>> +		ev.data.fd = sfd->fd;
>>> +		if (epoll_ctl(epfd, EPOLL_CTL_ADD, sfd->fd, &ev) == -1) {
>>> +			eprintf("failed at epoll_ctl(), errno: %s\n",
>>> +				strerror(errno));
>>> +			err_ret = SD_RES_EIO;
>>> +			goto put_sockfd;
>>> +		}
>>> +
>>> +		waiting++;
>>> +	}
>>> +
>>> +	cnt = waiting;
>>> +	while (cnt) {
>>> +		struct epoll_event ev_nodes[SD_MAX_NODES];
>>> +
>>> +		bzero(ev_nodes, sizeof(struct epoll_event) * cnt);
>>> +
>>> +		ret = epoll_wait(epfd, ev_nodes, cnt, -1);
>>> +		if (ret == -1) {
>>> +			eprintf("failed at epoll_wait(), errno: %s\n",
>>> +				strerror(errno));
>>> +			err_ret = SD_RES_EIO;
>>> +			break;
>>> +		}
>>> +
>>> +		cnt -= ret;
>>> +
>>> +		for (i = 0; i < ret; i++) {
>>> +			struct sd_rsp rsp;
>>> +
>>> +			if (do_read(ev_nodes[i].data.fd, &rsp,
>>> +				    sizeof(struct sd_rsp))) {
>>> +				eprintf("failed to receive response from node\n");
>>> +				err_ret = SD_RES_NETWORK_ERROR;
>>> +				goto put_sockfd;
>>> +			}
>>> +
>>> +			/* a peer can fail the flush without a network error */
>>> +			if (rsp.result != SD_RES_SUCCESS)
>>> +				err_ret = rsp.result;
>>> +
>>> +			/* this fd is done; stop polling it */
>>> +			epoll_ctl(epfd, EPOLL_CTL_DEL, ev_nodes[i].data.fd,
>>> +				  NULL);
>>> +		}
>>> +	}
>>> +
>>> +put_sockfd:
>>> +	for (i = 0; i < waiting; i++)
>>> +		sheep_put_sockfd(node_sent[i], sfd_sent[i]);
>>> +
>>> +	close(epfd);
>>> +
>>> +	return err_ret;
>>> +}
>>> +
>>
>> This code looks like a reimplementation of gateway_forward_request. How
>> about making SD_OP_FLUSH_VDI a gateway operation, and modifying
>> gateway_forward_request so that it can forward requests to all nodes?
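For illustration, here is a minimal sketch of that generalization,
serialized for brevity (the real gateway code keeps its requests
pipelined in flight); the function name and signature are assumptions,
and the helpers are the ones the patch above already uses:

static int forward_to_nodes(struct request *req, struct sd_node *nodes,
			    int nr_nodes)
{
	struct sd_req hdr = req->rq;
	struct sd_rsp rsp;
	int i, ret = SD_RES_SUCCESS;

	for (i = 0; i < nr_nodes; i++) {
		struct sd_node *n = &nodes[i];
		struct sockfd *sfd;
		unsigned int wlen = 0;

		if (node_is_local(n))
			continue;	/* the local peer path covers this node */

		sfd = sheep_get_sockfd(&n->nid);
		if (!sfd)
			return SD_RES_NETWORK_ERROR;

		/* send the request, then wait for this node's response */
		if (send_req(sfd->fd, &hdr, NULL, &wlen) ||
		    do_read(sfd->fd, &rsp, sizeof(rsp))) {
			sheep_del_sockfd(&n->nid, sfd);
			return SD_RES_NETWORK_ERROR;
		}

		if (rsp.result != SD_RES_SUCCESS)
			ret = rsp.result;	/* remember peer-side failures */

		sheep_put_sockfd(&n->nid, sfd);
	}

	return ret;
}

The flush path would then reduce to
forward_to_nodes(req, req->vinfo->nodes, req->vinfo->nr_nodes), while the
per-object path keeps passing just the object's target nodes.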
>>
>
> I think SD_OP_FLUSH_VDI should stay local as it is. We'd better add
> another operation, SD_OP_SYNC_VDI, for the sync-vdi operation (for now,
> we broadcast it to all nodes); then we can exec_local_req(SD_OP_SYNC_VDI)
> to queue the request on the gateway and survive node change events.
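In that scheme the VM-facing flush handler would boil down to something
like this fragment (exec_local_req() and vid_to_vdi_oid() as they exist
in the tree; SD_OP_SYNC_VDI itself is only proposed here, and "vid" is
whatever vdi id is in scope):

	struct sd_req hdr;
	int ret;

	sd_init_req(&hdr, SD_OP_SYNC_VDI);
	hdr.obj.oid = vid_to_vdi_oid(vid);

	/* exec_local_req() pushes the request through the normal queue on
	 * this gateway, so the usual epoch-change retry logic applies
	 * instead of the flush being lost on a membership change */
	ret = exec_local_req(&hdr, NULL);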
>
I noticed the problem that the current implementation doesn't retry when
SD_OP_FLUSH_PEER fails. I'll add the new operation as a gateway request,
roughly as sketched below.
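The registration would look roughly like the following entry in the op
table (a sketch only; gateway_sync_vdi is a hypothetical handler name):

	[SD_OP_SYNC_VDI] = {
		.name = "SYNC_VDI",
		/* gateway-type requests are queued and retried when the
		 * epoch changes, which is exactly the retry we need */
		.type = SD_OP_TYPE_GATEWAY,
		.process_work = gateway_sync_vdi,
	},
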
Thanks for your advice.
--
Hitoshi Mitake
h.mitake at gmail.com