[sheepdog] [PATCH v1, RFC] sheep: writeback cache semantics in backend store
Hitoshi Mitake
h.mitake at gmail.com
Wed Aug 29 18:17:24 CEST 2012
On Wed, Aug 29, 2012 at 12:36 PM, Liu Yuan <namei.unix at gmail.com> wrote:
> On 08/29/2012 07:21 AM, MORITA Kazutaka wrote:
>> At Wed, 29 Aug 2012 01:56:10 +0900,
>> Hitoshi Mitake wrote:
>>>
>>> v1: differences from v0 are,
>>> * check syncfs() in configure script
>>> * send SD_OP_FLUSH_PEER to all sheeps
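
As background for the syncfs() item: syncfs() needs _GNU_SOURCE and is
only available with glibc >= 2.14, so configure has to probe for it.
A minimal sketch of the kind of test program such a check would try to
compile and link (an illustration only, not the actual configure.ac hunk):

#define _GNU_SOURCE
#include <unistd.h>

/* if this compiles and links, syncfs() is usable */
int main(void)
{
	return syncfs(0);
}

If the probe fails, only a coarser fallback such as plain sync(), which
flushes every mounted filesystem, is possible.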
>>
>> Please add a scissors line so that we can remove the change log from
>> the commit log easily.
>>
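A scissors line is any line consisting mainly of ">8" or "8<" marks and
dashes; "git am --scissors" discards everything above it. Separated that
way, the mail body would look like:

    v1: differences from v0 are,
    * check syncfs() in configure script
    * send SD_OP_FLUSH_PEER to all sheeps

    -- >8 --
    sheep: writeback cache semantics in backend store
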
>>>
>>> +static int flush_all_node(struct request *req)
>>> +{
>>> +	int i, ret, err_ret, epfd, waiting, cnt;
>>> +	struct sd_node *s;
>>> +	struct node_id *node_sent[SD_MAX_NODES];
>>> +	struct sockfd *sfd, *sfd_sent[SD_MAX_NODES];
>>> +	struct sd_req hdr;
>>> +	struct vnode_info *vinfo = req->vinfo;
>>> +	struct epoll_event ev;
>>> +
>>> +	err_ret = SD_RES_SUCCESS;
>>> +
>>> +	epfd = epoll_create(SD_MAX_NODES);
>>> +	if (epfd == -1) {
>>> +		eprintf("failed to create epoll file descriptor, errno: %s\n",
>>> +			strerror(errno));
>>> +		return SD_RES_EIO;
>>> +	}
>>> +
>>> +	sd_init_req(&hdr, SD_OP_FLUSH_PEER);
>>> +
>>> +	bzero(&ev, sizeof(struct epoll_event));
>>> +	ev.events = EPOLLIN;
>>> +
>>> +	for (waiting = 0, i = 0; i < vinfo->nr_nodes; i++) {
>>> +		unsigned int wlen = 0;
>>> +
>>> +		s = &vinfo->nodes[i];
>>> +
>>> +		if (node_is_local(s)) {
>>> +			_peer_flush();
>>> +			continue;
>>> +		}
>>> +
>>> +		sfd = sheep_get_sockfd(&s->nid);
>>> +		if (!sfd) {
>>> +			err_ret = SD_RES_NETWORK_ERROR;
>>> +			goto put_sockfd;
>>> +		}
>>> +
>>> +		node_sent[waiting] = &s->nid;
>>> +		sfd_sent[waiting] = sfd;
>>> +
>>> +		ret = send_req(sfd->fd, &hdr, NULL, &wlen);
>>> +		if (ret) {
>>> +			eprintf("failed at send_req()\n");
>>> +			sheep_del_sockfd(&s->nid, sfd);
>>> +			err_ret = SD_RES_NETWORK_ERROR;
>>> +			goto put_sockfd;
>>> +		}
>>> +
>>> +		ev.data.fd = sfd->fd;
>>> +		if (epoll_ctl(epfd, EPOLL_CTL_ADD, sfd->fd, &ev) == -1) {
>>> +			eprintf("failed at epoll_ctl(), errno: %s\n",
>>> +				strerror(errno));
>>> +			err_ret = SD_RES_EIO;
>>> +			goto put_sockfd;
>>> +		}
>>> +
>>> +		waiting++;
>>> +	}
>>> +
>>> +	cnt = waiting;
>>> +	while (cnt) {
>>> +		struct epoll_event ev_nodes[SD_MAX_NODES];
>>> +
>>> +		bzero(ev_nodes, sizeof(struct epoll_event) * cnt);
>>> +
>>> +		ret = epoll_wait(epfd, ev_nodes, cnt, -1);
>>> +		if (ret == -1) {
>>> +			eprintf("failed at epoll_wait(), errno: %s\n",
>>> +				strerror(errno));
>>> +			err_ret = SD_RES_EIO;
>>> +			break;
>>> +		}
>>> +
>>> +		cnt -= ret;
>>> +
>>> +		for (i = 0; i < ret; i++) {
>>> +			struct sd_rsp rsp;
>>> +
>>> +			if (do_read(ev_nodes[i].data.fd, &rsp,
>>> +				    sizeof(struct sd_rsp))) {
>>> +				eprintf("failed to receive response from node\n");
>>> +				err_ret = SD_RES_NETWORK_ERROR;
>>> +				goto put_sockfd;
>>> +			}
>>> +
>>> +			/* a peer can fail the flush without a network error */
>>> +			if (rsp.result != SD_RES_SUCCESS)
>>> +				err_ret = rsp.result;
>>> +
>>> +			/* this fd is done; stop polling it */
>>> +			epoll_ctl(epfd, EPOLL_CTL_DEL, ev_nodes[i].data.fd,
>>> +				  NULL);
>>> +		}
>>> +	}
>>> +
>>> +put_sockfd:
>>> +	for (i = 0; i < waiting; i++)
>>> +		sheep_put_sockfd(node_sent[i], sfd_sent[i]);
>>> +
>>> +	close(epfd);
>>> +
>>> +	return err_ret;
>>> +}
>>> +
>>
>> This code looks like a reimplementation of gateway_forward_request. How
>> about making SD_OP_FLUSH_VDI a gateway operation, and modifying
>> gateway_forward_request so that it can forward requests to all nodes?
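For illustration, here is a minimal sketch of that generalization,
serialized for brevity (the real gateway code keeps its requests
pipelined in flight); the function name and signature are assumptions,
and the helpers are the ones the patch above already uses:

static int forward_to_nodes(struct request *req, struct sd_node *nodes,
			    int nr_nodes)
{
	struct sd_req hdr = req->rq;
	struct sd_rsp rsp;
	int i, ret = SD_RES_SUCCESS;

	for (i = 0; i < nr_nodes; i++) {
		struct sd_node *n = &nodes[i];
		struct sockfd *sfd;
		unsigned int wlen = 0;

		if (node_is_local(n))
			continue;	/* the local peer path covers this node */

		sfd = sheep_get_sockfd(&n->nid);
		if (!sfd)
			return SD_RES_NETWORK_ERROR;

		/* send the request, then wait for this node's response */
		if (send_req(sfd->fd, &hdr, NULL, &wlen) ||
		    do_read(sfd->fd, &rsp, sizeof(rsp))) {
			sheep_del_sockfd(&n->nid, sfd);
			return SD_RES_NETWORK_ERROR;
		}

		if (rsp.result != SD_RES_SUCCESS)
			ret = rsp.result;	/* remember peer-side failures */

		sheep_put_sockfd(&n->nid, sfd);
	}

	return ret;
}

The flush path would then reduce to
forward_to_nodes(req, req->vinfo->nodes, req->vinfo->nr_nodes), while the
per-object path keeps passing just the object's target nodes.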
>>
>
> I think SD_OP_FLUSH_VDI should stay local as it is. We'd better add
> another operation, SD_OP_SYNC_VDI, for the sync-vdi operation (for now,
> we broadcast it to all nodes); then we can exec_local_req(SD_OP_SYNC_VDI)
> to queue the request on the gateway and survive node change events.
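In that scheme the VM-facing flush handler would boil down to something
like this fragment (exec_local_req() and vid_to_vdi_oid() as they exist
in the tree; SD_OP_SYNC_VDI itself is only proposed here, and "vid" is
whatever vdi id is in scope):

	struct sd_req hdr;
	int ret;

	sd_init_req(&hdr, SD_OP_SYNC_VDI);
	hdr.obj.oid = vid_to_vdi_oid(vid);

	/* exec_local_req() pushes the request through the normal queue on
	 * this gateway, so the usual epoch-change retry logic applies
	 * instead of the flush being lost on a membership change */
	ret = exec_local_req(&hdr, NULL);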
>
I noticed the problem that the current implementation doesn't retry when
SD_OP_FLUSH_PEER fails. I'll add the new operation as a gateway request,
roughly as sketched below.
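The registration would look roughly like the following entry in the op
table (a sketch only; gateway_sync_vdi is a hypothetical handler name):

	[SD_OP_SYNC_VDI] = {
		.name = "SYNC_VDI",
		/* gateway-type requests are queued and retried when the
		 * epoch changes, which is exactly the retry we need */
		.type = SD_OP_TYPE_GATEWAY,
		.process_work = gateway_sync_vdi,
	},
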
Thanks for your advice.
--
Hitoshi Mitake
h.mitake at gmail.com