[sheepdog] [PATCH V3 2/3] sheep: remove unregister_event from process_event_queue()

Yunkai Zhang yunkai.me at gmail.com
Thu May 17 04:25:31 CEST 2012


On Thu, May 17, 2012 at 2:36 AM, Shevek <shevek at anarres.org> wrote:
> On Wed, 2012-05-16 at 17:04 +0800, Yunkai Zhang wrote:
>> From: Yunkai Zhang <qiushu.zyk at taobao.com>
>>
>> This is V3; I have done some simple testing with the zookeeper driver.
>> ---------------------------------------------------------------- >8
>
> This patch seems to contain a lot of changes not related to the

Can you point out which changes are not related to the description?

> description. Some of those changes, I suspect, back out earlier work.
> Did you correctly rebase onto master?
>
> S.
>
>> In the old code, we call unregister_event(cdrv_fd, ...) in process_event_queue()
>> when sheep receives a cluster EVENT, and we register cdrv_fd with epoll again
>> in event_done() after __sd_xxx() has finished.
>>
>> This is dangerous! In our testing, for some reason, __sd_xxx() may be blocked
>> by a network issue; as a result event_done() is never executed and cdrv_fd
>> stays unregistered from epoll, so newly arriving EVENTs cannot be processed
>> immediately. This makes it hard for sheep to complete recovery.
>>
>> Now we call update_cluster_info() in sd_xxx_handler() directly, so that we can
>> process new EVENTs one by one immediately, without waiting for the previous
>> EVENT's __sd_xxx() to finish. So we can safely remove unregister_event() from
>> process_event_queue().
>>
>> Signed-off-by: Yunkai Zhang <qiushu.zyk at taobao.com>
>> ---
>>  sheep/group.c |   39 ++++++++++++++-------------------------
>>  1 files changed, 14 insertions(+), 25 deletions(-)
>>
>> diff --git a/sheep/group.c b/sheep/group.c
>> index a5311fc..e37e049 100644
>> --- a/sheep/group.c
>> +++ b/sheep/group.c
>> @@ -830,11 +830,6 @@ static void __sd_join_done(struct event_struct *cevent)
>>
>>       print_node_list(sys->nodes, sys->nr_nodes);
>>
>> -     if (!sys_stat_join_failed()) {
>> -             update_cluster_info(jm, &w->joined, w->member_list,
>> -                                 w->member_list_entries);
>> -     }
>> -
>>       if (sys_can_recover() && jm->inc_epoch) {
>>               list_for_each_entry_safe(node, t, &sys->leave_list, list) {
>>                       list_del(&node->list);
>> @@ -854,19 +849,6 @@ static void __sd_join_done(struct event_struct *cevent)
>>
>>  static void __sd_leave_done(struct event_struct *cevent)
>>  {
>> -     struct work_leave *w = container_of(cevent, struct work_leave, cev);
>> -
>> -     sys->nr_nodes = w->member_list_entries;
>> -     memcpy(sys->nodes, w->member_list, sizeof(*sys->nodes) * sys->nr_nodes);
>> -     qsort(sys->nodes, sys->nr_nodes, sizeof(*sys->nodes), node_cmp);
>> -
>> -     if (sys_can_recover()) {
>> -             sys->epoch++;
>> -             update_epoch_store(sys->epoch);
>> -             update_epoch_log(sys->epoch);
>> -     }
>> -     update_vnode_info();
>> -
>>       print_node_list(sys->nodes, sys->nr_nodes);
>>
>>       if (sys_can_recover())
>> @@ -934,7 +916,6 @@ static void event_fn(struct work *work)
>>  static void event_done(struct work *work)
>>  {
>>       struct event_struct *cevent;
>> -     int ret;
>>
>>       if (!sys->cur_cevent)
>>               vprintf(SDOG_ERR, "bug\n");
>> @@ -961,9 +942,6 @@ static void event_done(struct work *work)
>>       vprintf(SDOG_DEBUG, "free %p\n", cevent);
>>       event_free(cevent);
>>       event_running = 0;
>> -     ret = register_event(cdrv_fd, group_handler, NULL);
>> -     if (ret)
>> -             panic("failed to register event fd");
>>
>>       process_request_event_queues();
>>  }
>> @@ -1067,7 +1045,6 @@ static inline void process_event_queue(void)
>>       event_work.fn = event_fn;
>>       event_work.done = event_done;
>>
>> -     unregister_event(cdrv_fd);
>>       queue_work(sys->event_wqueue, &event_work);
>>  }
>>
>> @@ -1089,7 +1066,7 @@ void sd_join_handler(struct sd_node *joined, struct sd_node *members,
>>       int i, size;
>>       int nr, nr_local, nr_leave;
>>       struct node *n;
>> -     struct join_message *jm;
>> +     struct join_message *jm = opaque;
>>       uint32_t le = get_latest_epoch();
>>
>>       if (node_eq(joined, &sys->this_node)) {
>> @@ -1113,6 +1090,8 @@ void sd_join_handler(struct sd_node *joined, struct sd_node *members,
>>               if (sys_stat_shutdown())
>>                       break;
>>
>> +             update_cluster_info(jm, joined, members, nr_members);
>> +
>>               w = zalloc(sizeof(*w));
>>               if (!w)
>>                       panic("failed to allocate memory");
>> @@ -1171,7 +1150,6 @@ void sd_join_handler(struct sd_node *joined, struct sd_node *members,
>>               }
>>               break;
>>       case CJ_RES_MASTER_TRANSFER:
>> -             jm = (struct join_message *)opaque;
>>               nr = jm->nr_leave_nodes;
>>               for (i = 0; i < nr; i++) {
>>                       if (find_entry_list(&jm->leave_nodes[i], &sys->leave_list)
>> @@ -1232,6 +1210,17 @@ void sd_leave_handler(struct sd_node *left, struct sd_node *members,
>>       if (sys_stat_shutdown())
>>               return;
>>
>> +     sys->nr_nodes = nr_members;
>> +     memcpy(sys->nodes, members, sizeof(*sys->nodes) * sys->nr_nodes);
>> +     qsort(sys->nodes, sys->nr_nodes, sizeof(*sys->nodes), node_cmp);
>> +
>> +     if (sys_can_recover()) {
>> +             sys->epoch++;
>> +             update_epoch_store(sys->epoch);
>> +             update_epoch_log(sys->epoch);
>> +     }
>> +     update_vnode_info();
>> +
>>       w = zalloc(sizeof(*w));
>>       if (!w)
>>               goto oom;
>> --
>> 1.7.7.6
>>
>
>
> --
> sheepdog mailing list
> sheepdog at lists.wpkg.org
> http://lists.wpkg.org/mailman/listinfo/sheepdog



-- 
Yunkai Zhang
Work at Taobao



More information about the sheepdog mailing list