[stgt] [PATCH] new timer-based work scheduling
FUJITA Tomonori
fujita.tomonori at lab.ntt.co.jp
Mon Jan 17 11:54:49 CET 2011
On Mon, 17 Jan 2011 11:04:17 +0900
FUJITA Tomonori <fujita.tomonori at lab.ntt.co.jp> wrote:
> On Sun, 16 Jan 2011 18:24:59 +0200
> Alexander Nezhinsky <alexandern at Voltaire.COM> wrote:
>
> > On 01/15/2011 01:14 PM, FUJITA Tomonori wrote:
> > >> Re-implementing the time-based work scheduler. This patch implements
> > >> a timer-based scheme.
> >
> > > You need this patch for more precise timer, right? In another way, the
> > > problem that you try to fix is that the current timer is too
> > > unexpected.
> >
> > Yes, the jiffies mechanism relied on the timeout values passed to
> > epoll_wait(), but the events could pop up before the timed has expired.
> > So under load, the intervals between jiffies counts are practically zero.
> > On the other hand, event handlers can take indefinite times, but this
> > would be undetected by the jiffies count.
> >
> > In the new scheme, the timer never "underflows" - at least the requested
> > interval is alwys guaranteed. Other event handlers can still delay
> > the actual work scheduling, but after they return all overdue work units
> > are submitted for execution immediately.
> >
> > The underlying timer fires every 250ms, setting the expected accuracy of
> > scheduling. I guess that all event handlers practically complete well
> > within this interval, so we can fairly count on delays bounded by,
> > let's say, ~300ms.
>
> Even if the timer fires every 250ms, there is no guarantee that the
> file descriptor is handled immediately, right?
>
> Adding the better timer is fine by me. Can we use timerfd instead of
> using another thread? timerfd is exactly what we want here.
>
> We still need to care about too old kernels that don't support
> timerfd?
Here's an example to use timerfd.
We need to keep the old jiffies code for old kernels and non Linux but
timerfd-capable kernels can enjoy timerfd.
diff --git a/usr/tgtd.c b/usr/tgtd.c
index 2fd4959..13ff65c 100644
--- a/usr/tgtd.c
+++ b/usr/tgtd.c
@@ -337,7 +337,7 @@ static void event_loop(void)
retry:
sched_remains = tgt_exec_scheduled();
- timeout = sched_remains ? 0 : TGTD_TICK_PERIOD * 1000;
+ timeout = sched_remains ? 0 : -1;
nevent = epoll_wait(ep_fd, events, ARRAY_SIZE(events), timeout);
if (nevent < 0) {
@@ -350,8 +350,7 @@ retry:
tev = (struct event_data *) events[i].data.ptr;
tev->handler(tev->fd, events[i].events, tev->data);
}
- } else
- schedule();
+ }
if (system_active)
goto retry;
@@ -517,12 +516,18 @@ int main(int argc, char **argv)
}
}
+ err = work_timer_start();
+ if (err)
+ exit(1);
+
bs_init();
event_loop();
lld_exit();
+ work_timer_stop();
+
ipc_exit();
log_close();
diff --git a/usr/work.c b/usr/work.c
index 3080a59..2373e78 100644
--- a/usr/work.c
+++ b/usr/work.c
@@ -1,8 +1,9 @@
/*
- * bogus scheduler
+ * work scheduler, loosely timer-based
*
* Copyright (C) 2006-2007 FUJITA Tomonori <tomof at acm.org>
* Copyright (C) 2006-2007 Mike Christie <michaelc at cs.wisc.edu>
+ * Copyright (C) 2011 Alexander Nezhinsky <alexandern at voltaire.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
@@ -21,27 +22,107 @@
*/
#include <stdlib.h>
#include <stdint.h>
+#include <sys/epoll.h>
+#include <sys/timerfd.h>
#include "list.h"
#include "util.h"
#include "log.h"
#include "work.h"
+#include "tgtd.h"
+
+#define WORK_TIMER_INT_MSEC 250
+#define WORK_TIMER_INT_NSEC (WORK_TIMER_INT_MSEC * 1000 * 1000)
+
+static int timer_fd = -1;
+static unsigned int current_time;
-static unsigned int jiffies;
static LIST_HEAD(active_work_list);
static LIST_HEAD(inactive_work_list);
+static void execute_work(void);
+
+static void work_timer_evt_handler(int fd, int events, void *data)
+{
+ unsigned long long s;
+ static int first;
+ int ret;
+
+ if (!first) {
+ first++;
+ return;
+ }
+
+ ret = read(timer_fd, &s, sizeof(s));
+ if (ret < 0) {
+ if (ret == -EAGAIN)
+ return;
+ eprintf("Failed to read from pipe, %m\n");
+ return;
+ }
+
+ current_time += s * WORK_TIMER_INT_MSEC;
+
+ execute_work();
+}
+
+int work_timer_start(void)
+{
+ struct itimerspec new, old;
+ int ret;
+
+ if (timer_fd != -1)
+ return 0;
+
+ timer_fd = timerfd_create(CLOCK_REALTIME, TFD_NONBLOCK);
+ if (timer_fd < 0) {
+ eprintf("the system doesn't support timerfd");
+ goto timer_err;
+ }
+
+ new.it_value.tv_sec = 0;
+ new.it_value.tv_nsec = 1;
+
+ new.it_interval.tv_sec = 0;
+ new.it_interval.tv_nsec = WORK_TIMER_INT_NSEC;
+
+ ret = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &new, &old);
+ if (ret < 0) {
+ eprintf("the system doesn't support timerfd");
+ close(timer_fd);
+ goto timer_err;
+ }
+
+ ret = tgt_event_add(timer_fd, EPOLLIN,
+ work_timer_evt_handler, NULL);
+ if (ret) {
+ eprintf("failed to add timer event, fd:%d\n", timer_fd);
+ goto timer_err;
+ }
+
+ return 0;
+timer_err:
+ work_timer_stop();
+ return -1;
+}
+
+int work_timer_stop(void)
+{
+ if (timer_fd == -1)
+ return 0;
+
+ tgt_event_del(timer_fd);
+ close(timer_fd);
+
+ return 0;
+}
+
void add_work(struct tgt_work *work, unsigned int second)
{
- unsigned int when;
struct tgt_work *ent;
if (second) {
- when = second / TGTD_TICK_PERIOD;
- if (!when)
- when = 1;
-
- work->when = when + jiffies;
+ work->when = current_time + second * 1000;
list_for_each_entry(ent, &inactive_work_list, entry) {
if (before(work->when, ent->when))
@@ -49,8 +130,10 @@ void add_work(struct tgt_work *work, unsigned int second)
}
list_add_tail(&work->entry, &ent->entry);
- } else
+ } else {
list_add_tail(&work->entry, &active_work_list);
+ execute_work();
+ }
}
void del_work(struct tgt_work *work)
@@ -58,20 +141,16 @@ void del_work(struct tgt_work *work)
list_del_init(&work->entry);
}
-/*
- * this function is called only when the system is idle. So this
- * scheduler is pretty bogus. Your job would be delayed unexpectedly.
- */
-void schedule(void)
+static void execute_work()
{
struct tgt_work *work, *n;
list_for_each_entry_safe(work, n, &inactive_work_list, entry) {
- if (after(jiffies, work->when)) {
- list_del(&work->entry);
- list_add_tail(&work->entry, &active_work_list);
- } else
+ if (before(current_time, work->when))
break;
+
+ list_del(&work->entry);
+ list_add_tail(&work->entry, &active_work_list);
}
while (!list_empty(&active_work_list)) {
@@ -80,6 +159,4 @@ void schedule(void)
list_del_init(&work->entry);
work->func(work->data);
}
-
- jiffies++;
}
diff --git a/usr/work.h b/usr/work.h
index 3d5e75e..7b1876a 100644
--- a/usr/work.h
+++ b/usr/work.h
@@ -1,8 +1,6 @@
#ifndef __SCHED_H
#define __SCHED_H
-#define TGTD_TICK_PERIOD 2
-
struct tgt_work {
struct list_head entry;
void (*func)(void *);
@@ -10,7 +8,9 @@ struct tgt_work {
unsigned int when;
};
-extern void schedule(void);
+extern int work_timer_start(void);
+extern int work_timer_stop(void);
+
extern void add_work(struct tgt_work *work, unsigned int second);
extern void del_work(struct tgt_work *work);
--
To unsubscribe from this list: send the line "unsubscribe stgt" in
the body of a message to majordomo at vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
More information about the stgt
mailing list