[stgt] [PATCH] new timer-based work scheduling

Alexander Nezhinsky alexandern at Voltaire.COM
Mon Jan 17 20:05:11 CET 2011


Re-implementing the time-based work scheduler. This patch implements
a timer-based scheme.

A global timer is registered and fired periodically, a few times a second.
It is implemented using timerfd, if supported by the kernel. Otherwise
a signal-based timer is used. In the former case timerfd becomes readable
and the number of times the timer had expired before it was signaled can be
read from it. In the latter case the signal handler writes a word
to a dedicated pipe, whose read-end fd is registered with the event loop.
To be consistent with timerfd scheme this word contains 1.

Work items are scheduled from various application contexts, and put on
a queue. The current time is calculated on each timer event based on the
requested timer interval. The work item is stamped with the expiration time,
obtained as the current time plus the timeout period, in msec.
 
The event handler reads from the timer related fd and examines the inactive
work queue. All items that have expired are moved to the active list and 
processed one after another.

The new scheme handles the timer event through the standard event loop's
file descriptor registration and polling. This removes the previously used
call to the schedule() function, which had to update jiffies.
 
There is still no guarantee about the exact work execution time.
This scheme is suitable for tasks with seconds-scale resolution, so that
firing the timer a few times per second provides satisfactory accuracy.
The assumption is that all event handlers take small periods of time.
If not, in theory, the timer handlers may be delayed indefinitely,
but then the entire event loop processing gets stalled.

The immediate clients of this mechanism are the iSNS code and the new
iSER code, which is to be based on it instead of the previously proposed
custom timer.

Signed-off-by: Alexander Nezhinsky <alexandern at voltaire.com>
---
 usr/tgtd.c |   11 ++-
 usr/work.c |  220 ++++++++++++++++++++++++++++++++++++++++++++++++++++++------
 usr/work.h |    6 +-
 3 files changed, 211 insertions(+), 26 deletions(-)

diff --git a/usr/tgtd.c b/usr/tgtd.c
index 2fd4959..13ff65c 100644
--- a/usr/tgtd.c
+++ b/usr/tgtd.c
@@ -337,7 +337,7 @@ static void event_loop(void)
 
 retry:
 	sched_remains = tgt_exec_scheduled();
-	timeout = sched_remains ? 0 : TGTD_TICK_PERIOD * 1000;
+	timeout = sched_remains ? 0 : -1;
 
 	nevent = epoll_wait(ep_fd, events, ARRAY_SIZE(events), timeout);
 	if (nevent < 0) {
@@ -350,8 +350,7 @@ retry:
 			tev = (struct event_data *) events[i].data.ptr;
 			tev->handler(tev->fd, events[i].events, tev->data);
 		}
-	} else
-		schedule();
+	}
 
 	if (system_active)
 		goto retry;
@@ -517,12 +516,18 @@ int main(int argc, char **argv)
 		}
 	}
 
+	err = work_timer_start();
+	if (err)
+		exit(1);
+
 	bs_init();
 
 	event_loop();
 
 	lld_exit();
 
+	work_timer_stop();
+
 	ipc_exit();
 
 	log_close();
diff --git a/usr/work.c b/usr/work.c
index 3080a59..a71ff0b 100644
--- a/usr/work.c
+++ b/usr/work.c
@@ -1,8 +1,9 @@
 /*
- * bogus scheduler
+ * work scheduler, loosely timer-based
  *
  * Copyright (C) 2006-2007 FUJITA Tomonori <tomof at acm.org>
  * Copyright (C) 2006-2007 Mike Christie <michaelc at cs.wisc.edu>
+ * Copyright (C) 2011 Alexander Nezhinsky <alexandern at voltaire.com>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
@@ -21,27 +22,210 @@
  */
 #include <stdlib.h>
 #include <stdint.h>
+#include <sys/epoll.h>
 
 #include "list.h"
 #include "util.h"
 #include "log.h"
 #include "work.h"
+#include "tgtd.h"
+
+#define WORK_TIMER_INT_MSEC	200
+
+static unsigned int current_time;
+static int timer_started;
+static int timer_fd_rd = -1;
 
-static unsigned int jiffies;
 static LIST_HEAD(active_work_list);
 static LIST_HEAD(inactive_work_list);
 
+static void execute_work(void);
+
+/* Event-loop callback for the timer fd: advance current_time, run due work. */
+static void work_timer_evt_handler(int fd, int events, void *data)
+{
+	unsigned long long n = 0; /* timer expirations */
+	int ret;
+
+	ret = read(timer_fd_rd, &n, sizeof(n));
+	if (ret != (int)sizeof(n)) {
+		/* read() returns -1 on failure and leaves the cause in errno */
+		if (ret < 0 && errno != EAGAIN)
+			eprintf("Failed to read from pipe, %m\n");
+		return;
+	}
+	current_time += n * WORK_TIMER_INT_MSEC;
+	execute_work();
+}
+
+#if defined(__NR_timerfd) /* timerfd supported */
+
+#include <sys/timerfd.h>
+
+#define WORK_TIMER_INT_NSEC	(WORK_TIMER_INT_MSEC * 1000 * 1000)
+
+/* Arm the periodic timerfd and hook it into the event loop; 0 or -1. */
+int work_timer_start(void)
+{
+	struct itimerspec new_t;
+	int ret;
+
+	if (timer_started)
+		return 0;
+
+	timer_fd_rd = timerfd_create(CLOCK_REALTIME, TFD_NONBLOCK);
+	if (timer_fd_rd < 0) {
+		eprintf("the system doesn't support timerfd\n");
+		return -1;
+	}
+	/* relative timer: first expiry after one interval, then periodic */
+	new_t.it_value.tv_sec = 0;
+	new_t.it_value.tv_nsec = WORK_TIMER_INT_NSEC;
+	new_t.it_interval = new_t.it_value;
+
+	ret = timerfd_settime(timer_fd_rd, 0, &new_t, NULL);
+	if (ret < 0) {
+		eprintf("failed to arm timerfd, %m\n");
+		goto timer_err;
+	}
+	ret = tgt_event_add(timer_fd_rd, EPOLLIN, work_timer_evt_handler, NULL);
+	if (ret) {
+		eprintf("failed to add timer event, fd:%d\n", timer_fd_rd);
+		goto timer_err;
+	}
+
+	dprintf("started, timeout: %d msec\n", WORK_TIMER_INT_MSEC);
+	timer_started = 1;
+	return 0;
+timer_err:
+	close(timer_fd_rd);
+	timer_fd_rd = -1;
+	return -1;
+}
+
+/* Unregister and close the timerfd; a no-op if the timer is not running. */
+int work_timer_stop(void)
+{
+	if (timer_started) {
+		tgt_event_del(timer_fd_rd);
+		close(timer_fd_rd);
+		timer_started = 0;
+	}
+
+	return 0;
+}
+
+#else /* timerfd NOT supported */
+
+#include <signal.h>
+#include <sys/time.h>
+
+#define WORK_TIMER_INT_USEC	(WORK_TIMER_INT_MSEC * 1000)
+
+static struct itimerval work_timer = {
+	.it_interval = { .tv_sec = 0, .tv_usec = WORK_TIMER_INT_USEC },
+	.it_value = { .tv_sec = 0, .tv_usec = WORK_TIMER_INT_USEC },
+};
+
+static struct sigaction old_sig; /* SIGALRM action saved by work_timer_start() */
+static int timer_fd[2]; /* pipe: [0] is polled, [1] is written by the handler */
+
+static void work_timer_sig_handler(int data)
+{
+	unsigned long long n = 1; /* signal 1 timer expiration */
+	int saved_errno = errno; /* interrupted code must not see errno change */
+
+	/* SIGALRM context: no logging, eprintf() may not be async-signal-safe */
+	if (write(timer_fd[1], &n, sizeof(n)) < 0)
+		errno = saved_errno; /* tick is lost; undo write()'s errno */
+}
+
+/* Signal-based fallback: each SIGALRM tick is forwarded through a pipe. */
+int work_timer_start(void)
+{
+	struct sigaction new_sig;
+	int err;
+
+	if (timer_started)
+		return 0;
+
+	/* create the pipe first so the handler never runs without a valid fd */
+	err = pipe(timer_fd);
+	if (err) {
+		eprintf("Failed to open timer pipe\n");
+		return -1;
+	}
+
+	timer_fd_rd = timer_fd[0];
+	err = tgt_event_add(timer_fd_rd, EPOLLIN, work_timer_evt_handler, NULL);
+	if (err) {
+		eprintf("failed to add timer event, fd:%d\n", timer_fd_rd);
+		goto timer_err_event;
+	}
+
+	sigemptyset(&new_sig.sa_mask);
+	sigaddset(&new_sig.sa_mask, SIGALRM);
+	new_sig.sa_flags = 0;
+	new_sig.sa_handler = work_timer_sig_handler;
+	err = sigaction(SIGALRM, &new_sig, &old_sig);
+	if (err) {
+		eprintf("Failed to setup timer handler\n");
+		goto timer_err_sig;
+	}
+
+	err = setitimer(ITIMER_REAL, &work_timer, NULL);
+	if (err) {
+		eprintf("Failed to set timer\n");
+		goto timer_err_itimer;
+	}
+
+	dprintf("started, timeout: %d msec\n", WORK_TIMER_INT_MSEC);
+	timer_started = 1;
+	return 0;
+
+timer_err_itimer:
+	sigaction(SIGALRM, &old_sig, NULL);
+timer_err_sig:
+	tgt_event_del(timer_fd_rd);
+timer_err_event:
+	timer_fd_rd = -1;
+	close(timer_fd[0]);
+	close(timer_fd[1]);
+	return -1;
+}
+
+/* Stop the SIGALRM timer first, then tear down the pipe it writes to. */
+int work_timer_stop(void)
+{
+	struct itimerval timer_off = { {0, 0}, {0, 0} };
+	int err;
+
+	if (!timer_started)
+		return 0;
+
+	/* a zeroed it_value disarms the timer; a NULL new_value is not portable */
+	err = setitimer(ITIMER_REAL, &timer_off, NULL);
+	if (err)
+		eprintf("Failed to stop timer\n");
+	else
+		dprintf("Timer stopped\n");
+	sigaction(SIGALRM, &old_sig, NULL);
+	tgt_event_del(timer_fd_rd);
+	timer_fd_rd = -1;
+	close(timer_fd[0]);
+	close(timer_fd[1]);
+	timer_started = 0;
+	return err;
+}
+
+#endif /* timerfd support */
+
 void add_work(struct tgt_work *work, unsigned int second)
 {
-	unsigned int when;
 	struct tgt_work *ent;
 
 	if (second) {
-		when = second / TGTD_TICK_PERIOD;
-		if (!when)
-			when = 1;
-
-		work->when = when + jiffies;
+		work->when = current_time + second * 1000;
 
 		list_for_each_entry(ent, &inactive_work_list, entry) {
 			if (before(work->when, ent->when))
@@ -49,8 +233,10 @@ void add_work(struct tgt_work *work, unsigned int second)
 		}
 
 		list_add_tail(&work->entry, &ent->entry);
-	} else
+	} else {
 		list_add_tail(&work->entry, &active_work_list);
+		execute_work();
+	}
 }
 
 void del_work(struct tgt_work *work)
@@ -58,20 +244,16 @@ void del_work(struct tgt_work *work)
 	list_del_init(&work->entry);
 }
 
-/*
- * this function is called only when the system is idle. So this
- * scheduler is pretty bogus. Your job would be delayed unexpectedly.
- */
-void schedule(void)
+static void execute_work()
 {
 	struct tgt_work *work, *n;
 
 	list_for_each_entry_safe(work, n, &inactive_work_list, entry) {
-		if (after(jiffies, work->when)) {
-			list_del(&work->entry);
-			list_add_tail(&work->entry, &active_work_list);
-		} else
+		if (before(current_time, work->when))
 			break;
+
+		list_del(&work->entry);
+		list_add_tail(&work->entry, &active_work_list);
 	}
 
 	while (!list_empty(&active_work_list)) {
@@ -80,6 +262,4 @@ void schedule(void)
 		list_del_init(&work->entry);
 		work->func(work->data);
 	}
-
-	jiffies++;
 }
diff --git a/usr/work.h b/usr/work.h
index 3d5e75e..7b1876a 100644
--- a/usr/work.h
+++ b/usr/work.h
@@ -1,8 +1,6 @@
 #ifndef __SCHED_H
 #define __SCHED_H
 
-#define TGTD_TICK_PERIOD 2
-
 struct tgt_work {
 	struct list_head entry;
 	void (*func)(void *);
@@ -10,7 +8,9 @@ struct tgt_work {
 	unsigned int when;
 };
 
-extern void schedule(void);
+extern int work_timer_start(void);
+extern int work_timer_stop(void);
+
 extern void add_work(struct tgt_work *work, unsigned int second);
 extern void del_work(struct tgt_work *work);
 
--
1.7.3
--
To unsubscribe from this list: send the line "unsubscribe stgt" in
the body of a message to majordomo at vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html



More information about the stgt mailing list