
List:       linux-aio
Subject:    RE: Optimize aio reap/completion path
From:       "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Date:       2004-05-05 21:47:58
Message-ID: 200405052147.i45LlwF27733@unix-os.sc.intel.com

>>>> Chen, Kenneth W wrote on Wed, April 28, 2004 6:14 PM
> I think we can optimize it away with another reap waiter list
> in the struct kioctx and set it up in the slow path of
> read_events().  In aio_complete(), we can check against min_nr
> and only wake up when min_nr is reached.  This will cut the
> ctx rate down to a minimum.  Does this sound reasonable?

OK, here is my first rev of this optimization.  With a mocked-up micro
benchmark I'm seeing a sizeable reduction in ctx rate.  The mock-up
submits 800 I/Os up front, then loops forever reaping 400 and
submitting 400 (4P system, 200 disks, random 2KB direct-io).
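
For reference, the mock-up is roughly as follows -- a sketch only, not
the actual test driver: the device name, offsets and error handling are
made up, one disk is shown instead of 200, and it assumes libaio
(build with -laio):

#define _GNU_SOURCE		/* for O_DIRECT */
#include <libaio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>

#define NR_INITIAL	800	/* I/Os submitted up front */
#define NR_BATCH	400	/* then reap 400 / submit 400 forever */
#define IO_SIZE		2048	/* random 2KB direct-io */

int main(void)
{
	static struct iocb	iocbs[NR_INITIAL];
	struct iocb		*batch[NR_INITIAL];
	struct io_event		events[NR_BATCH];
	io_context_t		ctx = 0;
	int			fd, i, n;

	fd = open("/dev/sdb", O_RDONLY | O_DIRECT);	/* one of the test disks */
	if (fd < 0 || io_setup(NR_INITIAL, &ctx) < 0)
		exit(1);

	for (i = 0; i < NR_INITIAL; i++) {
		void *buf;
		if (posix_memalign(&buf, 4096, IO_SIZE))
			exit(1);
		/* 2KB read at a random (2KB-aligned) offset */
		io_prep_pread(&iocbs[i], fd, buf, IO_SIZE,
			      (long long)(random() % 500000) * IO_SIZE);
		batch[i] = &iocbs[i];
	}
	if (io_submit(ctx, NR_INITIAL, batch) != NR_INITIAL)
		exit(1);

	for (;;) {
		/* reap 400 (min_nr == nr == 400) ... */
		n = io_getevents(ctx, NR_BATCH, NR_BATCH, events, NULL);
		if (n != NR_BATCH)
			exit(1);
		/* ... and resubmit the same 400 iocbs */
		for (i = 0; i < n; i++)
			batch[i] = events[i].obj;
		if (io_submit(ctx, n, batch) != n)
			exit(1);
	}
}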

Before:
procs              memory      swap         io      system        cpu
 r  b     free   buff  cache   si   so    bi  bo   in    cs  us sy id wa
 0  0 31665600  25040  50576    0    0 30816   0 19669 23082  0  8 92  0
 0  0 31665536  25040  50576    0    0 31224   0 19646 22978  0  8 92  0
 0  0 31665536  25040  50576    0    0 30814   0 19662 23130  0  8 92  0

After:
 0  0 32875968  13616  35616    0    0 30886   0 19596   161  0  6 94  0
 0  0 32875968  13616  35616    0    0 30076   0 19135   161  0  6 94  0
 0  0 32875968  13616  35616    0    0 30880   0 19591   161  0  6 94  0

Tested with an industry-standard DB workload; measured an 18% reduction
in ctx rate.


--- 1.1/fs/aio.c	Tue Feb 17 19:57:48 2004
+++ edited/fs/aio.c	Mon May  3 00:02:23 2004
@@ -230,6 +230,7 @@

 	INIT_LIST_HEAD(&ctx->active_reqs);
 	INIT_LIST_HEAD(&ctx->run_list);
+	INIT_LIST_HEAD(&ctx->reap_list);
 	INIT_WORK(&ctx->wq, aio_kick_handler, ctx);

 	if (aio_setup_ring(ctx) < 0)
@@ -697,11 +698,16 @@
 	/* everything turned out well, dispose of the aiocb. */
 	ret = __aio_put_req(ctx, iocb);

+	if (!list_empty(&ctx->reap_list)) {
+		struct aio_reap_queue *queue;
+		queue = container_of(ctx->reap_list.next,
+				     struct aio_reap_queue, list);
+		list_move(&queue->list, &ctx->reap_list);
+		if (--queue->wait_for <= 0)
+			wake_up_process(queue->task);
+	}
 	spin_unlock_irqrestore(&ctx->ctx_lock, flags);

-	if (waitqueue_active(&ctx->wait))
-		wake_up(&ctx->wait);
-
 	if (ret)
 		put_ioctx(ctx);

@@ -803,11 +809,13 @@
 {
 	long			start_jiffies = jiffies;
 	struct task_struct	*tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
 	int			ret;
 	int			i = 0;
 	struct io_event		ent;
 	struct timeout		to;
+	struct aio_ring_info	*info;
+	struct aio_ring		*ring;
+	struct aio_reap_queue	reap_queue;

 	/* needed to zero any padding within an entry (there shouldn't be
 	 * any, but C is fun!
@@ -815,6 +823,7 @@
 	memset(&ent, 0, sizeof(ent));
 	ret = 0;

+repeat:
 	while (likely(i < nr)) {
 		ret = aio_read_evt(ctx, &ent);
 		if (unlikely(ret <= 0))
@@ -853,6 +862,34 @@
 		set_timeout(start_jiffies, &to, &ts);
 	}

+	info = &ctx->ring_info;
+	ring = kmap_atomic(info->ring_pages[0], KM_IRQ1);
+
+	spin_lock_irq(&ctx->ctx_lock);
+	reap_queue.wait_for = min_nr - i - aio_ring_used(info, ring);
+
+	if (reap_queue.wait_for > 0) {
+		set_task_state(tsk, TASK_INTERRUPTIBLE);
+		reap_queue.task = current;
+		list_add_tail(&reap_queue.list, &ctx->reap_list);
+		spin_unlock_irq(&ctx->ctx_lock);
+
+		schedule();
+		set_task_state(tsk, TASK_RUNNING);
+
+		spin_lock_irq(&ctx->ctx_lock);
+		list_del(&reap_queue.list);
+	}
+	spin_unlock_irq(&ctx->ctx_lock);
+
+	if (timeout)
+		clear_timeout(&to);
+	if (to.timed_out)
+		min_nr = 0;
+
+	goto repeat;
+
+#if 0
 	while (likely(i < nr)) {
 		add_wait_queue_exclusive(&ctx->wait, &wait);
 		do {
@@ -893,6 +930,8 @@

 	if (timeout)
 		clear_timeout(&to);
+#endif
+
 out:
 	return i ? i : ret;
 }
--- 1.1/include/linux/aio.h	Tue Feb 17 19:57:12 2004
+++ edited/include/linux/aio.h	Sun May  2 23:46:35 2004
@@ -98,6 +98,7 @@
 }; /* 128 bytes + ring size */

 #define aio_ring_avail(info, ring)	(((ring)->head + (info)->nr - 1 - (ring)->tail) % (info)->nr)
+#define aio_ring_used(info, ring)	(((ring)->tail + (info)->nr - (ring)->head) % (info)->nr)

 #define AIO_RING_PAGES	8
 struct aio_ring_info {
@@ -129,12 +130,19 @@
 	int			reqs_active;
 	struct list_head	active_reqs;	/* used for cancellation */
 	struct list_head	run_list;	/* used for kicked reqs */
+	struct list_head	reap_list;	/* used for group reap */

 	unsigned		max_reqs;

 	struct aio_ring_info	ring_info;

 	struct work_struct	wq;
+};
+
+struct aio_reap_queue {
+	struct list_head	list;
+	struct task_struct	*task;
+	int			wait_for;
 };

 /* prototypes */
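
For what it's worth, the new aio_ring_used() is just the complement of
the existing aio_ring_avail(): one slot of the ring is kept unused so a
full ring can be told apart from an empty one, so used + avail == nr - 1.
A standalone illustration of the arithmetic (plain userspace C, not
kernel code; head/tail/nr are collapsed into one struct here for brevity):

#include <stdio.h>

/* same modulo arithmetic as the aio_ring_avail()/aio_ring_used() macros */
struct ring { unsigned head, tail, nr; };

#define ring_avail(r)	(((r)->head + (r)->nr - 1 - (r)->tail) % (r)->nr)
#define ring_used(r)	(((r)->tail + (r)->nr - (r)->head) % (r)->nr)

int main(void)
{
	/* 8-slot ring, consumer (head) at 5, producer (tail) wrapped to 2 */
	struct ring r = { 5, 2, 8 };

	/* prints "used=5 avail=2" -- used + avail == nr - 1 */
	printf("used=%u avail=%u\n", ring_used(&r), ring_avail(&r));
	return 0;
}

In read_events() the patch computes wait_for = min_nr - i -
aio_ring_used(info, ring), so with a min_nr of 400, nothing copied out
yet and, say, 150 events already sitting in the ring, the task asks
aio_complete() to wake it only after 250 more completions.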



--
To unsubscribe, send a message with 'unsubscribe linux-aio' in
the body to majordomo@kvack.org.  For more info on Linux AIO,
see: http://www.kvack.org/aio/
Don't email: aart@kvack.org