
List:       linux-kernel
Subject:    Re: [PATCH] Re: Negative scalability by removal of lock_kernel()?(Was:
From:       Andrew Morton <andrewm@uow.edu.au>
Date:       2000-10-31 15:36:33


Kouichi,

how many Apache processes are you running, and what is MaxSpareServers set to?

This patch is a moderate rewrite of __wake_up_common.  I'd be
interested in seeing how much difference it makes to the
performance of Apache when the file-locking serialisation is
disabled.

- It implements last-in/first-out semantics for waking
  TASK_EXCLUSIVE tasks, so the most recently queued waiter
  (the one with the hottest cache) is chosen.

- It fixes what was surely a bug wherein __wake_up_common
  scans the entire wait queue even after it has found the
  task which it wants to run.  (A toy model of both changes
  follows this list.)
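
To illustrate, here's a small userspace toy model of the two
changes.  This is my own sketch, not the kernel code - the structure
and names are invented, and the real change is the sched.c patch
below.  It assumes the invariant the patch enforces: non-exclusive
waiters are added at the head of the queue, exclusive waiters at the
tail.  A wakeup wakes every non-exclusive waiter, then wakes only the
most recently queued exclusive waiter, stopping the scan there.

/* toy_wakeup.c: userspace model of the reworked wake policy.
 * Illustrative only - it does not match the kernel's wait queue
 * implementation.
 */
#include <stdio.h>

struct waiter {
	int id;
	int exclusive;			/* models a TASK_EXCLUSIVE waiter */
	struct waiter *next, *prev;
};

struct waitq {
	struct waiter head;		/* circular doubly-linked list */
};

static void waitq_init(struct waitq *q)
{
	q->head.next = q->head.prev = &q->head;
}

/* non-exclusive waiters are added at the head, exclusive ones at the tail */
static void waitq_add(struct waitq *q, struct waiter *w)
{
	struct waiter *at = w->exclusive ? q->head.prev : &q->head;

	w->prev = at;
	w->next = at->next;
	at->next->prev = w;
	at->next = w;
}

static void wake(struct waiter *w)
{
	printf("waking waiter %d (%s)\n", w->id,
		w->exclusive ? "exclusive" : "non-exclusive");
}

static void toy_wake_up(struct waitq *q)
{
	struct waiter *w;

	/*
	 * Pass 1: wake every non-exclusive waiter.  They all sit
	 * before the first exclusive waiter, so stop at the first
	 * exclusive one instead of scanning the whole queue.
	 */
	for (w = q->head.next; w != &q->head; w = w->next) {
		if (w->exclusive)
			break;
		wake(w);
	}

	/*
	 * Pass 2: wake at most one exclusive waiter - the most
	 * recently queued one (last-in/first-out).  The real patch
	 * scans backward in a loop because it also filters on the
	 * wake mode and prefers CPU-affine tasks from irq context.
	 */
	w = q->head.prev;
	if (w != &q->head && w->exclusive)
		wake(w);
}

int main(void)
{
	struct waitq q;
	struct waiter w[5];
	int i;

	waitq_init(&q);
	for (i = 0; i < 5; i++) {
		w[i].id = i;
		w[i].exclusive = (i >= 2);	/* waiters 2..4 are exclusive */
		waitq_add(&q, &w[i]);
	}
	toy_wake_up(&q);	/* wakes 1, 0, then only exclusive waiter 4 */
	return 0;
}

Stopping the scan once the exclusive waiter has been found is what
matters with 1000 waiters; the LIFO choice additionally favours the
waiter with the hottest cache.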

On a dual-CPU box it dramatically increases the max connection
rate when there are a large number of waiters:

#waiters        conn/sec (t10-p5+patch)     conn/sec (t10-p5)
    30              5525                      4990
   100              5700                      4100
  1000              5600                      1500

This will be due entirely to the queue scanning fix - my
test app has a negligible cache footprint.

It's stable, but it's a work-in-progress.

- __wake_up_common does a fair amount of SMP-specific
  stuff when compiled for UP, which needs sorting out.

- it seems there's somebody in the networking code who
  changes a task's state incorrectly while it's on a wait
  queue.  This used to be OK, but it's not OK now that I'm
  relying upon the wait queue being in the state it should
  be in (the sketch after this list shows the waiter-side
  ordering I'm assuming).
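
For reference, the ordering those debug checks (in the fork.c hunks
below) assume looks roughly like this on the waiter side.  This is a
hedged sketch only, not code from the patch; 'foo_wait' and
'condition' are invented placeholders.  The point is that
TASK_EXCLUSIVE is set in the task state before the entry is queued at
the tail with add_wait_queue_exclusive(), and the state keeps the
exclusive bit for as long as the entry sits on the queue:

	DECLARE_WAITQUEUE(wait, current);

	/*
	 * Mark ourselves exclusive *before* queueing, so that
	 * __wake_up_common and the new sanity checks always see
	 * non-exclusive tasks at the head and exclusive tasks at
	 * the tail.  'foo_wait' is an invented wait queue head and
	 * 'condition' an invented wakeup condition.
	 */
	set_current_state(TASK_INTERRUPTIBLE | TASK_EXCLUSIVE);
	add_wait_queue_exclusive(&foo_wait, &wait);

	if (!condition)
		schedule();	/* real code would loop and recheck */

	set_current_state(TASK_RUNNING);
	remove_wait_queue(&foo_wait, &wait);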

Thanks.



--- linux-2.4.0-test10-pre5/kernel/sched.c	Sun Oct 15 01:27:46 2000
+++ linux-akpm/kernel/sched.c	Wed Nov  1 01:54:44 2000
@@ -697,6 +697,53 @@
 	return;
 }
 
+#if WAITQUEUE_DEBUG
+/*
+ * Check that the wait queue is in the correct order:
+ *  !TASK_EXCLUSIVE at the head
+ *  TASK_EXCLUSIVE at the tail
+ *  The list is locked.
+ */
+
+static void check_wq_sanity(wait_queue_head_t *q)
+{
+	struct list_head *probe, *head;
+
+	head = &q->task_list;
+	probe = head->next;
+	while (probe != head) {
+		wait_queue_t *curr = list_entry(probe, wait_queue_t, task_list);
+		if (curr->task->state & TASK_EXCLUSIVE)
+			break;
+		probe = probe->next;
+	}
+	while (probe != head) {
+		wait_queue_t *curr = list_entry(probe, wait_queue_t, task_list);
+		if (!(curr->task->state & TASK_EXCLUSIVE)) {
+			printk("check_wq_sanity: mangled wait queue\n");
+#ifdef CONFIG_X86
+			show_stack(0);
+#endif
+			break;
+		}
+		probe = probe->next;
+	}
+}
+#endif	/* WAITQUEUE_DEBUG */
+	
+/*
+ * Wake up some tasks which are on *q.
+ *
+ * All tasks which are !TASK_EXCLUSIVE are woken.
+ * Only one TASK_EXCLUSIVE task is woken.
+ * The !TASK_EXCLUSIVE tasks start at q->task_list.next
+ * The TASK_EXCLUSIVE tasks start at q->task_list.prev
+ *
+ * When waking a TASK_EXCLUSIVE task we search backward,
+ * so we find the most-recently-added task (it will have the
+ * hottest cache)
+ */
+
 static inline void __wake_up_common (wait_queue_head_t *q, unsigned int mode,
 						const int sync)
 {
@@ -714,6 +761,7 @@
 	wq_write_lock_irqsave(&q->lock, flags);
 
 #if WAITQUEUE_DEBUG
+	check_wq_sanity(q);
 	CHECK_MAGIC_WQHEAD(q);
 #endif
 
@@ -722,6 +770,11 @@
         if (!head->next || !head->prev)
                 WQ_BUG();
 #endif
+
+	/*
+	 * if (mode & TASK_EXCLUSIVE) Wake all the !TASK_EXCLUSIVE tasks 
+	 * if !(mode & TASK_EXCLUSIVE) Wake all the tasks
+	 */
 	tmp = head->next;
 	while (tmp != head) {
 		unsigned int state;
@@ -734,40 +787,69 @@
 #endif
 		p = curr->task;
 		state = p->state;
+		if (state & mode & TASK_EXCLUSIVE)
+			break;			/* Finished all !TASK_EXCLUSIVEs */
 		if (state & (mode & ~TASK_EXCLUSIVE)) {
 #if WAITQUEUE_DEBUG
 			curr->__waker = (long)__builtin_return_address(0);
 #endif
-			/*
-			 * If waking up from an interrupt context then
-			 * prefer processes which are affine to this
-			 * CPU.
-			 */
-			if (irq && (state & mode & TASK_EXCLUSIVE)) {
+			if (sync)
+				wake_up_process_synchronous(p);
+			else
+				wake_up_process(p);
+		}
+	}
+
+	/*
+	 * Now wake one TASK_EXCLUSIVE task.
+	 */
+	if (mode & TASK_EXCLUSIVE) {
+		tmp = head->prev;
+		while (tmp != head) {
+			unsigned int state;
+	                wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);
+			
+			tmp = tmp->prev;
+#if WAITQUEUE_DEBUG
+			CHECK_MAGIC(curr->__magic);
+#endif
+			p = curr->task;
+			state = p->state;
+			if (!(state & TASK_EXCLUSIVE))
+				break;		/* Exhausted all TASK_EXCLUSIVEs */
+
+			if (state & mode) {
+				/*
+				 * If waking up from an interrupt context then
+				 * prefer processes which are affine to this
+				 * CPU.
+				 */
 				if (!best_exclusive)
 					best_exclusive = p;
-				else if ((p->processor == best_cpu) &&
-					(best_exclusive->processor != best_cpu))
+				if (irq) {
+					if (p->processor == best_cpu) {
 						best_exclusive = p;
-			} else {
-				if (sync)
-					wake_up_process_synchronous(p);
-				else
-					wake_up_process(p);
-				if (state & mode & TASK_EXCLUSIVE)
+						break;
+					}
+				} else {
 					break;
+				}
 			}
 		}
-	}
-	if (best_exclusive)
-		best_exclusive->state = TASK_RUNNING;
-	wq_write_unlock_irqrestore(&q->lock, flags);
-
-	if (best_exclusive) {
-		if (sync)
-			wake_up_process_synchronous(best_exclusive);
-		else
-			wake_up_process(best_exclusive);
+
+		if (best_exclusive)
+			best_exclusive->state = TASK_RUNNING;
+
+		wq_write_unlock_irqrestore(&q->lock, flags);
+
+		if (best_exclusive) {
+			if (sync)
+				wake_up_process_synchronous(best_exclusive);
+			else
+				wake_up_process(best_exclusive);
+		}
+	} else {
+		wq_write_unlock_irqrestore(&q->lock, flags);
 	}
 out:
 	return;
--- linux-2.4.0-test10-pre5/kernel/fork.c	Sat Sep  9 16:19:30 2000
+++ linux-akpm/kernel/fork.c	Wed Nov  1 02:08:00 2000
@@ -39,6 +39,14 @@
 	unsigned long flags;
 
 	wq_write_lock_irqsave(&q->lock, flags);
+#if WAITQUEUE_DEBUG
+	if (wait->task->state & TASK_EXCLUSIVE) {
+		printk("add_wait_queue: exclusive task added at head!\n");
+#ifdef CONFIG_X86
+		show_stack(0);
+#endif
+	}
+#endif
 	__add_wait_queue(q, wait);
 	wq_write_unlock_irqrestore(&q->lock, flags);
 }
@@ -48,6 +56,14 @@
 	unsigned long flags;
 
 	wq_write_lock_irqsave(&q->lock, flags);
+#if WAITQUEUE_DEBUG
+	if (!(wait->task->state & TASK_EXCLUSIVE)) {
+		printk("add_wait_queue_exclusive: non-exclusive task added at tail!\n");
+#ifdef CONFIG_X86
+		show_stack(0);
+#endif
+	}
+#endif
 	__add_wait_queue_tail(q, wait);
 	wq_write_unlock_irqrestore(&q->lock, flags);
 }