[prev in list] [next in list] [prev in thread] [next in thread] 

List:       xen-ppc-devel
Subject:    [XenPPC] [linux-ppc-2.6] [XEN][POWERPC] Turn on SMP.. Finally.
From:       "Xen patchbot-linux-ppc-2.6" <patchbot-linux-ppc-2.6 () lists ! xensource ! com>
Date:       2007-01-21 13:50:53
Message-ID: E1H8d5z-00047U-5W () xenbits ! xensource ! com
[Download RAW message or body]

# HG changeset patch
# User Jimi Xenidis <jimix@watson.ibm.com>
# Node ID a384dbf50d5934ba93eea17eccb7e43cf408dd87
# Parent  bbf2db4ddf5400e908ee6bf92ac798e5cfed82a0
[XEN][POWERPC] Turn on SMP.. Finally.

The following patch uses Xen specific methods to spin up secondary
processors and add them to the Linux devtree (not the flat-devtree).
Specifically:

 - Adds HYPERVISOR_vcpu_op() for probing and spinning.
 - "Hot-Plug" new CPU entries into the devtree
 - Start CPUs in the same place that OF/prom_init.c would have
 - Wire up SMP IPI to Xen event channels
 - 6 line common code change in LinuxPPC to set the # possible CPUs correctly

Tested on JS21 (4-way) and Maple(2-way) creating 1-1 Dom0 and several
VIO/DomUs up to 32-way.

NOTE: we cannot yet:
 - _add_ a CPU after the normal boot spinup process
 - remove a CPU

Signed-off-by: Jimi Xenidis <jimix@watson.ibm.com>
---
 arch/powerpc/kernel/setup-common.c       |    6 
 arch/powerpc/platforms/xen/Makefile      |    1 
 arch/powerpc/platforms/xen/hcall.c       |   30 ++
 arch/powerpc/platforms/xen/setup.c       |   36 --
 arch/powerpc/platforms/xen/setup.h       |    1 
 arch/powerpc/platforms/xen/smp.c         |  424 +++++++++++++++++++++++++++++++
 include/asm-powerpc/xen/asm/hypercall.h  |    1 
 include/asm-powerpc/xen/asm/hypervisor.h |    2 
 8 files changed, 468 insertions(+), 33 deletions(-)

diff -r bbf2db4ddf54 -r a384dbf50d59 arch/powerpc/kernel/setup-common.c
--- a/arch/powerpc/kernel/setup-common.c	Tue Dec 19 09:22:37 2006 -0500
+++ b/arch/powerpc/kernel/setup-common.c	Sun Jan 21 08:34:45 2007 -0500
@@ -388,6 +388,12 @@ void __init smp_setup_cpu_maps(void)
 		}
 	}
 
+	if (machine_is(xen)) {
+		/* something more inteligent perhaps? */
+		for (cpu = 0; cpu < NR_CPUS; cpu++)
+			cpu_set(cpu, cpu_possible_map);
+	}
+
 #ifdef CONFIG_PPC64
 	/*
 	 * On pSeries LPAR, we need to know how many cpus
diff -r bbf2db4ddf54 -r a384dbf50d59 arch/powerpc/platforms/xen/Makefile
--- a/arch/powerpc/platforms/xen/Makefile	Tue Dec 19 09:22:37 2006 -0500
+++ b/arch/powerpc/platforms/xen/Makefile	Sun Jan 21 08:34:45 2007 -0500
@@ -3,6 +3,7 @@ obj-y	+= hcall.o
 obj-y	+= hcall.o
 obj-y	+= reboot.o
 obj-y	+= setup.o
+obj-y	+= smp.o
 obj-y	+= time.o
 obj-y	+= udbg_xen.o
 obj-y	+= xen_guest.o
diff -r bbf2db4ddf54 -r a384dbf50d59 arch/powerpc/platforms/xen/hcall.c
--- a/arch/powerpc/platforms/xen/hcall.c	Tue Dec 19 09:22:37 2006 -0500
+++ b/arch/powerpc/platforms/xen/hcall.c	Sun Jan 21 08:34:45 2007 -0500
@@ -33,7 +33,7 @@
 #include <xen/interface/sched.h>
 #include <xen/interface/event_channel.h>
 #include <xen/interface/physdev.h>
-#include <xen/interface/grant_table.h>
+#include <xen/interface/vcpu.h>
 #include <xen/public/privcmd.h>
 #include <asm/hypercall.h>
 #include <asm/page.h>
@@ -599,3 +599,31 @@ int arch_privcmd_hypercall(privcmd_hyper
 	}
 }
 
+/*
+ * Issue the Xen vcpu_op hypercall @cmd against @vcpuid.  VCPUOP_up,
+ * VCPUOP_down and VCPUOP_is_up pass no argument; VCPUOP_initialise and
+ * VCPUOP_get_runstate_info pass @extra_args as an inline xencomm
+ * descriptor.  Any other @cmd is logged and rejected with -ENOSYS.
+ */
+int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args)
+{
+	const unsigned long hcall = __HYPERVISOR_vcpu_op;
+	void *desc;
+
+	switch (cmd) {
+	case VCPUOP_up:
+	case VCPUOP_down:
+	case VCPUOP_is_up:
+		return plpar_hcall_norets(XEN_MARK(hcall), cmd, vcpuid, 0);
+
+	case VCPUOP_initialise:
+	case VCPUOP_get_runstate_info:
+		/* xencomm_create_inline() takes only the pointer, no size */
+		desc = xencomm_create_inline(extra_args);
+		return plpar_hcall_norets(XEN_MARK(hcall), cmd, vcpuid, desc);
+	default:
+		printk(KERN_ERR "%s: unknown vcpu cmd %d\n", __func__, cmd);
+		return -ENOSYS;
+	}
+}
+	
diff -r bbf2db4ddf54 -r a384dbf50d59 arch/powerpc/platforms/xen/setup.c
--- a/arch/powerpc/platforms/xen/setup.c	Tue Dec 19 09:22:37 2006 -0500
+++ b/arch/powerpc/platforms/xen/setup.c	Sun Jan 21 08:34:45 2007 -0500
@@ -168,42 +168,10 @@ static void xen_power_save(void)
 	HYPERVISOR_sched_op(SCHEDOP_block, NULL);
 }
 
-#ifdef CONFIG_SMP
-
-int __init smp_xen_probe(void)
-{
-	return 1;
-}
-
-void smp_xen_message_pass(int target, int msg)
-{
-	printk("%s(%d, %d)\n", __func__, target, msg);
-}
-
-void __devinit smp_xen_setup_cpu(int cpu)
-{
-	printk("%s(%d)\n", __func__, cpu);
-}
-
-struct smp_ops_t xen_smp_ops = {
-	.probe		= smp_xen_probe,
-	.message_pass	= smp_xen_message_pass,
-	.kick_cpu	= smp_generic_kick_cpu,
-	.setup_cpu	= smp_xen_setup_cpu,
-	.give_timebase	= smp_generic_give_timebase,
-	.take_timebase	= smp_generic_take_timebase,
-};
-#endif /* CONFIG_SMP */
-
 void __init xen_setup_arch(void)
 {
 	/* init to some ~sane value until calibrate_delay() runs */
 	loops_per_jiffy = 50000000;
-
-	/* Setup SMP callback */
-#ifdef CONFIG_SMP
-	smp_ops = &xen_smp_ops;
-#endif
 
 	/* Lookup PCI hosts */
 	if (is_initial_xendomain())
@@ -211,6 +179,10 @@ void __init xen_setup_arch(void)
 
 #ifdef CONFIG_DUMMY_CONSOLE
 	conswitchp = &dummy_con;
+#endif
+#ifdef CONFIG_SMP
+	/* let them fly */
+	xen_setup_smp();
 #endif
 
 	printk(KERN_INFO "Using Xen idle loop\n");
diff -r bbf2db4ddf54 -r a384dbf50d59 arch/powerpc/platforms/xen/setup.h
--- a/arch/powerpc/platforms/xen/setup.h	Tue Dec 19 09:22:37 2006 -0500
+++ b/arch/powerpc/platforms/xen/setup.h	Sun Jan 21 08:34:45 2007 -0500
@@ -27,3 +27,4 @@ extern void free_foreign_page(struct pag
 extern void free_foreign_page(struct page *page);
 
 extern void __init xen_setup_time(struct machdep_calls *host_md);
+extern void xen_setup_smp(void);
diff -r bbf2db4ddf54 -r a384dbf50d59 include/asm-powerpc/xen/asm/hypercall.h
--- a/include/asm-powerpc/xen/asm/hypercall.h	Tue Dec 19 09:22:37 2006 -0500
+++ b/include/asm-powerpc/xen/asm/hypercall.h	Sun Jan 21 08:34:45 2007 -0500
@@ -44,6 +44,7 @@ extern int HYPERVISOR_physdev_op(int cmd
 extern int HYPERVISOR_physdev_op(int cmd, void *op);
 extern int HYPERVISOR_grant_table_op(unsigned int cmd, void *uop,
 		unsigned int count);
+extern int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args);
 extern int HYPERVISOR_memory_op(unsigned int cmd, void *arg);
 extern int HYPERVISOR_multicall(void *call_list, int nr_calls);
 
diff -r bbf2db4ddf54 -r a384dbf50d59 include/asm-powerpc/xen/asm/hypervisor.h
--- a/include/asm-powerpc/xen/asm/hypervisor.h	Tue Dec 19 09:22:37 2006 -0500
+++ b/include/asm-powerpc/xen/asm/hypervisor.h	Sun Jan 21 08:34:45 2007 -0500
@@ -146,6 +146,8 @@ int direct_remap_pfn_range(struct vm_are
 #define DYNIRQ_BASE		(PIRQ_BASE + NR_PIRQS)
 #define NR_DYNIRQS		256
 
+#define NR_IPIS 4		/* PPC_MSG_DEBUGGER_BREAK + 1 */
+
 #if NR_IRQS < (NR_PIRQS + NR_DYNIRQS)
 #error to many Xen IRQs
 #endif
diff -r bbf2db4ddf54 -r a384dbf50d59 arch/powerpc/platforms/xen/smp.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/powerpc/platforms/xen/smp.c	Sun Jan 21 08:34:45 2007 -0500
@@ -0,0 +1,424 @@
+#include <linux/kernel.h>
+#include <linux/config.h>
+#include <linux/bootmem.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/vcpu.h>
+#include <xen/evtchn.h>
+#include <asm/prom.h>
+#include <asm/udbg.h>
+#include <asm/hypervisor.h>
+#include "setup.h"
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(fmt...) printk(KERN_EMERG fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/* Allocate with kmalloc() once mem_init_done is set, from bootmem before. */
+static inline void *xen_of_alloc(ulong size)
+{
+	return mem_init_done ? kmalloc(size, GFP_KERNEL)
+			     : alloc_bootmem(size);
+}
+static inline void xen_of_free(void *ptr)
+{
+	/* a free before mem_init_done would hit bootmem memory: BUG instead */
+	BUG_ON(!mem_init_done);
+	kfree(ptr);
+}
+
+/*
+ * Make a private copy of property @op.  The property header, its value
+ * and its name share one zeroed allocation, laid out in that order, so
+ * the copy is released with a single free.  Returns NULL if the
+ * allocation fails.
+ */
+static struct property *dup_prop(struct property *op)
+{
+	struct property *copy;
+	char *cursor;
+	ulong name_len;
+	ulong total;
+
+	/* allocate everything in one go in case it fails */
+	name_len = strlen(op->name) + 1;
+	total = sizeof (struct property) + op->length + name_len;
+	copy = xen_of_alloc(total);
+	if (copy == NULL)
+		return NULL;
+	memset(copy, 0, total);
+
+	/* the value sits directly behind the header because we want it
+	 * aligned; the name follows the value */
+	cursor = (char *)(copy + 1);
+	copy->value = (void *)cursor;
+	cursor += op->length;
+	copy->name = cursor;
+
+	/* copy it all */
+	copy->length = op->length;
+	memcpy(copy->value, op->value, copy->length);
+	strcpy(copy->name, op->name);
+
+	return copy;
+}
+
+/*
+ * Copy every property of @src onto @dst.  Returns 0 on success or
+ * -ENOMEM if a copy could not be allocated, in which case all copies
+ * made so far are freed and @dst is left with no properties.
+ */
+static int dup_properties(struct device_node *dst, struct device_node *src)
+{
+	struct property *op;
+	struct property *np;
+
+	DBG("%s: duping to new cpu node: %s\n", __func__, dst->full_name);
+
+	for (op = src->properties; op != NULL; op = op->next) {
+		np = dup_prop(op);
+		if (!np)
+			goto fail;
+		prom_add_property(dst, np);
+	}
+	return 0;
+
+fail:
+	DBG("%s: FAILED duping: %s\n", __func__, dst->full_name);
+	/* fetch ->next before freeing: each copy is a single allocation */
+	for (op = dst->properties; op != NULL; op = np) {
+		np = op->next;
+		xen_of_free(op);
+	}
+	dst->properties = NULL;
+	return -ENOMEM;
+}
+
+/*
+ * Clone @boot_cpu's node for vcpu @cpu: the copy is named
+ * "/cpus/<name>@<cpu>", has its "reg" property set to @cpu and is
+ * attached under the boot CPU's parent.  Returns the new node (so it can
+ * be added to procfs when hotplugging) or NULL if allocation fails.
+ */
+static struct device_node *xen_add_vcpu_node(struct device_node *boot_cpu,
+					     uint cpu)
+{
+	struct device_node *new_cpu;
+	struct property *pp;
+	void *p;
+	int sz, name_sz, type_sz, full_sz;
+
+	DBG("%s: boot cpu: %s\n", __func__, boot_cpu->full_name);
+
+	/* the "name" property supplies the <name> part of the full_name */
+	pp = of_find_property(boot_cpu, "name", NULL);
+	if (!pp)
+		panic("%s: no name prop\n", __func__);
+	DBG("%s: name is: %s = %s\n", __func__, pp->name, pp->value);
+
+	/* allocate in one shot in case we fail */
+	name_sz = strlen(boot_cpu->name) + 1;
+	type_sz = strlen(boot_cpu->type) + 1;
+	/* sizeof() term: '@', the widest 32-bit cpu number and the NUL */
+	full_sz = strlen("/cpus/") + strlen((char *)pp->value) +
+		  sizeof("@4294967295");
+
+	sz = sizeof (*new_cpu) + name_sz + type_sz + full_sz;
+	p = xen_of_alloc(sz);
+	if (!p)
+		return NULL;
+	memset(p, 0, sz);
+
+	/* the node */
+	new_cpu = p;
+	p += sizeof (*new_cpu);
+
+	/* name */
+	new_cpu->name = p;
+	strcpy(new_cpu->name, boot_cpu->name);
+	p += name_sz;
+
+	/* type */
+	new_cpu->type = p;
+	strcpy(new_cpu->type, boot_cpu->type);
+	p += type_sz;
+
+	/* full_name, bounded by the space reserved for it above */
+	new_cpu->full_name = p;
+	snprintf(new_cpu->full_name, full_sz, "/cpus/%s@%u",
+		 (char *)pp->value, cpu);
+
+	if (dup_properties(new_cpu, boot_cpu)) {
+		xen_of_free(new_cpu);
+		return NULL;
+	}
+
+	/* fixup reg property */
+	DBG("%s: updating reg: %d\n", __func__, cpu);
+	pp = of_find_property(new_cpu, "reg", NULL);
+	if (!pp)
+		panic("%s: no reg prop\n", __func__);
+	*(int *)pp->value = cpu;
+
+	if (mem_init_done)
+		OF_MARK_DYNAMIC(new_cpu);
+	kref_init(&new_cpu->kref);
+
+	/* insert the node */
+	new_cpu->parent = of_get_parent(boot_cpu);
+	of_attach_node(new_cpu);
+	of_node_put(new_cpu->parent);
+
+	return new_cpu;
+}
+
+/* Hand Xen the start-up registers for @vcpu; panics if that fails. */
+static void cpu_initialize_context(unsigned int vcpu, ulong entry)
+{
+	vcpu_guest_context_t ctxt;
+
+	/* NOTE(review): any ctxt member outside user_regs stays unset */
+	memset(&ctxt.user_regs, 0x55, sizeof(ctxt.user_regs));
+	ctxt.user_regs.pc = entry;
+	ctxt.user_regs.msr = 0;
+	ctxt.user_regs.gprs[1] = 0; /* Linux uses its own stack */
+	ctxt.user_regs.gprs[3] = vcpu;
+
+	/* XXX verify this *** */
+	/* There is a buggy kernel that does not zero the "local_paca", so
+	 * we must make sure this register is 0 */
+	ctxt.user_regs.gprs[13] = 0;
+
+	DBG("%s: initializing vcpu: %d\n", __func__, vcpu);
+
+	if (HYPERVISOR_vcpu_op(VCPUOP_initialise, vcpu, &ctxt))
+		panic("%s: VCPUOP_initialise failed, vcpu: %d\n",
+		       __func__, vcpu);
+}
+
+/* Initialise @vcpu to start at @entry, then ask Xen to bring it up. */
+static int xen_start_vcpu(uint vcpu, ulong entry)
+{
+	DBG("%s: starting vcpu: %d\n", __func__, vcpu);
+	cpu_initialize_context(vcpu, entry);
+
+	DBG("%s: Spinning up vcpu: %d\n", __func__, vcpu);
+	return HYPERVISOR_vcpu_op(VCPUOP_up, vcpu, NULL);
+}
+
+extern void __secondary_hold(void);
+extern unsigned long __secondary_hold_spinloop;
+extern unsigned long __secondary_hold_acknowledge;
+
+/* Probe vcpus 1..NR_CPUS-1; start each one found at __secondary_hold. */
+static void xen_boot_secondary_vcpus(void)
+{
+	int vcpu;
+	int rc;
+	const unsigned long mark = (unsigned long)-1;
+	unsigned long *spinloop = &__secondary_hold_spinloop;
+	unsigned long *acknowledge = &__secondary_hold_acknowledge;
+#ifdef CONFIG_PPC64
+	/* __secondary_hold is actually a descriptor, not the text address */
+	unsigned long secondary_hold = __pa(*(unsigned long *)__secondary_hold);
+#else
+	unsigned long secondary_hold = __pa(__secondary_hold);
+#endif
+	struct device_node *boot_cpu;
+
+	DBG("%s: finding CPU node\n", __func__);
+	boot_cpu = of_find_node_by_type(NULL, "cpu");
+	if (!boot_cpu)
+		panic("%s: Cannot find Booting CPU node\n", __func__);
+
+	/* Set the common spinloop variable, so all of the secondary cpus
+	 * will block when they are awakened from their OF spinloop.
+	 * This must occur for both SMP and non SMP kernels, since OF will
+	 * be trashed when we move the kernel.
+	 */
+	*spinloop = 0;
+
+	DBG("%s: Searching for all vcpu numbers > 0\n", __func__);
+	/* try and start as many as we can; skip ids VCPUOP_is_up rejects */
+	for (vcpu = 1; vcpu < NR_CPUS; vcpu++) {
+		int i;
+
+		rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, vcpu, NULL);
+		if (rc < 0)
+			continue;
+
+		DBG("%s: Found vcpu: %d\n", __func__, vcpu);
+		/* Init the acknowledge var which will be reset by
+		 * the secondary cpu when it awakens from its OF
+		 * spinloop.
+		 */
+		*acknowledge = mark;
+
+		DBG("%s: Starting vcpu: %d at pc: 0x%lx\n", __func__,
+		    vcpu, secondary_hold);
+		rc = xen_start_vcpu(vcpu, secondary_hold);
+		if (rc)
+			panic("%s: xen_start_vpcu() failed\n", __func__);
+
+		DBG("%s: Waiting for ACK on vcpu: %d\n", __func__, vcpu);
+		for (i = 0; (i < 100000000) && (*acknowledge == mark); i++)
+			mb();
+
+		if (*acknowledge == vcpu)
+			DBG("%s: Recieved for ACK on vcpu: %d\n",
+			    __func__, vcpu);
+		/* NOTE(review): neither a missing ACK nor a NULL from
+		 * xen_add_vcpu_node() stops the vcpu being marked present */
+		xen_add_vcpu_node(boot_cpu, vcpu);
+		cpu_set(vcpu, cpu_present_map);
+		set_hard_smp_processor_id(vcpu, vcpu);
+	}
+	of_node_put(boot_cpu);
+	DBG("%s: end...\n", __func__);
+}
+
+static int __init smp_xen_probe(void)
+{
+	return cpus_weight(cpu_present_map);	/* # of cpus marked present */
+}
+
+static irqreturn_t xen_ppc_msg_reschedule(int irq, void *dev_id,
+					  struct pt_regs *regs)
+{
+	smp_message_recv(PPC_MSG_RESCHEDULE, regs);
+	return IRQ_HANDLED;	/* irq and dev_id are not used */
+}
+
+static irqreturn_t xen_ppc_msg_call_function(int irq, void *dev_id,
+					     struct pt_regs *regs)
+{
+	smp_message_recv(PPC_MSG_CALL_FUNCTION, regs);
+	return IRQ_HANDLED;	/* irq and dev_id are not used */
+}
+
+static irqreturn_t xen_ppc_msg_debugger_break(int irq, void *dev_id,
+					  struct pt_regs *regs)
+{
+	smp_message_recv(PPC_MSG_DEBUGGER_BREAK, regs);
+	return IRQ_HANDLED;	/* irq and dev_id are not used */
+}
+
+struct message {
+	irqreturn_t (*f)(int, void *, struct pt_regs *); /* irq handler */
+	int num;	/* PPC_MSG_* number; also the ipi_to_irq[] index */
+	char *name;	/* name given to bind_ipi_to_irqhandler() */
+};
+static struct message ipi_msgs[] = {	/* one entry per IPI we bind */
+	{
+		.f	= xen_ppc_msg_reschedule,
+		.num	= PPC_MSG_RESCHEDULE,
+		.name	= "IPI-resched",
+	},
+	{
+		.f	= xen_ppc_msg_call_function,
+		.num	= PPC_MSG_CALL_FUNCTION,
+		.name	= "IPI-function",
+	},
+	{
+		.f	= xen_ppc_msg_debugger_break,
+		.num	= PPC_MSG_DEBUGGER_BREAK,
+		.name	= "IPI-debug",
+	},
+};
+
+DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
+
+/*
+ * Bind every IPI message in ipi_msgs[] to an irq handler for @cpu and
+ * record the resulting irq in that cpu's ipi_to_irq[] slot.  Any
+ * inconsistency or bind failure is a BUG.
+ */
+static void __devinit smp_xen_setup_cpu(int cpu)
+{
+	const int nr_ipis = ARRAY_SIZE(__get_cpu_var(ipi_to_irq));
+	const struct message *msg;
+	int irq;
+
+	/* big scary include web could mess with our values, so we
+	 * make sure they are sane */
+	BUG_ON(ARRAY_SIZE(ipi_msgs) > nr_ipis);
+
+	for (msg = ipi_msgs; msg < ipi_msgs + ARRAY_SIZE(ipi_msgs); msg++) {
+		BUG_ON(msg->num >= nr_ipis);
+		irq = bind_ipi_to_irqhandler(msg->num, cpu, msg->f,
+					     SA_INTERRUPT, msg->name, NULL);
+		BUG_ON(irq < 0);
+		per_cpu(ipi_to_irq, cpu)[msg->num] = irq;
+		DBG("%s: cpu: %d vector :%d irq: %d\n",
+		       __func__, cpu, msg->num, irq);
+	}
+}
+
+/*
+ * Send IPI message @vector to @cpu by notifying the irq recorded for it
+ * in that cpu's ipi_to_irq[] table; a negative entry is a BUG.
+ */
+static inline void send_IPI_one(unsigned int cpu, int vector)
+{
+	int *irqs = per_cpu(ipi_to_irq, cpu);
+	int irq = irqs[vector];
+
+	BUG_ON(irq < 0);
+
+	DBG("%s: cpu: %d vector :%d irq: %d!\n",
+	       __func__, cpu, vector, irq);
+	DBG("%s: per_cpu[%p]: %d %d %d %d\n",
+	       __func__, irqs, irqs[0], irqs[1], irqs[2], irqs[3]);
+
+	notify_remote_via_irq(irq);
+}
+
+/*
+ * Deliver @msg to @target: a cpu number, MSG_ALL (every online cpu) or
+ * MSG_ALL_BUT_SELF (every online cpu but the caller).  Unknown @msg panics.
+ */
+static void smp_xen_message_pass(int target, int msg)
+{
+	int cpu;
+
+	if (msg != PPC_MSG_RESCHEDULE &&
+	    msg != PPC_MSG_CALL_FUNCTION &&
+	    msg != PPC_MSG_DEBUGGER_BREAK) {
+		panic("SMP %d: smp_message_pass: unknown msg %d\n",
+		       smp_processor_id(), msg);
+		return;
+	}
+
+	/* a single, explicitly named cpu */
+	if (target != MSG_ALL && target != MSG_ALL_BUT_SELF) {
+		send_IPI_one(target, msg);
+		return;
+	}
+
+	/* broadcast, optionally skipping ourselves */
+	for_each_online_cpu(cpu) {
+		if (target == MSG_ALL_BUT_SELF && cpu == smp_processor_id())
+			continue;
+		send_IPI_one(cpu, msg);
+	}
+}
+
+static struct smp_ops_t xen_smp_ops = {
+	.probe		= smp_xen_probe,	/* count of present cpus */
+	.message_pass	= smp_xen_message_pass,	/* IPIs over event channels */
+	.kick_cpu	= smp_generic_kick_cpu,
+	.setup_cpu	= smp_xen_setup_cpu,	/* binds the per-cpu IPI irqs */
+};
+
+void xen_setup_smp(void)
+{
+	/* install the Xen SMP ops, then start and release the secondaries */
+	smp_ops = &xen_smp_ops;
+	xen_boot_secondary_vcpus();
+	smp_release_cpus();
+}

_______________________________________________
Xen-ppc-devel mailing list
Xen-ppc-devel@lists.xensource.com
http://lists.xensource.com/xen-ppc-devel
[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic