[prev in list] [next in list] [prev in thread] [next in thread] 

List:       xen-ppc-devel
Subject:    [XenPPC] [PATCH] xm save / restore
From:       poff <poff () watson ! ibm ! com>
Date:       2006-12-13 21:39:21
Message-ID: 200612132139.kBDLdLG929960 () rios2 ! watson ! ibm ! com
[Download RAW message or body]

Xen and Linux patches for non-live save & restore.
Also improves the code in xc_linux_save.c that searches page_array[] while saving the htab.

A modified version of htab.h is placed in a new directory, tools/libxc/xen/asm.
This probably needs a better solution.

The htab is mapped by 'decorating' the pfn (see xen/arch/powerpc/mm.c). However,
no range or validity checking is done at this time.

...................................................


Xen diffs:

diff -r 7669fca80bfc config/powerpc64.mk
--- a/config/powerpc64.mk	Mon Dec 04 11:46:53 2006 -0500
+++ b/config/powerpc64.mk	Wed Dec 13 15:39:32 2006 -0500
@@ -3,3 +3,4 @@ CONFIG_POWERPC_$(XEN_OS) := y
 
 CFLAGS += -DELFSIZE=64
 LIBDIR := lib
+CONFIG_XCUTILS :=y
diff -r 7669fca80bfc tools/libxc/powerpc64/Makefile
--- a/tools/libxc/powerpc64/Makefile	Mon Dec 04 11:46:53 2006 -0500
+++ b/tools/libxc/powerpc64/Makefile	Wed Dec 13 15:39:32 2006 -0500
@@ -2,5 +2,7 @@ GUEST_SRCS-y += powerpc64/xc_linux_build
 GUEST_SRCS-y += powerpc64/xc_linux_build.c
 GUEST_SRCS-y += powerpc64/xc_prose_build.c
 GUEST_SRCS-y += powerpc64/utils.c
+GUEST_SRCS-y += powerpc64/xc_linux_save.c
+GUEST_SRCS-y += powerpc64/xc_linux_restore.c
 
 CTRL_SRCS-y += powerpc64/xc_memory.c
diff -r 7669fca80bfc tools/libxc/xc_private.c
--- a/tools/libxc/xc_private.c	Mon Dec 04 11:46:53 2006 -0500
+++ b/tools/libxc/xc_private.c	Wed Dec 13 15:39:32 2006 -0500
@@ -306,6 +306,23 @@ int xc_get_pfn_list(int xc_handle,
 
     return (ret < 0) ? -1 : domctl.u.getmemlist.num_pfns;
 }
+
+/* Fetch the domain's htab address and PTE count; returns -1 on error. */
+int xc_get_shadow_list( int xc_handle,
+                       uint32_t domid,
+                       uint64_t *htab_raddr)
+{
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_getshadowlist;
+    domctl.domain = (domid_t)domid;
+    /* Leave *htab_raddr untouched unless the hypercall succeeded. */
+    if (do_domctl(xc_handle, &domctl) < 0)
+        return -1;
+    *htab_raddr = domctl.u.getshadowlist.htab_map;
+    return domctl.u.getshadowlist.htab_num_ptes;
+}
+
 #endif
 
 long xc_get_tot_pages(int xc_handle, uint32_t domid)
diff -r 7669fca80bfc tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h	Mon Dec 04 11:46:53 2006 -0500
+++ b/tools/libxc/xenctrl.h	Wed Dec 13 15:39:32 2006 -0500
@@ -518,6 +518,8 @@ int xc_get_pfn_list(int xc_handle, uint3
 int xc_get_pfn_list(int xc_handle, uint32_t domid, xen_pfn_t *pfn_buf,
                     unsigned long max_pfns);
 
+int xc_get_shadow_list(int xc_handle, uint32_t domid, uint64_t *mfn_htab_map);
+
 unsigned long xc_ia64_fpsr_default(void);
 
 int xc_ia64_get_pfn_list(int xc_handle, uint32_t domid,
diff -r 7669fca80bfc tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py	Mon Dec 04 11:46:53 2006 -0500
+++ b/tools/python/xen/xend/XendDomainInfo.py	Wed Dec 13 15:39:32 2006 -0500
@@ -461,6 +461,7 @@ class Common_XendDomainInfo:
         if self.state in (DOM_STATE_HALTED, DOM_STATE_SUSPENDED):
             try:
                 self._constructDomain()
+                self._allocMem2()
                 self._storeVmDetails()
                 self._createDevices()
                 self._createChannels()
@@ -1237,7 +1238,67 @@ class Common_XendDomainInfo:
         # Set maximum number of vcpus in domain
         xc.domain_max_vcpus(self.domid, int(self.info['vcpus']))
 
-
+    # Redo the architecture-specific (PowerPC) memory set-up from the raw
+    # configured values: memory limit, shadow memory (the hash table), the
+    # real mode area, and the remaining memory as 16 MB extents.
+    # maxmem, memory, and shadow are all in KiB.
+    def _allocMem2(self):
+        log.debug("allocMem2");
+
+        maxmem = self.info['maxmem'] * 1024
+        memory = self.info['memory'] * 1024
+        shadow = self.info['shadow_memory'] * 1024
+
+        # Round shadow up to a multiple of a MiB, as shadow_mem_control
+        # takes MiB and we must not round down and end up under-providing.
+        shadow = ((shadow + 1023) / 1024) * 1024
+
+        # set memory limit
+        xc.domain_setmaxmem(self.domid, maxmem)
+
+        # Make sure there's enough RAM available for the domain
+        balloon.free(memory + shadow)
+
+        # Set up the shadow memory, i.e. the PowerPC hash table
+        shadow_cur = xc.shadow_mem_control(self.domid, shadow / 1024)
+        self.info['shadow_memory'] = shadow_cur
+
+        rma_log = 26 # NOTE(review): fixed at 1 << 26 bytes; self.info['powerpc_rma_log'] is not read
+        if rma_log == 0: # cannot trigger while rma_log is fixed above
+            # use smallest RMA size available
+            rma_log = self.getRealModeLogs()[0]
+
+        if rma_log not in self.getRealModeLogs():
+            raise ValueError("rma_log(%d) must be one of" % rma_log,
+                             self.getRealModeLogs())
+            
+        # store info for FlatDeviceTree (currently disabled)
+        ### self.info['powerpc_rma_log'] = rma_log
+            
+        rma_kb = (1 << rma_log) / 1024
+        if memory < rma_kb:
+            raise ValueError("Domain memory must be at least %d KB" % rma_kb)
+
+        if memory % (16 << 10):
+            raise ValueError("Domain memory %dKB must be a multiple of 16MB"
+                             % memory)
+
+        # allocate the RMA
+        log.debug("alloc_real_mode_area(%d, %d)", self.domid, rma_log)
+        xc.alloc_real_mode_area(self.domid, rma_log)
+
+        # now allocate the remaining memory as large-order allocations
+        memory -= rma_kb
+        extent_log = 24 # 16 MB
+        page_log = 12 # 4 KB
+        extent_order = extent_log - page_log
+        log.debug("increase_reservation(%d, 0x%x, %d)", self.domid,
+                  memory, extent_order)
+        xc.domain_memory_increase_reservation(self.domid,
+                                              memory,
+                                              extent_order)
+                
+ 
     def _introduceDomain(self):
         assert self.domid is not None
         assert self.store_mfn is not None
diff -r 7669fca80bfc xen/arch/powerpc/domain.c
--- a/xen/arch/powerpc/domain.c	Mon Dec 04 11:46:53 2006 -0500
+++ b/xen/arch/powerpc/domain.c	Wed Dec 13 15:39:32 2006 -0500
@@ -152,7 +152,32 @@ void vcpu_destroy(struct vcpu *v)
 
 int arch_set_info_guest(struct vcpu *v, vcpu_guest_context_t *c)
 { 
+    int i;
+
     memcpy(&v->arch.ctxt, &c->user_regs, sizeof(c->user_regs));
+
+    for ( i = 0; i < NUM_SLB_ENTRIES; i++) {
+       memcpy(&v->arch.slb_entries[i], &c->slb_entries[i], sizeof(struct slb_entry));
+    }
+
+    for ( i = 0; i< 4; i++) v->arch.sprg[i] = c->sprg[i];
+
+    v->arch.timebase = c->timebase;
+    v->arch.dar = c->dar;
+    v->arch.dsisr = c->dsisr;
+
+    memcpy( &v->arch.cpu, &c->cpu, sizeof(struct cpu_vcpu));
+    v->arch.dec = c->dec;
+
+#ifdef HAS_FLOAT
+    memcpy( v->arch.fprs, c->fprs, sizeof(double)*NUM_FPRS);
+#endif /* HAS_FLOAT */
+
+#ifdef HAS_VMX
+    memcpy( &v->arch.vrs, &c->vrs, sizeof(vector128)*32);
+    memcpy( &v->arch.vscr, &c->vscr, sizeof(vector128));
+    v->arch.vrsave = c->vrsave;
+#endif /* HAS_VMX */
 
     printk("Domain[%d].%d: initializing\n",
            v->domain->domain_id, v->vcpu_id);
diff -r 7669fca80bfc xen/arch/powerpc/domctl.c
--- a/xen/arch/powerpc/domctl.c	Mon Dec 04 11:46:53 2006 -0500
+++ b/xen/arch/powerpc/domctl.c	Wed Dec 13 15:39:32 2006 -0500
@@ -29,10 +29,37 @@
 #include <public/sysctl.h>
 #include <asm/processor.h>
 
+#define        DECOR   0x80000000      // indicates htab address
+
+
 void arch_getdomaininfo_ctxt(struct vcpu *, vcpu_guest_context_t *);
 void arch_getdomaininfo_ctxt(struct vcpu *v, vcpu_guest_context_t *c)
 { 
+    int i;
+
     memcpy(&c->user_regs, &v->arch.ctxt, sizeof(struct cpu_user_regs));
+    for (i = 0; i < NUM_SLB_ENTRIES; i++) {
+       memcpy(&c->slb_entries[i],&v->arch.slb_entries[i],sizeof(struct slb_entry));
+    }
+
+    for (i = 0; i < 4; i++) c->sprg[i] = v->arch.sprg[i] ;
+    c->timebase = v->arch.timebase;
+    c->dar = v->arch.dar;
+    c->dsisr = v->arch.dsisr;
+    memcpy(&c->cpu,&v->arch.cpu,sizeof(struct cpu_vcpu));
+    c->dec = v->arch.dec;
+
+#ifdef HAS_FLOAT
+    memcpy(c->fprs,v->arch.fprs,sizeof(double)*NUM_FPRS);
+#endif         /*  HAS_FLOAT */
+
+#ifdef HAS_VMX
+    memcpy(c->vrs, v->arch.vrs, sizeof(vector128)*32);
+    memcpy(&c->vscr, &v->arch.vscr, sizeof(vector128));
+    c->vrsave = v->arch.vrsave;
+#endif /* HAS_VMX */
+
+
     /* XXX fill in rest of vcpu_guest_context_t */
 }
 
@@ -108,6 +135,27 @@ long arch_do_domctl(struct xen_domctl *d
         }
     }
     break;
+    case XEN_DOMCTL_getshadowlist:
+    {
+       struct domain *d = find_domain_by_id(domctl->domain);
+       uint num_ptes;
+
+       ret = -EINVAL;
+       if ( d != NULL)
+       {
+          ret = 0;
+          
+          domctl->u.getshadowlist.htab_map = (uint64_t)(d->arch.htab.map);
+
+          num_ptes = 1UL << d->arch.htab.log_num_ptes;
+          domctl->u.getshadowlist.htab_num_ptes = num_ptes;
+       
+          copy_to_guest(u_domctl, domctl, 1);
+          put_domain(d);
+       }
+    }
+    break;
+
 
     default:
         ret = -ENOSYS;
diff -r 7669fca80bfc xen/arch/powerpc/mm.c
--- a/xen/arch/powerpc/mm.c	Mon Dec 04 11:46:53 2006 -0500
+++ b/xen/arch/powerpc/mm.c	Wed Dec 13 15:39:32 2006 -0500
@@ -37,6 +37,8 @@
 #define MEM_LOG(_f, _a...) ((void)0)
 #endif
 
+#define        DECOR 0x80000000UL
+
 /* Frame table and its size in pages. */
 struct page_info *frame_table;
 unsigned long max_page;
@@ -408,6 +410,11 @@ ulong pfn2mfn(struct domain *d, ulong pf
     ulong foreign_map_pfn = 1UL << cpu_foreign_map_order();
 
     /* quick tests first */
+    if (pfn & DECOR)
+    {
+        mfn = pfn & ~DECOR;		//*** TBD Check for valid htab range?
+    }  
+    else
     if (pfn & foreign_map_pfn) {
         t = PFN_TYPE_FOREIGN;
         mfn = foreign_to_mfn(d, pfn);
diff -r 7669fca80bfc xen/include/asm-powerpc/domain.h
--- a/xen/include/asm-powerpc/domain.h	Mon Dec 04 11:46:53 2006 -0500
+++ b/xen/include/asm-powerpc/domain.h	Wed Dec 13 15:39:32 2006 -0500
@@ -51,10 +51,6 @@ struct arch_domain {
     uint large_page_order[4];
 } __cacheline_aligned;
 
-struct slb_entry {
-    ulong slb_vsid;
-    ulong slb_esid;
-};
 #define SLB_ESID_VALID (1ULL << (63 - 36))
 #define SLB_ESID_CLASS (1ULL << (63 - 56))
 #define SLB_ESID_MASK  (~0ULL << (63 - 35))
@@ -63,9 +59,9 @@ struct slb_entry {
 
 struct xencomm;
 
-typedef struct {
-    u32 u[4];
-} __attribute__((aligned(16))) vector128;
+#ifdef HAS_VMX
+typedef  _vector128 vector128;
+#endif /* HAS_VMX */
 
 struct arch_vcpu {
     cpu_user_regs_t ctxt; /* User-level CPU registers */
diff -r 7669fca80bfc xen/include/asm-powerpc/htab.h
--- a/xen/include/asm-powerpc/htab.h	Mon Dec 04 11:46:53 2006 -0500
+++ b/xen/include/asm-powerpc/htab.h	Wed Dec 13 15:39:32 2006 -0500
@@ -69,68 +69,68 @@
 
 union pte {
     struct pte_words {
-        ulong vsid;
-        ulong rpn;
+        uint64_t vsid;
+        uint64_t rpn;
     } words;
     struct pte_bits {
         /* *INDENT-OFF* */
         /* high word */
-        ulong avpn:     57; /* [0-56] abbreviated virtual page number */
-        ulong lock:     1;  /* [57] hypervisor lock bit */
-        ulong res:      1;  /* [58] reserved for hypervisor */
-        ulong bolted:   1;  /* [59] XXX software-reserved; temp hack */
-        ulong sw:       1;  /* [60] reserved for software */
-        ulong l:        1;  /* [61] Large Page */
-        ulong h:        1;  /* [62] hash function id */
-        ulong v:        1;  /* [63] valid */
+        uint64_t avpn:     57; /* [0-56] abbreviated virtual page number */
+        uint64_t lock:     1;  /* [57] hypervisor lock bit */
+        uint64_t res:      1;  /* [58] reserved for hypervisor */
+        uint64_t bolted:   1;  /* [59] XXX software-reserved; temp hack */
+        uint64_t sw:       1;  /* [60] reserved for software */
+        uint64_t l:        1;  /* [61] Large Page */
+        uint64_t h:        1;  /* [62] hash function id */
+        uint64_t v:        1;  /* [63] valid */
 
         /* low word */
-        ulong pp0:  1;  /* [0] page protection bit 0 (current PowerPC
+        uint64_t pp0:  1;  /* [0] page protection bit 0 (current PowerPC
                          *     specification says it can always be 0) */
-        ulong ts:   1;  /* [1] tag select */
-        ulong rpn:  50; /* [2-51] real page number */
-        ulong res2: 2;  /* [52,53] reserved */
-        ulong ac:   1;  /* [54] address compare */
-        ulong r:    1;  /* [55] referenced */
-        ulong c:    1;  /* [56] changed */
-        ulong w:    1;  /* [57] write through */
-        ulong i:    1;  /* [58] cache inhibited */
-        ulong m:    1;  /* [59] memory coherent */
-        ulong g:    1;  /* [60] guarded */
-        ulong n:    1;  /* [61] no-execute */
-        ulong pp1:  2;  /* [62,63] page protection bits 1:2 */
+        uint64_t ts:   1;  /* [1] tag select */
+        uint64_t rpn:  50; /* [2-51] real page number */
+        uint64_t res2: 2;  /* [52,53] reserved */
+        uint64_t ac:   1;  /* [54] address compare */
+        uint64_t r:    1;  /* [55] referenced */
+        uint64_t c:    1;  /* [56] changed */
+        uint64_t w:    1;  /* [57] write through */
+        uint64_t i:    1;  /* [58] cache inhibited */
+        uint64_t m:    1;  /* [59] memory coherent */
+        uint64_t g:    1;  /* [60] guarded */
+        uint64_t n:    1;  /* [61] no-execute */
+        uint64_t pp1:  2;  /* [62,63] page protection bits 1:2 */
         /* *INDENT-ON* */
     } bits;
 };
 
 union ptel {
-    ulong word;
+    uint64_t word;
     struct ptel_bits {
         /* *INDENT-OFF* */
 
-        ulong pp0:  1;  /* page protection bit 0 (current PPC
+        uint64_t pp0:  1;  /* page protection bit 0 (current PPC
                          *   AS says it can always be 0) */
-        ulong ts:   1;  /* tag select */
-        ulong rpn:  50; /* real page number */
-        ulong res2: 2;  /* reserved */
-        ulong ac:   1;  /* address compare */
-        ulong r:    1;  /* referenced */
-        ulong c:    1;  /* changed */
-        ulong w:    1;  /* write through */
-        ulong i:    1;  /* cache inhibited */
-        ulong m:    1;  /* memory coherent */
-        ulong g:    1;  /* guarded */
-        ulong n:    1;  /* no-execute */
-        ulong pp1:  2;  /* page protection bits 1:2 */
+        uint64_t ts:   1;  /* tag select */
+        uint64_t rpn:  50; /* real page number */
+        uint64_t res2: 2;  /* reserved */
+        uint64_t ac:   1;  /* address compare */
+        uint64_t r:    1;  /* referenced */
+        uint64_t c:    1;  /* changed */
+        uint64_t w:    1;  /* write through */
+        uint64_t i:    1;  /* cache inhibited */
+        uint64_t m:    1;  /* memory coherent */
+        uint64_t g:    1;  /* guarded */
+        uint64_t n:    1;  /* no-execute */
+        uint64_t pp1:  2;  /* page protection bits 1:2 */
         /* *INDENT-ON* */
     } bits;
 };
 
 struct domain_htab {
-    ulong sdr1;
+    uint64_t sdr1;
     uint log_num_ptes;  /* log number of PTEs in HTAB. */
     uint order;         /* order for freeing. */
     union pte *map;     /* access the htab like an array */
-    ulong *shadow;      /* idx -> logical translation array */
+    uint64_t *shadow;      /* idx -> logical translation array */
 };
 #endif
diff -r 7669fca80bfc xen/include/public/arch-powerpc.h
--- a/xen/include/public/arch-powerpc.h	Mon Dec 04 11:46:53 2006 -0500
+++ b/xen/include/public/arch-powerpc.h	Wed Dec 13 15:39:32 2006 -0500
@@ -98,11 +98,66 @@ typedef struct cpu_user_regs cpu_user_re
 
 typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ /* XXX timebase */
 
+#define        NUM_SLB_ENTRIES 64
+struct slb_entry {
+       uint64_t slb_vsid;
+       uint64_t slb_esid;
+};
+typedef struct slb_entry slb_entry_t;
+
+#ifndef HAS_VMX
+#define		 HAS_VMX 1
+#endif
+
+#ifndef HAS_FLOAT
+#define		 HAS_FLOAT 1
+#endif
+
+#ifdef HAS_VMX
+typedef struct {
+       uint32_t u[4];
+} __attribute__((aligned(16))) _vector128;
+#endif /* HAS_VMX */
+
+
 /* ONLY used to communicate with dom0! See also struct exec_domain. */
 struct vcpu_guest_context {
     cpu_user_regs_t user_regs;         /* User-level CPU registers     */
+    slb_entry_t        slb_entries[NUM_SLB_ENTRIES];   /* Segment Lookaside Buffer */
+
+    /* Special-Purpose Registers */
+    uint64_t sprg[4];
+    uint64_t timebase;
+    uint64_t dar;
+    uint64_t dsisr;
+
+    struct cpu_vcpu_tag {
+       uint64_t hid4;
+    } cpu; /* CPU-specific bits */
+
+    uint32_t dec;
+
+    /* XXX etc */
+#ifdef HAS_FLOAT
+#define  NUM_FPRS 32
+    double fprs[NUM_FPRS];
+#endif
+#ifdef HAS_VMX
+    _vector128 vrs[32];
+    _vector128 vscr;
+    uint32_t vrsave;
+#endif
+
+#if 0
+    struct xencomm *xencomm;
+
+    /* I/O-port access bitmap. */
+    u8 *iobmp;        /* Guest kernel virtual address of the bitmap. */
+    int iobmp_limit;  /* Number of ports represented in the bitmap.  */
+    int iopl;         /* Current IOPL for this VCPU. */
+#endif
+
     uint64_t sdr1;                     /* Pagetable base               */
-    /* XXX etc */
 };
 typedef struct vcpu_guest_context vcpu_guest_context_t;
 DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
diff -r 7669fca80bfc xen/include/public/domctl.h
--- a/xen/include/public/domctl.h	Mon Dec 04 11:46:53 2006 -0500
+++ b/xen/include/public/domctl.h	Wed Dec 13 15:39:32 2006 -0500
@@ -392,6 +392,18 @@ typedef struct xen_domctl_real_mode_area
 typedef struct xen_domctl_real_mode_area xen_domctl_real_mode_area_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t);
 
+#define XEN_DOMCTL_getshadowlist       29
+struct xen_domctl_getshadowlist {
+       /* OUT variables */
+       /* Start of htab array */
+       uint64_t htab_map;
+       /* Number of ptes within htab */
+       uint32_t htab_num_ptes;
+};
+
+typedef struct xen_domctl_getshadowlist        xen_domctl_getshadowlist_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getshadowlist_t);
+ 
 struct xen_domctl {
     uint32_t cmd;
     uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */
@@ -418,6 +430,7 @@ struct xen_domctl {
         struct xen_domctl_arch_setup        arch_setup;
         struct xen_domctl_settimeoffset     settimeoffset;
         struct xen_domctl_real_mode_area    real_mode_area;
+        struct xen_domctl_getshadowlist     getshadowlist;
         uint8_t                             pad[128];
     } u;
 };
diff -r 7669fca80bfc tools/libxc/powerpc64/xc_linux_restore.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/powerpc64/xc_linux_restore.c	Wed Dec 13 15:39:32 2006 -0500
@@ -0,0 +1,366 @@
+/******************************************************************************
+ * xc_linux_restore.c
+ *
+ * Restore the state of a Linux session.
+ *
+ * Copyright (c) 2003, K A Fraser.
+ * Rewritten for PPC:  Dan Poff <poff@us.ibm.com>, Yi Ge <geyi@cn.ibm.com>
+ */
+
+#include <inttypes.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <xen/asm/htab.h>
+
+#include "xg_private.h"
+
+#define DECOR 0x80000000                 // indicates htab address
+#define LOG_PTE_SIZE            4
+#define PTES_PER_PAGE     (PAGE_SIZE / (1UL << LOG_PTE_SIZE))
+
+#define INVALID_MFN       (~0ULL)
+
+#define PFN_TO_KB(_pfn) ((_pfn) << (PAGE_SHIFT - 10))
+
+/* total number of pages used by the current guest */
+static unsigned long max_pfn;
+
+/*
+ * Read exactly count bytes from fd into buf, retrying on EINTR and after
+ * short reads.  Returns 1 when all count bytes arrived, 0 on EOF or error.
+ */
+static ssize_t
+read_exact(int fd, void *buf, size_t count)
+{
+    size_t done = 0;
+    unsigned char *b = buf;
+
+    while (done < count) {
+        ssize_t s = read(fd, &b[done], count - done);
+
+        if ((s == -1) && (errno == EINTR))
+            continue;
+        if (s <= 0)
+            break;
+        done += s;
+    }
+
+    return (done == count) ? 1 : 0;
+}
+
+/*
+ * Map frame mfn of domain dom and fill it with the next PAGE_SIZE bytes
+ * read from io_fd.  Returns 0 on success, -1 on failure.
+ */
+static int
+read_page(int xc_handle, int io_fd, uint32_t dom, xen_pfn_t mfn)
+{
+    void *mem;
+    int rc = 0;
+
+    mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                               PROT_READ|PROT_WRITE, mfn);
+    if (mem == NULL) {
+        ERROR("cannot map page");
+        return -1;
+    }
+
+    if (!read_exact(io_fd, mem, PAGE_SIZE)) {
+        ERROR("Error when reading from state file (5)");
+        rc = -1;
+    }
+
+    /* Release the mapping on the failure path as well. */
+    munmap(mem, PAGE_SIZE);
+    return rc;
+}
+
+/*
+ * Read the saved hash table from io_fd one page at a time, replace the
+ * guest pfn held in each valid PTE with the matching page_array[] entry,
+ * and copy the result into the domain's htab.
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+restore_htab(int xc_handle, int io_fd, uint32_t dom,
+             const xen_pfn_t *page_array)
+{
+    int rc = -1;
+    int n, num_ptes, htab_ptes, htab_pages;
+    unsigned long i, htab_mfn;
+    uint64_t htab_raddr;
+    xen_pfn_t htab_rpn;
+    union pte *ppte;
+    char *mem, *temp = NULL, *copy;
+
+    htab_ptes = xc_get_shadow_list(xc_handle, dom, &htab_raddr);
+    if (htab_ptes == -1) {
+        ERROR("Could not get the shadow list");
+        goto out;
+    }
+
+    if (!read_exact(io_fd, &num_ptes, sizeof(num_ptes))) {
+        ERROR("Error when reading num_ptes");
+        goto out;
+    }
+
+    if (num_ptes != htab_ptes) {
+        ERROR("num_ptes != htab_ptes:  %d  %d   htab_raddr: 0x%016llx",
+              num_ptes, htab_ptes, (unsigned long long)htab_raddr);
+        goto out;
+    }
+
+    /* Over-allocate so a page-aligned staging buffer fits inside temp. */
+    temp = malloc(PAGE_SIZE * 2);
+    if (temp == NULL) {
+        ERROR("Could not allocate temp memory");
+        goto out;
+    }
+    copy = (char *)(((uintptr_t)temp + (PAGE_SIZE - 1)) &
+                    ~((uintptr_t)PAGE_SIZE - 1));
+
+    htab_mfn = htab_raddr >> PAGE_SHIFT;
+    htab_pages = htab_ptes / PTES_PER_PAGE;
+
+    for (n = 0; n < htab_pages; n++, htab_mfn++) {
+        /* Stage and translate the page before mapping the htab frame, so
+         * the error exits below never hold a foreign mapping. */
+        if (!read_exact(io_fd, copy, PAGE_SIZE)) {
+            ERROR("Error when reading htab page");
+            goto out;
+        }
+
+        ppte = (union pte *)copy;
+        for (i = 0; i < PTES_PER_PAGE; i++, ppte++) {
+            if (ppte->bits.v != 1) {            // invalid htab entry
+                ppte->words.rpn = 0;
+                continue;
+            }
+
+            htab_rpn = ppte->bits.rpn;          // guest's pfn
+            if (htab_rpn >= max_pfn) {
+                ERROR("htab_rpn: 0x%016llx not found in page_array[]",
+                      (unsigned long long)htab_rpn);
+                goto out;
+            }
+            ppte->bits.rpn = page_array[htab_rpn];  // guest's rpn
+        }
+
+        mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                   PROT_READ|PROT_WRITE, htab_mfn | DECOR);
+        if (mem == NULL) {
+            ERROR("Cannot map htab_mfn 0x%08lx: %s\n",
+                  htab_mfn, strerror (errno));
+            goto out;
+        }
+        memcpy(mem, copy, PAGE_SIZE);
+        munmap(mem, PAGE_SIZE);
+    }
+
+    rc = 0;
+
+ out:
+    free(temp);
+    return rc;
+}
+
+/*
+ * Rebuild domain dom from the save image on io_fd: memory pages, the hash
+ * table, the vcpu 0 context and the shared_info page; then patch the
+ * start_info page with the new event channels.  The frames looked up for
+ * the store and console pages are returned through store_mfn and
+ * console_mfn.  Returns 0 on success and 1 on failure, in which case the
+ * domain is destroyed (unless dom is 0).
+ */
+int
+xc_linux_restore(int xc_handle, int io_fd, uint32_t dom,
+                 unsigned long nr_pfns, unsigned int store_evtchn,
+                 unsigned long *store_mfn, unsigned int console_evtchn,
+                 unsigned long *console_mfn)
+{
+    DECLARE_DOMCTL;
+    int rc = 1;
+    unsigned long i;
+    xen_pfn_t pfn;
+    xen_pfn_t mfn = INVALID_MFN;
+    unsigned long ver;
+
+    /* The new domain's shared-info frame number. */
+    unsigned long shared_info_frame;
+    shared_info_t *shared_info;
+
+    /* A copy of the CPU context of the guest. */
+    vcpu_guest_context_t ctxt;
+
+    xen_pfn_t shared_info_pfn, *page_array = NULL;
+
+    /* A temporary mapping of the guest's start_info page. */
+    start_info_t *start_info;
+
+    max_pfn = nr_pfns;
+
+    DPRINTF("xc_linux_restore start: max_pfn = %ld\n", max_pfn);
+
+    if (!read_exact(io_fd, &ver, sizeof(unsigned long))) {
+        ERROR("Error when reading version");
+        goto out;
+    }
+    if (ver != 1) {
+        ERROR("version of save doesn't match");
+        goto out;
+    }
+
+    if (mlock(&ctxt, sizeof(ctxt))) {
+        /* needed for build domctl, but might as well do early */
+        ERROR("Unable to mlock ctxt");
+        goto out;
+    }
+
+    /* Get the domain's shared-info frame. */
+    domctl.cmd = XEN_DOMCTL_getdomaininfo;
+    domctl.domain = (domid_t)dom;
+    if (xc_domctl(xc_handle, &domctl) < 0) {
+        ERROR("Could not get information on new domain");
+        goto out;
+    }
+    shared_info_frame = domctl.u.getdomaininfo.shared_info_frame;
+
+    if (xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_pfn)) != 0) {
+        errno = ENOMEM;
+        goto out;
+    }
+
+    /* Get pages.  calloc() rejects a max_pfn * size product that overflows. */
+    page_array = calloc(max_pfn, sizeof(*page_array));
+    if (page_array == NULL) {
+        ERROR("Could not allocate memory");
+        goto out;
+    }
+
+    if (xc_get_pfn_list(xc_handle, dom,
+                        page_array, max_pfn) != max_pfn) {
+        ERROR("Could not get the page frame list");
+        goto out;
+    }
+
+    DPRINTF("Reloading memory pages:   0%%\n");
+
+    while (1) {
+        if (!read_exact(io_fd, &pfn, sizeof(xen_pfn_t))) {
+            ERROR("Error when reading batch size");
+            goto out;
+        }
+
+        if (pfn == INVALID_MFN)
+            break;
+
+        /* page_array[] holds max_pfn entries, so pfn == max_pfn is
+         * already out of range. */
+        if (pfn >= max_pfn) {
+            DPRINTF("pfn: 0x%016llx\n", (unsigned long long)pfn);
+            continue;
+        }
+
+        mfn = page_array[pfn];
+
+        if (read_page(xc_handle, io_fd, dom, mfn) < 0)
+            goto out;
+    }
+
+    DPRINTF("Received all pages\n");
+
+    /* Read and uncanonicalise htab, page-at-a-time */
+    if (restore_htab(xc_handle, io_fd, dom, page_array) < 0)
+        goto out;
+
+    /* Read vcpu context and set */
+    if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) {
+        ERROR("Error when reading ctxt");
+        goto out;
+    }
+
+    domctl.cmd = XEN_DOMCTL_setvcpucontext;
+    domctl.domain = (domid_t)dom;
+    domctl.u.vcpucontext.vcpu   = 0;
+    set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt);
+
+    if (xc_domctl(xc_handle, &domctl) != 0) {
+        ERROR("Couldn't set vcpu context");
+        goto out;
+    }
+
+    /* Read shared info.  */
+    shared_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                       PROT_READ|PROT_WRITE, shared_info_frame);
+    if (shared_info == NULL) {
+        ERROR("cannot map page");
+        goto out;
+    }
+    if (!read_exact(io_fd, shared_info, PAGE_SIZE)) {
+        ERROR("Error when reading shared_info page");
+        munmap(shared_info, PAGE_SIZE);
+        goto out;
+    }
+
+    /* clear any pending events */
+    memset(&(shared_info->evtchn_pending[0]), 0,
+           sizeof (shared_info->evtchn_pending));
+    for (i = 0; i < MAX_VIRT_CPUS; i++)
+        shared_info->vcpu_info[i].evtchn_pending_sel = 0;
+
+    munmap(shared_info, PAGE_SIZE);
+
+    /* find pfn of shared_info_frame */
+    for (i = 0; i < max_pfn; i++)
+        if (page_array[i] == shared_info_frame)
+            break;
+    if (i >= max_pfn) {
+        ERROR("Cannot find pfn of shared_info_frame");
+        goto out;
+    }
+    shared_info_pfn = i;
+
+    /* Setup start_info page */
+    /* NOTE(review): start_info is taken to sit three frames below
+     * shared_info -- confirm against the PPC domain builder. */
+    mfn = shared_info_frame - 3;
+    start_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                      PROT_READ | PROT_WRITE, mfn);
+    if (start_info == NULL) {
+        ERROR("cannot map start_info page");
+        goto out;
+    }
+
+    /* Both fields are read from guest memory and index page_array[],
+     * so range-check them before use. */
+    if ((start_info->store_mfn >= max_pfn) ||
+        (start_info->console.domU.mfn >= max_pfn)) {
+        ERROR("start_info store/console pfn out of range");
+        munmap(start_info, PAGE_SIZE);
+        goto out;
+    }
+
+    start_info->nr_pages = max_pfn;
+    start_info->shared_info =  shared_info_pfn << PAGE_SHIFT;
+    start_info->flags = 0;
+    *store_mfn = page_array[start_info->store_mfn];
+    start_info->store_evtchn = store_evtchn;
+    *console_mfn = page_array[start_info->console.domU.mfn];
+    start_info->console.domU.evtchn = console_evtchn;
+    munmap(start_info, PAGE_SIZE);
+
+    DPRINTF("Domain ready to be built.\n");
+
+    rc = 0;
+
+ out:
+    if ((rc != 0) && (dom != 0))
+        xc_domain_destroy(xc_handle, dom);
+
+    free (page_array);
+    safe_munlock(&ctxt, sizeof(ctxt));
+
+    DPRINTF("Restore exit with rc=%d\n", rc);
+
+    return rc;
+}
diff -r 7669fca80bfc tools/libxc/powerpc64/xc_linux_save.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/powerpc64/xc_linux_save.c	Wed Dec 13 15:39:32 2006 -0500
@@ -0,0 +1,417 @@
+/******************************************************************************
+ * xc_linux_save.c
+ *
+ * Save the state of a running Linux session.
+ *
+ * Copyright (c) 2003, K A Fraser.
+ * Rewritten for PPC:  Dan Poff <poff@us.ibm.com>, Yi Ge <geyi@cn.ibm.com>
+ */
+
+#include <inttypes.h>
+#include <time.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <xen/asm/htab.h>
+
+#include "xg_private.h"
+
+/*
+** Default values for important tuning parameters. Can override by passing
+** non-zero replacement values to xc_linux_save().
+**
+** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too.
+**
+*/
+#define DEF_MAX_ITERS    (4 - 1)		 /* limit us to 4 times round loop  */
+#define DEF_MAX_FACTOR   3		 		 /* never send more than 3x nr_pfns */
+
+/*
+** During (live) save/migrate, we maintain a number of bitmaps to track
+** which pages we have to send, and to skip.
+*/
+
+#define DECOR 0x80000000		 		 // indicates htab address
+#define LOG_PTE_SIZE            4
+
+#define INVALID_MFN       (~0ULL)
+
+/* total number of pages used by the current guest */
+static unsigned long max_pfn;
+
+/* Ask the guest to suspend through the suspend(dom) callback, then poll Xen
+ * until the domain reports shutdown with reason SHUTDOWN_suspend.  io_fd is
+ * unused.  Returns 0 once suspended, -1 on any failure or after 100 polls. */
+static int
+suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
+                  int dom, xc_dominfo_t *info)
+{
+    int attempt;
+
+    if (!(*suspend)(dom)) {
+        ERROR("Suspend request failed");
+        return -1;
+    }
+
+    /* One budget of 100 polls covers the paused path too, so a domain that
+     * keeps being paused again can no longer keep this loop alive forever. */
+    for (attempt = 1; ; attempt++) {
+        if (xc_domain_getinfo(xc_handle, dom, 1, info) != 1) {
+            ERROR("Could not get domain info");
+            return -1;
+        }
+
+        if (info->shutdown && info->shutdown_reason == SHUTDOWN_suspend)
+            return 0; // success
+
+        if (attempt >= 100)
+            break;
+
+        if (info->paused) {
+            // try unpausing domain, wait, and retest
+            xc_domain_unpause(xc_handle, dom);
+            ERROR("Domain was paused. Wait and re-test.");
+        } else {
+            ERROR("Retry suspend domain.");
+        }
+        usleep(10000);  // 10ms
+    }
+
+    ERROR("Unable to suspend domain.");
+    return -1;
+}
+
+/* 1 if one write() call stored all count bytes of buf, else 0 (error or short write). */
+static inline ssize_t
+write_exact(int fd, void *buf, size_t count)
+{
+    ssize_t written = write(fd, buf, count);
+    return written >= 0 && (size_t)written == count;
+}
+
+/*
+ * Save a suspended (non-live) image of domain dom to io_fd.  Written in order:
+ *   - max_pfn and the format version 1, each as an unsigned long;
+ *   - each pfn whose page_array[] entry is not INVALID_MFN, followed by the
+ *     PAGE_SIZE bytes of that page, then INVALID_MFN as terminator;
+ *   - htab_ptes (int), then the hash table page by page, the rpn of every
+ *     valid PTE replaced by its index in page_array[] and the second word
+ *     of every invalid PTE cleared;
+ *   - the context of vcpu 0 and the shared-info page.
+ * max_iters, max_factor and flags are not used; suspend is the callback that
+ * asks the guest to suspend.  Returns 0 on success, 1 on failure.
+ */
+int
+xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
+              uint32_t max_factor, uint32_t flags, int (*suspend)(int))
+{
+    xc_dominfo_t info;
+
+    int rc = 1;
+    int debug = 0;
+
+    /* The domain's shared-info frame number. */
+    unsigned long shared_info_frame;
+
+    /* A copy of the CPU context of the guest. */
+    vcpu_guest_context_t ctxt;
+
+    xen_pfn_t *page_array = NULL;
+
+    /* Live mapping of shared info structure */
+    shared_info_t *live_shinfo = NULL;
+
+    /* Released under 'out', so that no error path leaks them. */
+    char *mem = NULL;       /* page currently mapped from the domain */
+    char *temp = NULL;      /* unaligned backing store for the htab copy */
+    struct chunk_array {xen_pfn_t mfn; unsigned long long size;} *p_chunk = NULL;
+
+    if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) {
+        ERROR("Could not get domain info");
+        return 1;
+    }
+
+    shared_info_frame = info.shared_info_frame;
+
+    /* Map the shared info frame */
+    live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                       PROT_READ, shared_info_frame);
+    if (!live_shinfo) {
+        ERROR("Couldn't map live_shinfo");
+        goto out;
+    }
+
+    max_pfn = info.max_memkb >> (PAGE_SHIFT - 10);
+    if (max_pfn == 0) {
+        /* 'max_pfn - 1' below would wrap, and page_array[0] would not exist */
+        ERROR("Domain has no pages to save");
+        goto out;
+    }
+
+    page_array = malloc(max_pfn * sizeof(xen_pfn_t));
+    if (page_array == NULL) {
+        ERROR("Could not allocate memory");
+        goto out;
+    }
+
+    /* This is expected by xm restore.  */
+    if (!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) {
+        ERROR("write: max_pfn");
+        goto out;
+    }
+
+    /* xc_linux_restore starts to read here.  */
+    /* Write a version number, so that a change of the stream format is
+       noticed at once.  The version is hard-coded; don't forget to change
+       the restore code too!  */
+    {
+        unsigned long version = 1;
+
+        if (!write_exact(io_fd, &version, sizeof(unsigned long))) {
+            ERROR("write: version");
+            goto out;
+        }
+    }
+
+    /* This is a non-live suspend. Issue the call back to get the
+       domain suspended */
+    if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info)) {
+        ERROR("Domain appears not to have suspended");
+        goto out;
+    }
+
+    {
+        xen_pfn_t pfn;
+
+        if (xc_get_pfn_list(xc_handle, dom,
+                            page_array, max_pfn) != max_pfn) {
+            ERROR("Could not get the page frame list");
+            goto out;
+        }
+
+        /* Start writing out the saved-domain record. */
+        for (pfn = 0; pfn < max_pfn; pfn++) {
+            if (page_array[pfn] == INVALID_MFN)
+                continue;
+
+            if (debug)
+                fprintf(stderr, "xc_linux_save: page %llx (%llu/%lu)\n",
+                        page_array[pfn], pfn, max_pfn);
+
+            mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                       PROT_READ|PROT_WRITE, page_array[pfn]);
+            if (mem == NULL) {
+                ERROR("cannot map page %llx: %s",
+                        page_array[pfn], strerror (errno));
+                goto out;
+            }
+
+            if (!write_exact(io_fd, &pfn, sizeof(pfn))) {
+                ERROR("Error when writing to state file (4)");
+                goto out;
+            }
+
+            if (!write_exact(io_fd, mem, PAGE_SIZE)) {
+                ERROR("Error when writing to state file (5)");
+                goto out;
+            }
+            munmap(mem, PAGE_SIZE);
+            mem = NULL;
+        }
+    }
+
+    DPRINTF("All memory is saved\n");
+
+    /* terminate memory dump */
+    {
+        xen_pfn_t pfn = INVALID_MFN;
+        if (!write_exact(io_fd, &pfn, sizeof(pfn))) {
+            ERROR("Error when writing to state file (6)");
+            goto out;
+        }
+    }
+
+    /* Canonicalize htab and save */
+    {
+        int i, k, n, N;
+        int htab_ptes, htab_pages, n_chunks;
+        unsigned long htab_mfn;
+        uint64_t htab_raddr;
+        xen_pfn_t htab_rpn, pfn = 0;
+        union pte *ppte;
+        char *copy;
+        unsigned long long sizes;
+
+        /* A chunk is a run of page_array[] entries with consecutive frame
+           numbers.  Count the chunks first ... */
+        n_chunks = 1;
+        for (k = 0; k < max_pfn - 1; k++) {
+            if (page_array[k] + 1 != page_array[k+1])
+                n_chunks += 1;
+        }
+
+        p_chunk = malloc(n_chunks * sizeof(struct chunk_array));
+        if (p_chunk == NULL) {
+            ERROR("Could not allocate memory for chunk_array");
+            goto out;
+        }
+
+        /* ... then record the first frame and the length of each chunk. */
+        n = 0;
+        p_chunk[n].mfn = page_array[0];
+        p_chunk[n].size = 1;
+
+        for (k = 0; k < max_pfn - 1; k++) {
+            if (page_array[k] + 1 != page_array[k+1]) {
+                n += 1;
+                p_chunk[n].mfn = page_array[k+1];
+                p_chunk[n].size = 1;
+            } else {
+                p_chunk[n].size += 1;
+            }
+        }
+
+        htab_ptes = xc_get_shadow_list(xc_handle, dom, &htab_raddr);
+        if (htab_ptes == -1) {
+            ERROR("Could not get the shadow list");
+            goto out;
+        }
+
+        temp = malloc(PAGE_SIZE * 2);
+        if (temp == NULL) {
+            ERROR("Could not allocate temp memory");
+            goto out;
+        }
+
+        /* First page-aligned address inside temp */
+        copy = (char *)(((uintptr_t)temp + (PAGE_SIZE - 1)) & (~(PAGE_SIZE-1)));
+
+        htab_mfn = htab_raddr >> PAGE_SHIFT;
+        htab_pages = htab_ptes / (PAGE_SIZE / (1UL << LOG_PTE_SIZE));
+
+        if (!write_exact(io_fd, &htab_ptes, sizeof(htab_ptes))) {
+            ERROR("Error when writing to state file (6)");
+            goto out;
+        }
+
+        /* Replace rpn with guest pfn, then write out htab, by page */
+        for (N = 0; N < htab_pages; N++, htab_mfn++) {
+            /* DECOR marks the frame number as an htab address */
+            mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                       PROT_READ|PROT_WRITE, htab_mfn | DECOR);
+            if (mem == NULL) {
+                ERROR("Cannot map htab_mfn 0x%08lx: %s\n",
+                    htab_mfn, strerror (errno));
+                goto out;
+            }
+
+            /* Only this private copy is edited, never the mapped page */
+            memcpy(copy, mem, PAGE_SIZE);
+
+            ppte = (union pte *)copy;
+            for (i = 0; i < (PAGE_SIZE / (1UL << LOG_PTE_SIZE)); i++, ppte++) {
+                if (ppte->bits.v != 1) {            // invalid htab entry
+                    ppte->words.rpn = 0;
+                    continue;
+                }
+
+                /* Search page_array[] for htab_rpn chunk by chunk; sizes
+                   is the number of entries in the chunks already passed */
+                sizes = 0;
+                htab_rpn = ppte->bits.rpn;
+                for (n = 0; n < n_chunks; n++) {
+                    if ((htab_rpn >= p_chunk[n].mfn) &&
+                        (htab_rpn < (p_chunk[n].mfn) + p_chunk[n].size)) {
+                        pfn = (htab_rpn - p_chunk[n].mfn) + sizes;
+                        break;
+                    }
+                    sizes += p_chunk[n].size;
+                }
+
+                if (n >= n_chunks) {
+                    ERROR("htab_rpn: 0x%016llx not found in page_array[]",
+                        htab_rpn);
+                    goto out;
+                }
+
+                if (pfn >= max_pfn) {
+                    ERROR("pfn >= max_pfn: 0x%08llx 0x%08lx", pfn, max_pfn);
+                    goto out;
+                }
+
+                /* Cross-check against a linear search.  It costs up to
+                   max_pfn compares per PTE, so only do it when debugging. */
+                if (debug) {
+                    for (k = 0; k < max_pfn; k++) {
+                        if (htab_rpn == page_array[k])
+                            break;
+                    }
+
+                    if (k >= max_pfn) {
+                        ERROR("htab_rpn: 0x%016llx not found in page_array[] %d",
+                            htab_rpn, i);
+                        goto out;
+                    }
+                    if (k != pfn) {
+                        ERROR("k != pfn: 0x%08x 0x%08llx", k, pfn);
+                        ERROR("htab_rpn: 0x%016llx", htab_rpn);
+                        goto out;
+                    }
+                }
+
+                ppte->bits.rpn = pfn;
+            }
+
+            if (!write_exact(io_fd, copy, PAGE_SIZE)) {
+                ERROR("Error when writing to state file (7)");
+                goto out;
+            }
+
+            munmap(mem, PAGE_SIZE);
+            mem = NULL;
+        }
+    }
+
+    /* save vcpu context only for vcpu 0;                    */
+    /* linux already suspended other vcpus via smp_suspend() */
+    if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) {
+        ERROR("Could not get vcpu context");
+        goto out;
+    }
+
+    if (!write_exact(io_fd, &ctxt, sizeof(ctxt))) {
+        ERROR("Error when writing to state file (1)");
+        goto out;
+    }
+
+    if (!write_exact(io_fd, live_shinfo, PAGE_SIZE)) {
+        ERROR("Error when writing to state file (1)");
+        goto out;
+    }
+
+    /* Success! */
+    rc = 0;
+
+ out:
+    if (mem)
+        munmap(mem, PAGE_SIZE);
+    free(temp);
+    free(p_chunk);
+    free(page_array);
+    if (live_shinfo)
+        munmap(live_shinfo, PAGE_SIZE);
+
+    DPRINTF("Save exit rc=%d\n",rc);
+
+    return !!rc;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 7669fca80bfc tools/libxc/xen/asm/htab.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xen/asm/htab.h	Wed Dec 13 15:39:32 2006 -0500
@@ -0,0 +1,134 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) IBM Corp. 2005
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#ifndef _ASM_HTAB_H_
+#define _ASM_HTAB_H_
+
+
+/***** general PowerPC architecture limits ******/
+
+/* 256KB, from PowerPC Architecture specification */
+#define HTAB_MIN_LOG_SIZE 18
+
+#define LOG_NUM_PTES_IN_PTEG    3
+#define NUM_PTES_IN_PTEG        (1 << LOG_NUM_PTES_IN_PTEG)
+#define LOG_PTE_SIZE            4
+#define LOG_PTEG_SIZE           (LOG_NUM_PTES_IN_PTEG + LOG_PTE_SIZE)
+#define LOG_HTAB_HASH           (LOG_HTAB_SIZE - LOG_PTEG_SIZE)
+
+/* real page number shift to create the rpn field of the pte */
+#define RPN_SHIFT 12
+
+/* page protection bits in pp1 (name format: MSR:PR=0 | MSR:PR=1) */
+#define PP_RWxx 0x0UL
+#define PP_RWRW 0x2UL
+#define PP_RWRx 0x4UL
+#define PP_RxRx 0x6UL
+
+/***** 64-bit PowerPC architecture limits ******/
+
+#define SDR1_HTABORG_MASK   0xfffffffffff80000ULL
+#define SDR1_HTABSIZE_MASK  0x1fUL
+#define SDR1_HTABSIZE_MAX   46
+#define SDR1_HTABSIZE_BASEBITS 11
+
+/* used to turn a vsid into a number usable in the hash function */
+#define VSID_HASH_MASK 0x0000007fffffffffUL
+
+/* used to turn a vaddr into an api for a pte */
+#define VADDR_TO_API(vaddr) (((vaddr) & API_MASK) >> API_SHIFT)
+#define API_VEC   0x1fUL
+#define API_SHIFT 23
+#define API_MASK  (API_VEC << API_SHIFT)
+
+/***** hypervisor internals ******/
+
+/* 64M: reasonable hypervisor limit? */
+#define HTAB_MAX_LOG_SIZE 26
+
+#define GET_HTAB(domain) ((domain)->arch.htab.sdr1 & SDR1_HTABORG_MASK)
+
+union pte {    /* one hash table entry: two 64-bit words, readable raw or as bit fields */
+    struct pte_words {
+        uint64_t vsid;    /* first word; shares storage with the "high word" fields below */
+        uint64_t rpn;     /* second word; shares storage with the "low word" fields below */
+    } words;
+    struct pte_bits {
+        /* *INDENT-OFF* */
+        /* high word */
+        uint64_t avpn:     57; /* [0-56] abbreviated virtual page number */
+        uint64_t lock:     1;  /* [57] hypervisor lock bit */
+        uint64_t res:      1;  /* [58] reserved for hypervisor */
+        uint64_t bolted:   1;  /* [59] XXX software-reserved; temp hack */
+        uint64_t sw:       1;  /* [60] reserved for software */
+        uint64_t l:        1;  /* [61] Large Page */
+        uint64_t h:        1;  /* [62] hash function id */
+        uint64_t v:        1;  /* [63] valid */
+
+        /* low word */
+        uint64_t pp0:  1;  /* [0] page protection bit 0 (current PowerPC
+                         *     specification says it can always be 0) */
+        uint64_t ts:   1;  /* [1] tag select */
+        uint64_t rpn:  50; /* [2-51] real page number */
+        uint64_t res2: 2;  /* [52,53] reserved */
+        uint64_t ac:   1;  /* [54] address compare */
+        uint64_t r:    1;  /* [55] referenced */
+        uint64_t c:    1;  /* [56] changed */
+        uint64_t w:    1;  /* [57] write through */
+        uint64_t i:    1;  /* [58] cache inhibited */
+        uint64_t m:    1;  /* [59] memory coherent */
+        uint64_t g:    1;  /* [60] guarded */
+        uint64_t n:    1;  /* [61] no-execute */
+        uint64_t pp1:  2;  /* [62,63] page protection bits 1:2 */
+        /* *INDENT-ON* */
+    } bits;
+};
+
+union ptel {   /* a single 64-bit word whose fields repeat the "low word" of union pte */
+    uint64_t word;         /* the same 64 bits, unsplit */
+    struct ptel_bits {
+        /* *INDENT-OFF* */
+
+        uint64_t pp0:  1;  /* page protection bit 0 (current PPC
+                         *   AS says it can always be 0) */
+        uint64_t ts:   1;  /* tag select */
+        uint64_t rpn:  50; /* real page number */
+        uint64_t res2: 2;  /* reserved */
+        uint64_t ac:   1;  /* address compare */
+        uint64_t r:    1;  /* referenced */
+        uint64_t c:    1;  /* changed */
+        uint64_t w:    1;  /* write through */
+        uint64_t i:    1;  /* cache inhibited */
+        uint64_t m:    1;  /* memory coherent */
+        uint64_t g:    1;  /* guarded */
+        uint64_t n:    1;  /* no-execute */
+        uint64_t pp1:  2;  /* page protection bits 1:2 */
+        /* *INDENT-ON* */
+    } bits;
+};
+
+struct domain_htab {    /* hash table (htab) bookkeeping; GET_HTAB() reads it as (domain)->arch.htab */
+    uint64_t sdr1;      /* GET_HTAB() masks this with SDR1_HTABORG_MASK */
+    uint log_num_ptes;  /* log number of PTEs in HTAB. */
+    uint order;         /* order for freeing. */
+    union pte *map;     /* access the htab like an array */
+    uint64_t *shadow;      /* idx -> logical translation array */
+};
+#endif


Linux diffs:

diff -r c8d1f32fd7de arch/powerpc/platforms/xen/hcall.c
--- a/arch/powerpc/platforms/xen/hcall.c	Wed Nov 22 14:51:54 2006 -0500
+++ b/arch/powerpc/platforms/xen/hcall.c	Wed Dec 13 15:54:20 2006 -0500
@@ -256,6 +256,7 @@ static int xenppc_privcmd_domctl(privcmd
 	case XEN_DOMCTL_pausedomain:
 	case XEN_DOMCTL_unpausedomain:
 	case XEN_DOMCTL_getdomaininfo:
+	case XEN_DOMCTL_getshadowlist:
 		break;
 	case XEN_DOMCTL_getmemlist:
 		ret = xencomm_create(
diff -r c8d1f32fd7de arch/powerpc/platforms/xen/reboot.c
--- a/arch/powerpc/platforms/xen/reboot.c	Wed Nov 22 14:51:54 2006 -0500
+++ b/arch/powerpc/platforms/xen/reboot.c	Wed Dec 13 15:54:20 2006 -0500
@@ -1,10 +1,20 @@
 #include <linux/module.h>
+#include <linux/kernel.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/io/console.h>
 #include <xen/xencons.h>
+#include <xen/cpu_hotplug.h>
+#include <xen/xenbus.h>
+#include <xen/gnttab.h>
+#include <xen/evtchn.h>
 #include <asm/hypervisor.h>
 #include <asm/machdep.h>
+#include <asm/mmu_context.h>
 
+#define  SHUTDOWN_INVALID 		 -1
+
+extern int shutting_down ;
+ 
 static void domain_machine_restart(char * __unused)
 {
 	/* We really want to get pending console data out before we die. */
@@ -31,3 +41,86 @@ void xen_reboot_init(struct machdep_call
 		ppc_md.halt	 = domain_machine_power_off;
 	}
 }
+
+/* Make init_mm the active mm of the current task, unless it already is. */
+static void switch_idle_mm(void)
+{
+	struct mm_struct *prev = current->active_mm;
+
+	if (prev != &init_mm) {
+		atomic_inc(&init_mm.mm_count);	/* counted before it is installed */
+		switch_mm(prev, &init_mm, current);
+		current->active_mm = &init_mm;
+		mmdrop(prev);			/* the mm being left */
+	}
+}
+/* PPC guest suspend/resume sequence; __do_suspend() in drivers/xen/core/reboot.c calls it. ignore is unused. */
+int ppc_do_suspend(void *ignore)
+{
+		 int  err;
+		 enum system_states temp_state;
+		 /* only valid on CPU 0 and outside interrupt context */
+		 BUG_ON(smp_processor_id() != 0);
+		 BUG_ON(in_interrupt());
+
+#ifndef CONFIG_PPC_XEN
+		 if (xen_feature(XENFEAT_auto_translated_physmap)) {
+		 		 printk(KERN_WARNING "Cannot suspend in "
+		 		        "auto_translated_physmap mode.\n");
+		 		 return -EOPNOTSUPP;
+		 }
+#endif
+		 err = smp_suspend();
+		 if (err)
+		     return err;	/* bail out before anything else is suspended */
+
+		 xenbus_suspend();
+
+		 preempt_disable();
+
+		 local_irq_disable();
+		 /* remember system_state so it can be put back after the resume */
+		 temp_state = system_state;
+		 system_state = SYSTEM_SUSPEND_DISK;
+
+		 preempt_enable();	/* interrupts stay off until local_irq_enable() below */
+
+		 gnttab_suspend();
+		 /* shared info points at the zero page for the duration of the suspend */
+		 HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
+		 /* left in pfn form: nothing in this function converts these two back */
+		 xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
+		 xen_start_info->console_mfn = mfn_to_pfn(xen_start_info->console_mfn);
+
+		 /*
+		  * We'll stop somewhere inside this hypercall. When it returns,
+		  * we'll start resuming after the restore.
+		  */
+		 HYPERVISOR_suspend(virt_to_mfn(xen_start_info));
+		 /* back to the value shutting_down is initialised with in drivers/xen/core/reboot.c */
+		 shutting_down = SHUTDOWN_INVALID;
+		 /* shared info address is taken from start_info; the stand-in page is zeroed again */
+		 HYPERVISOR_shared_info = (shared_info_t *)__va(xen_start_info->shared_info);
+		 memset(empty_zero_page, 0, PAGE_SIZE);
+
+		 gnttab_resume();
+
+		 irq_resume();
+
+		 switch_idle_mm();
+
+		 system_state = temp_state;
+		 
+		 per_cpu(last_jiffy, smp_processor_id()) = get_tbl();	/* update time base */
+
+		 local_irq_enable();
+
+		 xencons_resume();
+
+		 xenbus_resume();
+
+		 smp_resume();
+
+		 return err;	/* 0 here: a non-zero smp_suspend() result already returned above */
+}
+
diff -r c8d1f32fd7de drivers/xen/core/reboot.c
--- a/drivers/xen/core/reboot.c	Wed Nov 22 14:51:54 2006 -0500
+++ b/drivers/xen/core/reboot.c	Wed Dec 13 15:54:20 2006 -0500
@@ -7,16 +7,16 @@
 #include <linux/reboot.h>
 #include <linux/sysrq.h>
 #include <linux/stringify.h>
-#include <asm/irq.h>
-#include <asm/mmu_context.h>
-#include <xen/evtchn.h>
-#include <asm/hypervisor.h>
-#include <xen/xenbus.h>
 #include <linux/cpu.h>
 #include <linux/kthread.h>
+#include <xen/evtchn.h>
+#include <xen/xenbus.h>
 #include <xen/gnttab.h>
 #include <xen/xencons.h>
 #include <xen/cpu_hotplug.h>
+#include <asm/irq.h>
+#include <asm/mmu_context.h>
+#include <asm/hypervisor.h>
 
 #if defined(__i386__) || defined(__x86_64__)
 /*
@@ -79,7 +79,7 @@ EXPORT_SYMBOL(machine_power_off);
  */
 
 /* Ignore multiple shutdown requests. */
-static int shutting_down = SHUTDOWN_INVALID;
+int shutting_down = SHUTDOWN_INVALID;
 static void __shutdown_handler(void *unused);
 static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
 
@@ -192,13 +192,14 @@ static int __do_suspend(void *ignore)
 
 	return err;
 }
-#else  /* CONFIG_PPC_XEN */
+
+#else  /* CONFIG_PPC_XEN */
+int ppc_do_suspend(void *ignore);  /* defined in arch/powerpc/platforms/xen/reboot.c */
 static int __do_suspend(void *ignore)
 {
-	printk("SUSPEND!!??\n");
-	return 0;
-}
-#endif  /* CONFIG_PPC_XEN */
+	return ppc_do_suspend(ignore);
+}
+#endif  /* CONFIG_PPC_XEN */
 
 static int shutdown_process(void *__unused)
 {
diff -r c8d1f32fd7de include/asm-powerpc/xen/asm/hypercall.h
--- a/include/asm-powerpc/xen/asm/hypercall.h	Wed Nov 22 14:51:54 2006 -0500
+++ b/include/asm-powerpc/xen/asm/hypercall.h	Wed Dec 13 15:54:20 2006 -0500
@@ -60,6 +60,16 @@ static inline int HYPERVISOR_shutdown(un
 	return HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown);
 }
 
+/* Issue SCHEDOP_shutdown with reason SHUTDOWN_suspend; srec is accepted but not forwarded. */
+static inline int HYPERVISOR_suspend(unsigned long srec)
+{
+	struct sched_shutdown suspend_request = {
+		.reason = SHUTDOWN_suspend
+	};
+
+	return HYPERVISOR_sched_op(SCHEDOP_shutdown, &suspend_request);
+}
+
 static inline int HYPERVISOR_set_timer_op(unsigned long arg)
 {
 	return plpar_hcall_norets(XEN_MARK(__HYPERVISOR_set_timer_op), arg);
diff -r c8d1f32fd7de include/xen/interface/arch-powerpc.h
--- a/include/xen/interface/arch-powerpc.h	Wed Nov 22 14:51:54 2006 -0500
+++ b/include/xen/interface/arch-powerpc.h	Wed Dec 13 15:54:20 2006 -0500
@@ -29,7 +29,6 @@
 
 #define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name)
 #define XEN_GUEST_HANDLE(name)        __guest_handle_ ## name
-#define XEN_GUEST_HANDLE_64(name)     __guest_handle_ ## name
 #define set_xen_guest_handle(hnd, val) \
     do { \
         if (sizeof ((hnd).__pad)) \
@@ -42,9 +41,6 @@
 #endif
 
 #ifndef __ASSEMBLY__
-
-typedef uint64_t uint64_aligned_t;
-
 /* Guest handles for primitive C types. */
 __DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
 __DEFINE_XEN_GUEST_HANDLE(uint,  unsigned int);
@@ -98,9 +94,65 @@ typedef struct cpu_user_regs cpu_user_re
 
 typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ /* XXX timebase */
 
+#define        NUM_SLB_ENTRIES 64
+struct slb_entry {     /* element type of vcpu_guest_context.slb_entries[] (Segment Lookaside Buffer) */
+       uint64_t slb_vsid;
+       uint64_t slb_esid;
+};
+typedef struct slb_entry slb_entry_t;
+
+#ifndef HAS_VMX
+#define		 HAS_VMX 1
+#endif
+
+#ifndef HAS_FLOAT
+#define		 HAS_FLOAT 1
+#endif
+
+#ifdef HAS_VMX
+typedef struct {       /* 128 bits as four 32-bit words; type of vrs[] and vscr in vcpu_guest_context */
+       uint32_t u[4];
+} __attribute__((aligned(16))) _vector128;
+#endif /* HAS_VMX */
+
+
 /* ONLY used to communicate with dom0! See also struct exec_domain. */
 struct vcpu_guest_context {
     cpu_user_regs_t user_regs;         /* User-level CPU registers     */
+    slb_entry_t        slb_entries[NUM_SLB_ENTRIES];   /* Segment Lookaside Buffer */
+
+    /* Special-Purpose Registers */
+    uint64_t sprg[4];
+    uint64_t timebase;
+    uint64_t dar;
+    uint64_t dsisr;
+
+    struct cpu_vcpu_tag {
+       uint64_t hid4;
+    } cpu; /* CPU-specific bits */
+
+    uint32_t dec;
+
+    /* XXX etc */
+#ifdef HAS_FLOAT   /* always taken: this header defines HAS_FLOAT when it is not already set */
+#define  NUM_FPRS 32
+    double fprs[NUM_FPRS];
+#endif
+#ifdef HAS_VMX     /* always taken: this header defines HAS_VMX when it is not already set */
+    _vector128 vrs[32];
+    _vector128 vscr;
+    uint32_t vrsave;
+#endif
+
+#if 0   /* compiled out: none of the fields below are part of the structure */
+    struct xencomm *xencomm;
+
+    /* I/O-port access bitmap. */
+    u8 *iobmp;        /* Guest kernel virtual address of the bitmap. */
+    int iobmp_limit;  /* Number of ports represented in the bitmap.  */
+    int iopl;         /* Current IOPL for this VCPU. */
+#endif
+
     uint64_t sdr1;                     /* Pagetable base               */
     /* XXX etc */
 };
diff -r c8d1f32fd7de include/xen/interface/domctl.h
--- a/include/xen/interface/domctl.h	Wed Nov 22 14:51:54 2006 -0500
+++ b/include/xen/interface/domctl.h	Wed Dec 13 15:54:20 2006 -0500
@@ -354,6 +354,17 @@ struct xen_domctl_real_mode_area {
 };
 typedef struct xen_domctl_real_mode_area xen_domctl_real_mode_area_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t);
+
+#define XEN_DOMCTL_getshadowlist		 29
+struct xen_domctl_getshadowlist {
+		 /* OUT variables. */
+		 /* Start of htab array */
+		 uint64_t htab_map;
+		 /* Number of ptes within htab */
+		 uint htab_num_ptes;
+};
+typedef struct xen_domctl_getshadowlist  xen_domctl_getshadowlist_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getshadowlist_t);
 
 struct xen_domctl {
     uint32_t cmd;
@@ -381,6 +392,7 @@ struct xen_domctl {
         struct xen_domctl_arch_setup        arch_setup;
         struct xen_domctl_settimeoffset     settimeoffset;
         struct xen_domctl_real_mode_area    real_mode_area;
+        struct xen_domctl_getshadowlist     getshadowlist;
         uint8_t                             pad[128];
     } u;
 };

_______________________________________________
Xen-ppc-devel mailing list
Xen-ppc-devel@lists.xensource.com
http://lists.xensource.com/xen-ppc-devel
[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic