[prev in list] [next in list] [prev in thread] [next in thread] 

List:       xen-devel
Subject:    [Xen-devel] [PATCH][RFC] xm dump-core command add on (TAKE 4)
From:       Akio Takebe <takebe_akio () jp ! fujitsu ! com>
Date:       2006-08-31 9:29:15
Message-ID: 8BC6CCDFECFD8Etakebe_akio () jp ! fujitsu ! com
[Download RAW message or body]

Hi,

These patches add the xm dump-core command.
Comments are welcome.

Usage:
    xm dump-core [-L|--live][-C| --crash] <domID> [output path]

    -L| --live Live dump:
        By default, xm dump does an xm pause, unpause before and
        after taking the dump, respectively.  This option disables
        the pause/unpause and simply takes the dump.

    -C| --crash Crash dump:
        This executes an xm destroy after the dump file is complete.

These patches are:
- check_status_xc_dumpcore.patch
  This patch is a bugfix patch.
  It checks the return status of copy_from_domain_page() in
  xc_domain_dumpcore_via_callback(), and if the copy fails,
  xc_core writes a list of the pages that could not be copied.
  (This reflects John's comments.)
  
- xmdump.v5.patch
  This patch adds the xm dump-core command; usage is as shown above.
  (Split out from the above patch and forward-ported to xen-unstable.hg-11389.)
  
- rename_dumpcore.v2.patch
  This patch adds the local time to the core file name.
  If xc_core fails, "-incomplete" is appended to the core file name.
  This patch depends on xmdump.v5.patch.
  (No changes; only forward-ported to xen-unstable.hg-11389.)
  
Signed-off-by: Ken Hironaka <hironaka.ken@soft.fujitsu.com>
Signed-off-by: Akio Takebe <takebe_akio@jp.fujitsu.com>

Best Regards,

Akio Takebe
["check_status_xc_dumpcore.patch" (application/octet-stream)]

# HG changeset patch
# User root@procyon
# Node ID 0c43a1f62c859019d926eecfbcfbf49c128bd0bd
# Parent  11626efd003c5c080bf9622fee0f1c3da508086a
In this patch, the xc_domain_dumpcore_via_callback() in xc_core.c of
libxc is modified. Previously, the xc_domain_dumpcore_via_callback()
did not respond to error when copy_from_domain_page() failed.
In other words, the dump core remained silent even if mapping the domain
memory failed and its page could not be copied. When this happened,
erroneous data had been dumped to the file without the user realizing it.
Now, it has been modified so that if copy_from_domain_page() fails,
this fact is recorded in the logfile and an incomplete-page list is written.
However, even in such cases, the dumping will continue as before.

Signed-off-by: Ken Hironaka <hironaka.ken@soft.fujitsu.com>
Signed-off-by: Akio Takebe <takebe_akio@jp.fujitsu.com>

diff -r 11626efd003c -r 0c43a1f62c85 tools/libxc/xc_core.c
--- a/tools/libxc/xc_core.c	Thu Aug 31 03:05:40 2006 +0100
+++ b/tools/libxc/xc_core.c	Thu Aug 31 13:01:05 2006 +0900
@@ -5,6 +5,12 @@
 /* number of pages to write at a time */
 #define DUMP_INCREMENT (4 * 1024)
 #define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
+
+/* Callback args for writing to a local dump file. */
+struct dump_args {
+    int     fd;
+    int     incomp_fd;
+};
 
 static int
 copy_from_domain_page(int xc_handle,
@@ -27,7 +33,7 @@ xc_domain_dumpcore_via_callback(int xc_h
                                 void *args,
                                 dumpcore_rtn_t dump_rtn)
 {
-    unsigned long nr_pages;
+    unsigned long n, nr_pages;
     xen_pfn_t *page_array = NULL;
     xc_dominfo_t info;
     int i, nr_vcpus = 0;
@@ -37,6 +43,12 @@ xc_domain_dumpcore_via_callback(int xc_h
     char dummy[PAGE_SIZE];
     int dummy_len;
     int sts;
+    unsigned int cpy_err_cnt = 0;
+    struct dump_args *da = args;
+    int fd = da->fd;
+    int incomp_fd = da->incomp_fd;
+    char cpy_err_mesg[64];
+    int mesg_bytes;
 
     if ( (dump_mem_start = malloc(DUMP_INCREMENT*PAGE_SIZE)) == NULL )
     {
@@ -73,11 +85,11 @@ xc_domain_dumpcore_via_callback(int xc_h
                  (nr_pages * sizeof(xen_pfn_t)));
     header.xch_pages_offset = round_pgup(dummy_len);
 
-    sts = dump_rtn(args, (char *)&header, sizeof(struct xc_core_header));
-    if ( sts != 0 )
-        goto error_out;
-
-    sts = dump_rtn(args, (char *)&ctxt, sizeof(ctxt[0]) * nr_vcpus);
+    sts = dump_rtn(fd, (char *)&header, sizeof(struct xc_core_header));
+    if ( sts != 0 )
+        goto error_out;
+
+    sts = dump_rtn(fd, (char *)&ctxt, sizeof(ctxt[0]) * nr_vcpus);
     if ( sts != 0 )
         goto error_out;
 
@@ -91,27 +103,39 @@ xc_domain_dumpcore_via_callback(int xc_h
         IPRINTF("Could not get the page frame list\n");
         goto error_out;
     }
-    sts = dump_rtn(args, (char *)page_array, nr_pages * sizeof(xen_pfn_t));
+    sts = dump_rtn(fd, (char *)page_array, nr_pages * sizeof(xen_pfn_t));
     if ( sts != 0 )
         goto error_out;
 
     /* Pad the output data to page alignment. */
     memset(dummy, 0, PAGE_SIZE);
-    sts = dump_rtn(args, dummy, header.xch_pages_offset - dummy_len);
-    if ( sts != 0 )
-        goto error_out;
-
-    for ( dump_mem = dump_mem_start, i = 0; i < nr_pages; i++ )
-    {
-        copy_from_domain_page(xc_handle, domid, page_array[i], dump_mem);
+    sts = dump_rtn(fd, dummy, header.xch_pages_offset - dummy_len);
+    if ( sts != 0 )
+        goto error_out;
+
+    for ( dump_mem = dump_mem_start, n = 0; n < nr_pages; n++ )
+    {
+        sts = copy_from_domain_page(xc_handle, domid, page_array[i], dump_mem);
+        if( sts != 0 ){
+            memset(dump_mem, 0, PAGE_SIZE);
+            cpy_err_cnt++;
+            memset(cpy_err_mesg, 0, sizeof(cpy_err_mesg));
+            mesg_bytes = sprintf(cpy_err_mesg, "Cannot copy_from_domain_page (%lu)\n", n);
+            dump_rtn(incomp_fd, (char *)cpy_err_mesg, mesg_bytes);
+        }
+
         dump_mem += PAGE_SIZE;
-        if ( ((i + 1) % DUMP_INCREMENT == 0) || ((i + 1) == nr_pages) )
+        if ( ((n + 1) % DUMP_INCREMENT == 0) || ((n + 1) == nr_pages) )
         {
-            sts = dump_rtn(args, dump_mem_start, dump_mem - dump_mem_start);
+            sts = dump_rtn(fd, dump_mem_start, dump_mem - dump_mem_start);
             if ( sts != 0 )
                 goto error_out;
             dump_mem = dump_mem_start;
         }
+    }
+    if( cpy_err_cnt != 0 ){
+        IPRINTF("Could not copy from domid=%d (%d)pages\n", domid, cpy_err_cnt);
+        goto error_out;
     }
 
     free(dump_mem_start);
@@ -124,20 +148,14 @@ xc_domain_dumpcore_via_callback(int xc_h
     return -1;
 }
 
-/* Callback args for writing to a local dump file. */
-struct dump_args {
-    int     fd;
-};
-
 /* Callback routine for writing to a local dump file. */
-static int local_file_dump(void *args, char *buffer, unsigned int length)
-{
-    struct dump_args *da = args;
+static int local_file_dump(int fd, char *buffer, unsigned int length)
+{
     int bytes, offset;
 
     for ( offset = 0; offset < length; offset += bytes )
     {
-        bytes = write(da->fd, &buffer[offset], length-offset);
+        bytes = write(fd, &buffer[offset], length-offset);
         if ( bytes <= 0 )
         {
             PERROR("Failed to write buffer: %s", strerror(errno));
@@ -154,11 +172,26 @@ xc_domain_dumpcore(int xc_handle,
                    const char *corename)
 {
     struct dump_args da;
+    char *incomp_file;
     int sts;
 
     if ( (da.fd = open(corename, O_CREAT|O_RDWR, S_IWUSR|S_IRUSR)) < 0 )
     {
         PERROR("Could not open corefile %s: %s", corename, strerror(errno));
+        return -errno;
+    }
+
+    
+    if ( (incomp_file = (char *)malloc(sizeof(corename) + 12)) == NULL )
+    {
+        PERROR("Could not allocate incomp_file");
+        return -errno;
+    }
+
+    sprintf(incomp_file, "%s-incomp.list", corename);
+    if ( (da.incomp_fd = open(incomp_file, O_CREAT|O_RDWR, S_IWUSR|S_IRUSR)) < 0 )
+    {
+        PERROR("Could not open corefile %s: %s", incomp_file, strerror(errno));
         return -errno;
     }
 
@@ -166,6 +199,10 @@ xc_domain_dumpcore(int xc_handle,
         xc_handle, domid, &da, &local_file_dump);
 
     close(da.fd);
+    close(da.incomp_fd);
+
+    if( sts == 0)
+        unlink(incomp_file);
 
     return sts;
 }
diff -r 11626efd003c -r 0c43a1f62c85 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h	Thu Aug 31 03:05:40 2006 +0100
+++ b/tools/libxc/xenctrl.h	Thu Aug 31 13:01:05 2006 +0900
@@ -161,7 +161,7 @@ int xc_domain_dumpcore(int xc_handle,
  * and passes an opaque object for the use of the function and
  * created by the caller of xc_domain_dumpcore_via_callback.
  */
-typedef int (dumpcore_rtn_t)(void *arg, char *buffer, unsigned int length);
+typedef int (dumpcore_rtn_t)(int fd, char *buffer, unsigned int length);
 
 int xc_domain_dumpcore_via_callback(int xc_handle,
                                     uint32_t domid,

["rename_dumpcore.v2.patch" (application/octet-stream)]

# HG changeset patch
# User root@procyon
# Node ID 9611a5c9e1a10588ec2de9bd45c0d0506feee8e9
# Parent  1efc5533181eb36e1e81c976179852b38a708e13
This patch adds the local time to the dumpcore filename,
and appends '-incomplete' to the filename if the dump fails.

Signed-off-by: Akio Takebe <takebe_akio@jp.fujitsu.com>

diff -r 1efc5533181e -r 9611a5c9e1a1 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py	Thu Aug 31 13:10:54 2006 +0900
+++ b/tools/python/xen/xend/XendDomainInfo.py	Thu Aug 31 13:12:26 2006 +0900
@@ -981,11 +981,14 @@ class XendDomainInfo:
         
         try:
             if not corefile:
-                corefile = "/var/xen/dump/%s.%s.core" % (self.info['name'],
-                                                     self.domid)
+                this_time = time.strftime("%Y-%m%d-%H%M.%S", time.localtime())
+                corefile = "/var/xen/dump/%s-%s.%s.core" % (this_time,
+                                  self.info['name'], self.domid)
             xc.domain_dumpcore(self.domid, corefile)
 
         except:
+            corefile_incomp = corefile+'-incomplete'
+            os.rename(corefile, corefile_incomp)
             log.exception("XendDomainInfo.dumpCore failed: id = %s name = %s",
                           self.domid, self.info['name'])
 

["xmdump.v5.patch" (application/octet-stream)]

# HG changeset patch
# User root@procyon
# Node ID 1efc5533181eb36e1e81c976179852b38a708e13
# Parent  0c43a1f62c859019d926eecfbcfbf49c128bd0bd
Add the xm dump-core command

xm dump-core [-L|--live][-C| --crash] <domID> [output path]

-L| --live Live dump:
        By default, xm dump does an xm pause, unpause before and
        after taking the dump, respectively.  This option disables
        the pause/unpause and simply takes the dump.

-C| --crash Crash dump:
        This executes an xm destroy after the dump file is complete.

The output path is optional, and if it is not specified, the path will be
/var/xen/dump/<domU name>.<domU ID>.core

This command uses the existing dumpCore(), which has been used to
take a core dump when a domU crashes.

Signed-off-by: Ken Hironaka <hironaka.ken@soft.fujitsu.com>
Signed-off-by: Akio Takebe <takebe_akio@jp.fujitsu.com>

diff -r 0c43a1f62c85 -r 1efc5533181e tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py	Thu Aug 31 13:01:05 2006 +0900
+++ b/tools/python/xen/xend/XendDomain.py	Thu Aug 31 13:10:54 2006 +0900
@@ -390,6 +390,22 @@ class XendDomain:
         except Exception, ex:
             raise XendError(str(ex))
 
+    def domain_dump(self, domid, filename, live, crash):
+        """Dump domain core."""
+
+        dominfo = self.domain_lookup_by_name_or_id_nr(domid)
+        if not dominfo:
+            raise XendInvalidDomain(str(domid))
+
+        if dominfo.getDomid() == PRIV_DOMAIN:
+            raise XendError("Cannot dump core for privileged domain %s" % domid)
+
+        try:
+            log.info("Domain core dump requested for domain %s (%d) live=%d crash=%d.",
+                     dominfo.getName(), dominfo.getDomid(), live, crash)
+            return dominfo.dumpCore(filename)
+        except Exception, ex:
+            raise XendError(str(ex))
 
     def domain_destroy(self, domid):
         """Terminate domain immediately."""
diff -r 0c43a1f62c85 -r 1efc5533181e tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py	Thu Aug 31 13:01:05 2006 +0900
+++ b/tools/python/xen/xend/XendDomainInfo.py	Thu Aug 31 13:10:54 2006 +0900
@@ -976,11 +976,12 @@ class XendDomainInfo:
         self.restart(True)
 
 
-    def dumpCore(self):
+    def dumpCore(self,corefile=None):
         """Create a core dump for this domain.  Nothrow guarantee."""
         
         try:
-            corefile = "/var/xen/dump/%s.%s.core" % (self.info['name'],
+            if not corefile:
+                corefile = "/var/xen/dump/%s.%s.core" % (self.info['name'],
                                                      self.domid)
             xc.domain_dumpcore(self.domid, corefile)
 
diff -r 0c43a1f62c85 -r 1efc5533181e tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py	Thu Aug 31 13:01:05 2006 +0900
+++ b/tools/python/xen/xm/main.py	Thu Aug 31 13:10:54 2006 +0900
@@ -57,6 +57,9 @@ create_help =  """create [-c] <ConfigFil
 create_help =  """create [-c] <ConfigFile>
                [Name=Value]..       Create a domain based on Config File"""
 destroy_help = "destroy <DomId>                  Terminate a domain immediately"
+dump_core_help =   """dump-core [-L|--live][-C|--crash]
+            <DomId> [FileName]      Dump core of the specified domain"""
+
 help_help =    "help                             Display this message"
 list_help =    "list [--long] [DomId, ...]       List information about domains"
 list_label_help = "list [--label] [DomId, ...]      List information about domains including their labels"
@@ -138,6 +141,7 @@ short_command_list = [
     "console",
     "create",
     "destroy",
+    "dump-core",
     "help",
     "list",
     "mem-set",
@@ -159,6 +163,7 @@ domain_commands = [
     "destroy",
     "domid",
     "domname",
+    "dump-core",
     "list",
     "list_label",
     "mem-max",
@@ -590,6 +595,43 @@ def xm_unpause(args):
 
     server.xend.domain.unpause(dom)
 
+def xm_dump_core(args):
+    arg_check(args, "dump-core",1,3)
+    live = False
+    crash = False
+    import getopt
+    (options, params) = getopt.gnu_getopt(args, 'LC', ['live','crash'])
+
+    for (k, v) in options:
+        if k in ['-L', '--live']:
+            live = True
+        if k in ['-C', '--crash']:
+            crash = True
+
+    if len(params) == 0 or len(params) > 2:
+        err("invalid number of parameters")
+        usage("dump-core")
+
+    dom = params[0]
+    if len(params) == 2:
+        filename = os.path.abspath(params[1])
+    else:
+        filename = None
+
+    if not live:
+        server.xend.domain.pause(dom)
+
+    try:
+        print "dumping core of domain:%s ..." % str(dom)
+        server.xend.domain.dump(dom, filename, live, crash)
+    finally:
+        if not live:
+            server.xend.domain.unpause(dom)
+
+    if crash:
+        print "destroying domain:%s ..." % str(dom)
+        server.xend.domain.destroy(dom)
+
 def xm_rename(args):
     arg_check(args, "rename", 2)
 
@@ -1168,6 +1210,7 @@ commands = {
     "destroy": xm_destroy,
     "domid": xm_domid,
     "domname": xm_domname,
+    "dump-core": xm_dump_core,
     "rename": xm_rename,
     "restore": xm_restore,
     "save": xm_save,



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic