[prev in list] [next in list] [prev in thread] [next in thread] 

List:       qemu-devel
Subject:    [Qemu-devel] [PATCH] Add snapshot capabilities to the VMDK format.
From:       "Igor Lvovsky" <igor.lvovsky () qumranet ! com>
Date:       2006-12-25 15:40:11
Message-ID: 64F9B87B6B770947A9F8391472E0321609AB059E () ehost011-8 ! exch011 ! intermedia ! net
[Download RAW message or body]

[Attachment #2 (multipart/alternative)]

[Attachment #4 (text/plain)]

This patch implements the snapshot capabilities of the VMDK format.

 

                        Igor L.

 


[Attachment #5 (text/html)]

<html xmlns:o="urn:schemas-microsoft-com:office:office" \
xmlns:w="urn:schemas-microsoft-com:office:word" \
xmlns="http://www.w3.org/TR/REC-html40">

<head>
<meta http-equiv=Content-Type content="text/html; charset=utf-8">
<meta name=Generator content="Microsoft Word 11 (filtered medium)">
<style>
<!--
 /* Style Definitions */
 p.MsoNormal, li.MsoNormal, div.MsoNormal
	{margin:0cm;
	margin-bottom:.0001pt;
	font-size:12.0pt;
	font-family:"Times New Roman";}
a:link, span.MsoHyperlink
	{color:blue;
	text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
	{color:purple;
	text-decoration:underline;}
span.EmailStyle17
	{mso-style-type:personal-compose;
	font-family:Arial;
	color:windowtext;}
@page Section1
	{size:612.0pt 792.0pt;
	margin:72.0pt 90.0pt 72.0pt 90.0pt;}
div.Section1
	{page:Section1;}
-->
</style>

</head>

<body lang=EN-US link=blue vlink=purple>

<div class=Section1>

<p class=MsoNormal><font size=2 face=Arial><span style='font-size:10.0pt;
font-family:Arial'>This patch implements the snapshot capabilities of the VMDK
format.<o:p></o:p></span></font></p>

<p class=MsoNormal><font size=2 face=Arial><span style='font-size:10.0pt;
font-family:Arial'><o:p>&nbsp;</o:p></span></font></p>

<p class=MsoNormal><font size=2 face=Arial><span style='font-size:10.0pt;
font-family:Arial'>                        Igor L.<o:p></o:p></span></font></p>

<p class=MsoNormal><font size=2 face=Arial><span style='font-size:10.0pt;
font-family:Arial'><o:p>&nbsp;</o:p></span></font></p>

</div>

</body>

</html>


["vmdk-support.diff" (application/octet-stream)]

Index: Makefile
===================================================================
RCS file: /sources/qemu/qemu/Makefile,v
retrieving revision 1.106
diff -u -r1.106 Makefile
--- Makefile	5 Aug 2006 21:29:27 -0000	1.106
+++ Makefile	25 Dec 2006 15:15:26 -0000
@@ -13,7 +13,7 @@
 CFLAGS+=-mcpu=ultrasparc
 endif
 LDFLAGS=-g
-LIBS=
+LIBS= -luuid
 DEFINES+=-D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE
 TOOLS=qemu-img$(EXESUF)
 ifdef CONFIG_STATIC
Index: Makefile.target
===================================================================
RCS file: /sources/qemu/qemu/Makefile.target,v
retrieving revision 1.134
diff -u -r1.134 Makefile.target
--- Makefile.target	7 Dec 2006 18:15:35 -0000	1.134
+++ Makefile.target	25 Dec 2006 15:15:27 -0000
@@ -413,7 +413,7 @@
 ifndef CONFIG_DARWIN
 ifndef CONFIG_WIN32
 ifndef CONFIG_SOLARIS
-VL_LIBS=-lutil -lrt
+VL_LIBS=-lutil -lrt -luuid
 endif
 endif
 endif
Index: block-vmdk.c
===================================================================
RCS file: /sources/qemu/qemu/block-vmdk.c,v
retrieving revision 1.9
diff -u -r1.9 block-vmdk.c
--- block-vmdk.c	1 Aug 2006 16:21:11 -0000	1.9
+++ block-vmdk.c	25 Dec 2006 15:15:27 -0000
@@ -22,6 +22,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
+#include <uuid/uuid.h>
+#include <libgen.h>
 #include "vl.h"
 #include "block_int.h"
 
@@ -89,7 +91,286 @@
         return 0;
 }
 
-static int vmdk_open(BlockDriverState *bs, const char *filename, int flags)
+#define CHECK_CID 1                     // when defined, vmdk_is_cid_valid() compares CIDs
+
+#define SECTOR_SIZE 512                 // bytes per sector
+#define DESC_SIZE (20 * SECTOR_SIZE)    // 20 sectors of 512 bytes each
+#define HEADER_SIZE 512                 // first sector of 512 bytes
+
+/* Return the CID (the parentCID when parent is non-zero) parsed from the text
+ * descriptor at offset 0x200 of the image open on fd, or 0 if it cannot be read
+ * or parsed. */
+static uint32_t vmdk_read_cid(int fd, int parent)
+{
+    char desc[DESC_SIZE + 1];   /* +1 so the text can be NUL-terminated */
+    unsigned int cid = 0;       /* "%x" stores into an unsigned int */
+    const char *cid_str = parent ? "parentCID" : "CID";
+    char *p_name;
+
+    if (fd < 0)
+        return 0;
+
+    /* the descriptor offset = 0x200 */
+    if (lseek(fd, 0x200, SEEK_SET) == -1)
+        return 0;
+    if (read(fd, desc, DESC_SIZE) != DESC_SIZE)
+        return 0;
+    desc[DESC_SIZE] = '\0';     /* strstr() below needs a terminated string */
+
+    /* "CID" also matches inside "parentCID"; this relies on the CID= line
+     * coming first in the descriptor */
+    p_name = strstr(desc, cid_str);
+    if (!p_name)
+        return 0;
+
+    p_name += strlen(cid_str);          /* now at the '=' that follows the key */
+    if (*p_name != '=' || sscanf(p_name + 1, "%x", &cid) != 1)
+        return 0;
+    return cid;
+}
+
+/* Store cid as the "CID=" value of the image's text descriptor (offset 0x200) and
+ * cache it in bs->cid.  Returns 0 on success, -1 on I/O error or bad descriptor. */
+static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
+{
+    BDRVVmdkState *s = bs->opaque;
+    char desc[DESC_SIZE + 1], tmp_desc[DESC_SIZE + 1];
+    char *p_name, *tmp_str;
+    int room, len;
+
+    /* the descriptor offset = 0x200 */
+    if (s->fd < 0 || lseek(s->fd, 0x200, SEEK_SET) == -1 ||
+        read(s->fd, desc, DESC_SIZE) != DESC_SIZE)
+        return -1;
+    desc[DESC_SIZE] = '\0';     /* the str*() calls need a terminated string */
+    /* the text from "parentCID" on is saved and re-appended after the new CID */
+    tmp_str = strstr(desc, "parentCID");
+    p_name = strstr(desc, "CID");
+    if (!tmp_str || !p_name || p_name >= tmp_str)
+        return -1;              /* no standalone CID line ahead of parentCID */
+    strcpy(tmp_desc, tmp_str);
+    p_name += sizeof("CID");    /* skip "CID=" (sizeof counts the NUL) */
+    room = desc + DESC_SIZE + 1 - p_name;
+    len = snprintf(p_name, room, "%x\n%s", (unsigned int)cid, tmp_desc);
+    if (len < 0 || len >= room)
+        return -1;              /* rewritten text would not fit in the descriptor */
+    memset(p_name + len, 0, room - len);    /* clear the stale tail */
+    if (lseek(s->fd, 0x200, SEEK_SET) == -1 || write(s->fd, desc, DESC_SIZE) != DESC_SIZE)
+        return -1;
+    bs->cid = cid;
+    return 0;
+}
+
+/* Return 1 when bs has no parent or the parent's on-disk CID still equals
+ * bs->parent_cid, 0 otherwise.  Without CHECK_CID the answer is always 1. */
+static int vmdk_is_cid_valid(BlockDriverState *bs)
+{
+#ifdef CHECK_CID
+    BlockDriverState *parent = bs->bs_par_table;
+
+    if (parent != NULL) {
+        BDRVVmdkState *parent_state = parent->opaque;
+        uint32_t on_disk_cid = vmdk_read_cid(parent_state->fd, 0);
+
+        /* mismatch: the parent's CID differs from the one recorded in bs */
+        if (on_disk_cid != bs->parent_cid)
+            return 0;
+    }
+#endif
+    return 1;
+}
+
+/* Create "<uuid>.vmdk" in the current directory as a sparse VMDK4 snapshot of the
+ * image behind bs: the parent's header sector and both grain directories are copied
+ * and a descriptor naming the parent is written.  Returns 0 on success, -1 on error. */
+int vmdk_snapshot_create(BlockDriverState *bs)
+{
+    int snp_fd, p_fd, desc_len;
+    unsigned int p_cid = 0;      /* stays 0 if the parent has no parsable CID */
+    char *p_name, *gd_buf;
+    VMDK4Header header;
+    uint32_t gde_entries, gd_size;
+    int64_t gd_offset, rgd_offset, capacity, gt_size;
+    char p_desc[DESC_SIZE + 1], s_desc[DESC_SIZE], hdr[HEADER_SIZE];
+    char parent_filename[1024];
+    char snapshot_filename[1024];
+    uuid_t name;
+    char *desc_template =
+    "# Disk DescriptorFile\n"
+    "version=1\n"
+    "CID=%x\n"
+    "parentCID=%x\n"
+    "createType=\"monolithicSparse\"\n"
+    "parentFileNameHint=\"%s\"\n"
+    "\n"
+    "# Extent description\n"
+    "RW %lu SPARSE \"%s\"\n"
+    "\n"
+    "# The Disk Data Base \n"
+    "#DDB\n"
+    "\n";
+
+    snprintf(parent_filename, sizeof(parent_filename), "%s", bs->filename);
+    uuid_generate(name);     // it should be unique filename
+    uuid_unparse(name, snapshot_filename);
+    strcat(snapshot_filename,".vmdk");
+
+    snp_fd = open(snapshot_filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 0644);
+    if (snp_fd < 0)
+        return -1;
+    p_fd = open(parent_filename, O_RDONLY | O_BINARY | O_LARGEFILE);
+    if (p_fd < 0) {
+        close(snp_fd);
+        return -1;
+    }
+
+    if (lseek(p_fd, 0x0, SEEK_SET) == -1)
+        goto fail;
+    if (read(p_fd, hdr, HEADER_SIZE) != HEADER_SIZE)
+        goto fail;
+
+    /* write the header */
+    if (lseek(snp_fd, 0x0, SEEK_SET) == -1)
+        goto fail;
+    if (write(snp_fd, hdr, HEADER_SIZE) != HEADER_SIZE)
+        goto fail;
+
+    memcpy(&header, &hdr[4], sizeof(header)); // skip the VMDK4_MAGIC
+
+    if (ftruncate(snp_fd, header.grain_offset << 9) == -1)
+        goto fail;
+
+    /* the descriptor offset = 0x200 */
+    if (lseek(p_fd, 0x200, SEEK_SET) == -1)
+        goto fail;
+    if (read(p_fd, p_desc, DESC_SIZE) != DESC_SIZE)
+        goto fail;
+    p_desc[DESC_SIZE] = '\0';     /* strstr() needs a terminated string */
+
+    if ((p_name = strstr(p_desc,"CID=")) != 0) {
+        p_name += strlen("CID=");
+        sscanf(p_name,"%x",&p_cid);
+    }
+    desc_len = snprintf(s_desc, sizeof(s_desc), desc_template, p_cid, p_cid, parent_filename,
+                        (unsigned long)header.capacity, snapshot_filename); // %lu takes unsigned long
+    if (desc_len < 0 || desc_len >= (int)sizeof(s_desc))
+        goto fail;
+
+    bs->parent_cid = p_cid;
+
+    /* write the descriptor */
+    if (lseek(snp_fd, 0x200, SEEK_SET) == -1)
+        goto fail;
+    if (write(snp_fd, s_desc, desc_len) != desc_len)
+        goto fail;
+
+    gd_offset = header.gd_offset * SECTOR_SIZE;     // offset of GD table
+    rgd_offset = header.rgd_offset * SECTOR_SIZE;   // offset of RGD table
+    capacity = header.capacity * SECTOR_SIZE;       // Extent size
+    /* one GDE covers one grain table: num_gtes_per_gte grains of granularity sectors */
+    gt_size = (int64_t)header.num_gtes_per_gte * header.granularity * SECTOR_SIZE;
+    if (gt_size <= 0)
+        goto fail;
+    gde_entries = (uint32_t)((capacity + gt_size - 1) / gt_size);  // number of gde/rgde, rounded up
+    gd_size = gde_entries * sizeof(uint32_t);
+
+    /* one scratch buffer serves both directories, so it is freed exactly once */
+    gd_buf = qemu_malloc(gd_size);
+    if (!gd_buf)
+        goto fail;
+
+    /* write RGD */
+    if (lseek(p_fd, rgd_offset, SEEK_SET) == -1)
+        goto fail_gd;
+    if (read(p_fd, gd_buf, gd_size) != gd_size)
+        goto fail_gd;
+    if (lseek(snp_fd, rgd_offset, SEEK_SET) == -1)
+        goto fail_gd;
+    if (write(snp_fd, gd_buf, gd_size) != gd_size)
+        goto fail_gd;
+
+    /* write GD */
+    if (lseek(p_fd, gd_offset, SEEK_SET) == -1)
+        goto fail_gd;
+    if (read(p_fd, gd_buf, gd_size) != gd_size)
+        goto fail_gd;
+    if (lseek(snp_fd, gd_offset, SEEK_SET) == -1)
+        goto fail_gd;
+    if (write(snp_fd, gd_buf, gd_size) != gd_size)
+        goto fail_gd;
+    qemu_free(gd_buf);
+
+    close(p_fd);
+    close(snp_fd);
+    return 0;
+
+    fail_gd:
+    qemu_free(gd_buf);
+    fail:
+    close(p_fd);
+    close(snp_fd);
+    return -1;
+}
+
+static void vmdk_parent_close(BlockDriverState *bs)  // close the parent image of bs, if it has one
+{
+    if (bs->bs_par_table)                 // non-NULL once vmdk_parent_open() has created a parent
+        bdrv_close(bs->bs_par_table);     // bs->bs_par_table itself is not reset here
+}
+
+
+/* Open the parent named by parentFileNameHint in the descriptor of the image on fd
+ * into bs->bs_par_table.  Returns 0 on success or if there is no hint, -1 on error. */
+static int vmdk_parent_open(BlockDriverState *bs, int fd, char * dir_name)
+{
+    char *p_name;
+    char desc[DESC_SIZE + 1];
+    static int idx=0;           /* counter used to build distinct parent device names */
+    char parent_img_name[1024];
+
+    /* the descriptor offset = 0x200 */
+    if (lseek(fd, 0x200, SEEK_SET) == -1 || read(fd, desc, DESC_SIZE) != DESC_SIZE)
+        return -1;
+    desc[DESC_SIZE] = '\0';     /* strstr()/strchr() need a terminated string */
+
+    if ((p_name = strstr(desc,"parentFileNameHint=\"")) != 0) {
+        char *end_name, *tmp_name;
+        char name[256];
+        int name_size, len;
+        struct stat file_buf;
+
+        p_name += strlen("parentFileNameHint=\"");
+        if ((end_name = strchr(p_name,'\"')) == 0)
+            return -1;
+        bs->parent_img_name = qemu_mallocz(end_name - p_name + 2);
+        if (!bs->parent_img_name)
+            return -1;
+        memcpy(bs->parent_img_name, p_name, end_name - p_name);
+        if (stat(bs->parent_img_name, &file_buf) != 0)  /* hint not found: try dir_name */
+            len = snprintf(parent_img_name, sizeof(parent_img_name), "%s%s", dir_name, basename(bs->parent_img_name));
+        else
+            len = snprintf(parent_img_name, sizeof(parent_img_name), "%s", bs->parent_img_name);
+        if (len < 0 || len >= (int)sizeof(parent_img_name))
+            return -1;
+        /* device name: own name without any "_QEMU..." suffix, plus "_QEMU_<idx>" */
+        tmp_name = strstr(bs->device_name,"_QEMU");
+        name_size = tmp_name ? (tmp_name - bs->device_name) : sizeof(bs->device_name);
+        snprintf(name, sizeof(name), "%.*s_QEMU_%d", name_size, bs->device_name, ++idx);
+
+        bs->bs_par_table = bdrv_new(name);
+        if (!bs->bs_par_table) {
+            failure:
+            bdrv_close(bs);
+            return -1;
+        }
+        if (bdrv_open(bs->bs_par_table, parent_img_name, 0) < 0)
+            goto failure;
+    }
+
+    return 0;
+}
+
+static int vmdk_open(BlockDriverState *bs, const char *filename)
 {
     BDRVVmdkState *s = bs->opaque;
     int fd, i;
@@ -119,6 +400,7 @@
         s->l1_backup_table_offset = 0;
         s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
     } else if (magic == VMDK4_MAGIC) {
+        char dir_name[1024];
         VMDK4Header header;
         
         if (read(fd, &header, sizeof(header)) != sizeof(header))
@@ -133,6 +415,15 @@
             / s->l1_entry_sectors;
         s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9;
         s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9;
+
+        // try to open parent images, if exist
+        strcpy(dir_name, dirname(filename));
+        strcat(dir_name,"/");
+        if (vmdk_parent_open(bs, fd, dir_name) != 0)
+            goto fail;
+        // write the CID once after the image creation
+        bs->cid = vmdk_read_cid(fd,0);
+        bs->parent_cid = vmdk_read_cid(fd,1);
     } else {
         goto fail;
     }
@@ -175,6 +466,39 @@
     return -1;
 }
 
+static uint64_t get_cluster_offset(BlockDriverState *bs, uint64_t offset, int allocate);
+
+/* Copy-on-write helper: fill the newly allocated grain of bs at sector cluster_offset
+ * with the grain covering offset from the nearest ancestor that has it.  0 or -1. */
+static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
+                             uint64_t offset, int allocate)
+{
+    BDRVVmdkState *s = bs->opaque, *ps;
+    BlockDriverState *child = bs, *p_bs;
+    uint64_t parent_cluster_offset = 0;
+    size_t grain_bytes = (size_t)s->cluster_sectors * 512;
+    uint8_t whole_grain[grain_bytes];       // one grain, e.g. 128 sectors = 64KB
+
+    (void)allocate;     // ancestors are only read: never allocate grains in them
+    for (p_bs = bs->bs_par_table; p_bs; child = p_bs, p_bs = p_bs->bs_par_table) {
+        if (!vmdk_is_cid_valid(child))
+            return -1;
+        parent_cluster_offset = get_cluster_offset(p_bs, offset, 0);
+        if (parent_cluster_offset)
+            break;
+    }
+    if (!p_bs)          // no parent, or no ancestor holds this grain: nothing to copy
+        return 0;
+    ps = p_bs->opaque;
+    if (ps->cluster_sectors != s->cluster_sectors ||
+        lseek(ps->fd, parent_cluster_offset, SEEK_SET) == -1 ||
+        read(ps->fd, whole_grain, grain_bytes) != (ssize_t)grain_bytes ||
+        lseek(s->fd, cluster_offset << 9, SEEK_SET) == -1 ||
+        write(s->fd, whole_grain, grain_bytes) != (ssize_t)grain_bytes)
+        return -1;
+    return 0;
+}
+
 static uint64_t get_cluster_offset(BlockDriverState *bs,
                                    uint64_t offset, int allocate)
 {
@@ -240,6 +564,9 @@
             if (write(s->fd, &tmp, sizeof(tmp)) != sizeof(tmp))
                 return 0;
         }
+
+        if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1)
+            return 0;
     }
     cluster_offset <<= 9;
     return cluster_offset;
@@ -275,11 +602,19 @@
         if (n > nb_sectors)
             n = nb_sectors;
         if (!cluster_offset) {
-            memset(buf, 0, 512 * n);
+            // try to read from parent image, if exist
+            if (bs->bs_par_table) {
+                if (!vmdk_is_cid_valid(bs))
+                    return -1;
+                if (vmdk_read(bs->bs_par_table, sector_num, buf, nb_sectors) == -1)
+                    return -1;
+            } else {
+                memset(buf, 0, 512 * n);
+            }
         } else {
             lseek(s->fd, cluster_offset + index_in_cluster * 512, SEEK_SET);
             ret = read(s->fd, buf, n * 512);
-            if (ret != n * 512) 
+            if (ret != n * 512)
                 return -1;
         }
         nb_sectors -= n;
@@ -295,6 +630,7 @@
     BDRVVmdkState *s = bs->opaque;
     int ret, index_in_cluster, n;
     uint64_t cluster_offset;
+    static int cid_update = 0;
 
     while (nb_sectors > 0) {
         index_in_cluster = sector_num & (s->cluster_sectors - 1);
@@ -311,6 +647,12 @@
         nb_sectors -= n;
         sector_num += n;
         buf += n * 512;
+
+        // update CID on the first write every time the virtual disk is opened
+        if (!cid_update) {
+            vmdk_write_cid(bs, time(NULL));
+            cid_update++;
+        }
     }
     return 0;
 }
@@ -334,7 +676,7 @@
         "# The Disk Data Base \n"
         "#DDB\n"
         "\n"
-        "ddb.virtualHWVersion = \"3\"\n"
+        "ddb.virtualHWVersion = \"4\"\n"
         "ddb.geometry.cylinders = \"%lu\"\n"
         "ddb.geometry.heads = \"16\"\n"
         "ddb.geometry.sectors = \"63\"\n"
@@ -421,9 +763,12 @@
 static void vmdk_close(BlockDriverState *bs)
 {
     BDRVVmdkState *s = bs->opaque;
+
     qemu_free(s->l1_table);
     qemu_free(s->l2_cache);
     close(s->fd);
+    // try to close parent image, if exist
+    vmdk_parent_close(bs);
 }
 
 static void vmdk_flush(BlockDriverState *bs)
Index: block_int.h
===================================================================
RCS file: /sources/qemu/qemu/block_int.h,v
retrieving revision 1.10
diff -u -r1.10 block_int.h
--- block_int.h	19 Aug 2006 11:45:59 -0000	1.10
+++ block_int.h	25 Dec 2006 15:15:28 -0000
@@ -107,12 +107,18 @@
     /* async read/write emulation */
 
     void *sync_aiocb;
+
+    BlockDriverState *bs_par_table;
+    char *parent_img_name;
+    uint32_t parent_cid;
+    uint32_t cid;
+
     
     /* NOTE: the following infos are only hints for real hardware
        drivers. They are not used by the block driver */
     int cyls, heads, secs, translation;
     int type;
-    char device_name[32];
+    char device_name[256];
     BlockDriverState *next;
 };
 



_______________________________________________
Qemu-devel mailing list
Qemu-devel@nongnu.org
http://lists.nongnu.org/mailman/listinfo/qemu-devel


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic