[prev in list] [next in list] [prev in thread] [next in thread]
List: qemu-devel
Subject: [Qemu-devel] [PATCH] Add snapshot capabilities to the VMDK format.
From: "Igor Lvovsky" <igor.lvovsky () qumranet ! com>
Date: 2006-12-25 15:40:11
Message-ID: 64F9B87B6B770947A9F8391472E0321609AB059E () ehost011-8 ! exch011 ! intermedia ! net
[Download RAW message or body]
[Attachment #2 (multipart/alternative)]
[Attachment #4 (text/plain)]
This patch implements the snapshot capabilities of the VMDK format.
Igor L.
[Attachment #5 (text/html)]
<html xmlns:o="urn:schemas-microsoft-com:office:office" \
xmlns:w="urn:schemas-microsoft-com:office:word" \
xmlns="http://www.w3.org/TR/REC-html40">
<head>
<meta http-equiv=Content-Type content="text/html; charset=utf-8">
<meta name=Generator content="Microsoft Word 11 (filtered medium)">
<style>
<!--
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
{margin:0cm;
margin-bottom:.0001pt;
font-size:12.0pt;
font-family:"Times New Roman";}
a:link, span.MsoHyperlink
{color:blue;
text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
{color:purple;
text-decoration:underline;}
span.EmailStyle17
{mso-style-type:personal-compose;
font-family:Arial;
color:windowtext;}
@page Section1
{size:612.0pt 792.0pt;
margin:72.0pt 90.0pt 72.0pt 90.0pt;}
div.Section1
{page:Section1;}
-->
</style>
</head>
<body lang=EN-US link=blue vlink=purple>
<div class=Section1>
<p class=MsoNormal><font size=2 face=Arial><span style='font-size:10.0pt;
font-family:Arial'>This patch implements the snapshot capabilities of the VMDK
format.<o:p></o:p></span></font></p>
<p class=MsoNormal><font size=2 face=Arial><span style='font-size:10.0pt;
font-family:Arial'><o:p> </o:p></span></font></p>
<p class=MsoNormal><font size=2 face=Arial><span style='font-size:10.0pt;
font-family:Arial'> Igor L.<o:p></o:p></span></font></p>
<p class=MsoNormal><font size=2 face=Arial><span style='font-size:10.0pt;
font-family:Arial'><o:p> </o:p></span></font></p>
</div>
</body>
</html>
["vmdk-support.diff" (application/octet-stream)]
Index: Makefile
===================================================================
RCS file: /sources/qemu/qemu/Makefile,v
retrieving revision 1.106
diff -u -r1.106 Makefile
--- Makefile 5 Aug 2006 21:29:27 -0000 1.106
+++ Makefile 25 Dec 2006 15:15:26 -0000
@@ -13,7 +13,7 @@
CFLAGS+=-mcpu=ultrasparc
endif
LDFLAGS=-g
-LIBS=
+LIBS= -luuid
DEFINES+=-D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE
TOOLS=qemu-img$(EXESUF)
ifdef CONFIG_STATIC
Index: Makefile.target
===================================================================
RCS file: /sources/qemu/qemu/Makefile.target,v
retrieving revision 1.134
diff -u -r1.134 Makefile.target
--- Makefile.target 7 Dec 2006 18:15:35 -0000 1.134
+++ Makefile.target 25 Dec 2006 15:15:27 -0000
@@ -413,7 +413,7 @@
ifndef CONFIG_DARWIN
ifndef CONFIG_WIN32
ifndef CONFIG_SOLARIS
-VL_LIBS=-lutil -lrt
+VL_LIBS=-lutil -lrt -luuid
endif
endif
endif
Index: block-vmdk.c
===================================================================
RCS file: /sources/qemu/qemu/block-vmdk.c,v
retrieving revision 1.9
diff -u -r1.9 block-vmdk.c
--- block-vmdk.c 1 Aug 2006 16:21:11 -0000 1.9
+++ block-vmdk.c 25 Dec 2006 15:15:27 -0000
@@ -22,6 +22,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include <uuid/uuid.h>
+#include <libgen.h>
#include "vl.h"
#include "block_int.h"
@@ -89,7 +91,286 @@
return 0;
}
-static int vmdk_open(BlockDriverState *bs, const char *filename, int flags)
+/* When defined, vmdk_is_cid_valid() compares the recorded parent CID with the parent image. */
+#define CHECK_CID 1
+
+#define SECTOR_SIZE 512
+/* Parenthesised so the macro stays a single operand wherever it is expanded. */
+#define DESC_SIZE (20 * SECTOR_SIZE) // 20 sectors of 512 bytes each
+#define HEADER_SIZE 512 // first sector of 512 bytes
+
+/*
+ * Read a content ID from the text descriptor embedded in an image file.
+ *
+ * fd:     descriptor of the image file; DESC_SIZE bytes of descriptor text
+ *         are read from byte offset 0x200.
+ * parent: non-zero to return the value of "parentCID", zero for "CID".
+ *
+ * Returns the hexadecimal value that follows the key, or 0 when fd is 0,
+ * the descriptor cannot be read in full, or the key is missing or not
+ * followed by a hexadecimal number.
+ */
+static uint32_t vmdk_read_cid(int fd, int parent)
+{
+    char desc[DESC_SIZE + 1];
+    const char *cid_str;
+    size_t cid_str_size;
+    char *p_name;
+    unsigned int cid = 0;   /* matches the "%x" conversion below */
+
+    if (!fd)
+        return 0;
+
+    /* the descriptor offset = 0x200 */
+    if (lseek(fd, 0x200, SEEK_SET) == -1)
+        return 0;
+    if (read(fd, desc, DESC_SIZE) != DESC_SIZE)
+        return 0;
+    /* read() does not terminate the buffer, but strstr() needs a C string */
+    desc[DESC_SIZE] = '\0';
+
+    /* sizeof counts the literal's NUL, which here stands in for the '=' */
+    if (parent) {
+        cid_str = "parentCID";
+        cid_str_size = sizeof("parentCID");
+    } else {
+        cid_str = "CID";
+        cid_str_size = sizeof("CID");
+    }
+
+    p_name = strstr(desc, cid_str);
+    if (!p_name)
+        return 0;
+    if (sscanf(p_name + cid_str_size, "%x", &cid) != 1)
+        return 0;
+
+    return cid;
+}
+
+/*
+ * Replace the "CID" value in the image's embedded text descriptor and
+ * record the new value in bs->cid.
+ *
+ * The rewrite relies on the "CID=" line being immediately followed by the
+ * "parentCID=" line: everything from "parentCID" onwards is saved, the CID
+ * value is overwritten, and the saved tail is appended again.
+ *
+ * Returns 0 on success.  Returns -1 when the image has no file descriptor,
+ * on any seek/read/write failure, when the descriptor has no "parentCID"
+ * key or no "CID" key in front of it, or when the rewritten text would not
+ * fit into the DESC_SIZE descriptor area.
+ */
+static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
+{
+    BDRVVmdkState *s = bs->opaque;
+    char desc[DESC_SIZE + 1], tmp_desc[DESC_SIZE + 1];
+    char *p_name, *tmp_str;
+    size_t room, old_len, new_len;
+    int n;
+
+    if (!s->fd)
+        return -1;
+
+    /* the descriptor offset = 0x200 */
+    if (lseek(s->fd, 0x200, SEEK_SET) == -1)
+        return -1;
+    if (read(s->fd, desc, DESC_SIZE) != DESC_SIZE)
+        return -1;
+    /* read() does not terminate the buffer, but strstr() needs a C string */
+    desc[DESC_SIZE] = '\0';
+    old_len = strlen(desc);
+
+    tmp_str = strstr(desc, "parentCID");
+    if (!tmp_str)
+        return -1;
+    /* the tail lies inside the terminated desc, so it fits in tmp_desc */
+    strcpy(tmp_desc, tmp_str);
+
+    /*
+     * "parentCID" itself contains "CID"; if that is the first match there
+     * is no CID line of our own in front of it to rewrite.
+     */
+    p_name = strstr(desc, "CID");
+    if (p_name > tmp_str)
+        return -1;
+
+    /* sizeof counts the literal's NUL, which here stands in for the '=' */
+    p_name += sizeof("CID");
+    room = sizeof(desc) - (size_t)(p_name - desc);
+    n = snprintf(p_name, room, "%x\n%s", (unsigned int)cid, tmp_desc);
+    if (n < 0 || (size_t)n >= room)
+        return -1;
+
+    /* a shorter CID leaves stale text behind the new terminator: clear it */
+    new_len = (size_t)(p_name - desc) + (size_t)n;
+    if (new_len < old_len)
+        memset(desc + new_len, 0, old_len - new_len);
+
+    if (lseek(s->fd, 0x200, SEEK_SET) == -1)
+        return -1;
+    if (write(s->fd, desc, DESC_SIZE) != DESC_SIZE)
+        return -1;
+
+    bs->cid = cid;
+    return 0;
+}
+
+/*
+ * Tell whether the parent image still matches the CID recorded for it.
+ *
+ * With CHECK_CID defined and a parent attached to bs, the parent's current
+ * "CID" is re-read from its file and compared with bs->parent_cid.
+ * Returns 1 when they agree (or when there is no parent, or the check is
+ * compiled out) and 0 when they differ.
+ */
+static int vmdk_is_cid_valid(BlockDriverState *bs)
+{
+#ifdef CHECK_CID
+    BlockDriverState *parent = bs->bs_par_table;
+
+    if (parent != NULL) {
+        BDRVVmdkState *parent_state = parent->opaque;
+        uint32_t cid_on_disk = vmdk_read_cid(parent_state->fd, 0);
+
+        /* equal: CID valid; different: CID not valid */
+        return bs->parent_cid == cid_on_disk;
+    }
+#endif
+    /* nothing to compare against: CID valid */
+    return 1;
+}
+
+/*
+ * Copy size bytes found at byte offset pos of src_fd to the same offset of
+ * dst_fd, staging them in buf (which must hold at least size bytes).
+ * Returns 0 on success, -1 on a seek error or a short read/write.
+ */
+static int vmdk_copy_region(int src_fd, int dst_fd, int64_t pos,
+                            char *buf, uint32_t size)
+{
+    if (lseek(src_fd, pos, SEEK_SET) == -1)
+        return -1;
+    if (read(src_fd, buf, size) != (ssize_t)size)
+        return -1;
+    if (lseek(dst_fd, pos, SEEK_SET) == -1)
+        return -1;
+    if (write(dst_fd, buf, size) != (ssize_t)size)
+        return -1;
+    return 0;
+}
+
+/*
+ * Create an empty snapshot image that uses the image behind bs as parent.
+ *
+ * A file named "<uuid>.vmdk" is created (the name is relative, so it lands
+ * in the current working directory).  It receives:
+ *   - a copy of the parent's first HEADER_SIZE bytes,
+ *   - a fresh text descriptor at offset 0x200 whose CID and parentCID are
+ *     both the parent's current CID and whose parentFileNameHint is
+ *     bs->filename,
+ *   - copies of the parent's redundant grain directory and grain directory.
+ * The file is sized to header.grain_offset sectors.
+ *
+ * bs->parent_cid is set to the parent's CID as a side effect.
+ *
+ * Returns 0 on success, -1 on failure.  A partially written snapshot file
+ * is left in place on failure.
+ *
+ * NOTE(review): the header fields are used in host byte order here, while
+ * vmdk_open() converts rgd_offset/gd_offset with le64_to_cpu() -- confirm
+ * behaviour on big-endian hosts.
+ */
+int vmdk_snapshot_create(BlockDriverState *bs)
+{
+    int snp_fd, p_fd;
+    int ret = -1;
+    int n;
+    unsigned int p_cid = 0;   /* matches the "%x" conversions below */
+    char *p_name;
+    char *gd_buf;
+    VMDK4Header header;
+    uint32_t gde_entries, gd_size;
+    int64_t gd_offset, rgd_offset, capacity, gt_size;
+    char p_desc[DESC_SIZE + 1], s_desc[DESC_SIZE], hdr[HEADER_SIZE];
+    char parent_filename[1024];
+    char snapshot_filename[1024];
+    uuid_t name;
+    const char *desc_template =
+        "# Disk DescriptorFile\n"
+        "version=1\n"
+        "CID=%x\n"
+        "parentCID=%x\n"
+        "createType=\"monolithicSparse\"\n"
+        "parentFileNameHint=\"%s\"\n"
+        "\n"
+        "# Extent description\n"
+        "RW %llu SPARSE \"%s\"\n"
+        "\n"
+        "# The Disk Data Base \n"
+        "#DDB\n"
+        "\n";
+
+    n = snprintf(parent_filename, sizeof(parent_filename), "%s", bs->filename);
+    if (n < 0 || (size_t)n >= sizeof(parent_filename))
+        return -1;
+    uuid_generate(name); // it should be unique filename
+    uuid_unparse(name, snapshot_filename);
+    strcat(snapshot_filename, ".vmdk");
+
+    snp_fd = open(snapshot_filename,
+                  O_RDWR | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 0644);
+    if (snp_fd < 0)
+        return -1;
+    p_fd = open(parent_filename, O_RDONLY | O_BINARY | O_LARGEFILE);
+    if (p_fd < 0) {
+        close(snp_fd);
+        return -1;
+    }
+
+    if (lseek(p_fd, 0x0, SEEK_SET) == -1)
+        goto out_close;
+    if (read(p_fd, hdr, HEADER_SIZE) != HEADER_SIZE)
+        goto out_close;
+
+    /* write the header */
+    if (lseek(snp_fd, 0x0, SEEK_SET) == -1)
+        goto out_close;
+    if (write(snp_fd, hdr, HEADER_SIZE) != HEADER_SIZE)
+        goto out_close;
+
+    memset(&header, 0, sizeof(header));
+    memcpy(&header, &hdr[4], sizeof(header)); // skip the VMDK4_MAGIC
+
+    /* size the file up to the first grain; no grain data is copied */
+    if (ftruncate(snp_fd, header.grain_offset << 9) == -1)
+        goto out_close;
+
+    /* the descriptor offset = 0x200 */
+    if (lseek(p_fd, 0x200, SEEK_SET) == -1)
+        goto out_close;
+    if (read(p_fd, p_desc, DESC_SIZE) != DESC_SIZE)
+        goto out_close;
+    /* read() does not terminate the buffer, but strstr() needs a C string */
+    p_desc[DESC_SIZE] = '\0';
+
+    /* a missing or unparsable CID is recorded as 0 */
+    if ((p_name = strstr(p_desc, "CID")) != 0) {
+        /* sizeof counts the literal's NUL, which here stands in for '=' */
+        p_name += sizeof("CID");
+        if (sscanf(p_name, "%x", &p_cid) != 1)
+            p_cid = 0;
+    }
+    n = snprintf(s_desc, sizeof(s_desc), desc_template, p_cid, p_cid,
+                 parent_filename, (unsigned long long)header.capacity,
+                 snapshot_filename);
+    if (n < 0 || (size_t)n >= sizeof(s_desc))
+        goto out_close;
+
+    /* NOTE(review): this updates the parent's own state -- confirm intent */
+    bs->parent_cid = p_cid;
+
+    /* write the descriptor */
+    if (lseek(snp_fd, 0x200, SEEK_SET) == -1)
+        goto out_close;
+    if (write(snp_fd, s_desc, (size_t)n) != n)
+        goto out_close;
+
+    gd_offset = header.gd_offset * SECTOR_SIZE;   // offset of GD table
+    rgd_offset = header.rgd_offset * SECTOR_SIZE; // offset of RGD table
+    capacity = header.capacity * SECTOR_SIZE;     // Extent size
+    /*
+     * Each GDE spans gt_size bytes of disk:
+     * num_gtes_per_gte GTEs per GT, each GTE points to one grain
+     */
+    gt_size = (int64_t)header.num_gtes_per_gte * header.granularity * SECTOR_SIZE;
+    if (gt_size <= 0)
+        goto out_close;
+    /* round up: a trailing partial span still needs its own gde/rgde */
+    gde_entries = (uint32_t)((capacity + gt_size - 1) / gt_size);
+    gd_size = gde_entries * sizeof(uint32_t);
+
+    /* one staging buffer serves both directory copies and is freed once */
+    gd_buf = qemu_malloc(gd_size);
+    if (!gd_buf)
+        goto out_close;
+
+    /* write RGD */
+    if (vmdk_copy_region(p_fd, snp_fd, rgd_offset, gd_buf, gd_size) == -1)
+        goto out_free;
+    /* write GD */
+    if (vmdk_copy_region(p_fd, snp_fd, gd_offset, gd_buf, gd_size) == -1)
+        goto out_free;
+
+    ret = 0;
+
+out_free:
+    qemu_free(gd_buf);
+out_close:
+    close(p_fd);
+    close(snp_fd);
+    return ret;
+}
+
+/* Close the parent image attached to bs through bs_par_table, if any. */
+static void vmdk_parent_close(BlockDriverState *bs)
+{
+    BlockDriverState *parent = bs->bs_par_table;
+
+    if (!parent)
+        return;
+
+    bdrv_close(parent);
+}
+
+
+/*
+ * Open the parent image named by the descriptor's parentFileNameHint.
+ *
+ * bs:       image being opened; on success bs->bs_par_table refers to the
+ *           opened parent and bs->parent_img_name holds the hint text.
+ * fd:       descriptor of the image file; DESC_SIZE bytes of descriptor
+ *           text are read from byte offset 0x200.
+ * dir_name: prefix (the caller passes a directory ending in '/') prepended
+ *           to the hint's base name when the hint cannot be stat()ed as
+ *           given.
+ *
+ * The parent is registered under a device name built from bs->device_name
+ * (cut at any existing "_QEMU") plus "_QEMU_<n>", n being a counter that
+ * is shared by all calls.
+ *
+ * Returns 0 when the parent was opened or the descriptor names no parent,
+ * -1 on I/O error, malformed hint, over-long names, allocation failure, or
+ * when the parent cannot be created or opened.
+ */
+static int vmdk_parent_open(BlockDriverState *bs, int fd, char * dir_name)
+{
+    char *p_name, *end_name, *tmp_name;
+    char desc[DESC_SIZE + 1];
+    static int idx=0;
+    char parent_img_name[1024];
+    char name[sizeof(bs->device_name)];
+    size_t hint_len;
+    int prefix_len, n;
+    struct stat file_buf;
+
+    /* the descriptor offset = 0x200 */
+    if (lseek(fd, 0x200, SEEK_SET) == -1)
+        return -1;
+    if (read(fd, desc, DESC_SIZE) != DESC_SIZE)
+        return -1;
+    /* read() does not terminate the buffer, but strstr() needs a C string */
+    desc[DESC_SIZE] = '\0';
+
+    p_name = strstr(desc, "parentFileNameHint");
+    if (!p_name)
+        return 0;   /* no parent recorded */
+
+    /*
+     * Skip past `parentFileNameHint="`: sizeof counts the literal's NUL,
+     * which stands in for '=', and the extra 1 is the opening quote.
+     */
+    if (strlen(p_name) < sizeof("parentFileNameHint") + 1)
+        return -1;
+    p_name += sizeof("parentFileNameHint") + 1;
+    if ((end_name = strchr(p_name,'\"')) == 0)
+        return -1;
+    hint_len = (size_t)(end_name - p_name);
+
+    /* zero-filled allocation keeps the copied hint terminated */
+    bs->parent_img_name = qemu_mallocz(hint_len + 2);
+    if (!bs->parent_img_name)
+        return -1;
+    memcpy(bs->parent_img_name, p_name, hint_len);
+
+    /* the hint comes from the image file: never let it overrun the buffer */
+    if (stat(bs->parent_img_name, &file_buf) != 0) {
+        n = snprintf(parent_img_name, sizeof(parent_img_name), "%s%s",
+                     dir_name, basename(bs->parent_img_name));
+    } else {
+        n = snprintf(parent_img_name, sizeof(parent_img_name), "%s",
+                     bs->parent_img_name);
+    }
+    if (n < 0 || (size_t)n >= sizeof(parent_img_name))
+        return -1;
+
+    tmp_name = strstr(bs->device_name,"_QEMU");
+    prefix_len = tmp_name ? (int)(tmp_name - bs->device_name)
+                          : (int)strlen(bs->device_name);
+    n = snprintf(name, sizeof(name), "%.*s_QEMU_%d",
+                 prefix_len, bs->device_name, ++idx);
+    if (n < 0 || (size_t)n >= sizeof(name))
+        return -1;
+
+    bs->bs_par_table = bdrv_new(name);
+    if (!bs->bs_par_table ||
+        bdrv_open(bs->bs_par_table, parent_img_name, 0) < 0) {
+        /*
+         * NOTE(review): this closes bs itself while it is still being
+         * opened -- confirm the caller's failure path tolerates that.
+         */
+        bdrv_close(bs);
+        return -1;
+    }
+
+    return 0;
+}
+
+static int vmdk_open(BlockDriverState *bs, const char *filename)
{
BDRVVmdkState *s = bs->opaque;
int fd, i;
@@ -119,6 +400,7 @@
s->l1_backup_table_offset = 0;
s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
} else if (magic == VMDK4_MAGIC) {
+ char dir_name[1024];
VMDK4Header header;
if (read(fd, &header, sizeof(header)) != sizeof(header))
@@ -133,6 +415,15 @@
/ s->l1_entry_sectors;
s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9;
s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9;
+
+ // try to open parent images, if exist
+ strcpy(dir_name, dirname(filename));
+ strcat(dir_name,"/");
+ if (vmdk_parent_open(bs, fd, dir_name) != 0)
+ goto fail;
+ // write the CID once after the image creation
+ bs->cid = vmdk_read_cid(fd,0);
+ bs->parent_cid = vmdk_read_cid(fd,1);
} else {
goto fail;
}
@@ -175,6 +466,39 @@
return -1;
}
+static uint64_t get_cluster_offset(BlockDriverState *bs, uint64_t offset, int allocate);
+
+/*
+ * Fill a newly allocated grain (cluster) of bs with the parent's data.
+ *
+ * cluster_offset: position of the new grain in bs's file, in 512-byte
+ *                 sectors.
+ * offset:         offset handed to get_cluster_offset() to locate the
+ *                 matching grain in the parent.
+ * allocate:       passed through unchanged to the parent lookup.
+ *                 NOTE(review): a non-zero value lets the lookup allocate
+ *                 in the parent image -- confirm that is intended.
+ *
+ * Returns 0 when the grain was copied, when bs has no parent, or when the
+ * parent lookup yields no grain; -1 when the parent's CID no longer
+ * matches, the grain sizes of parent and child differ, memory cannot be
+ * allocated, or a seek/read/write fails.
+ */
+static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
+                             uint64_t offset, int allocate)
+{
+    int ret = -1;
+    uint64_t parent_cluster_offset;
+    BDRVVmdkState *s = bs->opaque;
+    BDRVVmdkState *ps;
+    size_t grain_bytes;
+    uint8_t *whole_grain;
+
+    // we will be here if it's first write on non-exist grain(cluster).
+    // try to read from parent image, if exist
+    if (!bs->bs_par_table)
+        return 0;
+    ps = bs->bs_par_table->opaque;
+
+    if (!vmdk_is_cid_valid(bs))
+        return -1;
+
+    /* one buffer is read from the parent and written to the child */
+    if (ps->cluster_sectors != s->cluster_sectors)
+        return -1;
+    grain_bytes = (size_t)s->cluster_sectors * 512;
+
+    parent_cluster_offset = get_cluster_offset(bs->bs_par_table, offset, allocate);
+    /*
+     * 0 means the lookup produced no grain; reading at file offset 0 would
+     * copy the start of the parent's file into the new grain instead.
+     */
+    if (!parent_cluster_offset)
+        return 0;
+
+    whole_grain = qemu_malloc(grain_bytes);
+    if (!whole_grain)
+        return -1;
+
+    if (lseek(ps->fd, parent_cluster_offset, SEEK_SET) == -1)
+        goto out;
+    if (read(ps->fd, whole_grain, grain_bytes) != (ssize_t)grain_bytes)
+        goto out;
+
+    if (lseek(s->fd, cluster_offset << 9, SEEK_SET) == -1)
+        goto out;
+    if (write(s->fd, whole_grain, grain_bytes) != (ssize_t)grain_bytes)
+        goto out;
+
+    ret = 0;
+out:
+    qemu_free(whole_grain);
+    return ret;
+}
+
static uint64_t get_cluster_offset(BlockDriverState *bs,
uint64_t offset, int allocate)
{
@@ -240,6 +564,9 @@
if (write(s->fd, &tmp, sizeof(tmp)) != sizeof(tmp))
return 0;
}
+
+ if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1)
+ return 0;
}
cluster_offset <<= 9;
return cluster_offset;
@@ -275,11 +602,19 @@
if (n > nb_sectors)
n = nb_sectors;
if (!cluster_offset) {
- memset(buf, 0, 512 * n);
+ // try to read from parent image, if exist
+ if (bs->bs_par_table) {
+ if (!vmdk_is_cid_valid(bs))
+ return -1;
+ if (vmdk_read(bs->bs_par_table, sector_num, buf, nb_sectors) == -1)
+ return -1;
+ } else {
+ memset(buf, 0, 512 * n);
+ }
} else {
lseek(s->fd, cluster_offset + index_in_cluster * 512, SEEK_SET);
ret = read(s->fd, buf, n * 512);
- if (ret != n * 512)
+ if (ret != n * 512)
return -1;
}
nb_sectors -= n;
@@ -295,6 +630,7 @@
BDRVVmdkState *s = bs->opaque;
int ret, index_in_cluster, n;
uint64_t cluster_offset;
+ static int cid_update = 0;
while (nb_sectors > 0) {
index_in_cluster = sector_num & (s->cluster_sectors - 1);
@@ -311,6 +647,12 @@
nb_sectors -= n;
sector_num += n;
buf += n * 512;
+
+ // update CID on the first write every time the virtual disk is opened
+ if (!cid_update) {
+ vmdk_write_cid(bs, time(NULL));
+ cid_update++;
+ }
}
return 0;
}
@@ -334,7 +676,7 @@
"# The Disk Data Base \n"
"#DDB\n"
"\n"
- "ddb.virtualHWVersion = \"3\"\n"
+ "ddb.virtualHWVersion = \"4\"\n"
"ddb.geometry.cylinders = \"%lu\"\n"
"ddb.geometry.heads = \"16\"\n"
"ddb.geometry.sectors = \"63\"\n"
@@ -421,9 +763,12 @@
static void vmdk_close(BlockDriverState *bs)
{
BDRVVmdkState *s = bs->opaque;
+
qemu_free(s->l1_table);
qemu_free(s->l2_cache);
close(s->fd);
+ // try to close parent image, if exist
+ vmdk_parent_close(bs);
}
static void vmdk_flush(BlockDriverState *bs)
Index: block_int.h
===================================================================
RCS file: /sources/qemu/qemu/block_int.h,v
retrieving revision 1.10
diff -u -r1.10 block_int.h
--- block_int.h 19 Aug 2006 11:45:59 -0000 1.10
+++ block_int.h 25 Dec 2006 15:15:28 -0000
@@ -107,12 +107,18 @@
/* async read/write emulation */
void *sync_aiocb;
+
+ BlockDriverState *bs_par_table;
+ char *parent_img_name;
+ uint32_t parent_cid;
+ uint32_t cid;
+
/* NOTE: the following infos are only hints for real hardware
drivers. They are not used by the block driver */
int cyls, heads, secs, translation;
int type;
- char device_name[32];
+ char device_name[256];
BlockDriverState *next;
};
_______________________________________________
Qemu-devel mailing list
Qemu-devel@nongnu.org
http://lists.nongnu.org/mailman/listinfo/qemu-devel
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic