[prev in list] [next in list] [prev in thread] [next in thread] 

List:       user-mode-linux-devel
Subject:    [uml-devel] stacked COW files for 2.4.20-5um
From:       "James McMechan" <James_McMechan () hotmail ! com>
Date:       2003-05-23 23:11:45
[Download RAW message or body]

Well, I have gotten a strange error out of 2.5.69-1 while running fsx
on a stack of 24 COW files on my partitioned root file system:
kernel/sched.c:1430: spin_lock(kernel/printk.c:a02704a0) already locked
by kernel/sched.c/1430
But 2.4.20-5 + COW-2.4.20-5.patch ran fsx for 2.5 hours without problems
with the same 25-layer setup.
Attached is the COW-2.4.20-5.patch.
I am now resuming work on ubd-many for 2.4.20-5um.

["COW-2.4.20-5.patch" (application/octet-stream)]

diff -Nur uml-2.4.20-5/arch/um/drivers/cow.h work-2.4.20-5/arch/um/drivers/cow.h
--- uml-2.4.20-5/arch/um/drivers/cow.h	1969-12-31 16:00:00.000000000 -0800
+++ work-2.4.20-5/arch/um/drivers/cow.h	2003-05-18 22:33:38.000000000 -0700
@@ -0,0 +1,19 @@
+#define COW_RDONLY 1	/* open_COW opens the file O_RDONLY */
+#define COW_CREATE 2	/* open_COW adds O_CREAT and builds a header if none is found */
+#define COW_DIRTY 4	/* cached bitmap page was changed and must be written back */
+#define COW_SPARSE 8	/* write_COW checks all-zero sectors against the backing file */
+#define COW_NOMAP 16	/* newly created file gets no header and no bitmap */
+struct devinfo {	/* one layer in a chain of COW files */
+	char *name;	/* file name; open_COW stores it right behind the struct */
+	__u64 cow_length, cow_offset, cow_current;	/* bitmap: bits, file offset, first bit of cached page */
+	__u64 data_length, data_offset;	/* data area: length in bytes, file offset */
+	char *cow_bitmap;	/* cached page of the bitmap (MAP_SIZE bytes in cow_user.c) */
+	struct devinfo *next_cow_dev;	/* backing layer; NULL for a plain image */
+	int fd,flags;	/* open file descriptor, COW_* flags */
+	int cylinders, heads, sectors, sector_size;	/* disk geometry */
+	time_t mtime;	/* st_mtime of a plain image, else the mtime stored in the header */
+};
+struct devinfo *open_COW(char *cow_file, int in_flags);	/* cow_file is "file[,backing]", split in place */
+ssize_t close_COW(struct devinfo *dev);	/* closes and frees dev and its backing chain */
+ssize_t read_COW(struct devinfo *dev, void *buf, size_t length, __u64 offset);	/* length/offset/result in sectors */
+ssize_t write_COW(struct devinfo *dev, void *buf, size_t count, __u64 offset);	/* count/offset/result in sectors */
diff -Nur uml-2.4.20-5/arch/um/drivers/cow_user.c \
                work-2.4.20-5/arch/um/drivers/cow_user.c
--- uml-2.4.20-5/arch/um/drivers/cow_user.c	1969-12-31 16:00:00.000000000 -0800
+++ work-2.4.20-5/arch/um/drivers/cow_user.c	2003-05-19 03:33:27.000000000 -0700
@@ -0,0 +1,745 @@
+/*
+ * common cow file functions to be used by both uml_moo and ubd_user
+ * we really want 64 bit math on almost everything since otherwise
+ * we overflow at 2/4G or at 2^31 sectors ~1TiB which is starting to get
+ * to be easy to generate for real storage.  For sparse files it has been
+ * possible for some time.
+ * LICENSE GPLv2
+ * James_McMechan at hotmail.com
+ * the version 3 header is a test header I am working on; don't use it
+ * yet.  It is designed to deal with other issues that are
+ * only small problems at the moment.
+ */
+#include <sys/types.h> /* stat... */
+#ifndef __USE_XOPEN
+#define __USE_XOPEN
+#endif
+#ifndef __USE_LARGEFILE64
+#define __USE_LARGEFILE64
+#endif
+#include <sys/stat.h> /* stat... */
+#include <unistd.h> /* pread/pwrite */
+#include <sys/param.h> /* MAXPATHLEN */
+#include <asm/types.h> /* __u32... */
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h> /* O_RDONLY etc for open */
+#include <netinet/in.h>	/* ntohl... */
+#include <endian.h>
+#include <byteswap.h>
+#include "cow.h"
+#if __BYTE_ORDER == __BIG_ENDIAN
+# define ntohll(x) (x)
+# define htonll(x) (x)
+#elif __BYTE_ORDER == __LITTLE_ENDIAN
+# define ntohll(x)  bswap_64(x)
+# define htonll(x)  bswap_64(x)
+#else
+#error "__BYTE_ORDER not defined"
+#endif
+
+#ifdef TEST	/* stand-alone user space build: map the kernel helpers onto libc */
+#include <stdio.h> /* printf */
+#include <stdlib.h> /* malloc/free */
+#define printk printf
+#define ERROR(format,f...) printf(format,##f)
+#define DEBUG(format,f...)
+static void *ubd_alloc(int size) {
+	void *memory = malloc(size);
+	DEBUG("allocated memory %p\n", memory);
+	return memory;
+}
+static void ubd_free(void *memory) {
+	DEBUG("freeing memory %p\n", memory);
+	free(memory);
+}
+#else	/* built into the UML kernel: use its allocator and printk */
+#include "user.h"
+#ifndef KERN_ERR
+#define KERN_ERR "<3>"
+#define KERN_INFO "<6>"
+#endif
+#define ERROR(format,f...) printk(KERN_ERR format,##f)
+#define DEBUG(format,f...)
+static void *ubd_alloc(int size)
+{
+	void *memory =  (void *) um_kmalloc(size);
+	DEBUG("allocated memory %p\n", memory);
+	return memory;
+}
+
+static void ubd_free(void *memory)
+{
+	DEBUG("freeing memory %p\n", memory);
+	kfree(memory);
+}
+#endif
+
+#define PATH_LEN_V1 256
+#define PATH_LEN_V2 MAXPATHLEN
+#define PATH_LEN_V3 4096
+
+#define VERSION_1 1
+#define VERSION_2 2
+#define VERSION_3 3
+#define CURRENT_VERSION 2
+#define COW_MAGIC 0x4f4f4f4d  /* mooo */
+
+#define SECTOR_SIZE 512
+#define MAP_SIZE 4096
+#define PAD_SIZE 512
+union cow_header {	/* header layouts found at the start of a COW file, one per version */
+#ifdef VERSION_1
+	struct cow_header_v1 {	/* open_COW uses these fields without byte swapping */
+		int magic, version;	/* COW_MAGIC, 1 */
+		char backing_file[PATH_LEN_V1];
+		time_t mtime;
+	       	__u64 size;	/* copied to data_length by open_COW */
+		int sectorsize;
+	} v1;
+#define fillsize1 ((sizeof(struct cow_header_v1)+PAD_SIZE-1) & ~(PAD_SIZE-1))
+	char space_v1[fillsize1];	/* struct size rounded up to a multiple of PAD_SIZE */
+#endif
+#ifdef VERSION_2
+	struct cow_header_v2 {	/* open_COW converts the integers with ntohl()/ntohll() */
+		int magic, version;	/* COW_MAGIC, 2 */
+		char backing_file[PATH_LEN_V2];
+		time_t mtime;
+		__u64 size;	/* copied to data_length by open_COW */
+		int sectorsize;
+	} v2;
+#define fillsize2 ((sizeof(struct cow_header_v2)+PAD_SIZE-1) & ~(PAD_SIZE-1))
+	char space_v2[fillsize2];	/* struct size rounded up to a multiple of PAD_SIZE */
+#endif
+#ifdef VERSION_3
+	struct cow_header_v3 {	/* written by open_COW with htonl()/htonll() */
+		char magic[4];	/* the bytes 'M','O','O','O' */
+		__u32 version;	/* 3 */
+		__u64 cow_length, cow_offset;	/* bitmap: sector count, file offset */
+		__u64 data_length, data_offset;	/* data: backing length in bytes, file offset */
+		__u64 mtime;	/* mtime taken from the backing layer */
+		__u32 cylinders, heads, sectors, sector_size;
+		char backing_file[PATH_LEN_V3];
+	} v3;
+#define fillsize3 ((sizeof(struct cow_header_v3)+PAD_SIZE-1) & ~(PAD_SIZE-1))
+	char space_v3[fillsize3];	/* struct size rounded up to a multiple of PAD_SIZE */
+#endif
+};
+
+char zero[SECTOR_SIZE] = {};
+
+/*
+ * a bit shift of 3 is the log base 2 of 8
+ * and is used to avoid having to use the
+ * 64 bit math libraries; also the function
+ * is half the size of the div/mod version.
+ * Also this really needs to use char * so
+ * that it will work with the same file data
+ * on both big and little endian machines.
+ */
+/* log2(bits per bitmap byte); undefine to use the div/mod versions */
+#define bit_shift 3
+#ifdef bit_shift
+#define divide8(x) ((x) >> bit_shift)
+#else
+#define divide8(x) ((x)/8)
+#endif
+/* Return 1 if bit number `bit` is set in the byte-addressed bitmap data, else 0 */
+static int ubd_test_bit(off64_t bit, unsigned char *data)
+{
+	__u64 index;
+	unsigned char off;
+
+#ifdef bit_shift
+	index = bit >> bit_shift; /* bit / 8 without a 64 bit divide */
+	off = bit & ((1 << bit_shift)-1); /* bit % 8, bitmask with all const math */
+	DEBUG("bit %llx data[%llx] = %02x & 1 << %d:%x\n",bit,index,data[index],off,1<<off);
+#else
+	int bits = 8 * sizeof(*data); /* bits per bitmap byte */
+	index  = bit / bits;
+	off = bit % bits;
+#endif
+	return((data[index] & (1 << off)) != 0);
+}
+
+/* Set bit number `bit` in the byte-addressed bitmap data */
+static void ubd_set_bit(off64_t bit, unsigned char *data)
+{
+	__u64 index;
+	unsigned char off;
+#ifdef bit_shift
+	index = bit >> bit_shift; /* bit / 8 without a 64 bit divide */
+	off = bit & ((1 << bit_shift)-1); /* bit % 8, bitmask with all const math */
+	DEBUG("set %llx data[%llx] = %02x & 1 << %d:%x\n",bit,index,data[index],off,1<<off);
+#else
+	int bits = 8 * sizeof(*data); /* bits per bitmap byte */
+	index  = bit / bits;
+	off = bit % bits;
+#endif
+	data[index] |= (1 << off);
+}
+
+/* Flush a dirty bitmap page, close dev and its backing chain, free dev; 0 or -1 */
+ssize_t close_COW(struct devinfo *dev)
+{
+	ssize_t tmp, status = 0;
+	if (dev->flags & COW_DIRTY) {
+		off64_t where = dev->cow_offset + divide8(dev->cow_current);
+		DEBUG("closing %s fd %d buf %p[%d]:%x\n", dev->name, dev->fd, dev->cow_bitmap, MAP_SIZE, where);
+		tmp = pwrite64(dev->fd, dev->cow_bitmap, MAP_SIZE, where);
+		if (tmp < MAP_SIZE) {
+			ERROR("Eek, short write closing %ld of %d\n", (long) tmp, MAP_SIZE);
+			status = -1;
+		}
+	}
+	if (dev->next_cow_dev && close_COW(dev->next_cow_dev) < 0)
+		status = -1;	/* a backing layer failed to flush its bitmap */
+	close(dev->fd);
+	ubd_free(dev);
+	return status;
+}
+
+/*
+ * Open "file[,backing]" (split in place at the ','), parse or create its COW
+ * header and recursively open the backing chain.  Returns a devinfo to be
+ * released with close_COW(), or NULL after logging the reason via ERROR().
+ */
+struct devinfo *open_COW(char *cow_file, int in_flags)
+{
+	union cow_header *workspace = NULL;
+	struct devinfo tmp = {}, *result;
+	char *name,*cow_name = NULL;
+	int n, flags, version, mode;
+	struct stat64 buf;
+	off64_t count;
+
+	if (cow_file == NULL) {
+		ERROR("no file to open\n");
+		return NULL;
+	}
+
+	/* save passed in flags */
+	tmp.flags = in_flags;
+
+	name = strchr(cow_file,',');
+	if (name) {
+		*name = '\0'; /* this splits the string at the ',' */
+		name++;
+		/* if it ends with a comma just dump it */
+		if (*name == '\0') name = NULL;
+	}
+	/* we need a backing file to create a COW file */
+	if (!name)
+		in_flags &= ~COW_CREATE;
+
+	/* default mode for open command */
+	mode  = S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR | S_IWGRP | S_IWOTH;
+	if (in_flags & COW_RDONLY) {
+		flags = O_RDONLY;
+	} else {
+		flags = O_RDWR;
+		if (in_flags & COW_CREATE)
+			flags |= O_CREAT;
+	}
+
+	tmp.fd = open64(cow_file, flags, mode);
+	if (tmp.fd < 0) {
+		ERROR("unable to open %s\n",cow_file);
+		return NULL;
+	}
+	workspace = ubd_alloc(sizeof(struct cow_header_v3));
+	if (!workspace) {
+		ERROR("unable to allocate space for reading header\n");
+		goto close1; /* the file is already open, do not leak it */
+	}
+	n = read(tmp.fd,workspace,sizeof(struct cow_header_v3));
+	if (n < 0 || (size_t) n < sizeof(struct cow_header_v3)) { /* n < 0 must not be compared unsigned */
+		memset(workspace, 0, sizeof(struct cow_header_v3));
+		if (!(in_flags & COW_CREATE)) {
+			ERROR("short read on %s only got %d wanted %d error %s\n", cow_file, n, (int) sizeof(struct cow_header_v3), strerror(errno));
+			goto close1;
+		}
+	}
+
+	version = 0;
+#ifdef VERSION_1
+	if (workspace->v1.magic == COW_MAGIC) {
+		if (workspace->v1.version == 1) {
+			version = 1;
+			cow_name = workspace->v1.backing_file;
+			tmp.data_length = workspace->v1.size;
+			tmp.mtime = workspace->v1.mtime;
+			tmp.sector_size = workspace->v1.sectorsize;
+			tmp.cow_offset = sizeof(struct cow_header_v1);
+			tmp.heads = 128;
+			tmp.sectors = 32;
+			tmp.cylinders = tmp.data_length / (tmp.heads * tmp.sectors * tmp.sector_size);
+			tmp.cow_length = (tmp.data_length + tmp.sector_size - 1) / tmp.sector_size;
+			tmp.data_offset = tmp.cow_offset + divide8(tmp.cow_length);
+			tmp.data_offset = (tmp.data_offset + tmp.sector_size - 1) / tmp.sector_size;
+			tmp.data_offset *= tmp.sector_size;
+		}
+	}
+#endif
+#ifdef VERSION_2
+	if (ntohl(workspace->v2.magic) == COW_MAGIC) {
+		if (ntohl(workspace->v2.version) == 2) {
+			version = ntohl(workspace->v2.version);
+			cow_name = workspace->v2.backing_file;
+			tmp.data_length = ntohll(workspace->v2.size);
+			tmp.mtime = ntohl(workspace->v2.mtime);
+			tmp.sector_size = ntohl(workspace->v2.sectorsize);
+			tmp.cow_offset = sizeof(struct cow_header_v2);
+			tmp.heads = 128;
+			tmp.sectors = 32;
+			tmp.cylinders = tmp.data_length / (tmp.heads * tmp.sectors * tmp.sector_size);
+			tmp.cow_length = (tmp.data_length + tmp.sector_size - 1) / tmp.sector_size;
+			tmp.data_offset = tmp.cow_offset + divide8(tmp.cow_length);
+			tmp.data_offset = (tmp.data_offset + tmp.sector_size - 1) / tmp.sector_size;
+			tmp.data_offset *= tmp.sector_size;
+		}
+	}
+#endif
+#ifdef VERSION_3
+	char magic[] = {'M','O','O','O'};
+	if (memcmp(&workspace->v3.magic, &magic, sizeof(magic)) == 0) {
+		if (ntohl(workspace->v3.version) == 3) {
+			version = ntohl(workspace->v3.version);
+			cow_name = workspace->v3.backing_file;
+			tmp.data_offset = ntohll(workspace->v3.data_offset);
+			tmp.data_length = ntohll(workspace->v3.data_length);
+			tmp.mtime = ntohll(workspace->v3.mtime);
+			tmp.cow_offset = ntohll(workspace->v3.cow_offset);
+			tmp.cow_length = ntohll(workspace->v3.cow_length);
+			tmp.cylinders = ntohl(workspace->v3.cylinders);
+			tmp.heads = ntohl(workspace->v3.heads);
+			tmp.sectors = ntohl(workspace->v3.sectors);
+			tmp.sector_size = ntohl(workspace->v3.sector_size);
+		}
+	}
+#endif
+
+	/* if no COW file header */
+	if (version == 0) {
+		/* if create a COW file */
+		if (in_flags & COW_CREATE || !name) {
+			/* if no backing file this is the disk image file */
+			if (name == NULL) {
+				tmp.sector_size = SECTOR_SIZE;
+				tmp.cow_length = 0;
+				tmp.cow_offset = 0;
+				if (fstat64(tmp.fd, &buf) < 0) {
+					ERROR("problem stating file %s %s\n",
+						cow_file, strerror(errno));
+					goto close1;
+				}
+				tmp.data_length = buf.st_size;
+				tmp.mtime = buf.st_mtime;
+				tmp.data_offset = 0;
+				tmp.heads = 128;
+				tmp.sectors = 32;
+				tmp.cylinders = tmp.data_length / (tmp.heads * tmp.sectors * tmp.sector_size);
+				tmp.next_cow_dev = NULL;
+			} else {
+				/*
+				 * we have a backing file to try
+				 * here we should check backing_file
+				 */
+				in_flags &= ~COW_CREATE;
+				in_flags |=  COW_RDONLY;
+				result = open_COW(name,in_flags);
+				if (result) {
+					char magic[] = {'M','O','O','O'};
+					struct cow_header_v3 *v3 = &workspace->v3;
+					/* copy in the MAGIC "MOOO" */
+					memcpy(&v3->magic, &magic, sizeof(magic));
+					/* a version 3 header */
+					version = 3;
+					v3->version = htonl(version);
+
+					/* cylinders for CHS */
+					count = result->cylinders;
+					v3->cylinders = htonl(count);
+					tmp.cylinders = count;
+
+					/* heads for CHS */
+					count = result->heads;
+					v3->heads = htonl(count);
+					tmp.heads = count;
+
+					/* sectors for CHS */
+					count = result->sectors;
+					v3->sectors = htonl(count);
+					tmp.sectors = count;
+
+					/* sector size for all calcs */
+					count = result->sector_size;
+					v3->sector_size = htonl(count);
+					tmp.sector_size = count;
+
+					/* backing file length */
+					count = result->data_length;
+					v3->data_length = htonll(count);
+					tmp.data_length = count;
+
+					/* convert to sector count */
+					count += result->sector_size-1;
+					count /= result->sector_size;
+
+					/* save offsets and length */
+					v3->cow_offset = htonll(fillsize3);
+					v3->cow_length = htonll(count);
+					tmp.cow_offset = fillsize3;
+					tmp.cow_length = count;
+
+					/* convert to byte count */
+					count += 8-1;
+					count  = divide8(count);
+
+					/* now add in the PAD_SIZE */
+					count +=  (PAD_SIZE-1);
+					count &= ~(PAD_SIZE-1);
+
+					/* add in length of the header */
+					count += fillsize3;
+					v3->data_offset = htonll(count);
+					tmp.data_offset = count;
+
+					/* save backing file name, always NUL terminated */
+					count = sizeof(v3->backing_file);
+					cow_name = v3->backing_file;
+					/* strncpy zero pads but does not terminate on truncation */
+					strncpy(cow_name, name, count - 1);
+					cow_name[count - 1] = '\0';
+
+					/* save the backing file time */
+					count = result->mtime;
+					v3->mtime = htonll(count);
+					tmp.mtime = count;
+
+					tmp.next_cow_dev = result;
+					/* if we are creating a plain file */
+					if (in_flags & COW_NOMAP) {
+						tmp.data_offset = 0;
+						tmp.cow_offset = 0;
+						tmp.cow_length = 0;
+					} else {
+						/* its a COW file */
+						/* so write header */
+						DEBUG("header %s fd %d buf " "%p[%d]\n", cow_file, tmp.fd, workspace, sizeof(struct cow_header_v3));
+						pwrite64(tmp.fd, &workspace->v3, sizeof(struct cow_header_v3), (off64_t) 0);
+					}
+					count  = tmp.data_offset;
+					count += tmp.data_length;
+					count -= sizeof(zero);
+					DEBUG("seek %s fd %d buf %p[%d]:%x\n",
+						cow_file, tmp.fd, zero,
+						sizeof(zero), count);
+					pwrite64(tmp.fd, &zero, sizeof(zero), (off64_t) count);
+				} else {
+					ERROR("failed to open backing file %s not creating COW file %s\n", name, cow_file);
+					goto close1;
+				}
+			}
+		} else {
+			/* dont create so error instead */
+			ERROR("unable to find a COW file %s for %s\n",
+					cow_file, name);
+			goto close1;
+		}
+	}
+
+	/* if any version cow file do the checks */
+	if (version) {
+		/* first recurse down the chain */
+		if (!tmp.next_cow_dev) {
+			if (!name)
+				name = cow_name;
+			if (strcmp(cow_name, name)) {
+				ERROR("filename %s is not the backing "
+						"file of %s\n" , name,
+						cow_name);
+			}
+			in_flags &= ~COW_CREATE;
+			in_flags |=  COW_RDONLY;
+			tmp.next_cow_dev = open_COW(name, in_flags);
+
+			/* if we did not open the specified file ? */
+			if (!tmp.next_cow_dev) {
+				ERROR("failed to open %s as backing "
+						"for %s\n", name,
+						cow_file);
+				goto close1;
+			}
+		}
+		if (name && strcmp(cow_name, name)) {
+			ERROR("filename %s is not the backing file of %s\n" ,
+					name, cow_name);
+		}
+		if (tmp.mtime != tmp.next_cow_dev->mtime) {
+			ERROR("bad mtimes %llu != %llu\n" , (unsigned long long) tmp.mtime,
+					(unsigned long long) tmp.next_cow_dev->mtime);
+			goto close1;
+		}
+		if (tmp.data_length != tmp.next_cow_dev->data_length) {
+			ERROR("bad data length %llu != %llu\n" ,
+					tmp.data_length,
+					tmp.next_cow_dev->data_length);
+			goto close1;
+		}
+	}
+
+	count = sizeof(tmp);
+	count += strlen(cow_file)+1;
+	if (tmp.cow_length) count += MAP_SIZE;
+	result = ubd_alloc(count);
+
+	if (!result) {
+		ERROR("failed to allocate device structure for %s\n",cow_file);
+		goto close1;
+	}
+
+	memcpy(result, &tmp, sizeof(tmp));
+	result->name = (char *)&result[1];
+	strcpy(result->name, cow_file);
+	if (result->cow_length) {
+		/* put the map page just after the dev struct */
+		result->cow_bitmap = &result->name[strlen(cow_file)+1];
+		result->cow_current = 0;
+		DEBUG("setup %s fd %d buf %p[%d]:%x\n", result->name,
+				result->fd, result->cow_bitmap, MAP_SIZE,
+				result->cow_offset + divide8(result->cow_current));
+		n = pread64(result->fd, result->cow_bitmap, MAP_SIZE,
+				result->cow_offset + divide8(result->cow_current));
+		if (n < MAP_SIZE) {
+			ERROR("failed to read COW data %d of %d error %s\n",
+					n, MAP_SIZE, strerror(errno));
+			goto free;
+		}
+	}
+	ubd_free(workspace); /* header copy (and cow_name/name inside it) is done with */
+	return result;
+free:
+	ubd_free(result);
+close1:
+	if (workspace) ubd_free(workspace);
+	if (tmp.fd >= 0) {
+		if (tmp.next_cow_dev)
+			close_COW(tmp.next_cow_dev);
+		close(tmp.fd);
+	}
+	return NULL;
+}
+
+/*
+ * Make sure the cached bitmap page covers sector `offset`, writing the old
+ * page back first if it is dirty.  Returns the bit index in the page or -1.
+ */
+static int remap_COW(struct devinfo *dev, off64_t offset)
+{
+	int result;
+	off64_t index, offset2;
+
+	if (offset >= dev->cow_length) {
+		ERROR("access %llu past end of COW %llu\n",
+				(unsigned long long) offset,
+				(unsigned long long) dev->cow_length);
+		return -1; /* error no data */
+	}
+
+	index = offset - dev->cow_current;
+	if (index < 0 || (MAP_SIZE * 8) <= index) {
+		if (dev->flags & COW_DIRTY) {
+			offset2 = dev->cow_offset + divide8(dev->cow_current);
+			DEBUG("dirty %s fd %d buf %p[%d]:%x\n", dev->name,
+					dev->fd, dev->cow_bitmap, MAP_SIZE,
+					offset2);
+			result = pwrite64(dev->fd, dev->cow_bitmap, MAP_SIZE, offset2);
+			if (result < MAP_SIZE) {
+				ERROR("Eek, short write in COW %d of %d\n",
+						result, MAP_SIZE);
+				return -1;
+			}
+		}
+		dev->flags &= ~COW_DIRTY;
+		dev->cow_current = offset & ~((MAP_SIZE * 8)-1);
+		offset2 = dev->cow_offset + divide8(dev->cow_current);
+		DEBUG("mapping %s fd %d buf %p[%d]:%x\n", dev->name, dev->fd,
+				dev->cow_bitmap, MAP_SIZE, offset2);
+		result = pread64(dev->fd, dev->cow_bitmap, MAP_SIZE, offset2);
+		if (result < MAP_SIZE) {
+			ERROR("failed to read COW data %d of %d error %s\n",
+					result, MAP_SIZE, strerror(errno));
+			/* page contents are unknown now: make every later lookup reload */
+			dev->cow_current = dev->cow_length;
+			return -1; /* um we died */
+		}
+		index = offset - dev->cow_current; /* recompute map offset */
+	}
+	return index;
+}
+
+/*
+ * Read `length` sectors starting at sector `offset` into buf.  A sector whose
+ * bit is set in this layer's map comes from this file, anything else from the
+ * backing chain.  Returns the number of sectors read, short after an error.
+ */
+ssize_t read_COW(struct devinfo *dev, void *buf, size_t length, __u64 offset)
+{
+	off64_t work = dev->data_offset + (offset * dev->sector_size);
+	ssize_t count, result = 0;
+	struct devinfo *back_dev = dev->next_cow_dev;
+
+	while (length) {
+		if (dev->cow_length) {
+			/* compute map index */
+			int index = remap_COW(dev,offset);
+
+			if (index < 0)
+				return result;
+
+			if (ubd_test_bit(index, (unsigned char *) dev->cow_bitmap)) {
+				DEBUG("backed %s fd %d buf %p[%d]:%x\n",
+						dev->name, dev->fd, buf,
+						dev->sector_size, work);
+				count = pread64(dev->fd, buf, dev->sector_size, work);
+				goto one_done;
+			}
+		}
+		/* recurse down the chain to the backing files */
+		if (back_dev) {
+			count = read_COW(back_dev, buf, 1, offset);
+			count *= dev->sector_size;
+		} else {
+			DEBUG("reading %s fd %d buf %p[%d]:%x\n",
+						dev->name, dev->fd, buf,
+						dev->sector_size, offset);
+			count = pread64(dev->fd, buf, dev->sector_size, work);
+		}
+one_done:
+		if (count < dev->sector_size) {
+			ERROR("short read on %d:%d only got %d wanted %d "
+					"error %s\n", dev->fd,
+					back_dev ? back_dev->fd : -1, (int) count,
+					dev->sector_size, strerror(errno));
+			return result;
+		}
+		buf = (char *) buf + count;
+		work += count;
+		result++;
+		length--;
+		offset++;
+	}
+	return result;
+}
+
+/*
+ * Write `length` sectors from buf starting at sector `offset`; returns the
+ * number of sectors written, which is short if an error was hit.
+ */
+ssize_t write_COW(struct devinfo *dev, void *buf, size_t length, __u64 offset)
+{
+	off64_t work = dev->data_offset + (offset * dev->sector_size);
+	ssize_t count, result = 0;
+
+	while (length) {
+		if (dev->cow_length) {
+			/* compute map index */
+			int index = remap_COW(dev,offset);
+			if (index < 0)
+				return result;
+			/* write out to the data section */
+			DEBUG("dataout %s fd %d buf %p[%d]:%x\n", dev->name,
+					dev->fd, buf, dev->sector_size, work);
+			count = pwrite64(dev->fd, buf, dev->sector_size, work);
+			if (count < dev->sector_size) {
+				ERROR("write %d:buf[%p]+%llx:%d\n", dev->fd, buf, (long long) work, dev->sector_size);
+				return result;
+			}
+			/* mark the sector only once its data is in the file */
+			ubd_set_bit(index, (unsigned char *) dev->cow_bitmap);
+			dev->flags |= COW_DIRTY;
+			goto one_done;
+		}
+		/* ok this is the sparse calculation it is used to merge the
+		 * COW file with a backing file.
+		 * First we check the SPARSE flag should be set only when
+		 * running as uml_moo.
+		 * 2nd we check if the buffer to be written is all zeros
+		 * if it was all zeros then read the backing file to find if
+		 * the base file is also all zeros also this will handle the
+		 * case where zeros are over written in the COW file on top
+		 * of a non-zero backing file sector.
+		 */
+		if ((dev->flags & COW_SPARSE) && dev->next_cow_dev &&
+		    (memcmp(buf, zero, dev->sector_size) == 0)) {
+			int hole;
+			count = read_COW(dev->next_cow_dev, buf, 1, offset);
+			if (count != 1)
+				ERROR("did not read file for zero check\n");
+			hole = (count == 1) && (memcmp(buf, zero, dev->sector_size) == 0);
+			/* the check borrowed the caller's buffer: make it zero again */
+			memset(buf, 0, dev->sector_size);
+			if (hole) {
+				/* backing sector is zero too, nothing to store */
+				count = dev->sector_size;
+				goto one_done;
+			}
+		}
+		DEBUG("writeout %s fd %d buf %p[%d]:%x\n", dev->name, dev->fd,
+				buf, dev->sector_size, work);
+		count = pwrite64(dev->fd, buf, dev->sector_size, work);
+		if (count < dev->sector_size) {
+			ERROR("write %d:buf[%p]+%llx:%d\n",dev->fd, buf,
+					(long long) work, dev->sector_size);
+			return result;
+		}
+one_done:
+		result++;
+		buf = (char *) buf + count;
+		work += count;
+		length--;
+		offset++;
+	}
+	return result;
+}
+
+#ifdef TEST
+struct devinfo *fd;
+char *buffer;
+/* TEST driver: copy every sector readable through argv[1] back through write_COW */
+int main(int argc,char *argv[])
+{
+	int i, nr, nw, status = 1; /* exit status, cleared by a clean copy */
+
+#define COUNT 10
+	buffer = ubd_alloc(SECTOR_SIZE * COUNT);
+	if (!buffer) {
+		printf("error unable to alloc enough buffer\n");
+		return 1;
+	}
+	if (argc > 1) {
+		fd = open_COW(argv[1],COW_CREATE|COW_SPARSE|COW_NOMAP);
+	} else {
+		printf("%s is used with MOOFILE,COWFILE\n",argv[0]);
+	}
+	printf("fd = %p\n",(void *) fd);
+	if (fd) {
+		i = 0;
+		do {
+			nr = read_COW(fd, buffer, COUNT, i);
+			nw = write_COW(fd, buffer, nr, i);
+			i += nr;
+			status = (nr != nw);
+			if (status) {
+				printf("error get %d wrote %d\n",nr,nw);
+				break;
+			}
+		} while (nr);
+		close_COW(fd);
+	}
+	ubd_free(buffer);
+	return status;
+}
+#endif
+
diff -Nur uml-2.4.20-5/arch/um/drivers/Makefile \
                work-2.4.20-5/arch/um/drivers/Makefile
--- uml-2.4.20-5/arch/um/drivers/Makefile	2003-05-23 07:02:29.000000000 -0700
+++ work-2.4.20-5/arch/um/drivers/Makefile	2003-05-23 07:47:54.000000000 -0700
@@ -18,7 +18,7 @@
 net-objs := net_kern.o net_user.o
 mconsole-objs := mconsole_kern.o mconsole_user.o
 hostaudio-objs := hostaudio_kern.o hostaudio_user.o
-ubd-objs := ubd_kern.o ubd_user.o
+ubd-objs := ubd_kern.o ubd_user.o cow_user.o
 port-objs := port_kern.o port_user.o
 harddog-objs := harddog_kern.o harddog_user.o
 
diff -Nur uml-2.4.20-5/arch/um/drivers/ubd_kern.c \
                work-2.4.20-5/arch/um/drivers/ubd_kern.c
--- uml-2.4.20-5/arch/um/drivers/ubd_kern.c	2003-05-23 07:02:29.000000000 -0700
+++ work-2.4.20-5/arch/um/drivers/ubd_kern.c	2003-05-23 12:51:10.000000000 -0700
@@ -4,7 +4,7 @@
  */
 
 /* 2001-09-28...2002-04-17
- * Partition stuff by James_McMechan@hotmail.com
+ * Partition stuff by James_McMechan at hotmail.com
  * old style ubd by setting UBD_SHIFT to 0
  */
 
@@ -40,6 +40,7 @@
 #include "ubd_user.h"
 #include "2_5compat.h"
 #include "os.h"
+#include "cow.h"
 
 static int ubd_open(struct inode * inode, struct file * filp);
 static int ubd_release(struct inode * inode, struct file * file);
@@ -48,15 +49,14 @@
 static int ubd_revalidate(kdev_t rdev);
 static int ubd_revalidate1(kdev_t rdev);
 
-#define MAX_DEV (8)
-#define MAX_MINOR (MAX_DEV << UBD_SHIFT)
+#define NM (256)
+#define MAX_DEV (NM >> UBD_SHIFT)
 
-/* Not modified by this driver */
-static int blk_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = BLOCK_SIZE };
-static int hardsect_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 512 };
+static int blk_sizes[NM] = { [ 0 ... NM - 1 ] = BLOCK_SIZE };
+static int hardsect_sizes[NM] = { [ 0 ... NM - 1 ] = 512 };
 
 /* Protected by ubd_lock */
-static int sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 0 };
+static int sizes[NM] = { [ 0 ... NM - 1 ] = 0 };
 
 static struct block_device_operations ubd_blops = {
         .open		= ubd_open,
@@ -68,8 +68,7 @@
 /* Protected by ubd_lock, except in prepare_request and ubd_ioctl because 
  * the block layer should ensure that the device is idle before closing it.
  */
-static struct hd_struct	ubd_part[MAX_MINOR] =
-	{ [ 0 ... MAX_MINOR - 1 ] = { 0, 0, 0 } };
+static struct hd_struct	ubd_part[NM] = { [ 0 ... NM - 1 ] = { 0, 0, 0 } };
 
 /* Protected by io_request_lock */
 static request_queue_t *ubd_queue;
@@ -115,43 +114,24 @@
  */
 static struct openflags global_openflags = OPEN_FLAGS;
 
-struct cow {
-	char *file;
-	int fd;
-	unsigned long *bitmap;
-	unsigned long bitmap_len;
-	int bitmap_offset;
-        int data_offset;
-};
-
 struct ubd {
 	char *file;
 	int count;
-	int fd;
-	__u64 size;
+	struct devinfo *info;
+	int n;
 	struct openflags boot_openflags;
 	struct openflags openflags;
 	devfs_handle_t devfs;
-	struct cow cow;
 };
 
-#define DEFAULT_COW { \
-	.file			= NULL, \
-        .fd			= -1, \
-        .bitmap			= NULL, \
-	.bitmap_offset		= 0, \
-        .data_offset		= 0, \
-}
-
 #define DEFAULT_UBD { \
 	.file 			= NULL, \
 	.count			= 0, \
-	.fd			= -1, \
-	.size			= -1, \
+	.info			= NULL, \
+	.n			= 0, \
 	.boot_openflags		= OPEN_FLAGS, \
 	.openflags		= OPEN_FLAGS, \
 	.devfs			= NULL, \
-        .cow			= DEFAULT_COW, \
 }
 
 struct ubd ubd_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD };
@@ -186,9 +166,9 @@
 	strcpy(page, "disk\n");
 	len = strlen("disk\n");
 	len -= off;
-	if (len < count){
+	if (len < count) {
 		*eof = 1;
-		if (len <= 0) return 0;
+		if (len <= 0) return (0);
 	}
 	else len = count;
 	*start = page + off;
@@ -236,15 +216,16 @@
 "    Create ide0 entries that map onto ubd devices.\n\n"
 );
 
-static int parse_unit(char **ptr)
+static struct ubd *parse_unit(char **ptr)
 {
+	struct ubd *dev;
 	char *str = *ptr, *end;
 	int n = -1;
 
 	if(isdigit(*str)) {
 		n = simple_strtoul(str, &end, 0);
 		if(end == str)
-			return(-1);
+			n = -1;
 		*ptr = end;
 	}
 	else if (('a' <= *str) && (*str <= 'h')) {
@@ -252,35 +233,37 @@
 		str++;
 		*ptr = str;
 	}
-	return(n);
+	/* later this will look up from the dev list */
+	if ((0 <= n) && (n < MAX_DEV)) {
+		dev = &ubd_dev[n];
+		dev->n = n;
+	} else {
+		dev = 0;
+	}
+	return (dev);
 }
 
-static int ubd_setup_common(char *str, int *index_out)
+static struct ubd *ubd_setup_common(char *str)
 {
 	struct openflags flags = global_openflags;
-	struct ubd *dev;
-	char *backing_file;
-	int n, err;
+	struct ubd *dev = NULL;
 
-	if(index_out) *index_out = -1;
-	n = *str;
-	if(n == '='){
+	if(*str == '='){
 		char *end;
 		int major;
 
 		str++;
 		if(!strcmp(str, "sync")){
 			global_openflags.s = 1;
-			return(0);
+			return NULL;
 		}
 		major = simple_strtoul(str, &end, 0);
 		if((*end != '\0') || (end == str)){
 			printk(KERN_ERR 
 			       "ubd_setup : didn't parse major number\n");
-			return(1);
+			return NULL;
 		}
 
-		err = 1;
 		spin_lock(&ubd_lock);
 		if(fake_major != MAJOR_NR){
 			printk(KERN_ERR "Can't assign a fake major twice\n");
@@ -292,36 +275,25 @@
 	
 		printk(KERN_INFO "Setting extra ubd major number to %d\n",
 		       major);
-		err = 0;
-	out1:
+out1:
 		spin_unlock(&ubd_lock);
-		return(err);
+		return NULL;
 	}
 
-	n = parse_unit(&str);
-	if(n < 0){
-		printk(KERN_ERR "ubd_setup : couldn't parse unit number "
-		       "'%s'\n", str);
-		return(1);
+	dev = parse_unit(&str);
+	if (dev == 0){
+		printk(KERN_ERR "ubd_setup : index out of range %s\n", str);
+		return NULL;
 	}
 
-	if(n >= MAX_DEV){
-		printk(KERN_ERR "ubd_setup : index %d out of range "
-		       "(%d devices)\n", n, MAX_DEV);	
-		return(1);
-	}
-
-	err = 1;
 	spin_lock(&ubd_lock);
 
-	dev = &ubd_dev[n];
 	if(dev->file != NULL){
 		printk(KERN_ERR "ubd_setup : device already configured\n");
+		dev = NULL;
 		goto out2;
 	}
 
-	if(index_out) *index_out = n;
-
 	if (*str == 'r'){
 		flags.w = 0;
 		str++;
@@ -335,23 +307,16 @@
 		goto out2;
 	}
 
-	err = 0;
-	backing_file = strchr(str, ',');
-	if(backing_file){
-		*backing_file = '\0';
-		backing_file++;
-	}
 	dev->file = str;
-	dev->cow.file = backing_file;
 	dev->boot_openflags = flags;
- out2:
+out2:
 	spin_unlock(&ubd_lock);
-	return(err);
+	return (dev);
 }
 
 static int ubd_setup(char *str)
 {
-	ubd_setup_common(str, NULL);
+	ubd_setup_common(str);
 	return(1);
 }
 
@@ -428,8 +393,8 @@
 		return;
 	}
         
-        if((req.offset != ((__u64) (CURRENT->sector)) << 9) ||
-	   (req.length != (CURRENT->current_nr_sectors) << 9))
+        if((req.offset != ((__u64) (CURRENT->sector))) ||
+	   (req.length != (CURRENT->current_nr_sectors)))
 		panic("I/O op mismatch");
 	
 	spin_lock(&io_request_lock);
@@ -458,11 +423,11 @@
 /* Initialized in an initcall, and unchanged thereafter */
 devfs_handle_t ubd_dir_handle;
 
-static int ubd_add(int n)
+static int ubd_add(struct ubd *dev)
 {
-	struct ubd *dev = &ubd_dev[n];
 	char name[sizeof("nnnnnn\0")], dev_name[sizeof("ubd0x")];
 	int err = -EISDIR;
+	int n = dev->n;
 
 	if(dev->file == NULL)
 		goto out;
@@ -491,24 +456,24 @@
 
 static int ubd_config(char *str)
 {
-	int n, err;
+	int err;
+	struct ubd *dev;
 
 	str = uml_strdup(str);
 	if(str == NULL){
 		printk(KERN_ERR "ubd_config failed to strdup string\n");
 		return(1);
 	}
-	err = ubd_setup_common(str, &n);
-	if(err){
+	dev = ubd_setup_common(str);
+	if (dev == 0) {
 		kfree(str);
 		return(-1);
 	}
-	if(n == -1) return(0);
 
 	spin_lock(&ubd_lock);
-	err = ubd_add(n);
+	err = ubd_add(dev);
 	if(err)
-		ubd_dev[n].file = NULL;
+		dev->file = NULL;
 	spin_unlock(&ubd_lock);
 
 	return(err);
@@ -516,22 +481,15 @@
 
 static int ubd_get_config(char *name, char *str, int size, char **error_out)
 {
+	int len = 0;
 	struct ubd *dev;
-	char *end;
-	int n, len = 0;
-
-	n = simple_strtoul(name, &end, 0);
-	if((*end != '\0') || (end == name)){
-		*error_out = "ubd_get_config : didn't parse device number";
-		return(-1);
-	}
 
-	if((n >= MAX_DEV) || (n < 0)){
+	dev = parse_unit(&str);
+	if (dev == 0) {
 		*error_out = "ubd_get_config : device number out of range";
 		return(-1);
 	}
 
-	dev = &ubd_dev[n];
 	spin_lock(&ubd_lock);
 
 	if(dev->file == NULL){
@@ -541,9 +499,9 @@
 
 	CONFIG_CHUNK(str, size, len, dev->file, 0);
 
-	if(dev->cow.file != NULL){
+	if(dev->info->name != NULL){
 		CONFIG_CHUNK(str, size, len, ",", 0);
-		CONFIG_CHUNK(str, size, len, dev->cow.file, 1);
+		CONFIG_CHUNK(str, size, len, dev->info->name, 1);
 	}
 	else CONFIG_CHUNK(str, size, len, "", 1);
 
@@ -555,29 +513,17 @@
 static int ubd_remove(char *str)
 {
 	struct ubd *dev;
-	int n, err = -ENODEV;
-
-	if(isdigit(*str)){
-		char *end;
-		n = simple_strtoul(str, &end, 0);
-		if ((*end != '\0') || (end == str)) 
-			return(err);
-	}
-	else if (('a' <= *str) && (*str <= 'h'))
-		n = *str - 'a';
-	else
-		return(err);	/* it should be a number 0-7/a-h */
+	int err = -EBUSY;
 
-	if((n < 0) || (n >= MAX_DEV))
-		return(err);
-
-	dev = &ubd_dev[n];
+	dev = parse_unit(&str);
+	if (dev == NULL)
+		return (-ENODEV);	/* could not find device */
 
 	spin_lock(&ubd_lock);
-	err = 0;
-	if(dev->file == NULL)
+
+	if (dev->file == NULL)
 		goto out;
-	err = -1;
+
 	if(dev->count > 0)
 		goto out;
 	if(dev->devfs != NULL) 
@@ -587,7 +533,7 @@
 	err = 0;
  out:
 	spin_unlock(&ubd_lock);
-	return(err);
+	return (err);
 }
 
 static struct mc_device ubd_mc = {
@@ -600,14 +546,14 @@
 static int ubd_mc_init(void)
 {
 	mconsole_register_dev(&ubd_mc);
-	return(0);
+	return (0);
 }
 
 __initcall(ubd_mc_init);
 
 static request_queue_t *ubd_get_queue(kdev_t device)
 {
-	return(ubd_queue);
+	return (ubd_queue);
 }
 
 int ubd_init(void)
@@ -654,8 +600,11 @@
                 add_gendisk(&fake_gendisk);
 	}
 
-	for(i=0;i<MAX_DEV;i++) 
-		ubd_add(i);
+	for (i = 0; i < MAX_DEV; i++) {
+		struct ubd *dev = &ubd_dev[i];
+		dev->n = i;
+		ubd_add(dev);
+ 	}
 
 	if(global_openflags.s){
 		printk(KERN_INFO "ubd : Synchronous mode\n");
@@ -681,70 +630,31 @@
 
 static void ubd_close(struct ubd *dev)
 {
-	os_close_file(dev->fd);
-	if(dev->cow.file != NULL) {
-		os_close_file(dev->cow.fd);
-		vfree(dev->cow.bitmap);
-		dev->cow.bitmap = NULL;
+	if (dev->info) {
+		//close_COW(dev->info);
+		//dev->info = NULL;
+	} else {
+		printk(KERN_ERR"Closing a ubd device that was not open\n");
 	}
 }
 
 static int ubd_open_dev(struct ubd *dev)
 {
-	struct openflags flags;
-	int err, create_cow, *create_ptr;
+	int mode;
 
 	dev->openflags = dev->boot_openflags;
-	create_cow = 0;
-	create_ptr = (dev->cow.file != NULL) ? &create_cow : NULL;
-	dev->fd = open_ubd_file(dev->file, &dev->openflags, &dev->cow.file,
-				&dev->cow.bitmap_offset, &dev->cow.bitmap_len, 
-				&dev->cow.data_offset, create_ptr);
-
-	if((dev->fd == -ENOENT) && create_cow){
-		dev->fd = create_cow_file(dev->file, dev->cow.file, 
-					  dev->openflags, 1 << 9,
-					  &dev->cow.bitmap_offset, 
-					  &dev->cow.bitmap_len,
-					  &dev->cow.data_offset);
-		if(dev->fd >= 0){
-			printk(KERN_INFO "Creating \"%s\" as COW file for "
-			       "\"%s\"\n", dev->file, dev->cow.file);
-		}
-	}
 
-	if(dev->fd < 0) return(dev->fd);
-
-	if(dev->cow.file != NULL){
-		err = -ENOMEM;
-		dev->cow.bitmap = (void *) vmalloc(dev->cow.bitmap_len);
-		if(dev->cow.bitmap == NULL) goto error;
-		flush_tlb_kernel_vm();
-
-		err = read_cow_bitmap(dev->fd, dev->cow.bitmap, 
-				      dev->cow.bitmap_offset, 
-				      dev->cow.bitmap_len);
-		if(err) goto error;
-
-		flags = dev->openflags;
-		flags.w = 0;
-		err = open_ubd_file(dev->cow.file, &flags, NULL, NULL, NULL, 
-				    NULL, NULL);
-		if(err < 0) goto error;
-		dev->cow.fd = err;
-	}
-	return(0);
- error:
-	os_close_file(dev->fd);
-	return(err);
-}
+	if (dev->openflags.w)
+		mode = COW_CREATE;
+	else
+		mode = COW_RDONLY;
 
-static int ubd_file_size(struct ubd *dev, __u64 *size_out)
-{
-	char *file;
+	if (!dev->info)
+		dev->info = open_COW(dev->file, mode);
 
-	file = dev->cow.file ? dev->cow.file : dev->file;
-	return(os_file_size(file, size_out));
+	if (dev->info == 0)
+		return(-ENODEV);
+	return (0);
 }
 
 static int ubd_open(struct inode *inode, struct file *filp)
@@ -760,89 +670,51 @@
 	spin_lock(&ubd_lock);
 	offset = n << UBD_SHIFT;
 
-	if(dev->count == 0){
+	if (dev->count == 0) {
+		dev->openflags = dev->boot_openflags;
+
 		err = ubd_open_dev(dev);
-		if(err){
+		if (err) {
 			printk(KERN_ERR "ubd%d: Can't open \"%s\": "
-			       "errno = %d\n", n, dev->file, -err);
+					"errno = %d\n", n, dev->file, -err);
 			goto out;
 		}
-		err = ubd_file_size(dev, &dev->size);
-		if(err)
-			goto out;
-		sizes[offset] = dev->size / BLOCK_SIZE;
-		ubd_part[offset].nr_sects = dev->size / hardsect_sizes[offset];
+		sizes[offset] = dev->info->data_length / BLOCK_SIZE;
+		ubd_part[offset].nr_sects = dev->info->data_length / hardsect_sizes[offset];
 	}
 	dev->count++;
-	if((filp->f_mode & FMODE_WRITE) && !dev->openflags.w){
-	        if(--dev->count == 0) ubd_close(dev);
+	if ((filp->f_mode & FMODE_WRITE) && !dev->openflags.w) {
+	        if (--dev->count == 0) ubd_close(dev);
 	        err = -EROFS;
 	}
- out:
+out:
 	spin_unlock(&ubd_lock);
-	return(err);
+	return (err);
 }
 
 static int ubd_release(struct inode * inode, struct file * file)
 {
         int n, offset;
+	struct ubd *dev;
 
 	n =  DEVICE_NR(inode->i_rdev);
 	offset = n << UBD_SHIFT;
 	if(n >= MAX_DEV)
 		return -ENODEV;
+	dev = &ubd_dev[n];	/* ubd_dev[] holds structs, so take the address */
 
 	spin_lock(&ubd_lock);
-	if(--ubd_dev[n].count == 0)
-		ubd_close(&ubd_dev[n]);
+	if(--dev->count == 0)
+		ubd_close(dev);
 	spin_unlock(&ubd_lock);
 
 	return(0);
 }
 
-void cowify_req(struct io_thread_req *req, struct ubd *dev)
-{
-        int i, update_bitmap, sector = req->offset >> 9;
-
-	if(req->length > (sizeof(req->sector_mask) * 8) << 9)
-		panic("Operation too long");
-	if(req->op == UBD_READ) {
-		for(i = 0; i < req->length >> 9; i++){
-			if(ubd_test_bit(sector + i, (unsigned char *) 
-					dev->cow.bitmap)){
-				ubd_set_bit(i, (unsigned char *) 
-					    &req->sector_mask);
-			}
-                }
-        } 
-        else {
-		update_bitmap = 0;
-		for(i = 0; i < req->length >> 9; i++){
-			ubd_set_bit(i, (unsigned char *) 
-				    &req->sector_mask);
-			if(!ubd_test_bit(sector + i, (unsigned char *) 
-					 dev->cow.bitmap))
-				update_bitmap = 1;
-			ubd_set_bit(sector + i, (unsigned char *) 
-				    dev->cow.bitmap);
-		}
-		if(update_bitmap){
-			req->cow_offset = sector / (sizeof(unsigned long) * 8);
-			req->bitmap_words[0] = 
-				dev->cow.bitmap[req->cow_offset];
-			req->bitmap_words[1] = 
-				dev->cow.bitmap[req->cow_offset + 1];
-			req->cow_offset *= sizeof(unsigned long);
-			req->cow_offset += dev->cow.bitmap_offset;
-		}
-	}
-}
-
 static int prepare_request(struct request *req, struct io_thread_req *io_req)
 {
 	struct ubd *dev;
-	__u64 block;
-	int nsect, minor, n;
+	int minor, n;
 
 	if(req->rq_status == RQ_INACTIVE) return(1);
 
@@ -856,23 +728,15 @@
 	}
 
         req->sector += ubd_part[minor].start_sect;
-        block = req->sector;
-        nsect = req->current_nr_sectors;
 
 	io_req->op = (req->cmd == READ) ? UBD_READ : UBD_WRITE;
-	io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd;
-	io_req->fds[1] = dev->fd;
-	io_req->offsets[0] = 0;
-	io_req->offsets[1] = dev->cow.data_offset;
-	io_req->offset = ((__u64) block) << 9;
-	io_req->length = nsect << 9;
+	io_req->info = dev->info;;
+	io_req->offset = req->sector;
+	io_req->length = req->current_nr_sectors;
 	io_req->buffer = req->buffer;
 	io_req->sectorsize = 1 << 9;
-	io_req->sector_mask = 0;
-	io_req->cow_offset = -1;
 	io_req->error = 0;
 
-        if(dev->cow.file != NULL) cowify_req(io_req, dev);
 	return(0);
 }
 
@@ -882,7 +746,7 @@
 	struct request *req;
 	int err, n;
 
-	if(thread_fd == -1){
+	if (thread_fd == -1) {
 		while(!list_empty(&q->queue_head)){
 			req = blkdev_entry_next_request(&q->queue_head);
 			err = prepare_request(req, &io_req);
@@ -891,18 +755,19 @@
 				ubd_finish(io_req.error);
 			}
 		}
-	}
-	else {
-		if(DEVICE_INTR || list_empty(&q->queue_head)) return;
+	} else {
+		if (DEVICE_INTR || list_empty(&q->queue_head))
+			return;
+
 		req = blkdev_entry_next_request(&q->queue_head);
 		err = prepare_request(req, &io_req);
-		if(!err){
+		if (!err) {
 			SET_INTR(ubd_handler);
-			n = write_ubd_fs(thread_fd, (char *) &io_req, 
-					 sizeof(io_req));
+			n = write_ubd_fs(thread_fd, (char *) &io_req,
+					sizeof(io_req));
 			if(n != sizeof(io_req))
 				printk("write to io thread failed, "
-				       "errno = %d\n", -n);
+						"errno = %d\n", -n);
 		}
 	}
 }
@@ -913,11 +778,6 @@
 	struct hd_geometry *loc = (struct hd_geometry *) arg;
  	struct ubd *dev;
 	int n, minor, err;
-	struct hd_driveid ubd_id = {
-		.cyls		= 0,
-		.heads		= 128,
-		.sectors	= 32,
-	};
 	
         if(!inode) return(-EINVAL);
 	minor = MINOR(inode->i_rdev);
@@ -926,13 +786,14 @@
 		return(-EINVAL);
 	dev = &ubd_dev[n];
 	switch (cmd) {
-	        struct hd_geometry g;
+	        struct hd_geometry g = {};
+		struct hd_driveid ubd_id = {};
 		struct cdrom_volctrl volume;
 	case HDIO_GETGEO:
 		if(!loc) return(-EINVAL);
-		g.heads = 128;
-		g.sectors = 32;
-		g.cylinders = dev->size / (128 * 32 * hardsect_sizes[minor]);
+		g.heads = dev->info->heads;
+		g.sectors = dev->info->sectors;
+		g.cylinders = dev->info->cylinders;
 		g.start = 2;
 		return(copy_to_user(loc, &g, sizeof(g)) ? -EFAULT : 0);
 	case BLKGETSIZE:   /* Return device size */
@@ -970,7 +831,9 @@
 		return(0);
 
 	case HDIO_GET_IDENTITY:
-		ubd_id.cyls = dev->size / (128 * 32 * hardsect_sizes[minor]);
+		ubd_id.heads = dev->info->heads;
+		ubd_id.sectors = dev->info->sectors;
+		ubd_id.cyls = dev->info->cylinders;
 		if(copy_to_user((char *) arg, (char *) &ubd_id, 
 				 sizeof(ubd_id)))
 			return(-EFAULT);
@@ -1027,13 +890,9 @@
 		}
 
 		/* have to recompute sizes since we opened it */
-		err = ubd_file_size(dev, &dev->size);
-		if(err) {
-			ubd_close(dev);
-			goto out;
-		}
 		part->start_sect = 0;
-		part->nr_sects = dev->size / hardsect_sizes[offset];
+		part->nr_sects = dev->info->data_length /
+			hardsect_sizes[offset];
 		register_disk(&ubd_gendisk, MKDEV(MAJOR_NR, offset), pcount, 
 			      &ubd_blops, part->nr_sects);
 
diff -Nur uml-2.4.20-5/arch/um/drivers/ubd_user.c \
                work-2.4.20-5/arch/um/drivers/ubd_user.c
--- uml-2.4.20-5/arch/um/drivers/ubd_user.c	2003-05-23 07:02:29.000000000 -0700
+++ work-2.4.20-5/arch/um/drivers/ubd_user.c	2003-05-23 08:57:21.000000000 -0700
@@ -24,6 +24,7 @@
 #include "user.h"
 #include "ubd_user.h"
 #include "os.h"
+#include "cow.h"
 
 #include <endian.h>
 #include <byteswap.h>
@@ -37,184 +38,6 @@
 #error "__BYTE_ORDER not defined"
 #endif
 
-#define PATH_LEN_V1 256
-
-struct cow_header_v1 {
-	int magic;
-	int version;
-	char backing_file[PATH_LEN_V1];
-	time_t mtime;
-	__u64 size;
-	int sectorsize;
-};
-
-#define PATH_LEN_V2 MAXPATHLEN
-
-struct cow_header_v2 {
-	unsigned long magic;
-	unsigned long version;
-	char backing_file[PATH_LEN_V2];
-	time_t mtime;
-	__u64 size;
-	int sectorsize;
-};
-
-union cow_header {
-	struct cow_header_v1 v1;
-	struct cow_header_v2 v2;
-};
-
-#define COW_MAGIC 0x4f4f4f4d  /* MOOO */
-#define COW_VERSION 2
-
-static void sizes(__u64 size, int sectorsize, int bitmap_offset, 
-		  unsigned long *bitmap_len_out, int *data_offset_out)
-{
-	*bitmap_len_out = (size + sectorsize - 1) / (8 * sectorsize);
-
-	*data_offset_out = bitmap_offset + *bitmap_len_out;
-	*data_offset_out = (*data_offset_out + sectorsize - 1) / sectorsize;
-	*data_offset_out *= sectorsize;
-}
-
-static int read_cow_header(int fd, int *magic_out, char **backing_file_out, 
-			   time_t *mtime_out, __u64 *size_out, 
-			   int *sectorsize_out, int *bitmap_offset_out)
-{
-	union cow_header *header;
-	char *file;
-	int err, n;
-	unsigned long version, magic;
-
-	header = um_kmalloc(sizeof(*header));
-	if(header == NULL){
-		printk("read_cow_header - Failed to allocate header\n");
-		return(-ENOMEM);
-	}
-	err = -EINVAL;
-	n = read(fd, header, sizeof(*header));
-	if(n < offsetof(typeof(header->v1), backing_file)){
-		printk("read_cow_header - short header\n");
-		goto out;
-	}
-
-	magic = header->v1.magic;
-	if(magic == COW_MAGIC) {
-		version = header->v1.version;
-	}
-	else if(magic == ntohl(COW_MAGIC)){
-		version = ntohl(header->v1.version);
-	}
-	else goto out;
-
-	*magic_out = COW_MAGIC;
-
-	if(version == 1){
-		if(n < sizeof(header->v1)){
-			printk("read_cow_header - failed to read V1 header\n");
-			goto out;
-		}
-		*mtime_out = header->v1.mtime;
-		*size_out = header->v1.size;
-		*sectorsize_out = header->v1.sectorsize;
-		*bitmap_offset_out = sizeof(header->v1);
-		file = header->v1.backing_file;
-	}
-	else if(version == 2){
-		if(n < sizeof(header->v2)){
-			printk("read_cow_header - failed to read V2 header\n");
-			goto out;
-		}
-		*mtime_out = ntohl(header->v2.mtime);
-		*size_out = ntohll(header->v2.size);
-		*sectorsize_out = ntohl(header->v2.sectorsize);
-		*bitmap_offset_out = sizeof(header->v2);
-		file = header->v2.backing_file;
-	}
-	else {
-		printk("read_cow_header - invalid COW version\n");
-		goto out;
-	}
-	err = -ENOMEM;
-	*backing_file_out = uml_strdup(file);
-	if(*backing_file_out == NULL){
-		printk("read_cow_header - failed to allocate backing file\n");
-		goto out;
-	}
-	err = 0;
- out:
-	kfree(header);
-	return(err);
-}
-
-static int same_backing_files(char *from_cmdline, char *from_cow, char *cow)
-{
-	struct stat64 buf1, buf2;
-
-	if(from_cmdline == NULL) return(1);
-	if(!strcmp(from_cmdline, from_cow)) return(1);
-
-	if(stat64(from_cmdline, &buf1) < 0){
-		printk("Couldn't stat '%s', errno = %d\n", from_cmdline, 
-		       errno);
-		return(1);
-	}
-	if(stat64(from_cow, &buf2) < 0){
-		printk("Couldn't stat '%s', errno = %d\n", from_cow, errno);
-		return(1);
-	}
-	if((buf1.st_dev == buf2.st_dev) && (buf1.st_ino == buf2.st_ino))
-		return(1);
-
-	printk("Backing file mismatch - \"%s\" requested,\n"
-	       "\"%s\" specified in COW header of \"%s\"\n",
-	       from_cmdline, from_cow, cow);
-	return(0);
-}
-
-static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
-{
-	struct stat64 buf;
-	long long actual;
-	int err;
-
-  	if(stat64(file, &buf) < 0){
-		printk("Failed to stat backing file \"%s\", errno = %d\n",
-		       file, errno);
-		return(-errno);
-	}
-
-	err = os_file_size(file, &actual);
-	if(err){
-		printk("Failed to get size of backing file \"%s\", "
-		       "errno = %d\n", file, -err);
-		return(err);
-	}
-
-  	if(actual != size){
-		printk("Size mismatch (%ld vs %ld) of COW header vs backing "
-		       "file\n", size, actual);
-		return(-EINVAL);
-	}
-	if(buf.st_mtime != mtime){
-		printk("mtime mismatch (%ld vs %ld) of COW header vs backing "
-		       "file\n", mtime, buf.st_mtime);
-		return(-EINVAL);
-	}
-	return(0);
-}
-
-int read_cow_bitmap(int fd, void *buf, int offset, int len)
-{
-	int err;
-
-	err = os_seek_file(fd, offset);
-	if(err != 0) return(-errno);
-	err = read(fd, buf, len);
-	if(err < 0) return(-errno);
-	return(0);
-}
-
 static int absolutize(char *to, int size, char *from)
 {
 	char save_cwd[256], *slash;
@@ -261,188 +84,6 @@
 	return(0);
 }
 
-static int write_cow_header(char *cow_file, int fd, char *backing_file, 
-			    int sectorsize, long long *size)
-{
-        struct cow_header_v2 *header;
-	struct stat64 buf;
-	int err;
-
-	err = os_seek_file(fd, 0);
-	if(err != 0){
-		printk("write_cow_header - lseek failed, errno = %d\n", errno);
-		return(-errno);
-	}
-
-	err = -ENOMEM;
-	header = um_kmalloc(sizeof(*header));
-	if(header == NULL){
-		printk("Failed to allocate COW V2 header\n");
-		goto out;
-	}
-	header->magic = htonl(COW_MAGIC);
-	header->version = htonl(COW_VERSION);
-
-	err = -EINVAL;
-	if(strlen(backing_file) > sizeof(header->backing_file) - 1){
-		printk("Backing file name \"%s\" is too long - names are "
-		       "limited to %d characters\n", backing_file, 
-		       sizeof(header->backing_file) - 1);
-		goto out_free;
-	}
-
-	if(absolutize(header->backing_file, sizeof(header->backing_file), 
-		      backing_file))
-		goto out_free;
-
-	err = stat64(header->backing_file, &buf);
-	if(err < 0){
-		printk("Stat of backing file '%s' failed, errno = %d\n",
-		       header->backing_file, errno);
-		err = -errno;
-		goto out_free;
-	}
-
-	err = os_file_size(header->backing_file, size);
-	if(err){
-		printk("Couldn't get size of backing file '%s', errno = %d\n",
-		       header->backing_file, -*size);
-		goto out_free;
-	}
-
-	header->mtime = htonl(buf.st_mtime);
-	header->size = htonll(*size);
-	header->sectorsize = htonl(sectorsize);
-
-	err = write(fd, header, sizeof(*header));
-	if(err != sizeof(*header)){
-		printk("Write of header to new COW file '%s' failed, "
-		       "errno = %d\n", cow_file, errno);
-		goto out_free;
-	}
-	err = 0;
- out_free:
-	kfree(header);
- out:
-	return(err);
-}
-
-int open_ubd_file(char *file, struct openflags *openflags, 
-		  char **backing_file_out, int *bitmap_offset_out, 
-		  unsigned long *bitmap_len_out, int *data_offset_out, 
-		  int *create_cow_out)
-{
-	time_t mtime;
-	__u64 size;
-	char *backing_file;
-        int fd, err, sectorsize, magic, same, mode = 0644;
-
-        if((fd = os_open_file(file, *openflags, mode)) < 0){
-		if((fd == -ENOENT) && (create_cow_out != NULL))
-			*create_cow_out = 1;
-                if(!openflags->w ||
-                   ((errno != EROFS) && (errno != EACCES))) return(-errno);
-		openflags->w = 0;
-                if((fd = os_open_file(file, *openflags, mode)) < 0) 
-			return(fd);
-        }
-
-	err = os_lock_file(fd, openflags->w);
-	if(err){
-		printk("Failed to lock '%s', errno = %d\n", file, -err);
-		goto error;
-	}
-	
-	if(backing_file_out == NULL) return(fd);
-
-	err = read_cow_header(fd, &magic, &backing_file, &mtime, &size, 
-			      &sectorsize, bitmap_offset_out);
-	if(err && (*backing_file_out != NULL)){
-		printk("Failed to read COW header from COW file \"%s\", "
-		       "errno = %d\n", file, err);
-		goto error;
-	}
-	if(err) return(fd);
-
-	if(backing_file_out == NULL) return(fd);
-	
-	same = same_backing_files(*backing_file_out, backing_file, file);
-
-	if(!same && !backing_file_mismatch(*backing_file_out, size, mtime)){
-		printk("Switching backing file to '%s'\n", *backing_file_out);
-		err = write_cow_header(file, fd, *backing_file_out, 
-				       sectorsize, &size);
-		if(err){
-			printk("Switch failed, errno = %d\n", err);
-			return(err);
-		}
-	}
-	else {
-		*backing_file_out = backing_file;
-		err = backing_file_mismatch(*backing_file_out, size, mtime);
-		if(err) goto error;
-	}
-
-	sizes(size, sectorsize, *bitmap_offset_out, bitmap_len_out, 
-	      data_offset_out);
-
-        return(fd);
- error:
-	os_close_file(fd);
-	return(err);
-}
-
-int create_cow_file(char *cow_file, char *backing_file, struct openflags flags,
-		    int sectorsize, int *bitmap_offset_out, 
-		    unsigned long *bitmap_len_out, int *data_offset_out)
-{
-	__u64 offset;
-	int err, fd;
-	long long size;
-	char zero = 0;
-
-	flags.c = 1;
-	fd = open_ubd_file(cow_file, &flags, NULL, NULL, NULL, NULL, NULL);
-	if(fd < 0){
-		err = fd;
-		printk("Open of COW file '%s' failed, errno = %d\n", cow_file,
-		       -err);
-		goto out;
-	}
-
-	err = write_cow_header(cow_file, fd, backing_file, sectorsize, &size);
-	if(err) goto out_close;
-
-	sizes(size, sectorsize, sizeof(struct cow_header_v2), 
-	      bitmap_len_out, data_offset_out);
-	*bitmap_offset_out = sizeof(struct cow_header_v2);
-
-	offset = *data_offset_out + size - sizeof(zero);
-	err = os_seek_file(fd, offset);
-	if(err != 0){
-		printk("cow bitmap lseek failed : errno = %d\n", errno);
-		goto out_close;
-	}
-
-	/* does not really matter how much we write it is just to set EOF 
-	 * this also sets the entire COW bitmap
-	 * to zero without having to allocate it 
-	 */
-	err = os_write_file(fd, &zero, sizeof(zero));
-	if(err != sizeof(zero)){
-		printk("Write of bitmap to new COW file '%s' failed, "
-		       "errno = %d\n", cow_file, errno);
-		goto out_close;
-	}
-
-	return(fd);
-
- out_close:
-	close(fd);
- out:
-	return(err);
-}
-
 int read_ubd_fs(int fd, void *buffer, int len)
 {
 	int n;
@@ -463,77 +104,23 @@
 
 void do_io(struct io_thread_req *req)
 {
-	char *buf;
-	unsigned long len;
-	int n, nsectors, start, end, bit;
-	__u64 off;
-
-	nsectors = req->length / req->sectorsize;
-	start = 0;
-	do {
-		bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
-		end = start;
-		while((end < nsectors) && 
-		      (ubd_test_bit(end, (unsigned char *) 
-				    &req->sector_mask) == bit))
-			end++;
-
-		if(end != nsectors)
-			printk("end != nsectors\n");
-		off = req->offset + req->offsets[bit] + 
-			start * req->sectorsize;
-		len = (end - start) * req->sectorsize;
-		buf = &req->buffer[start * req->sectorsize];
-
-		if(os_seek_file(req->fds[bit], off) != 0){
-			printk("do_io - lseek failed : errno = %d\n", errno);
-			req->error = 1;
-			return;
-		}
-		if(req->op == UBD_READ){
-			n = 0;
-			do {
-				buf = &buf[n];
-				len -= n;
-				n = read(req->fds[bit], buf, len);
-				if (n < 0) {
-					printk("do_io - read returned %d : "
-					       "errno = %d fd = %d\n", n,
-					       errno, req->fds[bit]);
-					req->error = 1;
-					return;
-				}
-			} while((n < len) && (n != 0));
-			if (n < len) memset(&buf[n], 0, len - n);
-		}
-		else {
-			n = write(req->fds[bit], buf, len);
-			if(n != len){
-				printk("do_io - write returned %d : "
-				       "errno = %d fd = %d\n", n, 
-				       errno, req->fds[bit]);
-				req->error = 1;
-				return;
-			}
-		}
-
-		start = end;
-	} while(start < nsectors);
+	int n;
 
-	if(req->cow_offset != -1){
-		if(os_seek_file(req->fds[1], req->cow_offset) != 0){
-			printk("do_io - bitmap lseek failed : errno = %d\n",
-			       errno);
-			req->error = 1;
-			return;
-		}
-		n = write(req->fds[1], &req->bitmap_words, 
-			  sizeof(req->bitmap_words));
-		if(n != sizeof(req->bitmap_words)){
-			printk("do_io - bitmap update returned %d : "
-			       "errno = %d fd = %d\n", n, errno, req->fds[1]);
-			req->error = 1;
-			return;
+	if (req->op == UBD_READ) {
+		n = read_COW(req->info, req->buffer, req->length, req->offset);
+		if (n != req->length) {
+			printk("do_io - read_COW returned %d : errno = %d info "
+					"= %s\n", n, errno, req->info->name);
+		req->error = 1;
+		return;
+		}
+	} else if (req->op == UBD_WRITE) {
+		n = write_COW(req->info, req->buffer, req->length, req->offset);
+		if (n != req->length) {
+			printk("do_io - write_COW returned %d : errno = %d info "
+					"= %s\n", n, errno, req->info->name);
+		req->error = 1;
+		return;
 		}
 	}
 	req->error = 0;
diff -Nur uml-2.4.20-5/arch/um/include/ubd_user.h \
                work-2.4.20-5/arch/um/include/ubd_user.h
--- uml-2.4.20-5/arch/um/include/ubd_user.h	2003-05-23 07:02:30.000000000 -0700
+++ work-2.4.20-5/arch/um/include/ubd_user.h	2003-05-23 08:42:59.000000000 -0700
@@ -13,56 +13,19 @@
 
 struct io_thread_req {
 	enum ubd_req op;
-	int fds[2];
-	unsigned long offsets[2];
 	unsigned long long offset;
 	unsigned long length;
 	char *buffer;
+	struct devinfo *info;
 	int sectorsize;
-	unsigned long sector_mask;
-	unsigned long cow_offset;
-	unsigned long bitmap_words[2];
 	int error;
 };
 
-extern int open_ubd_file(char *file, struct openflags *openflags, 
-			 char **backing_file_out, int *bitmap_offset_out, 
-			 unsigned long *bitmap_len_out, int *data_offset_out,
-			 int *create_cow_out);
-extern int create_cow_file(char *cow_file, char *backing_file, 
-			   struct openflags flags, int sectorsize, 
-			   int *bitmap_offset_out, 
-			   unsigned long *bitmap_len_out,
-			   int *data_offset_out);
-extern int read_cow_bitmap(int fd, void *buf, int offset, int len);
 extern int read_ubd_fs(int fd, void *buffer, int len);
 extern int write_ubd_fs(int fd, char *buffer, int len);
 extern int start_io_thread(unsigned long sp, int *fds_out);
 extern void do_io(struct io_thread_req *req);
 
-static inline int ubd_test_bit(__u64 bit, unsigned char *data)
-{
-	__u64 n;
-	int bits, off;
-
-	bits = sizeof(data[0]) * 8;
-	n = bit / bits;
-	off = bit % bits;
-	return((data[n] & (1 << off)) != 0);
-}
-
-static inline void ubd_set_bit(__u64 bit, unsigned char *data)
-{
-	__u64 n;
-	int bits, off;
-
-	bits = sizeof(data[0]) * 8;
-	n = bit / bits;
-	off = bit % bits;
-	data[n] |= (1 << off);
-}
-
-
 #endif
 
 /*


-------------------------------------------------------
This SF.net email is sponsored by: ObjectStore.
If flattening out C++ or Java code to make your application fit in a
relational database is painful, don't do it! Check out ObjectStore.
Now part of Progress Software. http://www.objectstore.net/sourceforge
_______________________________________________
User-mode-linux-devel mailing list
User-mode-linux-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel

[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic