[prev in list] [next in list] [prev in thread] [next in thread] 

List:       git
Subject:    Re: [PATCH] Enable index-pack threading in msysgit.
From:       Duy Nguyen <pclouds () gmail ! com>
Date:       2014-03-21 5:21:19
Message-ID: 20140321052118.GA28519 () duynguyen-vnpc ! dek-tpc ! internal
[Download RAW message or body]

On Fri, Mar 21, 2014 at 08:51:18AM +0700, Duy Nguyen wrote:
> On Thu, Mar 20, 2014 at 11:08 PM, Stefan Zager <szager@chromium.org> wrote:
> > Duy, would you like to re-post your patch without the new pread implementation?
> 
> I will but let me try out the sliding window idea first. My quick
> tests on git.git show me we may only need 21k mmap instead of 177k
> pread. That hints some potential performance improvement.

The patch at the bottom reuses (un)use_pack() instead of pread(). The
results on linux-2.6 do not look any different. I guess we can drop
the idea.

It makes me wonder, though, what's wrong a simple patch like this to
make pread in index-pack thread-safe? It does not look any different
either from the performance point of view, perhaps because
unpack_data() reads small deltas most of the time

-- 8< --
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index a6b1c17..b91f4f8 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -40,11 +40,6 @@ struct base_data {
 	int ofs_first, ofs_last;
 };
 
-#if !defined(NO_PTHREADS) && defined(NO_THREAD_SAFE_PREAD)
-/* pread() emulation is not thread-safe. Disable threading. */
-#define NO_PTHREADS
-#endif
-
 struct thread_local {
 #ifndef NO_PTHREADS
 	pthread_t thread;
@@ -175,6 +170,22 @@ static void cleanup_thread(void)
 #endif
 
 
+#if defined(NO_THREAD_SAFE_PREAD)
+static inline ssize_t pread_safe(int fd, void *buf, size_t count, off_t off)
+{
+	int ret;
+	read_lock();
+	ret = pread(fd, buf, count, off);
+	read_unlock();
+	return ret;
+}
+#else
+static inline ssize_t pread_safe(int fd, void *buf, size_t count, off_t off)
+{
+	return pread(fd, buf, count, off);
+}
+#endif
+
 static int mark_link(struct object *obj, int type, void *data)
 {
 	if (!obj)
@@ -542,7 +553,7 @@ static void *unpack_data(struct object_entry *obj,
 
 	do {
 		ssize_t n = (len < 64*1024) ? len : 64*1024;
-		n = pread(pack_fd, inbuf, n, from);
+		n = pread_safe(pack_fd, inbuf, n, from);
 		if (n < 0)
 			die_errno(_("cannot pread pack file"));
 		if (!n)
-- 8< --

And the sliding window patch for the list archive

-- 8< --
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index a6b1c17..6f5c6d9 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -91,7 +91,8 @@ static off_t consumed_bytes;
 static unsigned deepest_delta;
 static git_SHA_CTX input_ctx;
 static uint32_t input_crc32;
-static int input_fd, output_fd, pack_fd;
+static int input_fd, output_fd;
+static struct packed_git *pack;
 
 #ifndef NO_PTHREADS
 
@@ -224,8 +225,10 @@ static unsigned check_objects(void)
 static void flush(void)
 {
 	if (input_offset) {
-		if (output_fd >= 0)
+		if (output_fd >= 0) {
 			write_or_die(output_fd, input_buffer, input_offset);
+			pack->pack_size += input_offset;
+		}
 		git_SHA1_Update(&input_ctx, input_buffer, input_offset);
 		memmove(input_buffer, input_buffer + input_offset, input_len);
 		input_offset = 0;
@@ -277,6 +280,10 @@ static void use(int bytes)
 
 static const char *open_pack_file(const char *pack_name)
 {
+	pack = xmalloc(sizeof(*pack) + 1);
+	memset(pack, 0, sizeof(*pack));
+	pack->pack_name[0] = '\0';
+
 	if (from_stdin) {
 		input_fd = 0;
 		if (!pack_name) {
@@ -288,13 +295,17 @@ static const char *open_pack_file(const char *pack_name)
 			output_fd = open(pack_name, O_CREAT|O_EXCL|O_RDWR, 0600);
 		if (output_fd < 0)
 			die_errno(_("unable to create '%s'"), pack_name);
-		pack_fd = output_fd;
+		pack->pack_fd = output_fd;
 	} else {
+		struct stat st;
 		input_fd = open(pack_name, O_RDONLY);
 		if (input_fd < 0)
 			die_errno(_("cannot open packfile '%s'"), pack_name);
+		if (lstat(pack_name, &st))
+			die_errno(_("cannot stat packfile '%s'"), pack_name);
 		output_fd = -1;
-		pack_fd = input_fd;
+		pack->pack_fd = input_fd;
+		pack->pack_size = st.st_size;
 	}
 	git_SHA1_Init(&input_ctx);
 	return pack_name;
@@ -531,9 +542,15 @@ static void *unpack_data(struct object_entry *obj,
 	unsigned char *data, *inbuf;
 	git_zstream stream;
 	int status;
+	struct pack_window *w_cursor = NULL;
+
+	if (from + len > pack->pack_size)
+		die(Q_("premature end of pack file, %lu byte missing",
+		       "premature end of pack file, %lu bytes missing",
+		       from + len - pack->pack_size),
+		    (unsigned long)(from + len - pack->pack_size));
 
 	data = xmalloc(consume ? 64*1024 : obj->size);
-	inbuf = xmalloc((len < 64*1024) ? len : 64*1024);
 
 	memset(&stream, 0, sizeof(stream));
 	git_inflate_init(&stream);
@@ -541,15 +558,12 @@ static void *unpack_data(struct object_entry *obj,
 	stream.avail_out = consume ? 64*1024 : obj->size;
 
 	do {
-		ssize_t n = (len < 64*1024) ? len : 64*1024;
-		n = pread(pack_fd, inbuf, n, from);
-		if (n < 0)
-			die_errno(_("cannot pread pack file"));
-		if (!n)
-			die(Q_("premature end of pack file, %lu byte missing",
-			       "premature end of pack file, %lu bytes missing",
-			       len),
-			    len);
+		ssize_t n;
+		unsigned long left;
+		read_lock();
+		inbuf = use_pack(pack, &w_cursor, from, &left);
+		read_unlock();
+		n = left > len ? len : left;
 		from += n;
 		len -= n;
 		stream.next_in = inbuf;
@@ -568,6 +582,9 @@ static void *unpack_data(struct object_entry *obj,
 				stream.avail_out = 64*1024;
 			} while (status == Z_OK && stream.avail_in);
 		}
+		read_lock();
+		unuse_pack(&w_cursor);
+		read_unlock();
 	} while (len && status == Z_OK && !stream.avail_in);
 
 	/* This has been inflated OK when first encountered, so... */
@@ -575,7 +592,6 @@ static void *unpack_data(struct object_entry *obj,
 		die(_("serious inflate inconsistency"));
 
 	git_inflate_end(&stream);
-	free(inbuf);
 	if (consume) {
 		free(data);
 		data = NULL;
@@ -1657,6 +1673,8 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
 	free(objects);
 	free(index_name_buf);
 	free(keep_name_buf);
+	close_pack_windows(pack);
+	free(pack);
 	if (pack_name == NULL)
 		free((void *) curr_pack);
 	if (index_name == NULL)
diff --git a/sha1_file.c b/sha1_file.c
index 187f5a6..aa0b16d 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -977,7 +977,13 @@ unsigned char *use_pack(struct packed_git *p,
 	 */
 	if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p))
 		die("packfile %s cannot be accessed", p->pack_name);
-	if (offset > (p->pack_size - 20))
+	/*
+	 * index-pack uses this function even if the pack is not
+	 * complete yet (i.e. trailing SHA-1 missing). Loosen the
+	 * check a bit in this case (pack_empty name uses as the
+	 * indicator).
+	 */
+	if (offset > (p->pack_size - (*p->pack_name ? 20 : 0)))
 		die("offset beyond end of packfile (truncated pack?)");
 
 	if (!win || !in_window(win, offset)) {
-- 8< --

--
Duy
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic