[prev in list] [next in list] [prev in thread] [next in thread] 

List:       busybox
Subject:    [PATCH] Add a gzip fastpath for the xmalloc readers, v3
From:       Lauri Kasanen <curaga () operamail ! com>
Date:       2014-11-30 20:32:24
Message-ID: 1417379544.839789.197025141.24DF0379 () webmail ! messagingengine ! com
[Download RAW message or body]

v3: Use the aux struct as requested by Denys
v2: Add missing check on open

The performance and number of processes for a "depmod -a" with gzipped
modules were abysmal. This patch adds a fast path without fork for well-
behaved gzip files, benefiting all users of xmalloc_open_zipped_read_close.

"modinfo radeon.ko.gz", a single-file reader, got 30% faster.
"depmod -a", which used to fork over 800 times, got 20% faster. And of course
a whole lot fewer processes -> much saved RAM.

function                                             old     new   delta
inflate_unzip_internal                              2304    2521    +217
xmalloc_open_zipped_read_close                        73     201    +128
unpack_gz_stream                                     567     570      +3
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 3/0 up/down: 348/0)             Total: 348 bytes

-- 
http://www.fastmail.com - Does exactly what it says on the tin


["0001-Add-a-gzip-fastpath-for-the-xmalloc-readers-v3.patch" (0001-Add-a-gzip-fastpath-for-the-xmalloc-readers-v3.patch)]

From 5ad6804ed4485eae176da45524ea848a00b11929 Mon Sep 17 00:00:00 2001
From: Lauri Kasanen <curaga@operamail.com>
Date: Sun, 30 Nov 2014 21:37:10 +0200
Subject: [PATCH] Add a gzip fastpath for the xmalloc readers, v3

v3: Use the aux struct as requested by Denys
v2: Add missing check on open

The performance and number of processes for a "depmod -a" with gzipped
modules were abysmal. This patch adds a fast path without fork for well-
behaved gzip files, benefiting all users of xmalloc_open_zipped_read_close.

"modinfo radeon.ko.gz", a single-file reader, got 30% faster.
"depmod -a", which used to fork over 800 times, got 20% faster. And of course
a whole lot fewer processes -> much saved RAM.

function                                             old     new   delta
inflate_unzip_internal                              2304    2521    +217
xmalloc_open_zipped_read_close                        73     201    +128
unpack_gz_stream                                     567     570      +3
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 3/0 up/down: 348/0)             Total: 348 bytes

Signed-off-by: Lauri Kasanen <curaga@operamail.com>
---
 archival/libarchive/decompress_gunzip.c | 38 ++++++++++++++++++++++++++++-----
 archival/libarchive/open_transformer.c  | 31 ++++++++++++++++++++++++++-
 include/bb_archive.h                    |  2 ++
 3 files changed, 65 insertions(+), 6 deletions(-)

diff --git a/archival/libarchive/decompress_gunzip.c b/archival/libarchive/decompress_gunzip.c
index 7c6f38e..938d21f 100644
--- a/archival/libarchive/decompress_gunzip.c
+++ b/archival/libarchive/decompress_gunzip.c
@@ -971,10 +971,11 @@ static int inflate_get_next_window(STATE_PARAM_ONLY)
 
 /* Called from unpack_gz_stream() and inflate_unzip() */
 static IF_DESKTOP(long long) int
-inflate_unzip_internal(STATE_PARAM int in, int out)
+inflate_unzip_internal(STATE_PARAM transformer_aux_data_t *aux, int in, int out)
 {
 	IF_DESKTOP(long long) int n = 0;
 	ssize_t nwrote;
+	size_t bufsize = 0;
 
 	/* Allocate all global buffers (for DYN_ALLOC option) */
 	gunzip_window = xmalloc(GUNZIP_WSIZE);
@@ -1002,16 +1003,43 @@ inflate_unzip_internal(STATE_PARAM int in, int out)
 
 	while (1) {
 		int r = inflate_get_next_window(PASS_STATE_ONLY);
-		nwrote = full_write(out, gunzip_window, gunzip_outbuf_count);
+
+		if (aux->mem_output_size) {
+			nwrote = gunzip_outbuf_count;
+			if (gunzip_outbuf_count + n > bufsize) {
+				// increase by four blocks each time
+				const size_t newsize = bufsize + 4 * gunzip_outbuf_count + 1;
+				aux->mem_output_buf = xrealloc(aux->mem_output_buf, newsize);
+				bufsize = newsize;
+			}
+
+			if (bufsize > aux->mem_output_size) {
+				free(aux->mem_output_buf);
+				aux->mem_output_buf = NULL;
+				n = -1;
+				goto ret;
+			}
+
+			memcpy(aux->mem_output_buf + n, gunzip_window, gunzip_outbuf_count);
+		} else {
+			nwrote = full_write(out, gunzip_window, gunzip_outbuf_count);
+		}
 		if (nwrote != (ssize_t)gunzip_outbuf_count) {
 			bb_perror_msg("write");
 			n = -1;
 			goto ret;
 		}
-		IF_DESKTOP(n += nwrote;)
+		n += nwrote;
 		if (r == 0) break;
 	}
 
+	/* Final realloc, plus zero byte */
+	if (aux->mem_output_size) {
+		aux->mem_output_buf = xrealloc(aux->mem_output_buf, n + 1);
+		aux->mem_output_size = n;
+		aux->mem_output_buf[n] = '\0';
+	}
+
 	/* Store unused bytes in a global buffer so calling applets can access it */
 	if (gunzip_bk >= 8) {
 		/* Undo too much lookahead. The next read will be byte aligned
@@ -1045,7 +1073,7 @@ inflate_unzip(transformer_aux_data_t *aux, int in, int out)
 //	bytebuffer_max = 0x8000;
 	bytebuffer_offset = 4;
 	bytebuffer = xmalloc(bytebuffer_max);
-	n = inflate_unzip_internal(PASS_STATE in, out);
+	n = inflate_unzip_internal(PASS_STATE aux, in, out);
 	free(bytebuffer);
 
 	aux->crc32 = gunzip_crc;
@@ -1224,7 +1252,7 @@ unpack_gz_stream(transformer_aux_data_t *aux, int src_fd, int dst_fd)
 		goto ret;
 	}
 
-	n = inflate_unzip_internal(PASS_STATE src_fd, dst_fd);
+	n = inflate_unzip_internal(PASS_STATE aux, src_fd, dst_fd);
 	if (n < 0) {
 		total = -1;
 		goto ret;
diff --git a/archival/libarchive/open_transformer.c b/archival/libarchive/open_transformer.c
index 1986630..ad9dce5 100644
--- a/archival/libarchive/open_transformer.c
+++ b/archival/libarchive/open_transformer.c
@@ -211,7 +211,36 @@ int FAST_FUNC open_zipped(const char *fname, int fail_if_not_compressed)
 void* FAST_FUNC xmalloc_open_zipped_read_close(const char *fname, size_t *maxsz_p)
 {
 	int fd;
-	char *image;
+	char *image = NULL;
+
+	/* Fast path for well-behaved gzip files, avoiding forks. */
+	if (ENABLE_FEATURE_SEAMLESS_GZ && ENABLE_DESKTOP && BB_MMU) {
+		uint16_t magic;
+		fd = open(fname, O_RDONLY);
+		if (fd < 0)
+			return NULL;
+
+		xread(fd, &magic, 2);
+
+		if (magic == GZIP_MAGIC) {
+			transformer_aux_data_t aux;
+			init_transformer_aux_data(&aux);
+
+			/* In-memory decompression instead of the usual fork */
+			aux.mem_output_size = *maxsz_p;
+
+			unpack_gz_stream(&aux, fd, -1);
+
+			if (aux.mem_output_buf) {
+				image = aux.mem_output_buf;
+				*maxsz_p = aux.mem_output_size;
+			}
+		}
+
+		close(fd);
+		if (image)
+			return image;
+	}
 
 	fd = open_zipped(fname, /*fail_if_not_compressed:*/ 0);
 	if (fd < 0)
diff --git a/include/bb_archive.h b/include/bb_archive.h
index b82cfd8..6b19413 100644
--- a/include/bb_archive.h
+++ b/include/bb_archive.h
@@ -209,6 +209,8 @@ typedef struct transformer_aux_data_t {
 	off_t    bytes_in;  /* used in unzip code only: needs to know packed size */
 	uint32_t crc32;
 	time_t   mtime;     /* gunzip code may set this on exit */
+	size_t   mem_output_size; /* if non-zero, decompress to RAM instead of fd */
+	char     *mem_output_buf;
 } transformer_aux_data_t;
 
 void init_transformer_aux_data(transformer_aux_data_t *aux) FAST_FUNC;
-- 
1.8.3.1



_______________________________________________
busybox mailing list
busybox@busybox.net
http://lists.busybox.net/mailman/listinfo/busybox

[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic