[prev in list] [next in list] [prev in thread] [next in thread]
List: zlib-devel
Subject: [Zlib-devel] [PATCH 04/11] Add preprocessor define to tune Adler32 loop unrolling.
From: james.t.kukunas () linux ! intel ! com (Jim Kukunas)
Date: 2014-03-18 19:15:36
Message-ID: 1395170143-1745-5-git-send-email-james.t.kukunas () linux ! intel ! com
[Download RAW message or body]
Excessive loop unrolling is detrimental to performance. This patch
adds a preprocessor define, ADLER32_UNROLL_LESS, to reduce the unrolling
factor from 16 to 8.
Updates the configure script to define it by default on x86.
---
adler32.c | 16 ++++++++++++++++
configure | 6 ++++++
2 files changed, 22 insertions(+), 0 deletions(-)
diff --git a/adler32.c b/adler32.c
index a868f07..1007e38 100644
--- a/adler32.c
+++ b/adler32.c
@@ -104,10 +104,19 @@ uLong ZEXPORT adler32(adler, buf, len)
/* do length NMAX blocks -- requires just one modulo operation */
while (len >= NMAX) {
len -= NMAX;
+#ifndef ADLER32_UNROLL_LESS
n = NMAX / 16; /* NMAX is divisible by 16 */
+#else
+ n = NMAX / 8; /* NMAX is divisible by 8 */
+#endif
do {
+#ifndef ADLER32_UNROLL_LESS
DO16(buf); /* 16 sums unrolled */
buf += 16;
+#else
+ DO8(buf,0); /* 8 sums unrolled */
+ buf += 8;
+#endif
} while (--n);
MOD(adler);
MOD(sum2);
@@ -115,10 +124,17 @@ uLong ZEXPORT adler32(adler, buf, len)
/* do remaining bytes (less than NMAX, still just one modulo) */
if (len) { /* avoid modulos if none remaining */
+#ifndef ADLER32_UNROLL_LESS
while (len >= 16) {
len -= 16;
DO16(buf);
buf += 16;
+#else
+ while (len >= 8) {
+ len -= 8;
+ DO8(buf, 0);
+ buf += 8;
+#endif
}
while (len--) {
adler += *buf++;
diff --git a/configure b/configure
index b1c5a7e..d6f790b 100755
--- a/configure
+++ b/configure
@@ -771,6 +771,9 @@ case "${ARCH}" in
CFLAGS="${CFLAGS} -DUNALIGNED_OK"
SFLAGS="${SFLAGS} -DUNALIGNED_OK"
+
+ CFLAGS="${CFLAGS} -DADLER32_UNROLL_LESS"
+ SFLAGS="${SFLAGS} -DADLER32_UNROLL_LESS"
;;
i386 | i486 | i586 | i686)
OBJC="${OBJC} x86.o"
@@ -781,6 +784,9 @@ case "${ARCH}" in
CFLAGS="${CFLAGS} -DUNALIGNED_OK"
SFLAGS="${SFLAGS} -DUNALIGNED_OK"
+
+ CFLAGS="${CFLAGS} -DADLER32_UNROLL_LESS"
+ SFLAGS="${SFLAGS} -DADLER32_UNROLL_LESS"
;;
esac
--
1.7.1
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic