[prev in list] [next in list] [prev in thread] [next in thread] 

List:       zlib-devel
Subject:    [Zlib-devel] [PATCH 04/11] Add preprocessor define to tune Adler32 loop unrolling.
From:       james.t.kukunas () linux ! intel ! com (Jim Kukunas)
Date:       2014-03-18 19:15:36
Message-ID: 1395170143-1745-5-git-send-email-james.t.kukunas () linux ! intel ! com
[Download RAW message or body]

Excessive loop unrolling is detrimental to performance. This patch
adds a preprocessor define, ADLER32_UNROLL_LESS, that reduces the
unrolling factor from 16 to 8.

Updates the configure script to set this define by default on x86.
---
 adler32.c |   16 ++++++++++++++++
 configure |    6 ++++++
 2 files changed, 22 insertions(+), 0 deletions(-)

diff --git a/adler32.c b/adler32.c
index a868f07..1007e38 100644
--- a/adler32.c
+++ b/adler32.c
@@ -104,10 +104,19 @@ uLong ZEXPORT adler32(adler, buf, len)
     /* do length NMAX blocks -- requires just one modulo operation */
     while (len >= NMAX) {
         len -= NMAX;
+#ifndef ADLER32_UNROLL_LESS
         n = NMAX / 16;          /* NMAX is divisible by 16 */
+#else
+        n = NMAX / 8;           /* NMAX is divisible by 8 */
+#endif
         do {
+#ifndef ADLER32_UNROLL_LESS
             DO16(buf);          /* 16 sums unrolled */
             buf += 16;
+#else
+            DO8(buf,0);         /* 8 sums unrolled */
+            buf += 8;
+#endif
         } while (--n);
         MOD(adler);
         MOD(sum2);
@@ -115,10 +124,17 @@ uLong ZEXPORT adler32(adler, buf, len)
 
     /* do remaining bytes (less than NMAX, still just one modulo) */
     if (len) {                  /* avoid modulos if none remaining */
+#ifndef ADLER32_UNROLL_LESS
         while (len >= 16) {
             len -= 16;
             DO16(buf);
             buf += 16;
+#else
+        while (len >= 8) {
+            len -= 8;
+            DO8(buf, 0);
+            buf += 8;
+#endif
         }
         while (len--) {
             adler += *buf++;
diff --git a/configure b/configure
index b1c5a7e..d6f790b 100755
--- a/configure
+++ b/configure
@@ -771,6 +771,9 @@ case "${ARCH}" in
 
         CFLAGS="${CFLAGS} -DUNALIGNED_OK"
         SFLAGS="${SFLAGS} -DUNALIGNED_OK"
+
+        CFLAGS="${CFLAGS} -DADLER32_UNROLL_LESS"
+        SFLAGS="${SFLAGS} -DADLER32_UNROLL_LESS"
     ;;
     i386 | i486 | i586 | i686)
         OBJC="${OBJC} x86.o"
@@ -781,6 +784,9 @@ case "${ARCH}" in
 
         CFLAGS="${CFLAGS} -DUNALIGNED_OK"
         SFLAGS="${SFLAGS} -DUNALIGNED_OK"
+
+        CFLAGS="${CFLAGS} -DADLER32_UNROLL_LESS"
+        SFLAGS="${SFLAGS} -DADLER32_UNROLL_LESS"
     ;;
 esac
 
-- 
1.7.1




[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic