List:       coreutils
Subject:    [PATCH] factor: sync longlong.h adjustments from upstream
From:       Pádraig Brady <P@draigBrady.com>
Date:       2020-02-25 11:56:08
Message-ID: 20200225115608.908921-1-P@draigBrady.com

* src/longlong.h: Sync changes from:
https://gmplib.org/repo/gmp/log/tip/longlong.h
mips64: Provide r6 asm code, as the default expression yields a function call.
arm32: Define sub_ddmmss separately for thumb and non-thumb (no rsc instruction on thumb).
powerpc: Add condition-code clobbers (__CLOBBER_CC) to several asm blocks.
x86: Fix criterion for when to use mulx in umul_ppmm.
---
 src/longlong.h | 166 +++++++++++++++++++++++++++++++++++--------------
 1 file changed, 121 insertions(+), 45 deletions(-)
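
Background for review: umul_ppmm(w1,w0, u,v) forms the full two-word
product of u and v, and sub_ddmmss(sh,sl, ah,al, bh,bl) the two-word
difference (ah,al) - (bh,bl) with borrow propagation.  A portable
sketch of the semantics (illustrative ref_* names, not part of the
patch; assumes GCC/clang with unsigned __int128 on a 64-bit host):

  #include <stdint.h>

  /* (w1,w0) = u * v: the full 128-bit product, split into two words.  */
  #define ref_umul_ppmm(w1, w0, u, v)                             \
    do {                                                          \
      unsigned __int128 __p = (unsigned __int128) (u) * (v);      \
      (w1) = (uint64_t) (__p >> 64);                              \
      (w0) = (uint64_t) __p;                                      \
    } while (0)

  /* (sh,sl) = (ah,al) - (bh,bl), borrowing from the high word.  */
  #define ref_sub_ddmmss(sh, sl, ah, al, bh, bl)                  \
    do {                                                          \
      uint64_t __d = (al) - (bl);           /* may wrap */        \
      (sh) = (ah) - (bh) - ((al) < (bl));   /* propagate borrow */ \
      (sl) = __d;                                                 \
    } while (0)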

diff --git a/src/longlong.h b/src/longlong.h
index 92ab1a6f2..e57ba7821 100644
--- a/src/longlong.h
+++ b/src/longlong.h
@@ -432,11 +432,39 @@ long __MPN(count_leading_zeros) (UDItype);
 	   : "=r" (sh), "=&r" (sl)					\
 	   : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC);	\
   } while (0)
-/* FIXME: Extend the immediate range for the low word by using both
-   ADDS and SUBS, since they set carry in the same way.  */
+/* FIXME: Extend the immediate range for the low word by using both ADDS and
+   SUBS, since they set carry in the same way.  Note: We need separate
+   definitions for thumb and non-thumb due to the absence of RSC on thumb.  */
+#if defined (__thumb__)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  do {									\
+    if (__builtin_constant_p (ah) && __builtin_constant_p (bh)		\
+	&& (ah) == (bh))						\
+      __asm__ ("subs\t%1, %2, %3\n\tsbc\t%0, %0, %0"			\
+	       : "=r" (sh), "=r" (sl)					\
+	       : "r" (al), "rI" (bl) __CLOBBER_CC);			\
+    else if (__builtin_constant_p (al))					\
+      __asm__ ("rsbs\t%1, %5, %4\n\tsbc\t%0, %2, %3"			\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "r" (ah), "rI" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
+    else if (__builtin_constant_p (bl))					\
+      __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3"			\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
+    else								\
+      __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3"			\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
+    } while (0)
+#else
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   do {									\
-    if (__builtin_constant_p (al))					\
+    if (__builtin_constant_p (ah) && __builtin_constant_p (bh)		\
+	&& (ah) == (bh))						\
+      __asm__ ("subs\t%1, %2, %3\n\tsbc\t%0, %0, %0"			\
+	       : "=r" (sh), "=r" (sl)					\
+	       : "r" (al), "rI" (bl) __CLOBBER_CC);			\
+    else if (__builtin_constant_p (al))					\
       {									\
 	if (__builtin_constant_p (ah))					\
 	  __asm__ ("rsbs\t%1, %5, %4\n\trsc\t%0, %3, %2"		\
@@ -459,21 +487,15 @@ long __MPN(count_leading_zeros) (UDItype);
 		   : "rI" (ah), "r" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
       }									\
     else if (__builtin_constant_p (bl))					\
-      {									\
-	if (__builtin_constant_p (bh))					\
-	  __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3"		\
-		   : "=r" (sh), "=&r" (sl)				\
-		   : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
-	else								\
-	  __asm__ ("subs\t%1, %4, %5\n\trsc\t%0, %3, %2"		\
-		   : "=r" (sh), "=&r" (sl)				\
-		   : "rI" (ah), "r" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
-      }									\
+      __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3"			\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
     else /* only bh might be a constant */				\
       __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3"			\
 	       : "=r" (sh), "=&r" (sl)					\
-	       : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC);\
+	       : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
     } while (0)
+#endif
 #if defined (__ARM_ARCH_2__) || defined (__ARM_ARCH_2A__) \
     || defined (__ARM_ARCH_3__)
 #define umul_ppmm(xh, xl, a, b)						\
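
Note on the two hunks above: the non-thumb branches keep rsc so that a
constant ah or bh can sit in the reverse-subtract position; thumb has
no RSC, so its variant is limited to subs/sbc forms.  The new
(ah) == (bh) fast path in both variants reduces the high result to the
propagated borrow alone, which "sbc %0,%0,%0" materializes as 0 or ~0.
In portable C (illustrative sketch, 32-bit words as on arm32):

  #include <stdint.h>

  /* sub_ddmmss when the high words are equal: sh is just -borrow.  */
  static inline void
  sub_ddmmss_eqhigh (uint32_t *sh, uint32_t *sl, uint32_t al, uint32_t bl)
  {
    *sl = al - bl;                        /* subs: low difference  */
    *sh = (al < bl) ? ~(uint32_t) 0 : 0;  /* sbc %0,%0,%0: -borrow */
  }
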
@@ -1016,14 +1038,16 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
 	   : "=r" (sh), "=&r" (sl)					\
 	   : "0" ((UDItype)(ah)), "rme" ((UDItype)(bh)),		\
 	     "1" ((UDItype)(al)), "rme" ((UDItype)(bl)))
-#if defined (HAVE_MULX)
+#if X86_ASM_MULX \
+   && (HAVE_HOST_CPU_haswell || HAVE_HOST_CPU_broadwell \
+       || HAVE_HOST_CPU_skylake || HAVE_HOST_CPU_bd4 || HAVE_HOST_CPU_zen)
 #define umul_ppmm(w1, w0, u, v) \
-  __asm__ ("mulx	%3, %0, %1"					\
+  __asm__ ("mulx\t%3, %0, %1"						\
 	   : "=r" (w0), "=r" (w1)					\
 	   : "%d" ((UDItype)(u)), "rm" ((UDItype)(v)))
 #else
 #define umul_ppmm(w1, w0, u, v) \
-  __asm__ ("mulq	%3"						\
+  __asm__ ("mulq\t%3"							\
 	   : "=a" (w0), "=d" (w1)					\
 	   : "%0" ((UDItype)(u)), "rm" ((UDItype)(v)))
 #endif
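
Note on the hunk above: mulx takes one source implicitly in %rdx
(hence the "d" constraint), writes any two destination registers, and
leaves the flags untouched.  The old HAVE_MULX test only checked
availability; the new criterion also requires a host CPU where mulx
actually pays off.  The intrinsic equivalent, assuming a BMI2 target
(-mbmi2) and the illustrative name umul_via_mulx:

  #include <immintrin.h>
  #include <stdint.h>

  /* Returns the low word of u * v and stores the high word in *hi;
     unlike mulq, the flags are not modified.  */
  static inline uint64_t
  umul_via_mulx (uint64_t u, uint64_t v, uint64_t *hi)
  {
    unsigned long long h;
    uint64_t lo = _mulx_u64 (u, v, &h);
    *hi = h;
    return lo;
  }
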
@@ -1031,21 +1055,44 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
   __asm__ ("divq %4"		     /* stringification in K&R C */	\
 	   : "=a" (q), "=d" (r)						\
 	   : "0" ((UDItype)(n0)), "1" ((UDItype)(n1)), "rm" ((UDItype)(dx)))
-/* bsrq destination must be a 64-bit register, hence UDItype for __cbtmp. */
+
+#if HAVE_HOST_CPU_haswell || HAVE_HOST_CPU_broadwell || HAVE_HOST_CPU_skylake \
+  || HAVE_HOST_CPU_k10 || HAVE_HOST_CPU_bd1 || HAVE_HOST_CPU_bd2	\
+  || HAVE_HOST_CPU_bd3 || HAVE_HOST_CPU_bd4 || HAVE_HOST_CPU_zen	\
+  || HAVE_HOST_CPU_bobcat || HAVE_HOST_CPU_jaguar
+#define count_leading_zeros(count, x)					\
+  do {									\
+    /* This is lzcnt, spelled for older assemblers.  Destination and */	\
+    /* source must be 64-bit registers, hence cast and %q.           */	\
+    __asm__ ("rep;bsr\t%1, %q0" : "=r" (count) : "rm" ((UDItype)(x)));	\
+  } while (0)
+#define COUNT_LEADING_ZEROS_0 64
+#else
 #define count_leading_zeros(count, x)					\
   do {									\
     UDItype __cbtmp;							\
     ASSERT ((x) != 0);							\
-    __asm__ ("bsrq %1,%0" : "=r" (__cbtmp) : "rm" ((UDItype)(x)));	\
+    __asm__ ("bsr\t%1,%0" : "=r" (__cbtmp) : "rm" ((UDItype)(x)));	\
     (count) = __cbtmp ^ 63;						\
   } while (0)
-/* bsfq destination must be a 64-bit register, "%q0" forces this in case
-   count is only an int. */
+#endif
+
+#if HAVE_HOST_CPU_bd2 || HAVE_HOST_CPU_bd3 || HAVE_HOST_CPU_bd4 \
+  || HAVE_HOST_CPU_zen || HAVE_HOST_CPU_jaguar
+#define count_trailing_zeros(count, x)					\
+  do {									\
+    /* This is tzcnt, spelled for older assemblers.  Destination and */	\
+    /* source must be 64-bit registers, hence cast and %q.           */	\
+    __asm__ ("rep;bsf\t%1, %q0" : "=r" (count) : "rm" ((UDItype)(x)));	\
+  } while (0)
+#define COUNT_TRAILING_ZEROS_0 64
+#else
 #define count_trailing_zeros(count, x)					\
   do {									\
     ASSERT ((x) != 0);							\
-    __asm__ ("bsfq %1,%q0" : "=r" (count) : "rm" ((UDItype)(x)));	\
+    __asm__ ("bsf\t%1, %q0" : "=r" (count) : "rm" ((UDItype)(x)));	\
   } while (0)
+#endif
 #endif /* __amd64__ */
 
 #if defined (__i860__) && W_TYPE_SIZE == 32
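
Note on the hunk above: "rep;bsr" and "rep;bsf" are the lzcnt and
tzcnt encodings spelled so that older assemblers accept them.  On a
CPU without the extension the prefix is ignored and plain bsr/bsf
executes, with a different result, which is why these paths are gated
on specific hosts.  Unlike bsr, lzcnt is defined at zero (yielding
64), hence the new COUNT_LEADING_ZEROS_0.  The fallback's xor works
because bsr returns the index i of the highest set bit and
i ^ 63 == 63 - i for 0 <= i <= 63; a quick self-contained check:

  #include <assert.h>
  #include <stdint.h>

  int
  main (void)
  {
    for (int i = 0; i < 64; i++)
      {
        uint64_t x = (uint64_t) 1 << i;          /* highest set bit: i  */
        assert ((i ^ 63) == 63 - i);             /* bsr fixup identity  */
        assert (__builtin_clzll (x) == 63 - i);  /* lzcnt value, x != 0 */
      }
    return 0;
  }
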
@@ -1239,7 +1286,15 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
 #endif /* __mips */
 
 #if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64
-#if __GMP_GNUC_PREREQ (4,4)
+#if defined (_MIPS_ARCH_MIPS64R6)
+#define umul_ppmm(w1, w0, u, v) \
+  do {									\
+    UDItype __m0 = (u), __m1 = (v);					\
+    (w0) = __m0 * __m1;							\
+    __asm__ ("dmuhu\t%0, %1, %2" : "=d" (w1) : "d" (__m0), "d" (__m1));	\
+  } while (0)
+#endif
+#if !defined (umul_ppmm) && __GMP_GNUC_PREREQ (4,4)
 #define umul_ppmm(w1, w0, u, v) \
   do {									\
     typedef unsigned int __ll_UTItype __attribute__((mode(TI)));	\
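
Note on the hunk above: r6 dropped the dmultu/mfhi sequence; dmuhu
delivers the high 64 bits of the unsigned product directly, while the
low half is the plain C multiply.  The pre-existing generic path (now
guarded by !defined (umul_ppmm)) does one TImode multiply, which the
compiler may lower to a libgcc call (__multi3) when it cannot
open-code it; in sketch form:

  #include <stdint.h>

  typedef unsigned int UTItype __attribute__ ((mode (TI)));

  /* The generic fallback: one 128-bit multiply split into two words.
     Where GCC cannot open-code this, it becomes a __multi3 call.  */
  static inline void
  umul_ppmm_ti (uint64_t *w1, uint64_t *w0, uint64_t u, uint64_t v)
  {
    UTItype x = (UTItype) u * v;
    *w1 = (uint64_t) (x >> 64);
    *w0 = (uint64_t) x;
  }
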
@@ -1324,33 +1379,41 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
   do {									\
     if (__builtin_constant_p (bh) && (bh) == 0)				\
       __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"			\
-	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));	\
+	       : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)	\
+		 __CLOBBER_CC);						\
     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)		\
       __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"			\
-	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));	\
+	       : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)	\
+		 __CLOBBER_CC);						\
     else								\
       __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"			\
-	     : "=r" (sh), "=&r" (sl)					\
-	     : "r" (ah), "r" (bh), "%r" (al), "rI" (bl));		\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "r" (ah), "r" (bh), "%r" (al), "rI" (bl)		\
+		 __CLOBBER_CC);						\
   } while (0)
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   do {									\
     if (__builtin_constant_p (ah) && (ah) == 0)				\
       __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"			\
-	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)	\
+		 __CLOBBER_CC);						\
     else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)		\
       __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"			\
-	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)	\
+		 __CLOBBER_CC);						\
     else if (__builtin_constant_p (bh) && (bh) == 0)			\
       __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"			\
-	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)	\
+		 __CLOBBER_CC);						\
     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)		\
       __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"			\
-	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)	\
+		 __CLOBBER_CC);						\
     else								\
       __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"			\
 	       : "=r" (sh), "=&r" (sl)					\
-	       : "r" (ah), "r" (bh), "rI" (al), "r" (bl));		\
+	       : "r" (ah), "r" (bh), "rI" (al), "r" (bl)		\
+		 __CLOBBER_CC);						\
   } while (0)
 #define count_leading_zeros(count, x) \
   __asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x))
@@ -1398,17 +1461,20 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
       __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"			\
 	       : "=r" (sh), "=&r" (sl)					\
 	       : "r"  ((UDItype)(ah)),					\
-		 "%r" ((UDItype)(al)), "rI" ((UDItype)(bl)));		\
+		 "%r" ((UDItype)(al)), "rI" ((UDItype)(bl))		\
+		 __CLOBBER_CC);						\
     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)		\
       __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"			\
 	       : "=r" (sh), "=&r" (sl)					\
 	       : "r"  ((UDItype)(ah)),					\
-		 "%r" ((UDItype)(al)), "rI" ((UDItype)(bl)));		\
+		 "%r" ((UDItype)(al)), "rI" ((UDItype)(bl))		\
+		 __CLOBBER_CC);						\
     else								\
       __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"			\
 	       : "=r" (sh), "=&r" (sl)					\
 	       : "r"  ((UDItype)(ah)), "r"  ((UDItype)(bh)),		\
-		 "%r" ((UDItype)(al)), "rI" ((UDItype)(bl)));		\
+		 "%r" ((UDItype)(al)), "rI" ((UDItype)(bl))		\
+		 __CLOBBER_CC);						\
   } while (0)
 /* We use "*rI" for the constant operand here, since with just "I", gcc barfs.
    This might seem strange, but gcc folds away the dead code late.  */
@@ -1419,53 +1485,63 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
 	  __asm__ ("addic %1,%3,%4\n\tsubfze %0,%2"			\
 		   : "=r" (sh), "=&r" (sl)				\
 		   :                       "r" ((UDItype)(bh)),		\
-		     "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))));	\
+		     "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))	\
+		     __CLOBBER_CC);					\
 	else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)	\
 	  __asm__ ("addic %1,%3,%4\n\tsubfme %0,%2"			\
 		   : "=r" (sh), "=&r" (sl)				\
 		   :                       "r" ((UDItype)(bh)),		\
-		     "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))));	\
+		     "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))	\
+		     __CLOBBER_CC);					\
 	else if (__builtin_constant_p (bh) && (bh) == 0)		\
 	  __asm__ ("addic %1,%3,%4\n\taddme %0,%2"			\
 		   : "=r" (sh), "=&r" (sl)				\
 		   : "r"  ((UDItype)(ah)),				\
-		     "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))));	\
+		     "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))	\
+		     __CLOBBER_CC);					\
 	else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)	\
 	  __asm__ ("addic %1,%3,%4\n\taddze %0,%2"			\
 		   : "=r" (sh), "=&r" (sl)				\
 		   : "r"  ((UDItype)(ah)),				\
-		     "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))));	\
+		     "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))	\
+		     __CLOBBER_CC);					\
 	else								\
 	  __asm__ ("addic %1,%4,%5\n\tsubfe %0,%3,%2"			\
 		   : "=r" (sh), "=&r" (sl)				\
 		   : "r"  ((UDItype)(ah)), "r" ((UDItype)(bh)),		\
-		     "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))));	\
+		     "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))	\
+		     __CLOBBER_CC);					\
     } else {								\
 	if (__builtin_constant_p (ah) && (ah) == 0)			\
 	  __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"			\
 		   : "=r" (sh), "=&r" (sl)				\
 		   :                       "r" ((UDItype)(bh)),		\
-		     "rI" ((UDItype)(al)), "r" ((UDItype)(bl)));	\
+		     "rI" ((UDItype)(al)), "r" ((UDItype)(bl))		\
+		     __CLOBBER_CC);					\
 	else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)	\
 	  __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"			\
 		   : "=r" (sh), "=&r" (sl)				\
 		   :                       "r" ((UDItype)(bh)),		\
-		     "rI" ((UDItype)(al)), "r" ((UDItype)(bl)));	\
+		     "rI" ((UDItype)(al)), "r" ((UDItype)(bl))		\
+		     __CLOBBER_CC);					\
 	else if (__builtin_constant_p (bh) && (bh) == 0)		\
 	  __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"			\
 		   : "=r" (sh), "=&r" (sl)				\
 		   : "r"  ((UDItype)(ah)),				\
-		     "rI" ((UDItype)(al)), "r" ((UDItype)(bl)));	\
+		     "rI" ((UDItype)(al)), "r" ((UDItype)(bl))		\
+		     __CLOBBER_CC);					\
 	else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)	\
 	  __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"			\
 		   : "=r" (sh), "=&r" (sl)				\
 		   : "r"  ((UDItype)(ah)),				\
-		     "rI" ((UDItype)(al)), "r" ((UDItype)(bl)));	\
+		     "rI" ((UDItype)(al)), "r" ((UDItype)(bl))		\
+		     __CLOBBER_CC);					\
 	else								\
 	  __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"		\
 		   : "=r" (sh), "=&r" (sl)				\
 		   : "r"  ((UDItype)(ah)), "r" ((UDItype)(bh)),		\
-		     "rI" ((UDItype)(al)), "r" ((UDItype)(bl)));	\
+		     "rI" ((UDItype)(al)), "r" ((UDItype)(bl))		\
+		     __CLOBBER_CC);					\
     }									\
   } while (0)
 #endif /* ! _LONG_LONG_LIMB */
-- 
2.24.1


