'[PATCH][BZ #3268] Add fam float/double to soft-sp'

[prev in list] [next in list] [prev in thread] [next in thread] 

List:       glibc-alpha
Subject:    [PATCH][BZ #3268] Add fam float/double to soft-sp
From:       Steven Munroe <munroesj () us ! ibm ! com>
Date:       2007-04-03 19:47:36
Message-ID: 4612B3D2.1090405 () us ! ibm ! com
[Download RAW message or body]

Previous soft-fp work exposed a general lack of correct Fused Multiply Add (fma) \
implementations for platforms that don't implement fma in hardware (soft-fp or hard-fp \
that doesn't implement fma).

This patch provides a basic soft-fp implementation for float and double fma.  This \
updated version provides macros to directly convert between RAW, SEMIRAW, and \
CANONICAL internal forms without requiring floating types (like TF). This allows \
platforms that do not implement 128-bit long double to still use the quad.h soft-fp \
macros in the implementation of the double fma.

This patch has been updated and verified on powerpc32 with today's CVS.

I assume that __fmadf4/__fmasf4 are acceptable names for these functions. These \
functions need to be in libc.so to access the soft-float exception and rounding \
modes.

A separate patch can then override s_fma.c/s_fmaf.c (in libm.so) as needed to call \
__fmadf4/__fmasf4 directly, for example from ./ports.


["soft-fp-fma-20070320.txt" (text/plain)]

2007-04-03  Steven Munroe  <sjmunroe@us.ibm.com>

	[BZ #3268]
	* soft-fp/Makefile (gcc-single-routines): Add fmasf4.
	(gcc-double-routines): Add fmadf4.
	* soft-fp/double.h: Define FP_COPY_RAW_D, FP_COPY_RAW_TO_CANONICAL_D,
	FP_COPY_RAW_TO_SEMIRAW_D, FP_COPY_SEMIRAW_D, and
	FP_COPY_CANONICAL_TO_SEMIRAW_D macros.
	* soft-fp/quad.h: Define FP_COPY_RAW_Q, FP_COPY_RAW_TO_CANONICAL_Q,
	FP_COPY_RAW_TO_SEMIRAW_Q, FP_COPY_SEMIRAW_Q, and
	FP_COPY_CANONICAL_TO_SEMIRAW_Q macros.
	* soft-fp/fmadf4.c: New file.
	* soft-fp/fmasf4.c: New file.

diff -urN libc25-cvstip-20070320/soft-fp/Makefile libc25/soft-fp/Makefile
--- libc25-cvstip-20070320/soft-fp/Makefile	2006-01-06 04:47:45.000000000 -0600
+++ libc25/soft-fp/Makefile	2007-03-30 15:34:09.000000000 -0500
@@ -24,12 +24,13 @@
 
 gcc-single-routines := negsf2 addsf3 subsf3 mulsf3 divsf3 eqsf2 \
 	lesf2 gesf2 unordsf2 fixsfsi fixunssfsi floatsisf fixsfdi        \
-	fixunssfdi floatdisf sqrtsf2 floatunsisf floatundisf
+	fixunssfdi floatdisf sqrtsf2 floatunsisf floatundisf \
+	fmasf4
 
 gcc-double-routines := negdf2 adddf3 subdf3 muldf3 divdf3 eqdf2 \
 	ledf2 gedf2 unorddf2 fixdfsi fixunsdfsi floatsidf fixdfdi        \
 	fixunsdfdi floatdidf extendsfdf2 truncdfsf2 sqrtdf2 floatunsidf \
-	floatundidf
+	floatundidf fmadf4
 
 gcc-quad-routines := negtf2 addtf3 subtf3 multf3 divtf3 eqtf2 \
 	letf2 getf2 unordtf2 fixtfsi fixunstfsi floatsitf fixtfdi      \
diff -urN libc25-cvstip-20070320/soft-fp/double.h libc25/soft-fp/double.h
--- libc25-cvstip-20070320/soft-fp/double.h	2007-03-20 07:10:36.000000000 -0500
+++ libc25/soft-fp/double.h	2007-03-30 15:34:14.000000000 -0500
@@ -142,6 +142,53 @@
       _FP_PACK_RAW_2_P(D,val,X);	\
   } while (0)
 
+/* Copy the internal layout between RAW, SEMIRAW, and CANONICAL forms.
+   These macros are used in the fma implementations.  */
+
+#define FP_COPY_RAW_D(Y, X)						\
+  do {									\
+    /* Raw copy of X into Y, field by field.  Each field of Y depends	\
+       only on the same field of X, so Y and X may name the same	\
+       variable.  */							\
+    Y##_s = X##_s;							\
+    Y##_e = X##_e & ((1 << _FP_EXPBITS_D) - 1);				\
+    Y##_f0 = X##_f0;							\
+    /* The high word keeps only its low fraction bits.  */		\
+    Y##_f1 = X##_f1 & ((1 << (_FP_FRACBITS_D - (_FP_IMPLBIT_D != 0)	\
+			       - _FP_W_TYPE_SIZE)) - 1);		\
+  } while (0)
+  
+#define FP_COPY_RAW_TO_CANONICAL_D(Y, X)				\
+  do {									\
+    FP_COPY_RAW_D (Y, X);		/* masked raw copy of X */	\
+    _FP_UNPACK_CANONICAL (D, 2, Y);	/* then unpack Y (2 words) */	\
+  } while (0)
+
+#define FP_COPY_RAW_TO_SEMIRAW_D(Y, X)					\
+  do {									\
+    FP_COPY_RAW_D (Y, X);		/* masked raw copy of X */	\
+    _FP_UNPACK_SEMIRAW (D, 2, Y);	/* then unpack Y (2 words) */	\
+  } while (0)
+
+#define FP_COPY_SEMIRAW_D(Y, X)						\
+  do {									\
+    /* Semi-raw copy of X into Y.  Each field of Y depends only on the	\
+       same field of X, so Y and X may name the same variable.  */	\
+    Y##_s = X##_s;							\
+    Y##_e = X##_e;							\
+    Y##_f0 = X##_f0;							\
+    /* The high-word mask is 3 bits wider than in FP_COPY_RAW_D.  */	\
+    Y##_f1 = X##_f1 & ((1 << (_FP_FRACBITS_D - (_FP_IMPLBIT_D != 0)	\
+			       - _FP_W_TYPE_SIZE + 3)) - 1);		\
+  } while (0)
+
+#define FP_COPY_CANONICAL_TO_SEMIRAW_D(Y, X)				\
+  do {									\
+    _FP_PACK_CANONICAL (D, 2, X);	/* applied to the source X */	\
+    FP_COPY_RAW_D (Y, X);		/* masked raw copy into Y */	\
+    _FP_UNPACK_SEMIRAW (D, 2, Y);	/* then unpack Y (2 words) */	\
+  } while (0)
+
 #define FP_ISSIGNAN_D(X)		_FP_ISSIGNAN(D,2,X)
 #define FP_NEG_D(R,X)			_FP_NEG(D,2,R,X)
 #define FP_ADD_D(R,X,Y)			_FP_ADD(D,2,R,X,Y)
@@ -239,6 +286,50 @@
       _FP_PACK_RAW_1_P(D,val,X);	\
   } while (0)
 
+/* Copy the internal layout between RAW, SEMIRAW, and CANONICAL forms.
+   These macros are used in the fma implementations.  */
+
+#define FP_COPY_RAW_D(Y, X)						\
+  do {									\
+    /* Raw copy of X into Y; Y and X may name the same variable.	\
+       The fraction mask shifts an _FP_W_TYPE one because the count	\
+       can exceed the width of int, where shifting a plain int 1	\
+       would be undefined.  */						\
+    Y##_f = X##_f & (((_FP_W_TYPE) 1					\
+		       << (_FP_FRACBITS_D - (_FP_IMPLBIT_D != 0))) - 1); \
+    Y##_e = X##_e & ((1 << _FP_EXPBITS_D) - 1);				\
+    Y##_s = X##_s;							\
+  } while (0)
+  
+#define FP_COPY_RAW_TO_CANONICAL_D(Y, X)				\
+  do {									\
+    FP_COPY_RAW_D (Y, X);		/* masked raw copy of X */	\
+    _FP_UNPACK_CANONICAL (D, 1, Y);	/* then unpack Y (1 word) */	\
+  } while (0)
+
+#define FP_COPY_RAW_TO_SEMIRAW_D(Y, X)					\
+  do {									\
+    FP_COPY_RAW_D (Y, X);		/* masked raw copy of X */	\
+    _FP_UNPACK_SEMIRAW (D, 1, Y);	/* then unpack Y (1 word) */	\
+  } while (0)
+
+#define FP_COPY_SEMIRAW_D(Y, X)						\
+  do {									\
+    /* Semi-raw copy; the mask is 3 bits wider than in FP_COPY_RAW_D.	\
+       Shift an _FP_W_TYPE one: the count can exceed the int width.  */	\
+    Y##_f = X##_f & (((_FP_W_TYPE) 1					\
+		       << (_FP_FRACBITS_D - (_FP_IMPLBIT_D != 0) + 3)) - 1); \
+    Y##_e = X##_e;							\
+    Y##_s = X##_s;							\
+  } while (0)
+  
+#define FP_COPY_CANONICAL_TO_SEMIRAW_D(Y, X)				\
+  do {									\
+    _FP_PACK_CANONICAL (D, 1, X);	/* applied to the source X */	\
+    FP_COPY_RAW_D (Y, X);		/* masked raw copy into Y */	\
+    _FP_UNPACK_SEMIRAW (D, 1, Y);	/* then unpack Y (1 word) */	\
+  } while (0)
+
 #define FP_ISSIGNAN_D(X)		_FP_ISSIGNAN(D,1,X)
 #define FP_NEG_D(R,X)			_FP_NEG(D,1,R,X)
 #define FP_ADD_D(R,X,Y)			_FP_ADD(D,1,R,X,Y)
diff -urN libc25-cvstip-20070320/soft-fp/fmadf4.c libc25/soft-fp/fmadf4.c
--- libc25-cvstip-20070320/soft-fp/fmadf4.c	Wed Dec 31 18:00:00 1969
+++ libc25/soft-fp/fmadf4.c	Fri Mar 30 15:34:14 2007
@@ -0,0 +1,91 @@
+/* soft-fp x * y + z as ternary operation.
+   Copyright (C) 2007 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Steven Munroe <sjmunroe@us.ibm.com>, 2006.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "soft-fp.h"
+#include "double.h"
+#include "quad.h"
+
+/* Return a * b + c, evaluated through the quad-precision (Q) macros.  */
+DFtype
+__fmadf4 (DFtype a, DFtype b, DFtype c)
+{
+  FP_DECL_EX;
+  FP_DECL_D (A);
+  FP_DECL_D (B);
+  FP_DECL_D (C);
+  FP_DECL_Q (X);		/* a extended to quad */
+  FP_DECL_Q (Y);		/* b extended to quad */
+  FP_DECL_Q (Z);		/* c extended to quad */
+  FP_DECL_Q (U);		/* X * Y */
+  FP_DECL_Q (V);		/* U + Z */
+  FP_DECL_D (R);
+  DFtype r;			/* matches the declared return type */
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_RAW_D (A, a);
+  FP_UNPACK_RAW_D (B, b);
+  FP_UNPACK_RAW_D (C, c);
+
+  /* Extend double to quad; the word counts depend on _FP_W_TYPE_SIZE.  */
+#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q
+  FP_EXTEND (Q, D, 4, 2, X, A);
+  FP_EXTEND (Q, D, 4, 2, Y, B);
+  FP_EXTEND (Q, D, 4, 2, Z, C);
+#else
+  FP_EXTEND (Q, D, 2, 1, X, A);
+  FP_EXTEND (Q, D, 2, 1, Y, B);
+  FP_EXTEND (Q, D, 2, 1, Z, C);
+#endif
+  FP_HANDLE_EXCEPTIONS;
+
+  /* Multiply.
+     Rounding is not an issue as we keep the full 106 bit product.  */
+  FP_COPY_RAW_TO_CANONICAL_Q (X, X);
+  FP_COPY_RAW_TO_CANONICAL_Q (Y, Y);
+  FP_MUL_Q (U, X, Y);
+  FP_HANDLE_EXCEPTIONS;
+
+  /* Add without rounding.  */
+  FP_COPY_CANONICAL_TO_SEMIRAW_Q (U, U);
+  FP_COPY_RAW_TO_SEMIRAW_Q (Z, Z);
+  FP_ADD_Q (V, U, Z);
+
+  /* Truncate quad to double and round (same test as the extension).  */
+  FP_COPY_SEMIRAW_Q (V, V);
+#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q
+  FP_TRUNC (D, Q, 2, 4, R, V);
+#else
+  FP_TRUNC (D, Q, 1, 2, R, V);
+#endif
+  FP_PACK_SEMIRAW_D (r, R);
+  FP_HANDLE_EXCEPTIONS;
+
+  return r;
+}
+
diff -urN libc25-cvstip-20070320/soft-fp/fmasf4.c libc25/soft-fp/fmasf4.c
--- libc25-cvstip-20070320/soft-fp/fmasf4.c	Wed Dec 31 18:00:00 1969
+++ libc25/soft-fp/fmasf4.c	Fri Mar 30 15:34:14 2007
@@ -0,0 +1,90 @@
+/* soft-fp x * y + z as ternary operation.
+   Copyright (C) 2007 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Steven Munroe <sjmunroe@us.ibm.com>, 2006.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "soft-fp.h"
+#include "single.h"
+#include "double.h"
+
+/* Return a * b + c, evaluated through the double-precision (D) macros.  */
+SFtype
+__fmasf4 (SFtype a, SFtype b, SFtype c)
+{
+  FP_DECL_EX;
+  FP_DECL_S (A);
+  FP_DECL_S (B);
+  FP_DECL_S (C);
+  FP_DECL_D (X);		/* a extended to double */
+  FP_DECL_D (Y);		/* b extended to double */
+  FP_DECL_D (Z);		/* c extended to double */
+  FP_DECL_D (U);		/* X * Y */
+  FP_DECL_D (V);		/* U + Z */
+  FP_DECL_S (R);
+  SFtype r;			/* matches the declared return type */
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_RAW_S (A, a);
+  FP_UNPACK_RAW_S (B, b);
+  FP_UNPACK_RAW_S (C, c);
+
+  /* Extend single to double; the word counts depend on _FP_W_TYPE_SIZE.  */
+#if _FP_W_TYPE_SIZE < _FP_FRACBITS_D
+  FP_EXTEND (D, S, 2, 1, X, A);
+  FP_EXTEND (D, S, 2, 1, Y, B);
+  FP_EXTEND (D, S, 2, 1, Z, C);
+#else
+  FP_EXTEND (D, S, 1, 1, X, A);
+  FP_EXTEND (D, S, 1, 1, Y, B);
+  FP_EXTEND (D, S, 1, 1, Z, C);
+#endif
+  FP_HANDLE_EXCEPTIONS;
+
+  /* Multiply.
+     Rounding is not an issue as we keep the full 48 bit product.  */
+  FP_COPY_RAW_TO_CANONICAL_D (X, X);
+  FP_COPY_RAW_TO_CANONICAL_D (Y, Y);
+  FP_MUL_D (U, X, Y);
+  FP_HANDLE_EXCEPTIONS;
+
+  /* Add without rounding.  */
+  FP_COPY_CANONICAL_TO_SEMIRAW_D (U, U);
+  FP_COPY_RAW_TO_SEMIRAW_D (Z, Z);
+  FP_ADD_D (V, U, Z);
+
+  /* Truncate double to single and round (same test as the extension).  */
+  FP_COPY_SEMIRAW_D (V, V);
+#if _FP_W_TYPE_SIZE < _FP_FRACBITS_D
+  FP_TRUNC (S, D, 1, 2, R, V);
+#else
+  FP_TRUNC (S, D, 1, 1, R, V);
+#endif
+  FP_PACK_SEMIRAW_S (r, R);
+  FP_HANDLE_EXCEPTIONS;
+
+  return r;
+}
diff -urN libc25-cvstip-20070320/soft-fp/quad.h libc25/soft-fp/quad.h
--- libc25-cvstip-20070320/soft-fp/quad.h	2007-03-20 07:10:36.000000000 -0500
+++ libc25/soft-fp/quad.h	2007-03-30 15:34:17.000000000 -0500
@@ -148,6 +148,58 @@
       _FP_PACK_RAW_4_P(Q,val,X);	\
   } while (0)
 
+/* Copy the internal layout between RAW, SEMIRAW, and CANONICAL forms.
+   These macros are used in the fma implementations.  */
+
+#define FP_COPY_RAW_Q(Y, X)						\
+  do {									\
+    /* Raw copy of X into Y, field by field.  Each field of Y depends	\
+       only on the same field of X, so Y and X may name the same	\
+       variable.  */							\
+    Y##_s = X##_s;							\
+    Y##_e = X##_e & ((1 << _FP_EXPBITS_Q) - 1);				\
+    /* The three low fraction words are copied whole.  */		\
+    Y##_f[0] = X##_f[0];						\
+    Y##_f[1] = X##_f[1];						\
+    Y##_f[2] = X##_f[2];						\
+    /* The top word keeps only its low fraction bits.  */		\
+    Y##_f[3] = X##_f[3] & ((1 << (_FP_FRACBITS_Q - (_FP_IMPLBIT_Q != 0) \
+				  - (_FP_W_TYPE_SIZE * 3))) - 1);	\
+  } while (0)
+  
+#define FP_COPY_RAW_TO_CANONICAL_Q(Y, X)				\
+  do {									\
+    FP_COPY_RAW_Q (Y, X);		/* masked raw copy of X */	\
+    _FP_UNPACK_CANONICAL (Q, 4, Y);	/* then unpack Y (4 words) */	\
+  } while (0)
+
+#define FP_COPY_RAW_TO_SEMIRAW_Q(Y, X)					\
+  do {									\
+    FP_COPY_RAW_Q (Y, X);		/* masked raw copy of X */	\
+    _FP_UNPACK_SEMIRAW (Q, 4, Y);	/* then unpack Y (4 words) */	\
+  } while (0)
+
+#define FP_COPY_SEMIRAW_Q(Y, X)						\
+  do {									\
+    /* Semi-raw copy of X into Y.  Each field of Y depends only on the	\
+       same field of X, so Y and X may name the same variable.  */	\
+    Y##_s = X##_s;							\
+    Y##_e = X##_e;							\
+    Y##_f[0] = X##_f[0];						\
+    Y##_f[1] = X##_f[1];						\
+    Y##_f[2] = X##_f[2];						\
+    /* The top-word mask is 3 bits wider than in FP_COPY_RAW_Q.  */	\
+    Y##_f[3] = X##_f[3] & ((1 << (_FP_FRACBITS_Q - (_FP_IMPLBIT_Q != 0) \
+				  - (_FP_W_TYPE_SIZE * 3) + 3)) - 1);	\
+  } while (0)
+
+#define FP_COPY_CANONICAL_TO_SEMIRAW_Q(Y, X)				\
+  do {									\
+    _FP_PACK_CANONICAL (Q, 4, X);	/* applied to the source X */	\
+    FP_COPY_RAW_Q (Y, X);		/* masked raw copy into Y */	\
+    _FP_UNPACK_SEMIRAW (Q, 4, Y);	/* then unpack Y (4 words) */	\
+  } while (0)
+
 #define FP_ISSIGNAN_Q(X)		_FP_ISSIGNAN(Q,4,X)
 #define FP_NEG_Q(R,X)			_FP_NEG(Q,4,R,X)
 #define FP_ADD_Q(R,X,Y)			_FP_ADD(Q,4,R,X,Y)
@@ -249,6 +301,53 @@
       _FP_PACK_RAW_2_P(Q,val,X);	\
   } while (0)
 
+/* Copy the internal layout between RAW, SEMIRAW, and CANONICAL forms.
+   These macros are used in the fma implementations.  */
+
+#define FP_COPY_RAW_Q(Y, X)						\
+  do {									\
+    /* Raw copy of X into Y; Y and X may name the same variable.  The	\
+       high-word mask shifts an _FP_W_TYPE one because the count can	\
+       exceed the width of int (undefined for a plain int 1).  */	\
+    Y##_f0 = X##_f0;							\
+    Y##_f1 = X##_f1 & (((_FP_W_TYPE) 1 << (_FP_FRACBITS_Q		\
+					   - (_FP_IMPLBIT_Q != 0)	\
+					   - _FP_W_TYPE_SIZE)) - 1);	\
+    Y##_e = X##_e & ((1 << _FP_EXPBITS_Q) - 1);				\
+    Y##_s = X##_s;							\
+  } while (0)
+  
+#define FP_COPY_RAW_TO_CANONICAL_Q(Y, X)				\
+  do {									\
+    FP_COPY_RAW_Q (Y, X);		/* masked raw copy of X */	\
+    _FP_UNPACK_CANONICAL (Q, 2, Y);	/* then unpack Y (2 words) */	\
+  } while (0)
+
+#define FP_COPY_RAW_TO_SEMIRAW_Q(Y, X)					\
+  do {									\
+    FP_COPY_RAW_Q (Y, X);	/* quad copy, not FP_COPY_RAW_D */	\
+    _FP_UNPACK_SEMIRAW (Q, 2, Y);	/* then unpack Y (2 words) */	\
+  } while (0)
+
+#define FP_COPY_SEMIRAW_Q(Y, X)						\
+  do {									\
+    /* Semi-raw copy of X into Y; the high-word mask is 3 bits wider	\
+       than in FP_COPY_RAW_Q.  Shift an _FP_W_TYPE one: the count can	\
+       exceed the width of int.  */					\
+    Y##_f0 = X##_f0;							\
+    Y##_f1 = X##_f1 & (((_FP_W_TYPE) 1 << (_FP_FRACBITS_Q		\
+			 - (_FP_IMPLBIT_Q != 0) - _FP_W_TYPE_SIZE + 3)) - 1); \
+    Y##_e = X##_e;							\
+    Y##_s = X##_s;							\
+  } while (0)
+
+#define FP_COPY_CANONICAL_TO_SEMIRAW_Q(Y, X)				\
+  do {									\
+    _FP_PACK_CANONICAL (Q, 2, X);	/* applied to the source X */	\
+    FP_COPY_RAW_Q (Y, X);		/* masked raw copy into Y */	\
+    _FP_UNPACK_SEMIRAW (Q, 2, Y);	/* then unpack Y (2 words) */	\
+  } while (0)
+
 #define FP_ISSIGNAN_Q(X)		_FP_ISSIGNAN(Q,2,X)
 #define FP_NEG_Q(R,X)			_FP_NEG(Q,2,R,X)
 #define FP_ADD_Q(R,X,Y)			_FP_ADD(Q,2,R,X,Y)


[prev in list] [next in list] [prev in thread] [next in thread]
Configure | About | News | Add a list | Sponsored by KoreLogic