[prev in list] [next in list] [prev in thread] [next in thread] 

List:       freedesktop-xorg
Subject:    Re: CVS Xserver breaks on non-SSE capable i386 machines
From:       Lars Knoll <lars () trolltech ! com>
Date:       2005-07-25 10:43:25
Message-ID: 200507251243.26125.lars () trolltech ! com
[Download RAW message or body]

Hi,

On Sunday 24 July 2005 01:47, Krzysztof Halasa wrote:
> Unless I miss something, this change inconditionally adds "-msse" on
> all i386 machines with gcc 3.4+.
>
>
> The problems are:
>
> 1. "-msse" argument to gcc causes it to generate code using "cmov"
> instruction. Pentium MMX and earlier, and certain VIA C3 CPUs, lack
> cmov instruction, causing Xserver to die with SIGILL.

This is a problem. I didn't anticipate this as the gcc docs state that the 
flag only enables the use of the mmx/sse intrinsics and doesn't affect code 
generation. Obviously the docs are wrong :(

> Fix: Removing "-msse" for those files (fbmmx.c, fbpict.c, fbfill.c,
> fbcopy.c) should fix this problem.
>
>
>
> 2. fbmmx.c does:
> #include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */
>
> xmmintrin.h (a gcc header file) can't be included unless "-msse" or
> equivalent is passed to gcc:
>
> #ifndef __SSE__
> # error "SSE instruction set not enabled"
>
> fbmmx.c uses _mm_shuffle_pi16 which is pshufw (SSE and not just MMX)
> instruction. fbmmx.c and friends use it when MMX is available (#defined
> USE_MMX and fbHaveMMX()) but it can't work on Pentium MMX and other
> non-SSE capable CPUs.

Yes, this needs SSE or the AMD MMX extensions. The basic question is if we 
want to support assembler code paths on CPUs older than PIII or Athlon.

Not using pshufw on newer CPUs is somewhat stupid (especially as older CPUs 
are getting rare). A compile time option is not good, as you wouldn't get a 
portable binary. So either we do not use the mmx code paths for older CPUs 
(that's what the current code is supposed to do), or we duplicate all code in 
fbmmx.c (something I didn't want to do).

> Fix: xmmintrin.h and "-msse" must not be used, pshufw (if available)
> must be called using other means (asm?) and a replacement routine must
> be written in case no SSE CPU is detected.
>
> Comments?
>
> I can try to fix it if the above is correct.

A simple solution is to remove the -mmmx and -msse flags from all but fbmmx.c, 
but keep -DUSE_MMX for the other files. As a second thing we will have to 
move the CPU detection into a file that is not compiled with -mmmx/-msse.

The attached patch would do exactly that.

Cheers,
Lars

["detection.diff" (text/x-diff)]

? Makefile
? fb.h.flc
? fb.h.gch
? fb24_32.h.gch
? fbbits.h.gch
? fbcompose.c.flc
? fbedgeimp.h.gch
? foo
Index: Imakefile
===================================================================
RCS file: /cvs/xorg/xc/programs/Xserver/fb/Imakefile,v
retrieving revision 1.10
diff -u -p -r1.10 Imakefile
--- Imakefile	12 Jul 2005 10:02:10 -0000	1.10
+++ Imakefile	25 Jul 2005 10:42:05 -0000
@@ -6,12 +6,12 @@ XCOMM Id: Imakefile,v 1.1 1999/11/02 03:
 #if defined(HasGcc34) && HasGcc34
 MMXOPTIONS= -mmmx -msse -Winline --param inline-unit-growth=10000 \
 	--param large-function-growth=10000 -DUSE_MMX
-
+USEMMXOPTIONS= -DUSE_MMX -m32
 #if defined(i386Architecture) || defined(AMD64Architecture)
 SpecialCObjectRule(fbmmx,fbmmx.c,$(MMXOPTIONS))
-SpecialCObjectRule(fbpict,fbpict.c,$(MMXOPTIONS))
-SpecialCObjectRule(fbfill,fbfill.c,$(MMXOPTIONS))
-SpecialCObjectRule(fbcopy,fbcopy.c,$(MMXOPTIONS))
+SpecialCObjectRule(fbpict,fbpict.c,$(USEMMXOPTIONS))
+SpecialCObjectRule(fbfill,fbfill.c,$(USEMMXOPTIONS))
+SpecialCObjectRule(fbcopy,fbcopy.c,$(USEMMXOPTIONS))
 #endif
 
 #endif
Index: fbmmx.c
===================================================================
RCS file: /cvs/xorg/xc/programs/Xserver/fb/fbmmx.c,v
retrieving revision 1.13
diff -u -p -r1.13 fbmmx.c
--- fbmmx.c	13 Jul 2005 08:58:37 -0000	1.13
+++ fbmmx.c	25 Jul 2005 10:42:06 -0000
@@ -2289,108 +2289,6 @@ fbCompositeCopyAreammx (CARD8		op,
 		   width, height);
 }
 
-#if !defined(__amd64__) && !defined(__x86_64__)
-
-enum CPUFeatures {
-    NoFeatures = 0,
-    MMX = 0x1,
-    MMX_Extensions = 0x2, 
-    SSE = 0x6,
-    SSE2 = 0x8,
-    CMOV = 0x10
-};
-
-static unsigned int detectCPUFeatures(void) {
-    unsigned int result;
-    char vendor[13];
-    vendor[0] = 0;
-    vendor[12] = 0;
-    /* see p. 118 of amd64 instruction set manual Vol3 */
-    __asm__ ("push %%ebx\n"
-             "pushf\n"
-             "pop %%eax\n"
-             "mov %%eax, %%ebx\n"
-             "xor $0x00200000, %%eax\n"
-             "push %%eax\n"
-             "popf\n"
-             "pushf\n"
-             "pop %%eax\n"
-             "mov $0x0, %%edx\n"
-             "xor %%ebx, %%eax\n"
-             "jz skip\n"
-
-             "mov $0x00000000, %%eax\n"
-             "cpuid\n"
-             "mov %%ebx, %1\n"
-             "mov %%edx, %2\n"
-             "mov %%ecx, %3\n"
-             "mov $0x00000001, %%eax\n"
-             "cpuid\n"
-             "skip:\n"
-             "pop %%ebx\n"
-             "mov %%edx, %0\n"
-             : "=r" (result), 
-               "=m" (vendor[0]), 
-               "=m" (vendor[4]), 
-               "=m" (vendor[8])
-             :
-             : "%eax", "%ecx", "%edx"
-        );
-
-    unsigned int features = 0;
-    if (result) {
-        /* result now contains the standard feature bits */
-        if (result & (1 << 15))
-            features |= CMOV;
-        if (result & (1 << 23))
-            features |= MMX;
-        if (result & (1 << 25))
-            features |= SSE;
-        if (result & (1 << 26))
-            features |= SSE2;
-        if ((result & MMX) && !(result & SSE) && (strcmp(vendor, "AuthenticAMD") == 0)) {
-            /* check for AMD MMX extensions */
-
-            unsigned int result;            
-            __asm__("push %%ebx\n"
-                    "mov $0x80000000, %%eax\n"
-                    "cpuid\n"
-                    "xor %%edx, %%edx\n"
-                    "cmp $0x1, %%eax\n"
-                    "jge skip2\n"
-                    "mov $0x80000001, %%eax\n"
-                    "cpuid\n"
-                    "skip2:\n"
-                    "mov %%edx, %0\n"
-                    "pop %%ebx\n"
-                    : "=r" (result)
-                    :
-                    : "%eax", "%ecx", "%edx"
-                );
-            if (result & (1<<22))
-                features |= MMX_Extensions;
-        }
-    }
-    return features;
-}
-
-Bool
-fbHaveMMX (void)
-{
-    static Bool initialized = FALSE;
-    static Bool mmx_present;
-    
-    if (!initialized)
-    {
-        unsigned int features = detectCPUFeatures();
-	mmx_present = (features & (MMX|MMX_Extensions)) == (MMX|MMX_Extensions);
-        initialized = TRUE;
-    }
-    
-    return mmx_present;
-}
-#endif /* __amd64__ */
-
 
 #endif /* RENDER */
 #endif /* USE_MMX */
Index: fbpict.c
===================================================================
RCS file: /cvs/xorg/xc/programs/Xserver/fb/fbpict.c,v
retrieving revision 1.16
diff -u -p -r1.16 fbpict.c
--- fbpict.c	12 Jul 2005 10:02:10 -0000	1.16
+++ fbpict.c	25 Jul 2005 10:42:06 -0000
@@ -1332,3 +1332,113 @@ fbPictureInit (ScreenPtr pScreen, PictFo
 
     return TRUE;
 }
+
+
+#ifdef USE_MMX
+/* The CPU detection code needs to be in a file not compiled with
+ * "-mmmx -msse", as gcc would generate CMOV instructions otherwise
+ * that would lead to SIGILL instructions on old CPUs that don't have
+ * it.
+ */
+#if !defined(__amd64__) && !defined(__x86_64__)
+
+enum CPUFeatures {
+    NoFeatures = 0,
+    MMX = 0x1,
+    MMX_Extensions = 0x2, 
+    SSE = 0x6,
+    SSE2 = 0x8,
+    CMOV = 0x10
+};
+
+static unsigned int detectCPUFeatures(void) {
+    unsigned int result;
+    char vendor[13];
+    vendor[0] = 0;
+    vendor[12] = 0;
+    /* see p. 118 of amd64 instruction set manual Vol3 */
+    __asm__ ("push %%ebx\n"
+             "pushf\n"
+             "pop %%eax\n"
+             "mov %%eax, %%ebx\n"
+             "xor $0x00200000, %%eax\n"
+             "push %%eax\n"
+             "popf\n"
+             "pushf\n"
+             "pop %%eax\n"
+             "mov $0x0, %%edx\n"
+             "xor %%ebx, %%eax\n"
+             "jz skip\n"
+
+             "mov $0x00000000, %%eax\n"
+             "cpuid\n"
+             "mov %%ebx, %1\n"
+             "mov %%edx, %2\n"
+             "mov %%ecx, %3\n"
+             "mov $0x00000001, %%eax\n"
+             "cpuid\n"
+             "skip:\n"
+             "pop %%ebx\n"
+             "mov %%edx, %0\n"
+             : "=r" (result), 
+               "=m" (vendor[0]), 
+               "=m" (vendor[4]), 
+               "=m" (vendor[8])
+             :
+             : "%eax", "%ecx", "%edx"
+        );
+
+    unsigned int features = 0;
+    if (result) {
+        /* result now contains the standard feature bits */
+        if (result & (1 << 15))
+            features |= CMOV;
+        if (result & (1 << 23))
+            features |= MMX;
+        if (result & (1 << 25))
+            features |= SSE;
+        if (result & (1 << 26))
+            features |= SSE2;
+        if ((result & MMX) && !(result & SSE) && (strcmp(vendor, "AuthenticAMD") == 0)) {
+            /* check for AMD MMX extensions */
+
+            unsigned int result;            
+            __asm__("push %%ebx\n"
+                    "mov $0x80000000, %%eax\n"
+                    "cpuid\n"
+                    "xor %%edx, %%edx\n"
+                    "cmp $0x1, %%eax\n"
+                    "jge skip2\n"
+                    "mov $0x80000001, %%eax\n"
+                    "cpuid\n"
+                    "skip2:\n"
+                    "mov %%edx, %0\n"
+                    "pop %%ebx\n"
+                    : "=r" (result)
+                    :
+                    : "%eax", "%ecx", "%edx"
+                );
+            if (result & (1<<22))
+                features |= MMX_Extensions;
+        }
+    }
+    return features;
+}
+
+Bool
+fbHaveMMX (void)
+{
+    static Bool initialized = FALSE;
+    static Bool mmx_present;
+    
+    if (!initialized)
+    {
+        unsigned int features = detectCPUFeatures();
+	mmx_present = (features & (MMX|MMX_Extensions)) == (MMX|MMX_Extensions);
+        initialized = TRUE;
+    }
+    
+    return mmx_present;
+}
+#endif /* __amd64__ */
+#endif


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic