[prev in list] [next in list] [prev in thread] [next in thread] 

List:       cairo
Subject:    [cairo] Image scaling with pixman
From:       Billy Biggs <vektor () dumbterm ! net>
Date:       2005-08-17 5:12:25
Message-ID: 20050817051225.GG16959 () dumbterm ! net
[Download RAW message or body]

I was asked today about image scaling performance with pixman.

  The code from xserver that is now in HEAD should be much faster than
the old pixman code, as it is much better structured.  Unfortunately,
one of the bugfixes to the xserver code slowed down bilinear scaling a
wee bit, but that's about to get fixed.

  The attached patch speeds up both nearest neighbour and bilinear
scaling in the case where the transformation is not projective.  I went a
little further and unrolled the nasty loop I added to fix a precision
error, which gives a significant savings.  This patch gives a further 2x
speed improvement over the already improved code.

  Comments?

  -Billy


["fbcompose-avoid-projective-unrolled.diff" (text/plain)]

Index: fbcompose.c
===================================================================
RCS file: /cvs/cairo/cairo/pixman/src/fbcompose.c,v
retrieving revision 1.1
diff -p -u -r1.1 fbcompose.c
--- fbcompose.c	11 Aug 2005 04:10:13 -0000	1.1
+++ fbcompose.c	17 Aug 2005 05:03:28 -0000
@@ -2906,6 +2906,7 @@ static void fbFetchTransformed(PicturePt
 #else
     miIndexedPtr indexed = 0;
 #endif
+    Bool projective = FALSE;
 
     fetch = fetchPixelProcForPicture(pict);
 
@@ -2929,6 +2930,7 @@ static void fbFetchTransformed(PicturePt
         unit.vector[1] = 0;
         unit.vector[2] = 0;
     }
+    projective = (unit.vector[2] != 0);
 
     if (pict->filter == PIXMAN_FILTER_NEAREST || pict->filter == PIXMAN_FILTER_FAST)
     {
@@ -2939,8 +2941,13 @@ static void fbFetchTransformed(PicturePt
                     if (!v.vector[2]) {
                         buffer[i] = 0;
                     } else {
-                        y = MOD(DIV(v.vector[1],v.vector[2]), \
                pict->pDrawable->height);
-                        x = MOD(DIV(v.vector[0],v.vector[2]), \
pict->pDrawable->width); +                        if (projective) {
+                            y = MOD(DIV(v.vector[1],v.vector[2]), \
pict->pDrawable->height); +                            x = \
MOD(DIV(v.vector[0],v.vector[2]), pict->pDrawable->width); +                        } \
else { +                            y = MOD(v.vector[1]>>16, \
pict->pDrawable->height); +                            x = MOD(v.vector[0]>>16, \
pict->pDrawable->width); +                        }
                         buffer[i] = fetch(bits + (y + pict->pDrawable->y)*stride, x \
+ pict->pDrawable->x, indexed);  }
                     v.vector[0] += unit.vector[0];
@@ -2952,8 +2959,13 @@ static void fbFetchTransformed(PicturePt
                     if (!v.vector[2]) {
                         buffer[i] = 0;
                     } else {
-                        y = MOD(DIV(v.vector[1],v.vector[2]), \
                pict->pDrawable->height);
-                        x = MOD(DIV(v.vector[0],v.vector[2]), \
pict->pDrawable->width); +                        if (projective) {
+                            y = MOD(DIV(v.vector[1],v.vector[2]), \
pict->pDrawable->height); +                            x = \
MOD(DIV(v.vector[0],v.vector[2]), pict->pDrawable->width); +                        } \
else { +                            y = MOD(v.vector[1]>>16, \
pict->pDrawable->height); +                            x = MOD(v.vector[0]>>16, \
pict->pDrawable->width); +                        }
                         if (pixman_region_contains_point (pict->pCompositeClip, x, \
                y, &box))
                             buffer[i] = fetch(bits + (y + \
pict->pDrawable->y)*stride, x + pict->pDrawable->x, indexed);  else
@@ -2971,8 +2983,13 @@ static void fbFetchTransformed(PicturePt
                     if (!v.vector[2]) {
                         buffer[i] = 0;
                     } else {
-                        y = DIV(v.vector[1],v.vector[2]);
-                        x = DIV(v.vector[0],v.vector[2]);
+                        if (projective) {
+                            y = DIV(v.vector[1],v.vector[2]);
+                            x = DIV(v.vector[0],v.vector[2]);
+                        } else {
+                            y = v.vector[1]>>16;
+                            x = v.vector[0]>>16;
+                        }
                         buffer[i] = ((x < box.x1) | (x >= box.x2) | (y < box.y1) | \
                (y >= box.y2)) ?
                                     0 : fetch(bits + (y + \
pict->pDrawable->y)*stride, x + pict->pDrawable->x, indexed);  }
@@ -2985,8 +3002,13 @@ static void fbFetchTransformed(PicturePt
                     if (!v.vector[2]) {
                         buffer[i] = 0;
                     } else {
-                        y = DIV(v.vector[1],v.vector[2]);
-                        x = DIV(v.vector[0],v.vector[2]);
+                        if (projective) {
+                            y = DIV(v.vector[1],v.vector[2]);
+                            x = DIV(v.vector[0],v.vector[2]);
+                        } else {
+                            y = v.vector[1]>>16;
+                            x = v.vector[0]>>16;
+                        }
                         if (pixman_region_contains_point (pict->pCompositeClip, x, \
                y, &box))
                             buffer[i] = fetch(bits + (y + \
pict->pDrawable->y)*stride, x + pict->pDrawable->x, indexed);  else
@@ -3009,16 +3031,24 @@ static void fbFetchTransformed(PicturePt
                         int x1, x2, y1, y2, distx, idistx, disty, idisty, k;
                         FbBits *b;
                         CARD32 tl, tr, bl, br, r;
-                        xFixed_48_16 div;
+                        CARD32 ft, fb;
 
-                        div = ((xFixed_48_16)v.vector[0] << 16)/v.vector[2];
-                        x1 = div >> 16;
-                        distx = ((xFixed)div >> 8) & 0xff;
-                        x2 = x1 + 1;
-                        div = ((xFixed_48_16)v.vector[1] << 16)/v.vector[2];
-                        y1 = div >> 16;
-                        y2 = y1 + 1;
-                        disty = ((xFixed)div >> 8) & 0xff;
+                        if (projective) {
+                            xFixed_48_16 div;
+                            div = ((xFixed_48_16)v.vector[0] << 16)/v.vector[2];
+                            x1 = div >> 16;
+                            distx = ((xFixed)div >> 8) & 0xff;
+                            x2 = x1 + 1;
+                            div = ((xFixed_48_16)v.vector[1] << 16)/v.vector[2];
+                            y1 = div >> 16;
+                            y2 = y1 + 1;
+                            disty = ((xFixed)div >> 8) & 0xff;
+                        } else {
+                            x1 = v.vector[0] >> 16;
+                            distx = (v.vector[0] >> 8) & 0xff;
+                            y1 = v.vector[1] >> 16;
+                            disty = (v.vector[1] >> 8) & 0xff;
+                        }
 
                         idistx = 256 - distx;
                         idisty = 256 - disty;
@@ -3036,13 +3066,18 @@ static void fbFetchTransformed(PicturePt
                         bl = fetch(b, x1 + pict->pDrawable->x, indexed);
                         br = fetch(b, x2 + pict->pDrawable->x, indexed);
 
-                        r = 0;
-                        for (k = 0; k < 32; k += 8) {
-                            CARD32 t, b;
-                            t = FbGet8(tl,k) * idistx + FbGet8(tr,k) * distx;
-                            b = FbGet8(bl,k) * idistx + FbGet8(br,k) * distx;
-                            r |= ((((t * idisty) + (b * disty)) >> 16) & 0xff) << k;
-                        }
+                        ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
+                        fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
+                        r = (((ft * idisty + fb * disty) >> 16) & 0xff);
+                        ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
+                        fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
+                        r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
+                        ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
+                        fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
+                        r |= (((ft * idisty + fb * disty)) & 0xff0000);
+                        ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
+                        fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
+                        r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
                         buffer[i] = r;
                     }
                     v.vector[0] += unit.vector[0];
@@ -3057,16 +3092,24 @@ static void fbFetchTransformed(PicturePt
                         int x1, x2, y1, y2, distx, idistx, disty, idisty, k;
                         FbBits *b;
                         CARD32 tl, tr, bl, br, r;
-                        xFixed_48_16 div;
+                        CARD32 ft, fb;
 
-                        div = ((xFixed_48_16)v.vector[0] << 16)/v.vector[2];
-                        x1 = div >> 16;
-                        distx = ((xFixed)div >> 8) & 0xff;
-                        x2 = x1 + 1;
-                        div = ((xFixed_48_16)v.vector[1] << 16)/v.vector[2];
-                        y1 = div >> 16;
-                        y2 = y1 + 1;
-                        disty = ((xFixed)div >> 8) & 0xff;
+                        if (projective) {
+                            xFixed_48_16 div;
+                            div = ((xFixed_48_16)v.vector[0] << 16)/v.vector[2];
+                            x1 = div >> 16;
+                            distx = ((xFixed)div >> 8) & 0xff;
+                            x2 = x1 + 1;
+                            div = ((xFixed_48_16)v.vector[1] << 16)/v.vector[2];
+                            y1 = div >> 16;
+                            y2 = y1 + 1;
+                            disty = ((xFixed)div >> 8) & 0xff;
+                        } else {
+                            x1 = v.vector[0] >> 16;
+                            distx = (v.vector[0] >> 8) & 0xff;
+                            y1 = v.vector[1] >> 16;
+                            disty = (v.vector[1] >> 8) & 0xff;
+                        }
 
                         idistx = 256 - distx;
                         idisty = 256 - disty;
@@ -3088,13 +3131,18 @@ static void fbFetchTransformed(PicturePt
                         br = pixman_region_contains_point(pict->pCompositeClip, x2, \
                y2, &box)
                              ? fetch(b, x2 + pict->pDrawable->x, indexed) : 0;
 
-                        r = 0;
-                        for (k = 0; k < 32; k += 8) {
-                            CARD32 t, b;
-                            t = FbGet8(tl,k) * idistx + FbGet8(tr,k) * distx;
-                            b = FbGet8(bl,k) * idistx + FbGet8(br,k) * distx;
-                            r |= ((((t * idisty) + (b * disty)) >> 16) & 0xff) << k;
-                        }
+                        ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
+                        fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
+                        r = (((ft * idisty + fb * disty) >> 16) & 0xff);
+                        ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
+                        fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
+                        r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
+                        ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
+                        fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
+                        r |= (((ft * idisty + fb * disty)) & 0xff0000);
+                        ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
+                        fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
+                        r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
                         buffer[i] = r;
                     }
                     v.vector[0] += unit.vector[0];
@@ -3113,16 +3161,24 @@ static void fbFetchTransformed(PicturePt
                         FbBits *b;
                         CARD32 tl, tr, bl, br, r;
                         Bool x1_out, x2_out, y1_out, y2_out;
-                        xFixed_48_16 div;
+                        CARD32 ft, fb;
 
-                        div = ((xFixed_48_16)v.vector[0] << 16)/v.vector[2];
-                        x1 = div >> 16;
-                        distx = ((xFixed)div >> 8) & 0xff;
-                        x2 = x1 + 1;
-                        div = ((xFixed_48_16)v.vector[1] << 16)/v.vector[2];
-                        y1 = div >> 16;
-                        y2 = y1 + 1;
-                        disty = ((xFixed)div >> 8) & 0xff;
+                        if (projective) {
+                            xFixed_48_16 div;
+                            div = ((xFixed_48_16)v.vector[0] << 16)/v.vector[2];
+                            x1 = div >> 16;
+                            distx = ((xFixed)div >> 8) & 0xff;
+                            x2 = x1 + 1;
+                            div = ((xFixed_48_16)v.vector[1] << 16)/v.vector[2];
+                            y1 = div >> 16;
+                            y2 = y1 + 1;
+                            disty = ((xFixed)div >> 8) & 0xff;
+                        } else {
+                            x1 = v.vector[0] >> 16;
+                            distx = (v.vector[0] >> 8) & 0xff;
+                            y1 = v.vector[1] >> 16;
+                            disty = (v.vector[1] >> 8) & 0xff;
+                        }
 
                         idistx = 256 - distx;
                         idisty = 256 - disty;
@@ -3141,13 +3197,18 @@ static void fbFetchTransformed(PicturePt
                         bl = x1_out|y2_out ? 0 : fetch(b, x_off, indexed);
                         br = x2_out|y2_out ? 0 : fetch(b, x_off + 1, indexed);
 
-                        r = 0;
-                        for (k = 0; k < 32; k += 8) {
-                            CARD32 t, b;
-                            t = FbGet8(tl,k) * idistx + FbGet8(tr,k) * distx;
-                            b = FbGet8(bl,k) * idistx + FbGet8(br,k) * distx;
-                            r |= ((((t * idisty) + (b * disty)) >> 16) & 0xff) << k;
-                        }
+                        ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
+                        fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
+                        r = (((ft * idisty + fb * disty) >> 16) & 0xff);
+                        ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
+                        fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
+                        r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
+                        ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
+                        fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
+                        r |= (((ft * idisty + fb * disty)) & 0xff0000);
+                        ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
+                        fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
+                        r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
                         buffer[i] = r;
                     }
                     v.vector[0] += unit.vector[0];
@@ -3162,16 +3223,24 @@ static void fbFetchTransformed(PicturePt
                         int x1, x2, y1, y2, distx, idistx, disty, idisty, x_off, k;
                         FbBits *b;
                         CARD32 tl, tr, bl, br, r;
-                        xFixed_48_16 div;
+                        CARD32 ft, fb;
 
-                        div = ((xFixed_48_16)v.vector[0] << 16)/v.vector[2];
-                        x1 = div >> 16;
-                        distx = ((xFixed)div >> 8) & 0xff;
-                        x2 = x1 + 1;
-                        div = ((xFixed_48_16)v.vector[1] << 16)/v.vector[2];
-                        y1 = div >> 16;
-                        y2 = y1 + 1;
-                        disty = ((xFixed)div >> 8) & 0xff;
+                        if (projective) {
+                            xFixed_48_16 div;
+                            div = ((xFixed_48_16)v.vector[0] << 16)/v.vector[2];
+                            x1 = div >> 16;
+                            distx = ((xFixed)div >> 8) & 0xff;
+                            x2 = x1 + 1;
+                            div = ((xFixed_48_16)v.vector[1] << 16)/v.vector[2];
+                            y1 = div >> 16;
+                            y2 = y1 + 1;
+                            disty = ((xFixed)div >> 8) & 0xff;
+                        } else {
+                            x1 = v.vector[0] >> 16;
+                            distx = (v.vector[0] >> 8) & 0xff;
+                            y1 = v.vector[1] >> 16;
+                            disty = (v.vector[1] >> 8) & 0xff;
+                        }
 
                         idistx = 256 - distx;
                         idisty = 256 - disty;
@@ -3189,13 +3258,18 @@ static void fbFetchTransformed(PicturePt
                         br = pixman_region_contains_point(pict->pCompositeClip, x2, \
y2, &box)  ? fetch(b, x_off + 1, indexed) : 0;
 
-                        r = 0;
-                        for (k = 0; k < 32; k += 8) {
-                            CARD32 t, b;
-                            t = FbGet8(tl,k) * idistx + FbGet8(tr,k) * distx;
-                            b = FbGet8(bl,k) * idistx + FbGet8(br,k) * distx;
-                            r |= ((((t * idisty) + (b * disty)) >> 16) & 0xff) << k;
-                        }
+                        ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
+                        fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
+                        r = (((ft * idisty + fb * disty) >> 16) & 0xff);
+                        ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
+                        fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
+                        r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
+                        ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
+                        fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
+                        r |= (((ft * idisty + fb * disty)) & 0xff0000);
+                        ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
+                        fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
+                        r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
                         buffer[i] = r;
                     }
                     v.vector[0] += unit.vector[0];



[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic