[prev in list] [next in list] [prev in thread] [next in thread] 

List:       cfe-commits
Subject:    Re: [PATCH] use conversion of builtin vector types to enable constant propagation
From:       Matthias Kretz <kretz () kde ! org>
Date:       2013-10-01 7:55:34
Message-ID: fab9b6acb5e19499a096f4a0eb333722 () llvm-reviews ! chandlerc ! com
[Download RAW message or body]


  Testcase:


    __m128d cvt1() { return _mm_cvtepi32_pd(_mm_set1_epi32(2)); }  // good
    __m128d cvt1(__m128i x) { return _mm_cvtepi32_pd(x); }         // bad

    __m128d cvt2() { return _mm_cvtps_pd(_mm_set1_ps(2.f)); }  // good
    __m128d cvt2(__m128 x) { return _mm_cvtps_pd(x); }         // bad

    __m128 cvt3() { return _mm_cvtpd_ps(_mm_set1_pd(2.)); }  // good
    __m128 cvt3(__m128d x) { return _mm_cvtpd_ps(x); }       // bad

    __m128i cvt4() { return _mm_cvttpd_epi32(_mm_set1_pd(2.)); }  // good
    __m128i cvt4(__m128d x) { return _mm_cvttpd_epi32(x); }       // bad

    __m128i cvt5() { return _mm_cvttps_epi32(_mm_set1_ps(2.f)); }  // good
    __m128i cvt5(__m128 x) { return _mm_cvttps_epi32(x); }         // bad

  resulting IR:



    define <2 x double> @_Z4cvt1v() #0 {
    entry:
      ret <2 x double> <double 2.000000e+00, double 2.000000e+00>
    }
    define <2 x double> @_Z4cvt1Dv2_x(<2 x i64> %x) #0 {
    entry:
      %0 = bitcast <2 x i64> %x to <4 x i32>
      %vecext.i = extractelement <4 x i32> %0, i32 0
      %conv.i = sitofp i32 %vecext.i to double
      %vecinit.i = insertelement <2 x double> undef, double %conv.i, i32 0
      %vecext1.i = extractelement <4 x i32> %0, i32 1
      %conv2.i = sitofp i32 %vecext1.i to double
      %vecinit3.i = insertelement <2 x double> %vecinit.i, double %conv2.i, i32 1
      ret <2 x double> %vecinit3.i
    }
    define <2 x double> @_Z4cvt2v() #0 {
    entry:
      ret <2 x double> <double 2.000000e+00, double 2.000000e+00>
    }
    define <2 x double> @_Z4cvt2Dv4_f(<4 x float> %x) #0 {
    entry:
      %vecext.i = extractelement <4 x float> %x, i32 0
      %conv.i = fpext float %vecext.i to double
      %vecinit.i = insertelement <2 x double> undef, double %conv.i, i32 0
      %vecext1.i = extractelement <4 x float> %x, i32 1
      %conv2.i = fpext float %vecext1.i to double
      %vecinit3.i = insertelement <2 x double> %vecinit.i, double %conv2.i, i32 1
      ret <2 x double> %vecinit3.i
    }
    define <4 x float> @_Z4cvt3v() #0 {
    entry:
      ret <4 x float> <float 2.000000e+00, float 2.000000e+00, float 0.000000e+00, float 0.000000e+00>
    }
    define <4 x float> @_Z4cvt3Dv2_d(<2 x double> %x) #0 {
    entry:
      %vecext.i = extractelement <2 x double> %x, i32 0
      %conv.i = fptrunc double %vecext.i to float
      %vecinit.i = insertelement <4 x float> undef, float %conv.i, i32 0
      %vecext1.i = extractelement <2 x double> %x, i32 1
      %conv2.i = fptrunc double %vecext1.i to float
      %vecinit3.i = insertelement <4 x float> %vecinit.i, float %conv2.i, i32 1
      %vecinit4.i = insertelement <4 x float> %vecinit3.i, float 0.000000e+00, i32 2
      %vecinit5.i = insertelement <4 x float> %vecinit4.i, float 0.000000e+00, i32 3
      ret <4 x float> %vecinit5.i
    }
    define <2 x i64> @_Z4cvt4v() #0 {
    entry:
      ret <2 x i64> <i64 8589934594, i64 0>
    }
    define <2 x i64> @_Z4cvt4Dv2_d(<2 x double> %x) #0 {
    entry:
      %vecext.i = extractelement <2 x double> %x, i32 0
      %conv.i = fptosi double %vecext.i to i32
      %vecinit.i = insertelement <4 x i32> undef, i32 %conv.i, i32 0
      %vecext1.i = extractelement <2 x double> %x, i32 1
      %conv2.i = fptosi double %vecext1.i to i32
      %vecinit3.i = insertelement <4 x i32> %vecinit.i, i32 %conv2.i, i32 1
      %vecinit4.i = insertelement <4 x i32> %vecinit3.i, i32 0, i32 2
      %vecinit5.i = insertelement <4 x i32> %vecinit4.i, i32 0, i32 3
      %0 = bitcast <4 x i32> %vecinit5.i to <2 x i64>
      ret <2 x i64> %0
    }
    define <2 x i64> @_Z4cvt5v() #0 {
    entry:
      ret <2 x i64> <i64 8589934594, i64 8589934594>
    }
    define <2 x i64> @_Z4cvt5Dv4_f(<4 x float> %x) #0 {
    entry:
      %vecext.i = extractelement <4 x float> %x, i32 0
      %conv.i = fptosi float %vecext.i to i32
      %vecinit.i = insertelement <4 x i32> undef, i32 %conv.i, i32 0
      %vecext1.i = extractelement <4 x float> %x, i32 1
      %conv2.i = fptosi float %vecext1.i to i32
      %vecinit3.i = insertelement <4 x i32> %vecinit.i, i32 %conv2.i, i32 1
      %vecext4.i = extractelement <4 x float> %x, i32 2
      %conv5.i = fptosi float %vecext4.i to i32
      %vecinit6.i = insertelement <4 x i32> %vecinit3.i, i32 %conv5.i, i32 2
      %vecext7.i = extractelement <4 x float> %x, i32 3
      %conv8.i = fptosi float %vecext7.i to i32
      %vecinit9.i = insertelement <4 x i32> %vecinit6.i, i32 %conv8.i, i32 3
      %0 = bitcast <4 x i32> %vecinit9.i to <2 x i64>
      ret <2 x i64> %0
    }

  resulting x86:


    0000000000000020 <cvt1()>:
      20: ·       c5 f8 28 05 00 00 00 00  · vmovaps 0x0(%rip),%xmm0        # 28 <cvt1()+0x8>
                  24: R_X86_64_PC32 ·        .LCPI2_0-0x4
      28: ·       c3                    ·    retq

    0000000000000030 <cvt1(long long __vector(2))>:
      30: ·       c4 e3 79 16 c0 01     ·    vpextrd $0x1,%xmm0,%eax
      36: ·       c5 fb 2a c8           ·    vcvtsi2sd %eax,%xmm0,%xmm1
      3a: ·       c5 f9 7e c0           ·    vmovd  %xmm0,%eax
      3e: ·       c5 fb 2a c0           ·    vcvtsi2sd %eax,%xmm0,%xmm0
      42: ·       c5 f9 14 c1           ·    vunpcklpd %xmm1,%xmm0,%xmm0
      46: ·       c3                    ·    retq

    0000000000000050 <cvt2()>:
      50: ·       c5 f8 28 05 00 00 00 00  · vmovaps 0x0(%rip),%xmm0        # 58 <cvt2()+0x8>
                  54: R_X86_64_PC32 ·        .LCPI4_0-0x4
      58: ·       c3                    ·    retq

    0000000000000060 <cvt2(float __vector(4))>:
      60: ·       c5 fa 5a c8           ·    vcvtss2sd %xmm0,%xmm0,%xmm1
      64: ·       c5 f9 70 c0 01        ·    vpshufd $0x1,%xmm0,%xmm0
      69: ·       c5 fa 5a c0           ·    vcvtss2sd %xmm0,%xmm0,%xmm0
      6d: ·       c5 f1 14 c0           ·    vunpcklpd %xmm0,%xmm1,%xmm0
      71: ·       c3                    ·    retq

    0000000000000080 <cvt3()>:
      80: ·       c5 f8 28 05 00 00 00 00  · vmovaps 0x0(%rip),%xmm0        # 88 <cvt3()+0x8>
                  84: R_X86_64_PC32 ·        .LCPI6_0-0x4
      88: ·       c3                    ·    retq

    0000000000000090 <cvt3(double __vector(2))>:
      90: ·       c5 fb 5a c8           ·    vcvtsd2ss %xmm0,%xmm0,%xmm1
      94: ·       c5 f9 15 c0           ·    vunpckhpd %xmm0,%xmm0,%xmm0
      98: ·       c5 fb 5a c0           ·    vcvtsd2ss %xmm0,%xmm0,%xmm0
      9c: ·       c5 f0 14 c0           ·    vunpcklps %xmm0,%xmm1,%xmm0
      a0: ·       c5 fa 7e c0           ·    vmovq  %xmm0,%xmm0
      a4: ·       c3                    ·    retq

    00000000000000b0 <cvt4()>:
      b0: ·       48 b8 02 00 00 00 02 00 00 00  ·        movabs $0x200000002,%rax
      ba: ·       c4 e1 f9 6e c0        ·    vmovq  %rax,%xmm0
      bf: ·       c3                    ·    retq

    00000000000000c0 <cvt4(double __vector(2))>:
      c0: ·       c5 fb 2c c0           ·    vcvttsd2si %xmm0,%eax
      c4: ·       c5 f9 6e c8           ·    vmovd  %eax,%xmm1
      c8: ·       c5 f9 15 c0           ·    vunpckhpd %xmm0,%xmm0,%xmm0
      cc: ·       c5 fb 2c c0           ·    vcvttsd2si %xmm0,%eax
      d0: ·       c5 f9 6e c0           ·    vmovd  %eax,%xmm0
      d4: ·       c5 f1 62 c0           ·    vpunpckldq %xmm0,%xmm1,%xmm0
      d8: ·       c5 fa 7e c0           ·    vmovq  %xmm0,%xmm0
      dc: ·       c3                    ·    retq

    00000000000000e0 <cvt5()>:
      e0: ·       c5 f8 28 05 00 00 00 00  · vmovaps 0x0(%rip),%xmm0        # e8 <cvt5()+0x8>
                  e4: R_X86_64_PC32 ·        .LCPI10_0-0x4
      e8: ·       c3                    ·    retq

    00000000000000f0 <cvt5(float __vector(4))>:
      f0: ·       c5 f9 70 c8 01        ·    vpshufd $0x1,%xmm0,%xmm1
      f5: ·       c5 fa 2c c1           ·    vcvttss2si %xmm1,%eax
      f9: ·       c5 fa 2c c8           ·    vcvttss2si %xmm0,%ecx
      fd: ·       c5 f9 6e c9           ·    vmovd  %ecx,%xmm1
     101: ·       c4 e3 71 22 c8 01     ·    vpinsrd $0x1,%eax,%xmm1,%xmm1
     107: ·       c5 f8 12 d0           ·    vmovhlps %xmm0,%xmm0,%xmm2
     10b: ·       c5 fa 2c c2           ·    vcvttss2si %xmm2,%eax
     10f: ·       c4 e3 71 22 c8 02     ·    vpinsrd $0x2,%eax,%xmm1,%xmm1
     115: ·       c5 f9 70 c0 03        ·    vpshufd $0x3,%xmm0,%xmm0
     11a: ·       c5 fa 2c c0           ·    vcvttss2si %xmm0,%eax
     11e: ·       c4 e3 71 22 c0 03     ·    vpinsrd $0x3,%eax,%xmm1,%xmm0
     124: ·       c3                    ·    retq

http://llvm-reviews.chandlerc.com/D1793
_______________________________________________
cfe-commits mailing list
cfe-commits@cs.uiuc.edu
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic