[prev in list] [next in list] [prev in thread] [next in thread]
List: cfe-commits
Subject: Re: [PATCH] use conversion of builtin vector types to enable constant propagation
From: Matthias Kretz <kretz () kde ! org>
Date: 2013-10-01 7:55:34
Message-ID: fab9b6acb5e19499a096f4a0eb333722 () llvm-reviews ! chandlerc ! com
[Download RAW message or body]
Testcase:
// Each conversion intrinsic is exercised twice: a zero-argument overload that
// feeds it a splatted constant, and a one-argument overload that feeds it a
// runtime vector. "good"/"bad" are the author's verdicts on the code generated
// for each overload, as shown in the IR and x86 listings that follow.

// _mm_cvtepi32_pd: the IR for the runtime overload converts i32 lanes 0 and 1
// to double with sitofp.
__m128d cvt1() { return _mm_cvtepi32_pd(_mm_set1_epi32(2)); } // good: IR is a single constant <2 x double> return
__m128d cvt1(__m128i x) { return _mm_cvtepi32_pd(x); } // bad: IR converts element by element (extract / sitofp / insert)
// _mm_cvtps_pd: the IR for the runtime overload widens float lanes 0 and 1
// to double with fpext.
__m128d cvt2() { return _mm_cvtps_pd(_mm_set1_ps(2.f)); } // good: IR is a single constant <2 x double> return
__m128d cvt2(__m128 x) { return _mm_cvtps_pd(x); } // bad: IR converts element by element (extract / fpext / insert)
// _mm_cvtpd_ps: the IR for the runtime overload narrows both doubles to float
// with fptrunc and writes 0.0 into result lanes 2 and 3.
__m128 cvt3() { return _mm_cvtpd_ps(_mm_set1_pd(2.)); } // good: IR is a single constant <4 x float> return
__m128 cvt3(__m128d x) { return _mm_cvtpd_ps(x); } // bad: IR converts element by element (extract / fptrunc / insert)
// _mm_cvttpd_epi32: the IR for the runtime overload converts both doubles to
// i32 with fptosi and writes 0 into result lanes 2 and 3.
__m128i cvt4() { return _mm_cvttpd_epi32(_mm_set1_pd(2.)); } // good: IR is a single constant <2 x i64> return
__m128i cvt4(__m128d x) { return _mm_cvttpd_epi32(x); } // bad: IR converts element by element (extract / fptosi / insert)
// _mm_cvttps_epi32: the IR for the runtime overload converts all four floats
// to i32 with fptosi.
__m128i cvt5() { return _mm_cvttps_epi32(_mm_set1_ps(2.f)); } // good: IR is a single constant <2 x i64> return
__m128i cvt5(__m128 x) { return _mm_cvttps_epi32(x); } // bad: IR converts element by element (extract / fptosi / insert)
resulting IR:
; cvt1() with the constant operand: the whole body is one ret of a constant
; <2 x double> whose lanes are both 2.0; no conversion instruction remains.
; (#0 names an attribute group that is not part of this listing.)
define <2 x double> @_Z4cvt1v() #0 {
entry:
ret <2 x double> <double 2.000000e+00, double 2.000000e+00>
}
; cvt1 with a runtime <2 x i64> argument: the argument is reinterpreted as
; <4 x i32>, then lanes 0 and 1 are converted one at a time and reassembled
; into a <2 x double>. Lanes 2 and 3 of the input are never read.
define <2 x double> @_Z4cvt1Dv2_x(<2 x i64> %x) #0 {
entry:
; View the 128-bit argument as four 32-bit integers.
%0 = bitcast <2 x i64> %x to <4 x i32>
; Lane 0: extract, signed int -> double, insert into result lane 0.
%vecext.i = extractelement <4 x i32> %0, i32 0
%conv.i = sitofp i32 %vecext.i to double
%vecinit.i = insertelement <2 x double> undef, double %conv.i, i32 0
; Lane 1: same sequence, inserted into result lane 1.
%vecext1.i = extractelement <4 x i32> %0, i32 1
%conv2.i = sitofp i32 %vecext1.i to double
%vecinit3.i = insertelement <2 x double> %vecinit.i, double %conv2.i, i32 1
ret <2 x double> %vecinit3.i
}
; cvt2() with the constant operand: the body is one ret of a constant
; <2 x double> with both lanes 2.0.
define <2 x double> @_Z4cvt2v() #0 {
entry:
ret <2 x double> <double 2.000000e+00, double 2.000000e+00>
}
; cvt2 with a runtime <4 x float> argument: float lanes 0 and 1 are widened
; to double one at a time and reassembled into a <2 x double>. Lanes 2 and 3
; of the input are never read.
define <2 x double> @_Z4cvt2Dv4_f(<4 x float> %x) #0 {
entry:
; Lane 0: extract, float -> double, insert into result lane 0.
%vecext.i = extractelement <4 x float> %x, i32 0
%conv.i = fpext float %vecext.i to double
%vecinit.i = insertelement <2 x double> undef, double %conv.i, i32 0
; Lane 1: same sequence, inserted into result lane 1.
%vecext1.i = extractelement <4 x float> %x, i32 1
%conv2.i = fpext float %vecext1.i to double
%vecinit3.i = insertelement <2 x double> %vecinit.i, double %conv2.i, i32 1
ret <2 x double> %vecinit3.i
}
define <4 x float> @_Z4cvt3v() #0 {
entry:
ret <4 x float> <float 2.000000e+00, float 2.000000e+00, float 0.000000e+00, float 0.000000e+00>
}
; cvt3 with a runtime <2 x double> argument: both doubles are narrowed to
; float one at a time and placed in lanes 0 and 1 of a <4 x float>; lanes 2
; and 3 are then filled with 0.0.
define <4 x float> @_Z4cvt3Dv2_d(<2 x double> %x) #0 {
entry:
; Lane 0: extract, double -> float, insert into result lane 0.
%vecext.i = extractelement <2 x double> %x, i32 0
%conv.i = fptrunc double %vecext.i to float
%vecinit.i = insertelement <4 x float> undef, float %conv.i, i32 0
; Lane 1: same sequence, inserted into result lane 1.
%vecext1.i = extractelement <2 x double> %x, i32 1
%conv2.i = fptrunc double %vecext1.i to float
%vecinit3.i = insertelement <4 x float> %vecinit.i, float %conv2.i, i32 1
; Upper two result lanes are set to zero.
%vecinit4.i = insertelement <4 x float> %vecinit3.i, float 0.000000e+00, i32 2
%vecinit5.i = insertelement <4 x float> %vecinit4.i, float 0.000000e+00, i32 3
ret <4 x float> %vecinit5.i
}
; cvt4() with the constant operand: the body is one ret of a constant
; <2 x i64>. 8589934594 is 0x200000002 (the x86 listing materialises it with
; movabs $0x200000002), i.e. two 32-bit values of 2 packed into the low i64;
; the high i64 is zero.
define <2 x i64> @_Z4cvt4v() #0 {
entry:
ret <2 x i64> <i64 8589934594, i64 0>
}
; cvt4 with a runtime <2 x double> argument: both doubles are converted to
; signed i32 one at a time and placed in lanes 0 and 1 of a <4 x i32>; lanes
; 2 and 3 are zeroed and the vector is returned reinterpreted as <2 x i64>.
define <2 x i64> @_Z4cvt4Dv2_d(<2 x double> %x) #0 {
entry:
; Lane 0: extract, double -> signed i32, insert into result lane 0.
%vecext.i = extractelement <2 x double> %x, i32 0
%conv.i = fptosi double %vecext.i to i32
%vecinit.i = insertelement <4 x i32> undef, i32 %conv.i, i32 0
; Lane 1: same sequence, inserted into result lane 1.
%vecext1.i = extractelement <2 x double> %x, i32 1
%conv2.i = fptosi double %vecext1.i to i32
%vecinit3.i = insertelement <4 x i32> %vecinit.i, i32 %conv2.i, i32 1
; Upper two result lanes are set to zero.
%vecinit4.i = insertelement <4 x i32> %vecinit3.i, i32 0, i32 2
%vecinit5.i = insertelement <4 x i32> %vecinit4.i, i32 0, i32 3
; Reinterpret as <2 x i64> to match the declared return type.
%0 = bitcast <4 x i32> %vecinit5.i to <2 x i64>
ret <2 x i64> %0
}
; cvt5() with the constant operand: the body is one ret of a constant
; <2 x i64>. Each i64 is 8589934594 (0x200000002), i.e. two 32-bit values of
; 2, so all four 32-bit lanes of the result hold 2.
define <2 x i64> @_Z4cvt5v() #0 {
entry:
ret <2 x i64> <i64 8589934594, i64 8589934594>
}
; cvt5 with a runtime <4 x float> argument: all four floats are converted to
; signed i32 one at a time, reassembled into a <4 x i32>, and returned
; reinterpreted as <2 x i64>.
define <2 x i64> @_Z4cvt5Dv4_f(<4 x float> %x) #0 {
entry:
; Lane 0: extract, float -> signed i32, insert into result lane 0.
%vecext.i = extractelement <4 x float> %x, i32 0
%conv.i = fptosi float %vecext.i to i32
%vecinit.i = insertelement <4 x i32> undef, i32 %conv.i, i32 0
; Lane 1.
%vecext1.i = extractelement <4 x float> %x, i32 1
%conv2.i = fptosi float %vecext1.i to i32
%vecinit3.i = insertelement <4 x i32> %vecinit.i, i32 %conv2.i, i32 1
; Lane 2.
%vecext4.i = extractelement <4 x float> %x, i32 2
%conv5.i = fptosi float %vecext4.i to i32
%vecinit6.i = insertelement <4 x i32> %vecinit3.i, i32 %conv5.i, i32 2
; Lane 3.
%vecext7.i = extractelement <4 x float> %x, i32 3
%conv8.i = fptosi float %vecext7.i to i32
%vecinit9.i = insertelement <4 x i32> %vecinit6.i, i32 %conv8.i, i32 3
; Reinterpret as <2 x i64> to match the declared return type.
%0 = bitcast <4 x i32> %vecinit9.i to <2 x i64>
ret <2 x i64> %0
}
resulting x86:
0000000000000020 <cvt1()>:
20: · c5 f8 28 05 00 00 00 00 · vmovaps 0x0(%rip),%xmm0 # 28 <cvt1()+0x8>
· 24: R_X86_64_PC32 · .LCPI2_0-0x4
28: · c3 · retq
0000000000000030 <cvt1(long long __vector(2))>:
30: · c4 e3 79 16 c0 01 · vpextrd $0x1,%xmm0,%eax
36: · c5 fb 2a c8 · vcvtsi2sd %eax,%xmm0,%xmm1
3a: · c5 f9 7e c0 · vmovd %xmm0,%eax
3e: · c5 fb 2a c0 · vcvtsi2sd %eax,%xmm0,%xmm0
42: · c5 f9 14 c1 · vunpcklpd %xmm1,%xmm0,%xmm0
46: · c3 · retq
0000000000000050 <cvt2()>:
50: · c5 f8 28 05 00 00 00 00 · vmovaps 0x0(%rip),%xmm0 # 58 <cvt2()+0x8>
· 54: R_X86_64_PC32 · .LCPI4_0-0x4
58: · c3 · retq
0000000000000060 <cvt2(float __vector(4))>:
60: · c5 fa 5a c8 · vcvtss2sd %xmm0,%xmm0,%xmm1
64: · c5 f9 70 c0 01 · vpshufd $0x1,%xmm0,%xmm0
69: · c5 fa 5a c0 · vcvtss2sd %xmm0,%xmm0,%xmm0
6d: · c5 f1 14 c0 · vunpcklpd %xmm0,%xmm1,%xmm0
71: · c3 · retq
0000000000000080 <cvt3()>:
80: · c5 f8 28 05 00 00 00 00 · vmovaps 0x0(%rip),%xmm0 # 88 <cvt3()+0x8>
· 84: R_X86_64_PC32 · .LCPI6_0-0x4
88: · c3 · retq
0000000000000090 <cvt3(double __vector(2))>:
90: · c5 fb 5a c8 · vcvtsd2ss %xmm0,%xmm0,%xmm1
94: · c5 f9 15 c0 · vunpckhpd %xmm0,%xmm0,%xmm0
98: · c5 fb 5a c0 · vcvtsd2ss %xmm0,%xmm0,%xmm0
9c: · c5 f0 14 c0 · vunpcklps %xmm0,%xmm1,%xmm0
a0: · c5 fa 7e c0 · vmovq %xmm0,%xmm0
a4: · c3 · retq
00000000000000b0 <cvt4()>:
b0: · 48 b8 02 00 00 00 02 00 00 00 · movabs $0x200000002,%rax
ba: · c4 e1 f9 6e c0 · vmovq %rax,%xmm0
bf: · c3 · retq
00000000000000c0 <cvt4(double __vector(2))>:
c0: · c5 fb 2c c0 · vcvttsd2si %xmm0,%eax
c4: · c5 f9 6e c8 · vmovd %eax,%xmm1
c8: · c5 f9 15 c0 · vunpckhpd %xmm0,%xmm0,%xmm0
cc: · c5 fb 2c c0 · vcvttsd2si %xmm0,%eax
d0: · c5 f9 6e c0 · vmovd %eax,%xmm0
d4: · c5 f1 62 c0 · vpunpckldq %xmm0,%xmm1,%xmm0
d8: · c5 fa 7e c0 · vmovq %xmm0,%xmm0
dc: · c3 · retq
00000000000000e0 <cvt5()>:
e0: · c5 f8 28 05 00 00 00 00 · vmovaps 0x0(%rip),%xmm0 # e8 <cvt5()+0x8>
· e4: R_X86_64_PC32 · .LCPI10_0-0x4
e8: · c3 · retq
00000000000000f0 <cvt5(float __vector(4))>:
f0: · c5 f9 70 c8 01 · vpshufd $0x1,%xmm0,%xmm1
f5: · c5 fa 2c c1 · vcvttss2si %xmm1,%eax
f9: · c5 fa 2c c8 · vcvttss2si %xmm0,%ecx
fd: · c5 f9 6e c9 · vmovd %ecx,%xmm1
101: · c4 e3 71 22 c8 01 · vpinsrd $0x1,%eax,%xmm1,%xmm1
107: · c5 f8 12 d0 · vmovhlps %xmm0,%xmm0,%xmm2
10b: · c5 fa 2c c2 · vcvttss2si %xmm2,%eax
10f: · c4 e3 71 22 c8 02 · vpinsrd $0x2,%eax,%xmm1,%xmm1
115: · c5 f9 70 c0 03 · vpshufd $0x3,%xmm0,%xmm0
11a: · c5 fa 2c c0 · vcvttss2si %xmm0,%eax
11e: · c4 e3 71 22 c0 03 · vpinsrd $0x3,%eax,%xmm1,%xmm0
124: · c3 · retq
http://llvm-reviews.chandlerc.com/D1793
_______________________________________________
cfe-commits mailing list
cfe-commits@cs.uiuc.edu
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic