Skip to content

Commit 4298d06

Browse files
committed May 25, 2016
[X86][SSE] Replace (V)CVTDQ2PD(Y) and (V)CVTPS2PD(Y) lossless conversion intrinsics with generic IR
Follow-up to the D20528 clang patch: this removes the (V)CVTDQ2PD(Y) and (V)CVTPS2PD(Y) LLVM intrinsics and auto-upgrades existing calls to generic sitofp/fpext IR instead.
Differential Revision: http://reviews.llvm.org/D20568
llvm-svn: 270678
1 parent 12e322a commit 4298d06

File tree

8 files changed

+170
-143
lines changed

8 files changed

+170
-143
lines changed
 

‎llvm/include/llvm/IR/IntrinsicsX86.td

-8
Original file line numberDiff line numberDiff line change
@@ -485,8 +485,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
485485

486486
// Conversion ops
487487
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
488-
def int_x86_sse2_cvtdq2pd : GCCBuiltin<"__builtin_ia32_cvtdq2pd">,
489-
Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
490488
def int_x86_sse2_cvtdq2ps : GCCBuiltin<"__builtin_ia32_cvtdq2ps">,
491489
Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
492490
def int_x86_sse2_cvtpd2dq : GCCBuiltin<"__builtin_ia32_cvtpd2dq">,
@@ -499,8 +497,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
499497
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
500498
def int_x86_sse2_cvttps2dq : GCCBuiltin<"__builtin_ia32_cvttps2dq">,
501499
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
502-
def int_x86_sse2_cvtps2pd : GCCBuiltin<"__builtin_ia32_cvtps2pd">,
503-
Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
504500
def int_x86_sse2_cvtsd2si : GCCBuiltin<"__builtin_ia32_cvtsd2si">,
505501
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
506502
def int_x86_sse2_cvtsd2si64 : GCCBuiltin<"__builtin_ia32_cvtsd2si64">,
@@ -1762,16 +1758,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
17621758

17631759
// Vector convert
17641760
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
1765-
def int_x86_avx_cvtdq2_pd_256 : GCCBuiltin<"__builtin_ia32_cvtdq2pd256">,
1766-
Intrinsic<[llvm_v4f64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
17671761
def int_x86_avx_cvtdq2_ps_256 : GCCBuiltin<"__builtin_ia32_cvtdq2ps256">,
17681762
Intrinsic<[llvm_v8f32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
17691763
def int_x86_avx_cvt_pd2_ps_256 : GCCBuiltin<"__builtin_ia32_cvtpd2ps256">,
17701764
Intrinsic<[llvm_v4f32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
17711765
def int_x86_avx_cvt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvtps2dq256">,
17721766
Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
1773-
def int_x86_avx_cvt_ps2_pd_256 : GCCBuiltin<"__builtin_ia32_cvtps2pd256">,
1774-
Intrinsic<[llvm_v4f64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
17751767
def int_x86_avx_cvtt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvttpd2dq256">,
17761768
Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
17771769
def int_x86_avx_cvt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvtpd2dq256">,

‎llvm/lib/IR/AutoUpgrade.cpp

+27
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,10 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
178178
Name.startswith("x86.avx2.pbroadcast") ||
179179
Name.startswith("x86.avx.vpermil.") ||
180180
Name.startswith("x86.sse41.pmovsx") ||
181+
Name == "x86.sse2.cvtdq2pd" ||
182+
Name == "x86.sse2.cvtps2pd" ||
183+
Name == "x86.avx.cvtdq2.pd.256" ||
184+
Name == "x86.avx.cvt.ps2.pd.256" ||
181185
Name == "x86.avx.vinsertf128.pd.256" ||
182186
Name == "x86.avx.vinsertf128.ps.256" ||
183187
Name == "x86.avx.vinsertf128.si.256" ||
@@ -397,6 +401,29 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
397401
"pcmpgt");
398402
// need to sign extend since icmp returns vector of i1
399403
Rep = Builder.CreateSExt(Rep, CI->getType(), "");
404+
} else if (Name == "llvm.x86.sse2.cvtdq2pd" ||
405+
Name == "llvm.x86.sse2.cvtps2pd" ||
406+
Name == "llvm.x86.avx.cvtdq2.pd.256" ||
407+
Name == "llvm.x86.avx.cvt.ps2.pd.256") {
408+
// Lossless i32/float to double conversion.
409+
// Extract the bottom elements if necessary and convert to double vector.
410+
Value *Src = CI->getArgOperand(0);
411+
VectorType *SrcTy = cast<VectorType>(Src->getType());
412+
VectorType *DstTy = cast<VectorType>(CI->getType());
413+
Rep = CI->getArgOperand(0);
414+
415+
unsigned NumDstElts = DstTy->getNumElements();
416+
if (NumDstElts < SrcTy->getNumElements()) {
417+
assert(NumDstElts == 2 && "Unexpected vector size");
418+
const int ShuffleMask[2] = { 0, 1 };
419+
Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy), ShuffleMask);
420+
}
421+
422+
bool Int2Double = (StringRef::npos != Name.find("cvtdq2"));
423+
if (Int2Double)
424+
Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
425+
else
426+
Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
400427
} else if (Name == "llvm.x86.avx.movnt.dq.256" ||
401428
Name == "llvm.x86.avx.movnt.ps.256" ||
402429
Name == "llvm.x86.avx.movnt.pd.256") {

‎llvm/lib/Target/X86/X86InstrSSE.td

+15-29
Original file line numberDiff line numberDiff line change
@@ -2163,30 +2163,24 @@ def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
21632163
let Predicates = [HasAVX] in {
21642164
// SSE2 instructions without OpSize prefix
21652165
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2166-
"vcvtps2pd\t{$src, $dst|$dst, $src}",
2167-
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
2168-
IIC_SSE_CVT_PD_RR>, PS, VEX, Sched<[WriteCvtF2F]>;
2166+
"vcvtps2pd\t{$src, $dst|$dst, $src}",
2167+
[], IIC_SSE_CVT_PD_RR>, PS, VEX, Sched<[WriteCvtF2F]>;
21692168
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
21702169
"vcvtps2pd\t{$src, $dst|$dst, $src}",
21712170
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
21722171
IIC_SSE_CVT_PD_RM>, PS, VEX, Sched<[WriteCvtF2FLd]>;
21732172
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
21742173
"vcvtps2pd\t{$src, $dst|$dst, $src}",
2175-
[(set VR256:$dst,
2176-
(int_x86_avx_cvt_ps2_pd_256 VR128:$src))],
2177-
IIC_SSE_CVT_PD_RR>, PS, VEX, VEX_L, Sched<[WriteCvtF2F]>;
2174+
[], IIC_SSE_CVT_PD_RR>, PS, VEX, VEX_L, Sched<[WriteCvtF2F]>;
21782175
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
21792176
"vcvtps2pd\t{$src, $dst|$dst, $src}",
2180-
[(set VR256:$dst,
2181-
(int_x86_avx_cvt_ps2_pd_256 (loadv4f32 addr:$src)))],
2182-
IIC_SSE_CVT_PD_RM>, PS, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
2177+
[], IIC_SSE_CVT_PD_RM>, PS, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
21832178
}
21842179

21852180
let Predicates = [UseSSE2] in {
21862181
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2187-
"cvtps2pd\t{$src, $dst|$dst, $src}",
2188-
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
2189-
IIC_SSE_CVT_PD_RR>, PS, Sched<[WriteCvtF2F]>;
2182+
"cvtps2pd\t{$src, $dst|$dst, $src}",
2183+
[], IIC_SSE_CVT_PD_RR>, PS, Sched<[WriteCvtF2F]>;
21902184
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
21912185
"cvtps2pd\t{$src, $dst|$dst, $src}",
21922186
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
@@ -2197,33 +2191,25 @@ def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
21972191
let Predicates = [HasAVX] in {
21982192
let hasSideEffects = 0, mayLoad = 1 in
21992193
def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
2200-
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
2201-
[]>, VEX, Sched<[WriteCvtI2FLd]>;
2194+
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
2195+
[]>, VEX, Sched<[WriteCvtI2FLd]>;
22022196
def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2203-
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
2204-
[(set VR128:$dst,
2205-
(int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX,
2206-
Sched<[WriteCvtI2F]>;
2197+
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
2198+
[]>, VEX, Sched<[WriteCvtI2F]>;
22072199
def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
2208-
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
2209-
[(set VR256:$dst,
2210-
(int_x86_avx_cvtdq2_pd_256
2211-
(bitconvert (loadv2i64 addr:$src))))]>, VEX, VEX_L,
2212-
Sched<[WriteCvtI2FLd]>;
2200+
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
2201+
[]>, VEX, VEX_L, Sched<[WriteCvtI2FLd]>;
22132202
def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
2214-
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
2215-
[(set VR256:$dst,
2216-
(int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L,
2217-
Sched<[WriteCvtI2F]>;
2203+
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
2204+
[]>, VEX, VEX_L, Sched<[WriteCvtI2F]>;
22182205
}
22192206

22202207
let hasSideEffects = 0, mayLoad = 1 in
22212208
def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
22222209
"cvtdq2pd\t{$src, $dst|$dst, $src}", [],
22232210
IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2FLd]>;
22242211
def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2225-
"cvtdq2pd\t{$src, $dst|$dst, $src}",
2226-
[(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
2212+
"cvtdq2pd\t{$src, $dst|$dst, $src}", [],
22272213
IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtI2F]>;
22282214

22292215
// AVX register conversion intrinsics

‎llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

-2
Original file line numberDiff line numberDiff line change
@@ -2347,8 +2347,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
23472347
case llvm::Intrinsic::x86_sse_cvttss2si:
23482348
handleVectorConvertIntrinsic(I, 1);
23492349
break;
2350-
case llvm::Intrinsic::x86_sse2_cvtdq2pd:
2351-
case llvm::Intrinsic::x86_sse2_cvtps2pd:
23522350
case llvm::Intrinsic::x86_sse_cvtps2pi:
23532351
case llvm::Intrinsic::x86_sse_cvttps2pi:
23542352
handleVectorConvertIntrinsic(I, 2);

‎llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll

+80
Original file line numberDiff line numberDiff line change
@@ -245,3 +245,83 @@ define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
245245
ret <2 x i64> %res
246246
}
247247
declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
248+
249+
250+
define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
251+
; AVX-LABEL: test_x86_sse2_cvtdq2pd:
252+
; AVX: ## BB#0:
253+
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
254+
; AVX-NEXT: retl
255+
;
256+
; AVX512VL-LABEL: test_x86_sse2_cvtdq2pd:
257+
; AVX512VL: ## BB#0:
258+
; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0
259+
; AVX512VL-NEXT: retl
260+
; CHECK-LABEL: test_x86_sse2_cvtdq2pd:
261+
; CHECK: ## BB#0:
262+
; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm0
263+
; CHECK-NEXT: retl
264+
%res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
265+
ret <2 x double> %res
266+
}
267+
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
268+
269+
270+
define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
271+
; AVX-LABEL: test_x86_avx_cvtdq2_pd_256:
272+
; AVX: ## BB#0:
273+
; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
274+
; AVX-NEXT: retl
275+
;
276+
; AVX512VL-LABEL: test_x86_avx_cvtdq2_pd_256:
277+
; AVX512VL: ## BB#0:
278+
; AVX512VL-NEXT: vcvtdq2pd %xmm0, %ymm0
279+
; AVX512VL-NEXT: retl
280+
; CHECK-LABEL: test_x86_avx_cvtdq2_pd_256:
281+
; CHECK: ## BB#0:
282+
; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0
283+
; CHECK-NEXT: retl
284+
%res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
285+
ret <4 x double> %res
286+
}
287+
declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone
288+
289+
290+
define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
291+
; AVX-LABEL: test_x86_sse2_cvtps2pd:
292+
; AVX: ## BB#0:
293+
; AVX-NEXT: vcvtps2pd %xmm0, %xmm0
294+
; AVX-NEXT: retl
295+
;
296+
; AVX512VL-LABEL: test_x86_sse2_cvtps2pd:
297+
; AVX512VL: ## BB#0:
298+
; AVX512VL-NEXT: vcvtps2pd %xmm0, %xmm0
299+
; AVX512VL-NEXT: retl
300+
; CHECK-LABEL: test_x86_sse2_cvtps2pd:
301+
; CHECK: ## BB#0:
302+
; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0
303+
; CHECK-NEXT: retl
304+
%res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
305+
ret <2 x double> %res
306+
}
307+
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
308+
309+
310+
define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
311+
; AVX-LABEL: test_x86_avx_cvt_ps2_pd_256:
312+
; AVX: ## BB#0:
313+
; AVX-NEXT: vcvtps2pd %xmm0, %ymm0
314+
; AVX-NEXT: retl
315+
;
316+
; AVX512VL-LABEL: test_x86_avx_cvt_ps2_pd_256:
317+
; AVX512VL: ## BB#0:
318+
; AVX512VL-NEXT: vcvtps2pd %xmm0, %ymm0
319+
; AVX512VL-NEXT: retl
320+
; CHECK-LABEL: test_x86_avx_cvt_ps2_pd_256:
321+
; CHECK: ## BB#0:
322+
; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0
323+
; CHECK-NEXT: retl
324+
%res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
325+
ret <4 x double> %res
326+
}
327+
declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone

‎llvm/test/CodeGen/X86/avx-intrinsics-x86.ll

+6-70
Original file line numberDiff line numberDiff line change
@@ -274,22 +274,6 @@ define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
274274
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
275275

276276

277-
define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
278-
; AVX-LABEL: test_x86_sse2_cvtdq2pd:
279-
; AVX: ## BB#0:
280-
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
281-
; AVX-NEXT: retl
282-
;
283-
; AVX512VL-LABEL: test_x86_sse2_cvtdq2pd:
284-
; AVX512VL: ## BB#0:
285-
; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0
286-
; AVX512VL-NEXT: retl
287-
%res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
288-
ret <2 x double> %res
289-
}
290-
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
291-
292-
293277
define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
294278
; AVX-LABEL: test_x86_sse2_cvtdq2ps:
295279
; AVX: ## BB#0:
@@ -354,22 +338,6 @@ define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
354338
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
355339

356340

357-
define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
358-
; AVX-LABEL: test_x86_sse2_cvtps2pd:
359-
; AVX: ## BB#0:
360-
; AVX-NEXT: vcvtps2pd %xmm0, %xmm0
361-
; AVX-NEXT: retl
362-
;
363-
; AVX512VL-LABEL: test_x86_sse2_cvtps2pd:
364-
; AVX512VL: ## BB#0:
365-
; AVX512VL-NEXT: vcvtps2pd %xmm0, %xmm0
366-
; AVX512VL-NEXT: retl
367-
%res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
368-
ret <2 x double> %res
369-
}
370-
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
371-
372-
373341
define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
374342
; AVX-LABEL: test_x86_sse2_cvtsd2si:
375343
; AVX: ## BB#0:
@@ -1258,14 +1226,14 @@ define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
12581226
; AVX-LABEL: test_x86_sse2_storeu_dq:
12591227
; AVX: ## BB#0:
12601228
; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
1261-
; AVX-NEXT: vpaddb LCPI76_0, %xmm0, %xmm0
1229+
; AVX-NEXT: vpaddb LCPI74_0, %xmm0, %xmm0
12621230
; AVX-NEXT: vmovdqu %xmm0, (%eax)
12631231
; AVX-NEXT: retl
12641232
;
12651233
; AVX512VL-LABEL: test_x86_sse2_storeu_dq:
12661234
; AVX512VL: ## BB#0:
12671235
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
1268-
; AVX512VL-NEXT: vpaddb LCPI76_0, %xmm0, %xmm0
1236+
; AVX512VL-NEXT: vpaddb LCPI74_0, %xmm0, %xmm0
12691237
; AVX512VL-NEXT: vmovdqu %xmm0, (%eax)
12701238
; AVX512VL-NEXT: retl
12711239
%a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -3569,22 +3537,6 @@ define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) {
35693537
declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone
35703538

35713539

3572-
define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
3573-
; AVX-LABEL: test_x86_avx_cvt_ps2_pd_256:
3574-
; AVX: ## BB#0:
3575-
; AVX-NEXT: vcvtps2pd %xmm0, %ymm0
3576-
; AVX-NEXT: retl
3577-
;
3578-
; AVX512VL-LABEL: test_x86_avx_cvt_ps2_pd_256:
3579-
; AVX512VL: ## BB#0:
3580-
; AVX512VL-NEXT: vcvtps2pd %xmm0, %ymm0
3581-
; AVX512VL-NEXT: retl
3582-
%res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
3583-
ret <4 x double> %res
3584-
}
3585-
declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
3586-
3587-
35883540
define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
35893541
; AVX-LABEL: test_x86_avx_cvt_ps2dq_256:
35903542
; AVX: ## BB#0:
@@ -3601,22 +3553,6 @@ define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
36013553
declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone
36023554

36033555

3604-
define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
3605-
; AVX-LABEL: test_x86_avx_cvtdq2_pd_256:
3606-
; AVX: ## BB#0:
3607-
; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
3608-
; AVX-NEXT: retl
3609-
;
3610-
; AVX512VL-LABEL: test_x86_avx_cvtdq2_pd_256:
3611-
; AVX512VL: ## BB#0:
3612-
; AVX512VL-NEXT: vcvtdq2pd %xmm0, %ymm0
3613-
; AVX512VL-NEXT: retl
3614-
%res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
3615-
ret <4 x double> %res
3616-
}
3617-
declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone
3618-
3619-
36203556
define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
36213557
; AVX-LABEL: test_x86_avx_cvtdq2_ps_256:
36223558
; AVX: ## BB#0:
@@ -4190,7 +4126,7 @@ define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
41904126
; AVX512VL-LABEL: test_x86_avx_storeu_dq_256:
41914127
; AVX512VL: ## BB#0:
41924128
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
4193-
; AVX512VL-NEXT: vpaddb LCPI235_0, %ymm0, %ymm0
4129+
; AVX512VL-NEXT: vpaddb LCPI231_0, %ymm0, %ymm0
41944130
; AVX512VL-NEXT: vmovdqu %ymm0, (%eax)
41954131
; AVX512VL-NEXT: retl
41964132
%a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -4431,7 +4367,7 @@ define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) {
44314367
;
44324368
; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256_2:
44334369
; AVX512VL: ## BB#0:
4434-
; AVX512VL-NEXT: vpermilpd LCPI249_0, %ymm0, %ymm0
4370+
; AVX512VL-NEXT: vpermilpd LCPI245_0, %ymm0, %ymm0
44354371
; AVX512VL-NEXT: retl
44364372
%res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 0, i64 2>) ; <<4 x double>> [#uses=1]
44374373
ret <4 x double> %res
@@ -4923,15 +4859,15 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
49234859
; AVX-LABEL: movnt_dq:
49244860
; AVX: ## BB#0:
49254861
; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
4926-
; AVX-NEXT: vpaddq LCPI276_0, %xmm0, %xmm0
4862+
; AVX-NEXT: vpaddq LCPI272_0, %xmm0, %xmm0
49274863
; AVX-NEXT: vmovntdq %ymm0, (%eax)
49284864
; AVX-NEXT: vzeroupper
49294865
; AVX-NEXT: retl
49304866
;
49314867
; AVX512VL-LABEL: movnt_dq:
49324868
; AVX512VL: ## BB#0:
49334869
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
4934-
; AVX512VL-NEXT: vpaddq LCPI276_0, %xmm0, %xmm0
4870+
; AVX512VL-NEXT: vpaddq LCPI272_0, %xmm0, %xmm0
49354871
; AVX512VL-NEXT: vmovntdq %ymm0, (%eax)
49364872
; AVX512VL-NEXT: retl
49374873
%a2 = add <2 x i64> %a1, <i64 1, i64 1>

‎llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll

+40
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,46 @@ define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
4444
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
4545

4646

47+
define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
48+
; SSE-LABEL: test_x86_sse2_cvtdq2pd:
49+
; SSE: ## BB#0:
50+
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
51+
; SSE-NEXT: retl
52+
;
53+
; KNL-LABEL: test_x86_sse2_cvtdq2pd:
54+
; KNL: ## BB#0:
55+
; KNL-NEXT: vcvtdq2pd %xmm0, %xmm0
56+
; KNL-NEXT: retl
57+
; CHECK-LABEL: test_x86_sse2_cvtdq2pd:
58+
; CHECK: ## BB#0:
59+
; CHECK-NEXT: cvtdq2pd %xmm0, %xmm0
60+
; CHECK-NEXT: retl
61+
%res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
62+
ret <2 x double> %res
63+
}
64+
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
65+
66+
67+
define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
68+
; SSE-LABEL: test_x86_sse2_cvtps2pd:
69+
; SSE: ## BB#0:
70+
; SSE-NEXT: cvtps2pd %xmm0, %xmm0
71+
; SSE-NEXT: retl
72+
;
73+
; KNL-LABEL: test_x86_sse2_cvtps2pd:
74+
; KNL: ## BB#0:
75+
; KNL-NEXT: vcvtps2pd %xmm0, %xmm0
76+
; KNL-NEXT: retl
77+
; CHECK-LABEL: test_x86_sse2_cvtps2pd:
78+
; CHECK: ## BB#0:
79+
; CHECK-NEXT: cvtps2pd %xmm0, %xmm0
80+
; CHECK-NEXT: retl
81+
%res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
82+
ret <2 x double> %res
83+
}
84+
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
85+
86+
4787
define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
4888
; CHECK-LABEL: test_x86_sse2_storel_dq:
4989
; CHECK: ## BB#0:

‎llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll

+2-34
Original file line numberDiff line numberDiff line change
@@ -178,22 +178,6 @@ define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
178178
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
179179

180180

181-
define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
182-
; SSE-LABEL: test_x86_sse2_cvtdq2pd:
183-
; SSE: ## BB#0:
184-
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
185-
; SSE-NEXT: retl
186-
;
187-
; KNL-LABEL: test_x86_sse2_cvtdq2pd:
188-
; KNL: ## BB#0:
189-
; KNL-NEXT: vcvtdq2pd %xmm0, %xmm0
190-
; KNL-NEXT: retl
191-
%res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
192-
ret <2 x double> %res
193-
}
194-
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
195-
196-
197181
define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
198182
; SSE-LABEL: test_x86_sse2_cvtdq2ps:
199183
; SSE: ## BB#0:
@@ -258,22 +242,6 @@ define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
258242
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
259243

260244

261-
define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
262-
; SSE-LABEL: test_x86_sse2_cvtps2pd:
263-
; SSE: ## BB#0:
264-
; SSE-NEXT: cvtps2pd %xmm0, %xmm0
265-
; SSE-NEXT: retl
266-
;
267-
; KNL-LABEL: test_x86_sse2_cvtps2pd:
268-
; KNL: ## BB#0:
269-
; KNL-NEXT: vcvtps2pd %xmm0, %xmm0
270-
; KNL-NEXT: retl
271-
%res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
272-
ret <2 x double> %res
273-
}
274-
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
275-
276-
277245
define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
278246
; SSE-LABEL: test_x86_sse2_cvtsd2si:
279247
; SSE: ## BB#0:
@@ -1162,14 +1130,14 @@ define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
11621130
; SSE-LABEL: test_x86_sse2_storeu_dq:
11631131
; SSE: ## BB#0:
11641132
; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
1165-
; SSE-NEXT: paddb LCPI70_0, %xmm0
1133+
; SSE-NEXT: paddb LCPI68_0, %xmm0
11661134
; SSE-NEXT: movdqu %xmm0, (%eax)
11671135
; SSE-NEXT: retl
11681136
;
11691137
; KNL-LABEL: test_x86_sse2_storeu_dq:
11701138
; KNL: ## BB#0:
11711139
; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
1172-
; KNL-NEXT: vpaddb LCPI70_0, %xmm0, %xmm0
1140+
; KNL-NEXT: vpaddb LCPI68_0, %xmm0, %xmm0
11731141
; KNL-NEXT: vmovdqu %xmm0, (%eax)
11741142
; KNL-NEXT: retl
11751143
%a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>

0 commit comments

Comments
 (0)
Please sign in to comment.