Index: llvm/trunk/include/llvm/IR/IntrinsicsX86.td =================================================================== --- llvm/trunk/include/llvm/IR/IntrinsicsX86.td +++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td @@ -145,18 +145,6 @@ // Arithmetic ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_sse_add_ss : // TODO: Remove this intrinsic. - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, - llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_sse_sub_ss : // TODO: Remove this intrinsic. - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, - llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_sse_mul_ss : // TODO: Remove this intrinsic. - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, - llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_sse_div_ss : // TODO: Remove this intrinsic. - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, - llvm_v4f32_ty], [IntrNoMem]>; def int_x86_sse_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; @@ -287,18 +275,6 @@ // FP arithmetic ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_sse2_add_sd : // TODO: Remove this intrinsic. - Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, - llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_sse2_sub_sd : // TODO: Remove this intrinsic. - Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, - llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_sse2_mul_sd : // TODO: Remove this intrinsic. - Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, - llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_sse2_div_sd : // TODO: Remove this intrinsic. - Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, - llvm_v2f64_ty], [IntrNoMem]>; def int_x86_sse2_sqrt_sd : GCCBuiltin<"__builtin_ia32_sqrtsd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; Index: llvm/trunk/lib/IR/AutoUpgrade.cpp =================================================================== --- llvm/trunk/lib/IR/AutoUpgrade.cpp +++ llvm/trunk/lib/IR/AutoUpgrade.cpp @@ -238,6 +238,14 @@ Name.startswith("avx2.pcmpgt.") || // Added in 3.1 Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9 Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9 + Name == "sse.add.ss" || // Added in 4.0 + Name == "sse2.add.sd" || // Added in 4.0 + Name == "sse.sub.ss" || // Added in 4.0 + Name == "sse2.sub.sd" || // Added in 4.0 + Name == "sse.mul.ss" || // Added in 4.0 + Name == "sse2.mul.sd" || // Added in 4.0 + Name == "sse.div.ss" || // Added in 4.0 + Name == "sse2.div.sd" || // Added in 4.0 Name == "sse41.pmaxsb" || // Added in 3.9 Name == "sse2.pmaxs.w" || // Added in 3.9 Name == "sse41.pmaxsd" || // Added in 3.9 @@ -732,6 +740,42 @@ Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1), "pcmpgt"); Rep = Builder.CreateSExt(Rep, CI->getType(), ""); + } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) { + Type *I32Ty = Type::getInt32Ty(C); + Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0), + ConstantInt::get(I32Ty, 0)); + Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1), + ConstantInt::get(I32Ty, 0)); + Rep = Builder.CreateInsertElement(CI->getArgOperand(0), + Builder.CreateFAdd(Elt0, Elt1), + ConstantInt::get(I32Ty, 0)); + } else if (IsX86 && (Name == "sse.sub.ss" || Name == "sse2.sub.sd")) { + Type *I32Ty = Type::getInt32Ty(C); + Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0), + ConstantInt::get(I32Ty, 0)); + Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1), + ConstantInt::get(I32Ty, 0)); + Rep = 
Builder.CreateInsertElement(CI->getArgOperand(0), + Builder.CreateFSub(Elt0, Elt1), + ConstantInt::get(I32Ty, 0)); + } else if (IsX86 && (Name == "sse.mul.ss" || Name == "sse2.mul.sd")) { + Type *I32Ty = Type::getInt32Ty(C); + Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0), + ConstantInt::get(I32Ty, 0)); + Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1), + ConstantInt::get(I32Ty, 0)); + Rep = Builder.CreateInsertElement(CI->getArgOperand(0), + Builder.CreateFMul(Elt0, Elt1), + ConstantInt::get(I32Ty, 0)); + } else if (IsX86 && (Name == "sse.div.ss" || Name == "sse2.div.sd")) { + Type *I32Ty = Type::getInt32Ty(C); + Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0), + ConstantInt::get(I32Ty, 0)); + Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1), + ConstantInt::get(I32Ty, 0)); + Rep = Builder.CreateInsertElement(CI->getArgOperand(0), + Builder.CreateFDiv(Elt0, Elt1), + ConstantInt::get(I32Ty, 0)); } else if (IsX86 && Name.startswith("avx512.mask.pcmpeq.")) { Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_EQ); } else if (IsX86 && Name.startswith("avx512.mask.pcmpgt.")) { Index: llvm/trunk/lib/Target/X86/X86InstrSSE.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td @@ -259,26 +259,24 @@ } /// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class -multiclass sse12_fp_scalar_int opc, string OpcodeStr, RegisterClass RC, - string asm, string SSEVer, string FPSizeStr, - Operand memopr, ComplexPattern mem_cpat, - Domain d, OpndItins itins, bit Is2Addr = 1> { -let isCodeGenOnly = 1 in { +multiclass sse12_fp_scalar_int opc, string OpcodeStr, + SDPatternOperator Int, RegisterClass RC, + string asm, Operand memopr, + ComplexPattern mem_cpat, Domain d, + OpndItins itins, bit Is2Addr = 1> { +let isCodeGenOnly = 1, hasSideEffects = 0 in { def rr_Int : SI_Int( - !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr)) - RC:$src1, RC:$src2))], itins.rr, d>, + [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr, d>, Sched<[itins.Sched]>; + let mayLoad = 1 in def rm_Int : SI_Int(!strconcat("int_x86_sse", - SSEVer, "_", OpcodeStr, FPSizeStr)) - RC:$src1, mem_cpat:$src2))], itins.rm, d>, + [(set RC:$dst, (Int RC:$src1, mem_cpat:$src2))], itins.rm, d>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } @@ -3064,20 +3062,22 @@ } multiclass basic_sse12_fp_binop_s_int opc, string OpcodeStr, + SDPatternOperator IntSS, + SDPatternOperator IntSD, SizeItins itins> { - defm V#NAME#SS : sse12_fp_scalar_int, XS, VEX_4V, VEX_LIG; - defm V#NAME#SD : sse12_fp_scalar_int, XD, VEX_4V, VEX_LIG; let Constraints = "$src1 = $dst" in { - defm SS : sse12_fp_scalar_int, XS; - defm SD : sse12_fp_scalar_int, XD; } } @@ -3085,23 +3085,29 @@ // Binary Arithmetic instructions defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>, basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>; + basic_sse12_fp_binop_s_int<0x58, "add", null_frag, null_frag, + SSE_ALU_ITINS_S>; defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>, basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>, - basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S>; + basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, null_frag, + SSE_MUL_ITINS_S>; let isCommutable = 0 in { defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>, basic_sse12_fp_binop_s<0x5C, 
"sub", fsub, SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S>; + basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, null_frag, + SSE_ALU_ITINS_S>; defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>, basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>, - basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S>; + basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, null_frag, + SSE_DIV_ITINS_S>; defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>, basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>; + basic_sse12_fp_binop_s_int<0x5F, "max", int_x86_sse_max_ss, + int_x86_sse2_max_sd, SSE_ALU_ITINS_S>; defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>, basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>; + basic_sse12_fp_binop_s_int<0x5D, "min", int_x86_sse_min_ss, + int_x86_sse2_min_sd, SSE_ALU_ITINS_S>; } let isCodeGenOnly = 1 in { Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1751,17 +1751,9 @@ break; } - case Intrinsic::x86_sse_add_ss: - case Intrinsic::x86_sse_sub_ss: - case Intrinsic::x86_sse_mul_ss: - case Intrinsic::x86_sse_div_ss: case Intrinsic::x86_sse_min_ss: case Intrinsic::x86_sse_max_ss: case Intrinsic::x86_sse_cmp_ss: - case Intrinsic::x86_sse2_add_sd: - case Intrinsic::x86_sse2_sub_sd: - case Intrinsic::x86_sse2_mul_sd: - case Intrinsic::x86_sse2_div_sd: case Intrinsic::x86_sse2_min_sd: case Intrinsic::x86_sse2_max_sd: case Intrinsic::x86_sse2_cmp_sd: { Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1274,17 +1274,9 @@ // Binary scalar-as-vector operations that work column-wise. A dest element // is a function of the corresponding input elements from the two inputs. - case Intrinsic::x86_sse_add_ss: - case Intrinsic::x86_sse_sub_ss: - case Intrinsic::x86_sse_mul_ss: - case Intrinsic::x86_sse_div_ss: case Intrinsic::x86_sse_min_ss: case Intrinsic::x86_sse_max_ss: case Intrinsic::x86_sse_cmp_ss: - case Intrinsic::x86_sse2_add_sd: - case Intrinsic::x86_sse2_sub_sd: - case Intrinsic::x86_sse2_mul_sd: - case Intrinsic::x86_sse2_div_sd: case Intrinsic::x86_sse2_min_sd: case Intrinsic::x86_sse2_max_sd: case Intrinsic::x86_sse2_cmp_sd: @@ -1297,62 +1289,6 @@ UndefElts2, Depth + 1); if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; } - // If only the low elt is demanded and this is a scalarizable intrinsic, - // scalarize it now. - if (DemandedElts == 1) { - switch (II->getIntrinsicID()) { - default: break; - case Intrinsic::x86_sse_add_ss: - case Intrinsic::x86_sse_sub_ss: - case Intrinsic::x86_sse_mul_ss: - case Intrinsic::x86_sse_div_ss: - case Intrinsic::x86_sse2_add_sd: - case Intrinsic::x86_sse2_sub_sd: - case Intrinsic::x86_sse2_mul_sd: - case Intrinsic::x86_sse2_div_sd: - // TODO: Lower MIN/MAX/etc. - Value *LHS = II->getArgOperand(0); - Value *RHS = II->getArgOperand(1); - // Extract the element as scalars. 
- LHS = InsertNewInstWith(ExtractElementInst::Create(LHS, - ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II); - RHS = InsertNewInstWith(ExtractElementInst::Create(RHS, - ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II); - - switch (II->getIntrinsicID()) { - default: llvm_unreachable("Case stmts out of sync!"); - case Intrinsic::x86_sse_add_ss: - case Intrinsic::x86_sse2_add_sd: - TmpV = InsertNewInstWith(BinaryOperator::CreateFAdd(LHS, RHS, - II->getName()), *II); - break; - case Intrinsic::x86_sse_sub_ss: - case Intrinsic::x86_sse2_sub_sd: - TmpV = InsertNewInstWith(BinaryOperator::CreateFSub(LHS, RHS, - II->getName()), *II); - break; - case Intrinsic::x86_sse_mul_ss: - case Intrinsic::x86_sse2_mul_sd: - TmpV = InsertNewInstWith(BinaryOperator::CreateFMul(LHS, RHS, - II->getName()), *II); - break; - case Intrinsic::x86_sse_div_ss: - case Intrinsic::x86_sse2_div_sd: - TmpV = InsertNewInstWith(BinaryOperator::CreateFDiv(LHS, RHS, - II->getName()), *II); - break; - } - - Instruction *New = - InsertElementInst::Create( - UndefValue::get(II->getType()), TmpV, - ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false), - II->getName()); - InsertNewInstWith(New, *II); - return New; - } - } - // If lowest element of a scalar op isn't used then use Arg0. if (DemandedElts.getLoBits(1) != 1) return II->getArgOperand(0); Index: llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll +++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -68,17 +68,6 @@ declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone -define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) { -; CHECK-LABEL: test_x86_sse2_add_sd: -; CHECK: ## BB#0: -; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x58,0xc1] -; CHECK-NEXT: retl ## encoding: [0xc3] - %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] - ret <2 x double> %res -} -declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone - - define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) { ; CHECK-LABEL: test_x86_sse2_cmp_pd: ; CHECK: ## BB#0: @@ -390,17 +379,6 @@ declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone -define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) { -; CHECK-LABEL: test_x86_sse2_div_sd: -; CHECK: ## BB#0: -; CHECK-NEXT: vdivsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5e,0xc1] -; CHECK-NEXT: retl ## encoding: [0xc3] - %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] - ret <2 x double> %res -} -declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone - - define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) { ; AVX-LABEL: test_x86_sse2_max_pd: @@ -469,17 +447,6 @@ -define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) { -; CHECK-LABEL: test_x86_sse2_mul_sd: -; CHECK: ## BB#0: -; CHECK-NEXT: vmulsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x59,0xc1] -; CHECK-NEXT: retl ## encoding: [0xc3] - %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] - ret <2 x double> %res -} -declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone - - define <8 x i16> 
@test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) { ; AVX-LABEL: test_x86_sse2_packssdw_128: ; AVX: ## BB#0: @@ -1121,17 +1088,6 @@ declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone -define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) { -; CHECK-LABEL: test_x86_sse2_sub_sd: -; CHECK: ## BB#0: -; CHECK-NEXT: vsubsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5c,0xc1] -; CHECK-NEXT: retl ## encoding: [0xc3] - %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] - ret <2 x double> %res -} -declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone - - define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) { ; AVX-LABEL: test_x86_sse2_ucomieq_sd: ; AVX: ## BB#0: @@ -1959,17 +1915,6 @@ } -define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) { -; CHECK-LABEL: test_x86_sse_add_ss: -; CHECK: ## BB#0: -; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x58,0xc1] -; CHECK-NEXT: retl ## encoding: [0xc3] - %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] - ret <4 x float> %res -} -declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone - - define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) { ; CHECK-LABEL: test_x86_sse_cmp_ps: ; CHECK: ## BB#0: @@ -2170,17 +2115,6 @@ declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone -define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) { -; CHECK-LABEL: test_x86_sse_div_ss: -; CHECK: ## BB#0: -; CHECK-NEXT: vdivss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5e,0xc1] -; CHECK-NEXT: retl ## encoding: [0xc3] - %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] - ret <4 x float> %res -} -declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone - - define void @test_x86_sse_ldmxcsr(i8* %a0) { ; CHECK-LABEL: test_x86_sse_ldmxcsr: ; CHECK: ## BB#0: @@ -2260,17 +2194,6 @@ -define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) { -; CHECK-LABEL: test_x86_sse_mul_ss: -; CHECK: ## BB#0: -; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x59,0xc1] -; CHECK-NEXT: retl ## encoding: [0xc3] - %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] - ret <4 x float> %res -} -declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone - - define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) { ; AVX-LABEL: test_x86_sse_rcp_ps: ; AVX: ## BB#0: @@ -2359,17 +2282,6 @@ declare void @llvm.x86.sse.stmxcsr(i8*) nounwind -define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) { -; CHECK-LABEL: test_x86_sse_sub_ss: -; CHECK: ## BB#0: -; CHECK-NEXT: vsubss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5c,0xc1] -; CHECK-NEXT: retl ## encoding: [0xc3] - %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] - ret <4 x float> %res -} -declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone - - define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) { ; AVX-LABEL: test_x86_sse_ucomieq_ss: ; AVX: ## BB#0: @@ -3805,8 +3717,8 @@ ; AVX-LABEL: movnt_dq: ; AVX: ## BB#0: ; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; AVX-NEXT: 
vpaddq LCPI254_0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd4,0x05,A,A,A,A] -; AVX-NEXT: ## fixup A - offset: 4, value: LCPI254_0, kind: FK_Data_4 +; AVX-NEXT: vpaddq LCPI246_0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd4,0x05,A,A,A,A] +; AVX-NEXT: ## fixup A - offset: 4, value: LCPI246_0, kind: FK_Data_4 ; AVX-NEXT: vmovntdq %ymm0, (%eax) ## encoding: [0xc5,0xfd,0xe7,0x00] ; AVX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] ; AVX-NEXT: retl ## encoding: [0xc3] @@ -3814,8 +3726,8 @@ ; AVX512VL-LABEL: movnt_dq: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; AVX512VL-NEXT: vpaddq LCPI254_0, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd4,0x05,A,A,A,A] -; AVX512VL-NEXT: ## fixup A - offset: 6, value: LCPI254_0, kind: FK_Data_4 +; AVX512VL-NEXT: vpaddq LCPI246_0, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd4,0x05,A,A,A,A] +; AVX512VL-NEXT: ## fixup A - offset: 6, value: LCPI246_0, kind: FK_Data_4 ; AVX512VL-NEXT: vmovntdq %ymm0, (%eax) ## encoding: [0x62,0xf1,0x7d,0x28,0xe7,0x00] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %a2 = add <2 x i64> %a1, Index: llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86-upgrade.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86-upgrade.ll +++ llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86-upgrade.ll @@ -1,4 +1,3 @@ -; NOTE: Assertions have been autogenerated by update_llc_test_checks.py ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s @@ -25,3 +24,103 @@ declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind +define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) { +; SSE-LABEL: test_x86_sse_add_ss: +; SSE: ## BB#0: +; SSE-NEXT: addss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x58,0xc1] +; SSE-NEXT: retl ## encoding: [0xc3] +; +; AVX2-LABEL: test_x86_sse_add_ss: +; AVX2: ## BB#0: +; AVX2-NEXT: vaddss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x58,0xc1] +; AVX2-NEXT: retl ## encoding: [0xc3] +; +; SKX-LABEL: test_x86_sse_add_ss: +; SKX: ## BB#0: +; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x58,0xc1] +; SKX-NEXT: retl ## encoding: [0xc3] +; CHECK-LABEL: test_x86_sse_add_ss: +; CHECK: ## BB#0: +; CHECK-NEXT: addss %xmm1, %xmm0 +; CHECK-NEXT: retl + %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) { +; SSE-LABEL: test_x86_sse_sub_ss: +; SSE: ## BB#0: +; SSE-NEXT: subss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5c,0xc1] +; SSE-NEXT: retl ## encoding: [0xc3] +; +; AVX2-LABEL: test_x86_sse_sub_ss: +; AVX2: ## BB#0: +; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5c,0xc1] +; AVX2-NEXT: retl ## encoding: [0xc3] +; +; SKX-LABEL: test_x86_sse_sub_ss: +; SKX: ## BB#0: +; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x5c,0xc1] +; SKX-NEXT: retl ## encoding: [0xc3] +; CHECK-LABEL: test_x86_sse_sub_ss: +; CHECK: ## BB#0: +; CHECK-NEXT: subss %xmm1, %xmm0 +; CHECK-NEXT: retl + %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> 
@test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) { +; SSE-LABEL: test_x86_sse_mul_ss: +; SSE: ## BB#0: +; SSE-NEXT: mulss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x59,0xc1] +; SSE-NEXT: retl ## encoding: [0xc3] +; +; AVX2-LABEL: test_x86_sse_mul_ss: +; AVX2: ## BB#0: +; AVX2-NEXT: vmulss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x59,0xc1] +; AVX2-NEXT: retl ## encoding: [0xc3] +; +; SKX-LABEL: test_x86_sse_mul_ss: +; SKX: ## BB#0: +; SKX-NEXT: vmulss %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x59,0xc1] +; SKX-NEXT: retl ## encoding: [0xc3] +; CHECK-LABEL: test_x86_sse_mul_ss: +; CHECK: ## BB#0: +; CHECK-NEXT: mulss %xmm1, %xmm0 +; CHECK-NEXT: retl + %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) { +; SSE-LABEL: test_x86_sse_div_ss: +; SSE: ## BB#0: +; SSE-NEXT: divss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5e,0xc1] +; SSE-NEXT: retl ## encoding: [0xc3] +; +; AVX2-LABEL: test_x86_sse_div_ss: +; AVX2: ## BB#0: +; AVX2-NEXT: vdivss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5e,0xc1] +; AVX2-NEXT: retl ## encoding: [0xc3] +; +; SKX-LABEL: test_x86_sse_div_ss: +; SKX: ## BB#0: +; SKX-NEXT: vdivss %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x5e,0xc1] +; SKX-NEXT: retl ## encoding: [0xc3] +; CHECK-LABEL: test_x86_sse_div_ss: +; CHECK: ## BB#0: +; CHECK-NEXT: divss %xmm1, %xmm0 +; CHECK-NEXT: retl + %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone + + Index: llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll +++ llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll @@ -4,22 +4,6 @@ ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx2 -show-mc-encoding | FileCheck %s --check-prefix=VCHECK --check-prefix=AVX2 ; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=skx -show-mc-encoding | FileCheck %s --check-prefix=VCHECK --check-prefix=SKX -define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) { -; SSE-LABEL: test_x86_sse_add_ss: -; SSE: ## BB#0: -; SSE-NEXT: addss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x58,0xc1] -; SSE-NEXT: retl ## encoding: [0xc3] -; -; VCHECK-LABEL: test_x86_sse_add_ss: -; VCHECK: ## BB#0: -; VCHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x58,0xc1] -; VCHECK-NEXT: retl ## encoding: [0xc3] - %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] - ret <4 x float> %res -} -declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone - - define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) { ; SSE-LABEL: test_x86_sse_cmp_ps: ; SSE: ## BB#0: @@ -292,22 +276,6 @@ declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone -define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) { -; SSE-LABEL: test_x86_sse_div_ss: -; SSE: ## BB#0: -; SSE-NEXT: divss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5e,0xc1] -; SSE-NEXT: retl ## encoding: [0xc3] -; -; VCHECK-LABEL: test_x86_sse_div_ss: -; VCHECK: ## BB#0: -; VCHECK-NEXT: vdivss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5e,0xc1] -; 
VCHECK-NEXT: retl ## encoding: [0xc3] - %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] - ret <4 x float> %res -} -declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone - - define void @test_x86_sse_ldmxcsr(i8* %a0) { ; SSE-LABEL: test_x86_sse_ldmxcsr: ; SSE: ## BB#0: @@ -418,22 +386,6 @@ -define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) { -; SSE-LABEL: test_x86_sse_mul_ss: -; SSE: ## BB#0: -; SSE-NEXT: mulss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x59,0xc1] -; SSE-NEXT: retl ## encoding: [0xc3] -; -; VCHECK-LABEL: test_x86_sse_mul_ss: -; VCHECK: ## BB#0: -; VCHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x59,0xc1] -; VCHECK-NEXT: retl ## encoding: [0xc3] - %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] - ret <4 x float> %res -} -declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone - - define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) { ; SSE-LABEL: test_x86_sse_rcp_ps: ; SSE: ## BB#0: @@ -558,22 +510,6 @@ declare void @llvm.x86.sse.stmxcsr(i8*) nounwind -define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) { -; SSE-LABEL: test_x86_sse_sub_ss: -; SSE: ## BB#0: -; SSE-NEXT: subss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5c,0xc1] -; SSE-NEXT: retl ## encoding: [0xc3] -; -; VCHECK-LABEL: test_x86_sse_sub_ss: -; VCHECK: ## BB#0: -; VCHECK-NEXT: vsubss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5c,0xc1] -; VCHECK-NEXT: retl ## encoding: [0xc3] - %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] - ret <4 x float> %res -} -declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone - - define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) { ; SSE-LABEL: test_x86_sse_ucomieq_ss: ; SSE: ## BB#0: Index: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll +++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll @@ -186,3 +186,104 @@ } declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone +define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) { +; SSE-LABEL: test_x86_sse2_add_sd: +; SSE: ## BB#0: +; SSE-NEXT: addsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x58,0xc1] +; SSE-NEXT: retl ## encoding: [0xc3] +; +; AVX2-LABEL: test_x86_sse2_add_sd: +; AVX2: ## BB#0: +; AVX2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x58,0xc1] +; AVX2-NEXT: retl ## encoding: [0xc3] +; +; SKX-LABEL: test_x86_sse2_add_sd: +; SKX: ## BB#0: +; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x58,0xc1] +; SKX-NEXT: retl ## encoding: [0xc3] +; CHECK-LABEL: test_x86_sse2_add_sd: +; CHECK: ## BB#0: +; CHECK-NEXT: addsd %xmm1, %xmm0 +; CHECK-NEXT: retl + %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) { +; SSE-LABEL: test_x86_sse2_sub_sd: +; SSE: ## BB#0: +; SSE-NEXT: subsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5c,0xc1] +; SSE-NEXT: retl ## encoding: [0xc3] +; +; AVX2-LABEL: test_x86_sse2_sub_sd: +; AVX2: ## BB#0: 
+; AVX2-NEXT: vsubsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5c,0xc1] +; AVX2-NEXT: retl ## encoding: [0xc3] +; +; SKX-LABEL: test_x86_sse2_sub_sd: +; SKX: ## BB#0: +; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x5c,0xc1] +; SKX-NEXT: retl ## encoding: [0xc3] +; CHECK-LABEL: test_x86_sse2_sub_sd: +; CHECK: ## BB#0: +; CHECK-NEXT: subsd %xmm1, %xmm0 +; CHECK-NEXT: retl + %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) { +; SSE-LABEL: test_x86_sse2_mul_sd: +; SSE: ## BB#0: +; SSE-NEXT: mulsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x59,0xc1] +; SSE-NEXT: retl ## encoding: [0xc3] +; +; AVX2-LABEL: test_x86_sse2_mul_sd: +; AVX2: ## BB#0: +; AVX2-NEXT: vmulsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x59,0xc1] +; AVX2-NEXT: retl ## encoding: [0xc3] +; +; SKX-LABEL: test_x86_sse2_mul_sd: +; SKX: ## BB#0: +; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x59,0xc1] +; SKX-NEXT: retl ## encoding: [0xc3] +; CHECK-LABEL: test_x86_sse2_mul_sd: +; CHECK: ## BB#0: +; CHECK-NEXT: mulsd %xmm1, %xmm0 +; CHECK-NEXT: retl + %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) { +; SSE-LABEL: test_x86_sse2_div_sd: +; SSE: ## BB#0: +; SSE-NEXT: divsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5e,0xc1] +; SSE-NEXT: retl ## encoding: [0xc3] +; +; AVX2-LABEL: test_x86_sse2_div_sd: +; AVX2: ## BB#0: +; AVX2-NEXT: vdivsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5e,0xc1] +; AVX2-NEXT: retl ## encoding: [0xc3] +; +; SKX-LABEL: test_x86_sse2_div_sd: +; SKX: ## BB#0: +; SKX-NEXT: vdivsd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x5e,0xc1] +; SKX-NEXT: retl ## encoding: [0xc3] +; CHECK-LABEL: test_x86_sse2_div_sd: +; CHECK: ## BB#0: +; CHECK-NEXT: divsd %xmm1, %xmm0 +; CHECK-NEXT: retl + %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone + + + Index: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll +++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll @@ -3,22 +3,6 @@ ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx2 -show-mc-encoding | FileCheck %s --check-prefix=VCHECK --check-prefix=AVX2 ; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=skx -show-mc-encoding | FileCheck %s --check-prefix=VCHECK --check-prefix=SKX -define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) { -; SSE-LABEL: test_x86_sse2_add_sd: -; SSE: ## BB#0: -; SSE-NEXT: addsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x58,0xc1] -; SSE-NEXT: retl ## encoding: [0xc3] -; -; VCHECK-LABEL: test_x86_sse2_add_sd: -; VCHECK: ## BB#0: -; VCHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x58,0xc1] -; VCHECK-NEXT: retl ## encoding: [0xc3] - %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] - 
ret <2 x double> %res -} -declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone - - define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) { ; SSE-LABEL: test_x86_sse2_cmp_pd: ; SSE: ## BB#0: @@ -584,23 +568,6 @@ declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone -define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) { -; SSE-LABEL: test_x86_sse2_div_sd: -; SSE: ## BB#0: -; SSE-NEXT: divsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5e,0xc1] -; SSE-NEXT: retl ## encoding: [0xc3] -; -; VCHECK-LABEL: test_x86_sse2_div_sd: -; VCHECK: ## BB#0: -; VCHECK-NEXT: vdivsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5e,0xc1] -; VCHECK-NEXT: retl ## encoding: [0xc3] - %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] - ret <2 x double> %res -} -declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone - - - define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) { ; SSE-LABEL: test_x86_sse2_max_pd: ; SSE: ## BB#0: @@ -693,22 +660,6 @@ -define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) { -; SSE-LABEL: test_x86_sse2_mul_sd: -; SSE: ## BB#0: -; SSE-NEXT: mulsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x59,0xc1] -; SSE-NEXT: retl ## encoding: [0xc3] -; -; VCHECK-LABEL: test_x86_sse2_mul_sd: -; VCHECK: ## BB#0: -; VCHECK-NEXT: vmulsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x59,0xc1] -; VCHECK-NEXT: retl ## encoding: [0xc3] - %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] - ret <2 x double> %res -} -declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone - - define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) { ; SSE-LABEL: test_x86_sse2_packssdw_128: ; SSE: ## BB#0: @@ -1555,22 +1506,6 @@ declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone -define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) { -; SSE-LABEL: test_x86_sse2_sub_sd: -; SSE: ## BB#0: -; SSE-NEXT: subsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5c,0xc1] -; SSE-NEXT: retl ## encoding: [0xc3] -; -; VCHECK-LABEL: test_x86_sse2_sub_sd: -; VCHECK: ## BB#0: -; VCHECK-NEXT: vsubsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5c,0xc1] -; VCHECK-NEXT: retl ## encoding: [0xc3] - %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] - ret <2 x double> %res -} -declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone - - define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) { ; SSE-LABEL: test_x86_sse2_ucomieq_sd: ; SSE: ## BB#0: Index: llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll +++ llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll @@ -10,9 +10,10 @@ ; X32-LABEL: test1: ; X32: ## BB#0: ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X32-NEXT: xorps %xmm1, %xmm1 -; X32-NEXT: subss LCPI0_0, %xmm0 +; X32-NEXT: addss LCPI0_0, %xmm0 ; X32-NEXT: mulss LCPI0_1, %xmm0 +; X32-NEXT: xorps %xmm1, %xmm1 +; X32-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; X32-NEXT: minss LCPI0_2, %xmm0 ; X32-NEXT: maxss %xmm1, %xmm0 ; X32-NEXT: cvttss2si %xmm0, %eax @@ -21,46 +22,60 @@ ; ; X64-LABEL: test1: ; X64: ## BB#0: +; X64-NEXT: addss 
{{.*}}(%rip), %xmm0 +; X64-NEXT: mulss {{.*}}(%rip), %xmm0 ; X64-NEXT: xorps %xmm1, %xmm1 ; X64-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] -; X64-NEXT: subss {{.*}}(%rip), %xmm0 -; X64-NEXT: mulss {{.*}}(%rip), %xmm0 ; X64-NEXT: minss {{.*}}(%rip), %xmm0 ; X64-NEXT: maxss %xmm1, %xmm0 ; X64-NEXT: cvttss2si %xmm0, %eax ; X64-NEXT: ## kill: %AX %AX %EAX ; X64-NEXT: retq ; -; X32_AVX-LABEL: test1: -; X32_AVX: ## BB#0: -; X32_AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X32_AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X32_AVX-NEXT: vsubss LCPI0_0, %xmm0, %xmm0 -; X32_AVX-NEXT: vmulss LCPI0_1, %xmm0, %xmm0 -; X32_AVX-NEXT: vminss LCPI0_2, %xmm0, %xmm0 -; X32_AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 -; X32_AVX-NEXT: vcvttss2si %xmm0, %eax -; X32_AVX-NEXT: ## kill: %AX %AX %EAX -; X32_AVX-NEXT: retl +; X32_AVX1-LABEL: test1: +; X32_AVX1: ## BB#0: +; X32_AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X32_AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X32_AVX1-NEXT: vaddss LCPI0_0, %xmm0, %xmm0 +; X32_AVX1-NEXT: vmulss LCPI0_1, %xmm0, %xmm0 +; X32_AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; X32_AVX1-NEXT: vminss LCPI0_2, %xmm0, %xmm0 +; X32_AVX1-NEXT: vmaxss %xmm1, %xmm0, %xmm0 +; X32_AVX1-NEXT: vcvttss2si %xmm0, %eax +; X32_AVX1-NEXT: ## kill: %AX %AX %EAX +; X32_AVX1-NEXT: retl ; ; X64_AVX1-LABEL: test1: ; X64_AVX1: ## BB#0: ; X64_AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X64_AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] -; X64_AVX1-NEXT: vsubss {{.*}}(%rip), %xmm0, %xmm0 +; X64_AVX1-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0 ; X64_AVX1-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 +; X64_AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; X64_AVX1-NEXT: vminss {{.*}}(%rip), %xmm0, %xmm0 ; X64_AVX1-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; X64_AVX1-NEXT: vcvttss2si %xmm0, %eax ; X64_AVX1-NEXT: ## kill: %AX %AX %EAX ; X64_AVX1-NEXT: retq ; +; X32_AVX512-LABEL: test1: +; X32_AVX512: ## BB#0: +; X32_AVX512-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X32_AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X32_AVX512-NEXT: vaddss LCPI0_0, %xmm0, %xmm0 +; X32_AVX512-NEXT: vmulss LCPI0_1, %xmm0, %xmm0 +; X32_AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; X32_AVX512-NEXT: vminss LCPI0_2, %xmm0, %xmm0 +; X32_AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 +; X32_AVX512-NEXT: vcvttss2si %xmm0, %eax +; X32_AVX512-NEXT: ## kill: %AX %AX %EAX +; X32_AVX512-NEXT: retl +; ; X64_AVX512-LABEL: test1: ; X64_AVX512: ## BB#0: ; X64_AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X64_AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] -; X64_AVX512-NEXT: vsubss {{.*}}(%rip), %xmm0, %xmm0 +; X64_AVX512-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0 ; X64_AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 +; X64_AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; X64_AVX512-NEXT: vminss {{.*}}(%rip), %xmm0, %xmm0 ; X64_AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; X64_AVX512-NEXT: vcvttss2si %xmm0, %eax Index: llvm/trunk/test/Transforms/InstCombine/x86-sse.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/x86-sse.ll +++ llvm/trunk/test/Transforms/InstCombine/x86-sse.ll @@ -89,18 +89,6 @@ ret float %6 } -define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) { -; CHECK-LABEL: @test_add_ss( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a, <4 x float> %b) -; CHECK-NEXT: ret <4 x float> [[TMP1]] -; - %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 - %2 = insertelement <4 x 
float> %1, float 2.000000e+00, i32 2 - %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 - %4 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a, <4 x float> %3) - ret <4 x float> %4 -} - define float @test_add_ss_0(float %a, float %b) { ; CHECK-LABEL: @test_add_ss_0( ; CHECK-NEXT: [[TMP1:%.*]] = fadd float %a, %b @@ -133,18 +121,6 @@ ret float %7 } -define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) { -; CHECK-LABEL: @test_sub_ss( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a, <4 x float> %b) -; CHECK-NEXT: ret <4 x float> [[TMP1]] -; - %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 - %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 - %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 - %4 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a, <4 x float> %3) - ret <4 x float> %4 -} - define float @test_sub_ss_0(float %a, float %b) { ; CHECK-LABEL: @test_sub_ss_0( ; CHECK-NEXT: [[TMP1:%.*]] = fsub float %a, %b @@ -177,18 +153,6 @@ ret float %7 } -define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) { -; CHECK-LABEL: @test_mul_ss( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a, <4 x float> %b) -; CHECK-NEXT: ret <4 x float> [[TMP1]] -; - %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 - %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 - %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 - %4 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a, <4 x float> %3) - ret <4 x float> %4 -} - define float @test_mul_ss_0(float %a, float %b) { ; CHECK-LABEL: @test_mul_ss_0( ; CHECK-NEXT: [[TMP1:%.*]] = fmul float %a, %b @@ -221,18 +185,6 @@ ret float %7 } -define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) { -; CHECK-LABEL: @test_div_ss( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a, <4 x float> %b) -; CHECK-NEXT: ret <4 x float> [[TMP1]] -; - %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 - %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 - %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 - %4 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a, <4 x float> %3) - ret <4 x float> %4 -} - define float @test_div_ss_0(float %a, float %b) { ; CHECK-LABEL: @test_div_ss_0( ; CHECK-NEXT: [[TMP1:%.*]] = fdiv float %a, %b Index: llvm/trunk/test/Transforms/InstCombine/x86-sse2.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/x86-sse2.ll +++ llvm/trunk/test/Transforms/InstCombine/x86-sse2.ll @@ -27,16 +27,6 @@ ret double %4 } -define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) { -; CHECK-LABEL: @test_add_sd( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a, <2 x double> %b) -; CHECK-NEXT: ret <2 x double> [[TMP1]] -; - %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 - %2 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a, <2 x double> %1) - ret <2 x double> %2 -} - define double @test_add_sd_0(double %a, double %b) { ; CHECK-LABEL: @test_add_sd_0( ; CHECK-NEXT: [[TMP1:%.*]] = fadd double %a, %b @@ -64,16 +54,6 @@ ret double %6 } -define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) { -; CHECK-LABEL: @test_sub_sd( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a, <2 x double> %b) -; CHECK-NEXT: ret <2 
x double> [[TMP1]] -; - %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 - %2 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a, <2 x double> %1) - ret <2 x double> %2 -} - define double @test_sub_sd_0(double %a, double %b) { ; CHECK-LABEL: @test_sub_sd_0( ; CHECK-NEXT: [[TMP1:%.*]] = fsub double %a, %b @@ -101,16 +81,6 @@ ret double %6 } -define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) { -; CHECK-LABEL: @test_mul_sd( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a, <2 x double> %b) -; CHECK-NEXT: ret <2 x double> [[TMP1]] -; - %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 - %2 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a, <2 x double> %1) - ret <2 x double> %2 -} - define double @test_mul_sd_0(double %a, double %b) { ; CHECK-LABEL: @test_mul_sd_0( ; CHECK-NEXT: [[TMP1:%.*]] = fmul double %a, %b @@ -138,16 +108,6 @@ ret double %6 } -define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) { -; CHECK-LABEL: @test_div_sd( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a, <2 x double> %b) -; CHECK-NEXT: ret <2 x double> [[TMP1]] -; - %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 - %2 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a, <2 x double> %1) - ret <2 x double> %2 -} - define double @test_div_sd_0(double %a, double %b) { ; CHECK-LABEL: @test_div_sd_0( ; CHECK-NEXT: [[TMP1:%.*]] = fdiv double %a, %b
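
NOTE (illustrative sketch, not part of the committed diff): with this change the llvm.x86.sse.add/sub/mul/div.ss and .sd intrinsics are removed, and old bitcode is rewritten by the new AutoUpgrade.cpp handlers into plain scalar FP IR. Assuming a module that still calls the old llvm.x86.sse.add.ss intrinsic, the upgrade should produce roughly the sequence below, mirroring the CreateExtractElement/CreateFAdd/CreateInsertElement calls added above (the function name @upgraded_add_ss is hypothetical):

define <4 x float> @upgraded_add_ss(<4 x float> %a, <4 x float> %b) {
  ; Replaces the removed call:
  ;   %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a, <4 x float> %b)
  %a0 = extractelement <4 x float> %a, i32 0                ; low element of the first operand
  %b0 = extractelement <4 x float> %b, i32 0                ; low element of the second operand
  %sum = fadd float %a0, %b0                                ; scalar add of the low elements
  %res = insertelement <4 x float> %a, float %sum, i32 0    ; upper elements pass through from %a
  ret <4 x float> %res
}

The sub/mul/div and SSE2 .sd variants differ only in the FP opcode and element type, which is why the old special cases in InstCombineCalls.cpp and InstCombineSimplifyDemanded.cpp can be deleted: the generic IR above is already scalarized.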