Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2212,6 +2212,50 @@
     break;
   }
 
+  case Intrinsic::x86_pclmulqdq: {
+    // Only one element of each vector operand is demanded: bit 0 of the
+    // immediate selects the element of operand 0, bit 4 that of operand 1.
+    if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
+      unsigned Imm = C->getZExtValue();
+
+      bool MadeChange = false;
+      Value *Arg0 = II->getArgOperand(0);
+      Value *Arg1 = II->getArgOperand(1);
+      unsigned VWidth = Arg0->getType()->getVectorNumElements();
+      APInt DemandedElts(VWidth, 0);
+
+      // Operand 0 mask: 0b10 demands element 1, 0b01 demands element 0.
+      APInt UndefElts1(VWidth, 0);
+      DemandedElts = (Imm & 0x01) ? 2 : 1;
+      if (Value *V = SimplifyDemandedVectorElts(Arg0, DemandedElts,
+                                                UndefElts1)) {
+        II->setArgOperand(0, V);
+        MadeChange = true;
+      }
+
+      // Operand 1 mask, chosen the same way from bit 4 of the immediate.
+      APInt UndefElts2(VWidth, 0);
+      DemandedElts = (Imm & 0x10) ? 2 : 1;
+      if (Value *V = SimplifyDemandedVectorElts(Arg1, DemandedElts,
+                                                UndefElts2)) {
+        II->setArgOperand(1, V);
+        MadeChange = true;
+      }
+
+      // If either selected input element is undef, fold the call to zero:
+      // the undef element may be taken to be zero, and a carry-less multiply
+      // by zero is zero.
+      if (UndefElts1[(Imm & 0x01) ? 1 : 0] ||
+          UndefElts2[(Imm & 0x10) ? 1 : 0])
+        return replaceInstUsesWith(*II,
+                                   ConstantAggregateZero::get(II->getType()));
+
+      if (MadeChange)
+        return II;
+    }
+    break;
+  }
+
   case Intrinsic::x86_sse41_insertps:
     if (Value *V = simplifyX86insertps(*II, *Builder))
       return replaceInstUsesWith(*II, V);
Index: test/Transforms/InstCombine/x86-clmulqdq.ll
===================================================================
--- /dev/null
+++ test/Transforms/InstCombine/x86-clmulqdq.ll
@@ -0,0 +1,80 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8)
+
+define <2 x i64> @test_demanded_elts_pclmulqdq_0(<2 x i64> %a0, <2 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]], i8 0)
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = insertelement <2 x i64> %a0, i64 1, i64 1
+  %2 = insertelement <2 x i64> %a1, i64 1, i64 1
+  %3 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %1, <2 x i64> %2, i8 0)
+  ret <2 x i64> %3
+}
+
+define <2 x i64> @test_demanded_elts_pclmulqdq_1(<2 x i64> %a0, <2 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_1(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> <i64 undef, i64 1>, <2 x i64> [[A1:%.*]], i8 1)
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = insertelement <2 x i64> %a0, i64 1, i64 1
+  %2 = insertelement <2 x i64> %a1, i64 1, i64 1
+  %3 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %1, <2 x i64> %2, i8 1)
+  ret <2 x i64> %3
+}
+
+define <2 x i64> @test_demanded_elts_pclmulqdq_16(<2 x i64> %a0, <2 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_16(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> [[A0:%.*]], <2 x i64> <i64 undef, i64 1>, i8 16)
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = insertelement <2 x i64> %a0, i64 1, i64 1
+  %2 = insertelement <2 x i64> %a1, i64 1, i64 1
+  %3 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %1, <2 x i64> %2, i8 16)
+  ret <2 x i64> %3
+}
+
+define <2 x i64> @test_demanded_elts_pclmulqdq_17(<2 x i64> %a0, <2 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_17(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> <i64 undef, i64 1>, <2 x i64> <i64 undef, i64 1>, i8 17)
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = insertelement <2 x i64> %a0, i64 1, i64 1
+  %2 = insertelement <2 x i64> %a1, i64 1, i64 1
+  %3 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %1, <2 x i64> %2, i8 17)
+  ret <2 x i64> %3
+}
+
+define <2 x i64> @test_demanded_elts_pclmulqdq_undef_0() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_undef_0(
+; CHECK-NEXT:    ret <2 x i64> zeroinitializer
+;
+  %1 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> <i64 undef, i64 1>, <2 x i64> <i64 undef, i64 1>, i8 0)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_demanded_elts_pclmulqdq_undef_1() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_undef_1(
+; CHECK-NEXT:    ret <2 x i64> zeroinitializer
+;
+  %1 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> <i64 1, i64 undef>, <2 x i64> <i64 undef, i64 1>, i8 1)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_demanded_elts_pclmulqdq_undef_16() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_undef_16(
+; CHECK-NEXT:    ret <2 x i64> zeroinitializer
+;
+  %1 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> <i64 undef, i64 1>, <2 x i64> <i64 1, i64 undef>, i8 16)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_demanded_elts_pclmulqdq_undef_17() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_undef_17(
+; CHECK-NEXT:    ret <2 x i64> zeroinitializer
+;
+  %1 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> <i64 1, i64 undef>, <2 x i64> <i64 1, i64 undef>, i8 17)
+  ret <2 x i64> %1
+}