Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -554,4 +554,20 @@
     break;
   }
+  case Intrinsic::x86_sse2_pslli_d: {
+    // Simplify when the shift count is a constant: a count greater than 31
+    // folds the whole result to zero, any smaller count becomes a plain
+    // vector shl by a splat of the count.
+    if (auto *Count = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+      Value *Vec = II->getArgOperand(0);
+      if (Count->getZExtValue() > 31)
+        return ReplaceInstUsesWith(CI,
+                                   ConstantAggregateZero::get(Vec->getType()));
+
+      unsigned VWidth = cast<VectorType>(Vec->getType())->getNumElements();
+      return BinaryOperator::CreateShl(
+          Vec, Builder->CreateVectorSplat(VWidth, Count));
+    }
+    break;
+  }
   case Intrinsic::x86_sse41_pmovsxbw:
   case Intrinsic::x86_sse41_pmovsxwd:
Index: test/Transforms/InstCombine/vec_demanded_elts.ll
===================================================================
--- test/Transforms/InstCombine/vec_demanded_elts.ll
+++ test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -209,2 +209,22 @@
   ret <4 x float> %ret
 }
+
+define <4 x i32> @test_pslli_s() nounwind {
+entry:
+; Constant fold: a count below 32 becomes a shl, so each lane doubles here.
+; CHECK: test_pslli_s
+; CHECK: <i32 2, i32 4, i32 6, i32 8>
+  %0 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 1)
+  ret <4 x i32> %0
+}
+
+define <4 x i32> @test_pslli_l() nounwind {
+entry:
+; A count of 32 folds the whole result to zero, whatever the input lanes are.
+; CHECK: test_pslli_l
+; CHECK: zeroinitializer
+  %0 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 32)
+  ret <4 x i32> %0
+}
+
+declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32)