diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -43866,6 +43866,10 @@
   assert(N->getOperand(1).getValueType() == MVT::i8 &&
          "Unexpected shift amount type");
 
+  // (shift undef, X) -> 0
+  if (N0.isUndef())
+    return DAG.getConstant(0, SDLoc(N), VT);
+
   // Out of range logical bit shifts are guaranteed to be zero.
   // Out of range arithmetic bit shifts splat the sign bit.
   unsigned ShiftVal = N->getConstantOperandVal(1);
diff --git a/llvm/test/CodeGen/X86/vec_shift5.ll b/llvm/test/CodeGen/X86/vec_shift5.ll
--- a/llvm/test/CodeGen/X86/vec_shift5.ll
+++ b/llvm/test/CodeGen/X86/vec_shift5.ll
@@ -170,6 +170,29 @@
   ret <2 x i64> %1
 }
 
+; Make sure we fold fully undef input vectors. We previously folded only when
+; undef had a single use so use 2 undefs.
+define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0, <4 x i32>* %dummy) {
+; X86-LABEL: test_x86_sse2_pslli_d:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    xorps %xmm0, %xmm0
+; X86-NEXT:    movaps %xmm0, (%eax)
+; X86-NEXT:    xorps %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_x86_sse2_pslli_d:
+; X64:       # %bb.0:
+; X64-NEXT:    xorps %xmm0, %xmm0
+; X64-NEXT:    movaps %xmm0, (%rdi)
+; X64-NEXT:    xorps %xmm0, %xmm0
+; X64-NEXT:    retq
+  %a = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> undef, i32 6)
+  store <4 x i32> %a, <4 x i32>* %dummy
+  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> undef, i32 7)
+  ret <4 x i32> %res
+}
+
 declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32)
 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32)
 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32)
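
Note (not part of the patch): a minimal IR sketch of the fold in isolation, assuming it is compiled with llc for an SSE2-capable x86 target; the function name @pslli_w_undef is hypothetical and only for illustration.

; With the combine above, an immediate shift of a fully undef vector folds to
; an all-zeros vector, so this should lower to a zeroing idiom (e.g. xorps)
; plus ret rather than an actual psllw.
define <8 x i16> @pslli_w_undef() {
  %r = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> undef, i32 3)
  ret <8 x i16> %r
}
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32)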