diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3824,15 +3824,31 @@ return; } - TE->setOperandsInOrder(); - for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { - ValueList Operands; + SmallVector Operands(VL0->getNumOperands()); + for (unsigned I = 0, Ops = VL0->getNumOperands(); I < Ops; ++I) { + Operands[I].reserve(VL.size()); // Prepare the operand vector. - for (Value *V : VL) - Operands.push_back(cast(V)->getOperand(i)); - - buildTree_rec(Operands, Depth + 1, {TE, i}); + for (Value *V : VL) { + auto *Op = cast(V)->getOperand(I); + if (isa(Op) && BinaryOperator::isIntDivRem(ShuffleOrOp)) { + // Need to replace undefs with actual values for integer division + // operations, otherwise instcombiner transforms such instructions + // to poison values. + // Just try to find the first non-undef operand, if any, otherwise + // use undef if all values are undefs. + const auto *It = find_if(VL, [I](Value *V) { + return isa(V) && + !isa(cast(V)->getOperand(I)); + }); + if (It != VL.end()) + Op = cast(*It)->getOperand(I); + } + Operands[I].push_back(Op); + } + TE->setOperand(I, Operands[I]); } + for (unsigned I = 0, Ops = VL0->getNumOperands(); I < Ops; ++I) + buildTree_rec(Operands[I], Depth + 1, {TE, I}); return; } case Instruction::GetElementPtr: { diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-div-undef.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-div-undef.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-div-undef.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-div-undef.ll @@ -3,7 +3,8 @@ define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) { ; CHECK-LABEL: @sdiv_v8i32_undefs( -; CHECK-NEXT: ret <8 x i32> poison +; CHECK-NEXT: [[TMP1:%.*]] = sdiv <8 x i32> [[A:%.*]], +; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %a0 = extractelement <8 x i32> %a, i32 0 %a1 = extractelement <8 x i32> %a, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll @@ -13,27 +13,28 @@ ; CHECK-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_A]], %struct.a* [[P]], i64 0, i32 3 ; CHECK-NEXT: [[H:%.*]] = getelementptr inbounds [[CLASS_E:%.*]], %class.e* [[THIS:%.*]], i64 0, i32 1 ; CHECK-NEXT: [[G:%.*]] = getelementptr inbounds [[CLASS_E]], %class.e* [[THIS]], i64 0, i32 0 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> , i32 [[ADD7:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = sdiv <2 x i32> [[TMP0]], -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD7:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[ADD7]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = sdiv <2 x i32> [[TMP1]], +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: switch i32 undef, label [[SW_EPILOG:%.*]] [ ; CHECK-NEXT: i32 0, label [[SW_BB:%.*]] ; CHECK-NEXT: i32 2, label [[SW_BB]] ; CHECK-NEXT: ] ; CHECK: sw.bb: -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[G]] to <2 x i32>* -; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[G]] to <2 x i32>* +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[TMP3]], align 4 ; CHECK-NEXT: [[SHRINK_SHUFFLE:%.*]] = shufflevector <4 x i32> [[SHUFFLE]], <4 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i32> [[SHRINK_SHUFFLE]], -; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i32> [[SHRINK_SHUFFLE]], +; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[TMP4]], [[TMP5]] ; CHECK-NEXT: br label [[SW_EPILOG]] ; CHECK: sw.epilog: -; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ undef, [[ENTRY:%.*]] ], [ [[TMP5]], [[SW_BB]] ] -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = sub <4 x i32> poison, [[SHUFFLE]] -; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[B]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* [[TMP9]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ undef, [[ENTRY:%.*]] ], [ [[TMP6]], [[SW_BB]] ] +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = sub <4 x i32> poison, [[SHUFFLE]] +; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP8]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[B]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[TMP10]], align 4 ; CHECK-NEXT: ret void ; entry: