diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3848,6 +3848,21 @@ buildTree_rec(Right, Depth + 1, {TE, 1}); return; } + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: { + if (any_of(VL, [](Value *V) { + return isa(cast(V)->getOperand(1)); + })) { + BS.cancelScheduling(VL, VL0); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); + LLVM_DEBUG(dbgs() << "SLP: DIV/REM contains UNDEF denominators.\n"); + return; + } + LLVM_FALLTHROUGH; + } case Instruction::Select: case Instruction::FNeg: case Instruction::Add: @@ -3856,11 +3871,7 @@ case Instruction::FSub: case Instruction::Mul: case Instruction::FMul: - case Instruction::UDiv: - case Instruction::SDiv: case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: case Instruction::FRem: case Instruction::Shl: case Instruction::LShr: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-div-undef.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-div-undef.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-div-undef.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-div-undef.ll @@ -3,7 +3,21 @@ define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) { ; CHECK-LABEL: @sdiv_v8i32_undefs( -; CHECK-NEXT: ret <8 x i32> poison +; CHECK-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 1 +; CHECK-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5 +; CHECK-NEXT: [[AB1:%.*]] = sdiv i32 [[A1]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = sdiv <2 x i32> [[TMP1]], +; CHECK-NEXT: [[AB5:%.*]] = sdiv i32 [[A5]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = sdiv <2 x i32> [[TMP3]], +; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x i32> poison, i32 [[AB1]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[R32:%.*]] = shufflevector <8 x i32> [[R1]], <8 x i32> [[TMP5]], <8 x i32> +; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R32]], i32 [[AB5]], i32 5 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[R71:%.*]] = shufflevector <8 x i32> [[R5]], <8 x i32> [[TMP6]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[R71]] ; %a0 = extractelement <8 x i32> %a, i32 0 %a1 = extractelement <8 x i32> %a, i32 1