Index: lib/Transforms/InstCombine/InstCombineVectorOps.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -869,10 +869,22 @@ return nullptr; } +/// Return true if a given LHS, undef RHS, and a Mask, would introduce an undef. +static bool WouldIntroduceUndef(Value *LHS, ArrayRef<int> Mask) { + const int VWidth = LHS->getType()->getVectorNumElements(); + + for (int i = 0, e = Mask.size(); i != e; ++i) { + if ((VWidth <= Mask[i]) || (-1 == Mask[i])) + return true; + } + + return false; +} + /// Return true if we can evaluate the specified expression tree if the vector /// elements were shuffled in a different order. static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask, - unsigned Depth = 5) { + bool UndefBeingIntroduced, unsigned Depth = 5) { // We can always reorder the elements of a constant. if (isa<Constant>(V)) return true; @@ -888,17 +900,20 @@ if (Depth == 0) return false; switch (I->getOpcode()) { + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + if (UndefBeingIntroduced) + return false; + LLVM_FALLTHROUGH; case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: case Instruction::FSub: case Instruction::Mul: case Instruction::FMul: - case Instruction::UDiv: - case Instruction::SDiv: case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: case Instruction::FRem: case Instruction::Shl: case Instruction::LShr: @@ -919,7 +934,8 @@ case Instruction::FPExt: case Instruction::GetElementPtr: { for (Value *Operand : I->operands()) { - if (!CanEvaluateShuffled(Operand, Mask, Depth-1)) + if (!CanEvaluateShuffled(Operand, Mask, UndefBeingIntroduced, + Depth - 1)) return false; } return true; @@ -939,7 +955,8 @@ SeenOnce = true; } } - return CanEvaluateShuffled(I->getOperand(0), Mask, Depth-1); + return CanEvaluateShuffled(I->getOperand(0), Mask, 
UndefBeingIntroduced, + Depth - 1); } } return false; @@ -1464,7 +1481,8 @@ if (isRHSID) return replaceInstUsesWith(SVI, RHS); } - if (isa<UndefValue>(RHS) && CanEvaluateShuffled(LHS, Mask)) { + if (isa<UndefValue>(RHS) && + CanEvaluateShuffled(LHS, Mask, WouldIntroduceUndef(LHS, Mask))) { Value *V = EvaluateInDifferentElementOrder(LHS, Mask); return replaceInstUsesWith(SVI, V); } Index: test/Transforms/InstCombine/stop_bad_undef_propagation.ll =================================================================== --- test/Transforms/InstCombine/stop_bad_undef_propagation.ll +++ test/Transforms/InstCombine/stop_bad_undef_propagation.ll @@ -9,7 +9,12 @@ define <3 x float> @udiv0(<3 x float> %x, i32 %y, i32 %z) { ; CHECK-LABEL: @udiv0( -; CHECK-NEXT: [[T5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> , <3 x i32> +; CHECK-NEXT: [[T0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x i32> [[T0]], i32 [[Z:%.*]], i32 1 +; CHECK-NEXT: [[T2:%.*]] = udiv <2 x i32> [[T1]], +; CHECK-NEXT: [[T3:%.*]] = uitofp <2 x i32> [[T2]] to <2 x float> +; CHECK-NEXT: [[T4:%.*]] = shufflevector <2 x float> [[T3]], <2 x float> undef, <3 x i32> +; CHECK-NEXT: [[T5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> [[T4]], <3 x i32> ; CHECK-NEXT: [[T6:%.*]] = fmul <3 x float> [[T5]], [[X]] ; CHECK-NEXT: ret <3 x float> [[T6]] ; @@ -25,7 +30,12 @@ define <3 x float> @udiv1(<3 x float> %x, i32 %y, i32 %z) { ; CHECK-LABEL: @udiv1( -; CHECK-NEXT: [[T5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> , <3 x i32> +; CHECK-NEXT: [[T0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x i32> [[T0]], i32 [[Z:%.*]], i32 1 +; CHECK-NEXT: [[T2:%.*]] = udiv <2 x i32> [[T1]], +; CHECK-NEXT: [[T3:%.*]] = uitofp <2 x i32> [[T2]] to <2 x float> +; CHECK-NEXT: [[T4:%.*]] = shufflevector <2 x float> [[T3]], <2 x float> undef, <3 x i32> +; CHECK-NEXT: [[T5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x 
float> [[T4]], <3 x i32> ; CHECK-NEXT: [[T6:%.*]] = fmul <3 x float> [[T5]], [[X]] ; CHECK-NEXT: ret <3 x float> [[T6]] ; @@ -41,7 +51,12 @@ define <3 x float> @udiv2(<3 x float> %x, i32 %y, i32 %z) { ; CHECK-LABEL: @udiv2( -; CHECK-NEXT: [[T5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> , <3 x i32> +; CHECK-NEXT: [[T0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x i32> [[T0]], i32 [[Z:%.*]], i32 1 +; CHECK-NEXT: [[T2:%.*]] = udiv <2 x i32> [[T1]], +; CHECK-NEXT: [[T3:%.*]] = uitofp <2 x i32> [[T2]] to <2 x float> +; CHECK-NEXT: [[T4:%.*]] = shufflevector <2 x float> [[T3]], <2 x float> undef, <3 x i32> +; CHECK-NEXT: [[T5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> [[T4]], <3 x i32> ; CHECK-NEXT: [[T6:%.*]] = fmul <3 x float> [[T5]], [[X]] ; CHECK-NEXT: ret <3 x float> [[T6]] ; @@ -57,7 +72,12 @@ define <3 x float> @sdiv0(<3 x float> %x, i32 %y, i32 %z) { ; CHECK-LABEL: @sdiv0( -; CHECK-NEXT: [[T5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> , <3 x i32> +; CHECK-NEXT: [[T0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x i32> [[T0]], i32 [[Z:%.*]], i32 1 +; CHECK-NEXT: [[T2:%.*]] = sdiv <2 x i32> [[T1]], +; CHECK-NEXT: [[T3:%.*]] = sitofp <2 x i32> [[T2]] to <2 x float> +; CHECK-NEXT: [[T4:%.*]] = shufflevector <2 x float> [[T3]], <2 x float> undef, <3 x i32> +; CHECK-NEXT: [[T5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> [[T4]], <3 x i32> ; CHECK-NEXT: [[T6:%.*]] = fmul <3 x float> [[T5]], [[X]] ; CHECK-NEXT: ret <3 x float> [[T6]] ; @@ -73,7 +93,12 @@ define <3 x float> @sdiv1(<3 x float> %x, i32 %y, i32 %z) { ; CHECK-LABEL: @sdiv1( -; CHECK-NEXT: [[T5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> , <3 x i32> +; CHECK-NEXT: [[T0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x i32> [[T0]], i32 [[Z:%.*]], i32 1 +; CHECK-NEXT: 
[[T2:%.*]] = sdiv <2 x i32> [[T1]], +; CHECK-NEXT: [[T3:%.*]] = sitofp <2 x i32> [[T2]] to <2 x float> +; CHECK-NEXT: [[T4:%.*]] = shufflevector <2 x float> [[T3]], <2 x float> undef, <3 x i32> +; CHECK-NEXT: [[T5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> [[T4]], <3 x i32> ; CHECK-NEXT: [[T6:%.*]] = fmul <3 x float> [[T5]], [[X]] ; CHECK-NEXT: ret <3 x float> [[T6]] ; @@ -89,7 +114,12 @@ define <3 x float> @sdiv2(<3 x float> %x, i32 %y, i32 %z) { ; CHECK-LABEL: @sdiv2( -; CHECK-NEXT: [[T5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> , <3 x i32> +; CHECK-NEXT: [[T0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x i32> [[T0]], i32 [[Z:%.*]], i32 1 +; CHECK-NEXT: [[T2:%.*]] = sdiv <2 x i32> [[T1]], +; CHECK-NEXT: [[T3:%.*]] = sitofp <2 x i32> [[T2]] to <2 x float> +; CHECK-NEXT: [[T4:%.*]] = shufflevector <2 x float> [[T3]], <2 x float> undef, <3 x i32> +; CHECK-NEXT: [[T5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> [[T4]], <3 x i32> ; CHECK-NEXT: [[T6:%.*]] = fmul <3 x float> [[T5]], [[X]] ; CHECK-NEXT: ret <3 x float> [[T6]] ; @@ -105,7 +135,12 @@ define <3 x float> @urem0(<3 x float> %x, i32 %y, i32 %z) { ; CHECK-LABEL: @urem0( -; CHECK-NEXT: [[T5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> , <3 x i32> +; CHECK-NEXT: [[T0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x i32> [[T0]], i32 [[Z:%.*]], i32 1 +; CHECK-NEXT: [[T2:%.*]] = urem <2 x i32> [[T1]], +; CHECK-NEXT: [[T3:%.*]] = uitofp <2 x i32> [[T2]] to <2 x float> +; CHECK-NEXT: [[T4:%.*]] = shufflevector <2 x float> [[T3]], <2 x float> undef, <3 x i32> +; CHECK-NEXT: [[T5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> [[T4]], <3 x i32> ; CHECK-NEXT: [[T6:%.*]] = fmul <3 x float> [[T5]], [[X]] ; CHECK-NEXT: ret <3 x float> [[T6]] ; @@ -121,7 +156,12 @@ define <3 x float> @urem1(<3 x float> %x, i32 %y, i32 %z) { ; CHECK-LABEL: @urem1( 
-; CHECK-NEXT: [[T5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> , <3 x i32> +; CHECK-NEXT: [[T0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x i32> [[T0]], i32 [[Z:%.*]], i32 1 +; CHECK-NEXT: [[T2:%.*]] = urem <2 x i32> [[T1]], +; CHECK-NEXT: [[T3:%.*]] = uitofp <2 x i32> [[T2]] to <2 x float> +; CHECK-NEXT: [[T4:%.*]] = shufflevector <2 x float> [[T3]], <2 x float> undef, <3 x i32> +; CHECK-NEXT: [[T5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> [[T4]], <3 x i32> ; CHECK-NEXT: [[T6:%.*]] = fmul <3 x float> [[T5]], [[X]] ; CHECK-NEXT: ret <3 x float> [[T6]] ; @@ -137,7 +177,12 @@ define <3 x float> @urem2(<3 x float> %x, i32 %y, i32 %z) { ; CHECK-LABEL: @urem2( -; CHECK-NEXT: [[T5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> , <3 x i32> +; CHECK-NEXT: [[T0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x i32> [[T0]], i32 [[Z:%.*]], i32 1 +; CHECK-NEXT: [[T2:%.*]] = urem <2 x i32> [[T1]], +; CHECK-NEXT: [[T3:%.*]] = uitofp <2 x i32> [[T2]] to <2 x float> +; CHECK-NEXT: [[T4:%.*]] = shufflevector <2 x float> [[T3]], <2 x float> undef, <3 x i32> +; CHECK-NEXT: [[T5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> [[T4]], <3 x i32> ; CHECK-NEXT: [[T6:%.*]] = fmul <3 x float> [[T5]], [[X]] ; CHECK-NEXT: ret <3 x float> [[T6]] ; @@ -153,7 +198,12 @@ define <3 x float> @srem0(<3 x float> %x, i32 %y, i32 %z) { ; CHECK-LABEL: @srem0( -; CHECK-NEXT: [[T5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> , <3 x i32> +; CHECK-NEXT: [[T0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x i32> [[T0]], i32 [[Z:%.*]], i32 1 +; CHECK-NEXT: [[T2:%.*]] = srem <2 x i32> [[T1]], +; CHECK-NEXT: [[T3:%.*]] = sitofp <2 x i32> [[T2]] to <2 x float> +; CHECK-NEXT: [[T4:%.*]] = shufflevector <2 x float> [[T3]], <2 x float> undef, <3 x i32> +; CHECK-NEXT: [[T5:%.*]] = 
shufflevector <3 x float> [[X:%.*]], <3 x float> [[T4]], <3 x i32> ; CHECK-NEXT: [[T6:%.*]] = fmul <3 x float> [[T5]], [[X]] ; CHECK-NEXT: ret <3 x float> [[T6]] ; @@ -169,7 +219,12 @@ define <3 x float> @srem1(<3 x float> %x, i32 %y, i32 %z) { ; CHECK-LABEL: @srem1( -; CHECK-NEXT: [[T5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> , <3 x i32> +; CHECK-NEXT: [[T0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x i32> [[T0]], i32 [[Z:%.*]], i32 1 +; CHECK-NEXT: [[T2:%.*]] = srem <2 x i32> [[T1]], +; CHECK-NEXT: [[T3:%.*]] = sitofp <2 x i32> [[T2]] to <2 x float> +; CHECK-NEXT: [[T4:%.*]] = shufflevector <2 x float> [[T3]], <2 x float> undef, <3 x i32> +; CHECK-NEXT: [[T5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> [[T4]], <3 x i32> ; CHECK-NEXT: [[T6:%.*]] = fmul <3 x float> [[T5]], [[X]] ; CHECK-NEXT: ret <3 x float> [[T6]] ; @@ -185,7 +240,12 @@ define <3 x float> @srem2(<3 x float> %x, i32 %y, i32 %z) { ; CHECK-LABEL: @srem2( -; CHECK-NEXT: [[T5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> , <3 x i32> +; CHECK-NEXT: [[T0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x i32> [[T0]], i32 [[Z:%.*]], i32 1 +; CHECK-NEXT: [[T2:%.*]] = srem <2 x i32> [[T1]], +; CHECK-NEXT: [[T3:%.*]] = sitofp <2 x i32> [[T2]] to <2 x float> +; CHECK-NEXT: [[T4:%.*]] = shufflevector <2 x float> [[T3]], <2 x float> undef, <3 x i32> +; CHECK-NEXT: [[T5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> [[T4]], <3 x i32> ; CHECK-NEXT: [[T6:%.*]] = fmul <3 x float> [[T5]], [[X]] ; CHECK-NEXT: ret <3 x float> [[T6]] ;