Index: lib/Transforms/InstCombine/InstCombineVectorOps.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -869,10 +869,22 @@ return nullptr; } +/// Return true if Mask has a -1 entry or indexes past LHS into the undef RHS. +static bool WouldIntroduceUndef(Value *LHS, ArrayRef Mask) { + const int VWidth = LHS->getType()->getVectorNumElements(); + + for (int i = 0, e = Mask.size(); i != e; ++i) { + if ((VWidth <= Mask[i]) || (-1 == Mask[i])) + return true; + } + + return false; +} + /// Return true if we can evaluate the specified expression tree if the vector /// elements were shuffled in a different order. static bool CanEvaluateShuffled(Value *V, ArrayRef Mask, - unsigned Depth = 5) { + bool UndefBeingIntroduced, unsigned Depth = 5) { // We can always reorder the elements of a constant. if (isa(V)) return true; @@ -888,17 +900,20 @@ if (Depth == 0) return false; switch (I->getOpcode()) { + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + if (UndefBeingIntroduced) + return false; + LLVM_FALLTHROUGH; case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: case Instruction::FSub: case Instruction::Mul: case Instruction::FMul: - case Instruction::UDiv: - case Instruction::SDiv: case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: case Instruction::FRem: case Instruction::Shl: case Instruction::LShr: @@ -919,7 +934,8 @@ case Instruction::FPExt: case Instruction::GetElementPtr: { for (Value *Operand : I->operands()) { - if (!CanEvaluateShuffled(Operand, Mask, Depth-1)) + if (!CanEvaluateShuffled(Operand, Mask, UndefBeingIntroduced, + Depth - 1)) return false; } return true; @@ -939,7 +955,8 @@ SeenOnce = true; } } - return CanEvaluateShuffled(I->getOperand(0), Mask, Depth-1); + return CanEvaluateShuffled(I->getOperand(0), Mask, 
UndefBeingIntroduced, + Depth - 1); } } return false; @@ -1464,7 +1481,8 @@ if (isRHSID) return replaceInstUsesWith(SVI, RHS); } - if (isa(RHS) && CanEvaluateShuffled(LHS, Mask)) { + if (isa(RHS) && + CanEvaluateShuffled(LHS, Mask, WouldIntroduceUndef(LHS, Mask))) { Value *V = EvaluateInDifferentElementOrder(LHS, Mask); return replaceInstUsesWith(SVI, V); } Index: test/Transforms/InstCombine/stop_bad_undef_propagation.ll =================================================================== --- /dev/null +++ test/Transforms/InstCombine/stop_bad_undef_propagation.ll @@ -0,0 +1,278 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +define <3 x float> @udiv0(<3 x float> %x, i32 %y, i32 %z) { +; CHECK-LABEL: @udiv0( +; CHECK-NEXT: .entry: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[Z:%.*]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = uitofp <2 x i32> [[TMP2]] to <2 x float> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> undef, <3 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> [[TMP4]], <3 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fmul reassoc nnan arcp contract <3 x float> [[TMP5]], [[X]] +; CHECK-NEXT: ret <3 x float> [[TMP6]] +; +.entry: + %0 = insertelement <2 x i32> undef, i32 %y, i32 0 + %1 = insertelement <2 x i32> %0, i32 %z, i32 1 + %2 = udiv <2 x i32> %1, + %3 = uitofp <2 x i32> %2 to <2 x float> + %4 = shufflevector <2 x float> %3, <2 x float> undef, <3 x i32> + %5 = shufflevector <3 x float> %x, <3 x float> %4, <3 x i32> + %6 = fmul reassoc nnan arcp contract <3 x float> %x, %5 + ret <3 x float> %6 +} + +define <3 x float> @udiv1(<3 x float> %x, i32 %y, i32 %z) { +; CHECK-LABEL: @udiv1( +; CHECK-NEXT: .entry: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> undef, 
i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[Z:%.*]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = uitofp <2 x i32> [[TMP2]] to <2 x float> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> undef, <3 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> [[TMP4]], <3 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fmul reassoc nnan arcp contract <3 x float> [[TMP5]], [[X]] +; CHECK-NEXT: ret <3 x float> [[TMP6]] +; +.entry: + %0 = insertelement <2 x i32> undef, i32 %y, i32 0 + %1 = insertelement <2 x i32> %0, i32 %z, i32 1 + %2 = udiv <2 x i32> %1, + %3 = uitofp <2 x i32> %2 to <2 x float> + %4 = shufflevector <2 x float> %3, <2 x float> undef, <3 x i32> + %5 = shufflevector <3 x float> %x, <3 x float> %4, <3 x i32> + %6 = fmul reassoc nnan arcp contract <3 x float> %x, %5 + ret <3 x float> %6 +} + +define <3 x float> @udiv2(<3 x float> %x, i32 %y, i32 %z) { +; CHECK-LABEL: @udiv2( +; CHECK-NEXT: .entry: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[Z:%.*]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = uitofp <2 x i32> [[TMP2]] to <2 x float> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> undef, <3 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> [[TMP4]], <3 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fmul reassoc nnan arcp contract <3 x float> [[TMP5]], [[X]] +; CHECK-NEXT: ret <3 x float> [[TMP6]] +; +.entry: + %0 = insertelement <2 x i32> undef, i32 %y, i32 0 + %1 = insertelement <2 x i32> %0, i32 %z, i32 1 + %2 = udiv <2 x i32> %1, + %3 = uitofp <2 x i32> %2 to <2 x float> + %4 = shufflevector <2 x float> undef, <2 x float> %3, <3 x i32> + %5 = shufflevector <3 x float> %x, <3 x float> %4, <3 x i32> + %6 = fmul 
reassoc nnan arcp contract <3 x float> %x, %5 + ret <3 x float> %6 +} + +define <3 x float> @sdiv0(<3 x float> %x, i32 %y, i32 %z) { +; CHECK-LABEL: @sdiv0( +; CHECK-NEXT: .entry: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[Z:%.*]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = sdiv <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = sitofp <2 x i32> [[TMP2]] to <2 x float> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> undef, <3 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> [[TMP4]], <3 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fmul reassoc nnan arcp contract <3 x float> [[TMP5]], [[X]] +; CHECK-NEXT: ret <3 x float> [[TMP6]] +; +.entry: + %0 = insertelement <2 x i32> undef, i32 %y, i32 0 + %1 = insertelement <2 x i32> %0, i32 %z, i32 1 + %2 = sdiv <2 x i32> %1, + %3 = sitofp <2 x i32> %2 to <2 x float> + %4 = shufflevector <2 x float> %3, <2 x float> undef, <3 x i32> + %5 = shufflevector <3 x float> %x, <3 x float> %4, <3 x i32> + %6 = fmul reassoc nnan arcp contract <3 x float> %x, %5 + ret <3 x float> %6 +} + +define <3 x float> @sdiv1(<3 x float> %x, i32 %y, i32 %z) { +; CHECK-LABEL: @sdiv1( +; CHECK-NEXT: .entry: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[Z:%.*]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = sdiv <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = sitofp <2 x i32> [[TMP2]] to <2 x float> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> undef, <3 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> [[TMP4]], <3 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fmul reassoc nnan arcp contract <3 x float> [[TMP5]], [[X]] +; CHECK-NEXT: ret <3 x float> [[TMP6]] +; +.entry: + %0 = insertelement <2 x i32> undef, i32 %y, i32 0 + %1 = 
insertelement <2 x i32> %0, i32 %z, i32 1 + %2 = sdiv <2 x i32> %1, + %3 = sitofp <2 x i32> %2 to <2 x float> + %4 = shufflevector <2 x float> %3, <2 x float> undef, <3 x i32> + %5 = shufflevector <3 x float> %x, <3 x float> %4, <3 x i32> + %6 = fmul reassoc nnan arcp contract <3 x float> %x, %5 + ret <3 x float> %6 +} + +define <3 x float> @sdiv2(<3 x float> %x, i32 %y, i32 %z) { +; CHECK-LABEL: @sdiv2( +; CHECK-NEXT: .entry: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[Z:%.*]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = sdiv <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = sitofp <2 x i32> [[TMP2]] to <2 x float> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> undef, <3 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> [[TMP4]], <3 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fmul reassoc nnan arcp contract <3 x float> [[TMP5]], [[X]] +; CHECK-NEXT: ret <3 x float> [[TMP6]] +; +.entry: + %0 = insertelement <2 x i32> undef, i32 %y, i32 0 + %1 = insertelement <2 x i32> %0, i32 %z, i32 1 + %2 = sdiv <2 x i32> %1, + %3 = sitofp <2 x i32> %2 to <2 x float> + %4 = shufflevector <2 x float> undef, <2 x float> %3, <3 x i32> + %5 = shufflevector <3 x float> %x, <3 x float> %4, <3 x i32> + %6 = fmul reassoc nnan arcp contract <3 x float> %x, %5 + ret <3 x float> %6 +} + +define <3 x float> @urem0(<3 x float> %x, i32 %y, i32 %z) { +; CHECK-LABEL: @urem0( +; CHECK-NEXT: .entry: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[Z:%.*]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = urem <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = uitofp <2 x i32> [[TMP2]] to <2 x float> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> undef, <3 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <3 x float> 
[[X:%.*]], <3 x float> [[TMP4]], <3 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fmul reassoc nnan arcp contract <3 x float> [[TMP5]], [[X]] +; CHECK-NEXT: ret <3 x float> [[TMP6]] +; +.entry: + %0 = insertelement <2 x i32> undef, i32 %y, i32 0 + %1 = insertelement <2 x i32> %0, i32 %z, i32 1 + %2= urem <2 x i32> %1, + %3 = uitofp <2 x i32> %2to <2 x float> + %4 = shufflevector <2 x float> %3, <2 x float> undef, <3 x i32> + %5 = shufflevector <3 x float> %x, <3 x float> %4, <3 x i32> + %6 = fmul reassoc nnan arcp contract <3 x float> %x, %5 + ret <3 x float> %6 +} + +define <3 x float> @urem1(<3 x float> %x, i32 %y, i32 %z) { +; CHECK-LABEL: @urem1( +; CHECK-NEXT: .entry: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[Z:%.*]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = urem <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = uitofp <2 x i32> [[TMP2]] to <2 x float> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> undef, <3 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> [[TMP4]], <3 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fmul reassoc nnan arcp contract <3 x float> [[TMP5]], [[X]] +; CHECK-NEXT: ret <3 x float> [[TMP6]] +; +.entry: + %0 = insertelement <2 x i32> undef, i32 %y, i32 0 + %1 = insertelement <2 x i32> %0, i32 %z, i32 1 + %2= urem <2 x i32> %1, + %3 = uitofp <2 x i32> %2to <2 x float> + %4 = shufflevector <2 x float> %3, <2 x float> undef, <3 x i32> + %5 = shufflevector <3 x float> %x, <3 x float> %4, <3 x i32> + %6 = fmul reassoc nnan arcp contract <3 x float> %x, %5 + ret <3 x float> %6 +} + +define <3 x float> @urem2(<3 x float> %x, i32 %y, i32 %z) { +; CHECK-LABEL: @urem2( +; CHECK-NEXT: .entry: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[Z:%.*]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = urem <2 x 
i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = uitofp <2 x i32> [[TMP2]] to <2 x float> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> undef, <3 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> [[TMP4]], <3 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fmul reassoc nnan arcp contract <3 x float> [[TMP5]], [[X]] +; CHECK-NEXT: ret <3 x float> [[TMP6]] +; +.entry: + %0 = insertelement <2 x i32> undef, i32 %y, i32 0 + %1 = insertelement <2 x i32> %0, i32 %z, i32 1 + %2= urem <2 x i32> %1, + %3 = uitofp <2 x i32> %2to <2 x float> + %4 = shufflevector <2 x float> undef, <2 x float> %3, <3 x i32> + %5 = shufflevector <3 x float> %x, <3 x float> %4, <3 x i32> + %6 = fmul reassoc nnan arcp contract <3 x float> %x, %5 + ret <3 x float> %6 +} + +define <3 x float> @srem0(<3 x float> %x, i32 %y, i32 %z) { +; CHECK-LABEL: @srem0( +; CHECK-NEXT: .entry: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[Z:%.*]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = srem <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = sitofp <2 x i32> [[TMP2]] to <2 x float> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> undef, <3 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> [[TMP4]], <3 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fmul reassoc nnan arcp contract <3 x float> [[TMP5]], [[X]] +; CHECK-NEXT: ret <3 x float> [[TMP6]] +; +.entry: + %0 = insertelement <2 x i32> undef, i32 %y, i32 0 + %1 = insertelement <2 x i32> %0, i32 %z, i32 1 + %2 = srem <2 x i32> %1, + %3 = sitofp <2 x i32> %2 to <2 x float> + %4 = shufflevector <2 x float> %3, <2 x float> undef, <3 x i32> + %5 = shufflevector <3 x float> %x, <3 x float> %4, <3 x i32> + %6 = fmul reassoc nnan arcp contract <3 x float> %x, %5 + ret <3 x float> %6 +} + +define <3 x float> @srem1(<3 x float> %x, i32 %y, i32 %z) { +; CHECK-LABEL: 
@srem1( +; CHECK-NEXT: .entry: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[Z:%.*]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = srem <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = sitofp <2 x i32> [[TMP2]] to <2 x float> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> undef, <3 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> [[TMP4]], <3 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fmul reassoc nnan arcp contract <3 x float> [[TMP5]], [[X]] +; CHECK-NEXT: ret <3 x float> [[TMP6]] +; +.entry: + %0 = insertelement <2 x i32> undef, i32 %y, i32 0 + %1 = insertelement <2 x i32> %0, i32 %z, i32 1 + %2 = srem <2 x i32> %1, + %3 = sitofp <2 x i32> %2 to <2 x float> + %4 = shufflevector <2 x float> %3, <2 x float> undef, <3 x i32> + %5 = shufflevector <3 x float> %x, <3 x float> %4, <3 x i32> + %6 = fmul reassoc nnan arcp contract <3 x float> %x, %5 + ret <3 x float> %6 +} + +define <3 x float> @srem2(<3 x float> %x, i32 %y, i32 %z) { +; CHECK-LABEL: @srem2( +; CHECK-NEXT: .entry: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[Z:%.*]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = srem <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = sitofp <2 x i32> [[TMP2]] to <2 x float> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> undef, <3 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> [[TMP4]], <3 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fmul reassoc nnan arcp contract <3 x float> [[TMP5]], [[X]] +; CHECK-NEXT: ret <3 x float> [[TMP6]] +; +.entry: + %0 = insertelement <2 x i32> undef, i32 %y, i32 0 + %1 = insertelement <2 x i32> %0, i32 %z, i32 1 + %2 = srem <2 x i32> %1, + %3 = sitofp <2 x i32> %2 to <2 x float> + %4 = shufflevector <2 x float> undef, <2 x float> 
%3, <3 x i32> + %5 = shufflevector <3 x float> %x, <3 x float> %4, <3 x i32> + %6 = fmul reassoc nnan arcp contract <3 x float> %x, %5 + ret <3 x float> %6 +}