Index: lib/Transforms/InstCombine/InstCombineVectorOps.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -869,10 +869,22 @@ return nullptr; } +/// Return true if a given LHS, undef RHS, and a Mask, would introduce an undef. +static bool WouldIntroduceUndef(Value *LHS, ArrayRef<int> Mask) { + const unsigned VWidth = LHS->getType()->getVectorNumElements(); + + for (int i = 0, e = Mask.size(); i != e; ++i) { + if ((VWidth <= Mask[i]) || (-1 == Mask[i])) + return true; + } + + return false; +} + /// Return true if we can evaluate the specified expression tree if the vector /// elements were shuffled in a different order. static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask, - unsigned Depth = 5) { + bool UndefBeingIntroduced, unsigned Depth = 5) { // We can always reorder the elements of a constant. if (isa<Constant>(V)) return true; @@ -888,18 +900,21 @@ if (Depth == 0) return false; switch (I->getOpcode()) { - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::Mul: - case Instruction::FMul: case Instruction::UDiv: case Instruction::SDiv: case Instruction::FDiv: case Instruction::URem: case Instruction::SRem: case Instruction::FRem: + if (UndefBeingIntroduced) + return false; + LLVM_FALLTHROUGH; + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: @@ -919,7 +934,8 @@ case Instruction::FPExt: case Instruction::GetElementPtr: { for (Value *Operand : I->operands()) { - if (!CanEvaluateShuffled(Operand, Mask, Depth-1)) + if (!CanEvaluateShuffled(Operand, Mask, UndefBeingIntroduced, + Depth - 1)) return false; } return true; @@ -939,7 +955,8 @@ SeenOnce = true; } } - return CanEvaluateShuffled(I->getOperand(0), 
Mask, Depth-1); + return CanEvaluateShuffled(I->getOperand(0), Mask, UndefBeingIntroduced, + Depth - 1); } } return false; @@ -1464,7 +1481,8 @@ if (isRHSID) return replaceInstUsesWith(SVI, RHS); } - if (isa<UndefValue>(RHS) && CanEvaluateShuffled(LHS, Mask)) { + if (isa<UndefValue>(RHS) && + CanEvaluateShuffled(LHS, Mask, WouldIntroduceUndef(LHS, Mask))) { Value *V = EvaluateInDifferentElementOrder(LHS, Mask); return replaceInstUsesWith(SVI, V); } Index: test/Transforms/InstCombine/stop_bad_undef_propagation.ll =================================================================== --- /dev/null +++ test/Transforms/InstCombine/stop_bad_undef_propagation.ll @@ -0,0 +1,235 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +define spir_func <3 x float> @udiv0(i32 %x, <3 x float> %y, <3 x float> %z, i32 %extract.0, i32 %extract.1) { +.entry: + %insert.0 = insertelement <2 x i32> undef, i32 %extract.0, i32 0 + %insert.1 = insertelement <2 x i32> %insert.0, i32 %extract.1, i32 1 +; CHECK: %udiv = udiv <2 x i32> %insert.1, + %udiv = udiv <2 x i32> %insert.1, + %uitofp = uitofp <2 x i32> %udiv to <2 x float> + %shuffle.0 = shufflevector <2 x float> %uitofp, <2 x float> undef, <3 x i32> + %shuffle.1 = shufflevector <3 x float> %y, <3 x float> %shuffle.0, <3 x i32> + %ret = fmul reassoc nnan arcp contract <3 x float> %z, %shuffle.1 + ret <3 x float> %ret +} + +define spir_func <3 x float> @udiv1(i32 %x, <3 x float> %y, <3 x float> %z, i32 %extract.0, i32 %extract.1) { +.entry: + %insert.0 = insertelement <2 x i32> undef, i32 %extract.0, i32 0 + %insert.1 = insertelement <2 x i32> %insert.0, i32 %extract.1, i32 1 +; CHECK: %udiv = udiv <2 x i32> %insert.1, + %udiv = udiv <2 x i32> %insert.1, + %uitofp = uitofp <2 x i32> %udiv to <2 x float> + %shuffle.0 = shufflevector <2 x float> %uitofp, <2 x float> undef, <3 x i32> + %shuffle.1 = shufflevector <3 x float> %y, <3 x float> %shuffle.0, <3 x i32> + %ret = fmul reassoc nnan arcp contract <3 x float> %z, %shuffle.1 + ret <3 x float> %ret +} + +define 
spir_func <3 x float> @udiv2(i32 %x, <3 x float> %y, <3 x float> %z, i32 %extract.0, i32 %extract.1) { +.entry: + %insert.0 = insertelement <2 x i32> undef, i32 %extract.0, i32 0 + %insert.1 = insertelement <2 x i32> %insert.0, i32 %extract.1, i32 1 +; CHECK: %udiv = udiv <2 x i32> %insert.1, + %udiv = udiv <2 x i32> %insert.1, + %uitofp = uitofp <2 x i32> %udiv to <2 x float> + %shuffle.0 = shufflevector <2 x float> undef, <2 x float> %uitofp, <3 x i32> + %shuffle.1 = shufflevector <3 x float> %y, <3 x float> %shuffle.0, <3 x i32> + %ret = fmul reassoc nnan arcp contract <3 x float> %z, %shuffle.1 + ret <3 x float> %ret +} + +define spir_func <3 x float> @sdiv0(i32 %x, <3 x float> %y, <3 x float> %z, i32 %extract.0, i32 %extract.1) { +.entry: + %insert.0 = insertelement <2 x i32> undef, i32 %extract.0, i32 0 + %insert.1 = insertelement <2 x i32> %insert.0, i32 %extract.1, i32 1 +; CHECK: %sdiv = sdiv <2 x i32> %insert.1, + %sdiv = sdiv <2 x i32> %insert.1, + %sitofp = sitofp <2 x i32> %sdiv to <2 x float> + %shuffle.0 = shufflevector <2 x float> %sitofp, <2 x float> undef, <3 x i32> + %shuffle.1 = shufflevector <3 x float> %y, <3 x float> %shuffle.0, <3 x i32> + %ret = fmul reassoc nnan arcp contract <3 x float> %z, %shuffle.1 + ret <3 x float> %ret +} + +define spir_func <3 x float> @sdiv1(i32 %x, <3 x float> %y, <3 x float> %z, i32 %extract.0, i32 %extract.1) { +.entry: + %insert.0 = insertelement <2 x i32> undef, i32 %extract.0, i32 0 + %insert.1 = insertelement <2 x i32> %insert.0, i32 %extract.1, i32 1 +; CHECK: %sdiv = sdiv <2 x i32> %insert.1, + %sdiv = sdiv <2 x i32> %insert.1, + %sitofp = sitofp <2 x i32> %sdiv to <2 x float> + %shuffle.0 = shufflevector <2 x float> %sitofp, <2 x float> undef, <3 x i32> + %shuffle.1 = shufflevector <3 x float> %y, <3 x float> %shuffle.0, <3 x i32> + %ret = fmul reassoc nnan arcp contract <3 x float> %z, %shuffle.1 + ret <3 x float> %ret +} + +define spir_func <3 x float> @sdiv2(i32 %x, <3 x float> %y, <3 x float> %z, i32 
%extract.0, i32 %extract.1) { +.entry: + %insert.0 = insertelement <2 x i32> undef, i32 %extract.0, i32 0 + %insert.1 = insertelement <2 x i32> %insert.0, i32 %extract.1, i32 1 +; CHECK: %sdiv = sdiv <2 x i32> %insert.1, + %sdiv = sdiv <2 x i32> %insert.1, + %sitofp = sitofp <2 x i32> %sdiv to <2 x float> + %shuffle.0 = shufflevector <2 x float> undef, <2 x float> %sitofp, <3 x i32> + %shuffle.1 = shufflevector <3 x float> %y, <3 x float> %shuffle.0, <3 x i32> + %ret = fmul reassoc nnan arcp contract <3 x float> %z, %shuffle.1 + ret <3 x float> %ret +} + +define spir_func <3 x i32> @fdiv0(i32 %x, <3 x i32> %y, <3 x i32> %z, float %extract.0, float %extract.1) { +.entry: + %insert.0 = insertelement <2 x float> undef, float %extract.0, i32 0 + %insert.1 = insertelement <2 x float> %insert.0, float %extract.1, i32 1 +; CHECK: %fdiv = fdiv <2 x float> %insert.1, + %fdiv = fdiv <2 x float> %insert.1, + %sitofp = fptosi <2 x float> %fdiv to <2 x i32> + %shuffle.0 = shufflevector <2 x i32> %sitofp, <2 x i32> undef, <3 x i32> + %shuffle.1 = shufflevector <3 x i32> %y, <3 x i32> %shuffle.0, <3 x i32> + %ret = mul <3 x i32> %z, %shuffle.1 + ret <3 x i32> %ret +} + +define spir_func <3 x i32> @fdiv1(i32 %x, <3 x i32> %y, <3 x i32> %z, float %extract.0, float %extract.1) { +.entry: + %insert.0 = insertelement <2 x float> undef, float %extract.0, i32 0 + %insert.1 = insertelement <2 x float> %insert.0, float %extract.1, i32 1 +; CHECK: %fdiv = fdiv <2 x float> %insert.1, + %fdiv = fdiv <2 x float> %insert.1, + %sitofp = fptosi <2 x float> %fdiv to <2 x i32> + %shuffle.0 = shufflevector <2 x i32> %sitofp, <2 x i32> undef, <3 x i32> + %shuffle.1 = shufflevector <3 x i32> %y, <3 x i32> %shuffle.0, <3 x i32> + %ret = mul <3 x i32> %z, %shuffle.1 + ret <3 x i32> %ret +} + +define spir_func <3 x i32> @fdiv2(i32 %x, <3 x i32> %y, <3 x i32> %z, float %extract.0, float %extract.1) { +.entry: + %insert.0 = insertelement <2 x float> undef, float %extract.0, i32 0 + %insert.1 = 
insertelement <2 x float> %insert.0, float %extract.1, i32 1 +; CHECK: %fdiv = fdiv <2 x float> %insert.1, + %fdiv = fdiv <2 x float> %insert.1, + %sitofp = fptosi <2 x float> %fdiv to <2 x i32> + %shuffle.0 = shufflevector <2 x i32> undef, <2 x i32> %sitofp, <3 x i32> + %shuffle.1 = shufflevector <3 x i32> %y, <3 x i32> %shuffle.0, <3 x i32> + %ret = mul <3 x i32> %z, %shuffle.1 + ret <3 x i32> %ret +} + +define spir_func <3 x float> @urem0(i32 %x, <3 x float> %y, <3 x float> %z, i32 %extract.0, i32 %extract.1) { +.entry: + %insert.0 = insertelement <2 x i32> undef, i32 %extract.0, i32 0 + %insert.1 = insertelement <2 x i32> %insert.0, i32 %extract.1, i32 1 +; CHECK: %urem = urem <2 x i32> %insert.1, + %urem = urem <2 x i32> %insert.1, + %uitofp = uitofp <2 x i32> %urem to <2 x float> + %shuffle.0 = shufflevector <2 x float> %uitofp, <2 x float> undef, <3 x i32> + %shuffle.1 = shufflevector <3 x float> %y, <3 x float> %shuffle.0, <3 x i32> + %ret = fmul reassoc nnan arcp contract <3 x float> %z, %shuffle.1 + ret <3 x float> %ret +} + +define spir_func <3 x float> @urem1(i32 %x, <3 x float> %y, <3 x float> %z, i32 %extract.0, i32 %extract.1) { +.entry: + %insert.0 = insertelement <2 x i32> undef, i32 %extract.0, i32 0 + %insert.1 = insertelement <2 x i32> %insert.0, i32 %extract.1, i32 1 +; CHECK: %urem = urem <2 x i32> %insert.1, + %urem = urem <2 x i32> %insert.1, + %uitofp = uitofp <2 x i32> %urem to <2 x float> + %shuffle.0 = shufflevector <2 x float> %uitofp, <2 x float> undef, <3 x i32> + %shuffle.1 = shufflevector <3 x float> %y, <3 x float> %shuffle.0, <3 x i32> + %ret = fmul reassoc nnan arcp contract <3 x float> %z, %shuffle.1 + ret <3 x float> %ret +} + +define spir_func <3 x float> @urem2(i32 %x, <3 x float> %y, <3 x float> %z, i32 %extract.0, i32 %extract.1) { +.entry: + %insert.0 = insertelement <2 x i32> undef, i32 %extract.0, i32 0 + %insert.1 = insertelement <2 x i32> %insert.0, i32 %extract.1, i32 1 +; CHECK: %urem = urem <2 x i32> %insert.1, + 
%urem = urem <2 x i32> %insert.1, + %uitofp = uitofp <2 x i32> %urem to <2 x float> + %shuffle.0 = shufflevector <2 x float> undef, <2 x float> %uitofp, <3 x i32> + %shuffle.1 = shufflevector <3 x float> %y, <3 x float> %shuffle.0, <3 x i32> + %ret = fmul reassoc nnan arcp contract <3 x float> %z, %shuffle.1 + ret <3 x float> %ret +} + +define spir_func <3 x float> @srem0(i32 %x, <3 x float> %y, <3 x float> %z, i32 %extract.0, i32 %extract.1) { +.entry: + %insert.0 = insertelement <2 x i32> undef, i32 %extract.0, i32 0 + %insert.1 = insertelement <2 x i32> %insert.0, i32 %extract.1, i32 1 +; CHECK: %srem = srem <2 x i32> %insert.1, + %srem = srem <2 x i32> %insert.1, + %sitofp = sitofp <2 x i32> %srem to <2 x float> + %shuffle.0 = shufflevector <2 x float> %sitofp, <2 x float> undef, <3 x i32> + %shuffle.1 = shufflevector <3 x float> %y, <3 x float> %shuffle.0, <3 x i32> + %ret = fmul reassoc nnan arcp contract <3 x float> %z, %shuffle.1 + ret <3 x float> %ret +} + +define spir_func <3 x float> @srem1(i32 %x, <3 x float> %y, <3 x float> %z, i32 %extract.0, i32 %extract.1) { +.entry: + %insert.0 = insertelement <2 x i32> undef, i32 %extract.0, i32 0 + %insert.1 = insertelement <2 x i32> %insert.0, i32 %extract.1, i32 1 +; CHECK: %srem = srem <2 x i32> %insert.1, + %srem = srem <2 x i32> %insert.1, + %sitofp = sitofp <2 x i32> %srem to <2 x float> + %shuffle.0 = shufflevector <2 x float> %sitofp, <2 x float> undef, <3 x i32> + %shuffle.1 = shufflevector <3 x float> %y, <3 x float> %shuffle.0, <3 x i32> + %ret = fmul reassoc nnan arcp contract <3 x float> %z, %shuffle.1 + ret <3 x float> %ret +} + +define spir_func <3 x float> @srem2(i32 %x, <3 x float> %y, <3 x float> %z, i32 %extract.0, i32 %extract.1) { +.entry: + %insert.0 = insertelement <2 x i32> undef, i32 %extract.0, i32 0 + %insert.1 = insertelement <2 x i32> %insert.0, i32 %extract.1, i32 1 +; CHECK: %srem = srem <2 x i32> %insert.1, + %srem = srem <2 x i32> %insert.1, + %sitofp = sitofp <2 x i32> %srem to 
<2 x float> + %shuffle.0 = shufflevector <2 x float> undef, <2 x float> %sitofp, <3 x i32> + %shuffle.1 = shufflevector <3 x float> %y, <3 x float> %shuffle.0, <3 x i32> + %ret = fmul reassoc nnan arcp contract <3 x float> %z, %shuffle.1 + ret <3 x float> %ret +} + +define spir_func <3 x i32> @frem0(i32 %x, <3 x i32> %y, <3 x i32> %z, float %extract.0, float %extract.1) { +.entry: + %insert.0 = insertelement <2 x float> undef, float %extract.0, i32 0 + %insert.1 = insertelement <2 x float> %insert.0, float %extract.1, i32 1 +; CHECK: %frem = frem <2 x float> %insert.1, + %frem = frem <2 x float> %insert.1, + %sitofp = fptosi <2 x float> %frem to <2 x i32> + %shuffle.0 = shufflevector <2 x i32> %sitofp, <2 x i32> undef, <3 x i32> + %shuffle.1 = shufflevector <3 x i32> %y, <3 x i32> %shuffle.0, <3 x i32> + %ret = mul <3 x i32> %z, %shuffle.1 + ret <3 x i32> %ret +} + +define spir_func <3 x i32> @frem1(i32 %x, <3 x i32> %y, <3 x i32> %z, float %extract.0, float %extract.1) { +.entry: + %insert.0 = insertelement <2 x float> undef, float %extract.0, i32 0 + %insert.1 = insertelement <2 x float> %insert.0, float %extract.1, i32 1 +; CHECK: %frem = frem <2 x float> %insert.1, + %frem = frem <2 x float> %insert.1, + %sitofp = fptosi <2 x float> %frem to <2 x i32> + %shuffle.0 = shufflevector <2 x i32> %sitofp, <2 x i32> undef, <3 x i32> + %shuffle.1 = shufflevector <3 x i32> %y, <3 x i32> %shuffle.0, <3 x i32> + %ret = mul <3 x i32> %z, %shuffle.1 + ret <3 x i32> %ret +} + +define spir_func <3 x i32> @frem2(i32 %x, <3 x i32> %y, <3 x i32> %z, float %extract.0, float %extract.1) { +.entry: + %insert.0 = insertelement <2 x float> undef, float %extract.0, i32 0 + %insert.1 = insertelement <2 x float> %insert.0, float %extract.1, i32 1 +; CHECK: %frem = frem <2 x float> %insert.1, + %frem = frem <2 x float> %insert.1, + %sitofp = fptosi <2 x float> %frem to <2 x i32> + %shuffle.0 = shufflevector <2 x i32> undef, <2 x i32> %sitofp, <3 x i32> + %shuffle.1 = shufflevector <3 x 
i32> %y, <3 x i32> %shuffle.0, <3 x i32> + %ret = mul <3 x i32> %z, %shuffle.1 + ret <3 x i32> %ret +}