Index: lib/Transforms/InstCombine/InstCombineInternal.h =================================================================== --- lib/Transforms/InstCombine/InstCombineInternal.h +++ lib/Transforms/InstCombine/InstCombineInternal.h @@ -212,6 +212,23 @@ } } +/// Integer division/remainder require special handling to avoid undefined +/// behavior. If a constant vector has undef elements, replace those undefs with +/// '1' because that's always safe to execute. +static inline Constant *getSafeVectorConstantForIntDivRem(Constant *In) { + assert(In->getType()->isVectorTy() && "Not expecting scalars here"); + assert(In->getType()->getVectorElementType()->isIntegerTy() && + "Not expecting FP opcodes/operands/constants here"); + + unsigned NumElts = In->getType()->getVectorNumElements(); + SmallVector<Constant *, 16> CVec(NumElts); + for (unsigned i = 0; i != NumElts; ++i) { + Constant *C = In->getAggregateElement(i); + CVec[i] = isa<UndefValue>(C) ? ConstantInt::get(C->getType(), 1) : C; + } + return ConstantVector::get(CVec); +} + /// The core instruction combiner logic. /// /// This class provides both the logic to recursively visit instructions and Index: lib/Transforms/InstCombine/InstCombineVectorOps.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -1140,6 +1140,49 @@ return true; } +static Instruction *foldSelectShuffles(ShuffleVectorInst &Shuf) { + if (!Shuf.isSelect()) + return nullptr; + + BinaryOperator *B0 = dyn_cast<BinaryOperator>(Shuf.getOperand(0)); + BinaryOperator *B1 = dyn_cast<BinaryOperator>(Shuf.getOperand(1)); + if (!(B0 && B1)) + return nullptr; + + if (B0->getOpcode() == B1->getOpcode()) { + Value *X; + Constant *C0, *C1; + if (match(B0, m_c_BinOp(m_Value(X), m_Constant(C0))) && + match(B1, m_c_BinOp(m_Specific(X), m_Constant(C1)))) { + // If all operands are constants, let constant folding remove the binops. 
+ if (isa<Constant>(X)) + return nullptr; + + // We can remove a binop and the shuffle by rearranging the constant: + // shuffle (op X, C0), (op X, C1), M --> op X, C' + // shuffle (op C0, X), (op C1, X), M --> op C', X + Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Shuf.getMask()); + BinaryOperator::BinaryOps Opc = B0->getOpcode(); + + // If the shuffle mask contains undef elements, then the new constant + // vector will have undefs in those lanes. This could cause the entire + // binop to be undef. + if (B0->isIntDivRem()) + NewC = getSafeVectorConstantForIntDivRem(NewC); + + bool Op0IsConst = isa<Constant>(B0->getOperand(0)); + Instruction *NewBO = Op0IsConst ? BinaryOperator::Create(Opc, NewC, X) : + BinaryOperator::Create(Opc, X, NewC); + // Flags are intersected from the 2 source binops. + NewBO->copyIRFlags(B0); + NewBO->andIRFlags(B1); + return NewBO; + } + } + + return nullptr; +} + Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Value *LHS = SVI.getOperand(0); Value *RHS = SVI.getOperand(1); @@ -1150,6 +1193,9 @@ LHS, RHS, SVI.getMask(), SVI.getType(), SQ.getWithInstruction(&SVI))) return replaceInstUsesWith(SVI, V); + if (Instruction *I = foldSelectShuffles(SVI)) + return I; + bool MadeChange = false; unsigned VWidth = SVI.getType()->getVectorNumElements(); Index: lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- lib/Transforms/InstCombine/InstructionCombining.cpp +++ lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1417,23 +1417,18 @@ } } if (MayChange) { + Constant *NewC = ConstantVector::get(NewVecC); // With integer div/rem instructions, it is not safe to use a vector with // undef elements because the entire instruction can be folded to undef. // So replace undef elements with '1' because that can never induce // undefined behavior. 
All other binop opcodes are always safe to // speculate, and therefore, it is fine to include undef elements for // unused lanes (and using undefs may help optimization). - if (Inst.isIntDivRem()) { - assert(C->getType()->getScalarType()->isIntegerTy() && - "Not expecting FP opcodes/operands/constants here"); - for (unsigned i = 0; i < VWidth; ++i) - if (isa<UndefValue>(NewVecC[i])) - NewVecC[i] = ConstantInt::get(NewVecC[i]->getType(), 1); - } - + if (Inst.isIntDivRem()) + NewC = getSafeVectorConstantForIntDivRem(NewC); + // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask) // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask) - Constant *NewC = ConstantVector::get(NewVecC); Value *NewLHS = isa<ShuffleVectorInst>(LHS) ? NewC : V1; Value *NewRHS = isa<ShuffleVectorInst>(LHS) ? V1 : NewC; return createBinOpShuffle(NewLHS, NewRHS, Mask); Index: test/Transforms/InstCombine/shuffle_select.ll =================================================================== --- test/Transforms/InstCombine/shuffle_select.ll +++ test/Transforms/InstCombine/shuffle_select.ll @@ -6,9 +6,7 @@ define <4 x i32> @add(<4 x i32> %v0) { ; CHECK-LABEL: @add( -; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[V0:%.*]], -; CHECK-NEXT: [[T2:%.*]] = add <4 x i32> [[V0]], -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[V0:%.*]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = add <4 x i32> %v0, @@ -21,9 +19,7 @@ define <4 x i32> @sub(<4 x i32> %v0) { ; CHECK-LABEL: @sub( -; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> , [[V0:%.*]] -; CHECK-NEXT: [[T2:%.*]] = sub <4 x i32> , [[V0]] -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = sub <4 x i32> , [[V0:%.*]] ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = sub <4 x i32> , %v0 @@ -37,9 +33,7 @@ define <4 x i32> @mul(<4 x i32> %v0) { ; CHECK-LABEL: @mul( -; CHECK-NEXT: [[T1:%.*]] = mul <4 x i32> [[V0:%.*]], -; CHECK-NEXT: [[T2:%.*]] = mul <4 x i32> [[V0]], -; 
CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[V0:%.*]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = mul <4 x i32> %v0, @@ -52,9 +46,7 @@ define <4 x i32> @shl(<4 x i32> %v0) { ; CHECK-LABEL: @shl( -; CHECK-NEXT: [[T1:%.*]] = shl nuw <4 x i32> [[V0:%.*]], -; CHECK-NEXT: [[T2:%.*]] = shl nuw <4 x i32> [[V0]], -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = shl nuw <4 x i32> [[V0:%.*]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = shl nuw <4 x i32> %v0, @@ -67,9 +59,7 @@ define <4 x i32> @lshr(<4 x i32> %v0) { ; CHECK-LABEL: @lshr( -; CHECK-NEXT: [[T1:%.*]] = lshr exact <4 x i32> , [[V0:%.*]] -; CHECK-NEXT: [[T2:%.*]] = lshr <4 x i32> , [[V0]] -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = lshr <4 x i32> , [[V0:%.*]] ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = lshr exact <4 x i32> , %v0 @@ -82,9 +72,7 @@ define <3 x i32> @ashr(<3 x i32> %v0) { ; CHECK-LABEL: @ashr( -; CHECK-NEXT: [[T1:%.*]] = ashr <3 x i32> [[V0:%.*]], -; CHECK-NEXT: [[T2:%.*]] = ashr <3 x i32> [[V0]], -; CHECK-NEXT: [[T3:%.*]] = shufflevector <3 x i32> [[T1]], <3 x i32> [[T2]], <3 x i32> +; CHECK-NEXT: [[T3:%.*]] = ashr <3 x i32> [[V0:%.*]], ; CHECK-NEXT: ret <3 x i32> [[T3]] ; %t1 = ashr <3 x i32> %v0, @@ -95,9 +83,7 @@ define <3 x i42> @and(<3 x i42> %v0) { ; CHECK-LABEL: @and( -; CHECK-NEXT: [[T1:%.*]] = and <3 x i42> [[V0:%.*]], -; CHECK-NEXT: [[T2:%.*]] = and <3 x i42> [[V0]], -; CHECK-NEXT: [[T3:%.*]] = shufflevector <3 x i42> [[T1]], <3 x i42> [[T2]], <3 x i32> +; CHECK-NEXT: [[T3:%.*]] = and <3 x i42> [[V0:%.*]], ; CHECK-NEXT: ret <3 x i42> [[T3]] ; %t1 = and <3 x i42> %v0, @@ -113,8 +99,7 @@ define <4 x i32> @or(<4 x i32> %v0) { ; CHECK-LABEL: @or( ; CHECK-NEXT: [[T1:%.*]] = or <4 x i32> [[V0:%.*]], -; CHECK-NEXT: [[T2:%.*]] = or <4 x i32> [[V0]], -; CHECK-NEXT: [[T3:%.*]] = 
shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = or <4 x i32> [[V0]], ; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T1]]) ; CHECK-NEXT: ret <4 x i32> [[T3]] ; @@ -127,9 +112,8 @@ define <4 x i32> @xor(<4 x i32> %v0) { ; CHECK-LABEL: @xor( -; CHECK-NEXT: [[T1:%.*]] = xor <4 x i32> [[V0:%.*]], -; CHECK-NEXT: [[T2:%.*]] = xor <4 x i32> [[V0]], -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[T2:%.*]] = xor <4 x i32> [[V0:%.*]], +; CHECK-NEXT: [[T3:%.*]] = xor <4 x i32> [[V0]], ; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T2]]) ; CHECK-NEXT: ret <4 x i32> [[T3]] ; @@ -144,7 +128,7 @@ ; CHECK-LABEL: @udiv( ; CHECK-NEXT: [[T1:%.*]] = udiv <4 x i32> , [[V0:%.*]] ; CHECK-NEXT: [[T2:%.*]] = udiv <4 x i32> , [[V0]] -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = udiv <4 x i32> , [[V0]] ; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T1]]) ; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T2]]) ; CHECK-NEXT: ret <4 x i32> [[T3]] @@ -161,9 +145,7 @@ define <4 x i32> @sdiv(<4 x i32> %v0) { ; CHECK-LABEL: @sdiv( -; CHECK-NEXT: [[T1:%.*]] = sdiv <4 x i32> [[V0:%.*]], -; CHECK-NEXT: [[T2:%.*]] = sdiv <4 x i32> [[V0]], -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = sdiv <4 x i32> [[V0:%.*]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = sdiv <4 x i32> %v0, @@ -174,9 +156,7 @@ define <4 x i32> @urem(<4 x i32> %v0) { ; CHECK-LABEL: @urem( -; CHECK-NEXT: [[T1:%.*]] = urem <4 x i32> , [[V0:%.*]] -; CHECK-NEXT: [[T2:%.*]] = urem <4 x i32> , [[V0]] -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = urem <4 x i32> , [[V0:%.*]] ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = urem <4 x i32> , %v0 @@ -187,9 +167,7 @@ define <4 x i32> @srem(<4 x i32> %v0) { ; CHECK-LABEL: @srem( -; CHECK-NEXT: [[T1:%.*]] = 
srem <4 x i32> , [[V0:%.*]] -; CHECK-NEXT: [[T2:%.*]] = srem <4 x i32> , [[V0]] -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = srem <4 x i32> , [[V0:%.*]] ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = srem <4 x i32> , %v0 @@ -202,9 +180,7 @@ define <4 x float> @fadd(<4 x float> %v0) { ; CHECK-LABEL: @fadd( -; CHECK-NEXT: [[T1:%.*]] = fadd <4 x float> [[V0:%.*]], -; CHECK-NEXT: [[T2:%.*]] = fadd <4 x float> [[V0]], -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x float> [[T1]], <4 x float> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = fadd <4 x float> [[V0:%.*]], ; CHECK-NEXT: ret <4 x float> [[T3]] ; %t1 = fadd <4 x float> %v0, @@ -215,9 +191,7 @@ define <4 x double> @fsub(<4 x double> %v0) { ; CHECK-LABEL: @fsub( -; CHECK-NEXT: [[T1:%.*]] = fsub <4 x double> , [[V0:%.*]] -; CHECK-NEXT: [[T2:%.*]] = fsub <4 x double> , [[V0]] -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x double> [[T1]], <4 x double> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = fsub <4 x double> , [[V0:%.*]] ; CHECK-NEXT: ret <4 x double> [[T3]] ; %t1 = fsub <4 x double> , %v0 @@ -230,9 +204,7 @@ define <4 x float> @fmul(<4 x float> %v0) { ; CHECK-LABEL: @fmul( -; CHECK-NEXT: [[T1:%.*]] = fmul nnan ninf <4 x float> [[V0:%.*]], -; CHECK-NEXT: [[T2:%.*]] = fmul nnan ninf <4 x float> [[V0]], -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x float> [[T1]], <4 x float> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = fmul nnan ninf <4 x float> [[V0:%.*]], ; CHECK-NEXT: ret <4 x float> [[T3]] ; %t1 = fmul nnan ninf <4 x float> %v0, @@ -243,9 +215,7 @@ define <4 x double> @fdiv(<4 x double> %v0) { ; CHECK-LABEL: @fdiv( -; CHECK-NEXT: [[T1:%.*]] = fdiv fast <4 x double> , [[V0:%.*]] -; CHECK-NEXT: [[T2:%.*]] = fdiv nnan arcp <4 x double> , [[V0]] -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x double> [[T1]], <4 x double> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = fdiv nnan arcp <4 x double> , [[V0:%.*]] ; CHECK-NEXT: ret <4 x double> [[T3]] ; 
%t1 = fdiv fast <4 x double> , %v0