Index: lib/Transforms/Scalar/Reassociate.cpp =================================================================== --- lib/Transforms/Scalar/Reassociate.cpp +++ lib/Transforms/Scalar/Reassociate.cpp @@ -19,7 +19,6 @@ // than values not in loops. // //===----------------------------------------------------------------------===// - #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" @@ -207,7 +206,7 @@ I->getOpcode() == Instruction::And)) { Value *V0 = I->getOperand(0); Value *V1 = I->getOperand(1); - if (isa(V0)) + if (isa(V0)) std::swap(V0, V1); if (ConstantInt *C = dyn_cast(V1)) { @@ -216,11 +215,21 @@ isOr = (I->getOpcode() == Instruction::Or); return; } + if (ConstantDataVector *C = dyn_cast(V1)) + if (Constant *Splat = C->getSplatValue()) + if (ConstantInt *C = dyn_cast(Splat)) { + ConstPart = C->getValue(); + SymbolicPart = V0; + isOr = (I->getOpcode() == Instruction::Or); + return; + } } - // view the operand as "V | 0" + // View the operand as "V | 0" SymbolicPart = V; - ConstPart = APInt::getNullValue(V->getType()->getIntegerBitWidth()); + Type *Ty = V->getType(); + Ty = Ty->isVectorTy() ? Ty->getVectorElementType() : Ty; + ConstPart = APInt::getNullValue(Ty->getIntegerBitWidth()); isOr = true; } @@ -228,7 +237,7 @@ INITIALIZE_PASS(Reassociate, "reassociate", "Reassociate expressions", false, false) -// Public interface to the Reassociate pass +// Public interface to the Reassociate pass. FunctionPass *llvm::createReassociatePass() { return new Reassociate(); } /// isReassociableOp - Return true if V is an instruction of the specified @@ -273,7 +282,7 @@ void Reassociate::BuildRankMap(Function &F) { unsigned i = 2; - // Assign distinct ranks to function arguments + // Assign distinct ranks to function arguments. for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) ValueRankMap[&*I] = ++i; @@ -313,21 +322,17 @@ // If this is a not or neg instruction, do not count it for rank. This // assures us that X and ~X will have the same rank. - Type *Ty = V->getType(); - if ((!Ty->isIntegerTy() && !Ty->isFloatingPointTy()) || - (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I) && - !BinaryOperator::isFNeg(I))) + if (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I) && + !BinaryOperator::isFNeg(I)) ++Rank; - //DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " - // << Rank << "\n"); - + DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank << "\n"); return ValueRankMap[I] = Rank; } static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp) { - if (S1->getType()->isIntegerTy()) + if (S1->getType()->isIntOrIntVectorTy()) return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore); else { BinaryOperator *Res = @@ -339,7 +344,7 @@ static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp) { - if (S1->getType()->isIntegerTy()) + if (S1->getType()->isIntOrIntVectorTy()) return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore); else { BinaryOperator *Res = @@ -351,7 +356,7 @@ static BinaryOperator *CreateNeg(Value *S1, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp) { - if (S1->getType()->isIntegerTy()) + if (S1->getType()->isIntOrIntVectorTy()) return BinaryOperator::CreateNeg(S1, Name, InsertBefore); else { BinaryOperator *Res = BinaryOperator::CreateFNeg(S1, Name, InsertBefore); @@ -361,11 +366,10 @@ } /// LowerNegateToMultiply - Replace 0-X with X*-1. -/// static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) { Type *Ty = Neg->getType(); - Constant *NegOne = Ty->isIntegerTy() ? ConstantInt::getAllOnesValue(Ty) - : ConstantFP::get(Ty, -1.0); + Constant *NegOne = Ty->isIntOrIntVectorTy() ? + ConstantInt::getAllOnesValue(Ty) : ConstantFP::get(Ty, -1.0); BinaryOperator *Res = CreateMul(Neg->getOperand(1), NegOne, "", Neg, Neg); Neg->setOperand(1, Constant::getNullValue(Ty)); // Drop use of op. @@ -846,7 +850,7 @@ Constant *Undef = UndefValue::get(I->getType()); NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode), Undef, Undef, "", I); - if (NewOp->getType()->isFloatingPointTy()) + if (NewOp->getType()->isFPOrFPVectorTy()) NewOp->setFastMathFlags(I->getFastMathFlags()); } else { NewOp = NodesToRewrite.pop_back_val(); @@ -891,9 +895,17 @@ /// version of the value is returned, and BI is left pointing at the instruction /// that should be processed next by the reassociation pass. static Value *NegateValue(Value *V, Instruction *BI) { + if (ConstantDataVector *C = dyn_cast(V)) { + if (Constant *Splat = C->getSplatValue()) { + if (isa(Splat)) + return ConstantExpr::getFNeg(C); + if (isa(Splat)) + return ConstantExpr::getNeg(C); + } + } if (ConstantFP *C = dyn_cast(V)) return ConstantExpr::getFNeg(C); - if (Constant *C = dyn_cast(V)) + if (Constant *C = dyn_cast(V)) return ConstantExpr::getNeg(C); // We are trying to expose opportunity for reassociation. One of the things @@ -905,8 +917,7 @@ // the constants. We assume that instcombine will clean up the mess later if // we introduce tons of unnecessary negation instructions. // - if (BinaryOperator *I = - isReassociableOp(V, Instruction::Add, Instruction::FAdd)) { + if (BinaryOperator *I = isReassociableOp(V, Instruction::Add, Instruction::FAdd)) { // Push the negates through the add. I->setOperand(0, NegateValue(I->getOperand(0), BI)); I->setOperand(1, NegateValue(I->getOperand(1), BI)); @@ -1012,7 +1023,11 @@ /// by one, change this into a multiply by a constant to assist with further /// reassociation. static BinaryOperator *ConvertShiftToMul(Instruction *Shl) { - Constant *MulCst = ConstantInt::get(Shl->getType(), 1); + Type *Ty = Shl->getType(); + Type *EltTy = Ty->isVectorTy() ? Ty->getVectorElementType() : Ty; + Constant *MulCst = ConstantInt::get(EltTy, 1); + if (Ty->isVectorTy()) + MulCst = ConstantDataVector::getSplat(Ty->getVectorNumElements(), MulCst); MulCst = ConstantExpr::getShl(MulCst, cast(Shl->getOperand(1))); BinaryOperator *Mul = @@ -1086,7 +1101,7 @@ if (ConstantInt *FC2 = dyn_cast(Factors[i].Op)) if (FC1->getValue() == -FC2->getValue()) { FoundFactor = NeedsNegate = true; - Factors.erase(Factors.begin()+i); + Factors.erase(Factors.begin() + i); break; } } else if (ConstantFP *FC1 = dyn_cast(Factor)) { @@ -1100,6 +1115,21 @@ break; } } + } else if (ConstantDataVector *FFC1 = dyn_cast(Factor)) { + if (ConstantDataVector *FFC2 = dyn_cast(Factors[i].Op)) + if (Constant *Splat1 = FFC1->getSplatValue()) + if (Constant *Splat2 = FFC2->getSplatValue()) + if (ConstantFP *FC1 = dyn_cast(Splat1)) + if (ConstantFP *FC2 = dyn_cast(Splat2)) { + APFloat F1(FC1->getValueAPF()); + APFloat F2(FC2->getValueAPF()); + F2.changeSign(); + if (F1.compare(F2) == APFloat::cmpEqual) { + FoundFactor = NeedsNegate = true; + Factors.erase(Factors.begin() + i); + break; + } + } } } @@ -1203,10 +1233,13 @@ const APInt &ConstOpnd) { if (ConstOpnd != 0) { if (!ConstOpnd.isAllOnesValue()) { - LLVMContext &Ctx = Opnd->getType()->getContext(); + Type *Ty = Opnd->getType(); + LLVMContext &Ctx = Ty->getContext(); Instruction *I; - I = BinaryOperator::CreateAnd(Opnd, ConstantInt::get(Ctx, ConstOpnd), - "and.ra", InsertBefore); + Constant *C = ConstantInt::get(Ctx, ConstOpnd); + if (Ty->isVectorTy()) + C = ConstantDataVector::getSplat(Ty->getVectorNumElements(), C); + I = BinaryOperator::CreateAnd(Opnd, C, "and.ra", InsertBefore); I->setDebugLoc(InsertBefore->getDebugLoc()); return I; } @@ -1343,6 +1376,7 @@ SmallVector Opnds; SmallVector OpndPtrs; Type *Ty = Ops[0].Op->getType(); + Ty = Ty->isVectorTy() ? Ty->getVectorElementType() : Ty; APInt ConstOpnd(Ty->getIntegerBitWidth(), 0); // Step 1: Convert ValueEntry to XorOpnd @@ -1421,8 +1455,11 @@ ValueEntry VE(getRank(O.getValue()), O.getValue()); Ops.push_back(VE); } + Type *ITy = I->getType(); if (ConstOpnd != 0) { - Value *C = ConstantInt::get(Ty->getContext(), ConstOpnd); + Constant *C = ConstantInt::get(Ty->getContext(), ConstOpnd); + if (ITy->isVectorTy()) + C = ConstantDataVector::getSplat(ITy->getVectorNumElements(), C); ValueEntry VE(getRank(C), C); Ops.push_back(VE); } @@ -1431,7 +1468,10 @@ return Ops.back().Op; else if (Sz == 0) { assert(ConstOpnd == 0); - return ConstantInt::get(Ty->getContext(), ConstOpnd); + Constant *C = ConstantInt::get(Ty->getContext(), ConstOpnd); + if (ITy->isVectorTy()) + C = ConstantDataVector::getSplat(ITy->getVectorNumElements(), C); + return C; } } @@ -1466,8 +1506,8 @@ // Insert a new multiply. Type *Ty = TheOp->getType(); - Constant *C = Ty->isIntegerTy() ? ConstantInt::get(Ty, NumFound) - : ConstantFP::get(Ty, NumFound); + Constant *C = Ty->isIntOrIntVectorTy() ? + ConstantInt::get(Ty, NumFound) : ConstantFP::get(Ty, NumFound); Instruction *Mul = CreateMul(TheOp, C, "factor", I, I); // Now that we have inserted a multiply, optimize it. This allows us to @@ -1543,8 +1583,7 @@ unsigned MaxOcc = 0; Value *MaxOccVal = nullptr; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - BinaryOperator *BOp = - isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul); + BinaryOperator *BOp = isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul); if (!BOp) continue; @@ -1593,6 +1632,20 @@ MaxOccVal = Factor; } } + } else if (ConstantDataVector *VC1 = dyn_cast(Factor)) { + if (Constant *Splat = VC1->getSplatValue()) { + if (ConstantInt *CI = dyn_cast(Splat)) + if (CI->isNegative() && !CI->isMinValue(true)) { + Factor = ConstantInt::get(CI->getContext(), -CI->getValue()); + assert(!Duplicates.count(Factor) && + "Shouldn't have two constant factors, missed a canonicalize"); + unsigned Occ = ++FactorOccurrences[Factor]; + if (Occ > MaxOcc) { + MaxOcc = Occ; + MaxOccVal = Factor; + } + } + } } } } @@ -1606,16 +1659,15 @@ // this, we could otherwise run into situations where removing a factor // from an expression will drop a use of maxocc, and this can cause // RemoveFactorFromExpression on successive values to behave differently. - Instruction *DummyInst = - I->getType()->isIntegerTy() - ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal) - : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal); + Instruction *DummyInst = I->getType()->isIntOrIntVectorTy() ? + BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal) : + BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal); SmallVector NewMulOps; for (unsigned i = 0; i != Ops.size(); ++i) { // Only try to remove factors from expressions we're allowed to. - BinaryOperator *BOp = - isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul); + BinaryOperator *BOp = isReassociableOp(Ops[i].Op, Instruction::Mul, + Instruction::FMul); if (!BOp) continue; @@ -1738,7 +1790,7 @@ Value *LHS = Ops.pop_back_val(); do { - if (LHS->getType()->isIntegerTy()) + if (LHS->getType()->isIntOrIntVectorTy()) LHS = Builder.CreateMul(LHS, Ops.pop_back_val()); else LHS = Builder.CreateFMul(LHS, Ops.pop_back_val()); @@ -1921,48 +1973,57 @@ if (!isa(I)) return; - if (I->getOpcode() == Instruction::Shl && isa(I->getOperand(1))) - // If an operand of this shift is a reassociable multiply, or if the shift - // is used by a reassociable multiply or add, turn into a multiply. - if (isReassociableOp(I->getOperand(0), Instruction::Mul) || - (I->hasOneUse() && - (isReassociableOp(I->user_back(), Instruction::Mul) || - isReassociableOp(I->user_back(), Instruction::Add)))) { - Instruction *NI = ConvertShiftToMul(I); - RedoInsts.insert(I); - MadeChange = true; - I = NI; + // Commute binary operators, to canonicalize the order of their operands. + // This can potentially expose more CSE opportunities, and makes writing other + // transformations simpler. + if (I->isCommutative()) { + Value *LHS = I->getOperand(0); + Value *RHS = I->getOperand(1); + unsigned LHSRank = getRank(LHS); + unsigned RHSRank = getRank(RHS); + + // Sort the operands by rank. + if (RHSRank < LHSRank && !isa(RHS)) { + I->setOperand(0, RHS); + I->setOperand(1, LHS); } + } - // Commute floating point binary operators, to canonicalize the order of their - // operands. This can potentially expose more CSE opportunities, and makes - // writing other transformations simpler. - if (I->getType()->isFloatingPointTy() || I->getType()->isVectorTy()) { - - // FAdd and FMul can be commuted. - if (I->getOpcode() == Instruction::FMul || - I->getOpcode() == Instruction::FAdd) { - Value *LHS = I->getOperand(0); - Value *RHS = I->getOperand(1); - unsigned LHSRank = getRank(LHS); - unsigned RHSRank = getRank(RHS); - - // Sort the operands by rank. - if (RHSRank < LHSRank) { - I->setOperand(0, RHS); - I->setOperand(1, LHS); + if (I->getOpcode() == Instruction::Shl) { + if (isa(I->getOperand(1))) + // If an operand of this shift is a reassociable multiply, or if the shift + // is used by a reassociable multiply or add, turn into a multiply. + if (isReassociableOp(I->getOperand(0), Instruction::Mul) || + (I->hasOneUse() && + (isReassociableOp(I->user_back(), Instruction::Mul) || + isReassociableOp(I->user_back(), Instruction::Add)))) { + Instruction *NI = ConvertShiftToMul(I); + RedoInsts.insert(I); + MadeChange = true; + I = NI; + } + if (ConstantDataVector *VC1 = dyn_cast(I->getOperand(1))) + if (Constant *Splat = VC1->getSplatValue()) + if (isa(Splat)) { + // If an operand of this shift is a reassociable multiply, or if the shift + // is used by a reassociable multiply or add, turn into a multiply. + if (isReassociableOp(I->getOperand(0), Instruction::Mul) || + (I->hasOneUse() && + (isReassociableOp(I->user_back(), Instruction::Mul) || + isReassociableOp(I->user_back(), Instruction::Add)))) { + Instruction *NI = ConvertShiftToMul(I); + RedoInsts.insert(I); + MadeChange = true; + I = NI; + } } - } - - // FIXME: We should commute vector instructions as well. However, this - // requires further analysis to determine the effect on later passes. - - // Don't try to optimize vector instructions or anything that doesn't have - // unsafe algebra. - if (I->getType()->isVectorTy() || !I->hasUnsafeAlgebra()) - return; } + // Don't try to optimize floating point binary operators that don't have + // unsafe algebra. + if (isa(I) && !I->hasUnsafeAlgebra()) + return; + // Do not reassociate boolean (i1) expressions. We want to preserve the // original order of evaluation for short-circuited comparisons that // SimplifyCFG has folded to AND/OR expressions. If the expression @@ -2035,9 +2096,6 @@ } void Reassociate::ReassociateExpression(BinaryOperator *I) { - assert(!I->getType()->isVectorTy() && - "Reassociation of vector instructions is not supported."); - // First, walk the expression tree, linearizing the tree, collecting the // operand information. SmallVector Tree; @@ -2083,18 +2141,33 @@ // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y if (I->hasOneUse()) { if (I->getOpcode() == Instruction::Mul && - cast(I->user_back())->getOpcode() == Instruction::Add && - isa(Ops.back().Op) && - cast(Ops.back().Op)->isAllOnesValue()) { - ValueEntry Tmp = Ops.pop_back_val(); - Ops.insert(Ops.begin(), Tmp); + cast(I->user_back())->getOpcode() == Instruction::Add) { + if (isa(Ops.back().Op) && + cast(Ops.back().Op)->isAllOnesValue()) { + ValueEntry Tmp = Ops.pop_back_val(); + Ops.insert(Ops.begin(), Tmp); + } + if (ConstantDataVector *VC1 = dyn_cast(Ops.back().Op)) + if (Constant *Splat = VC1->getSplatValue()) + if (isa(Splat) && + cast(Splat)->isAllOnesValue()) { + ValueEntry Tmp = Ops.pop_back_val(); + Ops.insert(Ops.begin(), Tmp); + } } else if (I->getOpcode() == Instruction::FMul && - cast(I->user_back())->getOpcode() == - Instruction::FAdd && - isa(Ops.back().Op) && - cast(Ops.back().Op)->isExactlyValue(-1.0)) { - ValueEntry Tmp = Ops.pop_back_val(); - Ops.insert(Ops.begin(), Tmp); + cast(I->user_back())->getOpcode() == Instruction::FAdd) { + if (isa(Ops.back().Op) && + cast(Ops.back().Op)->isExactlyValue(-1.0)) { + ValueEntry Tmp = Ops.pop_back_val(); + Ops.insert(Ops.begin(), Tmp); + } + if (ConstantDataVector *VC1 = dyn_cast(Ops.back().Op)) + if (Constant *Splat = VC1->getSplatValue()) + if (isa(Splat) && + cast(Splat)->isExactlyValue(-1.0)) { + ValueEntry Tmp = Ops.pop_back_val(); + Ops.insert(Ops.begin(), Tmp); + } } } Index: test/Transforms/Reassociate/commute.ll =================================================================== --- /dev/null +++ test/Transforms/Reassociate/commute.ll @@ -0,0 +1,53 @@ +; RUN: opt -reassociate -S < %s | FileCheck %s + +; Commute integer operations. +define i32 @test1(i32 %x, i32 %y) { +; CHECK-LABEL: @test1 +; CHECK-NEXT: %tmp1 = add i32 %y, %x +; CHECK-NEXT: %tmp2 = mul i32 %y, %x +; CHECK-NEXT: %tmp3 = add i32 %tmp1, %tmp2 + + %tmp1 = add i32 %x, %y + %tmp2 = mul i32 %y, %x + %tmp3 = add i32 %tmp1, %tmp2 + ret i32 %tmp3 +} + +; Commute float operations. +define float @test2(float %x, float %y) { +; CHECK-LABEL: @test2 +; CHECK-NEXT: %tmp1 = fadd float %x, %y +; CHECK-NEXT: %tmp2 = fmul float %x, %y +; CHECK-NEXT: %tmp3 = fadd float %tmp1, %tmp2 + + %tmp1 = fadd float %x, %y + %tmp2 = fmul float %y, %x + %tmp3 = fadd float %tmp1, %tmp2 + ret float %tmp3 +} + +; Commute integer vector operations. +define <2 x i32> @test3(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @test3 +; CHECK-NEXT: %tmp1 = add <2 x i32> %y, %x +; CHECK-NEXT: %tmp2 = mul <2 x i32> %y, %x +; CHECK-NEXT: %tmp3 = add <2 x i32> %tmp1, %tmp2 + + %tmp1 = add <2 x i32> %x, %y + %tmp2 = mul <2 x i32> %y, %x + %tmp3 = add <2 x i32> %tmp1, %tmp2 + ret <2 x i32> %tmp3 +} + +; Commute float vector operations. +define <2 x float> @test4(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: @test4 +; CHECK-NEXT: %tmp1 = fadd <2 x float> %x, %y +; CHECK-NEXT: %tmp2 = fmul <2 x float> %x, %y +; CHECK-NEXT: %tmp3 = fadd <2 x float> %tmp1, %tmp2 + + %tmp1 = fadd <2 x float> %x, %y + %tmp2 = fmul <2 x float> %y, %x + %tmp3 = fadd <2 x float> %tmp1, %tmp2 + ret <2 x float> %tmp3 +} Index: test/Transforms/Reassociate/fast-AgressiveSubMove.ll =================================================================== --- test/Transforms/Reassociate/fast-AgressiveSubMove.ll +++ test/Transforms/Reassociate/fast-AgressiveSubMove.ll @@ -2,8 +2,8 @@ define float @test1(float %A) { ; CHECK-LABEL: test1 -; CHECK-NEXT: %X = fadd float 1.000000e+00, %A -; CHECK-NEXT: %Y = fadd float 1.000000e+00, %A +; CHECK-NEXT: %X = fadd float %A, 1.000000e+00 +; CHECK-NEXT: %Y = fadd float %A, 1.000000e+00 ; CHECK-NEXT: %r = fsub float %X, %Y ; CHECK-NEXT: ret float %r @@ -17,8 +17,8 @@ ; CHECK-LABEL: test2 ; CHECK-NEXT: ret float 0.000000e+00 - %X = fadd fast float 1.000000e+00, %A - %Y = fadd fast float 1.000000e+00, %A + %X = fadd fast float %A, 1.000000e+00 + %Y = fadd fast float %A, 1.000000e+00 %r = fsub fast float %X, %Y ret float %r } Index: test/Transforms/Reassociate/fast-ReassociateVector.ll =================================================================== --- test/Transforms/Reassociate/fast-ReassociateVector.ll +++ test/Transforms/Reassociate/fast-ReassociateVector.ll @@ -1,22 +1,19 @@ -; RUN: opt < %s -reassociate -S | FileCheck %s +; RUN: opt < %s -reassociate -instcombine -S | FileCheck %s -; Don't handle floating point vector operations. define <4 x float> @test1() { ; CHECK-LABEL: test1 -; CHECK-NEXT: %tmp1 = fsub fast <4 x float> zeroinitializer, zeroinitializer -; CHECK-NEXT: %tmp2 = fmul fast <4 x float> zeroinitializer, %tmp1 +; CHECK-NEXT: ret <4 x float> %tmp1 = fsub fast <4 x float> zeroinitializer, zeroinitializer %tmp2 = fmul fast <4 x float> zeroinitializer, %tmp1 ret <4 x float> %tmp2 } -; We don't currently commute integer vector operations. define <2 x i32> @test2(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: test2 -; CHECK-NEXT: %tmp1 = add <2 x i32> %x, %y -; CHECK-NEXT: %tmp2 = add <2 x i32> %y, %x -; CHECK-NEXT: %tmp3 = add <2 x i32> %tmp1, %tmp2 +; CHECK-NEXT: add <2 x i32> %x, %y +; CHECK-NEXT: shl <2 x i32> %factor12, +; CHECK-NEXT: ret <2 x i32> %tmp1 = add <2 x i32> %x, %y %tmp2 = add <2 x i32> %y, %x Index: test/Transforms/Reassociate/fast-fp-commute.ll =================================================================== --- test/Transforms/Reassociate/fast-fp-commute.ll +++ test/Transforms/Reassociate/fast-fp-commute.ll @@ -33,7 +33,7 @@ define float @test3(float %x, float %y) { ; CHECK-LABEL: test3 -; CHECK-NEXT: %factor = fmul fast float 2.000000e+00, %y +; CHECK-NEXT: %factor = fmul fast float %y, 2.000000e+00 ; CHECK-NEXT: %tmp1 = fmul fast float %factor, %x ; CHECK-NEXT: ret float %tmp1 Index: test/Transforms/Reassociate/fp-commute.ll =================================================================== --- test/Transforms/Reassociate/fp-commute.ll +++ /dev/null @@ -1,19 +0,0 @@ -; RUN: opt -reassociate -S < %s | FileCheck %s - -declare void @use(float) - -define void @test1(float %x, float %y) { -; CHECK-LABEL: test1 -; CHECK: fmul float %x, %y -; CHECK: fmul float %x, %y -; CHECK: fsub float %1, %2 -; CHECK: call void @use(float %{{.*}}) -; CHECK: call void @use(float %{{.*}}) - - %1 = fmul float %x, %y - %2 = fmul float %y, %x - %3 = fsub float %1, %2 - call void @use(float %1) - call void @use(float %3) - ret void -} Index: test/Transforms/Reassociate/multistep.ll =================================================================== --- test/Transforms/Reassociate/multistep.ll +++ test/Transforms/Reassociate/multistep.ll @@ -9,8 +9,8 @@ %t3 = mul i64 %a, %t2 ; a*(a*c) %t4 = add i64 %t1, %t3 ; CHECK-NEXT: add i64 %c, %b -; CHECK-NEXT: mul i64 %tmp{{.*}}, %a -; CHECK-NEXT: mul i64 %tmp{{.*}}, %a +; CHECK-NEXT: mul i64 %a, %tmp2 +; CHECK-NEXT: mul i64 %tmp3, %a ; CHECK-NEXT: ret ret i64 %t4 } @@ -23,8 +23,8 @@ %t2 = add i64 %t1, %d ; a*c+d %t3 = add i64 %t0, %t2 ; a*b+(a*c+d) ; CHECK-NEXT: add i64 %c, %b -; CHECK-NEXT: mul i64 %tmp{{.*}}, %a -; CHECK-NEXT: add i64 %tmp{{.*}}, %d +; CHECK-NEXT: mul i64 %tmp, %a +; CHECK-NEXT: add i64 %tmp1, %d ; CHECK-NEXT: ret ret i64 %t3 } Index: test/Transforms/Reassociate/vector-basictests.ll =================================================================== --- /dev/null +++ test/Transforms/Reassociate/vector-basictests.ll @@ -0,0 +1,395 @@ +; RUN: opt < %s -reassociate -S | FileCheck %s +; RUN: opt < %s -reassociate -instcombine -S | FileCheck %s --check-prefix=R-IC +; RUN: opt < %s -reassociate -gvn -instcombine -S | FileCheck %s --check-prefix=R-GVN-IC + +; Check that a*a*b+a*a*c is turned into a*(a*(b+c)). +define <2 x float> @test1(<2 x float> %a, <2 x float> %b, <2 x float> %c) { +; CHECK-LABEL: @test1 +; CHECK-NEXT: fadd fast <2 x float> %c, %b +; CHECK-NEXT: fmul fast <2 x float> %a, %tmp2 +; CHECK-NEXT: fmul fast <2 x float> %tmp3, %a +; CHECK-NEXT: ret <2 x float> + + %t0 = fmul fast <2 x float> %a, %b + %t1 = fmul fast <2 x float> %a, %t0 + %t2 = fmul fast <2 x float> %a, %c + %t3 = fmul fast <2 x float> %a, %t2 + %t4 = fadd fast <2 x float> %t1, %t3 + ret <2 x float> %t4 +} + +; Check that a*b+a*c+d is turned into a*(b+c)+d. +define <2 x float> @test2(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x float> %d) { +; CHECK-LABEL: @test2 +; CHECK-NEXT: fadd fast <2 x float> %c, %b +; CHECK-NEXT: fmul fast <2 x float> %tmp, %a +; CHECK-NEXT: fadd fast <2 x float> %tmp1, %d +; CHECK-NEXT: ret <2 x float> + + %t0 = fmul fast <2 x float> %a, %b + %t1 = fmul fast <2 x float> %a, %c + %t2 = fadd fast <2 x float> %t1, %d + %t3 = fadd fast <2 x float> %t0, %t2 + ret <2 x float> %t3 +} + +; No fast-math. +define <2 x float> @test3(<2 x float> %A) { +; CHECK-LABEL: @test3 +; CHECK-NEXT: %X = fadd <2 x float> %A, +; CHECK-NEXT: %Y = fadd <2 x float> %A, +; CHECK-NEXT: %r = fsub <2 x float> %X, %Y +; CHECK-NEXT: ret <2 x float> %r + + %X = fadd <2 x float> %A, < float 1.000000e+00, float 1.000000e+00 > + %Y = fadd <2 x float> %A, < float 1.000000e+00, float 1.000000e+00 > + %R = fsub <2 x float> %X, %Y + ret <2 x float> %R +} + +; FIXME: (A + 1) - (A + 1) -> 0 +define <2 x float> @test4(<2 x float> %A) { +; CHECK-LABEL: @test4 +; CHECK-NEXT: %A.neg1 = fsub fast <2 x float> , %A +; CHECK-NEXT: %A.neg = fadd fast <2 x float> , %A.neg1 +; CHECK-NEXT: %X = fadd fast <2 x float> %A, zeroinitializer +; CHECK-NEXT: %R = fadd fast <2 x float> %X, %A.neg +; CHECK-NEXT: ret <2 x float> %R + + %X = fadd fast <2 x float> , %A + %Y = fadd fast <2 x float> , %A + %R = fsub fast <2 x float> %X, %Y + ret <2 x float> %R +} + +; Should be able to eliminate everything. +define <2 x float> @test5(<2 x float> %a0, <2 x float> %a1, <2 x float> %a2, <2 x float> %a3, <2 x float> %a4) { +; R-IC-LABEL: @test5 +; R-IC: ret <2 x float> + + %tmp.2 = fadd fast <2 x float> %a4, %a3 + %tmp.4 = fadd fast <2 x float> %tmp.2, %a2 + %tmp.6 = fadd fast <2 x float> %tmp.4, %a1 + %tmp.8 = fadd fast <2 x float> %tmp.6, %a0 + %tmp.11 = fadd fast <2 x float> %a3, %a2 + %tmp.13 = fadd fast <2 x float> %tmp.11, %a1 + %tmp.15 = fadd fast <2 x float> %tmp.13, %a0 + %tmp.18 = fadd fast <2 x float> %a2, %a1 + %tmp.20 = fadd fast <2 x float> %tmp.18, %a0 + %tmp.23 = fadd fast <2 x float> %a1, %a0 + %tmp.26 = fsub fast <2 x float> %tmp.8, %tmp.15 + %tmp.28 = fadd fast <2 x float> %tmp.26, %tmp.20 + %tmp.30 = fsub fast <2 x float> %tmp.28, %tmp.23 + %tmp.32 = fsub fast <2 x float> %tmp.30, %a4 + %tmp.34 = fsub fast <2 x float> %tmp.32, %a2 + %T = fmul fast <2 x float> %tmp.34, %tmp.34 + ret <2 x float> %T +} + +; With reassociation, constant folding can eliminate the 12 and -12 constants. +define <2 x float> @test6(<2 x float> %arg) { +; R-GVN-IC-LABEL: @test6 +; R-GVN-IC-NEXT: fsub fast <2 x float> , %arg +; R-GVN-IC-NEXT: ret <2 x float> + + %tmp1 = fsub fast <2 x float> , %arg + %tmp2 = fadd fast <2 x float> %tmp1, + ret <2 x float> %tmp2 +} + +define <2 x float> @test7(<2 x float> %a, <2 x float> %b) { +; R-GVN-IC-LABEL: @test7 +; R-GVN-IC-NEXT: fadd <2 x float> %a, %b +; R-GVN-IC-NEXT: ret <2 x float> + + %tmp1 = fadd fast <2 x float> %a, + %tmp2 = fadd fast <2 x float> %tmp1, %b + %tmp3 = fadd fast <2 x float> %tmp2, + ret <2 x float> %tmp3 +} + +define <4 x float> @test8() { +; CHECK-LABEL: @test8 +; CHECK-NEXT: %tmp1 = fsub fast <4 x float> zeroinitializer, zeroinitializer +; CHECK-NEXT: %tmp2 = fmul fast <4 x float> zeroinitializer, %tmp1 + + %tmp1 = fsub fast <4 x float> zeroinitializer, zeroinitializer + %tmp2 = fmul fast <4 x float> zeroinitializer, %tmp1 + ret <4 x float> %tmp2 +} + +@fe = external global <2 x float> +@fa = external global <2 x float> +@fb = external global <2 x float> +@fc = external global <2 x float> +@ff = external global <2 x float> + +define void @test9() { +; R-GVN-IC-LABEL: @test9 +; R-GVN-IC: fadd fast <2 x float> +; R-GVN-IC: fadd fast <2 x float> +; R-GVN-IC-NOT: fadd fast <2 x float> +; R-GVN-IC: ret void + + %A = load <2 x float>* @fa + %B = load <2 x float>* @fb + %C = load <2 x float>* @fc + %t1 = fadd fast <2 x float> %A, %B + %t2 = fadd fast <2 x float> %t1, %C + %t3 = fadd fast <2 x float> %C, %A + %t4 = fadd fast <2 x float> %t3, %B + ; e = (a+b)+c; + store <2 x float> %t2, <2 x float>* @fe + ; f = (a+c)+b + store <2 x float> %t4, <2 x float>* @ff + ret void +} + +define void @test10() { +; R-GVN-IC-LABEL: @test10 +; R-GVN-IC: fadd fast <2 x float> +; R-GVN-IC: fadd fast <2 x float> +; R-GVN-IC-NOT: fadd +; R-GVN-IC: ret void + + %A = load <2 x float>* @fa + %B = load <2 x float>* @fb + %C = load <2 x float>* @fc + %t1 = fadd fast <2 x float> %A, %B + %t2 = fadd fast <2 x float> %t1, %C + %t3 = fadd fast <2 x float> %C, %A + %t4 = fadd fast <2 x float> %t3, %B + ; e = c+(a+b) + store <2 x float> %t2, <2 x float>* @fe + ; f = (c+a)+b + store <2 x float> %t4, <2 x float>* @ff + ret void +} + +define void @test11() { +; R-GVN-IC-LABEL: @test11 +; R-GVN-IC: fadd fast <2 x float> +; R-GVN-IC: fadd fast <2 x float> +; R-GVN-IC-NOT: fadd +; R-GVN-IC: ret void + + %A = load <2 x float>* @fa + %B = load <2 x float>* @fb + %C = load <2 x float>* @fc + %t1 = fadd fast <2 x float> %B, %A + %t2 = fadd fast <2 x float> %t1, %C + %t3 = fadd fast <2 x float> %C, %A + %t4 = fadd fast <2 x float> %t3, %B + ; e = c+(b+a) + store <2 x float> %t2, <2 x float>* @fe + ; f = (c+a)+b + store <2 x float> %t4, <2 x float>* @ff + ret void +} + +define <2 x float> @test12(<2 x float> %A, <2 x float> %B, <2 x float> %C) { +; R-GVN-IC-LABEL: @test12 +; R-GVN-IC-NEXT: fadd fast <2 x float> %C, %B +; R-GVN-IC-NEXT: fmul fast <2 x float> %A, %A +; R-GVN-IC-NEXT: fmul fast <2 x float> %1, %tmp2 +; R-GVN-IC-NEXT: ret <2 x float> + + %aa = fmul fast <2 x float> %A, %A + %aab = fmul fast <2 x float> %aa, %B + %ac = fmul fast <2 x float> %A, %C + %aac = fmul fast <2 x float> %ac, %A + %r = fadd fast <2 x float> %aab, %aac + ret <2 x float> %r +} + +; (-X)*Y + Z -> Z-X*Y +define <2 x float> @test13(<2 x float> %X, <2 x float> %Y, <2 x float> %Z) { +; R-GVN-IC-LABEL: @test13 +; R-GVN-IC-NEXT: fmul fast <2 x float> %Y, %X +; R-GVN-IC-NEXT: fsub fast <2 x float> %Z +; R-GVN-IC-NEXT: ret <2 x float> + + %A = fsub fast <2 x float> , %X + %B = fmul fast <2 x float> %A, %Y + %C = fadd fast <2 x float> %B, %Z + ret <2 x float> %C +} + +define <2 x float> @test14(<2 x float> %X) { +; R-GVN-IC-LABEL: @test14 +; R-GVN-IC-NEXT: fmul fast <2 x float> %X, +; R-GVN-IC-NEXT: ret <2 x float> + + %Y = fmul fast <2 x float> %X, + %Z = fadd fast <2 x float> %Y, %Y + ret <2 x float> %Z +} + +define <2 x float> @test15(<2 x float> %X) { +; R-GVN-IC-LABEL: @test15 +; R-GVN-IC-NEXT: fmul fast <2 x float> %X, 3.000000e+00 +; R-GVN-IC-NEXT: ret <2 x float> + + %Y = fadd fast <2 x float> %X ,%X + %Z = fadd fast <2 x float> %Y, %X + ret <2 x float> %Z +} + +define <2 x float> @test16(<2 x float> %W) { +; R-GVN-IC-LABEL: @test16 +; R-GVN-IC-NEXT: fmul fast <2 x float> %W, +; R-GVN-IC-NEXT: ret <2 x float> + + %X = fmul fast <2 x float> %W, + %Y = fadd fast <2 x float> %X ,%X + %Z = fadd fast <2 x float> %Y, %X + ret <2 x float> %Z +} + +define <2 x float> @test17(<2 x float> %X) { +; R-GVN-IC-LABEL: @test17 +; R-GVN-IC-NEXT: fmul fast <2 x float> %X, -3.000000e+00 +; R-GVN-IC-NEXT: fadd fast <2 x float> %factor, 6.000000e+00 +; R-GVN-IC-NEXT: ret <2 x float> + + %A = fsub fast <2 x float> , %X + %B = fsub fast <2 x float> , %X + %C = fsub fast <2 x float> , %X + %Y = fadd fast <2 x float> %A ,%B + %Z = fadd fast <2 x float> %Y, %C + ret <2 x float> %Z +} + +; -X1*X2 + X1*X3 -> X1*(X3-X2) +define <2 x float> @test18(<2 x float> %X1, <2 x float> %X2, <2 x float> %X3) { +; R-GVN-IC-LABEL: @test18 +; R-GVN-IC-NEXT: fsub fast <2 x float> %X3, %X2 +; R-GVN-IC-NEXT: fmul fast <2 x float> {{.*}}, %X1 +; R-GVN-IC-NEXT: ret <2 x float> + + %A = fsub fast <2 x float> , %X1 + %B = fmul fast <2 x float> %A, %X2 + %C = fmul fast <2 x float> %X1, %X3 + %D = fadd fast <2 x float> %B, %C + ret <2 x float> %D +} + +; X1*47 + X2*-47 -> 47*(X1-X2) +define <2 x float> @test19(<2 x float> %X1, <2 x float> %X2) { +; R-GVN-IC-LABEL: @test19 +; R-GVN-IC-NEXT: fsub fast <2 x float> %X1, %X2 +; R-GVN-IC-NEXT: fmul fast <2 x float> %tmp, +; R-GVN-IC-NEXT: ret <2 x float> + + %B = fmul fast <2 x float> %X1, + %C = fmul fast <2 x float> %X2, + %D = fadd fast <2 x float> %B, %C + ret <2 x float> %D +} + +define <2 x float> @test20(<2 x float> %arg) { +; R-GVN-IC-LABEL: @test20 +; R-GVN-IC-NEXT: fmul fast <2 x float> %arg, +; R-GVN-IC-NEXT: ret <2 x float> %tmp2 + + %tmp1 = fmul fast <2 x float> , %arg + %tmp2 = fmul fast <2 x float> %tmp1, + ret <2 x float> %tmp2 +} + +; (b+(a+1234))+-a -> b+1234 +define <2 x float> @test21(<2 x float> %b, <2 x float> %a) { +; R-GVN-IC-LABEL: @test21 +; R-GVN-IC-NEXT: fadd fast <2 x float> %b, +; R-GVN-IC-NEXT: ret <2 x float> + + %1 = fadd fast <2 x float> %a, + %2 = fadd fast <2 x float> %b, %1 + %3 = fsub fast <2 x float> , %a + %4 = fadd fast <2 x float> %2, %3 + ret <2 x float> %4 +} + +; Test that we can turn things like X*-(Y*Z) -> X*-1*Y*Z. +define <2 x float> @test22(<2 x float> %a, <2 x float> %b, <2 x float> %z) { +; R-GVN-IC-LABEL: @test22 +; R-GVN-IC-NEXT: fmul fast <2 x float> %a, +; R-GVN-IC-NEXT: fmul fast <2 x float> %e, %b +; R-GVN-IC-NEXT: fmul fast <2 x float> %f, %z +; R-GVN-IC-NEXT: ret <2 x float> + + %c = fsub fast <2 x float> , %z + %d = fmul fast <2 x float> %a, %b + %e = fmul fast <2 x float> %c, %d + %f = fmul fast <2 x float> %e, + %g = fsub fast <2 x float> , %f + ret <2 x float> %g +} + +define <2 x float> @test23(<2 x float> %a, <2 x float> %b, <2 x float> %z) { +; R-GVN-IC-LABEL: @test23 +; R-GVN-IC-NEXT: fmul fast <2 x float> %a, 4.000000e+01 +; R-GVN-IC-NEXT: fmul fast <2 x float> %e, %z +; R-GVN-IC-NEXT: ret <2 x float> + + %d = fmul fast <2 x float> %z, + %c = fsub fast <2 x float> , %d + %e = fmul fast <2 x float> %a, %c + %f = fsub fast <2 x float> , %e + ret <2 x float> %f +} + +; With sub reassociation, constant folding can eliminate the 12 and -12 constants. +define <2 x float> @test24(<2 x float> %A, <2 x float> %B) { +; R-GVN-IC-LABEL: @test24 +; R-GVN-IC-NEXT: fsub fast <2 x float> %A, %B +; R-GVN-IC-NEXT: ret <2 x float> + + %X = fadd fast <2 x float> , %A + %Y = fsub fast <2 x float> %X, %B + %Z = fadd fast <2 x float> %Y, + ret <2 x float> %Z +} + +; With sub reassociation, constant folding can eliminate the uses of %a. +define <2 x float> @test25(<2 x float> %a, <2 x float> %b, <2 x float> %c) { +; R-GVN-IC-LABEL: @test25 +; R-GVN-IC-NEXT: fsub fast <2 x float> , %b +; R-GVN-IC-NEXT: fsub fast <2 x float> %b.neg, %c +; R-GVN-IC-NEXT: ret <2 x float> + +; FIXME: Should be able to generate the below, which may expose more +; opportunites for FAdd reassociation. +; %sum = fadd fast <2 x float> %c, %b +; %tmp7 = fsub fast <2 x float> 0, %sum + + %tmp3 = fsub fast <2 x float> %a, %b + %tmp5 = fsub fast <2 x float> %tmp3, %c + %tmp7 = fsub fast <2 x float> %tmp5, %a + ret <2 x float> %tmp7 +} + +define <2 x float> @test26(<2 x float> %x, <2 x float> %y) { +; R-GVN-IC-LABEL: @test26 +; R-GVN-IC-NEXT: %factor = fmul fast <2 x float> , %y +; R-GVN-IC-NEXT: %tmp1 = fmul fast <2 x float> %factor, %x +; R-GVN-IC-NEXT: ret <2 x float> %tmp1 + + %1 = fmul fast <2 x float> %x, %y + %2 = fmul fast <2 x float> %y, %x + %3 = fadd fast <2 x float> %1, %2 + ret <2 x float> %3 +} + +define <2 x float> @test27(<2 x float> %A, <2 x float> %B) { +; R-IC-LABEL: @test27 +; R-IC: %Z = fadd fast <2 x float> %A, %B +; R-IC: ret <2 x float> %Z + + %W = fadd fast <2 x float> %B, + %Y = fadd fast <2 x float> %A, + %Z = fadd fast <2 x float> %W, %Y + ret <2 x float> %Z +} + +