Index: lib/Transforms/Scalar/Reassociate.cpp =================================================================== --- lib/Transforms/Scalar/Reassociate.cpp +++ lib/Transforms/Scalar/Reassociate.cpp @@ -239,6 +239,14 @@ return cast(V); return nullptr; } +static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1, + unsigned Opcode2) { + if (V->hasOneUse() && isa(V) && + (cast(V)->getOpcode() == Opcode1 || + cast(V)->getOpcode() == Opcode2)) + return cast(V); + return nullptr; +} static bool isUnmovableInstruction(Instruction *I) { switch (I->getOpcode()) { @@ -304,8 +312,10 @@ // If this is a not or neg instruction, do not count it for rank. This // assures us that X and ~X will have the same rank. - if (!I->getType()->isIntegerTy() || - (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I))) + Type *Ty = V->getType(); + if ((!Ty->isIntegerTy() && !Ty->isFloatingPointTy()) || + (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I) && + !BinaryOperator::isFNeg(I))) ++Rank; //DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " @@ -314,14 +324,54 @@ return ValueRankMap[I] = Rank; } +static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name, + Instruction *InsertBefore) { + if (S1->getType()->isIntegerTy()) + return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore); + else { + BinaryOperator *Res = + BinaryOperator::CreateFAdd(S1, S2, Name, InsertBefore); + FastMathFlags Flags = + cast(InsertBefore)->getFastMathFlags(); + Res->setFastMathFlags(Flags); + return Res; + } +} + +static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name, + Instruction *InsertBefore) { + if (S1->getType()->isIntegerTy()) + return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore); + else { + BinaryOperator *Res = + BinaryOperator::CreateFMul(S1, S2, Name, InsertBefore); + FastMathFlags Flags = + cast(InsertBefore)->getFastMathFlags(); + Res->setFastMathFlags(Flags); + return Res; + } +} + +static BinaryOperator *CreateNeg(Value *S1, const Twine &Name, + Instruction *InsertBefore, Value *FlagsOp) { + if (S1->getType()->isIntegerTy()) + return BinaryOperator::CreateNeg(S1, Name, InsertBefore); + else { + BinaryOperator *Res = BinaryOperator::CreateFNeg(S1, Name, InsertBefore); + Res->setFastMathFlags(cast(FlagsOp)->getFastMathFlags()); + return Res; + } +} + /// LowerNegateToMultiply - Replace 0-X with X*-1. /// static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) { - Constant *Cst = Constant::getAllOnesValue(Neg->getType()); + Type *Ty = Neg->getType(); + Constant *NegOne = Ty->isIntegerTy() ? ConstantInt::getAllOnesValue(Ty) + : ConstantFP::get(Ty, -1.0); - BinaryOperator *Res = - BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg); - Neg->setOperand(1, Constant::getNullValue(Neg->getType())); // Drop use of op. + BinaryOperator *Res = CreateMul(Neg->getOperand(1), NegOne, "", Neg); + Neg->setOperand(1, Constant::getNullValue(Ty)); // Drop use of op. Res->takeName(Neg); Neg->replaceAllUsesWith(Res); Res->setDebugLoc(Neg->getDebugLoc()); @@ -377,13 +427,14 @@ LHS = 0; // 1 + 1 === 0 modulo 2. return; } - if (Opcode == Instruction::Add) { + if (Opcode == Instruction::Add || Opcode == Instruction::FAdd) { // TODO: Reduce the weight by exploiting nsw/nuw? LHS += RHS; return; } - assert(Opcode == Instruction::Mul && "Unknown associative operation!"); + assert((Opcode == Instruction::Mul || Opcode == Instruction::FMul) && + "Unknown associative operation!"); unsigned Bitwidth = LHS.getBitWidth(); // If CM is the Carmichael number then a weight W satisfying W >= CM+Bitwidth // can be replaced with W-CM. That's because x^W=x^(W-CM) for every Bitwidth @@ -499,8 +550,7 @@ DEBUG(dbgs() << "LINEARIZE: " << *I << '\n'); unsigned Bitwidth = I->getType()->getScalarType()->getPrimitiveSizeInBits(); unsigned Opcode = I->getOpcode(); - assert(Instruction::isAssociative(Opcode) && - Instruction::isCommutative(Opcode) && + assert(I->isAssociative() && I->isCommutative() && "Expected an associative and commutative operation!"); // Visit all operands of the expression, keeping track of their weight (the @@ -619,15 +669,16 @@ // If this is a multiply expression, turn any internal negations into // multiplies by -1 so they can be reassociated. - BinaryOperator *BO = dyn_cast(Op); - if (Opcode == Instruction::Mul && BO && BinaryOperator::isNeg(BO)) { - DEBUG(dbgs() << "MORPH LEAF: " << *Op << " (" << Weight << ") TO "); - BO = LowerNegateToMultiply(BO); - DEBUG(dbgs() << *BO << 'n'); - Worklist.push_back(std::make_pair(BO, Weight)); - MadeChange = true; - continue; - } + if (BinaryOperator *BO = dyn_cast(Op)) + if ((Opcode == Instruction::Mul && BinaryOperator::isNeg(BO)) || + (Opcode == Instruction::FMul && BinaryOperator::isFNeg(BO))) { + DEBUG(dbgs() << "MORPH LEAF: " << *Op << " (" << Weight << ") TO "); + BO = LowerNegateToMultiply(BO); + DEBUG(dbgs() << *BO << '\n'); + Worklist.push_back(std::make_pair(BO, Weight)); + MadeChange = true; + continue; + } // Failed to morph into an expression of the right type. This really is // a leaf. @@ -722,6 +773,11 @@ break; if (NewLHS == OldRHS && NewRHS == OldLHS) { + // FIXME: Don't reverse ordering of FP instructions to maintain original + // functionallity. Need to investigate impact on other passes + // (e.g., commuting operands changed SLP costs). + if (isa(I)) + break; // The order of the operands was reversed. Swap them. DEBUG(dbgs() << "RA: " << *Op << '\n'); Op->swapOperands(); @@ -798,6 +854,8 @@ Constant *Undef = UndefValue::get(I->getType()); NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode), Undef, Undef, "", I); + if (NewOp->getType()->isFloatingPointTy()) + NewOp->setFastMathFlags(I->getFastMathFlags()); } else { NewOp = NodesToRewrite.pop_back_val(); } @@ -817,7 +875,14 @@ // expression tree is dominated by all of Ops. if (ExpressionChanged) do { - ExpressionChanged->clearSubclassOptionalData(); + // Preserve FastMathFlags. + if (isa(I)) { + FastMathFlags Flags = I->getFastMathFlags(); + ExpressionChanged->clearSubclassOptionalData(); + ExpressionChanged->setFastMathFlags(Flags); + } else + ExpressionChanged->clearSubclassOptionalData(); + if (ExpressionChanged == I) break; ExpressionChanged->moveBefore(I); @@ -834,6 +899,8 @@ /// version of the value is returned, and BI is left pointing at the instruction /// that should be processed next by the reassociation pass. static Value *NegateValue(Value *V, Instruction *BI) { + if (ConstantFP *C = dyn_cast(V)) + return ConstantExpr::getFNeg(C); if (Constant *C = dyn_cast(V)) return ConstantExpr::getNeg(C); @@ -846,7 +913,8 @@ // the constants. We assume that instcombine will clean up the mess later if // we introduce tons of unnecessary negation instructions. // - if (BinaryOperator *I = isReassociableOp(V, Instruction::Add)) { + if (BinaryOperator *I = + isReassociableOp(V, Instruction::Add, Instruction::FAdd)) { // Push the negates through the add. I->setOperand(0, NegateValue(I->getOperand(0), BI)); I->setOperand(1, NegateValue(I->getOperand(1), BI)); @@ -864,7 +932,8 @@ // Okay, we need to materialize a negated version of V with an instruction. // Scan the use lists of V to see if we have one already. for (User *U : V->users()) { - if (!BinaryOperator::isNeg(U)) continue; + if (!BinaryOperator::isNeg(U) && !BinaryOperator::isFNeg(U)) + continue; // We found one! Now we have to make sure that the definition dominates // this use. We do this by moving it to the entry block (if it is a @@ -894,27 +963,30 @@ // Insert a 'neg' instruction that subtracts the value from zero to get the // negation. - return BinaryOperator::CreateNeg(V, V->getName() + ".neg", BI); + return CreateNeg(V, V->getName() + ".neg", BI, BI); } /// ShouldBreakUpSubtract - Return true if we should break up this subtract of /// X-Y into (X + -Y). static bool ShouldBreakUpSubtract(Instruction *Sub) { // If this is a negation, we can't split it up! - if (BinaryOperator::isNeg(Sub)) + if (BinaryOperator::isNeg(Sub) || BinaryOperator::isFNeg(Sub)) return false; // Don't bother to break this up unless either the LHS is an associable add or // subtract or if this is only used by one. - if (isReassociableOp(Sub->getOperand(0), Instruction::Add) || - isReassociableOp(Sub->getOperand(0), Instruction::Sub)) + Value *V0 = Sub->getOperand(0); + if (isReassociableOp(V0, Instruction::Add, Instruction::FAdd) || + isReassociableOp(V0, Instruction::Sub, Instruction::FSub)) return true; - if (isReassociableOp(Sub->getOperand(1), Instruction::Add) || - isReassociableOp(Sub->getOperand(1), Instruction::Sub)) + Value *V1 = Sub->getOperand(1); + if (isReassociableOp(V1, Instruction::Add, Instruction::FAdd) || + isReassociableOp(V1, Instruction::Sub, Instruction::FSub)) return true; + Value *VB = Sub->user_back(); if (Sub->hasOneUse() && - (isReassociableOp(Sub->user_back(), Instruction::Add) || - isReassociableOp(Sub->user_back(), Instruction::Sub))) + (isReassociableOp(VB, Instruction::Add, Instruction::FAdd) || + isReassociableOp(VB, Instruction::Sub, Instruction::FSub))) return true; return false; @@ -931,8 +1003,7 @@ // and set it as the RHS of the add instruction we just made. // Value *NegVal = NegateValue(Sub->getOperand(1), Sub); - BinaryOperator *New = - BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub); + BinaryOperator *New = CreateAdd(Sub->getOperand(0), NegVal, "", Sub); Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op. Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op. New->takeName(Sub); @@ -988,15 +1059,16 @@ Value *V1 = Ops.back(); Ops.pop_back(); Value *V2 = EmitAddTreeOfValues(I, Ops); - return BinaryOperator::CreateAdd(V2, V1, "tmp", I); + return CreateAdd(V2, V1, "tmp", I); } /// RemoveFactorFromExpression - If V is an expression tree that is a /// multiplication sequence, and if this sequence contains a multiply by Factor, /// remove Factor from the tree and return the new tree. Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) { - BinaryOperator *BO = isReassociableOp(V, Instruction::Mul); - if (!BO) return nullptr; + BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul); + if (!BO) + return nullptr; SmallVector Tree; MadeChange |= LinearizeExprTree(BO, Tree); @@ -1018,13 +1090,25 @@ } // If this is a negative version of this factor, remove it. - if (ConstantInt *FC1 = dyn_cast(Factor)) + if (ConstantInt *FC1 = dyn_cast(Factor)) { if (ConstantInt *FC2 = dyn_cast(Factors[i].Op)) if (FC1->getValue() == -FC2->getValue()) { FoundFactor = NeedsNegate = true; Factors.erase(Factors.begin()+i); break; } + } else if (ConstantFP *FC1 = dyn_cast(Factor)) { + if (ConstantFP *FC2 = dyn_cast(Factors[i].Op)) { + APFloat F1(FC1->getValueAPF()); + APFloat F2(FC2->getValueAPF()); + F2.changeSign(); + if (F1.compare(F2) == APFloat::cmpEqual) { + FoundFactor = NeedsNegate = true; + Factors.erase(Factors.begin() + i); + break; + } + } + } } if (!FoundFactor) { @@ -1046,7 +1130,7 @@ } if (NeedsNegate) - V = BinaryOperator::CreateNeg(V, "neg", InsertPt); + V = CreateNeg(V, "neg", InsertPt, BO); return V; } @@ -1058,7 +1142,7 @@ static void FindSingleUseMultiplyFactors(Value *V, SmallVectorImpl &Factors, const SmallVectorImpl &Ops) { - BinaryOperator *BO = isReassociableOp(V, Instruction::Mul); + BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul); if (!BO) { Factors.push_back(V); return; @@ -1389,13 +1473,15 @@ ++NumFactor; // Insert a new multiply. - Value *Mul = ConstantInt::get(cast(I->getType()), NumFound); - Mul = BinaryOperator::CreateMul(TheOp, Mul, "factor", I); + Type *Ty = TheOp->getType(); + Constant *C = Ty->isIntegerTy() ? ConstantInt::get(Ty, NumFound) + : ConstantFP::get(Ty, NumFound); + Instruction *Mul = CreateMul(TheOp, C, "factor", I); // Now that we have inserted a multiply, optimize it. This allows us to // handle cases that require multiple factoring steps, such as this: // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6 - RedoInsts.insert(cast(Mul)); + RedoInsts.insert(Mul); // If every add operand was a duplicate, return the multiply. if (Ops.empty()) @@ -1412,11 +1498,12 @@ } // Check for X and -X or X and ~X in the operand list. - if (!BinaryOperator::isNeg(TheOp) && !BinaryOperator::isNot(TheOp)) + if (!BinaryOperator::isNeg(TheOp) && !BinaryOperator::isFNeg(TheOp) && + !BinaryOperator::isNot(TheOp)) continue; Value *X = nullptr; - if (BinaryOperator::isNeg(TheOp)) + if (BinaryOperator::isNeg(TheOp) || BinaryOperator::isFNeg(TheOp)) X = BinaryOperator::getNegArgument(TheOp); else if (BinaryOperator::isNot(TheOp)) X = BinaryOperator::getNotArgument(TheOp); @@ -1426,7 +1513,8 @@ continue; // Remove X and -X from the operand list. - if (Ops.size() == 2 && BinaryOperator::isNeg(TheOp)) + if (Ops.size() == 2 && + (BinaryOperator::isNeg(TheOp) || BinaryOperator::isFNeg(TheOp))) return Constant::getNullValue(X->getType()); // Remove X and ~X from the operand list. @@ -1463,7 +1551,8 @@ unsigned MaxOcc = 0; Value *MaxOccVal = nullptr; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - BinaryOperator *BOp = isReassociableOp(Ops[i].Op, Instruction::Mul); + BinaryOperator *BOp = + isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul); if (!BOp) continue; @@ -1476,23 +1565,43 @@ SmallPtrSet Duplicates; for (unsigned i = 0, e = Factors.size(); i != e; ++i) { Value *Factor = Factors[i]; - if (!Duplicates.insert(Factor)) continue; + if (!Duplicates.insert(Factor)) + continue; unsigned Occ = ++FactorOccurrences[Factor]; - if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; } + if (Occ > MaxOcc) { + MaxOcc = Occ; + MaxOccVal = Factor; + } // If Factor is a negative constant, add the negated value as a factor // because we can percolate the negate out. Watch for minint, which // cannot be positivified. - if (ConstantInt *CI = dyn_cast(Factor)) + if (ConstantInt *CI = dyn_cast(Factor)) { if (CI->isNegative() && !CI->isMinValue(true)) { Factor = ConstantInt::get(CI->getContext(), -CI->getValue()); assert(!Duplicates.count(Factor) && "Shouldn't have two constant factors, missed a canonicalize"); - unsigned Occ = ++FactorOccurrences[Factor]; - if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; } + if (Occ > MaxOcc) { + MaxOcc = Occ; + MaxOccVal = Factor; + } } + } else if (ConstantFP *CF = dyn_cast(Factor)) { + if (CF->isNegative()) { + APFloat F(CF->getValueAPF()); + F.changeSign(); + Factor = ConstantFP::get(CF->getContext(), F); + assert(!Duplicates.count(Factor) && + "Shouldn't have two constant factors, missed a canonicalize"); + unsigned Occ = ++FactorOccurrences[Factor]; + if (Occ > MaxOcc) { + MaxOcc = Occ; + MaxOccVal = Factor; + } + } + } } } @@ -1505,11 +1614,16 @@ // this, we could otherwise run into situations where removing a factor // from an expression will drop a use of maxocc, and this can cause // RemoveFactorFromExpression on successive values to behave differently. - Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal); + Instruction *DummyInst = + I->getType()->isIntegerTy() + ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal) + : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal); + SmallVector NewMulOps; for (unsigned i = 0; i != Ops.size(); ++i) { // Only try to remove factors from expressions we're allowed to. - BinaryOperator *BOp = isReassociableOp(Ops[i].Op, Instruction::Mul); + BinaryOperator *BOp = + isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul); if (!BOp) continue; @@ -1542,7 +1656,7 @@ RedoInsts.insert(VI); // Create the multiply. - Instruction *V2 = BinaryOperator::CreateMul(V, MaxOccVal, "tmp", I); + Instruction *V2 = CreateMul(V, MaxOccVal, "tmp", I); // Rerun associate on the multiply in case the inner expression turned into // a multiply. We want to make sure that we keep things in canonical form. @@ -1632,7 +1746,10 @@ Value *LHS = Ops.pop_back_val(); do { - LHS = Builder.CreateMul(LHS, Ops.pop_back_val()); + if (LHS->getType()->isIntegerTy()) + LHS = Builder.CreateMul(LHS, Ops.pop_back_val()); + else + LHS = Builder.CreateFMul(LHS, Ops.pop_back_val()); } while (!Ops.empty()); return LHS; @@ -1765,11 +1882,13 @@ break; case Instruction::Add: + case Instruction::FAdd: if (Value *Result = OptimizeAdd(I, Ops)) return Result; break; case Instruction::Mul: + case Instruction::FMul: if (Value *Result = OptimizeMul(I, Ops)) return Result; break; @@ -1810,8 +1929,7 @@ if (!isa(I)) return; - if (I->getOpcode() == Instruction::Shl && - isa(I->getOperand(1))) + if (I->getOpcode() == Instruction::Shl && isa(I->getOperand(1))) // If an operand of this shift is a reassociable multiply, or if the shift // is used by a reassociable multiply or add, turn into a multiply. if (isReassociableOp(I->getOperand(0), Instruction::Mul) || @@ -1824,28 +1942,30 @@ I = NI; } - // Floating point binary operators are not associative, but we can still - // commute (some) of them, to canonicalize the order of their operands. - // This can potentially expose more CSE opportunities, and makes writing - // other transformations simpler. - if ((I->getType()->isFloatingPointTy() || I->getType()->isVectorTy())) { - // FAdd and FMul can be commuted. - if (I->getOpcode() != Instruction::FMul && - I->getOpcode() != Instruction::FAdd) - return; + // Commute floating point binary operators, to canonicalize the order of their + // operands. This can potentially expose more CSE opportunities, and makes + // writing other transformations simpler. + if (I->getType()->isFloatingPointTy() || I->getType()->isVectorTy()) { - Value *LHS = I->getOperand(0); - Value *RHS = I->getOperand(1); - unsigned LHSRank = getRank(LHS); - unsigned RHSRank = getRank(RHS); - - // Sort the operands by rank. - if (RHSRank < LHSRank) { - I->setOperand(0, RHS); - I->setOperand(1, LHS); + // FAdd and FMul can be commuted. + if (I->getOpcode() == Instruction::FMul || + I->getOpcode() == Instruction::FAdd) { + Value *LHS = I->getOperand(0); + Value *RHS = I->getOperand(1); + unsigned LHSRank = getRank(LHS); + unsigned RHSRank = getRank(RHS); + + // Sort the operands by rank. + if (RHSRank < LHSRank) { + I->setOperand(0, RHS); + I->setOperand(1, LHS); + } } - return; + // Don't try to optimize vector instructions or anything that doesn't have + // unsafe algebra. + if (I->getType()->isVectorTy() || !I->hasUnsafeAlgebra()) + return; } // Do not reassociate boolean (i1) expressions. We want to preserve the @@ -1877,6 +1997,24 @@ I = NI; } } + } else if (I->getOpcode() == Instruction::FSub) { + if (ShouldBreakUpSubtract(I)) { + Instruction *NI = BreakUpSubtract(I); + RedoInsts.insert(I); + MadeChange = true; + I = NI; + } else if (BinaryOperator::isFNeg(I)) { + // Otherwise, this is a negation. See if the operand is a multiply tree + // and if this is not an inner node of a multiply tree. + if (isReassociableOp(I->getOperand(1), Instruction::FMul) && + (!I->hasOneUse() || + !isReassociableOp(I->user_back(), Instruction::FMul))) { + Instruction *NI = LowerNegateToMultiply(I); + RedoInsts.insert(I); + MadeChange = true; + I = NI; + } + } } // If this instruction is an associative binary operator, process it. @@ -1894,11 +2032,16 @@ if (BO->hasOneUse() && BO->getOpcode() == Instruction::Add && cast(BO->user_back())->getOpcode() == Instruction::Sub) return; + if (BO->hasOneUse() && BO->getOpcode() == Instruction::FAdd && + cast(BO->user_back())->getOpcode() == Instruction::FSub) + return; ReassociateExpression(BO); } void Reassociate::ReassociateExpression(BinaryOperator *I) { + assert(!I->getType()->isVectorTy() && + "Reassociation of vector instructions is not supported."); // First, walk the expression tree, linearizing the tree, collecting the // operand information. @@ -1943,12 +2086,21 @@ // this is a multiply tree used only by an add, and the immediate is a -1. // In this case we reassociate to put the negation on the outside so that we // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y - if (I->getOpcode() == Instruction::Mul && I->hasOneUse() && - cast(I->user_back())->getOpcode() == Instruction::Add && - isa(Ops.back().Op) && - cast(Ops.back().Op)->isAllOnesValue()) { - ValueEntry Tmp = Ops.pop_back_val(); - Ops.insert(Ops.begin(), Tmp); + if (I->hasOneUse()) { + if (I->getOpcode() == Instruction::Mul && + cast(I->user_back())->getOpcode() == Instruction::Add && + isa(Ops.back().Op) && + cast(Ops.back().Op)->isAllOnesValue()) { + ValueEntry Tmp = Ops.pop_back_val(); + Ops.insert(Ops.begin(), Tmp); + } else if (I->getOpcode() == Instruction::FMul && + cast(I->user_back())->getOpcode() == + Instruction::FAdd && + isa(Ops.back().Op) && + cast(Ops.back().Op)->isExactlyValue(-1.0)) { + ValueEntry Tmp = Ops.pop_back_val(); + Ops.insert(Ops.begin(), Tmp); + } } DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n'); Index: test/Transforms/Reassociate/fast-AgressiveSubMove.ll =================================================================== --- /dev/null +++ test/Transforms/Reassociate/fast-AgressiveSubMove.ll @@ -0,0 +1,24 @@ +; RUN: opt < %s -reassociate -S | FileCheck %s + +define float @test1(float %A) { +; CHECK-LABEL: test1 +; CHECK-NEXT: %X = fadd float 1.000000e+00, %A +; CHECK-NEXT: %Y = fadd float 1.000000e+00, %A +; CHECK-NEXT: %r = fsub float %X, %Y +; CHECK-NEXT: ret float %r + + %X = fadd float %A, 1.000000e+00 + %Y = fadd float %A, 1.000000e+00 + %r = fsub float %X, %Y + ret float %r +} + +define float @test2(float %A) { +; CHECK-LABEL: test2 +; CHECK-NEXT: ret float 0.000000e+00 + + %X = fadd fast float 1.000000e+00, %A + %Y = fadd fast float 1.000000e+00, %A + %r = fsub fast float %X, %Y + ret float %r +} Index: test/Transforms/Reassociate/fast-ArrayOutOfBounds.ll =================================================================== --- /dev/null +++ test/Transforms/Reassociate/fast-ArrayOutOfBounds.ll @@ -0,0 +1,65 @@ +; RUN: opt < %s -reassociate -instcombine -S | FileCheck %s + +; Not marked as fast, so must not change. +define float @test1(float %a0, float %a1, float %a2, float %a3, float %a4) { +; CHECK-LABEL: test1 +; CHECK-NEXT: %tmp.2 = fadd float %a3, %a4 +; CHECK-NEXT: %tmp.4 = fadd float %tmp.2, %a2 +; CHECK-NEXT: %tmp.6 = fadd float %tmp.4, %a1 +; CHECK-NEXT: %tmp.8 = fadd float %tmp.6, %a0 +; CHECK-NEXT: %tmp.11 = fadd float %a2, %a3 +; CHECK-NEXT: %tmp.13 = fadd float %tmp.11, %a1 +; CHECK-NEXT: %tmp.15 = fadd float %tmp.13, %a0 +; CHECK-NEXT: %tmp.18 = fadd float %a1, %a2 +; CHECK-NEXT: %tmp.20 = fadd float %tmp.18, %a0 +; CHECK-NEXT: %tmp.23 = fadd float %a0, %a1 +; CHECK-NEXT: %tmp.26 = fsub float %tmp.8, %tmp.15 +; CHECK-NEXT: %tmp.28 = fadd float %tmp.20, %tmp.26 +; CHECK-NEXT: %tmp.30 = fsub float %tmp.28, %tmp.23 +; CHECK-NEXT: %tmp.32 = fsub float %tmp.30, %a4 +; CHECK-NEXT: %tmp.34 = fsub float %tmp.32, %a2 +; CHECK-NEXT: %T = fmul float %tmp.34, %tmp.34 +; CHECK-NEXT: ret float %T + + %tmp.2 = fadd float %a4, %a3 + %tmp.4 = fadd float %tmp.2, %a2 + %tmp.6 = fadd float %tmp.4, %a1 + %tmp.8 = fadd float %tmp.6, %a0 + %tmp.11 = fadd float %a3, %a2 + %tmp.13 = fadd float %tmp.11, %a1 + %tmp.15 = fadd float %tmp.13, %a0 + %tmp.18 = fadd float %a2, %a1 + %tmp.20 = fadd float %tmp.18, %a0 + %tmp.23 = fadd float %a1, %a0 + %tmp.26 = fsub float %tmp.8, %tmp.15 + %tmp.28 = fadd float %tmp.26, %tmp.20 + %tmp.30 = fsub float %tmp.28, %tmp.23 + %tmp.32 = fsub float %tmp.30, %a4 + %tmp.34 = fsub float %tmp.32, %a2 + %T = fmul float %tmp.34, %tmp.34 + ret float %T +} + +; Should be able to eliminate everything. +define float @test2(float %a0, float %a1, float %a2, float %a3, float %a4) { +; CHECK-LABEL: test2 +; CHECK: ret float 0.000000e+00 + + %tmp.2 = fadd fast float %a4, %a3 + %tmp.4 = fadd fast float %tmp.2, %a2 + %tmp.6 = fadd fast float %tmp.4, %a1 + %tmp.8 = fadd fast float %tmp.6, %a0 + %tmp.11 = fadd fast float %a3, %a2 + %tmp.13 = fadd fast float %tmp.11, %a1 + %tmp.15 = fadd fast float %tmp.13, %a0 + %tmp.18 = fadd fast float %a2, %a1 + %tmp.20 = fadd fast float %tmp.18, %a0 + %tmp.23 = fadd fast float %a1, %a0 + %tmp.26 = fsub fast float %tmp.8, %tmp.15 + %tmp.28 = fadd fast float %tmp.26, %tmp.20 + %tmp.30 = fsub fast float %tmp.28, %tmp.23 + %tmp.32 = fsub fast float %tmp.30, %a4 + %tmp.34 = fsub fast float %tmp.32, %a2 + %T = fmul fast float %tmp.34, %tmp.34 + ret float %T +} Index: test/Transforms/Reassociate/fast-MissedTree.ll =================================================================== --- /dev/null +++ test/Transforms/Reassociate/fast-MissedTree.ll @@ -0,0 +1,11 @@ +; RUN: opt < %s -reassociate -instcombine -S | FileCheck %s + +define float @test1(float %A, float %B) { +; CHECK-LABEL: test1 +; CHECK: %Z = fadd fast float %A, %B +; CHECK: ret float %Z + %W = fadd fast float %B, -5.0 + %Y = fadd fast float %A, 5.0 + %Z = fadd fast float %W, %Y + ret float %Z +} Index: test/Transforms/Reassociate/fast-ReassociateVector.ll =================================================================== --- /dev/null +++ test/Transforms/Reassociate/fast-ReassociateVector.ll @@ -0,0 +1,25 @@ +; RUN: opt < %s -reassociate -S | FileCheck %s + +; Don't handle floating point vector operations. +define <4 x float> @test1() { +; CHECK-LABEL: test1 +; CHECK-NEXT: %tmp1 = fsub fast <4 x float> zeroinitializer, zeroinitializer +; CHECK-NEXT: %tmp2 = fmul fast <4 x float> zeroinitializer, %tmp1 + + %tmp1 = fsub fast <4 x float> zeroinitializer, zeroinitializer + %tmp2 = fmul fast <4 x float> zeroinitializer, %tmp1 + ret <4 x float> %tmp2 +} + +; We don't currently commute integer vector operations. +define <2 x i32> @test2(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: test2 +; CHECK-NEXT: %tmp1 = add <2 x i32> %x, %y +; CHECK-NEXT: %tmp2 = add <2 x i32> %y, %x +; CHECK-NEXT: %tmp3 = add <2 x i32> %tmp1, %tmp2 + + %tmp1 = add <2 x i32> %x, %y + %tmp2 = add <2 x i32> %y, %x + %tmp3 = add <2 x i32> %tmp1, %tmp2 + ret <2 x i32> %tmp3 +} Index: test/Transforms/Reassociate/fast-SubReassociate.ll =================================================================== --- /dev/null +++ test/Transforms/Reassociate/fast-SubReassociate.ll @@ -0,0 +1,70 @@ +; RUN: opt < %s -reassociate -constprop -instcombine -S | FileCheck %s + +define float @test1(float %A, float %B) { +; CHECK-LABEL: test1 +; CHECK-NEXT: %W = fadd float %B, 5.000000e+00 +; CHECK-NEXT: %X = fadd float %A, -7.000000e+00 +; CHECK-NEXT: %Y = fsub float %X, %W +; CHECK-NEXT: %Z = fadd float %Y, 1.200000e+01 +; CHECK-NEXT: ret float %Z + + %W = fadd float 5.0, %B + %X = fadd float -7.0, %A + %Y = fsub float %X, %W + %Z = fadd float %Y, 12.0 + ret float %Z +} + +; With sub reassociation, constant folding can eliminate all of the constants. +define float @test2(float %A, float %B) { +; CHECK-LABEL: test2 +; CHECK-NEXT: %Z = fsub fast float %A, %B +; CHECK-NEXT: ret float %Z + + %W = fadd fast float %B, 5.000000e+00 + %X = fadd fast float %A, -7.000000e+00 + %Y = fsub fast float %X, %W + %Z = fadd fast float %Y, 1.200000e+01 + ret float %Z + +} + +define float @test3(float %A, float %B, float %C, float %D) { +; CHECK-LABEL: test3 +; CHECK-NEXT: %M = fadd float %A, 1.200000e+01 +; CHECK-NEXT: %N = fadd float %M, %B +; CHECK-NEXT: %O = fadd float %N, %C +; CHECK-NEXT: %P = fsub float %D, %O +; CHECK-NEXT: %Q = fadd float %P, 1.200000e+01 +; CHECK-NEXT: ret float %Q + + %M = fadd float %A, 1.200000e+01 + %N = fadd float %M, %B + %O = fadd float %N, %C + %P = fsub float %D, %O + %Q = fadd float %P, 1.200000e+01 + ret float %Q +} + +; With sub reassociation, constant folding can eliminate the two 12 constants. +define float @test4(float %A, float %B, float %C, float %D) { +; CHECK-LABEL: test4 +; CHECK-NEXT: %B.neg = fsub fast float -0.000000e+00, %B +; CHECK-NEXT: %O.neg = fsub fast float %B.neg, %A +; CHECK-NEXT: %P = fsub fast float %O.neg, %C +; CHECK-NEXT: %Q = fadd fast float %P, %D +; CHECK-NEXT: ret float %Q + +; FIXME: InstCombine should be able to get us to the following: +; %sum = fadd fast float %B, %A +; %sum1 = fadd fast float %sum, %C +; %Q = fsub fast float %D, %sum1 +; ret i32 %Q + + %M = fadd fast float 1.200000e+01, %A + %N = fadd fast float %M, %B + %O = fadd fast float %N, %C + %P = fsub fast float %D, %O + %Q = fadd fast float 1.200000e+01, %P + ret float %Q +} Index: test/Transforms/Reassociate/fast-basictest.ll =================================================================== --- /dev/null +++ test/Transforms/Reassociate/fast-basictest.ll @@ -0,0 +1,285 @@ +; RUN: opt < %s -reassociate -gvn -instcombine -S | FileCheck %s + +; With reassociation, constant folding can eliminate the 12 and -12 constants. +define float @test1(float %arg) { +; CHECK-LABEL: @test1 +; CHECK-NEXT: fsub fast float -0.000000e+00, %arg +; CHECK-NEXT: ret float + + %tmp1 = fsub fast float -1.200000e+01, %arg + %tmp2 = fadd fast float %tmp1, 1.200000e+01 + ret float %tmp2 +} + +define float @test2(float %reg109, float %reg1111) { +; CHECK-LABEL: @test2 +; CHECK-NEXT: fadd float %reg109, -3.000000e+01 +; CHECK-NEXT: fadd float %reg115, %reg1111 +; CHECK-NEXT: fadd float %reg116, 3.000000e+01 +; CHECK-NEXT: ret float + + %reg115 = fadd float %reg109, -3.000000e+01 + %reg116 = fadd float %reg115, %reg1111 + %reg117 = fadd float %reg116, 3.000000e+01 + ret float %reg117 +} + +define float @test3(float %reg109, float %reg1111) { +; CHECK-LABEL: @test3 +; CHECK-NEXT: %reg117 = fadd fast float %reg109, %reg1111 +; CHECK-NEXT: ret float %reg117 + + %reg115 = fadd fast float %reg109, -3.000000e+01 + %reg116 = fadd fast float %reg115, %reg1111 + %reg117 = fadd fast float %reg116, 3.000000e+01 + ret float %reg117 +} + +@fe = external global float +@fa = external global float +@fb = external global float +@fc = external global float +@ff = external global float + +define void @test4() { +; CHECK-LABEL: @test4 +; CHECK: fadd fast float +; CHECK: fadd fast float +; CHECK-NOT: fadd fast float +; CHECK: ret void + + %A = load float* @fa + %B = load float* @fb + %C = load float* @fc + %t1 = fadd fast float %A, %B + %t2 = fadd fast float %t1, %C + %t3 = fadd fast float %C, %A + %t4 = fadd fast float %t3, %B + ; e = (a+b)+c; + store float %t2, float* @fe + ; f = (a+c)+b + store float %t4, float* @ff + ret void +} + +define void @test5() { +; CHECK-LABEL: @test5 +; CHECK: fadd fast float +; CHECK: fadd fast float +; CHECK-NOT: fadd +; CHECK: ret void + + %A = load float* @fa + %B = load float* @fb + %C = load float* @fc + %t1 = fadd fast float %A, %B + %t2 = fadd fast float %t1, %C + %t3 = fadd fast float %C, %A + %t4 = fadd fast float %t3, %B + ; e = c+(a+b) + store float %t2, float* @fe + ; f = (c+a)+b + store float %t4, float* @ff + ret void +} + +define void @test6() { +; CHECK-LABEL: @test6 +; CHECK: fadd fast float +; CHECK: fadd fast float +; CHECK-NOT: fadd +; CHECK: ret void + + %A = load float* @fa + %B = load float* @fb + %C = load float* @fc + %t1 = fadd fast float %B, %A + %t2 = fadd fast float %t1, %C + %t3 = fadd fast float %C, %A + %t4 = fadd fast float %t3, %B + ; e = c+(b+a) + store float %t2, float* @fe + ; f = (c+a)+b + store float %t4, float* @ff + ret void +} + +define float @test7(float %A, float %B, float %C) { +; CHECK-LABEL: @test7 +; CHECK-NEXT: fadd fast float %B, %C +; CHECK-NEXT: fmul fast float %A, %A +; CHECK-NEXT: fmul fast float %1, %tmp2 +; CHECK-NEXT: ret float + + %aa = fmul fast float %A, %A + %aab = fmul fast float %aa, %B + %ac = fmul fast float %A, %C + %aac = fmul fast float %ac, %A + %r = fadd fast float %aab, %aac + ret float %r +} + +define float @test8(float %X, float %Y, float %Z) { +; CHECK-LABEL: @test8 +; CHECK-NEXT: fmul fast float %Y, %X +; CHECK-NEXT: fsub fast float %Z +; CHECK-NEXT: ret float + + %A = fsub fast float 0.0, %X + %B = fmul fast float %A, %Y + ; (-X)*Y + Z -> Z-X*Y + %C = fadd fast float %B, %Z + ret float %C +} + +define float @test9(float %X) { +; CHECK-LABEL: @test9 +; CHECK-NEXT: fmul fast float %X, 9.400000e+01 +; CHECK-NEXT: ret float + + %Y = fmul fast float %X, 4.700000e+01 + %Z = fadd fast float %Y, %Y + ret float %Z +} + +define float @test10(float %X) { +; CHECK-LABEL: @test10 +; CHECK-NEXT: fmul fast float %X, 3.000000e+00 +; CHECK-NEXT: ret float + + %Y = fadd fast float %X ,%X + %Z = fadd fast float %Y, %X + ret float %Z +} + +define float @test11(float %W) { +; CHECK-LABEL: test11 +; CHECK-NEXT: fmul fast float %W, 3.810000e+02 +; CHECK-NEXT: ret float + + %X = fmul fast float %W, 127.0 + %Y = fadd fast float %X ,%X + %Z = fadd fast float %Y, %X + ret float %Z +} + +define float @test12(float %X) { +; CHECK-LABEL: @test12 +; CHECK-NEXT: fmul fast float %X, -3.000000e+00 +; CHECK-NEXT: fadd fast float %factor, 6.000000e+00 +; CHECK-NEXT: ret float + + %A = fsub fast float 1.000000e+00, %X + %B = fsub fast float 2.000000e+00, %X + %C = fsub fast float 3.000000e+00, %X + %Y = fadd fast float %A ,%B + %Z = fadd fast float %Y, %C + ret float %Z +} + +define float @test13(float %X1, float %X2, float %X3) { +; CHECK-LABEL: @test13 +; CHECK-NEXT: fsub fast float %X3, %X2 +; CHECK-NEXT: fmul fast float {{.*}}, %X1 +; CHECK-NEXT: ret float + + %A = fsub fast float 0.000000e+00, %X1 + %B = fmul fast float %A, %X2 ; -X1*X2 + %C = fmul fast float %X1, %X3 ; X1*X3 + %D = fadd fast float %B, %C ; -X1*X2 + X1*X3 -> X1*(X3-X2) + ret float %D +} + +define float @test14(float %X1, float %X2) { +; CHECK-LABEL: @test14 +; CHECK-NEXT: fsub fast float %X1, %X2 +; CHECK-NEXT: fmul fast float %tmp, 4.700000e+01 +; CHECK-NEXT: ret float + + %B = fmul fast float %X1, 47. ; X1*47 + %C = fmul fast float %X2, -47. ; X2*-47 + %D = fadd fast float %B, %C ; X1*47 + X2*-47 -> 47*(X1-X2) + ret float %D +} + +define float @test15(float %arg) { +; CHECK-LABEL: test15 +; CHECK-NEXT: fmul fast float %arg, 1.440000e+02 +; CHECK-NEXT: ret float %tmp2 + + %tmp1 = fmul fast float 1.200000e+01, %arg + %tmp2 = fmul fast float %tmp1, 1.200000e+01 + ret float %tmp2 +} + +; (b+(a+1234))+-a -> b+1234 +define float @test16(float %b, float %a) { +; CHECK-LABEL: @test16 +; CHECK-NEXT: fadd fast float %b, 1.234000e+03 +; CHECK-NEXT: ret float + + %1 = fadd fast float %a, 1234.0 + %2 = fadd fast float %b, %1 + %3 = fsub fast float 0.0, %a + %4 = fadd fast float %2, %3 + ret float %4 +} + +; Test that we can turn things like X*-(Y*Z) -> X*-1*Y*Z. + +define float @test17(float %a, float %b, float %z) { +; CHECK-LABEL: test17 +; CHECK-NEXT: fmul fast float %a, 1.234500e+04 +; CHECK-NEXT: fmul fast float %e, %b +; CHECK-NEXT: fmul fast float %f, %z +; CHECK-NEXT: ret float + + %c = fsub fast float 0.000000e+00, %z + %d = fmul fast float %a, %b + %e = fmul fast float %c, %d + %f = fmul fast float %e, 1.234500e+04 + %g = fsub fast float 0.000000e+00, %f + ret float %g +} + +define float @test18(float %a, float %b, float %z) { +; CHECK-LABEL: test18 +; CHECK-NEXT: fmul fast float %a, 4.000000e+01 +; CHECK-NEXT: fmul fast float %e, %z +; CHECK-NEXT: ret float + + %d = fmul fast float %z, 4.000000e+01 + %c = fsub fast float 0.000000e+00, %d + %e = fmul fast float %a, %c + %f = fsub fast float 0.000000e+00, %e + ret float %f +} + +; With sub reassociation, constant folding can eliminate the 12 and -12 constants. +define float @test19(float %A, float %B) { +; CHECK-LABEL: @test19 +; CHECK-NEXT: fsub fast float %A, %B +; CHECK-NEXT: ret float + %X = fadd fast float -1.200000e+01, %A + %Y = fsub fast float %X, %B + %Z = fadd fast float %Y, 1.200000e+01 + ret float %Z +} + +; With sub reassociation, constant folding can eliminate the uses of %a. +define float @test20(float %a, float %b, float %c) nounwind { +; CHECK-LABEL: @test20 +; CHECK-NEXT: fsub fast float -0.000000e+00, %b +; CHECK-NEXT: fsub fast float %b.neg, %c +; CHECK-NEXT: ret float + +; FIXME: Should be able to generate the below, which may expose more +; opportunites for FAdd reassociation. +; %sum = fadd fast float %c, %b +; %tmp7 = fsub fast float 0, %sum + + %tmp3 = fsub fast float %a, %b + %tmp5 = fsub fast float %tmp3, %c + %tmp7 = fsub fast float %tmp5, %a + ret float %tmp7 +} Index: test/Transforms/Reassociate/fast-fp-commute.ll =================================================================== --- /dev/null +++ test/Transforms/Reassociate/fast-fp-commute.ll @@ -0,0 +1,44 @@ +; RUN: opt -reassociate -S < %s | FileCheck %s + +declare void @use(float) + +define void @test1(float %x, float %y) { +; CHECK-LABEL: test1 +; CHECK: fmul fast float %x, %y +; CHECK: fmul fast float %x, %y +; CHECK: fsub fast float %1, %2 +; CHECK: call void @use(float %{{.*}}) +; CHECK: call void @use(float %{{.*}}) + + %1 = fmul fast float %x, %y + %2 = fmul fast float %y, %x + %3 = fsub fast float %1, %2 + call void @use(float %1) + call void @use(float %3) + ret void +} + +define float @test2(float %x, float %y) { +; CHECK-LABEL: test2 +; CHECK-NEXT: fmul fast float %x, %y +; CHECK-NEXT: fmul fast float %x, %y +; CHECK-NEXT: fsub fast float %1, %2 +; CHECK-NEXT: ret float %3 + + %1 = fmul fast float %x, %y + %2 = fmul fast float %y, %x + %3 = fsub fast float %1, %2 + ret float %3 +} + +define float @test3(float %x, float %y) { +; CHECK-LABEL: test3 +; CHECK-NEXT: %factor = fmul fast float 2.000000e+00, %x +; CHECK-NEXT: %tmp1 = fmul fast float %factor, %y +; CHECK-NEXT: ret float %tmp1 + + %1 = fmul fast float %x, %y + %2 = fmul fast float %y, %x + %3 = fadd fast float %1, %2 + ret float %3 +} Index: test/Transforms/Reassociate/fast-mightymul.ll =================================================================== --- /dev/null +++ test/Transforms/Reassociate/fast-mightymul.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -reassociate -disable-output +; PR13021 + +define float @test2(float %x) { + %t0 = fmul fast float %x, %x + %t1 = fmul fast float %t0, %t0 + %t2 = fmul fast float %t1, %t1 + %t3 = fmul fast float %t2, %t2 + %t4 = fmul fast float %t3, %t3 + %t5 = fmul fast float %t4, %t4 + %t6 = fmul fast float %t5, %t5 + %t7 = fmul fast float %t6, %t6 + %t8 = fmul fast float %t7, %t7 + %t9 = fmul fast float %t8, %t8 + %t10 = fmul fast float %t9, %t9 + %t11 = fmul fast float %t10, %t10 + %t12 = fmul fast float %t11, %t11 + %t13 = fmul fast float %t12, %t12 + %t14 = fmul fast float %t13, %t13 + %t15 = fmul fast float %t14, %t14 + %t16 = fmul fast float %t15, %t15 + %t17 = fmul fast float %t16, %t16 + %t18 = fmul fast float %t17, %t17 + %t19 = fmul fast float %t18, %t18 + %t20 = fmul fast float %t19, %t19 + %t21 = fmul fast float %t20, %t20 + %t22 = fmul fast float %t21, %t21 + %t23 = fmul fast float %t22, %t22 + %t24 = fmul fast float %t23, %t23 + %t25 = fmul fast float %t24, %t24 + %t26 = fmul fast float %t25, %t25 + %t27 = fmul fast float %t26, %t26 + %t28 = fmul fast float %t27, %t27 + ret float %t28 +} Index: test/Transforms/Reassociate/fast-multistep.ll =================================================================== --- /dev/null +++ test/Transforms/Reassociate/fast-multistep.ll @@ -0,0 +1,32 @@ +; RUN: opt < %s -reassociate -S | FileCheck %s + +define float @fmultistep1(float %a, float %b, float %c) { +; Check that a*a*b+a*a*c is turned into a*(a*(b+c)). +; CHECK-LABEL: @fmultistep1 +; CHECK-NEXT: fadd fast float %b, %c +; CHECK-NEXT: fmul fast float %a, %tmp2 +; CHECK-NEXT: fmul fast float %a, %tmp3 +; CHECK-NEXT: ret float + + %t0 = fmul fast float %a, %b + %t1 = fmul fast float %a, %t0 ; a*(a*b) + %t2 = fmul fast float %a, %c + %t3 = fmul fast float %a, %t2 ; a*(a*c) + %t4 = fadd fast float %t1, %t3 + ret float %t4 +} + +define float @fmultistep2(float %a, float %b, float %c, float %d) { +; Check that a*b+a*c+d is turned into a*(b+c)+d. +; CHECK-LABEL: @fmultistep2 +; CHECK-NEXT: fadd fast float %b, %c +; CHECK-NEXT: fmul fast float %a, %tmp +; CHECK-NEXT: fadd fast float %tmp1, %d +; CHECK-NEXT: ret float + + %t0 = fmul fast float %a, %b + %t1 = fmul fast float %a, %c + %t2 = fadd fast float %t1, %d ; a*c+d + %t3 = fadd fast float %t0, %t2 ; a*b+(a*c+d) + ret float %t3 +}