Index: lib/Transforms/Scalar/Reassociate.cpp
===================================================================
--- lib/Transforms/Scalar/Reassociate.cpp
+++ lib/Transforms/Scalar/Reassociate.cpp
@@ -893,7 +893,10 @@
 /// that computes the negative version of the value specified. The negative
 /// version of the value is returned, and BI is left pointing at the instruction
 /// that should be processed next by the reassociation pass.
-static Value *NegateValue(Value *V, Instruction *BI) {
+/// Also adds the intermediate instructions it creates to the ToRedo list so
+/// they can be revisited at the end for further reassociation opportunities.
+static Value *NegateValue(Value *V, Instruction *BI,
+                          SetVector<AssertingVH<Instruction>> &ToRedo) {
   if (Constant *C = dyn_cast<Constant>(V)) {
     if (C->getType()->isFPOrFPVectorTy()) {
       return ConstantExpr::getFNeg(C);
@@ -914,8 +917,8 @@
   if (BinaryOperator *I =
           isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
     // Push the negates through the add.
-    I->setOperand(0, NegateValue(I->getOperand(0), BI));
-    I->setOperand(1, NegateValue(I->getOperand(1), BI));
+    I->setOperand(0, NegateValue(I->getOperand(0), BI, ToRedo));
+    I->setOperand(1, NegateValue(I->getOperand(1), BI, ToRedo));
     if (I->getOpcode() == Instruction::Add) {
       I->setHasNoUnsignedWrap(false);
       I->setHasNoSignedWrap(false);
@@ -928,6 +931,10 @@
     //
     I->moveBefore(BI);
     I->setName(I->getName()+".neg");
+
+    // Add the intermediate negates to the redo list as processing them later
+    // could open up more reassociation opportunities.
+    ToRedo.insert(I);
     return I;
   }
 
@@ -968,12 +975,15 @@
     } else {
       TheNeg->andIRFlags(BI);
     }
+    ToRedo.insert(TheNeg);
     return TheNeg;
   }
 
   // Insert a 'neg' instruction that subtracts the value from zero to get the
   // negation.
-  return CreateNeg(V, V->getName() + ".neg", BI, BI);
+  BinaryOperator *NewNeg = CreateNeg(V, V->getName() + ".neg", BI, BI);
+  ToRedo.insert(NewNeg);
+  return NewNeg;
 }
 
 /// Return true if we should break up this subtract of X-Y into (X + -Y).
@@ -1007,14 +1017,15 @@
 
 /// If we have (X-Y), and if either X is an add, or if this is only used by an
 /// add, transform this into (X+(0-Y)) to promote better reassociation.
-static BinaryOperator *BreakUpSubtract(Instruction *Sub) {
+static BinaryOperator *
+BreakUpSubtract(Instruction *Sub, SetVector<AssertingVH<Instruction>> &ToRedo) {
   // Convert a subtract into an add and a neg instruction. This allows sub
   // instructions to be commuted with other add instructions.
   //
   // Calculate the negative value of Operand 1 of the sub instruction,
   // and set it as the RHS of the add instruction we just made.
   //
-  Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
+  Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
   BinaryOperator *New = CreateAdd(Sub->getOperand(0), NegVal, "", Sub, Sub);
   Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
   Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
@@ -2081,7 +2092,7 @@
   // see if we can convert it to X+-Y.
   if (I->getOpcode() == Instruction::Sub) {
     if (ShouldBreakUpSubtract(I)) {
-      Instruction *NI = BreakUpSubtract(I);
+      Instruction *NI = BreakUpSubtract(I, RedoInsts);
       RedoInsts.insert(I);
       MadeChange = true;
       I = NI;
@@ -2092,6 +2103,13 @@
           (!I->hasOneUse() ||
            !isReassociableOp(I->user_back(), Instruction::Mul))) {
         Instruction *NI = LowerNegateToMultiply(I);
+        // If the negate got simplified, add its users to the redo list so we
+        // can look for further reassociation opportunities (especially when
+        // this instruction is optimized again while redoing).
+        for (User *U : NI->users()) {
+          if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
+            RedoInsts.insert(Tmp);
+        }
         RedoInsts.insert(I);
         MadeChange = true;
         I = NI;
@@ -2099,7 +2117,7 @@
     }
   } else if (I->getOpcode() == Instruction::FSub) {
     if (ShouldBreakUpSubtract(I)) {
-      Instruction *NI = BreakUpSubtract(I);
+      Instruction *NI = BreakUpSubtract(I, RedoInsts);
       RedoInsts.insert(I);
       MadeChange = true;
       I = NI;
@@ -2109,7 +2127,14 @@
       if (isReassociableOp(I->getOperand(1), Instruction::FMul) &&
           (!I->hasOneUse() ||
            !isReassociableOp(I->user_back(), Instruction::FMul))) {
         Instruction *NI = LowerNegateToMultiply(I);
+        // If the negate got simplified, add its users to the redo list so we
+        // can look for further reassociation opportunities (especially when
+        // this instruction is optimized again while redoing).
+        for (User *U : NI->users()) {
+          if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
+            RedoInsts.insert(Tmp);
+        }
         RedoInsts.insert(I);
         MadeChange = true;
         I = NI;
@@ -2124,8 +2149,14 @@
   // If this is an interior node of a reassociable tree, ignore it until we
   // get to the root of the tree, to avoid N^2 analysis.
   unsigned Opcode = BO->getOpcode();
-  if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode)
+  if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) {
+    // During the initial run we will get to the root of the tree, but if we
+    // get here while redoing instructions there is no guarantee that the
+    // root will be visited, so redo its user later as well.
+    if (BO->user_back() != BO)
+      RedoInsts.insert(BO->user_back());
     return;
+  }
 
   // If this is an add tree that is used by a sub instruction, ignore it
   // until we process the subtract.
Index: test/Transforms/Reassociate/fast-ReassociateVector.ll
===================================================================
--- test/Transforms/Reassociate/fast-ReassociateVector.ll
+++ test/Transforms/Reassociate/fast-ReassociateVector.ll
@@ -16,9 +16,9 @@
 ; Check that a*a*b+a*a*c is turned into a*(a*(b+c)).
 define <2 x float> @test2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
 ; CHECK-LABEL: @test2
-; CHECK-NEXT: fadd fast <2 x float> %c, %b
-; CHECK-NEXT: fmul fast <2 x float> %a, %tmp2
-; CHECK-NEXT: fmul fast <2 x float> %tmp3, %a
+; CHECK-NEXT: [[TMP1:%tmp.*]] = fadd fast <2 x float> %c, %b
+; CHECK-NEXT: [[TMP2:%tmp.*]] = fmul fast <2 x float> %a, %a
+; CHECK-NEXT: fmul fast <2 x float> [[TMP2]], [[TMP1]]
 ; CHECK-NEXT: ret <2 x float>
 
   %t0 = fmul fast <2 x float> %a, %b
@@ -133,8 +133,8 @@
 ; Check x*y+y*x -> x*y*2.
 define <2 x double> @test11(<2 x double> %x, <2 x double> %y) {
 ; CHECK-LABEL: @test11
-; CHECK-NEXT: %factor = fmul fast <2 x double> %y, <double 2.000000e+00, double 2.000000e+00>
-; CHECK-NEXT: %tmp1 = fmul fast <2 x double> %factor, %x
+; CHECK-NEXT: %factor = fmul fast <2 x double> %x, <double 2.000000e+00, double 2.000000e+00>
+; CHECK-NEXT: %tmp1 = fmul fast <2 x double> %factor, %y
 ; CHECK-NEXT: ret <2 x double> %tmp1
 
   %1 = fmul fast <2 x double> %x, %y
Index: test/Transforms/Reassociate/fast-basictest.ll
===================================================================
--- test/Transforms/Reassociate/fast-basictest.ll
+++ test/Transforms/Reassociate/fast-basictest.ll
@@ -108,7 +108,7 @@
 ; CHECK-LABEL: @test7
 ; CHECK-NEXT: fadd fast float %C, %B
 ; CHECK-NEXT: fmul fast float %A, %A
-; CHECK-NEXT: fmul fast float %1, %tmp2
+; CHECK-NEXT: fmul fast float %tmp3, %tmp2
 ; CHECK-NEXT: ret float
 
   %aa = fmul fast float %A, %A
Index: test/Transforms/Reassociate/fast-fp-commute.ll
===================================================================
--- test/Transforms/Reassociate/fast-fp-commute.ll
+++ test/Transforms/Reassociate/fast-fp-commute.ll
@@ -33,8 +33,8 @@
 
 define float @test3(float %x, float %y) {
 ; CHECK-LABEL: test3
-; CHECK-NEXT: %factor = fmul fast float %y, 2.000000e+00
-; CHECK-NEXT: %tmp1 = fmul fast float %factor, %x
+; CHECK-NEXT: %factor = fmul fast float %x, 2.000000e+00
+; CHECK-NEXT: %tmp1 = fmul fast float %factor, %y
 ; CHECK-NEXT: ret float %tmp1
 
   %1 = fmul fast float %x, %y
Index: test/Transforms/Reassociate/fast-multistep.ll
===================================================================
--- test/Transforms/Reassociate/fast-multistep.ll
+++ test/Transforms/Reassociate/fast-multistep.ll
@@ -3,9 +3,9 @@
 define float @fmultistep1(float %a, float %b, float %c) {
 ; Check that a*a*b+a*a*c is turned into a*(a*(b+c)).
 ; CHECK-LABEL: @fmultistep1
-; CHECK-NEXT: fadd fast float %c, %b
-; CHECK-NEXT: fmul fast float %a, %tmp2
-; CHECK-NEXT: fmul fast float %tmp3, %a
+; CHECK-NEXT: [[TMP1:%tmp.*]] = fadd fast float %c, %b
+; CHECK-NEXT: [[TMP2:%tmp.*]] = fmul fast float %a, %a
+; CHECK-NEXT: fmul fast float [[TMP2]], [[TMP1]]
 ; CHECK-NEXT: ret float
 
   %t0 = fmul fast float %a, %b
Index: test/Transforms/Reassociate/multistep.ll
===================================================================
--- test/Transforms/Reassociate/multistep.ll
+++ test/Transforms/Reassociate/multistep.ll
@@ -8,9 +8,9 @@
   %t2 = mul i64 %a, %c
   %t3 = mul i64 %a, %t2 ; a*(a*c)
   %t4 = add i64 %t1, %t3
-; CHECK-NEXT: add i64 %c, %b
-; CHECK-NEXT: mul i64 %a, %tmp{{.*}}
-; CHECK-NEXT: mul i64 %tmp{{.*}}, %a
+; CHECK-NEXT: [[TMP1:%tmp.*]] = add i64 %c, %b
+; CHECK-NEXT: [[TMP2:%tmp.*]] = mul i64 %a, %a
+; CHECK-NEXT: mul i64 [[TMP2]], [[TMP1]]
 ; CHECK-NEXT: ret
   ret i64 %t4
 }
Index: test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll
===================================================================
--- /dev/null
+++ test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -reassociate -S | FileCheck %s
+; CHECK: faddsubAssoc1
+; CHECK: [[TMP1:%tmp.*]] = fmul fast half %a, 0xH4500
+; CHECK: [[TMP2:%tmp.*]] = fmul fast half %b, 0xH4500
+; CHECK: fsub fast half [[TMP2]], [[TMP1]]
+; CHECK: ret
+; Input is A op (B op C)
+define half @faddsubAssoc1(half %a, half %b) {
+  %tmp1 = fmul fast half %b, 0xH4200 ; 3 * b
+  %tmp2 = fmul fast half %a, 0xH4500 ; 5 * a
+  %tmp3 = fmul fast half %b, 0xH4000 ; 2 * b
+  %tmp4 = fsub fast half %tmp2, %tmp1 ; 5 * a - 3 * b
+  %tmp5 = fsub fast half %tmp3, %tmp4 ; 2 * b - (5 * a - 3 * b)
+  ret half %tmp5 ; = 5 * (b - a)
+}
+
+; CHECK: faddsubAssoc2
+; CHECK: [[TMP1:%tmp.*]] = fmul fast half %a, 0xH4500
+; CHECK: [[TMP2:%tmp.*]] = fmul fast half %b, 0xH3C00
+; CHECK: fadd fast half [[TMP2]], [[TMP1]]
+; CHECK: ret
+; Input is (A op B) op C
+define half @faddsubAssoc2(half %a, half %b) {
+  %tmp1 = fmul fast half %b, 0xH4200 ; 3 * b
+  %tmp2 = fmul fast half %a, 0xH4500 ; 5 * a
+  %tmp3 = fmul fast half %b, 0xH4000 ; 2 * b
+  %tmp4 = fadd fast half %tmp2, %tmp1 ; 5 * a + 3 * b
+  %tmp5 = fsub fast half %tmp4, %tmp3 ; (5 * a + 3 * b) - (2 * b)
+  ret half %tmp5 ; = 5 * a + b
+}
Index: test/Transforms/Reassociate/secondary.ll
===================================================================
--- test/Transforms/Reassociate/secondary.ll
+++ test/Transforms/Reassociate/secondary.ll
@@ -6,7 +6,7 @@
 
 ; CHECK: define
 ; CHECK-NOT: undef
-; CHECK: %factor = mul i32 %tmp3, -2
+; CHECK: %factor = mul i32 %tmp3.neg, 2
 ; CHECK-NOT: undef
 ; CHECK: }
Index: test/Transforms/Reassociate/xor_reassoc.ll
===================================================================
--- test/Transforms/Reassociate/xor_reassoc.ll
+++ test/Transforms/Reassociate/xor_reassoc.ll
@@ -88,8 +88,8 @@
   %xor1 = xor i32 %xor, %and
   ret i32 %xor1
 ; CHECK-LABEL: @xor_special2(
-; CHECK: %xor = xor i32 %y, 123
-; CHECK: %xor1 = xor i32 %xor, %x
+; CHECK: %xor = xor i32 %x, 123
+; CHECK: %xor1 = xor i32 %xor, %y
 ; CHECK: ret i32 %xor1
 }
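
For reviewers, a minimal before/after sketch of the case the redo-list changes target. It simply mirrors @faddsubAssoc1 from the new test above and assumes the pass is invoked as in that test's RUN line; the function name and the value names in the expected output are illustrative, not actual pass output.

; Breaking up the subtracts creates intermediate negate instructions; putting
; them on the redo list lets the pass revisit and fold the whole expression.
define half @example(half %a, half %b) {
  %t1 = fmul fast half %b, 0xH4200   ; 3 * b
  %t2 = fmul fast half %a, 0xH4500   ; 5 * a
  %t3 = fmul fast half %b, 0xH4000   ; 2 * b
  %t4 = fsub fast half %t2, %t1      ; 5 * a - 3 * b
  %t5 = fsub fast half %t3, %t4      ; 2 * b - (5 * a - 3 * b)
  ret half %t5                       ; = 5 * (b - a)
}

; Expected shape after opt -reassociate, per the CHECK lines above:
;   %m1 = fmul fast half %a, 0xH4500 ; 5 * a
;   %m2 = fmul fast half %b, 0xH4500 ; 5 * b
;   %r  = fsub fast half %m2, %m1    ; 5 * (b - a)
;   ret half %r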