Index: llvm/lib/Transforms/Scalar/Reassociate.cpp =================================================================== --- llvm/lib/Transforms/Scalar/Reassociate.cpp +++ llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -142,12 +142,21 @@ isOr = true; } + +/// Return true if I is an instruction with the 'fast' flags set, or if I is +/// associative with the subset of 'fast' required for reassociation set. +/// This routine is only intended to be called for floating-point operations. +static bool isFPFastOrFPAssociative(Instruction *I) { + assert(I && I->getType()->isFPOrFPVectorTy() && "Should only check FP ops"); + return I->isFast() || I->isAssociative(); +} + /// Return true if V is an instruction of the specified opcode and if it /// only has one use. static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) { auto *I = dyn_cast(V); if (I && I->hasOneUse() && I->getOpcode() == Opcode) - if (!isa(I) || I->isFast()) + if (!isa(I) || isFPFastOrFPAssociative(I)) return cast(I); return nullptr; } @@ -157,7 +166,7 @@ auto *I = dyn_cast(V); if (I && I->hasOneUse() && (I->getOpcode() == Opcode1 || I->getOpcode() == Opcode2)) - if (!isa(I) || I->isFast()) + if (!isa(I) || isFPFastOrFPAssociative(I)) return cast(I); return nullptr; } @@ -449,7 +458,8 @@ /// of the expression) if it can turn them into binary operators of the right /// type and thus make the expression bigger. static bool LinearizeExprTree(Instruction *I, - SmallVectorImpl &Ops) { + SmallVectorImpl &Ops, + ReassociatePass::OrderedSet &ToRedo) { assert((isa(I) || isa(I)) && "Expected a UnaryOperator or BinaryOperator!"); LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n'); @@ -572,7 +582,7 @@ assert((!isa(Op) || cast(Op)->getOpcode() != Opcode || (isa(Op) && - !cast(Op)->isFast())) && + !isFPFastOrFPAssociative(cast(Op)))) && "Should have been handled above!"); assert(Op->hasOneUse() && "Has uses outside the expression tree!"); @@ -583,9 +593,14 @@ (Opcode == Instruction::FMul && match(Tmp, m_FNeg(m_Value())))) { LLVM_DEBUG(dbgs() << "MORPH LEAF: " << *Op << " (" << Weight << ") TO "); - Tmp = LowerNegateToMultiply(Tmp); - LLVM_DEBUG(dbgs() << *Tmp << '\n'); - Worklist.push_back(std::make_pair(Tmp, Weight)); + Instruction *NI = LowerNegateToMultiply(Tmp); + LLVM_DEBUG(dbgs() << *NI << '\n'); + Worklist.push_back(std::make_pair(NI, Weight)); + for (User *U : NI->users()) { + if (BinaryOperator *UTmp = dyn_cast(U)) + ToRedo.insert(UTmp); + } + ToRedo.insert(Tmp); Changed = true; continue; } @@ -1141,7 +1156,7 @@ return nullptr; SmallVector Tree; - MadeChange |= LinearizeExprTree(BO, Tree); + MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts); SmallVector Factors; Factors.reserve(Tree.size()); for (unsigned i = 0, e = Tree.size(); i != e; ++i) { @@ -2206,8 +2221,9 @@ if (Instruction *Res = canonicalizeNegFPConstants(I)) I = Res; - // Don't optimize floating-point instructions unless they are 'fast'. - if (I->getType()->isFPOrFPVectorTy() && !I->isFast()) + // Don't optimize floating-point instructions unless they are 'fast', or the + // associative subset of 'fast' is set. + if (I->getType()->isFPOrFPVectorTy() && !isFPFastOrFPAssociative(I)) return; // Do not reassociate boolean (i1) expressions. We want to preserve the @@ -2320,7 +2336,7 @@ // First, walk the expression tree, linearizing the tree, collecting the // operand information. SmallVector Tree; - MadeChange |= LinearizeExprTree(I, Tree); + MadeChange |= LinearizeExprTree(I, Tree, RedoInsts); SmallVector Ops; Ops.reserve(Tree.size()); for (const RepeatedValue &E : Tree) Index: llvm/test/Transforms/PhaseOrdering/fast-basictest.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/fast-basictest.ll +++ llvm/test/Transforms/PhaseOrdering/fast-basictest.ll @@ -122,14 +122,16 @@ ret float %4 } -; TODO: check if it is possible to perform the optimization without 'fast' -; with 'reassoc' and 'nsz' only. define float @test15_reassoc_nsz(float %b, float %a) { -; CHECK-LABEL: @test15_reassoc_nsz( -; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz float [[A:%.*]], 1.234000e+03 -; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc nsz float [[TMP1]], [[B:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = fsub reassoc nsz float [[TMP2]], [[A]] -; CHECK-NEXT: ret float [[TMP3]] +; REASSOC_AND_IC-LABEL: @test15_reassoc_nsz( +; REASSOC_AND_IC-NEXT: [[TMP1:%.*]] = fadd reassoc nsz float [[B:%.*]], 1.234000e+03 +; REASSOC_AND_IC-NEXT: ret float [[TMP1]] +; +; O2-LABEL: @test15_reassoc_nsz( +; O2-NEXT: [[TMP1:%.*]] = fadd reassoc nsz float [[A:%.*]], 1.234000e+03 +; O2-NEXT: [[TMP2:%.*]] = fadd reassoc nsz float [[TMP1]], [[B:%.*]] +; O2-NEXT: [[TMP3:%.*]] = fsub reassoc nsz float [[TMP2]], [[A]] +; O2-NEXT: ret float [[TMP3]] ; %1 = fadd reassoc nsz float %a, 1234.0 %2 = fadd reassoc nsz float %b, %1 @@ -197,15 +199,18 @@ ret float %g } -; TODO: check if it is possible to perform the optimization without 'fast' -; with 'reassoc' and 'nsz' only. define float @test16_reassoc_nsz(float %a, float %b, float %z) { -; CHECK-LABEL: @test16_reassoc_nsz( -; CHECK-NEXT: [[C:%.*]] = fneg reassoc nsz float [[Z:%.*]] -; CHECK-NEXT: [[D:%.*]] = fmul reassoc nsz float [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[E:%.*]] = fmul reassoc nsz float [[D]], [[C]] -; CHECK-NEXT: [[G:%.*]] = fmul reassoc nsz float [[E]], -1.234500e+04 -; CHECK-NEXT: ret float [[G]] +; REASSOC_AND_IC-LABEL: @test16_reassoc_nsz( +; REASSOC_AND_IC-NEXT: [[C:%.*]] = fmul reassoc nsz float [[A:%.*]], 1.234500e+04 +; REASSOC_AND_IC-NEXT: [[E:%.*]] = fmul reassoc nsz float [[C]], [[B:%.*]] +; REASSOC_AND_IC-NEXT: [[F:%.*]] = fmul reassoc nsz float [[E]], [[Z:%.*]] +; REASSOC_AND_IC-NEXT: ret float [[F]] +; +; O2-LABEL: @test16_reassoc_nsz( +; O2-NEXT: [[D:%.*]] = fmul reassoc nsz float [[A:%.*]], 1.234500e+04 +; O2-NEXT: [[E:%.*]] = fmul reassoc nsz float [[D]], [[B:%.*]] +; O2-NEXT: [[G:%.*]] = fmul reassoc nsz float [[E]], [[Z:%.*]] +; O2-NEXT: ret float [[G]] ; %c = fsub reassoc nsz float 0.000000e+00, %z %d = fmul reassoc nsz float %a, %b @@ -282,7 +287,7 @@ define float @test19_reassoc_nsz(float %a, float %b, float %c) nounwind { ; CHECK-LABEL: @test19_reassoc_nsz( -; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz float [[B:%.*]], [[C:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz float [[C:%.*]], [[B:%.*]] ; CHECK-NEXT: [[T7:%.*]] = fneg reassoc nsz float [[TMP1]] ; CHECK-NEXT: ret float [[T7]] ; Index: llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll =================================================================== --- llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll +++ llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll @@ -281,11 +281,10 @@ define <2 x float> @test10(<2 x float> %a, <2 x float> %b, <2 x float> %z) { ; CHECK-LABEL: @test10( -; CHECK-NEXT: [[TMP1:%.*]] = fsub fast <2 x float> zeroinitializer, zeroinitializer ; CHECK-NEXT: [[C:%.*]] = fmul fast <2 x float> [[A:%.*]], ; CHECK-NEXT: [[E:%.*]] = fmul fast <2 x float> [[C]], [[Z:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <2 x float> [[E]], zeroinitializer -; CHECK-NEXT: ret <2 x float> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <2 x float> [[E]], zeroinitializer +; CHECK-NEXT: ret <2 x float> [[TMP1]] ; %d = fmul fast <2 x float> %z, %c = fsub fast <2 x float> , %d @@ -296,7 +295,6 @@ define <2 x float> @test10_unary_fneg(<2 x float> %a, <2 x float> %b, <2 x float> %z) { ; CHECK-LABEL: @test10_unary_fneg( -; CHECK-NEXT: [[TMP1:%.*]] = fneg fast <2 x float> zeroinitializer ; CHECK-NEXT: [[E:%.*]] = fmul fast <2 x float> [[A:%.*]], ; CHECK-NEXT: [[F:%.*]] = fmul fast <2 x float> [[E]], [[Z:%.*]] ; CHECK-NEXT: ret <2 x float> [[F]] Index: llvm/test/Transforms/Reassociate/fast-basictest.ll =================================================================== --- llvm/test/Transforms/Reassociate/fast-basictest.ll +++ llvm/test/Transforms/Reassociate/fast-basictest.ll @@ -181,16 +181,11 @@ } ; (-X)*Y + Z -> Z-X*Y -; TODO: check why IR transformation of test7 with 'fast' math flag -; is worse than without it (and even without transformation) - define float @test7(float %X, float %Y, float %Z) { ; CHECK-LABEL: @test7( -; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float 0.000000e+00, 0.000000e+00 -; CHECK-NEXT: [[A:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[B:%.*]] = fmul fast float [[A]], 1.000000e+00 -; CHECK-NEXT: [[TMP2:%.*]] = fsub fast float [[Z:%.*]], [[B]] -; CHECK-NEXT: ret float [[TMP2]] +; CHECK-NEXT: [[B:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float [[Z:%.*]], [[B]] +; CHECK-NEXT: ret float [[TMP1]] ; %A = fsub fast float 0.0, %X %B = fmul fast float %A, %Y @@ -200,11 +195,9 @@ define float @test7_unary_fneg(float %X, float %Y, float %Z) { ; CHECK-LABEL: @test7_unary_fneg( -; CHECK-NEXT: [[TMP1:%.*]] = fneg fast float 0.000000e+00 -; CHECK-NEXT: [[A:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[B:%.*]] = fmul fast float [[A]], 1.000000e+00 -; CHECK-NEXT: [[TMP2:%.*]] = fsub fast float [[Z:%.*]], [[B]] -; CHECK-NEXT: ret float [[TMP2]] +; CHECK-NEXT: [[B:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float [[Z:%.*]], [[B]] +; CHECK-NEXT: ret float [[TMP1]] ; %A = fneg fast float %X %B = fmul fast float %A, %Y @@ -214,10 +207,9 @@ define float @test7_reassoc_nsz(float %X, float %Y, float %Z) { ; CHECK-LABEL: @test7_reassoc_nsz( -; CHECK-NEXT: [[A:%.*]] = fsub reassoc nsz float 0.000000e+00, [[X:%.*]] -; CHECK-NEXT: [[B:%.*]] = fmul reassoc nsz float [[A]], [[Y:%.*]] -; CHECK-NEXT: [[C:%.*]] = fadd reassoc nsz float [[B]], [[Z:%.*]] -; CHECK-NEXT: ret float [[C]] +; CHECK-NEXT: [[B:%.*]] = fmul reassoc nsz float [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = fsub reassoc nsz float [[Z:%.*]], [[B]] +; CHECK-NEXT: ret float [[TMP1]] ; %A = fsub reassoc nsz float 0.0, %X %B = fmul reassoc nsz float %A, %Y @@ -276,7 +268,6 @@ define float @test11(float %X) { ; CHECK-LABEL: @test11( -; CHECK-NEXT: [[TMP1:%.*]] = fneg fast float 0.000000e+00 ; CHECK-NEXT: [[FACTOR:%.*]] = fmul fast float [[X:%.*]], -3.000000e+00 ; CHECK-NEXT: [[Z:%.*]] = fadd fast float [[FACTOR]], 6.000000e+00 ; CHECK-NEXT: ret float [[Z]] @@ -289,17 +280,12 @@ ret float %Z } -; TODO: check why IR transformation of test12 with 'fast' math flag -; is worse than without it (and even without transformation) - define float @test12(float %X1, float %X2, float %X3) { ; CHECK-LABEL: @test12( -; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float 0.000000e+00, 0.000000e+00 -; CHECK-NEXT: [[A:%.*]] = fmul fast float [[X2:%.*]], [[X1:%.*]] -; CHECK-NEXT: [[B:%.*]] = fmul fast float [[A]], 1.000000e+00 +; CHECK-NEXT: [[B:%.*]] = fmul fast float [[X2:%.*]], [[X1:%.*]] ; CHECK-NEXT: [[C:%.*]] = fmul fast float [[X3:%.*]], [[X1]] -; CHECK-NEXT: [[TMP2:%.*]] = fsub fast float [[C]], [[B]] -; CHECK-NEXT: ret float [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float [[C]], [[B]] +; CHECK-NEXT: ret float [[TMP1]] ; %A = fsub fast float 0.000000e+00, %X1 %B = fmul fast float %A, %X2 ; -X1*X2 @@ -310,12 +296,10 @@ define float @test12_unary_fneg(float %X1, float %X2, float %X3) { ; CHECK-LABEL: @test12_unary_fneg( -; CHECK-NEXT: [[TMP1:%.*]] = fneg fast float 0.000000e+00 -; CHECK-NEXT: [[A:%.*]] = fmul fast float [[X2:%.*]], [[X1:%.*]] -; CHECK-NEXT: [[B:%.*]] = fmul fast float [[A]], 1.000000e+00 +; CHECK-NEXT: [[B:%.*]] = fmul fast float [[X2:%.*]], [[X1:%.*]] ; CHECK-NEXT: [[C:%.*]] = fmul fast float [[X3:%.*]], [[X1]] -; CHECK-NEXT: [[TMP2:%.*]] = fsub fast float [[C]], [[B]] -; CHECK-NEXT: ret float [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float [[C]], [[B]] +; CHECK-NEXT: ret float [[TMP1]] ; %A = fneg fast float %X1 %B = fmul fast float %A, %X2 ; -X1*X2 @@ -326,11 +310,10 @@ define float @test12_reassoc_nsz(float %X1, float %X2, float %X3) { ; CHECK-LABEL: @test12_reassoc_nsz( -; CHECK-NEXT: [[A:%.*]] = fsub reassoc nsz float 0.000000e+00, [[X1:%.*]] -; CHECK-NEXT: [[B:%.*]] = fmul reassoc nsz float [[A]], [[X2:%.*]] -; CHECK-NEXT: [[C:%.*]] = fmul reassoc nsz float [[X1]], [[X3:%.*]] -; CHECK-NEXT: [[D:%.*]] = fadd reassoc nsz float [[B]], [[C]] -; CHECK-NEXT: ret float [[D]] +; CHECK-NEXT: [[B:%.*]] = fmul reassoc nsz float [[X2:%.*]], [[X1:%.*]] +; CHECK-NEXT: [[C:%.*]] = fmul reassoc nsz float [[X3:%.*]], [[X1]] +; CHECK-NEXT: [[TMP1:%.*]] = fsub reassoc nsz float [[C]], [[B]] +; CHECK-NEXT: ret float [[TMP1]] ; %A = fsub reassoc nsz float 0.000000e+00, %X1 %B = fmul reassoc nsz float %A, %X2 ; -X1*X2 @@ -454,13 +437,12 @@ ret float %4 } +; TODO: check if we can remove dead fsub. define float @test15_reassoc_nsz(float %b, float %a) { ; CHECK-LABEL: @test15_reassoc_nsz( -; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz float [[A:%.*]], 1.234000e+03 -; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc nsz float [[B:%.*]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = fsub reassoc nsz float 0.000000e+00, [[A]] -; CHECK-NEXT: [[TMP4:%.*]] = fadd reassoc nsz float [[TMP3]], [[TMP2]] -; CHECK-NEXT: ret float [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = fsub reassoc nsz float 0.000000e+00, [[A:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc nsz float [[B:%.*]], 1.234000e+03 +; CHECK-NEXT: ret float [[TMP2]] ; %1 = fadd reassoc nsz float %a, 1234.0 %2 = fadd reassoc nsz float %b, %1 @@ -490,12 +472,11 @@ define float @test16(float %a, float %b, float %z) { ; CHECK-LABEL: @test16( -; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float 0.000000e+00, 0.000000e+00 ; CHECK-NEXT: [[C:%.*]] = fmul fast float [[A:%.*]], 1.234500e+04 ; CHECK-NEXT: [[E:%.*]] = fmul fast float [[C]], [[B:%.*]] ; CHECK-NEXT: [[F:%.*]] = fmul fast float [[E]], [[Z:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = fadd fast float [[F]], 0.000000e+00 -; CHECK-NEXT: ret float [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = fadd fast float [[F]], 0.000000e+00 +; CHECK-NEXT: ret float [[TMP1]] ; %c = fsub fast float 0.000000e+00, %z %d = fmul fast float %a, %b @@ -507,7 +488,6 @@ define float @test16_unary_fneg(float %a, float %b, float %z) { ; CHECK-LABEL: @test16_unary_fneg( -; CHECK-NEXT: [[TMP1:%.*]] = fneg fast float 0.000000e+00 ; CHECK-NEXT: [[E:%.*]] = fmul fast float [[A:%.*]], 1.234500e+04 ; CHECK-NEXT: [[F:%.*]] = fmul fast float [[E]], [[B:%.*]] ; CHECK-NEXT: [[G:%.*]] = fmul fast float [[F]], [[Z:%.*]] @@ -539,16 +519,14 @@ } ; TODO: check if we can remove: -; - fsub fast 0, 0 ; - fadd fast x, 0 ; ... as 'fast' implies 'nsz' define float @test17(float %a, float %b, float %z) { ; CHECK-LABEL: @test17( -; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float 0.000000e+00, 0.000000e+00 ; CHECK-NEXT: [[C:%.*]] = fmul fast float [[A:%.*]], 4.000000e+01 ; CHECK-NEXT: [[E:%.*]] = fmul fast float [[C]], [[Z:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = fadd fast float [[E]], 0.000000e+00 -; CHECK-NEXT: ret float [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = fadd fast float [[E]], 0.000000e+00 +; CHECK-NEXT: ret float [[TMP1]] ; %d = fmul fast float %z, 4.000000e+01 %c = fsub fast float 0.000000e+00, %d @@ -557,10 +535,8 @@ ret float %f } -; TODO: check if we can remove fneg fast 0 as 'fast' implies 'nsz' define float @test17_unary_fneg(float %a, float %b, float %z) { ; CHECK-LABEL: @test17_unary_fneg( -; CHECK-NEXT: [[TMP1:%.*]] = fneg fast float 0.000000e+00 ; CHECK-NEXT: [[E:%.*]] = fmul fast float [[A:%.*]], 4.000000e+01 ; CHECK-NEXT: [[F:%.*]] = fmul fast float [[E]], [[Z:%.*]] ; CHECK-NEXT: ret float [[F]] Index: llvm/test/Transforms/Reassociate/pr42349.ll =================================================================== --- llvm/test/Transforms/Reassociate/pr42349.ll +++ llvm/test/Transforms/Reassociate/pr42349.ll @@ -5,7 +5,6 @@ ; CHECK-LABEL: @wibble( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[TMP7:%.*]] = fmul float [[TMP6:%.*]], -1.000000e+00 -; CHECK-NEXT: [[TMP0:%.*]] = fsub float -0.000000e+00, 0.000000e+00 ; CHECK-NEXT: [[TMP9:%.*]] = fmul fast float [[TMP6]], 0xFFF0000000000000 ; CHECK-NEXT: ret float [[TMP9]] ;