Index: llvm/lib/Transforms/Scalar/Reassociate.cpp =================================================================== --- llvm/lib/Transforms/Scalar/Reassociate.cpp +++ llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -142,12 +142,20 @@ isOr = true; } +/// Return true if I has both the 'reassoc' and 'nsz' FastMathFlags set, the +/// pair required for general reassociation. (This routine is only intended to +/// be called for floating-point operations; the assert enforces an FP type.) +static bool hasFPAssociativeFlags(Instruction *I) { + assert(I && I->getType()->isFPOrFPVectorTy() && "Should only check FP ops"); + return I->hasAllowReassoc() && I->hasNoSignedZeros(); +} + /// Return true if V is an instruction of the specified opcode and if it /// only has one use. static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) { auto *I = dyn_cast(V); if (I && I->hasOneUse() && I->getOpcode() == Opcode) - if (!isa(I) || I->isFast()) + if (!isa(I) || hasFPAssociativeFlags(I)) return cast(I); return nullptr; } @@ -157,7 +165,7 @@ auto *I = dyn_cast(V); if (I && I->hasOneUse() && (I->getOpcode() == Opcode1 || I->getOpcode() == Opcode2)) - if (!isa(I) || I->isFast()) + if (!isa(I) || hasFPAssociativeFlags(I)) return cast(I); return nullptr; } @@ -573,7 +581,7 @@ assert((!isa(Op) || cast(Op)->getOpcode() != Opcode || (isa(Op) && - !cast(Op)->isFast())) && + !hasFPAssociativeFlags(cast(Op)))) && "Should have been handled above!"); assert(Op->hasOneUse() && "Has uses outside the expression tree!"); @@ -2216,8 +2224,9 @@ if (Instruction *Res = canonicalizeNegFPConstants(I)) I = Res; - // Don't optimize floating-point instructions unless they are 'fast'. - if (I->getType()->isFPOrFPVectorTy() && !I->isFast()) + // Don't optimize floating-point instructions unless they have the + // appropriate FastMathFlags for reassociation enabled. + if (I->getType()->isFPOrFPVectorTy() && !hasFPAssociativeFlags(I)) return; // Do not reassociate boolean (i1) expressions. 
We want to preserve the Index: llvm/test/Transforms/PhaseOrdering/fast-basictest.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/fast-basictest.ll +++ llvm/test/Transforms/PhaseOrdering/fast-basictest.ll @@ -122,14 +122,10 @@ ret float %4 } -; TODO: check if it is possible to perform the optimization without 'fast' -; with 'reassoc' and 'nsz' only. define float @test15_reassoc_nsz(float %b, float %a) { ; CHECK-LABEL: @test15_reassoc_nsz( -; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz float [[A:%.*]], 1.234000e+03 -; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc nsz float [[TMP1]], [[B:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = fsub reassoc nsz float [[TMP2]], [[A]] -; CHECK-NEXT: ret float [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz float [[B:%.*]], 1.234000e+03 +; CHECK-NEXT: ret float [[TMP1]] ; %1 = fadd reassoc nsz float %a, 1234.0 %2 = fadd reassoc nsz float %b, %1 @@ -197,15 +193,18 @@ ret float %g } -; TODO: check if it is possible to perform the optimization without 'fast' -; with 'reassoc' and 'nsz' only. 
define float @test16_reassoc_nsz(float %a, float %b, float %z) { -; CHECK-LABEL: @test16_reassoc_nsz( -; CHECK-NEXT: [[C:%.*]] = fneg reassoc nsz float [[Z:%.*]] -; CHECK-NEXT: [[D:%.*]] = fmul reassoc nsz float [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[E:%.*]] = fmul reassoc nsz float [[D]], [[C]] -; CHECK-NEXT: [[G:%.*]] = fmul reassoc nsz float [[E]], -1.234500e+04 -; CHECK-NEXT: ret float [[G]] +; REASSOC_AND_IC-LABEL: @test16_reassoc_nsz( +; REASSOC_AND_IC-NEXT: [[C:%.*]] = fmul reassoc nsz float [[A:%.*]], 1.234500e+04 +; REASSOC_AND_IC-NEXT: [[E:%.*]] = fmul reassoc nsz float [[C]], [[B:%.*]] +; REASSOC_AND_IC-NEXT: [[F:%.*]] = fmul reassoc nsz float [[E]], [[Z:%.*]] +; REASSOC_AND_IC-NEXT: ret float [[F]] +; +; O2-LABEL: @test16_reassoc_nsz( +; O2-NEXT: [[D:%.*]] = fmul reassoc nsz float [[A:%.*]], 1.234500e+04 +; O2-NEXT: [[E:%.*]] = fmul reassoc nsz float [[D]], [[B:%.*]] +; O2-NEXT: [[G:%.*]] = fmul reassoc nsz float [[E]], [[Z:%.*]] +; O2-NEXT: ret float [[G]] ; %c = fsub reassoc nsz float 0.000000e+00, %z %d = fmul reassoc nsz float %a, %b @@ -282,7 +281,7 @@ define float @test19_reassoc_nsz(float %a, float %b, float %c) nounwind { ; CHECK-LABEL: @test19_reassoc_nsz( -; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz float [[B:%.*]], [[C:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz float [[C:%.*]], [[B:%.*]] ; CHECK-NEXT: [[T7:%.*]] = fneg reassoc nsz float [[TMP1]] ; CHECK-NEXT: ret float [[T7]] ; Index: llvm/test/Transforms/Reassociate/fast-basictest.ll =================================================================== --- llvm/test/Transforms/Reassociate/fast-basictest.ll +++ llvm/test/Transforms/Reassociate/fast-basictest.ll @@ -181,7 +181,6 @@ } ; (-X)*Y + Z -> Z-X*Y - define float @test7(float %X, float %Y, float %Z) { ; CHECK-LABEL: @test7( ; CHECK-NEXT: [[B:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]] @@ -208,10 +207,9 @@ define float @test7_reassoc_nsz(float %X, float %Y, float %Z) { ; CHECK-LABEL: @test7_reassoc_nsz( -; CHECK-NEXT: [[A:%.*]] 
= fsub reassoc nsz float 0.000000e+00, [[X:%.*]] -; CHECK-NEXT: [[B:%.*]] = fmul reassoc nsz float [[A]], [[Y:%.*]] -; CHECK-NEXT: [[C:%.*]] = fadd reassoc nsz float [[B]], [[Z:%.*]] -; CHECK-NEXT: ret float [[C]] +; CHECK-NEXT: [[B:%.*]] = fmul reassoc nsz float [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = fsub reassoc nsz float [[Z:%.*]], [[B]] +; CHECK-NEXT: ret float [[TMP1]] ; %A = fsub reassoc nsz float 0.0, %X %B = fmul reassoc nsz float %A, %Y @@ -328,11 +326,10 @@ define float @test12_reassoc_nsz(float %X1, float %X2, float %X3) { ; CHECK-LABEL: @test12_reassoc_nsz( -; CHECK-NEXT: [[A:%.*]] = fsub reassoc nsz float 0.000000e+00, [[X1:%.*]] -; CHECK-NEXT: [[B:%.*]] = fmul reassoc nsz float [[A]], [[X2:%.*]] -; CHECK-NEXT: [[C:%.*]] = fmul reassoc nsz float [[X1]], [[X3:%.*]] -; CHECK-NEXT: [[D:%.*]] = fadd reassoc nsz float [[B]], [[C]] -; CHECK-NEXT: ret float [[D]] +; CHECK-NEXT: [[B:%.*]] = fmul reassoc nsz float [[X2:%.*]], [[X1:%.*]] +; CHECK-NEXT: [[C:%.*]] = fmul reassoc nsz float [[X3:%.*]], [[X1]] +; CHECK-NEXT: [[TMP1:%.*]] = fsub reassoc nsz float [[C]], [[B]] +; CHECK-NEXT: ret float [[TMP1]] ; %A = fsub reassoc nsz float 0.000000e+00, %X1 %B = fmul reassoc nsz float %A, %X2 ; -X1*X2 @@ -456,13 +453,12 @@ ret float %4 } +; TODO: check if we can remove dead fsub. define float @test15_reassoc_nsz(float %b, float %a) { ; CHECK-LABEL: @test15_reassoc_nsz( -; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz float [[A:%.*]], 1.234000e+03 -; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc nsz float [[B:%.*]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = fsub reassoc nsz float 0.000000e+00, [[A]] -; CHECK-NEXT: [[TMP4:%.*]] = fadd reassoc nsz float [[TMP3]], [[TMP2]] -; CHECK-NEXT: ret float [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = fsub reassoc nsz float 0.000000e+00, [[A:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc nsz float [[B:%.*]], 1.234000e+03 +; CHECK-NEXT: ret float [[TMP2]] ; %1 = fadd reassoc nsz float %a, 1234.0 %2 = fadd reassoc nsz float %b, %1