Index: lib/Transforms/InstCombine/InstCombineAddSub.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -511,7 +511,8 @@ } Value *FAddCombine::simplify(Instruction *I) { - assert(I->isFast() && "Expected 'fast' instruction"); + assert(I->hasAllowReassoc() && I->hasNoSignedZeros() && + "Expected 'reassoc'+'nsz' instruction"); // Currently we are not able to handle vector type. if (I->getType()->isVectorTy()) @@ -1378,7 +1379,7 @@ if (Value *V = SimplifySelectsFeedingBinaryOp(I, LHS, RHS)) return replaceInstUsesWith(I, V); - if (I.isFast()) { + if (I.hasAllowReassoc() && I.hasNoSignedZeros()) { if (Value *V = FAddCombine(Builder).simplify(&I)) return replaceInstUsesWith(I, V); } @@ -1747,7 +1748,7 @@ if (Value *V = SimplifySelectsFeedingBinaryOp(I, Op0, Op1)) return replaceInstUsesWith(I, V); - if (I.isFast()) { + if (I.hasAllowReassoc() && I.hasNoSignedZeros()) { if (Value *V = FAddCombine(Builder).simplify(&I)) return replaceInstUsesWith(I, V); } Index: test/Transforms/InstCombine/fast-math.ll =================================================================== --- test/Transforms/InstCombine/fast-math.ll +++ test/Transforms/InstCombine/fast-math.ll @@ -37,16 +37,42 @@ } ; C * f1 + f1 = (C+1) * f1 +; TODO: The particular case where C is 2 (so the folded result is 3.0*f1) is +; always safe, and so doesn't need any FMF. +; That is, (x + x + x) and (3*x) each have only a single rounding. define double @fold3(double %f1) { ; CHECK-LABEL: @fold3( -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[F1:%.*]], 3.000000e+00 +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[F1:%.*]], 6.000000e+00 ; CHECK-NEXT: ret double [[TMP1]] ; - %t1 = fmul fast double 2.000000e+00, %f1 + %t1 = fmul fast double 5.000000e+00, %f1 %t2 = fadd fast double %f1, %t1 ret double %t2 } +; Check again with 'reassoc' and 'nsz' ('nsz' not technically required). 
+define double @fold3_reassoc_nsz(double %f1) { +; CHECK-LABEL: @fold3_reassoc_nsz( +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nsz double [[F1:%.*]], 6.000000e+00 +; CHECK-NEXT: ret double [[TMP1]] +; + %t1 = fmul reassoc nsz double 5.000000e+00, %f1 + %t2 = fadd reassoc nsz double %f1, %t1 + ret double %t2 +} + +; TODO: This doesn't require 'nsz'. It should fold to f1 * 6.0. +define double @fold3_reassoc(double %f1) { +; CHECK-LABEL: @fold3_reassoc( +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc double [[F1:%.*]], 5.000000e+00 +; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc double [[TMP1]], [[F1]] +; CHECK-NEXT: ret double [[TMP2]] +; + %t1 = fmul reassoc double 5.000000e+00, %f1 + %t2 = fadd reassoc double %f1, %t1 + ret double %t2 +} + ; (C1 - X) + (C2 - Y) => (C1+C2) - (X + Y) define float @fold4(float %f1, float %f2) { ; CHECK-LABEL: @fold4( @@ -60,8 +86,35 @@ ret float %add } +; Check again with 'reassoc' and 'nsz' ('nsz' not technically required). +define float @fold4_reassoc_nsz(float %f1, float %f2) { +; CHECK-LABEL: @fold4_reassoc_nsz( +; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz float [[F1:%.*]], [[F2:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = fsub reassoc nsz float 9.000000e+00, [[TMP1]] +; CHECK-NEXT: ret float [[TMP2]] +; + %sub = fsub float 4.000000e+00, %f1 + %sub1 = fsub float 5.000000e+00, %f2 + %add = fadd reassoc nsz float %sub, %sub1 + ret float %add +} + +; TODO: This doesn't require 'nsz'. It should fold to (9.0 - (f1 + f2)). 
+define float @fold4_reassoc(float %f1, float %f2) { +; CHECK-LABEL: @fold4_reassoc( +; CHECK-NEXT: [[TMP1:%.*]] = fsub float 4.000000e+00, [[F1:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = fsub float 5.000000e+00, [[F2:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret float [[TMP3]] +; + %sub = fsub float 4.000000e+00, %f1 + %sub1 = fsub float 5.000000e+00, %f2 + %add = fadd reassoc float %sub, %sub1 + ret float %add +} + ; (X + C1) + C2 => X + (C1 + C2) -define float @fold5(float %f1, float %f2) { +define float @fold5(float %f1) { ; CHECK-LABEL: @fold5( ; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float [[F1:%.*]], 9.000000e+00 ; CHECK-NEXT: ret float [[ADD1]] @@ -71,15 +124,65 @@ ret float %add1 } -; (X + X) + X => 3.0 * X +; Check again with 'reassoc' and 'nsz' ('nsz' not technically required). +define float @fold5_reassoc_nsz(float %f1) { +; CHECK-LABEL: @fold5_reassoc_nsz( +; CHECK-NEXT: [[ADD1:%.*]] = fadd reassoc nsz float [[F1:%.*]], 9.000000e+00 +; CHECK-NEXT: ret float [[ADD1]] +; + %add = fadd float %f1, 4.000000e+00 + %add1 = fadd reassoc nsz float %add, 5.000000e+00 + ret float %add1 +} + +; TODO: This doesn't require 'nsz'. 
It should fold to f1 + 9.0 +define float @fold5_reassoc(float %f1) { +; CHECK-LABEL: @fold5_reassoc( +; CHECK-NEXT: [[ADD:%.*]] = fadd float [[F1:%.*]], 4.000000e+00 +; CHECK-NEXT: [[ADD1:%.*]] = fadd reassoc float [[ADD]], 5.000000e+00 +; CHECK-NEXT: ret float [[ADD1]] +; + %add = fadd float %f1, 4.000000e+00 + %add1 = fadd reassoc float %add, 5.000000e+00 + ret float %add1 +} + +; (X + X) + X + X => 4.0 * X define float @fold6(float %f1) { ; CHECK-LABEL: @fold6( -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 3.000000e+00 +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 4.000000e+00 ; CHECK-NEXT: ret float [[TMP1]] ; %t1 = fadd fast float %f1, %f1 %t2 = fadd fast float %f1, %t1 - ret float %t2 + %t3 = fadd fast float %t2, %f1 + ret float %t3 +} + +; Check again with 'reassoc' and 'nsz' ('nsz' not technically required). +define float @fold6_reassoc_nsz(float %f1) { +; CHECK-LABEL: @fold6_reassoc_nsz( +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nsz float [[F1:%.*]], 4.000000e+00 +; CHECK-NEXT: ret float [[TMP1]] +; + %t1 = fadd reassoc nsz float %f1, %f1 + %t2 = fadd reassoc nsz float %f1, %t1 + %t3 = fadd reassoc nsz float %t2, %f1 + ret float %t3 +} + +; TODO: This doesn't require 'nsz'. It should fold to f1 * 4.0. +define float @fold6_reassoc(float %f1) { +; CHECK-LABEL: @fold6_reassoc( +; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc float [[F1:%.*]], [[F1]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc float [[TMP1]], [[F1]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd reassoc float [[TMP2]], [[F1]] +; CHECK-NEXT: ret float [[TMP3]] +; + %t1 = fadd reassoc float %f1, %f1 + %t2 = fadd reassoc float %f1, %t1 + %t3 = fadd reassoc float %t2, %f1 + ret float %t3 } ; C1 * X + (X + X) = (C1 + 2) * X @@ -94,16 +197,72 @@ ret float %t3 } -; (X + X) + (X + X) => 4.0 * X +; Check again with 'reassoc' and 'nsz' ('nsz' not technically required). 
+define float @fold7_reassoc_nsz(float %f1) { +; CHECK-LABEL: @fold7_reassoc_nsz( +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nsz float [[F1:%.*]], 7.000000e+00 +; CHECK-NEXT: ret float [[TMP1]] +; + %t1 = fmul reassoc nsz float %f1, 5.000000e+00 + %t2 = fadd reassoc nsz float %f1, %f1 + %t3 = fadd reassoc nsz float %t1, %t2 + ret float %t3 +} + +; TODO: This doesn't require 'nsz'. It should fold to f1 * 7.0. +define float @fold7_reassoc(float %f1) { +; CHECK-LABEL: @fold7_reassoc( +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc float [[F1:%.*]], 5.000000e+00 +; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc float [[F1]], [[F1]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret float [[TMP3]] +; + %t1 = fmul reassoc float %f1, 5.000000e+00 + %t2 = fadd reassoc float %f1, %f1 + %t3 = fadd reassoc float %t1, %t2 + ret float %t3 +} + +; (X + X) + (X + X) + X => 5.0 * X define float @fold8(float %f1) { ; CHECK-LABEL: @fold8( -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 4.000000e+00 +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 5.000000e+00 ; CHECK-NEXT: ret float [[TMP1]] ; %t1 = fadd fast float %f1, %f1 %t2 = fadd fast float %f1, %f1 %t3 = fadd fast float %t1, %t2 - ret float %t3 + %t4 = fadd fast float %t3, %f1 + ret float %t4 +} + +; Check again with 'reassoc' and 'nsz' ('nsz' not technically required). +define float @fold8_reassoc_nsz(float %f1) { +; CHECK-LABEL: @fold8_reassoc_nsz( +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nsz float [[F1:%.*]], 5.000000e+00 +; CHECK-NEXT: ret float [[TMP1]] +; + %t1 = fadd reassoc nsz float %f1, %f1 + %t2 = fadd reassoc nsz float %f1, %f1 + %t3 = fadd reassoc nsz float %t1, %t2 + %t4 = fadd reassoc nsz float %t3, %f1 + ret float %t4 +} + +; TODO: This doesn't require 'nsz'. It should fold to f1 * 5.0. 
+define float @fold8_reassoc(float %f1) { +; CHECK-LABEL: @fold8_reassoc( +; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc float [[F1:%.*]], [[F1]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc float [[F1]], [[F1]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = fadd reassoc float [[TMP3]], [[F1]] +; CHECK-NEXT: ret float [[TMP4]] +; + %t1 = fadd reassoc float %f1, %f1 + %t2 = fadd reassoc float %f1, %f1 + %t3 = fadd reassoc float %t1, %t2 + %t4 = fadd reassoc float %t3, %f1 + ret float %t4 } ; X - (X + Y) => 0 - Y @@ -117,6 +276,29 @@ ret float %t3 } +; Check again with 'reassoc' and 'nsz' ('nsz' not technically required). +define float @fold9_reassoc_nsz(float %f1, float %f2) { +; CHECK-LABEL: @fold9_reassoc_nsz( +; CHECK-NEXT: [[TMP1:%.*]] = fsub reassoc nsz float -0.000000e+00, [[F2:%.*]] +; CHECK-NEXT: ret float [[TMP1]] +; + %t1 = fadd float %f1, %f2 + %t3 = fsub reassoc nsz float %f1, %t1 + ret float %t3 +} + +; TODO: This doesn't require 'nsz'. It should fold to 0 - f2 +define float @fold9_reassoc(float %f1, float %f2) { +; CHECK-LABEL: @fold9_reassoc( +; CHECK-NEXT: [[TMP1:%.*]] = fadd float [[F1:%.*]], [[F2:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = fsub reassoc float [[F1]], [[TMP1]] +; CHECK-NEXT: ret float [[TMP2]] +; + %t1 = fadd float %f1, %f2 + %t3 = fsub reassoc float %f1, %t1 + ret float %t3 +} + ; Let C3 = C1 + C2. (f1 + C1) + (f2 + C2) => (f1 + f2) + C3 instead of ; "(f1 + C3) + f2" or "(f2 + C3) + f1". Placing constant-addend at the ; top of resulting simplified expression tree may potentially reveal some @@ -134,6 +316,36 @@ ret float %t3 } +; Check again with 'reassoc' and 'nsz'. +; TODO: We may be able to remove the 'nsz' requirement. 
+define float @fold10_reassoc_nsz(float %f1, float %f2) { +; CHECK-LABEL: @fold10_reassoc_nsz( +; CHECK-NEXT: [[T2:%.*]] = fadd reassoc nsz float [[F1:%.*]], [[F2:%.*]] +; CHECK-NEXT: [[T3:%.*]] = fadd reassoc nsz float [[T2]], -1.000000e+00 +; CHECK-NEXT: ret float [[T3]] +; + %t1 = fadd reassoc nsz float 2.000000e+00, %f1 + %t2 = fsub reassoc nsz float %f2, 3.000000e+00 + %t3 = fadd reassoc nsz float %t1, %t2 + ret float %t3 +} + +; Observe that the fold is not done with only reassoc (the instructions are +; canonicalized, but not folded). +; TODO: As noted above, 'nsz' may not be required for this to be fully folded. +define float @fold10_reassoc(float %f1, float %f2) { +; CHECK-LABEL: @fold10_reassoc( +; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc float [[F1:%.*]], 2.000000e+00 +; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc float [[F2:%.*]], -3.000000e+00 +; CHECK-NEXT: [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret float [[TMP3]] +; + %t1 = fadd reassoc float 2.000000e+00, %f1 + %t2 = fsub reassoc float %f2, 3.000000e+00 + %t3 = fadd reassoc float %t1, %t2 + ret float %t3 +} + ; This used to crash/miscompile. define float @fail1(float %f1, float %f2) { @@ -171,40 +383,66 @@ ret float %sub } +; Check again using the minimal subset of FMF. +define float @fold13_reassoc_nsz(float %x) { +; CHECK-LABEL: @fold13_reassoc_nsz( +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nsz float [[X:%.*]], 6.000000e+00 +; CHECK-NEXT: ret float [[TMP1]] +; + %mul = fmul reassoc nsz float %x, 7.000000e+00 + %sub = fsub reassoc nsz float %mul, %x + ret float %sub +} + +; Verify the fold is not done with only 'reassoc' ('nsz' is required). 
+define float @fold13_reassoc(float %x) { +; CHECK-LABEL: @fold13_reassoc( +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], 7.000000e+00 +; CHECK-NEXT: [[TMP2:%.*]] = fsub reassoc float [[TMP1]], [[X]] +; CHECK-NEXT: ret float [[TMP2]] +; + %mul = fmul reassoc float %x, 7.000000e+00 + %sub = fsub reassoc float %mul, %x + ret float %sub +} + ; -x + y => y - x +; This is always safe. No FMF required. define float @fold14(float %x, float %y) { ; CHECK-LABEL: @fold14( -; CHECK-NEXT: [[ADD:%.*]] = fsub fast float [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = fsub float [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: ret float [[ADD]] ; - %neg = fsub fast float -0.0, %x - %add = fadd fast float %neg, %y + %neg = fsub float -0.0, %x + %add = fadd float %neg, %y ret float %add } ; x + -y => x - y +; This is always safe. No FMF required. define float @fold15(float %x, float %y) { ; CHECK-LABEL: @fold15( -; CHECK-NEXT: [[ADD:%.*]] = fsub fast float [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = fsub float [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret float [[ADD]] ; - %neg = fsub fast float -0.0, %y - %add = fadd fast float %x, %neg + %neg = fsub float -0.0, %y + %add = fadd float %x, %neg ret float %add } ; (select X+Y, X-Y) => X + (select Y, -Y) +; This is always safe. No FMF required. 
define float @fold16(float %x, float %y) { ; CHECK-LABEL: @fold16( ; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float -0.000000e+00, [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = fsub float -0.000000e+00, [[Y]] ; CHECK-NEXT: [[R_P:%.*]] = select i1 [[CMP]], float [[Y]], float [[TMP1]] -; CHECK-NEXT: [[R:%.*]] = fadd fast float [[R_P]], [[X]] +; CHECK-NEXT: [[R:%.*]] = fadd float [[R_P]], [[X]] ; CHECK-NEXT: ret float [[R]] ; %cmp = fcmp ogt float %x, %y - %plus = fadd fast float %x, %y - %minus = fsub fast float %x, %y + %plus = fadd float %x, %y + %minus = fsub float %x, %y %r = select i1 %cmp, float %plus, float %minus ret float %r } @@ -331,6 +569,33 @@ ret float %t3 } +; Check again using the minimal subset of FMF. +define float @fact_mul1_reassoc_nsz(float %x, float %y, float %z) { +; CHECK-LABEL: @fact_mul1_reassoc_nsz( +; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz float [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc nsz float [[TMP1]], [[Z:%.*]] +; CHECK-NEXT: ret float [[TMP2]] +; + %t1 = fmul reassoc nsz float %x, %z + %t2 = fmul reassoc nsz float %y, %z + %t3 = fadd reassoc nsz float %t1, %t2 + ret float %t3 +} + +; Verify the fold is not done with only 'reassoc' ('nsz' is required). +define float @fact_mul1_reassoc(float %x, float %y, float %z) { +; CHECK-LABEL: @fact_mul1_reassoc( +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc float [[Y:%.*]], [[Z]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret float [[TMP3]] +; + %t1 = fmul reassoc float %x, %z + %t2 = fmul reassoc float %y, %z + %t3 = fadd reassoc float %t1, %t2 + ret float %t3 +} + ; z*x + y*z => (x+y) * z define float @fact_mul2(float %x, float %y, float %z) { ; CHECK-LABEL: @fact_mul2( @@ -344,6 +609,33 @@ ret float %t3 } +; Check again using the minimal subset of FMF.
+define float @fact_mul2_reassoc_nsz(float %x, float %y, float %z) { +; CHECK-LABEL: @fact_mul2_reassoc_nsz( +; CHECK-NEXT: [[TMP1:%.*]] = fsub reassoc nsz float [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc nsz float [[TMP1]], [[Z:%.*]] +; CHECK-NEXT: ret float [[TMP2]] +; + %t1 = fmul reassoc nsz float %z, %x + %t2 = fmul reassoc nsz float %y, %z + %t3 = fsub reassoc nsz float %t1, %t2 + ret float %t3 +} + +; Verify the fold is not done with only 'reassoc' ('nsz' is required). +define float @fact_mul2_reassoc(float %x, float %y, float %z) { +; CHECK-LABEL: @fact_mul2_reassoc( +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc float [[Z:%.*]], [[X:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc float [[Y:%.*]], [[Z]] +; CHECK-NEXT: [[TMP3:%.*]] = fsub reassoc float [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret float [[TMP3]] +; + %t1 = fmul reassoc float %z, %x + %t2 = fmul reassoc float %y, %z + %t3 = fsub reassoc float %t1, %t2 + ret float %t3 +} + ; z*x - z*y => (x-y) * z define float @fact_mul3(float %x, float %y, float %z) { ; CHECK-LABEL: @fact_mul3( @@ -357,6 +649,33 @@ ret float %t3 } +; Check again using the minimal subset of FMF. +define float @fact_mul3_reassoc_nsz(float %x, float %y, float %z) { +; CHECK-LABEL: @fact_mul3_reassoc_nsz( +; CHECK-NEXT: [[TMP1:%.*]] = fsub reassoc nsz float [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc nsz float [[TMP1]], [[Z:%.*]] +; CHECK-NEXT: ret float [[TMP2]] +; + %t2 = fmul reassoc nsz float %z, %y + %t1 = fmul reassoc nsz float %z, %x + %t3 = fsub reassoc nsz float %t1, %t2 + ret float %t3 +} + +; Verify the fold is not done with only 'reassoc' ('nsz' is required). 
+define float @fact_mul3_reassoc(float %x, float %y, float %z) { +; CHECK-LABEL: @fact_mul3_reassoc( +; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc float [[Z:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc float [[Z]], [[X:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = fsub reassoc float [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret float [[TMP3]] +; + %t2 = fmul reassoc float %z, %y + %t1 = fmul reassoc float %z, %x + %t3 = fsub reassoc float %t1, %t2 + ret float %t3 +} + ; x*z - z*y => (x-y) * z define float @fact_mul4(float %x, float %y, float %z) { ; CHECK-LABEL: @fact_mul4( @@ -370,6 +689,33 @@ ret float %t3 } +; Check again using the minimal subset of FMF. +define float @fact_mul4_reassoc_nsz(float %x, float %y, float %z) { +; CHECK-LABEL: @fact_mul4_reassoc_nsz( +; CHECK-NEXT: [[TMP1:%.*]] = fsub reassoc nsz float [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc nsz float [[TMP1]], [[Z:%.*]] +; CHECK-NEXT: ret float [[TMP2]] +; + %t1 = fmul reassoc nsz float %x, %z + %t2 = fmul reassoc nsz float %z, %y + %t3 = fsub reassoc nsz float %t1, %t2 + ret float %t3 +} + +; Verify the fold is not done with only 'reassoc' ('nsz' is required). +define float @fact_mul4_reassoc(float %x, float %y, float %z) { +; CHECK-LABEL: @fact_mul4_reassoc( +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc float [[Z]], [[Y:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = fsub reassoc float [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret float [[TMP3]] +; + %t1 = fmul reassoc float %x, %z + %t2 = fmul reassoc float %z, %y + %t3 = fsub reassoc float %t1, %t2 + ret float %t3 +} + ; x/y + x/z, no xform define float @fact_div1(float %x, float %y, float %z) { ; CHECK-LABEL: @fact_div1( @@ -411,6 +757,33 @@ ret float %t3 } +; Check again using the minimal subset of FMF. 
+define float @fact_div3_reassoc_nsz(float %x, float %y, float %z) { +; CHECK-LABEL: @fact_div3_reassoc_nsz( +; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz float [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = fdiv reassoc nsz float [[TMP1]], [[X:%.*]] +; CHECK-NEXT: ret float [[TMP2]] +; + %t1 = fdiv reassoc nsz float %y, %x + %t2 = fdiv reassoc nsz float %z, %x + %t3 = fadd reassoc nsz float %t1, %t2 + ret float %t3 +} + +; Verify the fold is not done with only 'reassoc' ('nsz' is required). +define float @fact_div3_reassoc(float %x, float %y, float %z) { +; CHECK-LABEL: @fact_div3_reassoc( +; CHECK-NEXT: [[TMP1:%.*]] = fdiv reassoc float [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = fdiv reassoc float [[Z:%.*]], [[X]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret float [[TMP3]] +; + %t1 = fdiv reassoc float %y, %x + %t2 = fdiv reassoc float %z, %x + %t3 = fadd reassoc float %t1, %t2 + ret float %t3 +} + ; y/x - z/x => (y-z)/x define float @fact_div4(float %x, float %y, float %z) { ; CHECK-LABEL: @fact_div4( @@ -424,6 +797,33 @@ ret float %t3 } +; Check again using the minimal subset of FMF. +define float @fact_div4_reassoc_nsz(float %x, float %y, float %z) { +; CHECK-LABEL: @fact_div4_reassoc_nsz( +; CHECK-NEXT: [[TMP1:%.*]] = fsub reassoc nsz float [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = fdiv reassoc nsz float [[TMP1]], [[X:%.*]] +; CHECK-NEXT: ret float [[TMP2]] +; + %t1 = fdiv reassoc nsz float %y, %x + %t2 = fdiv reassoc nsz float %z, %x + %t3 = fsub reassoc nsz float %t1, %t2 + ret float %t3 +} + +; Verify the fold is not done with only 'reassoc' ('nsz' is required). 
+define float @fact_div4_reassoc(float %x, float %y, float %z) { +; CHECK-LABEL: @fact_div4_reassoc( +; CHECK-NEXT: [[TMP1:%.*]] = fdiv reassoc float [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = fdiv reassoc float [[Z:%.*]], [[X]] +; CHECK-NEXT: [[TMP3:%.*]] = fsub reassoc float [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret float [[TMP3]] +; + %t1 = fdiv reassoc float %y, %x + %t2 = fdiv reassoc float %z, %x + %t3 = fsub reassoc float %t1, %t2 + ret float %t3 +} + ; y/x - z/x => (y-z)/x is disabled if y-z is denormal. define float @fact_div5(float %x) { ; CHECK-LABEL: @fact_div5( @@ -735,12 +1135,14 @@ ret fp128 %c } +; ((which ? 2.0 : a) + 1.0) => (which ? 3.0 : (a + 1.0)) +; This is always safe. No FMF required. define float @test55(i1 %which, float %a) { ; CHECK-LABEL: @test55( ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]] ; CHECK: delay: -; CHECK-NEXT: [[PHITMP:%.*]] = fadd fast float [[A:%.*]], 1.000000e+00 +; CHECK-NEXT: [[PHITMP:%.*]] = fadd float [[A:%.*]], 1.000000e+00 ; CHECK-NEXT: br label [[FINAL]] ; CHECK: final: ; CHECK-NEXT: [[A:%.*]] = phi float [ 3.000000e+00, [[ENTRY:%.*]] ], [ [[PHITMP]], [[DELAY]] ] @@ -754,7 +1156,6 @@ final: %A = phi float [ 2.0, %entry ], [ %a, %delay ] - %value = fadd fast float %A, 1.0 + %value = fadd float %A, 1.0 ret float %value } - Index: test/Transforms/Reassociate/fast-MissedTree.ll =================================================================== --- test/Transforms/Reassociate/fast-MissedTree.ll +++ test/Transforms/Reassociate/fast-MissedTree.ll @@ -12,7 +12,19 @@ } ; Check again using minimal subset of FMF. +; Both 'reassoc' and 'nsz' are required. 
+define float @test1_reassoc_nsz(float %A, float %B) { +; CHECK-LABEL: @test1_reassoc_nsz( +; CHECK-NEXT: [[Z:%.*]] = fadd reassoc nsz float %A, %B +; CHECK-NEXT: ret float [[Z]] +; + %W = fadd reassoc nsz float %B, -5.0 + %Y = fadd reassoc nsz float %A, 5.0 + %Z = fadd reassoc nsz float %W, %Y + ret float %Z +} +; Verify the fold is not done with only 'reassoc' ('nsz' is required). define float @test1_reassoc(float %A, float %B) { ; CHECK-LABEL: @test1_reassoc( ; CHECK-NEXT: [[W:%.*]] = fadd reassoc float %B, -5.000000e+00 @@ -25,4 +37,3 @@ %Z = fadd reassoc float %W, %Y ret float %Z } - Index: test/Transforms/Reassociate/fast-SubReassociate.ll =================================================================== --- test/Transforms/Reassociate/fast-SubReassociate.ll +++ test/Transforms/Reassociate/fast-SubReassociate.ll @@ -29,7 +29,20 @@ } ; Check again using minimal subset of FMF. +; Both 'reassoc' and 'nsz' are required. +define float @test2_minimal(float %A, float %B) { +; CHECK-LABEL: @test2_minimal( +; CHECK-NEXT: [[Z:%.*]] = fsub reassoc nsz float %A, %B +; CHECK-NEXT: ret float [[Z]] +; + %W = fadd reassoc nsz float %B, 5.000000e+00 + %X = fadd reassoc nsz float %A, -7.000000e+00 + %Y = fsub reassoc nsz float %X, %W + %Z = fadd reassoc nsz float %Y, 1.200000e+01 + ret float %Z +} +; Verify the fold is not done with only 'reassoc' ('nsz' is required). define float @test2_reassoc(float %A, float %B) { ; CHECK-LABEL: @test2_reassoc( ; CHECK-NEXT: [[W:%.*]] = fadd reassoc float %B, 5.000000e+00 Index: test/Transforms/Reassociate/fast-basictest.ll =================================================================== --- test/Transforms/Reassociate/fast-basictest.ll +++ test/Transforms/Reassociate/fast-basictest.ll @@ -12,6 +12,19 @@ ret float %t2 } +; Check again using the minimal subset of FMF. +; Both 'reassoc' and 'nsz' are required. 
+define float @test1_minimal(float %arg) { +; CHECK-LABEL: @test1_minimal( +; CHECK-NEXT: [[ARG_NEG:%.*]] = fsub reassoc nsz float -0.000000e+00, [[ARG:%.*]] +; CHECK-NEXT: ret float [[ARG_NEG]] +; + %t1 = fsub reassoc nsz float -1.200000e+01, %arg + %t2 = fadd reassoc nsz float %t1, 1.200000e+01 + ret float %t2 +} + +; Verify the fold is not done with only 'reassoc' ('nsz' is required). define float @test1_reassoc(float %arg) { ; CHECK-LABEL: @test1_reassoc( ; CHECK-NEXT: [[T1:%.*]] = fsub reassoc float -1.200000e+01, [[ARG:%.*]] @@ -210,6 +223,18 @@ ret float %Z } +; Check again with 'reassoc' and 'nsz' ('nsz' not technically required). +define float @test9_reassoc_nsz(float %X) { +; CHECK-LABEL: @test9_reassoc_nsz( +; CHECK-NEXT: [[FACTOR:%.*]] = fmul reassoc nsz float [[X:%.*]], 9.400000e+01 +; CHECK-NEXT: ret float [[FACTOR]] +; + %Y = fmul reassoc nsz float %X, 4.700000e+01 + %Z = fadd reassoc nsz float %Y, %Y + ret float %Z +} + +; TODO: This doesn't require 'nsz'. It should fold to X * 94.0 define float @test9_reassoc(float %X) { ; CHECK-LABEL: @test9_reassoc( ; CHECK-NEXT: [[Y:%.*]] = fmul reassoc float [[X:%.*]], 4.700000e+01 @@ -221,25 +246,44 @@ ret float %Z } +; Side note: (x + x + x) and (3*x) each have only a single rounding. So +; transforming x+x+x to 3*x is always safe, even without any FMF. +; To avoid that special-case, we have the addition of 'x' four times, here. define float @test10(float %X) { ; CHECK-LABEL: @test10( -; CHECK-NEXT: [[FACTOR:%.*]] = fmul fast float [[X:%.*]], 3.000000e+00 +; CHECK-NEXT: [[FACTOR:%.*]] = fmul fast float [[X:%.*]], 4.000000e+00 ; CHECK-NEXT: ret float [[FACTOR]] ; %Y = fadd fast float %X ,%X %Z = fadd fast float %Y, %X - ret float %Z + %W = fadd fast float %Z, %X + ret float %W +} + +; Check again with 'reassoc' and 'nsz' ('nsz' not technically required). 
+define float @test10_reassoc_nsz(float %X) { +; CHECK-LABEL: @test10_reassoc_nsz( +; CHECK-NEXT: [[FACTOR:%.*]] = fmul reassoc nsz float [[X:%.*]], 4.000000e+00 +; CHECK-NEXT: ret float [[FACTOR]] +; + %Y = fadd reassoc nsz float %X ,%X + %Z = fadd reassoc nsz float %Y, %X + %W = fadd reassoc nsz float %Z, %X + ret float %W } +; TODO: This doesn't require 'nsz'. It should fold to 4 * x define float @test10_reassoc(float %X) { ; CHECK-LABEL: @test10_reassoc( ; CHECK-NEXT: [[Y:%.*]] = fadd reassoc float [[X:%.*]], [[X]] ; CHECK-NEXT: [[Z:%.*]] = fadd reassoc float [[Y]], [[X]] -; CHECK-NEXT: ret float [[Z]] +; CHECK-NEXT: [[W:%.*]] = fadd reassoc float [[Z]], [[X]] +; CHECK-NEXT: ret float [[W]] ; %Y = fadd reassoc float %X ,%X %Z = fadd reassoc float %Y, %X - ret float %Z + %W = fadd reassoc float %Z, %X + ret float %W } define float @test11(float %W) { @@ -253,6 +297,20 @@ ret float %Z } +; (W * 127) + (W * 127) + (W * 127) => W * 381 +; Check again with 'reassoc' and 'nsz' ('nsz' not technically required). +define float @test11_reassoc_nsz(float %W) { +; CHECK-LABEL: @test11_reassoc_nsz( +; CHECK-NEXT: [[FACTOR:%.*]] = fmul reassoc nsz float [[W:%.*]], 3.810000e+02 +; CHECK-NEXT: ret float [[FACTOR]] +; + %X = fmul reassoc nsz float %W, 127.0 + %Y = fadd reassoc nsz float %X ,%X + %Z = fadd reassoc nsz float %Y, %X + ret float %Z +} + +; TODO: This doesn't require 'nsz'. It should fold to W*381.0. define float @test11_reassoc(float %W) { ; CHECK-LABEL: @test11_reassoc( ; CHECK-NEXT: [[X:%.*]] = fmul reassoc float [[W:%.*]], 1.270000e+02 @@ -280,6 +338,22 @@ ret float %Z } +; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
+define float @test12_reassoc_nsz(float %X) { +; CHECK-LABEL: @test12_reassoc_nsz( +; CHECK-NEXT: [[FACTOR:%.*]] = fmul reassoc nsz float [[X:%.*]], 3.000000e+00 +; CHECK-NEXT: [[Z:%.*]] = fsub reassoc nsz float 6.000000e+00, [[FACTOR]] +; CHECK-NEXT: ret float [[Z]] +; + %A = fsub reassoc nsz float 1.000000e+00, %X + %B = fsub reassoc nsz float 2.000000e+00, %X + %C = fsub reassoc nsz float 3.000000e+00, %X + %Y = fadd reassoc nsz float %A ,%B + %Z = fadd reassoc nsz float %Y, %C + ret float %Z +} + +; TODO: This doesn't require 'nsz'. It should fold to (6.0 - 3.0*x) define float @test12_reassoc(float %X) { ; CHECK-LABEL: @test12_reassoc( ; CHECK-NEXT: [[A:%.*]] = fsub reassoc float 1.000000e+00, [[X:%.*]] @@ -337,6 +411,21 @@ ret float %D } +; (x1 * 47) + (x2 * -47) => (x1 - x2) * 47 +; Check again with 'reassoc' and 'nsz' ('nsz' not technically required). +define float @test14_reassoc_nsz(float %X1, float %X2) { +; CHECK-LABEL: @test14_reassoc_nsz( +; CHECK-NEXT: [[TMP1:%.*]] = fsub reassoc nsz float [[X1:%.*]], [[X2:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc nsz float [[TMP1]], 4.700000e+01 +; CHECK-NEXT: ret float [[TMP2]] +; + %B = fmul reassoc nsz float %X1, 47. ; X1*47 + %C = fmul reassoc nsz float %X2, -47. ; X2*-47 + %D = fadd reassoc nsz float %B, %C ; X1*47 + X2*-47 -> 47*(X1-X2) + ret float %D +} + +; TODO: This doesn't require 'nsz'. It should fold to ((x1 - x2) * 47.0) define float @test14_reassoc(float %X1, float %X2) { ; CHECK-LABEL: @test14_reassoc( ; CHECK-NEXT: [[B:%.*]] = fmul reassoc float [[X1:%.*]], 4.700000e+01