diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12872,11 +12872,6 @@
   return DAG.getBuildVector(VT, DL, Ops);
 }
-static bool isContractable(SDNode *N) {
-  SDNodeFlags F = N->getFlags();
-  return F.hasAllowContract() || F.hasAllowReassociation();
-}
-
 /// Try to perform FMA combining on a given FADD node.
 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
   SDValue N0 = N->getOperand(0);
@@ -12898,13 +12893,12 @@
   if (!HasFMAD && !HasFMA)
     return SDValue();
-  bool CanFuse = Options.UnsafeFPMath || isContractable(N);
   bool CanReassociate = Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
-                              CanFuse || HasFMAD);
+                              Options.UnsafeFPMath || HasFMAD);
   // If the addition is not contractable, do not combine.
-  if (!AllowFusionGlobally && !isContractable(N))
+  if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
     return SDValue();
   if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
@@ -12919,7 +12913,7 @@
   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
     if (N.getOpcode() != ISD::FMUL)
       return false;
-    return AllowFusionGlobally || isContractable(N.getNode());
+    return AllowFusionGlobally || N->getFlags().hasAllowContract();
   };
   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
   // prefer to fold the multiply with fewer uses.
@@ -13108,12 +13102,11 @@
     return SDValue();
   const SDNodeFlags Flags = N->getFlags();
-  bool CanFuse = Options.UnsafeFPMath || isContractable(N);
   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
-                              CanFuse || HasFMAD);
+                              Options.UnsafeFPMath || HasFMAD);
   // If the subtraction is not contractable, do not combine.
-  if (!AllowFusionGlobally && !isContractable(N))
+  if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
     return SDValue();
   if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
@@ -13129,7 +13122,7 @@
   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
     if (N.getOpcode() != ISD::FMUL)
       return false;
-    return AllowFusionGlobally || isContractable(N.getNode());
+    return AllowFusionGlobally || N->getFlags().hasAllowContract();
   };
   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
@@ -13261,6 +13254,7 @@
   // More folding opportunities when target permits.
   if (Aggressive) {
+    bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
     // fold (fsub (fma x, y, (fmul u, v)), z)
     //   -> (fma x, y (fma u, v, (fneg z)))
     if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
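Net effect of the DAGCombiner change: the 'reassoc' flag no longer stands in for 'contract' when forming FMAs. The fadd/fsub being combined and the fmul feeding it must each carry 'contract', unless fusion is enabled globally (-fp-contract=fast or unsafe-fp-math). A minimal IR sketch of the new behavior (hypothetical functions, not tests from this patch):

define float @still_fuses(float %x, float %y, float %z) {
  %m = fmul contract float %x, %y
  %a = fadd contract float %m, %z   ; both ops contractable: may become fma
  ret float %a
}

define float @no_longer_fuses(float %x, float %y, float %z) {
  %m = fmul reassoc float %x, %y
  %a = fadd reassoc float %m, %z    ; reassoc alone: stays fmul + fadd
  ret float %a
}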
diff --git a/llvm/test/CodeGen/AArch64/fadd-combines.ll b/llvm/test/CodeGen/AArch64/fadd-combines.ll
--- a/llvm/test/CodeGen/AArch64/fadd-combines.ll
+++ b/llvm/test/CodeGen/AArch64/fadd-combines.ll
@@ -217,10 +217,10 @@
 ; CHECK-NEXT: fmadd s2, s2, s3, s4
 ; CHECK-NEXT: fmadd s0, s0, s1, s2
 ; CHECK-NEXT: ret
-  %m1 = fmul float %a, %b
-  %m2 = fmul float %c, %d
+  %m1 = fmul contract float %a, %b
+  %m2 = fmul contract float %c, %d
   %a1 = fadd contract float %m1, %m2
-  %a2 = fadd reassoc float %n0, %a1
+  %a2 = fadd contract reassoc float %n0, %a1
   ret float %a2
 }
diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
@@ -66,7 +66,7 @@
   %r0 = load half, half addrspace(1)* %in1
   %r1 = load half, half addrspace(1)* %in2
   %r2 = load half, half addrspace(1)* %in3
-  %mul = fmul half %r0, %r1
+  %mul = fmul contract half %r0, %r1
   %add = fadd contract half %mul, %r2
   store half %add, half addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
@@ -81,7 +81,7 @@
   %r0 = load volatile float, float addrspace(1)* %in1
   %r1 = load volatile float, float addrspace(1)* %in2
   %r2 = load volatile float, float addrspace(1)* %in3
-  %mul = fmul float %r0, %r1
+  %mul = fmul contract float %r0, %r1
   %add = fadd contract float %mul, %r2
   store float %add, float addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll
@@ -41,7 +41,7 @@
   %r0 = load double, double addrspace(1)* %in1
   %r1 = load double, double addrspace(1)* %in2
   %r2 = load double, double addrspace(1)* %in3
-  %tmp = fmul double %r0, %r1
+  %tmp = fmul contract double %r0, %r1
   %r3 = fadd contract double %tmp, %r2
   store double %r3, double addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
@@ -53,7 +53,7 @@
   %r0 = load <2 x half>, <2 x half> addrspace(1)* %in1
   %r1 = load <2 x half>, <2 x half> addrspace(1)* %in2
   %r2 = load <2 x half>, <2 x half> addrspace(1)* %in3
-  %r3 = fmul <2 x half> %r0, %r1
+  %r3 = fmul contract <2 x half> %r0, %r1
   %r4 = fadd contract <2 x half> %r3, %r2
   store <2 x half> %r4, <2 x half> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/PowerPC/combine-fneg.ll b/llvm/test/CodeGen/PowerPC/combine-fneg.ll
--- a/llvm/test/CodeGen/PowerPC/combine-fneg.ll
+++ b/llvm/test/CodeGen/PowerPC/combine-fneg.ll
@@ -23,7 +23,7 @@
 entry:
   %splat.splatinsert = insertelement <4 x double> undef, double %a0, i32 0
   %splat.splat = shufflevector <4 x double> %splat.splatinsert, <4 x double> undef, <4 x i32> zeroinitializer
-  %div = fdiv reassoc nsz arcp ninf <4 x double> %a1, %splat.splat
-  %sub = fsub reassoc nsz <4 x double> , %div
+  %div = fdiv contract reassoc nsz arcp ninf <4 x double> %a1, %splat.splat
+  %sub = fsub contract reassoc nsz <4 x double> , %div
   ret <4 x double> %sub
 }
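The PowerPC test updates below are mechanical in the same way: wherever a reassoc-only operation used to be treated as contractable, the flag is now spelled out so the checked codegen is preserved. Schematically (a hypothetical one-liner, not taken from the tests):

; before: reassoc alone was treated as contractable
  %div = fdiv reassoc arcp nsz ninf float %a, %b
; after: contract is stated explicitly, keeping the fused lowering
  %div = fdiv contract reassoc arcp nsz ninf float %a, %b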
diff --git a/llvm/test/CodeGen/PowerPC/fdiv.ll b/llvm/test/CodeGen/PowerPC/fdiv.ll
--- a/llvm/test/CodeGen/PowerPC/fdiv.ll
+++ b/llvm/test/CodeGen/PowerPC/fdiv.ll
@@ -7,7 +7,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: xsdivsp 1, 1, 2
 ; CHECK-NEXT: blr
-  %3 = fdiv reassoc arcp nsz float %0, %1
+  %3 = fdiv contract reassoc arcp nsz float %0, %1
   ret float %3
 }
@@ -20,6 +20,6 @@
 ; CHECK-NEXT: xsmaddasp 0, 3, 1
 ; CHECK-NEXT: fmr 1, 0
 ; CHECK-NEXT: blr
-  %3 = fdiv reassoc arcp nsz ninf float %0, %1
+  %3 = fdiv contract reassoc arcp nsz ninf float %0, %1
   ret float %3
 }
diff --git a/llvm/test/CodeGen/PowerPC/fma-aggr-FMF.ll b/llvm/test/CodeGen/PowerPC/fma-aggr-FMF.ll
--- a/llvm/test/CodeGen/PowerPC/fma-aggr-FMF.ll
+++ b/llvm/test/CodeGen/PowerPC/fma-aggr-FMF.ll
@@ -22,10 +22,10 @@
 define float @no_fma_with_fewer_uses(float %f1, float %f2, float %f3, float %f4) {
 ; CHECK-LABEL: no_fma_with_fewer_uses:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xsmulsp 0, 1, 2
-; CHECK-NEXT: fmr 1, 0
-; CHECK-NEXT: xsmaddasp 1, 3, 4
-; CHECK-NEXT: xsdivsp 1, 0, 1
+; CHECK-NEXT: xsmulsp 0, 3, 4
+; CHECK-NEXT: xsmulsp 3, 1, 2
+; CHECK-NEXT: xsmaddasp 0, 1, 2
+; CHECK-NEXT: xsdivsp 1, 3, 0
 ; CHECK-NEXT: blr
   %mul1 = fmul contract float %f1, %f2
   %mul2 = fmul float %f3, %f4
diff --git a/llvm/test/CodeGen/PowerPC/fma-assoc.ll b/llvm/test/CodeGen/PowerPC/fma-assoc.ll
--- a/llvm/test/CodeGen/PowerPC/fma-assoc.ll
+++ b/llvm/test/CodeGen/PowerPC/fma-assoc.ll
@@ -320,18 +320,20 @@
   ret double %I
 }
+; FIXME: -ffp-contract=fast does NOT work here?
 define double @test_reassoc_FMSUB_ASSOC1(double %A, double %B, double %C,
 ; CHECK-LABEL: test_reassoc_FMSUB_ASSOC1:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: fmsub 0, 3, 4, 5
-; CHECK-NEXT: fmadd 1, 1, 2, 0
+; CHECK-NEXT: fmul 0, 3, 4
+; CHECK-NEXT: fmadd 0, 1, 2, 0
+; CHECK-NEXT: fsub 1, 0, 5
 ; CHECK-NEXT: blr
 ;
 ; CHECK-VSX-LABEL: test_reassoc_FMSUB_ASSOC1:
 ; CHECK-VSX: # %bb.0:
-; CHECK-VSX-NEXT: xsmsubmdp 3, 4, 5
-; CHECK-VSX-NEXT: xsmaddadp 3, 1, 2
-; CHECK-VSX-NEXT: fmr 1, 3
+; CHECK-VSX-NEXT: xsmuldp 0, 3, 4
+; CHECK-VSX-NEXT: xsmaddadp 0, 1, 2
+; CHECK-VSX-NEXT: xssubdp 1, 0, 5
 ; CHECK-VSX-NEXT: blr
   double %D, double %E) {
   %F = fmul reassoc double %A, %B ; [#uses=1]
@@ -341,6 +343,28 @@
   ret double %I
 }
+define double @test_reassoc_FMSUB_ASSOC11(double %A, double %B, double %C,
+; CHECK-LABEL: test_reassoc_FMSUB_ASSOC11:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmsub 0, 3, 4, 5
+; CHECK-NEXT: fmadd 1, 1, 2, 0
+; CHECK-NEXT: blr
+;
+; CHECK-VSX-LABEL: test_reassoc_FMSUB_ASSOC11:
+; CHECK-VSX: # %bb.0:
+; CHECK-VSX-NEXT: xsmsubmdp 3, 4, 5
+; CHECK-VSX-NEXT: xsmaddadp 3, 1, 2
+; CHECK-VSX-NEXT: fmr 1, 3
+; CHECK-VSX-NEXT: blr
+  double %D, double %E) {
+  %F = fmul contract reassoc double %A, %B ; [#uses=1]
+  %G = fmul contract reassoc double %C, %D ; [#uses=1]
+  %H = fadd contract reassoc double %F, %G ; [#uses=1]
+  %I = fsub contract reassoc double %H, %E ; [#uses=1]
+  ret double %I
+}
+
+
 define double @test_reassoc_FMSUB_ASSOC2(double %A, double %B, double %C,
 ; CHECK-LABEL: test_reassoc_FMSUB_ASSOC2:
 ; CHECK: # %bb.0:
@@ -366,15 +390,16 @@
 define double @test_fast_FMSUB_ASSOC2(double %A, double %B, double %C,
 ; CHECK-LABEL: test_fast_FMSUB_ASSOC2:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: fnmsub 0, 3, 4, 5
-; CHECK-NEXT: fnmsub 1, 1, 2, 0
+; CHECK-NEXT: fmul 0, 3, 4
+; CHECK-NEXT: fmadd 0, 1, 2, 0
+; CHECK-NEXT: fsub 1, 5, 0
 ; CHECK-NEXT: blr
 ;
 ; CHECK-VSX-LABEL: test_fast_FMSUB_ASSOC2:
 ; CHECK-VSX: # %bb.0:
-; CHECK-VSX-NEXT: xsnmsubmdp 3, 4, 5
-; CHECK-VSX-NEXT: xsnmsubadp 3, 1, 2
-; CHECK-VSX-NEXT: fmr 1, 3
+; CHECK-VSX-NEXT: xsmuldp 0, 3, 4
+; CHECK-VSX-NEXT: xsmaddadp 0, 1, 2
+; CHECK-VSX-NEXT: xssubdp 1, 5, 0
 ; CHECK-VSX-NEXT: blr
   double %D, double %E) {
   %F = fmul reassoc double %A, %B
diff --git a/llvm/test/CodeGen/PowerPC/fma-combine.ll b/llvm/test/CodeGen/PowerPC/fma-combine.ll
--- a/llvm/test/CodeGen/PowerPC/fma-combine.ll
+++ b/llvm/test/CodeGen/PowerPC/fma-combine.ll
@@ -184,16 +184,16 @@
 ; CHECK-NEXT: blr
   %tmp = load float, float* undef, align 4
   %tmp2 = load float, float* undef, align 4
-  %tmp3 = fmul reassoc float %tmp, 0x3FE372D780000000
-  %tmp4 = fadd reassoc float %tmp3, 1.000000e+00
-  %tmp5 = fmul reassoc float %tmp2, %tmp4
+  %tmp3 = fmul contract reassoc float %tmp, 0x3FE372D780000000
+  %tmp4 = fadd contract reassoc float %tmp3, 1.000000e+00
+  %tmp5 = fmul contract reassoc float %tmp2, %tmp4
   %tmp6 = load float, float* undef, align 4
   %tmp7 = load float, float* undef, align 4
-  %tmp8 = fmul reassoc float %tmp7, 0x3FE372D780000000
-  %tmp9 = fsub reassoc nsz float -1.000000e+00, %tmp8
-  %tmp10 = fmul reassoc float %tmp9, %tmp6
-  %tmp11 = fadd reassoc float %tmp5, 5.000000e-01
-  %tmp12 = fadd reassoc float %tmp11, %tmp10
+  %tmp8 = fmul contract reassoc float %tmp7, 0x3FE372D780000000
+  %tmp9 = fsub contract reassoc nsz float -1.000000e+00, %tmp8
+  %tmp10 = fmul contract reassoc float %tmp9, %tmp6
+  %tmp11 = fadd contract reassoc float %tmp5, 5.000000e-01
+  %tmp12 = fadd contract reassoc float %tmp11, %tmp10
   ret float %tmp12
 }
diff --git a/llvm/test/CodeGen/PowerPC/fma-mutate.ll b/llvm/test/CodeGen/PowerPC/fma-mutate.ll
--- a/llvm/test/CodeGen/PowerPC/fma-mutate.ll
+++ b/llvm/test/CodeGen/PowerPC/fma-mutate.ll
@@ -29,7 +29,7 @@
 ; CHECK-NEXT: .LBB0_2:
 ; CHECK-NEXT: xssqrtdp 1, 1
 ; CHECK-NEXT: blr
-  %r = call reassoc afn ninf double @llvm.sqrt.f64(double %a)
+  %r = call contract reassoc afn ninf double @llvm.sqrt.f64(double %a)
   ret double %r
 }
diff --git a/llvm/test/CodeGen/PowerPC/fma-negate.ll b/llvm/test/CodeGen/PowerPC/fma-negate.ll
--- a/llvm/test/CodeGen/PowerPC/fma-negate.ll
+++ b/llvm/test/CodeGen/PowerPC/fma-negate.ll
@@ -179,8 +179,8 @@
 ; NO-VSX-NEXT: fnmsub 1, 2, 3, 1
 ; NO-VSX-NEXT: blr
 entry:
-  %0 = fmul reassoc nsz double %b, %c
-  %1 = fsub reassoc nsz double %a, %0
+  %0 = fmul contract reassoc nsz double %b, %c
+  %1 = fsub contract reassoc nsz double %a, %0
   ret double %1
 }
@@ -199,9 +199,9 @@
 ; NO-VSX-NEXT: blr
   double %d) {
 entry:
-  %0 = fmul reassoc double %a, %b
-  %1 = fmul reassoc double %c, %d
-  %2 = fsub reassoc double %0, %1
+  %0 = fmul contract reassoc double %a, %b
+  %1 = fmul contract reassoc double %c, %d
+  %2 = fsub contract reassoc double %0, %1
   ret double %2
 }
@@ -233,8 +233,8 @@
 ; NO-VSX-NEXT: fnmsubs 1, 2, 3, 1
 ; NO-VSX-NEXT: blr
 entry:
-  %0 = fmul reassoc float %b, %c
-  %1 = fsub reassoc nsz float %a, %0
+  %0 = fmul contract reassoc float %b, %c
+  %1 = fsub contract reassoc nsz float %a, %0
   ret float %1
 }
@@ -252,9 +252,9 @@
 ; NO-VSX-NEXT: fmsubs 1, 1, 2, 0
 ; NO-VSX-NEXT: blr
 entry:
-  %0 = fmul reassoc float %a, %b
-  %1 = fmul reassoc float %c, %d
-  %2 = fsub reassoc nsz float %0, %1
+  %0 = fmul contract reassoc float %a, %b
+  %1 = fmul contract reassoc float %c, %d
+  %2 = fsub contract reassoc nsz float %0, %1
   ret float %2
 }
diff --git a/llvm/test/CodeGen/PowerPC/fma-precision.ll b/llvm/test/CodeGen/PowerPC/fma-precision.ll
--- a/llvm/test/CodeGen/PowerPC/fma-precision.ll
+++ b/llvm/test/CodeGen/PowerPC/fma-precision.ll
@@ -11,10 +11,10 @@
 ; CHECK-NEXT: xsmuldp 1, 0, 1
 ; CHECK-NEXT: blr
 entry:
-  %mul = fmul reassoc double %b, %a
-  %mul1 = fmul reassoc double %d, %c
-  %sub = fsub reassoc nsz double %mul, %mul1
-  %mul3 = fmul reassoc double %mul, %sub
+  %mul = fmul contract reassoc double %b, %a
+  %mul1 = fmul contract reassoc double %d, %c
+  %sub = fsub contract reassoc nsz double %mul, %mul1
+  %mul3 = fmul contract reassoc double %mul, %sub
   ret double %mul3
 }
@@ -28,10 +28,10 @@
 ; CHECK-NEXT: xsmuldp 1, 0, 3
 ; CHECK-NEXT: blr
 entry:
-  %mul = fmul reassoc double %b, %a
-  %mul1 = fmul reassoc double %d, %c
-  %sub = fsub reassoc double %mul, %mul1
-  %mul3 = fmul reassoc double %mul1, %sub
+  %mul = fmul contract reassoc double %b, %a
+  %mul1 = fmul contract reassoc double %d, %c
+  %sub = fsub contract reassoc double %mul, %mul1
+  %mul3 = fmul contract reassoc double %mul1, %sub
   ret double %mul3
 }
@@ -44,9 +44,9 @@
 ; CHECK-NEXT: fmr 1, 0
 ; CHECK-NEXT: blr
 entry:
-  %mul = fmul reassoc double %b, %a
-  %mul1 = fmul reassoc double %d, %c
-  %sub = fsub reassoc double %mul, %mul1
+  %mul = fmul contract reassoc double %b, %a
+  %mul1 = fmul contract reassoc double %d, %c
+  %sub = fsub contract reassoc double %mul, %mul1
   ret double %sub
 }
@@ -60,10 +60,10 @@
 ; CHECK-NEXT: xsmuldp 1, 0, 1
 ; CHECK-NEXT: blr
 entry:
-  %mul = fmul reassoc double %b, %a
-  %mul1 = fmul reassoc double %d, %c
-  %add = fadd reassoc double %mul1, %mul
-  %mul3 = fmul reassoc double %mul, %add
+  %mul = fmul contract reassoc double %b, %a
+  %mul1 = fmul contract reassoc double %d, %c
+  %add = fadd contract reassoc double %mul1, %mul
+  %mul3 = fmul contract reassoc double %mul, %add
   ret double %mul3
 }
@@ -77,10 +77,10 @@
 ; CHECK-NEXT: xsmuldp 1, 0, 3
 ; CHECK-NEXT: blr
 entry:
-  %mul = fmul reassoc double %b, %a
-  %mul1 = fmul reassoc double %d, %c
-  %add = fadd reassoc double %mul1, %mul
-  %mul3 = fmul reassoc double %mul1, %add
+  %mul = fmul contract reassoc double %b, %a
+  %mul1 = fmul contract reassoc double %d, %c
+  %add = fadd contract reassoc double %mul1, %mul
+  %mul3 = fmul contract reassoc double %mul1, %add
   ret double %mul3
 }
@@ -92,9 +92,9 @@
 ; CHECK-NEXT: xsmaddadp 1, 4, 3
 ; CHECK-NEXT: blr
 entry:
-  %mul = fmul reassoc double %b, %a
-  %mul1 = fmul reassoc double %d, %c
-  %add = fadd reassoc double %mul1, %mul
+  %mul = fmul contract reassoc double %b, %a
+  %mul1 = fmul contract reassoc double %d, %c
+  %add = fadd contract reassoc double %mul1, %mul
   ret double %add
 }
@@ -108,12 +108,12 @@
 ; CHECK-NEXT: xsnmsubadp 1, 3, 4
 ; CHECK-NEXT: stfd 0, 0(9)
 ; CHECK-NEXT: blr
-  %ab = fmul reassoc double %a, %b
-  %cd = fmul reassoc double %c, %d
+  %ab = fmul contract reassoc double %a, %b
+  %cd = fmul contract reassoc double %c, %d
   store double %ab, double* %p1 ; extra use of %ab
   store double %ab, double* %p2 ; another extra use of %ab
   store double %cd, double* %p3 ; extra use of %cd
-  %r = fsub reassoc nsz double %ab, %cd
+  %r = fsub contract reassoc nsz double %ab, %cd
   ret double %r
 }
@@ -128,12 +128,12 @@
 ; CHECK-NEXT: xsmsubadp 0, 1, 2
 ; CHECK-NEXT: fmr 1, 0
 ; CHECK-NEXT: blr
-  %ab = fmul reassoc double %a, %b
-  %cd = fmul reassoc double %c, %d
+  %ab = fmul contract reassoc double %a, %b
+  %cd = fmul contract reassoc double %c, %d
   store double %ab, double* %p1 ; extra use of %ab
   store double %cd, double* %p2 ; extra use of %cd
   store double %cd, double* %p3 ; another extra use of %cd
-  %r = fsub reassoc double %ab, %cd
+  %r = fsub contract reassoc double %ab, %cd
   ret double %r
 }
@@ -150,14 +150,14 @@
 ; CHECK-NEXT: xsnmsubadp 0, 3, 4
 ; CHECK-NEXT: xsadddp 1, 0, 1
 ; CHECK-NEXT: blr
-  %ab = fmul reassoc double %a, %b
-  %cd = fmul reassoc double %c, %d
-  %fg = fmul reassoc double %f, %g
+  %ab = fmul contract reassoc double %a, %b
+  %cd = fmul contract reassoc double %c, %d
+  %fg = fmul contract reassoc double %f, %g
   store double %ab, double* %p1 ; extra use of %ab
   store double %ab, double* %p2 ; another extra use of %ab
   store double %fg, double* %p3 ; extra use of %fg
-  %q = fsub reassoc nsz double %fg, %cd ; The uses of %cd reduce to 1 after %r is folded. 2 uses of %fg, fold %cd, remove def of %cd
-  %r = fsub reassoc nsz double %ab, %cd ; Fold %r before %q. 3 uses of %ab, 2 uses of %cd, fold %cd
-  %add = fadd reassoc double %r, %q
+  %q = fsub contract reassoc nsz double %fg, %cd ; The uses of %cd reduce to 1 after %r is folded. 2 uses of %fg, fold %cd, remove def of %cd
+  %r = fsub contract reassoc nsz double %ab, %cd ; Fold %r before %q. 3 uses of %ab, 2 uses of %cd, fold %cd
+  %add = fadd contract reassoc double %r, %q
   ret double %add
 }
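The fma-precision.ll cases above also exercise the heuristic from the DAGCombiner comment: when (fadd (fmul u, v), (fmul x, y)) can fold only one multiply, the one with fewer uses is folded so the other product stays available in a register. A reduced sketch of that situation (hypothetical function, same flags as the tests):

define double @fold_fewer_uses(double %a, double %b, double %c, double %d, double* %p) {
  %ab = fmul contract reassoc double %a, %b
  %cd = fmul contract reassoc double %c, %d
  store double %ab, double* %p                   ; extra use keeps %ab live
  %r = fsub contract reassoc nsz double %ab, %cd ; %cd has fewer uses, so it is the one folded
  ret double %r
}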
diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
--- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
+++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
@@ -14,15 +14,17 @@
 ; X * Y + Z --> fma(X, Y, Z)
+; The contract bit on the fmul is checked.
+
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract1:'
-; FMFDEBUG: fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG-NOT: fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_contract1:'
 define float @fmul_fadd_contract1(float %x, float %y, float %z) {
 ; FMF-LABEL: fmul_fadd_contract1:
 ; FMF: # %bb.0:
-; FMF-NEXT: xsmaddasp 3, 1, 2
-; FMF-NEXT: fmr 1, 3
+; FMF-NEXT: xsmulsp 0, 1, 2
+; FMF-NEXT: xsaddsp 1, 0, 3
 ; FMF-NEXT: blr
 ;
 ; GLOBAL-LABEL: fmul_fadd_contract1:
@@ -35,20 +37,41 @@
   ret float %add
 }
-; This shouldn't change anything - the intermediate fmul result is now also flagged.
+; The contract bit on the fadd is also checked.
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract2:'
-; FMFDEBUG: fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG-NOT: fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_contract2:'
 define float @fmul_fadd_contract2(float %x, float %y, float %z) {
 ; FMF-LABEL: fmul_fadd_contract2:
 ; FMF: # %bb.0:
+; FMF-NEXT: xsmulsp 0, 1, 2
+; FMF-NEXT: xsaddsp 1, 0, 3
+; FMF-NEXT: blr
+;
+; GLOBAL-LABEL: fmul_fadd_contract2:
+; GLOBAL: # %bb.0:
+; GLOBAL-NEXT: xsmaddasp 3, 1, 2
+; GLOBAL-NEXT: fmr 1, 3
+; GLOBAL-NEXT: blr
+  %mul = fmul contract float %x, %y
+  %add = fadd float %mul, %z
+  ret float %add
+}
+
+; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract3:'
+; FMFDEBUG: fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_contract3:'
+
+define float @fmul_fadd_contract3(float %x, float %y, float %z) {
+; FMF-LABEL: fmul_fadd_contract3:
+; FMF: # %bb.0:
 ; FMF-NEXT: xsmaddasp 3, 1, 2
 ; FMF-NEXT: fmr 1, 3
 ; FMF-NEXT: blr
 ;
-; GLOBAL-LABEL: fmul_fadd_contract2:
+; GLOBAL-LABEL: fmul_fadd_contract3:
 ; GLOBAL: # %bb.0:
 ; GLOBAL-NEXT: xsmaddasp 3, 1, 2
 ; GLOBAL-NEXT: fmr 1, 3
@@ -58,17 +81,17 @@
   ret float %add
 }
-; Reassociation implies that FMA contraction is allowed.
+; Reassociation does NOT imply that FMA contraction is allowed.
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc1:'
-; FMFDEBUG: fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG-NOT: fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc1:'
 define float @fmul_fadd_reassoc1(float %x, float %y, float %z) {
 ; FMF-LABEL: fmul_fadd_reassoc1:
 ; FMF: # %bb.0:
-; FMF-NEXT: xsmaddasp 3, 1, 2
-; FMF-NEXT: fmr 1, 3
+; FMF-NEXT: xsmulsp 0, 1, 2
+; FMF-NEXT: xsaddsp 1, 0, 3
 ; FMF-NEXT: blr
 ;
 ; GLOBAL-LABEL: fmul_fadd_reassoc1:
@@ -82,16 +105,15 @@
 }
 ; This shouldn't change anything - the intermediate fmul result is now also flagged.
-
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc2:'
-; FMFDEBUG: fma reassoc {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG-NOT: fma reassoc {{t[0-9]+}}, {{t[0-9]+}}
 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc2:'
 define float @fmul_fadd_reassoc2(float %x, float %y, float %z) {
 ; FMF-LABEL: fmul_fadd_reassoc2:
 ; FMF: # %bb.0:
-; FMF-NEXT: xsmaddasp 3, 1, 2
-; FMF-NEXT: fmr 1, 3
+; FMF-NEXT: xsmulsp 0, 1, 2
+; FMF-NEXT: xsaddsp 1, 0, 3
 ; FMF-NEXT: blr
 ;
 ; GLOBAL-LABEL: fmul_fadd_reassoc2:
@@ -104,17 +126,17 @@
   ret float %add
 }
-; The fadd is now fully 'fast'. This implies that contraction is allowed.
+; The fadd is now fully 'fast', but the fmul is not yet.
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast1:'
-; FMFDEBUG: fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG-NOT: fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_fast1:'
 define float @fmul_fadd_fast1(float %x, float %y, float %z) {
 ; FMF-LABEL: fmul_fadd_fast1:
 ; FMF: # %bb.0:
-; FMF-NEXT: xsmaddasp 3, 1, 2
-; FMF-NEXT: fmr 1, 3
+; FMF-NEXT: xsmulsp 0, 1, 2
+; FMF-NEXT: xsaddsp 1, 0, 3
 ; FMF-NEXT: blr
 ;
 ; GLOBAL-LABEL: fmul_fadd_fast1:
@@ -122,15 +144,15 @@
 ; GLOBAL-NEXT: xsmaddasp 3, 1, 2
 ; GLOBAL-NEXT: fmr 1, 3
 ; GLOBAL-NEXT: blr
-  %mul = fmul reassoc float %x, %y
-  %add = fadd reassoc float %mul, %z
+  %mul = fmul float %x, %y
+  %add = fadd fast float %mul, %z
   ret float %add
 }
-; This shouldn't change anything - the intermediate fmul result is now also flagged.
+; This implies that contraction is allowed - the intermediate fmul result is now also flagged.
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast2:'
-; FMFDEBUG: fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG: fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_fast2:'
 define float @fmul_fadd_fast2(float %x, float %y, float %z) {
@@ -145,8 +167,8 @@
 ; GLOBAL-NEXT: xsmaddasp 3, 1, 2
 ; GLOBAL-NEXT: fmr 1, 3
 ; GLOBAL-NEXT: blr
-  %mul = fmul reassoc float %x, %y
-  %add = fadd reassoc float %mul, %z
+  %mul = fmul fast float %x, %y
+  %add = fadd fast float %mul, %z
   ret float %add
 }
@@ -164,15 +186,15 @@
 define float @fmul_fma_reassoc1(float %x) {
 ; FMF-LABEL: fmul_fma_reassoc1:
 ; FMF: # %bb.0:
-; FMF-NEXT: addis 3, 2, .LCPI6_0@toc@ha
-; FMF-NEXT: lfs 0, .LCPI6_0@toc@l(3)
+; FMF-NEXT: addis 3, 2, .LCPI7_0@toc@ha
+; FMF-NEXT: lfs 0, .LCPI7_0@toc@l(3)
 ; FMF-NEXT: xsmulsp 1, 1, 0
 ; FMF-NEXT: blr
 ;
 ; GLOBAL-LABEL: fmul_fma_reassoc1:
 ; GLOBAL: # %bb.0:
-; GLOBAL-NEXT: addis 3, 2, .LCPI6_0@toc@ha
-; GLOBAL-NEXT: lfs 0, .LCPI6_0@toc@l(3)
+; GLOBAL-NEXT: addis 3, 2, .LCPI7_0@toc@ha
+; GLOBAL-NEXT: lfs 0, .LCPI7_0@toc@l(3)
 ; GLOBAL-NEXT: xsmulsp 1, 1, 0
 ; GLOBAL-NEXT: blr
   %mul = fmul float %x, 42.0
@@ -193,15 +215,15 @@
 define float @fmul_fma_reassoc2(float %x) {
 ; FMF-LABEL: fmul_fma_reassoc2:
 ; FMF: # %bb.0:
-; FMF-NEXT: addis 3, 2, .LCPI7_0@toc@ha
-; FMF-NEXT: lfs 0, .LCPI7_0@toc@l(3)
+; FMF-NEXT: addis 3, 2, .LCPI8_0@toc@ha
+; FMF-NEXT: lfs 0, .LCPI8_0@toc@l(3)
 ; FMF-NEXT: xsmulsp 1, 1, 0
 ; FMF-NEXT: blr
 ;
 ; GLOBAL-LABEL: fmul_fma_reassoc2:
 ; GLOBAL: # %bb.0:
-; GLOBAL-NEXT: addis 3, 2, .LCPI7_0@toc@ha
-; GLOBAL-NEXT: lfs 0, .LCPI7_0@toc@l(3)
+; GLOBAL-NEXT: addis 3, 2, .LCPI8_0@toc@ha
+; GLOBAL-NEXT: lfs 0, .LCPI8_0@toc@l(3)
 ; GLOBAL-NEXT: xsmulsp 1, 1, 0
 ; GLOBAL-NEXT: blr
   %mul = fmul reassoc float %x, 42.0
@@ -212,58 +234,58 @@
 ; The FMA is now fully 'fast'. This implies that reassociation is allowed.
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
-; FMFDEBUG: fmul reassoc {{t[0-9]+}}
+; FMFDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'
 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
-; GLOBALDEBUG: fmul reassoc {{t[0-9]+}}
+; GLOBALDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
 ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'
 define float @fmul_fma_fast1(float %x) {
 ; FMF-LABEL: fmul_fma_fast1:
 ; FMF: # %bb.0:
-; FMF-NEXT: addis 3, 2, .LCPI8_0@toc@ha
-; FMF-NEXT: lfs 0, .LCPI8_0@toc@l(3)
+; FMF-NEXT: addis 3, 2, .LCPI9_0@toc@ha
+; FMF-NEXT: lfs 0, .LCPI9_0@toc@l(3)
 ; FMF-NEXT: xsmulsp 1, 1, 0
 ; FMF-NEXT: blr
 ;
 ; GLOBAL-LABEL: fmul_fma_fast1:
 ; GLOBAL: # %bb.0:
-; GLOBAL-NEXT: addis 3, 2, .LCPI8_0@toc@ha
-; GLOBAL-NEXT: lfs 0, .LCPI8_0@toc@l(3)
+; GLOBAL-NEXT: addis 3, 2, .LCPI9_0@toc@ha
+; GLOBAL-NEXT: lfs 0, .LCPI9_0@toc@l(3)
 ; GLOBAL-NEXT: xsmulsp 1, 1, 0
 ; GLOBAL-NEXT: blr
   %mul = fmul float %x, 42.0
-  %fma = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul)
+  %fma = call fast float @llvm.fma.f32(float %x, float 7.0, float %mul)
   ret float %fma
 }
 ; This shouldn't change anything - the intermediate fmul result is now also flagged.
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
-; FMFDEBUG: fmul reassoc {{t[0-9]+}}
+; FMFDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'
 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
-; GLOBALDEBUG: fmul reassoc {{t[0-9]+}}
+; GLOBALDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
 ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'
 define float @fmul_fma_fast2(float %x) {
 ; FMF-LABEL: fmul_fma_fast2:
 ; FMF: # %bb.0:
-; FMF-NEXT: addis 3, 2, .LCPI9_0@toc@ha
-; FMF-NEXT: lfs 0, .LCPI9_0@toc@l(3)
+; FMF-NEXT: addis 3, 2, .LCPI10_0@toc@ha
+; FMF-NEXT: lfs 0, .LCPI10_0@toc@l(3)
 ; FMF-NEXT: xsmulsp 1, 1, 0
 ; FMF-NEXT: blr
 ;
 ; GLOBAL-LABEL: fmul_fma_fast2:
 ; GLOBAL: # %bb.0:
-; GLOBAL-NEXT: addis 3, 2, .LCPI9_0@toc@ha
-; GLOBAL-NEXT: lfs 0, .LCPI9_0@toc@l(3)
+; GLOBAL-NEXT: addis 3, 2, .LCPI10_0@toc@ha
+; GLOBAL-NEXT: lfs 0, .LCPI10_0@toc@l(3)
 ; GLOBAL-NEXT: xsmulsp 1, 1, 0
 ; GLOBAL-NEXT: blr
-  %mul = fmul reassoc float %x, 42.0
-  %fma = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul)
+  %mul = fmul fast float %x, 42.0
+  %fma = call fast float @llvm.fma.f32(float %x, float 7.0, float %mul)
   ret float %fma
 }
@@ -281,45 +303,45 @@
 ; FMF-LABEL: sqrt_afn_ieee:
 ; FMF: # %bb.0:
 ; FMF-NEXT: xsabsdp 0, 1
-; FMF-NEXT: addis 3, 2, .LCPI10_2@toc@ha
-; FMF-NEXT: lfs 2, .LCPI10_2@toc@l(3)
+; FMF-NEXT: addis 3, 2, .LCPI11_2@toc@ha
+; FMF-NEXT: lfs 2, .LCPI11_2@toc@l(3)
 ; FMF-NEXT: fcmpu 0, 0, 2
 ; FMF-NEXT: xxlxor 0, 0, 0
-; FMF-NEXT: blt 0, .LBB10_2
+; FMF-NEXT: blt 0, .LBB11_2
 ; FMF-NEXT: # %bb.1:
 ; FMF-NEXT: xsrsqrtesp 0, 1
-; FMF-NEXT: addis 3, 2, .LCPI10_0@toc@ha
-; FMF-NEXT: addis 4, 2, .LCPI10_1@toc@ha
-; FMF-NEXT: lfs 2, .LCPI10_0@toc@l(3)
-; FMF-NEXT: lfs 3, .LCPI10_1@toc@l(4)
+; FMF-NEXT: addis 3, 2, .LCPI11_0@toc@ha
+; FMF-NEXT: addis 4, 2, .LCPI11_1@toc@ha
+; FMF-NEXT: lfs 2, .LCPI11_0@toc@l(3)
+; FMF-NEXT: lfs 3, .LCPI11_1@toc@l(4)
 ; FMF-NEXT: xsmulsp 1, 1, 0
 ; FMF-NEXT: xsmulsp 0, 1, 0
 ; FMF-NEXT: xsmulsp 1, 1, 2
 ; FMF-NEXT: xsaddsp 0, 0, 3
 ; FMF-NEXT: xsmulsp 0, 1, 0
-; FMF-NEXT: .LBB10_2:
+; FMF-NEXT: .LBB11_2:
 ; FMF-NEXT: fmr 1, 0
 ; FMF-NEXT: blr
 ;
 ; GLOBAL-LABEL: sqrt_afn_ieee:
 ; GLOBAL: # %bb.0:
 ; GLOBAL-NEXT: xsabsdp 0, 1
-; GLOBAL-NEXT: addis 3, 2, .LCPI10_2@toc@ha
-; GLOBAL-NEXT: lfs 2, .LCPI10_2@toc@l(3)
+; GLOBAL-NEXT: addis 3, 2, .LCPI11_2@toc@ha
+; GLOBAL-NEXT: lfs 2, .LCPI11_2@toc@l(3)
 ; GLOBAL-NEXT: fcmpu 0, 0, 2
 ; GLOBAL-NEXT: xxlxor 0, 0, 0
-; GLOBAL-NEXT: blt 0, .LBB10_2
+; GLOBAL-NEXT: blt 0, .LBB11_2
 ; GLOBAL-NEXT: # %bb.1:
 ; GLOBAL-NEXT: xsrsqrtesp 0, 1
-; GLOBAL-NEXT: addis 3, 2, .LCPI10_0@toc@ha
-; GLOBAL-NEXT: addis 4, 2, .LCPI10_1@toc@ha
-; GLOBAL-NEXT: lfs 2, .LCPI10_0@toc@l(3)
-; GLOBAL-NEXT: lfs 3, .LCPI10_1@toc@l(4)
+; GLOBAL-NEXT: addis 3, 2, .LCPI11_0@toc@ha
+; GLOBAL-NEXT: addis 4, 2, .LCPI11_1@toc@ha
+; GLOBAL-NEXT: lfs 2, .LCPI11_0@toc@l(3)
+; GLOBAL-NEXT: lfs 3, .LCPI11_1@toc@l(4)
 ; GLOBAL-NEXT: xsmulsp 1, 1, 0
 ; GLOBAL-NEXT: xsmaddasp 2, 1, 0
 ; GLOBAL-NEXT: xsmulsp 0, 1, 3
 ; GLOBAL-NEXT: xsmulsp 0, 0, 2
-; GLOBAL-NEXT: .LBB10_2:
+; GLOBAL-NEXT: .LBB11_2:
 ; GLOBAL-NEXT: fmr 1, 0
 ; GLOBAL-NEXT: blr
   %rt = call afn ninf float @llvm.sqrt.f32(float %x)
@@ -353,19 +375,19 @@
 ; FMF: # %bb.0:
 ; FMF-NEXT: xxlxor 0, 0, 0
 ; FMF-NEXT: fcmpu 0, 1, 0
-; FMF-NEXT: beq 0, .LBB12_2
+; FMF-NEXT: beq 0, .LBB13_2
 ; FMF-NEXT: # %bb.1:
 ; FMF-NEXT: xsrsqrtesp 0, 1
-; FMF-NEXT: addis 3, 2, .LCPI12_0@toc@ha
-; FMF-NEXT: addis 4, 2, .LCPI12_1@toc@ha
-; FMF-NEXT: lfs 2, .LCPI12_0@toc@l(3)
-; FMF-NEXT: lfs 3, .LCPI12_1@toc@l(4)
+; FMF-NEXT: addis 3, 2, .LCPI13_0@toc@ha
+; FMF-NEXT: addis 4, 2, .LCPI13_1@toc@ha
+; FMF-NEXT: lfs 2, .LCPI13_0@toc@l(3)
+; FMF-NEXT: lfs 3, .LCPI13_1@toc@l(4)
 ; FMF-NEXT: xsmulsp 1, 1, 0
 ; FMF-NEXT: xsmulsp 0, 1, 0
 ; FMF-NEXT: xsmulsp 1, 1, 2
 ; FMF-NEXT: xsaddsp 0, 0, 3
 ; FMF-NEXT: xsmulsp 0, 1, 0
-; FMF-NEXT: .LBB12_2:
+; FMF-NEXT: .LBB13_2:
 ; FMF-NEXT: fmr 1, 0
 ; FMF-NEXT: blr
 ;
@@ -373,18 +395,18 @@
 ; GLOBAL: # %bb.0:
 ; GLOBAL-NEXT: xxlxor 0, 0, 0
 ; GLOBAL-NEXT: fcmpu 0, 1, 0
-; GLOBAL-NEXT: beq 0, .LBB12_2
+; GLOBAL-NEXT: beq 0, .LBB13_2
 ; GLOBAL-NEXT: # %bb.1:
 ; GLOBAL-NEXT: xsrsqrtesp 0, 1
-; GLOBAL-NEXT: addis 3, 2, .LCPI12_0@toc@ha
-; GLOBAL-NEXT: addis 4, 2, .LCPI12_1@toc@ha
-; GLOBAL-NEXT: lfs 2, .LCPI12_0@toc@l(3)
-; GLOBAL-NEXT: lfs 3, .LCPI12_1@toc@l(4)
+; GLOBAL-NEXT: addis 3, 2, .LCPI13_0@toc@ha
+; GLOBAL-NEXT: addis 4, 2, .LCPI13_1@toc@ha
+; GLOBAL-NEXT: lfs 2, .LCPI13_0@toc@l(3)
+; GLOBAL-NEXT: lfs 3, .LCPI13_1@toc@l(4)
 ; GLOBAL-NEXT: xsmulsp 1, 1, 0
 ; GLOBAL-NEXT: xsmaddasp 2, 1, 0
 ; GLOBAL-NEXT: xsmulsp 0, 1, 3
 ; GLOBAL-NEXT: xsmulsp 0, 0, 2
-; GLOBAL-NEXT: .LBB12_2:
+; GLOBAL-NEXT: .LBB13_2:
 ; GLOBAL-NEXT: fmr 1, 0
 ; GLOBAL-NEXT: blr
   %rt = call afn ninf float @llvm.sqrt.f32(float %x)
@@ -408,69 +430,69 @@
 ; The call is now fully 'fast'. This implies that approximation is allowed.
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_ieee:'
-; FMFDEBUG: fmul ninf afn reassoc {{t[0-9]+}}
+; FMFDEBUG: fmul ninf contract afn reassoc {{t[0-9]+}}
 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast_ieee:'
 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_ieee:'
-; GLOBALDEBUG: fmul ninf afn reassoc {{t[0-9]+}}
+; GLOBALDEBUG: fmul ninf contract afn reassoc {{t[0-9]+}}
 ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast_ieee:'
 define float @sqrt_fast_ieee(float %x) #0 {
 ; FMF-LABEL: sqrt_fast_ieee:
 ; FMF: # %bb.0:
 ; FMF-NEXT: xsabsdp 0, 1
-; FMF-NEXT: addis 3, 2, .LCPI14_2@toc@ha
-; FMF-NEXT: lfs 2, .LCPI14_2@toc@l(3)
+; FMF-NEXT: addis 3, 2, .LCPI15_2@toc@ha
+; FMF-NEXT: lfs 2, .LCPI15_2@toc@l(3)
 ; FMF-NEXT: fcmpu 0, 0, 2
 ; FMF-NEXT: xxlxor 0, 0, 0
-; FMF-NEXT: blt 0, .LBB14_2
+; FMF-NEXT: blt 0, .LBB15_2
 ; FMF-NEXT: # %bb.1:
 ; FMF-NEXT: xsrsqrtesp 0, 1
-; FMF-NEXT: addis 3, 2, .LCPI14_0@toc@ha
-; FMF-NEXT: addis 4, 2, .LCPI14_1@toc@ha
-; FMF-NEXT: lfs 2, .LCPI14_0@toc@l(3)
-; FMF-NEXT: lfs 3, .LCPI14_1@toc@l(4)
+; FMF-NEXT: addis 3, 2, .LCPI15_0@toc@ha
+; FMF-NEXT: addis 4, 2, .LCPI15_1@toc@ha
+; FMF-NEXT: lfs 2, .LCPI15_0@toc@l(3)
+; FMF-NEXT: lfs 3, .LCPI15_1@toc@l(4)
 ; FMF-NEXT: xsmulsp 1, 1, 0
 ; FMF-NEXT: xsmaddasp 2, 1, 0
 ; FMF-NEXT: xsmulsp 0, 1, 3
 ; FMF-NEXT: xsmulsp 0, 0, 2
-; FMF-NEXT: .LBB14_2:
+; FMF-NEXT: .LBB15_2:
 ; FMF-NEXT: fmr 1, 0
 ; FMF-NEXT: blr
 ;
 ; GLOBAL-LABEL: sqrt_fast_ieee:
 ; GLOBAL: # %bb.0:
 ; GLOBAL-NEXT: xsabsdp 0, 1
-; GLOBAL-NEXT: addis 3, 2, .LCPI14_2@toc@ha
-; GLOBAL-NEXT: lfs 2, .LCPI14_2@toc@l(3)
+; GLOBAL-NEXT: addis 3, 2, .LCPI15_2@toc@ha
+; GLOBAL-NEXT: lfs 2, .LCPI15_2@toc@l(3)
 ; GLOBAL-NEXT: fcmpu 0, 0, 2
 ; GLOBAL-NEXT: xxlxor 0, 0, 0
-; GLOBAL-NEXT: blt 0, .LBB14_2
+; GLOBAL-NEXT: blt 0, .LBB15_2
 ; GLOBAL-NEXT: # %bb.1:
 ; GLOBAL-NEXT: xsrsqrtesp 0, 1
-; GLOBAL-NEXT: addis 3, 2, .LCPI14_0@toc@ha
-; GLOBAL-NEXT: addis 4, 2, .LCPI14_1@toc@ha
-; GLOBAL-NEXT: lfs 2, .LCPI14_0@toc@l(3)
-; GLOBAL-NEXT: lfs 3, .LCPI14_1@toc@l(4)
+; GLOBAL-NEXT: addis 3, 2, .LCPI15_0@toc@ha
+; GLOBAL-NEXT: addis 4, 2, .LCPI15_1@toc@ha
+; GLOBAL-NEXT: lfs 2, .LCPI15_0@toc@l(3)
+; GLOBAL-NEXT: lfs 3, .LCPI15_1@toc@l(4)
 ; GLOBAL-NEXT: xsmulsp 1, 1, 0
 ; GLOBAL-NEXT: xsmaddasp 2, 1, 0
 ; GLOBAL-NEXT: xsmulsp 0, 1, 3
 ; GLOBAL-NEXT: xsmulsp 0, 0, 2
-; GLOBAL-NEXT: .LBB14_2:
+; GLOBAL-NEXT: .LBB15_2:
 ; GLOBAL-NEXT: fmr 1, 0
 ; GLOBAL-NEXT: blr
-  %rt = call reassoc afn ninf float @llvm.sqrt.f32(float %x)
+  %rt = call contract reassoc afn ninf float @llvm.sqrt.f32(float %x)
   ret float %rt
 }
 ; The call is now fully 'fast'. This implies that approximation is allowed.
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
-; FMFDEBUG: fmul ninf afn reassoc {{t[0-9]+}}
+; FMFDEBUG: fmul ninf contract afn reassoc {{t[0-9]+}}
 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
-; GLOBALDEBUG: fmul ninf afn reassoc {{t[0-9]+}}
+; GLOBALDEBUG: fmul ninf contract afn reassoc {{t[0-9]+}}
 ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
 define float @sqrt_fast_preserve_sign(float %x) #1 {
@@ -478,18 +500,18 @@
 ; FMF: # %bb.0:
 ; FMF-NEXT: xxlxor 0, 0, 0
 ; FMF-NEXT: fcmpu 0, 1, 0
-; FMF-NEXT: beq 0, .LBB15_2
+; FMF-NEXT: beq 0, .LBB16_2
 ; FMF-NEXT: # %bb.1:
 ; FMF-NEXT: xsrsqrtesp 0, 1
-; FMF-NEXT: addis 3, 2, .LCPI15_0@toc@ha
-; FMF-NEXT: addis 4, 2, .LCPI15_1@toc@ha
-; FMF-NEXT: lfs 2, .LCPI15_0@toc@l(3)
-; FMF-NEXT: lfs 3, .LCPI15_1@toc@l(4)
+; FMF-NEXT: addis 3, 2, .LCPI16_0@toc@ha
+; FMF-NEXT: addis 4, 2, .LCPI16_1@toc@ha
+; FMF-NEXT: lfs 2, .LCPI16_0@toc@l(3)
+; FMF-NEXT: lfs 3, .LCPI16_1@toc@l(4)
 ; FMF-NEXT: xsmulsp 1, 1, 0
 ; FMF-NEXT: xsmaddasp 2, 1, 0
 ; FMF-NEXT: xsmulsp 0, 1, 3
 ; FMF-NEXT: xsmulsp 0, 0, 2
-; FMF-NEXT: .LBB15_2:
+; FMF-NEXT: .LBB16_2:
 ; FMF-NEXT: fmr 1, 0
 ; FMF-NEXT: blr
 ;
@@ -497,21 +519,21 @@
 ; GLOBAL: # %bb.0:
 ; GLOBAL-NEXT: xxlxor 0, 0, 0
 ; GLOBAL-NEXT: fcmpu 0, 1, 0
-; GLOBAL-NEXT: beq 0, .LBB15_2
+; GLOBAL-NEXT: beq 0, .LBB16_2
 ; GLOBAL-NEXT: # %bb.1:
 ; GLOBAL-NEXT: xsrsqrtesp 0, 1
-; GLOBAL-NEXT: addis 3, 2, .LCPI15_0@toc@ha
-; GLOBAL-NEXT: addis 4, 2, .LCPI15_1@toc@ha
-; GLOBAL-NEXT: lfs 2, .LCPI15_0@toc@l(3)
-; GLOBAL-NEXT: lfs 3, .LCPI15_1@toc@l(4)
+; GLOBAL-NEXT: addis 3, 2, .LCPI16_0@toc@ha
+; GLOBAL-NEXT: addis 4, 2, .LCPI16_1@toc@ha
+; GLOBAL-NEXT: lfs 2, .LCPI16_0@toc@l(3)
+; GLOBAL-NEXT: lfs 3, .LCPI16_1@toc@l(4)
 ; GLOBAL-NEXT: xsmulsp 1, 1, 0
 ; GLOBAL-NEXT: xsmaddasp 2, 1, 0
 ; GLOBAL-NEXT: xsmulsp 0, 1, 3
 ; GLOBAL-NEXT: xsmulsp 0, 0, 2
-; GLOBAL-NEXT: .LBB15_2:
+; GLOBAL-NEXT: .LBB16_2:
 ; GLOBAL-NEXT: fmr 1, 0
 ; GLOBAL-NEXT: blr
-  %rt = call reassoc ninf afn float @llvm.sqrt.f32(float %x)
+  %rt = call contract reassoc ninf afn float @llvm.sqrt.f32(float %x)
   ret float %rt
 }
@@ -530,10 +552,10 @@
 ; FMF: # %bb.0:
 ; FMF-NEXT: xxlxor 0, 0, 0
 ; FMF-NEXT: xscmpudp 0, 1, 0
-; FMF-NEXT: blt 0, .LBB16_2
+; FMF-NEXT: blt 0, .LBB17_2
 ; FMF-NEXT: # %bb.1:
 ; FMF-NEXT: fmr 3, 2
-; FMF-NEXT: .LBB16_2:
+; FMF-NEXT: .LBB17_2:
 ; FMF-NEXT: fmr 1, 3
 ; FMF-NEXT: blr
 ;
@@ -541,10 +563,10 @@
 ; GLOBAL: # %bb.0:
 ; GLOBAL-NEXT: xxlxor 0, 0, 0
 ; GLOBAL-NEXT: xscmpudp 0, 1, 0
-; GLOBAL-NEXT: blt 0, .LBB16_2
+; GLOBAL-NEXT: blt 0, .LBB17_2
 ; GLOBAL-NEXT: # %bb.1:
 ; GLOBAL-NEXT: fmr 3, 2
-; GLOBAL-NEXT: .LBB16_2:
+; GLOBAL-NEXT: .LBB17_2:
 ; GLOBAL-NEXT: fmr 1, 3
 ; GLOBAL-NEXT: blr
   %cmp = fcmp nnan ult double %a, 0.0
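The fmf-propagation.ll changes above pin down the rule: the contract flag must be present on both the fadd/fsub being combined and the fmul feeding it; flagging only one side no longer produces an fma node. In sketch form (hypothetical IR mirroring the contract1/contract2/contract3 cases):

  %m1 = fmul float %x, %y
  %a1 = fadd contract float %m1, %z   ; fmul lacks contract: no fma

  %m2 = fmul contract float %x, %y
  %a2 = fadd float %m2, %z            ; fadd lacks contract: no fma

  %m3 = fmul contract float %x, %y
  %a3 = fadd contract float %m3, %z   ; both flagged: fma is formed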
diff --git a/llvm/test/CodeGen/PowerPC/machine-combiner.ll b/llvm/test/CodeGen/PowerPC/machine-combiner.ll
--- a/llvm/test/CodeGen/PowerPC/machine-combiner.ll
+++ b/llvm/test/CodeGen/PowerPC/machine-combiner.ll
@@ -208,11 +208,11 @@
 ; CHECK-PWR-DAG: xsmaddadp 2, 4, 3
 ; CHECK-PWR: xsadddp 1, 2, 1
 ; CHECK-NEXT: blr
-  %7 = fmul reassoc nsz double %3, %2
-  %8 = fmul reassoc nsz double %5, %4
-  %9 = fadd reassoc nsz double %1, %0
-  %10 = fadd reassoc nsz double %9, %7
-  %11 = fadd reassoc nsz double %10, %8
+  %7 = fmul contract reassoc nsz double %3, %2
+  %8 = fmul contract reassoc nsz double %5, %4
+  %9 = fadd contract reassoc nsz double %1, %0
+  %10 = fadd contract reassoc nsz double %9, %7
+  %11 = fadd contract reassoc nsz double %10, %8
   ret double %11
 }
@@ -223,11 +223,11 @@
 ; CHECK-DAG: fmadds [[REG1:[0-9]+]], 6, 5, 1
 ; CHECK: fadds 1, [[REG0]], [[REG1]]
 ; CHECK-NEXT: blr
-  %7 = fmul reassoc nsz float %3, %2
-  %8 = fmul reassoc nsz float %5, %4
-  %9 = fadd reassoc nsz float %1, %0
-  %10 = fadd reassoc nsz float %9, %7
-  %11 = fadd reassoc nsz float %10, %8
+  %7 = fmul contract reassoc nsz float %3, %2
+  %8 = fmul contract reassoc nsz float %5, %4
+  %9 = fadd contract reassoc nsz float %1, %0
+  %10 = fadd contract reassoc nsz float %9, %7
+  %11 = fadd contract reassoc nsz float %10, %8
   ret float %11
 }
@@ -238,11 +238,11 @@
 ; CHECK-PWR-DAG: xvmaddasp [[REG1:[0-9]+]], 37, 36
 ; CHECK-PWR: xvaddsp 34, [[REG1]], [[REG0]]
 ; CHECK-NEXT: blr
-  %7 = fmul reassoc nsz <4 x float> %3, %2
-  %8 = fmul reassoc nsz <4 x float> %5, %4
-  %9 = fadd reassoc nsz <4 x float> %1, %0
-  %10 = fadd reassoc nsz <4 x float> %9, %7
-  %11 = fadd reassoc nsz <4 x float> %10, %8
+  %7 = fmul contract reassoc nsz <4 x float> %3, %2
+  %8 = fmul contract reassoc nsz <4 x float> %5, %4
+  %9 = fadd contract reassoc nsz <4 x float> %1, %0
+  %10 = fadd contract reassoc nsz <4 x float> %9, %7
+  %11 = fadd contract reassoc nsz <4 x float> %10, %8
   ret <4 x float> %11
 }
@@ -255,14 +255,14 @@
 ; CHECK-PWR-DAG: xsmaddadp [[REG0]], 9, 8
 ; CHECK-PWR: xsadddp 1, 7, [[REG0]]
 ; CHECK-NEXT: blr
-  %10 = fmul reassoc nsz double %1, %0
-  %11 = fmul reassoc nsz double %3, %2
-  %12 = fmul reassoc nsz double %5, %4
-  %13 = fmul reassoc nsz double %8, %7
-  %14 = fadd reassoc nsz double %11, %10
-  %15 = fadd reassoc nsz double %14, %6
-  %16 = fadd reassoc nsz double %15, %12
-  %17 = fadd reassoc nsz double %16, %13
+  %10 = fmul contract reassoc nsz double %1, %0
+  %11 = fmul contract reassoc nsz double %3, %2
+  %12 = fmul contract reassoc nsz double %5, %4
+  %13 = fmul contract reassoc nsz double %8, %7
+  %14 = fadd contract reassoc nsz double %11, %10
+  %15 = fadd contract reassoc nsz double %14, %6
+  %16 = fadd contract reassoc nsz double %15, %12
+  %17 = fadd contract reassoc nsz double %16, %13
   ret double %17
 }
@@ -282,22 +282,22 @@
 ; CHECK-DAG: fmadds [[REG7:[0-9]+]], 5, 4, [[REG5]]
 ; CHECK: fadds 1, [[REG7]], [[REG6]]
 ; CHECK-NEXT: blr
-  %18 = fmul reassoc nsz float %2, %1
-  %19 = fadd reassoc nsz float %18, %0
-  %20 = fmul reassoc nsz float %4, %3
-  %21 = fadd reassoc nsz float %19, %20
-  %22 = fmul reassoc nsz float %6, %5
-  %23 = fadd reassoc nsz float %21, %22
-  %24 = fmul reassoc nsz float %8, %7
-  %25 = fadd reassoc nsz float %23, %24
-  %26 = fmul reassoc nsz float %10, %9
-  %27 = fadd reassoc nsz float %25, %26
-  %28 = fmul reassoc nsz float %12, %11
-  %29 = fadd reassoc nsz float %27, %28
-  %30 = fmul reassoc nsz float %14, %13
-  %31 = fadd reassoc nsz float %29, %30
-  %32 = fmul reassoc nsz float %16, %15
-  %33 = fadd reassoc nsz float %31, %32
+  %18 = fmul contract reassoc nsz float %2, %1
+  %19 = fadd contract reassoc nsz float %18, %0
+  %20 = fmul contract reassoc nsz float %4, %3
+  %21 = fadd contract reassoc nsz float %19, %20
+  %22 = fmul contract reassoc nsz float %6, %5
+  %23 = fadd contract reassoc nsz float %21, %22
+  %24 = fmul contract reassoc nsz float %8, %7
+  %25 = fadd contract reassoc nsz float %23, %24
+  %26 = fmul contract reassoc nsz float %10, %9
+  %27 = fadd contract reassoc nsz float %25, %26
+  %28 = fmul contract reassoc nsz float %12, %11
+  %29 = fadd contract reassoc nsz float %27, %28
+  %30 = fmul contract reassoc nsz float %14, %13
+  %31 = fadd contract reassoc nsz float %29, %30
+  %32 = fmul contract reassoc nsz float %16, %15
+  %33 = fadd contract reassoc nsz float %31, %32
   ret float %33
 }
diff --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll
--- a/llvm/test/CodeGen/PowerPC/recipest.ll
+++ b/llvm/test/CodeGen/PowerPC/recipest.ll
@@ -67,8 +67,8 @@
 ; CHECK-P9-NEXT: xsmuldp 0, 0, 4
 ; CHECK-P9-NEXT: xsmuldp 1, 1, 0
 ; CHECK-P9-NEXT: blr
-  %x = call arcp reassoc double @llvm.sqrt.f64(double %b)
-  %r = fdiv arcp reassoc double %a, %x
+  %x = call arcp contract reassoc double @llvm.sqrt.f64(double %b)
+  %r = fdiv arcp contract reassoc double %a, %x
   ret double %r
 }
@@ -160,9 +160,9 @@
 ; CHECK-P9-NEXT: xsmulsp 0, 0, 3
 ; CHECK-P9-NEXT: xsmuldp 1, 1, 0
 ; CHECK-P9-NEXT: blr
-  %x = call reassoc arcp float @llvm.sqrt.f32(float %b)
+  %x = call contract reassoc arcp float @llvm.sqrt.f32(float %b)
   %y = fpext float %x to double
-  %r = fdiv reassoc arcp double %a, %y
+  %r = fdiv contract reassoc arcp double %a, %y
   ret double %r
 }
@@ -249,9 +249,9 @@
 ; CHECK-P9-NEXT: xsrsp 0, 0
 ; CHECK-P9-NEXT: xsmulsp 1, 1, 0
 ; CHECK-P9-NEXT: blr
-  %x = call reassoc arcp double @llvm.sqrt.f64(double %b)
+  %x = call contract reassoc arcp double @llvm.sqrt.f64(double %b)
   %y = fptrunc double %x to float
-  %r = fdiv reassoc arcp float %a, %y
+  %r = fdiv contract reassoc arcp float %a, %y
   ret float %r
 }
@@ -324,8 +324,8 @@
 ; CHECK-P9-NEXT: xsmulsp 0, 0, 3
 ; CHECK-P9-NEXT: xsmulsp 1, 1, 0
 ; CHECK-P9-NEXT: blr
-  %x = call reassoc arcp float @llvm.sqrt.f32(float %b)
-  %r = fdiv reassoc arcp float %a, %x
+  %x = call contract reassoc arcp float @llvm.sqrt.f32(float %b)
+  %r = fdiv contract reassoc arcp float %a, %x
   ret float %r
 }
@@ -429,9 +429,9 @@
 ; CHECK-P9-NEXT: xsmaddasp 4, 1, 0
 ; CHECK-P9-NEXT: xsmulsp 1, 3, 4
 ; CHECK-P9-NEXT: blr
-  %x = call reassoc arcp nsz float @llvm.sqrt.f32(float %a)
-  %y = fmul reassoc nsz float %x, %b
-  %z = fdiv reassoc arcp nsz ninf float %c, %y
+  %x = call contract reassoc arcp nsz float @llvm.sqrt.f32(float %a)
+  %y = fmul contract reassoc nsz float %x, %b
+  %z = fdiv contract reassoc arcp nsz ninf float %c, %y
   ret float %z
 }
@@ -512,8 +512,8 @@
 ; CHECK-P9-NEXT: xvmulsp 0, 0, 2
 ; CHECK-P9-NEXT: xvmulsp 34, 34, 0
 ; CHECK-P9-NEXT: blr
-  %x = call reassoc arcp <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
-  %r = fdiv reassoc arcp <4 x float> %a, %x
+  %x = call contract reassoc arcp <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
+  %r = fdiv contract reassoc arcp <4 x float> %a, %x
   ret <4 x float> %r
 }
@@ -602,7 +602,7 @@
 ; CHECK-P9-NEXT: xsmaddadp 0, 3, 1
 ; CHECK-P9-NEXT: fmr 1, 0
 ; CHECK-P9-NEXT: blr
-  %r = fdiv reassoc arcp nsz ninf double %a, %b
+  %r = fdiv contract reassoc arcp nsz ninf double %a, %b
   ret double %r
 }
@@ -651,7 +651,7 @@
 ; CHECK-P9-NEXT: xsmaddasp 0, 3, 1
 ; CHECK-P9-NEXT: fmr 1, 0
 ; CHECK-P9-NEXT: blr
-  %r = fdiv reassoc arcp nsz ninf float %a, %b
+  %r = fdiv contract reassoc arcp nsz ninf float %a, %b
   ret float %r
 }
@@ -702,7 +702,7 @@
 ; CHECK-P9-NEXT: xvmaddasp 0, 1, 34
 ; CHECK-P9-NEXT: xxlor 34, 0, 0
 ; CHECK-P9-NEXT: blr
-  %r = fdiv reassoc arcp nsz ninf <4 x float> %a, %b
+  %r = fdiv contract reassoc arcp nsz ninf <4 x float> %a, %b
   ret <4 x float> %r
 }
@@ -817,7 +817,7 @@
 ; CHECK-P9-NEXT: .LBB20_2:
 ; CHECK-P9-NEXT: xssqrtdp 1, 1
 ; CHECK-P9-NEXT: blr
-  %r = call reassoc ninf afn double @llvm.sqrt.f64(double %a)
+  %r = call contract reassoc ninf afn double @llvm.sqrt.f64(double %a)
   ret double %r
 }
@@ -901,7 +901,7 @@
 ; CHECK-P9-NEXT: .LBB21_2:
 ; CHECK-P9-NEXT: xssqrtdp 1, 1
 ; CHECK-P9-NEXT: blr
-  %r = call reassoc ninf afn double @llvm.sqrt.f64(double %a)
+  %r = call contract reassoc ninf afn double @llvm.sqrt.f64(double %a)
   ret double %r
 }
@@ -991,7 +991,7 @@
 ; CHECK-P9-NEXT: .LBB23_2:
 ; CHECK-P9-NEXT: fmr 1, 0
 ; CHECK-P9-NEXT: blr
-  %r = call reassoc ninf afn float @llvm.sqrt.f32(float %a)
+  %r = call contract reassoc ninf afn float @llvm.sqrt.f32(float %a)
   ret float %r
 }
@@ -1076,7 +1076,7 @@
 ; CHECK-P9-NEXT: .LBB25_2:
 ; CHECK-P9-NEXT: xvsqrtsp 34, 34
 ; CHECK-P9-NEXT: blr
-  %r = call reassoc ninf afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
+  %r = call contract reassoc ninf afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
   ret <4 x float> %r
 }
@@ -1207,7 +1207,7 @@
 ; CHECK-P9-NEXT: .LBB27_2:
 ; CHECK-P9-NEXT: xvsqrtdp 34, 34
 ; CHECK-P9-NEXT: blr
-  %r = call reassoc ninf afn <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
+  %r = call contract reassoc ninf afn <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
   ret <2 x double> %r
 }
@@ -1260,7 +1260,7 @@
 ; CHECK-P9: # %bb.0:
 ; CHECK-P9-NEXT: xssqrtqp 2, 2
 ; CHECK-P9-NEXT: blr
-  %r = call reassoc ninf afn fp128 @llvm.sqrt.f128(fp128 %a)
+  %r = call contract reassoc ninf afn fp128 @llvm.sqrt.f128(fp128 %a)
   ret fp128 %r
 }
diff --git a/llvm/test/CodeGen/PowerPC/register-pressure-reduction.ll b/llvm/test/CodeGen/PowerPC/register-pressure-reduction.ll
--- a/llvm/test/CodeGen/PowerPC/register-pressure-reduction.ll
+++ b/llvm/test/CodeGen/PowerPC/register-pressure-reduction.ll
@@ -37,10 +37,10 @@
 ; CHECK-FMA-NEXT: xsmaddasp f1, f4, f2
 ; CHECK-FMA-NEXT: xsmaddasp f1, f3, f0
 ; CHECK-FMA-NEXT: blr
-  %5 = fmul reassoc nsz float %1, %0
-  %6 = fsub reassoc nsz float %2, %3
-  %7 = fmul reassoc nsz float %6, 0x3DB2533FE0000000
-  %8 = fadd reassoc nsz float %7, %5
+  %5 = fmul contract reassoc nsz float %1, %0
+  %6 = fsub contract reassoc nsz float %2, %3
+  %7 = fmul contract reassoc nsz float %6, 0x3DB2533FE0000000
+  %8 = fadd contract reassoc nsz float %7, %5
   ret float %8
 }
@@ -73,10 +73,10 @@
 ; CHECK-FMA-NEXT: xsmaddadp f1, f4, f2
 ; CHECK-FMA-NEXT: xsmaddadp f1, f3, f0
 ; CHECK-FMA-NEXT: blr
-  %5 = fmul reassoc nsz double %1, %0
-  %6 = fsub reassoc nsz double %2, %3
-  %7 = fmul reassoc nsz double %6, 0x3DB2533FE68CADDE
-  %8 = fadd reassoc nsz double %7, %5
+  %5 = fmul contract reassoc nsz double %1, %0
+  %6 = fsub contract reassoc nsz double %2, %3
+  %7 = fmul contract reassoc nsz double %6, 0x3DB2533FE68CADDE
+  %8 = fadd contract reassoc nsz double %7, %5
   ret double %8
 }
@@ -125,11 +125,11 @@
 ; CHECK-FMA-NEXT: xsmulsp f0, f2, f5
 ; CHECK-FMA-NEXT: stfs f0, 0(r3)
 ; CHECK-FMA-NEXT: blr
-  %5 = fmul reassoc nsz float %1, %0
-  %6 = fsub reassoc nsz float %2, %3
-  %7 = fmul reassoc nsz float %6, 0x3DB2533FE0000000
-  %8 = fadd reassoc nsz float %7, %5
-  %9 = fmul reassoc nsz float %1, 0xBDB2533FE0000000
+  %5 = fmul contract reassoc nsz float %1, %0
+  %6 = fsub contract reassoc nsz float %2, %3
+  %7 = fmul contract reassoc nsz float %6, 0x3DB2533FE0000000
+  %8 = fadd contract reassoc nsz float %7, %5
+  %9 = fmul contract reassoc nsz float %1, 0xBDB2533FE0000000
   store float %9, float* @global_val, align 4
   ret float %8
 }
diff --git a/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll b/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
--- a/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
+++ b/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
@@ -43,8 +43,8 @@
 ; CHECK-NEXT: blr
   %ins = insertelement <4 x float> undef, float %a, i32 0
   %splat = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer
-  %t1 = fmul reassoc <4 x float> %b,
-  %mul = fdiv reassoc arcp nsz ninf <4 x float> %t1, %splat
+  %t1 = fmul contract reassoc <4 x float> %b,
+  %mul = fdiv contract reassoc arcp nsz ninf <4 x float> %t1, %splat
   ret <4 x float> %mul
 }
diff --git a/llvm/test/CodeGen/X86/machine-combiner.ll b/llvm/test/CodeGen/X86/machine-combiner.ll
--- a/llvm/test/CodeGen/X86/machine-combiner.ll
+++ b/llvm/test/CodeGen/X86/machine-combiner.ll
@@ -240,8 +240,8 @@
 ; AVX512-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
 ; AVX512-NEXT: vaddps %xmm0, %xmm3, %xmm0
 ; AVX512-NEXT: retq
-  %t0 = fmul reassoc nsz <4 x float> %x0, %x1
-  %t1 = fadd reassoc nsz <4 x float> %x2, %t0
+  %t0 = fmul contract reassoc nsz <4 x float> %x0, %x1
+  %t1 = fadd contract reassoc nsz <4 x float> %x2, %t0
   %t2 = fadd reassoc nsz <4 x float> %x3, %t1
   ret <4 x float> %t2
 }
@@ -268,8 +268,8 @@
 ; AVX512-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
 ; AVX512-NEXT: vaddpd %xmm0, %xmm3, %xmm0
 ; AVX512-NEXT: retq
-  %t0 = fmul reassoc nsz <2 x double> %x0, %x1
-  %t1 = fadd reassoc nsz <2 x double> %x2, %t0
+  %t0 = fmul contract reassoc nsz <2 x double> %x0, %x1
+  %t1 = fadd contract reassoc nsz <2 x double> %x2, %t0
   %t2 = fadd reassoc nsz <2 x double> %x3, %t1
   ret <2 x double> %t2
 }
@@ -343,8 +343,8 @@
 ; AVX512-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
 ; AVX512-NEXT: vaddps %ymm0, %ymm3, %ymm0
 ; AVX512-NEXT: retq
-  %t0 = fmul reassoc nsz <8 x float> %x0, %x1
-  %t1 = fadd reassoc nsz <8 x float> %x2, %t0
+  %t0 = fmul contract reassoc nsz <8 x float> %x0, %x1
+  %t1 = fadd contract reassoc nsz <8 x float> %x2, %t0
   %t2 = fadd reassoc nsz <8 x float> %x3, %t1
   ret <8 x float> %t2
 }
@@ -374,8 +374,8 @@
 ; AVX512-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
 ; AVX512-NEXT: vaddpd %ymm0, %ymm3, %ymm0
 ; AVX512-NEXT: retq
-  %t0 = fmul reassoc nsz <4 x double> %x0, %x1
-  %t1 = fadd reassoc nsz <4 x double> %x2, %t0
+  %t0 = fmul contract reassoc nsz <4 x double> %x0, %x1
+  %t1 = fadd contract reassoc nsz <4 x double> %x2, %t0
   %t2 = fadd reassoc nsz <4 x double> %x3, %t1
   ret <4 x double> %t2
 }
@@ -464,8 +464,8 @@
 ; AVX512-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
 ; AVX512-NEXT: vaddps %zmm0, %zmm3, %zmm0
 ; AVX512-NEXT: retq
-  %t0 = fmul reassoc nsz <16 x float> %x0, %x1
-  %t1 = fadd reassoc nsz <16 x float> %x2, %t0
+  %t0 = fmul contract reassoc nsz <16 x float> %x0, %x1
+  %t1 = fadd contract reassoc nsz <16 x float> %x2, %t0
   %t2 = fadd reassoc nsz <16 x float> %x3, %t1
   ret <16 x float> %t2
 }
@@ -504,8 +504,8 @@
 ; AVX512-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
 ; AVX512-NEXT: vaddpd %zmm0, %zmm3, %zmm0
 ; AVX512-NEXT: retq
-  %t0 = fmul reassoc nsz <8 x double> %x0, %x1
-  %t1 = fadd reassoc nsz <8 x double> %x2, %t0
+  %t0 = fmul contract reassoc nsz <8 x double> %x0, %x1
+  %t1 = fadd contract reassoc nsz <8 x double> %x2, %t0
   %t2 = fadd reassoc nsz <8 x double> %x3, %t1
   ret <8 x double> %t2
 }
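Note the pattern shared by the X86 machine-combiner tests above: only the multiply/add pair expected to fuse carries contract, while the trailing fadd keeps reassoc nsz alone, so it stays a separate add that the machine combiner is still free to reassociate. Reduced sketch (hypothetical):

  %t0 = fmul contract reassoc nsz <4 x float> %x0, %x1
  %t1 = fadd contract reassoc nsz <4 x float> %x2, %t0   ; fused into vfmadd
  %t2 = fadd reassoc nsz <4 x float> %x3, %t1            ; stays a plain vaddps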
diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath.ll b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
--- a/llvm/test/CodeGen/X86/sqrt-fastmath.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
@@ -712,8 +712,8 @@
 ; AVX512-NEXT: retq
   %s = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %z)
   %a = call <4 x float> @llvm.fabs.v4f32(<4 x float> %y)
-  %m = fmul reassoc <4 x float> %a, %s
-  %d = fdiv reassoc arcp <4 x float> %x, %m
+  %m = fmul contract reassoc <4 x float> %a, %s
+  %d = fdiv contract reassoc arcp <4 x float> %x, %m
   ret <4 x float> %d
 }
@@ -895,8 +895,8 @@
 ; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: retq
   %s = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %y)
-  %m = fmul reassoc <4 x float> %y, %s
-  %d = fdiv reassoc arcp <4 x float> %x, %m
+  %m = fmul contract reassoc <4 x float> %y, %s
+  %d = fdiv contract reassoc arcp <4 x float> %x, %m
   ret <4 x float> %d
 }
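As a sanity check, any of the updated tests can be re-run individually with lit from a configured build tree, for example (build path assumed):

  ./build/bin/llvm-lit llvm/test/CodeGen/PowerPC/fmf-propagation.ll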