diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12872,11 +12872,6 @@ return DAG.getBuildVector(VT, DL, Ops); } -static bool isContractable(SDNode *N) { - SDNodeFlags F = N->getFlags(); - return F.hasAllowContract() || F.hasAllowReassociation(); -} - /// Try to perform FMA combining on a given FADD node. SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { SDValue N0 = N->getOperand(0); @@ -12898,13 +12893,12 @@ if (!HasFMAD && !HasFMA) return SDValue(); - bool CanFuse = Options.UnsafeFPMath || isContractable(N); bool CanReassociate = Options.UnsafeFPMath || N->getFlags().hasAllowReassociation(); bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast || - CanFuse || HasFMAD); + Options.UnsafeFPMath || HasFMAD); // If the addition is not contractable, do not combine. - if (!AllowFusionGlobally && !isContractable(N)) + if (!AllowFusionGlobally && !N->getFlags().hasAllowContract()) return SDValue(); if (TLI.generateFMAsInMachineCombiner(VT, OptLevel)) @@ -12919,7 +12913,7 @@ auto isContractableFMUL = [AllowFusionGlobally](SDValue N) { if (N.getOpcode() != ISD::FMUL) return false; - return AllowFusionGlobally || isContractable(N.getNode()); + return AllowFusionGlobally || N->getFlags().hasAllowContract(); }; // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), // prefer to fold the multiply with fewer uses. @@ -13108,12 +13102,11 @@ return SDValue(); const SDNodeFlags Flags = N->getFlags(); - bool CanFuse = Options.UnsafeFPMath || isContractable(N); bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast || - CanFuse || HasFMAD); + Options.UnsafeFPMath || HasFMAD); // If the subtraction is not contractable, do not combine. - if (!AllowFusionGlobally && !isContractable(N)) + if (!AllowFusionGlobally && !N->getFlags().hasAllowContract()) return SDValue(); if (TLI.generateFMAsInMachineCombiner(VT, OptLevel)) @@ -13129,7 +13122,7 @@ auto isContractableFMUL = [AllowFusionGlobally](SDValue N) { if (N.getOpcode() != ISD::FMUL) return false; - return AllowFusionGlobally || isContractable(N.getNode()); + return AllowFusionGlobally || N->getFlags().hasAllowContract(); }; // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) @@ -13261,6 +13254,7 @@ // More folding opportunities when target permits. 
if (Aggressive) { + bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract(); // fold (fsub (fma x, y, (fmul u, v)), z) // -> (fma x, y (fma u, v, (fneg z))) if (CanFuse && N0.getOpcode() == PreferredFusedOpcode && diff --git a/llvm/test/CodeGen/AArch64/fadd-combines.ll b/llvm/test/CodeGen/AArch64/fadd-combines.ll --- a/llvm/test/CodeGen/AArch64/fadd-combines.ll +++ b/llvm/test/CodeGen/AArch64/fadd-combines.ll @@ -217,10 +217,10 @@ ; CHECK-NEXT: fmadd s2, s2, s3, s4 ; CHECK-NEXT: fmadd s0, s0, s1, s2 ; CHECK-NEXT: ret - %m1 = fmul float %a, %b - %m2 = fmul float %c, %d + %m1 = fmul contract float %a, %b + %m2 = fmul contract float %c, %d %a1 = fadd contract float %m1, %m2 - %a2 = fadd reassoc float %n0, %a1 + %a2 = fadd contract reassoc float %n0, %a1 ret float %a2 } diff --git a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll --- a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll +++ b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll @@ -148,7 +148,7 @@ ; CHECK-NEXT: add x8, x8, #32 // =32 ; CHECK-NEXT: cmp x8, #2, lsl #12 // =8192 ; CHECK-NEXT: fmul v4.4s, v2.4s, v0.4s -; CHECK-NEXT: fmla v4.4s, v1.4s, v3.4s +; CHECK-NEXT: fmla v4.4s, v3.4s, v1.4s ; CHECK-NEXT: str q4, [x2], #16 ; CHECK-NEXT: b.ne .LBB3_1 ; CHECK-NEXT: // %bb.2: // %while.end diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll --- a/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll @@ -66,7 +66,7 @@ %r0 = load half, half addrspace(1)* %in1 %r1 = load half, half addrspace(1)* %in2 %r2 = load half, half addrspace(1)* %in3 - %mul = fmul half %r0, %r1 + %mul = fmul contract half %r0, %r1 %add = fadd contract half %mul, %r2 store half %add, half addrspace(1)* %out ret void diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll --- a/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll @@ -81,7 +81,7 @@ %r0 = load volatile float, float addrspace(1)* %in1 %r1 = load volatile float, float addrspace(1)* %in2 %r2 = load volatile float, float addrspace(1)* %in3 - %mul = fmul float %r0, %r1 + %mul = fmul contract float %r0, %r1 %add = fadd contract float %mul, %r2 store float %add, float addrspace(1)* %out ret void diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll --- a/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll @@ -41,7 +41,7 @@ %r0 = load double, double addrspace(1)* %in1 %r1 = load double, double addrspace(1)* %in2 %r2 = load double, double addrspace(1)* %in3 - %tmp = fmul double %r0, %r1 + %tmp = fmul contract double %r0, %r1 %r3 = fadd contract double %tmp, %r2 store double %r3, double addrspace(1)* %out ret void diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll --- a/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll @@ -53,7 +53,7 @@ %r0 = load <2 x half>, <2 x half> addrspace(1)* %in1 %r1 = load <2 x half>, <2 x half> addrspace(1)* %in2 %r2 = load <2 x half>, <2 x half> addrspace(1)* %in3 - %r3 = fmul <2 x half> %r0, %r1 + %r3 = fmul contract <2 x half> %r0, %r1 %r4 = fadd contract <2 x half> %r3, %r2 store <2 x half> %r4, <2 x half> addrspace(1)* %out ret void diff --git a/llvm/test/CodeGen/PowerPC/combine-fneg.ll b/llvm/test/CodeGen/PowerPC/combine-fneg.ll --- a/llvm/test/CodeGen/PowerPC/combine-fneg.ll +++ b/llvm/test/CodeGen/PowerPC/combine-fneg.ll @@ -23,7 
+23,7 @@ entry: %splat.splatinsert = insertelement <4 x double> undef, double %a0, i32 0 %splat.splat = shufflevector <4 x double> %splat.splatinsert, <4 x double> undef, <4 x i32> zeroinitializer - %div = fdiv reassoc nsz arcp ninf <4 x double> %a1, %splat.splat - %sub = fsub reassoc nsz <4 x double> , %div + %div = fdiv contract reassoc nsz arcp ninf <4 x double> %a1, %splat.splat + %sub = fsub contract reassoc nsz <4 x double> , %div ret <4 x double> %sub } diff --git a/llvm/test/CodeGen/PowerPC/fdiv.ll b/llvm/test/CodeGen/PowerPC/fdiv.ll --- a/llvm/test/CodeGen/PowerPC/fdiv.ll +++ b/llvm/test/CodeGen/PowerPC/fdiv.ll @@ -7,7 +7,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: xsdivsp 1, 1, 2 ; CHECK-NEXT: blr - %3 = fdiv reassoc arcp nsz float %0, %1 + %3 = fdiv contract reassoc arcp nsz float %0, %1 ret float %3 } @@ -20,6 +20,6 @@ ; CHECK-NEXT: xsmaddasp 0, 3, 1 ; CHECK-NEXT: fmr 1, 0 ; CHECK-NEXT: blr - %3 = fdiv reassoc arcp nsz ninf float %0, %1 + %3 = fdiv contract reassoc arcp nsz ninf float %0, %1 ret float %3 } diff --git a/llvm/test/CodeGen/PowerPC/fma-aggr-FMF.ll b/llvm/test/CodeGen/PowerPC/fma-aggr-FMF.ll --- a/llvm/test/CodeGen/PowerPC/fma-aggr-FMF.ll +++ b/llvm/test/CodeGen/PowerPC/fma-aggr-FMF.ll @@ -22,10 +22,10 @@ define float @no_fma_with_fewer_uses(float %f1, float %f2, float %f3, float %f4) { ; CHECK-LABEL: no_fma_with_fewer_uses: ; CHECK: # %bb.0: -; CHECK-NEXT: xsmulsp 0, 1, 2 -; CHECK-NEXT: fmr 1, 0 -; CHECK-NEXT: xsmaddasp 1, 3, 4 -; CHECK-NEXT: xsdivsp 1, 0, 1 +; CHECK-NEXT: xsmulsp 0, 3, 4 +; CHECK-NEXT: xsmulsp 3, 1, 2 +; CHECK-NEXT: xsmaddasp 0, 1, 2 +; CHECK-NEXT: xsdivsp 1, 3, 0 ; CHECK-NEXT: blr %mul1 = fmul contract float %f1, %f2 %mul2 = fmul float %f3, %f4 diff --git a/llvm/test/CodeGen/PowerPC/fma-assoc.ll b/llvm/test/CodeGen/PowerPC/fma-assoc.ll --- a/llvm/test/CodeGen/PowerPC/fma-assoc.ll +++ b/llvm/test/CodeGen/PowerPC/fma-assoc.ll @@ -336,8 +336,8 @@ double %D, double %E) { %F = fmul reassoc double %A, %B ; [#uses=1] %G = fmul reassoc double %C, %D ; [#uses=1] - %H = fadd reassoc double %F, %G ; [#uses=1] - %I = fsub reassoc double %H, %E ; [#uses=1] + %H = fadd contract reassoc double %F, %G ; [#uses=1] + %I = fsub contract reassoc double %H, %E ; [#uses=1] ret double %I } @@ -379,8 +379,8 @@ double %D, double %E) { %F = fmul reassoc double %A, %B ; [#uses=1] %G = fmul reassoc double %C, %D ; [#uses=1] - %H = fadd reassoc double %F, %G ; [#uses=1] - %I = fsub reassoc nsz double %E, %H ; [#uses=1] + %H = fadd contract reassoc double %F, %G ; [#uses=1] + %I = fsub contract reassoc nsz double %E, %H ; [#uses=1] ret double %I } diff --git a/llvm/test/CodeGen/PowerPC/fma-combine.ll b/llvm/test/CodeGen/PowerPC/fma-combine.ll --- a/llvm/test/CodeGen/PowerPC/fma-combine.ll +++ b/llvm/test/CodeGen/PowerPC/fma-combine.ll @@ -184,16 +184,16 @@ ; CHECK-NEXT: blr %tmp = load float, float* undef, align 4 %tmp2 = load float, float* undef, align 4 - %tmp3 = fmul reassoc float %tmp, 0x3FE372D780000000 - %tmp4 = fadd reassoc float %tmp3, 1.000000e+00 - %tmp5 = fmul reassoc float %tmp2, %tmp4 + %tmp3 = fmul contract reassoc float %tmp, 0x3FE372D780000000 + %tmp4 = fadd contract reassoc float %tmp3, 1.000000e+00 + %tmp5 = fmul contract reassoc float %tmp2, %tmp4 %tmp6 = load float, float* undef, align 4 %tmp7 = load float, float* undef, align 4 - %tmp8 = fmul reassoc float %tmp7, 0x3FE372D780000000 - %tmp9 = fsub reassoc nsz float -1.000000e+00, %tmp8 - %tmp10 = fmul reassoc float %tmp9, %tmp6 - %tmp11 = fadd reassoc float %tmp5, 5.000000e-01 - %tmp12 = fadd reassoc float 
%tmp11, %tmp10 + %tmp8 = fmul contract reassoc float %tmp7, 0x3FE372D780000000 + %tmp9 = fsub contract reassoc nsz float -1.000000e+00, %tmp8 + %tmp10 = fmul contract reassoc float %tmp9, %tmp6 + %tmp11 = fadd contract reassoc float %tmp5, 5.000000e-01 + %tmp12 = fadd contract reassoc float %tmp11, %tmp10 ret float %tmp12 } diff --git a/llvm/test/CodeGen/PowerPC/fma-mutate.ll b/llvm/test/CodeGen/PowerPC/fma-mutate.ll --- a/llvm/test/CodeGen/PowerPC/fma-mutate.ll +++ b/llvm/test/CodeGen/PowerPC/fma-mutate.ll @@ -29,7 +29,7 @@ ; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: xssqrtdp 1, 1 ; CHECK-NEXT: blr - %r = call reassoc afn ninf double @llvm.sqrt.f64(double %a) + %r = call contract reassoc afn ninf double @llvm.sqrt.f64(double %a) ret double %r } diff --git a/llvm/test/CodeGen/PowerPC/fma-negate.ll b/llvm/test/CodeGen/PowerPC/fma-negate.ll --- a/llvm/test/CodeGen/PowerPC/fma-negate.ll +++ b/llvm/test/CodeGen/PowerPC/fma-negate.ll @@ -179,8 +179,8 @@ ; NO-VSX-NEXT: fnmsub 1, 2, 3, 1 ; NO-VSX-NEXT: blr entry: - %0 = fmul reassoc nsz double %b, %c - %1 = fsub reassoc nsz double %a, %0 + %0 = fmul contract reassoc nsz double %b, %c + %1 = fsub contract reassoc nsz double %a, %0 ret double %1 } @@ -199,9 +199,9 @@ ; NO-VSX-NEXT: blr double %d) { entry: - %0 = fmul reassoc double %a, %b - %1 = fmul reassoc double %c, %d - %2 = fsub reassoc double %0, %1 + %0 = fmul contract reassoc double %a, %b + %1 = fmul contract reassoc double %c, %d + %2 = fsub contract reassoc double %0, %1 ret double %2 } @@ -233,8 +233,8 @@ ; NO-VSX-NEXT: fnmsubs 1, 2, 3, 1 ; NO-VSX-NEXT: blr entry: - %0 = fmul reassoc float %b, %c - %1 = fsub reassoc nsz float %a, %0 + %0 = fmul contract reassoc float %b, %c + %1 = fsub contract reassoc nsz float %a, %0 ret float %1 } @@ -252,9 +252,9 @@ ; NO-VSX-NEXT: fmsubs 1, 1, 2, 0 ; NO-VSX-NEXT: blr entry: - %0 = fmul reassoc float %a, %b - %1 = fmul reassoc float %c, %d - %2 = fsub reassoc nsz float %0, %1 + %0 = fmul contract reassoc float %a, %b + %1 = fmul contract reassoc float %c, %d + %2 = fsub contract reassoc nsz float %0, %1 ret float %2 } diff --git a/llvm/test/CodeGen/PowerPC/fma-precision.ll b/llvm/test/CodeGen/PowerPC/fma-precision.ll --- a/llvm/test/CodeGen/PowerPC/fma-precision.ll +++ b/llvm/test/CodeGen/PowerPC/fma-precision.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-linux-gnu | FileCheck %s +; RUN: llc < %s -fp-contract=fast -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-linux-gnu | FileCheck %s ; Verify that the fold of a*b-c*d respect the uses of a*b define double @fsub1(double %a, double %b, double %c, double %d) { diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll --- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll +++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll @@ -30,7 +30,7 @@ ; GLOBAL-NEXT: xsmaddasp 3, 1, 2 ; GLOBAL-NEXT: fmr 1, 3 ; GLOBAL-NEXT: blr - %mul = fmul float %x, %y + %mul = fmul contract float %x, %y %add = fadd contract float %mul, %z ret float %add } @@ -58,17 +58,17 @@ ret float %add } -; Reassociation implies that FMA contraction is allowed. +; Reassociation does NOT imply that FMA contraction is allowed. 
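; Illustrative sketch (not part of the patch): with this change the fadd must
; itself carry 'contract' before visitFADDForFMACombine will fuse it, unless
; fusion is enabled globally; 'reassoc' alone no longer qualifies. Assuming the
; default -fp-contract=on:
;
;   %m1 = fmul contract float %x, %y
;   %a1 = fadd contract float %m1, %z   ; eligible to become fma(%x, %y, %z)
;
;   %m2 = fmul reassoc float %x, %y
;   %a2 = fadd reassoc float %m2, %z    ; left as separate fmul + fadd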
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc1:' -; FMFDEBUG: fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}} +; FMFDEBUG-NOT: fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc1:' define float @fmul_fadd_reassoc1(float %x, float %y, float %z) { ; FMF-LABEL: fmul_fadd_reassoc1: ; FMF: # %bb.0: -; FMF-NEXT: xsmaddasp 3, 1, 2 -; FMF-NEXT: fmr 1, 3 +; FMF-NEXT: xsmulsp 0, 1, 2 +; FMF-NEXT: xsaddsp 1, 0, 3 ; FMF-NEXT: blr ; ; GLOBAL-LABEL: fmul_fadd_reassoc1: @@ -84,14 +84,14 @@ ; This shouldn't change anything - the intermediate fmul result is now also flagged. ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc2:' -; FMFDEBUG: fma reassoc {{t[0-9]+}}, {{t[0-9]+}} +; FMFDEBUG-NOT: fma reassoc {{t[0-9]+}}, {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc2:' define float @fmul_fadd_reassoc2(float %x, float %y, float %z) { ; FMF-LABEL: fmul_fadd_reassoc2: ; FMF: # %bb.0: -; FMF-NEXT: xsmaddasp 3, 1, 2 -; FMF-NEXT: fmr 1, 3 +; FMF-NEXT: xsmulsp 0, 1, 2 +; FMF-NEXT: xsaddsp 1, 0, 3 ; FMF-NEXT: blr ; ; GLOBAL-LABEL: fmul_fadd_reassoc2: @@ -107,14 +107,14 @@ ; The fadd is now fully 'fast'. This implies that contraction is allowed. ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast1:' -; FMFDEBUG: fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}} +; FMFDEBUG-NOT: fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_fast1:' define float @fmul_fadd_fast1(float %x, float %y, float %z) { ; FMF-LABEL: fmul_fadd_fast1: ; FMF: # %bb.0: -; FMF-NEXT: xsmaddasp 3, 1, 2 -; FMF-NEXT: fmr 1, 3 +; FMF-NEXT: xsmulsp 0, 1, 2 +; FMF-NEXT: xsaddsp 1, 0, 3 ; FMF-NEXT: blr ; ; GLOBAL-LABEL: fmul_fadd_fast1: @@ -130,14 +130,14 @@ ; This shouldn't change anything - the intermediate fmul result is now also flagged. ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast2:' -; FMFDEBUG: fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}} +; FMFDEBUG-NOT: fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_fast2:' define float @fmul_fadd_fast2(float %x, float %y, float %z) { ; FMF-LABEL: fmul_fadd_fast2: ; FMF: # %bb.0: -; FMF-NEXT: xsmaddasp 3, 1, 2 -; FMF-NEXT: fmr 1, 3 +; FMF-NEXT: xsmulsp 0, 1, 2 +; FMF-NEXT: xsaddsp 1, 0, 3 ; FMF-NEXT: blr ; ; GLOBAL-LABEL: fmul_fadd_fast2: @@ -408,11 +408,11 @@ ; The call is now fully 'fast'. This implies that approximation is allowed. ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_ieee:' -; FMFDEBUG: fmul ninf afn reassoc {{t[0-9]+}} +; FMFDEBUG: fmul ninf contract afn reassoc {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast_ieee:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_ieee:' -; GLOBALDEBUG: fmul ninf afn reassoc {{t[0-9]+}} +; GLOBALDEBUG: fmul ninf contract afn reassoc {{t[0-9]+}} ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast_ieee:' define float @sqrt_fast_ieee(float %x) #0 { @@ -459,18 +459,18 @@ ; GLOBAL-NEXT: .LBB14_2: ; GLOBAL-NEXT: fmr 1, 0 ; GLOBAL-NEXT: blr - %rt = call reassoc afn ninf float @llvm.sqrt.f32(float %x) + %rt = call contract reassoc afn ninf float @llvm.sqrt.f32(float %x) ret float %rt } ; The call is now fully 'fast'. This implies that approximation is allowed. 
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_preserve_sign:' -; FMFDEBUG: fmul ninf afn reassoc {{t[0-9]+}} +; FMFDEBUG: fmul ninf contract afn reassoc {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast_preserve_sign:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_preserve_sign:' -; GLOBALDEBUG: fmul ninf afn reassoc {{t[0-9]+}} +; GLOBALDEBUG: fmul ninf contract afn reassoc {{t[0-9]+}} ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast_preserve_sign:' define float @sqrt_fast_preserve_sign(float %x) #1 { @@ -511,7 +511,7 @@ ; GLOBAL-NEXT: .LBB15_2: ; GLOBAL-NEXT: fmr 1, 0 ; GLOBAL-NEXT: blr - %rt = call reassoc ninf afn float @llvm.sqrt.f32(float %x) + %rt = call contract reassoc ninf afn float @llvm.sqrt.f32(float %x) ret float %rt } diff --git a/llvm/test/CodeGen/PowerPC/machine-combiner.ll b/llvm/test/CodeGen/PowerPC/machine-combiner.ll --- a/llvm/test/CodeGen/PowerPC/machine-combiner.ll +++ b/llvm/test/CodeGen/PowerPC/machine-combiner.ll @@ -208,11 +208,11 @@ ; CHECK-PWR-DAG: xsmaddadp 2, 4, 3 ; CHECK-PWR: xsadddp 1, 2, 1 ; CHECK-NEXT: blr - %7 = fmul reassoc nsz double %3, %2 - %8 = fmul reassoc nsz double %5, %4 - %9 = fadd reassoc nsz double %1, %0 - %10 = fadd reassoc nsz double %9, %7 - %11 = fadd reassoc nsz double %10, %8 + %7 = fmul contract reassoc nsz double %3, %2 + %8 = fmul contract reassoc nsz double %5, %4 + %9 = fadd contract reassoc nsz double %1, %0 + %10 = fadd contract reassoc nsz double %9, %7 + %11 = fadd contract reassoc nsz double %10, %8 ret double %11 } @@ -223,11 +223,11 @@ ; CHECK-DAG: fmadds [[REG1:[0-9]+]], 6, 5, 1 ; CHECK: fadds 1, [[REG0]], [[REG1]] ; CHECK-NEXT: blr - %7 = fmul reassoc nsz float %3, %2 - %8 = fmul reassoc nsz float %5, %4 - %9 = fadd reassoc nsz float %1, %0 - %10 = fadd reassoc nsz float %9, %7 - %11 = fadd reassoc nsz float %10, %8 + %7 = fmul contract reassoc nsz float %3, %2 + %8 = fmul contract reassoc nsz float %5, %4 + %9 = fadd contract reassoc nsz float %1, %0 + %10 = fadd contract reassoc nsz float %9, %7 + %11 = fadd contract reassoc nsz float %10, %8 ret float %11 } @@ -238,11 +238,11 @@ ; CHECK-PWR-DAG: xvmaddasp [[REG1:[0-9]+]], 37, 36 ; CHECK-PWR: xvaddsp 34, [[REG1]], [[REG0]] ; CHECK-NEXT: blr - %7 = fmul reassoc nsz <4 x float> %3, %2 - %8 = fmul reassoc nsz <4 x float> %5, %4 - %9 = fadd reassoc nsz <4 x float> %1, %0 - %10 = fadd reassoc nsz <4 x float> %9, %7 - %11 = fadd reassoc nsz <4 x float> %10, %8 + %7 = fmul contract reassoc nsz <4 x float> %3, %2 + %8 = fmul contract reassoc nsz <4 x float> %5, %4 + %9 = fadd contract reassoc nsz <4 x float> %1, %0 + %10 = fadd contract reassoc nsz <4 x float> %9, %7 + %11 = fadd contract reassoc nsz <4 x float> %10, %8 ret <4 x float> %11 } @@ -255,14 +255,14 @@ ; CHECK-PWR-DAG: xsmaddadp [[REG0]], 9, 8 ; CHECK-PWR: xsadddp 1, 7, [[REG0]] ; CHECK-NEXT: blr - %10 = fmul reassoc nsz double %1, %0 - %11 = fmul reassoc nsz double %3, %2 - %12 = fmul reassoc nsz double %5, %4 - %13 = fmul reassoc nsz double %8, %7 - %14 = fadd reassoc nsz double %11, %10 - %15 = fadd reassoc nsz double %14, %6 - %16 = fadd reassoc nsz double %15, %12 - %17 = fadd reassoc nsz double %16, %13 + %10 = fmul contract reassoc nsz double %1, %0 + %11 = fmul contract reassoc nsz double %3, %2 + %12 = fmul contract reassoc nsz double %5, %4 + %13 = fmul contract reassoc nsz double %8, %7 + %14 = fadd contract reassoc nsz double %11, %10 + %15 = fadd contract reassoc nsz double %14, %6 + %16 = fadd contract reassoc nsz double 
%15, %12 + %17 = fadd contract reassoc nsz double %16, %13 ret double %17 } @@ -282,22 +282,22 @@ ; CHECK-DAG: fmadds [[REG7:[0-9]+]], 5, 4, [[REG5]] ; CHECK: fadds 1, [[REG7]], [[REG6]] ; CHECK-NEXT: blr - %18 = fmul reassoc nsz float %2, %1 - %19 = fadd reassoc nsz float %18, %0 - %20 = fmul reassoc nsz float %4, %3 - %21 = fadd reassoc nsz float %19, %20 - %22 = fmul reassoc nsz float %6, %5 - %23 = fadd reassoc nsz float %21, %22 - %24 = fmul reassoc nsz float %8, %7 - %25 = fadd reassoc nsz float %23, %24 - %26 = fmul reassoc nsz float %10, %9 - %27 = fadd reassoc nsz float %25, %26 - %28 = fmul reassoc nsz float %12, %11 - %29 = fadd reassoc nsz float %27, %28 - %30 = fmul reassoc nsz float %14, %13 - %31 = fadd reassoc nsz float %29, %30 - %32 = fmul reassoc nsz float %16, %15 - %33 = fadd reassoc nsz float %31, %32 + %18 = fmul contract reassoc nsz float %2, %1 + %19 = fadd contract reassoc nsz float %18, %0 + %20 = fmul contract reassoc nsz float %4, %3 + %21 = fadd contract reassoc nsz float %19, %20 + %22 = fmul contract reassoc nsz float %6, %5 + %23 = fadd contract reassoc nsz float %21, %22 + %24 = fmul contract reassoc nsz float %8, %7 + %25 = fadd contract reassoc nsz float %23, %24 + %26 = fmul contract reassoc nsz float %10, %9 + %27 = fadd contract reassoc nsz float %25, %26 + %28 = fmul contract reassoc nsz float %12, %11 + %29 = fadd contract reassoc nsz float %27, %28 + %30 = fmul contract reassoc nsz float %14, %13 + %31 = fadd contract reassoc nsz float %29, %30 + %32 = fmul contract reassoc nsz float %16, %15 + %33 = fadd contract reassoc nsz float %31, %32 ret float %33 } diff --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll --- a/llvm/test/CodeGen/PowerPC/recipest.ll +++ b/llvm/test/CodeGen/PowerPC/recipest.ll @@ -67,8 +67,8 @@ ; CHECK-P9-NEXT: xsmuldp 0, 0, 4 ; CHECK-P9-NEXT: xsmuldp 1, 1, 0 ; CHECK-P9-NEXT: blr - %x = call arcp reassoc double @llvm.sqrt.f64(double %b) - %r = fdiv arcp reassoc double %a, %x + %x = call arcp contract reassoc double @llvm.sqrt.f64(double %b) + %r = fdiv arcp contract reassoc double %a, %x ret double %r } @@ -160,9 +160,9 @@ ; CHECK-P9-NEXT: xsmulsp 0, 0, 3 ; CHECK-P9-NEXT: xsmuldp 1, 1, 0 ; CHECK-P9-NEXT: blr - %x = call reassoc arcp float @llvm.sqrt.f32(float %b) + %x = call contract reassoc arcp float @llvm.sqrt.f32(float %b) %y = fpext float %x to double - %r = fdiv reassoc arcp double %a, %y + %r = fdiv contract reassoc arcp double %a, %y ret double %r } @@ -249,9 +249,9 @@ ; CHECK-P9-NEXT: xsrsp 0, 0 ; CHECK-P9-NEXT: xsmulsp 1, 1, 0 ; CHECK-P9-NEXT: blr - %x = call reassoc arcp double @llvm.sqrt.f64(double %b) + %x = call contract reassoc arcp double @llvm.sqrt.f64(double %b) %y = fptrunc double %x to float - %r = fdiv reassoc arcp float %a, %y + %r = fdiv contract reassoc arcp float %a, %y ret float %r } @@ -324,8 +324,8 @@ ; CHECK-P9-NEXT: xsmulsp 0, 0, 3 ; CHECK-P9-NEXT: xsmulsp 1, 1, 0 ; CHECK-P9-NEXT: blr - %x = call reassoc arcp float @llvm.sqrt.f32(float %b) - %r = fdiv reassoc arcp float %a, %x + %x = call contract reassoc arcp float @llvm.sqrt.f32(float %b) + %r = fdiv contract reassoc arcp float %a, %x ret float %r } @@ -429,9 +429,9 @@ ; CHECK-P9-NEXT: xsmaddasp 4, 1, 0 ; CHECK-P9-NEXT: xsmulsp 1, 3, 4 ; CHECK-P9-NEXT: blr - %x = call reassoc arcp nsz float @llvm.sqrt.f32(float %a) - %y = fmul reassoc nsz float %x, %b - %z = fdiv reassoc arcp nsz ninf float %c, %y + %x = call contract reassoc arcp nsz float @llvm.sqrt.f32(float %a) + %y = fmul contract reassoc nsz float %x, %b + 
%z = fdiv contract reassoc arcp nsz ninf float %c, %y ret float %z } @@ -512,8 +512,8 @@ ; CHECK-P9-NEXT: xvmulsp 0, 0, 2 ; CHECK-P9-NEXT: xvmulsp 34, 34, 0 ; CHECK-P9-NEXT: blr - %x = call reassoc arcp <4 x float> @llvm.sqrt.v4f32(<4 x float> %b) - %r = fdiv reassoc arcp <4 x float> %a, %x + %x = call contract reassoc arcp <4 x float> @llvm.sqrt.v4f32(<4 x float> %b) + %r = fdiv contract reassoc arcp <4 x float> %a, %x ret <4 x float> %r } @@ -602,7 +602,7 @@ ; CHECK-P9-NEXT: xsmaddadp 0, 3, 1 ; CHECK-P9-NEXT: fmr 1, 0 ; CHECK-P9-NEXT: blr - %r = fdiv reassoc arcp nsz ninf double %a, %b + %r = fdiv contract reassoc arcp nsz ninf double %a, %b ret double %r } @@ -651,7 +651,7 @@ ; CHECK-P9-NEXT: xsmaddasp 0, 3, 1 ; CHECK-P9-NEXT: fmr 1, 0 ; CHECK-P9-NEXT: blr - %r = fdiv reassoc arcp nsz ninf float %a, %b + %r = fdiv contract reassoc arcp nsz ninf float %a, %b ret float %r } @@ -702,7 +702,7 @@ ; CHECK-P9-NEXT: xvmaddasp 0, 1, 34 ; CHECK-P9-NEXT: xxlor 34, 0, 0 ; CHECK-P9-NEXT: blr - %r = fdiv reassoc arcp nsz ninf <4 x float> %a, %b + %r = fdiv contract reassoc arcp nsz ninf <4 x float> %a, %b ret <4 x float> %r } @@ -817,7 +817,7 @@ ; CHECK-P9-NEXT: .LBB20_2: ; CHECK-P9-NEXT: xssqrtdp 1, 1 ; CHECK-P9-NEXT: blr - %r = call reassoc ninf afn double @llvm.sqrt.f64(double %a) + %r = call contract reassoc ninf afn double @llvm.sqrt.f64(double %a) ret double %r } @@ -901,7 +901,7 @@ ; CHECK-P9-NEXT: .LBB21_2: ; CHECK-P9-NEXT: xssqrtdp 1, 1 ; CHECK-P9-NEXT: blr - %r = call reassoc ninf afn double @llvm.sqrt.f64(double %a) + %r = call contract reassoc ninf afn double @llvm.sqrt.f64(double %a) ret double %r } @@ -991,7 +991,7 @@ ; CHECK-P9-NEXT: .LBB23_2: ; CHECK-P9-NEXT: fmr 1, 0 ; CHECK-P9-NEXT: blr - %r = call reassoc ninf afn float @llvm.sqrt.f32(float %a) + %r = call contract reassoc ninf afn float @llvm.sqrt.f32(float %a) ret float %r } @@ -1076,7 +1076,7 @@ ; CHECK-P9-NEXT: .LBB25_2: ; CHECK-P9-NEXT: xvsqrtsp 34, 34 ; CHECK-P9-NEXT: blr - %r = call reassoc ninf afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) + %r = call contract reassoc ninf afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) ret <4 x float> %r } @@ -1207,7 +1207,7 @@ ; CHECK-P9-NEXT: .LBB27_2: ; CHECK-P9-NEXT: xvsqrtdp 34, 34 ; CHECK-P9-NEXT: blr - %r = call reassoc ninf afn <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) + %r = call contract reassoc ninf afn <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) ret <2 x double> %r } @@ -1260,7 +1260,7 @@ ; CHECK-P9: # %bb.0: ; CHECK-P9-NEXT: xssqrtqp 2, 2 ; CHECK-P9-NEXT: blr - %r = call reassoc ninf afn fp128 @llvm.sqrt.f128(fp128 %a) + %r = call contract reassoc ninf afn fp128 @llvm.sqrt.f128(fp128 %a) ret fp128 %r } diff --git a/llvm/test/CodeGen/PowerPC/register-pressure-reduction.ll b/llvm/test/CodeGen/PowerPC/register-pressure-reduction.ll --- a/llvm/test/CodeGen/PowerPC/register-pressure-reduction.ll +++ b/llvm/test/CodeGen/PowerPC/register-pressure-reduction.ll @@ -37,10 +37,10 @@ ; CHECK-FMA-NEXT: xsmaddasp f1, f4, f2 ; CHECK-FMA-NEXT: xsmaddasp f1, f3, f0 ; CHECK-FMA-NEXT: blr - %5 = fmul reassoc nsz float %1, %0 - %6 = fsub reassoc nsz float %2, %3 - %7 = fmul reassoc nsz float %6, 0x3DB2533FE0000000 - %8 = fadd reassoc nsz float %7, %5 + %5 = fmul contract reassoc nsz float %1, %0 + %6 = fsub contract reassoc nsz float %2, %3 + %7 = fmul contract reassoc nsz float %6, 0x3DB2533FE0000000 + %8 = fadd contract reassoc nsz float %7, %5 ret float %8 } @@ -73,10 +73,10 @@ ; CHECK-FMA-NEXT: xsmaddadp f1, f4, f2 ; CHECK-FMA-NEXT: xsmaddadp f1, f3, f0 ; CHECK-FMA-NEXT: blr - %5 
= fmul reassoc nsz double %1, %0 - %6 = fsub reassoc nsz double %2, %3 - %7 = fmul reassoc nsz double %6, 0x3DB2533FE68CADDE - %8 = fadd reassoc nsz double %7, %5 + %5 = fmul contract reassoc nsz double %1, %0 + %6 = fsub contract reassoc nsz double %2, %3 + %7 = fmul contract reassoc nsz double %6, 0x3DB2533FE68CADDE + %8 = fadd contract reassoc nsz double %7, %5 ret double %8 } @@ -125,11 +125,11 @@ ; CHECK-FMA-NEXT: xsmulsp f0, f2, f5 ; CHECK-FMA-NEXT: stfs f0, 0(r3) ; CHECK-FMA-NEXT: blr - %5 = fmul reassoc nsz float %1, %0 - %6 = fsub reassoc nsz float %2, %3 - %7 = fmul reassoc nsz float %6, 0x3DB2533FE0000000 - %8 = fadd reassoc nsz float %7, %5 - %9 = fmul reassoc nsz float %1, 0xBDB2533FE0000000 + %5 = fmul contract reassoc nsz float %1, %0 + %6 = fsub contract reassoc nsz float %2, %3 + %7 = fmul contract reassoc nsz float %6, 0x3DB2533FE0000000 + %8 = fadd contract reassoc nsz float %7, %5 + %9 = fmul contract reassoc nsz float %1, 0xBDB2533FE0000000 store float %9, float* @global_val, align 4 ret float %8 } diff --git a/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll b/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll --- a/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll +++ b/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll @@ -43,8 +43,8 @@ ; CHECK-NEXT: blr %ins = insertelement <4 x float> undef, float %a, i32 0 %splat = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer - %t1 = fmul reassoc <4 x float> %b, - %mul = fdiv reassoc arcp nsz ninf <4 x float> %t1, %splat + %t1 = fmul contract reassoc <4 x float> %b, + %mul = fdiv contract reassoc arcp nsz ninf <4 x float> %t1, %splat ret <4 x float> %mul } diff --git a/llvm/test/CodeGen/Thumb2/mve-vldshuffle.ll b/llvm/test/CodeGen/Thumb2/mve-vldshuffle.ll --- a/llvm/test/CodeGen/Thumb2/mve-vldshuffle.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vldshuffle.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp,+fp64 -verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp,+fp64 -verify-machineinstrs -fp-contract=fast %s -o - | FileCheck %s +; FIXME: The fmul <8 x half> %5, %6 generated in %vector.body does NOT have contract bits on by default. 
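; Illustrative note (not part of the patch): -fp-contract=fast corresponds to
; TargetOptions::AllowFPOpFusion == FPOpFusion::Fast, which makes
; AllowFusionGlobally true in the combine, so the fmul/fadd pairs in
; %vector.body can still fuse even though they carry no per-instruction
; 'contract' flag. A hedged sketch of the two llc modes:
;
;   llc -fp-contract=fast %s   ; may fuse plain fmul + fadd pairs
;   llc %s                     ; default 'on': fuses only 'contract' pairs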
define void @arm_cmplx_mag_squared_f16(half* nocapture readonly %pSrc, half* nocapture %pDst, i32 %numSamples) { ; CHECK-LABEL: arm_cmplx_mag_squared_f16: diff --git a/llvm/test/CodeGen/X86/machine-combiner.ll b/llvm/test/CodeGen/X86/machine-combiner.ll --- a/llvm/test/CodeGen/X86/machine-combiner.ll +++ b/llvm/test/CodeGen/X86/machine-combiner.ll @@ -240,8 +240,8 @@ ; AVX512-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 ; AVX512-NEXT: vaddps %xmm0, %xmm3, %xmm0 ; AVX512-NEXT: retq - %t0 = fmul reassoc nsz <4 x float> %x0, %x1 - %t1 = fadd reassoc nsz <4 x float> %x2, %t0 + %t0 = fmul contract reassoc nsz <4 x float> %x0, %x1 + %t1 = fadd contract reassoc nsz <4 x float> %x2, %t0 %t2 = fadd reassoc nsz <4 x float> %x3, %t1 ret <4 x float> %t2 } @@ -268,8 +268,8 @@ ; AVX512-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 ; AVX512-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ; AVX512-NEXT: retq - %t0 = fmul reassoc nsz <2 x double> %x0, %x1 - %t1 = fadd reassoc nsz <2 x double> %x2, %t0 + %t0 = fmul contract reassoc nsz <2 x double> %x0, %x1 + %t1 = fadd contract reassoc nsz <2 x double> %x2, %t0 %t2 = fadd reassoc nsz <2 x double> %x3, %t1 ret <2 x double> %t2 } @@ -343,8 +343,8 @@ ; AVX512-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 ; AVX512-NEXT: vaddps %ymm0, %ymm3, %ymm0 ; AVX512-NEXT: retq - %t0 = fmul reassoc nsz <8 x float> %x0, %x1 - %t1 = fadd reassoc nsz <8 x float> %x2, %t0 + %t0 = fmul contract reassoc nsz <8 x float> %x0, %x1 + %t1 = fadd contract reassoc nsz <8 x float> %x2, %t0 %t2 = fadd reassoc nsz <8 x float> %x3, %t1 ret <8 x float> %t2 } @@ -374,8 +374,8 @@ ; AVX512-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 ; AVX512-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ; AVX512-NEXT: retq - %t0 = fmul reassoc nsz <4 x double> %x0, %x1 - %t1 = fadd reassoc nsz <4 x double> %x2, %t0 + %t0 = fmul contract reassoc nsz <4 x double> %x0, %x1 + %t1 = fadd contract reassoc nsz <4 x double> %x2, %t0 %t2 = fadd reassoc nsz <4 x double> %x3, %t1 ret <4 x double> %t2 } @@ -464,8 +464,8 @@ ; AVX512-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2 ; AVX512-NEXT: vaddps %zmm0, %zmm3, %zmm0 ; AVX512-NEXT: retq - %t0 = fmul reassoc nsz <16 x float> %x0, %x1 - %t1 = fadd reassoc nsz <16 x float> %x2, %t0 + %t0 = fmul contract reassoc nsz <16 x float> %x0, %x1 + %t1 = fadd contract reassoc nsz <16 x float> %x2, %t0 %t2 = fadd reassoc nsz <16 x float> %x3, %t1 ret <16 x float> %t2 } @@ -504,8 +504,8 @@ ; AVX512-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2 ; AVX512-NEXT: vaddpd %zmm0, %zmm3, %zmm0 ; AVX512-NEXT: retq - %t0 = fmul reassoc nsz <8 x double> %x0, %x1 - %t1 = fadd reassoc nsz <8 x double> %x2, %t0 + %t0 = fmul contract reassoc nsz <8 x double> %x0, %x1 + %t1 = fadd contract reassoc nsz <8 x double> %x2, %t0 %t2 = fadd reassoc nsz <8 x double> %x3, %t1 ret <8 x double> %t2 } diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath.ll b/llvm/test/CodeGen/X86/sqrt-fastmath.ll --- a/llvm/test/CodeGen/X86/sqrt-fastmath.ll +++ b/llvm/test/CodeGen/X86/sqrt-fastmath.ll @@ -712,8 +712,8 @@ ; AVX512-NEXT: retq %s = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %z) %a = call <4 x float> @llvm.fabs.v4f32(<4 x float> %y) - %m = fmul reassoc <4 x float> %a, %s - %d = fdiv reassoc arcp <4 x float> %x, %m + %m = fmul contract reassoc <4 x float> %a, %s + %d = fdiv contract reassoc arcp <4 x float> %x, %m ret <4 x float> %d } @@ -895,8 +895,8 @@ ; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq %s = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %y) - %m = fmul reassoc 
<4 x float> %y, %s - %d = fdiv reassoc arcp <4 x float> %x, %m + %m = fmul contract reassoc <4 x float> %y, %s + %d = fdiv contract reassoc arcp <4 x float> %x, %m ret <4 x float> %d }
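Taken together, the DAGCombiner hunks above key FMA formation on the 'contract'
fast-math flag of both the multiply and the add, unless fusion is allowed
globally (e.g. by -fp-contract=fast or unsafe-fp-math), which is why the test
updates add 'contract' to the feeding fmul as well as to the fadd/fsub. The IR
below is a minimal, self-contained sketch of that pairing; the function names
are illustrative and not taken from the patch:

define float @contract_pair(float %a, float %b, float %c) {
  %m = fmul contract float %a, %b
  %r = fadd contract float %m, %c    ; both flagged: eligible for fma formation
  ret float %r
}

define float @reassoc_only(float %a, float %b, float %c) {
  %m = fmul reassoc float %a, %b
  %r = fadd reassoc float %m, %c     ; reassoc alone: kept as fmul + fadd
  ret float %r
}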