Index: clang/test/CodeGenCUDA/fp-contract.cu
===================================================================
--- clang/test/CodeGenCUDA/fp-contract.cu
+++ clang/test/CodeGenCUDA/fp-contract.cu
@@ -1,5 +1,10 @@
 // REQUIRES: x86-registered-target, nvptx-registered-target, amdgpu-registered-target
+// FIXME: This test fails. The comment below describes broken behavior.
+// The front end should generate IR for the semantics it expects and
+// backends should respect the IR. Backends should never "disregard"
+// elements of the IR.
+
 // By default CUDA uses -ffp-contract=fast, HIP uses -ffp-contract=fast-honor-pragmas.
 // we should fuse multiply/add into fma instruction.
 // In IR, fmul/fadd instructions with contract flag are emitted.
 
Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13021,8 +13021,7 @@
 
 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
   assert(N.getOpcode() == ISD::FMUL);
-  return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
-         N->getFlags().hasAllowContract();
+  return N->getFlags().hasAllowContract();
 }
 
 // Return true if `N` can assume no infinities involved in it's computation.
@@ -13053,10 +13052,8 @@
 
   bool CanReassociate =
       Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
-  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
-                              Options.UnsafeFPMath || HasFMAD);
   // If the addition is not contractable, do not combine.
-  if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
+  if (!HasFMAD && !N->getFlags().hasAllowContract())
     return SDValue();
 
   if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
@@ -13073,10 +13070,10 @@
 
   // Is the node an FMUL and contractable either due to global flags or
   // SDNodeFlags.
-  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
+  auto isContractableFMUL = [HasFMAD](SDValue N) {
     if (N.getOpcode() != ISD::FMUL)
       return false;
-    return AllowFusionGlobally || N->getFlags().hasAllowContract();
+    return HasFMAD || N->getFlags().hasAllowContract();
   };
   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
   // prefer to fold the multiply with fewer uses.
@@ -13265,11 +13262,9 @@
     return SDValue();
   const SDNodeFlags Flags = N->getFlags();
-  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
-                              Options.UnsafeFPMath || HasFMAD);
 
   // If the subtraction is not contractable, do not combine.
-  if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
+  if (!HasFMAD && !N->getFlags().hasAllowContract())
     return SDValue();
 
   if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
     return SDValue();
@@ -13282,10 +13277,10 @@
 
   // Is the node an FMUL and contractable either due to global flags or
   // SDNodeFlags.
-  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
+  auto isContractableFMUL = [HasFMAD](SDValue N) {
     if (N.getOpcode() != ISD::FMUL)
       return false;
-    return AllowFusionGlobally || N->getFlags().hasAllowContract();
+    return HasFMAD || N->getFlags().hasAllowContract();
   };
 
   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
@@ -13572,8 +13567,13 @@
 
   // The transforms below are incorrect when x == 0 and y == inf, because the
   // intermediate multiplication produces a nan.
-  SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
-  if (!hasNoInfs(Options, FAdd))
+  SDValue FAddOrSub;
+  if (N0.getOpcode() == ISD::FADD || N0.getOpcode() == ISD::FSUB)
+    FAddOrSub = N0;
+  else
+    FAddOrSub = N1;
+
+  if (!hasNoInfs(Options, FAddOrSub))
     return SDValue();
 
   // Floating-point multiply-add without intermediate rounding.
Index: llvm/test/CodeGen/AMDGPU/fdot2.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/fdot2.ll
+++ llvm/test/CodeGen/AMDGPU/fdot2.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX900
-; RUN: llc -march=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX906-DL-UNSAFE
-; RUN: llc -march=amdgcn -mcpu=gfx1011 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
-; RUN: llc -march=amdgcn -mcpu=gfx1012 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
-; RUN: llc -march=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906
-; RUN: llc -march=amdgcn -mcpu=gfx906 -denormal-fp-math=preserve-sign -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-CONTRACT
-; RUN: llc -march=amdgcn -mcpu=gfx906 -denormal-fp-math=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-DENORM-CONTRACT
+; RUN: sed -e "s,FASTMATH_FLAGS,fast,g" %s | llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,GFX900
+; RUN: sed -e "s,FASTMATH_FLAGS,fast,g" %s | llc -march=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX906-DL-UNSAFE
+; RUN: sed -e "s,FASTMATH_FLAGS,fast,g" %s | llc -march=amdgcn -mcpu=gfx1011 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
+; RUN: sed -e "s,FASTMATH_FLAGS,fast,g" %s | llc -march=amdgcn -mcpu=gfx1012 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
+; RUN: sed -e "s,FASTMATH_FLAGS,,g" %s | llc -march=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,GFX906
+; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -march=amdgcn -mcpu=gfx906 -denormal-fp-math=preserve-sign -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,GFX906-CONTRACT
+; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -march=amdgcn -mcpu=gfx906 -denormal-fp-math=ieee -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,GFX906-DENORM-CONTRACT
 
 ; (fadd (fmul S1.x, S2.x), (fadd (fmul (S1.y, S2.y), z))) -> (fdot2 S1, S2, z)
 ; Tests to make sure fdot2 is not generated when vector elements of dot-product expressions
@@ -34,11 +34,11 @@
   %src1.el2 = extractelement <2 x half> %src1.vec, i64 1
   %src2.el2 = extractelement <2 x half> %src2.vec, i64 1
 
-  %mul2 = fmul half %src1.el2, %src2.el2
-  %mul1 = fmul half %src1.el1, %src2.el1
+  %mul2 = fmul FASTMATH_FLAGS half %src1.el2, %src2.el2
+  %mul1 = fmul FASTMATH_FLAGS half %src1.el1, %src2.el1
   %acc = load half, half addrspace(1)* %dst, align 2
-  %acc1 = fadd half %mul2, %acc
-  %acc2 = fadd half %mul1, %acc1
+  %acc1 = fadd FASTMATH_FLAGS half %mul2, %acc
+  %acc2 = fadd FASTMATH_FLAGS half %mul1, %acc1
   store half %acc2, half addrspace(1)* %dst, align 2
   ret void
 }
@@ -76,11 +76,11 @@
   %src2.el2 = extractelement <2 x half> %src2.vec, i64 1
   %csrc2.el2 = fpext half %src2.el2 to float
 
-  %mul2 = fmul float %csrc1.el2, %csrc2.el2
-  %mul1 = fmul float %csrc1.el1, %csrc2.el1
+  %mul2 = fmul FASTMATH_FLAGS float %csrc1.el2, %csrc2.el2
+  %mul1 = fmul FASTMATH_FLAGS float %csrc1.el1, %csrc2.el1
   %acc = load float, float addrspace(1)* %dst, align 4
-  %acc1 = fadd float %mul2, %acc
-  %acc2 = fadd float %mul1, %acc1
+  %acc1 = fadd FASTMATH_FLAGS float %mul2, %acc
+  %acc2 = fadd FASTMATH_FLAGS float %mul1, %acc1
   store float %acc2, float addrspace(1)* %dst, align 4
   ret void
 }
@@ -116,11 +116,11 @@
   %src2.el2 = extractelement <2 x half> %src2.vec, i64 1
   %csrc2.el2 = fpext half %src2.el2 to float
 
-  %mul2 = fmul float %csrc2.el2, %csrc1.el2
-  %mul1 = fmul float %csrc1.el1, %csrc2.el1
+  %mul2 = fmul FASTMATH_FLAGS float %csrc2.el2, %csrc1.el2
+  %mul1 = fmul FASTMATH_FLAGS float %csrc1.el1, %csrc2.el1
   %acc = load float, float addrspace(1)* %dst, align 4
-  %acc1 = fadd float %mul2, %acc
-  %acc2 = fadd float %mul1, %acc1
+  %acc1 = fadd FASTMATH_FLAGS float %mul2, %acc
+  %acc2 = fadd FASTMATH_FLAGS float %mul1, %acc1
   store float %acc2, float addrspace(1)* %dst, align 4
   ret void
 }
@@ -153,11 +153,11 @@
   %src2.el2 = extractelement <4 x half> %src2.vec, i64 1
   %csrc2.el2 = fpext half %src2.el2 to float
 
-  %mul2 = fmul float %csrc1.el2, %csrc2.el2
-  %mul1 = fmul float %csrc1.el1, %csrc2.el1
+  %mul2 = fmul FASTMATH_FLAGS float %csrc1.el2, %csrc2.el2
+  %mul1 = fmul FASTMATH_FLAGS float %csrc1.el1, %csrc2.el1
   %acc = load float, float addrspace(1)* %dst, align 4
-  %acc1 = fadd float %mul2, %acc
-  %acc2 = fadd float %mul1, %acc1
+  %acc1 = fadd FASTMATH_FLAGS float %mul2, %acc
+  %acc2 = fadd FASTMATH_FLAGS float %mul1, %acc1
   store float %acc2, float addrspace(1)* %dst, align 4
   ret void
 }
@@ -190,11 +190,11 @@
   %src2.el2 = extractelement <2 x half> %src2.vec, i64 1
   %csrc2.el2 = fpext half %src2.el2 to float
 
-  %mul2 = fmul float %csrc1.el2, %csrc1.el1
-  %mul1 = fmul float %csrc2.el1, %csrc2.el2
+  %mul2 = fmul FASTMATH_FLAGS float %csrc1.el2, %csrc1.el1
+  %mul1 = fmul FASTMATH_FLAGS float %csrc2.el1, %csrc2.el2
   %acc = load float, float addrspace(1)* %dst, align 4
-  %acc1 = fadd float %mul2, %acc
-  %acc2 = fadd float %mul1, %acc1
+  %acc1 = fadd FASTMATH_FLAGS float %mul2, %acc
+  %acc2 = fadd FASTMATH_FLAGS float %mul1, %acc1
   store float %acc2, float addrspace(1)* %dst, align 4
   ret void
 }
@@ -227,11 +227,11 @@
   %src2.el2 = extractelement <2 x half> %src2.vec, i64 1
   %csrc2.el2 = fpext half %src2.el2 to float
 
-  %mul2 = fmul float %csrc1.el2, %csrc2.el1
-  %mul1 = fmul float %csrc1.el1, %csrc2.el2
+  %mul2 = fmul FASTMATH_FLAGS float %csrc1.el2, %csrc2.el1
+  %mul1 = fmul FASTMATH_FLAGS float %csrc1.el1, %csrc2.el2
   %acc = load float, float addrspace(1)* %dst, align 4
-  %acc1 = fadd float %mul2, %acc
-  %acc2 = fadd float %mul1, %acc1
+  %acc1 = fadd FASTMATH_FLAGS float %mul2, %acc
+  %acc2 = fadd FASTMATH_FLAGS float %mul1, %acc1
   store float %acc2, float addrspace(1)* %dst, align 4
   ret void
 }
Index: llvm/test/CodeGen/AMDGPU/fma-combine.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/fma-combine.ll
+++ llvm/test/CodeGen/AMDGPU/fma-combine.ll
@@
-1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -denormal-fp-math-f32=preserve-sign -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=ieee -verify-machineinstrs -fp-contract=fast -enable-no-infs-fp-math -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=SI-FMA -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -denormal-fp-math-f32=preserve-sign -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s +; RUN: sed -e "s,FASTMATH_FLAGS,fast,g" %s | llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=ieee -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=SI-FMA -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s ; FIXME: Remove enable-unsafe-fp-math in RUN line and add flags to IR instrs @@ -32,8 +32,8 @@ %b = load volatile double, double addrspace(1)* %gep.1 %c = load volatile double, double addrspace(1)* %gep.2 - %mul = fmul double %a, %b - %fma = fadd double %mul, %c + %mul = fmul FASTMATH_FLAGS double %a, %b + %fma = fadd FASTMATH_FLAGS double %mul, %c store double %fma, double addrspace(1)* %gep.out ret void } @@ -63,9 +63,9 @@ %c = load volatile double, double addrspace(1)* %gep.2 %d = load volatile double, double addrspace(1)* %gep.3 - %mul = fmul double %a, %b - %fma0 = fadd double %mul, %c - %fma1 = fadd double %mul, %d + %mul = fmul FASTMATH_FLAGS double %a, %b + %fma0 = fadd FASTMATH_FLAGS double %mul, %c + %fma1 = fadd FASTMATH_FLAGS double %mul, %d store volatile double %fma0, double addrspace(1)* %gep.out.0 store volatile double %fma1, double addrspace(1)* %gep.out.1 ret void @@ -89,8 +89,8 @@ %b = load volatile double, double addrspace(1)* %gep.1 %c = load volatile double, double addrspace(1)* %gep.2 - %mul = fmul double %a, %b - %fma = fadd double %c, %mul + %mul = fmul FASTMATH_FLAGS double %a, %b + %fma = fadd FASTMATH_FLAGS double %c, %mul store double %fma, double addrspace(1)* %gep.out ret void } @@ -113,8 +113,8 @@ %b = load volatile double, double addrspace(1)* %gep.1 %c = load volatile double, double addrspace(1)* %gep.2 - %mul = fmul double %a, %b - %fma = fsub double %mul, %c + %mul = fmul FASTMATH_FLAGS double %a, %b + %fma = fsub FASTMATH_FLAGS double %mul, %c store double %fma, double addrspace(1)* %gep.out ret void } @@ -144,9 +144,9 @@ %c = load volatile double, double addrspace(1)* %gep.2 %d = load volatile double, double addrspace(1)* %gep.3 - %mul = fmul double %a, %b - %fma0 = fsub double %mul, %c - %fma1 = fsub double %mul, %d + %mul = fmul FASTMATH_FLAGS double %a, %b + %fma0 
= fsub FASTMATH_FLAGS double %mul, %c + %fma1 = fsub FASTMATH_FLAGS double %mul, %d store volatile double %fma0, double addrspace(1)* %gep.out.0 store volatile double %fma1, double addrspace(1)* %gep.out.1 ret void @@ -170,8 +170,8 @@ %b = load volatile double, double addrspace(1)* %gep.1 %c = load volatile double, double addrspace(1)* %gep.2 - %mul = fmul double %a, %b - %fma = fsub double %c, %mul + %mul = fmul FASTMATH_FLAGS double %a, %b + %fma = fsub FASTMATH_FLAGS double %c, %mul store double %fma, double addrspace(1)* %gep.out ret void } @@ -201,9 +201,9 @@ %c = load volatile double, double addrspace(1)* %gep.2 %d = load volatile double, double addrspace(1)* %gep.3 - %mul = fmul double %a, %b - %fma0 = fsub double %c, %mul - %fma1 = fsub double %d, %mul + %mul = fmul FASTMATH_FLAGS double %a, %b + %fma0 = fsub FASTMATH_FLAGS double %c, %mul + %fma1 = fsub FASTMATH_FLAGS double %d, %mul store volatile double %fma0, double addrspace(1)* %gep.out.0 store volatile double %fma1, double addrspace(1)* %gep.out.1 ret void @@ -227,9 +227,9 @@ %b = load volatile double, double addrspace(1)* %gep.1 %c = load volatile double, double addrspace(1)* %gep.2 - %mul = fmul double %a, %b - %mul.neg = fsub double -0.0, %mul - %fma = fsub double %mul.neg, %c + %mul = fmul FASTMATH_FLAGS double %a, %b + %mul.neg = fsub FASTMATH_FLAGS double -0.0, %mul + %fma = fsub FASTMATH_FLAGS double %mul.neg, %c store double %fma, double addrspace(1)* %gep.out ret void @@ -260,10 +260,10 @@ %c = load volatile double, double addrspace(1)* %gep.2 %d = load volatile double, double addrspace(1)* %gep.3 - %mul = fmul double %a, %b - %mul.neg = fsub double -0.0, %mul - %fma0 = fsub double %mul.neg, %c - %fma1 = fsub double %mul.neg, %d + %mul = fmul FASTMATH_FLAGS double %a, %b + %mul.neg = fsub FASTMATH_FLAGS double -0.0, %mul + %fma0 = fsub FASTMATH_FLAGS double %mul.neg, %c + %fma1 = fsub FASTMATH_FLAGS double %mul.neg, %d store volatile double %fma0, double addrspace(1)* %gep.out.0 store volatile double %fma1, double addrspace(1)* %gep.out.1 @@ -295,10 +295,10 @@ %c = load volatile double, double addrspace(1)* %gep.2 %d = load volatile double, double addrspace(1)* %gep.3 - %mul = fmul double %a, %b - %mul.neg = fsub double -0.0, %mul - %fma0 = fsub double %mul.neg, %c - %fma1 = fsub double %mul, %d + %mul = fmul FASTMATH_FLAGS double %a, %b + %mul.neg = fsub FASTMATH_FLAGS double -0.0, %mul + %fma0 = fsub FASTMATH_FLAGS double %mul.neg, %c + %fma1 = fsub FASTMATH_FLAGS double %mul, %d store volatile double %fma0, double addrspace(1)* %gep.out.0 store volatile double %fma1, double addrspace(1)* %gep.out.1 @@ -337,9 +337,9 @@ %u = load volatile double, double addrspace(1)* %gep.3 %v = load volatile double, double addrspace(1)* %gep.4 - %tmp0 = fmul double %u, %v + %tmp0 = fmul FASTMATH_FLAGS double %u, %v %tmp1 = call double @llvm.fma.f64(double %x, double %y, double %tmp0) #0 - %tmp2 = fsub double %tmp1, %z + %tmp2 = fsub FASTMATH_FLAGS double %tmp1, %z store double %tmp2, double addrspace(1)* %gep.out ret void @@ -379,9 +379,9 @@ %v = load volatile double, double addrspace(1)* %gep.4 ; nsz flag is needed since this combine may change sign of zero - %tmp0 = fmul nsz double %u, %v - %tmp1 = call nsz double @llvm.fma.f64(double %y, double %z, double %tmp0) #0 - %tmp2 = fsub nsz double %x, %tmp1 + %tmp0 = fmul FASTMATH_FLAGS nsz double %u, %v + %tmp1 = call FASTMATH_FLAGS nsz double @llvm.fma.f64(double %y, double %z, double %tmp0) #0 + %tmp2 = fsub FASTMATH_FLAGS nsz double %x, %tmp1 store double %tmp2, double 
addrspace(1)* %gep.out ret void @@ -401,8 +401,8 @@ float addrspace(1)* %in2) { %x = load volatile float, float addrspace(1)* %in1 %y = load volatile float, float addrspace(1)* %in2 - %a = fadd float %x, 1.0 - %m = fmul float %a, %y + %a = fadd FASTMATH_FLAGS float %x, 1.0 + %m = fmul FASTMATH_FLAGS float %a, %y store float %m, float addrspace(1)* %out ret void } @@ -417,8 +417,8 @@ float addrspace(1)* %in2) { %x = load volatile float, float addrspace(1)* %in1 %y = load volatile float, float addrspace(1)* %in2 - %a = fadd float %x, 1.0 - %m = fmul float %y, %a + %a = fadd FASTMATH_FLAGS float %x, 1.0 + %m = fmul FASTMATH_FLAGS float %y, %a store float %m, float addrspace(1)* %out ret void } @@ -433,8 +433,8 @@ float addrspace(1)* %in2) { %x = load float, float addrspace(1)* %in1 %y = load float, float addrspace(1)* %in2 - %a = fadd float %x, -1.0 - %m = fmul float %a, %y + %a = fadd FASTMATH_FLAGS float %x, -1.0 + %m = fmul FASTMATH_FLAGS float %a, %y store float %m, float addrspace(1)* %out ret void } @@ -449,8 +449,8 @@ float addrspace(1)* %in2) { %x = load float, float addrspace(1)* %in1 %y = load float, float addrspace(1)* %in2 - %a = fadd float %x, -1.0 - %m = fmul float %y, %a + %a = fadd FASTMATH_FLAGS float %x, -1.0 + %m = fmul FASTMATH_FLAGS float %y, %a store float %m, float addrspace(1)* %out ret void } @@ -465,8 +465,8 @@ float addrspace(1)* %in2) { %x = load float, float addrspace(1)* %in1 %y = load float, float addrspace(1)* %in2 - %s = fsub float 1.0, %x - %m = fmul float %s, %y + %s = fsub FASTMATH_FLAGS float 1.0, %x + %m = fmul FASTMATH_FLAGS float %s, %y store float %m, float addrspace(1)* %out ret void } @@ -481,8 +481,8 @@ float addrspace(1)* %in2) { %x = load float, float addrspace(1)* %in1 %y = load float, float addrspace(1)* %in2 - %s = fsub float 1.0, %x - %m = fmul float %y, %s + %s = fsub FASTMATH_FLAGS float 1.0, %x + %m = fmul FASTMATH_FLAGS float %y, %s store float %m, float addrspace(1)* %out ret void } @@ -497,8 +497,8 @@ float addrspace(1)* %in2) { %x = load float, float addrspace(1)* %in1 %y = load float, float addrspace(1)* %in2 - %s = fsub float -1.0, %x - %m = fmul float %s, %y + %s = fsub FASTMATH_FLAGS float -1.0, %x + %m = fmul FASTMATH_FLAGS float %s, %y store float %m, float addrspace(1)* %out ret void } @@ -513,8 +513,8 @@ float addrspace(1)* %in2) { %x = load float, float addrspace(1)* %in1 %y = load float, float addrspace(1)* %in2 - %s = fsub float -1.0, %x - %m = fmul float %y, %s + %s = fsub FASTMATH_FLAGS float -1.0, %x + %m = fmul FASTMATH_FLAGS float %y, %s store float %m, float addrspace(1)* %out ret void } @@ -529,8 +529,8 @@ float addrspace(1)* %in2) { %x = load float, float addrspace(1)* %in1 %y = load float, float addrspace(1)* %in2 - %s = fsub float %x, 1.0 - %m = fmul float %s, %y + %s = fsub FASTMATH_FLAGS float %x, 1.0 + %m = fmul FASTMATH_FLAGS float %s, %y store float %m, float addrspace(1)* %out ret void } @@ -545,8 +545,8 @@ float addrspace(1)* %in2) { %x = load float, float addrspace(1)* %in1 %y = load float, float addrspace(1)* %in2 - %s = fsub float %x, 1.0 - %m = fmul float %y, %s + %s = fsub FASTMATH_FLAGS float %x, 1.0 + %m = fmul FASTMATH_FLAGS float %y, %s store float %m, float addrspace(1)* %out ret void } @@ -561,8 +561,8 @@ float addrspace(1)* %in2) { %x = load float, float addrspace(1)* %in1 %y = load float, float addrspace(1)* %in2 - %s = fsub float %x, -1.0 - %m = fmul float %s, %y + %s = fsub FASTMATH_FLAGS float %x, -1.0 + %m = fmul FASTMATH_FLAGS float %s, %y store float %m, float addrspace(1)* %out ret void } 
@@ -577,8 +577,8 @@ float addrspace(1)* %in2) { %x = load float, float addrspace(1)* %in1 %y = load float, float addrspace(1)* %in2 - %s = fsub float %x, -1.0 - %m = fmul float %y, %s + %s = fsub FASTMATH_FLAGS float %x, -1.0 + %m = fmul FASTMATH_FLAGS float %y, %s store float %m, float addrspace(1)* %out ret void } @@ -601,10 +601,10 @@ %x = load float, float addrspace(1)* %in1 %y = load float, float addrspace(1)* %in2 %t = load float, float addrspace(1)* %in3 - %t1 = fsub float 1.0, %t - %tx = fmul float %x, %t - %ty = fmul float %y, %t1 - %r = fadd float %tx, %ty + %t1 = fsub FASTMATH_FLAGS float 1.0, %t + %tx = fmul FASTMATH_FLAGS float %x, %t + %ty = fmul FASTMATH_FLAGS float %y, %t1 + %r = fadd FASTMATH_FLAGS float %tx, %ty store float %r, float addrspace(1)* %out ret void } @@ -623,10 +623,10 @@ %x = load double, double addrspace(1)* %in1 %y = load double, double addrspace(1)* %in2 %t = load double, double addrspace(1)* %in3 - %t1 = fsub double 1.0, %t - %tx = fmul double %x, %t - %ty = fmul double %y, %t1 - %r = fadd double %tx, %ty + %t1 = fsub FASTMATH_FLAGS double 1.0, %t + %tx = fmul FASTMATH_FLAGS double %x, %t + %ty = fmul FASTMATH_FLAGS double %y, %t1 + %r = fadd FASTMATH_FLAGS double %tx, %ty store double %r, double addrspace(1)* %out ret void } @@ -647,7 +647,7 @@ %r1 = load volatile float, float addrspace(1)* %gep.0 %r2 = load volatile float, float addrspace(1)* %gep.1 - %r1.fneg = fneg float %r1 + %r1.fneg = fneg FASTMATH_FLAGS float %r1 %r3 = tail call float @llvm.fma.f32(float -2.0, float %r1.fneg, float %r2) store float %r3, float addrspace(1)* %gep.out @@ -669,7 +669,7 @@ %r1 = load volatile float, float addrspace(1)* %gep.0 %r2 = load volatile float, float addrspace(1)* %gep.1 - %r1.fneg = fneg float %r1 + %r1.fneg = fneg FASTMATH_FLAGS float %r1 %r3 = tail call float @llvm.fma.f32(float 2.0, float %r1.fneg, float %r2) store float %r3, float addrspace(1)* %gep.out Index: llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll +++ llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll @@ -7,8 +7,8 @@ ; Make sure (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) doesn't ; make add an instruction if the fadd has more than one use. 
-declare half @llvm.fabs.f16(half) #1 -declare float @llvm.fabs.f32(float) #1 +declare half @llvm.fabs.f16(half) #0 +declare float @llvm.fabs.f32(float) #0 ; GCN-LABEL: {{^}}multiple_fadd_use_test_f32: ; SI: v_max_legacy_f32_e64 [[A16:v[0-9]+]], @@ -25,17 +25,17 @@ ; GFX8_10: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; VI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, 1.0 ; GFX10: v_fma_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, 1.0 -define amdgpu_kernel void @multiple_fadd_use_test_f32(float addrspace(1)* %out, float %x, float %y, float %z) #0 { - %a11 = fadd float %y, -1.0 +define amdgpu_kernel void @multiple_fadd_use_test_f32(float addrspace(1)* %out, float %x, float %y, float %z) { + %a11 = fadd fast float %y, -1.0 %a12 = call float @llvm.fabs.f32(float %a11) - %a13 = fadd float %x, -1.0 + %a13 = fadd fast float %x, -1.0 %a14 = call float @llvm.fabs.f32(float %a13) %a15 = fcmp ogt float %a12, %a14 %a16 = select i1 %a15, float %a12, float %a14 - %a17 = fmul float %a16, 2.0 - %a18 = fmul float %a17, %a17 - %a19 = fmul float %a18, %a17 - %a20 = fsub float 1.0, %a19 + %a17 = fmul fast float %a16, 2.0 + %a18 = fmul fast float %a17, %a17 + %a19 = fmul fast float %a18, %a17 + %a20 = fsub fast float 1.0, %a19 store float %a20, float addrspace(1)* %out ret void } @@ -47,7 +47,7 @@ ; GCN-DAG: buffer_store_dword [[MUL2]] ; GCN-DAG: buffer_store_dword [[MAD]] ; GCN: s_endpgm -define amdgpu_kernel void @multiple_use_fadd_fmac_f32(float addrspace(1)* %out, float %x, [8 x i32], float %y) #0 { +define amdgpu_kernel void @multiple_use_fadd_fmac_f32(float addrspace(1)* %out, float %x, [8 x i32], float %y) { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 %mul2 = fmul fast float %x, 2.0 %mad = fadd fast float %mul2, %y @@ -63,7 +63,7 @@ ; GCN-DAG: buffer_store_dword [[MUL2]] ; GCN-DAG: buffer_store_dword [[MAD]] ; GCN: s_endpgm -define amdgpu_kernel void @multiple_use_fadd_fmad_f32(float addrspace(1)* %out, float %x, float %y) #0 { +define amdgpu_kernel void @multiple_use_fadd_fmad_f32(float addrspace(1)* %out, float %x, float %y) { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 %x.abs = call float @llvm.fabs.f32(float %x) %mul2 = fmul fast float %x.abs, 2.0 @@ -78,7 +78,7 @@ ; SIVI: v_mad_f32 {{v[0-9]+}}, |[[X]]|, 2.0, v{{[0-9]+}} ; GFX10: v_fma_f32 {{v[0-9]+}}, |[[X:s[0-9]+]]|, 2.0, {{s[0-9]+}} ; GFX10: v_fma_f32 {{v[0-9]+}}, |[[X]]|, 2.0, {{s[0-9]+}} -define amdgpu_kernel void @multiple_use_fadd_multi_fmad_f32(float addrspace(1)* %out, float %x, float %y, float %z) #0 { +define amdgpu_kernel void @multiple_use_fadd_multi_fmad_f32(float addrspace(1)* %out, float %x, float %y, float %z) { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 %x.abs = call float @llvm.fabs.f32(float %x) %mul2 = fmul fast float %x.abs, 2.0 @@ -93,7 +93,7 @@ ; GCN: v_mul_f32_e64 [[TMP0:v[0-9]+]], [[X:s[0-9]+]], -4.0 ; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[X]], [[TMP0]] ; GCN: buffer_store_dword [[RESULT]] -define amdgpu_kernel void @fmul_x2_xn2_f32(float addrspace(1)* %out, float %x, float %y) #0 { +define amdgpu_kernel void @fmul_x2_xn2_f32(float addrspace(1)* %out, float %x, float %y) { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 %mul2 = fmul fast float %x, 2.0 %muln2 = fmul fast float %x, -2.0 @@ -108,7 +108,7 @@ ; GFX10: v_mul_f32_e64 [[TMP0:v[0-9]+]], 0xc0c00000, [[X:s[0-9]+]] ; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[X]], [[TMP0]] ; GCN: buffer_store_dword [[RESULT]] -define amdgpu_kernel void @fmul_x2_xn3_f32(float 
addrspace(1)* %out, float %x, float %y) #0 { +define amdgpu_kernel void @fmul_x2_xn3_f32(float addrspace(1)* %out, float %x, float %y) { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 %mul2 = fmul fast float %x, 2.0 %muln2 = fmul fast float %x, -3.0 @@ -128,20 +128,20 @@ ; VI-DENORM: v_fma_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, 1.0 ; GFX10-DENORM: v_fma_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, 1.0 ; GFX10-FLUSH: v_sub_f16_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}} -define amdgpu_kernel void @multiple_fadd_use_test_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) #0 { +define amdgpu_kernel void @multiple_fadd_use_test_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) { %x = bitcast i16 %x.arg to half %y = bitcast i16 %y.arg to half %z = bitcast i16 %z.arg to half - %a11 = fadd half %y, -1.0 + %a11 = fadd fast half %y, -1.0 %a12 = call half @llvm.fabs.f16(half %a11) - %a13 = fadd half %x, -1.0 + %a13 = fadd fast half %x, -1.0 %a14 = call half @llvm.fabs.f16(half %a13) %a15 = fcmp ogt half %a12, %a14 %a16 = select i1 %a15, half %a12, half %a14 - %a17 = fmul half %a16, 2.0 - %a18 = fmul half %a17, %a17 - %a19 = fmul half %a18, %a17 - %a20 = fsub half 1.0, %a19 + %a17 = fmul fast half %a16, 2.0 + %a18 = fmul fast half %a17, %a17 + %a19 = fmul fast half %a18, %a17 + %a20 = fsub fast half 1.0, %a19 store half %a20, half addrspace(1)* %out ret void } @@ -157,7 +157,7 @@ ; GCN-DAG: buffer_store_short [[MUL2]] ; GCN-DAG: buffer_store_short [[MAD]] ; GCN: s_endpgm -define amdgpu_kernel void @multiple_use_fadd_fmac_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg) #0 { +define amdgpu_kernel void @multiple_use_fadd_fmac_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg) { %x = bitcast i16 %x.arg to half %y = bitcast i16 %y.arg to half %out.gep.1 = getelementptr half, half addrspace(1)* %out, i32 1 @@ -179,7 +179,7 @@ ; GCN-DAG: buffer_store_short [[MUL2]] ; GCN-DAG: buffer_store_short [[MAD]] ; GCN: s_endpgm -define amdgpu_kernel void @multiple_use_fadd_fmad_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg) #0 { +define amdgpu_kernel void @multiple_use_fadd_fmad_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg) { %x = bitcast i16 %x.arg to half %y = bitcast i16 %y.arg to half %out.gep.1 = getelementptr half, half addrspace(1)* %out, i32 1 @@ -204,7 +204,7 @@ ; GFX10-DENORM: v_fma_f16 {{v[0-9]+}}, |[[X:s[0-9]+]]|, 2.0, s{{[0-9]+}} ; GFX10-DENORM: v_fma_f16 {{v[0-9]+}}, |[[X]]|, 2.0, s{{[0-9]+}} -define amdgpu_kernel void @multiple_use_fadd_multi_fmad_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) #0 { +define amdgpu_kernel void @multiple_use_fadd_multi_fmad_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) { %x = bitcast i16 %x.arg to half %y = bitcast i16 %y.arg to half %z = bitcast i16 %z.arg to half @@ -222,7 +222,7 @@ ; GCN: v_mul_f16_e64 [[TMP0:v[0-9]+]], [[X:s[0-9]+]], -4.0 ; GCN: v_mul_f16_e32 [[RESULT:v[0-9]+]], [[X]], [[TMP0]] ; GCN: buffer_store_short [[RESULT]] -define amdgpu_kernel void @fmul_x2_xn2_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg) #0 { +define amdgpu_kernel void @fmul_x2_xn2_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg) { %x = bitcast i16 %x.arg to half %y = bitcast i16 %y.arg to half %out.gep.1 = getelementptr half, half addrspace(1)* %out, i32 1 @@ -239,7 +239,7 @@ ; GFX10: 
v_mul_f16_e64 [[TMP0:v[0-9]+]], 0xc600, [[X:s[0-9]+]] ; GCN: v_mul_f16_e32 [[RESULT:v[0-9]+]], [[X]], [[TMP0]] ; GCN: buffer_store_short [[RESULT]] -define amdgpu_kernel void @fmul_x2_xn3_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg) #0 { +define amdgpu_kernel void @fmul_x2_xn3_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg) { %x = bitcast i16 %x.arg to half %y = bitcast i16 %y.arg to half %out.gep.1 = getelementptr half, half addrspace(1)* %out, i32 1 @@ -250,5 +250,4 @@ ret void } -attributes #0 = { nounwind "unsafe-fp-math"="true" } -attributes #1 = { nounwind readnone } +attributes #0 = { nounwind readnone } Index: llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll +++ llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll @@ -1,13 +1,13 @@ -; RUN: llc -march=amdgcn -mcpu=fiji -denormal-fp-math=preserve-sign -denormal-fp-math-f32=ieee -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI-FLUSH,VI %s -; RUN: llc -march=amdgcn -mcpu=fiji -denormal-fp-math=preserve-sign -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI-FLUSH,VI %s +; RUN: sed -e "s,FASTMATH_FLAGS,,g" %s | llc -march=amdgcn -mcpu=fiji -denormal-fp-math=preserve-sign -denormal-fp-math-f32=ieee -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,VI-FLUSH,VI %s +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -march=amdgcn -mcpu=fiji -denormal-fp-math=preserve-sign -denormal-fp-math-f32=ieee -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,VI-FLUSH,VI %s -; RUN: llc -march=amdgcn -mcpu=fiji -denormal-fp-math=ieee -denormal-fp-math-f32=ieee -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM,GCN-DENORM-STRICT,VI-DENORM,VI %s -; RUN: llc -march=amdgcn -mcpu=fiji -denormal-fp-math=ieee -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM,GCN-DENORM-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s +; RUN: sed -e "s,FASTMATH_FLAGS,,g" %s | llc -march=amdgcn -mcpu=fiji -denormal-fp-math=ieee -denormal-fp-math-f32=ieee -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM,GCN-DENORM-STRICT,VI-DENORM,VI %s +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -march=amdgcn -mcpu=fiji -denormal-fp-math=ieee -denormal-fp-math-f32=ieee -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM,GCN-DENORM-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s -; RUN: llc -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=ieee -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10-FLUSH,GFX10 %s -; RUN: llc -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10-FLUSH,GFX10 %s -; RUN: llc -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=ieee -denormal-fp-math-f32=ieee -fp-contract=on -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM,GCN-DENORM-STRICT,GFX10-DENORM-STRICT,GFX10-DENORM,GFX10 %s -; RUN: llc -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=ieee -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck 
-enable-var-scope -check-prefixes=GCN,GCN-DENORM,GCN-DENORM-CONTRACT,GFX10-DENORM-CONTRACT,GFX10-DENORM,GFX10 %s +; RUN: sed -e "s,FASTMATH_FLAGS,,g" %s | llc -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=ieee -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10-FLUSH,GFX10 %s +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=ieee -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10-FLUSH,GFX10 %s +; RUN: sed -e "s,FASTMATH_FLAGS,,g" %s | llc -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=ieee -denormal-fp-math-f32=ieee -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM,GCN-DENORM-STRICT,GFX10-DENORM-STRICT,GFX10-DENORM,GFX10 %s +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=ieee -denormal-fp-math-f32=ieee -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM,GCN-DENORM-CONTRACT,GFX10-DENORM-CONTRACT,GFX10-DENORM,GFX10 %s declare i32 @llvm.amdgcn.workitem.id.x() #1 declare half @llvm.fmuladd.f16(half, half, half) #1 @@ -46,8 +46,8 @@ %r0 = load half, half addrspace(1)* %in1 %r1 = load half, half addrspace(1)* %in2 %r2 = load half, half addrspace(1)* %in3 - %mul = fmul half %r0, %r1 - %add = fadd half %mul, %r2 + %mul = fmul FASTMATH_FLAGS half %r0, %r1 + %add = fadd FASTMATH_FLAGS half %mul, %r2 store half %add, half addrspace(1)* %out ret void } @@ -163,8 +163,8 @@ %r0 = load volatile half, half addrspace(1)* %gep.0 %r1 = load volatile half, half addrspace(1)* %gep.1 - %add.0 = fadd half %r0, %r0 - %add.1 = fadd half %add.0, %r1 + %add.0 = fadd FASTMATH_FLAGS half %r0, %r0 + %add.1 = fadd FASTMATH_FLAGS half %add.0, %r1 store half %add.1, half addrspace(1)* %gep.out ret void } @@ -200,8 +200,8 @@ %r0 = load volatile half, half addrspace(1)* %gep.0 %r1 = load volatile half, half addrspace(1)* %gep.1 - %add.0 = fadd half %r0, %r0 - %add.1 = fadd half %r1, %add.0 + %add.0 = fadd FASTMATH_FLAGS half %r0, %r0 + %add.1 = fadd FASTMATH_FLAGS half %r1, %add.0 store half %add.1, half addrspace(1)* %gep.out ret void } @@ -348,8 +348,8 @@ %a = load volatile half, half addrspace(1)* %gep0, align 2 %b = load volatile half, half addrspace(1)* %gep1, align 2 %c = load volatile half, half addrspace(1)* %gep2, align 2 - %mul = fmul half %a, %b - %sub = fsub half %mul, %c + %mul = fmul FASTMATH_FLAGS half %a, %b + %sub = fsub FASTMATH_FLAGS half %mul, %c store half %sub, half addrspace(1)* %outgep, align 2 ret void } @@ -382,8 +382,8 @@ %a = load volatile half, half addrspace(1)* %gep0, align 2 %b = load volatile half, half addrspace(1)* %gep1, align 2 %c = load volatile half, half addrspace(1)* %gep2, align 2 - %mul = fmul half %a, %b - %sub = fsub half %c, %mul + %mul = fmul FASTMATH_FLAGS half %a, %b + %sub = fsub FASTMATH_FLAGS half %c, %mul store half %sub, half addrspace(1)* %outgep, align 2 ret void } @@ -417,8 +417,8 @@ %b = load volatile half, half addrspace(1)* %gep1, align 2 %c = load volatile half, half addrspace(1)* %gep2, align 2 %c.abs = call half @llvm.fabs.f16(half %c) #0 - %mul = fmul half %a, %b - %sub = fsub half %mul, %c.abs + %mul = fmul FASTMATH_FLAGS half %a, %b + %sub = fsub FASTMATH_FLAGS half %mul, %c.abs store half %sub, half addrspace(1)* %outgep, align 2 ret void } @@ -453,8 +453,8 @@ %b = load volatile half, half addrspace(1)* %gep1, align 2 %c = load volatile half, half addrspace(1)* %gep2, 
align 2 %c.abs = call half @llvm.fabs.f16(half %c) #0 - %mul = fmul half %a, %b - %sub = fsub half %c.abs, %mul + %mul = fmul FASTMATH_FLAGS half %a, %b + %sub = fsub FASTMATH_FLAGS half %c.abs, %mul store half %sub, half addrspace(1)* %outgep, align 2 ret void } @@ -493,8 +493,8 @@ %c = load volatile half, half addrspace(1)* %gep2, align 2 %nega = fneg half %a %negb = fneg half %b - %mul = fmul half %nega, %negb - %sub = fadd half %mul, %c + %mul = fmul FASTMATH_FLAGS half %nega, %negb + %sub = fadd FASTMATH_FLAGS half %mul, %c store half %sub, half addrspace(1)* %outgep, align 2 ret void } @@ -529,8 +529,8 @@ %b = load volatile half, half addrspace(1)* %gep1, align 2 %c = load volatile half, half addrspace(1)* %gep2, align 2 %b.abs = call half @llvm.fabs.f16(half %b) #0 - %mul = fmul half %a, %b.abs - %sub = fsub half %mul, %c + %mul = fmul FASTMATH_FLAGS half %a, %b.abs + %sub = fsub FASTMATH_FLAGS half %mul, %c store half %sub, half addrspace(1)* %outgep, align 2 ret void } @@ -563,8 +563,8 @@ %r1 = load volatile half, half addrspace(1)* %gep.0 %r2 = load volatile half, half addrspace(1)* %gep.1 - %add = fadd half %r1, %r1 - %r3 = fsub half %r2, %add + %add = fadd FASTMATH_FLAGS half %r1, %r1 + %r3 = fsub FASTMATH_FLAGS half %r2, %add store half %r3, half addrspace(1)* %gep.out ret void @@ -595,8 +595,8 @@ %r1 = load volatile half, half addrspace(1)* %gep.0 %r2 = load volatile half, half addrspace(1)* %gep.1 - %add = fadd half %r1, %r1 - %r3 = fsub half %add, %r2 + %add = fadd FASTMATH_FLAGS half %r1, %r1 + %r3 = fsub FASTMATH_FLAGS half %add, %r2 store half %r3, half addrspace(1)* %gep.out ret void Index: llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll +++ llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll @@ -1,24 +1,24 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -mattr=+fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-MAD,SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -denormal-fp-math-f32=ieee -mattr=+fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,SI-DENORM,GCN-DENORM-FASTFMA,SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -denormal-fp-math-f32=preserve-sign -mattr=-fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-MAD,SI-FLUSH,SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -denormal-fp-math-f32=ieee -mattr=-fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,SI-DENORM,GCN-DENORM-SLOWFMA,SI %s +; RUN: sed -e "s,FASTMATH_FLAGS,,g" %s | llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -mattr=+fast-fmaf | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-MAD,SI %s +; RUN: sed -e "s,FASTMATH_FLAGS,,g" %s | llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -denormal-fp-math-f32=ieee -mattr=+fast-fmaf | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,SI-DENORM,GCN-DENORM-FASTFMA,SI %s +; RUN: sed -e "s,FASTMATH_FLAGS,,g" %s | llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -denormal-fp-math-f32=preserve-sign -mattr=-fast-fmaf | FileCheck 
-enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-MAD,SI-FLUSH,SI %s +; RUN: sed -e "s,FASTMATH_FLAGS,,g" %s | llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -denormal-fp-math-f32=ieee -mattr=-fast-fmaf | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,SI-DENORM,GCN-DENORM-SLOWFMA,SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -mattr=+fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-MAD,SI-FLUSH,SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -denormal-fp-math-f32=ieee -mattr=+fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-CONTRACT,SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -denormal-fp-math-f32=preserve-sign -mattr=-fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-MAD,SI-FLUSH,SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -denormal-fp-math-f32=ieee -mattr=-fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI-DENORM,GCN-DENORM-SLOWFMA,GCN-DENORM-SLOWFMA-CONTRACT,SI %s +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -mattr=+fast-fmaf | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-MAD,SI-FLUSH,SI %s +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -denormal-fp-math-f32=ieee -mattr=+fast-fmaf | FileCheck -enable-var-scope -check-prefixes=GCN,SI-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-CONTRACT,SI %s +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -denormal-fp-math-f32=preserve-sign -mattr=-fast-fmaf | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-MAD,SI-FLUSH,SI %s +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -denormal-fp-math-f32=ieee -mattr=-fast-fmaf | FileCheck -enable-var-scope -check-prefixes=GCN,SI-DENORM,GCN-DENORM-SLOWFMA,GCN-DENORM-SLOWFMA-CONTRACT,SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-MAD %s -; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx900 -denormal-fp-math-f32=ieee -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM-FASTFMA %s +; RUN: sed -e "s,FASTMATH_FLAGS,,g" %s | llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-MAD %s +; RUN: sed -e "s,FASTMATH_FLAGS,,g" %s | llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx900 -denormal-fp-math-f32=ieee | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM-FASTFMA %s -; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -fp-contract=on < %s | FileCheck -enable-var-scope 
-check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-FMAC %s +; RUN: sed -e "s,FASTMATH_FLAGS,,g" %s | llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-FMAC %s ; FIXME: Should probably test this, but sometimes selecting fmac is painful to match. -; XUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx906 -denormal-fp-math-f32=ieee -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM-FASTFMA %s +; XUN: sed -e "s,FASTMATH_FLAGS,,g" %s | llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx906 -denormal-fp-math-f32=ieee | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM-FASTFMA %s -; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx1030 -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-FMAC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx1030 -denormal-fp-math-f32=ieee -mattr=+mad-mac-f32-insts -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT %s +; RUN: sed -e "s,FASTMATH_FLAGS,,g" %s | llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx1030 -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-FMAC %s +; RUN: sed -e "s,FASTMATH_FLAGS,,g" %s | llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx1030 -denormal-fp-math-f32=ieee -mattr=+mad-mac-f32-insts | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT %s ; Test all permutations of: fp32 denormals, fast fp contract, fp contract enabled for fmuladd, fmaf fast/slow. 
@@ -63,8 +63,8 @@ %r0 = load volatile float, float addrspace(1)* %in1 %r1 = load volatile float, float addrspace(1)* %in2 %r2 = load volatile float, float addrspace(1)* %in3 - %mul = fmul float %r0, %r1 - %add = fadd float %mul, %r2 + %mul = fmul FASTMATH_FLAGS float %r0, %r1 + %add = fadd FASTMATH_FLAGS float %mul, %r2 store float %add, float addrspace(1)* %out ret void } @@ -178,8 +178,8 @@ %r0 = load volatile float, float addrspace(1)* %gep.0 %r1 = load volatile float, float addrspace(1)* %gep.1 - %add.0 = fadd float %r0, %r0 - %add.1 = fadd float %add.0, %r1 + %add.0 = fadd FASTMATH_FLAGS float %r0, %r0 + %add.1 = fadd FASTMATH_FLAGS float %add.0, %r1 store float %add.1, float addrspace(1)* %gep.out ret void } @@ -214,8 +214,8 @@ %r0 = load volatile float, float addrspace(1)* %gep.0 %r1 = load volatile float, float addrspace(1)* %gep.1 - %add.0 = fadd float %r0, %r0 - %add.1 = fadd float %r1, %add.0 + %add.0 = fadd FASTMATH_FLAGS float %r0, %r0 + %add.1 = fadd FASTMATH_FLAGS float %r1, %add.0 store float %add.1, float addrspace(1)* %gep.out ret void } @@ -374,8 +374,8 @@ %a = load volatile float, float addrspace(1)* %gep0, align 4 %b = load volatile float, float addrspace(1)* %gep1, align 4 %c = load volatile float, float addrspace(1)* %gep2, align 4 - %mul = fmul float %a, %b - %sub = fsub float %mul, %c + %mul = fmul FASTMATH_FLAGS float %a, %b + %sub = fsub FASTMATH_FLAGS float %mul, %c store float %sub, float addrspace(1)* %outgep, align 4 ret void } @@ -409,8 +409,8 @@ %a = load volatile float, float addrspace(1)* %gep0, align 4 %b = load volatile float, float addrspace(1)* %gep1, align 4 %c = load volatile float, float addrspace(1)* %gep2, align 4 - %mul = fmul float %a, %b - %sub = fsub float %c, %mul + %mul = fmul FASTMATH_FLAGS float %a, %b + %sub = fsub FASTMATH_FLAGS float %c, %mul store float %sub, float addrspace(1)* %outgep, align 4 ret void } @@ -444,8 +444,8 @@ %b = load volatile float, float addrspace(1)* %gep1, align 4 %c = load volatile float, float addrspace(1)* %gep2, align 4 %c.abs = call float @llvm.fabs.f32(float %c) #0 - %mul = fmul float %a, %b - %sub = fsub float %mul, %c.abs + %mul = fmul FASTMATH_FLAGS float %a, %b + %sub = fsub FASTMATH_FLAGS float %mul, %c.abs store float %sub, float addrspace(1)* %outgep, align 4 ret void } @@ -480,8 +480,8 @@ %b = load volatile float, float addrspace(1)* %gep1, align 4 %c = load volatile float, float addrspace(1)* %gep2, align 4 %c.abs = call float @llvm.fabs.f32(float %c) #0 - %mul = fmul float %a, %b - %sub = fsub float %c.abs, %mul + %mul = fmul FASTMATH_FLAGS float %a, %b + %sub = fsub FASTMATH_FLAGS float %c.abs, %mul store float %sub, float addrspace(1)* %outgep, align 4 ret void } @@ -519,8 +519,8 @@ %c = load volatile float, float addrspace(1)* %gep2, align 4 %nega = fneg float %a %negb = fneg float %b - %mul = fmul float %nega, %negb - %sub = fadd float %mul, %c + %mul = fmul FASTMATH_FLAGS float %nega, %negb + %sub = fadd FASTMATH_FLAGS float %mul, %c store float %sub, float addrspace(1)* %outgep, align 4 ret void } @@ -554,8 +554,8 @@ %b = load volatile float, float addrspace(1)* %gep1, align 4 %c = load volatile float, float addrspace(1)* %gep2, align 4 %b.abs = call float @llvm.fabs.f32(float %b) #0 - %mul = fmul float %a, %b.abs - %sub = fsub float %mul, %c + %mul = fmul FASTMATH_FLAGS float %a, %b.abs + %sub = fsub FASTMATH_FLAGS float %mul, %c store float %sub, float addrspace(1)* %outgep, align 4 ret void } @@ -586,8 +586,8 @@ %r1 = load volatile float, float addrspace(1)* %gep.0 %r2 = load volatile 
float, float addrspace(1)* %gep.1 - %add = fadd float %r1, %r1 - %r3 = fsub float %r2, %add + %add = fadd FASTMATH_FLAGS float %r1, %r1 + %r3 = fsub FASTMATH_FLAGS float %r2, %add store float %r3, float addrspace(1)* %gep.out ret void @@ -617,8 +617,8 @@ %r1 = load volatile float, float addrspace(1)* %gep.0 %r2 = load volatile float, float addrspace(1)* %gep.1 - %add = fadd float %r1, %r1 - %r3 = fsub float %add, %r2 + %add = fadd FASTMATH_FLAGS float %r1, %r1 + %r3 = fsub FASTMATH_FLAGS float %add, %r2 store float %r3, float addrspace(1)* %gep.out ret void Index: llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll +++ llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll @@ -1,9 +1,9 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI %s +; RUN: sed -e "s,FASTMATH_FLAGS,,g" %s | llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck -check-prefixes=GCN,GCN-STRICT,SI %s +; RUN: sed -e "s,FASTMATH_FLAGS,,g" %s | llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefixes=GCN,GCN-STRICT,SI %s +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck -check-prefixes=GCN,GCN-CONTRACT,SI %s +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefixes=GCN,GCN-CONTRACT,SI %s +; RUN: sed -e "s,FASTMATH_FLAGS,,g" %s | llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck -check-prefixes=GCN,GCN-STRICT,VI %s +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI %s ; GCN-LABEL: {{^}}fmuladd_f64: ; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} @@ -27,8 +27,8 @@ %r0 = load double, double addrspace(1)* %in1 %r1 = load double, double addrspace(1)* %in2 %r2 = load double, double addrspace(1)* %in3 - %tmp = fmul double %r0, %r1 - %r3 = fadd double %tmp, %r2 + %tmp = fmul FASTMATH_FLAGS double %r0, %r1 + %r3 = fadd FASTMATH_FLAGS double %tmp, %r2 store double %r3, double addrspace(1)* %out ret void } @@ -69,8 +69,8 @@ %r0 = load volatile double, double 
addrspace(1)* %gep.0 %r1 = load volatile double, double addrspace(1)* %gep.1 - %add.0 = fadd double %r0, %r0 - %add.1 = fadd double %add.0, %r1 + %add.0 = fadd FASTMATH_FLAGS double %r0, %r0 + %add.1 = fadd FASTMATH_FLAGS double %add.0, %r1 store double %add.1, double addrspace(1)* %gep.out ret void } @@ -97,8 +97,8 @@ %r0 = load volatile double, double addrspace(1)* %gep.0 %r1 = load volatile double, double addrspace(1)* %gep.1 - %add.0 = fadd double %r0, %r0 - %add.1 = fadd double %r1, %add.0 + %add.0 = fadd FASTMATH_FLAGS double %r0, %r0 + %add.1 = fadd FASTMATH_FLAGS double %r1, %add.0 store double %add.1, double addrspace(1)* %gep.out ret void } @@ -120,8 +120,8 @@ %a = load volatile double, double addrspace(1)* %gep0, align 8 %b = load volatile double, double addrspace(1)* %gep1, align 8 %c = load volatile double, double addrspace(1)* %gep2, align 8 - %mul = fmul double %a, %b - %sub = fsub double %mul, %c + %mul = fmul FASTMATH_FLAGS double %a, %b + %sub = fsub FASTMATH_FLAGS double %mul, %c store double %sub, double addrspace(1)* %outgep, align 8 ret void } @@ -143,7 +143,7 @@ %r1 = load volatile double, double addrspace(1)* %gep.1 %add.0 = fadd fast double %r0, %r0 - %add.1 = fadd double %add.0, %r1 + %add.1 = fadd FASTMATH_FLAGS double %add.0, %r1 store double %add.1, double addrspace(1)* %gep.out ret void } @@ -164,7 +164,7 @@ %r0 = load volatile double, double addrspace(1)* %gep.0 %r1 = load volatile double, double addrspace(1)* %gep.1 - %add.0 = fadd double %r0, %r0 + %add.0 = fadd FASTMATH_FLAGS double %r0, %r0 %add.1 = fadd fast double %add.0, %r1 store double %add.1, double addrspace(1)* %gep.out ret void Index: llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll +++ llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll @@ -1,12 +1,12 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=preserve-sign -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9-FLUSH %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=preserve-sign -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9-FLUSH %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=preserve-sign -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9-FLUSH %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=preserve-sign -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9-FLUSH %s +; RUN: sed -e "s,FASTMATH_FLAGS,,g" %s | llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=preserve-sign -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX9-FLUSH %s +; RUN: sed -e "s,FASTMATH_FLAGS,,g" %s | llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=preserve-sign -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX9-FLUSH %s +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=preserve-sign -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX9-FLUSH %s +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=preserve-sign -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX9-FLUSH %s -; RUN: llc 
-amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9-DENORM-STRICT,GFX9-DENORM %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9-DENORM-STRICT,GFX9-DENORM %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9-DENORM-CONTRACT,GFX9-DENORM %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9-DENORM-CONTRACT,GFX9-DENORM %s +; RUN: sed -e "s,FASTMATH_FLAGS,,g" %s | llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX9-DENORM-STRICT,GFX9-DENORM %s +; RUN: sed -e "s,FASTMATH_FLAGS,,g" %s | llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX9-DENORM-STRICT,GFX9-DENORM %s +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX9-DENORM-CONTRACT,GFX9-DENORM %s +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX9-DENORM-CONTRACT,GFX9-DENORM %s declare i32 @llvm.amdgcn.workitem.id.x() #1 declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #1 @@ -37,8 +37,8 @@ %r0 = load <2 x half>, <2 x half> addrspace(1)* %in1 %r1 = load <2 x half>, <2 x half> addrspace(1)* %in2 %r2 = load <2 x half>, <2 x half> addrspace(1)* %in3 - %r3 = fmul <2 x half> %r0, %r1 - %r4 = fadd <2 x half> %r3, %r2 + %r3 = fmul FASTMATH_FLAGS <2 x half> %r0, %r1 + %r4 = fadd FASTMATH_FLAGS <2 x half> %r3, %r2 store <2 x half> %r4, <2 x half> addrspace(1)* %out ret void } @@ -131,8 +131,8 @@ %r0 = load volatile <2 x half>, <2 x half> addrspace(1)* %gep.0 %r1 = load volatile <2 x half>, <2 x half> addrspace(1)* %gep.1 - %add.0 = fadd <2 x half> %r0, %r0 - %add.1 = fadd <2 x half> %add.0, %r1 + %add.0 = fadd FASTMATH_FLAGS <2 x half> %r0, %r0 + %add.1 = fadd FASTMATH_FLAGS <2 x half> %add.0, %r1 store <2 x half> %add.1, <2 x half> addrspace(1)* %gep.out ret void } Index: llvm/test/CodeGen/AMDGPU/mad-combine.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/mad-combine.ll +++ llvm/test/CodeGen/AMDGPU/mad-combine.ll @@ -1,14 +1,14 @@ ; Make sure we still form mad even when unsafe math or fp-contract is allowed instead of fma. 
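; Illustrative sketch (not part of the checked functions; names are hypothetical):
; with the sed-substituted FASTMATH_FLAGS scheme, contraction is requested by
; per-instruction IR flags rather than by -fp-contract=fast or
; -enable-unsafe-fp-math on the RUN line, e.g.
;
;   define float @contract_sketch(float %a, float %b, float %c) {
;     %mul = fmul contract float %a, %b    ; `contract` on the pair marks it as
;     %res = fadd contract float %mul, %c  ; a candidate for mad/fma formation
;     ret float %res
;   }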
-; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-UNSAFE -check-prefix=FUNC %s +; RUN: sed -e "s,FASTMATH_FLAGS,,g" %s | llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s +; RUN: sed -e "s,FASTMATH_FLAGS,reassoc contract,g" %s | llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-UNSAFE -check-prefix=FUNC %s ; FIXME: Remove enable-unsafe-fp-math in RUN line and add flags to IR instrs ; Make sure we don't form mad with denormals -; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-FASTFMAF -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=verde -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=ieee -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-FASTFMAF -check-prefix=FUNC %s +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -march=amdgcn -mcpu=verde -denormal-fp-math-f32=ieee -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s declare i32 @llvm.amdgcn.workitem.id.x() #0 declare float @llvm.fabs.f32(float) #0 @@ -44,8 +44,8 @@ %b = load volatile float, float addrspace(1)* %gep.1 %c = load volatile float, float addrspace(1)* %gep.2 - %mul = fmul float %a, %b - %fma = fadd float %mul, %c + %mul = fmul FASTMATH_FLAGS float %a, %b + %fma = fadd FASTMATH_FLAGS float %mul, %c store float %fma, float addrspace(1)* %gep.out ret void } @@ -86,9 +86,9 @@ %c = load volatile float, float addrspace(1)* %gep.2 %d = load volatile float, float addrspace(1)* %gep.3 - %mul = fmul float %a, %b - %fma0 = fadd float %mul, %c - %fma1 = fadd float %mul, %d + %mul = fmul FASTMATH_FLAGS float %a, %b + %fma0 = fadd FASTMATH_FLAGS float %mul, %c + %fma1 = fadd FASTMATH_FLAGS float %mul, %d store volatile float %fma0, float addrspace(1)* %gep.out.0 store volatile float %fma1, float addrspace(1)* %gep.out.1 @@ -120,8 +120,8 @@ %b = load volatile float, float addrspace(1)* %gep.1 %c = load volatile float, float 
addrspace(1)* %gep.2 - %mul = fmul float %a, %b - %fma = fadd float %c, %mul + %mul = fmul FASTMATH_FLAGS float %a, %b + %fma = fadd FASTMATH_FLAGS float %c, %mul store float %fma, float addrspace(1)* %gep.out ret void } @@ -150,8 +150,8 @@ %b = load volatile float, float addrspace(1)* %gep.1 %c = load volatile float, float addrspace(1)* %gep.2 - %mul = fmul float %a, %b - %fma = fsub float %mul, %c + %mul = fmul FASTMATH_FLAGS float %a, %b + %fma = fsub FASTMATH_FLAGS float %mul, %c store float %fma, float addrspace(1)* %gep.out ret void } @@ -190,9 +190,9 @@ %c = load volatile float, float addrspace(1)* %gep.2 %d = load volatile float, float addrspace(1)* %gep.3 - %mul = fmul float %a, %b - %fma0 = fsub float %mul, %c - %fma1 = fsub float %mul, %d + %mul = fmul FASTMATH_FLAGS float %a, %b + %fma0 = fsub FASTMATH_FLAGS float %mul, %c + %fma1 = fsub FASTMATH_FLAGS float %mul, %d store volatile float %fma0, float addrspace(1)* %gep.out.0 store volatile float %fma1, float addrspace(1)* %gep.out.1 ret void @@ -222,8 +222,8 @@ %b = load volatile float, float addrspace(1)* %gep.1 %c = load volatile float, float addrspace(1)* %gep.2 - %mul = fmul float %a, %b - %fma = fsub float %c, %mul + %mul = fmul FASTMATH_FLAGS float %a, %b + %fma = fsub FASTMATH_FLAGS float %c, %mul store float %fma, float addrspace(1)* %gep.out ret void } @@ -262,9 +262,9 @@ %c = load volatile float, float addrspace(1)* %gep.2 %d = load volatile float, float addrspace(1)* %gep.3 - %mul = fmul float %a, %b - %fma0 = fsub float %c, %mul - %fma1 = fsub float %d, %mul + %mul = fmul FASTMATH_FLAGS float %a, %b + %fma0 = fsub FASTMATH_FLAGS float %c, %mul + %fma1 = fsub FASTMATH_FLAGS float %d, %mul store volatile float %fma0, float addrspace(1)* %gep.out.0 store volatile float %fma1, float addrspace(1)* %gep.out.1 ret void @@ -295,9 +295,9 @@ %b = load volatile float, float addrspace(1)* %gep.1 %c = load volatile float, float addrspace(1)* %gep.2 - %mul = fmul float %a, %b - %mul.neg = fneg float %mul - %fma = fsub float %mul.neg, %c + %mul = fmul FASTMATH_FLAGS float %a, %b + %mul.neg = fneg FASTMATH_FLAGS float %mul + %fma = fsub FASTMATH_FLAGS float %mul.neg, %c store float %fma, float addrspace(1)* %gep.out ret void @@ -337,10 +337,10 @@ %c = load volatile float, float addrspace(1)* %gep.2 %d = load volatile float, float addrspace(1)* %gep.3 - %mul = fmul float %a, %b - %mul.neg = fneg float %mul - %fma0 = fsub float %mul.neg, %c - %fma1 = fsub float %mul.neg, %d + %mul = fmul FASTMATH_FLAGS float %a, %b + %mul.neg = fneg FASTMATH_FLAGS float %mul + %fma0 = fsub FASTMATH_FLAGS float %mul.neg, %c + %fma1 = fsub FASTMATH_FLAGS float %mul.neg, %d store volatile float %fma0, float addrspace(1)* %gep.out.0 store volatile float %fma1, float addrspace(1)* %gep.out.1 @@ -381,10 +381,10 @@ %c = load volatile float, float addrspace(1)* %gep.2 %d = load volatile float, float addrspace(1)* %gep.3 - %mul = fmul float %a, %b - %mul.neg = fneg float %mul - %fma0 = fsub float %mul.neg, %c - %fma1 = fsub float %mul, %d + %mul = fmul FASTMATH_FLAGS float %a, %b + %mul.neg = fneg FASTMATH_FLAGS float %mul + %fma0 = fsub FASTMATH_FLAGS float %mul.neg, %c + %fma1 = fsub FASTMATH_FLAGS float %mul, %d store volatile float %fma0, float addrspace(1)* %gep.out.0 store volatile float %fma1, float addrspace(1)* %gep.out.1 @@ -427,9 +427,9 @@ %u = load volatile float, float addrspace(1)* %gep.3 %v = load volatile float, float addrspace(1)* %gep.4 - %tmp0 = fmul float %u, %v - %tmp1 = call float @llvm.fma.f32(float %x, float %y, float %tmp0) #0 - %tmp2 
= fsub float %tmp1, %z + %tmp0 = fmul FASTMATH_FLAGS float %u, %v + %tmp1 = call FASTMATH_FLAGS float @llvm.fma.f32(float %x, float %y, float %tmp0) #0 + %tmp2 = fsub FASTMATH_FLAGS float %tmp1, %z store float %tmp2, float addrspace(1)* %gep.out ret void @@ -470,9 +470,9 @@ %u = load volatile float, float addrspace(1)* %gep.3 %v = load volatile float, float addrspace(1)* %gep.4 - %tmp0 = fmul float %u, %v - %tmp1 = call float @llvm.fma.f32(float %y, float %z, float %tmp0) #0 - %tmp2 = fsub float %x, %tmp1 + %tmp0 = fmul FASTMATH_FLAGS float %u, %v + %tmp1 = call FASTMATH_FLAGS float @llvm.fma.f32(float %y, float %z, float %tmp0) #0 + %tmp2 = fsub FASTMATH_FLAGS float %x, %tmp1 store float %tmp2, float addrspace(1)* %gep.out ret void @@ -520,9 +520,9 @@ %u = load volatile float, float addrspace(1)* %gep.3 %v = load volatile float, float addrspace(1)* %gep.4 - %tmp0 = fmul float %u, %v - %tmp1 = call float @llvm.fmuladd.f32(float %x, float %y, float %tmp0) #0 - %tmp2 = fsub float %tmp1, %z + %tmp0 = fmul FASTMATH_FLAGS float %u, %v + %tmp1 = call FASTMATH_FLAGS float @llvm.fmuladd.f32(float %x, float %y, float %tmp0) #0 + %tmp2 = fsub FASTMATH_FLAGS float %tmp1, %z store float %tmp2, float addrspace(1)* %gep.out ret void @@ -572,9 +572,9 @@ %v = load volatile float, float addrspace(1)* %gep.4 ; nsz flag is needed since this combine may change sign of zero - %tmp0 = fmul nsz float %u, %v - %tmp1 = call nsz float @llvm.fmuladd.f32(float %y, float %z, float %tmp0) #0 - %tmp2 = fsub nsz float %x, %tmp1 + %tmp0 = fmul FASTMATH_FLAGS nsz float %u, %v + %tmp1 = call FASTMATH_FLAGS nsz float @llvm.fmuladd.f32(float %y, float %z, float %tmp0) #0 + %tmp2 = fsub FASTMATH_FLAGS nsz float %x, %tmp1 store float %tmp2, float addrspace(1)* %gep.out ret void Index: llvm/test/CodeGen/AMDGPU/madak.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/madak.ll +++ llvm/test/CodeGen/AMDGPU/madak.ll @@ -1,8 +1,8 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6_8_9,MAD %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s -; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,GFX10-MAD %s -; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,FMA %s +; RUN: sed -e "s,FASTMATH_FLAGS,,g" %s | llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX6,GFX6_8_9,MAD %s +; RUN: sed -e "s,FASTMATH_FLAGS,,g" %s | llc -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX8,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s +; RUN: sed -e "s,FASTMATH_FLAGS,,g" %s | llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX9,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s +; RUN: sed -e "s,FASTMATH_FLAGS,,g" %s | llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,GFX10-MAD %s +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,FMA %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare float @llvm.fabs.f32(float) nounwind readnone @@ -28,8 +28,8 @@ 
%a = load float, float addrspace(1)* %in.a.gep, align 4 %b = load float, float addrspace(1)* %in.b.gep, align 4 - %mul = fmul float %a, %b - %madak = fadd float %mul, 10.0 + %mul = fmul FASTMATH_FLAGS float %a, %b + %madak = fadd FASTMATH_FLAGS float %mul, 10.0 store float %madak, float addrspace(1)* %out.gep, align 4 ret void } @@ -69,10 +69,10 @@ %b = load volatile float, float addrspace(1)* %in.gep.1, align 4 %c = load volatile float, float addrspace(1)* %in.gep.2, align 4 - %mul0 = fmul float %a, %b - %mul1 = fmul float %a, %c - %madak0 = fadd float %mul0, 10.0 - %madak1 = fadd float %mul1, 10.0 + %mul0 = fmul FASTMATH_FLAGS float %a, %b + %mul1 = fmul FASTMATH_FLAGS float %a, %c + %madak0 = fadd FASTMATH_FLAGS float %mul0, 10.0 + %madak1 = fadd FASTMATH_FLAGS float %mul1, 10.0 store volatile float %madak0, float addrspace(1)* %out.gep.0, align 4 store volatile float %madak1, float addrspace(1)* %out.gep.1, align 4 @@ -91,8 +91,8 @@ %a = load float, float addrspace(1)* %in.a.gep, align 4 - %mul = fmul float 4.0, %a - %madak = fadd float %mul, 10.0 + %mul = fmul FASTMATH_FLAGS float 4.0, %a + %madak = fadd FASTMATH_FLAGS float %mul, 10.0 store float %madak, float addrspace(1)* %out.gep, align 4 ret void } @@ -121,8 +121,8 @@ %a = load float, float addrspace(1)* %in.a.gep, align 4 %b = load float, float addrspace(1)* %in.b.gep, align 4 - %mul = fmul float %a, %b - %madak = fadd float %mul, 4.0 + %mul = fmul FASTMATH_FLAGS float %a, %b + %madak = fadd FASTMATH_FLAGS float %mul, 4.0 store float %madak, float addrspace(1)* %out.gep, align 4 ret void } @@ -143,8 +143,8 @@ %a = load float, float addrspace(1)* %in.a.gep, align 4 - %mul = fmul float %a, %b - %madak = fadd float %mul, 10.0 + %mul = fmul FASTMATH_FLAGS float %a, %b + %madak = fadd FASTMATH_FLAGS float %mul, 10.0 store float %madak, float addrspace(1)* %out.gep, align 4 ret void } @@ -164,8 +164,8 @@ %b = load float, float addrspace(1)* %in.b.gep, align 4 - %mul = fmul float %a, %b - %madak = fadd float %mul, 10.0 + %mul = fmul FASTMATH_FLAGS float %a, %b + %madak = fadd FASTMATH_FLAGS float %mul, 10.0 store float %madak, float addrspace(1)* %out.gep, align 4 ret void } @@ -176,8 +176,8 @@ ; GFX10-MAD: v_mac_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ; FMA: v_fmac_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} define amdgpu_kernel void @s_s_madak_f32(float addrspace(1)* %out, float %a, float %b) #0 { - %mul = fmul float %a, %b - %madak = fadd float %mul, 10.0 + %mul = fmul FASTMATH_FLAGS float %a, %b + %madak = fadd FASTMATH_FLAGS float %mul, 10.0 store float %madak, float addrspace(1)* %out, align 4 ret void } @@ -202,8 +202,8 @@ %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone - %mul = fmul float %a.fabs, %b - %madak = fadd float %mul, 10.0 + %mul = fmul FASTMATH_FLAGS float %a.fabs, %b + %madak = fadd FASTMATH_FLAGS float %mul, 10.0 store float %madak, float addrspace(1)* %out.gep, align 4 ret void } @@ -228,8 +228,8 @@ %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone - %mul = fmul float %a, %b.fabs - %madak = fadd float %mul, 10.0 + %mul = fmul FASTMATH_FLAGS float %a, %b.fabs + %madak = fadd FASTMATH_FLAGS float %mul, 10.0 store float %madak, float addrspace(1)* %out.gep, align 4 ret void } @@ -260,9 +260,9 @@ bb4: %vgpr = load volatile float, float addrspace(1)* undef - %tmp0 = fmul float %sgpr0, 0.5 - %tmp1 = fadd float %tmp0, 42.0 - %tmp2 = fmul float %tmp1, %vgpr + %tmp0 = fmul FASTMATH_FLAGS float %sgpr0, 0.5 + %tmp1 = fadd FASTMATH_FLAGS float %tmp0, 42.0 + %tmp2 = fmul FASTMATH_FLAGS 
float %tmp1, %vgpr store volatile float %tmp2, float addrspace(1)* undef, align 4 ret void } Index: llvm/test/CodeGen/X86/avx512-fma.ll =================================================================== --- llvm/test/CodeGen/X86/avx512-fma.ll +++ llvm/test/CodeGen/X86/avx512-fma.ll @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=KNL -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=SKX +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=ALL --check-prefix=SKX define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; ALL-LABEL: test_x86_fmadd_ps_z: ; ALL: ## %bb.0: ; ALL-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2 ; ALL-NEXT: retq - %x = fmul <16 x float> %a0, %a1 - %res = fadd <16 x float> %x, %a2 + %x = fmul contract <16 x float> %a0, %a1 + %res = fadd contract <16 x float> %x, %a2 ret <16 x float> %res } @@ -17,8 +17,8 @@ ; ALL: ## %bb.0: ; ALL-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2 ; ALL-NEXT: retq - %x = fmul <16 x float> %a0, %a1 - %res = fsub <16 x float> %x, %a2 + %x = fmul contract <16 x float> %a0, %a1 + %res = fsub contract <16 x float> %x, %a2 ret <16 x float> %res } @@ -27,8 +27,8 @@ ; ALL: ## %bb.0: ; ALL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2 ; ALL-NEXT: retq - %x = fmul <16 x float> %a0, %a1 - %res = fsub <16 x float> %a2, %x + %x = fmul contract <16 x float> %a0, %a1 + %res = fsub contract <16 x float> %a2, %x ret <16 x float> %res } @@ -37,12 +37,12 @@ ; ALL: ## %bb.0: ; ALL-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2 ; ALL-NEXT: retq - %x = fmul <16 x float> %a0, %a1 - %y = fsub <16 x float> %a0, %a1 + %y = fsub contract <16 x float> , %x - %res = fsub <16 x float> %y, %a2 + %res = fsub contract <16 x float> %y, %a2 ret <16 x float> %res } @@ -51,8 +51,8 @@ ; ALL: ## %bb.0: ; ALL-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2 ; ALL-NEXT: retq - %x = fmul <8 x double> %a0, %a1 - %res = fadd <8 x double> %x, %a2 + %x = fmul contract <8 x double> %a0, %a1 + %res = fadd contract <8 x double> %x, %a2 ret <8 x double> %res } @@ -61,8 +61,8 @@ ; ALL: ## %bb.0: ; ALL-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2 ; ALL-NEXT: retq - %x = fmul <8 x double> %a0, %a1 - %res = fsub <8 x double> %x, %a2 + %x = fmul contract <8 x double> %a0, %a1 + %res = fsub contract <8 x double> %x, %a2 ret <8 x double> %res } @@ -71,8 +71,8 @@ ; ALL: ## %bb.0: ; ALL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 ; ALL-NEXT: retq - %x = fmul double %a0, %a1 - %res = fsub double %x, %a2 + %x = fmul contract double %a0, %a1 + %res = fsub contract double %x, %a2 ret double %res } @@ -82,8 +82,8 @@ ; ALL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem ; ALL-NEXT: retq %a2 = load double , double *%a2_ptr - %x = fmul double %a0, %a1 - %res = fsub double %x, %a2 + %x = fmul contract double %a0, %a1 + %res = fsub contract double %x, %a2 ret double %res } @@ -93,8 +93,8 @@ ; ALL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 ; ALL-NEXT: retq %a2 = load double , double *%a2_ptr - %x = fmul double %a0, %a2 - %res = fsub double %x, %a1 + %x = fmul contract double %a0, %a2 + %res = fsub 
contract double %x, %a1 ret double %res } @@ -103,8 +103,8 @@ ; ALL: ## %bb.0: ; ALL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1 ; ALL-NEXT: retq - %b1 = fmul <16 x float> %a1, - %b2 = fadd <16 x float> %b1, %a2 + %b1 = fmul contract <16 x float> %a1, + %b2 = fadd contract <16 x float> %b1, %a2 ret <16 x float> %b2 } @@ -113,8 +113,8 @@ ; ALL: ## %bb.0: ; ALL-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + mem ; ALL-NEXT: retq - %b1 = fmul <16 x float> %a1, %a2 - %b2 = fadd <16 x float> %b1, + %b1 = fmul contract <16 x float> %a1, %a2 + %b2 = fadd contract <16 x float> %b1, ret <16 x float> %b2 } @@ -135,8 +135,8 @@ ; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 {%k1} = (zmm0 * mem) + zmm1 ; SKX-NEXT: retq %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1 - %x = fmul <16 x float> %a0, %a2 - %y = fadd <16 x float> %x, %a1 + %x = fmul contract <16 x float> %a0, %a2 + %y = fadd contract <16 x float> %x, %a1 %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0 ret <16 x float> %res } @@ -160,8 +160,8 @@ ; SKX-NEXT: vmovaps %zmm1, %zmm0 ; SKX-NEXT: retq %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1 - %x = fmul <16 x float> %a0, %a2 - %y = fadd <16 x float> %x, %a1 + %x = fmul contract <16 x float> %a0, %a2 + %y = fadd contract <16 x float> %x, %a1 %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1 ret <16 x float> %res } @@ -185,8 +185,8 @@ ; SKX-NEXT: vmovaps %zmm1, %zmm0 ; SKX-NEXT: retq %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1 - %x = fmul <16 x float> %a1, %a0 - %y = fadd <16 x float> %x, %a2 + %x = fmul contract <16 x float> %a1, %a0 + %y = fadd contract <16 x float> %x, %a2 %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1 ret <16 x float> %res } Index: llvm/test/CodeGen/X86/fma-do-not-commute.ll =================================================================== --- llvm/test/CodeGen/X86/fma-do-not-commute.ll +++ llvm/test/CodeGen/X86/fma-do-not-commute.ll @@ -1,4 +1,4 @@ -; RUN: llc -fp-contract=fast -mattr=+fma -disable-cgp < %s -o - | FileCheck %s +; RUN: llc -mattr=+fma -disable-cgp < %s -o - | FileCheck %s ; Check that the 2nd and 3rd arguments of fmaXXX231 reg1, reg2, mem3 are not commuted. 
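; Background note (documented ISA semantics, not an added check): in the 231 form
;   VFMADD231SS xmm1, xmm2, xmm3/m32   ; xmm1 = xmm2 * xmm3/m32 + xmm1  (Intel syntax)
; only the third source may be a memory operand and the destination register is the
; addend, which is why the folded load has to stay in the third position rather than
; being commuted with the second operand.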
; target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @@ -20,8 +20,8 @@ %sum0 = phi float [ %fma, %loop ], [ %arg, %entry ] %addrVal = load float, float* %addr, align 4 %addr2Val = load float, float* %addr2, align 4 - %fmul = fmul float %addrVal, %addr2Val - %fma = fadd float %sum0, %fmul + %fmul = fmul contract float %addrVal, %addr2Val + %fma = fadd contract float %sum0, %fmul br i1 true, label %exit, label %loop exit: Index: llvm/test/CodeGen/X86/fma_patterns.ll =================================================================== --- llvm/test/CodeGen/X86/fma_patterns.ll +++ llvm/test/CodeGen/X86/fma_patterns.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefixes=FMA,FMA-INFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefixes=FMA4,FMA4-INFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefixes=FMA4,FMA4-INFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -fp-contract=fast | FileCheck %s --check-prefixes=AVX512,AVX512-INFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefixes=FMA,FMA-NOINFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefixes=FMA4,FMA4-NOINFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefixes=FMA4,FMA4-NOINFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefixes=AVX512,AVX512-NOINFS +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma | FileCheck %s --check-prefixes=FMA,FMA-INFS +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma | FileCheck %s --check-prefixes=FMA4,FMA4-INFS +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 | FileCheck %s --check-prefixes=FMA4,FMA4-INFS +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512-INFS +; RUN: sed -e "s,FASTMATH_FLAGS,contract ninf,g" %s | llc -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma | FileCheck %s --check-prefixes=FMA,FMA-NOINFS +; RUN: sed -e "s,FASTMATH_FLAGS,contract ninf,g" %s | llc -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma | FileCheck %s --check-prefixes=FMA4,FMA4-NOINFS +; RUN: sed -e "s,FASTMATH_FLAGS,contract ninf,g" %s | llc -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 | FileCheck %s --check-prefixes=FMA4,FMA4-NOINFS +; RUN: sed -e "s,FASTMATH_FLAGS,contract ninf,g" %s | llc -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512-NOINFS ; ; Pattern: (fadd (fmul x, y), z) -> (fmadd x,y,z) @@ -27,8 +27,8 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 ; AVX512-NEXT: retq - %x = fmul float %a0, %a1 - %res = fadd float %x, %a2 + %x = fmul FASTMATH_FLAGS float %a0, %a1 + %res = fadd FASTMATH_FLAGS float %x, %a2 ret float %res } @@ -47,8 +47,8 @@ ; AVX512: # %bb.0: ; 
AVX512-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 ; AVX512-NEXT: retq - %x = fmul <4 x float> %a0, %a1 - %res = fadd <4 x float> %x, %a2 + %x = fmul FASTMATH_FLAGS <4 x float> %a0, %a1 + %res = fadd FASTMATH_FLAGS <4 x float> %x, %a2 ret <4 x float> %res } @@ -67,8 +67,8 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 ; AVX512-NEXT: retq - %x = fmul <8 x float> %a0, %a1 - %res = fadd <8 x float> %x, %a2 + %x = fmul FASTMATH_FLAGS <8 x float> %a0, %a1 + %res = fadd FASTMATH_FLAGS <8 x float> %x, %a2 ret <8 x float> %res } @@ -87,8 +87,8 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 ; AVX512-NEXT: retq - %x = fmul double %a0, %a1 - %res = fadd double %x, %a2 + %x = fmul FASTMATH_FLAGS double %a0, %a1 + %res = fadd FASTMATH_FLAGS double %x, %a2 ret double %res } @@ -107,8 +107,8 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 ; AVX512-NEXT: retq - %x = fmul <2 x double> %a0, %a1 - %res = fadd <2 x double> %x, %a2 + %x = fmul FASTMATH_FLAGS <2 x double> %a0, %a1 + %res = fadd FASTMATH_FLAGS <2 x double> %x, %a2 ret <2 x double> %res } @@ -127,8 +127,8 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 ; AVX512-NEXT: retq - %x = fmul <4 x double> %a0, %a1 - %res = fadd <4 x double> %x, %a2 + %x = fmul FASTMATH_FLAGS <4 x double> %a0, %a1 + %res = fadd FASTMATH_FLAGS <4 x double> %x, %a2 ret <4 x double> %res } @@ -151,8 +151,8 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 ; AVX512-NEXT: retq - %x = fmul float %a0, %a1 - %res = fsub float %x, %a2 + %x = fmul FASTMATH_FLAGS float %a0, %a1 + %res = fsub FASTMATH_FLAGS float %x, %a2 ret float %res } @@ -171,8 +171,8 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 ; AVX512-NEXT: retq - %x = fmul <4 x float> %a0, %a1 - %res = fsub <4 x float> %x, %a2 + %x = fmul FASTMATH_FLAGS <4 x float> %a0, %a1 + %res = fsub FASTMATH_FLAGS <4 x float> %x, %a2 ret <4 x float> %res } @@ -191,8 +191,8 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 ; AVX512-NEXT: retq - %x = fmul <8 x float> %a0, %a1 - %res = fsub <8 x float> %x, %a2 + %x = fmul FASTMATH_FLAGS <8 x float> %a0, %a1 + %res = fsub FASTMATH_FLAGS <8 x float> %x, %a2 ret <8 x float> %res } @@ -211,8 +211,8 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 ; AVX512-NEXT: retq - %x = fmul double %a0, %a1 - %res = fsub double %x, %a2 + %x = fmul FASTMATH_FLAGS double %a0, %a1 + %res = fsub FASTMATH_FLAGS double %x, %a2 ret double %res } @@ -231,8 +231,8 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 ; AVX512-NEXT: retq - %x = fmul <2 x double> %a0, %a1 - %res = fsub <2 x double> %x, %a2 + %x = fmul FASTMATH_FLAGS <2 x double> %a0, %a1 + %res = fsub FASTMATH_FLAGS <2 x double> %x, %a2 ret <2 x double> %res } @@ -251,8 +251,8 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 ; AVX512-NEXT: retq - %x = fmul <4 x double> %a0, %a1 - %res = fsub <4 x double> %x, %a2 + %x = fmul FASTMATH_FLAGS <4 x double> %a0, %a1 + %res = fsub FASTMATH_FLAGS <4 x double> %x, %a2 ret <4 x double> %res } @@ -275,8 +275,8 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 ; AVX512-NEXT: retq - %x = fmul float %a0, %a1 - %res = fsub float %a2, %x + %x = fmul FASTMATH_FLAGS float %a0, %a1 + %res = fsub 
FASTMATH_FLAGS float %a2, %x ret float %res } @@ -295,8 +295,8 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 ; AVX512-NEXT: retq - %x = fmul <4 x float> %a0, %a1 - %res = fsub <4 x float> %a2, %x + %x = fmul FASTMATH_FLAGS <4 x float> %a0, %a1 + %res = fsub FASTMATH_FLAGS <4 x float> %a2, %x ret <4 x float> %res } @@ -315,8 +315,8 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 ; AVX512-NEXT: retq - %x = fmul <8 x float> %a0, %a1 - %res = fsub <8 x float> %a2, %x + %x = fmul FASTMATH_FLAGS <8 x float> %a0, %a1 + %res = fsub FASTMATH_FLAGS <8 x float> %a2, %x ret <8 x float> %res } @@ -335,8 +335,8 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 ; AVX512-NEXT: retq - %x = fmul double %a0, %a1 - %res = fsub double %a2, %x + %x = fmul FASTMATH_FLAGS double %a0, %a1 + %res = fsub FASTMATH_FLAGS double %a2, %x ret double %res } @@ -355,8 +355,8 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 ; AVX512-NEXT: retq - %x = fmul <2 x double> %a0, %a1 - %res = fsub <2 x double> %a2, %x + %x = fmul FASTMATH_FLAGS <2 x double> %a0, %a1 + %res = fsub FASTMATH_FLAGS <2 x double> %a2, %x ret <2 x double> %res } @@ -375,8 +375,8 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 ; AVX512-NEXT: retq - %x = fmul <4 x double> %a0, %a1 - %res = fsub <4 x double> %a2, %x + %x = fmul FASTMATH_FLAGS <4 x double> %a0, %a1 + %res = fsub FASTMATH_FLAGS <4 x double> %a2, %x ret <4 x double> %res } @@ -399,9 +399,9 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 ; AVX512-NEXT: retq - %x = fmul float %a0, %a1 - %y = fsub float -0.000000e+00, %x - %res = fsub float %y, %a2 + %x = fmul FASTMATH_FLAGS float %a0, %a1 + %y = fsub FASTMATH_FLAGS float -0.000000e+00, %x + %res = fsub FASTMATH_FLAGS float %y, %a2 ret float %res } @@ -420,9 +420,9 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 ; AVX512-NEXT: retq - %x = fmul <4 x float> %a0, %a1 - %y = fsub <4 x float> , %x - %res = fsub <4 x float> %y, %a2 + %x = fmul FASTMATH_FLAGS <4 x float> %a0, %a1 + %y = fsub FASTMATH_FLAGS <4 x float> , %x + %res = fsub FASTMATH_FLAGS <4 x float> %y, %a2 ret <4 x float> %res } @@ -441,9 +441,9 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 ; AVX512-NEXT: retq - %x = fmul <8 x float> %a0, %a1 - %y = fsub <8 x float> , %x - %res = fsub <8 x float> %y, %a2 + %x = fmul FASTMATH_FLAGS <8 x float> %a0, %a1 + %y = fsub FASTMATH_FLAGS <8 x float> , %x + %res = fsub FASTMATH_FLAGS <8 x float> %y, %a2 ret <8 x float> %res } @@ -462,9 +462,9 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 ; AVX512-NEXT: retq - %x = fmul double %a0, %a1 - %y = fsub double -0.000000e+00, %x - %res = fsub double %y, %a2 + %x = fmul FASTMATH_FLAGS double %a0, %a1 + %y = fsub FASTMATH_FLAGS double -0.000000e+00, %x + %res = fsub FASTMATH_FLAGS double %y, %a2 ret double %res } @@ -483,9 +483,9 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 ; AVX512-NEXT: retq - %x = fmul <2 x double> %a0, %a1 - %y = fsub <2 x double> , %x - %res = fsub <2 x double> %y, %a2 + %x = fmul FASTMATH_FLAGS <2 x double> %a0, %a1 + %y = fsub FASTMATH_FLAGS <2 x double> , %x + %res = fsub FASTMATH_FLAGS <2 x double> %y, %a2 ret <2 x double> %res } @@ -504,9 +504,9 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: 
vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 ; AVX512-NEXT: retq - %x = fmul <4 x double> %a0, %a1 - %y = fsub <4 x double> , %x - %res = fsub <4 x double> %y, %a2 + %x = fmul FASTMATH_FLAGS <4 x double> %a0, %a1 + %y = fsub FASTMATH_FLAGS <4 x double> , %x + %res = fsub FASTMATH_FLAGS <4 x double> %y, %a2 ret <4 x double> %res } @@ -530,8 +530,8 @@ ; AVX512-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 ; AVX512-NEXT: retq %x = load <4 x float>, <4 x float>* %a0 - %y = fmul <4 x float> %x, %a1 - %res = fadd <4 x float> %y, %a2 + %y = fmul FASTMATH_FLAGS <4 x float> %x, %a1 + %res = fadd FASTMATH_FLAGS <4 x float> %y, %a2 ret <4 x float> %res } @@ -551,8 +551,8 @@ ; AVX512-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 ; AVX512-NEXT: retq %x = load <2 x double>, <2 x double>* %a0 - %y = fmul <2 x double> %x, %a1 - %res = fsub <2 x double> %y, %a2 + %y = fmul FASTMATH_FLAGS <2 x double> %x, %a1 + %res = fsub FASTMATH_FLAGS <2 x double> %y, %a2 ret <2 x double> %res } @@ -593,8 +593,8 @@ ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 ; AVX512-NOINFS-NEXT: retq - %a = fadd <4 x float> %x, - %m = fmul <4 x float> %a, %y + %a = fadd FASTMATH_FLAGS <4 x float> %x, + %m = fmul FASTMATH_FLAGS <4 x float> %a, %y ret <4 x float> %m } @@ -631,8 +631,8 @@ ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 ; AVX512-NOINFS-NEXT: retq - %a = fadd <4 x float> %x, - %m = fmul <4 x float> %y, %a + %a = fadd FASTMATH_FLAGS <4 x float> %x, + %m = fmul FASTMATH_FLAGS <4 x float> %y, %a ret <4 x float> %m } @@ -669,8 +669,8 @@ ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 ; AVX512-NOINFS-NEXT: retq - %a = fadd <4 x float> %x, - %m = fmul <4 x float> %y, %a + %a = fadd FASTMATH_FLAGS <4 x float> %x, + %m = fmul FASTMATH_FLAGS <4 x float> %y, %a ret <4 x float> %m } @@ -707,8 +707,8 @@ ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %a = fadd <4 x float> %x, - %m = fmul <4 x float> %a, %y + %a = fadd FASTMATH_FLAGS <4 x float> %x, + %m = fmul FASTMATH_FLAGS <4 x float> %a, %y ret <4 x float> %m } @@ -745,8 +745,8 @@ ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %a = fadd <4 x float> %x, - %m = fmul <4 x float> %y, %a + %a = fadd FASTMATH_FLAGS <4 x float> %x, + %m = fmul FASTMATH_FLAGS <4 x float> %y, %a ret <4 x float> %m } @@ -783,8 +783,8 @@ ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %a = fadd <4 x float> %x, - %m = fmul <4 x float> %y, %a + %a = fadd FASTMATH_FLAGS <4 x float> %x, + %m = fmul FASTMATH_FLAGS <4 x float> %y, %a ret <4 x float> %m } @@ -824,8 +824,8 @@ ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <4 x float> , %x - %m = fmul <4 x float> %s, %y + %s = fsub FASTMATH_FLAGS <4 x float> , %x + %m = fmul FASTMATH_FLAGS <4 x float> %s, %y ret <4 x float> %m } @@ -865,8 +865,8 @@ ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <4 x float> , %x - %m = fmul <4 x float> %y, %s + %s = fsub FASTMATH_FLAGS <4 x float> , %x + %m = fmul FASTMATH_FLAGS <4 x float> %y, %s ret <4 x float> %m } @@ -906,8 +906,8 @@ ; AVX512-NOINFS: # 
%bb.0: ; AVX512-NOINFS-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <4 x float> , %x - %m = fmul <4 x float> %y, %s + %s = fsub FASTMATH_FLAGS <4 x float> , %x + %m = fmul FASTMATH_FLAGS <4 x float> %y, %s ret <4 x float> %m } @@ -947,8 +947,8 @@ ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <4 x float> , %x - %m = fmul <4 x float> %s, %y + %s = fsub FASTMATH_FLAGS <4 x float> , %x + %m = fmul FASTMATH_FLAGS <4 x float> %s, %y ret <4 x float> %m } @@ -988,8 +988,8 @@ ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <4 x float> , %x - %m = fmul <4 x float> %y, %s + %s = fsub FASTMATH_FLAGS <4 x float> , %x + %m = fmul FASTMATH_FLAGS <4 x float> %y, %s ret <4 x float> %m } @@ -1029,8 +1029,8 @@ ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <4 x float> , %x - %m = fmul <4 x float> %y, %s + %s = fsub FASTMATH_FLAGS <4 x float> , %x + %m = fmul FASTMATH_FLAGS <4 x float> %y, %s ret <4 x float> %m } @@ -1067,8 +1067,8 @@ ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <4 x float> %x, - %m = fmul <4 x float> %s, %y + %s = fsub FASTMATH_FLAGS <4 x float> %x, + %m = fmul FASTMATH_FLAGS <4 x float> %s, %y ret <4 x float> %m } @@ -1105,8 +1105,8 @@ ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <4 x float> %x, - %m = fmul <4 x float> %y, %s + %s = fsub FASTMATH_FLAGS <4 x float> %x, + %m = fmul FASTMATH_FLAGS <4 x float> %y, %s ret <4 x float> %m } @@ -1143,8 +1143,8 @@ ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <4 x float> %x, - %m = fmul <4 x float> %y, %s + %s = fsub FASTMATH_FLAGS <4 x float> %x, + %m = fmul FASTMATH_FLAGS <4 x float> %y, %s ret <4 x float> %m } @@ -1181,8 +1181,8 @@ ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <4 x float> %x, - %m = fmul <4 x float> %s, %y + %s = fsub FASTMATH_FLAGS <4 x float> %x, + %m = fmul FASTMATH_FLAGS <4 x float> %s, %y ret <4 x float> %m } @@ -1219,8 +1219,8 @@ ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <4 x float> %x, - %m = fmul <4 x float> %y, %s + %s = fsub FASTMATH_FLAGS <4 x float> %x, + %m = fmul FASTMATH_FLAGS <4 x float> %y, %s ret <4 x float> %m } @@ -1257,8 +1257,8 @@ ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <4 x float> %x, - %m = fmul <4 x float> %y, %s + %s = fsub FASTMATH_FLAGS <4 x float> %x, + %m = fmul FASTMATH_FLAGS <4 x float> %y, %s ret <4 x float> %m } @@ -1308,10 +1308,10 @@ ; AVX512-NOINFS-NEXT: vfmsub213ss {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 ; AVX512-NOINFS-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %t1 = fsub nsz float 1.0, %t - %tx = fmul nsz float %x, %t - %ty = fmul nsz float %y, %t1 - %r = fadd nsz float %tx, %ty + %t1 = fsub FASTMATH_FLAGS nsz float 1.0, %t + %tx = fmul FASTMATH_FLAGS nsz float %x, %t + %ty = fmul 
FASTMATH_FLAGS nsz float %y, %t1 + %r = fadd FASTMATH_FLAGS nsz float %tx, %ty ret float %r } @@ -1357,10 +1357,10 @@ ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %t1 = fsub nsz <4 x float> , %t - %tx = fmul nsz <4 x float> %x, %t - %ty = fmul nsz <4 x float> %y, %t1 - %r = fadd nsz <4 x float> %tx, %ty + %t1 = fsub FASTMATH_FLAGS nsz <4 x float> , %t + %tx = fmul FASTMATH_FLAGS nsz <4 x float> %x, %t + %ty = fmul FASTMATH_FLAGS nsz <4 x float> %y, %t1 + %r = fadd FASTMATH_FLAGS nsz <4 x float> %tx, %ty ret <4 x float> %r } @@ -1406,10 +1406,10 @@ ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1 ; AVX512-NOINFS-NEXT: retq - %t1 = fsub nsz <8 x float> , %t - %tx = fmul nsz <8 x float> %x, %t - %ty = fmul nsz <8 x float> %y, %t1 - %r = fadd nsz <8 x float> %tx, %ty + %t1 = fsub FASTMATH_FLAGS nsz <8 x float> , %t + %tx = fmul FASTMATH_FLAGS nsz <8 x float> %x, %t + %ty = fmul FASTMATH_FLAGS nsz <8 x float> %y, %t1 + %r = fadd FASTMATH_FLAGS nsz <8 x float> %tx, %ty ret <8 x float> %r } @@ -1455,10 +1455,10 @@ ; AVX512-NOINFS-NEXT: vfmsub213sd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 ; AVX512-NOINFS-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %t1 = fsub nsz double 1.0, %t - %tx = fmul nsz double %x, %t - %ty = fmul nsz double %y, %t1 - %r = fadd nsz double %tx, %ty + %t1 = fsub FASTMATH_FLAGS nsz double 1.0, %t + %tx = fmul FASTMATH_FLAGS nsz double %x, %t + %ty = fmul FASTMATH_FLAGS nsz double %y, %t1 + %r = fadd FASTMATH_FLAGS nsz double %tx, %ty ret double %r } @@ -1504,10 +1504,10 @@ ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %t1 = fsub nsz <2 x double> , %t - %tx = fmul nsz <2 x double> %x, %t - %ty = fmul nsz <2 x double> %y, %t1 - %r = fadd nsz <2 x double> %tx, %ty + %t1 = fsub FASTMATH_FLAGS nsz <2 x double> , %t + %tx = fmul FASTMATH_FLAGS nsz <2 x double> %x, %t + %ty = fmul FASTMATH_FLAGS nsz <2 x double> %y, %t1 + %r = fadd FASTMATH_FLAGS nsz <2 x double> %tx, %ty ret <2 x double> %r } @@ -1553,10 +1553,10 @@ ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1 ; AVX512-NOINFS-NEXT: retq - %t1 = fsub nsz <4 x double> , %t - %tx = fmul nsz <4 x double> %x, %t - %ty = fmul nsz <4 x double> %y, %t1 - %r = fadd nsz <4 x double> %tx, %ty + %t1 = fsub FASTMATH_FLAGS nsz <4 x double> , %t + %tx = fmul FASTMATH_FLAGS nsz <4 x double> %x, %t + %ty = fmul FASTMATH_FLAGS nsz <4 x double> %y, %t1 + %r = fadd FASTMATH_FLAGS nsz <4 x double> %tx, %ty ret <4 x double> %r } @@ -1579,9 +1579,9 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 ; AVX512-NEXT: retq - %mul = fmul nsz <4 x float> %a0, %a1 - %add = fadd nsz <4 x float> %mul, %a2 - %neg = fsub nsz <4 x float> , %add + %mul = fmul FASTMATH_FLAGS nsz <4 x float> %a0, %a1 + %add = fadd FASTMATH_FLAGS nsz <4 x float> %mul, %a2 + %neg = fsub FASTMATH_FLAGS nsz <4 x float> , %add ret <4 x float> %neg } @@ -1600,9 +1600,9 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 ; AVX512-NEXT: retq - %mul = fmul nsz <4 x double> %a0, %a1 - %sub = fsub nsz <4 x double> %mul, %a2 - %neg = fsub nsz <4 x double> , 
%sub + %mul = fmul FASTMATH_FLAGS nsz <4 x double> %a0, %a1 + %sub = fsub FASTMATH_FLAGS nsz <4 x double> %mul, %a2 + %neg = fsub FASTMATH_FLAGS nsz <4 x double> , %sub ret <4 x double> %neg } @@ -1621,10 +1621,10 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 ; AVX512-NEXT: retq - %mul = fmul nsz <4 x float> %a0, %a1 - %neg0 = fsub nsz <4 x float> , %mul - %add = fadd nsz <4 x float> %neg0, %a2 - %neg1 = fsub nsz <4 x float> , %add + %mul = fmul FASTMATH_FLAGS nsz <4 x float> %a0, %a1 + %neg0 = fsub FASTMATH_FLAGS nsz <4 x float> , %mul + %add = fadd FASTMATH_FLAGS nsz <4 x float> %neg0, %a2 + %neg1 = fsub FASTMATH_FLAGS nsz <4 x float> , %add ret <4 x float> %neg1 } @@ -1643,10 +1643,10 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 ; AVX512-NEXT: retq - %mul = fmul nsz <4 x double> %a0, %a1 - %neg0 = fsub nsz <4 x double> , %mul - %sub = fsub nsz <4 x double> %neg0, %a2 - %neg1 = fsub nsz <4 x double> , %sub + %mul = fmul FASTMATH_FLAGS nsz <4 x double> %a0, %a1 + %neg0 = fsub FASTMATH_FLAGS nsz <4 x double> , %mul + %sub = fsub FASTMATH_FLAGS nsz <4 x double> %neg0, %a2 + %neg1 = fsub FASTMATH_FLAGS nsz <4 x double> , %sub ret <4 x double> %neg1 } @@ -1669,9 +1669,9 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 ; AVX512-NEXT: retq - %m0 = fmul <4 x float> %x, - %m1 = fmul <4 x float> %x, - %a = fadd <4 x float> %m0, %m1 + %m0 = fmul FASTMATH_FLAGS <4 x float> %x, + %m1 = fmul FASTMATH_FLAGS <4 x float> %x, + %a = fadd FASTMATH_FLAGS <4 x float> %m0, %m1 ret <4 x float> %a } @@ -1694,9 +1694,9 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 ; AVX512-NEXT: retq - %m0 = fmul <4 x float> %x, - %m1 = fmul <4 x float> %m0, - %a = fadd <4 x float> %m1, %y + %m0 = fmul FASTMATH_FLAGS <4 x float> %x, + %m1 = fmul FASTMATH_FLAGS <4 x float> %m0, + %a = fadd FASTMATH_FLAGS <4 x float> %m1, %y ret <4 x float> %a } @@ -1720,8 +1720,8 @@ ; AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; AVX512-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 ; AVX512-NEXT: retq - %m = fmul nsz double %x, %y - %n = fsub double -0.0, %m + %m = fmul FASTMATH_FLAGS nsz double %x, %y + %n = fsub FASTMATH_FLAGS double -0.0, %m ret double %n } @@ -1743,8 +1743,8 @@ ; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; AVX512-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 ; AVX512-NEXT: retq - %m = fmul nsz <4 x float> %x, %y - %n = fsub <4 x float> , %m + %m = fmul FASTMATH_FLAGS nsz <4 x float> %x, %y + %n = fsub FASTMATH_FLAGS <4 x float> , %m ret <4 x float> %n } @@ -1766,8 +1766,8 @@ ; AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; AVX512-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 ; AVX512-NEXT: retq - %m = fmul nsz <4 x double> %x, %y - %n = fsub <4 x double> , %m + %m = fmul FASTMATH_FLAGS nsz <4 x double> %x, %y + %n = fsub FASTMATH_FLAGS <4 x double> , %m ret <4 x double> %n } @@ -1789,8 +1789,8 @@ ; AVX512-NEXT: vmulpd %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 ; AVX512-NEXT: retq - %m = fmul <4 x double> %x, %y - %n = fsub <4 x double> , %m + %m = fmul FASTMATH_FLAGS <4 x double> %x, %y + %n = fsub FASTMATH_FLAGS <4 x double> , %m ret <4 x double> %n } @@ -1843,10 +1843,10 @@ ; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm2 = (xmm3 * xmm2) + xmm4 ; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 ; AVX512-NEXT: retq - %m1 = fmul float %a, %b - %m2 = fmul float %c, %d + %m1 = fmul 
FASTMATH_FLAGS float %a, %b + %m2 = fmul FASTMATH_FLAGS float %c, %d %a1 = fadd contract float %m1, %m2 - %a2 = fadd reassoc float %n0, %a1 + %a2 = fadd FASTMATH_FLAGS reassoc float %n0, %a1 ret float %a2 } @@ -1873,8 +1873,8 @@ ; AVX512-NEXT: vfmadd231ss {{.*#+}} xmm2 = (xmm1 * xmm0) + xmm2 ; AVX512-NEXT: vaddss %xmm2, %xmm4, %xmm0 ; AVX512-NEXT: retq - %m1 = fmul float %a, %b - %m2 = fmul float %c, %d + %m1 = fmul FASTMATH_FLAGS float %a, %b + %m2 = fmul FASTMATH_FLAGS float %c, %d %a1 = fadd contract float %m1, %m2 %a2 = fadd contract float %n0, %a1 ret float %a2 Index: llvm/test/CodeGen/X86/fma_patterns_wide.ll =================================================================== --- llvm/test/CodeGen/X86/fma_patterns_wide.ll +++ llvm/test/CodeGen/X86/fma_patterns_wide.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=FMA --check-prefix=FMA-INFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-INFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-INFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq -fp-contract=fast | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512-INFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=FMA --check-prefix=FMA-NOINFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-NOINFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-NOINFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512-NOINFS +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma | FileCheck %s --check-prefix=FMA --check-prefix=FMA-INFS +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-INFS +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-INFS +; RUN: sed -e "s,FASTMATH_FLAGS,contract,g" %s | llc -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512-INFS +; RUN: sed -e "s,FASTMATH_FLAGS,contract ninf,g" %s | llc -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma | FileCheck %s --check-prefix=FMA --check-prefix=FMA-NOINFS +; RUN: sed -e "s,FASTMATH_FLAGS,contract ninf,g" %s | llc -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-NOINFS +; RUN: sed -e "s,FASTMATH_FLAGS,contract ninf,g" %s | llc -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-NOINFS +; RUN: sed -e "s,FASTMATH_FLAGS,contract ninf,g" %s | llc -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512-NOINFS ; ; Pattern: (fadd (fmul x, y), z) -> (fmadd 
x,y,z)
@@ -29,8 +29,8 @@
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
 ; AVX512-NEXT: retq
- %x = fmul <16 x float> %a0, %a1
- %res = fadd <16 x float> %x, %a2
+ %x = fmul FASTMATH_FLAGS <16 x float> %a0, %a1
+ %res = fadd FASTMATH_FLAGS <16 x float> %x, %a2
 ret <16 x float> %res
 }

@@ -51,8 +51,8 @@
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
 ; AVX512-NEXT: retq
- %x = fmul <8 x double> %a0, %a1
- %res = fadd <8 x double> %x, %a2
+ %x = fmul FASTMATH_FLAGS <8 x double> %a0, %a1
+ %res = fadd FASTMATH_FLAGS <8 x double> %x, %a2
 ret <8 x double> %res
 }

@@ -77,8 +77,8 @@
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
 ; AVX512-NEXT: retq
- %x = fmul <16 x float> %a0, %a1
- %res = fsub <16 x float> %x, %a2
+ %x = fmul FASTMATH_FLAGS <16 x float> %a0, %a1
+ %res = fsub FASTMATH_FLAGS <16 x float> %x, %a2
 ret <16 x float> %res
 }

@@ -99,8 +99,8 @@
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
 ; AVX512-NEXT: retq
- %x = fmul <8 x double> %a0, %a1
- %res = fsub <8 x double> %x, %a2
+ %x = fmul FASTMATH_FLAGS <8 x double> %a0, %a1
+ %res = fsub FASTMATH_FLAGS <8 x double> %x, %a2
 ret <8 x double> %res
 }

@@ -125,8 +125,8 @@
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
 ; AVX512-NEXT: retq
- %x = fmul <16 x float> %a0, %a1
- %res = fsub <16 x float> %a2, %x
+ %x = fmul FASTMATH_FLAGS <16 x float> %a0, %a1
+ %res = fsub FASTMATH_FLAGS <16 x float> %a2, %x
 ret <16 x float> %res
 }

@@ -147,8 +147,8 @@
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vfnmadd213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
 ; AVX512-NEXT: retq
- %x = fmul <8 x double> %a0, %a1
- %res = fsub <8 x double> %a2, %x
+ %x = fmul FASTMATH_FLAGS <8 x double> %a0, %a1
+ %res = fsub FASTMATH_FLAGS <8 x double> %a2, %x
 ret <8 x double> %res
 }

@@ -173,9 +173,9 @@
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
 ; AVX512-NEXT: retq
- %x = fmul <16 x float> %a0, %a1
- %y = fsub <16 x float> , %x
- %res = fsub <16 x float> %y, %a2
+ %x = fmul FASTMATH_FLAGS <16 x float> %a0, %a1
+ %y = fsub FASTMATH_FLAGS <16 x float> , %x
+ %res = fsub FASTMATH_FLAGS <16 x float> %y, %a2
 ret <16 x float> %res
 }

@@ -196,9 +196,9 @@
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
 ; AVX512-NEXT: retq
- %x = fmul <8 x double> %a0, %a1
- %y = fsub <8 x double> , %x
- %res = fsub <8 x double> %y, %a2
+ %x = fmul FASTMATH_FLAGS <8 x double> %a0, %a1
+ %y = fsub FASTMATH_FLAGS <8 x double> , %x
+ %res = fsub FASTMATH_FLAGS <8 x double> %y, %a2
 ret <8 x double> %res
 }

@@ -224,8 +224,8 @@
 ; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1
 ; AVX512-NEXT: retq
 %x = load <16 x float>, <16 x float>* %a0
- %y = fmul <16 x float> %x, %a1
- %res = fadd <16 x float> %y, %a2
+ %y = fmul FASTMATH_FLAGS <16 x float> %x, %a1
+ %res = fadd FASTMATH_FLAGS <16 x float> %y, %a2
 ret <16 x float> %res
 }

@@ -247,8 +247,8 @@
 ; AVX512-NEXT: vfmsub132pd {{.*#+}} zmm0 = (zmm0 * mem) - zmm1
 ; AVX512-NEXT: retq
 %x = load <8 x double>, <8 x double>* %a0
- %y = fmul <8 x double> %x, %a1
- %res = fsub <8 x double> %y, %a2
+ %y = fmul FASTMATH_FLAGS <8 x double> %x, %a1
+ %res = fsub FASTMATH_FLAGS <8 x double> %y, %a2
 ret <8 x double> %res
 }

@@ -297,8 +297,8 @@
 ; AVX512-NOINFS: # %bb.0:
 ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm1
 ; AVX512-NOINFS-NEXT: retq
- %a = fadd <16 x float> %x,
- %m = fmul <16 x float> %a, %y
+ %a = fadd FASTMATH_FLAGS <16 x float> %x,
+ %m = fmul FASTMATH_FLAGS <16 x float> %a, %y
 ret <16 x float> %m
 }

@@ -343,8 +343,8 @@
 ; AVX512-NOINFS: # %bb.0:
 ; AVX512-NOINFS-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm1
 ; AVX512-NOINFS-NEXT: retq
- %a = fadd <8 x double> %x,
- %m = fmul <8 x double> %y, %a
+ %a = fadd FASTMATH_FLAGS <8 x double> %x,
+ %m = fmul FASTMATH_FLAGS <8 x double> %y, %a
 ret <8 x double> %m
 }

@@ -389,8 +389,8 @@
 ; AVX512-NOINFS: # %bb.0:
 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm1
 ; AVX512-NOINFS-NEXT: retq
- %a = fadd <16 x float> %x,
- %m = fmul <16 x float> %a, %y
+ %a = fadd FASTMATH_FLAGS <16 x float> %x,
+ %m = fmul FASTMATH_FLAGS <16 x float> %a, %y
 ret <16 x float> %m
 }

@@ -435,8 +435,8 @@
 ; AVX512-NOINFS: # %bb.0:
 ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm1
 ; AVX512-NOINFS-NEXT: retq
- %a = fadd <8 x double> %x,
- %m = fmul <8 x double> %y, %a
+ %a = fadd FASTMATH_FLAGS <8 x double> %x,
+ %m = fmul FASTMATH_FLAGS <8 x double> %y, %a
 ret <8 x double> %m
 }

@@ -482,8 +482,8 @@
 ; AVX512-NOINFS: # %bb.0:
 ; AVX512-NOINFS-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm1
 ; AVX512-NOINFS-NEXT: retq
- %s = fsub <16 x float> , %x
- %m = fmul <16 x float> %s, %y
+ %s = fsub FASTMATH_FLAGS <16 x float> , %x
+ %m = fmul FASTMATH_FLAGS <16 x float> %s, %y
 ret <16 x float> %m
 }

@@ -529,8 +529,8 @@
 ; AVX512-NOINFS: # %bb.0:
 ; AVX512-NOINFS-NEXT: vfnmadd213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm1
 ; AVX512-NOINFS-NEXT: retq
- %s = fsub <8 x double> , %x
- %m = fmul <8 x double> %y, %s
+ %s = fsub FASTMATH_FLAGS <8 x double> , %x
+ %m = fmul FASTMATH_FLAGS <8 x double> %y, %s
 ret <8 x double> %m
 }

@@ -576,8 +576,8 @@
 ; AVX512-NOINFS: # %bb.0:
 ; AVX512-NOINFS-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm1
 ; AVX512-NOINFS-NEXT: retq
- %s = fsub <16 x float> , %x
- %m = fmul <16 x float> %s, %y
+ %s = fsub FASTMATH_FLAGS <16 x float> , %x
+ %m = fmul FASTMATH_FLAGS <16 x float> %s, %y
 ret <16 x float> %m
 }

@@ -623,8 +623,8 @@
 ; AVX512-NOINFS: # %bb.0:
 ; AVX512-NOINFS-NEXT: vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm1
 ; AVX512-NOINFS-NEXT: retq
- %s = fsub <8 x double> , %x
- %m = fmul <8 x double> %y, %s
+ %s = fsub FASTMATH_FLAGS <8 x double> , %x
+ %m = fmul FASTMATH_FLAGS <8 x double> %y, %s
 ret <8 x double> %m
 }

@@ -669,8 +669,8 @@
 ; AVX512-NOINFS: # %bb.0:
 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm1
 ; AVX512-NOINFS-NEXT: retq
- %s = fsub <16 x float> %x,
- %m = fmul <16 x float> %s, %y
+ %s = fsub FASTMATH_FLAGS <16 x float> %x,
+ %m = fmul FASTMATH_FLAGS <16 x float> %s, %y
 ret <16 x float> %m
 }

@@ -715,8 +715,8 @@
 ; AVX512-NOINFS: # %bb.0:
 ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm1
 ; AVX512-NOINFS-NEXT: retq
- %s = fsub <8 x double> %x,
- %m = fmul <8 x double> %y, %s
+ %s = fsub FASTMATH_FLAGS <8 x double> %x,
+ %m = fmul FASTMATH_FLAGS <8 x double> %y, %s
 ret <8 x double> %m
 }

@@ -761,8 +761,8 @@
 ; AVX512-NOINFS: # %bb.0:
 ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm1
 ; AVX512-NOINFS-NEXT: retq
- %s = fsub <16 x float> %x,
- %m = fmul <16 x float> %s, %y
+ %s = fsub FASTMATH_FLAGS <16 x float> %x,
+ %m = fmul FASTMATH_FLAGS <16 x float> %s, %y
 ret <16 x float> %m
 }

@@ -807,8 +807,8 @@
 ; AVX512-NOINFS: # %bb.0:
 ; AVX512-NOINFS-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm1
 ; AVX512-NOINFS-NEXT: retq
- %s = fsub <8 x double> %x,
- %m = fmul <8 x double> %y, %s
+ %s = fsub FASTMATH_FLAGS <8 x double> %x,
+ %m = fmul FASTMATH_FLAGS <8 x double> %y, %s
 ret <8 x double> %m
 }

@@ -868,10 +868,10 @@
 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} zmm1 = (zmm2 * zmm1) - zmm1
 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm2 * zmm0) - zmm1
 ; AVX512-NOINFS-NEXT: retq
- %t1 = fsub nsz <16 x float> , %t
- %tx = fmul nsz <16 x float> %x, %t
- %ty = fmul nsz <16 x float> %y, %t1
- %r = fadd nsz <16 x float> %tx, %ty
+ %t1 = fsub FASTMATH_FLAGS nsz <16 x float> , %t
+ %tx = fmul FASTMATH_FLAGS nsz <16 x float> %x, %t
+ %ty = fmul FASTMATH_FLAGS nsz <16 x float> %y, %t1
+ %r = fadd FASTMATH_FLAGS nsz <16 x float> %tx, %ty
 ret <16 x float> %r
 }

@@ -927,10 +927,10 @@
 ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} zmm1 = (zmm2 * zmm1) - zmm1
 ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm2 * zmm0) - zmm1
 ; AVX512-NOINFS-NEXT: retq
- %t1 = fsub nsz <8 x double> , %t
- %tx = fmul nsz <8 x double> %x, %t
- %ty = fmul nsz <8 x double> %y, %t1
- %r = fadd nsz <8 x double> %tx, %ty
+ %t1 = fsub FASTMATH_FLAGS nsz <8 x double> , %t
+ %tx = fmul FASTMATH_FLAGS nsz <8 x double> %x, %t
+ %ty = fmul FASTMATH_FLAGS nsz <8 x double> %y, %t1
+ %r = fadd FASTMATH_FLAGS nsz <8 x double> %tx, %ty
 ret <8 x double> %r
 }

@@ -955,9 +955,9 @@
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
 ; AVX512-NEXT: retq
- %mul = fmul nsz <16 x float> %a0, %a1
- %add = fadd nsz <16 x float> %mul, %a2
- %neg = fsub nsz <16 x float> , %add
+ %mul = fmul FASTMATH_FLAGS nsz <16 x float> %a0, %a1
+ %add = fadd FASTMATH_FLAGS nsz <16 x float> %mul, %a2
+ %neg = fsub FASTMATH_FLAGS nsz <16 x float> , %add
 ret <16 x float> %neg
 }

@@ -978,9 +978,9 @@
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vfnmadd213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
 ; AVX512-NEXT: retq
- %mul = fmul nsz <8 x double> %a0, %a1
- %sub = fsub nsz <8 x double> %mul, %a2
- %neg = fsub nsz <8 x double> , %sub
+ %mul = fmul FASTMATH_FLAGS nsz <8 x double> %a0, %a1
+ %sub = fsub FASTMATH_FLAGS nsz <8 x double> %mul, %a2
+ %neg = fsub FASTMATH_FLAGS nsz <8 x double> , %sub
 ret <8 x double> %neg
 }

@@ -1001,10 +1001,10 @@
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
 ; AVX512-NEXT: retq
- %mul = fmul nsz <16 x float> %a0, %a1
- %neg0 = fsub nsz <16 x float> , %mul
- %add = fadd nsz <16 x float> %neg0, %a2
- %neg1 = fsub nsz <16 x float> , %add
+ %mul = fmul FASTMATH_FLAGS nsz <16 x float> %a0, %a1
+ %neg0 = fsub FASTMATH_FLAGS nsz <16 x float> , %mul
+ %add = fadd FASTMATH_FLAGS nsz <16 x float> %neg0, %a2
+ %neg1 = fsub FASTMATH_FLAGS nsz <16 x float> , %add
 ret <16 x float> %neg1
 }

@@ -1025,10 +1025,10 @@
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
 ; AVX512-NEXT: retq
- %mul = fmul nsz <8 x double> %a0, %a1
- %neg0 = fsub nsz <8 x double> , %mul
- %sub = fsub nsz <8 x double> %neg0, %a2
- %neg1 = fsub nsz <8 x double> , %sub
+ %mul = fmul FASTMATH_FLAGS nsz <8 x double> %a0, %a1
+ %neg0 = fsub FASTMATH_FLAGS nsz <8 x double> , %mul
+ %sub = fsub FASTMATH_FLAGS nsz <8 x double> %neg0, %a2
+ %neg1 = fsub FASTMATH_FLAGS nsz <8 x double> , %sub
 ret <8 x double> %neg1
 }

@@ -1053,9 +1053,9 @@
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
 ; AVX512-NEXT: retq
- %m0 = fmul <16 x float> %x,
- %m1 = fmul <16 x float> %x,
- %a = fadd <16 x float> %m0, %m1
+ %m0 = fmul FASTMATH_FLAGS <16 x float> %x,
+ %m1 = fmul FASTMATH_FLAGS <16 x float> %x,
+ %a = fadd FASTMATH_FLAGS <16 x float> %m0, %m1
 ret <16 x float> %a
 }

@@ -1080,9 +1080,9 @@
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1
 ; AVX512-NEXT: retq
- %m0 = fmul <16 x float> %x,
- %m1 = fmul <16 x float> %m0,
- %a = fadd <16 x float> %m1, %y
+ %m0 = fmul FASTMATH_FLAGS <16 x float> %x,
+ %m1 = fmul FASTMATH_FLAGS <16 x float> %m0,
+ %a = fadd FASTMATH_FLAGS <16 x float> %m1, %y
 ret <16 x float> %a
 }

@@ -1108,8 +1108,8 @@
 ; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
 ; AVX512-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
 ; AVX512-NEXT: retq
- %m = fmul nsz <16 x float> %x, %y
- %n = fsub <16 x float> , %m
+ %m = fmul FASTMATH_FLAGS nsz <16 x float> %x, %y
+ %n = fsub FASTMATH_FLAGS <16 x float> , %m
 ret <16 x float> %n
 }

@@ -1133,8 +1133,8 @@
 ; AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2
 ; AVX512-NEXT: vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
 ; AVX512-NEXT: retq
- %m = fmul nsz <8 x double> %x, %y
- %n = fsub <8 x double> , %m
+ %m = fmul FASTMATH_FLAGS nsz <8 x double> %x, %y
+ %n = fsub FASTMATH_FLAGS <8 x double> , %m
 ret <8 x double> %n
 }

@@ -1162,8 +1162,8 @@
 ; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
 ; AVX512-NEXT: retq
- %m = fmul <8 x double> %x, %y
- %n = fsub <8 x double> , %m
+ %m = fmul FASTMATH_FLAGS <8 x double> %x, %y
+ %n = fsub FASTMATH_FLAGS <8 x double> , %m
 ret <8 x double> %n
 }

Index: llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
===================================================================
--- llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
+++ llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
@@ -23,16 +23,16 @@
 ; CHECK: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0
 ; CHECK: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF
 ; CHECK: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]]
- ; CHECK: %3:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
+ ; CHECK: %3:fr32 = ninf contract afn nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
 ; CHECK: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s32) from constant-pool)
- ; CHECK: %5:fr32 = ninf afn nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr
+ ; CHECK: %5:fr32 = ninf contract afn nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr
 ; CHECK: [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load (s32) from constant-pool)
- ; CHECK: %7:fr32 = ninf afn nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr
- ; CHECK: %8:fr32 = ninf afn nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr
- ; CHECK: %9:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr
- ; CHECK: %10:fr32 = ninf afn nofpexcept VFMADD213SSr %8, %9, [[VMOVSSrm_alt]], implicit $mxcsr
- ; CHECK: %11:fr32 = ninf afn nofpexcept VMULSSrr %9, [[VMOVSSrm_alt1]], implicit $mxcsr
- ; CHECK: %12:fr32 = ninf afn nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr
+ ; CHECK: %7:fr32 = ninf contract afn nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr
+ ; CHECK: %8:fr32 = ninf contract afn nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr
+ ; CHECK: %9:fr32 = ninf contract afn nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr
+ ; CHECK: %10:fr32 = ninf contract afn nofpexcept VFMADD213SSr %8, %9, [[VMOVSSrm_alt]], implicit $mxcsr
+ ; CHECK: %11:fr32 = ninf contract afn nofpexcept VMULSSrr %9, [[VMOVSSrm_alt1]], implicit $mxcsr
+ ; CHECK: %12:fr32 = ninf contract afn nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr
 ; CHECK: [[COPY1:%[0-9]+]]:vr128 = COPY %12
 ; CHECK: [[COPY2:%[0-9]+]]:vr128 = COPY [[COPY]]
 ; CHECK: [[VPBROADCASTDrm:%[0-9]+]]:vr128 = VPBROADCASTDrm $rip, 1, $noreg, %const.2, $noreg :: (load (s32) from constant-pool)
@@ -44,7 +44,7 @@
 ; CHECK: [[COPY5:%[0-9]+]]:fr32 = COPY [[VPANDNrr]]
 ; CHECK: $xmm0 = COPY [[COPY5]]
 ; CHECK: RET 0, $xmm0
- %call = tail call ninf afn float @llvm.sqrt.f32(float %f)
+ %call = tail call ninf contract afn float @llvm.sqrt.f32(float %f)
 ret float %call
 }

@@ -68,16 +68,16 @@
 ; CHECK: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0
 ; CHECK: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF
 ; CHECK: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]]
- ; CHECK: %3:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
+ ; CHECK: %3:fr32 = ninf contract afn nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
 ; CHECK: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s32) from constant-pool)
- ; CHECK: %5:fr32 = ninf afn nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr
+ ; CHECK: %5:fr32 = ninf contract afn nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr
 ; CHECK: [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load (s32) from constant-pool)
- ; CHECK: %7:fr32 = ninf afn nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr
- ; CHECK: %8:fr32 = ninf afn nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr
- ; CHECK: %9:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr
- ; CHECK: %10:fr32 = ninf afn nofpexcept VFMADD213SSr %8, %9, [[VMOVSSrm_alt]], implicit $mxcsr
- ; CHECK: %11:fr32 = ninf afn nofpexcept VMULSSrr %9, [[VMOVSSrm_alt1]], implicit $mxcsr
- ; CHECK: %12:fr32 = ninf afn nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr
+ ; CHECK: %7:fr32 = ninf contract afn nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr
+ ; CHECK: %8:fr32 = ninf contract afn nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr
+ ; CHECK: %9:fr32 = ninf contract afn nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr
+ ; CHECK: %10:fr32 = ninf contract afn nofpexcept VFMADD213SSr %8, %9, [[VMOVSSrm_alt]], implicit $mxcsr
+ ; CHECK: %11:fr32 = ninf contract afn nofpexcept VMULSSrr %9, [[VMOVSSrm_alt1]], implicit $mxcsr
+ ; CHECK: %12:fr32 = ninf contract afn nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr
 ; CHECK: [[COPY1:%[0-9]+]]:vr128 = COPY %12
 ; CHECK: [[FsFLD0SS:%[0-9]+]]:fr32 = FsFLD0SS
 ; CHECK: %15:fr32 = nofpexcept VCMPSSrr [[COPY]], killed [[FsFLD0SS]], 0, implicit $mxcsr
@@ -86,7 +86,7 @@
 ; CHECK: [[COPY3:%[0-9]+]]:fr32 = COPY [[VPANDNrr]]
 ; CHECK: $xmm0 = COPY [[COPY3]]
 ; CHECK: RET 0, $xmm0
- %call = tail call ninf afn float @llvm.sqrt.f32(float %f)
+ %call = tail call ninf contract afn float @llvm.sqrt.f32(float %f)
 ret float %call
 }

Index: llvm/test/CodeGen/X86/sqrt-fastmath.ll
===================================================================
--- llvm/test/CodeGen/X86/sqrt-fastmath.ll
+++ llvm/test/CodeGen/X86/sqrt-fastmath.ll
@@ -118,7 +118,7 @@
 ; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
 ; AVX512-NEXT: vmovaps %xmm1, %xmm0
 ; AVX512-NEXT: retq
- %call = tail call ninf afn float @__sqrtf_finite(float %f) #2
+ %call = tail call contract ninf afn float @__sqrtf_finite(float %f) #2
 ret float %call
 }

@@ -177,7 +177,7 @@
 ; AVX512-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1}
 ; AVX512-NEXT: vmovaps %xmm1, %xmm0
 ; AVX512-NEXT: retq
- %call = tail call ninf afn float @__sqrtf_finite(float %f) #2
+ %call = tail call contract ninf afn float @__sqrtf_finite(float %f) #2
 ret float %call
 }

@@ -262,7 +262,7 @@
 ; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
 ; AVX512-NEXT: vmovaps %xmm1, %xmm0
 ; AVX512-NEXT: retq
- %call = tail call ninf afn float @__sqrtf_finite(float %x) #2
+ %call = tail call contract ninf afn float @__sqrtf_finite(float %x) #2
 ret float %call
 }

@@ -327,7 +327,7 @@
 ; AVX512-NEXT: vcmpleps %xmm0, %xmm2, %xmm0
 ; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: retq
- %call = tail call ninf afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) #2
+ %call = tail call contract ninf afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) #2
 ret <4 x float> %call
 }