Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -820,7 +820,7 @@
     });
   }
   case ISD::FADD:
-    if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
+    if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
       return 0;
 
     // After operation legalization, it might not be legal to create new FSUBs.
@@ -893,7 +893,7 @@
     return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
   }
   case ISD::FADD:
-    assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
+    assert(Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros());
 
     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
@@ -11998,7 +11998,7 @@
   // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
   ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
   if (N1C && N1C->isZero())
-    if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros())
+    if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
       return N0;
 
   if (SDValue NewSel = foldBinOpIntoSelect(N))
@@ -12056,7 +12056,7 @@
   // If 'unsafe math' or reassoc and nsz, fold lots of things.
   // TODO: break out portions of the transformations below for which Unsafe is
   //       considered and which do not require both nsz and reassoc
-  if ((Options.UnsafeFPMath ||
+  if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
       AllowNewConst) {
     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
@@ -12175,7 +12175,7 @@
 
   // (fsub A, 0) -> A
   if (N1CFP && N1CFP->isZero()) {
-    if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
+    if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
         Flags.hasNoSignedZeros()) {
       return N0;
     }
@@ -12202,7 +12202,7 @@
     }
   }
 
-  if ((Options.UnsafeFPMath ||
+  if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
       N1.getOpcode() == ISD::FADD) {
     // X - (X + Y) -> -Y
Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4645,7 +4645,7 @@
       return getUNDEF(VT);
 
     // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
-    if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) &&
+    if ((getTarget().Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
        OpOpcode == ISD::FSUB)
      return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1),
                     Operand.getOperand(0), Flags);
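For anyone who wants to exercise the new gate outside the test suite, here is a minimal standalone reproducer. It is a sketch, not part of the patch, and the file name is invented:

; nsz-fold.ll (hypothetical). The per-instruction nsz flag is enough to
; permit the fold `fadd X, +0.0 --> X` that the hunks above guard; without
; nsz (or the target-level NoSignedZerosFPMath option), the +0.0 must be
; kept, because (-0.0) + (+0.0) is +0.0, not -0.0.
define float @fold_fadd_zero(float %x) {
  %r = fadd nsz float %x, 0.0
  ret float %r
}

The instruction-level path is unchanged by this patch; what changes is the global fallback: the fold is now keyed to -enable-no-signed-zeros-fp-math rather than -enable-unsafe-fp-math.
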
Index: test/CodeGen/AArch64/fadd-combines.ll
===================================================================
--- test/CodeGen/AArch64/fadd-combines.ll
+++ test/CodeGen/AArch64/fadd-combines.ll
@@ -150,24 +150,22 @@
 ; The machine combiner transforms this into a chain of 3 dependent adds:
 ; ((x + 59.0) + 17.0) + x
-define float @fadd_const_multiuse_attr(float %x) #0 {
+define float @fadd_const_multiuse_attr(float %x) {
 ; CHECK-LABEL: fadd_const_multiuse_attr:
 ; CHECK: // %bb.0:
-; CHECK-DAG: mov [[W59:w[0-9]+]], #1114374144
 ; CHECK-DAG: mov [[W17:w[0-9]+]], #1109917696
-; CHECK-NEXT: fmov [[FP59:s[0-9]+]], [[W59]]
+; CHECK-DAG: mov [[W59:w[0-9]+]], #1114374144
 ; CHECK-NEXT: fmov [[FP17:s[0-9]+]], [[W17]]
-; CHECK-NEXT: fadd [[TMP1:s[0-9]+]], s0, [[FP59]]
-; CHECK-NEXT: fadd [[TMP2:s[0-9]+]], [[FP17]], [[TMP1]]
-; CHECK-NEXT: fadd s0, s0, [[TMP2]]
+; CHECK-NEXT: fmov [[FP59:s[0-9]+]], [[W59]]
+; CHECK-NEXT: fadd [[TMP1:s[0-9]+]], s0, [[FP17]]
+; CHECK-NEXT: fadd [[TMP2:s[0-9]+]], s0, [[FP59]]
+; CHECK-NEXT: fadd s0, [[TMP1]], [[TMP2]]
 ; CHECK-NEXT: ret
-  %a1 = fadd float %x, 42.0
-  %a2 = fadd float %a1, 17.0
-  %a3 = fadd float %a1, %a2
+  %a1 = fadd fast float %x, 42.0
+  %a2 = fadd fast float %a1, 17.0
+  %a3 = fadd fast float %a1, %a2
   ret float %a3
 }
 
-attributes #0 = { "unsafe-fp-math"="true" }
-
 declare void @use(double)
Index: test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll
===================================================================
--- test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll
+++ test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll
@@ -1,17 +1,29 @@
-; RUN: llc -march=amdgcn -enable-no-signed-zeros-fp-math=0 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SAFE %s
-; RUN: llc -march=amdgcn -enable-no-signed-zeros-fp-math=1 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-UNSAFE %s
-; RUN: llc -march=amdgcn -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-UNSAFE %s
+; RUN: llc -march=amdgcn < %s | FileCheck --check-prefixes=GCN,GCN-FMF,GCN-SAFE %s
 
 declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 
 ; Test that the -enable-no-signed-zeros-fp-math flag works
 
-; GCN-LABEL: {{^}}fneg_fsub_f32:
+; GCN-LABEL: {{^}}fneg_fsub_f32_fmf:
 ; GCN: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
-; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]]
+; GCN-FMF-NOT: xor
+define amdgpu_kernel void @fneg_fsub_f32_fmf(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %add = add i32 %tid, 1
+  %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
+  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 %add
+  %a = load float, float addrspace(1)* %gep, align 4
+  %b = load float, float addrspace(1)* %b_ptr, align 4
+  %result = fsub fast float %a, %b
+  %neg.result = fsub fast float -0.0, %result
+  store float %neg.result, float addrspace(1)* %out, align 4
+  ret void
+}
 
-; GCN-UNSAFE-NOT: xor
-define amdgpu_kernel void @fneg_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+; GCN-LABEL: {{^}}fneg_fsub_f32_safe:
+; GCN: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
+; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]]
+define amdgpu_kernel void @fneg_fsub_f32_safe(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %add = add i32 %tid, 1
   %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
Index: test/CodeGen/AMDGPU/ffloor.f64.ll
===================================================================
--- test/CodeGen/AMDGPU/ffloor.f64.ll
+++ test/CodeGen/AMDGPU/ffloor.f64.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
 
 declare double @llvm.fabs.f64(double %Val)
 declare double @llvm.floor.f64(double) nounwind readnone
@@ -20,7 +20,7 @@
 ; SI: v_add_f64
 ; SI: s_endpgm
 define amdgpu_kernel void @ffloor_f64(double addrspace(1)* %out, double %x) {
-  %y = call double @llvm.floor.f64(double %x) nounwind readnone
+  %y = call fast double @llvm.floor.f64(double %x) nounwind readnone
   store double %y, double addrspace(1)* %out
   ret void
 }
@@ -35,8 +35,8 @@
 ; SI: v_add_f64 {{v[[0-9]+:[0-9]+]}}, -[[INPUT]]
 ; SI: s_endpgm
 define amdgpu_kernel void @ffloor_f64_neg(double addrspace(1)* %out, double %x) {
-  %neg = fsub double 0.0, %x
-  %y = call double @llvm.floor.f64(double %neg) nounwind readnone
+  %neg = fsub nsz double 0.0, %x
+  %y = call fast double @llvm.floor.f64(double %neg) nounwind readnone
   store double %y, double addrspace(1)* %out
   ret void
 }
@@ -51,9 +51,9 @@
 ; SI: v_add_f64 {{v[[0-9]+:[0-9]+]}}, -|[[INPUT]]|
 ; SI: s_endpgm
 define amdgpu_kernel void @ffloor_f64_neg_abs(double addrspace(1)* %out, double %x) {
-  %abs = call double @llvm.fabs.f64(double %x)
-  %neg = fsub double 0.0, %abs
-  %y = call double @llvm.floor.f64(double %neg) nounwind readnone
+  %abs = call fast double @llvm.fabs.f64(double %x)
+  %neg = fsub nsz double 0.0, %abs
+  %y = call fast double @llvm.floor.f64(double %neg) nounwind readnone
   store double %y, double addrspace(1)* %out
   ret void
 }
@@ -62,7 +62,7 @@
 ; CI: v_floor_f64_e32
 ; CI: v_floor_f64_e32
 define amdgpu_kernel void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
-  %y = call <2 x double> @llvm.floor.v2f64(<2 x double> %x) nounwind readnone
+  %y = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %x) nounwind readnone
   store <2 x double> %y, <2 x double> addrspace(1)* %out
   ret void
 }
@@ -73,7 +73,7 @@
 ; CI: v_floor_f64_e32
 ; CI-NOT: v_floor_f64_e32
 define amdgpu_kernel void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
-  %y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone
+  %y = call fast <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone
   store <3 x double> %y, <3 x double> addrspace(1)* %out
   ret void
 }
@@ -84,7 +84,7 @@
 ; CI: v_floor_f64_e32
 ; CI: v_floor_f64_e32
 define amdgpu_kernel void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
-  %y = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
+  %y = call fast <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
   store <4 x double> %y, <4 x double> addrspace(1)* %out
   ret void
 }
@@ -99,7 +99,7 @@
 ; CI: v_floor_f64_e32
 ; CI: v_floor_f64_e32
 define amdgpu_kernel void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
-  %y = call <8 x double> @llvm.floor.v8f64(<8 x double> %x) nounwind readnone
+  %y = call fast <8 x double> @llvm.floor.v8f64(<8 x double> %x) nounwind readnone
   store <8 x double> %y, <8 x double> addrspace(1)* %out
   ret void
 }
@@ -122,7 +122,7 @@
 ; CI: v_floor_f64_e32
 ; CI: v_floor_f64_e32
 define amdgpu_kernel void @ffloor_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
-  %y = call <16 x double> @llvm.floor.v16f64(<16 x double> %x) nounwind readnone
+  %y = call fast <16 x double> @llvm.floor.v16f64(<16 x double> %x) nounwind readnone
   store <16 x double> %y, <16 x double> addrspace(1)* %out
   ret void
 }
Index: test/CodeGen/AMDGPU/fneg-combines.ll
===================================================================
--- test/CodeGen/AMDGPU/fneg-combines.ll
+++ test/CodeGen/AMDGPU/fneg-combines.ll
@@ -219,8 +219,11 @@
 ; GCN-SAFE-DAG: v_mad_f32 [[A:v[0-9]+]],
 ; GCN-SAFE-DAG: v_cmp_ngt_f32_e32 {{.*}}, [[A]]
 ; GCN-SAFE-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, -[[A]]
-; GCN-NSZ-DAG: v_mac_f32_e32 [[C:v[0-9]+]],
-; GCN-NSZ-DAG: v_cmp_nlt_f32_e64 {{.*}}, -[[C]]
+; GCN-NSZ-DAG: v_rcp_f32_e32 [[A:v[0-9]+]],
+; GCN-NSZ-DAG: v_mov_b32_e32 [[B:v[0-9]+]],
+; GCN-NSZ-DAG: v_mov_b32_e32 [[C:v[0-9]+]],
+; GCN-NSZ-DAG: v_mul_f32_e32 [[D:v[0-9]+]],
+; GCN-NSZ-DAG: v_cmp_nlt_f32_e64 {{.*}}, -[[D]]
 define amdgpu_ps float @fneg_fadd_0(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #0 {
 .entry:
Index: test/CodeGen/PowerPC/fma-mutate.ll
===================================================================
--- test/CodeGen/PowerPC/fma-mutate.ll
+++ test/CodeGen/PowerPC/fma-mutate.ll
@@ -3,19 +3,26 @@
 ; same as the FMA target register.  The second one is legal.  The third
 ; one doesn't fit the feeding-copy pattern.
 
-; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math -mattr=+vsx -disable-ppc-vsx-fma-mutation=false | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx -disable-ppc-vsx-fma-mutation=false | FileCheck --check-prefixes=CHECK-SAFE,FMF %s
 
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
 declare double @llvm.sqrt.f64(double)
 
-define double @foo3(double %a) nounwind {
-  %r = call double @llvm.sqrt.f64(double %a)
+define double @foo3_fmf(double %a) nounwind {
+; FMF: @foo3_fmf
+; FMF-NOT: fmr
+; FMF: xsmaddmdp
+; FMF: xsmaddadp
+  %r = call fast double @llvm.sqrt.f64(double %a)
   ret double %r
+}
 
-; CHECK: @foo3
-; CHECK-NOT: fmr
-; CHECK: xsmaddmdp
-; CHECK: xsmaddadp
+define double @foo3_safe(double %a) nounwind {
+; CHECK-SAFE: @foo3_safe
+; CHECK-SAFE-NOT: fmr
+; CHECK-SAFE: xssqrtdp
+  %r = call double @llvm.sqrt.f64(double %a)
+  ret double %r
 }
Index: test/CodeGen/PowerPC/fmf-propagation.ll
===================================================================
--- test/CodeGen/PowerPC/fmf-propagation.ll
+++ test/CodeGen/PowerPC/fmf-propagation.ll
@@ -3,7 +3,7 @@
 ; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 | FileCheck %s --check-prefix=FMFDEBUG
 ; RUN: llc < %s -mtriple=powerpc64le | FileCheck %s --check-prefix=FMF
 ; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=GLOBALDEBUG
-; RUN: llc < %s -mtriple=powerpc64le -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=GLOBAL
+; RUN: llc < %s -mtriple=powerpc64le -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math | FileCheck %s --check-prefix=GLOBAL
 
 ; Test FP transforms using instruction/node-level fast-math-flags.
 ; We're also checking debug output to verify that FMF is propagated to the newly created nodes.
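The PowerPC test updates below lean on the fact that fast-math flags are per-instruction and are accepted on FP intrinsic calls, so one file can hold a flagged and an unflagged copy of each function. A small sketch (not from this patch; names invented):

; fmf-mix.ll (hypothetical): one sqrt may use the estimate expansion,
; the other must remain a correctly rounded fsqrt, in the same function.
declare double @llvm.sqrt.f64(double)

define double @mixed(double %a, double %b) {
  %approx = call fast double @llvm.sqrt.f64(double %a)
  %exact = call double @llvm.sqrt.f64(double %b)
  %r = fadd double %approx, %exact
  ret double %r
}
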
Index: test/CodeGen/PowerPC/qpx-recipest.ll
===================================================================
--- test/CodeGen/PowerPC/qpx-recipest.ll
+++ test/CodeGen/PowerPC/qpx-recipest.ll
@@ -1,194 +1,224 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q -enable-unsafe-fp-math | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck -check-prefix=CHECK-SAFE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck --check-prefixes=CHECK-SAFE,FMF %s
 
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
 declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
 
-define <4 x double> @foo(<4 x double> %a, <4 x double> %b) nounwind {
+define <4 x double> @foo_fmf(<4 x double> %a, <4 x double> %b) nounwind {
+; FMF-LABEL: @foo_fmf
+; FMF: qvfrsqrte
+; FMF-DAG: qvfmul
+; FMF-DAG: qvfmsub
+; FMF-DAG: qvfnmsub
+; FMF: qvfmul
+; FMF: qvfmul
+; FMF: qvfnmsub
+; FMF: qvfmul
+; FMF: qvfmul
+; FMF: blr
 entry:
-  %x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
-  %r = fdiv <4 x double> %a, %x
+  %x = call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
+  %r = fdiv fast <4 x double> %a, %x
   ret <4 x double> %r
+}
 
-; CHECK-LABEL: @foo
-; CHECK: qvfrsqrte
-; CHECK-DAG: qvfmul
-; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
-; an qvfmadd instead of a qvfnmsub
-; CHECK-DAG: qvfmadd
-; CHECK-DAG: qvfmadd
-; CHECK: qvfmul
-; CHECK: qvfmul
-; CHECK: qvfmadd
-; CHECK: qvfmul
-; CHECK: qvfmul
-; CHECK: blr
-
-; CHECK-SAFE-LABEL: @foo
+define <4 x double> @foo_safe(<4 x double> %a, <4 x double> %b) nounwind {
+; CHECK-SAFE-LABEL: @foo_safe
 ; CHECK-SAFE: fsqrt
 ; CHECK-SAFE: fdiv
 ; CHECK-SAFE: blr
-}
-
-define <4 x double> @foof(<4 x double> %a, <4 x float> %b) nounwind {
 entry:
-  %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
-  %y = fpext <4 x float> %x to <4 x double>
-  %r = fdiv <4 x double> %a, %y
+  %x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
+  %r = fdiv <4 x double> %a, %x
   ret <4 x double> %r
+}
 
-; CHECK-LABEL: @foof
-; CHECK: qvfrsqrtes
-; CHECK-DAG: qvfmuls
+define <4 x double> @foof_fmf(<4 x double> %a, <4 x float> %b) nounwind {
+; FMF-LABEL: @foof_fmf
+; FMF: qvfrsqrtes
+; FMF-DAG: qvfmuls
 ; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
 ; an qvfmadd instead of a qvfnmsubs
-; CHECK-DAG: qvfmadds
-; CHECK-DAG: qvfmadds
-; CHECK: qvfmuls
-; CHECK: qvfmul
-; CHECK: blr
+; FMF-DAG: qvfmadds
+; FMF-DAG: qvfmadds
+; FMF: qvfmuls
+; FMF: qvfmul
+; FMF: blr
+entry:
+  %x = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
+  %y = fpext <4 x float> %x to <4 x double>
+  %r = fdiv fast <4 x double> %a, %y
   ret <4 x double> %r
+}
 
-; CHECK-SAFE-LABEL: @foof
+define <4 x double> @foof_safe(<4 x double> %a, <4 x float> %b) nounwind {
+; CHECK-SAFE-LABEL: @foof_safe
 ; CHECK-SAFE: fsqrts
 ; CHECK-SAFE: fdiv
 ; CHECK-SAFE: blr
+entry:
+  %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
+  %y = fpext <4 x float> %x to <4 x double>
+  %r = fdiv <4 x double> %a, %y
+  ret <4 x double> %r
 }
 
-define <4 x float> @food(<4 x float> %a, <4 x double> %b) nounwind {
+define <4 x float> @food_fmf(<4 x float> %a, <4 x double> %b) nounwind {
+; FMF-LABEL: @food_fmf
+; FMF: qvfrsqrte
+; FMF-DAG: qvfmul
+; FMF-DAG: qvfmsub
+; FMF-DAG: qvfnmsub
+; FMF: qvfmul
+; FMF: qvfmul
+; FMF: qvfnmsub
+; FMF: qvfmul
+; FMF: qvfrsp
+; FMF: qvfmuls
+; FMF: blr
 entry:
-  %x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
+  %x = call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
   %y = fptrunc <4 x double> %x to <4 x float>
-  %r = fdiv <4 x float> %a, %y
+  %r = fdiv fast <4 x float> %a, %y
   ret <4 x float> %r
+}
 
-; CHECK-LABEL: @food
-; CHECK: qvfrsqrte
-; CHECK-DAG: qvfmul
-; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
-; an qvfmadd instead of a qvfnmsub
-; CHECK-DAG: qvfmadd
-; CHECK-DAG: qvfmadd
-; CHECK: qvfmul
-; CHECK: qvfmul
-; CHECK: qvfmadd
-; CHECK: qvfmul
-; CHECK: qvfrsp
-; CHECK: qvfmuls
-; CHECK: blr
-
-; CHECK-SAFE-LABEL: @food
+define <4 x float> @food_safe(<4 x float> %a, <4 x double> %b) nounwind {
+; CHECK-SAFE-LABEL: @food_safe
 ; CHECK-SAFE: fsqrt
 ; CHECK-SAFE: fdivs
 ; CHECK-SAFE: blr
-}
-
-define <4 x float> @goo(<4 x float> %a, <4 x float> %b) nounwind {
 entry:
-  %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
-  %r = fdiv <4 x float> %a, %x
+  %x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
+  %y = fptrunc <4 x double> %x to <4 x float>
+  %r = fdiv <4 x float> %a, %y
   ret <4 x float> %r
+}
 
-; CHECK-LABEL: @goo
-; CHECK: qvfrsqrtes
-; CHECK-DAG: qvfmuls
 ; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
 ; an qvfmadd instead of a qvfnmsubs
-; CHECK-DAG: qvfmadds
-; CHECK-DAG: qvfmadds
-; CHECK: qvfmuls
-; CHECK: qvfmuls
-; CHECK: blr
+define <4 x float> @goo_fmf(<4 x float> %a, <4 x float> %b) nounwind {
+; FMF-LABEL: @goo_fmf
+; FMF: qvfrsqrtes
+; FMF-DAG: qvfmuls
+; FMF-DAG: qvfmadds
+; FMF-DAG: qvfmadds
+; FMF: qvfmuls
+; FMF: qvfmuls
+; FMF: blr
+entry:
+  %x = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
+  %r = fdiv fast <4 x float> %a, %x
+  ret <4 x float> %r
+}
 
-; CHECK-SAFE-LABEL: @goo
+define <4 x float> @goo_safe(<4 x float> %a, <4 x float> %b) nounwind {
+; CHECK-SAFE-LABEL: @goo_safe
 ; CHECK-SAFE: fsqrts
 ; CHECK-SAFE: fdivs
 ; CHECK-SAFE: blr
+entry:
+  %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
+  %r = fdiv <4 x float> %a, %x
+  ret <4 x float> %r
 }
 
-define <4 x double> @foo2(<4 x double> %a, <4 x double> %b) nounwind {
+define <4 x double> @foo2_fmf(<4 x double> %a, <4 x double> %b) nounwind {
+; FMF-LABEL: @foo2_fmf
+; FMF: qvfre
+; FMF: qvfnmsub
+; FMF: qvfmadd
+; FMF: qvfnmsub
+; FMF: qvfmadd
+; FMF: qvfmul
+; FMF: blr
 entry:
-  %r = fdiv <4 x double> %a, %b
+  %r = fdiv fast <4 x double> %a, %b
   ret <4 x double> %r
+}
 
-; CHECK-LABEL: @foo2
-; CHECK: qvfre
-; CHECK: qvfnmsub
-; CHECK: qvfmadd
-; CHECK: qvfnmsub
-; CHECK: qvfmadd
-; CHECK: qvfmul
-; CHECK: blr
-
-; CHECK-SAFE-LABEL: @foo2
+define <4 x double> @foo2_safe(<4 x double> %a, <4 x double> %b) nounwind {
+; CHECK-SAFE-LABEL: @foo2_safe
 ; CHECK-SAFE: fdiv
 ; CHECK-SAFE: blr
+  %r = fdiv <4 x double> %a, %b
+  ret <4 x double> %r
 }
 
-define <4 x float> @goo2(<4 x float> %a, <4 x float> %b) nounwind {
+define <4 x float> @goo2_fmf(<4 x float> %a, <4 x float> %b) nounwind {
+; FMF-LABEL: @goo2_fmf
+; FMF: qvfres
+; FMF: qvfnmsubs
+; FMF: qvfmadds
+; FMF: qvfmuls
+; FMF: blr
 entry:
-  %r = fdiv <4 x float> %a, %b
+  %r = fdiv fast <4 x float> %a, %b
   ret <4 x float> %r
+}
 
-; CHECK-LABEL: @goo2
-; CHECK: qvfres
-; CHECK: qvfnmsubs
-; CHECK: qvfmadds
-; CHECK: qvfmuls
-; CHECK: blr
-
-; CHECK-SAFE-LABEL: @goo2
+define <4 x float> @goo2_safe(<4 x float> %a, <4 x float> %b) nounwind {
+; CHECK-SAFE-LABEL: @goo2_safe
 ; CHECK-SAFE: fdivs
 ; CHECK-SAFE: blr
+entry:
+  %r = fdiv <4 x float> %a, %b
+  ret <4 x float> %r
 }
 
-define <4 x double> @foo3(<4 x double> %a) nounwind {
+define <4 x double> @foo3_fmf(<4 x double> %a) nounwind {
+; FMF-LABEL: @foo3_fmf
+; FMF: qvfrsqrte
+; FMF: qvfmul
+; FMF-DAG: qvfmsub
+; FMF-DAG: qvfcmpeq
+; FMF-DAG: qvfnmsub
+; FMF-DAG: qvfmul
+; FMF-DAG: qvfmul
+; FMF-DAG: qvfnmsub
+; FMF-DAG: qvfmul
+; FMF-DAG: qvfmul
+; FMF: qvfsel
+; FMF: blr
 entry:
-  %r = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
+  %r = call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
   ret <4 x double> %r
+}
 
-; CHECK-LABEL: @foo3
-; CHECK: qvfrsqrte
-; CHECK: qvfmul
-; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
-; an qvfmadd instead of a qvfnmsub
-; CHECK-DAG: qvfmadd
-; CHECK-DAG: qvfcmpeq
-; CHECK-DAG: qvfmadd
-; CHECK-DAG: qvfmul
-; CHECK-DAG: qvfmul
-; CHECK-DAG: qvfmadd
-; CHECK-DAG: qvfmul
-; CHECK-DAG: qvfmul
-; CHECK: qvfsel
-; CHECK: blr
-
-; CHECK-SAFE-LABEL: @foo3
+define <4 x double> @foo3_safe(<4 x double> %a) nounwind {
+; CHECK-SAFE-LABEL: @foo3_safe
 ; CHECK-SAFE: fsqrt
 ; CHECK-SAFE: blr
-}
-
-define <4 x float> @goo3(<4 x float> %a) nounwind {
 entry:
-  %r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
-  ret <4 x float> %r
+  %r = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
+  ret <4 x double> %r
+}
 
-; CHECK-LABEL: @goo3
-; CHECK: qvfrsqrtes
-; CHECK: qvfmuls
 ; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
 ; an qvfmadds instead of a qvfnmsubs
-; CHECK-DAG: qvfmadds
-; CHECK-DAG: qvfcmpeq
-; CHECK-DAG: qvfmadds
-; CHECK-DAG: qvfmuls
-; CHECK-DAG: qvfmuls
-; CHECK: qvfsel
-; CHECK: blr
+define <4 x float> @goo3_fmf(<4 x float> %a) nounwind {
+; FMF-LABEL: @goo3_fmf
+; FMF: qvfrsqrtes
+; FMF: qvfmuls
+; FMF-DAG: qvfmadds
+; FMF-DAG: qvfcmpeq
+; FMF-DAG: qvfmadds
+; FMF-DAG: qvfmuls
+; FMF-DAG: qvfmuls
+; FMF: qvfsel
+; FMF: blr
+entry:
+  %r = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
+  ret <4 x float> %r
+}
 
-; CHECK-SAFE-LABEL: @goo3
+define <4 x float> @goo3_safe(<4 x float> %a) nounwind {
+; CHECK-SAFE-LABEL: @goo3_safe
 ; CHECK-SAFE: fsqrts
 ; CHECK-SAFE: blr
+entry:
+  %r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
+  ret <4 x float> %r
 }
Index: test/CodeGen/PowerPC/recipest.ll
===================================================================
--- test/CodeGen/PowerPC/recipest.ll
+++ test/CodeGen/PowerPC/recipest.ll
@@ -1,5 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math -mattr=-vsx | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck -check-prefix=CHECK-SAFE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck --check-prefixes=CHECK-SAFE,FMF %s
 
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
@@ -8,263 +7,305 @@
 declare float @llvm.sqrt.f32(float)
 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
 
-define double @foo(double %a, double %b) nounwind {
-  %x = call double @llvm.sqrt.f64(double %b)
-  %r = fdiv double %a, %x
+define double @foo_fmf(double %a, double %b) nounwind {
+; FMF: @foo
+; FMF: frsqrte
+; FMF: fmul
+; FMF-NEXT: fmadd
+; FMF-NEXT: fmul
+; FMF-NEXT: fmul
+; FMF-NEXT: fmul
+; FMF-NEXT: fmadd
+; FMF-NEXT: fmul
+; FMF-NEXT: fmul
+; FMF-NEXT: fmul
+; FMF: blr
+  %x = call fast double @llvm.sqrt.f64(double %b)
+  %r = fdiv fast double %a, %x
   ret double %r
+}
 
-; CHECK: @foo
-; CHECK: frsqrte
-; CHECK: fmul
-; CHECK-NEXT: fmadd
-; CHECK-NEXT: fmul
-; CHECK-NEXT: fmul
-; CHECK-NEXT: fmul
-; CHECK-NEXT: fmadd
-; CHECK-NEXT: fmul
-; CHECK-NEXT: fmul
-; CHECK-NEXT: fmul
-; CHECK: blr
-
+define double @foo_safe(double %a, double %b) nounwind {
 ; CHECK-SAFE: @foo
 ; CHECK-SAFE: fsqrt
 ; CHECK-SAFE: fdiv
 ; CHECK-SAFE: blr
-}
-
-define double @no_estimate_refinement_f64(double %a, double %b) #0 {
   %x = call double @llvm.sqrt.f64(double %b)
   %r = fdiv double %a, %x
   ret double %r
-
-; CHECK-LABEL: @no_estimate_refinement_f64
-; CHECK: frsqrte
-; CHECK-NOT: fmadd
-; CHECK: fmul
-; CHECK-NOT: fmadd
-; CHECK: blr
 }
 
+define double @no_estimate_refinement_f64(double %a, double %b) #0 {
+; FMF-LABEL: @no_estimate_refinement_f64
+; FMF: frsqrte
+; FMF-NOT: fmadd
+; FMF: fmul
+; FMF-NOT: fmadd
+; FMF: blr
+  %x = call fast double @llvm.sqrt.f64(double %b)
+  %r = fdiv fast double %a, %x
+  ret double %r
+}
 
-define double @foof(double %a, float %b) nounwind {
-  %x = call float @llvm.sqrt.f32(float %b)
+define double @foof_fmf(double %a, float %b) nounwind {
+; FMF: @foof_fmf
+; FMF-DAG: frsqrtes
+; FMF: fmuls
+; FMF-NEXT: fmadds
+; FMF-NEXT: fmuls
+; FMF-NEXT: fmuls
+; FMF-NEXT: fmul
+; FMF-NEXT: blr
+  %x = call fast float @llvm.sqrt.f32(float %b)
   %y = fpext float %x to double
-  %r = fdiv double %a, %y
+  %r = fdiv fast double %a, %y
   ret double %r
+}
 
-; CHECK: @foof
-; CHECK-DAG: frsqrtes
-; CHECK: fmuls
-; CHECK-NEXT: fmadds
-; CHECK-NEXT: fmuls
-; CHECK-NEXT: fmuls
-; CHECK-NEXT: fmul
-; CHECK-NEXT: blr
-
-; CHECK-SAFE: @foof
+define double @foof_safe(double %a, float %b) nounwind {
+; CHECK-SAFE: @foof_safe
 ; CHECK-SAFE: fsqrts
 ; CHECK-SAFE: fdiv
 ; CHECK-SAFE: blr
+  %x = call float @llvm.sqrt.f32(float %b)
+  %y = fpext float %x to double
+  %r = fdiv double %a, %y
+  ret double %r
 }
 
-define float @food(float %a, double %b) nounwind {
-  %x = call double @llvm.sqrt.f64(double %b)
+define float @food_fmf(float %a, double %b) nounwind {
+; FMF: @food_fmf
+; FMF-DAG: frsqrte
+; FMF: fmul
+; FMF-NEXT: fmadd
+; FMF-NEXT: fmul
+; FMF-NEXT: fmul
+; FMF-NEXT: fmul
+; FMF-NEXT: fmadd
+; FMF-NEXT: fmul
+; FMF-NEXT: fmul
+; FMF-NEXT: frsp
+; FMF-NEXT: fmuls
+; FMF-NEXT: blr
+  %x = call fast double @llvm.sqrt.f64(double %b)
   %y = fptrunc double %x to float
-  %r = fdiv float %a, %y
+  %r = fdiv fast float %a, %y
   ret float %r
+}
 
-; CHECK: @foo
-; CHECK-DAG: frsqrte
-; CHECK: fmul
-; CHECK-NEXT: fmadd
-; CHECK-NEXT: fmul
-; CHECK-NEXT: fmul
-; CHECK-NEXT: fmul
-; CHECK-NEXT: fmadd
-; CHECK-NEXT: fmul
-; CHECK-NEXT: fmul
-; CHECK-NEXT: frsp
-; CHECK-NEXT: fmuls
-; CHECK-NEXT: blr
-
-; CHECK-SAFE: @foo
+define float @food_safe(float %a, double %b) nounwind {
+; CHECK-SAFE: @food_safe
 ; CHECK-SAFE: fsqrt
 ; CHECK-SAFE: fdivs
 ; CHECK-SAFE: blr
+  %x = call double @llvm.sqrt.f64(double %b)
+  %y = fptrunc double %x to float
+  %r = fdiv float %a, %y
+  ret float %r
 }
 
-define float @goo(float %a, float %b) nounwind {
-  %x = call float @llvm.sqrt.f32(float %b)
-  %r = fdiv float %a, %x
+define float @goo_fmf(float %a, float %b) nounwind {
+; FMF: @goo_fmf
+; FMF-DAG: frsqrtes
+; FMF: fmuls
+; FMF-NEXT: fmadds
+; FMF-NEXT: fmuls
+; FMF-NEXT: fmuls
+; FMF-NEXT: fmuls
+; FMF-NEXT: blr
+  %x = call fast float @llvm.sqrt.f32(float %b)
+  %r = fdiv fast float %a, %x
   ret float %r
+}
 
-; CHECK: @goo
-; CHECK-DAG: frsqrtes
-; CHECK: fmuls
-; CHECK-NEXT: fmadds
-; CHECK-NEXT: fmuls
-; CHECK-NEXT: fmuls
-; CHECK-NEXT: fmuls
-; CHECK-NEXT: blr
-
-; CHECK-SAFE: @goo
+define float @goo_safe(float %a, float %b) nounwind {
+; CHECK-SAFE: @goo_safe
 ; CHECK-SAFE: fsqrts
 ; CHECK-SAFE: fdivs
 ; CHECK-SAFE: blr
-}
-
-
-define float @no_estimate_refinement_f32(float %a, float %b) #0 {
   %x = call float @llvm.sqrt.f32(float %b)
   %r = fdiv float %a, %x
   ret float %r
+}
 
-; CHECK-LABEL: @no_estimate_refinement_f32
-; CHECK: frsqrtes
-; CHECK-NOT: fmadds
-; CHECK: fmuls
-; CHECK-NOT: fmadds
-; CHECK: blr
+define float @no_estimate_refinement_f32(float %a, float %b) #0 {
+; FMF-LABEL: @no_estimate_refinement_f32
+; FMF: frsqrtes
+; FMF-NOT: fmadds
+; FMF: fmuls
+; FMF-NOT: fmadds
+; FMF: blr
+  %x = call fast float @llvm.sqrt.f32(float %b)
+  %r = fdiv fast float %a, %x
+  ret float %r
 }
 
 ; Recognize that this is rsqrt(a) * rcp(b) * c,
 ; not 1 / ( 1 / sqrt(a)) * rcp(b) * c.
-define float @rsqrt_fmul(float %a, float %b, float %c) {
-  %x = call float @llvm.sqrt.f32(float %a)
-  %y = fmul float %x, %b
-  %z = fdiv float %c, %y
+define float @rsqrt_fmul_fmf(float %a, float %b, float %c) {
+; FMF: @rsqrt_fmul_fmf
+; FMF-DAG: frsqrtes
+; FMF-DAG: fres
+; FMF-DAG: fnmsubs
+; FMF-DAG: fmuls
+; FMF-DAG: fmadds
+; FMF-DAG: fmadds
+; FMF: fmuls
+; FMF-NEXT: fmuls
+; FMF-NEXT: fmuls
+; FMF-NEXT: blr
+  %x = call fast float @llvm.sqrt.f32(float %a)
+  %y = fmul fast float %x, %b
+  %z = fdiv fast float %c, %y
   ret float %z
+}
 
-; CHECK: @rsqrt_fmul
-; CHECK-DAG: frsqrtes
-; CHECK-DAG: fres
-; CHECK-DAG: fnmsubs
-; CHECK-DAG: fmuls
-; CHECK-DAG: fmadds
-; CHECK-DAG: fmadds
-; CHECK: fmuls
-; CHECK-NEXT: fmuls
-; CHECK-NEXT: fmuls
-; CHECK-NEXT: blr
-
-; CHECK-SAFE: @rsqrt_fmul
+; Recognize that this is rsqrt(a) * rcp(b) * c,
+; not 1 / ( 1 / sqrt(a)) * rcp(b) * c.
+define float @rsqrt_fmul_safe(float %a, float %b, float %c) {
+; CHECK-SAFE: @rsqrt_fmul_safe
 ; CHECK-SAFE: fsqrts
 ; CHECK-SAFE: fmuls
 ; CHECK-SAFE: fdivs
 ; CHECK-SAFE: blr
+  %x = call float @llvm.sqrt.f32(float %a)
+  %y = fmul float %x, %b
+  %z = fdiv float %c, %y
+  ret float %z
 }
 
-define <4 x float> @hoo(<4 x float> %a, <4 x float> %b) nounwind {
-  %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
-  %r = fdiv <4 x float> %a, %x
+define <4 x float> @hoo_fmf(<4 x float> %a, <4 x float> %b) nounwind {
+; FMF: @hoo_fmf
+; FMF: vrsqrtefp
+  %x = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
+  %r = fdiv fast <4 x float> %a, %x
   ret <4 x float> %r
+}
 
-; CHECK: @hoo
-; CHECK: vrsqrtefp
-
-; CHECK-SAFE: @hoo
+define <4 x float> @hoo_safe(<4 x float> %a, <4 x float> %b) nounwind {
+; CHECK-SAFE: @hoo_safe
 ; CHECK-SAFE-NOT: vrsqrtefp
 ; CHECK-SAFE: blr
+  %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
+  %r = fdiv <4 x float> %a, %x
+  ret <4 x float> %r
 }
 
-define double @foo2(double %a, double %b) nounwind {
-  %r = fdiv double %a, %b
+define double @foo2_fmf(double %a, double %b) nounwind {
+; FMF: @foo2_fmf
+; FMF-DAG: fre
+; FMF-DAG: fnmsub
+; FMF: fmadd
+; FMF-NEXT: fnmsub
+; FMF-NEXT: fmadd
+; FMF-NEXT: fmul
+; FMF-NEXT: blr
+  %r = fdiv fast double %a, %b
   ret double %r
+}
 
-; CHECK: @foo2
-; CHECK-DAG: fre
-; CHECK-DAG: fnmsub
-; CHECK: fmadd
-; CHECK-NEXT: fnmsub
-; CHECK-NEXT: fmadd
-; CHECK-NEXT: fmul
-; CHECK-NEXT: blr
-
-; CHECK-SAFE: @foo2
+define double @foo2_safe(double %a, double %b) nounwind {
+; CHECK-SAFE: @foo2_safe
 ; CHECK-SAFE: fdiv
 ; CHECK-SAFE: blr
+  %r = fdiv double %a, %b
+  ret double %r
 }
 
-define float @goo2(float %a, float %b) nounwind {
-  %r = fdiv float %a, %b
+define float @goo2_fmf(float %a, float %b) nounwind {
+; FMF: @goo2_fmf
+; FMF-DAG: fres
+; FMF-DAG: fnmsubs
+; FMF: fmadds
+; FMF-NEXT: fmuls
+; FMF-NEXT: blr
+  %r = fdiv fast float %a, %b
   ret float %r
+}
 
-; CHECK: @goo2
-; CHECK-DAG: fres
-; CHECK-DAG: fnmsubs
-; CHECK: fmadds
-; CHECK-NEXT: fmuls
-; CHECK-NEXT: blr
-
-; CHECK-SAFE: @goo2
+define float @goo2_safe(float %a, float %b) nounwind {
+; CHECK-SAFE: @goo2_safe
 ; CHECK-SAFE: fdivs
 ; CHECK-SAFE: blr
+  %r = fdiv float %a, %b
+  ret float %r
 }
 
-define <4 x float> @hoo2(<4 x float> %a, <4 x float> %b) nounwind {
-  %r = fdiv <4 x float> %a, %b
+define <4 x float> @hoo2_fmf(<4 x float> %a, <4 x float> %b) nounwind {
+; FMF: @hoo2_fmf
+; FMF: vrefp
+  %r = fdiv fast <4 x float> %a, %b
   ret <4 x float> %r
+}
 
-; CHECK: @hoo2
-; CHECK: vrefp
-
-; CHECK-SAFE: @hoo2
+define <4 x float> @hoo2_safe(<4 x float> %a, <4 x float> %b) nounwind {
+; CHECK-SAFE: @hoo2_safe
 ; CHECK-SAFE-NOT: vrefp
 ; CHECK-SAFE: blr
+  %r = fdiv <4 x float> %a, %b
+  ret <4 x float> %r
 }
 
-define double @foo3(double %a) nounwind {
-  %r = call double @llvm.sqrt.f64(double %a)
+define double @foo3_fmf(double %a) nounwind {
+; FMF: @foo3_fmf
+; FMF: fcmpu
+; FMF-DAG: frsqrte
+; FMF: fmul
+; FMF-NEXT: fmadd
+; FMF-NEXT: fmul
+; FMF-NEXT: fmul
+; FMF-NEXT: fmul
+; FMF-NEXT: fmadd
+; FMF-NEXT: fmul
+; FMF-NEXT: fmul
+; FMF: blr
+  %r = call fast double @llvm.sqrt.f64(double %a)
   ret double %r
+}
 
-; CHECK: @foo3
-; CHECK: fcmpu
-; CHECK-DAG: frsqrte
-; CHECK: fmul
-; CHECK-NEXT: fmadd
-; CHECK-NEXT: fmul
-; CHECK-NEXT: fmul
-; CHECK-NEXT: fmul
-; CHECK-NEXT: fmadd
-; CHECK-NEXT: fmul
-; CHECK-NEXT: fmul
-; CHECK: blr
-
-; CHECK-SAFE: @foo3
+define double @foo3_safe(double %a) nounwind {
+; CHECK-SAFE: @foo3_safe
 ; CHECK-SAFE: fsqrt
 ; CHECK-SAFE: blr
+  %r = call double @llvm.sqrt.f64(double %a)
+  ret double %r
 }
 
-define float @goo3(float %a) nounwind {
-  %r = call float @llvm.sqrt.f32(float %a)
+define float @goo3_fmf(float %a) nounwind {
+; FMF: @goo3_fmf
+; FMF: fcmpu
+; FMF-DAG: frsqrtes
+; FMF: fmuls
+; FMF-NEXT: fmadds
+; FMF-NEXT: fmuls
+; FMF-NEXT: fmuls
+; FMF: blr
+  %r = call fast float @llvm.sqrt.f32(float %a)
   ret float %r
+}
 
-; CHECK: @goo3
-; CHECK: fcmpu
-; CHECK-DAG: frsqrtes
-; CHECK: fmuls
-; CHECK-NEXT: fmadds
-; CHECK-NEXT: fmuls
-; CHECK-NEXT: fmuls
-; CHECK: blr
-
-; CHECK-SAFE: @goo3
+define float @goo3_safe(float %a) nounwind {
+; CHECK-SAFE: @goo3_safe
 ; CHECK-SAFE: fsqrts
 ; CHECK-SAFE: blr
+  %r = call float @llvm.sqrt.f32(float %a)
+  ret float %r
 }
 
-define <4 x float> @hoo3(<4 x float> %a) nounwind {
-  %r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
+define <4 x float> @hoo3_fmf(<4 x float> %a) nounwind {
+; FMF: @hoo3_fmf
+; FMF: vrsqrtefp
+; FMF-DAG: vcmpeqfp
+  %r = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
   ret <4 x float> %r
+}
 
-; CHECK: @hoo3
-; CHECK: vrsqrtefp
-; CHECK-DAG: vcmpeqfp
-
-; CHECK-SAFE: @hoo3
+define <4 x float> @hoo3_safe(<4 x float> %a) nounwind {
+; CHECK-SAFE: @hoo3_safe
 ; CHECK-SAFE-NOT: vrsqrtefp
 ; CHECK-SAFE: blr
+  %r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
+  ret <4 x float> %r
 }
 
 attributes #0 = { nounwind "reciprocal-estimates"="sqrtf:0,sqrtd:0" }
-
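As the recipest.ll changes above show, the reciprocal and rsqrt estimate expansions now key on the fast flag of the individual fdiv or sqrt call, while the "reciprocal-estimates" function attribute still tunes the refinement step count. A sketch combining the two (hypothetical, not part of the patch):

; recip-attr.ll (hypothetical): fdiv fast requests the estimate; the
; attribute caps Newton-Raphson refinement at one iteration for divf.
define float @recip_one_step(float %a, float %b) #1 {
  %r = fdiv fast float %a, %b
  ret float %r
}
attributes #1 = { "reciprocal-estimates"="divf:1" }
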
Index: test/CodeGen/X86/dagcombine-unsafe-math.ll
===================================================================
--- test/CodeGen/X86/dagcombine-unsafe-math.ll
+++ test/CodeGen/X86/dagcombine-unsafe-math.ll
@@ -1,5 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -enable-unsafe-fp-math -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s
 
 ; rdar://13126763
@@ -62,9 +61,9 @@
   %splat = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> zeroinitializer
   %v1 = extractelement <4 x float> %splat, i32 1
   %v0 = extractelement <4 x float> %splat, i32 0
-  %add1 = fadd float %v0, %v1
+  %add1 = fadd contract reassoc nsz float %v0, %v1
   %v2 = extractelement <4 x float> %splat, i32 2
-  %add2 = fadd float %v2, %add1
+  %add2 = fadd contract reassoc nsz float %v2, %add1
   ret float %add2
 }
Index: test/CodeGen/X86/fmul-combines.ll
===================================================================
--- test/CodeGen/X86/fmul-combines.ll
+++ test/CodeGen/X86/fmul-combines.ll
@@ -76,7 +76,7 @@
   ret <4 x float> %y
 }
 
-define <4 x float> @fmul0_v4f32_nsz_nnan(<4 x float> %x) #0 {
+define <4 x float> @fmul0_v4f32_nsz_nnan(<4 x float> %x) {
 ; CHECK-LABEL: fmul0_v4f32_nsz_nnan:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: xorps %xmm0, %xmm0
@@ -85,7 +85,7 @@
   ret <4 x float> %y
 }
 
-define <4 x float> @fmul0_v4f32_undef(<4 x float> %x) #0 {
+define <4 x float> @fmul0_v4f32_undef(<4 x float> %x) {
 ; CHECK-LABEL: fmul0_v4f32_undef:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: xorps %xmm0, %xmm0
@@ -94,23 +94,23 @@
   ret <4 x float> %y
 }
 
-define <4 x float> @fmul_c2_c4_v4f32(<4 x float> %x) #0 {
+define <4 x float> @fmul_c2_c4_v4f32(<4 x float> %x) {
 ; CHECK-LABEL: fmul_c2_c4_v4f32:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
-  %y = fmul <4 x float> %x, <float 2.0, float 2.0, float 2.0, float 2.0>
-  %z = fmul <4 x float> %y, <float 4.0, float 4.0, float 4.0, float 4.0>
+  %y = fmul fast <4 x float> %x, <float 2.0, float 2.0, float 2.0, float 2.0>
+  %z = fmul fast <4 x float> %y, <float 4.0, float 4.0, float 4.0, float 4.0>
   ret <4 x float> %z
 }
 
-define <4 x float> @fmul_c3_c4_v4f32(<4 x float> %x) #0 {
+define <4 x float> @fmul_c3_c4_v4f32(<4 x float> %x) {
 ; CHECK-LABEL: fmul_c3_c4_v4f32:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
-  %y = fmul <4 x float> %x, <float 3.0, float 3.0, float 3.0, float 3.0>
-  %z = fmul <4 x float> %y, <float 4.0, float 4.0, float 4.0, float 4.0>
+  %y = fmul fast <4 x float> %x, <float 3.0, float 3.0, float 3.0, float 3.0>
+  %z = fmul fast <4 x float> %y, <float 4.0, float 4.0, float 4.0, float 4.0>
   ret <4 x float> %z
 }
 
@@ -120,24 +120,24 @@
 ; CHECK: float 32
 
 ; We should be able to pre-multiply the two constant vectors.
-define <4 x float> @fmul_v4f32_two_consts_no_splat(<4 x float> %x) #0 {
+define <4 x float> @fmul_v4f32_two_consts_no_splat(<4 x float> %x) {
 ; CHECK-LABEL: fmul_v4f32_two_consts_no_splat:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
-  %y = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
-  %z = fmul <4 x float> %y, <float 5.0, float 6.0, float 7.0, float 8.0>
+  %y = fmul fast <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
+  %z = fmul fast <4 x float> %y, <float 5.0, float 6.0, float 7.0, float 8.0>
   ret <4 x float> %z
 }
 
 ; Same as above, but reverse operands to make sure non-canonical form is also handled.
-define <4 x float> @fmul_v4f32_two_consts_no_splat_non_canonical(<4 x float> %x) #0 {
+define <4 x float> @fmul_v4f32_two_consts_no_splat_non_canonical(<4 x float> %x) {
 ; CHECK-LABEL: fmul_v4f32_two_consts_no_splat_non_canonical:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
-  %y = fmul <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
-  %z = fmul <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>, %y
+  %y = fmul fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
+  %z = fmul fast <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>, %y
   ret <4 x float> %z
 }
 
@@ -172,14 +172,14 @@
 ; More than one use of a constant multiply should not inhibit the optimization.
 ; Instead of a chain of 2 dependent mults, this test will have 2 independent mults.
-define <4 x float> @fmul_v4f32_two_consts_no_splat_multiple_use(<4 x float> %x) #0 {
+define <4 x float> @fmul_v4f32_two_consts_no_splat_multiple_use(<4 x float> %x) {
 ; CHECK-LABEL: fmul_v4f32_two_consts_no_splat_multiple_use:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
-  %y = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
-  %z = fmul <4 x float> %y, <float 5.0, float 6.0, float 7.0, float 8.0>
-  %a = fadd <4 x float> %y, %z
+  %y = fmul fast <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
+  %z = fmul fast <4 x float> %y, <float 5.0, float 6.0, float 7.0, float 8.0>
+  %a = fadd fast <4 x float> %y, %z
   ret <4 x float> %a
 }
 
@@ -191,7 +191,7 @@
 ; CHECK: float 24
 ; CHECK: float 24
 
-define <4 x float> @PR22698_splats(<4 x float> %a) #0 {
+define <4 x float> @PR22698_splats(<4 x float> %a) {
 ; CHECK-LABEL: PR22698_splats:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
@@ -209,7 +209,7 @@
 ; CHECK: float 231
 ; CHECK: float 384
 
-define <4 x float> @PR22698_no_splats(<4 x float> %a) #0 {
+define <4 x float> @PR22698_no_splats(<4 x float> %a) {
 ; CHECK-LABEL: PR22698_no_splats:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
@@ -220,23 +220,23 @@
   ret <4 x float> %mul3
 }
 
-define float @fmul_c2_c4_f32(float %x) #0 {
+define float @fmul_c2_c4_f32(float %x) {
 ; CHECK-LABEL: fmul_c2_c4_f32:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
-  %y = fmul float %x, 2.0
-  %z = fmul float %y, 4.0
+  %y = fmul fast float %x, 2.0
+  %z = fmul fast float %y, 4.0
   ret float %z
 }
 
-define float @fmul_c3_c4_f32(float %x) #0 {
+define float @fmul_c3_c4_f32(float %x) {
 ; CHECK-LABEL: fmul_c3_c4_f32:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
-  %y = fmul float %x, 3.0
-  %z = fmul float %y, 4.0
+  %y = fmul fast float %x, 3.0
+  %z = fmul fast float %y, 4.0
   ret float %z
 }
 
@@ -261,5 +261,3 @@
   %mul = fmul <4 x float> %x.neg, %y.neg
   ret <4 x float> %mul
 }
-
-attributes #0 = { "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
Index: test/CodeGen/X86/fp-fast.ll
===================================================================
--- test/CodeGen/X86/fp-fast.ll
+++ test/CodeGen/X86/fp-fast.ll
@@ -1,106 +1,106 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx -enable-unsafe-fp-math --enable-no-nans-fp-math < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s
 
-define float @test1(float %a) {
+define float @test1(float %a) #0 {
 ; CHECK-LABEL: test1:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
 ; CHECK-NEXT: retq
-  %t1 = fadd float %a, %a
-  %r = fadd float %t1, %t1
+  %t1 = fadd nnan contract reassoc nsz float %a, %a
+  %r = fadd nnan contract reassoc nsz float %t1, %t1
   ret float %r
 }
 
-define float @test2(float %a) {
+define float @test2(float %a) #0 {
 ; CHECK-LABEL: test2:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
 ; CHECK-NEXT: retq
-  %t1 = fmul float 4.0, %a
-  %t2 = fadd float %a, %a
-  %r = fadd float %t1, %t2
+  %t1 = fmul nnan contract reassoc nsz float 4.0, %a
+  %t2 = fadd nnan contract reassoc nsz float %a, %a
+  %r = fadd nnan contract reassoc nsz float %t1, %t2
   ret float %r
 }
 
-define float @test3(float %a) {
+define float @test3(float %a) #0 {
 ; CHECK-LABEL: test3:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
 ; CHECK-NEXT: retq
-  %t1 = fmul float %a, 4.0
-  %t2 = fadd float %a, %a
-  %r = fadd float %t1, %t2
+  %t1 = fmul nnan contract reassoc nsz float %a, 4.0
+  %t2 = fadd nnan contract reassoc nsz float %a, %a
+  %r = fadd nnan contract reassoc nsz float %t1, %t2
   ret float %r
 }
 
-define float @test4(float %a) {
+define float @test4(float %a) #0 {
 ; CHECK-LABEL: test4:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
 ; CHECK-NEXT: retq
-  %t1 = fadd float %a, %a
-  %t2 = fmul float 4.0, %a
-  %r = fadd float %t1, %t2
+  %t1 = fadd nnan contract reassoc nsz float %a, %a
+  %t2 = fmul nnan contract reassoc nsz float 4.0, %a
+  %r = fadd nnan contract reassoc nsz float %t1, %t2
   ret float %r
 }
 
-define float @test5(float %a) {
+define float @test5(float %a) #0 {
 ; CHECK-LABEL: test5:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
 ; CHECK-NEXT: retq
-  %t1 = fadd float %a, %a
-  %t2 = fmul float %a, 4.0
-  %r = fadd float %t1, %t2
+  %t1 = fadd nnan contract reassoc nsz float %a, %a
+  %t2 = fmul nnan contract reassoc nsz float %a, 4.0
+  %r = fadd nnan contract reassoc nsz float %t1, %t2
   ret float %r
 }
 
-define float @test6(float %a) {
+define float @test6(float %a) #0 {
 ; CHECK-LABEL: test6:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT: retq
-  %t1 = fmul float 2.0, %a
-  %t2 = fadd float %a, %a
-  %r = fsub float %t1, %t2
+  %t1 = fmul nnan contract reassoc nsz float 2.0, %a
+  %t2 = fadd nnan contract reassoc nsz float %a, %a
+  %r = fsub nnan contract reassoc nsz float %t1, %t2
   ret float %r
 }
 
-define float @test7(float %a) {
+define float @test7(float %a) #0 {
 ; CHECK-LABEL: test7:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT: retq
-  %t1 = fmul float %a, 2.0
-  %t2 = fadd float %a, %a
-  %r = fsub float %t1, %t2
+  %t1 = fmul nnan contract reassoc nsz float %a, 2.0
+  %t2 = fadd nnan contract reassoc nsz float %a, %a
+  %r = fsub nnan contract reassoc nsz float %t1, %t2
   ret float %r
 }
 
-define float @test8(float %a) {
+define float @test8(float %a) #0 {
 ; CHECK-LABEL: test8:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: retq
-  %t1 = fmul float %a, 0.0
-  %t2 = fadd float %a, %t1
+  %t1 = fmul nnan contract reassoc nsz float %a, 0.0
+  %t2 = fadd nnan contract reassoc nsz float %a, %t1
   ret float %t2
 }
 
-define float @test9(float %a) {
+define float @test9(float %a) #0 {
 ; CHECK-LABEL: test9:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: retq
-  %t1 = fmul float 0.0, %a
-  %t2 = fadd float %t1, %a
+  %t1 = fmul nnan contract reassoc nsz float 0.0, %a
+  %t2 = fadd nnan contract reassoc nsz float %t1, %a
   ret float %t2
 }
 
-define float @test10(float %a) {
+define float @test10(float %a) #0 {
 ; CHECK-LABEL: test10:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT: retq
-  %t1 = fsub float -0.0, %a
-  %t2 = fadd float %a, %t1
+  %t1 = fsub nnan contract reassoc nsz float -0.0, %a
+  %t2 = fadd nnan contract reassoc nsz float %a, %t1
   ret float %t2
 }
+
Index: test/CodeGen/X86/fp-fold.ll
===================================================================
--- test/CodeGen/X86/fp-fold.ll
+++ test/CodeGen/X86/fp-fold.ll
@@ -1,17 +1,11 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=ANY,STRICT
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -enable-unsafe-fp-math | FileCheck %s --check-prefixes=ANY,UNSAFE
 
-define float @fadd_zero(float %x) {
-; STRICT-LABEL: fadd_zero:
+define float @fadd_zero_strict(float %x) {
+; STRICT-LABEL: fadd_zero_strict:
 ; STRICT: # %bb.0:
 ; STRICT-NEXT: xorps %xmm1, %xmm1
 ; STRICT-NEXT: addss %xmm1, %xmm0
 ; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fadd_zero:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
   %r = fadd float %x, 0.0
   ret float %r
 }
@@ -192,34 +186,42 @@
   ret <4 x float> %r
 }
 
-define float @fsub_negzero(float %x) {
-; STRICT-LABEL: fsub_negzero:
+define float @fsub_negzero_strict(float %x) {
+; STRICT-LABEL: fsub_negzero_strict:
 ; STRICT: # %bb.0:
 ; STRICT-NEXT: xorps %xmm1, %xmm1
 ; STRICT-NEXT: addss %xmm1, %xmm0
 ; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fsub_negzero:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
   %r = fsub float %x, -0.0
   ret float %r
 }
 
-define <4 x float> @fsub_negzero_vector(<4 x float> %x) {
-; STRICT-LABEL: fsub_negzero_vector:
+define float @fsub_negzero_nsz(float %x) {
+; ANY-LABEL: fsub_negzero_nsz:
+; ANY: # %bb.0:
+; ANY-NEXT: retq
+  %r = fsub nsz float %x, -0.0
+  ret float %r
+}
+
+define <4 x float> @fsub_negzero_strict_vector(<4 x float> %x) {
+; STRICT-LABEL: fsub_negzero_strict_vector:
 ; STRICT: # %bb.0:
 ; STRICT-NEXT: xorps %xmm1, %xmm1
 ; STRICT-NEXT: addps %xmm1, %xmm0
 ; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fsub_negzero_vector:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
   %r = fsub <4 x float> %x, <float -0.0, float -0.0, float -0.0, float -0.0>
   ret <4 x float> %r
 }
 
+define <4 x float> @fsub_negzero_nsz_vector(<4 x float> %x) {
+; ANY-LABEL: fsub_negzero_nsz_vector:
+; ANY: # %bb.0:
+; ANY-NEXT: retq
+  %r = fsub nsz <4 x float> %x, <float -0.0, float -0.0, float -0.0, float -0.0>
+  ret <4 x float> %r
+}
+
 define float @fsub_zero_nsz_1(float %x) {
 ; ANY-LABEL: fsub_zero_nsz_1:
 ; ANY: # %bb.0:
@@ -237,14 +239,6 @@
   ret float %r
 }
 
-define float @fsub_negzero_nsz(float %x) {
-; ANY-LABEL: fsub_negzero_nsz:
-; ANY: # %bb.0:
-; ANY-NEXT: retq
-  %r = fsub nsz float %x, -0.0
-  ret float %r
-}
-
 define float @fmul_zero(float %x) {
 ; ANY-LABEL: fmul_zero:
 ; ANY: # %bb.0: