Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -839,7 +839,7 @@ }); } case ISD::FADD: - if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros()) + if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) return 0; // After operation legalization, it might not be legal to create new FSUBs. @@ -912,7 +912,7 @@ return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops); } case ISD::FADD: - assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros()); + assert(Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()); // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) if (isNegatibleForFree(Op.getOperand(0), LegalOperations, @@ -12017,7 +12017,7 @@ // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math) ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true); if (N1C && N1C->isZero()) - if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros()) + if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) return N0; if (SDValue NewSel = foldBinOpIntoSelect(N)) @@ -12075,7 +12075,7 @@ // If 'unsafe math' or reassoc and nsz, fold lots of things. // TODO: break out portions of the transformations below for which Unsafe is // considered and which do not require both nsz and reassoc - if ((Options.UnsafeFPMath || + if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) || (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) && AllowNewConst) { // fadd (fadd x, c1), c2 -> fadd x, c1 + c2 @@ -12194,7 +12194,7 @@ // (fsub A, 0) -> A if (N1CFP && N1CFP->isZero()) { - if (!N1CFP->isNegative() || Options.UnsafeFPMath || + if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) { return N0; } @@ -12221,7 +12221,7 @@ } } - if ((Options.UnsafeFPMath || + if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) || (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) && N1.getOpcode() == ISD::FADD) { // X - (X + Y) -> -Y Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4630,7 +4630,7 @@ return getUNDEF(VT); // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 - if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) && + if ((getTarget().Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) && OpOpcode == ISD::FSUB) return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1), Operand.getOperand(0), Flags); Index: llvm/trunk/test/CodeGen/AArch64/fadd-combines.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/fadd-combines.ll +++ llvm/trunk/test/CodeGen/AArch64/fadd-combines.ll @@ -146,28 +146,23 @@ ret float %a3 } -; DAGCombiner transforms this into: (x + 59.0) + (x + 17.0). -; The machine combiner transforms this into a chain of 3 dependent adds: -; ((x + 59.0) + 17.0) + x - -define float @fadd_const_multiuse_attr(float %x) #0 { +; DAGCombiner transforms this into: (x + 17.0) + (x + 59.0). +define float @fadd_const_multiuse_attr(float %x) { ; CHECK-LABEL: fadd_const_multiuse_attr: ; CHECK: // %bb.0: -; CHECK-DAG: mov [[W59:w[0-9]+]], #1114374144 ; CHECK-DAG: mov [[W17:w[0-9]+]], #1109917696 -; CHECK-NEXT: fmov [[FP59:s[0-9]+]], [[W59]] +; CHECK-DAG: mov [[W59:w[0-9]+]], #1114374144 ; CHECK-NEXT: fmov [[FP17:s[0-9]+]], [[W17]] -; CHECK-NEXT: fadd [[TMP1:s[0-9]+]], s0, [[FP59]] -; CHECK-NEXT: fadd [[TMP2:s[0-9]+]], [[FP17]], [[TMP1]] -; CHECK-NEXT: fadd s0, s0, [[TMP2]] +; CHECK-NEXT: fmov [[FP59:s[0-9]+]], [[W59]] +; CHECK-NEXT: fadd [[TMP1:s[0-9]+]], s0, [[FP17]] +; CHECK-NEXT: fadd [[TMP2:s[0-9]+]], s0, [[FP59]] +; CHECK-NEXT: fadd s0, [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret - %a1 = fadd float %x, 42.0 - %a2 = fadd float %a1, 17.0 - %a3 = fadd float %a1, %a2 + %a1 = fadd fast float %x, 42.0 + %a2 = fadd fast float %a1, 17.0 + %a3 = fadd fast float %a1, %a2 ret float %a3 } -attributes #0 = { "unsafe-fp-math"="true" } - declare void @use(double) Index: llvm/trunk/test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll +++ llvm/trunk/test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll @@ -1,17 +1,29 @@ -; RUN: llc -march=amdgcn -enable-no-signed-zeros-fp-math=0 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SAFE %s -; RUN: llc -march=amdgcn -enable-no-signed-zeros-fp-math=1 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-UNSAFE %s -; RUN: llc -march=amdgcn -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-UNSAFE %s +; RUN: llc -march=amdgcn < %s | FileCheck --check-prefixes=GCN,GCN-FMF,GCN-SAFE %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone ; Test that the -enable-no-signed-zeros-fp-math flag works -; GCN-LABEL: {{^}}fneg_fsub_f32: +; GCN-LABEL: {{^}}fneg_fsub_f32_fmf: ; GCN: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} -; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]] +; GCN-FMF-NOT: xor +define amdgpu_kernel void @fneg_fsub_f32_fmf(float addrspace(1)* %out, float addrspace(1)* %in) #0 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %add = add i32 %tid, 1 + %gep = getelementptr float, float addrspace(1)* %in, i32 %tid + %b_ptr = getelementptr float, float addrspace(1)* %in, i32 %add + %a = load float, float addrspace(1)* %gep, align 4 + %b = load float, float addrspace(1)* %b_ptr, align 4 + %result = fsub fast float %a, %b + %neg.result = fsub fast float -0.0, %result + store float %neg.result, float addrspace(1)* %out, align 4 + ret void +} -; GCN-UNSAFE-NOT: xor -define amdgpu_kernel void @fneg_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { +; GCN-LABEL: {{^}}fneg_fsub_f32_safe: +; GCN: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} +; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]] +define amdgpu_kernel void @fneg_fsub_f32_safe(float addrspace(1)* %out, float addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %add = add i32 %tid, 1 %gep = getelementptr float, float addrspace(1)* %in, i32 %tid Index: llvm/trunk/test/CodeGen/AMDGPU/ffloor.f64.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/ffloor.f64.ll +++ llvm/trunk/test/CodeGen/AMDGPU/ffloor.f64.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s declare double @llvm.fabs.f64(double %Val) declare double @llvm.floor.f64(double) nounwind readnone @@ -20,7 +20,7 @@ ; SI: v_add_f64 ; SI: s_endpgm define amdgpu_kernel void @ffloor_f64(double addrspace(1)* %out, double %x) { - %y = call double @llvm.floor.f64(double %x) nounwind readnone + %y = call fast double @llvm.floor.f64(double %x) nounwind readnone store double %y, double addrspace(1)* %out ret void } @@ -35,8 +35,8 @@ ; SI: v_add_f64 {{v[[0-9]+:[0-9]+]}}, -[[INPUT]] ; SI: s_endpgm define amdgpu_kernel void @ffloor_f64_neg(double addrspace(1)* %out, double %x) { - %neg = fsub double 0.0, %x - %y = call double @llvm.floor.f64(double %neg) nounwind readnone + %neg = fsub nsz double 0.0, %x + %y = call fast double @llvm.floor.f64(double %neg) nounwind readnone store double %y, double addrspace(1)* %out ret void } @@ -51,9 +51,9 @@ ; SI: v_add_f64 {{v[[0-9]+:[0-9]+]}}, -|[[INPUT]]| ; SI: s_endpgm define amdgpu_kernel void @ffloor_f64_neg_abs(double addrspace(1)* %out, double %x) { - %abs = call double @llvm.fabs.f64(double %x) - %neg = fsub double 0.0, %abs - %y = call double @llvm.floor.f64(double %neg) nounwind readnone + %abs = call fast double @llvm.fabs.f64(double %x) + %neg = fsub nsz double 0.0, %abs + %y = call fast double @llvm.floor.f64(double %neg) nounwind readnone store double %y, double addrspace(1)* %out ret void } @@ -62,7 +62,7 @@ ; CI: v_floor_f64_e32 ; CI: v_floor_f64_e32 define amdgpu_kernel void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { - %y = call <2 x double> @llvm.floor.v2f64(<2 x double> %x) nounwind readnone + %y = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %x) nounwind readnone store <2 x double> %y, <2 x double> addrspace(1)* %out ret void } @@ -73,7 +73,7 @@ ; CI: v_floor_f64_e32 ; CI-NOT: v_floor_f64_e32 define amdgpu_kernel void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) { - %y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone + %y = call fast <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone store <3 x double> %y, <3 x double> addrspace(1)* %out ret void } @@ -84,7 +84,7 @@ ; CI: v_floor_f64_e32 ; CI: v_floor_f64_e32 define amdgpu_kernel void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) { - %y = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone + %y = call fast <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone store <4 x double> %y, <4 x double> addrspace(1)* %out ret void } @@ -99,7 +99,7 @@ ; CI: v_floor_f64_e32 ; CI: v_floor_f64_e32 define amdgpu_kernel void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) { - %y = call <8 x double> @llvm.floor.v8f64(<8 x double> %x) nounwind readnone + %y = call fast <8 x double> @llvm.floor.v8f64(<8 x double> %x) nounwind readnone store <8 x double> %y, <8 x double> addrspace(1)* %out ret void } @@ -122,7 +122,7 @@ ; CI: v_floor_f64_e32 ; CI: v_floor_f64_e32 define amdgpu_kernel void @ffloor_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) { - %y = call <16 x double> @llvm.floor.v16f64(<16 x double> %x) nounwind readnone + %y = call fast <16 x double> @llvm.floor.v16f64(<16 x double> %x) nounwind readnone store <16 x double> %y, <16 x double> addrspace(1)* %out ret void } Index: llvm/trunk/test/CodeGen/AMDGPU/fneg-combines.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/fneg-combines.ll +++ llvm/trunk/test/CodeGen/AMDGPU/fneg-combines.ll @@ -219,8 +219,11 @@ ; GCN-SAFE-DAG: v_mad_f32 [[A:v[0-9]+]], ; GCN-SAFE-DAG: v_cmp_ngt_f32_e32 {{.*}}, [[A]] ; GCN-SAFE-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, -[[A]] -; GCN-NSZ-DAG: v_mac_f32_e32 [[C:v[0-9]+]], -; GCN-NSZ-DAG: v_cmp_nlt_f32_e64 {{.*}}, -[[C]] +; GCN-NSZ-DAG: v_rcp_f32_e32 [[A:v[0-9]+]], +; GCN-NSZ-DAG: v_mov_b32_e32 [[B:v[0-9]+]], +; GCN-NSZ-DAG: v_mov_b32_e32 [[C:v[0-9]+]], +; GCN-NSZ-DAG: v_mul_f32_e32 [[D:v[0-9]+]], +; GCN-NSZ-DAG: v_cmp_nlt_f32_e64 {{.*}}, -[[D]] define amdgpu_ps float @fneg_fadd_0(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #0 { .entry: Index: llvm/trunk/test/CodeGen/PowerPC/fma-mutate.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/fma-mutate.ll +++ llvm/trunk/test/CodeGen/PowerPC/fma-mutate.ll @@ -3,19 +3,26 @@ ; same as the FMA target register. The second one is legal. The third ; one doesn't fit the feeding-copy pattern. -; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math -mattr=+vsx -disable-ppc-vsx-fma-mutation=false | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx -disable-ppc-vsx-fma-mutation=false | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" declare double @llvm.sqrt.f64(double) -define double @foo3(double %a) nounwind { - %r = call double @llvm.sqrt.f64(double %a) - ret double %r - -; CHECK: @foo3 +define double @foo3_fmf(double %a) nounwind { +; CHECK: @foo3_fmf ; CHECK-NOT: fmr ; CHECK: xsmaddmdp ; CHECK: xsmaddadp + %r = call fast double @llvm.sqrt.f64(double %a) + ret double %r +} + +define double @foo3_safe(double %a) nounwind { +; CHECK: @foo3_safe +; CHECK-NOT: fmr +; CHECK: xssqrtdp + %r = call double @llvm.sqrt.f64(double %a) + ret double %r } Index: llvm/trunk/test/CodeGen/PowerPC/fmf-propagation.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/fmf-propagation.ll +++ llvm/trunk/test/CodeGen/PowerPC/fmf-propagation.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 | FileCheck %s --check-prefix=FMFDEBUG ; RUN: llc < %s -mtriple=powerpc64le | FileCheck %s --check-prefix=FMF ; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=GLOBALDEBUG -; RUN: llc < %s -mtriple=powerpc64le -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=GLOBAL +; RUN: llc < %s -mtriple=powerpc64le -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math | FileCheck %s --check-prefix=GLOBAL ; Test FP transforms using instruction/node-level fast-math-flags. ; We're also checking debug output to verify that FMF is propagated to the newly created nodes. Index: llvm/trunk/test/CodeGen/PowerPC/qpx-recipest.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/qpx-recipest.ll +++ llvm/trunk/test/CodeGen/PowerPC/qpx-recipest.ll @@ -1,45 +1,41 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q -enable-unsafe-fp-math | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck -check-prefix=CHECK-SAFE %s +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) -define <4 x double> @foo(<4 x double> %a, <4 x double> %b) nounwind { -entry: - %x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b) - %r = fdiv <4 x double> %a, %x - ret <4 x double> %r - -; CHECK-LABEL: @foo +define <4 x double> @foo_fmf(<4 x double> %a, <4 x double> %b) nounwind { +; CHECK-LABEL: @foo_fmf ; CHECK: qvfrsqrte ; CHECK-DAG: qvfmul -; FIXME: We're currently loading two constants here (1.5 and -1.5), and using -; an qvfmadd instead of a qvfnmsub -; CHECK-DAG: qvfmadd -; CHECK-DAG: qvfmadd +; CHECK-DAG: qvfmsub +; CHECK-DAG: qvfnmsub ; CHECK: qvfmul ; CHECK: qvfmul -; CHECK: qvfmadd +; CHECK: qvfnmsub ; CHECK: qvfmul ; CHECK: qvfmul ; CHECK: blr - -; CHECK-SAFE-LABEL: @foo -; CHECK-SAFE: fsqrt -; CHECK-SAFE: fdiv -; CHECK-SAFE: blr +entry: + %x = call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %b) + %r = fdiv fast <4 x double> %a, %x + ret <4 x double> %r } -define <4 x double> @foof(<4 x double> %a, <4 x float> %b) nounwind { +define <4 x double> @foo_safe(<4 x double> %a, <4 x double> %b) nounwind { +; CHECK-LABEL: @foo_safe +; CHECK: fsqrt +; CHECK: fdiv +; CHECK: blr entry: - %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b) - %y = fpext <4 x float> %x to <4 x double> - %r = fdiv <4 x double> %a, %y + %x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b) + %r = fdiv <4 x double> %a, %x ret <4 x double> %r +} -; CHECK-LABEL: @foof +define <4 x double> @foof_fmf(<4 x double> %a, <4 x float> %b) nounwind { +; CHECK-LABEL: @foof_fmf ; CHECK: qvfrsqrtes ; CHECK-DAG: qvfmuls ; FIXME: We're currently loading two constants here (1.5 and -1.5), and using @@ -49,48 +45,59 @@ ; CHECK: qvfmuls ; CHECK: qvfmul ; CHECK: blr - -; CHECK-SAFE-LABEL: @foof -; CHECK-SAFE: fsqrts -; CHECK-SAFE: fdiv -; CHECK-SAFE: blr +entry: + %x = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %b) + %y = fpext <4 x float> %x to <4 x double> + %r = fdiv fast <4 x double> %a, %y + ret <4 x double> %r } -define <4 x float> @food(<4 x float> %a, <4 x double> %b) nounwind { +define <4 x double> @foof_safe(<4 x double> %a, <4 x float> %b) nounwind { +; CHECK-LABEL: @foof_safe +; CHECK: fsqrts +; CHECK: fdiv +; CHECK: blr entry: - %x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b) - %y = fptrunc <4 x double> %x to <4 x float> - %r = fdiv <4 x float> %a, %y - ret <4 x float> %r + %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b) + %y = fpext <4 x float> %x to <4 x double> + %r = fdiv <4 x double> %a, %y + ret <4 x double> %r +} -; CHECK-LABEL: @food +define <4 x float> @food_fmf(<4 x float> %a, <4 x double> %b) nounwind { +; CHECK-LABEL: @food_fmf ; CHECK: qvfrsqrte ; CHECK-DAG: qvfmul -; FIXME: We're currently loading two constants here (1.5 and -1.5), and using -; an qvfmadd instead of a qvfnmsub -; CHECK-DAG: qvfmadd -; CHECK-DAG: qvfmadd +; CHECK-DAG: qvfmsub +; CHECK-DAG: qvfnmsub ; CHECK: qvfmul ; CHECK: qvfmul -; CHECK: qvfmadd +; CHECK: qvfnmsub ; CHECK: qvfmul ; CHECK: qvfrsp ; CHECK: qvfmuls ; CHECK: blr - -; CHECK-SAFE-LABEL: @food -; CHECK-SAFE: fsqrt -; CHECK-SAFE: fdivs -; CHECK-SAFE: blr +entry: + %x = call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %b) + %y = fptrunc <4 x double> %x to <4 x float> + %r = fdiv fast <4 x float> %a, %y + ret <4 x float> %r } -define <4 x float> @goo(<4 x float> %a, <4 x float> %b) nounwind { +define <4 x float> @food_safe(<4 x float> %a, <4 x double> %b) nounwind { +; CHECK-LABEL: @food_safe +; CHECK: fsqrt +; CHECK: fdivs +; CHECK: blr entry: - %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b) - %r = fdiv <4 x float> %a, %x + %x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b) + %y = fptrunc <4 x double> %x to <4 x float> + %r = fdiv <4 x float> %a, %y ret <4 x float> %r +} -; CHECK-LABEL: @goo +define <4 x float> @goo_fmf(<4 x float> %a, <4 x float> %b) nounwind { +; CHECK-LABEL: @goo_fmf ; CHECK: qvfrsqrtes ; CHECK-DAG: qvfmuls ; FIXME: We're currently loading two constants here (1.5 and -1.5), and using @@ -100,19 +107,25 @@ ; CHECK: qvfmuls ; CHECK: qvfmuls ; CHECK: blr - -; CHECK-SAFE-LABEL: @goo -; CHECK-SAFE: fsqrts -; CHECK-SAFE: fdivs -; CHECK-SAFE: blr +entry: + %x = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %b) + %r = fdiv fast <4 x float> %a, %x + ret <4 x float> %r } -define <4 x double> @foo2(<4 x double> %a, <4 x double> %b) nounwind { +define <4 x float> @goo_safe(<4 x float> %a, <4 x float> %b) nounwind { +; CHECK-LABEL: @goo_safe +; CHECK: fsqrts +; CHECK: fdivs +; CHECK: blr entry: - %r = fdiv <4 x double> %a, %b - ret <4 x double> %r + %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b) + %r = fdiv <4 x float> %a, %x + ret <4 x float> %r +} -; CHECK-LABEL: @foo2 +define <4 x double> @foo2_fmf(<4 x double> %a, <4 x double> %b) nounwind { +; CHECK-LABEL: @foo2_fmf ; CHECK: qvfre ; CHECK: qvfnmsub ; CHECK: qvfmadd @@ -120,61 +133,70 @@ ; CHECK: qvfmadd ; CHECK: qvfmul ; CHECK: blr - -; CHECK-SAFE-LABEL: @foo2 -; CHECK-SAFE: fdiv -; CHECK-SAFE: blr +entry: + %r = fdiv fast <4 x double> %a, %b + ret <4 x double> %r } -define <4 x float> @goo2(<4 x float> %a, <4 x float> %b) nounwind { -entry: - %r = fdiv <4 x float> %a, %b - ret <4 x float> %r +define <4 x double> @foo2_safe(<4 x double> %a, <4 x double> %b) nounwind { +; CHECK-LABEL: @foo2_safe +; CHECK: fdiv +; CHECK: blr + %r = fdiv <4 x double> %a, %b + ret <4 x double> %r +} -; CHECK-LABEL: @goo2 +define <4 x float> @goo2_fmf(<4 x float> %a, <4 x float> %b) nounwind { +; CHECK-LABEL: @goo2_fmf ; CHECK: qvfres ; CHECK: qvfnmsubs ; CHECK: qvfmadds ; CHECK: qvfmuls ; CHECK: blr - -; CHECK-SAFE-LABEL: @goo2 -; CHECK-SAFE: fdivs -; CHECK-SAFE: blr +entry: + %r = fdiv fast <4 x float> %a, %b + ret <4 x float> %r } -define <4 x double> @foo3(<4 x double> %a) nounwind { +define <4 x float> @goo2_safe(<4 x float> %a, <4 x float> %b) nounwind { +; CHECK-LABEL: @goo2_safe +; CHECK: fdivs +; CHECK: blr entry: - %r = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a) - ret <4 x double> %r + %r = fdiv <4 x float> %a, %b + ret <4 x float> %r +} -; CHECK-LABEL: @foo3 +define <4 x double> @foo3_fmf(<4 x double> %a) nounwind { +; CHECK-LABEL: @foo3_fmf ; CHECK: qvfrsqrte ; CHECK: qvfmul -; FIXME: We're currently loading two constants here (1.5 and -1.5), and using -; an qvfmadd instead of a qvfnmsub -; CHECK-DAG: qvfmadd +; CHECK-DAG: qvfmsub ; CHECK-DAG: qvfcmpeq -; CHECK-DAG: qvfmadd +; CHECK-DAG: qvfnmsub ; CHECK-DAG: qvfmul ; CHECK-DAG: qvfmul -; CHECK-DAG: qvfmadd +; CHECK-DAG: qvfnmsub ; CHECK-DAG: qvfmul ; CHECK-DAG: qvfmul ; CHECK: qvfsel ; CHECK: blr - -; CHECK-SAFE-LABEL: @foo3 -; CHECK-SAFE: fsqrt -; CHECK-SAFE: blr +entry: + %r = call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a) + ret <4 x double> %r } -define <4 x float> @goo3(<4 x float> %a) nounwind { +define <4 x double> @foo3_safe(<4 x double> %a) nounwind { +; CHECK-LABEL: @foo3_safe +; CHECK: fsqrt +; CHECK: blr entry: - %r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) - ret <4 x float> %r + %r = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a) + ret <4 x double> %r +} -; CHECK-LABEL: @goo3 +define <4 x float> @goo3_fmf(<4 x float> %a) nounwind { +; CHECK-LABEL: @goo3_fmf ; CHECK: qvfrsqrtes ; CHECK: qvfmuls ; FIXME: We're currently loading two constants here (1.5 and -1.5), and using @@ -186,9 +208,17 @@ ; CHECK-DAG: qvfmuls ; CHECK: qvfsel ; CHECK: blr +entry: + %r = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) + ret <4 x float> %r +} -; CHECK-SAFE-LABEL: @goo3 -; CHECK-SAFE: fsqrts -; CHECK-SAFE: blr +define <4 x float> @goo3_safe(<4 x float> %a) nounwind { +; CHECK-LABEL: @goo3_safe +; CHECK: fsqrts +; CHECK: blr +entry: + %r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) + ret <4 x float> %r } Index: llvm/trunk/test/CodeGen/PowerPC/recipest.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/recipest.ll +++ llvm/trunk/test/CodeGen/PowerPC/recipest.ll @@ -1,5 +1,4 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math -mattr=-vsx | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck -check-prefix=CHECK-SAFE %s +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -8,12 +7,8 @@ declare float @llvm.sqrt.f32(float) declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) -define double @foo(double %a, double %b) nounwind { - %x = call double @llvm.sqrt.f64(double %b) - %r = fdiv double %a, %x - ret double %r - -; CHECK: @foo +define double @foo_fmf(double %a, double %b) nounwind { +; CHECK: @foo_fmf ; CHECK: frsqrte ; CHECK: fmul ; CHECK-NEXT: fmadd @@ -25,34 +20,35 @@ ; CHECK-NEXT: fmul ; CHECK-NEXT: fmul ; CHECK: blr - -; CHECK-SAFE: @foo -; CHECK-SAFE: fsqrt -; CHECK-SAFE: fdiv -; CHECK-SAFE: blr + %x = call fast double @llvm.sqrt.f64(double %b) + %r = fdiv fast double %a, %x + ret double %r } -define double @no_estimate_refinement_f64(double %a, double %b) #0 { +define double @foo_safe(double %a, double %b) nounwind { +; CHECK: @foo_safe +; CHECK: fsqrt +; CHECK: fdiv +; CHECK: blr %x = call double @llvm.sqrt.f64(double %b) %r = fdiv double %a, %x ret double %r +} +define double @no_estimate_refinement_f64(double %a, double %b) #0 { ; CHECK-LABEL: @no_estimate_refinement_f64 ; CHECK: frsqrte ; CHECK-NOT: fmadd ; CHECK: fmul ; CHECK-NOT: fmadd ; CHECK: blr -} - - -define double @foof(double %a, float %b) nounwind { - %x = call float @llvm.sqrt.f32(float %b) - %y = fpext float %x to double - %r = fdiv double %a, %y + %x = call fast double @llvm.sqrt.f64(double %b) + %r = fdiv fast double %a, %x ret double %r +} -; CHECK: @foof +define double @foof_fmf(double %a, float %b) nounwind { +; CHECK: @foof_fmf ; CHECK-DAG: frsqrtes ; CHECK: fmuls ; CHECK-NEXT: fmadds @@ -60,20 +56,25 @@ ; CHECK-NEXT: fmuls ; CHECK-NEXT: fmul ; CHECK-NEXT: blr - -; CHECK-SAFE: @foof -; CHECK-SAFE: fsqrts -; CHECK-SAFE: fdiv -; CHECK-SAFE: blr + %x = call fast float @llvm.sqrt.f32(float %b) + %y = fpext float %x to double + %r = fdiv fast double %a, %y + ret double %r } -define float @food(float %a, double %b) nounwind { - %x = call double @llvm.sqrt.f64(double %b) - %y = fptrunc double %x to float - %r = fdiv float %a, %y - ret float %r +define double @foof_safe(double %a, float %b) nounwind { +; CHECK: @foof_safe +; CHECK: fsqrts +; CHECK: fdiv +; CHECK: blr + %x = call float @llvm.sqrt.f32(float %b) + %y = fpext float %x to double + %r = fdiv double %a, %y + ret double %r +} -; CHECK: @foo +define float @food_fmf(float %a, double %b) nounwind { +; CHECK: @food_fmf ; CHECK-DAG: frsqrte ; CHECK: fmul ; CHECK-NEXT: fmadd @@ -86,19 +87,25 @@ ; CHECK-NEXT: frsp ; CHECK-NEXT: fmuls ; CHECK-NEXT: blr - -; CHECK-SAFE: @foo -; CHECK-SAFE: fsqrt -; CHECK-SAFE: fdivs -; CHECK-SAFE: blr + %x = call fast double @llvm.sqrt.f64(double %b) + %y = fptrunc double %x to float + %r = fdiv fast float %a, %y + ret float %r } -define float @goo(float %a, float %b) nounwind { - %x = call float @llvm.sqrt.f32(float %b) - %r = fdiv float %a, %x +define float @food_safe(float %a, double %b) nounwind { +; CHECK: @food_safe +; CHECK: fsqrt +; CHECK: fdivs +; CHECK: blr + %x = call double @llvm.sqrt.f64(double %b) + %y = fptrunc double %x to float + %r = fdiv float %a, %y ret float %r +} -; CHECK: @goo +define float @goo_fmf(float %a, float %b) nounwind { +; CHECK: @goo_fmf ; CHECK-DAG: frsqrtes ; CHECK: fmuls ; CHECK-NEXT: fmadds @@ -106,36 +113,37 @@ ; CHECK-NEXT: fmuls ; CHECK-NEXT: fmuls ; CHECK-NEXT: blr - -; CHECK-SAFE: @goo -; CHECK-SAFE: fsqrts -; CHECK-SAFE: fdivs -; CHECK-SAFE: blr + %x = call fast float @llvm.sqrt.f32(float %b) + %r = fdiv fast float %a, %x + ret float %r } - -define float @no_estimate_refinement_f32(float %a, float %b) #0 { +define float @goo_safe(float %a, float %b) nounwind { +; CHECK: @goo_safe +; CHECK: fsqrts +; CHECK: fdivs +; CHECK: blr %x = call float @llvm.sqrt.f32(float %b) %r = fdiv float %a, %x ret float %r +} +define float @no_estimate_refinement_f32(float %a, float %b) #0 { ; CHECK-LABEL: @no_estimate_refinement_f32 ; CHECK: frsqrtes ; CHECK-NOT: fmadds ; CHECK: fmuls ; CHECK-NOT: fmadds ; CHECK: blr + %x = call fast float @llvm.sqrt.f32(float %b) + %r = fdiv fast float %a, %x + ret float %r } ; Recognize that this is rsqrt(a) * rcp(b) * c, ; not 1 / ( 1 / sqrt(a)) * rcp(b) * c. -define float @rsqrt_fmul(float %a, float %b, float %c) { - %x = call float @llvm.sqrt.f32(float %a) - %y = fmul float %x, %b - %z = fdiv float %c, %y - ret float %z - -; CHECK: @rsqrt_fmul +define float @rsqrt_fmul_fmf(float %a, float %b, float %c) { +; CHECK: @rsqrt_fmul_fmf ; CHECK-DAG: frsqrtes ; CHECK-DAG: fres ; CHECK-DAG: fnmsubs @@ -146,32 +154,45 @@ ; CHECK-NEXT: fmuls ; CHECK-NEXT: fmuls ; CHECK-NEXT: blr + %x = call fast float @llvm.sqrt.f32(float %a) + %y = fmul fast float %x, %b + %z = fdiv fast float %c, %y + ret float %z +} + +; Recognize that this is rsqrt(a) * rcp(b) * c, +; not 1 / ( 1 / sqrt(a)) * rcp(b) * c. +define float @rsqrt_fmul_safe(float %a, float %b, float %c) { +; CHECK: @rsqrt_fmul_safe +; CHECK: fsqrts +; CHECK: fmuls +; CHECK: fdivs +; CHECK: blr + %x = call float @llvm.sqrt.f32(float %a) + %y = fmul float %x, %b + %z = fdiv float %c, %y + ret float %z +} -; CHECK-SAFE: @rsqrt_fmul -; CHECK-SAFE: fsqrts -; CHECK-SAFE: fmuls -; CHECK-SAFE: fdivs -; CHECK-SAFE: blr +define <4 x float> @hoo_fmf(<4 x float> %a, <4 x float> %b) nounwind { +; CHECK: @hoo_fmf +; CHECK: vrsqrtefp + %x = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %b) + %r = fdiv fast <4 x float> %a, %x + ret <4 x float> %r } -define <4 x float> @hoo(<4 x float> %a, <4 x float> %b) nounwind { +define <4 x float> @hoo_safe(<4 x float> %a, <4 x float> %b) nounwind { +; CHECK: @hoo_safe +; CHECK-NOT: vrsqrtefp +; CHECK: blr %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b) %r = fdiv <4 x float> %a, %x ret <4 x float> %r - -; CHECK: @hoo -; CHECK: vrsqrtefp - -; CHECK-SAFE: @hoo -; CHECK-SAFE-NOT: vrsqrtefp -; CHECK-SAFE: blr } -define double @foo2(double %a, double %b) nounwind { - %r = fdiv double %a, %b - ret double %r - -; CHECK: @foo2 +define double @foo2_fmf(double %a, double %b) nounwind { +; CHECK: @foo2_fmf ; CHECK-DAG: fre ; CHECK-DAG: fnmsub ; CHECK: fmadd @@ -179,45 +200,54 @@ ; CHECK-NEXT: fmadd ; CHECK-NEXT: fmul ; CHECK-NEXT: blr - -; CHECK-SAFE: @foo2 -; CHECK-SAFE: fdiv -; CHECK-SAFE: blr + %r = fdiv fast double %a, %b + ret double %r } -define float @goo2(float %a, float %b) nounwind { - %r = fdiv float %a, %b - ret float %r +define double @foo2_safe(double %a, double %b) nounwind { +; CHECK: @foo2_safe +; CHECK: fdiv +; CHECK: blr + %r = fdiv double %a, %b + ret double %r +} -; CHECK: @goo2 +define float @goo2_fmf(float %a, float %b) nounwind { +; CHECK: @goo2_fmf ; CHECK-DAG: fres ; CHECK-DAG: fnmsubs ; CHECK: fmadds ; CHECK-NEXT: fmuls ; CHECK-NEXT: blr - -; CHECK-SAFE: @goo2 -; CHECK-SAFE: fdivs -; CHECK-SAFE: blr + %r = fdiv fast float %a, %b + ret float %r } -define <4 x float> @hoo2(<4 x float> %a, <4 x float> %b) nounwind { - %r = fdiv <4 x float> %a, %b - ret <4 x float> %r +define float @goo2_safe(float %a, float %b) nounwind { +; CHECK: @goo2_safe +; CHECK: fdivs +; CHECK: blr + %r = fdiv float %a, %b + ret float %r +} -; CHECK: @hoo2 +define <4 x float> @hoo2_fmf(<4 x float> %a, <4 x float> %b) nounwind { +; CHECK: @hoo2_fmf ; CHECK: vrefp - -; CHECK-SAFE: @hoo2 -; CHECK-SAFE-NOT: vrefp -; CHECK-SAFE: blr + %r = fdiv fast <4 x float> %a, %b + ret <4 x float> %r } -define double @foo3(double %a) nounwind { - %r = call double @llvm.sqrt.f64(double %a) - ret double %r +define <4 x float> @hoo2_safe(<4 x float> %a, <4 x float> %b) nounwind { +; CHECK: @hoo2_safe +; CHECK-NOT: vrefp +; CHECK: blr + %r = fdiv <4 x float> %a, %b + ret <4 x float> %r +} -; CHECK: @foo3 +define double @foo3_fmf(double %a) nounwind { +; CHECK: @foo3_fmf ; CHECK: fcmpu ; CHECK-DAG: frsqrte ; CHECK: fmul @@ -229,17 +259,20 @@ ; CHECK-NEXT: fmul ; CHECK-NEXT: fmul ; CHECK: blr - -; CHECK-SAFE: @foo3 -; CHECK-SAFE: fsqrt -; CHECK-SAFE: blr + %r = call fast double @llvm.sqrt.f64(double %a) + ret double %r } -define float @goo3(float %a) nounwind { - %r = call float @llvm.sqrt.f32(float %a) - ret float %r +define double @foo3_safe(double %a) nounwind { +; CHECK: @foo3_safe +; CHECK: fsqrt +; CHECK: blr + %r = call double @llvm.sqrt.f64(double %a) + ret double %r +} -; CHECK: @goo3 +define float @goo3_fmf(float %a) nounwind { +; CHECK: @goo3_fmf ; CHECK: fcmpu ; CHECK-DAG: frsqrtes ; CHECK: fmuls @@ -247,24 +280,32 @@ ; CHECK-NEXT: fmuls ; CHECK-NEXT: fmuls ; CHECK: blr - -; CHECK-SAFE: @goo3 -; CHECK-SAFE: fsqrts -; CHECK-SAFE: blr + %r = call fast float @llvm.sqrt.f32(float %a) + ret float %r } -define <4 x float> @hoo3(<4 x float> %a) nounwind { - %r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) - ret <4 x float> %r +define float @goo3_safe(float %a) nounwind { +; CHECK: @goo3_safe +; CHECK: fsqrts +; CHECK: blr + %r = call float @llvm.sqrt.f32(float %a) + ret float %r +} -; CHECK: @hoo3 +define <4 x float> @hoo3_fmf(<4 x float> %a) nounwind { +; CHECK: @hoo3_fmf ; CHECK: vrsqrtefp ; CHECK-DAG: vcmpeqfp + %r = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) + ret <4 x float> %r +} -; CHECK-SAFE: @hoo3 -; CHECK-SAFE-NOT: vrsqrtefp -; CHECK-SAFE: blr +define <4 x float> @hoo3_safe(<4 x float> %a) nounwind { +; CHECK: @hoo3_safe +; CHECK-NOT: vrsqrtefp +; CHECK: blr + %r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) + ret <4 x float> %r } attributes #0 = { nounwind "reciprocal-estimates"="sqrtf:0,sqrtd:0" } - Index: llvm/trunk/test/CodeGen/X86/dagcombine-unsafe-math.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/dagcombine-unsafe-math.ll +++ llvm/trunk/test/CodeGen/X86/dagcombine-unsafe-math.ll @@ -1,5 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -enable-unsafe-fp-math -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s ; rdar://13126763 @@ -62,9 +61,9 @@ %splat = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> zeroinitializer %v1 = extractelement <4 x float> %splat, i32 1 %v0 = extractelement <4 x float> %splat, i32 0 - %add1 = fadd float %v0, %v1 + %add1 = fadd reassoc nsz float %v0, %v1 %v2 = extractelement <4 x float> %splat, i32 2 - %add2 = fadd float %v2, %add1 + %add2 = fadd reassoc nsz float %v2, %add1 ret float %add2 } Index: llvm/trunk/test/CodeGen/X86/fmul-combines.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fmul-combines.ll +++ llvm/trunk/test/CodeGen/X86/fmul-combines.ll @@ -76,7 +76,7 @@ ret <4 x float> %y } -define <4 x float> @fmul0_v4f32_nsz_nnan(<4 x float> %x) #0 { +define <4 x float> @fmul0_v4f32_nsz_nnan(<4 x float> %x) { ; CHECK-LABEL: fmul0_v4f32_nsz_nnan: ; CHECK: # %bb.0: ; CHECK-NEXT: xorps %xmm0, %xmm0 @@ -85,7 +85,7 @@ ret <4 x float> %y } -define <4 x float> @fmul0_v4f32_undef(<4 x float> %x) #0 { +define <4 x float> @fmul0_v4f32_undef(<4 x float> %x) { ; CHECK-LABEL: fmul0_v4f32_undef: ; CHECK: # %bb.0: ; CHECK-NEXT: xorps %xmm0, %xmm0 @@ -94,23 +94,23 @@ ret <4 x float> %y } -define <4 x float> @fmul_c2_c4_v4f32(<4 x float> %x) #0 { +define <4 x float> @fmul_c2_c4_v4f32(<4 x float> %x) { ; CHECK-LABEL: fmul_c2_c4_v4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq - %y = fmul <4 x float> %x, - %z = fmul <4 x float> %y, + %y = fmul fast <4 x float> %x, + %z = fmul fast <4 x float> %y, ret <4 x float> %z } -define <4 x float> @fmul_c3_c4_v4f32(<4 x float> %x) #0 { +define <4 x float> @fmul_c3_c4_v4f32(<4 x float> %x) { ; CHECK-LABEL: fmul_c3_c4_v4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq - %y = fmul <4 x float> %x, - %z = fmul <4 x float> %y, + %y = fmul fast <4 x float> %x, + %z = fmul fast <4 x float> %y, ret <4 x float> %z } @@ -120,24 +120,24 @@ ; CHECK: float 32 ; We should be able to pre-multiply the two constant vectors. -define <4 x float> @fmul_v4f32_two_consts_no_splat(<4 x float> %x) #0 { +define <4 x float> @fmul_v4f32_two_consts_no_splat(<4 x float> %x) { ; CHECK-LABEL: fmul_v4f32_two_consts_no_splat: ; CHECK: # %bb.0: ; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq - %y = fmul <4 x float> %x, - %z = fmul <4 x float> %y, + %y = fmul fast <4 x float> %x, + %z = fmul fast <4 x float> %y, ret <4 x float> %z } ; Same as above, but reverse operands to make sure non-canonical form is also handled. -define <4 x float> @fmul_v4f32_two_consts_no_splat_non_canonical(<4 x float> %x) #0 { +define <4 x float> @fmul_v4f32_two_consts_no_splat_non_canonical(<4 x float> %x) { ; CHECK-LABEL: fmul_v4f32_two_consts_no_splat_non_canonical: ; CHECK: # %bb.0: ; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq - %y = fmul <4 x float> , %x - %z = fmul <4 x float> , %y + %y = fmul fast <4 x float> , %x + %z = fmul fast <4 x float> , %y ret <4 x float> %z } @@ -172,14 +172,14 @@ ; More than one use of a constant multiply should not inhibit the optimization. ; Instead of a chain of 2 dependent mults, this test will have 2 independent mults. -define <4 x float> @fmul_v4f32_two_consts_no_splat_multiple_use(<4 x float> %x) #0 { +define <4 x float> @fmul_v4f32_two_consts_no_splat_multiple_use(<4 x float> %x) { ; CHECK-LABEL: fmul_v4f32_two_consts_no_splat_multiple_use: ; CHECK: # %bb.0: ; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq - %y = fmul <4 x float> %x, - %z = fmul <4 x float> %y, - %a = fadd <4 x float> %y, %z + %y = fmul fast <4 x float> %x, + %z = fmul fast <4 x float> %y, + %a = fadd fast <4 x float> %y, %z ret <4 x float> %a } @@ -191,7 +191,7 @@ ; CHECK: float 24 ; CHECK: float 24 -define <4 x float> @PR22698_splats(<4 x float> %a) #0 { +define <4 x float> @PR22698_splats(<4 x float> %a) { ; CHECK-LABEL: PR22698_splats: ; CHECK: # %bb.0: ; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0 @@ -209,7 +209,7 @@ ; CHECK: float 231 ; CHECK: float 384 -define <4 x float> @PR22698_no_splats(<4 x float> %a) #0 { +define <4 x float> @PR22698_no_splats(<4 x float> %a) { ; CHECK-LABEL: PR22698_no_splats: ; CHECK: # %bb.0: ; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0 @@ -220,23 +220,23 @@ ret <4 x float> %mul3 } -define float @fmul_c2_c4_f32(float %x) #0 { +define float @fmul_c2_c4_f32(float %x) { ; CHECK-LABEL: fmul_c2_c4_f32: ; CHECK: # %bb.0: ; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq - %y = fmul float %x, 2.0 - %z = fmul float %y, 4.0 + %y = fmul fast float %x, 2.0 + %z = fmul fast float %y, 4.0 ret float %z } -define float @fmul_c3_c4_f32(float %x) #0 { +define float @fmul_c3_c4_f32(float %x) { ; CHECK-LABEL: fmul_c3_c4_f32: ; CHECK: # %bb.0: ; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq - %y = fmul float %x, 3.0 - %z = fmul float %y, 4.0 + %y = fmul fast float %x, 3.0 + %z = fmul fast float %y, 4.0 ret float %z } @@ -261,5 +261,3 @@ %mul = fmul <4 x float> %x.neg, %y.neg ret <4 x float> %mul } - -attributes #0 = { "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" } Index: llvm/trunk/test/CodeGen/X86/fp-fast.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fp-fast.ll +++ llvm/trunk/test/CodeGen/X86/fp-fast.ll @@ -1,106 +1,106 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx -enable-unsafe-fp-math --enable-no-nans-fp-math < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s -define float @test1(float %a) { +define float @test1(float %a) #0 { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 ; CHECK-NEXT: retq - %t1 = fadd float %a, %a - %r = fadd float %t1, %t1 + %t1 = fadd nnan reassoc nsz float %a, %a + %r = fadd nnan reassoc nsz float %t1, %t1 ret float %r } -define float @test2(float %a) { +define float @test2(float %a) #0 { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: ; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 ; CHECK-NEXT: retq - %t1 = fmul float 4.0, %a - %t2 = fadd float %a, %a - %r = fadd float %t1, %t2 + %t1 = fmul nnan reassoc nsz float 4.0, %a + %t2 = fadd nnan reassoc nsz float %a, %a + %r = fadd nnan reassoc nsz float %t1, %t2 ret float %r } -define float @test3(float %a) { +define float @test3(float %a) #0 { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: ; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 ; CHECK-NEXT: retq - %t1 = fmul float %a, 4.0 - %t2 = fadd float %a, %a - %r = fadd float %t1, %t2 + %t1 = fmul nnan reassoc nsz float %a, 4.0 + %t2 = fadd nnan reassoc nsz float %a, %a + %r = fadd nnan reassoc nsz float %t1, %t2 ret float %r } -define float @test4(float %a) { +define float @test4(float %a) #0 { ; CHECK-LABEL: test4: ; CHECK: # %bb.0: ; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 ; CHECK-NEXT: retq - %t1 = fadd float %a, %a - %t2 = fmul float 4.0, %a - %r = fadd float %t1, %t2 + %t1 = fadd nnan reassoc nsz float %a, %a + %t2 = fmul nnan reassoc nsz float 4.0, %a + %r = fadd nnan reassoc nsz float %t1, %t2 ret float %r } -define float @test5(float %a) { +define float @test5(float %a) #0 { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: ; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 ; CHECK-NEXT: retq - %t1 = fadd float %a, %a - %t2 = fmul float %a, 4.0 - %r = fadd float %t1, %t2 + %t1 = fadd nnan reassoc nsz float %a, %a + %t2 = fmul nnan reassoc nsz float %a, 4.0 + %r = fadd nnan reassoc nsz float %t1, %t2 ret float %r } -define float @test6(float %a) { +define float @test6(float %a) #0 { ; CHECK-LABEL: test6: ; CHECK: # %bb.0: ; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: retq - %t1 = fmul float 2.0, %a - %t2 = fadd float %a, %a - %r = fsub float %t1, %t2 + %t1 = fmul nnan reassoc nsz float 2.0, %a + %t2 = fadd nnan reassoc nsz float %a, %a + %r = fsub nnan reassoc nsz float %t1, %t2 ret float %r } -define float @test7(float %a) { +define float @test7(float %a) #0 { ; CHECK-LABEL: test7: ; CHECK: # %bb.0: ; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: retq - %t1 = fmul float %a, 2.0 - %t2 = fadd float %a, %a - %r = fsub float %t1, %t2 + %t1 = fmul nnan reassoc nsz float %a, 2.0 + %t2 = fadd nnan reassoc nsz float %a, %a + %r = fsub nnan reassoc nsz float %t1, %t2 ret float %r } -define float @test8(float %a) { +define float @test8(float %a) #0 { ; CHECK-LABEL: test8: ; CHECK: # %bb.0: ; CHECK-NEXT: retq - %t1 = fmul float %a, 0.0 - %t2 = fadd float %a, %t1 + %t1 = fmul nsz float %a, 0.0 + %t2 = fadd nnan reassoc nsz float %a, %t1 ret float %t2 } -define float @test9(float %a) { +define float @test9(float %a) #0 { ; CHECK-LABEL: test9: ; CHECK: # %bb.0: ; CHECK-NEXT: retq - %t1 = fmul float 0.0, %a - %t2 = fadd float %t1, %a + %t1 = fmul nsz float 0.0, %a + %t2 = fadd nnan reassoc nsz float %t1, %a ret float %t2 } -define float @test10(float %a) { +define float @test10(float %a) #0 { ; CHECK-LABEL: test10: ; CHECK: # %bb.0: ; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: retq - %t1 = fsub float -0.0, %a - %t2 = fadd float %a, %t1 + %t1 = fsub nsz float -0.0, %a + %t2 = fadd nnan reassoc nsz float %a, %t1 ret float %t2 } + Index: llvm/trunk/test/CodeGen/X86/fp-fold.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fp-fold.ll +++ llvm/trunk/test/CodeGen/X86/fp-fold.ll @@ -1,98 +1,92 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=ANY,STRICT -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -enable-unsafe-fp-math | FileCheck %s --check-prefixes=ANY,UNSAFE - -define float @fadd_zero(float %x) { -; STRICT-LABEL: fadd_zero: -; STRICT: # %bb.0: -; STRICT-NEXT: xorps %xmm1, %xmm1 -; STRICT-NEXT: addss %xmm1, %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fadd_zero: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s + +define float @fadd_zero_strict(float %x) { +; CHECK-LABEL: fadd_zero_strict: +; CHECK: # %bb.0: +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: addss %xmm1, %xmm0 +; CHECK-NEXT: retq %r = fadd float %x, 0.0 ret float %r } define float @fadd_negzero(float %x) { -; ANY-LABEL: fadd_negzero: -; ANY: # %bb.0: -; ANY-NEXT: retq +; CHECK-LABEL: fadd_negzero: +; CHECK: # %bb.0: +; CHECK-NEXT: retq %r = fadd float %x, -0.0 ret float %r } define float @fadd_produce_zero(float %x) { -; ANY-LABEL: fadd_produce_zero: -; ANY: # %bb.0: -; ANY-NEXT: xorps %xmm0, %xmm0 -; ANY-NEXT: retq +; CHECK-LABEL: fadd_produce_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: retq %neg = fsub nsz float 0.0, %x %r = fadd nnan float %neg, %x ret float %r } define float @fadd_reassociate(float %x) { -; ANY-LABEL: fadd_reassociate: -; ANY: # %bb.0: -; ANY-NEXT: addss {{.*}}(%rip), %xmm0 -; ANY-NEXT: retq +; CHECK-LABEL: fadd_reassociate: +; CHECK: # %bb.0: +; CHECK-NEXT: addss {{.*}}(%rip), %xmm0 +; CHECK-NEXT: retq %sum = fadd float %x, 8.0 %r = fadd reassoc nsz float %sum, 12.0 ret float %r } define float @fadd_negzero_nsz(float %x) { -; ANY-LABEL: fadd_negzero_nsz: -; ANY: # %bb.0: -; ANY-NEXT: retq +; CHECK-LABEL: fadd_negzero_nsz: +; CHECK: # %bb.0: +; CHECK-NEXT: retq %r = fadd nsz float %x, -0.0 ret float %r } define float @fadd_zero_nsz(float %x) { -; ANY-LABEL: fadd_zero_nsz: -; ANY: # %bb.0: -; ANY-NEXT: retq +; CHECK-LABEL: fadd_zero_nsz: +; CHECK: # %bb.0: +; CHECK-NEXT: retq %r = fadd nsz float %x, 0.0 ret float %r } define float @fsub_zero(float %x) { -; ANY-LABEL: fsub_zero: -; ANY: # %bb.0: -; ANY-NEXT: retq +; CHECK-LABEL: fsub_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: retq %r = fsub float %x, 0.0 ret float %r } define float @fsub_self(float %x) { -; ANY-LABEL: fsub_self: -; ANY: # %bb.0: -; ANY-NEXT: xorps %xmm0, %xmm0 -; ANY-NEXT: retq +; CHECK-LABEL: fsub_self: +; CHECK: # %bb.0: +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: retq %r = fsub nnan float %x, %x ret float %r } define float @fsub_neg_x_y(float %x, float %y) { -; ANY-LABEL: fsub_neg_x_y: -; ANY: # %bb.0: -; ANY-NEXT: subss %xmm0, %xmm1 -; ANY-NEXT: movaps %xmm1, %xmm0 -; ANY-NEXT: retq +; CHECK-LABEL: fsub_neg_x_y: +; CHECK: # %bb.0: +; CHECK-NEXT: subss %xmm0, %xmm1 +; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: retq %neg = fsub nsz float 0.0, %x %r = fadd nsz float %neg, %y ret float %r } define float @fsub_neg_y(float %x, float %y) { -; ANY-LABEL: fsub_neg_y: -; ANY: # %bb.0: -; ANY-NEXT: mulss {{.*}}(%rip), %xmm0 -; ANY-NEXT: retq +; CHECK-LABEL: fsub_neg_y: +; CHECK: # %bb.0: +; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0 +; CHECK-NEXT: retq %mul = fmul float %x, 5.0 %add = fadd float %mul, %y %r = fsub nsz reassoc float %y, %add @@ -100,10 +94,10 @@ } define <4 x float> @fsub_neg_y_vector(<4 x float> %x, <4 x float> %y) { -; ANY-LABEL: fsub_neg_y_vector: -; ANY: # %bb.0: -; ANY-NEXT: mulps {{.*}}(%rip), %xmm0 -; ANY-NEXT: retq +; CHECK-LABEL: fsub_neg_y_vector: +; CHECK: # %bb.0: +; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0 +; CHECK-NEXT: retq %mul = fmul <4 x float> %x, %add = fadd <4 x float> %mul, %y %r = fsub nsz reassoc <4 x float> %y, %add @@ -111,10 +105,10 @@ } define <4 x float> @fsub_neg_y_vector_nonuniform(<4 x float> %x, <4 x float> %y) { -; ANY-LABEL: fsub_neg_y_vector_nonuniform: -; ANY: # %bb.0: -; ANY-NEXT: mulps {{.*}}(%rip), %xmm0 -; ANY-NEXT: retq +; CHECK-LABEL: fsub_neg_y_vector_nonuniform: +; CHECK: # %bb.0: +; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0 +; CHECK-NEXT: retq %mul = fmul <4 x float> %x, %add = fadd <4 x float> %mul, %y %r = fsub nsz reassoc <4 x float> %y, %add @@ -122,10 +116,10 @@ } define float @fsub_neg_y_commute(float %x, float %y) { -; ANY-LABEL: fsub_neg_y_commute: -; ANY: # %bb.0: -; ANY-NEXT: mulss {{.*}}(%rip), %xmm0 -; ANY-NEXT: retq +; CHECK-LABEL: fsub_neg_y_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0 +; CHECK-NEXT: retq %mul = fmul float %x, 5.0 %add = fadd float %y, %mul %r = fsub nsz reassoc float %y, %add @@ -133,10 +127,10 @@ } define <4 x float> @fsub_neg_y_commute_vector(<4 x float> %x, <4 x float> %y) { -; ANY-LABEL: fsub_neg_y_commute_vector: -; ANY: # %bb.0: -; ANY-NEXT: mulps {{.*}}(%rip), %xmm0 -; ANY-NEXT: retq +; CHECK-LABEL: fsub_neg_y_commute_vector: +; CHECK: # %bb.0: +; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0 +; CHECK-NEXT: retq %mul = fmul <4 x float> %x, %add = fadd <4 x float> %y, %mul %r = fsub nsz reassoc <4 x float> %y, %add @@ -146,10 +140,10 @@ ; Y - (X + Y) --> -X define float @fsub_fadd_common_op_fneg(float %x, float %y) { -; ANY-LABEL: fsub_fadd_common_op_fneg: -; ANY: # %bb.0: -; ANY-NEXT: xorps {{.*}}(%rip), %xmm0 -; ANY-NEXT: retq +; CHECK-LABEL: fsub_fadd_common_op_fneg: +; CHECK: # %bb.0: +; CHECK-NEXT: xorps {{.*}}(%rip), %xmm0 +; CHECK-NEXT: retq %a = fadd float %x, %y %r = fsub reassoc nsz float %y, %a ret float %r @@ -158,10 +152,10 @@ ; Y - (X + Y) --> -X define <4 x float> @fsub_fadd_common_op_fneg_vec(<4 x float> %x, <4 x float> %y) { -; ANY-LABEL: fsub_fadd_common_op_fneg_vec: -; ANY: # %bb.0: -; ANY-NEXT: xorps {{.*}}(%rip), %xmm0 -; ANY-NEXT: retq +; CHECK-LABEL: fsub_fadd_common_op_fneg_vec: +; CHECK: # %bb.0: +; CHECK-NEXT: xorps {{.*}}(%rip), %xmm0 +; CHECK-NEXT: retq %a = fadd <4 x float> %x, %y %r = fsub nsz reassoc <4 x float> %y, %a ret <4 x float> %r @@ -171,10 +165,10 @@ ; Commute operands of the 'add'. define float @fsub_fadd_common_op_fneg_commute(float %x, float %y) { -; ANY-LABEL: fsub_fadd_common_op_fneg_commute: -; ANY: # %bb.0: -; ANY-NEXT: xorps {{.*}}(%rip), %xmm0 -; ANY-NEXT: retq +; CHECK-LABEL: fsub_fadd_common_op_fneg_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: xorps {{.*}}(%rip), %xmm0 +; CHECK-NEXT: retq %a = fadd float %y, %x %r = fsub reassoc nsz float %y, %a ret float %r @@ -183,90 +177,90 @@ ; Y - (Y + X) --> -X define <4 x float> @fsub_fadd_common_op_fneg_commute_vec(<4 x float> %x, <4 x float> %y) { -; ANY-LABEL: fsub_fadd_common_op_fneg_commute_vec: -; ANY: # %bb.0: -; ANY-NEXT: xorps {{.*}}(%rip), %xmm0 -; ANY-NEXT: retq +; CHECK-LABEL: fsub_fadd_common_op_fneg_commute_vec: +; CHECK: # %bb.0: +; CHECK-NEXT: xorps {{.*}}(%rip), %xmm0 +; CHECK-NEXT: retq %a = fadd <4 x float> %y, %x %r = fsub reassoc nsz <4 x float> %y, %a ret <4 x float> %r } -define float @fsub_negzero(float %x) { -; STRICT-LABEL: fsub_negzero: -; STRICT: # %bb.0: -; STRICT-NEXT: xorps %xmm1, %xmm1 -; STRICT-NEXT: addss %xmm1, %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fsub_negzero: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +define float @fsub_negzero_strict(float %x) { +; CHECK-LABEL: fsub_negzero_strict: +; CHECK: # %bb.0: +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: addss %xmm1, %xmm0 +; CHECK-NEXT: retq %r = fsub float %x, -0.0 ret float %r } -define <4 x float> @fsub_negzero_vector(<4 x float> %x) { -; STRICT-LABEL: fsub_negzero_vector: -; STRICT: # %bb.0: -; STRICT-NEXT: xorps %xmm1, %xmm1 -; STRICT-NEXT: addps %xmm1, %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fsub_negzero_vector: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +define float @fsub_negzero_nsz(float %x) { +; CHECK-LABEL: fsub_negzero_nsz: +; CHECK: # %bb.0: +; CHECK-NEXT: retq + %r = fsub nsz float %x, -0.0 + ret float %r +} + +define <4 x float> @fsub_negzero_strict_vector(<4 x float> %x) { +; CHECK-LABEL: fsub_negzero_strict_vector: +; CHECK: # %bb.0: +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: addps %xmm1, %xmm0 +; CHECK-NEXT: retq %r = fsub <4 x float> %x, ret <4 x float> %r } +define <4 x float> @fsub_negzero_nsz_vector(<4 x float> %x) { +; CHECK-LABEL: fsub_negzero_nsz_vector: +; CHECK: # %bb.0: +; CHECK-NEXT: retq + %r = fsub nsz <4 x float> %x, + ret <4 x float> %r +} + define float @fsub_zero_nsz_1(float %x) { -; ANY-LABEL: fsub_zero_nsz_1: -; ANY: # %bb.0: -; ANY-NEXT: retq +; CHECK-LABEL: fsub_zero_nsz_1: +; CHECK: # %bb.0: +; CHECK-NEXT: retq %r = fsub nsz float %x, 0.0 ret float %r } define float @fsub_zero_nsz_2(float %x) { -; ANY-LABEL: fsub_zero_nsz_2: -; ANY: # %bb.0: -; ANY-NEXT: xorps {{.*}}(%rip), %xmm0 -; ANY-NEXT: retq +; CHECK-LABEL: fsub_zero_nsz_2: +; CHECK: # %bb.0: +; CHECK-NEXT: xorps {{.*}}(%rip), %xmm0 +; CHECK-NEXT: retq %r = fsub nsz float 0.0, %x ret float %r } -define float @fsub_negzero_nsz(float %x) { -; ANY-LABEL: fsub_negzero_nsz: -; ANY: # %bb.0: -; ANY-NEXT: retq - %r = fsub nsz float %x, -0.0 - ret float %r -} - define float @fmul_zero(float %x) { -; ANY-LABEL: fmul_zero: -; ANY: # %bb.0: -; ANY-NEXT: xorps %xmm0, %xmm0 -; ANY-NEXT: retq +; CHECK-LABEL: fmul_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: retq %r = fmul nnan nsz float %x, 0.0 ret float %r } define float @fmul_one(float %x) { -; ANY-LABEL: fmul_one: -; ANY: # %bb.0: -; ANY-NEXT: retq +; CHECK-LABEL: fmul_one: +; CHECK: # %bb.0: +; CHECK-NEXT: retq %r = fmul float %x, 1.0 ret float %r } define float @fmul_x_const_const(float %x) { -; ANY-LABEL: fmul_x_const_const: -; ANY: # %bb.0: -; ANY-NEXT: mulss {{.*}}(%rip), %xmm0 -; ANY-NEXT: retq +; CHECK-LABEL: fmul_x_const_const: +; CHECK: # %bb.0: +; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0 +; CHECK-NEXT: retq %mul = fmul reassoc float %x, 9.0 %r = fmul reassoc float %mul, 4.0 ret float %r