Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -674,6 +674,7 @@
   // Don't allow anything with multiple uses unless we know it is free.
   EVT VT = Op.getValueType();
+  const SDNodeFlags Flags = Op->getFlags();
   if (!Op.hasOneUse())
     if (!(Op.getOpcode() == ISD::FP_EXTEND &&
           TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
@@ -694,8 +695,8 @@
            TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
   }
   case ISD::FADD:
-    // FIXME: determine better conditions for this xform.
-    if (!Options->UnsafeFPMath) return 0;
+    if (!Options->UnsafeFPMath &&
+        !Flags.hasNoNaNs() && !Flags.hasNoInfs()) return 0;
 
     // After operation legalization, it might not be legal to create new FSUBs.
     if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
@@ -705,13 +706,14 @@
     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
                                     Options, Depth + 1))
       return V;
+    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
                               Depth + 1);
   case ISD::FSUB:
     // We can't turn -(A-B) into B-A when we honor signed zeros.
     if (!Options->NoSignedZerosFPMath &&
-        !Op.getNode()->getFlags().hasNoSignedZeros())
+        !Flags.hasNoSignedZeros())
       return 0;
 
     // fold (fneg (fsub A, B)) -> (fsub B, A)
@@ -719,7 +721,8 @@
   case ISD::FMUL:
   case ISD::FDIV:
-    if (Options->HonorSignDependentRoundingFPMath()) return 0;
+    if (Options->HonorSignDependentRoundingFPMathOption &&
+        !Options->UnsafeFPMath && !Flags.hasNoNaNs()) return 0;
 
     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
@@ -756,8 +759,8 @@
     return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
   }
   case ISD::FADD:
-    // FIXME: determine better conditions for this xform.
-    assert(Options.UnsafeFPMath);
+    assert(Options.UnsafeFPMath ||
+           (Flags.hasNoNaNs() && Flags.hasNoInfs()));
 
     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
@@ -783,7 +786,7 @@
   case ISD::FMUL:
   case ISD::FDIV:
-    assert(!Options.HonorSignDependentRoundingFPMath());
+    assert(!Options.HonorSignDependentRoundingFPMath() || Flags.hasNoNaNs());
 
     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
@@ -10205,19 +10208,31 @@
     return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
   }
 
-  // FIXME: Auto-upgrade the target/function-level option.
-  if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) {
-    // fold (fadd A, 0) -> A
-    if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
-      if (N1C->isZero())
-        return N0;
+  ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1);
+  if (N1C && N1C->isZero()) {
+    if (N1C->isNegative() || Options.UnsafeFPMath ||
+        Flags.hasNoSignedZeros()) {
+      // fold (fadd A, 0) -> A
+      return N0;
+    }
   }
 
   // If 'unsafe math' is enabled, fold lots of things.
-  if (Options.UnsafeFPMath) {
+  bool AllowNewConst = (Level < AfterLegalizeDAG);
+  if (Options.UnsafeFPMath || Flags.hasNoNaNs()) {
+    // If allowed, fold (fadd (fneg x), x) -> 0.0
+    if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
+      return DAG.getConstantFP(0.0, DL, VT);
+
+    // If allowed, fold (fadd x, (fneg x)) -> 0.0
+    if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
+      return DAG.getConstantFP(0.0, DL, VT);
+  }
+
+  // If 'unsafe math' is enabled, fold lots of things.
+  if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
     // No FP constant should be created after legalization as Instruction
     // Selection pass has a hard time dealing with FP constants.
-    bool AllowNewConst = (Level < AfterLegalizeDAG);
 
     // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
     if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
@@ -10227,14 +10242,6 @@
                                        Flags), Flags);
 
-    // If allowed, fold (fadd (fneg x), x) -> 0.0
-    if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
-      return DAG.getConstantFP(0.0, DL, VT);
-
-    // If allowed, fold (fadd x, (fneg x)) -> 0.0
-    if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
-      return DAG.getConstantFP(0.0, DL, VT);
-
     // We can fold chains of FADD's of the same value into multiplications.
     // This transform is not safe in general because we are reducing the number
     // of rounding steps.
@@ -10361,10 +10368,6 @@
   // If 'unsafe math' is enabled, fold lots of things.
   if (Options.UnsafeFPMath) {
-    // (fsub A, 0) -> A
-    if (N1CFP && N1CFP->isZero())
-      return N0;
-
     // (fsub x, x) -> 0.0
     if (N0 == N1)
       return DAG.getConstantFP(0.0f, DL, VT);
@@ -10381,6 +10384,19 @@
       if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
         return GetNegatedExpression(N10, DAG, LegalOperations);
     }
+  } else {
+    // (fsub x, x) -> 0.0
+    if (N0 == N1 && Flags.hasNoNaNs()) {
+      return DAG.getConstantFP(0.0f, DL, VT);
+    }
+  }
+
+  // (fsub A, 0) -> A
+  if (N1CFP && N1CFP->isZero()) {
+    if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
+        Flags.hasNoSignedZeros()) {
+      return N0;
+    }
   }
 
   // FSUB -> FMA combines:
@@ -10425,11 +10441,14 @@
   if (SDValue NewSel = foldBinOpIntoSelect(N))
     return NewSel;
 
-  if (Options.UnsafeFPMath) {
+  if (Options.UnsafeFPMath ||
+      (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
     // fold (fmul A, 0) -> 0
     if (N1CFP && N1CFP->isZero())
      return N1;
+  }
 
+  if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
     // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
     if (N0.getOpcode() == ISD::FMUL) {
       // Fold scalars or any vector constants (not just splats).
@@ -10736,7 +10755,7 @@
   if (SDValue NewSel = foldBinOpIntoSelect(N))
     return NewSel;
 
-  if (Options.UnsafeFPMath) {
+  if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
     if (N1CFP) {
       // Compute the reciprocal 1.0 / c2.
@@ -11121,6 +11140,7 @@
   SDValue N1 = N->getOperand(1);
   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   EVT VT = N->getValueType(0);
+  SDNodeFlags Flags = N->getFlags();
 
   // fold (fp_round c1fp) -> c1fp
   if (N0CFP)
@@ -11150,7 +11170,8 @@
     // single-step fp_round we want to fold to.
     // In other words, double rounding isn't the same as rounding.
    // Also, this is a value preserving truncation iff both fp_round's are.
-    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
+    if (DAG.getTarget().Options.UnsafeFPMath ||
+        Flags.hasAllowReassociation() || N0IsTrunc) {
       SDLoc DL(N);
       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4442,24 +4442,6 @@
   case ISD::FMUL:
   case ISD::FDIV:
   case ISD::FREM:
-    if (getTarget().Options.UnsafeFPMath) {
-      if (Opcode == ISD::FADD) {
-        // x+0 --> x
-        if (N2CFP && N2CFP->getValueAPF().isZero())
-          return N1;
-      } else if (Opcode == ISD::FSUB) {
-        // x-0 --> x
-        if (N2CFP && N2CFP->getValueAPF().isZero())
-          return N1;
-      } else if (Opcode == ISD::FMUL) {
-        // x*0 --> 0
-        if (N2CFP && N2CFP->isZero())
-          return N2;
-        // x*1 --> x
-        if (N2CFP && N2CFP->isExactlyValue(1.0))
-          return N1;
-      }
-    }
     assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
     assert(N1.getValueType() == N2.getValueType() &&
            N1.getValueType() == VT && "Binary operator types must match!");
Index: test/CodeGen/AArch64/fdiv-combine.ll
===================================================================
--- test/CodeGen/AArch64/fdiv-combine.ll
+++ test/CodeGen/AArch64/fdiv-combine.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=aarch64-unknown-unknown < %s | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -enable-unsafe-fp-math | FileCheck %s
 
 ; Following test cases check:
 ;   a / D; b / D; c / D;
@@ -11,9 +12,9 @@
 ; CHECK: fmul
 ; CHECK: fmul
 ; CHECK: fmul
-  %div = fdiv float %a, %D
-  %div1 = fdiv float %b, %D
-  %div2 = fdiv float %c, %D
+  %div = fdiv fast float %a, %D
+  %div1 = fdiv fast float %b, %D
+  %div2 = fdiv fast float %c, %D
   tail call void @foo_3f(float %div, float %div1, float %div2)
   ret void
 }
@@ -25,9 +26,9 @@
 ; CHECK: fmul
 ; CHECK: fmul
 ; CHECK: fmul
-  %div = fdiv double %a, %D
-  %div1 = fdiv double %b, %D
-  %div2 = fdiv double %c, %D
+  %div = fdiv fast double %a, %D
+  %div1 = fdiv fast double %b, %D
+  %div2 = fdiv fast double %c, %D
   tail call void @foo_3d(double %div, double %div1, double %div2)
   ret void
 }
@@ -39,9 +40,9 @@
 ; CHECK: fmul
 ; CHECK: fmul
 ; CHECK: fmul
-  %div = fdiv <4 x float> %a, %D
-  %div1 = fdiv <4 x float> %b, %D
-  %div2 = fdiv <4 x float> %c, %D
+  %div = fdiv fast <4 x float> %a, %D
+  %div1 = fdiv fast <4 x float> %b, %D
+  %div2 = fdiv fast <4 x float> %c, %D
   tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
   ret void
 }
@@ -53,9 +54,9 @@
 ; CHECK: fmul
 ; CHECK: fmul
 ; CHECK: fmul
-  %div = fdiv <2 x double> %a, %D
-  %div1 = fdiv <2 x double> %b, %D
-  %div2 = fdiv <2 x double> %c, %D
+  %div = fdiv fast <2 x double> %a, %D
+  %div1 = fdiv fast <2 x double> %b, %D
+  %div2 = fdiv fast <2 x double> %c, %D
   tail call void @foo_3_2xd(<2 x double> %div, <2 x double> %div1, <2 x double> %div2)
   ret void
 }
@@ -67,8 +68,8 @@
 ; CHECK: fdiv s
 ; CHECK: fdiv s
 ; CHECK-NOT: fmul
-  %div = fdiv float %a, %D
-  %div1 = fdiv float %b, %D
+  %div = fdiv fast float %a, %D
+  %div1 = fdiv fast float %b, %D
   tail call void @foo_2f(float %div, float %div1)
   ret void
 }
@@ -78,8 +79,8 @@
 ; CHECK: fdiv d
 ; CHECK: fdiv d
 ; CHECK-NOT: fmul
-  %div = fdiv double %a, %D
-  %div1 = fdiv double %b, %D
+  %div = fdiv fast double %a, %D
+  %div1 = fdiv fast double %b, %D
   tail call void @foo_2d(double %div, double %div1)
   ret void
 }
@@ -90,5 +91,3 @@
 declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)
 declare void @foo_2f(float, float)
 declare void @foo_2d(double, double)
-
-attributes #0 = { "unsafe-fp-math"="true" }
Index: test/CodeGen/AMDGPU/fadd.ll
===================================================================
--- test/CodeGen/AMDGPU/fadd.ll
+++ test/CodeGen/AMDGPU/fadd.ll
@@ -66,7 +66,7 @@
 ; FUNC-LABEL: {{^}}fadd_0_nsz_attr_f32:
 ; SI-NOT: v_add_f32
 define amdgpu_kernel void @fadd_0_nsz_attr_f32(float addrspace(1)* %out, float %a) #1 {
-  %add = fadd float %a, 0.0
+  %add = fadd nsz float %a, 0.0
   store float %add, float addrspace(1)* %out, align 4
   ret void
 }
Index: test/CodeGen/AMDGPU/fdiv.f16.ll
===================================================================
--- test/CodeGen/AMDGPU/fdiv.f16.ll
+++ test/CodeGen/AMDGPU/fdiv.f16.ll
@@ -218,7 +218,7 @@
 }
 
 ; FUNC-LABEL: {{^}}div_arcp_k_x_pat_f16:
-; SI: v_mul_f32_e32 v{{[0-9]+}}, 0x3dcccccd, v{{[0-9]+}}
+; SI: v_mul_f32_e32 v{{[0-9]+}}, 0x3dccc000, v{{[0-9]+}}
 
 ; GFX8_9: v_mul_f16_e32 [[MUL:v[0-9]+]], 0x2e66, v{{[0-9]+}}
 ; GFX8_9: buffer_store_short [[MUL]]
@@ -230,7 +230,7 @@
 }
 
 ; FUNC-LABEL: {{^}}div_arcp_neg_k_x_pat_f16:
-; SI: v_mul_f32_e32 v{{[0-9]+}}, 0xbdcccccd, v{{[0-9]+}}
+; SI: v_mul_f32_e32 v{{[0-9]+}}, 0xbdccc000, v{{[0-9]+}}
 
 ; GFX8_9: v_mul_f16_e32 [[MUL:v[0-9]+]], 0xae66, v{{[0-9]+}}
 ; GFX8_9: buffer_store_short [[MUL]]
Index: test/CodeGen/PowerPC/fdiv-combine.ll
===================================================================
--- test/CodeGen/PowerPC/fdiv-combine.ll
+++ test/CodeGen/PowerPC/fdiv-combine.ll
@@ -1,4 +1,5 @@
-; RUN: llc -verify-machineinstrs -mcpu=ppc64 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=ppc64 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=ppc64 -enable-unsafe-fp-math < %s | FileCheck %s
 target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
@@ -14,9 +15,9 @@
 ; CHECK: fmul
 ; CHECK: fmul
 ; CHECK: fmul
-  %div = fdiv double %a, %D
-  %div1 = fdiv double %b, %D
-  %div2 = fdiv double %c, %D
+  %div = fdiv fast double %a, %D
+  %div1 = fdiv fast double %b, %D
+  %div2 = fdiv fast double %c, %D
   tail call void @foo_3d(double %div, double %div1, double %div2)
   ret void
 }
@@ -26,8 +27,8 @@
 ; CHECK: fdiv {{[0-9]}}
 ; CHECK: fdiv {{[0-9]}}
 ; CHECK-NOT: fmul
-  %div = fdiv double %a, %D
-  %div1 = fdiv double %b, %D
+  %div = fdiv fast double %a, %D
+  %div1 = fdiv fast double %b, %D
   tail call void @foo_2d(double %div, double %div1)
   ret void
 }
@@ -35,5 +36,3 @@
 declare void @foo_3d(double, double, double)
 declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)
 declare void @foo_2d(double, double)
-
-attributes #0 = { "unsafe-fp-math"="true" }
Index: test/CodeGen/PowerPC/fmf-math.ll
===================================================================
--- test/CodeGen/PowerPC/fmf-math.ll
+++ test/CodeGen/PowerPC/fmf-math.ll
@@ -0,0 +1,8 @@
+; RUN: llc -verify-machineinstrs < %s -mattr=-vsx -mtriple=ppc32-- | grep fmul | count 1
+
+define double @foo(double %X) nounwind {
+  %tmp1 = fmul fast double %X, 1.23
+  %tmp2 = fmul fast double %tmp1, 4.124
+  ret double %tmp2
+}
+
Index: test/CodeGen/X86/change-ir-fp-math.ll
===================================================================
--- test/CodeGen/X86/change-ir-fp-math.ll
+++ test/CodeGen/X86/change-ir-fp-math.ll
@@ -0,0 +1,22 @@
+; Check that per-instruction fast-math flags in the IR are honored by codegen.
+
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown \
+; RUN:   | FileCheck %s --check-prefix=CHECK
+
+; The fdiv in the first function should be converted to a mul because it
+; carries the 'fast' flag; the flag-free fdiv must remain a division.
+
+; CHECK-LABEL: fast_fp_math:
+define double @fast_fp_math(double %x) {
+; CHECK: mulsd
+  %div = fdiv fast double %x, 2.0
+  ret double %div
+}
+
+; CHECK-LABEL: noflags_fp_math:
+define double @noflags_fp_math(double %x) {
+; CHECK: divsd
+  %div = fdiv double %x, 2.0
+  ret double %div
+}
+
Index: test/CodeGen/X86/fadd-combines.ll
===================================================================
--- test/CodeGen/X86/fadd-combines.ll
+++ test/CodeGen/X86/fadd-combines.ll
@@ -1,11 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-unknown < %s -enable-unsafe-fp-math | FileCheck %s
 
 define float @fadd_zero_f32(float %x) #0 {
 ; CHECK-LABEL: fadd_zero_f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    retq
-  %y = fadd float %x, 0.0
+  %y = fadd fast float %x, 0.0
   ret float %y
 }
@@ -13,7 +14,7 @@
 ; CHECK-LABEL: fadd_zero_4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    retq
-  %y = fadd <4 x float> %x, zeroinitializer
+  %y = fadd fast <4 x float> %x, zeroinitializer
   ret <4 x float> %y
 }
@@ -23,8 +24,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addss {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
-  %y = fadd float %x, 1.0
-  %z = fadd float %y, 2.0
+  %y = fadd fast float %x, 1.0
+  %z = fadd fast float %y, 2.0
   ret float %z
 }
@@ -37,8 +38,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
-  %y = fadd <4 x float> %x,
-  %z = fadd <4 x float> %y,
+  %y = fadd fast <4 x float> %x,
+  %z = fadd fast <4 x float> %y,
   ret <4 x float> %z
 }
@@ -48,8 +49,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mulss {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
-  %y = fmul float %x, 2.0
-  %z = fadd float %x, %y
+  %y = fmul fast float %x, 2.0
+  %z = fadd fast float %x, %y
   ret float %z
 }
@@ -62,8 +63,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mulps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
-  %y = fmul <4 x float> %x,
-  %z = fadd <4 x float> %x, %y
+  %y = fmul fast <4 x float> %x,
+  %z = fadd fast <4 x float> %x, %y
   ret <4 x float> %z
 }
@@ -73,8 +74,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mulss {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
-  %y = fmul float %x, 2.0
-  %z = fadd float %y, %x
+  %y = fmul fast float %x, 2.0
+  %z = fadd fast float %y, %x
   ret float %z
 }
@@ -87,8 +88,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mulps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
-  %y = fmul <4 x float> %x,
-  %z = fadd <4 x float> %y, %x
+  %y = fmul fast <4 x float> %x,
+  %z = fadd fast <4 x float> %y, %x
   ret <4 x float> %z
 }
@@ -98,9 +99,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mulss {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
-  %y = fadd float %x, %x
-  %z = fmul float %x, 2.0
-  %w = fadd float %y, %z
+  %y = fadd fast float %x, %x
+  %z = fmul fast float %x, 2.0
+  %w = fadd fast float %y, %z
   ret float %w
 }
@@ -113,9 +114,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mulps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
-  %y = fadd <4 x float> %x, %x
-  %z = fmul <4 x float> %x,
-  %w = fadd <4 x float> %y, %z
+  %y = fadd fast <4 x float> %x, %x
+  %z = fmul fast <4 x float> %x,
+  %w = fadd fast <4 x float> %y, %z
   ret <4 x float> %w
 }
@@ -125,9 +126,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mulss {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
-  %y = fadd float %x, %x
-  %z = fmul float %x, 2.0
-  %w = fadd float %z, %y
+  %y = fadd fast float %x, %x
+  %z = fmul fast float %x, 2.0
+  %w = fadd fast float %z, %y
   ret float %w
 }
@@ -140,9 +141,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mulps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
-  %y = fadd <4 x float> %x, %x
-  %z = fmul <4 x float> %x,
-  %w = fadd <4 x float> %z, %y
+  %y = fadd fast <4 x float> %x, %x
+  %z = fmul fast <4 x float> %x,
+  %w = fadd fast <4 x float> %z, %y
   ret <4 x float> %w
 }
@@ -152,8 +153,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mulss {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
-  %y = fadd float %x, %x
-  %z = fadd float %x, %y
+  %y = fadd fast float %x, %x
+  %z = fadd fast float %x, %y
   ret float %z
 }
@@ -166,8 +167,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mulps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
-  %y = fadd <4 x float> %x, %x
-  %z = fadd <4 x float> %x, %y
+  %y = fadd fast <4 x float> %x, %x
+  %z = fadd fast <4 x float> %x, %y
   ret <4 x float> %z
 }
@@ -177,8 +178,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mulss {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
-  %y = fadd float %x, %x
-  %z = fadd float %y, %x
+  %y = fadd fast float %x, %x
+  %z = fadd fast float %y, %x
   ret float %z
 }
@@ -191,8 +192,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mulps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
-  %y = fadd <4 x float> %x, %x
-  %z = fadd <4 x float> %y, %x
+  %y = fadd fast <4 x float> %x, %x
+  %z = fadd fast <4 x float> %y, %x
   ret <4 x float> %z
 }
@@ -202,8 +203,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mulss {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
-  %y = fadd float %x, %x
-  %z = fadd float %y, %y
+  %y = fadd fast float %x, %x
+  %z = fadd fast float %y, %y
   ret float %z
 }
@@ -216,9 +217,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mulps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
-  %y = fadd <4 x float> %x, %x
-  %z = fadd <4 x float> %y, %y
+  %y = fadd fast <4 x float> %x, %x
+  %z = fadd fast <4 x float> %y, %y
   ret <4 x float> %z
 }
 
-attributes #0 = { "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "no-signed-zeros-fp-math"="true" }
+attributes #0 = { "less-precise-fpmad"="true" }
Index: test/CodeGen/X86/fdiv-combine.ll
===================================================================
--- test/CodeGen/X86/fdiv-combine.ll
+++ test/CodeGen/X86/fdiv-combine.ll
@@ -19,7 +19,7 @@
 define float @div2_arcp_all(float %x, float %y, float %z) {
 ; CHECK-LABEL: div2_arcp_all:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; CHECK-NEXT:    movss {{.*}}(%rip), %xmm3
 ; CHECK-NEXT:    divss %xmm2, %xmm3
 ; CHECK-NEXT:    mulss %xmm3, %xmm0
 ; CHECK-NEXT:    mulss %xmm1, %xmm0
Index: test/CodeGen/X86/fdiv.ll
===================================================================
--- test/CodeGen/X86/fdiv.ll
+++ test/CodeGen/X86/fdiv.ll
@@ -1,4 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -enable-unsafe-fp-math | FileCheck %s
 
 define double @exact(double %x) {
@@ -7,7 +8,7 @@
 ; CHECK-LABEL: exact:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mulsd {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
-  %div = fdiv double %x, 2.0
+  %div = fdiv fast double %x, 2.0
   ret double %div
 }
@@ -17,7 +18,7 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mulsd {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
-  %div = fdiv double %x, 0x41DFFFFFFFC00000
+  %div = fdiv fast double %x, 0x41DFFFFFFFC00000
   ret double %div
 }
@@ -28,7 +29,7 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xorpd %xmm1, %xmm1
 ; CHECK-NEXT:    divsd %xmm1, %xmm0
 ; CHECK-NEXT:    retq
-  %div = fdiv double %x, 0.0
+  %div = fdiv fast double %x, 0.0
   ret double %div
 }
@@ -38,7 +39,7 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    divsd {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
-  %div = fdiv double %x, 0x7FD0000000000001
+  %div = fdiv fast double %x, 0x7FD0000000000001
   ret double %div
 }
@@ -48,7 +49,7 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    divsd {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
-  %div = fdiv double %x, 0x7FEFFFFFFFFFFFFF
+  %div = fdiv fast double %x, 0x7FEFFFFFFFFFFFFF
   ret double %div
 }
@@ -59,11 +60,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    divss %xmm1, %xmm0
 ; CHECK-NEXT:    retq
-  %neg1 = fsub float -0.0, %x
-  %neg2 = fsub float -0.0, %y
-  %div = fdiv float %neg1, %neg2
+  %neg1 = fsub fast float -0.0, %x
+  %neg2 = fsub fast float -0.0, %y
+  %div = fdiv fast float %neg1, %neg2
   ret float %div
 }
-
-attributes #0 = { "unsafe-fp-math"="false" }
-
Index: test/CodeGen/X86/fmf-flags.ll
===================================================================
--- test/CodeGen/X86/fmf-flags.ll
+++ test/CodeGen/X86/fmf-flags.ll
@@ -7,9 +7,12 @@
 define float @fast_recip_sqrt(float %x) {
 ; X64-LABEL: fast_recip_sqrt:
 ; X64:       # %bb.0:
-; X64-NEXT:    sqrtss %xmm0, %xmm1
-; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT:    divss %xmm1, %xmm0
+; X64-NEXT:    rsqrtss %xmm0, %xmm1
+; X64-NEXT:    mulss %xmm1, %xmm0
+; X64-NEXT:    mulss %xmm1, %xmm0
+; X64-NEXT:    addss {{.*}}(%rip), %xmm0
+; X64-NEXT:    mulss {{.*}}(%rip), %xmm1
+; X64-NEXT:    mulss %xmm1, %xmm0
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: fast_recip_sqrt:
@@ -29,18 +32,13 @@
 define float @fast_fmuladd_opts(float %a , float %b , float %c) {
 ; X64-LABEL: fast_fmuladd_opts:
 ; X64:       # %bb.0:
-; X64-NEXT:    movaps %xmm0, %xmm1
-; X64-NEXT:    addss %xmm0, %xmm1
-; X64-NEXT:    addss %xmm0, %xmm1
-; X64-NEXT:    movaps %xmm1, %xmm0
+; X64-NEXT:    mulss {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: fast_fmuladd_opts:
 ; X86:       # %bb.0:
 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
-; X86-NEXT:    fld %st(0)
-; X86-NEXT:    fadd %st(1)
-; X86-NEXT:    faddp %st(1)
+; X86-NEXT:    fmuls {{.*}}
 ; X86-NEXT:    retl
   %res = call fast float @llvm.fmuladd.f32(float %a, float 2.0, float %a)
   ret float %res
@@ -53,9 +51,9 @@
 define double @not_so_fast_mul_add(double %x) {
 ; X64-LABEL: not_so_fast_mul_add:
 ; X64:       # %bb.0:
-; X64-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; X64-NEXT:    movsd {{.*}}(%rip), %xmm1
 ; X64-NEXT:    mulsd %xmm0, %xmm1
-; X64-NEXT:    addsd %xmm1, %xmm0
+; X64-NEXT:    mulsd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    movsd %xmm1, {{.*}}(%rip)
 ; X64-NEXT:    retq
 ;
@@ -64,7 +62,9 @@
 ; X86-NEXT:    fldl {{[0-9]+}}(%esp)
 ; X86-NEXT:    fld %st(0)
 ; X86-NEXT:    fmull {{\.LCPI.*}}
-; X86-NEXT:    fadd %st(0), %st(1)
+; X86-NEXT:    fxch %st(1)
+; X86-NEXT:    fmull {{\.LCPI.*}}
+; X86-NEXT:    fxch %st(1)
 ; X86-NEXT:    fstpl mul1
 ; X86-NEXT:    retl
   %m = fmul double %x, 4.2
@@ -80,10 +80,14 @@
 define float @not_so_fast_recip_sqrt(float %x) {
 ; X64-LABEL: not_so_fast_recip_sqrt:
 ; X64:       # %bb.0:
-; X64-NEXT:    sqrtss %xmm0, %xmm1
-; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT:    divss %xmm1, %xmm0
-; X64-NEXT:    movss %xmm1, {{.*}}(%rip)
+; X64-NEXT:    rsqrtss %xmm0, %xmm1
+; X64-NEXT:    sqrtss %xmm0, %xmm2
+; X64-NEXT:    mulss %xmm1, %xmm0
+; X64-NEXT:    mulss %xmm1, %xmm0
+; X64-NEXT:    addss {{.*}}(%rip), %xmm0
+; X64-NEXT:    mulss {{.*}}(%rip), %xmm1
+; X64-NEXT:    mulss %xmm1, %xmm0
+; X64-NEXT:    movss %xmm2, {{.*}}(%rip)
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: not_so_fast_recip_sqrt:
Index: test/CodeGen/X86/fp-fast.ll
===================================================================
--- test/CodeGen/X86/fp-fast.ll
+++ test/CodeGen/X86/fp-fast.ll
@@ -1,4 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s
 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s
 
 define float @test1(float %a) {
@@ -6,8 +7,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
 ; CHECK-NEXT:    retq
-  %t1 = fadd float %a, %a
-  %r = fadd float %t1, %t1
+  %t1 = fadd fast float %a, %a
+  %r = fadd fast float %t1, %t1
   ret float %r
 }
@@ -16,9 +17,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
 ; CHECK-NEXT:    retq
-  %t1 = fmul float 4.0, %a
-  %t2 = fadd float %a, %a
-  %r = fadd float %t1, %t2
+  %t1 = fmul fast float 4.0, %a
+  %t2 = fadd fast float %a, %a
+  %r = fadd fast float %t1, %t2
   ret float %r
 }
@@ -27,9 +28,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
 ; CHECK-NEXT:    retq
-  %t1 = fmul float %a, 4.0
-  %t2 = fadd float %a, %a
-  %r = fadd float %t1, %t2
+  %t1 = fmul fast float %a, 4.0
+  %t2 = fadd fast float %a, %a
+  %r = fadd fast float %t1, %t2
   ret float %r
 }
@@ -38,9 +39,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
 ; CHECK-NEXT:    retq
-  %t1 = fadd float %a, %a
-  %t2 = fmul float 4.0, %a
-  %r = fadd float %t1, %t2
+  %t1 = fadd fast float %a, %a
+  %t2 = fmul fast float 4.0, %a
+  %r = fadd fast float %t1, %t2
   ret float %r
 }
@@ -49,9 +50,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
 ; CHECK-NEXT:    retq
-  %t1 = fadd float %a, %a
-  %t2 = fmul float %a, 4.0
-  %r = fadd float %t1, %t2
+  %t1 = fadd fast float %a, %a
+  %t2 = fmul fast float %a, 4.0
+  %r = fadd fast float %t1, %t2
   ret float %r
 }
@@ -60,9 +61,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
-  %t1 = fmul float 2.0, %a
-  %t2 = fadd float %a, %a
-  %r = fsub float %t1, %t2
+  %t1 = fmul fast float 2.0, %a
+  %t2 = fadd fast float %a, %a
+  %r = fsub fast float %t1, %t2
   ret float %r
 }
@@ -71,9 +72,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
-  %t1 = fmul float %a, 2.0
-  %t2 = fadd float %a, %a
-  %r = fsub float %t1, %t2
+  %t1 = fmul fast float %a, 2.0
+  %t2 = fadd fast float %a, %a
+  %r = fsub fast float %t1, %t2
   ret float %r
 }
@@ -81,8 +82,8 @@
 ; CHECK-LABEL: test8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    retq
-  %t1 = fmul float %a, 0.0
-  %t2 = fadd float %a, %t1
+  %t1 = fmul fast float %a, 0.0
+  %t2 = fadd fast float %a, %t1
   ret float %t2
 }
@@ -90,8 +91,8 @@
 ; CHECK-LABEL: test9:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    retq
-  %t1 = fmul float 0.0, %a
-  %t2 = fadd float %t1, %a
+  %t1 = fmul fast float 0.0, %a
+  %t2 = fadd fast float %t1, %a
   ret float %t2
 }
@@ -100,8 +101,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
-  %t1 = fsub float -0.0, %a
-  %t2 = fadd float %a, %t1
+  %t1 = fsub fast float -0.0, %a
+  %t2 = fadd fast float %a, %t1
   ret float %t2
 }
@@ -110,8 +111,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
-  %t1 = fsub float -0.0, %a
-  %t2 = fadd float %a, %t1
+  %t1 = fsub fast float -0.0, %a
+  %t2 = fadd fast float %a, %t1
   ret float %t2
 }
Index: test/CodeGen/X86/fp-fold.ll
===================================================================
--- test/CodeGen/X86/fp-fold.ll
+++ test/CodeGen/X86/fp-fold.ll
@@ -17,14 +17,9 @@
 }
 
 define float @fadd_negzero(float %x) {
-; STRICT-LABEL: fadd_negzero:
-; STRICT:       # %bb.0:
-; STRICT-NEXT:    addss {{.*}}(%rip), %xmm0
-; STRICT-NEXT:    retq
-;
-; UNSAFE-LABEL: fadd_negzero:
-; UNSAFE:       # %bb.0:
-; UNSAFE-NEXT:    retq
+; ANY-LABEL: fadd_negzero:
+; ANY:       # %bb.0:
+; ANY-NEXT:    retq
   %r = fadd float %x, -0.0
   ret float %r
 }
@@ -46,14 +41,9 @@
 }
 
 define float @fsub_zero(float %x) {
-; STRICT-LABEL: fsub_zero:
-; STRICT:       # %bb.0:
-; STRICT-NEXT:    addss {{.*}}(%rip), %xmm0
-; STRICT-NEXT:    retq
-;
-; UNSAFE-LABEL: fsub_zero:
-; UNSAFE:       # %bb.0:
-; UNSAFE-NEXT:    retq
+; ANY-LABEL: fsub_zero:
+; ANY:       # %bb.0:
+; ANY-NEXT:    retq
   %r = fsub float %x, 0.0
   ret float %r
 }
@@ -90,16 +80,10 @@
 ; TODO: handle x*0 for fast flags the same as unsafe
 define float @fmul_zero(float %x) {
-; STRICT-LABEL: fmul_zero:
-; STRICT:       # %bb.0:
-; STRICT-NEXT:    xorps %xmm1, %xmm1
-; STRICT-NEXT:    mulss %xmm1, %xmm0
-; STRICT-NEXT:    retq
-;
-; UNSAFE-LABEL: fmul_zero:
-; UNSAFE:       # %bb.0:
-; UNSAFE-NEXT:    xorps %xmm0, %xmm0
-; UNSAFE-NEXT:    retq
+; ANY-LABEL: fmul_zero:
+; ANY:       # %bb.0:
+; ANY-NEXT:    xorps %xmm0, %xmm0
+; ANY-NEXT:    retq
   %r = fmul nnan nsz float %x, 0.0
   ret float %r
 }
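
For reference, here is a minimal standalone sketch (not part of this patch; the file layout and function names are hypothetical) of the flag-driven behavior the DAGCombiner changes enable: an fadd carrying nsz now folds away a +0.0 operand without -enable-unsafe-fp-math, while the flag-free form keeps the add, since -0.0 + +0.0 must produce +0.0.

; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s

; With nsz, (fadd x, +0.0) folds to x, so no add is emitted.
define float @fold_zero_nsz(float %x) {
; CHECK-LABEL: fold_zero_nsz:
; CHECK-NOT: addss
; CHECK: retq
  %r = fadd nsz float %x, 0.0
  ret float %r
}

; Without flags, the sign of zero must be honored, so the add stays.
define float @no_fold_zero_strict(float %x) {
; CHECK-LABEL: no_fold_zero_strict:
; CHECK: addss
  %r = fadd float %x, 0.0
  ret float %r
}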