Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1007,6 +1007,8 @@
   setTargetDAGCombine(ISD::VECREDUCE_OR);
   setTargetDAGCombine(ISD::VECREDUCE_XOR);
 
+  setTargetDAGCombine(ISD::FMUL);
+
   // In case of strict alignment, avoid an excessive number of byte wide stores.
   MaxStoresPerMemsetOptSize = 8;
   MaxStoresPerMemset =
@@ -22962,6 +22964,58 @@
   return SDValue();
 }
 
+// If fmul's constant operand is the reciprocal of a power of 2 (i.e., 1/2^n)
+// and the other operand is SINT_TO_FP, try "X * (1 / C) --> X / C" so that it
+// can be matched against the fixed-point scvtf patterns.
+static SDValue tryCombineFMULWithFDIV(SDValue Op1, SDValue Op2, EVT VT,
+                                      SDLoc DL, SelectionDAG &DAG) {
+  if (Op1.getOpcode() != ISD::SINT_TO_FP)
+    return SDValue();
+
+  ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op2);
+  if (!CstFP)
+    return SDValue();
+
+  APFloat InverseAPFImm(0.0);
+  if (!CstFP->getValueAPF().getExactInverse(&InverseAPFImm))
+    return SDValue();
+
+  bool IsExact;
+  APSInt InverseAPSIImm(64, true);
+  InverseAPFImm.convertToInteger(InverseAPSIImm, APFloat::rmTowardZero,
+                                 &IsExact);
+
+  if (!IsExact || !InverseAPSIImm.isPowerOf2())
+    return SDValue();
+
+  unsigned FBits = InverseAPSIImm.logBase2();
+  if (FBits == 0 || FBits > VT.getScalarSizeInBits())
+    return SDValue();
+
+  return DAG.getNode(ISD::FDIV, DL, VT, Op1,
+                     DAG.getConstantFP(InverseAPFImm, DL, VT));
+}
+
+static SDValue performFMULCombine(SDNode *N,
+                                  TargetLowering::DAGCombinerInfo &DCI,
+                                  SelectionDAG &DAG) {
+  if (DCI.isBeforeLegalizeOps())
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  SDLoc DL(N);
+
+  if (SDValue Val = tryCombineFMULWithFDIV(N->getOperand(0), N->getOperand(1),
+                                           VT, DL, DAG))
+    return Val;
+
+  if (SDValue Val = tryCombineFMULWithFDIV(N->getOperand(1), N->getOperand(0),
+                                           VT, DL, DAG))
+    return Val;
+
+  return SDValue();
+}
+
 SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -23277,6 +23331,8 @@
     return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
   case ISD::CTLZ:
     return performCTLZCombine(N, DAG, Subtarget);
+  case ISD::FMUL:
+    return performFMULCombine(N, DCI, DAG);
   }
   return SDValue();
 }
Index: llvm/test/CodeGen/AArch64/svtcf-fmul-fdiv-combine.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/svtcf-fmul-fdiv-combine.ll
@@ -0,0 +1,112 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple aarch64-none-linux-gnu < %s | FileCheck %s
+
+define float @scvtf_f32_2(i32 %state) {
+; CHECK-LABEL: scvtf_f32_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf s0, w0, #1
+; CHECK-NEXT:    ret
+entry:
+  %conv = sitofp i32 %state to float
+  %div = fmul float %conv, 5.000000e-01
+  ret float %div
+}
+
+define float @scvtf_f32_4(i32 %state) {
+; CHECK-LABEL: scvtf_f32_4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf s0, w0, #2
+; CHECK-NEXT:    ret
+entry:
+  %conv = sitofp i32 %state to float
+  %div = fmul float %conv, 2.500000e-01
+  ret float %div
+}
+
+define float @scvtf_f32_8(i32 %state) {
+; CHECK-LABEL: scvtf_f32_8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf s0, w0, #3
+; CHECK-NEXT:    ret
+entry:
+  %conv = sitofp i32 %state to float
+  %div = fmul float %conv, 1.250000e-01
+  ret float %div
+}
+
+define float @scvtf_f32_16(i32 %state) {
+; CHECK-LABEL: scvtf_f32_16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf s0, w0, #4
+; CHECK-NEXT:    ret
+entry:
+  %conv = sitofp i32 %state to float
+  %div = fmul float %conv, 6.250000e-02
+  ret float %div
+}
+
+define float @scvtf_f32_32(i32 %state) {
+; CHECK-LABEL: scvtf_f32_32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf s0, w0, #5
+; CHECK-NEXT:    ret
+entry:
+  %conv = sitofp i32 %state to float
+  %div = fmul float %conv, 3.125000e-02
+  ret float %div
+}
+
+define double @scvtf_f64_2(i64 %state) {
+; CHECK-LABEL: scvtf_f64_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf d0, x0, #1
+; CHECK-NEXT:    ret
+entry:
+  %conv = sitofp i64 %state to double
+  %div = fmul double %conv, 5.000000e-01
+  ret double %div
+}
+
+define double @scvtf_f64_4(i64 %state) {
+; CHECK-LABEL: scvtf_f64_4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf d0, x0, #2
+; CHECK-NEXT:    ret
+entry:
+  %conv = sitofp i64 %state to double
+  %div = fmul double %conv, 2.500000e-01
+  ret double %div
+}
+
+define double @scvtf_f64_8(i64 %state) {
+; CHECK-LABEL: scvtf_f64_8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf d0, x0, #3
+; CHECK-NEXT:    ret
+entry:
+  %conv = sitofp i64 %state to double
+  %div = fmul double %conv, 1.250000e-01
+  ret double %div
+}
+
+define double @scvtf_f64_16(i64 %state) {
+; CHECK-LABEL: scvtf_f64_16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf d0, x0, #4
+; CHECK-NEXT:    ret
+entry:
+  %conv = sitofp i64 %state to double
+  %div = fmul double %conv, 6.250000e-02
+  ret double %div
+}
+
+define double @scvtf_f64_32(i64 %state) {
+; CHECK-LABEL: scvtf_f64_32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf d0, x0, #5
+; CHECK-NEXT:    ret
+entry:
+  %conv = sitofp i64 %state to double
+  %div = fmul double %conv, 3.125000e-02
+  ret double %div
+}
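
The arithmetic the combine relies on: a constant C qualifies only when 1/C is exactly 2^FBits for some FBits in [1, scalar bit width], so sitofp(x) * C == sitofp(x) / 2^FBits, which is what the fixed-point form "scvtf ..., #FBits" computes. Below is a minimal standalone sketch of that qualification check in plain C++ (using std::frexp rather than the APFloat/APSInt calls in the patch; fractionalBitsForConstant is a hypothetical helper name used only for illustration).

// Standalone sketch (assumed helper, not the patch's implementation) of the
// check in tryCombineFMULWithFDIV: accept C only if 1/C is an integral power
// of two, and report FBits = log2(1/C), the scvtf fixed-point shift amount.
#include <cmath>
#include <cstdio>

// Returns FBits such that C == 1.0 / 2^FBits, or -1 if C is not the exact
// reciprocal of a power of two with 1 <= FBits <= ScalarBits.
static int fractionalBitsForConstant(double C, unsigned ScalarBits) {
  if (C <= 0.0)
    return -1;
  double Inv = 1.0 / C;
  if (Inv != std::round(Inv))          // the inverse must be an exact integer
    return -1;
  int Exp;
  double Frac = std::frexp(Inv, &Exp); // Inv == Frac * 2^Exp, Frac in [0.5, 1)
  if (Frac != 0.5)                     // power of two iff the mantissa is 0.5
    return -1;
  int FBits = Exp - 1;                 // Inv == 2^FBits
  if (FBits < 1 || FBits > static_cast<int>(ScalarBits))
    return -1;
  return FBits;
}

int main() {
  // 0.25 == 1/2^2, so a 32-bit scvtf would use #2; 0.3 has no exact
  // power-of-two inverse and is rejected.
  std::printf("%d\n", fractionalBitsForConstant(0.25, 32)); // 2
  std::printf("%d\n", fractionalBitsForConstant(0.3, 32));  // -1
}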