diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -462,6 +462,14 @@ bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width); + template + bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) { + return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth); + } + + bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos, + unsigned Width); + bool SelectCMP_SWAP(SDNode *N); bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift); @@ -3625,9 +3633,10 @@ return true; } -bool -AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, - unsigned RegWidth) { +static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, + SDValue &FixedPos, + unsigned RegWidth, + bool isReciprocal) { APFloat FVal(0.0); if (ConstantFPSDNode *CN = dyn_cast(N)) FVal = CN->getValueAPF(); @@ -3652,13 +3661,18 @@ // integers. bool IsExact; + if (isReciprocal) + if (!FVal.getExactInverse(&FVal)) + return false; + // fbits is between 1 and 64 in the worst-case, which means the fmul // could have 2^64 as an actual operand. Need 65 bits of precision. APSInt IntVal(65, true); FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact); // N.b. isPowerOf2 also checks for > 0. - if (!IsExact || !IntVal.isPowerOf2()) return false; + if (!IsExact || !IntVal.isPowerOf2()) + return false; unsigned FBits = IntVal.logBase2(); // Checks above should have guaranteed that we haven't lost information in @@ -3669,6 +3683,19 @@ return true; } +bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, + unsigned RegWidth) { + return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth, + false); +} + +bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N, + SDValue &FixedPos, + unsigned RegWidth) { + return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth, + true); +} + // Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields // of the string and obtains the integer values from them and combines these // into a single value to be used in the MRS/MSR instruction. diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -703,6 +703,28 @@ def fixedpoint_f32_i64 : fixedpoint_i64; def fixedpoint_f64_i64 : fixedpoint_i64; +class fixedpoint_recip_i32 + : Operand, + ComplexPattern", [fpimm, ld]> { + let EncoderMethod = "getFixedPointScaleOpValue"; + let DecoderMethod = "DecodeFixedPointScaleImm32"; +} + +class fixedpoint_recip_i64 + : Operand, + ComplexPattern", [fpimm, ld]> { + let EncoderMethod = "getFixedPointScaleOpValue"; + let DecoderMethod = "DecodeFixedPointScaleImm64"; +} + +def fixedpoint_recip_f16_i32 : fixedpoint_recip_i32; +def fixedpoint_recip_f32_i32 : fixedpoint_recip_i32; +def fixedpoint_recip_f64_i32 : fixedpoint_recip_i32; + +def fixedpoint_recip_f16_i64 : fixedpoint_recip_i64; +def fixedpoint_recip_f32_i64 : fixedpoint_recip_i64; +def fixedpoint_recip_f64_i64 : fixedpoint_recip_i64; + def vecshiftR8 : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 9); }]> { @@ -4984,55 +5006,55 @@ } // Scaled - def SWHri: BaseIntegerToFP { + (fmul (node GPR32:$Rn), + fixedpoint_recip_f16_i32:$scale))]> { let Inst{31} = 0; // 32-bit GPR flag let Inst{23-22} = 0b11; // 16-bit FPR flag let scale{5} = 1; let Predicates = [HasFullFP16]; } - def SWSri: BaseIntegerToFP { + (fmul (node GPR32:$Rn), + fixedpoint_recip_f32_i32:$scale))]> { let Inst{31} = 0; // 32-bit GPR flag let Inst{23-22} = 0b00; // 32-bit FPR flag let scale{5} = 1; } - def SWDri: BaseIntegerToFP { + (fmul (node GPR32:$Rn), + fixedpoint_recip_f64_i32:$scale))]> { let Inst{31} = 0; // 32-bit GPR flag let Inst{23-22} = 0b01; // 64-bit FPR flag let scale{5} = 1; } - def SXHri: BaseIntegerToFP { + (fmul (node GPR64:$Rn), + fixedpoint_recip_f16_i64:$scale))]> { let Inst{31} = 1; // 64-bit GPR flag let Inst{23-22} = 0b11; // 16-bit FPR flag let Predicates = [HasFullFP16]; } - def SXSri: BaseIntegerToFP { + (fmul (node GPR64:$Rn), + fixedpoint_recip_f32_i64:$scale))]> { let Inst{31} = 1; // 64-bit GPR flag let Inst{23-22} = 0b00; // 32-bit FPR flag } - def SXDri: BaseIntegerToFP { + (fmul (node GPR64:$Rn), + fixedpoint_recip_f64_i64:$scale))]> { let Inst{31} = 1; // 64-bit GPR flag let Inst{23-22} = 0b01; // 64-bit FPR flag } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -4348,6 +4348,34 @@ defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>; defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>; +def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)), + (SCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>; +def : Pat<(f32 (fdiv (f32 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f32_i32:$scale)), + (SCVTFSWSri GPR32:$Rn, fixedpoint_f32_i32:$scale)>; +def : Pat<(f64 (fdiv (f64 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f64_i32:$scale)), + (SCVTFSWDri GPR32:$Rn, fixedpoint_f64_i32:$scale)>; + +def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f16_i64:$scale)), + (SCVTFSXHri GPR64:$Rn, fixedpoint_f16_i64:$scale)>; +def : Pat<(f32 (fdiv (f32 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f32_i64:$scale)), + (SCVTFSXSri GPR64:$Rn, fixedpoint_f32_i64:$scale)>; +def : Pat<(f64 (fdiv (f64 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f64_i64:$scale)), + (SCVTFSXDri GPR64:$Rn, fixedpoint_f64_i64:$scale)>; + +def : Pat<(f16 (fdiv (f16 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f16_i64:$scale)), + (UCVTFSXHri GPR64:$Rn, fixedpoint_f16_i64:$scale)>; +def : Pat<(f32 (fdiv (f32 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f32_i64:$scale)), + (UCVTFSXSri GPR64:$Rn, fixedpoint_f32_i64:$scale)>; +def : Pat<(f64 (fdiv (f64 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f64_i64:$scale)), + (UCVTFSXDri GPR64:$Rn, fixedpoint_f64_i64:$scale)>; + +def : Pat<(f16 (fdiv (f16 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)), + (UCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>; +def : Pat<(f32 (fdiv (f32 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f32_i32:$scale)), + (UCVTFSWSri GPR32:$Rn, fixedpoint_f32_i32:$scale)>; +def : Pat<(f64 (fdiv (f64 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f64_i32:$scale)), + (UCVTFSWDri GPR32:$Rn, fixedpoint_f64_i32:$scale)>; + //===----------------------------------------------------------------------===// // Unscaled integer to floating point conversion instruction. //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AArch64/svtcf-fmul-fdiv-combine.ll b/llvm/test/CodeGen/AArch64/svtcf-fmul-fdiv-combine.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/svtcf-fmul-fdiv-combine.ll @@ -0,0 +1,167 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple aarch64-none-linux-gnu -enable-unsafe-fp-math -mattr=+fullfp16 < %s | FileCheck %s + +define half @scvtf_f16_2(i32 %state) { +; CHECK-LABEL: scvtf_f16_2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: scvtf h0, w0, #1 +; CHECK-NEXT: ret +entry: + %conv = sitofp i32 %state to half + %div = fmul half %conv, 5.000000e-01 + ret half %div +} + +define half @scvtf_f16_4(i32 %state) { +; CHECK-LABEL: scvtf_f16_4: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: scvtf h0, w0, #2 +; CHECK-NEXT: ret +entry: + %conv = sitofp i32 %state to half + %div = fmul half %conv, 2.500000e-01 + ret half %div +} + +define half @scvtf_f16_8(i32 %state) { +; CHECK-LABEL: scvtf_f16_8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: scvtf h0, w0, #3 +; CHECK-NEXT: ret +entry: + %conv = sitofp i32 %state to half + %div = fmul half %conv, 1.250000e-01 + ret half %div +} + +define half @scvtf_f16_16(i32 %state) { +; CHECK-LABEL: scvtf_f16_16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: scvtf h0, w0, #4 +; CHECK-NEXT: ret +entry: + %conv = sitofp i32 %state to half + %div = fmul half %conv, 6.250000e-02 + ret half %div +} + +define half @scvtf_f16_32(i32 %state) { +; CHECK-LABEL: scvtf_f16_32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: scvtf h0, w0, #5 +; CHECK-NEXT: ret +entry: + %conv = sitofp i32 %state to half + %div = fmul half %conv, 3.125000e-02 + ret half %div +} + +define float @scvtf_f32_2(i32 %state) { +; CHECK-LABEL: scvtf_f32_2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: scvtf s0, w0, #1 +; CHECK-NEXT: ret +entry: + %conv = sitofp i32 %state to float + %div = fmul float %conv, 5.000000e-01 + ret float %div +} + +define float @scvtf_f32_4(i32 %state) { +; CHECK-LABEL: scvtf_f32_4: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: scvtf s0, w0, #2 +; CHECK-NEXT: ret +entry: + %conv = sitofp i32 %state to float + %div = fmul float %conv, 2.500000e-01 + ret float %div +} + +define float @scvtf_f32_8(i32 %state) { +; CHECK-LABEL: scvtf_f32_8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: scvtf s0, w0, #3 +; CHECK-NEXT: ret +entry: + %conv = sitofp i32 %state to float + %div = fmul float %conv, 1.250000e-01 + ret float %div +} + +define float @scvtf_f32_16(i32 %state) { +; CHECK-LABEL: scvtf_f32_16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: scvtf s0, w0, #4 +; CHECK-NEXT: ret +entry: + %conv = sitofp i32 %state to float + %div = fmul float %conv, 6.250000e-02 + ret float %div +} + +define float @scvtf_f32_32(i32 %state) { +; CHECK-LABEL: scvtf_f32_32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: scvtf s0, w0, #5 +; CHECK-NEXT: ret +entry: + %conv = sitofp i32 %state to float + %div = fmul float %conv, 3.125000e-02 + ret float %div +} + +define double @scvtf_f64_2(i64 %state) { +; CHECK-LABEL: scvtf_f64_2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: scvtf d0, x0, #1 +; CHECK-NEXT: ret +entry: + %conv = sitofp i64 %state to double + %div = fmul double %conv, 5.000000e-01 + ret double %div +} + +define double @scvtf_f64_4(i64 %state) { +; CHECK-LABEL: scvtf_f64_4: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: scvtf d0, x0, #2 +; CHECK-NEXT: ret +entry: + %conv = sitofp i64 %state to double + %div = fmul double %conv, 2.500000e-01 + ret double %div +} + +define double @scvtf_f64_8(i64 %state) { +; CHECK-LABEL: scvtf_f64_8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: scvtf d0, x0, #3 +; CHECK-NEXT: ret +entry: + %conv = sitofp i64 %state to double + %div = fmul double %conv, 1.250000e-01 + ret double %div +} + +define double @scvtf_f64_16(i64 %state) { +; CHECK-LABEL: scvtf_f64_16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: scvtf d0, x0, #4 +; CHECK-NEXT: ret +entry: + %conv = sitofp i64 %state to double + %div = fmul double %conv, 6.250000e-02 + ret double %div +} + +define double @scvtf_f64_32(i64 %state) { +; CHECK-LABEL: scvtf_f64_32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: scvtf d0, x0, #5 +; CHECK-NEXT: ret +entry: + %conv = sitofp i64 %state to double + %div = fmul double %conv, 3.125000e-02 + ret double %div +}