diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -324,6 +324,21 @@
   /// Transform zext(trunc(x)) to x.
   bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg);
 
+  /// Transform fptrunc(fpext(x)) to x.
+  bool matchCombineFPTruncExt(MachineInstr &MI, Register &Reg);
+
+  /// Transform fptrunc([su]itofp(x)) to [su]itofp x.
+  bool matchCombineFPTruncIntToFloat(MachineInstr &MI,
+                                     std::tuple<Register, unsigned> &MatchInfo);
+  bool applyCombineFPTruncIntToFloat(MachineInstr &MI,
+                                     std::tuple<Register, unsigned> &MatchInfo);
+
+  /// Transform fpext([su]itofp(x)) to [su]itofp x.
+  bool matchCombineFPExtIntToFloat(MachineInstr &MI,
+                                   std::tuple<Register, unsigned> &MatchInfo);
+  bool applyCombineFPExtIntToFloat(MachineInstr &MI,
+                                   std::tuple<Register, unsigned> &MatchInfo);
+
   /// Transform [asz]ext([asz]ext(x)) to [asz]ext x.
   bool matchCombineExtOfExt(MachineInstr &MI,
                             std::tuple<Register, unsigned> &MatchInfo);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -347,7 +347,7 @@
 def constant_fp_op_matchinfo: GIDefMatchData<"Optional<APFloat>">;
 def constant_fp_op: GICombineRule <
   (defs root:$root, constant_fp_op_matchinfo:$info),
-  (match (wip_match_opcode G_FNEG, G_FABS, G_FPTRUNC, G_FSQRT, G_FLOG2):$root,
+  (match (wip_match_opcode G_FNEG, G_FABS, G_FPTRUNC, G_FPEXT, G_FSQRT, G_FLOG2):$root,
     [{ return Helper.matchCombineConstantFoldFpUnary(*${root}, ${info}); }]),
   (apply [{ Helper.applyCombineConstantFoldFpUnary(*${root}, ${info}); }])
 >;
@@ -465,6 +465,33 @@
   (apply [{ Helper.applyCombineExtOfExt(*${root}, ${matchinfo}); }])
 >;
 
+// Fold (fptrunc (fpext x)) -> x if the source and destination types match.
+def fptrunc_fpext_fold: GICombineRule <
+  (defs root:$root, register_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_FPTRUNC):$root,
+    [{ return Helper.matchCombineFPTruncExt(*${root}, ${matchinfo}); }]),
+  (apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
+>;
+
+// Fold (fptrunc ([su]itofp x)) -> ([su]itofp x)
+def fptrunc_int_to_float_matchinfo : GIDefMatchData<"std::tuple<Register, unsigned>">;
+def fptrunc_int_to_float: GICombineRule<
+  (defs root:$d, fptrunc_int_to_float_matchinfo:$info),
+  (match (wip_match_opcode G_FPTRUNC): $d,
+    [{ return Helper.matchCombineFPTruncIntToFloat(*${d}, ${info}); }]),
+  (apply [{ return Helper.applyCombineFPTruncIntToFloat(*${d}, ${info}); }])
+>;
+
+// Fold (fpext ([su]itofp x)) -> ([su]itofp x), if x is representable in Dst
+// without losing precision.
+def fpext_int_to_float_matchinfo : GIDefMatchData<"std::tuple<Register, unsigned>">;
+def fpext_int_to_float: GICombineRule<
+  (defs root:$d, fpext_int_to_float_matchinfo:$info),
+  (match (wip_match_opcode G_FPEXT): $d,
+    [{ return Helper.matchCombineFPExtIntToFloat(*${d}, ${info}); }]),
+  (apply [{ return Helper.applyCombineFPExtIntToFloat(*${d}, ${info}); }])
+>;
+
 def not_cmp_fold_matchinfo : GIDefMatchData<"SmallVector<Register, 4>">;
 def not_cmp_fold : GICombineRule<
   (defs root:$d, not_cmp_fold_matchinfo:$info),
@@ -666,7 +693,8 @@
                                        binop_same_val, binop_left_to_zero,
                                        binop_right_to_zero, p2i_to_i2p,
                                        i2p_to_p2i, anyext_trunc_fold,
-                                       fneg_fneg_fold, right_identity_one]>;
+                                       fneg_fneg_fold, right_identity_one,
+                                       fptrunc_fpext_fold]>;
 
 def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p,
                                      overlapping_and]>;
@@ -697,7 +725,8 @@
     unmerge_zext_to_zext, trunc_ext_fold, trunc_shl,
     const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,
     shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine,
-    div_rem_to_divrem, funnel_shift_combines, form_bitfield_extract]>;
+    div_rem_to_divrem, funnel_shift_combines, form_bitfield_extract,
+    fptrunc_int_to_float, fpext_int_to_float]>;
 
 // A combine group used to for prelegalizer combiners at -O0. The combines in
 // this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1658,6 +1658,7 @@
     return V;
   }
   case TargetOpcode::G_FPTRUNC:
+  case TargetOpcode::G_FPEXT:
     break;
   case TargetOpcode::G_FSQRT: {
     bool Unused;
@@ -1673,8 +1674,8 @@
   }
   }
   // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
-  // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`,
-  // and `G_FLOG2` reach here.
+  // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FPEXT`,
+  // `G_FSQRT`, and `G_FLOG2` reach here.
   bool Unused;
   V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused);
   return V;
@@ -2447,6 +2448,73 @@
   return false;
 }
 
+bool CombinerHelper::matchCombineFPTruncExt(MachineInstr &MI, Register &Reg) {
+  assert(MI.getOpcode() == TargetOpcode::G_FPTRUNC && "Expected a G_FPTRUNC");
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  return mi_match(SrcReg, MRI,
+                  m_GFPExt(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
+}
+
+bool CombinerHelper::matchCombineFPTruncIntToFloat(
+    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_FPTRUNC && "Expected a G_FPTRUNC");
+  Register SrcReg = MI.getOperand(1).getReg();
+  MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+  unsigned SrcOpc = SrcMI->getOpcode();
+  if (SrcOpc == TargetOpcode::G_SITOFP || SrcOpc == TargetOpcode::G_UITOFP) {
+    MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
+    return true;
+  }
+  return false;
+}
+
+bool CombinerHelper::applyCombineFPTruncIntToFloat(
+    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_FPTRUNC && "Expected a G_FPTRUNC");
+  Register Reg = std::get<0>(MatchInfo);
+  unsigned SrcOpc = std::get<1>(MatchInfo);
+  Register DstReg = MI.getOperand(0).getReg();
+  Builder.setInstrAndDebugLoc(MI);
+  Builder.buildInstr(SrcOpc, {DstReg}, {Reg});
+  MI.eraseFromParent();
+  return true;
+}
+
+bool CombinerHelper::matchCombineFPExtIntToFloat(
+    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_FPEXT && "Expected a G_FPEXT");
+  Register SrcReg = MI.getOperand(1).getReg();
+  MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+  unsigned SrcOpc = SrcMI->getOpcode();
+  if (SrcOpc == TargetOpcode::G_SITOFP || SrcOpc == TargetOpcode::G_UITOFP) {
+    // An 8-bit integer is exactly representable in every supported floating
+    // point type, so dropping the intermediate conversion cannot change the
+    // value. Wider integers may be rounded by a narrow intermediate type such
+    // as half, so only fold those when unsafe FP math is enabled.
+    LLT SrcInputTy = MRI.getType(SrcMI->getOperand(1).getReg());
+    bool UnsafeFPMath = MI.getMF()->getTarget().Options.UnsafeFPMath;
+    if (SrcInputTy.getSizeInBits() == 8 || UnsafeFPMath) {
+      MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
+      return true;
+    }
+  }
+  return false;
+}
+
+bool CombinerHelper::applyCombineFPExtIntToFloat(
+    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_FPEXT && "Expected a G_FPEXT");
+  Register Reg = std::get<0>(MatchInfo);
+  unsigned SrcOpc = std::get<1>(MatchInfo);
+  Register DstReg = MI.getOperand(0).getReg();
+  Builder.setInstrAndDebugLoc(MI);
+  Builder.buildInstr(SrcOpc, {DstReg}, {Reg});
+  MI.eraseFromParent();
+  return true;
+}
+
 bool CombinerHelper::matchCombineExtOfExt(
     MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
   assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ext.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ext.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ext.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ext.mir
@@ -194,3 +194,36 @@
     %2:_(<2 x s64>) = G_ZEXT %1(<2 x s32>)
     $q0 = COPY %2(<2 x s64>)
 ...
+---
+name:            test_combine_half_to_float_fpext_constant
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_combine_half_to_float_fpext_constant
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -5.500000e+00
+    ; CHECK: $w0 = COPY [[C]](s32)
+    %0:_(s16) = G_FCONSTANT half 0xHC580
+    %1:_(s32) = G_FPEXT %0(s16)
+    $w0 = COPY %1(s32)
+...
+---
+name:            test_combine_half_to_double_fpext_constant
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_combine_half_to_double_fpext_constant
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -5.500000e+00
+    ; CHECK: $x0 = COPY [[C]](s64)
+    %0:_(s16) = G_FCONSTANT half 0xHC580
+    %1:_(s64) = G_FPEXT %0(s16)
+    $x0 = COPY %1(s64)
+...
+---
+name:            test_combine_float_to_double_fpext_constant
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_combine_float_to_double_fpext_constant
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.500000e+00
+    ; CHECK: $x0 = COPY [[C]](s64)
+    %0:_(s32) = G_FCONSTANT float 5.500000e+00
+    %1:_(s64) = G_FPEXT %0(s32)
+    $x0 = COPY %1(s64)
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fpext-suitofp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fpext-suitofp.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fpext-suitofp.mir
@@ -0,0 +1,62 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s
+# RUN: llc -o - -enable-unsafe-fp-math -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s --check-prefix=CHECK-UNSAFE
+
+# (fpext ([us]itofp x)) -> ([us]itofp x)
+# Due to loss of precision, this is only enabled for x = i8, or unsafe math.
+---
+name:            test_combine_fpext_sitofp_8_to_16
+body:             |
+  bb.1:
+    liveins: $b0
+    ; CHECK-LABEL: name: test_combine_fpext_sitofp_8_to_16
+    ; CHECK: [[COPY:%[0-9]+]]:_(s8) = COPY $b0
+    ; CHECK: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[COPY]](s8)
+    ; CHECK: $h1 = COPY [[SITOFP]](s16)
+    ; CHECK-UNSAFE-LABEL: name: test_combine_fpext_sitofp_8_to_16
+    ; CHECK-UNSAFE: [[COPY:%[0-9]+]]:_(s8) = COPY $b0
+    ; CHECK-UNSAFE: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[COPY]](s8)
+    ; CHECK-UNSAFE: $h1 = COPY [[SITOFP]](s16)
+    %0:_(s8) = COPY $b0
+    %1:_(s8) = G_SITOFP %0(s8)
+    %2:_(s16) = G_FPEXT %1(s8)
+    $h1 = COPY %2(s16)
+...
+---
+name:            test_combine_fpext_sitofp_8_to_32
+body:             |
+  bb.1:
+    liveins: $b0
+    ; CHECK-LABEL: name: test_combine_fpext_sitofp_8_to_32
+    ; CHECK: [[COPY:%[0-9]+]]:_(s8) = COPY $b0
+    ; CHECK: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[COPY]](s8)
+    ; CHECK: $w1 = COPY [[SITOFP]](s32)
+    ; CHECK-UNSAFE-LABEL: name: test_combine_fpext_sitofp_8_to_32
+    ; CHECK-UNSAFE: [[COPY:%[0-9]+]]:_(s8) = COPY $b0
+    ; CHECK-UNSAFE: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[COPY]](s8)
+    ; CHECK-UNSAFE: $w1 = COPY [[SITOFP]](s32)
+    %0:_(s8) = COPY $b0
+    %1:_(s8) = G_SITOFP %0(s8)
+    %2:_(s32) = G_FPEXT %1(s8)
+    $w1 = COPY %2(s32)
+...
+# Only fold this when unsafe FP math is enabled.
+---
+name:            test_combine_fpext_sitofp_16_to_32
+body:             |
+  bb.1:
+    liveins: $h0
+    ; CHECK-LABEL: name: test_combine_fpext_sitofp_16_to_32
+    ; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
+    ; CHECK: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[COPY]](s16)
+    ; CHECK: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[SITOFP]](s16)
+    ; CHECK: $w1 = COPY [[FPEXT]](s32)
+    ; CHECK-UNSAFE-LABEL: name: test_combine_fpext_sitofp_16_to_32
+    ; CHECK-UNSAFE: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
+    ; CHECK-UNSAFE: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[COPY]](s16)
+    ; CHECK-UNSAFE: $w1 = COPY [[SITOFP]](s32)
+    %0:_(s16) = COPY $h0
+    %1:_(s16) = G_SITOFP %0(s16)
+    %2:_(s32) = G_FPEXT %1(s16)
+    $w1 = COPY %2(s32)
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fptrunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fptrunc.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fptrunc.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fptrunc.mir
@@ -34,3 +34,90 @@
     %1:_(s32) = G_FPTRUNC %0(s64)
     $w0 = COPY %1(s32)
 ...
+# (fptrunc (fpext x)) -> x
+---
+name:            test_combine_truncate_ext_32
+body:             |
+  bb.1:
+    liveins: $w0
+    ; CHECK-LABEL: name: test_combine_truncate_ext_32
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: $w1 = COPY [[COPY]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s64) = G_FPEXT %0(s32)
+    %2:_(s32) = G_FPTRUNC %1(s64)
+    $w1 = COPY %2(s32)
+...
+---
+name:            test_combine_truncate_ext_16
+body:             |
+  bb.1:
+    liveins: $h0
+    ; CHECK-LABEL: name: test_combine_truncate_ext_16
+    ; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
+    ; CHECK: $h1 = COPY [[COPY]](s16)
+    %0:_(s16) = COPY $h0
+    %1:_(s32) = G_FPEXT %0(s16)
+    %2:_(s16) = G_FPTRUNC %1(s32)
+    $h1 = COPY %2(s16)
+...
+# Don't combine when the types mismatch.
+---
+name:            test_combine_truncate_ext_32_non_identity
+body:             |
+  bb.1:
+    liveins: $w0
+    ; CHECK-LABEL: name: test_combine_truncate_ext_32_non_identity
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[FPEXT:%[0-9]+]]:_(s64) = G_FPEXT [[COPY]](s32)
+    ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FPEXT]](s64)
+    ; CHECK: $h1 = COPY [[FPTRUNC]](s16)
+    %0:_(s32) = COPY $w0
+    %1:_(s64) = G_FPEXT %0(s32)
+    %2:_(s16) = G_FPTRUNC %1(s64)
+    $h1 = COPY %2(s16)
+...
+---
+name:            test_combine_truncate_ext_16_non_identity
+body:             |
+  bb.1:
+    liveins: $h0
+    ; CHECK-LABEL: name: test_combine_truncate_ext_16_non_identity
+    ; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
+    ; CHECK: [[FPEXT:%[0-9]+]]:_(s64) = G_FPEXT [[COPY]](s16)
+    ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s32) = G_FPTRUNC [[FPEXT]](s64)
+    ; CHECK: $w1 = COPY [[FPTRUNC]](s32)
+    %0:_(s16) = COPY $h0
+    %1:_(s64) = G_FPEXT %0(s16)
+    %2:_(s32) = G_FPTRUNC %1(s64)
+    $w1 = COPY %2(s32)
+...
+# (fptrunc ([us]itofp x)) -> ([us]itofp x)
+---
+name:            test_combine_truncate_sitofp_16
+body:             |
+  bb.1:
+    liveins: $w0
+    ; CHECK-LABEL: name: test_combine_truncate_sitofp_16
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[COPY]](s32)
+    ; CHECK: $h1 = COPY [[SITOFP]](s16)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_SITOFP %0(s32)
+    %2:_(s16) = G_FPTRUNC %1(s32)
+    $h1 = COPY %2(s16)
+...
+---
+name:            test_combine_truncate_uitofp_32
+body:             |
+  bb.1:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_combine_truncate_uitofp_32
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY]](s64)
+    ; CHECK: $w1 = COPY [[UITOFP]](s32)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = G_UITOFP %0(s64)
+    %2:_(s32) = G_FPTRUNC %1(s64)
+    $w1 = COPY %2(s32)
+...
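
For reference, a minimal sketch (not part of the patch; register numbers and types are arbitrary examples) of how the two new folds rewrite generic MIR, mirroring the tests above:

  ; (fptrunc (fpext x)) -> x, when the outer and inner types match:
  %1:_(s64) = G_FPEXT %0(s32)
  %2:_(s32) = G_FPTRUNC %1(s64)
  ; ... all uses of %2 are replaced with %0.

  ; (fpext ([su]itofp x)) -> ([su]itofp x), only for an i8 source or under
  ; -enable-unsafe-fp-math: converting a wider integer to s16 (half) rounds
  ; before the extension, while a direct G_SITOFP to s32 (float) would not.
  %1:_(s16) = G_SITOFP %0(s8)
  %2:_(s32) = G_FPEXT %1(s16)
  ; ... becomes: %2:_(s32) = G_SITOFP %0(s8)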