Index: llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -258,6 +258,16 @@
   bool applyCombineAddP2IToPtrAdd(MachineInstr &MI,
                                   std::pair<Register, bool> &PtrRegAndCommute);

+  /// Transform anyext(trunc(x)) to x.
+  bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg);
+  bool applyCombineAnyExtTrunc(MachineInstr &MI, Register &Reg);
+
+  /// Transform [asz]ext([asz]ext(x)) to [asz]ext x.
+  bool matchCombineExtOfExt(MachineInstr &MI,
+                            std::tuple<Register, unsigned> &MatchInfo);
+  bool applyCombineExtOfExt(MachineInstr &MI,
+                            std::tuple<Register, unsigned> &MatchInfo);
+
   /// Return true if any explicit use operand on \p MI is defined by a
   /// G_IMPLICIT_DEF.
   bool matchAnyExplicitUseIsUndef(MachineInstr &MI);
Index: llvm/include/llvm/Target/GlobalISel/Combine.td
===================================================================
--- llvm/include/llvm/Target/GlobalISel/Combine.td
+++ llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -354,6 +354,25 @@
   (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
 >;

+// Fold (anyext (trunc x)) -> x if the source type is same as
+// the destination type.
+def anyext_trunc_fold_matchinfo : GIDefMatchData<"Register">;
+def anyext_trunc_fold: GICombineRule <
+  (defs root:$root, anyext_trunc_fold_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_ANYEXT):$root,
+         [{ return Helper.matchCombineAnyExtTrunc(*${root}, ${matchinfo}); }]),
+  (apply [{ return Helper.applyCombineAnyExtTrunc(*${root}, ${matchinfo}); }])
+>;
+
+// Fold ([asz]ext ([asz]ext x)) -> ([asz]ext x).
+def ext_ext_fold_matchinfo : GIDefMatchData<"std::tuple<Register, unsigned>">;
+def ext_ext_fold: GICombineRule <
+  (defs root:$root, ext_ext_fold_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_ANYEXT, G_SEXT, G_ZEXT):$root,
+         [{ return Helper.matchCombineExtOfExt(*${root}, ${matchinfo}); }]),
+  (apply [{ return Helper.applyCombineExtOfExt(*${root}, ${matchinfo}); }])
+>;
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -365,7 +384,7 @@
 def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
                                         binop_same_val, binop_left_to_zero,
                                         binop_right_to_zero, p2i_to_i2p,
-                                        i2p_to_p2i]>;
+                                        i2p_to_p2i, anyext_trunc_fold]>;

 def known_bits_simplifications : GICombineGroup<[
   and_trivial_mask, redundant_sext_inreg]>;
@@ -381,4 +400,4 @@
                        hoist_logic_op_with_same_opcode_hands,
                        shl_ashr_to_sext_inreg, sext_inreg_of_load,
                        width_reduction_combines, select_combines,
-                       known_bits_simplifications]>;
+                       known_bits_simplifications, ext_ext_fold]>;
Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1744,6 +1744,78 @@
   return true;
 }

+bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
+  assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  return mi_match(SrcReg, MRI,
+                  m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
+}
+
+bool CombinerHelper::applyCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
+  assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
+  Register DstReg = MI.getOperand(0).getReg();
+  MI.eraseFromParent();
+  replaceRegWith(MRI, DstReg, Reg);
+  return true;
+}
+
+bool CombinerHelper::matchCombineExtOfExt(
+    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+  assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
+          MI.getOpcode() == TargetOpcode::G_SEXT ||
+          MI.getOpcode() == TargetOpcode::G_ZEXT) &&
+         "Expected a G_[ASZ]EXT");
+  Register SrcReg = MI.getOperand(1).getReg();
+  MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+  // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
+  unsigned Opc = MI.getOpcode();
+  unsigned SrcOpc = SrcMI->getOpcode();
+  if (Opc == SrcOpc ||
+      (Opc == TargetOpcode::G_ANYEXT &&
+       (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
+      (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
+    MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
+    return true;
+  }
+  return false;
+}
+
+bool CombinerHelper::applyCombineExtOfExt(
+    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+  assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
+          MI.getOpcode() == TargetOpcode::G_SEXT ||
+          MI.getOpcode() == TargetOpcode::G_ZEXT) &&
+         "Expected a G_[ASZ]EXT");
+
+  Register Reg = std::get<0>(MatchInfo);
+  unsigned SrcExtOp = std::get<1>(MatchInfo);
+
+  // Combine exts with the same opcode.
+ if (MI.getOpcode() == SrcExtOp) { + Observer.changingInstr(MI); + MI.getOperand(1).setReg(Reg); + Observer.changedInstr(MI); + return true; + } + + // Combine: + // - anyext([sz]ext x) to [sz]ext x + // - sext(zext x) to zext x + if (MI.getOpcode() == TargetOpcode::G_ANYEXT || + (MI.getOpcode() == TargetOpcode::G_SEXT && + SrcExtOp == TargetOpcode::G_ZEXT)) { + Register DstReg = MI.getOperand(0).getReg(); + Builder.setInstrAndDebugLoc(MI); + Builder.buildInstr(SrcExtOp, {DstReg}, {Reg}); + MI.eraseFromParent(); + return true; + } + + return false; +} + bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) { return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) { return MO.isReg() && Index: llvm/test/CodeGen/AArch64/GlobalISel/combine-ext.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/GlobalISel/combine-ext.mir @@ -0,0 +1,196 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s +--- +name: test_combine_anyext_trunc +body: | + bb.1: + liveins: $x0 + ; CHECK-LABEL: name: test_combine_anyext_trunc + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: $x1 = COPY [[COPY]](s64) + %0:_(s64) = COPY $x0 + %1:_(s32) = G_TRUNC %0(s64) + %2:_(s64) = G_ANYEXT %1(s32) + $x1 = COPY %2(s64) +... +--- +name: test_combine_anyext_trunc_vec +body: | + bb.1: + liveins: $q0 + ; CHECK-LABEL: name: test_combine_anyext_trunc_vec + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK: $q0 = COPY [[COPY]](<2 x s64>) + %0:_(<2 x s64>) = COPY $q0 + %1:_(<2 x s32>) = G_TRUNC %0(<2 x s64>) + %2:_(<2 x s64>) = G_ANYEXT %1(<2 x s32>) + $q0 = COPY %2(<2 x s64>) +... 
+--- +name: test_combine_anyext_anyext +body: | + bb.1: + liveins: $h0 + ; CHECK-LABEL: name: test_combine_anyext_anyext + ; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s16) + ; CHECK: $x0 = COPY [[ANYEXT]](s64) + %0:_(s16) = COPY $h0 + %1:_(s32) = G_ANYEXT %0(s16) + %2:_(s64) = G_ANYEXT %1(s32) + $x0 = COPY %2(s64) +... +--- +name: test_combine_anyext_anyext_vec +body: | + bb.1: + liveins: $s0 + ; CHECK-LABEL: name: test_combine_anyext_anyext_vec + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $s0 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(<2 x s64>) = G_ANYEXT [[COPY]](<2 x s16>) + ; CHECK: $q0 = COPY [[ANYEXT]](<2 x s64>) + %0:_(<2 x s16>) = COPY $s0 + %1:_(<2 x s32>) = G_ANYEXT %0(<2 x s16>) + %2:_(<2 x s64>) = G_ANYEXT %1(<2 x s32>) + $q0 = COPY %2(<2 x s64>) +... +--- +name: test_combine_anyext_sext +body: | + bb.1: + liveins: $h0 + ; CHECK-LABEL: name: test_combine_anyext_sext + ; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 + ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s16) + ; CHECK: $x0 = COPY [[SEXT]](s64) + %0:_(s16) = COPY $h0 + %1:_(s32) = G_SEXT %0(s16) + %2:_(s64) = G_ANYEXT %1(s32) + $x0 = COPY %2(s64) +... +--- +name: test_combine_anyext_sext_vec +body: | + bb.1: + liveins: $s0 + ; CHECK-LABEL: name: test_combine_anyext_sext_vec + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $s0 + ; CHECK: [[SEXT:%[0-9]+]]:_(<2 x s64>) = G_SEXT [[COPY]](<2 x s16>) + ; CHECK: $q0 = COPY [[SEXT]](<2 x s64>) + %0:_(<2 x s16>) = COPY $s0 + %1:_(<2 x s32>) = G_SEXT %0(<2 x s16>) + %2:_(<2 x s64>) = G_ANYEXT %1(<2 x s32>) + $q0 = COPY %2(<2 x s64>) +... +--- +name: test_combine_anyext_zext +body: | + bb.1: + liveins: $h0 + ; CHECK-LABEL: name: test_combine_anyext_zext + ; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s16) + ; CHECK: $x0 = COPY [[ZEXT]](s64) + %0:_(s16) = COPY $h0 + %1:_(s32) = G_ZEXT %0(s16) + %2:_(s64) = G_ANYEXT %1(s32) + $x0 = COPY %2(s64) +... 
+--- +name: test_combine_anyext_zext_vec +body: | + bb.1: + liveins: $s0 + ; CHECK-LABEL: name: test_combine_anyext_zext_vec + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $s0 + ; CHECK: [[ZEXT:%[0-9]+]]:_(<2 x s64>) = G_ZEXT [[COPY]](<2 x s16>) + ; CHECK: $q0 = COPY [[ZEXT]](<2 x s64>) + %0:_(<2 x s16>) = COPY $s0 + %1:_(<2 x s32>) = G_ZEXT %0(<2 x s16>) + %2:_(<2 x s64>) = G_ANYEXT %1(<2 x s32>) + $q0 = COPY %2(<2 x s64>) +... +--- +name: test_combine_sext_sext +body: | + bb.1: + liveins: $h0 + ; CHECK-LABEL: name: test_combine_sext_sext + ; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 + ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s16) + ; CHECK: $x0 = COPY [[SEXT]](s64) + %0:_(s16) = COPY $h0 + %1:_(s32) = G_SEXT %0(s16) + %2:_(s64) = G_SEXT %1(s32) + $x0 = COPY %2(s64) +... +--- +name: test_combine_sext_sext_vec +body: | + bb.1: + liveins: $s0 + ; CHECK-LABEL: name: test_combine_sext_sext_vec + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $s0 + ; CHECK: [[SEXT:%[0-9]+]]:_(<2 x s64>) = G_SEXT [[COPY]](<2 x s16>) + ; CHECK: $q0 = COPY [[SEXT]](<2 x s64>) + %0:_(<2 x s16>) = COPY $s0 + %1:_(<2 x s32>) = G_SEXT %0(<2 x s16>) + %2:_(<2 x s64>) = G_SEXT %1(<2 x s32>) + $q0 = COPY %2(<2 x s64>) +... +--- +name: test_combine_sext_zext +body: | + bb.1: + liveins: $h0 + ; CHECK-LABEL: name: test_combine_sext_zext + ; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s16) + ; CHECK: $x0 = COPY [[ZEXT]](s64) + %0:_(s16) = COPY $h0 + %1:_(s32) = G_ZEXT %0(s16) + %2:_(s64) = G_SEXT %1(s32) + $x0 = COPY %2(s64) +... 
+--- +name: test_combine_sext_zext_vec +body: | + bb.1: + liveins: $s0 + ; CHECK-LABEL: name: test_combine_sext_zext_vec + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $s0 + ; CHECK: [[ZEXT:%[0-9]+]]:_(<2 x s64>) = G_ZEXT [[COPY]](<2 x s16>) + ; CHECK: $q0 = COPY [[ZEXT]](<2 x s64>) + %0:_(<2 x s16>) = COPY $s0 + %1:_(<2 x s32>) = G_ZEXT %0(<2 x s16>) + %2:_(<2 x s64>) = G_SEXT %1(<2 x s32>) + $q0 = COPY %2(<2 x s64>) +... +--- +name: test_combine_zext_zext +body: | + bb.1: + liveins: $h0 + ; CHECK-LABEL: name: test_combine_zext_zext + ; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s16) + ; CHECK: $x0 = COPY [[ZEXT]](s64) + %0:_(s16) = COPY $h0 + %1:_(s32) = G_ZEXT %0(s16) + %2:_(s64) = G_ZEXT %1(s32) + $x0 = COPY %2(s64) +... +--- +name: test_combine_zext_zext_vec +body: | + bb.1: + liveins: $s0 + ; CHECK-LABEL: name: test_combine_zext_zext_vec + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $s0 + ; CHECK: [[ZEXT:%[0-9]+]]:_(<2 x s64>) = G_ZEXT [[COPY]](<2 x s16>) + ; CHECK: $q0 = COPY [[ZEXT]](<2 x s64>) + %0:_(<2 x s16>) = COPY $s0 + %1:_(<2 x s32>) = G_ZEXT %0(<2 x s16>) + %2:_(<2 x s64>) = G_ZEXT %1(<2 x s32>) + $q0 = COPY %2(<2 x s64>) +... 
Index: llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-s1.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-s1.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-s1.mir @@ -25,9 +25,8 @@ ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[COPY]](p0) :: (load 1 from %ir.ptr) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT [[LOAD]](s1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ZEXT]](s8) - ; CHECK: $w0 = COPY [[ANYEXT]](s32) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1) + ; CHECK: $w0 = COPY [[ZEXT]](s32) ; CHECK: RET_ReallyLR implicit $w0 %0:_(p0) = COPY $x0 %1:_(s1) = G_LOAD %0(p0) :: (load 1 from %ir.ptr) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir @@ -228,9 +228,7 @@ ; CHECK-LABEL: name: cvt_f32_ubyte0_zext_lshr_8 ; CHECK: liveins: $vgpr0 ; CHECK: %arg:_(s32) = COPY $vgpr0 - ; CHECK: %trunc:_(s16) = G_TRUNC %arg(s32) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %trunc(s16) - ; CHECK: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE1 [[ANYEXT]] + ; CHECK: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE1 %arg ; CHECK: $vgpr0 = COPY %result(s32) %arg:_(s32) = COPY $vgpr0 %trunc:_(s16) = G_TRUNC %arg @@ -251,9 +249,7 @@ ; CHECK-LABEL: name: cvt_f32_ubyte0_zext_lshr_16 ; CHECK: liveins: $vgpr0 ; CHECK: %arg:_(s32) = COPY $vgpr0 - ; CHECK: %trunc:_(s16) = G_TRUNC %arg(s32) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %trunc(s16) - ; CHECK: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 [[ANYEXT]] + ; CHECK: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 %arg ; CHECK: $vgpr0 = COPY %result(s32) %arg:_(s32) = COPY $vgpr0 
%trunc:_(s16) = G_TRUNC %arg @@ -274,9 +270,7 @@ ; CHECK-LABEL: name: cvt_f32_ubyte0_zext_lshr_24 ; CHECK: liveins: $vgpr0 ; CHECK: %arg:_(s32) = COPY $vgpr0 - ; CHECK: %trunc:_(s16) = G_TRUNC %arg(s32) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %trunc(s16) - ; CHECK: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE3 [[ANYEXT]] + ; CHECK: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE3 %arg ; CHECK: $vgpr0 = COPY %result(s32) %arg:_(s32) = COPY $vgpr0 %trunc:_(s16) = G_TRUNC %arg @@ -297,9 +291,7 @@ ; CHECK-LABEL: name: cvt_f32_ubyte1_zext_lshr_8 ; CHECK: liveins: $vgpr0 ; CHECK: %arg:_(s32) = COPY $vgpr0 - ; CHECK: %trunc:_(s16) = G_TRUNC %arg(s32) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %trunc(s16) - ; CHECK: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 [[ANYEXT]] + ; CHECK: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 %arg ; CHECK: $vgpr0 = COPY %result(s32) %arg:_(s32) = COPY $vgpr0 %trunc:_(s16) = G_TRUNC %arg Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir @@ -91,8 +91,7 @@ ; GFX6: %masked:_(s8) = G_AND %narrow, %masklow6 ; GFX6: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 2 ; GFX6: [[SHL:%[0-9]+]]:_(s8) = G_SHL %masked, [[C]](s8) - ; GFX6: %shl:_(s16) = G_ZEXT [[SHL]](s8) - ; GFX6: %result:_(s32) = G_ANYEXT %shl(s16) + ; GFX6: %result:_(s32) = G_ZEXT [[SHL]](s8) ; GFX6: $vgpr0 = COPY %result(s32) ; GFX9-LABEL: name: narrow_shl_s16_by_2_from_zext_s8 ; GFX9: liveins: $vgpr0 @@ -102,8 +101,7 @@ ; GFX9: %masked:_(s8) = G_AND %narrow, %masklow6 ; GFX9: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 2 ; GFX9: [[SHL:%[0-9]+]]:_(s8) = G_SHL %masked, [[C]](s8) - ; GFX9: %shl:_(s16) = G_ZEXT [[SHL]](s8) - ; GFX9: %result:_(s32) = G_ANYEXT %shl(s16) + ; GFX9: %result:_(s32) = G_ZEXT [[SHL]](s8) ; GFX9: $vgpr0 = COPY %result(s32) 
%argument:_(s32) = COPY $vgpr0 %narrow:_(s8) = G_TRUNC %argument