Index: llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -268,6 +268,19 @@
   bool applyCombineExtOfExt(MachineInstr &MI,
                             std::tuple<Register, unsigned> &MatchInfo);
 
+  /// Transform trunc ([asz]ext x) to x or ([asz]ext x) or (trunc x).
+  bool matchCombineTruncOfExt(MachineInstr &MI,
+                              std::pair<Register, unsigned> &MatchInfo);
+  bool applyCombineTruncOfExt(MachineInstr &MI,
+                              std::pair<Register, unsigned> &MatchInfo);
+
+  /// Transform trunc (shl x, K) to shl (trunc x),
+  /// K => K < VT.getScalarSizeInBits().
+  bool matchCombineTruncOfShl(MachineInstr &MI,
+                              std::pair<Register, Register> &MatchInfo);
+  bool applyCombineTruncOfShl(MachineInstr &MI,
+                              std::pair<Register, Register> &MatchInfo);
+
   /// Return true if any explicit use operand on \p MI is defined by a
   /// G_IMPLICIT_DEF.
   bool matchAnyExplicitUseIsUndef(MachineInstr &MI);
Index: llvm/include/llvm/Target/GlobalISel/Combine.td
===================================================================
--- llvm/include/llvm/Target/GlobalISel/Combine.td
+++ llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -198,7 +198,7 @@
 // replaced with undef.
 def propagate_undef_any_op: GICombineRule<
   (defs root:$root),
-  (match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR):$root,
+  (match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR, G_TRUNC):$root,
          [{ return Helper.matchAnyExplicitUseIsUndef(*${root}); }]),
   (apply [{ Helper.replaceInstWithUndef(*${root}); }])>;
 
@@ -381,6 +381,24 @@
   (apply [{ return Helper.applyNotCmp(*${d}, ${info}); }])
 >;
 
+// Fold trunc ([asz]ext x) -> x or ([asz]ext x) or (trunc x).
+def trunc_ext_fold_matchinfo : GIDefMatchData<"std::pair<Register, unsigned>">;
+def trunc_ext_fold: GICombineRule <
+  (defs root:$root, trunc_ext_fold_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_TRUNC):$root,
+         [{ return Helper.matchCombineTruncOfExt(*${root}, ${matchinfo}); }]),
+  (apply [{ return Helper.applyCombineTruncOfExt(*${root}, ${matchinfo}); }])
+>;
+
+// Fold trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits().
+def trunc_shl_matchinfo : GIDefMatchData<"std::pair<Register, Register>">;
+def trunc_shl: GICombineRule <
+  (defs root:$root, trunc_shl_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_TRUNC):$root,
+         [{ return Helper.matchCombineTruncOfShl(*${root}, ${matchinfo}); }]),
+  (apply [{ return Helper.applyCombineTruncOfShl(*${root}, ${matchinfo}); }])
+>;
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -409,4 +427,4 @@
     shl_ashr_to_sext_inreg, sext_inreg_of_load,
     width_reduction_combines, select_combines,
     known_bits_simplifications, ext_ext_fold,
-    not_cmp_fold]>;
+    not_cmp_fold, trunc_ext_fold, trunc_shl]>;
Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1816,6 +1816,82 @@
   return false;
 }
 
+bool CombinerHelper::matchCombineTruncOfExt(
+    MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
+  Register SrcReg = MI.getOperand(1).getReg();
+  MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+  unsigned SrcOpc = SrcMI->getOpcode();
+  if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT ||
+      SrcOpc == TargetOpcode::G_ZEXT) {
+    MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc);
+    return true;
+  }
+  return false;
+}
+
+bool CombinerHelper::applyCombineTruncOfExt(
+    MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
+  Register SrcReg = MatchInfo.first;
+  unsigned SrcExtOp = MatchInfo.second;
+  Register DstReg = MI.getOperand(0).getReg();
+  LLT SrcTy = MRI.getType(SrcReg);
+  LLT DstTy = MRI.getType(DstReg);
+  if (SrcTy == DstTy) {
+    MI.eraseFromParent();
+    replaceRegWith(MRI, DstReg, SrcReg);
+    return true;
+  }
+  Builder.setInstrAndDebugLoc(MI);
+  if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
+    Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
+  else
+    Builder.buildTrunc(DstReg, SrcReg);
+  MI.eraseFromParent();
+  return true;
+}
+
+bool CombinerHelper::matchCombineTruncOfShl(
+    MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  Register ShiftSrc;
+  Register ShiftAmt;
+
+  LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(DstTy);
+  if (MRI.hasOneNonDBGUse(SrcReg) &&
+      mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) &&
+      isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {DstTy, ShiftAmtTy}})) {
+    KnownBits Known = KB->getKnownBits(ShiftAmt);
+    unsigned Size = DstTy.getSizeInBits();
+    if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
+      MatchInfo = std::make_pair(ShiftSrc, ShiftAmt);
+      return true;
+    }
+  }
+  return false;
+}
+
+bool CombinerHelper::applyCombineTruncOfShl(
+    MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+
+  Register ShiftSrc = MatchInfo.first;
+  Register ShiftAmt = MatchInfo.second;
+  Builder.setInstrAndDebugLoc(MI);
+  Builder.buildShl(DstReg, Builder.buildTrunc(DstTy, ShiftSrc),
+                   Builder.buildTrunc(DstTy, ShiftAmt), SrcMI->getFlags());
+  MI.eraseFromParent();
+  return true;
+}
+
 bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
   return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
     return MO.isReg() &&
Index: llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -107,8 +107,8 @@
 ; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(s96) = G_ADD %{{[0-9]+}}:_, %{{[0-9]+}}:_ (in function: nonpow2_add_narrowing)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_add_narrowing
 ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_add_narrowing:
-define void @nonpow2_add_narrowing() {
-  %a = add i128 undef, undef
+define void @nonpow2_add_narrowing(i128 %x, i128 %y) {
+  %a = add i128 %x, %y
   %b = trunc i128 %a to i96
   %dummy = add i96 %b, %b
   store i96 %dummy, i96* undef
Index: llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir
@@ -0,0 +1,142 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s
+---
+name: test_combine_trunc_undef
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_combine_trunc_undef
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: $w0 = COPY [[DEF]](s32)
+    %0:_(s64) = G_IMPLICIT_DEF
+    %1:_(s32) = G_TRUNC %0(s64)
+    $w0 = COPY %1(s32)
+...
+---
+name: test_combine_trunc_undef_vec
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_combine_trunc_undef_vec
+    ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: $x0 = COPY [[DEF]](<2 x s32>)
+    %0:_(<2 x s64>) = G_IMPLICIT_DEF
+    %1:_(<2 x s32>) = G_TRUNC %0(<2 x s64>)
+    $x0 = COPY %1(<2 x s32>)
+...
+---
+name: test_combine_trunc_anyext_s32_s16
+body: |
+  bb.1:
+    liveins: $h0
+    ; CHECK-LABEL: name: test_combine_trunc_anyext_s32_s16
+    ; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s16)
+    ; CHECK: $w0 = COPY [[ANYEXT]](s32)
+    %0:_(s16) = COPY $h0
+    %1:_(s64) = G_ANYEXT %0(s16)
+    %2:_(s32) = G_TRUNC %1(s64)
+    $w0 = COPY %2(s32)
+...
+---
+name: test_combine_trunc_anyext_s32_s16_vec
+body: |
+  bb.1:
+    liveins: $s0
+    ; CHECK-LABEL: name: test_combine_trunc_anyext_s32_s16_vec
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $s0
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(<2 x s32>) = G_ANYEXT [[COPY]](<2 x s16>)
+    ; CHECK: $x0 = COPY [[ANYEXT]](<2 x s32>)
+    %0:_(<2 x s16>) = COPY $s0
+    %1:_(<2 x s64>) = G_ANYEXT %0(<2 x s16>)
+    %2:_(<2 x s32>) = G_TRUNC %1(<2 x s64>)
+    $x0 = COPY %2(<2 x s32>)
+...
+---
+name: test_combine_trunc_sext_s32_s16
+body: |
+  bb.1:
+    liveins: $h0
+    ; CHECK-LABEL: name: test_combine_trunc_sext_s32_s16
+    ; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
+    ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s16)
+    ; CHECK: $w0 = COPY [[SEXT]](s32)
+    %0:_(s16) = COPY $h0
+    %1:_(s64) = G_SEXT %0(s16)
+    %2:_(s32) = G_TRUNC %1(s64)
+    $w0 = COPY %2(s32)
+...
+---
+name: test_combine_trunc_zext_s32_s16
+body: |
+  bb.1:
+    liveins: $h0
+    ; CHECK-LABEL: name: test_combine_trunc_zext_s32_s16
+    ; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s16)
+    ; CHECK: $w0 = COPY [[ZEXT]](s32)
+    %0:_(s16) = COPY $h0
+    %1:_(s64) = G_ZEXT %0(s16)
+    %2:_(s32) = G_TRUNC %1(s64)
+    $w0 = COPY %2(s32)
+...
+---
+name: test_combine_trunc_anyext_s32_s32
+body: |
+  bb.1:
+    liveins: $w0
+    ; CHECK-LABEL: name: test_combine_trunc_anyext_s32_s32
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: $w0 = COPY [[COPY]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s64) = G_ANYEXT %0(s32)
+    %2:_(s32) = G_TRUNC %1(s64)
+    $w0 = COPY %2(s32)
+...
+---
+name: test_combine_trunc_anyext_s32_s64
+body: |
+  bb.1:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_combine_trunc_anyext_s32_s64
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+    ; CHECK: $w0 = COPY [[TRUNC]](s32)
+    %0:_(s64) = COPY $x0
+    %1:_(s128) = G_ANYEXT %0(s64)
+    %2:_(s32) = G_TRUNC %1(s128)
+    $w0 = COPY %2(s32)
+...
+---
+name: test_combine_trunc_shl_s32_by_2
+body: |
+  bb.1:
+    liveins: $w0
+    ; CHECK-LABEL: name: test_combine_trunc_shl_s32_by_2
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16)
+    ; CHECK: $h0 = COPY [[SHL]](s16)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 2
+    %2:_(s32) = G_SHL %0(s32), %1(s32)
+    %3:_(s16) = G_TRUNC %2(s32)
+    $h0 = COPY %3(s16)
+...
+---
+name: test_combine_trunc_shl_s32_by_17
+body: |
+  bb.1:
+    liveins: $w0
+    ; CHECK-LABEL: name: test_combine_trunc_shl_s32_by_17
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CHECK: $h0 = COPY [[TRUNC]](s16)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 17
+    %2:_(s32) = G_SHL %0(s32), %1(s32)
+    %3:_(s16) = G_TRUNC %2(s32)
+    $h0 = COPY %3(s16)
+...
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
@@ -82,14 +82,14 @@
 ;
 ; GFX8-LABEL: s_shl_i8_7:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_and_b32 s0, s0, 0xff
-; GFX8-NEXT:    s_lshl_b32 s0, s0, 7
+; GFX8-NEXT:    s_bfe_u32 s1, 7, 0x100000
+; GFX8-NEXT:    s_lshl_b32 s0, s0, s1
 ; GFX8-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-LABEL: s_shl_i8_7:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_and_b32 s0, s0, 0xff
-; GFX9-NEXT:    s_lshl_b32 s0, s0, 7
+; GFX9-NEXT:    s_bfe_u32 s1, 7, 0x100000
+; GFX9-NEXT:    s_lshl_b32 s0, s0, s1
 ; GFX9-NEXT:    ; return to shader part epilog
   %result = shl i8 %value, 7
   ret i8 %result
@@ -426,14 +426,14 @@
 ;
 ; GFX8-LABEL: s_shl_i16_15:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
-; GFX8-NEXT:    s_lshl_b32 s0, s0, 15
+; GFX8-NEXT:    s_bfe_u32 s1, 15, 0x100000
+; GFX8-NEXT:    s_lshl_b32 s0, s0, s1
 ; GFX8-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-LABEL: s_shl_i16_15:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_and_b32 s0, s0, 0xffff
-; GFX9-NEXT:    s_lshl_b32 s0, s0, 15
+; GFX9-NEXT:    s_bfe_u32 s1, 15, 0x100000
+; GFX9-NEXT:    s_lshl_b32 s0, s0, s1
 ; GFX9-NEXT:    ; return to shader part epilog
   %result = shl i16 %value, 15
   ret i16 %result
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
@@ -37,7 +37,6 @@
 ; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
 ; GFX8-NEXT:    s_mov_b32 s3, s2
 ; GFX8-NEXT:    s_and_b32 s0, s0, s2
-; GFX8-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
 ; GFX8-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
 ; GFX8-NEXT:    s_and_b32 s0, s0, s2
@@ -121,10 +120,8 @@
 ; GFX8-NEXT:    s_mov_b32 s5, s4
 ; GFX8-NEXT:    s_lshr_b32 s7, s1, 16
 ; GFX8-NEXT:    s_and_b32 s6, s1, s4
-; GFX8-NEXT:    s_and_b64 s[0:1], s[2:3], s[4:5]
-; GFX8-NEXT:    s_xor_b64 s[0:1], s[0:1], s[4:5]
-; GFX8-NEXT:    s_and_b64 s[2:3], s[6:7], s[4:5]
-; GFX8-NEXT:    s_xor_b64 s[2:3], s[2:3], s[4:5]
+; GFX8-NEXT:    s_xor_b64 s[0:1], s[2:3], s[4:5]
+; GFX8-NEXT:    s_xor_b64 s[2:3], s[6:7], s[4:5]
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
 ; GFX8-NEXT:    s_and_b32 s0, s0, s4
 ; GFX8-NEXT:    s_or_b32 s0, s1, s0