Index: llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -465,6 +465,10 @@
   bool applyCombineInsertVecElts(MachineInstr &MI,
                                  SmallVectorImpl<Register> &MatchInfo);
 
+  /// sext (cmp pred, x, y) -> select (cmp pred, x, y), tval, 0
+  bool matchSextOfCmp(MachineInstr &MI, int64_t &MatchInfo);
+  bool applySextOfCmp(MachineInstr &MI, int64_t &MatchInfo);
+
   /// Try to transform \p MI by using all of the above
   /// combine functions. Returns true if changed.
   bool tryCombine(MachineInstr &MI);
Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2039,6 +2039,11 @@
     return false;
   }
 
+  /// LLT variant of the above.
+  virtual bool convertSelectOfConstantsToMath(LLT Ty) const {
+    return false;
+  }
+
   /// Return true if it is profitable to transform an integer
   /// multiplication-by-constant into simpler operations like shifts and adds.
   /// This may be true if the target does not directly support the
Index: llvm/include/llvm/Target/GlobalISel/Combine.td
===================================================================
--- llvm/include/llvm/Target/GlobalISel/Combine.td
+++ llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -86,6 +86,7 @@
 def extending_load_matchdata : GIDefMatchData<"PreferredTuple">;
 def indexed_load_store_matchdata : GIDefMatchData<"IndexedLoadStoreMatchInfo">;
 def instruction_steps_matchdata: GIDefMatchData<"InstructionStepsMatchInfo">;
+def int64_t_matchdata: GIDefMatchData<"int64_t">;
 
 /// The operator at the root of a GICombineRule.Match dag.
 def match;
@@ -542,6 +543,13 @@
 def insert_vec_elt_combines : GICombineGroup<
   [combine_insert_vec_elts_build_vector]>;
 
+// Transform sext (cmp pred, x, y) -> select (cmp pred, x, y), tval, 0
+def sext_of_cmp : GICombineRule<
+  (defs root:$root, int64_t_matchdata:$info),
+  (match (wip_match_opcode G_SEXT):$root,
+         [{ return Helper.matchSextOfCmp(*${root}, ${info}); }]),
+  (apply [{ return Helper.applySextOfCmp(*${root}, ${info}); }])>;
+
// FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -580,4 +588,4 @@
     unmerge_merge, fabs_fabs_fold, unmerge_cst, unmerge_dead_to_trunc,
     unmerge_zext_to_zext, trunc_ext_fold, trunc_shl, const_combines,
     xor_of_and_with_same_reg, ptr_add_with_zero,
-    shift_immed_chain, shift_of_shifted_logic_chain]>;
+    shift_immed_chain, shift_of_shifted_logic_chain, sext_of_cmp]>;
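The new LLT hook on TargetLowering defaults to false, so every target gets the
combine unless it opts out. As an illustration (not part of this patch;
MyTargetLowering and its policy are invented for the example), a target that
would rather keep a constant G_SELECT and turn it back into math could opt out
like this:

  // Sketch only: MyTargetLowering is a hypothetical TargetLowering subclass.
  // Returning true makes matchSextOfCmp below bail out, leaving sext(icmp)
  // untouched so the target's own select-to-math lowering can fire instead.
  bool MyTargetLowering::convertSelectOfConstantsToMath(LLT Ty) const {
    // Illustrative policy: opt out for scalars of 32 bits or fewer.
    return Ty.isScalar() && Ty.getSizeInBits() <= 32;
  }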
Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -3129,6 +3129,66 @@
   return true;
 }
 
+bool CombinerHelper::matchSextOfCmp(MachineInstr &MI, int64_t &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_SEXT);
+  // Match:
+  //
+  // %cmp = G_ICMP pred, %x, %y
+  // %ext_ty = G_SEXT %cmp
+  //
+  // And see if it is worth changing it into:
+  //
+  // %cmp = G_ICMP pred, %x, %y
+  // %ext_ty = G_SELECT %cmp, tval, 0
+  //
+  // where tval is either -1 (when the compare's type is s1), or whatever
+  // getICmpTrueVal provides otherwise.
+
+  // Some targets may prefer to change a constant G_SELECT into a G_SEXT or
+  // something similar. If the target wants to do that, it should not also
+  // run this combine.
+  LLT ExtTy = MRI.getType(MI.getOperand(0).getReg());
+  if (ExtTy.isVector() ||
+      getTargetLowering().convertSelectOfConstantsToMath(ExtTy))
+    return false;
+
+  Register CmpLHS;
+  Register CmpDst = MI.getOperand(1).getReg();
+  if (!mi_match(CmpDst, MRI, m_GICmp(m_Pred(), m_Reg(CmpLHS), m_Reg())))
+    return false;
+  LLT CmpTy = MRI.getType(CmpDst);
+  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SELECT, {ExtTy, CmpTy}}))
+    return false;
+
+  // Depending on the result type of the compare, we may need a different
+  // true value. E.g. say we have
+  //
+  // %cmp:_(s32) = G_ICMP pred, %x, %y
+  // %sext:_(s64) = G_SEXT %cmp
+  //
+  // and the true value on our target is 1. Then, when %cmp is true, it holds
+  // 1, and the value is still 1 after sign extension.
+  //
+  // If %cmp produced a s1, however, then %sext would be -1.
+  MatchInfo = (CmpTy == LLT::scalar(1))
+                  ? -1
+                  : getICmpTrueVal(getTargetLowering(), /*IsVector = */ false,
+                                   /*IsFP = */ false);
+  return true;
+}
+
+bool CombinerHelper::applySextOfCmp(MachineInstr &MI, int64_t &MatchInfo) {
+  Builder.setInstrAndDebugLoc(MI);
+  Register Dst = MI.getOperand(0).getReg();
+  LLT Ty = MRI.getType(Dst);
+  Builder.buildSelect(Dst, MI.getOperand(1).getReg(),
+                      Builder.buildConstant(Ty, MatchInfo),
+                      Builder.buildConstant(Ty, 0));
+  MI.eraseFromParent();
+  return true;
+}
+
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;
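Concretely, the match/apply pair performs the following rewrite (illustrative
MIR, mirroring the tests below). On a target whose getICmpTrueVal returns 1, a
non-s1 compare

  %cmp:_(s8) = G_ICMP intpred(sge), %x(s64), %y
  %sext:_(s32) = G_SEXT %cmp(s8)

becomes

  %true:_(s32) = G_CONSTANT i32 1
  %zero:_(s32) = G_CONSTANT i32 0
  %sext:_(s32) = G_SELECT %cmp(s8), %true, %zero

while a s1 compare uses -1 as the true value, since sign-extending a true s1
yields all ones.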
Index: llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-sext-to-select.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-sext-to-select.mir
@@ -0,0 +1,92 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=aarch64-prelegalizer-combiner %s -verify-machineinstrs -o - | FileCheck %s
+#
+# Test that we can change a G_SEXT fed by a G_ICMP into a G_SELECT.
+#
+# This is profitable on AArch64 because a G_ICMP feeding into a G_SEXT
+# requires, e.g.,
+#
+# cmp x0, x1
+# cset w8, ge
+# sbfx w0, w8, #0, #1
+#
+# while an equivalent G_SELECT + G_ICMP pair can be selected to
+#
+# cmp x0, x1
+# csetm w0, ge
+
+...
+---
+name: sext_to_select_true_negative_one
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1
+    ; In this case, %cmp produces a s1, so we should use -1 as the true value.
+
+    ; CHECK-LABEL: name: sext_to_select_true_negative_one
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %reg0:_(s64) = COPY $x0
+    ; CHECK: %reg1:_(s64) = COPY $x1
+    ; CHECK: %cmp:_(s1) = G_ICMP intpred(sge), %reg0(s64), %reg1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: %sext:_(s32) = G_SELECT %cmp(s1), [[C]], [[C1]]
+    ; CHECK: $w0 = COPY %sext(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %reg0:_(s64) = COPY $x0
+    %reg1:_(s64) = COPY $x1
+    %cmp:_(s1) = G_ICMP intpred(sge), %reg0(s64), %reg1
+    %sext:_(s32) = G_SEXT %cmp
+    $w0 = COPY %sext(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: sext_to_select_true_one
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1
+    ; In this case, %cmp produces a s8. If %cmp is true, it holds the target's
+    ; true value, 1, and sign-extending 1 still gives 1. As a result, the true
+    ; value for the select should be 1.
+
+    ; CHECK-LABEL: name: sext_to_select_true_one
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %reg0:_(s64) = COPY $x0
+    ; CHECK: %reg1:_(s64) = COPY $x1
+    ; CHECK: %cmp:_(s8) = G_ICMP intpred(sge), %reg0(s64), %reg1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: %sext:_(s32) = G_SELECT %cmp(s8), [[C]], [[C1]]
+    ; CHECK: $w0 = COPY %sext(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %reg0:_(s64) = COPY $x0
+    %reg1:_(s64) = COPY $x1
+    %cmp:_(s8) = G_ICMP intpred(sge), %reg0(s64), %reg1
+    %sext:_(s32) = G_SEXT %cmp
+    $w0 = COPY %sext(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: dont_combine_vector
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1
+    ; The combine isn't supported for vectors.
+
+    ; CHECK-LABEL: name: dont_combine_vector
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %reg0:_(<2 x s32>) = COPY $x0
+    ; CHECK: %reg1:_(<2 x s32>) = COPY $x1
+    ; CHECK: %cmp:_(<2 x s1>) = G_ICMP intpred(sge), %reg0(<2 x s32>), %reg1
+    ; CHECK: %sext:_(<2 x s32>) = G_SEXT %cmp(<2 x s1>)
+    ; CHECK: $x0 = COPY %sext(<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %reg0:_(<2 x s32>) = COPY $x0
+    %reg1:_(<2 x s32>) = COPY $x1
+    %cmp:_(<2 x s1>) = G_ICMP intpred(sge), %reg0(<2 x s32>), %reg1
+    %sext:_(<2 x s32>) = G_SEXT %cmp
+    $x0 = COPY %sext(<2 x s32>)
+    RET_ReallyLR implicit $x0
+...
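As an end-to-end illustration (hypothetical IR, not part of the test file),
with this combine enabled, GlobalISel on AArch64 should compile

  define i32 @cmp_sext(i64 %x, i64 %y) {
    %c = icmp sge i64 %x, %y
    %s = sext i1 %c to i32
    ret i32 %s
  }

to the two-instruction cmp/csetm sequence from the comment at the top of the
test file, rather than the three-instruction cmp/cset/sbfx sequence.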