diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -489,6 +489,9 @@ bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI); bool applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI); + bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg); + void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg); + /// Try to transform \p MI by using all of the above /// combine functions. Returns true if changed. bool tryCombine(MachineInstr &MI); diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -113,6 +113,8 @@ class GIApplyKind; class GIApplyKindWithArgs; +def register_matchinfo: GIDefMatchData<"Register">; + def copy_prop : GICombineRule< (defs root:$d), (match (COPY $d, $s):$mi, @@ -338,18 +340,16 @@ >; // Fold int2ptr(ptr2int(x)) -> x -def p2i_to_i2p_matchinfo: GIDefMatchData<"Register">; def p2i_to_i2p: GICombineRule< - (defs root:$root, p2i_to_i2p_matchinfo:$info), + (defs root:$root, register_matchinfo:$info), (match (wip_match_opcode G_INTTOPTR):$root, [{ return Helper.matchCombineI2PToP2I(*${root}, ${info}); }]), (apply [{ return Helper.applyCombineI2PToP2I(*${root}, ${info}); }]) >; // Fold ptr2int(int2ptr(x)) -> x -def i2p_to_p2i_matchinfo: GIDefMatchData<"Register">; def i2p_to_p2i: GICombineRule< - (defs root:$root, i2p_to_p2i_matchinfo:$info), + (defs root:$root, register_matchinfo:$info), (match (wip_match_opcode G_PTRTOINT):$root, [{ return Helper.matchCombineP2IToI2P(*${root}, ${info}); }]), (apply [{ return Helper.applyCombineP2IToI2P(*${root}, ${info}); }]) @@ -390,18 +390,16 @@ (apply [{ return Helper.applyAshShlToSextInreg(*${root}, ${info});}]) >; // Fold (x 
& y) -> x or (x & y) -> y when (x & y) is known to equal x or equal y. -def redundant_and_matchinfo : GIDefMatchData<"Register">; def redundant_and: GICombineRule < - (defs root:$root, redundant_and_matchinfo:$matchinfo), + (defs root:$root, register_matchinfo:$matchinfo), (match (wip_match_opcode G_AND):$root, [{ return Helper.matchRedundantAnd(*${root}, ${matchinfo}); }]), (apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }]) >; // Fold (x | y) -> x or (x | y) -> y when (x | y) is known to equal x or equal y. -def redundant_or_matchinfo : GIDefMatchData<"Register">; def redundant_or: GICombineRule < - (defs root:$root, redundant_or_matchinfo:$matchinfo), + (defs root:$root, register_matchinfo:$matchinfo), (match (wip_match_opcode G_OR):$root, [{ return Helper.matchRedundantOr(*${root}, ${matchinfo}); }]), (apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }]) @@ -419,9 +417,8 @@ // Fold (anyext (trunc x)) -> x if the source type is same as // the destination type. -def anyext_trunc_fold_matchinfo : GIDefMatchData<"Register">; def anyext_trunc_fold: GICombineRule < - (defs root:$root, anyext_trunc_fold_matchinfo:$matchinfo), + (defs root:$root, register_matchinfo:$matchinfo), (match (wip_match_opcode G_ANYEXT):$root, [{ return Helper.matchCombineAnyExtTrunc(*${root}, ${matchinfo}); }]), (apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }]) @@ -445,9 +442,8 @@ >; // Fold (fneg (fneg x)) -> x. -def fneg_fneg_fold_matchinfo : GIDefMatchData<"Register">; def fneg_fneg_fold: GICombineRule < - (defs root:$root, fneg_fneg_fold_matchinfo:$matchinfo), + (defs root:$root, register_matchinfo:$matchinfo), (match (wip_match_opcode G_FNEG):$root, [{ return Helper.matchCombineFNegOfFNeg(*${root}, ${matchinfo}); }]), (apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }]) @@ -463,9 +459,8 @@ >; // Fold (fabs (fabs x)) -> (fabs x). 
-def fabs_fabs_fold_matchinfo : GIDefMatchData<"Register">; def fabs_fabs_fold: GICombineRule< - (defs root:$root, fabs_fabs_fold_matchinfo:$matchinfo), + (defs root:$root, register_matchinfo:$matchinfo), (match (wip_match_opcode G_FABS):$root, [{ return Helper.matchCombineFAbsOfFAbs(*${root}, ${matchinfo}); }]), (apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }]) @@ -565,6 +560,14 @@ def insert_vec_elt_combines : GICombineGroup< [combine_insert_vec_elts_build_vector]>; +def extract_vec_elt_build_vec : GICombineRule< + (defs root:$root, register_matchinfo:$matchinfo), + (match (wip_match_opcode G_EXTRACT_VECTOR_ELT):$root, + [{ return Helper.matchExtractVecEltBuildVec(*${root}, ${matchinfo}); }]), + (apply [{ Helper.applyExtractVecEltBuildVec(*${root}, ${matchinfo}); }])>; + +def extract_vec_elt_combines : GICombineGroup<[extract_vec_elt_build_vec]>; + // FIXME: These should use the custom predicate feature once it lands. def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, undef_to_negative_one, @@ -595,9 +598,9 @@ mul_by_neg_one]>; def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines, - ptr_add_immed_chain, combines_for_extload, combine_indexed_load_store, - undef_combines, identity_combines, phi_combines, simplify_add_to_sub, - hoist_logic_op_with_same_opcode_hands, + extract_vec_elt_combines, ptr_add_immed_chain, combines_for_extload, + combine_indexed_load_store, undef_combines, identity_combines, phi_combines, + simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands, shl_ashr_to_sext_inreg, sext_inreg_of_load, width_reduction_combines, select_combines, known_bits_simplifications, ext_ext_fold, diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include 
"llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -22,6 +23,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetMachine.h" @@ -3648,6 +3650,78 @@ return true; }
+/// Match a G_EXTRACT_VECTOR_ELT with a constant, in-range index whose vector
+/// operand resolves to a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC.
+///
+/// \param MI the G_EXTRACT_VECTOR_ELT to inspect.
+/// \param [out] Reg set, on success, to the build-vector source operand that
+/// the extract index selects.
+/// \returns true if the extract can be rewritten in terms of \p Reg.
+bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI,
+                                                Register &Reg) {
+  assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
+  // If we have a constant index, look for a G_BUILD_VECTOR source
+  // and find the source register that the index maps to.
+  Register SrcVec = MI.getOperand(1).getReg();
+  LLT SrcTy = MRI.getType(SrcVec);
+  auto Cst = getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+  if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
+    return false;
+
+  // G_BUILD_VECTOR has two type indices (result vector, source scalar), so
+  // the legality query has to supply both of them.
+  if (!isLegalOrBeforeLegalizer(
+          {TargetOpcode::G_BUILD_VECTOR, {SrcTy, SrcTy.getElementType()}}))
+    return false;
+
+  unsigned VecIdx = Cst->Value.getZExtValue();
+  MachineInstr *BuildVecMI =
+      getOpcodeDef(TargetOpcode::G_BUILD_VECTOR, SrcVec, MRI);
+  if (!BuildVecMI) {
+    BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR_TRUNC, SrcVec, MRI);
+    if (!BuildVecMI)
+      return false;
+    // The sources of a G_BUILD_VECTOR_TRUNC may be wider than the vector
+    // element type, so read the scalar type off the first source operand.
+    LLT ScalarTy = MRI.getType(BuildVecMI->getOperand(1).getReg());
+    if (!isLegalOrBeforeLegalizer(
+            {TargetOpcode::G_BUILD_VECTOR_TRUNC, {SrcTy, ScalarTy}}))
+      return false;
+  }
+
+  // Leave a source vector with other non-debug users alone unless the target
+  // asks for build-vector sources to be used aggressively.
+  const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
+  EVT Ty(getMVTForLLT(SrcTy));
+  if (!MRI.hasOneNonDBGUse(SrcVec) &&
+      !TLI.aggressivelyPreferBuildVectorSources(Ty))
+    return false;
+
+  // Operand 0 is the def, so source element N is operand N + 1.
+  Reg = BuildVecMI->getOperand(VecIdx + 1).getReg();
+  return true;
+}
+
+/// Replace the G_EXTRACT_VECTOR_ELT \p MI with the build-vector source
+/// register \p Reg, truncating it first when its type differs from the
+/// result type.
+void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
+                                                Register &Reg) {
+  // Check the type of the register, since it may have come from a
+  // G_BUILD_VECTOR_TRUNC.
+  LLT ScalarTy = MRI.getType(Reg);
+  Register DstReg = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+
+  Builder.setInstrAndDebugLoc(MI);
+  if (ScalarTy != DstTy) {
+    assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
+    Builder.buildTrunc(DstReg, Reg);
+    MI.eraseFromParent();
+    return;
+  }
+  replaceSingleDefInstWithReg(MI, Reg);
+}
+
 bool CombinerHelper::applyLoadOrCombine( MachineInstr &MI, std::function &MatchInfo) { Builder.setInstrAndDebugLoc(MI); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -638,6 +638,9 @@ .clampNumElements(0, v2s64, v2s64) .minScalarSameAs(1, 0); + getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC) + .lower(); + getActionDefinitionsBuilder(G_CTLZ) .legalForCartesianProduct( {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32}) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir @@ -0,0 +1,189 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -o - -march=aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s +--- +name: extract_from_build_vector +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: extract_from_build_vector + ; CHECK: liveins: $x0, $x1 + ; CHECK: %arg1:_(s64) = COPY $x0 + ; CHECK: $x0 = COPY %arg1(s64) + ; CHECK: RET_ReallyLR implicit $x0 + %arg1:_(s64) = COPY $x0 + %arg2:_(s64) = COPY $x1 + %zero:_(s32) = G_CONSTANT i32 0 + %bv:_(<2 x s64>) = G_BUILD_VECTOR 
%arg1(s64), %arg2(s64) + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32) + $x0 = COPY %extract(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: extract_from_build_vector_idx1 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: extract_from_build_vector_idx1 + ; CHECK: liveins: $x0, $x1 + ; CHECK: %arg2:_(s64) = COPY $x1 + ; CHECK: $x0 = COPY %arg2(s64) + ; CHECK: RET_ReallyLR implicit $x0 + %arg1:_(s64) = COPY $x0 + %arg2:_(s64) = COPY $x1 + %one:_(s32) = G_CONSTANT i32 1 + %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s32) + $x0 = COPY %extract(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: extract_from_build_vector_idx_invalid +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: extract_from_build_vector_idx_invalid + ; CHECK: liveins: $x0, $x1 + ; CHECK: %arg1:_(s64) = COPY $x0 + ; CHECK: %arg2:_(s64) = COPY $x1 + ; CHECK: %idx:_(s32) = G_CONSTANT i32 4 + ; CHECK: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) + ; CHECK: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %idx(s32) + ; CHECK: $x0 = COPY %extract(s64) + ; CHECK: RET_ReallyLR implicit $x0 + %arg1:_(s64) = COPY $x0 + %arg2:_(s64) = COPY $x1 + %idx:_(s32) = G_CONSTANT i32 4 + %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %idx(s32) + $x0 = COPY %extract(s64) + RET_ReallyLR implicit $x0 + +... 
+--- +name: extract_from_build_vector_trunc +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: extract_from_build_vector_trunc + ; CHECK: liveins: $x0, $x1 + ; CHECK: %arg1:_(s64) = COPY $x0 + ; CHECK: %extract:_(s32) = G_TRUNC %arg1(s64) + ; CHECK: $w0 = COPY %extract(s32) + ; CHECK: RET_ReallyLR implicit $w0 + %arg1:_(s64) = COPY $x0 + %arg2:_(s64) = COPY $x1 + %zero:_(s32) = G_CONSTANT i32 0 + %bv:_(<2 x s32>) = G_BUILD_VECTOR_TRUNC %arg1(s64), %arg2(s64) + %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %zero(s32) + $w0 = COPY %extract(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: extract_from_build_vector_multiple_uses +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: extract_from_build_vector_multiple_uses + ; CHECK: liveins: $x0, $x1 + ; CHECK: %arg1:_(s64) = COPY $x0 + ; CHECK: %arg2:_(s64) = COPY $x1 + ; CHECK: %zero:_(s32) = G_CONSTANT i32 0 + ; CHECK: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) + ; CHECK: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32) + ; CHECK: $x0 = COPY %extract(s64) + ; CHECK: $q0 = COPY %bv(<2 x s64>) + ; CHECK: RET_ReallyLR implicit $x0 + %arg1:_(s64) = COPY $x0 + %arg2:_(s64) = COPY $x1 + %zero:_(s32) = G_CONSTANT i32 0 + %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32) + $x0 = COPY %extract(s64) + $q0 = COPY %bv(<2 x s64>) + RET_ReallyLR implicit $x0 + +... 
+--- +name: extract_from_insert +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } +frameInfo: + maxAlignment: 1 +body: | + bb.1: + liveins: $x0, $x1 + ; This test checks that this combine runs after the insertvec->build_vector + ; combine. + ; CHECK-LABEL: name: extract_from_insert + ; CHECK: liveins: $x0, $x1 + ; CHECK: %arg1:_(s64) = COPY $x0 + ; CHECK: $x0 = COPY %arg1(s64) + ; CHECK: RET_ReallyLR implicit $x0 + %arg1:_(s64) = COPY $x0 + %arg2:_(s64) = COPY $x1 + %undef:_(<2 x s64>) = G_IMPLICIT_DEF + %zero:_(s32) = G_CONSTANT i32 0 + %one:_(s32) = G_CONSTANT i32 1 + %ins1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %arg1(s64), %zero(s32) + %ins2:_(<2 x s64>) = G_INSERT_VECTOR_ELT %ins1, %arg2(s64), %one(s32) + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %ins2(<2 x s64>), %zero(s32) + $x0 = COPY %extract(s64) + RET_ReallyLR implicit $x0 + +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -97,8 +97,8 @@ # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # # DEBUG-NEXT: G_BUILD_VECTOR_TRUNC (opcode {{[0-9]+}}): 2 type indices, 0 imm indices -# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined -# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # # DEBUG-NEXT: G_CONCAT_VECTORS (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. the first uncovered type index: 2, OK