diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -495,6 +495,13 @@
   bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg);
   void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg);
 
+  bool matchExtractAllEltsFromBuildVector(
+      MachineInstr &MI,
+      SmallVectorImpl<std::pair<Register, MachineInstr *>> &MatchInfo);
+  void applyExtractAllEltsFromBuildVector(
+      MachineInstr &MI,
+      SmallVectorImpl<std::pair<Register, MachineInstr *>> &MatchInfo);
+
   /// Try to transform \p MI by using all of the above
   /// combine functions. Returns true if changed.
   bool tryCombine(MachineInstr &MI);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -576,7 +576,18 @@
     [{ return Helper.matchExtractVecEltBuildVec(*${root}, ${matchinfo}); }]),
   (apply [{ Helper.applyExtractVecEltBuildVec(*${root}, ${matchinfo}); }])>;
 
-def extract_vec_elt_combines : GICombineGroup<[extract_vec_elt_build_vec]>;
+// Fold away full elt extracts from a build_vector.
+def extract_all_elts_from_build_vector_matchinfo :
+    GIDefMatchData<"SmallVector<std::pair<Register, MachineInstr *>>">;
+def extract_all_elts_from_build_vector : GICombineRule<
+  (defs root:$root, extract_all_elts_from_build_vector_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_BUILD_VECTOR):$root,
+    [{ return Helper.matchExtractAllEltsFromBuildVector(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.applyExtractAllEltsFromBuildVector(*${root}, ${matchinfo}); }])>;
+
+def extract_vec_elt_combines : GICombineGroup<[
+  extract_vec_elt_build_vec,
+  extract_all_elts_from_build_vector]>;
 
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/CodeGen/GlobalISel/Combiner.h"
 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
@@ -3719,6 +3720,61 @@
   replaceSingleDefInstWithReg(MI, Reg);
 }
 
+bool CombinerHelper::matchExtractAllEltsFromBuildVector(
+    MachineInstr &MI,
+    SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
+  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+  // This combine tries to find build_vectors whose source elements are all
+  // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms
+  // like masked load scalarization are run late in the pipeline. There's
+  // already a combine for a similar pattern starting from the extract, but
+  // that one bails out if the build_vector has multiple uses, which is
+  // exactly the case here. Starting the combine from the build_vector feels
+  // more natural than trying to find sibling nodes of extracts.
+  // E.g.
+  //  %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
+  //  %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
+  //  %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
+  //  %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
+  //  %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
+  // ==>
+  // replace ext{1,2,3,4} with %s{1,2,3,4}
+
+  Register DstReg = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  unsigned NumElts = DstTy.getNumElements();
+
+  SmallBitVector ExtractedElts(NumElts);
+  for (auto &II : make_range(MRI.use_instr_nodbg_begin(DstReg),
+                             MRI.use_instr_nodbg_end())) {
+    if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
+      return false;
+    auto Cst = getConstantVRegVal(II.getOperand(2).getReg(), MRI);
+    if (!Cst)
+      return false;
+    unsigned Idx = Cst.getValue().getZExtValue();
+    if (Idx >= NumElts)
+      return false; // Out of range.
+    ExtractedElts.set(Idx);
+    SrcDstPairs.emplace_back(
+        std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
+  }
+  // Match if every element was extracted.
+  return ExtractedElts.all();
+}
+
+void CombinerHelper::applyExtractAllEltsFromBuildVector(
+    MachineInstr &MI,
+    SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
+  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+  for (auto &Pair : SrcDstPairs) {
+    auto *ExtMI = Pair.second;
+    replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
+    ExtMI->eraseFromParent();
+  }
+  MI.eraseFromParent();
+}
+
 bool CombinerHelper::applyLoadOrCombine(
     MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
   Builder.setInstrAndDebugLoc(MI);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
@@ -0,0 +1,154 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -march=aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s
+---
+name: full_extracts_from_build_vector
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment: 1
+  maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: full_extracts_from_build_vector
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %arg1:_(s64) = COPY $x0
+    ; CHECK: %arg2:_(s64) = COPY $x1
+    ; CHECK: $x0 = COPY %arg1(s64)
+    ; CHECK: $x1 = COPY %arg2(s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %arg1:_(s64) = COPY $x0
+    %arg2:_(s64) = COPY $x1
+    %zero:_(s32) = G_CONSTANT i32 0
+    %one:_(s32) = G_CONSTANT i32 1
+    %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+    %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32)
+    %extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s32)
+    $x0 = COPY %extract(s64)
+    $x1 = COPY %extract2(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: full_extracts_from_build_vector_other_use
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment: 1
+  maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: full_extracts_from_build_vector_other_use
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %arg1:_(s64) = COPY $x0
+    ; CHECK: %arg2:_(s64) = COPY $x1
+    ; CHECK: %zero:_(s32) = G_CONSTANT i32 0
+    ; CHECK: %one:_(s32) = G_CONSTANT i32 1
+    ; CHECK: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+    ; CHECK: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32)
+    ; CHECK: %extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s32)
+    ; CHECK: $x0 = COPY %extract(s64)
+    ; CHECK: $x1 = COPY %extract2(s64)
+    ; CHECK: $q0 = COPY %bv(<2 x s64>)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %arg1:_(s64) = COPY $x0
+    %arg2:_(s64) = COPY $x1
+    %zero:_(s32) = G_CONSTANT i32 0
+    %one:_(s32) = G_CONSTANT i32 1
+    %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+    %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32)
+    %extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s32)
+    $x0 = COPY %extract(s64)
+    $x1 = COPY %extract2(s64)
+    $q0 = COPY %bv(<2 x s64>)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: partial_extracts_from_build_vector_multiple_per_elt
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment: 1
+  maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: partial_extracts_from_build_vector_multiple_per_elt
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %arg1:_(s64) = COPY $x0
+    ; CHECK: %arg2:_(s64) = COPY $x1
+    ; CHECK: $x0 = COPY %arg1(s64)
+    ; CHECK: $x1 = COPY %arg2(s64)
+    ; CHECK: $x2 = COPY %arg2(s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %arg1:_(s64) = COPY $x0
+    %arg2:_(s64) = COPY $x1
+    %zero:_(s32) = G_CONSTANT i32 0
+    %one:_(s32) = G_CONSTANT i32 1
+    %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+    %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32)
+    %extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s32)
+    %extract3:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s32)
+    $x0 = COPY %extract(s64)
+    $x1 = COPY %extract2(s64)
+    $x2 = COPY %extract3(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: full_extracts_from_build_vector_idx_out_of_range
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment: 1
+  maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: full_extracts_from_build_vector_idx_out_of_range
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %arg1:_(s64) = COPY $x0
+    ; CHECK: %arg2:_(s64) = COPY $x1
+    ; CHECK: %zero:_(s32) = G_CONSTANT i32 0
+    ; CHECK: %two:_(s32) = G_CONSTANT i32 2
+    ; CHECK: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+    ; CHECK: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32)
+    ; CHECK: %extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %two(s32)
+    ; CHECK: $x0 = COPY %extract(s64)
+    ; CHECK: $x1 = COPY %extract2(s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %arg1:_(s64) = COPY $x0
+    %arg2:_(s64) = COPY $x1
+    %zero:_(s32) = G_CONSTANT i32 0
+    %one:_(s32) = G_CONSTANT i32 1
+    %two:_(s32) = G_CONSTANT i32 2
+    %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+    %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32)
+    %extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %two(s32)
+    $x0 = COPY %extract(s64)
+    $x1 = COPY %extract2(s64)
+    RET_ReallyLR implicit $x0
+
+...
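
A minimal standalone sketch of the lane-coverage condition that matchExtractAllEltsFromBuildVector tests, restated in plain C++ so it compiles without LLVM. std::vector<bool> stands in for llvm::SmallBitVector, and the helper name allLanesExtracted is hypothetical, for illustration only; it models the index checks but not the additional bailout on non-extract uses of the build_vector (exercised by the other_use test above).

// Sketch (hypothetical helper, not part of the patch): true iff every lane
// in [0, NumElts) is extracted at least once and no index is out of range,
// which is the condition under which the combine fires.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

static bool allLanesExtracted(const std::vector<uint64_t> &ExtractIndices,
                              unsigned NumElts) {
  std::vector<bool> Extracted(NumElts, false);
  for (uint64_t Idx : ExtractIndices) {
    if (Idx >= NumElts)
      return false; // Out-of-range index blocks the fold (last MIR test).
    Extracted[Idx] = true; // Duplicate extracts of one lane are allowed.
  }
  return std::all_of(Extracted.begin(), Extracted.end(),
                     [](bool Seen) { return Seen; });
}

int main() {
  assert(allLanesExtracted({0, 1}, 2));    // full coverage -> combine fires
  assert(allLanesExtracted({0, 1, 1}, 2)); // duplicates still fire
  assert(!allLanesExtracted({0}, 2));      // lane 1 never extracted
  assert(!allLanesExtracted({0, 2}, 2));   // index out of range
  return 0;
}

Note that the full match is stricter than this sketch: because the loop in matchExtractAllEltsFromBuildVector walks all non-debug users of the build_vector and rejects any user that is not a G_EXTRACT_VECTOR_ELT with a constant in-range index, the apply step can safely erase both the extracts and the build_vector itself.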