diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -557,6 +557,9 @@
   void applyCombineInsertVecElts(MachineInstr &MI,
                                  SmallVectorImpl<Register> &MatchInfo);
 
+  bool matchInsertVectorEltToShuffle(MachineInstr &MI, unsigned &Idx);
+  void applyInsertVectorEltToShuffle(MachineInstr &MI, unsigned &Idx);
+
   /// Match expression trees of the form
   ///
   /// \code
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -685,9 +685,16 @@
     [{ return Helper.matchExtendThroughPhis(*${root}, ${matchinfo}); }]),
   (apply [{ Helper.applyExtendThroughPhis(*${root}, ${matchinfo}); }])>;
 
-// Currently only the one combine above.
+// Canonicalizes (insert_vector_elt X, K) into a shuffle_vector.
+def insert_vec_elt_to_shuffle : GICombineRule<
+  (defs root:$insertelt, unsigned_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_INSERT_VECTOR_ELT):$insertelt,
+         [{ return Helper.matchInsertVectorEltToShuffle(*${insertelt}, ${matchinfo}); }]),
+  (apply [{ Helper.applyInsertVectorEltToShuffle(*${insertelt}, ${matchinfo}); }])>;
+
 def insert_vec_elt_combines : GICombineGroup<
-  [combine_insert_vec_elts_build_vector]>;
+  [combine_insert_vec_elts_build_vector,
+   insert_vec_elt_to_shuffle]>;
 
 def extract_vec_elt_build_vec : GICombineRule<
   (defs root:$root, register_matchinfo:$matchinfo),
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2630,6 +2630,73 @@
   MI.eraseFromParent();
 }
 
+bool CombinerHelper::matchInsertVectorEltToShuffle(MachineInstr &MI,
+                                                   unsigned &Idx) {
+  assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
+
+  // Canonicalizes a G_INSERT_VECTOR_ELT w/ a constant index into an equivalent
+  // G_SHUFFLE_VECTOR if it is a legal transformation.
+
+  // If this MI is part of a sequence of insert_vec_elts, then
+  // don't do the combine in the middle of the sequence.
+  Register DstReg = MI.getOperand(0).getReg();
+  if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
+                                   TargetOpcode::G_INSERT_VECTOR_ELT)
+    return false;
+
+  LLT VecTy = MRI.getType(DstReg);
+  LLT EltTy = MRI.getType(MI.getOperand(2).getReg());
+  LLT IdxTy = MRI.getType(MI.getOperand(3).getReg());
+
+  if (VecTy.isScalable() ||
+      !isLegalOrBeforeLegalizer(
+          {TargetOpcode::G_INSERT_VECTOR_ELT, {VecTy, EltTy, IdxTy}}))
+    return false;
+
+  const auto MaybeIdxVal =
+      getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
+  if (!MaybeIdxVal)
+    return false;
+
+  Idx = MaybeIdxVal->Value.getZExtValue();
+  return Idx < VecTy.getNumElements();
+}
+
+void CombinerHelper::applyInsertVectorEltToShuffle(MachineInstr &MI,
+                                                   unsigned &Idx) {
+  Builder.setInstrAndDebugLoc(MI);
+
+  Register Ins = MI.getOperand(2).getReg();
+  Register Vec = MI.getOperand(1).getReg();
+  Register Dst = MI.getOperand(0).getReg();
+
+  LLT VecTy = MRI.getType(Dst);
+  LLT EltTy = VecTy.getElementType();
+  const unsigned NumElts = VecTy.getNumElements();
+
+  Register Undef = Builder.buildUndef(EltTy).getReg(0);
+
+  SmallVector<Register, 8> Srcs;
+  Srcs.push_back(Ins);
+  for (unsigned K = 1; K < NumElts; ++K)
+    Srcs.push_back(Undef);
+
+  Register OtherVec = Builder.buildBuildVector(VecTy, Srcs).getReg(0);
+
+  // Mask index NumElts selects Ins (lane 0 of OtherVec); indices
+  // 0..(NumElts-1) select the original elements of Vec.
+  SmallVector<int, 8> ShuffleMask;
+  for (unsigned CurIdx = 0; CurIdx < NumElts; ++CurIdx) {
+    if (CurIdx == Idx)
+      ShuffleMask.push_back(NumElts);
+    else
+      ShuffleMask.push_back(CurIdx);
+  }
+
+  Builder.buildShuffleVector(Dst, Vec, OtherVec, ShuffleMask);
+  MI.eraseFromParent();
+}
+
 void CombinerHelper::applySimplifyAddToSub(
     MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
   Builder.setInstr(MI);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-insertvecelt-to-shufflevector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-insertvecelt-to-shufflevector.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-insertvecelt-to-shufflevector.mir
@@ -0,0 +1,135 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name: test_v2s16_idx0
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: test_v2s16_idx0
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %src:_(<2 x s16>) = COPY $vgpr0
+    ; CHECK-NEXT: %elt:_(s16) = G_CONSTANT i16 42
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR %elt(s16), [[DEF]](s16)
+    ; CHECK-NEXT: %ins:_(<2 x s16>) = G_SHUFFLE_VECTOR %src(<2 x s16>), [[BUILD_VECTOR]], shufflemask(2, 1)
+    ; CHECK-NEXT: $vgpr0 = COPY %ins(<2 x s16>)
+    %src:_(<2 x s16>) = COPY $vgpr0
+    %idx:_(s32) = G_CONSTANT i32 0
+    %elt:_(s16) = G_CONSTANT i16 42
+    %ins:_(<2 x s16>) = G_INSERT_VECTOR_ELT %src, %elt, %idx
+    $vgpr0 = COPY %ins
+...
+
+---
+name: test_v2s16_idx1
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: test_v2s16_idx1
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %src:_(<2 x s16>) = COPY $vgpr0
+    ; CHECK-NEXT: %elt:_(s16) = G_CONSTANT i16 42
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR %elt(s16), [[DEF]](s16)
+    ; CHECK-NEXT: %ins:_(<2 x s16>) = G_SHUFFLE_VECTOR %src(<2 x s16>), [[BUILD_VECTOR]], shufflemask(0, 2)
+    ; CHECK-NEXT: $vgpr0 = COPY %ins(<2 x s16>)
+    %src:_(<2 x s16>) = COPY $vgpr0
+    %idx:_(s32) = G_CONSTANT i32 1
+    %elt:_(s16) = G_CONSTANT i16 42
+    %ins:_(<2 x s16>) = G_INSERT_VECTOR_ELT %src, %elt, %idx
+    $vgpr0 = COPY %ins
+...
+
+---
+name: test_v2s16_idx2_nofold
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: test_v2s16_idx2_nofold
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %ins:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr0 = COPY %ins(<2 x s16>)
+    %src:_(<2 x s16>) = COPY $vgpr0
+    %idx:_(s32) = G_CONSTANT i32 2
+    %elt:_(s16) = G_CONSTANT i16 42
+    %ins:_(<2 x s16>) = G_INSERT_VECTOR_ELT %src, %elt, %idx
+    $vgpr0 = COPY %ins
+...
+
+---
+name: test_v3s16_idx2
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2
+    ; CHECK-LABEL: name: test_v3s16_idx2
+    ; CHECK: liveins: $vgpr0_vgpr1_vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %src:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; CHECK-NEXT: %truncsrc:_(<3 x s16>) = G_TRUNC %src(<3 x s32>)
+    ; CHECK-NEXT: %elt:_(s16) = G_CONSTANT i16 42
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR %elt(s16), [[DEF]](s16), [[DEF]](s16)
+    ; CHECK-NEXT: %ins:_(<3 x s16>) = G_SHUFFLE_VECTOR %truncsrc(<3 x s16>), [[BUILD_VECTOR]], shufflemask(0, 1, 3)
+    ; CHECK-NEXT: %zextins:_(<3 x s32>) = G_ZEXT %ins(<3 x s16>)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY %zextins(<3 x s32>)
+    %src:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    %truncsrc:_(<3 x s16>) = G_TRUNC %src
+    %idx:_(s32) = G_CONSTANT i32 2
+    %elt:_(s16) = G_CONSTANT i16 42
+    %ins:_(<3 x s16>) = G_INSERT_VECTOR_ELT %truncsrc, %elt, %idx
+    %zextins:_(<3 x s32>) = G_ZEXT %ins
+    $vgpr0_vgpr1_vgpr2 = COPY %zextins
+...
+
+---
+name: test_v2s32_idx1
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; CHECK-LABEL: name: test_v2s32_idx1
+    ; CHECK: liveins: $vgpr0_vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %src:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: %elt:_(s32) = G_CONSTANT i32 42
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR %elt(s32), [[DEF]](s32)
+    ; CHECK-NEXT: %ins:_(<2 x s32>) = G_SHUFFLE_VECTOR %src(<2 x s32>), [[BUILD_VECTOR]], shufflemask(0, 2)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %ins(<2 x s32>)
+    %src:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %idx:_(s32) = G_CONSTANT i32 1
+    %elt:_(s32) = G_CONSTANT i32 42
+    %ins:_(<2 x s32>) = G_INSERT_VECTOR_ELT %src, %elt, %idx
+    $vgpr0_vgpr1 = COPY %ins
+...
+
+---
+name: test_v4s16_idx3
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; CHECK-LABEL: name: test_v4s16_idx3
+    ; CHECK: liveins: $vgpr0_vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %src:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: %elt:_(s16) = G_CONSTANT i16 42
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR %elt(s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16)
+    ; CHECK-NEXT: %ins:_(<4 x s16>) = G_SHUFFLE_VECTOR %src(<4 x s16>), [[BUILD_VECTOR]], shufflemask(0, 1, 2, 4)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %ins(<4 x s16>)
+    %src:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    %idx:_(s32) = G_CONSTANT i32 3
+    %elt:_(s16) = G_CONSTANT i16 42
+    %ins:_(<4 x s16>) = G_INSERT_VECTOR_ELT %src, %elt, %idx
+    $vgpr0_vgpr1 = COPY %ins
+...
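
Note (illustration only, not part of the patch): the canonicalization rewrites a constant-index insert into a build_vector of the inserted element plus undefs, followed by a shuffle that picks the inserted lane from that new vector. For the <2 x s32>, index-1 case exercised in test_v2s32_idx1 above, the before/after shape is (virtual register names are illustrative):

  ; Before: insert %elt at lane 1 of %src.
  %ins:_(<2 x s32>) = G_INSERT_VECTOR_ELT %src, %elt, %idx

  ; After: %elt becomes lane 0 of a mostly-undef build_vector; the mask keeps
  ; lane 0 of %src and takes lane 2 (i.e. %elt) for lane 1 of the result.
  %undef:_(s32) = G_IMPLICIT_DEF
  %bv:_(<2 x s32>) = G_BUILD_VECTOR %elt(s32), %undef(s32)
  %ins:_(<2 x s32>) = G_SHUFFLE_VECTOR %src(<2 x s32>), %bv, shufflemask(0, 2)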