diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -243,6 +243,14 @@
   bool applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal);
   bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount);
 
+  /// Transform <ty,...> G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z.
+  bool
+  matchCombineUnmergeMergeToPlainValues(MachineInstr &MI,
+                                        SmallVectorImpl<Register> &Operands);
+  bool
+  applyCombineUnmergeMergeToPlainValues(MachineInstr &MI,
+                                        SmallVectorImpl<Register> &Operands);
+
   /// Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
   bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg);
   bool applyCombineI2PToP2I(MachineInstr &MI, Register &Reg);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -381,6 +381,15 @@
   (apply [{ return Helper.applyNotCmp(*${d}, ${info}); }])
 >;
 
+// Fold (unmerge(merge x, y, z)) -> x, y, z.
+def unmerge_merge_matchinfo : GIDefMatchData<"SmallVector<Register, 8>">;
+def unmerge_merge : GICombineRule<
+  (defs root:$d, unmerge_merge_matchinfo:$info),
+  (match (wip_match_opcode G_UNMERGE_VALUES): $d,
+         [{ return Helper.matchCombineUnmergeMergeToPlainValues(*${d}, ${info}); }]),
+  (apply [{ return Helper.applyCombineUnmergeMergeToPlainValues(*${d}, ${info}); }])
+>;
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -409,4 +418,4 @@
     shl_ashr_to_sext_inreg, sext_inreg_of_load,
     width_reduction_combines, select_combines,
     known_bits_simplifications, ext_ext_fold,
-    not_cmp_fold]>;
+    not_cmp_fold, unmerge_merge]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1556,6 +1556,65 @@
   return true;
 }
 
+static Register peekThroughBitcast(Register Reg,
+                                   const MachineRegisterInfo &MRI) {
+  while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
+    ;
+
+  return Reg;
+}
+
+bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
+    MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
+  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+         "Expected an unmerge");
+  Register SrcReg =
+      peekThroughBitcast(MI.getOperand(MI.getNumOperands() - 1).getReg(), MRI);
+
+  MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
+  if (SrcInstr->getOpcode() != TargetOpcode::G_MERGE_VALUES &&
+      SrcInstr->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
+      SrcInstr->getOpcode() != TargetOpcode::G_CONCAT_VECTORS)
+    return false;
+
+  // Check the source type of the merge.
+  LLT SrcMergeTy = MRI.getType(SrcInstr->getOperand(1).getReg());
+  LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
+  bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
+  if (SrcMergeTy != Dst0Ty && !SameSize)
+    return false;
+  // They are the same now (modulo a bitcast).
+  // We can collect all the src registers.
+  for (unsigned Idx = 1, EndIdx = SrcInstr->getNumOperands(); Idx != EndIdx;
+       ++Idx)
+    Operands.push_back(SrcInstr->getOperand(Idx).getReg());
+  return true;
+}
+
+bool CombinerHelper::applyCombineUnmergeMergeToPlainValues(
+    MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
+  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+         "Expected an unmerge");
+  assert((MI.getNumOperands() - 1 == Operands.size()) &&
+         "Not enough operands to replace all defs");
+  unsigned NumElems = MI.getNumOperands() - 1;
+
+  LLT SrcTy = MRI.getType(Operands[0]);
+  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+  bool CanReuseInputDirectly = DstTy == SrcTy;
+  Builder.setInstrAndDebugLoc(MI);
+  for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
+    Register DstReg = MI.getOperand(Idx).getReg();
+    Register SrcReg = Operands[Idx];
+    if (CanReuseInputDirectly)
+      replaceRegWith(MRI, DstReg, SrcReg);
+    else
+      Builder.buildCast(DstReg, SrcReg);
+  }
+  MI.eraseFromParent();
+  return true;
+}
+
 bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
                                                 unsigned TargetShiftSize,
                                                 unsigned &ShiftVal) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir
new file
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir
@@ -0,0 +1,183 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s
+
+# Simple unmerge(merge) case with two operands.
+# The sources of the merge can be used in place of
+# the destinations of the unmerge.
+---
+name: test_combine_unmerge_merge
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_combine_unmerge_merge
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: $w0 = COPY [[DEF]](s32)
+    ; CHECK: $w1 = COPY [[DEF1]](s32)
+    %0:_(s32) = G_IMPLICIT_DEF
+    %1:_(s32) = G_IMPLICIT_DEF
+    %2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32)
+    %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %2(s64)
+    $w0 = COPY %3(s32)
+    $w1 = COPY %4(s32)
+...
+
+# Simple unmerge(merge) case with three operands.
+# The sources of the merge can be used in place of
+# the destinations of the unmerge.
+---
+name: test_combine_unmerge_merge_3ops
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_combine_unmerge_merge_3ops
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: $w0 = COPY [[DEF]](s32)
+    ; CHECK: $w1 = COPY [[DEF1]](s32)
+    ; CHECK: $w2 = COPY [[DEF2]](s32)
+    %0:_(s32) = G_IMPLICIT_DEF
+    %1:_(s32) = G_IMPLICIT_DEF
+    %5:_(s32) = G_IMPLICIT_DEF
+    %2:_(s96) = G_MERGE_VALUES %0(s32), %1(s32), %5(s32)
+    %3:_(s32), %4:_(s32), %6:_(s32) = G_UNMERGE_VALUES %2(s96)
+    $w0 = COPY %3(s32)
+    $w1 = COPY %4(s32)
+    $w2 = COPY %6(s32)
+...
+
+# Simple unmerge(buildvector) case with two operands.
+# The sources of the buildvector can be used in place of
+# the destinations of the unmerge.
+---
+name: test_combine_unmerge_build_vector
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_combine_unmerge_build_vector
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: $w0 = COPY [[DEF]](s32)
+    ; CHECK: $w1 = COPY [[DEF1]](s32)
+    %0:_(s32) = G_IMPLICIT_DEF
+    %1:_(s32) = G_IMPLICIT_DEF
+    %2:_(<2 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32)
+    %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %2(<2 x s32>)
+    $w0 = COPY %3(s32)
+    $w1 = COPY %4(s32)
+...
+
+# Simple unmerge(buildvector) case with three operands.
+# The sources of the buildvector can be used in place of
+# the destinations of the unmerge.
+---
+name: test_combine_unmerge_buildvector_3ops
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_combine_unmerge_buildvector_3ops
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: $w0 = COPY [[DEF]](s32)
+    ; CHECK: $w1 = COPY [[DEF1]](s32)
+    ; CHECK: $w2 = COPY [[DEF2]](s32)
+    %0:_(s32) = G_IMPLICIT_DEF
+    %1:_(s32) = G_IMPLICIT_DEF
+    %5:_(s32) = G_IMPLICIT_DEF
+    %2:_(<3 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %5(s32)
+    %3:_(s32), %4:_(s32), %6:_(s32) = G_UNMERGE_VALUES %2(<3 x s32>)
+    $w0 = COPY %3(s32)
+    $w1 = COPY %4(s32)
+    $w2 = COPY %6(s32)
+...
+
+# Simple unmerge(concatvectors) case.
+# The sources of the concatvectors can be used in place of
+# the destinations of the unmerge.
+---
+name: test_combine_unmerge_concat_vectors
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_combine_unmerge_concat_vectors
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $w1
+    ; CHECK: $w0 = COPY [[COPY]](<2 x s16>)
+    ; CHECK: $w1 = COPY [[COPY1]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $w0
+    %1:_(<2 x s16>) = COPY $w1
+    %2:_(<4 x s16>) = G_CONCAT_VECTORS %0(<2 x s16>), %1(<2 x s16>)
+    %3:_(<2 x s16>), %4:_(<2 x s16>) = G_UNMERGE_VALUES %2(<4 x s16>)
+    $w0 = COPY %3(<2 x s16>)
+    $w1 = COPY %4(<2 x s16>)
+...
+
+# Unmerge(merge) case with two operands and a bitcast in the middle.
+# The sources of the merge can be used in place of
+# the destinations of the unmerge.
+---
+name: test_combine_unmerge_bitcast_merge
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_combine_unmerge_bitcast_merge
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: $w0 = COPY [[DEF]](s32)
+    ; CHECK: $w1 = COPY [[DEF1]](s32)
+    %0:_(s32) = G_IMPLICIT_DEF
+    %1:_(s32) = G_IMPLICIT_DEF
+    %2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32)
+    %5:_(<2 x s32>) = G_BITCAST %2(s64)
+    %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %5(<2 x s32>)
+    $w0 = COPY %3(s32)
+    $w1 = COPY %4(s32)
+...
+
+# Unmerge(merge) with incompatible types: unmerge destTy != merge inputTy.
+# The sources of the merge cannot be used in place of
+# the destinations of the unmerge, since the types don't match.
+---
+name: test_combine_unmerge_merge_incompatible_types
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_combine_unmerge_merge_incompatible_types
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[DEF]](s32), [[DEF1]](s32)
+    ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[MV]](s64)
+    ; CHECK: $h0 = COPY [[UV]](s16)
+    ; CHECK: $h1 = COPY [[UV1]](s16)
+    ; CHECK: $h2 = COPY [[UV2]](s16)
+    ; CHECK: $h3 = COPY [[UV3]](s16)
+    %0:_(s32) = G_IMPLICIT_DEF
+    %1:_(s32) = G_IMPLICIT_DEF
+    %2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32)
+    %3:_(s16), %4:_(s16), %5:_(s16), %6:_(s16) = G_UNMERGE_VALUES %2(s64)
+    $h0 = COPY %3(s16)
+    $h1 = COPY %4(s16)
+    $h2 = COPY %5(s16)
+    $h3 = COPY %6(s16)
+...
+
+# Unmerge(concatvectors) with incompatible types: unmerge destTy != merge inputTy
+# but destTy.size() == inputTy.size().
+# The sources of the concatvectors can be used in place of
+# the destinations of the unmerge with a bitcast since the sizes
+# match.
+---
+name: test_combine_unmerge_merge_incompatible_types_but_same_size
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: test_combine_unmerge_merge_incompatible_types_but_same_size
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $w1
+    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+    ; CHECK: $w0 = COPY [[BITCAST]](s32)
+    ; CHECK: $w1 = COPY [[BITCAST1]](s32)
+    %0:_(<2 x s16>) = COPY $w0
+    %1:_(<2 x s16>) = COPY $w1
+    %2:_(<4 x s16>) = G_CONCAT_VECTORS %0(<2 x s16>), %1(<2 x s16>)
+    %5:_(s64) = G_BITCAST %2(<4 x s16>)
+    %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %5(s64)
+    $w0 = COPY %3(s32)
+    $w1 = COPY %4(s32)
+...
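
A minimal worked sketch of the rewrite the two helpers perform, using the registers from the first test above (illustration only, restating what the patch does): when the destination type of the G_UNMERGE_VALUES matches the source type of the merge-like instruction, applyCombineUnmergeMergeToPlainValues rewires each destination to the corresponding merge source via replaceRegWith, so

    %2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32)
    %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %2(s64)

collapses to direct uses of %0 and %1 and the G_UNMERGE_VALUES is erased. When the types differ but their sizes match (the CanReuseInputDirectly == false path), Builder.buildCast emits one cast per destination instead, which is the per-element G_BITCAST pattern checked in test_combine_unmerge_merge_incompatible_types_but_same_size.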