Index: llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp =================================================================== --- llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp +++ llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp @@ -1900,13 +1900,8 @@ // Def = REG_SEQUENCE v0, sub0, v1, sub1, ... // Check if one of the operand defines the subreg we are interested in. for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) { - if (RegSeqInput.SubIdx == DefSubReg) { - if (RegSeqInput.SubReg) - // Bail if we have to compose sub registers. - return ValueTrackerResult(); - + if (RegSeqInput.SubIdx == DefSubReg) return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg); - } } // If the subreg we are tracking is super-defined by another subreg, Index: llvm/trunk/test/CodeGen/AMDGPU/peephole-opt-regseq-removal.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/peephole-opt-regseq-removal.mir +++ llvm/trunk/test/CodeGen/AMDGPU/peephole-opt-regseq-removal.mir @@ -0,0 +1,34 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -run-pass peephole-opt -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s + +# Check that when we jump through several subregisters in sequence of +# reg_sequence we can still find a plain src for a copy. +# In this specific test, we want %4 to read directly from %1 and +# %5 from %0. 
These values come from the respective chains: +# %4 -> %3.sub0 -> %2.sub1 -> %1 +# %5 -> %3.sub1 -> %2.sub0 -> %0 +# +# Previously this was not simplified because the peephole optimizer bailed out +# whenever two subreg indices appeared in the same chain (%3.subX and %2.subY). +--- +name: reg_sequence_removal +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GCN-LABEL: name: reg_sequence_removal + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[REG_SEQUENCE]].sub1, %subreg.sub0, [[REG_SEQUENCE]].sub0, %subreg.sub1 + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GCN: KILL [[COPY3]], implicit [[COPY2]] + %0:vgpr_32 = COPY $vgpr0 + %1:vgpr_32 = COPY $vgpr1 + %2:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1 + %3:vreg_64 = REG_SEQUENCE %2.sub1, %subreg.sub0, %2.sub0, %subreg.sub1 + %4:vgpr_32 = COPY %3.sub0 + %5:vgpr_32 = COPY %3.sub1 + KILL implicit %4, %5 +...