Index: llvm/trunk/include/llvm/CodeGen/GlobalISel/Legalizer.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/GlobalISel/Legalizer.h
+++ llvm/trunk/include/llvm/CodeGen/GlobalISel/Legalizer.h
@@ -59,7 +59,7 @@
                        const TargetInstrInfo &TII);
 
   bool combineMerges(MachineInstr &MI, MachineRegisterInfo &MRI,
-                     const TargetInstrInfo &TII);
+                     const TargetInstrInfo &TII, MachineIRBuilder &MIRBuilder);
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 };
Index: llvm/trunk/lib/CodeGen/GlobalISel/Legalizer.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ llvm/trunk/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -60,7 +60,7 @@
   unsigned SrcReg = MI.getOperand(NumDefs).getReg();
   MachineInstr &SeqI = *MRI.def_instr_begin(SrcReg);
   if (SeqI.getOpcode() != TargetOpcode::G_SEQUENCE)
-      return Changed;
+    return Changed;
 
   unsigned NumSeqSrcs = (SeqI.getNumOperands() - 1) / 2;
   bool AllDefsReplaced = true;
@@ -115,7 +115,8 @@
 }
 
 bool Legalizer::combineMerges(MachineInstr &MI, MachineRegisterInfo &MRI,
-                              const TargetInstrInfo &TII) {
+                              const TargetInstrInfo &TII,
+                              MachineIRBuilder &MIRBuilder) {
 
   if (MI.getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
     return false;
@@ -125,18 +126,62 @@
   if (MergeI.getOpcode() != TargetOpcode::G_MERGE_VALUES)
     return false;
 
-  if (MergeI.getNumOperands() - 1 != NumDefs)
-    return false;
+  const unsigned NumMergeRegs = MergeI.getNumOperands() - 1;
 
-  // FIXME: is a COPY appropriate if the types mismatch? We know both registers
-  // are allocatable by now.
-  if (MRI.getType(MI.getOperand(0).getReg()) !=
-      MRI.getType(MergeI.getOperand(1).getReg()))
-    return false;
+  if (NumMergeRegs < NumDefs) {
+    if (NumDefs % NumMergeRegs != 0)
+      return false;
+
+    MIRBuilder.setInstr(MI);
+    // Transform to UNMERGEs, for example
+    //   %1 = G_MERGE_VALUES %4, %5
+    //   %9, %10, %11, %12 = G_UNMERGE_VALUES %1
+    // to
+    //   %9, %10 = G_UNMERGE_VALUES %4
+    //   %11, %12 = G_UNMERGE_VALUES %5
+
+    const unsigned NewNumDefs = NumDefs / NumMergeRegs;
+    for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) {
+      SmallVector<unsigned, 2> DstRegs;
+      for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs;
+           ++j, ++DefIdx)
+        DstRegs.push_back(MI.getOperand(DefIdx).getReg());
+
+      MIRBuilder.buildUnmerge(DstRegs, MergeI.getOperand(Idx + 1).getReg());
+    }
 
-  for (unsigned Idx = 0; Idx < NumDefs; ++Idx)
-    MRI.replaceRegWith(MI.getOperand(Idx).getReg(),
-                       MergeI.getOperand(Idx + 1).getReg());
+  } else if (NumMergeRegs > NumDefs) {
+    if (NumMergeRegs % NumDefs != 0)
+      return false;
+
+    MIRBuilder.setInstr(MI);
+    // Transform to MERGEs
+    //   %6 = G_MERGE_VALUES %17, %18, %19, %20
+    //   %7, %8 = G_UNMERGE_VALUES %6
+    // to
+    //   %7 = G_MERGE_VALUES %17, %18
+    //   %8 = G_MERGE_VALUES %19, %20
+
+    const unsigned NumRegs = NumMergeRegs / NumDefs;
+    for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
+      SmallVector<unsigned, 2> Regs;
+      for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs; ++j, ++Idx)
+        Regs.push_back(MergeI.getOperand(Idx).getReg());
+
+      MIRBuilder.buildMerge(MI.getOperand(DefIdx).getReg(), Regs);
+    }
+
+  } else {
+    // FIXME: is a COPY appropriate if the types mismatch? We know both
+    // registers are allocatable by now.
+    if (MRI.getType(MI.getOperand(0).getReg()) !=
+        MRI.getType(MergeI.getOperand(1).getReg()))
+      return false;
+
+    for (unsigned Idx = 0; Idx < NumDefs; ++Idx)
+      MRI.replaceRegWith(MI.getOperand(Idx).getReg(),
+                         MergeI.getOperand(Idx + 1).getReg());
+  }
 
   MI.eraseFromParent();
   if (MRI.use_empty(MergeI.getOperand(0).getReg()))
@@ -232,7 +277,7 @@
         Changed = true;
         continue;
       }
-      Changed |= combineMerges(*MI, MRI, TII);
+      Changed |= combineMerges(*MI, MRI, TII, Helper.MIRBuilder);
     }
   }
 
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-add-v512.mir
===================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-add-v512.mir
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-add-v512.mir
@@ -216,16 +216,16 @@
 # AVX1-NEXT: %3(<32 x s8>) = COPY %ymm1
 # AVX1-NEXT: %4(<32 x s8>) = COPY %ymm2
 # AVX1-NEXT: %5(<32 x s8>) = COPY %ymm3
-# AVX1-NEXT: %0(<64 x s8>) = G_MERGE_VALUES %2(<32 x s8>), %3(<32 x s8>)
-# AVX1-NEXT: %1(<64 x s8>) = G_MERGE_VALUES %4(<32 x s8>), %5(<32 x s8>)
-# AVX1-NEXT: %9(<16 x s8>), %10(<16 x s8>), %11(<16 x s8>), %12(<16 x s8>) = G_UNMERGE_VALUES %0(<64 x s8>)
-# AVX1-NEXT: %13(<16 x s8>), %14(<16 x s8>), %15(<16 x s8>), %16(<16 x s8>) = G_UNMERGE_VALUES %1(<64 x s8>)
+# AVX1-NEXT: %9(<16 x s8>), %10(<16 x s8>) = G_UNMERGE_VALUES %2(<32 x s8>)
+# AVX1-NEXT: %11(<16 x s8>), %12(<16 x s8>) = G_UNMERGE_VALUES %3(<32 x s8>)
+# AVX1-NEXT: %13(<16 x s8>), %14(<16 x s8>) = G_UNMERGE_VALUES %4(<32 x s8>)
+# AVX1-NEXT: %15(<16 x s8>), %16(<16 x s8>) = G_UNMERGE_VALUES %5(<32 x s8>)
 # AVX1-NEXT: %17(<16 x s8>) = G_ADD %9, %13
 # AVX1-NEXT: %18(<16 x s8>) = G_ADD %10, %14
 # AVX1-NEXT: %19(<16 x s8>) = G_ADD %11, %15
 # AVX1-NEXT: %20(<16 x s8>) = G_ADD %12, %16
-# AVX1-NEXT: %6(<64 x s8>) = G_MERGE_VALUES %17(<16 x s8>), %18(<16 x s8>), %19(<16 x s8>), %20(<16 x s8>)
-# AVX1-NEXT: %7(<32 x s8>), %8(<32 x s8>) = G_UNMERGE_VALUES %6(<64 x s8>)
+# AVX1-NEXT: %7(<32 x s8>) = G_MERGE_VALUES %17(<16 x s8>), %18(<16 x s8>)
+# AVX1-NEXT: %8(<32 x s8>) = G_MERGE_VALUES %19(<16 x s8>), %20(<16 x s8>)
 # AVX1-NEXT: %ymm0 = COPY %7(<32 x s8>)
 # AVX1-NEXT: %ymm1 = COPY %8(<32 x s8>)
 # AVX1-NEXT: RET 0, implicit %ymm0, implicit %ymm1
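For reference, a minimal MIR sketch of the remaining equal-count case, which
still goes through MRI.replaceRegWith instead of building new instructions;
the register numbers here are hypothetical, not taken from the patch or its
tests:

  %0(<32 x s8>) = G_MERGE_VALUES %1(<16 x s8>), %2(<16 x s8>)
  %3(<16 x s8>), %4(<16 x s8>) = G_UNMERGE_VALUES %0(<32 x s8>)

Here NumMergeRegs == NumDefs (both 2) and %3 has the same type as %1, so every
use of %3 is rewritten to %1 and every use of %4 to %2. The G_UNMERGE_VALUES is
then erased, and the G_MERGE_VALUES follows once its result has no remaining
uses.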