Index: llvm/trunk/lib/CodeGen/VirtRegMap.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/VirtRegMap.cpp
+++ llvm/trunk/lib/CodeGen/VirtRegMap.cpp
@@ -406,6 +406,8 @@
     return;
 
   if (MI.isBundledWithPred() && !MI.isBundledWithSucc()) {
+    SmallVector<MachineInstr *, 2> MIs({&MI});
+
     // Only do this when the complete bundle is made out of COPYs.
     MachineBasicBlock &MBB = *MI.getParent();
     for (MachineBasicBlock::reverse_instr_iterator I =
@@ -413,16 +415,53 @@
          I != E && I->isBundledWithSucc(); ++I) {
       if (!I->isCopy())
         return;
+      MIs.push_back(&*I);
     }
+    MachineInstr *FirstMI = MIs.back();
+
+    auto anyRegsAlias = [](const MachineInstr *Dst,
+                           ArrayRef<MachineInstr *> Srcs,
+                           const TargetRegisterInfo *TRI) {
+      for (const MachineInstr *Src : Srcs)
+        if (Src != Dst)
+          if (TRI->regsOverlap(Dst->getOperand(0).getReg(),
+                               Src->getOperand(1).getReg()))
+            return true;
+      return false;
+    };
+
+    // If any of the destination registers in the bundle of copies alias any of
+    // the source registers, try to schedule the instructions to avoid any
+    // clobbering.
+    for (int E = MIs.size(), PrevE; E > 1; PrevE = E) {
+      for (int I = E; I--; )
+        if (!anyRegsAlias(MIs[I], makeArrayRef(MIs).take_front(E), TRI)) {
+          if (I + 1 != E)
+            std::swap(MIs[I], MIs[E - 1]);
+          --E;
+        }
+      if (PrevE == E) {
+        MF->getFunction().getContext().emitError(
+            "register rewriting failed: cycle in copy bundle");
+        break;
+      }
+    }
+
+    MachineInstr *BundleStart = FirstMI;
+    for (MachineInstr *BundledMI : llvm::reverse(MIs)) {
+      // If instruction is in the middle of the bundle, move it before the
+      // bundle starts, otherwise, just unbundle it. When we get to the last
+      // instruction, the bundle will have been completely undone.
+      if (BundledMI != BundleStart) {
+        BundledMI->removeFromBundle();
+        MBB.insert(FirstMI, BundledMI);
+      } else if (BundledMI->isBundledWithSucc()) {
+        BundledMI->unbundleFromSucc();
+        BundleStart = &*std::next(BundledMI->getIterator());
+      }
 
-    for (MachineBasicBlock::reverse_instr_iterator I = MI.getReverseIterator();
-         I->isBundledWithPred(); ) {
-      MachineInstr &MI = *I;
-      ++I;
-
-      MI.unbundleFromPred();
-      if (Indexes)
-        Indexes->insertMachineInstrInMaps(MI);
+      if (Indexes && BundledMI != FirstMI)
+        Indexes->insertMachineInstrInMaps(*BundledMI);
     }
   }
 }
Index: llvm/trunk/test/CodeGen/AArch64/overlapping-copy-bundle-cycle.mir
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/overlapping-copy-bundle-cycle.mir
+++ llvm/trunk/test/CodeGen/AArch64/overlapping-copy-bundle-cycle.mir
@@ -0,0 +1,16 @@
+# RUN: not llc -mtriple=aarch64-apple-ios -run-pass=greedy -run-pass=virtregrewriter %s -o /dev/null 2>&1 | FileCheck %s
+
+# Check we don't infinitely loop on cycles in copy bundles.
+# CHECK: error: register rewriting failed: cycle in copy bundle
+
+---
+name: func0
+body: |
+  bb.0:
+    $x0 = IMPLICIT_DEF
+    $q0_q1_q2_q3 = IMPLICIT_DEF
+    $q1_q2_q3 = COPY $q0_q1_q2 {
+      $q2_q3_q4 = COPY $q1_q2_q3
+    }
+    ST4i64 $q1_q2_q3_q4, 0, $x0
+...
Index: llvm/trunk/test/CodeGen/AArch64/overlapping-copy-bundle.mir
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/overlapping-copy-bundle.mir
+++ llvm/trunk/test/CodeGen/AArch64/overlapping-copy-bundle.mir
@@ -0,0 +1,80 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-apple-ios -run-pass=greedy -run-pass=virtregrewriter %s -o - | FileCheck %s
+---
+name: func0
+body: |
+  bb.0:
+    ; Make sure we don't clobber q3 when we expand the bundle
+    ; CHECK-LABEL: name: func0
+    ; CHECK: $x0 = IMPLICIT_DEF
+    ; CHECK: $q0_q1_q2_q3 = IMPLICIT_DEF
+    ; CHECK: $q4 = COPY $q3
+    ; CHECK: $q1_q2_q3 = COPY $q0_q1_q2
+    ; CHECK: ST4i64 $q1_q2_q3_q4, 0, $x0
+    $x0 = IMPLICIT_DEF
+    $q0_q1_q2_q3 = IMPLICIT_DEF
+    $q1_q2_q3 = COPY $q0_q1_q2 {
+      $q4 = COPY $q3
+    }
+    ST4i64 $q1_q2_q3_q4, 0, $x0
+
+...
+---
+name: func1
+body: |
+  bb.0:
+    ; If it was already ordered, make sure we don't break it
+    ; CHECK-LABEL: name: func1
+    ; CHECK: $x0 = IMPLICIT_DEF
+    ; CHECK: $q0_q1_q2_q3 = IMPLICIT_DEF
+    ; CHECK: $q4 = COPY $q3
+    ; CHECK: $q1_q2_q3 = COPY $q0_q1_q2
+    ; CHECK: ST4i64 $q1_q2_q3_q4, 0, $x0
+    $x0 = IMPLICIT_DEF
+    $q0_q1_q2_q3 = IMPLICIT_DEF
+    $q4 = COPY $q3 {
+      $q1_q2_q3 = COPY $q0_q1_q2
+    }
+    ST4i64 $q1_q2_q3_q4, 0, $x0
+
+...
+---
+name: func2
+body: |
+  bb.0:
+    ; A bit less realistic, but check that we handle multiple nodes
+    ; CHECK-LABEL: name: func2
+    ; CHECK: $x0 = IMPLICIT_DEF
+    ; CHECK: $q0_q1_q2_q3 = IMPLICIT_DEF
+    ; CHECK: $q3 = COPY $q2
+    ; CHECK: $q4 = COPY $q1
+    ; CHECK: $q1_q2 = COPY $q0_q1
+    ; CHECK: ST4i64 $q1_q2_q3_q4, 0, $x0
+    $x0 = IMPLICIT_DEF
+    $q0_q1_q2_q3 = IMPLICIT_DEF
+    $q1_q2 = COPY $q0_q1 {
+      $q3 = COPY $q2
+      $q4 = COPY $q1
+    }
+    ST4i64 $q1_q2_q3_q4, 0, $x0
+
+...
+---
+name: func3
+body: |
+  bb.0:
+    ; If there was nothing wrong, don't change the order for no reason
+    ; CHECK-LABEL: name: func3
+    ; CHECK: $x0 = IMPLICIT_DEF
+    ; CHECK: $q1_q2_q3_q4 = IMPLICIT_DEF
+    ; CHECK: $q0_q1 = COPY $q1_q2
+    ; CHECK: $q2_q3 = COPY $q3_q4
+    ; CHECK: ST4i64 $q0_q1_q2_q3, 0, $x0
+    $x0 = IMPLICIT_DEF
+    $q1_q2_q3_q4 = IMPLICIT_DEF
+    $q0_q1 = COPY $q1_q2 {
+      $q2_q3 = COPY $q3_q4
+    }
+    ST4i64 $q0_q1_q2_q3, 0, $x0
+
+...
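For reference, the scheduling loop added above is a parallel-copy sequentialization: out of the copies still in the bundle, repeatedly pick one whose destination register overlaps no remaining copy's source, schedule it to execute before the rest, and report an error when no such copy exists, since the bundle then contains a cycle. Below is a minimal standalone sketch of that loop, not part of the patch: Copy, overlap() and the register-unit numbering are simplified stand-ins for MachineInstr, TRI->regsOverlap() and the AArch64 Qn register tuples. It differs from the patch in one detail: PrevE is declared inside the outer loop so it is always initialized before the progress check.

#include <cstdio>
#include <set>
#include <string>
#include <utility>
#include <vector>

// Hypothetical stand-in for MachineInstr: one register-to-register copy.
// Registers are modeled as sets of register units, so e.g. the AArch64
// tuple $q1_q2_q3 becomes {1, 2, 3}.
struct Copy {
  std::string Text;       // Printable form, for the demo only.
  std::set<int> DstUnits; // Units written by the copy.
  std::set<int> SrcUnits; // Units read by the copy.
};

// Plays the role of TRI->regsOverlap(): non-empty unit intersection.
static bool overlap(const std::set<int> &A, const std::set<int> &B) {
  for (int U : A)
    if (B.count(U))
      return true;
  return false;
}

// MIs is in reverse bundle order (last bundled copy first), as the patch
// builds it. On success, fills ExecOrder with the copies in clobber-free
// execution order and returns true; returns false when every remaining copy
// would clobber some remaining source, i.e. the bundle contains a cycle
// (the emitError() path in the patch).
static bool scheduleCopies(std::vector<Copy *> MIs,
                           std::vector<Copy *> &ExecOrder) {
  for (int E = MIs.size(); E > 1;) {
    int PrevE = E;
    // Scan the unscheduled copies MIs[0..E-1]; a copy whose destination
    // aliases no remaining source is safe to execute before the rest, so
    // move it to the back of the active region and shrink the region.
    for (int I = E; I--;) {
      bool Aliases = false;
      for (int J = 0; J < E; ++J)
        if (MIs[J] != MIs[I] && overlap(MIs[I]->DstUnits, MIs[J]->SrcUnits))
          Aliases = true;
      if (!Aliases) {
        if (I + 1 != E)
          std::swap(MIs[I], MIs[E - 1]);
        --E;
      }
    }
    if (PrevE == E)
      return false; // No progress: cycle in copy bundle.
  }
  // MIs ends up in reverse execution order, mirroring the llvm::reverse(MIs)
  // walk that re-emits the unbundled instructions in the patch.
  ExecOrder.assign(MIs.rbegin(), MIs.rend());
  return true;
}

int main() {
  // func0 from overlapping-copy-bundle.mir: the first copy clobbers $q3,
  // which the second copy still reads, so the $q4 copy must run first.
  Copy A{"$q1_q2_q3 = COPY $q0_q1_q2", {1, 2, 3}, {0, 1, 2}};
  Copy B{"$q4 = COPY $q3", {4}, {3}};
  std::vector<Copy *> ExecOrder;
  if (scheduleCopies({&B, &A}, ExecOrder)) // Reverse bundle order.
    for (const Copy *C : ExecOrder)
      std::printf("%s\n", C->Text.c_str());

  // func0 from overlapping-copy-bundle-cycle.mir: each copy clobbers the
  // other's source, so no sequential order is safe.
  Copy C1{"$q1_q2_q3 = COPY $q0_q1_q2", {1, 2, 3}, {0, 1, 2}};
  Copy C2{"$q2_q3_q4 = COPY $q1_q2_q3", {2, 3, 4}, {1, 2, 3}};
  if (!scheduleCopies({&C2, &C1}, ExecOrder))
    std::printf("error: register rewriting failed: cycle in copy bundle\n");
  return 0;
}

Compiled and run, the first case prints the two copies of func0 in the clobber-free order the test checks for ($q4 = COPY $q3 before $q1_q2_q3 = COPY $q0_q1_q2), and the second case takes the cycle error path exercised by overlapping-copy-bundle-cycle.mir.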