diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -130,11 +130,8 @@ /// Indicate that this basic block is the entry block of a cleanup funclet. bool IsCleanupFuncletEntry = false; - /// Default target of the callbr of a basic block. - bool InlineAsmBrDefaultTarget = false; - /// List of indirect targets of the callbr of a basic block. - SmallPtrSet InlineAsmBrIndirectTargets; + SmallPtrSet InlineAsmBrIndirectTargets; /// since getSymbol is a relatively heavy-weight operation, the symbol /// is only computed once and is cached. @@ -432,16 +429,6 @@ return InlineAsmBrIndirectTargets.clear(); } - /// Returns true if this is the default dest of an INLINEASM_BR. - bool isInlineAsmBrDefaultTarget() const { - return InlineAsmBrDefaultTarget; - } - - /// Indicates if this is the default deft of an INLINEASM_BR. - void setInlineAsmBrDefaultTarget() { - InlineAsmBrDefaultTarget = true; - } - /// Returns true if it is legal to hoist instructions into this block. bool isLegalToHoistInto() const; diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -1117,7 +1117,8 @@ } bool isCopy() const { - return getOpcode() == TargetOpcode::COPY; + return getOpcode() == TargetOpcode::COPY || + getOpcode() == TargetOpcode::COPY_BR; } bool isFullCopy() const { diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -72,7 +72,7 @@ /// virtual registers have been created for all the instructions, and it's /// only needed in cases where the register classes implied by the /// instructions are insufficient. It is emitted as a COPY MachineInstr. 
- HANDLE_TARGET_OPCODE(COPY_TO_REGCLASS) +HANDLE_TARGET_OPCODE(COPY_TO_REGCLASS) /// DBG_VALUE - a mapping of the llvm.dbg.value intrinsic HANDLE_TARGET_OPCODE(DBG_VALUE) @@ -91,11 +91,17 @@ /// e.g. v1027 = REG_SEQUENCE v1024, 3, v1025, 4, v1026, 5 /// After register coalescing references of v1024 should be replace with /// v1027:3, v1025 with v1027:4, etc. - HANDLE_TARGET_OPCODE(REG_SEQUENCE) +HANDLE_TARGET_OPCODE(REG_SEQUENCE) /// COPY - Target-independent register copy. This instruction can also be /// used to copy between subregisters of virtual registers. - HANDLE_TARGET_OPCODE(COPY) +HANDLE_TARGET_OPCODE(COPY) + +/// COPY_BR - This instruction is the terminator version of COPY. Its purpose +/// is to allow copies from terminators to be properly represented (e.g. an +/// INLINEASM_BR that defines a physical register) without having +/// to introduce "live-ins" for physical registers before register allocation. +HANDLE_TARGET_OPCODE(COPY_BR) /// BUNDLE - This instruction represents an instruction bundle. 
Instructions /// which immediately follow a BUNDLE instruction which are marked with diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td --- a/llvm/include/llvm/Target/Target.td +++ b/llvm/include/llvm/Target/Target.td @@ -1114,6 +1114,16 @@ let isAsCheapAsAMove = 1; let hasNoSchedulingInfo = 0; } +def COPY_BR : StandardPseudoInstruction { + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins unknown:$src); + let AsmString = ""; + let hasSideEffects = 0; + let isAsCheapAsAMove = 1; + let isTerminator = 1; + let isBranch = 1; + let isIndirectBranch = 1; +} def BUNDLE : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins variable_ops); diff --git a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp --- a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -133,7 +133,6 @@ } bool ExpandPostRA::LowerCopy(MachineInstr *MI) { - if (MI->allDefsAreDead()) { LLVM_DEBUG(dbgs() << "dead copy: " << *MI); MI->setDesc(TII->get(TargetOpcode::KILL)); @@ -188,9 +187,8 @@ bool MadeChange = false; - for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end(); - mbbi != mbbe; ++mbbi) { - for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); + for (auto &MBB : MF) { + for (MachineBasicBlock::iterator mi = MBB.begin(), me = MBB.end(); mi != me;) { MachineInstr &MI = *mi; // Advance iterator here because MI may be erased. @@ -212,6 +210,7 @@ MadeChange |= LowerSubregToReg(&MI); break; case TargetOpcode::COPY: + case TargetOpcode::COPY_BR: MadeChange |= LowerCopy(&MI); break; case TargetOpcode::DBG_VALUE: diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -842,8 +842,8 @@ /// Sink an instruction and its associated debug instructions. 
static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo, MachineBasicBlock::iterator InsertPos, + const TargetInstrInfo *TII, SmallVectorImpl &DbgValuesToSink) { - // If we cannot find a location to use (merge with), then we erase the debug // location to prevent debug-info driven tools from potentially reporting // wrong location information. @@ -858,15 +858,15 @@ SuccToSinkTo.splice(InsertPos, ParentBlock, MI, ++MachineBasicBlock::iterator(MI)); + if (MI.getOpcode() == TargetOpcode::COPY_BR) + MI.setDesc(TII->get(TargetOpcode::COPY)); + // Sink a copy of debug users to the insert position. Mark the original // DBG_VALUE location as 'undef', indicating that any earlier variable // location should be terminated as we've optimised away the value at this // point. - for (SmallVectorImpl::iterator DBI = DbgValuesToSink.begin(), - DBE = DbgValuesToSink.end(); - DBI != DBE; ++DBI) { - MachineInstr *DbgMI = *DBI; - MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(*DBI); + for (auto *DbgMI : DbgValuesToSink) { + MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(DbgMI); SuccToSinkTo.insert(InsertPos, NewDbgMI); if (!attemptDebugCopyProp(MI, *DbgMI)) @@ -891,6 +891,10 @@ if (MI.isConvergent()) return false; + // Sink COPY_BR instructions after register allocation. + if (MI.getOpcode() == TargetOpcode::COPY_BR) + return false; + // Don't break implicit null checks. This is a performance heuristic, and not // required for correctness. if (SinkingPreventsImplicitNullCheck(MI, TII, TRI)) @@ -1017,7 +1021,7 @@ if (MI.getMF()->getFunction().getSubprogram() && MI.isCopy()) SalvageUnsunkDebugUsersOfCopy(MI, SuccToSinkTo); - performSink(MI, *SuccToSinkTo, InsertPos, DbgUsersToSink); + performSink(MI, *SuccToSinkTo, InsertPos, TII, DbgUsersToSink); // Conservatively, clear any kill flags, since it's possible that they are no // longer correct. 
@@ -1290,6 +1294,7 @@ return false; bool Changed = false; + bool InvalidateLiveness = false; // Track which registers have been modified and used between the end of the // block and the current instruction. @@ -1380,12 +1385,17 @@ // block. clearKillFlags(MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI); MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI(); - performSink(*MI, *SuccBB, InsertPos, DbgValsToSink); + // Test for COPY_BR before sinking: performSink rewrites it to a plain COPY. + InvalidateLiveness |= MI->getOpcode() == TargetOpcode::COPY_BR; + performSink(*MI, *SuccBB, InsertPos, TII, DbgValsToSink); updateLiveIn(MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy); Changed = true; ++NumPostRACopySink; } + + if (InvalidateLiveness) + MF.getRegInfo().invalidateLiveness(); return Changed; } diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -858,8 +858,13 @@ if (!FirstTerminator) FirstTerminator = MI; } else if (FirstTerminator && !MI->isDebugEntryValue()) { - report("Non-terminator instruction after the first terminator", MI); - errs() << "First terminator was:\t" << *FirstTerminator; + if (FirstTerminator->getOpcode() == TargetOpcode::INLINEASM_BR && + TM->getOptLevel() == CodeGenOpt::None && MI->mayStore()) { + /* Tolerate may-store instructions after an INLINEASM_BR at -O0. */ + } else { + report("Non-terminator instruction after the first terminator", MI); + errs() << "First terminator was:\t" << *FirstTerminator; + } } } diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -174,7 +174,11 @@ } else { // Create the reg, emit the copy. 
VRBase = MRI->createVirtualRegister(DstRC); - BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), + unsigned TgtOpc = TargetOpcode::COPY; + const MachineBasicBlock::iterator Term = MBB->getFirstTerminator(); + if (Term != MBB->end() && Term->getOpcode() == TargetOpcode::INLINEASM_BR) + TgtOpc = TargetOpcode::COPY_BR; + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TgtOpc), VRBase).addReg(SrcReg); } diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -1030,44 +1030,33 @@ // Split after an INLINEASM_BR block with outputs. This allows us to keep the // copy to/from register instructions from being between two terminator // instructions, which causes the machine instruction verifier agita. - auto TI = llvm::find_if(*BB, [](const MachineInstr &MI){ - return MI.getOpcode() == TargetOpcode::INLINEASM_BR; + auto Iter = llvm::find_if(*BB, [](MachineInstr &MI){ + MachineBasicBlock::iterator Next(MI); + Next = std::next(Next); + return Next != MI.getParent()->end() && + (MI.getOpcode() == TargetOpcode::COPY_BR || + MI.getOpcode() == TargetOpcode::INLINEASM_BR) && + Next->getOpcode() == TargetOpcode::COPY; }); - auto SplicePt = TI != BB->end() ? 
std::next(TI) : BB->end(); - if (TI != BB->end() && SplicePt != BB->end() && - TI->getOpcode() == TargetOpcode::INLINEASM_BR && - SplicePt->getOpcode() == TargetOpcode::COPY) { - MachineBasicBlock *FallThrough = BB->getFallThrough(); - if (!FallThrough) + if (Iter != BB->end()) { + MachineBasicBlock *DefaultTarget = BB->getFallThrough(); + if (!DefaultTarget) for (const MachineOperand &MO : BB->back().operands()) if (MO.isMBB()) { - FallThrough = MO.getMBB(); + DefaultTarget = MO.getMBB(); break; } - assert(FallThrough && "Cannot find default dest block for callbr!"); + assert(DefaultTarget && "Cannot find default dest block for callbr!"); MachineBasicBlock *CopyBB = MF.CreateMachineBasicBlock(BB->getBasicBlock()); MachineFunction::iterator BBI(*BB); MF.insert(++BBI, CopyBB); - CopyBB->splice(CopyBB->begin(), BB, SplicePt, BB->end()); - CopyBB->setInlineAsmBrDefaultTarget(); + CopyBB->splice(CopyBB->begin(), BB, std::next(Iter), BB->end()); - CopyBB->addSuccessor(FallThrough, BranchProbability::getOne()); + CopyBB->addSuccessor(DefaultTarget, BranchProbability::getOne()); BB->addSuccessor(CopyBB, BranchProbability::getOne()); - // Mark all physical registers defined in the original block as being live - // on entry to the copy block. - for (const auto &MI : *CopyBB) - for (const MachineOperand &MO : MI.operands()) - if (MO.isReg()) { - Register reg = MO.getReg(); - if (Register::isPhysicalRegister(reg)) { - CopyBB->addLiveIn(reg); - break; - } - } - // Bit of a hack: The copy block we created here exists only because we want // the CFG to work with the current system. 
However, the successors to the // block with the INLINEASM_BR instruction expect values to come from *that* diff --git a/llvm/test/CodeGen/AArch64/callbr-asm-label.ll b/llvm/test/CodeGen/AArch64/callbr-asm-label.ll --- a/llvm/test/CodeGen/AArch64/callbr-asm-label.ll +++ b/llvm/test/CodeGen/AArch64/callbr-asm-label.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-linux-gnu -O0 | FileCheck %s --check-prefix=CHECK-O0 @X = common local_unnamed_addr global i32 0, align 4 @@ -9,6 +10,13 @@ ; CHECK-LABEL: .Ltmp0: ; CHECK-LABEL: .LBB0_1: // %l_yes ; CHECK-LABEL: .LBB0_2: // %cleanup + +; CHECK-O0-LABEL: test1: +; CHECK-O0: .word b +; CHECK-O0-NEXT: .word .Ltmp1 +; CHECK-O0-LABEL: .Ltmp1: +; CHECK-O0-LABEL: .LBB0_1: // %l_yes +; CHECK-O0-LABEL: .LBB0_2: // %cleanup entry: callbr void asm sideeffect "1:\0A\09.word b, ${0:l}\0A\09", "X"(i8* blockaddress(@test1, %l_yes)) to label %cleanup [label %l_yes] @@ -22,7 +30,8 @@ } define void @test2() { -; CHECK-LABEL: test2: +; CHECK-LABEL: test2: +; CHECK-O0-LABEL: test2: entry: %0 = load i32, i32* @X, align 4 %and = and i32 %0, 1 @@ -34,6 +43,11 @@ ; CHECK-NEXT: .word .Ltmp2 ; CHECK-LABEL: .Ltmp2: ; CHECK-NEXT: .LBB1_3: // %if.end6 + +; CHECK-O0: .word b +; CHECK-O0-NEXT: .word .Ltmp3 +; CHECK-O0-LABEL: .Ltmp3: +; CHECK-O0-NEXT: .LBB1_3: // %if.end6 callbr void asm sideeffect "1:\0A\09.word b, ${0:l}\0A\09", "X"(i8* blockaddress(@test2, %if.end6)) to label %if.then4 [label %if.end6] @@ -50,6 +64,9 @@ if.then9: ; CHECK-LABEL: .Ltmp4: ; CHECK-NEXT: .LBB1_5: // %l_yes + +; CHECK-O0-LABEL: .Ltmp5: +; CHECK-O0-NEXT: .LBB1_6: // %l_yes callbr void asm sideeffect "", "X"(i8* blockaddress(@test2, %l_yes)) to label %if.end10 [label %l_yes] diff --git a/llvm/test/CodeGen/SystemZ/asm-20.ll b/llvm/test/CodeGen/SystemZ/asm-20.ll --- a/llvm/test/CodeGen/SystemZ/asm-20.ll +++ b/llvm/test/CodeGen/SystemZ/asm-20.ll @@ -1,6 +1,7 @@ ; Test that asm goto can be compiled. 
; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -O0 define i32 @c() { entry: diff --git a/llvm/test/CodeGen/X86/callbr-asm-label-addr.ll b/llvm/test/CodeGen/X86/callbr-asm-label-addr.ll --- a/llvm/test/CodeGen/X86/callbr-asm-label-addr.ll +++ b/llvm/test/CodeGen/X86/callbr-asm-label-addr.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -O0 | FileCheck %s --check-prefix=CHECK-O0 define i32 @test1(i32 %x) { ; CHECK-LABEL: test1: @@ -9,6 +10,16 @@ ; CHECK-NEXT: callq foo ; CHECK-LABEL: .Ltmp0: ; CHECK-NEXT: # %bb.2: # %baz + +; CHECK-O0-LABEL: test1: +; CHECK-O0: .quad .Ltmp0 +; CHECK-O0-NEXT: .quad .Ltmp1 +; CHECK-O0-LABEL: .Ltmp1: +; CHECK-O0-LABEL: .LBB0_2: # %bar +; CHECK-O0-NEXT: movl +; CHECK-O0-NEXT: callq foo +; CHECK-O0-LABEL: .Ltmp0: +; CHECK-O0-NEXT: # %bb.3: # %baz entry: callbr void asm sideeffect ".quad ${0:l}\0A\09.quad ${1:l}", "i,X,~{dirflag},~{fpsr},~{flags}"(i8* blockaddress(@test1, %baz), i8* blockaddress(@test1, %bar)) to label %asm.fallthrough [label %bar]