diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -1128,7 +1128,8 @@ } bool isCopy() const { - return getOpcode() == TargetOpcode::COPY; + return getOpcode() == TargetOpcode::COPY || + getOpcode() == TargetOpcode::TCOPY; } bool isFullCopy() const { diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -72,7 +72,7 @@ /// virtual registers have been created for all the instructions, and it's /// only needed in cases where the register classes implied by the /// instructions are insufficient. It is emitted as a COPY MachineInstr. - HANDLE_TARGET_OPCODE(COPY_TO_REGCLASS) +HANDLE_TARGET_OPCODE(COPY_TO_REGCLASS) /// DBG_VALUE - a mapping of the llvm.dbg.value intrinsic HANDLE_TARGET_OPCODE(DBG_VALUE) @@ -91,11 +91,17 @@ /// e.g. v1027 = REG_SEQUENCE v1024, 3, v1025, 4, v1026, 5 /// After register coalescing references of v1024 should be replace with /// v1027:3, v1025 with v1027:4, etc. - HANDLE_TARGET_OPCODE(REG_SEQUENCE) +HANDLE_TARGET_OPCODE(REG_SEQUENCE) /// COPY - Target-independent register copy. This instruction can also be /// used to copy between subregisters of virtual registers. - HANDLE_TARGET_OPCODE(COPY) +HANDLE_TARGET_OPCODE(COPY) + +/// TCOPY - This instruction is the terminator version of COPY. The purpose +/// is to allow copies from terminators to be properly represented (e.g. an +/// INLINEASM_BR that defines a physical register) without having +/// to introduce "live-ins" for physical registers before register allocation. +HANDLE_TARGET_OPCODE(TCOPY) /// BUNDLE - This instruction represents an instruction bundle. 
Instructions /// which immediately follow a BUNDLE instruction which are marked with diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td --- a/llvm/include/llvm/Target/Target.td +++ b/llvm/include/llvm/Target/Target.td @@ -1121,6 +1121,14 @@ let isAsCheapAsAMove = 1; let hasNoSchedulingInfo = 0; } +def TCOPY : StandardPseudoInstruction { + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins unknown:$src); + let AsmString = ""; + let hasSideEffects = 0; + let isAsCheapAsAMove = 1; + let isTerminator = 1; +} def BUNDLE : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins variable_ops); diff --git a/llvm/lib/CodeGen/DetectDeadLanes.cpp b/llvm/lib/CodeGen/DetectDeadLanes.cpp --- a/llvm/lib/CodeGen/DetectDeadLanes.cpp +++ b/llvm/lib/CodeGen/DetectDeadLanes.cpp @@ -140,6 +140,7 @@ // are not lowered to a COPY. switch (MI.getOpcode()) { case TargetOpcode::COPY: + case TargetOpcode::TCOPY: case TargetOpcode::PHI: case TargetOpcode::INSERT_SUBREG: case TargetOpcode::REG_SEQUENCE: @@ -235,6 +236,7 @@ switch (MI.getOpcode()) { case TargetOpcode::COPY: + case TargetOpcode::TCOPY: case TargetOpcode::PHI: return UsedLanes; case TargetOpcode::REG_SEQUENCE: { @@ -337,6 +339,7 @@ break; } case TargetOpcode::COPY: + case TargetOpcode::TCOPY: case TargetOpcode::PHI: break; default: diff --git a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp --- a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -133,7 +133,6 @@ } bool ExpandPostRA::LowerCopy(MachineInstr *MI) { - if (MI->allDefsAreDead()) { LLVM_DEBUG(dbgs() << "dead copy: " << *MI); MI->setDesc(TII->get(TargetOpcode::KILL)); @@ -188,13 +187,12 @@ bool MadeChange = false; - for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end(); - mbbi != mbbe; ++mbbi) { - for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); - mi != me;) { - MachineInstr &MI = *mi; 
+ for (auto &MBB : MF) { + for (MachineBasicBlock::iterator MII = MBB.begin(), ME = MBB.end(); + MII != ME;) { + MachineInstr &MI = *MII; // Advance iterator here because MI may be erased. - ++mi; + ++MII; // Only expand pseudos. if (!MI.isPseudo()) @@ -212,6 +210,7 @@ MadeChange |= LowerSubregToReg(&MI); break; case TargetOpcode::COPY: + case TargetOpcode::TCOPY: MadeChange |= LowerCopy(&MI); break; case TargetOpcode::DBG_VALUE: diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -309,6 +309,37 @@ if (initializeMachineFunction(YamlMF, MF)) return true; + // A block with INLINEASM_BR has extra information associated with it. The + // indirect targets need to be identified as does the default target. + DenseMap<const BasicBlock *, MachineBasicBlock *> BBToMBB; + + for (auto &mbb : MF) + if (const auto *bb = mbb.getBasicBlock()) + BBToMBB[bb] = &mbb; + + for (auto &mbb : MF) { + SmallPtrSet<MachineBasicBlock *, 4> Succs(mbb.succ_begin(), mbb.succ_end()); + + for (const auto &term : mbb.terminators()) { + if (term.getOpcode() != TargetOpcode::INLINEASM_BR) + continue; + + for (const auto &mo : term.operands()) { + if (!mo.isBlockAddress()) + continue; + + auto *succ = mo.getBlockAddress()->getBasicBlock(); + auto *msucc = BBToMBB[succ]; + + Succs.erase(msucc); + mbb.addInlineAsmBrIndirectTarget(msucc); + } + + if (Succs.size() == 1) + (*Succs.begin())->setInlineAsmBrDefaultTarget(); + } + } + return false; } diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -838,8 +838,8 @@ /// Sink an instruction and its associated debug instructions.
static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo, MachineBasicBlock::iterator InsertPos, + const TargetInstrInfo *TII, SmallVectorImpl<MachineInstr *> &DbgValuesToSink) { - // If we cannot find a location to use (merge with), then we erase the debug // location to prevent debug-info driven tools from potentially reporting // wrong location information. @@ -854,15 +854,17 @@ SuccToSinkTo.splice(InsertPos, ParentBlock, MI, ++MachineBasicBlock::iterator(MI)); + // The copy no longer needs to be a terminator, so convert it to a normal + // COPY. + if (MI.getOpcode() == TargetOpcode::TCOPY) + MI.setDesc(TII->get(TargetOpcode::COPY)); + // Sink a copy of debug users to the insert position. Mark the original // DBG_VALUE location as 'undef', indicating that any earlier variable // location should be terminated as we've optimised away the value at this // point. - for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(), - DBE = DbgValuesToSink.end(); - DBI != DBE; ++DBI) { - MachineInstr *DbgMI = *DBI; - MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(*DBI); + for (auto *DbgMI : DbgValuesToSink) { + MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(DbgMI); SuccToSinkTo.insert(InsertPos, NewDbgMI); if (!attemptDebugCopyProp(MI, *DbgMI)) @@ -887,6 +889,11 @@ if (MI.isConvergent()) return false; + // Sink TCOPY instructions after register allocation to avoid mucking with + // live-ins. + if (MI.getOpcode() == TargetOpcode::TCOPY) + return false; + // Don't break implicit null checks. This is a performance heuristic, and not // required for correctness.
if (SinkingPreventsImplicitNullCheck(MI, TII, TRI)) @@ -1013,7 +1020,7 @@ if (MI.getMF()->getFunction().getSubprogram() && MI.isCopy()) SalvageUnsunkDebugUsersOfCopy(MI, SuccToSinkTo); - performSink(MI, *SuccToSinkTo, InsertPos, DbgUsersToSink); + performSink(MI, *SuccToSinkTo, InsertPos, TII, DbgUsersToSink); // Conservatively, clear any kill flags, since it's possible that they are no // longer correct. @@ -1376,12 +1383,13 @@ // block. clearKillFlags(MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI); MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI(); - performSink(*MI, *SuccBB, InsertPos, DbgValsToSink); + performSink(*MI, *SuccBB, InsertPos, TII, DbgValsToSink); updateLiveIn(MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy); Changed = true; ++NumPostRACopySink; } + return Changed; } diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -613,10 +613,26 @@ } // Count the number of INLINEASM_BR indirect target successors. 
- SmallPtrSet<const MachineBasicBlock *, 4> IndirectTargetSuccs; + unsigned NumIndirectTargetSuccs = 0; + const MachineBasicBlock *IAB = MBB; + if (IAB->pred_size() == 1 && + llvm::any_of((*IAB->pred_begin())->terminators(), + [](const MachineInstr &MI){ + return MI.getOpcode() == TargetOpcode::INLINEASM_BR; + })) + IAB = *IAB->pred_begin(); + + for (const auto &term : IAB->terminators()) { + if (term.getOpcode() != TargetOpcode::INLINEASM_BR) + continue; + if (MBB->succ_size() != 1) + for (const auto &mo : term.operands()) + if (mo.isBlockAddress()) + ++NumIndirectTargetSuccs; + break; + } + for (const auto *succ : MBB->successors()) { - if (MBB->isInlineAsmBrIndirectTarget(succ)) - IndirectTargetSuccs.insert(succ); if (!FunctionBlocks.count(succ)) report("MBB has successor that isn't part of the function.", MBB); if (!MBBInfoMap[succ].Preds.count(MBB)) { @@ -662,14 +678,14 @@ // call or an unreachable, in which case it won't actually fall // out the bottom of the function. } else if (MBB->succ_size() == LandingPadSuccs.size() || - MBB->succ_size() == IndirectTargetSuccs.size()) { + MBB->succ_size() == NumIndirectTargetSuccs) { // It's possible that the block legitimately ends with a noreturn // call or an unreachable, in which case it won't actually fall // out of the block.
} else if ((LandingPadSuccs.size() && MBB->succ_size() != 1 + LandingPadSuccs.size()) || - (IndirectTargetSuccs.size() && - MBB->succ_size() != 1 + IndirectTargetSuccs.size())) { + (NumIndirectTargetSuccs && + MBB->succ_size() != 1 + NumIndirectTargetSuccs)) { report("MBB exits via unconditional fall-through but doesn't have " "exactly one CFG successor!", MBB); } else if (!MBB->isSuccessor(&*MBBI)) { @@ -692,9 +708,8 @@ if (MBB->succ_size() != 1+LandingPadSuccs.size() && (MBB->succ_size() != 1 || LandingPadSuccs.size() != 1 || *MBB->succ_begin() != *LandingPadSuccs.begin()) && - MBB->succ_size() != 1 + IndirectTargetSuccs.size() && - (MBB->succ_size() != 1 || IndirectTargetSuccs.size() != 1 || - *MBB->succ_begin() != *IndirectTargetSuccs.begin())) { + MBB->succ_size() != 1 + NumIndirectTargetSuccs && + (MBB->succ_size() != 1 || NumIndirectTargetSuccs != 1)) { report("MBB exits via unconditional branch but doesn't have " "exactly one CFG successor!", MBB); } else if (!MBB->isSuccessor(TBB)) { @@ -816,15 +831,22 @@ lastIndex = idx; } - // Ensure non-terminators don't follow terminators. - // Ignore predicated terminators formed by if conversion. + // Ensure non-terminators don't follow terminators. Ignore predicated + // terminators formed by if conversion. + // // FIXME: If conversion shouldn't need to violate this rule. if (MI->isTerminator() && !TII->isPredicated(*MI)) { if (!FirstTerminator) FirstTerminator = MI; } else if (FirstTerminator) { - report("Non-terminator instruction after the first terminator", MI); - errs() << "First terminator was:\t" << *FirstTerminator; + // Ignore stack dumps after a terminator at -O0. These are most likely from + // a TCOPY. 
+ if (TM->getOptLevel() == CodeGenOpt::None && MI->mayStore()) { + FirstTerminator = MI; + } else { + report("Non-terminator instruction after the first terminator", MI); + errs() << "First terminator was:\t" << *FirstTerminator; + } } } @@ -1503,6 +1525,19 @@ // Verify properties of various specific instruction types switch (MI->getOpcode()) { + case TargetOpcode::TCOPY: { + MachineBasicBlock::const_iterator MII(MI), MIE = MI->getParent()->end(); + for (; MII != MIE; ++MII) { + if (MII->getOpcode() != TargetOpcode::COPY) + continue; + report("TCOPY and COPY instructions are intermixed", &*MII); + errs() << "- TCOPY instruction: "; + if (Indexes && Indexes->hasIndex(*MI)) + errs() << Indexes->getInstructionIndex(*MI) << '\t'; + MI->print(errs(), /*SkipOpers=*/true); + } + LLVM_FALLTHROUGH; + } case TargetOpcode::COPY: { if (foundErrors) break; diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp --- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -1094,6 +1094,7 @@ default: return nullptr; case TargetOpcode::COPY: + case TargetOpcode::TCOPY: return new CopyRewriter(MI); case TargetOpcode::INSERT_SUBREG: return new InsertSubregRewriter(MI); diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -175,8 +175,16 @@ } else { // Create the reg, emit the copy. VRBase = MRI->createVirtualRegister(DstRC); - BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), - VRBase).addReg(SrcReg); + const MachineBasicBlock::iterator Term = MBB->getFirstTerminator(); + // FIXME: The predicate to determine whether an instruction is a COPY or + // TCOPY should be generic. At this time though the criteria isn't + // well-known except for INLINEASM_BR instructions. 
+ unsigned TgtOpc = + Term != MBB->end() && Term->getOpcode() == TargetOpcode::INLINEASM_BR + ? TargetOpcode::TCOPY + : TargetOpcode::COPY; + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TgtOpc), VRBase) + .addReg(SrcReg); } SDValue Op(Node, ResNo); diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -1031,44 +1031,46 @@ // Split after an INLINEASM_BR block with outputs. This allows us to keep the // copy to/from register instructions from being between two terminator // instructions, which causes the machine instruction verifier agita. - auto TI = llvm::find_if(*BB, [](const MachineInstr &MI){ - return MI.getOpcode() == TargetOpcode::INLINEASM_BR; + auto Iter = llvm::find_if(*BB, [](MachineInstr &MI) { + MachineBasicBlock::iterator Next(MI); + Next = std::next(Next); + return Next != MI.getParent()->end() && + (MI.getOpcode() == TargetOpcode::TCOPY || + MI.getOpcode() == TargetOpcode::INLINEASM_BR) && + Next->getOpcode() == TargetOpcode::COPY; }); - auto SplicePt = TI != BB->end() ? 
std::next(TI) : BB->end(); - if (TI != BB->end() && SplicePt != BB->end() && - TI->getOpcode() == TargetOpcode::INLINEASM_BR && - SplicePt->getOpcode() == TargetOpcode::COPY) { - MachineBasicBlock *FallThrough = BB->getFallThrough(); - if (!FallThrough) + if (Iter != BB->end()) { + MachineBasicBlock *DefaultTarget = BB->getFallThrough(); + if (!DefaultTarget) for (const MachineOperand &MO : BB->back().operands()) if (MO.isMBB()) { - FallThrough = MO.getMBB(); + DefaultTarget = MO.getMBB(); break; } - assert(FallThrough && "Cannot find default dest block for callbr!"); + assert(DefaultTarget && "Cannot find default dest block for callbr!"); MachineBasicBlock *CopyBB = MF.CreateMachineBasicBlock(BB->getBasicBlock()); MachineFunction::iterator BBI(*BB); MF.insert(++BBI, CopyBB); - CopyBB->splice(CopyBB->begin(), BB, SplicePt, BB->end()); + CopyBB->splice(CopyBB->begin(), BB, std::next(Iter), BB->end()); CopyBB->setInlineAsmBrDefaultTarget(); - CopyBB->addSuccessor(FallThrough, BranchProbability::getOne()); - BB->removeSuccessor(FallThrough); + CopyBB->addSuccessor(DefaultTarget, BranchProbability::getOne()); BB->addSuccessor(CopyBB, BranchProbability::getOne()); - // Mark all physical registers defined in the original block as being live - // on entry to the copy block. - for (const auto &MI : *CopyBB) - for (const MachineOperand &MO : MI.operands()) - if (MO.isReg()) { - Register reg = MO.getReg(); - if (Register::isPhysicalRegister(reg)) { - CopyBB->addLiveIn(reg); - break; - } - } + // Bit of a hack: The copy block we created here exists only because we want + // the CFG to work with the current system. However, the successors to the + // block with the INLINEASM_BR instruction expect values to come from *that* + // block, not this usurper block. Thus we steal its successors and add them + // to the copy so that everyone is happy. 
+ for (auto *Succ : BB->successors()) + if (Succ != CopyBB && !CopyBB->isSuccessor(Succ)) + CopyBB->addSuccessor(Succ, BranchProbability::getZero()); + + for (auto *Succ : CopyBB->successors()) + if (BB->isSuccessor(Succ)) + BB->removeSuccessor(Succ); CopyBB->normalizeSuccProbs(); BB->normalizeSuccProbs(); diff --git a/llvm/test/CodeGen/AArch64/callbr-asm-label.ll b/llvm/test/CodeGen/AArch64/callbr-asm-label.ll --- a/llvm/test/CodeGen/AArch64/callbr-asm-label.ll +++ b/llvm/test/CodeGen/AArch64/callbr-asm-label.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-linux-gnu -O0 | FileCheck %s --check-prefix=CHECK-O0 @X = common local_unnamed_addr global i32 0, align 4 @@ -9,6 +10,13 @@ ; CHECK-LABEL: .LBB0_1: // %cleanup ; CHECK-LABEL: .Ltmp0: ; CHECK-LABEL: .LBB0_2: // %indirect + +; CHECK-O0-LABEL: test1: +; CHECK-O0: .word b +; CHECK-O0-NEXT: .word .Ltmp1 +; CHECK-O0-LABEL: .Ltmp1: +; CHECK-O0-LABEL: .LBB0_1: // %indirect +; CHECK-O0-LABEL: .LBB0_2: // %cleanup entry: callbr void asm sideeffect "1:\0A\09.word b, ${0:l}\0A\09", "X"(i8* blockaddress(@test1, %indirect)) to label %cleanup [label %indirect] @@ -22,7 +30,8 @@ } define void @test2() { -; CHECK-LABEL: test2: +; CHECK-LABEL: test2: +; CHECK-O0-LABEL: test2: entry: %0 = load i32, i32* @X, align 4 %and = and i32 %0, 1 @@ -34,6 +43,11 @@ ; CHECK-NEXT: .word .Ltmp2 ; CHECK-LABEL: .Ltmp2: ; CHECK-NEXT: .LBB1_3: // %if.end6 + +; CHECK-O0: .word b +; CHECK-O0-NEXT: .word .Ltmp3 +; CHECK-O0-LABEL: .Ltmp3: +; CHECK-O0-NEXT: .LBB1_3: // %if.end6 callbr void asm sideeffect "1:\0A\09.word b, ${0:l}\0A\09", "X"(i8* blockaddress(@test2, %if.end6)) to label %if.then4 [label %if.end6] @@ -50,6 +64,9 @@ if.then9: ; CHECK-LABEL: .Ltmp4: ; CHECK-NEXT: .LBB1_5: // %l_yes + +; CHECK-O0-LABEL: .Ltmp5: +; CHECK-O0-NEXT: .LBB1_6: // %l_yes callbr void asm sideeffect "", "X"(i8* blockaddress(@test2, %l_yes)) to label %if.end10 [label %l_yes] diff --git 
a/llvm/test/CodeGen/SystemZ/asm-20.ll b/llvm/test/CodeGen/SystemZ/asm-20.ll --- a/llvm/test/CodeGen/SystemZ/asm-20.ll +++ b/llvm/test/CodeGen/SystemZ/asm-20.ll @@ -1,6 +1,7 @@ ; Test that asm goto can be compiled. ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -O0 define i32 @c() { entry: diff --git a/llvm/test/CodeGen/X86/callbr-asm-label-addr.ll b/llvm/test/CodeGen/X86/callbr-asm-label-addr.ll --- a/llvm/test/CodeGen/X86/callbr-asm-label-addr.ll +++ b/llvm/test/CodeGen/X86/callbr-asm-label-addr.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -O0 | FileCheck %s --check-prefix=CHECK-O0 define i32 @test1(i32 %x) { ; CHECK-LABEL: test1: @@ -9,6 +10,16 @@ ; CHECK-NEXT: callq foo ; CHECK-LABEL: .Ltmp0: ; CHECK-NEXT: # %bb.2: # %baz + +; CHECK-O0-LABEL: test1: +; CHECK-O0: .quad .Ltmp0 +; CHECK-O0-NEXT: .quad .Ltmp1 +; CHECK-O0-LABEL: .Ltmp1: +; CHECK-O0-LABEL: .LBB0_2: # %bar +; CHECK-O0-NEXT: movl +; CHECK-O0-NEXT: callq foo +; CHECK-O0-LABEL: .Ltmp0: +; CHECK-O0-NEXT: # %bb.3: # %baz entry: callbr void asm sideeffect ".quad ${0:l}\0A\09.quad ${1:l}", "i,X,~{dirflag},~{fpsr},~{flags}"(i8* blockaddress(@test1, %baz), i8* blockaddress(@test1, %bar)) to label %asm.fallthrough [label %bar] diff --git a/llvm/test/CodeGen/X86/peephole-tcopy.mir b/llvm/test/CodeGen/X86/peephole-tcopy.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/peephole-tcopy.mir @@ -0,0 +1,283 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -run-pass=peephole-opt -mtriple=x86_64-unknown-linux-gnu %s -o - | FileCheck %s + +--- | + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-linux-gnu" + + %struct.anon = type { i32, i32 } + + @x = dso_local local_unnamed_addr global %struct.anon zeroinitializer, align 4 + + ; Function 
Attrs: nounwind uwtable + define dso_local i32 @test1(i32 %out1) #0 { + entry: + %0 = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @x, i64 0, i32 0), align 4, !tbaa !2 + %1 = callbr i32 asm sideeffect "testl $0, $0; testl $1, $1; jne ${2:l}", "={si},r,X,X,0,~{dirflag},~{fpsr},~{flags}"(i32 %out1, i8* blockaddress(@test1, %label_true), i8* blockaddress(@test1, %landing), i32 %0) #2 + to label %asm.fallthrough [label %label_true, label %landing], !srcloc !7 + + asm.fallthrough: ; preds = %entry + br label %landing + + landing: ; preds = %label_true, %asm.fallthrough, %entry + %out2.0 = phi i32 [ %1, %asm.fallthrough ], [ %0, %entry ], [ %0, %label_true ] + %add = add nsw i32 %out2.0, %out1 + br label %cleanup + + label_true: ; preds = %label_true, %entry + %2 = callbr i32 asm sideeffect "testl $0, $0; testl $1, $1; jne ${2:l}", "={si},r,X,X,0,~{dirflag},~{fpsr},~{flags}"(i32 %out1, i8* blockaddress(@test1, %label_true), i8* blockaddress(@test1, %landing), i32 %0) #2 + to label %cleanup [label %label_true, label %landing], !srcloc !8 + + cleanup: ; preds = %label_true, %landing + %retval.0 = phi i32 [ %add, %landing ], [ -2, %label_true ] + ret i32 %retval.0 + } + + ; Function Attrs: nounwind uwtable + define dso_local i32 @test2(i32 %out1) #0 { + entry: + %0 = callbr i32 asm sideeffect "", "=r,r,X,~{dirflag},~{fpsr},~{flags}"(i32 %out1, i8* blockaddress(@test2, %exit)) #2 + to label %asm.fallthrough [label %exit], !srcloc !9 + + asm.fallthrough: ; preds = %entry + %cmp = icmp eq i32 %0, 37 + br i1 %cmp, label %exit, label %return + + exit: ; preds = %exit, %asm.fallthrough, %entry + %out1.addr.0 = phi i32 [ 37, %asm.fallthrough ], [ 42, %exit ], [ %out1, %entry ] + %1 = tail call i32 asm sideeffect "", "={si},r,0,~{dirflag},~{fpsr},~{flags}"(i32 %out1.addr.0, i32 %out1.addr.0) #2, !srcloc !10 + %cmp1 = icmp eq i32 %1, 42 + br i1 %cmp1, label %exit, label %if.end3 + + if.end3: ; preds = %exit + %call = tail call i32 @g(i32 %1) #2 + ret i32 %call 
+ + return: ; preds = %asm.fallthrough + ret i32 0 + } + + declare dso_local i32 @g(i32) local_unnamed_addr #1 + + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #2 + + attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #2 = { nounwind } + + !llvm.module.flags = !{!0} + !llvm.ident = !{!1} + + !0 = !{i32 1, !"wchar_size", i32 4} + !1 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git 8f86db59685eb90131cd355f434a31b4a2880590)"} + !2 = !{!3, !4, i64 0} + !3 = !{!"", !4, i64 0, !4, i64 4} + !4 = !{!"int", !5, i64 0} + !5 = !{!"omnipotent char", !6, i64 0} + !6 = !{!"Simple C/C++ TBAA"} + !7 = !{i32 98} + !8 = !{i32 249} + !9 = !{i32 409} + !10 = !{i32 514} + +... 
+--- +name: test1 +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: gr32 } + - { id: 1, class: gr32 } + - { id: 2, class: gr32 } + - { id: 3, class: gr32 } + - { id: 4, class: gr32 } + - { id: 5, class: gr32 } + - { id: 6, class: gr32 } + - { id: 7, class: gr32 } + - { id: 8, class: gr32 } + - { id: 9, class: gr32 } + - { id: 10, class: gr32 } + - { id: 11, class: gr32 } + - { id: 12, class: gr32 } +liveins: + - { reg: '$edi', virtual-reg: '%5' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: test1 + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $edi + ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi + ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm $rip, 1, $noreg, @x, $noreg :: (dereferenceable load 4 from `i32* getelementptr inbounds (%struct.anon, %struct.anon* @x, i64 0, i32 0)`, !tbaa !2) + ; CHECK: [[COPY1:%[0-9]+]]:gr32 = COPY [[MOV32rm]] + ; CHECK: [[COPY2:%[0-9]+]]:gr32 = COPY [[COPY]] + ; CHECK: INLINEASM_BR &"testl $0, $0; testl $1, $1; jne ${2:l}", 1, 10, implicit-def $esi, 2228233, [[COPY2]], 13, blockaddress(@test1, %ir-block.label_true), 13, blockaddress(@test1, %ir-block.landing), 2147483657, [[MOV32rm]](tied-def 3), 12, implicit-def early-clobber $df, 12, implicit-def early-clobber $fpsw, 12, implicit-def early-clobber $eflags, !7 + ; CHECK: [[TCOPY:%[0-9]+]]:gr32 = TCOPY $esi + ; CHECK: bb.1.entry: + ; CHECK: successors: %bb.2(0x80000000), %bb.4(0x00000000), %bb.3(0x00000000) + ; CHECK: JMP_1 %bb.2 + ; CHECK: bb.2.asm.fallthrough: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: [[COPY3:%[0-9]+]]:gr32 = COPY [[TCOPY]] + ; CHECK: bb.3.landing (address-taken): + ; CHECK: successors: %bb.5(0x80000000) + ; CHECK: [[PHI:%[0-9]+]]:gr32 = PHI [[COPY1]], %bb.1, [[COPY1]], %bb.4, [[COPY3]], %bb.2 + ; CHECK: [[ADD32rr:%[0-9]+]]:gr32 = nsw ADD32rr [[PHI]], [[COPY]], implicit-def dead $eflags + ; CHECK: JMP_1 %bb.5 + ; CHECK: bb.4.label_true (address-taken): + ; CHECK: 
successors: %bb.5(0x80000000), %bb.4(0x00000000), %bb.3(0x00000000) + ; CHECK: [[MOV32ri:%[0-9]+]]:gr32 = MOV32ri -2 + ; CHECK: [[COPY4:%[0-9]+]]:gr32 = COPY [[COPY]] + ; CHECK: [[COPY5:%[0-9]+]]:gr32 = COPY [[COPY1]] + ; CHECK: INLINEASM_BR &"testl $0, $0; testl $1, $1; jne ${2:l}", 1, 10, implicit-def $esi, 2228233, [[COPY4]], 13, blockaddress(@test1, %ir-block.label_true), 13, blockaddress(@test1, %ir-block.landing), 2147483657, [[COPY5]](tied-def 3), 12, implicit-def early-clobber $df, 12, implicit-def early-clobber $fpsw, 12, implicit-def early-clobber $eflags, !8 + ; CHECK: [[TCOPY1:%[0-9]+]]:gr32 = TCOPY $esi + ; CHECK: JMP_1 %bb.5 + ; CHECK: bb.5.cleanup: + ; CHECK: [[PHI1:%[0-9]+]]:gr32 = PHI [[MOV32ri]], %bb.4, [[ADD32rr]], %bb.3 + ; CHECK: $eax = COPY [[PHI1]] + ; CHECK: RET 0, $eax + bb.0.entry: + liveins: $edi + + %5:gr32 = COPY $edi + %7:gr32 = MOV32rm $rip, 1, $noreg, @x, $noreg :: (dereferenceable load 4 from `i32* getelementptr inbounds (%struct.anon, %struct.anon* @x, i64 0, i32 0)`, !tbaa !2) + %0:gr32 = COPY %7 + %6:gr32 = COPY %5 + INLINEASM_BR &"testl $0, $0; testl $1, $1; jne ${2:l}", 1, 10, implicit-def $esi, 2228233, %6, 13, blockaddress(@test1, %ir-block.label_true), 13, blockaddress(@test1, %ir-block.landing), 2147483657, %7(tied-def 3), 12, implicit-def early-clobber $df, 12, implicit-def early-clobber $fpsw, 12, implicit-def early-clobber $eflags, !7 + %8:gr32 = TCOPY $esi + + bb.5.entry: + successors: %bb.1(0x80000000), %bb.3(0x00000000), %bb.2(0x00000000) + + JMP_1 %bb.1 + + bb.1.asm.fallthrough: + %1:gr32 = COPY %8 + + bb.2.landing (address-taken): + %2:gr32 = PHI %0, %bb.5, %0, %bb.3, %1, %bb.1 + %3:gr32 = nsw ADD32rr %2, %5, implicit-def dead $eflags + JMP_1 %bb.4 + + bb.3.label_true (address-taken): + successors: %bb.4(0x80000000), %bb.3(0x00000000), %bb.2(0x00000000) + + %9:gr32 = MOV32ri -2 + %10:gr32 = COPY %5 + %11:gr32 = COPY %0 + INLINEASM_BR &"testl $0, $0; testl $1, $1; jne ${2:l}", 1, 10, implicit-def $esi, 2228233, %10, 
13, blockaddress(@test1, %ir-block.label_true), 13, blockaddress(@test1, %ir-block.landing), 2147483657, %11(tied-def 3), 12, implicit-def early-clobber $df, 12, implicit-def early-clobber $fpsw, 12, implicit-def early-clobber $eflags, !8 + %12:gr32 = TCOPY $esi + JMP_1 %bb.4 + + bb.4.cleanup: + %4:gr32 = PHI %9, %bb.3, %3, %bb.2 + $eax = COPY %4 + RET 0, $eax + +... +--- +name: test2 +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: gr32 } + - { id: 1, class: gr32 } + - { id: 2, class: gr32 } + - { id: 3, class: gr32 } + - { id: 4, class: gr32 } + - { id: 5, class: gr32 } + - { id: 6, class: gr32 } + - { id: 7, class: gr32 } + - { id: 8, class: gr32 } + - { id: 9, class: gr32 } + - { id: 10, class: gr32 } + - { id: 11, class: gr32 } + - { id: 12, class: gr32 } + - { id: 13, class: gr32 } +liveins: + - { reg: '$edi', virtual-reg: '%3' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: test2 + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $edi + ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi + ; CHECK: [[COPY1:%[0-9]+]]:gr32 = COPY [[COPY]] + ; CHECK: INLINEASM_BR &"", 1, 2228234, def %4, 2228233, [[COPY1]], 13, blockaddress(@test2, %ir-block.exit), 12, implicit-def early-clobber $df, 12, implicit-def early-clobber $fpsw, 12, implicit-def early-clobber $eflags, !9 + ; CHECK: bb.1.entry: + ; CHECK: successors: %bb.2(0x80000000), %bb.3(0x00000000) + ; CHECK: JMP_1 %bb.2 + ; CHECK: bb.2.asm.fallthrough: + ; CHECK: successors: %bb.3(0x40000000), %bb.5(0x40000000) + ; CHECK: [[MOV32ri:%[0-9]+]]:gr32 = MOV32ri 37 + ; CHECK: CMP32ri8 %4, 37, implicit-def $eflags + ; CHECK: JCC_1 %bb.5, 5, implicit $eflags + ; CHECK: JMP_1 %bb.3 + ; CHECK: bb.3.exit (address-taken): + ; CHECK: successors: %bb.3(0x7c000000), %bb.4(0x04000000) + ; CHECK: [[PHI:%[0-9]+]]:gr32 = PHI [[COPY]], %bb.1, [[MOV32ri]], %bb.2, %11, %bb.3 + ; CHECK: [[COPY2:%[0-9]+]]:gr32 = COPY [[PHI]] + ; CHECK: INLINEASM &"", 
1, 10, implicit-def $esi, 2228233, [[COPY2]], 2147483657, [[COPY2]](tied-def 3), 12, implicit-def early-clobber $df, 12, implicit-def early-clobber $fpsw, 12, implicit-def early-clobber $eflags, !10 + ; CHECK: [[COPY3:%[0-9]+]]:gr32 = COPY $esi + ; CHECK: [[MOV32ri1:%[0-9]+]]:gr32 = MOV32ri 42 + ; CHECK: CMP32ri8 [[COPY3]], 42, implicit-def $eflags + ; CHECK: JCC_1 %bb.3, 4, implicit $eflags + ; CHECK: JMP_1 %bb.4 + ; CHECK: bb.4.if.end3: + ; CHECK: [[COPY4:%[0-9]+]]:gr32 = COPY [[COPY3]] + ; CHECK: $edi = COPY [[COPY4]] + ; CHECK: TCRETURNdi64 @g, 0, csr_64, implicit $rsp, implicit $ssp, implicit $edi + ; CHECK: bb.5.return: + ; CHECK: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags + ; CHECK: $eax = COPY [[MOV32r0_]] + ; CHECK: RET 0, $eax + bb.0.entry: + liveins: $edi + + %3:gr32 = COPY $edi + %5:gr32 = COPY %3 + INLINEASM_BR &"", 1, 2228234, def %4, 2228233, %5, 13, blockaddress(@test2, %ir-block.exit), 12, implicit-def early-clobber $df, 12, implicit-def early-clobber $fpsw, 12, implicit-def early-clobber $eflags, !9 + + bb.5.entry: + successors: %bb.1(0x80000000), %bb.2(0x00000000) + + JMP_1 %bb.1 + + bb.1.asm.fallthrough: + successors: %bb.2, %bb.4 + + %6:gr32 = MOV32ri 37 + %7:gr32 = SUB32ri8 %4, 37, implicit-def $eflags + JCC_1 %bb.4, 5, implicit $eflags + JMP_1 %bb.2 + + bb.2.exit (address-taken): + successors: %bb.2(0x7c000000), %bb.3(0x04000000) + + %1:gr32 = PHI %3, %bb.5, %6, %bb.1, %11, %bb.2 + %9:gr32 = COPY %1 + %10:gr32 = COPY %1 + INLINEASM &"", 1, 10, implicit-def $esi, 2228233, %9, 2147483657, %10(tied-def 3), 12, implicit-def early-clobber $df, 12, implicit-def early-clobber $fpsw, 12, implicit-def early-clobber $eflags, !10 + %12:gr32 = COPY $esi + %11:gr32 = MOV32ri 42 + %13:gr32 = SUB32ri8 %12, 42, implicit-def $eflags + JCC_1 %bb.2, 4, implicit $eflags + JMP_1 %bb.3 + + bb.3.if.end3: + %2:gr32 = COPY %12 + $edi = COPY %2 + TCRETURNdi64 @g, 0, csr_64, implicit $rsp, implicit $ssp, implicit $edi + + bb.4.return: + %8:gr32 = 
MOV32r0 implicit-def dead $eflags + $eax = COPY %8 + RET 0, $eax + +...