diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -26737,6 +26737,42 @@
 If the result value does not fit in the result type, then the result
 is a :ref:`poison value <poisonvalues>`.
 
+.. _llvm_fake_use:
+
+'``llvm.fake.use``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void @llvm.fake.use(...)
+
+Overview:
+"""""""""
+
+The ``llvm.fake.use`` intrinsic is a no-op. It takes a single
+value as an operand and is treated as a use of that operand, to force the
+optimizer to preserve that value prior to the fake use. This is used for
+extending the lifetimes of variables, where this intrinsic placed at the end of
+a variable's scope helps prevent that variable from being optimized out.
+
+Arguments:
+""""""""""
+
+The ``llvm.fake.use`` intrinsic takes one argument, which may be any
+function-local SSA value. Note that the signature is variadic so that the
+intrinsic can take any type of argument, but passing more than one argument will
+result in an error.
+
+Semantics:
+""""""""""
+
+This intrinsic does nothing, but optimizers must consider it a use of its single
+operand and should try to preserve the intrinsic and its position in the
+function.
+
 Stack Map Intrinsics
 --------------------
 
diff --git a/llvm/include/llvm/Analysis/PtrUseVisitor.h b/llvm/include/llvm/Analysis/PtrUseVisitor.h
--- a/llvm/include/llvm/Analysis/PtrUseVisitor.h
+++ b/llvm/include/llvm/Analysis/PtrUseVisitor.h
@@ -279,6 +279,7 @@
     default:
       return Base::visitIntrinsicInst(II);
 
+    case Intrinsic::fake_use:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      return; // No-op intrinsics.
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1274,6 +1274,11 @@
   LIFETIME_START,
   LIFETIME_END,
 
+  /// FAKE_USE represents a use of the operand but does not do anything.
+  /// Its purpose is the extension of the operand's lifetime mainly for
+  /// debugging purposes.
+  FAKE_USE,
+
   /// GC_TRANSITION_START/GC_TRANSITION_END - These operators mark the
   /// beginning and end of GC transition sequence, and carry arbitrary
   /// information that target might need for lowering. The first operand is
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -1405,6 +1405,8 @@
     return getOpcode() == TargetOpcode::EXTRACT_SUBREG;
   }
 
+  bool isFakeUse() const { return getOpcode() == TargetOpcode::FAKE_USE; }
+
   /// Return true if the instruction behaves like a copy.
   /// This does not include native copy instructions.
   bool isCopyLike() const {
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
--- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
@@ -316,6 +316,7 @@
   void Select_READ_REGISTER(SDNode *Op);
   void Select_WRITE_REGISTER(SDNode *Op);
   void Select_UNDEF(SDNode *N);
+  void Select_FAKE_USE(SDNode *N);
   void CannotYetSelect(SDNode *N);
 
   void Select_FREEZE(SDNode *N);
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1729,6 +1729,9 @@
                                 [IntrNoMem, IntrWillReturn, IntrConvergent],
                                 "llvm.is.constant">;
 
+// Introduce a use of the argument without generating any code.
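+// For example (illustrative only): a frontend running with lifetime
+// extension (e.g. clang's -fextend-lifetimes) may emit, at the end of a
+// variable's scope,
+//   notail call void (...) @llvm.fake.use(i32 %x)
+// which keeps %x alive through the end of the scope without generating code.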
+def int_fake_use : Intrinsic<[], [llvm_vararg_ty]>;
+
 // Intrinsic to mask out bits of a pointer.
 def int_ptrmask: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
                                        [LLVMMatchType<0>, llvm_anyint_ty],
                                        [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -222,6 +222,9 @@
 HANDLE_TARGET_OPCODE(ICALL_BRANCH_FUNNEL)
 
+/// Represents a use of the operand but generates no code.
+HANDLE_TARGET_OPCODE(FAKE_USE)
+
 // This is a fence with the singlethread scope. It represents a compiler memory
 // barrier, but does not correspond to any generated instruction.
 HANDLE_TARGET_OPCODE(MEMBARRIER)
diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -1326,6 +1326,14 @@
   let isTerminator = true;
   let isBranch = true;
 }
+def FAKE_USE : StandardPseudoInstruction {
+  // An instruction that uses its operands but does nothing.
+  let OutOperandList = (outs);
+  let InOperandList = (ins variable_ops);
+  let AsmString = "FAKE_USE";
+  let hasSideEffects = 0;
+  let isMeta = true;
+}
 def PATCHABLE_OP : StandardPseudoInstruction {
   let OutOperandList = (outs);
   let InOperandList = (ins variable_ops);
diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp
--- a/llvm/lib/CodeGen/Analysis.cpp
+++ b/llvm/lib/CodeGen/Analysis.cpp
@@ -605,7 +605,8 @@
     if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(BBI))
       if (II->getIntrinsicID() == Intrinsic::lifetime_end ||
          II->getIntrinsicID() == Intrinsic::assume ||
-          II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl)
+          II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl ||
+          II->getIntrinsicID() == Intrinsic::fake_use)
        continue;
     if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
         !isSafeToSpeculativelyExecute(&*BBI))
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1068,11 +1068,46 @@
   }
 }
 
+// Recognize cases where a spilled register is reloaded solely to feed into a
+// FAKE_USE.
+static bool isLoadFeedingIntoFakeUse(const MachineInstr &MI) {
+  const MachineFunction *MF = MI.getMF();
+  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
+  // If the restore size is std::nullopt then we are not dealing with a reload
+  // of a spilled register.
+  if (!MI.getRestoreSize(TII))
+    return false;
+
+  // Check whether this register is the operand of a FAKE_USE and
+  // whether the FAKE_USE has the kill flag set on it.
+  auto NextI = std::next(MI.getIterator());
+  if (NextI == MI.getParent()->end() || !NextI->isFakeUse())
+    return false;
+
+  unsigned Reg = MI.getOperand(0).getReg();
+  for (const MachineOperand &MO : NextI->operands()) {
+    // Return true if we came across the register from the
+    // previous spill instruction that is killed in NextI.
+    if (MO.isReg() && MO.isUse() && MO.isKill() && MO.getReg() == Reg)
+      return true;
+  }
+
+  return false;
+}
+
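+// For example (illustrative x86-64 MIR, not taken from a real test), in
+//   MOV64mr $rsp, 1, $noreg, -8, $noreg, killed renamable $rbx  # spill
+//   ...
+//   renamable $rbx = MOV64rm $rsp, 1, $noreg, -8, $noreg        # reload
+//   FAKE_USE killed renamable $rbx
+// the reload feeds only the FAKE_USE, so isLoadFeedingIntoFakeUse() returns
+// true and the load need not be emitted; the value is still on the stack.
+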
 /// emitComments - Pretty-print comments for instructions.
 static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
   const MachineFunction *MF = MI.getMF();
   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
 
+  // If this is a reload of a spilled register that only feeds into a FAKE_USE
+  // instruction, the loaded value has no effect on the program and has only
+  // been kept alive for debugging. Since it is still available on the stack,
+  // we can skip the load itself.
+  if (isLoadFeedingIntoFakeUse(MI))
+    return;
+
   // Check for spills and reloads
 
   // We assume a single instruction only has a spill or reload, not
@@ -1714,6 +1749,8 @@
   case TargetOpcode::KILL:
     if (isVerbose()) emitKill(&MI, *this);
     break;
+  case TargetOpcode::FAKE_USE:
+    break;
   case TargetOpcode::PSEUDO_PROBE:
     emitPseudoProbe(MI);
     break;
@@ -1725,6 +1762,12 @@
       OutStreamer->emitRawComment("MEMBARRIER");
     break;
   default:
+    // If this is a reload of a spilled register that only feeds into a
+    // FAKE_USE instruction, the loaded value has no effect on the program
+    // and has only been kept alive for debugging. Since it is still
+    // available on the stack, we can skip the load itself.
+    if (isLoadFeedingIntoFakeUse(MI))
+      break;
     emitInstruction(&MI);
     if (CanDoExtraAnalysis) {
       MCInst MCI;
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2529,12 +2529,34 @@
     return false;
   };
 
+  SmallVector<const IntrinsicInst *> FakeUses;
+
+  auto isFakeUse = [&FakeUses](const Instruction *Inst) {
+    if (auto *II = dyn_cast<IntrinsicInst>(Inst);
+        II && II->getIntrinsicID() == Intrinsic::fake_use) {
+      // Record the instruction so it can be preserved when the exit block is
+      // removed. Do not copy fake uses that use the result of a PHI node,
+      // as these cannot be copied correctly.
+      // FIXME: If we do want to copy the fake use into the return blocks, we
+      // have to figure out which of the PHI node operands to use for each
+      // copy.
+      if (!isa<PHINode>(II->getOperand(0))) {
+        FakeUses.push_back(II);
+      }
+      return true;
+    }
+
+    return false;
+  };
+
   // Make sure there are no instructions between the first instruction
   // and return.
   const Instruction *BI = BB->getFirstNonPHI();
   // Skip over debug and the bitcast.
   while (isa<DbgInfoIntrinsic>(BI) || BI == BCI || BI == EVI ||
-         isa<PseudoProbeInst>(BI) || isLifetimeEndOrBitCastFor(BI))
+         isa<PseudoProbeInst>(BI) || isLifetimeEndOrBitCastFor(BI) ||
+         isFakeUse(BI))
     BI = BI->getNextNode();
   if (BI != RetI)
     return false;
@@ -2543,6 +2565,7 @@
   /// call.
   const Function *F = BB->getParent();
   SmallVector<BasicBlock *, 4> TailCallBBs;
+  SmallVector<CallInst *, 4> CallInsts;
   if (PN) {
     for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
       // Look through bitcasts.
@@ -2554,6 +2577,9 @@
           TLI->mayBeEmittedAsTailCall(CI) &&
           attributesPermitTailCall(F, CI, RetI, *TLI))
         TailCallBBs.push_back(PredBB);
+      // Record the call instruction so we can insert any fake uses
+      // that need to be preserved before it.
+      CallInsts.push_back(CI);
     }
   } else {
     SmallPtrSet<BasicBlock *, 4> VisitedBBs;
@@ -2563,8 +2589,12 @@
       if (Instruction *I = Pred->rbegin()->getPrevNonDebugInstruction(true)) {
         CallInst *CI = dyn_cast<CallInst>(I);
         if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
-            attributesPermitTailCall(F, CI, RetI, *TLI))
+            attributesPermitTailCall(F, CI, RetI, *TLI)) {
           TailCallBBs.push_back(Pred);
+          // Record the call instruction so we can insert any fake uses
+          // that need to be preserved before it.
+          CallInsts.push_back(CI);
+        }
       }
     }
   }
 
@@ -2590,8 +2620,17 @@
   }
 
   // If we eliminated all predecessors of the block, delete the block now.
-  if (Changed && !BB->hasAddressTaken() && pred_empty(BB))
+  if (Changed && !BB->hasAddressTaken() && pred_empty(BB)) {
+    // Copy the fake uses found in the original return block to all blocks
+    // that contain tail calls.
+    for (auto *CI : CallInsts) {
+      for (auto const *FakeUse : FakeUses) {
+        auto *ClonedInst = FakeUse->clone();
+        ClonedInst->insertBefore(CI);
+      }
+    }
     BB->eraseFromParent();
+  }
 
   return Changed;
 }
diff --git a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
--- a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -66,7 +66,8 @@
     return false;
 
   // Don't delete frame allocation labels.
-  if (MI->getOpcode() == TargetOpcode::LOCAL_ESCAPE)
+  if (MI->getOpcode() == TargetOpcode::LOCAL_ESCAPE ||
+      MI->getOpcode() == TargetOpcode::FAKE_USE)
     return false;
 
   // Don't delete instructions with side effects.
diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp
--- a/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/llvm/lib/CodeGen/MachineCSE.cpp
@@ -403,7 +403,7 @@
 bool MachineCSE::isCSECandidate(MachineInstr *MI) {
   if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
-      MI->isInlineAsm() || MI->isDebugInstr())
+      MI->isInlineAsm() || MI->isDebugInstr() || MI->isFakeUse())
     return false;
 
   // Ignore copies.
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -494,7 +494,8 @@
                             MachineBasicBlock *MBB,
                             MachineFunction *MF,
                             const TargetInstrInfo *TII) {
-  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
+  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF) ||
+         MI->isFakeUse();
 }
 
 /// A region of an MBB for scheduling.
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1380,6 +1380,9 @@
     updateValueMap(II, ResultReg);
     return true;
   }
+  case Intrinsic::fake_use:
+    // At -O0, we don't need fake use, so just ignore it.
+    return true;
   case Intrinsic::experimental_stackmap:
     return selectStackmap(II);
   case Intrinsic::experimental_patchpoint_void:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -2189,6 +2189,9 @@
     report_fatal_error("Do not know how to promote this operator's operand!");
 
   case ISD::BITCAST:   R = PromoteFloatOp_BITCAST(N, OpNo); break;
+  case ISD::FAKE_USE:
+    R = PromoteFloatOp_FAKE_USE(N, OpNo);
+    break;
   case ISD::FCOPYSIGN: R = PromoteFloatOp_FCOPYSIGN(N, OpNo); break;
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT: R = PromoteFloatOp_FP_TO_XINT(N, OpNo); break;
@@ -2222,6 +2225,13 @@
   return DAG.getBitcast(N->getValueType(0), Convert);
 }
 
+SDValue DAGTypeLegalizer::PromoteFloatOp_FAKE_USE(SDNode *N, unsigned OpNo) {
+  assert(OpNo == 1 && "Only Operand 1 must need promotion here");
+  SDValue Op = GetPromotedFloat(N->getOperand(OpNo));
+  return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::Other, N->getOperand(0),
+                     Op);
+}
+
 // Promote Operand 1 of FCOPYSIGN. Operand 0 ought to be handled by
 // PromoteFloatRes_FCOPYSIGN.
 SDValue DAGTypeLegalizer::PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo) {
@@ -3019,6 +3029,9 @@
                        "operand!");
 
   case ISD::BITCAST:   Res = SoftPromoteHalfOp_BITCAST(N); break;
+  case ISD::FAKE_USE:
+    Res = SoftPromoteHalfOp_FAKE_USE(N, OpNo);
+    break;
   case ISD::FCOPYSIGN: Res = SoftPromoteHalfOp_FCOPYSIGN(N, OpNo); break;
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT: Res = SoftPromoteHalfOp_FP_TO_XINT(N); break;
@@ -3056,6 +3069,13 @@
   return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0);
 }
 
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FAKE_USE(SDNode *N, unsigned OpNo) {
+  assert(OpNo == 1 && "Only Operand 1 must need promotion here");
+  SDValue Op = GetSoftPromotedHalf(N->getOperand(OpNo));
+  return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::Other, N->getOperand(0),
+                     Op);
+}
+
 SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FCOPYSIGN(SDNode *N,
                                                       unsigned OpNo) {
   assert(OpNo == 1 && "Only Operand 1 must need promotion here");
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1665,6 +1665,9 @@
   case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break;
   case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break;
   case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break;
+  case ISD::FAKE_USE:
+    Res = PromoteIntOp_FAKE_USE(N);
+    break;
   case ISD::INSERT_VECTOR_ELT:
     Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);
     break;
@@ -5007,6 +5010,9 @@
   case ISD::BR_CC:             Res = ExpandIntOp_BR_CC(N); break;
   case ISD::BUILD_VECTOR:      Res = ExpandOp_BUILD_VECTOR(N); break;
   case ISD::EXTRACT_ELEMENT:   Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+  case ISD::FAKE_USE:
+    Res = ExpandOp_FAKE_USE(N);
+    break;
   case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break;
   case ISD::SCALAR_TO_VECTOR:  Res = ExpandOp_SCALAR_TO_VECTOR(N); break;
   case ISD::SPLAT_VECTOR:      Res = ExpandIntOp_SPLAT_VECTOR(N); break;
@@ -5846,6 +5852,19 @@
   return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0));
 }
 
+// FIXME: We wouldn't need this if clang could promote short integers
+// that are arguments to FAKE_USE.
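+// For example (illustrative): FAKE_USE (ch, i16 %x) on a target whose
+// smallest legal integer type is i32 becomes FAKE_USE (ch, any_extend %x)
+// below.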
+SDValue DAGTypeLegalizer::PromoteIntOp_FAKE_USE(SDNode *N) {
+  SDLoc dl(N);
+  SDValue V0 = N->getOperand(0);
+  SDValue V1 = N->getOperand(1);
+  EVT InVT1 = V1.getValueType();
+  SDValue VPromoted =
+      DAG.getNode(ISD::ANY_EXTEND, dl,
+                  TLI.getTypeToTransformTo(*DAG.getContext(), InVT1), V1);
+  return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), V0, VPromoted);
+}
+
 SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N) {
   SDLoc dl(N);
   SDValue V0 = GetPromotedInteger(N->getOperand(0));
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -377,6 +377,7 @@
   SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N);
   SDValue PromoteIntOp_INSERT_SUBVECTOR(SDNode *N);
+  SDValue PromoteIntOp_FAKE_USE(SDNode *N);
   SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
   SDValue PromoteIntOp_ScalarOp(SDNode *N);
   SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
@@ -709,6 +710,7 @@
   bool PromoteFloatOperand(SDNode *N, unsigned OpNo);
   SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo);
+  SDValue PromoteFloatOp_FAKE_USE(SDNode *N, unsigned OpNo);
   SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
   SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo);
   SDValue PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo);
@@ -749,6 +751,7 @@
   bool SoftPromoteHalfOperand(SDNode *N, unsigned OpNo);
   SDValue SoftPromoteHalfOp_BITCAST(SDNode *N);
+  SDValue SoftPromoteHalfOp_FAKE_USE(SDNode *N, unsigned OpNo);
   SDValue SoftPromoteHalfOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
   SDValue SoftPromoteHalfOp_FP_EXTEND(SDNode *N);
   SDValue SoftPromoteHalfOp_FP_TO_XINT(SDNode *N);
@@ -821,6 +824,7 @@
   SDValue ScalarizeVecOp_STRICT_FP_EXTEND(SDNode *N);
   SDValue ScalarizeVecOp_VECREDUCE(SDNode *N);
   SDValue ScalarizeVecOp_VECREDUCE_SEQ(SDNode *N);
+  SDValue ScalarizeVecOp_FAKE_USE(SDNode *N);
 
   //===--------------------------------------------------------------------===//
   // Vector Splitting Support: LegalizeVectorTypes.cpp
@@ -903,6 +907,7 @@
   SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
   SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue SplitVecOp_ExtVecInRegOp(SDNode *N);
+  SDValue SplitVecOp_FAKE_USE(SDNode *N);
   SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
   SDValue SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo);
   SDValue SplitVecOp_VP_STRIDED_STORE(VPStridedStoreSDNode *N, unsigned OpNo);
@@ -1000,6 +1005,7 @@
   SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue WidenVecOp_INSERT_SUBVECTOR(SDNode *N);
   SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+  SDValue WidenVecOp_FAKE_USE(SDNode *N);
   SDValue WidenVecOp_STORE(SDNode* N);
   SDValue WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo);
   SDValue WidenVecOp_VP_STRIDED_STORE(SDNode *N, unsigned OpNo);
@@ -1127,6 +1133,7 @@
   SDValue ExpandOp_BITCAST          (SDNode *N);
   SDValue ExpandOp_BUILD_VECTOR     (SDNode *N);
   SDValue ExpandOp_EXTRACT_ELEMENT  (SDNode *N);
+  SDValue ExpandOp_FAKE_USE(SDNode *N);
   SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N);
   SDValue ExpandOp_SCALAR_TO_VECTOR (SDNode *N);
   SDValue ExpandOp_NormalStore      (SDNode *N, unsigned OpNo);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -403,6 +403,17 @@
   return N->getConstantOperandVal(1) ? Hi : Lo;
 }
 
+// Split the integer operand in two and create a second FAKE_USE node for
+// the other half. The original SDNode is updated in place.
+SDValue DAGTypeLegalizer::ExpandOp_FAKE_USE(SDNode *N) {
+  SDValue Lo, Hi;
+  SDValue Chain = N->getOperand(0);
+  GetExpandedOp(N->getOperand(1), Lo, Hi);
+  SDValue LoUse = DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, Chain, Lo);
+  DAG.UpdateNodeOperands(N, LoUse, Hi);
+  return SDValue(N, 0);
+}
+
 SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
   // The vector type is legal but the element type needs expansion.
   EVT VecVT = N->getValueType(0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -672,6 +672,9 @@
   case ISD::BITCAST:
     Res = ScalarizeVecOp_BITCAST(N);
     break;
+  case ISD::FAKE_USE:
+    Res = ScalarizeVecOp_FAKE_USE(N);
+    break;
   case ISD::ANY_EXTEND:
   case ISD::ZERO_EXTEND:
   case ISD::SIGN_EXTEND:
@@ -761,6 +764,14 @@
                      N->getValueType(0), Elt);
 }
 
+// Need to legalize vector operands of fake uses. Must be <1 x ty>.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_FAKE_USE(SDNode *N) {
+  assert(N->getOperand(1).getValueType().getVectorNumElements() == 1 &&
+         "Fake Use: Unexpected vector type!");
+  SDValue Elt = GetScalarizedVector(N->getOperand(1));
+  return DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, N->getOperand(0), Elt);
+}
+
 /// If the input is a vector that needs to be scalarized, it must be <1 x ty>.
 /// Do the operation on the element instead.
 SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
@@ -2977,7 +2988,9 @@
   case ISD::FLDEXP:
     Res = SplitVecOp_FPOpDifferentTypes(N);
     break;
-
+  case ISD::FAKE_USE:
+    Res = SplitVecOp_FAKE_USE(N);
+    break;
   case ISD::ANY_EXTEND_VECTOR_INREG:
   case ISD::SIGN_EXTEND_VECTOR_INREG:
   case ISD::ZERO_EXTEND_VECTOR_INREG:
@@ -3186,6 +3199,15 @@
   return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
 }
 
+// Split a FAKE_USE use of a vector into FAKE_USEs of the hi and lo parts.
+SDValue DAGTypeLegalizer::SplitVecOp_FAKE_USE(SDNode *N) {
+  SDValue Lo, Hi;
+  GetSplitVector(N->getOperand(1), Lo, Hi);
+  SDValue Chain =
+      DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, N->getOperand(0), Lo);
+  return DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, Chain, Hi);
+}
+
 SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
   // For example, i64 = BITCAST v4i16 on alpha. Typically the vector will
   // end up being split all the way down to individual components. Convert the
@@ -5937,6 +5959,9 @@
     report_fatal_error("Do not know how to widen this operator's operand!");
 
   case ISD::BITCAST:            Res = WidenVecOp_BITCAST(N); break;
+  case ISD::FAKE_USE:
+    Res = WidenVecOp_FAKE_USE(N);
+    break;
   case ISD::CONCAT_VECTORS:     Res = WidenVecOp_CONCAT_VECTORS(N); break;
   case ISD::INSERT_SUBVECTOR:   Res = WidenVecOp_INSERT_SUBVECTOR(N); break;
   case ISD::EXTRACT_SUBVECTOR:  Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
@@ -6270,6 +6295,16 @@
   return CreateStackStoreLoad(InOp, VT);
 }
 
+// Vectors with sizes that are not powers of 2 need to be widened to the
+// next largest power of 2. For example, we may get a vector of 3 32-bit
+// integers or of 6 16-bit integers, both of which have to be widened to a
+// 128-bit vector.
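+// Illustratively, FAKE_USE (ch, v3i32 %v) becomes FAKE_USE (ch, v4i32 %v')
+// where %v' is the widened operand; the padding lane carries no meaningful
+// value, which is harmless since FAKE_USE only keeps the value alive.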
+SDValue DAGTypeLegalizer::WidenVecOp_FAKE_USE(SDNode *N) {
+  SDValue WidenedOp = GetWidenedVector(N->getOperand(1));
+  return DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, N->getOperand(0),
+                     WidenedOp);
+}
+
 SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
   EVT VT = N->getValueType(0);
   EVT EltVT = VT.getVectorElementType();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7201,6 +7201,24 @@
     return;
   }
+  case Intrinsic::fake_use: {
+    Value *V = I.getArgOperand(0);
+    SDValue Ops[2];
+    // If this fake use uses an argument that has an empty SDValue, it is a
+    // zero-length array or some other type that does not produce a register,
+    // so do not translate a fake use for it.
+    if (isa<Argument>(V) && !NodeMap[V])
+      return;
+    Ops[0] = getRoot();
+    Ops[1] = getValue(V);
+    // Also, do not translate a fake use with an undef operand or any other
+    // empty SDValue.
+    if (!Ops[1] || Ops[1].isUndef())
+      return;
+    DAG.setRoot(DAG.getNode(ISD::FAKE_USE, sdl, MVT::Other, Ops));
+    return;
+  }
+
   case Intrinsic::eh_exceptionpointer:
   case Intrinsic::eh_exceptioncode: {
     // Get the exception pointer vreg, copy from it, and resize it to fit.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -420,6 +420,8 @@
   case ISD::UBSANTRAP:                  return "ubsantrap";
   case ISD::LIFETIME_START:             return "lifetime.start";
   case ISD::LIFETIME_END:               return "lifetime.end";
+  case ISD::FAKE_USE:
+    return "fake_use";
   case ISD::PSEUDO_PROBE:
     return "pseudoprobe";
   case ISD::GC_TRANSITION_START:        return "gc_transition.start";
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -678,6 +678,50 @@
   LLVM_DEBUG(dbgs() << R.getMsg() << "\n");
 }
 
+// Detect any fake uses that follow a tail call and move them before the tail
+// call. Ignore fake uses that use values that are def'd by or after the tail
+// call.
+static void preserveFakeUses(BasicBlock::iterator Begin,
+                             BasicBlock::iterator End) {
+  BasicBlock::iterator I = End;
+  if (--I == Begin || !isa<ReturnInst>(*I))
+    return;
+  // Detect whether there are any fake uses trailing a (potential) tail call.
+  bool HaveFakeUse = false;
+  bool HaveTailCall = false;
+  do {
+    if (const CallInst *CI = dyn_cast<CallInst>(--I))
+      if (CI->isTailCall()) {
+        HaveTailCall = true;
+        break;
+      }
+    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+      if (II->getIntrinsicID() == Intrinsic::fake_use)
+        HaveFakeUse = true;
+  } while (I != Begin);
+
+  // If we didn't find any tail calls followed by fake uses, we are done.
+  if (!HaveTailCall || !HaveFakeUse)
+    return;
+
+  SmallVector<Instruction *> FakeUses;
+  // Record the fake uses we found so we can move them to the front of the
+  // tail call. Ignore them if they use a value that is def'd by or after
+  // the tail call.
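+  // For example (illustrative IR), in
+  //   %r = tail call i32 @f()
+  //   notail call void (...) @llvm.fake.use(i32 %v)
+  //   ret i32 %r
+  // the fake use of %v is moved in front of the tail call, while a fake use
+  // of %r would be left in place (and later dropped).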
+  for (BasicBlock::iterator Inst = I; Inst != End; Inst++) {
+    if (IntrinsicInst *FakeUse = dyn_cast<IntrinsicInst>(Inst);
+        FakeUse && FakeUse->getIntrinsicID() == Intrinsic::fake_use) {
+      if (auto UsedDef = dyn_cast<Instruction>(FakeUse->getOperand(0));
+          !UsedDef || UsedDef->getParent() != I->getParent() ||
+          UsedDef->comesBefore(&*I))
+        FakeUses.push_back(FakeUse);
+    }
+  }
+
+  for (auto *Inst : FakeUses)
+    Inst->moveBefore(*Inst->getParent(), I);
+}
+
 void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
                                         BasicBlock::const_iterator End,
                                         bool &HadTailCall) {
@@ -1530,6 +1574,16 @@
       FuncInfo->VisitedBBs.insert(LLVMBB);
     }
 
+    // Fake uses that follow tail calls are dropped. To avoid this, move
+    // such fake uses in front of the tail call, provided they don't
+    // use anything def'd by or after the tail call.
+    {
+      BasicBlock::iterator BBStart =
+          const_cast<BasicBlock *>(LLVMBB)->getFirstNonPHI()->getIterator();
+      BasicBlock::iterator BBEnd = const_cast<BasicBlock *>(LLVMBB)->end();
+      preserveFakeUses(BBStart, BBEnd);
+    }
+
     BasicBlock::const_iterator const Begin =
         LLVMBB->getFirstNonPHI()->getIterator();
     BasicBlock::const_iterator const End = LLVMBB->end();
@@ -2300,6 +2354,13 @@
   CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
 }
 
+// Use the generic FAKE_USE target opcode. The chain operand must come last,
+// because InstrEmitter::AddOperand() requires it.
+void SelectionDAGISel::Select_FAKE_USE(SDNode *N) {
+  CurDAG->SelectNodeTo(N, TargetOpcode::FAKE_USE, N->getValueType(0),
+                       N->getOperand(1), N->getOperand(0));
+}
+
 void SelectionDAGISel::Select_FREEZE(SDNode *N) {
   // TODO: We don't have FREEZE pseudo-instruction in MachineInstr-level now.
   // If FREEZE instruction is added later, the code below must be changed as
@@ -2966,6 +3027,9 @@
   case ISD::UNDEF:
     Select_UNDEF(NodeToMatch);
     return;
+  case ISD::FAKE_USE:
+    Select_FAKE_USE(NodeToMatch);
+    return;
   case ISD::FREEZE:
     Select_FREEZE(NodeToMatch);
     return;
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -880,7 +880,10 @@
 const Instruction *
 Instruction::getPrevNonDebugInstruction(bool SkipPseudoOp) const {
   for (const Instruction *I = getPrevNode(); I; I = I->getPrevNode())
-    if (!isa<DbgInfoIntrinsic>(I) && !(SkipPseudoOp && isa<PseudoProbeInst>(I)))
+    if (!isa<DbgInfoIntrinsic>(I) &&
+        !(SkipPseudoOp && isa<PseudoProbeInst>(I)) &&
+        !(isa<IntrinsicInst>(I) &&
+          cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::fake_use))
       return I;
   return nullptr;
 }
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5009,6 +5009,7 @@
               F->getIntrinsicID() == Intrinsic::experimental_patchpoint_void ||
               F->getIntrinsicID() == Intrinsic::experimental_patchpoint_i64 ||
+              F->getIntrinsicID() == Intrinsic::fake_use ||
               F->getIntrinsicID() == Intrinsic::experimental_gc_statepoint ||
               F->getIntrinsicID() == Intrinsic::wasm_rethrow ||
               IsAttachedCallOperand(F, CBI, i),
diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp
--- a/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -433,6 +433,24 @@
     if (MI.isCall())
       FPInstClass = X86II::SpecialFP;
 
+    // A fake_use with a floating point pseudo register argument that is
+    // killed must behave like any other floating point operation and pop
+    // the floating point stack (this is done in handleSpecialFP()).
+    // Fake_use is, however, unusual in that sometimes its operand is not
+    // killed because a later instruction (probably a return) will use it.
+    // It is this instruction that will pop the stack.
+    // In this scenario we can safely remove the fake_use's operand
+    // (it is live anyway).
+    if (MI.isFakeUse()) {
+      const MachineOperand &MO = MI.getOperand(0);
+      if (MO.isReg() && X86::RFP80RegClass.contains(MO.getReg())) {
+        if (MO.isKill())
+          FPInstClass = X86II::SpecialFP;
+        else
+          MI.removeOperand(0);
+      }
+    }
+
     if (FPInstClass == X86II::NotFP)
       continue;  // Efficiently ignore non-fp insts!
 
@@ -1736,6 +1754,20 @@
     // Don't delete the inline asm!
     return;
   }
+
+  // FAKE_USE must pop its register operand off the stack if it is killed,
+  // because this constitutes the register's last use. If the operand
+  // is not killed, it will have its last use later, so we leave it alone.
+  // In either case we remove the operand so later passes don't see it.
+  case TargetOpcode::FAKE_USE: {
+    assert(MI.getNumExplicitOperands() == 1 &&
+           "FAKE_USE must have exactly one operand");
+    if (MI.getOperand(0).isKill()) {
+      freeStackSlotBefore(Inst, getFPReg(MI.getOperand(0)));
+    }
+    MI.removeOperand(0);
+    return;
+  }
  }
 
  Inst = MBB->erase(Inst);  // Remove the pseudo instruction
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -3621,6 +3621,12 @@
   struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
     AAMDNodes AATags;
+    // A vector to hold the split components that we want to emit
+    // separate fake uses for.
+    SmallVector<Value *> Components;
+    // A vector to hold all the fake uses of the struct that we are splitting.
+    // Usually there should only be one, but we are handling the general case.
+    SmallVector<Instruction *> FakeUses;
 
     LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
                    AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,
@@ -3644,10 +3650,32 @@
       if (AATags &&
          GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
        Load->setAAMetadata(AATags.shift(Offset.getZExtValue()));
+      // Record the load so we can generate a fake use for this aggregate
+      // component.
+      Components.push_back(Load);
 
       Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
       LLVM_DEBUG(dbgs() << "          to: " << *Load << "\n");
     }
+
+    // Stash the fake uses that use the value generated by this instruction.
+    void recordFakeUses(LoadInst &LI) {
+      for (Use &U : LI.uses())
+        if (auto *II = dyn_cast<IntrinsicInst>(U.getUser()))
+          if (II->getIntrinsicID() == Intrinsic::fake_use)
+            FakeUses.push_back(II);
+    }
+
+    // Replace all fake uses of the aggregate with a series of fake uses, one
+    // for each split component.
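+    // For example (illustrative IR; the names are hypothetical), this
+    // rewrites
+    //   %agg = load %struct.s, ptr %S
+    //   call void (...) @llvm.fake.use(%struct.s %agg)
+    // into one fake use per component:
+    //   call void (...) @llvm.fake.use(i32 %S.fca.0.load)
+    //   call void (...) @llvm.fake.use(i32 %S.fca.1.load)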
+    void emitFakeUses() {
+      for (Instruction *I : FakeUses) {
+        IRB.SetInsertPoint(I);
+        for (auto *V : Components)
+          IRB.CreateIntrinsic(Intrinsic::fake_use, {}, {V});
+        I->eraseFromParent();
+      }
+    }
   };
 
   bool visitLoadInst(LoadInst &LI) {
@@ -3659,8 +3687,10 @@
     LLVM_DEBUG(dbgs() << "    original: " << LI << "\n");
     LoadOpSplitter Splitter(&LI, *U, LI.getType(), LI.getAAMetadata(),
                             getAdjustedAlignment(&LI, 0), DL, IRB);
+    Splitter.recordFakeUses(LI);
     Value *V = PoisonValue::get(LI.getType());
     Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
+    Splitter.emitFakeUses();
     Visited.erase(&LI);
     LI.replaceAllUsesWith(V);
     LI.eraseFromParent();
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -496,6 +496,12 @@
   for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end();
        II != IE; ++II) {
 
+    // Don't clone fake_use as it may suppress many optimizations
+    // due to inlining, especially SROA.
+    if (auto *IntrInst = dyn_cast<IntrinsicInst>(II))
+      if (IntrInst->getIntrinsicID() == Intrinsic::fake_use)
+        continue;
+
     Instruction *NewInst = cloneInstruction(II);
     NewInst->insertInto(NewBB, NewBB->end());
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -2894,6 +2894,9 @@
   unsigned Count = 0;
   for (Use &U : llvm::make_early_inc_range(From->uses())) {
+    auto *II = dyn_cast<IntrinsicInst>(U.getUser());
+    if (II && II->getIntrinsicID() == Intrinsic::fake_use)
+      continue;
     if (!Dominates(Root, U))
       continue;
     LLVM_DEBUG(dbgs() << "Replace dominated use of '";
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -77,7 +77,8 @@
       if (SI->isVolatile())
         return false;
     } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
-      if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
+      if (!II->isLifetimeStartOrEnd() && !II->isDroppable() &&
+          II->getIntrinsicID() != Intrinsic::fake_use)
         return false;
     } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
       if (!onlyUsedByLifetimeMarkersOrDroppableInsts(BCI))
diff --git a/llvm/test/CodeGen/MIR/X86/fake-use-phi.mir b/llvm/test/CodeGen/MIR/X86/fake-use-phi.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/X86/fake-use-phi.mir
@@ -0,0 +1,95 @@
+# RUN: llc < %s -x mir -run-pass=codegenprepare | FileCheck %s --implicit-check-not="llvm.fake.use"
+#
+# When performing return duplication to enable
+# tail call optimization we clone fake uses that exist in the to-be-eliminated
+# return block into the predecessor blocks. When doing this with fake uses
+# of PHI-nodes, they cannot be easily copied, but require the correct operand.
+# We are currently not able to do this correctly, so we suppress the cloning
+# of such fake uses at the moment.
+#
+# There should be no fake use of a call result in any of the resulting return
+# blocks.
+
+# CHECK: declare void @llvm.fake.use
+
+# Fake uses of `this` should be duplicated into both return blocks.
+# CHECK: if.then:
+# CHECK: @llvm.fake.use({{.*}}this
+# CHECK: if.else:
+# CHECK: @llvm.fake.use({{.*}}this
+
+--- |
+  source_filename = "test.ll"
+
+  %class.a = type { i8 }
+
+  declare void @llvm.fake.use(...)
+  declare i32 @foo(ptr nonnull dereferenceable(1)) local_unnamed_addr
+  declare i32 @bar(ptr nonnull dereferenceable(1)) local_unnamed_addr
+
+  define hidden void @func(ptr nonnull dereferenceable(1) %this) local_unnamed_addr align 2 {
+  entry:
+    %b = getelementptr inbounds %class.a, ptr %this, i64 0, i32 0
+    %0 = load i8, i8* %b, align 1
+    %tobool.not = icmp eq i8 %0, 0
+    br i1 %tobool.not, label %if.else, label %if.then
+
+  if.then:                                          ; preds = %entry
+    %call = tail call i32 @foo(ptr nonnull dereferenceable(1) %this)
+    %call2 = tail call i32 @bar(ptr nonnull dereferenceable(1) %this)
+    br label %if.end
+
+  if.else:                                          ; preds = %entry
+    %call4 = tail call i32 @bar(ptr nonnull dereferenceable(1) %this)
+    %call5 = tail call i32 @foo(ptr nonnull dereferenceable(1) %this)
+    br label %if.end
+
+  if.end:                                          ; preds = %if.else, %if.then
+    %call4.sink = phi i32 [ %call4, %if.else ], [ %call, %if.then ]
+    notail call void (...) @llvm.fake.use(i32 %call4.sink)
+    notail call void (...) @llvm.fake.use(ptr nonnull %this)
+    ret void
+  }
+
+...
+---
+name:            func
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:       []
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    1
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+
+...
diff --git a/llvm/test/CodeGen/MIR/X86/fake-use-scheduler.mir b/llvm/test/CodeGen/MIR/X86/fake-use-scheduler.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/X86/fake-use-scheduler.mir
@@ -0,0 +1,115 @@
+# Prevent the machine scheduler from moving instructions past FAKE_USE.
+# RUN: llc -run-pass machine-scheduler -o - %s | FileCheck %s
+#
+# We make sure that, beginning with the first FAKE_USE instruction,
+# no changes to the sequence of instructions are undertaken by the
+# scheduler. We don't bother to check that the order of the FAKE_USEs
+# remains the same. They should, but it is irrelevant.
+#
+# CHECK:      bb.{{.*}}:
+# CHECK:      FAKE_USE
+# CHECK-NEXT: FAKE_USE
+# CHECK-NEXT: FAKE_USE
+# CHECK-NEXT: FAKE_USE
+# CHECK-NEXT: COPY
+# CHECK-NEXT: RET
+#
+--- |
+  @glb = common dso_local local_unnamed_addr global [100 x i32] zeroinitializer, align 16
+
+  ; Function Attrs: nounwind uwtable
+  define dso_local i64 @foo(i32* %p) local_unnamed_addr {
+  entry:
+    %0 = load i32, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @glb, i64 0, i64 0), align 16, !tbaa !2
+    store i32 %0, i32* %p, align 4, !tbaa !2
+    %conv = sext i32 %0 to i64
+    %1 = load i32, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @glb, i64 0, i64 1), align 4, !tbaa !2
+    %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 1
+    store i32 %1, i32* %arrayidx1, align 4, !tbaa !2
+    %conv2 = sext i32 %1 to i64
+    %add3 = add nsw i64 %conv2, %conv
+    notail call void (...) @llvm.fake.use(i64 %add3)
+    notail call void (...) @llvm.fake.use(i32 %1)
+    notail call void (...) @llvm.fake.use(i32 %0)
+    notail call void (...) @llvm.fake.use(i32* %p)
+    ret i64 %add3
+  }
+
+  ; Function Attrs: nounwind
+  declare void @llvm.fake.use(...) #1
+
+  ; Function Attrs: nounwind
+  declare void @llvm.stackprotector(i8*, i8**) #1
+
+  !llvm.module.flags = !{!0}
+  !llvm.ident = !{!1}
+
+  !0 = !{i32 1, !"wchar_size", i32 4}
+  !1 = !{!"clang version 9.0.0"}
+  !2 = !{!3, !3, i64 0}
+  !3 = !{!"int", !4, i64 0}
+  !4 = !{!"omnipotent char", !5, i64 0}
+  !5 = !{!"Simple C/C++ TBAA"}
+
+...
+---
+name:            foo
+alignment:       4
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:
+  - { id: 0, class: gr64, preferred-register: '' }
+  - { id: 1, class: gr64_with_sub_8bit, preferred-register: '' }
+  - { id: 2, class: gr32, preferred-register: '' }
+  - { id: 3, class: gr64_with_sub_8bit, preferred-register: '' }
+  - { id: 4, class: gr32, preferred-register: '' }
+  - { id: 5, class: gr64, preferred-register: '' }
+liveins:
+  - { reg: '$rdi', virtual-reg: '%0' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+constants:       []
+body:             |
+  bb.0.entry:
+    liveins: $rdi
+
+    %0:gr64 = COPY $rdi
+    %1:gr64_with_sub_8bit = MOVSX64rm32 $rip, 1, $noreg, @glb, $noreg :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @glb, i64 0, i64 0)`, align 16, !tbaa !2)
+    MOV32mr %0, 1, $noreg, 0, $noreg, %1.sub_32bit :: (store 4 into %ir.p, !tbaa !2)
+    %3:gr64_with_sub_8bit = MOVSX64rm32 $rip, 1, $noreg, @glb + 4, $noreg :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @glb, i64 0, i64 1)`, !tbaa !2)
+    MOV32mr %0, 1, $noreg, 4, $noreg, %3.sub_32bit :: (store 4 into %ir.arrayidx1, !tbaa !2)
+    %5:gr64 = COPY %3
+    %5:gr64 = nsw ADD64rr %5, %1, implicit-def dead $eflags
+    FAKE_USE %5
+    FAKE_USE %3.sub_32bit
+    FAKE_USE %1.sub_32bit
+    FAKE_USE %0
+    $rax = COPY %5
+    RET 0, killed $rax
+
+...
diff --git a/llvm/test/CodeGen/MIR/X86/fake-use-tailcall.mir b/llvm/test/CodeGen/MIR/X86/fake-use-tailcall.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/X86/fake-use-tailcall.mir
@@ -0,0 +1,106 @@
+# In certain cases CodeGenPrepare folds a return instruction into
+# the return block's predecessor blocks and subsequently deletes the return block.
+# The purpose of this is to enable tail call optimization in the predecessor blocks.
+# Removal of the return block also removes fake use instructions that were present
+# in the return block, potentially causing debug information to be lost.
+#
+# The fix is to clone any fake use instructions that are not dominated by definitions
+# in the return block itself into the predecessor blocks. This test ensures that we do so.
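+#
+# For example (illustrative): the fake use of %i below is cloned into both
+# predecessors, in front of their tail calls, while the fake use of the
+# call-result PHI %temp.0 is not cloned, since it has no single valid operand
+# in either predecessor.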
+#
+# Generated from the following source with
+# clang -fextend-lifetimes -S -emit-llvm -O2 -mllvm -stop-before=codegenprepare -o test.mir test.c
+#
+# extern int f0();
+# extern int f1();
+#
+# int foo(int i) {
+#   int temp = i;
+#   if (temp == 0)
+#     temp = f0();
+#   else
+#     temp = f1();
+#   return temp;
+# }
+#
+# RUN: llc -run-pass=codegenprepare -o - %s | FileCheck %s
+#
+# CHECK:      define{{.*}}foo
+# CHECK:      if.then:
+# CHECK-NEXT: call{{.*}}fake.use(i32 %i)
+# CHECK-NEXT: tail call i32{{.*}}@f0
+# CHECK-NEXT: ret
+# CHECK:      if.else:
+# CHECK-NEXT: call{{.*}}fake.use(i32 %i)
+# CHECK-NEXT: tail call i32{{.*}}@f1
+# CHECK-NEXT: ret
+
+--- |
+  define hidden i32 @foo(i32 %i) local_unnamed_addr {
+  entry:
+    %cmp = icmp eq i32 %i, 0
+    br i1 %cmp, label %if.then, label %if.else
+
+  if.then:
+    %call = tail call i32 (...) @f0()
+    br label %if.end
+
+  if.else:
+    %call1 = tail call i32 (...) @f1()
+    br label %if.end
+
+  if.end:
+    %temp.0 = phi i32 [ %call, %if.then ], [ %call1, %if.else ]
+    notail call void (...) @llvm.fake.use(i32 %temp.0)
+    notail call void (...) @llvm.fake.use(i32 %i)
+    ret i32 %temp.0
+  }
+  declare i32 @f0(...) local_unnamed_addr
+  declare i32 @f1(...) local_unnamed_addr
+  declare void @llvm.fake.use(...)
+
+  !llvm.module.flags = !{!0}
+  !llvm.ident = !{!1}
+
+  !0 = !{i32 1, !"wchar_size", i32 2}
+  !1 = !{!"clang version 10.0.0"}
+
+...
+---
+name:            foo
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:       []
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    1
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+callSites:       []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+
+...
diff --git a/llvm/test/CodeGen/MIR/X86/fake-use-zero-length.ll b/llvm/test/CodeGen/MIR/X86/fake-use-zero-length.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/X86/fake-use-zero-length.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -stop-after=finalize-isel | FileCheck %s --implicit-check-not=FAKE_USE
+;
+; Make sure SelectionDAG does not crash handling fake uses of zero-length arrays
+; and structs. Check also that they are not propagated.
+;
+; Generated from the following source with
+; clang -fextend-lifetimes -S -emit-llvm -O2 -mllvm -stop-after=safe-stack -o test.mir test.cpp
+;
+; int main ()
+; { int array[0]; }
+;
+;
+; CHECK: liveins: $[[IN_REG:[a-zA-Z0-9]+]]
+; CHECK: %[[IN_VREG:[a-zA-Z0-9]+]]:gr32 = COPY $[[IN_REG]]
+; CHECK: FAKE_USE %[[IN_VREG]]
+
+source_filename = "test.ll"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define hidden i32 @main([0 x i32] %zero, [1 x i32] %one) local_unnamed_addr {
+entry:
+  notail call void (...) @bar([0 x i32] %zero)
+  notail call void (...) @baz([1 x i32] %one)
+  notail call void (...) @llvm.fake.use([0 x i32] %zero)
+  notail call void (...) @llvm.fake.use([1 x i32] %one)
+  ret i32 0
+}
+
+declare void @bar([0 x i32] %a)
+declare void @baz([1 x i32] %a)
+
+; Function Attrs: nounwind
+declare void @llvm.fake.use(...)
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 2}
+!1 = !{i32 7, !"PIC Level", i32 2}
+!2 = !{!"clang version 10.0.0"}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/X86/fake-use-hpfloat.ll b/llvm/test/CodeGen/X86/fake-use-hpfloat.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fake-use-hpfloat.ll
@@ -0,0 +1,17 @@
+; assert in DAGlegalizer with fake use of half precision float.
+; Changes to half float promotion.
+; RUN: llc -O2 -stop-after=finalize-isel -filetype=asm -o - %s | FileCheck %s
+;
+; CHECK:      bb.0.entry:
+; CHECK-NEXT: %0:fr16 = FsFLD0SH
+; CHECK-NEXT: FAKE_USE killed %0
+;
+target triple = "x86_64-unknown-unknown"
+
+define void @_Z6doTestv() local_unnamed_addr {
+entry:
+  tail call void (...) @llvm.fake.use(half 0xH0000)
+  ret void
+}
+
+declare void @llvm.fake.use(...)
diff --git a/llvm/test/CodeGen/X86/fake-use-ld.ll b/llvm/test/CodeGen/X86/fake-use-ld.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fake-use-ld.ll
@@ -0,0 +1,51 @@
+; RUN: llc -O0 -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+
+; Checks that fake uses of the FP stack do not cause a crash.
+;
+; /*******************************************************************/
+; extern long double foo(long double, long double, long double);
+;
+; long double actual(long double p1, long double p2, long double p3) {
+;   return fmal(p1, p2, p3);
+; }
+; /*******************************************************************/
+
+define x86_fp80 @actual(x86_fp80 %p1, x86_fp80 %p2, x86_fp80 %p3) {
+;
+; CHECK: actual
+;
+entry:
+  %p1.addr = alloca x86_fp80, align 16
+  %p2.addr = alloca x86_fp80, align 16
+  %p3.addr = alloca x86_fp80, align 16
+  store x86_fp80 %p1, ptr %p1.addr, align 16
+  store x86_fp80 %p2, ptr %p2.addr, align 16
+  store x86_fp80 %p3, ptr %p3.addr, align 16
+  %0 = load x86_fp80, ptr %p1.addr, align 16
+  %1 = load x86_fp80, ptr %p2.addr, align 16
+  %2 = load x86_fp80, ptr %p3.addr, align 16
+;
+; CHECK: callq{{.*}}foo
+;
+  %3 = call x86_fp80 @foo(x86_fp80 %0, x86_fp80 %1, x86_fp80 %2)
+  %4 = load x86_fp80, ptr %p1.addr, align 16
+  call void (...) @llvm.fake.use(x86_fp80 %4)
+  %5 = load x86_fp80, ptr %p2.addr, align 16
+  call void (...) @llvm.fake.use(x86_fp80 %5)
+  %6 = load x86_fp80, ptr %p3.addr, align 16
+  call void (...) @llvm.fake.use(x86_fp80 %6)
+;
+; CHECK: ret
+;
+  ret x86_fp80 %3
+}
+
+declare x86_fp80 @foo(x86_fp80, x86_fp80, x86_fp80)
+
+declare void @llvm.fake.use(...)
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.9.0"}
diff --git a/llvm/test/CodeGen/X86/fake-use-simple-tail-call.ll b/llvm/test/CodeGen/X86/fake-use-simple-tail-call.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fake-use-simple-tail-call.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -O2 -o - \
+; RUN:     | FileCheck %s --implicit-check-not=TAILCALL
+; Generated with: clang -emit-llvm -O2 -S -fextend-lifetimes test.cpp -o -
+; =========== test.cpp ===============
+; extern int bar(int);
+; int foo1(int i)
+; {
+;     return bar(i);
+; }
+; =========== test.cpp ===============
+
+; CHECK: TAILCALL
+
+; ModuleID = 'test.cpp'
+source_filename = "test.cpp"
+
+define i32 @_Z4foo1i(i32 %i) local_unnamed_addr {
+entry:
+  %call = tail call i32 @_Z3bari(i32 %i)
+  tail call void (...) @llvm.fake.use(i32 %i)
+  ret i32 %call
+}
+
+declare i32 @_Z3bari(i32) local_unnamed_addr
+
+declare void @llvm.fake.use(...)
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 2}
+!1 = !{i32 7, !"PIC Level", i32 2}
+!2 = !{!"clang version 5.0.1"}
diff --git a/llvm/test/CodeGen/X86/fake-use-split-ret.ll b/llvm/test/CodeGen/X86/fake-use-split-ret.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fake-use-split-ret.ll
@@ -0,0 +1,53 @@
+; RUN: opt -mtriple=x86_64-unknown-unknown -S -codegenprepare <%s -o - | FileCheck %s
+;
+; Ensure return instruction splitting ignores fake uses.
+;
+; IR Generated with clang -O2 -S -emit-llvm -fextend-lifetimes test.cpp
+;
+;// test.cpp
+;extern int bar(int);
+;
+;int foo2(int i)
+;{
+;  --i;
+;  if (i <= 0)
+;    return -1;
+;  return bar(i);
+;}
+
+; ModuleID = 'test.cpp'
+source_filename = "test.cpp"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+declare i32 @_Z3bari(i32) local_unnamed_addr
+
+; Function Attrs: nounwind
+declare void @llvm.fake.use(...)
+
+; Function Attrs: nounwind sspstrong uwtable
+define i32 @_Z4foo2i(i32 %i) local_unnamed_addr {
+entry:
+  %dec = add nsw i32 %i, -1
+  %cmp = icmp slt i32 %i, 2
+  br i1 %cmp, label %cleanup, label %if.end
+
+if.end:                                           ; preds = %entry
+  %call = tail call i32 @_Z3bari(i32 %dec)
+; CHECK: ret i32 %call
+  br label %cleanup
+
+cleanup:                                          ; preds = %entry, %if.end
+; CHECK: cleanup:
+  %retval.0 = phi i32 [ %call, %if.end ], [ -1, %entry ]
+  tail call void (...) @llvm.fake.use(i32 %dec)
+; CHECK: ret i32 -1
+  ret i32 %retval.0
+}
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 2}
+!1 = !{i32 7, !"PIC Level", i32 2}
+!2 = !{!"clang version 7.0.0"}
diff --git a/llvm/test/CodeGen/X86/fake-use-sroa.ll b/llvm/test/CodeGen/X86/fake-use-sroa.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fake-use-sroa.ll
@@ -0,0 +1,54 @@
+; RUN: opt -S -passes=sroa %s | FileCheck %s
+; With fake use intrinsics generated for small aggregates, check that when
+; SROA slices the aggregate, we generate individual fake use intrinsics for
+; the individual values.
+
+; Generated from the following source:
+; struct s {
+;   int i;
+;   int j;
+; };
+;
+; void foo(struct s S) {
+; }
+;
+; void bar() {
+;   int arr[2] = {5, 6};
+; }
+;
+%struct.s = type { i32, i32 }
+@__const.bar.arr = private unnamed_addr constant [2 x i32] [i32 5, i32 6], align 4
+
+; A small struct passed as parameter
+; CHECK-LABEL: define{{.*}}foo
+; CHECK:       %[[SLICE1:[^ ]+]] = trunc i64
+; CHECK:       %[[SLICE2:[^ ]+]] = trunc i64
+; CHECK-DAG:   call{{.*}} @llvm.fake.use(i32 %[[SLICE1]])
+; CHECK-DAG:   call{{.*}} @llvm.fake.use(i32 %[[SLICE2]])
+define dso_local void @foo(i64 %S.coerce) {
+entry:
+  %S = alloca %struct.s, align 4
+  store i64 %S.coerce, ptr %S, align 4
+  %fake.use = load %struct.s, ptr %S, align 4
+  notail call void (...) @llvm.fake.use(%struct.s %fake.use)
+  ret void
+}
+
+declare void @llvm.fake.use(...)
+
+; A local variable with a small array type.
+; CHECK-LABEL: define{{.*}}bar
+; CHECK:       %[[ARRAYSLICE1:[^ ]+]] = load
+; CHECK:       %[[ARRAYSLICE2:[^ ]+]] = load
+; CHECK-DAG:   call{{.*}} @llvm.fake.use(i32 %[[ARRAYSLICE1]])
+; CHECK-DAG:   call{{.*}} @llvm.fake.use(i32 %[[ARRAYSLICE2]])
+define dso_local void @bar() {
+entry:
+  %arr = alloca [2 x i32], align 4
+  call void @llvm.memcpy.p0i8.p0i8.i64(ptr align 4 %arr, ptr align 4 bitcast (ptr @__const.bar.arr to ptr), i64 8, i1 false)
+  %fake.use = load [2 x i32], ptr %arr, align 4
+  notail call void (...) @llvm.fake.use([2 x i32] %fake.use)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1 immarg)
diff --git a/llvm/test/CodeGen/X86/fake-use-suppress-load.ll b/llvm/test/CodeGen/X86/fake-use-suppress-load.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fake-use-suppress-load.ll
@@ -0,0 +1,23 @@
+; Suppress redundant loads feeding into fake uses.
+; RUN: llc -filetype=asm -o - %s --mtriple=x86_64-unknown-unknown | FileCheck %s
+; Windows ABI works differently; there's no offset.
+;
+; Look for the spill
+; CHECK:     movq %r{{[a-z]+,}} -{{[0-9]+\(%rsp\)}}
+; CHECK-NOT: movq -{{[0-9]+\(%rsp\)}}, %r{{[a-z]+}}
+
+define dso_local i32 @f(ptr %p) local_unnamed_addr {
+entry:
+  call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"() #1, !srcloc !2
+  notail call void (...) @llvm.fake.use(ptr %p)
+  ret i32 4
+}
+
+declare void @llvm.fake.use(...) #1
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0"}
+!2 = !{i32 -2147471544}
diff --git a/llvm/test/CodeGen/X86/fake-use-tailcall.ll b/llvm/test/CodeGen/X86/fake-use-tailcall.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fake-use-tailcall.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -stop-after=finalize-isel -O2 - | FileCheck %s --implicit-check-not FAKE_USE
+; Fake uses following tail calls should be pulled in front
+; of the TCRETURN instruction. Fake uses using something defined by
+; the tail call or after it should be suppressed.
+
+; CHECK: body:
+; CHECK: bb.0.{{.*}}:
+; CHECK: %0:{{.*}}= COPY
+; CHECK: FAKE_USE %0
+; CHECK: TCRETURN
+
+define void @bar(i32 %v) {
+entry:
+  %call = tail call i32 @_Z3fooi(i32 %v)
+  %mul = mul nsw i32 %call, 3
+  notail call void (...) @llvm.fake.use(i32 %mul)
+  notail call void (...) @llvm.fake.use(i32 %call)
+  notail call void (...) @llvm.fake.use(i32 %v)
+  ret void
+}
+
+declare i32 @_Z3fooi(i32) local_unnamed_addr
+declare void @llvm.fake.use(...)
diff --git a/llvm/test/CodeGen/X86/fake-use-vector.ll b/llvm/test/CodeGen/X86/fake-use-vector.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fake-use-vector.ll
@@ -0,0 +1,45 @@
+; assert in DAGlegalizer with fake use of 1-element vectors.
+; RUN: llc -stop-after=finalize-isel -filetype=asm -o - %s | FileCheck %s
+;
+; ModuleID = 't2.cpp'
+; source_filename = "t2.cpp"
+; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+;
+; Check that we get past ISel and generate FAKE_USE machine instructions for
+; one-element vectors.
+;
+; CHECK:     bb.0.entry:
+; CHECK-DAG: %1:gr64 = COPY $rdi
+; CHECK-DAG: %0:vr128 = COPY $xmm0
+; CHECK:     %2:vr64 =
+; CHECK-DAG: FAKE_USE %1
+; CHECK-DAG: FAKE_USE %0
+; CHECK:     RET
+
+target triple = "x86_64-unknown-unknown"
+
+; Function Attrs: nounwind sspstrong uwtable
+define <4 x float> @_Z3runDv4_fDv1_x(<4 x float> %r, i64 %b.coerce) local_unnamed_addr #0 {
+entry:
+  %0 = insertelement <1 x i64> undef, i64 %b.coerce, i32 0
+  %1 = bitcast i64 %b.coerce to x86_mmx
+  %2 = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %r, x86_mmx %1)
+  tail call void (...) @llvm.fake.use(<1 x i64> %0)
+  tail call void (...) @llvm.fake.use(<4 x float> %r)
+  ret <4 x float> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx)
+
+; Function Attrs: nounwind
+declare void @llvm.fake.use(...)
+
+attributes #0 = { "target-cpu"="btver2" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 5.0.0"}
diff --git a/llvm/test/CodeGen/X86/fake-use-vector2.ll b/llvm/test/CodeGen/X86/fake-use-vector2.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fake-use-vector2.ll
@@ -0,0 +1,33 @@
+; RUN: llc -stop-after=finalize-isel -filetype=asm -o - %s | FileCheck %s
+;
+; Make sure we can split vectors that are used as operands of FAKE_USE.
+
+; Generated from:
+;
+; typedef long __attribute__((ext_vector_type(8))) long8;
+; void test0() { long8 id208 {0, 1, 2, 3, 4, 5, 6, 7}; }
+
+; ModuleID = 't5.cpp'
+source_filename = "t5.cpp"
+
+; CHECK:     %0:vr256 = VMOV
+; CHECK:     %1:vr256 = VMOV
+; CHECK-DAG: FAKE_USE killed %1
+; CHECK-DAG: FAKE_USE killed %0
+; CHECK:     RET
+define void @_Z5test0v() local_unnamed_addr #0 {
+entry:
+  tail call void (...) @llvm.fake.use(<8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>) #1
+  ret void
+}
+
+declare void @llvm.fake.use(...)
+
+attributes #0 = { "target-cpu"="btver2" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 5.0.0"}
diff --git a/llvm/test/DebugInfo/X86/Inputs/check-fake-use.py b/llvm/test/DebugInfo/X86/Inputs/check-fake-use.py
new file mode 100644
--- /dev/null
+++ b/llvm/test/DebugInfo/X86/Inputs/check-fake-use.py
@@ -0,0 +1,100 @@
+# Parsing dwarfdump's output to determine whether the location list for the
+# parameter "b" covers all of the function. The script is written in the form
+# of a state machine and expects that dwarfdump output adheres to a certain
+# order:
+# 1) The .debug_info section must appear before the .debug_loc section.
+# 2) The DW_AT_location attribute must appear before the parameter's name in
+#    the formal parameter DIE.
+#
+import re
+import sys
+
+DebugInfoPattern = r"\.debug_info contents:"
+SubprogramPattern = r"^0x[0-9a-f]+:\s+DW_TAG_subprogram"
+HighPCPattern = r"DW_AT_high_pc.*0x([0-9a-f]+)"
+FormalPattern = r"^0x[0-9a-f]+:\s+DW_TAG_formal_parameter"
+LocationPattern = r"DW_AT_location\s+\[DW_FORM_sec_offset\].*0x([a-f0-9]+)"
+DebugLocPattern = r'\[0x([a-f0-9]+),\s+0x([a-f0-9]+)\) ".text":'
+
+# States
+LookingForDebugInfo = 0
+LookingForSubProgram = LookingForDebugInfo + 1  # 1
+LookingForHighPC = LookingForSubProgram + 1  # 2
+LookingForFormal = LookingForHighPC + 1  # 3
+LookingForLocation = LookingForFormal + 1  # 4
+DebugLocations = LookingForLocation + 1  # 5
+AllDone = DebugLocations + 1  # 6
+
+# For each state, the state table contains 3-item sublists with the following
+# entries:
+# 1) The regex pattern we use in each state.
+# 2) The state we enter when we have a successful match for the current pattern.
+# 3) The state we enter when we do not have a successful match for the
+#    current pattern.
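+#
+# For example (illustrative dwarfdump excerpt; offsets are made up), the
+# script advances through:
+#   0x0000002a:   DW_TAG_subprogram
+#                   DW_AT_high_pc [DW_FORM_data4]  (0x00000037)
+#   0x00000045:     DW_TAG_formal_parameter
+#                     DW_AT_location [DW_FORM_sec_offset]  (0x00000010)
+#   [0x0000000000000000, 0x0000000000000037) ".text": DW_OP_reg5 RDI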
+StateTable = [
+    # LookingForDebugInfo
+    [DebugInfoPattern, LookingForSubProgram, LookingForDebugInfo],
+    # LookingForSubProgram
+    [SubprogramPattern, LookingForHighPC, LookingForSubProgram],
+    # LookingForHighPC
+    [HighPCPattern, LookingForFormal, LookingForHighPC],
+    # LookingForFormal
+    [FormalPattern, LookingForLocation, LookingForFormal],
+    # LookingForLocation
+    [LocationPattern, DebugLocations, LookingForFormal],
+    # DebugLocations
+    [DebugLocPattern, DebugLocations, AllDone],
+    # AllDone
+    [None, AllDone, AllDone],
+]
+
+# Symbolic indices
+StatePattern = 0
+NextState = 1
+FailState = 2
+
+State = LookingForDebugInfo
+FirstBeginOffset = -1
+
+# Read output from the file provided as a command-line argument.
+with open(sys.argv[1], "r") as dwarf_dump_file:
+    for line in dwarf_dump_file:
+        if State == AllDone:
+            break
+        Pattern = StateTable[State][StatePattern]
+        # print("State: %d - Searching '%s' for '%s'" % (State, line, Pattern))
+        m = re.search(Pattern, line)
+        if m:
+            # Match. Depending on the state, we extract various values.
+            if State == LookingForHighPC:
+                HighPC = int(m.group(1), 16)
+            elif State == DebugLocations:
+                # Extract the range values.
+                if FirstBeginOffset == -1:
+                    FirstBeginOffset = int(m.group(1), 16)
+                    # print("FirstBeginOffset set to %d" % FirstBeginOffset)
+                EndOffset = int(m.group(2), 16)
+                # print("EndOffset set to %d" % EndOffset)
+            State = StateTable[State][NextState]
+        else:
+            State = StateTable[State][FailState]
+
+Success = True
+
+# Check that the first entry starts at 0 and that the last ending address
+# in our location list is close to the high pc of the subprogram.
+if State != AllDone:
+    print("Error in expected sequence of DWARF information:")
+    print(" State = %d\n" % State)
+    Success = False
+elif FirstBeginOffset == -1:
+    print("Location list for 'b' not found, did the debug info format change?")
+    Success = False
+elif FirstBeginOffset != 0 or abs(EndOffset - HighPC) > 16:
+    print("Location list for 'b' does not cover the whole function:")
+    print(
+        "Location starts at 0x%x, ends at 0x%x, HighPC = 0x%x"
+        % (FirstBeginOffset, EndOffset, HighPC)
+    )
+    Success = False
+
+sys.exit(not Success)
diff --git a/llvm/test/DebugInfo/X86/fake-use.ll b/llvm/test/DebugInfo/X86/fake-use.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/DebugInfo/X86/fake-use.ll
@@ -0,0 +1,98 @@
+; REQUIRES: object-emission
+
+; Make sure the fake use of 'b' at the end of 'foo' causes location information
+; for 'b' to extend all the way to the end of the function.
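+;
+; 'b' has no real use after the comparison at the top of 'foo', so without
+; the fake use at the end of the function its location could be dropped after
+; the branch. Inputs/check-fake-use.py verifies that the .debug_loc entry for
+; 'b' starts at offset 0 and ends within a few bytes of DW_AT_high_pc.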
+
+; RUN: %llc_dwarf -O2 -filetype=obj -dwarf-linkage-names=Abstract < %s | llvm-dwarfdump -v - -o %t
+; RUN: %python %p/Inputs/check-fake-use.py %t
+
+; Generated with:
+; clang -O2 -g -S -emit-llvm -fextend-this-ptr fake-use.c
+;
+; int glob[10];
+; extern void bar();
+;
+; int foo(int b, int i)
+; {
+;   int loc = glob[i] * 2;
+;   if (b) {
+;     glob[2] = loc;
+;     bar();
+;   }
+;   return loc;
+; }
+;
+; ModuleID = 't2.c'
+source_filename = "t2.c"
+
+@glob = common local_unnamed_addr global [10 x i32] zeroinitializer, align 16, !dbg !0
+
+; Function Attrs: nounwind sspstrong uwtable
+define i32 @foo(i32 %b, i32 %i) local_unnamed_addr !dbg !13 {
+entry:
+  tail call void @llvm.dbg.value(metadata i32 %b, i64 0, metadata !17, metadata !20), !dbg !21
+  %idxprom = sext i32 %i to i64, !dbg !22
+  %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* @glob, i64 0, i64 %idxprom, !dbg !22
+  %0 = load i32, i32* %arrayidx, align 4, !dbg !22, !tbaa !23
+  %mul = shl nsw i32 %0, 1, !dbg !22
+  %tobool = icmp eq i32 %b, 0, !dbg !27
+  br i1 %tobool, label %if.end, label %if.then, !dbg !29
+
+if.then:                                          ; preds = %entry
+  store i32 %mul, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @glob, i64 0, i64 2), align 8, !dbg !30, !tbaa !23
+  tail call void (...) @bar() #2, !dbg !32
+  br label %if.end, !dbg !33
+
+if.end:                                           ; preds = %entry, %if.then
+  tail call void (...) @llvm.fake.use(i32 %b), !dbg !34
+  ret i32 %mul, !dbg !35
+}
+
+declare void @bar(...) local_unnamed_addr
+
+; Function Attrs: nounwind
+declare void @llvm.fake.use(...)
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
+
+!llvm.dbg.cu = !{!1}
+!llvm.module.flags = !{!9, !10, !11}
+!llvm.ident = !{!12}
+
+!0 = distinct !DIGlobalVariableExpression(var: !DIGlobalVariable(name: "glob", scope: !1, file: !2, line: 1, type: !5, isLocal: false, isDefinition: true), expr: !DIExpression())
+!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "clang version 4.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, globals: !4)
+!2 = !DIFile(filename: "t2.c", directory: "/")
+!3 = !{}
+!4 = !{!0}
+!5 = !DICompositeType(tag: DW_TAG_array_type, baseType: !6, size: 320, align: 32, elements: !7)
+!6 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!7 = !{!8}
+!8 = !DISubrange(count: 10)
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !{i32 1, !"PIC Level", i32 2}
+!12 = !{!"clang version 4.0.0"}
+!13 = distinct !DISubprogram(name: "foo", scope: !2, file: !2, line: 4, type: !14, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !1, retainedNodes: !16)
+!14 = !DISubroutineType(types: !15)
+!15 = !{!6, !6, !6}
+!16 = !{!17, !18, !19}
+!17 = !DILocalVariable(name: "b", arg: 1, scope: !13, file: !2, line: 4, type: !6)
+!18 = !DILocalVariable(name: "i", arg: 2, scope: !13, file: !2, line: 4, type: !6)
+!19 = !DILocalVariable(name: "loc", scope: !13, file: !2, line: 6, type: !6)
+!20 = !DIExpression()
+!21 = !DILocation(line: 4, scope: !13)
+!22 = !DILocation(line: 6, scope: !13)
+!23 = !{!24, !24, i64 0}
+!24 = !{!"int", !25, i64 0}
+!25 = !{!"omnipotent char", !26, i64 0}
+!26 = !{!"Simple C/C++ TBAA"}
+!27 = !DILocation(line: 7, scope: !28)
+!28 = distinct !DILexicalBlock(scope: !13, file: !2, line: 7)
+!29 = !DILocation(line: 7, scope: !13)
+!30 = !DILocation(line: 8, scope: !31)
+!31 = distinct !DILexicalBlock(scope: !28, file: !2, line: 7)
+!32 = !DILocation(line: 9, scope: !31)
+!33 = !DILocation(line: 10, scope: !31)
+!34 = !DILocation(line: 12, scope: !13)
+!35 = !DILocation(line: 11, scope: !13)
diff --git a/llvm/test/Transforms/GVN/fake-use-constprop.ll b/llvm/test/Transforms/GVN/fake-use-constprop.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/GVN/fake-use-constprop.ll
@@ -0,0 +1,69 @@
+; RUN: opt -passes=gvn -S < %s | FileCheck %s
+;
+; The Global Value Numbering pass (GVN) propagates values that a conditional
+; branch proves to be constant to all of the uses in the basic blocks the
+; branch edge dominates. However, we don't want the constant propagated
+; into fake.use intrinsics, since this would render the intrinsic useless
+; with respect to keeping the variable live up until the fake.use.
+; This test checks that we don't generate any fake.uses with constant 0.
+;
+; Reduced from the following test case, generated with
+; clang -O2 -S -emit-llvm -fextend-lifetimes test.c
+;
+; extern void func1();
+; extern int bar();
+; extern void baz(int);
+;
+; int foo(int i, float f, int *punused)
+; {
+;   int j = 3*i;
+;   if (j > 0) {
+;     int m = bar(i);
+;     if (m) {
+;       char b = f;
+;       baz(b);
+;       if (b)
+;         goto lab;
+;       func1();
+;     }
+; lab:
+;     func1();
+;   }
+;   return 1;
+; }
+
+;; GVN should propagate a constant value through to a regular call, but not to
+;; a fake use, which should continue to track the original value.
+; CHECK: %[[CONV_VAR:[a-zA-Z0-9]+]] = fptosi
+; CHECK: call {{.+}} @bees(i8 0)
+; CHECK: call {{.+}} @llvm.fake.use(i8 %[[CONV_VAR]])
+
+define i32 @foo(float %f) {
+  %conv = fptosi float %f to i8
+  %tobool3 = icmp eq i8 %conv, 0
+  br i1 %tobool3, label %if.end, label %lab
+
+if.end:
+  tail call void (...) @bees(i8 %conv)
+  tail call void (...) @llvm.fake.use(i8 %conv)
+  br label %lab
+
+lab:
+  ret i32 1
+}
+
+declare i32 @bar(...)
+
+declare void @baz(i32)
+
+declare void @bees(i32)
+
+declare void @func1(...)
+
+; Function Attrs: nounwind
+declare void @llvm.fake.use(...)
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.9.0"}
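
For illustration only (not part of the patch): a minimal sketch of the usage
pattern the tests above exercise, i.e. how a frontend flag such as
-fextend-lifetimes might keep a local alive to the end of its scope. The names
@sketch and @use_it are invented for the example; only @llvm.fake.use comes
from this patch. Without the fake use, %x is dead after the call to @use_it
and need not survive register allocation; with it, optimizers must keep %x
available through the end of the function.

    define void @sketch(i32 %x) {
    entry:
      call void @use_it(i32 %x)
      notail call void (...) @llvm.fake.use(i32 %x)
      ret void
    }

    declare void @use_it(i32)
    declare void @llvm.fake.use(...)

The tests consistently mark the call `notail` so that it cannot be turned into
a tail call, which would otherwise defeat the point of placing the use at the
end of the scope.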