Index: llvm/docs/LangRef.rst =================================================================== --- llvm/docs/LangRef.rst +++ llvm/docs/LangRef.rst @@ -632,6 +632,30 @@ challenging as many commonly expected properties, such as ``ptrtoint(v)-ptrtoint(v) == 0``, don't hold for non-integral types. +.. _ptr_provenance: + +Pointer Provenance +------------------ + +Note: the introduction of pointer provenance is a work in progress, and should +be considered experimental at this time. + +The provenance of a pointer identifies the possible objects to which that +pointer can refer. The :ref:`Load <i_load>` and :ref:`Store <i_store>` +instructions have an optional ``ptr_provenance`` operand. When this is set, the +provenance is decoupled from the actual pointer computation. As computations +are not needed to track the origin of a pointer, those can be omitted for the +``ptr_provenance`` operand. Dependencies on ``PHI`` and ``select`` instructions +can still be useful to accurately identify possible origins, especially when +later optimizations are able to reduce the set of possibilities. + +Alias analysis can make use of both the computed pointer value and the +provenance to come up with alias conclusions. + +An ``unknown_provenance`` pointer provenance value indicates that the origin is +unknown, and that it can refer to any object. This special constant can only +be used on the provenance path. + .. _globalvars: Global Variables @@ -10136,8 +10160,8 @@ :: - = load [volatile] , ptr [, align ][, !nontemporal !][, !invariant.load !][, !invariant.group !][, !nonnull !][, !dereferenceable !][, !dereferenceable_or_null !][, !align !][, !noundef !] - = load atomic [volatile] , ptr [syncscope("")] , align [, !invariant.group !] + = load [volatile] , ptr [, ptr_provenance ptr ][, align ][, !nontemporal !][, !invariant.load !][, !invariant.group !][, !nonnull !][, !dereferenceable !][, !dereferenceable_or_null !][, !align !][, !noundef !] 
+ = load atomic [volatile] , ptr [, ptr_provenance ptr ] [syncscope("")] , align [, !invariant.group !] ! = !{ i32 1 } ! = !{} ! = !{ i64 } @@ -10169,6 +10193,11 @@ alignment is not set to a value which is at least the size in bytes of the pointee. ``!nontemporal`` does not have any defined semantics for atomic loads. +The optional ``ptr_provenance`` argument, when present, specifies a separate +pointer provenance path for the ``pointer`` operand of the ``load`` instruction. +See :ref:`Pointer Provenance <ptr_provenance>`. When it is not present, the +``pointer`` operand can be used for the pointer provenance. + The optional constant ``align`` argument specifies the alignment of the operation (that is, the alignment of the memory address). It is the responsibility of the code emitter to ensure that the alignment information is @@ -10276,8 +10305,8 @@ :: - store [volatile] , ptr [, align ][, !nontemporal !][, !invariant.group !] ; yields void - store atomic [volatile] , ptr [syncscope("")] , align [, !invariant.group !] ; yields void + store [volatile] , ptr [, ptr_provenance ptr ][, align ][, !nontemporal !][, !invariant.group !] ; yields void + store atomic [volatile] , ptr [, ptr_provenance ptr ] [syncscope("")] , align [, !invariant.group !] ; yields void ! = !{ i32 1 } ! = !{} @@ -10309,6 +10338,11 @@ the alignment is not set to a value which is at least the size in bytes of the pointee. ``!nontemporal`` does not have any defined semantics for atomic stores. +The optional ``ptr_provenance`` argument, when present, specifies a separate +pointer provenance path for the ``pointer`` operand of the ``store`` instruction. +See :ref:`Pointer Provenance <ptr_provenance>`. When it is not present, the +``pointer`` operand can be used for the pointer provenance. + The optional constant ``align`` argument specifies the alignment of the operation (that is, the alignment of the memory address). 
It is the responsibility of the code emitter to ensure that the alignment information is Index: llvm/include/llvm/IR/InstVisitor.h =================================================================== --- llvm/include/llvm/IR/InstVisitor.h +++ llvm/include/llvm/IR/InstVisitor.h @@ -166,7 +166,7 @@ RetTy visitICmpInst(ICmpInst &I) { DELEGATE(CmpInst);} RetTy visitFCmpInst(FCmpInst &I) { DELEGATE(CmpInst);} RetTy visitAllocaInst(AllocaInst &I) { DELEGATE(UnaryInstruction);} - RetTy visitLoadInst(LoadInst &I) { DELEGATE(UnaryInstruction);} + RetTy visitLoadInst(LoadInst &I) { DELEGATE(Instruction); } RetTy visitStoreInst(StoreInst &I) { DELEGATE(Instruction);} RetTy visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { DELEGATE(Instruction);} RetTy visitAtomicRMWInst(AtomicRMWInst &I) { DELEGATE(Instruction);} Index: llvm/include/llvm/IR/Instructions.h =================================================================== --- llvm/include/llvm/IR/Instructions.h +++ llvm/include/llvm/IR/Instructions.h @@ -174,7 +174,7 @@ /// An instruction for reading from memory. This uses the SubclassData field in /// Value to store whether or not the load is volatile. -class LoadInst : public UnaryInstruction { +class LoadInst : public Instruction { using VolatileField = BoolBitfieldElementT<0>; using AlignmentField = AlignmentBitfieldElementT; using OrderingField = AtomicOrderingBitfieldElementT; @@ -210,6 +210,15 @@ Align Align, AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAtEnd); + ~LoadInst() { + setLoadInstNumOperands(2); // needed by operator delete + } + // allocate space for exactly two operands + void *operator new(size_t s) { return User::operator new(s, 2); } + + /// Transparently provide more efficient getOperand methods. + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + /// Return true if this is a load from a volatile memory location. 
bool isVolatile() const { return getSubclassData(); } @@ -271,6 +280,30 @@ return getPointerOperandType()->getPointerAddressSpace(); } + bool hasPtrProvenanceOperand() const { return getNumOperands() == 2; } + Value *getPtrProvenanceOperand() const { + assert(hasPtrProvenanceOperand() && "we need a ptr_provenance"); + return getOperand(1); + } + /// Returns the PtrProvenanceOperand when available, otherwise the + /// PointerOperand. + Value *getPtrProvenance() { + return hasPtrProvenanceOperand() ? getPtrProvenanceOperand() + : getPointerOperand(); + } + const Value *getPtrProvenance() const { + return hasPtrProvenanceOperand() ? getPtrProvenanceOperand() + : getPointerOperand(); + } + static unsigned getPtrProvenanceOperandIndex() { return 1U; } + void setPtrProvenanceOperand(Value *Provenance); + void removePtrProvenanceOperand(); + std::optional getOptionalPtrProvenance() const { + if (hasPtrProvenanceOperand()) + return getPtrProvenanceOperand(); + else + return std::nullopt; + } // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::Load; @@ -293,6 +326,11 @@ SyncScope::ID SSID; }; +template <> +struct OperandTraits : public VariadicOperandTraits {}; + +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(LoadInst, Value) + //===----------------------------------------------------------------------===// // StoreInst Class //===----------------------------------------------------------------------===// @@ -329,8 +367,11 @@ StoreInst(Value *Val, Value *Ptr, bool isVolatile, Align Align, AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAtEnd); - // allocate space for exactly two operands - void *operator new(size_t S) { return User::operator new(S, 2); } + ~StoreInst() { + setStoreInstNumOperands(3); // needed by operator delete + } + // allocate space for exactly three operands + void *operator new(size_t S) { return User::operator new(S, 3); } void operator delete(void 
*Ptr) { User::operator delete(Ptr); } /// Return true if this is a store to a volatile memory location. @@ -400,6 +441,30 @@ return getPointerOperandType()->getPointerAddressSpace(); } + bool hasPtrProvenanceOperand() const { return getNumOperands() == 3; } + Value *getPtrProvenanceOperand() const { + assert(hasPtrProvenanceOperand() && "we need a ptr_provenance"); + return getOperand(2); + } + /// Returns the PtrProvenanceOperand when available, otherwise the + /// PointerOperand. + Value *getPtrProvenance() { + return hasPtrProvenanceOperand() ? getPtrProvenanceOperand() + : getPointerOperand(); + } + const Value *getPtrProvenance() const { + return hasPtrProvenanceOperand() ? getPtrProvenanceOperand() + : getPointerOperand(); + } + static unsigned getPtrProvenanceOperandIndex() { return 2U; } + void setPtrProvenanceOperand(Value *Provenance); + void removePtrProvenanceOperand(); + std::optional getOptionalPtrProvenance() const { + if (hasPtrProvenanceOperand()) + return getPtrProvenanceOperand(); + else + return std::nullopt; + } // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::Store; @@ -423,8 +488,7 @@ }; template <> -struct OperandTraits : public FixedNumOperandTraits { -}; +struct OperandTraits : public VariadicOperandTraits {}; DEFINE_TRANSPARENT_OPERAND_ACCESSORS(StoreInst, Value) Index: llvm/include/llvm/IR/User.h =================================================================== --- llvm/include/llvm/IR/User.h +++ llvm/include/llvm/IR/User.h @@ -209,6 +209,24 @@ NumUserOperands = NumOps; } + /// FIXME: As the number of operands is used to find the start of the + /// allocated memory in operator delete, we need to always think we have 3 + /// operands before delete. 
+ void setStoreInstNumOperands(unsigned NumOps) { + assert((2 <= NumOps) && (NumOps <= 3) && + "StoreInst can only have 2 or 3 operands"); + NumUserOperands = NumOps; + } + + /// FIXME: As the number of operands is used to find the start of the + /// allocated memory in operator delete, we need to always think we have 2 + /// operands before delete. + void setLoadInstNumOperands(unsigned NumOps) { + assert((1 <= NumOps) && (NumOps <= 2) && + "LoadInst can only have 1 or 2 operands"); + NumUserOperands = NumOps; + } + /// Subclasses with hung off uses need to manage the operand count /// themselves. In these instances, the operand count isn't used to find the /// OperandList, so there's no issue in having the operand count change. Index: llvm/lib/IR/AsmWriter.cpp =================================================================== --- llvm/lib/IR/AsmWriter.cpp +++ llvm/lib/IR/AsmWriter.cpp @@ -4394,15 +4394,21 @@ } Out << ", "; TypePrinter.print(I.getType(), Out); - } else if (Operand) { // Print the normal way. + } else if (const auto *LI = dyn_cast(&I)) { + Out << ' '; + TypePrinter.print(LI->getType(), Out); + Out << ", "; + writeOperand(I.getOperand(0), true); + } else if (isa(&I)) { + Out << ' '; + writeOperand(I.getOperand(0), true); + Out << ", "; + writeOperand(I.getOperand(1), true); + } else if (Operand) { // Print the normal way. 
if (const auto *GEP = dyn_cast(&I)) { Out << ' '; TypePrinter.print(GEP->getSourceElementType(), Out); Out << ','; - } else if (const auto *LI = dyn_cast(&I)) { - Out << ' '; - TypePrinter.print(LI->getType(), Out); - Out << ','; } // PrintAllTypes - Instructions who have operands of all the same type @@ -4445,11 +4451,19 @@ if (const LoadInst *LI = dyn_cast(&I)) { if (LI->isAtomic()) writeAtomic(LI->getContext(), LI->getOrdering(), LI->getSyncScopeID()); + if (LI->hasPtrProvenanceOperand()) { + Out << ", ptr_provenance "; + writeOperand(LI->getPtrProvenanceOperand(), true); + } if (MaybeAlign A = LI->getAlign()) Out << ", align " << A->value(); } else if (const StoreInst *SI = dyn_cast(&I)) { if (SI->isAtomic()) writeAtomic(SI->getContext(), SI->getOrdering(), SI->getSyncScopeID()); + if (SI->hasPtrProvenanceOperand()) { + Out << ", ptr_provenance "; + writeOperand(SI->getPtrProvenanceOperand(), true); + } if (MaybeAlign A = SI->getAlign()) Out << ", align " << A->value(); } else if (const AtomicCmpXchgInst *CXI = dyn_cast(&I)) { Index: llvm/lib/IR/Instructions.cpp =================================================================== --- llvm/lib/IR/Instructions.cpp +++ llvm/lib/IR/Instructions.cpp @@ -1530,6 +1530,11 @@ void LoadInst::AssertOK() { assert(getOperand(0)->getType()->isPointerTy() && "Ptr must have pointer type."); + assert((!hasPtrProvenanceOperand() || getOperand(1)) && + "ptr_provenance must be non-null"); + assert((!hasPtrProvenanceOperand() || + (getOperand(0)->getType() == getOperand(1)->getType())) && + "ptr_provenance must have the same type as the pointer"); } static Align computeLoadStoreDefaultAlign(Type *Ty, BasicBlock *BB) { @@ -1576,8 +1581,11 @@ LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, Align Align, AtomicOrdering Order, SyncScope::ID SSID, Instruction *InsertBef) - : UnaryInstruction(Ty, Load, Ptr, InsertBef) { + : Instruction(Ty, Load, OperandTraits::op_end(this) - 2, 2, + InsertBef) { 
assert(cast(Ptr->getType())->isOpaqueOrPointeeTypeMatches(Ty)); + setLoadInstNumOperands(1); + Op<0>() = Ptr; setVolatile(isVolatile); setAlignment(Align); setAtomic(Order, SSID); @@ -1588,7 +1596,10 @@ LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, Align Align, AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAE) - : UnaryInstruction(Ty, Load, Ptr, InsertAE) { + : Instruction(Ty, Load, OperandTraits::op_end(this) - 2, 2, + InsertAE) { + setLoadInstNumOperands(1); + Op<0>() = Ptr; assert(cast(Ptr->getType())->isOpaqueOrPointeeTypeMatches(Ty)); setVolatile(isVolatile); setAlignment(Align); @@ -1597,6 +1608,26 @@ setName(Name); } +void LoadInst::setPtrProvenanceOperand(llvm::LoadInst::Value *Provenance) { + assert(Provenance && "Needs a provenance"); + if (!hasPtrProvenanceOperand()) { + setLoadInstNumOperands(2); + // shift operands + setOperand(0, getOperand(1)); + } + setOperand(1, Provenance); + AssertOK(); +} + +void LoadInst::removePtrProvenanceOperand() { + assert(hasPtrProvenanceOperand() && "nothing to remove"); + // shift operands + setOperand(1, getOperand(0)); + setOperand(0, nullptr); + setLoadInstNumOperands(1); + AssertOK(); +} + //===----------------------------------------------------------------------===// // StoreInst Implementation //===----------------------------------------------------------------------===// @@ -1608,6 +1639,11 @@ assert(cast(getOperand(1)->getType()) ->isOpaqueOrPointeeTypeMatches(getOperand(0)->getType()) && "Ptr must be a pointer to Val type!"); + assert((!hasPtrProvenanceOperand() || getOperand(2)) && + "ptr_provenance must be non-null"); + assert((!hasPtrProvenanceOperand() || + (getOperand(1)->getType() == getOperand(2)->getType())) && + "ptr_provenance must have the same type as the pointer"); } StoreInst::StoreInst(Value *val, Value *addr, Instruction *InsertBefore) @@ -1642,8 +1678,8 @@ AtomicOrdering Order, SyncScope::ID SSID, Instruction *InsertBefore) : 
Instruction(Type::getVoidTy(val->getContext()), Store, - OperandTraits::op_begin(this), - OperandTraits::operands(this), InsertBefore) { + OperandTraits::op_end(this) - 3, 3, InsertBefore) { + setStoreInstNumOperands(2); Op<0>() = val; Op<1>() = addr; setVolatile(isVolatile); @@ -1656,8 +1692,8 @@ AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAtEnd) : Instruction(Type::getVoidTy(val->getContext()), Store, - OperandTraits::op_begin(this), - OperandTraits::operands(this), InsertAtEnd) { + OperandTraits::op_end(this) - 3, 3, InsertAtEnd) { + setStoreInstNumOperands(2); Op<0>() = val; Op<1>() = addr; setVolatile(isVolatile); @@ -1666,6 +1702,28 @@ AssertOK(); } +void StoreInst::setPtrProvenanceOperand(llvm::StoreInst::Value *Provenance) { + assert(Provenance && "Needs a provenance"); + if (!hasPtrProvenanceOperand()) { + setStoreInstNumOperands(3); + // shift uses; FIXME: can be made faster ? + setOperand(0, getOperand(1)); + setOperand(1, getOperand(2)); + } + setOperand(2, Provenance); + AssertOK(); +} + +void StoreInst::removePtrProvenanceOperand() { + assert(hasPtrProvenanceOperand() && "nothing to remove"); + // make sure 'uses' are updated + setOperand(2, getOperand(1)); + setOperand(1, getOperand(0)); + setOperand(0, nullptr); + + setStoreInstNumOperands(2); + AssertOK(); +} //===----------------------------------------------------------------------===// // AtomicCmpXchgInst Implementation @@ -4827,13 +4885,32 @@ } LoadInst *LoadInst::cloneImpl() const { - return new LoadInst(getType(), getOperand(0), Twine(), isVolatile(), - getAlign(), getOrdering(), getSyncScopeID()); + LoadInst *Result = + new LoadInst(getType(), getOperand(0), Twine(), isVolatile(), getAlign(), + getOrdering(), getSyncScopeID()); + // - we must keep the same number of arguments (for vector optimizations) + // - if we duplicate the provenance, we can get into problems with passes + // that don't know how to handle it (Like MergeLoadStoreMotion shows) + // - safe alternative: 
keep the argument, but map it to unknown_provenance. + if (hasPtrProvenanceOperand()) + Result->setPtrProvenanceOperand(UnknownProvenance::get( + cast(getPtrProvenanceOperand()->getType()))); + return Result; } StoreInst *StoreInst::cloneImpl() const { - return new StoreInst(getOperand(0), getOperand(1), isVolatile(), getAlign(), - getOrdering(), getSyncScopeID()); + StoreInst *Result = + new StoreInst(getOperand(0), getOperand(1), isVolatile(), getAlign(), + getOrdering(), getSyncScopeID()); + + // we must keep the same number of arguments (for vector optimizations) + // - if we duplicate the provenance, we can get into problems with passes + // that don't know how to handle it (Like MergeLoadStoreMotion shows) + // - safe alternative: keep the argument, but map it to unknown_provenance. + if (hasPtrProvenanceOperand()) + Result->setPtrProvenanceOperand(UnknownProvenance::get( + cast(getPtrProvenanceOperand()->getType()))); + return Result; } AtomicCmpXchgInst *AtomicCmpXchgInst::cloneImpl() const { Index: llvm/unittests/IR/IRBuilderTest.cpp =================================================================== --- llvm/unittests/IR/IRBuilderTest.cpp +++ llvm/unittests/IR/IRBuilderTest.cpp @@ -446,6 +446,39 @@ EXPECT_FALSE(verifyModule(*M)); } +TEST_F(IRBuilderTest, PtrProvenanceLoadStore) { + IRBuilder<> Builder(BB); + auto *A = Builder.CreateAlloca(GV->getValueType()); + + auto *L = Builder.CreateLoad(GV->getValueType(), GV); + EXPECT_TRUE(!L->hasPtrProvenanceOperand()); + EXPECT_EQ(L->getPtrProvenance(), GV); + + auto *S = Builder.CreateStore(L, GV); + EXPECT_TRUE(!S->hasPtrProvenanceOperand()); + EXPECT_EQ(S->getPtrProvenance(), GV); + + L->setPtrProvenanceOperand(A); + EXPECT_TRUE(L->hasPtrProvenanceOperand()); + + S->setPtrProvenanceOperand(A); + EXPECT_TRUE(S->hasPtrProvenanceOperand()); + + EXPECT_EQ(L->getPtrProvenanceOperand(), A); + EXPECT_EQ(S->getPtrProvenanceOperand(), A); + EXPECT_EQ(L->getPtrProvenance(), A); + EXPECT_EQ(S->getPtrProvenance(), A); + + 
L->removePtrProvenanceOperand(); + EXPECT_TRUE(!L->hasPtrProvenanceOperand()); + + S->removePtrProvenanceOperand(); + EXPECT_TRUE(!S->hasPtrProvenanceOperand()); + + EXPECT_EQ(L->getPtrProvenance(), GV); + EXPECT_EQ(S->getPtrProvenance(), GV); +} + TEST_F(IRBuilderTest, Lifetime) { IRBuilder<> Builder(BB); AllocaInst *Var1 = Builder.CreateAlloca(Builder.getInt8Ty());