Index: llvm/lib/Transforms/Scalar/SROA.cpp =================================================================== --- llvm/lib/Transforms/Scalar/SROA.cpp +++ llvm/lib/Transforms/Scalar/SROA.cpp @@ -117,6 +117,14 @@ namespace { +static llvm::IntrinsicInst *getBaseAsCopyGuardOrNull(llvm::Value *V) { + llvm::IntrinsicInst *II = dyn_cast(V->stripInBoundsOffsets()); + if (II && (II->getIntrinsicID() == Intrinsic::noalias_copy_guard)) + return II; + + return nullptr; +} + /// A custom IRBuilder inserter which prefixes all names, but only in /// Assert builds. class IRBuilderPrefixedInserter final : public IRBuilderDefaultInserter { @@ -653,12 +661,25 @@ /// Set to de-duplicate dead instructions found in the use walk. SmallPtrSet VisitedDeadInsts; + // llvm.noalias.copy.guard, offset + SmallVector, 4> PendingNoAliasCopyGuards; + public: SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS) : PtrUseVisitor(DL), AllocSize(DL.getTypeAllocSize(AI.getAllocatedType()).getFixedSize()), AS(AS) {} + PtrInfo visitPtrAndNoAliasCopyGuards(Instruction &I) { + PtrInfo PI = visitPtr(I); + if (!PI.isAborted()) { + for (auto &CGAndOffset : PendingNoAliasCopyGuards) + visitPendingNoAliasCopyGuard(*CGAndOffset.first, CGAndOffset.second); + } + + return PI; + } + private: void markAsDead(Instruction &I) { if (VisitedDeadInsts.insert(&I).second) @@ -777,6 +798,13 @@ assert((!LI.isSimple() || LI.getType()->isSingleValueType()) && "All simple FCA loads should have been pre-split"); + if (U->getOperandNo() == LI.getNoaliasProvenanceOperandIndex()) { + // Skip provenance + assert(LI.hasNoaliasProvenanceOperand() && + LI.getNoaliasProvenanceOperand() == *U); + return; + } + if (!IsOffsetKnown) return PI.setAborted(&LI); @@ -792,6 +820,13 @@ } void visitStoreInst(StoreInst &SI) { + if (U->getOperandNo() == SI.getNoaliasProvenanceOperandIndex()) { + // Skip provenance + assert(SI.hasNoaliasProvenanceOperand() && + SI.getNoaliasProvenanceOperand() == *U); + return; + } + Value *ValOp = 
SI.getValueOperand(); if (ValOp == *U) return PI.setEscapedAndAborted(&SI); @@ -826,6 +861,14 @@ assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) && "All simple FCA stores should have been pre-split"); handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile()); + + if (auto *LI = dyn_cast(SI.getValueOperand())) { + // When we get here, the Store is based on an AllocaInst. + // Make sure to track any source dependencies on a llvm.noalias.copy.guard + // when the source was not based on the AllocaInst. + rememberIfBasedOnNoAliasCopyGuard(LI->getPointerOperand(), + Offset.getZExtValue()); + } } void visitMemSetInst(MemSetInst &II) { @@ -924,6 +967,141 @@ // Check that we ended up with a valid index in the map. assert(AS.Slices[PrevIdx].getUse()->getUser() == &II && "Map index doesn't point back to a slice with this user."); + + // When we get here, one of the operands is based on an AllocaInst. + // Make sure to track any source dependencies on a llvm.noalias.copy.guard + // when the source was not based on the AllocaInst. + rememberIfBasedOnNoAliasCopyGuard(II.getSource(), Offset.getZExtValue()); + } + + void gatherValidNoAliasPointerOffsets( + IRBuilderTy &IRB, IntrinsicInst &II, + SmallVectorImpl> &PtrOffsetSizes, + SmallVectorImpl &Indices, + SmallVectorImpl &MinusOneIndices, uint64_t BaseOffset, + unsigned Index = 0) { + if (Index == MinusOneIndices.size()) { + // Check if the indices are compatible with the aggregate + auto PointeeType = + cast(II.getType()->getScalarType())->getElementType(); + auto LeafType = GetElementPtrInst::getIndexedType(PointeeType, Indices); + + if (LeafType == nullptr || !isa(LeafType)) { + LLVM_DEBUG(llvm::dbgs() << "gatherValidNoAliasPointerOffsets: " + "incompatible struct ? 
Is it a union?\n" + << II << "\n"); + return; + } + + // don't know how to compute the offsets without creating a GEP + GetElementPtrInst *GEP = + cast(IRB.CreateGEP(&II, Indices)); + assert(isa(GEP->getType()->getPointerElementType()) && + "noalias pointer is not a pointer?"); + APInt GEPOffset(DL.getPointerSizeInBits(), 0); + if (GEP->accumulateConstantOffset(DL, GEPOffset)) { + auto Offset = GEPOffset.getZExtValue(); + if (Offset >= BaseOffset && Offset < AllocSize) { + PtrOffsetSizes.push_back( + std::make_pair((unsigned)(Offset - BaseOffset), + (unsigned)DL.getTypeAllocSize( + GEP->getType()->getPointerElementType()))); + } + } + GEP->eraseFromParent(); + return; + } + + while (true) { + // Increment + ConstantInt *CI = cast(Indices[MinusOneIndices[Index]]); + Indices[MinusOneIndices[Index]] = + ConstantInt::get(CI->getType(), CI->getSExtValue() + 1, true); + + unsigned CurrentSize = PtrOffsetSizes.size(); + gatherValidNoAliasPointerOffsets(IRB, II, PtrOffsetSizes, Indices, + MinusOneIndices, BaseOffset, Index + 1); + if (CurrentSize == PtrOffsetSizes.size()) { + // no new entries - recurse back; prepare for next iteration + Indices[MinusOneIndices[Index]] = + ConstantInt::get(CI->getType(), -1, true); + break; + } + } + } + + void rememberIfBasedOnNoAliasCopyGuard(Value *V, uint64_t TheOffset) { + if (auto II = getBaseAsCopyGuardOrNull(V)) { + for (auto CGOff : PendingNoAliasCopyGuards) { + if (CGOff.first == II && CGOff.second == TheOffset) + return; + } + + PendingNoAliasCopyGuards.emplace_back(II, TheOffset); + } + } + + void visitNoAliasCopyGuard(IntrinsicInst &II) { + assert(II.getIntrinsicID() == Intrinsic::noalias_copy_guard && + "We need a llvm.noalias.copy.guard here"); + rememberIfBasedOnNoAliasCopyGuard(&II, Offset.getZExtValue()); + enqueueUsers(II); + } + + void visitPendingNoAliasCopyGuard(IntrinsicInst &II, uint64_t BaseOffset) { + LLVM_DEBUG( + llvm::dbgs() + << "AllocaSlices::SliceBuilder: handling llvm.noalias.copy.guard:" << II + << ":@" << 
BaseOffset << ":" << AllocSize << "\n"); + // Identify the usage, so that it can be split + if (II.use_empty()) + return markAsDead(II); + + SmallVector, 4> PtrOffsetSizes; + + // Provide as many slices as we have restrict pointers + MDNode *CopyGuardIndices = + cast(cast( + II.getOperand(Intrinsic::NoAliasCopyGuardIndicesArg)) + ->getMetadata()); + + IRBuilderTy IRB(II.getNextNode()); + SmallVector Indices; + SmallVector MinusOneIndices; + for (const MDOperand &MDOp : CopyGuardIndices->operands()) { + if (const MDNode *IMD = dyn_cast(MDOp)) { + Indices.clear(); + MinusOneIndices.clear(); + + unsigned CGIndex = 0; + + for (const MDOperand &MDIndex : IMD->operands()) { + ConstantInt *C = + cast(cast(MDIndex)->getValue()); + if (C->isMinusOne()) // accept any index at this place + MinusOneIndices.push_back(CGIndex); + Indices.push_back(C); + ++CGIndex; + } + gatherValidNoAliasPointerOffsets(IRB, II, PtrOffsetSizes, Indices, + MinusOneIndices, BaseOffset); + } + } + + LLVM_DEBUG(llvm::dbgs() << "noalias pointers are at:\n"; + for (auto &P + : PtrOffsetSizes) { + llvm::dbgs() + << " - [" << P.first << "," << P.first + P.second << ")\n"; + }); + + U = &II.getOperandUse(0); + unsigned AS = II.getOperand(0)->getType()->getPointerAddressSpace(); + APInt TheBaseOffset(DL.getIndexSizeInBits(AS), BaseOffset); + for (auto &P : PtrOffsetSizes) { + APInt TheOffset = TheBaseOffset + P.first; + insertUse(II, TheOffset, P.second, false); + } } // Disable SRoA for any intrinsics except for lifetime invariants and @@ -946,6 +1124,52 @@ insertUse(II, Offset, Size, true); return; } + // look through noalias intrinsics + if (II.getIntrinsicID() == Intrinsic::noalias_decl) { + insertUse(II, Offset, AllocSize, true); + // do not enqueue direct users (?) 
They should be handled through a + // dependency on the original alloca + return; + } + if (II.getIntrinsicID() == Intrinsic::noalias) { + if (U->getOperandNo() == Intrinsic::NoAliasIdentifyPArg) { + insertUse(II, Offset, + DL.getTypeStoreSize( + II.getOperand(Intrinsic::NoAliasIdentifyPArg)->getType()), + false); + return; + } + if (U->getOperandNo() == 0) { + assert(II.getOperand(0) == *U); + // _only_ look through the first argument + enqueueUsers(II); + } + return; + } + if (II.getIntrinsicID() == Intrinsic::provenance_noalias) { + if (U->getOperandNo() == Intrinsic::ProvenanceNoAliasIdentifyPArg) { + insertUse(II, Offset, + DL.getTypeStoreSize( + II.getOperand(Intrinsic::ProvenanceNoAliasIdentifyPArg) + ->getType()), + false); + return; + } + // hmmm - do not look through the first argument for a + // llvm.provenance.noalias + return; + } + if (II.getIntrinsicID() == Intrinsic::noalias_arg_guard) { + if (U->getOperandNo() == 0) { + // _only_ look through the first argument + enqueueUsers(II); + } + return; + } + if (II.getIntrinsicID() == Intrinsic::noalias_copy_guard) { + visitNoAliasCopyGuard(II); + return; + } if (II.isLaunderOrStripInvariantGroup()) { enqueueUsers(II); @@ -1068,7 +1292,7 @@ #endif PointerEscapingInstr(nullptr) { SliceBuilder PB(DL, AI, *this); - SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI); + SliceBuilder::PtrInfo PtrI = PB.visitPtrAndNoAliasCopyGuards(AI); if (PtrI.isEscaped() || PtrI.isAborted()) { // FIXME: We should sink the escape vs. abort info into the caller nicely, // possibly by just storing the PtrInfo in the AllocaSlices. 
@@ -1126,6 +1350,18 @@ #endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +static IntrinsicInst *partitionRepresentsNoAliasPointer(Partition &P) { + // A partition that has a 'llvm.noalias.copy.guard' use, represents a + // noalias pointer + for (auto &I : P) { + Use *U = I.getUse(); + if (auto *II = dyn_cast(U->getUser())) + if (II->getIntrinsicID() == Intrinsic::noalias_copy_guard) + return II; + } + return nullptr; +} + /// Walk the range of a partitioning looking for a common type to cover this /// sequence of slices. static std::pair @@ -1267,7 +1503,14 @@ LLVM_DEBUG(dbgs() << " original: " << PN << "\n"); LoadInst *SomeLoad = cast(PN.user_back()); - Type *LoadTy = SomeLoad->getType(); + if (SomeLoad->getPointerOperand() != &PN) { + // this must be the provenance -> ignore the speculation for now + LLVM_DEBUG(llvm::dbgs() << " not speculating dependency on provenance: " + << *SomeLoad << "\n"); + return; + } + + Type *LoadTy = cast(PN.getType())->getElementType(); IRBuilderTy PHIBuilder(&PN); PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(), PN.getName() + ".sroa.speculated"); @@ -2286,6 +2529,9 @@ const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset; Type *NewAllocaTy; + IntrinsicInst *OldNoAliasDecl = nullptr; + IntrinsicInst *NewNoAliasDecl = nullptr; + // This is a convenience and flag variable that will be null unless the new // alloca's integer operations should be widened to this integer type due to // passing isIntegerWideningViable above. 
If it is non-null, the desired @@ -2317,6 +2563,7 @@ uint64_t SliceSize = 0; bool IsSplittable = false; bool IsSplit = false; + bool RepresentsNoAlias = false; Use *OldUse = nullptr; Instruction *OldPtr = nullptr; @@ -2335,7 +2582,8 @@ uint64_t NewAllocaEndOffset, bool IsIntegerPromotable, VectorType *PromotableVecTy, SmallSetVector &PHIUsers, - SmallSetVector &SelectUsers) + SmallSetVector &SelectUsers, + bool ReprNoAlias) : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI), NewAllocaBeginOffset(NewAllocaBeginOffset), NewAllocaEndOffset(NewAllocaEndOffset), @@ -2350,14 +2598,15 @@ ElementTy(VecTy ? VecTy->getElementType() : nullptr), ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy).getFixedSize() / 8 : 0), - PHIUsers(PHIUsers), SelectUsers(SelectUsers), - IRB(NewAI.getContext(), ConstantFolder()) { + RepresentsNoAlias(ReprNoAlias), PHIUsers(PHIUsers), + SelectUsers(SelectUsers), IRB(NewAI.getContext(), ConstantFolder()) { if (VecTy) { assert((DL.getTypeSizeInBits(ElementTy).getFixedSize() % 8) == 0 && "Only multiple-of-8 sized vector elements are viable"); ++NumVectorized; } assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy)); + prepareNoAliasDecl(); } bool visit(AllocaSlices::const_iterator I) { @@ -2380,7 +2629,7 @@ SliceSize = NewEndOffset - NewBeginOffset; OldUse = I->getUse(); - OldPtr = cast(OldUse->get()); + OldPtr = dyn_cast(OldUse->get()); Instruction *OldUserI = cast(OldUse->getUser()); IRB.SetInsertPoint(OldUserI); @@ -2896,6 +3145,77 @@ return !II.isVolatile(); } + Instruction *detachFromNoAliasCopyGuard(Instruction *DepPtr, + Instruction *NACG) { + assert(getBaseAsCopyGuardOrNull(DepPtr) == NACG && + "DepPtr must depend on NACG"); + + // Follow first arg until we hit the llvm.noalias.copy.guard + Instruction *Ptr = cast(DepPtr); + while (true) { + if (Ptr->getNumUses() != 1) + break; + Instruction *ParentPtr = cast(Ptr->getOperand(0)); + if (ParentPtr == NACG) { + // we got here with only single uses - just remove the 
dependency + Ptr->setOperand(0, NACG->getOperand(0)); + return DepPtr; + } + Ptr = ParentPtr; + } + + assert(false && "Multiple use found - we should duplicate the chain"); + return DepPtr; + } + + Instruction *maybeIntroduceNoAlias(LoadInst *Load, Value *PtrOperand) { + if (!RepresentsNoAlias) + return Load; + + auto NACG = getBaseAsCopyGuardOrNull(PtrOperand); + if (NACG == nullptr) { + // strange, but could happen + LLVM_DEBUG(llvm::dbgs() + << "maybeIntroduceNoAlias: RepresentsNoAlias is true," + "but no copy.guard seen\n"); + return Load; + } + + Value *NoAliasDecl = + NACG->getOperand(Intrinsic::NoAliasCopyGuardNoAliasDeclArg); + + auto ScopeArg = NACG->getOperand(Intrinsic::NoAliasCopyGuardScopeArg); + + auto NewDepPtr = Load->getPointerOperand(); + if (auto NACG2 = getBaseAsCopyGuardOrNull(NewDepPtr)) { + assert(NACG2 == NACG && "llvm.noalias.copy.guard dep must be identical"); + (void)NACG2; + NewDepPtr = detachFromNoAliasCopyGuard( + cast(Load->getPointerOperand()), NACG); + Load->setOperand(Load->getPointerOperandIndex(), NewDepPtr); + } + + if (Load->getType()->isPointerTy()) { + auto NoAlias = IRB.CreateNoAliasPointer( + Load, NoAliasDecl, NewDepPtr, ScopeArg, Load->getName() + ".noalias"); + if (NoAliasDecl == OldNoAliasDecl) { + assert(NewNoAliasDecl->getOperand(Intrinsic::NoAliasDeclScopeArg) == + ScopeArg && + "New llvm.noalias.decl must have same scope"); + NoAlias->setOperand( + Intrinsic::NoAliasIdentifyPObjIdArg, + NewNoAliasDecl->getOperand(Intrinsic::NoAliasDeclObjIdArg)); + } + + LLVM_DEBUG(llvm::dbgs() << " introduce: " << *NoAlias << "\n"); + + return NoAlias; + } + + assert(false && "Need PtrToInt ?"); + return Load; + } + bool visitMemTransferInst(MemTransferInst &II) { // Rewriting of memory transfer instructions can be a bit tricky. We break // them into two categories: split intrinsics and unsplit intrinsics. 
@@ -3053,12 +3373,14 @@ Value *Src; if (VecTy && !IsWholeAlloca && !IsDest) { + // FIXME: should we handle noalias annotations here ? Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, NewAI.getAlign(), "load"); Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec"); } else if (IntTy && !IsWholeAlloca && !IsDest) { - Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, - NewAI.getAlign(), "load"); + LoadInst *Load = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, + NewAI.getAlign(), "load"); + Src = maybeIntroduceNoAlias(Load, II.getSource()); Src = convertValue(DL, IRB, Src, IntTy); uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset; Src = extractInteger(DL, IRB, Src, SubIntTy, Offset, "extract"); @@ -3067,7 +3389,7 @@ II.isVolatile(), "copyload"); if (AATags) Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); - Src = Load; + Src = maybeIntroduceNoAlias(Load, II.getSource()); } if (VecTy && !IsWholeAlloca && IsDest) { @@ -3087,13 +3409,130 @@ IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile())); if (AATags) Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); - LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); + LLVM_DEBUG(dbgs() << "(3) to: " << *Store << "\n"); return !II.isVolatile(); } - bool visitIntrinsicInst(IntrinsicInst &II) { - assert((II.isLifetimeStartOrEnd() || II.isDroppable()) && - "Unexpected intrinsic!"); + void prepareNoAliasDecl() { + OldNoAliasDecl = nullptr; + NewNoAliasDecl = nullptr; + for (auto U : OldAI.users()) { + IntrinsicInst *II = dyn_cast(U); + if (II && II->getIntrinsicID() == Intrinsic::noalias_decl) { + if (OldNoAliasDecl) { + // We already found a llvm.noalias.decl - leave it up to the visitor to + // propagate + OldNoAliasDecl = nullptr; + NewNoAliasDecl = nullptr; + break; + } + IRB.SetInsertPoint(II); + IRB.SetCurrentDebugLocation(II->getDebugLoc()); + IRB.getInserter().SetNamePrefix(Twine(NewAI.getName()) + + ".noalias.decl."); + + OldNoAliasDecl = II; + 
LLVM_DEBUG(dbgs() << "Found llvm.noalias.decl: " << *II << "\n"); + ConstantInt *OldId = cast( + II->getArgOperand(Intrinsic::NoAliasDeclObjIdArg)); + NewNoAliasDecl = cast(IRB.CreateNoAliasDeclaration( + &NewAI, NewAllocaBeginOffset + OldId->getZExtValue(), + II->getArgOperand(2))); + LLVM_DEBUG(dbgs() << "New llvm.noalias.decl: " << *NewNoAliasDecl + << "\n"); + // continue - it is possible we see multiple llvm.noalias.decl! + } + } + } + + bool visitNoAliasDeclIntrinsicInst(IntrinsicInst &II) { + assert(II.getIntrinsicID() == Intrinsic::noalias_decl); + LLVM_DEBUG(dbgs() << " original: " << II << "\n"); + Value *New; + if (OldNoAliasDecl) { + assert(OldNoAliasDecl == &II); + assert(NewNoAliasDecl != nullptr); + New = NewNoAliasDecl; + } else { + assert(NewNoAliasDecl == nullptr); + ConstantInt *OldId = + cast(II.getArgOperand(Intrinsic::NoAliasDeclObjIdArg)); + New = cast(IRB.CreateNoAliasDeclaration( + &NewAI, NewAllocaBeginOffset + OldId->getZExtValue(), + II.getArgOperand(2))); + } + (void)New; + LLVM_DEBUG(dbgs() << " to: " << *New << "\n"); + + // Record this instruction for deletion. 
+ Pass.DeadInsts.push_back(&II); + + // nothing else to do - preparation was already done + return true; + } + + bool visitProvenanceNoAliasIntrinsicInst(IntrinsicInst &II) { + assert(II.getIntrinsicID() == Intrinsic::provenance_noalias); + assert(II.getArgOperand(Intrinsic::ProvenanceNoAliasIdentifyPArg) == + OldPtr); + LLVM_DEBUG(dbgs() << " original: " << II << "\n"); + if (II.getArgOperand(Intrinsic::ProvenanceNoAliasNoAliasDeclArg) == + OldNoAliasDecl) { + assert(OldNoAliasDecl && NewNoAliasDecl && + "If we get here, we must have an old and a new llvm.noalias.decl"); + II.setArgOperand(Intrinsic::ProvenanceNoAliasNoAliasDeclArg, + NewNoAliasDecl); + } + II.setArgOperand( + Intrinsic::ProvenanceNoAliasIdentifyPArg, + getNewAllocaSlicePtr( + IRB, II.getArgOperand(Intrinsic::ProvenanceNoAliasIdentifyPArg) + ->getType())); + if (NewAllocaBeginOffset > 0) { + Value *OldObjIdV = + II.getArgOperand(Intrinsic::ProvenanceNoAliasIdentifyPObjIdArg); + auto NewObjId = ConstantInt::get( + OldObjIdV->getType(), + cast(OldObjIdV)->getZExtValue() + NewAllocaBeginOffset); + II.setArgOperand(Intrinsic::ProvenanceNoAliasIdentifyPObjIdArg, NewObjId); + } + LLVM_DEBUG(dbgs() << " to: " << II << "\n"); + deleteIfTriviallyDead(OldPtr); + return true; + } + + bool visitNoAliasIntrinsicInst(IntrinsicInst &II) { + assert(II.getIntrinsicID() == Intrinsic::noalias); + assert(II.getArgOperand(Intrinsic::NoAliasIdentifyPArg) == OldPtr); + LLVM_DEBUG(dbgs() << " original: " << II << "\n"); + if (II.getArgOperand(Intrinsic::NoAliasNoAliasDeclArg) == OldNoAliasDecl) { + assert(OldNoAliasDecl && NewNoAliasDecl && + "If we get here, we must have an old and a new llvm.noalias.decl"); + II.setArgOperand(Intrinsic::NoAliasNoAliasDeclArg, NewNoAliasDecl); + } + II.setArgOperand( + Intrinsic::NoAliasIdentifyPArg, + getNewAllocaSlicePtr( + IRB, II.getArgOperand(Intrinsic::NoAliasIdentifyPArg)->getType())); + if (NewAllocaBeginOffset > 0) { + Value *OldObjIdV = 
II.getArgOperand(Intrinsic::NoAliasIdentifyPObjIdArg); + auto NewObjId = ConstantInt::get( + OldObjIdV->getType(), + cast(OldObjIdV)->getZExtValue() + NewAllocaBeginOffset); + II.setArgOperand(Intrinsic::NoAliasIdentifyPObjIdArg, NewObjId); + } + LLVM_DEBUG(dbgs() << " to: " << II << "\n"); + deleteIfTriviallyDead(OldPtr); + return true; + } + + bool visitNoAliasCopyGuardIntrinsicInst(IntrinsicInst &II) { + assert(II.getIntrinsicID() == Intrinsic::noalias_copy_guard); + return true; + } + + bool visitLifetimeIntrinsicInst(IntrinsicInst &II) { + assert(II.isLifetimeStartOrEnd()); LLVM_DEBUG(dbgs() << " original: " << II << "\n"); // Record this instruction for deletion. @@ -3137,6 +3576,25 @@ return true; } + bool visitIntrinsicInst(IntrinsicInst &II) { + switch (II.getIntrinsicID()) { + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + return visitLifetimeIntrinsicInst(II); + case Intrinsic::noalias_decl: + return visitNoAliasDeclIntrinsicInst(II); + case Intrinsic::noalias: + return visitNoAliasIntrinsicInst(II); + case Intrinsic::provenance_noalias: + return visitProvenanceNoAliasIntrinsicInst(II); + case Intrinsic::noalias_copy_guard: + return visitNoAliasCopyGuardIntrinsicInst(II); + default: + assert(false && "SROA: SliceRewriter: unhandled intrinsic"); + return false; + } + } + void fixLoadStoreAlign(Instruction &Root) { // This algorithm implements the same visitor loop as // hasUnsafePHIOrSelectUse, and fixes the alignment of each load @@ -3229,6 +3687,264 @@ namespace { +// Returns true if the indices of the provided GEP are compatible with +// the indices in the llvm.noalias.copy.guard. 
+// - treatMissingIndicesAsZero: if the number of indices from the GEP +// is smaller, treat the missing indices as zero +// Note: A nullptr GEP can be combined with treatMissingIndicesAsZero=true +static bool +areGepIndicesCompatibleWithCopyGuard(GetElementPtrInst *GEP, + llvm::Instruction *CopyGuardII, + bool treatMissingIndicesAsZero = false) { + assert(CopyGuardII && "We need a llvm.noalias.copy.guard"); + + MDNode *CopyGuardIndices = cast( + cast( + CopyGuardII->getOperand(Intrinsic::NoAliasCopyGuardIndicesArg)) + ->getMetadata()); + for (const MDOperand &MDOp : CopyGuardIndices->operands()) { + if (const MDNode *IMD = dyn_cast(MDOp)) { + unsigned CGIndex = 0; + bool IndicesAreCompatible = true; + if (GEP) { + if (IMD->getNumOperands() < GEP->getNumIndices()) + continue; + for (Value *Index : GEP->indices()) { + const MDOperand &MDIndex = IMD->getOperand(CGIndex); + ++CGIndex; + ConstantInt *C_lhs = + cast(cast(MDIndex)->getValue()); + if (C_lhs->isMinusOne()) + continue; // accept any index at this place + ConstantInt *C_rhs = dyn_cast(Index); + if ((C_rhs == nullptr) || + (C_lhs->getSExtValue() != + C_rhs->getSExtValue())) { // compare int64 - the ConstantInt can + // have different types + IndicesAreCompatible = false; + break; + } + } + if (!IndicesAreCompatible) + continue; + } + // If there are more indices, check that they are zero + unsigned CGMaxIndex = IMD->getNumOperands(); + if (treatMissingIndicesAsZero) { + for (; CGIndex < CGMaxIndex; ++CGIndex) { + const MDOperand &MDIndex = IMD->getOperand(CGIndex); + ConstantInt *C_lhs = + cast(cast(MDIndex)->getValue()); + if (C_lhs->isZero() || C_lhs->isMinusOne()) + continue; // accept 0 or any index at this place + // otherwise, we do not have all-zero indices + IndicesAreCompatible = false; + break; + } + } else { + IndicesAreCompatible = (CGIndex == CGMaxIndex); + } + if (IndicesAreCompatible) { + return true; + } + } + } + return false; +} + +static Type *GetZeroIndexLeafType(Type *TypeToLoad) { + 
while (true) { + if (StructType *ST = dyn_cast(TypeToLoad)) { + TypeToLoad = ST->getElementType(0); + continue; + } + if (ArrayType *AT = dyn_cast(TypeToLoad)) { + TypeToLoad = AT->getElementType(); + continue; + } + if (VectorType *VT = dyn_cast(TypeToLoad)) { + TypeToLoad = VT->getElementType(); + continue; + } + break; + } + assert(TypeToLoad->isPointerTy() && "Only pointers can have noalias info"); + + return TypeToLoad; +} + +static void detachIfSingleUse(Value *What, Instruction *NACG) { + // Be on the safe side + Instruction *I = dyn_cast(What); + if (I && I->getNumUses() == 1 && I->getOperand(0) == NACG) { + I->setOperand(0, NACG->getOperand(0)); + } +} + +// Check if the load corresponds to a restrict pointer, as specified in the +// CopyGuard information. +// If so, add and return 'llvm.noalias' before the load. If the original load +// needs to be replaced, due to bitcasts, it is returned through the 'Load' +// argument. +static llvm::Instruction *introduceNoAliasWhenCopyGuardIndicesAreCompatible( + llvm::LoadInst *Load, llvm::Instruction *CopyGuardII, const DataLayout &DL, + SmallVector *TrackSliceUses = nullptr) { + Value *PtrOp = Load->getPointerOperand(); + + if (TrackSliceUses) + TrackSliceUses->push_back( + &Load->getOperandUse(Load->getPointerOperandIndex())); + + if (CopyGuardII == nullptr) + return Load; + + // Possible cases: + // 1) load ( gep ( CopyGuard) ) + if (GetElementPtrInst *GEP = dyn_cast(PtrOp)) { + if (areGepIndicesCompatibleWithCopyGuard(GEP, CopyGuardII)) { + IRBuilderTy IRB(Load->getNextNode()); + + detachIfSingleUse(GEP, CopyGuardII); + + // A compatible set of indices was found - introduce a noalias intrinsic + // FIXME: what AAMetadata should we put on the llvm.noalias ? 
+ auto NoAlias = IRB.CreateNoAliasPointer( + Load, + CopyGuardII->getOperand(Intrinsic::NoAliasCopyGuardNoAliasDeclArg), + GEP, CopyGuardII->getOperand(Intrinsic::NoAliasCopyGuardScopeArg), + Load->getName() + ".noalias"); + // juggle around + Load->replaceAllUsesWith(NoAlias); + NoAlias->setOperand(0, Load); + LLVM_DEBUG(llvm::dbgs() + << " - compatible, introduced:" << *NoAlias << "\n"); + + if (TrackSliceUses) + TrackSliceUses->push_back( + &NoAlias->getOperandUse(Intrinsic::NoAliasIdentifyPArg)); + + return NoAlias; + } + + return Load; + } + + if (BitCastInst *BCI = dyn_cast(PtrOp)) { + // We want to pass it as an integer type or pointer + if (!(Load->getType()->isIntegerTy() || Load->getType()->isPointerTy())) { + LLVM_DEBUG(llvm::dbgs() + << " ** copy.guard: ignoring non integer or pointer:" << *Load + << "\n"); + return Load; + } + + bool IsLoadOfInteger = Load->getType()->isIntegerTy(); + + // 2) load iXX (bitcast (gep (CopyGuard))) + // load pXX (bitcast (gep (CopyGuard))) + // 3) load iXX (bitcast (CopyGuard)) + // load pXX (bitcast (CopyGuard)) + Value *BCIOp = BCI->getOperand(0); + GetElementPtrInst *GEP = dyn_cast(BCI->getOperand(0)); + if (GEP || (BCIOp == CopyGuardII)) { + Type *TypeToLoad = BCIOp->getType()->getPointerElementType(); + + // Also handles a null GEP + if (!areGepIndicesCompatibleWithCopyGuard(GEP, CopyGuardII, true)) { + return Load; + } + + TypeToLoad = GetZeroIndexLeafType(TypeToLoad); + + // Sizes must be identical + if (DL.getTypeStoreSizeInBits(TypeToLoad) != + DL.getTypeStoreSizeInBits(Load->getType())) { + LLVM_DEBUG(llvm::dbgs() << " ** copy.guard: type sizes do not match\n"); + return Load; + } + + IRBuilderTy IRB(Load->getNextNode()); + + if (GEP) { + detachIfSingleUse(GEP, CopyGuardII); + } else { + // Look through BCIop == CopyGuardII + BCIOp = CopyGuardII->getOperand(0); + } + + LLVM_DEBUG( + llvm::dbgs() + << (GEP ? 
" - compatible bitcast(gep(copy.guard)), introduced:\n" + : " - compatible bitcast(copy.guard), introduced:\n")); + + LoadInst *NewLoad = Load; + Value *NewPtr = BCIOp; + if (IsLoadOfInteger) { + NewPtr = IRB.CreatePointerBitCastOrAddrSpaceCast( + BCIOp, TypeToLoad->getPointerTo()); + + if (NewPtr != BCIOp) { + LLVM_DEBUG(llvm::dbgs() << " -- " << *NewPtr << "\n"); + } + + NewLoad = IRB.CreateAlignedLoad(TypeToLoad, NewPtr, Load->getAlign(), + Load->getName() + ".sroa_as_ptr"); + AAMDNodes AATags; + Load->getAAMetadata(AATags); + NewLoad->setAAMetadata(AATags); + LLVM_DEBUG(llvm::dbgs() << " -- " << *NewLoad << "\n"); + } + + // A compatible set of indices was found - introduce a noalias intrinsic + // FIXME: what AAMetadata should we put on the llvm.noalias ? + auto NoAlias = IRB.CreateNoAliasPointer( + NewLoad, + CopyGuardII->getOperand(Intrinsic::NoAliasCopyGuardNoAliasDeclArg), + NewPtr, CopyGuardII->getOperand(Intrinsic::NoAliasCopyGuardScopeArg), + NewLoad->getName() + ".noalias"); + + LLVM_DEBUG(llvm::dbgs() << " -- " << *NoAlias << "\n"); + Value *RetVal; + if (IsLoadOfInteger) { + auto PtrCast = IRB.CreatePtrToInt(NoAlias, Load->getType(), + Load->getName() + ".sroa_as_int"); + + LLVM_DEBUG(llvm::dbgs() << " -- " << *PtrCast << "\n"); + + // juggle around + Load->replaceAllUsesWith(PtrCast); + Load->eraseFromParent(); + + RetVal = PtrCast; + } else { + Load->replaceAllUsesWith(NoAlias); + + RetVal = NoAlias; + } + + if (BCI->use_empty()) + BCI->eraseFromParent(); + + if (TrackSliceUses) { + TrackSliceUses->back() = + &NewLoad->getOperandUse(NewLoad->getPointerOperandIndex()); + TrackSliceUses->push_back( + &NoAlias->getOperandUse(Intrinsic::NoAliasIdentifyPArg)); + } + + return cast(RetVal); + } + + LLVM_DEBUG(llvm::dbgs() + << " ** copy.guard: unhandled bitcast:" << BCI << "\n"); + return Load; + } + + LLVM_DEBUG(llvm::dbgs() << "copy.guard: unhandled:" << Load << "\n"); + // unhandled other situation + return Load; +} + /// Visitor to rewrite aggregate 
loads and stores as scalar. /// /// This pass aggressively rewrites all aggregate loads and stores on @@ -3237,6 +3953,7 @@ class AggLoadStoreRewriter : public InstVisitor { // Befriend the base class so it can delegate to private visit methods. friend class InstVisitor; + typedef InstVisitor Base; /// Queue of pointer uses to analyze and potentially rewrite. SmallVector Queue; @@ -3371,45 +4088,73 @@ struct LoadOpSplitter : public OpSplitter { AAMDNodes AATags; + Instruction *CopyGuardII = nullptr; + unsigned CGIIndex = 0; LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy, - AAMDNodes AATags, Align BaseAlign, const DataLayout &DL) + AAMDNodes AATags, Align BaseAlign, const DataLayout &DL, + Instruction *CopyGuardII_) : OpSplitter(InsertionPoint, Ptr, BaseTy, BaseAlign, DL), - AATags(AATags) {} + AATags(AATags), CopyGuardII(CopyGuardII_) {} /// Emit a leaf load of a single value. This is called at the leaves of the /// recursive emission to actually load values. void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) { assert(Ty->isSingleValueType()); // Load the single value and insert it using the indices. 
+ auto Ptr = this->Ptr; // Make sure _NOT_ to overwrite the Ptr member + if (CopyGuardII) { + assert(CopyGuardII == Ptr && "Ptr != CopyGuardII ???"); + Ptr = CopyGuardII->getOperand(0); // look through noalias.copy.guard + } Value *GEP = IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep"); - LoadInst *Load = + Instruction *PValue; + LoadInst *PLoad = IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load"); APInt Offset( DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0); if (AATags && GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset)) - Load->setAAMetadata(AATags.shift(Offset.getZExtValue())); + PLoad->setAAMetadata(AATags.shift(Offset.getZExtValue())); + PValue = introduceNoAliasWhenCopyGuardIndicesAreCompatible( + PLoad, CopyGuardII, DL); - Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert"); - LLVM_DEBUG(dbgs() << " to: " << *Load << "\n"); + Agg = IRB.CreateInsertValue(Agg, PValue, Indices, Name + ".insert"); + LLVM_DEBUG(dbgs() << " to: " << *PValue << "\n"); } }; bool visitLoadInst(LoadInst &LI) { - assert(LI.getPointerOperand() == *U); - if (!LI.isSimple() || LI.getType()->isSingleValueType()) + if (U->getOperandNo() == LI.getNoaliasProvenanceOperandIndex()) { + // Skip provenance + assert(LI.hasNoaliasProvenanceOperand() && + LI.getNoaliasProvenanceOperand() == *U); return false; + } + assert(LI.getPointerOperand() == *U); + Instruction *CopyGuardII = getBaseAsCopyGuardOrNull(LI.getPointerOperand()); + if (CopyGuardII) { + LLVM_DEBUG(llvm::dbgs() << " Replacing Load:" << LI + << "\n" + " Depends on:" + << *CopyGuardII << "\n"); + } + if (!LI.isSimple() || LI.getType()->isSingleValueType()) { + LoadInst *PLI = &LI; + auto Load = introduceNoAliasWhenCopyGuardIndicesAreCompatible( + PLI, CopyGuardII, DL); + return (Load != PLI); + } // We have an aggregate being loaded, split it apart. 
LLVM_DEBUG(dbgs() << " original: " << LI << "\n"); AAMDNodes AATags; LI.getAAMetadata(AATags); LoadOpSplitter Splitter(&LI, *U, LI.getType(), AATags, - getAdjustedAlignment(&LI, 0), DL); + getAdjustedAlignment(&LI, 0), DL, CopyGuardII); Value *V = UndefValue::get(LI.getType()); Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca"); Visited.erase(&LI); @@ -3459,6 +4204,16 @@ // We have an aggregate being stored, split it apart. LLVM_DEBUG(dbgs() << " original: " << SI << "\n"); + + if (auto *LI = dyn_cast(SI.getValueOperand())) { + // Try to split up the depending load, helpful for tracking noalias info + if (Visited.insert(LI).second) { + LLVM_DEBUG(llvm::dbgs() + << " - Forcing split of of StoreInst value operand\n"); + Queue.push_back(&LI->getOperandUse(LI->getPointerOperandIndex())); + } + } + AAMDNodes AATags; SI.getAAMetadata(AATags); StoreOpSplitter Splitter(&SI, *U, V->getType(), AATags, @@ -3474,6 +4229,30 @@ return false; } + // Look through noalias intrinsics + bool visitIntrinsicInst(IntrinsicInst &II) { + if (II.getIntrinsicID() == Intrinsic::noalias) { + if (II.getOperand(0) == *U) { + enqueueUsers(II); + } + return false; + } + if (II.getIntrinsicID() == Intrinsic::provenance_noalias || + II.getIntrinsicID() == Intrinsic::noalias_decl) { + return false; + } + if (II.getIntrinsicID() == Intrinsic::noalias_copy_guard) { + LLVM_DEBUG(llvm::dbgs() + << "AggLoadStoreRewriter: handling llvm.noalias.copy.guard:" + << (II.getOperand(0) == *U) << ":" << II << "\n"); + if (II.getOperand(0) == *U) + enqueueUsers(II); + return false; + } + + return Base::visitIntrinsicInst(II); + } + bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) { enqueueUsers(ASC); return false; @@ -3982,8 +4761,8 @@ // First, we rewrite all of the split loads, and just accumulate each split // load in a parallel structure. We also build the slices for them and append // them to the alloca slices. 
- SmallDenseMap, 1> SplitLoadsMap; - std::vector SplitLoads; + SmallDenseMap, 1> SplitLoadsMap; + std::vector SplitLoads; const DataLayout &DL = AI.getModule()->getDataLayout(); for (LoadInst *LI : Loads) { SplitLoads.clear(); @@ -4002,6 +4781,8 @@ Instruction *BasePtr = cast(LI->getPointerOperand()); IRB.SetInsertPoint(LI); + Instruction *CopyGuardII = getBaseAsCopyGuardOrNull(BasePtr); + LLVM_DEBUG(dbgs() << " Splitting load: " << *LI << "\n"); uint64_t PartOffset = 0, PartSize = Offsets.Splits.front(); @@ -4020,18 +4801,23 @@ PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); + SmallVector UsesToTrack; + auto *PValue = introduceNoAliasWhenCopyGuardIndicesAreCompatible( + PLoad, CopyGuardII, DL, &UsesToTrack); + // Append this load onto the list of split loads so we can find it later // to rewrite the stores. - SplitLoads.push_back(PLoad); + SplitLoads.push_back(PValue); // Now build a new slice for the alloca. - NewSlices.push_back( - Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize, - &PLoad->getOperandUse(PLoad->getPointerOperandIndex()), - /*IsSplittable*/ false)); - LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset() - << ", " << NewSlices.back().endOffset() - << "): " << *PLoad << "\n"); + for (Use *PUse : UsesToTrack) { + NewSlices.push_back(Slice(BaseOffset + PartOffset, + BaseOffset + PartOffset + PartSize, PUse, + /*IsSplittable*/ false)); + LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset() + << ", " << NewSlices.back().endOffset() + << "): " << *PUse->getUser() << "\n"); + } // See if we've handled all the splits. if (Idx >= Size) @@ -4062,7 +4848,7 @@ LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n"); for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) { - LoadInst *PLoad = SplitLoads[Idx]; + auto *PLoad = SplitLoads[Idx]; uint64_t PartOffset = Idx == 0 ? 
0 : Offsets.Splits[Idx - 1]; auto *PartPtrTy = PLoad->getType()->getPointerTo(SI->getPointerAddressSpace()); @@ -4124,13 +4910,14 @@ "Cannot represent alloca access size using 64-bit integers!"); Value *LoadBasePtr = LI->getPointerOperand(); + Instruction *CopyGuardII = getBaseAsCopyGuardOrNull(LoadBasePtr); Instruction *StoreBasePtr = cast(SI->getPointerOperand()); LLVM_DEBUG(dbgs() << " Splitting store: " << *SI << "\n"); // Check whether we have an already split load. auto SplitLoadsMapI = SplitLoadsMap.find(LI); - std::vector *SplitLoads = nullptr; + std::vector *SplitLoads = nullptr; if (SplitLoadsMapI != SplitLoadsMap.end()) { SplitLoads = &SplitLoadsMapI->second; assert(SplitLoads->size() == Offsets.Splits.size() + 1 && @@ -4147,19 +4934,21 @@ auto *StorePartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace()); // Either lookup a split load or create one. - LoadInst *PLoad; + Instruction *PLoad; if (SplitLoads) { PLoad = (*SplitLoads)[Idx]; } else { IRB.SetInsertPoint(LI); auto AS = LI->getPointerAddressSpace(); - PLoad = IRB.CreateAlignedLoad( + LoadInst *NewPLoad = IRB.CreateAlignedLoad( PartTy, getAdjustedPtr(IRB, DL, LoadBasePtr, APInt(DL.getIndexSizeInBits(AS), PartOffset), LoadPartPtrTy, LoadBasePtr->getName() + "."), getAdjustedAlignment(LI, PartOffset), /*IsVolatile*/ false, LI->getName()); + PLoad = introduceNoAliasWhenCopyGuardIndicesAreCompatible( + NewPLoad, CopyGuardII, DL); } // And store this partition. @@ -4268,6 +5057,7 @@ // or an i8 array of an appropriate size. Type *SliceTy = nullptr; const DataLayout &DL = AI.getModule()->getDataLayout(); + auto RepresentsNoAlias = (partitionRepresentsNoAliasPointer(P) != nullptr); std::pair CommonUseTy = findCommonType(P.begin(), P.end(), P.endOffset()); // Do all uses operate on the same type? 
@@ -4298,6 +5088,14 @@ if (VecTy) SliceTy = VecTy; + if (RepresentsNoAlias && !SliceTy->isPointerTy()) { + if (DL.getTypeStoreSizeInBits(SliceTy) == + DL.getTypeStoreSizeInBits(SliceTy->getPointerTo())) { + // a restrict pointer must be a pointer + SliceTy = SliceTy->getPointerTo(); + } + } + // Check for the case where we're going to rewrite to a new alloca of the // exact same type as the original, and with the same access offsets. In that // case, re-use the existing alloca, but still run through the rewriter to @@ -4339,7 +5137,7 @@ AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(), P.endOffset(), IsIntegerPromotable, VecTy, - PHIUsers, SelectUsers); + PHIUsers, SelectUsers, RepresentsNoAlias); bool Promotable = true; for (Slice *S : P.splitSliceTails()) { Promotable &= Rewriter.visit(S); @@ -4605,6 +5403,7 @@ /// rewritten as needed. bool SROA::runOnAlloca(AllocaInst &AI) { LLVM_DEBUG(dbgs() << "SROA alloca: " << AI << "\n"); + LLVM_DEBUG(AI.getParent()->getParent()->dump()); ++NumAllocasAnalyzed; // Special case dead allocas, as they're trivial. 
Index: llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp =================================================================== --- llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -25,7 +25,6 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -35,6 +34,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -45,6 +45,7 @@ #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/Support/Casting.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include #include @@ -61,35 +62,205 @@ STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); -bool llvm::isAllocaPromotable(const AllocaInst *AI) { +class PromotableChecker { +public: + bool check(bool c) { return c; } + void trackRemovable(const Instruction *I) {} + void trackDroppableUse(const Use *U) {} + void trackOperandToZero(const Instruction *I, int operand) {} + void trackNoAliasDecl(const IntrinsicInst *II) {} + + void trackRemovableOrDroppableUses(const Instruction *I) {} +}; + +class PromotableTracker { +public: + bool check(bool c) { + assert(!c && "PromotableTracker::check failed"); + return false; + } + void trackRemovable(Instruction *I) { + // FIXME: Performance Warning: linear search - might become slow (?) 
+ if (std::find(Removables.begin(), Removables.end(), I) == Removables.end()) + Removables.push_back(I); + } + void trackDroppableUse(Use *U) { DroppableUses.push_back(U); } + void trackOperandToZero(Instruction *I, int operand) { + ZeroOperands.emplace_back(I, operand); + } + void trackNoAliasDecl(IntrinsicInst *II) { NoAliasDecls.push_back(II); } + + void trackRemovableOrDroppableUses(Instruction *I) { + for (auto &U_ : I->uses()) { + if (U_.getUser()->isDroppable()) + trackDroppableUse(&U_); + else + trackRemovable(cast(U_.getUser())); + } + } + +public: + SmallVector Removables; + SmallVector DroppableUses; + SmallVector, 4> ZeroOperands; + SmallVector NoAliasDecls; +}; + +// Return true if the only usage of this pointer is as identifyP argument for +// llvm.noalias or llvm.provenance.noalias (either direct or recursive) +// Look through bitcast, getelementptr, llvm.noalias, llvm.provenance.noalias + +template +bool onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(Value *V, PT &pt); + +template +bool isAndOnlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(IntrinsicInst *II, + unsigned OpNo, + PT &pt) { + if (II->getIntrinsicID() == Intrinsic::provenance_noalias) { + if (OpNo == 0) { + if (!onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(II, pt)) + return false; + pt.trackRemovable(II); + } else if (OpNo == Intrinsic::ProvenanceNoAliasIdentifyPArg) { + pt.trackOperandToZero(II, OpNo); + } else if (OpNo == Intrinsic::ProvenanceNoAliasIdentifyPProvenanceArg) { + pt.trackOperandToZero(II, OpNo); + } else { + assert(false && "Unexpected llvm.provenance.noalias dependency"); + } + return true; + } else if (II->getIntrinsicID() == Intrinsic::noalias) { + if (OpNo == 0) { + if (!onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(II, pt)) + return false; + pt.trackRemovable(II); + } else if (OpNo == Intrinsic::NoAliasIdentifyPArg) { + pt.trackOperandToZero(II, OpNo); + } else { + assert(false && "Unexpected llvm.provenance.noalias dependency"); + } + return true; + } + + 
return false; +} + +template +bool onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(Value *V, PT &pt) { + for (Use &U_ : V->uses()) { + unsigned OpNo = U_.getOperandNo(); + User *U = U_.getUser(); + if (IntrinsicInst *II = dyn_cast(U)) { + if (isAndOnlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(II, OpNo, pt)) + continue; + return false; + } else if (BitCastInst *BCI = dyn_cast(U)) { + if (!onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(BCI, pt)) + return false; + pt.trackRemovable(BCI); + } else if (GetElementPtrInst *GEPI = dyn_cast(U)) { + if (!onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(GEPI, pt)) + return false; + pt.trackRemovable(GEPI); + } else if (AddrSpaceCastInst *ASCI = dyn_cast(U)) { + if (!onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(ASCI, pt)) + return false; + pt.trackRemovable(ASCI); + } else { + return false; + } + } + return true; +} + +template bool trackAllocaPromotable(AllocaInst *AI, PT &pt) { // Only allow direct and non-volatile loads and stores... - for (const User *U : AI->users()) { - if (const LoadInst *LI = dyn_cast(U)) { + for (Use &U_ : AI->uses()) { + unsigned OpNo = U_.getOperandNo(); + User *U = U_.getUser(); + + if (LoadInst *LI = dyn_cast(U)) { // Note that atomic loads can be transformed; atomic semantics do // not have any meaning for a local alloca. - if (LI->isVolatile()) + if (pt.check(LI->isVolatile())) return false; - } else if (const StoreInst *SI = dyn_cast(U)) { - if (SI->getOperand(0) == AI) + if (OpNo == LI->getNoaliasProvenanceOperandIndex()) { + // Load will be removed. Disconnect provenance.noalias dependency + pt.trackOperandToZero(LI, OpNo); + } + } else if (StoreInst *SI = dyn_cast(U)) { + if (pt.check(OpNo == 0)) return false; // Don't allow a store OF the AI, only INTO the AI. // Note that atomic stores can be transformed; atomic semantics do // not have any meaning for a local alloca. 
- if (SI->isVolatile()) + if (pt.check(SI->isVolatile())) return false; - } else if (const IntrinsicInst *II = dyn_cast(U)) { - if (!II->isLifetimeStartOrEnd() && !II->isDroppable()) + if (OpNo == SI->getNoaliasProvenanceOperandIndex()) { + // Store will be removed. Disconnect provenance.noalias dependency + pt.trackOperandToZero(SI, OpNo); + } + } else if (IntrinsicInst *II = dyn_cast(U)) { + switch (II->getIntrinsicID()) { + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + pt.trackRemovable(II); + break; + case Intrinsic::noalias_decl: + pt.trackNoAliasDecl(II); + break; + case Intrinsic::noalias: + case Intrinsic::provenance_noalias: + if (!isAndOnlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(II, OpNo, + pt)) + return false; + break; + default: + if (II->isDroppable()) { + pt.trackDroppableUse(&U_); + break; + } return false; - } else if (const BitCastInst *BCI = dyn_cast(U)) { - if (!onlyUsedByLifetimeMarkersOrDroppableInsts(BCI)) + } + } else if (BitCastInst *BCI = dyn_cast(U)) { + if (onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(BCI, pt)) { + pt.trackRemovable(BCI); + continue; + } + if (pt.check(!onlyUsedByLifetimeMarkersOrDroppableInsts(BCI))) return false; - } else if (const GetElementPtrInst *GEPI = dyn_cast(U)) { - if (!GEPI->hasAllZeroIndices()) + + pt.trackRemovableOrDroppableUses(BCI); + pt.trackRemovable(BCI); + } else if (AddrSpaceCastInst *ACI = dyn_cast(U)) { + if (onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(ACI, pt)) { + pt.trackRemovable(ACI); + continue; + } + if (pt.check(!onlyUsedByLifetimeMarkersOrDroppableInsts(ACI))) return false; - if (!onlyUsedByLifetimeMarkersOrDroppableInsts(GEPI)) + + pt.trackRemovableOrDroppableUses(ACI); + pt.trackRemovable(ACI); + } else if (GetElementPtrInst *GEPI = dyn_cast(U)) { + if (onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(GEPI, pt)) { + pt.trackRemovable(GEPI); + continue; + } + if (pt.check(!GEPI->hasAllZeroIndices())) return false; - } else if (const AddrSpaceCastInst 
*ASCI = dyn_cast(U)) { - if (!onlyUsedByLifetimeMarkers(ASCI)) + if (pt.check(!onlyUsedByLifetimeMarkersOrDroppableInsts(GEPI))) return false; + + pt.trackRemovableOrDroppableUses(GEPI); + pt.trackRemovable(GEPI); + } else if (AddrSpaceCastInst *ASCI = dyn_cast(U)) { + if (onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(ASCI, pt)) { + pt.trackRemovable(ASCI); + continue; + } + + return false; } else { return false; } @@ -98,6 +269,11 @@ return true; } +bool llvm::isAllocaPromotable(const AllocaInst *AI) { + PromotableChecker pc; + return trackAllocaPromotable(const_cast(AI), pc); +} + namespace { struct AllocaInfo { @@ -311,34 +487,196 @@ static void removeIntrinsicUsers(AllocaInst *AI) { // Knowing that this alloca is promotable, we know that it's safe to kill all - // instructions except for load and store. + // instructions except for load and store and noalias intrinsics. - for (Use &U : llvm::make_early_inc_range(AI->uses())) { - Instruction *I = cast(U.getUser()); - if (isa(I) || isa(I)) - continue; + // Track the possible intrinsics. If we do not have a noalias.decl or we do + // not have an unknown function scope, no extra modificiations are needed. If + // both are there, we need to propagate the MetadataValue from the declaration + // to those intrinsics that are using the unknown scope. + PromotableTracker pt; - // Drop the use of AI in droppable instructions. - if (I->isDroppable()) { - I->dropDroppableUse(U); - continue; - } + if (!trackAllocaPromotable(AI, pt)) { + assert(false && "trackAllocaPromotable not consistent"); + } - if (!I->getType()->isVoidTy()) { - // The only users of this bitcast/GEP instruction are lifetime intrinsics. - // Follow the use/def chain to erase them now instead of leaving it for - // dead code elimination later. 
- for (Use &UU : llvm::make_early_inc_range(I->uses())) { - Instruction *Inst = cast(UU.getUser()); + // Propagate NoaliasDecl + MDNode *NoAliasUnknownScopeMD = + AI->getParent()->getParent()->getMetadata("noalias"); + Instruction *NoAliasDecl = nullptr; + if (pt.NoAliasDecls.size() == 1) + NoAliasDecl = pt.NoAliasDecls[0]; + + if (NoAliasUnknownScopeMD) { + if (NoAliasDecl) { + LLVM_DEBUG(llvm::dbgs() + << "- Propagating " << *NoAliasDecl << " scope to:\n"); + auto NoAliasDeclScope = + NoAliasDecl->getOperand(Intrinsic::NoAliasDeclScopeArg); + for (auto PairIO : pt.ZeroOperands) { + Instruction *I = PairIO.first; + auto OpNo = PairIO.second; + (void)OpNo; // Silence not used warning in Release builds. + if (IntrinsicInst *II = dyn_cast(I)) { + auto ID = II->getIntrinsicID(); + if (ID == Intrinsic::noalias || ID == Intrinsic::provenance_noalias) { + // If we get here, we can assume the identifyP or its provenance + // are dependencies + assert( + (ID == Intrinsic::noalias) + ? (OpNo == Intrinsic::NoAliasIdentifyPArg) + : (OpNo == Intrinsic::ProvenanceNoAliasIdentifyPArg || + OpNo == + Intrinsic::ProvenanceNoAliasIdentifyPProvenanceArg)); + unsigned ScopeArg = (ID == Intrinsic::noalias + ? Intrinsic::NoAliasScopeArg + : Intrinsic::ProvenanceNoAliasScopeArg); + unsigned DeclArg = + (ID == Intrinsic::noalias + ? 
Intrinsic::NoAliasNoAliasDeclArg + : Intrinsic::ProvenanceNoAliasNoAliasDeclArg); + MetadataAsValue *MV = + cast(I->getOperand(ScopeArg)); + if (NoAliasUnknownScopeMD == MV->getMetadata()) { + // Propagate the declaration scope + // Note: splitting already took care of updating the ObjId + LLVM_DEBUG(llvm::dbgs() << "-- " << *I << "\n"); + II->setOperand(ScopeArg, NoAliasDeclScope); + + // also update the noalias declaration + II->setOperand(DeclArg, NoAliasDecl); + } + } + } + } + } else if (pt.NoAliasDecls.empty()) { + for (auto PairIO : pt.ZeroOperands) { + Instruction *I = PairIO.first; + auto OpNo = PairIO.second; + (void)OpNo; // Silence not used warning in Release builds. + if (IntrinsicInst *II = dyn_cast(I)) { + auto ID = II->getIntrinsicID(); + if (ID == Intrinsic::noalias || ID == Intrinsic::provenance_noalias) { + // If we get here, we can assume the identifyP or its provenance + // are dependencies + assert( + (ID == Intrinsic::noalias) + ? (OpNo == Intrinsic::NoAliasIdentifyPArg) + : (OpNo == Intrinsic::ProvenanceNoAliasIdentifyPArg || + OpNo == + Intrinsic::ProvenanceNoAliasIdentifyPProvenanceArg)); + unsigned ScopeArg = (ID == Intrinsic::noalias + ? Intrinsic::NoAliasScopeArg + : Intrinsic::ProvenanceNoAliasScopeArg); + MetadataAsValue *MV = + cast(I->getOperand(ScopeArg)); + if (NoAliasUnknownScopeMD == MV->getMetadata()) { + // Propagate a more or less unique id + LLVM_DEBUG(llvm::dbgs() + << "-- No llvm.noalias.decl, looking through: " << *I + << "\n"); + II->replaceAllUsesWith(II->getOperand(0)); + } + } + } + } + } + } - // Drop the use of I in droppable instructions. - if (Inst->isDroppable()) { - Inst->dropDroppableUse(UU); + if (NoAliasDecl) { + // Check if we need to split up llvm.noalias.decl with unique ObjId's + // This is needed to differentiate restrict pointers, once the alloca is + // removed. NOTE: we might as well have depended on 'constant propagation of + // null' and work with a 'constant pointer' + // for IdentifyP. 
Not sure what mechanism would be the best. + const DataLayout &DL = AI->getParent()->getModule()->getDataLayout(); + std::map ObjId2NoAliasDecl; + + auto BaseObjId = cast(NoAliasDecl->getOperand( + Intrinsic::NoAliasDeclObjIdArg)) + ->getZExtValue(); + ObjId2NoAliasDecl[BaseObjId] = NoAliasDecl; + + for (auto PairIO : pt.ZeroOperands) { + IntrinsicInst *II = dyn_cast(PairIO.first); + if (II && ((II->getIntrinsicID() == Intrinsic::noalias) || + (II->getIntrinsicID() == Intrinsic::provenance_noalias))) { + auto OpNo = PairIO.second; + unsigned IdentifyPArg = (II->getIntrinsicID() == Intrinsic::noalias) + ? Intrinsic::NoAliasIdentifyPArg + : Intrinsic::ProvenanceNoAliasIdentifyPArg; + unsigned ObjIdArg = (II->getIntrinsicID() == Intrinsic::noalias) + ? Intrinsic::NoAliasIdentifyPObjIdArg + : Intrinsic::ProvenanceNoAliasIdentifyPObjIdArg; + unsigned NoAliasDeclArg = + (II->getIntrinsicID() == Intrinsic::noalias) + ? Intrinsic::NoAliasNoAliasDeclArg + : Intrinsic::ProvenanceNoAliasNoAliasDeclArg; + + if ((unsigned)OpNo != IdentifyPArg) continue; + + auto CurrentObjId = + cast(II->getOperand(ObjIdArg))->getZExtValue(); + + assert(CurrentObjId == BaseObjId && + "Initial object id difference detected."); + + APInt PPointerOffset(DL.getPointerSizeInBits(), 0ull); + assert(AI == II->getOperand(IdentifyPArg) + ->stripAndAccumulateInBoundsConstantOffsets( + DL, PPointerOffset) && + "hmm.. 
expected stripped P to map to alloca"); + if (!PPointerOffset.isNullValue()) { + CurrentObjId += PPointerOffset.getZExtValue(); + auto &NewNoAliasDecl = ObjId2NoAliasDecl[CurrentObjId]; + if (NewNoAliasDecl == nullptr) { + LLVM_DEBUG(llvm::dbgs() + << "Creating llvm.noalias.decl for IdentifyPObjId " + << CurrentObjId << "\n"); + IRBuilder NoAliasDeclBuilder(NoAliasDecl); + NewNoAliasDecl = NoAliasDeclBuilder.CreateNoAliasDeclaration( + ConstantPointerNull::get(cast(AI->getType())), + CurrentObjId, + NoAliasDecl->getOperand(Intrinsic::NoAliasDeclScopeArg)); + LLVM_DEBUG(llvm::dbgs() << "- " << *NewNoAliasDecl << "\n"); + } + II->setOperand(NoAliasDeclArg, NewNoAliasDecl); + II->setOperand(ObjIdArg, + ConstantInt::get(II->getOperand(ObjIdArg)->getType(), + CurrentObjId)); + LLVM_DEBUG(llvm::dbgs() + << "Remapping noalias.decl dependency: " << *II << "\n"); } - Inst->eraseFromParent(); } } + } + + // set args to zero + for (auto II : pt.NoAliasDecls) { + LLVM_DEBUG(llvm::dbgs() << "Zeoring noalias.decl dep: " << *II << "\n"); + assert(II->getIntrinsicID() == Intrinsic::noalias_decl); + II->setOperand(Intrinsic::NoAliasDeclAllocaArg, + ConstantPointerNull::get(cast(AI->getType()))); + } + for (auto PairIO : pt.ZeroOperands) { + Instruction *I = PairIO.first; + auto OpNo = PairIO.second; + LLVM_DEBUG(llvm::dbgs() + << "Zeroing operand " << OpNo << " of " << *I << "\n"); + I->setOperand(OpNo, ConstantPointerNull::get( + cast(I->getOperand(OpNo)->getType()))); + } + + // Drop droppables + for (auto U : pt.DroppableUses) { + LLVM_DEBUG(llvm::dbgs() << "Dropping use from " << *U->getUser() << "\n"); + assert(U->getUser()->isDroppable()); + U->getUser()->dropDroppableUse(*U); + } + + // remove + for (auto I : pt.Removables) { + LLVM_DEBUG(llvm::dbgs() << "Removing " << *I << "\n"); I->eraseFromParent(); } } @@ -362,8 +700,12 @@ // Clear out UsingBlocks. We will reconstruct it here if needed. 
Info.UsingBlocks.clear(); - for (User *U : make_early_inc_range(AI->users())) { - Instruction *UserInst = cast(U); + for (auto UI = AI->user_begin(), E = AI->user_end(); + UI != E;) { // FIXME: make_early_inc_range ? + Instruction *UserInst = cast(*UI++); + // load/store can have a provenance + if ((UI != E) && (*UI == UserInst)) + ++UI; if (UserInst == OnlyStore) continue; LoadInst *LI = cast(UserInst); @@ -476,8 +818,11 @@ // Walk all of the loads from this alloca, replacing them with the nearest // store above them, if any. - for (User *U : make_early_inc_range(AI->users())) { - LoadInst *LI = dyn_cast(U); + for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { + LoadInst *LI = dyn_cast(*UI++); + // load/store can have a provenance + if ((UI != E) && (*UI == LI)) + ++UI; if (!LI) continue; Index: llvm/test/Transforms/SROA/noalias.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SROA/noalias.ll @@ -0,0 +1,302 @@ +; RUN: opt < %s -sroa -S | FileCheck %s +; RUN: opt < %s -passes=sroa -S | FileCheck %s + +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + + +%struct.FOO = type { i32*, i32*, i32* } + +; Function Attrs: nounwind +define dso_local void @test_ri(i32* %_p) #0 { +entry: + %_p.addr = alloca i32*, align 4 + %rp = alloca i32*, align 4 + store i32* %_p, i32** %_p.addr, align 4, !tbaa !2, !noalias !6 + store i32* undef, i32** %rp, align 4, !noalias !6 + %0 = bitcast i32** %rp to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #4, !noalias !6 + %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** %rp, i64 0, metadata !6) + %2 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !6 + store i32* %2, i32** %rp, align 4, !tbaa !2, !noalias !6 + %3 = load i32*, i32** %rp, align 4, !tbaa !2, !noalias !6 + %4 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %3, i8* %1, 
i32** %rp, i64 0, metadata !6), !tbaa !2, !noalias !6 + store i32 42, i32* %4, align 4, !tbaa !9, !noalias !6 + %5 = bitcast i32** %rp to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* %5) #4 + ret void +} + +; CHECK-LABEL: @test_ri( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !2) +; CHECK: %1 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %_p, i8* %0, i32** null, i64 0, metadata !2), !tbaa !5, !noalias !2 + +; Function Attrs: nounwind +define dso_local void @test_ra(i32* %_p) #0 { +entry: + %_p.addr = alloca i32*, align 4 + %rp = alloca [3 x i32*], align 4 + store i32* %_p, i32** %_p.addr, align 4, !tbaa !2, !noalias !11 + store [3 x i32*] undef, [3 x i32*]* %rp, align 4, !noalias !11 + %0 = bitcast [3 x i32*]* %rp to i8* + call void @llvm.lifetime.start.p0i8(i64 12, i8* %0) #4, !noalias !11 + %1 = call i8* @llvm.noalias.decl.p0i8.p0a3p0i32.i64([3 x i32*]* %rp, i64 0, metadata !11) + %arrayinit.begin = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 0 + %2 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !11 + store i32* %2, i32** %arrayinit.begin, align 4, !tbaa !2, !noalias !11 + %arrayinit.element = getelementptr inbounds i32*, i32** %arrayinit.begin, i32 1 + %3 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !11 + %add.ptr = getelementptr inbounds i32, i32* %3, i32 1 + store i32* %add.ptr, i32** %arrayinit.element, align 4, !tbaa !2, !noalias !11 + %arrayinit.element1 = getelementptr inbounds i32*, i32** %arrayinit.element, i32 1 + %4 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !11 + %add.ptr2 = getelementptr inbounds i32, i32* %4, i32 2 + store i32* %add.ptr2, i32** %arrayinit.element1, align 4, !tbaa !2, !noalias !11 + %arrayidx = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 0 + %5 = load i32*, i32** %arrayidx, align 4, !tbaa !2, !noalias !11 + %6 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %5, i8* %1, i32** 
%arrayidx, i64 0, metadata !11), !tbaa !2, !noalias !11 + store i32 42, i32* %6, align 4, !tbaa !9, !noalias !11 + %arrayidx3 = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 1 + %7 = load i32*, i32** %arrayidx3, align 4, !tbaa !2, !noalias !11 + %8 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %7, i8* %1, i32** %arrayidx3, i64 0, metadata !11), !tbaa !2, !noalias !11 + store i32 43, i32* %8, align 4, !tbaa !9, !noalias !11 + %arrayidx4 = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 2 + %9 = load i32*, i32** %arrayidx4, align 4, !tbaa !2, !noalias !11 + %10 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %9, i8* %1, i32** %arrayidx4, i64 0, metadata !11), !tbaa !2, !noalias !11 + store i32 44, i32* %10, align 4, !tbaa !9, !noalias !11 + %11 = bitcast [3 x i32*]* %rp to i8* + call void @llvm.lifetime.end.p0i8(i64 12, i8* %11) #4 + ret void +} + +; CHECK-LABEL: @test_ra( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !11) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 8, metadata !11) +; CHECK: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 16, metadata !11) +; CHECK: %3 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %_p, i8* %0, i32** null, i64 0, metadata !11), !tbaa !5, !noalias !11 +; CHECK: %4 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %add.ptr, i8* %1, i32** null, i64 8, metadata !11), !tbaa !5, !noalias !11 +; CHECK: %5 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %add.ptr2, i8* %2, i32** null, i64 16, metadata !11), !tbaa !5, !noalias !11 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0a3p0i32.i64([3 x i32*]*, i64, metadata) #1 + +; Function Attrs: nounwind +define dso_local void @test_rs(i32* %_p) #0 { +entry: + %_p.addr = alloca i32*, align 4 + %foo = alloca %struct.FOO, align 4 + store i32* %_p, i32** %_p.addr, align 4, !tbaa !2, !noalias !14 
+ store %struct.FOO undef, %struct.FOO* %foo, align 4, !noalias !14 + %0 = bitcast %struct.FOO* %foo to i8* + call void @llvm.lifetime.start.p0i8(i64 12, i8* %0) #4, !noalias !14 + %1 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FOOs.i64(%struct.FOO* %foo, i64 0, metadata !14) + %mP0 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 0 + %2 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !14 + store i32* %2, i32** %mP0, align 4, !tbaa !17, !noalias !14 + %mP1 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 1 + %3 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !14 + %add.ptr = getelementptr inbounds i32, i32* %3, i32 1 + store i32* %add.ptr, i32** %mP1, align 4, !tbaa !19, !noalias !14 + %mP2 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 2 + %4 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !14 + %add.ptr1 = getelementptr inbounds i32, i32* %4, i32 2 + store i32* %add.ptr1, i32** %mP2, align 4, !tbaa !20, !noalias !14 + %mP02 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 0 + %5 = load i32*, i32** %mP02, align 4, !tbaa !17, !noalias !14 + %6 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %5, i8* %1, i32** %mP02, i64 0, metadata !14), !tbaa !17, !noalias !14 + store i32 42, i32* %6, align 4, !tbaa !9, !noalias !14 + %mP13 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 1 + %7 = load i32*, i32** %mP13, align 4, !tbaa !19, !noalias !14 + %8 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %7, i8* %1, i32** %mP13, i64 0, metadata !14), !tbaa !19, !noalias !14 + store i32 43, i32* %8, align 4, !tbaa !9, !noalias !14 + %mP24 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 2 + %9 = load i32*, i32** %mP24, align 4, !tbaa !20, !noalias !14 + %10 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %9, i8* %1, i32** %mP24, i64 0, metadata !14), !tbaa !20, !noalias !14 + store i32 44, i32* %10, align 4, !tbaa !9, 
!noalias !14 + %11 = bitcast %struct.FOO* %foo to i8* + call void @llvm.lifetime.end.p0i8(i64 12, i8* %11) #4 + ret void +} + +; CHECK-LABEL: @test_rs( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !14) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 8, metadata !14) +; CHECK: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 16, metadata !14) +; CHECK: %3 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %_p, i8* %0, i32** null, i64 0, metadata !14), !tbaa !17, !noalias !14 +; CHECK: %4 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %add.ptr, i8* %1, i32** null, i64 8, metadata !14), !tbaa !19, !noalias !14 +; CHECK: %5 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %add.ptr1, i8* %2, i32** null, i64 16, metadata !14), !tbaa !20, !noalias !14 + +; Function Attrs: argmemonly nounwind speculatable +define dso_local void @test_ri_inlined(i32* %_p) local_unnamed_addr #2 !noalias !21 { +entry: + %rp = alloca i32*, align 4 + %0 = bitcast i32** %rp to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #4, !noalias !24 + %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** nonnull %rp, i64 0, metadata !27), !noalias !21 + store i32* %_p, i32** %rp, ptr_provenance i32** undef, align 4, !noalias !24 + %2 = load i32*, i32** %rp, ptr_provenance i32** undef, align 4, !noalias !28 + %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %2, i8* %1, i32** %rp, i32** undef, i64 0, metadata !27) #4, !noalias !28 + store i32 42, i32* %2, ptr_provenance i32* %3, align 4, !noalias !28 + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #4, !noalias !21 + ret void +} + +; CHECK-LABEL: @test_ri_inlined( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !24) +; CHECK: %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %_p, i8* %0, i32** 
null, i32** undef, i64 0, metadata !24){{.*}}, !noalias !27 + +; Function Attrs: argmemonly nounwind speculatable +define dso_local void @test_ra_inlined(i32* %_p) local_unnamed_addr #2 !noalias !29 { +entry: + %rp = alloca [3 x i32*], align 4 + %.fca.0.gep = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 0 + %0 = bitcast [3 x i32*]* %rp to i8* + call void @llvm.lifetime.start.p0i8(i64 12, i8* nonnull %0) #4, !noalias !32 + %1 = call i8* @llvm.noalias.decl.p0i8.p0a3p0i32.i64([3 x i32*]* nonnull %rp, i64 0, metadata !35), !noalias !29 + store i32* %_p, i32** %.fca.0.gep, ptr_provenance i32** undef, align 4, !noalias !32 + %arrayinit.element = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 1 + %add.ptr = getelementptr inbounds i32, i32* %_p, i32 1 + store i32* %add.ptr, i32** %arrayinit.element, ptr_provenance i32** undef, align 4, !noalias !32 + %arrayinit.element1 = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 2 + %add.ptr2 = getelementptr inbounds i32, i32* %_p, i32 2 + store i32* %add.ptr2, i32** %arrayinit.element1, ptr_provenance i32** undef, align 4, !noalias !32 + %2 = load i32*, i32** %.fca.0.gep, ptr_provenance i32** undef, align 4, !noalias !36 + %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %2, i8* %1, i32** %.fca.0.gep, i32** undef, i64 0, metadata !35) #4, !noalias !36 + store i32 42, i32* %2, ptr_provenance i32* %3, align 4, !noalias !36 + %arrayidx1.i = getelementptr inbounds i32*, i32** %.fca.0.gep, i32 1 + %4 = load i32*, i32** %arrayidx1.i, ptr_provenance i32** undef, align 4, !noalias !36 + %5 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %4, i8* %1, i32** nonnull %arrayidx1.i, i32** undef, i64 0, metadata !35) #4, !noalias !36 + store i32 43, i32* %4, ptr_provenance i32* %5, align 4, !noalias !36 + %arrayidx2.i = getelementptr inbounds i32*, i32** %.fca.0.gep, i32 2 + %6 = load i32*, i32** %arrayidx2.i, ptr_provenance i32** undef, align 4, 
!noalias !36 + %7 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %6, i8* %1, i32** nonnull %arrayidx2.i, i32** undef, i64 0, metadata !35) #4, !noalias !36 + store i32 44, i32* %6, ptr_provenance i32* %7, align 4, !noalias !36 + call void @llvm.lifetime.end.p0i8(i64 12, i8* nonnull %0) #4, !noalias !29 + ret void +} + +; CHECK-LABEL: @test_ra_inlined( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !31) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 8, metadata !31) +; CHECK: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 16, metadata !31) +; CHECK: %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %_p, i8* %0, i32** null, i32** undef, i64 0, metadata !31){{.*}}, !noalias !34 +; CHECK: %4 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %add.ptr, i8* %1, i32** nonnull null, i32** undef, i64 8, metadata !31){{.*}}, !noalias !34 +; CHECK: %5 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %add.ptr2, i8* %2, i32** nonnull null, i32** undef, i64 16, metadata !31){{.*}}, !noalias !34 + + +; Function Attrs: argmemonly nounwind speculatable +define dso_local void @test_rs_inlined(i32* %_p) local_unnamed_addr #2 !noalias !37 { +entry: + %foo = alloca %struct.FOO, align 4 + %.fca.0.gep = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 0 + %.fca.1.gep = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 1 + %.fca.2.gep = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 2 + %0 = bitcast %struct.FOO* %foo to i8* + call void @llvm.lifetime.start.p0i8(i64 12, i8* nonnull %0) #4, !noalias !40 + %1 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FOOs.i64(%struct.FOO* nonnull %foo, i64 0, metadata !43), !noalias !37 + store i32* %_p, i32** %.fca.0.gep, ptr_provenance i32** undef, align 4, !noalias !40 + %add.ptr = getelementptr 
inbounds i32, i32* %_p, i32 1 + store i32* %add.ptr, i32** %.fca.1.gep, ptr_provenance i32** undef, align 4, !noalias !40 + %add.ptr1 = getelementptr inbounds i32, i32* %_p, i32 2 + store i32* %add.ptr1, i32** %.fca.2.gep, ptr_provenance i32** undef, align 4, !noalias !40 + %mP0.i = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 0 + %2 = load i32*, i32** %mP0.i, ptr_provenance i32** undef, align 4, !noalias !44 + %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %2, i8* %1, i32** %mP0.i, i32** undef, i64 0, metadata !43) #4, !noalias !44 + store i32 42, i32* %2, ptr_provenance i32* %3, align 4, !noalias !44 + %mP1.i = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 1 + %4 = load i32*, i32** %mP1.i, ptr_provenance i32** undef, align 4, !noalias !44 + %5 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %4, i8* %1, i32** nonnull %mP1.i, i32** undef, i64 0, metadata !43) #4, !noalias !44 + store i32 43, i32* %4, ptr_provenance i32* %5, align 4, !noalias !44 + %mP2.i = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 2 + %6 = load i32*, i32** %mP2.i, ptr_provenance i32** undef, align 4, !noalias !44 + %7 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %6, i8* %1, i32** nonnull %mP2.i, i32** undef, i64 0, metadata !43) #4, !noalias !44 + store i32 44, i32* %6, ptr_provenance i32* %7, align 4, !noalias !44 + call void @llvm.lifetime.end.p0i8(i64 12, i8* nonnull %0) #4, !noalias !37 + ret void +} + +; CHECK-LABEL: @test_rs_inlined( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !38) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 8, metadata !38) +; CHECK: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 16, metadata !38) +; CHECK: %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %_p, i8* %0, i32** null, i32** 
undef, i64 0, metadata !38){{.*}}, !noalias !41 +; CHECK: %4 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %add.ptr, i8* %1, i32** nonnull null, i32** undef, i64 8, metadata !38){{.*}}, !noalias !41 +; CHECK: %5 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %add.ptr1, i8* %2, i32** nonnull null, i32** undef, i64 16, metadata !38){{.*}}, !noalias !41 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0s_struct.FOOs.i64(%struct.FOO*, i64, metadata) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32**, i64, metadata) #1 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32*, i8*, i32**, i64, metadata) #2 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32*, i8*, i32**, i32**, i64, metadata) #3 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { argmemonly nounwind speculatable } +attributes #3 = { nounwind readnone speculatable } +attributes #4 = { nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3, !3, i64 0, i64 4} +!3 = !{!4, i64 4, !"any pointer"} +!4 = !{!5, i64 1, 
!"omnipotent char"} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!7} +!7 = distinct !{!7, !8, !"test_ri: rp"} +!8 = distinct !{!8, !"test_ri"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!4, i64 4, !"int"} +!11 = !{!12} +!12 = distinct !{!12, !13, !"test_ra: rp"} +!13 = distinct !{!13, !"test_ra"} +!14 = !{!15} +!15 = distinct !{!15, !16, !"test_rs: foo"} +!16 = distinct !{!16, !"test_rs"} +!17 = !{!18, !3, i64 0, i64 4} +!18 = !{!4, i64 12, !"FOO", !3, i64 0, i64 4, !3, i64 4, i64 4, !3, i64 8, i64 4} +!19 = !{!18, !3, i64 4, i64 4} +!20 = !{!18, !3, i64 8, i64 4} +!21 = !{!22} +!22 = distinct !{!22, !23, !"test_ri_inlined: unknown scope"} +!23 = distinct !{!23, !"test_ri_inlined"} +!24 = !{!25, !22} +!25 = distinct !{!25, !26, !"test_ri_inlined: rp"} +!26 = distinct !{!26, !"test_ri_inlined"} +!27 = !{!25} +!28 = !{!22, !25, !22} +!29 = !{!30} +!30 = distinct !{!30, !31, !"test_ra_inlined: unknown scope"} +!31 = distinct !{!31, !"test_ra_inlined"} +!32 = !{!33, !30} +!33 = distinct !{!33, !34, !"test_ra_inlined: rp"} +!34 = distinct !{!34, !"test_ra_inlined"} +!35 = !{!33} +!36 = !{!30, !33, !30} +!37 = !{!38} +!38 = distinct !{!38, !39, !"test_rs_inlined: unknown scope"} +!39 = distinct !{!39, !"test_rs_inlined"} +!40 = !{!41, !38} +!41 = distinct !{!41, !42, !"test_rs_inlined: foo"} +!42 = distinct !{!42, !"test_rs_inlined"} +!43 = !{!41} +!44 = !{!38, !41, !38} Index: llvm/test/Transforms/SROA/noalias2.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SROA/noalias2.ll @@ -0,0 +1,463 @@ +; RUN: opt < %s -sroa -S | FileCheck %s --check-prefixes=CHECK +; RUN: opt < %s -passes=sroa -S | FileCheck %s --check-prefixes=CHECK + + +; Share the dependency on llvm.noalias.copy.guard: +; RUN: sed < %s -e 's/tmp18,/tmp10,/' -e 's/cp3,/cp2,/' | opt -sroa -S | FileCheck %s --check-prefixes=CHECK,CHECK_S +; RUN: sed < %s -e 's/tmp18,/tmp10,/' -e 's/cp3,/cp2,/' | opt -passes=sroa -S | FileCheck %s --check-prefixes=CHECK,CHECK_S 
+ +; Validate that SROA correctly deduces noalias pointers when removing: +; - llvm.mempcy +; - aggregate load/store +; - copying the struct through i64 + +; General form of each function is based on: +; ------ +; struct FOO { +; int* __restrict p; +; }; +; +; struct FUM { +; int* __restrict p0; +; struct FOO m1; +; }; +; +; void testXXXXXX(struct FUM* a_fum) // Scope A +; { +; struct FUM l_fum = *a_fum; // Scope B +; { +; struct FUM l2_fum = l_fum; // Scope C1 +; +; *l2_fum.p0 = 42; +; } +; { +; struct FUM l3_fum = l_fum; // Scope C2 +; +; *l3_fum.m1.p = 43; +; } +; } +; ---- +; After SROA, we expect to see following llvm.noalias dependencies: +; store 42 -> C1 -> B -> A +; store 43 -> C2 -> B -> A + + +; ModuleID = 'test.c' +source_filename = "test.c" +target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128" +target triple = "i386-unknown-linux-gnu" + +%struct.FUM = type { i32*, %struct.FOO } +%struct.FOO = type { i32* } + +; Function Attrs: nounwind +define dso_local void @test01_memcpy(%struct.FUM* %a_fum) #0 !noalias !3 { +entry: + %a_fum.addr = alloca %struct.FUM*, align 4 + %l_fum = alloca %struct.FUM, align 4 + %l2_fum = alloca %struct.FUM, align 4 + %l3_fum = alloca %struct.FUM, align 4 + store %struct.FUM* %a_fum, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !10 + %tmp0 = bitcast %struct.FUM* %l_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp0) #5, !noalias !10 + %tmp1 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l_fum, i64 0, metadata !12), !noalias !10 + %tmp2 = load %struct.FUM*, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !10 + %tmp3 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %tmp2, i8* null, metadata !13, metadata !3) + %tmp4 = bitcast %struct.FUM* %l_fum to i8* + %tmp5 = bitcast %struct.FUM* %tmp3 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %tmp4, i8* align 4 %tmp5, i32 8, i1 false), !tbaa.struct 
!16, !noalias !10 + %tmp6 = bitcast %struct.FUM* %l2_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp6) #5, !noalias !17 + %tmp7 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l2_fum, i64 0, metadata !19), !noalias !17 + %tmp8 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %l_fum, i8* %tmp1, metadata !13, metadata !12) + %tmp9 = bitcast %struct.FUM* %l2_fum to i8* + %tmp10 = bitcast %struct.FUM* %tmp8 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %tmp9, i8* align 4 %tmp10, i32 8, i1 false), !tbaa.struct !16, !noalias !17 + %p0 = getelementptr inbounds %struct.FUM, %struct.FUM* %l2_fum, i32 0, i32 0 + %tmp11 = load i32*, i32** %p0, align 4, !tbaa !20, !noalias !17 + %tmp12 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %tmp11, i8* %tmp7, i32** %p0, i64 0, metadata !19), !tbaa !20, !noalias !17 + store i32 42, i32* %tmp12, align 4, !tbaa !23, !noalias !17 + %tmp13 = bitcast %struct.FUM* %l2_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp13) #5, !noalias !10 + %tmp14 = bitcast %struct.FUM* %l3_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp14) #5, !noalias !25 + %tmp15 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l3_fum, i64 0, metadata !27), !noalias !25 + %tmp16 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %l_fum, i8* %tmp1, metadata !13, metadata !12) + %tmp17 = bitcast %struct.FUM* %l3_fum to i8* + %tmp18 = bitcast %struct.FUM* %tmp16 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %tmp17, i8* align 4 %tmp18, i32 8, i1 false), !tbaa.struct !16, !noalias !25 + %m1 = getelementptr inbounds %struct.FUM, %struct.FUM* %l3_fum, i32 0, i32 1 + %p = getelementptr inbounds %struct.FOO, %struct.FOO* %m1, i32 0, i32 0 + %tmp19 = load i32*, i32** %p, align 4, !tbaa !28, !noalias !25 + %tmp20 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %tmp19, i8* %tmp15, i32** %p, i64 0, metadata 
!27), !tbaa !28, !noalias !25 + store i32 43, i32* %tmp20, align 4, !tbaa !23, !noalias !25 + %tmp21 = bitcast %struct.FUM* %l3_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp21) #5, !noalias !10 + %tmp22 = bitcast %struct.FUM* %l_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp22) #5 + ret void +} + +; CHECK-LABEL: @test01_memcpy +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !6) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata !6) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %l_fum.sroa.0.0.l_fum.sroa.0.0.copyload.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %l_fum.sroa.0.0.copyload, i8* null, i32** %l_fum.sroa.0.0.tmp5.sroa_idx, i64 0, metadata !3) +; CHECK: %l_fum.sroa.[[SROA:[0-9]+]].0.tmp5.sroa_[[IDX3:idx[0-9]+]] = getelementptr inbounds %struct.FUM, %struct.FUM* %a_fum, i32 0, i32 1, i32 0 +; CHECK: %l_fum.sroa.[[SROA]].0.l_fum.sroa.[[SROA]].0.copyload.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %l_fum.sroa.[[SROA]].0.copyload, i8* null, i32** %l_fum.sroa.[[SROA]].0.tmp5.sroa_[[IDX3]], i64 0, metadata !3) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !15) +; CHECK: %3 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata !15) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %l2_fum.sroa.0.0.l2_fum.sroa.0.0.copyload.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %l_fum.sroa.0.0.l_fum.sroa.0.0.copyload.noalias, i8* %0, i32** null, i64 0, metadata !6) +; CHECK: %l2_fum.sroa.7.0.l2_fum.sroa.7.0.copyload.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* 
%l_fum.sroa.[[SROA]].0.l_fum.sroa.[[SROA]].0.copyload.noalias, i8* %1, i32** null, i64 4, metadata !6) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %tmp12 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %l2_fum.sroa.0.0.l2_fum.sroa.0.0.copyload.noalias, i8* %2, i32** null, i64 0, metadata !15), !tbaa !17, !noalias !20 +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %4 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !23) +; CHECK: %5 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata !23) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %l3_fum.sroa.0.0.l3_fum.sroa.0.0.copyload.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %l_fum.sroa.0.0.l_fum.sroa.0.0.copyload.noalias, i8* %0, i32** null, i64 0, metadata !6) +; CHECK: %l3_fum.sroa.5.0.l3_fum.sroa.5.0.copyload.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %l_fum.sroa.[[SROA]].0.l_fum.sroa.[[SROA]].0.copyload.noalias, i8* %1, i32** null, i64 4, metadata !6) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %tmp20 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %l3_fum.sroa.5.0.l3_fum.sroa.5.0.copyload.noalias, i8* %5, i32** null, i64 4, metadata !23), !tbaa !25, !noalias !26 +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: ret void + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg %0, i8* nocapture %1) #1 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %0, i64 %1, metadata %2) #2 + +; Function Attrs: nounwind readnone +declare %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %0, i8* %1, 
metadata %2, metadata %3) #3 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture writeonly %0, i8* noalias nocapture readonly %1, i32 %2, i1 immarg %3) #1 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %0, i8* %1, i32** %2, i64 %3, metadata %4) #4 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg %0, i8* nocapture %1) #1 + +; Function Attrs: nounwind +define dso_local void @test02_aggloadstore(%struct.FUM* %a_fum) #0 !noalias !29 { +entry: + %a_fum.addr = alloca %struct.FUM*, align 4 + %l_fum = alloca %struct.FUM, align 4 + %l2_fum = alloca %struct.FUM, align 4 + %l3_fum = alloca %struct.FUM, align 4 + store %struct.FUM* %a_fum, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !32 + %tmp0 = bitcast %struct.FUM* %l_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp0) #5, !noalias !32 + %tmp1 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l_fum, i64 0, metadata !34), !noalias !32 + %tmp2 = load %struct.FUM*, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !32 + %tmp3 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %tmp2, i8* null, metadata !13, metadata !29) + %cp1 = load %struct.FUM, %struct.FUM* %tmp3, align 4 + store %struct.FUM %cp1, %struct.FUM* %l_fum, align 4 + %tmp6 = bitcast %struct.FUM* %l2_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp6) #5, !noalias !35 + %tmp7 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l2_fum, i64 0, metadata !37), !noalias !35 + %tmp8 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %l_fum, i8* %tmp1, metadata !13, metadata !34) + %cp2 = load %struct.FUM, %struct.FUM* %tmp8, align 4 + store %struct.FUM %cp2, %struct.FUM* %l2_fum, align 4 + %p0 = getelementptr inbounds %struct.FUM, %struct.FUM* %l2_fum, i32 0, 
i32 0 + %tmp11 = load i32*, i32** %p0, align 4, !tbaa !20, !noalias !35 + %tmp12 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %tmp11, i8* %tmp7, i32** %p0, i64 0, metadata !37), !tbaa !20, !noalias !35 + store i32 42, i32* %tmp12, align 4, !tbaa !23, !noalias !35 + %tmp13 = bitcast %struct.FUM* %l2_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp13) #5, !noalias !32 + %tmp14 = bitcast %struct.FUM* %l3_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp14) #5, !noalias !38 + %tmp15 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l3_fum, i64 0, metadata !40), !noalias !38 + %tmp16 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %l_fum, i8* %tmp1, metadata !13, metadata !34) + %cp3 = load %struct.FUM, %struct.FUM* %tmp16, align 4 + store %struct.FUM %cp3, %struct.FUM* %l3_fum, align 4 + %m1 = getelementptr inbounds %struct.FUM, %struct.FUM* %l3_fum, i32 0, i32 1 + %p = getelementptr inbounds %struct.FOO, %struct.FOO* %m1, i32 0, i32 0 + %tmp19 = load i32*, i32** %p, align 4, !tbaa !28, !noalias !38 + %tmp20 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %tmp19, i8* %tmp15, i32** %p, i64 0, metadata !40), !tbaa !28, !noalias !38 + store i32 43, i32* %tmp20, align 4, !tbaa !23, !noalias !38 + %tmp21 = bitcast %struct.FUM* %l3_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp21) #5, !noalias !32 + %tmp22 = bitcast %struct.FUM* %l_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp22) #5 + ret void +} + +; CHECK-LABEL: @test02_aggloadstore +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !30) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata !30) +; CHECK: %tmp3 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %a_fum, i8* null, metadata !32, metadata !27) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; 
CHECK-NOT: llvm.noalias.decl +; CHECK: %cp1.fca.0.[[GEP5:gep[0-9]+]] = getelementptr inbounds %struct.FUM, %struct.FUM* %a_fum, i32 0, i32 0 +; CHECK: %cp1.fca.0.load.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %cp1.fca.0.load, i8* null, i32** %cp1.fca.0.[[GEP5]], i64 0, metadata !27) +; CHECK: %cp1.fca.1.0.[[GEP6:gep[0-9]+]] = getelementptr inbounds %struct.FUM, %struct.FUM* %a_fum, i32 0, i32 1, i32 0 +; CHECK: %cp1.fca.1.0.load.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %cp1.fca.1.0.load, i8* null, i32** %cp1.fca.1.0.[[GEP6]], i64 0, metadata !27) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !35) +; CHECK: %3 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata !35) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %cp2.fca.0.load.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %cp1.fca.0.extract, i8* %0, i32** null, i64 0, metadata !30) +; CHECK: %cp2.fca.1.0.load.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %cp1.fca.1.0.extract, i8* %1, i32** null, i64 4, metadata !30) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %tmp12 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %cp2.fca.{{.*}}.extract, i8* %2, i32** null, i64 0, metadata !35), !tbaa !17, !noalias !37 +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %4 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !38) +; CHECK: %5 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata !38) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %cp3.fca.0.load.noalias = call i32* 
@llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %cp1.fca.0.extract, i8* %0, i32** null, i64 0, metadata !30) +; CHECK: %cp3.fca.1.0.load.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %cp1.fca.1.0.extract, i8* %1, i32** null, i64 4, metadata !30) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %tmp20 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %{{cp3|cp2}}.fca.1.0.extract, i8* %5, i32** null, i64 4, metadata !38), !tbaa !25, !noalias !40 +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: ret void + +; Function Attrs: nounwind +define dso_local void @test03_i64loadstore(%struct.FUM* %a_fum) #0 !noalias !41 { +entry: + %a_fum.addr = alloca %struct.FUM*, align 4 + %l_fum = alloca %struct.FUM, align 4 + %l2_fum = alloca %struct.FUM, align 4 + %l3_fum = alloca %struct.FUM, align 4 + store %struct.FUM* %a_fum, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !44 + %tmp0 = bitcast %struct.FUM* %l_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp0) #5, !noalias !44 + %tmp1 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l_fum, i64 0, metadata !46), !noalias !44 + %tmp2 = load %struct.FUM*, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !44 + %tmp3 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %tmp2, i8* null, metadata !13, metadata !41) + %tmp4 = bitcast %struct.FUM* %l_fum to i64* + %tmp5 = bitcast %struct.FUM* %tmp3 to i64* + %cp1 = load i64, i64* %tmp5, align 4 + store i64 %cp1, i64* %tmp4, align 4 + %tmp6 = bitcast %struct.FUM* %l2_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp6) #5, !noalias !47 + %tmp7 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l2_fum, i64 0, metadata !49), !noalias !47 + %tmp8 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %l_fum, i8* %tmp1, metadata !13, 
metadata !46) + %tmp9 = bitcast %struct.FUM* %l2_fum to i64* + %tmp10 = bitcast %struct.FUM* %tmp8 to i64* + %cp2 = load i64, i64* %tmp10, align 4 + store i64 %cp2, i64* %tmp9, align 4 + %p0 = getelementptr inbounds %struct.FUM, %struct.FUM* %l2_fum, i32 0, i32 0 + %tmp11 = load i32*, i32** %p0, align 4, !tbaa !20, !noalias !47 + %tmp12 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %tmp11, i8* %tmp7, i32** %p0, i64 0, metadata !49), !tbaa !20, !noalias !47 + store i32 42, i32* %tmp12, align 4, !tbaa !23, !noalias !47 + %tmp13 = bitcast %struct.FUM* %l2_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp13) #5, !noalias !44 + %tmp14 = bitcast %struct.FUM* %l3_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp14) #5, !noalias !50 + %tmp15 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l3_fum, i64 0, metadata !52), !noalias !50 + %tmp16 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %l_fum, i8* %tmp1, metadata !13, metadata !46) + %tmp17 = bitcast %struct.FUM* %l3_fum to i64* + %tmp18 = bitcast %struct.FUM* %tmp16 to i64* + %cp3 = load i64, i64* %tmp18, align 4 + store i64 %cp3, i64* %tmp17, align 4 + %m1 = getelementptr inbounds %struct.FUM, %struct.FUM* %l3_fum, i32 0, i32 1 + %p = getelementptr inbounds %struct.FOO, %struct.FOO* %m1, i32 0, i32 0 + %tmp19 = load i32*, i32** %p, align 4, !tbaa !28, !noalias !50 + %tmp20 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %tmp19, i8* %tmp15, i32** %p, i64 0, metadata !52), !tbaa !28, !noalias !50 + store i32 43, i32* %tmp20, align 4, !tbaa !23, !noalias !50 + %tmp21 = bitcast %struct.FUM* %l3_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp21) #5, !noalias !44 + %tmp22 = bitcast %struct.FUM* %l_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp22) #5 + ret void +} + +; CHECK-LABEL: @test03_i64loadstore +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, 
metadata !44) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata !44) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %{{cp1[0-9]*}}.sroa_as_ptr.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %{{cp1[0-9]*}}.sroa_as_ptr, i8* null, i32** %2, i64 0, metadata !41) +; CHECK: %{{cp1[0-9]*}}.sroa_as_ptr.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %{{cp1[0-9]*}}.sroa_as_ptr, i8* null, i32** %3, i64 0, metadata !41) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %6 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !46) +; CHECK: %7 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata !46) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK_S: %cp21.sroa_as_ptr.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %4, i8* %0, i32** null, i64 0, metadata !44) +; CHECK_S: %cp22.sroa_as_ptr.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %5, i8* %1, i32** null, i64 4, metadata !44) +; CHECK: %{{cp2[0-9]+}}.sroa_as_ptr.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %4, i8* %0, i32** null, i64 0, metadata !44) +; CHECK: %{{cp2[0-9]+}}.sroa_as_ptr.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %5, i8* %1, i32** null, i64 4, metadata !44) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %tmp12 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %8, i8* %6, i32** null, i64 0, metadata !46), !tbaa !17, !noalias !48 +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %10 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !49) +; CHECK: %11 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 
4, metadata !49) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %{{cp3[0-9]*}}.sroa_as_ptr.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %4, i8* %0, i32** null, i64 0, metadata !44) +; CHECK: %{{cp3[0-9]*}}.sroa_as_ptr.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %5, i8* %1, i32** null, i64 4, metadata !44) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %tmp20 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %13, i8* %11, i32** null, i64 4, metadata !49), !tbaa !25, !noalias !51 +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: ret void + + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { argmemonly nounwind } +attributes #3 = { nounwind readnone } +attributes #4 = { argmemonly nounwind speculatable } +attributes #5 = { nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"NumRegisterParameters", i32 0} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{!"clang version"} +!3 = !{!4} +!4 = distinct !{!4, !5, !"test01_memcpy: unknown scope"} +!5 = distinct !{!5, !"test01_memcpy"} +!6 = !{!7, !7, i64 0} +!7 = !{!"any pointer", !8, i64 0} +!8 = !{!"omnipotent char", !9, i64 0} +!9 = !{!"Simple C/C++ TBAA"} +!10 = !{!11, !4} +!11 = distinct !{!11, !5, !"test01_memcpy: l_fum"} +!12 = !{!11} +!13 = !{!14, !15} +!14 = !{i32 -1, i32 0} 
+!15 = !{i32 -1, i32 1, i32 0} +!16 = !{i64 0, i64 4, !6, i64 4, i64 4, !6} +!17 = !{!18, !11, !4} +!18 = distinct !{!18, !5, !"test01_memcpy: l2_fum"} +!19 = !{!18} +!20 = !{!21, !7, i64 0} +!21 = !{!"FUM", !7, i64 0, !22, i64 4} +!22 = !{!"FOO", !7, i64 0} +!23 = !{!24, !24, i64 0} +!24 = !{!"int", !8, i64 0} +!25 = !{!26, !11, !4} +!26 = distinct !{!26, !5, !"test01_memcpy: l3_fum"} +!27 = !{!26} +!28 = !{!21, !7, i64 4} +!29 = !{!30} +!30 = distinct !{!30, !31, !"test02_aggloadstore: unknown scope"} +!31 = distinct !{!31, !"test02_aggloadstore"} +!32 = !{!33, !30} +!33 = distinct !{!33, !31, !"test02_aggloadstore: l_fum"} +!34 = !{!33} +!35 = !{!36, !33, !30} +!36 = distinct !{!36, !31, !"test02_aggloadstore: l2_fum"} +!37 = !{!36} +!38 = !{!39, !33, !30} +!39 = distinct !{!39, !31, !"test02_aggloadstore: l3_fum"} +!40 = !{!39} +!41 = !{!42} +!42 = distinct !{!42, !43, !"test03_i64loadstore: unknown scope"} +!43 = distinct !{!43, !"test03_i64loadstore"} +!44 = !{!45, !42} +!45 = distinct !{!45, !43, !"test03_i64loadstore: l_fum"} +!46 = !{!45} +!47 = !{!48, !45, !42} +!48 = distinct !{!48, !43, !"test03_i64loadstore: l2_fum"} +!49 = !{!48} +!50 = !{!51, !45, !42} +!51 = distinct !{!51, !43, !"test03_i64loadstore: l3_fum"} +!52 = !{!51} + +; CHECK: !0 = !{i32 1, !"NumRegisterParameters", i32 0} +; CHECK: !1 = !{i32 1, !"wchar_size", i32 4} +; CHECK: !2 = !{!"clang version"} +; CHECK: !3 = !{!4} +; CHECK: !4 = distinct !{!4, !5, !"test01_memcpy: unknown scope"} +; CHECK: !5 = distinct !{!5, !"test01_memcpy"} +; CHECK: !6 = !{!7} +; CHECK: !7 = distinct !{!7, !5, !"test01_memcpy: l_fum"} +; CHECK: !8 = !{i64 0, i64 4, !9, i64 4, i64 4, !9} +; CHECK: !9 = !{!10, !10, i64 0} +; CHECK: !10 = !{!"any pointer", !11, i64 0} +; CHECK: !11 = !{!"omnipotent char", !12, i64 0} +; CHECK: !12 = !{!"Simple C/C++ TBAA"} +; CHECK: !13 = !{!7, !4} +; CHECK: !14 = !{i64 0, i64 4, !9} +; CHECK: !15 = !{!16} +; CHECK: !16 = distinct !{!16, !5, !"test01_memcpy: l2_fum"} +; CHECK: !17 
= !{!18, !10, i64 0} +; CHECK: !18 = !{!"FUM", !10, i64 0, !19, i64 4} +; CHECK: !19 = !{!"FOO", !10, i64 0} +; CHECK: !20 = !{!16, !7, !4} +; CHECK: !21 = !{!22, !22, i64 0} +; CHECK: !22 = !{!"int", !11, i64 0} +; CHECK: !23 = !{!24} +; CHECK: !24 = distinct !{!24, !5, !"test01_memcpy: l3_fum"} +; CHECK: !25 = !{!18, !10, i64 4} +; CHECK: !26 = !{!24, !7, !4} +; CHECK: !27 = !{!28} +; CHECK: !28 = distinct !{!28, !29, !"test02_aggloadstore: unknown scope"} +; CHECK: !29 = distinct !{!29, !"test02_aggloadstore"} +; CHECK: !30 = !{!31} +; CHECK: !31 = distinct !{!31, !29, !"test02_aggloadstore: l_fum"} +; CHECK: !32 = !{!33, !34} +; CHECK: !33 = !{i32 -1, i32 0} +; CHECK: !34 = !{i32 -1, i32 1, i32 0} +; CHECK: !35 = !{!36} +; CHECK: !36 = distinct !{!36, !29, !"test02_aggloadstore: l2_fum"} +; CHECK: !37 = !{!36, !31, !28} +; CHECK: !38 = !{!39} +; CHECK: !39 = distinct !{!39, !29, !"test02_aggloadstore: l3_fum"} +; CHECK: !40 = !{!39, !31, !28} +; CHECK: !41 = !{!42} +; CHECK: !42 = distinct !{!42, !43, !"test03_i64loadstore: unknown scope"} +; CHECK: !43 = distinct !{!43, !"test03_i64loadstore"} +; CHECK: !44 = !{!45} +; CHECK: !45 = distinct !{!45, !43, !"test03_i64loadstore: l_fum"} +; CHECK: !46 = !{!47} +; CHECK: !47 = distinct !{!47, !43, !"test03_i64loadstore: l2_fum"} +; CHECK: !48 = !{!47, !45, !42} +; CHECK: !49 = !{!50} +; CHECK: !50 = distinct !{!50, !43, !"test03_i64loadstore: l3_fum"} +; CHECK: !51 = !{!50, !45, !42} Index: llvm/test/Transforms/SROA/noalias_copy_guard.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SROA/noalias_copy_guard.ll @@ -0,0 +1,358 @@ +; using memcpy: +; RUN: sed < %s -e 's,;V1 , ,' | opt -sroa -S | FileCheck %s --check-prefixes=CHECK,CHECK_V1 +; RUN: sed < %s -e 's,;V1 , ,' | opt -passes=sroa -S | FileCheck %s --check-prefixes=CHECK,CHECK_V1 + +; using aggregate load/store: +; RUN: sed < %s -e 's,;V2 , ,' | opt -sroa -S | FileCheck %s 
--check-prefixes=CHECK,CHECK_V2 +; RUN: sed < %s -e 's,;V2 , ,' | opt -passes=sroa -S | FileCheck %s --check-prefixes=CHECK,CHECK_V2 + +; using i64 load/store: +; RUN: sed < %s -e 's,;V3 , ,' | opt -sroa -S | FileCheck %s --check-prefixes=CHECK,CHECK_V3 +; RUN: sed < %s -e 's,;V3 , ,' | opt -passes=sroa -S | FileCheck %s --check-prefixes=CHECK,CHECK_V3 + + +; Validate that SROA correctly deduces noalias pointers when removing: +; - llvm.memcpy +; - aggregate load/store +; - copying the struct through i64 + +; General form of each function is based on: +; ------ +; struct FOO { +; int* __restrict p; +; }; +; +; struct FUM { +; int* __restrict p0; +; struct FOO m1; +; }; +; +; void test01(struct FUM* a_fum) +; { +; struct FUM l_fum = *a_fum; +; *l_fum.p0 = 42; +; } +; +; void test02(struct FUM* a_fum) +; { +; struct FUM l_fum = *a_fum; +; *l_fum.m1.p = 43; +; } +; +; void test03(struct FUM* a_fum) +; { +; struct FUM l_fum = *a_fum; +; *l_fum.p0 = 42; +; *l_fum.m1.p = 43; +; } + + +; ModuleID = 'test3.c' +source_filename = "test3.c" +target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128" +target triple = "i386-unknown-linux-gnu" + +%struct.FUM = type { i32*, %struct.FOO } +%struct.FOO = type { i32* } + +; Function Attrs: nounwind +define dso_local void @test01(%struct.FUM* %a_fum) #0 !noalias !3 { +entry: + %a_fum.addr = alloca %struct.FUM*, align 4 + %l_fum = alloca %struct.FUM, align 4 + store %struct.FUM* %a_fum, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !10 + %tmp0 = bitcast %struct.FUM* %l_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp0) #5, !noalias !10 + %tmp1 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l_fum, i64 0, metadata !12), !noalias !10 + %tmp2 = load %struct.FUM*, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !10 + %tmp3 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %tmp2, i8* null, metadata !13, metadata !3) + 
+;V1 %tmp4 = bitcast %struct.FUM* %l_fum to i8* +;V1 %tmp5 = bitcast %struct.FUM* %tmp3 to i8* +;V1 call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %tmp4, i8* align 4 %tmp5, i32 8, i1 false), !tbaa.struct !16, !noalias !10 + +;V2 %cp1 = load %struct.FUM, %struct.FUM* %tmp3, align 4 +;V2 store %struct.FUM %cp1, %struct.FUM* %l_fum, align 4 + +;V3 %tmp4 = bitcast %struct.FUM* %l_fum to i64* +;V3 %tmp5 = bitcast %struct.FUM* %tmp3 to i64* +;V3 %cp1 = load i64, i64* %tmp5, align 4 +;V3 store i64 %cp1, i64* %tmp4, align 4 + + %p0 = getelementptr inbounds %struct.FUM, %struct.FUM* %l_fum, i32 0, i32 0 + %tmp6 = load i32*, i32** %p0, align 4, !tbaa !17, !noalias !10 + %tmp7 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %tmp6, i8* %tmp1, i32** %p0, i64 0, metadata !12), !tbaa !17, !noalias !10 + store i32 42, i32* %tmp7, align 4, !tbaa !20, !noalias !10 + %tmp8 = bitcast %struct.FUM* %l_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp8) #5 + ret void +} + +; CHECK-LABEL: test01 +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !6) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata !6) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK_V2: %tmp3 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %a_fum, i8* null, metadata !8, metadata !3) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK: %{{.*}} = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %{{.*}}, i8* null, i32** %{{.*}}, i64 0, metadata !3) +; CHECK: %{{.*}} = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %{{.*}}, i8* null, i32** %{{.*}}, i64 0, metadata !3) +; CHECK: %tmp7 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %{{.*}}, i8* %0, i32** null, i64 0, metadata !6) +; CHECK: ret void + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg %tmp0, i8* nocapture %tmp1) #1 + +; Function Attrs: argmemonly nounwind +declare i8* 
@llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %tmp0, i64 %tmp1, metadata %tmp2) #2 + +; Function Attrs: nounwind readnone +declare %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %tmp0, i8* %tmp1, metadata %tmp2, metadata %tmp3) #3 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture writeonly %tmp0, i8* noalias nocapture readonly %tmp1, i32 %tmp2, i1 immarg %tmp3) #1 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %tmp0, i8* %tmp1, i32** %tmp2, i64 %tmp3, metadata %tmp4) #4 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg %tmp0, i8* nocapture %tmp1) #1 + +; Function Attrs: nounwind +define dso_local void @test02(%struct.FUM* %a_fum) #0 !noalias !22 { +entry: + %a_fum.addr = alloca %struct.FUM*, align 4 + %l_fum = alloca %struct.FUM, align 4 + store %struct.FUM* %a_fum, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !25 + %tmp0 = bitcast %struct.FUM* %l_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp0) #5, !noalias !25 + %tmp1 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l_fum, i64 0, metadata !27), !noalias !25 + %tmp2 = load %struct.FUM*, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !25 + %tmp3 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %tmp2, i8* null, metadata !13, metadata !22) + +;V1 %tmp4 = bitcast %struct.FUM* %l_fum to i8* +;V1 %tmp5 = bitcast %struct.FUM* %tmp3 to i8* +;V1 call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %tmp4, i8* align 4 %tmp5, i32 8, i1 false), !tbaa.struct !16, !noalias !25 + +;V2 %cp1 = load %struct.FUM, %struct.FUM* %tmp3, align 4 +;V2 store %struct.FUM %cp1, %struct.FUM* %l_fum, align 4 + +;V3 %tmp4 = bitcast %struct.FUM* %l_fum to i64* +;V3 %tmp5 = bitcast %struct.FUM* %tmp3 to i64* +;V3 %cp1 = load i64, i64* %tmp5, align 4 +;V3 
store i64 %cp1, i64* %tmp4, align 4 + + %m1 = getelementptr inbounds %struct.FUM, %struct.FUM* %l_fum, i32 0, i32 1 + %p = getelementptr inbounds %struct.FOO, %struct.FOO* %m1, i32 0, i32 0 + %tmp6 = load i32*, i32** %p, align 4, !tbaa !28, !noalias !25 + %tmp7 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %tmp6, i8* %tmp1, i32** %p, i64 0, metadata !27), !tbaa !28, !noalias !25 + store i32 43, i32* %tmp7, align 4, !tbaa !20, !noalias !25 + %tmp8 = bitcast %struct.FUM* %l_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp8) #5 + ret void +} + +; CHECK-LABEL: test02 +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata ![[SCOPE2:[0-9]+]]) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata ![[SCOPE2]]) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK_V2: %tmp3 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %a_fum, i8* null, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK: %{{.*}} = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %{{.*}}, i8* null, i32** %{{.*}}, i64 0, metadata ![[SCOPE2_OUT:[0-9]+]]) +; CHECK: %{{.*}} = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %{{.*}}, i8* null, i32** %{{.*}}, i64 0, metadata ![[SCOPE2_OUT]]) +; CHECK: %tmp7 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %{{.*}}, i8* %1, i32** null, i64 4, metadata ![[SCOPE2]]) +; CHECK: ret void + +; Function Attrs: nounwind +define dso_local void @test03(%struct.FUM* %a_fum) #0 !noalias !29 { +entry: + %a_fum.addr = alloca %struct.FUM*, align 4 + %l_fum = alloca %struct.FUM, align 4 + store %struct.FUM* %a_fum, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !32 + %tmp0 = bitcast %struct.FUM* %l_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp0) #5, !noalias !32 + %tmp1 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l_fum, i64 0, metadata !34), !noalias !32 + %tmp2 = 
load %struct.FUM*, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !32 + %tmp3 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %tmp2, i8* null, metadata !13, metadata !29) + +;V1 %tmp4 = bitcast %struct.FUM* %l_fum to i8* +;V1 %tmp5 = bitcast %struct.FUM* %tmp3 to i8* +;V1 call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %tmp4, i8* align 4 %tmp5, i32 8, i1 false), !tbaa.struct !16, !noalias !32 + +;V2 %cp1 = load %struct.FUM, %struct.FUM* %tmp3, align 4 +;V2 store %struct.FUM %cp1, %struct.FUM* %l_fum, align 4 + +;V3 %tmp4 = bitcast %struct.FUM* %l_fum to i64* +;V3 %tmp5 = bitcast %struct.FUM* %tmp3 to i64* +;V3 %cp1 = load i64, i64* %tmp5, align 4 +;V3 store i64 %cp1, i64* %tmp4, align 4 + + %p0 = getelementptr inbounds %struct.FUM, %struct.FUM* %l_fum, i32 0, i32 0 + %tmp6 = load i32*, i32** %p0, align 4, !tbaa !17, !noalias !32 + %tmp7 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %tmp6, i8* %tmp1, i32** %p0, i64 0, metadata !34), !tbaa !17, !noalias !32 + store i32 42, i32* %tmp7, align 4, !tbaa !20, !noalias !32 + %m1 = getelementptr inbounds %struct.FUM, %struct.FUM* %l_fum, i32 0, i32 1 + %p = getelementptr inbounds %struct.FOO, %struct.FOO* %m1, i32 0, i32 0 + %tmp8 = load i32*, i32** %p, align 4, !tbaa !28, !noalias !32 + %tmp9 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %tmp8, i8* %tmp1, i32** %p, i64 0, metadata !34), !tbaa !28, !noalias !32 + store i32 43, i32* %tmp9, align 4, !tbaa !20, !noalias !32 + %tmp10 = bitcast %struct.FUM* %l_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp10) #5 + ret void +} + +; CHECK-LABEL: test03 +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata ![[SCOPE3:[0-9]+]]) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata ![[SCOPE3]]) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK_V2: %tmp3 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %a_fum, i8* null, 
metadata !8, metadata !{{[0-9]+}}) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK: %{{.*}} = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %{{.*}}, i8* null, i32** %{{.*}}, i64 0, metadata ![[SCOPE3_OUT:[0-9]+]]) +; CHECK: %{{.*}} = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %{{.*}}, i8* null, i32** %{{.*}}, i64 0, metadata ![[SCOPE3_OUT]]) +; CHECK: %tmp7 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %{{.*}}, i8* %0, i32** null, i64 0, metadata ![[SCOPE3]]) +; CHECK: %tmp9 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %{{.*}}, i8* %1, i32** null, i64 4, metadata ![[SCOPE3]]) +; CHECK: ret void + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { argmemonly nounwind } +attributes #3 = { nounwind readnone } +attributes #4 = { argmemonly nounwind speculatable } +attributes #5 = { nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"NumRegisterParameters", i32 0} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{!"clang version"} +!3 = !{!4} +!4 = distinct !{!4, !5, !"test01: unknown scope"} +!5 = distinct !{!5, !"test01"} +!6 = !{!7, !7, i64 0} +!7 = !{!"any pointer", !8, i64 0} +!8 = !{!"omnipotent char", !9, i64 0} +!9 = !{!"Simple C/C++ TBAA"} +!10 = !{!11, !4} +!11 = distinct !{!11, !5, !"test01: l_fum"} +!12 = !{!11} +!13 = !{!14, !15} +!14 = !{i32 -1, i32 0} +!15 = !{i32 -1, i32 1, i32 0} +!16 = !{i64 0, i64 4, !6, i64 4, i64 4, !6} +!17 = !{!18, !7, i64 0} +!18 = !{!"FUM", !7, i64 0, !19, i64 4} +!19 = 
!{!"FOO", !7, i64 0} +!20 = !{!21, !21, i64 0} +!21 = !{!"int", !8, i64 0} +!22 = !{!23} +!23 = distinct !{!23, !24, !"test02: unknown scope"} +!24 = distinct !{!24, !"test02"} +!25 = !{!26, !23} +!26 = distinct !{!26, !24, !"test02: l_fum"} +!27 = !{!26} +!28 = !{!18, !7, i64 4} +!29 = !{!30} +!30 = distinct !{!30, !31, !"test03: unknown scope"} +!31 = distinct !{!31, !"test03"} +!32 = !{!33, !30} +!33 = distinct !{!33, !31, !"test03: l_fum"} +!34 = !{!33} + +; CHECK_V1: !0 = !{i32 1, !"NumRegisterParameters", i32 0} +; CHECK_V1: !1 = !{i32 1, !"wchar_size", i32 4} +; CHECK_V1: !2 = !{!"clang version"} +; CHECK_V1: !3 = !{!4} +; CHECK_V1: !4 = distinct !{!4, !5, !"test01: unknown scope"} +; CHECK_V1: !5 = distinct !{!5, !"test01"} +; CHECK_V1: !6 = !{!7} +; CHECK_V1: !7 = distinct !{!7, !5, !"test01: l_fum"} +; CHECK_V1: !8 = !{i64 0, i64 4, !9, i64 4, i64 4, !9} +; CHECK_V1: !9 = !{!10, !10, i64 0} +; CHECK_V1: !10 = !{!"any pointer", !11, i64 0} +; CHECK_V1: !11 = !{!"omnipotent char", !12, i64 0} +; CHECK_V1: !12 = !{!"Simple C/C++ TBAA"} +; CHECK_V1: !13 = !{!7, !4} +; CHECK_V1: !14 = !{i64 0, i64 4, !9} +; CHECK_V1: !15 = !{!16, !10, i64 0} +; CHECK_V1: !16 = !{!"FUM", !10, i64 0, !17, i64 4} +; CHECK_V1: !17 = !{!"FOO", !10, i64 0} +; CHECK_V1: !18 = !{!19, !19, i64 0} +; CHECK_V1: !19 = !{!"int", !11, i64 0} +; CHECK_V1: !20 = !{!21} +; CHECK_V1: !21 = distinct !{!21, !22, !"test02: unknown scope"} +; CHECK_V1: !22 = distinct !{!22, !"test02"} +; CHECK_V1: !23 = !{!24} +; CHECK_V1: !24 = distinct !{!24, !22, !"test02: l_fum"} +; CHECK_V1: !25 = !{!24, !21} +; CHECK_V1: !26 = !{!16, !10, i64 4} +; CHECK_V1: !27 = !{!28} +; CHECK_V1: !28 = distinct !{!28, !29, !"test03: unknown scope"} +; CHECK_V1: !29 = distinct !{!29, !"test03"} +; CHECK_V1: !30 = !{!31} +; CHECK_V1: !31 = distinct !{!31, !29, !"test03: l_fum"} +; CHECK_V1: !32 = !{!31, !28} + +; CHECK_V2: !0 = !{i32 1, !"NumRegisterParameters", i32 0} +; CHECK_V2: !1 = !{i32 1, !"wchar_size", i32 4} +; 
CHECK_V2: !2 = !{!"clang version"} +; CHECK_V2: !3 = !{!4} +; CHECK_V2: !4 = distinct !{!4, !5, !"test01: unknown scope"} +; CHECK_V2: !5 = distinct !{!5, !"test01"} +; CHECK_V2: !6 = !{!7} +; CHECK_V2: !7 = distinct !{!7, !5, !"test01: l_fum"} +; CHECK_V2: !8 = !{!9, !10} +; CHECK_V2: !9 = !{i32 -1, i32 0} +; CHECK_V2: !10 = !{i32 -1, i32 1, i32 0} +; CHECK_V2: !11 = !{!12, !13, i64 0} +; CHECK_V2: !12 = !{!"FUM", !13, i64 0, !16, i64 4} +; CHECK_V2: !13 = !{!"any pointer", !14, i64 0} +; CHECK_V2: !14 = !{!"omnipotent char", !15, i64 0} +; CHECK_V2: !15 = !{!"Simple C/C++ TBAA"} +; CHECK_V2: !16 = !{!"FOO", !13, i64 0} +; CHECK_V2: !17 = !{!7, !4} +; CHECK_V2: !18 = !{!19, !19, i64 0} +; CHECK_V2: !19 = !{!"int", !14, i64 0} +; CHECK_V2: !20 = !{!21} +; CHECK_V2: !21 = distinct !{!21, !22, !"test02: unknown scope"} +; CHECK_V2: !22 = distinct !{!22, !"test02"} +; CHECK_V2: !23 = !{!24} +; CHECK_V2: !24 = distinct !{!24, !22, !"test02: l_fum"} +; CHECK_V2: !25 = !{!12, !13, i64 4} +; CHECK_V2: !26 = !{!24, !21} +; CHECK_V2: !27 = !{!28} +; CHECK_V2: !28 = distinct !{!28, !29, !"test03: unknown scope"} +; CHECK_V2: !29 = distinct !{!29, !"test03"} +; CHECK_V2: !30 = !{!31} +; CHECK_V2: !31 = distinct !{!31, !29, !"test03: l_fum"} +; CHECK_V2: !32 = !{!31, !28} + +; CHECK_V3: !0 = !{i32 1, !"NumRegisterParameters", i32 0} +; CHECK_V3: !1 = !{i32 1, !"wchar_size", i32 4} +; CHECK_V3: !2 = !{!"clang version"} +; CHECK_V3: !3 = !{!4} +; CHECK_V3: !4 = distinct !{!4, !5, !"test01: unknown scope"} +; CHECK_V3: !5 = distinct !{!5, !"test01"} +; CHECK_V3: !6 = !{!7} +; CHECK_V3: !7 = distinct !{!7, !5, !"test01: l_fum"} +; CHECK_V3: !8 = !{!9, !10, i64 0} +; CHECK_V3: !9 = !{!"FUM", !10, i64 0, !13, i64 4} +; CHECK_V3: !10 = !{!"any pointer", !11, i64 0} +; CHECK_V3: !11 = !{!"omnipotent char", !12, i64 0} +; CHECK_V3: !12 = !{!"Simple C/C++ TBAA"} +; CHECK_V3: !13 = !{!"FOO", !10, i64 0} +; CHECK_V3: !14 = !{!7, !4} +; CHECK_V3: !15 = !{!16, !16, i64 0} +; CHECK_V3: !16 = 
!{!"int", !11, i64 0} +; CHECK_V3: !17 = !{!18} +; CHECK_V3: !18 = distinct !{!18, !19, !"test02: unknown scope"} +; CHECK_V3: !19 = distinct !{!19, !"test02"} +; CHECK_V3: !20 = !{!21} +; CHECK_V3: !21 = distinct !{!21, !19, !"test02: l_fum"} +; CHECK_V3: !22 = !{!9, !10, i64 4} +; CHECK_V3: !23 = !{!21, !18} +; CHECK_V3: !24 = !{!25} +; CHECK_V3: !25 = distinct !{!25, !26, !"test03: unknown scope"} +; CHECK_V3: !26 = distinct !{!26, !"test03"} +; CHECK_V3: !27 = !{!28} +; CHECK_V3: !28 = distinct !{!28, !26, !"test03: l_fum"} +; CHECK_V3: !29 = !{!28, !25}