Index: llvm/lib/Transforms/Scalar/SROA.cpp =================================================================== --- llvm/lib/Transforms/Scalar/SROA.cpp +++ llvm/lib/Transforms/Scalar/SROA.cpp @@ -773,6 +773,13 @@ assert((!LI.isSimple() || LI.getType()->isSingleValueType()) && "All simple FCA loads should have been pre-split"); + if (U->getOperandNo() == LI.getNoaliasProvenanceOperandIndex()) { + // Skip provenance + assert(LI.hasNoaliasProvenanceOperand() && + LI.getNoaliasProvenanceOperand() == *U); + return; + } + if (!IsOffsetKnown) return PI.setAborted(&LI); @@ -785,6 +792,13 @@ } void visitStoreInst(StoreInst &SI) { + if (U->getOperandNo() == SI.getNoaliasProvenanceOperandIndex()) { + // Skip provenance + assert(SI.hasNoaliasProvenanceOperand() && + SI.getNoaliasProvenanceOperand() == *U); + return; + } + Value *ValOp = SI.getValueOperand(); if (ValOp == *U) return PI.setEscapedAndAborted(&SI); @@ -916,6 +930,117 @@ "Map index doesn't point back to a slice with this user."); } + void gatherValidNoAliasPointerOffsets( + IRBuilderTy &IRB, IntrinsicInst &II, + SmallVectorImpl> &PtrOffsetSizes, + SmallVectorImpl &Indices, + SmallVectorImpl &MinusOneIndices, unsigned Index = 0) { + if (Index == MinusOneIndices.size()) { + // Check if the indices are compatible with the aggregate + auto PointeeType = + cast(II.getType()->getScalarType())->getElementType(); + auto LeafType = GetElementPtrInst::getIndexedType(PointeeType, Indices); + + if (LeafType == nullptr || !isa(LeafType)) { + LLVM_DEBUG(llvm::dbgs() << "gatherValidNoAliasPointerOffsets: " + "incompatible struct ? 
Is it a union?\n" + << II << "\n"); + return; + } + + // don't know how to compute the offsets without creating a GEP + GetElementPtrInst *GEP = + cast(IRB.CreateGEP(&II, Indices)); + assert(isa(GEP->getType()->getPointerElementType()) && + "noalias pointer is not a pointer?"); + APInt GEPOffset(DL.getPointerSizeInBits(), 0); + if (GEP->accumulateConstantOffset(DL, GEPOffset)) { + auto Offset = GEPOffset.getZExtValue(); + if (Offset < AllocSize) { + PtrOffsetSizes.push_back(std::make_pair( + (unsigned)Offset, (unsigned)DL.getTypeAllocSize( + GEP->getType()->getPointerElementType()))); + } + } + GEP->eraseFromParent(); + return; + } + + while (true) { + // Increment + ConstantInt *CI = cast(Indices[MinusOneIndices[Index]]); + Indices[MinusOneIndices[Index]] = + ConstantInt::get(CI->getType(), CI->getSExtValue() + 1, true); + + unsigned CurrentSize = PtrOffsetSizes.size(); + gatherValidNoAliasPointerOffsets(IRB, II, PtrOffsetSizes, Indices, + MinusOneIndices, Index + 1); + if (CurrentSize == PtrOffsetSizes.size()) { + // no new entries - recurse back; prepare for next iteration + Indices[MinusOneIndices[Index]] = + ConstantInt::get(CI->getType(), -1, true); + break; + } + } + } + + void visitNoaliasCopyGuard(IntrinsicInst &II) { + LLVM_DEBUG( + llvm::dbgs() + << "AllocaSlices::SliceBuilder: handling llvm.noalias.copy.guard:" + << (U->getOperandNo() == 0) << ":" << II << "\n"); + // Identify the usage, so that it can be split + if (II.use_empty()) + return markAsDead(II); + + if (U->getOperandNo() != 0) + return; + + enqueueUsers(II); + + SmallVector, 4> PtrOffsetSizes; + + // Provide as many slices as we have restrict pointers + MDNode *CopyGuardIndices = + cast(cast( + II.getOperand(Intrinsic::NoAliasCopyGuardIndicesArg)) + ->getMetadata()); + + IRBuilderTy IRB(II.getNextNode()); + SmallVector Indices; + SmallVector MinusOneIndices; + for (const MDOperand &MDOp : CopyGuardIndices->operands()) { + if (const MDNode *IMD = dyn_cast(MDOp)) { + Indices.clear(); + 
MinusOneIndices.clear(); + + unsigned CGIndex = 0; + + for (const MDOperand &MDIndex : IMD->operands()) { + ConstantInt *C = + cast(cast(MDIndex)->getValue()); + if (C->isMinusOne()) // accept any index at this place + MinusOneIndices.push_back(CGIndex); + Indices.push_back(C); + ++CGIndex; + } + gatherValidNoAliasPointerOffsets(IRB, II, PtrOffsetSizes, Indices, + MinusOneIndices); + } + } + + LLVM_DEBUG(llvm::dbgs() << "noalias pointers are at:\n"; + for (auto &P + : PtrOffsetSizes) { + llvm::dbgs() << " - {" << P.first << "," << P.second << "}\n"; + }); + + for (auto &P : PtrOffsetSizes) { + APInt TheOffset = Offset + P.first; + insertUse(II, TheOffset, P.second, false); + } + } + // Disable SRoA for any intrinsics except for lifetime invariants. // FIXME: What about debug intrinsics? This matches old behavior, but // doesn't make sense. @@ -930,6 +1055,52 @@ insertUse(II, Offset, Size, true); return; } + // look through noalias intrinsics + if (II.getIntrinsicID() == Intrinsic::noalias_decl) { + insertUse(II, Offset, AllocSize, true); + // do not enqueue direct users (?) 
They should be handled through a + // dependency on the original alloca + return; + } + if (II.getIntrinsicID() == Intrinsic::noalias) { + if (U->getOperandNo() == Intrinsic::NoAliasIdentifyPArg) { + insertUse(II, Offset, + DL.getTypeStoreSize( + II.getOperand(Intrinsic::NoAliasIdentifyPArg)->getType()), + false); + return; + } + if (U->getOperandNo() == 0) { + assert(II.getOperand(0) == *U); + // _only_ look through the first argument + enqueueUsers(II); + } + return; + } + if (II.getIntrinsicID() == Intrinsic::provenance_noalias) { + if (U->getOperandNo() == Intrinsic::ProvenanceNoAliasIdentifyPArg) { + insertUse(II, Offset, + DL.getTypeStoreSize( + II.getOperand(Intrinsic::ProvenanceNoAliasIdentifyPArg) + ->getType()), + false); + return; + } + // hmmm - do not look through the first argument for a + // llvm.provenance.noalias + return; + } + if (II.getIntrinsicID() == Intrinsic::noalias_arg_guard) { + if (U->getOperandNo() == 0) { + // _only_ look through the first argument + enqueueUsers(II); + } + return; + } + if (II.getIntrinsicID() == Intrinsic::noalias_copy_guard) { + visitNoaliasCopyGuard(II); + return; + } Base::visitIntrinsicInst(II); } @@ -1107,6 +1278,18 @@ #endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +static IntrinsicInst *partitionRepresentsNoAliasPointer(Partition &P) { + // A partition that has a 'llvm.noalias.copy.guard' use, represents a + // noalias pointer + for (auto &I : P) { + Use *U = I.getUse(); + if (auto *II = dyn_cast(U->getUser())) + if (II->getIntrinsicID() == Intrinsic::noalias_copy_guard) + return II; + } + return nullptr; +} + /// Walk the range of a partitioning looking for a common type to cover this /// sequence of slices. 
static Type *findCommonType(AllocaSlices::const_iterator B, @@ -1248,7 +1431,14 @@ LLVM_DEBUG(dbgs() << " original: " << PN << "\n"); LoadInst *SomeLoad = cast(PN.user_back()); - Type *LoadTy = SomeLoad->getType(); + if (SomeLoad->getPointerOperand() != &PN) { + // this must be the provenance -> ignore the speculation for now + LLVM_DEBUG(llvm::dbgs() << " not speculating dependency on provenance: " + << *SomeLoad << "\n"); + return; + } + + Type *LoadTy = cast(PN.getType())->getElementType(); IRBuilderTy PHIBuilder(&PN); PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(), PN.getName() + ".sroa.speculated"); @@ -2253,6 +2443,9 @@ const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset; Type *NewAllocaTy; + IntrinsicInst *OldNoAliasDecl = nullptr; + IntrinsicInst *NewNoAliasDecl = nullptr; + // This is a convenience and flag variable that will be null unless the new // alloca's integer operations should be widened to this integer type due to // passing isIntegerWideningViable above. If it is non-null, the desired @@ -2284,6 +2477,7 @@ uint64_t SliceSize = 0; bool IsSplittable = false; bool IsSplit = false; + IntrinsicInst *RepresentsNoAlias = nullptr; Use *OldUse = nullptr; Instruction *OldPtr = nullptr; @@ -2302,7 +2496,8 @@ uint64_t NewAllocaEndOffset, bool IsIntegerPromotable, VectorType *PromotableVecTy, SmallSetVector &PHIUsers, - SmallSetVector &SelectUsers) + SmallSetVector &SelectUsers, + IntrinsicInst *ReprNoAlias) : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI), NewAllocaBeginOffset(NewAllocaBeginOffset), NewAllocaEndOffset(NewAllocaEndOffset), @@ -2317,14 +2512,15 @@ ElementTy(VecTy ? VecTy->getElementType() : nullptr), ElementSize(VecTy ? 
DL.getTypeSizeInBits(ElementTy).getFixedSize() / 8 : 0), - PHIUsers(PHIUsers), SelectUsers(SelectUsers), - IRB(NewAI.getContext(), ConstantFolder()) { + RepresentsNoAlias(ReprNoAlias), PHIUsers(PHIUsers), + SelectUsers(SelectUsers), IRB(NewAI.getContext(), ConstantFolder()) { if (VecTy) { assert((DL.getTypeSizeInBits(ElementTy).getFixedSize() % 8) == 0 && "Only multiple-of-8 sized vector elements are viable"); ++NumVectorized; } assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy)); + prepareNoAliasDecl(); } bool visit(AllocaSlices::const_iterator I) { @@ -2860,6 +3056,40 @@ return !II.isVolatile(); } + Instruction *maybeIntroduceNoAlias(LoadInst *Load) { + if (!RepresentsNoAlias) + return Load; + + Instruction *NoAliasDeclI = NewNoAliasDecl; + Value *NoAliasDecl = NoAliasDeclI; + if (!NoAliasDeclI) { + NoAliasDecl = RepresentsNoAlias->getOperand( + Intrinsic::NoAliasCopyGuardNoAliasDeclArg); + NoAliasDeclI = dyn_cast(NoAliasDecl); + } + auto ScopeArg = + RepresentsNoAlias->getOperand(Intrinsic::NoAliasCopyGuardScopeArg); + + assert((!NoAliasDeclI || + (ScopeArg == + NoAliasDeclI->getOperand(Intrinsic::NoAliasDeclScopeArg))) && + "ScopeArgs must be identical"); + if (Load->getType()->isPointerTy()) { + auto NoAlias = + IRB.CreateNoAliasPointer(Load, NoAliasDecl, Load->getPointerOperand(), + ScopeArg, Load->getName() + ".noalias"); + if (NoAliasDeclI) + NoAlias->setOperand( + Intrinsic::NoAliasIdentifyPObjIdArg, + NoAliasDeclI->getOperand(Intrinsic::NoAliasDeclObjIdArg)); + + return NoAlias; + } + + assert(false && "Need PtrToInt"); + return Load; + } + bool visitMemTransferInst(MemTransferInst &II) { // Rewriting of memory transfer instructions can be a bit tricky. We break // them into two categories: split intrinsics and unsplit intrinsics. @@ -3017,12 +3247,14 @@ Value *Src; if (VecTy && !IsWholeAlloca && !IsDest) { + // FIXME: should we handle noalias annotations here ? 
Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, NewAI.getAlign(), "load"); Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec"); } else if (IntTy && !IsWholeAlloca && !IsDest) { - Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, - NewAI.getAlign(), "load"); + LoadInst *Load = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, + NewAI.getAlign(), "load"); + Src = maybeIntroduceNoAlias(Load); Src = convertValue(DL, IRB, Src, IntTy); uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset; Src = extractInteger(DL, IRB, Src, SubIntTy, Offset, "extract"); @@ -3031,7 +3263,7 @@ II.isVolatile(), "copyload"); if (AATags) Load->setAAMetadata(AATags); - Src = Load; + Src = maybeIntroduceNoAlias(Load); } if (VecTy && !IsWholeAlloca && IsDest) { @@ -3051,11 +3283,129 @@ IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile())); if (AATags) Store->setAAMetadata(AATags); - LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); + LLVM_DEBUG(dbgs() << "(3) to: " << *Store << "\n"); return !II.isVolatile(); } - bool visitIntrinsicInst(IntrinsicInst &II) { + void prepareNoAliasDecl() { + OldNoAliasDecl = nullptr; + NewNoAliasDecl = nullptr; + for (auto U : OldAI.users()) { + IntrinsicInst *II = dyn_cast(U); + if (II && II->getIntrinsicID() == Intrinsic::noalias_decl) { + if (OldNoAliasDecl) { + // We alreay found a llvm.noalias.decl - leave it up to the visiter to + // propagate + OldNoAliasDecl = nullptr; + NewNoAliasDecl = nullptr; + break; + } + IRB.SetInsertPoint(II); + IRB.SetCurrentDebugLocation(II->getDebugLoc()); + IRB.getInserter().SetNamePrefix(Twine(NewAI.getName()) + + ".noalias.decl."); + + OldNoAliasDecl = II; + LLVM_DEBUG(dbgs() << "Found llvm.noalias.decl: " << *II << "\n"); + ConstantInt *OldId = cast( + II->getArgOperand(Intrinsic::NoAliasDeclObjIdArg)); + NewNoAliasDecl = cast(IRB.CreateNoAliasDeclaration( + &NewAI, NewAllocaBeginOffset + OldId->getZExtValue(), + II->getArgOperand(2))); + LLVM_DEBUG(dbgs() << "New 
llvm.noalias.decl: " << *NewNoAliasDecl + << "\n"); + // continue - it is possible we see multiple llvm.noalias.decl! + } + } + } + + bool visitNoAliasDeclIntrinsicInst(IntrinsicInst &II) { + assert(II.getIntrinsicID() == Intrinsic::noalias_decl); + LLVM_DEBUG(dbgs() << " original: " << II << "\n"); + Value *New; + if (OldNoAliasDecl) { + assert(OldNoAliasDecl == &II); + assert(NewNoAliasDecl != nullptr); + New = NewNoAliasDecl; + } else { + assert(NewNoAliasDecl == nullptr); + ConstantInt *OldId = + cast(II.getArgOperand(Intrinsic::NoAliasDeclObjIdArg)); + New = cast(IRB.CreateNoAliasDeclaration( + &NewAI, NewAllocaBeginOffset + OldId->getZExtValue(), + II.getArgOperand(2))); + } + (void)New; + LLVM_DEBUG(dbgs() << " to: " << *New << "\n"); + + // Record this instruction for deletion. + Pass.DeadInsts.insert(&II); + + // nothing else to do - preparation was already done + return true; + } + + bool visitProvenanceNoAliasIntrinsicInst(IntrinsicInst &II) { + assert(II.getIntrinsicID() == Intrinsic::provenance_noalias); + assert(II.getArgOperand(Intrinsic::ProvenanceNoAliasIdentifyPArg) == + OldPtr); + LLVM_DEBUG(dbgs() << " original: " << II << "\n"); + if (II.getArgOperand(Intrinsic::ProvenanceNoAliasNoAliasDeclArg) == + OldNoAliasDecl) { + assert(OldNoAliasDecl && NewNoAliasDecl && + "If we get here, we must have an old and a new llvm.noalias.decl"); + II.setArgOperand(Intrinsic::ProvenanceNoAliasNoAliasDeclArg, + NewNoAliasDecl); + } + II.setArgOperand( + Intrinsic::ProvenanceNoAliasIdentifyPArg, + getNewAllocaSlicePtr( + IRB, II.getArgOperand(Intrinsic::ProvenanceNoAliasIdentifyPArg) + ->getType())); + if (NewAllocaBeginOffset > 0) { + Value *OldObjIdV = + II.getArgOperand(Intrinsic::ProvenanceNoAliasIdentifyPObjIdArg); + auto NewObjId = ConstantInt::get( + OldObjIdV->getType(), + cast(OldObjIdV)->getZExtValue() + NewAllocaBeginOffset); + II.setArgOperand(Intrinsic::ProvenanceNoAliasIdentifyPObjIdArg, NewObjId); + } + LLVM_DEBUG(dbgs() << " to: " << II << "\n"); 
+ deleteIfTriviallyDead(OldPtr); + return true; + } + + bool visitNoAliasIntrinsicInst(IntrinsicInst &II) { + assert(II.getIntrinsicID() == Intrinsic::noalias); + assert(II.getArgOperand(Intrinsic::NoAliasIdentifyPArg) == OldPtr); + LLVM_DEBUG(dbgs() << " original: " << II << "\n"); + if (II.getArgOperand(Intrinsic::NoAliasNoAliasDeclArg) == OldNoAliasDecl) { + assert(OldNoAliasDecl && NewNoAliasDecl && + "If we get here, we must have an old and a new llvm.noalias.decl"); + II.setArgOperand(Intrinsic::NoAliasNoAliasDeclArg, NewNoAliasDecl); + } + II.setArgOperand( + Intrinsic::NoAliasIdentifyPArg, + getNewAllocaSlicePtr( + IRB, II.getArgOperand(Intrinsic::NoAliasIdentifyPArg)->getType())); + if (NewAllocaBeginOffset > 0) { + Value *OldObjIdV = II.getArgOperand(Intrinsic::NoAliasIdentifyPObjIdArg); + auto NewObjId = ConstantInt::get( + OldObjIdV->getType(), + cast(OldObjIdV)->getZExtValue() + NewAllocaBeginOffset); + II.setArgOperand(Intrinsic::NoAliasIdentifyPObjIdArg, NewObjId); + } + LLVM_DEBUG(dbgs() << " to: " << II << "\n"); + deleteIfTriviallyDead(OldPtr); + return true; + } + + bool visitNoAliasCopyGuardIntrinsicInst(IntrinsicInst &II) { + assert(II.getIntrinsicID() == Intrinsic::noalias_copy_guard); + return true; + } + + bool visitLifetimeIntrinsicInst(IntrinsicInst &II) { assert(II.isLifetimeStartOrEnd()); LLVM_DEBUG(dbgs() << " original: " << II << "\n"); assert(II.getArgOperand(1) == OldPtr); @@ -3093,6 +3443,25 @@ return true; } + bool visitIntrinsicInst(IntrinsicInst &II) { + switch (II.getIntrinsicID()) { + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + return visitLifetimeIntrinsicInst(II); + case Intrinsic::noalias_decl: + return visitNoAliasDeclIntrinsicInst(II); + case Intrinsic::noalias: + return visitNoAliasIntrinsicInst(II); + case Intrinsic::provenance_noalias: + return visitProvenanceNoAliasIntrinsicInst(II); + case Intrinsic::noalias_copy_guard: + return visitNoAliasCopyGuardIntrinsicInst(II); + default: + assert(false && 
"SROA: SliceRewriter: unhandled intrinsic"); + return false; + } + } + void fixLoadStoreAlign(Instruction &Root) { // This algorithm implements the same visitor loop as // hasUnsafePHIOrSelectUse, and fixes the alignment of each load @@ -3184,6 +3553,277 @@ }; namespace { +static llvm::IntrinsicInst *getBaseAsCopyGuardOrNull(llvm::Value *V) { + llvm::IntrinsicInst *II = dyn_cast(V->stripInBoundsOffsets()); + if (II && (II->getIntrinsicID() == Intrinsic::noalias_copy_guard)) + return II; + + return nullptr; +} + +static bool +areGepIndicesCompatibleWithCopyGuard(GetElementPtrInst *GEP, + llvm::Instruction *CopyGuardII) { + assert(CopyGuardII && "We need a llvm.noalias.copy.guard"); + + MDNode *CopyGuardIndices = cast( + cast( + CopyGuardII->getOperand(Intrinsic::NoAliasCopyGuardIndicesArg)) + ->getMetadata()); + for (const MDOperand &MDOp : CopyGuardIndices->operands()) { + if (const MDNode *IMD = dyn_cast(MDOp)) { + if (IMD->getNumOperands() == GEP->getNumIndices()) { + bool IndicesAreCompatible = true; + unsigned CGIndex = 0; + for (Value *Index : GEP->indices()) { + const MDOperand &MDIndex = IMD->getOperand(CGIndex); + ++CGIndex; + ConstantInt *C_lhs = + cast(cast(MDIndex)->getValue()); + if (C_lhs->isMinusOne()) + continue; // accept any index at this place + ConstantInt *C_rhs = dyn_cast(Index); + if ((C_rhs == nullptr) || + (C_lhs->getSExtValue() != + C_rhs->getSExtValue())) { // compare int64 - the ConstantInt can + // have different types + IndicesAreCompatible = false; + break; + } + } + if (IndicesAreCompatible) { + return true; + } + } + } + } + return false; +} + +static bool +areAllZeroIndicesCompatibleWithCopyGuard(llvm::Instruction *CopyGuardII) { + assert(CopyGuardII && "We need a llvm.noalias.copy.guard"); + + MDNode *CopyGuardIndices = cast( + cast( + CopyGuardII->getOperand(Intrinsic::NoAliasCopyGuardIndicesArg)) + ->getMetadata()); + for (const MDOperand &MDOp : CopyGuardIndices->operands()) { + if (const MDNode *IMD = dyn_cast(MDOp)) { + bool 
IndicesAreCompatible = true; + for (const MDOperand &MDIndex : IMD->operands()) { + ConstantInt *C_lhs = + cast(cast(MDIndex)->getValue()); + if (C_lhs->isZero() || C_lhs->isMinusOne()) + continue; // accept 0 or any index at this place + // otherwise, we do not have all-zero indices + IndicesAreCompatible = false; + break; + } + if (IndicesAreCompatible) { + return true; + } + } + } + return false; +} + +// Check if the load corresponds to a restrict pointer, as specified in the +// CopyGuard information. +// If so, add and return 'llvm.noalias' before the load. If the original load +// needs to be replaced, due to bitcasts, it is returned through the 'Load' +// argument. +static llvm::Instruction *introduceNoAliasWhenCopyGuardIndicesAreCompatible( + llvm::LoadInst *Load, llvm::Instruction *CopyGuardII, const DataLayout &DL, + SmallVector *TrackSliceUses = nullptr) { + Value *PtrOp = Load->getPointerOperand(); + + if (TrackSliceUses) + TrackSliceUses->push_back( + &Load->getOperandUse(Load->getPointerOperandIndex())); + + if (CopyGuardII == nullptr) + return Load; + + // Possible cases: + // 1) load ( gep ( CopyGuard) ) + if (GetElementPtrInst *GEP = dyn_cast(PtrOp)) { + if (areGepIndicesCompatibleWithCopyGuard(GEP, CopyGuardII)) { + IRBuilderTy IRB(Load->getNextNode()); + // A compatible set of indices was found - introduce a noalias intrinsic + // FIXME: what AAMetadata should we put on the llvm.noalias ? 
+ auto NoAlias = IRB.CreateNoAliasPointer( + Load, + CopyGuardII->getOperand(Intrinsic::NoAliasCopyGuardNoAliasDeclArg), + GEP, CopyGuardII->getOperand(Intrinsic::NoAliasCopyGuardScopeArg), + Load->getName() + ".noalias"); + // juggle around + Load->replaceAllUsesWith(NoAlias); + NoAlias->setOperand(0, Load); + LLVM_DEBUG(llvm::dbgs() + << " - compatible, introduced:" << *NoAlias << "\n"); + + if (TrackSliceUses) + TrackSliceUses->push_back( + &NoAlias->getOperandUse(Intrinsic::NoAliasIdentifyPArg)); + + return NoAlias; + } + + return Load; + } + + if (BitCastInst *BCI = dyn_cast(PtrOp)) { + // We want to pass it as an integer type + if (!Load->getType()->isIntegerTy()) { + LLVM_DEBUG(llvm::dbgs() + << "copy.guard: ignoring non integer:" << *Load << "\n"); + return Load; + } + + // 2) load (bitcast (gep (CopyGuard))) + if (GetElementPtrInst *GEP = + dyn_cast(BCI->getOperand(0))) { + Type *TypeToLoad = GEP->getType()->getPointerElementType(); + // The original type must be a pointer type + if (!TypeToLoad->isPointerTy()) { + LLVM_DEBUG(llvm::dbgs() << "copy.guard: original type is not a pointer:" + << *TypeToLoad << "\n"); + return Load; + } + + // Sizes must be identical + if (DL.getTypeStoreSizeInBits(TypeToLoad) != + DL.getTypeStoreSizeInBits(Load->getType())) { + LLVM_DEBUG(llvm::dbgs() << "copy.guard: type sizes do not match\n"); + return Load; + } + + if (!areGepIndicesCompatibleWithCopyGuard(GEP, CopyGuardII)) { + return Load; + } + + IRBuilderTy IRB(Load->getNextNode()); + + LoadInst *NewLoad = IRB.CreateAlignedLoad( + TypeToLoad, GEP, Load->getAlign(), Load->getName() + ".sroa_as_ptr"); + AAMDNodes AATags; + Load->getAAMetadata(AATags); + NewLoad->setAAMetadata(AATags); + + // A compatible set of indices was found - introduce a noalias intrinsic + // FIXME: what AAMetadata should we put on the llvm.noalias ? 
+ auto NoAlias = IRB.CreateNoAliasPointer( + NewLoad, + CopyGuardII->getOperand(Intrinsic::NoAliasCopyGuardNoAliasDeclArg), + GEP, CopyGuardII->getOperand(Intrinsic::NoAliasCopyGuardScopeArg), + NewLoad->getName() + ".noalias"); + auto PtrCast = IRB.CreatePtrToInt(NoAlias, Load->getType(), + Load->getName() + ".sroa_as_int"); + + // juggle around + Load->replaceAllUsesWith(PtrCast); + Load->eraseFromParent(); + + if (BCI->use_empty()) + BCI->eraseFromParent(); + + if (TrackSliceUses) { + TrackSliceUses->back() = + &NewLoad->getOperandUse(NewLoad->getPointerOperandIndex()); + TrackSliceUses->push_back( + &NoAlias->getOperandUse(Intrinsic::NoAliasIdentifyPArg)); + } + // and return the new load + LLVM_DEBUG(llvm::dbgs() << " - compatible bitcast (gep), introduced:" + << "\n -- " << *NewLoad << "\n -- " << *NoAlias + << "\n -- " << *PtrCast << "\n"); + return cast(PtrCast); + } + + // 3) load (bitcast (CopyGuard)) + if (BCI->getOperand(0) == CopyGuardII) { + if (!areAllZeroIndicesCompatibleWithCopyGuard(CopyGuardII)) + return Load; + + Type *TypeToLoad = CopyGuardII->getType()->getPointerElementType(); + while (true) { + if (StructType *ST = dyn_cast(TypeToLoad)) { + TypeToLoad = ST->getElementType(0); + continue; + } + if (ArrayType *AT = dyn_cast(TypeToLoad)) { + TypeToLoad = AT->getElementType(); + continue; + } + if (VectorType *VT = dyn_cast(TypeToLoad)) { + TypeToLoad = VT->getElementType(); + continue; + } + break; + } + assert(TypeToLoad->isPointerTy() && + "Only pointers can have noalias info"); + + // Sizes must be identical + if (DL.getTypeStoreSizeInBits(TypeToLoad) != + DL.getTypeStoreSizeInBits(Load->getType())) { + LLVM_DEBUG(llvm::dbgs() << "copy.guard: type sizes do not match\n"); + return Load; + } + + IRBuilderTy IRB(Load->getNextNode()); + + auto AS = Load->getPointerAddressSpace(); + auto CGCast = + IRB.CreatePointerCast(CopyGuardII, TypeToLoad->getPointerTo(AS)); + + LoadInst *NewLoad = + IRB.CreateAlignedLoad(TypeToLoad, CGCast, 
Load->getAlign(), + Load->getName() + ".sroa_as_ptr"); + AAMDNodes AATags; + Load->getAAMetadata(AATags); + NewLoad->setAAMetadata(AATags); + + // A compatible set of indices was found - introduce a noalias intrinsic + // FIXME: what AAMetadata should we put on the llvm.noalias ? + auto NoAlias = IRB.CreateNoAliasPointer( + NewLoad, + CopyGuardII->getOperand(Intrinsic::NoAliasCopyGuardNoAliasDeclArg), + CGCast, CopyGuardII->getOperand(Intrinsic::NoAliasCopyGuardScopeArg), + NewLoad->getName() + ".noalias"); + auto PtrCast = IRB.CreatePtrToInt(NoAlias, Load->getType(), + Load->getName() + ".sroa_as_int"); + + // juggle around + Load->replaceAllUsesWith(PtrCast); + Load->eraseFromParent(); + + if (BCI->use_empty()) + BCI->eraseFromParent(); + + if (TrackSliceUses) { + TrackSliceUses->back() = + &NewLoad->getOperandUse(NewLoad->getPointerOperandIndex()); + TrackSliceUses->push_back( + &NoAlias->getOperandUse(Intrinsic::NoAliasIdentifyPArg)); + } + // and return the new load + LLVM_DEBUG(llvm::dbgs() + << " - compatible bitcast (guard), introduced:" + << "\n -- " << *CGCast << "\n -- " << *NewLoad << "\n -- " + << *NoAlias << "\n -- " << *PtrCast << "\n"); + return cast(PtrCast); + } + + LLVM_DEBUG(llvm::dbgs() << "copy.guard: unhandled bitcast:" << BCI << "\n"); + return Load; + } + + LLVM_DEBUG(llvm::dbgs() << "copy.guard: unhandled:" << Load << "\n"); + // unhandled other situation + return Load; +} /// Visitor to rewrite aggregate loads and stores as scalar. /// @@ -3193,6 +3833,7 @@ class AggLoadStoreRewriter : public InstVisitor { // Befriend the base class so it can delegate to private visit methods. friend class InstVisitor; + typedef InstVisitor Base; /// Queue of pointer uses to analyze and potentially rewrite. 
SmallVector Queue; @@ -3327,40 +3968,69 @@ struct LoadOpSplitter : public OpSplitter { AAMDNodes AATags; + Instruction *CopyGuardII = nullptr; + unsigned CGIIndex = 0; LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy, - AAMDNodes AATags, Align BaseAlign, const DataLayout &DL) + AAMDNodes AATags, Align BaseAlign, const DataLayout &DL, + Instruction *CopyGuardII_) : OpSplitter(InsertionPoint, Ptr, BaseTy, BaseAlign, DL), - AATags(AATags) {} + AATags(AATags), CopyGuardII(CopyGuardII_) {} /// Emit a leaf load of a single value. This is called at the leaves of the /// recursive emission to actually load values. void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) { assert(Ty->isSingleValueType()); // Load the single value and insert it using the indices. + auto Ptr = this->Ptr; // Make sure _NOT_ to overwrite the Ptr member + if (CopyGuardII) { + assert(CopyGuardII == Ptr && "Ptr != CopyGuardII ???"); + Ptr = CopyGuardII->getOperand(0); // look through noalias.copy.guard + } Value *GEP = IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep"); - LoadInst *Load = + Instruction *PValue; + LoadInst *PLoad = IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load"); if (AATags) - Load->setAAMetadata(AATags); - Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert"); - LLVM_DEBUG(dbgs() << " to: " << *Load << "\n"); + PLoad->setAAMetadata(AATags); + PValue = introduceNoAliasWhenCopyGuardIndicesAreCompatible( + PLoad, CopyGuardII, DL); + + Agg = IRB.CreateInsertValue(Agg, PValue, Indices, Name + ".insert"); + LLVM_DEBUG(dbgs() << " to: " << *PValue << "\n"); } }; bool visitLoadInst(LoadInst &LI) { - assert(LI.getPointerOperand() == *U); - if (!LI.isSimple() || LI.getType()->isSingleValueType()) + if (U->getOperandNo() == LI.getNoaliasProvenanceOperandIndex()) { + // Skip provenance + assert(LI.hasNoaliasProvenanceOperand() && + LI.getNoaliasProvenanceOperand() == *U); return false; + } + assert(LI.getPointerOperand() == 
*U); + Instruction *CopyGuardII = getBaseAsCopyGuardOrNull(LI.getPointerOperand()); + if (CopyGuardII) { + LLVM_DEBUG(llvm::dbgs() << " Replacing Load:" << LI + << "\n" + " Depends on:" + << *CopyGuardII << "\n"); + } + if (!LI.isSimple() || LI.getType()->isSingleValueType()) { + LoadInst *PLI = &LI; + auto Load = introduceNoAliasWhenCopyGuardIndicesAreCompatible( + PLI, CopyGuardII, DL); + return (Load != PLI); + } // We have an aggregate being loaded, split it apart. LLVM_DEBUG(dbgs() << " original: " << LI << "\n"); AAMDNodes AATags; LI.getAAMetadata(AATags); LoadOpSplitter Splitter(&LI, *U, LI.getType(), AATags, - getAdjustedAlignment(&LI, 0), DL); + getAdjustedAlignment(&LI, 0), DL, CopyGuardII); Value *V = UndefValue::get(LI.getType()); Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca"); Visited.erase(&LI); @@ -3420,6 +4090,30 @@ return false; } + // Look through noalias intrinsics + bool visitIntrinsicInst(IntrinsicInst &II) { + if (II.getIntrinsicID() == Intrinsic::noalias) { + if (II.getOperand(0) == *U) { + enqueueUsers(II); + } + return false; + } + if (II.getIntrinsicID() == Intrinsic::provenance_noalias || + II.getIntrinsicID() == Intrinsic::noalias_decl) { + return false; + } + if (II.getIntrinsicID() == Intrinsic::noalias_copy_guard) { + LLVM_DEBUG(llvm::dbgs() + << "AggLoadStoreRewriter: handling llvm.noalias.copy.guard:" + << (II.getOperand(0) == *U) << ":" << II << "\n"); + if (II.getOperand(0) == *U) + enqueueUsers(II); + return false; + } + + return Base::visitIntrinsicInst(II); + } + bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) { enqueueUsers(ASC); return false; @@ -3928,8 +4622,8 @@ // First, we rewrite all of the split loads, and just accumulate each split // load in a parallel structure. We also build the slices for them and append // them to the alloca slices. 
- SmallDenseMap, 1> SplitLoadsMap; - std::vector SplitLoads; + SmallDenseMap, 1> SplitLoadsMap; + std::vector SplitLoads; const DataLayout &DL = AI.getModule()->getDataLayout(); for (LoadInst *LI : Loads) { SplitLoads.clear(); @@ -3948,6 +4642,8 @@ Instruction *BasePtr = cast(LI->getPointerOperand()); IRB.SetInsertPoint(LI); + Instruction *CopyGuardII = getBaseAsCopyGuardOrNull(BasePtr); + LLVM_DEBUG(dbgs() << " Splitting load: " << *LI << "\n"); uint64_t PartOffset = 0, PartSize = Offsets.Splits.front(); @@ -3966,18 +4662,23 @@ PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); + SmallVector UsesToTrack; + auto *PValue = introduceNoAliasWhenCopyGuardIndicesAreCompatible( + PLoad, CopyGuardII, DL, &UsesToTrack); + // Append this load onto the list of split loads so we can find it later // to rewrite the stores. - SplitLoads.push_back(PLoad); + SplitLoads.push_back(PValue); // Now build a new slice for the alloca. - NewSlices.push_back( - Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize, - &PLoad->getOperandUse(PLoad->getPointerOperandIndex()), - /*IsSplittable*/ false)); - LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset() - << ", " << NewSlices.back().endOffset() - << "): " << *PLoad << "\n"); + for (Use *PUse : UsesToTrack) { + NewSlices.push_back(Slice(BaseOffset + PartOffset, + BaseOffset + PartOffset + PartSize, PUse, + /*IsSplittable*/ false)); + LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset() + << ", " << NewSlices.back().endOffset() + << "): " << *PUse->getUser() << "\n"); + } // See if we've handled all the splits. if (Idx >= Size) @@ -4008,7 +4709,7 @@ LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n"); for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) { - LoadInst *PLoad = SplitLoads[Idx]; + auto *PLoad = SplitLoads[Idx]; uint64_t PartOffset = Idx == 0 ? 
0 : Offsets.Splits[Idx - 1]; auto *PartPtrTy = PLoad->getType()->getPointerTo(SI->getPointerAddressSpace()); @@ -4070,13 +4771,14 @@ "Cannot represent alloca access size using 64-bit integers!"); Value *LoadBasePtr = LI->getPointerOperand(); + Instruction *CopyGuardII = getBaseAsCopyGuardOrNull(LoadBasePtr); Instruction *StoreBasePtr = cast(SI->getPointerOperand()); LLVM_DEBUG(dbgs() << " Splitting store: " << *SI << "\n"); // Check whether we have an already split load. auto SplitLoadsMapI = SplitLoadsMap.find(LI); - std::vector *SplitLoads = nullptr; + std::vector *SplitLoads = nullptr; if (SplitLoadsMapI != SplitLoadsMap.end()) { SplitLoads = &SplitLoadsMapI->second; assert(SplitLoads->size() == Offsets.Splits.size() + 1 && @@ -4093,19 +4795,21 @@ auto *StorePartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace()); // Either lookup a split load or create one. - LoadInst *PLoad; + Instruction *PLoad; if (SplitLoads) { PLoad = (*SplitLoads)[Idx]; } else { IRB.SetInsertPoint(LI); auto AS = LI->getPointerAddressSpace(); - PLoad = IRB.CreateAlignedLoad( + LoadInst *NewPLoad = IRB.CreateAlignedLoad( PartTy, getAdjustedPtr(IRB, DL, LoadBasePtr, APInt(DL.getIndexSizeInBits(AS), PartOffset), LoadPartPtrTy, LoadBasePtr->getName() + "."), getAdjustedAlignment(LI, PartOffset), /*IsVolatile*/ false, LI->getName()); + PLoad = introduceNoAliasWhenCopyGuardIndicesAreCompatible( + NewPLoad, CopyGuardII, DL); } // And store this partition. @@ -4217,6 +4921,7 @@ // or an i8 array of an appropriate size. 
Type *SliceTy = nullptr; const DataLayout &DL = AI.getModule()->getDataLayout(); + auto RepresentsNoAlias = partitionRepresentsNoAliasPointer(P); if (Type *CommonUseTy = findCommonType(P.begin(), P.end(), P.endOffset())) if (DL.getTypeAllocSize(CommonUseTy).getFixedSize() >= P.size()) SliceTy = CommonUseTy; @@ -4239,6 +4944,14 @@ if (VecTy) SliceTy = VecTy; + if (RepresentsNoAlias && !SliceTy->isPointerTy()) { + if (DL.getTypeStoreSizeInBits(SliceTy) == + DL.getTypeStoreSizeInBits(SliceTy->getPointerTo())) { + // a restrict pointer must be a pointer + SliceTy = SliceTy->getPointerTo(); + } + } + // Check for the case where we're going to rewrite to a new alloca of the // exact same type as the original, and with the same access offsets. In that // case, re-use the existing alloca, but still run through the rewriter to @@ -4280,7 +4993,7 @@ AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(), P.endOffset(), IsIntegerPromotable, VecTy, - PHIUsers, SelectUsers); + PHIUsers, SelectUsers, RepresentsNoAlias); bool Promotable = true; for (Slice *S : P.splitSliceTails()) { Promotable &= Rewriter.visit(S); Index: llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp =================================================================== --- llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -25,7 +25,6 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -35,6 +34,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -45,6 +45,7 @@ #include "llvm/IR/Type.h" #include "llvm/IR/User.h" 
#include "llvm/Support/Casting.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include
 #include
@@ -61,40 +62,183 @@
 STATISTIC(NumDeadAlloca, "Number of dead alloca's removed");
 STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");
-bool llvm::isAllocaPromotable(const AllocaInst *AI) {
+class PromotableChecker {
+public:
+  bool check(bool c) { return c; }
+  void trackRemovable(const Instruction *I) {}
+  void trackOperandToZero(const Instruction *I, int operand) {}
+  void trackNoAliasDecl(const IntrinsicInst *II) {}
+};
+
+class PromotableTracker {
+public:
+  bool check(bool c) {
+    assert(!c && "PromotableTracker::check failed");
+    return false;
+  }
+  void trackRemovable(Instruction *I) {
+    // FIXME: Performance Warning: linear search - might become slow (?)
+    if (std::find(mRemovables.begin(), mRemovables.end(), I) ==
+        mRemovables.end())
+      mRemovables.push_back(I);
+  }
+  void trackOperandToZero(Instruction *I, int operand) {
+    mZeroOperands.emplace_back(I, operand);
+  }
+  void trackNoAliasDecl(IntrinsicInst *II) { mNoAliasDecls.push_back(II); }
+
+public:
+  SmallVector mRemovables;
+  SmallVector, 4> mZeroOperands;
+  SmallVector mNoAliasDecls;
+};
+
+// Return true if the only usage of this pointer is as identifyP argument for
+// llvm.noalias or llvm.provenance.noalias (either direct or recursive)
+// Look through bitcast, getelementptr, llvm.noalias, llvm.provenance.noalias
+
+template
+bool onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(Value *V, PT &pt);
+
+template
+bool isAndOnlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(IntrinsicInst *II,
+                                                           unsigned OpNo,
+                                                           PT &pt) {
+  if (II->getIntrinsicID() == Intrinsic::provenance_noalias) {
+    if (OpNo == 0) {
+      if (!onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(II, pt))
+        return false;
+      pt.trackRemovable(II);
+    } else if (OpNo == Intrinsic::ProvenanceNoAliasIdentifyPArg) {
+      pt.trackOperandToZero(II, OpNo);
+    } else if (OpNo ==
Intrinsic::ProvenanceNoAliasIdentifyPProvenanceArg) { + pt.trackOperandToZero(II, OpNo); + } else { + assert(false && "Unexpected llvm.provenance.noalias dependency"); + } + return true; + } else if (II->getIntrinsicID() == Intrinsic::noalias) { + if (OpNo == 0) { + if (!onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(II, pt)) + return false; + pt.trackRemovable(II); + } else if (OpNo == Intrinsic::NoAliasIdentifyPArg) { + pt.trackOperandToZero(II, OpNo); + } else { + assert(false && "Unexpected llvm.provenance.noalias dependency"); + } + return true; + } + + return false; +} + +template +bool onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(Value *V, PT &pt) { + for (Use &U_ : V->uses()) { + unsigned OpNo = U_.getOperandNo(); + User *U = U_.getUser(); + if (IntrinsicInst *II = dyn_cast(U)) { + if (isAndOnlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(II, OpNo, pt)) + continue; + return false; + } else if (BitCastInst *BCI = dyn_cast(U)) { + if (!onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(BCI, pt)) + return false; + pt.trackRemovable(BCI); + } else if (GetElementPtrInst *GEPI = dyn_cast(U)) { + if (!onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(GEPI, pt)) + return false; + pt.trackRemovable(GEPI); + } else if (AddrSpaceCastInst *ASCI = dyn_cast(U)) { + if (!onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(ASCI, pt)) + return false; + pt.trackRemovable(ASCI); + } else { + return false; + } + } + return true; +} + +template bool trackAllocaPromotable(AllocaInst *AI, PT &pt) { // FIXME: If the memory unit is of pointer or integer type, we can permit // assignments to subsections of the memory unit. unsigned AS = AI->getType()->getAddressSpace(); // Only allow direct and non-volatile loads and stores... 
- for (const User *U : AI->users()) { - if (const LoadInst *LI = dyn_cast(U)) { + for (Use &U_ : AI->uses()) { + unsigned OpNo = U_.getOperandNo(); + User *U = U_.getUser(); + + if (LoadInst *LI = dyn_cast(U)) { // Note that atomic loads can be transformed; atomic semantics do // not have any meaning for a local alloca. - if (LI->isVolatile()) + if (pt.check(LI->isVolatile())) return false; - } else if (const StoreInst *SI = dyn_cast(U)) { - if (SI->getOperand(0) == AI) + if (OpNo == LI->getNoaliasProvenanceOperandIndex()) { + // Load will be removed. Disconnect provenance.noalias dependency + pt.trackOperandToZero(LI, OpNo); + } + } else if (StoreInst *SI = dyn_cast(U)) { + if (pt.check(OpNo == 0)) return false; // Don't allow a store OF the AI, only INTO the AI. // Note that atomic stores can be transformed; atomic semantics do // not have any meaning for a local alloca. - if (SI->isVolatile()) + if (pt.check(SI->isVolatile())) return false; - } else if (const IntrinsicInst *II = dyn_cast(U)) { - if (!II->isLifetimeStartOrEnd()) + if (OpNo == SI->getNoaliasProvenanceOperandIndex()) { + // Store will be removed. 
Disconnect provenance.noalias dependency + pt.trackOperandToZero(SI, OpNo); + } + } else if (IntrinsicInst *II = dyn_cast(U)) { + switch (II->getIntrinsicID()) { + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + pt.trackRemovable(II); + break; + case Intrinsic::noalias_decl: + pt.trackNoAliasDecl(II); + break; + case Intrinsic::noalias: + case Intrinsic::provenance_noalias: + if (!isAndOnlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(II, OpNo, + pt)) + return false; + break; + default: return false; - } else if (const BitCastInst *BCI = dyn_cast(U)) { - if (BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS)) + } + } else if (BitCastInst *BCI = dyn_cast(U)) { + if (onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(BCI, pt)) { + pt.trackRemovable(BCI); + continue; + } + if (pt.check(BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS))) return false; - if (!onlyUsedByLifetimeMarkers(BCI)) + if (pt.check(!onlyUsedByLifetimeMarkers(BCI))) return false; - } else if (const GetElementPtrInst *GEPI = dyn_cast(U)) { - if (GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS)) + + for (auto *U2 : BCI->users()) { + pt.trackRemovable(cast(U2)); + } + pt.trackRemovable(BCI); + } else if (GetElementPtrInst *GEPI = dyn_cast(U)) { + if (onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(GEPI, pt)) { + pt.trackRemovable(GEPI); + continue; + } + if (pt.check(GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS))) return false; - if (!GEPI->hasAllZeroIndices()) + if (pt.check(!GEPI->hasAllZeroIndices())) return false; - if (!onlyUsedByLifetimeMarkers(GEPI)) + if (pt.check(!onlyUsedByLifetimeMarkers(GEPI))) return false; + + for (auto *U2 : GEPI->users()) { + pt.trackRemovable(cast(U2)); + } + pt.trackRemovable(GEPI); } else { return false; } @@ -103,6 +247,11 @@ return true; } +bool llvm::isAllocaPromotable(const AllocaInst *AI) { + PromotableChecker pc; + return trackAllocaPromotable(const_cast(AI), pc); +} + namespace { struct AllocaInfo { @@ 
-312,26 +461,192 @@
   AC->registerAssumption(CI);
 }
-static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
-  // Knowing that this alloca is promotable, we know that it's safe to kill all
-  // instructions except for load and store.
+static void removeIntrinsicUsers(AllocaInst *AI) {
+  // The AI is going to be deleted. Remove lifetime intrinsic users.
+  // Also disconnect and remove noalias/provenance.noalias/noalias_decl
+  // intrinsic users.
-  for (auto UI = AI->user_begin(), UE = AI->user_end(); UI != UE;) {
-    Instruction *I = cast(*UI);
-    ++UI;
-    if (isa(I) || isa(I))
-      continue;
+  // Track the possible intrinsics. If we do not have a noalias.decl or we do
+  // not have an unknown function scope, no extra modifications are needed. If
+  // both are there, we need to propagate the MetadataValue from the declaration
+  // to those intrinsics that are using the unknown scope.
+  PromotableTracker pt;
+
+  if (!trackAllocaPromotable(AI, pt)) {
+    assert(false && "trackAllocaPromotable not consistent");
+  }
+
+  // Propagate NoaliasDecl
+  MDNode *NoAliasUnknownScopeMD =
+      AI->getParent()->getParent()->getMetadata("noalias");
+  Instruction *NoAliasDecl = nullptr;
+  if (pt.mNoAliasDecls.size() == 1)
+    NoAliasDecl = pt.mNoAliasDecls[0];
+
+  if (NoAliasUnknownScopeMD) {
+    if (NoAliasDecl) {
+      LLVM_DEBUG(llvm::dbgs()
+                 << "- Propagating " << *NoAliasDecl << " scope to:\n");
+      auto NoAliasDeclScope =
+          NoAliasDecl->getOperand(Intrinsic::NoAliasDeclScopeArg);
+      for (auto PairIO : pt.mZeroOperands) {
+        Instruction *I = PairIO.first;
+        auto OpNo = PairIO.second;
+        (void)OpNo; // Silence not used warning in Release builds.
+        if (IntrinsicInst *II = dyn_cast(I)) {
+          auto ID = II->getIntrinsicID();
+          if (ID == Intrinsic::noalias || ID == Intrinsic::provenance_noalias) {
+            // If we get here, we can assume the identifyP or its provenance
+            // are dependencies
+            assert(
+                (ID == Intrinsic::noalias)
+                    ?
(OpNo == Intrinsic::NoAliasIdentifyPArg) + : (OpNo == Intrinsic::ProvenanceNoAliasIdentifyPArg || + OpNo == + Intrinsic::ProvenanceNoAliasIdentifyPProvenanceArg)); + unsigned ScopeArg = (ID == Intrinsic::noalias + ? Intrinsic::NoAliasScopeArg + : Intrinsic::ProvenanceNoAliasScopeArg); + unsigned DeclArg = + (ID == Intrinsic::noalias + ? Intrinsic::NoAliasNoAliasDeclArg + : Intrinsic::ProvenanceNoAliasNoAliasDeclArg); + MetadataAsValue *MV = + cast(I->getOperand(ScopeArg)); + if (NoAliasUnknownScopeMD == MV->getMetadata()) { + // Propagate the declaration scope + // Note: splitting already took care of updating the ObjId + LLVM_DEBUG(llvm::dbgs() << "-- " << *I << "\n"); + II->setOperand(ScopeArg, NoAliasDeclScope); + + // also update the noalias declaration + II->setOperand(DeclArg, NoAliasDecl); + } + } + } + } + } else if (pt.mNoAliasDecls.empty()) { + for (auto PairIO : pt.mZeroOperands) { + Instruction *I = PairIO.first; + auto OpNo = PairIO.second; + (void)OpNo; // Silence not used warning in Release builds. + if (IntrinsicInst *II = dyn_cast(I)) { + auto ID = II->getIntrinsicID(); + if (ID == Intrinsic::noalias || ID == Intrinsic::provenance_noalias) { + // If we get here, we can assume the identifyP or its provenance + // are dependencies + assert( + (ID == Intrinsic::noalias) + ? (OpNo == Intrinsic::NoAliasIdentifyPArg) + : (OpNo == Intrinsic::ProvenanceNoAliasIdentifyPArg || + OpNo == + Intrinsic::ProvenanceNoAliasIdentifyPProvenanceArg)); + unsigned ScopeArg = (ID == Intrinsic::noalias + ? 
Intrinsic::NoAliasScopeArg + : Intrinsic::ProvenanceNoAliasScopeArg); + MetadataAsValue *MV = + cast(I->getOperand(ScopeArg)); + if (NoAliasUnknownScopeMD == MV->getMetadata()) { + // Propagate a more or less unique id + LLVM_DEBUG(llvm::dbgs() + << "-- No llvm.noalias.decl, looking through: " << *I + << "\n"); + II->replaceAllUsesWith(II->getOperand(0)); + } + } + } + } + } + } + + if (NoAliasDecl) { + // Check if we need to split up llvm.noalias.decl with unique ObjId's + // This is needed to differentiate restrict pointers, once the alloca is + // removed. NOTE: we might as well have depended on 'constant propagation of + // null' and work with a 'constant pointer' + // for IdentifyP. Not sure what mechanism would be the best. + const DataLayout &DL = AI->getParent()->getModule()->getDataLayout(); + std::map ObjId2NoAliasDecl; + + auto BaseObjId = cast(NoAliasDecl->getOperand( + Intrinsic::NoAliasDeclObjIdArg)) + ->getZExtValue(); + ObjId2NoAliasDecl[BaseObjId] = NoAliasDecl; + + for (auto PairIO : pt.mZeroOperands) { + IntrinsicInst *II = dyn_cast(PairIO.first); + if (II && ((II->getIntrinsicID() == Intrinsic::noalias) || + (II->getIntrinsicID() == Intrinsic::provenance_noalias))) { + auto OpNo = PairIO.second; + unsigned IdentifyPArg = (II->getIntrinsicID() == Intrinsic::noalias) + ? Intrinsic::NoAliasIdentifyPArg + : Intrinsic::ProvenanceNoAliasIdentifyPArg; + unsigned ObjIdArg = (II->getIntrinsicID() == Intrinsic::noalias) + ? Intrinsic::NoAliasIdentifyPObjIdArg + : Intrinsic::ProvenanceNoAliasIdentifyPObjIdArg; + unsigned NoAliasDeclArg = + (II->getIntrinsicID() == Intrinsic::noalias) + ? Intrinsic::NoAliasNoAliasDeclArg + : Intrinsic::ProvenanceNoAliasNoAliasDeclArg; + + if ((unsigned)OpNo != IdentifyPArg) + continue; - if (!I->getType()->isVoidTy()) { - // The only users of this bitcast/GEP instruction are lifetime intrinsics. - // Follow the use/def chain to erase them now instead of leaving it for - // dead code elimination later. 
-    for (auto UUI = I->user_begin(), UUE = I->user_end(); UUI != UUE;) {
-      Instruction *Inst = cast(*UUI);
-      ++UUI;
-      Inst->eraseFromParent();
+        auto CurrentObjId =
+            cast(II->getOperand(ObjIdArg))->getZExtValue();
+
+        assert(CurrentObjId == BaseObjId &&
+               "Initial object id difference detected.");
+
+        APInt PPointerOffset(DL.getPointerSizeInBits(), 0ull);
+        assert(AI == II->getOperand(IdentifyPArg)
+                         ->stripAndAccumulateInBoundsConstantOffsets(
+                             DL, PPointerOffset) &&
+               "hmm.. expected stripped P to map to alloca");
+        if (!PPointerOffset.isNullValue()) {
+          CurrentObjId += PPointerOffset.getZExtValue();
+          auto &NewNoAliasDecl = ObjId2NoAliasDecl[CurrentObjId];
+          if (NewNoAliasDecl == nullptr) {
+            LLVM_DEBUG(llvm::dbgs()
+                       << "Creating llvm.noalias.decl for IdentifyPObjId "
+                       << CurrentObjId << "\n");
+            IRBuilder NoAliasDeclBuilder(NoAliasDecl);
+            NewNoAliasDecl = NoAliasDeclBuilder.CreateNoAliasDeclaration(
+                ConstantPointerNull::get(cast(AI->getType())),
+                CurrentObjId,
+                NoAliasDecl->getOperand(Intrinsic::NoAliasDeclScopeArg));
+            LLVM_DEBUG(llvm::dbgs() << "- " << *NewNoAliasDecl << "\n");
+          }
+          II->setOperand(NoAliasDeclArg, NewNoAliasDecl);
+          II->setOperand(ObjIdArg,
+                         ConstantInt::get(II->getOperand(ObjIdArg)->getType(),
+                                          CurrentObjId));
+          LLVM_DEBUG(llvm::dbgs()
+                     << "Remapping noalias.decl dependency: " << *II << "\n");
+        }
       }
     }
+  }
+
+  // set args to zero
+  for (auto II : pt.mNoAliasDecls) {
+    LLVM_DEBUG(llvm::dbgs() << "Zeroing noalias.decl dep: " << *II << "\n");
+    assert(II->getIntrinsicID() == Intrinsic::noalias_decl);
+    II->setOperand(Intrinsic::NoAliasDeclAllocaArg,
+                   ConstantPointerNull::get(cast(AI->getType())));
+  }
+  for (auto PairIO : pt.mZeroOperands) {
+    Instruction *I = PairIO.first;
+    auto OpNo = PairIO.second;
+    LLVM_DEBUG(llvm::dbgs()
+               << "Zeroing operand " << OpNo << " of " << *I << "\n");
+    I->setOperand(OpNo, ConstantPointerNull::get(
+                            cast(I->getOperand(OpNo)->getType())));
+  }
+
+  // remove
+  for (auto I : pt.mRemovables) {
LLVM_DEBUG(llvm::dbgs() << "Removing " << *I << "\n"); I->eraseFromParent(); } } @@ -357,6 +672,9 @@ for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { Instruction *UserInst = cast(*UI++); + // load/store can have a provenance + if ((UI != E) && (*UI == UserInst)) + ++UI; if (UserInst == OnlyStore) continue; LoadInst *LI = cast(UserInst); @@ -467,6 +785,9 @@ // store above them, if any. for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { LoadInst *LI = dyn_cast(*UI++); + // load/store can have a provenance + if ((UI != E) && (*UI == LI)) + ++UI; if (!LI) continue; @@ -544,7 +865,7 @@ assert(AI->getParent()->getParent() == &F && "All allocas should be in the same function, which is same as DF!"); - removeLifetimeIntrinsicUsers(AI); + removeIntrinsicUsers(AI); if (AI->use_empty()) { // If there are no uses of the alloca, just delete it now. Index: llvm/test/Transforms/SROA/noalias.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SROA/noalias.ll @@ -0,0 +1,302 @@ +; RUN: opt < %s -sroa -S | FileCheck %s +; RUN: opt < %s -passes=sroa -S | FileCheck %s + +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + + +%struct.FOO = type { i32*, i32*, i32* } + +; Function Attrs: nounwind +define dso_local void @test_ri(i32* %_p) #0 { +entry: + %_p.addr = alloca i32*, align 4 + %rp = alloca i32*, align 4 + store i32* %_p, i32** %_p.addr, align 4, !tbaa !2, !noalias !6 + store i32* undef, i32** %rp, align 4, !noalias !6 + %0 = bitcast i32** %rp to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #4, !noalias !6 + %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** %rp, i64 0, metadata !6) + %2 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !6 + store i32* %2, i32** %rp, align 4, !tbaa !2, !noalias !6 + %3 = load i32*, i32** %rp, align 4, !tbaa !2, !noalias !6 + %4 = call 
i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %3, i8* %1, i32** %rp, i64 0, metadata !6), !tbaa !2, !noalias !6 + store i32 42, i32* %4, align 4, !tbaa !9, !noalias !6 + %5 = bitcast i32** %rp to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* %5) #4 + ret void +} + +; CHECK-LABEL: @test_ri( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !2) +; CHECK: %1 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %_p, i8* %0, i32** null, i64 0, metadata !2), !tbaa !5, !noalias !2 + +; Function Attrs: nounwind +define dso_local void @test_ra(i32* %_p) #0 { +entry: + %_p.addr = alloca i32*, align 4 + %rp = alloca [3 x i32*], align 4 + store i32* %_p, i32** %_p.addr, align 4, !tbaa !2, !noalias !11 + store [3 x i32*] undef, [3 x i32*]* %rp, align 4, !noalias !11 + %0 = bitcast [3 x i32*]* %rp to i8* + call void @llvm.lifetime.start.p0i8(i64 12, i8* %0) #4, !noalias !11 + %1 = call i8* @llvm.noalias.decl.p0i8.p0a3p0i32.i64([3 x i32*]* %rp, i64 0, metadata !11) + %arrayinit.begin = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 0 + %2 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !11 + store i32* %2, i32** %arrayinit.begin, align 4, !tbaa !2, !noalias !11 + %arrayinit.element = getelementptr inbounds i32*, i32** %arrayinit.begin, i32 1 + %3 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !11 + %add.ptr = getelementptr inbounds i32, i32* %3, i32 1 + store i32* %add.ptr, i32** %arrayinit.element, align 4, !tbaa !2, !noalias !11 + %arrayinit.element1 = getelementptr inbounds i32*, i32** %arrayinit.element, i32 1 + %4 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !11 + %add.ptr2 = getelementptr inbounds i32, i32* %4, i32 2 + store i32* %add.ptr2, i32** %arrayinit.element1, align 4, !tbaa !2, !noalias !11 + %arrayidx = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 0 + %5 = load i32*, i32** %arrayidx, align 4, !tbaa !2, !noalias !11 + %6 = call i32* 
@llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %5, i8* %1, i32** %arrayidx, i64 0, metadata !11), !tbaa !2, !noalias !11 + store i32 42, i32* %6, align 4, !tbaa !9, !noalias !11 + %arrayidx3 = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 1 + %7 = load i32*, i32** %arrayidx3, align 4, !tbaa !2, !noalias !11 + %8 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %7, i8* %1, i32** %arrayidx3, i64 0, metadata !11), !tbaa !2, !noalias !11 + store i32 43, i32* %8, align 4, !tbaa !9, !noalias !11 + %arrayidx4 = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 2 + %9 = load i32*, i32** %arrayidx4, align 4, !tbaa !2, !noalias !11 + %10 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %9, i8* %1, i32** %arrayidx4, i64 0, metadata !11), !tbaa !2, !noalias !11 + store i32 44, i32* %10, align 4, !tbaa !9, !noalias !11 + %11 = bitcast [3 x i32*]* %rp to i8* + call void @llvm.lifetime.end.p0i8(i64 12, i8* %11) #4 + ret void +} + +; CHECK-LABEL: @test_ra( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !11) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 8, metadata !11) +; CHECK: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 16, metadata !11) +; CHECK: %3 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %_p, i8* %0, i32** null, i64 0, metadata !11), !tbaa !5, !noalias !11 +; CHECK: %4 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %add.ptr, i8* %1, i32** null, i64 8, metadata !11), !tbaa !5, !noalias !11 +; CHECK: %5 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %add.ptr2, i8* %2, i32** null, i64 16, metadata !11), !tbaa !5, !noalias !11 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0a3p0i32.i64([3 x i32*]*, i64, metadata) #1 + +; Function Attrs: nounwind +define dso_local void @test_rs(i32* %_p) #0 { +entry: + %_p.addr = alloca i32*, align 4 + %foo = alloca %struct.FOO, align 4 + 
store i32* %_p, i32** %_p.addr, align 4, !tbaa !2, !noalias !14 + store %struct.FOO undef, %struct.FOO* %foo, align 4, !noalias !14 + %0 = bitcast %struct.FOO* %foo to i8* + call void @llvm.lifetime.start.p0i8(i64 12, i8* %0) #4, !noalias !14 + %1 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FOOs.i64(%struct.FOO* %foo, i64 0, metadata !14) + %mP0 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 0 + %2 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !14 + store i32* %2, i32** %mP0, align 4, !tbaa !17, !noalias !14 + %mP1 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 1 + %3 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !14 + %add.ptr = getelementptr inbounds i32, i32* %3, i32 1 + store i32* %add.ptr, i32** %mP1, align 4, !tbaa !19, !noalias !14 + %mP2 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 2 + %4 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !14 + %add.ptr1 = getelementptr inbounds i32, i32* %4, i32 2 + store i32* %add.ptr1, i32** %mP2, align 4, !tbaa !20, !noalias !14 + %mP02 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 0 + %5 = load i32*, i32** %mP02, align 4, !tbaa !17, !noalias !14 + %6 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %5, i8* %1, i32** %mP02, i64 0, metadata !14), !tbaa !17, !noalias !14 + store i32 42, i32* %6, align 4, !tbaa !9, !noalias !14 + %mP13 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 1 + %7 = load i32*, i32** %mP13, align 4, !tbaa !19, !noalias !14 + %8 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %7, i8* %1, i32** %mP13, i64 0, metadata !14), !tbaa !19, !noalias !14 + store i32 43, i32* %8, align 4, !tbaa !9, !noalias !14 + %mP24 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 2 + %9 = load i32*, i32** %mP24, align 4, !tbaa !20, !noalias !14 + %10 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %9, i8* %1, i32** %mP24, i64 0, metadata !14), !tbaa 
!20, !noalias !14 + store i32 44, i32* %10, align 4, !tbaa !9, !noalias !14 + %11 = bitcast %struct.FOO* %foo to i8* + call void @llvm.lifetime.end.p0i8(i64 12, i8* %11) #4 + ret void +} + +; CHECK-LABEL: @test_rs( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !14) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 8, metadata !14) +; CHECK: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 16, metadata !14) +; CHECK: %3 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %_p, i8* %0, i32** null, i64 0, metadata !14), !tbaa !17, !noalias !14 +; CHECK: %4 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %add.ptr, i8* %1, i32** null, i64 8, metadata !14), !tbaa !19, !noalias !14 +; CHECK: %5 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %add.ptr1, i8* %2, i32** null, i64 16, metadata !14), !tbaa !20, !noalias !14 + +; Function Attrs: argmemonly nounwind speculatable +define dso_local void @test_ri_inlined(i32* %_p) local_unnamed_addr #2 !noalias !21 { +entry: + %rp = alloca i32*, align 4 + %0 = bitcast i32** %rp to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #4, !noalias !24 + %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** nonnull %rp, i64 0, metadata !27), !noalias !21 + store i32* %_p, i32** %rp, ptr_provenance i32** undef, align 4, !noalias !24 + %2 = load i32*, i32** %rp, ptr_provenance i32** undef, align 4, !noalias !28 + %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %2, i8* %1, i32** %rp, i32** undef, i64 0, metadata !27) #4, !noalias !28 + store i32 42, i32* %2, ptr_provenance i32* %3, align 4, !noalias !28 + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #4, !noalias !21 + ret void +} + +; CHECK-LABEL: @test_ri_inlined( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !24) +; CHECK: %1 = call i32* 
@llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %_p, i8* %0, i32** null, i32** undef, i64 0, metadata !24){{.*}}, !noalias !27 + +; Function Attrs: argmemonly nounwind speculatable +define dso_local void @test_ra_inlined(i32* %_p) local_unnamed_addr #2 !noalias !29 { +entry: + %rp = alloca [3 x i32*], align 4 + %.fca.0.gep = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 0 + %0 = bitcast [3 x i32*]* %rp to i8* + call void @llvm.lifetime.start.p0i8(i64 12, i8* nonnull %0) #4, !noalias !32 + %1 = call i8* @llvm.noalias.decl.p0i8.p0a3p0i32.i64([3 x i32*]* nonnull %rp, i64 0, metadata !35), !noalias !29 + store i32* %_p, i32** %.fca.0.gep, ptr_provenance i32** undef, align 4, !noalias !32 + %arrayinit.element = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 1 + %add.ptr = getelementptr inbounds i32, i32* %_p, i32 1 + store i32* %add.ptr, i32** %arrayinit.element, ptr_provenance i32** undef, align 4, !noalias !32 + %arrayinit.element1 = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 2 + %add.ptr2 = getelementptr inbounds i32, i32* %_p, i32 2 + store i32* %add.ptr2, i32** %arrayinit.element1, ptr_provenance i32** undef, align 4, !noalias !32 + %2 = load i32*, i32** %.fca.0.gep, ptr_provenance i32** undef, align 4, !noalias !36 + %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %2, i8* %1, i32** %.fca.0.gep, i32** undef, i64 0, metadata !35) #4, !noalias !36 + store i32 42, i32* %2, ptr_provenance i32* %3, align 4, !noalias !36 + %arrayidx1.i = getelementptr inbounds i32*, i32** %.fca.0.gep, i32 1 + %4 = load i32*, i32** %arrayidx1.i, ptr_provenance i32** undef, align 4, !noalias !36 + %5 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %4, i8* %1, i32** nonnull %arrayidx1.i, i32** undef, i64 0, metadata !35) #4, !noalias !36 + store i32 43, i32* %4, ptr_provenance i32* %5, align 4, !noalias !36 + %arrayidx2.i = getelementptr inbounds i32*, i32** %.fca.0.gep, 
i32 2 + %6 = load i32*, i32** %arrayidx2.i, ptr_provenance i32** undef, align 4, !noalias !36 + %7 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %6, i8* %1, i32** nonnull %arrayidx2.i, i32** undef, i64 0, metadata !35) #4, !noalias !36 + store i32 44, i32* %6, ptr_provenance i32* %7, align 4, !noalias !36 + call void @llvm.lifetime.end.p0i8(i64 12, i8* nonnull %0) #4, !noalias !29 + ret void +} + +; CHECK-LABEL: @test_ra_inlined( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !31) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 8, metadata !31) +; CHECK: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 16, metadata !31) +; CHECK: %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %_p, i8* %0, i32** null, i32** undef, i64 0, metadata !31){{.*}}, !noalias !34 +; CHECK: %4 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %add.ptr, i8* %1, i32** nonnull null, i32** undef, i64 8, metadata !31){{.*}}, !noalias !34 +; CHECK: %5 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %add.ptr2, i8* %2, i32** nonnull null, i32** undef, i64 16, metadata !31){{.*}}, !noalias !34 + + +; Function Attrs: argmemonly nounwind speculatable +define dso_local void @test_rs_inlined(i32* %_p) local_unnamed_addr #2 !noalias !37 { +entry: + %foo = alloca %struct.FOO, align 4 + %.fca.0.gep = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 0 + %.fca.1.gep = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 1 + %.fca.2.gep = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 2 + %0 = bitcast %struct.FOO* %foo to i8* + call void @llvm.lifetime.start.p0i8(i64 12, i8* nonnull %0) #4, !noalias !40 + %1 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FOOs.i64(%struct.FOO* nonnull %foo, i64 0, metadata !43), !noalias !37 + store i32* %_p, i32** 
%.fca.0.gep, ptr_provenance i32** undef, align 4, !noalias !40 + %add.ptr = getelementptr inbounds i32, i32* %_p, i32 1 + store i32* %add.ptr, i32** %.fca.1.gep, ptr_provenance i32** undef, align 4, !noalias !40 + %add.ptr1 = getelementptr inbounds i32, i32* %_p, i32 2 + store i32* %add.ptr1, i32** %.fca.2.gep, ptr_provenance i32** undef, align 4, !noalias !40 + %mP0.i = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 0 + %2 = load i32*, i32** %mP0.i, ptr_provenance i32** undef, align 4, !noalias !44 + %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %2, i8* %1, i32** %mP0.i, i32** undef, i64 0, metadata !43) #4, !noalias !44 + store i32 42, i32* %2, ptr_provenance i32* %3, align 4, !noalias !44 + %mP1.i = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 1 + %4 = load i32*, i32** %mP1.i, ptr_provenance i32** undef, align 4, !noalias !44 + %5 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %4, i8* %1, i32** nonnull %mP1.i, i32** undef, i64 0, metadata !43) #4, !noalias !44 + store i32 43, i32* %4, ptr_provenance i32* %5, align 4, !noalias !44 + %mP2.i = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 2 + %6 = load i32*, i32** %mP2.i, ptr_provenance i32** undef, align 4, !noalias !44 + %7 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %6, i8* %1, i32** nonnull %mP2.i, i32** undef, i64 0, metadata !43) #4, !noalias !44 + store i32 44, i32* %6, ptr_provenance i32* %7, align 4, !noalias !44 + call void @llvm.lifetime.end.p0i8(i64 12, i8* nonnull %0) #4, !noalias !37 + ret void +} + +; CHECK-LABEL: @test_rs_inlined( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !38) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 8, metadata !38) +; CHECK: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 16, metadata !38) +; CHECK: %3 = call i32* 
@llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %_p, i8* %0, i32** null, i32** undef, i64 0, metadata !38){{.*}}, !noalias !41 +; CHECK: %4 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %add.ptr, i8* %1, i32** nonnull null, i32** undef, i64 8, metadata !38){{.*}}, !noalias !41 +; CHECK: %5 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %add.ptr1, i8* %2, i32** nonnull null, i32** undef, i64 16, metadata !38){{.*}}, !noalias !41 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0s_struct.FOOs.i64(%struct.FOO*, i64, metadata) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32**, i64, metadata) #1 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32*, i8*, i32**, i64, metadata) #2 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32*, i8*, i32**, i32**, i64, metadata) #3 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { argmemonly nounwind speculatable } +attributes #3 = { nounwind readnone speculatable } +attributes #4 = { nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = 
!{!"clang"} +!2 = !{!3, !3, i64 0, i64 4} +!3 = !{!4, i64 4, !"any pointer"} +!4 = !{!5, i64 1, !"omnipotent char"} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!7} +!7 = distinct !{!7, !8, !"test_ri: rp"} +!8 = distinct !{!8, !"test_ri"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!4, i64 4, !"int"} +!11 = !{!12} +!12 = distinct !{!12, !13, !"test_ra: rp"} +!13 = distinct !{!13, !"test_ra"} +!14 = !{!15} +!15 = distinct !{!15, !16, !"test_rs: foo"} +!16 = distinct !{!16, !"test_rs"} +!17 = !{!18, !3, i64 0, i64 4} +!18 = !{!4, i64 12, !"FOO", !3, i64 0, i64 4, !3, i64 4, i64 4, !3, i64 8, i64 4} +!19 = !{!18, !3, i64 4, i64 4} +!20 = !{!18, !3, i64 8, i64 4} +!21 = !{!22} +!22 = distinct !{!22, !23, !"test_ri_inlined: unknown scope"} +!23 = distinct !{!23, !"test_ri_inlined"} +!24 = !{!25, !22} +!25 = distinct !{!25, !26, !"test_ri_inlined: rp"} +!26 = distinct !{!26, !"test_ri_inlined"} +!27 = !{!25} +!28 = !{!22, !25, !22} +!29 = !{!30} +!30 = distinct !{!30, !31, !"test_ra_inlined: unknown scope"} +!31 = distinct !{!31, !"test_ra_inlined"} +!32 = !{!33, !30} +!33 = distinct !{!33, !34, !"test_ra_inlined: rp"} +!34 = distinct !{!34, !"test_ra_inlined"} +!35 = !{!33} +!36 = !{!30, !33, !30} +!37 = !{!38} +!38 = distinct !{!38, !39, !"test_rs_inlined: unknown scope"} +!39 = distinct !{!39, !"test_rs_inlined"} +!40 = !{!41, !38} +!41 = distinct !{!41, !42, !"test_rs_inlined: foo"} +!42 = distinct !{!42, !"test_rs_inlined"} +!43 = !{!41} +!44 = !{!38, !41, !38} Index: llvm/test/Transforms/SROA/noalias2.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SROA/noalias2.ll @@ -0,0 +1,346 @@ +; RUN: opt < %s -sroa -S | FileCheck %s +; RUN: opt < %s -passes=sroa -S | FileCheck %s + +; Validate that SROA correctly deduces noalias pointers when removing: +; - llvm.memcpy +; - aggregate load/store +; - copying the struct through i64 + +; ModuleID = 'test.c' +source_filename = "test.c" +target datalayout =
"e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128" +target triple = "i386-unknown-linux-gnu" + +%struct.FUM = type { i32*, %struct.FOO } +%struct.FOO = type { i32* } + +; Function Attrs: nounwind +define dso_local void @test01_memcpy(%struct.FUM* %a_fum) #0 !noalias !3 { +entry: + %a_fum.addr = alloca %struct.FUM*, align 4 + %l_fum = alloca %struct.FUM, align 4 + %l2_fum = alloca %struct.FUM, align 4 + %l3_fum = alloca %struct.FUM, align 4 + store %struct.FUM* %a_fum, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !10 + %tmp0 = bitcast %struct.FUM* %l_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp0) #5, !noalias !10 + %tmp1 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l_fum, i64 0, metadata !12), !noalias !10 + %tmp2 = load %struct.FUM*, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !10 + %tmp3 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %tmp2, i8* null, metadata !13, metadata !3) + %tmp4 = bitcast %struct.FUM* %l_fum to i8* + %tmp5 = bitcast %struct.FUM* %tmp3 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %tmp4, i8* align 4 %tmp5, i32 8, i1 false), !tbaa.struct !16, !noalias !10 + %tmp6 = bitcast %struct.FUM* %l2_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp6) #5, !noalias !17 + %tmp7 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l2_fum, i64 0, metadata !19), !noalias !17 + %tmp8 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %l_fum, i8* %tmp1, metadata !13, metadata !12) + %tmp9 = bitcast %struct.FUM* %l2_fum to i8* + %tmp10 = bitcast %struct.FUM* %tmp8 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %tmp9, i8* align 4 %tmp10, i32 8, i1 false), !tbaa.struct !16, !noalias !17 + %p0 = getelementptr inbounds %struct.FUM, %struct.FUM* %l2_fum, i32 0, i32 0 + %tmp11 = load i32*, i32** %p0, align 4, !tbaa !20, !noalias !17 + %tmp12 = call i32* 
@llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %tmp11, i8* %tmp7, i32** %p0, i64 0, metadata !19), !tbaa !20, !noalias !17 + store i32 42, i32* %tmp12, align 4, !tbaa !23, !noalias !17 + %tmp13 = bitcast %struct.FUM* %l2_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp13) #5, !noalias !10 + %tmp14 = bitcast %struct.FUM* %l3_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp14) #5, !noalias !25 + %tmp15 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l3_fum, i64 0, metadata !27), !noalias !25 + %tmp16 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %l_fum, i8* %tmp1, metadata !13, metadata !12) + %tmp17 = bitcast %struct.FUM* %l3_fum to i8* + %tmp18 = bitcast %struct.FUM* %tmp16 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %tmp17, i8* align 4 %tmp18, i32 8, i1 false), !tbaa.struct !16, !noalias !25 + %m1 = getelementptr inbounds %struct.FUM, %struct.FUM* %l3_fum, i32 0, i32 1 + %p = getelementptr inbounds %struct.FOO, %struct.FOO* %m1, i32 0, i32 0 + %tmp19 = load i32*, i32** %p, align 4, !tbaa !28, !noalias !25 + %tmp20 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %tmp19, i8* %tmp15, i32** %p, i64 0, metadata !27), !tbaa !28, !noalias !25 + store i32 43, i32* %tmp20, align 4, !tbaa !23, !noalias !25 + %tmp21 = bitcast %struct.FUM* %l3_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp21) #5, !noalias !10 + %tmp22 = bitcast %struct.FUM* %l_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp22) #5 + ret void +} + +; CHECK-LABEL: @test01_memcpy +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !6) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata !6) +; CHECK: %tmp3 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %a_fum, i8* null, metadata !8, metadata !3) +; CHECK: %l_fum.sroa.0.0.l_fum.sroa.0.0.copyload.noalias = call i32* 
@llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %l_fum.sroa.0.0.copyload, i8* %0, i32** %l_fum.sroa.0.0.tmp5.sroa_idx, i64 0, metadata !6) +; CHECK: %l_fum.sroa.10.0.l_fum.sroa.10.0.copyload.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %l_fum.sroa.10.0.copyload, i8* %1, i32** %l_fum.sroa.10.0.tmp5.sroa_idx3, i64 4, metadata !6) +; CHECK: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !17) +; CHECK: %3 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata !17) +; CHECK: %l2_fum.sroa.0.0.copyload.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %l_fum.sroa.0.0.l_fum.sroa.0.0.copyload.noalias, i8* %0, i32** null, i64 0, metadata !6) +; CHECK: %l2_fum.sroa.6.0.copyload.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %l_fum.sroa.10.0.l_fum.sroa.10.0.copyload.noalias, i8* %1, i32** null, i64 4, metadata !6) +; CHECK: %tmp12 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %l2_fum.sroa.0.0.copyload.noalias, i8* %2, i32** null, i64 0, metadata !17), !tbaa !19, !noalias !22 +; CHECK: %4 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !25) +; CHECK: %5 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata !25) +; CHECK: %l3_fum.sroa.0.0.copyload.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %l_fum.sroa.0.0.l_fum.sroa.0.0.copyload.noalias, i8* %0, i32** null, i64 0, metadata !6) +; CHECK: %l3_fum.sroa.4.0.copyload.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %l_fum.sroa.10.0.l_fum.sroa.10.0.copyload.noalias, i8* %1, i32** null, i64 4, metadata !6) +; CHECK: %tmp20 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %l3_fum.sroa.4.0.copyload.noalias, i8* %5, i32** null, i64 4, metadata !25), !tbaa !27, !noalias !28 +; CHECK: ret void + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg %0, i8* nocapture %1) #1 + +; Function Attrs: argmemonly nounwind 
+declare i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %0, i64 %1, metadata %2) #2 + +; Function Attrs: nounwind readnone +declare %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %0, i8* %1, metadata %2, metadata %3) #3 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture writeonly %0, i8* noalias nocapture readonly %1, i32 %2, i1 immarg %3) #1 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %0, i8* %1, i32** %2, i64 %3, metadata %4) #4 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg %0, i8* nocapture %1) #1 + +; Function Attrs: nounwind +define dso_local void @test02_aggloadstore(%struct.FUM* %a_fum) #0 !noalias !29 { +entry: + %a_fum.addr = alloca %struct.FUM*, align 4 + %l_fum = alloca %struct.FUM, align 4 + %l2_fum = alloca %struct.FUM, align 4 + %l3_fum = alloca %struct.FUM, align 4 + store %struct.FUM* %a_fum, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !32 + %tmp0 = bitcast %struct.FUM* %l_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp0) #5, !noalias !32 + %tmp1 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l_fum, i64 0, metadata !34), !noalias !32 + %tmp2 = load %struct.FUM*, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !32 + %tmp3 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %tmp2, i8* null, metadata !13, metadata !29) + %cp1 = load %struct.FUM, %struct.FUM* %tmp3, align 4 + store %struct.FUM %cp1, %struct.FUM* %l_fum, align 4 + %tmp6 = bitcast %struct.FUM* %l2_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp6) #5, !noalias !35 + %tmp7 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l2_fum, i64 0, metadata !37), !noalias !35 + %tmp8 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* 
%l_fum, i8* %tmp1, metadata !13, metadata !34) + %cp2 = load %struct.FUM, %struct.FUM* %tmp8, align 4 + store %struct.FUM %cp2, %struct.FUM* %l2_fum, align 4 + %p0 = getelementptr inbounds %struct.FUM, %struct.FUM* %l2_fum, i32 0, i32 0 + %tmp11 = load i32*, i32** %p0, align 4, !tbaa !20, !noalias !35 + %tmp12 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %tmp11, i8* %tmp7, i32** %p0, i64 0, metadata !37), !tbaa !20, !noalias !35 + store i32 42, i32* %tmp12, align 4, !tbaa !23, !noalias !35 + %tmp13 = bitcast %struct.FUM* %l2_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp13) #5, !noalias !32 + %tmp14 = bitcast %struct.FUM* %l3_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp14) #5, !noalias !38 + %tmp15 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l3_fum, i64 0, metadata !40), !noalias !38 + %tmp16 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %l_fum, i8* %tmp1, metadata !13, metadata !34) + %cp3 = load %struct.FUM, %struct.FUM* %tmp16, align 4 + store %struct.FUM %cp3, %struct.FUM* %l3_fum, align 4 + %m1 = getelementptr inbounds %struct.FUM, %struct.FUM* %l3_fum, i32 0, i32 1 + %p = getelementptr inbounds %struct.FOO, %struct.FOO* %m1, i32 0, i32 0 + %tmp19 = load i32*, i32** %p, align 4, !tbaa !28, !noalias !38 + %tmp20 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %tmp19, i8* %tmp15, i32** %p, i64 0, metadata !40), !tbaa !28, !noalias !38 + store i32 43, i32* %tmp20, align 4, !tbaa !23, !noalias !38 + %tmp21 = bitcast %struct.FUM* %l3_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp21) #5, !noalias !32 + %tmp22 = bitcast %struct.FUM* %l_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp22) #5 + ret void +} + +; CHECK-LABEL: @test02_aggloadstore +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !32) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, 
metadata !32) +; CHECK: %tmp3 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %a_fum, i8* null, metadata !8, metadata !29) +; CHECK: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !34) +; CHECK: %3 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata !34) +; CHECK: %cp2.fca.0.load.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %cp1.fca.0.extract, i8* %0, i32** null, i64 0, metadata !32) +; CHECK: %cp2.fca.1.0.load.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %cp1.fca.1.0.extract, i8* %1, i32** null, i64 4, metadata !32) +; CHECK: %tmp12 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %cp2.fca.0.extract, i8* %2, i32** null, i64 0, metadata !34), !tbaa !19, !noalias !36 +; CHECK: %4 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !37) +; CHECK: %5 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata !37) +; CHECK: %cp3.fca.0.load.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %cp1.fca.0.extract, i8* %0, i32** null, i64 0, metadata !32) +; CHECK: %cp3.fca.1.0.load.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %cp1.fca.1.0.extract, i8* %1, i32** null, i64 4, metadata !32) +; CHECK: %tmp20 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %cp3.fca.1.0.extract, i8* %5, i32** null, i64 4, metadata !37), !tbaa !27, !noalias !39 +; CHECK: ret void + +; Function Attrs: nounwind +define dso_local void @test03_i64loadstore(%struct.FUM* %a_fum) #0 !noalias !41 { +entry: + %a_fum.addr = alloca %struct.FUM*, align 4 + %l_fum = alloca %struct.FUM, align 4 + %l2_fum = alloca %struct.FUM, align 4 + %l3_fum = alloca %struct.FUM, align 4 + store %struct.FUM* %a_fum, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !44 + %tmp0 = bitcast %struct.FUM* %l_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp0) #5, !noalias !44 + %tmp1 = call i8* 
@llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l_fum, i64 0, metadata !46), !noalias !44 + %tmp2 = load %struct.FUM*, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !44 + %tmp3 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %tmp2, i8* null, metadata !13, metadata !41) + %tmp4 = bitcast %struct.FUM* %l_fum to i64* + %tmp5 = bitcast %struct.FUM* %tmp3 to i64* + %cp1 = load i64, i64* %tmp5, align 4 + store i64 %cp1, i64* %tmp4, align 4 + %tmp6 = bitcast %struct.FUM* %l2_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp6) #5, !noalias !47 + %tmp7 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l2_fum, i64 0, metadata !49), !noalias !47 + %tmp8 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %l_fum, i8* %tmp1, metadata !13, metadata !46) + %tmp9 = bitcast %struct.FUM* %l2_fum to i64* + %tmp10 = bitcast %struct.FUM* %tmp8 to i64* + %cp2 = load i64, i64* %tmp10, align 4 + store i64 %cp2, i64* %tmp9, align 4 + %p0 = getelementptr inbounds %struct.FUM, %struct.FUM* %l2_fum, i32 0, i32 0 + %tmp11 = load i32*, i32** %p0, align 4, !tbaa !20, !noalias !47 + %tmp12 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %tmp11, i8* %tmp7, i32** %p0, i64 0, metadata !49), !tbaa !20, !noalias !47 + store i32 42, i32* %tmp12, align 4, !tbaa !23, !noalias !47 + %tmp13 = bitcast %struct.FUM* %l2_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp13) #5, !noalias !44 + %tmp14 = bitcast %struct.FUM* %l3_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp14) #5, !noalias !50 + %tmp15 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l3_fum, i64 0, metadata !52), !noalias !50 + %tmp16 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %l_fum, i8* %tmp1, metadata !13, metadata !46) + %tmp17 = bitcast %struct.FUM* %l3_fum to i64* + %tmp18 = bitcast %struct.FUM* %tmp16 to i64* + %cp3 = load i64, i64* 
%tmp18, align 4 + store i64 %cp3, i64* %tmp17, align 4 + %m1 = getelementptr inbounds %struct.FUM, %struct.FUM* %l3_fum, i32 0, i32 1 + %p = getelementptr inbounds %struct.FOO, %struct.FOO* %m1, i32 0, i32 0 + %tmp19 = load i32*, i32** %p, align 4, !tbaa !28, !noalias !50 + %tmp20 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %tmp19, i8* %tmp15, i32** %p, i64 0, metadata !52), !tbaa !28, !noalias !50 + store i32 43, i32* %tmp20, align 4, !tbaa !23, !noalias !50 + %tmp21 = bitcast %struct.FUM* %l3_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp21) #5, !noalias !44 + %tmp22 = bitcast %struct.FUM* %l_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp22) #5 + ret void +} + +; CHECK-LABEL: @test03_i64loadstore +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !43) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata !43) +; CHECK: %tmp3 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %a_fum, i8* null, metadata !8, metadata !40) +; CHECK: %cp17.sroa_as_ptr.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %cp17.sroa_as_ptr, i8* null, i32** %2, i64 0, metadata !40) +; CHECK: %5 = call i8* @llvm.noalias.decl.p0i8.p0i32.i64(i32* null, i64 0, metadata !45) +; CHECK: %6 = call i8* @llvm.noalias.decl.p0i8.p0i32.i64(i32* null, i64 4, metadata !45) +; CHECK: %cp24.sroa_as_ptr.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %3, i8* %0, i32** null, i64 0, metadata !43) +; CHECK: %tmp12 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %8, i8* %5, i32** null, i64 0, metadata !45), !tbaa !19, !noalias !47 +; CHECK: %9 = call i8* @llvm.noalias.decl.p0i8.p0i32.i64(i32* null, i64 0, metadata !48) +; CHECK: %10 = call i8* @llvm.noalias.decl.p0i8.p0i32.i64(i32* null, i64 4, metadata !48) +; CHECK: %cp31.sroa_as_ptr.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %3, i8* %0, i32** null, 
i64 0, metadata !43) +; CHECK: %tmp20 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %12, i8* %10, i32** null, i64 4, metadata !48), !tbaa !27, !noalias !50 +; CHECK: ret void + + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { argmemonly nounwind } +attributes #3 = { nounwind readnone } +attributes #4 = { argmemonly nounwind speculatable } +attributes #5 = { nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"NumRegisterParameters", i32 0} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{!"clang version"} +!3 = !{!4} +!4 = distinct !{!4, !5, !"test01_memcpy: unknown scope"} +!5 = distinct !{!5, !"test01_memcpy"} +!6 = !{!7, !7, i64 0} +!7 = !{!"any pointer", !8, i64 0} +!8 = !{!"omnipotent char", !9, i64 0} +!9 = !{!"Simple C/C++ TBAA"} +!10 = !{!11, !4} +!11 = distinct !{!11, !5, !"test01_memcpy: l_fum"} +!12 = !{!11} +!13 = !{!14, !15} +!14 = !{i32 -1, i32 0} +!15 = !{i32 -1, i32 1, i32 0} +!16 = !{i64 0, i64 4, !6, i64 4, i64 4, !6} +!17 = !{!18, !11, !4} +!18 = distinct !{!18, !5, !"test01_memcpy: l2_fum"} +!19 = !{!18} +!20 = !{!21, !7, i64 0} +!21 = !{!"FUM", !7, i64 0, !22, i64 4} +!22 = !{!"FOO", !7, i64 0} +!23 = !{!24, !24, i64 0} +!24 = !{!"int", !8, i64 0} +!25 = !{!26, !11, !4} +!26 = distinct !{!26, !5, !"test01_memcpy: l3_fum"} +!27 = !{!26} +!28 = !{!21, !7, i64 4} +!29 = !{!30} +!30 = distinct !{!30, !31, !"test02_aggloadstore: unknown scope"} +!31 = distinct !{!31, !"test02_aggloadstore"} +!32 = !{!33, !30} +!33 
= distinct !{!33, !31, !"test02_aggloadstore: l_fum"} +!34 = !{!33} +!35 = !{!36, !33, !30} +!36 = distinct !{!36, !31, !"test02_aggloadstore: l2_fum"} +!37 = !{!36} +!38 = !{!39, !33, !30} +!39 = distinct !{!39, !31, !"test02_aggloadstore: l3_fum"} +!40 = !{!39} +!41 = !{!42} +!42 = distinct !{!42, !43, !"test03_i64loadstore: unknown scope"} +!43 = distinct !{!43, !"test03_i64loadstore"} +!44 = !{!45, !42} +!45 = distinct !{!45, !43, !"test03_i64loadstore: l_fum"} +!46 = !{!45} +!47 = !{!48, !45, !42} +!48 = distinct !{!48, !43, !"test03_i64loadstore: l2_fum"} +!49 = !{!48} +!50 = !{!51, !45, !42} +!51 = distinct !{!51, !43, !"test03_i64loadstore: l3_fum"} +!52 = !{!51} + +; CHECK: !0 = !{i32 1, !"NumRegisterParameters", i32 0} +; CHECK: !1 = !{i32 1, !"wchar_size", i32 4} +; CHECK: !2 = !{!"clang version"} +; CHECK: !3 = !{!4} +; CHECK: !4 = distinct !{!4, !5, !"test01_memcpy: unknown scope"} +; CHECK: !5 = distinct !{!5, !"test01_memcpy"} +; CHECK: !6 = !{!7} +; CHECK: !7 = distinct !{!7, !5, !"test01_memcpy: l_fum"} +; CHECK: !8 = !{!9, !10} +; CHECK: !9 = !{i32 -1, i32 0} +; CHECK: !10 = !{i32 -1, i32 1, i32 0} +; CHECK: !11 = !{i64 0, i64 4, !12, i64 4, i64 4, !12} +; CHECK: !12 = !{!13, !13, i64 0} +; CHECK: !13 = !{!"any pointer", !14, i64 0} +; CHECK: !14 = !{!"omnipotent char", !15, i64 0} +; CHECK: !15 = !{!"Simple C/C++ TBAA"} +; CHECK: !16 = !{!7, !4} +; CHECK: !17 = !{!18} +; CHECK: !18 = distinct !{!18, !5, !"test01_memcpy: l2_fum"} +; CHECK: !19 = !{!20, !13, i64 0} +; CHECK: !20 = !{!"FUM", !13, i64 0, !21, i64 4} +; CHECK: !21 = !{!"FOO", !13, i64 0} +; CHECK: !22 = !{!18, !7, !4} +; CHECK: !23 = !{!24, !24, i64 0} +; CHECK: !24 = !{!"int", !14, i64 0} +; CHECK: !25 = !{!26} +; CHECK: !26 = distinct !{!26, !5, !"test01_memcpy: l3_fum"} +; CHECK: !27 = !{!20, !13, i64 4} +; CHECK: !28 = !{!26, !7, !4} +; CHECK: !29 = !{!30} +; CHECK: !30 = distinct !{!30, !31, !"test02_aggloadstore: unknown scope"} +; CHECK: !31 = distinct !{!31, 
!"test02_aggloadstore"} +; CHECK: !32 = !{!33} +; CHECK: !33 = distinct !{!33, !31, !"test02_aggloadstore: l_fum"} +; CHECK: !34 = !{!35} +; CHECK: !35 = distinct !{!35, !31, !"test02_aggloadstore: l2_fum"} +; CHECK: !36 = !{!35, !33, !30} +; CHECK: !37 = !{!38} +; CHECK: !38 = distinct !{!38, !31, !"test02_aggloadstore: l3_fum"} +; CHECK: !39 = !{!38, !33, !30} +; CHECK: !40 = !{!41} +; CHECK: !41 = distinct !{!41, !42, !"test03_i64loadstore: unknown scope"} +; CHECK: !42 = distinct !{!42, !"test03_i64loadstore"} +; CHECK: !43 = !{!44} +; CHECK: !44 = distinct !{!44, !42, !"test03_i64loadstore: l_fum"} +; CHECK: !45 = !{!46} +; CHECK: !46 = distinct !{!46, !42, !"test03_i64loadstore: l2_fum"} +; CHECK: !47 = !{!46, !44, !41} +; CHECK: !48 = !{!49} +; CHECK: !49 = distinct !{!49, !42, !"test03_i64loadstore: l3_fum"} +; CHECK: !50 = !{!49, !44, !41}