Index: include/polly/ScopDetection.h =================================================================== --- include/polly/ScopDetection.h +++ include/polly/ScopDetection.h @@ -70,6 +70,7 @@ class CallInst; class Instruction; class Value; +class IntrinsicInst; } namespace polly { @@ -328,11 +329,21 @@ /// @return True if R is a Scop, false otherwise. bool isValidRegion(DetectionContext &Context) const; + /// @brief Check if an intrinsic call can be part of a Scop. + /// + /// @param II The intrinsic call instruction to check. + /// @param Context The current detection context. + /// + /// @return True if the call instruction is valid, false otherwise. + bool isValidIntrinsicInst(IntrinsicInst &II, DetectionContext &Context) const; + /// @brief Check if a call instruction can be part of a Scop. /// - /// @param CI The call instruction to check. + /// @param CI The call instruction to check. + /// @param Context The current detection context. + /// /// @return True if the call instruction is valid, false otherwise. - static bool isValidCallInst(CallInst &CI); + bool isValidCallInst(CallInst &CI, DetectionContext &Context) const; /// @brief Check if the given loads could be invariant and can be hoisted. /// @@ -355,6 +366,15 @@ /// identified by Reg. bool isInvariant(const Value &Val, const Region &Reg) const; + /// @brief Check if the memory access caused by @p Inst is valid. + /// + /// @param Inst The access instruction. + /// @param AF The access function. + /// @param BP The access base pointer. + /// @param Context The current detection context. + bool isValidAccess(Instruction *Inst, const SCEV *AF, const SCEVUnknown *BP, + DetectionContext &Context) const; + /// @brief Check if a memory access can be part of a Scop. /// /// @param Inst The instruction accessing the memory. 
Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -497,8 +497,8 @@ /// @brief An unique name of the accessed array. std::string BaseName; - /// @brief Size in bytes of a single array element. - unsigned ElemBytes; + /// @brief Type a single array element wrt. this access. + Type *ElementType; /// @brief Size of each dimension of the accessed array. SmallVector Sizes; @@ -529,7 +529,8 @@ /// @brief The value associated with this memory access. /// /// - For array memory accesses (MK_Array) it is the loaded result or the - /// stored value. + /// stored value. If the access instruction is a memory intrinsic it + /// the access value is also the memory intrinsic. /// - For accesses of kind MK_Value it is the access instruction itself. /// - For accesses of kind MK_PHI or MK_ExitPHI it is the PHI node itself /// (for both, READ and WRITE accesses). @@ -574,8 +575,6 @@ isl_map *NewAccessRelation; // @} - unsigned getElemSizeInBytes() const { return ElemBytes; } - bool isAffine() const { return IsAffine; } __isl_give isl_basic_map *createBasicAccessMap(ScopStmt *Statement); @@ -627,6 +626,9 @@ __isl_give isl_map *foldAccess(__isl_take isl_map *AccessRelation, ScopStmt *Statement); + /// @brief Create the access relation for the underlying memory intrinsic. + void buildMemIntrinsicAccessRelation(); + /// @brief Assemble the access relation from all availbale information. /// /// In particular, used the information passes in the constructor and the @@ -641,15 +643,15 @@ /// @param Stmt The parent statement. /// @param AccessInst The instruction doing the access. /// @param BaseAddr The accessed array's address. - /// @param ElemBytes Number of accessed bytes. + /// @param ElemType The type of the accessed array elements. /// @param AccType Whether read or write access. /// @param IsAffine Whether the subscripts are affine expressions. 
/// @param Kind The kind of memory accessed. /// @param Subscripts Subscipt expressions /// @param Sizes Dimension lengths of the accessed array. /// @param BaseName Name of the acessed array. - MemoryAccess(ScopStmt *Stmt, Instruction *AccessInst, AccessType Type, - Value *BaseAddress, unsigned ElemBytes, bool Affine, + MemoryAccess(ScopStmt *Stmt, Instruction *AccessInst, AccessType AccType, + Value *BaseAddress, Type *ElemType, bool Affine, ArrayRef Subscripts, ArrayRef Sizes, Value *AccessValue, ScopArrayInfo::MemoryKind Kind, StringRef BaseName); @@ -754,6 +756,9 @@ const std::string &getBaseName() const { return BaseName; } + /// @brief Return the element type of the accessed array wrt. this access. + Type *getElementType() const { return ElementType; } + /// @brief Return the access value of this memory access. Value *getAccessValue() const { return AccessValue; } @@ -2063,6 +2068,19 @@ const InvariantLoadsSetTy &ScopRIL, const MapInsnToMemAcc &InsnToMemAcc); + /// @brief Try to build a MemoryAccess for a memory intrinsic. + /// + /// @param Inst The instruction that access the memory + /// @param L The parent loop of the instruction + /// @param R The region on which to build the data access dictionary. + /// @param BoxedLoops The set of loops that are overapproximated in @p R. + /// @param ScopRIL The required invariant loads equivalence classes. + /// + /// @returns True if the access could be built, False otherwise. + bool buildAccessMemIntrinsic(MemAccInst Inst, Loop *L, Region *R, + const ScopDetection::BoxedLoopsSetTy *BoxedLoops, + const InvariantLoadsSetTy &ScopRIL); + /// @brief Build a single-dimensional parameteric sized MemoryAccess /// from the Load/Store instruction. /// @@ -2144,9 +2162,9 @@ /// @param BB The block where the access takes place. /// @param Inst The instruction doing the access. It is not necessarily /// inside @p BB. - /// @param Type The kind of access. + /// @param AccType The kind of access. 
/// @param BaseAddress The accessed array's base address. - /// @param ElemBytes Size of accessed array element. + /// @param ElemType The type of the accessed array elements. /// @param Affine Whether all subscripts are affine expressions. /// @param AccessValue Value read or written. /// @param Subscripts Access subscripts per dimension. @@ -2156,9 +2174,9 @@ /// @return The created MemoryAccess, or nullptr if the access is not within /// the SCoP. MemoryAccess *addMemoryAccess(BasicBlock *BB, Instruction *Inst, - MemoryAccess::AccessType Type, - Value *BaseAddress, unsigned ElemBytes, - bool Affine, Value *AccessValue, + MemoryAccess::AccessType AccType, + Value *BaseAddress, Type *ElemType, bool Affine, + Value *AccessValue, ArrayRef Subscripts, ArrayRef Sizes, ScopArrayInfo::MemoryKind Kind); @@ -2167,16 +2185,16 @@ /// StoreInst. /// /// @param MemAccInst The LoadInst or StoreInst. - /// @param Type The kind of access. + /// @param AccType The kind of access. /// @param BaseAddress The accessed array's base address. - /// @param ElemBytes Size of accessed array element. + /// @param ElemType The type of the accessed array elements. /// @param IsAffine Whether all subscripts are affine expressions. /// @param Subscripts Access subscripts per dimension. /// @param Sizes The array dimension's sizes. /// @param AccessValue Value read or written. 
/// @see ScopArrayInfo::MemoryKind - void addArrayAccess(MemAccInst MemAccInst, MemoryAccess::AccessType Type, - Value *BaseAddress, unsigned ElemBytes, bool IsAffine, + void addArrayAccess(MemAccInst MemAccInst, MemoryAccess::AccessType AccType, + Value *BaseAddress, Type *ElemType, bool IsAffine, ArrayRef Subscripts, ArrayRef Sizes, Value *AccessValue); Index: include/polly/Support/ScopHelper.h =================================================================== --- include/polly/Support/ScopHelper.h +++ include/polly/Support/ScopHelper.h @@ -17,6 +17,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/ValueHandle.h" namespace llvm { @@ -50,9 +51,10 @@ /// referenced object and should be passed by-value as it is small enough. /// /// This proxy can either represent a LoadInst instance, a StoreInst instance, -/// or a nullptr (only creatable using the default constructor); never an -/// Instruction that is not a load or store. -/// When representing a nullptr, only the following methods are defined: +/// a MemIntrinsic instance (memset, memmove, memcpy) or a nullptr (only +/// creatable using the default constructor); never an Instruction that is not a +/// load or store. 
When representing a nullptr, only the following methods are +/// defined: /// isNull(), isInstruction(), isLoad(), isStore(), operator bool(), operator!() /// /// The functions isa, cast, cast_or_null, dyn_cast are modeled te resemble @@ -71,14 +73,17 @@ /* implicit */ MemAccInst(llvm::LoadInst *LI) : I(LI) {} /* implicit */ MemAccInst(llvm::StoreInst &SI) : I(&SI) {} /* implicit */ MemAccInst(llvm::StoreInst *SI) : I(SI) {} + /* implicit */ MemAccInst(llvm::MemIntrinsic *MI) : I(MI) {} explicit MemAccInst(llvm::Instruction &I) : I(&I) { assert(isa(I)); } explicit MemAccInst(llvm::Instruction *I) : I(I) { assert(isa(I)); } static bool isa(const llvm::Value &V) { - return llvm::isa(V) || llvm::isa(V); + return llvm::isa(V) || llvm::isa(V) || + llvm::isa(V); } static bool isa(const llvm::Value *V) { - return llvm::isa(V) || llvm::isa(V); + return llvm::isa(V) || llvm::isa(V) || + llvm::isa(V); } static MemAccInst cast(llvm::Value &V) { return MemAccInst(llvm::cast(V)); @@ -126,6 +131,14 @@ I = SI; return *this; } + MemAccInst &operator=(llvm::MemIntrinsic &MI) { + I = &MI; + return *this; + } + MemAccInst &operator=(llvm::MemIntrinsic *MI) { + I = MI; + return *this; + } operator llvm::Instruction *() const { return asInstruction(); } explicit operator bool() const { return isInstruction(); } @@ -152,6 +165,8 @@ return asLoad(); if (isStore()) return asStore()->getValueOperand(); + if (isMemIntrinsic()) + return nullptr; llvm_unreachable("Operation not supported on nullptr"); } llvm::Value *getPointerOperand() const { @@ -159,6 +174,8 @@ return asLoad()->getPointerOperand(); if (isStore()) return asStore()->getPointerOperand(); + if (isMemIntrinsic()) + return asMemIntrinsic()->getDest(); llvm_unreachable("Operation not supported on nullptr"); } @@ -167,6 +184,8 @@ return asLoad()->getAlignment(); if (isStore()) return asStore()->getAlignment(); + if (isMemIntrinsic()) + return asMemIntrinsic()->getAlignment(); llvm_unreachable("Operation not supported on nullptr"); 
} bool isVolatile() const { @@ -174,6 +193,8 @@ return asLoad()->isVolatile(); if (isStore()) return asStore()->isVolatile(); + if (isMemIntrinsic()) + return asMemIntrinsic()->isVolatile(); llvm_unreachable("Operation not supported on nullptr"); } bool isSimple() const { @@ -181,6 +202,8 @@ return asLoad()->isSimple(); if (isStore()) return asStore()->isSimple(); + if (isMemIntrinsic()) + return !asMemIntrinsic()->isVolatile(); llvm_unreachable("Operation not supported on nullptr"); } llvm::AtomicOrdering getOrdering() const { @@ -188,6 +211,8 @@ return asLoad()->getOrdering(); if (isStore()) return asStore()->getOrdering(); + if (isMemIntrinsic()) + return llvm::AtomicOrdering::NotAtomic; llvm_unreachable("Operation not supported on nullptr"); } bool isUnordered() const { @@ -195,6 +220,8 @@ return asLoad()->isUnordered(); if (isStore()) return asStore()->isUnordered(); + if (isMemIntrinsic()) + return !asMemIntrinsic()->isVolatile(); llvm_unreachable("Operation not supported on nullptr"); } @@ -202,10 +229,24 @@ bool isInstruction() const { return I; } bool isLoad() const { return I && llvm::isa(I); } bool isStore() const { return I && llvm::isa(I); } + bool isMemIntrinsic() const { return I && llvm::isa(I); } + bool isMemSetInst() const { return I && llvm::isa(I); } + bool isMemTransferInst() const { + return I && llvm::isa(I); + } llvm::Instruction *asInstruction() const { return I; } llvm::LoadInst *asLoad() const { return llvm::cast(I); } llvm::StoreInst *asStore() const { return llvm::cast(I); } + llvm::MemIntrinsic *asMemIntrinsic() const { + return llvm::cast(I); + } + llvm::MemSetInst *asMemSetInst() const { + return llvm::cast(I); + } + llvm::MemTransferInst *asMemTransferInst() const { + return llvm::cast(I); + } }; /// @brief Check if the PHINode has any incoming Invoke edge. 
Index: lib/Analysis/ScopDetection.cpp =================================================================== --- lib/Analysis/ScopDetection.cpp +++ lib/Analysis/ScopDetection.cpp @@ -34,8 +34,8 @@ // // * Side effect free functions call // -// Only function calls and intrinsics that do not have side effects are allowed -// (readnone). +// Function calls and intrinsics that do not have side effects (readnone) +// or of which the memory effects are known and representable are allowed. // // The Scop detection finds the largest Scops by checking if the largest // region is a Scop. If this is not the case, its canonical subregions are @@ -453,21 +453,64 @@ return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context); } -bool ScopDetection::isValidCallInst(CallInst &CI) { +bool ScopDetection::isValidCallInst(CallInst &CI, + DetectionContext &Context) const { if (CI.doesNotReturn()) return false; if (CI.doesNotAccessMemory()) return true; + if (auto *II = dyn_cast(&CI)) + return isValidIntrinsicInst(*II, Context); + Function *CalledFunction = CI.getCalledFunction(); // Indirect calls are not supported. if (CalledFunction == 0) return false; - if (isIgnoredIntrinsic(&CI)) + return false; +} + +bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II, + DetectionContext &Context) const { + if (isIgnoredIntrinsic(&II)) + return true; + + // The closest loop surrounding the call instruction. + Loop *L = LI->getLoopFor(II.getParent()); + + // The access function and base pointer for memory intrinsics. + const SCEV *AF; + const SCEVUnknown *BP; + + switch (II.getIntrinsicID()) { + // Memory intrinsics that can be represented are supported. + case llvm::Intrinsic::memmove: + case llvm::Intrinsic::memcpy: + AF = SE->getSCEVAtScope(cast(II).getSource(), L); + BP = dyn_cast(SE->getPointerBase(AF)); + // Bail if the source pointer is not valid. 
+ if (!isValidAccess(&II, AF, BP, Context)) + return false; + // Fall through + case llvm::Intrinsic::memset: + AF = SE->getSCEVAtScope(cast(II).getDest(), L); + BP = dyn_cast(SE->getPointerBase(AF)); + // Bail if the destination pointer is not valid. + if (!isValidAccess(&II, AF, BP, Context)) + return false; + + // Bail if the length is not affine. + if (!isAffine(SE->getSCEVAtScope(cast(II).getLength(), L), + Context)) + return false; + return true; + default: + break; + } return false; } @@ -762,72 +805,79 @@ return true; } -bool ScopDetection::isValidMemoryAccess(MemAccInst Inst, - DetectionContext &Context) const { - Region &CurRegion = Context.CurRegion; - - Value *Ptr = Inst.getPointerOperand(); - Loop *L = LI->getLoopFor(Inst.getParent()); - const SCEV *AccessFunction = SE->getSCEVAtScope(Ptr, L); - const SCEVUnknown *BasePointer; - Value *BaseValue; - - BasePointer = dyn_cast(SE->getPointerBase(AccessFunction)); +bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF, + const SCEVUnknown *BP, + DetectionContext &Context) const { - if (!BasePointer) + if (!BP) return invalid(Context, /*Assert=*/true, Inst); - BaseValue = BasePointer->getValue(); - - if (isa(BaseValue)) + auto *BV = BP->getValue(); + if (isa(BV)) return invalid(Context, /*Assert=*/true, Inst); + // FIXME: Think about allowing IntToPtrInst + if (IntToPtrInst *Inst = dyn_cast(BV)) + return invalid(Context, /*Assert=*/true, Inst); + // Check that the base address of the access is invariant in the current // region. 
- if (!isInvariant(*BaseValue, CurRegion)) - return invalid(Context, /*Assert=*/true, BaseValue, - Inst); + if (!isInvariant(*BV, Context.CurRegion)) + return invalid(Context, /*Assert=*/true, BV, Inst); - AccessFunction = SE->getMinusSCEV(AccessFunction, BasePointer); + AF = SE->getMinusSCEV(AF, BP); const SCEV *Size = SE->getElementSize(Inst); - if (Context.ElementSize[BasePointer]) { - if (!AllowDifferentTypes && Context.ElementSize[BasePointer] != Size) + if (Context.ElementSize[BP]) { + if (!AllowDifferentTypes && Context.ElementSize[BP] != Size) return invalid(Context, /*Assert=*/true, - Inst, BaseValue); + Inst, BV); - Context.ElementSize[BasePointer] = - SE->getSMinExpr(Size, Context.ElementSize[BasePointer]); + Context.ElementSize[BP] = SE->getSMinExpr(Size, Context.ElementSize[BP]); } else { - Context.ElementSize[BasePointer] = Size; + Context.ElementSize[BP] = Size; } bool isVariantInNonAffineLoop = false; SetVector Loops; - findLoops(AccessFunction, Loops); + findLoops(AF, Loops); for (const Loop *L : Loops) if (Context.BoxedLoopsSet.count(L)) isVariantInNonAffineLoop = true; if (PollyDelinearize && !isVariantInNonAffineLoop) { - Context.Accesses[BasePointer].push_back({Inst, AccessFunction}); + Context.Accesses[BP].push_back({Inst, AF}); - if (!isAffine(AccessFunction, Context, BaseValue)) - Context.NonAffineAccesses.insert(BasePointer); + if (!isAffine(AF, Context, BV)) + Context.NonAffineAccesses.insert(BP); } else if (!AllowNonAffine) { - if (isVariantInNonAffineLoop || - !isAffine(AccessFunction, Context, BaseValue)) - return invalid(Context, /*Assert=*/true, - AccessFunction, Inst, BaseValue); + if (isVariantInNonAffineLoop || !isAffine(AF, Context, BV)) + return invalid(Context, /*Assert=*/true, AF, Inst, + BV); } - // FIXME: Think about allowing IntToPtrInst - if (IntToPtrInst *Inst = dyn_cast(BaseValue)) - return invalid(Context, /*Assert=*/true, Inst); + return true; +} + +bool ScopDetection::isValidMemoryAccess(MemAccInst Inst, + 
DetectionContext &Context) const { + Region &CurRegion = Context.CurRegion; + + Value *Ptr = Inst.getPointerOperand(); + Loop *L = LI->getLoopFor(Inst.getParent()); + const SCEV *AccessFunction = SE->getSCEVAtScope(Ptr, L); + const SCEVUnknown *BasePointer; + + BasePointer = dyn_cast(SE->getPointerBase(AccessFunction)); + + if (!isValidAccess(Inst, AccessFunction, BasePointer, Context)) + return false; if (IgnoreAliasing) return true; + Value *BaseValue = BasePointer->getValue(); + // Check if the base pointer of the memory access does alias with // any other pointer. This cannot be handled at the moment. AAMDNodes AATags; @@ -880,7 +930,7 @@ // We only check the call instruction but not invoke instruction. if (CallInst *CI = dyn_cast(&Inst)) { - if (isValidCallInst(*CI)) + if (isValidCallInst(*CI, Context)) return true; return invalid(Context, /*Assert=*/true, &Inst); Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -303,7 +303,10 @@ auto DimsAccess = isl_space_dim(AccessSpace, isl_dim_set); auto DimsMissing = DimsArray - DimsAccess; + auto *BB = getStatement()->getParent()->getRegion().getEntry(); + auto &DL = BB->getModule()->getDataLayout(); unsigned ArrayElemSize = SAI->getElemSizeInBytes(); + unsigned ElemBytes = DL.getTypeAllocSize(getElementType()); auto Map = isl_map_from_domain_and_range( isl_set_universe(AccessSpace), @@ -612,10 +615,36 @@ isl_space_free(Space); } +void MemoryAccess::buildMemIntrinsicAccessRelation() { + auto MAI = MemAccInst(getAccessInstruction()); + assert(MAI.isMemIntrinsic()); + assert(Subscripts.size() == 1); + + ScalarEvolution *SE = Statement->getParent()->getSE(); + auto *LengthVal = SE->getSCEV(MAI.asMemIntrinsic()->getLength()); + auto *LengthPWA = Statement->getPwAff(LengthVal); + auto *LengthMap = isl_map_from_pw_aff(LengthPWA); + auto *RangeSpace = isl_space_range(isl_map_get_space(LengthMap)); + LengthMap = 
isl_map_apply_range(LengthMap, isl_map_lex_gt(RangeSpace)); + LengthMap = isl_map_lower_bound_si(LengthMap, isl_dim_out, 0, 0); + auto *SubscriptPWA = Statement->getPwAff(Subscripts[0]); + auto *SubscriptMap = isl_map_from_pw_aff(SubscriptPWA); + SubscriptMap = + isl_map_align_params(SubscriptMap, isl_map_get_space(LengthMap)); + LengthMap = isl_map_align_params(LengthMap, isl_map_get_space(SubscriptMap)); + LengthMap = isl_map_sum(LengthMap, SubscriptMap); + AccessRelation = isl_map_set_tuple_id(LengthMap, isl_dim_in, + getStatement()->getDomainId()); +} + void MemoryAccess::computeBoundsOnAccessRelation(unsigned ElementSize) { ScalarEvolution *SE = Statement->getParent()->getSE(); - Value *Ptr = MemAccInst(getAccessInstruction()).getPointerOperand(); + auto MAI = MemAccInst(getAccessInstruction()); + if (MAI.isMemIntrinsic()) + return; + + Value *Ptr = MAI.getPointerOperand(); if (!Ptr || !SE->isSCEVable(Ptr->getType())) return; @@ -731,11 +760,16 @@ isl_id *BaseAddrId = SAI->getBasePtrId(); if (!isAffine()) { + if (isa(getAccessInstruction())) + buildMemIntrinsicAccessRelation(); + // We overapproximate non-affine accesses with a possible access to the // whole array. For read accesses it does not make a difference, if an // access must or may happen. However, for write accesses it is important to // differentiate between writes that must happen and writes that may happen. 
- AccessRelation = isl_map_from_basic_map(createBasicAccessMap(Statement)); + if (!AccessRelation) + AccessRelation = isl_map_from_basic_map(createBasicAccessMap(Statement)); + AccessRelation = isl_map_set_tuple_id(AccessRelation, isl_dim_out, BaseAddrId); return; @@ -764,13 +798,13 @@ } MemoryAccess::MemoryAccess(ScopStmt *Stmt, Instruction *AccessInst, - AccessType Type, Value *BaseAddress, - unsigned ElemBytes, bool Affine, + AccessType AccType, Value *BaseAddress, + Type *ElementType, bool Affine, ArrayRef Subscripts, ArrayRef Sizes, Value *AccessValue, ScopArrayInfo::MemoryKind Kind, StringRef BaseName) - : Kind(Kind), AccType(Type), RedType(RT_NONE), Statement(Stmt), - BaseAddr(BaseAddress), BaseName(BaseName), ElemBytes(ElemBytes), + : Kind(Kind), AccType(AccType), RedType(RT_NONE), Statement(Stmt), + BaseAddr(BaseAddress), BaseName(BaseName), ElementType(ElementType), Sizes(Sizes.begin(), Sizes.end()), AccessInstruction(AccessInst), AccessValue(AccessValue), IsAffine(Affine), Subscripts(Subscripts.begin(), Subscripts.end()), AccessRelation(nullptr), @@ -945,7 +979,7 @@ void ScopStmt::buildAccessRelations() { Scop &S = *getParent(); for (MemoryAccess *Access : MemAccs) { - Type *ElementType = Access->getAccessValue()->getType(); + Type *ElementType = Access->getElementType(); ScopArrayInfo::MemoryKind Ty; if (Access->isPHIKind()) @@ -2563,7 +2597,10 @@ if (!MA->isRead()) HasWriteAccess.insert(MA->getBaseAddr()); MemAccInst Acc(MA->getAccessInstruction()); - PtrToAcc[Acc.getPointerOperand()] = MA; + if (MA->isRead() && Acc.isMemTransferInst()) + PtrToAcc[Acc.asMemTransferInst()->getSource()] = MA; + else + PtrToAcc[Acc.getPointerOperand()] = MA; AST.add(Acc); } } @@ -2575,8 +2612,8 @@ AliasGroupTy AG; for (auto PR : AS) AG.push_back(PtrToAcc[PR.getValue()]); - assert(AG.size() > 1 && - "Alias groups should contain at least two accesses"); + if (AG.size() < 2) + continue; AliasGroups.push_back(std::move(AG)); } @@ -3770,8 +3807,7 @@ const 
ScopDetection::BoxedLoopsSetTy *BoxedLoops, const InvariantLoadsSetTy &ScopRIL) { Value *Val = Inst.getValueOperand(); - Type *SizeType = Val->getType(); - unsigned ElementSize = DL->getTypeAllocSize(SizeType); + Type *ElementType = Val->getType(); Value *Address = Inst.getPointerOperand(); const SCEV *AccessFunction = SE->getSCEVAtScope(Address, L); const SCEVUnknown *BasePointer = @@ -3812,7 +3848,7 @@ SizesSCEV.push_back(SE->getSCEV(ConstantInt::get( IntegerType::getInt64Ty(BasePtr->getContext()), V))); - addArrayAccess(Inst, Type, BasePointer->getValue(), ElementSize, true, + addArrayAccess(Inst, Type, BasePointer->getValue(), ElementType, true, Subscripts, SizesSCEV, Val); return true; } @@ -3827,8 +3863,8 @@ const InvariantLoadsSetTy &ScopRIL, const MapInsnToMemAcc &InsnToMemAcc) { Value *Address = Inst.getPointerOperand(); Value *Val = Inst.getValueOperand(); - Type *SizeType = Val->getType(); - unsigned ElementSize = DL->getTypeAllocSize(SizeType); + Type *ElementType = Val->getType(); + unsigned ElementSize = DL->getTypeAllocSize(ElementType); enum MemoryAccess::AccessType Type = Inst.isLoad() ? 
MemoryAccess::READ : MemoryAccess::MUST_WRITE; @@ -3856,21 +3892,55 @@ if (ElementSize != DelinearizedSize) scop->invalidate(DELINEARIZATION, Inst.getDebugLoc()); - addArrayAccess(Inst, Type, BasePointer->getValue(), ElementSize, true, + addArrayAccess(Inst, Type, BasePointer->getValue(), ElementType, true, AccItr->second.DelinearizedSubscripts, Sizes, Val); return true; } return false; } +bool ScopInfo::buildAccessMemIntrinsic( + MemAccInst Inst, Loop *L, Region *R, + const ScopDetection::BoxedLoopsSetTy *BoxedLoops, + const InvariantLoadsSetTy &ScopRIL) { + if (!Inst.isMemIntrinsic()) + return false; + + auto *DestPtrVal = Inst.getPointerOperand(); + assert(DestPtrVal); + auto *DestAccFunc = SE->getSCEVAtScope(DestPtrVal, L); + assert(DestAccFunc); + auto *DestPtrSCEV = dyn_cast(SE->getPointerBase(DestAccFunc)); + assert(DestPtrSCEV); + DestAccFunc = SE->getMinusSCEV(DestAccFunc, DestPtrSCEV); + addArrayAccess(Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(), + IntegerType::getInt8Ty(DestPtrVal->getContext()), false, + {DestAccFunc}, {}, Inst.getValueOperand()); + + if (!Inst.isMemTransferInst()) + return true; + + auto *SrcPtrVal = Inst.asMemTransferInst()->getSource(); + assert(SrcPtrVal); + auto *SrcAccFunc = SE->getSCEVAtScope(SrcPtrVal, L); + assert(SrcAccFunc); + auto *SrcPtrSCEV = dyn_cast(SE->getPointerBase(SrcAccFunc)); + assert(SrcPtrSCEV); + SrcAccFunc = SE->getMinusSCEV(SrcAccFunc, SrcPtrSCEV); + addArrayAccess(Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(), + IntegerType::getInt8Ty(SrcPtrVal->getContext()), false, + {SrcAccFunc}, {}, Inst.getValueOperand()); + + return true; +} + void ScopInfo::buildAccessSingleDim( MemAccInst Inst, Loop *L, Region *R, const ScopDetection::BoxedLoopsSetTy *BoxedLoops, const InvariantLoadsSetTy &ScopRIL) { Value *Address = Inst.getPointerOperand(); Value *Val = Inst.getValueOperand(); - Type *SizeType = Val->getType(); - unsigned ElementSize = DL->getTypeAllocSize(SizeType); + Type *ElementType = 
Val->getType(); enum MemoryAccess::AccessType Type = Inst.isLoad() ? MemoryAccess::READ : MemoryAccess::MUST_WRITE; @@ -3903,7 +3973,7 @@ if (!IsAffine && Type == MemoryAccess::MUST_WRITE) Type = MemoryAccess::MAY_WRITE; - addArrayAccess(Inst, Type, BasePointer->getValue(), ElementSize, IsAffine, + addArrayAccess(Inst, Type, BasePointer->getValue(), ElementType, IsAffine, {AccessFunction}, {}, Val); } @@ -3912,6 +3982,9 @@ const ScopDetection::BoxedLoopsSetTy *BoxedLoops, const InvariantLoadsSetTy &ScopRIL, const MapInsnToMemAcc &InsnToMemAcc) { + if (buildAccessMemIntrinsic(Inst, L, R, BoxedLoops, ScopRIL)) + return; + if (buildAccessMultiDimFixed(Inst, L, R, BoxedLoops, ScopRIL)) return; @@ -3995,8 +4068,8 @@ } MemoryAccess *ScopInfo::addMemoryAccess(BasicBlock *BB, Instruction *Inst, - MemoryAccess::AccessType Type, - Value *BaseAddress, unsigned ElemBytes, + MemoryAccess::AccessType AccType, + Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue, ArrayRef Subscripts, ArrayRef Sizes, @@ -4033,24 +4106,24 @@ if (Kind == ScopArrayInfo::MK_PHI || Kind == ScopArrayInfo::MK_ExitPHI) isKnownMustAccess = true; - if (!isKnownMustAccess && Type == MemoryAccess::MUST_WRITE) - Type = MemoryAccess::MAY_WRITE; + if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE) + AccType = MemoryAccess::MAY_WRITE; - AccList.emplace_back(Stmt, Inst, Type, BaseAddress, ElemBytes, Affine, + AccList.emplace_back(Stmt, Inst, AccType, BaseAddress, ElementType, Affine, Subscripts, Sizes, AccessValue, Kind, BaseName); Stmt->addAccess(&AccList.back()); return &AccList.back(); } void ScopInfo::addArrayAccess(MemAccInst MemAccInst, - MemoryAccess::AccessType Type, Value *BaseAddress, - unsigned ElemBytes, bool IsAffine, - ArrayRef Subscripts, + MemoryAccess::AccessType AccType, + Value *BaseAddress, Type *ElementType, + bool IsAffine, ArrayRef Subscripts, ArrayRef Sizes, Value *AccessValue) { - assert(MemAccInst.isLoad() == (Type == MemoryAccess::READ)); - 
addMemoryAccess(MemAccInst.getParent(), MemAccInst, Type, BaseAddress, - ElemBytes, IsAffine, AccessValue, Subscripts, Sizes, + assert(MemAccInst.isLoad() == (AccType == MemoryAccess::READ) || MemAccInst.isMemIntrinsic()); + addMemoryAccess(MemAccInst.getParent(), MemAccInst, AccType, BaseAddress, + ElementType, IsAffine, AccessValue, Subscripts, Sizes, ScopArrayInfo::MK_Array); } void ScopInfo::ensureValueWrite(Instruction *Value) { @@ -4064,8 +4137,8 @@ if (Stmt->lookupValueWriteOf(Value)) return; - addMemoryAccess(Value->getParent(), Value, MemoryAccess::MUST_WRITE, Value, 1, - true, Value, ArrayRef(), + addMemoryAccess(Value->getParent(), Value, MemoryAccess::MUST_WRITE, Value, + Value->getType(), true, Value, ArrayRef(), ArrayRef(), ScopArrayInfo::MK_Value); } void ScopInfo::ensureValueRead(Value *Value, BasicBlock *UserBB) { @@ -4114,9 +4187,9 @@ if (UserStmt->lookupValueReadOf(Value)) return; - addMemoryAccess(UserBB, nullptr, MemoryAccess::READ, Value, 1, true, Value, - ArrayRef(), ArrayRef(), - ScopArrayInfo::MK_Value); + addMemoryAccess(UserBB, nullptr, MemoryAccess::READ, Value, Value->getType(), + true, Value, ArrayRef(), + ArrayRef(), ScopArrayInfo::MK_Value); if (ValueInst) ensureValueWrite(ValueInst); } @@ -4143,16 +4216,16 @@ MemoryAccess *Acc = addMemoryAccess( IncomingStmt->isBlockStmt() ? IncomingBlock : IncomingStmt->getRegion()->getEntry(), - PHI, MemoryAccess::MUST_WRITE, PHI, 1, true, PHI, + PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true, PHI, ArrayRef(), ArrayRef(), IsExitBlock ? 
ScopArrayInfo::MK_ExitPHI : ScopArrayInfo::MK_PHI); assert(Acc); Acc->addIncoming(IncomingBlock, IncomingValue); } void ScopInfo::addPHIReadAccess(PHINode *PHI) { - addMemoryAccess(PHI->getParent(), PHI, MemoryAccess::READ, PHI, 1, true, PHI, - ArrayRef(), ArrayRef(), - ScopArrayInfo::MK_PHI); + addMemoryAccess(PHI->getParent(), PHI, MemoryAccess::READ, PHI, + PHI->getType(), true, PHI, ArrayRef(), + ArrayRef(), ScopArrayInfo::MK_PHI); } void ScopInfo::buildScop(Region &R, AssumptionCache &AC) { Index: test/ScopInfo/memcpy.ll =================================================================== --- /dev/null +++ test/ScopInfo/memcpy.ll @@ -0,0 +1,80 @@ +; RUN: opt %loadPolly -basicaa -polly-allow-differing-element-types -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -S -basicaa -polly-allow-differing-element-types -polly-codegen < %s | FileCheck --check-prefix=IR %s +; +; CHECK: Arrays { +; CHECK-NEXT: i8 MemRef_A[*]; // Element size 1 +; CHECK-NEXT: i8 MemRef_B[*]; // Element size 1 +; CHECK-NEXT: } +; CHECK: Statements { +; CHECK-NEXT: Stmt_for_body3 +; CHECK-NEXT: Domain := +; CHECK-NEXT: { Stmt_for_body3[i0, i1] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: { Stmt_for_body3[i0, i1] -> [i0, i1] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body3[i0, i1] -> MemRef_A[o0] : -16 <= o0 <= 20 }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body3[i0, i1] -> MemRef_B[o0] : 64 <= o0 <= 100 }; +; +; IR: polly.loop_preheader: +; IR: %[[r1:[a-zA-Z0-9]*]] = getelementptr i32, i32* %A, i64 -4 +; IR: %[[r2:[a-zA-Z0-9]*]] = bitcast i32* %scevgep to i8* +; IR: %[[r3:[a-zA-Z0-9]*]] = getelementptr i64, i64* %B, i64 8 +; IR: %[[r4:[a-zA-Z0-9]*]] = bitcast i64* %scevgep8 to i8* +; +; IR: polly.stmt.for.body3: +; IR: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[r2]], i8* %[[r4]], i64 37, i32 4, i1 false) +; +; +; #include +; +; 
void jd(int *restrict A, long *restrict B) { +; for (int i = 0; i < 1024; i++) +; for (int j = 0; j < 1024; j++) +; memcpy(A - 4, B + 8, 37); +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @jd(i32* noalias %A, i64* noalias %B) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc5, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc6, %for.inc5 ] + %exitcond1 = icmp ne i32 %i.0, 1024 + br i1 %exitcond1, label %for.body, label %for.end7 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 1024 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %add.ptr = getelementptr inbounds i32, i32* %A, i64 -4 + %tmp = bitcast i32* %add.ptr to i8* + %add.ptr4 = getelementptr inbounds i64, i64* %B, i64 8 + %tmp2 = bitcast i64* %add.ptr4 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp2, i64 37, i32 4, i1 false) + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc5 + +for.inc5: ; preds = %for.end + %inc6 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end7: ; preds = %for.cond + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1 + Index: test/ScopInfo/memmove.ll =================================================================== --- /dev/null +++ test/ScopInfo/memmove.ll @@ -0,0 +1,79 @@ +; RUN: opt %loadPolly -basicaa -polly-allow-differing-element-types -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -S -basicaa -polly-allow-differing-element-types -polly-codegen < %s | FileCheck --check-prefix=IR %s +; +; CHECK: Arrays { +; CHECK-NEXT: i8 MemRef_A[*]; // Element size 1 +; CHECK-NEXT: i8 MemRef_B[*]; // Element size 1 +; CHECK-NEXT: } +; CHECK: Statements { +; CHECK-NEXT: 
Stmt_for_body3 +; CHECK-NEXT: Domain := +; CHECK-NEXT: { Stmt_for_body3[i0, i1] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: { Stmt_for_body3[i0, i1] -> [i0, i1] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body3[i0, i1] -> MemRef_A[o0] : -16 <= o0 <= 15 }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body3[i0, i1] -> MemRef_B[o0] : 64 <= o0 <= 95 }; +; +; IR: polly.loop_preheader: +; IR: %[[r1:[a-zA-Z0-9]*]] = getelementptr i32, i32* %A, i64 -4 +; IR: %[[r2:[a-zA-Z0-9]*]] = bitcast i32* %scevgep to i8* +; IR: %[[r3:[a-zA-Z0-9]*]] = getelementptr i64, i64* %B, i64 8 +; IR: %[[r4:[a-zA-Z0-9]*]] = bitcast i64* %scevgep8 to i8* +; +; IR: polly.stmt.for.body3: +; IR: call void @llvm.memmove.p0i8.p0i8.i64(i8* %[[r2]], i8* %[[r4]], i64 32, i32 4, i1 false) +; +; #include +; +; void jd(int *restrict A, long *restrict B) { +; for (int i = 0; i < 1024; i++) +; for (int j = 0; j < 1024; j++) +; memmove(A - 4, B + 8, 32); +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @jd(i32* noalias %A, i64* noalias %B) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc5, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc6, %for.inc5 ] + %exitcond1 = icmp ne i32 %i.0, 1024 + br i1 %exitcond1, label %for.body, label %for.end7 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 1024 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %add.ptr = getelementptr inbounds i32, i32* %A, i64 -4 + %tmp = bitcast i32* %add.ptr to i8* + %add.ptr4 = getelementptr inbounds i64, i64* %B, i64 8 + %tmp2 = bitcast i64* %add.ptr4 to i8* + call void @llvm.memmove.p0i8.p0i8.i64(i8* %tmp, i8* %tmp2, i64 32, i32 4, i1 false) + br label %for.inc + +for.inc: ; preds 
= %for.body3 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc5 + +for.inc5: ; preds = %for.end + %inc6 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end7: ; preds = %for.cond + ret void +} + +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1 + Index: test/ScopInfo/memset.ll =================================================================== --- /dev/null +++ test/ScopInfo/memset.ll @@ -0,0 +1,69 @@ +; RUN: opt %loadPolly -polly-allow-differing-element-types -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -S -polly-allow-differing-element-types -polly-codegen < %s | FileCheck --check-prefix=IR %s +; +; CHECK: Arrays { +; CHECK-NEXT: i8 MemRef_A[*]; // Element size 1 +; CHECK-NEXT: } +; CHECK: Statements { +; CHECK-NEXT: Stmt_for_body3 +; CHECK-NEXT: Domain := +; CHECK-NEXT: { Stmt_for_body3[i0, i1] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: { Stmt_for_body3[i0, i1] -> [i0, i1] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body3[i0, i1] -> MemRef_A[o0] : 0 <= o0 <= 186 }; +; +; IR: %[[r1:[a-zA-Z0-9]*]] = bitcast i32* %A to i8* +; +; IR: polly.stmt.for.body3: +; IR: call void @llvm.memset.p0i8.i64(i8* %[[r1]], i8 36, i64 187, i32 4, i1 false) +; +; #include +; +; void jd(int *A) { +; for (int i = 0; i < 1024; i++) +; for (int j = 0; j < 1024; j++) +; memset(A, '$', 187); +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @jd(i32* noalias %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc4, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ] + %exitcond1 = icmp ne i32 %i.0, 1024 + br i1 %exitcond1, label %for.body, label %for.end6 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne 
i32 %j.0, 1024 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %tmp = bitcast i32* %A to i8* + call void @llvm.memset.p0i8.i64(i8* %tmp, i8 36, i64 187, i32 4, i1 false) + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc4 + +for.inc4: ; preds = %for.end + %inc5 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end6: ; preds = %for.cond + ret void +} + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1 +