Index: include/polly/CodeGen/IslExprBuilder.h =================================================================== --- include/polly/CodeGen/IslExprBuilder.h +++ include/polly/CodeGen/IslExprBuilder.h @@ -93,7 +93,9 @@ public: /// A map from isl_ids to llvm::Values. typedef llvm::MapVector> IDToValueTy; - + /// A map from const SCEV* to llvm::Values + typedef llvm::MapVector> + SCEVToValueTy; typedef llvm::MapVector IDToScopArrayInfoTy; /// A map from isl_ids to ScopArrayInfo objects. @@ -131,9 +133,10 @@ /// @param LI LoopInfo analysis for the current function. /// @param StartBlock The first basic block after the RTC. IslExprBuilder(Scop &S, PollyIRBuilder &Builder, IDToValueTy &IDToValue, - ValueMapT &GlobalMap, const llvm::DataLayout &DL, - llvm::ScalarEvolution &SE, llvm::DominatorTree &DT, - llvm::LoopInfo &LI, llvm::BasicBlock *StartBlock); + SCEVToValueTy &SCEVToValue, ValueMapT &GlobalMap, + const llvm::DataLayout &DL, llvm::ScalarEvolution &SE, + llvm::DominatorTree &DT, llvm::LoopInfo &LI, + llvm::BasicBlock *StartBlock); /// Create LLVM-IR for an isl_ast_expr[ession]. /// @@ -210,6 +213,7 @@ PollyIRBuilder &Builder; IDToValueTy &IDToValue; + SCEVToValueTy &SCEVToValue; ValueMapT &GlobalMap; const llvm::DataLayout &DL; @@ -272,6 +276,10 @@ /// @return A value that represents the result of the multiplication. llvm::Value *createMul(llvm::Value *LHS, llvm::Value *RHS, const llvm::Twine &Name = ""); + + /// Provide a uniform interface to lookup replacements of the old value + /// in the various maps we provide. 
+ llvm::Value *getLatestValue(llvm::Value *Old); }; } // namespace polly Index: include/polly/CodeGen/IslNodeBuilder.h =================================================================== --- include/polly/CodeGen/IslNodeBuilder.h +++ include/polly/CodeGen/IslNodeBuilder.h @@ -98,8 +98,8 @@ const DataLayout &DL, LoopInfo &LI, ScalarEvolution &SE, DominatorTree &DT, Scop &S, BasicBlock *StartBlock) : S(S), Builder(Builder), Annotator(Annotator), - ExprBuilder(S, Builder, IDToValue, ValueMap, DL, SE, DT, LI, - StartBlock), + ExprBuilder(S, Builder, IDToValue, SCEVToValue, ValueMap, DL, SE, DT, + LI, StartBlock), BlockGen(Builder, LI, SE, DT, ScalarMap, EscapeMap, ValueMap, &ExprBuilder, StartBlock), RegionGen(BlockGen), DL(DL), LI(LI), SE(SE), DT(DT), @@ -196,6 +196,10 @@ // ivs. IslExprBuilder::IDToValueTy IDToValue; + // This maps a const SCEV* to the Value* it has in the generated program. For + // now, this stores strides and offsets of SAIs. + IslExprBuilder::SCEVToValueTy SCEVToValue; + /// A collection of all parallel subfunctions that have been created. SmallVector ParallelSubfunctions; @@ -229,6 +233,8 @@ /// @returns False, iff a problem occurred and the value was not materialized. bool materializeParameters(); + void materializeStridedArraySizes(); + // Extract the upper bound of this loop // // The isl code generation can generate arbitrary expressions to check if the Index: include/polly/ScopBuilder.h =================================================================== --- include/polly/ScopBuilder.h +++ include/polly/ScopBuilder.h @@ -186,6 +186,8 @@ /// @param Stmt The parent statement of the instruction void buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt); + bool buildAccessPollyAbstractIndex(MemAccInst Inst, ScopStmt *Stmt); + /// Build an instance of MemoryAccess from the Load/Store instruction. 
/// /// @param Inst The Load/Store instruction that access the memory @@ -278,7 +280,7 @@ Value *BaseAddress, Type *ElemType, bool Affine, Value *AccessValue, ArrayRef Subscripts, - ArrayRef Sizes, MemoryKind Kind); + ShapeInfo Shape, MemoryKind Kind); /// Create a MemoryAccess that represents either a LoadInst or /// StoreInst. @@ -297,8 +299,8 @@ void addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst, MemoryAccess::AccessType AccType, Value *BaseAddress, Type *ElemType, bool IsAffine, - ArrayRef Subscripts, - ArrayRef Sizes, Value *AccessValue); + ArrayRef Subscripts, ShapeInfo Shape, + Value *AccessValue); /// Create a MemoryAccess for writing an llvm::Instruction. /// Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -108,6 +108,188 @@ DELINEARIZATION, }; +// Abstract over a notion of the shape of an array: +// Once can compute indeces using both sizes and strides. +class ShapeInfo { +private: + using SCEVArrayTy = SmallVector; + using SCEVArrayRefTy = ArrayRef; + + using OptionalSCEVArrayTy = Optional; + using OptionalSCEVArrayRefTy = Optional; + + llvm::Optional> Sizes; + llvm::Optional> Strides; + llvm::Optional Offset; + + ShapeInfo(Optional> SizesRef, + Optional> StridesRef, + llvm::Optional Offset) + : Offset(Offset) { + // Can check for XOR + assert(bool(SizesRef) || bool(StridesRef)); + assert(!(bool(SizesRef) && bool(StridesRef))); + + if (StridesRef || Offset) { + assert(Offset); + assert(StridesRef); + } + + if (SizesRef) + Sizes = + OptionalSCEVArrayTy(SCEVArrayTy(SizesRef->begin(), SizesRef->end())); + + if (StridesRef) + Strides = OptionalSCEVArrayTy( + SCEVArrayTy(StridesRef->begin(), StridesRef->end())); + } + + ShapeInfo(NoneType) : Sizes(None), Strides(None), Offset(None) {} + +public: + static ShapeInfo fromSizes(ArrayRef Sizes) { + return ShapeInfo(OptionalSCEVArrayRefTy(Sizes), None, None); + } + + // We have this anti-pattern 
in polly which does this: + // Shape(ShapeInfo::fromSizes({nullptr}) + // Consider providing a separate constructor for this, which we then + // kill in some cleanup. + + ShapeInfo(const ShapeInfo &other) { + Sizes = other.Sizes; + Strides = other.Strides; + Offset = other.Offset; + } + + ShapeInfo &operator=(const ShapeInfo &other) { + Sizes = other.Sizes; + Strides = other.Strides; + Offset = other.Offset; + return *this; + } + + static ShapeInfo fromStrides(ArrayRef Strides, + const SCEV *Offset) { + assert(Offset && "offset is null"); + return ShapeInfo(None, OptionalSCEVArrayRefTy(Strides), + Optional(Offset)); + } + + static ShapeInfo none() { return ShapeInfo(None); } + + unsigned getNumberOfDimensions() const { + // assert(isInitialized()); + if (Sizes) + return Sizes->size(); + + if (Strides) + return Strides->size(); + + return 0; + } + + /// Set the sizes of the Shape. It checks the invariant + /// That this shape does not have strides. + void setSizes(SmallVector NewSizes) { + assert(!bool(Strides)); + + if (!bool(Sizes)) { + Sizes = Optional>( + SmallVector()); + } + + Sizes = NewSizes; + } + + /// Set the strides of the Shape. It checks the invariant + /// That this shape does not have sizes. + void setStrides(ArrayRef NewStrides, const SCEV *NewOffset) { + Offset = NewOffset; + assert(!bool(Sizes)); + + // Be explicit because GCC(5.3.0) is unable to deduce this. 
+ if (!Strides) + Strides = Optional>( + SmallVector()); + + Strides->clear(); + Strides->insert(Strides->begin(), NewStrides.begin(), NewStrides.end()); + assert(Offset && "offset is null"); + } + + const SmallVector &sizes() const { + assert(!bool(Strides)); + return Sizes.getValue(); + } + + const SCEV *offset() const { return Offset.getValue(); } + + SmallVector &sizes_mut() { + assert(!bool(Strides)); + return Sizes.getValue(); + } + + bool isInitialized() const { return bool(Sizes) || bool(Strides); } + + const SmallVector &strides() const { + assert(!bool(Sizes)); + return Strides.getValue(); + } + + const SmallVector &getSizesOrStrides() { + if (Sizes) + return Sizes.getValue(); + + if (Strides) + return Strides.getValue(); + } + bool hasSizes() const { return bool(Sizes); } + bool hasStrides() const { return bool(Strides); } + + template + Ret mapSizes(std::function &)> func, + Ret otherwise) { + if (Sizes) + return func(*Sizes); + + return otherwise; + } + + void mapSizes(std::function &)> func) { + if (Sizes) + func(*Sizes); + } + + raw_ostream &print(raw_ostream &OS) const { + if (Sizes) { + OS << "Sizes: "; + for (auto Size : *Sizes) { + if (Size) + OS << *Size << ", "; + else + OS << "null" + << ", "; + } + return OS; + } else if (Strides) { + OS << "Strides: "; + for (auto Stride : *Strides) { + if (Stride) + OS << *Stride << ", "; + else + OS << "null" + << ", "; + } + return OS; + } + OS << "Uninitialized.\n"; + return OS; + } +}; + +raw_ostream &operator<<(raw_ostream &OS, const ShapeInfo &Shape); + /// Enum to distinguish between assumptions and restrictions. enum AssumptionSign { AS_ASSUMPTION, AS_RESTRICTION }; @@ -256,14 +438,15 @@ /// @param BasePtr The array base pointer. /// @param ElementType The type of the elements stored in the array. /// @param IslCtx The isl context used to create the base pointer id. + /// @param ShapeInfo /// @param DimensionSizes A vector containing the size of each dimension. 
/// @param Kind The kind of the array object. /// @param DL The data layout of the module. /// @param S The scop this array object belongs to. /// @param BaseName The optional name of this memory reference. ScopArrayInfo(Value *BasePtr, Type *ElementType, isl::ctx IslCtx, - ArrayRef DimensionSizes, MemoryKind Kind, - const DataLayout &DL, Scop *S, const char *BaseName = nullptr); + ShapeInfo Shape, MemoryKind Kind, const DataLayout &DL, Scop *S, + const char *BaseName = nullptr); /// Destructor to free the isl id of the base pointer. ~ScopArrayInfo(); @@ -292,6 +475,28 @@ /// with old sizes bool updateSizes(ArrayRef Sizes, bool CheckConsistency = true); + /// Update the strides of a ScopArrayInfo object, when we had + /// initially believed it to be a size based representation, we later + /// get a more refined view of the array in terms of strides. + /// TODO: Think if this is really necessary. We needed this in COSMO for + /// reasons, (IIRC, they would have be a i8*, which they would index + /// "correctly" + /// to get to the offset and stride info.) + /// Roughly, they had code like this: + /// struct ARRTY { int64 offset; int64 stride_1; int64 stride_2; void + /// *memory; } void * arr; offset = ((int64 *)arr)[0]; stride_1 = ((int64 + /// *)arr)[1]; stride_2 = ((int64 *)arr)[1]; memory = + /// fortran_index(((char*)arr+)), offset, stride_1, + /// stride_2, ix1, ix2); In this type of code, we would first see the naked + /// access of "arr" which we would represent in a size based repr, and we + /// would change our view later into the strided version. We should probably + /// disallow this for future clients. + void overwriteSizeWithStrides(ArrayRef Strides, + const SCEV *Offset); + + /// Update the strides of a ScopArrayInfo object. + bool updateStrides(ArrayRef Strides, const SCEV *Offset); + /// Make the ScopArrayInfo model a Fortran array. /// It receives the Fortran array descriptor and stores this. 
/// It also adds a piecewise expression for the outermost dimension @@ -311,6 +516,9 @@ // Set IsOnHeap to the value in parameter. void setIsOnHeap(bool value) { IsOnHeap = value; } + // get the shape of the ScopArrayInfo + ShapeInfo getShape() const { return Shape; } + /// For indirect accesses return the origin SAI of the BP, else null. const ScopArrayInfo *getBasePtrOriginSAI() const { return BasePtrOriginSAI; } @@ -324,7 +532,7 @@ if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI || Kind == MemoryKind::Value) return 0; - return DimensionSizes.size(); + return Shape.getNumberOfDimensions(); } /// Return the size of dimension @p dim as SCEV*. @@ -334,9 +542,16 @@ // information, in case the array is not newly created. const SCEV *getDimensionSize(unsigned Dim) const { assert(Dim < getNumberOfDimensions() && "Invalid dimension"); - return DimensionSizes[Dim]; + return Shape.sizes()[Dim]; } + const SCEV *getDimensionStride(unsigned Dim) const { + assert(Dim < getNumberOfDimensions() && "Invalid dimension"); + return Shape.strides()[Dim]; + } + + const SCEV *getStrideOffset() const { return Shape.offset(); } + /// Return the size of dimension @p dim as isl::pw_aff. // // Scalars do not have array dimensions and the first dimension of @@ -423,6 +638,8 @@ /// @returns True, if the arrays are compatible, False otherwise. bool isCompatibleWith(const ScopArrayInfo *Array) const; + bool hasStrides() const { return Shape.hasStrides(); } + private: void addDerivedSAI(ScopArrayInfo *DerivedSAI) { DerivedSAIs.insert(DerivedSAI); @@ -452,9 +669,6 @@ /// True if the newly allocated array is on heap. bool IsOnHeap = false; - /// The sizes of each dimension as SCEV*. - SmallVector DimensionSizes; - /// The sizes of each dimension as isl::pw_aff. SmallVector DimensionSizesPw; @@ -466,6 +680,9 @@ /// The data layout of the module. const DataLayout &DL; + /// The sizes of each dimension as SCEV*. + ShapeInfo Shape; + /// The scop this SAI object belongs to. 
Scop &S; @@ -588,6 +805,9 @@ Type *ElementType; /// Size of each dimension of the accessed array. + ShapeInfo Shape; + + /// Size of each dimension of the accessed array. SmallVector Sizes; // @} @@ -756,10 +976,11 @@ /// @param IsAffine Whether the subscripts are affine expressions. /// @param Kind The kind of memory accessed. /// @param Subscripts Subscript expressions - /// @param Sizes Dimension lengths of the accessed array. + /// @param Shape Shape of the accessed array. + MemoryAccess(ScopStmt *Stmt, Instruction *AccessInst, AccessType AccType, Value *BaseAddress, Type *ElemType, bool Affine, - ArrayRef Subscripts, ArrayRef Sizes, + ArrayRef Subscripts, ShapeInfo Shape, Value *AccessValue, MemoryKind Kind); /// Create a new MemoryAccess that corresponds to @p AccRel. @@ -2841,13 +3062,13 @@ const MapInsnToMemAcc &getInsnToMemAccMap() const { return DC.InsnToMemAcc; } /// Return the (possibly new) ScopArrayInfo object for @p Access. - /// + /// @param BasePtr The base pointer of the SAI to add /// @param ElementType The type of the elements stored in this array. + /// @param Shape The shape of the SAI. /// @param Kind The kind of the array info object. /// @param BaseName The optional name of this memory reference. ScopArrayInfo *getOrCreateScopArrayInfo(Value *BasePtr, Type *ElementType, - ArrayRef Sizes, - MemoryKind Kind, + ShapeInfo Shape, MemoryKind Kind, const char *BaseName = nullptr); /// Create an array and return the corresponding ScopArrayInfo object. Index: include/polly/Support/ScopHelper.h =================================================================== --- include/polly/Support/ScopHelper.h +++ include/polly/Support/ScopHelper.h @@ -18,6 +18,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" #include #include @@ -480,5 +481,12 @@ /// /// Such a statement must not be removed, even if has no side-effects. 
bool hasDebugCall(ScopStmt *Stmt); + +/// The name of the array index "intrinsic" interpreted by polly +static const std::string POLLY_ABSTRACT_INDEX_BASENAME = "polly_array_index"; + +/// Check if a given MemAccInst models a polly_array_index. +llvm::Optional> +getAbstractIndexingCall(MemAccInst Inst, llvm::ScalarEvolution &SE); } // namespace polly #endif Index: lib/Analysis/ScopBuilder.cpp =================================================================== --- lib/Analysis/ScopBuilder.cpp +++ lib/Analysis/ScopBuilder.cpp @@ -429,7 +429,7 @@ ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V))); addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType, - true, Subscripts, SizesSCEV, Val); + true, Subscripts, ShapeInfo::fromSizes(SizesSCEV), Val); return true; } @@ -480,7 +480,8 @@ scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent()); addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType, - true, AccItr->second.DelinearizedSubscripts, Sizes, Val); + true, AccItr->second.DelinearizedSubscripts, + ShapeInfo::fromSizes(Sizes), Val); return true; } @@ -522,11 +523,14 @@ auto *DestPtrSCEV = dyn_cast(SE.getPointerBase(DestAccFunc)); assert(DestPtrSCEV); + // TODO: code smell, why doe we initialize an empty shape? I don't recall the + // details + // anymore. 
DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV); addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(), IntegerType::getInt8Ty(DestPtrVal->getContext()), - LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr}, - Inst.getValueOperand()); + LengthIsAffine, {DestAccFunc, LengthVal}, + ShapeInfo::fromSizes({nullptr}), Inst.getValueOperand()); auto *MemTrans = dyn_cast(MemIntr); if (!MemTrans) @@ -547,13 +551,16 @@ SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV); addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(), IntegerType::getInt8Ty(SrcPtrVal->getContext()), - LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr}, - Inst.getValueOperand()); + LengthIsAffine, {SrcAccFunc, LengthVal}, + ShapeInfo::fromSizes({nullptr}), Inst.getValueOperand()); return true; } bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) { + if (buildAccessPollyAbstractIndex(Inst, Stmt)) + return true; + auto *CI = dyn_cast_or_null(Inst); if (CI == nullptr) @@ -565,6 +572,7 @@ bool ReadOnly = false; auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0); auto *CalledFunction = CI->getCalledFunction(); + switch (AA.getModRefBehavior(CalledFunction)) { case FMRB_UnknownModRefBehavior: llvm_unreachable("Unknown mod ref behaviour cannot be represented."); @@ -591,9 +599,11 @@ if (ArgSCEV->isZero()) continue; + // TODO: again, why do we create a shape with a nullptr shape? 
auto *ArgBasePtr = cast(SE.getPointerBase(ArgSCEV)); addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(), - ArgBasePtr->getType(), false, {AF}, {nullptr}, CI); + ArgBasePtr->getType(), false, {AF}, + ShapeInfo::fromSizes({nullptr}), CI); } return true; } @@ -643,7 +653,8 @@ AccType = MemoryAccess::MAY_WRITE; addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType, - IsAffine, {AccessFunction}, {nullptr}, Val); + IsAffine, {AccessFunction}, ShapeInfo::fromSizes({nullptr}), + Val); } void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) { @@ -662,6 +673,95 @@ buildAccessSingleDim(Inst, Stmt); } +bool ScopBuilder::buildAccessPollyAbstractIndex(MemAccInst Inst, + ScopStmt *Stmt) { + auto optionalCallGEP = getAbstractIndexingCall(Inst, SE); + if (!optionalCallGEP) + return false; + + CallInst *Call; + GEPOperator *GEP; + std::tie(Call, GEP) = *optionalCallGEP; + + if ((Call->getNumArgOperands()) % 2 != 1) { + return false; + } + + const int NArrayDims = (Call->getNumArgOperands()) / 2; + + Value *BasePtr = GEP->getPointerOperand(); + + std::vector Subscripts; + std::vector Strides; + Loop *SurroundingLoop = Stmt->getSurroundingLoop(); + + InvariantLoadsSetTy AccessILS; + + const SCEV *OffsetSCEV = [&] { + Value *Offset = Call->getArgOperand(0); + assert(Offset); + const SCEV *OffsetSCEV = SE.getSCEV(Offset); + + if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, OffsetSCEV, SE, + &AccessILS)) { + return (const SCEV *)nullptr; + } + + // Offsets are always loaded from the array, they will get invariant + // load hoisted correctly later. 
+ for (LoadInst *L : AccessILS) { + scop->addRequiredInvariantLoad(L); + } + AccessILS.clear(); + + return OffsetSCEV; + }(); + + if (!OffsetSCEV) + return false; + + for (int i = 0; i < NArrayDims; i++) { + // Handling of Subscripts + Value *Ix = Call->getArgOperand(1 + NArrayDims + i); + const SCEV *IxSCEV = SE.getSCEV(Ix); + ensureValueRead(Ix, Stmt); + Subscripts.push_back(IxSCEV); + + Value *DimSize = Call->getArgOperand(1 + i); + const SCEV *StrideSCEV = SE.getSCEV(DimSize); + + if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, StrideSCEV, SE, + &AccessILS)) { + return false; + } + + for (LoadInst *L : AccessILS) { + scop->addRequiredInvariantLoad(L); + } + AccessILS.clear(); + + // The reasoning for SizesPerDim also applies here + Strides.push_back(StrideSCEV); + } + + Value *Val = Inst.getValueOperand(); + Type *ElementType = Val->getType(); + assert(BasePtr); + assert(ElementType); + + enum MemoryAccess::AccessType AccType = + isa(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE; + // scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent()); + + // NOTE: this should be fromStrides. + // NOTE: To be able to change this, we need to teach ScopArrayInfo to recieve + // a Shape object. So, do that first. + addArrayAccess(Stmt, Inst, AccType, BasePtr, ElementType, /*IsAffine=*/true, + Subscripts, ShapeInfo::fromStrides(Strides, OffsetSCEV), Val); + + return true; +} + void ScopBuilder::buildAccessFunctions() { for (auto &Stmt : *scop) { if (Stmt.isBlockStmt()) { @@ -1011,8 +1111,7 @@ MemoryAccess *ScopBuilder::addMemoryAccess( ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType, Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue, - ArrayRef Subscripts, ArrayRef Sizes, - MemoryKind Kind) { + ArrayRef Subscripts, ShapeInfo Shape, MemoryKind Kind) { bool isKnownMustAccess = false; // Accesses in single-basic block statements are always executed. 
@@ -1039,7 +1138,7 @@ AccType = MemoryAccess::MAY_WRITE; auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType, - Affine, Subscripts, Sizes, AccessValue, Kind); + Affine, Subscripts, Shape, AccessValue, Kind); scop->addAccessFunction(Access); Stmt->addAccess(Access); @@ -1051,12 +1150,11 @@ Value *BaseAddress, Type *ElementType, bool IsAffine, ArrayRef Subscripts, - ArrayRef Sizes, - Value *AccessValue) { + ShapeInfo Shape, Value *AccessValue) { ArrayBasePointers.insert(BaseAddress); auto *MemAccess = addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress, ElementType, IsAffine, AccessValue, - Subscripts, Sizes, MemoryKind::Array); + Subscripts, Shape, MemoryKind::Array); if (!DetectFortranArrays) return; @@ -1090,7 +1188,8 @@ addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(), true, Inst, ArrayRef(), - ArrayRef(), MemoryKind::Value); + ShapeInfo::fromSizes(ArrayRef()), + MemoryKind::Value); } void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) { @@ -1128,7 +1227,8 @@ break; addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(), - true, V, ArrayRef(), ArrayRef(), + true, V, ArrayRef(), + ShapeInfo::fromSizes(ArrayRef()), MemoryKind::Value); // Inter-statement uses need to write the value in their defining statement. @@ -1145,8 +1245,8 @@ // will create an exit PHI SAI object. It is needed during code generation // and would be created later anyway. if (IsExitBlock) - scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {}, - MemoryKind::ExitPHI); + scop->getOrCreateScopArrayInfo( + PHI, PHI->getType(), ShapeInfo::fromSizes({}), MemoryKind::ExitPHI); // This is possible if PHI is in the SCoP's entry block. The incoming blocks // from outside the SCoP's region have no statement representation. @@ -1167,17 +1267,19 @@ return; } - MemoryAccess *Acc = addMemoryAccess( - IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true, - PHI, ArrayRef(), ArrayRef(), - IsExitBlock ? 
MemoryKind::ExitPHI : MemoryKind::PHI); + MemoryAccess *Acc = + addMemoryAccess(IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, + PHI->getType(), true, PHI, ArrayRef(), + ShapeInfo::fromSizes(ArrayRef()), + IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI); assert(Acc); Acc->addIncoming(IncomingBlock, IncomingValue); } void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) { addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true, - PHI, ArrayRef(), ArrayRef(), + PHI, ArrayRef(), + ShapeInfo::fromSizes(ArrayRef()), MemoryKind::PHI); } @@ -1360,7 +1462,7 @@ Ty = MemoryKind::Array; auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(), - ElementType, Access->Sizes, Ty); + ElementType, Access->Shape, Ty); Access->buildAccessRelation(SAI); scop->addAccessData(Access); } @@ -1502,7 +1604,8 @@ Instruction *GlobalRead = GlobalReadPair.second; for (auto *BP : ArrayBasePointers) addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ, - BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead); + BP, BP->getType(), false, {AF}, + ShapeInfo::fromSizes({nullptr}), GlobalRead); } scop->buildInvariantEquivalenceClasses(); Index: lib/Analysis/ScopDetection.cpp =================================================================== --- lib/Analysis/ScopDetection.cpp +++ lib/Analysis/ScopDetection.cpp @@ -536,8 +536,44 @@ return false; } +/// Return if S is a call to a function that we use to denote multidimensional +// accesses +bool isSCEVCallToPollyAbstractIndex(const SCEV *S) { + if (isa(S)) { + Value *V = cast(S)->getValue(); + CallInst *Call = dyn_cast(V); + if (Call && Call->getCalledFunction() && + Call->getCalledFunction()->getName().count( + POLLY_ABSTRACT_INDEX_BASENAME)) + return true; + } + return false; +} + +/// Return if scev represents a multidim access. 
+bool isSCEVMultidimArrayAccess(const SCEV *S) { + if (isSCEVCallToPollyAbstractIndex(S)) + return true; + const SCEVMulExpr *Mul = dyn_cast(S); + if (!Mul) + return false; + + // TODO: I don't remember why I needed this. + // When does a Mul not have two operands? Something like {*, i, *, j, *, k}? + if (Mul->getNumOperands() != 2) + return false; + + // TODO: I have no memory of why I needed this. + return isSCEVCallToPollyAbstractIndex(Mul->getOperand(0)) || + isSCEVCallToPollyAbstractIndex(Mul->getOperand(1)); +} + bool ScopDetection::isAffine(const SCEV *S, Loop *Scope, DetectionContext &Context) const { + + if (isSCEVMultidimArrayAccess(S)) + return true; + InvariantLoadsSetTy AccessILS; if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS)) return false; @@ -698,6 +734,11 @@ Function *CalledFunction = CI.getCalledFunction(); + // Function being called is a polly indexing function. + if (CalledFunction->getName().count(POLLY_ABSTRACT_INDEX_BASENAME)) { + return true; + } + // Indirect calls are not supported. if (CalledFunction == nullptr) return false; @@ -1192,6 +1233,13 @@ bool ScopDetection::isValidMemoryAccess(MemAccInst Inst, DetectionContext &Context) const { + + /// If the memory access is modelling a call of + /// polly_abstract_array_index(...) + if (getAbstractIndexingCall(Inst, SE)) { + return true; + } + Value *Ptr = Inst.getPointerOperand(); Loop *L = LI.getLoopFor(Inst->getParent()); const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L); Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -248,6 +248,10 @@ //===----------------------------------------------------------------------===// +raw_ostream &polly::operator<<(raw_ostream &OS, const ShapeInfo &Shape) { + return Shape.print(OS); +} + // Create a sequence of two schedules. Either argument may be null and is // interpreted as the empty schedule. 
Can also return null if both schedules are // empty. @@ -318,10 +322,11 @@ } ScopArrayInfo::ScopArrayInfo(Value *BasePtr, Type *ElementType, isl::ctx Ctx, - ArrayRef Sizes, MemoryKind Kind, + ShapeInfo Shape, MemoryKind Kind, const DataLayout &DL, Scop *S, const char *BaseName) - : BasePtr(BasePtr), ElementType(ElementType), Kind(Kind), DL(DL), S(*S) { + : BasePtr(BasePtr), ElementType(ElementType), Kind(Kind), DL(DL), + Shape(ShapeInfo::none()), S(*S) { std::string BasePtrName = BaseName ? BaseName : getIslCompatibleName("MemRef", BasePtr, S->getNextArrayIdx(), @@ -329,7 +334,12 @@ UseInstructionNames); Id = isl::id::alloc(Ctx, BasePtrName, this); - updateSizes(Sizes); + // TODO: why do we need updateSizes() ? Why don't we set this up in the + // ctor? + if (Shape.hasSizes()) + updateSizes(Shape.sizes()); + else + updateStrides(Shape.strides(), Shape.offset()); if (!BasePtr || Kind != MemoryKind::Array) { BasePtrOriginSAI = nullptr; @@ -415,27 +425,61 @@ DimensionSizesPw[0] = PwAff; } +void ScopArrayInfo::overwriteSizeWithStrides(ArrayRef Strides, + const SCEV *Offset) { + + // HACK: first set our shape to a stride based shape so that we don't + // assert within updateStrides. 
Move this into a bool parameter of + // updateStrides + Shape = ShapeInfo::fromStrides(Strides, Offset); + updateStrides(Strides, Offset); +} +bool ScopArrayInfo::updateStrides(ArrayRef Strides, + const SCEV *Offset) { + Shape.setStrides(Strides, Offset); + DimensionSizesPw.clear(); + for (size_t i = 0; i < Shape.getNumberOfDimensions(); i++) { + isl::space Space(S.getIslCtx(), 1, 0); + + std::string param_name = getIslCompatibleName( + "stride_" + std::to_string(i) + "__", getName(), ""); + isl::id IdPwAff = isl::id::alloc(S.getIslCtx(), param_name, this); + + Space = Space.set_dim_id(isl::dim::param, 0, IdPwAff); + isl::pw_aff PwAff = + isl::aff::var_on_domain(isl::local_space(Space), isl::dim::param, 0); + + DimensionSizesPw.push_back(PwAff); + } + return true; +} + bool ScopArrayInfo::updateSizes(ArrayRef NewSizes, bool CheckConsistency) { - int SharedDims = std::min(NewSizes.size(), DimensionSizes.size()); - int ExtraDimsNew = NewSizes.size() - SharedDims; - int ExtraDimsOld = DimensionSizes.size() - SharedDims; - - if (CheckConsistency) { - for (int i = 0; i < SharedDims; i++) { - auto *NewSize = NewSizes[i + ExtraDimsNew]; - auto *KnownSize = DimensionSizes[i + ExtraDimsOld]; - if (NewSize && KnownSize && NewSize != KnownSize) - return false; - } + if (Shape.isInitialized()) { + const SmallVector &DimensionSizes = Shape.sizes(); + int SharedDims = std::min(NewSizes.size(), DimensionSizes.size()); + int ExtraDimsNew = NewSizes.size() - SharedDims; + int ExtraDimsOld = DimensionSizes.size() - SharedDims; + + if (CheckConsistency) { + for (int i = 0; i < SharedDims; i++) { + auto *NewSize = NewSizes[i + ExtraDimsNew]; + auto *KnownSize = DimensionSizes[i + ExtraDimsOld]; + if (NewSize && KnownSize && NewSize != KnownSize) + return false; + } - if (DimensionSizes.size() >= NewSizes.size()) - return true; + if (DimensionSizes.size() >= NewSizes.size()) + return true; + } } - + SmallVector DimensionSizes; DimensionSizes.clear(); 
DimensionSizes.insert(DimensionSizes.begin(), NewSizes.begin(), NewSizes.end()); + Shape.setSizes(DimensionSizes); + DimensionSizesPw.clear(); for (const SCEV *Expr : DimensionSizes) { if (!Expr) { @@ -468,7 +512,7 @@ bool IsOutermostSizeKnown = SizeAsPwAff && FAD; if (!IsOutermostSizeKnown && getNumberOfDimensions() > 0 && - !getDimensionSize(0)) { + !getDimensionSizePw(0)) { OS << "[*]"; u++; } @@ -799,7 +843,7 @@ void MemoryAccess::buildMemIntrinsicAccessRelation() { assert(isMemoryIntrinsic()); - assert(Subscripts.size() == 2 && Sizes.size() == 1); + assert(Subscripts.size() == 2 && Shape.sizes().size() == 1); isl::pw_aff SubscriptPWA = getPwAff(Subscripts[0]); isl::map SubscriptMap = isl::map::from_pw_aff(SubscriptPWA); @@ -867,7 +911,8 @@ } void MemoryAccess::foldAccessRelation() { - if (Sizes.size() < 2 || isa(Sizes[1])) + if (Shape.getNumberOfDimensions() < 2 || + isa(Shape.getSizesOrStrides()[1])) return; int Size = Subscripts.size(); @@ -877,7 +922,7 @@ for (int i = Size - 2; i >= 0; --i) { isl::space Space; isl::map MapOne, MapTwo; - isl::pw_aff DimSize = getPwAff(Sizes[i + 1]); + isl::pw_aff DimSize = getPwAff(Shape.getSizesOrStrides()[i + 1]); isl::space SpaceSize = DimSize.get_space(); isl::id ParamId = SpaceSize.get_dim_id(isl::dim::param, 0); @@ -1011,13 +1056,11 @@ MemoryAccess::MemoryAccess(ScopStmt *Stmt, Instruction *AccessInst, AccessType AccType, Value *BaseAddress, Type *ElementType, bool Affine, - ArrayRef Subscripts, - ArrayRef Sizes, Value *AccessValue, - MemoryKind Kind) + ArrayRef Subscripts, ShapeInfo Shape, + Value *AccessValue, MemoryKind Kind) : Kind(Kind), AccType(AccType), Statement(Stmt), InvalidDomain(nullptr), - BaseAddr(BaseAddress), ElementType(ElementType), - Sizes(Sizes.begin(), Sizes.end()), AccessInstruction(AccessInst), - AccessValue(AccessValue), IsAffine(Affine), + BaseAddr(BaseAddress), ElementType(ElementType), Shape(Shape), + AccessInstruction(AccessInst), AccessValue(AccessValue), IsAffine(Affine), 
Subscripts(Subscripts.begin(), Subscripts.end()), AccessRelation(nullptr), NewAccessRelation(nullptr), FAD(nullptr) { static const std::string TypeStrings[] = {"", "_Read", "_Write", "_MayWrite"}; @@ -1029,8 +1072,8 @@ MemoryAccess::MemoryAccess(ScopStmt *Stmt, AccessType AccType, isl::map AccRel) : Kind(MemoryKind::Array), AccType(AccType), Statement(Stmt), - InvalidDomain(nullptr), AccessRelation(nullptr), - NewAccessRelation(AccRel), FAD(nullptr) { + InvalidDomain(nullptr), Shape(ShapeInfo::fromSizes({nullptr})), + AccessRelation(nullptr), NewAccessRelation(AccRel), FAD(nullptr) { isl::id ArrayInfoId = NewAccessRelation.get_tuple_id(isl::dim::out); auto *SAI = ScopArrayInfo::getFromId(ArrayInfoId); Sizes.push_back(nullptr); @@ -1840,10 +1883,12 @@ if (Access) return Access; - ScopArrayInfo *SAI = - Parent.getOrCreateScopArrayInfo(V, V->getType(), {}, MemoryKind::Value); - Access = new MemoryAccess(this, nullptr, MemoryAccess::READ, V, V->getType(), - true, {}, {}, V, MemoryKind::Value); + // TODO: again, why do we have an _empty_ list here for size? 
+ ScopArrayInfo *SAI = Parent.getOrCreateScopArrayInfo( + V, V->getType(), ShapeInfo::fromSizes({}), MemoryKind::Value); + Access = + new MemoryAccess(this, nullptr, MemoryAccess::READ, V, V->getType(), true, + {}, ShapeInfo::fromSizes({}), V, MemoryKind::Value); Parent.addAccessFunction(Access); Access->buildAccessRelation(SAI); addAccess(Access); @@ -2178,6 +2223,23 @@ } } +// Helper function to getFortranArrayIds() to determine if all the dimensions of +// an Array are defined as pw_aff's +static bool hasAllDimensions(polly::ScopArrayInfo *Array, + std::vector &OuterIds) { + int NDim = Array->getNumberOfDimensions(); + for (int i = 0; i < NDim; i++) { + isl::pw_aff PwAff = Array->getDimensionSizePw(i); + if (!PwAff) + return true; + + isl::id Id = PwAff.get_dim_id(isl::dim::param, 0); + assert(!Id.is_null() && "Invalid Id for PwAff expression in Fortran array"); + OuterIds.push_back(Id); + } + return false; +} + static std::vector getFortranArrayIds(Scop::array_range Arrays) { std::vector OutermostSizeIds; for (auto Array : Arrays) { @@ -2185,16 +2247,10 @@ // for its outermost dimension. Fortran arrays will have this since the // outermost dimension size can be picked up from their runtime description. // TODO: actually need to check if it has a FAD, but for now this works. 
- if (Array->getNumberOfDimensions() > 0) { - isl::pw_aff PwAff = Array->getDimensionSizePw(0); - if (!PwAff) + int NDim = Array->getNumberOfDimensions(); + if (NDim > 0) + if (hasAllDimensions(Array, OutermostSizeIds)) continue; - - isl::id Id = PwAff.get_dim_id(isl::dim::param, 0); - assert(!Id.is_null() && - "Invalid Id for PwAff expression in Fortran array"); - OutermostSizeIds.push_back(Id); - } } return OutermostSizeIds; } @@ -3964,27 +4020,90 @@ } } +static Value *getPointerFromLoadOrStore(Value *V) { + if (LoadInst *LI = dyn_cast(V)) + return LI->getPointerOperand(); + + if (StoreInst *SI = dyn_cast(V)) + return SI->getPointerOperand(); + return nullptr; +} + ScopArrayInfo *Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *ElementType, - ArrayRef Sizes, - MemoryKind Kind, + ShapeInfo Shape, MemoryKind Kind, const char *BaseName) { assert((BasePtr || BaseName) && "BasePtr and BaseName can not be nullptr at the same time."); assert(!(BasePtr && BaseName) && "BaseName is redundant."); + + // We assume that arrays with the strided representation can unify. + // Yes this is nuts. Yes I still want to do this. + auto unifyStridedArrayBasePtrs = [&]() -> Value * { + if (Shape.hasSizes()) + return BasePtr; + + const Value *CurBase = getPointerFromLoadOrStore(BasePtr); + if (!CurBase) + return BasePtr; + + for (ScopArrayInfo *SAI : arrays()) { + Value *SAIBase = getPointerFromLoadOrStore(SAI->getBasePtr()); + if (!SAIBase) + continue; + + if (SAIBase == CurBase && SAI->hasStrides()) + return SAI->getBasePtr(); + } + return BasePtr; + }; + + BasePtr = unifyStridedArrayBasePtrs(); + auto &SAI = BasePtr ?
ScopArrayInfoMap[std::make_pair(BasePtr, Kind)] : ScopArrayNameMap[BaseName]; + if (!SAI) { auto &DL = getFunction().getParent()->getDataLayout(); - SAI.reset(new ScopArrayInfo(BasePtr, ElementType, getIslCtx(), Sizes, Kind, + SAI.reset(new ScopArrayInfo(BasePtr, ElementType, getIslCtx(), Shape, Kind, DL, this, BaseName)); ScopArrayInfoSet.insert(SAI.get()); } else { SAI->updateElementType(ElementType); // In case of mismatching array sizes, we bail out by setting the run-time // context to false. - if (!SAI->updateSizes(Sizes)) - invalidate(DELINEARIZATION, DebugLoc()); + if (SAI->hasStrides() != Shape.hasStrides()) { + LLVM_DEBUG(dbgs() << "SAI and new shape do not agree:\n"); + LLVM_DEBUG(dbgs() << "SAI: "; SAI->print(dbgs(), true); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Shape: " << Shape << "\n"); + + if (Shape.hasStrides()) { + LLVM_DEBUG( + dbgs() << "Shape has strides, SAI had size. Overwriting size " + "with strides"); + SAI->overwriteSizeWithStrides(Shape.strides(), Shape.offset()); + } else { + + errs() << __PRETTY_FUNCTION__ << "\n" + << "SAI has strides, Shape is size based. This should not " + "happen. 
Ignoring new data for now."; + errs() << " SAI:\n"; + SAI->print(errs(), false); + errs() << "Shape:\n"; + errs() << Shape << "\n"; + errs() << "---\n"; + // report_fatal_error("SAI was given sizes when it had strides"); + return SAI.get(); + } + } + + if (SAI->hasStrides()) { + SAI->updateStrides(Shape.strides(), Shape.offset()); + } else { + if (!SAI->updateSizes(Shape.sizes())) + invalidate(DELINEARIZATION, DebugLoc()); + } } + return SAI.get(); } @@ -4000,7 +4119,8 @@ else SCEVSizes.push_back(nullptr); - auto *SAI = getOrCreateScopArrayInfo(nullptr, ElementType, SCEVSizes, + auto *SAI = getOrCreateScopArrayInfo(nullptr, ElementType, + ShapeInfo::fromSizes(SCEVSizes), MemoryKind::Array, BaseName.c_str()); return SAI; } @@ -4362,7 +4482,7 @@ OS << "Arrays {\n"; for (auto &Array : arrays()) - Array->print(OS); + Array->print(OS, true); OS.indent(4) << "}\n"; Index: lib/CodeGen/IslExprBuilder.cpp =================================================================== --- lib/CodeGen/IslExprBuilder.cpp +++ lib/CodeGen/IslExprBuilder.cpp @@ -40,12 +40,14 @@ cl::Hidden, cl::init(OT_REQUEST), cl::ZeroOrMore, cl::cat(PollyCategory)); IslExprBuilder::IslExprBuilder(Scop &S, PollyIRBuilder &Builder, - IDToValueTy &IDToValue, ValueMapT &GlobalMap, + IDToValueTy &IDToValue, + SCEVToValueTy &SCEVToValue, ValueMapT &GlobalMap, const DataLayout &DL, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, BasicBlock *StartBlock) - : S(S), Builder(Builder), IDToValue(IDToValue), GlobalMap(GlobalMap), - DL(DL), SE(SE), DT(DT), LI(LI), StartBlock(StartBlock) { + : S(S), Builder(Builder), IDToValue(IDToValue), SCEVToValue(SCEVToValue), + GlobalMap(GlobalMap), DL(DL), SE(SE), DT(DT), LI(LI), + StartBlock(StartBlock) { OverflowState = (OTMode == OT_ALWAYS) ? 
Builder.getFalse() : nullptr; } @@ -242,7 +244,7 @@ assert(isl_ast_expr_get_op_n_arg(Expr) >= 1 && "We need at least two operands to create a member access."); - Value *Base, *IndexOp, *Access; + Value *Base, *IndexOp1, *IndexOp2, *Access; isl_ast_expr *BaseExpr; isl_id *BaseId; @@ -287,54 +289,226 @@ return Base; } - IndexOp = nullptr; - for (unsigned u = 1, e = isl_ast_expr_get_op_n_arg(Expr); u < e; u++) { - Value *NextIndex = create(isl_ast_expr_get_op_arg(Expr, u)); - assert(NextIndex->getType()->isIntegerTy() && - "Access index should be an integer"); + IndexOp1 = nullptr; + IndexOp2 = nullptr; - if (PollyDebugPrinting) - RuntimeDebugBuilder::createCPUPrinter(Builder, "[", NextIndex, "]"); - - if (!IndexOp) { - IndexOp = NextIndex; - } else { - Type *Ty = getWidestType(NextIndex->getType(), IndexOp->getType()); + // TODO: Oh right, I had forgotten about _this_ amazing hack. + // In a nutshell, when we are generating code on the kernel side, we choose to + // lookup SCEVtoValueMap. What we should actually do is to _always_ query + // SCEVToValueMap, but only populate it when we are performing codegen on the + // kernel side. + // + // Does SCEVExpander automatically take care of this? IIRC, I had tried + // SCEVExpander but it failed for some reason. Maybe we should try to replace + // the SCEVToValueMap with SCEVExpander. + const std::string Name = Builder.GetInsertBlock()->getModule()->getName(); + + // TODO: refactor this, remove the nesting. It is confusing to tell WTF is + // happening without pen-and-paper. The old code was already not super + // straightforward, and this just makes it worse. + if (SAI->hasStrides()) { + // Vector of values is used because the Block value computation happens + // from outermost dimension to innermost dimension but creation of index + // expression happens from innermost to outermost dimension. 
We need to + // store these values temporarily which can be accessed later + std::vector SizesToBlockValues; + // This holds the older size value so that multiplication instruction + // gets all its arguments easily + Value *OldDimSize = nullptr; + // This loop takes care of block value creation + for (unsigned u = 1, e = isl_ast_expr_get_op_n_arg(Expr); u < e; u++) { + Value *NextIndex = create(isl_ast_expr_get_op_arg(Expr, u)); + assert(NextIndex->getType()->isIntegerTy() && + "Access index should be an integer"); + + Type *Ty = [&]() { + if (IndexOp1) + return getWidestType(NextIndex->getType(), IndexOp1->getType()); + else + return NextIndex->getType(); + }(); if (Ty != NextIndex->getType()) NextIndex = Builder.CreateIntCast(NextIndex, Ty, true); - if (Ty != IndexOp->getType()) - IndexOp = Builder.CreateIntCast(IndexOp, Ty, true); + // SCEV corresponding to every dimension + const SCEV *DimSCEV; + + // For the index expression, the coefficient of the index associated + // with outermost dimension is always 1 + if (u == 1) + DimSCEV = SE.getOne(Ty); + // Whereas, for the other dimensions, we cumulatively multiply the + // size of the previous outer dimension with its DimSCEV + else + DimSCEV = SAI->getDimensionStride(e - u); + assert(DimSCEV); + + // Finding out the Value of DimSCEV + Value *DimSize = nullptr; + llvm::ValueToValueMap Map(GlobalMap.begin(), GlobalMap.end()); + // HACK: we do this because we know the kernel name. 
+ if (Name.find("FUNC__") != std::string::npos) { + auto OldValue = SCEVToValue.find(DimSCEV); + if (OldValue == SCEVToValue.end()) + assert(false && "!OldValue"); + + auto NewDimSizeIt = Map.find(OldValue->second); + if (NewDimSizeIt == Map.end()) + assert(false && "!NewDimSizeIt"); + DimSize = NewDimSizeIt->second; + } else { + + DimSCEV = SCEVParameterRewriter::rewrite(DimSCEV, SE, Map); + DimSize = expandCodeFor(S, SE, DL, "polly", DimSCEV, DimSCEV->getType(), + &*Builder.GetInsertPoint(), nullptr, + StartBlock->getSinglePredecessor()); + if (u != 1) + DimSize = getLatestValue(DimSize); + } + assert(DimSize && "dimsize uninitialized"); - IndexOp = createAdd(IndexOp, NextIndex, "polly.access.add." + BaseName); + if (Ty != NextIndex->getType()) + NextIndex = Builder.CreateSExtOrTrunc( + NextIndex, Ty, + "polly.access.idxval." + BaseName + std::to_string(u - 1)); + + if (Ty != DimSize->getType()) + DimSize = Builder.CreateSExtOrTrunc(DimSize, Ty, + "polly.access.stride." + BaseName + + std::to_string(u - 1)); + // We create number of dimensions - 1 instructions which will be used as + // coefficients to the thread values(more like indices) which are related + // in LIFO order i.e the last value corresponds to the innermost dimension + if (u == 1) + OldDimSize = DimSize; + else { + DimSize = createMul(DimSize, OldDimSize, + "polly.access.idxval_x_block_stride." + BaseName + + std::to_string(u - 1)); + OldDimSize = DimSize; + } + SizesToBlockValues.push_back(DimSize); } - // For every but the last dimension multiply the size, for the last - // dimension we can exit the loop. 
- if (u + 1 >= e) - break; - - const SCEV *DimSCEV = SAI->getDimensionSize(u); - - llvm::ValueToValueMap Map(GlobalMap.begin(), GlobalMap.end()); - DimSCEV = SCEVParameterRewriter::rewrite(DimSCEV, SE, Map); - Value *DimSize = - expandCodeFor(S, SE, DL, "polly", DimSCEV, DimSCEV->getType(), - &*Builder.GetInsertPoint(), nullptr, - StartBlock->getSinglePredecessor()); - - Type *Ty = getWidestType(DimSize->getType(), IndexOp->getType()); - - if (Ty != IndexOp->getType()) - IndexOp = Builder.CreateSExtOrTrunc(IndexOp, Ty, - "polly.access.sext." + BaseName); - if (Ty != DimSize->getType()) - DimSize = Builder.CreateSExtOrTrunc(DimSize, Ty, - "polly.access.sext." + BaseName); - IndexOp = createMul(IndexOp, DimSize, "polly.access.mul." + BaseName); + // This loop creates the actual index expression for array access + for (unsigned u = 1, e = isl_ast_expr_get_op_n_arg(Expr); u < e; u++) { + Value *NextIndex = create(isl_ast_expr_get_op_arg(Expr, u)); + assert(NextIndex->getType()->isIntegerTy() && + "Access index should be an integer"); + + Type *Ty = [&]() { + if (IndexOp2) + return getWidestType(NextIndex->getType(), IndexOp2->getType()); + else + return NextIndex->getType(); + }(); + + Value *DimSize = SizesToBlockValues[e - u - 1]; + NextIndex = createMul(NextIndex, DimSize, + "polly.access.idxval_x_stride." + BaseName + + std::to_string(u - 1)); + + if (PollyDebugPrinting) + RuntimeDebugBuilder::createCPUPrinter(Builder, "[", NextIndex, "]"); + if (!IndexOp2) { + IndexOp2 = NextIndex; + } else { + if (Ty != IndexOp2->getType()) + IndexOp2 = Builder.CreateIntCast(IndexOp2, Ty, true); + IndexOp2 = createAdd(IndexOp2, NextIndex, + "polly.access.idx_accum." + BaseName); + } // end else + } // end for loop over stride dims + assert(IndexOp2 && "expected correct index op"); + Value *Offset = nullptr; + // If we are in the kernel, then the base pointer has already been + // offset correctly so we need not do anything about it. 
+ if (Name.find("FUNC__") != std::string::npos) { + llvm::ValueToValueMap Map(GlobalMap.begin(), GlobalMap.end()); + const SCEV *OffsetSCEV = SAI->getStrideOffset(); + auto OldValue = SCEVToValue.find(OffsetSCEV); + if (OldValue == SCEVToValue.end()) + assert(false && "!OldValue offset"); + + auto NewIt = Map.find(OldValue->second); + if (NewIt == Map.end()) { + Offset = OldValue->second; + // errs() << "OldValue: " << *OldValue->second << "\n"; + // assert(false && "!NewIt offset"); + } else { + Offset = NewIt->second; + } + assert(Offset); + if (auto OffsetInst = dyn_cast(Offset)) { + assert(OffsetInst->getModule() == + Builder.GetInsertBlock()->getModule()); + } + } else { + const SCEV *OffsetSCEV = SAI->getStrideOffset(); + llvm::ValueToValueMap Map(GlobalMap.begin(), GlobalMap.end()); + + // If we are outside a kernel, then we do need to synthesize an offset. + OffsetSCEV = SCEVParameterRewriter::rewrite(OffsetSCEV, SE, Map); + // The arguments and their mapped values need to be provided here + Offset = expandCodeFor(S, SE, DL, "polly", OffsetSCEV, + OffsetSCEV->getType(), &*Builder.GetInsertPoint(), + &GlobalMap, StartBlock->getSinglePredecessor()); + Offset = getLatestValue(Offset); + } + assert(Offset && "dimsize uninitialized"); + Offset = Builder.CreateIntCast(Offset, IndexOp2->getType(), true); + IndexOp2 = createAdd(IndexOp2, Offset, "polly.access.offseted." 
+ BaseName); + } // end hasStride + else { + for (unsigned u = 1, e = isl_ast_expr_get_op_n_arg(Expr); u < e; u++) { + Value *NextIndex = create(isl_ast_expr_get_op_arg(Expr, u)); + assert(NextIndex->getType()->isIntegerTy() && + "Access index should be an integer"); + + if (PollyDebugPrinting) + RuntimeDebugBuilder::createCPUPrinter(Builder, "[", NextIndex, "]"); + + if (!IndexOp2) { + IndexOp2 = NextIndex; + } else { + Type *Ty = getWidestType(NextIndex->getType(), IndexOp2->getType()); + + if (Ty != NextIndex->getType()) + NextIndex = Builder.CreateIntCast(NextIndex, Ty, true); + if (Ty != IndexOp2->getType()) + IndexOp2 = Builder.CreateIntCast(IndexOp2, Ty, true); + + IndexOp2 = + createAdd(IndexOp2, NextIndex, "polly.access.add." + BaseName); + } + + // For every but the last dimension multiply the size, for the last + // dimension we can exit the loop. + if (u + 1 >= e) + break; + + const SCEV *DimSCEV = SAI->getDimensionSize(u); + + llvm::ValueToValueMap Map(GlobalMap.begin(), GlobalMap.end()); + DimSCEV = SCEVParameterRewriter::rewrite(DimSCEV, SE, Map); + Value *DimSize = + expandCodeFor(S, SE, DL, "polly", DimSCEV, DimSCEV->getType(), + &*Builder.GetInsertPoint(), nullptr, + StartBlock->getSinglePredecessor()); + + Type *Ty = getWidestType(DimSize->getType(), IndexOp2->getType()); + + if (Ty != IndexOp2->getType()) + IndexOp2 = Builder.CreateSExtOrTrunc(IndexOp2, Ty, + "polly.access.sext." + BaseName); + if (Ty != DimSize->getType()) + DimSize = Builder.CreateSExtOrTrunc(DimSize, Ty, + "polly.access.sext." + BaseName); + IndexOp2 = createMul(IndexOp2, DimSize, "polly.access.mul." + BaseName); + } } - Access = Builder.CreateGEP(Base, IndexOp, "polly.access." + BaseName); + Access = Builder.CreateGEP(Base, IndexOp2, "polly.access." 
+ BaseName); if (PollyDebugPrinting) RuntimeDebugBuilder::createCPUPrinter(Builder, "\n"); @@ -784,3 +958,9 @@ llvm_unreachable("Unexpected enum value"); } + +llvm::Value *IslExprBuilder::getLatestValue(llvm::Value *Old) { + if (GlobalMap.find(Old) != GlobalMap.end()) + return GlobalMap.find(Old)->second; + return Old; +} Index: lib/CodeGen/IslNodeBuilder.cpp =================================================================== --- lib/CodeGen/IslNodeBuilder.cpp +++ lib/CodeGen/IslNodeBuilder.cpp @@ -1207,6 +1207,47 @@ } return true; } +void IslNodeBuilder::materializeStridedArraySizes() { + for (ScopArrayInfo *Array : S.arrays()) { + if (!Array->hasStrides()) + continue; + + for (unsigned i = 0; i < Array->getNumberOfDimensions(); i++) { + isl_pw_aff *ParametricPwAff = Array->getDimensionSizePw(i).release(); + assert(ParametricPwAff && "parametric pw_aff corresponding " + "to outermost dimension does not " + "exist"); + + isl_id *Id = isl_pw_aff_get_dim_id(ParametricPwAff, isl_dim_param, 0); + isl_pw_aff_free(ParametricPwAff); + + assert(Id && "pw_aff is not parametric"); + + LLVM_DEBUG(dbgs() << "-\n"; dbgs() << "i: " << i << "\n"; + dbgs() << "ID: " << Id << "\n";); + const SCEV *StrideSCEV = Array->getDimensionStride(i); + // dbgs() << "StrideSCEV: " << *StrideSCEV << "\n"; + + Value *Stride = generateSCEV(StrideSCEV); + assert(Stride); + + // errs() << "StrideVal: " << *Stride << "\n"; + IDToValue[Id] = Stride; + SCEVToValue[Array->getDimensionStride(i)] = Stride; + isl_id_free(Id); + } + + const SCEV *OffsetSCEV = Array->getStrideOffset(); + Value *Offset = generateSCEV(OffsetSCEV); + assert(Offset); + SCEVToValue[OffsetSCEV] = Offset; + + const SCEV *BlockOuterDimSCEV = SE.getOne(Offset->getType()); + Value *BlockOuterDim = generateSCEV(BlockOuterDimSCEV); + assert(BlockOuterDim); + SCEVToValue[BlockOuterDimSCEV] = BlockOuterDim; + } +} Value *IslNodeBuilder::preloadUnconditionally(isl_set *AccessRange, isl_ast_build *Build, @@ -1560,6 +1601,9 @@ L = 
L->getParentLoop(); } + // Materialize the stride information for each array + materializeStridedArraySizes(); + isl_set_free(Context); } Index: lib/CodeGen/PPCGCodeGeneration.cpp =================================================================== --- lib/CodeGen/PPCGCodeGeneration.cpp +++ lib/CodeGen/PPCGCodeGeneration.cpp @@ -24,6 +24,7 @@ #include "polly/ScopDetection.h" #include "polly/ScopInfo.h" #include "polly/Support/SCEVValidator.h" +#include "polly/Support/ScopHelper.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" @@ -31,6 +32,7 @@ #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/IRReader/IRReader.h" @@ -132,6 +134,80 @@ " | Function: " + std::string(S->getFunction().getName()); } +static Function *createPollyAbstractIndexFunction(Module &M, + PollyIRBuilder Builder, + int NumDims) { + + // HACK: pick up the name from ScopHelper. 
+ const std::string BaseName = POLLY_ABSTRACT_INDEX_BASENAME; + // DEBUG(dbgs() << __PRETTY_FUNCTION__ << " | HACK: hardcoded name of: " << + // BaseName << "Fix this.\n"); + const std::string Name = BaseName + "_" + std::to_string(NumDims); + + Function *F = [&]() { + Function *Existing = nullptr; + if ((Existing = M.getFunction(Name))) { + // assert(false); + return Existing; + }; + + GlobalValue::LinkageTypes Linkage = Function::InternalLinkage; + IntegerType *I64Ty = Builder.getInt64Ty(); + std::vector ParamTys; + // offset(1) + stride(numdims) + ix(numdims) = 2 *numdims + 1) + for (int i = 1; i <= 1 + 2 * NumDims; i++) { + ParamTys.push_back(I64Ty); + } + auto FnType = FunctionType::get(I64Ty, ParamTys, /*IsVarArg = */ false); + return Function::Create(FnType, Linkage, Name, &M); + }(); + + BasicBlock *EntryBB = BasicBlock::Create(M.getContext(), "entry", F); + Builder.SetInsertPoint(EntryBB); + + std::vector Args; + for (Argument &Arg : F->args()) { + Args.push_back(&Arg); + } + + Args[0]->setName("offset"); + // Offset + std::vector SizesToBlockValues; + Value *TotalIx = Args[0], *OldDimSize = nullptr; + Value *BlockOuterDim = ConstantInt::get(TotalIx->getType(), 1); + SizesToBlockValues.push_back(BlockOuterDim); + for (int i = NumDims - 1; i >= 0; i--) { + const int StrideIx = 1 + i; + Argument *StrideArg = Args[StrideIx]; + StrideArg->setName("stride" + std::to_string(i)); + if (i) { + BlockOuterDim = Builder.CreateMul( + StrideArg, BlockOuterDim, "Block_stride_x_ix_" + std::to_string(i)); + SizesToBlockValues.push_back(BlockOuterDim); + } + } + for (int i = 0; i < NumDims; i++) { + // const int SizePerDimIx = NumDims + 1 + i; + // const int StrideIx = 1 + i; + const int CurIxIx = NumDims + 1 + i; + // Argument *StrideArg = Args[StrideIx]; + Value *StrideArg = SizesToBlockValues[NumDims - i - 1]; + Argument *CurIxArg = Args[CurIxIx]; + // Argument *SizePerDimArg = Args[SizePerDimIx]; + + // StrideArg->setName("stride" + std::to_string(i)); + 
CurIxArg->setName("ix" + std::to_string(i)); + // SizePerDimArg->setName("sizePerDim" + std::to_string(i)); + + Value *StrideMulIx = Builder.CreateMul(StrideArg, CurIxArg, + "Stride_x_ix_" + std::to_string(i)); + TotalIx = Builder.CreateAdd(TotalIx, StrideMulIx); + } + Builder.CreateRet(TotalIx); + F->addFnAttr(Attribute::AlwaysInline); + return F; +} + /// Used to store information PPCG wants for kills. This information is /// used by live range reordering. /// @@ -784,6 +860,7 @@ DevArrayName.append(Array->name); Value *ArraySize = getArraySize(Array); + // TODO: check if we need this anymore Value *Offset = getArrayOffset(Array); if (Offset) ArraySize = Builder.CreateSub( @@ -1198,11 +1275,11 @@ HostPtr = Builder.CreatePointerCast(HostPtr, Builder.getInt8PtrTy()); - if (Offset) { - Size = Builder.CreateSub( - Size, Builder.CreateMul( - Offset, Builder.getInt64(ScopArray->getElemSizeInBytes()))); - } + //if (Offset) { + // Size = Builder.CreateSub( + // Size, Builder.CreateMul( + // Offset, Builder.getInt64(ScopArray->getElemSizeInBytes()))); + //} if (Direction == HOST_TO_DEVICE) createCallCopyFromHostToDevice(HostPtr, DevPtr, Size); @@ -1430,9 +1507,10 @@ if (AllowLibDevice && getCUDALibDeviceFuntion(Name).length() > 0) return true; - return F->isIntrinsic() && - (Name.startswith("llvm.sqrt") || Name.startswith("llvm.fabs") || - Name.startswith("llvm.copysign")); + return Name.startswith(POLLY_ABSTRACT_INDEX_BASENAME) || + (F->isIntrinsic() && + (Name.startswith("llvm.sqrt") || Name.startswith("llvm.fabs") || + Name.startswith("llvm.copysign"))); } /// Do not take `Function` as a subtree value. @@ -1461,6 +1539,19 @@ return SubtreeFunctions; } +// return whether `Array` is used in `Kernel` or not. 
+static bool isArrayUsedInKernel(ScopArrayInfo *Array, ppcg_kernel *Kernel, + gpu_prog *Prog) { + for (int i = 0; i < Prog->n_array; i++) { + isl_id *Id = isl_space_get_tuple_id(Prog->array[i].space, isl_dim_set); + const ScopArrayInfo *SAI = ScopArrayInfo::getFromId(isl::manage(Id)); + if (SAI == Array) + return ppcg_kernel_requires_array_argument(Kernel, i); + } + // assert(false && "unable to find Array in known list of arrays"); + return false; // Is this correct? +} + std::tuple, SetVector, SetVector, isl::space> GPUNodeBuilder::getReferencesInKernel(ppcg_kernel *Kernel) { @@ -1480,6 +1571,9 @@ for (const auto &I : OutsideLoopIterations) SubtreeValues.insert(cast(I.second)->getValue()); + for (const auto &I : SCEVToValue) + SubtreeValues.insert(I.second); + isl_ast_node_foreach_descendant_top_down( Kernel->tree, collectReferencesInGPUStmt, &References); @@ -1492,8 +1586,10 @@ return S.contains(L) || L->contains(S.getEntry()); }); - for (auto &SAI : S.arrays()) - SubtreeValues.remove(SAI->getBasePtr()); + for (auto &SAI : S.arrays()) { + if (isArrayUsedInKernel(SAI, Kernel, Prog)) + SubtreeValues.remove(SAI->getBasePtr()); + } isl_space *Space = S.getParamSpace().release(); for (long i = 0, n = isl_space_dim(Space, isl_dim_param); i < n; i++) { @@ -1772,11 +1868,35 @@ Clone = Function::Create(Fn->getFunctionType(), GlobalValue::ExternalLinkage, ClonedFnName, GPUModule.get()); + + // For our polly_array_index function, we need readnone attribute so that + // dead code elimination nukes it. In general, it is good practice + // to copy over attributes. 
+ Clone->setAttributes(Fn->getAttributes()); + assert(Clone && "Expected cloned function to be initialized."); - assert(ValueMap.find(Fn) == ValueMap.end() && - "Fn already present in ValueMap"); + // assert(ValueMap.find(Fn) == ValueMap.end() && + // "Fn already present in ValueMap"); ValueMap[Fn] = Clone; } + + Module *Host = S.getFunction().getParent(); + for (int i = 0; i <= 4; i++) { + const std::string BaseName = POLLY_ABSTRACT_INDEX_BASENAME; + Function *HostFn = Host->getFunction(BaseName + "_" + std::to_string(i)); + Function *GPUFn = createPollyAbstractIndexFunction(*GPUModule, Builder, i); + if (HostFn) { + // the chapel compiler actually creates the function definition + // and not just stubs the way I used to do (I used to link in the + // definitions later on. This invariant needs to be discussed with + // the chapel folks, so this is a TODO FIXME + // assert(ValueMap.find(HostFn) == ValueMap.end()); + errs() << "FIXME: discover why we have a duplicate definition of " + "polly_abstract_index" + << '\n'; + ValueMap[HostFn] = GPUFn; + } + } } void GPUNodeBuilder::createKernel(__isl_take isl_ast_node *KernelStmt) { isl_id *Id = isl_ast_node_get_annotation(KernelStmt); @@ -2012,18 +2132,42 @@ const ScopArrayInfo *SAI = ScopArrayInfo::getFromId(isl::manage_copy(Id)); Type *EleTy = SAI->getElementType(); Value *Val = &*Arg; - SmallVector Sizes; isl_ast_build *Build = isl_ast_build_from_context(isl_set_copy(Prog->context)); - Sizes.push_back(nullptr); - for (long j = 1, n = Kernel->array[i].array->n_index; j < n; j++) { - isl_ast_expr *DimSize = isl_ast_build_expr_from_pw_aff( - Build, isl_multi_pw_aff_get_pw_aff(Kernel->array[i].array->bound, j)); - auto V = ExprBuilder.create(DimSize); - Sizes.push_back(SE.getSCEV(V)); - } + + SmallVector Sizes; + + // TODO: take "correct" capture (const & or whatever of SAI) + // TODO: this is so fugly, find a better way to express this :( + ShapeInfo NewShape = [&](SmallVector &Sizes) { + if (SAI->hasStrides()) { + // 
TODO: find some way to merge the code? Here, we know the outermost + // dimension index so we can start from 0. In the sizes code, we need + // to start from 1 to indicate that we do not know the shape of the + // outermost dim. + for (long j = 0, n = Kernel->array[i].array->n_index; j < n; j++) { + isl_ast_expr *DimSize = isl_ast_build_expr_from_pw_aff( + Build, + isl_multi_pw_aff_get_pw_aff(Kernel->array[i].array->bound, j)); + auto V = ExprBuilder.create(DimSize); + Sizes.push_back(SE.getSCEV(V)); + } + return SAI->getShape(); + } else { + Sizes.push_back(nullptr); + for (long j = 1, n = Kernel->array[i].array->n_index; j < n; j++) { + isl_ast_expr *DimSize = isl_ast_build_expr_from_pw_aff( + Build, + isl_multi_pw_aff_get_pw_aff(Kernel->array[i].array->bound, j)); + auto V = ExprBuilder.create(DimSize); + Sizes.push_back(SE.getSCEV(V)); + } + return ShapeInfo::fromSizes(Sizes); + } + }(Sizes); + const ScopArrayInfo *SAIRep = - S.getOrCreateScopArrayInfo(Val, EleTy, Sizes, MemoryKind::Array); + S.getOrCreateScopArrayInfo(Val, EleTy, NewShape, MemoryKind::Array); LocalArrays.push_back(Val); isl_ast_build_free(Build); @@ -2242,6 +2386,7 @@ isl_val_free(Val); ArrayTy = ArrayType::get(ArrayTy, Bound); } + const ShapeInfo NewShape = ShapeInfo::fromSizes(Sizes); const ScopArrayInfo *SAI; Value *Allocation; @@ -2258,8 +2403,8 @@ } else { llvm_unreachable("unknown variable type"); } - SAI = - S.getOrCreateScopArrayInfo(Allocation, EleTy, Sizes, MemoryKind::Array); + SAI = S.getOrCreateScopArrayInfo(Allocation, EleTy, NewShape, + MemoryKind::Array); Id = isl_id_alloc(S.getIslCtx().get(), Var.name, nullptr); IDToValue[Id] = Allocation; LocalArrays.push_back(Allocation); @@ -2857,20 +3002,21 @@ isl::local_space LS = isl::local_space(Array->getSpace()); - isl::pw_aff Val = isl::aff::var_on_domain(LS, isl::dim::set, 0); - isl::pw_aff OuterMin = AccessSet.dim_min(0); - isl::pw_aff OuterMax = AccessSet.dim_max(0); - OuterMin = OuterMin.add_dims(isl::dim::in,
Val.dim(isl::dim::in)); - OuterMax = OuterMax.add_dims(isl::dim::in, Val.dim(isl::dim::in)); - OuterMin = OuterMin.set_tuple_id(isl::dim::in, Array->getBasePtrId()); - OuterMax = OuterMax.set_tuple_id(isl::dim::in, Array->getBasePtrId()); - isl::set Extent = isl::set::universe(Array->getSpace()); - - Extent = Extent.intersect(OuterMin.le_set(Val)); - Extent = Extent.intersect(OuterMax.ge_set(Val)); - - for (unsigned i = 1; i < NumDims; ++i) + if (!Array->hasStrides()) { + isl::pw_aff Val = isl::aff::var_on_domain(LS, isl::dim::set, 0); + isl::pw_aff OuterMin = AccessSet.dim_min(0); + isl::pw_aff OuterMax = AccessSet.dim_max(0); + OuterMin = OuterMin.add_dims(isl::dim::in, Val.dim(isl::dim::in)); + OuterMax = OuterMax.add_dims(isl::dim::in, Val.dim(isl::dim::in)); + OuterMin = OuterMin.set_tuple_id(isl::dim::in, Array->getBasePtrId()); + OuterMax = OuterMax.set_tuple_id(isl::dim::in, Array->getBasePtrId()); + + Extent = Extent.intersect(OuterMin.le_set(Val)); + Extent = Extent.intersect(OuterMax.ge_set(Val)); + } + const int StartIndex = Array->hasStrides() ? 
0 : 1; + for (unsigned i = StartIndex; i < NumDims; ++i) Extent = Extent.lower_bound_si(isl::dim::set, i, 0); for (unsigned i = 0; i < NumDims; ++i) { @@ -3009,6 +3155,7 @@ gpu_array_info &PPCGArray = PPCGProg->array[i]; + PPCGArray.user = Array; PPCGArray.space = Array->getSpace().release(); PPCGArray.type = strdup(TypeName.c_str()); PPCGArray.size = DL->getTypeAllocSize(Array->getElementType()); @@ -3027,7 +3174,7 @@ PPCGArray.global = false; PPCGArray.linearize = false; PPCGArray.dep_order = nullptr; - PPCGArray.user = Array; + //PPCGArray.user = Array; PPCGArray.bound = nullptr; setArrayBounds(PPCGArray, Array); @@ -3532,7 +3679,8 @@ NodeBuilder.addParameters(S->getContext().release()); Value *RTC = NodeBuilder.createRTC(Condition); - Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC); + Builder.GetInsertBlock()->getTerminator()->setOperand(0, + Builder.getTrue()); Builder.SetInsertPoint(&*StartBlock->begin()); Index: lib/Support/ScopHelper.cpp =================================================================== --- lib/Support/ScopHelper.cpp +++ lib/Support/ScopHelper.cpp @@ -681,3 +681,59 @@ return false; } + +llvm::Optional> +polly::getAbstractIndexingCall(MemAccInst Inst, ScalarEvolution &SE) { + //// TODO: I can get rid of all this crap and use SCEV to drill through the + //// bitcasts. Alas, this was written when I was more of an LLVM noob than I + //// currently am :) + //// TODO: clean this up please. + + //// Case 1. (Total size of array not known) + //// %2 = tail call i64 @_gfortran_polly_array_index_2(i64 1, i64 %1, i64 + //// %indvars.iv1, i64 %indvars.iv) #1 + //// %3 = getelementptr float, float* %0, i64 + //// %bitcast = bitcast %3 to + //// %2 store float 2.000000e+00, float* %3, align 4 STORE (GEP + ///< bitcast>) / (CALL index_2(, ))) + + //// Case 2. 
(Total size of array statically known) + //// %4 = tail call i64 @_gfortran_polly_array_index_2(i64 1, i64 5, i64 + //// %indvars.iv1, i64 %indvars.iv) #1 %5 = getelementptr [25 x float], [25 x + //// float]* @__m_MOD_g_arr_const_5_5, i64 0, i64 %4 store float 4.200000e+01, + //// float* %5, align 4 + + Value *MaybeBitcast = Inst.getPointerOperand(); + if (!MaybeBitcast) + return Optional>(None); + + //// If we have a bitcast as the parameter to the instruction, strip off the + //// bitcast. Otherwise, return the original instruction operand. + // Value *MaybeGEP = [&]() -> Value * { + // BitCastOperator *Bitcast = dyn_cast(MaybeBitcast); + // if (Bitcast) { + // return Bitcast->getOperand(0); + // } + // return Inst.getPointerOperand(); + //}(); + + GEPOperator *GEP = dyn_cast(MaybeBitcast); + + if (!GEP) + return Optional>(None); + + auto *MaybeCall = GEP->getOperand(GEP->getNumOperands() - 1); + assert(MaybeCall); + + // GEPOperator *GEP = dyn_cast(MaybeBitcast); + CallInst *Call = dyn_cast(MaybeCall); + if (!Call) + return Optional>(None); + + if (!Call->getCalledFunction()->getName().count( + POLLY_ABSTRACT_INDEX_BASENAME)) + return Optional>(None); + + std::pair p = std::make_pair(Call, GEP); + return Optional>(p); +} Index: lib/Transform/ForwardOpTree.cpp =================================================================== --- lib/Transform/ForwardOpTree.cpp +++ lib/Transform/ForwardOpTree.cpp @@ -360,9 +360,9 @@ Subscripts.push_back(nullptr); } - MemoryAccess *Access = - new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(), - LI->getType(), true, {}, Sizes, LI, MemoryKind::Array); + MemoryAccess *Access = new MemoryAccess( + Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(), LI->getType(), true, + {}, ShapeInfo::fromSizes(Sizes), LI, MemoryKind::Array); S->addAccessFunction(Access); Stmt->addAccess(Access, true); Index: test/GPGPU/chpl_2d_init_shapeinfo_ppcg.ll =================================================================== --- /dev/null +++ 
test/GPGPU/chpl_2d_init_shapeinfo_ppcg.ll @@ -0,0 +1,55 @@ +;RUN: opt %loadPolly -polly-only-func=test_chpl -polly-codegen-ppcg -polly-invariant-load-hoisting -analyze < %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +%array_ty = type { i64, %array_ptr*, i8 } +%array_ptr = type { [2 x i64], [2 x i64], [2 x i64], i64, double*, double*, i8 } + +; Function Attrs: readnone +define internal i64 @polly_array_index(i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %arg5) #0 { + %tmp = mul nsw i64 %arg4, %arg2 + %tmp8 = add nsw i64 %tmp, %arg1 + %tmp9 = mul nsw i64 %arg5, %arg3 + %tmp10 = add nsw i64 %tmp8, %tmp9 + ret i64 %tmp10 +} + + +; Function Attrs: noinline +define weak dso_local void @test_chpl(%array_ty* nonnull %arg) #1 { +bb: + br label %bb23 + +bb23: ; preds = %bb, %bb37 + %.0 = phi i64 [ 0, %bb ], [ %tmp38, %bb37 ] + br label %bb24 + +bb24: ; preds = %bb23, %bb24 + %.01 = phi i64 [ 0, %bb23 ], [ %tmp36, %bb24 ] + %tmp25 = getelementptr inbounds %array_ty, %array_ty* %arg, i64 0, i32 1 + %tmp26 = load %array_ptr*, %array_ptr** %tmp25, align 8 + %tmp27 = getelementptr inbounds %array_ptr, %array_ptr* %tmp26, i64 0, i32 1, i64 1 + store i64 1, i64* %tmp27, align 8 + %tmp28 = getelementptr inbounds %array_ptr, %array_ptr* %tmp26, i64 0, i32 1, i64 0 + %tmp29 = load i64, i64* %tmp28, align 8 + %tmp30 = call i64 @polly_array_index(i64 0, i64 %tmp29, i64 1, i64 %.0, i64 %.01) + %tmp31 = getelementptr inbounds %array_ptr, %array_ptr* %tmp26, i64 0, i32 5 + %tmp32 = load double*, double** %tmp31, align 8 + %tmp33 = getelementptr inbounds double, double* %tmp32, i64 %tmp30 + %tmp34 = add nuw nsw i64 %.01, %.0 + %tmp35 = sitofp i64 %tmp34 to double + store double %tmp35, double* %tmp33, align 8 + %tmp36 = add nuw nsw i64 %.01, 1 + %exitcond = icmp ne i64 %tmp36, 1000 + br i1 %exitcond, label %bb24, label %bb37 + +bb37: ; preds = %bb24 + %tmp38 = add nuw nsw i64 %.0, 1 + %exitcond8 = icmp ne i64 %tmp38, 1000 + br i1 %exitcond8, label %bb23, label %bb39 + 
+bb39: ; preds = %bb37 + ret void +} + +attributes #0 = { readnone } +attributes #1 = { noinline } Index: test/ScopInfo/chpl_2d_init_shapeinfo.ll =================================================================== --- /dev/null +++ test/ScopInfo/chpl_2d_init_shapeinfo.ll @@ -0,0 +1,55 @@ +;RUN: opt %loadPolly -polly-only-func=test_chpl -polly-scops -polly-invariant-load-hoisting -analyze < %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +%array_ty = type { i64, %array_ptr*, i8 } +%array_ptr = type { [2 x i64], [2 x i64], [2 x i64], i64, double*, double*, i8 } + +; Function Attrs: readnone +define internal i64 @polly_array_index(i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %arg5) #0 { + %tmp = mul nsw i64 %arg4, %arg2 + %tmp8 = add nsw i64 %tmp, %arg1 + %tmp9 = mul nsw i64 %arg5, %arg3 + %tmp10 = add nsw i64 %tmp8, %tmp9 + ret i64 %tmp10 +} + + +; Function Attrs: noinline +define weak dso_local void @test_chpl(%array_ty* nonnull %arg) #1 { +bb: + br label %bb23 + +bb23: ; preds = %bb, %bb37 + %.0 = phi i64 [ 0, %bb ], [ %tmp38, %bb37 ] + br label %bb24 + +bb24: ; preds = %bb23, %bb24 + %.01 = phi i64 [ 0, %bb23 ], [ %tmp36, %bb24 ] + %tmp25 = getelementptr inbounds %array_ty, %array_ty* %arg, i64 0, i32 1 + %tmp26 = load %array_ptr*, %array_ptr** %tmp25, align 8 + %tmp27 = getelementptr inbounds %array_ptr, %array_ptr* %tmp26, i64 0, i32 1, i64 1 + store i64 1, i64* %tmp27, align 8 + %tmp28 = getelementptr inbounds %array_ptr, %array_ptr* %tmp26, i64 0, i32 1, i64 0 + %tmp29 = load i64, i64* %tmp28, align 8 + %tmp30 = call i64 @polly_array_index(i64 0, i64 %tmp29, i64 1, i64 %.0, i64 %.01) + %tmp31 = getelementptr inbounds %array_ptr, %array_ptr* %tmp26, i64 0, i32 5 + %tmp32 = load double*, double** %tmp31, align 8 + %tmp33 = getelementptr inbounds double, double* %tmp32, i64 %tmp30 + %tmp34 = add nuw nsw i64 %.01, %.0 + %tmp35 = sitofp i64 %tmp34 to double + store double %tmp35, double* %tmp33, align 8 + %tmp36 = add nuw nsw i64 %.01, 1 + 
%exitcond = icmp ne i64 %tmp36, 1000 + br i1 %exitcond, label %bb24, label %bb37 + +bb37: ; preds = %bb24 + %tmp38 = add nuw nsw i64 %.0, 1 + %exitcond8 = icmp ne i64 %tmp38, 1000 + br i1 %exitcond8, label %bb23, label %bb39 + +bb39: ; preds = %bb37 + ret void +} + +attributes #0 = { readnone } +attributes #1 = { noinline }