Index: include/polly/CodeGen/IslExprBuilder.h =================================================================== --- include/polly/CodeGen/IslExprBuilder.h +++ include/polly/CodeGen/IslExprBuilder.h @@ -93,6 +93,9 @@ public: /// A map from isl_ids to llvm::Values. typedef llvm::MapVector> IDToValueTy; + /// A map from const SCEV* to llvm::Values + typedef llvm::MapVector> + SCEVToValueTy; typedef llvm::MapVector IDToScopArrayInfoTy; @@ -131,7 +134,7 @@ /// @param LI LoopInfo analysis for the current function. /// @param StartBlock The first basic block after the RTC. IslExprBuilder(Scop &S, PollyIRBuilder &Builder, IDToValueTy &IDToValue, - ValueMapT &GlobalMap, const llvm::DataLayout &DL, + SCEVToValueTy &SCEVToValue, ValueMapT &GlobalMap, const llvm::DataLayout &DL, llvm::ScalarEvolution &SE, llvm::DominatorTree &DT, llvm::LoopInfo &LI, llvm::BasicBlock *StartBlock); @@ -210,6 +213,7 @@ PollyIRBuilder &Builder; IDToValueTy &IDToValue; + SCEVToValueTy &SCEVToValue; ValueMapT &GlobalMap; const llvm::DataLayout &DL; @@ -272,6 +276,11 @@ /// @return A value that represents the result of the multiplication. llvm::Value *createMul(llvm::Value *LHS, llvm::Value *RHS, const llvm::Twine &Name = ""); + + // Provide a uniform interface to lookup replacements of the old value + // in the various maps we provide. 
+ llvm::Value *getLatestValue(llvm::Value *Old); + }; } // namespace polly Index: include/polly/CodeGen/IslNodeBuilder.h =================================================================== --- include/polly/CodeGen/IslNodeBuilder.h +++ include/polly/CodeGen/IslNodeBuilder.h @@ -98,7 +98,7 @@ const DataLayout &DL, LoopInfo &LI, ScalarEvolution &SE, DominatorTree &DT, Scop &S, BasicBlock *StartBlock) : S(S), Builder(Builder), Annotator(Annotator), - ExprBuilder(S, Builder, IDToValue, ValueMap, DL, SE, DT, LI, + ExprBuilder(S, Builder, IDToValue, SCEVToValue, ValueMap, DL, SE, DT, LI, StartBlock), BlockGen(Builder, LI, SE, DT, ScalarMap, EscapeMap, ValueMap, &ExprBuilder, StartBlock), @@ -196,6 +196,10 @@ // ivs. IslExprBuilder::IDToValueTy IDToValue; + // This maps a const SCEV* to the Value* it has in the generated program. For + // now, this stores strides and offsets of SAIs. + IslExprBuilder::SCEVToValueTy SCEVToValue; + /// A collection of all parallel subfunctions that have been created. SmallVector ParallelSubfunctions; @@ -229,6 +233,9 @@ /// @returns False, iff a problem occurred and the value was not materialized. bool materializeParameters(); + void materializeStridedArraySizes(); + + // Extract the upper bound of this loop // // The isl code generation can generate arbitrary expressions to check if the Index: include/polly/ScopBuilder.h =================================================================== --- include/polly/ScopBuilder.h +++ include/polly/ScopBuilder.h @@ -186,6 +186,8 @@ /// @param Stmt The parent statement of the instruction void buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt); + bool buildAccessPollyAbstractIndex(MemAccInst Inst, ScopStmt *Stmt); + /// Build an instance of MemoryAccess from the Load/Store instruction. 
/// /// @param Inst The Load/Store instruction that access the memory @@ -278,7 +280,7 @@ Value *BaseAddress, Type *ElemType, bool Affine, Value *AccessValue, ArrayRef Subscripts, - ArrayRef Sizes, MemoryKind Kind); + ShapeInfo Shape, MemoryKind Kind); /// Create a MemoryAccess that represents either a LoadInst or /// StoreInst. @@ -298,7 +300,7 @@ MemoryAccess::AccessType AccType, Value *BaseAddress, Type *ElemType, bool IsAffine, ArrayRef Subscripts, - ArrayRef Sizes, Value *AccessValue); + ShapeInfo Shape, Value *AccessValue); /// Create a MemoryAccess for writing an llvm::Instruction. /// Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -108,6 +108,183 @@ DELINEARIZATION, }; +// Abstract over a notion of the shape of an array: +// One can compute indices using both sizes and strides. +class ShapeInfo { +private: + using SCEVArrayTy = SmallVector; + using SCEVArrayRefTy = ArrayRef; + + using OptionalSCEVArrayTy = Optional; + using OptionalSCEVArrayRefTy = Optional; + + llvm::Optional> Sizes; + llvm::Optional> Strides; + llvm::Optional Offset; + + ShapeInfo(Optional> SizesRef, + Optional> StridesRef, + llvm::Optional Offset) + : Offset(Offset){ + // Can check for XOR + assert(bool(SizesRef) || bool(StridesRef)); + assert(!(bool(SizesRef) && bool(StridesRef))); + + if (StridesRef || Offset) { + assert(Offset); + assert(StridesRef); + } + + if (SizesRef) + Sizes = + OptionalSCEVArrayTy(SCEVArrayTy(SizesRef->begin(), SizesRef->end())); + + if (StridesRef) + Strides = OptionalSCEVArrayTy( + SCEVArrayTy(StridesRef->begin(), StridesRef->end())); + } + + ShapeInfo(NoneType) : Sizes(None), Strides(None), Offset(None) {} + +public: + static ShapeInfo fromSizes(ArrayRef Sizes) { + return ShapeInfo(OptionalSCEVArrayRefTy(Sizes), None, None); + } + + // We have this anti-pattern in polly which does this: + // Shape(ShapeInfo::fromSizes({nullptr}) + // Consider 
providing a separate constructor for this, which we then + // kill in some cleanup. + + ShapeInfo(const ShapeInfo &other) { + Sizes = other.Sizes; + Strides = other.Strides; + Offset = other.Offset; + } + + ShapeInfo &operator=(const ShapeInfo &other) { + Sizes = other.Sizes; + Strides = other.Strides; + Offset = other.Offset; + return *this; + } + + static ShapeInfo fromStrides(ArrayRef Strides, + const SCEV *Offset) { + assert(Offset && "offset is null"); + return ShapeInfo(None, OptionalSCEVArrayRefTy(Strides), + Optional(Offset)); + } + + static ShapeInfo none() { return ShapeInfo(None); } + + unsigned getNumberOfDimensions() const { + // assert(isInitialized()); + if (Sizes) + return Sizes->size(); + + if (Strides) + return Strides->size(); + + return 0; + } + + /// Set the sizes of the Shape. It checks the invariant + /// That this shape does not have strides. + void setSizes(SmallVector NewSizes) { + assert(!bool(Strides)); + + if (!bool(Sizes)) { + Sizes = Optional>( + SmallVector()); + } + + Sizes = NewSizes; + } + + /// Set the strides of the Shape. It checks the invariant + /// That this shape does not have sizes. + void setStrides(ArrayRef NewStrides, const SCEV *NewOffset) { + Offset = NewOffset; + assert(!bool(Sizes)); + + // Be explicit because GCC(5.3.0) is unable to deduce this. 
+ if (!Strides) + Strides = Optional>( + SmallVector()); + + Strides->clear(); + Strides->insert(Strides->begin(), NewStrides.begin(), NewStrides.end()); + + assert(Offset && "offset is null"); + } + + const SmallVector &sizes() const { + assert(!bool(Strides)); + return Sizes.getValue(); + } + + const SCEV *offset() const { return Offset.getValue(); } + + SmallVector &sizes_mut() { + assert(!bool(Strides)); + return Sizes.getValue(); + } + + bool isInitialized() const { return bool(Sizes) || bool(Strides); } + + const SmallVector &strides() const { + assert(!bool(Sizes)); + return Strides.getValue(); + } + + bool hasSizes() const { return bool(Sizes); } + bool hasStrides() const { return bool(Strides); } + + template + Ret mapSizes(std::function &)> func, + Ret otherwise) { + if (Sizes) + return func(*Sizes); + + return otherwise; + } + + void mapSizes(std::function &)> func) { + if (Sizes) + func(*Sizes); + } + + raw_ostream &print(raw_ostream &OS) const { + if (Sizes) { + OS << "Sizes: "; + for (auto Size : *Sizes) { + if (Size) + OS << *Size << ", "; + else + OS << "null" + << ", "; + } + return OS; + } else if (Strides) { + OS << "Strides: "; + for (auto Stride : *Strides) { + if (Stride) + OS << *Stride << ", "; + else + OS << "null" + << ", "; + } + return OS; + } + OS << "Uninitialized.\n"; + return OS; + } +}; + +raw_ostream &operator<<(raw_ostream &OS, const ShapeInfo &Shape); + + /// Enum to distinguish between assumptions and restrictions. enum AssumptionSign { AS_ASSUMPTION, AS_RESTRICTION }; @@ -256,13 +433,14 @@ /// @param BasePtr The array base pointer. /// @param ElementType The type of the elements stored in the array. /// @param IslCtx The isl context used to create the base pointer id. + /// @param ShapeInfo /// @param DimensionSizes A vector containing the size of each dimension. /// @param Kind The kind of the array object. /// @param DL The data layout of the module. /// @param S The scop this array object belongs to. 
/// @param BaseName The optional name of this memory reference. ScopArrayInfo(Value *BasePtr, Type *ElementType, isl::ctx IslCtx, - ArrayRef DimensionSizes, MemoryKind Kind, + ShapeInfo Shape, MemoryKind Kind, const DataLayout &DL, Scop *S, const char *BaseName = nullptr); /// Destructor to free the isl id of the base pointer. @@ -292,6 +470,29 @@ /// with old sizes bool updateSizes(ArrayRef Sizes, bool CheckConsistency = true); + /// Update the strides of a ScopArrayInfo object, when we had + /// initially believed it to be a size based representation, we later + /// get a more refined view of the array in terms of strides. + /// TODO: Think if this is really necessary. We needed this in COSMO for reasons, + /// (IIRC, they would have been an i8*, which they would index "correctly" + /// to get to the offset and stride info.) + /// Roughly, they had code like this: + /// struct ARRTY { int64 offset; int64 stride_1; int64 stride_2; void *memory; } + /// void * arr; + /// offset = ((int64 *)arr)[0]; + /// stride_1 = ((int64 *)arr)[1]; + /// stride_2 = ((int64 *)arr)[2]; + /// memory = fortran_index(((char*)arr+)), offset, stride_1, stride_2, ix1, ix2); + /// In this type of code, we would first see the naked access of "arr" which we would represent in a + /// size based repr, and we would change our view later into the strided version. + /// We should probably disallow this for future clients. + void overwriteSizeWithStrides(ArrayRef Strides, + const SCEV *Offset); + + /// Update the strides of a ScopArrayInfo object. + bool updateStrides(ArrayRef Strides, const SCEV *Offset); + + /// Make the ScopArrayInfo model a Fortran array. /// It receives the Fortran array descriptor and stores this. /// It also adds a piecewise expression for the outermost dimension @@ -311,6 +512,9 @@ // Set IsOnHeap to the value in parameter. 
void setIsOnHeap(bool value) { IsOnHeap = value; } + //get the shape of the ScopArrayInfo + ShapeInfo getShape() const { return Shape; } + /// For indirect accesses return the origin SAI of the BP, else null. const ScopArrayInfo *getBasePtrOriginSAI() const { return BasePtrOriginSAI; } @@ -324,7 +528,7 @@ if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI || Kind == MemoryKind::Value) return 0; - return DimensionSizes.size(); + return Shape.getNumberOfDimensions(); } /// Return the size of dimension @p dim as SCEV*. @@ -334,9 +538,17 @@ // information, in case the array is not newly created. const SCEV *getDimensionSize(unsigned Dim) const { assert(Dim < getNumberOfDimensions() && "Invalid dimension"); - return DimensionSizes[Dim]; + return Shape.sizes()[Dim]; } + const SCEV *getDimensionStride(unsigned Dim) const { + assert(Dim < getNumberOfDimensions() && "Invalid dimension"); + return Shape.strides()[Dim]; + } + + + const SCEV *getStrideOffset() const { return Shape.offset(); } + /// Return the size of dimension @p dim as isl::pw_aff. // // Scalars do not have array dimensions and the first dimension of @@ -423,6 +635,7 @@ /// @returns True, if the arrays are compatible, False otherwise. bool isCompatibleWith(const ScopArrayInfo *Array) const; + bool hasStrides() const { return Shape.hasStrides(); } private: void addDerivedSAI(ScopArrayInfo *DerivedSAI) { DerivedSAIs.insert(DerivedSAI); @@ -452,9 +665,6 @@ /// True if the newly allocated array is on heap. bool IsOnHeap = false; - /// The sizes of each dimension as SCEV*. - SmallVector DimensionSizes; - /// The sizes of each dimension as isl::pw_aff. SmallVector DimensionSizesPw; @@ -466,6 +676,9 @@ /// The data layout of the module. const DataLayout &DL; + /// The sizes of each dimension as SCEV*. + ShapeInfo Shape; + /// The scop this SAI object belongs to. Scop &S; @@ -588,6 +801,9 @@ Type *ElementType; /// Size of each dimension of the accessed array. 
+ ShapeInfo Shape; + + /// Size of each dimension of the accessed array. SmallVector Sizes; // @} @@ -756,10 +972,11 @@ /// @param IsAffine Whether the subscripts are affine expressions. /// @param Kind The kind of memory accessed. /// @param Subscripts Subscript expressions - /// @param Sizes Dimension lengths of the accessed array. + /// @param Shape Shape of the accessed array. + MemoryAccess(ScopStmt *Stmt, Instruction *AccessInst, AccessType AccType, Value *BaseAddress, Type *ElemType, bool Affine, - ArrayRef Subscripts, ArrayRef Sizes, + ArrayRef Subscripts, ShapeInfo Shape, Value *AccessValue, MemoryKind Kind); /// Create a new MemoryAccess that corresponds to @p AccRel. @@ -2841,14 +3058,15 @@ const MapInsnToMemAcc &getInsnToMemAccMap() const { return DC.InsnToMemAcc; } /// Return the (possibly new) ScopArrayInfo object for @p Access. - /// + /// @param BasePtr The base pointer of the SAI to add /// @param ElementType The type of the elements stored in this array. + /// @param Shape The shape of the SAI. /// @param Kind The kind of the array info object. /// @param BaseName The optional name of this memory reference. ScopArrayInfo *getOrCreateScopArrayInfo(Value *BasePtr, Type *ElementType, - ArrayRef Sizes, - MemoryKind Kind, - const char *BaseName = nullptr); + ShapeInfo Shape, + MemoryKind Kind, + const char *BaseName = nullptr); /// Create an array and return the corresponding ScopArrayInfo object. /// Index: include/polly/Support/ScopHelper.h =================================================================== --- include/polly/Support/ScopHelper.h +++ include/polly/Support/ScopHelper.h @@ -17,7 +17,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" #include #include @@ -480,5 +481,12 @@ /// /// Such a statement must not be removed, even if has no side-effects. 
bool hasDebugCall(ScopStmt *Stmt); + +/// The name of the array index "intrinsic" interpreted by polly +static const std::string POLLY_ABSTRACT_INDEX_BASENAME = "polly_array_index"; + +/// Check if a given MemAccInst models a polly_array_index. +llvm::Optional> +getAbstractIndexingCall(MemAccInst Inst, llvm::ScalarEvolution &SE); } // namespace polly #endif Index: lib/Analysis/ScopBuilder.cpp =================================================================== --- lib/Analysis/ScopBuilder.cpp +++ lib/Analysis/ScopBuilder.cpp @@ -429,7 +429,7 @@ ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V))); addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType, - true, Subscripts, SizesSCEV, Val); + true, Subscripts, ShapeInfo::fromSizes(SizesSCEV), Val); return true; } @@ -480,7 +480,7 @@ scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent()); addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType, - true, AccItr->second.DelinearizedSubscripts, Sizes, Val); + true, AccItr->second.DelinearizedSubscripts, ShapeInfo::fromSizes(Sizes), Val); return true; } @@ -522,10 +522,13 @@ auto *DestPtrSCEV = dyn_cast(SE.getPointerBase(DestAccFunc)); assert(DestPtrSCEV); + // TODO: code smell, why do we initialize an empty shape? I don't recall the details + // anymore. 
DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV); addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(), IntegerType::getInt8Ty(DestPtrVal->getContext()), - LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr}, + LengthIsAffine, {DestAccFunc, LengthVal}, + ShapeInfo::fromSizes({nullptr}), Inst.getValueOperand()); auto *MemTrans = dyn_cast(MemIntr); @@ -547,13 +550,17 @@ SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV); addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(), IntegerType::getInt8Ty(SrcPtrVal->getContext()), - LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr}, + LengthIsAffine, {SrcAccFunc, LengthVal}, + ShapeInfo::fromSizes({nullptr}), Inst.getValueOperand()); return true; } bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) { + if (buildAccessPollyAbstractIndex(Inst, Stmt)) + return true; + auto *CI = dyn_cast_or_null(Inst); if (CI == nullptr) @@ -565,6 +572,7 @@ bool ReadOnly = false; auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0); auto *CalledFunction = CI->getCalledFunction(); + switch (AA.getModRefBehavior(CalledFunction)) { case FMRB_UnknownModRefBehavior: llvm_unreachable("Unknown mod ref behaviour cannot be represented."); @@ -591,9 +599,10 @@ if (ArgSCEV->isZero()) continue; + // TODO: again, why do we create a shape with a nullptr shape? 
auto *ArgBasePtr = cast(SE.getPointerBase(ArgSCEV)); addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(), - ArgBasePtr->getType(), false, {AF}, {nullptr}, CI); + ArgBasePtr->getType(), false, {AF}, ShapeInfo::fromSizes({nullptr}), CI); } return true; } @@ -643,7 +652,8 @@ AccType = MemoryAccess::MAY_WRITE; addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType, - IsAffine, {AccessFunction}, {nullptr}, Val); + IsAffine, {AccessFunction}, + ShapeInfo::fromSizes({nullptr}), Val); } void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) { @@ -662,6 +672,96 @@ buildAccessSingleDim(Inst, Stmt); } + +bool ScopBuilder::buildAccessPollyAbstractIndex(MemAccInst Inst, + ScopStmt *Stmt) { + auto optionalCallGEP = getAbstractIndexingCall(Inst, SE); + if (!optionalCallGEP) + return false; + + CallInst *Call; + GEPOperator *GEP; + std::tie(Call, GEP) = *optionalCallGEP; + + if (Call->getNumArgOperands() % 2 != 1) { + return false; + } + + const int NArrayDims = Call->getNumArgOperands() / 2; + + Value *BasePtr = GEP->getPointerOperand(); + + std::vector Subscripts; + std::vector Strides; + Loop *SurroundingLoop = Stmt->getSurroundingLoop(); + + InvariantLoadsSetTy AccessILS; + + + const SCEV *OffsetSCEV = [&]{ + Value *Offset = Call->getArgOperand(0); + assert(Offset); + const SCEV *OffsetSCEV = SE.getSCEV(Offset); + + if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, OffsetSCEV, SE, + &AccessILS)) { + return (const SCEV *)nullptr; + } + + // Offsets are always loaded from the array, they will get invariant + // load hoisted correctly later. 
+ for (LoadInst *L : AccessILS) { + scop->addRequiredInvariantLoad(L); + } + AccessILS.clear(); + + return OffsetSCEV; + }(); + + if (!OffsetSCEV) return false; + + for (int i = 0; i < NArrayDims; i++) { + Value *Ix = Call->getArgOperand(1 + NArrayDims + i); + const SCEV *IxSCEV = SE.getSCEV(Ix); + ensureValueRead(Ix, Stmt); + Subscripts.push_back(IxSCEV); + + Value *Stride = Call->getArgOperand(1 + i); + const SCEV *StrideSCEV = SE.getSCEV(Stride); + + if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, StrideSCEV, SE, + &AccessILS)) { + return false; + } + + for (LoadInst *L : AccessILS) { + scop->addRequiredInvariantLoad(L); + } + AccessILS.clear(); + + Strides.push_back(StrideSCEV); + } + + Value *Val = Inst.getValueOperand(); + Type *ElementType = Val->getType(); + assert(BasePtr); + assert(ElementType); + + enum MemoryAccess::AccessType AccType = + isa(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE; + scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent()); + + // NOTE: this should be fromStrides. + // NOTE: To be able to change this, we need to teach ScopArrayInfo to receive + // a Shape object. So, do that first. 
+ addArrayAccess(Stmt, Inst, AccType, BasePtr, ElementType, /*IsAffine=*/true, Subscripts, + ShapeInfo::fromStrides(Strides, OffsetSCEV), Val); + + return true; +} + + + void ScopBuilder::buildAccessFunctions() { for (auto &Stmt : *scop) { if (Stmt.isBlockStmt()) { @@ -1011,7 +1111,7 @@ MemoryAccess *ScopBuilder::addMemoryAccess( ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType, Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue, - ArrayRef Subscripts, ArrayRef Sizes, + ArrayRef Subscripts, ShapeInfo Shape, MemoryKind Kind) { bool isKnownMustAccess = false; @@ -1039,7 +1139,7 @@ AccType = MemoryAccess::MAY_WRITE; auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType, - Affine, Subscripts, Sizes, AccessValue, Kind); + Affine, Subscripts, Shape, AccessValue, Kind); scop->addAccessFunction(Access); Stmt->addAccess(Access); @@ -1051,12 +1151,12 @@ Value *BaseAddress, Type *ElementType, bool IsAffine, ArrayRef Subscripts, - ArrayRef Sizes, + ShapeInfo Shape, Value *AccessValue) { ArrayBasePointers.insert(BaseAddress); auto *MemAccess = addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress, ElementType, IsAffine, AccessValue, - Subscripts, Sizes, MemoryKind::Array); + Subscripts, Shape, MemoryKind::Array); if (!DetectFortranArrays) return; @@ -1090,7 +1190,7 @@ addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(), true, Inst, ArrayRef(), - ArrayRef(), MemoryKind::Value); + ShapeInfo::fromSizes(ArrayRef()), MemoryKind::Value); } void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) { @@ -1128,7 +1228,7 @@ break; addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(), - true, V, ArrayRef(), ArrayRef(), + true, V, ArrayRef(), ShapeInfo::fromSizes(ArrayRef()), MemoryKind::Value); // Inter-statement uses need to write the value in their defining statement. @@ -1145,7 +1245,7 @@ // will create an exit PHI SAI object. 
It is needed during code generation // and would be created later anyway. if (IsExitBlock) - scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {}, + scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), ShapeInfo::fromSizes({}), MemoryKind::ExitPHI); // This is possible if PHI is in the SCoP's entry block. The incoming blocks @@ -1169,7 +1269,7 @@ MemoryAccess *Acc = addMemoryAccess( IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true, - PHI, ArrayRef(), ArrayRef(), + PHI, ArrayRef(), ShapeInfo::fromSizes(ArrayRef()), IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI); assert(Acc); Acc->addIncoming(IncomingBlock, IncomingValue); @@ -1177,7 +1277,7 @@ void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) { addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true, - PHI, ArrayRef(), ArrayRef(), + PHI, ArrayRef(), ShapeInfo::fromSizes(ArrayRef()), MemoryKind::PHI); } @@ -1360,7 +1460,7 @@ Ty = MemoryKind::Array; auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(), - ElementType, Access->Sizes, Ty); + ElementType, Access->Shape, Ty); Access->buildAccessRelation(SAI); scop->addAccessData(Access); } @@ -1502,7 +1602,7 @@ Instruction *GlobalRead = GlobalReadPair.second; for (auto *BP : ArrayBasePointers) addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ, - BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead); + BP, BP->getType(), false, {AF}, ShapeInfo::fromSizes({nullptr}), GlobalRead); } scop->buildInvariantEquivalenceClasses(); Index: lib/Analysis/ScopDetection.cpp =================================================================== --- lib/Analysis/ScopDetection.cpp +++ lib/Analysis/ScopDetection.cpp @@ -536,8 +536,46 @@ return false; } +/// Return if S is a call to a function that we use to denote multidimensional +// accesses +bool isSCEVCallToPollyAbstractIndex(const SCEV *S) { + if (isa(S)) { + Value *V = cast(S)->getValue(); + CallInst *Call = dyn_cast(V); + if 
(Call && Call->getCalledFunction() && + Call->getCalledFunction()->getName().count( + POLLY_ABSTRACT_INDEX_BASENAME)) + return true; + } + return false; +} + +/// Return if scev represents a multidim access. +bool isSCEVMultidimArrayAccess(const SCEV *S) { + if (isSCEVCallToPollyAbstractIndex(S)) + return true; + const SCEVMulExpr *Mul = dyn_cast(S); + if (!Mul) + return false; + + // TODO: I don't remember why I needed this. + // When does a Mul not have two operands? Something like {*, i, *, j, *, k}? + if (Mul->getNumOperands() != 2) + return false; + + // TODO: I have no memory of why I needed this. + return isSCEVCallToPollyAbstractIndex(Mul->getOperand(0)) || + isSCEVCallToPollyAbstractIndex(Mul->getOperand(1)); +} + bool ScopDetection::isAffine(const SCEV *S, Loop *Scope, DetectionContext &Context) const { + + if (isSCEVMultidimArrayAccess(S)) { + return true; + } + + InvariantLoadsSetTy AccessILS; if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS)) return false; @@ -698,6 +736,11 @@ Function *CalledFunction = CI.getCalledFunction(); + // Function being called is a polly indexing function. + if (CalledFunction->getName().count(POLLY_ABSTRACT_INDEX_BASENAME)) { + return true; + } + // Indirect calls are not supported. if (CalledFunction == nullptr) return false; @@ -1192,6 +1235,13 @@ bool ScopDetection::isValidMemoryAccess(MemAccInst Inst, DetectionContext &Context) const { + + /// If the memory access is modelling a call of + /// polly_abstract_array_index(...) 
+ if (getAbstractIndexingCall(Inst, SE)) { + return true; + } + Value *Ptr = Inst.getPointerOperand(); Loop *L = LI.getLoopFor(Inst->getParent()); const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L); Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -248,6 +248,10 @@ //===----------------------------------------------------------------------===// +raw_ostream &polly::operator<<(raw_ostream &OS, const ShapeInfo &Shape) { + return Shape.print(OS); +} + // Create a sequence of two schedules. Either argument may be null and is // interpreted as the empty schedule. Can also return null if both schedules are // empty. @@ -318,10 +322,11 @@ } ScopArrayInfo::ScopArrayInfo(Value *BasePtr, Type *ElementType, isl::ctx Ctx, - ArrayRef Sizes, MemoryKind Kind, + ShapeInfo Shape, MemoryKind Kind, const DataLayout &DL, Scop *S, const char *BaseName) - : BasePtr(BasePtr), ElementType(ElementType), Kind(Kind), DL(DL), S(*S) { + : BasePtr(BasePtr), ElementType(ElementType), Kind(Kind), DL(DL), + Shape(ShapeInfo::none()), S(*S) { std::string BasePtrName = BaseName ? BaseName : getIslCompatibleName("MemRef", BasePtr, S->getNextArrayIdx(), @@ -329,7 +334,12 @@ UseInstructionNames); Id = isl::id::alloc(Ctx, BasePtrName, this); - updateSizes(Sizes); + // TODO: why do we need updateSizes() ? Why don't we set this up in the + // ctor? + if (Shape.hasSizes()) + updateSizes(Shape.sizes()); + else + updateStrides(Shape.strides(), Shape.offset()); if (!BasePtr || Kind != MemoryKind::Array) { BasePtrOriginSAI = nullptr; @@ -415,27 +425,61 @@ DimensionSizesPw[0] = PwAff; } +void ScopArrayInfo::overwriteSizeWithStrides(ArrayRef Strides, + const SCEV *Offset) { + + // HACK: first set our shape to a stride based shape so that we don't + // assert within updateStrides. 
Move this into a bool parameter of + // updateStrides + Shape = ShapeInfo::fromStrides(Strides, Offset); + updateStrides(Strides, Offset); +} +bool ScopArrayInfo::updateStrides(ArrayRef Strides, + const SCEV *Offset) { + Shape.setStrides(Strides, Offset); + DimensionSizesPw.clear(); + for (size_t i = 0; i < Shape.getNumberOfDimensions(); i++) { + isl::space Space(S.getIslCtx(), 1, 0); + + std::string param_name = getIslCompatibleName( + "stride_" + std::to_string(i) + "__", getName(), ""); + isl::id IdPwAff = isl::id::alloc(S.getIslCtx(), param_name, this); + + Space = Space.set_dim_id(isl::dim::param, 0, IdPwAff); + isl::pw_aff PwAff = + isl::aff::var_on_domain(isl::local_space(Space), isl::dim::param, 0); + + DimensionSizesPw.push_back(PwAff); + } + return true; +} + bool ScopArrayInfo::updateSizes(ArrayRef NewSizes, bool CheckConsistency) { - int SharedDims = std::min(NewSizes.size(), DimensionSizes.size()); - int ExtraDimsNew = NewSizes.size() - SharedDims; - int ExtraDimsOld = DimensionSizes.size() - SharedDims; - - if (CheckConsistency) { - for (int i = 0; i < SharedDims; i++) { - auto *NewSize = NewSizes[i + ExtraDimsNew]; - auto *KnownSize = DimensionSizes[i + ExtraDimsOld]; - if (NewSize && KnownSize && NewSize != KnownSize) - return false; - } + if (Shape.isInitialized()) { + const SmallVector &DimensionSizes = Shape.sizes(); + int SharedDims = std::min(NewSizes.size(), DimensionSizes.size()); + int ExtraDimsNew = NewSizes.size() - SharedDims; + int ExtraDimsOld = DimensionSizes.size() - SharedDims; + + if (CheckConsistency) { + for (int i = 0; i < SharedDims; i++) { + auto *NewSize = NewSizes[i + ExtraDimsNew]; + auto *KnownSize = DimensionSizes[i + ExtraDimsOld]; + if (NewSize && KnownSize && NewSize != KnownSize) + return false; + } - if (DimensionSizes.size() >= NewSizes.size()) - return true; + if (DimensionSizes.size() >= NewSizes.size()) + return true; + } } - + SmallVector DimensionSizes; DimensionSizes.clear(); 
DimensionSizes.insert(DimensionSizes.begin(), NewSizes.begin(), NewSizes.end()); + Shape.setSizes(DimensionSizes); + DimensionSizesPw.clear(); for (const SCEV *Expr : DimensionSizes) { if (!Expr) { @@ -1012,11 +1056,11 @@ AccessType AccType, Value *BaseAddress, Type *ElementType, bool Affine, ArrayRef Subscripts, - ArrayRef Sizes, Value *AccessValue, + ShapeInfo Shape, Value *AccessValue, MemoryKind Kind) : Kind(Kind), AccType(AccType), Statement(Stmt), InvalidDomain(nullptr), BaseAddr(BaseAddress), ElementType(ElementType), - Sizes(Sizes.begin(), Sizes.end()), AccessInstruction(AccessInst), + Shape(Shape), AccessInstruction(AccessInst), AccessValue(AccessValue), IsAffine(Affine), Subscripts(Subscripts.begin(), Subscripts.end()), AccessRelation(nullptr), NewAccessRelation(nullptr), FAD(nullptr) { @@ -1029,8 +1073,8 @@ MemoryAccess::MemoryAccess(ScopStmt *Stmt, AccessType AccType, isl::map AccRel) : Kind(MemoryKind::Array), AccType(AccType), Statement(Stmt), - InvalidDomain(nullptr), AccessRelation(nullptr), - NewAccessRelation(AccRel), FAD(nullptr) { + InvalidDomain(nullptr), Shape(ShapeInfo::fromSizes({nullptr})), + AccessRelation(nullptr), NewAccessRelation(AccRel), FAD(nullptr) { isl::id ArrayInfoId = NewAccessRelation.get_tuple_id(isl::dim::out); auto *SAI = ScopArrayInfo::getFromId(ArrayInfoId); Sizes.push_back(nullptr); @@ -1840,10 +1884,11 @@ if (Access) return Access; + // TODO: again, why do we have an _empty_ list here for size? 
ScopArrayInfo *SAI = - Parent.getOrCreateScopArrayInfo(V, V->getType(), {}, MemoryKind::Value); + Parent.getOrCreateScopArrayInfo(V, V->getType(), ShapeInfo::fromSizes({}), MemoryKind::Value); Access = new MemoryAccess(this, nullptr, MemoryAccess::READ, V, V->getType(), - true, {}, {}, V, MemoryKind::Value); + true, {}, ShapeInfo::fromSizes({}), V, MemoryKind::Value); Parent.addAccessFunction(Access); Access->buildAccessRelation(SAI); addAccess(Access); @@ -3964,27 +4009,93 @@ } } +Value *getPointerFromLoadOrStore(Value *V) { + if (LoadInst *LI = dyn_cast(V)) + return LI->getPointerOperand(); + + if (StoreInst *SI = dyn_cast(V)) + return SI->getPointerOperand(); + return nullptr; +} + + ScopArrayInfo *Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *ElementType, - ArrayRef Sizes, + ShapeInfo Shape, MemoryKind Kind, const char *BaseName) { assert((BasePtr || BaseName) && "BasePtr and BaseName can not be nullptr at the same time."); assert(!(BasePtr && BaseName) && "BaseName is redundant."); + + // We assume that arrays with the strided representation can unify. + // Yes this is nuts. Yes I still want to do this. + auto unifyStridedArrayBasePtrs = [&]() -> Value * { + if (Shape.hasSizes()) return BasePtr; + + const Value *CurBase = getPointerFromLoadOrStore(BasePtr); + if (!CurBase) + return BasePtr; + + for (ScopArrayInfo *SAI : arrays()) { + Value *SAIBase = getPointerFromLoadOrStore(SAI->getBasePtr()); + if (!SAIBase) + continue; + + if (SAIBase == CurBase && SAI->hasStrides()) + return SAI->getBasePtr(); + } + return BasePtr; + }; + + BasePtr = unifyStridedArrayBasePtrs(); + auto &SAI = BasePtr ? 
ScopArrayInfoMap[std::make_pair(BasePtr, Kind)] : ScopArrayNameMap[BaseName]; + + + // errs() << "Creating: " << (int)Kind << "\n"; + // BasePtr->dump(); + if (!SAI) { auto &DL = getFunction().getParent()->getDataLayout(); - SAI.reset(new ScopArrayInfo(BasePtr, ElementType, getIslCtx(), Sizes, Kind, + SAI.reset(new ScopArrayInfo(BasePtr, ElementType, getIslCtx(), Shape, Kind, DL, this, BaseName)); ScopArrayInfoSet.insert(SAI.get()); } else { SAI->updateElementType(ElementType); // In case of mismatching array sizes, we bail out by setting the run-time // context to false. - if (!SAI->updateSizes(Sizes)) - invalidate(DELINEARIZATION, DebugLoc()); + if (SAI->hasStrides() != Shape.hasStrides()) { + LLVM_DEBUG(dbgs() << "SAI and new shape do not agree:\n"); + LLVM_DEBUG(dbgs() << "SAI: "; SAI->print(dbgs(), true); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Shape: " << Shape << "\n"); + + if (Shape.hasStrides()) { + LLVM_DEBUG(dbgs() << "Shape has strides, SAI had size. Overwriting size " + "with strides"); + SAI->overwriteSizeWithStrides(Shape.strides(), Shape.offset()); + } else { + + errs() << __PRETTY_FUNCTION__ << "\n" << "SAI has strides, Shape is size based. This should not " + "happen. 
Ignoring new data for now."; + errs() << " SAI:\n"; + SAI->print(errs(), false); + errs() << "Shape:\n"; + errs() << Shape << "\n"; + errs() << "---\n"; + // report_fatal_error("SAI was given sizes when it had strides"); + return SAI.get(); + } + } + + if (SAI->hasStrides()) { + SAI->updateStrides(Shape.strides(), Shape.offset()); + } else { + if (!SAI->updateSizes(Shape.sizes())) + invalidate(DELINEARIZATION, DebugLoc()); + } } + return SAI.get(); } @@ -4000,7 +4111,7 @@ else SCEVSizes.push_back(nullptr); - auto *SAI = getOrCreateScopArrayInfo(nullptr, ElementType, SCEVSizes, + auto *SAI = getOrCreateScopArrayInfo(nullptr, ElementType, ShapeInfo::fromSizes(SCEVSizes), MemoryKind::Array, BaseName.c_str()); return SAI; } Index: lib/CodeGen/IslExprBuilder.cpp =================================================================== --- lib/CodeGen/IslExprBuilder.cpp +++ lib/CodeGen/IslExprBuilder.cpp @@ -40,11 +40,12 @@ cl::Hidden, cl::init(OT_REQUEST), cl::ZeroOrMore, cl::cat(PollyCategory)); IslExprBuilder::IslExprBuilder(Scop &S, PollyIRBuilder &Builder, - IDToValueTy &IDToValue, ValueMapT &GlobalMap, + IDToValueTy &IDToValue, SCEVToValueTy &SCEVToValue, + ValueMapT &GlobalMap, const DataLayout &DL, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, BasicBlock *StartBlock) - : S(S), Builder(Builder), IDToValue(IDToValue), GlobalMap(GlobalMap), + : S(S), Builder(Builder), IDToValue(IDToValue), SCEVToValue(SCEVToValue), GlobalMap(GlobalMap), DL(DL), SE(SE), DT(DT), LI(LI), StartBlock(StartBlock) { OverflowState = (OTMode == OT_ALWAYS) ? Builder.getFalse() : nullptr; } @@ -288,50 +289,166 @@ } IndexOp = nullptr; - for (unsigned u = 1, e = isl_ast_expr_get_op_n_arg(Expr); u < e; u++) { - Value *NextIndex = create(isl_ast_expr_get_op_arg(Expr, u)); - assert(NextIndex->getType()->isIntegerTy() && - "Access index should be an integer"); + + // TODO: Oh right, I had forgotten about _this_ amazing hack. 
+ // In a nutshell, when we are generating code on the kernel side, we choose to lookup + // SCEVtoValueMap. + // What we should actually do is to _always_ query SCEVToValueMap, but only populate it + // when we are performing codegen on the kernel side. + // + // Does SCEVExpander automatically take care of this? IIRC, I had tried SCEVExpander but it + // failed for some reason. Maybe we should try to replace the SCEVToValueMap with SCEVExpander. + const std::string Name = Builder.GetInsertBlock()->getModule()->getName(); + + // TODO: refactor this, remove the nesting. It is confusing to tell WTF is happening + // without pen-and-paper. The old code was already not super straightforward, and this just + // makes it worse. + if (SAI->hasStrides()) { + for (unsigned u = 1, e = isl_ast_expr_get_op_n_arg(Expr); u < e; u++) { + Value *NextIndex = create(isl_ast_expr_get_op_arg(Expr, u)); + assert(NextIndex->getType()->isIntegerTy() && + "Access index should be an integer"); + + Type *Ty = [&]() { + if (IndexOp) + return getWidestType(NextIndex->getType(), IndexOp->getType()); + else + return NextIndex->getType(); + }(); - if (PollyDebugPrinting) - RuntimeDebugBuilder::createCPUPrinter(Builder, "[", NextIndex, "]"); + if (Ty != NextIndex->getType()) + NextIndex = Builder.CreateIntCast(NextIndex, Ty, true); - if (!IndexOp) { - IndexOp = NextIndex; - } else { - Type *Ty = getWidestType(NextIndex->getType(), IndexOp->getType()); + const SCEV *DimSCEV = SAI->getDimensionStride(u - 1); + assert(DimSCEV); + + Value *DimSize = nullptr; + llvm::ValueToValueMap Map(GlobalMap.begin(), GlobalMap.end()); + + // HACK: we do this because we know the kernel name. 
+ if (Name.find("FUNC__") != std::string::npos) { + auto OldValue = SCEVToValue.find(DimSCEV); + if (OldValue == SCEVToValue.end()) + assert(false && "!OldValue"); + + auto NewDimSizeIt = Map.find(OldValue->second); + if (NewDimSizeIt == Map.end()) + assert(false && "!NewDimSizeIt"); + DimSize = NewDimSizeIt->second; + } else { + + DimSCEV = SCEVParameterRewriter::rewrite(DimSCEV, SE, Map); + DimSize = expandCodeFor(S, SE, DL, "polly", DimSCEV, DimSCEV->getType(), + &*Builder.GetInsertPoint(), nullptr, + StartBlock->getSinglePredecessor()); + DimSize = getLatestValue(DimSize); + } + assert(DimSize && "dimsize uninitialized"); if (Ty != NextIndex->getType()) - NextIndex = Builder.CreateIntCast(NextIndex, Ty, true); - if (Ty != IndexOp->getType()) - IndexOp = Builder.CreateIntCast(IndexOp, Ty, true); + NextIndex = Builder.CreateSExtOrTrunc(NextIndex, Ty, + "polly.access.idxval." + BaseName + std::to_string(u - 1)); + if (Ty != DimSize->getType()) + DimSize = Builder.CreateSExtOrTrunc(DimSize, Ty, + "polly.access.stride." + BaseName + std::to_string(u - 1)); + + NextIndex = createMul(NextIndex, DimSize, "polly.access.idxval_x_stride." + BaseName + std::to_string(u - 1)); + + if (PollyDebugPrinting) + RuntimeDebugBuilder::createCPUPrinter(Builder, "[", NextIndex, "]"); + if (!IndexOp) { + IndexOp = NextIndex; + } else { + if (Ty != IndexOp->getType()) + IndexOp = Builder.CreateIntCast(IndexOp, Ty, true); + IndexOp = createAdd(IndexOp, NextIndex, "polly.access.idx_accum." + BaseName); + } // end else + } // end for loop over stride dims + assert(IndexOp && "expected correct index op"); + Value *Offset = nullptr; + // If we are in the kernel, then the base pointer has already been + // offset correctly so we need not do anything about it. 
+ if (Name.find("FUNC__") != std::string::npos) { + llvm::ValueToValueMap Map(GlobalMap.begin(), GlobalMap.end()); + const SCEV *OffsetSCEV = SAI->getStrideOffset(); + auto OldValue = SCEVToValue.find(OffsetSCEV); + if (OldValue == SCEVToValue.end()) + assert(false && "!OldValue offset"); + + auto NewIt = Map.find(OldValue->second); + if (NewIt == Map.end()) { + Offset = OldValue->second; + // errs() << "OldValue: " << *OldValue->second << "\n"; + // assert(false && "!NewIt offset"); + } + else { + Offset = NewIt->second; + } + assert(Offset); + if (auto OffsetInst = dyn_cast(Offset)) { + assert(OffsetInst->getModule() == Builder.GetInsertBlock()->getModule()); + } + } else { + const SCEV *OffsetSCEV = SAI->getStrideOffset(); + llvm::ValueToValueMap Map(GlobalMap.begin(), GlobalMap.end()); - IndexOp = createAdd(IndexOp, NextIndex, "polly.access.add." + BaseName); + // If we are outside a kernel, then we do need to synthesize an offset. + OffsetSCEV = SCEVParameterRewriter::rewrite(OffsetSCEV, SE, Map); + Offset = expandCodeFor(S, SE, DL, "polly", OffsetSCEV, + OffsetSCEV->getType(), &*Builder.GetInsertPoint(), + nullptr, StartBlock->getSinglePredecessor()); + Offset = getLatestValue(Offset); } + assert(Offset && "dimsize uninitialized"); + Offset = Builder.CreateIntCast(Offset, IndexOp->getType(), true); + IndexOp = createAdd(IndexOp, Offset, "polly.access.offseted." 
+ BaseName); + } // end hasStride + else { + for (unsigned u = 1, e = isl_ast_expr_get_op_n_arg(Expr); u < e; u++) { + Value *NextIndex = create(isl_ast_expr_get_op_arg(Expr, u)); + assert(NextIndex->getType()->isIntegerTy() && + "Access index should be an integer"); + + if (PollyDebugPrinting) + RuntimeDebugBuilder::createCPUPrinter(Builder, "[", NextIndex, "]"); + + if (!IndexOp) { + IndexOp = NextIndex; + } else { + Type *Ty = getWidestType(NextIndex->getType(), IndexOp->getType()); + + if (Ty != NextIndex->getType()) + NextIndex = Builder.CreateIntCast(NextIndex, Ty, true); + if (Ty != IndexOp->getType()) + IndexOp = Builder.CreateIntCast(IndexOp, Ty, true); + + IndexOp = createAdd(IndexOp, NextIndex, "polly.access.add." + BaseName); + } - // For every but the last dimension multiply the size, for the last - // dimension we can exit the loop. - if (u + 1 >= e) - break; + // For every but the last dimension multiply the size, for the last + // dimension we can exit the loop. + if (u + 1 >= e) + break; - const SCEV *DimSCEV = SAI->getDimensionSize(u); + const SCEV *DimSCEV = SAI->getDimensionSize(u); - llvm::ValueToValueMap Map(GlobalMap.begin(), GlobalMap.end()); - DimSCEV = SCEVParameterRewriter::rewrite(DimSCEV, SE, Map); - Value *DimSize = - expandCodeFor(S, SE, DL, "polly", DimSCEV, DimSCEV->getType(), - &*Builder.GetInsertPoint(), nullptr, - StartBlock->getSinglePredecessor()); - - Type *Ty = getWidestType(DimSize->getType(), IndexOp->getType()); - - if (Ty != IndexOp->getType()) - IndexOp = Builder.CreateSExtOrTrunc(IndexOp, Ty, - "polly.access.sext." + BaseName); - if (Ty != DimSize->getType()) - DimSize = Builder.CreateSExtOrTrunc(DimSize, Ty, - "polly.access.sext." + BaseName); - IndexOp = createMul(IndexOp, DimSize, "polly.access.mul." 
+ BaseName); + llvm::ValueToValueMap Map(GlobalMap.begin(), GlobalMap.end()); + DimSCEV = SCEVParameterRewriter::rewrite(DimSCEV, SE, Map); + Value *DimSize = + expandCodeFor(S, SE, DL, "polly", DimSCEV, DimSCEV->getType(), + &*Builder.GetInsertPoint(), nullptr, + StartBlock->getSinglePredecessor()); + + Type *Ty = getWidestType(DimSize->getType(), IndexOp->getType()); + + if (Ty != IndexOp->getType()) + IndexOp = Builder.CreateSExtOrTrunc(IndexOp, Ty, + "polly.access.sext." + BaseName); + if (Ty != DimSize->getType()) + DimSize = Builder.CreateSExtOrTrunc(DimSize, Ty, + "polly.access.sext." + BaseName); + IndexOp = createMul(IndexOp, DimSize, "polly.access.mul." + BaseName); + } } Access = Builder.CreateGEP(Base, IndexOp, "polly.access." + BaseName); @@ -784,3 +901,10 @@ llvm_unreachable("Unexpected enum value"); } + + +llvm::Value *IslExprBuilder::getLatestValue(llvm::Value *Old) { + if (GlobalMap.find(Old) != GlobalMap.end()) + return GlobalMap.find(Old)->second; + return Old; +} Index: lib/CodeGen/IslNodeBuilder.cpp =================================================================== --- lib/CodeGen/IslNodeBuilder.cpp +++ lib/CodeGen/IslNodeBuilder.cpp @@ -392,6 +392,7 @@ } } + Value *IslNodeBuilder::getLatestValue(Value *Original) const { auto It = ValueMap.find(Original); if (It == ValueMap.end()) @@ -1207,6 +1208,45 @@ } return true; } +void IslNodeBuilder::materializeStridedArraySizes() { + for (ScopArrayInfo *Array : S.arrays()) { + if (!Array->hasStrides()) + continue; + + + for (unsigned i = 0; i < Array->getNumberOfDimensions(); i++) { + isl_pw_aff *ParametricPwAff = Array->getDimensionSizePw(i).release(); + assert(ParametricPwAff && "parametric pw_aff corresponding " + "to outermost dimension does not " + "exist"); + + isl_id *Id = isl_pw_aff_get_dim_id(ParametricPwAff, isl_dim_param, 0); + isl_pw_aff_free(ParametricPwAff); + + assert(Id && "pw_aff is not parametric"); + + LLVM_DEBUG( + dbgs() << "-\n"; + dbgs() << "i: " << i << "\n"; + dbgs() << "ID: " 
<getDimensionStride(i); + // dbgs() << "StrideSCEV: " << *StrideSCEV << "\n"; + + Value *Stride = generateSCEV(StrideSCEV); + assert(Stride); + + // errs() << "StrideVal: " << *Stride << "\n"; + IDToValue[Id] = Stride; + SCEVToValue[Array->getDimensionStride(i)] = Stride; + isl_id_free(Id); + } + + const SCEV *OffsetSCEV = Array->getStrideOffset(); + Value *Offset = generateSCEV(OffsetSCEV); + assert(Offset); + SCEVToValue[OffsetSCEV] = Offset; + } +} Value *IslNodeBuilder::preloadUnconditionally(isl_set *AccessRange, isl_ast_build *Build, @@ -1544,6 +1584,7 @@ // parameter materializeFortranArrayOutermostDimension(); + // Generate values for the current loop iteration for all surrounding loops. // // We may also reference loops outside of the scop which do not contain the @@ -1560,6 +1601,9 @@ L = L->getParentLoop(); } + // Materialize the stride information for each array + materializeStridedArraySizes(); + isl_set_free(Context); } Index: lib/CodeGen/PPCGCodeGeneration.cpp =================================================================== --- lib/CodeGen/PPCGCodeGeneration.cpp +++ lib/CodeGen/PPCGCodeGeneration.cpp @@ -1467,6 +1467,9 @@ SetVector SubtreeValues; SetVector SCEVs; SetVector Loops; + + + isl::space ParamSpace = isl::space(S.getIslCtx(), 0, 0).params(); SubtreeReferences References = { LI, SE, S, ValueMap, SubtreeValues, SCEVs, getBlockGenerator(), @@ -1480,6 +1483,9 @@ for (const auto &I : OutsideLoopIterations) SubtreeValues.insert(cast(I.second)->getValue()); + for (const auto &I : SCEVToValue) + SubtreeValues.insert(I.second); + isl_ast_node_foreach_descendant_top_down( Kernel->tree, collectReferencesInGPUStmt, &References); @@ -2012,18 +2018,43 @@ const ScopArrayInfo *SAI = ScopArrayInfo::getFromId(isl::manage_copy(Id)); Type *EleTy = SAI->getElementType(); Value *Val = &*Arg; - SmallVector Sizes; isl_ast_build *Build = isl_ast_build_from_context(isl_set_copy(Prog->context)); - Sizes.push_back(nullptr); - for (long j = 1, n = 
Kernel->array[i].array->n_index; j < n; j++) { - isl_ast_expr *DimSize = isl_ast_build_expr_from_pw_aff( - Build, isl_multi_pw_aff_get_pw_aff(Kernel->array[i].array->bound, j)); - auto V = ExprBuilder.create(DimSize); - Sizes.push_back(SE.getSCEV(V)); - } + + + + SmallVector Sizes; + + // TODO: take "correct" capture (const & or whatever of SAI) + // TODO: this is so fugly, find a better way to express this :( + ShapeInfo NewShape = [&] (SmallVector &Sizes) { + if (SAI->hasStrides()) { + // TODO: find some way to merge the code? Here, we know the outermose + // dimension index so we can start from 0. In the sizes code, we need + // to start from 1 to indicate that we do not know the shape of the outermost dim. + for (long j = 0, n = Kernel->array[i].array->n_index; j < n; j++) { + isl_ast_expr *DimSize = isl_ast_build_expr_from_pw_aff( + Build, + isl_multi_pw_aff_get_pw_aff(Kernel->array[i].array->bound, j)); + auto V = ExprBuilder.create(DimSize); + Sizes.push_back(SE.getSCEV(V)); + } + return SAI->getShape(); + } else { + Sizes.push_back(nullptr); + for (long j = 1, n = Kernel->array[i].array->n_index; j < n; j++) { + isl_ast_expr *DimSize = isl_ast_build_expr_from_pw_aff( + Build, + isl_multi_pw_aff_get_pw_aff(Kernel->array[i].array->bound, j)); + auto V = ExprBuilder.create(DimSize); + Sizes.push_back(SE.getSCEV(V)); + } + return ShapeInfo::fromSizes(Sizes); + } + }(Sizes); + const ScopArrayInfo *SAIRep = - S.getOrCreateScopArrayInfo(Val, EleTy, Sizes, MemoryKind::Array); + S.getOrCreateScopArrayInfo(Val, EleTy, NewShape, MemoryKind::Array); LocalArrays.push_back(Val); isl_ast_build_free(Build); @@ -2242,6 +2273,7 @@ isl_val_free(Val); ArrayTy = ArrayType::get(ArrayTy, Bound); } + const ShapeInfo NewShape = ShapeInfo::fromSizes(Sizes); const ScopArrayInfo *SAI; Value *Allocation; @@ -2259,7 +2291,7 @@ llvm_unreachable("unknown variable type"); } SAI = - S.getOrCreateScopArrayInfo(Allocation, EleTy, Sizes, MemoryKind::Array); + 
S.getOrCreateScopArrayInfo(Allocation, EleTy,NewShape, MemoryKind::Array); Id = isl_id_alloc(S.getIslCtx().get(), Var.name, nullptr); IDToValue[Id] = Allocation; LocalArrays.push_back(Allocation); Index: lib/Support/ScopHelper.cpp =================================================================== --- lib/Support/ScopHelper.cpp +++ lib/Support/ScopHelper.cpp @@ -681,3 +681,61 @@ return false; } + +llvm::Optional> +polly::getAbstractIndexingCall(MemAccInst Inst, ScalarEvolution &SE) { + // TODO: I can get rid of all this crap and use SCEV to drill through the bitcasts. + // Alas, this was written when I was more of an LLVM noob than I currently am :) + // TODO: clean this up please. + + + // Case 1. (Total size of array not known) + // %2 = tail call i64 @_gfortran_polly_array_index_2(i64 1, i64 %1, i64 + // %indvars.iv1, i64 %indvars.iv) #1 + // %3 = getelementptr float, float* %0, i64 + // %bitcast = bitcast %3 to + // %2 store float 2.000000e+00, float* %3, align 4 STORE (GEP ) + // (CALL index_2(, ))) + + // Case 2. (Total size of array statically known) + // %4 = tail call i64 @_gfortran_polly_array_index_2(i64 1, i64 5, i64 + // %indvars.iv1, i64 %indvars.iv) #1 %5 = getelementptr [25 x float], [25 x + // float]* @__m_MOD_g_arr_const_5_5, i64 0, i64 %4 store float 4.200000e+01, + // float* %5, align 4 + + Value *MaybeBitcast = Inst.getPointerOperand(); + if (!MaybeBitcast) + return Optional>(None); + + + // If we have a bitcast as the parameter to the instruction, strip off the + // bitcast. Otherwise, return the original instruction operand. 
+ Value *MaybeGEP = [&]() -> Value * { + BitCastOperator *Bitcast = dyn_cast(MaybeBitcast); + if (Bitcast) { + return Bitcast->getOperand(0); + } + return Inst.getPointerOperand(); + }(); + + + GEPOperator *GEP = dyn_cast(MaybeGEP); + + if (!GEP) + return Optional>(None); + + + auto *MaybeCall = GEP->getOperand(GEP->getNumOperands() - 1); + assert(MaybeCall); + + CallInst *Call = dyn_cast(MaybeCall); + if (!Call) + return Optional>(None); + + if (!Call->getCalledFunction()->getName().count( + POLLY_ABSTRACT_INDEX_BASENAME)) + return Optional>(None); + + std::pair p = std::make_pair(Call, GEP); + return Optional>(p); +} \ No newline at end of file Index: lib/Transform/ForwardOpTree.cpp =================================================================== --- lib/Transform/ForwardOpTree.cpp +++ lib/Transform/ForwardOpTree.cpp @@ -362,7 +362,7 @@ MemoryAccess *Access = new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(), - LI->getType(), true, {}, Sizes, LI, MemoryKind::Array); + LI->getType(), true, {}, ShapeInfo::fromSizes(Sizes), LI, MemoryKind::Array); S->addAccessFunction(Access); Stmt->addAccess(Access, true);