Index: include/polly/CodeGen/IslExprBuilder.h =================================================================== --- include/polly/CodeGen/IslExprBuilder.h +++ include/polly/CodeGen/IslExprBuilder.h @@ -92,6 +92,8 @@ public: /// A map from isl_ids to llvm::Values. typedef llvm::MapVector> IDToValueTy; + typedef llvm::MapVector> + SCEVToValueTy; typedef llvm::MapVector IDToScopArrayInfoTy; @@ -130,9 +132,10 @@ /// @param LI LoopInfo analysis for the current function. /// @param StartBlock The first basic block after the RTC. IslExprBuilder(Scop &S, PollyIRBuilder &Builder, IDToValueTy &IDToValue, - ValueMapT &GlobalMap, const llvm::DataLayout &DL, - llvm::ScalarEvolution &SE, llvm::DominatorTree &DT, - llvm::LoopInfo &LI, llvm::BasicBlock *StartBlock); + SCEVToValueTy &SCEVToValue, ValueMapT &GlobalMap, + const llvm::DataLayout &DL, llvm::ScalarEvolution &SE, + llvm::DominatorTree &DT, llvm::LoopInfo &LI, + llvm::BasicBlock *StartBlock); /// Create LLVM-IR for an isl_ast_expr[ession]. /// @@ -202,6 +205,7 @@ PollyIRBuilder &Builder; IDToValueTy &IDToValue; + SCEVToValueTy &SCEVToValue; ValueMapT &GlobalMap; const llvm::DataLayout &DL; Index: include/polly/CodeGen/IslNodeBuilder.h =================================================================== --- include/polly/CodeGen/IslNodeBuilder.h +++ include/polly/CodeGen/IslNodeBuilder.h @@ -100,8 +100,8 @@ const DataLayout &DL, LoopInfo &LI, ScalarEvolution &SE, DominatorTree &DT, Scop &S, BasicBlock *StartBlock) : S(S), Builder(Builder), Annotator(Annotator), - ExprBuilder(S, Builder, IDToValue, ValueMap, DL, SE, DT, LI, - StartBlock), + ExprBuilder(S, Builder, IDToValue, SCEVToValue, ValueMap, DL, SE, DT, + LI, StartBlock), BlockGen(Builder, LI, SE, DT, ScalarMap, EscapeMap, ValueMap, &ExprBuilder, StartBlock), RegionGen(BlockGen), DL(DL), LI(LI), SE(SE), DT(DT), @@ -118,6 +118,10 @@ /// materialized. True otherwise. bool materializeFortranArrayOutermostDimension(); + Value *extractStrideFromFAD(GlobalValue *FAD, int dimension); + Value *extractOffsetFromFAD(GlobalValue *FAD); + void materializeStridedArraySizes(); + /// Generate code that evaluates @p Condition at run-time. /// /// This function is typically called to generate the LLVM-IR for the @@ -198,6 +202,10 @@ // ivs. IslExprBuilder::IDToValueTy IDToValue; + // This maps a const SCEV* to the Value* it has in the generated program. For + // now, this stores strides and offsets of SAIs. + IslExprBuilder::SCEVToValueTy SCEVToValue; + /// A collection of all parallel subfunctions that have been created. SmallVector ParallelSubfunctions; Index: include/polly/ScopBuilder.h =================================================================== --- include/polly/ScopBuilder.h +++ include/polly/ScopBuilder.h @@ -146,6 +146,13 @@ void buildScop(Region &R, AssumptionCache &AC, OptimizationRemarkEmitter &ORE); + /// Try to build an index expression from a load of an abstract location + /// Load/Store (GEP ) + /// + /// @param Inst The Load/Store instruction that access the memory + /// @param Stmt The parent statement of the instruction + bool buildAccessPollyAbstractMatrix(MemAccInst Inst, ScopStmt *Stmt); + /// Try to build a multi-dimensional fixed sized MemoryAccess from the /// Load/Store instruction. /// @@ -269,7 +276,7 @@ Value *BaseAddress, Type *ElemType, bool Affine, Value *AccessValue, ArrayRef Subscripts, - ArrayRef Sizes, MemoryKind Kind); + ShapeInfo Shape, MemoryKind Kind); /// Create a MemoryAccess that represents either a LoadInst or /// StoreInst. @@ -288,8 +295,8 @@ void addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst, MemoryAccess::AccessType AccType, Value *BaseAddress, Type *ElemType, bool IsAffine, - ArrayRef Subscripts, - ArrayRef Sizes, Value *AccessValue); + ArrayRef Subscripts, ShapeInfo Shape, + Value *AccessValue); /// Create a MemoryAccess for writing an llvm::Instruction. /// Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -25,6 +25,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" @@ -111,6 +112,189 @@ DELINEARIZATION, }; +// Abstract over a notion of the shape of an array: +// Once can compute indeces using both sizes and strides. +class ShapeInfo { +private: + using SCEVArrayTy = SmallVector; + using SCEVArrayRefTy = ArrayRef; + + using OptionalSCEVArrayTy = Optional; + using OptionalSCEVArrayRefTy = Optional; + + llvm::Optional> Sizes; + llvm::Optional> Strides; + llvm::Optional Offset; + + llvm::OptionalHackFAD; + + ShapeInfo(Optional> SizesRef, + Optional> StridesRef, + llvm::Optional Offset, + llvm::OptionalHackFAD) + : Offset(Offset), HackFAD(HackFAD) { + // Can check for XOR + assert(bool(SizesRef) || bool(StridesRef)); + assert(!(bool(SizesRef) && bool(StridesRef))); + + if (StridesRef || Offset) { + assert(Offset); + assert(StridesRef); + } + + if (SizesRef) + Sizes = + OptionalSCEVArrayTy(SCEVArrayTy(SizesRef->begin(), SizesRef->end())); + + if (StridesRef) + Strides = OptionalSCEVArrayTy( + SCEVArrayTy(StridesRef->begin(), StridesRef->end())); + } + + ShapeInfo(NoneType) : Sizes(None), Strides(None), Offset(None), HackFAD(None) {} + +public: + static ShapeInfo fromSizes(ArrayRef Sizes) { + return ShapeInfo(OptionalSCEVArrayRefTy(Sizes), None, None, None); + } + + ShapeInfo(const ShapeInfo &other) { + Sizes = other.Sizes; + Strides = other.Strides; + Offset = other.Offset; + HackFAD = other.HackFAD; + } + + ShapeInfo &operator=(const ShapeInfo &other) { + Sizes = other.Sizes; + Strides = other.Strides; + Offset = other.Offset; + HackFAD = other.HackFAD; + return *this; + } + + static ShapeInfo fromStrides(ArrayRef Strides, + const SCEV *Offset, + GlobalValue *FAD) { + assert(Offset && "offset is null"); + return ShapeInfo(None, OptionalSCEVArrayRefTy(Strides), + Optional(Offset), + Optional(FAD)); + } + + static ShapeInfo none() { return ShapeInfo(None); } + + unsigned getNumberOfDimensions() const { + // assert(isInitialized()); + if (Sizes) + return Sizes->size(); + + if (Strides) + return Strides->size(); + + return 0; + } + + /// Set the sizes of the Shape. It checks the invariant + /// That this shape does not have strides. + void setSizes(SmallVector NewSizes) { + assert(!bool(Strides)); + + if (!bool(Sizes)) { + Sizes = Optional>( + SmallVector()); + } + + Sizes = NewSizes; + } + + /// Set the strides of the Shape. It checks the invariant + /// That this shape does not have sizes. + void setStrides(ArrayRef NewStrides, const SCEV *NewOffset, GlobalValue *NewHackFAD) { + Offset = NewOffset; + assert(!bool(Sizes)); + + // Be explicit because GCC(5.3.0) is unable to deduce this. + if (!Strides) + Strides = Optional>( + SmallVector()); + + Strides->clear(); + Strides->insert(Strides->begin(), NewStrides.begin(), NewStrides.end()); + + HackFAD = NewHackFAD; + + assert(Offset && "offset is null"); + } + + const SmallVector &sizes() const { + assert(!bool(Strides)); + return Sizes.getValue(); + } + + const SCEV *offset() const { return Offset.getValue(); } + + + GlobalValue *hackFAD() const { return HackFAD.getValue(); } + + SmallVector &sizes_mut() { + assert(!bool(Strides)); + return Sizes.getValue(); + } + + bool isInitialized() const { return bool(Sizes) || bool(Strides); } + + const SmallVector &strides() const { + assert(!bool(Sizes)); + return Strides.getValue(); + } + + bool hasSizes() const { return bool(Sizes); } + bool hasStrides() const { return bool(Strides); } + + template + Ret mapSizes(std::function &)> func, + Ret otherwise) { + if (Sizes) + return func(*Sizes); + + return otherwise; + } + + void mapSizes(std::function &)> func) { + if (Sizes) + func(*Sizes); + } + + raw_ostream &print(raw_ostream &OS) const { + if (Sizes) { + OS << "Sizes: "; + for (auto Size : *Sizes) { + if (Size) + OS << *Size << ", "; + else + OS << "null" + << ", "; + } + return OS; + } else if (Strides) { + OS << "Strides: "; + for (auto Stride : *Strides) { + if (Stride) + OS << *Stride << ", "; + else + OS << "null" + << ", "; + } + return OS; + } + OS << "Uninitialized.\n"; + return OS; + } +}; + +raw_ostream &operator<<(raw_ostream &OS, const ShapeInfo &Shape); + /// Enum to distinguish between assumptions and restrictions. enum AssumptionSign { AS_ASSUMPTION, AS_RESTRICTION }; @@ -265,8 +449,8 @@ /// @param S The scop this array object belongs to. /// @param BaseName The optional name of this memory reference. ScopArrayInfo(Value *BasePtr, Type *ElementType, isl::ctx IslCtx, - ArrayRef DimensionSizes, MemoryKind Kind, - const DataLayout &DL, Scop *S, const char *BaseName = nullptr); + ShapeInfo Shape, MemoryKind Kind, const DataLayout &DL, Scop *S, + const char *BaseName = nullptr); /// Destructor to free the isl id of the base pointer. ~ScopArrayInfo(); @@ -295,6 +479,13 @@ /// with old sizes bool updateSizes(ArrayRef Sizes, bool CheckConsistency = true); + /// Update the strides of a ScopArrayInfo object. + void overwriteSizeWithStrides(ArrayRef Strides, + const SCEV *Offset, GlobalValue *HackFAD); + + /// Update the strides of a ScopArrayInfo object. + bool updateStrides(ArrayRef Strides, const SCEV *Offset, GlobalValue *HackFAD); + /// Make the ScopArrayInfo model a Fortran array. /// It receives the Fortran array descriptor and stores this. /// It also adds a piecewise expression for the outermost dimension @@ -327,9 +518,11 @@ if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI || Kind == MemoryKind::Value) return 0; - return DimensionSizes.size(); + return Shape.getNumberOfDimensions(); } + ShapeInfo getShape() const { return Shape; } + /// Return the size of dimension @p dim as SCEV*. // // Scalars do not have array dimensions and the first dimension of @@ -337,9 +530,16 @@ // information, in case the array is not newly created. const SCEV *getDimensionSize(unsigned Dim) const { assert(Dim < getNumberOfDimensions() && "Invalid dimension"); - return DimensionSizes[Dim]; + return Shape.sizes()[Dim]; } + const SCEV *getDimensionStride(unsigned Dim) const { + assert(Dim < getNumberOfDimensions() && "Invalid dimension"); + return Shape.strides()[Dim]; + } + + const SCEV *getStrideOffset() const { return Shape.offset(); } + /// Return the size of dimension @p dim as isl::pw_aff. // // Scalars do not have array dimensions and the first dimension of @@ -350,6 +550,18 @@ return DimensionSizesPw[Dim]; } + isl::id getDimensionSizeId(unsigned Dim) const { + isl_pw_aff *ParametricPwAff = getDimensionSizePw(Dim).release(); + assert(ParametricPwAff && "parametric pw_aff corresponding " + "to dimension does not " + "exist"); + + isl_id *Id = isl_pw_aff_get_dim_id(ParametricPwAff, isl_dim_param, 0); + isl_pw_aff_free(ParametricPwAff); + assert(Id && "pw_aff is not parametric"); + return isl::manage(Id); + }; + /// Get the canonical element type of this array. /// /// @returns The canonical element type of this array. @@ -426,6 +638,8 @@ /// @returns True, if the arrays are compatible, False otherwise. bool isCompatibleWith(const ScopArrayInfo *Array) const; + bool hasStrides() const { return Shape.hasStrides(); } + private: void addDerivedSAI(ScopArrayInfo *DerivedSAI) { DerivedSAIs.insert(DerivedSAI); @@ -455,9 +669,6 @@ /// True if the newly allocated array is on heap. bool IsOnHeap = false; - /// The sizes of each dimension as SCEV*. - SmallVector DimensionSizes; - /// The sizes of each dimension as isl::pw_aff. SmallVector DimensionSizesPw; @@ -469,6 +680,9 @@ /// The data layout of the module. const DataLayout &DL; + /// The sizes of each dimension as SCEV*. + ShapeInfo Shape; + /// The scop this SAI object belongs to. Scop &S; @@ -591,7 +805,7 @@ Type *ElementType; /// Size of each dimension of the accessed array. - SmallVector Sizes; + ShapeInfo Shape; // @} // Properties describing the accessed element. @@ -759,10 +973,10 @@ /// @param IsAffine Whether the subscripts are affine expressions. /// @param Kind The kind of memory accessed. /// @param Subscripts Subscript expressions - /// @param Sizes Dimension lengths of the accessed array. + /// @param ShapeInfo Shape of the accessed array. MemoryAccess(ScopStmt *Stmt, Instruction *AccessInst, AccessType AccType, Value *BaseAddress, Type *ElemType, bool Affine, - ArrayRef Subscripts, ArrayRef Sizes, + ArrayRef Subscripts, ShapeInfo Sizes, Value *AccessValue, MemoryKind Kind); /// Create a new MemoryAccess that corresponds to @p AccRel. @@ -2801,8 +3015,7 @@ /// @param Kind The kind of the array info object. /// @param BaseName The optional name of this memory reference. ScopArrayInfo *getOrCreateScopArrayInfo(Value *BasePtr, Type *ElementType, - ArrayRef Sizes, - MemoryKind Kind, + ShapeInfo Shape, MemoryKind Kind, const char *BaseName = nullptr); /// Create an array and return the corresponding ScopArrayInfo object. Index: include/polly/Support/ScopHelper.h =================================================================== --- include/polly/Support/ScopHelper.h +++ include/polly/Support/ScopHelper.h @@ -15,9 +15,11 @@ #define POLLY_SUPPORT_IRHELPER_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SetVector.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" #include #include @@ -455,5 +457,9 @@ // @param BoxedLoops Set of Boxed Loops we get from the SCoP. llvm::Loop *getFirstNonBoxedLoopFor(llvm::BasicBlock *BB, llvm::LoopInfo &LI, const BoxedLoopsSetTy &BoxedLoops); + +static const std::string POLLY_ABSTRACT_INDEX_BASENAME = "polly_array_index"; +llvm::Optional> +getAbstractMatrixCall(MemAccInst Inst, llvm::ScalarEvolution &SE); } // namespace polly #endif Index: lib/Analysis/ScopBuilder.cpp =================================================================== --- lib/Analysis/ScopBuilder.cpp +++ lib/Analysis/ScopBuilder.cpp @@ -413,7 +413,7 @@ ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V))); addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType, - true, Subscripts, SizesSCEV, Val); + true, Subscripts, ShapeInfo::fromSizes(SizesSCEV), Val); return true; } @@ -464,7 +464,8 @@ scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent()); addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType, - true, AccItr->second.DelinearizedSubscripts, Sizes, Val); + true, AccItr->second.DelinearizedSubscripts, + ShapeInfo::fromSizes(Sizes), Val); return true; } @@ -509,8 +510,8 @@ DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV); addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(), IntegerType::getInt8Ty(DestPtrVal->getContext()), - LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr}, - Inst.getValueOperand()); + LengthIsAffine, {DestAccFunc, LengthVal}, + ShapeInfo::fromSizes({nullptr}), Inst.getValueOperand()); auto *MemTrans = dyn_cast(MemIntr); if (!MemTrans) @@ -531,8 +532,8 @@ SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV); addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(), IntegerType::getInt8Ty(SrcPtrVal->getContext()), - LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr}, - Inst.getValueOperand()); + LengthIsAffine, {SrcAccFunc, LengthVal}, + ShapeInfo::fromSizes({nullptr}), Inst.getValueOperand()); return true; } @@ -546,9 +547,13 @@ if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI)) return true; + bool ReadOnly = false; auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0); auto *CalledFunction = CI->getCalledFunction(); + if (CalledFunction->getName().count(POLLY_ABSTRACT_INDEX_BASENAME)) + return true; + switch (AA.getModRefBehavior(CalledFunction)) { case FMRB_UnknownModRefBehavior: llvm_unreachable("Unknown mod ref behaviour cannot be represented."); @@ -577,7 +582,8 @@ auto *ArgBasePtr = cast(SE.getPointerBase(ArgSCEV)); addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(), - ArgBasePtr->getType(), false, {AF}, {nullptr}, CI); + ArgBasePtr->getType(), false, {AF}, + ShapeInfo::fromSizes({nullptr}), CI); } return true; } @@ -627,10 +633,14 @@ AccType = MemoryAccess::MAY_WRITE; addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType, - IsAffine, {AccessFunction}, {nullptr}, Val); + IsAffine, {AccessFunction}, ShapeInfo::fromSizes({nullptr}), + Val); } void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) { + if (buildAccessPollyAbstractMatrix(Inst, Stmt)) + return; + if (buildAccessMemIntrinsic(Inst, Stmt)) return; @@ -645,6 +655,160 @@ buildAccessSingleDim(Inst, Stmt); } +static const bool AbstractMatrixDebug = false; + +// %v = bitcast (%w) +// %v = %w +// return %w in both cases. +Value *unwrapPossibleBitcast(Value *V) { + if (isa(V)) + return cast(V)->getOperand(0); + return V; +} + +//Return @__data_radiation_MOD_cobi from stride %7 +// %indvars.iv = phi i64 [ %indvars.iv.next, %"4" ], [ -10, %"3" ] +// %0 = load float*, float** bitcast (%"struct.array2_real(kind=4)"* @__m_MOD_g_arr to float**), align 32 +// %1 = load i64, i64* getelementptr inbounds (%"struct.array2_real(kind=4)", %"struct.array2_real(kind=4)"* @__m_MOD_g_arr, i64 0, i32 3, i64 1, i32 0), align 8 +// %2 = load i64, i64* getelementptr inbounds (%"struct.array2_real(kind=4)", %"struct.array2_real(kind=4)"* @__m_MOD_g_arr, i64 0, i32 1), align 8 +// %3 = tail call i64 @_gfortran_polly_array_index_2(i64 %2, i64 %1, i64 1, i64 %indvars.iv, i64 %indvars.iv1) #3 +GlobalValue *getBasePtrForVariableStride(Value *V) { + if(AbstractMatrixDebug) errs() << __PRETTY_FUNCTION__ << ":" << __LINE__ << "\n"; + LoadInst *LI; + LI = dyn_cast(V); + if (!LI) return nullptr; + + if(AbstractMatrixDebug) errs() << "LoadInst: " << *LI << "\n"; + Value *LoadBase = LI->getOperand(0); + if(AbstractMatrixDebug) errs() << "LoadBase: " << *LI << "\n"; + Value *CleanedLoadBase = unwrapPossibleBitcast(LoadBase); + if(AbstractMatrixDebug) errs() << "CleanedLoadBase: " << *CleanedLoadBase << "\n"; + + GEPOperator *GEP = nullptr; + GEP = dyn_cast(CleanedLoadBase); + if (!GEP) return nullptr; + if(AbstractMatrixDebug) errs() << "GEP: " << *GEP << "\n"; + + return dyn_cast(GEP->getPointerOperand()); +} +bool ScopBuilder::buildAccessPollyAbstractMatrix(MemAccInst Inst, + ScopStmt *Stmt) { + + auto optionalCallGEP = getAbstractMatrixCall(Inst, SE); + if (!optionalCallGEP) + return false; + + CallInst *Call; + GEPOperator *GEP; + std::tie(Call, GEP) = *optionalCallGEP; + + if (AbstractMatrixDebug) { + errs() << "MemAccInst: " << *Inst << "\n"; + errs() << "Call: " << *Call << "\n"; + errs() << "GEP: " << *GEP << "\n"; + } + + assert(Call->getNumArgOperands() % 2 == 1 && + "expect offset, stride, index pairs\n"); + const int NArrayDims = Call->getNumArgOperands() / 2; + if (AbstractMatrixDebug) + errs() << "Num array dims: " << NArrayDims << "\n"; + + // F(stride1, stride2, .., strideN, ix1, ix2, ..., ixN) + + Value *BasePtr = GEP->getPointerOperand(); + + std::vector Subscripts; + std::vector Strides; + + if (isa(Call->getArgOperand(0))) + return false; + + const SCEV *Offset = SE.getSCEV(Call->getArgOperand(0)); + if (AbstractMatrixDebug) + errs() << "Offset: " << *Offset << "\n"; + + // If all the strides are constants, then we don't need the FAD. + // Otherwise, we need the FAD to load the correct values of strides + // and offset. + GlobalValue *FAD = nullptr; + for (int i = 0; i < NArrayDims; i++) { + Value *Ix = Call->getArgOperand(1 + NArrayDims + i); + Value *Stride = Call->getArgOperand(1 + i); + if (isa(Ix) || isa(Stride)) + return false; + + if (AbstractMatrixDebug) + errs() << i << " |Raw Ix: " << *Ix << " |Raw Stride: " << *Stride << "\n"; + Subscripts.push_back(SE.getSCEV(Ix)); + const SCEV *StrideSCEV = SE.getSCEV(Stride); + + // Try to get an FAD from a stride. + if (!isa(StrideSCEV) && FAD == nullptr) { + FAD = getBasePtrForVariableStride(Stride); + // assert(FAD && "need legal FAD"); + } + Strides.push_back(StrideSCEV); + } + + for (unsigned i = 0; i < Subscripts.size(); ++i) { + if (AbstractMatrixDebug) + errs() << i << "| " + << " Sub: " << *Subscripts[i] << " |Stride: " << *Strides[i] + << "\n"; + } + + + Value *Val = Inst.getValueOperand(); + Type *ElementType = Val->getType(); + assert(BasePtr); + assert(ElementType); + + if (AbstractMatrixDebug) + errs() << "GEPInto / BasePtr (array): " << *BasePtr << "\n"; + if (AbstractMatrixDebug) + errs() << "ElementType: " << *ElementType << "\n"; + + if (AbstractMatrixDebug) + errs() << "Val: " << *Val << "\n"; + + enum MemoryAccess::AccessType AccType = + isa(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE; + + + if (AbstractMatrixDebug) { + errs() << "AccType: "; + switch (AccType) { + case MemoryAccess::AccessType::READ: + errs() << "ReadAccess\n"; + break; + case MemoryAccess::AccessType::MUST_WRITE: + errs() << "MustWriteAccess\n"; + break; + default: + report_fatal_error("expected only READ/MUST_WRITE"); + } + } + + /* + void addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst, + MemoryAccess::AccessType AccType, Value *BaseAddress, + Type *ElemType, bool IsAffine, + ArrayRef Subscripts, + ArrayRef Sizes, Value *AccessValue); + */ + + // NOTE: this should be fromStrides. + // NOTE: To be able to change this, we need to teach ScopArrayInfo to recieve + // a Shape object. So, do that first. + addArrayAccess(Stmt, Inst, AccType, BasePtr, ElementType, true, Subscripts, + ShapeInfo::fromStrides(Strides, Offset, FAD), Val); + + if (AbstractMatrixDebug) + errs() << "Added array access successfully!\n"; + + return true; +} void ScopBuilder::buildAccessFunctions() { for (auto &Stmt : *scop) { @@ -757,8 +921,7 @@ MemoryAccess *ScopBuilder::addMemoryAccess( ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType, Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue, - ArrayRef Subscripts, ArrayRef Sizes, - MemoryKind Kind) { + ArrayRef Subscripts, ShapeInfo Shape, MemoryKind Kind) { bool isKnownMustAccess = false; // Accesses in single-basic block statements are always executed. @@ -785,7 +948,7 @@ AccType = MemoryAccess::MAY_WRITE; auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType, - Affine, Subscripts, Sizes, AccessValue, Kind); + Affine, Subscripts, Shape, AccessValue, Kind); scop->addAccessFunction(Access); Stmt->addAccess(Access); @@ -797,12 +960,17 @@ Value *BaseAddress, Type *ElementType, bool IsAffine, ArrayRef Subscripts, - ArrayRef Sizes, - Value *AccessValue) { + ShapeInfo Shape, Value *AccessValue) { + + // DEBUG( + // dbgs() << "\n" << __PRETTY_FUNCTION__ << ":" << __LINE__ << "\n"; + // dbgs() << "\t-BaseAddr: " << *BaseAddress << "\n"; + // dbgs() << "\t-ElemType: " << *ElementType << "\n"; + // dbgs() << "\t-AccessValue: " << *AccessValue << "\n";); ArrayBasePointers.insert(BaseAddress); auto *MemAccess = addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress, ElementType, IsAffine, AccessValue, - Subscripts, Sizes, MemoryKind::Array); + Subscripts, Shape, MemoryKind::Array); if (!DetectFortranArrays) return; @@ -836,7 +1004,8 @@ addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(), true, Inst, ArrayRef(), - ArrayRef(), MemoryKind::Value); + ShapeInfo::fromSizes(ArrayRef()), + MemoryKind::Value); } void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) { @@ -874,7 +1043,8 @@ break; addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(), - true, V, ArrayRef(), ArrayRef(), + true, V, ArrayRef(), + ShapeInfo::fromSizes(ArrayRef()), MemoryKind::Value); // Inter-statement uses need to write the value in their defining statement. @@ -891,8 +1061,8 @@ // will create an exit PHI SAI object. It is needed during code generation // and would be created later anyway. if (IsExitBlock) - scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {}, - MemoryKind::ExitPHI); + scop->getOrCreateScopArrayInfo( + PHI, PHI->getType(), ShapeInfo::fromSizes({}), MemoryKind::ExitPHI); // This is possible if PHI is in the SCoP's entry block. The incoming blocks // from outside the SCoP's region have no statement representation. @@ -913,17 +1083,19 @@ return; } - MemoryAccess *Acc = addMemoryAccess( - IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true, - PHI, ArrayRef(), ArrayRef(), - IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI); + MemoryAccess *Acc = + addMemoryAccess(IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, + PHI->getType(), true, PHI, ArrayRef(), + ShapeInfo::fromSizes(ArrayRef()), + IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI); assert(Acc); Acc->addIncoming(IncomingBlock, IncomingValue); } void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) { addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true, - PHI, ArrayRef(), ArrayRef(), + PHI, ArrayRef(), + ShapeInfo::fromSizes(ArrayRef()), MemoryKind::PHI); } @@ -1094,8 +1266,9 @@ else Ty = MemoryKind::Array; + // NOTE: This is why We need to teach ScopArrayInfo to accept Shape. auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(), - ElementType, Access->Sizes, Ty); + ElementType, Access->Shape, Ty); Access->buildAccessRelation(SAI); scop->addAccessData(Access); } @@ -1204,7 +1377,8 @@ Instruction *GlobalRead = GlobalReadPair.second; for (auto *BP : ArrayBasePointers) addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ, - BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead); + BP, BP->getType(), false, {AF}, + ShapeInfo::fromSizes({nullptr}), GlobalRead); } scop->buildInvariantEquivalenceClasses(); Index: lib/Analysis/ScopDetection.cpp =================================================================== --- lib/Analysis/ScopDetection.cpp +++ lib/Analysis/ScopDetection.cpp @@ -535,9 +535,41 @@ return false; } +/// Return if S is a call to a function that we use to denote multidimensional +// accesses +bool isSCEVCallToPollyAbstractIndex(const SCEV *S) { + if (isa(S)) { + Value *V = cast(S)->getValue(); + CallInst *Call = dyn_cast(V); + if (Call && Call->getCalledFunction()->getName().count( + POLLY_ABSTRACT_INDEX_BASENAME)) + return true; + } + return false; +} + +/// Return if scev represents a multidim access. +bool isSCEVMultidimArrayAccess(const SCEV *S) { + if (isSCEVCallToPollyAbstractIndex(S)) + return true; + const SCEVMulExpr *Mul = dyn_cast(S); + if (!Mul) + return false; + if (Mul->getNumOperands() != 2) + return false; + return isSCEVCallToPollyAbstractIndex(Mul->getOperand(0)) || + isSCEVCallToPollyAbstractIndex(Mul->getOperand(1)); +} + bool ScopDetection::isAffine(const SCEV *S, Loop *Scope, DetectionContext &Context) const { + + if (isSCEVMultidimArrayAccess(S)) { + return true; + } + InvariantLoadsSetTy AccessILS; + if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS)) return false; @@ -681,9 +713,15 @@ Function *CalledFunction = CI.getCalledFunction(); + // Indirect calls are not supported. if (CalledFunction == nullptr) return false; + + // Function being called is a polly indexing function. + if (CalledFunction->getName().count(POLLY_ABSTRACT_INDEX_BASENAME)) { + return true; + } if (AllowModrefCall) { switch (AA.getModRefBehavior(CalledFunction)) { @@ -921,10 +959,12 @@ } } if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false, - Context.RequiredILS)) + Context.RequiredILS)) { + return invalid( Context, /*Assert=*/true, DelinearizedSize, Context.Accesses[BasePointer].front().first, BaseValue); + } } // No array shape derived. @@ -958,6 +998,7 @@ DetectionContext &Context, const SCEVUnknown *BasePointer, std::shared_ptr Shape) const { Value *BaseValue = BasePointer->getValue(); + bool BasePtrHasNonAffine = false; MapInsnToMemAcc TempMemoryAccesses; for (const auto &Pair : Context.Accesses[BasePointer]) { @@ -972,16 +1013,18 @@ if (!AF) { if (isAffine(Pair.second, Scope, Context)) Acc->DelinearizedSubscripts.push_back(Pair.second); - else + else { IsNonAffine = true; + } } else { if (Shape->DelinearizedSizes.size() == 0) { Acc->DelinearizedSubscripts.push_back(AF); } else { SE.computeAccessFunctions(AF, Acc->DelinearizedSubscripts, Shape->DelinearizedSizes); - if (Acc->DelinearizedSubscripts.size() == 0) + if (Acc->DelinearizedSubscripts.size() == 0) { IsNonAffine = true; + } } for (const SCEV *S : Acc->DelinearizedSubscripts) if (!isAffine(S, Scope, Context)) @@ -991,9 +1034,11 @@ // (Possibly) report non affine access if (IsNonAffine) { BasePtrHasNonAffine = true; - if (!AllowNonAffine) + if (!AllowNonAffine) { invalid(Context, /*Assert=*/true, Pair.second, Insn, BaseValue); + } + if (!KeepGoing && !AllowNonAffine) return false; } @@ -1045,7 +1090,6 @@ bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF, const SCEVUnknown *BP, DetectionContext &Context) const { - if (!BP) return invalid(Context, /*Assert=*/true, Inst); @@ -1150,6 +1194,10 @@ bool ScopDetection::isValidMemoryAccess(MemAccInst Inst, DetectionContext &Context) const { + if (getAbstractMatrixCall(Inst, SE)) { + return true; + } + Value *Ptr = Inst.getPointerOperand(); Loop *L = LI.getLoopFor(Inst->getParent()); const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L); Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -246,6 +246,9 @@ //===----------------------------------------------------------------------===// +raw_ostream &polly::operator<<(raw_ostream &OS, const ShapeInfo &Shape) { + return Shape.print(OS); +} // Create a sequence of two schedules. Either argument may be null and is // interpreted as the empty schedule. Can also return null if both schedules are // empty. @@ -318,10 +321,11 @@ } ScopArrayInfo::ScopArrayInfo(Value *BasePtr, Type *ElementType, isl::ctx Ctx, - ArrayRef Sizes, MemoryKind Kind, + ShapeInfo Shape, MemoryKind Kind, const DataLayout &DL, Scop *S, const char *BaseName) - : BasePtr(BasePtr), ElementType(ElementType), Kind(Kind), DL(DL), S(*S) { + : BasePtr(BasePtr), ElementType(ElementType), Kind(Kind), DL(DL), + Shape(ShapeInfo::none()), S(*S) { std::string BasePtrName = BaseName ? BaseName : getIslCompatibleName("MemRef", BasePtr, S->getNextArrayIdx(), @@ -329,7 +333,20 @@ UseInstructionNames); Id = isl::id::alloc(Ctx, BasePtrName, this); - updateSizes(Sizes); + /* + errs() << "\n" << __PRETTY_FUNCTION__<< ":" << __LINE__<< "\n"; + if(BasePtr) errs() << "\t-BasePtr: " << *BasePtr << "\n"; + if(BaseName) errs() << "\t-BaseName: " << BaseName << "\n"; + errs() << "\t-ElementType: " << *ElementType << "\n"; + */ + + + // Shape.mapSizes([&] (SmallVector &Sizes) { + // this->updateSizes(Sizes); }); + if (Shape.hasSizes()) + updateSizes(Shape.sizes()); + else + updateStrides(Shape.strides(), Shape.offset(), Shape.hackFAD()); if (!BasePtr || Kind != MemoryKind::Array) { BasePtrOriginSAI = nullptr; @@ -364,9 +381,18 @@ if (Array->getNumberOfDimensions() != getNumberOfDimensions()) return false; - for (unsigned i = 0; i < getNumberOfDimensions(); i++) - if (Array->getDimensionSize(i) != getDimensionSize(i)) - return false; + if (this->hasStrides() != Array->hasStrides()) + return false; + + if (this->hasStrides()) { + for (unsigned i = 0; i < getNumberOfDimensions(); i++) + if (Array->getDimensionStride(i) != getDimensionStride(i)) + return false; + } else { + for (unsigned i = 0; i < getNumberOfDimensions(); i++) + if (Array->getDimensionSize(i) != getDimensionSize(i)) + return false; + } return true; } @@ -398,44 +424,84 @@ return; } - assert(DimensionSizesPw.size() > 0 && !DimensionSizesPw[0]); - assert(!this->FAD); - this->FAD = FAD; + if (!this->hasStrides()) { + errs() << "WARNING: Fortran SAI: " << getName() + << "does not have strides!\n"; + assert(DimensionSizesPw.size() > 0 && !DimensionSizesPw[0]); + assert(!this->FAD); + this->FAD = FAD; - isl::space Space(S.getIslCtx(), 1, 0); + isl::space Space(S.getIslCtx(), 1, 0); - std::string param_name = getName(); - param_name += "_fortranarr_size"; - isl::id IdPwAff = isl::id::alloc(S.getIslCtx(), param_name, this); + std::string param_name = getName(); + param_name += "_fortranarr_size"; + isl::id IdPwAff = isl::id::alloc(S.getIslCtx(), param_name, this); - Space = Space.set_dim_id(isl::dim::param, 0, IdPwAff); - isl::pw_aff PwAff = - isl::aff::var_on_domain(isl::local_space(Space), isl::dim::param, 0); + Space = Space.set_dim_id(isl::dim::param, 0, IdPwAff); + isl::pw_aff PwAff = + isl::aff::var_on_domain(isl::local_space(Space), isl::dim::param, 0); - DimensionSizesPw[0] = PwAff; + DimensionSizesPw[0] = PwAff; + } +} + +void ScopArrayInfo::overwriteSizeWithStrides(ArrayRef Strides, + const SCEV *Offset, + GlobalValue *FAD) { + + // HACK: first set our shape to a stride based shape so that we don't + // assert within updateStrides. Move this into a bool parameter of + // updateStrides + Shape = ShapeInfo::fromStrides(Strides, Offset, FAD); + updateStrides(Strides, Offset, FAD); +} +bool ScopArrayInfo::updateStrides(ArrayRef Strides, + const SCEV *Offset, + GlobalValue *FAD) { + Shape.setStrides(Strides, Offset, FAD); + DimensionSizesPw.clear(); + for (size_t i = 0; i < Shape.getNumberOfDimensions(); i++) { + isl::space Space(S.getIslCtx(), 1, 0); + + std::string param_name = getIslCompatibleName("stride_" + std::to_string(i) + "__", getName(), ""); + isl::id IdPwAff = isl::id::alloc(S.getIslCtx(), param_name, this); + + Space = Space.set_dim_id(isl::dim::param, 0, IdPwAff); + isl::pw_aff PwAff = + isl::aff::var_on_domain(isl::local_space(Space), isl::dim::param, 0); + + DimensionSizesPw.push_back(PwAff); + } + return true; } bool ScopArrayInfo::updateSizes(ArrayRef NewSizes, bool CheckConsistency) { - int SharedDims = std::min(NewSizes.size(), DimensionSizes.size()); - int ExtraDimsNew = NewSizes.size() - SharedDims; - int ExtraDimsOld = DimensionSizes.size() - SharedDims; - - if (CheckConsistency) { - for (int i = 0; i < SharedDims; i++) { - auto *NewSize = NewSizes[i + ExtraDimsNew]; - auto *KnownSize = DimensionSizes[i + ExtraDimsOld]; - if (NewSize && KnownSize && NewSize != KnownSize) - return false; - } - if (DimensionSizes.size() >= NewSizes.size()) - return true; - } + if (Shape.isInitialized()) { + const SmallVector &DimensionSizes = Shape.sizes(); + int SharedDims = std::min(NewSizes.size(), DimensionSizes.size()); + int ExtraDimsNew = NewSizes.size() - SharedDims; + int ExtraDimsOld = DimensionSizes.size() - SharedDims; + + if (CheckConsistency) { + for (int i = 0; i < SharedDims; i++) { + auto *NewSize = NewSizes[i + ExtraDimsNew]; + auto *KnownSize = DimensionSizes[i + ExtraDimsOld]; + if (NewSize && KnownSize && NewSize != KnownSize) + return false; + } + if (DimensionSizes.size() >= NewSizes.size()) + return true; + } + } + SmallVector DimensionSizes; DimensionSizes.clear(); DimensionSizes.insert(DimensionSizes.begin(), NewSizes.begin(), NewSizes.end()); + Shape.setSizes(DimensionSizes); + DimensionSizesPw.clear(); for (const SCEV *Expr : DimensionSizes) { if (!Expr) { @@ -467,22 +533,40 @@ // as a isl_pw_aff even though there is no SCEV information. bool IsOutermostSizeKnown = SizeAsPwAff && FAD; - if (!IsOutermostSizeKnown && getNumberOfDimensions() > 0 && - !getDimensionSize(0)) { - OS << "[*]"; - u++; - } - for (; u < getNumberOfDimensions(); u++) { - OS << "["; - - if (SizeAsPwAff) { - isl::pw_aff Size = getDimensionSizePw(u); - OS << " " << Size << " "; - } else { - OS << *getDimensionSize(u); + if (Shape.hasSizes()) { + // OS << "(Sizes)"; + if (!IsOutermostSizeKnown && getNumberOfDimensions() > 0 && + !getDimensionSize(0)) { + OS << "[*]"; + u++; } + for (; u < getNumberOfDimensions(); u++) { + OS << "["; - OS << "]"; + if (SizeAsPwAff) { + isl::pw_aff Size = getDimensionSizePw(u); + OS << " " << Size << " "; + } else { + OS << *getDimensionSize(u); + } + + OS << "]"; + } + } else { + OS << "(Strides)"; + for (; u < getNumberOfDimensions(); u++) { + OS << "["; + if (SizeAsPwAff) { + isl::pw_aff Size = getDimensionSizePw(u); + OS << " " << Size << " "; + } else { + const SCEV *Stride = Shape.strides()[u]; + assert(Stride); + OS << *Stride; + } + OS << "]"; + } + OS << ";[Offset: " << *Shape.offset() << "]"; } OS << ";"; @@ -561,6 +645,7 @@ void MemoryAccess::updateDimensionality() { auto *SAI = getScopArrayInfo(); isl::space ArraySpace = SAI->getSpace(); + isl::space AccessSpace = AccessRelation.get_space().range(); isl::ctx Ctx = ArraySpace.get_ctx(); @@ -761,6 +846,9 @@ void MemoryAccess::assumeNoOutOfBound() { if (PollyIgnoreInbounds) return; + if (Shape.hasStrides()) + return; + auto *SAI = getScopArrayInfo(); isl::space Space = getOriginalAccessRelationSpace().range(); isl::set Outside = isl::set::empty(Space); @@ -799,7 +887,7 @@ void MemoryAccess::buildMemIntrinsicAccessRelation() { assert(isMemoryIntrinsic()); - assert(Subscripts.size() == 2 && Sizes.size() == 1); + assert(Subscripts.size() == 2 && Shape.getNumberOfDimensions() == 1); isl::pw_aff SubscriptPWA = getPwAff(Subscripts[0]); isl::map SubscriptMap = isl::map::from_pw_aff(SubscriptPWA); @@ -867,7 +955,11 @@ } void MemoryAccess::foldAccessRelation() { - if (Sizes.size() < 2 || isa(Sizes[1])) + // If we are stride-based, it makes no sense to deliniearlise; + if (Shape.hasStrides()) + return; + + if (Shape.getNumberOfDimensions() < 2 || isa(Shape.sizes()[1])) return; int Size = Subscripts.size(); @@ -877,7 +969,7 @@ for (int i = Size - 2; i >= 0; --i) { isl::space Space; isl::map MapOne, MapTwo; - isl::pw_aff DimSize = getPwAff(Sizes[i + 1]); + isl::pw_aff DimSize = getPwAff(Shape.sizes()[i + 1]); isl::space SpaceSize = DimSize.get_space(); isl::id ParamId = @@ -1013,13 +1105,11 @@ MemoryAccess::MemoryAccess(ScopStmt *Stmt, Instruction *AccessInst, AccessType AccType, Value *BaseAddress, Type *ElementType, bool Affine, - ArrayRef Subscripts, - ArrayRef Sizes, Value *AccessValue, - MemoryKind Kind) + ArrayRef Subscripts, ShapeInfo Shape, + Value *AccessValue, MemoryKind Kind) : Kind(Kind), AccType(AccType), Statement(Stmt), InvalidDomain(nullptr), - BaseAddr(BaseAddress), ElementType(ElementType), - Sizes(Sizes.begin(), Sizes.end()), AccessInstruction(AccessInst), - AccessValue(AccessValue), IsAffine(Affine), + BaseAddr(BaseAddress), ElementType(ElementType), Shape(Shape), + AccessInstruction(AccessInst), AccessValue(AccessValue), IsAffine(Affine), Subscripts(Subscripts.begin(), Subscripts.end()), AccessRelation(nullptr), NewAccessRelation(nullptr), FAD(nullptr) { static const std::string TypeStrings[] = {"", "_Read", "_Write", "_MayWrite"}; @@ -1031,11 +1121,12 @@ MemoryAccess::MemoryAccess(ScopStmt *Stmt, AccessType AccType, isl::map AccRel) : Kind(MemoryKind::Array), AccType(AccType), Statement(Stmt), - InvalidDomain(nullptr), AccessRelation(nullptr), - NewAccessRelation(AccRel), FAD(nullptr) { + InvalidDomain(nullptr), Shape(ShapeInfo::fromSizes({nullptr})), + AccessRelation(nullptr), NewAccessRelation(AccRel), FAD(nullptr) { isl::id ArrayInfoId = NewAccessRelation.get_tuple_id(isl::dim::out); auto *SAI = ScopArrayInfo::getFromId(ArrayInfoId); - Sizes.push_back(nullptr); + // Sizes.push_back(nullptr); + SmallVector &Sizes = Shape.sizes_mut(); for (unsigned i = 1; i < SAI->getNumberOfDimensions(); i++) Sizes.push_back(SAI->getDimensionSize(i)); ElementType = SAI->getElementType(); @@ -1854,10 +1945,11 @@ if (Access) return Access; - ScopArrayInfo *SAI = - Parent.getOrCreateScopArrayInfo(V, V->getType(), {}, MemoryKind::Value); - Access = new MemoryAccess(this, nullptr, MemoryAccess::READ, V, V->getType(), - true, {}, {}, V, MemoryKind::Value); + ScopArrayInfo *SAI = Parent.getOrCreateScopArrayInfo( + V, V->getType(), ShapeInfo::fromSizes({}), MemoryKind::Value); + Access = + new MemoryAccess(this, nullptr, MemoryAccess::READ, V, V->getType(), true, + {}, ShapeInfo::fromSizes({}), V, MemoryKind::Value); Parent.addAccessFunction(Access); Access->buildAccessRelation(SAI); addAccess(Access); @@ -2210,6 +2302,8 @@ static std::vector getFortranArrayIds(Scop::array_range Arrays) { std::vector OutermostSizeIds; for (auto Array : Arrays) { + if (Array->hasStrides()) + continue; // To check if an array is a Fortran array, we check if it has a isl_pw_aff // for its outermost dimension. Fortran arrays will have this since the // outermost dimension size can be picked up from their runtime description. @@ -2331,8 +2425,10 @@ Set = Set.remove_divs(); - if (isl_set_n_basic_set(Set.get()) >= MaxDisjunctsInDomain) + if (isl_set_n_basic_set(Set.get()) >= MaxDisjunctsInDomain) { + errs() << "@@@" << __PRETTY_FUNCTION__ << __LINE__ << "\n"; return isl::stat::error; + } // Restrict the number of parameters involved in the access as the lexmin/ // lexmax computation will take too long if this number is high. @@ -2354,18 +2450,24 @@ if (Set.involves_dims(isl::dim::param, u, 1)) InvolvedParams++; - if (InvolvedParams > RunTimeChecksMaxParameters) + if (InvolvedParams > RunTimeChecksMaxParameters) { + errs() << "@@@" << __PRETTY_FUNCTION__ << __LINE__ << "\n"; return isl::stat::error; + } } - if (isl_set_n_basic_set(Set.get()) > RunTimeChecksMaxAccessDisjuncts) + if (isl_set_n_basic_set(Set.get()) > RunTimeChecksMaxAccessDisjuncts) { + errs() << "@@@" << __PRETTY_FUNCTION__ << __LINE__ << "\n"; return isl::stat::error; + } MinPMA = Set.lexmin_pw_multi_aff(); MaxPMA = Set.lexmax_pw_multi_aff(); - if (isl_ctx_last_error(Ctx.get()) == isl_error_quota) + if (isl_ctx_last_error(Ctx.get()) == isl_error_quota) { + errs() << "@@@" << __PRETTY_FUNCTION__ << __LINE__ << "\n"; return isl::stat::error; + } MinPMA = MinPMA.coalesce(); MaxPMA = MaxPMA.coalesce(); @@ -2412,7 +2514,9 @@ auto Lambda = [&MinMaxAccesses, &S](isl::set Set) -> isl::stat { return buildMinMaxAccess(Set, MinMaxAccesses, S); }; - return Locations.foreach_set(Lambda) == isl::stat::ok; + bool Valid = Locations.foreach_set(Lambda) == isl::stat::ok; + + return Valid; } /// Helper to treat non-affine regions and basic blocks the same. @@ -3226,17 +3330,22 @@ splitAliasGroupsByDomain(AliasGroups); for (AliasGroupTy &AG : AliasGroups) { - if (!hasFeasibleRuntimeContext()) + if (!hasFeasibleRuntimeContext()) { + errs() << "@@@" << __PRETTY_FUNCTION__ << __LINE__ << "\n"; return false; + } { IslMaxOperationsGuard MaxOpGuard(getIslCtx(), OptComputeOut); bool Valid = buildAliasGroup(AG, HasWriteAccess); - if (!Valid) + if (!Valid) { + errs() << "@@@" << __PRETTY_FUNCTION__ << __LINE__ << "\n"; return false; + } } if (isl_ctx_last_error(getIslCtx()) == isl_error_quota) { invalidate(COMPLEXITY, DebugLoc()); + errs() << "@@@" << __PRETTY_FUNCTION__ << __LINE__ << "\n"; return false; } } @@ -3283,6 +3392,7 @@ if (!MA->isAffine()) { invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(), MA->getAccessInstruction()->getParent()); + errs() << "@@@" << __PRETTY_FUNCTION__ << __LINE__ << "\n"; return false; } } @@ -3305,21 +3415,27 @@ Valid = calculateMinMaxAccess(ReadWriteAccesses, *this, MinMaxAccessesReadWrite); - if (!Valid) + if (!Valid) { + errs() << "@@@" << __PRETTY_FUNCTION__ << __LINE__ << "\n"; return false; + } // Bail out if the number of values we need to compare is too large. // This is important as the number of comparisons grows quadratically with // the number of values we need to compare. if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() > - RunTimeChecksMaxArraysPerGroup) + RunTimeChecksMaxArraysPerGroup) { return false; + errs() << "@@@" << __PRETTY_FUNCTION__ << __LINE__ << "\n"; + } Valid = calculateMinMaxAccess(ReadOnlyAccesses, *this, MinMaxAccessesReadOnly); - if (!Valid) + if (!Valid) { + errs() << "@@@" << __PRETTY_FUNCTION__ << __LINE__ << "\n"; return false; + } return true; } @@ -3409,6 +3525,9 @@ if (Array->getNumberOfDimensions() <= 1) continue; + if (Array->hasStrides()) + continue; + isl_space *Space = Array->getSpace().release(); Space = isl_space_align_params(Space, isl_union_set_get_space(Accessed)); @@ -3559,8 +3678,11 @@ for (MemoryAccess *Access : Stmt) { if (!Access->isArrayKind()) continue; + ScopArrayInfo *Array = const_cast(Access->getScopArrayInfo()); + if (Array->hasStrides()) + continue; if (Array->getNumberOfDimensions() != 1) continue; @@ -4040,8 +4162,7 @@ } ScopArrayInfo *Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *ElementType, - ArrayRef Sizes, - MemoryKind Kind, + ShapeInfo Shape, MemoryKind Kind, const char *BaseName) { assert((BasePtr || BaseName) && "BasePtr and BaseName can not be nullptr at the same time."); @@ -4050,15 +4171,33 @@ : ScopArrayNameMap[BaseName]; if (!SAI) { auto &DL = getFunction().getParent()->getDataLayout(); - SAI.reset(new ScopArrayInfo(BasePtr, ElementType, getIslCtx(), Sizes, Kind, + SAI.reset(new ScopArrayInfo(BasePtr, ElementType, getIslCtx(), Shape, Kind, DL, this, BaseName)); ScopArrayInfoSet.insert(SAI.get()); } else { SAI->updateElementType(ElementType); // In case of mismatching array sizes, we bail out by setting the run-time // context to false. - if (!SAI->updateSizes(Sizes)) - invalidate(DELINEARIZATION, DebugLoc()); + if (SAI->hasStrides() != Shape.hasStrides()) { + DEBUG(dbgs() << "SAI and new shape do not agree:\n"); + DEBUG(dbgs() << "SAI: "; SAI->print(dbgs(), true); dbgs() << "\n"); + DEBUG(dbgs() << "Shape: " << Shape << "\n"); + + if (Shape.hasStrides()) { + DEBUG(dbgs() << "Shape has strides, SAI had size. Overwriting size " + "with strides"); + SAI->overwriteSizeWithStrides(Shape.strides(), Shape.offset(), Shape.hackFAD()); + } else { + report_fatal_error( + "SAI has strides, Shape is size based. This should not happen"); + } + } + if (SAI->hasStrides()) { + SAI->updateStrides(Shape.strides(), Shape.offset(), Shape.hackFAD()); + } else { + if (!SAI->updateSizes(Shape.sizes())) + invalidate(DELINEARIZATION, DebugLoc()); + } } return SAI.get(); } @@ -4075,7 +4214,8 @@ else SCEVSizes.push_back(nullptr); - auto *SAI = getOrCreateScopArrayInfo(nullptr, ElementType, SCEVSizes, + auto *SAI = getOrCreateScopArrayInfo(nullptr, ElementType, + ShapeInfo::fromSizes(SCEVSizes), MemoryKind::Array, BaseName.c_str()); return SAI; } @@ -4133,8 +4273,18 @@ std::vector FortranIDs; FortranIDs = getFortranArrayIds(arrays()); + std::vector StrideIDs; + + for(ScopArrayInfo *Array : arrays()) { + if (!Array->hasStrides()) continue; + for(int i = 0 ; i < Array->getNumberOfDimensions(); i++) { + isl::id Id = Array->getDimensionSizeId(i); + StrideIDs.push_back(Id); + } + } + isl::space Space = isl::space::params_alloc( - getIslCtx(), ParameterIds.size() + FortranIDs.size()); + getIslCtx(), ParameterIds.size() + FortranIDs.size() + StrideIDs.size()); unsigned PDim = 0; for (const SCEV *Parameter : Parameters) { @@ -4145,6 +4295,14 @@ for (isl::id Id : FortranIDs) Space = Space.set_dim_id(isl::dim::param, PDim++, Id); + for (isl::id Id : StrideIDs) { + Space = Space.set_dim_id(isl::dim::param, PDim++, Id); + } + + /* + errs() << __PRETTY_FUNCTION__ << __LINE__ << " ParamSpace:\n"; + Space.dump(); + */ return Space; } Index: lib/CodeGen/IslExprBuilder.cpp =================================================================== --- lib/CodeGen/IslExprBuilder.cpp +++ lib/CodeGen/IslExprBuilder.cpp @@ -40,12 +40,14 @@ cl::Hidden, cl::init(OT_REQUEST), cl::ZeroOrMore, cl::cat(PollyCategory)); IslExprBuilder::IslExprBuilder(Scop &S, PollyIRBuilder &Builder, - IDToValueTy &IDToValue, ValueMapT &GlobalMap, + IDToValueTy &IDToValue, + SCEVToValueTy &SCEVToValue, ValueMapT &GlobalMap, const DataLayout &DL, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, BasicBlock *StartBlock) - : S(S), Builder(Builder), IDToValue(IDToValue), GlobalMap(GlobalMap), - DL(DL), SE(SE), DT(DT), LI(LI), StartBlock(StartBlock) { + : S(S), Builder(Builder), IDToValue(IDToValue), SCEVToValue(SCEVToValue), + GlobalMap(GlobalMap), DL(DL), SE(SE), DT(DT), LI(LI), + StartBlock(StartBlock) { OverflowState = (OTMode == OT_ALWAYS) ? Builder.getFalse() : nullptr; } @@ -262,50 +264,142 @@ } IndexOp = nullptr; - for (unsigned u = 1, e = isl_ast_expr_get_op_n_arg(Expr); u < e; u++) { - Value *NextIndex = create(isl_ast_expr_get_op_arg(Expr, u)); - assert(NextIndex->getType()->isIntegerTy() && - "Access index should be an integer"); + const std::string Name = Builder.GetInsertBlock()->getModule()->getName(); + if (SAI->hasStrides()) { + for (unsigned u = 1, e = isl_ast_expr_get_op_n_arg(Expr); u < e; u++) { + Value *NextIndex = create(isl_ast_expr_get_op_arg(Expr, u)); + assert(NextIndex->getType()->isIntegerTy() && + "Access index should be an integer"); + + Type *Ty = [&]() { + if (IndexOp) + return getWidestType(NextIndex->getType(), IndexOp->getType()); + else + return NextIndex->getType(); + }(); - if (PollyDebugPrinting) - RuntimeDebugBuilder::createCPUPrinter(Builder, "[", NextIndex, "]"); + if (Ty != NextIndex->getType()) + NextIndex = Builder.CreateIntCast(NextIndex, Ty, true); - if (!IndexOp) { - IndexOp = NextIndex; - } else { - Type *Ty = getWidestType(NextIndex->getType(), IndexOp->getType()); + const SCEV *DimSCEV = SAI->getDimensionStride(u - 1); + assert(DimSCEV); + + Value *DimSize = nullptr; + llvm::ValueToValueMap Map(GlobalMap.begin(), GlobalMap.end()); + + // HACK: we do this because we know the kernel name. + if (Name.find("FUNC__") != std::string::npos) { + auto OldValue = SCEVToValue.find(DimSCEV); + if (OldValue == SCEVToValue.end()) + assert(false && "!OldValue"); + + auto NewDimSizeIt = Map.find(OldValue->second); + if (NewDimSizeIt == Map.end()) + assert(false && "!NewDimSizeIt"); + DimSize = NewDimSizeIt->second; + } else { + + DimSCEV = SCEVParameterRewriter::rewrite(DimSCEV, SE, Map); + DimSize = expandCodeFor(S, SE, DL, "polly", DimSCEV, DimSCEV->getType(), + &*Builder.GetInsertPoint(), nullptr, + StartBlock->getSinglePredecessor()); + } + assert(DimSize && "dimsize uninitialized"); if (Ty != NextIndex->getType()) - NextIndex = Builder.CreateIntCast(NextIndex, Ty, true); - if (Ty != IndexOp->getType()) - IndexOp = Builder.CreateIntCast(IndexOp, Ty, true); + NextIndex = Builder.CreateSExtOrTrunc(NextIndex, Ty, + "polly.access.idxval." + BaseName + std::to_string(u - 1)); + if (Ty != DimSize->getType()) + DimSize = Builder.CreateSExtOrTrunc(DimSize, Ty, + "polly.access.stride." + BaseName + std::to_string(u - 1)); + + NextIndex = createMul(NextIndex, DimSize, "polly.access.idxval_x_stride." + BaseName + std::to_string(u - 1)); + + if (PollyDebugPrinting) + RuntimeDebugBuilder::createCPUPrinter(Builder, "[", NextIndex, "]"); + if (!IndexOp) { + IndexOp = NextIndex; + } else { + if (Ty != IndexOp->getType()) + IndexOp = Builder.CreateIntCast(IndexOp, Ty, true); + IndexOp = createAdd(IndexOp, NextIndex, "polly.access.idx_accum." + BaseName); + } // end else + } // end for loop over stride dims + assert(IndexOp && "expected correct index op"); + Value *Offset = nullptr; + // If we are in the kernel, then the base pointer has already been + // offset correctly so we need not do anything about it. + if (Name.find("FUNC__") != std::string::npos) { + llvm::ValueToValueMap Map(GlobalMap.begin(), GlobalMap.end()); + const SCEV *OffsetSCEV = SAI->getStrideOffset(); + auto OldValue = SCEVToValue.find(OffsetSCEV); + if (OldValue == SCEVToValue.end()) + assert(false && "!OldValue offset"); + + auto NewIt = Map.find(OldValue->second); + if (NewIt == Map.end()) + assert(false && "!NewIt offset"); + Offset = NewIt->second; + } else { + const SCEV *OffsetSCEV = SAI->getStrideOffset(); + llvm::ValueToValueMap Map(GlobalMap.begin(), GlobalMap.end()); - IndexOp = createAdd(IndexOp, NextIndex, "polly.access.add." + BaseName); + // If we are outside a kernel, then we do need to synthesize an offset. + OffsetSCEV = SCEVParameterRewriter::rewrite(OffsetSCEV, SE, Map); + Offset = expandCodeFor(S, SE, DL, "polly", OffsetSCEV, + OffsetSCEV->getType(), &*Builder.GetInsertPoint(), + nullptr, StartBlock->getSinglePredecessor()); + Offset = Builder.CreateIntCast(Offset, IndexOp->getType(), true); } + assert(Offset && "dimsize uninitialized"); + IndexOp = createAdd(IndexOp, Offset, "polly.access.offseted." + BaseName); + } // end hasStride + else { + for (unsigned u = 1, e = isl_ast_expr_get_op_n_arg(Expr); u < e; u++) { + Value *NextIndex = create(isl_ast_expr_get_op_arg(Expr, u)); + assert(NextIndex->getType()->isIntegerTy() && + "Access index should be an integer"); + + if (PollyDebugPrinting) + RuntimeDebugBuilder::createCPUPrinter(Builder, "[", NextIndex, "]"); + + if (!IndexOp) { + IndexOp = NextIndex; + } else { + Type *Ty = getWidestType(NextIndex->getType(), IndexOp->getType()); + + if (Ty != NextIndex->getType()) + NextIndex = Builder.CreateIntCast(NextIndex, Ty, true); + if (Ty != IndexOp->getType()) + IndexOp = Builder.CreateIntCast(IndexOp, Ty, true); + + IndexOp = createAdd(IndexOp, NextIndex, "polly.access.add." + BaseName); + } + + // For every but the last dimension multiply the size, for the last + // dimension we can exit the loop. + if (u + 1 >= e) + break; - // For every but the last dimension multiply the size, for the last - // dimension we can exit the loop. - if (u + 1 >= e) - break; + const SCEV *DimSCEV = SAI->getDimensionSize(u); - const SCEV *DimSCEV = SAI->getDimensionSize(u); + llvm::ValueToValueMap Map(GlobalMap.begin(), GlobalMap.end()); + DimSCEV = SCEVParameterRewriter::rewrite(DimSCEV, SE, Map); + Value *DimSize = + expandCodeFor(S, SE, DL, "polly", DimSCEV, DimSCEV->getType(), + &*Builder.GetInsertPoint(), nullptr, + StartBlock->getSinglePredecessor()); - llvm::ValueToValueMap Map(GlobalMap.begin(), GlobalMap.end()); - DimSCEV = SCEVParameterRewriter::rewrite(DimSCEV, SE, Map); - Value *DimSize = - expandCodeFor(S, SE, DL, "polly", DimSCEV, DimSCEV->getType(), - &*Builder.GetInsertPoint(), nullptr, - StartBlock->getSinglePredecessor()); - - Type *Ty = getWidestType(DimSize->getType(), IndexOp->getType()); - - if (Ty != IndexOp->getType()) - IndexOp = Builder.CreateSExtOrTrunc(IndexOp, Ty, - "polly.access.sext." + BaseName); - if (Ty != DimSize->getType()) - DimSize = Builder.CreateSExtOrTrunc(DimSize, Ty, - "polly.access.sext." + BaseName); - IndexOp = createMul(IndexOp, DimSize, "polly.access.mul." + BaseName); + Type *Ty = getWidestType(DimSize->getType(), IndexOp->getType()); + + if (Ty != IndexOp->getType()) + IndexOp = Builder.CreateSExtOrTrunc(IndexOp, Ty, + "polly.access.sext." + BaseName); + if (Ty != DimSize->getType()) + DimSize = Builder.CreateSExtOrTrunc(DimSize, Ty, + "polly.access.sext." + BaseName); + IndexOp = createMul(IndexOp, DimSize, "polly.access.mul." + BaseName); + } } Access = Builder.CreateGEP(Base, IndexOp, "polly.access." + BaseName); @@ -319,7 +413,8 @@ Value *IslExprBuilder::createOpAccess(isl_ast_expr *Expr) { Value *Addr = createAccessAddress(Expr); assert(Addr && "Could not create op access address"); - return Builder.CreateLoad(Addr, Addr->getName() + ".load"); + auto LoadVal = Builder.CreateLoad(Addr, Addr->getName() + ".load"); + return LoadVal; } Value *IslExprBuilder::createOpBin(__isl_take isl_ast_expr *Expr) { Index: lib/CodeGen/IslNodeBuilder.cpp =================================================================== --- lib/CodeGen/IslNodeBuilder.cpp +++ lib/CodeGen/IslNodeBuilder.cpp @@ -562,8 +562,13 @@ UB = getUpperBound(For, Predicate); ValueLB = ExprBuilder.create(Init); + ValueLB = getLatestValue(ValueLB); + ValueUB = ExprBuilder.create(UB); + ValueUB = getLatestValue(ValueUB); + ValueInc = ExprBuilder.create(Inc); + ValueInc = getLatestValue(ValueInc); MaxType = ExprBuilder.getType(Iterator); MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType()); @@ -1209,6 +1214,114 @@ return true; } +// given a scev of the form baseptr + offset(constant), return baseptr +Value *getBasePtrFromConstIndexSCEV(const SCEV *S) { + errs() << "S:" << *S << "\n"; + const SCEVAddExpr *Add = cast(S); + assert(Add->getNumOperands() == 2); + if (isa(Add->getOperand(0))) + return cast(Add->getOperand(0))->getValue(); + + if (isa(Add->getOperand(1))) + return cast(Add->getOperand(1))->getValue(); + assert(false); +} + +/* + struct gfc_array_descriptor + { + array *data + index offset; + index dtype; + struct descriptor_dimension dimension[N_DIM]; + } + + struct descriptor_dimension + { + index stride; + index lbound; + index ubound; + } +*/ +Value *IslNodeBuilder::extractStrideFromFAD(GlobalValue *FAD, int dimension) { + Type *Ity = Builder.getInt32Ty(); + std::vector Idxs = { ConstantInt::get(Ity, 0), //global idx + ConstantInt::get(Ity, 3), // location of descriptor dim array + ConstantInt::get(Ity, dimension),//nth descriptor dim array, + ConstantInt::get(Ity, 0) // stride + }; + Value *Loc = Builder.CreateGEP(FAD, Idxs, "stride." + std::to_string(dimension) + ".loc"); + return Builder.CreateLoad(Loc); +} + +Value *IslNodeBuilder::extractOffsetFromFAD(GlobalValue *FAD) { + Type *Ity = Builder.getInt32Ty(); + std::vector Idxs = {ConstantInt::get(Ity, 0) // global idx + , ConstantInt::get(Ity, 1) // offset loc + }; + Value *OffsetLoc = Builder.CreateGEP(FAD, Idxs, "offset.loc"); + return Builder.CreateLoad(OffsetLoc); +} + +void IslNodeBuilder::materializeStridedArraySizes() { + for (ScopArrayInfo *Array : S.arrays()) { + if (!Array->hasStrides()) + continue; + + GlobalValue *FAD = nullptr; + + for (unsigned i = 0; i < Array->getNumberOfDimensions(); i++) { + isl_pw_aff *ParametricPwAff = Array->getDimensionSizePw(i).release(); + assert(ParametricPwAff && "parametric pw_aff corresponding " + "to outermost dimension does not " + "exist"); + + isl_id *Id = isl_pw_aff_get_dim_id(ParametricPwAff, isl_dim_param, 0); + isl_pw_aff_free(ParametricPwAff); + + assert(Id && "pw_aff is not parametric"); + + DEBUG( + dbgs() << "-\n"; + dbgs() << "i: " << i << "\n"; + dbgs() << "ID: " <getDimensionStride(i); + // dbgs() << "StrideSCEV: " << *StrideSCEV << "\n"; + + Value *Stride = nullptr; + if (FAD) { + assert(false); + // We need to pass a SCEV to the IslExprBuilder for + // kernel strides. We can't synthesize a value because it would be + // different across host and kernel code. + Stride = extractStrideFromFAD(FAD, Array->getNumberOfDimensions() - 1 -i); + } + else { + // assert(isa(StrideSCEV)); + Stride = generateSCEV(StrideSCEV); + } + assert(Stride); + + // errs() << "StrideVal: " << *Stride << "\n"; + IDToValue[Id] = Stride; + SCEVToValue[Array->getDimensionStride(i)] = Stride; + isl_id_free(Id); + } + + Value *Offset = nullptr; + const SCEV *OffsetSCEV = Array->getStrideOffset(); + if (FAD) { + assert(false); + Offset = extractOffsetFromFAD(FAD); + } else { + // assert(isa(OffsetSCEV)); + Offset = generateSCEV(OffsetSCEV); + } + assert(Offset); + SCEVToValue[OffsetSCEV] = Offset; + } +} + Value *IslNodeBuilder::preloadUnconditionally(isl_set *AccessRange, isl_ast_build *Build, Instruction *AccInst) { @@ -1372,10 +1485,9 @@ // If the size of a dimension is dependent on another class, make sure it is // preloaded. - for (unsigned i = 1, e = SAI->getNumberOfDimensions(); i < e; ++i) { - const SCEV *Dim = SAI->getDimensionSize(i); + auto PreloadSCEV = [&](const SCEV *ToPreload) { SetVector Values; - findValues(Dim, SE, Values); + findValues(ToPreload, SE, Values); for (auto *Val : Values) { if (auto *BaseIAClass = S.lookupInvariantEquivClass(Val)) { if (!preloadInvariantEquivClass(*BaseIAClass)) @@ -1387,6 +1499,21 @@ ExecutionCtx = isl_set_intersect(ExecutionCtx, BaseExecutionCtx); } } + return true; + }; + + if (SAI->hasStrides()) { + if (!PreloadSCEV(SAI->getStrideOffset())) + return false; + for (unsigned i = 0, e = SAI->getNumberOfDimensions(); i < e; ++i) { + if (!PreloadSCEV(SAI->getDimensionStride(i))) + return false; + } + } else { + for (unsigned i = 1, e = SAI->getNumberOfDimensions(); i < e; ++i) { + if (!PreloadSCEV(SAI->getDimensionSize(i))) + return false; + } } Instruction *AccInst = MA->getAccessInstruction(); @@ -1545,6 +1672,8 @@ // parameter materializeFortranArrayOutermostDimension(); + materializeStridedArraySizes(); + // Generate values for the current loop iteration for all surrounding loops. // // We may also reference loops outside of the scop which do not contain the Index: lib/CodeGen/PPCGCodeGeneration.cpp =================================================================== --- lib/CodeGen/PPCGCodeGeneration.cpp +++ lib/CodeGen/PPCGCodeGeneration.cpp @@ -39,8 +39,10 @@ #include "llvm/Support/TargetSelect.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "polly/Support/ISLOStream.h" #include "isl/union_map.h" extern "C" { @@ -531,7 +533,7 @@ /// /// @param Array The array for which to compute the offset. /// @returns An llvm::Value that contains the offset of the array. - Value *getArrayOffset(gpu_array_info *Array); + Value *getArrayOffset(const ScopArrayInfo *SAI, gpu_array_info *Array); /// Prepare the kernel arguments for kernel code generation /// @@ -781,7 +783,7 @@ DevArrayName.append(Array->name); Value *ArraySize = getArraySize(Array); - Value *Offset = getArrayOffset(Array); + Value *Offset = getArrayOffset(ScopArray, Array); if (Offset) ArraySize = Builder.CreateSub( ArraySize, @@ -793,10 +795,10 @@ // choose to be defensive and catch this at the compile phase. It is // most likely that we are doing something wrong with size computation. if (SizeSCEV->isZero()) { - errs() << getUniqueScopName(&S) - << " has computed array size 0: " << *ArraySize - << " | for array: " << *(ScopArray->getBasePtr()) - << ". This is illegal, exiting.\n"; + errs() << getUniqueScopName(&S) << " has array size 0.\n " + << "Array with size 0: " << *(ScopArray->getBasePtr()) + << "Size expression: " << *ArraySize + << "\nThis is illegal, exiting.\n"; report_fatal_error("array size was computed to be 0"); } @@ -822,7 +824,8 @@ HostPtr = ScopArray->getBasePtr(); HostPtr = getLatestValue(HostPtr); - Value *Offset = getArrayOffset(Array); + /* + Value *Offset = getArrayOffset(ScopArray, Array); if (Offset) { HostPtr = Builder.CreatePointerCast( HostPtr, ScopArray->getElementType()->getPointerTo()); @@ -830,6 +833,7 @@ } HostPtr = Builder.CreatePointerCast(HostPtr, Builder.getInt8PtrTy()); + */ DeviceAllocations[ScopArray] = HostPtr; } } @@ -1054,6 +1058,7 @@ break; case GPURuntime::OpenCL: Name = "polly_initContextCL"; + break; } @@ -1121,10 +1126,16 @@ return ArraySize; } -Value *GPUNodeBuilder::getArrayOffset(gpu_array_info *Array) { +Value *GPUNodeBuilder::getArrayOffset(const ScopArrayInfo *SAI, + gpu_array_info *Array) { if (gpu_array_is_scalar(Array)) return nullptr; + if (SAI->hasStrides()) { + return nullptr; + // return generateSCEV(SAI->getStrideOffset()); + } + isl::ast_build Build = isl::ast_build::from_context(S.getContext()); isl::set Min = isl::manage(isl_set_copy(Array->extent)).lexmin(); @@ -1177,7 +1188,7 @@ auto ScopArray = (ScopArrayInfo *)(Array->user); Value *Size = getArraySize(Array); - Value *Offset = getArrayOffset(Array); + Value *Offset = getArrayOffset(ScopArray, Array); Value *DevPtr = DeviceAllocations[ScopArray]; Value *HostPtr; @@ -1428,6 +1439,9 @@ if (AllowLibDevice && getCUDALibDeviceFuntion(F).length() > 0) return true; + if (Name.count("polly_array_index")) + return true; + return F->isIntrinsic() && (Name.startswith("llvm.sqrt") || Name.startswith("llvm.fabs") || Name.startswith("llvm.copysign")); @@ -1473,6 +1487,9 @@ for (const auto &I : IDToValue) SubtreeValues.insert(I.second); + for (const auto &I : SCEVToValue) + SubtreeValues.insert(I.second); + // NOTE: this is populated in IslNodeBuilder::addParameters // See [Code generation of induction variables of loops outside Scops]. for (const auto &I : OutsideLoopIterations) @@ -1651,16 +1668,22 @@ DevArray = DeviceAllocations[const_cast(SAI)]; DevArray = createCallGetDevicePtr(DevArray); } - assert(DevArray != nullptr && "Array to be offloaded to device not " - "initialized"); - Value *Offset = getArrayOffset(&Prog->array[i]); + + Value *Offset = getArrayOffset(SAI, &Prog->array[i]); if (Offset) { DevArray = Builder.CreatePointerCast( DevArray, SAI->getElementType()->getPointerTo()); - DevArray = Builder.CreateGEP(DevArray, Builder.CreateNeg(Offset)); + + // NO NEGATION FOR STRIDES ARRAY. + if (SAI->hasStrides()) + DevArray = Builder.CreateGEP(DevArray, Offset); + else + DevArray = Builder.CreateGEP(DevArray, Builder.CreateNeg(Offset)); DevArray = Builder.CreatePointerCast(DevArray, Builder.getInt8PtrTy()); } + assert(DevArray != nullptr && "Array to be offloaded to device not " + "initialized"); Value *Slot = Builder.CreateGEP( Parameters, {Builder.getInt64(0), Builder.getInt64(Index)}); @@ -1681,7 +1704,9 @@ new AllocaInst(Builder.getInt8PtrTy(), AddressSpace, Launch + "_param_" + std::to_string(Index), EntryBlock->getTerminator()); - Builder.CreateStore(DevArray, Param); + Value *DevArrayCast = + Builder.CreatePointerCast(DevArray, Builder.getInt8PtrTy()); + Builder.CreateStore(DevArrayCast, Param); Value *ParamTyped = Builder.CreatePointerCast(Param, Builder.getInt8PtrTy()); Builder.CreateStore(ParamTyped, Slot); @@ -1769,6 +1794,12 @@ Clone = Function::Create(Fn->getFunctionType(), GlobalValue::ExternalLinkage, ClonedFnName, GPUModule.get()); + + // For our polly_array_index function, we need readnone attribute so that + // dead code elimination nukes it. In general, it is good practice + // to copy over attributes. + Clone->setAttributes(Fn->getAttributes()); + assert(Clone && "Expected cloned function to be initialized."); assert(ValueMap.find(Fn) == ValueMap.end() && "Fn already present in ValueMap"); @@ -2009,17 +2040,34 @@ Type *EleTy = SAI->getElementType(); Value *Val = &*Arg; SmallVector Sizes; + ShapeInfo NewShape = ShapeInfo::none(); isl_ast_build *Build = isl_ast_build_from_context(isl_set_copy(Prog->context)); - Sizes.push_back(nullptr); - for (long j = 1, n = Kernel->array[i].array->n_index; j < n; j++) { - isl_ast_expr *DimSize = isl_ast_build_expr_from_pw_aff( - Build, isl_multi_pw_aff_get_pw_aff(Kernel->array[i].array->bound, j)); - auto V = ExprBuilder.create(DimSize); - Sizes.push_back(SE.getSCEV(V)); + + if (SAI->hasStrides()) { + for (long j = 0, n = Kernel->array[i].array->n_index; j < n; j++) { + isl_ast_expr *DimSize = isl_ast_build_expr_from_pw_aff( + Build, + isl_multi_pw_aff_get_pw_aff(Kernel->array[i].array->bound, j)); + auto V = ExprBuilder.create(DimSize); + Sizes.push_back(SE.getSCEV(V)); + } + NewShape = SAI->getShape(); + // NewShape = ShapeInfo::fromStrides(Sizes, SAI->getStrideOffset()); + } else { + Sizes.push_back(nullptr); + for (long j = 1, n = Kernel->array[i].array->n_index; j < n; j++) { + isl_ast_expr *DimSize = isl_ast_build_expr_from_pw_aff( + Build, + isl_multi_pw_aff_get_pw_aff(Kernel->array[i].array->bound, j)); + auto V = ExprBuilder.create(DimSize); + Sizes.push_back(SE.getSCEV(V)); + } + NewShape = ShapeInfo::fromSizes(Sizes); } const ScopArrayInfo *SAIRep = - S.getOrCreateScopArrayInfo(Val, EleTy, Sizes, MemoryKind::Array); + S.getOrCreateScopArrayInfo(Val, EleTy, NewShape, MemoryKind::Array); + LocalArrays.push_back(Val); isl_ast_build_free(Build); @@ -2201,11 +2249,12 @@ /// To support this case we need to store these scalars back at each /// memory store or at least before each kernel barrier. if (Kernel->n_block != 0 || Kernel->n_grid != 0) { - BuildSuccessful = 0; - DEBUG( - dbgs() << getUniqueScopName(&S) - << " has a store to a scalar value that" - " would be undefined to run in parallel. Bailing out.\n";); + // BuildSuccessful = 0; + DEBUG(dbgs() << __PRETTY_FUNCTION__ << "HACK: disabling bailout on StoredScalar\n";); + //DEBUG( + // dbgs() << getUniqueScopName(&S) + // << " has a store to a scalar value that" + // " would be undefined to run in parallel. Bailing out.\n";); } } } @@ -2216,24 +2265,51 @@ for (int i = 0; i < Kernel->n_var; ++i) { struct ppcg_kernel_var &Var = Kernel->var[i]; isl_id *Id = isl_space_get_tuple_id(Var.array->space, isl_dim_set); + const ScopArrayInfo *OriginalSAI = + ScopArrayInfo::getFromId(isl::manage(isl_id_copy(Id))); + assert(OriginalSAI); + Type *EleTy = ScopArrayInfo::getFromId(isl::manage(Id))->getElementType(); Type *ArrayTy = EleTy; - SmallVector Sizes; + ShapeInfo NewShape = ShapeInfo::none(); + if (OriginalSAI->hasStrides()) { + SmallVector Strides; + for (unsigned int j = 0; j < Var.array->n_index; ++j) { + isl_val *Val = isl_vec_get_element_val(Var.size, j); + long Bound = isl_val_get_num_si(Val); + isl_val_free(Val); + Strides.push_back(S.getSE()->getConstant(Builder.getInt64Ty(), Bound)); + } - Sizes.push_back(nullptr); - for (unsigned int j = 1; j < Var.array->n_index; ++j) { - isl_val *Val = isl_vec_get_element_val(Var.size, j); - long Bound = isl_val_get_num_si(Val); - isl_val_free(Val); - Sizes.push_back(S.getSE()->getConstant(Builder.getInt64Ty(), Bound)); - } + for (int j = Var.array->n_index - 1; j >= 0; --j) { + isl_val *Val = isl_vec_get_element_val(Var.size, j); + long Bound = isl_val_get_num_si(Val); + isl_val_free(Val); + ArrayTy = ArrayType::get(ArrayTy, Bound); + } + + NewShape = OriginalSAI->getShape(); + // NewShape = + // ShapeInfo::fromStrides(Strides, OriginalSAI->getStrideOffset()); + } else { + SmallVector Sizes; + Sizes.push_back(nullptr); + for (unsigned int j = 1; j < Var.array->n_index; ++j) { + isl_val *Val = isl_vec_get_element_val(Var.size, j); + long Bound = isl_val_get_num_si(Val); + isl_val_free(Val); + Sizes.push_back(S.getSE()->getConstant(Builder.getInt64Ty(), Bound)); + } - for (int j = Var.array->n_index - 1; j >= 0; --j) { - isl_val *Val = isl_vec_get_element_val(Var.size, j); - long Bound = isl_val_get_num_si(Val); - isl_val_free(Val); - ArrayTy = ArrayType::get(ArrayTy, Bound); + // ASK TOBIAS: what is going on here? + for (int j = Var.array->n_index - 1; j >= 0; --j) { + isl_val *Val = isl_vec_get_element_val(Var.size, j); + long Bound = isl_val_get_num_si(Val); + isl_val_free(Val); + ArrayTy = ArrayType::get(ArrayTy, Bound); + } + NewShape = ShapeInfo::fromSizes(Sizes); } const ScopArrayInfo *SAI; @@ -2251,8 +2327,8 @@ } else { llvm_unreachable("unknown variable type"); } - SAI = - S.getOrCreateScopArrayInfo(Allocation, EleTy, Sizes, MemoryKind::Array); + SAI = S.getOrCreateScopArrayInfo(Allocation, EleTy, NewShape, + MemoryKind::Array); Id = isl_id_alloc(S.getIslCtx(), Var.name, nullptr); IDToValue[Id] = Allocation; LocalArrays.push_back(Allocation); @@ -2435,6 +2511,21 @@ } std::string GPUNodeBuilder::finalizeKernelFunction() { + { + // NOTE: We currently copy all uses of gfortran_polly_array_index. + // However, these are unsused, but they refer to host side values + // So, ADCE them out. + // For correctness, we should probably add these to + // BlockGenerators.cpp - polly::isIgnoredIntrinsic. + llvm::legacy::PassManager OptPasses; + OptPasses.add(createAggressiveDCEPass()); + // Comment this to allow tests to pass: + // Polly :: GPGPU/host-control-flow.ll + // Polly :: GPGPU/kernel-params-only-some-arrays.ll + // Polly :: GPGPU/live-range-reordering-with-privatization.ll + // Polly :: GPGPU/phi-nodes-in-kernel.ll + OptPasses.run(*GPUModule); + } if (verifyModule(*GPUModule)) { DEBUG(dbgs() << "verifyModule failed on module:\n"; @@ -2862,22 +2953,26 @@ for (unsigned i = 1; i < NumDims; ++i) Extent = Extent.lower_bound_si(isl::dim::set, i, 0); - for (unsigned i = 0; i < NumDims; ++i) { - isl::pw_aff PwAff = Array->getDimensionSizePw(i); + if (!Array->hasStrides()) { + for (unsigned i = 0; i < NumDims; ++i) { + isl::pw_aff PwAff = Array->getDimensionSizePw(i); - // isl_pw_aff can be NULL for zero dimension. Only in the case of a - // Fortran array will we have a legitimate dimension. - if (PwAff.is_null()) { - assert(i == 0 && "invalid dimension isl_pw_aff for nonzero dimension"); - continue; - } + // isl_pw_aff can be NULL for zero dimension. Only in the case of a + // Fortran array will we have a legitimate dimension. + if (PwAff.is_null()) { + assert(i == 0 && + "invalid dimension isl_pw_aff for nonzero dimension"); + continue; + } - isl::pw_aff Val = isl::aff::var_on_domain( - isl::local_space(Array->getSpace()), isl::dim::set, i); - PwAff = PwAff.add_dims(isl::dim::in, Val.dim(isl::dim::in)); - PwAff = PwAff.set_tuple_id(isl::dim::in, Val.get_tuple_id(isl::dim::in)); - isl::set Set = PwAff.gt_set(Val); - Extent = Set.intersect(Extent); + isl::pw_aff Val = isl::aff::var_on_domain( + isl::local_space(Array->getSpace()), isl::dim::set, i); + PwAff = PwAff.add_dims(isl::dim::in, Val.dim(isl::dim::in)); + PwAff = + PwAff.set_tuple_id(isl::dim::in, Val.get_tuple_id(isl::dim::in)); + isl::set Set = PwAff.gt_set(Val); + Extent = Set.intersect(Extent); + } } return Extent; @@ -2894,34 +2989,63 @@ void setArrayBounds(gpu_array_info &PPCGArray, ScopArrayInfo *Array) { std::vector Bounds; - if (PPCGArray.n_index > 0) { - if (isl_set_is_empty(PPCGArray.extent)) { - isl_set *Dom = isl_set_copy(PPCGArray.extent); - isl_local_space *LS = isl_local_space_from_space( - isl_space_params(isl_set_get_space(Dom))); - isl_set_free(Dom); - isl_pw_aff *Zero = isl_pw_aff_from_aff(isl_aff_zero_on_domain(LS)); - Bounds.push_back(Zero); - } else { - isl_set *Dom = isl_set_copy(PPCGArray.extent); - Dom = isl_set_project_out(Dom, isl_dim_set, 1, PPCGArray.n_index - 1); - isl_pw_aff *Bound = isl_set_dim_max(isl_set_copy(Dom), 0); - isl_set_free(Dom); - Dom = isl_pw_aff_domain(isl_pw_aff_copy(Bound)); - isl_local_space *LS = - isl_local_space_from_space(isl_set_get_space(Dom)); - isl_aff *One = isl_aff_zero_on_domain(LS); - One = isl_aff_add_constant_si(One, 1); - Bound = isl_pw_aff_add(Bound, isl_pw_aff_alloc(Dom, One)); - Bound = isl_pw_aff_gist(Bound, S->getContext().release()); - Bounds.push_back(Bound); + if (!Array->hasStrides()) { + if (PPCGArray.n_index > 0) { + if (isl_set_is_empty(PPCGArray.extent)) { + isl_set *Dom = isl_set_copy(PPCGArray.extent); + isl_local_space *LS = isl_local_space_from_space( + isl_space_params(isl_set_get_space(Dom))); + isl_set_free(Dom); + isl_pw_aff *Zero = isl_pw_aff_from_aff(isl_aff_zero_on_domain(LS)); + Bounds.push_back(Zero); + } else { + isl_set *Dom = isl_set_copy(PPCGArray.extent); + Dom = isl_set_project_out(Dom, isl_dim_set, 1, PPCGArray.n_index - 1); + isl_pw_aff *Bound = isl_set_dim_max(isl_set_copy(Dom), 0); + isl_set_free(Dom); + Dom = isl_pw_aff_domain(isl_pw_aff_copy(Bound)); + isl_local_space *LS = + isl_local_space_from_space(isl_set_get_space(Dom)); + isl_aff *One = isl_aff_zero_on_domain(LS); + One = isl_aff_add_constant_si(One, 1); + Bound = isl_pw_aff_add(Bound, isl_pw_aff_alloc(Dom, One)); + Bound = isl_pw_aff_gist(Bound, S->getContext().release()); + Bounds.push_back(Bound); + } } } - for (unsigned i = 1; i < PPCGArray.n_index; ++i) { + const int BeginIndex = Array->hasStrides() ? 0 : 1; + for (unsigned i = BeginIndex; i < PPCGArray.n_index; ++i) { isl_pw_aff *Bound = Array->getDimensionSizePw(i).release(); auto LS = isl_pw_aff_get_domain_space(Bound); auto Aff = isl_multi_aff_zero(LS); + + // We need types to work out, which is why we perform this weird dance + // with `Aff` and `Bound`. Consider this example: + + // LS: [p] -> { [] } + // Zero: [p] -> { [] } | Implicitly, is [p] -> { ~ -> [] }. + // This `~` is used to denote a "null space" (which is different from + // a *zero dimensional* space), which is something that ISL does not + // show you when pretty printing. + + // Bound: [p] -> { [] -> [(10p)] } | Here, the [] is a *zero + // dimensional* space, not a "null space" which does not exist at all. + + // When we pullback (precompose) `Bound` with `Zero`, we get: + // Bound . Zero = + // ([p] -> { [] -> [(10p)] }) . ([p] -> {~ -> [] }) = + // [p] -> { ~ -> [(10p)] } = + // [p] -> [(10p)] (as ISL pretty prints it) + // Bound Pullback: [p] -> { [(10p)] } + + // We want this kind of an expression for Bound, without a + // zero dimensional input, but with a "null space" input for the types + // to work out later on, as far as I (Siddharth Bhat) understand. + // I was unable to find a reference to this in the ISL manual. + // References: Tobias Grosser. + Bound = isl_pw_aff_pullback_multi_aff(Bound, Aff); Bounds.push_back(Bound); } @@ -2933,8 +3057,10 @@ /// `-polly-ignore-parameter-bounds` enabled, the Scop::Context does not /// contain all parameter dimensions. /// So, use the helper `alignPwAffs` to align all the `isl_pw_aff` together. - isl_space *SeedAlignSpace = S->getParamSpace().release(); + isl_space *SeedAlignSpace = S->getFullParamSpace().release(); SeedAlignSpace = isl_space_add_dims(SeedAlignSpace, isl_dim_set, 1); + SeedAlignSpace = isl_space_align_params( + SeedAlignSpace, isl_set_get_space(PPCGArray.extent)); isl_space *AlignSpace = nullptr; std::vector AlignedBounds; @@ -3197,6 +3323,8 @@ Schedule = isl_schedule_align_params(Schedule, S->getFullParamSpace().release()); + // errs() << S->getFullParamSpace() << "\n"; + // report_fatal_error("see full param space"); if (!has_permutable || has_permutable < 0) { Schedule = isl_schedule_free(Schedule); Index: lib/External/isl/isl_map.c =================================================================== --- lib/External/isl/isl_map.c +++ lib/External/isl/isl_map.c @@ -12771,12 +12771,24 @@ ma_space = isl_multi_aff_get_space(ma); + /* + isl_printer *PRINTER = isl_printer_to_str(isl_basic_map_get_ctx(bmap)); + isl_printer_print_basic_map(PRINTER, bmap); + fprintf(stderr, "PRINTER%s%d:\n%s\n", __PRETTY_FUNCTION__, __LINE__, isl_printer_get_str(PRINTER)); + PRINTER = isl_printer_flush(PRINTER); + + isl_printer_print_multi_aff(PRINTER, ma); + fprintf(stderr, "PRINTER%s%d:\n%s\n", __PRETTY_FUNCTION__, __LINE__, isl_printer_get_str(PRINTER)); + PRINTER = isl_printer_flush(PRINTER); + */ + m = isl_space_has_equal_params(bmap->dim, ma_space); if (m < 0) goto error; - if (!m) + if (!m) { isl_die(isl_basic_map_get_ctx(bmap), isl_error_invalid, "parameters don't match", goto error); + } m = isl_space_tuple_is_equal(bmap->dim, type, ma_space, isl_dim_out); if (m < 0) goto error; Index: lib/Support/ScopHelper.cpp =================================================================== --- lib/Support/ScopHelper.cpp +++ lib/Support/ScopHelper.cpp @@ -591,3 +591,74 @@ Loop *L = LI.getLoopFor(BB); return getFirstNonBoxedLoopFor(L, LI, BoxedLoops); } + +static const bool AbstractMatrixDebug = false; +llvm::Optional> +polly::getAbstractMatrixCall(MemAccInst Inst, ScalarEvolution &SE) { + // Case 1. (Total size of array not known) + // %2 = tail call i64 @_gfortran_polly_array_index_2(i64 1, i64 %1, i64 + // %indvars.iv1, i64 %indvars.iv) #1 + // %3 = getelementptr float, float* %0, i64 + // %bitcast = bitcast %3 to + // %2 store float 2.000000e+00, float* %3, align 4 STORE (GEP ) + // (CALL index_2(, ))) + + // Case 2. (Total size of array statically known) + // %4 = tail call i64 @_gfortran_polly_array_index_2(i64 1, i64 5, i64 + // %indvars.iv1, i64 %indvars.iv) #1 %5 = getelementptr [25 x float], [25 x + // float]* @__m_MOD_g_arr_const_5_5, i64 0, i64 %4 store float 4.200000e+01, + // float* %5, align 4 + + if (AbstractMatrixDebug) { + errs() << "@@@" << __PRETTY_FUNCTION__ << "\n"; + errs() << "\nInst: " << *Inst.get() << "\n"; + } + + Value *MaybeBitcast = Inst.getPointerOperand(); + if (!MaybeBitcast) + return Optional>(None); + + if (AbstractMatrixDebug) + errs() << "Bitcast(maybe): " << *MaybeBitcast << "\n"; + + // If we have a bitcast as the parameter to the instruction, strip off the + // bitcast. Otherwise, return the original instruction operand. + Value *MaybeGEP = [&]() -> Value * { + BitCastOperator *Bitcast = dyn_cast(MaybeBitcast); + if (Bitcast) { + return Bitcast->getOperand(0); + } + return Inst.getPointerOperand(); + }(); + + if (AbstractMatrixDebug) + errs() << "\tGEP(maybe): " << *MaybeGEP << "\n"; + + GEPOperator *GEP = dyn_cast(MaybeGEP); + + if (!GEP) + return Optional>(None); + + if (AbstractMatrixDebug) + errs() << "\tGEP(for sure): " << *GEP << "\n"; + + auto *MaybeCall = GEP->getOperand(GEP->getNumOperands() - 1); + assert(MaybeCall); + if (AbstractMatrixDebug) + errs() << "\tCall(maybe): " << *MaybeCall << "\n"; + + CallInst *Call = dyn_cast(MaybeCall); + if (!Call) + return Optional>(None); + if (AbstractMatrixDebug) + errs() << "\tCall(for sure): " << *Call << "\n"; + + if (!Call->getCalledFunction()->getName().count( + POLLY_ABSTRACT_INDEX_BASENAME)) + return Optional>(None); + if (AbstractMatrixDebug) + errs() << "Called name: " << Call->getCalledFunction()->getName() << "\n"; + + std::pair p = std::make_pair(Call, GEP); + return Optional>(p); +} Index: lib/Transform/ForwardOpTree.cpp =================================================================== --- lib/Transform/ForwardOpTree.cpp +++ lib/Transform/ForwardOpTree.cpp @@ -338,9 +338,9 @@ Subscripts.push_back(nullptr); } - MemoryAccess *Access = - new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(), - LI->getType(), true, {}, Sizes, LI, MemoryKind::Array); + MemoryAccess *Access = new MemoryAccess( + Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(), LI->getType(), true, + {}, ShapeInfo::fromSizes(Sizes), LI, MemoryKind::Array); S->addAccessFunction(Access); Stmt->addAccess(Access, true); Index: test/GPGPU/09-12-failing-testcase-array-inv-load-hoist.ll =================================================================== --- /dev/null +++ test/GPGPU/09-12-failing-testcase-array-inv-load-hoist.ll @@ -0,0 +1,95 @@ +; RUN: opt %loadPolly \ +; RUN: -polly-invariant-load-hoisting \ +; RUN: -polly-detect-fortran-arrays \ +; RUN: -polly-codegen-ppcg \ +; RUN: -polly-acc-codegen-managed-memory \ +; RUN: -polly-acc-fail-on-verify-module-failure \ +; RUN: -debug-only=polly-codegen-ppcg,polly-scops \ +; RUN: -S \ +; RUN: -pass-remarks=polly + +; CHECK: foo +; CONTAINS +; SUBROUTINE f(arr, bloop, eloop, barr, earr, arrinp, arrinp2) +; INTEGER, INTENT(IN) :: & +; bloop, eloop, barr, earr +; REAL, INTENT(IN) :: & +; ! arrinp(barr:earr, ki3sd:ki3ed), & +; arrinp(barr:earr, barr:earr), & +; arrinp2(barr:earr, barr:earr) +; REAL, INTENT(INOUT) :: & +; arr(barr:earr) +; INTEGER :: & +; i, i3 +; REAL :: & +; coeff +; +; DO i=bloop, eloop +; arr(i) = arrinp(0, 0) + arrinp2(i, 0) +; END DO +; END SUBROUTINE f +; END MODULE m + +; ModuleID = 'm.ll' +source_filename = "m.bc" +target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +module asm "\09.ident\09\22GCC: (GNU) 4.6.4 LLVM: 3.3.1\22" + +; Function Attrs: nounwind uwtable +define void @__m_MOD_f([0 x float]* noalias %arr, i32* noalias %bloop, i32* noalias %eloop, i32* noalias %barr, i32* noalias %earr, [0 x float]* noalias %arrinp, [0 x float]* noalias %arrinp2) unnamed_addr #0 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + %0 = load i32, i32* %barr, align 4 + %1 = sext i32 %0 to i64 + %2 = load i32, i32* %earr, align 4 + %3 = sext i32 %2 to i64 + %4 = sub nsw i64 1, %1 + %5 = add nsw i64 %4, %3 + %6 = icmp sgt i64 %5, 0 + %7 = select i1 %6, i64 %5, i64 0 + %8 = sub nsw i64 0, %1 + %9 = mul i64 %7, %1 + %10 = sub i64 %8, %9 + %11 = load i32, i32* %bloop, align 4 + %12 = load i32, i32* %eloop, align 4 + %13 = icmp sgt i32 %11, %12 + br i1 %13, label %return, label %"3.preheader" + +"3.preheader": ; preds = %entry.split + br label %"3" + +"3": ; preds = %"3", %"3.preheader" + %14 = phi i32 [ %26, %"3" ], [ %11, %"3.preheader" ] + %15 = sext i32 %14 to i64 + %16 = tail call i64 @_gfortran_polly_array_index_1(i64 %8, i64 1, i64 %15) #1 + %17 = tail call i64 @_gfortran_polly_array_index_2(i64 %10, i64 %7, i64 1, i64 0, i64 0) #1 + %18 = getelementptr [0 x float], [0 x float]* %arrinp, i64 0, i64 %17 + %19 = load float, float* %18, align 4 + %20 = tail call i64 @_gfortran_polly_array_index_2(i64 %10, i64 %7, i64 1, i64 0, i64 %15) #1 + %21 = getelementptr [0 x float], [0 x float]* %arrinp2, i64 0, i64 %20 + %22 = load float, float* %21, align 4 + %23 = fadd float %19, %22 + %24 = getelementptr [0 x float], [0 x float]* %arr, i64 0, i64 %16 + store float %23, float* %24, align 4 + %25 = icmp eq i32 %14, %12 + %26 = add i32 %14, 1 + br i1 %25, label %return.loopexit, label %"3" + +return.loopexit: ; preds = %"3" + br label %return + +return: ; preds = %return.loopexit, %entry.split + ret void +} + +declare i64 @_gfortran_polly_array_index_1(i64, i64, i64) #2 + +declare i64 @_gfortran_polly_array_index_2(i64, i64, i64, i64, i64) #2 + +attributes #0 = { nounwind uwtable } +attributes #1 = { nounwind } +attributes #2 = { readnone } Index: test/GPGPU/09-13-coe-th-referring-to-arg-in-another-fn.ll =================================================================== --- /dev/null +++ test/GPGPU/09-13-coe-th-referring-to-arg-in-another-fn.ll @@ -0,0 +1,105 @@ +; RUN: opt %loadPolly \ +; RUN: -polly-invariant-load-hoisting \ +; RUN: -polly-detect-fortran-arrays \ +; RUN: -polly-codegen-ppcg \ +; RUN: -polly-acc-codegen-managed-memory \ +; RUN: -polly-acc-fail-on-verify-module-failure \ +; RUN: -debug-only=polly-codegen-ppcg,polly-scops \ +; RUN: -S \ +; RUN: -pass-remarks=polly \ +; RUN: < %s + +; MODULE m +; IMPLICIT NONE +; +; CONTAINS +; SUBROUTINE f(arr, arr_input, bloop, eloop) +; INTEGER, INTENT(IN) :: & +; bloop, eloop +; REAL, INTENT(IN) :: & +; arr_input(bloop:eloop) +; REAL, INTENT(INOUT) :: & +; arr(bloop:eloop) +; INTEGER :: & +; i +; REAL :: & +; cmp_val, final_val +; +; DO i=bloop, eloop +; cmp_val = arr_input(i) +; IF (cmp_val .LT. 42) THEN +; final_val = EXP (cmp_val) +; ELSE +; final_val = 100 +; END IF +; +; arr(i)=final_val +; END DO +; END SUBROUTINE f +; END MODULE m; + +; Error: +; verifyModule Error: +; Referring to an argument in another function! +; %p_1 = getelementptr [0 x float], [0 x float]* %arr_input, i64 0, i64 %p_ +; VerifyModule failed. + +target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +module asm "\09.ident\09\22GCC: (GNU) 4.6.4 LLVM: 3.3.1\22" + +; Function Attrs: nounwind uwtable +define void @__m_MOD_f([0 x float]* noalias %arr, [0 x float]* noalias %arr_input, i32* noalias %bloop, i32* noalias %eloop) unnamed_addr #0 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + %0 = load i32, i32* %bloop, align 4 + %1 = sext i32 %0 to i64 + %2 = load i32, i32* %eloop, align 4 + %3 = sub nsw i64 0, %1 + %4 = icmp sgt i32 %0, %2 + br i1 %4, label %return, label %"3.preheader" + +"3.preheader": ; preds = %entry.split + br label %"3" + +"3": ; preds = %"6", %"3.preheader" + %5 = phi i32 [ %16, %"6" ], [ %0, %"3.preheader" ] + %6 = sext i32 %5 to i64 + %7 = tail call i64 @_gfortran_polly_array_index_1(i64 %3, i64 1, i64 %6) #2 + %8 = getelementptr [0 x float], [0 x float]* %arr_input, i64 0, i64 %7 + %9 = load float, float* %8, align 4 + %10 = fcmp olt float %9, 4.2e+02 + br i1 %10, label %"4", label %"6" + +"4": ; preds = %"3" + %11 = tail call float @expf(float %9) #1 + br label %"6" + +"6": ; preds = %"3", %"4" + %12 = phi float [ %11, %"4" ], [ 1.000000e+02, %"3" ] + %13 = tail call i64 @_gfortran_polly_array_index_1(i64 %3, i64 1, i64 %6) #2 + %14 = getelementptr [0 x float], [0 x float]* %arr, i64 0, i64 %13 + store float %12, float* %14, align 4 + %15 = icmp eq i32 %5, %2 + %16 = add i32 %5, 1 + br i1 %15, label %return.loopexit, label %"3" + +return.loopexit: ; preds = %"6" + br label %return + +return: ; preds = %return.loopexit, %entry.split + ret void +} + +declare i64 @_gfortran_polly_array_index_1(i64, i64, i64) #2 + +; Function Attrs: nounwind readnone +declare float @expf(float) #1 + +attributes #0 = { nounwind uwtable } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind } +attributes #2 = { readnone } Index: test/GPGPU/09-13-unbounded-optimium.ll =================================================================== --- /dev/null +++ test/GPGPU/09-13-unbounded-optimium.ll @@ -0,0 +1,82 @@ +; MODULE m +; IMPLICIT NONE +; +; CONTAINS +; SUBROUTINE f(arr, bloop, eloop, arrinp) +; INTEGER, INTENT(IN) :: & +; bloop, eloop +; REAL, INTENT(IN) :: & +; arrinp(bloop:eloop) +; REAL, INTENT(INOUT) :: & +; arr(bloop:eloop) +; INTEGER :: & +; i +; +; DO i=bloop, eloop +; arr(i) = arrinp(i) +; END DO +; END SUBROUTINE f +; END MODULE m + +; RUN: opt %loadPolly \ +; RUN: -polly-invariant-load-hoisting \ +; RUN: -polly-detect-fortran-arrays \ +; RUN: -polly-codegen-ppcg \ +; RUN: -polly-acc-codegen-managed-memory \ +; RUN: -polly-acc-fail-on-verify-module-failure \ +; RUN: -debug-only=polly-codegen-ppcg,polly-scops \ +; RUN: -S \ +; RUN: -pass-remarks=polly \ +; RUN: < %s + +; ModuleID = 'm.ll' +source_filename = "m.bc" +target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +module asm "\09.ident\09\22GCC: (GNU) 4.6.4 LLVM: 3.3.1\22" + +; Function Attrs: nounwind uwtable +define void @__m_MOD_f([0 x float]* noalias %arr, i32* noalias %bloop, i32* noalias %eloop, [0 x float]* noalias %arrinp) unnamed_addr #0 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + %0 = load i32, i32* %bloop, align 4 + %1 = sext i32 %0 to i64 + %2 = load i32, i32* %eloop, align 4 + %3 = sub nsw i64 0, %1 + %4 = icmp sgt i32 %0, %2 + br i1 %4, label %return, label %"3.preheader" + +"3.preheader": ; preds = %entry.split + br label %"3" + +"3": ; preds = %"3", %"3.preheader" + %5 = phi i32 [ %15, %"3" ], [ %0, %"3.preheader" ] + %6 = sext i32 %5 to i64 + %7 = tail call i64 @_gfortran_polly_array_index_1(i64 %3, i64 1, i64 %6) #1 + %8 = tail call i64 @_gfortran_polly_array_index_1(i64 %3, i64 1, i64 %6) #1 + %9 = getelementptr [0 x float], [0 x float]* %arrinp, i64 0, i64 %8 + %10 = bitcast float* %9 to i32* + %11 = load i32, i32* %10, align 4 + %12 = getelementptr [0 x float], [0 x float]* %arr, i64 0, i64 %7 + %13 = bitcast float* %12 to i32* + store i32 %11, i32* %13, align 4 + %14 = icmp eq i32 %5, %2 + %15 = add i32 %5, 1 + br i1 %14, label %return.loopexit, label %"3" + +return.loopexit: ; preds = %"3" + br label %return + +return: ; preds = %return.loopexit, %entry.split + ret void +} + +declare i64 @_gfortran_polly_array_index_1(i64, i64, i64) #2 + +attributes #0 = { nounwind uwtable } +attributes #1 = { nounwind } +attributes #2 = { readnone } + Index: test/GPGPU/abstract-index-function-simple.ll =================================================================== --- /dev/null +++ test/GPGPU/abstract-index-function-simple.ll @@ -0,0 +1,103 @@ +; RUN: opt %loadPolly -polly-scops -analyze -polly-allow-nonaffine \ +; RUN: -polly-ignore-aliasing -polly-use-llvm-names \ +; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s -check-prefix=SCOP + +; RUN: opt %loadPolly -S -polly-allow-nonaffine \ +; RUN: -polly-acc-codegen-managed-memory \ +; RUN: -polly-ignore-aliasing -polly-use-llvm-names \ +; RUN: -polly-invariant-load-hoisting -polly-codegen-ppcg \ +; RUN: -polly-acc-fail-on-verify-module-failure < %s\ +; RUN: | FileCheck %s -check-prefix=HOST-IR + +; RUN: opt %loadPolly -disable-output -polly-acc-dump-kernel-ir \ +; RUN: -polly-acc-codegen-managed-memory \ +; RUN: -polly-allow-nonaffine \ +; RUN: -polly-ignore-aliasing -polly-use-llvm-names \ +; RUN: -polly-invariant-load-hoisting -polly-codegen-ppcg \ +; RUN: -polly-acc-fail-on-verify-module-failure < %s \ +; RUN: | FileCheck %s -check-prefix=KERNEL-IR + +; SCOP: Function: __m_MOD_f +; SCOP-NEXT: Region: %"3"---%return +; SCOP-NEXT: Max Loop Depth: 2 + +; HOST-IR: polly_getKernel + +; Check that the call to polly_array_index has been eliminated. +; KERNEL-IR-NOT: call i64 @_gfortran_polly_array_index_2 + +; MODULE m +; IMPLICIT NONE +; ! REAL :: & +; ! g_arr_const_5_5(5, 5) +; REAL, ALLOCATABLE :: & +; g_arr(:,:) +; +; CONTAINS +; SUBROUTINE f() +; INTEGER :: & +; i, j +; +; REAL :: & +; coeff +; +; DO i=1,3 +; DO j=1,3 +; g_arr(i*5, j*2) = 100 +; END DO +; END DO +; END SUBROUTINE f +; END MODULE m +; ModuleID = 'm.ll' +source_filename = "m.bc" +target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +module asm "\09.ident\09\22GCC: (GNU) 4.6.4 LLVM: 3.3.1\22" + +%"struct.array2_real(kind=4)" = type { i8*, i64, i64, [2 x %struct.descriptor_dimension] } +%struct.descriptor_dimension = type { i64, i64, i64 } + +@__m_MOD_g_arr = unnamed_addr global %"struct.array2_real(kind=4)" zeroinitializer, align 32 + +; Function Attrs: nounwind uwtable +define void @__m_MOD_f() unnamed_addr #0 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br label %"3" + +"3": ; preds = %"6", %entry.split + %indvars.iv1 = phi i64 [ %indvars.iv.next2, %"6" ], [ 1, %entry.split ] + br label %"4" + +"4": ; preds = %"4", %"3" + %indvars.iv = phi i64 [ %indvars.iv.next, %"4" ], [ 1, %"3" ] + %0 = load float*, float** bitcast (%"struct.array2_real(kind=4)"* @__m_MOD_g_arr to float**), align 32 + %1 = mul nuw nsw i64 %indvars.iv1, 5 + %2 = shl i64 %indvars.iv, 1 + %3 = load i64, i64* getelementptr inbounds (%"struct.array2_real(kind=4)", %"struct.array2_real(kind=4)"* @__m_MOD_g_arr, i64 0, i32 3, i64 1, i32 0), align 8 + %4 = load i64, i64* getelementptr inbounds (%"struct.array2_real(kind=4)", %"struct.array2_real(kind=4)"* @__m_MOD_g_arr, i64 0, i32 1), align 8 + %5 = tail call i64 @_gfortran_polly_array_index_2(i64 %4, i64 %3, i64 1, i64 %2, i64 %1) #1 + %6 = getelementptr float, float* %0, i64 %5 + store float 1.000000e+02, float* %6, align 4 + %7 = icmp eq i64 %indvars.iv, 3 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br i1 %7, label %"6", label %"4" + +"6": ; preds = %"4" + %8 = icmp eq i64 %indvars.iv1, 3 + %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1 + br i1 %8, label %return, label %"3" + +return: ; preds = %"6" + ret void +} + +declare i64 @_gfortran_polly_array_index_2(i64, i64, i64, i64, i64) #2 + +attributes #0 = { nounwind uwtable } +attributes #1 = { nounwind } +attributes #2 = { readnone } + Index: test/ScopInfo/abstract-index-function-simple.ll =================================================================== --- /dev/null +++ test/ScopInfo/abstract-index-function-simple.ll @@ -0,0 +1,95 @@ +; RUN: opt %loadPolly -polly-scops -analyze \ +; RUN: -polly-ignore-aliasing -polly-use-llvm-names \ +; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s + + +; Check that we detect the scop. +; CHECK: Function: __m_MOD_f +; CHECK: Region: %"3"---%return +; CHECK: Max Loop Depth: 2 + +; Check that we generate pw_affs for strides. +; CHECK: Arrays { +; CHECK-NEXT: float* MemRef___m_MOD_g_arr[*]; // Element size 8 +; CHECK-NEXT: float MemRef1(Strides)[%3][1];[Offset: %4]; [BasePtrOrigin: MemRef___m_MOD_g_arr] // Element size 4 +; CHECK-NEXT: } +; CHECK-NEXT: Arrays (Bounds as pw_affs) { +; CHECK-NEXT: float* MemRef___m_MOD_g_arr[*]; // Element size 8 +; CHECK-NEXT: float MemRef1(Strides)[ [0_param_stride_size] -> { [] -> [(0_param_stride_size)] } ][ [1_param_stride_size] -> { [] -> [(1_param_stride_size)] } ];[Offset: %4]; [BasePtrOrigin: MemRef___m_MOD_g_arr] // Element size 4 +; CHECK-NEXT: } + + +; MODULE m +; IMPLICIT NONE +; ! REAL :: & +; ! g_arr_const_5_5(5, 5) +; REAL, ALLOCATABLE :: & +; g_arr(:,:) +; +; CONTAINS +; SUBROUTINE f() +; INTEGER :: & +; i, j +; +; REAL :: & +; coeff +; +; DO i=1,3 +; DO j=1,3 +; g_arr(i*5, j*2) = 100 +; END DO +; END DO +; END SUBROUTINE f +; END MODULE m +; ModuleID = 'm.ll' +source_filename = "m.bc" +target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +module asm "\09.ident\09\22GCC: (GNU) 4.6.4 LLVM: 3.3.1\22" + +%"struct.array2_real(kind=4)" = type { i8*, i64, i64, [2 x %struct.descriptor_dimension] } +%struct.descriptor_dimension = type { i64, i64, i64 } + +@__m_MOD_g_arr = unnamed_addr global %"struct.array2_real(kind=4)" zeroinitializer, align 32 + +; Function Attrs: nounwind uwtable +define void @__m_MOD_f() unnamed_addr #0 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br label %"3" + +"3": ; preds = %"6", %entry.split + %indvars.iv1 = phi i64 [ %indvars.iv.next2, %"6" ], [ 1, %entry.split ] + br label %"4" + +"4": ; preds = %"4", %"3" + %indvars.iv = phi i64 [ %indvars.iv.next, %"4" ], [ 1, %"3" ] + %0 = load float*, float** bitcast (%"struct.array2_real(kind=4)"* @__m_MOD_g_arr to float**), align 32 + %1 = mul nuw nsw i64 %indvars.iv1, 5 + %2 = shl i64 %indvars.iv, 1 + %3 = load i64, i64* getelementptr inbounds (%"struct.array2_real(kind=4)", %"struct.array2_real(kind=4)"* @__m_MOD_g_arr, i64 0, i32 3, i64 1, i32 0), align 8 + %4 = load i64, i64* getelementptr inbounds (%"struct.array2_real(kind=4)", %"struct.array2_real(kind=4)"* @__m_MOD_g_arr, i64 0, i32 1), align 8 + %5 = tail call i64 @_gfortran_polly_array_index_2(i64 %4, i64 %3, i64 1, i64 %2, i64 %1) #1 + %6 = getelementptr float, float* %0, i64 %5 + store float 1.000000e+02, float* %6, align 4 + %7 = icmp eq i64 %indvars.iv, 3 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br i1 %7, label %"6", label %"4" + +"6": ; preds = %"4" + %8 = icmp eq i64 %indvars.iv1, 3 + %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1 + br i1 %8, label %return, label %"3" + +return: ; preds = %"6" + ret void +} + +declare i64 @_gfortran_polly_array_index_2(i64, i64, i64, i64, i64) #2 + +attributes #0 = { nounwind uwtable } +attributes #1 = { nounwind } +attributes #2 = { readnone } Index: tools/GPURuntime/GPUJIT.c =================================================================== --- tools/GPURuntime/GPUJIT.c +++ tools/GPURuntime/GPUJIT.c @@ -1688,6 +1688,18 @@ void **Parameters) { dump_function(); + /* + void **kernel = (void **)Parameters[0]; + for(int i = 0; i < 4; i++) { + fprintf(stderr, "\tParam[%d] = %p\n", i, Parameters[i]); + } + + fprintf(stderr, "\t\tKERNEL: %p\n", *kernel); + fprintf(stderr, "\t\tSTRIDE0: %d\n", *((int *)(Parameters[1]))); + fprintf(stderr, "\t\tSTRIDE1: %d\n", *((int *)(Parameters[2]))); + fprintf(stderr, "\t\tOFFSET: %d\n", *((int *)(Parameters[3]))); + */ + switch (Runtime) { #ifdef HAS_LIBCUDART case RUNTIME_CUDA: @@ -1827,7 +1839,9 @@ dump_function(); #ifdef HAS_LIBCUDART - return mallocManagedCUDA(size); + void *mem = mallocManagedCUDA(size); + // fprintf(stderr, "\tMallocManaged: %zu | %p \n", size, mem); + return mem; #else fprintf(stderr, "No CUDA Runtime. Managed memory only supported by CUDA\n"); exit(-1);