Index: include/polly/ScopBuilder.h
===================================================================
--- include/polly/ScopBuilder.h
+++ include/polly/ScopBuilder.h
@@ -54,6 +54,37 @@
   // The Scop
   std::unique_ptr<Scop> scop;
 
+  // Methods for pattern matching against Fortran code generated by dragonegg.
+  // @{
+
+  /// Try to pattern match and find the array descriptor structure in case of a
+  /// Fortran array access. Succeeds on load/store into a Fortran array that
+  /// has been allocated using "Allocate" in the same module.
+  ///
+  /// @see polly::FortranArrayDescriptor
+  ///
+  /// @param Inst The load/store instruction that accesses the memory.
+  ///
+  /// @returns The descriptor global, or nullptr if the pattern does not match.
+  ///
+  /// @note assumes -polly-canonicalize has been run.
+  GlobalValue *findFortranArrayDescriptorForAllocArrayAccess(MemAccInst Inst);
+
+  /// Try to pattern match and find the array descriptor structure in case of a
+  /// Fortran array access. Succeeds on load/store into a Fortran array that
+  /// has been declared global and is being accessed across modules.
+  ///
+  /// @see polly::FortranArrayDescriptor
+  ///
+  /// @param Inst The load/store instruction that accesses the memory.
+  ///
+  /// @returns The descriptor global, or nullptr if the pattern does not match.
+  ///
+  /// @note assumes -polly-canonicalize has been run.
+  GlobalValue *
+  findFortranArrayDescriptorForNonAllocArrayAccess(MemAccInst Inst);
+  // @}
+
   // Build the SCoP for Region @p R.
   void buildScop(Region &R, AssumptionCache &AC);
 
Index: include/polly/ScopInfo.h
===================================================================
--- include/polly/ScopInfo.h
+++ include/polly/ScopInfo.h
@@ -602,6 +602,13 @@
 
   /// Updated access relation read from JSCOP file.
   isl_map *NewAccessRelation;
+
+  /// Fortran arrays that are created using "Allocate" are stored in terms
+  /// of a descriptor struct. The struct maintains a raw pointer to the memory,
+  /// along with auxiliary fields with information such as dimensions.
+  /// We hold a reference to the descriptor corresponding to a MemoryAccess
+  /// into a Fortran array; null otherwise. FAD for "Fortran Array Descriptor".
+  AssertingVH<GlobalValue> FAD;
   // @}
 
   __isl_give isl_basic_map *createBasicAccessMap(ScopStmt *Statement);
@@ -1006,6 +1013,10 @@
   /// Get the reduction type of this access
   ReductionType getReductionType() const { return RedType; }
 
+  /// Set the array descriptor corresponding to the Array on which the
+  /// memory access is performed.
+  void setFortranArrayDescriptor(GlobalValue *FAD);
+
   /// Update the original access relation.
   ///
   /// We need to update the original access relation during scop construction,
Index: lib/Analysis/ScopBuilder.cpp
===================================================================
--- lib/Analysis/ScopBuilder.cpp
+++ lib/Analysis/ScopBuilder.cpp
@@ -113,6 +113,169 @@
   }
 }
 
+/// Check whether @p Ty has the shape of a Fortran array descriptor as emitted
+/// by dragonegg: a named struct whose name starts with "struct.array" and that
+/// has exactly four fields. These are the properties that
+/// MemoryAccess::setFortranArrayDescriptor asserts on the descriptor global.
+static bool isFortranArrayDescriptorType(Type *Ty) {
+  auto *StructTy = dyn_cast<StructType>(Ty);
+  return StructTy && StructTy->hasName() &&
+         StructTy->getName().startswith("struct.array") &&
+         StructTy->getNumElements() == 4;
+}
+
+/// This is matching against code generated by dragonegg after simplifier
+/// passes have been run.
+///
+/// This is trying to match against "@globaldescriptor", the descriptor
+/// of the Fortran array that is being accessed at load/store. This style
+/// of code is generated for arrays that have been allocated using "Allocate"
+/// in the same module.
+///
+/// Pattern Match:
+/// 1. %mallocmem = i8* @malloc(i64 40)
+///
+/// 5. store i8* %mallocmem, i8** getelementptr inbounds
+///       (%"struct.array1_real(kind=8)", %"struct.array1_real(kind=8)"*
+///       @globaldescriptor, i64 0, i32 0), align 32
+///
+/// 2. %typedmem = bitcast i8* %mallocmem to <memtype>*
+///
+/// 3 is optional because if you are writing to the 0th index, you don't
+///     need a GEP.
+/// 3. [%slot = getelementptr inbounds i8, i8* %typedmem, i64 <index>]
+///
+/// 4.1 store/load <memtype> <val>, <memtype>* %typedmem, align 8
+/// 4.2 store/load <memtype> <val>, <memtype>* %slot, align 8
+GlobalValue *
+ScopBuilder::findFortranArrayDescriptorForAllocArrayAccess(MemAccInst Inst) {
+  // match: 4.1 & 4.2 store/load
+  if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
+    return nullptr;
+
+  // match: 4
+  if (Inst.getAlignment() != 8)
+    return nullptr;
+
+  Value *Address = Inst.getPointerOperand();
+
+  const BitCastInst *Bitcast = nullptr;
+  // [match: 3]
+  if (auto *Slot = dyn_cast<GetElementPtrInst>(Address)) {
+    Value *TypedMem = Slot->getPointerOperand();
+    // match: 2
+    Bitcast = dyn_cast<BitCastInst>(TypedMem);
+  } else {
+    // match: 2
+    Bitcast = dyn_cast<BitCastInst>(Address);
+  }
+
+  if (!Bitcast)
+    return nullptr;
+
+  auto *MallocMem = Bitcast->getOperand(0);
+
+  // match: 1
+  auto *MallocCall = dyn_cast<CallInst>(MallocMem);
+  if (!MallocCall)
+    return nullptr;
+
+  Function *MallocFn = MallocCall->getCalledFunction();
+  if (!(MallocFn && MallocFn->hasName() && MallocFn->getName() == "malloc"))
+    return nullptr;
+
+  // Find all uses of the malloc'd memory.
+  // We are looking for a "store" into a struct with the type being the Fortran
+  // descriptor type
+  for (auto *MallocUser : MallocMem->users()) {
+
+    // match: 5
+    auto *MallocStore = dyn_cast<StoreInst>(MallocUser);
+    if (!MallocStore)
+      continue;
+
+    auto *DescriptorGEP =
+        dyn_cast<GEPOperator>(MallocStore->getPointerOperand());
+    if (!DescriptorGEP)
+      continue;
+
+    // match: 5 (the store must target a Fortran array descriptor struct)
+    if (!isFortranArrayDescriptorType(DescriptorGEP->getSourceElementType()))
+      continue;
+
+    GlobalValue *Descriptor =
+        dyn_cast<GlobalValue>(DescriptorGEP->getPointerOperand());
+
+    if (!Descriptor)
+      continue;
+
+    return Descriptor;
+  }
+
+  return nullptr;
+}
+
+/// This is matching against code generated by dragonegg after simplifier
+/// passes have been run.
+///
+/// This is trying to match against "@globaldescriptor", the descriptor
+/// of the Fortran array that is being accessed at load/store. This style
+/// of code is generated for arrays that have been declared global, and
+/// are being accessed across modules.
+///
+/// Pattern Match:
+/// 1. %mem = load double*, double** bitcast (%"struct.array1_real(kind=8)"*
+///    @globaldescriptor to double**), align 32
+///
+/// 2 is optional because if you are writing to the 0th index, you don't
+///     need a GEP.
+/// 2. [%slot = getelementptr inbounds i8, i8* %mem, i64 <index>]
+///
+/// 3.1 store/load <memtype> <val>, <memtype>* %slot, align 8
+/// 3.2 store/load <memtype> <val>, <memtype>* %mem, align 8
+GlobalValue *
+ScopBuilder::findFortranArrayDescriptorForNonAllocArrayAccess(MemAccInst Inst) {
+  // match: 3
+  if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
+    return nullptr;
+
+  // match: 3
+  if (Inst.getAlignment() != 8)
+    return nullptr;
+
+  Value *Slot = Inst.getPointerOperand();
+
+  LoadInst *MemLoad = nullptr;
+  // [match: 2]
+  if (auto *SlotGEP = dyn_cast<GetElementPtrInst>(Slot)) {
+    // match: 1
+    MemLoad = dyn_cast<LoadInst>(SlotGEP->getPointerOperand());
+  } else {
+    // match: 1
+    MemLoad = dyn_cast<LoadInst>(Slot);
+  }
+
+  if (!MemLoad)
+    return nullptr;
+
+  auto *BitcastOperator =
+      dyn_cast<BitCastOperator>(MemLoad->getPointerOperand());
+  if (!BitcastOperator)
+    return nullptr;
+
+  GlobalValue *Descriptor =
+      dyn_cast<GlobalValue>(BitcastOperator->getOperand(0));
+  if (!Descriptor)
+    return nullptr;
+
+  // Any global can be loaded through a bitcast; only accept globals whose
+  // value type actually looks like a Fortran array descriptor.
+  if (!isFortranArrayDescriptorType(Descriptor->getValueType()))
+    return nullptr;
+
+  return Descriptor;
+}
+
 bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
   Value *Val = Inst.getValueOperand();
   Type *ElementType = Val->getType();
@@ -532,9 +695,17 @@
     Type *ElementType, bool IsAffine, ArrayRef<const SCEV *> Subscripts,
     ArrayRef<const SCEV *> Sizes, Value *AccessValue) {
   ArrayBasePointers.insert(BaseAddress);
-  addMemoryAccess(MemAccInst->getParent(), MemAccInst, AccType, BaseAddress,
-                  ElementType, IsAffine, AccessValue, Subscripts, Sizes,
-                  MemoryKind::Array);
+  auto *MemAccess = addMemoryAccess(
+      MemAccInst->getParent(), MemAccInst, AccType, BaseAddress, ElementType,
+      IsAffine, AccessValue, Subscripts, Sizes, MemoryKind::Array);
+
+  // Try the pattern with a visible allocation first, then fall back to the
+  // pattern for descriptors that are accessed across modules.
+  GlobalValue *FAD = findFortranArrayDescriptorForAllocArrayAccess(MemAccInst);
+  if (!FAD)
+    FAD = findFortranArrayDescriptorForNonAllocArrayAccess(MemAccInst);
+  if (FAD)
+    MemAccess->setFortranArrayDescriptor(FAD);
 }
 
 void ScopBuilder::ensureValueWrite(Instruction *Inst) {
Index: lib/Analysis/ScopInfo.cpp
===================================================================
--- lib/Analysis/ScopInfo.cpp
+++ lib/Analysis/ScopInfo.cpp
@@ -974,7 +974,7 @@
       Sizes(Sizes.begin(), Sizes.end()), AccessInstruction(AccessInst),
       AccessValue(AccessValue), IsAffine(Affine),
       Subscripts(Subscripts.begin(), Subscripts.end()), AccessRelation(nullptr),
-      NewAccessRelation(nullptr) {
+      NewAccessRelation(nullptr), FAD(nullptr) {
   static const std::string TypeStrings[] = {"", "_Read", "_Write", "_MayWrite"};
   const std::string Access = TypeStrings[AccType] + utostr(Stmt->size());
 
@@ -986,7 +986,8 @@
                            __isl_take isl_map *AccRel)
     : Kind(MemoryKind::Array), AccType(AccType), RedType(RT_NONE),
       Statement(Stmt), InvalidDomain(nullptr), AccessInstruction(nullptr),
-      IsAffine(true), AccessRelation(nullptr), NewAccessRelation(AccRel) {
+      IsAffine(true), AccessRelation(nullptr), NewAccessRelation(AccRel),
+      FAD(nullptr) {
   auto *ArrayInfoId = isl_map_get_tuple_id(NewAccessRelation, isl_dim_out);
   auto *SAI = ScopArrayInfo::getFromId(ArrayInfoId);
   Sizes.push_back(nullptr);
@@ -1022,6 +1023,22 @@
   return OS;
 }
 
+void MemoryAccess::setFortranArrayDescriptor(GlobalValue *FAD) {
+  this->FAD = FAD;
+
+// TODO: write checks to make sure it looks _exactly_ like a Fortran array
+// descriptor
+#ifndef NDEBUG
+  StructType *Ty = dyn_cast<StructType>(FAD->getValueType());
+  assert(Ty && "expected value of type Fortran array descriptor");
+  assert(Ty->hasName() && Ty->getName().startswith("struct.array") &&
+         "expected global to follow Fortran array descriptor type naming "
+         "convention");
+  assert(Ty->getNumElements() == 4 &&
+         "expected layout to be like Fortran array descriptor type");
+#endif
+}
+
 void MemoryAccess::print(raw_ostream &OS) const {
   switch (AccType) {
   case READ:
@@ -1034,7 +1051,14 @@
     OS.indent(12) << "MayWriteAccess :=\t";
     break;
   }
+
   OS << "[Reduction Type: " << getReductionType() << "] ";
+
+  if (FAD) {
+    OS << "[Fortran array descriptor: " << FAD->getName();
+    OS << "] ";
+  }
+
   OS << "[Scalar: " << isScalarKind() << "]\n";
   OS.indent(16) << getOriginalAccessRelationStr() << ";\n";
   if (hasNewAccessRelation())
Index: test/FortranDetection/global-malloc-nonvectored.ll
===================================================================
--- /dev/null
+++ test/FortranDetection/global-malloc-nonvectored.ll
@@ -0,0 +1,143 @@
+; RUN: opt -S -analyze -polly-process-unprofitable -polly-remarks-minimal \
+; RUN: -polly-canonicalize -polly-scops -polly-dependences \
+; RUN: -polly-allow-nonaffine \
+; RUN: -polly-ignore-aliasing -polly-invariant-load-hoisting \
+; RUN: < %s| FileCheck %s
+;
+; MODULE src_soil
+; USE data_parameters, ONLY : &
+;     wp, &        ! KIND-type parameter for real variables
+;     iintegers    ! KIND-type parameter for standard integer variables
+; IMPLICIT NONE
+; REAL (KIND = wp), ALLOCATABLE, PRIVATE :: &
+;   xdzs (:)
+; CONTAINS
+; SUBROUTINE terra1(n)
+;   INTEGER, intent(in) :: n
+;   INTEGER (KIND=iintegers) :: &
+;     j
+;   Allocate(xdzs(n));
+;   DO j = 2, n
+;     xdzs(j) = xdzs(j) * xdzs(j) + xdzs(j - 1)
+;   END DO
+; END SUBROUTINE terra1
+; END MODULE src_soil
+
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+module asm "\09.ident\09\22GCC: (GNU) 4.6.4 LLVM: 3.3.1\22"
+
+%"struct.array1_real(kind=8)" = type { i8*, i64, i64, [1 x %struct.descriptor_dimension] }
+%struct.descriptor_dimension = type { i64, i64, i64 }
+
+@__src_soil_MOD_xdzs = unnamed_addr global %"struct.array1_real(kind=8)" zeroinitializer, align 32
+@.cst = private unnamed_addr constant [67 x i8] c"Integer overflow when calculating the amount of memory to allocate\00", align 64
+@.cst1 = private unnamed_addr constant [37 x i8] c"Allocation would exceed memory limit\00", align 64
+@.cst2 = private unnamed_addr constant [93 x i8] c"At line 23 of file /home/siddhart/cosmo-self-installation/cosmo-pompa/cosmo/src/src_soil.f90\00", align 64
+@.cst3 = private unnamed_addr constant [55 x i8] c"Attempting to allocate already allocated variable '%s'\00", align 64
+@.cst4 = private unnamed_addr constant [5 x i8] c"xdzs\00", align 8
+
+; Function Attrs: nounwind uwtable
+define void @__src_soil_MOD_terra1(i32* noalias nocapture %n) unnamed_addr #0 {
+entry:
+  store i64 537, i64* getelementptr inbounds (%"struct.array1_real(kind=8)", %"struct.array1_real(kind=8)"* @__src_soil_MOD_xdzs, i64 0, i32 2), align 16, !tbaa !0
+  store i64 1, i64* getelementptr inbounds (%"struct.array1_real(kind=8)", %"struct.array1_real(kind=8)"* @__src_soil_MOD_xdzs, i64 0, i32 3, i64 0, i32 1), align 8, !tbaa !0
+  %0 = load i32, i32* %n, align 4, !tbaa !3
+  %1 = sext i32 %0 to i64
+  store i64 %1, i64* getelementptr inbounds (%"struct.array1_real(kind=8)", %"struct.array1_real(kind=8)"* @__src_soil_MOD_xdzs, i64 0, i32 3, i64 0, i32 2), align 8, !tbaa !0
+  store i64 1, i64* getelementptr inbounds (%"struct.array1_real(kind=8)", %"struct.array1_real(kind=8)"* @__src_soil_MOD_xdzs, i64 0, i32 3, i64 0, i32 0), align 8, !tbaa !0
+  %2 = icmp slt i32 %0, 0
+  %3 = select i1 %2, i64 0, i64 %1
+  %4 = icmp eq i64 %3, 0
+  br i1 %4, label %"16", label %"8"
+
+"8":                                              ; preds = %entry
+  %5 = sdiv i64 9223372036854775807, %1
+  %6 = icmp slt i64 %5, 1
+  %7 = icmp slt i32 %0, 1
+  %8 = shl nsw i64 %3, 3
+  %.2 = select i1 %7, i64 0, i64 %8
+  br i1 %6, label %"15", label %"16"
+
+"15":                                             ; preds = %"8"
+
+  unreachable
+
+"16":                                             ; preds = %"8", %entry
+  %.24 = phi i64 [ %.2, %"8" ], [ 0, %entry ]
+  %9 = load i8*, i8** getelementptr inbounds (%"struct.array1_real(kind=8)", %"struct.array1_real(kind=8)"* @__src_soil_MOD_xdzs, i64 0, i32 0), align 32, !tbaa !5
+  %10 = icmp eq i8* %9, null
+  br i1 %10, label %"17", label %"20"
+
+"17":                                             ; preds = %"16"
+  %11 = icmp ne i64 %.24, 0
+  %12 = select i1 %11, i64 %.24, i64 1
+  %13 = tail call noalias i8* @malloc(i64 %12) #2 ;<= 1. malloc
+  %14 = icmp eq i8* %13, null
+  br i1 %14, label %"18", label %"19"
+
+"18":                                             ; preds = %"17"
+  unreachable
+
+"19":                                             ; preds = %"17"
+  store i8* %13, i8** getelementptr inbounds (%"struct.array1_real(kind=8)", %"struct.array1_real(kind=8)"* @__src_soil_MOD_xdzs, i64 0, i32 0), align 32, !tbaa !5
+  store i64 -1, i64* getelementptr inbounds (%"struct.array1_real(kind=8)", %"struct.array1_real(kind=8)"* @__src_soil_MOD_xdzs, i64 0, i32 1), align 8, !tbaa !0
+  %15 = icmp sgt i32 %0, 1
+  br i1 %15, label %"21.preheader", label %return
+
+"21.preheader":                                   ; preds = %"19"
+  %16 = bitcast i8* %13 to double* ;<= 2. bitcast to double*
+  %17 = add i32 %0, 1
+  br label %"21"
+
+"20":                                             ; preds = %"16"
+  unreachable
+
+"21":                                             ; preds = %"21", %"21.preheader"
+  %18 = phi double [ undef, %"21.preheader" ], [ %23, %"21" ]
+  %indvars.iv = phi i64 [ 2, %"21.preheader" ], [ %indvars.iv.next, %"21" ]
+  %19 = add nsw i64 %indvars.iv, -1
+  %20 = getelementptr inbounds double, double* %16, i64 %19 ;<= 3. GEP
+  %21 = load double, double* %20, align 8, !tbaa !7
+  %22 = fmul double %21, %21
+  %23 = fadd double %22, %18
+  store double %23, double* %20, align 8, !tbaa !7 ;<= 4. store
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %17
+  br i1 %exitcond, label %return, label %"21"
+
+return:                                           ; preds = %"21", %"19"
+  ret void
+}
+
+; Function Attrs: noreturn
+declare void @_gfortran_runtime_error(i8*, ...) #1
+
+; Function Attrs: nounwind
+declare noalias i8* @malloc(i64) #2
+
+; Function Attrs: noreturn
+declare void @_gfortran_os_error(i8*) #1
+
+; Function Attrs: noreturn
+declare void @_gfortran_runtime_error_at(i8*, i8*, ...) #1
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { noreturn }
+attributes #2 = { nounwind }
+attributes #3 = { noreturn nounwind }
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"alias set 4: integer(kind=8)", !2}
+!2 = distinct !{!2}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"alias set 11: integer(kind=4)", !2}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"alias set 3: void*", !2}
+!7 = !{!8, !8, i64 0}
+!8 = !{!"alias set 18: real(kind=8)", !2}
+
+; CHECK: ReadAccess := [Reduction Type: NONE] [Fortran array descriptor: __src_soil_MOD_xdzs] [Scalar: 0]
+; CHECK: MustWriteAccess := [Reduction Type: NONE] [Fortran array descriptor: __src_soil_MOD_xdzs] [Scalar: 0]
Index: test/FortranDetection/global-nonmalloc-nonvectored.ll
===================================================================
--- /dev/null
+++ test/FortranDetection/global-nonmalloc-nonvectored.ll
@@ -0,0 +1,87 @@
+; RUN: opt -S -analyze -polly-process-unprofitable -polly-remarks-minimal \
+; RUN: -polly-canonicalize -polly-scops -polly-dependences \
+; RUN: -polly-allow-nonaffine \
+; RUN: -polly-ignore-aliasing -polly-invariant-load-hoisting \
+; RUN: < %s| FileCheck %s
+;
+; MODULE src_soil
+; USE data_parameters, ONLY : &
+;     wp, &        ! KIND-type parameter for real variables
+;     iintegers    ! KIND-type parameter for standard integer variables
+; IMPLICIT NONE
+; REAL (KIND = wp), ALLOCATABLE, PRIVATE :: &
+;   xdzs (:)
+; CONTAINS
+;
+; SUBROUTINE terra1(n)
+;   INTEGER, intent(in) :: n
+;
+;   INTEGER (KIND=iintegers) :: &
+;     j
+;
+;   DO j = 22, n
+;     xdzs(j) = xdzs(j) * xdzs(j) + xdzs(j - 1)
+;   END DO
+; END SUBROUTINE terra1
+; END MODULE src_soil
+
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+module asm "\09.ident\09\22GCC: (GNU) 4.6.4 LLVM: 3.3.1\22"
+
+%"struct.array1_real(kind=8)" = type { i8*, i64, i64, [1 x %struct.descriptor_dimension] }
+%struct.descriptor_dimension = type { i64, i64, i64 }
+
+@__src_soil_MOD_xdzs = unnamed_addr global %"struct.array1_real(kind=8)" zeroinitializer, align 32
+
+; Function Attrs: nounwind uwtable
+define void @__src_soil_MOD_terra1(i32* noalias nocapture %n) unnamed_addr #0 {
+entry:
+  %0 = load i32, i32* %n, align 4, !tbaa !0
+  %1 = icmp sgt i32 %0, 21
+  br i1 %1, label %"3.preheader", label %return
+
+"3.preheader":                                    ; preds = %entry
+  %2 = load i64, i64* getelementptr inbounds (%"struct.array1_real(kind=8)", %"struct.array1_real(kind=8)"* @__src_soil_MOD_xdzs, i64 0, i32 1), align 8, !tbaa !3
+  %3 = load i8*, i8** getelementptr inbounds (%"struct.array1_real(kind=8)", %"struct.array1_real(kind=8)"* @__src_soil_MOD_xdzs, i64 0, i32 0), align 32, !tbaa !5
+  %4 = bitcast i8* %3 to double*
+  %5 = add i32 %0, 1
+  br label %"3"
+
+"3":                                              ; preds = %"3", %"3.preheader"
+  %indvars.iv = phi i64 [ 22, %"3.preheader" ], [ %indvars.iv.next, %"3" ]
+  %6 = add nsw i64 %indvars.iv, %2
+  %7 = getelementptr inbounds double, double* %4, i64 %6
+  %8 = load double, double* %7, align 8, !tbaa !7
+  %9 = fmul double %8, %8
+  %10 = add nsw i64 %indvars.iv, -1
+  %11 = add nsw i64 %10, %2
+  %12 = getelementptr inbounds double, double* %4, i64 %11
+  %13 = load double, double* %12, align 8, !tbaa !7
+  %14 = fadd double %9, %13
+  store double %14, double* %7, align 8, !tbaa !7
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %5
+  br i1 %exitcond, label %return, label %"3"
+
+return:                                           ; preds = %"3", %entry
+  ret void
+}
+
+attributes #0 = { nounwind uwtable }
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"alias set 11: integer(kind=4)", !2}
+!2 = distinct !{!2}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"alias set 4: integer(kind=8)", !2}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"alias set 3: void*", !2}
+!7 = !{!8, !8, i64 0}
+!8 = !{!"alias set 18: real(kind=8)", !2}
+
+; CHECK: ReadAccess := [Reduction Type: NONE] [Fortran array descriptor: __src_soil_MOD_xdzs] [Scalar: 0]
+; CHECK: ReadAccess := [Reduction Type: NONE] [Fortran array descriptor: __src_soil_MOD_xdzs] [Scalar: 0]
+; CHECK: MustWriteAccess := [Reduction Type: NONE] [Fortran array descriptor: __src_soil_MOD_xdzs] [Scalar: 0]