Diff 100865

include/polly/ScopInfo.h

Show First 20 Lines • Show All 276 Lines • ▼ Show 20 Lines	public:
void setBasePtr(Value *BP) { BasePtr = BP; }		void setBasePtr(Value *BP) { BasePtr = BP; }

/// Return the base pointer.		/// Return the base pointer.
Value *getBasePtr() const { return BasePtr; }		Value *getBasePtr() const { return BasePtr; }

/// For indirect accesses return the origin SAI of the BP, else null.		/// For indirect accesses return the origin SAI of the BP, else null.
const ScopArrayInfo *getBasePtrOriginSAI() const { return BasePtrOriginSAI; }		const ScopArrayInfo *getBasePtrOriginSAI() const { return BasePtrOriginSAI; }

		/// Return whether the ScopArrayInfo models a Fortran array.
		bool isFortranArray() const { return FAD != nullptr; }

/// The set of derived indirect SAIs for this origin SAI.		/// The set of derived indirect SAIs for this origin SAI.
const SmallSetVector<ScopArrayInfo *, 2> &getDerivedSAIs() const {		const SmallSetVector<ScopArrayInfo *, 2> &getDerivedSAIs() const {
return DerivedSAIs;		return DerivedSAIs;
}		}

/// Return the number of dimensions.		/// Return the number of dimensions.
unsigned getNumberOfDimensions() const {		unsigned getNumberOfDimensions() const {
if (Kind == MemoryKind::PHI \|\| Kind == MemoryKind::ExitPHI \|\|		if (Kind == MemoryKind::PHI \|\| Kind == MemoryKind::ExitPHI \|\|
▲ Show 20 Lines • Show All 2,433 Lines • ▼ Show 20 Lines	public:
/// >0 for other loops in the SCoP		/// >0 for other loops in the SCoP
/// -1 if @p L is nullptr or there is no outermost loop in the SCoP		/// -1 if @p L is nullptr or there is no outermost loop in the SCoP
int getRelativeLoopDepth(const Loop *L) const;		int getRelativeLoopDepth(const Loop *L) const;

/// Find the ScopArrayInfo associated with an isl Id		/// Find the ScopArrayInfo associated with an isl Id
/// that has name @p Name.		/// that has name @p Name.
ScopArrayInfo *getArrayInfoByName(const std::string BaseName);		ScopArrayInfo *getArrayInfoByName(const std::string BaseName);

		// Return whether this Scop contains a Fortran array.
		bool hasFortranArrays() const {
		for (auto &S : *this) {
		for (auto MemAcc : S) {
		if (MemAcc->getLatestScopArrayInfo()->isFortranArray())
		return true;
		}
		}
		return false;
		}
/// Check whether @p Schedule contains extension nodes.		/// Check whether @p Schedule contains extension nodes.
///		///
/// @return true if @p Schedule contains extension nodes.		/// @return true if @p Schedule contains extension nodes.
static bool containsExtensionNode(__isl_keep isl_schedule *Schedule);		static bool containsExtensionNode(__isl_keep isl_schedule *Schedule);

/// Simplify the SCoP representation.		/// Simplify the SCoP representation.
///		///
/// @param AfterHoisting Whether it is called after invariant load hoisting.		/// @param AfterHoisting Whether it is called after invariant load hoisting.
▲ Show 20 Lines • Show All 141 Lines • Show Last 20 Lines

lib/Analysis/ScopInfo.cpp

	Show First 20 Lines • Show All 4,639 Lines • ▼ Show 20 Lines
	//			//
	// @param USet A union set describing the elements for which to generate a			// @param USet A union set describing the elements for which to generate a
	// mapping.			// mapping.
	// @param N The dimension to map to.			// @param N The dimension to map to.
	// @returns A mapping from USet to its N-th dimension.			// @returns A mapping from USet to its N-th dimension.
	static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, int N) {			static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, int N) {
	assert(N >= 0);			assert(N >= 0);
	assert(USet);			assert(USet);
	assert(!USet.is_empty());			assert(!bool(USet.is_empty()));

	auto Result = isl::union_pw_multi_aff::empty(USet.get_space());			auto Result = isl::union_pw_multi_aff::empty(USet.get_space());

	auto Lambda = [&Result, N](isl::set S) -> isl::stat {			auto Lambda = [&Result, N](isl::set S) -> isl::stat {
	int Dim = S.dim(isl::dim::set);			int Dim = S.dim(isl::dim::set);
	auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set,			auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set,
	N, Dim - N);			N, Dim - N);
	if (N > 1)			if (N > 1)
	▲ Show 20 Lines • Show All 399 Lines • Show Last 20 Lines

lib/CodeGen/IslNodeBuilder.cpp

Show First 20 Lines • Show All 1,430 Lines • ▼ Show 20 Lines	bool IslNodeBuilder::preloadInvariantLoads() {

return true;		return true;
}		}

void IslNodeBuilder::addParameters(__isl_take isl_set *Context) {		void IslNodeBuilder::addParameters(__isl_take isl_set *Context) {
// Materialize values for the parameters of the SCoP.		// Materialize values for the parameters of the SCoP.
materializeParameters();		materializeParameters();

// materialize the outermost dimension parameters for a Fortran array.		// Materialize the outermost dimension parameters for a Fortran array.
// NOTE: materializeParameters() does not work since it looks through		// NOTE: materializeParameters() does not work since it looks through
// the SCEVs. We don't have a corresponding SCEV for the array size		// the SCEVs. We don't have a corresponding SCEV for the array size
// parameter		// parameter
materializeFortranArrayOutermostDimension();		materializeFortranArrayOutermostDimension();

// Generate values for the current loop iteration for all surrounding loops.		// Generate values for the current loop iteration for all surrounding loops.
//		//
// We may also reference loops outside of the scop which do not contain the		// We may also reference loops outside of the scop which do not contain the
▲ Show 20 Lines • Show All 70 Lines • Show Last 20 Lines

lib/CodeGen/PPCGCodeGeneration.cpp

Show First 20 Lines • Show All 121 Lines • ▼ Show 20 Lines	static __isl_give isl_id_to_ast_expr *pollyBuildAstExprForStmt(

if (!Stmt \|\| !Build)		if (!Stmt \|\| !Build)
return NULL;		return NULL;

Ctx = isl_ast_build_get_ctx(Build);		Ctx = isl_ast_build_get_ctx(Build);
isl_id_to_ast_expr *RefToExpr = isl_id_to_ast_expr_alloc(Ctx, 0);		isl_id_to_ast_expr *RefToExpr = isl_id_to_ast_expr_alloc(Ctx, 0);

for (MemoryAccess Acc : Stmt) {		for (MemoryAccess Acc : Stmt) {
isl_map *AddrFunc = Acc->getAddressFunction();		isl_map *AddrFunc = nullptr;

		if (Acc->isAffine()) {
		AddrFunc = Acc->getAddressFunction();
AddrFunc = isl_map_intersect_domain(AddrFunc, Stmt->getDomain());		AddrFunc = isl_map_intersect_domain(AddrFunc, Stmt->getDomain());

		} else {
		MeinersburUnsubmitted Not Done Reply Inline Actions Please explain why this is necessary. Meinersbur: Please explain why this is necessary.
		bolluAuthorUnsubmitted Not Done Reply Inline Actions `getAddressFunction()` calls `getAddressRelation()` and then `lexmin`s it. This will not work in case of a Fortran array because it's unbounded in the outermost dimension. So, we do the bounding manually. bollu: `getAddressFunction()` calls `getAddressRelation()` and then `lexmin`s it. This will not work…
		MeinersburUnsubmitted Not Done Reply Inline Actions Wouldn't it be a better approach to apply the domain constraints to the access relation in general? We could do this in `getAccessFunction()` itself; the intersection is done in both accesses. Second, I am not sure this does what you intend to do. For non-affine access indices, this will just give you an expression to the first element of the array, but I think you will want to access other elements as well. What you may want is a case distinction: The access is affine: Use base ptr and access relation/function The access is non-affine: Use only the base ptr, use the original index expression. Meinersbur: Wouldn't it be a better approach to apply the domain constraints to the access relation in general?
		bolluAuthorUnsubmitted Not Done Reply Inline Actions how do I know whether the access is affine or not? Do we register this information somewhere? bollu: how do I know whether the access is affine or not? Do we register this information somewhere?
		bolluAuthorUnsubmitted Not Done Reply Inline Actions found `isAffine`. However, in `IslNodeBuilder.cpp`, the following code exists: assert(MA->isAffine() && "Only affine memory accesses can be code generated"); so, I'm not sure how you wish to extract a `pw_aff` from the original expression, because in a non-affine case it would just be a relation to everything in the output, correct? sample-relation.isl [p_0_loaded_from_n, MemRef0_fortranarr_size, MemRef1_fortranarr_size] -> { Stmt_9[i0] -> MemRef0[o0] } bollu: found `isAffine`. However, in `IslNodeBuilder.cpp`, the following code exists: ``` assert…
		MeinersburUnsubmitted Not Done Reply Inline Actions The assertion should only apply if `setNewAccessRelation()` is used. Otherwise it just tries to reproduce the old pointer, not generating it from the access relation. Btw: `setNewAccessRelation()` is already used by `Scop::canonicalizeDynamicBasePtrs()`. This effectively means we cannot do invariant load hoisting on base pointers of non-affine accesses. As discussed in D28518. I'm not sure how you wish to extract a pw_aff from the original expression, because in a non-affine case it would just be a relation to everything in the output, correct? Yes, and we only need the maximum extent of all accesses. In your example, say the domain is { Stmt_9[i] : 0 <= i < 10 } The extent without bound information would be: `{ MemRef0[i] }` since it can access anything, even with bounded domain. Given the array size information, we can intersect the extent with the maximum in-bounds elements, which will be { MemRef0[i] : 0 <= i < N } Meinersbur: The assertion should only apply if `setNewAccessRelation()` is used. Otherwise it just tries to…
		MeinersburUnsubmitted Not Done Reply Inline Actions `N` would be your `MemRef0_fortranarr_size`. Meinersbur: `N` would be your `MemRef0_fortranarr_size`.
		bolluAuthorUnsubmitted Not Done Reply Inline Actions Perhaps I am mis-understanding what the function does. Isn't `pollyBuildAstExprForStmt` supposed to return a map from the ID of the memory access to a `pw_aff` expression which represents the access index? If our _access_ is not affine, something like `for(i) { A[i * i] };`, we do not store the exact non-affine access, nor can this be represented as `pw_aff`. So, how do we create an `isl_ast_expr Access` corresponding to this access? Yes, and we only need the maximum extent of all accesses. In your example, say the domain is { Stmt_9[i] : 0 <= i < 10 } The extent without bound information would be: { MemRef0[i] } since it can access anything, even with bounded domain. Given the array size information, we can intersect the extent with the maximum in-bounds elements, which will be { MemRef0[i] : 0 <= i < N } I don't understand, how does this relate to the creation of the `AddrFunc`? bollu: Perhaps I am mis-understanding what the function does. 1. Isn't `pollyBuildAstExprForStmt`…
		errs() << "@@Access: ";
		Acc->dump();
		llvm_unreachable("Cannot codegen for GPU backend with non-affine access");
		return nullptr;
		}

		assert(AddrFunc && "expected AddrFunc to be initialized.");
isl_id *RefId = Acc->getId();		isl_id *RefId = Acc->getId();
isl_pw_multi_aff *PMA = isl_pw_multi_aff_from_map(AddrFunc);		isl_pw_multi_aff *PMA = isl_pw_multi_aff_from_map(AddrFunc);
isl_multi_pw_aff *MPA = isl_multi_pw_aff_from_pw_multi_aff(PMA);		isl_multi_pw_aff *MPA = isl_multi_pw_aff_from_pw_multi_aff(PMA);
MPA = isl_multi_pw_aff_coalesce(MPA);		MPA = isl_multi_pw_aff_coalesce(MPA);
MPA = FunctionIndex(MPA, RefId, UserIndex);		MPA = FunctionIndex(MPA, RefId, UserIndex);
isl_ast_expr *Access = isl_ast_build_access_from_multi_pw_aff(Build, MPA);		isl_ast_expr *Access = isl_ast_build_access_from_multi_pw_aff(Build, MPA);
Access = FunctionExpr(Access, RefId, UserExpr);		Access = FunctionExpr(Access, RefId, UserExpr);
RefToExpr = isl_id_to_ast_expr_set(RefToExpr, RefId, Access);		RefToExpr = isl_id_to_ast_expr_set(RefToExpr, RefId, Access);
}		}

return RefToExpr;		return RefToExpr;
}		}

/// Given a LLVM Type, compute its size in bytes,		/// Given a LLVM Type, compute its size in bytes,
static int computeSizeInBytes(const Type *T) {		static int computeSizeInBytes(const Type *T) {
int bytes = T->getPrimitiveSizeInBits() / 8;		int bytes = T->getPrimitiveSizeInBits() / 8;
if (bytes == 0)		if (bytes == 0)
bytes = T->getScalarSizeInBits() / 8;		bytes = T->getScalarSizeInBits() / 8;
▲ Show 20 Lines • Show All 1,979 Lines • ▼ Show 20 Lines	if (isl_union_set_is_empty(AccessUSet)) {
return isl_set_empty(Array->getSpace());		return isl_set_empty(Array->getSpace());
}		}

if (Array->getNumberOfDimensions() == 0) {		if (Array->getNumberOfDimensions() == 0) {
isl_union_set_free(AccessUSet);		isl_union_set_free(AccessUSet);
return isl_set_universe(Array->getSpace());		return isl_set_universe(Array->getSpace());
}		}

		isl_set *Extent = isl_set_universe(Array->getSpace());
isl_set *AccessSet =		isl_set *AccessSet =
isl_union_set_extract_set(AccessUSet, Array->getSpace());		isl_union_set_extract_set(AccessUSet, Array->getSpace());

isl_union_set_free(AccessUSet);		isl_union_set_free(AccessUSet);
isl_local_space *LS = isl_local_space_from_space(Array->getSpace());		isl_local_space *LS = isl_local_space_from_space(Array->getSpace());

isl_pw_aff *Val =		isl_pw_aff *Val =
isl_pw_aff_from_aff(isl_aff_var_on_domain(LS, isl_dim_set, 0));		isl_pw_aff_from_aff(isl_aff_var_on_domain(LS, isl_dim_set, 0));

		// In case the array is a Fortran array, we allow unbounded
		// outermost dimensions, since we can load the outermost dimension
		// information at runtime.
		if (Array->isFortranArray() &&
		!isl_set_dim_is_bounded(AccessSet, isl_dim_set, 0)) {
		isl_set_free(AccessSet);
		isl_pw_aff_free(Val);
		} else {
		MeinersburUnsubmitted Not Done Reply Inline Actions I'd first try to derive the extent from the accesses and, only if that fails, derive the maximal possible extent from the outermost array dimension. That should also work if it is not a Fortran array, but whenever all dimension sizes are defined. Meinersbur: I'd first try to derive the extent from the accesses and, only if that fails…
		bolluAuthorUnsubmitted Not Done Reply Inline Actions Hm, so you wish to derive extents from the accesses for Fortran arrays as well? I'm not sure what you're saying, could you please clarify? bollu: Hm, so you wish to derive extents from the accesses for Fortran arrays as well? I'm not sure…
		MeinersburUnsubmitted Not Done Reply Inline Actions Let's say you have an access for (int i = 5; i < 10; i++) A[i] For this access you need the elements `A[5..9]` to be transferred, but not `A[0..4]`. The older preexisting code would get you the range `A[5..9]`. Your Fortran code would get you the range `A[0..N]`, which includes some unnecessary elements. If you can compute the extent the former way, there is no reason to use the more pessimistic approach to transfer the whole array. You could even compute the extent which might be unbounded in some dimensions, and then intersect with the array dimensions if those are known. Meinersbur: Let's say you have an access ``` for (int i = 5; i < 10; i++) A[i] ``` For this access you…
		bolluAuthorUnsubmitted Not Done Reply Inline Actions Ah, I see. OK, I get it now. bollu: Ah, I see. OK, I get it now.

isl_pw_aff *OuterMin = isl_set_dim_min(isl_set_copy(AccessSet), 0);		isl_pw_aff *OuterMin = isl_set_dim_min(isl_set_copy(AccessSet), 0);
isl_pw_aff *OuterMax = isl_set_dim_max(AccessSet, 0);		isl_pw_aff *OuterMax = isl_set_dim_max(AccessSet, 0);
OuterMin = isl_pw_aff_add_dims(OuterMin, isl_dim_in,		OuterMin = isl_pw_aff_add_dims(OuterMin, isl_dim_in,
isl_pw_aff_dim(Val, isl_dim_in));		isl_pw_aff_dim(Val, isl_dim_in));
OuterMax = isl_pw_aff_add_dims(OuterMax, isl_dim_in,		OuterMax = isl_pw_aff_add_dims(OuterMax, isl_dim_in,
isl_pw_aff_dim(Val, isl_dim_in));		isl_pw_aff_dim(Val, isl_dim_in));
OuterMin =		OuterMin =
isl_pw_aff_set_tuple_id(OuterMin, isl_dim_in, Array->getBasePtrId());		isl_pw_aff_set_tuple_id(OuterMin, isl_dim_in, Array->getBasePtrId());
OuterMax =		OuterMax =
isl_pw_aff_set_tuple_id(OuterMax, isl_dim_in, Array->getBasePtrId());		isl_pw_aff_set_tuple_id(OuterMax, isl_dim_in, Array->getBasePtrId());

isl_set *Extent = isl_set_universe(Array->getSpace());

Extent = isl_set_intersect(		Extent = isl_set_intersect(
Extent, isl_pw_aff_le_set(OuterMin, isl_pw_aff_copy(Val)));		Extent, isl_pw_aff_le_set(OuterMin, isl_pw_aff_copy(Val)));
Extent = isl_set_intersect(Extent, isl_pw_aff_ge_set(OuterMax, Val));		Extent = isl_set_intersect(Extent, isl_pw_aff_ge_set(OuterMax, Val));
		}

		int StartLowerBoundDim = Array->isFortranArray() ? 0 : 1;
		MeinersburUnsubmitted Not Done Reply Inline Actions Please no const of local variables. I know you like this, but if applied consistently nearly every local variable must be const, adding a lot of syntactic clutter. Meinersbur: Please no const of local variables. I know you like this, but if applied consistently nearly…

for (unsigned i = 1; i < NumDims; ++i)		for (unsigned i = StartLowerBoundDim; i < NumDims; ++i)
Extent = isl_set_lower_bound_si(Extent, isl_dim_set, i, 0);		Extent = isl_set_lower_bound_si(Extent, isl_dim_set, i, 0);

for (unsigned i = 0; i < NumDims; ++i) {		for (unsigned i = 0; i < NumDims; ++i) {
isl_pw_aff *PwAff =		isl_pw_aff *PwAff =
const_cast<isl_pw_aff *>(Array->getDimensionSizePw(i));		const_cast<isl_pw_aff *>(Array->getDimensionSizePw(i));

// isl_pw_aff can be NULL for zero dimension. Only in the case of a		// isl_pw_aff can be NULL for zero dimension. Only in the case of a
// Fortran array will we have a legitimate dimension.		// Fortran array will we have a legitimate dimension.
▲ Show 20 Lines • Show All 247 Lines • ▼ Show 20 Lines	public:
//		//
// 1) Compute new schedule for the program.		// 1) Compute new schedule for the program.
// 2) Map schedule to GPU (TODO)		// 2) Map schedule to GPU (TODO)
// 3) Generate code for new schedule (TODO)		// 3) Generate code for new schedule (TODO)
//		//
// We do not use here the Polly ScheduleOptimizer, as the schedule optimizer		// We do not use here the Polly ScheduleOptimizer, as the schedule optimizer
// is mostly CPU specific. Instead, we use PPCG's GPU code generation		// is mostly CPU specific. Instead, we use PPCG's GPU code generation
// strategy directly from this pass.		// strategy directly from this pass.
gpu_gen generateGPU(ppcg_scop PPCGScop, gpu_prog *PPCGProg) {		gpu_gen generateGPU(bool HasFortranArrays, ppcg_scop PPCGScop,
		gpu_prog *PPCGProg) {
		MeinersburUnsubmitted Not Done Reply Inline Actions Is the coding style in this file supposed to be different than in the rest of Polly? (LLVM coding style says parameter names start with capital letter) Meinersbur: Is the coding style in this file supposed to be different than in the rest of Polly? (LLVM…

auto PPCGGen = isl_calloc_type(S->getIslCtx(), struct gpu_gen);		auto PPCGGen = isl_calloc_type(S->getIslCtx(), struct gpu_gen);

PPCGGen->ctx = S->getIslCtx();		PPCGGen->ctx = S->getIslCtx();
PPCGGen->options = PPCGScop->options;		PPCGGen->options = PPCGScop->options;
PPCGGen->print = nullptr;		PPCGGen->print = nullptr;
PPCGGen->print_user = nullptr;		PPCGGen->print_user = nullptr;
PPCGGen->build_ast_expr = &pollyBuildAstExprForStmt;		PPCGGen->build_ast_expr = &pollyBuildAstExprForStmt;
Show All 9 Lines	gpu_gen generateGPU(bool HasFortranArrays, ppcg_scop PPCGScop,
isl_options_set_schedule_outer_coincidence(PPCGGen->ctx, true);		isl_options_set_schedule_outer_coincidence(PPCGGen->ctx, true);
isl_options_set_schedule_maximize_band_depth(PPCGGen->ctx, true);		isl_options_set_schedule_maximize_band_depth(PPCGGen->ctx, true);
isl_options_set_schedule_whole_component(PPCGGen->ctx, false);		isl_options_set_schedule_whole_component(PPCGGen->ctx, false);

isl_schedule *Schedule = get_schedule(PPCGGen);		isl_schedule *Schedule = get_schedule(PPCGGen);

int has_permutable = has_any_permutable_node(Schedule);		int has_permutable = has_any_permutable_node(Schedule);

if (!has_permutable \|\| has_permutable < 0) {		// TODO: I've simply allowed this to test out the codegen, is this a bad
		// idea?
		if ((!has_permutable \|\| has_permutable < 0) && !HasFortranArrays) {
Schedule = isl_schedule_free(Schedule);		Schedule = isl_schedule_free(Schedule);
} else {		} else {
Schedule = map_to_device(PPCGGen, Schedule);		Schedule = map_to_device(PPCGGen, Schedule);
PPCGGen->tree = generate_code(PPCGGen, isl_schedule_copy(Schedule));		PPCGGen->tree = generate_code(PPCGGen, isl_schedule_copy(Schedule));
}		}

if (DumpSchedule) {		if (DumpSchedule) {
isl_printer *P = isl_printer_to_str(S->getIslCtx());		isl_printer *P = isl_printer_to_str(S->getIslCtx());
▲ Show 20 Lines • Show All 216 Lines • ▼ Show 20 Lines	bool runOnScop(Scop &CurrentScop) override {
RI = &getAnalysis<RegionInfoPass>().getRegionInfo();		RI = &getAnalysis<RegionInfoPass>().getRegionInfo();

// We currently do not support scops with invariant loads.		// We currently do not support scops with invariant loads.
if (S->hasInvariantAccesses())		if (S->hasInvariantAccesses())
return false;		return false;

auto PPCGScop = createPPCGScop();		auto PPCGScop = createPPCGScop();
auto PPCGProg = createPPCGProg(PPCGScop);		auto PPCGProg = createPPCGProg(PPCGScop);
auto PPCGGen = generateGPU(PPCGScop, PPCGProg);		auto PPCGGen =
		generateGPU(CurrentScop.hasFortranArrays(), PPCGScop, PPCGProg);

if (PPCGGen->tree)		if (PPCGGen->tree)
generateCode(isl_ast_node_copy(PPCGGen->tree), PPCGProg);		generateCode(isl_ast_node_copy(PPCGGen->tree), PPCGProg);

freeOptions(PPCGScop);		freeOptions(PPCGScop);
freePPCGGen(PPCGGen);		freePPCGGen(PPCGGen);
gpu_prog_free(PPCGProg);		gpu_prog_free(PPCGProg);
ppcg_scop_free(PPCGScop);		ppcg_scop_free(PPCGScop);
▲ Show 20 Lines • Show All 50 Lines • Show Last 20 Lines

test/GPGPU/fortran-copy-kernel-affine.ll

This file was added.

				; RUN: opt -analyze %loadPolly -polly-allow-nonaffine -polly-ignore-aliasing -polly-scops -polly-detect-fortran-arrays \
				; RUN: < %s \| FileCheck %s -check-prefix=SCOPS
				MeinersburUnsubmitted Not Done Reply Inline Actions You removed [WIP]. but TODO is still there. Meinersbur: You removed [WIP]. but TODO is still there.

				; RUN: opt %loadPolly -polly-allow-nonaffine -polly-ignore-aliasing -polly-detect-fortran-arrays -polly-codegen-ppcg -polly-acc-dump-code \
				; RUN: -disable-output < %s \| FileCheck -check-prefix=CODE %s

				; REQUIRES: pollyacc

				; Check that Fortran arrays are detected.
				; SCOPS: ReadAccess := [Reduction Type: NONE] [Fortran array descriptor: xs] [Scalar: 0]
				; SCOPS-NEXT: [tmp11, tmp7, p_2, tmp1, p_4, MemRef_tmp10_fortranarr_size, MemRef_tmp5_fortranarr_size] -> { Stmt_9[i0] -> MemRef_tmp10[1 + i0, -p_2] };
				; SCOPS-NEXT: MustWriteAccess := [Reduction Type: NONE] [Fortran array descriptor: ys] [Scalar: 0]
				; SCOPS-NEXT: [tmp11, tmp7, p_2, tmp1, p_4, MemRef_tmp10_fortranarr_size, MemRef_tmp5_fortranarr_size] -> { Stmt_9[i0] -> MemRef_tmp5[1 + i0, -p_4] };

				MeinersburUnsubmitted Not Done Reply Inline Actions Fortran Meinersbur: Fortran
				; Check that we generate CUDA calls
				; CODE: Code
				; CODE-NEXT: ====
				; CODE-NEXT: # host
				; CODE-NEXT: if (tmp11 >= 1) {
				; CODE-NEXT: if (tmp7 >= 1 && MemRef_tmp10_fortranarr_size >= 2)
				; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_tmp10, MemRef_tmp10, (MemRef_tmp10_fortranarr_size >= tmp11 + 1 ? tmp11 + 1 : MemRef_tmp10_fortranarr_size) * (tmp7) * sizeof(i32), cudaMemcpyHostToDevice));
				; CODE-NEXT: if (tmp1 >= 1 && MemRef_tmp5_fortranarr_size >= 2)
				; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_tmp5, MemRef_tmp5, (MemRef_tmp5_fortranarr_size >= tmp11 + 1 ? tmp11 + 1 : MemRef_tmp5_fortranarr_size) * (tmp1) * sizeof(i32), cudaMemcpyHostToDevice));
				; CODE-NEXT: {
				; CODE-NEXT: dim3 k0_dimBlock(32);
				; CODE-NEXT: dim3 k0_dimGrid(tmp11 >= 1048546 ? 32768 : floord(tmp11 + 31, 32));
				; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_tmp10, dev_MemRef_tmp5, tmp11, tmp7, p_2, tmp1, p_4, MemRef_tmp10_fortranarr_size, MemRef_tmp5_fortranarr_size);
				; CODE-NEXT: cudaCheckKernel();
				; CODE-NEXT: }

				; CODE: if (tmp1 >= 1 && MemRef_tmp5_fortranarr_size >= 2)
				; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_tmp5, dev_MemRef_tmp5, (MemRef_tmp5_fortranarr_size >= tmp11 + 1 ? tmp11 + 1 : MemRef_tmp5_fortranarr_size) * (tmp1) * sizeof(i32), cudaMemcpyDeviceToHost));
				; CODE-NEXT: }
				; This is not the exact code that is generated from dragonegg. A non-affine access that is
				; created due to error checking is manually made linear.
				;
				; PROGRAM main
				; INTEGER, DIMENSION(100) :: xs
				; INTEGER, DIMENSION(100) :: ys
				;
				; DO i = 1, 100
				; xs (i) = i
				; ys(i) = 0
				; END DO
				;
				; CALL copy(xs, ys, 10)
				;
				; PRINT *, ys
				; CONTAINS
				; SUBROUTINE copy(xs, ys, n)
				; IMPLICIT NONE
				; INTEGER, DIMENSION(:), INTENT(INOUT) :: xs, ys
				; INTEGER, INTENT(IN) :: n
				; INTEGER :: i
				;
				; DO i = 1, n
				; ys(i) = xs(i)
				; END DO
				;
				; END SUBROUTINE copy
				; END PROGRAM


				; ModuleID = 'test/GPGPU/fortran-copy-kernel-affine.ll'
				source_filename = "test/GPGPU/fortran-copy-kernel-affine.ll"
				target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
				target triple = "x86_64-unknown-linux-gnu"

				module asm "\09.ident\09\22GCC: (GNU) 4.6.4 LLVM: 3.3.1\22"

				%"struct.array1_integer(kind=4)" = type { i8*, i64, i64, [1 x %struct.descriptor_dimension] }
				%struct.descriptor_dimension = type { i64, i64, i64 }
				%"struct.array1_integer(kind=4).0" = type { i8*, i64, i64, [1 x %struct.descriptor_dimension] }

				@.cst = private constant [12 x i8] c"program.f90\00", align 8
				@0 = internal constant i32 10
				@options.14.1603 = internal constant [8 x i32] [i32 68, i32 511, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1], align 32

				define internal void @copy.1550(%"struct.array1_integer(kind=4)"* noalias %xs, %"struct.array1_integer(kind=4).0"* noalias %ys, i32* noalias %n) {
				entry:
				br label %entry.split

				entry.split: ; preds = %entry
				%tmp = getelementptr inbounds %"struct.array1_integer(kind=4).0", %"struct.array1_integer(kind=4).0"* %ys, i64 0, i32 3, i64 0, i32 0
				%tmp1 = load i64, i64* %tmp, align 8
				%tmp2 = icmp eq i64 %tmp1, 0
				%tmp3 = select i1 %tmp2, i64 1, i64 %tmp1
				%tmp4 = bitcast %"struct.array1_integer(kind=4).0"* %ys to i32**
				%tmp5 = load i32, i32* %tmp4, align 8
				%tmp6 = getelementptr inbounds %"struct.array1_integer(kind=4)", %"struct.array1_integer(kind=4)"* %xs, i64 0, i32 3, i64 0, i32 0
				%tmp7 = load i64, i64* %tmp6, align 8
				%tmp8 = icmp eq i64 %tmp7, 0
				%. = select i1 %tmp8, i64 1, i64 %tmp7
				%tmp9 = bitcast %"struct.array1_integer(kind=4)"* %xs to i32**
				%tmp10 = load i32, i32* %tmp9, align 8
				%tmp11 = load i32, i32* %n, align 4
				%tmp12 = icmp sgt i32 %tmp11, 0
				br i1 %tmp12, label %"9.preheader", label %return

				"9.preheader": ; preds = %entry.split
				br label %"9"

				"9": ; preds = %"9", %"9.preheader"
				%tmp13 = phi i32 [ %tmp24, %"9" ], [ 1, %"9.preheader" ]
				%tmp14 = sext i32 %tmp13 to i64
				; replace %tmp3 with % tmp1
				; %tmp15 = mul i64 %tmp3, %tmp14
				%tmp15 = mul i64 %tmp1, %tmp14

				%tmp16 = sub i64 %tmp15, %tmp3
				%tmp17 = sext i32 %tmp13 to i64
				; replace %. with %tmp7
				; %tmp18 = mul i64 %., %tmp17
				%tmp18 = mul i64 %tmp7, %tmp17

				%tmp19 = sub i64 %tmp18, %.
				%tmp20 = getelementptr i32, i32* %tmp10, i64 %tmp19
				%tmp21 = load i32, i32* %tmp20, align 4
				%tmp22 = getelementptr i32, i32* %tmp5, i64 %tmp16
				store i32 %tmp21, i32* %tmp22, align 4
				%tmp23 = icmp eq i32 %tmp13, %tmp11
				%tmp24 = add i32 %tmp13, 1
				br i1 %tmp23, label %return.loopexit, label %"9"

				return.loopexit: ; preds = %"9"
				br label %return

				return: ; preds = %return.loopexit, %entry.split
				ret void
				}

				declare void @_gfortran_set_args(i32, i8**)

				declare void @_gfortran_set_options(i32, i32*)

test/Isl/CodeGen/fortran_array_runtime_size_generation.ll

	; Check that the runtime size computation is generated for Fortran arrays.			; Check that the runtime size computation is generated for Fortran arrays.

				; REQUIRES=pollyacc

				; PPCG code generation backend:
				; RUN: opt %loadPolly -S -polly-detect-fortran-arrays \
				; RUN: -polly-target=gpu -polly-acc-mincompute=0 \
				; RUN: -polly-codegen-ppcg < %s \| FileCheck %s

	; Regular code generation backend:			; Regular code generation backend:
	; RUN: opt %loadPolly -S -polly-detect-fortran-arrays \			; RUN: opt %loadPolly -S -polly-detect-fortran-arrays \
	; RUN: -polly-codegen < %s \| FileCheck %s			; RUN: -polly-codegen < %s \| FileCheck %s

	; What the input fortran code should look like. NOTE: this is fake, the			; What the input fortran code should look like. NOTE: this is fake, the
	; .ll file was hand-written.			; .ll file was hand-written.
	;			;
	; MODULE testmod			; MODULE testmod
	▲ Show 20 Lines • Show All 67 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[Polly] [Fortran Support] Generate GPU kernels for Fortran arrays
Needs ReviewPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 100865

include/polly/ScopInfo.h

lib/Analysis/ScopInfo.cpp

lib/CodeGen/IslNodeBuilder.cpp

lib/CodeGen/PPCGCodeGeneration.cpp

test/GPGPU/fortran-copy-kernel-affine.ll

test/Isl/CodeGen/fortran_array_runtime_size_generation.ll

This is an archive of the discontinued LLVM Phabricator instance.

[Polly] [Fortran Support] Generate GPU kernels for Fortran arraysNeeds ReviewPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 100865

include/polly/ScopInfo.h

lib/Analysis/ScopInfo.cpp

lib/CodeGen/IslNodeBuilder.cpp

lib/CodeGen/PPCGCodeGeneration.cpp

test/GPGPU/fortran-copy-kernel-affine.ll

test/Isl/CodeGen/fortran_array_runtime_size_generation.ll

[Polly] [Fortran Support] Generate GPU kernels for Fortran arrays
Needs ReviewPublic