Diff 48792

polly/trunk/include/polly/CodeGen/IslNodeBuilder.h

Show First 20 Lines • Show All 236 Lines • ▼ Show 20 Lines	protected:
/// This function will preload the representing load from @p IAClass and		/// This function will preload the representing load from @p IAClass and
/// map all members of @p IAClass to that preloaded value, potentially casted		/// map all members of @p IAClass to that preloaded value, potentially casted
/// to the required type.		/// to the required type.
///		///
/// @returns False, iff a problem occurred and the load was not preloaded.		/// @returns False, iff a problem occurred and the load was not preloaded.
bool preloadInvariantEquivClass(const InvariantEquivClassTy &IAClass);		bool preloadInvariantEquivClass(const InvariantEquivClassTy &IAClass);

void createForVector(__isl_take isl_ast_node *For, int VectorWidth);		void createForVector(__isl_take isl_ast_node *For, int VectorWidth);
void createForSequential(__isl_take isl_ast_node *For);		void createForSequential(__isl_take isl_ast_node *For, bool KnownParallel);

/// Create LLVM-IR that executes a for node thread parallel.		/// Create LLVM-IR that executes a for node thread parallel.
///		///
/// @param For The FOR isl_ast_node for which code is generated.		/// @param For The FOR isl_ast_node for which code is generated.
void createForParallel(__isl_take isl_ast_node *For);		void createForParallel(__isl_take isl_ast_node *For);

/// @brief Create new access functions for modified memory accesses.		/// @brief Create new access functions for modified memory accesses.
///		///
▲ Show 20 Lines • Show All 77 Lines • Show Last 20 Lines

polly/trunk/lib/CodeGen/IslAst.cpp

Show First 20 Lines • Show All 249 Lines • ▼ Show 20 Lines	astBuildAfterFor(__isl_take isl_ast_node Node, __isl_keep isl_ast_build Build,
assert(!Payload->Build && "Build environment already set");		assert(!Payload->Build && "Build environment already set");
Payload->Build = isl_ast_build_copy(Build);		Payload->Build = isl_ast_build_copy(Build);
Payload->IsInnermost = (Id == BuildInfo->LastForNodeId);		Payload->IsInnermost = (Id == BuildInfo->LastForNodeId);

// Innermost loops that are surrounded by parallel loops have not yet been		// Innermost loops that are surrounded by parallel loops have not yet been
// tested for parallelism. Test them here to ensure we check all innermost		// tested for parallelism. Test them here to ensure we check all innermost
// loops for parallelism.		// loops for parallelism.
if (Payload->IsInnermost && BuildInfo->InParallelFor) {		if (Payload->IsInnermost && BuildInfo->InParallelFor) {
if (Payload->IsOutermostParallel)		if (Payload->IsOutermostParallel) {
Payload->IsInnermostParallel = true;		Payload->IsInnermostParallel = true;
else		} else {
		if (PollyVectorizerChoice == VECTORIZER_NONE)
Payload->IsInnermostParallel =		Payload->IsInnermostParallel =
astScheduleDimIsParallel(Build, BuildInfo->Deps, Payload);		astScheduleDimIsParallel(Build, BuildInfo->Deps, Payload);
}		}
		}
if (Payload->IsOutermostParallel)		if (Payload->IsOutermostParallel)
BuildInfo->InParallelFor = false;		BuildInfo->InParallelFor = false;

isl_id_free(Id);		isl_id_free(Id);
return Node;		return Node;
}		}

		/// Callback run before the AST for a mark node is generated.
		///
		/// When the mark is named "SIMD", sets InParallelFor in the build info so
		/// that the for-loop callbacks treat the loops below the mark as being
		/// inside a parallel loop.
		///
		/// @param MarkId The id of the mark node about to be processed.
		/// @param Build  The current AST build (unused here).
		/// @param User   Opaque pointer to the AstBuildUserInfo registered with
		///               the callback.
		///
		/// @returns isl_stat_error if @p MarkId is null, isl_stat_ok otherwise.
		static isl_stat astBuildBeforeMark(__isl_keep isl_id *MarkId,
		                                   __isl_keep isl_ast_build *Build,
		                                   void *User) {
		  if (!MarkId)
		    return isl_stat_error;

		  AstBuildUserInfo *BuildInfo = (AstBuildUserInfo *)User;
		  if (!strcmp(isl_id_get_name(MarkId), "SIMD"))
		    BuildInfo->InParallelFor = true;

		  return isl_stat_ok;
		}

		/// Callback run after the AST for a mark node has been generated.
		///
		/// When the mark is named "SIMD", clears InParallelFor in the build info
		/// again, undoing what the matching before-mark callback set.
		///
		/// @param Node  The generated mark node; returned unchanged.
		/// @param Build The current AST build (unused here).
		/// @param User  Opaque pointer to the AstBuildUserInfo registered with
		///              the callback.
		///
		/// @returns The unmodified @p Node.
		static __isl_give isl_ast_node *
		astBuildAfterMark(__isl_take isl_ast_node *Node,
		                  __isl_keep isl_ast_build *Build, void *User) {
		  assert(isl_ast_node_get_type(Node) == isl_ast_node_mark);
		  AstBuildUserInfo *BuildInfo = (AstBuildUserInfo *)User;
		  // isl_ast_node_mark_get_id hands back an owned id; release it below.
		  auto *Id = isl_ast_node_mark_get_id(Node);
		  if (!strcmp(isl_id_get_name(Id), "SIMD"))
		    BuildInfo->InParallelFor = false;
		  isl_id_free(Id);
		  return Node;
		}

static __isl_give isl_ast_node AtEachDomain(__isl_take isl_ast_node Node,		static __isl_give isl_ast_node AtEachDomain(__isl_take isl_ast_node Node,
__isl_keep isl_ast_build *Build,		__isl_keep isl_ast_build *Build,
void *User) {		void *User) {
assert(!isl_ast_node_get_annotation(Node) && "Node already annotated");		assert(!isl_ast_node_get_annotation(Node) && "Node already annotated");

IslAstUserPayload *Payload = new IslAstUserPayload();		IslAstUserPayload *Payload = new IslAstUserPayload();
isl_id *Id = isl_id_alloc(isl_ast_build_get_ctx(Build), "", Payload);		isl_id *Id = isl_id_alloc(isl_ast_build_get_ctx(Build), "", Payload);
Id = isl_id_set_free_user(Id, freeIslAstUserPayload);		Id = isl_id_set_free_user(Id, freeIslAstUserPayload);
▲ Show 20 Lines • Show All 99 Lines • ▼ Show 20 Lines	void IslAst::init(const Dependences &D) {
if (PerformParallelTest) {		if (PerformParallelTest) {
BuildInfo.Deps = &D;		BuildInfo.Deps = &D;
BuildInfo.InParallelFor = 0;		BuildInfo.InParallelFor = 0;

Build = isl_ast_build_set_before_each_for(Build, &astBuildBeforeFor,		Build = isl_ast_build_set_before_each_for(Build, &astBuildBeforeFor,
&BuildInfo);		&BuildInfo);
Build =		Build =
isl_ast_build_set_after_each_for(Build, &astBuildAfterFor, &BuildInfo);		isl_ast_build_set_after_each_for(Build, &astBuildAfterFor, &BuildInfo);

		Build = isl_ast_build_set_before_each_mark(Build, &astBuildBeforeMark,
		&BuildInfo);

		Build = isl_ast_build_set_after_each_mark(Build, &astBuildAfterMark,
		&BuildInfo);
}		}

buildRunCondition(Build);		buildRunCondition(Build);

Root = isl_ast_build_node_from_schedule(Build, S->getScheduleTree());		Root = isl_ast_build_node_from_schedule(Build, S->getScheduleTree());

isl_ast_build_free(Build);		isl_ast_build_free(Build);
}		}
▲ Show 20 Lines • Show All 192 Lines • Show Last 20 Lines

polly/trunk/lib/CodeGen/IslNodeBuilder.cpp

Show First 20 Lines • Show All 346 Lines • ▼ Show 20 Lines	void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User,
VectorBlockGenerator::generate(BlockGen, *Stmt, VLTS, S, NewAccesses);		VectorBlockGenerator::generate(BlockGen, *Stmt, VLTS, S, NewAccesses);
isl_id_to_ast_expr_free(NewAccesses);		isl_id_to_ast_expr_free(NewAccesses);
isl_map_free(S);		isl_map_free(S);
isl_id_free(Id);		isl_id_free(Id);
isl_ast_node_free(User);		isl_ast_node_free(User);
}		}

void IslNodeBuilder::createMark(__isl_take isl_ast_node *Node) {		void IslNodeBuilder::createMark(__isl_take isl_ast_node *Node) {
		auto *Id = isl_ast_node_mark_get_id(Node);
auto Child = isl_ast_node_mark_get_node(Node);		auto Child = isl_ast_node_mark_get_node(Node);
create(Child);
isl_ast_node_free(Node);		isl_ast_node_free(Node);
		// If a child node of a 'SIMD mark' is a loop that has a single iteration,
		// it will be optimized away and we should skip it.
		if (!strcmp(isl_id_get_name(Id), "SIMD") &&
		isl_ast_node_get_type(Child) == isl_ast_node_for) {
		bool Vector = PollyVectorizerChoice == VECTORIZER_POLLY;
		int VectorWidth = getNumberOfIterations(Child);
		if (Vector && 1 < VectorWidth && VectorWidth <= 16)
		createForVector(Child, VectorWidth);
		else
		createForSequential(Child, true);
		isl_id_free(Id);
		return;
		}
		create(Child);
		isl_id_free(Id);
}		}

void IslNodeBuilder::createForVector(__isl_take isl_ast_node *For,		void IslNodeBuilder::createForVector(__isl_take isl_ast_node *For,
int VectorWidth) {		int VectorWidth) {
isl_ast_node *Body = isl_ast_node_for_get_body(For);		isl_ast_node *Body = isl_ast_node_for_get_body(For);
isl_ast_expr *Init = isl_ast_node_for_get_init(For);		isl_ast_expr *Init = isl_ast_node_for_get_init(For);
isl_ast_expr *Inc = isl_ast_node_for_get_inc(For);		isl_ast_expr *Inc = isl_ast_node_for_get_inc(For);
isl_ast_expr *Iterator = isl_ast_node_for_get_iterator(For);		isl_ast_expr *Iterator = isl_ast_node_for_get_iterator(For);
▲ Show 20 Lines • Show All 46 Lines • ▼ Show 20 Lines	void IslNodeBuilder::createForVector(__isl_take isl_ast_node *For,
IDToValue.erase(IDToValue.find(IteratorID));		IDToValue.erase(IDToValue.find(IteratorID));
isl_id_free(IteratorID);		isl_id_free(IteratorID);
isl_union_map_free(Schedule);		isl_union_map_free(Schedule);

isl_ast_node_free(For);		isl_ast_node_free(For);
isl_ast_expr_free(Iterator);		isl_ast_expr_free(Iterator);
}		}

void IslNodeBuilder::createForSequential(__isl_take isl_ast_node *For) {		void IslNodeBuilder::createForSequential(__isl_take isl_ast_node *For,
		bool KnownParallel) {
isl_ast_node *Body;		isl_ast_node *Body;
isl_ast_expr Init, Inc, Iterator, UB;		isl_ast_expr Init, Inc, Iterator, UB;
isl_id *IteratorID;		isl_id *IteratorID;
Value ValueLB, ValueUB, *ValueInc;		Value ValueLB, ValueUB, *ValueInc;
Type *MaxType;		Type *MaxType;
BasicBlock *ExitBlock;		BasicBlock *ExitBlock;
Value *IV;		Value *IV;
CmpInst::Predicate Predicate;		CmpInst::Predicate Predicate;
bool Parallel;		bool Parallel;

Parallel =		Parallel = KnownParallel \|\| (IslAstInfo::isParallel(For) &&
IslAstInfo::isParallel(For) && !IslAstInfo::isReductionParallel(For);		!IslAstInfo::isReductionParallel(For));

Body = isl_ast_node_for_get_body(For);		Body = isl_ast_node_for_get_body(For);

// isl_ast_node_for_is_degenerate(For)		// isl_ast_node_for_is_degenerate(For)
//		//
// TODO: For degenerated loops we could generate a plain assignment.		// TODO: For degenerated loops we could generate a plain assignment.
// However, for now we just reuse the logic for normal loops, which will		// However, for now we just reuse the logic for normal loops, which will
// create a loop with a single iteration.		// create a loop with a single iteration.
▲ Show 20 Lines • Show All 201 Lines • ▼ Show 20 Lines	if (1 < VectorWidth && VectorWidth <= 16) {
return;		return;
}		}
}		}

if (IslAstInfo::isExecutedInParallel(For)) {		if (IslAstInfo::isExecutedInParallel(For)) {
createForParallel(For);		createForParallel(For);
return;		return;
}		}
createForSequential(For);		createForSequential(For, false);
}		}

void IslNodeBuilder::createIf(__isl_take isl_ast_node *If) {		void IslNodeBuilder::createIf(__isl_take isl_ast_node *If) {
isl_ast_expr *Cond = isl_ast_node_if_get_cond(If);		isl_ast_expr *Cond = isl_ast_node_if_get_cond(If);

Function *F = Builder.GetInsertBlock()->getParent();		Function *F = Builder.GetInsertBlock()->getParent();
LLVMContext &Context = F->getContext();		LLVMContext &Context = F->getContext();

▲ Show 20 Lines • Show All 495 Lines • Show Last 20 Lines

polly/trunk/lib/Transform/ScheduleOptimizer.cpp

Show First 20 Lines • Show All 283 Lines • ▼ Show 20 Lines	ScheduleTreeOptimizer::prevectSchedBand(__isl_take isl_schedule_node *Node,
Node = isolateFullPartialTiles(Node, VectorWidth);		Node = isolateFullPartialTiles(Node, VectorWidth);
Node = isl_schedule_node_child(Node, 0);		Node = isl_schedule_node_child(Node, 0);
// Make sure the "trivially vectorizable loop" is not unrolled. Otherwise,		// Make sure the "trivially vectorizable loop" is not unrolled. Otherwise,
// we will have troubles to match it in the backend.		// we will have troubles to match it in the backend.
Node = isl_schedule_node_band_set_ast_build_options(		Node = isl_schedule_node_band_set_ast_build_options(
Node, isl_union_set_read_from_str(Ctx, "{ unroll[x]: 1 = 0 }"));		Node, isl_union_set_read_from_str(Ctx, "{ unroll[x]: 1 = 0 }"));
Node = isl_schedule_node_band_sink(Node);		Node = isl_schedule_node_band_sink(Node);
Node = isl_schedule_node_child(Node, 0);		Node = isl_schedule_node_child(Node, 0);
		if (isl_schedule_node_get_type(Node) == isl_schedule_node_leaf)
		Node = isl_schedule_node_parent(Node);
		isl_id *LoopMarker = isl_id_alloc(Ctx, "SIMD", nullptr);
		Node = isl_schedule_node_insert_mark(Node, LoopMarker);
return Node;		return Node;
}		}

__isl_give isl_schedule_node *		__isl_give isl_schedule_node *
ScheduleTreeOptimizer::tileNode(__isl_take isl_schedule_node *Node,		ScheduleTreeOptimizer::tileNode(__isl_take isl_schedule_node *Node,
const char *Identifier, ArrayRef<int> TileSizes,		const char *Identifier, ArrayRef<int> TileSizes,
int DefaultTileSize) {		int DefaultTileSize) {
auto Ctx = isl_schedule_node_get_ctx(Node);		auto Ctx = isl_schedule_node_get_ctx(Node);
▲ Show 20 Lines • Show All 324 Lines • Show Last 20 Lines

polly/trunk/test/Isl/CodeGen/simple_vec_strides_multidim.ll

	; RUN: opt %loadPolly -polly-codegen -polly-vectorizer=polly -S -dce < %s \| FileCheck %s			; RUN: opt %loadPolly -polly-opt-isl -polly-codegen -polly-vectorizer=polly -polly-prevect-width=8 -S -dce < %s \| FileCheck %s
	;			;
	; void foo(long n, float A[restrict][n], float B[restrict][n],			; void foo(long n, float A[restrict][n], float B[restrict][n],
	; float C[restrict][n], float D[restrict][n]) {			; float C[restrict][n], float D[restrict][n]) {
	; for (long i = 0; i < 8; i++)			; for (long i = 0; i < 8; i++)
	; for (long j = 0; j < 8; j++)			; for (long j = 0; j < 8; j++)
	; A[i][j] += B[i][0] + C[i][2 * j] + D[j][0];			; A[i][j] += B[i][0] + C[i][2 * j] + D[j][0];
	; }			; }
	;			;
	▲ Show 20 Lines • Show All 67 Lines • Show Last 20 Lines

polly/trunk/test/ScheduleOptimizer/full_partial_tile_separation.ll

	; RUN: opt -S %loadPolly -polly-vectorizer=stripmine -polly-opt-isl -polly-ast -analyze < %s \| FileCheck %s			; RUN: opt -S %loadPolly -polly-vectorizer=stripmine -polly-opt-isl -polly-ast -analyze < %s \| FileCheck %s
	; CHECK: // 1st level tiling - Tiles			; CHECK: // 1st level tiling - Tiles
	; CHECK-NEXT: #pragma known-parallel			; CHECK-NEXT: #pragma known-parallel
	; CHECK-NEXT: for (int c0 = 0; c0 <= floord(ni - 1, 32); c0 += 1)			; CHECK-NEXT: for (int c0 = 0; c0 <= floord(ni - 1, 32); c0 += 1)
	; CHECK-NEXT: for (int c1 = 0; c1 <= floord(nj - 1, 32); c1 += 1)			; CHECK-NEXT: for (int c1 = 0; c1 <= floord(nj - 1, 32); c1 += 1)
	; CHECK-NEXT: for (int c2 = 0; c2 <= floord(nk - 1, 32); c2 += 1) {			; CHECK-NEXT: for (int c2 = 0; c2 <= floord(nk - 1, 32); c2 += 1) {
	; CHECK-NEXT: // 1st level tiling - Points			; CHECK-NEXT: // 1st level tiling - Points
	; CHECK-NEXT: for (int c3 = 0; c3 <= min(31, ni - 32 * c0 - 1); c3 += 1) {			; CHECK-NEXT: for (int c3 = 0; c3 <= min(31, ni - 32 * c0 - 1); c3 += 1) {
	; CHECK-NEXT: for (int c4 = 0; c4 <= min(7, -8 * c1 + nj / 4 - 1); c4 += 1)			; CHECK-NEXT: for (int c4 = 0; c4 <= min(7, -8 * c1 + nj / 4 - 1); c4 += 1)
	; CHECK-NEXT: for (int c5 = 0; c5 <= min(31, nk - 32 * c2 - 1); c5 += 1)			; CHECK-NEXT: for (int c5 = 0; c5 <= min(31, nk - 32 * c2 - 1); c5 += 1) {
	; CHECK-NEXT: #pragma simd			; CHECK-NEXT: // SIMD
	; CHECK-NEXT: for (int c6 = 0; c6 <= 3; c6 += 1)			; CHECK-NEXT: for (int c6 = 0; c6 <= 3; c6 += 1)
	; CHECK-NEXT: Stmt_for_body_6(32 * c0 + c3, 32 * c1 + 4 * c4 + c6, 32 * c2 + c5);			; CHECK-NEXT: Stmt_for_body_6(32 * c0 + c3, 32 * c1 + 4 * c4 + c6, 32 * c2 + c5);
				; CHECK-NEXT: }
	; CHECK-NEXT: if (32 * c1 + 31 >= nj)			; CHECK-NEXT: if (32 * c1 + 31 >= nj)
	; CHECK-NEXT: for (int c5 = 0; c5 <= min(31, nk - 32 * c2 - 1); c5 += 1)			; CHECK-NEXT: #pragma minimal dependence distance: 1
	; CHECK-NEXT: #pragma simd			; CHECK-NEXT: for (int c5 = 0; c5 <= min(31, nk - 32 * c2 - 1); c5 += 1) {
				; CHECK-NEXT: // SIMD
	; CHECK-NEXT: for (int c6 = 0; c6 < nj % 4; c6 += 1)			; CHECK-NEXT: for (int c6 = 0; c6 < nj % 4; c6 += 1)
	; CHECK-NEXT: Stmt_for_body_6(32 * c0 + c3, -(nj % 4) + nj + c6, 32 * c2 + c5);			; CHECK-NEXT: Stmt_for_body_6(32 * c0 + c3, -(nj % 4) + nj + c6, 32 * c2 + c5);
	; CHECK-NEXT: }			; CHECK-NEXT: }
	; CHECK-NEXT: }			; CHECK-NEXT: }
				; CHECK-NEXT: }

	; Function Attrs: nounwind uwtable			; Function Attrs: nounwind uwtable
	define void @kernel_gemm(i32 %ni, i32 %nj, i32 %nk, double %alpha, double %beta, [1024 x double]* %C, [1024 x double]* %A, [1024 x double]* %B) #0 {			define void @kernel_gemm(i32 %ni, i32 %nj, i32 %nk, double %alpha, double %beta, [1024 x double]* %C, [1024 x double]* %A, [1024 x double]* %B) #0 {
	entry:			entry:
	%cmp.27 = icmp sgt i32 %ni, 0			%cmp.27 = icmp sgt i32 %ni, 0
	br i1 %cmp.27, label %for.cond.1.preheader.lr.ph, label %for.end.22			br i1 %cmp.27, label %for.cond.1.preheader.lr.ph, label %for.end.22

	for.cond.1.preheader.lr.ph: ; preds = %entry			for.cond.1.preheader.lr.ph: ; preds = %entry
	▲ Show 20 Lines • Show All 58 Lines • Show Last 20 Lines

polly/trunk/test/ScheduleOptimizer/prevectorization-without-tiling.ll

Show First 20 Lines • Show All 50 Lines • ▼ Show 20 Lines	for.end30: ; preds = %for.inc28
ret void		ret void
}		}

attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }		attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }

; CHECK: #pragma known-parallel		; CHECK: #pragma known-parallel
; CHECK: for (int c0 = 0; c0 <= 1535; c0 += 1)		; CHECK: for (int c0 = 0; c0 <= 1535; c0 += 1)
; CHECK: for (int c1 = 0; c1 <= 383; c1 += 1)		; CHECK: for (int c1 = 0; c1 <= 383; c1 += 1)
; CHECK: #pragma simd		; CHECK: // SIMD
; CHECK: for (int c2 = 0; c2 <= 3; c2 += 1)		; CHECK: for (int c2 = 0; c2 <= 3; c2 += 1)
; CHECK: Stmt_for_body3(c0, 4 * c1 + c2);		; CHECK: Stmt_for_body3(c0, 4 * c1 + c2);
; CHECK: #pragma known-parallel		; CHECK: #pragma known-parallel
; CHECK: for (int c0 = 0; c0 <= 1535; c0 += 1)		; CHECK: for (int c0 = 0; c0 <= 1535; c0 += 1)
; CHECK: for (int c1 = 0; c1 <= 383; c1 += 1)		; CHECK: for (int c1 = 0; c1 <= 383; c1 += 1)
; CHECK: for (int c2 = 0; c2 <= 1535; c2 += 1)		; CHECK: for (int c2 = 0; c2 <= 1535; c2 += 1)
; CHECK: #pragma simd		; CHECK: // SIMD
; CHECK: for (int c3 = 0; c3 <= 3; c3 += 1)		; CHECK: for (int c3 = 0; c3 <= 3; c3 += 1)
; CHECK: Stmt_for_body8(c0, 4 * c1 + c3, c2);		; CHECK: Stmt_for_body8(c0, 4 * c1 + c3, c2);

!llvm.ident = !{!0}		!llvm.ident = !{!0}

!0 = !{!"clang version 3.5.0 "}		!0 = !{!"clang version 3.5.0 "}

polly/trunk/test/ScheduleOptimizer/prevectorization.ll

	Show First 20 Lines • Show All 59 Lines • ▼ Show 20 Lines

	attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }			attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }

	; CHECK: #pragma known-parallel			; CHECK: #pragma known-parallel
	; CHECK: for (int c0 = 0; c0 <= 47; c0 += 1)			; CHECK: for (int c0 = 0; c0 <= 47; c0 += 1)
	; CHECK: for (int c1 = 0; c1 <= 47; c1 += 1)			; CHECK: for (int c1 = 0; c1 <= 47; c1 += 1)
	; CHECK: for (int c2 = 0; c2 <= 31; c2 += 1)			; CHECK: for (int c2 = 0; c2 <= 31; c2 += 1)
	; CHECK: for (int c3 = 0; c3 <= 7; c3 += 1)			; CHECK: for (int c3 = 0; c3 <= 7; c3 += 1)
	; CHECK: #pragma simd			; CHECK: // SIMD
	; CHECK: for (int c4 = 0; c4 <= 3; c4 += 1)			; CHECK: for (int c4 = 0; c4 <= 3; c4 += 1)
	; CHECK: Stmt_for_body3(32 * c0 + c2, 32 * c1 + 4 * c3 + c4);			; CHECK: Stmt_for_body3(32 * c0 + c2, 32 * c1 + 4 * c3 + c4);
	; CHECK: #pragma known-parallel			; CHECK: #pragma known-parallel
	; CHECK: for (int c0 = 0; c0 <= 47; c0 += 1)			; CHECK: for (int c0 = 0; c0 <= 47; c0 += 1)
	; CHECK: for (int c1 = 0; c1 <= 47; c1 += 1)			; CHECK: for (int c1 = 0; c1 <= 47; c1 += 1)
	; CHECK: for (int c2 = 0; c2 <= 47; c2 += 1)			; CHECK: for (int c2 = 0; c2 <= 47; c2 += 1)
	; CHECK: for (int c3 = 0; c3 <= 31; c3 += 1)			; CHECK: for (int c3 = 0; c3 <= 31; c3 += 1)
	; CHECK: for (int c4 = 0; c4 <= 7; c4 += 1)			; CHECK: for (int c4 = 0; c4 <= 7; c4 += 1)
	; CHECK: for (int c5 = 0; c5 <= 31; c5 += 1)			; CHECK: for (int c5 = 0; c5 <= 31; c5 += 1)
	; CHECK: #pragma simd			; CHECK: // SIMD
	; CHECK: for (int c6 = 0; c6 <= 3; c6 += 1)			; CHECK: for (int c6 = 0; c6 <= 3; c6 += 1)
	; CHECK: Stmt_for_body8(32 * c0 + c3, 32 * c1 + 4 * c4 + c6, 32 * c2 + c5);			; CHECK: Stmt_for_body8(32 * c0 + c3, 32 * c1 + 4 * c4 + c6, 32 * c2 + c5);

	; VEC16: {			; VEC16: {
	; VEC16: #pragma known-parallel			; VEC16: #pragma known-parallel
	; VEC16: for (int c0 = 0; c0 <= 47; c0 += 1)			; VEC16: for (int c0 = 0; c0 <= 47; c0 += 1)
	; VEC16: for (int c1 = 0; c1 <= 47; c1 += 1)			; VEC16: for (int c1 = 0; c1 <= 47; c1 += 1)
	; VEC16: for (int c2 = 0; c2 <= 31; c2 += 1)			; VEC16: for (int c2 = 0; c2 <= 31; c2 += 1)
	; VEC16: for (int c3 = 0; c3 <= 1; c3 += 1)			; VEC16: for (int c3 = 0; c3 <= 1; c3 += 1)
	; VEC16: #pragma simd			; VEC16: // SIMD
	; VEC16: for (int c4 = 0; c4 <= 15; c4 += 1)			; VEC16: for (int c4 = 0; c4 <= 15; c4 += 1)
	; VEC16: Stmt_for_body3(32 * c0 + c2, 32 * c1 + 16 * c3 + c4);			; VEC16: Stmt_for_body3(32 * c0 + c2, 32 * c1 + 16 * c3 + c4);
	; VEC16: #pragma known-parallel			; VEC16: #pragma known-parallel
	; VEC16: for (int c0 = 0; c0 <= 47; c0 += 1)			; VEC16: for (int c0 = 0; c0 <= 47; c0 += 1)
	; VEC16: for (int c1 = 0; c1 <= 47; c1 += 1)			; VEC16: for (int c1 = 0; c1 <= 47; c1 += 1)
	; VEC16: for (int c2 = 0; c2 <= 47; c2 += 1)			; VEC16: for (int c2 = 0; c2 <= 47; c2 += 1)
	; VEC16: for (int c3 = 0; c3 <= 31; c3 += 1)			; VEC16: for (int c3 = 0; c3 <= 31; c3 += 1)
	; VEC16: for (int c4 = 0; c4 <= 1; c4 += 1)			; VEC16: for (int c4 = 0; c4 <= 1; c4 += 1)
	; VEC16: for (int c5 = 0; c5 <= 31; c5 += 1)			; VEC16: for (int c5 = 0; c5 <= 31; c5 += 1)
	; VEC16: #pragma simd			; VEC16: // SIMD
	; VEC16: for (int c6 = 0; c6 <= 15; c6 += 1)			; VEC16: for (int c6 = 0; c6 <= 15; c6 += 1)
	; VEC16: Stmt_for_body8(32 * c0 + c3, 32 * c1 + 16 * c4 + c6, 32 * c2 + c5);			; VEC16: Stmt_for_body8(32 * c0 + c3, 32 * c1 + 16 * c4 + c6, 32 * c2 + c5);
	; VEC16: }			; VEC16: }


	!llvm.ident = !{!0}			!llvm.ident = !{!0}

	!0 = !{!"clang version 3.5.0 "}			!0 = !{!"clang version 3.5.0 "}

polly/trunk/test/ScheduleOptimizer/rectangular-tiling.ll

	Show First 20 Lines • Show All 68 Lines • ▼ Show 20 Lines

	; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: #pragma known-parallel			; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: #pragma known-parallel
	; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c0 = 0; c0 <= 3; c0 += 1)			; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c0 = 0; c0 <= 3; c0 += 1)
	; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c1 = 0; c1 <= 31; c1 += 1)			; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c1 = 0; c1 <= 31; c1 += 1)
	; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c2 = 0; c2 <= 15; c2 += 1)			; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c2 = 0; c2 <= 15; c2 += 1)
	; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c3 = 0; c3 <= 1; c3 += 1)			; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c3 = 0; c3 <= 1; c3 += 1)
	; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c4 = 0; c4 <= 7; c4 += 1)			; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c4 = 0; c4 <= 7; c4 += 1)
	; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c5 = 0; c5 <= 1; c5 += 1) {			; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c5 = 0; c5 <= 1; c5 += 1) {
	; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: #pragma simd			; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: // SIMD
	; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c8 = 0; c8 <= 3; c8 += 1)			; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c8 = 0; c8 <= 3; c8 += 1)
	; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4, 16 * c1 + 8 * c3 + 4 * c5 + c8);			; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4, 16 * c1 + 8 * c3 + 4 * c5 + c8);
	; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: #pragma simd			; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: // SIMD
	; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c8 = 0; c8 <= 3; c8 += 1)			; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c8 = 0; c8 <= 3; c8 += 1)
	; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4 + 1, 16 * c1 + 8 * c3 + 4 * c5 + c8);			; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4 + 1, 16 * c1 + 8 * c3 + 4 * c5 + c8);
	; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: }			; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: }

	target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"			target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"

	; Function Attrs: nounwind			; Function Attrs: nounwind
	define void @rect([512 x i32]* %A) {			define void @rect([512 x i32]* %A) {
	Show All 28 Lines

polly/trunk/test/ScopInfo/stride_detection.ll

	; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-dir=%S -polly-vectorizer=polly -polly-codegen < %s -S \| FileCheck %s			; RUN: opt %loadPolly -polly-opt-isl -polly-vectorizer=polly -polly-codegen < %s -S \| FileCheck %s

	; #pragma known-parallel			; #pragma known-parallel
	; for (int c0 = 0; c0 <= 31; c0 += 1)			; for (int c0 = 0; c0 <= 31; c0 += 1)
	; for (int c1 = 0; c1 <= floord(nk - 1, 32); c1 += 1)			; for (int c1 = 0; c1 <= floord(nk - 1, 32); c1 += 1)
	; for (int c2 = 0; c2 <= 7; c2 += 1)			; for (int c2 = 0; c2 <= 7; c2 += 1)
	; for (int c3 = 0; c3 <= min(31, nk - 32 * c1 - 1); c3 += 1)			; for (int c3 = 0; c3 <= min(31, nk - 32 * c1 - 1); c3 += 1)
	; #pragma simd			; #pragma simd
	; for (int c4 = 0; c4 <= 3; c4 += 1)			; for (int c4 = 0; c4 <= 3; c4 += 1)
	▲ Show 20 Lines • Show All 47 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[Polly] [PATCH] Annotation of SIMD loops
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 48792

polly/trunk/include/polly/CodeGen/IslNodeBuilder.h

polly/trunk/lib/CodeGen/IslAst.cpp

polly/trunk/lib/CodeGen/IslNodeBuilder.cpp

polly/trunk/lib/Transform/ScheduleOptimizer.cpp

polly/trunk/test/Isl/CodeGen/simple_vec_strides_multidim.ll

polly/trunk/test/ScheduleOptimizer/full_partial_tile_separation.ll

polly/trunk/test/ScheduleOptimizer/prevectorization-without-tiling.ll

polly/trunk/test/ScheduleOptimizer/prevectorization.ll

polly/trunk/test/ScheduleOptimizer/rectangular-tiling.ll

polly/trunk/test/ScopInfo/stride_detection.ll

This is an archive of the discontinued LLVM Phabricator instance.

[Polly] [PATCH] Annotation of SIMD loops
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 48792

polly/trunk/include/polly/CodeGen/IslNodeBuilder.h

polly/trunk/lib/CodeGen/IslAst.cpp

polly/trunk/lib/CodeGen/IslNodeBuilder.cpp

polly/trunk/lib/Transform/ScheduleOptimizer.cpp

polly/trunk/test/Isl/CodeGen/simple_vec_strides_multidim.ll

polly/trunk/test/ScheduleOptimizer/full_partial_tile_separation.ll

polly/trunk/test/ScheduleOptimizer/prevectorization-without-tiling.ll

polly/trunk/test/ScheduleOptimizer/prevectorization.ll

polly/trunk/test/ScheduleOptimizer/rectangular-tiling.ll

polly/trunk/test/ScopInfo/stride_detection.ll

[Polly] [PATCH] Annotation of SIMD loops
ClosedPublic