Diff 420973

llvm/lib/Transforms/Scalar/LoopInterchange.cpp

Show First 20 Lines • Show All 260 Lines • ▼ Show 20 Lines for (unsigned Row = 0; Row < NumRows; ++Row) {

if (InnerDep == '*' || OuterDep == '*') if (InnerDep == '*' || OuterDep == '*')

return false; return false;

if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep)) if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep))

return false; return false;

} }

return true; return true;

} }

static LoopVector populateWorklist(Loop &L) { static void populateWorklist(Loop &L, LoopVector &LoopList) {

LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: " LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "

<< L.getHeader()->getParent()->getName() << " Loop: %" << L.getHeader()->getParent()->getName() << " Loop: %"

<< L.getHeader()->getName() << '\n'); << L.getHeader()->getName() << '\n');

LoopVector LoopList; assert(LoopList.empty() && "LoopList should initially be empty!");

Loop *CurrentLoop = &L; Loop *CurrentLoop = &L;

const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops(); const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();

while (!Vec->empty()) { while (!Vec->empty()) {

// The current loop has multiple subloops in it hence it is not tightly // The current loop has multiple subloops in it hence it is not tightly

// nested. // nested.

// Discard all loops above it added into Worklist. // Discard all loops above it added into Worklist.

if (Vec->size() != 1) if (Vec->size() != 1) {

return {}; LoopList = {};

MeinersburUnsubmitted

Not Done

If you don't assume the vector is empty when starting (assert(LoopList.empty())?), consider clearing the list at the start of the function.

Change seems NFC anyway.

Meinersbur: If you don't assume the vector is empty when starting (`assert(LoopList.empty())`?), consider…

congzheAuthorUnsubmitted

Done

Thanks for the comment, I added the assert correspondingly.

congzhe: Thanks for the comment, I added the assert correspondingly.

return;

}

LoopList.push_back(CurrentLoop); LoopList.push_back(CurrentLoop);

CurrentLoop = Vec->front(); CurrentLoop = Vec->front();

Vec = &CurrentLoop->getSubLoops(); Vec = &CurrentLoop->getSubLoops();

} }

LoopList.push_back(CurrentLoop); LoopList.push_back(CurrentLoop);

return LoopList; return;

} }

namespace { namespace {

/// LoopInterchangeLegality checks if it is legal to interchange the loop. /// LoopInterchangeLegality checks if it is legal to interchange the loop.

class LoopInterchangeLegality { class LoopInterchangeLegality {

public: public:

LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE, LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,

▲ Show 20 Lines • Show All 110 Lines • ▼ Show 20 Lines struct LoopInterchange {

ScalarEvolution *SE = nullptr; ScalarEvolution *SE = nullptr;

LoopInfo *LI = nullptr; LoopInfo *LI = nullptr;

DependenceInfo *DI = nullptr; DependenceInfo *DI = nullptr;

DominatorTree *DT = nullptr; DominatorTree *DT = nullptr;

/// Interface to emit optimization remarks. /// Interface to emit optimization remarks.

OptimizationRemarkEmitter *ORE; OptimizationRemarkEmitter *ORE;

LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI, LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,

MeinersburUnsubmitted

Not Done

Why make it an object member?

Meinersbur: Why make it an object member?

congzheAuthorUnsubmitted

Done

You are correct that it may not be necessary to make it an object member; I moved LoopList inside the run() function.

congzhe: You are correct that it may not be necessary to make it an object member, I moved `LoopList` to…

DominatorTree *DT, OptimizationRemarkEmitter *ORE) DominatorTree *DT, OptimizationRemarkEmitter *ORE)

: SE(SE), LI(LI), DI(DI), DT(DT), ORE(ORE) {} : SE(SE), LI(LI), DI(DI), DT(DT), ORE(ORE) {}

bool run(Loop *L) { bool run(Loop *L) {

if (L->getParentLoop()) if (L->getParentLoop())

return false; return false;

SmallVector<Loop *, 8> LoopList;

return processLoopList(populateWorklist(*L)); populateWorklist(*L, LoopList);

return processLoopList(LoopList);

} }

bool run(LoopNest &LN) { bool run(LoopNest &LN) {

const auto &LoopList = LN.getLoops(); SmallVector<Loop *, 8> LoopList(LN.getLoops().begin(), LN.getLoops().end());

for (unsigned I = 1; I < LoopList.size(); ++I) for (unsigned I = 1; I < LoopList.size(); ++I)

MeinersburUnsubmitted

Not Done

bool run(LoopNest &LN) {

- auto LoopList =

- SmallVector<Loop *, 8>(LN.getLoops().begin(), LN.getLoops().end());

+ SmallVector<Loop *, 8> LoopList(LN.getLoops().begin(), LN.getLoops().end());

for (unsigned I = 1; I < LoopList.size(); ++I)

One less call of a copy ctor.

Meinersbur: One less call of a copy ctor.

if (LoopList[I]->getParentLoop() != LoopList[I - 1]) if (LoopList[I]->getParentLoop() != LoopList[I - 1])

return false; return false;

return processLoopList(LoopList); return processLoopList(LoopList);

} }

bool isComputableLoopNest(ArrayRef<Loop *> LoopList) { bool isComputableLoopNest(ArrayRef<Loop *> LoopList) {

for (Loop *L : LoopList) { for (Loop *L : LoopList) {

const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L); const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);

Show All 14 Lines struct LoopInterchange {

} }

unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) { unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {

// TODO: Add a better heuristic to select the loop to be interchanged based // TODO: Add a better heuristic to select the loop to be interchanged based

// on the dependence matrix. Currently we select the innermost loop. // on the dependence matrix. Currently we select the innermost loop.

return LoopList.size() - 1; return LoopList.size() - 1;

} }

bool processLoopList(ArrayRef<Loop *> LoopList) { bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {

bool Changed = false; bool Changed = false;

unsigned LoopNestDepth = LoopList.size(); unsigned LoopNestDepth = LoopList.size();

if (LoopNestDepth < 2) { if (LoopNestDepth < 2) {

LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n"); LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");

return false; return false;

} }

if (LoopNestDepth > MaxLoopNestDepth) { if (LoopNestDepth > MaxLoopNestDepth) {

LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than " LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "

Show All 23 Lines #endif

// Get the Outermost loop exit. // Get the Outermost loop exit.

BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock(); BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();

if (!LoopNestExit) { if (!LoopNestExit) {

LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block"); LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block");

return false; return false;

} }

unsigned SelecLoopId = selectLoopForInterchange(LoopList); unsigned SelecLoopId = selectLoopForInterchange(LoopList);

// Move the selected loop outwards to the best possible position. // We try to achieve the globally optimal memory access for the loopnest,

Loop *LoopToBeInterchanged = LoopList[SelecLoopId]; // and do interchange based on a bubble-sort fashion. We start from

for (unsigned i = SelecLoopId; i > 0; i--) { // the innermost loop, move it outwards to the best possible position

bool Interchanged = processLoop(LoopToBeInterchanged, LoopList[i - 1], i, // and repeat this process.

i - 1, DependencyMatrix); for (unsigned j = SelecLoopId; j > 0; j--) {

bool ChangedPerIter = false;

MeinersburUnsubmitted

Not Done

There should also be an early abort if there was no interchange during an entire round of i (like https://en.wikipedia.org/wiki/Bubble_sort#Pseudocode_implementation : until not swapped)

Meinersbur: There should also be an early abort if there was no interchange during an entire round of `i`…

congzheAuthorUnsubmitted

Done

Thanks, I added an early abort accordingly.

congzhe: Thanks, I added an early abort accordingly.

for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {

bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1,

DependencyMatrix);

if (!Interchanged) if (!Interchanged)

return Changed; continue;

// Loops interchanged, update LoopList accordingly.

std::swap(LoopList[i - 1], LoopList[i]);

// Update the DependencyMatrix // Update the DependencyMatrix

interChangeDependencies(DependencyMatrix, i, i - 1); interChangeDependencies(DependencyMatrix, i, i - 1);

MeinersburUnsubmitted

Not Done

Could you add the motivation to use bubble sort to the comment here?

Any particular reason to start with moving the innermost loops inwards (decreasing j) instead of the other way around?

Meinersbur: Could you add the motivation to use bubble sort to the comment here? Any particular reason to…

congzheAuthorUnsubmitted

Done

Thanks, comments updated.

There is no particular reason that we start with moving the innermost loops outwards. Previously, loop interchange picked the innermost loop and tried its best to move it outwards, so in this patch I started with the innermost loop as well, to make it more "consistent" with the previous implementation.

congzhe: Thanks, comments updated. There is no particular reason that we start with moving the…

#ifdef DUMP_DEP_MATRICIES #ifdef DUMP_DEP_MATRICIES

LLVM_DEBUG(dbgs() << "Dependence after interchange\n"); LLVM_DEBUG(dbgs() << "Dependence after interchange\n");

printDepMatrix(DependencyMatrix); printDepMatrix(DependencyMatrix);

#endif #endif

ChangedPerIter |= Interchanged;

Changed |= Interchanged; Changed |= Interchanged;

} }

// Early abort if there was no interchange during an entire round of

// moving loops outwards.

if (!ChangedPerIter)

break;

}

return Changed; return Changed;

} }

bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId, bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId,

unsigned OuterLoopId, unsigned OuterLoopId,

std::vector<std::vector<char>> &DependencyMatrix) { std::vector<std::vector<char>> &DependencyMatrix) {

LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId

<< " and OuterLoopId = " << OuterLoopId << "\n"); << " and OuterLoopId = " << OuterLoopId << "\n");

▲ Show 20 Lines • Show All 892 Lines • ▼ Show 20 Lines static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,

// header). We can just substitute the user with the incoming value and remove // header). We can just substitute the user with the incoming value and remove

// the PHI. // the PHI.

for (PHINode &P : make_early_inc_range(InnerExit->phis())) { for (PHINode &P : make_early_inc_range(InnerExit->phis())) {

assert(P.getNumIncomingValues() == 1 && assert(P.getNumIncomingValues() == 1 &&

"Only loops with a single exit are supported!"); "Only loops with a single exit are supported!");

// Incoming values are guaranteed be instructions currently. // Incoming values are guaranteed be instructions currently.

auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch)); auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));

// In case of multi-level nested loops, follow LCSSA to find the incoming

// value defined from the innermost loop.

auto IncIInnerMost = cast<Instruction>(followLCSSA(IncI));

// Skip phis with incoming values from the inner loop body, excluding the // Skip phis with incoming values from the inner loop body, excluding the

// header and latch. // header and latch.

if (IncI->getParent() != InnerLatch && IncI->getParent() != InnerHeader) if (IncIInnerMost->getParent() != InnerLatch &&

IncIInnerMost->getParent() != InnerHeader)

continue; continue;

assert(all_of(P.users(), assert(all_of(P.users(),

[OuterHeader, OuterExit, IncI, InnerHeader](User *U) { [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {

return (cast<PHINode>(U)->getParent() == OuterHeader && return (cast<PHINode>(U)->getParent() == OuterHeader &&

IncI->getParent() == InnerHeader) || IncI->getParent() == InnerHeader) ||

cast<PHINode>(U)->getParent() == OuterExit; cast<PHINode>(U)->getParent() == OuterExit;

}) && }) &&

▲ Show 20 Lines • Show All 293 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopInterchange/phi-ordering.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt < %s -loop-interchange -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -loop-interchange-threshold=-1000 -S 2>&1 \| FileCheck %s			; RUN: opt < %s -loop-interchange -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -loop-interchange-threshold=0 -S 2>&1 \| FileCheck %s
	;; Checks the order of the inner phi nodes does not cause havoc.			;; Checks the order of the inner phi nodes does not cause havoc.
	;; The inner loop has a reduction into c. The IV is not the first phi.			;; The inner loop has a reduction into c. The IV is not the first phi.

	target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"			target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
	target triple = "armv8--linux-gnueabihf"			target triple = "armv8--linux-gnueabihf"



	; Function Attrs: norecurse nounwind			; Function Attrs: norecurse nounwind
	define void @test(i32 %T, [90 x i32]* noalias nocapture %C, i16* noalias nocapture readonly %A, i16* noalias nocapture readonly %B) local_unnamed_addr #0 {			define void @test(i32 %T, [90 x i32]* noalias nocapture %C, [90 x [90 x i16]]* noalias nocapture readonly %A, i16* noalias nocapture readonly %B) local_unnamed_addr #0 {
				MeinersburUnsubmitted Not Done Reply Inline Actions Why changing this test? Meinersbur: Why changing this test?
				congzheAuthorUnsubmitted Done Reply Inline Actions The original test ignores loop interchange cost model (with -loop-interchange-threshold=-1000) and does interchange two times (which does not generate good memory access pattern). If I did not change the test, with the bubble sort fashion it would interchange every neighboring loops due to ignored cost model, and will eventually interchange three times generating a completely different output as compared to the original test case. If I only set `-loop-interchange-threshold=0`, it would interchange only once which does give good memory access pattern, but the output would still be quite different from the current test. Therefore I also changed array @A from 1-dimension to 2-dimension, thus it does interchange twice, and generated an output that is as close to the original one as possible. congzhe: The original test ignores loop interchange cost model (with -loop-interchange-threshold=-1000)…
	; CHECK-LABEL: @test(			; CHECK-LABEL: @test(
	; CHECK-NEXT: entry:			; CHECK-NEXT: entry:
	; CHECK-NEXT: br label [[FOR3_PREHEADER:%.*]]			; CHECK-NEXT: br label [[FOR3_PREHEADER:%.*]]
	; CHECK: for1.header.preheader:			; CHECK: for1.header.preheader:
	; CHECK-NEXT: br label [[FOR1_HEADER:%.*]]			; CHECK-NEXT: br label [[FOR1_HEADER:%.*]]
	; CHECK: for1.header:			; CHECK: for1.header:
	; CHECK-NEXT: [[I:%.]] = phi i32 [ [[INC20:%.]], [[FOR1_INC19:%.]] ], [ 0, [[FOR1_HEADER_PREHEADER:%.]] ]			; CHECK-NEXT: [[I:%.]] = phi i32 [ [[INC20:%.]], [[FOR1_INC19:%.]] ], [ 0, [[FOR1_HEADER_PREHEADER:%.]] ]
	; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I]], 90			; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I]], 90
	; CHECK-NEXT: br label [[FOR2_HEADER_PREHEADER:%.*]]			; CHECK-NEXT: br label [[FOR2_HEADER_PREHEADER:%.*]]
	; CHECK: for2.header.preheader:			; CHECK: for2.header.preheader:
	; CHECK-NEXT: br label [[FOR2_HEADER:%.*]]			; CHECK-NEXT: br label [[FOR2_HEADER:%.*]]
	; CHECK: for2.header:			; CHECK: for2.header:
	; CHECK-NEXT: [[J:%.]] = phi i32 [ [[INC17:%.]], [[FOR2_INC16:%.*]] ], [ 0, [[FOR2_HEADER_PREHEADER]] ]			; CHECK-NEXT: [[J:%.]] = phi i32 [ [[INC17:%.]], [[FOR2_INC16:%.*]] ], [ 0, [[FOR2_HEADER_PREHEADER]] ]
	; CHECK-NEXT: br label [[FOR3_SPLIT1:%.*]]			; CHECK-NEXT: br label [[FOR3_SPLIT1:%.*]]
	; CHECK: for3.preheader:			; CHECK: for3.preheader:
	; CHECK-NEXT: br label [[FOR3:%.*]]			; CHECK-NEXT: br label [[FOR3:%.*]]
	; CHECK: for3:			; CHECK: for3:
	; CHECK-NEXT: [[K:%.]] = phi i32 [ [[TMP1:%.]], [[FOR3_SPLIT:%.*]] ], [ 1, [[FOR3_PREHEADER]] ]			; CHECK-NEXT: [[K:%.]] = phi i32 [ [[TMP1:%.]], [[FOR3_SPLIT:%.*]] ], [ 1, [[FOR3_PREHEADER]] ]
	; CHECK-NEXT: br label [[FOR1_HEADER_PREHEADER]]			; CHECK-NEXT: br label [[FOR1_HEADER_PREHEADER]]
	; CHECK: for3.split1:			; CHECK: for3.split1:
	; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[K]], [[MUL]]			; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[K]], [[MUL]]
	; CHECK-NEXT: [[ARRAYIDX:%.]] = getelementptr inbounds i16, i16 [[A:%.*]], i32 [[ADD]]			; CHECK-NEXT: [[ARRAYIDX:%.]] = getelementptr inbounds [90 x [90 x i16]], [90 x [90 x i16]] [[A:%.*]], i32 [[ADD]], i32 [[J]], i32 [[I]]
	; CHECK-NEXT: [[TMP0:%.]] = load i16, i16 [[ARRAYIDX]], align 2			; CHECK-NEXT: [[TMP0:%.]] = load i16, i16 [[ARRAYIDX]], align 2
	; CHECK-NEXT: [[ADD15:%.*]] = add nsw i16 [[TMP0]], 1			; CHECK-NEXT: [[ADD15:%.*]] = add nsw i16 [[TMP0]], 1
	; CHECK-NEXT: store i16 [[ADD15]], i16* [[ARRAYIDX]]			; CHECK-NEXT: store i16 [[ADD15]], i16* [[ARRAYIDX]]
	; CHECK-NEXT: [[INC:%.*]] = add nuw nsw i32 [[K]], 1			; CHECK-NEXT: [[INC:%.*]] = add nuw nsw i32 [[K]], 1
	; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 90			; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 90
	; CHECK-NEXT: br label [[FOR2_INC16]]			; CHECK-NEXT: br label [[FOR2_INC16]]
	; CHECK: for3.split:			; CHECK: for3.split:
	; CHECK-NEXT: [[TMP1]] = add nuw nsw i32 [[K]], 1			; CHECK-NEXT: [[TMP1]] = add nuw nsw i32 [[K]], 1
	Show All 22 Lines

	for2.header: ; preds = %for2.inc16, %for1.header			for2.header: ; preds = %for2.inc16, %for1.header
	%j = phi i32 [ 0, %for1.header ], [ %inc17, %for2.inc16 ]			%j = phi i32 [ 0, %for1.header ], [ %inc17, %for2.inc16 ]
	br label %for3			br label %for3

	for3: ; preds = %for3, %for2.header			for3: ; preds = %for3, %for2.header
	%k = phi i32 [ 1, %for2.header ], [ %inc, %for3 ]			%k = phi i32 [ 1, %for2.header ], [ %inc, %for3 ]
	%add = add nsw i32 %k, %mul			%add = add nsw i32 %k, %mul
	%arrayidx = getelementptr inbounds i16, i16* %A, i32 %add			%arrayidx = getelementptr inbounds [90 x [90 x i16]], [90 x [90 x i16]]* %A, i32 %add, i32 %j, i32 %i
	%0 = load i16, i16* %arrayidx, align 2			%0 = load i16, i16* %arrayidx, align 2
	%add15 = add nsw i16 %0, 1			%add15 = add nsw i16 %0, 1
	store i16 %add15, i16* %arrayidx			store i16 %add15, i16* %arrayidx
	%inc = add nuw nsw i32 %k, 1			%inc = add nuw nsw i32 %k, 1
	%exitcond = icmp eq i32 %inc, 90			%exitcond = icmp eq i32 %inc, 90
	br i1 %exitcond, label %for2.inc16, label %for3			br i1 %exitcond, label %for2.inc16, label %for3

	for2.inc16: ; preds = %for.body6			for2.inc16: ; preds = %for.body6
	Show All 15 Lines

llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll

This file was added.

				; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
				; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa -stats 2>&1
				; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s

				; Triply nested loop, should be able to do interchange three times
				; to get the ideal access pattern.
				; void f(int e[10][10][10], int f[10][10][10]) {
				; for (int a = 0; a < 10; a++) {
				; for (int b = 0; b < 10; b++) {
				; for (int c = 0; c < 10; c++) {
				; f[c][b][a] = e[c][b][a];
				; }
				; }
				; }
				; }

				; REMARKS: --- !Passed
				; REMARKS-NEXT: Pass: loop-interchange
				; REMARKS-NEXT: Name: Interchanged
				; REMARKS-NEXT: Function: pr43326-triply-nested
				; REMARKS: --- !Passed
				; REMARKS-NEXT: Pass: loop-interchange
				; REMARKS-NEXT: Name: Interchanged
				; REMARKS-NEXT: Function: pr43326-triply-nested
				; REMARKS: --- !Passed
				; REMARKS-NEXT: Pass: loop-interchange
				; REMARKS-NEXT: Name: Interchanged
				; REMARKS-NEXT: Function: pr43326-triply-nested

				; Test input: a three-deep loop nest (outermost -> middle -> innermost) that
				; copies one i32 per iteration from %e to %f. Each level has its own i64
				; induction variable that starts at 0 and leaves its loop once the incremented
				; value equals 10. Both accesses use the indices
				; [innermost][middle][outermost], i.e. the innermost induction variable
				; selects the outermost array dimension, which is the access pattern the
				; REMARKS checks in this file expect the pass to fix with three interchanges.
				define void @pr43326-triply-nested([10 x [10 x i32]]* %e, [10 x [10 x i32]]* %f) {
				entry:
				br label %for.outermost.header

				; Outermost loop header: holds the outermost induction variable.
				for.outermost.header: ; preds = %entry, %for.outermost.latch
				%indvars.outermost = phi i64 [ 0, %entry ], [ %indvars.outermost.next, %for.outermost.latch ]
				br label %for.middle.header

				; Single exit of the whole nest, reached only from the outermost latch.
				for.cond.cleanup: ; preds = %for.outermost.latch
				ret void

				; Middle loop header: its induction variable restarts at 0 on every entry
				; from the outermost header.
				for.middle.header: ; preds = %for.outermost.header, %for.middle.latch
				%indvars.middle = phi i64 [ 0, %for.outermost.header ], [ %indvars.middle.next, %for.middle.latch ]
				br label %for.innermost

				; Outermost latch: increment, then loop back while the new value is not 10.
				for.outermost.latch: ; preds = %for.middle.latch
				%indvars.outermost.next = add nuw nsw i64 %indvars.outermost, 1
				%exitcond.outermost = icmp ne i64 %indvars.outermost.next, 10
				br i1 %exitcond.outermost, label %for.outermost.header, label %for.cond.cleanup

				; Middle latch: increment, then loop back while the new value is not 10.
				for.middle.latch: ; preds = %for.innermost
				%indvars.middle.next = add nuw nsw i64 %indvars.middle, 1
				%exitcond.middle = icmp ne i64 %indvars.middle.next, 10
				br i1 %exitcond.middle, label %for.middle.header, label %for.outermost.latch

				; Innermost loop: a single block that is header, body and latch at once.
				for.innermost: ; preds = %for.middle.header, %for.innermost
				%indvars.innermost = phi i64 [ 0, %for.middle.header ], [ %indvars.innermost.next, %for.innermost ]
				; Load e[innermost][middle][outermost]; the leading GEP index steps over a
				; whole [10 x [10 x i32]] element per innermost iteration.
				%arrayidx12 = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* %e, i64 %indvars.innermost, i64 %indvars.middle, i64 %indvars.outermost
				%0 = load i32, i32* %arrayidx12
				; Store the loaded value to the same indices of %f.
				%arrayidx18 = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* %f, i64 %indvars.innermost, i64 %indvars.middle, i64 %indvars.outermost
				store i32 %0, i32* %arrayidx18
				%indvars.innermost.next = add nuw nsw i64 %indvars.innermost, 1
				%exitcond.innermost = icmp ne i64 %indvars.innermost.next, 10
				br i1 %exitcond.innermost, label %for.innermost, label %for.middle.latch
				}
				No newline at end of file

This is an archive of the discontinued LLVM Phabricator instance.

[LoopInterchange] Try to achieve the most optimal access pattern after interchange
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 420973

llvm/lib/Transforms/Scalar/LoopInterchange.cpp

llvm/test/Transforms/LoopInterchange/phi-ordering.ll

llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll

This is an archive of the discontinued LLVM Phabricator instance.

[LoopInterchange] Try to achieve the most optimal access pattern after interchange ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 420973

llvm/lib/Transforms/Scalar/LoopInterchange.cpp

llvm/test/Transforms/LoopInterchange/phi-ordering.ll

llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll

[LoopInterchange] Try to achieve the most optimal access pattern after interchange
ClosedPublic