This is an archive of the discontinued LLVM Phabricator instance.

[LICM] hoist fences out of loops w/o memory operations
ClosedPublic

Authored by reames on Aug 8 2018, 9:30 PM.

Download Raw Diff

Details

Reviewers

mkazantsev
skatkov
anna

Commits

rGca256d93fb35: [LICM] hoist fences out of loops w/o memory operations
rL339378: [LICM] hoist fences out of loops w/o memory operations

Summary

The motivating case is an otherwise dead loop with a fence in it. At the moment, this goes all the way through the optimizer and we end up emitting an entirely pointless loop on x86. This case may seem a bit contrived, but we've seen it in real code as the result of otherwise reasonable lowering strategies combined with thread-local memory optimizations (such as escape analysis).

To handle this simple case, we can teach LICM to hoist must-execute fences when there are no other memory operations within the loop.

Diff Detail

Repository: rL LLVM

Event Timeline

reames created this revision. Aug 8 2018, 9:30 PM

Herald added subscribers: llvm-commits, jfb, bollu, mcrosier. · View Herald Transcript · Aug 8 2018, 9:30 PM

skatkov accepted this revision. Aug 8 2018, 9:57 PM

skatkov added inline comments.

include/llvm/Analysis/AliasSetTracker.h
228 ↗	(On Diff #159853)	Please document that this method may return nullptr if it cannot determine the unique instruction for any reason.
lib/Transforms/Scalar/LICM.cpp
696 ↗	(On Diff #159853)	Don't you want to assert that if Begin->getUniqueInstruction() != nullptr, then it should be FI?

This revision is now accepted and ready to land. Aug 8 2018, 9:57 PM

Closed by commit rL339378: [LICM] hoist fences out of loops w/o memory operations (authored by reames). · Explain Why · Aug 9 2018, 1:19 PM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

llvm/

trunk/

include/

llvm/

Analysis/

AliasSetTracker.h

14 lines

lib/

Transforms/

Scalar/

LICM.cpp

15 lines

test/

Transforms/

LICM/

fence.ll

14 lines

Diff 159992

llvm/trunk/include/llvm/Analysis/AliasSetTracker.h

Show First 20 Lines • Show All 218 Lines • ▼ Show 20 Lines	public:
iterator begin() const { return iterator(PtrList); }		iterator begin() const { return iterator(PtrList); }
iterator end() const { return iterator(); }		iterator end() const { return iterator(); }
bool empty() const { return PtrList == nullptr; }		bool empty() const { return PtrList == nullptr; }

// Unfortunately, ilist::size() is linear, so we have to add code to keep		// Unfortunately, ilist::size() is linear, so we have to add code to keep
// track of the list's exact size.		// track of the list's exact size.
unsigned size() { return SetSize; }		unsigned size() { return SetSize; }

		/// If this alias set is known to contain a single instruction and only a
		/// single unique instruction, return it. Otherwise, return nullptr.
		Instruction* getUniqueInstruction() {
		if (size() != 0)
		// Can't track source of pointer, might be many instruction
		return nullptr;
		if (AliasAny)
		// May have collapses alias set
		return nullptr;
		if (1 != UnknownInsts.size())
		return nullptr;
		return cast<Instruction>(UnknownInsts[0]);
		}

void print(raw_ostream &OS) const;		void print(raw_ostream &OS) const;
void dump() const;		void dump() const;

/// Define an iterator for alias sets... this is just a forward iterator.		/// Define an iterator for alias sets... this is just a forward iterator.
class iterator : public std::iterator<std::forward_iterator_tag,		class iterator : public std::iterator<std::forward_iterator_tag,
PointerRec, ptrdiff_t> {		PointerRec, ptrdiff_t> {
PointerRec *CurNode;		PointerRec *CurNode;

▲ Show 20 Lines • Show All 233 Lines • Show Last 20 Lines

llvm/trunk/lib/Transforms/Scalar/LICM.cpp

Show First 20 Lines • Show All 576 Lines • ▼ Show 20 Lines

namespace {		namespace {
/// Return true if-and-only-if we know how to (mechanically) both hoist and		/// Return true if-and-only-if we know how to (mechanically) both hoist and
/// sink a given instruction out of a loop. Does not address legality		/// sink a given instruction out of a loop. Does not address legality
/// concerns such as aliasing or speculation safety.		/// concerns such as aliasing or speculation safety.
bool isHoistableAndSinkableInst(Instruction &I) {		bool isHoistableAndSinkableInst(Instruction &I) {
// Only these instructions are hoistable/sinkable.		// Only these instructions are hoistable/sinkable.
return (isa<LoadInst>(I) \|\| isa<CallInst>(I) \|\|		return (isa<LoadInst>(I) \|\| isa<CallInst>(I) \|\|
		isa<FenceInst>(I) \|\|
isa<BinaryOperator>(I) \|\| isa<CastInst>(I) \|\|		isa<BinaryOperator>(I) \|\| isa<CastInst>(I) \|\|
isa<SelectInst>(I) \|\| isa<GetElementPtrInst>(I) \|\|		isa<SelectInst>(I) \|\| isa<GetElementPtrInst>(I) \|\|
isa<CmpInst>(I) \|\| isa<InsertElementInst>(I) \|\|		isa<CmpInst>(I) \|\| isa<InsertElementInst>(I) \|\|
isa<ExtractElementInst>(I) \|\| isa<ShuffleVectorInst>(I) \|\|		isa<ExtractElementInst>(I) \|\| isa<ShuffleVectorInst>(I) \|\|
isa<ExtractValueInst>(I) \|\| isa<InsertValueInst>(I));		isa<ExtractValueInst>(I) \|\| isa<InsertValueInst>(I));
}		}
/// Return true if all of the alias sets within this AST are known not to		/// Return true if all of the alias sets within this AST are known not to
/// contain a Mod.		/// contain a Mod.
▲ Show 20 Lines • Show All 86 Lines • ▼ Show 20 Lines	if (AliasAnalysis::onlyReadsMemory(Behavior)) {
if (isReadOnly(CurAST))		if (isReadOnly(CurAST))
return true;		return true;
}		}

// FIXME: This should use mod/ref information to see if we can hoist or		// FIXME: This should use mod/ref information to see if we can hoist or
// sink the call.		// sink the call.

return false;		return false;
		} else if (auto *FI = dyn_cast<FenceInst>(&I)) {
		// Fences alias (most) everything to provide ordering. For the moment,
		// just give up if there are any other memory operations in the loop.
		auto Begin = CurAST->begin();
		assert(Begin != CurAST->end() && "must contain FI");
		if (std::next(Begin) != CurAST->end())
		// constant memory for instance, TODO: handle better
		return false;
		auto *UniqueI = Begin->getUniqueInstruction();
		if (!UniqueI)
		// other memory op, give up
		return false;
		assert(UniqueI == FI && "AS must contain FI");
		return true;
}		}

assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");		assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");

// We've established mechanical ability and aliasing, it's up to the caller		// We've established mechanical ability and aliasing, it's up to the caller
// to check fault safety		// to check fault safety
return true;		return true;
}		}
▲ Show 20 Lines • Show All 878 Lines • Show Last 20 Lines

llvm/trunk/test/Transforms/LICM/fence.ll

; RUN: opt -licm -basicaa < %s -S \| FileCheck %s		; RUN: opt -licm -basicaa < %s -S \| FileCheck %s
; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S \| FileCheck %s		; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S \| FileCheck %s

define void @test1(i64 %n) {		define void @test1(i64 %n) {
; CHECK-LABEL: @test1		; CHECK-LABEL: @test1
; CHECK-LABEL: loop:
; CHECK: fence		; CHECK: fence
		; CHECK-LABEL: loop:
entry:		entry:
br label %loop		br label %loop
loop:		loop:
%iv = phi i64 [0, %entry], [%iv.next, %loop]		%iv = phi i64 [0, %entry], [%iv.next, %loop]
fence release		fence release
%iv.next = add i64 %iv, 1		%iv.next = add i64 %iv, 1
%test = icmp slt i64 %iv, %n		%test = icmp slt i64 %iv, %n
br i1 %test, label %loop, label %exit		br i1 %test, label %loop, label %exit
exit:		exit:
ret void		ret void
}		}

define void @test2(i64 %n) {		define void @test2(i64 %n) {
; CHECK-LABEL: @test2		; CHECK-LABEL: @test2
; CHECK-LABEL: loop:
; CHECK: fence		; CHECK: fence
		; CHECK-LABEL: loop:
entry:		entry:
br label %loop		br label %loop
loop:		loop:
%iv = phi i64 [0, %entry], [%iv.next, %loop]		%iv = phi i64 [0, %entry], [%iv.next, %loop]
fence acquire		fence acquire
%iv.next = add i64 %iv, 1		%iv.next = add i64 %iv, 1
%test = icmp slt i64 %iv, %n		%test = icmp slt i64 %iv, %n
br i1 %test, label %loop, label %exit		br i1 %test, label %loop, label %exit
exit:		exit:
ret void		ret void
}		}

define void @test3(i64 %n) {		define void @test3(i64 %n) {
; CHECK-LABEL: @test3		; CHECK-LABEL: @test3
; CHECK-LABEL: loop:
; CHECK: fence		; CHECK: fence
		; CHECK-LABEL: loop:
entry:		entry:
br label %loop		br label %loop
loop:		loop:
%iv = phi i64 [0, %entry], [%iv.next, %loop]		%iv = phi i64 [0, %entry], [%iv.next, %loop]
fence acq_rel		fence acq_rel
%iv.next = add i64 %iv, 1		%iv.next = add i64 %iv, 1
%test = icmp slt i64 %iv, %n		%test = icmp slt i64 %iv, %n
br i1 %test, label %loop, label %exit		br i1 %test, label %loop, label %exit
exit:		exit:
ret void		ret void
}		}

define void @test4(i64 %n) {		define void @test4(i64 %n) {
; CHECK-LABEL: @test4		; CHECK-LABEL: @test4
; CHECK-LABEL: loop:
; CHECK: fence		; CHECK: fence
		; CHECK-LABEL: loop:
entry:		entry:
br label %loop		br label %loop
loop:		loop:
%iv = phi i64 [0, %entry], [%iv.next, %loop]		%iv = phi i64 [0, %entry], [%iv.next, %loop]
fence seq_cst		fence seq_cst
%iv.next = add i64 %iv, 1		%iv.next = add i64 %iv, 1
%test = icmp slt i64 %iv, %n		%test = icmp slt i64 %iv, %n
br i1 %test, label %loop, label %exit		br i1 %test, label %loop, label %exit
Show All 30 Lines	loop:
%n = load i64, i64* %p		%n = load i64, i64* %p
%iv.next = add i64 %iv, 1		%iv.next = add i64 %iv, 1
%test = icmp slt i64 %iv, %n		%test = icmp slt i64 %iv, %n
br i1 %test, label %loop, label %exit		br i1 %test, label %loop, label %exit
exit:		exit:
ret void		ret void
}		}

define void @testfp1(i64 %n, i64* %p) {		; Note: While a false negative for LICM on it's own, O3 does get this
; CHECK-LABEL: @testfp1		; case by combining the fences.
		define void @testfn1(i64 %n, i64* %p) {
		; CHECK-LABEL: @testfn1
; CHECK-LABEL: loop:		; CHECK-LABEL: loop:
; CHECK: fence		; CHECK: fence
entry:		entry:
br label %loop		br label %loop
loop:		loop:
%iv = phi i64 [0, %entry], [%iv.next, %loop]		%iv = phi i64 [0, %entry], [%iv.next, %loop]
fence release		fence release
fence release		fence release
%iv.next = add i64 %iv, 1		%iv.next = add i64 %iv, 1
%test = icmp slt i64 %iv, %n		%test = icmp slt i64 %iv, %n
br i1 %test, label %loop, label %exit		br i1 %test, label %loop, label %exit
exit:		exit:
ret void		ret void
}		}