This is an archive of the discontinued LLVM Phabricator instance.

[INDVARS]
ClosedPublic

Authored by zinovy.nis on Jul 28 2014, 5:42 AM.

Download Raw Diff

Details

Reviewers

chandlerc
atrick
• rafael
bkramer
hfinkel

Commits

rG0a36cba29d68: [INDVARS] Extend using of widening of induction variables for the cases of "sub…
rL216160: [INDVARS] Extend using of widening of induction variables for the cases of "sub…

Summary

This patch extends induction-variable widening to "sub nsw" and "mul nsw" instructions. Currently only "add nsw" instructions are widened.
This patch eliminates many "sext" instructions in 64-bit code (and the corresponding target code) in cases like:

int N = 100;
float **A;

// Example loop for the induction-variable widening described in this summary.
// For each x in [x0, x1), sums three elements of row A[0] at the int-typed
// indices x + N, x - N and x * N, and stores the sum into A[1][x].
// The caller must point A at two rows large enough for those indices.
void foo(int x0, int x1)
{
        float * A_cur = &A[0][0];
        float * A_next = &A[1][0];
        for(int x = x0; x < x1; ++x)
        {
          // Currently only the [x + N] case is widened; the other 2 cases lead to sext.
          // This patch fixes it, so all 3 cases do not need sext.
          const float div = A_cur[x + N] + A_cur[x - N] + A_cur[x * N];
          A_next[x] = div;
        }
}
...
> clang++ test.cpp -march=core-avx2 -Ofast  -fno-unroll-loops -fno-tree-vectorize -S -o -

(with my patch)

.LBB0_2:                                # %for.body
                                        # =>This Inner Loop Header: Depth=1
        vmovss  (%rdi,%rcx,4), %xmm0
        vaddss  (%rdx,%rcx,4), %xmm0, %xmm0
        vaddss  (%rax), %xmm0, %xmm0
        vmovss  %xmm0, (%r8,%rcx,4)
        incq    %rcx
        addq    %r9, %rax
        cmpl    %esi, %ecx
        jl      .LBB0_2

vs trunk:

.LBB0_2:                                # %for.body
                                        # =>This Inner Loop Header: Depth=1
        vmovss  (%r10,%rcx,4), %xmm0
        leal    (%r11,%rcx), %edx
        movslq  %edx, %rdx
        vaddss  (%rax,%rdx,4), %xmm0, %xmm0
        movslq  %edi, %rdi
        vaddss  (%rax,%rdi,4), %xmm0, %xmm0
        vmovss  %xmm0, (%r8,%rcx,4)
        incq    %rcx
        addl    %r9d, %edi
        cmpl    %esi, %ecx
        jl      .LBB0_2

Diff Detail

Repository: rL LLVM

Event Timeline

zinovy.nis updated this revision to Diff 11943.Jul 28 2014, 5:42 AM

zinovy.nis retitled this revision from to [INDVARS].

zinovy.nis updated this object.

zinovy.nis edited the test plan for this revision. (Show Details)

zinovy.nis added reviewers: atrick, • rafael.

zinovy.nis set the repository for this revision to rL LLVM.

zinovy.nis added a project: deleted.

zinovy.nis added a subscriber: Unknown Object (MLST).

zinovy.nis updated this object.Jul 28 2014, 5:45 AM

Gentle ping.

zinovy.nis removed a project: deleted.Aug 17 2014, 5:41 AM

zinovy.nis added reviewers: chandlerc, bkramer.Aug 19 2014, 2:36 AM

Gentle ping #2.

LGTM. Thanks!

lib/Transforms/Scalar/IndVarSimplify.cpp
847 ↗	(On Diff #11943)	I recommend using llvm_unreachable here. The caller can't handle a null return value, and this function should never be called with any other opcode.

This revision is now accepted and ready to land.Aug 20 2014, 9:40 AM

Zinovy, this is great! I'm sorry I didn't review it right away. Thanks for the ping.
LGTM.

Closed by commit rL216160 (authored by @zinovy.nis).

Revision Contents

Path

Size

llvm/

trunk/

lib/

Transforms/

Scalar/

IndVarSimplify.cpp

27 lines

test/

Transforms/

IndVarSimplify/

2011-09-10-widen-nsw.ll

17 lines

Diff 12749

llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp

Show First 20 Lines • Show All 751 Lines • ▼ Show 20 Lines	Value *getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
Instruction *Use);		Instruction *Use);

Instruction *CloneIVUser(NarrowIVDefUse DU);		Instruction *CloneIVUser(NarrowIVDefUse DU);

const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse);		const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse);

const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU);		const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU);

		const SCEV *GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
		unsigned OpCode) const;

Instruction *WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);		Instruction *WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);		void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};		};
} // anonymous namespace		} // anonymous namespace

/// isLoopInvariant - Perform a quick domtree based check for loop invariance		/// isLoopInvariant - Perform a quick domtree based check for loop invariance
/// assuming that V is used within the loop. LoopInfo::isLoopInvariant() seems		/// assuming that V is used within the loop. LoopInfo::isLoopInvariant() seems
▲ Show 20 Lines • Show All 60 Lines • ▼ Show 20 Lines	if (const OverflowingBinaryOperator *OBO =
dyn_cast<OverflowingBinaryOperator>(NarrowBO)) {		dyn_cast<OverflowingBinaryOperator>(NarrowBO)) {
if (OBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();		if (OBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
if (OBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();		if (OBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
}		}
return WideBO;		return WideBO;
}		}
}		}

		const SCEV *WidenIV::GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
		unsigned OpCode) const {
		if (OpCode == Instruction::Add)
		return SE->getAddExpr(LHS, RHS);
		if (OpCode == Instruction::Sub)
		return SE->getMinusSCEV(LHS, RHS);
		if (OpCode == Instruction::Mul)
		return SE->getMulExpr(LHS, RHS);

		llvm_unreachable("Unsupported opcode.");
		return nullptr;
		}

/// No-wrap operations can transfer sign extension of their result to their		/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without		/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the		/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return it.		/// operands is an AddRec for this loop, return it.
const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {		const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {

// Handle the common case of add<nsw/nuw>		// Handle the common case of add<nsw/nuw>
if (DU.NarrowUse->getOpcode() != Instruction::Add)		const unsigned OpCode = DU.NarrowUse->getOpcode();
		// Only Add/Sub/Mul instructions supported yet.
		if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
		OpCode != Instruction::Mul)
return nullptr;		return nullptr;

// One operand (NarrowDef) has already been extended to WideDef. Now determine		// One operand (NarrowDef) has already been extended to WideDef. Now determine
// if extending the other will lead to a recurrence.		// if extending the other will lead to a recurrence.
unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;		unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");		assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

const SCEV *ExtendOperExpr = nullptr;		const SCEV *ExtendOperExpr = nullptr;
const OverflowingBinaryOperator *OBO =		const OverflowingBinaryOperator *OBO =
cast<OverflowingBinaryOperator>(DU.NarrowUse);		cast<OverflowingBinaryOperator>(DU.NarrowUse);
if (IsSigned && OBO->hasNoSignedWrap())		if (IsSigned && OBO->hasNoSignedWrap())
ExtendOperExpr = SE->getSignExtendExpr(		ExtendOperExpr = SE->getSignExtendExpr(
SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);		SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
else if(!IsSigned && OBO->hasNoUnsignedWrap())		else if(!IsSigned && OBO->hasNoUnsignedWrap())
ExtendOperExpr = SE->getZeroExtendExpr(		ExtendOperExpr = SE->getZeroExtendExpr(
SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);		SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
else		else
return nullptr;		return nullptr;

// When creating this AddExpr, don't apply the current operations NSW or NUW		// When creating this SCEV expr, don't apply the current operations NSW or NUW
// flags. This instruction may be guarded by control flow that the no-wrap		// flags. This instruction may be guarded by control flow that the no-wrap
// behavior depends on. Non-control-equivalent instructions can be mapped to		// behavior depends on. Non-control-equivalent instructions can be mapped to
// the same SCEV expression, and it would be incorrect to transfer NSW/NUW		// the same SCEV expression, and it would be incorrect to transfer NSW/NUW
// semantics to those operations.		// semantics to those operations.
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(		const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(
SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr));		GetSCEVByOpCode(SE->getSCEV(DU.WideDef), ExtendOperExpr, OpCode));

if (!AddRec || AddRec->getLoop() != L)		if (!AddRec || AddRec->getLoop() != L)
return nullptr;		return nullptr;
return AddRec;		return AddRec;
}		}

/// GetWideRecurrence - Is this instruction potentially interesting from		/// GetWideRecurrence - Is this instruction potentially interesting from
/// IVUsers' perspective after widening it's type? In other words, can the		/// IVUsers' perspective after widening it's type? In other words, can the
/// extend be safely hoisted out of the loop with SCEV reducing the value to a		/// extend be safely hoisted out of the loop with SCEV reducing the value to a
▲ Show 20 Lines • Show All 1,041 Lines • Show Last 20 Lines

llvm/trunk/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll

	; RUN: opt < %s -indvars -S | FileCheck %s			; RUN: opt < %s -indvars -S | FileCheck %s
	; Test WidenIV::GetExtendedOperandRecurrence.			; Test WidenIV::GetExtendedOperandRecurrence.
	; add219 should be extended to i64 because it is nsw, even though its			; %add, %sub and %mul should be extended to i64 because it is nsw, even though its
	; sext cannot be hoisted outside the loop.			; sext cannot be hoisted outside the loop.

	target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"			target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

	define void @test() nounwind {			define void @test() nounwind {
	entry:			entry:
	br i1 undef, label %for.body11, label %for.end285			br i1 undef, label %for.body11, label %for.end285

	for.body11: ; preds = %entry			for.body11: ; preds = %entry
	%shl = shl i32 1, 1			%shl = shl i32 1, 1
	%shl132 = shl i32 %shl, 1			%shl132 = shl i32 %shl, 1
	br label %for.body153			br label %for.body153

	for.body153: ; preds = %for.body153, %for.body11			for.body153: ; preds = %for.body153, %for.body11
	br i1 undef, label %for.body170, label %for.body153			br i1 undef, label %for.body170, label %for.body153

	; CHECK: add nsw i64 %indvars.iv, 1			; CHECK: add nsw i64 %indvars.iv, 1
				; CHECK: sub nsw i64 %indvars.iv, 2
				; CHECK: mul nsw i64 %indvars.iv, 4
	for.body170: ; preds = %for.body170, %for.body153			for.body170: ; preds = %for.body170, %for.body153
	%i2.19 = phi i32 [ %add249, %for.body170 ], [ 0, %for.body153 ]			%i2.19 = phi i32 [ %add249, %for.body170 ], [ 0, %for.body153 ]
	%add219 = add nsw i32 %i2.19, 1
	%idxprom220 = sext i32 %add219 to i64			%add = add nsw i32 %i2.19, 1
				%add.idxprom = sext i32 %add to i64

				%sub = sub nsw i32 %i2.19, 2
				%sub.idxprom = sext i32 %sub to i64

				%mul = mul nsw i32 %i2.19, 4
				%mul.idxprom = sext i32 %mul to i64

	%add249 = add nsw i32 %i2.19, %shl132			%add249 = add nsw i32 %i2.19, %shl132
	br label %for.body170			br label %for.body170

	for.end285: ; preds = %entry			for.end285: ; preds = %entry
	ret void			ret void
	}			}