This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/lib/Analysis/
-
lib/
-
Analysis/
-
ValueTracking.cpp

Differential D66528

Fix misaligned mov instruction codegen by making MaxDepth in value tracking configurable
AbandonedPublic

Authored by cjld on Aug 21 2019, 6:04 AM.

Download Raw Diff

Details

Reviewers

MaskRay
spatel
jdoerfert

Summary

Make MaxDepth in value tracking configurable.

A small MaxDepth can lead to low-performance code being generated; an example is shown below:

// b.cc
#include <cstddef>
#include <stdint.h>

typedef long long index;

extern "C" index g_tid;
extern "C" index g_num;


// Reproducer kernel: computes c[i1+i0+i2] = b[i0+i2] + a[i1+i0+i2] over a
// three-level loop nest. The outer loop's start and stride come from the
// external globals g_tid and g_num; all other bounds are compile-time
// constants.
//
// a, b, c are declared __restrict__ and each is passed to
// __builtin_assume_aligned with an alignment of 32 bytes.
void add3(float* __restrict__ a, float* __restrict__ b, float* __restrict__ c) {
    // Fixed extents, in elements.
    index n = 64*1024;
    index m = 16*1024;
    index k = 4*1024;
    // Start/stride parameters are only known at run time.
    index tid = g_tid;
    index num = g_num;
    // NOTE(review): the builtin's return value is discarded here; whether the
    // alignment assumption still reaches the optimizer in that form is
    // compiler-specific -- confirm against the compiler documentation.
    __builtin_assume_aligned(a, 32);
    __builtin_assume_aligned(b, 32);
    __builtin_assume_aligned(c, 32);
    // i0 starts at tid*k and advances by num*k, so it is always a multiple of
    // k (4096 elements); the loop stops once i0 reaches m.
    for (index i0=tid*k; i0<m; i0+=num*k)
        // i1 advances in steps of m from 0 up to (but excluding) n*m.
        for (index i1=0; i1<n*m; i1+=m)
            // Innermost loop walks k consecutive elements. b is indexed
            // without i1, so the same b range is re-read for every i1.
            for (index i2=0; i2<k; i2++)
                c[i1+i0+i2] = b[i0+i2] + a[i1+i0+i2];
}

Compile with clang ./b.cc -Ofast -march=native -std=c++14 -S -o b.s (on an Intel i7-7500U),
which yields:

// b.s
......
	vmovaps	-224(%rdi,%rbx,4), %ymm0
	vmovups	-192(%rdi,%rbx,4), %ymm1
	vmovups	-160(%rdi,%rbx,4), %ymm2
	vmovups	-128(%rdi,%rbx,4), %ymm3
	vaddps	-224(%rsi,%rbx,4), %ymm0, %ymm0
	vaddps	-192(%rsi,%rbx,4), %ymm1, %ymm1
	vaddps	-160(%rsi,%rbx,4), %ymm2, %ymm2
	vaddps	-128(%rsi,%rbx,4), %ymm3, %ymm3
	vmovaps	%ymm0, -224(%rdx,%rbx,4)
	vmovups	%ymm1, -192(%rdx,%rbx,4)
	vmovups	%ymm2, -160(%rdx,%rbx,4)
	vmovups	%ymm3, -128(%rdx,%rbx,4)
......

Expected:

// b.s
......
	vmovaps	-224(%rdi,%rbx,4), %ymm0
	vmovaps	-192(%rdi,%rbx,4), %ymm1
	vmovaps	-160(%rdi,%rbx,4), %ymm2
	vmovaps	-128(%rdi,%rbx,4), %ymm3
	vaddps	-224(%rsi,%rbx,4), %ymm0, %ymm0
	vaddps	-192(%rsi,%rbx,4), %ymm1, %ymm1
	vaddps	-160(%rsi,%rbx,4), %ymm2, %ymm2
	vaddps	-128(%rsi,%rbx,4), %ymm3, %ymm3
	vmovaps	%ymm0, -224(%rdx,%rbx,4)
	vmovaps	%ymm1, -192(%rdx,%rbx,4)
	vmovaps	%ymm2, -160(%rdx,%rbx,4)
	vmovaps	%ymm3, -128(%rdx,%rbx,4)
......

This happens because MaxDepth is too small, so LLVM is unable to compute the alignment information. Compiling with clang ./b.cc -Ofast -march=native -std=c++14 -mllvm -value-tracking-max-depth=10 -S -o b.s produces the expected asm code.

Diff Detail

Event Timeline

cjld created this revision.Aug 21 2019, 6:04 AM

Herald added a project: Restricted Project. · View Herald TranscriptAug 21 2019, 6:04 AM

Herald added subscribers: llvm-commits, hiraditya. · View Herald Transcript

I don't think this is the right fix.

cjld retitled this revision from make MaxDepth in value tracking configurable to Fix misaligned mov instruction codegen by making MaxDepth in value tracking configurable.Aug 21 2019, 6:53 AM

In D66528#1639226, @lebedev.ri wrote:

I don't think this is the right fix.

I found a better way to fix this, please see D66575

cjld abandoned this revision.Aug 22 2019, 10:29 PM

Revision Contents

Path

Size

llvm/

lib/

Analysis/

ValueTracking.cpp

26 lines

Diff 216380

llvm/lib/Analysis/ValueTracking.cpp

Show First 20 Lines • Show All 60 Lines • ▼ Show 20 Lines
#include "llvm/IR/Value.h"		#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"		#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"		#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"		#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"		#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"		#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"		#include "llvm/Support/MathExtras.h"
#include <algorithm>		#include <algorithm>
#include <array>		#include <vector>
#include <cassert>		#include <cassert>
#include <cstdint>		#include <cstdint>
#include <iterator>		#include <iterator>
#include <utility>		#include <utility>

using namespace llvm;		using namespace llvm;
using namespace llvm::PatternMatch;		using namespace llvm::PatternMatch;

const unsigned MaxDepth = 6;		static cl::opt<unsigned> MaxDepth("value-tracking-max-depth",
		cl::Hidden, cl::init(6));

// Controls the number of uses of the value searched for possible		// Controls the number of uses of the value searched for possible
// dominating comparisons.		// dominating comparisons.
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",		static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
cl::Hidden, cl::init(20));		cl::Hidden, cl::init(20));

/// Returns the bitwidth of the given scalar or pointer type. For vector types,		/// Returns the bitwidth of the given scalar or pointer type. For vector types,
/// returns the element type's bitwidth.		/// returns the element type's bitwidth.
Show All 23 Lines	struct Query {
/// Set of assumptions that should be excluded from further queries.		/// Set of assumptions that should be excluded from further queries.
/// This is because of the potential for mutual recursion to cause		/// This is because of the potential for mutual recursion to cause
/// computeKnownBits to repeatedly visit the same assume intrinsic. The		/// computeKnownBits to repeatedly visit the same assume intrinsic. The
/// classic case of this is assume(x = y), which will attempt to determine		/// classic case of this is assume(x = y), which will attempt to determine
/// bits in x from bits in y, which will attempt to determine bits in y from		/// bits in x from bits in y, which will attempt to determine bits in y from
/// bits in x, etc. Regarding the mutual recursion, computeKnownBits can call		/// bits in x, etc. Regarding the mutual recursion, computeKnownBits can call
/// isKnownNonZero, which calls computeKnownBits and isKnownToBeAPowerOfTwo		/// isKnownNonZero, which calls computeKnownBits and isKnownToBeAPowerOfTwo
/// (all of which can call computeKnownBits), and so on.		/// (all of which can call computeKnownBits), and so on.
std::array<const Value *, MaxDepth> Excluded;		std::vector<const Value *> Excluded;

/// If true, it is safe to use metadata during simplification.		/// If true, it is safe to use metadata during simplification.
InstrInfoQuery IIQ;		InstrInfoQuery IIQ;

unsigned NumExcluded = 0;

Query(const DataLayout &DL, AssumptionCache AC, const Instruction CxtI,		Query(const DataLayout &DL, AssumptionCache AC, const Instruction CxtI,
const DominatorTree *DT, bool UseInstrInfo,		const DominatorTree *DT, bool UseInstrInfo,
OptimizationRemarkEmitter *ORE = nullptr)		OptimizationRemarkEmitter *ORE = nullptr)
: DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), IIQ(UseInstrInfo) {}		: DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), IIQ(UseInstrInfo) {
		Excluded.reserve(MaxDepth);
		}

Query(const Query &Q, const Value *NewExcl)		Query(const Query &Q, const Value *NewExcl)
: DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE), IIQ(Q.IIQ),		: DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE), IIQ(Q.IIQ) {
NumExcluded(Q.NumExcluded) {		Excluded.reserve(MaxDepth);
Excluded = Q.Excluded;		Excluded = Q.Excluded;
Excluded[NumExcluded++] = NewExcl;		Excluded.push_back(NewExcl);
assert(NumExcluded <= Excluded.size());
}		}

bool isExcluded(const Value *Value) const {		bool isExcluded(const Value *Value) const {
if (NumExcluded == 0)		return std::find(Excluded.begin(), Excluded.end(), Value) != Excluded.end();
return false;
auto End = Excluded.begin() + NumExcluded;
return std::find(Excluded.begin(), End, Value) != End;
}		}
};		};

} // end anonymous namespace		} // end anonymous namespace

// Given the provided Value and, potentially, a context instruction, return		// Given the provided Value and, potentially, a context instruction, return
// the preferred context instruction (if any).		// the preferred context instruction (if any).
static const Instruction safeCxtI(const Value V, const Instruction *CxtI) {		static const Instruction safeCxtI(const Value V, const Instruction *CxtI) {
▲ Show 20 Lines • Show All 2,506 Lines • ▼ Show 20 Lines
}		}

/// This function computes the integer multiple of Base that equals V.		/// This function computes the integer multiple of Base that equals V.
/// If successful, it returns true and returns the multiple in		/// If successful, it returns true and returns the multiple in
/// Multiple. If unsuccessful, it returns false. It looks		/// Multiple. If unsuccessful, it returns false. It looks
/// through SExt instructions only if LookThroughSExt is true.		/// through SExt instructions only if LookThroughSExt is true.
bool llvm::ComputeMultiple(Value V, unsigned Base, Value &Multiple,		bool llvm::ComputeMultiple(Value V, unsigned Base, Value &Multiple,
bool LookThroughSExt, unsigned Depth) {		bool LookThroughSExt, unsigned Depth) {
const unsigned MaxDepth = 6;

assert(V && "No Value?");		assert(V && "No Value?");
assert(Depth <= MaxDepth && "Limit Search Depth");		assert(Depth <= MaxDepth && "Limit Search Depth");
assert(V->getType()->isIntegerTy() && "Not integer or pointer type!");		assert(V->getType()->isIntegerTy() && "Not integer or pointer type!");

Type *T = V->getType();		Type *T = V->getType();

ConstantInt *CI = dyn_cast<ConstantInt>(V);		ConstantInt *CI = dyn_cast<ConstantInt>(V);
▲ Show 20 Lines • Show All 3,108 Lines • Show Last 20 Lines