Skip to content

Commit faaf2d2

Browse files
author
Justin Lebar
committed Feb 28, 2018
[NVPTX] Lower loads from global constants using ld.global.nc (aka LDG).
Summary:
After D43914, loads from global variables in addrspace(1) happen with ld.global. But since they're constants, even better would be to use ld.global.nc, aka ldg.

Reviewers: tra

Subscribers: jholewinski, sanjoy, hiraditya, llvm-commits

Differential Revision: https://reviews.llvm.org/D43915

llvm-svn: 326390
1 parent 5a7de89 commit faaf2d2

File tree

2 files changed

+47
-14
lines changed

2 files changed

+47
-14
lines changed
 

‎llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -987,8 +987,10 @@ static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
987987
// We have two ways of identifying invariant loads: Loads may be explicitly
988988
// marked as invariant, or we may infer them to be invariant.
989989
//
990-
// We currently infer invariance only for kernel function pointer params that
991-
// are noalias (i.e. __restrict) and never written to.
990+
// We currently infer invariance for loads from
991+
// - constant global variables, and
992+
// - kernel function pointer params that are noalias (i.e. __restrict) and
993+
// never written to.
992994
//
993995
// TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
994996
// not during the SelectionDAG phase).
@@ -1002,23 +1004,22 @@ static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
10021004
if (N->isInvariant())
10031005
return true;
10041006

1005-
// Load wasn't explicitly invariant. Attempt to infer invariance.
1006-
if (!isKernelFunction(F->getFunction()))
1007-
return false;
1007+
bool IsKernelFn = isKernelFunction(F->getFunction());
10081008

1009-
// We use GetUnderlyingObjects() here instead of
1010-
// GetUnderlyingObject() mainly because the former looks through phi
1011-
// nodes while the latter does not. We need to look through phi
1012-
// nodes to handle pointer induction variables.
1009+
// We use GetUnderlyingObjects() here instead of GetUnderlyingObject() mainly
1010+
// because the former looks through phi nodes while the latter does not. We
1011+
// need to look through phi nodes to handle pointer induction variables.
10131012
SmallVector<Value *, 8> Objs;
10141013
GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
10151014
Objs, F->getDataLayout());
1016-
for (Value *Obj : Objs) {
1017-
auto *A = dyn_cast<const Argument>(Obj);
1018-
if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false;
1019-
}
10201015

1021-
return true;
1016+
return all_of(Objs, [&](Value *V) {
1017+
if (auto *A = dyn_cast<const Argument>(V))
1018+
return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr();
1019+
if (auto *GV = dyn_cast<const GlobalVariable>(V))
1020+
return GV->isConstant();
1021+
return false;
1022+
});
10221023
}
10231024

10241025
bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
@@ -1632,6 +1633,7 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
16321633
switch (N->getOpcode()) {
16331634
default:
16341635
return false;
1636+
case ISD::LOAD:
16351637
case ISD::INTRINSIC_W_CHAIN:
16361638
if (IsLDG)
16371639
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
@@ -1654,6 +1656,7 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
16541656
NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
16551657
NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
16561658
break;
1659+
case NVPTXISD::LoadV2:
16571660
case NVPTXISD::LDGV2:
16581661
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
16591662
NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
@@ -1676,6 +1679,7 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
16761679
NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
16771680
NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
16781681
break;
1682+
case NVPTXISD::LoadV4:
16791683
case NVPTXISD::LDGV4:
16801684
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
16811685
NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
; RUN: llc < %s -march=nvptx -mcpu=sm_35 | FileCheck %s
2+
3+
; Check loads from constant global variables. These loads should be
4+
; ld.global.nc (aka ldg).
5+
6+
@gv_float = external constant float
7+
@gv_float2 = external constant <2 x float>
8+
@gv_float4 = external constant <4 x float>
9+
10+
; CHECK-LABEL: test_gv_float()
11+
define float @test_gv_float() {
12+
; CHECK: ld.global.nc.f32
13+
%v = load float, float* @gv_float
14+
ret float %v
15+
}
16+
17+
; CHECK-LABEL: test_gv_float2()
18+
define <2 x float> @test_gv_float2() {
19+
; CHECK: ld.global.nc.v2.f32
20+
%v = load <2 x float>, <2 x float>* @gv_float2
21+
ret <2 x float> %v
22+
}
23+
24+
; CHECK-LABEL: test_gv_float4()
25+
define <4 x float> @test_gv_float4() {
26+
; CHECK: ld.global.nc.v4.f32
27+
%v = load <4 x float>, <4 x float>* @gv_float4
28+
ret <4 x float> %v
29+
}

0 commit comments

Comments
 (0)
Please sign in to comment.