Index: llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp =================================================================== --- llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -987,8 +987,10 @@ // We have two ways of identifying invariant loads: Loads may be explicitly // marked as invariant, or we may infer them to be invariant. // - // We currently infer invariance only for kernel function pointer params that - // are noalias (i.e. __restrict) and never written to. + // We currently infer invariance for loads from + // - constant global variables, and + // - kernel function pointer params that are noalias (i.e. __restrict) and + // never written to. // // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally // not during the SelectionDAG phase). @@ -1002,23 +1004,22 @@ if (N->isInvariant()) return true; - // Load wasn't explicitly invariant. Attempt to infer invariance. - if (!isKernelFunction(F->getFunction())) - return false; + bool IsKernelFn = isKernelFunction(F->getFunction()); - // We use GetUnderlyingObjects() here instead of - // GetUnderlyingObject() mainly because the former looks through phi - // nodes while the latter does not. We need to look through phi - // nodes to handle pointer induction variables. + // We use GetUnderlyingObjects() here instead of GetUnderlyingObject() mainly + // because the former looks through phi nodes while the latter does not. We + // need to look through phi nodes to handle pointer induction variables. 
SmallVector<Value *, 8> Objs; GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()), Objs, F->getDataLayout()); - for (Value *Obj : Objs) { - auto *A = dyn_cast<const Argument>(Obj); - if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false; - } - return true; + return all_of(Objs, [&](Value *V) { + if (auto *A = dyn_cast<const Argument>(V)) + return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr(); + if (auto *GV = dyn_cast<const GlobalVariable>(V)) + return GV->isConstant(); + return false; + }); } bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) { @@ -1632,6 +1633,7 @@ switch (N->getOpcode()) { default: return false; + case ISD::LOAD: case ISD::INTRINSIC_W_CHAIN: if (IsLDG) Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, @@ -1654,6 +1656,7 @@ NVPTX::INT_PTX_LDU_GLOBAL_f32avar, NVPTX::INT_PTX_LDU_GLOBAL_f64avar); break; + case NVPTXISD::LoadV2: case NVPTXISD::LDGV2: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar, @@ -1676,6 +1679,7 @@ NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar, NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar); break; + case NVPTXISD::LoadV4: case NVPTXISD::LDGV4: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar, Index: llvm/trunk/test/CodeGen/NVPTX/read-global-variable-constant.ll =================================================================== --- llvm/trunk/test/CodeGen/NVPTX/read-global-variable-constant.ll +++ llvm/trunk/test/CodeGen/NVPTX/read-global-variable-constant.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_35 | FileCheck %s + +; Check load from constant global variables. These loads should be +; ld.global.nc (aka ldg). 
+ +@gv_float = external constant float +@gv_float2 = external constant <2 x float> +@gv_float4 = external constant <4 x float> + +; CHECK-LABEL: test_gv_float() +define float @test_gv_float() { +; CHECK: ld.global.nc.f32 + %v = load float, float* @gv_float + ret float %v +} + +; CHECK-LABEL: test_gv_float2() +define <2 x float> @test_gv_float2() { +; CHECK: ld.global.nc.v2.f32 + %v = load <2 x float>, <2 x float>* @gv_float2 + ret <2 x float> %v +} + +; CHECK-LABEL: test_gv_float4() +define <4 x float> @test_gv_float4() { +; CHECK: ld.global.nc.v4.f32 + %v = load <4 x float>, <4 x float>* @gv_float4 + ret <4 x float> %v +}