@@ -987,8 +987,10 @@ static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
987
987
// We have two ways of identifying invariant loads: Loads may be explicitly
988
988
// marked as invariant, or we may infer them to be invariant.
989
989
//
990
- // We currently infer invariance only for kernel function pointer params that
991
- // are noalias (i.e. __restrict) and never written to.
990
+ // We currently infer invariance for loads from
991
+ // - constant global variables, and
992
+ // - kernel function pointer params that are noalias (i.e. __restrict) and
993
+ // never written to.
992
994
//
993
995
// TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
994
996
// not during the SelectionDAG phase).
@@ -1002,23 +1004,22 @@ static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
1002
1004
if (N->isInvariant ())
1003
1005
return true ;
1004
1006
1005
- // Load wasn't explicitly invariant. Attempt to infer invariance.
1006
- if (!isKernelFunction (F->getFunction ()))
1007
- return false ;
1007
+ bool IsKernelFn = isKernelFunction (F->getFunction ());
1008
1008
1009
- // We use GetUnderlyingObjects() here instead of
1010
- // GetUnderlyingObject() mainly because the former looks through phi
1011
- // nodes while the latter does not. We need to look through phi
1012
- // nodes to handle pointer induction variables.
1009
+ // We use GetUnderlyingObjects() here instead of GetUnderlyingObject() mainly
1010
+ // because the former looks through phi nodes while the latter does not. We
1011
+ // need to look through phi nodes to handle pointer induction variables.
1013
1012
SmallVector<Value *, 8 > Objs;
1014
1013
GetUnderlyingObjects (const_cast <Value *>(N->getMemOperand ()->getValue ()),
1015
1014
Objs, F->getDataLayout ());
1016
- for (Value *Obj : Objs) {
1017
- auto *A = dyn_cast<const Argument>(Obj);
1018
- if (!A || !A->onlyReadsMemory () || !A->hasNoAliasAttr ()) return false ;
1019
- }
1020
1015
1021
- return true ;
1016
+ return all_of (Objs, [&](Value *V) {
1017
+ if (auto *A = dyn_cast<const Argument>(V))
1018
+ return IsKernelFn && A->onlyReadsMemory () && A->hasNoAliasAttr ();
1019
+ if (auto *GV = dyn_cast<const GlobalVariable>(V))
1020
+ return GV->isConstant ();
1021
+ return false ;
1022
+ });
1022
1023
}
1023
1024
1024
1025
bool NVPTXDAGToDAGISel::tryIntrinsicNoChain (SDNode *N) {
@@ -1632,6 +1633,7 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
1632
1633
switch (N->getOpcode ()) {
1633
1634
default :
1634
1635
return false ;
1636
+ case ISD::LOAD:
1635
1637
case ISD::INTRINSIC_W_CHAIN:
1636
1638
if (IsLDG)
1637
1639
Opcode = pickOpcodeForVT (EltVT.getSimpleVT ().SimpleTy ,
@@ -1654,6 +1656,7 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
1654
1656
NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
1655
1657
NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
1656
1658
break ;
1659
+ case NVPTXISD::LoadV2:
1657
1660
case NVPTXISD::LDGV2:
1658
1661
Opcode = pickOpcodeForVT (EltVT.getSimpleVT ().SimpleTy ,
1659
1662
NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
@@ -1676,6 +1679,7 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
1676
1679
NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
1677
1680
NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
1678
1681
break ;
1682
+ case NVPTXISD::LoadV4:
1679
1683
case NVPTXISD::LDGV4:
1680
1684
Opcode = pickOpcodeForVT (EltVT.getSimpleVT ().SimpleTy ,
1681
1685
NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
0 commit comments