Index: include/llvm/CodeGen/SelectionDAGNodes.h =================================================================== --- include/llvm/CodeGen/SelectionDAGNodes.h +++ include/llvm/CodeGen/SelectionDAGNodes.h @@ -469,8 +469,9 @@ uint16_t HasDebugValue : 1; uint16_t IsMemIntrinsic : 1; + uint16_t IsDivergent : 1; }; - enum { NumSDNodeBits = 2 }; + enum { NumSDNodeBits = 3 }; class ConstantSDNodeBitfields { friend class ConstantSDNode; @@ -661,6 +662,9 @@ bool getHasDebugValue() const { return SDNodeBits.HasDebugValue; } void setHasDebugValue(bool b) { SDNodeBits.HasDebugValue = b; } + bool isDivergent() const { return SDNodeBits.IsDivergent; } + void setIsDivergent(bool b) { SDNodeBits.IsDivergent = b; } + /// Return true if there are no uses of this node. bool use_empty() const { return UseList == nullptr; } Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -642,6 +642,7 @@ "Can only promote loads to same size type"); SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getMemOperand()); + Res.getNode()->setIsDivergent(LD->isDivergent()); RVal = DAG.getNode(ISD::BITCAST, dl, VT, Res); RChain = Res.getValue(1); break; @@ -692,7 +693,7 @@ SDValue Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), NVT, Alignment, MMOFlags, AAInfo); - + Result->setIsDivergent(LD->isDivergent()); Ch = Result.getValue(1); // The chain. if (ExtType == ISD::SEXTLOAD) @@ -729,7 +730,7 @@ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), RoundVT, Alignment, MMOFlags, AAInfo); - + Lo->setIsDivergent(LD->isDivergent()); // Load the remaining ExtraWidth bits. 
IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, @@ -739,7 +740,7 @@ LD->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); - + Hi->setIsDivergent(LD->isDivergent()); // Build a factor node to remember that this load is independent of // the other one. Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), @@ -760,7 +761,7 @@ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), RoundVT, Alignment, MMOFlags, AAInfo); - + Hi->setIsDivergent(LD->isDivergent()); // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, @@ -770,7 +771,7 @@ LD->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); - + Lo->setIsDivergent(LD->isDivergent()); // Build a factor node to remember that this load is independent of // the other one. Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), @@ -801,6 +802,7 @@ if (isCustom) { if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) { + Res->setIsDivergent(Node->isDivergent()); Value = Res; Chain = Res.getValue(1); } Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -18,6 +18,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/DivergenceAnalysis.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/IR/CallSite.h" @@ -90,6 +91,8 @@ DenseMap NodeMap; + DivergenceAnalysis *DA = nullptr; // Assigned by init(); null until then. + /// UnusedArgNodeMap - Maps argument value for unused arguments. This is used /// to preserve debug information for incoming arguments. 
DenseMap UnusedArgNodeMap; @@ -608,7 +611,7 @@ HasTailCall(false) { } - void init(GCFunctionInfo *gfi, AliasAnalysis *AA, + void init(GCFunctionInfo *gfi, AliasAnalysis *AA, DivergenceAnalysis *DA, const TargetLibraryInfo *li); /// Clear out the current SelectionDAG and the associated state and prepare Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -885,9 +885,11 @@ } void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa, + DivergenceAnalysis *da, const TargetLibraryInfo *li) { AA = aa; GFI = gfi; + DA = da; LibInfo = li; DL = &DAG.getDataLayout(); Context = DAG.getContext(); @@ -3491,6 +3493,7 @@ bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr; bool isDereferenceable = isDereferenceablePointer(SV, DAG.getDataLayout()); unsigned Alignment = I.getAlignment(); + bool isDivergent = DA->isDivergent(&I); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); @@ -3563,7 +3566,7 @@ SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A, MachinePointerInfo(SV, Offsets[i]), Alignment, MMOFlags, AAInfo, Ranges); - + L.getNode()->setIsDivergent(isDivergent); Values[i] = L; Chains[ChainI] = L.getValue(1); } Index: lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -331,6 +331,7 @@ AU.addRequired(); if (UseMBPI && OptLevel != CodeGenOpt::None) AU.addRequired(); + AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -432,7 +433,7 @@ else AA = nullptr; - SDB->init(GFI, AA, LibInfo); + SDB->init(GFI, AA, &getAnalysis(), LibInfo); MF->setHasInlineAsm(false); Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- 
lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3487,6 +3487,7 @@ // then bitconvert to floating point or vector. SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getMemOperand()); + newLoad->setIsDivergent(LD->isDivergent()); SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad); if (LoadedVT != VT) Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND : @@ -3522,6 +3523,8 @@ RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset), MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(), LD->getAAInfo()); + Load->setIsDivergent(LD->isDivergent()); + // Follow the load with a store to the stack slot. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, MachinePointerInfo())); @@ -3540,6 +3543,8 @@ LD->getPointerInfo().getWithOffset(Offset), MemVT, MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(), LD->getAAInfo()); + Load->setIsDivergent(LD->isDivergent()); + // Follow the load with a store to the stack slot. Remember the store. // On big-endian machines this requires a truncating store to ensure // that the bits end up in the right place. @@ -3552,6 +3557,7 @@ // Finally, perform the original load only redirected to the stack slot. Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, MachinePointerInfo(), LoadedVT); + Load->setIsDivergent(LD->isDivergent()); // Callers expect a MERGE_VALUES node. 
return std::make_pair(Load, TF); Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -1280,13 +1280,14 @@ SDValue LoLoad = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT, Load->getChain(), BasePtr, SrcValue, LoMemVT, BaseAlign, Load->getMemOperand()->getFlags()); + LoLoad->setIsDivergent(Load->isDivergent()); SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr, DAG.getConstant(Size, SL, PtrVT)); SDValue HiLoad = DAG.getExtLoad(Load->getExtensionType(), SL, HiVT, Load->getChain(), HiPtr, SrcValue.getWithOffset(LoMemVT.getStoreSize()), HiMemVT, HiAlign, Load->getMemOperand()->getFlags()); - + HiLoad->setIsDivergent(Load->isDivergent()); SDValue Ops[] = { DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, LoLoad, HiLoad), DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -3646,7 +3646,7 @@ unsigned NumElements = MemVT.getVectorNumElements(); if (AS == AMDGPUASI.CONSTANT_ADDRESS) { - if (isMemOpUniform(Load)) + if (!Op->isDivergent()) return SDValue(); // Non-uniform loads will be selected to MUBUF instructions, so they // have the same legalization requirements as global and private @@ -3654,7 +3654,7 @@ // } if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS) { - if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) && + if (Subtarget->getScalarizeGlobalBehavior() && !Op->isDivergent() && !Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load)) return SDValue(); // Non-uniform loads will be selected to MUBUF instructions, so they Index: lib/Target/AMDGPU/SMInstructions.td =================================================================== --- 
lib/Target/AMDGPU/SMInstructions.td +++ lib/Target/AMDGPU/SMInstructions.td @@ -226,11 +226,9 @@ def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ auto Ld = cast(N); return Ld->getAlignment() >= 4 && - ((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS && - static_cast(getTargetLowering())->isMemOpUniform(N)) || + ((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS && !N->isDivergent()) || (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && - !Ld->isVolatile() && - static_cast(getTargetLowering())->isMemOpUniform(N) && + !Ld->isVolatile() && !N->isDivergent() && static_cast(getTargetLowering())->isMemOpHasNoClobberedMemOperand(N))); }]>; Index: test/CodeGen/AMDGPU/hsa-func.ll =================================================================== --- test/CodeGen/AMDGPU/hsa-func.ll +++ test/CodeGen/AMDGPU/hsa-func.ll @@ -1,3 +1,6 @@ +; XFAIL: * +; REQUIRES: asserts + ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global | FileCheck --check-prefix=HSA-CI %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA %s