Index: include/llvm/CodeGen/SelectionDAG.h =================================================================== --- include/llvm/CodeGen/SelectionDAG.h +++ include/llvm/CodeGen/SelectionDAG.h @@ -349,12 +349,15 @@ SDUse *Ops = OperandRecycler.allocate( ArrayRecycler::Capacity::get(Vals.size()), OperandAllocator); + bool IsDivergent = false; for (unsigned I = 0; I != Vals.size(); ++I) { Ops[I].setUser(Node); Ops[I].setInitial(Vals[I]); + IsDivergent = IsDivergent || Ops[I].getNode()->isDivergent(); } Node->NumOperands = Vals.size(); Node->OperandList = Ops; + Node->SDNodeBits.IsDivergent = IsDivergent; checkForCycles(Node); } Index: include/llvm/CodeGen/SelectionDAGNodes.h =================================================================== --- include/llvm/CodeGen/SelectionDAGNodes.h +++ include/llvm/CodeGen/SelectionDAGNodes.h @@ -466,11 +466,14 @@ friend class SDNode; friend class MemIntrinsicSDNode; friend class MemSDNode; + friend class SelectionDAG; + friend class SelectionDAGBuilder; uint16_t HasDebugValue : 1; uint16_t IsMemIntrinsic : 1; + uint16_t IsDivergent : 1; }; - enum { NumSDNodeBits = 2 }; + enum { NumSDNodeBits = 3 }; class ConstantSDNodeBitfields { friend class ConstantSDNode; @@ -548,6 +551,8 @@ // TODO: unfriend HandleSDNode once we fix its operand handling. friend class HandleSDNode; + friend class SelectionDAGBuilder; + /// Unique id per SDNode in the DAG. int NodeId = -1; @@ -662,6 +667,8 @@ bool getHasDebugValue() const { return SDNodeBits.HasDebugValue; } void setHasDebugValue(bool b) { SDNodeBits.HasDebugValue = b; } + bool isDivergent() const { return SDNodeBits.IsDivergent; } + /// Return true if there are no uses of this node. bool use_empty() const { return UseList == nullptr; } Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -22,6 +22,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineValueType.h" +#include "llvm/Analysis/DivergenceAnalysis.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLowering.h" @@ -96,6 +97,8 @@ DenseMap NodeMap; + DivergenceAnalysis * DA; + /// UnusedArgNodeMap - Maps argument value for unused arguments. This is used /// to preserve debug information for incoming arguments. DenseMap UnusedArgNodeMap; @@ -627,7 +630,7 @@ : SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag), FuncInfo(funcinfo) {} - void init(GCFunctionInfo *gfi, AliasAnalysis *AA, + void init(GCFunctionInfo *gfi, AliasAnalysis *AA, DivergenceAnalysis *DA, const TargetLibraryInfo *li); /// Clear out the current SelectionDAG and the associated state and prepare @@ -682,6 +685,7 @@ SDValue getValueImpl(const Value *V); void setValue(const Value *V, SDValue NewN) { + NewN.getNode()->SDNodeBits.IsDivergent = DA ? DA->isDivergent(V) : 0; SDValue &N = NodeMap[V]; assert(!N.getNode() && "Already set a value for this node!"); N = NewN; Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -971,9 +971,11 @@ } void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa, + DivergenceAnalysis *da, const TargetLibraryInfo *li) { AA = aa; GFI = gfi; + DA = da; LibInfo = li; DL = &DAG.getDataLayout(); Context = DAG.getContext(); Index: lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -608,6 +608,8 @@ if (getNodeId() != -1) OS << " [ID=" << getNodeId() << ']'; + if (!(isa(this) || (isa(this)))) + OS << "# D:" << isDivergent(); if (!G) return; Index: lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -432,7 +432,7 @@ else AA = nullptr; - SDB->init(GFI, AA, LibInfo); + SDB->init(GFI, AA, getAnalysisIfAvailable(), LibInfo); MF->setHasInlineAsm(false); Index: lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -27,6 +27,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/DivergenceAnalysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -83,6 +84,7 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addRequired(); SelectionDAGISel::getAnalysisUsage(AU); } Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -4705,7 +4705,7 @@ unsigned NumElements = MemVT.getVectorNumElements(); if (AS == AMDGPUASI.CONSTANT_ADDRESS) { - if (isMemOpUniform(Load)) + if (!Op->isDivergent()) return SDValue(); // Non-uniform loads will be selected to MUBUF instructions, so they // have the same legalization requirements as global and private @@ -4713,7 +4713,7 @@ // } if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS) { - if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) && + if (Subtarget->getScalarizeGlobalBehavior() && !Op->isDivergent() && !Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load)) return SDValue(); // Non-uniform loads will be selected to MUBUF instructions, so they Index: lib/Target/AMDGPU/SMInstructions.td =================================================================== --- lib/Target/AMDGPU/SMInstructions.td +++ lib/Target/AMDGPU/SMInstructions.td @@ -226,11 +226,9 @@ def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ auto Ld = cast(N); return Ld->getAlignment() >= 4 && - ((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS && - static_cast(getTargetLowering())->isMemOpUniform(N)) || + ((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS && !N->isDivergent()) || (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && - !Ld->isVolatile() && - static_cast(getTargetLowering())->isMemOpUniform(N) && + !Ld->isVolatile() && !N->isDivergent() && static_cast(getTargetLowering())->isMemOpHasNoClobberedMemOperand(N))); }]>;