Index: include/llvm/CodeGen/FunctionLoweringInfo.h
===================================================================
--- include/llvm/CodeGen/FunctionLoweringInfo.h
+++ include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -246,6 +246,9 @@
       return 0;
     unsigned &R = ValueMap[V];
     assert(R == 0 && "Already initialized this value register!");
+    // VirtReg2Value is built lazily from ValueMap, so no value register may
+    // be created once it has been populated.
+    assert(VirtReg2Value.empty());
     return R = CreateRegs(V->getType());
   }
 
Index: lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -579,9 +579,20 @@
 const Value *
 FunctionLoweringInfo::getValueFromVirtualReg(unsigned Vreg) {
   if (VirtReg2Value.empty()) {
+    SmallVector<EVT, 4> ValueVTs;
     for (auto &P : ValueMap) {
-      VirtReg2Value[P.second] = P.first;
+      ValueVTs.clear();
+      ComputeValueVTs(*TLI, Fn->getParent()->getDataLayout(),
+                      P.first->getType(), ValueVTs);
+      // A value may span several consecutive vregs; map all of them to it.
+      unsigned Reg = P.second;
+      for (EVT VT : ValueVTs) {
+        unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT);
+        for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+          VirtReg2Value[Reg++] = P.first;
+      }
     }
   }
-  return VirtReg2Value[Vreg];
+  // Use lookup() so that querying an unmapped vreg does not insert an entry.
+  return VirtReg2Value.lookup(Vreg);
 }
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9284,11 +9284,23 @@
   Known.Zero.setHighBits(AssumeFrameIndexHighZeroBits);
 }
 
+/// Walk the chain (operand 0) from the CopyFromReg node \p N through
+/// consecutive CopyFromReg nodes; return true if an INLINEASM node is reached.
+static bool isCopyFromRegOfInlineAsm(const SDNode *N) {
+  assert(N->getOpcode() == ISD::CopyFromReg);
+  do {
+    // Follow the chain until we find an INLINEASM node.
+    N = N->getOperand(0).getNode();
+    if (N->getOpcode() == ISD::INLINEASM)
+      return true;
+  } while (N->getOpcode() == ISD::CopyFromReg);
+  return false;
+}
+
 bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode * N,
   FunctionLoweringInfo * FLI, LegacyDivergenceAnalysis * KDA) const
 {
   switch (N->getOpcode()) {
-    case ISD::Register:
     case ISD::CopyFromReg:
     {
       const RegisterSDNode *R = nullptr;
@@ -9317,8 +9329,14 @@
         // are conservatively considered divergent
         else if (!AMDGPU::isEntryFunctionCC(FLI->Fn->getCallingConv()))
           return true;
+        return false;
       }
-      return !KDA || KDA->isDivergent(FLI->getValueFromVirtualReg(Reg));
+      const Value *V = FLI->getValueFromVirtualReg(Reg);
+      if (V)
+        return KDA->isDivergent(V);
+      // No IR value maps to this register; classify it by register kind.
+      assert(Reg == FLI->DemoteRegister || isCopyFromRegOfInlineAsm(N));
+      return TRI.isVGPR(MRI, Reg);
     }
   }
   break;
Index: test/CodeGen/AMDGPU/sdwa-peephole.ll
===================================================================
--- test/CodeGen/AMDGPU/sdwa-peephole.ll
+++ test/CodeGen/AMDGPU/sdwa-peephole.ll
@@ -501,7 +501,12 @@
 ; GCN-LABEL: {{^}}sdwa_crash_inlineasm_def:
 ; GCN: s_mov_b32 s{{[0-9]+}}, 0xffff
 ; GCN: v_and_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
-; GCN: v_or_b32_e32 v{{[0-9]+}}, 0x10000,
+;
+; TODO: Why is the constant not peepholed into the v_or_b32_e32?
+;
+; NOSDWA: s_mov_b32 [[CONST:s[0-9]+]], 0x10000
+; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, [[CONST]],
+; SDWA: v_or_b32_e32 v{{[0-9]+}}, 0x10000,
 define amdgpu_kernel void @sdwa_crash_inlineasm_def() #0 {
 bb:
   br label %bb1