diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -289,9 +289,19 @@
 }
 
 void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
-  if (!Subtarget->d16PreservesUnusedBits())
-    return;
+  auto doMatchLoadD16 = std::function(
+      [&](SDNode *) {return false;}
+  );
+
+  if (Subtarget->d16PreservesUnusedBits()) {
+    doMatchLoadD16 = [&](SDNode * N) {
+      assert(N->getOpcode() == ISD::BUILD_VECTOR);
+      return matchLoadD16FromBuildVector(N);
+    };
+  }
+
+  auto Chains = SmallVector();
 
   SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
 
   bool MadeChange = false;
@@ -302,13 +312,41 @@
 
     switch (N->getOpcode()) {
     case ISD::BUILD_VECTOR:
-      MadeChange |= matchLoadD16FromBuildVector(N);
+      MadeChange |= doMatchLoadD16(N);
       break;
+    case AMDGPUISD::ILLEGAL: {
+      // Create an `ILLEGAL` node with no return value to replace `N`.
+      auto DL = SDLoc(N);
+      auto Repl = CurDAG->getNode(AMDGPUISD::ILLEGAL, DL, MVT::Other);
+      Chains.push_back(Repl);
+
+      // Replace all uses of return value with a constant to break
+      // remaining dependencies.
+      auto VT = N->getValueType(0u);
+      assert(VT.isSimple());
+      auto RetVal = [&]() {
+        if (VT.isFloatingPoint()) {
+          return CurDAG->getConstantFP(0.0, DL, VT);
+        }
+        return CurDAG->getConstant(0u, DL, VT);
+      }();
+      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0u), RetVal);
+
+      MadeChange = true;
+      break;
+    }
     default:
       break;
     }
   }
 
+  if (!Chains.empty()) {
+    // Merge chains and update root.
+    Chains.push_back(CurDAG->getRoot());
+    auto Root = CurDAG->getTokenFactor(SDLoc(Chains.back()), Chains);
+    CurDAG->setRoot(Root);
+  }
+
   if (MadeChange) {
     CurDAG->RemoveDeadNodes();
     LLVM_DEBUG(dbgs() << "After PreProcess:\n";
@@ -709,6 +747,16 @@
     SelectINTRINSIC_VOID(N);
     return;
   }
+  case AMDGPUISD::ILLEGAL: {
+    // At this point `ILLEGAL` should have no parameters or uses. Replace
+    // it with `V_ILLEGAL`.
+    assert(N->getNumOperands() == 0u);
+    assert(N->getValueType(0u) == MVT::Other);
+    auto DL = SDLoc(N);
+    auto Repl = CurDAG->getMachineNode(AMDGPU::V_ILLEGAL, DL, MVT::Other);
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0u), SDValue(Repl, 0u));
+    return;
+  }
   }
 
   SelectCode(N);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -536,6 +536,8 @@
   BUFFER_ATOMIC_FMIN,
   BUFFER_ATOMIC_FMAX,
 
+  ILLEGAL,
+
   LAST_AMDGPU_ISD_NUMBER
 };
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4412,6 +4412,8 @@
   NODE_NAME_CASE(BUFFER_ATOMIC_FMIN)
   NODE_NAME_CASE(BUFFER_ATOMIC_FMAX)
+  NODE_NAME_CASE(ILLEGAL)
+
   case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
   }
   return nullptr;
 }
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6629,9 +6629,9 @@
   if (Subtarget->hasGFX90AInsts()) {
     Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx90a,
                                    NumVDataDwords, NumVAddrDwords);
-    if (Opcode == -1)
-      report_fatal_error(
-          "requested image instruction is not supported on this GPU");
+    if (Opcode == -1) {
+      return {};
+    }
   }
   if (Opcode == -1 &&
       Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
@@ -7097,12 +7097,28 @@
                        SIInstrInfo::MO_ABS32_LO);
     return {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, GA), 0};
   }
-  default:
-    if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
-            AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
-      return lowerImage(Op, ImageDimIntr, DAG, false);
+  default: {
+    // Attempt to lower image intrinsic.
+    if (auto ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrinsicID)) {
+      auto Result = lowerImage(Op, ImageDimIntr, DAG, false);
+      if (Result) {
+        return Result;
+      }
+      // Failed to lower image intrinsic. Emit `ILLEGAL` wrapper instead.
+      // This wrapper satisfies the constraints of `LowerOperation` (the
+      // number of return values and their types must exactly match). Any
+      // return values are removed in DAG preprocessing.
+      auto VTs = Op.getNode()->getVTList();
+      auto Ops = SmallVector();
+      auto const NumOps = Op.getNumOperands();
+      for (auto i = 0u; i < NumOps; ++i) {
+        Ops.push_back(Op.getOperand(i));
+      }
+      return DAG.getNode(AMDGPUISD::ILLEGAL, DL, VTs, Ops);
+    }
 
-    return Op;
+    return {};
+  }
   }
 }
 
@@ -7844,13 +7860,28 @@
                                    M->getVTList(), Ops, M->getMemoryVT(),
                                    M->getMemOperand());
   }
-  default:
-
-    if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
-            AMDGPU::getImageDimIntrinsicInfo(IntrID))
-      return lowerImage(Op, ImageDimIntr, DAG, true);
+  default: {
+    // Attempt to lower image intrinsic.
+    if (auto ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrID)) {
+      auto Result = lowerImage(Op, ImageDimIntr, DAG, true);
+      if (Result) {
+        return Result;
+      }
+      // Failed to lower image intrinsic. Emit `ILLEGAL` wrapper instead.
+      // This wrapper satisfies the constraints of `LowerOperation` (the
+      // number of return values and their types must exactly match). Any
+      // return values are removed in DAG preprocessing.
+      auto VTs = Op.getNode()->getVTList();
+      auto Ops = SmallVector();
+      auto const NumOps = Op.getNumOperands();
+      for (auto i = 0u; i < NumOps; ++i) {
+        Ops.push_back(Op.getOperand(i));
+      }
+      return DAG.getNode(AMDGPUISD::ILLEGAL, DL, VTs, Ops);
+    }
 
-    return SDValue();
+    return {};
+  }
   }
 }
 
@@ -8379,11 +8410,26 @@
                                       Op->getOperand(2), Chain), 0);
   default: {
-    if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
-            AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
-      return lowerImage(Op, ImageDimIntr, DAG, true);
+    // Attempt to lower image intrinsic.
+    if (auto ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrinsicID)) {
+      auto Result = lowerImage(Op, ImageDimIntr, DAG, true);
+      if (Result) {
+        return Result;
+      }
+      // Failed to lower image intrinsic. Emit `ILLEGAL` wrapper instead.
+      // This wrapper satisfies the constraints of `LowerOperation` (the
+      // number of return values and their types must exactly match). Any
+      // return values are removed in DAG preprocessing.
+      auto VTs = Op.getNode()->getVTList();
+      auto Ops = SmallVector();
+      auto const NumOps = Op.getNumOperands();
+      for (auto i = 0u; i < NumOps; ++i) {
+        Ops.push_back(Op.getOperand(i));
+      }
+      return DAG.getNode(AMDGPUISD::ILLEGAL, DL, VTs, Ops);
+    }
 
-    return Op;
+    return {};
   }
   }
 }
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3341,3 +3341,13 @@
   let InOperandList = (ins type1:$src0);
   let hasSideEffects = 0;
 }
+
+//============================================================================//
+// Dummy Instructions
+//============================================================================//
+
+def V_ILLEGAL : Enc32, InstSI<(outs), (ins), "v_illegal"> {
+  let Inst{31-0} = 0;
+  let FixedSize = 1;
+  let Uses = [EXEC];
+}