Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -109,8 +109,7 @@
   // Adjust the encoding family to GFX80 for D16 buffer instructions when the
   // subtarget has UnpackedD16VMem feature.
   // TODO: remove this when we discard GFX80 encoding.
-  if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16)
-      && !(get(Opcode).TSFlags & SIInstrFlags::MIMG))
+  if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16Buf))
     Gen = SIEncodingFamily::GFX80;
 
   int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
Index: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -2301,10 +2301,6 @@
   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
     return true;
 
-  // Gather4 instructions do not need validation: dst size is hardcoded.
-  if (Desc.TSFlags & SIInstrFlags::Gather4)
-    return true;
-
   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
   int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
@@ -2319,9 +2315,12 @@
   if (DMask == 0)
     DMask = 1;
 
-  unsigned DataSize = countPopulation(DMask);
-  if ((Desc.TSFlags & SIInstrFlags::D16) != 0 && hasPackedD16()) {
-    DataSize = (DataSize + 1) / 2;
+  unsigned DataSize =
+    (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
+  if (hasPackedD16()) {
+    int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
+    if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
+      DataSize = (DataSize + 1) / 2;
   }
 
   return (VDataSize / 4) == DataSize + TFESize;
@@ -2389,10 +2388,14 @@
   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
     return true;
 
-  if ((Desc.TSFlags & SIInstrFlags::D16) == 0)
-    return true;
-
-  return !isCI() && !isSI();
+  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
+  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
+    if (isCI() || isSI())
+      return false;
+  }
+
+  return true;
 }
 
 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
@@ -4261,6 +4264,7 @@
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
 }
 
 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
@@ -4287,6 +4291,10 @@
   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyLWE);
 }
 
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultD16() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyD16);
+}
+
 //===----------------------------------------------------------------------===//
 // smrd
 //===----------------------------------------------------------------------===//
@@ -4389,6 +4397,7 @@
   {"da", AMDGPUOperand::ImmTyDA, true, nullptr},
   {"r128", AMDGPUOperand::ImmTyR128, true, nullptr},
   {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
+  {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
   {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
   {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
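Taken together, the two validation hunks above compute the expected vdata size uniformly for gather4, D16, and TFE instead of special-casing gather4. A minimal standalone sketch of that arithmetic (the free-standing helper and its name are mine, not the parser's):

#include <cstdint>

// Sketch of the check in validateMIMGDataSize() above.
unsigned expectedVDataDwords(unsigned DMask, bool IsGather4, bool D16,
                             bool PackedD16, bool TFE) {
  unsigned DataSize = 0;
  if (IsGather4) {
    DataSize = 4;                         // gather4 always returns 4 channels
  } else {
    DMask &= 0xf;
    if (DMask == 0)
      DMask = 1;                          // dmask==0 still writes one dword
    for (unsigned M = DMask; M; M >>= 1)  // countPopulation(DMask)
      DataSize += M & 1;
  }
  if (D16 && PackedD16)                   // packed D16: two halves per dword
    DataSize = (DataSize + 1) / 2;
  return DataSize + (TFE ? 1 : 0);        // TFE appends one status dword
}

The parser then compares this count against the width of the vdata register operand, as in the `(VDataSize / 4) == DataSize + TFESize` return above. Note that only hasPackedD16() halves the count: on unpacked-D16 targets (gfx8.0) each 16-bit channel still occupies a full 32-bit register.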
@@ -5094,8 +5103,6 @@
     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
   case MCK_glc:
     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
-  case MCK_d16:
-    return Operand.isD16() ? Match_Success : Match_InvalidOperand;
   case MCK_idxen:
     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
   case MCK_offen:
Index: llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td
+++ llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td
@@ -720,7 +720,7 @@
   "buffer_store_format_xyzw", VReg_128
 >;
 
-let SubtargetPredicate = HasUnpackedD16VMem, D16 = 1 in {
+let SubtargetPredicate = HasUnpackedD16VMem, D16Buf = 1 in {
   defm BUFFER_LOAD_FORMAT_D16_X_gfx80 : MUBUF_Pseudo_Loads <
     "buffer_load_format_d16_x", VGPR_32
   >;
@@ -747,7 +747,7 @@
   >;
 } // End HasUnpackedD16VMem.
 
-let SubtargetPredicate = HasPackedD16VMem, D16 = 1 in {
+let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in {
   defm BUFFER_LOAD_FORMAT_D16_X : MUBUF_Pseudo_Loads <
     "buffer_load_format_d16_x", VGPR_32
   >;
@@ -990,7 +990,7 @@
 defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz", VReg_128>;
 defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128>;
 
-let SubtargetPredicate = HasUnpackedD16VMem, D16 = 1 in {
+let SubtargetPredicate = HasUnpackedD16VMem, D16Buf = 1 in {
   defm TBUFFER_LOAD_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32>;
   defm TBUFFER_LOAD_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VReg_64>;
   defm TBUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_96>;
@@ -1001,7 +1001,7 @@
   defm TBUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_128>;
 } // End HasUnpackedD16VMem.
 
-let SubtargetPredicate = HasPackedD16VMem, D16 = 1 in {
+let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in {
   defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32>;
   defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VGPR_32>;
   defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_64>;
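The register classes chosen above encode the packed/unpacked split directly: the gfx8.0 (`_gfx80`, unpacked) variants keep one 32-bit VGPR per 16-bit channel, while the packed variants fit two channels per VGPR. A one-line sketch of the rule (helper name is mine):

// Dword count implied by the register classes above.
unsigned d16FormatDwords(unsigned Channels, bool HasUnpackedD16VMem) {
  return HasUnpackedD16VMem ? Channels            // e.g. d16_xyz -> VReg_96
                            : (Channels + 1) / 2; // e.g. d16_xyz -> VReg_64
}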
Index: llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -289,10 +289,6 @@
 // as if it has 1 dword, which could be not really so.
 DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
 
-  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4) {
-    return MCDisassembler::Success;
-  }
-
   int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdst);
 
@@ -304,22 +300,25 @@
   int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::tfe);
+  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+                                          AMDGPU::OpName::d16);
 
   assert(VDataIdx != -1);
   assert(DMaskIdx != -1);
   assert(TFEIdx != -1);
 
   bool IsAtomic = (VDstIdx != -1);
+  bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;
 
   unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
   if (DMask == 0)
     return MCDisassembler::Success;
 
-  unsigned DstSize = countPopulation(DMask);
+  unsigned DstSize = IsGather4 ? 4 : countPopulation(DMask);
   if (DstSize == 1)
     return MCDisassembler::Success;
 
-  bool D16 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::D16;
+  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
   if (D16 && AMDGPU::hasPackedD16(STI)) {
     DstSize = (DstSize + 1) / 2;
   }
@@ -335,6 +334,11 @@
       NewOpcode = AMDGPU::getMaskedMIMGAtomicOp(*MCII, MI.getOpcode(), DstSize);
     }
     if (NewOpcode == -1) return MCDisassembler::Success;
+  } else if (IsGather4) {
+    if (D16 && AMDGPU::hasPackedD16(STI))
+      NewOpcode = AMDGPU::getMIMGGatherOpPackedD16(MI.getOpcode());
+    else
+      return MCDisassembler::Success;
   } else {
     NewOpcode = AMDGPU::getMaskedMIMGOp(*MCII, MI.getOpcode(), DstSize);
     assert(NewOpcode != -1 && "could not find matching mimg channel instruction");
Index: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
@@ -84,6 +84,8 @@
                 raw_ostream &O);
   void printLWE(const MCInst *MI, unsigned OpNo,
                 const MCSubtargetInfo &STI, raw_ostream &O);
+  void printD16(const MCInst *MI, unsigned OpNo,
+                const MCSubtargetInfo &STI, raw_ostream &O);
   void printExpCompr(const MCInst *MI, unsigned OpNo,
                      const MCSubtargetInfo &STI, raw_ostream &O);
   void printExpVM(const MCInst *MI, unsigned OpNo,
Index: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -217,6 +217,11 @@
   printNamedBit(MI, OpNo, O, "lwe");
 }
 
+void AMDGPUInstPrinter::printD16(const MCInst *MI, unsigned OpNo,
+                                 const MCSubtargetInfo &STI,
+                                 raw_ostream &O) {
+  printNamedBit(MI, OpNo, O, "d16");
+}
+
 void AMDGPUInstPrinter::printExpCompr(const MCInst *MI, unsigned OpNo,
                                       const MCSubtargetInfo &STI,
                                       raw_ostream &O) {
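The disassembler hunk above now retargets gather4 opcodes as well. Its decision tree, reduced to pure logic (a sketch; the real code calls the AMDGPU::getMasked* / getMIMGGatherOpPackedD16 lookup helpers instead of returning an enum):

// Sketch of the opcode-retargeting decision in convertMIMGInst() above.
enum class Retarget { KeepDefault, MaskedOp, MaskedAtomicOp, GatherPackedD16 };

Retarget classifyMIMG(bool IsAtomic, bool IsGather4, bool D16, bool PackedD16,
                      unsigned DstSize) {
  if (DstSize == 1)
    return Retarget::KeepDefault;           // default decoding already fits
  if (IsAtomic)
    return Retarget::MaskedAtomicOp;        // getMaskedMIMGAtomicOp()
  if (IsGather4)                            // gather4 is fixed at 4 channels;
    return (D16 && PackedD16)               // only packed D16 shrinks vdata
               ? Retarget::GatherPackedD16  // getMIMGGatherOpPackedD16()
               : Retarget::KeepDefault;
  return Retarget::MaskedOp;                // getMaskedMIMGOp()
}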
Index: llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td
+++ llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td
@@ -17,6 +17,11 @@
   int AtomicSize = !if(is32Bit, 1, 2);
 }
 
+class MIMG_Gather_Size <string op, int channels> {
+  string Op = op;
+  int Channels = channels;
+}
+
 class mimg <bits<7> si, bits<7> vi = si> {
   field bits<7> SI = si;
   field bits<7> VI = vi;
@@ -37,125 +42,88 @@
 class MIMG_NoSampler_Helper <bits<7> op, string asm,
                              RegisterClass dst_rc,
                              RegisterClass addr_rc,
-                             bit d16_bit=0,
-                             string dns=""> : MIMG_Helper <
-  (outs dst_rc:$vdata),
-  (ins addr_rc:$vaddr, SReg_256:$srsrc,
-       DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
-       R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
-  asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""),
-  dns>, MIMGe <op> {
+                             bit has_d16,
+                             string dns="">
+  : MIMG_Helper <(outs dst_rc:$vdata),
+                 !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
+                           DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
+                           R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
+                      !if(has_d16, (ins D16:$d16), (ins))),
+                 asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
+                    #!if(has_d16, "$d16", ""),
+                 dns>,
+    MIMGe <op> {
   let ssamp = 0;
-  let D16 = d16;
-}
 
-multiclass MIMG_NoSampler_Src_Helper_Helper <bits<7> op, string asm,
-                                             RegisterClass dst_rc,
-                                             int channels, bit d16_bit,
-                                             string suffix> {
-  def NAME # _V1 # suffix : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32, d16_bit,
-                              !if(!eq(channels, 1), "AMDGPU", "")>,
-                            MIMG_Mask <asm#"_V1"#suffix, channels>;
-  def NAME # _V2 # suffix : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64, d16_bit>,
-                            MIMG_Mask <asm#"_V2"#suffix, channels>;
-  def NAME # _V3 # suffix : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_96, d16_bit>,
-                            MIMG_Mask <asm#"_V3"#suffix, channels>;
-  def NAME # _V4 # suffix : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128, d16_bit>,
-                            MIMG_Mask <asm#"_V4"#suffix, channels>;
+
+  let HasD16 = has_d16;
+  let d16 = !if(HasD16, ?, 0);
 }
 
 multiclass MIMG_NoSampler_Src_Helper <bits<7> op, string asm,
-                                      RegisterClass dst_rc,
-                                      int channels> {
-  defm NAME : MIMG_NoSampler_Src_Helper_Helper <op, asm, dst_rc, channels, 0, "">;
-
-  let d16 = 1 in {
-    let SubtargetPredicate = HasPackedD16VMem in {
-      defm NAME : MIMG_NoSampler_Src_Helper_Helper <op, asm, dst_rc, channels, 1, "_D16">;
-    } // End HasPackedD16VMem.
-
-    let SubtargetPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in {
-      defm NAME : MIMG_NoSampler_Src_Helper_Helper <op, asm, dst_rc, channels, 1, "_D16_gfx80">;
-    } // End HasUnpackedD16VMem.
-  } // End d16 = 1.
-}
-
-multiclass MIMG_NoSampler <bits<7> op, string asm> {
-  defm _V1 : MIMG_NoSampler_Src_Helper <op, asm, VGPR_32, 1>;
-  defm _V2 : MIMG_NoSampler_Src_Helper <op, asm, VReg_64, 2>;
-  defm _V3 : MIMG_NoSampler_Src_Helper <op, asm, VReg_96, 3>;
-  defm _V4 : MIMG_NoSampler_Src_Helper <op, asm, VReg_128, 4>;
-}
-
-multiclass MIMG_PckNoSampler <bits<7> op, string asm> {
-  defm NAME # _V1 : MIMG_NoSampler_Src_Helper_Helper <op, asm, VGPR_32, 1, 0, "">;
-  defm NAME # _V2 : MIMG_NoSampler_Src_Helper_Helper <op, asm, VReg_64, 2, 0, "">;
-  defm NAME # _V3 : MIMG_NoSampler_Src_Helper_Helper <op, asm, VReg_96, 3, 0, "">;
-  defm NAME # _V4 : MIMG_NoSampler_Src_Helper_Helper <op, asm, VReg_128, 4, 0, "">;
+                                      RegisterClass dst_rc,
+                                      int channels, bit has_d16> {
+  def NAME # _V1 : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32, has_d16,
+                     !if(!eq(channels, 1), "AMDGPU", "")>,
+                   MIMG_Mask <asm#"_V1", channels>;
+  def NAME # _V2 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64, has_d16>,
+                   MIMG_Mask <asm#"_V2", channels>;
+  def NAME # _V3 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_96, has_d16>,
+                   MIMG_Mask <asm#"_V3", channels>;
+  def NAME # _V4 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128, has_d16>,
+                   MIMG_Mask <asm#"_V4", channels>;
+}
+
+multiclass MIMG_NoSampler <bits<7> op, string asm, bit has_d16> {
+  defm _V1 : MIMG_NoSampler_Src_Helper <op, asm, VGPR_32, 1, has_d16>;
+  defm _V2 : MIMG_NoSampler_Src_Helper <op, asm, VReg_64, 2, has_d16>;
+  defm _V3 : MIMG_NoSampler_Src_Helper <op, asm, VReg_96, 3, has_d16>;
+  defm _V4 : MIMG_NoSampler_Src_Helper <op, asm, VReg_128, 4, has_d16>;
 }
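Because `has_d16` now adds or omits the trailing `D16:$d16` ins operand via `!con`/`!if`, consumers can no longer assume the operand exists; every C++ hunk in this patch therefore probes for it by name before reading it. The idiom, as a standalone sketch (it uses the real AMDGPU::getNamedOperandIdx from AMDGPUBaseInfo.h and the post-patch AMDGPU::OpName::d16):

#include "llvm/MC/MCInst.h"

static bool hasD16Set(const llvm::MCInst &Inst, unsigned Opcode) {
  // d16 exists only on instructions built with has_d16 = 1.
  int D16Idx = llvm::AMDGPU::getNamedOperandIdx(Opcode,
                                                llvm::AMDGPU::OpName::d16);
  return D16Idx >= 0 && Inst.getOperand(D16Idx).getImm() != 0;
}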
 class MIMG_Store_Helper <bits<7> op, string asm,
                          RegisterClass data_rc,
                          RegisterClass addr_rc,
-                         bit d16_bit=0,
-                         string dns = ""> : MIMG_Helper <
-  (outs),
-  (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
-       DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
-       R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
-  asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""), dns>, MIMGe <op> {
+                         bit has_d16,
+                         string dns = "">
+  : MIMG_Helper <(outs),
+                 !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
+                           DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
+                           R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
+                      !if(has_d16, (ins D16:$d16), (ins))),
+                 asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
+                    #!if(has_d16, "$d16", ""),
+                 dns>,
+    MIMGe <op> {
   let ssamp = 0;
   let mayLoad = 0;
   let mayStore = 1;
   let hasSideEffects = 0;
   let hasPostISelHook = 0;
   let DisableWQM = 1;
-  let D16 = d16;
-}
 
-multiclass MIMG_Store_Addr_Helper_Helper <bits<7> op, string asm,
-                                          RegisterClass data_rc,
-                                          int channels, bit d16_bit,
-                                          string suffix> {
-  def NAME # _V1 # suffix : MIMG_Store_Helper <op, asm, data_rc, VGPR_32, d16_bit,
-                              !if(!eq(channels, 1), "AMDGPU", "")>,
-                            MIMG_Mask <asm#"_V1"#suffix, channels>;
-  def NAME # _V2 # suffix : MIMG_Store_Helper <op, asm, data_rc, VReg_64, d16_bit>,
-                            MIMG_Mask <asm#"_V2"#suffix, channels>;
-  def NAME # _V3 # suffix : MIMG_Store_Helper <op, asm, data_rc, VReg_96, d16_bit>,
-                            MIMG_Mask <asm#"_V3"#suffix, channels>;
-  def NAME # _V4 # suffix : MIMG_Store_Helper <op, asm, data_rc, VReg_128, d16_bit>,
-                            MIMG_Mask <asm#"_V4"#suffix, channels>;
+
+  let HasD16 = has_d16;
+  let d16 = !if(HasD16, ?, 0);
 }
 
 multiclass MIMG_Store_Addr_Helper <bits<7> op, string asm,
                                    RegisterClass data_rc,
-                                   int channels> {
-  defm NAME : MIMG_Store_Addr_Helper_Helper <op, asm, data_rc, channels, 0, "">;
-
-  let d16 = 1 in {
-    let SubtargetPredicate = HasPackedD16VMem in {
-      defm NAME : MIMG_Store_Addr_Helper_Helper <op, asm, data_rc, channels, 1, "_D16">;
-    } // End HasPackedD16VMem.
-
-    let SubtargetPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in {
-      defm NAME : MIMG_Store_Addr_Helper_Helper <op, asm, data_rc, channels, 1, "_D16_gfx80">;
-    } // End HasUnpackedD16VMem.
-  } // End d16 = 1.
-}
-
-multiclass MIMG_Store <bits<7> op, string asm> {
-  defm _V1 : MIMG_Store_Addr_Helper <op, asm, VGPR_32, 1>;
-  defm _V2 : MIMG_Store_Addr_Helper <op, asm, VReg_64, 2>;
-  defm _V3 : MIMG_Store_Addr_Helper <op, asm, VReg_96, 3>;
-  defm _V4 : MIMG_Store_Addr_Helper <op, asm, VReg_128, 4>;
-}
-
-multiclass MIMG_PckStore <bits<7> op, string asm> {
-  defm NAME # _V1 : MIMG_Store_Addr_Helper_Helper <op, asm, VGPR_32, 1, 0, "">;
-  defm NAME # _V2 : MIMG_Store_Addr_Helper_Helper <op, asm, VReg_64, 2, 0, "">;
-  defm NAME # _V3 : MIMG_Store_Addr_Helper_Helper <op, asm, VReg_96, 3, 0, "">;
-  defm NAME # _V4 : MIMG_Store_Addr_Helper_Helper <op, asm, VReg_128, 4, 0, "">;
+                                   int channels, bit has_d16> {
+  def NAME # _V1 : MIMG_Store_Helper <op, asm, data_rc, VGPR_32, has_d16,
+                     !if(!eq(channels, 1), "AMDGPU", "")>,
+                   MIMG_Mask <asm#"_V1", channels>;
+  def NAME # _V2 : MIMG_Store_Helper <op, asm, data_rc, VReg_64, has_d16>,
+                   MIMG_Mask <asm#"_V2", channels>;
+  def NAME # _V3 : MIMG_Store_Helper <op, asm, data_rc, VReg_96, has_d16>,
+                   MIMG_Mask <asm#"_V3", channels>;
+  def NAME # _V4 : MIMG_Store_Helper <op, asm, data_rc, VReg_128, has_d16>,
+                   MIMG_Mask <asm#"_V4", channels>;
+}
+
+multiclass MIMG_Store <bits<7> op, string asm, bit has_d16> {
+  defm _V1 : MIMG_Store_Addr_Helper <op, asm, VGPR_32, 1, has_d16>;
+  defm _V2 : MIMG_Store_Addr_Helper <op, asm, VReg_64, 2, has_d16>;
+  defm _V3 : MIMG_Store_Addr_Helper <op, asm, VReg_96, 3, has_d16>;
+  defm _V4 : MIMG_Store_Addr_Helper <op, asm, VReg_128, 4, has_d16>;
 }
 
 class MIMG_Atomic_Real_si <mimg op, string name, string asm,
-                           RegisterClass data_rc, RegisterClass addr_rc,
-                           bit enableDasm> :
-  MIMG_Atomic_Helper <asm, data_rc, addr_rc, enableDasm>,
-  SIMCInstr <name, SIEncodingFamily.SI>,
-  MIMGe <op.SI> {
+                           RegisterClass data_rc, RegisterClass addr_rc,
+                           bit enableDasm>
+  : MIMG_Atomic_Helper <asm, data_rc, addr_rc, enableDasm>,
+    SIMCInstr <name, SIEncodingFamily.SI>,
+    MIMGe <op.SI> {
   let isCodeGenOnly = 0;
   let AssemblerPredicates = [isSICI];
   let DisableDecoder = DisableSIDecoder;
+  let d16 = 0;
 }
 
 class MIMG_Atomic_Real_vi <mimg op, string name, string asm,
-                           RegisterClass data_rc, RegisterClass addr_rc,
-                           bit enableDasm> :
-  MIMG_Atomic_Helper <asm, data_rc, addr_rc, enableDasm>,
-  SIMCInstr <name, SIEncodingFamily.VI>,
-  MIMGe <op.VI> {
+                           RegisterClass data_rc, RegisterClass addr_rc,
+                           bit enableDasm>
+  : MIMG_Atomic_Helper <asm, data_rc, addr_rc, enableDasm>,
+    SIMCInstr <name, SIEncodingFamily.VI>,
+    MIMGe <op.VI> {
  let isCodeGenOnly = 0;
   let AssemblerPredicates = [isVI];
   let DisableDecoder = DisableVIDecoder;
+  let d16 = 0;
 }
 
 class MIMG_Sampler_Helper <bits<7> op, string asm,
                            RegisterClass dst_rc,
                            RegisterClass src_rc,
-                           bit wqm,
-                           bit d16_bit=0,
-                           string dns=""> : MIMG_Helper <
-  (outs dst_rc:$vdata),
-  (ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
-       DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
-       R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
-  asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""),
-  dns>, MIMGe <op> {
+                           bit wqm, bit has_d16,
+                           string dns="">
+  : MIMG_Helper <(outs dst_rc:$vdata),
+                 !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
+                           DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
+                           R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
+                      !if(has_d16, (ins D16:$d16), (ins))),
+                 asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"
+                    #!if(has_d16, "$d16", ""),
+                 dns>,
+    MIMGe <op> {
   let WQM = wqm;
-  let D16 = d16;
-}
 
-multiclass MIMG_Sampler_Src_Helper_Helper <bits<7> op, string asm,
-                                           RegisterClass dst_rc,
-                                           int channels, bit wqm,
-                                           bit d16_bit, string suffix> {
-  def _V1 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VGPR_32, wqm, d16_bit>,
-                     MIMG_Mask <asm#"_V1"#suffix, channels>;
-  def _V2 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_64, wqm, d16_bit>,
-                     MIMG_Mask <asm#"_V2"#suffix, channels>;
-  def _V3 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_96, wqm, d16_bit>,
-                     MIMG_Mask <asm#"_V3"#suffix, channels>;
-  def _V4 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_128, wqm, d16_bit>,
-                     MIMG_Mask <asm#"_V4"#suffix, channels>;
-  def _V8 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_256, wqm, d16_bit>,
-                     MIMG_Mask <asm#"_V8"#suffix, channels>;
-  def _V16 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_512, wqm, d16_bit>,
-                      MIMG_Mask <asm#"_V16"#suffix, channels>;
+
+  let HasD16 = has_d16;
+  let d16 = !if(HasD16, ?, 0);
 }
 multiclass MIMG_Sampler_Src_Helper <bits<7> op, string asm,
                                     RegisterClass dst_rc,
-                                    int channels, bit wqm> {
-  defm "" : MIMG_Sampler_Src_Helper_Helper <op, asm, dst_rc, channels, wqm, 0, "">;
-
-  let d16 = 1 in {
-    let SubtargetPredicate = HasPackedD16VMem in {
-      defm "" : MIMG_Sampler_Src_Helper_Helper <op, asm, dst_rc, channels, wqm, 1, "_D16">;
-    } // End HasPackedD16VMem.
-
-    let SubtargetPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in {
-      defm "" : MIMG_Sampler_Src_Helper_Helper <op, asm, dst_rc, channels, wqm, 1, "_D16_gfx80">;
-    } // End HasUnpackedD16VMem.
-  } // End d16 = 1.
-}
-
-multiclass MIMG_Sampler <bits<7> op, string asm, bit wqm=0> {
-  defm _V1 : MIMG_Sampler_Src_Helper <op, asm, VGPR_32, 1, wqm>;
-  defm _V2 : MIMG_Sampler_Src_Helper <op, asm, VReg_64, 2, wqm>;
-  defm _V3 : MIMG_Sampler_Src_Helper <op, asm, VReg_96, 3, wqm>;
-  defm _V4 : MIMG_Sampler_Src_Helper <op, asm, VReg_128, 4, wqm>;
+                                    int channels, bit wqm, bit has_d16> {
+  def _V1 : MIMG_Sampler_Helper <op, asm, dst_rc, VGPR_32, wqm, has_d16,
+              !if(!eq(channels, 1), "AMDGPU", "")>,
+            MIMG_Mask <asm#"_V1", channels>;
+  def _V2 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_64, wqm, has_d16>,
+            MIMG_Mask <asm#"_V2", channels>;
+  def _V3 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_96, wqm, has_d16>,
+            MIMG_Mask <asm#"_V3", channels>;
+  def _V4 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_128, wqm, has_d16>,
+            MIMG_Mask <asm#"_V4", channels>;
+  def _V8 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_256, wqm, has_d16>,
+            MIMG_Mask <asm#"_V8", channels>;
+  def _V16 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_512, wqm, has_d16>,
+             MIMG_Mask <asm#"_V16", channels>;
+}
+
+multiclass MIMG_Sampler <bits<7> op, string asm, bit wqm = 0, bit has_d16 = 1> {
+  defm _V1 : MIMG_Sampler_Src_Helper <op, asm, VGPR_32, 1, wqm, has_d16>;
+  defm _V2 : MIMG_Sampler_Src_Helper <op, asm, VReg_64, 2, wqm, has_d16>;
+  defm _V3 : MIMG_Sampler_Src_Helper <op, asm, VReg_96, 3, wqm, has_d16>;
+  defm _V4 : MIMG_Sampler_Src_Helper <op, asm, VReg_128, 4, wqm, has_d16>;
 }
 
 multiclass MIMG_Sampler_WQM <bits<7> op, string asm> : MIMG_Sampler <op, asm, 1>;
@@ -306,14 +265,14 @@
 class MIMG_Gather_Helper <bits<7> op, string asm,
                           RegisterClass dst_rc,
                           RegisterClass src_rc,
                           bit wqm,
-                          bit d16_bit=0,
-                          string dns=""> : MIMG <
-  (outs dst_rc:$vdata),
-  (ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
-       DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
-       R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
-  asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""),
-  []>, MIMGe <op> {
+                          string dns="">
+  : MIMG <(outs dst_rc:$vdata),
+          (ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
+               DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
+               R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da, D16:$d16),
+          asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da$d16",
+          []>,
+    MIMGe <op> {
   let mayLoad = 1;
   let mayStore = 0;
@@ -327,7 +286,7 @@
   let Gather4 = 1;
   let hasPostISelHook = 0;
   let WQM = wqm;
-  let D16 = d16;
+  let HasD16 = 1;
 
   let DecoderNamespace = dns;
   let isAsmParserOnly = !if(!eq(dns,""), 1, 0);
@@ -336,29 +295,25 @@
 multiclass MIMG_Gather_Src_Helper <bits<7> op, string asm,
                                    RegisterClass dst_rc,
-                                   bit wqm, bit d16_bit,
-                                   string prefix,
-                                   string suffix> {
-  def prefix # _V1 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VGPR_32, wqm, d16_bit>;
-  def prefix # _V2 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64, wqm, d16_bit>;
-  def prefix # _V3 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VReg_96, wqm, d16_bit>;
-  def prefix # _V4 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128, wqm, d16_bit>;
-  def prefix # _V8 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256, wqm, d16_bit>;
-  def prefix # _V16 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512, wqm, d16_bit>;
+                                   int channels, bit wqm> {
+  def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VGPR_32, wqm>,
+            MIMG_Gather_Size <asm#"_V1", channels>;
+  def _V2 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64, wqm>,
+            MIMG_Gather_Size <asm#"_V2", channels>;
+  def _V3 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_96, wqm>,
+            MIMG_Gather_Size <asm#"_V3", channels>;
+  def _V4 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128, wqm>,
+            MIMG_Gather_Size <asm#"_V4", channels>;
+  def _V8 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256, wqm>,
+            MIMG_Gather_Size <asm#"_V8", channels>;
+  def _V16 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512, wqm>,
+             MIMG_Gather_Size <asm#"_V16", channels>;
 }
 multiclass MIMG_Gather <bits<7> op, string asm, bit wqm=0> {
-  defm "" : MIMG_Gather_Src_Helper <op, asm, VReg_128, wqm, 0, "", "">;
-
-  let d16 = 1 in {
-    let AssemblerPredicate = HasPackedD16VMem in {
-      defm "" : MIMG_Gather_Src_Helper <op, asm, VReg_128, wqm, 1, "", "_D16">;
-    } // End HasPackedD16VMem.
-
-    let AssemblerPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in {
-      defm "" : MIMG_Gather_Src_Helper <op, asm, VReg_128, wqm, 1, "", "_D16_gfx80">;
-    } // End HasUnpackedD16VMem.
-  } // End d16 = 1.
+  defm _V2 : MIMG_Gather_Src_Helper <op, asm, VReg_64, 2, wqm>; /* for packed D16 only */
+  defm _V4 : MIMG_Gather_Src_Helper <op, asm, VReg_128, 4, wqm>;
 }
 
 multiclass MIMG_Gather_WQM <bits<7> op, string asm> : MIMG_Gather <op, asm, 1>;
@@ -367,19 +322,19 @@
 // MIMG Instructions
 //===----------------------------------------------------------------------===//
 let SubtargetPredicate = isGCN in {
-defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "image_load">;
-defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "image_load_mip">;
-defm IMAGE_LOAD_PCK : MIMG_PckNoSampler <0x00000002, "image_load_pck">;
-defm IMAGE_LOAD_PCK_SGN : MIMG_PckNoSampler <0x00000003, "image_load_pck_sgn">;
-defm IMAGE_LOAD_MIP_PCK : MIMG_PckNoSampler <0x00000004, "image_load_mip_pck">;
-defm IMAGE_LOAD_MIP_PCK_SGN : MIMG_PckNoSampler <0x00000005, "image_load_mip_pck_sgn">;
-defm IMAGE_STORE : MIMG_Store <0x00000008, "image_store">;
-defm IMAGE_STORE_MIP : MIMG_Store <0x00000009, "image_store_mip">;
-defm IMAGE_STORE_PCK : MIMG_PckStore <0x0000000a, "image_store_pck">;
-defm IMAGE_STORE_MIP_PCK : MIMG_PckStore <0x0000000b, "image_store_mip_pck">;
+defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "image_load", 1>;
+defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "image_load_mip", 1>;
+defm IMAGE_LOAD_PCK : MIMG_NoSampler <0x00000002, "image_load_pck", 0>;
+defm IMAGE_LOAD_PCK_SGN : MIMG_NoSampler <0x00000003, "image_load_pck_sgn", 0>;
+defm IMAGE_LOAD_MIP_PCK : MIMG_NoSampler <0x00000004, "image_load_mip_pck", 0>;
+defm IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoSampler <0x00000005, "image_load_mip_pck_sgn", 0>;
+defm IMAGE_STORE : MIMG_Store <0x00000008, "image_store", 1>;
+defm IMAGE_STORE_MIP : MIMG_Store <0x00000009, "image_store_mip", 1>;
+defm IMAGE_STORE_PCK : MIMG_Store <0x0000000a, "image_store_pck", 0>;
+defm IMAGE_STORE_MIP_PCK : MIMG_Store <0x0000000b, "image_store_mip_pck", 0>;
 
 let mayLoad = 0, mayStore = 0 in {
-defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "image_get_resinfo">;
+defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "image_get_resinfo", 0>;
 }
 
 defm IMAGE_ATOMIC_SWAP : MIMG_Atomic <mimg<0x0f, 0x10>, "image_atomic_swap">;
@@ -457,7 +412,7 @@
 defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "image_gather4_c_lz_o">;
 
 let mayLoad = 0, mayStore = 0 in {
-defm IMAGE_GET_LOD : MIMG_Sampler_WQM <0x00000060, "image_get_lod">;
+defm IMAGE_GET_LOD : MIMG_Sampler <0x00000060, "image_get_lod", 1, 0>;
 }
 
 defm IMAGE_SAMPLE_CD : MIMG_Sampler <0x00000068, "image_sample_cd">;
@@ -519,13 +474,13 @@
 }
 
 class ImageDimPattern <AMDGPUImageDimIntrinsic I,
-                       string dop, ValueType dty,
-                       string suffix = ""> : GCNPat <(undef), (undef)> {
+                       string dop, ValueType dty, bit d16 = 0,
+                       string suffix = ""> : GCNPat <(undef), (undef)> {
   list<AMDGPUArg> AddrArgs = I.P.AddrDefaultArgs;
   getDwordsType<!size(AddrArgs)> AddrDwords = getDwordsType<!size(AddrArgs)>;
 
-  Instruction MI =
-    !cast<Instruction>(!strconcat("IMAGE_", I.P.OpMod, dop, AddrDwords.suffix, suffix));
+  MIMG MI =
+    !cast<MIMG>(!strconcat("IMAGE_", I.P.OpMod, dop, AddrDwords.suffix, suffix));
 
   // DAG fragment to match data arguments (vdata for store/atomic, dmask
   // for non-atomic).
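Stepping back to the MIMG_Gather definitions above: a gather4 result is always four channels, so the only narrow variant that has to exist is _V2, for packed D16. The size rule, as a sketch:

// vdata register count for gather4 after this change.
unsigned gatherVDataDwords(bool D16, bool PackedD16) {
  return (D16 && PackedD16) ? 2 : 4; // 4 half-channels packed into 2 dwords
}

This is also why MIMG_Gather_Size records Channels 2 and 4: the disassembler's packed-D16 mapping walks from the 4-channel row to the 2-channel column.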
@@ -581,7 +536,8 @@
             0, /* r128 */
             0, /* tfe */
             0 /*(as_i1imm $lwe)*/,
-            { I.P.Dim.DA }));
+            { I.P.Dim.DA }),
+         !if(MI.HasD16, (MI d16), (MI)));
 
   let ResultInstrs = [
     !if(IsCmpSwap, (EXTRACT_SUBREG ImageInstruction, sub0), ImageInstruction)
   ];
@@ -589,23 +545,23 @@
 
 foreach intr = !listconcat(AMDGPUImageDimIntrinsics,
                            AMDGPUImageDimGetResInfoIntrinsics) in {
-  def intr#_pat_v1 : ImageDimPattern <intr, "_V1", f32>;
-  def intr#_pat_v2 : ImageDimPattern <intr, "_V2", v2f32>;
-  def intr#_pat_v4 : ImageDimPattern <intr, "_V4", v4f32>;
+  def intr#_pat_v1 : ImageDimPattern <intr, "_V1", f32, 0>;
+  def intr#_pat_v2 : ImageDimPattern <intr, "_V2", v2f32, 0>;
+  def intr#_pat_v4 : ImageDimPattern <intr, "_V4", v4f32, 0>;
 }
 
 multiclass ImageDimD16Helper <AMDGPUImageDimIntrinsic I,
                               AMDGPUImageDimIntrinsic d16helper> {
   let SubtargetPredicate = HasUnpackedD16VMem in {
-    def _unpacked_v1 : ImageDimPattern <d16helper, "_V1", i32, "_D16_gfx80">;
-    def _unpacked_v2 : ImageDimPattern <d16helper, "_V2", v2i32, "_D16_gfx80">;
-    def _unpacked_v4 : ImageDimPattern <d16helper, "_V4", v4i32, "_D16_gfx80">;
+    def _unpacked_v1 : ImageDimPattern <d16helper, "_V1", i32, 1>;
+    def _unpacked_v2 : ImageDimPattern <d16helper, "_V2", v2i32, 1>;
+    def _unpacked_v4 : ImageDimPattern <d16helper, "_V4", v4i32, 1>;
   } // End HasUnpackedD16VMem.
 
   let SubtargetPredicate = HasPackedD16VMem in {
-    def _packed_v1 : ImageDimPattern <I, "_V1", f16, "_D16">;
-    def _packed_v2 : ImageDimPattern <I, "_V1", v2f16, "_D16">;
-    def _packed_v4 : ImageDimPattern <I, "_V2", v4f16, "_D16">;
+    def _packed_v1 : ImageDimPattern <I, "_V1", f16, 1>;
+    def _packed_v2 : ImageDimPattern <I, "_V1", v2f16, 1>;
+    def _packed_v4 : ImageDimPattern <I, "_V2", v4f16, 1>;
   } // End HasPackedD16VMem.
 }
@@ -627,7 +583,7 @@
 }
 
 foreach intr = AMDGPUImageDimGatherIntrinsics in {
-  def intr#_pat3 : ImageDimPattern <intr, "_V4", v4f32>;
+  def intr#_pat3 : ImageDimPattern <intr, "_V4", v4f32, 0>;
 
   def intr#_d16helper_profile : AMDGPUDimProfileCopy <intr.P> {
     let RetTypes = !foreach(ty, intr.P.RetTypes, llvm_any_ty);
@@ -643,16 +599,16 @@
     def intr#_unpacked_v4 : ImageDimPattern <
         !cast<AMDGPUImageDimIntrinsic>(
             "int_SI_image_d16helper_" # intr.P.OpMod # intr.P.Dim.Name),
-        "_V4", v4i32, "_D16_gfx80">;
+        "_V4", v4i32, 1>;
   } // End HasUnpackedD16VMem.
 
   let SubtargetPredicate = HasPackedD16VMem in {
-    def intr#_packed_v4 : ImageDimPattern <intr, "_V2", v4f16, "_D16">;
+    def intr#_packed_v4 : ImageDimPattern <intr, "_V2", v4f16, 1>;
   } // End HasPackedD16VMem.
 }
 
 foreach intr = AMDGPUImageDimAtomicIntrinsics in {
-  def intr#_pat1 : ImageDimPattern <intr, "_V1", i32>;
+  def intr#_pat1 : ImageDimPattern <intr, "_V1", i32, 0>;
 }
 
 /********** ======================= **********/
@@ -663,154 +619,160 @@
 // TODO:
 // 1. Handle v4i32 rsrc type (Register Class for the instruction to be SReg_128).
 // 2. Add A16 support when we pass address of half type.
-multiclass ImageSamplePattern <SDPatternOperator name, MIMG opcode,
-                               ValueType dt, ValueType vt> {
+multiclass ImageSamplePattern <SDPatternOperator name, MIMG opcode,
+                               ValueType dt, ValueType vt, bit d16> {
   def : GCNPat<
     (dt (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i1:$unorm,
         i1:$glc, i1:$slc, i1:$lwe, i1:$da)),
-    (opcode $addr, $rsrc, $sampler,
-        (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $slc),
-        0, 0, (as_i1imm $lwe), (as_i1imm $da))
+    !con((opcode $addr, $rsrc, $sampler, (as_i32imm $dmask), (as_i1imm $unorm),
+          (as_i1imm $glc), (as_i1imm $slc), 0, 0, (as_i1imm $lwe),
+          (as_i1imm $da)),
+         !if(opcode.HasD16, (opcode d16), (opcode)))
   >;
 }
 
-multiclass ImageSampleDataPatterns <SDPatternOperator name, string opcode,
-                                    ValueType dt, string suffix = ""> {
-  defm : ImageSamplePattern <name, !cast<MIMG>(opcode # _V1 # suffix), dt, f32>;
-  defm : ImageSamplePattern <name, !cast<MIMG>(opcode # _V2 # suffix), dt, v2f32>;
-  defm : ImageSamplePattern <name, !cast<MIMG>(opcode # _V4 # suffix), dt, v4f32>;
-  defm : ImageSamplePattern <name, !cast<MIMG>(opcode # _V8 # suffix), dt, v8f32>;
-  defm : ImageSamplePattern <name, !cast<MIMG>(opcode # _V16 # suffix), dt, v16f32>;
+multiclass ImageSampleDataPatterns <SDPatternOperator name, string opcode,
+                                    ValueType dt, bit d16 = 0> {
+  defm : ImageSamplePattern <name, !cast<MIMG>(opcode # _V1), dt, f32, d16>;
+  defm : ImageSamplePattern <name, !cast<MIMG>(opcode # _V2), dt, v2f32, d16>;
+  defm : ImageSamplePattern <name, !cast<MIMG>(opcode # _V4), dt, v4f32, d16>;
+  defm : ImageSamplePattern <name, !cast<MIMG>(opcode # _V8), dt, v8f32, d16>;
+  defm : ImageSamplePattern <name, !cast<MIMG>(opcode # _V16), dt, v16f32, d16>;
 }
 
 // ImageSample patterns.
 multiclass ImageSamplePatterns <SDPatternOperator name, string opcode> {
-  defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V1), f32>;
-  defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V2), v2f32>;
-  defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V4), v4f32>;
+  defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V1), f32, 0>;
+  defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V2), v2f32, 0>;
+  defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V4), v4f32, 0>;
 
   let SubtargetPredicate = HasUnpackedD16VMem in {
-    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V1), f16, "_D16_gfx80">;
+    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V1), f16, 1>;
   } // End HasUnpackedD16VMem.
 
   let SubtargetPredicate = HasPackedD16VMem in {
-    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V1), f16, "_D16">;
-    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V1), v2f16, "_D16">;
-    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V2), v4f16, "_D16">;
+    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V1), f16, 1>;
+    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V1), v2f16, 1>;
+    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V2), v4f16, 1>;
  } // End HasPackedD16VMem.
 }
 
 // ImageSample alternative patterns for illegal vector half Types.
 multiclass ImageSampleAltPatterns <SDPatternOperator name, string opcode> {
   let SubtargetPredicate = HasUnpackedD16VMem in {
-    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V2), v2i32, "_D16_gfx80">;
-    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V4), v4i32, "_D16_gfx80">;
+    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V2), v2i32, 1>;
+    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V4), v4i32, 1>;
   } // End HasUnpackedD16VMem.
 }
 
 // ImageGather4 patterns.
 multiclass ImageGather4Patterns <SDPatternOperator name, string opcode> {
-  defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V4), v4f32>;
+  defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V4), v4f32, 0>;
 
   let SubtargetPredicate = HasPackedD16VMem in {
-    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V2), v4f16, "_D16">;
+    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V2), v4f16, 1>;
   } // End HasPackedD16VMem.
 }
 
 // ImageGather4 alternative patterns for illegal vector half Types.
 multiclass ImageGather4AltPatterns <SDPatternOperator name, string opcode> {
   let SubtargetPredicate = HasUnpackedD16VMem in {
-    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V4), v4i32, "_D16_gfx80">;
+    defm : ImageSampleDataPatterns <name, !cast<string>(opcode # _V4), v4i32, 1>;
   } // End HasUnpackedD16VMem.
-
 }
 
 // ImageLoad for amdgcn.
-multiclass ImageLoadPattern <SDPatternOperator name, MIMG opcode,
-                             ValueType dt, ValueType vt> {
+multiclass ImageLoadPattern <SDPatternOperator name, MIMG opcode,
+                             ValueType dt, ValueType vt, bit d16> {
   def : GCNPat <
     (dt (name vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc, i1:$lwe,
               i1:$da)),
-    (opcode $addr, $rsrc,
-        (as_i32imm $dmask), 1, (as_i1imm $glc), (as_i1imm $slc),
-        0, 0, (as_i1imm $lwe), (as_i1imm $da))
+    !con((opcode $addr, $rsrc, (as_i32imm $dmask), 1, (as_i1imm $glc),
+          (as_i1imm $slc), 0, 0, (as_i1imm $lwe), (as_i1imm $da)),
+         !if(opcode.HasD16, (opcode d16), (opcode)))
  >;
 }
 
-multiclass ImageLoadDataPatterns <SDPatternOperator name, string opcode,
-                                  ValueType dt, string suffix = ""> {
-  defm : ImageLoadPattern <name, !cast<MIMG>(opcode # _V1 # suffix), dt, i32>;
-  defm : ImageLoadPattern <name, !cast<MIMG>(opcode # _V2 # suffix), dt, v2i32>;
-  defm : ImageLoadPattern <name, !cast<MIMG>(opcode # _V4 # suffix), dt, v4i32>;
+multiclass ImageLoadDataPatterns <SDPatternOperator name, string opcode,
+                                  ValueType dt, bit d16 = 0> {
+  defm : ImageLoadPattern <name, !cast<MIMG>(opcode # _V1), dt, i32, d16>;
+  defm : ImageLoadPattern <name, !cast<MIMG>(opcode # _V2), dt, v2i32, d16>;
+  defm : ImageLoadPattern <name, !cast<MIMG>(opcode # _V4), dt, v4i32, d16>;
 }
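The `!con(..., !if(opcode.HasD16, (opcode d16), (opcode)))` idiom in these patterns splices one extra result operand into the selected instruction only when that instruction declares a d16 operand. Its C++ analogue, as it would look when assembling a node's operand list by hand (a sketch under that assumption, not code from this patch):

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Append the d16 immediate only for instructions that declare the operand;
// mirrors the !con/!if splice in the patterns above.
static void appendOptionalD16(SelectionDAG &DAG, const SDLoc &DL,
                              SmallVectorImpl<SDValue> &Ops, bool HasD16,
                              bool D16) {
  if (HasD16)
    Ops.push_back(DAG.getTargetConstant(D16 ? 1 : 0, DL, MVT::i1));
}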
 // ImageLoad patterns.
 // TODO: support v3f32.
 multiclass ImageLoadPatterns <SDPatternOperator name, string opcode> {
-  defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V1), f32>;
-  defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V2), v2f32>;
-  defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V4), v4f32>;
+  defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V1), f32, 0>;
+  defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V2), v2f32, 0>;
+  defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V4), v4f32, 0>;
 
   let SubtargetPredicate = HasUnpackedD16VMem in {
-    defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V1), f16, "_D16_gfx80">;
+    defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V1), f16, 1>;
   } // End HasUnpackedD16VMem.
 
   let SubtargetPredicate = HasPackedD16VMem in {
-    defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V1), f16, "_D16">;
-    defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V1), v2f16, "_D16">;
-    defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V2), v4f16, "_D16">;
+    defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V1), f16, 1>;
+    defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V1), v2f16, 1>;
+    defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V2), v4f16, 1>;
  } // End HasPackedD16VMem.
 }
 
 // ImageLoad alternative patterns for illegal vector half Types.
 multiclass ImageLoadAltPatterns <SDPatternOperator name, string opcode> {
   let SubtargetPredicate = HasUnpackedD16VMem in {
-    defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V2), v2i32, "_D16_gfx80">;
-    defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V4), v4i32, "_D16_gfx80">;
+    defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V2), v2i32, 1>;
+    defm : ImageLoadDataPatterns <name, !cast<string>(opcode # _V4), v4i32, 1>;
   } // End HasUnPackedD16VMem.
 }
 
 // ImageStore for amdgcn.
-multiclass ImageStorePattern <SDPatternOperator name, MIMG opcode,
-                              ValueType dt, ValueType vt> {
+multiclass ImageStorePattern <SDPatternOperator name, MIMG opcode,
+                              ValueType dt, ValueType vt, bit d16> {
   def : GCNPat <
     (name dt:$data, vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc,
           i1:$lwe, i1:$da),
-    (opcode $data, $addr, $rsrc,
-        (as_i32imm $dmask), 1, (as_i1imm $glc), (as_i1imm $slc),
-        0, 0, (as_i1imm $lwe), (as_i1imm $da))
+    !con((opcode $data, $addr, $rsrc, (as_i32imm $dmask), 1, (as_i1imm $glc),
+          (as_i1imm $slc), 0, 0, (as_i1imm $lwe), (as_i1imm $da)),
+         !if(opcode.HasD16, (opcode d16), (opcode)))
  >;
 }
 
-multiclass ImageStoreDataPatterns <SDPatternOperator name, string opcode,
-                                   ValueType dt, string suffix = ""> {
-  defm : ImageStorePattern <name, !cast<MIMG>(opcode # _V1 # suffix), dt, i32>;
-  defm : ImageStorePattern <name, !cast<MIMG>(opcode # _V2 # suffix), dt, v2i32>;
-  defm : ImageStorePattern <name, !cast<MIMG>(opcode # _V4 # suffix), dt, v4i32>;
+multiclass ImageStoreDataPatterns <SDPatternOperator name, string opcode,
+                                   ValueType dt, bit d16 = 0> {
+  defm : ImageStorePattern <name, !cast<MIMG>(opcode # _V1), dt, i32, d16>;
+  defm : ImageStorePattern <name, !cast<MIMG>(opcode # _V2), dt, v2i32, d16>;
+  defm : ImageStorePattern <name, !cast<MIMG>(opcode # _V4), dt, v4i32, d16>;
 }
 
 // ImageStore patterns.
 // TODO: support v3f32.
 multiclass ImageStorePatterns <SDPatternOperator name, string opcode> {
-  defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V1), f32>;
-  defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V2), v2f32>;
-  defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V4), v4f32>;
+  defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V1), f32, 0>;
+  defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V2), v2f32, 0>;
+  defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V4), v4f32, 0>;
 
   let SubtargetPredicate = HasUnpackedD16VMem in {
-    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V1), f16, "_D16_gfx80">;
+    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V1), f16, 1>;
   } // End HasUnpackedD16VMem.
 
   let SubtargetPredicate = HasPackedD16VMem in {
-    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V1), f16, "_D16">;
-    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V1), v2f16, "_D16">;
-    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V2), v4f16, "_D16">;
+    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V1), f16, 1>;
+    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V1), v2f16, 1>;
+    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V2), v4f16, 1>;
  } // End HasPackedD16VMem.
 }
 
 // ImageStore alternative patterns.
 multiclass ImageStoreAltPatterns <SDPatternOperator name, string opcode> {
   let SubtargetPredicate = HasUnpackedD16VMem in {
-    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V2), v2i32, "_D16_gfx80">;
-    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V4), v4i32, "_D16_gfx80">;
+    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V2), v2i32, 1>;
+    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V4), v4i32, 1>;
   } // End HasUnpackedD16VMem.
 
   let SubtargetPredicate = HasPackedD16VMem in {
-    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V1), i32, "_D16">;
-    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V2), v2i32, "_D16">;
+    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V1), i32, 1>;
+    defm : ImageStoreDataPatterns <name, !cast<string>(opcode # _V2), v2i32, 1>;
   } // End HasPackedD16VMem.
 }
@@ -1030,83 +992,3 @@
 defm : ImageAtomicPatterns <int_amdgcn_image_atomic_xor, "XOR">;
 defm : ImageAtomicPatterns <int_amdgcn_image_atomic_inc, "INC">;
 defm : ImageAtomicPatterns <int_amdgcn_image_atomic_dec, "DEC">;
-
-/* SIsample for simple 1D texture lookup */
-def : GCNPat <
-  (SIsample i32:$addr, v8i32:$rsrc, v4i32:$sampler, imm),
-  (IMAGE_SAMPLE_V4_V1 $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 0)
->;
-
-class SamplePattern <SDNode name, MIMG opcode, ValueType vt> : GCNPat <
-  (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, imm),
-  (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 0)
->;
-
-class SampleRectPattern <SDNode name, MIMG opcode, ValueType vt> : GCNPat <
-  (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_RECT),
-  (opcode $addr, $rsrc, $sampler, 0xf, 1, 0, 0, 0, 0, 0, 0)
->;
-
-class SampleArrayPattern <SDNode name, MIMG opcode, ValueType vt> : GCNPat <
-  (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_ARRAY),
-  (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 1)
->;
-
-class SampleShadowPattern <SDNode name, MIMG opcode, ValueType vt> : GCNPat <
-  (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_SHADOW),
-  (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 0)
->;
-
-class SampleShadowArrayPattern <SDNode name, MIMG opcode, ValueType vt> : GCNPat <
-  (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_SHADOW_ARRAY),
-  (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 1)
->;
-
-/* SIsample* for texture lookups consuming more address parameters */
-multiclass SamplePatterns <MIMG sample, MIMG sample_l, MIMG sample_b,
-                           MIMG sample_d, ValueType addr_type> {
-  def : SamplePattern <SIsample, sample, addr_type>;
-  def : SampleRectPattern <SIsample, sample, addr_type>;
-  def : SampleArrayPattern <SIsample, sample, addr_type>;
-  def : SampleShadowPattern <SIsample, sample, addr_type>;
-  def : SampleShadowArrayPattern <SIsample, sample, addr_type>;
-
-  def : SamplePattern <SIsamplel, sample_l, addr_type>;
-  def : SampleArrayPattern <SIsamplel, sample_l, addr_type>;
-  def : SampleShadowPattern <SIsamplel, sample_l, addr_type>;
-  def : SampleShadowArrayPattern <SIsamplel, sample_l, addr_type>;
-
-  def : SamplePattern <SIsampleb, sample_b, addr_type>;
-  def : SampleArrayPattern <SIsampleb, sample_b, addr_type>;
-  def : SampleShadowPattern <SIsampleb, sample_b, addr_type>;
-  def : SampleShadowArrayPattern <SIsampleb, sample_b, addr_type>;
-
-  def : SamplePattern <SIsampled, sample_d, addr_type>;
-  def : SampleArrayPattern <SIsampled, sample_d, addr_type>;
-  def : SampleShadowPattern <SIsampled, sample_d, addr_type>;
-  def : SampleShadowArrayPattern <SIsampled, sample_d, addr_type>;
-}
-
-defm : SamplePatterns <IMAGE_SAMPLE_V4_V2, IMAGE_SAMPLE_L_V4_V2, IMAGE_SAMPLE_B_V4_V2, IMAGE_SAMPLE_D_V4_V2, v2i32>;
-defm : SamplePatterns <IMAGE_SAMPLE_V4_V4, IMAGE_SAMPLE_L_V4_V4, IMAGE_SAMPLE_B_V4_V4, IMAGE_SAMPLE_D_V4_V4, v4i32>;
-defm : SamplePatterns <IMAGE_SAMPLE_V4_V8, IMAGE_SAMPLE_L_V4_V8, IMAGE_SAMPLE_B_V4_V8, IMAGE_SAMPLE_D_V4_V8, v8i32>;
-defm : SamplePatterns <IMAGE_SAMPLE_V4_V16, IMAGE_SAMPLE_L_V4_V16, IMAGE_SAMPLE_B_V4_V16, IMAGE_SAMPLE_D_V4_V16, v16i32>;
Index: llvm/trunk/lib/Target/AMDGPU/SIDefines.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIDefines.h
+++ llvm/trunk/lib/Target/AMDGPU/SIDefines.h
@@ -87,8 +87,8 @@
   // Is a packed VOP3P instruction.
   IsPacked = UINT64_C(1) << 49,
 
-  // "d16" bit set or not.
-  D16 = UINT64_C(1) << 50
+  // Is a D16 buffer instruction.
+  D16Buf = UINT64_C(1) << 50
 };
 
 // v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
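With bit 50 repurposed, D16-ness of MIMG instructions is no longer a TSFlags property at all; only buffer instructions keep a flag, and only because their gfx8.0 unpacked encodings live in a separate encoding family. How the bit is meant to be read, as a sketch (it mirrors the use in AMDGPUInstrInfo at the top of this patch, assuming the SIInstrFlags enum above):

static bool needsGFX80Encoding(uint64_t TSFlags, bool HasUnpackedD16VMem) {
  // TSFlags bit 50 now answers "is this a D16 *buffer* instruction?" only;
  // MIMG D16 is carried by the explicit d16 operand instead.
  return HasUnpackedD16VMem && (TSFlags & SIInstrFlags::D16Buf);
}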
Index: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7762,9 +7762,16 @@
 /// Adjust the writemask of MIMG instructions
 SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
                                           SelectionDAG &DAG) const {
+  unsigned Opcode = Node->getMachineOpcode();
+
+  // Subtract 1 because the vdata output is not a MachineSDNode operand.
+  int D16Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::d16) - 1;
+  if (D16Idx >= 0 && Node->getConstantOperandVal(D16Idx))
+    return Node; // not implemented for D16
+
   SDNode *Users[4] = { nullptr };
   unsigned Lane = 0;
-  unsigned DmaskIdx = (Node->getNumOperands() - Node->getNumValues() == 9) ? 2 : 3;
+  unsigned DmaskIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::dmask) - 1;
   unsigned OldDmask = Node->getConstantOperandVal(DmaskIdx);
   unsigned NewDmask = 0;
   bool HasChain = Node->getNumValues() > 1;
@@ -7936,7 +7943,7 @@
   unsigned Opcode = Node->getMachineOpcode();
   if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore() &&
-      !TII->isGather4(Opcode) && !TII->isD16(Opcode)) {
+      !TII->isGather4(Opcode)) {
     return adjustWritemask(Node, DAG);
   }
Index: llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
@@ -118,8 +118,8 @@
   // This bit indicates that this is a packed VOP3P instruction
   field bit IsPacked = 0;
 
-  // This bit indicates that this is a D16 instruction.
-  field bit D16 = 0;
+  // This bit indicates that this is a D16 buffer instruction.
+  field bit D16Buf = 0;
 
   // These need to be kept in sync with the enum in SIInstrFlags.
   let TSFlags{0} = SALU;
@@ -176,7 +176,7 @@
 
   let TSFlags{49} = IsPacked;
 
-  let TSFlags{50} = D16;
+  let TSFlags{50} = D16Buf;
 
   let SchedRW = [Write32Bit];
 
@@ -255,7 +255,7 @@
   bits<1> tfe;
   bits<1> lwe;
   bits<1> slc;
-  bits<1> d16 = 0;
+  bit d16;
   bits<8> vaddr;
   bits<7> srsrc;
   bits<7> ssamp;
@@ -344,4 +344,6 @@
 
   let UseNamedOperandTable = 1;
   let hasSideEffects = 0; // XXX ????
+
+  bit HasD16 = 0;
 }
Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
@@ -445,14 +445,6 @@
     return get(Opcode).TSFlags & SIInstrFlags::Gather4;
   }
 
-  static bool isD16(const MachineInstr &MI) {
-    return MI.getDesc().TSFlags & SIInstrFlags::D16;
-  }
-
-  bool isD16(uint16_t Opcode) const {
-    return get(Opcode).TSFlags & SIInstrFlags::D16;
-  }
-
   static bool isFLAT(const MachineInstr &MI) {
     return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
   }
Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
@@ -300,16 +300,6 @@
 def SIImage_gather4_c_b_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_B_CL_O">;
 def SIImage_gather4_c_lz_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_LZ_O">;
 
-class SDSample <string opcode> : SDNode <opcode,
-  SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v8i32>,
-                       SDTCisVT<3, v4i32>, SDTCisVT<4, i32>]>
->;
-
-def SIsample : SDSample<"AMDGPUISD::SAMPLE">;
-def SIsampleb : SDSample<"AMDGPUISD::SAMPLEB">;
-def SIsampled : SDSample<"AMDGPUISD::SAMPLED">;
-def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">;
-
 def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET",
   SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>
 >;
@@ -2079,6 +2069,14 @@
   let ValueCols = [["1"]];
 }
 
+def getMIMGGatherOpPackedD16 : InstrMapping {
+  let FilterClass = "MIMG_Gather_Size";
+  let RowFields = ["Op"];
+  let ColFields = ["Channels"];
+  let KeyCol = ["4"];
+  let ValueCols = [["2"]];
+}
+
 // Maps an commuted opcode to its original version
 def getCommuteOrig : InstrMapping {
   let FilterClass = "Commutable_REV";
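The new InstrMapping emits a TableGen-generated table keyed by the MIMG_Gather_Size rows defined earlier: for each gather op it maps the 4-channel variant (KeyCol "4") to its 2-channel sibling (ValueCol "2"). getMIMGGatherOpPackedD16, declared in AMDGPUBaseInfo.h below, is the accessor. Hypothetical usage, mirroring the disassembler hunk earlier in this patch:

#include "llvm/MC/MCInst.h"

// Retarget a _V4 gather opcode to its _V2 sibling for packed D16.
static void shrinkPackedD16Gather(llvm::MCInst &MI) {
  int NewOpcode = llvm::AMDGPU::getMIMGGatherOpPackedD16(MI.getOpcode());
  if (NewOpcode != -1) // -1: no packed-D16 sibling for this opcode
    MI.setOpcode(NewOpcode);
}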
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -166,6 +166,9 @@
                       unsigned Opc, unsigned NewChannels);
 
 LLVM_READONLY
+int getMIMGGatherOpPackedD16(uint16_t Opcode);
+
+LLVM_READONLY
 int getMCOpcode(uint16_t Opcode, unsigned Gen);
 
 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
Index: llvm/trunk/test/CodeGen/AMDGPU/coalescer-subreg-join.mir
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/coalescer-subreg-join.mir
+++ llvm/trunk/test/CodeGen/AMDGPU/coalescer-subreg-join.mir
@@ -61,7 +61,7 @@
     %11.sub6 = COPY %1
     %11.sub7 = COPY %1
     %11.sub8 = COPY %1
-    dead %18 = IMAGE_SAMPLE_C_D_O_V1_V16 %11, %3, %4, 1, 0, 0, 0, 0, 0, 0, -1, implicit $exec
+    dead %18 = IMAGE_SAMPLE_C_D_O_V1_V16 %11, %3, %4, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec
     %20.sub1 = COPY %2
     %20.sub2 = COPY %2
     %20.sub3 = COPY %2
@@ -70,6 +70,6 @@
     %20.sub6 = COPY %2
     %20.sub7 = COPY %2
     %20.sub8 = COPY %2
-    dead %27 = IMAGE_SAMPLE_C_D_O_V1_V16 %20, %5, %6, 1, 0, 0, 0, 0, 0, 0, -1, implicit $exec
+    dead %27 = IMAGE_SAMPLE_C_D_O_V1_V16 %20, %5, %6, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec
 
 ...
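All MIR test updates in this patch follow one mechanical rule: each MIMG sample/store instruction gains a trailing immediate `0` for the new d16 operand, just before `implicit $exec`. When building such an instruction by hand it must now be supplied explicitly; a sketch (MBB, I, DL, TII, and the register names are assumed to be in scope):

#include "llvm/CodeGen/MachineInstrBuilder.h"

// Hand-built equivalent of one of the updated MIR lines below.
BuildMI(MBB, I, DL, TII->get(llvm::AMDGPU::IMAGE_SAMPLE_LZ_V4_V2), DstReg)
    .addReg(VAddr)
    .addReg(SRsrc)
    .addReg(SSamp)
    .addImm(15)                               // dmask
    .addImm(0).addImm(0).addImm(0).addImm(0)  // unorm, glc, slc, r128
    .addImm(0).addImm(0).addImm(0)            // tfe, lwe, da
    .addImm(0);                               // d16 -- the new operand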
Index: llvm/trunk/test/CodeGen/AMDGPU/memory_clause.mir
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory_clause.mir
+++ llvm/trunk/test/CodeGen/AMDGPU/memory_clause.mir
@@ -305,11 +305,11 @@
 
 # GCN-LABEL: {{^}}name: image_clause{{$}}
 # GCN: early-clobber %4:vreg_128, early-clobber %3:vreg_128, early-clobber %5:vreg_128 = BUNDLE %0, undef %2:sreg_128, %1, implicit $exec {
-# GCN-NEXT: %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT: %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
 # GCN-NEXT: }
-# GCN-NEXT: IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
 
 ---
 name: image_clause
@@ -325,17 +325,17 @@
   bb.0:
     %0 = IMPLICIT_DEF
     %1 = IMPLICIT_DEF
-    %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
-    %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
-    %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
-    IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec
-    IMAGE_STORE_V4_V2 %4, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec
-    IMAGE_STORE_V4_V2 %5, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec
+    %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    IMAGE_STORE_V4_V2 %4, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    IMAGE_STORE_V4_V2 %5, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
 ...
 
 # GCN-LABEL: {{^}}name: mixed_clause{{$}}
 # GCN: dead early-clobber %4:vreg_128, dead early-clobber %3:vreg_128, dead early-clobber %5:vgpr_32 = BUNDLE %0, %2, %1, implicit $exec {
-# GCN-NEXT: dead %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: dead %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
 # GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
 # GCN-NEXT: dead %5:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %0, %2, 0, 0, 0, 0, 0, implicit $exec
 # GCN-NEXT: }
@@ -355,7 +355,7 @@
     %0 = IMPLICIT_DEF
     %1 = IMPLICIT_DEF
     %2 = IMPLICIT_DEF
-    %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
     %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
     %5:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %0, %2, 0, 0, 0, 0, 0, implicit $exec
 ...
Index: llvm/trunk/test/MC/AMDGPU/mimg.s
===================================================================
--- llvm/trunk/test/MC/AMDGPU/mimg.s
+++ llvm/trunk/test/MC/AMDGPU/mimg.s
@@ -356,20 +356,19 @@
 // GCN: image_gather4 v[5:8], v[1:4], s[8:15], s[12:15] dmask:0x8 ; encoding: [0x00,0x08,0x00,0xf1,0x01,0x05,0x62,0x00]
 
 image_gather4 v[5:8], v1, s[8:15], s[12:15] dmask:0x1 d16
-// NOSICI: error: instruction not supported on this GPU
+// NOSICI: error: d16 modifier is not supported on this GPU
 // GFX8_0: image_gather4 v[5:8], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]
-// NOGFX8_1: error: instruction not supported on this GPU
-// NOGFX9: error: instruction not supported on this GPU
+// NOGFX8_1: error: image data size does not match dmask and tfe
+// NOGFX9: error: image data size does not match dmask and tfe
 
 image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16
 // NOSICI: error: d16 modifier is not supported on this GPU
-// NOGFX8_0: error: instruction not supported on this GPU
+// NOGFX8_0: error: image data size does not match dmask and tfe
 // GFX8_1: image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]
 // GFX9: image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]
 
-// FIXME: d16 is handled as an optional modifier, should it be corrected?
 image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1
-// NOSICI: error: d16 modifier is not supported on this GPU
-// NOGFX8_0: error: instruction not supported on this GPU
-// GFX8_1: image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]
-// GFX9: image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]
+// NOSICI: error: image data size does not match dmask and tfe
+// NOGFX8_0: error: image data size does not match dmask and tfe
+// NOGFX8_1: error: image data size does not match dmask and tfe
+// NOGFX9: error: image data size does not match dmask and tfe