Index: include/llvm/IR/IntrinsicsAMDGPU.td =================================================================== --- include/llvm/IR/IntrinsicsAMDGPU.td +++ include/llvm/IR/IntrinsicsAMDGPU.td @@ -473,9 +473,12 @@ } // Represent texture/image types / dimensionality. -class AMDGPUDimProps coord_names, list slice_names> { +class AMDGPUDimProps enc, string name, string asmsuffix, + list coord_names, list slice_names> { AMDGPUDimProps Dim = !cast(NAME); string Name = name; // e.g. "2darraymsaa" + string AsmSuffix = asmsuffix; // e.g. 2D_MSAA_ARRAY (used in assembly strings) + bits<3> Encoding = enc; bit DA = 0; // DA bit in MIMG encoding list CoordSliceArgs = @@ -491,17 +494,17 @@ bits<8> NumGradients = !size(GradientArgs); } -def AMDGPUDim1D : AMDGPUDimProps<"1d", ["s"], []>; -def AMDGPUDim2D : AMDGPUDimProps<"2d", ["s", "t"], []>; -def AMDGPUDim3D : AMDGPUDimProps<"3d", ["s", "t", "r"], []>; +def AMDGPUDim1D : AMDGPUDimProps<0x0, "1d", "1D", ["s"], []>; +def AMDGPUDim2D : AMDGPUDimProps<0x1, "2d", "2D", ["s", "t"], []>; +def AMDGPUDim3D : AMDGPUDimProps<0x2, "3d", "3D", ["s", "t", "r"], []>; let DA = 1 in { - def AMDGPUDimCube : AMDGPUDimProps<"cube", ["s", "t"], ["face"]>; - def AMDGPUDim1DArray : AMDGPUDimProps<"1darray", ["s"], ["slice"]>; - def AMDGPUDim2DArray : AMDGPUDimProps<"2darray", ["s", "t"], ["slice"]>; + def AMDGPUDimCube : AMDGPUDimProps<0x3, "cube", "CUBE", ["s", "t"], ["face"]>; + def AMDGPUDim1DArray : AMDGPUDimProps<0x4, "1darray", "1D_ARRAY", ["s"], ["slice"]>; + def AMDGPUDim2DArray : AMDGPUDimProps<0x5, "2darray", "2D_ARRAY", ["s", "t"], ["slice"]>; } -def AMDGPUDim2DMsaa : AMDGPUDimProps<"2dmsaa", ["s", "t"], ["fragid"]>; +def AMDGPUDim2DMsaa : AMDGPUDimProps<0x6, "2dmsaa", "2D_MSAA", ["s", "t"], ["fragid"]>; let DA = 1 in { - def AMDGPUDim2DArrayMsaa : AMDGPUDimProps<"2darraymsaa", ["s", "t"], ["slice", "fragid"]>; + def AMDGPUDim2DArrayMsaa : AMDGPUDimProps<0x7, "2darraymsaa", "2D_MSAA_ARRAY", ["s", "t"], ["slice", "fragid"]>; } def AMDGPUDims { @@ -678,7 +681,7 @@ !if(P_.IsSample, [llvm_v4i32_ty, // samp(SGPR) llvm_i1_ty], []), // unorm(imm) [llvm_i32_ty, // texfailctrl(imm; bit 0 = tfe, bit 1 = lwe) - llvm_i32_ty]), // cachepolicy(imm; bit 0 = glc, bit 1 = slc) + llvm_i32_ty]), // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc) !listconcat(props, !if(P_.IsAtomic, [], [ImmArg.DmaskArgIndex>]), !if(P_.IsSample, [ImmArg.UnormArgIndex>], []), @@ -884,7 +887,7 @@ [llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) - llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc) + llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+) [IntrReadMem, ImmArg<3>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad; @@ -896,7 +899,7 @@ llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) - llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc) + llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+) [IntrReadMem, ImmArg<4>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad; @@ -908,7 +911,7 @@ llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) - llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc) + llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+) [IntrWriteMem, ImmArg<4>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; def int_amdgcn_raw_buffer_store_format : AMDGPURawBufferStore; @@ -921,7 +924,7 @@ llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) - llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc) + llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+) [IntrWriteMem, ImmArg<5>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore; @@ -1031,7 +1034,7 @@ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt) - llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc) + llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+) [IntrReadMem, ImmArg<3>, ImmArg<4>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; @@ -1042,7 +1045,7 @@ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt) - llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc) + llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+) [IntrWriteMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; @@ -1053,7 +1056,7 @@ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt) - llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc) + llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+) [IntrReadMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; @@ -1065,7 +1068,7 @@ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt) - llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc) + llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+) [IntrWriteMem, ImmArg<5>, ImmArg<6>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; Index: include/llvm/MC/MCInst.h =================================================================== --- include/llvm/MC/MCInst.h +++ include/llvm/MC/MCInst.h @@ -187,6 +187,7 @@ void clear() { Operands.clear(); } void erase(iterator I) { Operands.erase(I); } + void erase(iterator First, iterator Last) { Operands.erase(First, Last); } size_t size() const { return Operands.size(); } iterator begin() { return Operands.begin(); } const_iterator begin() const { return Operands.begin(); } Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -155,6 +155,7 @@ ImmTySdwaSrc1Sel, ImmTySdwaDstUnused, ImmTyDMask, + ImmTyDim, ImmTyUNorm, ImmTyDA, ImmTyR128A16, @@ -296,6 +297,7 @@ bool isClampSI() const { return isImmTy(ImmTyClampSI); } bool isOModSI() const { return isImmTy(ImmTyOModSI); } bool isDMask() const { return isImmTy(ImmTyDMask); } + bool isDim() const { return isImmTy(ImmTyDim); } bool isUNorm() const { return isImmTy(ImmTyUNorm); } bool isDA() const { return isImmTy(ImmTyDA); } bool isR128A16() const { return isImmTy(ImmTyR128A16); } @@ -695,6 +697,7 @@ case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; case ImmTyDMask: OS << "DMask"; break; + case ImmTyDim: OS << "Dim"; break; case ImmTyUNorm: OS << "UNorm"; break; case ImmTyDA: OS << "DA"; break; case ImmTyR128A16: OS << "R128A16"; break; @@ -926,6 +929,10 @@ enum AMDGPUMatchResultTy { Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY }; + enum OperandMode { + OperandMode_Default, + OperandMode_NSA, + }; using OptionalImmIndexMap = std::map; @@ -1065,7 +1072,8 @@ uint64_t &ErrorInfo, bool MatchingInlineAsm) override; bool ParseDirective(AsmToken DirectiveID) override; - OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic); + OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, + OperandMode Mode = OperandMode_Default); StringRef parseMnemonicSuffix(StringRef Name); bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) override; @@ -1133,7 +1141,9 @@ bool validateMIMGAtomicDMask(const MCInst &Inst); bool validateMIMGGatherDMask(const MCInst &Inst); bool validateMIMGDataSize(const MCInst &Inst); + bool validateMIMGAddrSize(const MCInst &Inst); bool validateMIMGD16(const MCInst &Inst); + bool validateMIMGDim(const MCInst &Inst); bool validateLdsDirect(const MCInst &Inst); bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; @@ -1211,6 +1221,7 @@ bool IsAtomic = false); void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); + OperandMatchResultTy parseDim(OperandVector &Operands); OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); AMDGPUOperand::Ptr defaultRowMask() const; AMDGPUOperand::Ptr defaultBankMask() const; @@ -2565,6 +2576,46 @@ return (VDataSize / 4) == DataSize + TFESize; } +bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { + const unsigned Opc = Inst.getOpcode(); + const MCInstrDesc &Desc = MII.get(Opc); + + if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) + return true; + + const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); + const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = + AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); + int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); + int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); + int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); + + assert(VAddr0Idx != -1); + assert(SrsrcIdx != -1); + assert(DimIdx != -1); + assert(SrsrcIdx > VAddr0Idx); + + unsigned Dim = Inst.getOperand(DimIdx).getImm(); + const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); + bool IsNSA = SrsrcIdx - VAddr0Idx > 1; + unsigned VAddrSize = + IsNSA ? SrsrcIdx - VAddr0Idx + : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; + + unsigned AddrSize = BaseOpcode->NumExtraArgs + + (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + + (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + + (BaseOpcode->LodOrClampOrMip ? 1 : 0); + if (!IsNSA) { + if (AddrSize > 8) + AddrSize = 16; + else if (AddrSize > 4) + AddrSize = 8; + } + + return VAddrSize == AddrSize; +} + bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { const unsigned Opc = Inst.getOpcode(); @@ -2621,6 +2672,24 @@ return true; } +bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { + const unsigned Opc = Inst.getOpcode(); + const MCInstrDesc &Desc = MII.get(Opc); + + if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) + return true; + + int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); + if (DimIdx < 0) + return true; + + long Imm = Inst.getOperand(DimIdx).getImm(); + if (Imm < 0 || Imm >= 8) + return false; + + return true; +} + static bool IsRevOpcode(const unsigned Opcode) { switch (Opcode) { @@ -2853,11 +2922,20 @@ "d16 modifier is not supported on this GPU"); return false; } + if (!validateMIMGDim(Inst)) { + Error(IDLoc, "dim modifier is required on this GPU"); + return false; + } if (!validateMIMGDataSize(Inst)) { Error(IDLoc, "image data size does not match dmask and tfe"); return false; } + if (!validateMIMGAddrSize(Inst)) { + Error(IDLoc, + "image address size does not match dim and a16"); + return false; + } if (!validateMIMGAtomicDMask(Inst)) { Error(IDLoc, "invalid atomic image dmask"); @@ -3217,6 +3295,24 @@ IDRange); PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, ValRange); + } else if (ID == ".amdhsa_workgroup_processor_mode") { + if (IVersion.Major < 10) + return getParser().Error(IDRange.Start, "directive requires gfx10+", + IDRange); + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, + ValRange); + } else if (ID == ".amdhsa_memory_ordered") { + if (IVersion.Major < 10) + return getParser().Error(IDRange.Start, "directive requires gfx10+", + IDRange); + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, + ValRange); + } else if (ID == ".amdhsa_forward_progress") { + if (IVersion.Major < 10) + return getParser().Error(IDRange.Start, "directive requires gfx10+", + IDRange); + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, + ValRange); } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { PARSE_BITS_ENTRY( KD.compute_pgm_rsrc2, @@ -3370,6 +3466,22 @@ return TokError(Err.str()); } Lex(); + + if (ID == "enable_wgp_mode") { + if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) + return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); + } + + if (ID == "enable_mem_ordered") { + if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) + return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); + } + + if (ID == "enable_fwd_progress") { + if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) + return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); + } + return false; } @@ -3669,7 +3781,8 @@ } OperandMatchResultTy -AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { +AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, + OperandMode Mode) { // Try to parse with a custom parser OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); @@ -3683,6 +3796,35 @@ getLexer().is(AsmToken::EndOfStatement)) return ResTy; + if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { + unsigned Prefix = Operands.size(); + SMLoc LBraceLoc = getTok().getLoc(); + Parser.Lex(); // eat the '[' + + for (;;) { + ResTy = parseReg(Operands); + if (ResTy != MatchOperand_Success) + return ResTy; + + if (getLexer().is(AsmToken::RBrac)) + break; + + if (getLexer().isNot(AsmToken::Comma)) + return MatchOperand_ParseFail; + Parser.Lex(); + } + + if (Operands.size() - Prefix > 1) { + Operands.insert(Operands.begin() + Prefix, + AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); + Operands.push_back(AMDGPUOperand::CreateToken(this, "]", + getTok().getLoc())); + } + + Parser.Lex(); // eat the ']' + return MatchOperand_Success; + } + ResTy = parseRegOrImm(Operands); if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail) @@ -3736,8 +3878,13 @@ Name = parseMnemonicSuffix(Name); Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); + bool IsMIMG = Name.startswith("image_"); + while (!getLexer().is(AsmToken::EndOfStatement)) { - OperandMatchResultTy Res = parseOperand(Operands, Name); + OperandMode Mode = OperandMode_Default; + if (IsMIMG && isGFX10() && Operands.size() == 2) + Mode = OperandMode_NSA; + OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); // Eat the comma or space if there is one. if (getLexer().is(AsmToken::Comma)) @@ -5275,7 +5422,7 @@ Op.addRegOperands(Inst, 1); } else if (Op.isImmModifier()) { OptionalIdx[Op.getImmTy()] = I; - } else { + } else if (!Op.isToken()) { llvm_unreachable("unexpected operand type"); } } @@ -5283,6 +5430,8 @@ bool IsGFX10 = isGFX10(); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); + if (IsGFX10) + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); if (IsGFX10) addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); @@ -5291,7 +5440,8 @@ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); + if (!IsGFX10) + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); } @@ -5389,7 +5539,7 @@ {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, - {"dfmt", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, + {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, @@ -5404,6 +5554,7 @@ {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, + {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, @@ -5472,7 +5623,9 @@ Op.Type == AMDGPUOperand::ImmTyNegHi) { res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); - } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) { + } else if (Op.Type == AMDGPUOperand::ImmTyDim) { + res = parseDim(Operands); + } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) { res = parseDfmtNfmt(Operands); } else { res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); @@ -5758,6 +5911,52 @@ return isImm() && isUInt<16>(getImm()); } +OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { + if (!isGFX10()) + return MatchOperand_NoMatch; + + SMLoc S = Parser.getTok().getLoc(); + + if (getLexer().isNot(AsmToken::Identifier)) + return MatchOperand_NoMatch; + if (getLexer().getTok().getString() != "dim") + return MatchOperand_NoMatch; + + Parser.Lex(); + if (getLexer().isNot(AsmToken::Colon)) + return MatchOperand_ParseFail; + + Parser.Lex(); + + // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an + // integer. + std::string Token; + if (getLexer().is(AsmToken::Integer)) { + SMLoc Loc = getLexer().getTok().getEndLoc(); + Token = getLexer().getTok().getString(); + Parser.Lex(); + if (getLexer().getTok().getLoc() != Loc) + return MatchOperand_ParseFail; + } + if (getLexer().isNot(AsmToken::Identifier)) + return MatchOperand_ParseFail; + Token += getLexer().getTok().getString(); + + StringRef DimId = Token; + if (DimId.startswith("SQ_RSRC_IMG_")) + DimId = DimId.substr(12); + + const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); + if (!DimInfo) + return MatchOperand_ParseFail; + + Parser.Lex(); + + Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, + AMDGPUOperand::ImmTyDim)); + return MatchOperand_Success; +} + OperandMatchResultTy AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { using namespace AMDGPU::DPP; Index: lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp =================================================================== --- lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -290,7 +290,26 @@ } if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) { - Res = convertMIMGInst(MI); + int VAddr0Idx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0); + int RsrcIdx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc); + unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1; + if (VAddr0Idx >= 0 && NSAArgs > 0) { + unsigned NSAWords = (NSAArgs + 3) / 4; + if (Bytes.size() < 4 * NSAWords) { + Res = MCDisassembler::Fail; + } else { + for (unsigned i = 0; i < NSAArgs; ++i) { + MI.insert(MI.begin() + VAddr0Idx + 1 + i, + decodeOperand_VGPR_32(Bytes[i])); + } + Bytes = Bytes.slice(4 * NSAWords); + } + } + + if (Res) + Res = convertMIMGInst(MI); } if (Res && IsSDWA) @@ -339,9 +358,9 @@ return MCDisassembler::Success; } -// Note that MIMG format provides no information about VADDR size. -// Consequently, decoded instructions always show address -// as if it has 1 dword, which could be not really so. +// Note that before gfx10, the MIMG encoding provided no information about +// VADDR size. Consequently, decoded instructions always show address as if it +// has 1 dword, which could be not really so. DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), @@ -349,7 +368,8 @@ int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata); - + int VAddr0Idx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0); int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dmask); @@ -362,16 +382,42 @@ assert(DMaskIdx != -1); assert(TFEIdx != -1); + const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode()); bool IsAtomic = (VDstIdx != -1); bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4; - unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf; - if (DMask == 0) - return MCDisassembler::Success; + bool IsNSA = false; + unsigned AddrSize = Info->VAddrDwords; + + if (STI.getFeatureBits()[AMDGPU::FeatureGFX10]) { + unsigned DimIdx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim); + const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = + AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); + const AMDGPU::MIMGDimInfo *Dim = + AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm()); + + AddrSize = BaseOpcode->NumExtraArgs + + (BaseOpcode->Gradients ? Dim->NumGradients : 0) + + (BaseOpcode->Coordinates ? Dim->NumCoords : 0) + + (BaseOpcode->LodOrClampOrMip ? 1 : 0); + IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA; + if (!IsNSA) { + if (AddrSize > 8) + AddrSize = 16; + else if (AddrSize > 4) + AddrSize = 8; + } else { + if (AddrSize > Info->VAddrDwords) { + // The NSA encoding does not contain enough operands for the combination + // of base opcode / dimension. Should this be an error? + return MCDisassembler::Success; + } + } + } - unsigned DstSize = IsGather4 ? 4 : countPopulation(DMask); - if (DstSize == 1) - return MCDisassembler::Success; + unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf; + unsigned DstSize = IsGather4 ? 4 : std::max(countPopulation(DMask), 1u); bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm(); if (D16 && AMDGPU::hasPackedD16(STI)) { @@ -382,44 +428,64 @@ if (MI.getOperand(TFEIdx).getImm()) return MCDisassembler::Success; - int NewOpcode = -1; + if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords) + return MCDisassembler::Success; - if (IsGather4) { - if (D16 && AMDGPU::hasPackedD16(STI)) - NewOpcode = AMDGPU::getMaskedMIMGOp(MI.getOpcode(), 2); - else + int NewOpcode = + AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize); + if (NewOpcode == -1) + return MCDisassembler::Success; + + // Widen the register to the correct number of enabled channels. + unsigned NewVdata = AMDGPU::NoRegister; + if (DstSize != Info->VDataDwords) { + auto DataRCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass; + + // Get first subregister of VData + unsigned Vdata0 = MI.getOperand(VDataIdx).getReg(); + unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0); + Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0; + + NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, + &MRI.getRegClass(DataRCID)); + if (NewVdata == AMDGPU::NoRegister) { + // It's possible to encode this such that the low register + enabled + // components exceeds the register count. return MCDisassembler::Success; - } else { - NewOpcode = AMDGPU::getMaskedMIMGOp(MI.getOpcode(), DstSize); - if (NewOpcode == -1) + } + } + + unsigned NewVAddr0 = AMDGPU::NoRegister; + if (STI.getFeatureBits()[AMDGPU::FeatureGFX10] && !IsNSA && + AddrSize != Info->VAddrDwords) { + unsigned VAddr0 = MI.getOperand(VAddr0Idx).getReg(); + unsigned VAddrSub0 = MRI.getSubReg(VAddr0, AMDGPU::sub0); + VAddr0 = (VAddrSub0 != 0) ? VAddrSub0 : VAddr0; + + auto AddrRCID = MCII->get(NewOpcode).OpInfo[VAddr0Idx].RegClass; + NewVAddr0 = MRI.getMatchingSuperReg(VAddr0, AMDGPU::sub0, + &MRI.getRegClass(AddrRCID)); + if (NewVAddr0 == AMDGPU::NoRegister) return MCDisassembler::Success; } - auto RCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass; + MI.setOpcode(NewOpcode); - // Get first subregister of VData - unsigned Vdata0 = MI.getOperand(VDataIdx).getReg(); - unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0); - Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0; + if (NewVdata != AMDGPU::NoRegister) { + MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata); - // Widen the register to the correct number of enabled channels. - auto NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, - &MRI.getRegClass(RCID)); - if (NewVdata == AMDGPU::NoRegister) { - // It's possible to encode this such that the low register + enabled - // components exceeds the register count. - return MCDisassembler::Success; + if (IsAtomic) { + // Atomic operations have an additional operand (a copy of data) + MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata); + } } - MI.setOpcode(NewOpcode); - // vaddr will be always appear as a single VGPR. This will look different than - // how it is usually emitted because the number of register components is not - // in the instruction encoding. - MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata); - - if (IsAtomic) { - // Atomic operations have an additional operand (a copy of data) - MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata); + if (NewVAddr0 != AMDGPU::NoRegister) { + MI.getOperand(VAddr0Idx) = MCOperand::createReg(NewVAddr0); + } else if (IsNSA) { + assert(AddrSize <= Info->VAddrDwords); + MI.erase(MI.begin() + VAddr0Idx + AddrSize, + MI.begin() + VAddr0Idx + Info->VAddrDwords); } return MCDisassembler::Success; Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h =================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h @@ -78,6 +78,8 @@ raw_ostream &O); void printDMask(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printDim(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); void printUNorm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printDA(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp =================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -208,6 +208,18 @@ } } +void AMDGPUInstPrinter::printDim(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { + unsigned Dim = MI->getOperand(OpNo).getImm(); + O << " dim:SQ_RSRC_IMG_"; + + const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); + if (DimInfo) + O << DimInfo->AsmSuffix; + else + O << Dim; +} + void AMDGPUInstPrinter::printUNorm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { printNamedBit(MI, OpNo, O, "unorm"); @@ -254,8 +266,12 @@ const MCSubtargetInfo &STI, raw_ostream &O) { if (unsigned Val = MI->getOperand(OpNo).getImm()) { - O << " dfmt:" << (Val & 15); - O << ", nfmt:" << (Val >> 4); + if (AMDGPU::isGFX10(STI)) + O << " format:" << Val; + else { + O << " dfmt:" << (Val & 15); + O << ", nfmt:" << (Val >> 4); + } } } Index: lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -16,6 +16,7 @@ #include "MCTargetDesc/AMDGPUFixupKinds.h" #include "MCTargetDesc/AMDGPUMCCodeEmitter.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIDefines.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" @@ -273,7 +274,25 @@ OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff)); } - if (bytes > 4) + // NSA encoding. + if (AMDGPU::isGFX10(STI) && Desc.TSFlags & SIInstrFlags::MIMG) { + int vaddr0 = AMDGPU::getNamedOperandIdx(MI.getOpcode(), + AMDGPU::OpName::vaddr0); + int srsrc = AMDGPU::getNamedOperandIdx(MI.getOpcode(), + AMDGPU::OpName::srsrc); + assert(vaddr0 >= 0 && srsrc > vaddr0); + unsigned NumExtraAddrs = srsrc - vaddr0 - 1; + unsigned NumPadding = (-NumExtraAddrs) & 3; + + for (unsigned i = 0; i < NumExtraAddrs; ++i) + OS.write((uint8_t)getMachineOpValue(MI, MI.getOperand(vaddr0 + 1 + i), + Fixups, STI)); + for (unsigned i = 0; i < NumPadding; ++i) + OS.write(0); + } + + if ((bytes > 8 && STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]) || + (bytes > 4 && !STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal])) return; // Check for additional literals in SRC0/1/2 (Op 1/2/3) @@ -428,7 +447,8 @@ const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); if (AMDGPU::isSISrcOperand(Desc, OpNo)) { uint32_t Enc = getLitEncoding(MO, Desc.OpInfo[OpNo], STI); - if (Enc != ~0U && (Enc != 255 || Desc.getSize() == 4)) + if (Enc != ~0U && + (Enc != 255 || Desc.getSize() == 4 || Desc.getSize() == 8)) return Enc; } else if (MO.isImm()) Index: lib/Target/AMDGPU/MIMGInstructions.td =================================================================== --- lib/Target/AMDGPU/MIMGInstructions.td +++ lib/Target/AMDGPU/MIMGInstructions.td @@ -11,10 +11,14 @@ // // - MIMGEncGfx6: encoding introduced with gfx6 (obsoleted for atomics in gfx8) // - MIMGEncGfx8: encoding introduced with gfx8 for atomics +// - MIMGEncGfx10Default: gfx default (non-NSA) encoding +// - MIMGEncGfx10NSA: gfx10 NSA encoding class MIMGEncoding; def MIMGEncGfx6 : MIMGEncoding; def MIMGEncGfx8 : MIMGEncoding; +def MIMGEncGfx10Default : MIMGEncoding; +def MIMGEncGfx10NSA : MIMGEncoding; def MIMGEncoding : GenericEnum { let FilterClass = "MIMGEncoding"; @@ -59,13 +63,28 @@ def MIMGDimInfoTable : GenericTable { let FilterClass = "AMDGPUDimProps"; let CppTypeName = "MIMGDimInfo"; - let Fields = ["Dim", "NumCoords", "NumGradients", "DA"]; + let Fields = ["Dim", "NumCoords", "NumGradients", "DA", "Encoding", "AsmSuffix"]; GenericEnum TypeOf_Dim = MIMGDim; let PrimaryKey = ["Dim"]; let PrimaryKeyName = "getMIMGDimInfo"; } +def getMIMGDimInfoByEncoding : SearchIndex { + let Table = MIMGDimInfoTable; + let Key = ["Encoding"]; +} + +def getMIMGDimInfoByAsmSuffix : SearchIndex { + let Table = MIMGDimInfoTable; + let Key = ["AsmSuffix"]; +} + +class mimg si_gfx10, bits<7> vi = si_gfx10> { + field bits<7> SI_GFX10 = si_gfx10; + field bits<7> VI = vi; +} + class MIMGLZMapping { MIMGBaseOpcode L = l; MIMGBaseOpcode LZ = lz; @@ -82,11 +101,6 @@ let PrimaryKeyName = "getMIMGLZMappingInfo"; } -class mimg si, bits<7> vi = si> { - field bits<7> SI = si; - field bits<7> VI = vi; -} - class MIMG : InstSI { @@ -108,7 +122,7 @@ Instruction Opcode = !cast(NAME); MIMGBaseOpcode BaseOpcode; - MIMGEncoding MIMGEncoding = MIMGEncGfx6; + MIMGEncoding MIMGEncoding; bits<8> VDataDwords; bits<8> VAddrDwords; } @@ -129,15 +143,66 @@ let Key = ["Opcode"]; } +// This is a separate class so that TableGen memoizes the computations. +class MIMGNSAHelper { + list AddrAsmNames = + !foldl([], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], lhs, i, + !if(!lt(i, num_addrs), !listconcat(lhs, ["vaddr"#!size(lhs)]), lhs)); + dag AddrIns = !dag(ins, !foreach(arg, AddrAsmNames, VGPR_32), AddrAsmNames); + string AddrAsm = "[" # !foldl("$" # !head(AddrAsmNames), !tail(AddrAsmNames), lhs, rhs, + lhs # ", $" # rhs) # "]"; + + int NSA = !if(!le(num_addrs, 1), ?, + !if(!le(num_addrs, 5), 1, + !if(!le(num_addrs, 9), 2, + !if(!le(num_addrs, 13), 3, ?)))); +} + +// Base class of all pre-gfx10 MIMG instructions. +class MIMG_gfx6789 op, dag outs, string dns = ""> + : MIMG, MIMGe_gfx6789 { + let SubtargetPredicate = isGFX6GFX7GFX8GFX9; + let AssemblerPredicates = [isGFX6GFX7GFX8GFX9]; + + let MIMGEncoding = MIMGEncGfx6; + + let d16 = !if(BaseOpcode.HasD16, ?, 0); +} + +// Base class of all non-NSA gfx10 MIMG instructions. +class MIMG_gfx10 + : MIMG, MIMGe_gfx10 { + let SubtargetPredicate = isGFX10Plus; + let AssemblerPredicates = [isGFX10Plus]; + + let MIMGEncoding = MIMGEncGfx10Default; + + let d16 = !if(BaseOpcode.HasD16, ?, 0); + let nsa = 0; +} + +// Base class for all NSA MIMG instructions. Note that 1-dword addresses always +// use non-NSA variants. +class MIMG_nsa_gfx10 + : MIMG, MIMGe_gfx10 { + let SubtargetPredicate = isGFX10Plus; + let AssemblerPredicates = [isGFX10Plus]; + + let MIMGEncoding = MIMGEncGfx10NSA; + + MIMGNSAHelper nsah = MIMGNSAHelper; + dag AddrIns = nsah.AddrIns; + string AddrAsm = nsah.AddrAsm; + + let d16 = !if(BaseOpcode.HasD16, ?, 0); + let nsa = nsah.NSA; +} + class MIMG_NoSampler_Helper op, string asm, RegisterClass dst_rc, RegisterClass addr_rc, string dns=""> - : MIMG <(outs dst_rc:$vdata), dns>, - MIMGe { - let ssamp = 0; - let d16 = !if(BaseOpcode.HasD16, ?, 0); - + : MIMG_gfx6789 { let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc, DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da), @@ -146,18 +211,61 @@ #!if(BaseOpcode.HasD16, "$d16", ""); } +class MIMG_NoSampler_gfx10 + : MIMG_gfx10 { + let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, DMask:$dmask, + Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc, + SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe), + !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); + let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe" + #!if(BaseOpcode.HasD16, "$d16", ""); +} + +class MIMG_NoSampler_nsa_gfx10 + : MIMG_nsa_gfx10 { + let InOperandList = !con(AddrIns, + (ins SReg_256:$srsrc, DMask:$dmask, + Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc, + SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe), + !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); + let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe" + #!if(BaseOpcode.HasD16, "$d16", ""); +} + multiclass MIMG_NoSampler_Src_Helper op, string asm, RegisterClass dst_rc, bit enableDisasm> { - let VAddrDwords = 1 in - def NAME # _V1 : MIMG_NoSampler_Helper ; - let VAddrDwords = 2 in - def NAME # _V2 : MIMG_NoSampler_Helper ; - let VAddrDwords = 3 in - def NAME # _V3 : MIMG_NoSampler_Helper ; - let VAddrDwords = 4 in - def NAME # _V4 : MIMG_NoSampler_Helper ; + let ssamp = 0 in { + let VAddrDwords = 1 in { + def _V1 : MIMG_NoSampler_Helper ; + def _V1_gfx10 : MIMG_NoSampler_gfx10; + } + + let VAddrDwords = 2 in { + def _V2 : MIMG_NoSampler_Helper ; + def _V2_gfx10 : MIMG_NoSampler_gfx10; + def _V2_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10; + } + + let VAddrDwords = 3 in { + def _V3 : MIMG_NoSampler_Helper ; + def _V3_gfx10 : MIMG_NoSampler_gfx10; + def _V3_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10; + } + + let VAddrDwords = 4 in { + def _V4 : MIMG_NoSampler_Helper ; + def _V4_gfx10 : MIMG_NoSampler_gfx10; + def _V4_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10; + } + } } multiclass MIMG_NoSampler op, string asm, bit has_d16, bit mip = 0, @@ -187,17 +295,7 @@ RegisterClass data_rc, RegisterClass addr_rc, string dns = ""> - : MIMG <(outs), dns>, - MIMGe { - let ssamp = 0; - let d16 = !if(BaseOpcode.HasD16, ?, 0); - - let mayLoad = 0; - let mayStore = 1; - let hasSideEffects = 0; - let hasPostISelHook = 0; - let DisableWQM = 1; - + : MIMG_gfx6789 { let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc, DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da), @@ -206,18 +304,60 @@ #!if(BaseOpcode.HasD16, "$d16", ""); } -multiclass MIMG_Store_Addr_Helper op, string asm, +class MIMG_Store_gfx10 + : MIMG_gfx10 { + let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc, + DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc, + GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe), + !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); + let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe" + #!if(BaseOpcode.HasD16, "$d16", ""); +} + +class MIMG_Store_nsa_gfx10 + : MIMG_nsa_gfx10 { + let InOperandList = !con((ins DataRC:$vdata), + AddrIns, + (ins SReg_256:$srsrc, DMask:$dmask, + Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc, + SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe), + !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); + let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe" + #!if(BaseOpcode.HasD16, "$d16", ""); +} + +multiclass MIMG_Store_Addr_Helper { - let VAddrDwords = 1 in - def NAME # _V1 : MIMG_Store_Helper ; - let VAddrDwords = 2 in - def NAME # _V2 : MIMG_Store_Helper ; - let VAddrDwords = 3 in - def NAME # _V3 : MIMG_Store_Helper ; - let VAddrDwords = 4 in - def NAME # _V4 : MIMG_Store_Helper ; + let mayLoad = 0, mayStore = 1, hasSideEffects = 0, hasPostISelHook = 0, + DisableWQM = 1, ssamp = 0 in { + let VAddrDwords = 1 in { + def _V1 : MIMG_Store_Helper ; + def _V1_gfx10 : MIMG_Store_gfx10 ; + } + let VAddrDwords = 2 in { + def _V2 : MIMG_Store_Helper ; + def _V2_gfx10 : MIMG_Store_gfx10 ; + def _V2_nsa_gfx10 : MIMG_Store_nsa_gfx10 ; + } + let VAddrDwords = 3 in { + def _V3 : MIMG_Store_Helper ; + def _V3_gfx10 : MIMG_Store_gfx10 ; + def _V3_nsa_gfx10 : MIMG_Store_nsa_gfx10 ; + } + let VAddrDwords = 4 in { + def _V4 : MIMG_Store_Helper ; + def _V4_gfx10 : MIMG_Store_gfx10 ; + def _V4_nsa_gfx10 : MIMG_Store_nsa_gfx10 ; + } + } } multiclass MIMG_Store op, string asm, bit has_d16, bit mip = 0> { @@ -239,15 +379,9 @@ } } -class MIMG_Atomic_Helper - : MIMG <(outs data_rc:$vdst), !if(enableDasm, dns, "")> { - let mayLoad = 1; - let mayStore = 1; - let hasSideEffects = 1; // FIXME: Remove this - let hasPostISelHook = 0; - let DisableWQM = 1; +class MIMG_Atomic_gfx6789_base op, string asm, RegisterClass data_rc, + RegisterClass addr_rc, string dns=""> + : MIMG_gfx6789 { let Constraints = "$vdst = $vdata"; let AsmMatchConverter = "cvtMIMGAtomic"; @@ -257,39 +391,80 @@ let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"; } -multiclass MIMG_Atomic_Helper_m { - let ssamp = 0, d16 = 0 in { - def _si : MIMG_Atomic_Helper, - SIMCInstr, - MIMGe { - let AssemblerPredicates = [isGFX6GFX7]; - let DisableDecoder = DisableSIDecoder; - } +class MIMG_Atomic_si + : MIMG_Atomic_gfx6789_base { + let AssemblerPredicates = [isGFX6GFX7]; +} - def _vi : MIMG_Atomic_Helper, - SIMCInstr, - MIMGe { - let AssemblerPredicates = [isGFX8GFX9]; - let DisableDecoder = DisableVIDecoder; - let MIMGEncoding = MIMGEncGfx8; - } - } +class MIMG_Atomic_vi + : MIMG_Atomic_gfx6789_base { + let AssemblerPredicates = [isGFX8GFX9]; + let MIMGEncoding = MIMGEncGfx8; +} + +class MIMG_Atomic_gfx10 + : MIMG_gfx10(op.SI_GFX10), (outs DataRC:$vdst), + !if(enableDisasm, "AMDGPU", "")> { + let Constraints = "$vdst = $vdata"; + let AsmMatchConverter = "cvtMIMGAtomic"; + + let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc, + DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc, + GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe); + let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"; +} + +class MIMG_Atomic_nsa_gfx10 + : MIMG_nsa_gfx10(op.SI_GFX10), (outs DataRC:$vdst), num_addrs, + !if(enableDisasm, "AMDGPU", "")> { + let Constraints = "$vdst = $vdata"; + let AsmMatchConverter = "cvtMIMGAtomic"; + + let InOperandList = !con((ins DataRC:$vdata), + AddrIns, + (ins SReg_256:$srsrc, DMask:$dmask, + Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc, + SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe)); + let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"; } multiclass MIMG_Atomic_Addr_Helper_m { - // _V* variants have different address size, but the size is not encoded. - // So only one variant can be disassembled. V1 looks the safest to decode. - let VAddrDwords = 1 in - defm _V1 : MIMG_Atomic_Helper_m ; - let VAddrDwords = 2 in - defm _V2 : MIMG_Atomic_Helper_m ; - let VAddrDwords = 3 in - defm _V3 : MIMG_Atomic_Helper_m ; - let VAddrDwords = 4 in - defm _V4 : MIMG_Atomic_Helper_m ; + let hasSideEffects = 1, // FIXME: remove this + mayLoad = 1, mayStore = 1, hasPostISelHook = 0, DisableWQM = 1, + ssamp = 0 in { + let VAddrDwords = 1 in { + def _V1_si : MIMG_Atomic_si ; + def _V1_vi : MIMG_Atomic_vi ; + def _V1_gfx10 : MIMG_Atomic_gfx10 ; + } + let VAddrDwords = 2 in { + def _V2_si : MIMG_Atomic_si ; + def _V2_vi : MIMG_Atomic_vi ; + def _V2_gfx10 : MIMG_Atomic_gfx10 ; + def _V2_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 ; + } + let VAddrDwords = 3 in { + def _V3_si : MIMG_Atomic_si ; + def _V3_vi : MIMG_Atomic_vi ; + def _V3_gfx10 : MIMG_Atomic_gfx10 ; + def _V3_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 ; + } + let VAddrDwords = 4 in { + def _V4_si : MIMG_Atomic_si ; + def _V4_vi : MIMG_Atomic_vi ; + def _V4_gfx10 : MIMG_Atomic_gfx10 ; + def _V4_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 ; + } + } } multiclass MIMG_Atomic { // 64-bit atomics @@ -311,10 +486,7 @@ class MIMG_Sampler_Helper op, string asm, RegisterClass dst_rc, RegisterClass src_rc, string dns=""> - : MIMG <(outs dst_rc:$vdata), dns>, - MIMGe { - let d16 = !if(BaseOpcode.HasD16, ?, 0); - + : MIMG_gfx6789 { let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da), @@ -323,6 +495,33 @@ #!if(BaseOpcode.HasD16, "$d16", ""); } +class MIMG_Sampler_gfx10 + : MIMG_gfx10 { + let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, SReg_128:$ssamp, + DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc, + GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe), + !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); + let AsmString = opcode#" $vdata, $vaddr0, $srsrc, $ssamp$dmask$dim$unorm" + #"$dlc$glc$slc$r128$tfe$lwe" + #!if(BaseOpcode.HasD16, "$d16", ""); +} + +class MIMG_Sampler_nsa_gfx10 + : MIMG_nsa_gfx10 { + let InOperandList = !con(AddrIns, + (ins SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask, + Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc, + SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe), + !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); + let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc, $ssamp$dmask$dim$unorm" + #"$dlc$glc$slc$r128$tfe$lwe" + #!if(BaseOpcode.HasD16, "$d16", ""); +} + class MIMGAddrSize { int NumWords = dw; @@ -339,6 +538,11 @@ bit Disassemble = enable_disasm; } +// Return whether x is in lst. +class isIntInList lst> { + bit ret = !foldl(0, lst, lhs, y, !or(lhs, !eq(x, y))); +} + // Return whether a value inside the range [min, max] (endpoints inclusive) // is in the given list. class isRangeInList lst> { @@ -374,16 +578,41 @@ !listconcat(lhs.List, [MIMGAddrSize]), !if(!eq(dw, 3), 3, !add(dw, 1))>, // we still need _V4 for codegen w/ 3 dwords lhs)).List; + + // For NSA, generate machine instructions for all possible numbers of words + // except 1 (which is already covered by the non-NSA case). + // The disassembler defaults to the largest number of arguments among the + // variants with the same number of NSA words, and custom code then derives + // the exact variant based on the sample variant and the image dimension. + list NSAInstrs = + !foldl([], [[12, 11, 10], [9, 8, 7, 6], [5, 4, 3, 2]], prev, nsa_group, + !listconcat(prev, + !foldl([], nsa_group, lhs, dw, + !if(isIntInList.ret, + !listconcat(lhs, [MIMGAddrSize]), + lhs)))); } multiclass MIMG_Sampler_Src_Helper op, string asm, AMDGPUSampleVariant sample, RegisterClass dst_rc, bit enableDisasm = 0> { foreach addr = MIMG_Sampler_AddrSizes.MachineInstrs in { - let VAddrDwords = addr.NumWords in - def _V # addr.NumWords - : MIMG_Sampler_Helper ; + let VAddrDwords = addr.NumWords in { + def _V # addr.NumWords + : MIMG_Sampler_Helper ; + def _V # addr.NumWords # _gfx10 + : MIMG_Sampler_gfx10 ; + } + } + + foreach addr = MIMG_Sampler_AddrSizes.NSAInstrs in { + let VAddrDwords = addr.NumWords in { + def _V # addr.NumWords # _nsa_gfx10 + : MIMG_Sampler_nsa_gfx10; + } } } Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -1003,6 +1003,13 @@ // GFX9 added a 13-bit signed offset. When using regular flat instructions, // the sign bit is ignored and is treated as a 12-bit unsigned offset. + // GFX10 shrinked signed offset to 12 bits. When using regular flat + // instructions, the sign bit is also ignored and is treated as 11-bit + // unsigned offset. + + if (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10) + return isUInt<11>(AM.BaseOffs) && AM.Scale == 0; + // Just r + i return isUInt<12>(AM.BaseOffs) && AM.Scale == 0; } @@ -2828,8 +2835,9 @@ } - if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS && - Subtarget->getRegisterInfo()->regsOverlap(Reg, AMDGPU::FLAT_SCR)) { + if ((Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS || + Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10) && + Subtarget->getRegisterInfo()->regsOverlap(Reg, AMDGPU::FLAT_SCR)) { report_fatal_error(Twine("invalid register \"" + StringRef(RegName) + "\" for subtarget.")); } @@ -4656,7 +4664,7 @@ } static bool parseCachePolicy(SDValue CachePolicy, SelectionDAG &DAG, - SDValue *GLC, SDValue *SLC) { + SDValue *GLC, SDValue *SLC, SDValue *DLC) { auto CachePolicyConst = cast(CachePolicy.getNode()); uint64_t Value = CachePolicyConst->getZExtValue(); @@ -4669,6 +4677,10 @@ *SLC = DAG.getTargetConstant((Value & 0x2) ? 1 : 0, DL, MVT::i32); Value &= ~(uint64_t)0x2; } + if (DLC) { + *DLC = DAG.getTargetConstant((Value & 0x4) ? 1 : 0, DL, MVT::i32); + Value &= ~(uint64_t)0x4; + } return Value == 0; } @@ -4786,6 +4798,7 @@ const AMDGPU::MIMGLZMappingInfo *LZMappingInfo = AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode); unsigned IntrOpcode = Intr->BaseOpcode; + bool IsGFX10 = Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10; SmallVector ResultTypes(Op->value_begin(), Op->value_end()); SmallVector OrigResultTypes(Op->value_begin(), Op->value_end()); @@ -4924,7 +4937,22 @@ VAddrs.push_back(Op.getOperand(AddrIdx + i)); } - SDValue VAddr = getBuildDwordsVector(DAG, DL, VAddrs); + // If the register allocator cannot place the address registers contiguously + // without introducing moves, then using the non-sequential address encoding + // is always preferable, since it saves VALU instructions and is usually a + // wash in terms of code size or even better. + // + // However, we currently have no way of hinting to the register allocator that + // MIMG addresses should be placed contiguously when it is possible to do so, + // so force non-NSA for the common 2-address case as a heuristic. + // + // SIShrinkInstructions will convert NSA encodings to non-NSA after register + // allocation when possible. + bool UseNSA = + ST->hasFeature(AMDGPU::FeatureNSAEncoding) && VAddrs.size() >= 3; + SDValue VAddr; + if (!UseNSA) + VAddr = getBuildDwordsVector(DAG, DL, VAddrs); SDValue True = DAG.getTargetConstant(1, DL, MVT::i1); SDValue False = DAG.getTargetConstant(0, DL, MVT::i1); @@ -4987,45 +5015,66 @@ SDValue GLC; SDValue SLC; + SDValue DLC; if (BaseOpcode->Atomic) { GLC = True; // TODO no-return optimization - if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, nullptr, &SLC)) + if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, nullptr, &SLC, + IsGFX10 ? &DLC : nullptr)) return Op; } else { - if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, &GLC, &SLC)) + if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, &GLC, &SLC, + IsGFX10 ? &DLC : nullptr)) return Op; } - SmallVector Ops; + SmallVector Ops; if (BaseOpcode->Store || BaseOpcode->Atomic) Ops.push_back(VData); // vdata - Ops.push_back(VAddr); + if (UseNSA) { + for (const SDValue &Addr : VAddrs) + Ops.push_back(Addr); + } else { + Ops.push_back(VAddr); + } Ops.push_back(Op.getOperand(AddrIdx + NumVAddrs)); // rsrc if (BaseOpcode->Sampler) Ops.push_back(Op.getOperand(AddrIdx + NumVAddrs + 1)); // sampler Ops.push_back(DAG.getTargetConstant(DMask, DL, MVT::i32)); + if (IsGFX10) + Ops.push_back(DAG.getTargetConstant(DimInfo->Encoding, DL, MVT::i32)); Ops.push_back(Unorm); + if (IsGFX10) + Ops.push_back(DLC); Ops.push_back(GLC); Ops.push_back(SLC); Ops.push_back(IsA16 && // a16 or r128 ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False); Ops.push_back(TFE); // tfe Ops.push_back(LWE); // lwe - Ops.push_back(DimInfo->DA ? True : False); + if (!IsGFX10) + Ops.push_back(DimInfo->DA ? True : False); if (BaseOpcode->HasD16) Ops.push_back(IsD16 ? True : False); if (isa(Op)) Ops.push_back(Op.getOperand(0)); // chain - int NumVAddrDwords = VAddr.getValueType().getSizeInBits() / 32; + int NumVAddrDwords = + UseNSA ? VAddrs.size() : VAddr.getValueType().getSizeInBits() / 32; int Opcode = -1; - if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) - Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx8, - NumVDataDwords, NumVAddrDwords); - if (Opcode == -1) - Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx6, + if (IsGFX10) { + Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, + UseNSA ? AMDGPU::MIMGEncGfx10NSA + : AMDGPU::MIMGEncGfx10Default, NumVDataDwords, NumVAddrDwords); + } else { + if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) + Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx8, + NumVDataDwords, NumVAddrDwords); + if (Opcode == -1) + Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx6, + NumVDataDwords, NumVAddrDwords); + } assert(Opcode != -1); MachineSDNode *NewNode = DAG.getMachineNode(Opcode, DL, ResultTypes, Ops); Index: lib/Target/AMDGPU/SIInstrFormats.td =================================================================== --- lib/Target/AMDGPU/SIInstrFormats.td +++ lib/Target/AMDGPU/SIInstrFormats.td @@ -246,38 +246,58 @@ let Inst{31-26} = 0x32; // encoding } -class MIMGe op> : Enc64 { +class MIMGe : Enc64 { bits<8> vdata; bits<4> dmask; bits<1> unorm; bits<1> glc; - bits<1> da; bits<1> r128; bits<1> tfe; bits<1> lwe; bits<1> slc; bit d16; - bits<8> vaddr; bits<7> srsrc; bits<7> ssamp; let Inst{11-8} = dmask; let Inst{12} = unorm; let Inst{13} = glc; - let Inst{14} = da; let Inst{15} = r128; let Inst{16} = tfe; let Inst{17} = lwe; - let Inst{24-18} = op; let Inst{25} = slc; let Inst{31-26} = 0x3c; - let Inst{39-32} = vaddr; let Inst{47-40} = vdata; let Inst{52-48} = srsrc{6-2}; let Inst{57-53} = ssamp{6-2}; let Inst{63} = d16; } +class MIMGe_gfx6789 op> : MIMGe { + bits<8> vaddr; + bits<1> da; + + let Inst{14} = da; + let Inst{24-18} = op; + let Inst{39-32} = vaddr; +} + +class MIMGe_gfx10 op> : MIMGe { + bits<8> vaddr0; + bits<3> dim; + bits<2> nsa; + bits<1> dlc; + bits<1> a16 = 0; // TODO: this should be an operand + + let Inst{0} = op{7}; + let Inst{2-1} = nsa; + let Inst{5-3} = dim; + let Inst{7} = dlc; + let Inst{24-18} = op{6-0}; + let Inst{39-32} = vaddr0; + let Inst{62} = a16; +} + class EXPe : Enc64 { bits<4> en; bits<6> tgt; Index: lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.cpp +++ lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3223,6 +3223,53 @@ } } + if (isMIMG(MI)) { + const MachineOperand *DimOp = getNamedOperand(MI, AMDGPU::OpName::dim); + if (DimOp) { + int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode, + AMDGPU::OpName::vaddr0); + int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc); + const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opcode); + const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = + AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); + const AMDGPU::MIMGDimInfo *Dim = + AMDGPU::getMIMGDimInfoByEncoding(DimOp->getImm()); + + if (!Dim) { + ErrInfo = "dim is out of range"; + return false; + } + + bool IsNSA = SRsrcIdx - VAddr0Idx > 1; + unsigned AddrWords = BaseOpcode->NumExtraArgs + + (BaseOpcode->Gradients ? Dim->NumGradients : 0) + + (BaseOpcode->Coordinates ? Dim->NumCoords : 0) + + (BaseOpcode->LodOrClampOrMip ? 1 : 0); + + unsigned VAddrWords; + if (IsNSA) { + VAddrWords = SRsrcIdx - VAddr0Idx; + } else { + const TargetRegisterClass *RC = getOpRegClass(MI, VAddr0Idx); + VAddrWords = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC) / 32; + if (AddrWords > 8) + AddrWords = 16; + else if (AddrWords > 4) + AddrWords = 8; + else if (AddrWords == 3 && VAddrWords == 4) { + // CodeGen uses the V4 variant of instructions for three addresses, + // because the selection DAG does not support non-power-of-two types. + AddrWords = 4; + } + } + + if (VAddrWords != AddrWords) { + ErrInfo = "bad vaddr size"; + return false; + } + } + } + const MachineOperand *DppCt = getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl); if (DppCt) { using namespace AMDGPU::DPP; @@ -5356,25 +5403,35 @@ return DescSize; // No operands. if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx])) - return DescSize + 4; + return isVOP3(MI) ? 12 : (DescSize + 4); int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); if (Src1Idx == -1) return DescSize; if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx])) - return DescSize + 4; + return isVOP3(MI) ? 12 : (DescSize + 4); int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); if (Src2Idx == -1) return DescSize; if (isLiteralConstantLike(MI.getOperand(Src2Idx), Desc.OpInfo[Src2Idx])) - return DescSize + 4; + return isVOP3(MI) ? 12 : (DescSize + 4); return DescSize; } + // Check whether we have extra NSA words. + if (isMIMG(MI)) { + int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); + if (VAddr0Idx < 0) + return 8; + + int RSrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); + return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4); + } + switch (Opc) { case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::KILL: Index: lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -845,6 +845,7 @@ def FORMAT : NamedOperandU8<"FORMAT", NamedMatchClass<"FORMAT">>; def DMask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>; +def Dim : NamedOperandU8<"Dim", NamedMatchClass<"Dim", 0>>; def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>; def row_mask : NamedOperandU32<"RowMask", NamedMatchClass<"RowMask">>; Index: lib/Target/AMDGPU/SIShrinkInstructions.cpp =================================================================== --- lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -38,6 +38,8 @@ public: static char ID; + void shrinkMIMG(MachineInstr &MI); + public: SIShrinkInstructions() : MachineFunctionPass(ID) { } @@ -211,6 +213,96 @@ } } +// Shrink NSA encoded instructions with contiguous VGPRs to non-NSA encoding. +void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) { + const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode()); + if (Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA) + return; + + MachineFunction *MF = MI.getParent()->getParent(); + const GCNSubtarget &ST = MF->getSubtarget(); + const SIInstrInfo *TII = ST.getInstrInfo(); + const SIRegisterInfo &TRI = TII->getRegisterInfo(); + int VAddr0Idx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0); + unsigned NewAddrDwords = Info->VAddrDwords; + const TargetRegisterClass *RC; + + if (Info->VAddrDwords == 2) { + RC = &AMDGPU::VReg_64RegClass; + } else if (Info->VAddrDwords == 3) { + RC = &AMDGPU::VReg_96RegClass; + } else if (Info->VAddrDwords == 4) { + RC = &AMDGPU::VReg_128RegClass; + } else if (Info->VAddrDwords <= 8) { + RC = &AMDGPU::VReg_256RegClass; + NewAddrDwords = 8; + } else { + RC = &AMDGPU::VReg_512RegClass; + NewAddrDwords = 16; + } + + unsigned VgprBase = 0; + bool IsUndef = true; + bool IsKill = NewAddrDwords == Info->VAddrDwords; + for (unsigned i = 0; i < Info->VAddrDwords; ++i) { + const MachineOperand &Op = MI.getOperand(VAddr0Idx + i); + unsigned Vgpr = TRI.getHWRegIndex(Op.getReg()); + + if (i == 0) { + VgprBase = Vgpr; + } else if (VgprBase + i != Vgpr) + return; + + if (!Op.isUndef()) + IsUndef = false; + if (!Op.isKill()) + IsKill = false; + } + + if (VgprBase + NewAddrDwords > 256) + return; + + // Further check for implicit tied operands - this may be present if TFE is + // enabled + int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe); + int LWEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::lwe); + unsigned TFEVal = MI.getOperand(TFEIdx).getImm(); + unsigned LWEVal = MI.getOperand(LWEIdx).getImm(); + int ToUntie = -1; + if (TFEVal || LWEVal) { + // TFE/LWE is enabled so we need to deal with an implicit tied operand + for (unsigned i = LWEIdx + 1, e = MI.getNumOperands(); i != e; ++i) { + if (MI.getOperand(i).isReg() && MI.getOperand(i).isTied() && + MI.getOperand(i).isImplicit()) { + // This is the tied operand + assert( + ToUntie == -1 && + "found more than one tied implicit operand when expecting only 1"); + ToUntie = i; + MI.untieRegOperand(ToUntie); + } + } + } + + unsigned NewOpcode = + AMDGPU::getMIMGOpcode(Info->BaseOpcode, AMDGPU::MIMGEncGfx10Default, + Info->VDataDwords, NewAddrDwords); + MI.setDesc(TII->get(NewOpcode)); + MI.getOperand(VAddr0Idx).setReg(RC->getRegister(VgprBase)); + MI.getOperand(VAddr0Idx).setIsUndef(IsUndef); + MI.getOperand(VAddr0Idx).setIsKill(IsKill); + + for (unsigned i = 1; i < Info->VAddrDwords; ++i) + MI.RemoveOperand(VAddr0Idx + 1); + + if (ToUntie >= 0) { + MI.tieOperands( + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata), + ToUntie - (Info->VAddrDwords - 1)); + } +} + /// Attempt to shink AND/OR/XOR operations requiring non-inlineable literals. /// For AND or OR, try using S_BITSET{0,1} to clear or set bits. /// If the inverse of the immediate is legal, use ANDN2, ORN2 or @@ -597,6 +689,14 @@ continue; } + if (TII->isMIMG(MI.getOpcode()) && + ST.getGeneration() >= AMDGPUSubtarget::GFX10 && + MF.getProperties().hasProperty( + MachineFunctionProperties::Property::NoVRegs)) { + shrinkMIMG(MI); + continue; + } + if (!TII->hasVALU32BitEncoding(MI.getOpcode())) continue; Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -200,6 +200,8 @@ uint8_t NumCoords; uint8_t NumGradients; bool DA; + uint8_t Encoding; + const char *AsmSuffix; }; LLVM_READONLY Index: test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll @@ -1,8 +1,10 @@ ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}atomic_swap_1d: -; GCN: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX6789: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX10: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_swap_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -11,7 +13,8 @@ } ; GCN-LABEL: {{^}}atomic_add_1d: -; GCN: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX6789: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX10: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_add_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -20,7 +23,8 @@ } ; GCN-LABEL: {{^}}atomic_sub_1d: -; GCN: image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX6789: image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX10: image_atomic_sub v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_sub_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -29,7 +33,8 @@ } ; GCN-LABEL: {{^}}atomic_smin_1d: -; GCN: image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX6789: image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX10: image_atomic_smin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_smin_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -38,7 +43,8 @@ } ; GCN-LABEL: {{^}}atomic_umin_1d: -; GCN: image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX6789: image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX10: image_atomic_umin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_umin_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -47,7 +53,8 @@ } ; GCN-LABEL: {{^}}atomic_smax_1d: -; GCN: image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX6789: image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX10: image_atomic_smax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_smax_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -56,7 +63,8 @@ } ; GCN-LABEL: {{^}}atomic_umax_1d: -; GCN: image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX6789: image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX10: image_atomic_umax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_umax_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -65,7 +73,8 @@ } ; GCN-LABEL: {{^}}atomic_and_1d: -; GCN: image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX6789: image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX10: image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_and_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -74,7 +83,8 @@ } ; GCN-LABEL: {{^}}atomic_or_1d: -; GCN: image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX6789: image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX10: image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_or_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -83,7 +93,8 @@ } ; GCN-LABEL: {{^}}atomic_xor_1d: -; GCN: image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX6789: image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX10: image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_xor_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -92,7 +103,8 @@ } ; GCN-LABEL: {{^}}atomic_inc_1d: -; GCN: image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX6789: image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX10: image_atomic_inc v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_inc_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -101,7 +113,8 @@ } ; GCN-LABEL: {{^}}atomic_dec_1d: -; GCN: image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX6789: image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX10: image_atomic_dec v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_dec_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -110,7 +123,8 @@ } ; GCN-LABEL: {{^}}atomic_cmpswap_1d: -; GCN: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc{{$}} +; GFX6789: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc{{$}} +; GFX10: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_cmpswap_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s) { main_body: %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -119,7 +133,8 @@ } ; GCN-LABEL: {{^}}atomic_add_2d: -; GCN: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc{{$}} +; GFX6789: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc{{$}} +; GFX10: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm glc ; define amdgpu_ps float @atomic_add_2d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t) { main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) @@ -128,7 +143,8 @@ } ; GCN-LABEL: {{^}}atomic_add_3d: -; GCN: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc{{$}} +; GFX6789: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc{{$}} +; GFX10: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm glc ; define amdgpu_ps float @atomic_add_3d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %r) { main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32 %data, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0) @@ -137,7 +153,8 @@ } ; GCN-LABEL: {{^}}atomic_add_cube: -; GCN: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da{{$}} +; GFX6789: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da{{$}} +; GFX10: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE unorm glc ; define amdgpu_ps float @atomic_add_cube(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %face) { main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32 %data, i32 %s, i32 %t, i32 %face, <8 x i32> %rsrc, i32 0, i32 0) @@ -146,7 +163,8 @@ } ; GCN-LABEL: {{^}}atomic_add_1darray: -; GCN: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da{{$}} +; GFX6789: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da{{$}} +; GFX10: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc ; define amdgpu_ps float @atomic_add_1darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %slice) { main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32 %data, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0) @@ -155,7 +173,8 @@ } ; GCN-LABEL: {{^}}atomic_add_2darray: -; GCN: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da{{$}} +; GFX6789: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da{{$}} +; GFX10: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc ; define amdgpu_ps float @atomic_add_2darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice) { main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0) @@ -164,7 +183,8 @@ } ; GCN-LABEL: {{^}}atomic_add_2dmsaa: -; GCN: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc{{$}} +; GFX6789: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc{{$}} +; GFX10: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm glc ; define amdgpu_ps float @atomic_add_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %fragid) { main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0) @@ -173,7 +193,8 @@ } ; GCN-LABEL: {{^}}atomic_add_2darraymsaa: -; GCN: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da{{$}} +; GFX6789: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da{{$}} +; GFX10: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc ; define amdgpu_ps float @atomic_add_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid) { main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0) @@ -182,7 +203,8 @@ } ; GCN-LABEL: {{^}}atomic_add_1d_slc: -; GCN: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc{{$}} +; GFX6789: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc{{$}} +; GFX10: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc slc ; define amdgpu_ps float @atomic_add_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2) Index: test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll @@ -1,9 +1,11 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=UNPACKED %s -; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s -; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=GCN,UNPACKED,GFX89 %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefixes=GCN,PACKED,GFX81,GFX89 %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=GCN,PACKED,GFX9,GFX89 %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s ; GCN-LABEL: {{^}}image_load_f16: -; GCN: image_load v0, v[0:1], s[0:7] dmask:0x1 unorm d16{{$}} +; GFX89: image_load v0, v[0:1], s[0:7] dmask:0x1 unorm d16{{$}} +; GFX10: image_load v0, v[0:1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm d16{{$}} define amdgpu_ps half @image_load_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { main_body: %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) @@ -13,6 +15,7 @@ ; GCN-LABEL: {{^}}image_load_v2f16: ; UNPACKED: image_load v[0:1], v[0:1], s[0:7] dmask:0x3 unorm d16{{$}} ; PACKED: image_load v0, v[0:1], s[0:7] dmask:0x3 unorm d16{{$}} +; GFX10: image_load v0, v[0:1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm d16{{$}} define amdgpu_ps float @image_load_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { main_body: %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) @@ -23,6 +26,7 @@ ; GCN-LABEL: {{^}}image_load_v4f16: ; UNPACKED: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm d16{{$}} ; PACKED: image_load v[0:1], v[0:1], s[0:7] dmask:0xf unorm d16{{$}} +; GFX10: image_load v[0:1], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm d16{{$}} define amdgpu_ps <2 x float> @image_load_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { main_body: %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) @@ -33,6 +37,7 @@ ; GCN-LABEL: {{^}}image_load_mip_v4f16: ; UNPACKED: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm d16{{$}} ; PACKED: image_load_mip v[0:1], v[0:3], s[0:7] dmask:0xf unorm d16{{$}} +; GFX10: image_load_mip v[0:1], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm d16{{$}} define amdgpu_ps <2 x float> @image_load_mip_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) { main_body: %tex = call <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0) @@ -43,6 +48,7 @@ ; GCN-LABEL: {{^}}image_load_3d_v2f16: ; UNPACKED: image_load v[0:1], v[0:3], s[0:7] dmask:0x3 unorm d16{{$}} ; PACKED: image_load v0, v[0:3], s[0:7] dmask:0x3 unorm d16{{$}} +; GFX10: image_load v0, v[0:2], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm d16{{$}} define amdgpu_ps float @image_load_3d_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) { main_body: %tex = call <2 x half> @llvm.amdgcn.image.load.3d.v2f16.i32(i32 3, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0) @@ -51,7 +57,8 @@ } ; GCN-LABEL: {{^}}image_store_f16 -; GCN: image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16{{$}} +; GFX89: image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16{{$}} +; GFX10: image_store v2, v[0:1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm d16{{$}} define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, half %data) { main_body: call void @llvm.amdgcn.image.store.2d.f16.i32(half %data, i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) @@ -63,6 +70,7 @@ ; UNPACKED: v_and_b32_e32 ; UNPACKED: image_store v[{{[0-9:]+}}], v[0:1], s[0:7] dmask:0x3 unorm d16{{$}} ; PACKED: image_store v2, v[0:1], s[0:7] dmask:0x3 unorm d16{{$}} +; GFX10: image_store v2, v[0:1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm d16{{$}} define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, float %in) { main_body: %data = bitcast float %in to <2 x half> @@ -77,6 +85,7 @@ ; UNPACKED: v_and_b32_e32 ; UNPACKED: image_store v[{{[0-9:]+}}], v[0:1], s[0:7] dmask:0xf unorm d16{{$}} ; PACKED: image_store v[2:3], v[0:1], s[0:7] dmask:0xf unorm d16{{$}} +; GFX10: image_store v[2:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm d16{{$}} define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <2 x float> %in) { main_body: %data = bitcast <2 x float> %in to <4 x half> @@ -91,6 +100,7 @@ ; UNPACKED: v_and_b32_e32 ; UNPACKED: image_store_mip v[{{[0-9:]+}}], v[0:1], s[0:7] dmask:0xf unorm d16{{$}} ; PACKED: image_store_mip v[2:3], v[0:1], s[0:7] dmask:0xf unorm d16{{$}} +; GFX10: image_store_mip v[2:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm d16{{$}} define amdgpu_ps void @image_store_mip_1d_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %mip, <2 x float> %in) { main_body: %data = bitcast <2 x float> %in to <4 x half> Index: test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll @@ -1,10 +1,12 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=UNPACKED %s -; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s -; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=GCN,UNPACKED,GFX89 %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefixes=GCN,PACKED,GFX81,GFX89 %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=GCN,PACKED,GFX9,GFX89 %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s ; GCN-LABEL: {{^}}image_gather4_b_2d_v4f16: ; UNPACKED: image_gather4_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x4 d16{{$}} ; PACKED: image_gather4_b v[0:1], v[0:3], s[0:7], s[8:11] dmask:0x4 d16{{$}} +; GFX10: image_gather4_b v[0:1], v[0:2], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D d16{{$}} define amdgpu_ps <2 x float> @image_gather4_b_2d_v4f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) { main_body: %tex = call <4 x half> @llvm.amdgcn.image.gather4.b.2d.v4f16.f32.f32(i32 4, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) Index: test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll @@ -1,8 +1,10 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789 %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX10 %s ; GCN-LABEL: {{^}}gather4_2d: -; GCN: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX6789: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX10: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -10,7 +12,8 @@ } ; GCN-LABEL: {{^}}gather4_cube: -; GCN: image_gather4 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 da{{$}} +; GFX6789: image_gather4 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 da{{$}} +; GFX10: image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE ; define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) { main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f32(i32 1, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -18,7 +21,8 @@ } ; GCN-LABEL: {{^}}gather4_2darray: -; GCN: image_gather4 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 da{{$}} +; GFX6789: image_gather4 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 da{{$}} +; GFX10: image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY ; define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) { main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f32(i32 1, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -26,7 +30,8 @@ } ; GCN-LABEL: {{^}}gather4_c_2d: -; GCN: image_gather4_c v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX6789: image_gather4_c v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX10: image_gather4_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -34,7 +39,8 @@ } ; GCN-LABEL: {{^}}gather4_cl_2d: -; GCN: image_gather4_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX6789: image_gather4_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX10: image_gather4_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) { main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 1, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -42,7 +48,8 @@ } ; GCN-LABEL: {{^}}gather4_c_cl_2d: -; GCN: image_gather4_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX6789: image_gather4_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX10: image_gather4_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) { main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -50,7 +57,8 @@ } ; GCN-LABEL: {{^}}gather4_b_2d: -; GCN: image_gather4_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX6789: image_gather4_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX10: image_gather4_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) { main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -58,7 +66,8 @@ } ; GCN-LABEL: {{^}}gather4_c_b_2d: -; GCN: image_gather4_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX6789: image_gather4_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX10: image_gather4_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) { main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -66,7 +75,8 @@ } ; GCN-LABEL: {{^}}gather4_b_cl_2d: -; GCN: image_gather4_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX6789: image_gather4_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX10: image_gather4_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) { main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -74,7 +84,8 @@ } ; GCN-LABEL: {{^}}gather4_c_b_cl_2d: -; GCN: image_gather4_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX6789: image_gather4_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX10: image_gather4_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) { main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -82,7 +93,8 @@ } ; GCN-LABEL: {{^}}gather4_l_2d: -; GCN: image_gather4_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX6789: image_gather4_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX10: image_gather4_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 1, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -90,7 +102,8 @@ } ; GCN-LABEL: {{^}}gather4_c_l_2d: -; GCN: image_gather4_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX6789: image_gather4_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX10: image_gather4_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -98,7 +111,8 @@ } ; GCN-LABEL: {{^}}gather4_lz_2d: -; GCN: image_gather4_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX6789: image_gather4_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX10: image_gather4_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -106,7 +120,8 @@ } ; GCN-LABEL: {{^}}gather4_c_lz_2d: -; GCN: image_gather4_c_lz v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX6789: image_gather4_c_lz v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}} +; GFX10: image_gather4_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -114,7 +129,8 @@ } ; GCN-LABEL: {{^}}gather4_2d_dmask_2: -; GCN: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x2{{$}} +; GFX6789: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x2{{$}} +; GFX10: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @gather4_2d_dmask_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 2, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -122,7 +138,8 @@ } ; GCN-LABEL: {{^}}gather4_2d_dmask_4: -; GCN: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x4{{$}} +; GFX6789: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x4{{$}} +; GFX10: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @gather4_2d_dmask_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 4, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -130,7 +147,8 @@ } ; GCN-LABEL: {{^}}gather4_2d_dmask_8: -; GCN: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x8{{$}} +; GFX6789: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x8{{$}} +; GFX10: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @gather4_2d_dmask_8(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 8, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) Index: test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.dim.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.dim.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.dim.ll @@ -1,8 +1,10 @@ -; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s -; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=GCN %s +; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefixes=GCN,PRE-GFX10 %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefixes=GCN,PRE-GFX10 %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck --check-prefixes=GCN,GFX10 %s ; GCN-LABEL: {{^}}getlod_1d: -; GCN: image_get_lod v[0:3], v0, s[0:7], s[8:11] dmask:0xf{{$}} +; PRE-GFX10: image_get_lod v[0:3], v0, s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_get_lod v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; GCN: s_waitcnt vmcnt(0) define amdgpu_ps <4 x float> @getlod_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { main_body: @@ -11,7 +13,8 @@ } ; GCN-LABEL: {{^}}getlod_2d: -; GCN: image_get_lod v[0:1], v[0:1], s[0:7], s[8:11] dmask:0x3{{$}} +; PRE-GFX10: image_get_lod v[0:1], v[0:1], s[0:7], s[8:11] dmask:0x3{{$}} +; GFX10: image_get_lod v[0:1], v[0:1], s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_2D ; GCN: s_waitcnt vmcnt(0) define amdgpu_ps <2 x float> @getlod_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { main_body: Index: test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll @@ -0,0 +1,91 @@ +; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-nsa-encoding -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,NONSA %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,NSA %s + +; GCN-LABEL: {{^}}sample_2d: +; +; TODO: use NSA here +; GCN: v_mov_b32_e32 v2, v0 +; +; GCN: image_sample v[0:3], v[1:2], +define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %t, float %s) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_3d: +; NONSA: v_mov_b32_e32 v3, v0 +; NONSA: image_sample v[0:3], v[1:4], +; NSA: image_sample v[0:3], [v1, v2, v0], +define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %r, float %s, float %t) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_d_3d: +; NSA: image_sample_d v[0:3], [v3, v8, v7, v5, v4, v6, v0, v2, v1], +define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %r, float %t, float %dsdh, float %dtdv, float %dsdv, float %drdv, float %drdh, float %dtdh) { +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32(i32 15, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + ret <4 x float> %v +} + +; GCN-LABEL: {{^}}sample_contig_nsa: +; GCN: image_sample_c_l v0, v[0:7], +; NSA: image_sample v1, [v6, v7, v5], +define amdgpu_ps <2 x float> @sample_contig_nsa(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s1, float %t1, float %r1, float %lod, float %r2, float %s2, float %t2) { +main_body: + %v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare, float %s1, float %t1, float %r1, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v2 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2, float %t2, float %r2, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %r.0 = insertelement <2 x float> undef, float %v1, i32 0 + %r = insertelement <2 x float> %r.0, float %v2, i32 1 + ret <2 x float> %r +} + +; GCN-LABEL: {{^}}sample_nsa_nsa: +; NSA: image_sample_c_l v0, [v1, v2, v3, v4, v0], +; NSA: image_sample v1, [v6, v7, v5], +define amdgpu_ps <2 x float> @sample_nsa_nsa(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %lod, float %zcompare, float %s1, float %t1, float %r1, float %r2, float %s2, float %t2) { +main_body: + %v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare, float %s1, float %t1, float %r1, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v2 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2, float %t2, float %r2, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %r.0 = insertelement <2 x float> undef, float %v1, i32 0 + %r = insertelement <2 x float> %r.0, float %v2, i32 1 + ret <2 x float> %r +} + +; GCN-LABEL: {{^}}sample_nsa_contig: +; NSA: image_sample_c_l v0, [v1, v2, v3, v4, v0], +; NSA: image_sample v1, v[5:7], +define amdgpu_ps <2 x float> @sample_nsa_contig(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %lod, float %zcompare, float %s1, float %t1, float %r1, float %s2, float %t2, float %r2) { +main_body: + %v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare, float %s1, float %t1, float %r1, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v2 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2, float %t2, float %r2, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %r.0 = insertelement <2 x float> undef, float %v1, i32 0 + %r = insertelement <2 x float> %r.0, float %v2, i32 1 + ret <2 x float> %r +} + +; GCN-LABEL: {{^}}sample_contig_contig: +; GCN: image_sample_c_l v0, v[0:7], +; NSA: image_sample v1, v[5:7], +; NONSA: image_sample v1, v[5:8], +define amdgpu_ps <2 x float> @sample_contig_contig(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s1, float %t1, float %r1, float %lod, float %s2, float %t2, float %r2) { +main_body: + %v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare, float %s1, float %t1, float %r1, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v2 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2, float %t2, float %r2, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %r.0 = insertelement <2 x float> undef, float %v1, i32 0 + %r = insertelement <2 x float> %r.0, float %v2, i32 1 + ret <2 x float> %r +} + + +declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32(i32, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare float @llvm.amdgcn.image.sample.3d.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +attributes #1 = { nounwind readonly } Index: test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll @@ -1,9 +1,11 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=UNPACKED %s -; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s -; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=GCN,UNPACKED,GFX89 %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefixes=GCN,PACKED,GFX81,GFX89 %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=GCN,PACKED,GFX9,GFX89 %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s ; GCN-LABEL: {{^}}image_sample_2d_f16: -; GCN: image_sample v0, v[0:1], s[0:7], s[8:11] dmask:0x1 d16{{$}} +; GFX89: image_sample v0, v[0:1], s[0:7], s[8:11] dmask:0x1 d16{{$}} +; GFX10: image_sample v0, v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D d16{{$}} define amdgpu_ps half @image_sample_2d_f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { main_body: %tex = call half @llvm.amdgcn.image.sample.2d.f16.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) @@ -26,6 +28,7 @@ ; GCN-LABEL: {{^}}image_sample_c_d_1d_v2f16: ; UNPACKED: image_sample_c_d v[0:1], v[0:3], s[0:7], s[8:11] dmask:0x3 d16{{$}} ; PACKED: image_sample_c_d v0, v[0:3], s[0:7], s[8:11] dmask:0x3 d16{{$}} +; GFX10: image_sample_c_d v0, v[0:3], s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D d16{{$}} define amdgpu_ps float @image_sample_c_d_1d_v2f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) { main_body: %tex = call <2 x half> @llvm.amdgcn.image.sample.c.d.1d.v2f16.f32.f32(i32 3, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) @@ -52,6 +55,7 @@ ; GCN-LABEL: {{^}}image_sample_b_2d_v4f16: ; UNPACKED: image_sample_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf d16{{$}} ; PACKED: image_sample_b v[0:1], v[0:3], s[0:7], s[8:11] dmask:0xf d16{{$}} +; GFX10: image_sample_b v[0:1], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D d16{{$}} define amdgpu_ps <2 x float> @image_sample_b_2d_v4f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) { main_body: %tex = call <4 x half> @llvm.amdgcn.image.sample.b.2d.v4f16.f32.f32(i32 15, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) Index: test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll @@ -1,8 +1,10 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789 %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX10 %s ; GCN-LABEL: {{^}}sample_1d: -; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -15,7 +17,8 @@ ; GCN: v_mov_b32_e32 v2, v0 ; GCN: v_mov_b32_e32 v3, v0 ; GCN: v_mov_b32_e32 v4, v0 -; GCN: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe{{$}} +; GFX6789: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe{{$}} +; GFX10: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe ; define amdgpu_ps <4 x float> @sample_1d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) { main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) @@ -28,7 +31,8 @@ ; GCN-LABEL: {{^}}sample_1d_tfe_adjust_writemask_1: ; GCN: v_mov_b32_e32 v0, 0 ; GCN: v_mov_b32_e32 v1, v0 -; GCN: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe{{$}} +; GFX6789: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe{{$}} +; GFX10: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe ; define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) { main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) @@ -44,7 +48,8 @@ ; GCN-LABEL: {{^}}sample_1d_tfe_adjust_writemask_2: ; GCN: v_mov_b32_e32 v0, 0 ; GCN: v_mov_b32_e32 v1, v0 -; GCN: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe{{$}} +; GFX6789: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe{{$}} +; GFX10: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_1D tfe ; define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) @@ -60,7 +65,8 @@ ; GCN-LABEL: {{^}}sample_1d_tfe_adjust_writemask_3: ; GCN: v_mov_b32_e32 v0, 0 ; GCN: v_mov_b32_e32 v1, v0 -; GCN: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe{{$}} +; GFX6789: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe{{$}} +; GFX10: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_1D tfe ; define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) @@ -76,7 +82,8 @@ ; GCN-LABEL: {{^}}sample_1d_tfe_adjust_writemask_4: ; GCN: v_mov_b32_e32 v0, 0 ; GCN: v_mov_b32_e32 v1, v0 -; GCN: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe{{$}} +; GFX6789: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe{{$}} +; GFX10: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_1D tfe ; define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) @@ -93,7 +100,8 @@ ; GCN: v_mov_b32_e32 v0, 0 ; GCN: v_mov_b32_e32 v1, v0 ; GCN: v_mov_b32_e32 v2, v0 -; GCN: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe{{$}} +; GFX6789: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe{{$}} +; GFX10: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe ; define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) @@ -112,7 +120,8 @@ ; GCN: v_mov_b32_e32 v0, 0 ; GCN: v_mov_b32_e32 v1, v0 ; GCN: v_mov_b32_e32 v2, v0 -; GCN: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe{{$}} +; GFX6789: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe{{$}} +; GFX10: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D tfe ; define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_24(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) @@ -132,7 +141,8 @@ ; GCN: v_mov_b32_e32 v1, v0 ; GCN: v_mov_b32_e32 v2, v0 ; GCN: v_mov_b32_e32 v3, v0 -; GCN: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe{{$}} +; GFX6789: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe{{$}} +; GFX10: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd dim:SQ_RSRC_IMG_1D tfe ; define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_134(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) @@ -155,7 +165,8 @@ ; GCN: v_mov_b32_e32 v2, v0 ; GCN: v_mov_b32_e32 v3, v0 ; GCN: v_mov_b32_e32 v4, v0 -; GCN: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe{{$}} +; GFX6789: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe{{$}} +; GFX10: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe ; define amdgpu_ps <4 x float> @sample_1d_lwe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) { main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 2, i32 0) @@ -166,7 +177,8 @@ } ; GCN-LABEL: {{^}}sample_2d: -; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -174,7 +186,8 @@ } ; GCN-LABEL: {{^}}sample_3d: -; GCN: image_sample v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D ; define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -182,7 +195,8 @@ } ; GCN-LABEL: {{^}}sample_cube: -; GCN: image_sample v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf da{{$}} +; GFX6789: image_sample v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf da{{$}} +; GFX10: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -190,7 +204,8 @@ } ; GCN-LABEL: {{^}}sample_1darray: -; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da{{$}} +; GFX6789: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da{{$}} +; GFX10: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY ; define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -198,7 +213,8 @@ } ; GCN-LABEL: {{^}}sample_2darray: -; GCN: image_sample v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf da{{$}} +; GFX6789: image_sample v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf da{{$}} +; GFX10: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -206,7 +222,8 @@ } ; GCN-LABEL: {{^}}sample_c_1d: -; GCN: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -214,7 +231,8 @@ } ; GCN-LABEL: {{^}}sample_c_2d: -; GCN: image_sample_c v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_c v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -222,7 +240,8 @@ } ; GCN-LABEL: {{^}}sample_cl_1d: -; GCN: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %clamp) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32 15, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -230,7 +249,8 @@ } ; GCN-LABEL: {{^}}sample_cl_2d: -; GCN: image_sample_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32 15, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -238,7 +258,8 @@ } ; GCN-LABEL: {{^}}sample_c_cl_1d: -; GCN: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %clamp) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -246,7 +267,8 @@ } ; GCN-LABEL: {{^}}sample_c_cl_2d: -; GCN: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -254,7 +276,8 @@ } ; GCN-LABEL: {{^}}sample_b_1d: -; GCN: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -262,7 +285,8 @@ } ; GCN-LABEL: {{^}}sample_b_2d: -; GCN: image_sample_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -270,7 +294,8 @@ } ; GCN-LABEL: {{^}}sample_c_b_1d: -; GCN: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -278,7 +303,8 @@ } ; GCN-LABEL: {{^}}sample_c_b_2d: -; GCN: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -286,7 +312,8 @@ } ; GCN-LABEL: {{^}}sample_b_cl_1d: -; GCN: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %clamp) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -294,7 +321,8 @@ } ; GCN-LABEL: {{^}}sample_b_cl_2d: -; GCN: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -302,7 +330,8 @@ } ; GCN-LABEL: {{^}}sample_c_b_cl_1d: -; GCN: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %clamp) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -310,7 +339,8 @@ } ; GCN-LABEL: {{^}}sample_c_b_cl_2d: -; GCN: image_sample_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -318,7 +348,8 @@ } ; GCN-LABEL: {{^}}sample_d_1d: -; GCN: image_sample_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -326,7 +357,8 @@ } ; GCN-LABEL: {{^}}sample_d_2d: -; GCN: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -334,7 +366,8 @@ } ; GCN-LABEL: {{^}}sample_c_d_1d: -; GCN: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -342,7 +375,8 @@ } ; GCN-LABEL: {{^}}sample_c_d_2d: -; GCN: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -350,7 +384,8 @@ } ; GCN-LABEL: {{^}}sample_d_cl_1d: -; GCN: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -358,7 +393,8 @@ } ; GCN-LABEL: {{^}}sample_d_cl_2d: -; GCN: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -366,7 +402,8 @@ } ; GCN-LABEL: {{^}}sample_c_d_cl_1d: -; GCN: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -374,7 +411,8 @@ } ; GCN-LABEL: {{^}}sample_c_d_cl_2d: -; GCN: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -382,7 +420,8 @@ } ; GCN-LABEL: {{^}}sample_cd_1d: -; GCN: image_sample_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -390,7 +429,8 @@ } ; GCN-LABEL: {{^}}sample_cd_2d: -; GCN: image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -398,7 +438,8 @@ } ; GCN-LABEL: {{^}}sample_c_cd_1d: -; GCN: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -406,7 +447,8 @@ } ; GCN-LABEL: {{^}}sample_c_cd_2d: -; GCN: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -414,7 +456,8 @@ } ; GCN-LABEL: {{^}}sample_cd_cl_1d: -; GCN: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -422,7 +465,8 @@ } ; GCN-LABEL: {{^}}sample_cd_cl_2d: -; GCN: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -430,7 +474,8 @@ } ; GCN-LABEL: {{^}}sample_c_cd_cl_1d: -; GCN: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -438,7 +483,8 @@ } ; GCN-LABEL: {{^}}sample_c_cd_cl_2d: -; GCN: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -446,7 +492,8 @@ } ; GCN-LABEL: {{^}}sample_l_1d: -; GCN: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -454,7 +501,8 @@ } ; GCN-LABEL: {{^}}sample_l_2d: -; GCN: image_sample_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -462,7 +510,8 @@ } ; GCN-LABEL: {{^}}sample_c_l_1d: -; GCN: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -470,7 +519,8 @@ } ; GCN-LABEL: {{^}}sample_c_l_2d: -; GCN: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -478,7 +528,8 @@ } ; GCN-LABEL: {{^}}sample_lz_1d: -; GCN: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -486,7 +537,8 @@ } ; GCN-LABEL: {{^}}sample_lz_2d: -; GCN: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -494,7 +546,8 @@ } ; GCN-LABEL: {{^}}sample_c_lz_1d: -; GCN: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -502,7 +555,8 @@ } ; GCN-LABEL: {{^}}sample_c_lz_2d: -; GCN: image_sample_c_lz v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX6789: image_sample_c_lz v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} +; GFX10: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -510,7 +564,8 @@ } ; GCN-LABEL: {{^}}sample_c_d_o_2darray_V1: -; GCN: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 da{{$}} +; GFX6789: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 da{{$}} +; GFX10: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY ; define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) { main_body: %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -518,7 +573,8 @@ } ; GCN-LABEL: {{^}}sample_c_d_o_2darray_V1_tfe: -; GCN: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 tfe da{{$}} +; GFX6789: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 tfe da{{$}} +; GFX10: image_sample_c_d_o v[0:1], [v10, v9, v2, v3, v4, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe ; define amdgpu_ps float @sample_c_d_o_2darray_V1_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, i32 addrspace(1)* inreg %out) { main_body: %v = call {float,i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) @@ -529,7 +585,8 @@ } ; GCN-LABEL: {{^}}sample_c_d_o_2darray_V2: -; GCN: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 da{{$}} +; GFX6789: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 da{{$}} +; GFX10: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY ; define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) { main_body: %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -537,7 +594,8 @@ } ; GCN-LABEL: {{^}}sample_c_d_o_2darray_V2_tfe: -; GCN: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 tfe da{{$}} +; GFX6789: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 tfe da{{$}} +; GFX10: image_sample_c_d_o v[0:2], [v11, v10, v9, v3, v4, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe ; define amdgpu_ps <4 x float> @sample_c_d_o_2darray_V2_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) { main_body: %v = call {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) @@ -553,7 +611,8 @@ } ; GCN-LABEL: {{^}}sample_1d_unorm: -; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm{{$}} +; GFX6789: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm{{$}} +; GFX10: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; define amdgpu_ps <4 x float> @sample_1d_unorm(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 1, i32 0, i32 0) @@ -561,7 +620,8 @@ } ; GCN-LABEL: {{^}}sample_1d_glc: -; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc{{$}} +; GFX6789: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc{{$}} +; GFX10: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc ; define amdgpu_ps <4 x float> @sample_1d_glc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 1) @@ -569,7 +629,8 @@ } ; GCN-LABEL: {{^}}sample_1d_slc: -; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc{{$}} +; GFX6789: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc{{$}} +; GFX10: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D slc ; define amdgpu_ps <4 x float> @sample_1d_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 2) @@ -577,7 +638,8 @@ } ; GCN-LABEL: {{^}}sample_1d_glc_slc: -; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc{{$}} +; GFX6789: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc{{$}} +; GFX10: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc slc ; define amdgpu_ps <4 x float> @sample_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 3) @@ -585,7 +647,7 @@ } ; GCN-LABEL: {{^}}adjust_writemask_sample_0: -; GCN: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1{{$}} +; GCN: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 define amdgpu_ps float @adjust_writemask_sample_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { main_body: %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -594,7 +656,7 @@ } ; GCN-LABEL: {{^}}adjust_writemask_sample_01 -; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3{{$}} +; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3 define amdgpu_ps <2 x float> @adjust_writemask_sample_01(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { main_body: %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -603,7 +665,7 @@ } ; GCN-LABEL: {{^}}adjust_writemask_sample_012 -; GCN: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7{{$}} +; GCN: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7 define amdgpu_ps <3 x float> @adjust_writemask_sample_012(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { main_body: %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -612,7 +674,7 @@ } ; GCN-LABEL: {{^}}adjust_writemask_sample_12 -; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6{{$}} +; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 define amdgpu_ps <2 x float> @adjust_writemask_sample_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { main_body: %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -621,7 +683,7 @@ } ; GCN-LABEL: {{^}}adjust_writemask_sample_03 -; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9{{$}} +; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9 define amdgpu_ps <2 x float> @adjust_writemask_sample_03(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { main_body: %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -630,7 +692,7 @@ } ; GCN-LABEL: {{^}}adjust_writemask_sample_13 -; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa{{$}} +; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa define amdgpu_ps <2 x float> @adjust_writemask_sample_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { main_body: %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -639,7 +701,7 @@ } ; GCN-LABEL: {{^}}adjust_writemask_sample_123 -; GCN: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe{{$}} +; GCN: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe define amdgpu_ps <3 x float> @adjust_writemask_sample_123(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { main_body: %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -656,7 +718,7 @@ } ; GCN-LABEL: {{^}}adjust_writemask_sample_123_to_12 -; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6{{$}} +; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 define amdgpu_ps <2 x float> @adjust_writemask_sample_123_to_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { main_body: %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 14, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -665,7 +727,7 @@ } ; GCN-LABEL: {{^}}adjust_writemask_sample_013_to_13 -; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa{{$}} +; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa define amdgpu_ps <2 x float> @adjust_writemask_sample_013_to_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { main_body: %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 11, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) Index: test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.d16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.d16.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.d16.ll @@ -1,9 +1,11 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=UNPACKED %s -; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED %s -; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding | FileCheck -enable-var-scope -check-prefixes=GCN,UNPACKED,PREGFX10,PREGFX10-UNPACKED %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PREGFX10,PREGFX10-PACKED %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PREGFX10,PREGFX10-PACKED %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,GFX10,GFX10-PACKED %s ; GCN-LABEL: {{^}}tbuffer_load_d16_x: -; GCN: tbuffer_load_format_d16_x v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 +; PREGFX10: tbuffer_load_format_d16_x v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 +; GFX10: tbuffer_load_format_d16_x v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], format:22, 0 define amdgpu_ps half @tbuffer_load_d16_x(<4 x i32> inreg %rsrc) { main_body: %data = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 0, i32 0, i32 22, i32 0) @@ -11,10 +13,11 @@ } ; GCN-LABEL: {{^}}tbuffer_load_d16_xy: -; UNPACKED: tbuffer_load_format_d16_xy v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 +; PREGFX10-UNPACKED: tbuffer_load_format_d16_xy v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 ; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] -; PACKED: tbuffer_load_format_d16_xy v[[FULL:[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 +; PREGFX10-PACKED: tbuffer_load_format_d16_xy v[[FULL:[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 +; GFX10-PACKED: tbuffer_load_format_d16_xy v[[FULL:[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], format:22, 0 ; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[FULL]] define amdgpu_ps half @tbuffer_load_d16_xy(<4 x i32> inreg %rsrc) { main_body: @@ -24,10 +27,12 @@ } ; GCN-LABEL: {{^}}tbuffer_load_d16_xyzw: -; UNPACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 +; PREGFX10-UNPACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 +; GFX10-UNPACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], format:22, 0 ; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] -; PACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 +; PREGFX10-PACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 +; GFX10-PACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], format:22, 0 ; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]] define amdgpu_ps half @tbuffer_load_d16_xyzw(<4 x i32> inreg %rsrc) { main_body: Index: test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll @@ -1,13 +1,15 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=UNPACKED %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,UNPACKED,PREGFX10,PREGFX10-UNPACKED %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PREGFX10,PREGFX10-PACKED %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PREGFX10,PREGFX10-PACKED %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,GFX10,GFX10-PACKED %s ; GCN-LABEL: {{^}}tbuffer_store_d16_x: -; GCN: s_load_dwordx4 -; GCN: s_load_dword s[[S_LO:[0-9]+]] -; GCN: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[S_LO]] -; GCN: tbuffer_store_format_d16_x v[[V_LO]], off, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 +; GCN-DAG: s_load_dwordx4 +; GCN-DAG: s_load_dword s[[S_LO:[0-9]+]] +; GCN-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[S_LO]] +; PREGFX10: tbuffer_store_format_d16_x v[[V_LO]], off, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 +; GFX10: tbuffer_store_format_d16_x v[[V_LO]], off, s[{{[0-9]+:[0-9]+}}], format:33, 0 define amdgpu_kernel void @tbuffer_store_d16_x(<4 x i32> %rsrc, half %data) { main_body: call void @llvm.amdgcn.raw.tbuffer.store.f16(half %data, <4 x i32> %rsrc, i32 0, i32 0, i32 33, i32 0) @@ -15,14 +17,15 @@ } ; GCN-LABEL: {{^}}tbuffer_store_d16_xy: -; GCN: s_load_dword [[S_DATA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x10 +; GCN: s_load_dword [[S_DATA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, ; UNPACKED-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[S_DATA]], 16 ; UNPACKED-DAG: s_and_b32 [[MASKED:s[0-9]+]], [[S_DATA]], 0xffff{{$}} ; UNPACKED-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], [[MASKED]] ; UNPACKED-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], [[SHR]] -; UNPACKED: tbuffer_store_format_d16_xy v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 +; PREGFX10-UNPACKED: tbuffer_store_format_d16_xy v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 -; PACKED: tbuffer_store_format_d16_xy v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 +; PREGFX10-PACKED: tbuffer_store_format_d16_xy v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 +; GFX10-PACKED: tbuffer_store_format_d16_xy v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], format:33, 0 define amdgpu_kernel void @tbuffer_store_d16_xy(<4 x i32> %rsrc, <2 x half> %data) { main_body: call void @llvm.amdgcn.raw.tbuffer.store.v2f16(<2 x half> %data, <4 x i32> %rsrc, i32 0, i32 0, i32 33, i32 0) @@ -30,7 +33,7 @@ } ; GCN-LABEL: {{^}}tbuffer_store_d16_xyzw: -; GCN-DAG: s_load_dwordx2 s{{\[}}[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x10 +; GCN-DAG: s_load_dwordx2 s{{\[}}[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, ; UNPACKED-DAG: s_mov_b32 [[K:s[0-9]+]], 0xffff{{$}} ; UNPACKED-DAG: s_lshr_b32 [[SHR0:s[0-9]+]], s[[S_DATA_0]], 16 @@ -40,12 +43,13 @@ ; UNPACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[MASKED0]] ; UNPACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHR1]] -; UNPACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 +; PREGFX10-UNPACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 ; PACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s[[S_DATA_0]] ; PACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s[[S_DATA_1]] -; PACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 +; PREGFX10-PACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 +; GFX10-PACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], format:33, 0 define amdgpu_kernel void @tbuffer_store_d16_xyzw(<4 x i32> %rsrc, <4 x half> %data) { main_body: call void @llvm.amdgcn.raw.tbuffer.store.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 0, i32 0, i32 33, i32 0) Index: test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll @@ -1,11 +1,16 @@ -;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefixes=GCN,VERDE %s -;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN %s +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefixes=GCN,VERDE,PREGFX10 %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=GCN,PREGFX10 %s +;RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s ; GCN-LABEL: {{^}}tbuffer_store: -; GCN: tbuffer_store_format_xyzw v[0:3], off, s[0:3], dfmt:12, nfmt:2, 0 -; GCN: tbuffer_store_format_xyzw v[4:7], off, s[0:3], dfmt:13, nfmt:3, 0 glc -; GCN: tbuffer_store_format_xyzw v[8:11], off, s[0:3], dfmt:14, nfmt:4, 0 slc -; GCN: tbuffer_store_format_xyzw v[8:11], off, s[0:3], dfmt:14, nfmt:4, 0 +; PREGFX10: tbuffer_store_format_xyzw v[0:3], off, s[0:3], dfmt:12, nfmt:2, 0 +; PREGFX10: tbuffer_store_format_xyzw v[4:7], off, s[0:3], dfmt:13, nfmt:3, 0 glc +; PREGFX10: tbuffer_store_format_xyzw v[8:11], off, s[0:3], dfmt:14, nfmt:4, 0 slc +; PREGFX10: tbuffer_store_format_xyzw v[8:11], off, s[0:3], dfmt:14, nfmt:4, 0 glc +; GFX10: tbuffer_store_format_xyzw v[0:3], off, s[0:3], format:44, 0 +; GFX10: tbuffer_store_format_xyzw v[4:7], off, s[0:3], format:61, 0 glc +; GFX10: tbuffer_store_format_xyzw v[8:11], off, s[0:3], format:78, 0 slc +; GFX10: tbuffer_store_format_xyzw v[8:11], off, s[0:3], format:78, 0 glc dlc define amdgpu_ps void @tbuffer_store(<4 x i32> inreg, <4 x float>, <4 x float>, <4 x float>) { main_body: %in1 = bitcast <4 x float> %1 to <4 x i32> @@ -14,12 +19,13 @@ call void @llvm.amdgcn.raw.tbuffer.store.v4i32(<4 x i32> %in1, <4 x i32> %0, i32 0, i32 0, i32 44, i32 0) call void @llvm.amdgcn.raw.tbuffer.store.v4i32(<4 x i32> %in2, <4 x i32> %0, i32 0, i32 0, i32 61, i32 1) call void @llvm.amdgcn.raw.tbuffer.store.v4i32(<4 x i32> %in3, <4 x i32> %0, i32 0, i32 0, i32 78, i32 2) - call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %3, <4 x i32> %0, i32 0, i32 0, i32 78, i32 0) + call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %3, <4 x i32> %0, i32 0, i32 0, i32 78, i32 5) ret void } ; GCN-LABEL: {{^}}tbuffer_store_immoffs: -; GCN: tbuffer_store_format_xyzw v[0:3], off, s[0:3], dfmt:5, nfmt:7, 0 offset:42 +; PREGFX10: tbuffer_store_format_xyzw v[0:3], off, s[0:3], dfmt:5, nfmt:7, 0 offset:42 +; GFX10: tbuffer_store_format_xyzw v[0:3], off, s[0:3], format:117, 0 offset:42 define amdgpu_ps void @tbuffer_store_immoffs(<4 x i32> inreg, <4 x float>) { main_body: %in1 = bitcast <4 x float> %1 to <4 x i32> @@ -28,7 +34,8 @@ } ; GCN-LABEL: {{^}}tbuffer_store_scalar_and_imm_offs: -; GCN: tbuffer_store_format_xyzw v[0:3], off, s[0:3], dfmt:5, nfmt:7, {{s[0-9]+}} offset:42 +; PREGFX10: tbuffer_store_format_xyzw v[0:3], off, s[0:3], dfmt:5, nfmt:7, {{s[0-9]+}} offset:42 +; GFX10: tbuffer_store_format_xyzw v[0:3], off, s[0:3], format:117, {{s[0-9]+}} offset:42 define amdgpu_ps void @tbuffer_store_scalar_and_imm_offs(<4 x i32> inreg, <4 x float> %vdata, i32 inreg %soffset) { main_body: %in1 = bitcast <4 x float> %vdata to <4 x i32> @@ -37,7 +44,8 @@ } ; GCN-LABEL: {{^}}buffer_store_ofs: -; GCN: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], dfmt:3, nfmt:7, 0 offen +; PREGFX10: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], dfmt:3, nfmt:7, 0 offen +; GFX10: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], format:115, 0 offen define amdgpu_ps void @buffer_store_ofs(<4 x i32> inreg, <4 x float> %vdata, i32 %voffset) { main_body: %in1 = bitcast <4 x float> %vdata to <4 x i32> @@ -46,7 +54,8 @@ } ; GCN-LABEL: {{^}}buffer_store_x1: -; GCN: tbuffer_store_format_x v0, off, s[0:3], dfmt:13, nfmt:7, 0 +; PREGFX10: tbuffer_store_format_x v0, off, s[0:3], dfmt:13, nfmt:7, 0 +; GFX10: tbuffer_store_format_x v0, off, s[0:3], format:125, 0 define amdgpu_ps void @buffer_store_x1(<4 x i32> inreg %rsrc, float %data) { main_body: %data.i = bitcast float %data to i32 @@ -55,7 +64,8 @@ } ; GCN-LABEL: {{^}}buffer_store_x2: -; GCN: tbuffer_store_format_xy v[0:1], off, s[0:3], dfmt:1, nfmt:2, 0 +; PREGFX10: tbuffer_store_format_xy v[0:1], off, s[0:3], dfmt:1, nfmt:2, 0 +; GFX10: tbuffer_store_format_xy v[0:1], off, s[0:3], format:33, 0 define amdgpu_ps void @buffer_store_x2(<4 x i32> inreg %rsrc, <2 x float> %data) { main_body: %data.i = bitcast <2 x float> %data to <2 x i32> Index: test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.d16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.d16.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.d16.ll @@ -1,10 +1,12 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=UNPACKED %s -; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED %s -; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding | FileCheck -enable-var-scope -check-prefixes=GCN,UNPACKED,PREGFX10,PREGFX10-UNPACKED %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PREGFX10,PREGFX10-PACKED %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PREGFX10,PREGFX10-PACKED %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,GFX10,GFX10-PACKED %s ; GCN-LABEL: {{^}}tbuffer_load_d16_x: ; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0 -; GCN: tbuffer_load_format_d16_x v{{[0-9]+}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 idxen +; PREGFX10: tbuffer_load_format_d16_x v{{[0-9]+}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 idxen +; GFX10: tbuffer_load_format_d16_x v{{[0-9]+}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], format:22, 0 idxen define amdgpu_ps half @tbuffer_load_d16_x(<4 x i32> inreg %rsrc) { main_body: %data = call half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0) @@ -13,10 +15,11 @@ ; GCN-LABEL: {{^}}tbuffer_load_d16_xy: ; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0 -; UNPACKED: tbuffer_load_format_d16_xy v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 idxen -; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] +; PREGFX10-UNPACKED: tbuffer_load_format_d16_xy v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 idxen +; PREGFX10-UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] -; PACKED: tbuffer_load_format_d16_xy v[[FULL:[0-9]+]], [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 idxen +; PREGFX10-PACKED: tbuffer_load_format_d16_xy v[[FULL:[0-9]+]], [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 idxen +; GFX10-PACKED: tbuffer_load_format_d16_xy v[[FULL:[0-9]+]], [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], format:22, 0 idxen ; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[FULL]] define amdgpu_ps half @tbuffer_load_d16_xy(<4 x i32> inreg %rsrc) { main_body: @@ -27,10 +30,11 @@ ; GCN-LABEL: {{^}}tbuffer_load_d16_xyzw: ; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0 -; UNPACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 idxen -; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] +; PREGFX10-UNPACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 idxen +; PREGFX10-UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] -; PACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 idxen +; PREGFX10-PACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 idxen +; GFX10-PACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], format:22, 0 idxen ; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]] define amdgpu_ps half @tbuffer_load_d16_xyzw(<4 x i32> inreg %rsrc) { main_body: Index: test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll @@ -1,19 +1,24 @@ -;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=GCN %s -;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN %s +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefixes=GCN,PREGFX10 %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=GCN,PREGFX10 %s +;RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s ; GCN-LABEL: {{^}}tbuffer_load: ; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0 -; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen -; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:15, nfmt:3, 0 idxen glc -; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:6, nfmt:1, 0 idxen slc -; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:6, nfmt:1, 0 idxen +; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen +; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:15, nfmt:3, 0 idxen glc +; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:6, nfmt:1, 0 idxen slc +; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:6, nfmt:1, 0 idxen glc +; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, format:78, 0 idxen +; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, format:63, 0 idxen glc +; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, format:22, 0 idxen slc +; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, format:22, 0 idxen glc dlc ; GCN: s_waitcnt define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>, <4 x float>} @tbuffer_load(<4 x i32> inreg) { main_body: %vdata = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 78, i32 0) %vdata_glc = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 63, i32 1) %vdata_slc = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 22, i32 2) - %vdata_f32 = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 22, i32 0) + %vdata_f32 = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 22, i32 5) %vdata.f = bitcast <4 x i32> %vdata to <4 x float> %vdata_glc.f = bitcast <4 x i32> %vdata_glc to <4 x float> %vdata_slc.f = bitcast <4 x i32> %vdata_slc to <4 x float> @@ -26,7 +31,8 @@ ; GCN-LABEL: {{^}}tbuffer_load_immoffs: ; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0 -; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen offset:42 +; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen offset:42 +; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, format:78, 0 idxen offset:42 define amdgpu_vs <4 x float> @tbuffer_load_immoffs(<4 x i32> inreg) { main_body: %vdata = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 42, i32 0, i32 78, i32 0) @@ -36,9 +42,12 @@ ; GCN-LABEL: {{^}}tbuffer_load_immoffs_large ; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0 -; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:15, nfmt:2, 61 idxen offset:4095 -; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:3, {{s[0-9]+}} idxen offset:73 -; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:13, nfmt:4, {{s[0-9]+}} idxen offset:1 +; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:15, nfmt:2, 61 idxen offset:4095 +; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:3, {{s[0-9]+}} idxen offset:73 +; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:13, nfmt:4, {{s[0-9]+}} idxen offset:1 +; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, format:47, 61 idxen offset:4095 +; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, format:62, {{s[0-9]+}} idxen offset:73 +; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, format:77, {{s[0-9]+}} idxen offset:1 ; GCN: s_waitcnt define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>} @tbuffer_load_immoffs_large(<4 x i32> inreg, i32 inreg %soffs) { %vdata = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 4095, i32 61, i32 47, i32 0) @@ -54,7 +63,8 @@ } ; GCN-LABEL: {{^}}tbuffer_load_idx: -; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen +; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen +; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, format:78, 0 idxen define amdgpu_vs <4 x float> @tbuffer_load_idx(<4 x i32> inreg, i32 %vindex) { main_body: %vdata = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 %vindex, i32 0, i32 0, i32 78, i32 0) @@ -63,7 +73,8 @@ } ; GCN-LABEL: {{^}}tbuffer_load_ofs: -; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen offen +; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen offen +; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, format:78, 0 idxen offen define amdgpu_vs <4 x float> @tbuffer_load_ofs(<4 x i32> inreg, i32 %voffs) { main_body: %vdata = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 %voffs, i32 0, i32 78, i32 0) @@ -72,7 +83,8 @@ } ; GCN-LABEL: {{^}}tbuffer_load_ofs_imm: -; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen offen offset:52 +; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen offen offset:52 +; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, format:78, 0 idxen offen offset:52 define amdgpu_vs <4 x float> @tbuffer_load_ofs_imm(<4 x i32> inreg, i32 %voffs) { main_body: %ofs = add i32 %voffs, 52 @@ -82,7 +94,8 @@ } ; GCN-LABEL: {{^}}tbuffer_load_both: -; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen offen +; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen offen +; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, format:78, 0 idxen offen define amdgpu_vs <4 x float> @tbuffer_load_both(<4 x i32> inreg, i32 %vindex, i32 %voffs) { main_body: %vdata = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 %vindex, i32 %voffs, i32 0, i32 78, i32 0) @@ -92,7 +105,8 @@ ; GCN-LABEL: {{^}}buffer_load_xy: -; GCN: tbuffer_load_format_xy {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:13, nfmt:4, 0 idxen +; PREGFX10: tbuffer_load_format_xy {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:13, nfmt:4, 0 idxen +; GFX10: tbuffer_load_format_xy {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, format:77, 0 idxen define amdgpu_vs <2 x float> @buffer_load_xy(<4 x i32> inreg %rsrc) { %vdata = call <2 x i32> @llvm.amdgcn.struct.tbuffer.load.v2i32(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 77, i32 0) %vdata.f = bitcast <2 x i32> %vdata to <2 x float> @@ -100,7 +114,8 @@ } ; GCN-LABEL: {{^}}buffer_load_x: -; GCN: tbuffer_load_format_x {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:13, nfmt:4, 0 idxen +; PREGFX10: tbuffer_load_format_x {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:13, nfmt:4, 0 idxen +; GFX10: tbuffer_load_format_x {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, format:77, 0 idxen define amdgpu_vs float @buffer_load_x(<4 x i32> inreg %rsrc) { %vdata = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 77, i32 0) %vdata.f = bitcast i32 %vdata to float Index: test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll @@ -1,13 +1,15 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=UNPACKED %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PREGFX10,UNPACKED,PREGFX10-UNPACKED %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PREGFX10,PACKED,GFX81,PREGFX10-PACKED %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PREGFX10,PACKED,GFX9,PREGFX10-PACKED %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,PACKED,GFX10-PACKED %s ; GCN-LABEL: {{^}}tbuffer_store_d16_x: -; GCN: s_load_dwordx4 -; GCN: s_load_dword{{[x0-9]*}} s{{\[}}[[S_LO:[0-9]+]] -; GCN: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[S_LO]] -; GCN: tbuffer_store_format_d16_x v[[V_LO]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen +; GCN-DAG: s_load_dwordx4 +; GCN-DAG: s_load_dword{{[x0-2]*}} s{{\[}}[[S_LO:[0-9]+]] +; GCN-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[S_LO]] +; PREGFX10: tbuffer_store_format_d16_x v[[V_LO]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen +; GFX10: tbuffer_store_format_d16_x v[[V_LO]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], format:33, 0 idxen define amdgpu_kernel void @tbuffer_store_d16_x(<4 x i32> %rsrc, half %data, i32 %vindex) { main_body: call void @llvm.amdgcn.struct.tbuffer.store.f16(half %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0) @@ -20,9 +22,10 @@ ; UNPACKED-DAG: s_and_b32 [[MASKED:s[0-9]+]], [[S_DATA]], 0xffff{{$}} ; UNPACKED-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], [[MASKED]] ; UNPACKED-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], [[SHR]] -; UNPACKED: tbuffer_store_format_d16_xy v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen +; PREGFX10-UNPACKED: tbuffer_store_format_d16_xy v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen -; PACKED: tbuffer_store_format_d16_xy v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen +; PREGFX10-PACKED: tbuffer_store_format_d16_xy v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen +; GFX10-PACKED: tbuffer_store_format_d16_xy v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], format:33, 0 idxen define amdgpu_kernel void @tbuffer_store_d16_xy(<4 x i32> %rsrc, <2 x half> %data, i32 %vindex) { main_body: call void @llvm.amdgcn.struct.tbuffer.store.v2f16(<2 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0) @@ -40,12 +43,12 @@ ; UNPACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[MASKED0]] ; UNPACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHR1]] -; UNPACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen - +; PREGFX10-UNPACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen ; PACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s[[S_DATA_0]] ; PACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s[[S_DATA_1]] -; PACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen +; PREGFX10-PACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen +; GFX10-PACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], format:33, 0 idxen define amdgpu_kernel void @tbuffer_store_d16_xyzw(<4 x i32> %rsrc, <4 x half> %data, i32 %vindex) { main_body: call void @llvm.amdgcn.struct.tbuffer.store.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0) Index: test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.ll @@ -1,12 +1,17 @@ -;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefixes=GCN,VERDE %s -;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN %s +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefixes=GCN,VERDE,PREGFX10 %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=GCN,PREGFX10 %s +;RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s ; GCN-LABEL: {{^}}tbuffer_store: ; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0 -; GCN: tbuffer_store_format_xyzw v[0:3], [[ZEROREG]], s[0:3], dfmt:12, nfmt:2, 0 idxen -; GCN: tbuffer_store_format_xyzw v[4:7], [[ZEROREG]], s[0:3], dfmt:13, nfmt:3, 0 idxen glc -; GCN: tbuffer_store_format_xyzw v[8:11], [[ZEROREG]], s[0:3], dfmt:14, nfmt:4, 0 idxen slc -; GCN: tbuffer_store_format_xyzw v[8:11], [[ZEROREG]], s[0:3], dfmt:14, nfmt:4, 0 idxen +; PREGFX10: tbuffer_store_format_xyzw v[0:3], [[ZEROREG]], s[0:3], dfmt:12, nfmt:2, 0 idxen +; PREGFX10: tbuffer_store_format_xyzw v[4:7], [[ZEROREG]], s[0:3], dfmt:13, nfmt:3, 0 idxen glc +; PREGFX10: tbuffer_store_format_xyzw v[8:11], [[ZEROREG]], s[0:3], dfmt:14, nfmt:4, 0 idxen slc +; PREGFX10: tbuffer_store_format_xyzw v[8:11], [[ZEROREG]], s[0:3], dfmt:14, nfmt:4, 0 idxen glc +; GFX10: tbuffer_store_format_xyzw v[0:3], [[ZEROREG]], s[0:3], format:44, 0 idxen +; GFX10: tbuffer_store_format_xyzw v[4:7], [[ZEROREG]], s[0:3], format:61, 0 idxen glc +; GFX10: tbuffer_store_format_xyzw v[8:11], [[ZEROREG]], s[0:3], format:78, 0 idxen slc +; GFX10: tbuffer_store_format_xyzw v[8:11], [[ZEROREG]], s[0:3], format:78, 0 idxen glc dlc define amdgpu_ps void @tbuffer_store(<4 x i32> inreg, <4 x float>, <4 x float>, <4 x float>) { main_body: %in1 = bitcast <4 x float> %1 to <4 x i32> @@ -15,13 +20,14 @@ call void @llvm.amdgcn.struct.tbuffer.store.v4i32(<4 x i32> %in1, <4 x i32> %0, i32 0, i32 0, i32 0, i32 44, i32 0) call void @llvm.amdgcn.struct.tbuffer.store.v4i32(<4 x i32> %in2, <4 x i32> %0, i32 0, i32 0, i32 0, i32 61, i32 1) call void @llvm.amdgcn.struct.tbuffer.store.v4i32(<4 x i32> %in3, <4 x i32> %0, i32 0, i32 0, i32 0, i32 78, i32 2) - call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %3, <4 x i32> %0, i32 0, i32 0, i32 0, i32 78, i32 0) + call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %3, <4 x i32> %0, i32 0, i32 0, i32 0, i32 78, i32 5) ret void } ; GCN-LABEL: {{^}}tbuffer_store_immoffs: ; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0 -; GCN: tbuffer_store_format_xyzw v[0:3], [[ZEROREG]], s[0:3], dfmt:5, nfmt:7, 0 idxen offset:42 +; PREGFX10: tbuffer_store_format_xyzw v[0:3], [[ZEROREG]], s[0:3], dfmt:5, nfmt:7, 0 idxen offset:42 +; GFX10: tbuffer_store_format_xyzw v[0:3], [[ZEROREG]], s[0:3], format:117, 0 idxen offset:42 define amdgpu_ps void @tbuffer_store_immoffs(<4 x i32> inreg, <4 x float>) { main_body: %in1 = bitcast <4 x float> %1 to <4 x i32> @@ -31,7 +37,8 @@ ; GCN-LABEL: {{^}}tbuffer_store_scalar_and_imm_offs: ; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0 -; GCN: tbuffer_store_format_xyzw v[0:3], [[ZEROREG]], s[0:3], dfmt:5, nfmt:7, {{s[0-9]+}} idxen offset:42 +; PREGFX10: tbuffer_store_format_xyzw v[0:3], [[ZEROREG]], s[0:3], dfmt:5, nfmt:7, {{s[0-9]+}} idxen offset:42 +; GFX10: tbuffer_store_format_xyzw v[0:3], [[ZEROREG]], s[0:3], format:117, {{s[0-9]+}} idxen offset:42 define amdgpu_ps void @tbuffer_store_scalar_and_imm_offs(<4 x i32> inreg, <4 x float> %vdata, i32 inreg %soffset) { main_body: %in1 = bitcast <4 x float> %vdata to <4 x i32> @@ -40,7 +47,8 @@ } ; GCN-LABEL: {{^}}buffer_store_idx: -; GCN: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], dfmt:15, nfmt:2, 0 idxen +; PREGFX10: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], dfmt:15, nfmt:2, 0 idxen +; GFX10: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], format:47, 0 idxen define amdgpu_ps void @buffer_store_idx(<4 x i32> inreg, <4 x float> %vdata, i32 %vindex) { main_body: %in1 = bitcast <4 x float> %vdata to <4 x i32> @@ -49,7 +57,8 @@ } ; GCN-LABEL: {{^}}buffer_store_ofs: -; GCN: tbuffer_store_format_xyzw v[0:3], {{v\[[0-9]+:[0-9]+\]}}, s[0:3], dfmt:3, nfmt:7, 0 idxen offen +; PREGFX10: tbuffer_store_format_xyzw v[0:3], {{v\[[0-9]+:[0-9]+\]}}, s[0:3], dfmt:3, nfmt:7, 0 idxen offen +; GFX10: tbuffer_store_format_xyzw v[0:3], {{v\[[0-9]+:[0-9]+\]}}, s[0:3], format:115, 0 idxen offen define amdgpu_ps void @buffer_store_ofs(<4 x i32> inreg, <4 x float> %vdata, i32 %voffset) { main_body: %in1 = bitcast <4 x float> %vdata to <4 x i32> @@ -58,7 +67,8 @@ } ; GCN-LABEL: {{^}}buffer_store_both: -; GCN: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], dfmt:6, nfmt:4, 0 idxen offen +; PREGFX10: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], dfmt:6, nfmt:4, 0 idxen offen +; GFX10: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], format:70, 0 idxen offen define amdgpu_ps void @buffer_store_both(<4 x i32> inreg, <4 x float> %vdata, i32 %vindex, i32 %voffset) { main_body: %in1 = bitcast <4 x float> %vdata to <4 x i32> @@ -69,11 +79,13 @@ ; Ideally, the register allocator would avoid the wait here ; ; GCN-LABEL: {{^}}buffer_store_wait: -; GCN: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], dfmt:15, nfmt:3, 0 idxen +; PREGFX10: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], dfmt:15, nfmt:3, 0 idxen +; GFX10: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], format:63, 0 idxen ; VERDE: s_waitcnt expcnt(0) ; GCN: buffer_load_format_xyzw v[0:3], v5, s[0:3], 0 idxen ; GCN: s_waitcnt vmcnt(0) -; GCN: tbuffer_store_format_xyzw v[0:3], v6, s[0:3], dfmt:14, nfmt:2, 0 idxen +; PREGFX10: tbuffer_store_format_xyzw v[0:3], v6, s[0:3], dfmt:14, nfmt:2, 0 idxen +; GFX10: tbuffer_store_format_xyzw v[0:3], v6, s[0:3], format:46, 0 idxen define amdgpu_ps void @buffer_store_wait(<4 x i32> inreg, <4 x float> %vdata, i32 %vindex.1, i32 %vindex.2, i32 %vindex.3) { main_body: %in1 = bitcast <4 x float> %vdata to <4 x i32> @@ -85,7 +97,8 @@ } ; GCN-LABEL: {{^}}buffer_store_x1: -; GCN: tbuffer_store_format_x v0, v1, s[0:3], dfmt:13, nfmt:7, 0 idxen +; PREGFX10: tbuffer_store_format_x v0, v1, s[0:3], dfmt:13, nfmt:7, 0 idxen +; GFX10: tbuffer_store_format_x v0, v1, s[0:3], format:125, 0 idxen define amdgpu_ps void @buffer_store_x1(<4 x i32> inreg %rsrc, float %data, i32 %vindex) { main_body: %data.i = bitcast float %data to i32 @@ -94,7 +107,8 @@ } ; GCN-LABEL: {{^}}buffer_store_x2: -; GCN: tbuffer_store_format_xy v[0:1], v2, s[0:3], dfmt:1, nfmt:2, 0 idxen +; PREGFX10: tbuffer_store_format_xy v[0:1], v2, s[0:3], dfmt:1, nfmt:2, 0 idxen +; GFX10: tbuffer_store_format_xy v[0:1], v2, s[0:3], format:33, 0 idxen define amdgpu_ps void @buffer_store_x2(<4 x i32> inreg %rsrc, <2 x float> %data, i32 %vindex) { main_body: %data.i = bitcast <2 x float> %data to <2 x i32> Index: test/MC/AMDGPU/gfx10_asm_mimg.s =================================================================== --- /dev/null +++ test/MC/AMDGPU/gfx10_asm_mimg.s @@ -0,0 +1,380 @@ +; RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefixes=GFX10 %s + +image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm +; GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x1f,0x00,0xf0,0x00,0x00,0x00,0x00] + +image_load v[1:4], [v2, v3], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D unorm +; GFX10: image_load v[1:4], [v2, v3], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ; encoding: [0x0a,0x1f,0x00,0xf0,0x02,0x01,0x01,0x00,0x03,0x00,0x00,0x00] + +image_load v[0:3], [v4, v5, v6], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_3D unorm +; GFX10: image_load v[0:3], [v4, v5, v6], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ; encoding: [0x12,0x1f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x06,0x00,0x00] + +image_load v[0:3], [v4, v5, v6], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm +; GFX10: image_load v[0:3], [v4, v5, v6], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ; encoding: [0x1a,0x1f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x06,0x00,0x00] + +image_load v[0:3], [v4, v5], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm +; GFX10: image_load v[0:3], [v4, v5], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ; encoding: [0x22,0x1f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x00,0x00,0x00] + +image_load v[0:3], [v4, v5, v6], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm +; GFX10: image_load v[0:3], [v4, v5, v6], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ; encoding: [0x2a,0x1f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x06,0x00,0x00] + +image_load v[0:3], [v4, v5, v6], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm +; GFX10: image_load v[0:3], [v4, v5, v6], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm ; encoding: [0x32,0x1f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x06,0x00,0x00] + +image_load v[0:3], [v4, v5, v6, v7], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm +; GFX10: image_load v[0:3], [v4, v5, v6, v7], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ; encoding: [0x3a,0x1f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x06,0x07,0x00] + +image_load v[0:1], v0, s[0:7] dmask:0x9 dim:1D +; GFX10: image_load v[0:1], v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x09,0x00,0xf0,0x00,0x00,0x00,0x00] + +image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D dlc +; GFX10: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D dlc ; encoding: [0x80,0x01,0x00,0xf0,0x00,0x00,0x00,0x00] + +image_load v255, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D glc +; GFX10: image_load v255, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x21,0x00,0xf0,0x00,0xff,0x00,0x00] + +image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D slc +; GFX10: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D slc ; encoding: [0x00,0x01,0x00,0xf2,0x00,0x00,0x00,0x00] + +image_load v0, v255, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D d16 +; GFX10: image_load v0, v255, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D d16 ; encoding: [0x00,0x06,0x00,0xf0,0xff,0x00,0x00,0x80] + +// FIXME: This test is incorrect because r128 assumes a 128-bit SRSRC. +image_load v0, v255, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D r128 +; GFX10: image_load v0, v255, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D r128 ; encoding: [0x00,0x81,0x00,0xf0,0xff,0x00,0x00,0x00] + +image_load v0, v[2:3], s[0:7] dmask:0x1 dim:2D +; GFX10: image_load v0, v[2:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x01,0x00,0xf0,0x02,0x00,0x00,0x00] + +image_load v0, v[2:4], s[0:7] dmask:0x1 dim:3D +; GFX10: image_load v0, v[2:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x10,0x01,0x00,0xf0,0x02,0x00,0x00,0x00] + +image_load v0, v[2:4], s[0:7] dmask:0x1 dim:CUBE +; GFX10: image_load v0, v[2:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x18,0x01,0x00,0xf0,0x02,0x00,0x00,0x00] + +image_load v0, v[2:3], s[0:7] dmask:0x1 dim:1D_ARRAY +; GFX10: image_load v0, v[2:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x20,0x01,0x00,0xf0,0x02,0x00,0x00,0x00] + +image_load v0, v[2:4], s[0:7] dmask:0x1 dim:2D_ARRAY +; GFX10: image_load v0, v[2:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x28,0x01,0x00,0xf0,0x02,0x00,0x00,0x00] + +image_load v0, v[2:4], s[0:7] dmask:0x1 dim:2D_MSAA +; GFX10: image_load v0, v[2:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA ; encoding: [0x30,0x01,0x00,0xf0,0x02,0x00,0x00,0x00] + +image_load v0, v[2:5], s[0:7] dmask:0x1 dim:2D_MSAA_ARRAY +; GFX10: image_load v0, v[2:5], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x38,0x01,0x00,0xf0,0x02,0x00,0x00,0x00] + +image_load_mip v[252:255], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX10: image_load_mip v[252:255], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x04,0xf0,0x00,0xfc,0x00,0x00] + +image_load_mip v[253:255], [v255, v254], s[0:7] dmask:0xe dim:SQ_RSRC_IMG_1D +; GFX10: image_load_mip v[253:255], [v255, v254], s[0:7] dmask:0xe dim:SQ_RSRC_IMG_1D ; encoding: [0x02,0x0e,0x04,0xf0,0xff,0xfd,0x00,0x00,0xfe,0x00,0x00,0x00] + +image_load_mip v[254:255], [v254, v255, v253], s[0:7] dmask:0xc dim:SQ_RSRC_IMG_2D +; GFX10: image_load_mip v[254:255], [v254, v255, v253], s[0:7] dmask:0xc dim:SQ_RSRC_IMG_2D ; encoding: [0x0a,0x0c,0x04,0xf0,0xfe,0xfe,0x00,0x00,0xff,0xfd,0x00,0x00] + +image_load_mip v255, [v254, v255, v253, v252], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_3D +; GFX10: image_load_mip v255, [v254, v255, v253, v252], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x08,0x04,0xf0,0xfe,0xff,0x00,0x00,0xff,0xfd,0xfc,0x00] + +image_load_mip v255, [v254, v255, v253, v252], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_CUBE +; GFX10: image_load_mip v255, [v254, v255, v253, v252], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1a,0x08,0x04,0xf0,0xfe,0xff,0x00,0x00,0xff,0xfd,0xfc,0x00] + +image_load_mip v255, [v254, v255, v253], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D_ARRAY +; GFX10: image_load_mip v255, [v254, v255, v253], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x08,0x04,0xf0,0xfe,0xff,0x00,0x00,0xff,0xfd,0x00,0x00] + +image_load_mip v255, [v254, v255, v253, v255], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_ARRAY +; GFX10: image_load_mip v255, [v254, v255, v253, v255], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2a,0x08,0x04,0xf0,0xfe,0xff,0x00,0x00,0xff,0xfd,0xff,0x00] + +image_store v[0:3], [v254, v255, v253, v255], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY +; GFX10: image_store v[0:3], [v254, v255, v253, v255], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x3a,0x0f,0x20,0xf0,0xfe,0x00,0x18,0x00,0xff,0xfd,0xff,0x00] + +image_store v[0:3], v[254:255], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX10: image_store v[0:3], v[254:255], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x20,0xf0,0xfe,0x00,0x18,0x00] + +image_store_mip v[0:3], v[253:255], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX10: image_store_mip v[0:3], v[253:255], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x24,0xf0,0xfd,0x00,0x18,0x00] + +image_get_resinfo v[4:7], v32, s[96:103] dmask:0xf dim:SQ_RSRC_IMG_3D +; GFX10: image_get_resinfo v[4:7], v32, s[96:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x10,0x0f,0x38,0xf0,0x20,0x04,0x18,0x00] + +image_atomic_swap v4, v[32:34], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_3D glc +; GFX10: image_atomic_swap v4, v[32:34], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_3D glc ; encoding: [0x10,0x21,0x3c,0xf0,0x20,0x04,0x18,0x00] + +image_atomic_cmpswap v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_3D glc +; GFX10: image_atomic_cmpswap v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_3D glc ; encoding: [0x12,0x23,0x40,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00] + +image_atomic_add v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_CUBE glc +; GFX10: image_atomic_add v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_CUBE glc ; encoding: [0x1a,0x23,0x44,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00] + +image_atomic_sub v4, [v32, v1], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY glc +; GFX10: image_atomic_sub v4, [v32, v1], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY glc ; encoding: [0x22,0x21,0x48,0xf0,0x20,0x04,0x18,0x00,0x01,0x00,0x00,0x00] + +image_atomic_smin v4, [v32, v1, v2], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY glc +; GFX10: image_atomic_smin v4, [v32, v1, v2], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY glc ; encoding: [0x2a,0x21,0x50,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00] + +image_atomic_umin v4, [v32, v1, v2], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA glc +; GFX10: image_atomic_umin v4, [v32, v1, v2], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA glc ; encoding: [0x32,0x21,0x54,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00] + +image_atomic_smax v4, [v32, v1, v2, v3], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY glc +; GFX10: image_atomic_smax v4, [v32, v1, v2, v3], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY glc ; encoding: [0x3a,0x21,0x58,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x03,0x00] + +image_atomic_umax v4, [v32, v1], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D glc +; GFX10: image_atomic_umax v4, [v32, v1], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D glc ; encoding: [0x0a,0x21,0x5c,0xf0,0x20,0x04,0x18,0x00,0x01,0x00,0x00,0x00] + +image_atomic_and v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc +; GFX10: image_atomic_and v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x21,0x60,0xf0,0x20,0x04,0x18,0x00] + +image_atomic_or v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc +; GFX10: image_atomic_or v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x21,0x64,0xf0,0x20,0x04,0x18,0x00] + +image_atomic_xor v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc +; GFX10: image_atomic_xor v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x21,0x68,0xf0,0x20,0x04,0x18,0x00] + +image_atomic_inc v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc +; GFX10: image_atomic_inc v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x21,0x6c,0xf0,0x20,0x04,0x18,0x00] + +image_atomic_dec v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc +; GFX10: image_atomic_dec v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x21,0x70,0xf0,0x20,0x04,0x18,0x00] + +;image_atomic_fcmpswap v[4:5], v32, s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_1D glc +; TODO-GFX10: ; encoding: [0x00,0x23,0x74,0xf0,0x20,0x04,0x18,0x00] + +;image_atomic_fmin v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc +; TODO-GFX10: ; encoding: [0x00,0x21,0x78,0xf0,0x20,0x04,0x18,0x00] + +;image_atomic_fmax v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc +; TODO-GFX10: ; encoding: [0x00,0x21,0x7c,0xf0,0x20,0x04,0x18,0x00] + +image_sample v[64:66], v32, s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D +; GFX10: image_sample v[64:66], v32, s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x07,0x80,0xf0,0x20,0x40,0x21,0x03] + +image_sample_cl v[64:66], [v32, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D +; GFX10: image_sample_cl v[64:66], [v32, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x02,0x07,0x84,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x00,0x00] + +image_sample_cl v[64:66], [v32, v16, v15], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D +; GFX10: image_sample_cl v[64:66], [v32, v16, v15], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D ; encoding: [0x0a,0x07,0x84,0xf0,0x20,0x40,0x21,0x03,0x10,0x0f,0x00,0x00] + +image_sample_cl v[64:66], [v32, v16, v15, v20], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_cl v[64:66], [v32, v16, v15, v20], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x07,0x84,0xf0,0x20,0x40,0x21,0x03,0x10,0x0f,0x14,0x00] + +image_sample_cl v[64:66], [v32, v16, v15, v20], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE +; GFX10: image_sample_cl v[64:66], [v32, v16, v15, v20], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1a,0x07,0x84,0xf0,0x20,0x40,0x21,0x03,0x10,0x0f,0x14,0x00] + +image_sample_cl v[64:66], [v32, v16, v20], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY +; GFX10: image_sample_cl v[64:66], [v32, v16, v20], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x07,0x84,0xf0,0x20,0x40,0x21,0x03,0x10,0x14,0x00,0x00] + +image_sample_cl v[64:66], [v32, v16, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D_ARRAY +; GFX10: image_sample_cl v[64:66], [v32, v16, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2a,0x07,0x84,0xf0,0x20,0x40,0x21,0x03,0x10,0x14,0x15,0x00] + +image_sample_d v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D +; GFX10: image_sample_d v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x02,0x07,0x88,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x00,0x00] + +image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D +; GFX10: image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D ; encoding: [0x0c,0x07,0x88,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x04,0x02,0x01,0x00,0x00,0x00] + +image_sample_d v[64:66], v[32:39], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D +; GFX10: image_sample_d v[64:66], v[32:39], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x07,0x88,0xf0,0x20,0x40,0x21,0x03] + +image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1, v0, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1, v0, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x07,0x88,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x04,0x02,0x01,0x00,0x14,0x15] + +image_sample_d v[64:66], v[32:47], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_d v[64:66], v[32:47], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x10,0x07,0x88,0xf0,0x20,0x40,0x21,0x03] + +image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1, v5], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE +; GFX10: image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1, v5], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1c,0x07,0x88,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x04,0x02,0x01,0x05,0x00,0x00] + +image_sample_d v[64:66], [v32, v16, v8, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY +; GFX10: image_sample_d v[64:66], [v32, v16, v8, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x07,0x88,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x04,0x00] + +image_sample_d v[64:66], [v32, v16, v8, v4, v0, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D_ARRAY +; GFX10: image_sample_d v[64:66], [v32, v16, v8, v4, v0, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2c,0x07,0x88,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x04,0x00,0x14,0x15,0x00,0x00] + +image_sample_d_cl v[64:66], [v32, v16, v8, v4, v2, v1, v0, v20, v21, v48], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_d_cl v[64:66], [v32, v16, v8, v4, v2, v1, v0, v20, v21, v48], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x07,0x8c,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x04,0x02,0x01,0x00,0x14,0x15,0x30,0x00,0x00,0x00] + +image_sample_l v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY +; GFX10: image_sample_l v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x07,0x90,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x00,0x00] + +image_sample_b v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY +; GFX10: image_sample_b v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x07,0x94,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x00,0x00] + +image_sample_b_cl v[64:66], [v32, v16, v8, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY +; GFX10: image_sample_b_cl v[64:66], [v32, v16, v8, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x07,0x98,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x04,0x00] + +image_sample_lz v[64:66], [v32, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY +; GFX10: image_sample_lz v[64:66], [v32, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x07,0x9c,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x00,0x00] + +image_sample_c v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE +; GFX10: image_sample_c v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1a,0x07,0xa0,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x00] + +image_sample_c_cl v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE +; GFX10: image_sample_c_cl v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1a,0x07,0xa4,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01] + +image_sample_c_d v[64:66], [v32, v16, v0, v2, v1, v4, v8, v12, v16, v17], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_c_d v[64:66], [v32, v16, v0, v2, v1, v4, v8, v12, v16, v17], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x07,0xa8,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x08,0x0c,0x10,0x11,0x00,0x00,0x00] + +image_sample_c_d_cl v[64:66], [v32, v16, v0, v2, v1, v4, v8, v12, v16, v17, v18], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_c_d_cl v[64:66], [v32, v16, v0, v2, v1, v4, v8, v12, v16, v17, v18], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x07,0xac,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x08,0x0c,0x10,0x11,0x12,0x00,0x00] + +image_sample_c_l v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_c_l v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x07,0xb0,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01] + +image_sample_c_b v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_c_b v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x07,0xb4,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01] + +image_sample_c_b_cl v[64:66], [v32, v16, v0, v2, v1, v5], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_c_b_cl v[64:66], [v32, v16, v0, v2, v1, v5], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x07,0xb8,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x05,0x00,0x00,0x00] + +image_sample_c_lz v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_c_lz v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x07,0xbc,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x00] + +image_sample_o v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_o v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x07,0xc0,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x00] + +image_sample_cl_o v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_cl_o v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x07,0xc4,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01] + +image_sample_d_o v[64:66], [v32, v16, v0, v2, v4, v5, v6, v7, v8, v9], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_d_o v[64:66], [v32, v16, v0, v2, v4, v5, v6, v7, v8, v9], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x07,0xc8,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x04,0x05,0x06,0x07,0x08,0x09,0x00,0x00,0x00] + +image_sample_d_cl_o v[64:66], [v32, v16, v0, v2, v4, v5, v6, v7, v8, v9, v10], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_d_cl_o v[64:66], [v32, v16, v0, v2, v4, v5, v6, v7, v8, v9, v10], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x07,0xcc,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x00,0x00] + +image_sample_l_o v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_l_o v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x07,0xd0,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01] + +image_sample_b_o v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_b_o v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x07,0xd4,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01] + +image_sample_b_cl_o v[64:66], [v32, v16, v0, v2, v1, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_b_cl_o v[64:66], [v32, v16, v0, v2, v1, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x07,0xd8,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x00,0x00,0x00] + +image_sample_lz_o v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_lz_o v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x07,0xdc,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x00] + +image_sample_c_o v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_c_o v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x07,0xe0,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01] + +image_sample_c_cl_o v[64:66], [v32, v16, v0, v2, v1, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_c_cl_o v[64:66], [v32, v16, v0, v2, v1, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x07,0xe4,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x00,0x00,0x00] + +image_sample_c_d_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_c_d_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x07,0xe8,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x05,0x06,0x07,0x08,0x09,0x00,0x00] + +image_sample_c_d_cl_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9, v10], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_c_d_cl_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9, v10], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x07,0xec,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x00] + +image_sample_c_l_o v[64:66], [v32, v16, v0, v2, v1, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_c_l_o v[64:66], [v32, v16, v0, v2, v1, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x07,0xf0,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x00,0x00,0x00] + +image_sample_c_b_o v[64:66], [v32, v16, v0, v2, v1, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_c_b_o v[64:66], [v32, v16, v0, v2, v1, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x07,0xf4,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x00,0x00,0x00] + +image_sample_c_b_cl_o v[64:66], [v32, v16, v0, v2, v1, v4, v5], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_c_b_cl_o v[64:66], [v32, v16, v0, v2, v1, v4, v5], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x07,0xf8,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x05,0x00,0x00] + +image_sample_c_lz_o v[64:66], [v32, v0, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D +; GFX10: image_sample_c_lz_o v[64:66], [v32, v0, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x02,0x07,0xfc,0xf0,0x20,0x40,0x21,0x03,0x00,0x10,0x00,0x00] + +image_gather4 v[64:67], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D +; GFX10: image_gather4 v[64:67], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x01,0x00,0xf1,0x20,0x40,0x21,0x03] + +image_gather4_cl v[64:67], v[32:35], s[4:11], s[100:103] dmask:0x2 dim:SQ_RSRC_IMG_CUBE +; GFX10: image_gather4_cl v[64:67], v[32:35], s[4:11], s[100:103] dmask:0x2 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x18,0x02,0x04,0xf1,0x20,0x40,0x21,0x03] + +image_gather4_l v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY +; GFX10: image_gather4_l v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x04,0x10,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x00,0x00] + +image_gather4_b v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x8 dim:SQ_RSRC_IMG_2D +; GFX10: image_gather4_b v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x8 dim:SQ_RSRC_IMG_2D ; encoding: [0x0a,0x08,0x14,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x00,0x00] + +image_gather4_b_cl v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY +; GFX10: image_gather4_b_cl v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2a,0x01,0x18,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06] + +image_gather4_lz v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX10: image_gather4_lz v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x01,0x1c,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x00,0x00] + +image_gather4_c v[64:67], [v32, v0, v4, v5], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX10: image_gather4_c v[64:67], [v32, v0, v4, v5], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x01,0x20,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x00] + +image_gather4_c_cl v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX10: image_gather4_c_cl v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x01,0x24,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06] + +image_gather4_c_l v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX10: image_gather4_c_l v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x01,0x30,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06] + +image_gather4_c_b v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX10: image_gather4_c_b v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x01,0x34,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06] + +image_gather4_c_b_cl v[64:67], [v32, v0, v4, v5, v6, v7], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX10: image_gather4_c_b_cl v[64:67], [v32, v0, v4, v5, v6, v7], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x01,0x38,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06,0x07,0x00,0x00,0x00] + +image_gather4_c_lz v[64:67], [v32, v0, v4, v5], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX10: image_gather4_c_lz v[64:67], [v32, v0, v4, v5], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x01,0x3c,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x00] + +image_gather4_o v[64:67], [v32, v0, v4, v5], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX10: image_gather4_o v[64:67], [v32, v0, v4, v5], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x01,0x40,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x00] + +image_gather4_cl_o v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX10: image_gather4_cl_o v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x01,0x44,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06] + +image_gather4_l_o v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX10: image_gather4_l_o v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x01,0x50,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06] + +image_gather4_b_o v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX10: image_gather4_b_o v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x01,0x54,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06] + +image_gather4_b_cl_o v[64:67], [v32, v0, v4, v5, v6, v7], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX10: image_gather4_b_cl_o v[64:67], [v32, v0, v4, v5, v6, v7], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x01,0x58,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06,0x07,0x00,0x00,0x00] + +image_gather4_lz_o v[64:67], [v32, v0, v4, v5], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX10: image_gather4_lz_o v[64:67], [v32, v0, v4, v5], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x01,0x5c,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x00] + +image_gather4_c_o v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX10: image_gather4_c_o v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x01,0x60,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06] + +image_gather4_c_cl_o v[64:67], [v32, v0, v4, v5, v6, v7], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX10: image_gather4_c_cl_o v[64:67], [v32, v0, v4, v5, v6, v7], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x01,0x64,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06,0x07,0x00,0x00,0x00] + +image_gather4_c_l_o v[64:67], [v32, v0, v4, v5, v6, v7], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX10: image_gather4_c_l_o v[64:67], [v32, v0, v4, v5, v6, v7], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x01,0x70,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06,0x07,0x00,0x00,0x00] + +image_gather4_c_b_o v[64:67], [v32, v0, v4, v5, v6, v7], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX10: ; encoding: [0x14,0x01,0x74,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06,0x07,0x00,0x00,0x00] + +image_gather4_c_b_cl_o v[64:67], [v32, v0, v4, v5, v6, v7, v8], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX10: image_gather4_c_b_cl_o v[64:67], [v32, v0, v4, v5, v6, v7, v8], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x01,0x78,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06,0x07,0x08,0x00,0x00] + +image_gather4_c_lz_o v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D +; GFX10: image_gather4_c_lz_o v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x01,0x7c,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06] + +image_get_lod v64, v[32:33], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_2D +; GFX10: image_get_lod v64, v[32:33], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x01,0x80,0xf1,0x20,0x40,0x21,0x03] + +image_get_lod v[64:65], [v32, v0, v16], s[4:11], s[100:103] dmask:0x3 dim:SQ_RSRC_IMG_2D_ARRAY +; GFX10: image_get_lod v[64:65], [v32, v0, v16], s[4:11], s[100:103] dmask:0x3 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2a,0x03,0x80,0xf1,0x20,0x40,0x21,0x03,0x00,0x10,0x00,0x00] + +image_sample_cd v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_cd v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x07,0xa0,0xf1,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x05,0x06,0x07] + +image_sample_cd_cl v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_cd_cl v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x07,0xa4,0xf1,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x05,0x06,0x07,0x08,0x00,0x00,0x00] + +image_sample_c_cd v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_c_cd v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x07,0xa8,0xf1,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x05,0x06,0x07,0x08,0x00,0x00,0x00] + +image_sample_c_cd_cl v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_c_cd_cl v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x07,0xac,0xf1,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x05,0x06,0x07,0x08,0x09,0x00,0x00] + +image_sample_cd_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_cd_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x07,0xb0,0xf1,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x05,0x06,0x07,0x08,0x00,0x00,0x00] + +image_sample_cd_cl_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_cd_cl_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x07,0xb4,0xf1,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x05,0x06,0x07,0x08,0x09,0x00,0x00] + +image_sample_c_cd_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_c_cd_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x07,0xb8,0xf1,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x05,0x06,0x07,0x08,0x09,0x00,0x00] + +image_sample_c_cd_cl_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9, v10], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D +; GFX10: image_sample_c_cd_cl_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9, v10], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x07,0xbc,0xf1,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x00] Index: test/MC/AMDGPU/gfx10_asm_mimg_err.s =================================================================== --- /dev/null +++ test/MC/AMDGPU/gfx10_asm_mimg_err.s @@ -0,0 +1,38 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck --check-prefixes=NOGFX10 %s + +; TODO: more helpful error message for missing dim operand +image_load v[0:3], v0, s[0:7] dmask:0xf unorm +; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D da +; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +image_load_pck v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D d16 +; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +image_load v[0:1], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D +; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image data size does not match dmask and tfe + +image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D +; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match dim and a16 + +image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE +; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match dim and a16 + +image_sample_d v[0:3], [v0, v1, v2, v3, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY +; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match dim and a16 + +image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE +; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match dim and a16 + +image_sample_c_d v[0:3], [v0, v1, v2, v3, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY +; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match dim and a16 + +image_sample_c_d_cl v[0:3], [v0, v1, v2, v3, v4, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY +; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match dim and a16 + +image_sample_c_d_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match dim and a16 + +image_load v[0:1], v0, s[0:7] dmask:0x9 dim:1 D +; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand Index: test/MC/AMDGPU/mtbuf-gfx10.s =================================================================== --- /dev/null +++ test/MC/AMDGPU/mtbuf-gfx10.s @@ -0,0 +1,68 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=GFX10 %s + +// GFX10: tbuffer_load_format_d16_x v0, off, s[0:3], format:22, 0 ; encoding: [0x00,0x00,0xb0,0xe8,0x00,0x00,0x20,0x80] +tbuffer_load_format_d16_x v0, off, s[0:3], format:22, 0 +// GFX10: tbuffer_load_format_d16_xy v0, off, s[0:3], format:22, 0 ; encoding: [0x00,0x00,0xb1,0xe8,0x00,0x00,0x20,0x80] +tbuffer_load_format_d16_xy v0, off, s[0:3], format:22, 0 +// GFX10: tbuffer_load_format_d16_xyzw v[0:1], off, s[0:3], format:22, 0 ; encoding: [0x00,0x00,0xb3,0xe8,0x00,0x00,0x20,0x80] +tbuffer_load_format_d16_xyzw v[0:1], off, s[0:3], format:22, 0 +// GFX10: tbuffer_load_format_xyzw v[0:3], off, s[0:3], format:78, 0 ; encoding: [0x00,0x00,0x73,0xea,0x00,0x00,0x00,0x80] +tbuffer_load_format_xyzw v[0:3], off, s[0:3], format:78, 0 +// GFX10: tbuffer_load_format_xyzw v[8:11], off, s[0:3], format:22, 0 slc ; encoding: [0x00,0x00,0xb3,0xe8,0x00,0x08,0x40,0x80] +tbuffer_load_format_xyzw v[8:11], off, s[0:3], format:22, 0 slc +// GFX10: tbuffer_load_format_xyzw v[4:7], off, s[0:3], format:63, 0 glc ; encoding: [0x00,0x40,0xfb,0xe9,0x00,0x04,0x00,0x80] +tbuffer_load_format_xyzw v[4:7], off, s[0:3], format:63, 0 glc +// GFX10: tbuffer_load_format_xyzw v[12:15], off, s[0:3], format:23, 0 glc dlc ; encoding: [0x00,0xc0,0xbb,0xe8,0x00,0x0c,0x00,0x80] +tbuffer_load_format_xyzw v[12:15], off, s[0:3], format:23, 0 glc dlc +// GFX10: tbuffer_load_format_xyzw v[0:3], off, s[0:3], format:78, 0 offset:42 ; encoding: [0x2a,0x00,0x73,0xea,0x00,0x00,0x00,0x80] +tbuffer_load_format_xyzw v[0:3], off, s[0:3], format:78, 0 offset:42 +// GFX10: tbuffer_load_format_xyzw v[4:7], off, s[0:3], format:62, s4 offset:73 ; encoding: [0x49,0x00,0xf3,0xe9,0x00,0x04,0x00,0x04] +tbuffer_load_format_xyzw v[4:7], off, s[0:3], format:62, s4 offset:73 +// GFX10: tbuffer_load_format_xyzw v[0:3], off, s[0:3], format:47, 61 offset:4095 ; encoding: [0xff,0x0f,0x7b,0xe9,0x00,0x00,0x00,0xbd] +tbuffer_load_format_xyzw v[0:3], off, s[0:3], format:47, 61 offset:4095 +// GFX10: tbuffer_load_format_xyzw v[8:11], off, s[0:3], format:77, s4 offset:1 ; encoding: [0x01,0x00,0x6b,0xea,0x00,0x08,0x00,0x04] +tbuffer_load_format_xyzw v[8:11], off, s[0:3], format:77, s4 offset:1 +// GFX10: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], format:78, 0 idxen ; encoding: [0x00,0x20,0x73,0xea,0x00,0x00,0x00,0x80] +tbuffer_load_format_xyzw v[0:3], v0, s[0:3], format:78, 0 idxen +// GFX10: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], format:78, 0 offen ; encoding: [0x00,0x10,0x73,0xea,0x00,0x00,0x00,0x80] +tbuffer_load_format_xyzw v[0:3], v0, s[0:3], format:78, 0 offen +// GFX10: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], format:78, 0 offen offset:52 ; encoding: [0x34,0x10,0x73,0xea,0x00,0x00,0x00,0x80] +tbuffer_load_format_xyzw v[0:3], v0, s[0:3], format:78, 0 offen offset:52 +// GFX10: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], format:78, 0 idxen offen ; encoding: [0x00,0x30,0x73,0xea,0x00,0x00,0x00,0x80] +tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], format:78, 0 idxen offen +// GFX10: tbuffer_load_format_xy v[0:1], off, s[0:3], format:77, 0 ; encoding: [0x00,0x00,0x69,0xea,0x00,0x00,0x00,0x80] +tbuffer_load_format_xy v[0:1], off, s[0:3], format:77, 0 +// GFX10: tbuffer_load_format_x v0, off, s[0:3], format:77, 0 ; encoding: [0x00,0x00,0x68,0xea,0x00,0x00,0x00,0x80] +tbuffer_load_format_x v0, off, s[0:3], format:77, 0 +// GFX10: tbuffer_store_format_d16_x v0, v1, s[4:7], format:33, 0 idxen ; encoding: [0x00,0x20,0x0c,0xe9,0x01,0x00,0x21,0x80] +tbuffer_store_format_d16_x v0, v1, s[4:7], format:33, 0 idxen +// GFX10: tbuffer_store_format_d16_xy v0, v1, s[4:7], format:33, 0 idxen ; encoding: [0x00,0x20,0x0d,0xe9,0x01,0x00,0x21,0x80] +tbuffer_store_format_d16_xy v0, v1, s[4:7], format:33, 0 idxen +// GFX10: tbuffer_store_format_d16_xyzw v[0:1], v2, s[4:7], format:33, 0 idxen ; encoding: [0x00,0x20,0x0f,0xe9,0x02,0x00,0x21,0x80] +tbuffer_store_format_d16_xyzw v[0:1], v2, s[4:7], format:33, 0 idxen +// GFX10: tbuffer_store_format_xyzw v[0:3], off, s[0:3], format:44, 0 ; encoding: [0x00,0x00,0x67,0xe9,0x00,0x00,0x00,0x80] +tbuffer_store_format_xyzw v[0:3], off, s[0:3], format:44, 0 +// GFX10: tbuffer_store_format_xyzw v[4:7], off, s[0:3], format:61, 0 glc ; encoding: [0x00,0x40,0xef,0xe9,0x00,0x04,0x00,0x80] +tbuffer_store_format_xyzw v[4:7], off, s[0:3], format:61, 0 glc +// GFX10: tbuffer_store_format_xyzw v[8:11], off, s[0:3], format:78, 0 slc ; encoding: [0x00,0x00,0x77,0xea,0x00,0x08,0x40,0x80] +tbuffer_store_format_xyzw v[8:11], off, s[0:3], format:78, 0 slc +// GFX10: tbuffer_store_format_xyzw v[8:11], off, s[0:3], format:78, 0 ; encoding: [0x00,0x00,0x77,0xea,0x00,0x08,0x00,0x80] +tbuffer_store_format_xyzw v[8:11], off, s[0:3], format:78, 0 +// GFX10: tbuffer_store_format_xyzw v[0:3], off, s[0:3], format:117, 0 offset:42 ; encoding: [0x2a,0x00,0xaf,0xeb,0x00,0x00,0x00,0x80] +tbuffer_store_format_xyzw v[0:3], off, s[0:3], format:117, 0 offset:42 +// GFX10: tbuffer_store_format_xyzw v[0:3], off, s[0:3], format:117, s4 offset:42 ; encoding: [0x2a,0x00,0xaf,0xeb,0x00,0x00,0x00,0x04] +tbuffer_store_format_xyzw v[0:3], off, s[0:3], format:117, s4 offset:42 +// GFX10: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], format:47, 0 idxen ; encoding: [0x00,0x20,0x7f,0xe9,0x04,0x00,0x00,0x80] +tbuffer_store_format_xyzw v[0:3], v4, s[0:3], format:47, 0 idxen +// GFX10: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], format:115, 0 offen ; encoding: [0x00,0x10,0x9f,0xeb,0x04,0x00,0x00,0x80] +tbuffer_store_format_xyzw v[0:3], v4, s[0:3], format:115, 0 offen +// GFX10: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], format:70, 0 idxen offen ; encoding: [0x00,0x30,0x37,0xea,0x04,0x00,0x00,0x80] +tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], format:70, 0 idxen offen +// GFX10: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], format:63, 0 idxen ; encoding: [0x00,0x20,0xff,0xe9,0x04,0x00,0x00,0x80] +tbuffer_store_format_xyzw v[0:3], v4, s[0:3], format:63, 0 idxen +// GFX10: tbuffer_store_format_xyzw v[0:3], v6, s[0:3], format:46, 0 idxen ; encoding: [0x00,0x20,0x77,0xe9,0x06,0x00,0x00,0x80] +tbuffer_store_format_xyzw v[0:3], v6, s[0:3], format:46, 0 idxen +// GFX10: tbuffer_store_format_x v0, v1, s[0:3], format:125, 0 idxen ; encoding: [0x00,0x20,0xec,0xeb,0x01,0x00,0x00,0x80] +tbuffer_store_format_x v0, v1, s[0:3], format:125, 0 idxen +// GFX10: tbuffer_store_format_xy v[0:1], v2, s[0:3], format:33, 0 idxen ; encoding: [0x00,0x20,0x0d,0xe9,0x02,0x00,0x00,0x80] +tbuffer_store_format_xy v[0:1], v2, s[0:3], format:33, 0 idxen Index: test/MC/Disassembler/AMDGPU/gfx10_mimg.txt =================================================================== --- /dev/null +++ test/MC/Disassembler/AMDGPU/gfx10_mimg.txt @@ -0,0 +1,311 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX10 %s + +# GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x1f,0x00,0xf0,0x00,0x00,0x00,0x00] +0x00,0x1f,0x00,0xf0,0x00,0x00,0x00,0x00 + +# GFX10: image_load v[0:3], v[1:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ; encoding: [0x08,0x1f,0x00,0xf0,0x01,0x00,0x00,0x00] +0x08,0x1f,0x00,0xf0,0x01,0x00,0x00,0x00 + +# GFX10: image_load v[252:255], v[2:4], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ; encoding: [0x10,0x1f,0x00,0xf0,0x02,0xfc,0x00,0x00] +0x10,0x1f,0x00,0xf0,0x02,0xfc,0x00,0x00 + +# GFX10: image_load v[252:255], v[3:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ; encoding: [0x18,0x1f,0x00,0xf0,0x03,0xfc,0x00,0x00] +0x18,0x1f,0x00,0xf0,0x03,0xfc,0x00,0x00 + +# GFX10: image_load v[253:255], v[4:5], s[0:7] dmask:0xb dim:SQ_RSRC_IMG_1D_ARRAY unorm ; encoding: [0x20,0x1b,0x00,0xf0,0x04,0xfd,0x00,0x00] +0x20,0x1b,0x00,0xf0,0x04,0xfd,0x00,0x00 + +# GFX10: image_load v[254:255], v[5:7], s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_2D_ARRAY unorm ; encoding: [0x28,0x19,0x00,0xf0,0x05,0xfe,0x00,0x00] +0x28,0x19,0x00,0xf0,0x05,0xfe,0x00,0x00 + +# GFX10: image_load v255, v[6:8], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_MSAA unorm ; encoding: [0x30,0x18,0x00,0xf0,0x06,0xff,0x00,0x00] +0x30,0x18,0x00,0xf0,0x06,0xff,0x00,0x00 + +# GFX10: image_load v65, v[7:10], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ; encoding: [0x38,0x18,0x00,0xf0,0x07,0x41,0x00,0x00] +0x38,0x18,0x00,0xf0,0x07,0x41,0x00,0x00 + +# GFX10: image_load_mip v[16:19], v[8:9], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x1f,0x04,0xf0,0x08,0x10,0x01,0x00] +0x00,0x1f,0x04,0xf0,0x08,0x10,0x01,0x00 + +# GFX10: image_load_pck v[16:19], v[8:9], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ; encoding: [0x08,0x1f,0x08,0xf0,0x08,0x10,0x01,0x00] +0x08,0x1f,0x08,0xf0,0x08,0x10,0x01,0x00 + +# GFX10: image_load_pck_sgn v[16:19], v[8:9], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D unorm glc ; encoding: [0x08,0x3f,0x0c,0xf0,0x08,0x10,0x01,0x00] +0x08,0x3f,0x0c,0xf0,0x08,0x10,0x01,0x00 + +# GFX10: image_load_mip_pck v[16:19], v[8:10], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D unorm slc ; encoding: [0x08,0x1f,0x10,0xf2,0x08,0x10,0x01,0x00] +0x08,0x1f,0x10,0xf2,0x08,0x10,0x01,0x00 + +# GFX10: image_load_mip_pck_sgn v[16:19], v[8:10], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D unorm dlc ; encoding: [0x88,0x1f,0x14,0xf0,0x08,0x10,0x01,0x00] +0x88,0x1f,0x14,0xf0,0x08,0x10,0x01,0x00 + +# TODO: This is incorrect: r128 should use a 128-bit register for srsrc +# GFX10: image_load_mip_pck_sgn v[16:19], v[8:10], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D unorm r128 ; encoding: [0x08,0x9f,0x14,0xf0,0x08,0x10,0x01,0x00] +0x08,0x9f,0x14,0xf0,0x08,0x10,0x01,0x00 + +# GFX10: image_store v16, v[8:9], s[96:103] dmask:0x4 dim:SQ_RSRC_IMG_2D unorm ; encoding: [0x08,0x14,0x20,0xf0,0x08,0x10,0x18,0x00] +0x08,0x14,0x20,0xf0,0x08,0x10,0x18,0x00 + +# GFX10: image_store v[16:17], v[8:10], s[96:103] dmask:0x5 dim:SQ_RSRC_IMG_3D unorm ; encoding: [0x10,0x15,0x20,0xf0,0x08,0x10,0x18,0x00] +0x10,0x15,0x20,0xf0,0x08,0x10,0x18,0x00 + +# GFX10: image_store v[16:18], v[8:10], s[96:103] dmask:0xd dim:SQ_RSRC_IMG_CUBE unorm ; encoding: [0x18,0x1d,0x20,0xf0,0x08,0x10,0x18,0x00] +0x18,0x1d,0x20,0xf0,0x08,0x10,0x18,0x00 + +# GFX10: image_store v[16:19], v[8:10], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ; encoding: [0x28,0x1f,0x20,0xf0,0x08,0x10,0x18,0x00] +0x28,0x1f,0x20,0xf0,0x08,0x10,0x18,0x00 + +# GFX10: image_store_mip v[16:19], v[8:10], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ; encoding: [0x20,0x1f,0x24,0xf0,0x08,0x10,0x18,0x00] +0x20,0x1f,0x24,0xf0,0x08,0x10,0x18,0x00 + +# GFX10: image_store_pck v[16:19], v[8:10], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ; encoding: [0x10,0x1f,0x28,0xf0,0x08,0x10,0x18,0x00] +0x10,0x1f,0x28,0xf0,0x08,0x10,0x18,0x00 + +# GFX10: image_store_mip_pck v[16:19], v[8:11], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ; encoding: [0x10,0x1f,0x2c,0xf0,0x08,0x10,0x18,0x00] +0x10,0x1f,0x2c,0xf0,0x08,0x10,0x18,0x00 + +# GFX10: image_get_resinfo v[16:19], v8, s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ; encoding: [0x28,0x1f,0x38,0xf0,0x08,0x10,0x18,0x00] +0x28,0x1f,0x38,0xf0,0x08,0x10,0x18,0x00 + +# GFX10: image_atomic_swap v16, v8, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x11,0x3c,0xf0,0x08,0x10,0x18,0x00] +0x00,0x11,0x3c,0xf0,0x08,0x10,0x18,0x00 + +# NOTE: Contents of unused NSA bytes are NOT preserved. + +# GFX10: image_atomic_cmpswap v[16:17], [v8, v9], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm glc ; encoding: [0x0a,0x33,0x40,0xf0,0x08,0x10,0x18,0x00,0x09,0x00,0x00,0x00] +0x0a,0x33,0x40,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x0b,0x0c + +# GFX10: image_atomic_add v16, [v8, v9, v10], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm glc ; encoding: [0x12,0x31,0x44,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x00,0x00] +0x12,0x31,0x44,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x0b,0x0c + +# GFX10: image_atomic_sub v16, [v8, v9, v10], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_CUBE unorm glc ; encoding: [0x1a,0x31,0x48,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x00,0x00] +0x1a,0x31,0x48,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x0b,0x0c + +# GFX10: image_atomic_smin v16, [v8, v9], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc ; encoding: [0x22,0x31,0x50,0xf0,0x08,0x10,0x18,0x00,0x09,0x00,0x00,0x00] +0x22,0x31,0x50,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x0b,0x0c + +# GFX10: image_atomic_umin v16, [v8, v9, v10], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc ; encoding: [0x2a,0x31,0x54,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x00,0x00] +0x2a,0x31,0x54,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x0b,0x0c + +# GFX10: image_atomic_smax v16, [v8, v9, v10], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm glc ; encoding: [0x32,0x31,0x58,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x00,0x00] +0x32,0x31,0x58,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x0b,0x0c + +# GFX10: image_atomic_umax v16, [v8, v9, v10, v11], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc ; encoding: [0x3a,0x31,0x5c,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x0b,0x00] +0x3a,0x31,0x5c,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x0b,0x0c + +# GFX10: image_atomic_and v16, v8, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; encoding: [0x00,0x31,0x60,0xf0,0x08,0x10,0x18,0x00] +0x00,0x31,0x60,0xf0,0x08,0x10,0x18,0x00 + +# GFX10: image_atomic_or v16, v8, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; encoding: [0x00,0x31,0x64,0xf0,0x08,0x10,0x18,0x00] +0x00,0x31,0x64,0xf0,0x08,0x10,0x18,0x00 + +# GFX10: image_atomic_xor v16, v8, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; encoding: [0x00,0x31,0x68,0xf0,0x08,0x10,0x18,0x00] +0x00,0x31,0x68,0xf0,0x08,0x10,0x18,0x00 + +# GFX10: image_atomic_inc v16, v8, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; encoding: [0x00,0x31,0x6c,0xf0,0x08,0x10,0x18,0x00] +0x00,0x31,0x6c,0xf0,0x08,0x10,0x18,0x00 + +# GFX10: image_atomic_dec v16, v8, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; encoding: [0x00,0x31,0x70,0xf0,0x08,0x10,0x18,0x00] +0x00,0x31,0x70,0xf0,0x08,0x10,0x18,0x00 + +# TODO: image_atomic_fcmpswap +# TODO: image_atomic_fmin +# TODO: image_atomic_fmax + +# GFX10: image_sample v[16:19], v8, s[96:103], s[0:3] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x80,0xf0,0x08,0x10,0x18,0x00] +0x00,0x0f,0x80,0xf0,0x08,0x10,0x18,0x00 + +# GFX10: image_sample_cl v[16:17], [v8, v9, v10], s[96:103], s[0:3] dmask:0xf dim:SQ_RSRC_IMG_2D d16 ; encoding: [0x0a,0x0f,0x84,0xf0,0x08,0x10,0x18,0x80,0x09,0x0a,0x00,0x00] +0x0a,0x0f,0x84,0xf0,0x08,0x10,0x18,0x80,0x09,0x0a,0x0b,0x0c + +# GFX10: image_sample_d v[16:19], [v8, v9, v10], s[96:103], s[0:3] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x02,0x0f,0x88,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x00,0x00] +0x02,0x0f,0x88,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x0b,0x0c + +# GFX10: image_sample_d_cl v[16:19], [v8, v9, v10, v11, v12, v13, v14], s[20:27], s[4:7] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0c,0x0f,0x8c,0xf0,0x08,0x10,0x25,0x00,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x00,0x00] +0x0c,0x0f,0x8c,0xf0,0x08,0x10,0x25,0x00,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10 + +# GFX10: image_sample_l v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x0f,0x90,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00] +0x12,0x0f,0x90,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_sample_b v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1a,0x0f,0x94,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00] +0x1a,0x0f,0x94,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_sample_b_cl v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x0f,0x98,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00] +0x22,0x0f,0x98,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_sample_lz v[16:19], [v8, v9, v10], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2a,0x0f,0x9c,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x00,0x00] +0x2a,0x0f,0x9c,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_sample_c v[16:19], [v8, v9, v10], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0a,0x0f,0xa0,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x00,0x00] +0x0a,0x0f,0xa0,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_sample_c_cl v[16:19], [v8, v9, v10, v11, v12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x0f,0xa4,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c] +0x12,0x0f,0xa4,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_sample_c_d v[16:19], [v8, v9, v10, v11, v12, v13, v14, v15, v16, v17], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x0f,0xa8,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x00,0x00,0x00] +0x16,0x0f,0xa8,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14 + +# GFX10: image_sample_c_d_cl v[16:19], [v8, v9, v10, v11, v12, v13, v14, v15, v16], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1c,0x0f,0xac,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10] +0x1c,0x0f,0xac,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10 + +# GFX10: image_sample_c_l v[16:19], [v8, v9, v10, v11, v12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1a,0x0f,0xb0,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c] +0x1a,0x0f,0xb0,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_sample_c_b v[16:19], v[8:15], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x18,0x0f,0xb4,0xf0,0x08,0x10,0x25,0x03] +0x18,0x0f,0xb4,0xf0,0x08,0x10,0x25,0x03 + +# TODO: address of this instruction is v[250:255], but this register class does +# not exist, and the next-larger size goes beyond the last register, so +# the disassembly is not adjusted properly +# GFX10: image_sample_c_b_cl v16, v[250:252], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x18,0x0f,0xb8,0xf0,0xfa,0x10,0x25,0x03] +0x18,0x0f,0xb8,0xf0,0xfa,0x10,0x25,0x03 + +# GFX10: image_sample_c_lz v[16:19], v[253:255], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xbc,0xf0,0xfd,0x10,0x25,0x03] +0x08,0x0f,0xbc,0xf0,0xfd,0x10,0x25,0x03 + +# GFX10: image_sample_o v[16:19], v[252:255], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x28,0x0f,0xc0,0xf0,0xfc,0x10,0x25,0x03] +0x28,0x0f,0xc0,0xf0,0xfc,0x10,0x25,0x03 + +# GFX10: image_sample_cl_o v[16:19], [v8, v9, v10], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x02,0x0f,0xc4,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x00,0x00] +0x02,0x0f,0xc4,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_sample_d_o v[16:19], [v8, v9, v10, v11, v12, v13, v14], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0c,0x0f,0xc8,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x00,0x00] +0x0c,0x0f,0xc8,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10 + +# GFX10: image_sample_d_cl_o v[16:19], [v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x0f,0xcc,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x00,0x00] +0x16,0x0f,0xcc,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14 + +# GFX10: image_sample_l_o v[16:19], [v8, v9, v10, v11, v12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1a,0x0f,0xd0,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c] +0x1a,0x0f,0xd0,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_sample_b_o v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x0f,0xd4,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00] +0x22,0x0f,0xd4,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_sample_b_cl_o v[16:19], [v8, v9, v10, v11, v12, v13], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2c,0x0f,0xd8,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x00,0x00,0x00] +0x2c,0x0f,0xd8,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10 + +# GFX10: image_sample_lz_o v[16:19], [v8, v9], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x02,0x0f,0xdc,0xf0,0x08,0x10,0x25,0x03,0x09,0x00,0x00,0x00] +0x02,0x0f,0xdc,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_sample_c_o v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0a,0x0f,0xe0,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00] +0x0a,0x0f,0xe0,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_sample_c_cl_o v[16:19], [v8, v9, v10, v11, v12, v13], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x0f,0xe4,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x00,0x00,0x00] +0x14,0x0f,0xe4,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10 + +# GFX10: image_sample_c_d_o v[16:19], [v8, v9, v10, v11, v12, v13, v14, v15, v16], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1c,0x0f,0xe8,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10] +0x1c,0x0f,0xe8,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10 + +# GFX10: image_sample_c_d_cl_o v[16:19], [v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x0f,0xec,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x00] +0x16,0x0f,0xec,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14 + +# GFX10: image_sample_c_l_o v[16:19], [v8, v9, v10, v11, v12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x0f,0xf0,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c] +0x22,0x0f,0xf0,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_sample_c_b_o v[16:19], [v8, v9, v10, v11, v12, v13], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2c,0x0f,0xf4,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x00,0x00,0x00] +0x2c,0x0f,0xf4,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10 + +# GFX10: image_sample_c_b_cl_o v[16:19], [v8, v9, v10, v11, v12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x02,0x0f,0xf8,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c] +0x02,0x0f,0xf8,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_sample_c_lz_o v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0a,0x0f,0xfc,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00] +0x0a,0x0f,0xfc,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_gather4 v[16:19], [v8, v9, v10], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x0f,0x00,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x00,0x00] +0x12,0x0f,0x00,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_gather4_cl v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1a,0x0f,0x04,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00] +0x1a,0x0f,0x04,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_gather4_l v[16:19], [v8, v9, v10], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x0f,0x10,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x00,0x00] +0x22,0x0f,0x10,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_gather4_b v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2a,0x0f,0x14,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00] +0x2a,0x0f,0x14,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_gather4_b_cl v[16:19], [v8, v9, v10], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x02,0x0f,0x18,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x00,0x00] +0x02,0x0f,0x18,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_gather4_lz v[16:19], [v8, v9], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0a,0x0f,0x1c,0xf1,0x08,0x10,0x25,0x03,0x09,0x00,0x00,0x00] +0x0a,0x0f,0x1c,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_gather4_c v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x0f,0x20,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00] +0x12,0x0f,0x20,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_gather4_c_cl v[16:19], [v8, v9, v10, v11, v12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1a,0x0f,0x24,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c] +0x1a,0x0f,0x24,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_gather4_c_l v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x0f,0x30,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00] +0x22,0x0f,0x30,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_gather4_c_b v[16:19], [v8, v9, v10, v11, v12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2a,0x0f,0x34,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c] +0x2a,0x0f,0x34,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_gather4_c_b_cl v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x02,0x0f,0x38,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00] +0x02,0x0f,0x38,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_gather4_c_lz v[16:19], [v8, v9, v10], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0a,0x0f,0x3c,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x00,0x00] +0x0a,0x0f,0x3c,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_gather4_o v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x0f,0x40,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00] +0x12,0x0f,0x40,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_gather4_cl_o v[16:19], [v8, v9, v10, v11, v12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1a,0x0f,0x44,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c] +0x1a,0x0f,0x44,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_gather4_l_o v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x0f,0x50,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00] +0x22,0x0f,0x50,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_gather4_b_o v[16:19], [v8, v9, v10, v11, v12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2a,0x0f,0x54,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c] +0x2a,0x0f,0x54,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_gather4_b_cl_o v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x02,0x0f,0x58,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00] +0x02,0x0f,0x58,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_gather4_lz_o v[16:19], [v8, v9, v10], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0a,0x0f,0x5c,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x00,0x00] +0x0a,0x0f,0x5c,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_gather4_c_o v[16:19], [v8, v9, v10, v11, v12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x0f,0x60,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c] +0x12,0x0f,0x60,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_gather4_c_cl_o v[16:19], [v8, v9, v10, v11, v12, v13], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1c,0x0f,0x64,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x00,0x00,0x00] +0x1c,0x0f,0x64,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10 + +# GFX10: image_gather4_c_l_o v[16:19], [v8, v9, v10, v11, v12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x0f,0x70,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c] +0x22,0x0f,0x70,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_gather4_c_b_o v[16:19], [v8, v9, v10, v11, v12, v13], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2c,0x0f,0x74,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x00,0x00,0x00] +0x2c,0x0f,0x74,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10 + +# GFX10: image_gather4_c_b_cl_o v[16:19], [v8, v9, v10, v11, v12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x02,0x0f,0x78,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c] +0x02,0x0f,0x78,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_gather4_c_lz_o v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0a,0x0f,0x7c,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00] +0x0a,0x0f,0x7c,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_get_lod v[16:19], [v8, v9, v10], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x0f,0x80,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x00,0x00] +0x12,0x0f,0x80,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_sample_cd v[16:19], [v8, v9, v10, v11, v12, v13, v14], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1c,0x0f,0xa0,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x00,0x00] +0x1c,0x0f,0xa0,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10 + +# GFX10: image_sample_cd_cl v[16:19], [v8, v9, v10, v11, v12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x0f,0xa4,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c] +0x22,0x0f,0xa4,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_sample_c_cd v[16:19], [v8, v9, v10, v11, v12, v13, v14, v15], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2c,0x0f,0xa8,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x00] +0x2c,0x0f,0xa8,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10 + +# GFX10: image_sample_c_cd_cl v[16:19], [v8, v9, v10, v11, v12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x02,0x0f,0xac,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c] +0x02,0x0f,0xac,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c + +# GFX10: image_sample_cd_o v[16:19], [v8, v9, v10, v11, v12, v13, v14], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0c,0x0f,0xb0,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x00,0x00] +0x0c,0x0f,0xb0,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10 + +# GFX10: image_sample_cd_cl_o v[16:19], [v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x0f,0xb4,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x00,0x00] +0x16,0x0f,0xb4,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14 + +# GFX10: image_sample_c_cd_o v[16:19], [v8, v9, v10, v11, v12, v13, v14, v15, v16], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1c,0x0f,0xb8,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10] +0x1c,0x0f,0xb8,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10 + +# GFX10: image_sample_c_cd_cl_o v[16:19], [v8, v9, v10, v11, v12, v13, v14], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x24,0x0f,0xbc,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x00,0x00] +0x24,0x0f,0xbc,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10 Index: test/MC/Disassembler/AMDGPU/mtbuf_gfx10.txt =================================================================== --- /dev/null +++ test/MC/Disassembler/AMDGPU/mtbuf_gfx10.txt @@ -0,0 +1,69 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -disassemble -show-encoding < %s | FileCheck %s -check-prefix=GFX10 + +# GFX10: tbuffer_load_format_d16_x v0, off, s[0:3], format:22, 0 +0x00,0x00,0xb0,0xe8,0x00,0x00,0x20,0x80 +# GFX10: tbuffer_load_format_d16_xy v0, off, s[0:3], format:22, 0 +0x00,0x00,0xb1,0xe8,0x00,0x00,0x20,0x80 +# GFX10: tbuffer_load_format_d16_xyzw v[0:1], off, s[0:3], format:22, 0 +0x00,0x00,0xb3,0xe8,0x00,0x00,0x20,0x80 +# GFX10: tbuffer_load_format_xyzw v[0:3], off, s[0:3], format:78, 0 +0x00,0x00,0x73,0xea,0x00,0x00,0x00,0x80 +# GFX10: tbuffer_load_format_xyzw v[8:11], off, s[0:3], format:22, 0 slc +0x00,0x00,0xb3,0xe8,0x00,0x08,0x40,0x80 +# GFX10: tbuffer_load_format_xyzw v[4:7], off, s[0:3], format:63, 0 glc +0x00,0x40,0xfb,0xe9,0x00,0x04,0x00,0x80 +# GFX10: tbuffer_load_format_xyzw v[12:15], off, s[0:3], format:23, 0 glc dlc +0x00,0xc0,0xbb,0xe8,0x00,0x0c,0x00,0x80 +# GFX10: tbuffer_load_format_xyzw v[0:3], off, s[0:3], format:78, 0 offset:42 +0x2a,0x00,0x73,0xea,0x00,0x00,0x00,0x80 +# GFX10: tbuffer_load_format_xyzw v[4:7], off, s[0:3], format:62, s4 offset:73 +0x49,0x00,0xf3,0xe9,0x00,0x04,0x00,0x04 +# GFX10: tbuffer_load_format_xyzw v[0:3], off, s[0:3], format:47, 61 offset:4095 +0xff,0x0f,0x7b,0xe9,0x00,0x00,0x00,0xbd +# GFX10: tbuffer_load_format_xyzw v[8:11], off, s[0:3], format:77, s4 offset:1 +0x01,0x00,0x6b,0xea,0x00,0x08,0x00,0x04 +# GFX10: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], format:78, 0 idxen +0x00,0x20,0x73,0xea,0x00,0x00,0x00,0x80 +# GFX10: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], format:78, 0 offen +0x00,0x10,0x73,0xea,0x00,0x00,0x00,0x80 +# GFX10: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], format:78, 0 offen offset:52 +0x34,0x10,0x73,0xea,0x00,0x00,0x00,0x80 +# GFX10: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], format:78, 0 idxen offen +0x00,0x30,0x73,0xea,0x00,0x00,0x00,0x80 +# GFX10: tbuffer_load_format_xy v[0:1], off, s[0:3], format:77, 0 +0x00,0x00,0x69,0xea,0x00,0x00,0x00,0x80 +# GFX10: tbuffer_load_format_x v0, off, s[0:3], format:77, 0 +0x00,0x00,0x68,0xea,0x00,0x00,0x00,0x80 +# GFX10: tbuffer_store_format_d16_x v0, v1, s[4:7], format:33, 0 idxen +0x00,0x20,0x0c,0xe9,0x01,0x00,0x21,0x80 +# GFX10: tbuffer_store_format_d16_xy v0, v1, s[4:7], format:33, 0 idxen +0x00,0x20,0x0d,0xe9,0x01,0x00,0x21,0x80 +# GFX10: tbuffer_store_format_d16_xyzw v[0:1], v2, s[4:7], format:33, 0 idxen +0x00,0x20,0x0f,0xe9,0x02,0x00,0x21,0x80 +# GFX10: tbuffer_store_format_xyzw v[0:3], off, s[0:3], format:44, 0 +0x00,0x00,0x67,0xe9,0x00,0x00,0x00,0x80 +# GFX10: tbuffer_store_format_xyzw v[4:7], off, s[0:3], format:61, 0 glc +0x00,0x40,0xef,0xe9,0x00,0x04,0x00,0x80 +# GFX10: tbuffer_store_format_xyzw v[8:11], off, s[0:3], format:78, 0 slc +0x00,0x00,0x77,0xea,0x00,0x08,0x40,0x80 +# GFX10: tbuffer_store_format_xyzw v[8:11], off, s[0:3], format:78, 0 +0x00,0x00,0x77,0xea,0x00,0x08,0x00,0x80 +# GFX10: tbuffer_store_format_xyzw v[0:3], off, s[0:3], format:117, 0 offset:42 +0x2a,0x00,0xaf,0xeb,0x00,0x00,0x00,0x80 +# GFX10: tbuffer_store_format_xyzw v[0:3], off, s[0:3], format:117, s4 offset:42 +0x2a,0x00,0xaf,0xeb,0x00,0x00,0x00,0x04 +# GFX10: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], format:47, 0 idxen +0x00,0x20,0x7f,0xe9,0x04,0x00,0x00,0x80 +# GFX10: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], format:115, 0 offen +0x00,0x10,0x9f,0xeb,0x04,0x00,0x00,0x80 +# GFX10: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], format:70, 0 idxen offen +0x00,0x30,0x37,0xea,0x04,0x00,0x00,0x80 +# GFX10: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], format:63, 0 idxen +0x00,0x20,0xff,0xe9,0x04,0x00,0x00,0x80 +# GFX10: tbuffer_store_format_xyzw v[0:3], v6, s[0:3], format:46, 0 idxen +0x00,0x20,0x77,0xe9,0x06,0x00,0x00,0x80 +# GFX10: tbuffer_store_format_x v0, v1, s[0:3], format:125, 0 idxen +0x00,0x20,0xec,0xeb,0x01,0x00,0x00,0x80 +# GFX10: tbuffer_store_format_xy v[0:1], v2, s[0:3], format:33, 0 idxen +0x00,0x20,0x0d,0xe9,0x02,0x00,0x00,0x80 +