diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -363,6 +363,12 @@
   "Support 16 bit coordindates/gradients/lod/clamp/mip types on gfx9"
 >;
 
+def FeatureA16 : SubtargetFeature<"a16",
+  "HasA16",
+  "true",
+  "Support 16 bit coordinates/gradients/lod/clamp/mip types on gfx10"
+>;
+
 def FeatureNSAEncoding : SubtargetFeature<"nsa-encoding",
   "HasNSAEncoding",
   "true",
@@ -682,7 +688,8 @@
   FeatureAddNoCarryInsts, FeatureFmaMixInsts, FeatureGFX8Insts,
   FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking, FeatureVOP3Literal,
   FeatureDPP8,
-  FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC
+  FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC,
+  FeatureA16
 ]
 >;
 
@@ -1094,6 +1101,9 @@
 def HasR128A16 : Predicate<"Subtarget->hasR128A16()">,
   AssemblerPredicate<"FeatureR128A16">;
 
+def HasA16 : Predicate<"Subtarget->hasA16()">,
+  AssemblerPredicate<"FeatureA16">;
+
 def HasDPP16 : Predicate<"Subtarget->hasDPP()">,
   AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureDPP">;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -342,6 +342,7 @@
   bool HasDPP;
   bool HasDPP8;
   bool HasR128A16;
+  bool HasA16;
   bool HasNSAEncoding;
   bool HasDLInsts;
   bool HasDot1Insts;
@@ -988,6 +989,10 @@
     return HasR128A16;
   }
 
+  bool hasA16() const {
+    return HasA16;
+  }
+
   bool hasOffset3fBug() const {
     return HasOffset3fBug;
   }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -241,6 +241,7 @@
     HasDPP(false),
     HasDPP8(false),
     HasR128A16(false),
+    HasA16(false),
     HasNSAEncoding(false),
     HasDLInsts(false),
     HasDot1Insts(false),
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -163,6 +163,7 @@
     ImmTyUNorm,
     ImmTyDA,
     ImmTyR128A16,
+    ImmTyA16,
     ImmTyLWE,
     ImmTyExpTgt,
     ImmTyExpCompr,
@@ -315,6 +316,7 @@
   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
   bool isDA() const { return isImmTy(ImmTyDA); }
   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
+  bool isA16() const { return isImmTy(ImmTyA16); }
   bool isLWE() const { return isImmTy(ImmTyLWE); }
   bool isOff() const { return isImmTy(ImmTyOff); }
   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
@@ -847,6 +849,7 @@
     case ImmTyUNorm: OS << "UNorm"; break;
     case ImmTyDA: OS << "DA"; break;
     case ImmTyR128A16: OS << "R128A16"; break;
+    case ImmTyA16: OS << "A16"; break;
     case ImmTyLWE: OS << "LWE"; break;
     case ImmTyOff: OS << "Off"; break;
     case ImmTyExpTgt: OS << "ExpTgt"; break;
@@ -1157,6 +1160,10 @@
     return AMDGPU::hasPackedD16(getSTI());
   }
 
+  bool hasA16() const {
+    return AMDGPU::hasA16(getSTI());
+  }
+
   bool isSI() const {
     return AMDGPU::isSI(getSTI());
   }
@@ -4650,9 +4657,9 @@
   case AsmToken::Identifier: {
     StringRef Tok = Parser.getTok().getString();
     if (Tok == Name) {
-      if (Tok == "r128" && isGFX9())
+      if (Tok == "r128" && !hasMIMG_R128())
        Error(S, "r128 modifier is not supported on this GPU");
-      if (Tok == "a16" && !isGFX9() && !isGFX10())
+      if (Tok == "a16" && !isGFX9() && !hasA16())
        Error(S, "a16 modifier is not supported on this GPU");
      Bit = 1;
      Parser.Lex();
@@ -4672,6 +4679,9 @@
   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
     return MatchOperand_ParseFail;
 
+  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
+    ImmTy = AMDGPUOperand::ImmTyR128A16;
+
   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
   return MatchOperand_Success;
 }
@@ -5987,6 +5997,8 @@
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
+  if (IsGFX10)
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
   if (!IsGFX10)
@@ -6096,7 +6108,7 @@
   {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
   {"da", AMDGPUOperand::ImmTyDA, true, nullptr},
   {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
-  {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr},
+  {"a16", AMDGPUOperand::ImmTyA16, true, nullptr},
   {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
   {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
   {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -86,6 +86,8 @@
                 raw_ostream &O);
   void printR128A16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
                     raw_ostream &O);
+  void printA16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                raw_ostream &O);
   void printLWE(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
                 raw_ostream &O);
   void printD16(const MCInst *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -244,6 +244,11 @@
   printNamedBit(MI, OpNo, O, "r128");
 }
 
+void AMDGPUInstPrinter::printA16(const MCInst *MI, unsigned OpNo,
+                                 const MCSubtargetInfo &STI, raw_ostream &O) {
+  printNamedBit(MI, OpNo, O, "a16");
+}
+
 void AMDGPUInstPrinter::printLWE(const MCInst *MI, unsigned OpNo,
                                  const MCSubtargetInfo &STI, raw_ostream &O) {
   printNamedBit(MI, OpNo, O, "lwe");
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -238,9 +238,9 @@
   : MIMG_gfx10 {
   let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc,
                             DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
-                            SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+                            SLC:$slc, R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
                            !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
-  let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+  let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
                     #!if(BaseOpcode.HasD16, "$d16", "");
 }
@@ -251,9 +251,9 @@
   let InOperandList = !con(AddrIns,
                            (ins SReg_256:$srsrc, DMask:$dmask,
                             Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
-                            SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+                            SLC:$slc, R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
                            !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
-  let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+  let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
                     #!if(BaseOpcode.HasD16, "$d16", "");
 }
@@ -331,9 +331,9 @@
   : MIMG_gfx10 {
   let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
                             DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
-                            GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+                            GLC:$glc, SLC:$slc, R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
                            !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
-  let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+  let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
                     #!if(BaseOpcode.HasD16, "$d16", "");
 }
@@ -345,9 +345,9 @@
                            AddrIns,
                            (ins SReg_256:$srsrc, DMask:$dmask,
                             Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
-                            SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+                            SLC:$slc, R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
                            !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
-  let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+  let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
                     #!if(BaseOpcode.HasD16, "$d16", "");
 }
@@ -436,8 +436,8 @@
   let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
                        DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
-                       GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe);
-  let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe";
+                       GLC:$glc, SLC:$slc, R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe);
+  let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe";
 }
 class MIMG_Atomic_nsa_gfx10 {
   let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, SReg_128:$ssamp,
                             DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
-                            GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+                            GLC:$glc, SLC:$slc, R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
                            !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
   let AsmString = opcode#" $vdata, $vaddr0, $srsrc, $ssamp$dmask$dim$unorm"
-                    #"$dlc$glc$slc$r128$tfe$lwe"
+                    #"$dlc$glc$slc$r128$a16$tfe$lwe"
                     #!if(BaseOpcode.HasD16, "$d16", "");
 }
@@ -536,10 +536,10 @@
   let InOperandList = !con(AddrIns,
                            (ins SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask,
                             Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
-                            SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+                            SLC:$slc, R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
                            !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
   let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc, $ssamp$dmask$dim$unorm"
-                    #"$dlc$glc$slc$r128$tfe$lwe"
+                    #"$dlc$glc$slc$r128$a16$tfe$lwe"
                     #!if(BaseOpcode.HasD16, "$d16", "");
 }
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5424,7 +5424,7 @@
   const MVT VAddrScalarVT = VAddrVT.getScalarType();
   if (((VAddrScalarVT == MVT::f16) || (VAddrScalarVT == MVT::i16))) {
     // Illegal to use a16 images
-    if (!ST->hasFeature(AMDGPU::FeatureR128A16))
+    if (!ST->hasFeature(AMDGPU::FeatureR128A16) && !ST->hasFeature(AMDGPU::FeatureA16))
       return Op;
 
     IsA16 = true;
@@ -5568,10 +5568,12 @@
   Ops.push_back(DLC);
   Ops.push_back(GLC);
   Ops.push_back(SLC);
-  Ops.push_back(IsA16 && // a16 or r128
+  Ops.push_back(IsA16 && // r128, a16 for gfx9
                 ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
-  Ops.push_back(TFE); // tfe
-  Ops.push_back(LWE); // lwe
+  if (IsGFX10)
+    Ops.push_back(IsA16 ? True : False);
+  Ops.push_back(TFE);
+  Ops.push_back(LWE);
   if (!IsGFX10)
     Ops.push_back(DimInfo->DA ? True : False);
   if (BaseOpcode->HasD16)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
--- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -303,7 +303,7 @@
   bits<3> dim;
   bits<2> nsa;
   bits<1> dlc;
-  bits<1> a16 = 0; // TODO: this should be an operand
+  bits<1> a16;
 
   let Inst{0} = op{7};
   let Inst{2-1} = nsa;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3742,12 +3742,24 @@
       return false;
     }
 
+  bool IsA16 = false;
+  if (ST.hasR128A16()) {
+    const MachineOperand *R128A16 = getNamedOperand(MI, AMDGPU::OpName::r128);
+    IsA16 = R128A16->getImm() != 0;
+  } else if (ST.hasA16()) {
+    const MachineOperand *A16 = getNamedOperand(MI, AMDGPU::OpName::a16);
+    IsA16 = A16->getImm() != 0;
+  }
+
   bool IsNSA = SRsrcIdx - VAddr0Idx > 1;
-  unsigned AddrWords = BaseOpcode->NumExtraArgs +
+  unsigned AddrComponents =
                        (BaseOpcode->Gradients ? Dim->NumGradients : 0) +
                        (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
                        (BaseOpcode->LodOrClampOrMip ? 1 : 0);
 
+  unsigned AddrWords = BaseOpcode->NumExtraArgs +
+                       (IsA16 ? (AddrComponents + 1) / 2 : AddrComponents);
+
   unsigned VAddrWords;
   if (IsNSA) {
     VAddrWords = SRsrcIdx - VAddr0Idx;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1090,6 +1090,7 @@
 def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
 def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
 def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>;
+def A16 : NamedOperandBit<"A16", NamedMatchClass<"A16">>;
 def D16 : NamedOperandBit<"D16", NamedMatchClass<"D16">>;
 def LWE : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
 def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -686,7 +686,8 @@
   // Check other optional immediate operands for equality.
unsigned OperandsToMatch[] = {AMDGPU::OpName::glc, AMDGPU::OpName::slc, AMDGPU::OpName::d16, AMDGPU::OpName::unorm, - AMDGPU::OpName::da, AMDGPU::OpName::r128}; + AMDGPU::OpName::da, AMDGPU::OpName::r128, + AMDGPU::OpName::a16}; for (auto op : OperandsToMatch) { int Idx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), op); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -551,6 +551,7 @@ bool hasXNACK(const MCSubtargetInfo &STI); bool hasSRAMECC(const MCSubtargetInfo &STI); bool hasMIMG_R128(const MCSubtargetInfo &STI); +bool hasA16(const MCSubtargetInfo &STI); bool hasPackedD16(const MCSubtargetInfo &STI); bool isSI(const MCSubtargetInfo &STI); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -927,7 +927,11 @@ } bool hasMIMG_R128(const MCSubtargetInfo &STI) { - return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128]; + return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128] && !STI.getFeatureBits()[AMDGPU::FeatureR128A16]; +} + +bool hasA16(const MCSubtargetInfo &STI) { + return STI.getFeatureBits()[AMDGPU::FeatureA16]; } bool hasPackedD16(const MCSubtargetInfo &STI) { diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.encode.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.encode.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.encode.ll @@ -0,0 +1,959 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=gfx900 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX10 %s + +define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +; GFX9-LABEL: load_1d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x00,0xf0,0x00,0x00,0x00,0x00] +; GFX9-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16 ; encoding: [0x00,0x1f,0x00,0xf0,0x00,0x00,0x00,0x40] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog +main_body: + %s = extractelement <2 x i16> %coords, i32 0 + %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +; GFX9-LABEL: load_2d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x00,0xf0,0x00,0x00,0x00,0x00] +; GFX9-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16 ; encoding: [0x08,0x1f,0x00,0xf0,0x00,0x00,0x00,0x40] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return 
to shader part epilog +main_body: + %s = extractelement <2 x i16> %coords, i32 0 + %t = extractelement <2 x i16> %coords, i32 1 + %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 15, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +; GFX9-LABEL: load_3d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x00,0xf0,0x00,0x00,0x00,0x00] +; GFX9-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_3d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 ; encoding: [0x10,0x1f,0x00,0xf0,0x00,0x00,0x00,0x40] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog +main_body: + %s = extractelement <2 x i16> %coords_lo, i32 0 + %t = extractelement <2 x i16> %coords_lo, i32 1 + %r = extractelement <2 x i16> %coords_hi, i32 0 + %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +; GFX9-LABEL: load_cube: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x00,0xf0,0x00,0x00,0x00,0x00] +; GFX9-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_cube: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16 ; encoding: [0x18,0x1f,0x00,0xf0,0x00,0x00,0x00,0x40] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog +main_body: + %s = extractelement <2 x i16> %coords_lo, i32 0 + %t = extractelement <2 x i16> %coords_lo, i32 1 + %slice = extractelement <2 x i16> %coords_hi, i32 0 + %v = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +; GFX9-LABEL: load_1darray: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x00,0xf0,0x00,0x00,0x00,0x00] +; GFX9-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_1darray: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16 ; encoding: [0x20,0x1f,0x00,0xf0,0x00,0x00,0x00,0x40] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog +main_body: + %s = extractelement <2 x i16> %coords, i32 0 + %slice = extractelement <2 x i16> %coords, i32 1 + %v = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i16(i32 15, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +; GFX9-LABEL: 
load_2darray: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x00,0xf0,0x00,0x00,0x00,0x00] +; GFX9-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_2darray: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16 ; encoding: [0x28,0x1f,0x00,0xf0,0x00,0x00,0x00,0x40] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog +main_body: + %s = extractelement <2 x i16> %coords_lo, i32 0 + %t = extractelement <2 x i16> %coords_lo, i32 1 + %slice = extractelement <2 x i16> %coords_hi, i32 0 + %v = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +; GFX9-LABEL: load_2dmsaa: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x00,0xf0,0x00,0x00,0x00,0x00] +; GFX9-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_2dmsaa: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm a16 ; encoding: [0x30,0x1f,0x00,0xf0,0x00,0x00,0x00,0x40] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog +main_body: + %s = extractelement <2 x i16> %coords_lo, i32 0 + %t = extractelement <2 x i16> %coords_lo, i32 1 + %fragid = extractelement <2 x i16> %coords_hi, i32 0 + %v = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +; GFX9-LABEL: load_2darraymsaa: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x00,0xf0,0x00,0x00,0x00,0x00] +; GFX9-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_2darraymsaa: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 ; encoding: [0x38,0x1f,0x00,0xf0,0x00,0x00,0x00,0x40] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog +main_body: + %s = extractelement <2 x i16> %coords_lo, i32 0 + %t = extractelement <2 x i16> %coords_lo, i32 1 + %slice = extractelement <2 x i16> %coords_hi, i32 0 + %fragid = extractelement <2 x i16> %coords_hi, i32 1 + %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +; GFX9-LABEL: load_mip_1d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_load_mip v[0:3], v0, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x04,0xf0,0x00,0x00,0x00,0x00] +; GFX9-NEXT: s_waitcnt 
vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_mip_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load_mip v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16 ; encoding: [0x00,0x1f,0x04,0xf0,0x00,0x00,0x00,0x40] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog +main_body: + %s = extractelement <2 x i16> %coords, i32 0 + %mip = extractelement <2 x i16> %coords, i32 1 + %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32 15, i16 %s, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +; GFX9-LABEL: load_mip_2d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x04,0xf0,0x00,0x00,0x00,0x00] +; GFX9-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_mip_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16 ; encoding: [0x08,0x1f,0x04,0xf0,0x00,0x00,0x00,0x40] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog +main_body: + %s = extractelement <2 x i16> %coords_lo, i32 0 + %t = extractelement <2 x i16> %coords_lo, i32 1 + %mip = extractelement <2 x i16> %coords_hi, i32 0 + %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +; GFX9-LABEL: load_mip_3d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x04,0xf0,0x00,0x00,0x00,0x00] +; GFX9-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_mip_3d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 ; encoding: [0x10,0x1f,0x04,0xf0,0x00,0x00,0x00,0x40] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog +main_body: + %s = extractelement <2 x i16> %coords_lo, i32 0 + %t = extractelement <2 x i16> %coords_lo, i32 1 + %r = extractelement <2 x i16> %coords_hi, i32 0 + %mip = extractelement <2 x i16> %coords_hi, i32 1 + %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %r, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +; GFX9-LABEL: load_mip_cube: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x04,0xf0,0x00,0x00,0x00,0x00] +; GFX9-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_mip_cube: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16 ; encoding: 
[0x18,0x1f,0x04,0xf0,0x00,0x00,0x00,0x40] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog +main_body: + %s = extractelement <2 x i16> %coords_lo, i32 0 + %t = extractelement <2 x i16> %coords_lo, i32 1 + %slice = extractelement <2 x i16> %coords_hi, i32 0 + %mip = extractelement <2 x i16> %coords_hi, i32 1 + %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +; GFX9-LABEL: load_mip_1darray: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x04,0xf0,0x00,0x00,0x00,0x00] +; GFX9-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_mip_1darray: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16 ; encoding: [0x20,0x1f,0x04,0xf0,0x00,0x00,0x00,0x40] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog +main_body: + %s = extractelement <2 x i16> %coords_lo, i32 0 + %slice = extractelement <2 x i16> %coords_lo, i32 1 + %mip = extractelement <2 x i16> %coords_hi, i32 0 + %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32 15, i16 %s, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +; GFX9-LABEL: load_mip_2darray: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x04,0xf0,0x00,0x00,0x00,0x00] +; GFX9-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_mip_2darray: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16 ; encoding: [0x28,0x1f,0x04,0xf0,0x00,0x00,0x00,0x40] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog +main_body: + %s = extractelement <2 x i16> %coords_lo, i32 0 + %t = extractelement <2 x i16> %coords_lo, i32 1 + %slice = extractelement <2 x i16> %coords_hi, i32 0 + %mip = extractelement <2 x i16> %coords_hi, i32 1 + %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) { +; GFX9-LABEL: store_1d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x20,0xf0,0x04,0x00,0x00,0x00] +; GFX9-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX10-LABEL: store_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16 ; encoding: [0x00,0x1f,0x20,0xf0,0x04,0x00,0x00,0x40] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +main_body: + %s = 
extractelement <2 x i16> %coords, i32 0 + call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) { +; GFX9-LABEL: store_2d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x20,0xf0,0x04,0x00,0x00,0x00] +; GFX9-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX10-LABEL: store_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16 ; encoding: [0x08,0x1f,0x20,0xf0,0x04,0x00,0x00,0x40] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +main_body: + %s = extractelement <2 x i16> %coords, i32 0 + %t = extractelement <2 x i16> %coords, i32 1 + call void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +; GFX9-LABEL: store_3d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x20,0xf0,0x04,0x00,0x00,0x00] +; GFX9-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX10-LABEL: store_3d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 ; encoding: [0x10,0x1f,0x20,0xf0,0x04,0x00,0x00,0x40] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +main_body: + %s = extractelement <2 x i16> %coords_lo, i32 0 + %t = extractelement <2 x i16> %coords_lo, i32 1 + %r = extractelement <2 x i16> %coords_hi, i32 0 + call void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +; GFX9-LABEL: store_cube: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x20,0xf0,0x04,0x00,0x00,0x00] +; GFX9-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX10-LABEL: store_cube: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16 ; encoding: [0x18,0x1f,0x20,0xf0,0x04,0x00,0x00,0x40] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +main_body: + %s = extractelement <2 x i16> %coords_lo, i32 0 + %t = extractelement <2 x i16> %coords_lo, i32 1 + %slice = extractelement <2 x i16> %coords_hi, i32 0 + call void @llvm.amdgcn.image.store.cube.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) { +; GFX9-LABEL: store_1darray: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x20,0xf0,0x04,0x00,0x00,0x00] +; GFX9-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX10-LABEL: store_1darray: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16 ; 
encoding: [0x20,0x1f,0x20,0xf0,0x04,0x00,0x00,0x40] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +main_body: + %s = extractelement <2 x i16> %coords, i32 0 + %slice = extractelement <2 x i16> %coords, i32 1 + call void @llvm.amdgcn.image.store.1darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +; GFX9-LABEL: store_2darray: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x20,0xf0,0x04,0x00,0x00,0x00] +; GFX9-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX10-LABEL: store_2darray: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16 ; encoding: [0x28,0x1f,0x20,0xf0,0x04,0x00,0x00,0x40] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +main_body: + %s = extractelement <2 x i16> %coords_lo, i32 0 + %t = extractelement <2 x i16> %coords_lo, i32 1 + %slice = extractelement <2 x i16> %coords_hi, i32 0 + call void @llvm.amdgcn.image.store.2darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +; GFX9-LABEL: store_2dmsaa: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x20,0xf0,0x04,0x00,0x00,0x00] +; GFX9-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX10-LABEL: store_2dmsaa: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm a16 ; encoding: [0x30,0x1f,0x20,0xf0,0x04,0x00,0x00,0x40] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +main_body: + %s = extractelement <2 x i16> %coords_lo, i32 0 + %t = extractelement <2 x i16> %coords_lo, i32 1 + %fragid = extractelement <2 x i16> %coords_hi, i32 0 + call void @llvm.amdgcn.image.store.2dmsaa.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +; GFX9-LABEL: store_2darraymsaa: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x20,0xf0,0x04,0x00,0x00,0x00] +; GFX9-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX10-LABEL: store_2darraymsaa: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 ; encoding: [0x38,0x1f,0x20,0xf0,0x04,0x00,0x00,0x40] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +main_body: + %s = extractelement <2 x i16> %coords_lo, i32 0 + %t = extractelement <2 x i16> %coords_lo, i32 1 + %slice = extractelement <2 x i16> %coords_hi, i32 0 + %fragid = extractelement <2 x i16> %coords_hi, i32 1 + call void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, 
<2 x i16> %coords) { +; GFX9-LABEL: store_mip_1d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_store_mip v[0:3], v4, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x24,0xf0,0x04,0x00,0x00,0x00] +; GFX9-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX10-LABEL: store_mip_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store_mip v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16 ; encoding: [0x00,0x1f,0x24,0xf0,0x04,0x00,0x00,0x40] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +main_body: + %s = extractelement <2 x i16> %coords, i32 0 + %mip = extractelement <2 x i16> %coords, i32 1 + call void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +; GFX9-LABEL: store_mip_2d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x24,0xf0,0x04,0x00,0x00,0x00] +; GFX9-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX10-LABEL: store_mip_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16 ; encoding: [0x08,0x1f,0x24,0xf0,0x04,0x00,0x00,0x40] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +main_body: + %s = extractelement <2 x i16> %coords_lo, i32 0 + %t = extractelement <2 x i16> %coords_lo, i32 1 + %mip = extractelement <2 x i16> %coords_hi, i32 0 + call void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +; GFX9-LABEL: store_mip_3d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x24,0xf0,0x04,0x00,0x00,0x00] +; GFX9-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX10-LABEL: store_mip_3d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 ; encoding: [0x10,0x1f,0x24,0xf0,0x04,0x00,0x00,0x40] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +main_body: + %s = extractelement <2 x i16> %coords_lo, i32 0 + %t = extractelement <2 x i16> %coords_lo, i32 1 + %r = extractelement <2 x i16> %coords_hi, i32 0 + %mip = extractelement <2 x i16> %coords_hi, i32 1 + call void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %r, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +; GFX9-LABEL: store_mip_cube: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x24,0xf0,0x04,0x00,0x00,0x00] +; GFX9-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX10-LABEL: store_mip_cube: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16 ; encoding: [0x18,0x1f,0x24,0xf0,0x04,0x00,0x00,0x40] +; GFX10-NEXT: s_endpgm ; encoding: 
[0x00,0x00,0x81,0xbf] +main_body: + %s = extractelement <2 x i16> %coords_lo, i32 0 + %t = extractelement <2 x i16> %coords_lo, i32 1 + %slice = extractelement <2 x i16> %coords_hi, i32 0 + %mip = extractelement <2 x i16> %coords_hi, i32 1 + call void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +; GFX9-LABEL: store_mip_1darray: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x24,0xf0,0x04,0x00,0x00,0x00] +; GFX9-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX10-LABEL: store_mip_1darray: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16 ; encoding: [0x20,0x1f,0x24,0xf0,0x04,0x00,0x00,0x40] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +main_body: + %s = extractelement <2 x i16> %coords_lo, i32 0 + %slice = extractelement <2 x i16> %coords_lo, i32 1 + %mip = extractelement <2 x i16> %coords_hi, i32 0 + call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +; GFX9-LABEL: store_mip_2darray: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x24,0xf0,0x04,0x00,0x00,0x00] +; GFX9-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX10-LABEL: store_mip_2darray: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16 ; encoding: [0x28,0x1f,0x24,0xf0,0x04,0x00,0x00,0x40] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +main_body: + %s = extractelement <2 x i16> %coords_lo, i32 0 + %t = extractelement <2 x i16> %coords_lo, i32 1 + %slice = extractelement <2 x i16> %coords_hi, i32 0 + %mip = extractelement <2 x i16> %coords_hi, i32 1 + call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +; GFX9-LABEL: getresinfo_1d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x38,0xf0,0x00,0x00,0x00,0x00] +; GFX9-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: getresinfo_1d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16 ; encoding: [0x00,0x1f,0x38,0xf0,0x00,0x00,0x00,0x40] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog +main_body: + %mip = extractelement <2 x i16> %coords, i32 0 + %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg 
%rsrc, <2 x i16> %coords) { +; GFX9-LABEL: getresinfo_2d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x38,0xf0,0x00,0x00,0x00,0x00] +; GFX9-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: getresinfo_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16 ; encoding: [0x08,0x1f,0x38,0xf0,0x00,0x00,0x00,0x40] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog +main_body: + %mip = extractelement <2 x i16> %coords, i32 0 + %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +; GFX9-LABEL: getresinfo_3d: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x38,0xf0,0x00,0x00,0x00,0x00] +; GFX9-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: getresinfo_3d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 ; encoding: [0x10,0x1f,0x38,0xf0,0x00,0x00,0x00,0x40] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog +main_body: + %mip = extractelement <2 x i16> %coords, i32 0 + %v = call <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +; GFX9-LABEL: getresinfo_cube: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x38,0xf0,0x00,0x00,0x00,0x00] +; GFX9-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: getresinfo_cube: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16 ; encoding: [0x18,0x1f,0x38,0xf0,0x00,0x00,0x00,0x40] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog +main_body: + %mip = extractelement <2 x i16> %coords, i32 0 + %v = call <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +; GFX9-LABEL: getresinfo_1darray: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x38,0xf0,0x00,0x00,0x00,0x00] +; GFX9-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: getresinfo_1darray: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16 ; encoding: [0x20,0x1f,0x38,0xf0,0x00,0x00,0x00,0x40] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; 
GFX10-NEXT: ; return to shader part epilog +main_body: + %mip = extractelement <2 x i16> %coords, i32 0 + %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +; GFX9-LABEL: getresinfo_2darray: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x38,0xf0,0x00,0x00,0x00,0x00] +; GFX9-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: getresinfo_2darray: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16 ; encoding: [0x28,0x1f,0x38,0xf0,0x00,0x00,0x00,0x40] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog +main_body: + %mip = extractelement <2 x i16> %coords, i32 0 + %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +; GFX9-LABEL: getresinfo_2dmsaa: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x38,0xf0,0x00,0x00,0x00,0x00] +; GFX9-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: getresinfo_2dmsaa: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm a16 ; encoding: [0x30,0x1f,0x38,0xf0,0x00,0x00,0x00,0x40] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog +main_body: + %mip = extractelement <2 x i16> %coords, i32 0 + %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +; GFX9-LABEL: getresinfo_2darraymsaa: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x38,0xf0,0x00,0x00,0x00,0x00] +; GFX9-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: getresinfo_2darraymsaa: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 ; encoding: [0x38,0x1f,0x38,0xf0,0x00,0x00,0x00,0x40] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog +main_body: + %mip = extractelement <2 x i16> %coords, i32 0 + %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0) + ret <4 x float> %v +} + +define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +; GFX9-LABEL: load_1d_V1: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x8 unorm a16 ; encoding: [0x00,0x98,0x00,0xf0,0x00,0x00,0x00,0x00] +; GFX9-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] +; GFX9-NEXT: ; return 
to shader part epilog +; +; GFX10-LABEL: load_1d_V1: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm a16 ; encoding: [0x00,0x18,0x00,0xf0,0x00,0x00,0x00,0x40] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog +main_body: + %s = extractelement <2 x i16> %coords, i32 0 + %v = call float @llvm.amdgcn.image.load.1d.f32.i16(i32 8, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + ret float %v +} + +define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +; GFX9-LABEL: load_1d_V2: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x9 unorm a16 ; encoding: [0x00,0x99,0x00,0xf0,0x00,0x00,0x00,0x00] +; GFX9-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_1d_V2: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm a16 ; encoding: [0x00,0x19,0x00,0xf0,0x00,0x00,0x00,0x40] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] +; GFX10-NEXT: ; return to shader part epilog +main_body: + %s = extractelement <2 x i16> %coords, i32 0 + %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32 9, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + ret <2 x float> %v +} + +define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, <2 x i16> %coords) { +; GFX9-LABEL: store_1d_V1: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_store v0, v1, s[0:7] dmask:0x2 unorm a16 ; encoding: [0x00,0x92,0x20,0xf0,0x01,0x00,0x00,0x00] +; GFX9-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX10-LABEL: store_1d_V1: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store v0, v1, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm a16 ; encoding: [0x00,0x12,0x20,0xf0,0x01,0x00,0x00,0x40] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +main_body: + %s = extractelement <2 x i16> %coords, i32 0 + call void @llvm.amdgcn.image.store.1d.f32.i16(float %vdata, i32 2, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, <2 x i16> %coords) { +; GFX9-LABEL: store_1d_V2: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_store v[0:1], v2, s[0:7] dmask:0xc unorm a16 ; encoding: [0x00,0x9c,0x20,0xf0,0x02,0x00,0x00,0x00] +; GFX9-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +; +; GFX10-LABEL: store_1d_V2: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: image_store v[0:1], v2, s[0:7] dmask:0xc dim:SQ_RSRC_IMG_1D unorm a16 ; encoding: [0x00,0x1c,0x20,0xf0,0x02,0x00,0x00,0x40] +; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +main_body: + %s = extractelement <2 x i16> %coords, i32 0 + call void @llvm.amdgcn.image.store.1d.v2f32.i16(<2 x float> %vdata, i32 12, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +; GFX9-LABEL: load_1d_glc: +; GFX9: ; %bb.0: ; %main_body +; GFX9-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc a16 ; encoding: [0x00,0xbf,0x00,0xf0,0x00,0x00,0x00,0x00] +; GFX9-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: load_1d_glc: +; GFX10: ; %bb.0: ; %main_body +; 
GFX10-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc a16 ; encoding: [0x00,0x3f,0x00,0xf0,0x00,0x00,0x00,0x40]
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+  %s = extractelement <2 x i16> %coords, i32 0
+  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 1)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: load_1d_slc:
+; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc a16 ; encoding: [0x00,0x9f,0x00,0xf2,0x00,0x00,0x00,0x00]
+; GFX9-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d_slc:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm slc a16 ; encoding: [0x00,0x1f,0x00,0xf2,0x00,0x00,0x00,0x40]
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+  %s = extractelement <2 x i16> %coords, i32 0
+  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: load_1d_glc_slc:
+; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc a16 ; encoding: [0x00,0xbf,0x00,0xf2,0x00,0x00,0x00,0x00]
+; GFX9-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d_glc_slc:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc slc a16 ; encoding: [0x00,0x3f,0x00,0xf2,0x00,0x00,0x00,0x40]
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+  %s = extractelement <2 x i16> %coords, i32 0
+  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 3)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
+; GFX9-LABEL: store_1d_glc:
+; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc a16 ; encoding: [0x00,0xbf,0x20,0xf0,0x04,0x00,0x00,0x00]
+; GFX9-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX10-LABEL: store_1d_glc:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc a16 ; encoding: [0x00,0x3f,0x20,0xf0,0x04,0x00,0x00,0x40]
+; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+main_body:
+  %s = extractelement <2 x i16> %coords, i32 0
+  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 1)
+  ret void
+}
+
+define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
+; GFX9-LABEL: store_1d_slc:
+; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc a16 ; encoding: [0x00,0x9f,0x20,0xf2,0x04,0x00,0x00,0x00]
+; GFX9-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX10-LABEL: store_1d_slc:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm slc a16 ; encoding: [0x00,0x1f,0x20,0xf2,0x04,0x00,0x00,0x40]
+; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+main_body:
+  %s = extractelement <2 x i16> %coords, i32 0
+  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
+  ret void
+}
+
+define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
+; GFX9-LABEL: store_1d_glc_slc:
+; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc a16 ; encoding: [0x00,0xbf,0x20,0xf2,0x04,0x00,0x00,0x00]
+; GFX9-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX10-LABEL: store_1d_glc_slc:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc slc a16 ; encoding: [0x00,0x3f,0x20,0xf2,0x04,0x00,0x00,0x40]
+; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+main_body:
+  %s = extractelement <2 x i16> %coords, i32 0
+  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 3)
+  ret void
+}
+
+define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) #0 {
+; GFX9-LABEL: getresinfo_dmask0:
+; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: getresinfo_dmask0:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+  %mip = extractelement <2 x i16> %coords, i32 0
+  %r = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 0, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %r
+}
+
+declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
+
+declare void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float>, i32, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.cube.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.1darray.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.2darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
+
+declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
+
+declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
+
+declare float @llvm.amdgcn.image.load.1d.f32.i16(i32, i16, <8 x i32>, i32, i32) #1
+declare float @llvm.amdgcn.image.load.2d.f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
+declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32, i16, <8 x i32>, i32, i32) #1
+declare void @llvm.amdgcn.image.store.1d.f32.i16(float, i32, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.1d.v2f32.i16(<2 x float>, i32, i16, <8 x i32>, i32, i32) #0
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/mcp-overlap-after-propagation.mir b/llvm/test/CodeGen/AMDGPU/mcp-overlap-after-propagation.mir
--- a/llvm/test/CodeGen/AMDGPU/mcp-overlap-after-propagation.mir
+++ b/llvm/test/CodeGen/AMDGPU/mcp-overlap-after-propagation.mir
@@ -19,9 +19,9 @@
     renamable $sgpr8 = COPY killed renamable $sgpr2
     renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 = S_LOAD_DWORDX8_IMM renamable $sgpr8_sgpr9, 144, 0, 0 :: (invariant load 32, align 16, addrspace 4)
     renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = COPY killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
-    renamable $vgpr4 = IMAGE_GET_LOD_V1_V2_gfx10 renamable $vgpr70_vgpr71, renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 2, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    renamable $vgpr4 = IMAGE_GET_LOD_V1_V2_gfx10 renamable $vgpr70_vgpr71, renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
     renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 = COPY killed renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    renamable $vgpr12_vgpr13_vgpr14 = IMAGE_SAMPLE_V3_V2_gfx10 renamable $vgpr70_vgpr71, renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 7, 1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16)
+    renamable $vgpr12_vgpr13_vgpr14 = IMAGE_SAMPLE_V3_V2_gfx10 renamable $vgpr70_vgpr71, renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 7, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16)
     S_ENDPGM 0
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/nsa-vmem-hazard.mir b/llvm/test/CodeGen/AMDGPU/nsa-vmem-hazard.mir
--- a/llvm/test/CodeGen/AMDGPU/nsa-vmem-hazard.mir
+++ b/llvm/test/CodeGen/AMDGPU/nsa-vmem-hazard.mir
@@ -8,7 +8,7 @@
 name: hazard_image_sample_d_buf_off6
 body: |
   bb.0:
-    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
     $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, 0, implicit $exec
 ...
@@ -19,7 +19,7 @@
 name: no_hazard_image_sample_d_buf_off1
 body: |
   bb.0:
-    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
     $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 1, 0, 0, 0, 0, 0, implicit $exec
 ...
@@ -31,7 +31,7 @@
 name: no_hazard_image_sample_d_buf_far
 body: |
   bb.0:
-    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
     V_NOP_e32 implicit $exec
     $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, 0, implicit $exec
 ...
@@ -44,7 +44,7 @@
 name: no_hazard_image_sample_v4_v2_buf_off6
 body: |
   bb.0:
-    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2_gfx10 undef $vgpr1_vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2_gfx10 undef $vgpr1_vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
     $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, 0, implicit $exec
 ...
@@ -56,6 +56,6 @@
 name: no_hazard_image_sample_v4_v3_buf_off6
 body: |
   bb.0:
-    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V3_nsa_gfx10 undef $vgpr1, undef $vgpr2, undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V3_nsa_gfx10 undef $vgpr1, undef $vgpr2, undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
     $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, 0, implicit $exec
 ...