Diff 162870

llvm/trunk/lib/Target/AMDGPU/AMDGPU.td

Show First 20 Lines • Show All 236 Lines • ▼ Show 20 Lines
>;		>;

def FeatureDPP : SubtargetFeature<"dpp",		def FeatureDPP : SubtargetFeature<"dpp",
"HasDPP",		"HasDPP",
"true",		"true",
"Support DPP (Data Parallel Primitives) extension"		"Support DPP (Data Parallel Primitives) extension"
>;		>;

		def FeatureR128A16 : SubtargetFeature<"r128-a16",
		"HasR128A16",
		"true",
		"Support 16 bit coordinates/gradients/lod/clamp/mip types on gfx9"
		>;

def FeatureIntClamp : SubtargetFeature<"int-clamp-insts",		def FeatureIntClamp : SubtargetFeature<"int-clamp-insts",
"HasIntClamp",		"HasIntClamp",
"true",		"true",
"Support clamp for integer destination"		"Support clamp for integer destination"
>;		>;

def FeatureUnpackedD16VMem : SubtargetFeature<"unpacked-d16-vmem",		def FeatureUnpackedD16VMem : SubtargetFeature<"unpacked-d16-vmem",
"HasUnpackedD16VMem",		"HasUnpackedD16VMem",
▲ Show 20 Lines • Show All 186 Lines • ▼ Show 20 Lines	def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
[FeatureFP64, FeatureLocalMemorySize65536,		[FeatureFP64, FeatureLocalMemorySize65536,
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,		FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
FeatureGCN3Encoding, FeatureCIInsts, FeatureVIInsts, Feature16BitInsts,		FeatureGCN3Encoding, FeatureCIInsts, FeatureVIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,		FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode,		FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode,
FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,		FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,		FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,		FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
FeatureAddNoCarryInsts, FeatureScalarAtomics		FeatureAddNoCarryInsts, FeatureScalarAtomics, FeatureR128A16
]		]
>;		>;

class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping,		class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping,
list<SubtargetFeature> Implies>		list<SubtargetFeature> Implies>
: SubtargetFeature <		: SubtargetFeature <
"isaver"#Major#"."#Minor#"."#Stepping,		"isaver"#Major#"."#Minor#"."#Stepping,
"IsaVersion",		"IsaVersion",
▲ Show 20 Lines • Show All 242 Lines • ▼ Show 20 Lines	def HasSDWA : Predicate<"Subtarget->hasSDWA()">,
AssemblerPredicate<"FeatureSDWA,FeatureVolcanicIslands">;		AssemblerPredicate<"FeatureSDWA,FeatureVolcanicIslands">;

def HasSDWA9 : Predicate<"Subtarget->hasSDWA()">,		def HasSDWA9 : Predicate<"Subtarget->hasSDWA()">,
AssemblerPredicate<"FeatureSDWA,FeatureGFX9">;		AssemblerPredicate<"FeatureSDWA,FeatureGFX9">;

def HasDPP : Predicate<"Subtarget->hasDPP()">,		def HasDPP : Predicate<"Subtarget->hasDPP()">,
AssemblerPredicate<"FeatureDPP">;		AssemblerPredicate<"FeatureDPP">;

		def HasR128A16 : Predicate<"Subtarget->hasR128A16()">,
		AssemblerPredicate<"FeatureR128A16">;

def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">,		def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">,
AssemblerPredicate<"FeatureIntClamp">;		AssemblerPredicate<"FeatureIntClamp">;

def HasMadMixInsts : Predicate<"Subtarget->hasMadMixInsts()">,		def HasMadMixInsts : Predicate<"Subtarget->hasMadMixInsts()">,
AssemblerPredicate<"FeatureMadMixInsts">;		AssemblerPredicate<"FeatureMadMixInsts">;

def HasScalarAtomics : Predicate<"Subtarget->hasScalarAtomics()">,		def HasScalarAtomics : Predicate<"Subtarget->hasScalarAtomics()">,
AssemblerPredicate<"FeatureScalarAtomics">;		AssemblerPredicate<"FeatureScalarAtomics">;
Show All 30 Lines

llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h

Show First 20 Lines • Show All 352 Lines • ▼ Show 20 Lines	protected:
bool HasScalarStores;		bool HasScalarStores;
bool HasScalarAtomics;		bool HasScalarAtomics;
bool HasSDWAOmod;		bool HasSDWAOmod;
bool HasSDWAScalar;		bool HasSDWAScalar;
bool HasSDWASdst;		bool HasSDWASdst;
bool HasSDWAMac;		bool HasSDWAMac;
bool HasSDWAOutModsVOPC;		bool HasSDWAOutModsVOPC;
bool HasDPP;		bool HasDPP;
		bool HasR128A16;
bool HasDLInsts;		bool HasDLInsts;
bool D16PreservesUnusedBits;		bool D16PreservesUnusedBits;
bool FlatAddressSpace;		bool FlatAddressSpace;
bool FlatInstOffsets;		bool FlatInstOffsets;
bool FlatGlobalInsts;		bool FlatGlobalInsts;
bool FlatScratchInsts;		bool FlatScratchInsts;
bool AddNoCarryInsts;		bool AddNoCarryInsts;
bool HasUnpackedD16VMem;		bool HasUnpackedD16VMem;
▲ Show 20 Lines • Show All 417 Lines • ▼ Show 20 Lines	bool hasScalarAtomics() const {
return HasScalarAtomics;		return HasScalarAtomics;
}		}


bool hasDPP() const {		bool hasDPP() const {
return HasDPP;		return HasDPP;
}		}

		bool hasR128A16() const {
		return HasR128A16;
		}

bool enableSIScheduler() const {		bool enableSIScheduler() const {
return EnableSIScheduler;		return EnableSIScheduler;
}		}

bool debuggerSupported() const {		bool debuggerSupported() const {
return debuggerInsertNops() && debuggerEmitPrologue();		return debuggerInsertNops() && debuggerEmitPrologue();
}		}

▲ Show 20 Lines • Show All 270 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Show First 20 Lines • Show All 191 Lines • ▼ Show 20 Lines	GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasScalarStores(false),		HasScalarStores(false),
HasScalarAtomics(false),		HasScalarAtomics(false),
HasSDWAOmod(false),		HasSDWAOmod(false),
HasSDWAScalar(false),		HasSDWAScalar(false),
HasSDWASdst(false),		HasSDWASdst(false),
HasSDWAMac(false),		HasSDWAMac(false),
HasSDWAOutModsVOPC(false),		HasSDWAOutModsVOPC(false),
HasDPP(false),		HasDPP(false),
		HasR128A16(false),
HasDLInsts(false),		HasDLInsts(false),
D16PreservesUnusedBits(false),		D16PreservesUnusedBits(false),
FlatAddressSpace(false),		FlatAddressSpace(false),
FlatInstOffsets(false),		FlatInstOffsets(false),
FlatGlobalInsts(false),		FlatGlobalInsts(false),
FlatScratchInsts(false),		FlatScratchInsts(false),
AddNoCarryInsts(false),		AddNoCarryInsts(false),
HasUnpackedD16VMem(false),		HasUnpackedD16VMem(false),
▲ Show 20 Lines • Show All 488 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Show First 20 Lines • Show All 150 Lines • ▼ Show 20 Lines	enum ImmTy {
ImmTyDppBoundCtrl,		ImmTyDppBoundCtrl,
ImmTySdwaDstSel,		ImmTySdwaDstSel,
ImmTySdwaSrc0Sel,		ImmTySdwaSrc0Sel,
ImmTySdwaSrc1Sel,		ImmTySdwaSrc1Sel,
ImmTySdwaDstUnused,		ImmTySdwaDstUnused,
ImmTyDMask,		ImmTyDMask,
ImmTyUNorm,		ImmTyUNorm,
ImmTyDA,		ImmTyDA,
ImmTyR128,		ImmTyR128A16,
ImmTyLWE,		ImmTyLWE,
ImmTyExpTgt,		ImmTyExpTgt,
ImmTyExpCompr,		ImmTyExpCompr,
ImmTyExpVM,		ImmTyExpVM,
ImmTyFORMAT,		ImmTyFORMAT,
ImmTyHwreg,		ImmTyHwreg,
ImmTyOff,		ImmTyOff,
ImmTySendMsg,		ImmTySendMsg,
▲ Show 20 Lines • Show All 117 Lines • ▼ Show 20 Lines	bool isImmModifier() const {
return isImm() && Imm.Type != ImmTyNone;		return isImm() && Imm.Type != ImmTyNone;
}		}

bool isClampSI() const { return isImmTy(ImmTyClampSI); }		bool isClampSI() const { return isImmTy(ImmTyClampSI); }
bool isOModSI() const { return isImmTy(ImmTyOModSI); }		bool isOModSI() const { return isImmTy(ImmTyOModSI); }
bool isDMask() const { return isImmTy(ImmTyDMask); }		bool isDMask() const { return isImmTy(ImmTyDMask); }
bool isUNorm() const { return isImmTy(ImmTyUNorm); }		bool isUNorm() const { return isImmTy(ImmTyUNorm); }
bool isDA() const { return isImmTy(ImmTyDA); }		bool isDA() const { return isImmTy(ImmTyDA); }
bool isR128() const { return isImmTy(ImmTyR128); }		bool isR128A16() const { return isImmTy(ImmTyR128A16); }
bool isLWE() const { return isImmTy(ImmTyLWE); }		bool isLWE() const { return isImmTy(ImmTyLWE); }
bool isOff() const { return isImmTy(ImmTyOff); }		bool isOff() const { return isImmTy(ImmTyOff); }
bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }		bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
bool isExpVM() const { return isImmTy(ImmTyExpVM); }		bool isExpVM() const { return isImmTy(ImmTyExpVM); }
bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }		bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
bool isOffen() const { return isImmTy(ImmTyOffen); }		bool isOffen() const { return isImmTy(ImmTyOffen); }
bool isIdxen() const { return isImmTy(ImmTyIdxen); }		bool isIdxen() const { return isImmTy(ImmTyIdxen); }
bool isAddr64() const { return isImmTy(ImmTyAddr64); }		bool isAddr64() const { return isImmTy(ImmTyAddr64); }
▲ Show 20 Lines • Show All 371 Lines • ▼ Show 20 Lines	static void printImmTy(raw_ostream& OS, ImmTy Type) {
case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;		case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;		case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;		case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;		case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;		case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
case ImmTyDMask: OS << "DMask"; break;		case ImmTyDMask: OS << "DMask"; break;
case ImmTyUNorm: OS << "UNorm"; break;		case ImmTyUNorm: OS << "UNorm"; break;
case ImmTyDA: OS << "DA"; break;		case ImmTyDA: OS << "DA"; break;
case ImmTyR128: OS << "R128"; break;		case ImmTyR128A16: OS << "R128A16"; break;
case ImmTyLWE: OS << "LWE"; break;		case ImmTyLWE: OS << "LWE"; break;
case ImmTyOff: OS << "Off"; break;		case ImmTyOff: OS << "Off"; break;
case ImmTyExpTgt: OS << "ExpTgt"; break;		case ImmTyExpTgt: OS << "ExpTgt"; break;
case ImmTyExpCompr: OS << "ExpCompr"; break;		case ImmTyExpCompr: OS << "ExpCompr"; break;
case ImmTyExpVM: OS << "ExpVM"; break;		case ImmTyExpVM: OS << "ExpVM"; break;
case ImmTyHwreg: OS << "Hwreg"; break;		case ImmTyHwreg: OS << "Hwreg"; break;
case ImmTySendMsg: OS << "SendMsg"; break;		case ImmTySendMsg: OS << "SendMsg"; break;
case ImmTyInterpSlot: OS << "InterpSlot"; break;		case ImmTyInterpSlot: OS << "InterpSlot"; break;
▲ Show 20 Lines • Show All 395 Lines • ▼ Show 20 Lines	private:

bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);		bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
bool validateConstantBusLimitations(const MCInst &Inst);		bool validateConstantBusLimitations(const MCInst &Inst);
bool validateEarlyClobberLimitations(const MCInst &Inst);		bool validateEarlyClobberLimitations(const MCInst &Inst);
bool validateIntClampSupported(const MCInst &Inst);		bool validateIntClampSupported(const MCInst &Inst);
bool validateMIMGAtomicDMask(const MCInst &Inst);		bool validateMIMGAtomicDMask(const MCInst &Inst);
bool validateMIMGGatherDMask(const MCInst &Inst);		bool validateMIMGGatherDMask(const MCInst &Inst);
bool validateMIMGDataSize(const MCInst &Inst);		bool validateMIMGDataSize(const MCInst &Inst);
bool validateMIMGR128(const MCInst &Inst);
bool validateMIMGD16(const MCInst &Inst);		bool validateMIMGD16(const MCInst &Inst);
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);		bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;		bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;		unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

bool trySkipId(const StringRef Id);		bool trySkipId(const StringRef Id);
bool trySkipToken(const AsmToken::TokenKind Kind);		bool trySkipToken(const AsmToken::TokenKind Kind);
bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);		bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
▲ Show 20 Lines • Show All 1,338 Lines • ▼ Show 20 Lines	bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
// GATHER4 instructions use dmask in a different fashion compared to		// GATHER4 instructions use dmask in a different fashion compared to
// other MIMG instructions. The only useful DMASK values are		// other MIMG instructions. The only useful DMASK values are
// 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns		// 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
// (red,red,red,red) etc.) The ISA document doesn't mention		// (red,red,red,red) etc.) The ISA document doesn't mention
// this.		// this.
return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;		return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
}		}

bool AMDGPUAsmParser::validateMIMGR128(const MCInst &Inst) {

const unsigned Opc = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opc);

if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
return true;

int Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
assert(Idx != -1);

bool R128 = (Inst.getOperand(Idx).getImm() != 0);

return !R128 || hasMIMG_R128();
}

bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {		bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {

const unsigned Opc = Inst.getOpcode();		const unsigned Opc = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opc);		const MCInstrDesc &Desc = MII.get(Opc);

if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)		if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
return true;		return true;

Show All 18 Lines	Error(IDLoc,
"destination must be different than all sources");		"destination must be different than all sources");
return false;		return false;
}		}
if (!validateIntClampSupported(Inst)) {		if (!validateIntClampSupported(Inst)) {
Error(IDLoc,		Error(IDLoc,
"integer clamping is not supported on this GPU");		"integer clamping is not supported on this GPU");
return false;		return false;
}		}
if (!validateMIMGR128(Inst)) {
Error(IDLoc,
"r128 modifier is not supported on this GPU");
return false;
}
// For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.		// For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
if (!validateMIMGD16(Inst)) {		if (!validateMIMGD16(Inst)) {
Error(IDLoc,		Error(IDLoc,
"d16 modifier is not supported on this GPU");		"d16 modifier is not supported on this GPU");
return false;		return false;
}		}
if (!validateMIMGDataSize(Inst)) {		if (!validateMIMGDataSize(Inst)) {
Error(IDLoc,		Error(IDLoc,
▲ Show 20 Lines • Show All 947 Lines • ▼ Show 20 Lines	AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,

// We are at the end of the statement, and this is a default argument, so		// We are at the end of the statement, and this is a default argument, so
// use a default value.		// use a default value.
if (getLexer().isNot(AsmToken::EndOfStatement)) {		if (getLexer().isNot(AsmToken::EndOfStatement)) {
switch(getLexer().getKind()) {		switch(getLexer().getKind()) {
case AsmToken::Identifier: {		case AsmToken::Identifier: {
StringRef Tok = Parser.getTok().getString();		StringRef Tok = Parser.getTok().getString();
if (Tok == Name) {		if (Tok == Name) {
		if (Tok == "r128" && isGFX9())
		Error(S, "r128 modifier is not supported on this GPU");
		if (Tok == "a16" && !isGFX9())
		Error(S, "a16 modifier is not supported on this GPU");
Bit = 1;		Bit = 1;
Parser.Lex();		Parser.Lex();
} else if (Tok.startswith("no") && Tok.endswith(Name)) {		} else if (Tok.startswith("no") && Tok.endswith(Name)) {
Bit = 0;		Bit = 0;
Parser.Lex();		Parser.Lex();
} else {		} else {
return MatchOperand_NoMatch;		return MatchOperand_NoMatch;
}		}
▲ Show 20 Lines • Show All 1,226 Lines • ▼ Show 20 Lines	if (Op.isReg()) {
llvm_unreachable("unexpected operand type");		llvm_unreachable("unexpected operand type");
}		}
}		}

addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);		addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);		addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);		addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);		addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128);		addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);		addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);		addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);		addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);		addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
}		}

void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {		void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
cvtMIMG(Inst, Operands, true);		cvtMIMG(Inst, Operands, true);
▲ Show 20 Lines • Show All 93 Lines • ▼ Show 20 Lines	static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"slc", AMDGPUOperand::ImmTySLC, true, nullptr},		{"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
{"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},		{"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
{"d16", AMDGPUOperand::ImmTyD16, true, nullptr},		{"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
{"high", AMDGPUOperand::ImmTyHigh, true, nullptr},		{"high", AMDGPUOperand::ImmTyHigh, true, nullptr},
{"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr},		{"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr},
{"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},		{"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
{"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},		{"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
{"da", AMDGPUOperand::ImmTyDA, true, nullptr},		{"da", AMDGPUOperand::ImmTyDA, true, nullptr},
{"r128", AMDGPUOperand::ImmTyR128, true, nullptr},		{"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
		{"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr},
{"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},		{"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
{"d16", AMDGPUOperand::ImmTyD16, true, nullptr},		{"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
{"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},		{"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
{"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},		{"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
{"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},		{"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
{"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},		{"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
{"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},		{"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
{"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},		{"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
▲ Show 20 Lines • Show All 729 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h

Show First 20 Lines • Show All 74 Lines • ▼ Show 20 Lines	private:
void printTFE(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,		void printTFE(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);		raw_ostream &O);
void printDMask(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,		void printDMask(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);		raw_ostream &O);
void printUNorm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,		void printUNorm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);		raw_ostream &O);
void printDA(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,		void printDA(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);		raw_ostream &O);
void printR128(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,		void printR128A16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);		raw_ostream &O);
void printLWE(const MCInst *MI, unsigned OpNo,		void printLWE(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);		const MCSubtargetInfo &STI, raw_ostream &O);
void printD16(const MCInst *MI, unsigned OpNo,		void printD16(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);		const MCSubtargetInfo &STI, raw_ostream &O);
void printExpCompr(const MCInst *MI, unsigned OpNo,		void printExpCompr(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);		const MCSubtargetInfo &STI, raw_ostream &O);
void printExpVM(const MCInst *MI, unsigned OpNo,		void printExpVM(const MCInst *MI, unsigned OpNo,
▲ Show 20 Lines • Show All 159 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp

Show First 20 Lines • Show All 201 Lines • ▼ Show 20 Lines	void AMDGPUInstPrinter::printUNorm(const MCInst *MI, unsigned OpNo,
printNamedBit(MI, OpNo, O, "unorm");		printNamedBit(MI, OpNo, O, "unorm");
}		}

void AMDGPUInstPrinter::printDA(const MCInst *MI, unsigned OpNo,		void AMDGPUInstPrinter::printDA(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {		const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "da");		printNamedBit(MI, OpNo, O, "da");
}		}

void AMDGPUInstPrinter::printR128(const MCInst *MI, unsigned OpNo,		void AMDGPUInstPrinter::printR128A16(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {		const MCSubtargetInfo &STI, raw_ostream &O) {
		if (STI.hasFeature(AMDGPU::FeatureR128A16))
		printNamedBit(MI, OpNo, O, "a16");
		else
printNamedBit(MI, OpNo, O, "r128");		printNamedBit(MI, OpNo, O, "r128");
}		}

void AMDGPUInstPrinter::printLWE(const MCInst *MI, unsigned OpNo,		void AMDGPUInstPrinter::printLWE(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {		const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "lwe");		printNamedBit(MI, OpNo, O, "lwe");
}		}

void AMDGPUInstPrinter::printD16(const MCInst *MI, unsigned OpNo,		void AMDGPUInstPrinter::printD16(const MCInst *MI, unsigned OpNo,
▲ Show 20 Lines • Show All 1,191 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td

Show First 20 Lines • Show All 135 Lines • ▼ Show 20 Lines	class MIMG_NoSampler_Helper <bits<7> op, string asm,
string dns="">		string dns="">
: MIMG <(outs dst_rc:$vdata), dns>,		: MIMG <(outs dst_rc:$vdata), dns>,
MIMGe<op> {		MIMGe<op> {
let ssamp = 0;		let ssamp = 0;
let d16 = !if(BaseOpcode.HasD16, ?, 0);		let d16 = !if(BaseOpcode.HasD16, ?, 0);

let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc,		let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,		DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),		R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));		!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"		let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");		#!if(BaseOpcode.HasD16, "$d16", "");
}		}

multiclass MIMG_NoSampler_Src_Helper <bits<7> op, string asm,		multiclass MIMG_NoSampler_Src_Helper <bits<7> op, string asm,
RegisterClass dst_rc,		RegisterClass dst_rc,
bit enableDisasm> {		bit enableDisasm> {
▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines	class MIMG_Store_Helper <bits<7> op, string asm,
let mayLoad = 0;		let mayLoad = 0;
let mayStore = 1;		let mayStore = 1;
let hasSideEffects = 0;		let hasSideEffects = 0;
let hasPostISelHook = 0;		let hasPostISelHook = 0;
let DisableWQM = 1;		let DisableWQM = 1;

let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,		let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,		DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),		R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));		!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"		let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");		#!if(BaseOpcode.HasD16, "$d16", "");
}		}

multiclass MIMG_Store_Addr_Helper <bits<7> op, string asm,		multiclass MIMG_Store_Addr_Helper <bits<7> op, string asm,
RegisterClass data_rc,		RegisterClass data_rc,
bit enableDisasm> {		bit enableDisasm> {
Show All 36 Lines	class MIMG_Atomic_Helper <string asm, RegisterClass data_rc,
let hasSideEffects = 1; // FIXME: Remove this		let hasSideEffects = 1; // FIXME: Remove this
let hasPostISelHook = 0;		let hasPostISelHook = 0;
let DisableWQM = 1;		let DisableWQM = 1;
let Constraints = "$vdst = $vdata";		let Constraints = "$vdst = $vdata";
let AsmMatchConverter = "cvtMIMGAtomic";		let AsmMatchConverter = "cvtMIMGAtomic";

let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,		let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,		DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da);		R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da";		let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da";
}		}

multiclass MIMG_Atomic_Helper_m <mimg op, string asm, RegisterClass data_rc,		multiclass MIMG_Atomic_Helper_m <mimg op, string asm, RegisterClass data_rc,
RegisterClass addr_rc, bit enableDasm = 0> {		RegisterClass addr_rc, bit enableDasm = 0> {
let ssamp = 0, d16 = 0 in {		let ssamp = 0, d16 = 0 in {
def _si : MIMG_Atomic_Helper<asm, data_rc, addr_rc, "SICI", enableDasm>,		def _si : MIMG_Atomic_Helper<asm, data_rc, addr_rc, "SICI", enableDasm>,
SIMCInstr<NAME, SIEncodingFamily.SI>,		SIMCInstr<NAME, SIEncodingFamily.SI>,
▲ Show 20 Lines • Show All 47 Lines • ▼ Show 20 Lines
class MIMG_Sampler_Helper <bits<7> op, string asm, RegisterClass dst_rc,		class MIMG_Sampler_Helper <bits<7> op, string asm, RegisterClass dst_rc,
RegisterClass src_rc, string dns="">		RegisterClass src_rc, string dns="">
: MIMG <(outs dst_rc:$vdata), dns>,		: MIMG <(outs dst_rc:$vdata), dns>,
MIMGe<op> {		MIMGe<op> {
let d16 = !if(BaseOpcode.HasD16, ?, 0);		let d16 = !if(BaseOpcode.HasD16, ?, 0);

let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,		let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,		DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),		R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));		!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"		let AsmString = asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");		#!if(BaseOpcode.HasD16, "$d16", "");
}		}

class MIMGAddrSize<int dw, bit enable_disasm> {		class MIMGAddrSize<int dw, bit enable_disasm> {
int NumWords = dw;		int NumWords = dw;

▲ Show 20 Lines • Show All 248 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 4,570 Lines • ▼ Show 20 Lines	static bool parseCachePolicy(SDValue CachePolicy, SelectionDAG &DAG,

return Value == 0;		return Value == 0;
}		}

SDValue SITargetLowering::lowerImage(SDValue Op,		SDValue SITargetLowering::lowerImage(SDValue Op,
const AMDGPU::ImageDimIntrinsicInfo *Intr,		const AMDGPU::ImageDimIntrinsicInfo *Intr,
SelectionDAG &DAG) const {		SelectionDAG &DAG) const {
SDLoc DL(Op);		SDLoc DL(Op);
		MachineFunction &MF = DAG.getMachineFunction();
		const GCNSubtarget* ST = &MF.getSubtarget<GCNSubtarget>();
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =		const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);		AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);		const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
const AMDGPU::MIMGLZMappingInfo *LZMappingInfo =		const AMDGPU::MIMGLZMappingInfo *LZMappingInfo =
AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode);		AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode);
unsigned IntrOpcode = Intr->BaseOpcode;		unsigned IntrOpcode = Intr->BaseOpcode;

SmallVector<EVT, 2> ResultTypes(Op->value_begin(), Op->value_end());		SmallVector<EVT, 2> ResultTypes(Op->value_begin(), Op->value_end());
bool IsD16 = false;		bool IsD16 = false;
		bool IsA16 = false;
SDValue VData;		SDValue VData;
int NumVDataDwords;		int NumVDataDwords;
unsigned AddrIdx; // Index of first address argument		unsigned AddrIdx; // Index of first address argument
unsigned DMask;		unsigned DMask;

if (BaseOpcode->Atomic) {		if (BaseOpcode->Atomic) {
VData = Op.getOperand(2);		VData = Op.getOperand(2);

▲ Show 20 Lines • Show All 59 Lines • ▼ Show 20 Lines	if (!DMask && !BaseOpcode->Store) {
// store the channels' default values.		// store the channels' default values.
SDValue Undef = DAG.getUNDEF(Op.getValueType());		SDValue Undef = DAG.getUNDEF(Op.getValueType());
if (isa<MemSDNode>(Op))		if (isa<MemSDNode>(Op))
return DAG.getMergeValues({Undef, Op.getOperand(0)}, DL);		return DAG.getMergeValues({Undef, Op.getOperand(0)}, DL);
return Undef;		return Undef;
}		}
}		}

unsigned NumVAddrs = BaseOpcode->NumExtraArgs +		unsigned NumGradients = BaseOpcode->Gradients ? DimInfo->NumGradients : 0;
(BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +		unsigned NumCoords = BaseOpcode->Coordinates ? DimInfo->NumCoords : 0;
(BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +		unsigned NumLCM = BaseOpcode->LodOrClampOrMip ? 1 : 0;
(BaseOpcode->LodOrClampOrMip ? 1 : 0);		unsigned NumVAddrs = BaseOpcode->NumExtraArgs + NumGradients +
		NumCoords + NumLCM;
		unsigned NumMIVAddrs = NumVAddrs;

SmallVector<SDValue, 4> VAddrs;		SmallVector<SDValue, 4> VAddrs;
for (unsigned i = 0; i < NumVAddrs; ++i)
VAddrs.push_back(Op.getOperand(AddrIdx + i));

// Optimize _L to _LZ when _L is zero		// Optimize _L to _LZ when _L is zero
if (LZMappingInfo) {		if (LZMappingInfo) {
if (auto ConstantLod =		if (auto ConstantLod =
dyn_cast<ConstantFPSDNode>(VAddrs[NumVAddrs-1].getNode())) {		dyn_cast<ConstantFPSDNode>(Op.getOperand(AddrIdx+NumVAddrs-1))) {
if (ConstantLod->isZero() \|\| ConstantLod->isNegative()) {		if (ConstantLod->isZero() \|\| ConstantLod->isNegative()) {
IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l		IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l
VAddrs.pop_back(); // remove 'lod'		NumMIVAddrs--; // remove 'lod'
		}
		}
		}

		// Check for 16 bit addresses and pack if true.
		unsigned DimIdx = AddrIdx + BaseOpcode->NumExtraArgs;
		MVT VAddrVT = Op.getOperand(DimIdx).getSimpleValueType();
		if (VAddrVT.getScalarType() == MVT::f16 &&
		ST->hasFeature(AMDGPU::FeatureR128A16)) {
		IsA16 = true;
		for (unsigned i = AddrIdx; i < (AddrIdx + NumMIVAddrs); ++i) {
		SDValue AddrLo, AddrHi;
		// Push back extra arguments.
		if (i < DimIdx) {
		AddrLo = Op.getOperand(i);
		} else {
		AddrLo = Op.getOperand(i);
		// Dz/dh, dz/dv and the last odd coord are packed with undef. Also,
		// in 1D, derivatives dx/dh and dx/dv are packed with undef.
		if (((i + 1) >= (AddrIdx + NumMIVAddrs)) \|\|
		((NumGradients / 2) % 2 == 1 &&
		(i == DimIdx + (NumGradients / 2) - 1 \|\|
		i == DimIdx + NumGradients - 1))) {
		AddrHi = DAG.getUNDEF(MVT::f16);
		} else {
		AddrHi = Op.getOperand(i + 1);
		i++;
}		}
		AddrLo = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f16,
		{AddrLo, AddrHi});
		AddrLo = DAG.getBitcast(MVT::i32, AddrLo);
}		}
		VAddrs.push_back(AddrLo);
		}
		} else {
		for (unsigned i = 0; i < NumMIVAddrs; ++i)
		VAddrs.push_back(Op.getOperand(AddrIdx + i));
}		}

SDValue VAddr = getBuildDwordsVector(DAG, DL, VAddrs);		SDValue VAddr = getBuildDwordsVector(DAG, DL, VAddrs);

SDValue True = DAG.getTargetConstant(1, DL, MVT::i1);		SDValue True = DAG.getTargetConstant(1, DL, MVT::i1);
SDValue False = DAG.getTargetConstant(0, DL, MVT::i1);		SDValue False = DAG.getTargetConstant(0, DL, MVT::i1);
unsigned CtrlIdx; // Index of texfailctrl argument		unsigned CtrlIdx; // Index of texfailctrl argument
SDValue Unorm;		SDValue Unorm;
Show All 32 Lines	SDValue SITargetLowering::lowerImage(SDValue Op,
Ops.push_back(VAddr);		Ops.push_back(VAddr);
Ops.push_back(Op.getOperand(AddrIdx + NumVAddrs)); // rsrc		Ops.push_back(Op.getOperand(AddrIdx + NumVAddrs)); // rsrc
if (BaseOpcode->Sampler)		if (BaseOpcode->Sampler)
Ops.push_back(Op.getOperand(AddrIdx + NumVAddrs + 1)); // sampler		Ops.push_back(Op.getOperand(AddrIdx + NumVAddrs + 1)); // sampler
Ops.push_back(DAG.getTargetConstant(DMask, DL, MVT::i32));		Ops.push_back(DAG.getTargetConstant(DMask, DL, MVT::i32));
Ops.push_back(Unorm);		Ops.push_back(Unorm);
Ops.push_back(GLC);		Ops.push_back(GLC);
Ops.push_back(SLC);		Ops.push_back(SLC);
Ops.push_back(False); // r128		Ops.push_back(IsA16 && // a16 or r128
		ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
Ops.push_back(False); // tfe		Ops.push_back(False); // tfe
Ops.push_back(False); // lwe		Ops.push_back(False); // lwe
Ops.push_back(DimInfo->DA ? True : False);		Ops.push_back(DimInfo->DA ? True : False);
if (BaseOpcode->HasD16)		if (BaseOpcode->HasD16)
Ops.push_back(IsD16 ? True : False);		Ops.push_back(IsD16 ? True : False);
if (isa<MemSDNode>(Op))		if (isa<MemSDNode>(Op))
Ops.push_back(Op.getOperand(0)); // chain		Ops.push_back(Op.getOperand(0)); // chain

▲ Show 20 Lines • Show All 4,469 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td

	Show First 20 Lines • Show All 748 Lines • ▼ Show 20 Lines
	def clampmod : NamedOperandBit<"ClampSI", NamedMatchClass<"ClampSI">>;			def clampmod : NamedOperandBit<"ClampSI", NamedMatchClass<"ClampSI">>;
	def highmod : NamedOperandBit<"High", NamedMatchClass<"High">>;			def highmod : NamedOperandBit<"High", NamedMatchClass<"High">>;

	def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>;			def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>;
	def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;			def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
	def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;			def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
	def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;			def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
	def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>;			def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
	def R128 : NamedOperandBit<"R128", NamedMatchClass<"R128">>;			def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>;
	def D16 : NamedOperandBit<"D16", NamedMatchClass<"D16">>;			def D16 : NamedOperandBit<"D16", NamedMatchClass<"D16">>;
	def LWE : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;			def LWE : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
	def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;			def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;
	def exp_vm : NamedOperandBit<"ExpVM", NamedMatchClass<"ExpVM">>;			def exp_vm : NamedOperandBit<"ExpVM", NamedMatchClass<"ExpVM">>;

	def FORMAT : NamedOperandU8<"FORMAT", NamedMatchClass<"FORMAT">>;			def FORMAT : NamedOperandU8<"FORMAT", NamedMatchClass<"FORMAT">>;

	def DMask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>;			def DMask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>;
	▲ Show 20 Lines • Show All 1,253 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll

				; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s

				; gather4 on a 2D image with half s/t coordinates; the check expects the
				; a16 modifier and a single address VGPR (v0).
				; GCN-LABEL: {{^}}gather4_2d:
				; GCN: image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16{{$}}
				define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; gather4 on a cube image with half s/t/face; the check expects a16 and
				; da with the address in v[0:1].
				; GCN-LABEL: {{^}}gather4_cube:
				; GCN: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16 da{{$}}
				define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32 1, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; gather4 on a 2D array image with half s/t/slice; the check expects a16
				; and da with the address in v[0:1].
				; GCN-LABEL: {{^}}gather4_2darray:
				; GCN: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16 da{{$}}
				define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32 1, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; gather4_c with a float %zcompare and half s/t; the check expects a16
				; with the address in v[0:1].
				; NOTE(review): the intrinsic name ends in .f32 although the coordinates
				; are half (the sample tests use .f16 for this shape) - confirm mangling.
				; GCN-LABEL: {{^}}gather4_c_2d:
				; GCN: image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
				define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; gather4_cl with half s/t/clamp; the check expects a16 with the address
				; in v[0:1].
				; GCN-LABEL: {{^}}gather4_cl_2d:
				; GCN: image_gather4_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
				define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32 1, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; gather4_c_cl with a float %zcompare and half s/t/clamp; the check
				; expects a16 with the address in v[0:3].
				; NOTE(review): the intrinsic name ends in .f32 although the coordinates
				; are half - confirm mangling.
				; GCN-LABEL: {{^}}gather4_c_cl_2d:
				; GCN: image_gather4_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
				define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; gather4_b with a float %bias and half s/t; the check expects a16 with
				; the address in v[0:1].
				; GCN-LABEL: {{^}}gather4_b_2d:
				; GCN: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
				define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; gather4_c_b with float %bias and %zcompare plus half s/t; the check
				; expects a16 with the address in v[0:3].
				; GCN-LABEL: {{^}}gather4_c_b_2d:
				; GCN: image_gather4_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
				define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; gather4_b_cl with a float %bias and half s/t/clamp; the check expects
				; a16 with the address in v[0:3].
				; GCN-LABEL: {{^}}gather4_b_cl_2d:
				; GCN: image_gather4_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
				define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; gather4_c_b_cl with float %bias and %zcompare plus half s/t/clamp; the
				; check expects a16 with the address in v[0:3].
				; GCN-LABEL: {{^}}gather4_c_b_cl_2d:
				; GCN: image_gather4_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
				define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; gather4_l with half s/t/lod; the check expects a16 with the address in
				; v[0:1].
				; GCN-LABEL: {{^}}gather4_l_2d:
				; GCN: image_gather4_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
				define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 1, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; gather4_c_l with a float %zcompare and half s/t/lod; the check expects
				; a16 with the address in v[0:3].
				; NOTE(review): the intrinsic name ends in .f32 although the coordinates
				; are half - confirm mangling.
				; GCN-LABEL: {{^}}gather4_c_l_2d:
				; GCN: image_gather4_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
				define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; gather4_lz with half s/t; the check expects a16 and a single address
				; VGPR (v0).
				; GCN-LABEL: {{^}}gather4_lz_2d:
				; GCN: image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16{{$}}
				define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; gather4_c_lz with a float %zcompare and half s/t; the check expects a16
				; with the address in v[0:1].
				; NOTE(review): the intrinsic name ends in .f32 although the coordinates
				; are half - confirm mangling.
				; GCN-LABEL: {{^}}gather4_c_lz_2d:
				; GCN: image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
				define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; Declarations of the gather4 intrinsics called by the tests above, grouped
				; as: plain dimensions, compare/clamp, bias, lod, and lz variants.
				declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

				declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

				declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f16(i32, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

				declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

				declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

				; Attribute groups. Only #1 is referenced by the declarations above.
				attributes #0 = { nounwind }
				attributes #1 = { nounwind readonly }
				attributes #2 = { nounwind readnone }

llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll

				; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
				; sample on a 1D image with a single half coordinate; the check expects
				; the a16 modifier and a single address VGPR (v0). The GCN-LABEL anchor
				; scopes the check to this function, as in every other test in this file.
				; GCN-LABEL: {{^}}sample_1d:
				; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample on a 2D image with half s/t; the check expects a16 and a single
				; address VGPR (v0).
				; GCN-LABEL: {{^}}sample_2d:
				; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample on a 3D image with half s/t/r; the check expects a16 with the
				; address in v[0:1].
				; GCN-LABEL: {{^}}sample_3d:
				; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample on a cube image with half s/t/face; the check expects a16 and da
				; with the address in v[0:1].
				; GCN-LABEL: {{^}}sample_cube:
				; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 da{{$}}
				define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample on a 1D array image with half s/slice; the check expects a16 and
				; da with a single address VGPR (v0).
				; GCN-LABEL: {{^}}sample_1darray:
				; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 da{{$}}
				define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half %s, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample on a 2D array image with half s/t/slice; the check expects a16
				; and da with the address in v[0:1].
				; GCN-LABEL: {{^}}sample_2darray:
				; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 da{{$}}
				define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_c with a float %zcompare and a half 1D coordinate; the check
				; expects a16 with the address in v[0:1].
				; GCN-LABEL: {{^}}sample_c_1d:
				; GCN: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_c with a float %zcompare and half s/t; the check expects a16
				; with the address in v[0:1].
				; GCN-LABEL: {{^}}sample_c_2d:
				; GCN: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_cl with half s/clamp on a 1D image; the check expects a16 and a
				; single address VGPR (v0).
				; GCN-LABEL: {{^}}sample_cl_1d:
				; GCN: image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_cl with half s/t/clamp on a 2D image; the check expects a16 with
				; the address in v[0:1].
				; GCN-LABEL: {{^}}sample_cl_2d:
				; GCN: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_c_cl with a float %zcompare and half s/clamp; the check expects
				; a16 with the address in v[0:1].
				; GCN-LABEL: {{^}}sample_c_cl_1d:
				; GCN: image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_c_cl with a float %zcompare and half s/t/clamp; the check
				; expects a16 with the address in v[0:3].
				; GCN-LABEL: {{^}}sample_c_cl_2d:
				; GCN: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_b with a float %bias and a half 1D coordinate; the check expects
				; a16 with the address in v[0:1].
				; GCN-LABEL: {{^}}sample_b_1d:
				; GCN: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32 15, float %bias, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_b with a float %bias and half s/t; the check expects a16 with
				; the address in v[0:1].
				; GCN-LABEL: {{^}}sample_b_2d:
				; GCN: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_c_b with float %bias and %zcompare plus a half 1D coordinate;
				; the check expects a16 with the address in v[0:3].
				; GCN-LABEL: {{^}}sample_c_b_1d:
				; GCN: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_c_b with float %bias and %zcompare plus half s/t; the check
				; expects a16 with the address in v[0:3].
				; GCN-LABEL: {{^}}sample_c_b_2d:
				; GCN: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_b_cl with a float %bias and half s/clamp; the check expects a16
				; with the address in v[0:1].
				; GCN-LABEL: {{^}}sample_b_cl_1d:
				; GCN: image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_b_cl with a float %bias and half s/t/clamp; the check expects
				; a16 with the address in v[0:3].
				; GCN-LABEL: {{^}}sample_b_cl_2d:
				; GCN: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_c_b_cl with float %bias and %zcompare plus half s/clamp; the
				; check expects a16 with the address in v[0:3].
				; GCN-LABEL: {{^}}sample_c_b_cl_1d:
				; GCN: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_c_b_cl with float %bias and %zcompare plus half s/t/clamp; the
				; check expects a16 with the address in v[0:3].
				; GCN-LABEL: {{^}}sample_c_b_cl_2d:
				; GCN: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_d with half 1D derivatives (dsdh, dsdv) and a half coordinate;
				; the check expects a16 with the address in v[0:3].
				; GCN-LABEL: {{^}}sample_d_1d:
				; GCN: image_sample_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_d with four half 2D derivatives and half s/t; the check expects
				; a16 with the address in v[1:4].
				; GCN-LABEL: {{^}}sample_d_2d:
				; GCN: image_sample_d v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_d with six half 3D derivatives and half s/t/r; the check expects
				; a16 with the address in v[2:9]. The directive must be spelled GCN-LABEL,
				; otherwise FileCheck ignores the line and the label is never checked.
				; GCN-LABEL: {{^}}sample_d_3d:
				; GCN: image_sample_d v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_c_d with a float %zcompare, half 1D derivatives and a half
				; coordinate; the check expects a16 with the address in v[0:3].
				; GCN-LABEL: {{^}}sample_c_d_1d:
				; GCN: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_c_d with a float %zcompare, four half 2D derivatives and half
				; s/t; the check expects a16 with the address in v[1:4].
				; GCN-LABEL: {{^}}sample_c_d_2d:
				; GCN: image_sample_c_d v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_d_cl with half 1D derivatives and half s/clamp; the check
				; expects a16 with the address in v[0:3].
				; GCN-LABEL: {{^}}sample_d_cl_1d:
				; GCN: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_d_cl with four half 2D derivatives and half s/t/clamp; the check
				; expects a16 with the address in v[2:5].
				; GCN-LABEL: {{^}}sample_d_cl_2d:
				; GCN: image_sample_d_cl v[0:3], v[2:5], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_c_d_cl with a float %zcompare, half 1D derivatives and half
				; s/clamp; the check expects a16 with the address in v[0:3].
				; GCN-LABEL: {{^}}sample_c_d_cl_1d:
				; GCN: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_c_d_cl with a float %zcompare, four half 2D derivatives and half
				; s/t/clamp; the check expects a16 with the address in v[2:9].
				; GCN-LABEL: {{^}}sample_c_d_cl_2d:
				; GCN: image_sample_c_d_cl v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_cd with half 1D derivatives and a half coordinate; the check
				; expects a16 with the address in v[0:3].
				; GCN-LABEL: {{^}}sample_cd_1d:
				; GCN: image_sample_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_cd with four half 2D derivatives and half s/t; the check expects
				; a16 with the address in v[1:4].
				; GCN-LABEL: {{^}}sample_cd_2d:
				; GCN: image_sample_cd v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_c_cd with a float %zcompare, half 1D derivatives and a half
				; coordinate; the check expects a16 with the address in v[0:3].
				; GCN-LABEL: {{^}}sample_c_cd_1d:
				; GCN: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_c_cd with a float %zcompare, four half 2D derivatives and half
				; s/t; the check expects a16 with the address in v[1:4].
				; GCN-LABEL: {{^}}sample_c_cd_2d:
				; GCN: image_sample_c_cd v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_cd_cl with half 1D derivatives and half s/clamp; the check
				; expects a16 with the address in v[0:3].
				; GCN-LABEL: {{^}}sample_cd_cl_1d:
				; GCN: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_cd_cl with four half 2D derivatives and half s/t/clamp; the
				; check expects a16 with the address in v[2:5].
				; GCN-LABEL: {{^}}sample_cd_cl_2d:
				; GCN: image_sample_cd_cl v[0:3], v[2:5], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_c_cd_cl with a float %zcompare, half 1D derivatives and half
				; s/clamp; the check expects a16 with the address in v[0:3].
				; GCN-LABEL: {{^}}sample_c_cd_cl_1d:
				; GCN: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; sample_c_cd_cl with a float %zcompare, four half 2D derivatives and
				; half s/t/clamp; the check expects a16 with the address in v[2:9].
				; GCN-LABEL: {{^}}sample_c_cd_cl_2d:
				; GCN: image_sample_c_cd_cl v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; a16 case for sample.l on a 1D image: the half coordinate %s and the half %lod are
				; the only address operands. The check line expects them in a single VGPR (v0) with
				; the a16 modifier set.
				; GCN-LABEL: {{^}}sample_l_1d:
				; GCN: image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; a16 case for sample.l on a 2D image: half coordinates %s, %t and a half %lod.
				; The check line expects a two-register address operand (v[0:1]) and the a16
				; modifier.
				; GCN-LABEL: {{^}}sample_l_2d:
				; GCN: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; a16 case for sample.c.l on a 1D image: float %zcompare plus a half coordinate and
				; a half %lod. The check line expects a two-register address operand (v[0:1]) and
				; the a16 modifier.
				; GCN-LABEL: {{^}}sample_c_l_1d:
				; GCN: image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; a16 case for sample.c.l on a 2D image: float %zcompare, half coordinates %s, %t
				; and a half %lod. The check line expects a four-register address operand (v[0:3])
				; and the a16 modifier.
				; GCN-LABEL: {{^}}sample_c_l_2d:
				; GCN: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; a16 case for sample.lz on a 1D image: a single half coordinate %s is the only
				; address operand. The check line expects it in v0 with the a16 modifier set.
				; GCN-LABEL: {{^}}sample_lz_1d:
				; GCN: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; a16 case for sample.lz on a 2D image: two half coordinates (%s, %t). The check
				; line expects both to occupy a single VGPR (v0) with the a16 modifier set.
				; GCN-LABEL: {{^}}sample_lz_2d:
				; GCN: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; a16 case for sample.c.lz on a 1D image: float %zcompare plus one half coordinate.
				; The check line expects a two-register address operand (v[0:1]) and the a16
				; modifier.
				; GCN-LABEL: {{^}}sample_c_lz_1d:
				; GCN: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; a16 case for sample.c.lz on a 2D image: float %zcompare plus two half coordinates.
				; The check line expects a two-register address operand (v[0:1]) and the a16
				; modifier.
				; GCN-LABEL: {{^}}sample_c_lz_2d:
				; GCN: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
				define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
				main_body:
				%v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <4 x float> %v
				}

				; a16 case for sample.c.d.o on a 2D array image with a scalar float result: dmask
				; is 4 (0x4), so the check line expects a single destination VGPR (v0). %offset is
				; i32 and %zcompare is float; the gradient, coordinate and %slice operands are
				; half. Both the a16 and da modifiers are expected.
				; GCN-LABEL: {{^}}sample_c_d_o_2darray_V1:
				; GCN: image_sample_c_d_o v0, v[2:9], s[0:7], s[8:11] dmask:0x4 a16 da{{$}}
				define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
				main_body:
				%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret float %v
				}

				; Same operands as sample_c_d_o_2darray_V1 but with a <2 x float> result: dmask is
				; 6 (0x6), so the check line expects a two-register destination (v[0:1]). Both the
				; a16 and da modifiers are expected.
				; GCN-LABEL: {{^}}sample_c_d_o_2darray_V2:
				; GCN: image_sample_c_d_o v[0:1], v[2:9], s[0:7], s[8:11] dmask:0x6 a16 da{{$}}
				define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
				main_body:
				%v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
				ret <2 x float> %v
				}

				; Plain sample intrinsics taking half coordinates, one per image dimension
				; (1d, 2d, 3d, cube, 1darray, 2darray), plus a <8 x float> 1d variant.
				declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <8 x float> @llvm.amdgcn.image.sample.1d.v8f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

				; sample.c / sample.cl / sample.c.cl variants (1d and 2d). The .c forms take a
				; leading float operand ahead of the half operands.
				declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

				; sample.b family (b, c.b, b.cl, c.b.cl; 1d and 2d). These keep one (b) or two
				; (c.b) leading float operands; the remaining address operands are half.
				declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

				; sample.d family (d, c.d, d.cl, c.d.cl) with half address operands; the .c forms
				; take a leading float operand.
				declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

				; sample.cd family (cd, c.cd, cd.cl, c.cd.cl; 1d and 2d) with half address
				; operands; the .c forms take a leading float operand.
				declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

				; sample.l / sample.c.l (1d and 2d) with half address operands; the .c forms take
				; a leading float operand.
				declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

				; sample.lz / sample.c.lz (1d and 2d) with half coordinates; the .c forms take a
				; leading float operand.
				declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

				; sample.c.d.o on 2darray with narrower results (float and <2 x float>), called by
				; sample_c_d_o_2darray_V1 and sample_c_d_o_2darray_V2.
				declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
				declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

				; Attribute groups. The intrinsic declarations visible in this part of the file
				; all reference #1 (nounwind readonly).
				attributes #0 = { nounwind }
				attributes #1 = { nounwind readonly }
				attributes #2 = { nounwind readnone }

llvm/trunk/test/MC/AMDGPU/mimg.s

	Show First 20 Lines • Show All 262 Lines • ▼ Show 20 Lines

	image_sample v[193:194], v[237:240], s[28:35], s[4:7] dmask:0x7 d16			image_sample v[193:194], v[237:240], s[28:35], s[4:7] dmask:0x7 d16
	// NOSICI: error: d16 modifier is not supported on this GPU			// NOSICI: error: d16 modifier is not supported on this GPU
	// NOGFX8_0: error: image data size does not match dmask and tfe			// NOGFX8_0: error: image data size does not match dmask and tfe
	// GFX8_1: image_sample v[193:194], v[237:240], s[28:35], s[4:7] dmask:0x7 d16 ; encoding: [0x00,0x07,0x80,0xf0,0xed,0xc1,0x27,0x80]			// GFX8_1: image_sample v[193:194], v[237:240], s[28:35], s[4:7] dmask:0x7 d16 ; encoding: [0x00,0x07,0x80,0xf0,0xed,0xc1,0x27,0x80]
	// GFX9: image_sample v[193:194], v[237:240], s[28:35], s[4:7] dmask:0x7 d16 ; encoding: [0x00,0x07,0x80,0xf0,0xed,0xc1,0x27,0x80]			// GFX9: image_sample v[193:194], v[237:240], s[28:35], s[4:7] dmask:0x7 d16 ; encoding: [0x00,0x07,0x80,0xf0,0xed,0xc1,0x27,0x80]

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
				// Image Sample: a16
				//===----------------------------------------------------------------------===//

				image_sample v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16
				// GFX9: image_sample v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16 ; encoding: [0x00,0x8f,0x80,0xf0,0xed,0xc1,0x27,0x00]
				// NOSICI: error: a16 modifier is not supported on this GPU
				// NOVI: error: a16 modifier is not supported on this GPU

				image_sample_d v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16
				// GFX9: image_sample_d v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16 ; encoding: [0x00,0x8f,0x88,0xf0,0xed,0xc1,0x27,0x00]
				// NOSICI: error: a16 modifier is not supported on this GPU
				// NOVI: error: a16 modifier is not supported on this GPU

				image_sample_c_d v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16
				// GFX9: image_sample_c_d v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16 ; encoding: [0x00,0x8f,0xa8,0xf0,0xed,0xc1,0x27,0x00]
				// NOSICI: error: a16 modifier is not supported on this GPU
				// NOVI: error: a16 modifier is not supported on this GPU

				image_sample_c_d_cl v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16
				// GFX9: image_sample_c_d_cl v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16 ; encoding: [0x00,0x8f,0xac,0xf0,0xed,0xc1,0x27,0x00]
				// NOSICI: error: a16 modifier is not supported on this GPU
				// NOVI: error: a16 modifier is not supported on this GPU

				//===----------------------------------------------------------------------===//
	// Image Atomics			// Image Atomics
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	image_atomic_add v4, v192, s[28:35] dmask:0x1 unorm glc			image_atomic_add v4, v192, s[28:35] dmask:0x1 unorm glc
	// SICI: image_atomic_add v4, v192, s[28:35] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x44,0xf0,0xc0,0x04,0x07,0x00]			// SICI: image_atomic_add v4, v192, s[28:35] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x44,0xf0,0xc0,0x04,0x07,0x00]
	// GFX89: image_atomic_add v4, v192, s[28:35] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x48,0xf0,0xc0,0x04,0x07,0x00]			// GFX89: image_atomic_add v4, v192, s[28:35] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x48,0xf0,0xc0,0x04,0x07,0x00]

	image_atomic_add v4, v[192:193], s[28:35] dmask:0x1 unorm glc			image_atomic_add v4, v[192:193], s[28:35] dmask:0x1 unorm glc
	▲ Show 20 Lines • Show All 88 Lines • ▼ Show 20 Lines
	// GFX8_1: image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]			// GFX8_1: image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]
	// GFX9: image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]			// GFX9: image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]

	image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1			image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1
	// NOSICI: error: image data size does not match dmask and tfe			// NOSICI: error: image data size does not match dmask and tfe
	// NOGFX8_0: error: image data size does not match dmask and tfe			// NOGFX8_0: error: image data size does not match dmask and tfe
	// NOGFX8_1: error: image data size does not match dmask and tfe			// NOGFX8_1: error: image data size does not match dmask and tfe
	// NOGFX9: error: image data size does not match dmask and tfe			// NOGFX9: error: image data size does not match dmask and tfe

				image_gather4 v[5:8], v1, s[8:15], s[12:15] dmask:0x1 a16
				// GFX9: image_gather4 v[5:8], v1, s[8:15], s[12:15] dmask:0x1 a16 ; encoding: [0x00,0x81,0x00,0xf1,0x01,0x05,0x62,0x00]
				// NOSICI: error: a16 modifier is not supported on this GPU
				// NOVI: error: a16 modifier is not supported on this GPU

				image_gather4_b_cl v[5:8], v[1:4], s[8:15], s[12:15] dmask:0x1 a16
				// GFX9: image_gather4_b_cl v[5:8], v[1:4], s[8:15], s[12:15] dmask:0x1 a16 ; encoding: [0x00,0x81,0x18,0xf1,0x01,0x05,0x62,0x00]
				// NOSICI: error: a16 modifier is not supported on this GPU
				// NOVI: error: a16 modifier is not supported on this GPU

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Add support for a16 modifier for gfx9
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 162870

llvm/trunk/lib/Target/AMDGPU/AMDGPU.td

llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h

llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h

llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp

llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td

llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp

llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td

llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll

llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll

llvm/trunk/test/MC/AMDGPU/mimg.s

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Add support for a16 modifier for gfx9
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 162870

llvm/trunk/lib/Target/AMDGPU/AMDGPU.td

llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h

llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h

llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp

llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td

llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp

llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td

llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll

llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll

llvm/trunk/test/MC/AMDGPU/mimg.s

[AMDGPU] Add support for a16 modifier for gfx9
ClosedPublic