Diff 51139

lib/Target/AMDGPU/SIISelLowering.cpp

Context not available.
	addRegisterClass(MVT::v16i32, &AMDGPU::SReg_512RegClass);	addRegisterClass(MVT::v16i32, &AMDGPU::SReg_512RegClass);
	addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);	addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);

		// TODO: Subtarget feature for i16
		arsenmUnsubmitted Not Done Reply Inline Actions I did this already, so this should check Subtarget->has16BitInsts() arsenm: I did this already, so this should check Subtarget->has16BitInsts()
		tstellarAMDAuthorUnsubmitted Done Reply Inline Actions Do we still need this comment? tstellarAMD: Do we still need this comment?
		if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
		addRegisterClass(MVT::i16, &AMDGPU::SReg_32RegClass);

	computeRegisterProperties(STI.getRegisterInfo());	computeRegisterProperties(STI.getRegisterInfo());

	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand);	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand);
		kzhuravlUnsubmitted Done Reply Inline Actions I do not think we need this comment kzhuravl: I do not think we need this comment
		kzhuravlUnsubmitted Done Reply Inline Actions Subtarget->has16BitInsts() kzhuravl: Subtarget->has16BitInsts()
		kzhuravlUnsubmitted Done Reply Inline Actions Subtarget->has16BitInsts() kzhuravl: Subtarget->has16BitInsts()
Context not available.
	setOperationAction(ISD::FDIV, MVT::f32, Custom);	setOperationAction(ISD::FDIV, MVT::f32, Custom);
	setOperationAction(ISD::FDIV, MVT::f64, Custom);	setOperationAction(ISD::FDIV, MVT::f64, Custom);

		if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
		setOperationAction(ISD::Constant, MVT::i16, Legal);

		setOperationAction(ISD::ADD, MVT::i16, Legal);
		setOperationAction(ISD::SUB, MVT::i16, Legal);
		setOperationAction(ISD::SHL, MVT::i16, Legal);
		setOperationAction(ISD::SRL, MVT::i16, Legal);
		setOperationAction(ISD::SRA, MVT::i16, Legal);
		tstellarAMDAuthorUnsubmitted Not Done Reply Inline Actions Are these really necessary? I thought making a type legal marked these operations legal by default. tstellarAMD: Are these really necessary? I thought making a type legal marked these operations legal by…

		setOperationAction(ISD::SMIN, MVT::i16, Legal);
		setOperationAction(ISD::SMAX, MVT::i16, Legal);
		setOperationAction(ISD::UMIN, MVT::i16, Legal);
		setOperationAction(ISD::UMAX, MVT::i16, Legal);
		kzhuravlUnsubmitted Done Reply Inline Actions Detabify kzhuravl: Detabify

		setOperationAction(ISD::SETCC, MVT::i16, Legal);
		kzhuravlUnsubmitted Done Reply Inline Actions Detabify kzhuravl: Detabify
		setOperationAction(ISD::TRUNCATE, MVT::i16, Legal);
		tstellarAMDAuthorUnsubmitted Done Reply Inline Actions Same with these too. Are they really necessary? tstellarAMD: Same with these too. Are they really necessary?
		arsenmUnsubmitted Done Reply Inline Actions min/max need to be explicitly made legal, but that should be a separate patch. setcc should be promote for now until those are added later arsenm: min/max need to be explicitly made legal, but that should be a separate patch. setcc should be…

		kzhuravlUnsubmitted Done Reply Inline Actions Detabify kzhuravl: Detabify
		setOperationAction(ISD::SIGN_EXTEND, MVT::i16, Promote);
		AddPromotedToType(ISD::SIGN_EXTEND, MVT::i16, MVT::i32);
		kzhuravlUnsubmitted Done Reply Inline Actions Detabify kzhuravl: Detabify

		kzhuravlUnsubmitted Done Reply Inline Actions Remove extra new line kzhuravl: Remove extra new line
		setOperationAction(ISD::AND, MVT::i16, Promote);
		setOperationAction(ISD::OR, MVT::i16, Promote);
		setOperationAction(ISD::XOR, MVT::i16, Promote);

		setOperationAction(ISD::ROTR, MVT::i16, Promote);
		setOperationAction(ISD::ROTL, MVT::i16, Promote);

		setOperationAction(ISD::SDIV, MVT::i16, Promote);
		setOperationAction(ISD::UDIV, MVT::i16, Promote);
		setOperationAction(ISD::SREM, MVT::i16, Promote);
		setOperationAction(ISD::UREM, MVT::i16, Promote);
		setOperationAction(ISD::MUL, MVT::i16, Promote);

		setOperationAction(ISD::BSWAP, MVT::i16, Promote);
		setOperationAction(ISD::CTTZ, MVT::i16, Promote);
		setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Promote);
		setOperationAction(ISD::CTLZ, MVT::i16, Promote);
		setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Promote);

		setOperationAction(ISD::SELECT, MVT::i16, Legal);
		setOperationAction(ISD::SELECT_CC, MVT::i16, Expand);

		setOperationAction(ISD::BR_CC, MVT::i16, Expand);

		setOperationAction(ISD::LOAD, MVT::i16, Custom);
		setOperationAction(ISD::STORE, MVT::i16, Custom);

		setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i16, Legal);
		setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, MVT::i16, Legal);
		setLoadExtAction(ISD::EXTLOAD, MVT::i32, MVT::i16, Legal);

		setLoadExtAction(ISD::SEXTLOAD, MVT::i64, MVT::i16, Expand);
		setLoadExtAction(ISD::ZEXTLOAD, MVT::i64, MVT::i16, Expand);
		setLoadExtAction(ISD::EXTLOAD, MVT::i64, MVT::i16, Expand);

		setTruncStoreAction(MVT::i64, MVT::i16, Expand);
		}

	setTargetDAGCombine(ISD::FADD);	setTargetDAGCombine(ISD::FADD);
	setTargetDAGCombine(ISD::FSUB);	setTargetDAGCombine(ISD::FSUB);
	setTargetDAGCombine(ISD::FMINNUM);	setTargetDAGCombine(ISD::FMINNUM);
Context not available.
	ISD::LoadExtType ExtType = Load->getExtensionType();	ISD::LoadExtType ExtType = Load->getExtensionType();
		tstellarAMDAuthorUnsubmitted Done Reply Inline Actions Does this need to be removed? tstellarAMD: Does this need to be removed?
	EVT MemVT = Load->getMemoryVT();	EVT MemVT = Load->getMemoryVT();

		if (MemVT == MVT::i16) {
		assert(Load->getValueType(0) == MVT::i16);

		SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Load->getChain(),
		Load->getBasePtr(), MVT::i16,
		Load->getMemOperand());
		tstellarAMDAuthorUnsubmitted Done Reply Inline Actions Coding style. Variable names should start with a capital letter. tstellarAMD: Coding style. Variable names should start with a capital letter.

		SDValue Ops[] = {
		DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, ExtLoad),
		ExtLoad.getValue(1)
		};

		return DAG.getMergeValues(Ops, DL);
		}
		tstellarAMDAuthorUnsubmitted Not Done Reply Inline Actions Why does i16 need special handling here? This seems to be nearly identical to the block of code directly below. tstellarAMD: Why does i16 need special handling here? This seems to be nearly identical to the block of…

	if (ExtType == ISD::NON_EXTLOAD && MemVT.getSizeInBits() < 32) {	if (ExtType == ISD::NON_EXTLOAD && MemVT.getSizeInBits() < 32) {
	assert(MemVT == MVT::i1 && "Only i1 non-extloads expected");	assert(MemVT == MVT::i1 && "Only i1 non-extloads expected");
	// FIXME: Copied from PPC	// FIXME: Copied from PPC
		arsenmUnsubmitted Done Reply Inline Actions Why is this part of the patch? This looks unrelated arsenm: Why is this part of the patch? This looks unrelated
Context not available.
	StoreSDNode *Store = cast<StoreSDNode>(Op);	StoreSDNode *Store = cast<StoreSDNode>(Op);
	EVT VT = Store->getMemoryVT();	EVT VT = Store->getMemoryVT();

		if (VT == MVT::i16) {
		SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Store->getValue());

		return DAG.getTruncStore(Store->getChain(), DL,
		Ext,
		Store->getBasePtr(),
		MVT::i16,
		Store->getMemOperand());
		}

		tstellarAMDAuthorUnsubmitted Done Reply Inline Actions Why do we need to custom lower i16 stores? Can't we just mark them as promote? tstellarAMD: Why do we need to custom lower i16 stores? Can't we just mark them as promote?
		arsenmUnsubmitted Done Reply Inline Actions Load/store promote expects an equal size type for a bitcast promote. This is the same problem that i1 has, so it should follow that example arsenm: Load/store promote expects an equal size type for a bitcast promote. This is the same problem…
	if (VT == MVT::i1) {	if (VT == MVT::i1) {
	return DAG.getTruncStore(Store->getChain(), DL,	return DAG.getTruncStore(Store->getChain(), DL,
	DAG.getSExtOrTrunc(Store->getValue(), DL, MVT::i32),	DAG.getSExtOrTrunc(Store->getValue(), DL, MVT::i32),
Context not available.
		arsenmUnsubmitted Done Reply Inline Actions Define on same line arsenm: Define on same line

lib/Target/AMDGPU/SIRegisterInfo.td

Context not available.
	// TODO: Do we need to set DwarfRegAlias on register tuples?	// TODO: Do we need to set DwarfRegAlias on register tuples?

	// SGPR 32-bit registers	// SGPR 32-bit registers
	def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,	def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16], 32,
	(add (sequence "SGPR%u", 0, 103))>;	(add (sequence "SGPR%u", 0, 103))>;

	// SGPR 64-bit registers	// SGPR 64-bit registers
Context not available.
	(add (decimate (shl SGPR_32, 15), 4))]>;	(add (decimate (shl SGPR_32, 15), 4))]>;

	// VGPR 32-bit registers	// VGPR 32-bit registers
	def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,	// i16 only on VI+
		def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16], 32,
	(add (sequence "VGPR%u", 0, 255))>;	(add (sequence "VGPR%u", 0, 255))>;

	// VGPR 64-bit registers	// VGPR 64-bit registers
Context not available.
	}	}

	// Register class for all scalar registers (SGPRs + Special Registers)	// Register class for all scalar registers (SGPRs + Special Registers)
	def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32,	def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16], 32,
	(add SGPR_32, M0, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI)	(add SGPR_32, M0, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI)
	>;	>;

Context not available.
	// VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate	// VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
	//===----------------------------------------------------------------------===//	//===----------------------------------------------------------------------===//

	def VS_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VGPR_32, SReg_32)>;	def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16], 32, (add VGPR_32, SReg_32)>;

	def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64)> {	def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64)> {
	let CopyCost = 2;	let CopyCost = 2;
Context not available.

lib/Target/AMDGPU/VIInstructions.td

Context not available.
	def : Pat <	def : Pat <
	(int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$bound_ctrl,	(int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$bound_ctrl,
	imm:$bank_mask, imm:$row_mask),	imm:$bank_mask, imm:$row_mask),
	(V_MOV_B32_dpp $src, (as_i32imm $dpp_ctrl), (as_i1imm $bound_ctrl),	(i32 (V_MOV_B32_dpp $src, (as_i32imm $dpp_ctrl), (as_i1imm $bound_ctrl),
	(as_i32imm $bank_mask), (as_i32imm $row_mask))	(as_i32imm $bank_mask), (as_i32imm $row_mask)))
	>;	>;

		//===----------------------------------------------------------------------===//
		// i16 Patterns
		//===----------------------------------------------------------------------===//

		def : Pat <
		(i16 imm:$imm),
		(S_MOV_B32 imm:$imm)
		>;

		def : Pat<
		(i32 (anyext i16:$src)),
		(COPY $src)
		>;

		// FIXME: We need to use COPY_TO_REGCLASS to work-around the fact that
		// REG_SEQUENCE patterns don't support instructions with multiple
		// outputs.
		def : Pat<
		(i64 (zext i16:$src)),
		(REG_SEQUENCE SReg_64,
		(i32 (COPY_TO_REGCLASS (S_AND_B32 $src, (i32 0xffff)), SGPR_32)), sub0,
		(S_MOV_B32 (i32 0)), sub1)
		>;

		def : Pat <
		(i64 (sext i16:$src)),
		(REG_SEQUENCE SReg_64, (i32 (S_SEXT_I32_I16 $src)), sub0,
		(i32 (COPY_TO_REGCLASS (S_ASHR_I32 (i32 (S_SEXT_I32_I16 $src)), (i32 31)), SGPR_32)), sub1)
		>;

		// Same as a 32-bit inreg
		def : Pat<
		(i32 (sext i16:$src)),
		(S_SEXT_I32_I16 $src)
		>;

		def : Pat<
		(i16 (trunc i32:$src)),
		(COPY $src)
		>;

		// Selects an i1 -> i16 extension (the node is supplied as `ext`) to a
		// V_CNDMASK_B32_e64 whose value operands are the constants 0 and 1 and
		// whose last operand is the i1 source.
		class ZExt_i16_i1_Pat <SDNode ext> : Pat <
		(i16 (ext i1:$src)),
		(V_CNDMASK_B32_e64 (i32 0), (i32 1), $src)
		>;

		// The same selection is used for both zero-extension and any-extension.
		def : ZExt_i16_i1_Pat<zext>;
		def : ZExt_i16_i1_Pat<anyext>;
		arsenmUnsubmitted Not Done Reply Inline Actions This pattern is necessary. I believe I had a test for this in my original patch arsenm: This pattern is necessary. I believe I had a test for this in my original patch

		def : Pat <
		(i16 (select i1:$src0, i16:$src1, i16:$src2)),
		(V_CNDMASK_B32_e64 $src2, $src1, $src0)
		>;

		// Patterns for a two-operand i16 operation `op` selected to `inst`, where
		// both operands are i16. Three forms are matched: the plain i16 result,
		// the result zero-extended to i32, and the result zero-extended to i64.
		// Note: 16-bit instructions produce a 0 result in the high 16-bits.
		multiclass Arithmetic_i16_Pats <SDPatternOperator op, Instruction inst> {

		// Plain i16 operation.
		def : Pat<
		(op i16:$src0, i16:$src1),
		(inst i16:$src0, i16:$src1)
		>;

		// zext to i32 is folded into the instruction itself (see the note above
		// about the high 16 bits).
		def : Pat<
		(i32 (zext (op i16:$src0, i16:$src1))),
		(inst i16:$src0, i16:$src1)
		>;

		// zext to i64: the instruction result forms sub0 and a materialized 0
		// forms sub1 of the 64-bit register pair.
		def : Pat<
		(i64 (zext (op i16:$src0, i16:$src1))),
		(REG_SEQUENCE VReg_64,
		(inst i16:$src0, i16:$src1), sub0,
		(S_MOV_B32 (i32 0)), sub1)
		>;
		arsenmUnsubmitted Not Done Reply Inline Actions Dead code should be removed arsenm: Dead code should be removed
		}

		// Patterns for an i16 shift `op` (i16 value, i32 shift amount) selected to
		// `inst`. This multiclass is instantiated only with the "REV" shift
		// instructions (V_LSHLREV_B16, V_LSHRREV_B16, V_ASHRREV_B16), which take
		// the shift amount as their first source operand and the value to shift as
		// the second, so the DAG operands are swapped when the instruction is
		// built. Three forms are matched: the plain i16 result, the result
		// zero-extended to i32, and the result zero-extended to i64.
		multiclass Bits_Ops_i16_Pats <SDPatternOperator op, Instruction inst> {

		arsenmUnsubmitted Not Done Reply Inline Actions Should follow camel case naming convention arsenm: Should follow camel case naming convention
		// Plain i16 shift.
		def : Pat<
		(op i16:$src0, i32:$src1),
		arsenmUnsubmitted Not Done Reply Inline Actions No spaces around the :s arsenm: No spaces around the :s
		(inst i32:$src1, i16:$src0)
		>;

		// zext to i32 is folded into the instruction itself.
		def : Pat<
		arsenmUnsubmitted Not Done Reply Inline Actions Should be indented like other Pats in the file arsenm: Should be indented like other Pats in the file
		(i32 (zext (op i16:$src0, i32:$src1))),
		(inst i32:$src1, i16:$src0)
		>;

		// zext to i64: the instruction result forms sub0 and a materialized 0
		// forms sub1 of the 64-bit register pair.
		def : Pat<
		arsenmUnsubmitted Not Done Reply Inline Actions This is incorrect if this is a scalar zext, which currently doesn't happen because there are no scalar i16 instructions (although we may want pseudos for these). To be consistent, this should use S_MOV_B32 to materialize the 0 arsenm: This is incorrect if this is a scalar zext, which currently doesn't happen because there are no…
		(i64 (zext (op i16:$src0, i32:$src1))),
		(REG_SEQUENCE VReg_64,
		(inst i32:$src1, i16:$src0), sub0,
		(S_MOV_B32 (i32 0)), sub1)
		>;
		}

		// i16 add/sub are sign-agnostic, so the U16 forms are used for both.
		defm : Arithmetic_i16_Pats<add, V_ADD_U16_e32>;
		defm : Arithmetic_i16_Pats<sub, V_SUB_U16_e32>;

		// Signed min/max must select the signed (I16) instructions; the unsigned
		// (U16) instructions implement umin/umax.
		defm : Arithmetic_i16_Pats<smin, V_MIN_I16_e32>;
		defm : Arithmetic_i16_Pats<smax, V_MAX_I16_e32>;
		defm : Arithmetic_i16_Pats<umin, V_MIN_U16_e32>;
		defm : Arithmetic_i16_Pats<umax, V_MAX_U16_e32>;

		// i16 shifts.
		defm : Bits_Ops_i16_Pats<shl, V_LSHLREV_B16_e32>;
		defm : Bits_Ops_i16_Pats<srl, V_LSHRREV_B16_e32>;
		defm : Bits_Ops_i16_Pats<sra, V_ASHRREV_B16_e32>;
	} // End Predicates = [isVI]	} // End Predicates = [isVI]
Context not available.
		arsenmUnsubmitted Not Done Reply Inline Actions These should be done in a separate patch arsenm: These should be done in a separate patch
		arsenmUnsubmitted Not Done Reply Inline Actions These should be using the signed min/max. I also think the min/max matching should be a separate patch arsenm: These should be using the signed min/max. I also think the min/max matching should be a…
		arsenmUnsubmitted Not Done Reply Inline Actions Dead code should be removed. These instructions don't exist. However, tests should be added to the rotl/rotr/bswap/ctlz/cttz to make sure these are properly expanded when i16 is added as legal since most operations by default are assumed to be legal if the type is arsenm: Dead code should be removed. These instructions don't exist. However, tests should be added to…

test/CodeGen/AMDGPU/add.i16.ll

This file was added.

				; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s \| FileCheck -check-prefix=VI -check-prefix=GCN %s

				; GCN-LABEL: {{^}}v_test_add_i16:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: flat_load_ushort [[B:v[0-9]+]]
				; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
				; VI-NEXT: buffer_store_short [[ADD]]
				define void @v_test_add_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid
				%a = load volatile i16, i16 addrspace(1)* %gep.in0
				%b = load volatile i16, i16 addrspace(1)* %gep.in1
				%add = add i16 %a, %b
				store i16 %add, i16 addrspace(1)* %out
				ret void
				}

				; GCN-LABEL: {{^}}v_test_add_i16_constant:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: v_add_u16_e32 [[ADD:v[0-9]+]], 0x7b, [[A]]
				; VI-NEXT: buffer_store_short [[ADD]]
				define void @v_test_add_i16_constant(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%a = load volatile i16, i16 addrspace(1)* %gep.in0
				%add = add i16 %a, 123
				store i16 %add, i16 addrspace(1)* %out
				ret void
				}

				; GCN-LABEL: {{^}}v_test_add_i16_neg_constant:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: v_add_u16_e32 [[ADD:v[0-9]+]], 0xfffffcb3, [[A]]
				; VI-NEXT: buffer_store_short [[ADD]]
				define void @v_test_add_i16_neg_constant(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%a = load volatile i16, i16 addrspace(1)* %gep.in0
				%add = add i16 %a, -845
				store i16 %add, i16 addrspace(1)* %out
				ret void
				}

				; GCN-LABEL: {{^}}v_test_add_i16_inline_neg1:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: v_add_u16_e32 [[ADD:v[0-9]+]], -1, [[A]]
				; VI-NEXT: buffer_store_short [[ADD]]
				define void @v_test_add_i16_inline_neg1(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%a = load volatile i16, i16 addrspace(1)* %gep.in0
				%add = add i16 %a, -1
				store i16 %add, i16 addrspace(1)* %out
				ret void
				}

				; GCN-LABEL: {{^}}v_test_add_i16_zext_to_i32:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: flat_load_ushort [[B:v[0-9]+]]
				; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
				; VI-NEXT: buffer_store_dword [[ADD]]
				define void @v_test_add_i16_zext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid
				%a = load volatile i16, i16 addrspace(1)* %gep.in0
				%b = load volatile i16, i16 addrspace(1)* %gep.in1
				%add = add i16 %a, %b
				%ext = zext i16 %add to i32
				store i32 %ext, i32 addrspace(1)* %out
				ret void
				}

				; GCN-LABEL: {{^}}v_test_add_i16_zext_to_i64:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: flat_load_ushort [[B:v[0-9]+]]
				; VI-DAG: v_add_u16_e32 v[[ADD:[0-9]+]], [[A]], [[B]]
				; VI-DAG: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0{{$}}
				; VI: buffer_store_dwordx2 v{{\[}}[[ADD]]:[[VZERO]]{{\]}}
				define void @v_test_add_i16_zext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i64, i64 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid
				%a = load volatile i16, i16 addrspace(1)* %gep.in0
				%b = load volatile i16, i16 addrspace(1)* %gep.in1
				%add = add i16 %a, %b
				%ext = zext i16 %add to i64
				store i64 %ext, i64 addrspace(1)* %out
				ret void
				}

				; GCN-LABEL: {{^}}v_test_add_i16_sext_to_i32:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: flat_load_ushort [[B:v[0-9]+]]
				; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
				; VI-NEXT: v_bfe_i32 [[SEXT:v[0-9]+]], [[ADD]], 0, 16
				; VI-NEXT: buffer_store_dword [[SEXT]]
				define void @v_test_add_i16_sext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid
				%a = load i16, i16 addrspace(1)* %gep.in0
				%b = load i16, i16 addrspace(1)* %gep.in1
				%add = add i16 %a, %b
				%ext = sext i16 %add to i32
				store i32 %ext, i32 addrspace(1)* %out
				ret void
				}

				; GCN-LABEL: {{^}}v_test_add_i16_sext_to_i64:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: flat_load_ushort [[B:v[0-9]+]]
				; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
				; VI-NEXT: v_bfe_i32 v[[LO:[0-9]+]], [[ADD]], 0, 16
				; VI-NEXT: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
				; VI-NEXT: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
				define void @v_test_add_i16_sext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i64, i64 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid
				%a = load i16, i16 addrspace(1)* %gep.in0
				%b = load i16, i16 addrspace(1)* %gep.in1
				%add = add i16 %a, %b
				%ext = sext i16 %add to i64
				store i64 %ext, i64 addrspace(1)* %out
				ret void
				}

				; GCN-LABEL: {{^}}s_test_add_i16:
				; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c
				; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30
				; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
				; VI: v_add_u16_e32 [[RESULT:v[0-9]+]], [[B]], [[VA]]
				; VI-NEXT: buffer_store_short [[RESULT]]
				define void @s_test_add_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) #1 {
				%add = add i16 %a, %b
				store i16 %add, i16 addrspace(1)* %out
				ret void
				}

				; GCN-LABEL: {{^}}s_test_add_i16_zeroext_args:
				; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c
				; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30
				; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
				; VI: v_add_u16_e32 [[RESULT:v[0-9]+]], [[B]], [[VA]]
				; VI-NEXT: buffer_store_short [[RESULT]]
				define void @s_test_add_i16_zeroext_args(i16 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) #1 {
				%add = add i16 %a, %b
				store i16 %add, i16 addrspace(1)* %out
				ret void
				}

				; GCN-LABEL: {{^}}s_test_add_i16_signext_args:
				; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c
				; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30
				; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
				; VI: v_add_u16_e32 [[RESULT:v[0-9]+]], [[B]], [[VA]]
				; VI-NEXT: buffer_store_short [[RESULT]]
				define void @s_test_add_i16_signext_args(i16 addrspace(1)* %out, i16 signext %a, i16 signext %b) #1 {
				%add = add i16 %a, %b
				store i16 %add, i16 addrspace(1)* %out
				ret void
				}

				; Scalar (kernel-argument) i16 add whose result is zero-extended to i32.
				; The stored register must be the add result captured in this function;
				; no separate extension instruction is expected between add and store.
				; GCN-LABEL: {{^}}s_test_add_i16_zext_to_i32:
				; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c
				; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30
				; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
				; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[B]], [[VA]]
				; VI-NEXT: buffer_store_dword [[ADD]]
				define void @s_test_add_i16_zext_to_i32(i32 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) #1 {
				%add = add i16 %a, %b
				%ext = zext i16 %add to i32
				store i32 %ext, i32 addrspace(1)* %out
				ret void
				}

				; GCN-LABEL: {{^}}s_test_add_i16_zext_to_i64:
				; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c
				; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30
				; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
				; VI-DAG: v_add_u16_e32 v[[LO:[0-9]+]], [[B]], [[VA]]
				; VI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
				; VI-NEXT: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
				define void @s_test_add_i16_zext_to_i64(i64 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) #1 {
				%add = add i16 %a, %b
				%ext = zext i16 %add to i64
				store i64 %ext, i64 addrspace(1)* %out
				ret void
				}

				; GCN-LABEL: {{^}}s_test_add_i16_sext_to_i32:
				; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c
				; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30
				; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
				; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[B]], [[VA]]
				; VI-NEXT: v_bfe_i32 [[RESULT:v[0-9]+]], [[ADD]], 0, 16
				; VI-NEXT: buffer_store_dword [[RESULT]]
				define void @s_test_add_i16_sext_to_i32(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) #1 {
				%add = add i16 %a, %b
				%ext = sext i16 %add to i32
				store i32 %ext, i32 addrspace(1)* %out
				ret void
				}

				; Scalar (kernel-argument) i16 add whose result is sign-extended to i64.
				; The add result is captured as ADD and fed to v_bfe_i32, which produces
				; the low half; the high half is the low half shifted right by 31.
				; GCN-LABEL: {{^}}s_test_add_i16_sext_to_i64:
				; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c
				; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30
				; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
				; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[B]], [[VA]]
				; VI-NEXT: v_bfe_i32 v[[LO:[0-9]+]], [[ADD]], 0, 16
				; VI-NEXT: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
				; VI-NEXT: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
				define void @s_test_add_i16_sext_to_i64(i64 addrspace(1)* %out, i16 signext %a, i16 signext %b) #1 {
				%add = add i16 %a, %b
				%ext = sext i16 %add to i64
				store i64 %ext, i64 addrspace(1)* %out
				ret void
				}

				declare i32 @llvm.amdgcn.workitem.id.x() #0

				attributes #0 = { nounwind readnone }
				attributes #1 = { nounwind }

test/CodeGen/AMDGPU/anyext.ll

	; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs \| FileCheck %s			; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN -check-prefix=SI %s
	; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs \| FileCheck %s			; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN -check-prefix=VI %s

	; CHECK-LABEL: {{^}}anyext_i1_i32:			; GCN-LABEL: {{^}}anyext_i1_i32:
	; CHECK: v_cndmask_b32_e64			; GCN: v_cndmask_b32_e64
	define void @anyext_i1_i32(i32 addrspace(1)* %out, i32 %cond) {			define void @anyext_i1_i32(i32 addrspace(1)* %out, i32 %cond) {
	entry:			entry:
	%0 = icmp eq i32 %cond, 0			%tmp = icmp eq i32 %cond, 0
	%1 = zext i1 %0 to i8			%tmp1 = zext i1 %tmp to i8
	%2 = xor i8 %1, -1			%tmp2 = xor i8 %tmp1, -1
	%3 = and i8 %2, 1			%tmp3 = and i8 %tmp2, 1
	%4 = zext i8 %3 to i32			%tmp4 = zext i8 %tmp3 to i32
	store i32 %4, i32 addrspace(1)* %out			store i32 %tmp4, i32 addrspace(1)* %out
				ret void
				}

				; GCN-LABEL: {{^}}s_anyext_i16_i32:
				; VI: v_add_u16_e32 [[ADD:v[0-9]+]],
				; VI: v_not_b32_e32 [[NOT:v[0-9]+]], [[ADD]]
				; VI: v_and_b32_e32 [[AND:v[0-9]+]], 1, [[NOT]]
				; VI: buffer_store_dword [[AND]]
				define void @s_anyext_i16_i32(i32 addrspace(1)* %out, i16 %a, i16 %b) {
				entry:
				%tmp = add i16 %a, %b
				%tmp1 = trunc i16 %tmp to i8
				%tmp2 = xor i8 %tmp1, -1
				%tmp3 = and i8 %tmp2, 1
				%tmp4 = zext i8 %tmp3 to i32
				store i32 %tmp4, i32 addrspace(1)* %out
	ret void			ret void
	}			}

test/CodeGen/AMDGPU/global-extload-i8.ll

Context not available.
	; ret void	; ret void
	; }	; }

		; Zero-extending i8 load whose i16 result is stored back to global memory.
		; The result is written with a 16-bit store, not a dword store.
		; FUNC-LABEL: {{^}}zextload_global_i8_to_i16:
		; SI: buffer_load_ubyte
		; SI: buffer_store_short
		; SI: s_endpgm
		define void @zextload_global_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
		%a = load i8, i8 addrspace(1)* %in
		%ext = zext i8 %a to i16
		store i16 %ext, i16 addrspace(1)* %out
		ret void
		}

		; Sign-extending i8 load whose i16 result is stored back to global memory.
		; The result is written with a 16-bit store, not a dword store.
		; FUNC-LABEL: {{^}}sextload_global_i8_to_i16:
		; SI: buffer_load_sbyte
		; SI: buffer_store_short
		; SI: s_endpgm
		define void @sextload_global_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
		%a = load i8, i8 addrspace(1)* %in
		%ext = sext i8 %a to i16
		store i16 %ext, i16 addrspace(1)* %out
		ret void
		}

		; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i16:
		; SI: s_endpgm
		define void @zextload_global_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
		%load = load <1 x i8>, <1 x i8> addrspace(1)* %in
		%ext = zext <1 x i8> %load to <1 x i16>
		store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
		ret void
		}

		; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i16:
		; SI: s_endpgm
		define void @sextload_global_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
		%load = load <1 x i8>, <1 x i8> addrspace(1)* %in
		%ext = sext <1 x i8> %load to <1 x i16>
		store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
		ret void
		}

		; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i16:
		; SI: s_endpgm
		define void @zextload_global_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
		%load = load <2 x i8>, <2 x i8> addrspace(1)* %in
		%ext = zext <2 x i8> %load to <2 x i16>
		store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
		ret void
		}

		; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i16:
		; SI: s_endpgm
		define void @sextload_global_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
		%load = load <2 x i8>, <2 x i8> addrspace(1)* %in
		%ext = sext <2 x i8> %load to <2 x i16>
		store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
		ret void
		}

		; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i16:
		; SI: s_endpgm
		define void @zextload_global_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
		%load = load <4 x i8>, <4 x i8> addrspace(1)* %in
		%ext = zext <4 x i8> %load to <4 x i16>
		store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
		ret void
		}

		; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i16:
		; SI: s_endpgm
		define void @sextload_global_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
		%load = load <4 x i8>, <4 x i8> addrspace(1)* %in
		%ext = sext <4 x i8> %load to <4 x i16>
		store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
		ret void
		}

		; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i16:
		; SI: s_endpgm
		define void @zextload_global_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
		%load = load <8 x i8>, <8 x i8> addrspace(1)* %in
		%ext = zext <8 x i8> %load to <8 x i16>
		store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
		ret void
		}

		; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i16:
		; SI: s_endpgm
		define void @sextload_global_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
		%load = load <8 x i8>, <8 x i8> addrspace(1)* %in
		%ext = sext <8 x i8> %load to <8 x i16>
		store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
		ret void
		}

		; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i16:
		; SI: s_endpgm
		define void @zextload_global_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
		%load = load <16 x i8>, <16 x i8> addrspace(1)* %in
		%ext = zext <16 x i8> %load to <16 x i16>
		store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
		ret void
		}

		; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i16:
		; SI: s_endpgm
		define void @sextload_global_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
		%load = load <16 x i8>, <16 x i8> addrspace(1)* %in
		%ext = sext <16 x i8> %load to <16 x i16>
		store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
		ret void
		}

	; FUNC-LABEL: {{^}}zextload_global_i8_to_i64:	; FUNC-LABEL: {{^}}zextload_global_i8_to_i64:
	; SI: buffer_load_ubyte v[[LO:[0-9]+]],	; SI: buffer_load_ubyte v[[LO:[0-9]+]],
	; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}	; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
Context not available.

test/CodeGen/AMDGPU/max.i16.ll

This file was added.

				; A VI part is selected explicitly because the checks in this file expect
				; 16-bit max instructions; -verify-machineinstrs matches the other i16 tests.
				; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s \| FileCheck -check-prefix=VI -check-prefix=FUNC %s

				declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone

				; FUNC-LABEL: {{^}}v_test_imax_sge_i16:
				; VI: v_max_i16_e32
				define void @v_test_imax_sge_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
				%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
				%gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
				%gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
				%outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
				%a = load i16, i16 addrspace(1)* %gep0, align 4
				%b = load i16, i16 addrspace(1)* %gep1, align 4
				%cmp = icmp sge i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %outgep, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}v_test_imax_sge_v4i16:
				; VI: v_max_i16_e32
				; VI: v_max_i16_e32
				; VI: v_max_i16_e32
				; VI: v_max_i16_e32
				define void @v_test_imax_sge_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %aptr, <4 x i16> addrspace(1)* %bptr) nounwind {
				%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
				%gep0 = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %aptr, i32 %tid
				%gep1 = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %bptr, i32 %tid
				%outgep = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %out, i32 %tid
				%a = load <4 x i16>, <4 x i16> addrspace(1)* %gep0, align 4
				%b = load <4 x i16>, <4 x i16> addrspace(1)* %gep1, align 4
				%cmp = icmp sge <4 x i16> %a, %b
				%val = select <4 x i1> %cmp, <4 x i16> %a, <4 x i16> %b
				store <4 x i16> %val, <4 x i16> addrspace(1)* %outgep, align 4
				ret void
				}

				; Signed max of two i16 kernel arguments, written as icmp sge + select.
				; NOTE(review): the unsigned scalar tests in this file expect the 32-bit
				; s_max_u32 while this one expects s_max_i16 - confirm which width llc emits.
				; FUNC-LABEL: {{^}}s_test_imax_sge_i16:
				; VI: s_max_i16
				define void @s_test_imax_sge_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
				%cmp = icmp sge i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %out, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_imax_sge_imm_i16:
				; VI: s_max_i16 {{s[0-9]+}}, {{s[0-9]+}}, 9
				define void @s_test_imax_sge_imm_i16(i16 addrspace(1)* %out, i16 %a) nounwind {
				%cmp = icmp sge i16 %a, 9
				%val = select i1 %cmp, i16 %a, i16 9
				store i16 %val, i16 addrspace(1)* %out, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_imax_sgt_imm_i16:
				; VI: s_max_i16 {{s[0-9]+}}, {{s[0-9]+}}, 9
				define void @s_test_imax_sgt_imm_i16(i16 addrspace(1)* %out, i16 %a) nounwind {
				%cmp = icmp sgt i16 %a, 9
				%val = select i1 %cmp, i16 %a, i16 9
				store i16 %val, i16 addrspace(1)* %out, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_imax_sgt_imm_v2i16:
				; VI: s_max_i16 {{s[0-9]+}}, {{s[0-9]+}}, 9
				; VI: s_max_i16 {{s[0-9]+}}, {{s[0-9]+}}, 9
				define void @s_test_imax_sgt_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a) nounwind {
				%cmp = icmp sgt <2 x i16> %a, <i16 9, i16 9>
				%val = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> <i16 9, i16 9>
				store <2 x i16> %val, <2 x i16> addrspace(1)* %out, align 4
				ret void
				}
				; Signed max (icmp sgt + select) of two i16 values loaded from
				; workitem-indexed addresses; the result is stored at the same index.
				; FUNC-LABEL: {{^}}v_test_imax_sgt_i16:
				; VI: v_max_i16_e32
				define void @v_test_imax_sgt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
				%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
				%gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
				%gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
				%outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
				%a = load i16, i16 addrspace(1)* %gep0, align 4
				%b = load i16, i16 addrspace(1)* %gep1, align 4
				%cmp = icmp sgt i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %outgep, align 4
				ret void
				}

				; Signed max of two i16 kernel arguments, written as icmp sgt + select.
				; FUNC-LABEL: {{^}}s_test_imax_sgt_i16:
				; VI: s_max_i16
				define void @s_test_imax_sgt_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
				%cmp = icmp sgt i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %out, align 4
				ret void
				}

				; Unsigned max (icmp uge + select) of two i16 values loaded from
				; workitem-indexed addresses.
				; NOTE(review): the signed tests in this file expect the 16-bit
				; v_max_i16_e32, while this one expects the 32-bit v_max_u32_e32 - confirm
				; which width llc emits for the unsigned case.
				; FUNC-LABEL: {{^}}v_test_umax_uge_i16:
				; VI: v_max_u32_e32
				define void @v_test_umax_uge_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
				%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
				%gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
				%gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
				%outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
				%a = load i16, i16 addrspace(1)* %gep0, align 4
				%b = load i16, i16 addrspace(1)* %gep1, align 4
				%cmp = icmp uge i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %outgep, align 4
				ret void
				}

				; Unsigned max of two i16 kernel arguments, written as icmp uge + select.
				; FUNC-LABEL: {{^}}s_test_umax_uge_i16:
				; VI: s_max_u32
				define void @s_test_umax_uge_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
				%cmp = icmp uge i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %out, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_umax_uge_v3i16:
				; VI: s_max_u32
				; VI: s_max_u32
				; VI: s_max_u32
				; VI-NOT: s_max_u32
				; VI: s_endpgm
				define void @s_test_umax_uge_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %a, <3 x i16> %b) nounwind {
				%cmp = icmp uge <3 x i16> %a, %b
				%val = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
				store <3 x i16> %val, <3 x i16> addrspace(1)* %out, align 4
				ret void
				}

				; Unsigned max (icmp ugt + select) of two i16 values loaded from
				; workitem-indexed addresses; the result is stored at the same index.
				; FUNC-LABEL: {{^}}v_test_umax_ugt_i16:
				; VI: v_max_u32_e32
				define void @v_test_umax_ugt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
				%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
				%gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
				%gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
				%outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
				%a = load i16, i16 addrspace(1)* %gep0, align 4
				%b = load i16, i16 addrspace(1)* %gep1, align 4
				%cmp = icmp ugt i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %outgep, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_umax_ugt_i16:
				; VI: s_max_u32
				define void @s_test_umax_ugt_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
				%cmp = icmp ugt i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %out, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_umax_ugt_imm_v2i16:
				; VI: s_max_u32 {{s[0-9]+}}, {{s[0-9]+}}, 15
				; VI: s_max_u32 {{s[0-9]+}}, {{s[0-9]+}}, 23
				define void @s_test_umax_ugt_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a) nounwind {
				%cmp = icmp ugt <2 x i16> %a, <i16 15, i16 23>
				%val = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> <i16 15, i16 23>
				store <2 x i16> %val, <2 x i16> addrspace(1)* %out, align 4
				ret void
				}

test/CodeGen/AMDGPU/min_test.ll

This file was added.

				; A VI part is selected explicitly because the checks in this file expect
				; 16-bit min instructions; -verify-machineinstrs matches the other i16 tests.
				; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s \| FileCheck -check-prefix=VI -check-prefix=FUNC %s

				declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone

				; FUNC-LABEL: {{^}}v_test_imin_sle_i16:
				; VI: v_min_i16_e32
				define void @v_test_imin_sle_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
				%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
				%gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
				%gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
				%outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
				%a = load i16, i16 addrspace(1)* %gep0, align 4
				%b = load i16, i16 addrspace(1)* %gep1, align 4
				%cmp = icmp sle i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %outgep, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_imin_sle_i16:
				; VI: s_min_i16
				define void @s_test_imin_sle_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
				%cmp = icmp sle i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %out, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_imin_sle_v1i16:
				; VI: s_min_i16
				define void @s_test_imin_sle_v1i16(<1 x i16> addrspace(1)* %out, <1 x i16> %a, <1 x i16> %b) nounwind {
				%cmp = icmp sle <1 x i16> %a, %b
				%val = select <1 x i1> %cmp, <1 x i16> %a, <1 x i16> %b
				store <1 x i16> %val, <1 x i16> addrspace(1)* %out
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_imin_sle_v4i16:
				; VI: v_min_i16
				; VI: v_min_i16
				; VI: v_min_i16
				; VI: v_min_i16
				define void @s_test_imin_sle_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b) nounwind {
				%cmp = icmp sle <4 x i16> %a, %b
				%val = select <4 x i1> %cmp, <4 x i16> %a, <4 x i16> %b
				store <4 x i16> %val, <4 x i16> addrspace(1)* %out
				ret void
				}

				; Signed min (icmp slt + select) of two i16 values loaded from
				; workitem-indexed addresses; the result is stored at the same index.
				; FUNC-LABEL: {{^}}v_test_imin_slt_i16:
				; VI: v_min_i16_e32
				define void @v_test_imin_slt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
				%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
				%gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
				%gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
				%outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
				%a = load i16, i16 addrspace(1)* %gep0, align 4
				%b = load i16, i16 addrspace(1)* %gep1, align 4
				%cmp = icmp slt i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %outgep, align 4
				ret void
				}

				; Signed min of two i16 kernel arguments, written as icmp slt + select.
				; FUNC-LABEL: {{^}}s_test_imin_slt_i16:
				; VI: s_min_i16
				define void @s_test_imin_slt_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
				%cmp = icmp slt i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %out, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_imin_slt_v2i16:
				; VI: s_min_i16
				; VI: s_min_i16
				define void @s_test_imin_slt_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) nounwind {
				%cmp = icmp slt <2 x i16> %a, %b
				%val = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
				store <2 x i16> %val, <2 x i16> addrspace(1)* %out
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_imin_slt_imm_i16:
				; VI: s_min_i16 {{s[0-9]+}}, {{s[0-9]+}}, 8
				define void @s_test_imin_slt_imm_i16(i16 addrspace(1)* %out, i16 %a) nounwind {
				%cmp = icmp slt i16 %a, 8
				%val = select i1 %cmp, i16 %a, i16 8
				store i16 %val, i16 addrspace(1)* %out, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_imin_sle_imm_i16:
				; VI: s_min_i16 {{s[0-9]+}}, {{s[0-9]+}}, 8
				define void @s_test_imin_sle_imm_i16(i16 addrspace(1)* %out, i16 %a) nounwind {
				%cmp = icmp sle i16 %a, 8
				%val = select i1 %cmp, i16 %a, i16 8
				store i16 %val, i16 addrspace(1)* %out, align 4
				ret void
				}

				; Unsigned min (icmp ule + select) of two i16 values loaded from
				; workitem-indexed addresses; the result is stored at the same index.
				; FUNC-LABEL: {{^}}v_test_umin_ule_i16:
				; VI: v_min_u16_e32
				define void @v_test_umin_ule_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
				%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
				%gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
				%gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
				%outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
				%a = load i16, i16 addrspace(1)* %gep0, align 4
				%b = load i16, i16 addrspace(1)* %gep1, align 4
				%cmp = icmp ule i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %outgep, align 4
				ret void
				}

				; Unsigned min (icmp ule + select) of two <3 x i16> kernel arguments.
				; Exactly three 16-bit min instructions are expected before s_endpgm.
				; FUNC-LABEL: {{^}}v_test_umin_ule_v3i16:
				; VI: v_min_u16_e32
				; VI: v_min_u16_e32
				; VI: v_min_u16_e32
				; VI-NOT: v_min_u16_e32
				; VI: s_endpgm
				define void @v_test_umin_ule_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %a, <3 x i16> %b) nounwind {
				%cmp = icmp ule <3 x i16> %a, %b
				%val = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
				store <3 x i16> %val, <3 x i16> addrspace(1)* %out, align 4
				ret void
				}

				; Unsigned min of two i16 kernel arguments, written as icmp ule + select.
				; FUNC-LABEL: {{^}}s_test_umin_ule_i16:
				; VI: s_min_u16
				define void @s_test_umin_ule_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
				%cmp = icmp ule i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %out, align 4
				ret void
				}

				; Unsigned min (icmp ult + select) of two i16 values loaded from
				; workitem-indexed addresses; the result is stored at the same index.
				; FUNC-LABEL: {{^}}v_test_umin_ult_i16:
				; VI: v_min_u16_e32
				define void @v_test_umin_ult_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
				%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
				%gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
				%gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
				%outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
				%a = load i16, i16 addrspace(1)* %gep0, align 4
				%b = load i16, i16 addrspace(1)* %gep1, align 4
				%cmp = icmp ult i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %outgep, align 4
				ret void
				}

				; Unsigned min of two i16 kernel arguments, written as icmp ult + select.
				; FUNC-LABEL: {{^}}s_test_umin_ult_i16:
				; VI: s_min_u16
				define void @s_test_umin_ult_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
				%cmp = icmp ult i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %out, align 4
				ret void
				}

				; Unsigned min of two single-element <1 x i16> kernel arguments, written as
				; icmp ult + select; one min instruction is expected.
				; FUNC-LABEL: {{^}}s_test_umin_ult_v1i16:
				; VI: s_min_u16
				define void @s_test_umin_ult_v1i16(<1 x i16> addrspace(1)* %out, <1 x i16> %a, <1 x i16> %b) nounwind {
				%cmp = icmp ult <1 x i16> %a, %b
				%val = select <1 x i1> %cmp, <1 x i16> %a, <1 x i16> %b
				store <1 x i16> %val, <1 x i16> addrspace(1)* %out
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_umin_ult_v8i16:
				; VI: s_min_u16
				; VI: s_min_u16
				; VI: s_min_u16
				; VI: s_min_u16
				; VI: s_min_u16
				; VI: s_min_u16
				; VI: s_min_u16
				; VI: s_min_u16
				define void @s_test_umin_ult_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x i16> %b) nounwind {
				%cmp = icmp ult <8 x i16> %a, %b
				%val = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b
				store <8 x i16> %val, <8 x i16> addrspace(1)* %out
				ret void
				}

test/CodeGen/AMDGPU/shl.i16.ll

This file was added.

				; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs \| FileCheck -check-prefix=GCN -check-prefix=VI %s

				declare i32 @llvm.r600.read.tidig.x() #0

				;VI: {{^}}shl_v2i16:
				;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
				;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

				define void @shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
				%b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i32 1
				%a = load <2 x i16>, <2 x i16> addrspace(1) * %in
				%b = load <2 x i16>, <2 x i16> addrspace(1) * %b_ptr
				%result = shl <2 x i16> %a, %b
				store <2 x i16> %result, <2 x i16> addrspace(1)* %out
				ret void
				}

				;VI: {{^}}shl_v4i16:
				;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
				;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
				;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
				;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

				define void @shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
				%b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i32 1
				%a = load <4 x i16>, <4 x i16> addrspace(1) * %in
				%b = load <4 x i16>, <4 x i16> addrspace(1) * %b_ptr
				%result = shl <4 x i16> %a, %b
				store <4 x i16> %result, <4 x i16> addrspace(1)* %out
				ret void
				}


				; 16-bit shift left where both the value and the shift amount are loaded
				; from memory. The expected opcode is the 16-bit shift that shl.ll also
				; expects for this function, with single-register operands like the vector
				; checks above rather than 64-bit register pairs.
				;VI: {{^}}shl_i16:
				;VI: v_lshlrev_b16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

				define void @shl_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
				%b_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
				%a = load i16, i16 addrspace(1) * %in
				%b = load i16, i16 addrspace(1) * %b_ptr
				%result = shl i16 %a, %b
				store i16 %result, i16 addrspace(1)* %out
				ret void
				}

				; Shift of a constant by an i16 amount loaded from memory.
				; The label uses the GCN prefix because this file's RUN line enables only
				; the GCN and VI prefixes. Only the opcode is checked for now.
				; NOTE(review): 1234567 (0x12d687) does not fit in 16 bits - confirm the
				; intended constant, then add an operand pattern to the check below.
				; GCN-LABEL: {{^}}v_shl_i16_32_bit_constant:
				; VI: v_lshlrev_b16
				define void @v_shl_i16_32_bit_constant(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr) {
				%a = load i16, i16 addrspace(1)* %aptr, align 8
				%shl = shl i16 1234567, %a
				store i16 %shl, i16 addrspace(1)* %out, align 8
				ret void
				}

				; Shift of the constant 8 by an i16 amount loaded from memory.
				; The label matches the function name below and uses the GCN prefix, which
				; this file's RUN line enables. Only the opcode is checked for now.
				; NOTE(review): the function name says imm_64 but the IR shifts the
				; constant 8 - confirm which one was intended.
				; GCN-LABEL: {{^}}v_shl_inline_imm_64_i16:
				; VI: v_lshlrev_b16
				define void @v_shl_inline_imm_64_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr) {
				%a = load i16, i16 addrspace(1)* %aptr, align 8
				%shl = shl i16 8, %a
				store i16 %shl, i16 addrspace(1)* %out, align 8
				ret void
				}

				; Shift of the constant 1 by an i16 kernel argument (%aptr is unused).
				; The label uses the GCN prefix, which this file's RUN line enables.
				; NOTE(review): only the 16-bit store is checked; add the shift instruction
				; once the real output for a scalar i16 shift has been confirmed.
				; GCN-LABEL: {{^}}s_shl_inline_imm_1_i16:
				; GCN: {{buffer\|flat}}_store_short
				define void @s_shl_inline_imm_1_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 %a) {
				%shl = shl i16 1, %a
				store i16 %shl, i16 addrspace(1)* %out, align 8
				ret void
				}

				attributes #0 = { nounwind readnone }

test/CodeGen/AMDGPU/shl.ll

Context not available.
	ret void	ret void
	}	}

		; 16-bit shift left where both the value and the shift amount are loaded
		; from memory (%in and the element after it).
		; NOTE(review): the EG sequence below tracks separate low/high halves
		; (OPLO/OPHI, RESLO/RESHI) and opens the same way as the shl_i64 checks
		; later in this file - confirm it against real output for a 16-bit shift.
		; NOTE(review): the SI and VI patterns use register-pair operands (v[N:M]);
		; confirm that an i16 shift uses pairs and that SI has a 16-bit shift opcode.
		;EG-LABEL: {{^}}shl_i16:
		;EG: SUB_INT {{\? }}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
		;EG: LSHR {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], {{[[COMPSH]]\|PV.[XYZW]}}
		;EG-DAG: ADD_INT {{\? }}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
		;EG-DAG: LSHR {{\? }}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]\|PV.[XYZW]}}, 1
		;EG-DAG: LSHL {{\? }}[[HISMTMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], [[SHIFT]]
		;EG-DAG: OR_INT {{\? }}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]\|PV.[XYZW]\|PS}}, {{[[OVERF]]\|PV.[XYZW]}}
		;EG-DAG: LSHL {{\? }}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS\|[[SHIFT]]\|PV.[XYZW]}}
		;EG-DAG: SETGT_UINT {{\? }}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
		;EG-DAG: CNDE_INT {{\? }}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
		;EG-DAG: CNDE_INT {{\? }}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0

		;SI: {{^}}shl_i16:
		;SI: v_lshl_b16 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}

		;VI: {{^}}shl_i16:
		;VI: v_lshlrev_b16 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}

		define void @shl_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
		%b_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
		%a = load i16, i16 addrspace(1) * %in
		%b = load i16, i16 addrspace(1) * %b_ptr
		%result = shl i16 %a, %b
		store i16 %result, i16 addrspace(1)* %out
		ret void
		}

		;EG: {{^}}shl_v2i16:
		;EG: LSHL {{\? }}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
		;EG: LSHL {{\? }}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

		;SI: {{^}}shl_v2i16:
		;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

		;VI: {{^}}shl_v2i16:
		;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

		define void @shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
		%b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i16 1
		%a = load <2 x i16>, <2 x i16> addrspace(1) * %in
		%b = load <2 x i16>, <2 x i16> addrspace(1) * %b_ptr
		%result = shl <2 x i16> %a, %b
		store <2 x i16> %result, <2 x i16> addrspace(1)* %out
		ret void
		}

		;EG: {{^}}shl_v4i16:
		;EG: LSHL {{\? }}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
		;EG: LSHL {{\? }}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
		;EG: LSHL {{\? }}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
		;EG: LSHL {{\? }}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

		;SI: {{^}}shl_v4i16:
		;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

		;VI: {{^}}shl_v4i16:
		;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

		define void @shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
		%b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i16 1
		%a = load <4 x i16>, <4 x i16> addrspace(1) * %in
		%b = load <4 x i16>, <4 x i16> addrspace(1) * %b_ptr
		%result = shl <4 x i16> %a, %b
		store <4 x i16> %result, <4 x i16> addrspace(1)* %out
		ret void
		}

	;EG-LABEL: {{^}}shl_i64:	;EG-LABEL: {{^}}shl_i64:
	;EG: SUB_INT {{\? }}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]	;EG: SUB_INT {{\? }}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
	;EG: LSHR {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], {{[[COMPSH]]\|PV.[XYZW]}}	;EG: LSHR {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], {{[[COMPSH]]\|PV.[XYZW]}}
Context not available.

test/CodeGen/AMDGPU/sign_extend.ll

	; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s \| FileCheck -check-prefix=SI %s	; RUN: llc -march=amdgcn -verify-machineinstrs < %s \| FileCheck -check-prefix=SI %s
	; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s \| FileCheck -check-prefix=SI %s	; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s \| FileCheck -check-prefix=SI %s

	; SI-LABEL: {{^}}s_sext_i1_to_i32:	; SI-LABEL: {{^}}s_sext_i1_to_i32:
Context not available.
	}	}

	; SI-LABEL: {{^}}s_sext_i16_to_i64:	; SI-LABEL: {{^}}s_sext_i16_to_i64:
	; SI: s_endpgm	; SI: s_bfe_i64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x100000
	define void @s_sext_i16_to_i64(i64 addrspace(1)* %out, i16 %a) nounwind {	define void @s_sext_i16_to_i64(i64 addrspace(1)* %out, i16 %a) nounwind {
	%sext = sext i16 %a to i64	%sext = sext i16 %a to i64
	store i64 %sext, i64 addrspace(1)* %out, align 8	store i64 %sext, i64 addrspace(1)* %out, align 8
	ret void	ret void
	}	}

		; Compares two i32 kernel arguments for equality and sign-extends the i1
		; result to i16. The select of 0 / -1 must feed the 16-bit store directly.
		; SI-LABEL: {{^}}s_sext_i1_to_i16:
		; SI: v_cndmask_b32_e64 [[MASK:v[0-9]+]], 0, -1
		; SI-NEXT: buffer_store_short [[MASK]]
		define void @s_sext_i1_to_i16(i16 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
		%is.equal = icmp eq i32 %a, %b
		%mask16 = sext i1 %is.equal to i16
		store i16 %mask16, i16 addrspace(1)* %out
		ret void
		}
Context not available.

test/CodeGen/AMDGPU/sra.i16.ll

This file was added.

				; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s

				declare i32 @llvm.r600.read.tidig.x() #0

				; FUNC-LABEL: {{^}}ashr_v2i16:

				; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
				; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

				define void @ashr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
				%b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i16 1
				%a = load <2 x i16>, <2 x i16> addrspace(1)* %in
				%b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr
				%result = ashr <2 x i16> %a, %b
				store <2 x i16> %result, <2 x i16> addrspace(1)* %out
				ret void
				}

				; FUNC-LABEL: {{^}}ashr_v4i16:

				; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
				; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
				; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
				; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

				define void @ashr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
				%b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i16 1
				%a = load <4 x i16>, <4 x i16> addrspace(1)* %in
				%b = load <4 x i16>, <4 x i16> addrspace(1)* %b_ptr
				%result = ashr <4 x i16> %a, %b
				store <4 x i16> %result, <4 x i16> addrspace(1)* %out
				ret void
				}


				; 16-bit arithmetic shift right where both the value and the shift amount
				; are loaded from memory. Operands are single registers, matching the
				; vector checks earlier in this file, not 64-bit register pairs.
				; FUNC-LABEL: {{^}}ashr_i16_2:

				; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

				define void @ashr_i16_2(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
				entry:
				%b_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
				%a = load i16, i16 addrspace(1)* %in
				%b = load i16, i16 addrspace(1)* %b_ptr
				%result = ashr i16 %a, %b
				store i16 %result, i16 addrspace(1)* %out
				ret void
				}

				; Arithmetic shift right by the constant 2 of an i16 loaded from a
				; workitem-indexed address; the result is stored at the same index.
				; The label matches the function name below, and the checks describe a
				; 16-bit load, shift by 2 and 16-bit store.
				; NOTE(review): the function name says 32 but the IR shifts by 2 - confirm
				; which was intended, and confirm the load/store opcodes against llc output.
				; GCN-LABEL: {{^}}v_ashr_32_i16:
				; VI: flat_load_ushort [[VAL:v[0-9]+]]
				; VI: v_ashrrev_i16_e32 [[SHIFT:v[0-9]+]], 2, [[VAL]]
				; GCN: {{buffer\|flat}}_store_short {{.*}}[[SHIFT]]
				define void @v_ashr_32_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
				%tid = call i32 @llvm.r600.read.tidig.x() #0
				%gep.in = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
				%gep.out = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
				%a = load i16, i16 addrspace(1)* %gep.in
				%result = ashr i16 %a, 2
				store i16 %result, i16 addrspace(1)* %gep.out
				ret void
				}

				; Arithmetic shift right by the constant 4 of an i16 kernel argument,
				; followed by an i16 add with a second argument.
				; The label matches the function name below.
				; NOTE(review): the function name says 63 but the IR shifts by 4 - confirm
				; which was intended. Only the 16-bit store is checked; add the shift and
				; add instructions once the real scalar i16 output has been confirmed.
				; GCN-LABEL: {{^}}s_ashr_63_i16:
				; GCN: {{buffer\|flat}}_store_short
				define void @s_ashr_63_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) {
				%result = ashr i16 %a, 4
				%add = add i16 %result, %b
				store i16 %add, i16 addrspace(1)* %out
				ret void
				}

				; Arithmetic shift right by the constant 8 of an i16 loaded from a
				; workitem-indexed address; the result is stored at the same index.
				; The label matches the function name below, and the checks describe a
				; 16-bit load followed by a 16-bit shift by 8.
				; NOTE(review): the function name says 63 but the IR shifts by 8 - confirm
				; which was intended, and confirm the load opcode against llc output.
				; GCN-LABEL: {{^}}v_ashr_63_i16:
				; VI: flat_load_ushort [[VAL:v[0-9]+]]
				; VI: v_ashrrev_i16_e32 v{{[0-9]+}}, 8, [[VAL]]
				define void @v_ashr_63_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
				%tid = call i32 @llvm.r600.read.tidig.x() #0
				%gep.in = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
				%gep.out = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
				%a = load i16, i16 addrspace(1)* %gep.in
				%result = ashr i16 %a, 8
				store i16 %result, i16 addrspace(1)* %gep.out
				ret void
				}

				attributes #0 = { nounwind readnone }

test/CodeGen/AMDGPU/sra.ll

Context not available.
	ret void	ret void
	}	}

		; Element-wise arithmetic shift right of <2 x i16>, with both operands loaded
		; from memory. The SI expectation is the 32-bit shift because this patch
		; makes i16 legal only from VI onward; the VI expectation is the 16-bit shift.
		; NOTE(review): confirm the SI opcode against real llc output.
		; FUNC-LABEL: {{^}}ashr_v2i16:
		; SI: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		; SI: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

		; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

		; EG: ASHR {{\? }}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
		; EG: ASHR {{\? }}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
		define void @ashr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
		%b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i16 1
		%a = load <2 x i16>, <2 x i16> addrspace(1)* %in
		%b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr
		%result = ashr <2 x i16> %a, %b
		store <2 x i16> %result, <2 x i16> addrspace(1)* %out
		ret void
		}

		; Element-wise arithmetic shift right of <4 x i16>, with both operands loaded
		; from memory. The SI expectation is the 32-bit shift because this patch
		; makes i16 legal only from VI onward; the VI expectation is the 16-bit shift.
		; NOTE(review): confirm the SI opcode against real llc output.
		; FUNC-LABEL: {{^}}ashr_v4i16:
		; SI: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		; SI: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		; SI: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		; SI: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

		; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

		; EG: ASHR {{\? }}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
		; EG: ASHR {{\? }}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
		; EG: ASHR {{\? }}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
		; EG: ASHR {{\? }}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
		define void @ashr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
		%b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i16 1
		%a = load <4 x i16>, <4 x i16> addrspace(1)* %in
		%b = load <4 x i16>, <4 x i16> addrspace(1)* %b_ptr
		%result = ashr <4 x i16> %a, %b
		store <4 x i16> %result, <4 x i16> addrspace(1)* %out
		ret void
		}

	; FUNC-LABEL: {{^}}s_ashr_i64:	; FUNC-LABEL: {{^}}s_ashr_i64:
	; GCN: s_ashr_i64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8	; GCN: s_ashr_i64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8

Context not available.

test/CodeGen/AMDGPU/sub.i16.ll

This file was added.

				; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s \| FileCheck -check-prefix=VI -check-prefix=GCN %s

				; i16 subtraction of two values loaded (volatile) from workitem-indexed
				; addresses. A per-workitem output address is computed, but the store goes
				; to %out directly.
				; GCN-LABEL: {{^}}v_test_sub_i16:
				; VI: flat_load_ushort [[LHS:v[0-9]+]]
				; VI: flat_load_ushort [[RHS:v[0-9]+]]
				; VI: v_sub_u16_e32 [[DIFF:v[0-9]+]], [[LHS]], [[RHS]]
				; VI-NEXT: buffer_store_short [[DIFF]]
				define void @v_test_sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
				%lane = call i32 @llvm.amdgcn.workitem.id.x()
				%out.addr = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %lane
				%lhs.addr = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %lane
				%rhs.addr = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %lane
				%lhs = load volatile i16, i16 addrspace(1)* %lhs.addr
				%rhs = load volatile i16, i16 addrspace(1)* %rhs.addr
				%diff = sub i16 %lhs, %rhs
				store i16 %diff, i16 addrspace(1)* %out
				ret void
				}

				; Subtracts the constant 123 (0x7b) from an i16 loaded (volatile) from a
				; workitem-indexed address and stores the result to %out.
				; NOTE(review): the check places 0x7b ahead of [[A]] in v_sub_u16_e32 -
				; confirm against real llc output that this operand order computes A - 123.
				; GCN-LABEL: {{^}}v_test_sub_i16_constant:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], 0x7b, [[A]]
				; VI-NEXT: buffer_store_short [[ADD]]
				define void @v_test_sub_i16_constant(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%a = load volatile i16, i16 addrspace(1)* %gep.in0
				%sub = sub i16 %a, 123
				store i16 %sub, i16 addrspace(1)* %out
				ret void
				}

				; GCN-LABEL: {{^}}v_test_sub_i16_neg_constant:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], 0xfffffcb3, [[A]]
				; VI-NEXT: buffer_store_short [[ADD]]
				define void @v_test_sub_i16_neg_constant(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%a = load volatile i16, i16 addrspace(1)* %gep.in0
				%sub = sub i16 %a, -845
				store i16 %sub, i16 addrspace(1)* %out
				ret void
				}

				; GCN-LABEL: {{^}}v_test_sub_i16_inline_neg1:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], -1, [[A]]
				; VI-NEXT: buffer_store_short [[ADD]]
				define void @v_test_sub_i16_inline_neg1(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%a = load volatile i16, i16 addrspace(1)* %gep.in0
				%sub = sub i16 %a, -1
				store i16 %sub, i16 addrspace(1)* %out
				ret void
				}

				; Subtracts two volatile-loaded i16 values, zero-extends the difference to
				; i32 and stores it. The checks expect the subtract result register to be
				; stored as a dword with no instruction in between.
				; NOTE(review): %gep.out is computed but never used; the store writes
				; through %out directly.
				; GCN-LABEL: {{^}}v_test_sub_i16_zext_to_i32:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: flat_load_ushort [[B:v[0-9]+]]
				; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
				; VI-NEXT: buffer_store_dword [[ADD]]
				define void @v_test_sub_i16_zext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid
				%a = load volatile i16, i16 addrspace(1)* %gep.in0
				%b = load volatile i16, i16 addrspace(1)* %gep.in1
				%sub = sub i16 %a, %b
				%ext = zext i16 %sub to i32
				store i32 %ext, i32 addrspace(1)* %out
				ret void
				}

				; Subtracts two volatile-loaded i16 values, zero-extends the difference to
				; i64 and stores it. The checks expect the subtract result and a register
				; set to 0 (in either order) to be stored together as a dwordx2 pair.
				; NOTE(review): %gep.out is computed but never used; the store writes
				; through %out directly.
				; GCN-LABEL: {{^}}v_test_sub_i16_zext_to_i64:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: flat_load_ushort [[B:v[0-9]+]]
				; VI-DAG: v_sub_u16_e32 v[[ADD:[0-9]+]], [[A]], [[B]]
				; VI-DAG: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0{{$}}
				; VI: buffer_store_dwordx2 v{{\[}}[[ADD]]:[[VZERO]]{{\]}}
				define void @v_test_sub_i16_zext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i64, i64 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid
				%a = load volatile i16, i16 addrspace(1)* %gep.in0
				%b = load volatile i16, i16 addrspace(1)* %gep.in1
				%sub = sub i16 %a, %b
				%ext = zext i16 %sub to i64
				store i64 %ext, i64 addrspace(1)* %out
				ret void
				}

				; Subtracts two loaded i16 values, sign-extends the difference to i32 and
				; stores it. The checks expect a v_bfe_i32 with operands 0, 16 directly
				; after the subtract, followed directly by the dword store.
				; The two loads in this function are not volatile.
				; NOTE(review): %gep.out is computed but never used; the store writes
				; through %out directly.
				; GCN-LABEL: {{^}}v_test_sub_i16_sext_to_i32:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: flat_load_ushort [[B:v[0-9]+]]
				; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
				; VI-NEXT: v_bfe_i32 [[SEXT:v[0-9]+]], [[ADD]], 0, 16
				; VI-NEXT: buffer_store_dword [[SEXT]]
				define void @v_test_sub_i16_sext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid
				%a = load i16, i16 addrspace(1)* %gep.in0
				%b = load i16, i16 addrspace(1)* %gep.in1
				%sub = sub i16 %a, %b
				%ext = sext i16 %sub to i32
				store i32 %ext, i32 addrspace(1)* %out
				ret void
				}

				; Subtracts two loaded i16 values, sign-extends the difference to i64 and
				; stores it. The checks expect a v_bfe_i32 producing the low half, then a
				; v_ashrrev_i32 by 31 of that low half producing the high half, then a
				; dwordx2 store of the pair, with no other instructions in between.
				; The two loads in this function are not volatile.
				; NOTE(review): %gep.out is computed but never used; the store writes
				; through %out directly.
				; GCN-LABEL: {{^}}v_test_sub_i16_sext_to_i64:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: flat_load_ushort [[B:v[0-9]+]]
				; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
				; VI-NEXT: v_bfe_i32 v[[LO:[0-9]+]], [[ADD]], 0, 16
				; VI-NEXT: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
				; VI-NEXT: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
				define void @v_test_sub_i16_sext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i64, i64 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid
				%a = load i16, i16 addrspace(1)* %gep.in0
				%b = load i16, i16 addrspace(1)* %gep.in1
				%sub = sub i16 %a, %b
				%ext = sext i16 %sub to i64
				store i64 %ext, i64 addrspace(1)* %out
				ret void
				}

				; Subtracts two i16 function arguments and stores the 16-bit difference.
				; The checks expect both arguments to be fetched with s_load_dword, one of
				; them copied into a vector register, and the subtract result stored with
				; buffer_store_short directly afterwards.
				; NOTE(review): the expected operands list [[B]] before [[VA]] although the
				; IR computes %a - %b; confirm this order against real llc output.
				; GCN-LABEL: {{^}}s_test_sub_i16:
				; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c
				; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30
				; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
				; VI: v_sub_u16_e32 [[RESULT:v[0-9]+]], [[B]], [[VA]]
				; VI-NEXT: buffer_store_short [[RESULT]]
				define void @s_test_sub_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) #1 {
				%sub = sub i16 %a, %b
				store i16 %sub, i16 addrspace(1)* %out
				ret void
				}

				; Same subtraction as a plain i16 argument test, but both i16 arguments
				; carry the zeroext attribute. The checks expect no extra instruction
				; between the subtract and the 16-bit store.
				; GCN-LABEL: {{^}}s_test_sub_i16_zeroext_args:
				; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c
				; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30
				; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
				; VI: v_sub_u16_e32 [[RESULT:v[0-9]+]], [[B]], [[VA]]
				; VI-NEXT: buffer_store_short [[RESULT]]
				define void @s_test_sub_i16_zeroext_args(i16 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) #1 {
				%sub = sub i16 %a, %b
				store i16 %sub, i16 addrspace(1)* %out
				ret void
				}

				; Same subtraction as a plain i16 argument test, but both i16 arguments
				; carry the signext attribute. The checks expect no extra instruction
				; between the subtract and the 16-bit store.
				; GCN-LABEL: {{^}}s_test_sub_i16_signext_args:
				; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c
				; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30
				; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
				; VI: v_sub_u16_e32 [[RESULT:v[0-9]+]], [[B]], [[VA]]
				; VI-NEXT: buffer_store_short [[RESULT]]
				define void @s_test_sub_i16_signext_args(i16 addrspace(1)* %out, i16 signext %a, i16 signext %b) #1 {
				%sub = sub i16 %a, %b
				store i16 %sub, i16 addrspace(1)* %out
				ret void
				}

				; Subtracts two zeroext i16 arguments, zero-extends the difference to i32
				; and stores it. The checks expect the register written by the subtract to
				; be stored as a dword by the very next instruction.
				; The store check refers to [[ADD]], the variable captured on the subtract
				; line of this function, so it cannot pick up a value left over from an
				; earlier function in the file.
				; GCN-LABEL: {{^}}s_test_sub_i16_zext_to_i32:
				; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c
				; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30
				; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
				; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[B]], [[VA]]
				; VI-NEXT: buffer_store_dword [[ADD]]
				define void @s_test_sub_i16_zext_to_i32(i32 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) #1 {
				%sub = sub i16 %a, %b
				%ext = zext i16 %sub to i32
				store i32 %ext, i32 addrspace(1)* %out
				ret void
				}

				; Subtracts two zeroext i16 arguments, zero-extends the difference to i64
				; and stores it. The checks expect the subtract result as the low half and
				; a register set to 0 as the high half of a dwordx2 store.
				; GCN-LABEL: {{^}}s_test_sub_i16_zext_to_i64:
				; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c
				; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30
				; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
				; VI-DAG: v_sub_u16_e32 v[[LO:[0-9]+]], [[B]], [[VA]]
				; VI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
				; VI-NEXT: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
				define void @s_test_sub_i16_zext_to_i64(i64 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) #1 {
				%sub = sub i16 %a, %b
				%ext = zext i16 %sub to i64
				store i64 %ext, i64 addrspace(1)* %out
				ret void
				}

				; Subtracts two signext i16 arguments, sign-extends the difference to i32
				; and stores it. The checks expect a v_bfe_i32 with operands 0, 16 directly
				; after the subtract, and the dword store of its result directly after that.
				; GCN-LABEL: {{^}}s_test_sub_i16_sext_to_i32:
				; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c
				; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30
				; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
				; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[B]], [[VA]]
				; VI-NEXT: v_bfe_i32 [[RESULT:v[0-9]+]], [[ADD]], 0, 16
				; VI-NEXT: buffer_store_dword [[RESULT]]
				define void @s_test_sub_i16_sext_to_i32(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) #1 {
				%sub = sub i16 %a, %b
				%ext = sext i16 %sub to i32
				store i32 %ext, i32 addrspace(1)* %out
				ret void
				}

				; Subtracts two signext i16 arguments, sign-extends the difference to i64
				; and stores it. The checks expect a v_bfe_i32 producing the low half, then
				; a v_ashrrev_i32 by 31 of that low half producing the high half, then a
				; dwordx2 store of the pair, with no other instructions in between.
				; The subtract result is captured as [[ADD]] so that the v_bfe_i32 line
				; reads a variable defined in this function, and [[LO]] is defined only
				; once (by the v_bfe_i32 line).
				; GCN-LABEL: {{^}}s_test_sub_i16_sext_to_i64:
				; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c
				; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30
				; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
				; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[B]], [[VA]]
				; VI-NEXT: v_bfe_i32 v[[LO:[0-9]+]], [[ADD]], 0, 16
				; VI-NEXT: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
				; VI-NEXT: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
				define void @s_test_sub_i16_sext_to_i64(i64 addrspace(1)* %out, i16 signext %a, i16 signext %b) #1 {
				%sub = sub i16 %a, %b
				%ext = sext i16 %sub to i64
				store i64 %ext, i64 addrspace(1)* %out
				ret void
				}

				declare i32 @llvm.amdgcn.workitem.id.x() #0

				attributes #0 = { nounwind readnone }
				attributes #1 = { nounwind }

test/CodeGen/AMDGPU/sub.ll

Context not available.
	ret void	ret void
	}	}

		; Loads two consecutive i16 values starting at %in (%b_ptr is %in advanced
		; by one element), subtracts the second from the first and stores the
		; 16-bit difference to %out.
		; The label line anchors the instruction check below to this function, in
		; the same way the vector variants of this test are anchored.
		; FUNC-LABEL: {{^}}test_sub_i16:
		; SI: v_subrev_i16_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
		define void @test_sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
		%b_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
		%a = load i16, i16 addrspace(1)* %in
		%b = load i16, i16 addrspace(1)* %b_ptr
		%result = sub i16 %a, %b
		store i16 %result, i16 addrspace(1)* %out
		ret void
		}

		; Loads two consecutive <2 x i16> vectors starting at %in (%b_ptr is %in
		; advanced by one vector), subtracts the second from the first and stores
		; the result to %out. Two subtract instructions are expected per target,
		; one for each vector element.
		; FUNC-LABEL: {{^}}test_sub_v2i16:
		; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
		; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

		; SI: v_sub_i16_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
		; SI: v_sub_i16_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}

		define void @test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
		%b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i16 1
		%a = load <2 x i16>, <2 x i16> addrspace(1) * %in
		%b = load <2 x i16>, <2 x i16> addrspace(1) * %b_ptr
		%result = sub <2 x i16> %a, %b
		store <2 x i16> %result, <2 x i16> addrspace(1)* %out
		ret void
		}

		; Loads two consecutive <4 x i16> vectors starting at %in (%b_ptr is %in
		; advanced by one vector), subtracts the second from the first and stores
		; the result to %out. Four subtract instructions are expected per target,
		; one for each vector element.
		; FUNC-LABEL: {{^}}test_sub_v4i16:
		; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
		; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
		; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
		; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

		; SI: v_sub_i16_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
		; SI: v_sub_i16_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
		; SI: v_sub_i16_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
		; SI: v_sub_i16_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}

		define void @test_sub_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
		%b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i16 1
		%a = load <4 x i16>, <4 x i16> addrspace(1) * %in
		%b = load <4 x i16>, <4 x i16> addrspace(1) * %b_ptr
		%result = sub <4 x i16> %a, %b
		store <4 x i16> %result, <4 x i16> addrspace(1)* %out
		ret void
		}

	; FUNC-LABEL: {{^}}s_sub_i64:	; FUNC-LABEL: {{^}}s_sub_i64:
	; SI: s_sub_u32	; SI: s_sub_u32
	; SI: s_subb_u32	; SI: s_subb_u32
Context not available.

test/CodeGen/AMDGPU/trunc-store-i1.ll

Context not available.
	ret void	ret void
	}	}

	; SI-LABEL: {{^}}global_truncstore_i16_to_i1:	; SI-LABEL: {{^}}s_arg_global_truncstore_i16_to_i1:
	; SI: s_load_dword [[LOAD:s[0-9]+]],	; SI: s_load_dword [[LOAD:s[0-9]+]],
	; SI: s_and_b32 [[SREG:s[0-9]+]], [[LOAD]], 1	; SI: s_and_b32 [[SREG:s[0-9]+]], [[LOAD]], 1
	; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], [[SREG]]	; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], [[SREG]]
	; SI: buffer_store_byte [[VREG]],	; SI: buffer_store_byte [[VREG]],
	define void @global_truncstore_i16_to_i1(i1 addrspace(1)* %out, i16 %val) nounwind {	define void @s_arg_global_truncstore_i16_to_i1(i1 addrspace(1)* %out, i16 %val) nounwind {
	%trunc = trunc i16 %val to i1	%trunc = trunc i16 %val to i1
	store i1 %trunc, i1 addrspace(1)* %out, align 1	store i1 %trunc, i1 addrspace(1)* %out, align 1
	ret void	ret void
	}	}
		; Adds two i16 arguments, truncates the sum to i1 and stores that bit to
		; %out with 1-byte alignment. Only the function label is checked here; no
		; individual instructions are matched.
		; SI-LABEL: {{^}}global_truncstore_i16_to_i1:
		define void @global_truncstore_i16_to_i1(i1 addrspace(1)* %out, i16 %val0, i16 %val1) nounwind {
		%add = add i16 %val0, %val1
		%trunc = trunc i16 %add to i1
		store i1 %trunc, i1 addrspace(1)* %out, align 1
		ret void
		}
Context not available.

test/CodeGen/AMDGPU/zero_extend.ll

Context not available.
	; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs \| FileCheck %s --check-prefix=SI	; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs \| FileCheck %s --check-prefix=SI
	; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs \| FileCheck %s --check-prefix=SI	; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs \| FileCheck %s --check-prefix=SI

	; R600: {{^}}test:	; R600: {{^}}s_mad_zext_i32_to_i64:
	; R600: MEM_RAT_CACHELESS STORE_RAW	; R600: MEM_RAT_CACHELESS STORE_RAW
	; R600: MEM_RAT_CACHELESS STORE_RAW	; R600: MEM_RAT_CACHELESS STORE_RAW

	; SI: {{^}}test:	; SI: {{^}}test:
	; SI: v_mov_b32_e32 v[[V_ZERO:[0-9]]], 0{{$}}	; SI: v_mov_b32_e32 v[[V_ZERO:[0-9]]], 0{{$}}
	; SI: buffer_store_dwordx2 v[0:[[V_ZERO]]{{\]}}	; SI: buffer_store_dwordx2 v[0:[[V_ZERO]]{{\]}}
	define void @test(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {	define void @s_mad_zext_i32_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) #0 {
	entry:	entry:
	%0 = mul i32 %a, %b	%tmp0 = mul i32 %a, %b
	%1 = add i32 %0, %c	%tmp1 = add i32 %tmp0, %c
	%2 = zext i32 %1 to i64	%tmp2 = zext i32 %tmp1 to i64
	store i64 %2, i64 addrspace(1)* %out	store i64 %tmp2, i64 addrspace(1)* %out
	ret void	ret void
	}	}

	; SI-LABEL: {{^}}testi1toi32:	; SI-LABEL: {{^}}s_cmp_zext_i1_to_i32
	; SI: v_cndmask_b32	; SI: v_cndmask_b32
	define void @testi1toi32(i32 addrspace(1)* %out, i32 %a, i32 %b) {	define void @s_cmp_zext_i1_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
	entry:	entry:
	%0 = icmp eq i32 %a, %b	%tmp0 = icmp eq i32 %a, %b
	%1 = zext i1 %0 to i32	%tmp1 = zext i1 %tmp0 to i32
	store i32 %1, i32 addrspace(1)* %out	store i32 %tmp1, i32 addrspace(1)* %out
	ret void	ret void
	}	}

	; SI-LABEL: {{^}}zext_i1_to_i64:	; SI-LABEL: {{^}}s_arg_zext_i1_to_i64:
		; Zero-extends an i1 argument (marked zeroext) to i64 and stores it to
		; %out with 8-byte alignment.
		define void @s_arg_zext_i1_to_i64(i64 addrspace(1)* %out, i1 zeroext %arg) #0 {
		%ext = zext i1 %arg to i64
		store i64 %ext, i64 addrspace(1)* %out, align 8
		ret void
		}

		; SI-LABEL: {{^}}s_cmp_zext_i1_to_i64:
	; SI: s_mov_b32 s{{[0-9]+}}, 0	; SI: s_mov_b32 s{{[0-9]+}}, 0
	; SI: v_cmp_eq_i32	; SI: v_cmp_eq_i32
	; SI: v_cndmask_b32	; SI: v_cndmask_b32
	define void @zext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind {	define void @s_cmp_zext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) #0 {
	%cmp = icmp eq i32 %a, %b	%cmp = icmp eq i32 %a, %b
	%ext = zext i1 %cmp to i64	%ext = zext i1 %cmp to i64
	store i64 %ext, i64 addrspace(1)* %out, align 8	store i64 %ext, i64 addrspace(1)* %out, align 8
	ret void	ret void
	}	}

		; Compares two zeroext i16 arguments for equality, zero-extends the i1
		; result to i16 and stores it. The checks expect a v_cndmask_b32 selecting
		; between 0 and 1 on vcc, stored as a short by the very next instruction.
		; The label names the function exactly as it is defined below so that the
		; label check can match the emitted symbol.
		; NOTE(review): the function name mentions "zext_i16_to_i32", but the body
		; zero-extends an i1 compare result to i16; consider renaming the function.
		; SI-LABEL: {{^}}s_test_zext_i16_to_i32:
		; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
		; SI-NEXT: buffer_store_short [[RESULT]]
		define void @s_test_zext_i16_to_i32(i16 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) #0 {
		%tmp0 = icmp eq i16 %a, %b
		%tmp1 = zext i1 %tmp0 to i16
		store i16 %tmp1, i16 addrspace(1)* %out
		ret void
		}

		attributes #0 = { nounwind }
Context not available.

This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU/SI: Make i16 a legal type for VI subtargets
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 51139

lib/Target/AMDGPU/SIISelLowering.cpp

lib/Target/AMDGPU/SIRegisterInfo.td

lib/Target/AMDGPU/VIInstructions.td

test/CodeGen/AMDGPU/add.i16.ll

test/CodeGen/AMDGPU/anyext.ll

test/CodeGen/AMDGPU/global-extload-i8.ll

test/CodeGen/AMDGPU/max.i16.ll

test/CodeGen/AMDGPU/min_test.ll

test/CodeGen/AMDGPU/shl.i16.ll

test/CodeGen/AMDGPU/shl.ll

test/CodeGen/AMDGPU/sign_extend.ll

test/CodeGen/AMDGPU/sra.i16.ll

test/CodeGen/AMDGPU/sra.ll

test/CodeGen/AMDGPU/sub.i16.ll

test/CodeGen/AMDGPU/sub.ll

test/CodeGen/AMDGPU/trunc-store-i1.ll

test/CodeGen/AMDGPU/zero_extend.ll

This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU/SI: Make i16 a legal type for VI subtargets
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 51139

lib/Target/AMDGPU/SIISelLowering.cpp

lib/Target/AMDGPU/SIRegisterInfo.td

lib/Target/AMDGPU/VIInstructions.td

test/CodeGen/AMDGPU/add.i16.ll

test/CodeGen/AMDGPU/anyext.ll

test/CodeGen/AMDGPU/global-extload-i8.ll

test/CodeGen/AMDGPU/max.i16.ll

test/CodeGen/AMDGPU/min_test.ll

test/CodeGen/AMDGPU/shl.i16.ll

test/CodeGen/AMDGPU/shl.ll

test/CodeGen/AMDGPU/sign_extend.ll

test/CodeGen/AMDGPU/sra.i16.ll

test/CodeGen/AMDGPU/sra.ll

test/CodeGen/AMDGPU/sub.i16.ll

test/CodeGen/AMDGPU/sub.ll

test/CodeGen/AMDGPU/trunc-store-i1.ll

test/CodeGen/AMDGPU/zero_extend.ll

AMDGPU/SI: Make i16 a legal type for VI subtargets
ClosedPublic