Diff 52514

lib/Target/AMDGPU/AMDGPUInstructions.td

Show First 20 Lines • Show All 540 Lines • ▼ Show 20 Lines	multiclass BFIPatterns <Instruction BFI_INT,
// z ^ (x & (y ^ z))		// z ^ (x & (y ^ z))
def : Pat <		def : Pat <
(xor i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),		(xor i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
(BFI_INT $x, $y, $z)		(BFI_INT $x, $y, $z)
>;		>;

def : Pat <		def : Pat <
(fcopysign f32:$src0, f32:$src1),		(fcopysign f32:$src0, f32:$src1),
(BFI_INT (LoadImm32 0x7fffffff), $src0, $src1)		(BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0, $src1)
>;		>;

def : Pat <		def : Pat <
(f64 (fcopysign f64:$src0, f64:$src1)),		(f64 (fcopysign f64:$src0, f64:$src1)),
(REG_SEQUENCE RC64,		(REG_SEQUENCE RC64,
(i32 (EXTRACT_SUBREG $src0, sub0)), sub0,		(i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
(BFI_INT (LoadImm32 0x7fffffff),		(BFI_INT (LoadImm32 (i32 0x7fffffff)),
(i32 (EXTRACT_SUBREG $src0, sub1)),		(i32 (EXTRACT_SUBREG $src0, sub1)),
(i32 (EXTRACT_SUBREG $src1, sub1))), sub1)		(i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
>;		>;
}		}

// SHA-256 Ma patterns		// SHA-256 Ma patterns

// ((x & z) \| (y & (x \| z))) -> BFI_INT (XOR x, y), z, y		// ((x & z) \| (y & (x \| z))) -> BFI_INT (XOR x, y), z, y
▲ Show 20 Lines • Show All 90 Lines • Show Last 20 Lines

lib/Target/AMDGPU/SIISelLowering.cpp

Show All 27 Lines
#include "llvm/ADT/BitVector.h"		#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/StringSwitch.h"		#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"		#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"		#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"		#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"		#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/DiagnosticInfo.h"		#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"		#include "llvm/IR/Function.h"
#include "llvm/ADT/SmallString.h"		#include "llvm/ADT/SmallString.h"
		kzhuravlUnsubmitted Done Reply Inline Actions Alphabetize kzhuravl: Alphabetize

using namespace llvm;		using namespace llvm;

SITargetLowering::SITargetLowering(TargetMachine &TM,		SITargetLowering::SITargetLowering(TargetMachine &TM,
const AMDGPUSubtarget &STI)		const AMDGPUSubtarget &STI)
: AMDGPUTargetLowering(TM, STI) {		: AMDGPUTargetLowering(TM, STI) {
addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);		addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);		addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
Show All 12 Lines	SITargetLowering::SITargetLowering(TargetMachine &TM,
addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);		addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);

addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);		addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);
addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);		addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);

addRegisterClass(MVT::v16i32, &AMDGPU::SReg_512RegClass);		addRegisterClass(MVT::v16i32, &AMDGPU::SReg_512RegClass);
addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);		addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);

		// TODO: Subtarget feature for i16
		arsenmUnsubmitted Not Done Reply Inline Actions I did this already, so this should check Subtarget->has16BitInsts() arsenm: I did this already, so this should check Subtarget->has16BitInsts()
		tstellarAMDAuthorUnsubmitted Done Reply Inline Actions Do we still need this comment? tstellarAMD: Do we still need this comment?
		kzhuravlUnsubmitted Done Reply Inline Actions I do not think we need this comment kzhuravl: I do not think we need this comment
		if (Subtarget->has16BitInsts())
		kzhuravlUnsubmitted Done Reply Inline Actions Subtarget->has16BitInsts() kzhuravl: Subtarget->has16BitInsts()
		addRegisterClass(MVT::i16, &AMDGPU::SReg_32RegClass);

computeRegisterProperties(STI.getRegisterInfo());		computeRegisterProperties(STI.getRegisterInfo());

setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand);		setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand);		setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);		setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand);		setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand);

setOperationAction(ISD::ADD, MVT::i32, Legal);		setOperationAction(ISD::ADD, MVT::i32, Legal);
▲ Show 20 Lines • Show All 179 Lines • ▼ Show 20 Lines	if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
setOperationAction(ISD::FCEIL, MVT::f64, Legal);		setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FRINT, MVT::f64, Legal);		setOperationAction(ISD::FRINT, MVT::f64, Legal);
}		}

setOperationAction(ISD::FFLOOR, MVT::f64, Legal);		setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FDIV, MVT::f32, Custom);		setOperationAction(ISD::FDIV, MVT::f32, Custom);
setOperationAction(ISD::FDIV, MVT::f64, Custom);		setOperationAction(ISD::FDIV, MVT::f64, Custom);

		if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
		kzhuravlUnsubmitted Done Reply Inline Actions Subtarget->has16BitInsts() kzhuravl: Subtarget->has16BitInsts()
		setOperationAction(ISD::Constant, MVT::i16, Legal);

		setOperationAction(ISD::SMIN, MVT::i16, Legal);
		setOperationAction(ISD::SMAX, MVT::i16, Legal);
		tstellarAMDAuthorUnsubmitted Not Done Reply Inline Actions Are these really necessary? I thought making a type legal marked these operations legal by default. tstellarAMD: Are these really necessary? I thought making a type legal marked these operations legal by…
		setOperationAction(ISD::UMIN, MVT::i16, Legal);
		setOperationAction(ISD::UMAX, MVT::i16, Legal);

		setOperationAction(ISD::SETCC, MVT::i16, Legal);
		setOperationAction(ISD::TRUNCATE, MVT::i16, Legal);

		setOperationAction(ISD::SIGN_EXTEND, MVT::i16, Promote);
		AddPromotedToType(ISD::SIGN_EXTEND, MVT::i16, MVT::i32);
		tstellarAMDAuthorUnsubmitted Done Reply Inline Actions Same with these too. Are they really necessary? tstellarAMD: Same with these too. Are they really necessary?
		arsenmUnsubmitted Done Reply Inline Actions min/max need to be explicitly made legal, but that should be a separate patch. setcc should be promote for now until those are added later arsenm: min/max need to be explicitly made legal, but that should be a separate patch. setcc should be…

		setOperationAction(ISD::AND, MVT::i16, Promote);
		setOperationAction(ISD::OR, MVT::i16, Promote);
		setOperationAction(ISD::XOR, MVT::i16, Promote);

		setOperationAction(ISD::ROTR, MVT::i16, Promote);
		setOperationAction(ISD::ROTL, MVT::i16, Promote);

		setOperationAction(ISD::SDIV, MVT::i16, Promote);
		setOperationAction(ISD::UDIV, MVT::i16, Promote);
		setOperationAction(ISD::SREM, MVT::i16, Promote);
		setOperationAction(ISD::UREM, MVT::i16, Promote);
		setOperationAction(ISD::MUL, MVT::i16, Promote);

		setOperationAction(ISD::BSWAP, MVT::i16, Promote);
		setOperationAction(ISD::CTTZ, MVT::i16, Promote);
		setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Promote);
		setOperationAction(ISD::CTLZ, MVT::i16, Promote);
		setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Promote);

		setOperationAction(ISD::SELECT, MVT::i16, Legal);
		setOperationAction(ISD::SELECT_CC, MVT::i16, Expand);

		setOperationAction(ISD::BR_CC, MVT::i16, Expand);

		setOperationAction(ISD::LOAD, MVT::i16, Custom);
		setOperationAction(ISD::STORE, MVT::i16, Custom);

		setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i16, Legal);
		setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, MVT::i16, Legal);
		setLoadExtAction(ISD::EXTLOAD, MVT::i32, MVT::i16, Legal);

		kzhuravlUnsubmitted Done Reply Inline Actions Detabify kzhuravl: Detabify
		setLoadExtAction(ISD::SEXTLOAD, MVT::i64, MVT::i16, Expand);
		setLoadExtAction(ISD::ZEXTLOAD, MVT::i64, MVT::i16, Expand);
		kzhuravlUnsubmitted Done Reply Inline Actions Detabify kzhuravl: Detabify
		setLoadExtAction(ISD::EXTLOAD, MVT::i64, MVT::i16, Expand);

		kzhuravlUnsubmitted Done Reply Inline Actions Detabify kzhuravl: Detabify
		setTruncStoreAction(MVT::i64, MVT::i16, Expand);
		}
		kzhuravlUnsubmitted Done Reply Inline Actions Detabify kzhuravl: Detabify

		kzhuravlUnsubmitted Done Reply Inline Actions Remove extra new line kzhuravl: Remove extra new line
setTargetDAGCombine(ISD::FADD);		setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);		setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::FMINNUM);		setTargetDAGCombine(ISD::FMINNUM);
setTargetDAGCombine(ISD::FMAXNUM);		setTargetDAGCombine(ISD::FMAXNUM);
setTargetDAGCombine(ISD::SMIN);		setTargetDAGCombine(ISD::SMIN);
setTargetDAGCombine(ISD::SMAX);		setTargetDAGCombine(ISD::SMAX);
setTargetDAGCombine(ISD::UMIN);		setTargetDAGCombine(ISD::UMIN);
setTargetDAGCombine(ISD::UMAX);		setTargetDAGCombine(ISD::UMAX);
▲ Show 20 Lines • Show All 995 Lines • ▼ Show 20 Lines	SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,

if (Intr->getOpcode() != ISD::INTRINSIC_W_CHAIN) {		if (Intr->getOpcode() != ISD::INTRINSIC_W_CHAIN) {
// This is a uniform branch so we don't need to legalize.		// This is a uniform branch so we don't need to legalize.
return BRCOND;		return BRCOND;
}		}

assert(!SetCC \|\|		assert(!SetCC \|\|
(SetCC->getConstantOperandVal(1) == 1 &&		(SetCC->getConstantOperandVal(1) == 1 &&
isCFIntrinsic(Intr) &&		isCFIntrinsic(Intr) &&
		arsenmUnsubmitted Done Reply Inline Actions Why is this part of the patch? This looks unrelated arsenm: Why is this part of the patch? This looks unrelated
cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get() ==		cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get() ==
ISD::SETNE));		ISD::SETNE));

// Build the result and		// Build the result and
ArrayRef<EVT> Res(Intr->value_begin() + 1, Intr->value_end());		ArrayRef<EVT> Res(Intr->value_begin() + 1, Intr->value_end());

// operands of the new intrinsic call		// operands of the new intrinsic call
SmallVector<SDValue, 4> Ops;		SmallVector<SDValue, 4> Ops;
▲ Show 20 Lines • Show All 396 Lines • ▼ Show 20 Lines

SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {		SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);		SDLoc DL(Op);
LoadSDNode *Load = cast<LoadSDNode>(Op);		LoadSDNode *Load = cast<LoadSDNode>(Op);
ISD::LoadExtType ExtType = Load->getExtensionType();		ISD::LoadExtType ExtType = Load->getExtensionType();
EVT MemVT = Load->getMemoryVT();		EVT MemVT = Load->getMemoryVT();

if (ExtType == ISD::NON_EXTLOAD && MemVT.getSizeInBits() < 32) {		if (ExtType == ISD::NON_EXTLOAD && MemVT.getSizeInBits() < 32) {
assert(MemVT == MVT::i1 && "Only i1 non-extloads expected");		//assert(MemVT == MVT::i1 && "Only i1 non-extloads expected");
		tstellarAMDAuthorUnsubmitted Done Reply Inline Actions Does this need to be removed? tstellarAMD: Does this need to be removed?
// FIXME: Copied from PPC		// FIXME: Copied from PPC
// First, load into 32 bits, then truncate to 1 bit.		// First, load into 32 bits, then truncate to 1 bit.

SDValue Chain = Load->getChain();		SDValue Chain = Load->getChain();
SDValue BasePtr = Load->getBasePtr();		SDValue BasePtr = Load->getBasePtr();
MachineMemOperand *MMO = Load->getMemOperand();		MachineMemOperand *MMO = Load->getMemOperand();

		EVT mem_vt = (MemVT == MVT::i1) ? MVT::i8 : MVT::i16;
		tstellarAMDAuthorUnsubmitted Done Reply Inline Actions Coding style. Variable names should start with a capital. tstellarAMD: Coding style. Variable names should start with a capital.

SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,		SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,
BasePtr, MVT::i8, MMO);		BasePtr, mem_vt, MMO);

SDValue Ops[] = {		SDValue Ops[] = {
DAG.getNode(ISD::TRUNCATE, DL, MemVT, NewLD),		DAG.getNode(ISD::TRUNCATE, DL, MemVT, NewLD),
NewLD.getValue(1)		NewLD.getValue(1)
};		};
		tstellarAMDAuthorUnsubmitted Not Done Reply Inline Actions Why does i16 need special handling here? This seems to be nearly identical to the block of code directly below. tstellarAMD: Why does i16 need special handling here? This seems to be nearly identical to the block of…

return DAG.getMergeValues(Ops, DL);		return DAG.getMergeValues(Ops, DL);
}		}

if (!MemVT.isVector())		if (!MemVT.isVector())
return SDValue();		return SDValue();

assert(Op.getValueType().getVectorElementType() == MVT::i32 &&		assert(Op.getValueType().getVectorElementType() == MVT::i32 &&
▲ Show 20 Lines • Show All 232 Lines • ▼ Show 20 Lines	SDValue SITargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("Unexpected type for fdiv");		llvm_unreachable("Unexpected type for fdiv");
}		}

SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {		SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);		SDLoc DL(Op);
StoreSDNode *Store = cast<StoreSDNode>(Op);		StoreSDNode *Store = cast<StoreSDNode>(Op);
EVT VT = Store->getMemoryVT();		EVT VT = Store->getMemoryVT();

		if (VT == MVT::i16) {
		SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Store->getValue());

		return DAG.getTruncStore(Store->getChain(), DL,
		Ext,
		Store->getBasePtr(),
		MVT::i16,
		Store->getMemOperand());
		}

if (VT == MVT::i1) {		if (VT == MVT::i1) {
return DAG.getTruncStore(Store->getChain(), DL,		return DAG.getTruncStore(Store->getChain(), DL,
DAG.getSExtOrTrunc(Store->getValue(), DL, MVT::i32),		DAG.getSExtOrTrunc(Store->getValue(), DL, MVT::i32),
Store->getBasePtr(), MVT::i1, Store->getMemOperand());		Store->getBasePtr(), MVT::i1, Store->getMemOperand());
}		}

assert(Store->getValue().getValueType().getScalarType() == MVT::i32);		assert(Store->getValue().getValueType().getScalarType() == MVT::i32);

unsigned NumElements = VT.getVectorNumElements();		unsigned NumElements = VT.getVectorNumElements();
switch (Store->getAddressSpace()) {		switch (Store->getAddressSpace()) {
case AMDGPUAS::GLOBAL_ADDRESS:		case AMDGPUAS::GLOBAL_ADDRESS:
case AMDGPUAS::FLAT_ADDRESS:		case AMDGPUAS::FLAT_ADDRESS:
if (NumElements > 4)		if (NumElements > 4)
return SplitVectorStore(Op, DAG);		return SplitVectorStore(Op, DAG);
return SDValue();		return SDValue();
case AMDGPUAS::PRIVATE_ADDRESS: {		case AMDGPUAS::PRIVATE_ADDRESS: {
switch (Subtarget->getMaxPrivateElementSize()) {		switch (Subtarget->getMaxPrivateElementSize()) {
		tstellarAMDAuthorUnsubmitted Done Reply Inline Actions Why do we need to custom lower i16 stores? Can't we just mark them as promote? tstellarAMD: Why do we need to custom lower i16 stores? Can't we just mark them as promote?
		arsenmUnsubmitted Done Reply Inline Actions Load/store promote expects an equal size type for a bitcast promote. This is the same problem that i1 has, so it should follow that example arsenm: Load/store promote expects an equal size type for a bitcast promote. This is the same problem…
case 4:		case 4:
return ScalarizeVectorStore(Op, DAG);		return ScalarizeVectorStore(Op, DAG);
case 8:		case 8:
if (NumElements > 2)		if (NumElements > 2)
return SplitVectorStore(Op, DAG);		return SplitVectorStore(Op, DAG);
return SDValue();		return SDValue();
case 16:		case 16:
if (NumElements > 4)		if (NumElements > 4)
▲ Show 20 Lines • Show All 355 Lines • ▼ Show 20 Lines	if (Signed) {
if (K0->getAPIntValue().uge(K1->getAPIntValue()))		if (K0->getAPIntValue().uge(K1->getAPIntValue()))
return SDValue();		return SDValue();
}		}

EVT VT = K0->getValueType(0);		EVT VT = K0->getValueType(0);
return DAG.getNode(Signed ? AMDGPUISD::SMED3 : AMDGPUISD::UMED3, SL, VT,		return DAG.getNode(Signed ? AMDGPUISD::SMED3 : AMDGPUISD::UMED3, SL, VT,
Op0.getOperand(0), SDValue(K0, 0), SDValue(K1, 0));		Op0.getOperand(0), SDValue(K0, 0), SDValue(K1, 0));
}		}

static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) {		static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) {
		arsenmUnsubmitted Done Reply Inline Actions Define on same line arsenm: Define on same line
if (!DAG.getTargetLoweringInfo().hasFloatingPointExceptions())		if (!DAG.getTargetLoweringInfo().hasFloatingPointExceptions())
return true;		return true;

return DAG.isKnownNeverNaN(Op);		return DAG.isKnownNeverNaN(Op);
}		}

static SDValue performFPMed3ImmCombine(SelectionDAG &DAG,		static SDValue performFPMed3ImmCombine(SelectionDAG &DAG,
SDLoc SL,		SDLoc SL,
▲ Show 20 Lines • Show All 671 Lines • Show Last 20 Lines

lib/Target/AMDGPU/SIInstructions.td

Show First 20 Lines • Show All 2,083 Lines • ▼ Show 20 Lines
} // End isCodeGenOnly, isPseudo		} // End isCodeGenOnly, isPseudo

} // End SubtargetPredicate = isGCN		} // End SubtargetPredicate = isGCN

let Predicates = [isGCN] in {		let Predicates = [isGCN] in {

def : Pat <		def : Pat <
(int_AMDGPU_kilp),		(int_AMDGPU_kilp),
(SI_KILL 0xbf800000)		(SI_KILL (i32 0xbf800000))
>;		>;

/* int_SI_vs_load_input */		/* int_SI_vs_load_input */
def : Pat<		def : Pat<
(SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr),		(SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr),
(BUFFER_LOAD_FORMAT_XYZW_IDXEN $buf_idx_vgpr, $tlst, 0, imm:$attr_offset, 0, 0, 0)		(BUFFER_LOAD_FORMAT_XYZW_IDXEN $buf_idx_vgpr, v4i32:$tlst, (i32 0), imm:$attr_offset, 0, 0, 0)
>;		>;

def : Pat <		def : Pat <
(int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,		(int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
f32:$src0, f32:$src1, f32:$src2, f32:$src3),		f32:$src0, f32:$src1, f32:$src2, f32:$src3),
(EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm,		(EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm,
$src0, $src1, $src2, $src3)		$src0, $src1, $src2, $src3)
>;		>;
▲ Show 20 Lines • Show All 260 Lines • ▼ Show 20 Lines
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// SOP1 Patterns		// SOP1 Patterns
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

def : Pat <		def : Pat <
(i64 (ctpop i64:$src)),		(i64 (ctpop i64:$src)),
(i64 (REG_SEQUENCE SReg_64,		(i64 (REG_SEQUENCE SReg_64,
(i32 (COPY_TO_REGCLASS (S_BCNT1_I32_B64 $src), SReg_32)), sub0,		(i32 (COPY_TO_REGCLASS (S_BCNT1_I32_B64 $src), SReg_32)), sub0,
(S_MOV_B32 0), sub1))		(S_MOV_B32 (i32 0)), sub1))
>;		>;

def : Pat <		def : Pat <
(i32 (smax i32:$x, (i32 (ineg i32:$x)))),		(i32 (smax i32:$x, (i32 (ineg i32:$x)))),
(S_ABS_I32 $x)		(S_ABS_I32 $x)
>;		>;

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
▲ Show 20 Lines • Show All 451 Lines • ▼ Show 20 Lines

/******** =================== ********/		/******** =================== ********/
/******** Src & Dst modifiers ********/		/******** Src & Dst modifiers ********/
/******** =================== ********/		/******** =================== ********/

def : Pat <		def : Pat <
(AMDGPUclamp (VOP3Mods0Clamp f32:$src0, i32:$src0_modifiers, i32:$omod),		(AMDGPUclamp (VOP3Mods0Clamp f32:$src0, i32:$src0_modifiers, i32:$omod),
(f32 FP_ZERO), (f32 FP_ONE)),		(f32 FP_ZERO), (f32 FP_ONE)),
(V_ADD_F32_e64 $src0_modifiers, $src0, 0, 0, 1, $omod)		(V_ADD_F32_e64 $src0_modifiers, $src0, 0, (i32 0), 1, $omod)
>;		>;

/******** ================================ ********/		/******** ================================ ********/
/******** Floating point absolute/negative ********/		/******** Floating point absolute/negative ********/
/******** ================================ ********/		/******** ================================ ********/

// Prevent expanding both fneg and fabs.		// Prevent expanding both fneg and fabs.

def : Pat <		def : Pat <
(fneg (fabs f32:$src)),		(fneg (fabs f32:$src)),
(S_OR_B32 $src, 0x80000000) // Set sign bit		(S_OR_B32 $src, (i32 0x80000000)) // Set sign bit
>;		>;

// FIXME: Should use S_OR_B32		// FIXME: Should use S_OR_B32
def : Pat <		def : Pat <
(fneg (fabs f64:$src)),		(fneg (fabs f64:$src)),
(REG_SEQUENCE VReg_64,		(REG_SEQUENCE VReg_64,
(i32 (EXTRACT_SUBREG f64:$src, sub0)),		(i32 (EXTRACT_SUBREG f64:$src, sub0)),
sub0,		sub0,
(V_OR_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),		(V_OR_B32_e32 (i32 (EXTRACT_SUBREG f64:$src, sub1)),
(V_MOV_B32_e32 0x80000000)), // Set sign bit.		(V_MOV_B32_e32 (i32 0x80000000))), // Set sign bit.
sub1)		sub1)
>;		>;

def : Pat <		def : Pat <
(fabs f32:$src),		(fabs f32:$src),
(V_AND_B32_e32 $src, (V_MOV_B32_e32 0x7fffffff))		(V_AND_B32_e32 $src, (V_MOV_B32_e32 (i32 0x7fffffff)))
>;		>;

def : Pat <		def : Pat <
(fneg f32:$src),		(fneg f32:$src),
(V_XOR_B32_e32 $src, (V_MOV_B32_e32 0x80000000))		(V_XOR_B32_e32 $src, (V_MOV_B32_e32 (i32 0x80000000)))
>;		>;

def : Pat <		def : Pat <
(fabs f64:$src),		(fabs f64:$src),
(REG_SEQUENCE VReg_64,		(REG_SEQUENCE VReg_64,
(i32 (EXTRACT_SUBREG f64:$src, sub0)),		(i32 (EXTRACT_SUBREG f64:$src, sub0)),
sub0,		sub0,
(V_AND_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),		(V_AND_B32_e32 (i32 (EXTRACT_SUBREG f64:$src, sub1)),
(V_MOV_B32_e32 0x7fffffff)), // Set sign bit.		(V_MOV_B32_e32 (i32 0x7fffffff))), // Set sign bit.
sub1)		sub1)
>;		>;

def : Pat <		def : Pat <
(fneg f64:$src),		(fneg f64:$src),
(REG_SEQUENCE VReg_64,		(REG_SEQUENCE VReg_64,
(i32 (EXTRACT_SUBREG f64:$src, sub0)),		(i32 (EXTRACT_SUBREG f64:$src, sub0)),
sub0,		sub0,
(V_XOR_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),		(V_XOR_B32_e32 (i32 (EXTRACT_SUBREG f64:$src, sub1)),
(V_MOV_B32_e32 0x80000000)),		(i32 (V_MOV_B32_e32 (i32 0x80000000)))),
sub1)		sub1)
>;		>;

/******** ================== ********/		/******** ================== ********/
/******** Immediate Patterns ********/		/******** Immediate Patterns ********/
/******** ================== ********/		/******** ================== ********/

def : Pat <		def : Pat <
Show All 38 Lines
/******** Intrinsic Patterns ********/		/******** Intrinsic Patterns ********/
/******** ================== ********/		/******** ================== ********/

def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;		def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;

def : Pat <		def : Pat <
(int_AMDGPU_cube v4f32:$src),		(int_AMDGPU_cube v4f32:$src),
(REG_SEQUENCE VReg_128,		(REG_SEQUENCE VReg_128,
(V_CUBETC_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),		(V_CUBETC_F32 0 /* src0_modifiers */, (f32 (EXTRACT_SUBREG $src, sub0)),
0 /* src1_modifiers */, (EXTRACT_SUBREG $src, sub1),		0 /* src1_modifiers */, (f32 (EXTRACT_SUBREG $src, sub1)),
0 /* src2_modifiers */, (EXTRACT_SUBREG $src, sub2),		0 /* src2_modifiers */, (f32 (EXTRACT_SUBREG $src, sub2)),
0 /* clamp /, 0 / omod */), sub0,		0 /* clamp /, 0 / omod */), sub0,
(V_CUBESC_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),		(V_CUBESC_F32 0 /* src0_modifiers */, (f32 (EXTRACT_SUBREG $src, sub0)),
0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub1),		0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
0 /* src2_modifiers */,(EXTRACT_SUBREG $src, sub2),		0 /* src2_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
0 /* clamp /, 0 / omod */), sub1,		0 /* clamp /, 0 / omod */), sub1,
(V_CUBEMA_F32 0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub0),		(V_CUBEMA_F32 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub0)),
0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub1),		0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub2),		0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
0 /* clamp /, 0 / omod */), sub2,		0 /* clamp /, 0 / omod */), sub2,
(V_CUBEID_F32 0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub0),		(V_CUBEID_F32 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub0)),
0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub1),		0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub2),		0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
0 /* clamp /, 0 / omod */), sub3)		0 /* clamp /, 0 / omod */), sub3)
>;		>;

def : Pat <		def : Pat <
(i32 (sext i1:$src0)),		(i32 (sext i1:$src0)),
(V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)		(V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)
>;		>;

class Ext32Pat <SDNode ext> : Pat <		class Ext32Pat <SDNode ext> : Pat <
(i32 (ext i1:$src0)),		(i32 (ext i1:$src0)),
(V_CNDMASK_B32_e64 (i32 0), (i32 1), $src0)		(V_CNDMASK_B32_e64 (i32 0), (i32 1), $src0)
>;		>;

def : Ext32Pat <zext>;		def : Ext32Pat <zext>;
def : Ext32Pat <anyext>;		def : Ext32Pat <anyext>;

// Offset in an 32-bit VGPR		// Offset in an 32-bit VGPR
def : Pat <		def : Pat <
(SIload_constant v4i32:$sbase, i32:$voff),		(SIload_constant v4i32:$sbase, i32:$voff),
(BUFFER_LOAD_DWORD_OFFEN $voff, $sbase, 0, 0, 0, 0, 0)		(BUFFER_LOAD_DWORD_OFFEN $voff, $sbase, (i32 0), 0, 0, 0, 0)
>;		>;

// The multiplication scales from [0,1] to the unsigned integer range		// The multiplication scales from [0,1] to the unsigned integer range
def : Pat <		def : Pat <
(AMDGPUurecip i32:$src0),		(AMDGPUurecip i32:$src0),
(V_CVT_U32_F32_e32		(V_CVT_U32_F32_e32
(V_MUL_F32_e32 CONST.FP_UINT_MAX_PLUS_1,		(V_MUL_F32_e32 (i32 CONST.FP_UINT_MAX_PLUS_1),
(V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))		(V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
>;		>;

def : Pat <		def : Pat <
(int_SI_tid),		(int_SI_tid),
(V_MBCNT_HI_U32_B32_e64 0xffffffff,		(V_MBCNT_HI_U32_B32_e64 (i32 0xffffffff),
(V_MBCNT_LO_U32_B32_e64 0xffffffff, 0))		(V_MBCNT_LO_U32_B32_e64 (i32 0xffffffff), (i32 0)))
>;		>;

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// VOP3 Patterns		// VOP3 Patterns
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

def : IMad24Pat<V_MAD_I32_I24>;		def : IMad24Pat<V_MAD_I32_I24>;
def : UMad24Pat<V_MAD_U32_U24>;		def : UMad24Pat<V_MAD_U32_U24>;
Show All 40 Lines
let AddedComplexity = 100 in {		let AddedComplexity = 100 in {

def : DSWritePat <DS_WRITE_B64, v2i32, si_store_local_align8>;		def : DSWritePat <DS_WRITE_B64, v2i32, si_store_local_align8>;
} // End AddedComplexity = 100		} // End AddedComplexity = 100

def : Pat <		def : Pat <
(si_store_local v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,		(si_store_local v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
i8:$offset1)),		i8:$offset1)),
(DS_WRITE2_B32 $ptr, (EXTRACT_SUBREG $value, sub0),		(DS_WRITE2_B32 $ptr, (i32 (EXTRACT_SUBREG $value, sub0)),
(EXTRACT_SUBREG $value, sub1), $offset0, $offset1,		(i32 (EXTRACT_SUBREG $value, sub1)), $offset0, $offset1,
(i1 0))		(i1 0))
>;		>;

class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <		class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),		(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
(inst $ptr, $value, (as_i16imm $offset), (i1 0))		(inst $ptr, $value, (as_i16imm $offset), (i1 0))
>;		>;

▲ Show 20 Lines • Show All 184 Lines • ▼ Show 20 Lines
defm : SI_INDIRECT_Pattern <v8i32, i32, "V8">;		defm : SI_INDIRECT_Pattern <v8i32, i32, "V8">;
defm : SI_INDIRECT_Pattern <v16i32, i32, "V16">;		defm : SI_INDIRECT_Pattern <v16i32, i32, "V16">;

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// Conversion Patterns		// Conversion Patterns
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

def : Pat<(i32 (sext_inreg i32:$src, i1)),		def : Pat<(i32 (sext_inreg i32:$src, i1)),
(S_BFE_I32 i32:$src, 65536)>; // 0 \| 1 << 16		(S_BFE_I32 i32:$src, (i32 65536))>; // 0 \| 1 << 16

// Handle sext_inreg in i64		// Handle sext_inreg in i64
def : Pat <		def : Pat <
(i64 (sext_inreg i64:$src, i1)),		(i64 (sext_inreg i64:$src, i1)),
(S_BFE_I64 i64:$src, 0x10000) // 0 \| 1 << 16		(S_BFE_I64 i64:$src, (i32 0x10000)) // 0 \| 1 << 16
>;		>;

def : Pat <		def : Pat <
(i64 (sext_inreg i64:$src, i8)),		(i64 (sext_inreg i64:$src, i8)),
(S_BFE_I64 i64:$src, 0x80000) // 0 \| 8 << 16		(S_BFE_I64 i64:$src, (i32 0x80000)) // 0 \| 8 << 16
>;		>;

def : Pat <		def : Pat <
(i64 (sext_inreg i64:$src, i16)),		(i64 (sext_inreg i64:$src, i16)),
(S_BFE_I64 i64:$src, 0x100000) // 0 \| 16 << 16		(S_BFE_I64 i64:$src, (i32 0x100000)) // 0 \| 16 << 16
>;		>;

def : Pat <		def : Pat <
(i64 (sext_inreg i64:$src, i32)),		(i64 (sext_inreg i64:$src, i32)),
(S_BFE_I64 i64:$src, 0x200000) // 0 \| 32 << 16		(S_BFE_I64 i64:$src, (i32 0x200000)) // 0 \| 32 << 16
>;		>;

class ZExt_i64_i32_Pat <SDNode ext> : Pat <		class ZExt_i64_i32_Pat <SDNode ext> : Pat <
(i64 (ext i32:$src)),		(i64 (ext i32:$src)),
(REG_SEQUENCE SReg_64, $src, sub0, (S_MOV_B32 0), sub1)		(REG_SEQUENCE SReg_64, $src, sub0, (S_MOV_B32 (i32 0)), sub1)
>;		>;

class ZExt_i64_i1_Pat <SDNode ext> : Pat <		class ZExt_i64_i1_Pat <SDNode ext> : Pat <
(i64 (ext i1:$src)),		(i64 (ext i1:$src)),
(REG_SEQUENCE VReg_64,		(REG_SEQUENCE VReg_64,
(V_CNDMASK_B32_e64 (i32 0), (i32 1), $src), sub0,		(V_CNDMASK_B32_e64 (i32 0), (i32 1), $src), sub0,
(S_MOV_B32 0), sub1)		(S_MOV_B32 (i32 0)), sub1)
>;		>;


def : ZExt_i64_i32_Pat<zext>;		def : ZExt_i64_i32_Pat<zext>;
def : ZExt_i64_i32_Pat<anyext>;		def : ZExt_i64_i32_Pat<anyext>;
def : ZExt_i64_i1_Pat<zext>;		def : ZExt_i64_i1_Pat<zext>;
def : ZExt_i64_i1_Pat<anyext>;		def : ZExt_i64_i1_Pat<anyext>;

// FIXME: We need to use COPY_TO_REGCLASS to work-around the fact that		// FIXME: We need to use COPY_TO_REGCLASS to work-around the fact that
// REG_SEQUENCE patterns don't support instructions with multiple outputs.		// REG_SEQUENCE patterns don't support instructions with multiple outputs.
def : Pat <		def : Pat <
(i64 (sext i32:$src)),		(i64 (sext i32:$src)),
(REG_SEQUENCE SReg_64, $src, sub0,		(REG_SEQUENCE SReg_64, $src, sub0,
(i32 (COPY_TO_REGCLASS (S_ASHR_I32 $src, 31), SGPR_32)), sub1)		(i32 (COPY_TO_REGCLASS (S_ASHR_I32 $src, (i32 31)), SGPR_32)), sub1)
>;		>;

def : Pat <		def : Pat <
(i64 (sext i1:$src)),		(i64 (sext i1:$src)),
(REG_SEQUENCE VReg_64,		(REG_SEQUENCE VReg_64,
(V_CNDMASK_B32_e64 0, -1, $src), sub0,		(V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src), sub0,
(V_CNDMASK_B32_e64 0, -1, $src), sub1)		(V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src), sub1)
>;		>;

// If we need to perform a logical operation on i1 values, we need to		// If we need to perform a logical operation on i1 values, we need to
// use vector comparisons since there is only one SCC register. Vector		// use vector comparisons since there is only one SCC register. Vector
// comparisions still write to a pair of SGPRs, so treat these as		// comparisions still write to a pair of SGPRs, so treat these as
// 64-bit comparisons. When legalizing SGPR copies, instructions		// 64-bit comparisons. When legalizing SGPR copies, instructions
// resulting in the copies from SCC to these instructions will be		// resulting in the copies from SCC to these instructions will be
// moved to the VALU.		// moved to the VALU.
Show All 9 Lines

def : Pat <		def : Pat <
(i1 (xor i1:$src0, i1:$src1)),		(i1 (xor i1:$src0, i1:$src1)),
(S_XOR_B64 $src0, $src1)		(S_XOR_B64 $src0, $src1)
>;		>;

def : Pat <		def : Pat <
(f32 (sint_to_fp i1:$src)),		(f32 (sint_to_fp i1:$src)),
(V_CNDMASK_B32_e64 (i32 0), CONST.FP32_NEG_ONE, $src)		(V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_NEG_ONE), $src)
>;		>;

def : Pat <		def : Pat <
(f32 (uint_to_fp i1:$src)),		(f32 (uint_to_fp i1:$src)),
(V_CNDMASK_B32_e64 (i32 0), CONST.FP32_ONE, $src)		(V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_ONE), $src)
>;		>;

def : Pat <		def : Pat <
(f64 (sint_to_fp i1:$src)),		(f64 (sint_to_fp i1:$src)),
(V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src))		(V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src))
>;		>;

def : Pat <		def : Pat <
(f64 (uint_to_fp i1:$src)),		(f64 (uint_to_fp i1:$src)),
(V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src))		(V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src))
>;		>;

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// Miscellaneous Patterns		// Miscellaneous Patterns
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

def : Pat <		def : Pat <
(i32 (trunc i64:$a)),		(i32 (trunc i64:$a)),
(EXTRACT_SUBREG $a, sub0)		(EXTRACT_SUBREG $a, sub0)
>;		>;

def : Pat <		def : Pat <
(i1 (trunc i32:$a)),		(i1 (trunc i32:$a)),
(V_CMP_EQ_I32_e64 (S_AND_B32 (i32 1), $a), 1)		(V_CMP_EQ_I32_e64 (S_AND_B32 (i32 1), $a), (i32 1))
>;		>;

def : Pat <		def : Pat <
(i1 (trunc i64:$a)),		(i1 (trunc i64:$a)),
(V_CMP_EQ_I32_e64 (S_AND_B32 (i32 1),		(V_CMP_EQ_I32_e64 (S_AND_B32 (i32 1),
(EXTRACT_SUBREG $a, sub0)), 1)		(i32 (EXTRACT_SUBREG $a, sub0))), (i32 1))
>;		>;

def : Pat <		def : Pat <
(i32 (bswap i32:$a)),		(i32 (bswap i32:$a)),
(V_BFI_B32 (S_MOV_B32 0x00ff00ff),		(V_BFI_B32 (S_MOV_B32 (i32 0x00ff00ff)),
(V_ALIGNBIT_B32 $a, $a, 24),		(V_ALIGNBIT_B32 $a, $a, (i32 24)),
(V_ALIGNBIT_B32 $a, $a, 8))		(V_ALIGNBIT_B32 $a, $a, (i32 8)))
>;		>;

def : Pat <		def : Pat <
(f32 (select i1:$src2, f32:$src1, f32:$src0)),		(f32 (select i1:$src2, f32:$src1, f32:$src0)),
(V_CNDMASK_B32_e64 $src0, $src1, $src2)		(V_CNDMASK_B32_e64 $src0, $src1, $src2)
>;		>;

multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {		multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
def : Pat <		def : Pat <
(vt (shl (vt (add (vt (shl 1, vt:$a)), -1)), vt:$b)),		(vt (shl (vt (add (vt (shl 1, vt:$a)), -1)), vt:$b)),
(BFM $a, $b)		(BFM $a, $b)
>;		>;

def : Pat <		def : Pat <
(vt (add (vt (shl 1, vt:$a)), -1)),		(vt (add (vt (shl 1, vt:$a)), -1)),
(BFM $a, (MOV 0))		(BFM $a, (MOV (i32 0)))
>;		>;
}		}

defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;		defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
// FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>;		// FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>;

def : BFEPattern <V_BFE_U32, S_MOV_B32>;		def : BFEPattern <V_BFE_U32, S_MOV_B32>;

Show All 23 Lines	def : Pat <
(V_CNDMASK_B64_PSEUDO		(V_CNDMASK_B64_PSEUDO
(V_MIN_F64		(V_MIN_F64
SRCMODS.NONE,		SRCMODS.NONE,
(V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),		(V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),
SRCMODS.NONE,		SRCMODS.NONE,
(V_MOV_B64_PSEUDO 0x3fefffffffffffff),		(V_MOV_B64_PSEUDO 0x3fefffffffffffff),
DSTCLAMP.NONE, DSTOMOD.NONE),		DSTCLAMP.NONE, DSTOMOD.NONE),
$x,		$x,
(V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/NaN/))		(V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, (i32 3/NaN/)))
>;		>;

// Convert floor(x) to (x - fract(x))		// Convert floor(x) to (x - fract(x))
def : Pat <		def : Pat <
(f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))),		(f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))),
(V_ADD_F64		(V_ADD_F64
$mods,		$mods,
$x,		$x,
SRCMODS.NEG,		SRCMODS.NEG,
(V_CNDMASK_B64_PSEUDO		(V_CNDMASK_B64_PSEUDO
(V_MIN_F64		(V_MIN_F64
SRCMODS.NONE,		SRCMODS.NONE,
(V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),		(V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),
SRCMODS.NONE,		SRCMODS.NONE,
(V_MOV_B64_PSEUDO 0x3fefffffffffffff),		(V_MOV_B64_PSEUDO 0x3fefffffffffffff),
DSTCLAMP.NONE, DSTOMOD.NONE),		DSTCLAMP.NONE, DSTOMOD.NONE),
$x,		$x,
(V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/NaN/)),		(V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, (i32 3 /NaN/))),
DSTCLAMP.NONE, DSTOMOD.NONE)		DSTCLAMP.NONE, DSTOMOD.NONE)
>;		>;

} // End Predicates = [isSI]		} // End Predicates = [isSI]

//============================================================================//		//============================================================================//
// Miscellaneous Optimization Patterns		// Miscellaneous Optimization Patterns
//============================================================================//		//============================================================================//
Show All 15 Lines

lib/Target/AMDGPU/SIRegisterInfo.td

Show First 20 Lines • Show All 83 Lines • ▼ Show 20 Lines
def SCC_CLASS : RegisterClass<"AMDGPU", [i1], 1, (add SCC)> {		def SCC_CLASS : RegisterClass<"AMDGPU", [i1], 1, (add SCC)> {
let CopyCost = -1;		let CopyCost = -1;
let isAllocatable = 0;		let isAllocatable = 0;
}		}

// TODO: Do we need to set DwarfRegAlias on register tuples?		// TODO: Do we need to set DwarfRegAlias on register tuples?

// SGPR 32-bit registers		// SGPR 32-bit registers
def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,		def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16], 32,
(add (sequence "SGPR%u", 0, 103))>;		(add (sequence "SGPR%u", 0, 103))>;

// SGPR 64-bit registers		// SGPR 64-bit registers
def SGPR_64Regs : RegisterTuples<[sub0, sub1],		def SGPR_64Regs : RegisterTuples<[sub0, sub1],
[(add (decimate SGPR_32, 2)),		[(add (decimate SGPR_32, 2)),
(add (decimate (shl SGPR_32, 1), 2))]>;		(add (decimate (shl SGPR_32, 1), 2))]>;

// SGPR 128-bit registers		// SGPR 128-bit registers
Show All 30 Lines	def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
(add (decimate (shl SGPR_32, 10), 4)),		(add (decimate (shl SGPR_32, 10), 4)),
(add (decimate (shl SGPR_32, 11), 4)),		(add (decimate (shl SGPR_32, 11), 4)),
(add (decimate (shl SGPR_32, 12), 4)),		(add (decimate (shl SGPR_32, 12), 4)),
(add (decimate (shl SGPR_32, 13), 4)),		(add (decimate (shl SGPR_32, 13), 4)),
(add (decimate (shl SGPR_32, 14), 4)),		(add (decimate (shl SGPR_32, 14), 4)),
(add (decimate (shl SGPR_32, 15), 4))]>;		(add (decimate (shl SGPR_32, 15), 4))]>;

// VGPR 32-bit registers		// VGPR 32-bit registers
def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,		// i16 only on VI+
		def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16], 32,
(add (sequence "VGPR%u", 0, 255))>;		(add (sequence "VGPR%u", 0, 255))>;

// VGPR 64-bit registers		// VGPR 64-bit registers
def VGPR_64 : RegisterTuples<[sub0, sub1],		def VGPR_64 : RegisterTuples<[sub0, sub1],
[(add (trunc VGPR_32, 255)),		[(add (trunc VGPR_32, 255)),
(add (shl VGPR_32, 1))]>;		(add (shl VGPR_32, 1))]>;

// VGPR 96-bit registers		// VGPR 96-bit registers
▲ Show 20 Lines • Show All 45 Lines • ▼ Show 20 Lines
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

class RegImmMatcher<string name> : AsmOperandClass {		class RegImmMatcher<string name> : AsmOperandClass {
let Name = name;		let Name = name;
let RenderMethod = "addRegOrImmOperands";		let RenderMethod = "addRegOrImmOperands";
}		}

// Register class for all scalar registers (SGPRs + Special Registers)		// Register class for all scalar registers (SGPRs + Special Registers)
def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32,		def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16], 32,
(add SGPR_32, M0, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI)		(add SGPR_32, M0, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI)
>;		>;

def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add SGPR_64Regs)>;		def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add SGPR_64Regs)>;

def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32,		def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32,
(add SGPR_64, VCC, EXEC, FLAT_SCR)		(add SGPR_64, VCC, EXEC, FLAT_SCR)
>;		>;
▲ Show 20 Lines • Show All 72 Lines • ▼ Show 20 Lines
def SCSrc_32 : RegInlineOperand<SReg_32> {		def SCSrc_32 : RegInlineOperand<SReg_32> {
let ParserMatchClass = RegImmMatcher<"SCSrc32">;		let ParserMatchClass = RegImmMatcher<"SCSrc32">;
}		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate		// VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

def VS_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VGPR_32, SReg_32)>;		def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16], 32, (add VGPR_32, SReg_32)>;

def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64)> {		def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64)> {
let CopyCost = 2;		let CopyCost = 2;
}		}

def VSrc_32 : RegisterOperand<VS_32> {		def VSrc_32 : RegisterOperand<VS_32> {
let OperandNamespace = "AMDGPU";		let OperandNamespace = "AMDGPU";
let OperandType = "OPERAND_REG_IMM32";		let OperandType = "OPERAND_REG_IMM32";
Show All 34 Lines

lib/Target/AMDGPU/VIInstructions.td

	Show First 20 Lines • Show All 117 Lines • ▼ Show 20 Lines

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	// DPP Patterns			// DPP Patterns
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	def : Pat <			def : Pat <
	(int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,			(int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
	imm:$bound_ctrl),			imm:$bound_ctrl),
	(V_MOV_B32_dpp $src, (as_i32imm $dpp_ctrl), (as_i32imm $row_mask),			(i32 (V_MOV_B32_dpp $src, (as_i32imm $dpp_ctrl), (as_i32imm $row_mask),
	(as_i32imm $bank_mask), (as_i1imm $bound_ctrl))			(as_i32imm $bank_mask), (as_i1imm $bound_ctrl)))
	>;			>;

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	// Misc Patterns			// Misc Patterns
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	def : Pat <			def : Pat <
	(i64 (readcyclecounter)),			(i64 (readcyclecounter)),
				arsenmUnsubmitted Not Done Reply Inline Actions This pattern is necessary. I believe I had a test for this in my original patch arsenm: This pattern is necessary. I believe I had a test for this in my original patch
	(S_MEMREALTIME)			(S_MEMREALTIME)
	>;			>;

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	// DS_PERMUTE/DS_BPERMUTE Instructions.			// DS_PERMUTE/DS_BPERMUTE Instructions.
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	let Uses = [EXEC] in {			let Uses = [EXEC] in {
	defm DS_PERMUTE_B32 : DS_1A1D_PERMUTE <0x3e, "ds_permute_b32", VGPR_32,			defm DS_PERMUTE_B32 : DS_1A1D_PERMUTE <0x3e, "ds_permute_b32", VGPR_32,
	int_amdgcn_ds_permute>;			int_amdgcn_ds_permute>;
	defm DS_BPERMUTE_B32 : DS_1A1D_PERMUTE <0x3f, "ds_bpermute_b32", VGPR_32,			defm DS_BPERMUTE_B32 : DS_1A1D_PERMUTE <0x3f, "ds_bpermute_b32", VGPR_32,
	int_amdgcn_ds_bpermute>;			int_amdgcn_ds_bpermute>;
	}			}
				arsenmUnsubmitted Not Done Reply Inline Actions Should follow camel case naming convention arsenm: Should follow camel case naming convention

				//===----------------------------------------------------------------------===//
				arsenmUnsubmitted Not Done Reply Inline Actions No spaces around the :s arsenm: No spaces around the :s
				// i16 Patterns
				//===----------------------------------------------------------------------===//
				arsenmUnsubmitted Not Done Reply Inline Actions Dead code should be removed arsenm: Dead code should be removed

				def : Pat <
				arsenmUnsubmitted Not Done Reply Inline Actions Should be indented like other Pats in the file arsenm: Should be indented like other Pats in the file
				(i16 imm:$imm),
				(S_MOV_B32 imm:$imm)
				>;

				def : Pat<
				arsenmUnsubmitted Not Done Reply Inline Actions This is incorrect if this is a scalar zext, which currently doesn't happen because there are no scalar i16 instructions (although we may want pseudos for these). To be consistent, this should use S_MOV_B32 to materialize the 0 arsenm: This is incorrect if this is a scalar zext, which currently doesn't happen because there are no…
				(i32 (anyext i16:$src)),
				(COPY $src)
				>;

				// Zero-extend a scalar i16 to i64: the low dword is the source masked with
				// 0xffff (S_AND_B32) and the high dword is a zero materialized by S_MOV_B32.
				// FIXME: We need to use COPY_TO_REGCLASS to work-around the fact that
				// REG_SEQUENCE patterns don't support instructions with multiple
				// outputs.
				def : Pat<
				(i64 (zext i16:$src)),
				(REG_SEQUENCE SReg_64,
				(i32 (COPY_TO_REGCLASS (S_AND_B32 $src, (i32 0xffff)), SGPR_32)), sub0,
				(S_MOV_B32 (i32 0)), sub1)
				>;

				// Sign-extend a scalar i16 to i64: the low dword is S_SEXT_I32_I16 of the
				// source; the high dword is that same sign-extended value shifted right
				// arithmetically by 31. COPY_TO_REGCLASS wraps the shift because it is
				// used as a REG_SEQUENCE input.
				def : Pat <
				(i64 (sext i16:$src)),
				(REG_SEQUENCE SReg_64, (i32 (S_SEXT_I32_I16 $src)), sub0,
				(i32 (COPY_TO_REGCLASS (S_ASHR_I32 (i32 (S_SEXT_I32_I16 $src)), (i32 31)), SGPR_32)), sub1)
				>;

				// Sign-extend i16 to i32 with a single S_SEXT_I32_I16.
				// Same as a 32-bit inreg
				def : Pat<
				(i32 (sext i16:$src)),
				(S_SEXT_I32_I16 $src)
				>;

				// Truncating i32 to i16 is selected as a plain COPY of the source; no
				// masking instruction is emitted by this pattern.
				def : Pat<
				(i16 (trunc i32:$src)),
				(COPY $src)
				>;

				arsenmUnsubmitted Not Done Reply Inline Actions These should be using the signed min/max. I also think the min/max matching should be a separate patch arsenm: These should be using the signed min/max. I also think the min/max matching should be a…
				// Pattern class extending an i1 to i16 with the extension node `ext`.
				// Selected as V_CNDMASK_B32_e64 with constant operands 0 and 1 and the i1
				// as the condition operand.
				class ZExt_i16_i1_Pat <SDNode ext> : Pat <
				(i16 (ext i1:$src)),
				(V_CNDMASK_B32_e64 (i32 0), (i32 1), $src)
				>;

				arsenmUnsubmitted Not Done Reply Inline Actions These should be done in a separate patch arsenm: These should be done in a separate patch
				// Use the same 0/1 select for both zext and anyext of an i1 to i16.
				def : ZExt_i16_i1_Pat<zext>;
				def : ZExt_i16_i1_Pat<anyext>;

				// i16 select on an i1 condition, selected as V_CNDMASK_B32_e64. The
				// operands are passed as (false value, true value, condition), i.e. in
				// the reverse of the order they appear in the select node.
				def : Pat <
				(i16 (select i1:$src0, i16:$src1, i16:$src2)),
				(V_CNDMASK_B32_e64 $src2, $src1, $src0)
				>;

				// Note: 16-bit instructions produce a 0 result in the high 16-bits.
				multiclass Arithmetic_i16_Pats <SDPatternOperator op, Instruction inst> {

				def : Pat<
				(op i16:$src0, i16:$src1),
				(inst i16:$src0, i16:$src1)
				>;

				def : Pat<
				(i32 (zext (op i16:$src0, i16:$src1))),
				(inst i16:$src0, i16:$src1)
				>;

				arsenmUnsubmitted Not Done Reply Inline Actions Dead code should be removed. These instruction's dont exist. However, tests should be added to the rotl/rotr/bswap/ctlz/cttz to make sure these are properly expanded when i16 is added as legal since most operations by default are assumed to be legal if the type is arsenm: Dead code should be removed. These instruction's dont exist. However, tests should be added to…
				def : Pat<
				(i64 (zext (op i16:$src0, i16:$src1))),
				(REG_SEQUENCE VReg_64,
				(inst i16:$src0, i16:$src1), sub0,
				(S_MOV_B32 (i32 0)), sub1)
				>;
				}

				// Patterns for i16 shifts selected to the operand-reversed ("REV") VOP2
				// shift instructions. Those instructions take the shift amount as their
				// first source and the value being shifted as their second, whereas the
				// DAG node is (op value, amount), so the operands are swapped in the
				// selected instruction. Passing them through unswapped would compute
				// (amount shifted by value).
				multiclass Bits_Ops_i16_Pats <SDPatternOperator op, Instruction inst> {

				// Plain i16 shift.
				def : Pat<
				(op i16:$src0, i32:$src1),
				(inst i32:$src1, i16:$src0)
				>;

				// Shift whose result is zero-extended to i32: the extension is folded
				// into the instruction itself.
				def : Pat<
				(i32 (zext (op i16:$src0, i32:$src1))),
				(inst i32:$src1, i16:$src0)
				>;


				// Shift whose result is zero-extended to i64: low dword is the shift
				// result, high dword is a materialized zero.
				def : Pat<
				(i64 (zext (op i16:$src0, i32:$src1))),
				(REG_SEQUENCE VReg_64,
				(inst i32:$src1, i16:$src0), sub0,
				(S_MOV_B32 (i32 0)), sub1)
				>;
				}

				// Instantiate the i16 arithmetic patterns. smin/smax are signed
				// comparisons, so they must select the signed V_MIN_I16/V_MAX_I16
				// instructions; the unsigned U16 forms would mis-order values that have
				// the sign bit set.
				defm : Arithmetic_i16_Pats<add, V_ADD_U16_e32>;
				defm : Arithmetic_i16_Pats<sub, V_SUB_U16_e32>;
				defm : Arithmetic_i16_Pats<smin, V_MIN_I16_e32>;
				defm : Arithmetic_i16_Pats<smax, V_MAX_I16_e32>;

				// Instantiate the i16 shift patterns on the *REV shift instructions.
				defm : Bits_Ops_i16_Pats<shl, V_LSHLREV_B16_e32>;
				defm : Bits_Ops_i16_Pats<srl, V_LSHRREV_B16_e32>;
				defm : Bits_Ops_i16_Pats<sra, V_ASHRREV_B16_e32>;
	} // End Predicates = [isVI]			} // End Predicates = [isVI]

test/CodeGen/AMDGPU/add.i16.ll

This file was added.

				; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s \| FileCheck -check-prefix=VI -check-prefix=GCN %s

				; i16 add of two values loaded from per-workitem global addresses. Expects
				; two 16-bit loads, one v_add_u16, and a 16-bit store directly after it.
				; NOTE(review): %gep.out is computed but unused; the store targets %out.
				; GCN-LABEL: {{^}}v_test_add_i16:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: flat_load_ushort [[B:v[0-9]+]]
				; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
				; VI-NEXT: buffer_store_short [[ADD]]
				define void @v_test_add_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid
				%a = load volatile i16, i16 addrspace(1)* %gep.in0
				%b = load volatile i16, i16 addrspace(1)* %gep.in1
				%add = add i16 %a, %b
				store i16 %add, i16 addrspace(1)* %out
				ret void
				}

				; i16 add of a loaded value and the constant 123. Expects the constant to
				; appear as the literal operand 0x7b of v_add_u16.
				; NOTE(review): %gep.out is computed but unused; the store targets %out.
				; GCN-LABEL: {{^}}v_test_add_i16_constant:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: v_add_u16_e32 [[ADD:v[0-9]+]], 0x7b, [[A]]
				; VI-NEXT: buffer_store_short [[ADD]]
				define void @v_test_add_i16_constant(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%a = load volatile i16, i16 addrspace(1)* %gep.in0
				%add = add i16 %a, 123
				store i16 %add, i16 addrspace(1)* %out
				ret void
				}

				; i16 add of a loaded value and the constant -845. Expects the constant to
				; be printed as the 32-bit literal 0xfffffcb3 on v_add_u16.
				; NOTE(review): %gep.out is computed but unused; the store targets %out.
				; GCN-LABEL: {{^}}v_test_add_i16_neg_constant:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: v_add_u16_e32 [[ADD:v[0-9]+]], 0xfffffcb3, [[A]]
				; VI-NEXT: buffer_store_short [[ADD]]
				define void @v_test_add_i16_neg_constant(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%a = load volatile i16, i16 addrspace(1)* %gep.in0
				%add = add i16 %a, -845
				store i16 %add, i16 addrspace(1)* %out
				ret void
				}

				; i16 add of a loaded value and -1. Expects -1 to be used directly as an
				; operand of v_add_u16 rather than as a hex literal.
				; NOTE(review): %gep.out is computed but unused; the store targets %out.
				; GCN-LABEL: {{^}}v_test_add_i16_inline_neg1:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: v_add_u16_e32 [[ADD:v[0-9]+]], -1, [[A]]
				; VI-NEXT: buffer_store_short [[ADD]]
				define void @v_test_add_i16_inline_neg1(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%a = load volatile i16, i16 addrspace(1)* %gep.in0
				%add = add i16 %a, -1
				store i16 %add, i16 addrspace(1)* %out
				ret void
				}

				; i16 add whose result is zero-extended to i32. Expects the add result to
				; be stored as a dword with no instruction between the add and the store.
				; NOTE(review): %gep.out is computed but unused; the store targets %out.
				; GCN-LABEL: {{^}}v_test_add_i16_zext_to_i32:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: flat_load_ushort [[B:v[0-9]+]]
				; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
				; VI-NEXT: buffer_store_dword [[ADD]]
				define void @v_test_add_i16_zext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid
				%a = load volatile i16, i16 addrspace(1)* %gep.in0
				%b = load volatile i16, i16 addrspace(1)* %gep.in1
				%add = add i16 %a, %b
				%ext = zext i16 %add to i32
				store i32 %ext, i32 addrspace(1)* %out
				ret void
				}

				; i16 add whose result is zero-extended to i64. Expects the add result as
				; the low register and a v_mov of 0 as the high register of a 64-bit store.
				; NOTE(review): %gep.out is computed but unused; the store targets %out.
				; GCN-LABEL: {{^}}v_test_add_i16_zext_to_i64:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: flat_load_ushort [[B:v[0-9]+]]
				; VI-DAG: v_add_u16_e32 v[[ADD:[0-9]+]], [[A]], [[B]]
				; VI-DAG: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0{{$}}
				; VI: buffer_store_dwordx2 v{{\[}}[[ADD]]:[[VZERO]]{{\]}}
				define void @v_test_add_i16_zext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i64, i64 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid
				%a = load volatile i16, i16 addrspace(1)* %gep.in0
				%b = load volatile i16, i16 addrspace(1)* %gep.in1
				%add = add i16 %a, %b
				%ext = zext i16 %add to i64
				store i64 %ext, i64 addrspace(1)* %out
				ret void
				}

				; i16 add whose result is sign-extended to i32. Expects the extension to
				; be a v_bfe_i32 of width 16 applied to the add result.
				; NOTE(review): %gep.out is computed but unused; the store targets %out.
				; GCN-LABEL: {{^}}v_test_add_i16_sext_to_i32:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: flat_load_ushort [[B:v[0-9]+]]
				; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
				; VI-NEXT: v_bfe_i32 [[SEXT:v[0-9]+]], [[ADD]], 0, 16
				; VI-NEXT: buffer_store_dword [[SEXT]]
				define void @v_test_add_i16_sext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid
				%a = load i16, i16 addrspace(1)* %gep.in0
				%b = load i16, i16 addrspace(1)* %gep.in1
				%add = add i16 %a, %b
				%ext = sext i16 %add to i32
				store i32 %ext, i32 addrspace(1)* %out
				ret void
				}

				; i16 add whose result is sign-extended to i64. Expects v_bfe_i32 for the
				; low half and an arithmetic shift right by 31 of that value for the high
				; half.
				; NOTE(review): %gep.out is computed but unused; the store targets %out.
				; GCN-LABEL: {{^}}v_test_add_i16_sext_to_i64:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: flat_load_ushort [[B:v[0-9]+]]
				; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
				; VI-NEXT: v_bfe_i32 v[[LO:[0-9]+]], [[ADD]], 0, 16
				; VI-NEXT: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
				; VI-NEXT: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
				define void @v_test_add_i16_sext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i64, i64 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid
				%a = load i16, i16 addrspace(1)* %gep.in0
				%b = load i16, i16 addrspace(1)* %gep.in1
				%add = add i16 %a, %b
				%ext = sext i16 %add to i64
				store i64 %ext, i64 addrspace(1)* %out
				ret void
				}

				; i16 add of two kernel arguments. Expects both to be loaded with
				; s_load_dword, one copied to a VGPR, and the add done by v_add_u16.
				; GCN-LABEL: {{^}}s_test_add_i16:
				; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c
				; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30
				; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
				; VI: v_add_u16_e32 [[RESULT:v[0-9]+]], [[B]], [[VA]]
				; VI-NEXT: buffer_store_short [[RESULT]]
				define void @s_test_add_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) #1 {
				%add = add i16 %a, %b
				store i16 %add, i16 addrspace(1)* %out
				ret void
				}

				; Same as s_test_add_i16 but with zeroext kernel arguments; the expected
				; instruction sequence is identical.
				; GCN-LABEL: {{^}}s_test_add_i16_zeroext_args:
				; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c
				; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30
				; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
				; VI: v_add_u16_e32 [[RESULT:v[0-9]+]], [[B]], [[VA]]
				; VI-NEXT: buffer_store_short [[RESULT]]
				define void @s_test_add_i16_zeroext_args(i16 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) #1 {
				%add = add i16 %a, %b
				store i16 %add, i16 addrspace(1)* %out
				ret void
				}

				; Same as s_test_add_i16 but with signext kernel arguments; the expected
				; instruction sequence is identical.
				; GCN-LABEL: {{^}}s_test_add_i16_signext_args:
				; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c
				; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30
				; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
				; VI: v_add_u16_e32 [[RESULT:v[0-9]+]], [[B]], [[VA]]
				; VI-NEXT: buffer_store_short [[RESULT]]
				define void @s_test_add_i16_signext_args(i16 addrspace(1)* %out, i16 signext %a, i16 signext %b) #1 {
				%add = add i16 %a, %b
				store i16 %add, i16 addrspace(1)* %out
				ret void
				}

				; i16 add of two zeroext kernel arguments, zero-extended to i32. Expects
				; the v_add_u16 result itself to be stored as a dword. The store check
				; must reference [[ADD]], the variable captured by the add in this
				; function, not a variable left over from an earlier function.
				; GCN-LABEL: {{^}}s_test_add_i16_zext_to_i32:
				; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c
				; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30
				; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
				; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[B]], [[VA]]
				; VI-NEXT: buffer_store_dword [[ADD]]
				define void @s_test_add_i16_zext_to_i32(i32 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) #1 {
				%add = add i16 %a, %b
				%ext = zext i16 %add to i32
				store i32 %ext, i32 addrspace(1)* %out
				ret void
				}

				; i16 add of two zeroext kernel arguments, zero-extended to i64. Expects
				; the add result as the low register and a v_mov of 0 as the high register
				; of the 64-bit store.
				; GCN-LABEL: {{^}}s_test_add_i16_zext_to_i64:
				; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c
				; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30
				; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
				; VI-DAG: v_add_u16_e32 v[[LO:[0-9]+]], [[B]], [[VA]]
				; VI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
				; VI-NEXT: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
				define void @s_test_add_i16_zext_to_i64(i64 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) #1 {
				%add = add i16 %a, %b
				%ext = zext i16 %add to i64
				store i64 %ext, i64 addrspace(1)* %out
				ret void
				}

				; i16 add of two signext kernel arguments, sign-extended to i32. Expects
				; a v_bfe_i32 of width 16 on the add result before the dword store.
				; GCN-LABEL: {{^}}s_test_add_i16_sext_to_i32:
				; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c
				; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30
				; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
				; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[B]], [[VA]]
				; VI-NEXT: v_bfe_i32 [[RESULT:v[0-9]+]], [[ADD]], 0, 16
				; VI-NEXT: buffer_store_dword [[RESULT]]
				define void @s_test_add_i16_sext_to_i32(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) #1 {
				%add = add i16 %a, %b
				%ext = sext i16 %add to i32
				store i32 %ext, i32 addrspace(1)* %out
				ret void
				}

				; i16 add of two signext kernel arguments, sign-extended to i64. Expects
				; v_bfe_i32 of the add result for the low half and an arithmetic shift
				; right by 31 of that value for the high half. The add result is captured
				; as [[ADD]] so that the v_bfe_i32 check reads this function's add rather
				; than a variable left over from an earlier function.
				; GCN-LABEL: {{^}}s_test_add_i16_sext_to_i64:
				; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c
				; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30
				; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
				; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[B]], [[VA]]
				; VI-NEXT: v_bfe_i32 v[[LO:[0-9]+]], [[ADD]], 0, 16
				; VI-NEXT: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
				; VI-NEXT: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
				define void @s_test_add_i16_sext_to_i64(i64 addrspace(1)* %out, i16 signext %a, i16 signext %b) #1 {
				%add = add i16 %a, %b
				%ext = sext i16 %add to i64
				store i64 %ext, i64 addrspace(1)* %out
				ret void
				}

				declare i32 @llvm.amdgcn.workitem.id.x() #0

				attributes #0 = { nounwind readnone }
				attributes #1 = { nounwind }

test/CodeGen/AMDGPU/anyext.ll

	; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs \| FileCheck %s			; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN -check-prefix=SI %s
	; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs \| FileCheck %s			; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN -check-prefix=VI %s

	; CHECK-LABEL: {{^}}anyext_i1_i32:			; GCN-LABEL: {{^}}anyext_i1_i32:
	; CHECK: v_cndmask_b32_e64			; GCN: v_cndmask_b32_e64
	define void @anyext_i1_i32(i32 addrspace(1)* %out, i32 %cond) {			define void @anyext_i1_i32(i32 addrspace(1)* %out, i32 %cond) {
	entry:			entry:
	%0 = icmp eq i32 %cond, 0			%tmp = icmp eq i32 %cond, 0
	%1 = zext i1 %0 to i8			%tmp1 = zext i1 %tmp to i8
	%2 = xor i8 %1, -1			%tmp2 = xor i8 %tmp1, -1
	%3 = and i8 %2, 1			%tmp3 = and i8 %tmp2, 1
	%4 = zext i8 %3 to i32			%tmp4 = zext i8 %tmp3 to i32
	store i32 %4, i32 addrspace(1)* %out			store i32 %tmp4, i32 addrspace(1)* %out
				ret void
				}

				; GCN-LABEL: {{^}}s_anyext_i16_i32:
				; VI: v_add_u16_e32 [[ADD:v[0-9]+]],
				; VI: v_not_b32_e32 [[NOT:v[0-9]+]], [[ADD]]
				; VI: v_and_b32_e32 [[AND:v[0-9]+]], 1, [[NOT]]
				; VI: buffer_store_dword [[AND]]
				define void @s_anyext_i16_i32(i32 addrspace(1)* %out, i16 %a, i16 %b) {
				entry:
				%tmp = add i16 %a, %b
				%tmp1 = trunc i16 %tmp to i8
				%tmp2 = xor i8 %tmp1, -1
				%tmp3 = and i8 %tmp2, 1
				%tmp4 = zext i8 %tmp3 to i32
				store i32 %tmp4, i32 addrspace(1)* %out
	ret void			ret void
	}			}

test/CodeGen/AMDGPU/global-extload-i8.ll

	Show First 20 Lines • Show All 144 Lines • ▼ Show 20 Lines
	; XSI: s_endpgm			; XSI: s_endpgm
	; define void @sextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {			; define void @sextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
	; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in			; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
	; %ext = sext <64 x i8> %load to <64 x i32>			; %ext = sext <64 x i8> %load to <64 x i32>
	; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out			; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
	; ret void			; ret void
	; }			; }

				; Zero-extending i8 -> i16 load from global memory; expects an unsigned
				; byte load.
				; NOTE(review): the value stored is an i16, yet the check expects
				; buffer_store_dword -- confirm this should not be buffer_store_short.
				; FUNC-LABEL: {{^}}zextload_global_i8_to_i16:
				; SI: buffer_load_ubyte
				; SI: buffer_store_dword
				; SI: s_endpgm
				define void @zextload_global_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
				%a = load i8, i8 addrspace(1)* %in
				%ext = zext i8 %a to i16
				store i16 %ext, i16 addrspace(1)* %out
				ret void
				}

				; Sign-extending i8 -> i16 load from global memory; expects a signed byte
				; load.
				; NOTE(review): the value stored is an i16, yet the check expects
				; buffer_store_dword -- confirm this should not be buffer_store_short.
				; FUNC-LABEL: {{^}}sextload_global_i8_to_i16:
				; SI: buffer_load_sbyte
				; SI: buffer_store_dword
				; SI: s_endpgm
				define void @sextload_global_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
				%a = load i8, i8 addrspace(1)* %in
				%ext = sext i8 %a to i16
				store i16 %ext, i16 addrspace(1)* %out
				ret void
				}

				; <1 x i8> -> <1 x i16> zero-extending load. Only checks that the kernel
				; is emitted up to s_endpgm; no specific instructions are checked.
				; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i16:
				; SI: s_endpgm
				define void @zextload_global_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
				%load = load <1 x i8>, <1 x i8> addrspace(1)* %in
				%ext = zext <1 x i8> %load to <1 x i16>
				store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
				ret void
				}

				; <1 x i8> -> <1 x i16> sign-extending load. Only checks that the kernel
				; is emitted up to s_endpgm; no specific instructions are checked.
				; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i16:
				; SI: s_endpgm
				define void @sextload_global_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
				%load = load <1 x i8>, <1 x i8> addrspace(1)* %in
				%ext = sext <1 x i8> %load to <1 x i16>
				store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
				ret void
				}

				; <2 x i8> -> <2 x i16> zero-extending load. Only checks that the kernel
				; is emitted up to s_endpgm; no specific instructions are checked.
				; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i16:
				; SI: s_endpgm
				define void @zextload_global_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
				%load = load <2 x i8>, <2 x i8> addrspace(1)* %in
				%ext = zext <2 x i8> %load to <2 x i16>
				store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
				ret void
				}

				; <2 x i8> -> <2 x i16> sign-extending load. Only checks that the kernel
				; is emitted up to s_endpgm; no specific instructions are checked.
				; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i16:
				; SI: s_endpgm
				define void @sextload_global_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
				%load = load <2 x i8>, <2 x i8> addrspace(1)* %in
				%ext = sext <2 x i8> %load to <2 x i16>
				store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
				ret void
				}

				; <4 x i8> -> <4 x i16> zero-extending load. Only checks that the kernel
				; is emitted up to s_endpgm; no specific instructions are checked.
				; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i16:
				; SI: s_endpgm
				define void @zextload_global_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
				%load = load <4 x i8>, <4 x i8> addrspace(1)* %in
				%ext = zext <4 x i8> %load to <4 x i16>
				store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
				ret void
				}

				; <4 x i8> -> <4 x i16> sign-extending load. Only checks that the kernel
				; is emitted up to s_endpgm; no specific instructions are checked.
				; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i16:
				; SI: s_endpgm
				define void @sextload_global_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
				%load = load <4 x i8>, <4 x i8> addrspace(1)* %in
				%ext = sext <4 x i8> %load to <4 x i16>
				store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
				ret void
				}

				; <8 x i8> -> <8 x i16> zero-extending load. Only checks that the kernel
				; is emitted up to s_endpgm; no specific instructions are checked.
				; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i16:
				; SI: s_endpgm
				define void @zextload_global_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
				%load = load <8 x i8>, <8 x i8> addrspace(1)* %in
				%ext = zext <8 x i8> %load to <8 x i16>
				store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
				ret void
				}

				; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i16:
				; SI: s_endpgm
				define void @sextload_global_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
				%load = load <8 x i8>, <8 x i8> addrspace(1)* %in
				%ext = sext <8 x i8> %load to <8 x i16>
				store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
				ret void
				}

				; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i16:
				; SI: s_endpgm
				define void @zextload_global_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
				%load = load <16 x i8>, <16 x i8> addrspace(1)* %in
				%ext = zext <16 x i8> %load to <16 x i16>
				store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
				ret void
				}

				; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i16:
				; SI: s_endpgm
				define void @sextload_global_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
				%load = load <16 x i8>, <16 x i8> addrspace(1)* %in
				%ext = sext <16 x i8> %load to <16 x i16>
				store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
				ret void
				}

	; FUNC-LABEL: {{^}}zextload_global_i8_to_i64:			; FUNC-LABEL: {{^}}zextload_global_i8_to_i64:
	; SI: buffer_load_ubyte v[[LO:[0-9]+]],			; SI: buffer_load_ubyte v[[LO:[0-9]+]],
	; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}			; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
	; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]			; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
	define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {			define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
	%a = load i8, i8 addrspace(1)* %in			%a = load i8, i8 addrspace(1)* %in
	%ext = zext i8 %a to i64			%ext = zext i8 %a to i64
	store i64 %ext, i64 addrspace(1)* %out			store i64 %ext, i64 addrspace(1)* %out
	▲ Show 20 Lines • Show All 139 Lines • Show Last 20 Lines

test/CodeGen/AMDGPU/max.i16.ll

This file was added.

				; RUN: llc -march=amdgcn -mcpu=tonga < %s \| FileCheck -check-prefix=VI -check-prefix=FUNC %s

				declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone

				; FUNC-LABEL: {{^}}v_test_imax_sge_i16:
				; VI: v_max_i16_e32
				define void @v_test_imax_sge_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
				%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
				%gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
				%gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
				%outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
				%a = load i16, i16 addrspace(1)* %gep0, align 4
				%b = load i16, i16 addrspace(1)* %gep1, align 4
				%cmp = icmp sge i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %outgep, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}v_test_imax_sge_v4i16:
				; VI: v_max_i16_e32
				; VI: v_max_i16_e32
				; VI: v_max_i16_e32
				; VI: v_max_i16_e32
				define void @v_test_imax_sge_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %aptr, <4 x i16> addrspace(1)* %bptr) nounwind {
				%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
				%gep0 = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %aptr, i32 %tid
				%gep1 = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %bptr, i32 %tid
				%outgep = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %out, i32 %tid
				%a = load <4 x i16>, <4 x i16> addrspace(1)* %gep0, align 4
				%b = load <4 x i16>, <4 x i16> addrspace(1)* %gep1, align 4
				%cmp = icmp sge <4 x i16> %a, %b
				%val = select <4 x i1> %cmp, <4 x i16> %a, <4 x i16> %b
				store <4 x i16> %val, <4 x i16> addrspace(1)* %outgep, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_imax_sge_i16:
				; VI: s_max_i16
				define void @s_test_imax_sge_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
				%cmp = icmp sge i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %out, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_imax_sge_imm_i16:
				; VI: s_max_i16 {{s[0-9]+}}, {{s[0-9]+}}, 9
				define void @s_test_imax_sge_imm_i16(i16 addrspace(1)* %out, i16 %a) nounwind {
				%cmp = icmp sge i16 %a, 9
				%val = select i1 %cmp, i16 %a, i16 9
				store i16 %val, i16 addrspace(1)* %out, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_imax_sgt_imm_i16:
				; VI: s_max_i16 {{s[0-9]+}}, {{s[0-9]+}}, 9
				define void @s_test_imax_sgt_imm_i16(i16 addrspace(1)* %out, i16 %a) nounwind {
				%cmp = icmp sgt i16 %a, 9
				%val = select i1 %cmp, i16 %a, i16 9
				store i16 %val, i16 addrspace(1)* %out, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_imax_sgt_imm_v2i16:
				; VI: s_max_i16 {{s[0-9]+}}, {{s[0-9]+}}, 9
				; VI: s_max_i16 {{s[0-9]+}}, {{s[0-9]+}}, 9
				define void @s_test_imax_sgt_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a) nounwind {
				%cmp = icmp sgt <2 x i16> %a, <i16 9, i16 9>
				%val = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> <i16 9, i16 9>
				store <2 x i16> %val, <2 x i16> addrspace(1)* %out, align 4
				ret void
				}
				; FUNC-LABEL: {{^}}v_test_imax_sgt_i16:
				; VI: v_max_i16_e32
				define void @v_test_imax_sgt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
				%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
				%gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
				%gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
				%outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
				%a = load i16, i16 addrspace(1)* %gep0, align 4
				%b = load i16, i16 addrspace(1)* %gep1, align 4
				%cmp = icmp sgt i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %outgep, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_imax_sgt_i16:
				; VI: s_max_i16
				define void @s_test_imax_sgt_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
				%cmp = icmp sgt i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %out, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}v_test_umax_uge_i16:
				; VI: v_max_u32_e32
				define void @v_test_umax_uge_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
				%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
				%gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
				%gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
				%outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
				%a = load i16, i16 addrspace(1)* %gep0, align 4
				%b = load i16, i16 addrspace(1)* %gep1, align 4
				%cmp = icmp uge i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %outgep, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_umax_uge_i16:
				; VI: s_max_u32
				define void @s_test_umax_uge_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
				%cmp = icmp uge i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %out, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_umax_uge_v3i16:
				; VI: s_max_u32
				; VI: s_max_u32
				; VI: s_max_u32
				; VI-NOT: s_max_u32
				; VI: s_endpgm
				define void @s_test_umax_uge_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %a, <3 x i16> %b) nounwind {
				%cmp = icmp uge <3 x i16> %a, %b
				%val = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
				store <3 x i16> %val, <3 x i16> addrspace(1)* %out, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}v_test_umax_ugt_i16:
				; VI: v_max_u32_e32
				define void @v_test_umax_ugt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
				%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
				%gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
				%gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
				%outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
				%a = load i16, i16 addrspace(1)* %gep0, align 4
				%b = load i16, i16 addrspace(1)* %gep1, align 4
				%cmp = icmp ugt i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %outgep, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_umax_ugt_i16:
				; VI: s_max_u32
				define void @s_test_umax_ugt_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
				%cmp = icmp ugt i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %out, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_umax_ugt_imm_v2i16:
				; VI: s_max_u32 {{s[0-9]+}}, {{s[0-9]+}}, 15
				; VI: s_max_u32 {{s[0-9]+}}, {{s[0-9]+}}, 23
				define void @s_test_umax_ugt_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a) nounwind {
				%cmp = icmp ugt <2 x i16> %a, <i16 15, i16 23>
				%val = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> <i16 15, i16 23>
				store <2 x i16> %val, <2 x i16> addrspace(1)* %out, align 4
				ret void
				}

test/CodeGen/AMDGPU/min_test.ll

This file was added.

				; RUN: llc -march=amdgcn -mcpu=tonga < %s \| FileCheck -check-prefix=VI -check-prefix=FUNC %s

				declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone

				; FUNC-LABEL: {{^}}v_test_imin_sle_i16:
				; VI: v_min_i16_e32
				define void @v_test_imin_sle_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
				%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
				%gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
				%gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
				%outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
				%a = load i16, i16 addrspace(1)* %gep0, align 4
				%b = load i16, i16 addrspace(1)* %gep1, align 4
				%cmp = icmp sle i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %outgep, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_imin_sle_i16:
				; VI: s_min_i16
				define void @s_test_imin_sle_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
				%cmp = icmp sle i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %out, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_imin_sle_v1i16:
				; VI: s_min_i16
				define void @s_test_imin_sle_v1i16(<1 x i16> addrspace(1)* %out, <1 x i16> %a, <1 x i16> %b) nounwind {
				%cmp = icmp sle <1 x i16> %a, %b
				%val = select <1 x i1> %cmp, <1 x i16> %a, <1 x i16> %b
				store <1 x i16> %val, <1 x i16> addrspace(1)* %out
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_imin_sle_v4i16:
				; VI: v_min_i16
				; VI: v_min_i16
				; VI: v_min_i16
				; VI: v_min_i16
				define void @s_test_imin_sle_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b) nounwind {
				%cmp = icmp sle <4 x i16> %a, %b
				%val = select <4 x i1> %cmp, <4 x i16> %a, <4 x i16> %b
				store <4 x i16> %val, <4 x i16> addrspace(1)* %out
				ret void
				}

				; FUNC-LABEL: {{^}}v_test_imin_slt_i16:
				; VI: v_min_i16_e32
				define void @v_test_imin_slt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
				%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
				%gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
				%gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
				%outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
				%a = load i16, i16 addrspace(1)* %gep0, align 4
				%b = load i16, i16 addrspace(1)* %gep1, align 4
				%cmp = icmp slt i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %outgep, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_imin_slt_i16:
				; VI: s_min_i16
				define void @s_test_imin_slt_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
				%cmp = icmp slt i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %out, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_imin_slt_v2i16:
				; VI: s_min_i16
				; VI: s_min_i16
				define void @s_test_imin_slt_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) nounwind {
				%cmp = icmp slt <2 x i16> %a, %b
				%val = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
				store <2 x i16> %val, <2 x i16> addrspace(1)* %out
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_imin_slt_imm_i16:
				; VI: s_min_i16 {{s[0-9]+}}, {{s[0-9]+}}, 8
				define void @s_test_imin_slt_imm_i16(i16 addrspace(1)* %out, i16 %a) nounwind {
				%cmp = icmp slt i16 %a, 8
				%val = select i1 %cmp, i16 %a, i16 8
				store i16 %val, i16 addrspace(1)* %out, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_imin_sle_imm_i16:
				; VI: s_min_i16 {{s[0-9]+}}, {{s[0-9]+}}, 8
				define void @s_test_imin_sle_imm_i16(i16 addrspace(1)* %out, i16 %a) nounwind {
				%cmp = icmp sle i16 %a, 8
				%val = select i1 %cmp, i16 %a, i16 8
				store i16 %val, i16 addrspace(1)* %out, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}v_test_umin_ule_i16:
				; VI: v_min_u16_e32
				define void @v_test_umin_ule_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
				%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
				%gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
				%gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
				%outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
				%a = load i16, i16 addrspace(1)* %gep0, align 4
				%b = load i16, i16 addrspace(1)* %gep1, align 4
				%cmp = icmp ule i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %outgep, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}v_test_umin_ule_v3i16:
				; VI: v_min_u16_e32
				; VI: v_min_u16_e32
				; VI: v_min_u16_e32
				; VI-NOT: v_min_u16_e32
				; VI: s_endpgm
				define void @v_test_umin_ule_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %a, <3 x i16> %b) nounwind {
				%cmp = icmp ule <3 x i16> %a, %b
				%val = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
				store <3 x i16> %val, <3 x i16> addrspace(1)* %out, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_umin_ule_i16:
				; VI: s_min_u16
				define void @s_test_umin_ule_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
				%cmp = icmp ule i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %out, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}v_test_umin_ult_i16:
				; VI: v_min_u16_e32
				define void @v_test_umin_ult_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
				%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
				%gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
				%gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
				%outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
				%a = load i16, i16 addrspace(1)* %gep0, align 4
				%b = load i16, i16 addrspace(1)* %gep1, align 4
				%cmp = icmp ult i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %outgep, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_umin_ult_i16:
				; VI: s_min_u16
				define void @s_test_umin_ult_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
				%cmp = icmp ult i16 %a, %b
				%val = select i1 %cmp, i16 %a, i16 %b
				store i16 %val, i16 addrspace(1)* %out, align 4
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_umin_ult_v1i16:
				; VI: s_min_u16
				define void @s_test_umin_ult_v1i16(<1 x i16> addrspace(1)* %out, <1 x i16> %a, <1 x i16> %b) nounwind {
				%cmp = icmp ult <1 x i16> %a, %b
				%val = select <1 x i1> %cmp, <1 x i16> %a, <1 x i16> %b
				store <1 x i16> %val, <1 x i16> addrspace(1)* %out
				ret void
				}

				; FUNC-LABEL: {{^}}s_test_umin_ult_v8i16:
				; VI: s_min_u16
				; VI: s_min_u16
				; VI: s_min_u16
				; VI: s_min_u16
				; VI: s_min_u16
				; VI: s_min_u16
				; VI: s_min_u16
				; VI: s_min_u16
				define void @s_test_umin_ult_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x i16> %b) nounwind {
				%cmp = icmp ult <8 x i16> %a, %b
				%val = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b
				store <8 x i16> %val, <8 x i16> addrspace(1)* %out
				ret void
				}

test/CodeGen/AMDGPU/shl.i16.ll

This file was added.

				; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs \| FileCheck -check-prefix=GCN -check-prefix=VI %s

				declare i32 @llvm.r600.read.tidig.x() #0

				;VI: {{^}}shl_v2i16:
				;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
				;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

				define void @shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
				%b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i32 1
				%a = load <2 x i16>, <2 x i16> addrspace(1) * %in
				%b = load <2 x i16>, <2 x i16> addrspace(1) * %b_ptr
				%result = shl <2 x i16> %a, %b
				store <2 x i16> %result, <2 x i16> addrspace(1)* %out
				ret void
				}

				;VI: {{^}}shl_v4i16:
				;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
				;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
				;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
				;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

				define void @shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
				%b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i32 1
				%a = load <4 x i16>, <4 x i16> addrspace(1) * %in
				%b = load <4 x i16>, <4 x i16> addrspace(1) * %b_ptr
				%result = shl <4 x i16> %a, %b
				store <4 x i16> %result, <4 x i16> addrspace(1)* %out
				ret void
				}


				;VI: {{^}}shl_i16:
				;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}

				define void @shl_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
				%b_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
				%a = load i16, i16 addrspace(1) * %in
				%b = load i16, i16 addrspace(1) * %b_ptr
				%result = shl i16 %a, %b
				store i16 %result, i16 addrspace(1)* %out
				ret void
				}

				; FUNC-LABEL: {{^}}v_shl_i16_32_bit_constant:
				; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
				; SI-DAG: s_mov_b32 s[[KLO:[0-9]+]], 0x12d687{{$}}
				; SI-DAG: s_mov_b32 s[[KHI:[0-9]+]], 0{{$}}
				; SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\]}}, s{{\[}}[[KLO]]:[[KHI]]{{\]}}, [[VAL]]
				define void @v_shl_i16_32_bit_constant(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr) {
				%a = load i16, i16 addrspace(1)* %aptr, align 8
				%shl = shl i16 1234567, %a
				store i16 %shl, i16 addrspace(1)* %out, align 8
				ret void
				}

				; FUNC-LABEL: {{^}}v_shl_inline_imm_64_i16:
				; SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\]}}, 64, {{v[0-9]+}}
				define void @v_shl_inline_imm_64_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr) {
				%a = load i16, i16 addrspace(1)* %aptr, align 8
				%shl = shl i16 8, %a
				store i16 %shl, i16 addrspace(1)* %out, align 8
				ret void
				}

				; FUNC-LABEL: {{^}}s_shl_inline_imm_1_i16:
				; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, 1, s{{[0-9]+}}
				define void @s_shl_inline_imm_1_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 %a) {
				%shl = shl i16 1, %a
				store i16 %shl, i16 addrspace(1)* %out, align 8
				ret void
				}

				attributes #0 = { nounwind readnone }

test/CodeGen/AMDGPU/shl.ll

Show First 20 Lines • Show All 47 Lines • ▼ Show 20 Lines	define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1		%b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
%a = load <4 x i32>, <4 x i32> addrspace(1) * %in		%a = load <4 x i32>, <4 x i32> addrspace(1) * %in
%b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr		%b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
%result = shl <4 x i32> %a, %b		%result = shl <4 x i32> %a, %b
store <4 x i32> %result, <4 x i32> addrspace(1)* %out		store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void		ret void
}		}

		;EG-LABEL: {{^}}shl_i16:
		;EG: SUB_INT {{\? }}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
		;EG: LSHR {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], {{[[COMPSH]]\|PV.[XYZW]}}
		;EG-DAG: ADD_INT {{\? }}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
		;EG-DAG: LSHR {{\? }}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]\|PV.[XYZW]}}, 1
		;EG-DAG: LSHL {{\? }}[[HISMTMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], [[SHIFT]]
		;EG-DAG: OR_INT {{\? }}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]\|PV.[XYZW]\|PS}}, {{[[OVERF]]\|PV.[XYZW]}}
		;EG-DAG: LSHL {{\? }}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS\|[[SHIFT]]\|PV.[XYZW]}}
		;EG-DAG: SETGT_UINT {{\? }}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
		;EG-DAG: CNDE_INT {{\? }}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
		;EG-DAG: CNDE_INT {{\? }}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0

		;SI: {{^}}shl_i16:
		;SI: v_lshl_b16 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}

		;VI: {{^}}shl_i16:
		;VI: v_lshlrev_b16 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}

		define void @shl_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
		%b_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
		%a = load i16, i16 addrspace(1) * %in
		%b = load i16, i16 addrspace(1) * %b_ptr
		%result = shl i16 %a, %b
		store i16 %result, i16 addrspace(1)* %out
		ret void
		}

		;EG: {{^}}shl_v2i16:
		;EG: LSHL {{\? }}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
		;EG: LSHL {{\? }}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

		;SI: {{^}}shl_v2i16:
		;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

		;VI: {{^}}shl_v2i16:
		;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

		define void @shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
		%b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i16 1
		%a = load <2 x i16>, <2 x i16> addrspace(1) * %in
		%b = load <2 x i16>, <2 x i16> addrspace(1) * %b_ptr
		%result = shl <2 x i16> %a, %b
		store <2 x i16> %result, <2 x i16> addrspace(1)* %out
		ret void
		}

		;EG: {{^}}shl_v4i16:
		;EG: LSHL {{\? }}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
		;EG: LSHL {{\? }}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
		;EG: LSHL {{\? }}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
		;EG: LSHL {{\? }}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

		;SI: {{^}}shl_v4i16:
		;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

		;VI: {{^}}shl_v4i16:
		;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

		define void @shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
		%b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i16 1
		%a = load <4 x i16>, <4 x i16> addrspace(1) * %in
		%b = load <4 x i16>, <4 x i16> addrspace(1) * %b_ptr
		%result = shl <4 x i16> %a, %b
		store <4 x i16> %result, <4 x i16> addrspace(1)* %out
		ret void
		}

;EG-LABEL: {{^}}shl_i64:		;EG-LABEL: {{^}}shl_i64:
;EG: SUB_INT {{\? }}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]		;EG: SUB_INT {{\? }}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
;EG: LSHR {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], {{[[COMPSH]]\|PV.[XYZW]}}		;EG: LSHR {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], {{[[COMPSH]]\|PV.[XYZW]}}
;EG-DAG: ADD_INT {{\? }}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal		;EG-DAG: ADD_INT {{\? }}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
;EG-DAG: LSHR {{\? }}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]\|PV.[XYZW]}}, 1		;EG-DAG: LSHR {{\? }}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]\|PV.[XYZW]}}, 1
;EG-DAG: LSHL {{\? }}[[HISMTMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], [[SHIFT]]		;EG-DAG: LSHL {{\? }}[[HISMTMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], [[SHIFT]]
;EG-DAG: OR_INT {{\? }}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]\|PV.[XYZW]\|PS}}, {{[[OVERF]]\|PV.[XYZW]}}		;EG-DAG: OR_INT {{\? }}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]\|PV.[XYZW]\|PS}}, {{[[OVERF]]\|PV.[XYZW]}}
;EG-DAG: LSHL {{\? }}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS\|[[SHIFT]]\|PV.[XYZW]}}		;EG-DAG: LSHL {{\? }}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS\|[[SHIFT]]\|PV.[XYZW]}}
▲ Show 20 Lines • Show All 317 Lines • Show Last 20 Lines

test/CodeGen/AMDGPU/sign_extend.ll

	; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s \| FileCheck -check-prefix=SI %s			; RUN: llc -march=amdgcn -verify-machineinstrs < %s \| FileCheck -check-prefix=SI %s
	; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s \| FileCheck -check-prefix=SI %s			; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s \| FileCheck -check-prefix=SI %s

	; SI-LABEL: {{^}}s_sext_i1_to_i32:			; SI-LABEL: {{^}}s_sext_i1_to_i32:
	; SI: v_cndmask_b32_e64			; SI: v_cndmask_b32_e64
	; SI: s_endpgm			; SI: s_endpgm
	define void @s_sext_i1_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {			define void @s_sext_i1_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
	%cmp = icmp eq i32 %a, %b			%cmp = icmp eq i32 %a, %b
	%sext = sext i1 %cmp to i32			%sext = sext i1 %cmp to i32
	Show All 40 Lines
	define void @v_sext_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {			define void @v_sext_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
	%val = load i32, i32 addrspace(1)* %in, align 4			%val = load i32, i32 addrspace(1)* %in, align 4
	%sext = sext i32 %val to i64			%sext = sext i32 %val to i64
	store i64 %sext, i64 addrspace(1)* %out, align 8			store i64 %sext, i64 addrspace(1)* %out, align 8
	ret void			ret void
	}			}

	; SI-LABEL: {{^}}s_sext_i16_to_i64:			; SI-LABEL: {{^}}s_sext_i16_to_i64:
	; SI: s_endpgm			; SI: s_bfe_i64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x100000
	define void @s_sext_i16_to_i64(i64 addrspace(1)* %out, i16 %a) nounwind {			define void @s_sext_i16_to_i64(i64 addrspace(1)* %out, i16 %a) nounwind {
	%sext = sext i16 %a to i64			%sext = sext i16 %a to i64
	store i64 %sext, i64 addrspace(1)* %out, align 8			store i64 %sext, i64 addrspace(1)* %out, align 8
	ret void			ret void
	}			}

				; SI-LABEL: {{^}}s_sext_i1_to_i16:
				; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1
				; SI-NEXT: buffer_store_short [[RESULT]]
				define void @s_sext_i1_to_i16(i16 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
				%cmp = icmp eq i32 %a, %b
				%sext = sext i1 %cmp to i16
				store i16 %sext, i16 addrspace(1)* %out
				ret void
				}

test/CodeGen/AMDGPU/sra.i16.ll

This file was added.

				; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s

				declare i32 @llvm.r600.read.tidig.x() #0

				; FUNC-LABEL: {{^}}ashr_v2i16:

				; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
				; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

				define void @ashr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
				%b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i16 1
				%a = load <2 x i16>, <2 x i16> addrspace(1)* %in
				%b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr
				%result = ashr <2 x i16> %a, %b
				store <2 x i16> %result, <2 x i16> addrspace(1)* %out
				ret void
				}

				; FUNC-LABEL: {{^}}ashr_v4i16:

				; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
				; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
				; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
				; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

				define void @ashr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
				%b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i16 1
				%a = load <4 x i16>, <4 x i16> addrspace(1)* %in
				%b = load <4 x i16>, <4 x i16> addrspace(1)* %b_ptr
				%result = ashr <4 x i16> %a, %b
				store <4 x i16> %result, <4 x i16> addrspace(1)* %out
				ret void
				}


				; FUNC-LABEL: {{^}}ashr_i16_2:

				; VI: v_ashrrev_i16 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}

				define void @ashr_i16_2(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
				entry:
				%b_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
				%a = load i16, i16 addrspace(1)* %in
				%b = load i16, i16 addrspace(1)* %b_ptr
				%result = ashr i16 %a, %b
				store i16 %result, i16 addrspace(1)* %out
				ret void
				}

				; GCN-LABEL: {{^}}v_ashr_32_i16:
				; SI: buffer_load_dword v[[HI:[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
				; VI: flat_load_dword v[[HI:[0-9]+]]
				; GCN: v_ashrrev_i16_e32 v[[SHIFT:[0-9]+]], 31, v[[HI]]
				; GCN: {{buffer\|flat}}_store_dwordx2 {{.*}}v{{\[}}[[HI]]:[[SHIFT]]{{\]}}
				define void @v_ashr_32_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
				%tid = call i32 @llvm.r600.read.tidig.x() #0
				%gep.in = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
				%gep.out = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
				%a = load i16, i16 addrspace(1)* %gep.in
				%result = ashr i16 %a, 2
				store i16 %result, i16 addrspace(1)* %gep.out
				ret void
				}

				; GCN-LABEL: {{^}}s_ashr_63_i16:
				; GCN-DAG: s_load_dword s[[HI:[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xc\|0x30}}
				; GCN: s_ashr_i16 s[[SHIFT:[0-9]+]], s[[HI]], 31
				; GCN: s_mov_b32 s[[COPYSHIFT:[0-9]+]], s[[SHIFT]]
				; GCN: s_add_u32 {{s[0-9]+}}, s[[HI]], {{s[0-9]+}}
				; GCN: s_addc_u32 {{s[0-9]+}}, s[[COPYSHIFT]], {{s[0-9]+}}
				define void @s_ashr_63_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) {
				%result = ashr i16 %a, 4
				%add = add i16 %result, %b
				store i16 %add, i16 addrspace(1)* %out
				ret void
				}

				; GCN-LABEL: {{^}}v_ashr_63_i16:
				; VI: flat_load_dword v[[HI:[0-9]+]]
				define void @v_ashr_63_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
				%tid = call i32 @llvm.r600.read.tidig.x() #0
				%gep.in = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
				%gep.out = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
				%a = load i16, i16 addrspace(1)* %gep.in
				%result = ashr i16 %a, 8
				store i16 %result, i16 addrspace(1)* %gep.out
				ret void
				}

				attributes #0 = { nounwind readnone }

test/CodeGen/AMDGPU/sra.ll

Show All 40 Lines	define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1		%b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
%a = load <4 x i32>, <4 x i32> addrspace(1)* %in		%a = load <4 x i32>, <4 x i32> addrspace(1)* %in
%b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr		%b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
%result = ashr <4 x i32> %a, %b		%result = ashr <4 x i32> %a, %b
store <4 x i32> %result, <4 x i32> addrspace(1)* %out		store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void		ret void
}		}

		; Element-wise arithmetic shift right of one <2 x i16> by another. Both
		; operands are loaded from %in (vector elements 0 and 1 of the pointed-to
		; array) and the result is stored to %out. Each target prefix expects two
		; scalar shift instructions, one per vector lane.
		; FUNC-LABEL: {{^}}ashr_v2i16:
		; SI: v_ashr_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		; SI: v_ashr_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

		; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

		; EG: ASHR {{\? }}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
		; EG: ASHR {{\? }}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
		define void @ashr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
		%b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i16 1
		%a = load <2 x i16>, <2 x i16> addrspace(1)* %in
		%b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr
		%result = ashr <2 x i16> %a, %b
		store <2 x i16> %result, <2 x i16> addrspace(1)* %out
		ret void
		}

		; Element-wise arithmetic shift right of one <4 x i16> by another. Both
		; operands are loaded from %in (vector elements 0 and 1 of the pointed-to
		; array) and the result is stored to %out. Each target prefix expects four
		; scalar shift instructions, one per vector lane.
		; FUNC-LABEL: {{^}}ashr_v4i16:
		; SI: v_ashr_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		; SI: v_ashr_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		; SI: v_ashr_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		; SI: v_ashr_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

		; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
		; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

		; EG: ASHR {{\? }}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
		; EG: ASHR {{\? }}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
		; EG: ASHR {{\? }}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
		; EG: ASHR {{\? }}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
		define void @ashr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
		%b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i16 1
		%a = load <4 x i16>, <4 x i16> addrspace(1)* %in
		%b = load <4 x i16>, <4 x i16> addrspace(1)* %b_ptr
		%result = ashr <4 x i16> %a, %b
		store <4 x i16> %result, <4 x i16> addrspace(1)* %out
		ret void
		}

; FUNC-LABEL: {{^}}s_ashr_i64:		; FUNC-LABEL: {{^}}s_ashr_i64:
; GCN: s_ashr_i64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8		; GCN: s_ashr_i64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8

; EG: ASHR		; EG: ASHR
define void @s_ashr_i64(i64 addrspace(1)* %out, i32 %in) {		define void @s_ashr_i64(i64 addrspace(1)* %out, i32 %in) {
entry:		entry:
%in.ext = sext i32 %in to i64		%in.ext = sext i32 %in to i64
%ashr = ashr i64 %in.ext, 8		%ashr = ashr i64 %in.ext, 8
▲ Show 20 Lines • Show All 203 Lines • Show Last 20 Lines

test/CodeGen/AMDGPU/sub.i16.ll

This file was added.

				; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s

				; i16 subtraction of two values loaded (volatile) from the %tid-indexed
				; elements of %in0 and %in1. The checks expect two 16-bit loads, a single
				; 16-bit VALU subtract, and the short store directly after it.
				; Note that %gep.out is computed but the store goes through %out itself.
				; GCN-LABEL: {{^}}v_test_sub_i16:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: flat_load_ushort [[B:v[0-9]+]]
				; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
				; VI-NEXT: buffer_store_short [[ADD]]
				define void @v_test_sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid
				%a = load volatile i16, i16 addrspace(1)* %gep.in0
				%b = load volatile i16, i16 addrspace(1)* %gep.in1
				%sub = sub i16 %a, %b
				store i16 %sub, i16 addrspace(1)* %out
				ret void
				}

				; Subtracts the constant 123 from an i16 loaded (volatile) from the
				; %tid-indexed element of %in0. The check expects the constant to appear as
				; the literal 0x7b in the first source operand of the 16-bit subtract.
				; Note that %gep.out is computed but the store goes through %out itself.
				; GCN-LABEL: {{^}}v_test_sub_i16_constant:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], 0x7b, [[A]]
				; VI-NEXT: buffer_store_short [[ADD]]
				define void @v_test_sub_i16_constant(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%a = load volatile i16, i16 addrspace(1)* %gep.in0
				%sub = sub i16 %a, 123
				store i16 %sub, i16 addrspace(1)* %out
				ret void
				}

				; Subtracts the negative constant -845 from an i16 loaded (volatile) from
				; the %tid-indexed element of %in0. The check expects the constant as the
				; literal 0xfffffcb3, i.e. -845 written as a 32-bit two's-complement value.
				; Note that %gep.out is computed but the store goes through %out itself.
				; GCN-LABEL: {{^}}v_test_sub_i16_neg_constant:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], 0xfffffcb3, [[A]]
				; VI-NEXT: buffer_store_short [[ADD]]
				define void @v_test_sub_i16_neg_constant(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%a = load volatile i16, i16 addrspace(1)* %gep.in0
				%sub = sub i16 %a, -845
				store i16 %sub, i16 addrspace(1)* %out
				ret void
				}

				; Subtracts the constant -1 from an i16 loaded (volatile) from the
				; %tid-indexed element of %in0. The check expects -1 to be written directly
				; as an operand of the 16-bit subtract rather than as a hex literal.
				; Note that %gep.out is computed but the store goes through %out itself.
				; GCN-LABEL: {{^}}v_test_sub_i16_inline_neg1:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], -1, [[A]]
				; VI-NEXT: buffer_store_short [[ADD]]
				define void @v_test_sub_i16_inline_neg1(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%a = load volatile i16, i16 addrspace(1)* %gep.in0
				%sub = sub i16 %a, -1
				store i16 %sub, i16 addrspace(1)* %out
				ret void
				}

				; i16 subtraction of two volatile-loaded values whose result is
				; zero-extended to i32 before being stored. The checks expect the dword
				; store to consume the subtract result directly, with no instruction
				; between the subtract and the store.
				; Note that %gep.out is computed but the store goes through %out itself.
				; GCN-LABEL: {{^}}v_test_sub_i16_zext_to_i32:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: flat_load_ushort [[B:v[0-9]+]]
				; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
				; VI-NEXT: buffer_store_dword [[ADD]]
				define void @v_test_sub_i16_zext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid
				%a = load volatile i16, i16 addrspace(1)* %gep.in0
				%b = load volatile i16, i16 addrspace(1)* %gep.in1
				%sub = sub i16 %a, %b
				%ext = zext i16 %sub to i32
				store i32 %ext, i32 addrspace(1)* %out
				ret void
				}

				; i16 subtraction of two volatile-loaded values whose result is
				; zero-extended to i64 before being stored. The checks expect the 64-bit
				; store to pair the subtract result (low half) with a register holding the
				; constant 0 (high half); the subtract and the zero move may come in
				; either order.
				; Note that %gep.out is computed but the store goes through %out itself.
				; GCN-LABEL: {{^}}v_test_sub_i16_zext_to_i64:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: flat_load_ushort [[B:v[0-9]+]]
				; VI-DAG: v_sub_u16_e32 v[[ADD:[0-9]+]], [[A]], [[B]]
				; VI-DAG: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0{{$}}
				; VI: buffer_store_dwordx2 v{{\[}}[[ADD]]:[[VZERO]]{{\]}}
				define void @v_test_sub_i16_zext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i64, i64 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid
				%a = load volatile i16, i16 addrspace(1)* %gep.in0
				%b = load volatile i16, i16 addrspace(1)* %gep.in1
				%sub = sub i16 %a, %b
				%ext = zext i16 %sub to i64
				store i64 %ext, i64 addrspace(1)* %out
				ret void
				}

				; i16 subtraction whose result is sign-extended to i32 before being stored.
				; The checks expect the extension as a signed bitfield extract of bits
				; [0, 16) placed directly after the subtract, followed by the dword store.
				; Unlike the zext variants of this test, the loads here are not volatile.
				; Note that %gep.out is computed but the store goes through %out itself.
				; GCN-LABEL: {{^}}v_test_sub_i16_sext_to_i32:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: flat_load_ushort [[B:v[0-9]+]]
				; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
				; VI-NEXT: v_bfe_i32 [[SEXT:v[0-9]+]], [[ADD]], 0, 16
				; VI-NEXT: buffer_store_dword [[SEXT]]
				define void @v_test_sub_i16_sext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid
				%a = load i16, i16 addrspace(1)* %gep.in0
				%b = load i16, i16 addrspace(1)* %gep.in1
				%sub = sub i16 %a, %b
				%ext = sext i16 %sub to i32
				store i32 %ext, i32 addrspace(1)* %out
				ret void
				}

				; i16 subtraction whose result is sign-extended to i64 before being stored.
				; The checks expect a signed bitfield extract of bits [0, 16) for the low
				; half, an arithmetic shift right by 31 of that value for the high half,
				; and then the 64-bit store of the low:high register pair.
				; Unlike the zext variants of this test, the loads here are not volatile.
				; Note that %gep.out is computed but the store goes through %out itself.
				; GCN-LABEL: {{^}}v_test_sub_i16_sext_to_i64:
				; VI: flat_load_ushort [[A:v[0-9]+]]
				; VI: flat_load_ushort [[B:v[0-9]+]]
				; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
				; VI-NEXT: v_bfe_i32 v[[LO:[0-9]+]], [[ADD]], 0, 16
				; VI-NEXT: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
				; VI-NEXT: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
				define void @v_test_sub_i16_sext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
				%tid = call i32 @llvm.amdgcn.workitem.id.x()
				%gep.out = getelementptr inbounds i64, i64 addrspace(1)* %out, i32 %tid
				%gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid
				%gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid
				%a = load i16, i16 addrspace(1)* %gep.in0
				%b = load i16, i16 addrspace(1)* %gep.in1
				%sub = sub i16 %a, %b
				%ext = sext i16 %sub to i64
				store i64 %ext, i64 addrspace(1)* %out
				ret void
				}

				; i16 subtraction of two kernel arguments passed without extension
				; attributes. The checks expect both arguments to be fetched with scalar
				; dword loads (offsets 0x2c and 0x30), one of them copied into a vector
				; register, then a 16-bit VALU subtract directly followed by the short store.
				; GCN-LABEL: {{^}}s_test_sub_i16:
				; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c
				; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30
				; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
				; VI: v_sub_u16_e32 [[RESULT:v[0-9]+]], [[B]], [[VA]]
				; VI-NEXT: buffer_store_short [[RESULT]]
				define void @s_test_sub_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) #1 {
				%sub = sub i16 %a, %b
				store i16 %sub, i16 addrspace(1)* %out
				ret void
				}

				; Same subtraction as s_test_sub_i16, but both i16 kernel arguments carry
				; the zeroext attribute. The expected instruction sequence is identical:
				; two scalar dword loads, a copy to a vector register, a 16-bit VALU
				; subtract, and the short store directly after it.
				; GCN-LABEL: {{^}}s_test_sub_i16_zeroext_args:
				; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c
				; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30
				; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
				; VI: v_sub_u16_e32 [[RESULT:v[0-9]+]], [[B]], [[VA]]
				; VI-NEXT: buffer_store_short [[RESULT]]
				define void @s_test_sub_i16_zeroext_args(i16 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) #1 {
				%sub = sub i16 %a, %b
				store i16 %sub, i16 addrspace(1)* %out
				ret void
				}

				; Same subtraction as s_test_sub_i16, but both i16 kernel arguments carry
				; the signext attribute. The expected instruction sequence is identical:
				; two scalar dword loads, a copy to a vector register, a 16-bit VALU
				; subtract, and the short store directly after it.
				; GCN-LABEL: {{^}}s_test_sub_i16_signext_args:
				; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c
				; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30
				; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
				; VI: v_sub_u16_e32 [[RESULT:v[0-9]+]], [[B]], [[VA]]
				; VI-NEXT: buffer_store_short [[RESULT]]
				define void @s_test_sub_i16_signext_args(i16 addrspace(1)* %out, i16 signext %a, i16 signext %b) #1 {
				%sub = sub i16 %a, %b
				store i16 %sub, i16 addrspace(1)* %out
				ret void
				}

				; i16 subtraction of two zeroext kernel arguments whose result is
				; zero-extended to i32 before being stored. The checks expect the dword
				; store to consume the subtract result directly, with no instruction
				; between the subtract and the store.
				; The store check must reference the capture made by the subtract line
				; of this function (ADD); a name captured only in an earlier function
				; would silently match that function's register instead.
				; GCN-LABEL: {{^}}s_test_sub_i16_zext_to_i32:
				; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c
				; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30
				; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
				; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[B]], [[VA]]
				; VI-NEXT: buffer_store_dword [[ADD]]
				define void @s_test_sub_i16_zext_to_i32(i32 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) #1 {
				%sub = sub i16 %a, %b
				%ext = zext i16 %sub to i32
				store i32 %ext, i32 addrspace(1)* %out
				ret void
				}

				; i16 subtraction of two zeroext kernel arguments whose result is
				; zero-extended to i64 before being stored. The checks expect the 64-bit
				; store to pair the subtract result (low half) with a register holding the
				; constant 0 (high half); loads, copy, subtract and zero move may appear in
				; any order, with the store directly after the last of them.
				; GCN-LABEL: {{^}}s_test_sub_i16_zext_to_i64:
				; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c
				; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30
				; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
				; VI-DAG: v_sub_u16_e32 v[[LO:[0-9]+]], [[B]], [[VA]]
				; VI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
				; VI-NEXT: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
				define void @s_test_sub_i16_zext_to_i64(i64 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) #1 {
				%sub = sub i16 %a, %b
				%ext = zext i16 %sub to i64
				store i64 %ext, i64 addrspace(1)* %out
				ret void
				}

				; i16 subtraction of two signext kernel arguments whose result is
				; sign-extended to i32 before being stored. The checks expect the extension
				; as a signed bitfield extract of bits [0, 16) placed directly after the
				; subtract, followed by the dword store of the extracted value.
				; GCN-LABEL: {{^}}s_test_sub_i16_sext_to_i32:
				; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c
				; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30
				; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
				; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[B]], [[VA]]
				; VI-NEXT: v_bfe_i32 [[RESULT:v[0-9]+]], [[ADD]], 0, 16
				; VI-NEXT: buffer_store_dword [[RESULT]]
				define void @s_test_sub_i16_sext_to_i32(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) #1 {
				%sub = sub i16 %a, %b
				%ext = sext i16 %sub to i32
				store i32 %ext, i32 addrspace(1)* %out
				ret void
				}

				; i16 subtraction of two signext kernel arguments whose result is
				; sign-extended to i64 before being stored. The checks expect a signed
				; bitfield extract of bits [0, 16) for the low half, an arithmetic shift
				; right by 31 of that value for the high half, and then the 64-bit store
				; of the low:high register pair.
				; The subtract result is captured as ADD so that the bitfield extract
				; reads the register produced in this function, and LO is defined once,
				; by the extract.
				; GCN-LABEL: {{^}}s_test_sub_i16_sext_to_i64:
				; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c
				; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30
				; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
				; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[B]], [[VA]]
				; VI-NEXT: v_bfe_i32 v[[LO:[0-9]+]], [[ADD]], 0, 16
				; VI-NEXT: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
				; VI-NEXT: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
				define void @s_test_sub_i16_sext_to_i64(i64 addrspace(1)* %out, i16 signext %a, i16 signext %b) #1 {
				%sub = sub i16 %a, %b
				%ext = sext i16 %sub to i64
				store i64 %ext, i64 addrspace(1)* %out
				ret void
				}

				declare i32 @llvm.amdgcn.workitem.id.x() #0

				attributes #0 = { nounwind readnone }
				attributes #1 = { nounwind }

test/CodeGen/AMDGPU/sub.ll

Show First 20 Lines • Show All 48 Lines • ▼ Show 20 Lines	define void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1		%b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
%a = load <4 x i32>, <4 x i32> addrspace(1) * %in		%a = load <4 x i32>, <4 x i32> addrspace(1) * %in
%b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr		%b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
%result = sub <4 x i32> %a, %b		%result = sub <4 x i32> %a, %b
store <4 x i32> %result, <4 x i32> addrspace(1)* %out		store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void		ret void
}		}

		; Scalar i16 subtraction: %a is loaded from %in, %b from the following i16
		; element, and %a - %b is stored to %out.
		; The label line anchors the instruction check to this function's output,
		; matching the other tests in this file.
		; FUNC-LABEL: {{^}}test_sub_i16:
		; SI: v_subrev_i16_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
		define void @test_sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
		%b_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
		%a = load i16, i16 addrspace(1)* %in
		%b = load i16, i16 addrspace(1)* %b_ptr
		%result = sub i16 %a, %b
		store i16 %result, i16 addrspace(1)* %out
		ret void
		}

		; Element-wise subtraction of two <2 x i16> vectors loaded from %in (vector
		; elements 0 and 1 of the pointed-to array); the result is stored to %out.
		; Each target prefix expects two scalar subtract instructions, one per lane.
		; FUNC-LABEL: {{^}}test_sub_v2i16:
		; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
		; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

		; SI: v_sub_i16_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
		; SI: v_sub_i16_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}

		define void @test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
		%b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i16 1
		%a = load <2 x i16>, <2 x i16> addrspace(1) * %in
		%b = load <2 x i16>, <2 x i16> addrspace(1) * %b_ptr
		%result = sub <2 x i16> %a, %b
		store <2 x i16> %result, <2 x i16> addrspace(1)* %out
		ret void
		}

		; Element-wise subtraction of two <4 x i16> vectors loaded from %in (vector
		; elements 0 and 1 of the pointed-to array); the result is stored to %out.
		; Each target prefix expects four scalar subtract instructions, one per lane.
		; FUNC-LABEL: {{^}}test_sub_v4i16:
		; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
		; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
		; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
		; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

		; SI: v_sub_i16_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
		; SI: v_sub_i16_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
		; SI: v_sub_i16_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
		; SI: v_sub_i16_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}

		define void @test_sub_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
		%b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i16 1
		%a = load <4 x i16>, <4 x i16> addrspace(1) * %in
		%b = load <4 x i16>, <4 x i16> addrspace(1) * %b_ptr
		%result = sub <4 x i16> %a, %b
		store <4 x i16> %result, <4 x i16> addrspace(1)* %out
		ret void
		}

; FUNC-LABEL: {{^}}s_sub_i64:		; FUNC-LABEL: {{^}}s_sub_i64:
; SI: s_sub_u32		; SI: s_sub_u32
; SI: s_subb_u32		; SI: s_subb_u32

; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.[XYZW]]]		; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.[XYZW]]]
; EG: MEM_RAT_CACHELESS STORE_RAW [[HI:T[0-9]+\.[XYZW]]]		; EG: MEM_RAT_CACHELESS STORE_RAW [[HI:T[0-9]+\.[XYZW]]]
; EG-DAG: SUB_INT {{[* ]*}}[[LO]]		; EG-DAG: SUB_INT {{[* ]*}}[[LO]]
; EG-DAG: SUBB_UINT		; EG-DAG: SUBB_UINT
▲ Show 20 Lines • Show All 66 Lines • Show Last 20 Lines

test/CodeGen/AMDGPU/trunc-store-i1.ll

	Show All 15 Lines
	; SI-LABEL: {{^}}global_truncstore_i64_to_i1:			; SI-LABEL: {{^}}global_truncstore_i64_to_i1:
	; SI: buffer_store_byte			; SI: buffer_store_byte
	define void @global_truncstore_i64_to_i1(i1 addrspace(1)* %out, i64 %val) nounwind {			define void @global_truncstore_i64_to_i1(i1 addrspace(1)* %out, i64 %val) nounwind {
	%trunc = trunc i64 %val to i1			%trunc = trunc i64 %val to i1
	store i1 %trunc, i1 addrspace(1)* %out, align 1			store i1 %trunc, i1 addrspace(1)* %out, align 1
	ret void			ret void
	}			}

	; SI-LABEL: {{^}}global_truncstore_i16_to_i1:			; SI-LABEL: {{^}}s_arg_global_truncstore_i16_to_i1:
	; SI: s_load_dword [[LOAD:s[0-9]+]],			; SI: s_load_dword [[LOAD:s[0-9]+]],
	; SI: s_and_b32 [[SREG:s[0-9]+]], [[LOAD]], 1			; SI: s_and_b32 [[SREG:s[0-9]+]], [[LOAD]], 1
	; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], [[SREG]]			; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], [[SREG]]
	; SI: buffer_store_byte [[VREG]],			; SI: buffer_store_byte [[VREG]],
	define void @global_truncstore_i16_to_i1(i1 addrspace(1)* %out, i16 %val) nounwind {			define void @s_arg_global_truncstore_i16_to_i1(i1 addrspace(1)* %out, i16 %val) nounwind {
	%trunc = trunc i16 %val to i1			%trunc = trunc i16 %val to i1
	store i1 %trunc, i1 addrspace(1)* %out, align 1			store i1 %trunc, i1 addrspace(1)* %out, align 1
	ret void			ret void
	}			}
				; Truncating store of an i16 to i1 where the i16 is the sum of two kernel
				; arguments rather than an argument itself. Only the function label is
				; checked; no instruction patterns are asserted for this case.
				; SI-LABEL: {{^}}global_truncstore_i16_to_i1:
				define void @global_truncstore_i16_to_i1(i1 addrspace(1)* %out, i16 %val0, i16 %val1) nounwind {
				%add = add i16 %val0, %val1
				%trunc = trunc i16 %add to i1
				store i1 %trunc, i1 addrspace(1)* %out, align 1
				ret void
				}

test/CodeGen/AMDGPU/zero_extend.ll

	; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600			; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600
	; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI			; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI
	; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=SI			; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=SI

	; R600: {{^}}test:			; R600: {{^}}s_mad_zext_i32_to_i64:
	; R600: MEM_RAT_CACHELESS STORE_RAW			; R600: MEM_RAT_CACHELESS STORE_RAW
	; R600: MEM_RAT_CACHELESS STORE_RAW			; R600: MEM_RAT_CACHELESS STORE_RAW

	; SI: {{^}}test:			; SI: {{^}}test:
	; SI: v_mov_b32_e32 v[[V_ZERO:[0-9]]], 0{{$}}			; SI: v_mov_b32_e32 v[[V_ZERO:[0-9]]], 0{{$}}
	; SI: buffer_store_dwordx2 v[0:[[V_ZERO]]{{\]}}			; SI: buffer_store_dwordx2 v[0:[[V_ZERO]]{{\]}}
	define void @test(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {			define void @s_mad_zext_i32_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) #0 {
	entry:			entry:
	%0 = mul i32 %a, %b			%tmp0 = mul i32 %a, %b
	%1 = add i32 %0, %c			%tmp1 = add i32 %tmp0, %c
	%2 = zext i32 %1 to i64			%tmp2 = zext i32 %tmp1 to i64
	store i64 %2, i64 addrspace(1)* %out			store i64 %tmp2, i64 addrspace(1)* %out
	ret void			ret void
	}			}

	; SI-LABEL: {{^}}testi1toi32:			; SI-LABEL: {{^}}s_cmp_zext_i1_to_i32
	; SI: v_cndmask_b32			; SI: v_cndmask_b32
	define void @testi1toi32(i32 addrspace(1)* %out, i32 %a, i32 %b) {			define void @s_cmp_zext_i1_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
	entry:			entry:
	%0 = icmp eq i32 %a, %b			%tmp0 = icmp eq i32 %a, %b
	%1 = zext i1 %0 to i32			%tmp1 = zext i1 %tmp0 to i32
	store i32 %1, i32 addrspace(1)* %out			store i32 %tmp1, i32 addrspace(1)* %out
	ret void			ret void
	}			}

	; SI-LABEL: {{^}}zext_i1_to_i64:			; SI-LABEL: {{^}}s_arg_zext_i1_to_i64:
				; Zero-extends an i1 kernel argument (passed zeroext) to i64 and stores it
				; to %out with 8-byte alignment. No instruction patterns are asserted
				; between the label and the function body.
				define void @s_arg_zext_i1_to_i64(i64 addrspace(1)* %out, i1 zeroext %arg) #0 {
				%ext = zext i1 %arg to i64
				store i64 %ext, i64 addrspace(1)* %out, align 8
				ret void
				}

				; SI-LABEL: {{^}}s_cmp_zext_i1_to_i64:
	; SI: s_mov_b32 s{{[0-9]+}}, 0			; SI: s_mov_b32 s{{[0-9]+}}, 0
	; SI: v_cmp_eq_i32			; SI: v_cmp_eq_i32
	; SI: v_cndmask_b32			; SI: v_cndmask_b32
	define void @zext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind {			define void @s_cmp_zext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) #0 {
	%cmp = icmp eq i32 %a, %b			%cmp = icmp eq i32 %a, %b
	%ext = zext i1 %cmp to i64			%ext = zext i1 %cmp to i64
	store i64 %ext, i64 addrspace(1)* %out, align 8			store i64 %ext, i64 addrspace(1)* %out, align 8
	ret void			ret void
	}			}

				; Compares two zeroext i16 kernel arguments for equality, zero-extends the
				; i1 result to i16 and stores it to %out. The checks expect a conditional
				; select of 0 or 1 on vcc, with the short store directly after it.
				; The label has to spell the defined symbol, otherwise FileCheck cannot
				; find it in the generated assembly.
				; NOTE(review): the symbol name mentions an i16-to-i32 extension, but the
				; body extends an i1 compare result to i16 - consider renaming; confirm.
				; SI-LABEL: {{^}}s_test_zext_i16_to_i32:
				; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
				; SI-NEXT: buffer_store_short [[RESULT]]
				define void @s_test_zext_i16_to_i32(i16 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) #0 {
				%tmp0 = icmp eq i16 %a, %b
				%tmp1 = zext i1 %tmp0 to i16
				store i16 %tmp1, i16 addrspace(1)* %out
				ret void
				}

				attributes #0 = { nounwind }

This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU/SI: Make i16 a legal type for VI subtargets
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 52514

lib/Target/AMDGPU/AMDGPUInstructions.td

lib/Target/AMDGPU/SIISelLowering.cpp

lib/Target/AMDGPU/SIInstructions.td

lib/Target/AMDGPU/SIRegisterInfo.td

lib/Target/AMDGPU/VIInstructions.td

test/CodeGen/AMDGPU/add.i16.ll

test/CodeGen/AMDGPU/anyext.ll

test/CodeGen/AMDGPU/global-extload-i8.ll

test/CodeGen/AMDGPU/max.i16.ll

test/CodeGen/AMDGPU/min_test.ll

test/CodeGen/AMDGPU/shl.i16.ll

test/CodeGen/AMDGPU/shl.ll

test/CodeGen/AMDGPU/sign_extend.ll

test/CodeGen/AMDGPU/sra.i16.ll

test/CodeGen/AMDGPU/sra.ll

test/CodeGen/AMDGPU/sub.i16.ll

test/CodeGen/AMDGPU/sub.ll

test/CodeGen/AMDGPU/trunc-store-i1.ll

test/CodeGen/AMDGPU/zero_extend.ll

This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU/SI: Make i16 a legal type for VI subtargets
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 52514

lib/Target/AMDGPU/AMDGPUInstructions.td

lib/Target/AMDGPU/SIISelLowering.cpp

lib/Target/AMDGPU/SIInstructions.td

lib/Target/AMDGPU/SIRegisterInfo.td

lib/Target/AMDGPU/VIInstructions.td

test/CodeGen/AMDGPU/add.i16.ll

test/CodeGen/AMDGPU/anyext.ll

test/CodeGen/AMDGPU/global-extload-i8.ll

test/CodeGen/AMDGPU/max.i16.ll

test/CodeGen/AMDGPU/min_test.ll

test/CodeGen/AMDGPU/shl.i16.ll

test/CodeGen/AMDGPU/shl.ll

test/CodeGen/AMDGPU/sign_extend.ll

test/CodeGen/AMDGPU/sra.i16.ll

test/CodeGen/AMDGPU/sra.ll

test/CodeGen/AMDGPU/sub.i16.ll

test/CodeGen/AMDGPU/sub.ll

test/CodeGen/AMDGPU/trunc-store-i1.ll

test/CodeGen/AMDGPU/zero_extend.ll

AMDGPU/SI: Make i16 a legal type for VI subtargets
ClosedPublic