Diff 52283

lib/Target/AMDGPU/AMDGPUISelLowering.h

Show First 20 Lines • Show All 307 Lines • ▼ Show 20 Lines	enum NodeType : unsigned {
SENDMSG,		SENDMSG,
INTERP_MOV,		INTERP_MOV,
INTERP_P1,		INTERP_P1,
INTERP_P2,		INTERP_P2,
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,		FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
STORE_MSKOR,		STORE_MSKOR,
LOAD_CONSTANT,		LOAD_CONSTANT,
TBUFFER_STORE_FORMAT,		TBUFFER_STORE_FORMAT,
		ATOMIC_CMP_SWAP,
LAST_AMDGPU_ISD_NUMBER		LAST_AMDGPU_ISD_NUMBER
};		};


} // End namespace AMDGPUISD		} // End namespace AMDGPUISD

} // End namespace llvm		} // End namespace llvm

#endif		#endif

lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Show First 20 Lines • Show All 2,806 Lines • ▼ Show 20 Lines	const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(CONST_DATA_PTR)		NODE_NAME_CASE(CONST_DATA_PTR)
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;		case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
NODE_NAME_CASE(SENDMSG)		NODE_NAME_CASE(SENDMSG)
NODE_NAME_CASE(INTERP_MOV)		NODE_NAME_CASE(INTERP_MOV)
NODE_NAME_CASE(INTERP_P1)		NODE_NAME_CASE(INTERP_P1)
NODE_NAME_CASE(INTERP_P2)		NODE_NAME_CASE(INTERP_P2)
NODE_NAME_CASE(STORE_MSKOR)		NODE_NAME_CASE(STORE_MSKOR)
NODE_NAME_CASE(TBUFFER_STORE_FORMAT)		NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
		NODE_NAME_CASE(ATOMIC_CMP_SWAP)
case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;		case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
}		}
return nullptr;		return nullptr;
}		}

SDValue AMDGPUTargetLowering::getRsqrtEstimate(SDValue Operand,		SDValue AMDGPUTargetLowering::getRsqrtEstimate(SDValue Operand,
DAGCombinerInfo &DCI,		DAGCombinerInfo &DCI,
unsigned &RefinementSteps,		unsigned &RefinementSteps,
▲ Show 20 Lines • Show All 108 Lines • Show Last 20 Lines

lib/Target/AMDGPU/AMDGPUInstrInfo.td

	Show First 20 Lines • Show All 177 Lines • ▼ Show 20 Lines
	// MSKOR(dst, mask, src) MEM[dst] = ((MEM[dst] & ~mask) \| src)			// MSKOR(dst, mask, src) MEM[dst] = ((MEM[dst] & ~mask) \| src)
	//			//
	// src0: vec4(src, 0, 0, mask)			// src0: vec4(src, 0, 0, mask)
	// src1: dst - rat offset (aka pointer) in dwords			// src1: dst - rat offset (aka pointer) in dwords
	def AMDGPUstore_mskor : SDNode<"AMDGPUISD::STORE_MSKOR",			def AMDGPUstore_mskor : SDNode<"AMDGPUISD::STORE_MSKOR",
	SDTypeProfile<0, 2, []>,			SDTypeProfile<0, 2, []>,
	[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;			[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

				def AMDGPUatomic_cmp_swap : SDNode<"AMDGPUISD::ATOMIC_CMP_SWAP",
				SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisVec<2>]>,
				[SDNPHasChain, SDNPMayStore, SDNPMayLoad,
				SDNPMemOperand]>;

	def AMDGPUround : SDNode<"ISD::FROUND",			def AMDGPUround : SDNode<"ISD::FROUND",
	SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>>;			SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>>;

	def AMDGPUbfe_u32 : SDNode<"AMDGPUISD::BFE_U32", AMDGPUDTIntTernaryOp>;			def AMDGPUbfe_u32 : SDNode<"AMDGPUISD::BFE_U32", AMDGPUDTIntTernaryOp>;
				arsenm [Unsubmitted, Done]: You should only need the one node. You need to remove the hardcoded vector type. What you need is SDTCisEltOfVec for the type constraint.
	def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;			def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;
	def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;			def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
	def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;			def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;

	def AMDGPUffbh_u32 : SDNode<"AMDGPUISD::FFBH_U32", SDTIntUnaryOp>;			def AMDGPUffbh_u32 : SDNode<"AMDGPUISD::FFBH_U32", SDTIntUnaryOp>;

	// Signed and unsigned 24-bit mulitply. The highest 8-bits are ignore when			// Signed and unsigned 24-bit mulitply. The highest 8-bits are ignore when
	// performing the mulitply. The result is a 32-bit value.			// performing the mulitply. The result is a 32-bit value.
	▲ Show 20 Lines • Show All 58 Lines • Show Last 20 Lines

lib/Target/AMDGPU/AMDGPUInstructions.td

	Show First 20 Lines • Show All 394 Lines • ▼ Show 20 Lines
	def atomic_max_global : global_binary_atomic_op<atomic_load_max>;			def atomic_max_global : global_binary_atomic_op<atomic_load_max>;
	def atomic_min_global : global_binary_atomic_op<atomic_load_min>;			def atomic_min_global : global_binary_atomic_op<atomic_load_min>;
	def atomic_or_global : global_binary_atomic_op<atomic_load_or>;			def atomic_or_global : global_binary_atomic_op<atomic_load_or>;
	def atomic_sub_global : global_binary_atomic_op<atomic_load_sub>;			def atomic_sub_global : global_binary_atomic_op<atomic_load_sub>;
	def atomic_umax_global : global_binary_atomic_op<atomic_load_umax>;			def atomic_umax_global : global_binary_atomic_op<atomic_load_umax>;
	def atomic_umin_global : global_binary_atomic_op<atomic_load_umin>;			def atomic_umin_global : global_binary_atomic_op<atomic_load_umin>;
	def atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;			def atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;

				def atomic_cmp_swap_global : global_binary_atomic_op<AMDGPUatomic_cmp_swap>;
				arsenm [Unsubmitted, Done]: You should only need this one.
				def atomic_cmp_swap_global_nortn : PatFrag<
				(ops node:$ptr, node:$value),
				(atomic_cmp_swap_global node:$ptr, node:$value),
				[{ return SDValue(N, 0).use_empty(); }]
				>;

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	// Misc Pattern Fragments			// Misc Pattern Fragments
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

				arsenm [Unsubmitted, Done]: Not necessary, the type applied to the pattern is what matters.
	class Constants {			class Constants {
	int TWO_PI = 0x40c90fdb;			int TWO_PI = 0x40c90fdb;
	int PI = 0x40490fdb;			int PI = 0x40490fdb;
	int TWO_PI_INV = 0x3e22f983;			int TWO_PI_INV = 0x3e22f983;
	int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding			int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
	int FP32_NEG_ONE = 0xbf800000;			int FP32_NEG_ONE = 0xbf800000;
	int FP32_ONE = 0x3f800000;			int FP32_ONE = 0x3f800000;
	}			}
	▲ Show 20 Lines • Show All 240 Lines • Show Last 20 Lines

lib/Target/AMDGPU/CIInstructions.td

	Show First 20 Lines • Show All 302 Lines • ▼ Show 20 Lines
	>;			>;

	def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i32>;			def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i32>;
	def : FlatStorePat <FLAT_STORE_SHORT, flat_truncstorei16, i32>;			def : FlatStorePat <FLAT_STORE_SHORT, flat_truncstorei16, i32>;
	def : FlatStorePat <FLAT_STORE_DWORD, flat_store, i32>;			def : FlatStorePat <FLAT_STORE_DWORD, flat_store, i32>;
	def : FlatStorePat <FLAT_STORE_DWORDX2, flat_store, v2i32>;			def : FlatStorePat <FLAT_STORE_DWORDX2, flat_store, v2i32>;
	def : FlatStorePat <FLAT_STORE_DWORDX4, flat_store, v4i32>;			def : FlatStorePat <FLAT_STORE_DWORDX4, flat_store, v4i32>;

	class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <			class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt,
	(vt (node i64:$addr, vt:$data)),			ValueType data_vt = vt> : Pat <
				(vt (node i64:$addr, data_vt:$data)),
	(inst $addr, $data, 0, 0)			(inst $addr, $data, 0, 0)
	>;			>;

	def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>;			def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>;
	def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_and_global, i32>;			def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_and_global, i32>;
	def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>;			def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
	def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_max_global, i32>;			def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_max_global, i32>;
	def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_umax_global, i32>;			def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_umax_global, i32>;
	def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_min_global, i32>;			def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_min_global, i32>;
	def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_umin_global, i32>;			def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_umin_global, i32>;
	def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>;			def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>;
	def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;			def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
				def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, atomic_cmp_swap_global, i32, v2i32>;
	def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>;			def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>;

				def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, atomic_cmp_swap_global, i64, v2i64>;

	} // End Predicates = [isCIVI]			} // End Predicates = [isCIVI]

lib/Target/AMDGPU/SIISelLowering.h

Show All 35 Lines	class SITargetLowering final : public AMDGPUTargetLowering {
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV32(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerFDIV32(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool Signed) const;		SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool Signed) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
		SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;

void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;		void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;

SDValue performUCharToFloatCombine(SDNode *N,		SDValue performUCharToFloatCombine(SDNode *N,
DAGCombinerInfo &DCI) const;		DAGCombinerInfo &DCI) const;
SDValue performSHLPtrCombine(SDNode *N,		SDValue performSHLPtrCombine(SDNode *N,
unsigned AS,		unsigned AS,
▲ Show 20 Lines • Show All 93 Lines • Show Last 20 Lines

lib/Target/AMDGPU/SIISelLowering.cpp

Show First 20 Lines • Show All 251 Lines • ▼ Show 20 Lines	if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
setOperationAction(ISD::FCEIL, MVT::f64, Legal);		setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FRINT, MVT::f64, Legal);		setOperationAction(ISD::FRINT, MVT::f64, Legal);
}		}

setOperationAction(ISD::FFLOOR, MVT::f64, Legal);		setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FDIV, MVT::f32, Custom);		setOperationAction(ISD::FDIV, MVT::f32, Custom);
setOperationAction(ISD::FDIV, MVT::f64, Custom);		setOperationAction(ISD::FDIV, MVT::f64, Custom);

		// BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling,
		// and output demarshalling
		setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
		setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);

		// We can't return success/failure, only the old value,
		// let LLVM add the comparison
		setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Expand);
		setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Expand);

setTargetDAGCombine(ISD::FADD);		setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);		setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::FMINNUM);		setTargetDAGCombine(ISD::FMINNUM);
setTargetDAGCombine(ISD::FMAXNUM);		setTargetDAGCombine(ISD::FMAXNUM);
setTargetDAGCombine(ISD::SMIN);		setTargetDAGCombine(ISD::SMIN);
setTargetDAGCombine(ISD::SMAX);		setTargetDAGCombine(ISD::SMAX);
setTargetDAGCombine(ISD::UMIN);		setTargetDAGCombine(ISD::UMIN);
setTargetDAGCombine(ISD::UMAX);		setTargetDAGCombine(ISD::UMAX);
▲ Show 20 Lines • Show All 883 Lines • ▼ Show 20 Lines	case ISD::LOAD: {
return Result;		return Result;
}		}

case ISD::FSIN:		case ISD::FSIN:
case ISD::FCOS:		case ISD::FCOS:
return LowerTrig(Op, DAG);		return LowerTrig(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);		case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::FDIV: return LowerFDIV(Op, DAG);		case ISD::FDIV: return LowerFDIV(Op, DAG);
		case ISD::ATOMIC_CMP_SWAP: return LowerATOMIC_CMP_SWAP(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);		case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::GlobalAddress: {		case ISD::GlobalAddress: {
MachineFunction &MF = DAG.getMachineFunction();		MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();		SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
return LowerGlobalAddress(MFI, Op, DAG);		return LowerGlobalAddress(MFI, Op, DAG);
}		}
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);		case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);		case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
▲ Show 20 Lines • Show All 831 Lines • ▼ Show 20 Lines	case ISD::FCOS:
return DAG.getNode(AMDGPUISD::COS_HW, SDLoc(Op), VT, FractPart);		return DAG.getNode(AMDGPUISD::COS_HW, SDLoc(Op), VT, FractPart);
case ISD::FSIN:		case ISD::FSIN:
return DAG.getNode(AMDGPUISD::SIN_HW, SDLoc(Op), VT, FractPart);		return DAG.getNode(AMDGPUISD::SIN_HW, SDLoc(Op), VT, FractPart);
default:		default:
llvm_unreachable("Wrong trig opcode");		llvm_unreachable("Wrong trig opcode");
}		}
}		}

		SDValue SITargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
		AtomicSDNode *AtomicNode = cast<AtomicSDNode>(Op);
		assert(AtomicNode && AtomicNode->isCompareAndSwap());
		arsenm [Unsubmitted, Not Done]: The assert on the node is redundant with the cast.
		unsigned AS = AtomicNode->getAddressSpace ();

		// No custom lowering required for local address space
		arsenm [Unsubmitted, Done]: Space after if, and not before (AS).
		if (!isFlatGlobalAddrSpace(AS))
		return Op;

		// Non-local address space requires custom lowering for atomic compare
		// and swap; cmp and swap should be in a v2i32 or v2i64 in case of _X2
		SDLoc DL(Op);
		SDValue ChainIn = Op.getOperand(0);
		SDValue Addr = Op.getOperand(1);
		SDValue Old = Op.getOperand(2);
		SDValue New = Op.getOperand(3);
		EVT VT = Op.getValueType();
		MVT SimpleVT = VT.getSimpleVT();
		MVT VecType = MVT::getVectorVT(SimpleVT, 2);
		arsenm [Unsubmitted, Not Done]: The wrapping would be less ugly if the type were set to a variable first.

		SDValue NewOld = DAG.getNode(ISD::BUILD_VECTOR, DL, VecType,
		New, Old);
		SDValue Ops[] = { ChainIn, Addr, NewOld };
		SDVTList VTList = DAG.getVTList(VT, MVT::Other);
		return DAG.getMemIntrinsicNode(AMDGPUISD::ATOMIC_CMP_SWAP, DL,
		VTList, Ops, VT, AtomicNode->getMemOperand());
		}

		arsenm [Unsubmitted, Done]: You can put VT directly into getVTList, you don't need the if.
		arsenm [Unsubmitted, Done]: You don't even need to construct a new vtlist, it should be the same as the incoming op's.
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// Custom DAG optimizations		// Custom DAG optimizations
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,		SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
DAGCombinerInfo &DCI) const {		DAGCombinerInfo &DCI) const {
EVT VT = N->getValueType(0);		EVT VT = N->getValueType(0);
EVT ScalarVT = VT.getScalarType();		EVT ScalarVT = VT.getScalarType();
▲ Show 20 Lines • Show All 830 Lines • ▼ Show 20 Lines	void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
}		}

// Replace unused atomics with the no return version.		// Replace unused atomics with the no return version.
int NoRetAtomicOp = AMDGPU::getAtomicNoRetOp(MI->getOpcode());		int NoRetAtomicOp = AMDGPU::getAtomicNoRetOp(MI->getOpcode());
if (NoRetAtomicOp != -1) {		if (NoRetAtomicOp != -1) {
if (!Node->hasAnyUseOfValue(0)) {		if (!Node->hasAnyUseOfValue(0)) {
MI->setDesc(TII->get(NoRetAtomicOp));		MI->setDesc(TII->get(NoRetAtomicOp));
MI->RemoveOperand(0);		MI->RemoveOperand(0);
		return;
}		}

		// For mubuf_atomic_cmpswap, we need to have tablegen use an extract_subreg
		// instruction, because the return type of these instructions is a vec2 of
		// the memory type, so it can be tied to the input operand.
		// This means these instructions always have a use, so we need to add a
		// special case to check if the atomic has only one extract_subreg use,
		// which itself has no uses.
		if ((Node->hasNUsesOfValue(1, 0) &&
		Node->use_begin()->getMachineOpcode() == AMDGPU::EXTRACT_SUBREG &&
		!Node->use_begin()->hasAnyUseOfValue(0))) {
		unsigned Def = MI->getOperand(0).getReg();

		// Change this into a noret atomic.
		MI->setDesc(TII->get(NoRetAtomicOp));
		MI->RemoveOperand(0);

		// If we only remove the def operand from the atomic instruction, the
		// extract_subreg will be left with a use of a vreg without a def.
		// So we need to insert an implicit_def to avoid machine verifier
		// errors.
		BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
		TII->get(AMDGPU::IMPLICIT_DEF), Def);
		}
return;		return;
}		}
}		}

static SDValue buildSMovImm32(SelectionDAG &DAG, SDLoc DL, uint64_t Val) {		static SDValue buildSMovImm32(SelectionDAG &DAG, SDLoc DL, uint64_t Val) {
SDValue K = DAG.getTargetConstant(Val, DL, MVT::i32);		SDValue K = DAG.getTargetConstant(Val, DL, MVT::i32);
return SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, K), 0);		return SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, K), 0);
}		}
▲ Show 20 Lines • Show All 154 Lines • Show Last 20 Lines

lib/Target/AMDGPU/SIInstructions.td

Show First 20 Lines • Show All 1,042 Lines • ▼ Show 20 Lines	defm BUFFER_ATOMIC_XOR : MUBUF_Atomic <
mubuf<0x3b, 0x4a>, "buffer_atomic_xor", VGPR_32, i32, atomic_xor_global		mubuf<0x3b, 0x4a>, "buffer_atomic_xor", VGPR_32, i32, atomic_xor_global
>;		>;
//def BUFFER_ATOMIC_INC : MUBUF_ <mubuf<0x3c, 0x4b>, "buffer_atomic_inc", []>;		//def BUFFER_ATOMIC_INC : MUBUF_ <mubuf<0x3c, 0x4b>, "buffer_atomic_inc", []>;
//def BUFFER_ATOMIC_DEC : MUBUF_ <mubuf<0x3d, 0x4c>, "buffer_atomic_dec", []>;		//def BUFFER_ATOMIC_DEC : MUBUF_ <mubuf<0x3d, 0x4c>, "buffer_atomic_dec", []>;
//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <mubuf<0x3e>, "buffer_atomic_fcmpswap", []>; // isn't on VI		//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <mubuf<0x3e>, "buffer_atomic_fcmpswap", []>; // isn't on VI
//def BUFFER_ATOMIC_FMIN : MUBUF_ <mubuf<0x3f>, "buffer_atomic_fmin", []>; // isn't on VI		//def BUFFER_ATOMIC_FMIN : MUBUF_ <mubuf<0x3f>, "buffer_atomic_fmin", []>; // isn't on VI
//def BUFFER_ATOMIC_FMAX : MUBUF_ <mubuf<0x40>, "buffer_atomic_fmax", []>; // isn't on VI		//def BUFFER_ATOMIC_FMAX : MUBUF_ <mubuf<0x40>, "buffer_atomic_fmax", []>; // isn't on VI
//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <mubuf<0x50, 0x60>, "buffer_atomic_swap_x2", []>;		//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <mubuf<0x50, 0x60>, "buffer_atomic_swap_x2", []>;
//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <mubuf<0x51, 0x61>, "buffer_atomic_cmpswap_x2", []>;		defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Atomic <
		mubuf<0x51, 0x61>, "buffer_atomic_cmpswap_x2", VReg_128, v2i64, null_frag
		>;
//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <mubuf<0x52, 0x62>, "buffer_atomic_add_x2", []>;		//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <mubuf<0x52, 0x62>, "buffer_atomic_add_x2", []>;
//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <mubuf<0x53, 0x63>, "buffer_atomic_sub_x2", []>;		//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <mubuf<0x53, 0x63>, "buffer_atomic_sub_x2", []>;
//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <mubuf<0x54>, "buffer_atomic_rsub_x2", []>; // isn't on CI & VI		//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <mubuf<0x54>, "buffer_atomic_rsub_x2", []>; // isn't on CI & VI
//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <mubuf<0x55, 0x64>, "buffer_atomic_smin_x2", []>;		//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <mubuf<0x55, 0x64>, "buffer_atomic_smin_x2", []>;
//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <mubuf<0x56, 0x65>, "buffer_atomic_umin_x2", []>;		//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <mubuf<0x56, 0x65>, "buffer_atomic_umin_x2", []>;
//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <mubuf<0x57, 0x66>, "buffer_atomic_smax_x2", []>;		//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <mubuf<0x57, 0x66>, "buffer_atomic_smax_x2", []>;
//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <mubuf<0x58, 0x67>, "buffer_atomic_umax_x2", []>;		//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <mubuf<0x58, 0x67>, "buffer_atomic_umax_x2", []>;
//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <mubuf<0x59, 0x68>, "buffer_atomic_and_x2", []>;		//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <mubuf<0x59, 0x68>, "buffer_atomic_and_x2", []>;
▲ Show 20 Lines • Show All 2,121 Lines • ▼ Show 20 Lines
>;		>;

def : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, i32, truncstorei8_private>;		def : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, i32, truncstorei8_private>;
def : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, i32, truncstorei16_private>;		def : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, i32, truncstorei16_private>;
def : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, i32, store_private>;		def : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, i32, store_private>;
def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, v2i32, store_private>;		def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, v2i32, store_private>;
def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, v4i32, store_private>;		def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, v4i32, store_private>;


		multiclass MUBUFCmpSwapPat <Instruction inst_addr64, Instruction inst_offset,
		SDPatternOperator node, ValueType data_vt,
		ValueType node_vt> {

		let Predicates = [isSI] in {
		def : Pat <
		(node_vt (node (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset,
		i16:$offset, i1:$slc), data_vt:$vdata_in)),
		(EXTRACT_SUBREG
		(inst_addr64 $vdata_in, $vaddr, $srsrc, $soffset, $offset, $slc), sub0)
		>;

		}

		def : Pat <
		(node_vt (node (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset,
		i1:$slc), data_vt:$vdata_in)),
		(EXTRACT_SUBREG
		(inst_offset $vdata_in, $srsrc, $soffset, $offset, $slc), sub0)
		>;
		}

		defm : MUBUFCmpSwapPat <BUFFER_ATOMIC_CMPSWAP_RTN_ADDR64,
		BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET,
		atomic_cmp_swap_global, v2i32, i32>;

		defm : MUBUFCmpSwapPat <BUFFER_ATOMIC_CMPSWAP_X2_RTN_ADDR64,
		BUFFER_ATOMIC_CMPSWAP_X2_RTN_OFFSET,
		atomic_cmp_swap_global, v2i64, i64>;

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// MTBUF Patterns		// MTBUF Patterns
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

// TBUFFER_STORE_FORMAT_*, addr64=0		// TBUFFER_STORE_FORMAT_*, addr64=0
class MTBUF_StoreResource <ValueType vt, int num_channels, MTBUF opcode> : Pat<		class MTBUF_StoreResource <ValueType vt, int num_channels, MTBUF opcode> : Pat<
(SItbuffer_store v4i32:$rsrc, vt:$vdata, num_channels, i32:$vaddr,		(SItbuffer_store v4i32:$rsrc, vt:$vdata, num_channels, i32:$vaddr,
i32:$soffset, imm:$inst_offset, imm:$dfmt,		i32:$soffset, imm:$inst_offset, imm:$dfmt,
▲ Show 20 Lines • Show All 278 Lines • Show Last 20 Lines

test/CodeGen/AMDGPU/global_atomics.ll

	Show First 20 Lines • Show All 752 Lines • ▼ Show 20 Lines
	define void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {			define void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
	entry:			entry:
	%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index			%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
	%0 = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst			%0 = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst
	store i32 %0, i32 addrspace(1)* %out2			store i32 %0, i32 addrspace(1)* %out2
	ret void			ret void
	}			}

				; CMP_SWAP

				; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_offset:
				; GCN: buffer_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
				define void @atomic_cmpxchg_i32_offset(i32 addrspace(1)* %out, i32 %in, i32 %old) {
				entry:
				%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
				%0 = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
				ret void
				}

				; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset:
				; GCN: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
				; GCN: buffer_store_dword v[[RET]]
				define void @atomic_cmpxchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) {
				entry:
				%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
				%0 = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
				%1 = extractvalue { i32, i1 } %0, 0
				store i32 %1, i32 addrspace(1)* %out2
				ret void
				}

				; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset:
				; SI: buffer_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
				define void @atomic_cmpxchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
				entry:
				%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
				%gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
				%0 = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
				ret void
				}

				; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset:
				; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
				; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
				; GCN: buffer_store_dword v[[RET]]
				define void @atomic_cmpxchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) {
				entry:
				%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
				%gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
				%0 = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
				%1 = extractvalue { i32, i1 } %0, 0
				store i32 %1, i32 addrspace(1)* %out2
				ret void
				}

				; FUNC-LABEL: {{^}}atomic_cmpxchg_i32:
				; GCN: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
				define void @atomic_cmpxchg_i32(i32 addrspace(1)* %out, i32 %in, i32 %old) {
				entry:
				%0 = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst
				ret void
				}

				; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_ret:
				; GCN: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
				; GCN: buffer_store_dword v[[RET]]
				define void @atomic_cmpxchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) {
				entry:
				%0 = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst
				%1 = extractvalue { i32, i1 } %0, 0
				store i32 %1, i32 addrspace(1)* %out2
				ret void
				}

				; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_addr64:
				; SI: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
				; VI: flat_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
				define void @atomic_cmpxchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
				entry:
				%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
				%0 = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst
				ret void
				}

				; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64:
				; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
				; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
				; GCN: buffer_store_dword v[[RET]]
				define void @atomic_cmpxchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) {
				entry:
				%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
				%0 = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst
				%1 = extractvalue { i32, i1 } %0, 0
				store i32 %1, i32 addrspace(1)* %out2
				ret void
				}

	; FUNC-LABEL: {{^}}atomic_xor_i32_offset:			; FUNC-LABEL: {{^}}atomic_xor_i32_offset:
	; GCN: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}			; GCN: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
	define void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) {			define void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) {
	entry:			entry:
	%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4			%gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
	%0 = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst			%0 = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
	ret void			ret void
	}			}
	▲ Show 20 Lines • Show All 75 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU: Implement {BUFFER,FLAT}_ATOMIC_CMPSWAP{,_X2}
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 52283

lib/Target/AMDGPU/AMDGPUISelLowering.h

lib/Target/AMDGPU/AMDGPUISelLowering.cpp

lib/Target/AMDGPU/AMDGPUInstrInfo.td

lib/Target/AMDGPU/AMDGPUInstructions.td

lib/Target/AMDGPU/CIInstructions.td

lib/Target/AMDGPU/SIISelLowering.h

lib/Target/AMDGPU/SIISelLowering.cpp

lib/Target/AMDGPU/SIInstructions.td

test/CodeGen/AMDGPU/global_atomics.ll

This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU: Implement {BUFFER,FLAT}_ATOMIC_CMPSWAP{,_X2}
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 52283

lib/Target/AMDGPU/AMDGPUISelLowering.h

lib/Target/AMDGPU/AMDGPUISelLowering.cpp

lib/Target/AMDGPU/AMDGPUInstrInfo.td

lib/Target/AMDGPU/AMDGPUInstructions.td

lib/Target/AMDGPU/CIInstructions.td

lib/Target/AMDGPU/SIISelLowering.h

lib/Target/AMDGPU/SIISelLowering.cpp

lib/Target/AMDGPU/SIInstructions.td

test/CodeGen/AMDGPU/global_atomics.ll

AMDGPU: Implement {BUFFER,FLAT}_ATOMIC_CMPSWAP{,_X2}
ClosedPublic