This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU/SI: Fix LowerParameter() for i16 arguments
ClosedPublic

Authored by • tstellarAMD on Oct 3 2016, 10:35 AM.

Download Raw Diff

Details

Reviewers

Commits

rGbc6c523cce6d: AMDGPU/SI: Fix LowerParameter() for i16 arguments
rL284397: AMDGPU/SI: Fix LowerParameter() for i16 arguments

Summary

If we are loading an i16 value from a 32-bit memory location, then
we need to be able to truncate the loaded value to i16.

Diff Detail

Repository: rL LLVM

Event Timeline

• tstellarAMD updated this revision to Diff 73301. · Oct 3 2016, 10:35 AM

• tstellarAMD retitled this revision to AMDGPU/SI: Fix LowerParameter() for i16 arguments.

• tstellarAMD updated this object.

• tstellarAMD added a reviewer: arsenm.

• tstellarAMD added a subscriber: llvm-commits.

Herald added subscribers: tony-tye, yaxunl, nhaehnle and 2 others. · View Herald Transcript · Oct 3 2016, 10:35 AM

arsenm added inline comments. · Oct 3 2016, 6:00 PM

lib/Target/AMDGPU/SIISelLowering.cpp
587–592 ↗	(On Diff #73301)	When is the trunc case necessary? Since the FP case only needs to handle extend, this looks weird to me.

• tstellarAMD added inline comments. · Oct 7 2016, 3:54 PM

lib/Target/AMDGPU/SIISelLowering.cpp
587–592 ↗	(On Diff #73301)	Trunc is required when VT is MVT::i16 and MemVT is MVT::i32.

• tstellarAMD added inline comments. · Oct 13 2016, 8:31 AM

lib/Target/AMDGPU/SIISelLowering.cpp
587–592 ↗	(On Diff #73301)	This happens for Mesa, because 16-bit kernel arguments are stored as 32-bit values in the kernarg buffer.

arsenm added inline comments. · Oct 13 2016, 6:56 PM

lib/Target/AMDGPU/SIISelLowering.cpp
587–592 ↗	(On Diff #73301)	So shouldn't it have the same problem with f16 and need to fptrunc it?

• tstellarAMD added inline comments. · Oct 14 2016, 8:26 AM

lib/Target/AMDGPU/SIISelLowering.cpp
587–592 ↗	(On Diff #73301)	No, because f16 values are always 16 bits in memory. i16/i8 are just a special case when using the Mesa ABI, since those arguments are sign/zero-extended by the runtime.

LGTM

This revision is now accepted and ready to land. · Oct 17 2016, 9:12 AM

Closed by commit rL284397: AMDGPU/SI: Fix LowerParameter() for i16 arguments (authored by tstellar). · Explain Why · Oct 17 2016, 9:31 AM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

llvm/

trunk/

lib/

Target/

AMDGPU/

SIISelLowering.cpp

28 lines

test/

CodeGen/

AMDGPU/

merge-stores.ll

6 lines

Diff 74861

llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp

Show First 20 Lines • Show All 581 Lines • ▼ Show 20 Lines	SDValue SITargetLowering::LowerParameterPtr(SelectionDAG &DAG,
return DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,		return DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
DAG.getConstant(Offset, SL, PtrVT));		DAG.getConstant(Offset, SL, PtrVT));
}		}
SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,		SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
const SDLoc &SL, SDValue Chain,		const SDLoc &SL, SDValue Chain,
unsigned Offset, bool Signed) const {		unsigned Offset, bool Signed) const {
const DataLayout &DL = DAG.getDataLayout();		const DataLayout &DL = DAG.getDataLayout();
Type Ty = VT.getTypeForEVT(DAG.getContext());		Type Ty = VT.getTypeForEVT(DAG.getContext());
MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);		PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
SDValue PtrOffset = DAG.getUNDEF(PtrVT);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));		MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));

unsigned Align = DL.getABITypeAlignment(Ty);		unsigned Align = DL.getABITypeAlignment(Ty);

ISD::LoadExtType ExtTy = Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
if (MemVT.isFloatingPoint())
ExtTy = ISD::EXTLOAD;

SDValue Ptr = LowerParameterPtr(DAG, SL, Chain, Offset);		SDValue Ptr = LowerParameterPtr(DAG, SL, Chain, Offset);
return DAG.getLoad(ISD::UNINDEXED, ExtTy, VT, SL, Chain, Ptr, PtrOffset,		SDValue Load = DAG.getLoad(MemVT, SL, Chain, Ptr, PtrInfo, Align,
PtrInfo, MemVT, Align,
MachineMemOperand::MONonTemporal \|		MachineMemOperand::MONonTemporal \|
MachineMemOperand::MODereferenceable \|		MachineMemOperand::MODereferenceable \|
MachineMemOperand::MOInvariant);		MachineMemOperand::MOInvariant);

		SDValue Val;
		if (MemVT.isFloatingPoint())
		Val = DAG.getNode(ISD::FP_EXTEND, SL, VT, Load);
		else if (Signed)
		Val = DAG.getSExtOrTrunc(Load, SL, VT);
		else
		Val = DAG.getZExtOrTrunc(Load, SL, VT);

		SDValue Ops[] = {
		Val,
		Load.getValue(1)
		};

		return DAG.getMergeValues(Ops, SL);
}		}

SDValue SITargetLowering::LowerFormalArguments(		SDValue SITargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,		SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,		const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {		SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();		const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();

▲ Show 20 Lines • Show All 3,447 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/AMDGPU/merge-stores.ll

Show First 20 Lines • Show All 143 Lines • ▼ Show 20 Lines	define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 {

store float 1.0, float addrspace(1)* %out.gep.1		store float 1.0, float addrspace(1)* %out.gep.1
store float 2.0, float addrspace(1)* %out.gep.2		store float 2.0, float addrspace(1)* %out.gep.2
store float 4.0, float addrspace(1)* %out.gep.3		store float 4.0, float addrspace(1)* %out.gep.3
store float 8.0, float addrspace(1)* %out		store float 8.0, float addrspace(1)* %out
ret void		ret void
}		}

; FIXME: Should be able to merge this
; GCN-LABEL: {{^}}merge_global_store_4_constants_mixed_i32_f32:		; GCN-LABEL: {{^}}merge_global_store_4_constants_mixed_i32_f32:
; GCN-NOAA: buffer_store_dword v		; GCN-NOAA: buffer_store_dwordx4 v
; GCN-NOAA: buffer_store_dword v
; GCN-NOAA: buffer_store_dword v
; GCN-NOAA: buffer_store_dword v

; GCN-AA: buffer_store_dwordx2		; GCN-AA: buffer_store_dwordx2
; GCN-AA: buffer_store_dword v		; GCN-AA: buffer_store_dword v
; GCN-AA: buffer_store_dword v		; GCN-AA: buffer_store_dword v

; GCN: s_endpgm		; GCN: s_endpgm
define void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 {		define void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1		%out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
▲ Show 20 Lines • Show All 547 Lines • Show Last 20 Lines