This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Change register type for v32 vectors
ClosedPublic

Authored by rampitec on Jul 16 2019, 12:55 PM.

Download Raw Diff

Details

Reviewers

arsenm
kzhuravl
msearles

Commits

rG6e0fa292c22c: [AMDGPU] Change register type for v32 vectors
rL366252: [AMDGPU] Change register type for v32 vectors

Summary

When it is AReg_1024, this results in unnecessary copying of
32-element vectors into AGPRs even though they are not intended
for an mfma instruction.

Diff Detail

Repository: rL LLVM

Event Timeline

rampitec created this revision. Jul 16 2019, 12:55 PM

Herald added subscribers: t-tye, tpr, dstuttard and 4 others. · View Herald Transcript · Jul 16 2019, 12:55 PM

arsenm accepted this revision. Jul 16 2019, 12:57 PM

arsenm added inline comments.

test/CodeGen/AMDGPU/v1024.ll
8 ↗	(On Diff #210152)	You can use GCN-COUNT-<number>

This revision is now accepted and ready to land. Jul 16 2019, 12:57 PM

arsenm added inline comments. Jul 16 2019, 12:57 PM

test/CodeGen/AMDGPU/v1024.ll
2 ↗	(On Diff #210152)	Can you add a comment explaining what this tests?

Updated test.

Closed by commit rL366252: [AMDGPU] Change register type for v32 vectors (authored by rampitec). · Explain Why · Jul 16 2019, 1:05 PM

This revision was automatically updated to reflect the committed changes.

Herald added a project: Restricted Project. · View Herald Transcript · Jul 16 2019, 1:05 PM

Revision Contents

Path

Size

llvm/

trunk/

lib/

Target/

AMDGPU/

SIISelLowering.cpp

4 lines

test/

CodeGen/

AMDGPU/

v1024.ll

29 lines

Diff 210155

llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 146 Lines • ▼ Show 20 Lines	if (Subtarget->has16BitInsts()) {
// Unless there are also VOP3P operations, no operations are really legal.		// Unless there are also VOP3P operations, no operations are really legal.
addRegisterClass(MVT::v2i16, &AMDGPU::SReg_32_XM0RegClass);		addRegisterClass(MVT::v2i16, &AMDGPU::SReg_32_XM0RegClass);
addRegisterClass(MVT::v2f16, &AMDGPU::SReg_32_XM0RegClass);		addRegisterClass(MVT::v2f16, &AMDGPU::SReg_32_XM0RegClass);
addRegisterClass(MVT::v4i16, &AMDGPU::SReg_64RegClass);		addRegisterClass(MVT::v4i16, &AMDGPU::SReg_64RegClass);
addRegisterClass(MVT::v4f16, &AMDGPU::SReg_64RegClass);		addRegisterClass(MVT::v4f16, &AMDGPU::SReg_64RegClass);
}		}

if (Subtarget->hasMAIInsts()) {		if (Subtarget->hasMAIInsts()) {
addRegisterClass(MVT::v32i32, &AMDGPU::AReg_1024RegClass);		addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
addRegisterClass(MVT::v32f32, &AMDGPU::AReg_1024RegClass);		addRegisterClass(MVT::v32f32, &AMDGPU::VReg_1024RegClass);
}		}

computeRegisterProperties(Subtarget->getRegisterInfo());		computeRegisterProperties(Subtarget->getRegisterInfo());

// We need to custom lower vector stores from local memory		// We need to custom lower vector stores from local memory
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);		setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
setOperationAction(ISD::LOAD, MVT::v3i32, Custom);		setOperationAction(ISD::LOAD, MVT::v3i32, Custom);
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);		setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
▲ Show 20 Lines • Show All 10,586 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/AMDGPU/v1024.ll

				; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

				; Check that we do not use AGPRs for v32i32 type

				; GCN-LABEL: {{^}}test_v1024:
				; GCN-NOT: v_accvgpr
				; GCN-COUNT-32: v_mov_b32_e32
				; GCN-NOT: v_accvgpr
				; Kernel built around a <32 x i32> alloca (32 * 32 bits = 1024 bits,
				; i.e. 128 bytes) in addrspace(5). One predecessor fills the alloca with a
				; 128-byte memcpy, the other leaves it untouched, and the join block copies
				; 128 bytes out of it to an addrspace(1) pointer.
				define amdgpu_kernel void @test_v1024() {
				entry:
				%alloca = alloca <32 x i32>, align 16, addrspace(5)
				; View the vector alloca as an i8 pointer so it can be passed to memcpy.
				%cast = bitcast <32 x i32> addrspace(5)* %alloca to i8 addrspace(5)*
				; The branch condition is undef, so both successors stay reachable.
				br i1 undef, label %if.then.i.i, label %if.else.i

				if.then.i.i: ; preds = %entry
				; Copy 128 bytes into the alloca; the source pointer is undef.
				call void @llvm.memcpy.p5i8.p5i8.i64(i8 addrspace(5)* align 16 %cast, i8 addrspace(5)* align 4 undef, i64 128, i1 false)
				br label %if.then.i62.i

				if.else.i: ; preds = %entry
				; Empty path: falls through to the join block without writing the alloca.
				br label %if.then.i62.i

				if.then.i62.i: ; preds = %if.else.i, %if.then.i.i
				; Copy the full 128 bytes of the alloca to an (undef) addrspace(1) pointer.
				call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)* align 4 undef, i8 addrspace(5)* align 16 %cast, i64 128, i1 false)
				ret void
				}

				; Declarations of the two memcpy intrinsic variants called by @test_v1024:
				; addrspace(5) -> addrspace(5), and addrspace(5) -> addrspace(1), both with
				; an i64 length.
				declare void @llvm.memcpy.p5i8.p5i8.i64(i8 addrspace(5)* nocapture writeonly, i8 addrspace(5)* nocapture readonly, i64, i1 immarg)

				declare void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)* nocapture writeonly, i8 addrspace(5)* nocapture readonly, i64, i1 immarg)