Skip to content

Commit 34e2978

Browse files
committed Apr 5, 2017
[AMDGPU] SDWA peephole: enable by default
Reviewers: vpykhtin, rampitec, arsenm
Subscribers: qcolombet, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye
Differential Revision: https://reviews.llvm.org/D31671
llvm-svn: 299536
1 parent 014ac69 commit 34e2978

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

43 files changed

+609
-435
lines changed
 

‎llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -105,7 +105,7 @@ static cl::opt<bool> EarlyInlineAll(
105105
static cl::opt<bool> EnableSDWAPeephole(
106106
"amdgpu-sdwa-peephole",
107107
cl::desc("Enable SDWA peepholer"),
108-
cl::init(false));
108+
cl::init(true));
109109

110110
// Enable address space based alias analysis
111111
static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,

‎llvm/test/CodeGen/AMDGPU/add.v2i16.ll

+4-4
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
11
; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s
2-
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
2+
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
33

44
; FIXME: Need to handle non-uniform case for function below (load without gep).
55
; GCN-LABEL: {{^}}v_test_add_v2i16:
66
; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
77

8-
; VI: v_add_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
8+
; VI: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
99
; VI: v_add_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
1010
define amdgpu_kernel void @v_test_add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
1111
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -53,7 +53,7 @@ define amdgpu_kernel void @s_test_add_self_v2i16(<2 x i16> addrspace(1)* %out, <
5353
; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
5454

5555
; VI: v_add_i32
56-
; VI: v_add_i32
56+
; VI: v_add_i32_sdwa
5757
define amdgpu_kernel void @s_test_add_v2i16_kernarg(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #1 {
5858
%add = add <2 x i16> %a, %b
5959
store <2 x i16> %add, <2 x i16> addrspace(1)* %out
@@ -257,7 +257,7 @@ define amdgpu_kernel void @v_test_add_v2i16_sext_to_v2i32(<2 x i32> addrspace(1)
257257
; GFX9: v_pk_add_u16
258258
; GFX9: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
259259

260-
; VI: v_add_u16_e32
260+
; VI: v_add_u16_sdwa
261261
; VI: v_add_u16_e32
262262

263263
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16

0 commit comments

Comments
 (0)
Please sign in to comment.