Skip to content

Commit c0ae1be

Browse files
committed Jul 11, 2019
[AMDGPU] gfx908 dot instruction support
Differential Revision: https://reviews.llvm.org/D64431
llvm-svn: 365715
1 parent 3daf58f commit c0ae1be

File tree

7 files changed

+968
-170
lines changed

7 files changed

+968
-170
lines changed
 

‎llvm/lib/Target/AMDGPU/VOP2Instructions.td

+30
Original file line number | Diff line number | Diff line change
@@ -658,6 +658,11 @@ let Constraints = "$vdst = $src2",
658658
defm V_DOT2C_F32_F16 : VOP2Inst_e32<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16>;
659659
let SubtargetPredicate = HasDot6Insts in
660660
defm V_DOT4C_I32_I8 : VOP2Inst_e32<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>;
661+
662+
let SubtargetPredicate = HasDot4Insts in
663+
defm V_DOT2C_I32_I16 : VOP2Inst_e32<"v_dot2c_i32_i16", VOP_DOT_ACC_I32_I32>;
664+
let SubtargetPredicate = HasDot3Insts in
665+
defm V_DOT8C_I32_I4 : VOP2Inst_e32<"v_dot8c_i32_i4", VOP_DOT_ACC_I32_I32>;
661666
}
662667

663668
let AddedComplexity = 30 in {
@@ -673,6 +678,18 @@ let AddedComplexity = 30 in {
673678
> {
674679
let SubtargetPredicate = HasDot6Insts;
675680
}
681+
def : GCNPat<
682+
(i32 (int_amdgcn_sdot2 v2i16:$src0, v2i16:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
683+
(i32 (V_DOT2C_I32_I16_e32 $src0, $src1, $src2))
684+
> {
685+
let SubtargetPredicate = HasDot4Insts;
686+
}
687+
def : GCNPat<
688+
(i32 (int_amdgcn_sdot8 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
689+
(i32 (V_DOT8C_I32_I4_e32 $src0, $src1, $src2))
690+
> {
691+
let SubtargetPredicate = HasDot3Insts;
692+
}
676693
} // End AddedComplexity = 30
677694

678695
let SubtargetPredicate = isGFX10Plus in {
@@ -1536,21 +1553,34 @@ defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>;
15361553

15371554
} // End SubtargetPredicate = HasDLInsts
15381555

1556+
multiclass VOP2_Real_DOT_ACC_gfx9<bits<6> op> : VOP2_Real_e32_vi<op> {
1557+
def _dpp : VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")>;
1558+
}
1559+
15391560
multiclass VOP2_Real_DOT_ACC_gfx10<bits<6> op> :
15401561
VOP2_Real_e32_gfx10<op>,
15411562
VOP2_Real_dpp_gfx10<op>,
15421563
VOP2_Real_dpp8_gfx10<op>;
15431564

15441565
let SubtargetPredicate = HasDot5Insts in {
1566+
defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx9<0x37>;
15451567
// NB: Opcode conflicts with V_DOT8C_I32_I4
15461568
// This opcode exists in gfx 10.1* only
15471569
defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx10<0x02>;
15481570
}
15491571

15501572
let SubtargetPredicate = HasDot6Insts in {
1573+
defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx9<0x39>;
15511574
defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx10<0x0d>;
15521575
}
15531576

1577+
let SubtargetPredicate = HasDot4Insts in {
1578+
defm V_DOT2C_I32_I16 : VOP2_Real_DOT_ACC_gfx9<0x38>;
1579+
}
1580+
let SubtargetPredicate = HasDot3Insts in {
1581+
defm V_DOT8C_I32_I4 : VOP2_Real_DOT_ACC_gfx9<0x3a>;
1582+
}
1583+
15541584
let SubtargetPredicate = HasPkFmacF16Inst in {
15551585
defm V_PK_FMAC_F16 : VOP2_Real_e32_vi<0x3c>;
15561586
} // End SubtargetPredicate = HasPkFmacF16Inst

‎llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot2.ll

+3
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,13 @@
11
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX906
2+
; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX908
23
; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
34
; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
45

56
declare i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %b, i32 %c, i1 %clamp)
67

78
; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot2_clamp
89
; GFX906: v_dot2_i32_i16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
10+
; GFX908: v_dot2_i32_i16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
911
; GFX10: v_dot2_i32_i16 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
1012
define amdgpu_kernel void @test_llvm_amdgcn_sdot2_clamp(
1113
i32 addrspace(1)* %r,
@@ -23,6 +25,7 @@ entry:
2325

2426
; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot2_no_clamp
2527
; GFX906: v_dot2_i32_i16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
28+
; GFX908: v_dot2c_i32_i16_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
2629
; GFX10: v_dot2_i32_i16 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
2730
define amdgpu_kernel void @test_llvm_amdgcn_sdot2_no_clamp(
2831
i32 addrspace(1)* %r,

‎llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll

+3
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,13 @@
11
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX906
2+
; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX908
23
; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10,GFX1011
34
; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10,GFX1011
45

56
declare i32 @llvm.amdgcn.sdot8(i32 %a, i32 %b, i32 %c, i1 %clamp)
67

78
; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot8_clamp
89
; GFX906: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
10+
; GFX908: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
911
; GFX10: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
1012
define amdgpu_kernel void @test_llvm_amdgcn_sdot8_clamp(
1113
i32 addrspace(1)* %r,
@@ -25,6 +27,7 @@ entry:
2527

2628
; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot8_no_clamp
2729
; GFX906: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
30+
; GFX908: v_dot8c_i32_i4_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
2831
; GFX1011: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
2932
define amdgpu_kernel void @test_llvm_amdgcn_sdot8_no_clamp(
3033
i32 addrspace(1)* %r,

0 commit comments

Comments
 (0)
Please sign in to comment.