Skip to content

Commit 5920bab

Browse files
committed Mar 2, 2017
[NVPTX] Added missing LDU/LDG intrinsics for f16.
Differential Revision: https://reviews.llvm.org/D30512
llvm-svn: 296784
1 parent 7b227fe commit 5920bab

File tree

2 files changed

+19
-2
lines changed

2 files changed

+19
-2
lines changed
 

‎llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

+1
Original file line number | Diff line number | Diff line change
@@ -2450,6 +2450,7 @@ let mayLoad=1, hasSideEffects=0 in {
24502450
defm LDV_i32 : LD_VEC<Int32Regs>;
24512451
defm LDV_i64 : LD_VEC<Int64Regs>;
24522452
defm LDV_f16 : LD_VEC<Float16Regs>;
2453+
defm LDV_f16x2 : LD_VEC<Float16x2Regs>;
24532454
defm LDV_f32 : LD_VEC<Float32Regs>;
24542455
defm LDV_f64 : LD_VEC<Float64Regs>;
24552456
}

‎llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

+18-2
Original file line number | Diff line number | Diff line change
@@ -1503,6 +1503,8 @@ defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
15031503
defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
15041504
defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
15051505
defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1506+
defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
1507+
defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
15061508
defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
15071509
defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
15081510
defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
@@ -1553,6 +1555,10 @@ defm INT_PTX_LDU_G_v2i16_ELE
15531555
: VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
15541556
defm INT_PTX_LDU_G_v2i32_ELE
15551557
: VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1558+
defm INT_PTX_LDU_G_v2f16_ELE
1559+
: VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
1560+
defm INT_PTX_LDU_G_v2f16x2_ELE
1561+
: VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
15561562
defm INT_PTX_LDU_G_v2f32_ELE
15571563
: VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
15581564
defm INT_PTX_LDU_G_v2i64_ELE
@@ -1567,6 +1573,12 @@ defm INT_PTX_LDU_G_v4i16_ELE
15671573
defm INT_PTX_LDU_G_v4i32_ELE
15681574
: VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
15691575
Int32Regs>;
1576+
defm INT_PTX_LDU_G_v4f16_ELE
1577+
: VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1578+
Float16Regs>;
1579+
defm INT_PTX_LDU_G_v4f16x2_ELE
1580+
: VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1581+
Float16x2Regs>;
15701582
defm INT_PTX_LDU_G_v4f32_ELE
15711583
: VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
15721584
Float32Regs>;
@@ -1665,7 +1677,9 @@ defm INT_PTX_LDG_G_v2i16_ELE
16651677
: VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
16661678
defm INT_PTX_LDG_G_v2i32_ELE
16671679
: VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1668-
defm INT_PTX_LDG_G_v4f16_ELE
1680+
defm INT_PTX_LDG_G_v2f16_ELE
1681+
: VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
1682+
defm INT_PTX_LDG_G_v2f16x2_ELE
16691683
: VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
16701684
defm INT_PTX_LDG_G_v2f32_ELE
16711685
: VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
@@ -1679,7 +1693,9 @@ defm INT_PTX_LDG_G_v4i16_ELE
16791693
: VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
16801694
defm INT_PTX_LDG_G_v4i32_ELE
16811695
: VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
1682-
defm INT_PTX_LDG_G_v8f16_ELE
1696+
defm INT_PTX_LDG_G_v4f16_ELE
1697+
: VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>;
1698+
defm INT_PTX_LDG_G_v4f16x2_ELE
16831699
: VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>;
16841700
defm INT_PTX_LDG_G_v4f32_ELE
16851701
: VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;

0 commit comments

Comments
 (0)
Please sign in to comment.