@@ -1503,6 +1503,8 @@ defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
1503
1503
// Scalar INT_PTX_LDU_GLOBAL_* records. Each one instantiates the LDU_G
// multiclass with two arguments: an assembly-string fragment (a type suffix
// followed by the "$result, [$src]" operands) and the register class that
// receives the loaded value.
// The f16 and f16x2 records use the untyped suffixes b16 / b32 together with
// Float16Regs / Float16x2Regs, whereas the integer and f32/f64 records use
// typed u* / f* suffixes.
// NOTE(review): the LDU_G multiclass body is not visible in this hunk;
// presumably it prepends the instruction mnemonic to the fragment -- confirm.
defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
1504
1504
defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1505
1505
defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1506
+ defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
1507
+ defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
1506
1508
defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
1507
1509
defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
1508
1510
defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
@@ -1553,6 +1555,10 @@ defm INT_PTX_LDU_G_v2i16_ELE
1553
1555
: VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1554
1556
// Two-element INT_PTX_LDU_G_v2*_ELE records. Each instantiates
// VLDU_G_ELE_V2 with a "v2.<type>" assembly fragment naming two destination
// operands ($dst1, $dst2) and the register class used for each element.
// The "{{" / "}}" pairs in the strings are TableGen escapes for literal
// braces in the emitted assembly.
// The added v2f16 record loads two b16 elements into Float16Regs; the added
// v2f16x2 record loads two b32 elements into Float16x2Regs.
// NOTE(review): VLDU_G_ELE_V2 is defined outside this hunk; how it combines
// the fragment with a mnemonic is not visible here.
defm INT_PTX_LDU_G_v2i32_ELE
1555
1557
: VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1558
+ defm INT_PTX_LDU_G_v2f16_ELE
1559
+ : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
1560
+ defm INT_PTX_LDU_G_v2f16x2_ELE
1561
+ : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
1556
1562
defm INT_PTX_LDU_G_v2f32_ELE
1557
1563
: VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
1558
1564
defm INT_PTX_LDU_G_v2i64_ELE
@@ -1567,6 +1573,12 @@ defm INT_PTX_LDU_G_v4i16_ELE
1567
1573
// Four-element INT_PTX_LDU_G_v4*_ELE records. Each instantiates
// VLDU_G_ELE_V4 with a "v4.<type>" assembly fragment naming four destination
// operands ($dst1..$dst4) and the register class used for each element.
// The added v4f16 record uses b16 elements in Float16Regs; the added
// v4f16x2 record uses b32 elements in Float16x2Regs.
// NOTE(review): VLDU_G_ELE_V4 is defined outside this hunk; its expansion is
// not visible here.
defm INT_PTX_LDU_G_v4i32_ELE
1568
1574
: VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1569
1575
Int32Regs>;
1576
+ defm INT_PTX_LDU_G_v4f16_ELE
1577
+ : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1578
+ Float16Regs>;
1579
+ defm INT_PTX_LDU_G_v4f16x2_ELE
1580
+ : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1581
+ Float16x2Regs>;
1570
1582
defm INT_PTX_LDU_G_v4f32_ELE
1571
1583
: VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1572
1584
Float32Regs>;
@@ -1665,7 +1677,9 @@ defm INT_PTX_LDG_G_v2i16_ELE
1665
1677
: VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1666
1678
// Two-element INT_PTX_LDG_G_v2*_ELE records, each instantiating
// VLDG_G_ELE_V2 with a "v2.<type>" assembly fragment and an element register
// class.
// Rename in this hunk: the pre-existing "v2.b32" / Float16x2Regs
// instantiation was previously named INT_PTX_LDG_G_v4f16_ELE (removed line);
// it is now named INT_PTX_LDG_G_v2f16x2_ELE, so the name matches the
// two-element f16x2 shape of the record. A separate INT_PTX_LDG_G_v2f16_ELE
// record with "v2.b16" / Float16Regs is added alongside it.
// NOTE(review): any code that referenced the old v4f16 name for the v2.b32
// record must be updated to the new name -- callers are not visible here.
defm INT_PTX_LDG_G_v2i32_ELE
1667
1679
: VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1668
- defm INT_PTX_LDG_G_v4f16_ELE
1680
+ defm INT_PTX_LDG_G_v2f16_ELE
1681
+ : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
1682
+ defm INT_PTX_LDG_G_v2f16x2_ELE
1669
1683
: VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
1670
1684
defm INT_PTX_LDG_G_v2f32_ELE
1671
1685
: VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
@@ -1679,7 +1693,9 @@ defm INT_PTX_LDG_G_v4i16_ELE
1679
1693
: VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1680
1694
// Four-element INT_PTX_LDG_G_v4*_ELE records, each instantiating
// VLDG_G_ELE_V4 with a "v4.<type>" assembly fragment and an element register
// class.
// Rename in this hunk: the pre-existing "v4.b32" / Float16x2Regs
// instantiation was previously named INT_PTX_LDG_G_v8f16_ELE (removed line);
// it is now named INT_PTX_LDG_G_v4f16x2_ELE. The INT_PTX_LDG_G_v4f16_ELE name
// is now attached to a new "v4.b16" / Float16Regs record.
// NOTE(review): any code that referenced the old v8f16 name must be updated;
// callers are not visible here.
defm INT_PTX_LDG_G_v4i32_ELE
1681
1695
: VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
1682
- defm INT_PTX_LDG_G_v8f16_ELE
1696
+ defm INT_PTX_LDG_G_v4f16_ELE
1697
+ : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>;
1698
+ defm INT_PTX_LDG_G_v4f16x2_ELE
1683
1699
: VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>;
1684
1700
defm INT_PTX_LDG_G_v4f32_ELE
1685
1701
: VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
0 commit comments