Skip to content

Commit 334f51a

Browse files
committed Jan 16, 2017
AMDGPU/EG,CM: Implement _noret global atomics
_RTN versions will be a lot more complicated. Differential Revision: https://reviews.llvm.org/D28067 (llvm-svn: 292162)
1 parent 2bd98af commit 334f51a

File tree

3 files changed

+655
-7
lines changed

3 files changed

+655
-7
lines changed
 

‎llvm/lib/Target/AMDGPU/EvergreenInstructions.td

+108-7
Original file line numberDiff line numberDiff line change
@@ -35,28 +35,59 @@ class CF_MEM_RAT_CACHELESS <bits<6> rat_inst, bits<4> rat_id, bits<4> mask, dag
3535
: EG_CF_RAT <0x57, rat_inst, rat_id, mask, (outs), ins,
3636
"MEM_RAT_CACHELESS "#name, pattern>;
3737

38-
class CF_MEM_RAT <bits<6> rat_inst, bits<4> rat_id, dag ins, string name,
39-
list<dag> pattern>
40-
: EG_CF_RAT <0x56, rat_inst, rat_id, 0xf /* mask */, (outs), ins,
38+
class CF_MEM_RAT <bits<6> rat_inst, bits<4> rat_id, bits<4> mask, dag ins,
39+
dag outs, string name, list<dag> pattern>
40+
: EG_CF_RAT <0x56, rat_inst, rat_id, mask, outs, ins,
4141
"MEM_RAT "#name, pattern>;
4242

4343
class CF_MEM_RAT_STORE_TYPED<bits<1> has_eop>
44-
: CF_MEM_RAT <0x1, ?, (ins R600_Reg128:$rw_gpr, R600_Reg128:$index_gpr,
45-
i32imm:$rat_id, InstFlag:$eop),
44+
: CF_MEM_RAT <0x1, ?, 0xf, (ins R600_Reg128:$rw_gpr, R600_Reg128:$index_gpr,
45+
i32imm:$rat_id, InstFlag:$eop), (outs),
4646
"STORE_TYPED RAT($rat_id) $rw_gpr, $index_gpr"
4747
#!if(has_eop, ", $eop", ""),
4848
[(int_r600_rat_store_typed R600_Reg128:$rw_gpr,
4949
R600_Reg128:$index_gpr,
5050
(i32 imm:$rat_id))]>;
5151

52-
def RAT_MSKOR : CF_MEM_RAT <0x11, 0,
53-
(ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr),
52+
def RAT_MSKOR : CF_MEM_RAT <0x11, 0, 0xf,
53+
(ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr), (outs),
5454
"MSKOR $rw_gpr.XW, $index_gpr",
5555
[(mskor_global v4i32:$rw_gpr, i32:$index_gpr)]
5656
> {
5757
let eop = 0;
5858
}
5959

60+
61+
multiclass RAT_ATOMIC<bits<6> op_ret, bits<6> op_noret, string name> {
62+
let Constraints = "$rw_gpr = $out_gpr", eop = 0, mayStore = 1 in {
63+
def _RTN: CF_MEM_RAT <op_ret, 0, 0xf,
64+
(ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr),
65+
(outs R600_Reg128:$out_gpr),
66+
name ## "_RTN" ## " $rw_gpr, $index_gpr", [] >;
67+
def _NORET: CF_MEM_RAT <op_noret, 0, 0xf,
68+
(ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr),
69+
(outs R600_Reg128:$out_gpr),
70+
name ## " $rw_gpr, $index_gpr", [] >;
71+
}
72+
}
73+
74+
// Swap no-ret is just store. Raw store to cached target
75+
// can only store on dword, which exactly matches swap_no_ret.
76+
defm RAT_ATOMIC_XCHG_INT : RAT_ATOMIC<1, 34, "ATOMIC_XCHG_INT">;
77+
defm RAT_ATOMIC_CMPXCHG_INT : RAT_ATOMIC<4, 36, "ATOMIC_CMPXCHG_INT">;
78+
defm RAT_ATOMIC_ADD : RAT_ATOMIC<7, 39, "ATOMIC_ADD">;
79+
defm RAT_ATOMIC_SUB : RAT_ATOMIC<8, 40, "ATOMIC_SUB">;
80+
defm RAT_ATOMIC_RSUB : RAT_ATOMIC<9, 41, "ATOMIC_RSUB">;
81+
defm RAT_ATOMIC_MIN_INT : RAT_ATOMIC<10, 42, "ATOMIC_MIN_INT">;
82+
defm RAT_ATOMIC_MIN_UINT : RAT_ATOMIC<11, 43, "ATOMIC_MIN_UINT">;
83+
defm RAT_ATOMIC_MAX_INT : RAT_ATOMIC<12, 44, "ATOMIC_MAX_INT">;
84+
defm RAT_ATOMIC_MAX_UINT : RAT_ATOMIC<13, 45, "ATOMIC_MAX_UINT">;
85+
defm RAT_ATOMIC_AND : RAT_ATOMIC<14, 46, "ATOMIC_AND">;
86+
defm RAT_ATOMIC_OR : RAT_ATOMIC<15, 47, "ATOMIC_OR">;
87+
defm RAT_ATOMIC_XOR : RAT_ATOMIC<16, 48, "ATOMIC_XOR">;
88+
defm RAT_ATOMIC_INC_UINT : RAT_ATOMIC<18, 50, "ATOMIC_INC_UINT">;
89+
defm RAT_ATOMIC_DEC_UINT : RAT_ATOMIC<19, 51, "ATOMIC_DEC_UINT">;
90+
6091
} // End let Predicates = [isEGorCayman]
6192

6293
//===----------------------------------------------------------------------===//
@@ -257,6 +288,76 @@ def : Pat<(v4i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)),
257288

258289
let Predicates = [isEGorCayman] in {
259290

291+
multiclass AtomicPat<Instruction inst_ret, Instruction inst_noret,
292+
SDPatternOperator node_ret, SDPatternOperator node_noret> {
293+
// FIXME: Add _RTN version. We need per WI scratch location to store the old value
294+
// EXTRACT_SUBREG here is dummy, we know the node has no uses
295+
def : Pat<(i32 (node_noret i32:$ptr, i32:$data)),
296+
(EXTRACT_SUBREG (inst_noret
297+
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), $data, sub0), $ptr), sub1)>;
298+
}
299+
multiclass AtomicIncDecPat<Instruction inst_ret, Instruction inst_noret,
300+
SDPatternOperator node_ret, SDPatternOperator node_noret, int C> {
301+
// FIXME: Add _RTN version. We need per WI scratch location to store the old value
302+
// EXTRACT_SUBREG here is dummy, we know the node has no uses
303+
def : Pat<(i32 (node_noret i32:$ptr, C)),
304+
(EXTRACT_SUBREG (inst_noret
305+
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (MOV_IMM_I32 -1), sub0), $ptr), sub1)>;
306+
}
307+
308+
// The CMPSWAP pattern is special
309+
// EXTRACT_SUBREG here is dummy, we know the node has no uses
310+
// FIXME: Add _RTN version. We need per WI scratch location to store the old value
311+
def : Pat<(i32 (atomic_cmp_swap_global_noret i32:$ptr, i32:$cmp, i32:$data)),
312+
(EXTRACT_SUBREG (RAT_ATOMIC_CMPXCHG_INT_NORET
313+
(INSERT_SUBREG
314+
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), $cmp, sub3),
315+
$data, sub0),
316+
$ptr), sub1)>;
317+
318+
defm AtomicSwapPat : AtomicPat <RAT_ATOMIC_XCHG_INT_RTN,
319+
RAT_ATOMIC_XCHG_INT_NORET,
320+
atomic_swap_global_ret,
321+
atomic_swap_global_noret>;
322+
defm AtomicAddPat : AtomicPat <RAT_ATOMIC_ADD_RTN, RAT_ATOMIC_ADD_NORET,
323+
atomic_add_global_ret, atomic_add_global_noret>;
324+
defm AtomicSubPat : AtomicPat <RAT_ATOMIC_SUB_RTN, RAT_ATOMIC_SUB_NORET,
325+
atomic_sub_global_ret, atomic_sub_global_noret>;
326+
defm AtomicMinPat : AtomicPat <RAT_ATOMIC_MIN_INT_RTN,
327+
RAT_ATOMIC_MIN_INT_NORET,
328+
atomic_min_global_ret, atomic_min_global_noret>;
329+
defm AtomicUMinPat : AtomicPat <RAT_ATOMIC_MIN_UINT_RTN,
330+
RAT_ATOMIC_MIN_UINT_NORET,
331+
atomic_umin_global_ret, atomic_umin_global_noret>;
332+
defm AtomicMaxPat : AtomicPat <RAT_ATOMIC_MAX_INT_RTN,
333+
RAT_ATOMIC_MAX_INT_NORET,
334+
atomic_max_global_ret, atomic_max_global_noret>;
335+
defm AtomicUMaxPat : AtomicPat <RAT_ATOMIC_MAX_UINT_RTN,
336+
RAT_ATOMIC_MAX_UINT_NORET,
337+
atomic_umax_global_ret, atomic_umax_global_noret>;
338+
defm AtomicAndPat : AtomicPat <RAT_ATOMIC_AND_RTN, RAT_ATOMIC_AND_NORET,
339+
atomic_and_global_ret, atomic_and_global_noret>;
340+
defm AtomicOrPat : AtomicPat <RAT_ATOMIC_OR_RTN, RAT_ATOMIC_OR_NORET,
341+
atomic_or_global_ret, atomic_or_global_noret>;
342+
defm AtomicXorPat : AtomicPat <RAT_ATOMIC_XOR_RTN, RAT_ATOMIC_XOR_NORET,
343+
atomic_xor_global_ret, atomic_xor_global_noret>;
344+
defm AtomicIncAddPat : AtomicIncDecPat <RAT_ATOMIC_INC_UINT_RTN,
345+
RAT_ATOMIC_INC_UINT_NORET,
346+
atomic_add_global_ret,
347+
atomic_add_global_noret, 1>;
348+
defm AtomicIncSubPat : AtomicIncDecPat <RAT_ATOMIC_INC_UINT_RTN,
349+
RAT_ATOMIC_INC_UINT_NORET,
350+
atomic_sub_global_ret,
351+
atomic_sub_global_noret, -1>;
352+
defm AtomicDecAddPat : AtomicIncDecPat <RAT_ATOMIC_DEC_UINT_RTN,
353+
RAT_ATOMIC_DEC_UINT_NORET,
354+
atomic_add_global_ret,
355+
atomic_add_global_noret, -1>;
356+
defm AtomicDecSubPat : AtomicIncDecPat <RAT_ATOMIC_DEC_UINT_RTN,
357+
RAT_ATOMIC_DEC_UINT_NORET,
358+
atomic_sub_global_ret,
359+
atomic_sub_global_noret, 1>;
360+
260361
// Should be predicated on FeatureFP64
261362
// def FMA_64 : R600_3OP <
262363
// 0xA, "FMA_64",

‎llvm/lib/Target/AMDGPU/R600ISelLowering.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,11 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
221221
setOperationAction(ISD::SUBE, VT, Expand);
222222
}
223223

224+
// LLVM will expand these to atomic_cmp_swap(0)
225+
// and atomic_swap, respectively.
226+
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
227+
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
228+
224229
setSchedulingPreference(Sched::Source);
225230

226231
setTargetDAGCombine(ISD::FP_ROUND);

0 commit comments

Comments
 (0)
Please sign in to comment.