Index: include/llvm/IR/IntrinsicsAMDGPU.td =================================================================== --- include/llvm/IR/IntrinsicsAMDGPU.td +++ include/llvm/IR/IntrinsicsAMDGPU.td @@ -100,4 +100,23 @@ GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1">, Intrinsic<[], [], []>; +def int_amdgcn_s_dcache_inv : + GCCBuiltin<"__builtin_amdgcn_s_dcache_inv">, + Intrinsic<[], [], []>; + +// CI+ +def int_amdgcn_s_dcache_inv_vol : + GCCBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">, + Intrinsic<[], [], []>; + +// VI +def int_amdgcn_s_dcache_wb : + GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">, + Intrinsic<[], [], []>; + +// VI +def int_amdgcn_s_dcache_wb_vol : + GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">, + Intrinsic<[], [], []>; + } Index: lib/Target/AMDGPU/CIInstructions.td =================================================================== --- lib/Target/AMDGPU/CIInstructions.td +++ lib/Target/AMDGPU/CIInstructions.td @@ -43,6 +43,13 @@ >; //===----------------------------------------------------------------------===// +// SMRD Instructions +//===----------------------------------------------------------------------===// + +defm S_DCACHE_INV_VOL : SMRD_Inval , + "s_dcache_inv_vol", int_amdgcn_s_dcache_inv_vol>; + +//===----------------------------------------------------------------------===// // MUBUF Instructions //===----------------------------------------------------------------------===// Index: lib/Target/AMDGPU/SIInsertWaits.cpp =================================================================== --- lib/Target/AMDGPU/SIInsertWaits.cpp +++ lib/Target/AMDGPU/SIInsertWaits.cpp @@ -140,7 +140,7 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) { uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags; - Counters Result; + Counters Result = { { 0, 0, 0 } }; Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT); @@ -153,13 +153,21 @@ if (TII->isSMRD(MI.getOpcode())) { - MachineOperand &Op = MI.getOperand(0); - assert(Op.isReg() && "First LGKM operand must be a 
register!"); + if (MI.getNumOperands() != 0) { + MachineOperand &Op = MI.getOperand(0); + assert(Op.isReg() && "First LGKM operand must be a register!"); - unsigned Reg = Op.getReg(); - unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize(); - Result.Named.LGKM = Size > 4 ? 2 : 1; + unsigned Reg = Op.getReg(); + // XXX - What if this is a write into a super register? + unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize(); + Result.Named.LGKM = Size > 4 ? 2 : 1; + } else { + // s_dcache_inv etc. do not have a destination register. Assume we + // want a wait on these. + // XXX - What is the right value? + Result.Named.LGKM = 1; + } } else { // DS Result.Named.LGKM = 1; Index: lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -73,9 +73,12 @@ } // Specify an SMRD opcode for SI and SMEM opcode for VI -class smrd si, bits<5> vi = si> { - field bits<5> SI = si; - field bits<8> VI = { 0, 0, 0, vi }; + +// FIXME: This should really be bits<5> si, Tablegen crashes if +// parameter default value is other parameter with different bit size +class smrd si, bits<8> vi = si> { + field bits<5> SI = si{4-0}; + field bits<8> VI = vi; } // Execpt for the NONE field, this must be kept in sync with the SISubtarget enum @@ -899,8 +902,8 @@ } class SMRD_Real_vi op, string opName, bit imm, dag outs, dag ins, - string asm> : - SMRD , + string asm, list pattern = []> : + SMRD , SMEMe_vi , SIMCInstr { let AssemblerPredicates = [isVI]; @@ -920,6 +923,33 @@ } } +multiclass SMRD_Inval { + let hasSideEffects = 1, mayStore = 1 in { + def "" : SMRD_Pseudo ; + + let sbase = 0, offset = 0 in { + let sdst = 0 in { + def _si : SMRD_Real_si ; + } + + let glc = 0, sdata = 0 in { + def _vi : SMRD_Real_vi ; + } + } + } +} + +class SMEM_Inval op, string opName, SDPatternOperator node> : + SMRD_Real_vi { + let hasSideEffects = 1; + let mayStore = 1; + let sbase = 0; + 
let sdata = 0; + let glc = 0; + let offset = 0; +} + multiclass SMRD_Helper { defm _IMM : SMRD_m < Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -93,7 +93,9 @@ } // mayLoad = 1 //def S_MEMTIME : SMRD_ <0x0000001e, "s_memtime", []>; -//def S_DCACHE_INV : SMRD_ <0x0000001f, "s_dcache_inv", []>; + +defm S_DCACHE_INV : SMRD_Inval , "s_dcache_inv", + int_amdgcn_s_dcache_inv>; //===----------------------------------------------------------------------===// // SOP1 Instructions @@ -3027,7 +3029,6 @@ // S_CBRANCH_CDBGSYS // S_CBRANCH_CDBGSYS_OR_USER // S_CBRANCH_CDBGSYS_AND_USER -// S_DCACHE_INV_VOL // DS_NOP // DS_GWS_SEMA_RELEASE_ALL // DS_WRAP_RTN_B32 Index: lib/Target/AMDGPU/VIInstructions.td =================================================================== --- lib/Target/AMDGPU/VIInstructions.td +++ lib/Target/AMDGPU/VIInstructions.td @@ -89,6 +89,16 @@ def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>; def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>; +//===----------------------------------------------------------------------===// +// SMEM Instructions +//===----------------------------------------------------------------------===// + +def S_DCACHE_WB : SMEM_Inval <0x21, + "s_dcache_wb", int_amdgcn_s_dcache_wb>; + +def S_DCACHE_WB_VOL : SMEM_Inval <0x23, + "s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>; + } // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI //===----------------------------------------------------------------------===// Index: test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=amdgcn -mcpu=tahiti -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: 
llc -march=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + +declare void @llvm.amdgcn.s.dcache.inv() #0 + +; GCN-LABEL: {{^}}test_s_dcache_inv: +; GCN-NEXT: ; BB#0: +; SI-NEXT: s_dcache_inv ; encoding: [0x00,0x00,0xc0,0xc7] +; VI-NEXT: s_dcache_inv ; encoding: [0x00,0x00,0x80,0xc0,0x00,0x00,0x00,0x00] +; GCN-NEXT: s_endpgm +define void @test_s_dcache_inv() #0 { + call void @llvm.amdgcn.s.dcache.inv() + ret void +} + +; GCN-LABEL: {{^}}test_s_dcache_inv_insert_wait: +; GCN-NEXT: ; BB#0: +; GCN-NEXT: s_dcache_inv +; GCN-NEXT: s_waitcnt lgkmcnt(0) ; encoding +define void @test_s_dcache_inv_insert_wait() #0 { + call void @llvm.amdgcn.s.dcache.inv() + br label %end + +end: + store volatile i32 3, i32 addrspace(1)* undef + ret void +} + +attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=amdgcn -mcpu=bonaire -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -march=amdgcn -mcpu=tonga -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + +declare void @llvm.amdgcn.s.dcache.inv.vol() #0 + +; GCN-LABEL: {{^}}test_s_dcache_inv_vol: +; GCN-NEXT: ; BB#0: +; CI-NEXT: s_dcache_inv_vol ; encoding: [0x00,0x00,0x40,0xc7] +; VI-NEXT: s_dcache_inv_vol ; encoding: [0x00,0x00,0x88,0xc0,0x00,0x00,0x00,0x00] +; GCN-NEXT: s_endpgm +define void @test_s_dcache_inv_vol() #0 { + call void @llvm.amdgcn.s.dcache.inv.vol() + ret void +} + +; GCN-LABEL: {{^}}test_s_dcache_inv_vol_insert_wait: +; GCN-NEXT: ; BB#0: +; GCN-NEXT: s_dcache_inv_vol +; GCN-NEXT: s_waitcnt lgkmcnt(0) ; encoding +define void @test_s_dcache_inv_vol_insert_wait() #0 { + call void @llvm.amdgcn.s.dcache.inv.vol() + br label %end + +end: + store volatile i32 3, i32 addrspace(1)* undef + ret void +} + +attributes #0 = { 
nounwind } Index: test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=VI %s + +declare void @llvm.amdgcn.s.dcache.wb() #0 + +; VI-LABEL: {{^}}test_s_dcache_wb: +; VI-NEXT: ; BB#0: +; VI-NEXT: s_dcache_wb ; encoding: [0x00,0x00,0x84,0xc0,0x00,0x00,0x00,0x00] +; VI-NEXT: s_endpgm +define void @test_s_dcache_wb() #0 { + call void @llvm.amdgcn.s.dcache.wb() + ret void +} + +; VI-LABEL: {{^}}test_s_dcache_wb_insert_wait: +; VI-NEXT: ; BB#0: +; VI-NEXT: s_dcache_wb +; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding +define void @test_s_dcache_wb_insert_wait() #0 { + call void @llvm.amdgcn.s.dcache.wb() + br label %end + +end: + store volatile i32 3, i32 addrspace(1)* undef + ret void +} + +attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=VI %s + +declare void @llvm.amdgcn.s.dcache.wb.vol() #0 + +; VI-LABEL: {{^}}test_s_dcache_wb_vol: +; VI-NEXT: ; BB#0: +; VI-NEXT: s_dcache_wb_vol ; encoding: [0x00,0x00,0x8c,0xc0,0x00,0x00,0x00,0x00] +; VI-NEXT: s_endpgm +define void @test_s_dcache_wb_vol() #0 { + call void @llvm.amdgcn.s.dcache.wb.vol() + ret void +} + +; VI-LABEL: {{^}}test_s_dcache_wb_vol_insert_wait: +; VI-NEXT: ; BB#0: +; VI-NEXT: s_dcache_wb_vol +; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding +define void @test_s_dcache_wb_vol_insert_wait() #0 { + call void @llvm.amdgcn.s.dcache.wb.vol() + br label %end + +end: + store volatile i32 3, i32 addrspace(1)* undef + ret void +} + +attributes #0 = { nounwind } Index: test/MC/AMDGPU/smem.s 
=================================================================== --- /dev/null +++ test/MC/AMDGPU/smem.s @@ -0,0 +1,11 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefix=NOSI %s + +s_dcache_wb +; VI: s_dcache_wb ; encoding: [0x00,0x00,0x84,0xc0,0x00,0x00,0x00,0x00] +; NOSI: error: instruction not supported on this GPU + +s_dcache_wb_vol +; VI: s_dcache_wb_vol ; encoding: [0x00,0x00,0x8c,0xc0,0x00,0x00,0x00,0x00] +; NOSI: error: instruction not supported on this GPU Index: test/MC/AMDGPU/smrd.s =================================================================== --- test/MC/AMDGPU/smrd.s +++ test/MC/AMDGPU/smrd.s @@ -51,3 +51,10 @@ s_load_dwordx16 s[16:31], s[2:3], s4 // GCN: s_load_dwordx16 s[16:31], s[2:3], s4 ; encoding: [0x04,0x02,0x08,0xc1] + +s_dcache_inv +// GCN: s_dcache_inv ; encoding: [0x00,0x00,0xc0,0xc7] + +s_dcache_inv_vol +// CI: s_dcache_inv_vol ; encoding: [0x00,0x00,0x40,0xc7] +// NOSI: error: instruction not supported on this GPU