Index: include/llvm/IR/IntrinsicsAMDGPU.td =================================================================== --- include/llvm/IR/IntrinsicsAMDGPU.td +++ include/llvm/IR/IntrinsicsAMDGPU.td @@ -188,6 +188,10 @@ GCCBuiltin<"__builtin_amdgcn_s_dcache_inv">, Intrinsic<[], [], []>; +def int_amdgcn_s_memtime : + GCCBuiltin<"__builtin_amdgcn_s_memtime">, + Intrinsic<[llvm_i64_ty], [], []>; + def int_amdgcn_dispatch_ptr : GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">, Intrinsic<[LLVMQualPointerType], [], [IntrNoMem]>; @@ -246,4 +250,7 @@ GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">, Intrinsic<[], [], []>; +def int_amdgcn_s_memrealtime : + GCCBuiltin<"__builtin_amdgcn_s_memrealtime">, + Intrinsic<[llvm_i64_ty], [], []>; } Index: lib/Target/AMDGPU/AMDGPU.td =================================================================== --- lib/Target/AMDGPU/AMDGPU.td +++ lib/Target/AMDGPU/AMDGPU.td @@ -149,6 +149,12 @@ "Additional intstructions for CI+" >; +def FeatureVIInsts : SubtargetFeature<"vi-insts", + "VIInsts", + "true", + "Additional instructions for VI+" +>; + //===------------------------------------------------------------===// // Subtarget Features (options and debugging) //===------------------------------------------------------------===// @@ -308,7 +314,7 @@ def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS", [FeatureFP64, FeatureLocalMemorySize65536, FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN, - FeatureGCN3Encoding, FeatureCIInsts] + FeatureGCN3Encoding, FeatureCIInsts, FeatureVIInsts] >; //===----------------------------------------------------------------------===// Index: lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.h +++ lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -88,6 +88,7 @@ bool GCN1Encoding; bool GCN3Encoding; bool CIInsts; + bool VIInsts; bool FeatureDisable; int LDSBankCount; unsigned IsaVersion; Index: 
lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -81,7 +81,8 @@ WavefrontSize(0), CFALUBug(false), LocalMemorySize(0), MaxPrivateElementSize(0), EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false), - GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0), + GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), VIInsts(false), + LDSBankCount(0), IsaVersion(ISAVersion0_0_0), EnableHugeScratchBuffer(false), EnableSIScheduler(false), FrameLowering(nullptr), InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) { Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -135,6 +135,9 @@ setOperationAction(ISD::BR_CC, MVT::f32, Expand); setOperationAction(ISD::BR_CC, MVT::f64, Expand); + // READCYCLECOUNTER selects s_memtime on SI/CI and s_memrealtime on VI. 
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); + for (MVT VT : MVT::integer_valuetypes()) { if (VT == MVT::i64) continue; Index: lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -1014,23 +1014,31 @@ } } -multiclass SMRD_Inval { - let hasSideEffects = 1, mayStore = 1 in { - def "" : SMRD_Pseudo ; +multiclass SMRD_Special pattern = []> { + let hasSideEffects = 1 in { + def "" : SMRD_Pseudo ; let sbase = 0, offset = 0 in { let sdst = 0 in { - def _si : SMRD_Real_si ; + def _si : SMRD_Real_si ; } let glc = 0, sdata = 0 in { - def _vi : SMRD_Real_vi ; + def _vi : SMRD_Real_vi ; } } } } +multiclass SMRD_Inval { + let mayStore = 1 in { + defm : SMRD_Special; + } +} + class SMEM_Inval op, string opName, SDPatternOperator node> : SMRD_Real_vi { let hasSideEffects = 1; @@ -1041,6 +1049,18 @@ let offset = 0; } +class SMEM_Ret op, string opName, SDPatternOperator node> : + SMRD_Real_vi { + let hasSideEffects = 1; + let mayStore = ?; + let mayLoad = ?; + let sbase = 0; + let sdata = 0; + let glc = 0; + let offset = 0; +} + multiclass SMRD_Helper { defm _IMM : SMRD_m < Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -88,7 +88,15 @@ smrd<0x0c>, "s_buffer_load_dwordx16", SReg_128, SReg_512 >; -//def S_MEMTIME : SMRD_ <0x0000001e, "s_memtime", []>; +let mayStore = ? in { +// FIXME: mayStore = ? is a workaround for tablegen bug for different +// inferred mayStore flags for the instruction pattern vs. standalone +// Pat. Each considers the other contradictory. 
+ +defm S_MEMTIME : SMRD_Special , "s_memtime", + (outs SReg_64:$dst), " $dst", [(set i64:$dst, (int_amdgcn_s_memtime))] +>; +} defm S_DCACHE_INV : SMRD_Inval , "s_dcache_inv", int_amdgcn_s_dcache_inv>; @@ -3149,6 +3157,13 @@ def : BFEPattern ; +let Predicates = [isSICI] in { +def : Pat < + (i64 (readcyclecounter)), + (S_MEMTIME) +>; +} + //===----------------------------------------------------------------------===// // Fract Patterns //===----------------------------------------------------------------------===// Index: lib/Target/AMDGPU/VIInstructions.td =================================================================== --- lib/Target/AMDGPU/VIInstructions.td +++ lib/Target/AMDGPU/VIInstructions.td @@ -103,6 +103,9 @@ def S_DCACHE_WB_VOL : SMEM_Inval <0x23, "s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>; +def S_MEMREALTIME : SMEM_Ret<0x25, + "s_memrealtime", int_amdgcn_s_memrealtime>; + } // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI let Predicates = [isVI] in { @@ -114,7 +117,7 @@ >; //===----------------------------------------------------------------------===// -// DPP Paterns +// DPP Patterns //===----------------------------------------------------------------------===// def : Pat < @@ -124,4 +127,13 @@ (as_i32imm $bank_mask), (as_i32imm $row_mask)) >; +//===----------------------------------------------------------------------===// +// Misc Patterns +//===----------------------------------------------------------------------===// + +def : Pat < + (i64 (readcyclecounter)), + (S_MEMREALTIME) +>; + } // End Predicates = [isVI] Index: test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll @@ -0,0 +1,23 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN 
%s + +declare i64 @llvm.amdgcn.s.memtime() #0 + +; GCN-LABEL: {{^}}test_s_memtime: +; GCN-DAG: s_memtime s{{\[[0-9]+:[0-9]+\]}} +; GCN-DAG: s_load_dwordx2 +; GCN: lgkmcnt +; GCN: buffer_store_dwordx2 +; GCN-NOT: lgkmcnt +; GCN: s_memtime s{{\[[0-9]+:[0-9]+\]}} +; GCN: buffer_store_dwordx2 +define void @test_s_memtime(i64 addrspace(1)* %out) #0 { + %cycle0 = call i64 @llvm.amdgcn.s.memtime() + store volatile i64 %cycle0, i64 addrspace(1)* %out + + %cycle1 = call i64 @llvm.amdgcn.s.memtime() + store volatile i64 %cycle1, i64 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/llvm.amdgcn.s.memrealtime.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/llvm.amdgcn.s.memrealtime.ll @@ -0,0 +1,22 @@ +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s + +declare i64 @llvm.amdgcn.s.memrealtime() #0 + +; GCN-LABEL: {{^}}test_s_memrealtime: +; GCN-DAG: s_memrealtime s{{\[[0-9]+:[0-9]+\]}} +; GCN-DAG: s_load_dwordx2 +; GCN: lgkmcnt +; GCN: buffer_store_dwordx2 +; GCN-NOT: lgkmcnt +; GCN: s_memrealtime s{{\[[0-9]+:[0-9]+\]}} +; GCN: buffer_store_dwordx2 +define void @test_s_memrealtime(i64 addrspace(1)* %out) #0 { + %cycle0 = call i64 @llvm.amdgcn.s.memrealtime() + store volatile i64 %cycle0, i64 addrspace(1)* %out + + %cycle1 = call i64 @llvm.amdgcn.s.memrealtime() + store volatile i64 %cycle1, i64 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/readcyclecounter.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/readcyclecounter.ll @@ -0,0 +1,25 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s + +declare i64 @llvm.readcyclecounter() #0 + +; 
GCN-LABEL: {{^}}test_readcyclecounter: +; SI-DAG: s_memtime s{{\[[0-9]+:[0-9]+\]}} +; VI-DAG: s_memrealtime s{{\[[0-9]+:[0-9]+\]}} +; GCN-DAG: s_load_dwordx2 +; GCN: lgkmcnt +; GCN: buffer_store_dwordx2 +; GCN-NOT: lgkmcnt +; SI: s_memtime s{{\[[0-9]+:[0-9]+\]}} +; VI: s_memrealtime s{{\[[0-9]+:[0-9]+\]}} +; GCN: buffer_store_dwordx2 +define void @test_readcyclecounter(i64 addrspace(1)* %out) #0 { + %cycle0 = call i64 @llvm.readcyclecounter() + store volatile i64 %cycle0, i64 addrspace(1)* %out + + %cycle1 = call i64 @llvm.readcyclecounter() + store volatile i64 %cycle1, i64 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } Index: test/MC/AMDGPU/smrd.s =================================================================== --- test/MC/AMDGPU/smrd.s +++ test/MC/AMDGPU/smrd.s @@ -67,3 +67,6 @@ s_dcache_inv_vol // CI: s_dcache_inv_vol ; encoding: [0x00,0x00,0x40,0xc7] // NOSI: error: instruction not supported on this GPU + +s_memtime s[0:1] +// GCN: s_memtime s[0:1] ; encoding: [0x00,0x00,0x80,0xc7]