diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -25,6 +25,7 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetSchedule.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/Support/Compiler.h"
 #include <cassert>
@@ -46,6 +47,7 @@
 private:
   const SIRegisterInfo RI;
   const GCNSubtarget &ST;
+  TargetSchedModel SchedModel;
 
   // The inverse predicate should have the negative value.
   enum BranchPredicate {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -85,7 +85,9 @@
 SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST)
   : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
-    RI(ST), ST(ST) {}
+    RI(ST), ST(ST) {
+  SchedModel.init(&ST);
+}
 
 //===----------------------------------------------------------------------===//
 // TargetInstrInfo callbacks
 //===----------------------------------------------------------------------===//
@@ -6635,10 +6637,10 @@
     unsigned Lat = 0, Count = 0;
     for (++I; I != E && I->isBundledWithPred(); ++I) {
       ++Count;
-      Lat = std::max(Lat, getInstrLatency(ItinData, *I, PredCost));
+      Lat = std::max(Lat, SchedModel.computeInstrLatency(&*I));
     }
     return Lat + Count - 1;
   }
 
-  return AMDGPUGenInstrInfo::getInstrLatency(ItinData, MI, PredCost);
+  return SchedModel.computeInstrLatency(&MI);
 }
diff --git a/llvm/test/CodeGen/AMDGPU/max.i16.ll b/llvm/test/CodeGen/AMDGPU/max.i16.ll
--- a/llvm/test/CodeGen/AMDGPU/max.i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/max.i16.ll
@@ -175,11 +175,12 @@
 ; GFX9-NEXT:    s_waitcnt vmcnt(1)
 ; GFX9-NEXT:    v_mov_b32_e32 v8, v6
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_pk_max_i16 v6, v6, v7
-; GFX9-NEXT:    global_load_short_d16 v7, v[2:3], off offset:4
+; GFX9-NEXT:    v_mov_b32_e32 v9, v7
 ; GFX9-NEXT:    global_load_short_d16 v8, v[0:1], off offset:4
+; GFX9-NEXT:    global_load_short_d16 v9, v[2:3], off offset:4
+; GFX9-NEXT:    v_pk_max_i16 v6, v6, v7
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_pk_max_i16 v0, v8, v7
+; GFX9-NEXT:    v_pk_max_i16 v0, v8, v9
 ; GFX9-NEXT:    global_store_dword v[4:5], v6, off
 ; GFX9-NEXT:    global_store_short v[4:5], v0, off offset:4
 ; GFX9-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/scheduler-handle-move-bundle.mir b/llvm/test/CodeGen/AMDGPU/scheduler-handle-move-bundle.mir
--- a/llvm/test/CodeGen/AMDGPU/scheduler-handle-move-bundle.mir
+++ b/llvm/test/CodeGen/AMDGPU/scheduler-handle-move-bundle.mir
@@ -23,8 +23,8 @@
     ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
     ; GCN: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (dereferenceable invariant load 4, align 16, addrspace 4)
     ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
     ; GCN: DS_WRITE_B32_gfx9 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (store 4, addrspace 3)
+    ; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
     ; GCN: $m0 = S_MOV_B32 0
     ; GCN: $vgpr0 = COPY [[S_LOAD_DWORD_IMM]]
     ; GCN: BUNDLE implicit $vgpr0, implicit $m0, implicit $exec {