diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -12296,13 +12296,17 @@ MachineBasicBlock *Exit = ML->getExitBlock(); if (Pre && Exit) { - BuildMI(*Pre, Pre->getFirstTerminator(), DebugLoc(), - TII->get(AMDGPU::S_INST_PREFETCH)) - .addImm(1); // prefetch 2 lines behind PC - - BuildMI(*Exit, Exit->getFirstNonDebugInstr(), DebugLoc(), - TII->get(AMDGPU::S_INST_PREFETCH)) - .addImm(2); // prefetch 1 line behind PC + auto PreTerm = Pre->getFirstTerminator(); + if (PreTerm == Pre->begin() || + std::prev(PreTerm)->getOpcode() != AMDGPU::S_INST_PREFETCH) + BuildMI(*Pre, PreTerm, DebugLoc(), TII->get(AMDGPU::S_INST_PREFETCH)) + .addImm(1); // prefetch 2 lines behind PC + + auto ExitHead = Exit->getFirstNonDebugInstr(); + if (ExitHead == Exit->end() || + ExitHead->getOpcode() != AMDGPU::S_INST_PREFETCH) + BuildMI(*Exit, ExitHead, DebugLoc(), TII->get(AMDGPU::S_INST_PREFETCH)) + .addImm(2); // prefetch 1 line behind PC } return CacheLineAlign; diff --git a/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll b/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll --- a/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll +++ b/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll @@ -11,8 +11,6 @@ ; GFX10-NEXT: s_mov_b32 s1, 0 ; GFX10-NEXT: ; implicit-def: $sgpr2 ; GFX10-NEXT: s_inst_prefetch 0x1 -; GFX10-NEXT: s_inst_prefetch 0x1 -; GFX10-NEXT: s_inst_prefetch 0x1 ; GFX10-NEXT: s_branch .LBB0_2 ; GFX10-NEXT: .p2align 6 ; GFX10-NEXT: .LBB0_1: ; %Flow @@ -50,8 +48,6 @@ ; GFX10-NEXT: s_branch .LBB0_1 ; GFX10-NEXT: .LBB0_4: ; %loop0_merge ; GFX10-NEXT: s_inst_prefetch 0x2 -; GFX10-NEXT: s_inst_prefetch 0x2 -; GFX10-NEXT: s_inst_prefetch 0x2 ; GFX10-NEXT: s_endpgm branch1_true: br label %2