Index: lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp =================================================================== --- lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp +++ lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp @@ -395,30 +395,40 @@ unsigned Reg1 = MRI->createVirtualRegister( TII->getRegClass(TII->get(NLoadOpcode), 0, TRI, *(MBB->getParent()))); - BuildMI(*MBB, LoadInst, LoadInst->getDebugLoc(), TII->get(NLoadOpcode), Reg1) - .add(LoadBase) - .addImm(1) - .addReg(X86::NoRegister) - .addImm(LoadDisp) - .addReg(X86::NoRegister) - .addMemOperand( - MBB->getParent()->getMachineMemOperand(LMMO, LMMOffset, Size)); - DEBUG(LoadInst->getPrevNode()->dump()); + MachineInstr *NewLoad = + BuildMI(*MBB, LoadInst, LoadInst->getDebugLoc(), TII->get(NLoadOpcode), + Reg1) + .add(LoadBase) + .addImm(1) + .addReg(X86::NoRegister) + .addImm(LoadDisp) + .addReg(X86::NoRegister) + .addMemOperand( + MBB->getParent()->getMachineMemOperand(LMMO, LMMOffset, Size)); + if (LoadBase.isReg()) + getBaseOperand(NewLoad).setIsKill(false); + DEBUG(NewLoad->dump()); // If the load and store are consecutive, use the loadInst location to // reduce register pressure. MachineInstr *StInst = StoreInst; if (StoreInst->getPrevNode() == LoadInst) StInst = LoadInst; - BuildMI(*MBB, StInst, StInst->getDebugLoc(), TII->get(NStoreOpcode)) - .add(StoreBase) - .addImm(1) - .addReg(X86::NoRegister) - .addImm(StoreDisp) - .addReg(X86::NoRegister) - .addReg(Reg1) - .addMemOperand( - MBB->getParent()->getMachineMemOperand(SMMO, SMMOffset, Size)); - DEBUG(StInst->getPrevNode()->dump()); + MachineInstr *NewStore = + BuildMI(*MBB, StInst, StInst->getDebugLoc(), TII->get(NStoreOpcode)) + .add(StoreBase) + .addImm(1) + .addReg(X86::NoRegister) + .addImm(StoreDisp) + .addReg(X86::NoRegister) + .addReg(Reg1) + .addMemOperand( + MBB->getParent()->getMachineMemOperand(SMMO, SMMOffset, Size)); + if (StoreBase.isReg()) + getBaseOperand(NewStore).setIsKill(false); + MachineOperand &StoreSrcVReg = StoreInst->getOperand(X86::AddrNumOperands); + assert(StoreSrcVReg.isReg() && "Expected virtual register"); + NewStore->getOperand(X86::AddrNumOperands).setIsKill(StoreSrcVReg.isKill()); + DEBUG(NewStore->dump()); } void X86AvoidSFBPass::buildCopies(int Size, MachineInstr *LoadInst, Index: test/CodeGen/X86/avoid-sfb-kill-flags.mir =================================================================== --- /dev/null +++ test/CodeGen/X86/avoid-sfb-kill-flags.mir @@ -0,0 +1,64 @@ +# RUN: llc -o - %s -mtriple=x86_64-- -run-pass=x86-avoid-SFB | FileCheck %s +--- | + ; ModuleID = '../test/CodeGen/X86/avoid-sfb-mir.ll' + source_filename = "../test/CodeGen/X86/avoid-sfb-mir.ll" + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-linux-gnu" + + %struct.S = type { i32, i32, i32, i32 } + + ; Function Attrs: nounwind uwtable + define void @test_imm_store(%struct.S* noalias nocapture %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3) local_unnamed_addr #0 { + entry: + %a2 = bitcast %struct.S* %s1 to i32* + store i32 0, i32* %a2, align 4 + %a13 = bitcast %struct.S* %s3 to i32* + store i32 1, i32* %a13, align 4 + %0 = bitcast %struct.S* %s2 to i8* + %1 = bitcast %struct.S* %s1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 16, i1 false) + ret void + } + + declare void @bar(%struct.S*) local_unnamed_addr + + ; Function Attrs: argmemonly nounwind + declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1 + +... +--- +name: test_imm_store +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: gr64 } + - { id: 1, class: gr64 } + - { id: 2, class: gr32 } + - { id: 3, class: gr64 } + - { id: 4, class: vr128 } +liveins: + - { reg: '$rdi', virtual-reg: '%0' } + - { reg: '$rsi', virtual-reg: '%1' } + - { reg: '$rcx', virtual-reg: '%3' } +body: | + bb.0.entry: + liveins: $rdi, $rsi, $rcx + ; CHECK: MOV32mi %0, 1, $noreg, 0, $noreg, 0 :: (store 4 into %ir.a2) + ; CHECK-NEXT: MOV32mi %3, 1, $noreg, 0, $noreg, 1 :: (store 4 into %ir.a13) + ; CHECK-NEXT: %5:gr32 = MOV32rm %0, 1, $noreg, 0, $noreg :: (load 4 from %ir.1) + ; CHECK-NEXT: MOV32mr %1, 1, $noreg, 0, $noreg, killed %5 :: (store 4 into %ir.0) + ; CHECK-NEXT: %6:gr64 = MOV64rm %0, 1, $noreg, 4, $noreg :: (load 8 from %ir.1 + 4, align 4) + ; CHECK-NEXT: MOV64mr %1, 1, $noreg, 4, $noreg, killed %6 :: (store 8 into %ir.0 + 4, align 4) + ; CHECK-NEXT: %7:gr32 = MOV32rm killed %0, 1, $noreg, 12, $noreg :: (load 4 from %ir.1 + 12) + ; CHECK-NEXT: MOV32mr killed %1, 1, $noreg, 12, $noreg, killed %7 :: (store 4 into %ir.0 + 12) + + %3:gr64 = COPY $rcx + %1:gr64 = COPY $rsi + %0:gr64 = COPY $rdi + MOV32mi %0, 1, $noreg, 0, $noreg, 0 :: (store 4 into %ir.a2) + MOV32mi %3, 1, $noreg, 0, $noreg, 1 :: (store 4 into %ir.a13) + %4:vr128 = MOVUPSrm killed %0, 1, $noreg, 0, $noreg :: (load 16 from %ir.1, align 4) + MOVUPSmr killed %1, 1, $noreg, 0, $noreg, killed %4 :: (store 16 into %ir.0, align 4) + RET 0 + +... Index: test/CodeGen/X86/avoid-sfb-overlaps.ll =================================================================== --- test/CodeGen/X86/avoid-sfb-overlaps.ll +++ test/CodeGen/X86/avoid-sfb-overlaps.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=CHECK -; RUN: llc < %s -mtriple=x86_64-linux --x86-disable-avoid-SFB | FileCheck %s --check-prefix=DISABLED -; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core-avx2 | FileCheck %s -check-prefix=CHECK-AVX2 -; RUN: llc < %s -mtriple=x86_64-linux -mcpu=skx | FileCheck %s -check-prefix=CHECK-AVX512 +; RUN: llc < %s -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s -check-prefix=CHECK +; RUN: llc < %s -mtriple=x86_64-linux --x86-disable-avoid-SFB -verify-machineinstrs | FileCheck %s --check-prefix=DISABLED +; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core-avx2 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX2 +; RUN: llc < %s -mtriple=x86_64-linux -mcpu=skx -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX512 ; ModuleID = '../testSFB/testOverlapBlocks.c' source_filename = "../testSFB/testOverlapBlocks.c" Index: test/CodeGen/X86/avoid-sfb.ll =================================================================== --- test/CodeGen/X86/avoid-sfb.ll +++ test/CodeGen/X86/avoid-sfb.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=CHECK -; RUN: llc < %s -mtriple=x86_64-linux --x86-disable-avoid-SFB | FileCheck %s --check-prefix=DISABLED -; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core-avx2 | FileCheck %s -check-prefix=CHECK-AVX2 -; RUN: llc < %s -mtriple=x86_64-linux -mcpu=skx | FileCheck %s -check-prefix=CHECK-AVX512 +; RUN: llc < %s -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s -check-prefix=CHECK +; RUN: llc < %s -mtriple=x86_64-linux --x86-disable-avoid-SFB -verify-machineinstrs | FileCheck %s --check-prefix=DISABLED +; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core-avx2 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX2 +; RUN: llc < %s -mtriple=x86_64-linux -mcpu=skx -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX512 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu"