diff --git a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp --- a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -261,15 +261,25 @@ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); - if (MO.isRegMask()) - for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) - if (MO.clobbersPhysReg(i)) { + if (MO.isRegMask()) { + auto ClobbersPhysRegAndSubRegs = [&](unsigned PhysReg) { + for (MCSubRegIterator SRI(PhysReg, TRI, true); SRI.isValid(); ++SRI) + if (!MO.clobbersPhysReg(*SRI)) + return false; + + return true; + }; + + for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) { + if (ClobbersPhysRegAndSubRegs(i)) { DefIndices[i] = Count; KillIndices[i] = ~0u; KeepRegs.reset(i); Classes[i] = nullptr; RegRefs.erase(i); } + } + } if (!MO.isReg()) continue; Register Reg = MO.getReg(); diff --git a/llvm/test/CodeGen/X86/pr44140.ll b/llvm/test/CodeGen/X86/pr44140.ll --- a/llvm/test/CodeGen/X86/pr44140.ll +++ b/llvm/test/CodeGen/X86/pr44140.ll @@ -10,7 +10,6 @@ ; We need xmm6 to be live from the loop header across all iterations of the loop. ; We shouldn't clobber ymm6 inside the loop. -; FIXME: We currently clobber ymm6 define i32 @main() { ; CHECK-LABEL: main: ; CHECK: # %bb.0: # %start @@ -23,7 +22,7 @@ ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 ; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1 -; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm6 +; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm7 ; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm2 ; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm3 ; CHECK-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) @@ -31,10 +30,10 @@ ; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1 ; CHECK-NEXT: vmovups %ymm3, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovups %ymm2, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovups %ymm6, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovups %ymm7, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovups %ymm3, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovups %ymm2, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovups %ymm6, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovups %ymm7, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm5