diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -1112,6 +1112,14 @@ RegMasks.clear(); BundleVirtRegsMap.clear(); + auto IsTiedDef = [&](unsigned Idx) { + MachineOperand &MO = MI.getOperand(Idx); + if (!MO.isTied()) + return false; + unsigned TiedIdx = MI.findTiedOperandIdx(Idx); + auto TiedMO = MI.getOperand(TiedIdx); + return !TiedMO.isUndef(); + }; // Scan for special cases; Apply pre-assigned register defs to state. bool HasPhysRegUse = false; bool HasRegMask = false; @@ -1119,7 +1127,8 @@ bool HasDef = false; bool HasEarlyClobber = false; bool NeedToAssignLiveThroughs = false; - for (MachineOperand &MO : MI.operands()) { + for (unsigned I = 0; I < MI.getNumOperands(); ++I) { + MachineOperand &MO = MI.getOperand(I); if (MO.isReg()) { Register Reg = MO.getReg(); if (Reg.isVirtual()) { @@ -1130,7 +1139,7 @@ HasEarlyClobber = true; NeedToAssignLiveThroughs = true; } - if (MO.isTied() || (MO.getSubReg() != 0 && !MO.isUndef())) + if (IsTiedDef(I) || (MO.getSubReg() != 0 && !MO.isUndef())) NeedToAssignLiveThroughs = true; } } else if (Reg.isPhysical()) { @@ -1230,7 +1239,7 @@ MachineOperand &MO = MI.getOperand(OpIdx); LLVM_DEBUG(dbgs() << "Allocating " << MO << '\n'); unsigned Reg = MO.getReg(); - if (MO.isEarlyClobber() || MO.isTied() || + if (MO.isEarlyClobber() || IsTiedDef(OpIdx) || (MO.getSubReg() && !MO.isUndef())) { defineLiveThroughVirtReg(MI, OpIdx, Reg); } else { @@ -1253,7 +1262,8 @@ // Free registers occupied by defs. // Iterate operands in reverse order, so we see the implicit super register // defs first (we added them earlier in case of <dead> defs). 
- for (MachineOperand &MO : llvm::reverse(MI.operands())) { + for (signed I = MI.getNumOperands() - 1; I >= 0; --I) { + MachineOperand &MO = MI.getOperand(I); if (!MO.isReg() || !MO.isDef()) continue; @@ -1268,7 +1278,7 @@ "tied def assigned to clobbered register"); // Do not free tied operands and early clobbers. - if (MO.isTied() || MO.isEarlyClobber()) + if (IsTiedDef(I) || MO.isEarlyClobber()) continue; Register Reg = MO.getReg(); if (!Reg) diff --git a/llvm/test/CodeGen/X86/fastregalloc-tied-undef.mir b/llvm/test/CodeGen/X86/fastregalloc-tied-undef.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/fastregalloc-tied-undef.mir @@ -0,0 +1,51 @@ +# RUN: llc -mtriple=x86_64-- -run-pass=regallocfast -o - %s | FileCheck %s + +# If the tied use is undef value, fastregalloc should free the def register. +# There is no reload needed for the undef value. +--- | + define dso_local void @foo() { + entry: + %vec = alloca [4 x <4 x float>], align 16 + %m = alloca <4 x float>, align 16 + store <4 x float> zeroinitializer, ptr %m, align 16 + %0 = load <4 x float>, ptr %m, align 16 + %arrayidx4 = bitcast ptr %vec to ptr + store <4 x float> %0, ptr %arrayidx4, align 16 + %1 = load <4 x float>, ptr %m, align 16 + %arrayidx1 = getelementptr inbounds [4 x <4 x float>], ptr %vec, i64 0, i64 1 + store <4 x float> %1, ptr %arrayidx1, align 16 + %2 = load <4 x float>, ptr %m, align 16 + %arrayidx2 = getelementptr inbounds [4 x <4 x float>], ptr %vec, i64 0, i64 2 + store <4 x float> %2, ptr %arrayidx2, align 16 + %3 = load <4 x float>, ptr %m, align 16 + %arrayidx3 = getelementptr inbounds [4 x <4 x float>], ptr %vec, i64 0, i64 3 + store <4 x float> %3, ptr %arrayidx3, align 16 + ret void + } + +... 
+--- +name: foo +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: vr128 } +frameInfo: + maxAlignment: 16 +stack: + - { id: 0, name: vec, size: 64, alignment: 16 } + - { id: 1, name: m, size: 16, alignment: 16 } +machineFunctionInfo: {} +body: | + bb.0.entry: + ; CHECK: renamable $xmm0 = PXORrr undef renamable $xmm0, undef renamable $xmm0 + + %0:vr128 = PXORrr undef %0, undef %0 + MOVAPSmr %stack.1.m, 1, $noreg, 0, $noreg, %0 :: (store (s128) into %ir.m) + MOVAPSmr %stack.0.vec, 1, $noreg, 0, $noreg, %0 :: (store (s128) into %ir.arrayidx4) + MOVAPSmr %stack.0.vec, 1, $noreg, 16, $noreg, %0 :: (store (s128) into %ir.arrayidx1) + MOVAPSmr %stack.0.vec, 1, $noreg, 32, $noreg, %0 :: (store (s128) into %ir.arrayidx2) + MOVAPSmr %stack.0.vec, 1, $noreg, 48, $noreg, killed %0 :: (store (s128) into %ir.arrayidx3) + RET 0 + +...