Index: lib/CodeGen/CriticalAntiDepBreaker.cpp
===================================================================
--- lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -213,14 +213,17 @@
     // itself can't be changed.
     if (MI.isRegTiedToUseOperand(i) &&
         Classes[Reg] == reinterpret_cast<TargetRegisterClass *>(-1)) {
-      for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
-           SubRegs.isValid(); ++SubRegs) {
-        KeepRegs.set(*SubRegs);
-      }
-      for (MCSuperRegIterator SuperRegs(Reg, TRI);
-           SuperRegs.isValid(); ++SuperRegs) {
-        KeepRegs.set(*SuperRegs);
-      }
+      // Traverse the closure of all of Reg's aliased registers. This ensures,
+      // on X86 for example, that we encounter both the high and low byte
+      // register (e.g. %CL and %CH) when Reg is a byte register. While such
+      // byte registers do not overlap, the presence in KeepRegs of any larger
+      // containing register such as %ECX leads to a shortcut being taken in
+      // ScanInstruction which may be unsafe for Reg's bytereg counterpart.
+      for (MCSuperRegIterator SuperRegs(Reg, TRI, /*IncludeSelf=*/true);
+           SuperRegs.isValid(); ++SuperRegs)
+        for (MCSubRegIterator SubRegs(*SuperRegs, TRI, /*IncludeSelf=*/true);
+             SubRegs.isValid(); ++SubRegs)
+          KeepRegs.set(*SubRegs);
     }
 
     if (MO.isUse() && Special) {
@@ -399,6 +402,7 @@
            && "Kill and Def maps aren't consistent for NewReg!");
     if (KillIndices[NewReg] != ~0u ||
         Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) ||
+        KeepRegs.test(NewReg) ||
         KillIndices[AntiDepReg] > DefIndices[NewReg])
       continue;
     // If NewReg overlaps any of the forbidden registers, we can't use it.
Index: test/CodeGen/X86/pr27681.ll
===================================================================
--- test/CodeGen/X86/pr27681.ll
+++ test/CodeGen/X86/pr27681.ll
@@ -0,0 +1,143 @@
+; RUN: llc < %s -mtriple=i386 -post-RA-scheduler=true | FileCheck %s
+; Regression test for pr27681 (and pr27580, pr27804).
+; We're trying to verify that the antidependency breaker does not
+; erroneously choose an unavailable byte register to break a dependence.
+; The test is of course highly dependent on register allocation.
+@c = global i16 4, align 2
+@d = global i16 5, align 2
+@a = common global i32 0, align 4
+@i = common global i32 0, align 4
+@b = common global i16 0, align 2
+@e = common global i16 0, align 2
+@f = common global i16 0, align 2
+@g = common global i16 0, align 2
+@h = common global i64 0, align 8
+
+; CHECK-LABEL: @main
+define i32 @main() {
+entry:
+  %.pr.i = load i32, i32* @a, align 4
+  %tobool.i = icmp eq i32 %.pr.i, 0
+  %0 = load i32, i32* @i, align 4
+  %neg.i = xor i32 %0, -1
+  %1 = load i16, i16* @g, align 2
+  %2 = load i16, i16* @c, align 2
+  %tobool13.i = icmp ne i16 %2, 0
+  %3 = load i16, i16* @b, align 2
+  %conv2.i = zext i16 %3 to i64
+  br i1 %tobool.i, label %for.condthread-pre-split.us.preheader.i, label %entry.split.i
+
+for.condthread-pre-split.us.preheader.i: ; preds = %entry
+  %4 = load i16, i16* @f, align 2
+  %tobool1.i = icmp eq i16 %4, 0
+  br i1 %tobool1.i, label %for.condthread-pre-split.us.i.us.preheader, label %for.condthread-pre-split.us.preheader.i.split
+
+for.condthread-pre-split.us.i.us.preheader: ; preds = %for.condthread-pre-split.us.preheader.i
+  %e.promoted.i = load i16, i16* @e, align 2
+  %d.promoted.i = load i16, i16* @d, align 2
+  %.pre = load i64, i64* @h, align 8
+  br label %for.condthread-pre-split.us.i.us
+
+; CHECK-LABEL: .LBB0_8:
+; CHECK: shrl %cl, %edi
+; CHECK: xorl %esi, %edi
+; CHECK: testl %edx, %edx
+; CHECK: setne %cl
+; CHECK: orb %bl, %cl
+; CHECK: movzbl %cl, %esi
+; CHECK: movl {{[0-9]+}}(%esp), %ecx
+; CHECK: sarl %cl, %edx
+; CHECK: testl %edx, %edx
+; CHECK: setne %cl
+; The original bug was using %ch instead of %bl in the following andb
+; instruction, not recognizing %ch was clobbered by the above reload to %ecx.
+; CHECK: andb %bl, %cl
+; CHECK-DAG: movl $0, %ebx
+; CHECK-DAG: cmpl $-1, %edi
+; CHECK: je .LBB0_10
+
+for.condthread-pre-split.us.i.us: ; preds = %for.condthread-pre-split.us.i.us.preheader, %cleanup49.thread.us.i.us
+  %5 = phi i64 [ %and.us.i.us, %cleanup49.thread.us.i.us ], [ %.pre, %for.condthread-pre-split.us.i.us.preheader ]
+  %add67.us.i.us = phi i16 [ %add.us.i.us, %cleanup49.thread.us.i.us ], [ %e.promoted.i, %for.condthread-pre-split.us.i.us.preheader ]
+  %conv4265.us.i.us = phi i16 [ %conv42.us.i.us, %cleanup49.thread.us.i.us ], [ %d.promoted.i, %for.condthread-pre-split.us.i.us.preheader ]
+  %j.0.ph.us.i.us = phi i32 [ %xor.us.i.us, %cleanup49.thread.us.i.us ], [ 0, %for.condthread-pre-split.us.i.us.preheader ]
+  %conv4.us.i.us = zext i16 %conv4265.us.i.us to i32
+  %mul.us.i.us = mul nsw i32 %conv4.us.i.us, %neg.i
+  %tobool8.us.i.us = icmp ne i16 %conv4265.us.i.us, 0
+  %6 = or i16 %conv4265.us.i.us, %1
+  %7 = icmp ne i16 %6, 0
+  %lor.ext.us.i.us = zext i1 %7 to i32
+  %neg9.us.i.us = xor i32 %lor.ext.us.i.us, -1
+  %not.tobool11.us.i.us = icmp ne i64 %5, 0
+  %8 = and i1 %tobool13.i, %not.tobool11.us.i.us
+  %mul14.us.i.us = select i1 %8, i32 %mul.us.i.us, i32 0
+  %shr.us.i.us = lshr i32 %conv4.us.i.us, %mul14.us.i.us
+  %xor.us.i.us = xor i32 %shr.us.i.us, %neg9.us.i.us
+  %tobool18.us.i.us = icmp ne i32 %mul.us.i.us, 0
+  %9 = or i1 %tobool8.us.i.us, %tobool18.us.i.us
+  %lor.ext20.us.i.us = zext i1 %9 to i16
+  %add.us.i.us = add i16 %lor.ext20.us.i.us, %conv4265.us.i.us
+  %shr26.us.i.us = ashr i32 %mul.us.i.us, %0
+  %tobool27.us.i.us = icmp ne i32 %shr26.us.i.us, 0
+  %10 = and i1 %tobool8.us.i.us, %tobool27.us.i.us
+  %tobool31.us.i.us = icmp eq i32 %xor.us.i.us, 0
+  br i1 %tobool31.us.i.us, label %land.end38.us.i.us, label %land.rhs32.us.i.us
+
+land.rhs32.us.i.us: ; preds = %for.condthread-pre-split.us.i.us
+  %tobool33.us.i.us = icmp ne i32 %j.0.ph.us.i.us, 0
+  %11 = or i1 %tobool33.us.i.us, %not.tobool11.us.i.us
+  br label %land.end38.us.i.us
+
+land.end38.us.i.us: ; preds = %land.rhs32.us.i.us, %for.condthread-pre-split.us.i.us
+  %12 = phi i1 [ false, %for.condthread-pre-split.us.i.us ], [ %11, %land.rhs32.us.i.us ]
+  %land.ext39.us.i.us = zext i1 %12 to i16
+  %land.ext39.op.us.i.us = xor i16 %land.ext39.us.i.us, -1
+  %conv42.us.i.us = select i1 %10, i16 0, i16 %land.ext39.op.us.i.us
+  %not.cmp.us.i.us = icmp ult i16 %conv42.us.i.us, %2
+  br i1 %not.cmp.us.i.us, label %fn1.exit, label %cleanup49.thread.us.i.us
+
+cleanup49.thread.us.i.us: ; preds = %land.end38.us.i.us
+  %rem.us.i.us = srem i64 %conv2.i, %5
+  %conv.mask.us.i.us = and i16 %add67.us.i.us, 255
+  %conv3.us.i.us = zext i16 %conv.mask.us.i.us to i64
+  %and.us.i.us = and i64 %rem.us.i.us, %conv3.us.i.us
+  store i64 %and.us.i.us, i64* @h, align 8
+  br label %for.condthread-pre-split.us.i.us
+
+for.condthread-pre-split.us.preheader.i.split: ; preds = %for.condthread-pre-split.us.preheader.i
+  br i1 %tobool13.i, label %for.condthread-pre-split.us.i.us1.preheader, label %for.condthread-pre-split.us.i.preheader
+
+for.condthread-pre-split.us.i.preheader: ; preds = %for.condthread-pre-split.us.preheader.i.split
+  br label %for.condthread-pre-split.us.i
+
+for.condthread-pre-split.us.i.us1.preheader: ; preds = %for.condthread-pre-split.us.preheader.i.split
+  br label %for.condthread-pre-split.us.i.us1
+
+for.condthread-pre-split.us.i.us1: ; preds = %for.condthread-pre-split.us.i.us1.preheader, %for.condthread-pre-split.us.i.us1
+  br label %for.condthread-pre-split.us.i.us1
+
+for.condthread-pre-split.us.i: ; preds = %for.condthread-pre-split.us.i.preheader, %for.condthread-pre-split.us.i
+  br label %for.condthread-pre-split.us.i
+
+entry.split.i: ; preds = %entry
+  br i1 %tobool13.i, label %for.cond.us72.i.preheader, label %for.cond.i.preheader
+
+for.cond.i.preheader: ; preds = %entry.split.i
+  br label %for.cond.i
+
+for.cond.us72.i.preheader: ; preds = %entry.split.i
+  br label %for.cond.us72.i
+
+for.cond.us72.i: ; preds = %for.cond.us72.i.preheader, %for.cond.us72.i
+  br label %for.cond.us72.i
+
+for.cond.i: ; preds = %for.cond.i.preheader, %for.cond.i
+  br label %for.cond.i
+
+fn1.exit: ; preds = %land.end38.us.i.us
+  %conv42.us.i.us.lcssa = phi i16 [ %conv42.us.i.us, %land.end38.us.i.us ]
+  %add.us.i.us.lcssa = phi i16 [ %add.us.i.us, %land.end38.us.i.us ]
+  store i16 %conv42.us.i.us.lcssa, i16* @d, align 2
+  store i16 %add.us.i.us.lcssa, i16* @e, align 2
+  ret i32 0
+}
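
Note (not part of the patch itself): below is a minimal, standalone sketch of the KeepRegs change in the first hunk, for readers without an LLVM tree at hand. It models a toy x86-style register hierarchy; the ECX/CX/CL/CH names and the SubRegs/SuperRegs maps are illustrative stand-ins, not LLVM's MCRegisterInfo/MCSubRegIterator API. It prints which registers the old code (sub-registers plus super-registers of the tied register) would mark, versus the patched code (all sub-registers of all super-registers, i.e. the alias closure). Starting from CL, only the closure also marks the sibling byte register CH, which is what makes %CH unavailable in the pr27681 test above.

// keepregs_closure.cpp -- illustrative only; the file name and the register
// model are hypothetical and deliberately simplified.
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

int main() {
  // Toy hierarchy: ECX contains CX, which contains the byte registers CL and CH.
  std::map<std::string, std::vector<std::string>> SubRegs = {
      {"ECX", {"CX", "CL", "CH"}}, {"CX", {"CL", "CH"}}, {"CL", {}}, {"CH", {}}};
  std::map<std::string, std::vector<std::string>> SuperRegs = {
      {"CL", {"CX", "ECX"}}, {"CH", {"CX", "ECX"}}, {"CX", {"ECX"}}, {"ECX", {}}};

  const std::string Reg = "CL"; // the tied byte register from the PR

  // Old behavior: mark Reg, its sub-registers, and its super-registers.
  std::set<std::string> OldKeep = {Reg};
  OldKeep.insert(SubRegs[Reg].begin(), SubRegs[Reg].end());
  OldKeep.insert(SuperRegs[Reg].begin(), SuperRegs[Reg].end());

  // Patched behavior: for every super-register of Reg (including Reg itself),
  // mark all of its sub-registers (including itself) -- the alias closure.
  std::set<std::string> NewKeep;
  std::vector<std::string> Supers = SuperRegs[Reg];
  Supers.push_back(Reg);
  for (const std::string &S : Supers) {
    NewKeep.insert(S);
    NewKeep.insert(SubRegs[S].begin(), SubRegs[S].end());
  }

  auto Dump = [](const char *Label, const std::set<std::string> &Set) {
    std::cout << Label << ":";
    for (const std::string &R : Set)
      std::cout << ' ' << R;
    std::cout << '\n';
  };
  Dump("old KeepRegs", OldKeep); // CH is missing, so nothing kept it reserved
  Dump("new KeepRegs", NewKeep); // CH is present via the shared super-register ECX
  return 0;
}

Compiled with any C++11 compiler (e.g. c++ -std=c++11 keepregs_closure.cpp), this prints "old KeepRegs: CL CX ECX" and "new KeepRegs: CH CL CX ECX". The second hunk (the added KeepRegs.test(NewReg) check) then makes the candidate loop reject any such kept register outright.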