Index: lib/CodeGen/CriticalAntiDepBreaker.cpp =================================================================== --- lib/CodeGen/CriticalAntiDepBreaker.cpp +++ lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -174,6 +174,22 @@ if (Reg == 0) continue; const TargetRegisterClass *NewRC = nullptr; + // If this reg is tied, we can't change it or any of its sub or super regs. + // We need to use KeepRegs to mark the reg because not all uses of the + // same reg within an instruction are necessarily tagged as tied. + // Example: an x86 "xor %eax, %eax" will have one source operand tied to the + // def register but not the second (see PR20020 for details). + if (MI->isRegTiedToUseOperand(i)) { + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) { + KeepRegs.set(*SubRegs); + } + for (MCSuperRegIterator SuperRegs(Reg, TRI); + SuperRegs.isValid(); ++SuperRegs) { + KeepRegs.set(*SuperRegs); + } + } + if (i < MI->getDesc().getNumOperands()) NewRC = TII->getRegClass(MI->getDesc(), i, TRI, MF); @@ -236,9 +252,18 @@ unsigned Reg = MO.getReg(); if (Reg == 0) continue; if (!MO.isDef()) continue; - // Ignore two-addr defs. - if (MI->isRegTiedToUseOperand(i)) continue; + // If we've already marked this reg as unchangeable, carry on. + if (KeepRegs.test(Reg)) continue; + + // All tied regs should have set bits in KeepRegs in PrescanInstruction(). + // This means we will ignore two-addr defs and other instructions + // (e.g., many two-operand x86 instructions have a tied use (source) + // and def (destination) reg). + assert(!MI->isRegTiedToUseOperand(i) && "Tied operand not in KeepRegs?"); + + // FIXME: we should use a SubRegIterator that includes self (as above), so + // we don't have to repeat all this code for the reg itself. 
DefIndices[Reg] = Count; KillIndices[Reg] = ~0u; assert(((KillIndices[Reg] == ~0u) != @@ -281,6 +306,9 @@ RegRefs.insert(std::make_pair(Reg, &MO)); + // FIXME: we should use an MCRegAliasIterator that includes self so we don't + // have to repeat all this code for the reg itself. + // It wasn't previously live but now it is, this is a kill. if (KillIndices[Reg] == ~0u) { KillIndices[Reg] = Count; Index: test/CodeGen/X86/pr20020.ll =================================================================== --- test/CodeGen/X86/pr20020.ll +++ test/CodeGen/X86/pr20020.ll @@ -0,0 +1,73 @@ +; RUN: llc < %s -mtriple=x86_64-apple-macosx -disable-lsr -post-RA-scheduler=1 -break-anti-dependencies=critical | FileCheck %s + +; In PR20020, the critical anti-dependency breaker algorithm mistakenly +; changes the register operands of an 'xorl %eax, %eax' to 'xorl %ecx, %ecx' +; and then immediately reloads %rcx with a value based on the wrong %rax. + +; CHECK-NOT: xorl %ecx, %ecx +; CHECK: leaq 1(%rax), %rcx + + +%struct.planet = type { double, double, double } + +; Function Attrs: nounwind ssp uwtable +define void @advance(i32 %nbodies, %struct.planet* nocapture %bodies) #0 { +entry: + %cmp4 = icmp sgt i32 %nbodies, 0 + br i1 %cmp4, label %for.body.preheader, label %for.end38 + +for.body.preheader: ; preds = %entry + %gep = getelementptr %struct.planet* %bodies, i64 1, i32 1 + %gep13 = bitcast double* %gep to %struct.planet* + %0 = add i32 %nbodies, -1 + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.inc20 + %iv19 = phi i32 [ %0, %for.body.preheader ], [ %iv.next, %for.inc20 ] + %iv = phi %struct.planet* [ %gep13, %for.body.preheader ], [ %gep14, %for.inc20 ] + %iv9 = phi i64 [ %iv.next10, %for.inc20 ], [ 0, %for.body.preheader ] + %iv.next10 = add nuw nsw i64 %iv9, 1 + %1 = trunc i64 %iv.next10 to i32 + %cmp22 = icmp slt i32 %1, %nbodies + br i1 %cmp22, label %for.body3.lr.ph, label %for.inc20 + +for.body3.lr.ph: ; preds = %for.body + %x = getelementptr inbounds 
%struct.planet* %bodies, i64 %iv9, i32 0 + %y = getelementptr inbounds %struct.planet* %bodies, i64 %iv9, i32 1 + %vx = getelementptr inbounds %struct.planet* %bodies, i64 %iv9, i32 2 + br label %for.body3 + +for.body3: ; preds = %for.body3, %for.body3.lr.ph + %iv20 = phi i32 [ %iv.next21, %for.body3 ], [ %iv19, %for.body3.lr.ph ] + %iv15 = phi %struct.planet* [ %gep16, %for.body3 ], [ %iv, %for.body3.lr.ph ] + %iv1517 = bitcast %struct.planet* %iv15 to double* + %2 = load double* %x, align 8 + %gep18 = getelementptr double* %iv1517, i64 -1 + %3 = load double* %gep18, align 8 + %sub = fsub double %2, %3 + %4 = load double* %y, align 8 + %5 = load double* %iv1517, align 8 + %sub8 = fsub double %4, %5 + %add10 = fadd double %sub, %sub8 + %call = tail call double @sqrt(double %sub8) #2 + store double %add10, double* %vx, align 8 + %gep16 = getelementptr %struct.planet* %iv15, i64 1 + %iv.next21 = add i32 %iv20, -1 + %exitcond = icmp eq i32 %iv.next21, 0 + br i1 %exitcond, label %for.inc20, label %for.body3 + +for.inc20: ; preds = %for.body3, %for.body + %lftr.wideiv11 = trunc i64 %iv.next10 to i32 + %gep14 = getelementptr %struct.planet* %iv, i64 1 + %iv.next = add i32 %iv19, -1 + %exitcond12 = icmp eq i32 %lftr.wideiv11, %nbodies + br i1 %exitcond12, label %for.end38, label %for.body + +for.end38: ; preds = %for.inc20, %entry + ret void +} + +; Function Attrs: nounwind +declare double @sqrt(double) #1 + +attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }