Index: lib/CodeGen/AggressiveAntiDepBreaker.cpp =================================================================== --- lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -150,12 +150,41 @@ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) for (const auto &LI : (*SI)->liveins()) { - for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) { - unsigned Reg = *AI; - State->UnionGroups(Reg, 0); - KillIndices[Reg] = BB->size(); - DefIndices[Reg] = ~0u; - } + // We only need to consider the live-out registers themselves, not all + // aliases, because we can get more rename chances for live-out registers + // in BB. + // + // Why is this correct? Consider the following cases: + // + // Assume r is an alias of R. If R is live-out, can renaming r cause an + // incorrect result? + // + // case 1: case 2: + // BB1: BB1: + // def R: R=g0 def r: r=gN->gN+1 (via R) (Can be renamed.) + // use R: R=g0 use r: g0->gN (last use) R->gN+1 (last use) + // def r: r=gM->g0 (via R) + // use r: g0->gM (last use) def R: R=gM (Can be renamed.) + // use R: R=g0->gM (last use) + // BB2: + // live-ins: R def R: R=g0 + // use R use R: R=g0 + // + // BB2: + // live-ins: R + // use R + // + // In case 1, AADB can't rename r (or any of R's aliases), because R is + // still live, and is in group 0. + // + // In case 2, if r is a sub-register of R, then it is fine to rename r. + // If r is a super-register of R, but BB2 only uses R, it is still + // fine to rename r in BB1; if r is also used in BB2, then r should be + // live-out, too. + unsigned Reg = LI.PhysReg; + State->UnionGroups(Reg, 0); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; } // Mark live-out callee-saved registers. 
In a return block this is Index: test/CodeGen/PowerPC/ppc64-aadb.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/ppc64-aadb.ll @@ -0,0 +1,74 @@ +; RUN: llc < %s -break-anti-dependencies=all -march=ppc64 -mcpu=pwr8 \ +; RUN: -print-before=post-RA-sched -o /dev/null | \ +; RUN: FileCheck %s -check-prefix=CHECK-BEFORE-POST-RA +; RUN: llc < %s -break-anti-dependencies=all -march=ppc64 -mcpu=pwr8 \ +; RUN: -verify-machineinstrs | FileCheck %s + +; Create a scenario in which next.bb uses many variables (VSX registers) that +; come from bb; in this case, they are: %0 %1 %2 %3 %add15. These uses would +; prevent the original Aggressive Anti-Dependency Breaker from renaming many +; false-dependency VSX registers in bb. +; +; We correct this behaviour and use this test case to guard the new behaviour. +define double @aadb(double* nocapture readonly %d, i32 signext %val) #0 { +bb: + %0 = load double, double* %d + %arrayidx1 = getelementptr inbounds double, double* %d, i64 1 + %1 = load double, double* %arrayidx1 + %arrayidx2 = getelementptr inbounds double, double* %d, i64 2 + %2 = load double, double* %arrayidx2 + %arrayidx3 = getelementptr inbounds double, double* %d, i64 3 + %3 = load double, double* %arrayidx3 + %add = fadd double %0, %1 + %add4 = fadd double %2, %3 + %sub = fsub double %add, %add4 + %mul = fmul double %0, %1 + %add5 = fadd double %mul, %sub + %mul6 = fmul double %0, %2 + %add7 = fadd double %mul6, %add5 + %mul8 = fmul double %0, %3 + %sub9 = fsub double %add7, %mul8 + %mul10 = fmul double %1, %2 + %sub11 = fsub double %sub9, %mul10 + %mul12 = fmul double %1, %3 + %sub13 = fsub double %sub11, %mul12 + %mul14 = fmul double %2, %3 + %add15 = fadd double %mul14, %sub13 + %tobool = icmp eq i32 %val, 0 + br i1 %tobool, label %end, label %next.bb + +; Check that the live-in registers are as expected before post-RA-sched. 
+; CHECK-BEFORE-POST-RA: %F5 +; CHECK-BEFORE-POST-RA: %F1 +; CHECK-BEFORE-POST-RA: %CR0 +; CHECK-BEFORE-POST-RA: BB#1 +; CHECK-BEFORE-POST-RA: Live Ins: %F0 %F1 %F2 %F3 %F4 %F5 %X3 + +; CHECK-LABEL: aadb +; CHECK: BB#0 +; CHECK-DAG: xsadddp 1 +; CHECK-DAG: xsadddp 5 +; CHECK-DAG: xsmuldp 9 +; CHECK-DAG: xsmuldp 10 +; CHECK-DAG: xsmuldp 11 +; CHECK-DAG: xsmuldp 12 +; CHECK-DAG: xsmuldp 13 + +next.bb: + %arrayidx16 = getelementptr inbounds double, double* %d, i64 4 + %4 = load double, double* %arrayidx16 + %arrayidx17 = getelementptr inbounds double, double* %d, i64 5 + %5 = load double, double* %arrayidx17 + %mul18 = fmul double %4, %5 + %sub19 = fsub double %0, %1 + %sub20 = fsub double %mul18, %sub19 + %sub21 = fsub double %2, %3 + %sub22 = fsub double %sub20, %sub21 + %add24 = fadd double %mul14, %sub22 + %add25 = fadd double %add15, %add24 + br label %end + +end: + %result.0 = phi double [ %add25, %next.bb ], [ %add15, %bb ] + ret double %result.0 +}