diff --git a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp --- a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -904,7 +904,7 @@ RegAliases.set(*AI); for (SDep S : PathSU->Succs) { SDep::Kind K = S.getKind(); - if (K != SDep::Data && K != SDep::Output && K != SDep::Anti) + if (K != SDep::Data) continue; unsigned R = S.getReg(); if (!RegAliases[R]) diff --git a/llvm/test/CodeGen/Hexagon/autohvx/fp-to-int.ll b/llvm/test/CodeGen/Hexagon/autohvx/fp-to-int.ll --- a/llvm/test/CodeGen/Hexagon/autohvx/fp-to-int.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/fp-to-int.ll @@ -301,23 +301,23 @@ ; CHECK-NEXT: { ; CHECK-NEXT: r2 = #15360 ; CHECK-NEXT: r7 = #-4 +; CHECK-NEXT: r6 = #1 ; CHECK-NEXT: v1 = vmem(r0+#0) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v0.h = vsplat(r2) -; CHECK-NEXT: r6 = ##-2147483648 -; CHECK-NEXT: r2 = #1 -; CHECK-NEXT: } -; CHECK-NEXT: { -; CHECK-NEXT: v2 = vsplat(r6) ; CHECK-NEXT: r4 = #32 ; CHECK-NEXT: r5 = #8 ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v7 = vsplat(r4) +; CHECK-NEXT: r2 = ##2147483647 ; CHECK-NEXT: v24 = vxor(v24,v24) ; CHECK-NEXT: } ; CHECK-NEXT: { +; CHECK-NEXT: v25 = vsplat(r2) +; CHECK-NEXT: } +; CHECK-NEXT: { ; CHECK-NEXT: v1:0.qf32 = vmpy(v1.hf,v0.hf) ; CHECK-NEXT: } ; CHECK-NEXT: { @@ -327,31 +327,31 @@ ; CHECK-NEXT: v1.sf = v1.qf32 ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: r7:6 = combine(#30,#24) +; CHECK-NEXT: r7 = ##-2147483648 ; CHECK-NEXT: v1:0 = vshuff(v1,v0,r7) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v6 = vsplat(r7) +; CHECK-NEXT: v2 = vsplat(r7) ; CHECK-NEXT: q0 = vcmp.gt(v24.w,v1.w) ; CHECK-NEXT: q1 = vcmp.gt(v24.w,v0.w) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v3.w = vasl(v0.w,r2) +; CHECK-NEXT: v3.w = vasl(v0.w,r6) +; CHECK-NEXT: v28 = vmux(q0,v2,v25) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: r2 = ##2147483647 -; CHECK-NEXT: v4.w = vasl(v1.w,r2) +; CHECK-NEXT: r7:6 = combine(#30,#24) +; CHECK-NEXT: v4.w = vasl(v1.w,r6) ; CHECK-NEXT: v3.w = vsub(v3.w,v2.w) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v25 = vsplat(r2) +; CHECK-NEXT: v6 = vsplat(r7) ; CHECK-NEXT: v5.w = vasl(v0.w,r5) ; CHECK-NEXT: v4.w = vsub(v4.w,v2.w) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v3.w = vasr(v3.w,r6) ; CHECK-NEXT: v5 = vor(v5,v2) -; CHECK-NEXT: v28 = vmux(q0,v2,v25) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v4.w = vasr(v4.w,r6) @@ -530,28 +530,28 @@ ; CHECK-NEXT: v8.w = vasr(v8.w,r6) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v23 = vsplat(r3) +; CHECK-NEXT: v21 = vsplat(r3) ; CHECK-NEXT: v7.w = vasr(v7.w,r6) -; CHECK-NEXT: v20.w = vsub(v9.w,v1.w) +; CHECK-NEXT: v18.w = vsub(v9.w,v1.w) ; CHECK-NEXT: v8.w = vsub(v10.w,v8.w) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v21.w = vasl(v6.w,r2) -; CHECK-NEXT: v28 = vmux(q1,v1,v23) -; CHECK-NEXT: v26 = vmux(q0,v1,v23) +; CHECK-NEXT: v19.w = vasl(v6.w,r2) +; CHECK-NEXT: v26 = vmux(q1,v1,v21) +; CHECK-NEXT: v24 = vmux(q0,v1,v21) ; CHECK-NEXT: v7.w = vsub(v10.w,v7.w) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v5.w = vasl(v4.w,r2) ; CHECK-NEXT: v8.w = vmin(v8.w,v13.w) -; CHECK-NEXT: v9 = vor(v21,v1) -; CHECK-NEXT: v22.w = vmin(v7.w,v13.w) +; CHECK-NEXT: v9 = vor(v19,v1) +; CHECK-NEXT: v20.w = vmin(v7.w,v13.w) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v4.w = vasr(v20.w,r6) +; CHECK-NEXT: v4.w = vasr(v18.w,r6) ; CHECK-NEXT: q3 = vcmp.gt(v8.w,v12.w) ; CHECK-NEXT: v5 = vor(v5,v1) -; CHECK-NEXT: q2 = vcmp.gt(v22.w,v12.w) +; CHECK-NEXT: q2 = vcmp.gt(v20.w,v12.w) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v11.w = vasr(v11.w,r6) @@ -563,49 +563,49 @@ ; CHECK-NEXT: v4.w = vmin(v4.w,v13.w) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v24.w = vasl(v0.w,r2) +; CHECK-NEXT: v22.w = vasl(v0.w,r2) ; CHECK-NEXT: v3 = vor(v3,v1) ; CHECK-NEXT: v10.w = vmin(v10.w,v13.w) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v8.w = vlsr(v9.w,v8.w) -; CHECK-NEXT: v6 = vor(v24,v1) +; CHECK-NEXT: v6 = vor(v22,v1) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v5.w = vlsr(v5.w,v22.w) -; CHECK-NEXT: v27.w = vsub(v12.w,v8.w) +; CHECK-NEXT: v5.w = vlsr(v5.w,v20.w) +; CHECK-NEXT: v25.w = vsub(v12.w,v8.w) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v3.w = vlsr(v3.w,v4.w) -; CHECK-NEXT: v25.w = vsub(v12.w,v5.w) -; CHECK-NEXT: v8 = vmux(q1,v27,v8) +; CHECK-NEXT: v23.w = vsub(v12.w,v5.w) +; CHECK-NEXT: v8 = vmux(q1,v25,v8) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v6.w = vlsr(v6.w,v10.w) -; CHECK-NEXT: v5 = vmux(q0,v25,v5) +; CHECK-NEXT: v5 = vmux(q0,v23,v5) ; CHECK-NEXT: q0 = vcmp.gt(v12.w,v2.w) -; CHECK-NEXT: v29.w = vsub(v12.w,v3.w) +; CHECK-NEXT: v27.w = vsub(v12.w,v3.w) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v2 = vmux(q3,v8,v28) +; CHECK-NEXT: v2 = vmux(q3,v8,v26) +; CHECK-NEXT: v28.w = vsub(v12.w,v6.w) ; CHECK-NEXT: q3 = vcmp.gt(v12.w,v0.w) -; CHECK-NEXT: v30.w = vsub(v12.w,v6.w) -; CHECK-NEXT: v5 = vmux(q2,v5,v26) +; CHECK-NEXT: v5 = vmux(q2,v5,v24) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v0 = vmux(q0,v1,v23) -; CHECK-NEXT: v3 = vmux(q0,v29,v3) +; CHECK-NEXT: v29 = vmux(q0,v1,v21) +; CHECK-NEXT: v3 = vmux(q0,v27,v3) ; CHECK-NEXT: q2 = vcmp.gt(v4.w,v12.w) -; CHECK-NEXT: v31 = vmux(q3,v30,v6) +; CHECK-NEXT: v30 = vmux(q3,v28,v6) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v2.h = vpack(v2.w,v5.w):sat -; CHECK-NEXT: v1 = vmux(q3,v1,v23) +; CHECK-NEXT: v1 = vmux(q3,v1,v21) ; CHECK-NEXT: q3 = vcmp.gt(v10.w,v12.w) -; CHECK-NEXT: v0 = vmux(q2,v3,v0) +; CHECK-NEXT: v0 = vmux(q2,v3,v29) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v1 = vmux(q3,v31,v1) +; CHECK-NEXT: v1 = vmux(q3,v30,v1) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v3.h = vpack(v1.w,v0.w):sat @@ -614,13 +614,13 @@ ; CHECK-NEXT: v0.h = vpack(v1.w,v0.w):sat ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v1.b = vpack(v3.h,v2.h):sat +; CHECK-NEXT: v31.b = vpack(v3.h,v2.h):sat ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v0.b = vpack(v3.h,v0.h):sat ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v1:0 = vshuff(v0,v1,r7) +; CHECK-NEXT: v1:0 = vshuff(v0,v31,r7) ; CHECK-NEXT: jumpr r31 ; CHECK-NEXT: vmem(r1+#0) = v0.new ; CHECK-NEXT: } @@ -1581,7 +1581,7 @@ ; CHECK-NEXT: v12.w = vsub(v12.w,v3.w) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v23 = vsplat(r4) +; CHECK-NEXT: v20 = vsplat(r4) ; CHECK-NEXT: v8.w = vasr(v8.w,r6) ; CHECK-NEXT: v11.w = vsub(v11.w,v3.w) ; CHECK-NEXT: } @@ -1592,27 +1592,27 @@ ; CHECK-NEXT: { ; CHECK-NEXT: v6.w = vasl(v5.w,r2) ; CHECK-NEXT: v9.w = vsub(v14.w,v9.w) -; CHECK-NEXT: v8.w = vmin(v8.w,v23.w) +; CHECK-NEXT: v8.w = vmin(v8.w,v20.w) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v7.w = vasl(v2.w,r2) ; CHECK-NEXT: v6 = vor(v6,v3) -; CHECK-NEXT: v9.w = vmin(v9.w,v23.w) +; CHECK-NEXT: v9.w = vmin(v9.w,v20.w) ; CHECK-NEXT: q1 = vcmp.gt(v13.w,v8.w) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v22.w = vasr(v11.w,r6) +; CHECK-NEXT: v19.w = vasr(v11.w,r6) ; CHECK-NEXT: v7 = vor(v7,v3) ; CHECK-NEXT: q2 = vcmp.gt(v13.w,v9.w) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v12.w = vasr(v12.w,r6) -; CHECK-NEXT: v5.w = vsub(v14.w,v22.w) +; CHECK-NEXT: v5.w = vsub(v14.w,v19.w) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v4.w = vasl(v1.w,r2) -; CHECK-NEXT: v24.w = vsub(v14.w,v12.w) -; CHECK-NEXT: v5.w = vmin(v5.w,v23.w) +; CHECK-NEXT: v21.w = vsub(v14.w,v12.w) +; CHECK-NEXT: v5.w = vmin(v5.w,v20.w) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: r2 = ##2147483647 @@ -1620,51 +1620,51 @@ ; CHECK-NEXT: v4 = vor(v4,v3) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v25 = vsplat(r2) +; CHECK-NEXT: v22 = vsplat(r2) ; CHECK-NEXT: v6.w = vlsr(v6.w,v8.w) ; CHECK-NEXT: v3 = vor(v10,v3) -; CHECK-NEXT: v10.w = vmin(v24.w,v23.w) +; CHECK-NEXT: v10.w = vmin(v21.w,v20.w) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v7.w = vlsr(v7.w,v9.w) -; CHECK-NEXT: v27 = vmux(q1,v25,v6) +; CHECK-NEXT: v24 = vmux(q1,v22,v6) ; CHECK-NEXT: q1 = vcmp.gt(v13.w,v5.w) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v26.w = vlsr(v4.w,v5.w) -; CHECK-NEXT: v28 = vmux(q2,v25,v7) +; CHECK-NEXT: v23.w = vlsr(v4.w,v5.w) +; CHECK-NEXT: v25 = vmux(q2,v22,v7) ; CHECK-NEXT: q2 = vcmp.gt(v13.w,v10.w) -; CHECK-NEXT: v4 = vmux(q0,v13,v27) +; CHECK-NEXT: v4 = vmux(q0,v13,v24) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v3.w = vlsr(v3.w,v10.w) -; CHECK-NEXT: v29 = vmux(q3,v13,v28) -; CHECK-NEXT: v2 = vmux(q1,v25,v26) +; CHECK-NEXT: v26 = vmux(q3,v13,v25) +; CHECK-NEXT: v2 = vmux(q1,v22,v23) ; CHECK-NEXT: q1 = vcmp.gt(v13.w,v1.w) ; CHECK-NEXT: } ; CHECK-NEXT: { +; CHECK-NEXT: v27 = vmux(q2,v22,v3) ; CHECK-NEXT: q3 = vcmp.gt(v13.w,v0.w) -; CHECK-NEXT: v1 = vmux(q2,v25,v3) -; CHECK-NEXT: v0 = vmux(q1,v13,v2) +; CHECK-NEXT: v28 = vmux(q1,v13,v2) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v30.uh = vpack(v29.w,v4.w):sat -; CHECK-NEXT: v1 = vmux(q3,v13,v1) +; CHECK-NEXT: v29.uh = vpack(v26.w,v4.w):sat +; CHECK-NEXT: v1 = vmux(q3,v13,v27) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v31.uh = vpack(v1.w,v0.w):sat +; CHECK-NEXT: v30.uh = vpack(v1.w,v28.w):sat ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v0.uh = vpack(v1.w,v0.w):sat +; CHECK-NEXT: v0.uh = vpack(v1.w,v28.w):sat ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v1.ub = vpack(v31.h,v30.h):sat +; CHECK-NEXT: v31.ub = vpack(v30.h,v29.h):sat ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v0.ub = vpack(v31.h,v0.h):sat +; CHECK-NEXT: v0.ub = vpack(v30.h,v0.h):sat ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v1:0 = vshuff(v0,v1,r7) +; CHECK-NEXT: v1:0 = vshuff(v0,v31,r7) ; CHECK-NEXT: jumpr r31 ; CHECK-NEXT: vmem(r1+#0) = v0.new ; CHECK-NEXT: } diff --git a/llvm/test/CodeGen/Hexagon/autohvx/mulh.ll b/llvm/test/CodeGen/Hexagon/autohvx/mulh.ll --- a/llvm/test/CodeGen/Hexagon/autohvx/mulh.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/mulh.ll @@ -128,16 +128,16 @@ ; V60-NEXT: v2.w = vmpye(v1.w,v0.uh) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: v0.w = vasr(v0.w,r0) +; V60-NEXT: v31.w = vasr(v0.w,r0) ; V60-NEXT: } ; V60-NEXT: { ; V60-NEXT: v3.w = vasr(v1.w,r0) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: v5:4.w = vmpy(v0.h,v1.uh) +; V60-NEXT: v5:4.w = vmpy(v31.h,v1.uh) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: v31:30.w = vmpy(v0.h,v3.h) +; V60-NEXT: v31:30.w = vmpy(v31.h,v3.h) ; V60-NEXT: } ; V60-NEXT: { ; V60-NEXT: v7:6.w = vadd(v2.uh,v4.uh) @@ -211,10 +211,10 @@ ; V60-NEXT: v2.uw = vlsr(v2.uw,r2) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: v1 = vdelta(v1,v4) +; V60-NEXT: v31 = vdelta(v1,v4) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: v1:0.uw = vmpy(v0.uh,v1.uh) +; V60-NEXT: v1:0.uw = vmpy(v0.uh,v31.uh) ; V60-NEXT: } ; V60-NEXT: { ; V60-NEXT: v1:0.w = vadd(v1.uh,v0.uh) diff --git a/llvm/test/CodeGen/Hexagon/autohvx/vmpy-parts.ll b/llvm/test/CodeGen/Hexagon/autohvx/vmpy-parts.ll --- a/llvm/test/CodeGen/Hexagon/autohvx/vmpy-parts.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/vmpy-parts.ll @@ -12,16 +12,16 @@ ; V60-NEXT: v2.w = vmpye(v1.w,v0.uh) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: v0.w = vasr(v0.w,r0) +; V60-NEXT: v31.w = vasr(v0.w,r0) ; V60-NEXT: } ; V60-NEXT: { ; V60-NEXT: v3.w = vasr(v1.w,r0) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: v5:4.w = vmpy(v0.h,v1.uh) +; V60-NEXT: v5:4.w = vmpy(v31.h,v1.uh) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: v31:30.w = vmpy(v0.h,v3.h) +; V60-NEXT: v31:30.w = vmpy(v31.h,v3.h) ; V60-NEXT: } ; V60-NEXT: { ; V60-NEXT: v7:6.w = vadd(v2.uh,v4.uh) @@ -224,10 +224,10 @@ ; V60-NEXT: v2.uw = vlsr(v2.uw,r2) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: v1 = vdelta(v1,v4) +; V60-NEXT: v31 = vdelta(v1,v4) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: v1:0.uw = vmpy(v0.uh,v1.uh) +; V60-NEXT: v1:0.uw = vmpy(v0.uh,v31.uh) ; V60-NEXT: } ; V60-NEXT: { ; V60-NEXT: v1:0.w = vadd(v1.uh,v0.uh) @@ -342,13 +342,13 @@ ; V60-NEXT: v6.uw = vlsr(v2.uw,r2) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: v5 = vdelta(v1,v5) +; V60-NEXT: v30 = vmux(q1,v1,v4) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: v1 = vmux(q1,v1,v4) +; V60-NEXT: v5 = vdelta(v1,v5) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: if (q0) v1.w += v0.w +; V60-NEXT: if (q0) v30.w += v0.w ; V60-NEXT: } ; V60-NEXT: { ; V60-NEXT: v9:8.uw = vmpy(v0.uh,v5.uh) @@ -357,19 +357,19 @@ ; V60-NEXT: v9:8.w = vadd(v9.uh,v8.uh) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: v31.w = vadd(v8.w,v6.w) +; V60-NEXT: v29.w = vadd(v8.w,v6.w) ; V60-NEXT: } ; V60-NEXT: { ; V60-NEXT: v2.w += vasl(v8.w,r2) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: v9.w += vasr(v31.w,r2) +; V60-NEXT: v9.w += vasr(v29.w,r2) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: v0.w = vadd(v3.w,v9.w) +; V60-NEXT: v31.w = vadd(v3.w,v9.w) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: v3.w = vsub(v0.w,v1.w) +; V60-NEXT: v3.w = vsub(v31.w,v30.w) ; V60-NEXT: } ; V60-NEXT: { ; V60-NEXT: v1:0 = vcombine(v3,v2) @@ -497,10 +497,10 @@ ; V60-NEXT: v5.uw = vlsr(v2.uw,r1) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: v1 = vdelta(v1,v4) +; V60-NEXT: v30 = vdelta(v1,v4) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: v1:0.uw = vmpy(v0.uh,v1.uh) +; V60-NEXT: v1:0.uw = vmpy(v0.uh,v30.uh) ; V60-NEXT: } ; V60-NEXT: { ; V60-NEXT: v1:0.w = vadd(v1.uh,v0.uh)