diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5637,6 +5637,11 @@ bool IsN2CFP = isConstantFPBuildVectorOrConstantFP(N2); if ((IsN1C && !IsN2C) || (IsN1CFP && !IsN2CFP)) std::swap(N1, N2); + // Canonicalize: + // binop(splat(X), stepvector) -> binop(stepvector, splat(X)) + else if (N1.getOpcode() == ISD::SPLAT_VECTOR && + N2.getOpcode() == ISD::STEP_VECTOR) + std::swap(N1, N2); } auto *N1C = dyn_cast<ConstantSDNode>(N1); diff --git a/llvm/test/CodeGen/AArch64/active_lane_mask.ll b/llvm/test/CodeGen/AArch64/active_lane_mask.ll --- a/llvm/test/CodeGen/AArch64/active_lane_mask.ll +++ b/llvm/test/CodeGen/AArch64/active_lane_mask.ll @@ -80,7 +80,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: index z0.b, #0, #1 ; CHECK-NEXT: mov z1.b, w0 -; CHECK-NEXT: uqadd z0.b, z1.b, z0.b +; CHECK-NEXT: uqadd z0.b, z0.b, z1.b ; CHECK-NEXT: mov z1.b, w1 ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: cmphi p0.b, p0/z, z1.b, z0.b @@ -96,7 +96,7 @@ ; CHECK-NEXT: mov z1.h, w0 ; CHECK-NEXT: and z0.h, z0.h, #0xff ; CHECK-NEXT: and z1.h, z1.h, #0xff -; CHECK-NEXT: add z0.h, z1.h, z0.h +; CHECK-NEXT: add z0.h, z0.h, z1.h ; CHECK-NEXT: mov z1.h, w1 ; CHECK-NEXT: umin z0.h, z0.h, #255 ; CHECK-NEXT: and z1.h, z1.h, #0xff @@ -115,7 +115,7 @@ ; CHECK-NEXT: mov z1.s, w0 ; CHECK-NEXT: and z0.s, z0.s, #0xff ; CHECK-NEXT: and z1.s, z1.s, #0xff -; CHECK-NEXT: add z0.s, z1.s, z0.s +; CHECK-NEXT: add z0.s, z0.s, z1.s ; CHECK-NEXT: mov z1.s, w1 ; CHECK-NEXT: umin z0.s, z0.s, #255 ; CHECK-NEXT: and z1.s, z1.s, #0xff @@ -135,7 +135,7 @@ ; CHECK-NEXT: mov z1.d, x0 ; CHECK-NEXT: and z0.d, z0.d, #0xff ; CHECK-NEXT: and z1.d, z1.d, #0xff -; CHECK-NEXT: add z0.d, z1.d, z0.d +; CHECK-NEXT: add z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: mov z2.d, x1 ; CHECK-NEXT: umin z0.d, z0.d, #255 @@ -167,29 +167,29 @@ ; CHECK-NEXT: mov z2.d, z0.d ; 
CHECK-NEXT: mov z4.s, w1 ; CHECK-NEXT: incw z1.s -; CHECK-NEXT: uqadd z5.s, z3.s, z0.s +; CHECK-NEXT: uqadd z5.s, z0.s, z3.s ; CHECK-NEXT: incw z2.s, all, mul #2 ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: cmphi p1.s, p0/z, z4.s, z5.s -; CHECK-NEXT: uqadd z5.s, z3.s, z1.s +; CHECK-NEXT: uqadd z5.s, z1.s, z3.s ; CHECK-NEXT: cmphi p2.s, p0/z, z4.s, z5.s -; CHECK-NEXT: uqadd z5.s, z3.s, z2.s +; CHECK-NEXT: uqadd z5.s, z2.s, z3.s ; CHECK-NEXT: incw z6.s, all, mul #2 ; CHECK-NEXT: incw z0.s, all, mul #4 ; CHECK-NEXT: cmphi p3.s, p0/z, z4.s, z5.s -; CHECK-NEXT: uqadd z5.s, z3.s, z6.s +; CHECK-NEXT: uqadd z5.s, z6.s, z3.s ; CHECK-NEXT: incw z1.s, all, mul #4 ; CHECK-NEXT: cmphi p4.s, p0/z, z4.s, z5.s -; CHECK-NEXT: uqadd z0.s, z3.s, z0.s -; CHECK-NEXT: uqadd z1.s, z3.s, z1.s +; CHECK-NEXT: uqadd z0.s, z0.s, z3.s +; CHECK-NEXT: uqadd z1.s, z1.s, z3.s ; CHECK-NEXT: incw z2.s, all, mul #4 ; CHECK-NEXT: incw z6.s, all, mul #4 ; CHECK-NEXT: uzp1 p1.h, p1.h, p2.h ; CHECK-NEXT: uzp1 p2.h, p3.h, p4.h ; CHECK-NEXT: cmphi p3.s, p0/z, z4.s, z0.s ; CHECK-NEXT: cmphi p4.s, p0/z, z4.s, z1.s -; CHECK-NEXT: uqadd z0.s, z3.s, z2.s -; CHECK-NEXT: uqadd z1.s, z3.s, z6.s +; CHECK-NEXT: uqadd z0.s, z2.s, z3.s +; CHECK-NEXT: uqadd z1.s, z6.s, z3.s ; CHECK-NEXT: cmphi p5.s, p0/z, z4.s, z0.s ; CHECK-NEXT: cmphi p0.s, p0/z, z4.s, z1.s ; CHECK-NEXT: uzp1 p3.h, p3.h, p4.h @@ -223,63 +223,63 @@ ; CHECK-NEXT: mov z2.d, z0.d ; CHECK-NEXT: mov z4.d, x1 ; CHECK-NEXT: incd z1.d -; CHECK-NEXT: uqadd z5.d, z3.d, z0.d -; CHECK-NEXT: uqadd z6.d, z3.d, z1.d +; CHECK-NEXT: uqadd z5.d, z0.d, z3.d +; CHECK-NEXT: uqadd z6.d, z1.d, z3.d ; CHECK-NEXT: cmphi p1.d, p0/z, z4.d, z5.d ; CHECK-NEXT: mov z5.d, z1.d ; CHECK-NEXT: incd z2.d, all, mul #2 ; CHECK-NEXT: cmphi p2.d, p0/z, z4.d, z6.d -; CHECK-NEXT: uqadd z6.d, z3.d, z2.d +; CHECK-NEXT: uqadd z6.d, z2.d, z3.d ; CHECK-NEXT: mov z7.d, z0.d ; CHECK-NEXT: incd z5.d, all, mul #2 ; CHECK-NEXT: uzp1 p1.s, p1.s, p2.s ; CHECK-NEXT: cmphi p2.d, p0/z, z4.d, z6.d -; 
CHECK-NEXT: uqadd z6.d, z3.d, z5.d +; CHECK-NEXT: uqadd z6.d, z5.d, z3.d ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: incd z7.d, all, mul #4 ; CHECK-NEXT: cmphi p3.d, p0/z, z4.d, z6.d -; CHECK-NEXT: uqadd z6.d, z3.d, z7.d +; CHECK-NEXT: uqadd z6.d, z7.d, z3.d ; CHECK-NEXT: mov z25.d, z2.d ; CHECK-NEXT: incd z24.d, all, mul #4 ; CHECK-NEXT: mov z26.d, z5.d ; CHECK-NEXT: cmphi p4.d, p0/z, z4.d, z6.d -; CHECK-NEXT: uqadd z6.d, z3.d, z24.d +; CHECK-NEXT: uqadd z6.d, z24.d, z3.d ; CHECK-NEXT: incd z25.d, all, mul #4 ; CHECK-NEXT: cmphi p5.d, p0/z, z4.d, z6.d -; CHECK-NEXT: uqadd z6.d, z3.d, z25.d +; CHECK-NEXT: uqadd z6.d, z25.d, z3.d ; CHECK-NEXT: incd z26.d, all, mul #4 ; CHECK-NEXT: cmphi p6.d, p0/z, z4.d, z6.d -; CHECK-NEXT: uqadd z6.d, z3.d, z26.d +; CHECK-NEXT: uqadd z6.d, z26.d, z3.d ; CHECK-NEXT: uzp1 p2.s, p2.s, p3.s ; CHECK-NEXT: cmphi p3.d, p0/z, z4.d, z6.d ; CHECK-NEXT: incd z0.d, all, mul #8 ; CHECK-NEXT: incd z1.d, all, mul #8 ; CHECK-NEXT: uzp1 p4.s, p4.s, p5.s ; CHECK-NEXT: uzp1 p3.s, p6.s, p3.s -; CHECK-NEXT: uqadd z0.d, z3.d, z0.d -; CHECK-NEXT: uqadd z1.d, z3.d, z1.d +; CHECK-NEXT: uqadd z0.d, z0.d, z3.d +; CHECK-NEXT: uqadd z1.d, z1.d, z3.d ; CHECK-NEXT: incd z2.d, all, mul #8 ; CHECK-NEXT: incd z5.d, all, mul #8 ; CHECK-NEXT: uzp1 p1.h, p1.h, p2.h ; CHECK-NEXT: uzp1 p2.h, p4.h, p3.h ; CHECK-NEXT: cmphi p3.d, p0/z, z4.d, z0.d ; CHECK-NEXT: cmphi p4.d, p0/z, z4.d, z1.d -; CHECK-NEXT: uqadd z0.d, z3.d, z2.d -; CHECK-NEXT: uqadd z1.d, z3.d, z5.d +; CHECK-NEXT: uqadd z0.d, z2.d, z3.d +; CHECK-NEXT: uqadd z1.d, z5.d, z3.d ; CHECK-NEXT: incd z7.d, all, mul #8 ; CHECK-NEXT: incd z24.d, all, mul #8 ; CHECK-NEXT: cmphi p5.d, p0/z, z4.d, z0.d ; CHECK-NEXT: cmphi p6.d, p0/z, z4.d, z1.d -; CHECK-NEXT: uqadd z0.d, z3.d, z7.d -; CHECK-NEXT: uqadd z1.d, z3.d, z24.d +; CHECK-NEXT: uqadd z0.d, z7.d, z3.d +; CHECK-NEXT: uqadd z1.d, z24.d, z3.d ; CHECK-NEXT: incd z25.d, all, mul #8 ; CHECK-NEXT: incd z26.d, all, mul #8 ; CHECK-NEXT: uzp1 p3.s, p3.s, p4.s ; CHECK-NEXT: 
uzp1 p4.s, p5.s, p6.s ; CHECK-NEXT: cmphi p5.d, p0/z, z4.d, z0.d ; CHECK-NEXT: cmphi p6.d, p0/z, z4.d, z1.d -; CHECK-NEXT: uqadd z0.d, z3.d, z25.d -; CHECK-NEXT: uqadd z1.d, z3.d, z26.d +; CHECK-NEXT: uqadd z0.d, z25.d, z3.d +; CHECK-NEXT: uqadd z1.d, z26.d, z3.d ; CHECK-NEXT: cmphi p7.d, p0/z, z4.d, z0.d ; CHECK-NEXT: cmphi p0.d, p0/z, z4.d, z1.d ; CHECK-NEXT: uzp1 p5.s, p5.s, p6.s @@ -308,9 +308,9 @@ ; CHECK-NEXT: mov z2.b, w0 ; CHECK-NEXT: add z1.b, z0.b, z1.b ; CHECK-NEXT: mov z3.b, w1 -; CHECK-NEXT: uqadd z0.b, z2.b, z0.b +; CHECK-NEXT: uqadd z0.b, z0.b, z2.b ; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: uqadd z1.b, z2.b, z1.b +; CHECK-NEXT: uqadd z1.b, z1.b, z2.b ; CHECK-NEXT: cmphi p0.b, p1/z, z3.b, z0.b ; CHECK-NEXT: cmphi p1.b, p1/z, z3.b, z1.b ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-stepvector.ll b/llvm/test/CodeGen/AArch64/sve-stepvector.ll --- a/llvm/test/CodeGen/AArch64/sve-stepvector.ll +++ b/llvm/test/CodeGen/AArch64/sve-stepvector.ll @@ -218,7 +218,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: index z0.d, #0, #1 ; CHECK-NEXT: mov z1.d, x0 -; CHECK-NEXT: add z1.d, z1.d, z0.d +; CHECK-NEXT: add z1.d, z0.d, z1.d ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret @@ -292,11 +292,7 @@ define <vscale x 2 x i64> @mul_add_stepvector_nxv2i64_commutative(i64 %x, i64 %y) { ; CHECK-LABEL: mul_add_stepvector_nxv2i64_commutative: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: index z1.d, #0, #1 -; CHECK-NEXT: mov z2.d, x1 -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z0.d, x0 -; CHECK-NEXT: mla z0.d, p0/m, z2.d, z1.d +; CHECK-NEXT: index z0.d, x0, x1 ; CHECK-NEXT: ret entry: %0 = insertelement <vscale x 2 x i64> poison, i64 %y, i32 0