diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1412,6 +1412,11 @@ /// Return an AssertAlignSDNode. SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A); + /// Swap N1 and N2 if Opcode is a commutative binary opcode + /// and the canonical form expects the opposite order. + void canonicalizeCommutativeBinop(unsigned Opcode, SDValue &N1, + SDValue &N2) const; + /// Return the specified value casted to /// the target's desired shift amount type. SDValue getShiftAmountOperand(EVT LHSTy, SDValue Op); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5629,20 +5629,34 @@ return getNode(Opcode, DL, VT, N1, N2, Flags); } +void SelectionDAG::canonicalizeCommutativeBinop(unsigned Opcode, SDValue &N1, + SDValue &N2) const { + if (!TLI->isCommutativeBinOp(Opcode)) + return; + + // Canonicalize: + // binop(const, nonconst) -> binop(nonconst, const) + bool IsN1C = isConstantIntBuildVectorOrConstantInt(N1); + bool IsN2C = isConstantIntBuildVectorOrConstantInt(N2); + bool IsN1CFP = isConstantFPBuildVectorOrConstantFP(N1); + bool IsN2CFP = isConstantFPBuildVectorOrConstantFP(N2); + if ((IsN1C && !IsN2C) || (IsN1CFP && !IsN2CFP)) + std::swap(N1, N2); + + // Canonicalize: + // binop(splat(x), step_vector) -> binop(step_vector, splat(x)) + else if (N1.getOpcode() == ISD::SPLAT_VECTOR && + N2.getOpcode() == ISD::STEP_VECTOR) + std::swap(N1, N2); +} + SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, SDValue N2, const SDNodeFlags Flags) { assert(N1.getOpcode() != ISD::DELETED_NODE && N2.getOpcode() != ISD::DELETED_NODE && "Operand is DELETED_NODE!"); - // Canonicalize constant to RHS if commutative. 
- if (TLI->isCommutativeBinOp(Opcode)) { - bool IsN1C = isConstantIntBuildVectorOrConstantInt(N1); - bool IsN2C = isConstantIntBuildVectorOrConstantInt(N2); - bool IsN1CFP = isConstantFPBuildVectorOrConstantFP(N1); - bool IsN2CFP = isConstantFPBuildVectorOrConstantFP(N2); - if ((IsN1C && !IsN2C) || (IsN1CFP && !IsN2CFP)) - std::swap(N1, N2); - } + + canonicalizeCommutativeBinop(Opcode, N1, N2); auto *N1C = dyn_cast<ConstantSDNode>(N1); auto *N2C = dyn_cast<ConstantSDNode>(N2); diff --git a/llvm/test/CodeGen/AArch64/active_lane_mask.ll b/llvm/test/CodeGen/AArch64/active_lane_mask.ll --- a/llvm/test/CodeGen/AArch64/active_lane_mask.ll +++ b/llvm/test/CodeGen/AArch64/active_lane_mask.ll @@ -80,7 +80,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: index z0.b, #0, #1 ; CHECK-NEXT: mov z1.b, w0 -; CHECK-NEXT: uqadd z0.b, z1.b, z0.b +; CHECK-NEXT: uqadd z0.b, z0.b, z1.b ; CHECK-NEXT: mov z1.b, w1 ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: cmphi p0.b, p0/z, z1.b, z0.b @@ -96,7 +96,7 @@ ; CHECK-NEXT: mov z1.h, w0 ; CHECK-NEXT: and z0.h, z0.h, #0xff ; CHECK-NEXT: and z1.h, z1.h, #0xff -; CHECK-NEXT: add z0.h, z1.h, z0.h +; CHECK-NEXT: add z0.h, z0.h, z1.h ; CHECK-NEXT: mov z1.h, w1 ; CHECK-NEXT: umin z0.h, z0.h, #255 ; CHECK-NEXT: and z1.h, z1.h, #0xff @@ -115,7 +115,7 @@ ; CHECK-NEXT: mov z1.s, w0 ; CHECK-NEXT: and z0.s, z0.s, #0xff ; CHECK-NEXT: and z1.s, z1.s, #0xff -; CHECK-NEXT: add z0.s, z1.s, z0.s +; CHECK-NEXT: add z0.s, z0.s, z1.s ; CHECK-NEXT: mov z1.s, w1 ; CHECK-NEXT: umin z0.s, z0.s, #255 ; CHECK-NEXT: and z1.s, z1.s, #0xff @@ -135,7 +135,7 @@ ; CHECK-NEXT: mov z1.d, x0 ; CHECK-NEXT: and z0.d, z0.d, #0xff ; CHECK-NEXT: and z1.d, z1.d, #0xff -; CHECK-NEXT: add z0.d, z1.d, z0.d +; CHECK-NEXT: add z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: mov z2.d, x1 ; CHECK-NEXT: umin z0.d, z0.d, #255 @@ -167,29 +167,29 @@ ; CHECK-NEXT: mov z2.d, z0.d ; CHECK-NEXT: mov z4.s, w1 ; CHECK-NEXT: incw z1.s -; CHECK-NEXT: uqadd z5.s, z3.s, z0.s +; CHECK-NEXT: uqadd z5.s, z0.s, z3.s ; 
CHECK-NEXT: incw z2.s, all, mul #2 ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: cmphi p1.s, p0/z, z4.s, z5.s -; CHECK-NEXT: uqadd z5.s, z3.s, z1.s +; CHECK-NEXT: uqadd z5.s, z1.s, z3.s ; CHECK-NEXT: cmphi p2.s, p0/z, z4.s, z5.s -; CHECK-NEXT: uqadd z5.s, z3.s, z2.s +; CHECK-NEXT: uqadd z5.s, z2.s, z3.s ; CHECK-NEXT: incw z6.s, all, mul #2 ; CHECK-NEXT: incw z0.s, all, mul #4 ; CHECK-NEXT: cmphi p3.s, p0/z, z4.s, z5.s -; CHECK-NEXT: uqadd z5.s, z3.s, z6.s +; CHECK-NEXT: uqadd z5.s, z6.s, z3.s ; CHECK-NEXT: incw z1.s, all, mul #4 ; CHECK-NEXT: cmphi p4.s, p0/z, z4.s, z5.s -; CHECK-NEXT: uqadd z0.s, z3.s, z0.s -; CHECK-NEXT: uqadd z1.s, z3.s, z1.s +; CHECK-NEXT: uqadd z0.s, z0.s, z3.s +; CHECK-NEXT: uqadd z1.s, z1.s, z3.s ; CHECK-NEXT: incw z2.s, all, mul #4 ; CHECK-NEXT: incw z6.s, all, mul #4 ; CHECK-NEXT: uzp1 p1.h, p1.h, p2.h ; CHECK-NEXT: uzp1 p2.h, p3.h, p4.h ; CHECK-NEXT: cmphi p3.s, p0/z, z4.s, z0.s ; CHECK-NEXT: cmphi p4.s, p0/z, z4.s, z1.s -; CHECK-NEXT: uqadd z0.s, z3.s, z2.s -; CHECK-NEXT: uqadd z1.s, z3.s, z6.s +; CHECK-NEXT: uqadd z0.s, z2.s, z3.s +; CHECK-NEXT: uqadd z1.s, z6.s, z3.s ; CHECK-NEXT: cmphi p5.s, p0/z, z4.s, z0.s ; CHECK-NEXT: cmphi p0.s, p0/z, z4.s, z1.s ; CHECK-NEXT: uzp1 p3.h, p3.h, p4.h @@ -223,63 +223,63 @@ ; CHECK-NEXT: mov z2.d, z0.d ; CHECK-NEXT: mov z4.d, x1 ; CHECK-NEXT: incd z1.d -; CHECK-NEXT: uqadd z5.d, z3.d, z0.d -; CHECK-NEXT: uqadd z6.d, z3.d, z1.d +; CHECK-NEXT: uqadd z5.d, z0.d, z3.d +; CHECK-NEXT: uqadd z6.d, z1.d, z3.d ; CHECK-NEXT: cmphi p1.d, p0/z, z4.d, z5.d ; CHECK-NEXT: mov z5.d, z1.d ; CHECK-NEXT: incd z2.d, all, mul #2 ; CHECK-NEXT: cmphi p2.d, p0/z, z4.d, z6.d -; CHECK-NEXT: uqadd z6.d, z3.d, z2.d +; CHECK-NEXT: uqadd z6.d, z2.d, z3.d ; CHECK-NEXT: mov z7.d, z0.d ; CHECK-NEXT: incd z5.d, all, mul #2 ; CHECK-NEXT: uzp1 p1.s, p1.s, p2.s ; CHECK-NEXT: cmphi p2.d, p0/z, z4.d, z6.d -; CHECK-NEXT: uqadd z6.d, z3.d, z5.d +; CHECK-NEXT: uqadd z6.d, z5.d, z3.d ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: incd z7.d, all, mul 
#4 ; CHECK-NEXT: cmphi p3.d, p0/z, z4.d, z6.d -; CHECK-NEXT: uqadd z6.d, z3.d, z7.d +; CHECK-NEXT: uqadd z6.d, z7.d, z3.d ; CHECK-NEXT: mov z25.d, z2.d ; CHECK-NEXT: incd z24.d, all, mul #4 ; CHECK-NEXT: mov z26.d, z5.d ; CHECK-NEXT: cmphi p4.d, p0/z, z4.d, z6.d -; CHECK-NEXT: uqadd z6.d, z3.d, z24.d +; CHECK-NEXT: uqadd z6.d, z24.d, z3.d ; CHECK-NEXT: incd z25.d, all, mul #4 ; CHECK-NEXT: cmphi p5.d, p0/z, z4.d, z6.d -; CHECK-NEXT: uqadd z6.d, z3.d, z25.d +; CHECK-NEXT: uqadd z6.d, z25.d, z3.d ; CHECK-NEXT: incd z26.d, all, mul #4 ; CHECK-NEXT: cmphi p6.d, p0/z, z4.d, z6.d -; CHECK-NEXT: uqadd z6.d, z3.d, z26.d +; CHECK-NEXT: uqadd z6.d, z26.d, z3.d ; CHECK-NEXT: uzp1 p2.s, p2.s, p3.s ; CHECK-NEXT: cmphi p3.d, p0/z, z4.d, z6.d ; CHECK-NEXT: incd z0.d, all, mul #8 ; CHECK-NEXT: incd z1.d, all, mul #8 ; CHECK-NEXT: uzp1 p4.s, p4.s, p5.s ; CHECK-NEXT: uzp1 p3.s, p6.s, p3.s -; CHECK-NEXT: uqadd z0.d, z3.d, z0.d -; CHECK-NEXT: uqadd z1.d, z3.d, z1.d +; CHECK-NEXT: uqadd z0.d, z0.d, z3.d +; CHECK-NEXT: uqadd z1.d, z1.d, z3.d ; CHECK-NEXT: incd z2.d, all, mul #8 ; CHECK-NEXT: incd z5.d, all, mul #8 ; CHECK-NEXT: uzp1 p1.h, p1.h, p2.h ; CHECK-NEXT: uzp1 p2.h, p4.h, p3.h ; CHECK-NEXT: cmphi p3.d, p0/z, z4.d, z0.d ; CHECK-NEXT: cmphi p4.d, p0/z, z4.d, z1.d -; CHECK-NEXT: uqadd z0.d, z3.d, z2.d -; CHECK-NEXT: uqadd z1.d, z3.d, z5.d +; CHECK-NEXT: uqadd z0.d, z2.d, z3.d +; CHECK-NEXT: uqadd z1.d, z5.d, z3.d ; CHECK-NEXT: incd z7.d, all, mul #8 ; CHECK-NEXT: incd z24.d, all, mul #8 ; CHECK-NEXT: cmphi p5.d, p0/z, z4.d, z0.d ; CHECK-NEXT: cmphi p6.d, p0/z, z4.d, z1.d -; CHECK-NEXT: uqadd z0.d, z3.d, z7.d -; CHECK-NEXT: uqadd z1.d, z3.d, z24.d +; CHECK-NEXT: uqadd z0.d, z7.d, z3.d +; CHECK-NEXT: uqadd z1.d, z24.d, z3.d ; CHECK-NEXT: incd z25.d, all, mul #8 ; CHECK-NEXT: incd z26.d, all, mul #8 ; CHECK-NEXT: uzp1 p3.s, p3.s, p4.s ; CHECK-NEXT: uzp1 p4.s, p5.s, p6.s ; CHECK-NEXT: cmphi p5.d, p0/z, z4.d, z0.d ; CHECK-NEXT: cmphi p6.d, p0/z, z4.d, z1.d -; CHECK-NEXT: uqadd z0.d, 
z3.d, z25.d -; CHECK-NEXT: uqadd z1.d, z3.d, z26.d +; CHECK-NEXT: uqadd z0.d, z25.d, z3.d +; CHECK-NEXT: uqadd z1.d, z26.d, z3.d ; CHECK-NEXT: cmphi p7.d, p0/z, z4.d, z0.d ; CHECK-NEXT: cmphi p0.d, p0/z, z4.d, z1.d ; CHECK-NEXT: uzp1 p5.s, p5.s, p6.s @@ -308,9 +308,9 @@ ; CHECK-NEXT: mov z2.b, w0 ; CHECK-NEXT: add z1.b, z0.b, z1.b ; CHECK-NEXT: mov z3.b, w1 -; CHECK-NEXT: uqadd z0.b, z2.b, z0.b +; CHECK-NEXT: uqadd z0.b, z0.b, z2.b ; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: uqadd z1.b, z2.b, z1.b +; CHECK-NEXT: uqadd z1.b, z1.b, z2.b ; CHECK-NEXT: cmphi p0.b, p1/z, z3.b, z0.b ; CHECK-NEXT: cmphi p1.b, p1/z, z3.b, z1.b ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-stepvector.ll b/llvm/test/CodeGen/AArch64/sve-stepvector.ll --- a/llvm/test/CodeGen/AArch64/sve-stepvector.ll +++ b/llvm/test/CodeGen/AArch64/sve-stepvector.ll @@ -218,7 +218,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: index z0.d, #0, #1 ; CHECK-NEXT: mov z1.d, x0 -; CHECK-NEXT: add z1.d, z1.d, z0.d +; CHECK-NEXT: add z1.d, z0.d, z1.d ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret @@ -292,11 +292,7 @@ define <vscale x 2 x i64> @mul_add_stepvector_nxv2i64_commutative(i64 %x, i64 %y) { ; CHECK-LABEL: mul_add_stepvector_nxv2i64_commutative: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: index z1.d, #0, #1 -; CHECK-NEXT: mov z2.d, x1 -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z0.d, x0 -; CHECK-NEXT: mla z0.d, p0/m, z2.d, z1.d +; CHECK-NEXT: index z0.d, x0, x1 ; CHECK-NEXT: ret entry: %0 = insertelement <vscale x 2 x i64> poison, i64 %y, i32 0