diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21635,6 +21635,38 @@ return DAG.getBuildVector(VT, SDLoc(N), Opnds); } + // Canonicalise concat_vectors to replace concatenations of truncated nots + // with nots of concatenated truncates. This in some cases allows for multiple + // redundant negations to be eliminated. + // (concat_vectors (v4i16 (truncate (not (v4i32)))), + // (v4i16 (truncate (not (v4i32))))) + // -> + // (not (concat_vectors (v4i16 (truncate (v4i32))), + // (v4i16 (truncate (v4i32))))) + if (N->getNumOperands() == 2) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + if (N0->getOpcode() == ISD::TRUNCATE && N1->getOpcode() == ISD::TRUNCATE) { + auto isBitwiseVectorNegate = [](SDValue V) { + return V->getOpcode() == ISD::XOR && + ISD::isConstantSplatVectorAllOnes(V.getOperand(1).getNode()); + }; + SDValue N00 = N0->getOperand(0); + SDValue N10 = N1->getOperand(0); + if (isBitwiseVectorNegate(N00) && isBitwiseVectorNegate(N10)) { + SDLoc dl(N); + return DAG.getNOT( + dl, + DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, + DAG.getNode(ISD::TRUNCATE, dl, N0.getValueType(), + N00->getOperand(0)), + DAG.getNode(ISD::TRUNCATE, dl, N1.getValueType(), + N10->getOperand(0))), + VT); + } + } + } + // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR. // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...). if (SDValue V = combineConcatVectorOfScalars(N, DAG)) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -16115,8 +16115,7 @@ } } - - // Wait 'til after everything is legalized to try this. That way we have + // Wait till after everything is legalized to try this. That way we have // legal vector types and such. if (DCI.isBeforeLegalizeOps()) return SDValue(); diff --git a/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll b/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll --- a/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll +++ b/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll @@ -10,9 +10,8 @@ ; CHECK-NEXT: fcmgt v1.4s, v1.4s, #0.0 ; CHECK-NEXT: mov w8, #1 ; CHECK-NEXT: fcmgt v0.4s, v0.4s, #0.0 -; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: xtn v0.8b, v0.8h ; CHECK-NEXT: umaxv b0, v0.8b ; CHECK-NEXT: fmov w9, s0 @@ -32,13 +31,10 @@ ; CHECK-NEXT: fcmgt v2.4s, v2.4s, #0.0 ; CHECK-NEXT: fcmgt v1.4s, v1.4s, #0.0 ; CHECK-NEXT: fcmgt v0.4s, v0.4s, #0.0 -; CHECK-NEXT: mvn v3.16b, v3.16b -; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: uzp1 v2.8h, v2.8h, v3.8h ; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h ; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: umaxv b0, v0.16b ; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: bic w0, w8, w9 @@ -52,30 +48,23 @@ define i1 @unordered_floating_point_compare_on_v32f32(<32 x float> %a_vec) { ; CHECK-LABEL: unordered_floating_point_compare_on_v32f32: ; CHECK: // %bb.0: -; CHECK-NEXT: fcmgt v7.4s, v7.4s, #0.0 -; CHECK-NEXT: mov w9, #1 -; CHECK-NEXT: fcmgt v6.4s, v6.4s, #0.0 -; CHECK-NEXT: fcmgt v5.4s, v5.4s, #0.0 -; CHECK-NEXT: fcmgt v4.4s, v4.4s, #0.0 ; CHECK-NEXT: fcmgt v3.4s, v3.4s, #0.0 +; CHECK-NEXT: mov w9, #1 ; CHECK-NEXT: fcmgt v2.4s, v2.4s, #0.0 ; CHECK-NEXT: fcmgt v1.4s, v1.4s, #0.0 ; CHECK-NEXT: fcmgt v0.4s, v0.4s, #0.0 -; CHECK-NEXT: mvn v7.16b, v7.16b -; CHECK-NEXT: mvn v6.16b, v6.16b -; CHECK-NEXT: mvn v5.16b, v5.16b -; CHECK-NEXT: mvn v4.16b, v4.16b -; CHECK-NEXT: mvn v3.16b, v3.16b -; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v0.16b, v0.16b -; CHECK-NEXT: uzp1 v6.8h, v6.8h, v7.8h -; CHECK-NEXT: uzp1 v4.8h, v4.8h, v5.8h +; CHECK-NEXT: fcmgt v7.4s, v7.4s, #0.0 +; CHECK-NEXT: fcmgt v6.4s, v6.4s, #0.0 +; CHECK-NEXT: fcmgt v5.4s, v5.4s, #0.0 +; CHECK-NEXT: fcmgt v4.4s, v4.4s, #0.0 ; CHECK-NEXT: uzp1 v2.8h, v2.8h, v3.8h ; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h -; CHECK-NEXT: uzp1 v1.16b, v4.16b, v6.16b +; CHECK-NEXT: uzp1 v6.8h, v6.8h, v7.8h +; CHECK-NEXT: uzp1 v1.8h, v4.8h, v5.8h ; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b -; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-NEXT: uzp1 v1.16b, v1.16b, v6.16b +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: orn v0.16b, v0.16b, v1.16b ; CHECK-NEXT: umaxv b0, v0.16b ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: bic w0, w9, w8