Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3308,6 +3308,14 @@ !TLI.isOperationLegalOrCustom(ISD::ABS, VT) && TLI.expandABS(N1.getNode(), Result, DAG, true)) return Result; + + // Fold neg(bvsplat(neg(x))) -> bvsplat(x) + if (N1.getOpcode() == ISD::BUILD_VECTOR && + llvm::all_of(N1->ops(), + [&](SDValue Op) { return Op == N1.getOperand(0); }) && + N1.getOperand(0)->getOpcode() == ISD::SUB && + isNullConstant(N1.getOperand(0)->getOperand(0))) + return DAG.getSplatBuildVector(VT, DL, N1.getOperand(0)->getOperand(1)); } // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) Index: llvm/test/CodeGen/AArch64/neon-shift-neg.ll =================================================================== --- llvm/test/CodeGen/AArch64/neon-shift-neg.ll +++ llvm/test/CodeGen/AArch64/neon-shift-neg.ll @@ -4,9 +4,7 @@ define <2 x i64> @shr64x2(<2 x i64> %a, i64 %b) { ; CHECK-LABEL: shr64x2: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: neg x8, x0 -; CHECK-NEXT: dup v1.2d, x8 -; CHECK-NEXT: neg v1.2d, v1.2d +; CHECK-NEXT: dup v1.2d, x0 ; CHECK-NEXT: sshl v0.2d, v0.2d, v1.2d ; CHECK-NEXT: ret entry: @@ -20,9 +18,7 @@ define <4 x i32> @shr32x4(<4 x i32> %a, i32 %b) { ; CHECK-LABEL: shr32x4: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: neg w8, w0 -; CHECK-NEXT: dup v1.4s, w8 -; CHECK-NEXT: neg v1.4s, v1.4s +; CHECK-NEXT: dup v1.4s, w0 ; CHECK-NEXT: sshl v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret entry: @@ -36,9 +32,7 @@ define <4 x i32> @shr32x4undef(<4 x i32> %a, i32 %b) { ; CHECK-LABEL: shr32x4undef: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: neg w8, w0 -; CHECK-NEXT: dup v1.4s, w8 -; CHECK-NEXT: neg v1.4s, v1.4s +; CHECK-NEXT: dup v1.4s, w0 ; CHECK-NEXT: sshl v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret entry: @@ -52,9 +46,7 @@ define <8 x i16> @shr16x8(<8 x i16> %a, i16 %b) { ; CHECK-LABEL: shr16x8: ; CHECK: // %bb.0: // 
%entry -; CHECK-NEXT: neg w8, w0 -; CHECK-NEXT: dup v1.8h, w8 -; CHECK-NEXT: neg v1.8h, v1.8h +; CHECK-NEXT: dup v1.8h, w0 ; CHECK-NEXT: sshl v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret entry: @@ -68,9 +60,7 @@ define <16 x i8> @shr8x16(<16 x i8> %a, i8 %b) { ; CHECK-LABEL: shr8x16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: neg w8, w0 -; CHECK-NEXT: dup v1.16b, w8 -; CHECK-NEXT: neg v1.16b, v1.16b +; CHECK-NEXT: dup v1.16b, w0 ; CHECK-NEXT: sshl v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: @@ -84,9 +74,7 @@ define <1 x i64> @shr64x1(<1 x i64> %a, i64 %b) { ; CHECK-LABEL: shr64x1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: neg x8, x0 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: neg d1, d1 +; CHECK-NEXT: fmov d1, x0 ; CHECK-NEXT: sshl d0, d0, d1 ; CHECK-NEXT: ret entry: @@ -99,9 +87,7 @@ define <2 x i32> @shr32x2(<2 x i32> %a, i32 %b) { ; CHECK-LABEL: shr32x2: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: neg w8, w0 -; CHECK-NEXT: dup v1.2s, w8 -; CHECK-NEXT: neg v1.2s, v1.2s +; CHECK-NEXT: dup v1.2s, w0 ; CHECK-NEXT: sshl v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret entry: @@ -115,9 +101,7 @@ define <4 x i16> @shr16x4(<4 x i16> %a, i16 %b) { ; CHECK-LABEL: shr16x4: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: neg w8, w0 -; CHECK-NEXT: dup v1.4h, w8 -; CHECK-NEXT: neg v1.4h, v1.4h +; CHECK-NEXT: dup v1.4h, w0 ; CHECK-NEXT: sshl v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret entry: @@ -131,9 +115,7 @@ define <8 x i8> @shr8x8(<8 x i8> %a, i8 %b) { ; CHECK-LABEL: shr8x8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: neg w8, w0 -; CHECK-NEXT: dup v1.8b, w8 -; CHECK-NEXT: neg v1.8b, v1.8b +; CHECK-NEXT: dup v1.8b, w0 ; CHECK-NEXT: sshl v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret entry: