diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -162,17 +162,18 @@
 
   case ISD::GET_ROUNDING: Res = PromoteIntRes_GET_ROUNDING(N); break;
 
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::VP_ADD:
+  case ISD::VP_SUB: Res = PromoteIntRes_ADDSUB(N); break;
+
   case ISD::AND:
   case ISD::OR:
   case ISD::XOR:
-  case ISD::ADD:
-  case ISD::SUB:
   case ISD::MUL:
   case ISD::VP_AND:
   case ISD::VP_OR:
   case ISD::VP_XOR:
-  case ISD::VP_ADD:
-  case ISD::VP_SUB:
   case ISD::VP_MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
 
   case ISD::VP_SMIN:
@@ -1238,6 +1239,53 @@
                      N->getOperand(2), N->getOperand(3));
 }
 
+/// Promote the result of an integer ADD or SUB node (or VP_ADD / VP_SUB).
+///
+/// Both value operands are fetched with GetPromotedInteger and the same
+/// opcode is re-emitted in the promoted type. For the two-operand form the
+/// nsw / nuw flag of \p N is copied to the new node only when both promoted
+/// operands are known to fit in the scalar width of the original operands:
+/// ComputeMaxSignificantBits is used for nsw, and the active-bit count from
+/// computeKnownBits for nuw. The four-operand VP form forwards operands 2 and
+/// 3 unchanged and passes no flags.
+SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUB(SDNode *N) {
+  // The input may have strange things in the top bits of the registers, but
+  // these operations don't care. They may have weird bits going out, but
+  // that too is okay if they are integer operations.
+  SDValue LHS = GetPromotedInteger(N->getOperand(0));
+  SDValue RHS = GetPromotedInteger(N->getOperand(1));
+  if (N->getNumOperands() == 2) {
+    SDNodeFlags Flags;
+    // Preserve nsw flag if the promoted inputs are sign extended.
+    if (N->getFlags().hasNoSignedWrap()) {
+      unsigned OpLEffectiveBits = DAG.ComputeMaxSignificantBits(LHS);
+      unsigned OpREffectiveBits = DAG.ComputeMaxSignificantBits(RHS);
+      if (OpLEffectiveBits <= N->getOperand(0).getScalarValueSizeInBits() &&
+          OpREffectiveBits <= N->getOperand(1).getScalarValueSizeInBits())
+        Flags.setNoSignedWrap(true);
+    }
+
+    // Preserve nuw flag if the promoted inputs are zero extended.
+    if (N->getFlags().hasNoUnsignedWrap()) {
+      unsigned OpLEffectiveBits =
+          DAG.computeKnownBits(LHS).countMaxActiveBits();
+      unsigned OpREffectiveBits =
+          DAG.computeKnownBits(RHS).countMaxActiveBits();
+      if (OpLEffectiveBits <= N->getOperand(0).getScalarValueSizeInBits() &&
+          OpREffectiveBits <= N->getOperand(1).getScalarValueSizeInBits())
+        Flags.setNoUnsignedWrap(true);
+    }
+
+    return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+                       Flags);
+  }
+
+  assert(N->getNumOperands() == 4 && "Unexpected number of operands!");
+  assert(N->isVPOpcode() && "Expected VP opcode");
+  return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+                     N->getOperand(2), N->getOperand(3));
+}
+
 SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) {
   // Sign extend the input.
   SDValue LHS = SExtPromotedInteger(N->getOperand(0));
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -339,6 +339,7 @@
   SDValue PromoteIntRes_SETCC(SDNode *N);
   SDValue PromoteIntRes_SHL(SDNode *N);
   SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N);
+  SDValue PromoteIntRes_ADDSUB(SDNode *N);
   SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N);
   SDValue PromoteIntRes_SExtIntBinOp(SDNode *N);
   SDValue PromoteIntRes_UMINUMAX(SDNode *N);
diff --git a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
--- a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
@@ -873,8 +873,8 @@
 ; CHECK-NEXT: shl.2s v0, v0, #24
 ; CHECK-NEXT: shl.2s v1, v1, #24
 ; CHECK-NEXT: sshr.2s v0, v0, #24
-; CHECK-NEXT: ssra.2s v0, v1, #24
-; CHECK-NEXT: sshr.2s v0, v0, #1
+; CHECK-NEXT: sshr.2s v1, v1, #24
+; CHECK-NEXT: shadd.2s v0, v0, v1
 ; CHECK-NEXT: ret
   %zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
   %zextsrc2 = sext <2 x i8> %src2 to <2 x i16>
@@ -889,8 +889,7 @@
 ; CHECK-NEXT: movi
d2, #0x0000ff000000ff ; CHECK-NEXT: and.8b v0, v0, v2 ; CHECK-NEXT: and.8b v1, v1, v2 -; CHECK-NEXT: add.2s v0, v0, v1 -; CHECK-NEXT: ushr.2s v0, v0, #1 +; CHECK-NEXT: uhadd.2s v0, v0, v1 ; CHECK-NEXT: ret %zextsrc1 = zext <2 x i8> %src1 to <2 x i16> %zextsrc2 = zext <2 x i8> %src2 to <2 x i16> @@ -923,8 +922,7 @@ ; CHECK-NEXT: movi d2, #0x0000ff000000ff ; CHECK-NEXT: and.8b v0, v0, v2 ; CHECK-NEXT: and.8b v1, v1, v2 -; CHECK-NEXT: add.2s v0, v0, v1 -; CHECK-NEXT: ushr.2s v0, v0, #1 +; CHECK-NEXT: uhadd.2s v0, v0, v1 ; CHECK-NEXT: ret %zextsrc1 = zext <2 x i8> %src1 to <2 x i16> %zextsrc2 = zext <2 x i8> %src2 to <2 x i16> @@ -1006,9 +1004,7 @@ ; CHECK-NEXT: shl.2s v1, v1, #24 ; CHECK-NEXT: sshr.2s v0, v0, #24 ; CHECK-NEXT: sshr.2s v1, v1, #24 -; CHECK-NEXT: mvn.8b v0, v0 -; CHECK-NEXT: sub.2s v0, v1, v0 -; CHECK-NEXT: sshr.2s v0, v0, #1 +; CHECK-NEXT: srhadd.2s v0, v0, v1 ; CHECK-NEXT: ret %zextsrc1 = sext <2 x i8> %src1 to <2 x i16> %zextsrc2 = sext <2 x i8> %src2 to <2 x i16> @@ -1024,9 +1020,7 @@ ; CHECK-NEXT: movi d2, #0x0000ff000000ff ; CHECK-NEXT: and.8b v0, v0, v2 ; CHECK-NEXT: and.8b v1, v1, v2 -; CHECK-NEXT: mvn.8b v0, v0 -; CHECK-NEXT: sub.2s v0, v1, v0 -; CHECK-NEXT: ushr.2s v0, v0, #1 +; CHECK-NEXT: urhadd.2s v0, v0, v1 ; CHECK-NEXT: ret %zextsrc1 = zext <2 x i8> %src1 to <2 x i16> %zextsrc2 = zext <2 x i8> %src2 to <2 x i16> @@ -1041,12 +1035,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: shl.2s v0, v0, #24 ; CHECK-NEXT: shl.2s v1, v1, #24 -; CHECK-NEXT: movi d2, #0x00ffff0000ffff +; CHECK-NEXT: movi.2s v2, #1 ; CHECK-NEXT: sshr.2s v0, v0, #24 -; CHECK-NEXT: sshr.2s v1, v1, #24 -; CHECK-NEXT: mvn.8b v0, v0 -; CHECK-NEXT: sub.2s v0, v1, v0 -; CHECK-NEXT: and.8b v0, v0, v2 +; CHECK-NEXT: ssra.2s v0, v1, #24 +; CHECK-NEXT: movi d1, #0x00ffff0000ffff +; CHECK-NEXT: add.2s v0, v0, v2 +; CHECK-NEXT: and.8b v0, v0, v1 ; CHECK-NEXT: ushr.2s v0, v0, #1 ; CHECK-NEXT: ret %zextsrc1 = sext <2 x i8> %src1 to <2 x i16> @@ -1063,9 +1057,7 @@ ; CHECK-NEXT: movi d2, 
#0x0000ff000000ff ; CHECK-NEXT: and.8b v0, v0, v2 ; CHECK-NEXT: and.8b v1, v1, v2 -; CHECK-NEXT: mvn.8b v0, v0 -; CHECK-NEXT: sub.2s v0, v1, v0 -; CHECK-NEXT: ushr.2s v0, v0, #1 +; CHECK-NEXT: urhadd.2s v0, v0, v1 ; CHECK-NEXT: ret %zextsrc1 = zext <2 x i8> %src1 to <2 x i16> %zextsrc2 = zext <2 x i8> %src2 to <2 x i16> diff --git a/llvm/test/CodeGen/AArch64/sve-hadd.ll b/llvm/test/CodeGen/AArch64/sve-hadd.ll --- a/llvm/test/CodeGen/AArch64/sve-hadd.ll +++ b/llvm/test/CodeGen/AArch64/sve-hadd.ll @@ -219,14 +219,22 @@ } define @hadds_v2i16( %s0, %s1) { -; CHECK-LABEL: hadds_v2i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: sxth z0.d, p0/m, z0.d -; CHECK-NEXT: sxth z1.d, p0/m, z1.d -; CHECK-NEXT: add z0.d, z0.d, z1.d -; CHECK-NEXT: asr z0.d, z0.d, #1 -; CHECK-NEXT: ret +; SVE-LABEL: hadds_v2i16: +; SVE: // %bb.0: // %entry +; SVE-NEXT: ptrue p0.d +; SVE-NEXT: sxth z0.d, p0/m, z0.d +; SVE-NEXT: sxth z1.d, p0/m, z1.d +; SVE-NEXT: add z0.d, z0.d, z1.d +; SVE-NEXT: asr z0.d, z0.d, #1 +; SVE-NEXT: ret +; +; SVE2-LABEL: hadds_v2i16: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.d +; SVE2-NEXT: sxth z0.d, p0/m, z0.d +; SVE2-NEXT: sxth z1.d, p0/m, z1.d +; SVE2-NEXT: shadd z0.d, p0/m, z0.d, z1.d +; SVE2-NEXT: ret entry: %s0s = sext %s0 to %s1s = sext %s1 to @@ -256,13 +264,21 @@ } define @haddu_v2i16( %s0, %s1) { -; CHECK-LABEL: haddu_v2i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: and z0.d, z0.d, #0xffff -; CHECK-NEXT: and z1.d, z1.d, #0xffff -; CHECK-NEXT: add z0.d, z0.d, z1.d -; CHECK-NEXT: lsr z0.d, z0.d, #1 -; CHECK-NEXT: ret +; SVE-LABEL: haddu_v2i16: +; SVE: // %bb.0: // %entry +; SVE-NEXT: and z0.d, z0.d, #0xffff +; SVE-NEXT: and z1.d, z1.d, #0xffff +; SVE-NEXT: add z0.d, z0.d, z1.d +; SVE-NEXT: lsr z0.d, z0.d, #1 +; SVE-NEXT: ret +; +; SVE2-LABEL: haddu_v2i16: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.d +; SVE2-NEXT: and z0.d, z0.d, #0xffff +; SVE2-NEXT: and z1.d, z1.d, #0xffff +; SVE2-NEXT: uhadd z0.d, p0/m, z0.d, z1.d 
+; SVE2-NEXT: ret entry: %s0s = zext %s0 to %s1s = zext %s1 to @@ -417,14 +433,22 @@ } define @hadds_v4i8( %s0, %s1) { -; CHECK-LABEL: hadds_v4i8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: sxtb z0.s, p0/m, z0.s -; CHECK-NEXT: sxtb z1.s, p0/m, z1.s -; CHECK-NEXT: add z0.s, z0.s, z1.s -; CHECK-NEXT: asr z0.s, z0.s, #1 -; CHECK-NEXT: ret +; SVE-LABEL: hadds_v4i8: +; SVE: // %bb.0: // %entry +; SVE-NEXT: ptrue p0.s +; SVE-NEXT: sxtb z0.s, p0/m, z0.s +; SVE-NEXT: sxtb z1.s, p0/m, z1.s +; SVE-NEXT: add z0.s, z0.s, z1.s +; SVE-NEXT: asr z0.s, z0.s, #1 +; SVE-NEXT: ret +; +; SVE2-LABEL: hadds_v4i8: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.s +; SVE2-NEXT: sxtb z0.s, p0/m, z0.s +; SVE2-NEXT: sxtb z1.s, p0/m, z1.s +; SVE2-NEXT: shadd z0.s, p0/m, z0.s, z1.s +; SVE2-NEXT: ret entry: %s0s = sext %s0 to %s1s = sext %s1 to @@ -454,13 +478,21 @@ } define @haddu_v4i8( %s0, %s1) { -; CHECK-LABEL: haddu_v4i8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: and z0.s, z0.s, #0xff -; CHECK-NEXT: and z1.s, z1.s, #0xff -; CHECK-NEXT: add z0.s, z0.s, z1.s -; CHECK-NEXT: lsr z0.s, z0.s, #1 -; CHECK-NEXT: ret +; SVE-LABEL: haddu_v4i8: +; SVE: // %bb.0: // %entry +; SVE-NEXT: and z0.s, z0.s, #0xff +; SVE-NEXT: and z1.s, z1.s, #0xff +; SVE-NEXT: add z0.s, z0.s, z1.s +; SVE-NEXT: lsr z0.s, z0.s, #1 +; SVE-NEXT: ret +; +; SVE2-LABEL: haddu_v4i8: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.s +; SVE2-NEXT: and z0.s, z0.s, #0xff +; SVE2-NEXT: and z1.s, z1.s, #0xff +; SVE2-NEXT: uhadd z0.s, p0/m, z0.s, z1.s +; SVE2-NEXT: ret entry: %s0s = zext %s0 to %s1s = zext %s1 to @@ -884,15 +916,23 @@ } define @rhaddu_v2i16( %s0, %s1) { -; CHECK-LABEL: rhaddu_v2i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff -; CHECK-NEXT: and z0.d, z0.d, #0xffff -; CHECK-NEXT: and z1.d, z1.d, #0xffff -; CHECK-NEXT: eor z0.d, z0.d, z2.d -; CHECK-NEXT: sub z0.d, z1.d, z0.d -; CHECK-NEXT: lsr z0.d, z0.d, #1 -; CHECK-NEXT: ret +; SVE-LABEL: 
rhaddu_v2i16: +; SVE: // %bb.0: // %entry +; SVE-NEXT: mov z2.d, #-1 // =0xffffffffffffffff +; SVE-NEXT: and z0.d, z0.d, #0xffff +; SVE-NEXT: and z1.d, z1.d, #0xffff +; SVE-NEXT: eor z0.d, z0.d, z2.d +; SVE-NEXT: sub z0.d, z1.d, z0.d +; SVE-NEXT: lsr z0.d, z0.d, #1 +; SVE-NEXT: ret +; +; SVE2-LABEL: rhaddu_v2i16: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.d +; SVE2-NEXT: and z0.d, z0.d, #0xffff +; SVE2-NEXT: and z1.d, z1.d, #0xffff +; SVE2-NEXT: urhadd z0.d, p0/m, z0.d, z1.d +; SVE2-NEXT: ret entry: %s0s = zext %s0 to %s1s = zext %s1 to @@ -1095,15 +1135,23 @@ } define @rhaddu_v4i8( %s0, %s1) { -; CHECK-LABEL: rhaddu_v4i8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff -; CHECK-NEXT: and z0.s, z0.s, #0xff -; CHECK-NEXT: and z1.s, z1.s, #0xff -; CHECK-NEXT: eor z0.d, z0.d, z2.d -; CHECK-NEXT: sub z0.s, z1.s, z0.s -; CHECK-NEXT: lsr z0.s, z0.s, #1 -; CHECK-NEXT: ret +; SVE-LABEL: rhaddu_v4i8: +; SVE: // %bb.0: // %entry +; SVE-NEXT: mov z2.s, #-1 // =0xffffffffffffffff +; SVE-NEXT: and z0.s, z0.s, #0xff +; SVE-NEXT: and z1.s, z1.s, #0xff +; SVE-NEXT: eor z0.d, z0.d, z2.d +; SVE-NEXT: sub z0.s, z1.s, z0.s +; SVE-NEXT: lsr z0.s, z0.s, #1 +; SVE-NEXT: ret +; +; SVE2-LABEL: rhaddu_v4i8: +; SVE2: // %bb.0: // %entry +; SVE2-NEXT: ptrue p0.s +; SVE2-NEXT: and z0.s, z0.s, #0xff +; SVE2-NEXT: and z1.s, z1.s, #0xff +; SVE2-NEXT: urhadd z0.s, p0/m, z0.s, z1.s +; SVE2-NEXT: ret entry: %s0s = zext %s0 to %s1s = zext %s1 to diff --git a/llvm/test/CodeGen/Thumb2/mve-vhadd.ll b/llvm/test/CodeGen/Thumb2/mve-vhadd.ll --- a/llvm/test/CodeGen/Thumb2/mve-vhadd.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vhadd.ll @@ -116,8 +116,7 @@ ; CHECK-NEXT: vmov.i32 q2, #0xff ; CHECK-NEXT: vand q1, q1, q2 ; CHECK-NEXT: vand q0, q0, q2 -; CHECK-NEXT: vadd.i32 q0, q0, q1 -; CHECK-NEXT: vshr.u32 q0, q0, #1 +; CHECK-NEXT: vhadd.u32 q0, q0, q1 ; CHECK-NEXT: bx lr entry: %s0s = zext <4 x i8> %s0 to <4 x i16> @@ -313,12 +312,9 @@ ; CHECK-LABEL: vrhaddu_v4i8: ; CHECK: 
@ %bb.0: @ %entry ; CHECK-NEXT: vmov.i32 q2, #0xff -; CHECK-NEXT: movs r0, #1 ; CHECK-NEXT: vand q1, q1, q2 ; CHECK-NEXT: vand q0, q0, q2 -; CHECK-NEXT: vadd.i32 q0, q0, q1 -; CHECK-NEXT: vadd.i32 q0, q0, r0 -; CHECK-NEXT: vshr.u32 q0, q0, #1 +; CHECK-NEXT: vrhadd.u32 q0, q0, q1 ; CHECK-NEXT: bx lr entry: %s0s = zext <4 x i8> %s0 to <4 x i16>