diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1184,16 +1184,16 @@
   // Get the SETCC result using the canonical SETCC type.
   SDValue SetCC;
   if (N->isStrictFPOpcode()) {
-    EVT VTs[] = {SVT, MVT::Other};
+    SDVTList VTs = DAG.getVTList({SVT, MVT::Other});
     SDValue Opers[] = {N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3)};
-    SetCC = DAG.getNode(N->getOpcode(), dl, VTs, Opers);
+    SetCC = DAG.getNode(N->getOpcode(), dl, VTs, Opers, N->getFlags());
     // Legalize the chain result - switch anything that used the old chain to
     // use the new one.
     ReplaceValueWith(SDValue(N, 1), SetCC.getValue(1));
   } else
     SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0),
-                        N->getOperand(1), N->getOperand(2));
+                        N->getOperand(1), N->getOperand(2), N->getFlags());
 
   // Convert to the expected type.
   return DAG.getSExtOrTrunc(SetCC, dl, NVT);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -715,7 +715,6 @@
   auto &OpIdEntry = PromotedIntegers[getTableId(Op)];
   assert((OpIdEntry == 0) && "Node is already promoted!");
   OpIdEntry = getTableId(Result);
-  Result->setFlags(Op->getFlags());
   DAG.transferDbgValues(Op, Result);
 }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
--- a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
@@ -873,8 +873,8 @@
 ; CHECK-NEXT:    shl.2s v0, v0, #24
 ; CHECK-NEXT:    shl.2s v1, v1, #24
 ; CHECK-NEXT:    sshr.2s v0, v0, #24
-; CHECK-NEXT:    sshr.2s v1, v1, #24
-; CHECK-NEXT:    shadd.2s v0, v0, v1
+; CHECK-NEXT:    ssra.2s v0, v1, #24
+; CHECK-NEXT:    sshr.2s v0, v0, #1
 ; CHECK-NEXT:    ret
   %zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
   %zextsrc2 = sext <2 x i8> %src2 to <2 x i16>
@@ -889,7 +889,8 @@
 ; CHECK-NEXT:    movi d2, #0x0000ff000000ff
 ; CHECK-NEXT:    and.8b v0, v0, v2
 ; CHECK-NEXT:    and.8b v1, v1, v2
-; CHECK-NEXT:    uhadd.2s v0, v0, v1
+; CHECK-NEXT:    add.2s v0, v0, v1
+; CHECK-NEXT:    ushr.2s v0, v0, #1
 ; CHECK-NEXT:    ret
   %zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
   %zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
@@ -922,7 +923,8 @@
 ; CHECK-NEXT:    movi d2, #0x0000ff000000ff
 ; CHECK-NEXT:    and.8b v0, v0, v2
 ; CHECK-NEXT:    and.8b v1, v1, v2
-; CHECK-NEXT:    uhadd.2s v0, v0, v1
+; CHECK-NEXT:    add.2s v0, v0, v1
+; CHECK-NEXT:    ushr.2s v0, v0, #1
 ; CHECK-NEXT:    ret
   %zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
   %zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
@@ -1004,7 +1006,9 @@
 ; CHECK-NEXT:    shl.2s v1, v1, #24
 ; CHECK-NEXT:    sshr.2s v0, v0, #24
 ; CHECK-NEXT:    sshr.2s v1, v1, #24
-; CHECK-NEXT:    srhadd.2s v0, v0, v1
+; CHECK-NEXT:    mvn.8b v0, v0
+; CHECK-NEXT:    sub.2s v0, v1, v0
+; CHECK-NEXT:    sshr.2s v0, v0, #1
 ; CHECK-NEXT:    ret
   %zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
   %zextsrc2 = sext <2 x i8> %src2 to <2 x i16>
@@ -1020,7 +1024,9 @@
 ; CHECK-NEXT:    movi d2, #0x0000ff000000ff
 ; CHECK-NEXT:    and.8b v0, v0, v2
 ; CHECK-NEXT:    and.8b v1, v1, v2
-; CHECK-NEXT:    urhadd.2s v0, v0, v1
+; CHECK-NEXT:    mvn.8b v0, v0
+; CHECK-NEXT:    sub.2s v0, v1, v0
+; CHECK-NEXT:    ushr.2s v0, v0, #1
 ; CHECK-NEXT:    ret
   %zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
   %zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
@@ -1035,12 +1041,12 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    shl.2s v0, v0, #24
 ; CHECK-NEXT:    shl.2s v1, v1, #24
-; CHECK-NEXT:    movi.2s v2, #1
+; CHECK-NEXT:    movi d2, #0x00ffff0000ffff
 ; CHECK-NEXT:    sshr.2s v0, v0, #24
-; CHECK-NEXT:    ssra.2s v0, v1, #24
-; CHECK-NEXT:    movi d1, #0x00ffff0000ffff
-; CHECK-NEXT:    add.2s v0, v0, v2
-; CHECK-NEXT:    and.8b v0, v0, v1
+; CHECK-NEXT:    sshr.2s v1, v1, #24
+; CHECK-NEXT:    mvn.8b v0, v0
+; CHECK-NEXT:    sub.2s v0, v1, v0
+; CHECK-NEXT:    and.8b v0, v0, v2
 ; CHECK-NEXT:    ushr.2s v0, v0, #1
 ; CHECK-NEXT:    ret
   %zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
@@ -1057,7 +1063,9 @@
 ; CHECK-NEXT:    movi d2, #0x0000ff000000ff
 ; CHECK-NEXT:    and.8b v0, v0, v2
 ; CHECK-NEXT:    and.8b v1, v1, v2
-; CHECK-NEXT:    urhadd.2s v0, v0, v1
+; CHECK-NEXT:    mvn.8b v0, v0
+; CHECK-NEXT:    sub.2s v0, v1, v0
+; CHECK-NEXT:    ushr.2s v0, v0, #1
 ; CHECK-NEXT:    ret
   %zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
   %zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
diff --git a/llvm/test/CodeGen/AArch64/sve-hadd.ll b/llvm/test/CodeGen/AArch64/sve-hadd.ll
--- a/llvm/test/CodeGen/AArch64/sve-hadd.ll
+++ b/llvm/test/CodeGen/AArch64/sve-hadd.ll
@@ -219,22 +219,14 @@
 }
 
 define <vscale x 2 x i16> @hadds_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
-; SVE-LABEL: hadds_v2i16:
-; SVE:       // %bb.0: // %entry
-; SVE-NEXT:    ptrue p0.d
-; SVE-NEXT:    sxth z0.d, p0/m, z0.d
-; SVE-NEXT:    sxth z1.d, p0/m, z1.d
-; SVE-NEXT:    add z0.d, z0.d, z1.d
-; SVE-NEXT:    asr z0.d, z0.d, #1
-; SVE-NEXT:    ret
-;
-; SVE2-LABEL: hadds_v2i16:
-; SVE2:       // %bb.0: // %entry
-; SVE2-NEXT:    ptrue p0.d
-; SVE2-NEXT:    sxth z0.d, p0/m, z0.d
-; SVE2-NEXT:    sxth z1.d, p0/m, z1.d
-; SVE2-NEXT:    shadd z0.d, p0/m, z0.d, z1.d
-; SVE2-NEXT:    ret
+; CHECK-LABEL: hadds_v2i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sxth z0.d, p0/m, z0.d
+; CHECK-NEXT:    sxth z1.d, p0/m, z1.d
+; CHECK-NEXT:    add z0.d, z0.d, z1.d
+; CHECK-NEXT:    asr z0.d, z0.d, #1
+; CHECK-NEXT:    ret
 entry:
   %s0s = sext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
   %s1s = sext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
@@ -264,21 +256,13 @@
 }
 
 define <vscale x 2 x i16> @haddu_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
-; SVE-LABEL: haddu_v2i16:
-; SVE:       // %bb.0: // %entry
-; SVE-NEXT:    and z0.d, z0.d, #0xffff
-; SVE-NEXT:    and z1.d, z1.d, #0xffff
-; SVE-NEXT:    add z0.d, z0.d, z1.d
-; SVE-NEXT:    lsr z0.d, z0.d, #1
-; SVE-NEXT:    ret
-;
-; SVE2-LABEL: haddu_v2i16:
-; SVE2:       // %bb.0: // %entry
-; SVE2-NEXT:    ptrue p0.d
-; SVE2-NEXT:    and z0.d, z0.d, #0xffff
-; SVE2-NEXT:    and z1.d, z1.d, #0xffff
-; SVE2-NEXT:    uhadd z0.d, p0/m, z0.d, z1.d
-; SVE2-NEXT:    ret
+; CHECK-LABEL: haddu_v2i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    and z0.d, z0.d, #0xffff
+; CHECK-NEXT:    and z1.d, z1.d, #0xffff
+; CHECK-NEXT:    add z0.d, z0.d, z1.d
+; CHECK-NEXT:    lsr z0.d, z0.d, #1
+; CHECK-NEXT:    ret
 entry:
   %s0s = zext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
   %s1s = zext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
@@ -433,22 +417,14 @@
 }
 
 define <vscale x 4 x i8> @hadds_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
-; SVE-LABEL: hadds_v4i8:
-; SVE:       // %bb.0: // %entry
-; SVE-NEXT:    ptrue p0.s
-; SVE-NEXT:    sxtb z0.s, p0/m, z0.s
-; SVE-NEXT:    sxtb z1.s, p0/m, z1.s
-; SVE-NEXT:    add z0.s, z0.s, z1.s
-; SVE-NEXT:    asr z0.s, z0.s, #1
-; SVE-NEXT:    ret
-;
-; SVE2-LABEL: hadds_v4i8:
-; SVE2:       // %bb.0: // %entry
-; SVE2-NEXT:    ptrue p0.s
-; SVE2-NEXT:    sxtb z0.s, p0/m, z0.s
-; SVE2-NEXT:    sxtb z1.s, p0/m, z1.s
-; SVE2-NEXT:    shadd z0.s, p0/m, z0.s, z1.s
-; SVE2-NEXT:    ret
+; CHECK-LABEL: hadds_v4i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    sxtb z0.s, p0/m, z0.s
+; CHECK-NEXT:    sxtb z1.s, p0/m, z1.s
+; CHECK-NEXT:    add z0.s, z0.s, z1.s
+; CHECK-NEXT:    asr z0.s, z0.s, #1
+; CHECK-NEXT:    ret
 entry:
   %s0s = sext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
   %s1s = sext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
@@ -478,21 +454,13 @@
 }
 
 define <vscale x 4 x i8> @haddu_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
-; SVE-LABEL: haddu_v4i8:
-; SVE:       // %bb.0: // %entry
-; SVE-NEXT:    and z0.s, z0.s, #0xff
-; SVE-NEXT:    and z1.s, z1.s, #0xff
-; SVE-NEXT:    add z0.s, z0.s, z1.s
-; SVE-NEXT:    lsr z0.s, z0.s, #1
-; SVE-NEXT:    ret
-;
-; SVE2-LABEL: haddu_v4i8:
-; SVE2:       // %bb.0: // %entry
-; SVE2-NEXT:    ptrue p0.s
-; SVE2-NEXT:    and z0.s, z0.s, #0xff
-; SVE2-NEXT:    and z1.s, z1.s, #0xff
-; SVE2-NEXT:    uhadd z0.s, p0/m, z0.s, z1.s
-; SVE2-NEXT:    ret
+; CHECK-LABEL: haddu_v4i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    and z0.s, z0.s, #0xff
+; CHECK-NEXT:    and z1.s, z1.s, #0xff
+; CHECK-NEXT:    add z0.s, z0.s, z1.s
+; CHECK-NEXT:    lsr z0.s, z0.s, #1
+; CHECK-NEXT:    ret
 entry:
   %s0s = zext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
   %s1s = zext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
@@ -916,23 +884,15 @@
 }
 
 define <vscale x 2 x i16> @rhaddu_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
-; SVE-LABEL: rhaddu_v2i16:
-; SVE:       // %bb.0: // %entry
-; SVE-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
-; SVE-NEXT:    and z0.d, z0.d, #0xffff
-; SVE-NEXT:    and z1.d, z1.d, #0xffff
-; SVE-NEXT:    eor z0.d, z0.d, z2.d
-; SVE-NEXT:    sub z0.d, z1.d, z0.d
-; SVE-NEXT:    lsr z0.d, z0.d, #1
-; SVE-NEXT:    ret
-;
-; SVE2-LABEL: rhaddu_v2i16:
-; SVE2:       // %bb.0: // %entry
-; SVE2-NEXT:    ptrue p0.d
-; SVE2-NEXT:    and z0.d, z0.d, #0xffff
-; SVE2-NEXT:    and z1.d, z1.d, #0xffff
-; SVE2-NEXT:    urhadd z0.d, p0/m, z0.d, z1.d
-; SVE2-NEXT:    ret
+; CHECK-LABEL: rhaddu_v2i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    and z0.d, z0.d, #0xffff
+; CHECK-NEXT:    and z1.d, z1.d, #0xffff
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    sub z0.d, z1.d, z0.d
+; CHECK-NEXT:    lsr z0.d, z0.d, #1
+; CHECK-NEXT:    ret
 entry:
   %s0s = zext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
   %s1s = zext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
@@ -1135,23 +1095,15 @@
 }
 
 define <vscale x 4 x i8> @rhaddu_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
-; SVE-LABEL: rhaddu_v4i8:
-; SVE:       // %bb.0: // %entry
-; SVE-NEXT:    mov z2.s, #-1 // =0xffffffffffffffff
-; SVE-NEXT:    and z0.s, z0.s, #0xff
-; SVE-NEXT:    and z1.s, z1.s, #0xff
-; SVE-NEXT:    eor z0.d, z0.d, z2.d
-; SVE-NEXT:    sub z0.s, z1.s, z0.s
-; SVE-NEXT:    lsr z0.s, z0.s, #1
-; SVE-NEXT:    ret
-;
-; SVE2-LABEL: rhaddu_v4i8:
-; SVE2:       // %bb.0: // %entry
-; SVE2-NEXT:    ptrue p0.s
-; SVE2-NEXT:    and z0.s, z0.s, #0xff
-; SVE2-NEXT:    and z1.s, z1.s, #0xff
-; SVE2-NEXT:    urhadd z0.s, p0/m, z0.s, z1.s
-; SVE2-NEXT:    ret
+; CHECK-LABEL: rhaddu_v4i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z2.s, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    and z0.s, z0.s, #0xff
+; CHECK-NEXT:    and z1.s, z1.s, #0xff
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    sub z0.s, z1.s, z0.s
+; CHECK-NEXT:    lsr z0.s, z0.s, #1
+; CHECK-NEXT:    ret
 entry:
   %s0s = zext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
   %s1s = zext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
diff --git a/llvm/test/CodeGen/Thumb2/mve-vhadd.ll b/llvm/test/CodeGen/Thumb2/mve-vhadd.ll
--- a/llvm/test/CodeGen/Thumb2/mve-vhadd.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vhadd.ll
@@ -116,7 +116,8 @@
 ; CHECK-NEXT:    vmov.i32 q2, #0xff
 ; CHECK-NEXT:    vand q1, q1, q2
 ; CHECK-NEXT:    vand q0, q0, q2
-; CHECK-NEXT:    vhadd.u32 q0, q0, q1
+; CHECK-NEXT:    vadd.i32 q0, q0, q1
+; CHECK-NEXT:    vshr.u32 q0, q0, #1
 ; CHECK-NEXT:    bx lr
 entry:
   %s0s = zext <4 x i8> %s0 to <4 x i16>
@@ -312,9 +313,12 @@
 ; CHECK-LABEL: vrhaddu_v4i8:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmov.i32 q2, #0xff
+; CHECK-NEXT:    movs r0, #1
 ; CHECK-NEXT:    vand q1, q1, q2
 ; CHECK-NEXT:    vand q0, q0, q2
-; CHECK-NEXT:    vrhadd.u32 q0, q0, q1
+; CHECK-NEXT:    vadd.i32 q0, q0, q1
+; CHECK-NEXT:    vadd.i32 q0, q0, r0
+; CHECK-NEXT:    vshr.u32 q0, q0, #1
 ; CHECK-NEXT:    bx lr
 entry:
   %s0s = zext <4 x i8> %s0 to <4 x i16>