diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -851,8 +851,8 @@ } void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { - assert(Result.getValueType() == - TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + assert(Result.getValueType().getVectorElementType() == + Op.getValueType().getVectorElementType() && "Invalid type for widened vector"); AnalyzeNewValue(Result); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -3579,7 +3579,7 @@ WideResVT = TLI.getTypeToTransformTo(*DAG.getContext(), ResVT); WideOvVT = EVT::getVectorVT( *DAG.getContext(), OvVT.getVectorElementType(), - WideResVT.getVectorNumElements()); + WideResVT.getVectorElementCount()); WideLHS = GetWidenedVector(N->getOperand(0)); WideRHS = GetWidenedVector(N->getOperand(1)); @@ -3587,7 +3587,7 @@ WideOvVT = TLI.getTypeToTransformTo(*DAG.getContext(), OvVT); WideResVT = EVT::getVectorVT( *DAG.getContext(), ResVT.getVectorElementType(), - WideOvVT.getVectorNumElements()); + WideOvVT.getVectorElementCount()); SDValue Zero = DAG.getVectorIdxConstant(0, DL); WideLHS = DAG.getNode( @@ -4447,7 +4447,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - unsigned WidenNumElts = WidenVT.getVectorNumElements(); + ElementCount WidenNumElts = WidenVT.getVectorElementCount(); SDValue Cond1 = N->getOperand(0); EVT CondVT = Cond1.getValueType(); diff --git a/llvm/test/CodeGen/AArch64/sve-smulo-sdnode.ll b/llvm/test/CodeGen/AArch64/sve-smulo-sdnode.ll --- a/llvm/test/CodeGen/AArch64/sve-smulo-sdnode.ll +++ b/llvm/test/CodeGen/AArch64/sve-smulo-sdnode.ll @@ -1,6 +1,26 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s +declare { , } @llvm.smul.with.overflow.nxv1i8(, ) + +define @smulo_nxv1i8( %x, %y) { +; CHECK-LABEL: smulo_nxv1i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: smulh z2.b, p0/m, z2.b, z1.b +; CHECK-NEXT: mul z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: asr z1.b, z0.b, #7 +; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, z1.b +; CHECK-NEXT: mov z0.b, p0/m, #0 // =0x0 +; CHECK-NEXT: ret + %a = call { , } @llvm.smul.with.overflow.nxv1i8( %x, %y) + %b = extractvalue { , } %a, 0 + %c = extractvalue { , } %a, 1 + %d = select %c, zeroinitializer, %b + ret %d +} + declare { , } @llvm.smul.with.overflow.nxv2i8(, ) define @smulo_nxv2i8( %x, %y) { @@ -163,6 +183,26 @@ ret %d } +declare { , } @llvm.smul.with.overflow.nxv1i16(, ) + +define @smulo_nxv1i16( %x, %y) { +; CHECK-LABEL: smulo_nxv1i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: smulh z2.h, p0/m, z2.h, z1.h +; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: asr z1.h, z0.h, #15 +; CHECK-NEXT: cmpne p0.h, p0/z, z2.h, z1.h +; CHECK-NEXT: mov z0.h, p0/m, #0 // =0x0 +; CHECK-NEXT: ret + %a = call { , } @llvm.smul.with.overflow.nxv1i16( %x, %y) + %b = extractvalue { , } %a, 0 + %c = extractvalue { , } %a, 1 + %d = select %c, zeroinitializer, %b + ret %d +} + declare { , } @llvm.smul.with.overflow.nxv2i16(, ) define @smulo_nxv2i16( %x, %y) { @@ -299,6 +339,26 @@ ret %d } +declare { , } @llvm.smul.with.overflow.nxv1i32(, ) + +define @smulo_nxv1i32( %x, %y) { +; CHECK-LABEL: smulo_nxv1i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: smulh z2.s, p0/m, z2.s, z1.s +; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: asr z1.s, z0.s, #31 +; CHECK-NEXT: cmpne p0.s, p0/z, z2.s, z1.s +; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0 +; CHECK-NEXT: ret + %a = call { , } @llvm.smul.with.overflow.nxv1i32( %x, %y) + %b = extractvalue { , } %a, 0 + %c = extractvalue { , } %a, 1 + %d = select %c, zeroinitializer, %b + ret %d +} + declare { , } @llvm.smul.with.overflow.nxv2i32(, ) define @smulo_nxv2i32( %x, %y) { @@ -409,6 +469,26 @@ ret %d } +declare { , } @llvm.smul.with.overflow.nxv1i64(, ) + +define @smulo_nxv1i64( %x, %y) { +; CHECK-LABEL: smulo_nxv1i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: smulh z2.d, p0/m, z2.d, z1.d +; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: asr z1.d, z0.d, #63 +; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, z1.d +; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 +; CHECK-NEXT: ret + %a = call { , } @llvm.smul.with.overflow.nxv1i64( %x, %y) + %b = extractvalue { , } %a, 0 + %c = extractvalue { , } %a, 1 + %d = select %c, zeroinitializer, %b + ret %d +} + declare { , } @llvm.smul.with.overflow.nxv2i64(, ) define @smulo_nxv2i64( %x, %y) { diff --git a/llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll b/llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll --- a/llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll +++ b/llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll @@ -1,6 +1,26 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s +declare { , } @llvm.umul.with.overflow.nxv1i8(, ) + +define @umulo_nxv1i8( %x, %y) { +; CHECK-LABEL: umulo_nxv1i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: mul z2.b, p0/m, z2.b, z1.b +; CHECK-NEXT: umulh z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: mov z2.b, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %a = call { , } @llvm.umul.with.overflow.nxv1i8( %x, %y) + %b = extractvalue { , } %a, 0 + %c = extractvalue { , } %a, 1 + %d = select %c, zeroinitializer, %b + ret %d +} + declare { , } @llvm.umul.with.overflow.nxv2i8(, ) define @umulo_nxv2i8( %x, %y) { @@ -157,6 +177,26 @@ ret %d } +declare { , } @llvm.umul.with.overflow.nxv1i16(, ) + +define @umulo_nxv1i16( %x, %y) { +; CHECK-LABEL: umulo_nxv1i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: mul z2.h, p0/m, z2.h, z1.h +; CHECK-NEXT: umulh z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 +; CHECK-NEXT: mov z2.h, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %a = call { , } @llvm.umul.with.overflow.nxv1i16( %x, %y) + %b = extractvalue { , } %a, 0 + %c = extractvalue { , } %a, 1 + %d = select %c, zeroinitializer, %b + ret %d +} + declare { , } @llvm.umul.with.overflow.nxv2i16(, ) define @umulo_nxv2i16( %x, %y) { @@ -289,6 +329,26 @@ ret %d } +declare { , } @llvm.umul.with.overflow.nxv1i32(, ) + +define @umulo_nxv1i32( %x, %y) { +; CHECK-LABEL: umulo_nxv1i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: mul z2.s, p0/m, z2.s, z1.s +; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 +; CHECK-NEXT: mov z2.s, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %a = call { , } @llvm.umul.with.overflow.nxv1i32( %x, %y) + %b = extractvalue { , } %a, 0 + %c = extractvalue { , } %a, 1 + %d = select %c, zeroinitializer, %b + ret %d +} + declare { , } @llvm.umul.with.overflow.nxv2i32(, ) define @umulo_nxv2i32( %x, %y) { @@ -397,6 +457,26 @@ ret %d } +declare { , } @llvm.umul.with.overflow.nxv1i64(, ) + +define @umulo_nxv1i64( %x, %y) { +; CHECK-LABEL: umulo_nxv1i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: mul z2.d, p0/m, z2.d, z1.d +; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 +; CHECK-NEXT: mov z2.d, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %a = call { , } @llvm.umul.with.overflow.nxv1i64( %x, %y) + %b = extractvalue { , } %a, 0 + %c = extractvalue { , } %a, 1 + %d = select %c, zeroinitializer, %b + ret %d +} + declare { , } @llvm.umul.with.overflow.nxv2i64(, ) define @umulo_nxv2i64( %x, %y) {