diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -4384,6 +4384,10 @@ /// only the first Count elements of the vector are used. SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const; + /// Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal. + /// Returns true if the expansion was successful. + bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const; + //===--------------------------------------------------------------------===// // Instruction Emitting Hooks // diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3343,26 +3343,10 @@ break; } case ISD::UREM: - case ISD::SREM: { - EVT VT = Node->getValueType(0); - bool isSigned = Node->getOpcode() == ISD::SREM; - unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV; - unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; - Tmp2 = Node->getOperand(0); - Tmp3 = Node->getOperand(1); - if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) { - SDVTList VTs = DAG.getVTList(VT, VT); - Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1); - Results.push_back(Tmp1); - } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) { - // X % Y -> X-X/Y*Y - Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3); - Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3); - Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1); + case ISD::SREM: + if (TLI.expandREM(Node, Tmp1, DAG)) Results.push_back(Tmp1); - } break; - } case ISD::UDIV: case ISD::SDIV: { bool isSigned = Node->getOpcode() == ISD::SDIV; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -145,6 +145,7 @@ void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl &Results); SDValue ExpandStrictFPOp(SDNode *Node); void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl &Results); + void ExpandREM(SDNode *Node, SmallVectorImpl &Results); void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl &Results); @@ -867,6 +868,10 @@ case ISD::VECREDUCE_FMIN: Results.push_back(TLI.expandVecReduce(Node, DAG)); return; + case ISD::SREM: + case ISD::UREM: + ExpandREM(Node, Results); + return; } Results.push_back(DAG.UnrollVectorOp(Node)); @@ -1352,6 +1357,13 @@ UnrollStrictFPOp(Node, Results); } +void VectorLegalizer::ExpandREM(SDNode *Node, + SmallVectorImpl &Results) { + SDValue Result; + if (!TLI.expandREM(Node, Result, DAG)) + Result = DAG.UnrollVectorOp(Node); + Results.push_back(Result); +} void VectorLegalizer::UnrollStrictFPOp(SDNode *Node, SmallVectorImpl &Results) { diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -7775,3 +7775,26 @@ Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res); return Res; } + +bool TargetLowering::expandREM(SDNode *Node, SDValue &Result, + SelectionDAG &DAG) const { + EVT VT = Node->getValueType(0); + SDLoc dl(Node); + bool isSigned = Node->getOpcode() == ISD::SREM; + unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV; + unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; + SDValue Dividend = Node->getOperand(0); + SDValue Divisor = Node->getOperand(1); + if (isOperationLegalOrCustom(DivRemOpc, VT)) { + SDVTList VTs = DAG.getVTList(VT, VT); + Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1); + return true; + } else if (isOperationLegalOrCustom(DivOpc, VT)) { + // X % Y -> X-X/Y*Y + SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor); + SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor); + Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul); + return true; + } + return false; +} diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -189,6 +189,10 @@ setOperationAction(ISD::UADDSAT, VT, Legal); setOperationAction(ISD::SSUBSAT, VT, Legal); setOperationAction(ISD::USUBSAT, VT, Legal); + setOperationAction(ISD::UREM, VT, Expand); + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::SDIVREM, VT, Expand); + setOperationAction(ISD::UDIVREM, VT, Expand); } for (auto VT : diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -443,7 +443,7 @@ setOperationAction(ISD::UREM, VT, Expand); setOperationAction(ISD::SMUL_LOHI, VT, Expand); setOperationAction(ISD::UMUL_LOHI, VT, Expand); - setOperationAction(ISD::SDIVREM, VT, Custom); + setOperationAction(ISD::SDIVREM, VT, Expand); setOperationAction(ISD::UDIVREM, VT, Expand); setOperationAction(ISD::SELECT, VT, Expand); setOperationAction(ISD::VSELECT, VT, Expand); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -210,6 +210,8 @@ setOperationAction(ISD::SREM, VT, Expand); setOperationAction(ISD::UREM, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); + setOperationAction(ISD::SDIVREM, VT, Expand); + setOperationAction(ISD::UDIVREM, VT, Expand); if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64) @@ -284,6 +286,8 @@ setOperationAction(ISD::SDIV, VT, Expand); setOperationAction(ISD::UREM, VT, Expand); setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::UDIVREM, VT, Expand); + setOperationAction(ISD::SDIVREM, VT, Expand); setOperationAction(ISD::CTPOP, VT, Expand); // Vector reductions diff --git a/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll b/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll --- a/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll +++ b/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s ; @@ -5,126 +6,199 @@ ; define @sdiv_i32( %a, %b) { -; CHECK-LABEL: @sdiv_i32 -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: sdiv z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK-LABEL: sdiv_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret %div = sdiv %a, %b ret %div } define @sdiv_i64( %a, %b) { -; CHECK-LABEL: @sdiv_i64 -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: sdiv z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK-LABEL: sdiv_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret %div = sdiv %a, %b ret %div } define @sdiv_split_i32( %a, %b) { -; CHECK-LABEL: @sdiv_split_i32 -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: sdiv z0.s, p0/m, z0.s, z2.s -; CHECK-DAG: sdiv z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: ret +; CHECK-LABEL: sdiv_split_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: sdiv z1.s, p0/m, z1.s, z3.s +; CHECK-NEXT: ret %div = sdiv %a, %b ret %div } define @sdiv_widen_i32( %a, %b) { -; CHECK-LABEL: @sdiv_widen_i32 -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: sxtw z1.d, p0/m, z1.d -; CHECK-DAG: sxtw z0.d, p0/m, z0.d -; CHECK-DAG: sdiv z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK-LABEL: sdiv_widen_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sxtw z1.d, p0/m, z1.d +; CHECK-NEXT: sxtw z0.d, p0/m, z0.d +; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret %div = sdiv %a, %b ret %div } define @sdiv_split_i64( %a, %b) { -; CHECK-LABEL: @sdiv_split_i64 -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: sdiv z0.d, p0/m, z0.d, z2.d -; CHECK-DAG: sdiv z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: ret +; CHECK-LABEL: sdiv_split_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: sdiv z1.d, p0/m, z1.d, z3.d +; CHECK-NEXT: ret %div = sdiv %a, %b ret %div } ; +; SREM +; + +define @srem_i32( %a, %b) { +; CHECK-LABEL: srem_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: sdiv z2.s, p0/m, z2.s, z1.s +; CHECK-NEXT: mul z2.s, p0/m, z2.s, z1.s +; CHECK-NEXT: sub z0.s, z0.s, z2.s +; CHECK-NEXT: ret + %div = srem %a, %b + ret %div +} + +define @srem_i64( %a, %b) { +; CHECK-LABEL: srem_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: sdiv z2.d, p0/m, z2.d, z1.d +; CHECK-NEXT: mul z2.d, p0/m, z2.d, z1.d +; CHECK-NEXT: sub z0.d, z0.d, z2.d +; CHECK-NEXT: ret + %div = srem %a, %b + ret %div +} + +; ; UDIV ; define @udiv_i32( %a, %b) { -; CHECK-LABEL: @udiv_i32 -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: udiv z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK-LABEL: udiv_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret %div = udiv %a, %b ret %div } define @udiv_i64( %a, %b) { -; CHECK-LABEL: @udiv_i64 -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: udiv z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK-LABEL: udiv_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret %div = udiv %a, %b ret %div } define @udiv_split_i32( %a, %b) { -; CHECK-LABEL: @udiv_split_i32 -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: udiv z0.s, p0/m, z0.s, z2.s -; CHECK-DAG: udiv z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: ret +; CHECK-LABEL: udiv_split_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: udiv z1.s, p0/m, z1.s, z3.s +; CHECK-NEXT: ret %div = udiv %a, %b ret %div } define @udiv_widen_i32( %a, %b) { -; CHECK-LABEL: @udiv_widen_i32 -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: and z1.d, z1.d, #0xffffffff -; CHECK-DAG: and z0.d, z0.d, #0xffffffff -; CHECK-DAG: udiv z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK-LABEL: udiv_widen_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: and z1.d, z1.d, #0xffffffff +; CHECK-NEXT: and z0.d, z0.d, #0xffffffff +; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret %div = udiv %a, %b ret %div } define @udiv_split_i64( %a, %b) { -; CHECK-LABEL: @udiv_split_i64 -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: udiv z0.d, p0/m, z0.d, z2.d -; CHECK-DAG: udiv z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: ret +; CHECK-LABEL: udiv_split_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: udiv z1.d, p0/m, z1.d, z3.d +; CHECK-NEXT: ret %div = udiv %a, %b ret %div } + +; +; UREM +; + +define @urem_i32( %a, %b) { +; CHECK-LABEL: urem_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: udiv z2.s, p0/m, z2.s, z1.s +; CHECK-NEXT: mul z2.s, p0/m, z2.s, z1.s +; CHECK-NEXT: sub z0.s, z0.s, z2.s +; CHECK-NEXT: ret + %div = urem %a, %b + ret %div +} + +define @urem_i64( %a, %b) { +; CHECK-LABEL: urem_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: udiv z2.d, p0/m, z2.d, z1.d +; CHECK-NEXT: mul z2.d, p0/m, z2.d, z1.d +; CHECK-NEXT: sub z0.d, z0.d, z2.d +; CHECK-NEXT: ret + %div = urem %a, %b + ret %div +} + ; ; SMIN ; define @smin_i8( %a, %b, %c) { -; CHECK-LABEL: @smin_i8 -; CHECK-DAG: ptrue p0.b -; CHECK-DAG: smin z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: ret +; CHECK-LABEL: smin_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: smin z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret %cmp = icmp slt %a, %b %min = select %cmp, %a, %b ret %min } define @smin_i16( %a, %b, %c) { -; CHECK-LABEL: @smin_i16 -; CHECK-DAG: ptrue p0.h -; CHECK-DAG: smin z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret +; CHECK-LABEL: smin_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret %cmp = icmp slt %a, %b %min = select %cmp, %a, %b ret %min @@ -132,9 +206,10 @@ define @smin_i32( %a, %b, %c) { ; CHECK-LABEL: smin_i32: -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: smin z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret %cmp = icmp slt %a, %b %min = select %cmp, %a, %b ret %min @@ -142,9 +217,10 @@ define @smin_i64( %a, %b, %c) { ; CHECK-LABEL: smin_i64: -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: smin z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret %cmp = icmp slt %a, %b %min = select %cmp, %a, %b ret %min @@ -155,20 +231,22 @@ ; define @umin_i8( %a, %b, %c) { -; CHECK-LABEL: @umin_i8 -; CHECK-DAG: ptrue p0.b -; CHECK-DAG: umin z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: ret +; CHECK-LABEL: umin_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: umin z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret %cmp = icmp ult %a, %b %min = select %cmp, %a, %b ret %min } define @umin_i16( %a, %b, %c) { -; CHECK-LABEL: @umin_i16 -; CHECK-DAG: ptrue p0.h -; CHECK-DAG: umin z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret +; CHECK-LABEL: umin_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret %cmp = icmp ult %a, %b %min = select %cmp, %a, %b ret %min @@ -176,9 +254,10 @@ define @umin_i32( %a, %b, %c) { ; CHECK-LABEL: umin_i32: -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: umin z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret %cmp = icmp ult %a, %b %min = select %cmp, %a, %b ret %min @@ -186,9 +265,10 @@ define @umin_i64( %a, %b, %c) { ; CHECK-LABEL: umin_i64: -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: umin z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret %cmp = icmp ult %a, %b %min = select %cmp, %a, %b ret %min @@ -199,20 +279,22 @@ ; define @smax_i8( %a, %b, %c) { -; CHECK-LABEL: @smax_i8 -; CHECK-DAG: ptrue p0.b -; CHECK-DAG: smax z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: ret +; CHECK-LABEL: smax_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: smax z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret %cmp = icmp sgt %a, %b %min = select %cmp, %a, %b ret %min } define @smax_i16( %a, %b, %c) { -; CHECK-LABEL: @smax_i16 -; CHECK-DAG: ptrue p0.h -; CHECK-DAG: smax z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret +; CHECK-LABEL: smax_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret %cmp = icmp sgt %a, %b %min = select %cmp, %a, %b ret %min @@ -220,9 +302,10 @@ define @smax_i32( %a, %b, %c) { ; CHECK-LABEL: smax_i32: -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: smax z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret %cmp = icmp sgt %a, %b %min = select %cmp, %a, %b ret %min @@ -230,9 +313,10 @@ define @smax_i64( %a, %b, %c) { ; CHECK-LABEL: smax_i64: -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: smax z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret %cmp = icmp sgt %a, %b %min = select %cmp, %a, %b ret %min @@ -243,20 +327,22 @@ ; define @umax_i8( %a, %b, %c) { -; CHECK-LABEL: @umax_i8 -; CHECK-DAG: ptrue p0.b -; CHECK-DAG: umax z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: ret +; CHECK-LABEL: umax_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: umax z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret %cmp = icmp ugt %a, %b %min = select %cmp, %a, %b ret %min } define @umax_i16( %a, %b, %c) { -; CHECK-LABEL: @umax_i16 -; CHECK-DAG: ptrue p0.h -; CHECK-DAG: umax z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret +; CHECK-LABEL: umax_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret %cmp = icmp ugt %a, %b %min = select %cmp, %a, %b ret %min @@ -264,9 +350,10 @@ define @umax_i32( %a, %b, %c) { ; CHECK-LABEL: umax_i32: -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: umax z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret %cmp = icmp ugt %a, %b %min = select %cmp, %a, %b ret %min @@ -274,9 +361,10 @@ define @umax_i64( %a, %b, %c) { ; CHECK-LABEL: umax_i64: -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: umax z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret %cmp = icmp ugt %a, %b %min = select %cmp, %a, %b ret %min @@ -287,57 +375,64 @@ ; define @asr_i8( %a, %b){ -; CHECK-LABEL: @asr_i8 -; CHECK-DAG: ptrue p0.b -; CHECK-DAG: asr z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: ret +; CHECK-LABEL: asr_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret %shr = ashr %a, %b ret %shr } define @asr_i16( %a, %b){ -; CHECK-LABEL: @asr_i16 -; CHECK-DAG: ptrue p0.h -; CHECK-DAG: asr z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret +; CHECK-LABEL: asr_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret %shr = ashr %a, %b ret %shr } define @asr_i32( %a, %b){ -; CHECK-LABEL: @asr_i32 -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: asr z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK-LABEL: asr_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret %shr = ashr %a, %b ret %shr } define @asr_i64( %a, %b){ -; CHECK-LABEL: @asr_i64 -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: asr z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK-LABEL: asr_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret %shr = ashr %a, %b ret %shr } define @asr_split_i16( %a, %b){ -; CHECK-LABEL: @asr_split_i16 -; CHECK-DAG: ptrue p0.h -; CHECK-DAG: asr z0.h, p0/m, z0.h, z2.h -; CHECK-DAG: asr z1.h, p0/m, z1.h, z3.h -; CHECK-NEXT: ret +; CHECK-LABEL: asr_split_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: asr z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: asr z1.h, p0/m, z1.h, z3.h +; CHECK-NEXT: ret %shr = ashr %a, %b ret %shr } define @asr_promote_i32( %a, %b){ -; CHECK-LABEL: @asr_promote_i32 -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: and z1.d, z1.d, #0xffffffff -; CHECK-DAG: asr z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK-LABEL: asr_promote_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sxtw z0.d, p0/m, z0.d +; CHECK-NEXT: and z1.d, z1.d, #0xffffffff +; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret %shr = ashr %a, %b ret %shr } @@ -347,57 +442,63 @@ ; define @lsl_i8( %a, %b){ -; CHECK-LABEL: @lsl_i8 -; CHECK-DAG: ptrue p0.b -; CHECK-DAG: lsl z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: ret +; CHECK-LABEL: lsl_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret %shl = shl %a, %b ret %shl } define @lsl_i16( %a, %b){ -; CHECK-LABEL: @lsl_i16 -; CHECK-DAG: ptrue p0.h -; CHECK-DAG: lsl z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret +; CHECK-LABEL: lsl_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret %shl = shl %a, %b ret %shl } define @lsl_i32( %a, %b){ -; CHECK-LABEL: @lsl_i32 -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: lsl z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK-LABEL: lsl_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret %shl = shl %a, %b ret %shl } define @lsl_i64( %a, %b){ -; CHECK-LABEL: @lsl_i64 -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: lsl z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK-LABEL: lsl_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret %shl = shl %a, %b ret %shl } define @lsl_split_i64( %a, %b){ -; CHECK-LABEL: @lsl_split_i64 -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: lsl z0.d, p0/m, z0.d, z2.d -; CHECK-DAG: lsl z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: ret +; CHECK-LABEL: lsl_split_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: lsl z1.d, p0/m, z1.d, z3.d +; CHECK-NEXT: ret %shl = shl %a, %b ret %shl } define @lsl_promote_i16( %a, %b){ -; CHECK-LABEL: @lsl_promote_i16 -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: and z1.s, z1.s, #0xffff -; CHECK-DAG: lsl z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK-LABEL: lsl_promote_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: and z1.s, z1.s, #0xffff +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret %shl = shl %a, %b ret %shl } @@ -407,57 +508,64 @@ ; define @lsr_i8( %a, %b){ -; CHECK-LABEL: @lsr_i8 -; CHECK-DAG: ptrue p0.b -; CHECK-DAG: lsr z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: ret +; CHECK-LABEL: lsr_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: lsr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret %shr = lshr %a, %b ret %shr } define @lsr_i16( %a, %b){ -; CHECK-LABEL: @lsr_i16 -; CHECK-DAG: ptrue p0.h -; CHECK-DAG: lsr z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret +; CHECK-LABEL: lsr_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret %shr = lshr %a, %b ret %shr } define @lsr_i32( %a, %b){ -; CHECK-LABEL: @lsr_i32 -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: lsr z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK-LABEL: lsr_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret %shr = lshr %a, %b ret %shr } define @lsr_i64( %a, %b){ -; CHECK-LABEL: @lsr_i64 -; CHECK-DAG: ptrue p0.d -; CHECK-DAG: lsr z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK-LABEL: lsr_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret %shr = lshr %a, %b ret %shr } define @lsr_promote_i8( %a, %b){ -; CHECK-LABEL: @lsr_promote_i8 -; CHECK-DAG: ptrue p0.h -; CHECK-DAG: and z1.h, z1.h, #0xff -; CHECK-DAG: lsr z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret +; CHECK-LABEL: lsr_promote_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: and z1.h, z1.h, #0xff +; CHECK-NEXT: and z0.h, z0.h, #0xff +; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret %shr = lshr %a, %b ret %shr } define @lsr_split_i32( %a, %b){ -; CHECK-LABEL: @lsr_split_i32 -; CHECK-DAG: ptrue p0.s -; CHECK-DAG: lsr z0.s, p0/m, z0.s, z2.s -; CHECK-DAG: lsr z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: ret +; CHECK-LABEL: lsr_split_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: lsr z1.s, p0/m, z1.s, z3.s +; CHECK-NEXT: ret %shr = lshr %a, %b ret %shr }