diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -191,6 +191,11 @@ return SelectSVELogicalImm(N, VT, Imm); } + template + bool SelectSVEArithImm(SDValue N, SDValue &Imm) { + return SelectSVEArithImm(N, VT, Imm); + } + template bool SelectSVEShiftImm(SDValue N, SDValue &Imm) { return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm); @@ -327,7 +332,7 @@ bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High, bool AllowSaturation, SDValue &Imm); - bool SelectSVEArithImm(SDValue N, SDValue &Imm); + bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm); bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base, SDValue &Offset); }; @@ -3128,11 +3133,27 @@ return false; } -bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, SDValue &Imm) { +bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) { if (auto CNode = dyn_cast(N)) { - uint64_t ImmVal = CNode->getSExtValue(); + uint64_t ImmVal = CNode->getZExtValue(); SDLoc DL(N); - ImmVal = ImmVal & 0xFF; + + switch (VT.SimpleTy) { + case MVT::i8: + ImmVal &= 0xFF; + break; + case MVT::i16: + ImmVal &= 0xFFFF; + break; + case MVT::i32: + ImmVal &= 0xFFFFFFFF; + break; + case MVT::i64: + break; + default: + llvm_unreachable("Unexpected type"); + } + if (ImmVal < 256) { Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); return true; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -206,7 +206,10 @@ def SVE8BitLslImm : ComplexPattern; -def SVEArithUImmPat : ComplexPattern; +def SVEArithUImm8Pat : ComplexPattern", []>; +def SVEArithUImm16Pat : ComplexPattern", []>; +def SVEArithUImm32Pat : ComplexPattern", []>; +def SVEArithUImm64Pat : ComplexPattern", []>; def SVEArithSImmPat : ComplexPattern; def SVEShiftImmL8 : ComplexPattern", []>; @@ -3981,10 +3984,10 @@ def _S : sve_int_arith_imm<0b10, { 0b1010, opc }, asm, ZPR32, imm0_255>; def _D : sve_int_arith_imm<0b11, { 0b1010, opc }, asm, ZPR64, imm0_255>; - def : SVE_1_Op_Imm_Arith_Pred_Pat(NAME # _B)>; - def : SVE_1_Op_Imm_Arith_Pred_Pat(NAME # _H)>; - def : SVE_1_Op_Imm_Arith_Pred_Pat(NAME # _S)>; - def : SVE_1_Op_Imm_Arith_Pred_Pat(NAME # _D)>; + def : SVE_1_Op_Imm_Arith_Pred_Pat(NAME # _B)>; + def : SVE_1_Op_Imm_Arith_Pred_Pat(NAME # _H)>; + def : SVE_1_Op_Imm_Arith_Pred_Pat(NAME # _S)>; + def : SVE_1_Op_Imm_Arith_Pred_Pat(NAME # _D)>; } multiclass sve_int_arith_imm2 { diff --git a/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll --- a/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll @@ -51,6 +51,20 @@ ret %res } +define @smax_i16_out_of_range( %a) { +; CHECK-LABEL: smax_i16_out_of_range: +; CHECK: mov w8, #257 +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %elt = insertelement undef, i16 257, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %cmp = icmp sgt %a, %splat + %res = select %cmp, %a, %splat + ret %res +} + define @smax_i32_pos( %a) { ; CHECK-LABEL: smax_i32_pos ; CHECK: smax z0.s, z0.s, #27 @@ -73,6 +87,20 @@ ret %res } +define @smax_i32_out_of_range( %a) { +; CHECK-LABEL: smax_i32_out_of_range: +; CHECK: mov w8, #-129 +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %elt = insertelement undef, i32 -129, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %cmp = icmp sgt %a, %splat + %res = select %cmp, %a, %splat + ret %res +} + define @smax_i64_pos( %a) { ; CHECK-LABEL: smax_i64_pos ; CHECK: smax z0.d, z0.d, #27 @@ -95,6 +123,20 @@ ret %res } +define @smax_i64_out_of_range( %a) { +; CHECK-LABEL: smax_i64_out_of_range: +; CHECK: mov w8, #65535 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %elt = insertelement undef, i64 65535, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %cmp = icmp sgt %a, %splat + %res = select %cmp, %a, %splat + ret %res +} + ; ; SMIN ; @@ -142,6 +184,20 @@ ret %res } +define @smin_i16_out_of_range( %a) { +; CHECK-LABEL: smin_i16_out_of_range: +; CHECK: mov w8, #257 +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %elt = insertelement undef, i16 257, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %cmp = icmp slt %a, %splat + %res = select %cmp, %a, %splat + ret %res +} + define @smin_i32_pos( %a) { ; CHECK-LABEL: smin_i32_pos ; CHECK: smin z0.s, z0.s, #27 @@ -164,6 +220,20 @@ ret %res } +define @smin_i32_out_of_range( %a) { +; CHECK-LABEL: smin_i32_out_of_range: +; CHECK: mov w8, #-129 +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %elt = insertelement undef, i32 -129, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %cmp = icmp slt %a, %splat + %res = select %cmp, %a, %splat + ret %res +} + define @smin_i64_pos( %a) { ; CHECK-LABEL: smin_i64_pos ; CHECK: smin z0.d, z0.d, #27 @@ -186,6 +256,20 @@ ret %res } +define @smin_i64_out_of_range( %a) { +; CHECK-LABEL: smin_i64_out_of_range: +; CHECK: mov w8, #65535 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %elt = insertelement undef, i64 65535, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %cmp = icmp slt %a, %splat + %res = select %cmp, %a, %splat + ret %res +} + ; ; UMAX ; @@ -222,11 +306,14 @@ ret %res } -define @umax_i16_large( %a) { -; CHECK-LABEL: umax_i16_large -; CHECK: umax z0.h, z0.h, #129 -; CHECK-NEXT: ret - %elt = insertelement undef, i16 129, i32 0 +define @umax_i16_out_of_range( %a) { +; CHECK-LABEL: umax_i16_out_of_range: +; CHECK: mov w8, #257 +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %elt = insertelement undef, i16 257, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %cmp = icmp ugt %a, %splat %res = select %cmp, %a, %splat @@ -244,11 +331,14 @@ ret %res } -define @umax_i32_large( %a) { -; CHECK-LABEL: umax_i32_large -; CHECK: umax z0.s, z0.s, #129 -; CHECK-NEXT: ret - %elt = insertelement undef, i32 129, i32 0 +define @umax_i32_out_of_range( %a) { +; CHECK-LABEL: umax_i32_out_of_range: +; CHECK: mov w8, #257 +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %elt = insertelement undef, i32 257, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %cmp = icmp ugt %a, %splat %res = select %cmp, %a, %splat @@ -266,11 +356,14 @@ ret %res } -define @umax_i64_large( %a) { -; CHECK-LABEL: umax_i64_large -; CHECK: umax z0.d, z0.d, #129 -; CHECK-NEXT: ret - %elt = insertelement undef, i64 129, i32 0 +define @umax_i64_out_of_range( %a) { +; CHECK-LABEL: umax_i64_out_of_range: +; CHECK: mov w8, #65535 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %elt = insertelement undef, i64 65535, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %cmp = icmp ugt %a, %splat %res = select %cmp, %a, %splat @@ -313,11 +406,14 @@ ret %res } -define @umin_i16_large( %a) { -; CHECK-LABEL: umin_i16_large -; CHECK: umin z0.h, z0.h, #129 -; CHECK-NEXT: ret - %elt = insertelement undef, i16 129, i32 0 +define @umin_i16_out_of_range( %a) { +; CHECK-LABEL: umin_i16_out_of_range: +; CHECK: mov w8, #257 +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %elt = insertelement undef, i16 257, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %cmp = icmp ult %a, %splat %res = select %cmp, %a, %splat @@ -335,11 +431,14 @@ ret %res } -define @umin_i32_large( %a) { -; CHECK-LABEL: umin_i32_large -; CHECK: umin z0.s, z0.s, #129 -; CHECK-NEXT: ret - %elt = insertelement undef, i32 129, i32 0 +define @umin_i32_out_of_range( %a) { +; CHECK-LABEL: umin_i32_out_of_range: +; CHECK: mov w8, #257 +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %elt = insertelement undef, i32 257, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %cmp = icmp ult %a, %splat %res = select %cmp, %a, %splat @@ -357,11 +456,14 @@ ret %res } -define @umin_i64_large( %a) { -; CHECK-LABEL: umin_i64_large -; CHECK: umin z0.d, z0.d, #129 -; CHECK-NEXT: ret - %elt = insertelement undef, i64 129, i32 0 +define @umin_i64_out_of_range( %a) { +; CHECK-LABEL: umin_i64_out_of_range: +; CHECK: mov w8, #65535 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %elt = insertelement undef, i64 65535, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %cmp = icmp ult %a, %splat %res = select %cmp, %a, %splat diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll @@ -35,6 +35,23 @@ ret %out } +define @smax_i16_out_of_range( %a) { +; CHECK-LABEL: smax_i16_out_of_range: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #129 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %elt = insertelement undef, i16 129, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.smax.nxv8i16( %pg, + %a, + %splat) + ret %out +} + define @smax_i32( %a) { ; CHECK-LABEL: smax_i32: ; CHECK: // %bb.0: @@ -49,6 +66,23 @@ ret %out } +define @smax_i32_out_of_range( %a) { +; CHECK-LABEL: smax_i32_out_of_range: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #-129 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %elt = insertelement undef, i32 -129, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.smax.nxv4i32( %pg, + %a, + %splat) + ret %out +} + define @smax_i64( %a) { ; CHECK-LABEL: smax_i64: ; CHECK: // %bb.0: @@ -63,6 +97,24 @@ ret %out } +define @smax_i64_out_of_range( %a) { +; CHECK-LABEL: smax_i64_out_of_range: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #65535 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %elt = insertelement undef, i64 65535, i64 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.smax.nxv2i64( %pg, + %a, + %splat) + ret %out +} + + ; SMIN define @smin_i8( %a) { @@ -93,6 +145,23 @@ ret %out } +define @smin_i16_out_of_range( %a) { +; CHECK-LABEL: smin_i16_out_of_range: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #-129 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %elt = insertelement undef, i16 -129, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.smin.nxv8i16( %pg, + %a, + %splat) + ret %out +} + define @smin_i32( %a) { ; CHECK-LABEL: smin_i32: ; CHECK: // %bb.0: @@ -107,6 +176,24 @@ ret %out } +define @smin_i32_out_of_range( %a) { +; CHECK-LABEL: smin_i32_out_of_range: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #257 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %elt = insertelement undef, i32 257, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.smin.nxv4i32( %pg, + %a, + %splat) + ret %out +} + + define @smin_i64( %a) { ; CHECK-LABEL: smin_i64: ; CHECK: // %bb.0: @@ -121,6 +208,22 @@ ret %out } +define @smin_i64_out_of_range( %a) { +; CHECK-LABEL: smin_i64_out_of_range: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z1.d, #-256 // =0xffffffffffffff00 +; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %elt = insertelement undef, i64 -256, i64 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.smin.nxv2i64( %pg, + %a, + %splat) + ret %out +} + ; UMAX define @umax_i8( %a) { @@ -151,6 +254,23 @@ ret %out } +define @umax_i16_out_of_range( %a) { +; CHECK-LABEL: umax_i16_out_of_range: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #257 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %elt = insertelement undef, i16 257, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.umax.nxv8i16( %pg, + %a, + %splat) + ret %out +} + define @umax_i32( %a) { ; CHECK-LABEL: umax_i32: ; CHECK: // %bb.0: @@ -165,6 +285,23 @@ ret %out } +define @umax_i32_out_of_range( %a) { +; CHECK-LABEL: umax_i32_out_of_range: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #257 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %elt = insertelement undef, i32 257, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.umax.nxv4i32( %pg, + %a, + %splat) + ret %out +} + define @umax_i64( %a) { ; CHECK-LABEL: umax_i64: ; CHECK: // %bb.0: @@ -179,6 +316,23 @@ ret %out } +define @umax_i64_out_of_range( %a) { +; CHECK-LABEL: umax_i64_out_of_range: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #65535 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %elt = insertelement undef, i64 65535, i64 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.umax.nxv2i64( %pg, + %a, + %splat) + ret %out +} + ; UMIN define @umin_i8( %a) { @@ -209,6 +363,23 @@ ret %out } +define @umin_i16_out_of_range( %a) { +; CHECK-LABEL: umin_i16_out_of_range: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #257 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %elt = insertelement undef, i16 257, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.umin.nxv8i16( %pg, + %a, + %splat) + ret %out +} + define @umin_i32( %a) { ; CHECK-LABEL: umin_i32: ; CHECK: // %bb.0: @@ -223,6 +394,23 @@ ret %out } +define @umin_i32_out_of_range( %a) { +; CHECK-LABEL: umin_i32_out_of_range: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #257 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %elt = insertelement undef, i32 257, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.umin.nxv4i32( %pg, + %a, + %splat) + ret %out +} + define @umin_i64( %a) { ; CHECK-LABEL: umin_i64: ; CHECK: // %bb.0: @@ -237,6 +425,23 @@ ret %out } +define @umin_i64_out_of_range( %a) { +; CHECK-LABEL: umin_i64_out_of_range: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #65535 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %elt = insertelement undef, i64 65535, i64 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.umin.nxv2i64( %pg, + %a, + %splat) + ret %out +} + ; SQADD define @sqadd_b_lowimm( %a) {