diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2532,11 +2532,11 @@
 }
 
   // SVE2 predicated shifts
-  defm SQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl",  "SQSHL_ZPZI">;
-  defm UQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl",  "UQSHL_ZPZI">;
-  defm SRSHR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b1100, "srshr",  "SRSHR_ZPZI",  int_aarch64_sve_srshr>;
-  defm URSHR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b1101, "urshr",  "URSHR_ZPZI",  int_aarch64_sve_urshr>;
-  defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", "SQSHLU_ZPZI", int_aarch64_sve_sqshlu>;
+  defm SQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left_dup<0b0110, "sqshl",  "SQSHL_ZPZI",  int_aarch64_sve_sqshl>;
+  defm UQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left_dup<0b0111, "uqshl",  "UQSHL_ZPZI",  int_aarch64_sve_uqshl>;
+  defm SRSHR_ZPmI  : sve_int_bin_pred_shift_imm_right<   0b1100, "srshr",  "SRSHR_ZPZI",  int_aarch64_sve_srshr>;
+  defm URSHR_ZPmI  : sve_int_bin_pred_shift_imm_right<   0b1101, "urshr",  "URSHR_ZPZI",  int_aarch64_sve_urshr>;
+  defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left<    0b1111, "sqshlu", "SQSHLU_ZPZI", int_aarch64_sve_sqshlu>;
 
   // SVE2 integer add/subtract long
   defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb", int_aarch64_sve_saddlb>;
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
@@ -750,6 +750,102 @@
   ret <vscale x 2 x i64> %out
 }
 
+;
+; SQSHL (Scalar)
+;
+
+define <vscale x 16 x i8> @sqshl_n_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: sqshl_n_i8:
+; CHECK: sqshl z0.b, p0/m, z0.b, #7
+; CHECK-NEXT: ret
+  %dup = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 7)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %dup)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqshl_n_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: sqshl_n_i16:
+; CHECK: sqshl z0.h, p0/m, z0.h, #15
+; CHECK-NEXT: ret
+  %dup = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 15)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %dup)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqshl_n_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: sqshl_n_i32:
+; CHECK: sqshl z0.s, p0/m, z0.s, #31
+; CHECK-NEXT: ret
+  %dup = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 31)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %dup)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqshl_n_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: sqshl_n_i64:
+; CHECK: sqshl z0.d, p0/m, z0.d, #63
+; CHECK-NEXT: ret
+  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 63)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %dup)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @sqshl_n_i8_range(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: sqshl_n_i8_range:
+; CHECK: mov z1.b, #8
+; CHECK: sqshl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %dup = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 8)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %dup)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqshl_n_i16_range(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: sqshl_n_i16_range:
+; CHECK: mov z1.h, #16
+; CHECK: sqshl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %dup = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 16)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %dup)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqshl_n_i32_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: sqshl_n_i32_range:
+; CHECK: mov z1.s, #32
+; CHECK: sqshl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %dup = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 32)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %dup)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqshl_n_i64_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: sqshl_n_i64_range:
+; CHECK: mov z1.d, #64
+; CHECK: sqshl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 64)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %dup)
+  ret <vscale x 2 x i64> %out
+}
+
 ;
 ; SQSHLU
 ;
@@ -1498,6 +1594,102 @@
   ret <vscale x 2 x i64> %out
 }
 
+;
+; UQSHL (Scalar)
+;
+
+define <vscale x 16 x i8> @uqshl_n_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: uqshl_n_i8:
+; CHECK: uqshl z0.b, p0/m, z0.b, #7
+; CHECK-NEXT: ret
+  %dup = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 7)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %dup)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqshl_n_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: uqshl_n_i16:
+; CHECK: uqshl z0.h, p0/m, z0.h, #15
+; CHECK-NEXT: ret
+  %dup = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 15)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %dup)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqshl_n_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: uqshl_n_i32:
+; CHECK: uqshl z0.s, p0/m, z0.s, #31
+; CHECK-NEXT: ret
+  %dup = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 31)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %dup)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uqshl_n_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: uqshl_n_i64:
+; CHECK: uqshl z0.d, p0/m, z0.d, #63
+; CHECK-NEXT: ret
+  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 63)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %dup)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @uqshl_n_i8_range(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: uqshl_n_i8_range:
+; CHECK: mov z1.b, #8
+; CHECK: uqshl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %dup = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 8)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %dup)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqshl_n_i16_range(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: uqshl_n_i16_range:
+; CHECK: mov z1.h, #16
+; CHECK: uqshl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %dup = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 16)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %dup)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqshl_n_i32_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: uqshl_n_i32_range:
+; CHECK: mov z1.s, #32
+; CHECK: uqshl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %dup = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 32)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %dup)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uqshl_n_i64_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: uqshl_n_i64_range:
+; CHECK: mov z1.d, #64
+; CHECK: uqshl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 64)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %dup)
+  ret <vscale x 2 x i64> %out
+}
+
 ;
 ; UQSUB
 ;
@@ -1878,6 +2070,11 @@
   ret <vscale x 2 x i64> %out
 }
 
+declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64)
+
 declare <vscale x 16 x i8> @llvm.aarch64.sve.saba.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.saba.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.saba.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)