diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -240,6 +240,16 @@
     return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
   }
 
+  bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
+    if (N->getOpcode() != ISD::SPLAT_VECTOR)
+      return false;
+
+    EVT EltVT = N->getValueType(0).getVectorElementType();
+    return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
+                             /* High */ EltVT.getFixedSizeInBits(),
+                             /* AllowSaturation */ true, Imm);
+  }
+
   // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
   template <signed Min, signed Max, signed Scale, bool Shift>
   bool SelectCntImm(SDValue N, SDValue &Imm) {
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -252,6 +252,14 @@
                     [(int_aarch64_sve_uaba node:$op1, node:$op2, node:$op3),
                      (add node:$op1, (AArch64uabd_p (SVEAllActive), node:$op2, node:$op3))]>;
 
+def AArch64usra : PatFrags<(ops node:$op1, node:$op2, node:$op3),
+                           [(int_aarch64_sve_usra node:$op1, node:$op2, node:$op3),
+                            (add node:$op1, (AArch64lsr_p (SVEAllActive), node:$op2, (SVEShiftSplatImmR (i32 node:$op3))))]>;
+
+def AArch64ssra : PatFrags<(ops node:$op1, node:$op2, node:$op3),
+                           [(int_aarch64_sve_ssra node:$op1, node:$op2, node:$op3),
+                            (add node:$op1, (AArch64asr_p (SVEAllActive), node:$op2, (SVEShiftSplatImmR (i32 node:$op3))))]>;
+
 def SDT_AArch64FCVT : SDTypeProfile<1, 3, [
   SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,
   SDTCVecEltisVT<1,i1>
@@ -3151,8 +3159,8 @@
   defm SLI_ZZI : sve2_int_bin_shift_imm_left< 0b1, "sli", int_aarch64_sve_sli>;
 
   // SVE2 bitwise shift right and accumulate
-  defm SSRA_ZZI  : sve2_int_bin_accum_shift_imm_right<0b00, "ssra",  int_aarch64_sve_ssra>;
-  defm USRA_ZZI  : sve2_int_bin_accum_shift_imm_right<0b01, "usra",  int_aarch64_sve_usra>;
+  defm SSRA_ZZI  : sve2_int_bin_accum_shift_imm_right<0b00, "ssra",  AArch64ssra>;
+  defm USRA_ZZI  : sve2_int_bin_accum_shift_imm_right<0b01, "usra",  AArch64usra>;
   defm SRSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b10, "srsra", int_aarch64_sve_srsra>;
   defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra", int_aarch64_sve_ursra>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -231,6 +231,8 @@
 def SVEShiftImmR32 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 32, true>", []>;
 def SVEShiftImmR64 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 64, true>", []>;
 
+def SVEShiftSplatImmR : ComplexPattern<iAny, 1, "SelectSVEShiftSplatImmR", []>;
+
 def SVEAllActive : ComplexPattern<untyped, 0, "SelectAllActivePredicate", []>;
 
 class SVEExactFPImm<string Suffix, string ValA, string ValB> : AsmOperandClass {
diff --git a/llvm/test/CodeGen/AArch64/sve2-sra.ll b/llvm/test/CodeGen/AArch64/sve2-sra.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-sra.ll
@@ -0,0 +1,226 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; USRA
+
+define <vscale x 16 x i8> @usra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: usra_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usra z0.b, z1.b, #1
+; CHECK-NEXT:    ret
+  %ins = insertelement <vscale x 16 x i8> poison, i8 1, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %ins, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %shift = lshr <vscale x 16 x i8> %b, %splat
+  %add = add <vscale x 16 x i8> %a, %shift
+  ret <vscale x 16 x i8> %add
+}
+
+define <vscale x 8 x i16> @usra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: usra_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usra z0.h, z1.h, #2
+; CHECK-NEXT:    ret
+  %ins = insertelement <vscale x 8 x i16> poison, i16 2, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %ins, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %shift = lshr <vscale x 8 x i16> %b, %splat
+  %add = add <vscale x 8 x i16> %a, %shift
+  ret <vscale x 8 x i16> %add
+}
+
+define <vscale x 4 x i32> @usra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: usra_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usra z0.s, z1.s, #3
+; CHECK-NEXT:    ret
+  %ins = insertelement <vscale x 4 x i32> poison, i32 3, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %shift = lshr <vscale x 4 x i32> %b, %splat
+  %add = add <vscale x 4 x i32> %a, %shift
+  ret <vscale x 4 x i32> %add
+}
+
+define <vscale x 2 x i64> @usra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: usra_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usra z0.d, z1.d, #4
+; CHECK-NEXT:    ret
+  %ins = insertelement <vscale x 2 x i64> poison, i64 4, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %shift = lshr <vscale x 2 x i64> %b, %splat
+  %add = add <vscale x 2 x i64> %a, %shift
+  ret <vscale x 2 x i64> %add
+}
+
+define <vscale x 16 x i8> @usra_intr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: usra_intr_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usra z0.b, z1.b, #1
+; CHECK-NEXT:    ret
+  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %ins = insertelement <vscale x 16 x i8> poison, i8 1, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %ins, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %shift = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %b, <vscale x 16 x i8> %splat)
+  %add = add <vscale x 16 x i8> %a, %shift
+  ret <vscale x 16 x i8> %add
+}
+
+define <vscale x 8 x i16> @usra_intr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: usra_intr_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usra z0.h, z1.h, #2
+; CHECK-NEXT:    ret
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %ins = insertelement <vscale x 8 x i16> poison, i16 2, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %ins, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %shift = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %b, <vscale x 8 x i16> %splat)
+  %add = add <vscale x 8 x i16> %a, %shift
+  ret <vscale x 8 x i16> %add
+}
+
+define <vscale x 4 x i32> @usra_intr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: usra_intr_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usra z0.s, z1.s, #3
+; CHECK-NEXT:    ret
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %ins = insertelement <vscale x 4 x i32> poison, i32 3, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %shift = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, <vscale x 4 x i32> %splat)
+  %add = add <vscale x 4 x i32> %a, %shift
+  ret <vscale x 4 x i32> %add
+}
+
+define <vscale x 2 x i64> @usra_intr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: usra_intr_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usra z0.d, z1.d, #4
+; CHECK-NEXT:    ret
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %ins = insertelement <vscale x 2 x i64> poison, i64 4, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %shift = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %b, <vscale x 2 x i64> %splat)
+  %add = add <vscale x 2 x i64> %a, %shift
+  ret <vscale x 2 x i64> %add
+}
+
+; SSRA
+
+define <vscale x 16 x i8> @ssra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: ssra_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssra z0.b, z1.b, #1
+; CHECK-NEXT:    ret
+  %ins = insertelement <vscale x 16 x i8> poison, i8 1, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %ins, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %shift = ashr <vscale x 16 x i8> %b, %splat
+  %add = add <vscale x 16 x i8> %a, %shift
+  ret <vscale x 16 x i8> %add
+}
+
+define <vscale x 8 x i16> @ssra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: ssra_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssra z0.h, z1.h, #2
+; CHECK-NEXT:    ret
+  %ins = insertelement <vscale x 8 x i16> poison, i16 2, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %ins, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %shift = ashr <vscale x 8 x i16> %b, %splat
+  %add = add <vscale x 8 x i16> %a, %shift
+  ret <vscale x 8 x i16> %add
+}
+
+define <vscale x 4 x i32> @ssra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: ssra_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssra z0.s, z1.s, #3
+; CHECK-NEXT:    ret
+  %ins = insertelement <vscale x 4 x i32> poison, i32 3, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %shift = ashr <vscale x 4 x i32> %b, %splat
+  %add = add <vscale x 4 x i32> %a, %shift
+  ret <vscale x 4 x i32> %add
+}
+
+define <vscale x 2 x i64> @ssra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: ssra_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssra z0.d, z1.d, #4
+; CHECK-NEXT:    ret
+  %ins = insertelement <vscale x 2 x i64> poison, i64 4, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %shift = ashr <vscale x 2 x i64> %b, %splat
+  %add = add <vscale x 2 x i64> %a, %shift
+  ret <vscale x 2 x i64> %add
+}
+
+define <vscale x 16 x i8> @ssra_intr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: ssra_intr_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssra z0.b, z1.b, #1
+; CHECK-NEXT:    ret
+  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %ins = insertelement <vscale x 16 x i8> poison, i8 1, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %ins, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %shift = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %b, <vscale x 16 x i8> %splat)
+  %add = add <vscale x 16 x i8> %a, %shift
+  ret <vscale x 16 x i8> %add
+}
+
+define <vscale x 8 x i16> @ssra_intr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: ssra_intr_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssra z0.h, z1.h, #2
+; CHECK-NEXT:    ret
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %ins = insertelement <vscale x 8 x i16> poison, i16 2, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %ins, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %shift = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %b, <vscale x 8 x i16> %splat)
+  %add = add <vscale x 8 x i16> %a, %shift
+  ret <vscale x 8 x i16> %add
+}
+
+define <vscale x 4 x i32> @ssra_intr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: ssra_intr_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssra z0.s, z1.s, #3
+; CHECK-NEXT:    ret
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %ins = insertelement <vscale x 4 x i32> poison, i32 3, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %shift = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, <vscale x 4 x i32> %splat)
+  %add = add <vscale x 4 x i32> %a, %shift
+  ret <vscale x 4 x i32> %add
+}
+
+define <vscale x 2 x i64> @ssra_intr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: ssra_intr_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ssra z0.d, z1.d, #4
+; CHECK-NEXT:    ret
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %ins = insertelement <vscale x 2 x i64> poison, i64 4, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %shift = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %b, <vscale x 2 x i64> %splat)
+  %add = add <vscale x 2 x i64> %a, %shift
+  ret <vscale x 2 x i64> %add
+}
+
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 immarg)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 immarg)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 immarg)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 immarg)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+attributes #0 = { "target-features"="+sve,+sve2" }