diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -335,6 +335,8 @@
   bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
   bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &Offset);
+
+  bool SelectAllActivePredicate(SDValue N);
 };
 } // end anonymous namespace
 
@@ -4983,3 +4985,25 @@
 
   return false;
 }
+
+bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
+  unsigned NumElts = N.getValueType().getVectorMinNumElements();
+
+  // Look through casts.
+  SDValue Op = N;
+  while (Op.getOpcode() == AArch64ISD::REINTERPRET_CAST) {
+    Op = Op.getOperand(0);
+    // When reinterpreting from a type with fewer elements the "new" elements
+    // are not active, so bail if they're likely to be used.
+    if (Op.getValueType().getVectorMinNumElements() < NumElts)
+      return false;
+  }
+
+  // "ptrue p.<ty>, all" can be considered all active when <ty> is the same
+  // size or smaller than the implicit element type represented by N.
+  // NOTE: A larger element count implies a smaller element type.
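+  // The constant operand 31 is the immediate encoding of the "all" predicate
+  // pattern (AArch64SVEPredPattern::all).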
+  if (Op.getOpcode() == AArch64ISD::PTRUE && Op.getConstantOperandVal(0) == 31)
+    return Op.getValueType().getVectorMinNumElements() >= NumElts;
+
+  return false;
+}
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -335,9 +335,9 @@
   defm EORV_VPZ : sve_int_reduce_2<0b001, "eorv", AArch64eorv_p>;
   defm ANDV_VPZ : sve_int_reduce_2<0b010, "andv", AArch64andv_p>;
 
-  defm ORR_ZI : sve_int_log_imm<0b00, "orr", "orn", or>;
-  defm EOR_ZI : sve_int_log_imm<0b01, "eor", "eon", xor>;
-  defm AND_ZI : sve_int_log_imm<0b10, "and", "bic", and>;
+  defm ORR_ZI : sve_int_log_imm<0b00, "orr", "orn", or, int_aarch64_sve_orr>;
+  defm EOR_ZI : sve_int_log_imm<0b01, "eor", "eon", xor, int_aarch64_sve_eor>;
+  defm AND_ZI : sve_int_log_imm<0b10, "and", "bic", and, int_aarch64_sve_and>;
 
   defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", AArch64smax_p>;
   defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", AArch64smin_p>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -221,6 +221,8 @@
 def SVEShiftImmR32 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 32, true>", []>;
 def SVEShiftImmR64 : ComplexPattern<i64, 1, "SelectSVEShiftImm<1, 64, true>", []>;
 
+def SVEAllActive : ComplexPattern<untyped, 0, "SelectAllActivePredicate", []>;
+
 class SVEExactFPImm<string Suffix, string ValA, string ValB> : AsmOperandClass {
   let Name = "SVEExactFPImmOperand" # Suffix;
   let DiagnosticType = "Invalid" # Name;
@@ -349,6 +351,11 @@
   : Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i64:$imm)))))),
         (inst $Op1, i64:$imm)>;
 
+class SVE_1_Op_Imm_Log_Pred_Pat<ValueType vt, ValueType pt, SDPatternOperator op,
+                                ZPRRegOp zprty, ValueType it, ComplexPattern cpx, Instruction inst>
+  : Pat<(vt (op (pt (SVEAllActive)), (vt zprty:$Op1), (vt (AArch64dup (it (cpx i64:$imm)))))),
+        (inst $Op1, i64:$imm)>;
+
 class SVE_2_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
                    ValueType vt2, Instruction inst>
   : Pat<(vtd (op vt1:$Op1, vt2:$Op2)),
@@ -1494,13 +1501,19 @@
   let ElementSize = ElementSizeNone;
 }
 
-multiclass sve_int_log_imm<bits<2> opc, string asm, string alias, SDPatternOperator op> {
+multiclass sve_int_log_imm<bits<2> opc, string asm, string alias,
+                           SDPatternOperator ir_op, SDPatternOperator int_op> {
   def NAME : sve_int_log_imm<opc, asm>;
 
-  def : SVE_1_Op_Imm_Log_Pat<nxv16i8, op, ZPR8,  i32, SVELogicalImm8Pat,  !cast<Instruction>(NAME)>;
-  def : SVE_1_Op_Imm_Log_Pat<nxv8i16, op, ZPR16, i32, SVELogicalImm16Pat, !cast<Instruction>(NAME)>;
-  def : SVE_1_Op_Imm_Log_Pat<nxv4i32, op, ZPR32, i32, SVELogicalImm32Pat, !cast<Instruction>(NAME)>;
-  def : SVE_1_Op_Imm_Log_Pat<nxv2i64, op, ZPR64, i64, SVELogicalImm64Pat, !cast<Instruction>(NAME)>;
+  def : SVE_1_Op_Imm_Log_Pat<nxv16i8, ir_op, ZPR8,  i32, SVELogicalImm8Pat,  !cast<Instruction>(NAME)>;
+  def : SVE_1_Op_Imm_Log_Pat<nxv8i16, ir_op, ZPR16, i32, SVELogicalImm16Pat, !cast<Instruction>(NAME)>;
+  def : SVE_1_Op_Imm_Log_Pat<nxv4i32, ir_op, ZPR32, i32, SVELogicalImm32Pat, !cast<Instruction>(NAME)>;
+  def : SVE_1_Op_Imm_Log_Pat<nxv2i64, ir_op, ZPR64, i64, SVELogicalImm64Pat, !cast<Instruction>(NAME)>;
+
+  def : SVE_1_Op_Imm_Log_Pred_Pat<nxv16i8, nxv16i1, int_op, ZPR8,  i32, SVELogicalImm8Pat,  !cast<Instruction>(NAME)>;
+  def : SVE_1_Op_Imm_Log_Pred_Pat<nxv8i16, nxv8i1,  int_op, ZPR16, i32, SVELogicalImm16Pat, !cast<Instruction>(NAME)>;
+  def : SVE_1_Op_Imm_Log_Pred_Pat<nxv4i32, nxv4i1,  int_op, ZPR32, i32, SVELogicalImm32Pat, !cast<Instruction>(NAME)>;
+  def : SVE_1_Op_Imm_Log_Pred_Pat<nxv2i64, nxv2i1,  int_op, ZPR64, i64, SVELogicalImm64Pat, !cast<Instruction>(NAME)>;
 
   def : InstAlias<asm # "\t$Zdn, $Zdn, $imm",
                   (!cast<Instruction>(NAME) ZPR8:$Zdn, sve_logical_imm8:$imm), 4>;
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll
@@ -0,0 +1,237 @@
+; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s
+
+;
+; AND
+;
+
+define <vscale x 16 x i8> @and_i8(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: and_i8:
+; CHECK: and z0.b, z0.b, #0x7
+; CHECK-NEXT: ret
+  %pg = shufflevector <vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+  %b = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 7, i32 0), <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %a,
+                                                               <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @and_i16(<vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: and_i16:
+; CHECK: and z0.h, z0.h, #0xf0
+; CHECK-NEXT: ret
+  %pg = shufflevector <vscale x 8 x i1> insertelement (<vscale x 8 x i1> undef, i1 true, i32 0), <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+  %b = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 240, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %a,
+                                                               <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @and_i32(<vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: and_i32:
+; CHECK: and z0.s, z0.s, #0xffff00
+; CHECK-NEXT: ret
+  %pg = shufflevector <vscale x 4 x i1> insertelement (<vscale x 4 x i1> undef, i1 true, i32 0), <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+  %b = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 16776960, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %a,
+                                                               <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @and_i64(<vscale x 2 x i64> %a) #0 {
+; CHECK-LABEL: and_i64:
+; CHECK: and z0.d, z0.d, #0xfffc000000000000
+; CHECK-NEXT: ret
+  %pg = shufflevector <vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+  %b = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 18445618173802708992, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %a,
+                                                               <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; EOR
+;
+
+define <vscale x 16 x i8> @eor_i8(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: eor_i8:
+; CHECK: eor z0.b, z0.b, #0xf
+; CHECK-NEXT: ret
+  %pg = shufflevector <vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+  %b = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 15, i32 0), <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %a,
+                                                               <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @eor_i16(<vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: eor_i16:
+; CHECK: eor z0.h, z0.h, #0xfc07
+; CHECK-NEXT: ret
+  %pg = shufflevector <vscale x 8 x i1> insertelement (<vscale x 8 x i1> undef, i1 true, i32 0), <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+  %b = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 64519, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %a,
+                                                               <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @eor_i32(<vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: eor_i32:
+; CHECK: eor z0.s, z0.s, #0xffff00
+; CHECK-NEXT: ret
+  %pg = shufflevector <vscale x 4 x i1> insertelement (<vscale x 4 x i1> undef, i1 true, i32 0), <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+  %b = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 16776960, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %a,
+                                                               <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @eor_i64(<vscale x 2 x i64> %a) #0 {
+; CHECK-LABEL: eor_i64:
+; CHECK: eor z0.d, z0.d, #0x1000000000000
+; CHECK-NEXT: ret
+  %pg = shufflevector <vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+  %b = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 281474976710656, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %a,
+                                                               <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; ORR
+;
+
+define <vscale x 16 x i8> @orr_i8(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: orr_i8:
+; CHECK: orr z0.b, z0.b, #0x6
+; CHECK-NEXT: ret
+  %pg = shufflevector <vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+  %b = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 6, i32 0), <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %a,
+                                                               <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @orr_i16(<vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: orr_i16:
+; CHECK: orr z0.h, z0.h, #0x8001
+; CHECK-NEXT: ret
+  %pg = shufflevector <vscale x 8 x i1> insertelement (<vscale x 8 x i1> undef, i1 true, i32 0), <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+  %b = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 32769, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %a,
+                                                               <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @orr_i32(<vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: orr_i32:
+; CHECK: orr z0.s, z0.s, #0xffff
+; CHECK-NEXT: ret
+  %pg = shufflevector <vscale x 4 x i1> insertelement (<vscale x 4 x i1> undef, i1 true, i32 0), <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+  %b = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 65535, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %a,
+                                                               <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @orr_i64(<vscale x 2 x i64> %a) #0 {
+; CHECK-LABEL: orr_i64:
+; CHECK: orr z0.d, z0.d, #0x7ffc000000000000
+; CHECK-NEXT: ret
+  %pg = shufflevector <vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+  %b = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 9222246136947933184, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %a,
+                                                               <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+; As orr_i32 but where pg is i8 based and thus compatible for i32.
+define <vscale x 4 x i32> @orr_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: orr_i32_ptrue_all_b:
+; CHECK: orr z0.s, z0.s, #0xffff
+; CHECK-NEXT: ret
+  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
+  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 65535)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                    <vscale x 4 x i32> %a,
+                                                                    <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+; As orr_i32 but where pg is i16 based and thus compatible for i32.
+define <vscale x 4 x i32> @orr_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: orr_i32_ptrue_all_h:
+; CHECK: orr z0.s, z0.s, #0xffff
+; CHECK-NEXT: ret
+  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
+  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
+  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 65535)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                    <vscale x 4 x i32> %a,
+                                                                    <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+; As orr_i32 but where pg is i64 based, which is not compatible for i32; the
+; inactive lanes are significant, so the immediate form cannot be used.
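+; (Reinterpreting the nxv2i1 ptrue up to nxv4i1 introduces lanes that are not
+; active, so SelectAllActivePredicate rejects the predicate.)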
+define <vscale x 4 x i32> @orr_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: orr_i32_ptrue_all_d:
+; CHECK-DAG: mov [[IMM:w[0-9]+]], #65535
+; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
+; CHECK-DAG: mov [[DUP:z[0-9]+]].s, [[IMM]]
+; CHECK-DAG: orr z0.s, [[PG]]/m, z0.s, [[DUP]].s
+; CHECK-NEXT: ret
+  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
+  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
+  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 65535)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                    <vscale x 4 x i32> %a,
+                                                                    <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
+
+attributes #0 = { "target-features"="+sve" }
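
For context, a sketch of the kind of ACLE C source this patch improves. It is
illustrative only and not part of the patch: the function names are invented,
and it assumes clang lowers the _x intrinsic variants to the
llvm.aarch64.sve.orr intrinsic with the supplied ptrue predicate, which is
what the tests above exercise at the IR level.

  #include <arm_sve.h>

  // All-active predicate: the new patterns allow the unpredicated
  // immediate form "orr z0.s, z0.s, #0xffff" to be selected.
  svuint32_t orr_imm(svuint32_t a) {
    return svorr_n_u32_x(svptrue_b32(), a, 0xffff);
  }

  // A ptrue of .d elements is not all active when viewed as .s lanes,
  // so the predicated form must be kept.
  svuint32_t orr_imm_pred(svuint32_t a) {
    return svorr_n_u32_x(svptrue_b64(), a, 0xffff);
  }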