diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -191,6 +191,11 @@
     return SelectSVELogicalImm(N, VT, Imm);
   }
 
+  template <MVT::SimpleValueType VT>
+  bool SelectSVELogicalImmNot(SDValue N, SDValue &Imm) {
+    return SelectSVELogicalImmNot(N, VT, Imm);
+  }
+
   template <MVT::SimpleValueType VT>
   bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
     return SelectSVEArithImm(N, VT, Imm);
   }
@@ -327,6 +332,7 @@
   bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
 
   bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm);
+  bool SelectSVELogicalImmNot(SDValue N, MVT VT, SDValue &Imm);
 
   bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
 
   bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
@@ -3209,6 +3215,43 @@
   return false;
 }
 
+bool AArch64DAGToDAGISel::SelectSVELogicalImmNot(SDValue N, MVT VT, SDValue &Imm) {
+  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
+    uint64_t ImmVal = ~CNode->getZExtValue();
+    SDLoc DL(N);
+
+    // Shift mask depending on type size.
+    switch (VT.SimpleTy) {
+    case MVT::i8:
+      ImmVal &= 0xFF;
+      ImmVal |= ImmVal << 8;
+      ImmVal |= ImmVal << 16;
+      ImmVal |= ImmVal << 32;
+      break;
+    case MVT::i16:
+      ImmVal &= 0xFFFF;
+      ImmVal |= ImmVal << 16;
+      ImmVal |= ImmVal << 32;
+      break;
+    case MVT::i32:
+      ImmVal &= 0xFFFFFFFF;
+      ImmVal |= ImmVal << 32;
+      break;
+    case MVT::i64:
+      break;
+    default:
+      llvm_unreachable("Unexpected type");
+    }
+
+    uint64_t encoding;
+    if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
+      Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
+      return true;
+    }
+  }
+  return false;
+}
+
 // SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
 // Rather than attempt to normalise everything we can sometimes saturate the
 // shift amount during selection. This function also allows for consistent
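
Reviewer note: the following is a minimal standalone sketch (plain C++, not LLVM code; the helper name is made up) of the invert-and-replicate step SelectSVELogicalImmNot performs before asking AArch64_AM::processLogicalImmediate whether the result is encodable:

#include <cassert>
#include <cstdint>

// Invert an element-sized immediate and replicate it across 64 bits,
// mirroring the switch in SelectSVELogicalImmNot above.
static uint64_t invertAndReplicate(uint64_t Imm, unsigned EltBits) {
  uint64_t Val = ~Imm;
  if (EltBits < 64) {
    Val &= (uint64_t(1) << EltBits) - 1;       // Keep only the element's bits.
    for (unsigned W = EltBits; W < 64; W *= 2) // Widen 8 -> 16 -> 32 -> 64.
      Val |= Val << W;
  }
  return Val;
}

int main() {
  // Mirrors the bic_i8 test: bic with #254 (0xFE) becomes and with #0x1.
  assert(invertAndReplicate(0xFE, 8) == 0x0101010101010101ULL);
  // Mirrors the bic_i32 test: bic with 0x00FFFF00 becomes and with #0xff0000ff.
  assert(invertAndReplicate(0x00FFFF00, 32) == 0xFF0000FFFF0000FFULL);
  return 0;
}
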
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -101,6 +101,9 @@
   UMAX_PRED,
   UMIN_PRED,
 
+  // Unpredicated vector instructions
+  BIC,
+
   // Predicated instructions with the result of inactive lanes provided by the
   // last operand.
   FABS_MERGE_PASSTHRU,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1952,6 +1952,7 @@
     MAKE_CASE(AArch64ISD::FMINNMV_PRED)
     MAKE_CASE(AArch64ISD::FMUL_PRED)
     MAKE_CASE(AArch64ISD::FSUB_PRED)
+    MAKE_CASE(AArch64ISD::BIC)
     MAKE_CASE(AArch64ISD::BIT)
     MAKE_CASE(AArch64ISD::CBZ)
     MAKE_CASE(AArch64ISD::CBNZ)
@@ -14013,6 +14014,8 @@
     return convertMergedOpToPredOp(N, ISD::SUB, DAG, true);
   case Intrinsic::aarch64_sve_and:
     return convertMergedOpToPredOp(N, ISD::AND, DAG, true);
+  case Intrinsic::aarch64_sve_bic:
+    return convertMergedOpToPredOp(N, AArch64ISD::BIC, DAG, true);
   case Intrinsic::aarch64_sve_eor:
     return convertMergedOpToPredOp(N, ISD::XOR, DAG, true);
   case Intrinsic::aarch64_sve_orr:
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -273,6 +273,12 @@
   return N->hasOneUse();
 }]>;
 
+def SDT_AArch64Arith_Unpred : SDTypeProfile<1, 2, [
+  SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>,
+  SDTCisSameAs<0,1>, SDTCisSameAs<1,2>
+]>;
+
+def AArch64bic : SDNode<"AArch64ISD::BIC", SDT_AArch64Arith_Unpred>;
 
 let Predicates = [HasSVE] in {
   defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
@@ -291,7 +297,7 @@
   defm AND_ZZZ : sve_int_bin_cons_log<0b00, "and", and>;
   defm ORR_ZZZ : sve_int_bin_cons_log<0b01, "orr", or>;
   defm EOR_ZZZ : sve_int_bin_cons_log<0b10, "eor", xor>;
-  defm BIC_ZZZ : sve_int_bin_cons_log<0b11, "bic", null_frag>;
+  defm BIC_ZZZ : sve_int_bin_cons_log<0b11, "bic", AArch64bic>;
 
   defm ADD_ZPmZ : sve_int_bin_pred_arit_0<0b000, "add", "ADD_ZPZZ", int_aarch64_sve_add, DestructiveBinaryComm>;
   defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub", "SUB_ZPZZ", int_aarch64_sve_sub, DestructiveBinaryCommWithRev, "SUBR_ZPmZ">;
@@ -338,6 +344,7 @@
   defm ORR_ZI : sve_int_log_imm<0b00, "orr", "orn", or>;
   defm EOR_ZI : sve_int_log_imm<0b01, "eor", "eon", xor>;
   defm AND_ZI : sve_int_log_imm<0b10, "and", "bic", and>;
+  defm BIC_ZI : sve_int_log_imm_bic<AArch64bic>;
 
   defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", AArch64smax_p>;
   defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", AArch64smin_p>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -204,6 +204,11 @@
 def SVELogicalImm32Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i32>", []>;
 def SVELogicalImm64Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i64>", []>;
 
+def SVELogicalImm8NotPat  : ComplexPattern<i64, 1, "SelectSVELogicalImmNot<MVT::i8>",  []>;
+def SVELogicalImm16NotPat : ComplexPattern<i64, 1, "SelectSVELogicalImmNot<MVT::i16>", []>;
+def SVELogicalImm32NotPat : ComplexPattern<i64, 1, "SelectSVELogicalImmNot<MVT::i32>", []>;
+def SVELogicalImm64NotPat : ComplexPattern<i64, 1, "SelectSVELogicalImmNot<MVT::i64>", []>;
+
 def SVE8BitLslImm : ComplexPattern<i32, 2, "SelectSVE8BitLslImm", [imm]>;
 
 def SVEArithUImm8Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i8>", []>;
@@ -1536,6 +1541,13 @@
                   (!cast<Instruction>(NAME) ZPR64:$Zdn, logical_imm64_not:$imm), 0>;
 }
 
+multiclass sve_int_log_imm_bic<SDPatternOperator op> {
+  def : SVE_1_Op_Imm_Log_Pat<nxv16i8, op, ZPR8,  i32, SVELogicalImm8NotPat,  !cast<Instruction>("AND_ZI")>;
+  def : SVE_1_Op_Imm_Log_Pat<nxv8i16, op, ZPR16, i32, SVELogicalImm16NotPat, !cast<Instruction>("AND_ZI")>;
+  def : SVE_1_Op_Imm_Log_Pat<nxv4i32, op, ZPR32, i32, SVELogicalImm32NotPat, !cast<Instruction>("AND_ZI")>;
+  def : SVE_1_Op_Imm_Log_Pat<nxv2i64, op, ZPR64, i64, SVELogicalImm64NotPat, !cast<Instruction>("AND_ZI")>;
+}
+
 class sve_int_dup_mask_imm<string asm>
 : I<(outs ZPR64:$Zd), (ins logical_imm64:$imms),
     asm, "\t$Zd, $imms",
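
Reviewer note: the convertMergedOpToPredOp call above only fires when the governing predicate is all-active, in which case the merging behaviour of the intrinsic is unobservable and the operation is just lane-wise a & ~b, i.e. the new unpredicated AArch64ISD::BIC node. Below is a small plain-C++ model of that equivalence (not LLVM code, and it assumes the usual convention that inactive lanes of these intrinsics take the value of the first data operand):

#include <array>
#include <cassert>
#include <cstdint>

int main() {
  std::array<uint8_t, 4> A = {0x0F, 0xF0, 0xAA, 0x55};
  std::array<uint8_t, 4> B = {0x01, 0x10, 0xFF, 0x00};
  std::array<bool, 4> Pg = {true, true, true, true}; // All-active, like ptrue.

  for (size_t I = 0; I < A.size(); ++I) {
    // Predicated (merging) semantics: inactive lanes keep the first operand.
    uint8_t Merged = Pg[I] ? uint8_t(A[I] & ~B[I]) : A[I];
    // Unpredicated semantics: plain lane-wise a & ~b.
    uint8_t Unpred = uint8_t(A[I] & ~B[I]);
    assert(Merged == Unpred); // Identical whenever every lane is active.
  }
  return 0;
}
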
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll
@@ -52,6 +52,58 @@
   ret <vscale x 2 x i64> %out
 }
 
+;
+; BIC
+;
+
+define <vscale x 16 x i8> @bic_i8(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: bic_i8:
+; CHECK: and z0.b, z0.b, #0x1
+; CHECK-NEXT: ret
+  %pg = shufflevector <vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+  %b = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 254, i32 0), <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %a,
+                                                               <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @bic_i16(<vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: bic_i16:
+; CHECK: and z0.h, z0.h, #0x1
+; CHECK-NEXT: ret
+  %pg = shufflevector <vscale x 8 x i1> insertelement (<vscale x 8 x i1> undef, i1 true, i32 0), <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+  %b = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 65534, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %a,
+                                                               <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @bic_i32(<vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: bic_i32:
+; CHECK: and z0.s, z0.s, #0xff0000ff
+; CHECK-NEXT: ret
+  %pg = shufflevector <vscale x 4 x i1> insertelement (<vscale x 4 x i1> undef, i1 true, i32 0), <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+  %b = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 16776960, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %a,
+                                                               <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @bic_i64(<vscale x 2 x i64> %a) #0 {
+; CHECK-LABEL: bic_i64:
+; CHECK: and z0.d, z0.d, #0x3ffffffffffff
+; CHECK-NEXT: ret
+  %pg = shufflevector <vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+  %b = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 18445618173802708992, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %a,
+                                                               <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
 ;
 ; EOR
 ;
@@ -209,6 +261,11 @@
 declare <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 
+declare <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
 declare <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-unpred-form.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-unpred-form.ll
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-unpred-form.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-unpred-form.ll
@@ -376,6 +376,54 @@
   ret <vscale x 2 x i64> %out
 }
 
+;
+; BIC
+;
+
+define <vscale x 16 x i8> @bic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: bic_i8:
+; CHECK: bic z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %a,
+                                                               <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @bic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: bic_i16:
+; CHECK: bic z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %a,
+                                                               <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @bic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: bic_i32:
+; CHECK: bic z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %a,
+                                                               <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @bic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: bic_i64:
+; CHECK: bic z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %a,
+                                                               <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
 ;
 ; EOR
 ;
@@ -1045,6 +1093,11 @@
 declare <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 
+declare <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
 declare <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
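
End-to-end illustration: a hypothetical ACLE-level example (C/C++ with arm_sve.h; not part of this patch, and the exact Clang lowering is assumed rather than verified here) of source that should exercise the new patterns via the llvm.aarch64.sve.bic intrinsic used in the tests above:

#include <arm_sve.h>

// All-active predicate + vector operand: expected to use the unpredicated
// form, bic z0.d, z0.d, z1.d (cf. the sve-intrinsics-unpred-form.ll tests).
svuint8_t bic_vec(svuint8_t a, svuint8_t b) {
  return svbic_x(svptrue_b8(), a, b);
}

// Splat of an inverted logical immediate: expected to fold to the immediate
// form, and z0.b, z0.b, #0x1 (cf. the bic_i8 test in
// sve-intrinsics-logical-imm.ll).
svuint8_t bic_imm(svuint8_t a) {
  return svbic_x(svptrue_b8(), a, svdup_u8(0xFE));
}
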