diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1692,6 +1692,18 @@
 def int_aarch64_sve_ptest_last  : AdvSIMD_SVE_PTEST_Intrinsic;
 
 //
+// Reinterpreting data
+//
+
+def int_aarch64_sve_convert_from_svbool : Intrinsic<[llvm_anyvector_ty],
+                                                    [llvm_nxv16i1_ty],
+                                                    [IntrNoMem]>;
+
+def int_aarch64_sve_convert_to_svbool : Intrinsic<[llvm_nxv16i1_ty],
+                                                  [llvm_anyvector_ty],
+                                                  [IntrNoMem]>;
+
+//
 // Gather loads: scalar base + vector offsets
 //
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -218,6 +218,8 @@
   DUP_PRED,
   INDEX_VECTOR,
 
+  REINTERPRET_CAST,
+
   LDNF1,
   LDNF1S,
   LDFF1,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1345,6 +1345,7 @@
   case AArch64ISD::LASTA: return "AArch64ISD::LASTA";
   case AArch64ISD::LASTB: return "AArch64ISD::LASTB";
   case AArch64ISD::REV: return "AArch64ISD::REV";
+  case AArch64ISD::REINTERPRET_CAST: return "AArch64ISD::REINTERPRET_CAST";
   case AArch64ISD::TBL: return "AArch64ISD::TBL";
   case AArch64ISD::NOT: return "AArch64ISD::NOT";
   case AArch64ISD::BIT: return "AArch64ISD::BIT";
@@ -2949,6 +2950,12 @@
                        DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
 }
 
+static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
+                               int Pattern) {
+  return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
+                     DAG.getTargetConstant(Pattern, DL, MVT::i32));
+}
+
 SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                        SelectionDAG &DAG) const {
   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -3038,6 +3045,24 @@
                        Op.getOperand(1));
   case Intrinsic::aarch64_sve_dupq_lane:
     return LowerDUPQLane(Op, DAG);
+  case Intrinsic::aarch64_sve_convert_from_svbool:
+    return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(),
+                       Op.getOperand(1));
+  case Intrinsic::aarch64_sve_convert_to_svbool: {
+    EVT OutVT = Op.getValueType();
+    EVT InVT = Op.getOperand(1).getValueType();
+    // Return the operand if the cast isn't changing type,
+    // i.e. <vscale x 16 x i1> -> <vscale x 16 x i1>
+    if (InVT == OutVT)
+      return Op.getOperand(1);
+    // Otherwise, zero the newly introduced lanes.
+    SDValue Reinterpret =
+        DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, OutVT, Op.getOperand(1));
+    SDValue Mask = getPTrue(DAG, dl, InVT, AArch64SVEPredPattern::all);
+    SDValue MaskReinterpret =
+        DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, OutVT, Mask);
+    return DAG.getNode(ISD::AND, dl, OutVT, Reinterpret, MaskReinterpret);
+  }
   case Intrinsic::aarch64_sve_insr: {
     SDValue Scalar = Op.getOperand(2);
     EVT ScalarTy = Scalar.getValueType();
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -115,6 +115,8 @@
 def SDT_IndexVector : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<1, 2>, SDTCisInt<2>]>;
 def index_vector : SDNode<"AArch64ISD::INDEX_VECTOR", SDT_IndexVector, []>;
 
+def reinterpret_cast : SDNode<"AArch64ISD::REINTERPRET_CAST", SDTUnaryOp>;
+
 let Predicates = [HasSVE] in {
   defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
@@ -1304,6 +1306,29 @@
   def : Pat<(nxv2f64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2f64 ZPR:$src)>;
   def : Pat<(nxv2f64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2f64 ZPR:$src)>;
 
+  def : Pat<(nxv16i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+  def : Pat<(nxv16i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+  def : Pat<(nxv16i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+  def : Pat<(nxv16i1 (reinterpret_cast (nxv2i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+  def : Pat<(nxv8i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+  def : Pat<(nxv8i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+  def : Pat<(nxv8i1 (reinterpret_cast (nxv2i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+  def : Pat<(nxv4i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+  def : Pat<(nxv4i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+  def : Pat<(nxv4i1 (reinterpret_cast (nxv2i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+  def : Pat<(nxv2i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+  def : Pat<(nxv2i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+  def : Pat<(nxv2i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+
+  def : Pat<(nxv16i1 (and PPR:$Ps1, PPR:$Ps2)),
+            (AND_PPzPP (PTRUE_B 31), PPR:$Ps1, PPR:$Ps2)>;
+  def : Pat<(nxv8i1 (and PPR:$Ps1, PPR:$Ps2)),
+            (AND_PPzPP (PTRUE_H 31), PPR:$Ps1, PPR:$Ps2)>;
+  def : Pat<(nxv4i1 (and PPR:$Ps1, PPR:$Ps2)),
+            (AND_PPzPP (PTRUE_S 31), PPR:$Ps1, PPR:$Ps2)>;
+  def : Pat<(nxv2i1 (and PPR:$Ps1, PPR:$Ps2)),
+            (AND_PPzPP (PTRUE_D 31), PPR:$Ps1, PPR:$Ps2)>;
+
   // Add more complex addressing modes here as required
   multiclass pred_load<ValueType Ty, ValueType PredTy, SDPatternOperator Load,
                        Instruction RegImmInst> {
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
@@ -0,0 +1,84 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; Converting to svbool_t (<vscale x 16 x i1>)
+;
+
+define <vscale x 16 x i1> @reinterpret_bool_from_b(<vscale x 16 x i1> %pg) {
+; CHECK-LABEL: reinterpret_bool_from_b:
+; CHECK: ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv16i1(<vscale x 16 x i1> %pg)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @reinterpret_bool_from_h(<vscale x 8 x i1> %pg) {
+; CHECK-LABEL: reinterpret_bool_from_h:
+; CHECK: ptrue p1.h
+; CHECK-NEXT: ptrue p2.b
+; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @reinterpret_bool_from_s(<vscale x 4 x i1> %pg) {
+; CHECK-LABEL: reinterpret_bool_from_s:
+; CHECK: ptrue p1.s
+; CHECK-NEXT: ptrue p2.b
+; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %pg)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @reinterpret_bool_from_d(<vscale x 2 x i1> %pg) {
+; CHECK-LABEL: reinterpret_bool_from_d:
+; CHECK: ptrue p1.d
+; CHECK-NEXT: ptrue p2.b
+; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
+  ret <vscale x 16 x i1> %out
+}
+
+;
+; Converting from svbool_t
+;
+
+define <vscale x 16 x i1> @reinterpret_bool_to_b(<vscale x 16 x i1> %pg) {
+; CHECK-LABEL: reinterpret_bool_to_b:
+; CHECK: ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv16i1(<vscale x 16 x i1> %pg)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 8 x i1> @reinterpret_bool_to_h(<vscale x 16 x i1> %pg) {
+; CHECK-LABEL: reinterpret_bool_to_h:
+; CHECK: ret
+  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  ret <vscale x 8 x i1> %out
+}
+
+define <vscale x 4 x i1> @reinterpret_bool_to_s(<vscale x 16 x i1> %pg) {
+; CHECK-LABEL: reinterpret_bool_to_s:
+; CHECK: ret
+  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  ret <vscale x 4 x i1> %out
+}
+
+define <vscale x 2 x i1> @reinterpret_bool_to_d(<vscale x 16 x i1> %pg) {
+; CHECK-LABEL: reinterpret_bool_to_d:
+; CHECK: ret
+  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  ret <vscale x 2 x i1> %out
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv16i1(<vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv16i1(<vscale x 16 x i1>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)