Index: include/llvm/IR/Intrinsics.td
===================================================================
--- include/llvm/IR/Intrinsics.td
+++ include/llvm/IR/Intrinsics.td
@@ -984,6 +984,13 @@
 
 def int_ssa_copy : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>],
                              [IntrNoMem, Returned<0>]>;
+
+//===--------- Intrinsics that are used with scalable vector types --------===//
+
+def int_experimental_vector_vscale : Intrinsic<[llvm_anyint_ty],
+                                               [],
+                                               [IntrNoMem]>;
+
 //===----------------------------------------------------------------------===//
 // Target-specific intrinsics
 //===----------------------------------------------------------------------===//
Index: lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.h
+++ lib/Target/AArch64/AArch64ISelLowering.h
@@ -191,6 +191,13 @@
   FRECPE, FRECPS,
   FRSQRTE, FRSQRTS,
 
+  // SVE specific
+  /// VSCALE(IMM) - Returns the runtime scaling factor used to calculate the
+  /// number of elements within a scalable vector. IMM is a constant integer
+  /// multiplier that is applied to the runtime value and is usually some
+  /// multiple of MVT.getVectorNumElements().
+  VSCALE,
+
   // NEON Load/Store with post-increment base updates
   LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
   LD3post,
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1218,6 +1218,7 @@
   case AArch64ISD::FRECPS:          return "AArch64ISD::FRECPS";
   case AArch64ISD::FRSQRTE:         return "AArch64ISD::FRSQRTE";
   case AArch64ISD::FRSQRTS:         return "AArch64ISD::FRSQRTS";
+  case AArch64ISD::VSCALE:          return "AArch64ISD::VSCALE";
   }
   return nullptr;
 }
@@ -2658,6 +2659,15 @@
     return DAG.getNode(ISD::SMIN, dl, VT, Op.getOperand(1), Op.getOperand(2));
   case Intrinsic::aarch64_neon_umin:
     return DAG.getNode(ISD::UMIN, dl, VT, Op.getOperand(1), Op.getOperand(2));
+  case Intrinsic::experimental_vector_vscale: {
+    // TODO: Something smarter than just using a constant of 1.
+    auto VScale = DAG.getNode(AArch64ISD::VSCALE, dl, MVT::i64,
+                              DAG.getConstant(1, dl, MVT::i32));
+    if (VT != MVT::i64)
+      VScale = DAG.getZExtOrTrunc(VScale, dl, VT);
+
+    return VScale;
+  }
   }
 }
 
Index: lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -11,6 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+def SDT_AArch64VScale : SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisInt<1>]>;
+def AArch64vscale : SDNode<"AArch64ISD::VSCALE", SDT_AArch64VScale>;
 let Predicates = [HasSVE] in {
   defm ADD_ZZZ : sve_int_bin_cons_arit_0<0b000, "add">;
   defm SUB_ZZZ : sve_int_bin_cons_arit_0<0b001, "sub">;
@@ -478,4 +480,10 @@
   // InstAliases
   def : InstAlias<"mov $Zd, $Zn",
                   (ORR_ZZZ ZPR64:$Zd, ZPR64:$Zn, ZPR64:$Zn), 1>;
+
+  // Simple codegen patterns
+
+  // Unoptimized vscale
+  def : Pat<(AArch64vscale (simm6_32b:$imm)), (RDVLI_XI $imm)>;
+
 }
Index: test/CodeGen/AArch64/SVE/rdvl.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/SVE/rdvl.ll
@@ -0,0 +1,53 @@
+; RUN: llc -verify-machineinstrs -mattr=+sve < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnueabi"
+
+declare i64 @llvm.experimental.vector.vscale.i64()
+
+define i8 @rdvl_nxv16i8() {
+; CHECK-LABEL: rdvl_nxv16i8:
+; CHECK: orr [[MULT:w[0-9]+]], wzr, #0x1
+; CHECK-NEXT: rdvl x[[RDVL:[0-9]+]], [[MULT]]
+; CHECK-NEXT: lsl w0, w[[RDVL]], #4
+; CHECK-NEXT: ret
+  %vscale = call i64 @llvm.experimental.vector.vscale.i64()
+  %trunc = trunc i64 %vscale to i8
+  %eltcount = mul i8 %trunc, 16
+  ret i8 %eltcount
+}
+
+define i16 @rdvl_nxv8i16() {
+; CHECK-LABEL: rdvl_nxv8i16:
+; CHECK: orr [[MULT:w[0-9]+]], wzr, #0x1
+; CHECK-NEXT: rdvl x[[RDVL:[0-9]+]], [[MULT]]
+; CHECK-NEXT: lsl w0, w[[RDVL]], #3
+; CHECK-NEXT: ret
+  %vscale = call i64 @llvm.experimental.vector.vscale.i64()
+  %trunc = trunc i64 %vscale to i16
+  %eltcount = mul i16 %trunc, 8
+  ret i16 %eltcount
+}
+
+define i32 @rdvl_nxv4i32() {
+; CHECK-LABEL: rdvl_nxv4i32:
+; CHECK: orr [[MULT:w[0-9]+]], wzr, #0x1
+; CHECK-NEXT: rdvl x[[RDVL:[0-9]+]], [[MULT]]
+; CHECK-NEXT: lsl w0, w[[RDVL]], #2
+; CHECK-NEXT: ret
+  %vscale = call i64 @llvm.experimental.vector.vscale.i64()
+  %trunc = trunc i64 %vscale to i32
+  %eltcount = mul i32 %trunc, 4
+  ret i32 %eltcount
+}
+
+define i64 @rdvl_nxv2i64() {
+; CHECK-LABEL: rdvl_nxv2i64:
+; CHECK: orr [[MULT:w[0-9]+]], wzr, #0x1
+; CHECK-NEXT: rdvl x[[RDVL:[0-9]+]], [[MULT]]
+; CHECK-NEXT: lsl x0, x[[RDVL]], #1
+; CHECK-NEXT: ret
+  %vscale = call i64 @llvm.experimental.vector.vscale.i64()
+  %eltcount = mul i64 %vscale, 2
+  ret i64 %eltcount
+}
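
Note on the TODO in the LowerINTRINSIC_WO_CHAIN hunk: the multiplier operand of AArch64ISD::VSCALE is hard-coded to 1 for now, so every use of the intrinsic pays for a separate multiply/shift to scale the result (visible in the lsl instructions in the rdvl.ll CHECK lines). Below is a minimal sketch of one possible follow-up, folding a constant multiplier into the node so it lands in RDVL's signed 6-bit immediate. The helper name performVScaleMulCombine and its wiring into AArch64TargetLowering::PerformDAGCombine for ISD::MUL nodes are assumptions for illustration, not part of this patch:

  // Hypothetical sketch (not part of this patch): fold
  // (mul (AArch64ISD::VSCALE C1), C2) into (AArch64ISD::VSCALE C1*C2),
  // assuming it is invoked from PerformDAGCombine for ISD::MUL nodes.
  static SDValue performVScaleMulCombine(SDNode *N, SelectionDAG &DAG) {
    // Only the i64 form is handled here; narrower results are separated
    // from the VSCALE node by a truncate and would need extra care.
    if (N->getValueType(0) != MVT::i64)
      return SDValue();

    SDValue VScale = N->getOperand(0);
    if (VScale.getOpcode() != AArch64ISD::VSCALE)
      return SDValue();

    auto *OldMul = dyn_cast<ConstantSDNode>(VScale.getOperand(0));
    auto *NewMul = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!OldMul || !NewMul)
      return SDValue();

    // RDVL's immediate field is simm6; give up if the folded multiplier
    // does not fit.
    int64_t Mult = OldMul->getSExtValue() * NewMul->getSExtValue();
    if (!isInt<6>(Mult))
      return SDValue();

    SDLoc DL(N);
    return DAG.getNode(AArch64ISD::VSCALE, DL, MVT::i64,
                       DAG.getConstant(Mult, DL, MVT::i32));
  }

With a fold along these lines, rdvl_nxv2i64 above could plausibly select to a single "rdvl x0, #2" instead of materializing the multiplier and shifting.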