diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -4483,23 +4483,14 @@ return new AArch64DAGToDAGISel(TM, OptLevel); } -/// When \p PredVT is a scalable vector predicate in the form -/// MVT::nxxi1, it builds the correspondent scalable vector of -/// integers MVT::nxxi s.t. M x bits = 128. If the input -/// PredVT is not in the form MVT::nxxi1, it returns an invalid -/// EVT. -static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT) { - if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1) - return EVT(); - - const unsigned NumElts = PredVT.getVectorNumElements(); - - if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) - return EVT(); - - EVT ScalarVT = EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / NumElts); - EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, NumElts, /*IsScalable=*/true); - return MemVT; +/// For a given ElementCount \p EC, returns a scalable vector of +/// integers MVT::nxxi such that M x bits = 128. 
+static EVT getSVEPackedVectorTypeFromEC(LLVMContext &Ctx, ElementCount EC) { + assert(EC.Scalable && "Expecting scalable Element Count."); + assert(AArch64::SVEBitsPerBlock % EC.Min == 0 && + "Invalid number of elements."); + EVT ScalarVT = EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.Min); + return EVT::getVectorVT(Ctx, ScalarVT, EC); } /// Return the EVT of the data associated to a memory operation in \p @@ -4508,19 +4499,23 @@ if (isa(Root)) return cast(Root)->getMemoryVT(); - const unsigned Opcode = Root->getOpcode(); - if (Opcode != ISD::INTRINSIC_VOID) - return EVT(); - - const unsigned IntNo = - cast(Root->getOperand(1))->getZExtValue(); - if (IntNo != Intrinsic::aarch64_sve_prf) + switch (Root->getOpcode()) { + case AArch64ISD::LDNF1: + case AArch64ISD::LDNF1S: + return cast(Root->getOperand(3))->getVT(); + case ISD::INTRINSIC_VOID: { + switch (cast(Root->getOperand(1))->getZExtValue()) { + case Intrinsic::aarch64_sve_prf: + return getSVEPackedVectorTypeFromEC( + Ctx, Root->getOperand(2)->getValueType(0).getVectorElementCount()); + } + } break; + default: return EVT(); + } - // We are using an SVE prefetch intrinsic. Type must be inferred - // from the width of the predicate. 
- return getPackedVectorTypeFromPredicateType( - Ctx, Root->getOperand(2)->getValueType(0)); + llvm_unreachable("Non SDNode-derived classes should be " + "handled inside the switch statement."); } /// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode: diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1538,9 +1538,16 @@ defm Pat_Load_P2 : unpred_load_predicate; multiclass ldnf1 { + // scalar + immediate (mul vl) + let AddedComplexity = 1 in { + def : Pat<(Ty (Load (PredTy PPR:$gp), (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset), MemVT)), + (I PPR:$gp, GPR64sp:$base, simm4s1:$offset)>; + } + // base def : Pat<(Ty (Load (PredTy PPR:$gp), GPR64:$base, MemVT)), (I PPR:$gp, GPR64sp:$base, (i64 0))>; + } // 2-element contiguous non-faulting loads @@ -1570,40 +1577,45 @@ // 16-element contiguous non-faulting loads defm : ldnf1; - multiclass ldff1 { - // Add more complex addressing modes here as required. 
+ multiclass ldff1 { + // reg + reg + let AddedComplexity = 1 in { + def : Pat<(Ty (Load (PredTy PPR:$gp), (AddrCP GPR64:$base, GPR64:$offset), MemVT)), + (I PPR:$gp, GPR64sp:$base, GPR64:$offset)>; + } + // Base def : Pat<(Ty (Load (PredTy PPR:$gp), GPR64:$base, MemVT)), (I PPR:$gp, GPR64sp:$base, XZR)>; } // 2-element contiguous first faulting loads - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; + defm : ldff1; + defm : ldff1; + defm : ldff1; + defm : ldff1; + defm : ldff1; + defm : ldff1; + defm : ldff1; + defm : ldff1; + defm : ldff1; // 4-element contiguous first faulting loads - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; + defm : ldff1; + defm : ldff1; + defm : ldff1; + defm : ldff1; + defm : ldff1; + defm : ldff1; // 8-element contiguous first faulting loads - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; + defm : ldff1; + defm : ldff1; + defm : ldff1; + defm : ldff1; // 16-element contiguous first faulting loads - defm : ldff1; + defm : ldff1; } let Predicates = [HasSVE2] in { diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll @@ -12,6 +12,15 @@ ret %load } +define @ldff1b_reg( %pg, i8* %a, i64 %offset) { +; CHECK-LABEL: ldff1b_reg: +; CHECK: ldff1b { z0.b }, p0/z, [x0, x1] +; CHECK-NEXT: ret + %base = getelementptr i8, i8* %a, i64 %offset + %load = call @llvm.aarch64.sve.ldff1.nxv16i8( %pg, i8* %base) + ret %load +} + define @ldff1b_h( %pg, i8* %a) { ; CHECK-LABEL: ldff1b_h: ; CHECK: ldff1b { z0.h }, p0/z, [x0] @@ -21,6 +30,16 @@ ret %res } +define @ldff1b_h_reg( %pg, i8* %a, i64 %offset) { +; CHECK-LABEL: ldff1b_h_reg: +; CHECK: ldff1b { z0.h }, p0/z, [x0, x1] +; CHECK-NEXT: ret + %base = getelementptr i8, i8* 
%a, i64 %offset + %load = call @llvm.aarch64.sve.ldff1.nxv8i8( %pg, i8* %base) + %res = zext %load to + ret %res +} + define @ldff1b_s( %pg, i8* %a) { ; CHECK-LABEL: ldff1b_s: ; CHECK: ldff1b { z0.s }, p0/z, [x0] @@ -30,6 +49,16 @@ ret %res } +define @ldff1b_s_reg( %pg, i8* %a, i64 %offset) { +; CHECK-LABEL: ldff1b_s_reg: +; CHECK: ldff1b { z0.s }, p0/z, [x0, x1] +; CHECK-NEXT: ret + %base = getelementptr i8, i8* %a, i64 %offset + %load = call @llvm.aarch64.sve.ldff1.nxv4i8( %pg, i8* %base) + %res = zext %load to + ret %res +} + define @ldff1b_d( %pg, i8* %a) { ; CHECK-LABEL: ldff1b_d: ; CHECK: ldff1b { z0.d }, p0/z, [x0] @@ -39,6 +68,16 @@ ret %res } +define @ldff1b_d_reg( %pg, i8* %a, i64 %offset) { +; CHECK-LABEL: ldff1b_d_reg: +; CHECK: ldff1b { z0.d }, p0/z, [x0, x1] +; CHECK-NEXT: ret + %base = getelementptr i8, i8* %a, i64 %offset + %load = call @llvm.aarch64.sve.ldff1.nxv2i8( %pg, i8* %base) + %res = zext %load to + ret %res +} + ; ; LDFF1SB ; @@ -52,6 +91,16 @@ ret %res } +define @ldff1sb_h_reg( %pg, i8* %a, i64 %offset) { +; CHECK-LABEL: ldff1sb_h_reg: +; CHECK: ldff1sb { z0.h }, p0/z, [x0, x1] +; CHECK-NEXT: ret + %base = getelementptr i8, i8* %a, i64 %offset + %load = call @llvm.aarch64.sve.ldff1.nxv8i8( %pg, i8* %base) + %res = sext %load to + ret %res +} + define @ldff1sb_s( %pg, i8* %a) { ; CHECK-LABEL: ldff1sb_s: ; CHECK: ldff1sb { z0.s }, p0/z, [x0] @@ -61,6 +110,16 @@ ret %res } +define @ldff1sb_s_reg( %pg, i8* %a, i64 %offset) { +; CHECK-LABEL: ldff1sb_s_reg: +; CHECK: ldff1sb { z0.s }, p0/z, [x0, x1] +; CHECK-NEXT: ret + %base = getelementptr i8, i8* %a, i64 %offset + %load = call @llvm.aarch64.sve.ldff1.nxv4i8( %pg, i8* %base) + %res = sext %load to + ret %res +} + define @ldff1sb_d( %pg, i8* %a) { ; CHECK-LABEL: ldff1sb_d: ; CHECK: ldff1sb { z0.d }, p0/z, [x0] @@ -70,6 +129,16 @@ ret %res } +define @ldff1sb_d_reg( %pg, i8* %a, i64 %offset) { +; CHECK-LABEL: ldff1sb_d_reg: +; CHECK: ldff1sb { z0.d }, p0/z, [x0, x1] +; CHECK-NEXT: ret + %base = 
getelementptr i8, i8* %a, i64 %offset + %load = call @llvm.aarch64.sve.ldff1.nxv2i8( %pg, i8* %base) + %res = sext %load to + ret %res +} + ; ; LDFF1H ; @@ -82,6 +151,15 @@ ret %load } +define @ldff1h_reg( %pg, i16* %a, i64 %offset) { +; CHECK-LABEL: ldff1h_reg: +; CHECK: ldff1h { z0.h }, p0/z, [x0, x1, lsl #1] +; CHECK-NEXT: ret + %base = getelementptr i16, i16* %a, i64 %offset + %load = call @llvm.aarch64.sve.ldff1.nxv8i16( %pg, i16* %base) + ret %load +} + define @ldff1h_s( %pg, i16* %a) { ; CHECK-LABEL: ldff1h_s: ; CHECK: ldff1h { z0.s }, p0/z, [x0] @@ -91,6 +169,16 @@ ret %res } +define @ldff1h_s_reg( %pg, i16* %a, i64 %offset) { +; CHECK-LABEL: ldff1h_s_reg: +; CHECK: ldff1h { z0.s }, p0/z, [x0, x1, lsl #1] +; CHECK-NEXT: ret + %base = getelementptr i16, i16* %a, i64 %offset + %load = call @llvm.aarch64.sve.ldff1.nxv4i16( %pg, i16* %base) + %res = zext %load to + ret %res +} + define @ldff1h_d( %pg, i16* %a) { ; CHECK-LABEL: ldff1h_d: ; CHECK: ldff1h { z0.d }, p0/z, [x0] @@ -100,6 +188,16 @@ ret %res } +define @ldff1h_d_reg( %pg, i16* %a, i64 %offset) { +; CHECK-LABEL: ldff1h_d_reg: +; CHECK: ldff1h { z0.d }, p0/z, [x0, x1, lsl #1] +; CHECK-NEXT: ret + %base = getelementptr i16, i16* %a, i64 %offset + %load = call @llvm.aarch64.sve.ldff1.nxv2i16( %pg, i16* %base) + %res = zext %load to + ret %res +} + define @ldff1h_f16( %pg, half* %a) { ; CHECK-LABEL: ldff1h_f16: ; CHECK: ldff1h { z0.h }, p0/z, [x0] @@ -108,6 +206,15 @@ ret %load } +define @ldff1h_f16_reg( %pg, half* %a, i64 %offset) { +; CHECK-LABEL: ldff1h_f16_reg: +; CHECK: ldff1h { z0.h }, p0/z, [x0, x1, lsl #1] +; CHECK-NEXT: ret + %base = getelementptr half, half* %a, i64 %offset + %load = call @llvm.aarch64.sve.ldff1.nxv8f16( %pg, half* %base) + ret %load +} + ; ; LDFF1SH ; @@ -121,6 +228,16 @@ ret %res } +define @ldff1sh_s_reg( %pg, i16* %a, i64 %offset) { +; CHECK-LABEL: ldff1sh_s_reg: +; CHECK: ldff1sh { z0.s }, p0/z, [x0, x1, lsl #1] +; CHECK-NEXT: ret + %base = getelementptr i16, i16* %a, i64 
%offset + %load = call @llvm.aarch64.sve.ldff1.nxv4i16( %pg, i16* %base) + %res = sext %load to + ret %res +} + define @ldff1sh_d( %pg, i16* %a) { ; CHECK-LABEL: ldff1sh_d: ; CHECK: ldff1sh { z0.d }, p0/z, [x0] @@ -130,6 +247,16 @@ ret %res } +define @ldff1sh_d_reg( %pg, i16* %a, i64 %offset) { +; CHECK-LABEL: ldff1sh_d_reg: +; CHECK: ldff1sh { z0.d }, p0/z, [x0, x1, lsl #1] +; CHECK-NEXT: ret + %base = getelementptr i16, i16* %a, i64 %offset + %load = call @llvm.aarch64.sve.ldff1.nxv2i16( %pg, i16* %base) + %res = sext %load to + ret %res +} + ; ; LDFF1W ; @@ -142,6 +269,15 @@ ret %load } +define @ldff1w_reg( %pg, i32* %a, i64 %offset) { +; CHECK-LABEL: ldff1w_reg: +; CHECK: ldff1w { z0.s }, p0/z, [x0, x1, lsl #2] +; CHECK-NEXT: ret + %base = getelementptr i32, i32* %a, i64 %offset + %load = call @llvm.aarch64.sve.ldff1.nxv4i32( %pg, i32* %base) + ret %load +} + define @ldff1w_d( %pg, i32* %a) { ; CHECK-LABEL: ldff1w_d: ; CHECK: ldff1w { z0.d }, p0/z, [x0] @@ -151,6 +287,16 @@ ret %res } +define @ldff1w_d_reg( %pg, i32* %a, i64 %offset) { +; CHECK-LABEL: ldff1w_d_reg: +; CHECK: ldff1w { z0.d }, p0/z, [x0, x1, lsl #2] +; CHECK-NEXT: ret + %base = getelementptr i32, i32* %a, i64 %offset + %load = call @llvm.aarch64.sve.ldff1.nxv2i32( %pg, i32* %base) + %res = zext %load to + ret %res +} + define @ldff1w_f32( %pg, float* %a) { ; CHECK-LABEL: ldff1w_f32: ; CHECK: ldff1w { z0.s }, p0/z, [x0] @@ -159,6 +305,15 @@ ret %load } +define @ldff1w_f32_reg( %pg, float* %a, i64 %offset) { +; CHECK-LABEL: ldff1w_f32_reg: +; CHECK: ldff1w { z0.s }, p0/z, [x0, x1, lsl #2] +; CHECK-NEXT: ret + %base = getelementptr float, float* %a, i64 %offset + %load = call @llvm.aarch64.sve.ldff1.nxv4f32( %pg, float* %base) + ret %load +} + define @ldff1w_2f32( %pg, float* %a) { ; CHECK-LABEL: ldff1w_2f32: ; CHECK: ldff1w { z0.d }, p0/z, [x0] @@ -167,6 +322,15 @@ ret %load } +define @ldff1w_2f32_reg( %pg, float* %a, i64 %offset) { +; CHECK-LABEL: ldff1w_2f32_reg: +; CHECK: ldff1w { z0.d }, p0/z, 
[x0, x1, lsl #2] +; CHECK-NEXT: ret + %base = getelementptr float, float* %a, i64 %offset + %load = call @llvm.aarch64.sve.ldff1.nxv2f32( %pg, float* %base) + ret %load +} + ; ; LDFF1SW ; @@ -180,6 +344,16 @@ ret %res } +define @ldff1sw_d_reg( %pg, i32* %a, i64 %offset) { +; CHECK-LABEL: ldff1sw_d_reg: +; CHECK: ldff1sw { z0.d }, p0/z, [x0, x1, lsl #2] +; CHECK-NEXT: ret + %base = getelementptr i32, i32* %a, i64 %offset + %load = call @llvm.aarch64.sve.ldff1.nxv2i32( %pg, i32* %base) + %res = sext %load to + ret %res +} + ; ; LDFF1D ; @@ -192,8 +366,17 @@ ret %load } +define @ldff1d_reg( %pg, i64* %a, i64 %offset) { +; CHECK-LABEL: ldff1d_reg: +; CHECK: ldff1d { z0.d }, p0/z, [x0, x1, lsl #3] +; CHECK-NEXT: ret + %base = getelementptr i64, i64* %a, i64 %offset + %load = call @llvm.aarch64.sve.ldff1.nxv2i64( %pg, i64* %base) + ret %load +} -define @ldff1d_f64( %pg, double* %a) { + +define @ldff1d_f64( %pg, double* %a, i64 %offset) { ; CHECK-LABEL: ldff1d_f64: ; CHECK: ldff1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: ret @@ -201,6 +384,15 @@ ret %load } +define @ldff1d_f64_reg( %pg, double* %a, i64 %offset) { +; CHECK-LABEL: ldff1d_f64_reg: +; CHECK: ldff1d { z0.d }, p0/z, [x0, x1, lsl #3] +; CHECK-NEXT: ret + %base = getelementptr double, double* %a, i64 %offset + %load = call @llvm.aarch64.sve.ldff1.nxv2f64( %pg, double* %base) + ret %load +} + declare @llvm.aarch64.sve.ldff1.nxv16i8(, i8*) declare @llvm.aarch64.sve.ldff1.nxv8i8(, i8*) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll @@ -1,5 +1,9 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s +; Range testing for the immediate in the reg+imm(mulvl) addressing +; mode is done only for one instruction. The rest of the instructions +; test only one immediate value in bound. 
+ define @ldnf1b( %pg, i8* %a) { ; CHECK-LABEL: ldnf1b: ; CHECK: ldnf1b { z0.b }, p0/z, [x0] @@ -8,6 +12,65 @@ ret %load } +define @ldnf1b_out_of_lower_bound( %pg, i8* %a) { +; CHECK-LABEL: ldnf1b_out_of_lower_bound: +; CHECK: rdvl x[[OFFSET:[0-9]+]], #-9 +; CHECK: add x[[BASE:[0-9]+]], x0, x[[OFFSET]] +; CHECK: ldnf1b { z0.b }, p0/z, [x[[BASE]]] +; CHECK-NEXT: ret + %base_scalable = bitcast i8* %a to * + %base = getelementptr , * %base_scalable, i64 -9 + %base_scalar = bitcast * %base to i8* + %load = call @llvm.aarch64.sve.ldnf1.nxv16i8( %pg, i8* %base_scalar) + ret %load +} + +define @ldnf1b_lower_bound( %pg, i8* %a) { +; CHECK-LABEL: ldnf1b_lower_bound: +; CHECK: ldnf1b { z0.b }, p0/z, [x0, #-8, mul vl] +; CHECK-NEXT: ret + %base_scalable = bitcast i8* %a to * + %base = getelementptr , * %base_scalable, i64 -8 + %base_scalar = bitcast * %base to i8* + %load = call @llvm.aarch64.sve.ldnf1.nxv16i8( %pg, i8* %base_scalar) + ret %load +} + +define @ldnf1b_inbound( %pg, i8* %a) { +; CHECK-LABEL: ldnf1b_inbound: +; CHECK: ldnf1b { z0.b }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: ret + %base_scalable = bitcast i8* %a to * + %base = getelementptr , * %base_scalable, i64 1 + %base_scalar = bitcast * %base to i8* + %load = call @llvm.aarch64.sve.ldnf1.nxv16i8( %pg, i8* %base_scalar) + ret %load +} + +define @ldnf1b_upper_bound( %pg, i8* %a) { +; CHECK-LABEL: ldnf1b_upper_bound: +; CHECK: ldnf1b { z0.b }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: ret + %base_scalable = bitcast i8* %a to * + %base = getelementptr , * %base_scalable, i64 7 + %base_scalar = bitcast * %base to i8* + %load = call @llvm.aarch64.sve.ldnf1.nxv16i8( %pg, i8* %base_scalar) + ret %load +} + +define @ldnf1b_out_of_upper_bound( %pg, i8* %a) { +; CHECK-LABEL: ldnf1b_out_of_upper_bound: +; CHECK: rdvl x[[OFFSET:[0-9]+]], #8 +; CHECK: add x[[BASE:[0-9]+]], x0, x[[OFFSET]] +; CHECK: ldnf1b { z0.b }, p0/z, [x[[BASE]]] +; CHECK-NEXT: ret + %base_scalable = bitcast i8* %a to * + %base = getelementptr , * 
%base_scalable, i64 8 + %base_scalar = bitcast * %base to i8* + %load = call @llvm.aarch64.sve.ldnf1.nxv16i8( %pg, i8* %base_scalar) + ret %load +} + define @ldnf1b_h( %pg, i8* %a) { ; CHECK-LABEL: ldnf1b_h: ; CHECK: ldnf1b { z0.h }, p0/z, [x0] @@ -17,6 +80,18 @@ ret %res } +define @ldnf1b_h_inbound( %pg, i8* %a) { +; CHECK-LABEL: ldnf1b_h_inbound: +; CHECK: ldnf1b { z0.h }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: ret + %base_scalable = bitcast i8* %a to * + %base = getelementptr , * %base_scalable, i64 7 + %base_scalar = bitcast * %base to i8* + %load = call @llvm.aarch64.sve.ldnf1.nxv8i8( %pg, i8* %base_scalar) + %res = zext %load to + ret %res +} + define @ldnf1sb_h( %pg, i8* %a) { ; CHECK-LABEL: ldnf1sb_h: ; CHECK: ldnf1sb { z0.h }, p0/z, [x0] @@ -26,6 +101,18 @@ ret %res } +define @ldnf1sb_h_inbound( %pg, i8* %a) { +; CHECK-LABEL: ldnf1sb_h_inbound: +; CHECK: ldnf1sb { z0.h }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: ret + %base_scalable = bitcast i8* %a to * + %base = getelementptr , * %base_scalable, i64 7 + %base_scalar = bitcast * %base to i8* + %load = call @llvm.aarch64.sve.ldnf1.nxv8i8( %pg, i8* %base_scalar) + %res = sext %load to + ret %res +} + define @ldnf1h( %pg, i16* %a) { ; CHECK-LABEL: ldnf1h: ; CHECK: ldnf1h { z0.h }, p0/z, [x0] @@ -34,6 +121,17 @@ ret %load } +define @ldnf1h_inbound( %pg, i16* %a) { +; CHECK-LABEL: ldnf1h_inbound: +; CHECK: ldnf1h { z0.h }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: ret + %base_scalable = bitcast i16* %a to * + %base = getelementptr , * %base_scalable, i64 1 + %base_scalar = bitcast * %base to i16* + %load = call @llvm.aarch64.sve.ldnf1.nxv8i16( %pg, i16* %base_scalar) + ret %load +} + define @ldnf1h_f16( %pg, half* %a) { ; CHECK-LABEL: ldnf1h_f16: ; CHECK: ldnf1h { z0.h }, p0/z, [x0] @@ -42,6 +140,17 @@ ret %load } +define @ldnf1h_f16_inbound( %pg, half* %a) { +; CHECK-LABEL: ldnf1h_f16_inbound: +; CHECK: ldnf1h { z0.h }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: ret + %base_scalable = bitcast half* %a to * + %base = 
getelementptr , * %base_scalable, i64 1 + %base_scalar = bitcast * %base to half* + %load = call @llvm.aarch64.sve.ldnf1.nxv8f16( %pg, half* %base_scalar) + ret %load +} + define @ldnf1b_s( %pg, i8* %a) { ; CHECK-LABEL: ldnf1b_s: ; CHECK: ldnf1b { z0.s }, p0/z, [x0] @@ -51,6 +160,18 @@ ret %res } +define @ldnf1b_s_inbound( %pg, i8* %a) { +; CHECK-LABEL: ldnf1b_s_inbound: +; CHECK: ldnf1b { z0.s }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: ret + %base_scalable = bitcast i8* %a to * + %base = getelementptr , * %base_scalable, i64 7 + %base_scalar = bitcast * %base to i8* + %load = call @llvm.aarch64.sve.ldnf1.nxv4i8( %pg, i8* %base_scalar) + %res = zext %load to + ret %res +} + define @ldnf1sb_s( %pg, i8* %a) { ; CHECK-LABEL: ldnf1sb_s: ; CHECK: ldnf1sb { z0.s }, p0/z, [x0] @@ -60,6 +181,18 @@ ret %res } +define @ldnf1sb_s_inbound( %pg, i8* %a) { +; CHECK-LABEL: ldnf1sb_s_inbound: +; CHECK: ldnf1sb { z0.s }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: ret + %base_scalable = bitcast i8* %a to * + %base = getelementptr , * %base_scalable, i64 7 + %base_scalar = bitcast * %base to i8* + %load = call @llvm.aarch64.sve.ldnf1.nxv4i8( %pg, i8* %base_scalar) + %res = sext %load to + ret %res +} + define @ldnf1h_s( %pg, i16* %a) { ; CHECK-LABEL: ldnf1h_s: ; CHECK: ldnf1h { z0.s }, p0/z, [x0] @@ -69,6 +202,18 @@ ret %res } +define @ldnf1h_s_inbound( %pg, i16* %a) { +; CHECK-LABEL: ldnf1h_s_inbound: +; CHECK: ldnf1h { z0.s }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: ret + %base_scalable = bitcast i16* %a to * + %base = getelementptr , * %base_scalable, i64 7 + %base_scalar = bitcast * %base to i16* + %load = call @llvm.aarch64.sve.ldnf1.nxv4i16( %pg, i16* %base_scalar) + %res = zext %load to + ret %res +} + define @ldnf1sh_s( %pg, i16* %a) { ; CHECK-LABEL: ldnf1sh_s: ; CHECK: ldnf1sh { z0.s }, p0/z, [x0] @@ -78,6 +223,18 @@ ret %res } +define @ldnf1sh_s_inbound( %pg, i16* %a) { +; CHECK-LABEL: ldnf1sh_s_inbound: +; CHECK: ldnf1sh { z0.s }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: ret + 
%base_scalable = bitcast i16* %a to * + %base = getelementptr , * %base_scalable, i64 7 + %base_scalar = bitcast * %base to i16* + %load = call @llvm.aarch64.sve.ldnf1.nxv4i16( %pg, i16* %base_scalar) + %res = sext %load to + ret %res +} + define @ldnf1w( %pg, i32* %a) { ; CHECK-LABEL: ldnf1w: ; CHECK: ldnf1w { z0.s }, p0/z, [x0] @@ -86,6 +243,17 @@ ret %load } +define @ldnf1w_inbound( %pg, i32* %a) { +; CHECK-LABEL: ldnf1w_inbound: +; CHECK: ldnf1w { z0.s }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: ret + %base_scalable = bitcast i32* %a to * + %base = getelementptr , * %base_scalable, i64 7 + %base_scalar = bitcast * %base to i32* + %load = call @llvm.aarch64.sve.ldnf1.nxv4i32( %pg, i32* %base_scalar) + ret %load +} + define @ldnf1w_f32( %pg, float* %a) { ; CHECK-LABEL: ldnf1w_f32: ; CHECK: ldnf1w { z0.s }, p0/z, [x0] @@ -94,6 +262,17 @@ ret %load } +define @ldnf1w_f32_inbound( %pg, float* %a) { +; CHECK-LABEL: ldnf1w_f32_inbound: +; CHECK: ldnf1w { z0.s }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: ret + %base_scalable = bitcast float* %a to * + %base = getelementptr , * %base_scalable, i64 7 + %base_scalar = bitcast * %base to float* + %load = call @llvm.aarch64.sve.ldnf1.nxv4f32( %pg, float* %base_scalar) + ret %load +} + define @ldnf1b_d( %pg, i8* %a) { ; CHECK-LABEL: ldnf1b_d: ; CHECK: ldnf1b { z0.d }, p0/z, [x0] @@ -103,6 +282,18 @@ ret %res } +define @ldnf1b_d_inbound( %pg, i8* %a) { +; CHECK-LABEL: ldnf1b_d_inbound: +; CHECK: ldnf1b { z0.d }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: ret + %base_scalable = bitcast i8* %a to * + %base = getelementptr , * %base_scalable, i64 7 + %base_scalar = bitcast * %base to i8* + %load = call @llvm.aarch64.sve.ldnf1.nxv2i8( %pg, i8* %base_scalar) + %res = zext %load to + ret %res +} + define @ldnf1sb_d( %pg, i8* %a) { ; CHECK-LABEL: ldnf1sb_d: ; CHECK: ldnf1sb { z0.d }, p0/z, [x0] @@ -112,6 +303,18 @@ ret %res } +define @ldnf1sb_d_inbound( %pg, i8* %a) { +; CHECK-LABEL: ldnf1sb_d_inbound: +; CHECK: ldnf1sb { z0.d }, p0/z, [x0, #7, 
mul vl] +; CHECK-NEXT: ret + %base_scalable = bitcast i8* %a to * + %base = getelementptr , * %base_scalable, i64 7 + %base_scalar = bitcast * %base to i8* + %load = call @llvm.aarch64.sve.ldnf1.nxv2i8( %pg, i8* %base_scalar) + %res = sext %load to + ret %res +} + define @ldnf1h_d( %pg, i16* %a) { ; CHECK-LABEL: ldnf1h_d: ; CHECK: ldnf1h { z0.d }, p0/z, [x0] @@ -121,6 +324,18 @@ ret %res } +define @ldnf1h_d_inbound( %pg, i16* %a) { +; CHECK-LABEL: ldnf1h_d_inbound: +; CHECK: ldnf1h { z0.d }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: ret + %base_scalable = bitcast i16* %a to * + %base = getelementptr , * %base_scalable, i64 7 + %base_scalar = bitcast * %base to i16* + %load = call @llvm.aarch64.sve.ldnf1.nxv2i16( %pg, i16* %base_scalar) + %res = zext %load to + ret %res +} + define @ldnf1sh_d( %pg, i16* %a) { ; CHECK-LABEL: ldnf1sh_d: ; CHECK: ldnf1sh { z0.d }, p0/z, [x0] @@ -130,6 +345,18 @@ ret %res } +define @ldnf1sh_d_inbound( %pg, i16* %a) { +; CHECK-LABEL: ldnf1sh_d_inbound: +; CHECK: ldnf1sh { z0.d }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: ret + %base_scalable = bitcast i16* %a to * + %base = getelementptr , * %base_scalable, i64 7 + %base_scalar = bitcast * %base to i16* + %load = call @llvm.aarch64.sve.ldnf1.nxv2i16( %pg, i16* %base_scalar) + %res = sext %load to + ret %res +} + define @ldnf1w_d( %pg, i32* %a) { ; CHECK-LABEL: ldnf1w_d: ; CHECK: ldnf1w { z0.d }, p0/z, [x0] @@ -139,6 +366,18 @@ ret %res } +define @ldnf1w_d_inbound( %pg, i32* %a) { +; CHECK-LABEL: ldnf1w_d_inbound: +; CHECK: ldnf1w { z0.d }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: ret + %base_scalable = bitcast i32* %a to * + %base = getelementptr , * %base_scalable, i64 7 + %base_scalar = bitcast * %base to i32* + %load = call @llvm.aarch64.sve.ldnf1.nxv2i32( %pg, i32* %base_scalar) + %res = zext %load to + ret %res +} + define @ldnf1sw_d( %pg, i32* %a) { ; CHECK-LABEL: ldnf1sw_d: ; CHECK: ldnf1sw { z0.d }, p0/z, [x0] @@ -148,6 +387,18 @@ ret %res } +define @ldnf1sw_d_inbound( %pg, i32* %a) { +; 
CHECK-LABEL: ldnf1sw_d_inbound: +; CHECK: ldnf1sw { z0.d }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: ret + %base_scalable = bitcast i32* %a to * + %base = getelementptr , * %base_scalable, i64 7 + %base_scalar = bitcast * %base to i32* + %load = call @llvm.aarch64.sve.ldnf1.nxv2i32( %pg, i32* %base_scalar) + %res = sext %load to + ret %res +} + define @ldnf1d( %pg, i64* %a) { ; CHECK-LABEL: ldnf1d: ; CHECK: ldnf1d { z0.d }, p0/z, [x0] @@ -156,6 +407,17 @@ ret %load } +define @ldnf1d_inbound( %pg, i64* %a) { +; CHECK-LABEL: ldnf1d_inbound: +; CHECK: ldnf1d { z0.d }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: ret + %base_scalable = bitcast i64* %a to * + %base = getelementptr , * %base_scalable, i64 1 + %base_scalar = bitcast * %base to i64* + %load = call @llvm.aarch64.sve.ldnf1.nxv2i64( %pg, i64* %base_scalar) + ret %load +} + define @ldnf1d_f64( %pg, double* %a) { ; CHECK-LABEL: ldnf1d_f64: ; CHECK: ldnf1d { z0.d }, p0/z, [x0] @@ -164,6 +426,17 @@ ret %load } +define @ldnf1d_f64_inbound( %pg, double* %a) { +; CHECK-LABEL: ldnf1d_f64_inbound: +; CHECK: ldnf1d { z0.d }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: ret + %base_scalable = bitcast double* %a to * + %base = getelementptr , * %base_scalable, i64 1 + %base_scalar = bitcast * %base to double* + %load = call @llvm.aarch64.sve.ldnf1.nxv2f64( %pg, double* %base_scalar) + ret %load +} + declare @llvm.aarch64.sve.ldnf1.nxv16i8(, i8*) declare @llvm.aarch64.sve.ldnf1.nxv8i8(, i8*)