Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -7132,6 +7132,8 @@ assert(DataVT.getVectorNumElements() == MaskVT.getVectorNumElements() && "Incompatible types of Data and Mask"); if (IsCompressedMemory) { + assert(!DataVT.isScalableVector() && + "Cannot currently handle compressed memory with scalable vectors"); // Incrementing the pointer according to number of '1's in the mask. EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits()); SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask); @@ -7147,6 +7149,11 @@ SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL, AddrVT); Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale); + } else if (DataVT.isScalableVector()) { + Increment = + DAG.getVScale(DL, AddrVT, + APInt(Addr.getValueSizeInBits().getFixedSize(), + DataVT.getSizeInBits().getKnownMinSize() / 8)); } else Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT); Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1089,6 +1089,20 @@ defm TRN1_PPP : sve_int_perm_bin_perm_pp<0b100, "trn1", AArch64trn1>; defm TRN2_PPP : sve_int_perm_bin_perm_pp<0b101, "trn2", AArch64trn2>; + // Extract lo/hi halves of legal predicate types. + def : Pat<(nxv2i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 0))), + (ZIP1_PPP_S PPR:$Ps, (PFALSE))>; + def : Pat<(nxv2i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 2))), + (ZIP2_PPP_S PPR:$Ps, (PFALSE))>; + def : Pat<(nxv4i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 0))), + (ZIP1_PPP_H PPR:$Ps, (PFALSE))>; + def : Pat<(nxv4i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 4))), + (ZIP2_PPP_H PPR:$Ps, (PFALSE))>; + def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))), + (ZIP1_PPP_B PPR:$Ps, (PFALSE))>; + def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))), + (ZIP2_PPP_B PPR:$Ps, (PFALSE))>; + defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", SETUGE, SETULE>; defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", SETUGT, SETULT>; defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge", SETGE, SETLE>; Index: llvm/test/CodeGen/AArch64/sve-split-load.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-split-load.ll +++ llvm/test/CodeGen/AArch64/sve-split-load.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s -; LOAD +; UNPREDICATED -define @load_promote_4i8(* %a) { -; CHECK-LABEL: load_promote_4i8: +define @load_promote_4i16(* %a) { +; CHECK-LABEL: load_promote_4i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] @@ -53,3 +53,82 @@ %load = load , * %a ret %load } + +; MASKED + +define @masked_load_promote_2i32( *%a, %pg) { +; CHECK-LABEL: masked_load_promote_2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.masked.load.nxv2i32( *%a, i32 1, %pg, undef) + ret %load +} + +define @masked_load_split_32i8( *%a, %pg) { +; CHECK-LABEL: masked_load_split_32i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] +; CHECK-NEXT: ld1b { z1.b }, p1/z, [x0, #1, mul vl] +; CHECK-NEXT: ret + %load = call @llvm.masked.load.nxv32i8( *%a, i32 1, %pg, undef) + ret %load +} + +define @masked_load_split_32i16( *%a, %pg) { +; CHECK-LABEL: masked_load_split_32i16: +; CHECK: // %bb.0: +; CHECK-NEXT: pfalse p2.b +; CHECK-NEXT: zip1 p3.b, p0.b, p2.b +; CHECK-NEXT: zip2 p0.b, p0.b, p2.b +; CHECK-NEXT: ld1h { z0.h }, p3/z, [x0] +; CHECK-NEXT: zip1 p3.b, p1.b, p2.b +; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: zip2 p0.b, p1.b, p2.b +; CHECK-NEXT: ld1h { z2.h }, p3/z, [x0, #2, mul vl] +; CHECK-NEXT: ld1h { z3.h }, p0/z, [x0, #3, mul vl] +; CHECK-NEXT: ret + %load = call @llvm.masked.load.nxv32i16( *%a, i32 1, %pg, undef) + ret %load +} + +define @masked_load_split_8i32( *%a, %pg) { +; CHECK-LABEL: masked_load_split_8i32: +; CHECK: // %bb.0: +; CHECK-NEXT: pfalse p1.b +; CHECK-NEXT: zip1 p2.h, p0.h, p1.h +; CHECK-NEXT: zip2 p0.h, p0.h, p1.h +; CHECK-NEXT: ld1w { z0.s }, p2/z, [x0] +; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: ret + %load = call @llvm.masked.load.nxv8i32( *%a, i32 1, %pg, undef) + ret %load +} + +define @masked_load_split_8i64( *%a, %pg) { +; CHECK-LABEL: masked_load_split_8i64: +; CHECK: // %bb.0: +; CHECK-NEXT: pfalse p1.b +; CHECK-NEXT: zip1 p2.h, p0.h, p1.h +; CHECK-NEXT: zip2 p0.h, p0.h, p1.h +; CHECK-NEXT: zip1 p3.s, p2.s, p1.s +; CHECK-NEXT: zip2 p2.s, p2.s, p1.s +; CHECK-NEXT: ld1d { z0.d }, p3/z, [x0] +; CHECK-NEXT: ld1d { z1.d }, p2/z, [x0, #1, mul vl] +; CHECK-NEXT: zip1 p2.s, p0.s, p1.s +; CHECK-NEXT: zip2 p0.s, p0.s, p1.s +; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #2, mul vl] +; CHECK-NEXT: ld1d { z3.d }, p0/z, [x0, #3, mul vl] +; CHECK-NEXT: ret + %load = call @llvm.masked.load.nxv8i64( *%a, i32 1, %pg, undef) + ret %load +} + +declare @llvm.masked.load.nxv32i8(*, i32, , ) + +declare @llvm.masked.load.nxv32i16(*, i32, , ) + +declare @llvm.masked.load.nxv2i32(*, i32, , ) +declare @llvm.masked.load.nxv8i32(*, i32, , ) + +declare @llvm.masked.load.nxv8i64(*, i32, , ) Index: llvm/test/CodeGen/AArch64/sve-split-store.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-split-store.ll +++ llvm/test/CodeGen/AArch64/sve-split-store.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s +; UNPREDICATED + define void @store_promote_4i8( %data, * %a) { ; CHECK-LABEL: store_promote_4i8: ; CHECK: // %bb.0: @@ -51,3 +53,82 @@ store %data, * %a ret void } + +; MASKED + +define void @masked_store_promote_2i8( %data, *%a, %pg) { +; CHECK-LABEL: masked_store_promote_2i8: +; CHECK: // %bb.0: +; CHECK-NEXT: st1b { z0.d }, p0, [x0] +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv2i8( %data, *%a, i32 1, %pg) + ret void +} + +define void @masked_store_split_32i8( %data, *%a, %pg) { +; CHECK-LABEL: masked_store_split_32i8: +; CHECK: // %bb.0: +; CHECK-NEXT: st1b { z1.b }, p1, [x0, #1, mul vl] +; CHECK-NEXT: st1b { z0.b }, p0, [x0] +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv32i8( %data, *%a, i32 1, %pg) + ret void +} + +define void @masked_store_split_32i16( %data, *%a, %pg) { +; CHECK-LABEL: masked_store_split_32i16: +; CHECK: // %bb.0: +; CHECK-NEXT: pfalse p2.b +; CHECK-NEXT: zip2 p3.b, p1.b, p2.b +; CHECK-NEXT: zip1 p1.b, p1.b, p2.b +; CHECK-NEXT: st1h { z3.h }, p3, [x0, #3, mul vl] +; CHECK-NEXT: zip2 p3.b, p0.b, p2.b +; CHECK-NEXT: zip1 p0.b, p0.b, p2.b +; CHECK-NEXT: st1h { z2.h }, p1, [x0, #2, mul vl] +; CHECK-NEXT: st1h { z1.h }, p3, [x0, #1, mul vl] +; CHECK-NEXT: st1h { z0.h }, p0, [x0] +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv32i16( %data, *%a, i32 1, %pg) + ret void +} + +define void @masked_store_split_8i32( %data, *%a, %pg) { +; CHECK-LABEL: masked_store_split_8i32: +; CHECK: // %bb.0: +; CHECK-NEXT: pfalse p1.b +; CHECK-NEXT: zip2 p2.h, p0.h, p1.h +; CHECK-NEXT: zip1 p0.h, p0.h, p1.h +; CHECK-NEXT: st1w { z1.s }, p2, [x0, #1, mul vl] +; CHECK-NEXT: st1w { z0.s }, p0, [x0] +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv8i32( %data, *%a, i32 1, %pg) + ret void +} + +define void @masked_store_split_8i64( %data, *%a, %pg) { +; CHECK-LABEL: masked_store_split_8i64: +; CHECK: // %bb.0: +; CHECK-NEXT: pfalse p1.b +; CHECK-NEXT: zip2 p2.h, p0.h, p1.h +; CHECK-NEXT: zip1 p0.h, p0.h, p1.h +; CHECK-NEXT: zip2 p3.s, p2.s, p1.s +; CHECK-NEXT: zip1 p2.s, p2.s, p1.s +; CHECK-NEXT: st1d { z2.d }, p2, [x0, #2, mul vl] +; CHECK-NEXT: zip2 p2.s, p0.s, p1.s +; CHECK-NEXT: zip1 p0.s, p0.s, p1.s +; CHECK-NEXT: st1d { z3.d }, p3, [x0, #3, mul vl] +; CHECK-NEXT: st1d { z1.d }, p2, [x0, #1, mul vl] +; CHECK-NEXT: st1d { z0.d }, p0, [x0] +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv8i64( %data, *%a, i32 1, %pg) + ret void +} + +declare void @llvm.masked.store.nxv2i8(, *, i32, ) +declare void @llvm.masked.store.nxv32i8(, *, i32, ) + +declare void @llvm.masked.store.nxv32i16(, *, i32, ) + +declare void @llvm.masked.store.nxv8i32(, *, i32, ) + +declare void @llvm.masked.store.nxv8i64(, *, i32, )