Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3857,6 +3857,8 @@
     return Opcode;
   case AArch64ISD::GLD1_MERGE_ZERO:
     return AArch64ISD::GLD1S_MERGE_ZERO;
+  case AArch64ISD::GLD1_IMM_MERGE_ZERO:
+    return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
   case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
     return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
   case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
@@ -3888,6 +3890,52 @@
   return false;
 }
 
+// If the base pointer of a masked gather or scatter is null, we may be able
+// to swap BasePtr & Index and use the vector + register or vector + immediate
+// addressing mode, e.g.
+// VECTOR + REGISTER:
+//    getelementptr nullptr, <vscale x N x T> (splat(%offset) + %indices)
+// -> getelementptr %offset, <vscale x N x T> %indices
+// VECTOR + IMMEDIATE:
+//    getelementptr nullptr, <vscale x N x T> (splat(#x) + %indices)
+// -> getelementptr #x, <vscale x N x T> %indices
+void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT,
+                                 unsigned &Opcode, bool IsGather,
+                                 SelectionDAG &DAG) {
+  if (!isNullConstant(BasePtr))
+    return;
+
+  // A non-constant splatted offset becomes the scalar base (vector + register);
+  // a constant splat is a candidate for the immediate form.
+  ConstantSDNode *Offset = nullptr;
+  if (Index.getOpcode() == ISD::ADD)
+    if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) {
+      if (isa<ConstantSDNode>(SplatVal))
+        Offset = cast<ConstantSDNode>(SplatVal);
+      else {
+        BasePtr = SplatVal;
+        Index = Index->getOperand(0);
+        return;
+      }
+    }
+
+  unsigned NewOp =
+      IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED;
+
+  if (!Offset) {
+    std::swap(BasePtr, Index);
+    Opcode = NewOp;
+    return;
+  }
+
+  // The immediate form encodes a 5-bit offset scaled by the memory element
+  // size, so it must be a multiple of that size and at most 31 elements.
+  uint64_t OffsetVal = Offset->getZExtValue();
+  unsigned ScalarSizeInBytes = MemVT.getScalarSizeInBits() / 8;
+  if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31)
+    return;
+
+  Opcode = NewOp;
+  BasePtr = Index->getOperand(0);
+  Index = DAG.getConstant(OffsetVal, SDLoc(Index), MVT::i64);
+}
+
 SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
                                             SelectionDAG &DAG) const {
   SDLoc DL(Op);
@@ -3937,6 +3985,9 @@
     Index = Index.getOperand(0);
 
   unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend);
+  selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
+                              /*IsGather=*/true, DAG);
+
   if (ResNeedsSignExtend)
     Opcode = getSignExtendedGatherOpcode(Opcode);
 
@@ -3989,9 +4040,12 @@
   if (getGatherScatterIndexIsExtended(Index))
     Index = Index.getOperand(0);
 
+  unsigned Opcode = getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend);
+  selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
+                              /*IsGather=*/false, DAG);
+
   SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT};
-  return DAG.getNode(getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend), DL,
-                     VTs, Ops);
+  return DAG.getNode(Opcode, DL, VTs, Ops);
 }
 
 // Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
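For context on the range check above: the immediate form encodes a 5-bit unsigned offset scaled by the memory element size, which is exactly what the `OffsetVal % ScalarSizeInBytes` and `OffsetVal / ScalarSizeInBytes > 31` conditions enforce. As an illustrative sketch (a hypothetical test, not part of this patch), a gather of doubles at element offset 31 — byte offset 248 — would be the largest expected to still select the vector + immediate form:

define <vscale x 2 x double> @masked_gather_nxv2f64_imm_max(<vscale x 2 x double*> %bases, <vscale x 2 x i1> %mask) {
; expected under this patch: ld1d { z0.d }, p0/z, [z0.d, #248]
  %ptrs = getelementptr double, <vscale x 2 x double*> %bases, i32 31
  %vals = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
  ret <vscale x 2 x double> %vals
}

One element further (offset 32, byte offset 256) exceeds the 5-bit range and falls back to the vector + register form, which is what the masked_gather_nxv2f64_range test below verifies.
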
Index: llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll
+++ llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll
@@ -45,11 +45,28 @@
 ; Tests that exercise various type legalisation scenarios for ISD::MGATHER.
 
 ; Code generate load of an illegal datatype via promotion.
+define <vscale x 2 x i8> @masked_gather_nxv2i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i8:
+; CHECK: ld1sb { z0.d }, p0/z, [z0.d]
+; CHECK: ret
+  %data = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
+  ret <vscale x 2 x i8> %data
+}
+
+; Code generate load of an illegal datatype via promotion.
+define <vscale x 2 x i16> @masked_gather_nxv2i16(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i16:
+; CHECK: ld1sh { z0.d }, p0/z, [z0.d]
+; CHECK: ret
+  %data = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
+  ret <vscale x 2 x i16> %data
+}
+
+; Code generate load of an illegal datatype via promotion.
 define <vscale x 2 x i32> @masked_gather_nxv2i32(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: masked_gather_nxv2i32:
-; CHECK-DAG: mov x8, xzr
-; CHECK-DAG: ld1sw { z0.d }, p0/z, [x8, z0.d]
-; CHECK: ret
+; CHECK: ld1sw { z0.d }, p0/z, [z0.d]
+; CHECK: ret
   %data = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
   ret <vscale x 2 x i32> %data
 }
@@ -92,11 +109,10 @@
 define <vscale x 4 x i32> @masked_sgather_nxv4i8(<vscale x 4 x i8*> %ptrs, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: masked_sgather_nxv4i8:
 ; CHECK: pfalse p1.b
-; CHECK-NEXT: mov x8, xzr
 ; CHECK-NEXT: zip2 p2.s, p0.s, p1.s
 ; CHECK-NEXT: zip1 p0.s, p0.s, p1.s
-; CHECK-NEXT: ld1sb { z1.d }, p2/z, [x8, z1.d]
-; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT: ld1sb { z1.d }, p2/z, [z1.d]
+; CHECK-NEXT: ld1sb { z0.d }, p0/z, [z0.d]
 ; CHECK-NEXT: ptrue p0.s
 ; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
 ; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
@@ -109,8 +125,6 @@
 declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
 declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
 declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
-
 declare <vscale x 4 x i8> @llvm.masked.gather.nxv4i8(<vscale x 4 x i8*>, i32, <vscale x 4 x i1>, <vscale x 4 x i8>)
-
 declare <vscale x 16 x i8> @llvm.masked.gather.nxv16i8(<vscale x 16 x i8*>, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
 declare <vscale x 32 x i32> @llvm.masked.gather.nxv32i32(<vscale x 32 x i32*>, i32, <vscale x 32 x i1>, <vscale x 32 x i32>)
Index: llvm/test/CodeGen/AArch64/sve-masked-gather-vec-plus-imm.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-masked-gather-vec-plus-imm.ll
@@ -0,0 +1,191 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 2 x i64> @masked_gather_nxv2i8(<vscale x 2 x i8*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.d }, p0/z, [z0.d, #1]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i32 1
+  %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
+  %vals.zext = zext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %vals.zext
+}
+
+define <vscale x 2 x i64> @masked_gather_nxv2i16(<vscale x 2 x i16*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [z0.d, #2]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr i16, <vscale x 2 x i16*> %bases, i32 1
+  %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
+  %vals.zext = zext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %vals.zext
+}
+
+define <vscale x 2 x i64> @masked_gather_nxv2i32(<vscale x 2 x i32*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [z0.d, #4]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr i32, <vscale x 2 x i32*> %bases, i32 1
+  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
+  %vals.zext = zext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %vals.zext
+}
+
+define <vscale x 2 x i64> @masked_gather_nxv2i64(<vscale x 2 x i64*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [z0.d, #8]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr i64, <vscale x 2 x i64*> %bases, i32 1
+  %vals.zext = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
+  ret <vscale x 2 x i64> %vals.zext
+}
+
+define <vscale x 2 x half> @masked_gather_nxv2f16(<vscale x 2 x half*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [z0.d, #4]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr half, <vscale x 2 x half*> %bases, i32 2
+  %vals = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x half> undef)
+  ret <vscale x 2 x half> %vals
+}
+
+define <vscale x 2 x bfloat> @masked_gather_nxv2bf16(<vscale x 2 x bfloat*> %bases, <vscale x 2 x i1> %mask) #0 {
+; CHECK-LABEL: masked_gather_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [z0.d, #4]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr bfloat, <vscale x 2 x bfloat*> %bases, i32 2
+  %vals = call <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat> undef)
+  ret <vscale x 2 x bfloat> %vals
+}
+
+define <vscale x 2 x float> @masked_gather_nxv2f32(<vscale x 2 x float*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [z0.d, #12]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr float, <vscale x 2 x float*> %bases, i32 3
+  %vals = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
+  ret <vscale x 2 x float> %vals
+}
+
+define <vscale x 2 x double> @masked_gather_nxv2f64(<vscale x 2 x double*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [z0.d, #32]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr double, <vscale x 2 x double*> %bases, i32 4
+  %vals = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
+  ret <vscale x 2 x double> %vals
+}
+
+define <vscale x 2 x i64> @masked_sgather_nxv2i8(<vscale x 2 x i8*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_sgather_nxv2i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [z0.d, #5]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i32 5
+  %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
+  %vals.sext = sext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %vals.sext
+}
+
+define <vscale x 2 x i64> @masked_sgather_nxv2i16(<vscale x 2 x i16*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_sgather_nxv2i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [z0.d, #12]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr i16, <vscale x 2 x i16*> %bases, i32 6
+  %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
+  %vals.sext = sext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %vals.sext
+}
+
+define <vscale x 2 x i64> @masked_sgather_nxv2i32(<vscale x 2 x i32*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_sgather_nxv2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [z0.d, #28]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr i32, <vscale x 2 x i32*> %bases, i32 7
+  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
+  %vals.sext = sext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %vals.sext
+}
+
+; Tests where the immediate is out of range
+
+define <vscale x 2 x i64> @masked_gather_nxv2i8_range(<vscale x 2 x i8*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i8_range:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:    add z0.d, z0.d, #32 // =0x20
+; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i32 32
+  %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
+  %vals.zext = zext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %vals.zext
+}
+
+define <vscale x 2 x half> @masked_gather_nxv2f16_range(<vscale x 2 x half*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f16_range:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:    add z0.d, z0.d, #64 // =0x40
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr half, <vscale x 2 x half*> %bases, i32 32
+  %vals = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x half> undef)
+  ret <vscale x 2 x half> %vals
+}
+
+define <vscale x 2 x bfloat> @masked_gather_nxv2bf16_range(<vscale x 2 x bfloat*> %bases, <vscale x 2 x i1> %mask) #0 {
+; CHECK-LABEL: masked_gather_nxv2bf16_range:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:    add z0.d, z0.d, #64 // =0x40
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr bfloat, <vscale x 2 x bfloat*> %bases, i32 32
+  %vals = call <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat> undef)
+  ret <vscale x 2 x bfloat> %vals
+}
+
+define <vscale x 2 x float> @masked_gather_nxv2f32_range(<vscale x 2 x float*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f32_range:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:    add z0.d, z0.d, #128 // =0x80
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr float, <vscale x 2 x float*> %bases, i32 32
+  %vals = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
+  ret <vscale x 2 x float> %vals
+}
+
+define <vscale x 2 x double> @masked_gather_nxv2f64_range(<vscale x 2 x double*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f64_range:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:    add z0.d, z0.d, #256 // =0x100
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr double, <vscale x 2 x double*> %bases, i32 32
+  %vals = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
+  ret <vscale x 2 x double> %vals
+}
+
+declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
+declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
+declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
+declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*>, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
+declare <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>, <vscale x 2 x bfloat>)
+declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
+declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
+attributes #0 = { "target-features"="+sve,+bf16" }
Index: llvm/test/CodeGen/AArch64/sve-masked-gather-vec-plus-reg.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-masked-gather-vec-plus-reg.ll
@@ -0,0 +1,137 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 2 x i64> @masked_gather_nxv2i8(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+  %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
+  %vals.zext = zext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %vals.zext
+}
+
+define <vscale x 2 x i64> @masked_gather_nxv2i16(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
+  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i16*>
+  %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
+  %vals.zext = zext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %vals.zext
+}
+
+define <vscale x 2 x i64> @masked_gather_nxv2i32(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
+  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i32*>
+  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
+  %vals.zext = zext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %vals.zext
+}
+
+define <vscale x 2 x i64> @masked_gather_nxv2i64(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
+  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i64*>
+  %vals = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
+  ret <vscale x 2 x i64> %vals
+}
+
+define <vscale x 2 x half> @masked_gather_nxv2f16(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
+  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x half*>
+  %vals = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x half> undef)
+  ret <vscale x 2 x half> %vals
+}
+
+define <vscale x 2 x bfloat> @masked_gather_nxv2bf16(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) #0 {
+; CHECK-LABEL: masked_gather_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
+  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x bfloat*>
+  %vals = call <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat> undef)
+  ret <vscale x 2 x bfloat> %vals
+}
+
+define <vscale x 2 x float> @masked_gather_nxv2f32(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
+  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x float*>
+  %vals = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
+  ret <vscale x 2 x float> %vals
+}
+
+define <vscale x 2 x double> @masked_gather_nxv2f64(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
+  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x double*>
+  %vals = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
+  ret <vscale x 2 x double> %vals
+}
+
+define <vscale x 2 x i64> @masked_sgather_nxv2i8(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_sgather_nxv2i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+  %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
+  %vals.sext = sext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %vals.sext
+}
+
+define <vscale x 2 x i64> @masked_sgather_nxv2i16(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_sgather_nxv2i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
+  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i16*>
+  %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
+  %vals.sext = sext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %vals.sext
+}
+
+define <vscale x 2 x i64> @masked_sgather_nxv2i32(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_sgather_nxv2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT:    ret
+  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i32*>
+  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
+  %vals.sext = sext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %vals.sext
+}
+
+declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
+declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
+declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
+declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*>, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
+declare <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>, <vscale x 2 x bfloat>)
+declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
+declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
+attributes #0 = { "target-features"="+sve,+bf16" }
Index: llvm/test/CodeGen/AArch64/sve-masked-gather.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-masked-gather.ll
@@ -0,0 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 2 x i64> @masked_gather_nxv2i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.d }, p0/z, [z0.d]
+; CHECK-NEXT:    ret
+  %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
+  %vals.zext = zext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %vals.zext
+}
+
+define <vscale x 2 x i64> @masked_gather_nxv2i16(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [z0.d]
+; CHECK-NEXT:    ret
+  %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
+  %vals.zext = zext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %vals.zext
+}
+
+define <vscale x 2 x i64> @masked_gather_nxv2i32(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [z0.d]
+; CHECK-NEXT:    ret
+  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
+  %vals.zext = zext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %vals.zext
+}
+
+define <vscale x 2 x i64> @masked_gather_nxv2i64(<vscale x 2 x i64*> %ptrs, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [z0.d]
+; CHECK-NEXT:    ret
+  %vals = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
+  ret <vscale x 2 x i64> %vals
+}
+
+define <vscale x 2 x half> @masked_gather_nxv2f16(<vscale x 2 x half*> %ptrs, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [z0.d]
+; CHECK-NEXT:    ret
+  %vals = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x half> undef)
+  ret <vscale x 2 x half> %vals
+}
+
+define <vscale x 2 x bfloat> @masked_gather_nxv2bf16(<vscale x 2 x bfloat*> %ptrs, <vscale x 2 x i1> %mask) #0 {
+; CHECK-LABEL: masked_gather_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [z0.d]
+; CHECK-NEXT:    ret
+  %vals = call <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat> undef)
+  ret <vscale x 2 x bfloat> %vals
+}
+
+define <vscale x 2 x float> @masked_gather_nxv2f32(<vscale x 2 x float*> %ptrs, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [z0.d]
+; CHECK-NEXT:    ret
+  %vals = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
+  ret <vscale x 2 x float> %vals
+}
+
+define <vscale x 2 x double> @masked_gather_nxv2f64(<vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [z0.d]
+; CHECK-NEXT:    ret
+  %vals = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
+  ret <vscale x 2 x double> %vals
+}
+
+define <vscale x 2 x i64> @masked_sgather_nxv2i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_sgather_nxv2i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [z0.d]
+; CHECK-NEXT:    ret
+  %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
+  %vals.sext = sext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %vals.sext
+}
+
+define <vscale x 2 x i64> @masked_sgather_nxv2i16(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_sgather_nxv2i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [z0.d]
+; CHECK-NEXT:    ret
+  %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
+  %vals.sext = sext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %vals.sext
+}
+
+define <vscale x 2 x i64> @masked_sgather_nxv2i32(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_sgather_nxv2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [z0.d]
+; CHECK-NEXT:    ret
+  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
+  %vals.sext = sext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %vals.sext
+}
+
+declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
+declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
+declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
+declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*>, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
+declare <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>, <vscale x 2 x bfloat>)
+declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
+declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
+attributes #0 = { "target-features"="+sve,+bf16" }
Index: llvm/test/CodeGen/AArch64/sve-masked-scatter-vec-plus-imm.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-masked-scatter-vec-plus-imm.ll
@@ -0,0 +1,142 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define void @masked_scatter_nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1b { z0.d }, p0, [z1.d, #1]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i32 1
+  call void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_scatter_nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.d }, p0, [z1.d, #2]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr i16, <vscale x 2 x i16*> %bases, i32 1
+  call void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_scatter_nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.d }, p0, [z1.d, #4]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr i32, <vscale x 2 x i32*> %bases, i32 1
+  call void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_scatter_nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.d }, p0, [z1.d, #8]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr i64, <vscale x 2 x i64*> %bases, i32 1
+  call void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_scatter_nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.d }, p0, [z1.d, #4]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr half, <vscale x 2 x half*> %bases, i32 2
+  call void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_scatter_nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x bfloat*> %bases, <vscale x 2 x i1> %mask) #0 {
+; CHECK-LABEL: masked_scatter_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.d }, p0, [z1.d, #4]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr bfloat, <vscale x 2 x bfloat*> %bases, i32 2
+  call void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x bfloat*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_scatter_nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x float*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.d }, p0, [z1.d, #12]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr float, <vscale x 2 x float*> %bases, i32 3
+  call void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_scatter_nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.d }, p0, [z1.d, #32]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr double, <vscale x 2 x double*> %bases, i32 4
+  call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+; Tests where the immediate is out of range
+
+define void @masked_scatter_nxv2i8_range(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2i8_range:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:    add z1.d, z1.d, #32 // =0x20
+; CHECK-NEXT:    st1b { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i32 32
+  call void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_scatter_nxv2i16_range(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2i16_range:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:    add z1.d, z1.d, #64 // =0x40
+; CHECK-NEXT:    st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr i16, <vscale x 2 x i16*> %bases, i32 32
+  call void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_scatter_nxv2i32_range(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2i32_range:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:    add z1.d, z1.d, #128 // =0x80
+; CHECK-NEXT:    st1w { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr i32, <vscale x 2 x i32*> %bases, i32 32
+  call void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_scatter_nxv2f64_range(<vscale x 2 x double> %data, <vscale x 2 x double*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2f64_range:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:    add z1.d, z1.d, #256 // =0x100
+; CHECK-NEXT:    st1d { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr double, <vscale x 2 x double*> %bases, i32 32
+  call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+declare void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double*>, i32, <vscale x 2 x i1>)
+attributes #0 = { "target-features"="+sve,+bf16" }
Index: llvm/test/CodeGen/AArch64/sve-masked-scatter-vec-plus-reg.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-masked-scatter-vec-plus-reg.ll
@@ -0,0 +1,99 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define void @masked_scatter_nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1b { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT:    ret
+  %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+  call void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_scatter_nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT:    ret
+  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i16*>
+  call void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_scatter_nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT:    ret
+  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i32*>
+  call void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_scatter_nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT:    ret
+  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i64*>
+  call void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_scatter_nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT:    ret
+  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x half*>
+  call void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_scatter_nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) #0 {
+; CHECK-LABEL: masked_scatter_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT:    ret
+  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x bfloat*>
+  call void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x bfloat*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_scatter_nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT:    ret
+  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x float*>
+  call void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_scatter_nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT:    ret
+  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x double*>
+  call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+declare void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double*>, i32, <vscale x 2 x i1>)
+attributes #0 = { "target-features"="+sve,+bf16" }
Index: llvm/test/CodeGen/AArch64/sve-masked-scatter.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-masked-scatter.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define void @masked_scatter_nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %masks) nounwind {
+; CHECK-LABEL: masked_scatter_nxv2i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1b { z0.d }, p0, [z1.d]
+; CHECK-NEXT:    ret
+  call void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
+  ret void
+}
+
+define void @masked_scatter_nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %masks) nounwind {
+; CHECK-LABEL: masked_scatter_nxv2i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.d }, p0, [z1.d]
+; CHECK-NEXT:    ret
+  call void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
+  ret void
+}
+
+define void @masked_scatter_nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %masks) nounwind {
+; CHECK-LABEL: masked_scatter_nxv2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.d }, p0, [z1.d]
+; CHECK-NEXT:    ret
+  call void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
+  ret void
+}
+
+define void @masked_scatter_nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %ptrs, <vscale x 2 x i1> %masks) nounwind {
+; CHECK-LABEL: masked_scatter_nxv2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.d }, p0, [z1.d]
+; CHECK-NEXT:    ret
+  call void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
+  ret void
+}
+
+define void @masked_scatter_nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, <vscale x 2 x i1> %masks) nounwind {
+; CHECK-LABEL: masked_scatter_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.d }, p0, [z1.d]
+; CHECK-NEXT:    ret
+  call void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
+  ret void
+}
+
+define void @masked_scatter_nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x bfloat*> %ptrs, <vscale x 2 x i1> %masks) nounwind #0 {
+; CHECK-LABEL: masked_scatter_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1h { z0.d }, p0, [z1.d]
+; CHECK-NEXT:    ret
+  call void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x bfloat*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
+  ret void
+}
+
+define void @masked_scatter_nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x float*> %ptrs, <vscale x 2 x i1> %masks) nounwind {
+; CHECK-LABEL: masked_scatter_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1w { z0.d }, p0, [z1.d]
+; CHECK-NEXT:    ret
+  call void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x float*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
+  ret void
+}
+
+define void @masked_scatter_nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %masks) nounwind {
+; CHECK-LABEL: masked_scatter_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    st1d { z0.d }, p0, [z1.d]
+; CHECK-NEXT:    ret
+  call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
+  ret void
+}
+
+declare void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, i32, <vscale x 2 x i1>)
+attributes #0 = { "target-features"="+sve,+bf16" }
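
On the scatter side the same 5-bit scaled-immediate limit applies. As a complementary sketch (a hypothetical test, not part of this patch; it would reuse the @llvm.masked.scatter.nxv2i8 declare from sve-masked-scatter-vec-plus-imm.ll), a byte scatter at element offset 31 is the largest expected to still select the vector + immediate form, one element short of the masked_scatter_nxv2i8_range fallback above:

define void @masked_scatter_nxv2i8_imm_max(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %bases, <vscale x 2 x i1> %mask) {
; expected under this patch: st1b { z0.d }, p0, [z1.d, #31]
  %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i32 31
  call void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask)
  ret void
}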