diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -319,8 +319,11 @@ return Query.Types[0].isScalar() && Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits; }) - .clampMaxNumElements(0, s32, 2) - .clampMaxNumElements(0, s64, 1) + // Maximum: sN * k = 128 + .clampMaxNumElements(0, s8, 16) + .clampMaxNumElements(0, s16, 8) + .clampMaxNumElements(0, s32, 4) + .clampMaxNumElements(0, s64, 2) .customIf(IsPtrVecPred); // Constants diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir @@ -19,18 +19,10 @@ ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) ; CHECK: [[FPEXT:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV]](<2 x s32>) ; CHECK: [[FPEXT1:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV1]](<2 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[FPEXT]](<2 x s64>) - ; CHECK: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[FPEXT1]](<2 x s64>) - ; CHECK: G_STORE [[UV2]](s64), [[COPY1]](p0) :: (store 8, align 32) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK: G_STORE [[FPEXT]](<2 x s64>), [[COPY1]](p0) :: (store 16, align 32) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; CHECK: G_STORE [[UV3]](s64), [[PTR_ADD]](p0) :: (store 8 + 8) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) - ; CHECK: G_STORE [[UV4]](s64), [[PTR_ADD1]](p0) :: (store 8 + 16, align 16) - ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) - ; CHECK: G_STORE [[UV5]](s64), [[PTR_ADD2]](p0) :: (store 8 + 24) + ; CHECK: G_STORE [[FPEXT1]](<2 x s64>), [[PTR_ADD]](p0) :: (store 16 + 16) ; CHECK: RET_ReallyLR %0:_(<4 x s32>) = COPY $q0 %1:_(p0) = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir @@ -115,16 +115,12 @@ ; CHECK: [[FPTRUNC2:%[0-9]+]]:_(<2 x s32>) = G_FPTRUNC [[COPY2]](<2 x s64>) ; CHECK: [[FPTRUNC3:%[0-9]+]]:_(<2 x s32>) = G_FPTRUNC [[COPY3]](<2 x s64>) ; CHECK: [[COPY5:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: G_STORE [[FPTRUNC]](<2 x s32>), [[COPY5]](p0) :: (store 8, align 32) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[FPTRUNC]](<2 x s32>), [[FPTRUNC1]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[FPTRUNC2]](<2 x s32>), [[FPTRUNC3]](<2 x s32>) + ; CHECK: G_STORE [[CONCAT_VECTORS]](<4 x s32>), [[COPY5]](p0) :: (store 16, align 32) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY5]], [[C]](s64) - ; CHECK: G_STORE [[FPTRUNC1]](<2 x s32>), [[PTR_ADD]](p0) :: (store 8 + 8) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY5]], [[C1]](s64) - ; CHECK: G_STORE [[FPTRUNC2]](<2 x s32>), [[PTR_ADD1]](p0) :: (store 8 + 16, align 16) - ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY5]], [[C2]](s64) - ; CHECK: G_STORE [[FPTRUNC3]](<2 x s32>), [[PTR_ADD2]](p0) :: (store 8 + 24) + ; CHECK: G_STORE [[CONCAT_VECTORS1]](<4 x s32>), [[PTR_ADD]](p0) :: (store 16 + 16) ; CHECK: RET_ReallyLR %2:_(<2 x s64>) = COPY $q0 %3:_(<2 x s64>) = COPY $q1 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir @@ -302,3 +302,96 @@ G_STORE %1(<8 x s8>), %0(p0) :: (store 8) RET_ReallyLR ... +--- +name: store_32xs8 +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.1: + liveins: $x0 + ; CHECK-LABEL: name: store_32xs8 + ; CHECK: liveins: $x0 + ; CHECK: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF + ; CHECK: %ptr:_(p0) = COPY $x0 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8) + ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s8>), %ptr(p0) :: (store 16, align 32) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) + ; CHECK: G_STORE [[BUILD_VECTOR1]](<16 x s8>), [[PTR_ADD]](p0) :: (store 16 + 16) + ; CHECK: RET_ReallyLR + %val:_(<32 x s8>) = G_IMPLICIT_DEF + %ptr:_(p0) = COPY $x0 + G_STORE %val(<32 x s8>), %ptr(p0) :: (store 32) + RET_ReallyLR +... +--- +name: store_16xs16 +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.1: + liveins: $x0 + ; CHECK-LABEL: name: store_16xs16 + ; CHECK: liveins: $x0 + ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK: %ptr:_(p0) = COPY $x0 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16) + ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s16>), %ptr(p0) :: (store 16, align 32) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) + ; CHECK: G_STORE [[BUILD_VECTOR1]](<8 x s16>), [[PTR_ADD]](p0) :: (store 16 + 16) + ; CHECK: RET_ReallyLR + %val:_(<16 x s16>) = G_IMPLICIT_DEF + %ptr:_(p0) = COPY $x0 + G_STORE %val(<16 x s16>), %ptr(p0) :: (store 32) + RET_ReallyLR +... +--- +name: store_8xs32 +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.1: + liveins: $x0 + ; CHECK-LABEL: name: store_8xs32 + ; CHECK: liveins: $x0 + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: %ptr:_(p0) = COPY $x0 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32) + ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), %ptr(p0) :: (store 16, align 32) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) + ; CHECK: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p0) :: (store 16 + 16) + ; CHECK: RET_ReallyLR + %val:_(<8 x s32>) = G_IMPLICIT_DEF + %ptr:_(p0) = COPY $x0 + G_STORE %val(<8 x s32>), %ptr(p0) :: (store 32) + RET_ReallyLR +... +--- +name: store_4xs64 +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.1: + liveins: $x0 + ; CHECK-LABEL: name: store_4xs64 + ; CHECK: liveins: $x0 + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF + ; CHECK: %ptr:_(p0) = COPY $x0 + ; CHECK: G_STORE [[DEF]](<2 x s64>), %ptr(p0) :: (store 16, align 32) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) + ; CHECK: G_STORE [[DEF]](<2 x s64>), [[PTR_ADD]](p0) :: (store 16 + 16) + ; CHECK: RET_ReallyLR + %val:_(<4 x s64>) = G_IMPLICIT_DEF + %ptr:_(p0) = COPY $x0 + G_STORE %val(<4 x s64>), %ptr(p0) :: (store 32) + RET_ReallyLR