diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -742,16 +742,16 @@ Variables and aliases can have a :ref:`Thread Local Storage Model <tls_model>`. -:ref:`Scalable vectors <t_vector>` cannot be global variables or members of -arrays because their size is unknown at compile time. They are allowed in -structs to facilitate intrinsics returning multiple values. Generally, structs -containing scalable vectors are not considered "sized" and cannot be used in -loads, stores, allocas, or GEPs. The only exception to this rule is for structs -that contain scalable vectors of the same type (e.g. ``{<vscale x 2 x i32>, -<vscale x 2 x i32>}`` contains the same type while ``{<vscale x 2 x i32>, -<vscale x 2 x i64>}`` doesn't). These kinds of structs (we may call them -homogeneous scalable vector structs) are considered sized and can be used in -loads, stores, allocas, but not GEPs. +Globals cannot be or contain :ref:`scalable vectors <t_vector>` because their +size is unknown at compile time. Scalable vectors are allowed in structs to facilitate +intrinsics returning multiple values. Generally, structs containing scalable +vectors are not considered "sized" and cannot be used in loads, stores, allocas, +or GEPs. The only exception to this rule is for structs that contain scalable +vectors of the same type (e.g. ``{<vscale x 2 x i32>, <vscale x 2 x i32>}`` +contains the same type while ``{<vscale x 2 x i32>, <vscale x 2 x i64>}`` +doesn't). These kinds of structs (we may call them homogeneous scalable vector +structs) are considered sized and can be used in loads, stores, allocas, but +not GEPs. 
Syntax:: diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -4931,7 +4931,7 @@ return UndefValue::get(GEPTy); bool IsScalableVec = - isa(SrcTy) || any_of(Indices, [](const Value *V) { + SrcTy->isScalableTy() || any_of(Indices, [](const Value *V) { return isa(V->getType()); }); diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp --- a/llvm/lib/IR/Type.cpp +++ b/llvm/lib/IR/Type.cpp @@ -58,6 +58,8 @@ } bool Type::isScalableTy() const { + if (const auto *ATy = dyn_cast(this)) + return ATy->getElementType()->isScalableTy(); if (const auto *STy = dyn_cast(this)) { SmallPtrSet Visited; return STy->containsScalableVectorType(&Visited); @@ -658,8 +660,7 @@ bool ArrayType::isValidElementType(Type *ElemTy) { return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() && !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy() && - !ElemTy->isTokenTy() && !ElemTy->isX86_AMXTy() && - !isa(ElemTy); + !ElemTy->isTokenTy() && !ElemTy->isX86_AMXTy(); } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -850,17 +850,9 @@ } // Scalable vectors cannot be global variables, since we don't know - // the runtime size. If the global is an array containing scalable vectors, - // that will be caught by the isValidElementType methods in StructType or - // ArrayType instead. - Check(!isa(GV.getValueType()), - "Globals cannot contain scalable vectors", &GV); - - if (auto *STy = dyn_cast(GV.getValueType())) { - SmallPtrSet Visited; - Check(!STy->containsScalableVectorType(&Visited), - "Globals cannot contain scalable vectors", &GV); - } + // the runtime size. 
+ Check(!GV.getValueType()->isScalableTy(), + "Globals cannot contain scalable types", &GV); // Check if it's a target extension type that disallows being used as a // global. diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -804,7 +804,7 @@ return nullptr; const DataLayout &DL = IC.getDataLayout(); - auto EltSize = DL.getTypeAllocSize(ET); + TypeSize EltSize = DL.getTypeAllocSize(ET); const auto Align = LI.getAlign(); auto *Addr = LI.getPointerOperand(); @@ -812,7 +812,7 @@ auto *Zero = ConstantInt::get(IdxType, 0); Value *V = PoisonValue::get(T); - uint64_t Offset = 0; + TypeSize Offset = TypeSize::get(0, ET->isScalableTy()); for (uint64_t i = 0; i < NumElements; i++) { Value *Indices[2] = { Zero, @@ -820,9 +820,9 @@ }; auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices), Name + ".elt"); + auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue()); auto *L = IC.Builder.CreateAlignedLoad(AT->getElementType(), Ptr, - commonAlignment(Align, Offset), - Name + ".unpack"); + EltAlign, Name + ".unpack"); L->setAAMetadata(LI.getAAMetadata()); V = IC.Builder.CreateInsertValue(V, L, i); Offset += EltSize; @@ -1323,7 +1323,7 @@ return false; const DataLayout &DL = IC.getDataLayout(); - auto EltSize = DL.getTypeAllocSize(AT->getElementType()); + TypeSize EltSize = DL.getTypeAllocSize(AT->getElementType()); const auto Align = SI.getAlign(); SmallString<16> EltName = V->getName(); @@ -1335,7 +1335,7 @@ auto *IdxType = Type::getInt64Ty(T->getContext()); auto *Zero = ConstantInt::get(IdxType, 0); - uint64_t Offset = 0; + TypeSize Offset = TypeSize::get(0, AT->getElementType()->isScalableTy()); for (uint64_t i = 0; i < NumElements; i++) { Value *Indices[2] = { Zero, @@ -1344,7 +1344,7 @@ auto *Ptr = 
IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices), AddrName); auto *Val = IC.Builder.CreateExtractValue(V, i, EltName); - auto EltAlign = commonAlignment(Align, Offset); + auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue()); Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign); NS->setAAMetadata(SI.getAAMetadata()); Offset += EltSize; diff --git a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +%my_type = type [ 3 x ] + +define void @test(ptr %addr) #0 { +; CHECK-LABEL: test: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #2, mul vl] +; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0] +; CHECK-NEXT: st1d { z2.d }, p0, [sp] +; CHECK-NEXT: st1d { z1.d }, p0, [sp, #2, mul vl] +; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl] +; CHECK-NEXT: addvl sp, sp, #3 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %ret = alloca %my_type, align 8 + %val = load %my_type, ptr %addr + store %my_type %val, ptr %ret, align 8 + ret void +} + +attributes #0 = { "target-features"="+sve" } diff --git a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll new file mode 100644 --- /dev/null +++ 
b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs --riscv-no-aliases < %s | FileCheck %s + +target triple = "riscv64-unknown-unknown-elf" + +%my_type = type [ 3 x ] + +define void @test(ptr %addr) { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrrs a1, vlenb, zero +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrrs a1, vlenb, zero +; CHECK-NEXT: add a2, a0, a1 +; CHECK-NEXT: vl1re64.v v8, (a2) +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: vl1re64.v v9, (a0) +; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: vl1re64.v v10, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v9, (a0) +; CHECK-NEXT: add a2, a0, a2 +; CHECK-NEXT: vs1r.v v10, (a2) +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: vs1r.v v8, (a0) +; CHECK-NEXT: csrrs a0, vlenb, zero +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %ret = alloca %my_type, align 8 + %val = load %my_type, ptr %addr + store %my_type %val, ptr %ret, align 8 + ret void +} diff --git a/llvm/test/Other/scalable-vector-array.ll b/llvm/test/Other/scalable-vector-array.ll deleted file mode 100644 --- a/llvm/test/Other/scalable-vector-array.ll +++ /dev/null @@ -1,8 +0,0 @@ -; RUN: not opt -S -passes=verify < %s 2>&1 | FileCheck %s - -;; Arrays cannot contain scalable vectors; make sure we detect them even -;; when nested inside other aggregates. 
- -%ty = type { i64, [4 x ] } -; CHECK: error: invalid array element type -; CHECK: %ty = type { i64, [4 x ] } diff --git a/llvm/test/Transforms/InstCombine/scalable-vector-array.ll b/llvm/test/Transforms/InstCombine/scalable-vector-array.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/scalable-vector-array.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -passes=instcombine -S < %s | FileCheck %s + +define @load(ptr %x) { +; CHECK-LABEL: define @load +; CHECK-SAME: (ptr [[X:%.*]]) { +; CHECK-NEXT: [[A_ELT1:%.*]] = getelementptr inbounds [2 x ], ptr [[X]], i64 0, i64 1 +; CHECK-NEXT: [[A_UNPACK2:%.*]] = load , ptr [[A_ELT1]], align 16 +; CHECK-NEXT: ret [[A_UNPACK2]] +; + %a = load [2 x ], ptr %x + %b = extractvalue [2 x ] %a, 1 + ret %b +} + +define void @store(ptr %x, %y, %z) { +; CHECK-LABEL: define void @store +; CHECK-SAME: (ptr [[X:%.*]], [[Y:%.*]], [[Z:%.*]]) { +; CHECK-NEXT: store [[Y]], ptr [[X]], align 16 +; CHECK-NEXT: [[X_REPACK1:%.*]] = getelementptr inbounds [2 x ], ptr [[X]], i64 0, i64 1 +; CHECK-NEXT: store [[Z]], ptr [[X_REPACK1]], align 16 +; CHECK-NEXT: ret void +; + %a = insertvalue [2 x ] poison, %y, 0 + %b = insertvalue [2 x ] %a, %z, 1 + store [2 x ] %b, ptr %x + ret void +} diff --git a/llvm/test/Transforms/SROA/scalable-vector-array.ll b/llvm/test/Transforms/SROA/scalable-vector-array.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SROA/scalable-vector-array.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt < %s -passes='sroa' -S | FileCheck %s +; RUN: opt < %s -passes='sroa' -S | FileCheck %s + +; This test checks that SROA runs mem2reg on arrays of scalable vectors. 
+ +define [ 2 x ] @alloca( %x, %y) { +; CHECK-LABEL: define [2 x ] @alloca +; CHECK-SAME: ( [[X:%.*]], [[Y:%.*]]) { +; CHECK-NEXT: [[AGG0:%.*]] = insertvalue [2 x ] poison, [[X]], 0 +; CHECK-NEXT: [[AGG1:%.*]] = insertvalue [2 x ] [[AGG0]], [[Y]], 1 +; CHECK-NEXT: ret [2 x ] [[AGG1]] +; + %addr = alloca [ 2 x ], align 4 + %agg0 = insertvalue [ 2 x ] poison, %x, 0 + %agg1 = insertvalue [ 2 x ] %agg0, %y, 1 + store [ 2 x ] %agg1, ptr %addr, align 4 + %val = load [ 2 x ], ptr %addr, align 4 + ret [ 2 x ] %val +} diff --git a/llvm/test/Verifier/scalable-global-vars.ll b/llvm/test/Verifier/scalable-global-vars.ll --- a/llvm/test/Verifier/scalable-global-vars.ll +++ b/llvm/test/Verifier/scalable-global-vars.ll @@ -3,14 +3,15 @@ ;; Global variables cannot be scalable vectors, since we don't ;; know the size at compile time. -; CHECK: Globals cannot contain scalable vectors +; CHECK: Globals cannot contain scalable types ; CHECK-NEXT: ptr @ScalableVecGlobal @ScalableVecGlobal = global zeroinitializer -; CHECK-NEXT: Globals cannot contain scalable vectors +; CHECK-NEXT: Globals cannot contain scalable types +; CHECK-NEXT: ptr @ScalableVecArrayGlobal +@ScalableVecArrayGlobal = global [ 8 x ] zeroinitializer + +; CHECK-NEXT: Globals cannot contain scalable types ; CHECK-NEXT: ptr @ScalableVecStructGlobal @ScalableVecStructGlobal = global { i32, } zeroinitializer -;; Global _pointers_ to scalable vectors are fine -; CHECK-NOT: Globals cannot contain scalable vectors -@ScalableVecPtr = global ptr zeroinitializer