diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -742,16 +742,16 @@
 Variables and aliases can have a
 :ref:`Thread Local Storage Model <tls_model>`.
 
-:ref:`Scalable vectors <t_vector>` cannot be global variables or members of
-arrays because their size is unknown at compile time. They are allowed in
-structs to facilitate intrinsics returning multiple values. Generally, structs
-containing scalable vectors are not considered "sized" and cannot be used in
-loads, stores, allocas, or GEPs. The only exception to this rule is for structs
-that contain scalable vectors of the same type (e.g. ``{<vscale x 2 x i32>,
-<vscale x 2 x i32>}`` contains the same type while ``{<vscale x 2 x i32>,
-<vscale x 2 x i64>}`` doesn't). These kinds of structs (we may call them
-homogeneous scalable vector structs) are considered sized and can be used in
-loads, stores, allocas, but not GEPs.
+Globals cannot be or contain :ref:`Scalable vectors <t_vector>` because their
+size is unknown at compile time. They are allowed in structs to facilitate
+intrinsics returning multiple values. Generally, structs containing scalable
+vectors are not considered "sized" and cannot be used in loads, stores, allocas,
+or GEPs. The only exception to this rule is for structs that contain scalable
+vectors of the same type (e.g. ``{<vscale x 2 x i32>, <vscale x 2 x i32>}``
+contains the same type while ``{<vscale x 2 x i32>, <vscale x 2 x i64>}``
+doesn't). These kinds of structs (we may call them homogeneous scalable vector
+structs) are considered sized and can be used in loads, stores, allocas, but
+not GEPs.
 
 Syntax::
 
diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h
--- a/llvm/include/llvm/IR/Type.h
+++ b/llvm/include/llvm/IR/Type.h
@@ -209,8 +209,7 @@
   /// Return true if this is a target extension type with a scalable layout.
   bool isScalableTargetExtTy() const;
 
-  /// Return true if this is a scalable vector type or a target extension type
-  /// with a scalable layout.
+  /// Return true if this is a type whose size is a known multiple of vscale.
   bool isScalableTy() const;
 
   /// Return true if this is a FP type or a vector of FP.
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -4934,7 +4934,7 @@
     return UndefValue::get(GEPTy);
 
   bool IsScalableVec =
-      isa<ScalableVectorType>(SrcTy) || any_of(Indices, [](const Value *V) {
+      SrcTy->isScalableTy() || any_of(Indices, [](const Value *V) {
        return isa<ScalableVectorType>(V->getType());
      });
 
diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp
--- a/llvm/lib/IR/Operator.cpp
+++ b/llvm/lib/IR/Operator.cpp
@@ -127,9 +127,7 @@
   auto end = generic_gep_type_iterator<decltype(Index.end())>::end(Index.end());
   for (auto GTI = begin, GTE = end; GTI != GTE; ++GTI) {
     // Scalable vectors are multiplied by a runtime constant.
-    bool ScalableType = false;
-    if (isa<ScalableVectorType>(GTI.getIndexedType()))
-      ScalableType = true;
+    bool ScalableType = GTI.getIndexedType()->isScalableTy();
 
     Value *V = GTI.getOperand();
     StructType *STy = GTI.getStructTypeOrNull();
@@ -189,7 +187,7 @@
   for (gep_type_iterator GTI = gep_type_begin(this), GTE = gep_type_end(this);
        GTI != GTE; ++GTI) {
     // Scalable vectors are multiplied by a runtime constant.
-    bool ScalableType = isa<ScalableVectorType>(GTI.getIndexedType());
+    bool ScalableType = GTI.getIndexedType()->isScalableTy();
 
     Value *V = GTI.getOperand();
     StructType *STy = GTI.getStructTypeOrNull();
diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp
--- a/llvm/lib/IR/Type.cpp
+++ b/llvm/lib/IR/Type.cpp
@@ -58,6 +58,8 @@
 }
 
 bool Type::isScalableTy() const {
+  if (const auto *ATy = dyn_cast<ArrayType>(this))
+    return ATy->getElementType()->isScalableTy();
   if (const auto *STy = dyn_cast<StructType>(this)) {
     SmallPtrSet<Type *, 4> Visited;
     return STy->containsScalableVectorType(&Visited);
@@ -658,8 +660,7 @@
 bool ArrayType::isValidElementType(Type *ElemTy) {
   return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
          !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy() &&
-         !ElemTy->isTokenTy() && !ElemTy->isX86_AMXTy() &&
-         !isa<ScalableVectorType>(ElemTy);
+         !ElemTy->isTokenTy() && !ElemTy->isX86_AMXTy();
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -850,17 +850,9 @@
   }
 
   // Scalable vectors cannot be global variables, since we don't know
-  // the runtime size. If the global is an array containing scalable vectors,
-  // that will be caught by the isValidElementType methods in StructType or
-  // ArrayType instead.
-  Check(!isa<ScalableVectorType>(GV.getValueType()),
-        "Globals cannot contain scalable vectors", &GV);
-
-  if (auto *STy = dyn_cast<StructType>(GV.getValueType())) {
-    SmallPtrSet<Type *, 4> Visited;
-    Check(!STy->containsScalableVectorType(&Visited),
-          "Globals cannot contain scalable vectors", &GV);
-  }
+  // the runtime size.
+  Check(!GV.getValueType()->isScalableTy(),
+        "Globals cannot contain scalable types", &GV);
 
   // Check if it's a target extension type that disallows being used as a
   // global.
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -390,7 +390,7 @@
   }
 
   // Scalable types not currently supported.
-  if (isa<ScalableVectorType>(Ty))
+  if (Ty->isScalableTy())
     return false;
 
   auto IsStored = [](Value *V, Constant *Initializer) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -804,7 +804,7 @@
       return nullptr;
 
     const DataLayout &DL = IC.getDataLayout();
-    auto EltSize = DL.getTypeAllocSize(ET);
+    TypeSize EltSize = DL.getTypeAllocSize(ET);
     const auto Align = LI.getAlign();
 
     auto *Addr = LI.getPointerOperand();
@@ -812,7 +812,7 @@
     auto *Zero = ConstantInt::get(IdxType, 0);
 
     Value *V = PoisonValue::get(T);
-    uint64_t Offset = 0;
+    TypeSize Offset = TypeSize::get(0, ET->isScalableTy());
    for (uint64_t i = 0; i < NumElements; i++) {
      Value *Indices[2] = {
        Zero,
@@ -820,9 +820,9 @@
      };
      auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices),
                                               Name + ".elt");
+      auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
      auto *L = IC.Builder.CreateAlignedLoad(AT->getElementType(), Ptr,
-                                             commonAlignment(Align, Offset),
-                                             Name + ".unpack");
+                                             EltAlign, Name + ".unpack");
      L->setAAMetadata(LI.getAAMetadata());
      V = IC.Builder.CreateInsertValue(V, L, i);
      Offset += EltSize;
@@ -957,7 +957,7 @@
   Type *SourceElementType = GEPI->getSourceElementType();
   // Size information about scalable vectors is not available, so we cannot
   // deduce whether indexing at n is undefined behaviour or not. Bail out.
-  if (isa<ScalableVectorType>(SourceElementType))
+  if (SourceElementType->isScalableTy())
     return false;
 
   Type *AllocTy = GetElementPtrInst::getIndexedType(SourceElementType, Ops);
@@ -1323,7 +1323,7 @@
     return false;
 
   const DataLayout &DL = IC.getDataLayout();
-  auto EltSize = DL.getTypeAllocSize(AT->getElementType());
+  TypeSize EltSize = DL.getTypeAllocSize(AT->getElementType());
   const auto Align = SI.getAlign();
 
   SmallString<16> EltName = V->getName();
@@ -1335,7 +1335,7 @@
   auto *IdxType = Type::getInt64Ty(T->getContext());
   auto *Zero = ConstantInt::get(IdxType, 0);
 
-  uint64_t Offset = 0;
+  TypeSize Offset = TypeSize::get(0, AT->getElementType()->isScalableTy());
   for (uint64_t i = 0; i < NumElements; i++) {
     Value *Indices[2] = {
       Zero,
@@ -1344,7 +1344,7 @@
     auto *Ptr =
         IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices), AddrName);
     auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
-    auto EltAlign = commonAlignment(Align, Offset);
+    auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
     Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
     NS->setAAMetadata(SI.getAAMetadata());
     Offset += EltSize;
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2005,7 +2005,7 @@
   APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
   if (NumVarIndices != Src->getNumIndices()) {
     // FIXME: getIndexedOffsetInType() does not handled scalable vectors.
-    if (isa<ScalableVectorType>(BaseType))
+    if (BaseType->isScalableTy())
       return nullptr;
 
     SmallVector<Value *> ConstantIndices;
@@ -2118,7 +2118,7 @@
   SmallVector<Value *, 8> Indices(GEP.indices());
   Type *GEPType = GEP.getType();
   Type *GEPEltType = GEP.getSourceElementType();
-  bool IsGEPSrcEleScalable = isa<ScalableVectorType>(GEPEltType);
+  bool IsGEPSrcEleScalable = GEPEltType->isScalableTy();
   if (Value *V = simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(),
                                  SQ.getWithInstruction(&GEP)))
     return replaceInstUsesWith(GEP, V);
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -830,7 +830,7 @@
   for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
     if (GTI.isSequential()) {
       // Constant offsets of scalable types are not really constant.
-      if (isa<ScalableVectorType>(GTI.getIndexedType()))
+      if (GTI.getIndexedType()->isScalableTy())
        continue;
 
      // Tries to extract a constant offset from this GEP index.
@@ -1019,7 +1019,7 @@
   for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
     if (GTI.isSequential()) {
       // Constant offsets of scalable types are not really constant.
-      if (isa<ScalableVectorType>(GTI.getIndexedType()))
+      if (GTI.getIndexedType()->isScalableTy())
        continue;
 
      // Splits this GEP index into a variadic part and a constant offset, and
diff --git a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+%my_subtype = type <vscale x 2 x double>
+%my_type = type [3 x %my_subtype]
+
+define void @array_1D(ptr %addr) #0 {
+; CHECK-LABEL: array_1D:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT:    ld1d { z2.d }, p0/z, [x0]
+; CHECK-NEXT:    st1d { z2.d }, p0, [sp]
+; CHECK-NEXT:    st1d { z1.d }, p0, [sp, #2, mul vl]
+; CHECK-NEXT:    st1d { z0.d }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #3
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+entry:
+  %ret = alloca %my_type, align 8
+  %val = load %my_type, ptr %addr
+  store %my_type %val, ptr %ret, align 8
+  ret void
+}
+
+define %my_subtype @array_1D_extract(ptr %addr) #0 {
+; CHECK-LABEL: array_1D_extract:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #3
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+entry:
+  %ret = alloca %my_type, align 8
+  %val = load %my_type, ptr %addr
+  %elt = extractvalue %my_type %val, 1
+  ret %my_subtype %elt
+}
+
+define void @array_1D_insert(ptr %addr, %my_subtype %elt) #0 {
+; CHECK-LABEL: array_1D_insert:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0]
+; CHECK-NEXT:    ld1d { z2.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT:    st1d { z2.d }, p0, [sp, #2, mul vl]
+; CHECK-NEXT:    st1d { z1.d }, p0, [sp]
+; CHECK-NEXT:    st1d { z0.d }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #3
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+entry:
+  %ret = alloca %my_type, align 8
+  %val = load %my_type, ptr %addr
+  %ins = insertvalue %my_type %val, %my_subtype %elt, 1
+  store %my_type %ins, ptr %ret, align 8
+  ret void
+}
+
+define void @array_2D(ptr %addr) #0 {
+; CHECK-LABEL: array_2D:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-6
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 48 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT:    ld1d { z2.d }, p0/z, [x0, #3, mul vl]
+; CHECK-NEXT:    ld1d { z3.d }, p0/z, [x0, #4, mul vl]
+; CHECK-NEXT:    ld1d { z4.d }, p0/z, [x0, #5, mul vl]
+; CHECK-NEXT:    ld1d { z5.d }, p0/z, [x0]
+; CHECK-NEXT:    st1d { z5.d }, p0, [sp]
+; CHECK-NEXT:    st1d { z4.d }, p0, [sp, #5, mul vl]
+; CHECK-NEXT:    st1d { z3.d }, p0, [sp, #4, mul vl]
+; CHECK-NEXT:    st1d { z2.d }, p0, [sp, #3, mul vl]
+; CHECK-NEXT:    st1d { z1.d }, p0, [sp, #2, mul vl]
+; CHECK-NEXT:    st1d { z0.d }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #6
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+entry:
+  %ret = alloca [2 x %my_type], align 8
+  %val = load [2 x %my_type], ptr %addr
+  store [2 x %my_type] %val, ptr %ret, align 8
+  ret void
+}
+
+attributes #0 = { "target-features"="+sve" }
diff --git a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs --riscv-no-aliases < %s | FileCheck %s
+
+target triple = "riscv64-unknown-unknown-elf"
+
+%my_type = type [3 x <vscale x 1 x double>]
+
+define void @test(ptr %addr) {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrrs a1, vlenb, zero
+; CHECK-NEXT:    slli a1, a1, 2
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT:    csrrs a1, vlenb, zero
+; CHECK-NEXT:    add a2, a0, a1
+; CHECK-NEXT:    vl1re64.v v8, (a2)
+; CHECK-NEXT:    slli a2, a1, 1
+; CHECK-NEXT:    vl1re64.v v9, (a0)
+; CHECK-NEXT:    add a0, a0, a2
+; CHECK-NEXT:    vl1re64.v v10, (a0)
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v9, (a0)
+; CHECK-NEXT:    add a2, a0, a2
+; CHECK-NEXT:    vs1r.v v10, (a2)
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    vs1r.v v8, (a0)
+; CHECK-NEXT:    csrrs a0, vlenb, zero
+; CHECK-NEXT:    slli a0, a0, 2
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %ret = alloca %my_type, align 8
+  %val = load %my_type, ptr %addr
+  store %my_type %val, ptr %ret, align 8
+  ret void
+}
diff --git a/llvm/test/Other/scalable-vector-array.ll b/llvm/test/Other/scalable-vector-array.ll
deleted file mode 100644
--- a/llvm/test/Other/scalable-vector-array.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: not opt -S -passes=verify < %s 2>&1 | FileCheck %s
-
-;; Arrays cannot contain scalable vectors; make sure we detect them even
-;; when nested inside other aggregates.
-
-%ty = type { i64, [4 x <vscale x 256 x i1>] }
-; CHECK: error: invalid array element type
-; CHECK: %ty = type { i64, [4 x <vscale x 256 x i1>] }
diff --git a/llvm/test/Transforms/GVN/opaque-ptr.ll b/llvm/test/Transforms/GVN/opaque-ptr.ll
--- a/llvm/test/Transforms/GVN/opaque-ptr.ll
+++ b/llvm/test/Transforms/GVN/opaque-ptr.ll
@@ -52,6 +52,12 @@
 ; CHECK-NEXT:    call void @use(ptr [[GEP5]])
 ; CHECK-NEXT:    call void @use(ptr [[GEP5_SAME]])
 ; CHECK-NEXT:    call void @use(ptr [[GEP5_DIFFERENT]])
+; CHECK-NEXT:    [[GEP6:%.*]] = getelementptr [4 x <vscale x 4 x i32>], ptr [[P]], i64 [[IDX]], i64 1
+; CHECK-NEXT:    [[GEP6_SAME:%.*]] = getelementptr [4 x <vscale x 4 x i32>], ptr [[P]], i64 [[IDX]], i64 1
+; CHECK-NEXT:    [[GEP6_DIFFERENT:%.*]] = getelementptr [4 x <vscale x 4 x i32>], ptr [[P]], i64 [[IDX2]], i64 1
+; CHECK-NEXT:    call void @use(ptr [[GEP6]])
+; CHECK-NEXT:    call void @use(ptr [[GEP6_SAME]])
+; CHECK-NEXT:    call void @use(ptr [[GEP6_DIFFERENT]])
 ; CHECK-NEXT:    ret void
 ;
   %gep1 = getelementptr i64, ptr %p, i64 1
@@ -89,6 +95,12 @@
   call void @use(ptr %gep5)
   call void @use(ptr %gep5.same)
   call void @use(ptr %gep5.different)
+  %gep6 = getelementptr [4 x <vscale x 4 x i32>], ptr %p, i64 %idx, i64 1
+  %gep6.same = getelementptr [4 x <vscale x 4 x i32>], ptr %p, i64 %idx, i64 1
+  %gep6.different = getelementptr [4 x <vscale x 4 x i32>], ptr %p, i64 %idx2, i64 1
+  call void @use(ptr %gep6)
+  call void @use(ptr %gep6.same)
+  call void @use(ptr %gep6.different)
   ret void
 }
diff --git a/llvm/test/Transforms/GlobalOpt/2022-08-23-ScalableVectorArrayCrash.ll b/llvm/test/Transforms/GlobalOpt/2022-08-23-ScalableVectorArrayCrash.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/2022-08-23-ScalableVectorArrayCrash.ll
@@ -0,0 +1,15 @@
+; RUN: opt -passes=globalopt < %s
+
+; Ensure we don't ICE by trying to optimize a scalable vector load of a global
+; variable.
+
+%struct.xxx = type <{ [96 x i8] }>
+
+@.bss = internal unnamed_addr global %struct.xxx zeroinitializer, align 32
+
+define dso_local void @foo() local_unnamed_addr align 16 {
+L.entry:
+  store [4 x <vscale x 2 x double>] zeroinitializer, ptr @.bss, align 1
+  %0 = load [4 x <vscale x 2 x double>], ptr @.bss, align 8
+  unreachable
+}
diff --git a/llvm/test/Transforms/InstCombine/gep-can-replace-gep-idx-with-zero-typesize.ll b/llvm/test/Transforms/InstCombine/gep-can-replace-gep-idx-with-zero-typesize.ll
--- a/llvm/test/Transforms/InstCombine/gep-can-replace-gep-idx-with-zero-typesize.ll
+++ b/llvm/test/Transforms/InstCombine/gep-can-replace-gep-idx-with-zero-typesize.ll
@@ -18,3 +18,10 @@
   call void @do_something(<vscale x 4 x i32> %tmp)
   ret void
 }
+
+define void @can_replace_gep_idx_with_zero_typesize_2(i64 %n, ptr %a, i64 %b) {
+  %idx = getelementptr [2 x <vscale x 4 x i32>], ptr %a, i64 %b, i64 0
+  %tmp = load <vscale x 4 x i32>, ptr %idx
+  call void @do_something(<vscale x 4 x i32> %tmp)
+  ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/opaque-ptr.ll b/llvm/test/Transforms/InstCombine/opaque-ptr.ll
--- a/llvm/test/Transforms/InstCombine/opaque-ptr.ll
+++ b/llvm/test/Transforms/InstCombine/opaque-ptr.ll
@@ -298,6 +298,17 @@
   ret ptr %a3
 }
 
+define ptr @geps_combinable_scalable_vector_array(ptr %a, i64 %idx) {
+; CHECK-LABEL: @geps_combinable_scalable_vector_array(
+; CHECK-NEXT:    [[A2:%.*]] = getelementptr inbounds [4 x <vscale x 2 x i32>], ptr [[A:%.*]], i64 1
+; CHECK-NEXT:    [[A3:%.*]] = getelementptr inbounds i8, ptr [[A2]], i64 4
+; CHECK-NEXT:    ret ptr [[A3]]
+;
+  %a2 = getelementptr inbounds [4 x <vscale x 2 x i32>], ptr %a, i64 1
+  %a3 = getelementptr inbounds i8, ptr %a2, i32 4
+  ret ptr %a3
+}
+
 define i1 @compare_geps_same_indices(ptr %a, ptr %b, i64 %idx) {
; CHECK-LABEL: @compare_geps_same_indices(
; CHECK-NEXT:    [[C:%.*]] = icmp eq ptr [[A:%.*]], [[B:%.*]]
diff --git a/llvm/test/Transforms/InstCombine/scalable-vector-array.ll b/llvm/test/Transforms/InstCombine/scalable-vector-array.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/scalable-vector-array.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -passes=instcombine -S < %s | FileCheck %s
+
+define <vscale x 4 x i32> @load(ptr %x) {
+; CHECK-LABEL: define <vscale x 4 x i32> @load
+; CHECK-SAME: (ptr [[X:%.*]]) {
+; CHECK-NEXT:    [[A_ELT1:%.*]] = getelementptr inbounds [2 x <vscale x 4 x i32>], ptr [[X]], i64 0, i64 1
+; CHECK-NEXT:    [[A_UNPACK2:%.*]] = load <vscale x 4 x i32>, ptr [[A_ELT1]], align 16
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A_UNPACK2]]
+;
+  %a = load [2 x <vscale x 4 x i32>], ptr %x
+  %b = extractvalue [2 x <vscale x 4 x i32>] %a, 1
+  ret <vscale x 4 x i32> %b
+}
+
+define void @store(ptr %x, <vscale x 4 x i32> %y, <vscale x 4 x i32> %z) {
+; CHECK-LABEL: define void @store
+; CHECK-SAME: (ptr [[X:%.*]], <vscale x 4 x i32> [[Y:%.*]], <vscale x 4 x i32> [[Z:%.*]]) {
+; CHECK-NEXT:    store <vscale x 4 x i32> [[Y]], ptr [[X]], align 16
+; CHECK-NEXT:    [[X_REPACK1:%.*]] = getelementptr inbounds [2 x <vscale x 4 x i32>], ptr [[X]], i64 0, i64 1
+; CHECK-NEXT:    store <vscale x 4 x i32> [[Z]], ptr [[X_REPACK1]], align 16
+; CHECK-NEXT:    ret void
+;
+  %a = insertvalue [2 x <vscale x 4 x i32>] poison, <vscale x 4 x i32> %y, 0
+  %b = insertvalue [2 x <vscale x 4 x i32>] %a, <vscale x 4 x i32> %z, 1
+  store [2 x <vscale x 4 x i32>] %b, ptr %x
+  ret void
+}
diff --git a/llvm/test/Transforms/InstSimplify/gep.ll b/llvm/test/Transforms/InstSimplify/gep.ll
--- a/llvm/test/Transforms/InstSimplify/gep.ll
+++ b/llvm/test/Transforms/InstSimplify/gep.ll
@@ -358,3 +358,12 @@
   %res = getelementptr inbounds %t.3, ptr %ptr, i64 0, i32 1, <8 x i64> poison, i32 1
   ret <8 x ptr> %res
 }
+
+define i64 @gep_array_of_scalable_vectors_ptrdiff(ptr %ptr) {
+  %c1 = getelementptr inbounds [8 x <vscale x 4 x i32>], ptr %ptr, i64 4
+  %c2 = getelementptr inbounds [8 x <vscale x 4 x i32>], ptr %ptr, i64 6
+  %c1.int = ptrtoint ptr %c1 to i64
+  %c2.int = ptrtoint ptr %c2 to i64
+  %diff = sub i64 %c2.int, %c1.int
+  ret i64 %diff
+}
diff --git a/llvm/test/Transforms/SROA/scalable-vector-array.ll b/llvm/test/Transforms/SROA/scalable-vector-array.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SROA/scalable-vector-array.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s
+; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s
+
+; This test checks that SROA runs mem2reg on arrays of scalable vectors.
+
+define [ 2 x <vscale x 4 x i32> ] @alloca(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: define [2 x <vscale x 4 x i32>] @alloca
+; CHECK-SAME: (<vscale x 4 x i32> [[X:%.*]], <vscale x 4 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:    [[AGG0:%.*]] = insertvalue [2 x <vscale x 4 x i32>] poison, <vscale x 4 x i32> [[X]], 0
+; CHECK-NEXT:    [[AGG1:%.*]] = insertvalue [2 x <vscale x 4 x i32>] [[AGG0]], <vscale x 4 x i32> [[Y]], 1
+; CHECK-NEXT:    ret [2 x <vscale x 4 x i32>] [[AGG1]]
+;
+  %addr = alloca [ 2 x <vscale x 4 x i32> ], align 4
+  %agg0 = insertvalue [ 2 x <vscale x 4 x i32> ] poison, <vscale x 4 x i32> %x, 0
+  %agg1 = insertvalue [ 2 x <vscale x 4 x i32> ] %agg0, <vscale x 4 x i32> %y, 1
+  store [ 2 x <vscale x 4 x i32> ] %agg1, ptr %addr, align 4
+  %val = load [ 2 x <vscale x 4 x i32> ], ptr %addr, align 4
+  ret [ 2 x <vscale x 4 x i32> ] %val
+}
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AArch64/scalable-vector-geps.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AArch64/scalable-vector-geps.ll
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AArch64/scalable-vector-geps.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AArch64/scalable-vector-geps.ll
@@ -28,4 +28,41 @@
   ret ptr %gep
 }
+
+; Index is implicitly multiplied by vscale and so not really constant.
+define ptr @test3(ptr %base, i64 %idx) #0 {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[IDX_NEXT:%.*]] = add nuw nsw i64 [[IDX:%.*]], 1
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr [8 x <vscale x 4 x float>], ptr [[BASE:%.*]], i64 [[IDX_NEXT]]
+; CHECK-NEXT:    ret ptr [[GEP]]
+;
+  %idx.next = add nuw nsw i64 %idx, 1
+  %gep = getelementptr [8 x <vscale x 4 x float>], ptr %base, i64 %idx.next
+  ret ptr %gep
+}
+
+; Indices are implicitly multiplied by vscale and so not really constant.
+define ptr @test4(ptr %base, i64 %idx) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[IDX_NEXT:%.*]] = add nuw nsw i64 [[IDX:%.*]], 1
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr [8 x <vscale x 4 x float>], ptr [[BASE:%.*]], i64 3, i64 [[IDX_NEXT]]
+; CHECK-NEXT:    ret ptr [[GEP]]
+;
+  %idx.next = add nuw nsw i64 %idx, 1
+  %gep = getelementptr [8 x <vscale x 4 x float>], ptr %base, i64 3, i64 %idx.next
+  ret ptr %gep
+}
+
+; Whilst the first two indices are not constant, the calculation of the third
+; index does contain a constant that can be extracted.
+define ptr @test5(ptr %base, i64 %idx) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [8 x <vscale x 4 x float>], ptr [[BASE:%.*]], i64 1, i64 3, i64 [[IDX:%.*]]
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr float, ptr [[TMP1]], i64 1
+; CHECK-NEXT:    ret ptr [[GEP2]]
+;
+  %idx.next = add nuw nsw i64 %idx, 1
+  %gep2 = getelementptr [8 x <vscale x 4 x float>], ptr %base, i64 1, i64 3, i64 %idx.next
+  ret ptr %gep2
+}
 
 attributes #0 = { "target-features"="+sve" }
diff --git a/llvm/test/Verifier/scalable-global-vars.ll b/llvm/test/Verifier/scalable-global-vars.ll
--- a/llvm/test/Verifier/scalable-global-vars.ll
+++ b/llvm/test/Verifier/scalable-global-vars.ll
@@ -3,14 +3,15 @@
 ;; Global variables cannot be scalable vectors, since we don't
 ;; know the size at compile time.
 
-; CHECK: Globals cannot contain scalable vectors
+; CHECK: Globals cannot contain scalable types
 ; CHECK-NEXT: ptr @ScalableVecGlobal
 @ScalableVecGlobal = global <vscale x 4 x i32> zeroinitializer
 
-; CHECK-NEXT: Globals cannot contain scalable vectors
+; CHECK-NEXT: Globals cannot contain scalable types
+; CHECK-NEXT: ptr @ScalableVecArrayGlobal
+@ScalableVecArrayGlobal = global [ 8 x <vscale x 4 x i32> ] zeroinitializer
+
+; CHECK-NEXT: Globals cannot contain scalable types
 ; CHECK-NEXT: ptr @ScalableVecStructGlobal
 @ScalableVecStructGlobal = global { i32, <vscale x 4 x i32> } zeroinitializer
 
-;; Global _pointers_ to scalable vectors are fine
-; CHECK-NOT: Globals cannot contain scalable vectors
-@ScalableVecPtr = global ptr zeroinitializer
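
For illustration, a minimal IR sketch of the rules this patch establishes; the module below is hypothetical (names such as @second_element are not taken from the patch), but it follows the behaviour exercised by the tests above:

  ; Arrays of scalable vectors are now legal as SSA values and in allocas,
  ; loads, stores, and extractvalue/insertvalue; their size is a known
  ; multiple of vscale, so Type::isScalableTy() returns true for them.
  define <vscale x 4 x i32> @second_element(ptr %p) {
    %tmp = alloca [4 x <vscale x 4 x i32>]
    %val = load [4 x <vscale x 4 x i32>], ptr %p
    store [4 x <vscale x 4 x i32>] %val, ptr %tmp
    %elt = extractvalue [4 x <vscale x 4 x i32>] %val, 1
    ret <vscale x 4 x i32> %elt
  }

  ; Still rejected by the verifier, because globals need a compile-time size:
  ;   @g = global [4 x <vscale x 4 x i32>] zeroinitializer
  ;   error: Globals cannot contain scalable types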