diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h --- a/llvm/include/llvm/IR/Type.h +++ b/llvm/include/llvm/IR/Type.h @@ -206,6 +206,9 @@ /// Return true if this is a target extension type. bool isTargetExtTy() const { return getTypeID() == TargetExtTyID; } + /// Return true if this is a target extension type with a scalable layout. + bool isScalableTargetExtTy() const; + /// Return true if this is a FP type or a vector of FP. bool isFPOrFPVectorTy() const { return getScalarType()->isFloatingPointTy(); } diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp --- a/llvm/lib/IR/Type.cpp +++ b/llvm/lib/IR/Type.cpp @@ -80,6 +80,12 @@ return APFloat::getZero(getFltSemantics()).isIEEE(); } +bool Type::isScalableTargetExtTy() const { + if (auto *TT = dyn_cast<TargetExtType>(this)) + return isa<ScalableVectorType>(TT->getLayoutType()); + return false; +} + Type *Type::getFloatingPointTy(LLVMContext &C, const fltSemantics &S) { Type *Ty; if (&S == &APFloat::IEEEhalf()) @@ -866,6 +872,13 @@ return TargetTypeInfo(Type::getInt8PtrTy(C, 0), TargetExtType::HasZeroInit, TargetExtType::CanBeGlobal); } + + // Opaque types in the AArch64 name space. + if (Name.startswith("aarch64.")) { + if (Name == "aarch64.svcount") + return TargetTypeInfo(ScalableVectorType::get(Type::getInt1Ty(C), 16)); + } + return TargetTypeInfo(Type::getVoidTy(C)); } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1681,6 +1681,10 @@ return &SI; } + // The code below doesn't work on scalable target types. + if (TrueVal->getType()->isScalableTargetExtTy()) + return nullptr; + // FIXME: This code is nearly duplicated in InstSimplify. Using/refactoring // decomposeBitTestICmp() might help. 
{ diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -888,7 +888,8 @@ if (!IsOffsetKnown) return PI.setAborted(&LI); - if (isa<ScalableVectorType>(LI.getType())) + if (isa<ScalableVectorType>(LI.getType()) || + LI.getType()->isScalableTargetExtTy()) return PI.setAborted(&LI); uint64_t Size = DL.getTypeStoreSize(LI.getType()).getFixedValue(); @@ -902,7 +903,8 @@ if (!IsOffsetKnown) return PI.setAborted(&SI); - if (isa<ScalableVectorType>(ValOp->getType())) + if (isa<ScalableVectorType>(ValOp->getType()) || + ValOp->getType()->isScalableTargetExtTy()) return PI.setAborted(&SI); uint64_t Size = DL.getTypeStoreSize(ValOp->getType()).getFixedValue(); @@ -4777,7 +4779,7 @@ // Skip alloca forms that this analysis can't handle. auto *AT = AI.getAllocatedType(); if (AI.isArrayAllocation() || !AT->isSized() || isa<ScalableVectorType>(AT) || - DL.getTypeAllocSize(AT).getFixedValue() == 0) + AT->isScalableTargetExtTy() || DL.getTypeAllocSize(AT).getFixedValue() == 0) return {Changed, CFGChanged}; // First, split any FCA loads and stores touching this alloca to promote @@ -4904,7 +4906,8 @@ for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end()); I != E; ++I) { if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) { - if (isa<ScalableVectorType>(AI->getAllocatedType())) { + if (isa<ScalableVectorType>(AI->getAllocatedType()) || + AI->getAllocatedType()->isScalableTargetExtTy()) { if (isAllocaPromotable(AI)) PromotableAllocas.push_back(AI); } else { diff --git a/llvm/test/Transforms/InstCombine/AArch64/sme-svcount.ll b/llvm/test/Transforms/InstCombine/AArch64/sme-svcount.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/AArch64/sme-svcount.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes='instcombine' -S < %s | FileCheck %s + +define target("aarch64.svcount") @test_alloca_store_reload(target("aarch64.svcount") %val) nounwind { +; CHECK-LABEL: @test_alloca_store_reload( +; CHECK-NEXT: ret 
target("aarch64.svcount") [[VAL:%.*]] +; + %ptr = alloca target("aarch64.svcount"), align 1 + store target("aarch64.svcount") %val, ptr %ptr + %res = load target("aarch64.svcount"), ptr %ptr + ret target("aarch64.svcount") %res +} + +; Test that instcombine doesn't try to query the (scalable) size of target("aarch64.svcount") +; in foldSelectInstWithICmp. +define target("aarch64.svcount") @test_combine_on_select(target("aarch64.svcount") %x, target("aarch64.svcount") %y, i32 %k) { +; CHECK-LABEL: @test_combine_on_select( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[K:%.*]], 42 +; CHECK-NEXT: [[X_Y:%.*]] = select i1 [[CMP]], target("aarch64.svcount") [[X:%.*]], target("aarch64.svcount") [[Y:%.*]] +; CHECK-NEXT: ret target("aarch64.svcount") [[X_Y]] +; + %cmp = icmp sgt i32 %k, 42 + %x.y = select i1 %cmp, target("aarch64.svcount") %x, target("aarch64.svcount") %y + ret target("aarch64.svcount") %x.y +} diff --git a/llvm/test/Transforms/SROA/aarch64-sme-svcount.ll b/llvm/test/Transforms/SROA/aarch64-sme-svcount.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SROA/aarch64-sme-svcount.ll @@ -0,0 +1,12 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes='sroa' -S < %s | FileCheck %s + +define target("aarch64.svcount") @test_alloca_store_reload(target("aarch64.svcount") %val) nounwind { +; CHECK-LABEL: @test_alloca_store_reload( +; CHECK-NEXT: ret target("aarch64.svcount") [[VAL:%.*]] +; + %ptr = alloca target("aarch64.svcount"), align 1 + store target("aarch64.svcount") %val, ptr %ptr + %res = load target("aarch64.svcount"), ptr %ptr + ret target("aarch64.svcount") %res +}