Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -803,16 +803,19 @@
         uint64_t Field = ConstIdx->getZExtValue();
         BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
       } else {
-        int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType());
-        if (ConstIdx) {
-          BaseOffset +=
-              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
+        TypeSize ElementSize = DL.getTypeAllocSize(GTI.getIndexedType());
+        // TODO: Add support for base offsets made up of scalable types.
+        if (ConstIdx && !ElementSize.isScalable()) {
+          BaseOffset += ConstIdx->getValue().sextOrTrunc(PtrSizeBits) *
+                        ElementSize.getFixedSize();
         } else {
           // Needs scale register.
           if (Scale != 0)
             // No addressing mode takes two scale registers.
             return TTI::TCC_Basic;
-          Scale = ElementSize;
+          // If this is a scalable type we are effectively still scaling by
+          // vscale and the target must be able to cope with that.
+          Scale = ElementSize.getKnownMinSize();
         }
       }
     }
Index: llvm/lib/Transforms/Scalar/LICM.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LICM.cpp
+++ llvm/lib/Transforms/Scalar/LICM.cpp
@@ -940,7 +940,19 @@
                                   Loop *CurLoop) {
   Value *Addr = LI->getOperand(0);
   const DataLayout &DL = LI->getModule()->getDataLayout();
-  const uint32_t LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
+  const TypeSize LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
+
+  // It is not currently possible for clang to generate an invariant.start
+  // intrinsic with scalable vector types because we don't support thread local
+  // sizeless types and we don't permit sizeless types in structs or classes.
+  // Furthermore, even if support is added for this in future the intrinsic
+  // itself is defined to have a size of -1 for variable sized objects. This
+  // makes it impossible to verify if the intrinsic envelops our region of
+  // interest. For example, both <vscale x 32 x i8> and <vscale x 16 x i8>
+  // types would have a -1 parameter, but the former is clearly double the size
+  // of the latter.
+  if (LocSizeInBits.isScalable())
+    return false;
 
   // if the type is i8 addrspace(x)*, we know this is the type of
   // llvm.invariant.start operand
@@ -970,13 +982,19 @@
     if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
         !II->use_empty())
      continue;
-    unsigned InvariantSizeInBits =
-        cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8;
+    ConstantInt *InvariantSize = cast<ConstantInt>(II->getArgOperand(0));
+    // The intrinsic supports having a -1 argument for variable sized objects
+    // so we should check for that here.
+    if (InvariantSize->isMinusOne())
+      continue;
+    assert(!InvariantSize->isNegative() &&
+           "Unknown invariant.start size value");
+    uint64_t InvariantSizeInBits = InvariantSize->getSExtValue() * 8;
     // Confirm the invariant.start location size contains the load operand size
     // in bits. Also, the invariant.start should dominate the load, and we
     // should not hoist the load out of a loop that contains this dominating
     // invariant.start.
-    if (LocSizeInBits <= InvariantSizeInBits &&
+    if (LocSizeInBits.getFixedSize() <= InvariantSizeInBits &&
         DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
       return true;
   }
Index: llvm/test/Transforms/LICM/AArch64/lit.local.cfg
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LICM/AArch64/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'AArch64' in config.root.targets:
+    config.unsupported = True
Index: llvm/test/Transforms/LICM/AArch64/sve-load-hoist.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LICM/AArch64/sve-load-hoist.ll
@@ -0,0 +1,53 @@
+; RUN: opt -licm -mtriple aarch64-linux-gnu -mattr=+sve -S < %s 2>%t | FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+define void @no_hoist_load(<vscale x 2 x i64>* %out, <vscale x 2 x i64>* %in, i32 %n) {
+; CHECK-LABEL: @no_hoist_load(
+; CHECK: entry:
+; CHECK-NOT: load
+; CHECK: for.body:
+; CHECK: load
+entry:
+  %cmp0 = icmp ugt i32 %n, 0
+  br i1 %cmp0, label %for.body, label %for.end
+
+for.body:
+  %i = phi i32 [0, %entry], [%inc, %for.body]
+  %i2 = zext i32 %i to i64
+  %ptr = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %out, i64 %i2
+  %val = load <vscale x 2 x i64>, <vscale x 2 x i64>* %in, align 16
+  store <vscale x 2 x i64> %val, <vscale x 2 x i64>* %ptr, align 16
+  %inc = add nuw nsw i32 %i, 1
+  %cmp = icmp ult i32 %inc, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+define void @hoist_load(<vscale x 2 x i64>* %out, <vscale x 2 x i64>* noalias %in, i32 %n) {
+; CHECK-LABEL: @hoist_load(
+; CHECK: entry:
+; CHECK: load
+; CHECK: for.body:
+; CHECK-NOT: load
+entry:
+  %cmp0 = icmp ugt i32 %n, 0
+  br i1 %cmp0, label %for.body, label %for.end
+
+for.body:
+  %i = phi i32 [0, %entry], [%inc, %for.body]
+  %i2 = zext i32 %i to i64
+  %ptr = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %out, i64 %i2
+  %val = load <vscale x 2 x i64>, <vscale x 2 x i64>* %in, align 16
+  store <vscale x 2 x i64> %val, <vscale x 2 x i64>* %ptr, align 16
+  %inc = add nuw nsw i32 %i, 1
+  %cmp = icmp ult i32 %inc, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
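For reference, below is a minimal IR sketch (illustrative only, not part of the
patch, written in the same typed-pointer IR as the tests above) of the
llvm.invariant.start size encoding the LICM change depends on: the intrinsic's
i64 argument is the byte size of the invariant region, with -1 reserved for
objects of unknown size. A fixed-size load such as the i32 below can be checked
against that size, whereas a scalable vector load would require a -1 size and
so can never be proven to be enveloped by the invariant region.

; Illustrative only: 4 bytes starting at %ptr are invariant from this call
; onwards, so the 4-byte i32 load is covered by the region and could be
; hoisted if it sat inside a loop dominated by the invariant.start call.
declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture)

define i32 @invariant_load(i32* %p) {
entry:
  %ptr = bitcast i32* %p to i8*
  %inv = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %ptr)
  %v = load i32, i32* %p, align 4
  ret i32 %v
}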