Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -803,16 +803,19 @@ uint64_t Field = ConstIdx->getZExtValue(); BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field); } else { - int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType()); - if (ConstIdx) { - BaseOffset += - ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize; + TypeSize ElementSize = DL.getTypeAllocSize(GTI.getIndexedType()); + // TODO: Add support for base offsets made up of scalable types. + if (ConstIdx && !ElementSize.isScalable()) { + BaseOffset += ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * + ElementSize.getFixedSize(); } else { // Needs scale register. if (Scale != 0) // No addressing mode takes two scale registers. return TTI::TCC_Basic; - Scale = ElementSize; + // If this is a scalable type we are effectively still scaling by + // vscale and the target must be able to cope with that. + Scale = ElementSize.getKnownMinSize(); } } } Index: llvm/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LICM.cpp +++ llvm/lib/Transforms/Scalar/LICM.cpp @@ -940,7 +940,12 @@ Loop *CurLoop) { Value *Addr = LI->getOperand(0); const DataLayout &DL = LI->getModule()->getDataLayout(); - const uint32_t LocSizeInBits = DL.getTypeSizeInBits(LI->getType()); + const TypeSize LocSizeInBits = DL.getTypeSizeInBits(LI->getType()); + + // The invariant_start intrinsic currently doesn't support scalable types + // sizes so we bail out for now. + if (LocSizeInBits.isScalable()) + return false; // if the type is i8 addrspace(x)*, we know this is the type of // llvm.invariant.start operand @@ -976,7 +981,7 @@ // in bits. 
Also, the invariant.start should dominate the load, and we // should not hoist the load out of a loop that contains this dominating // invariant.start. - if (LocSizeInBits <= InvariantSizeInBits && + if (LocSizeInBits.getFixedSize() <= InvariantSizeInBits && DT->properlyDominates(II->getParent(), CurLoop->getHeader())) return true; } Index: llvm/test/Transforms/LICM/AArch64/lit.local.cfg =================================================================== --- /dev/null +++ llvm/test/Transforms/LICM/AArch64/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'AArch64' in config.root.targets: + config.unsupported = True Index: llvm/test/Transforms/LICM/AArch64/sve-load-hoist.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LICM/AArch64/sve-load-hoist.ll @@ -0,0 +1,53 @@ +; RUN: opt -licm -mtriple aarch64-linux-gnu -mattr=+sve -S < %s 2>%t | FileCheck %s +; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t + +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
+; WARN-NOT: warning
+
+define void @no_hoist_load(<vscale x 2 x i64>* %out, <vscale x 2 x i64>* %in, i32 %n) {
+; CHECK-LABEL: @no_hoist_load(
+; CHECK: entry:
+; CHECK-NOT: load
+; CHECK: for.body:
+; CHECK: load
+entry:
+  %cmp0 = icmp ugt i32 %n, 0
+  br i1 %cmp0, label %for.body, label %for.end
+
+for.body:
+  %i = phi i32 [0, %entry], [%inc, %for.body]
+  %i2 = zext i32 %i to i64
+  %ptr = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %out, i64 %i2
+  %val = load <vscale x 2 x i64>, <vscale x 2 x i64>* %in, align 16
+  store <vscale x 2 x i64> %val, <vscale x 2 x i64>* %ptr, align 16
+  %inc = add nuw nsw i32 %i, 1
+  %cmp = icmp ult i32 %inc, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+define void @hoist_load(<vscale x 2 x i64>* %out, <vscale x 2 x i64>* noalias %in, i32 %n) {
+; CHECK-LABEL: @hoist_load(
+; CHECK: entry:
+; CHECK: load
+; CHECK: for.body:
+; CHECK-NOT: load
+entry:
+  %cmp0 = icmp ugt i32 %n, 0
+  br i1 %cmp0, label %for.body, label %for.end
+
+for.body:
+  %i = phi i32 [0, %entry], [%inc, %for.body]
+  %i2 = zext i32 %i to i64
+  %ptr = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %out, i64 %i2
+  %val = load <vscale x 2 x i64>, <vscale x 2 x i64>* %in, align 16
+  store <vscale x 2 x i64> %val, <vscale x 2 x i64>* %ptr, align 16
+  %inc = add nuw nsw i32 %i, 1
+  %cmp = icmp ult i32 %inc, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}