diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -22,8 +22,11 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/MathExtras.h" #include namespace llvm { @@ -318,6 +321,17 @@ return BaseT::isLegalNTStore(DataType, Alignment); } + /* Reports whether a non-temporal load of DataType is treated as legal. When the dyn_cast below succeeds, the answer depends only on the element count and the element bit width; Alignment is not consulted on that path and is only forwarded to BaseT::isLegalNTLoad otherwise. NOTE(review): the template arguments of dyn_cast and cast are missing in this copy of the patch (apparently lost in extraction); confirm the intended vector type against the upstream header. */ bool isLegalNTLoad(Type *DataType, Align Alignment) { + if (auto *DataTypeTy = dyn_cast(DataType)) { + /* NOTE(review): DataTypeTy already comes from the dyn_cast above, so this second cast looks redundant; confirm once the template arguments are known. */ unsigned NumElements = + cast(DataTypeTy)->getNumElements(); + unsigned EltSize = DataTypeTy->getElementType()->getScalarSizeInBits(); + /* Legal only with more than one element, a power-of-two element count, and a power-of-two element size between 8 and 128 bits inclusive. */ return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 && + EltSize <= 128 && isPowerOf2_64(EltSize); + } + /* Any type the dyn_cast rejects is left to the base implementation. */ return BaseT::isLegalNTLoad(DataType, Alignment); + } + bool enableOrderedReductions() const { return true; } InstructionCost getInterleavedMemoryOpCost( diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/nontemporal-load-store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/nontemporal-load-store.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/nontemporal-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/nontemporal-load-store.ll @@ -258,8 +258,7 @@ define i4 @test_i4_load(i4* %ddst) { ; CHECK-LABEL: define i4 @test_i4_load -; CHECK-LABEL: vector.body: -; CHECK: [[LOAD:%.*]] = load i4, i4* {{.*}}, align 1, !nontemporal !0 +; CHECK-NOT: vector.body: ; CHECk: ret i4 %{{.*}} ; entry: @@ -281,7 +280,7 @@ define i8 @test_load_i8(i8* %ddst) { ; CHECK-LABEL: @test_load_i8( -; CHECK-NOT: vector.body: +; CHECK: vector.body: ; CHECk: ret i8 %{{.*}} ; entry: @@ -303,7 +302,7 @@ define half @test_half_load(half* %ddst) { ; CHECK-LABEL: @test_half_load -; CHECK-NOT: 
vector.body: +; CHECK-LABEL: vector.body: ; CHECk: ret half %{{.*}} ; entry: @@ -325,7 +324,7 @@ define i16 @test_i16_load(i16* %ddst) { ; CHECK-LABEL: @test_i16_load -; CHECK-NOT: vector.body: +; CHECK-LABEL: vector.body: ; CHECk: ret i16 %{{.*}} ; entry: @@ -347,7 +346,7 @@ define i32 @test_i32_load(i32* %ddst) { ; CHECK-LABEL: @test_i32_load -; CHECK-NOT: vector.body: +; CHECK-LABEL: vector.body: ; CHECk: ret i32 %{{.*}} ; entry: @@ -413,7 +412,7 @@ define i64 @test_i64_load(i64* %ddst) { ; CHECK-LABEL: @test_i64_load -; CHECK-NOT: vector.body: +; CHECK-LABEL: vector.body: ; CHECk: ret i64 %{{.*}} ; entry: @@ -435,7 +434,7 @@ define double @test_double_load(double* %ddst) { ; CHECK-LABEL: @test_double_load -; CHECK-NOT: vector.body: +; CHECK-LABEL: vector.body: ; CHECk: ret double %{{.*}} ; entry: @@ -457,7 +456,7 @@ define i128 @test_i128_load(i128* %ddst) { ; CHECK-LABEL: @test_i128_load -; CHECK-NOT: vector.body: +; CHECK-LABEL: vector.body: ; CHECk: ret i128 %{{.*}} ; entry: