// Patch summary (three files are touched, in this order):
//
// 1. llvm/lib/Analysis/ValueTracking.cpp -- llvm::mustSuppressSpeculation
//    * The early "return true" guard changes from `!LI.isUnordered()` to
//      `!LI.isSimple()`.
//    * Attribute::SanitizeMemTag is added to the function attributes that make
//      the helper return true, next to SanitizeThread, SanitizeAddress and
//      SanitizeHWAddress.
//    NOTE(review): this helper lives in the llvm:: namespace, so other callers
//    that are not visible in this patch may be affected by the stricter guard.
//    Presumably isSimple() also rejects unordered atomic loads that
//    isUnordered() accepted -- confirm against LoadInst and confirm that the
//    wider effect is intended.
//
// 2. llvm/lib/Transforms/Vectorize/VectorCombine.cpp
//    * The scalar-load guard `!Load || !Load->isSimple()` is replaced by
//      `!Load || mustSuppressSpeculation(*Load)`, with a new comment giving
//      the reason (widened loads may read dirty regions or introduce races).
//    NOTE(review): `dyn_cast(Scalar)` and `dyn_cast(I.getType())` appear here
//    without a template argument; presumably the `<...>` part was lost when
//    this patch text was captured -- verify the context lines against the tree.
//
// 3. llvm/test/Transforms/VectorCombine/X86/load.ll
//    * Adds five negative tests: gep10_load_i16_insert_v8i16_{asan,hwasan,
//      memtag,tsan} and load_f32_insert_v2f32_asan. Each function carries the
//      matching sanitize_* attribute, and its CHECK lines expect the scalar
//      load followed by insertelement to be left as written.
//
// NOTE(review): as captured here the patch's line structure is collapsed, so it
// probably cannot be applied as-is; regenerate it from the commit if needed.
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -4411,14 +4411,15 @@ } bool llvm::mustSuppressSpeculation(const LoadInst &LI) { - if (!LI.isUnordered()) + if (!LI.isSimple()) return true; const Function &F = *LI.getFunction(); // Speculative load may create a race that did not exist in the source. return F.hasFnAttribute(Attribute::SanitizeThread) || - // Speculative load may load data from dirty regions. - F.hasFnAttribute(Attribute::SanitizeAddress) || - F.hasFnAttribute(Attribute::SanitizeHWAddress); + // Speculative load may load data from dirty regions. + F.hasFnAttribute(Attribute::SanitizeAddress) || + F.hasFnAttribute(Attribute::SanitizeHWAddress) || + F.hasFnAttribute(Attribute::SanitizeMemTag); } diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -98,7 +98,10 @@ return false; auto *Load = dyn_cast(Scalar); Type *ScalarTy = Scalar->getType(); - if (!Load || !Load->isSimple()) + // Do not vectorize scalar load (widening) if atomic/volatile or under + // asan/hwasan/memtag/tsan. The widened load may load data from dirty regions + // or create data races non-existent in the source. + if (!Load || mustSuppressSpeculation(*Load)) return false; auto *Ty = dyn_cast(I.getType()); if (!Ty) diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll --- a/llvm/test/Transforms/VectorCombine/X86/load.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load.ll @@ -292,6 +292,66 @@ ret <8 x i16> %r } +; Negative test - disable under asan because widened load can cause spurious +; use-after-poison issues when __asan_poison_memory_region is used. 
+ +define <8 x i16> @gep10_load_i16_insert_v8i16_asan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_address { +; CHECK-LABEL: @gep10_load_i16_insert_v8i16_asan( +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0 +; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16 +; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0 +; CHECK-NEXT: ret <8 x i16> [[R]] +; + %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0 + %s = load i16, i16* %gep, align 16 + %r = insertelement <8 x i16> undef, i16 %s, i64 0 + ret <8 x i16> %r +} + +; hwasan and memtag should be similarly suppressed. + +define <8 x i16> @gep10_load_i16_insert_v8i16_hwasan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_hwaddress { +; CHECK-LABEL: @gep10_load_i16_insert_v8i16_hwasan( +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0 +; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16 +; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0 +; CHECK-NEXT: ret <8 x i16> [[R]] +; + %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0 + %s = load i16, i16* %gep, align 16 + %r = insertelement <8 x i16> undef, i16 %s, i64 0 + ret <8 x i16> %r +} + +define <8 x i16> @gep10_load_i16_insert_v8i16_memtag(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_memtag { +; CHECK-LABEL: @gep10_load_i16_insert_v8i16_memtag( +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0 +; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16 +; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0 +; CHECK-NEXT: ret <8 x i16> [[R]] +; + %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0 + %s = load i16, i16* %gep, align 16 + %r = insertelement <8 x i16> undef, i16 %s, i64 0 + ret <8 x i16> %r +} + +; Negative test - disable under tsan because widened load may overlap 
bytes +; being concurrently modified. tsan does not know that some bytes are undef. + +define <8 x i16> @gep10_load_i16_insert_v8i16_tsan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_thread { +; CHECK-LABEL: @gep10_load_i16_insert_v8i16_tsan( +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0 +; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16 +; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0 +; CHECK-NEXT: ret <8 x i16> [[R]] +; + %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0 + %s = load i16, i16* %gep, align 16 + %r = insertelement <8 x i16> undef, i16 %s, i64 0 + ret <8 x i16> %r +} + ; Negative test - can't safely load the offset vector, but could load+shuffle. define <8 x i16> @gep10_load_i16_insert_v8i16_deref(<8 x i16>* align 16 dereferenceable(31) %p) { @@ -393,3 +453,16 @@ %r = insertelement <2 x float> undef, float %s, i32 0 ret <2 x float> %r } + +; Negative test - suppress load widening for asan/hwasan/memtag/tsan. + +define <2 x float> @load_f32_insert_v2f32_asan(float* align 16 dereferenceable(16) %p) sanitize_address { +; CHECK-LABEL: @load_f32_insert_v2f32_asan( +; CHECK-NEXT: [[S:%.*]] = load float, float* [[P:%.*]], align 4 +; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> undef, float [[S]], i32 0 +; CHECK-NEXT: ret <2 x float> [[R]] +; + %s = load float, float* %p, align 4 + %r = insertelement <2 x float> undef, float %s, i32 0 + ret <2 x float> %r +}