diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -687,6 +687,11 @@
   if (!TTI.getNumberOfRegisters(TTI.getRegisterClassForType(/*Vector*/ true)))
     return false;
 
+  // Do not vectorize scalar load under asan or tsan. The widened load may
+  // overlap bytes marked as __asan_poison_memory_region or bytes being
+  // concurrently modified.
+  bool CanWiden = !F.hasFnAttribute(Attribute::SanitizeAddress) &&
+                  !F.hasFnAttribute(Attribute::SanitizeThread);
   bool MadeChange = false;
   for (BasicBlock &BB : F) {
     // Ignore unreachable basic blocks.
@@ -700,7 +705,8 @@
       if (isa<DbgInfoIntrinsic>(I))
         continue;
       Builder.SetInsertPoint(&I);
-      MadeChange |= vectorizeLoadInsert(I);
+      if (CanWiden)
+        MadeChange |= vectorizeLoadInsert(I);
       MadeChange |= foldExtractExtract(I);
       MadeChange |= foldBitcastShuf(I);
       MadeChange |= scalarizeBinopOrCmp(I);
diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll
--- a/llvm/test/Transforms/VectorCombine/X86/load.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load.ll
@@ -292,6 +292,38 @@
   ret <8 x i16> %r
 }
 
+; Negative test - disable under asan because widened load can cause spurious
+; use-after-poison issues when __asan_poison_memory_region is used.
+
+define <8 x i16> @gep10_load_i16_insert_v8i16_asan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_address {
+; CHECK-LABEL: @gep10_load_i16_insert_v8i16_asan(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
+; CHECK-NEXT:    [[S:%.*]] = load i16, i16* [[GEP]], align 16
+; CHECK-NEXT:    [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
+; CHECK-NEXT:    ret <8 x i16> [[R]]
+;
+  %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0
+  %s = load i16, i16* %gep, align 16
+  %r = insertelement <8 x i16> undef, i16 %s, i64 0
+  ret <8 x i16> %r
+}
+
+; Negative test - disable under tsan because widened load may overlap bytes
+; being concurrently modified. tsan does not know that some bytes are undef.
+
+define <8 x i16> @gep10_load_i16_insert_v8i16_tsan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_thread {
+; CHECK-LABEL: @gep10_load_i16_insert_v8i16_tsan(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
+; CHECK-NEXT:    [[S:%.*]] = load i16, i16* [[GEP]], align 16
+; CHECK-NEXT:    [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
+; CHECK-NEXT:    ret <8 x i16> [[R]]
+;
+  %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0
+  %s = load i16, i16* %gep, align 16
+  %r = insertelement <8 x i16> undef, i16 %s, i64 0
+  ret <8 x i16> %r
+}
+
 ; Negative test - can't safely load the offset vector, but could load+shuffle.
 
 define <8 x i16> @gep10_load_i16_insert_v8i16_deref(<8 x i16>* align 16 dereferenceable(31) %p) {