Skip vectorizeLoadInsert in functions marked sanitize_thread.

vectorizeLoadInsert turns a scalar load into an extended (vector) load. Under
tsan the extended load may overlap bytes that another thread is concurrently
modifying, which would be reported as a data race the source program does not
contain. The remaining VectorCombine folds are still attempted. A negative
test is added to llvm/test/Transforms/VectorCombine/X86/load.ll.

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -688,6 +688,7 @@
     return false;
 
   bool MadeChange = false;
+  bool IsTsan = F.hasFnAttribute(Attribute::SanitizeThread);
   for (BasicBlock &BB : F) {
     // Ignore unreachable basic blocks.
     if (!DT.isReachableFromEntry(&BB))
@@ -700,7 +701,10 @@
       if (isa<DbgInfoIntrinsic>(I))
         continue;
       Builder.SetInsertPoint(&I);
-      MadeChange |= vectorizeLoadInsert(I);
+      // Do not vectorize scalar load under tsan. The extended load may overlap
+      // bytes being concurrently modified and cause spurious data races.
+      if (!IsTsan)
+        MadeChange |= vectorizeLoadInsert(I);
       MadeChange |= foldExtractExtract(I);
       MadeChange |= foldBitcastShuf(I);
       MadeChange |= scalarizeBinopOrCmp(I);
diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll
--- a/llvm/test/Transforms/VectorCombine/X86/load.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load.ll
@@ -292,6 +292,22 @@
   ret <8 x i16> %r
 }
 
+; Negative test - disable under tsan because extended load may overlap bytes
+; being concurrently modified.
+
+define <8 x i16> @gep10_load_i16_insert_v8i16_tsan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_thread {
+; CHECK-LABEL: @gep10_load_i16_insert_v8i16_tsan(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
+; CHECK-NEXT:    [[S:%.*]] = load i16, i16* [[GEP]], align 16
+; CHECK-NEXT:    [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
+; CHECK-NEXT:    ret <8 x i16> [[R]]
+;
+  %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0
+  %s = load i16, i16* %gep, align 16
+  %r = insertelement <8 x i16> undef, i16 %s, i64 0
+  ret <8 x i16> %r
+}
+
 ; Negative test - can't safely load the offset vector, but could load+shuffle.
 
 define <8 x i16> @gep10_load_i16_insert_v8i16_deref(<8 x i16>* align 16 dereferenceable(31) %p) {