Index: llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -694,31 +694,16 @@
   });
 
   for (Instruction &I : make_range(getBoundaryInstrs(Chain))) {
-    if (isa<LoadInst>(I) || isa<StoreInst>(I)) {
-      if (!is_contained(Chain, &I))
-        MemoryInstrs.push_back(&I);
-      else
-        ChainInstrs.push_back(&I);
-    } else if (isa<IntrinsicInst>(&I) &&
-               cast<IntrinsicInst>(&I)->getIntrinsicID() ==
-                   Intrinsic::sideeffect) {
-      // Ignore llvm.sideeffect calls.
-    } else if (isa<IntrinsicInst>(&I) &&
-               cast<IntrinsicInst>(&I)->getIntrinsicID() ==
-                   Intrinsic::pseudoprobe) {
-      // Ignore llvm.pseudoprobe calls.
-    } else if (isa<IntrinsicInst>(&I) &&
-               cast<IntrinsicInst>(&I)->getIntrinsicID() == Intrinsic::assume) {
-      // Ignore llvm.assume calls.
-    } else if (IsLoadChain && (I.mayWriteToMemory() || I.mayThrow())) {
-      LLVM_DEBUG(dbgs() << "LSV: Found may-write/throw operation: " << I
-                        << '\n');
-      break;
-    } else if (!IsLoadChain && (I.mayReadOrWriteMemory() || I.mayThrow())) {
-      LLVM_DEBUG(dbgs() << "LSV: Found may-read/write/throw operation: " << I
-                        << '\n');
+    if ((isa<LoadInst>(I) || isa<StoreInst>(I)) && is_contained(Chain, &I)) {
+      ChainInstrs.push_back(&I);
+      continue;
+    }
+    if (I.mayThrow()) {
+      LLVM_DEBUG(dbgs() << "LSV: Found may-throw operation: " << I << '\n');
       break;
     }
+    if (I.mayReadOrWriteMemory())
+      MemoryInstrs.push_back(&I);
   }
 
   // Loop until we find an instruction in ChainInstrs that we can't vectorize.
@@ -751,26 +736,28 @@
         return LI->hasMetadata(LLVMContext::MD_invariant_load);
       };
 
-      // We can ignore the alias as long as the load comes before the store,
-      // because that means we won't be moving the load past the store to
-      // vectorize it (the vectorized load is inserted at the location of the
-      // first load in the chain).
-      if (isa<StoreInst>(MemInstr) && ChainLoad &&
-          (IsInvariantLoad(ChainLoad) || ChainLoad->comesBefore(MemInstr)))
-        continue;
-
-      // Same case, but in reverse.
-      if (MemLoad && isa<StoreInst>(ChainInstr) &&
-          (IsInvariantLoad(MemLoad) || MemLoad->comesBefore(ChainInstr)))
-        continue;
+      if (IsLoadChain) {
+        // We can ignore the alias as long as the load comes before the store,
+        // because that means we won't be moving the load past the store to
+        // vectorize it (the vectorized load is inserted at the location of the
+        // first load in the chain).
+        if (ChainInstr->comesBefore(MemInstr) ||
+            (ChainLoad && IsInvariantLoad(ChainLoad)))
+          continue;
+      } else {
+        // Same case, but in reverse.
+        if (MemInstr->comesBefore(ChainInstr) ||
+            (MemLoad && IsInvariantLoad(MemLoad)))
+          continue;
+      }
 
-      if (!AA.isNoAlias(MemoryLocation::get(MemInstr),
-                        MemoryLocation::get(ChainInstr))) {
+      ModRefInfo MR =
+          AA.getModRefInfo(MemInstr, MemoryLocation::get(ChainInstr));
+      if (IsLoadChain ? isModSet(MR) : isModOrRefSet(MR)) {
         LLVM_DEBUG({
           dbgs() << "LSV: Found alias:\n"
-                    "  Aliasing instruction and pointer:\n"
+                    "  Aliasing instruction:\n"
                  << "  " << *MemInstr << '\n'
-                 << "  " << *getLoadStorePointerOperand(MemInstr) << '\n'
                  << "  Aliased instruction and pointer:\n"
                  << "  " << *ChainInstr << '\n'
                  << "  " << *getLoadStorePointerOperand(ChainInstr) << '\n';
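
The behavioral core of the change is the last hunk: instead of asking the yes/no question `AA.isNoAlias()`, the pass now asks `AA.getModRefInfo()` and interprets the answer directionally. A minimal, self-contained sketch of that decision rule, using a mock `ModRefInfo` with the same NoModRef/Ref/Mod/ModRef shape as LLVM's (the struct-free helpers and the name `blocksVectorization` are hypothetical, for illustration only; this is not LLVM's actual implementation):

```cpp
#include <cassert>

// Mock of the four-valued mod/ref lattice; not LLVM's actual enum.
enum class ModRefInfo { NoModRef = 0, Ref = 1, Mod = 2, ModRef = 3 };

static bool isModSet(ModRefInfo MR) {
  return (static_cast<int>(MR) & static_cast<int>(ModRefInfo::Mod)) != 0;
}
static bool isModOrRefSet(ModRefInfo MR) {
  return MR != ModRefInfo::NoModRef;
}

// A chain of loads is broken only by an instruction that may *write* the
// chain's memory; a chain of stores is also broken by a *read*, since
// moving a store across a reader would change the value that reader sees.
static bool blocksVectorization(bool IsLoadChain, ModRefInfo MR) {
  return IsLoadChain ? isModSet(MR) : isModOrRefSet(MR);
}

int main() {
  // A read-only access between two loads is harmless...
  assert(!blocksVectorization(/*IsLoadChain=*/true, ModRefInfo::Ref));
  // ...but between two stores it is a barrier.
  assert(blocksVectorization(/*IsLoadChain=*/false, ModRefInfo::Ref));
  // A possible write blocks both kinds of chain.
  assert(blocksVectorization(true, ModRefInfo::Mod));
  assert(blocksVectorization(false, ModRefInfo::ModRef));
  return 0;
}
```

The old `isNoAlias()` check treated any potentially aliasing access as a barrier, so a read-only call that merely might touch the same memory would split a load chain even though no reordering hazard exists.
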
Index: llvm/test/Transforms/LoadStoreVectorizer/int_sideeffect.ll
===================================================================
--- llvm/test/Transforms/LoadStoreVectorizer/int_sideeffect.ll
+++ llvm/test/Transforms/LoadStoreVectorizer/int_sideeffect.ll
@@ -44,22 +44,24 @@
 
 declare void @foo()
 
-define void @test_inaccessiblememonly(float* %p) {
-; CHECK-LABEL: @test_inaccessiblememonly(
+
+define void @test_inaccessiblememonly(float* %p) {
+; CHECK-LABEL: @test_inaccessiblememonly(
 ; CHECK-NEXT:    [[P0:%.*]] = getelementptr float, float* [[P:%.*]], i64 0
-; CHECK-NEXT:    [[P1:%.*]] = getelementptr float, float* [[P]], i64 1
-; CHECK-NEXT:    [[P2:%.*]] = getelementptr float, float* [[P]], i64 2
-; CHECK-NEXT:    [[P3:%.*]] = getelementptr float, float* [[P]], i64 3
-; CHECK-NEXT:    [[L0:%.*]] = load float, float* [[P0]], align 16
-; CHECK-NEXT:    [[L1:%.*]] = load float, float* [[P1]], align 4
-; CHECK-NEXT:    [[L2:%.*]] = load float, float* [[P2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[P0]] to <4 x float>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
+; CHECK-NEXT:    [[L01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
+; CHECK-NEXT:    [[L12:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
+; CHECK-NEXT:    [[L23:%.*]] = extractelement <4 x float> [[TMP2]], i32 2
+; CHECK-NEXT:    [[L34:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
 ; CHECK-NEXT:    call void @foo() #[[ATTR1:[0-9]+]]
-; CHECK-NEXT:    [[L3:%.*]] = load float, float* [[P3]], align 4
-; CHECK-NEXT:    store float [[L0]], float* [[P0]], align 16
 ; CHECK-NEXT:    call void @foo() #[[ATTR1]]
-; CHECK-NEXT:    store float [[L1]], float* [[P1]], align 4
-; CHECK-NEXT:    store float [[L2]], float* [[P2]], align 4
-; CHECK-NEXT:    store float [[L3]], float* [[P3]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> undef, float [[L01]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float [[L12]], i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float [[L23]], i32 2
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[L34]], i32 3
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast float* [[P0]] to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[TMP6]], <4 x float>* [[TMP7]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %p0 = getelementptr float, float* %p, i64 0
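
The updated expectations show the payoff: the two `@foo` calls (whose attribute, per the test name, is presumably `inaccessiblememonly`, so they cannot touch `%p`) no longer split the chain, and all four loads and stores vectorize across them. This also falls out of the first hunk's simplified scan, which drops the by-name special cases for `llvm.sideeffect`, `llvm.pseudoprobe`, and `llvm.assume`: such calls cannot throw, so at worst they are recorded as memory instructions, and the per-pair `getModRefInfo()` query then dismisses them. A mock-type sketch of that control flow (`Instr` and `scanBoundaryWindow` are hypothetical names, not LLVM API):

```cpp
#include <vector>

// Hypothetical stand-in for llvm::Instruction, reduced to the three
// properties the scan cares about.
struct Instr {
  bool InChain;              // a load/store belonging to the current chain
  bool MayThrow;             // may raise an exception
  bool MayReadOrWriteMemory; // may touch memory at all
};

static void scanBoundaryWindow(const std::vector<Instr *> &Window,
                               std::vector<Instr *> &ChainInstrs,
                               std::vector<Instr *> &MemoryInstrs) {
  for (Instr *I : Window) {
    if (I->InChain) {          // member of the chain being vectorized
      ChainInstrs.push_back(I);
      continue;
    }
    if (I->MayThrow)           // hard barrier for load and store chains alike
      break;
    if (I->MayReadOrWriteMemory)
      MemoryInstrs.push_back(I); // resolved later by the mod/ref query
  }
}
```
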