Index: llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp =================================================================== --- llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -331,6 +331,7 @@ } void Vectorizer::reorder(Instruction *I) { + Instruction *InsertAfter = I; for (User *U : I->users()) { Instruction *User = dyn_cast<Instruction>(U); if (!User || User->getOpcode() == Instruction::PHI) @@ -338,7 +339,8 @@ if (!DT.dominates(I, User)) { User->removeFromParent(); - User->insertAfter(I); + User->insertAfter(InsertAfter); + InsertAfter = User; reorder(User); } } @@ -359,13 +361,15 @@ ++NumFound; if (NumFound == 1) { FirstInstr = I.getIterator(); - } else if (NumFound == Chain.size()) { + } + if (NumFound == Chain.size()) { LastInstr = I.getIterator(); break; } } - return std::make_pair(FirstInstr, LastInstr); + // Range is [first, last). + return std::make_pair(FirstInstr, ++LastInstr); } void Vectorizer::eraseInstructions(ArrayRef<Value *> Chain) { @@ -415,6 +419,9 @@ } } + assert(Chain.size() == ChainInstrs.size() && + "All instructions in the Chain must exist in [From, To)."); + for (auto EntryMem : MemoryInstrs) { Value *V = EntryMem.first; unsigned VIdx = EntryMem.second; Index: llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/interleaved-mayalias-store.ll =================================================================== --- llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/interleaved-mayalias-store.ll +++ llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/interleaved-mayalias-store.ll @@ -2,11 +2,11 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" -; This is OK to vectorize the load as long as the may alias store -; occurs before the vector load. +; This is NOT OK to vectorize, as either load may alias either store. 
+; CHECK: load double ; CHECK: store double 0.000000e+00, double addrspace(1)* %a, -; CHECK: load <2 x double> +; CHECK: load double ; CHECK: store double 0.000000e+00, double addrspace(1)* %a.idx.1 define void @interleave(double addrspace(1)* nocapture %a, double addrspace(1)* nocapture %b, double addrspace(1)* nocapture readonly %c) #0 { entry: Index: llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/lit.local.cfg =================================================================== --- llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/lit.local.cfg +++ llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'X86' in config.root.targets: + config.unsupported = True + Index: llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/preserve-order32.ll =================================================================== --- llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/preserve-order32.ll +++ llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/preserve-order32.ll @@ -0,0 +1,25 @@ +; RUN: opt -mtriple=x86-linux -load-store-vectorizer -S -o - %s | FileCheck %s + +target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" + +%struct.buffer_t = type { i32, i8* } + +; Check an i32 and i8* get vectorized, and that +; the two accesses (load into buff.val and store to buff.p) preserve their order. 
+ +; CHECK-LABEL: @preserve_order_32( +; CHECK: load <2 x i32> +; CHECK: %buff.val = load i8 +; CHECK: store i8 0 +define void @preserve_order_32(%struct.buffer_t* noalias %buff) #0 { +entry: + %tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i32 0, i32 1 + %buff.p = load i8*, i8** %tmp1, align 8 + %buff.val = load i8, i8* %buff.p, align 8 + store i8 0, i8* %buff.p, align 8 + %tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i32 0, i32 0 + %buff.int = load i32, i32* %tmp0, align 8 + ret void +} + +attributes #0 = { nounwind } Index: llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/preserve-order64.ll =================================================================== --- llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/preserve-order64.ll +++ llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/preserve-order64.ll @@ -0,0 +1,25 @@ +; RUN: opt -mtriple=x86-linux -load-store-vectorizer -S -o - %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" + +%struct.buffer_t = type { i64, i8* } + +; Check an i64 and i8* get vectorized, and that +; the two accesses (load into buff.val and store to buff.p) preserve their order. + +; CHECK-LABEL: @preserve_order_64( +; CHECK: load <2 x i64> +; CHECK: %buff.val = load i8 +; CHECK: store i8 0 +define void @preserve_order_64(%struct.buffer_t* noalias %buff) #0 { +entry: + %tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 1 + %buff.p = load i8*, i8** %tmp1, align 8 + %buff.val = load i8, i8* %buff.p, align 8 + store i8 0, i8* %buff.p, align 8 + %tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 0 + %buff.int = load i64, i64* %tmp0, align 8 + ret void +} + +attributes #0 = { nounwind }