Index: llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp =================================================================== --- llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -969,8 +969,6 @@ if (VecLoadTy) { SmallVector<Instruction *, 16> InstrsToErase; - SmallVector<Instruction *, 16> InstrsToReorder; - InstrsToReorder.push_back(cast<Instruction>(Bitcast)); unsigned VecWidth = VecLoadTy->getNumElements(); for (unsigned I = 0, E = Chain.size(); I != E; ++I) { @@ -990,15 +988,14 @@ } } - for (Instruction *ModUser : InstrsToReorder) - reorder(ModUser); + // Bitcast might not be an Instruction, if the value being loaded is a + // constant. In that case, no need to reorder anything. + if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast)) + reorder(BitcastInst); for (auto I : InstrsToErase) I->eraseFromParent(); } else { - SmallVector<Instruction *, 16> InstrsToReorder; - InstrsToReorder.push_back(cast<Instruction>(Bitcast)); - for (unsigned I = 0, E = Chain.size(); I != E; ++I) { Value *V = Builder.CreateExtractElement(LI, Builder.getInt32(I)); Instruction *Extracted = cast<Instruction>(V); @@ -1012,8 +1009,8 @@ UI->replaceAllUsesWith(Extracted); } - for (Instruction *ModUser : InstrsToReorder) - reorder(ModUser); + if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast)) + reorder(BitcastInst); } eraseInstructions(Chain); Index: llvm/trunk/test/Transforms/LoadStoreVectorizer/NVPTX/non-instr-bitcast.ll =================================================================== --- llvm/trunk/test/Transforms/LoadStoreVectorizer/NVPTX/non-instr-bitcast.ll +++ llvm/trunk/test/Transforms/LoadStoreVectorizer/NVPTX/non-instr-bitcast.ll @@ -0,0 +1,14 @@ +; RUN: opt -mtriple=nvptx64-nvidia-cuda -load-store-vectorizer -S -o - %s | FileCheck %s + +; Load from a constant. This can be vectorized, but shouldn't crash us. 
+ +@global = internal addrspace(1) constant [4 x float] [float 0xBF71111120000000, float 0x3F70410420000000, float 0xBF81111120000000, float 0x3FB5555560000000], align 4 + +define void @foo() { + ; CHECK: load <4 x float> + %a = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 0), align 4 + %b = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 1), align 4 + %c = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 2), align 4 + %d = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 3), align 4 + ret void +}