Index: lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -970,7 +970,10 @@
   if (VecLoadTy) {
     SmallVector<Instruction *, 16> InstrsToErase;
     SmallVector<Instruction *, 16> InstrsToReorder;
-    InstrsToReorder.push_back(cast<Instruction>(Bitcast));
+    // Bitcast might not be an Instruction, if the value being loaded is a
+    // constant. In that case, no need to reorder anything.
+    if (Instruction *I = dyn_cast<Instruction>(Bitcast))
+      InstrsToReorder.push_back(I);
 
     unsigned VecWidth = VecLoadTy->getNumElements();
     for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
@@ -997,7 +1000,10 @@
       I->eraseFromParent();
   } else {
     SmallVector<Instruction *, 16> InstrsToReorder;
-    InstrsToReorder.push_back(cast<Instruction>(Bitcast));
+    // Bitcast might not be an Instruction, if the value being loaded is a
+    // constant. In that case, no need to reorder anything.
+    if (Instruction *I = dyn_cast<Instruction>(Bitcast))
+      InstrsToReorder.push_back(I);
 
     for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
       Value *V = Builder.CreateExtractElement(LI, Builder.getInt32(I));
Index: test/Transforms/LoadStoreVectorizer/NVPTX/non-instr-bitcast.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoadStoreVectorizer/NVPTX/non-instr-bitcast.ll
@@ -0,0 +1,14 @@
+; RUN: opt -mtriple=nvptx64-nvidia-cuda -load-store-vectorizer -S -o - %s | FileCheck %s
+
+; Load from a constant. This can be vectorized, but shouldn't crash us.
+
+@global = internal addrspace(1) constant [4 x float] [float 0xBF71111120000000, float 0x3F70410420000000, float 0xBF81111120000000, float 0x3FB5555560000000], align 4
+
+define void @foo() {
+  ; CHECK: load <4 x float>
+  %a = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 0), align 4
+  %b = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 1), align 4
+  %c = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 2), align 4
+  %d = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 3), align 4
+  ret void
+}