AMDGPUPromoteAlloca: do not assume every load/store pointer is a GEP.

calculateVectorIndex() now uses dyn_cast and returns nullptr when the pointer
(after stripping bitcasts) is not a GetElementPtrInst. The load and store
rewrites skip the instruction when no vector index is available. The new test
loads an i64 through a bitcast of a <2 x i32> alloca and expects the IR to be
left unchanged by the pass.

Index: llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -339,7 +339,9 @@
 static Value *
 calculateVectorIndex(Value *Ptr,
                      const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
-  GetElementPtrInst *GEP = cast<GetElementPtrInst>(stripBitcasts(Ptr));
+  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(stripBitcasts(Ptr));
+  if (!GEP)
+    return nullptr;
 
   auto I = GEPIdx.find(GEP);
   return I == GEPIdx.end() ? nullptr : I->second;
@@ -496,10 +498,12 @@
       if (Inst->getType() == AllocaTy || Inst->getType()->isVectorTy())
         break;
 
-      Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
       Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand();
       Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
+      if (!Index)
+        break;
 
+      Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
       Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
       Value *VecValue = Builder.CreateLoad(VectorTy, BitCast);
       Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
@@ -515,9 +519,12 @@
           SI->getValueOperand()->getType()->isVectorTy())
         break;
 
-      Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
       Value *Ptr = SI->getPointerOperand();
       Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
+      if (!Index)
+        break;
+
+      Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
       Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
       Value *VecValue = Builder.CreateLoad(VectorTy, BitCast);
       Value *Elt = SI->getValueOperand();
Index: llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll
+++ llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll
@@ -189,5 +189,23 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}ptr_alloca_bitcast:
+; OPT-LABEL: define i64 @ptr_alloca_bitcast
+
+; GCN-NOT: buffer_
+; GCN: v_mov_b32_e32 v1, 0
+
+; OPT: %private_iptr = alloca <2 x i32>, align 8, addrspace(5)
+; OPT: %cast = bitcast <2 x i32> addrspace(5)* %private_iptr to i64 addrspace(5)*
+; OPT: %tmp1 = load i64, i64 addrspace(5)* %cast, align 8
+
+define i64 @ptr_alloca_bitcast() {
+entry:
+  %private_iptr = alloca <2 x i32>, align 8, addrspace(5)
+  %cast = bitcast <2 x i32> addrspace(5)* %private_iptr to i64 addrspace(5)*
+  %tmp1 = load i64, i64 addrspace(5)* %cast, align 8
+  ret i64 %tmp1
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x()
 declare i32 @llvm.amdgcn.workitem.id.y()