diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -508,7 +508,7 @@ Value *VecValue = Builder.CreateLoad(VectorTy, BitCast); Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index); if (Inst->getType() != VecEltTy) - ExtractElement = Builder.CreateBitCast(ExtractElement, Inst->getType()); + ExtractElement = Builder.CreateBitOrPointerCast(ExtractElement, Inst->getType()); Inst->replaceAllUsesWith(ExtractElement); Inst->eraseFromParent(); break; @@ -529,7 +529,7 @@ Value *VecValue = Builder.CreateLoad(VectorTy, BitCast); Value *Elt = SI->getValueOperand(); if (Elt->getType() != VecEltTy) - Elt = Builder.CreateBitCast(Elt, VecEltTy); + Elt = Builder.CreateBitOrPointerCast(Elt, VecEltTy); Value *NewVecValue = Builder.CreateInsertElement(VecValue, Elt, Index); Builder.CreateStore(NewVecValue, BitCast); Inst->eraseFromParent(); diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-pointer-array.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-pointer-array.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-pointer-array.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=amdgcn-- -mcpu=fiji -data-layout=A5 -amdgpu-promote-alloca < %s | FileCheck -check-prefix=OPT %s + +define i64 @test_pointer_array(i64 %v) { +; OPT-LABEL: @test_pointer_array( +; OPT-NEXT: entry: +; OPT-NEXT: [[A:%.*]] = alloca [3 x i8*], align 16, addrspace(5) +; OPT-NEXT: [[GEP:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*] addrspace(5)* [[A]], i32 0, i32 0 +; OPT-NEXT: [[CAST:%.*]] = bitcast i8* addrspace(5)* [[GEP]] to i64 addrspace(5)* +; OPT-NEXT: [[TMP0:%.*]] = bitcast [3 x i8*] addrspace(5)* [[A]] to <3 x i8*> addrspace(5)* +; OPT-NEXT: [[TMP1:%.*]] = load <3 x i8*>, <3 x i8*> addrspace(5)* [[TMP0]], align 32 +; OPT-NEXT: [[TMP2:%.*]] = inttoptr i64 [[V:%.*]] to i8* +; OPT-NEXT: [[TMP3:%.*]] = insertelement <3 x i8*> [[TMP1]], i8* [[TMP2]], i32 0 +; OPT-NEXT: store <3 x i8*> [[TMP3]], <3 x i8*> addrspace(5)* [[TMP0]], align 32 +; OPT-NEXT: [[TMP4:%.*]] = bitcast [3 x i8*] addrspace(5)* [[A]] to <3 x i8*> addrspace(5)* +; OPT-NEXT: [[TMP5:%.*]] = load <3 x i8*>, <3 x i8*> addrspace(5)* [[TMP4]], align 32 +; OPT-NEXT: [[TMP6:%.*]] = extractelement <3 x i8*> [[TMP5]], i32 0 +; OPT-NEXT: [[TMP7:%.*]] = ptrtoint i8* [[TMP6]] to i64 +; OPT-NEXT: ret i64 [[TMP7]] +; +entry: + %a = alloca [3 x i8*], align 16, addrspace(5) + %gep = getelementptr inbounds [3 x i8*], [3 x i8*] addrspace(5)* %a, i32 0, i32 0 + %cast = bitcast i8* addrspace(5)* %gep to i64 addrspace(5)* + store i64 %v, i64 addrspace(5)* %cast, align 16 + %ld = load i64, i64 addrspace(5)* %cast, align 16 + ret i64 %ld +}