diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -635,8 +635,13 @@
     ConstantExpr *CE, unsigned NewAddrSpace,
     const ValueToValueMapTy &ValueWithNewAddrSpace, const DataLayout *DL,
     const TargetTransformInfo *TTI) {
-  Type *TargetType = PointerType::getWithSamePointeeType(
-      cast<PointerType>(CE->getType()), NewAddrSpace);
+  // CE is not necessarily a pointer: a GEP index can itself be an integer
+  // constant expression (e.g. sub of two ptrtoints). Such a non-pointer CE
+  // has no address space, so it keeps its original type.
+  Type *TargetType = CE->getType()->isPointerTy()
+                         ? PointerType::getWithSamePointeeType(
+                               cast<PointerType>(CE->getType()), NewAddrSpace)
+                         : CE->getType();
 
   if (CE->getOpcode() == Instruction::AddrSpaceCast) {
     // Because CE is flat, the source address space must be specific.
diff --git a/llvm/test/Transforms/InferAddressSpaces/NVPTX/clone_constexpr.ll b/llvm/test/Transforms/InferAddressSpaces/NVPTX/clone_constexpr.ll
--- a/llvm/test/Transforms/InferAddressSpaces/NVPTX/clone_constexpr.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/NVPTX/clone_constexpr.ll
@@ -31,6 +31,41 @@
 declare void @f2(i64*, i64) local_unnamed_addr #0
 declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #1
 
+; Make sure we can clone GEP which uses complex constant expressions as indices.
+; https://bugs.llvm.org/show_bug.cgi?id=51099
+@g2 = internal addrspace(3) global [128 x i8] undef, align 1
+
+; Function Attrs: norecurse nounwind
+define float @nonptr(i8* nocapture readnone %a, i8* nocapture readnone %b, i8* nocapture readnone %c) local_unnamed_addr #0 {
+entry:
+  %0 = load float, float* bitcast (
+                                   i8* getelementptr (
+                                     i8, i8* getelementptr inbounds (
+                                       [128 x i8],
+                                       [128 x i8]* addrspacecast ([128 x i8] addrspace(3)* @g2 to [128 x i8]*),
+                                       i64 0,
+                                       i64 0),
+                                     i64 sub (
+                                       i64 ptrtoint (
+                                         i8* getelementptr inbounds (
+                                           [128 x i8],
+                                           [128 x i8]* addrspacecast ([128 x i8] addrspace(3)* @g2 to [128 x i8]*),
+                                           i64 0,
+                                           i64 123)
+                                         to i64),
+                                       i64 ptrtoint (
+                                         i8* getelementptr inbounds (
+                                           [128 x i8],
+                                           [128 x i8]* addrspacecast ([128 x i8] addrspace(3)* @g2 to [128 x i8]*),
+                                           i64 2,
+                                           i64 0)
+                                         to i64)))
+                                   to float*), align 4
+  ret float %0
+}
+
+
+
 attributes #0 = { convergent nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind }