Index: lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- lib/Transforms/InstCombine/InstructionCombining.cpp
+++ lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2029,9 +2029,24 @@
           areMatchingArrayAndVecTypes(GEPEltType, SrcEltType)) ||
          (GEPEltType->isVectorTy() && SrcEltType->isArrayTy() &&
           areMatchingArrayAndVecTypes(SrcEltType, GEPEltType)))) {
-      GEP.setOperand(0, SrcOp);
-      GEP.setSourceElementType(SrcEltType);
-      return &GEP;
+
+      // Build a fresh GEP on SrcOp rather than rewriting GEP's operand and
+      // source element type in place: the new instruction's result type is
+      // then derived from SrcOp, so a mismatch with the original GEP's
+      // address space can be detected and repaired below.
+      Value *NGEP =
+          GEP.isInBounds()
+              ? Builder.CreateInBoundsGEP(nullptr, SrcOp,
+                                          makeArrayRef(Ops).slice(1))
+              : Builder.CreateGEP(nullptr, SrcOp, makeArrayRef(Ops).slice(1));
+      NGEP->takeName(&GEP);
+
+      // The stripped cast may have changed the address space; cast back so
+      // existing users keep seeing a value of GEPType.
+      if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
+        return new AddrSpaceCastInst(NGEP, GEPType);
+
+      return replaceInstUsesWith(GEP, NGEP);
     }
 
     // See if we can simplify:
Index: test/Transforms/InstCombine/gep-addrspace.ll
===================================================================
--- test/Transforms/InstCombine/gep-addrspace.ll
+++ test/Transforms/InstCombine/gep-addrspace.ll
@@ -84,3 +84,23 @@
   store <16 x i32> %call, <16 x i32> addrspace(3)* %t4
   ret void
 }
+
+declare void @extern_vec_pointers_func(<16 x i32 addrspace(3)*>)
+
+; A GEP folded through bitcast + addrspacecast must be rebuilt on the original
+; pointer and cast back to addrspace(3) before it feeds the insertelement.
+define void @insertelem_after_gep(<16 x i32>* %t0) {
+; CHECK-LABEL: @insertelem_after_gep(
+; CHECK-NEXT:    [[T3:%.*]] = getelementptr inbounds <16 x i32>, <16 x i32>* [[T0:%.*]], i64 0, i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast i32* [[T3]] to i32 addrspace(3)*
+; CHECK-NEXT:    [[T4:%.*]] = insertelement <16 x i32 addrspace(3)*> undef, i32 addrspace(3)* [[TMP1]], i32 0
+; CHECK-NEXT:    call void @extern_vec_pointers_func(<16 x i32 addrspace(3)*> [[T4]])
+; CHECK-NEXT:    ret void
+;
+  %t1 = bitcast <16 x i32>* %t0 to [16 x i32]*
+  %t2 = addrspacecast [16 x i32]* %t1 to [16 x i32] addrspace(3)*
+  %t3 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* %t2, i64 0, i64 0
+  %t4 = insertelement <16 x i32 addrspace(3)*> undef, i32 addrspace(3)* %t3, i32 0
+  call void @extern_vec_pointers_func(<16 x i32 addrspace(3)*> %t4)
+  ret void
+}