Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2457,6 +2457,22 @@ if (Instruction *I = visitGEPOfGEP(GEP, Src)) return I; + // Canonicalize gep of addrspacecast to addrspacecast of gep. Don't do this + // if index sizes don't match, to avoid introducing sext/trunc. + // TODO: It might make more sense to canonicalize in the reverse direction. + if (auto *AC = dyn_cast(PtrOp)) { + if (AC->hasOneUse() && + DL.getIndexTypeSizeInBits(GEP.getType()) == + DL.getIndexTypeSizeInBits(AC->getOperand(0)->getType())) { + Value *NewGEP = Builder.CreateGEP(GEPEltType, AC->getOperand(0), Indices, + "", GEP.isInBounds()); + NewGEP->takeName(&GEP); + Value *NewAC = Builder.CreateAddrSpaceCast(NewGEP, GEP.getType()); + NewAC->takeName(AC); + return replaceInstUsesWith(GEP, NewAC); + } + } + // Skip if GEP source element type is scalable. The type alloc size is unknown // at compile-time. if (GEP.getNumIndices() == 1 && !IsGEPSrcEleScalable) { Index: llvm/test/Transforms/InstCombine/gep-addrspace.ll =================================================================== --- llvm/test/Transforms/InstCombine/gep-addrspace.ll +++ llvm/test/Transforms/InstCombine/gep-addrspace.ll @@ -48,9 +48,9 @@ ; CHECK-NEXT: [[T0:%.*]] = alloca i16, align 2 ; CHECK-NEXT: call void @escape_alloca(i16* nonnull [[T0]]) ; CHECK-NEXT: [[TMPCAST:%.*]] = bitcast i16* [[T0]] to [2 x i8]* -; CHECK-NEXT: [[T1:%.*]] = addrspacecast [2 x i8]* [[TMPCAST]] to [2 x i8] addrspace(11)* -; CHECK-NEXT: [[T2:%.*]] = getelementptr inbounds [2 x i8], [2 x i8] addrspace(11)* [[T1]], i64 0, i64 1 -; CHECK-NEXT: [[T3:%.*]] = load i8, i8 addrspace(11)* [[T2]], align 1 +; CHECK-NEXT: [[T2:%.*]] = getelementptr inbounds [2 x i8], [2 x i8]* [[TMPCAST]], i64 0, i64 1 +; CHECK-NEXT: [[T1:%.*]] = addrspacecast i8* [[T2]] to i8 addrspace(11)* +; CHECK-NEXT: [[T3:%.*]] = load i8, i8 addrspace(11)* [[T1]], align 1 ; CHECK-NEXT: [[INSERT:%.*]] = insertvalue { i8, i8 } zeroinitializer, i8 [[T3]], 1 ; CHECK-NEXT: ret { i8, i8 } [[INSERT]] ; Index: llvm/test/Transforms/InstCombine/gep-vector.ll =================================================================== --- llvm/test/Transforms/InstCombine/gep-vector.ll +++ llvm/test/Transforms/InstCombine/gep-vector.ll @@ -82,9 +82,9 @@ define i32 addrspace(3)* @bitcast_vec_to_array_addrspace(<7 x i32>* %x, i64 %y, i64 %z) { ; CHECK-LABEL: @bitcast_vec_to_array_addrspace( ; CHECK-NEXT: [[ARR_PTR:%.*]] = bitcast <7 x i32>* [[X:%.*]] to [7 x i32]* -; CHECK-NEXT: [[ASC:%.*]] = addrspacecast [7 x i32]* [[ARR_PTR]] to [7 x i32] addrspace(3)* -; CHECK-NEXT: [[GEP:%.*]] = getelementptr [7 x i32], [7 x i32] addrspace(3)* [[ASC]], i64 [[Y:%.*]], i64 [[Z:%.*]] -; CHECK-NEXT: ret i32 addrspace(3)* [[GEP]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr [7 x i32], [7 x i32]* [[ARR_PTR]], i64 [[Y:%.*]], i64 [[Z:%.*]] +; CHECK-NEXT: [[ASC:%.*]] = addrspacecast i32* [[GEP]] to i32 addrspace(3)* +; CHECK-NEXT: ret i32 addrspace(3)* [[ASC]] ; %arr_ptr = bitcast <7 x i32>* %x to [7 x i32]* %asc = addrspacecast [7 x i32]* %arr_ptr to [7 x i32] addrspace(3)* @@ -97,9 +97,9 @@ define i32 addrspace(3)* @inbounds_bitcast_vec_to_array_addrspace(<7 x i32>* %x, i64 %y, i64 %z) { ; CHECK-LABEL: @inbounds_bitcast_vec_to_array_addrspace( ; CHECK-NEXT: [[ARR_PTR:%.*]] = bitcast <7 x i32>* [[X:%.*]] to [7 x i32]* -; CHECK-NEXT: [[ASC:%.*]] = addrspacecast [7 x i32]* [[ARR_PTR]] to [7 x i32] addrspace(3)* -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [7 x i32], [7 x i32] addrspace(3)* [[ASC]], i64 [[Y:%.*]], i64 [[Z:%.*]] -; CHECK-NEXT: ret i32 addrspace(3)* [[GEP]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [7 x i32], [7 x i32]* [[ARR_PTR]], i64 [[Y:%.*]], i64 [[Z:%.*]] +; CHECK-NEXT: [[ASC:%.*]] = addrspacecast i32* [[GEP]] to i32 addrspace(3)* +; CHECK-NEXT: ret i32 addrspace(3)* [[ASC]] ; %arr_ptr = bitcast <7 x i32>* %x to [7 x i32]* %asc = addrspacecast [7 x i32]* %arr_ptr to [7 x i32] addrspace(3)* @@ -112,8 +112,8 @@ define i32 addrspace(3)* @bitcast_vec_to_array_addrspace_matching_alloc_size(<4 x i32>* %x, i64 %y, i64 %z) { ; CHECK-LABEL: @bitcast_vec_to_array_addrspace_matching_alloc_size( ; CHECK-NEXT: [[GEP:%.*]] = getelementptr <4 x i32>, <4 x i32>* [[X:%.*]], i64 [[Y:%.*]], i64 [[Z:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast i32* [[GEP]] to i32 addrspace(3)* -; CHECK-NEXT: ret i32 addrspace(3)* [[TMP1]] +; CHECK-NEXT: [[ASC:%.*]] = addrspacecast i32* [[GEP]] to i32 addrspace(3)* +; CHECK-NEXT: ret i32 addrspace(3)* [[ASC]] ; %arr_ptr = bitcast <4 x i32>* %x to [4 x i32]* %asc = addrspacecast [4 x i32]* %arr_ptr to [4 x i32] addrspace(3)* @@ -126,8 +126,8 @@ define i32 addrspace(3)* @inbounds_bitcast_vec_to_array_addrspace_matching_alloc_size(<4 x i32>* %x, i64 %y, i64 %z) { ; CHECK-LABEL: @inbounds_bitcast_vec_to_array_addrspace_matching_alloc_size( ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i64 [[Y:%.*]], i64 [[Z:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast i32* [[GEP]] to i32 addrspace(3)* -; CHECK-NEXT: ret i32 addrspace(3)* [[TMP1]] +; CHECK-NEXT: [[ASC:%.*]] = addrspacecast i32* [[GEP]] to i32 addrspace(3)* +; CHECK-NEXT: ret i32 addrspace(3)* [[ASC]] ; %arr_ptr = bitcast <4 x i32>* %x to [4 x i32]* %asc = addrspacecast [4 x i32]* %arr_ptr to [4 x i32] addrspace(3)* @@ -138,19 +138,21 @@ ; Negative test - avoid doing bitcast on i8*, because '16' should be scaled by 'vscale'. define i8* @test_accumulate_constant_offset_vscale_nonzero( %pg, i8* %base) { -; CHECK-LABEL: @test_accumulate_constant_offset_vscale_nonzero -; CHECK-NEXT: %bc = bitcast i8* %base to * -; CHECK-NEXT: %gep = getelementptr , * %bc, i64 1, i64 4 -; CHECK-NEXT: ret i8* %gep +; CHECK-LABEL: @test_accumulate_constant_offset_vscale_nonzero( +; CHECK-NEXT: [[BC:%.*]] = bitcast i8* [[BASE:%.*]] to * +; CHECK-NEXT: [[GEP:%.*]] = getelementptr , * [[BC]], i64 1, i64 4 +; CHECK-NEXT: ret i8* [[GEP]] +; %bc = bitcast i8* %base to * %gep = getelementptr , * %bc, i64 1, i64 4 ret i8* %gep } define i8* @test_accumulate_constant_offset_vscale_zero( %pg, i8* %base) { -; CHECK-LABEL: @test_accumulate_constant_offset_vscale_zero -; CHECK-NEXT: %[[RES:.*]] = getelementptr i8, i8* %base, i64 4 -; CHECK-NEXT: ret i8* %[[RES]] +; CHECK-LABEL: @test_accumulate_constant_offset_vscale_zero( +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, i8* [[BASE:%.*]], i64 4 +; CHECK-NEXT: ret i8* [[GEP]] +; %bc = bitcast i8* %base to * %gep = getelementptr , * %bc, i64 0, i64 4 ret i8* %gep Index: llvm/test/Transforms/InstCombine/non-integral-pointers.ll =================================================================== --- llvm/test/Transforms/InstCombine/non-integral-pointers.ll +++ llvm/test/Transforms/InstCombine/non-integral-pointers.ll @@ -55,10 +55,10 @@ ; CHECK-LABEL: @g( ; CHECK-NEXT: [[DOTPRE:%.*]] = load ptr addrspace(4), ptr [[GP:%.*]], align 8 ; CHECK-NEXT: [[V74:%.*]] = call ptr addrspace(4) @alloc() -; CHECK-NEXT: [[V75:%.*]] = addrspacecast ptr addrspace(4) [[V74]] to ptr -; CHECK-NEXT: [[V77:%.*]] = getelementptr ptr addrspace(4), ptr [[V75]], i64 -1 -; CHECK-NEXT: store ptr addrspace(4) [[DOTPRE]], ptr [[V77]], align 8 -; CHECK-NEXT: [[V81:%.*]] = load i64, ptr [[V77]], align 8 +; CHECK-NEXT: [[V77:%.*]] = getelementptr ptr addrspace(4), ptr addrspace(4) [[V74]], i64 -1 +; CHECK-NEXT: [[V75:%.*]] = addrspacecast ptr addrspace(4) [[V77]] to ptr +; CHECK-NEXT: store ptr addrspace(4) [[DOTPRE]], ptr [[V75]], align 8 +; CHECK-NEXT: [[V81:%.*]] = load i64, ptr [[V75]], align 8 ; CHECK-NEXT: ret i64 [[V81]] ; %.pre = load ptr addrspace(4), ptr %gp, align 8 Index: llvm/test/Transforms/InstCombine/opaque-ptr.ll =================================================================== --- llvm/test/Transforms/InstCombine/opaque-ptr.ll +++ llvm/test/Transforms/InstCombine/opaque-ptr.ll @@ -427,9 +427,9 @@ define ptr addrspace(1) @gep_of_addrspace_cast(ptr %ptr) { ; CHECK-LABEL: @gep_of_addrspace_cast( -; CHECK-NEXT: [[CAST1:%.*]] = addrspacecast ptr [[PTR:%.*]] to ptr addrspace(1) -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[CAST1]], i64 1 -; CHECK-NEXT: ret ptr addrspace(1) [[GEP]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 1 +; CHECK-NEXT: [[CAST1:%.*]] = addrspacecast ptr [[GEP]] to ptr addrspace(1) +; CHECK-NEXT: ret ptr addrspace(1) [[CAST1]] ; %cast1 = addrspacecast ptr %ptr to ptr addrspace(1) %gep = getelementptr inbounds i32, ptr addrspace(1) %cast1, i64 1