addrspacecast X addrspace(M)* to Y addrspace(N)*
-->
bitcast X addrspace(M)* to Y addrspace(M)*
addrspacecast Y addrspace(M)* to Y addrspace(N)*
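As a concrete sketch of the pattern (the types and address-space numbers here are chosen for illustration and are not taken from the patch):

```llvm
; Before: one cast that changes both the pointee type and the address space.
%p = addrspacecast i32 addrspace(3)* %x to float addrspace(0)*

; After canonicalization: the type change is peeled off into a bitcast that
; stays in the source address space, followed by a pure addrspacecast.
%b = bitcast i32 addrspace(3)* %x to float addrspace(3)*
%p = addrspacecast float addrspace(3)* %b to float addrspace(0)*
```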
This canonicalization makes NVPTXFavorNonGenericAddrSpaces more effective, and can potentially benefit other optimizations as well.
This patch is based on D2186 with fixes and more tests:
- Fix an issue in D2186 that caused InstCombine to run into an infinite loop
- Add the failing test case as @canonicalize_addrspacecast in test/Transforms/InstCombine/addrspacecast.ll
- Perform the bitcast before the addrspacecast, because addrspacecasts from non-generic to generic address spaces can be folded into load/store instructions
- Updated all affected tests. One affected test (@test2_addrspacecast) in memcpy-from-global.ll is actually optimized better by this canonicalization: the alloca %T is transformed into alloca [128 x i8]
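To illustrate why the addrspacecast should come last, a hypothetical fold of a non-generic-to-generic cast into a load (IR sketch; the pointer names and address spaces are made up for this example):

```llvm
; Before: load through a generic pointer derived from shared memory.
%g = addrspacecast float addrspace(3)* %b to float*
%v = load float, float* %g

; After folding: load directly from the shared address space,
; which lets the backend emit a cheaper, space-specific access.
%v = load float, float addrspace(3)* %b
```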