Diff 441228

llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp

Show First 20 Lines • Show All 1,714 Lines • ▼ Show 20 Lines	static Type *getMinimumFPType(Value *V) {
if (Type *T = shrinkFPConstantVector(V))		if (Type *T = shrinkFPConstantVector(V))
return T;		return T;

return V->getType();		return V->getType();
}		}

/// Return true if the cast from integer to FP can be proven to be exact for all		/// Return true if the cast from integer to FP can be proven to be exact for all
/// possible inputs (the conversion does not lose any precision).		/// possible inputs (the conversion does not lose any precision).
static bool isKnownExactCastIntToFP(CastInst &I) {		static bool isKnownExactCastIntToFP(CastInst &I, InstCombinerImpl &IC) {
CastInst::CastOps Opcode = I.getOpcode();		CastInst::CastOps Opcode = I.getOpcode();
assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&		assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
"Unexpected cast");		"Unexpected cast");
Value *Src = I.getOperand(0);		Value *Src = I.getOperand(0);
Type *SrcTy = Src->getType();		Type *SrcTy = Src->getType();
Type *FPTy = I.getType();		Type *FPTy = I.getType();
bool IsSigned = Opcode == Instruction::SIToFP;		bool IsSigned = Opcode == Instruction::SIToFP;
int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;		int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
Show All 18 Lines	if (match(Src, m_FPToSI(m_Value(F))) || match(Src, m_FPToUI(m_Value(F)))) {
// significant bits than the destination (and make sure neither type is		// significant bits than the destination (and make sure neither type is
// weird -- ppc_fp128).		// weird -- ppc_fp128).
if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&		if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&
SrcNumSigBits <= DestNumSigBits)		SrcNumSigBits <= DestNumSigBits)
return true;		return true;
}		}

// TODO:		// TODO:
// Try harder to find if the source integer type has less significant bits.		// Try harder to find if the source integer type has less significant bits.
		spatelUnsubmitted Done Reply Inline Actions Please leave the TODO note. We could still check sign bits rather than zeros or do some other kind of refinement. spatel: Please leave the TODO note. We could still check sign bits rather than zeros or do some other…
// For example, compute number of sign bits or compute low bit mask.		// For example, compute number of sign bits or compute low bit mask.
		KnownBits SrcKnown = IC.computeKnownBits(Src, 0, &I);
		nikicUnsubmitted Done Reply Inline Actions Can use `IC.computeKnownBits(Src, 0, &I)` to also make use of AC/DT, I believe. nikic: Can use `IC.computeKnownBits(Src, 0, &I)` to also make use of AC/DT, I believe.
		int LowBits =
		(int)SrcTy->getScalarSizeInBits() - SrcKnown.countMinLeadingZeros();
		if (LowBits <= DestNumSigBits)
		return true;

return false;		return false;
}		}

Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {		Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
if (Instruction *I = commonCastTransforms(FPT))		if (Instruction *I = commonCastTransforms(FPT))
return I;		return I;

// If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to		// If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
▲ Show 20 Lines • Show All 164 Lines • ▼ Show 20 Lines	Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
}		}

if (Instruction *I = shrinkInsertElt(FPT, Builder))		if (Instruction *I = shrinkInsertElt(FPT, Builder))
return I;		return I;

Value *Src = FPT.getOperand(0);		Value *Src = FPT.getOperand(0);
if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {		if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
auto *FPCast = cast<CastInst>(Src);		auto *FPCast = cast<CastInst>(Src);
if (isKnownExactCastIntToFP(*FPCast))		if (isKnownExactCastIntToFP(*FPCast, *this))
		spatelUnsubmitted Done Reply Inline Actions We should have at least one test providing coverage for this path (and fpext too?). IIUC, this patch will improve a case like this: define half @masked_int_to_fp_trunc(i32 %A) { %m = and i32 %A, 16777215 %B = sitofp i32 %m to float %C = fptrunc float %B to half ret half %C } But we don't actually need a mask: https://alive2.llvm.org/ce/z/iaEX2i ...so the fold conditions still aren't quite right. spatel: We should have at least one test providing coverage for this path (and fpext too?). IIUC…
		AllenAuthorUnsubmitted Done Reply Inline Actions hi @spatel, I think the case in your link https://alive2.llvm.org/ce/z/iaEX2i is related to another topic, maybe making use of the fptrunc? In this patch, we hope to fold the case %m = and i25 %A, smallMaskValue %B = uitofp i25 %m to float %C = fptoui float %B to i25 Based on the backtrace, we can see it has both FPToUI and FPtoI. #0 isKnownExactCastIntToFP (I=..., IC=...) at /home/zhongyunde/llvm-project-init-dev_12x/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp:1762 #1 0x0000aaaaae7ee18c in llvm::InstCombinerImpl::foldItoFPtoI (this=0xffffffffbce0, FI=...) at /home/zhongyunde/llvm-project-init-dev_12x/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp:1986 #2 0x0000aaaaae7ee398 in llvm::InstCombinerImpl::visitFPToUI (this=0xffffffffbce0, FI=...) at /home/zhongyunde/llvm-project-init-dev_12x/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp:2011 Allen: hi @spatel, I think the case in your link https://alive2.llvm.org/ce/z/iaEX2i is related to…
		spatelUnsubmitted Done Reply Inline Actions I understand that we are not optimizing ideally for this pattern, but that's not my point. These kinds of transforms are hard to get right (see for example D124692). This patch affects 3 caller code paths, so I'd like to have tests that provide some coverage for those paths. Please rebase with test diffs after: a5040860412f spatel: I understand that we are not optimizing ideally for this pattern, but that's not my point.
		AllenAuthorUnsubmitted Done Reply Inline Actions Done, thanks! Allen: Done, thanks!
return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);		return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
}		}

return nullptr;		return nullptr;
}		}

Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) {		Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) {
// If the source operand is a cast from integer to FP and known exact, then		// If the source operand is a cast from integer to FP and known exact, then
// cast the integer operand directly to the destination type.		// cast the integer operand directly to the destination type.
Type *Ty = FPExt.getType();		Type *Ty = FPExt.getType();
Value *Src = FPExt.getOperand(0);		Value *Src = FPExt.getOperand(0);
if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {		if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
auto *FPCast = cast<CastInst>(Src);		auto *FPCast = cast<CastInst>(Src);
if (isKnownExactCastIntToFP(*FPCast))		if (isKnownExactCastIntToFP(*FPCast, *this))
return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);		return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
}		}

return commonCastTransforms(FPExt);		return commonCastTransforms(FPExt);
}		}

/// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)		/// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
/// This is safe if the intermediate type has enough bits in its mantissa to		/// This is safe if the intermediate type has enough bits in its mantissa to
Show All 10 Lines	Instruction *InstCombinerImpl::foldItoFPtoI(CastInst &FI) {
bool IsOutputSigned = isa<FPToSIInst>(FI);		bool IsOutputSigned = isa<FPToSIInst>(FI);

// Since we can assume the conversion won't overflow, our decision as to		// Since we can assume the conversion won't overflow, our decision as to
// whether the input will fit in the float should depend on the minimum		// whether the input will fit in the float should depend on the minimum
// of the input range and output range.		// of the input range and output range.

// This means this is also safe for a signed input and unsigned output, since		// This means this is also safe for a signed input and unsigned output, since
// a negative input would lead to undefined behavior.		// a negative input would lead to undefined behavior.
if (!isKnownExactCastIntToFP(*OpI)) {		if (!isKnownExactCastIntToFP(*OpI, *this)) {
// The first cast may not round exactly based on the source integer width		// The first cast may not round exactly based on the source integer width
// and FP width, but the overflow UB rules can still allow this to fold.		// and FP width, but the overflow UB rules can still allow this to fold.
// If the destination type is narrow, that means the intermediate FP value		// If the destination type is narrow, that means the intermediate FP value
// must be large enough to hold the source value exactly.		// must be large enough to hold the source value exactly.
// For example, (uint8_t)((float)(uint32_t 16777217) is undefined behavior.		// For example, (uint8_t)((float)(uint32_t 16777217) is undefined behavior.
int OutputSize = (int)DestType->getScalarSizeInBits();		int OutputSize = (int)DestType->getScalarSizeInBits();
if (OutputSize > OpI->getType()->getFPMantissaWidth())		if (OutputSize > OpI->getType()->getFPMantissaWidth())
return nullptr;		return nullptr;
▲ Show 20 Lines • Show All 926 Lines • Show Last 20 Lines

llvm/test/Transforms/InstCombine/fpcast.ll

Show First 20 Lines • Show All 164 Lines • ▼ Show 20 Lines	;
%f = sitofp i32 %x to float		%f = sitofp i32 %x to float
%r = fptrunc float %f to half		%r = fptrunc float %f to half
ret half %r		ret half %r
}		}

define half @masked_sint_to_fptrunc1(i32 %x) {		define half @masked_sint_to_fptrunc1(i32 %x) {
; CHECK-LABEL: @masked_sint_to_fptrunc1(		; CHECK-LABEL: @masked_sint_to_fptrunc1(
; CHECK-NEXT: [[M:%.*]] = and i32 [[X:%.*]], 16777215		; CHECK-NEXT: [[M:%.*]] = and i32 [[X:%.*]], 16777215
; CHECK-NEXT: [[F:%.*]] = sitofp i32 [[M]] to float		; CHECK-NEXT: [[R:%.*]] = sitofp i32 [[M]] to half
; CHECK-NEXT: [[R:%.*]] = fptrunc float [[F]] to half
; CHECK-NEXT: ret half [[R]]		; CHECK-NEXT: ret half [[R]]
;		;
%m = and i32 %x, 16777215		%m = and i32 %x, 16777215
%f = sitofp i32 %m to float		%f = sitofp i32 %m to float
%r = fptrunc float %f to half		%r = fptrunc float %f to half
ret half %r		ret half %r
}		}

define half @masked_sint_to_fptrunc2(i32 %x) {		define half @masked_sint_to_fptrunc2(i32 %x) {
; CHECK-LABEL: @masked_sint_to_fptrunc2(		; CHECK-LABEL: @masked_sint_to_fptrunc2(
; CHECK-NEXT: [[M:%.*]] = lshr i32 [[X:%.*]], 8		; CHECK-NEXT: [[M:%.*]] = lshr i32 [[X:%.*]], 8
; CHECK-NEXT: [[F:%.*]] = sitofp i32 [[M]] to float		; CHECK-NEXT: [[R:%.*]] = sitofp i32 [[M]] to half
; CHECK-NEXT: [[R:%.*]] = fptrunc float [[F]] to half
; CHECK-NEXT: ret half [[R]]		; CHECK-NEXT: ret half [[R]]
;		;
%m = lshr i32 %x, 8		%m = lshr i32 %x, 8
%f = sitofp i32 %m to float		%f = sitofp i32 %m to float
%r = fptrunc float %f to half		%r = fptrunc float %f to half
ret half %r		ret half %r
}		}

Show All 19 Lines	;
%f = sitofp i32 %x to float		%f = sitofp i32 %x to float
%r = fpext float %f to double		%r = fpext float %f to double
ret double %r		ret double %r
}		}

define double @masked_sint_to_fpext1(i32 %x) {		define double @masked_sint_to_fpext1(i32 %x) {
; CHECK-LABEL: @masked_sint_to_fpext1(		; CHECK-LABEL: @masked_sint_to_fpext1(
; CHECK-NEXT: [[M:%.*]] = and i32 [[X:%.*]], 16777215		; CHECK-NEXT: [[M:%.*]] = and i32 [[X:%.*]], 16777215
; CHECK-NEXT: [[F:%.*]] = sitofp i32 [[M]] to float		; CHECK-NEXT: [[R:%.*]] = sitofp i32 [[M]] to double
; CHECK-NEXT: [[R:%.*]] = fpext float [[F]] to double
; CHECK-NEXT: ret double [[R]]		; CHECK-NEXT: ret double [[R]]
;		;
%m = and i32 %x, 16777215		%m = and i32 %x, 16777215
%f = sitofp i32 %m to float		%f = sitofp i32 %m to float
%r = fpext float %f to double		%r = fpext float %f to double
ret double %r		ret double %r
}		}

define double @masked_sint_to_fpext2(i32 %x) {		define double @masked_sint_to_fpext2(i32 %x) {
; CHECK-LABEL: @masked_sint_to_fpext2(		; CHECK-LABEL: @masked_sint_to_fpext2(
; CHECK-NEXT: [[M:%.*]] = lshr i32 [[X:%.*]], 8		; CHECK-NEXT: [[M:%.*]] = lshr i32 [[X:%.*]], 8
; CHECK-NEXT: [[F:%.*]] = sitofp i32 [[M]] to float		; CHECK-NEXT: [[R:%.*]] = sitofp i32 [[M]] to double
; CHECK-NEXT: [[R:%.*]] = fpext float [[F]] to double
; CHECK-NEXT: ret double [[R]]		; CHECK-NEXT: ret double [[R]]
;		;
%m = lshr i32 %x, 8		%m = lshr i32 %x, 8
%f = sitofp i32 %m to float		%f = sitofp i32 %m to float
%r = fpext float %f to double		%r = fpext float %f to double
ret double %r		ret double %r
}		}

Show All 19 Lines	;
%f = uitofp i32 %x to float		%f = uitofp i32 %x to float
%r = fptrunc float %f to half		%r = fptrunc float %f to half
ret half %r		ret half %r
}		}

define half @masked_uint_to_fptrunc1(i32 %x) {		define half @masked_uint_to_fptrunc1(i32 %x) {
; CHECK-LABEL: @masked_uint_to_fptrunc1(		; CHECK-LABEL: @masked_uint_to_fptrunc1(
; CHECK-NEXT: [[M:%.*]] = and i32 [[X:%.*]], 16777215		; CHECK-NEXT: [[M:%.*]] = and i32 [[X:%.*]], 16777215
; CHECK-NEXT: [[F:%.*]] = uitofp i32 [[M]] to float		; CHECK-NEXT: [[R:%.*]] = uitofp i32 [[M]] to half
; CHECK-NEXT: [[R:%.*]] = fptrunc float [[F]] to half
; CHECK-NEXT: ret half [[R]]		; CHECK-NEXT: ret half [[R]]
;		;
%m = and i32 %x, 16777215		%m = and i32 %x, 16777215
%f = uitofp i32 %m to float		%f = uitofp i32 %m to float
%r = fptrunc float %f to half		%r = fptrunc float %f to half
ret half %r		ret half %r
}		}

define half @masked_uint_to_fptrunc2(i32 %x) {		define half @masked_uint_to_fptrunc2(i32 %x) {
; CHECK-LABEL: @masked_uint_to_fptrunc2(		; CHECK-LABEL: @masked_uint_to_fptrunc2(
; CHECK-NEXT: [[M:%.*]] = lshr i32 [[X:%.*]], 8		; CHECK-NEXT: [[M:%.*]] = lshr i32 [[X:%.*]], 8
; CHECK-NEXT: [[F:%.*]] = uitofp i32 [[M]] to float		; CHECK-NEXT: [[R:%.*]] = uitofp i32 [[M]] to half
; CHECK-NEXT: [[R:%.*]] = fptrunc float [[F]] to half
; CHECK-NEXT: ret half [[R]]		; CHECK-NEXT: ret half [[R]]
;		;
%m = lshr i32 %x, 8		%m = lshr i32 %x, 8
%f = uitofp i32 %m to float		%f = uitofp i32 %m to float
%r = fptrunc float %f to half		%r = fptrunc float %f to half
ret half %r		ret half %r
}		}

Show All 19 Lines	;
%f = uitofp i32 %x to float		%f = uitofp i32 %x to float
%r = fpext float %f to double		%r = fpext float %f to double
ret double %r		ret double %r
}		}

define double @masked_uint_to_fpext1(i32 %x) {		define double @masked_uint_to_fpext1(i32 %x) {
; CHECK-LABEL: @masked_uint_to_fpext1(		; CHECK-LABEL: @masked_uint_to_fpext1(
; CHECK-NEXT: [[M:%.*]] = and i32 [[X:%.*]], 16777215		; CHECK-NEXT: [[M:%.*]] = and i32 [[X:%.*]], 16777215
; CHECK-NEXT: [[F:%.*]] = uitofp i32 [[M]] to float		; CHECK-NEXT: [[R:%.*]] = uitofp i32 [[M]] to double
; CHECK-NEXT: [[R:%.*]] = fpext float [[F]] to double
; CHECK-NEXT: ret double [[R]]		; CHECK-NEXT: ret double [[R]]
;		;
%m = and i32 %x, 16777215		%m = and i32 %x, 16777215
%f = uitofp i32 %m to float		%f = uitofp i32 %m to float
%r = fpext float %f to double		%r = fpext float %f to double
ret double %r		ret double %r
}		}

define double @masked_uint_to_fpext2(i32 %x) {		define double @masked_uint_to_fpext2(i32 %x) {
; CHECK-LABEL: @masked_uint_to_fpext2(		; CHECK-LABEL: @masked_uint_to_fpext2(
; CHECK-NEXT: [[M:%.*]] = lshr i32 [[X:%.*]], 8		; CHECK-NEXT: [[M:%.*]] = lshr i32 [[X:%.*]], 8
; CHECK-NEXT: [[F:%.*]] = uitofp i32 [[M]] to float		; CHECK-NEXT: [[R:%.*]] = uitofp i32 [[M]] to double
; CHECK-NEXT: [[R:%.*]] = fpext float [[F]] to double
; CHECK-NEXT: ret double [[R]]		; CHECK-NEXT: ret double [[R]]
;		;
%m = lshr i32 %x, 8		%m = lshr i32 %x, 8
%f = uitofp i32 %m to float		%f = uitofp i32 %m to float
%r = fpext float %f to double		%r = fpext float %f to double
ret double %r		ret double %r
}		}

Show All 12 Lines

llvm/test/Transforms/InstCombine/sitofp.ll

	Show First 20 Lines • Show All 212 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: [[C:%.*]] = fptosi double [[B]] to i55			; CHECK-NEXT: [[C:%.*]] = fptosi double [[B]] to i55
	; CHECK-NEXT: ret i55 [[C]]			; CHECK-NEXT: ret i55 [[C]]
	;			;
	%B = sitofp i64 %A to double			%B = sitofp i64 %A to double
	%C = fptosi double %B to i55			%C = fptosi double %B to i55
	ret i55 %C			ret i55 %C
	}			}

	; TODO: The mask guarantees that the input is small enough to eliminate the FP casts.			; The mask guarantees that the input is small enough to eliminate the FP casts.

	define i25 @masked_input(i25 %A) {			define i25 @masked_input(i25 %A) {
	; CHECK-LABEL: @masked_input(			; CHECK-LABEL: @masked_input(
	; CHECK-NEXT: [[M:%.*]] = and i25 [[A:%.*]], 65535			; CHECK-NEXT: [[M:%.*]] = and i25 [[A:%.*]], 65535
				; CHECK-NEXT: ret i25 [[M]]
				;
				%m = and i25 %A, 65535
				%B = uitofp i25 %m to float
				%C = fptoui float %B to i25
				ret i25 %C
				}

				define i25 @max_masked_input(i25 %A) {
				; CHECK-LABEL: @max_masked_input(
				; CHECK-NEXT: [[M:%.*]] = and i25 [[A:%.*]], 16777215
				; CHECK-NEXT: ret i25 [[M]]
				;
				%m = and i25 %A, 16777215 ; max intermediate 16777215 (= 1 << 24)-1
				%B = uitofp i25 %m to float
				%C = fptoui float %B to i25
				ret i25 %C
				}

				define i25 @overflow_masked_input(i25 %A) {
				nikicUnsubmitted Done Reply Inline Actions According to alive, this mask would be fine: https://alive2.llvm.org/ce/z/PdpF6F This is because this is a mask of the form `0b1000...` rather than `0b1111...`. The right mask to test would be: https://alive2.llvm.org/ce/z/GYxTRu (We could also make use of these low zero bits, but I think that would be better as a separate change.) nikic: According to alive, this mask would be fine: https://alive2.llvm.org/ce/z/PdpF6F This is…
				AllenAuthorUnsubmitted Done Reply Inline Actions good catch. Thanks @nikic for the detailed explanations, I'll address this after this change. Allen: good catch. Thanks @nikic for the detailed explanations, I'll address this after this change.
				; CHECK-LABEL: @overflow_masked_input(
				; CHECK-NEXT: [[M:%.*]] = and i25 [[A:%.*]], -16777216
	; CHECK-NEXT: [[B:%.*]] = uitofp i25 [[M]] to float			; CHECK-NEXT: [[B:%.*]] = uitofp i25 [[M]] to float
	; CHECK-NEXT: [[C:%.*]] = fptoui float [[B]] to i25			; CHECK-NEXT: [[C:%.*]] = fptoui float [[B]] to i25
	; CHECK-NEXT: ret i25 [[C]]			; CHECK-NEXT: ret i25 [[C]]
	;			;
	%m = and i25 %A, 65535			%m = and i25 %A, 16777216 ; Negative test - intermediate 16777216 (= 1 << 24)
	%B = uitofp i25 %m to float			%B = uitofp i25 %m to float
	%C = fptoui float %B to i25			%C = fptoui float %B to i25
	ret i25 %C			ret i25 %C
	}			}

	; TODO: Clear the low bit - guarantees that the input is converted to FP without rounding.			; TODO: Clear the low bit - guarantees that the input is converted to FP without rounding.

	define i25 @low_masked_input(i25 %A) {			define i25 @low_masked_input(i25 %A) {
	▲ Show 20 Lines • Show All 111 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[InstCombine] Use known bits to determine exact int->fp cast
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 441228

llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp

llvm/test/Transforms/InstCombine/fpcast.ll

llvm/test/Transforms/InstCombine/sitofp.ll

This is an archive of the discontinued LLVM Phabricator instance.

[InstCombine] Use known bits to determine exact int->fp cast
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 441228

llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp

llvm/test/Transforms/InstCombine/fpcast.ll

llvm/test/Transforms/InstCombine/sitofp.ll

[InstCombine] Use known bits to determine exact int->fp cast
ClosedPublic