This is an archive of the discontinued LLVM Phabricator instance.

Do not translate rint into nearbyint, but truncate it like nearbyint
ClosedPublic

Authored by joerg on Mar 31 2017, 6:52 AM.

Download Raw Diff

Details

Reviewers

scanon
arsenm

Summary

A common way to implement nearbyint is by fiddling with the floating point environment and calling rint. This is used at least by the BSD libm and musl. As such, canonicalizing the latter to the former will create infinite loops for libm and generally pessimize performance, at least when the generic C versions are used. This patch preserves the rint in the libcall translation and also handles the domain truncation logic, so that rint with a float argument will be reduced to rintf, etc.

Diff Detail

Repository: rL LLVM

Event Timeline

joerg created this revision. · Mar 31 2017, 6:52 AM

Herald added a subscriber: wdng. · View Herald Transcript · Mar 31 2017, 6:52 AM

LGTM.

This revision is now accepted and ready to land. · Mar 31 2017, 6:56 AM

LGTM

If rint() is faster than nearbyint(), should we canonicalize nearbyint()->rint()?

In D31533#715623, @efriedma wrote:

If rint() is faster than nearbyint(), should we canonicalize nearbyint()->rint()?

nearbyint( ) is not allowed to raise inexact. rint( ) is. Replacing nearbyint( ) with rint( ) requires that we support the FENV_ACCESS pragma (and only do the transformation when FENV_ACCESS is off).

Also, this is relatively low-value, because on newer platforms there's no difference; e.g. SSE4.1 and armv8 have single instruction implementations for both.

r299247.

Revision Contents

Path

Size

lib/

Transforms/

InstCombine/

InstCombineCalls.cpp

1 line

Utils/

SimplifyLibCalls.cpp

3 lines

test/

Transforms/

InstCombine/

float-shrink-compare.ll

4 lines

Diff 93632

lib/Transforms/InstCombine/InstCombineCalls.cpp

Show First 20 Lines • Show All 2,085 Lines • ▼ Show 20 Lines	case Intrinsic::fabs: {
}		}

LLVM_FALLTHROUGH;		LLVM_FALLTHROUGH;
}		}
case Intrinsic::ceil:		case Intrinsic::ceil:
case Intrinsic::floor:		case Intrinsic::floor:
case Intrinsic::round:		case Intrinsic::round:
case Intrinsic::nearbyint:		case Intrinsic::nearbyint:
		case Intrinsic::rint:
case Intrinsic::trunc: {		case Intrinsic::trunc: {
Value *ExtSrc;		Value *ExtSrc;
if (match(II->getArgOperand(0), m_FPExt(m_Value(ExtSrc))) &&		if (match(II->getArgOperand(0), m_FPExt(m_Value(ExtSrc))) &&
II->getArgOperand(0)->hasOneUse()) {		II->getArgOperand(0)->hasOneUse()) {
// fabs (fpext x) -> fpext (fabs x)		// fabs (fpext x) -> fpext (fabs x)
Value *F = Intrinsic::getDeclaration(II->getModule(), II->getIntrinsicID(),		Value *F = Intrinsic::getDeclaration(II->getModule(), II->getIntrinsicID(),
{ ExtSrc->getType() });		{ ExtSrc->getType() });
CallInst *NewFabs = Builder->CreateCall(F, ExtSrc);		CallInst *NewFabs = Builder->CreateCall(F, ExtSrc);
▲ Show 20 Lines • Show All 2,248 Lines • Show Last 20 Lines

lib/Transforms/Utils/SimplifyLibCalls.cpp

Show First 20 Lines • Show All 2,154 Lines • ▼ Show 20 Lines	case LibFunc_fputc:
return optimizeErrorReporting(CI, Builder, 1);		return optimizeErrorReporting(CI, Builder, 1);
case LibFunc_ceil:		case LibFunc_ceil:
return replaceUnaryCall(CI, Builder, Intrinsic::ceil);		return replaceUnaryCall(CI, Builder, Intrinsic::ceil);
case LibFunc_floor:		case LibFunc_floor:
return replaceUnaryCall(CI, Builder, Intrinsic::floor);		return replaceUnaryCall(CI, Builder, Intrinsic::floor);
case LibFunc_round:		case LibFunc_round:
return replaceUnaryCall(CI, Builder, Intrinsic::round);		return replaceUnaryCall(CI, Builder, Intrinsic::round);
case LibFunc_nearbyint:		case LibFunc_nearbyint:
case LibFunc_rint:
return replaceUnaryCall(CI, Builder, Intrinsic::nearbyint);		return replaceUnaryCall(CI, Builder, Intrinsic::nearbyint);
		case LibFunc_rint:
		return replaceUnaryCall(CI, Builder, Intrinsic::rint);
case LibFunc_trunc:		case LibFunc_trunc:
return replaceUnaryCall(CI, Builder, Intrinsic::trunc);		return replaceUnaryCall(CI, Builder, Intrinsic::trunc);
case LibFunc_acos:		case LibFunc_acos:
case LibFunc_acosh:		case LibFunc_acosh:
case LibFunc_asin:		case LibFunc_asin:
case LibFunc_asinh:		case LibFunc_asinh:
case LibFunc_atan:		case LibFunc_atan:
case LibFunc_atanh:		case LibFunc_atanh:
▲ Show 20 Lines • Show All 241 Lines • Show Last 20 Lines

test/Transforms/InstCombine/float-shrink-compare.ll

	Show First 20 Lines • Show All 113 Lines • ▼ Show 20 Lines
	define i32 @test5(float %x, float %y) nounwind uwtable {			define i32 @test5(float %x, float %y) nounwind uwtable {
	%x.ext = fpext float %x to double			%x.ext = fpext float %x to double
	%rint = call double @rint(double %x.ext) nounwind			%rint = call double @rint(double %x.ext) nounwind
	%y.ext = fpext float %y to double			%y.ext = fpext float %y to double
	%cmp = fcmp oeq double %rint, %y.ext			%cmp = fcmp oeq double %rint, %y.ext
	%cmp.ext = zext i1 %cmp to i32			%cmp.ext = zext i1 %cmp to i32
	ret i32 %cmp.ext			ret i32 %cmp.ext
	; CHECK-LABEL: @test5(			; CHECK-LABEL: @test5(
	; CHECK-NEXT: %rint = call float @llvm.nearbyint.f32(float %x)			; CHECK-NEXT: %rint = call float @llvm.rint.f32(float %x)
	; CHECK-NEXT: fcmp oeq float %rint, %y			; CHECK-NEXT: fcmp oeq float %rint, %y
	}			}

	define i32 @test6(float %x, float %y) nounwind uwtable {			define i32 @test6(float %x, float %y) nounwind uwtable {
	%x.ext = fpext float %x to double			%x.ext = fpext float %x to double
	%round = call double @round(double %x.ext) nounwind readnone			%round = call double @round(double %x.ext) nounwind readnone
	%y.ext = fpext float %y to double			%y.ext = fpext float %y to double
	%cmp = fcmp oeq double %round, %y.ext			%cmp = fcmp oeq double %round, %y.ext
	▲ Show 20 Lines • Show All 140 Lines • ▼ Show 20 Lines
	define i32 @test12(float %x, float %y) nounwind uwtable {			define i32 @test12(float %x, float %y) nounwind uwtable {
	%x.ext = fpext float %x to double			%x.ext = fpext float %x to double
	%y.ext = fpext float %y to double			%y.ext = fpext float %y to double
	%rint = call double @rint(double %x.ext) nounwind			%rint = call double @rint(double %x.ext) nounwind
	%cmp = fcmp oeq double %y.ext, %rint			%cmp = fcmp oeq double %y.ext, %rint
	%cmp.ext = zext i1 %cmp to i32			%cmp.ext = zext i1 %cmp to i32
	ret i32 %cmp.ext			ret i32 %cmp.ext
	; CHECK-LABEL: @test12(			; CHECK-LABEL: @test12(
	; CHECK-NEXT: %rint = call float @llvm.nearbyint.f32(float %x)			; CHECK-NEXT: %rint = call float @llvm.rint.f32(float %x)
	; CHECK-NEXT: fcmp oeq float %rint, %y			; CHECK-NEXT: fcmp oeq float %rint, %y
	}			}

	define i32 @test13(float %x, float %y) nounwind uwtable {			define i32 @test13(float %x, float %y) nounwind uwtable {
	%x.ext = fpext float %x to double			%x.ext = fpext float %x to double
	%y.ext = fpext float %y to double			%y.ext = fpext float %y to double
	%round = call double @round(double %x.ext) nounwind readnone			%round = call double @round(double %x.ext) nounwind readnone
	%cmp = fcmp oeq double %y.ext, %round			%cmp = fcmp oeq double %y.ext, %round
	▲ Show 20 Lines • Show All 149 Lines • Show Last 20 Lines