This is an archive of the discontinued LLVM Phabricator instance.

Show First 20 Lines • Show All 963 Lines • ▼ Show 20 Lines	if (abs_opr1 <= 12) {
}		}
LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "		LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
<< ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0		<< ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
<< ")\n");		<< ")\n");
replaceCall(FPOp, nval);		replaceCall(FPOp, nval);
return true;		return true;
}		}

		// If we should use the generic intrinsic instead of emitting a libcall
		const bool ShouldUseIntrinsic = eltType->isFloatTy() || eltType->isHalfTy();

// powr ---> exp2(y * log2(x))		// powr ---> exp2(y * log2(x))
// pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))		// pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
FunctionCallee ExpExpr =		FunctionCallee ExpExpr;
getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));		if (ShouldUseIntrinsic)
		ExpExpr = Intrinsic::getDeclaration(M, Intrinsic::exp2, {FPOp->getType()});
		else {
		ExpExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
if (!ExpExpr)		if (!ExpExpr)
return false;		return false;
		}

bool needlog = false;		bool needlog = false;
bool needabs = false;		bool needabs = false;
bool needcopysign = false;		bool needcopysign = false;
Constant *cnval = nullptr;		Constant *cnval = nullptr;
if (getVecSize(FInfo) == 1) {		if (getVecSize(FInfo) == 1) {
CF = nullptr;		CF = nullptr;
match(opr0, m_APFloatAllowUndef(CF));		match(opr0, m_APFloatAllowUndef(CF));
▲ Show 20 Lines • Show All 52 Lines • ▼ Show 20 Lines	bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B,

Value *nval;		Value *nval;
if (needabs) {		if (needabs) {
nval = B.CreateUnaryIntrinsic(Intrinsic::fabs, opr0, nullptr, "__fabs");		nval = B.CreateUnaryIntrinsic(Intrinsic::fabs, opr0, nullptr, "__fabs");
} else {		} else {
nval = cnval ? cnval : opr0;		nval = cnval ? cnval : opr0;
}		}
if (needlog) {		if (needlog) {
FunctionCallee LogExpr =		FunctionCallee LogExpr;
getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));		if (ShouldUseIntrinsic) {
		LogExpr =
		Intrinsic::getDeclaration(M, Intrinsic::log2, {FPOp->getType()});
		} else {
		LogExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
if (!LogExpr)		if (!LogExpr)
return false;		return false;
		}

nval = CreateCallEx(B,LogExpr, nval, "__log2");		nval = CreateCallEx(B,LogExpr, nval, "__log2");
}		}

if (FInfo.getId() == AMDGPULibFunc::EI_POWN) {		if (FInfo.getId() == AMDGPULibFunc::EI_POWN) {
// convert int(32) to fp(f32 or f64)		// convert int(32) to fp(f32 or f64)
opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");		opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
}		}
nval = B.CreateFMul(opr1, nval, "__ylogx");		nval = B.CreateFMul(opr1, nval, "__ylogx");
▲ Show 20 Lines • Show All 619 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll

Show First 20 Lines • Show All 51 Lines • ▼ Show 20 Lines	; CHECK-NEXT: s_setpc_b64 s[16:17]
%pow = tail call fast double @_Z3powdd(double %x, double %y)		%pow = tail call fast double @_Z3powdd(double %x, double %y)
ret double %pow		ret double %pow
}		}

define half @test_pow_fast_f16__integral_y(half %x, i32 %y.i) {		define half @test_pow_fast_f16__integral_y(half %x, i32 %y.i) {
; CHECK-LABEL: test_pow_fast_f16__integral_y:		; CHECK-LABEL: test_pow_fast_f16__integral_y:
; CHECK: ; %bb.0:		; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s16, s33		; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1
; CHECK-NEXT: s_mov_b32 s33, s32		; CHECK-NEXT: v_log_f16_e64 v2, |v0|
; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1		; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill		; CHECK-NEXT: v_mul_f16_e32 v2, v2, v1
; CHECK-NEXT: s_mov_b64 exec, s[18:19]		; CHECK-NEXT: v_exp_f16_e32 v2, v2
; CHECK-NEXT: v_writelane_b32 v40, s16, 14		; CHECK-NEXT: v_cvt_i16_f16_e32 v1, v1
; CHECK-NEXT: v_writelane_b32 v40, s30, 0
; CHECK-NEXT: v_writelane_b32 v40, s31, 1
; CHECK-NEXT: v_writelane_b32 v40, s34, 2
; CHECK-NEXT: v_writelane_b32 v40, s35, 3
; CHECK-NEXT: v_writelane_b32 v40, s36, 4
; CHECK-NEXT: v_writelane_b32 v40, s37, 5
; CHECK-NEXT: v_writelane_b32 v40, s38, 6
; CHECK-NEXT: v_writelane_b32 v40, s39, 7
; CHECK-NEXT: s_addk_i32 s32, 0x800
; CHECK-NEXT: v_writelane_b32 v40, s40, 8
; CHECK-NEXT: v_writelane_b32 v40, s41, 9
; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5]
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2Dh@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2Dh@gotpcrel32@hi+12
; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT: v_mov_b32_e32 v42, v0
; CHECK-NEXT: v_cvt_f32_i32_e32 v0, v1
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: v_writelane_b32 v40, s42, 10
; CHECK-NEXT: v_writelane_b32 v40, s43, 11
; CHECK-NEXT: v_writelane_b32 v40, s44, 12
; CHECK-NEXT: v_cvt_f16_f32_e32 v43, v0
; CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v42
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: v_writelane_b32 v40, s45, 13
; CHECK-NEXT: v_mov_b32_e32 v41, v31
; CHECK-NEXT: s_mov_b32 s42, s15
; CHECK-NEXT: s_mov_b32 s43, s14
; CHECK-NEXT: s_mov_b32 s44, s13
; CHECK-NEXT: s_mov_b32 s45, s12
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2Dh@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2Dh@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: v_mul_f16_e32 v0, v0, v43
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
; CHECK-NEXT: s_mov_b32 s12, s45
; CHECK-NEXT: s_mov_b32 s13, s44
; CHECK-NEXT: s_mov_b32 s14, s43
; CHECK-NEXT: s_mov_b32 s15, s42
; CHECK-NEXT: v_mov_b32_e32 v31, v41
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_cvt_i16_f16_e32 v1, v43
; CHECK-NEXT: v_lshlrev_b16_e32 v1, 15, v1		; CHECK-NEXT: v_lshlrev_b16_e32 v1, 15, v1
; CHECK-NEXT: v_and_b32_e32 v1, v1, v42		; CHECK-NEXT: v_and_b32_e32 v0, v1, v0
; CHECK-NEXT: buffer_load_dword v43, off, s[0:3], s33 ; 4-byte Folded Reload		; CHECK-NEXT: v_or_b32_e32 v0, v0, v2
; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; CHECK-NEXT: v_or_b32_e32 v0, v1, v0
; CHECK-NEXT: v_readlane_b32 s45, v40, 13
; CHECK-NEXT: v_readlane_b32 s44, v40, 12
; CHECK-NEXT: v_readlane_b32 s43, v40, 11
; CHECK-NEXT: v_readlane_b32 s42, v40, 10
; CHECK-NEXT: v_readlane_b32 s41, v40, 9
; CHECK-NEXT: v_readlane_b32 s40, v40, 8
; CHECK-NEXT: v_readlane_b32 s39, v40, 7
; CHECK-NEXT: v_readlane_b32 s38, v40, 6
; CHECK-NEXT: v_readlane_b32 s37, v40, 5
; CHECK-NEXT: v_readlane_b32 s36, v40, 4
; CHECK-NEXT: v_readlane_b32 s35, v40, 3
; CHECK-NEXT: v_readlane_b32 s34, v40, 2
; CHECK-NEXT: v_readlane_b32 s31, v40, 1
; CHECK-NEXT: v_readlane_b32 s30, v40, 0
; CHECK-NEXT: v_readlane_b32 s4, v40, 14
; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[6:7]
; CHECK-NEXT: s_addk_i32 s32, 0xf800
; CHECK-NEXT: s_mov_b32 s33, s4
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]		; CHECK-NEXT: s_setpc_b64 s[30:31]
%y = sitofp i32 %y.i to half		%y = sitofp i32 %y.i to half
%pow = tail call fast half @_Z3powDhDh(half %x, half %y)		%pow = tail call fast half @_Z3powDhDh(half %x, half %y)
ret half %pow		ret half %pow
}		}

define float @test_pow_fast_f32__integral_y(float %x, i32 %y.i) {		define float @test_pow_fast_f32__integral_y(float %x, i32 %y.i) {
; CHECK-LABEL: test_pow_fast_f32__integral_y:		; CHECK-LABEL: test_pow_fast_f32__integral_y:
; CHECK: ; %bb.0:		; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s16, s33		; CHECK-NEXT: s_mov_b32 s4, 0x800000
; CHECK-NEXT: s_mov_b32 s33, s32		; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1		; CHECK-NEXT: v_mov_b32_e32 v3, 0x4f800000
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill		; CHECK-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc
; CHECK-NEXT: s_mov_b64 exec, s[18:19]		; CHECK-NEXT: v_mul_f32_e64 v3, |v0|, v3
; CHECK-NEXT: v_writelane_b32 v40, s16, 14		; CHECK-NEXT: v_log_f32_e32 v3, v3
; CHECK-NEXT: v_writelane_b32 v40, s30, 0		; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1
; CHECK-NEXT: v_writelane_b32 v40, s31, 1		; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000
; CHECK-NEXT: v_writelane_b32 v40, s34, 2		; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; CHECK-NEXT: v_writelane_b32 v40, s35, 3		; CHECK-NEXT: v_sub_f32_e32 v2, v3, v2
; CHECK-NEXT: v_writelane_b32 v40, s36, 4		; CHECK-NEXT: v_mul_f32_e32 v3, v2, v1
; CHECK-NEXT: v_writelane_b32 v40, s37, 5		; CHECK-NEXT: s_mov_b32 s4, 0xc2fc0000
; CHECK-NEXT: v_writelane_b32 v40, s38, 6		; CHECK-NEXT: v_mov_b32_e32 v4, 0x42800000
; CHECK-NEXT: v_writelane_b32 v40, s39, 7		; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, s4, v3
; CHECK-NEXT: s_addk_i32 s32, 0x800		; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc
; CHECK-NEXT: v_writelane_b32 v40, s40, 8		; CHECK-NEXT: v_fma_f32 v2, v2, v1, v3
; CHECK-NEXT: v_writelane_b32 v40, s41, 9		; CHECK-NEXT: v_exp_f32_e32 v2, v2
; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5]		; CHECK-NEXT: v_cvt_i32_f32_e32 v1, v1
; CHECK-NEXT: s_getpc_b64 s[4:5]		; CHECK-NEXT: v_mov_b32_e32 v3, 0x1f800000
; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2f@gotpcrel32@lo+4		; CHECK-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2f@gotpcrel32@hi+12		; CHECK-NEXT: v_mul_f32_e32 v2, v2, v3
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: v_writelane_b32 v40, s42, 10
; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT: v_writelane_b32 v40, s43, 11
; CHECK-NEXT: v_mov_b32_e32 v42, v0
; CHECK-NEXT: v_writelane_b32 v40, s44, 12
; CHECK-NEXT: v_and_b32_e32 v0, 0x7fffffff, v42
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: v_writelane_b32 v40, s45, 13
; CHECK-NEXT: v_mov_b32_e32 v41, v31
; CHECK-NEXT: s_mov_b32 s42, s15
; CHECK-NEXT: s_mov_b32 s43, s14
; CHECK-NEXT: s_mov_b32 s44, s13
; CHECK-NEXT: s_mov_b32 s45, s12
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT: v_cvt_f32_i32_e32 v43, v1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2f@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2f@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: v_mul_f32_e32 v0, v0, v43
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
; CHECK-NEXT: s_mov_b32 s12, s45
; CHECK-NEXT: s_mov_b32 s13, s44
; CHECK-NEXT: s_mov_b32 s14, s43
; CHECK-NEXT: s_mov_b32 s15, s42
; CHECK-NEXT: v_mov_b32_e32 v31, v41
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_cvt_i32_f32_e32 v1, v43
; CHECK-NEXT: v_readlane_b32 s45, v40, 13
; CHECK-NEXT: v_readlane_b32 s44, v40, 12
; CHECK-NEXT: v_readlane_b32 s43, v40, 11
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 31, v1		; CHECK-NEXT: v_lshlrev_b32_e32 v1, 31, v1
; CHECK-NEXT: v_and_or_b32 v0, v1, v42, v0		; CHECK-NEXT: v_and_or_b32 v0, v1, v0, v2
; CHECK-NEXT: buffer_load_dword v43, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; CHECK-NEXT: v_readlane_b32 s42, v40, 10
; CHECK-NEXT: v_readlane_b32 s41, v40, 9
; CHECK-NEXT: v_readlane_b32 s40, v40, 8
; CHECK-NEXT: v_readlane_b32 s39, v40, 7
; CHECK-NEXT: v_readlane_b32 s38, v40, 6
; CHECK-NEXT: v_readlane_b32 s37, v40, 5
; CHECK-NEXT: v_readlane_b32 s36, v40, 4
; CHECK-NEXT: v_readlane_b32 s35, v40, 3
; CHECK-NEXT: v_readlane_b32 s34, v40, 2
; CHECK-NEXT: v_readlane_b32 s31, v40, 1
; CHECK-NEXT: v_readlane_b32 s30, v40, 0
; CHECK-NEXT: v_readlane_b32 s4, v40, 14
; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[6:7]
; CHECK-NEXT: s_addk_i32 s32, 0xf800
; CHECK-NEXT: s_mov_b32 s33, s4
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]		; CHECK-NEXT: s_setpc_b64 s[30:31]
%y = sitofp i32 %y.i to float		%y = sitofp i32 %y.i to float
%pow = tail call fast float @_Z3powff(float %x, float %y)		%pow = tail call fast float @_Z3powff(float %x, float %y)
ret float %pow		ret float %pow
}		}

define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {		define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
; CHECK-LABEL: test_pow_fast_f64__integral_y:		; CHECK-LABEL: test_pow_fast_f64__integral_y:
▲ Show 20 Lines • Show All 99 Lines • ▼ Show 20 Lines
; --------------------------------------------------------------------		; --------------------------------------------------------------------
; test powr		; test powr
; --------------------------------------------------------------------		; --------------------------------------------------------------------

define half @test_powr_fast_f16(half %x, half %y) {		define half @test_powr_fast_f16(half %x, half %y) {
; CHECK-LABEL: test_powr_fast_f16:		; CHECK-LABEL: test_powr_fast_f16:
; CHECK: ; %bb.0:		; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s16, s33		; CHECK-NEXT: v_log_f16_e32 v0, v0
; CHECK-NEXT: s_mov_b32 s33, s32		; CHECK-NEXT: v_mul_f16_e32 v0, v0, v1
; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1		; CHECK-NEXT: v_exp_f16_e32 v0, v0
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b64 exec, s[18:19]
; CHECK-NEXT: v_writelane_b32 v40, s16, 14
; CHECK-NEXT: v_writelane_b32 v40, s30, 0
; CHECK-NEXT: v_writelane_b32 v40, s31, 1
; CHECK-NEXT: v_writelane_b32 v40, s34, 2
; CHECK-NEXT: v_writelane_b32 v40, s35, 3
; CHECK-NEXT: v_writelane_b32 v40, s36, 4
; CHECK-NEXT: v_writelane_b32 v40, s37, 5
; CHECK-NEXT: v_writelane_b32 v40, s38, 6
; CHECK-NEXT: v_writelane_b32 v40, s39, 7
; CHECK-NEXT: s_addk_i32 s32, 0x400
; CHECK-NEXT: v_writelane_b32 v40, s40, 8
; CHECK-NEXT: v_writelane_b32 v40, s41, 9
; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5]
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2Dh@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2Dh@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: v_writelane_b32 v40, s42, 10
; CHECK-NEXT: v_writelane_b32 v40, s43, 11
; CHECK-NEXT: v_writelane_b32 v40, s44, 12
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT: v_writelane_b32 v40, s45, 13
; CHECK-NEXT: v_mov_b32_e32 v41, v31
; CHECK-NEXT: s_mov_b32 s42, s15
; CHECK-NEXT: s_mov_b32 s43, s14
; CHECK-NEXT: s_mov_b32 s44, s13
; CHECK-NEXT: s_mov_b32 s45, s12
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT: v_mov_b32_e32 v42, v1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2Dh@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2Dh@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: v_mul_f16_e32 v0, v0, v42
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
; CHECK-NEXT: s_mov_b32 s12, s45
; CHECK-NEXT: s_mov_b32 s13, s44
; CHECK-NEXT: s_mov_b32 s14, s43
; CHECK-NEXT: s_mov_b32 s15, s42
; CHECK-NEXT: v_mov_b32_e32 v31, v41
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: v_readlane_b32 s45, v40, 13
; CHECK-NEXT: v_readlane_b32 s44, v40, 12
; CHECK-NEXT: v_readlane_b32 s43, v40, 11
; CHECK-NEXT: v_readlane_b32 s42, v40, 10
; CHECK-NEXT: v_readlane_b32 s41, v40, 9
; CHECK-NEXT: v_readlane_b32 s40, v40, 8
; CHECK-NEXT: v_readlane_b32 s39, v40, 7
; CHECK-NEXT: v_readlane_b32 s38, v40, 6
; CHECK-NEXT: v_readlane_b32 s37, v40, 5
; CHECK-NEXT: v_readlane_b32 s36, v40, 4
; CHECK-NEXT: v_readlane_b32 s35, v40, 3
; CHECK-NEXT: v_readlane_b32 s34, v40, 2
; CHECK-NEXT: v_readlane_b32 s31, v40, 1
; CHECK-NEXT: v_readlane_b32 s30, v40, 0
; CHECK-NEXT: v_readlane_b32 s4, v40, 14
; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[6:7]
; CHECK-NEXT: s_addk_i32 s32, 0xfc00
; CHECK-NEXT: s_mov_b32 s33, s4
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]		; CHECK-NEXT: s_setpc_b64 s[30:31]
%powr = tail call fast half @_Z4powrDhDh(half %x, half %y)		%powr = tail call fast half @_Z4powrDhDh(half %x, half %y)
ret half %powr		ret half %powr
}		}

define float @test_powr_fast_f32(float %x, float %y) {		define float @test_powr_fast_f32(float %x, float %y) {
; CHECK-LABEL: test_powr_fast_f32:		; CHECK-LABEL: test_powr_fast_f32:
; CHECK: ; %bb.0:		; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s16, s33		; CHECK-NEXT: s_mov_b32 s4, 0x800000
; CHECK-NEXT: s_mov_b32 s33, s32		; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1		; CHECK-NEXT: v_mov_b32_e32 v3, 0x4f800000
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill		; CHECK-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc
; CHECK-NEXT: s_mov_b64 exec, s[18:19]		; CHECK-NEXT: v_mul_f32_e32 v0, v0, v3
; CHECK-NEXT: v_writelane_b32 v40, s16, 14		; CHECK-NEXT: v_log_f32_e32 v0, v0
; CHECK-NEXT: v_writelane_b32 v40, s30, 0		; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000
; CHECK-NEXT: v_writelane_b32 v40, s31, 1		; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; CHECK-NEXT: v_writelane_b32 v40, s34, 2		; CHECK-NEXT: s_mov_b32 s4, 0xc2fc0000
; CHECK-NEXT: v_writelane_b32 v40, s35, 3		; CHECK-NEXT: v_sub_f32_e32 v0, v0, v2
; CHECK-NEXT: v_writelane_b32 v40, s36, 4		; CHECK-NEXT: v_mul_f32_e32 v2, v0, v1
; CHECK-NEXT: v_writelane_b32 v40, s37, 5		; CHECK-NEXT: v_mov_b32_e32 v3, 0x42800000
; CHECK-NEXT: v_writelane_b32 v40, s38, 6		; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, s4, v2
; CHECK-NEXT: v_writelane_b32 v40, s39, 7		; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc
; CHECK-NEXT: s_addk_i32 s32, 0x400		; CHECK-NEXT: v_fma_f32 v0, v0, v1, v2
; CHECK-NEXT: v_writelane_b32 v40, s40, 8		; CHECK-NEXT: v_exp_f32_e32 v0, v0
; CHECK-NEXT: v_writelane_b32 v40, s41, 9		; CHECK-NEXT: v_mov_b32_e32 v1, 0x1f800000
; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5]		; CHECK-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
; CHECK-NEXT: s_getpc_b64 s[4:5]		; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2f@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2f@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: v_writelane_b32 v40, s42, 10
; CHECK-NEXT: v_writelane_b32 v40, s43, 11
; CHECK-NEXT: v_writelane_b32 v40, s44, 12
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT: v_writelane_b32 v40, s45, 13
; CHECK-NEXT: v_mov_b32_e32 v41, v31
; CHECK-NEXT: s_mov_b32 s42, s15
; CHECK-NEXT: s_mov_b32 s43, s14
; CHECK-NEXT: s_mov_b32 s44, s13
; CHECK-NEXT: s_mov_b32 s45, s12
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT: v_mov_b32_e32 v42, v1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2f@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2f@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: v_mul_f32_e32 v0, v0, v42
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
; CHECK-NEXT: s_mov_b32 s12, s45
; CHECK-NEXT: s_mov_b32 s13, s44
; CHECK-NEXT: s_mov_b32 s14, s43
; CHECK-NEXT: s_mov_b32 s15, s42
; CHECK-NEXT: v_mov_b32_e32 v31, v41
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: v_readlane_b32 s45, v40, 13
; CHECK-NEXT: v_readlane_b32 s44, v40, 12
; CHECK-NEXT: v_readlane_b32 s43, v40, 11
; CHECK-NEXT: v_readlane_b32 s42, v40, 10
; CHECK-NEXT: v_readlane_b32 s41, v40, 9
; CHECK-NEXT: v_readlane_b32 s40, v40, 8
; CHECK-NEXT: v_readlane_b32 s39, v40, 7
; CHECK-NEXT: v_readlane_b32 s38, v40, 6
; CHECK-NEXT: v_readlane_b32 s37, v40, 5
; CHECK-NEXT: v_readlane_b32 s36, v40, 4
; CHECK-NEXT: v_readlane_b32 s35, v40, 3
; CHECK-NEXT: v_readlane_b32 s34, v40, 2
; CHECK-NEXT: v_readlane_b32 s31, v40, 1
; CHECK-NEXT: v_readlane_b32 s30, v40, 0
; CHECK-NEXT: v_readlane_b32 s4, v40, 14
; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[6:7]
; CHECK-NEXT: s_addk_i32 s32, 0xfc00
; CHECK-NEXT: s_mov_b32 s33, s4
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]		; CHECK-NEXT: s_setpc_b64 s[30:31]
%powr = tail call fast float @_Z4powrff(float %x, float %y)		%powr = tail call fast float @_Z4powrff(float %x, float %y)
ret float %powr		ret float %powr
}		}

define double @test_powr_fast_f64(double %x, double %y) {		define double @test_powr_fast_f64(double %x, double %y) {
; CHECK-LABEL: test_powr_fast_f64:		; CHECK-LABEL: test_powr_fast_f64:
; CHECK: ; %bb.0:		; CHECK: ; %bb.0:
▲ Show 20 Lines • Show All 88 Lines • ▼ Show 20 Lines
; --------------------------------------------------------------------		; --------------------------------------------------------------------
; test pown		; test pown
; --------------------------------------------------------------------		; --------------------------------------------------------------------

define half @test_pown_fast_f16(half %x, i32 %y) {		define half @test_pown_fast_f16(half %x, i32 %y) {
; CHECK-LABEL: test_pown_fast_f16:		; CHECK-LABEL: test_pown_fast_f16:
; CHECK: ; %bb.0:		; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s16, s33		; CHECK-NEXT: v_cvt_f32_i32_e32 v2, v1
; CHECK-NEXT: s_mov_b32 s33, s32		; CHECK-NEXT: v_log_f16_e64 v3, |v0|
; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1		; CHECK-NEXT: v_lshlrev_b16_e32 v1, 15, v1
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill		; CHECK-NEXT: v_and_b32_e32 v0, v1, v0
; CHECK-NEXT: s_mov_b64 exec, s[18:19]		; CHECK-NEXT: v_cvt_f16_f32_e32 v2, v2
; CHECK-NEXT: v_writelane_b32 v40, s16, 14		; CHECK-NEXT: v_mul_f16_e32 v2, v3, v2
; CHECK-NEXT: v_writelane_b32 v40, s30, 0		; CHECK-NEXT: v_exp_f16_e32 v2, v2
; CHECK-NEXT: v_writelane_b32 v40, s31, 1		; CHECK-NEXT: v_or_b32_e32 v0, v0, v2
; CHECK-NEXT: v_writelane_b32 v40, s34, 2
; CHECK-NEXT: v_writelane_b32 v40, s35, 3
; CHECK-NEXT: v_writelane_b32 v40, s36, 4
; CHECK-NEXT: v_writelane_b32 v40, s37, 5
; CHECK-NEXT: v_writelane_b32 v40, s38, 6
; CHECK-NEXT: v_writelane_b32 v40, s39, 7
; CHECK-NEXT: s_addk_i32 s32, 0x800
; CHECK-NEXT: v_writelane_b32 v40, s40, 8
; CHECK-NEXT: v_writelane_b32 v40, s41, 9
; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5]
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2Dh@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2Dh@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: v_writelane_b32 v40, s42, 10
; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT: v_writelane_b32 v40, s43, 11
; CHECK-NEXT: v_mov_b32_e32 v43, v0
; CHECK-NEXT: v_writelane_b32 v40, s44, 12
; CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v43
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: v_writelane_b32 v40, s45, 13
; CHECK-NEXT: v_mov_b32_e32 v41, v31
; CHECK-NEXT: s_mov_b32 s42, s15
; CHECK-NEXT: s_mov_b32 s43, s14
; CHECK-NEXT: s_mov_b32 s44, s13
; CHECK-NEXT: s_mov_b32 s45, s12
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT: v_mov_b32_e32 v42, v1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v42
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2Dh@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2Dh@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
; CHECK-NEXT: v_mul_f16_e32 v0, v0, v1
; CHECK-NEXT: s_mov_b32 s12, s45
; CHECK-NEXT: s_mov_b32 s13, s44
; CHECK-NEXT: s_mov_b32 s14, s43
; CHECK-NEXT: s_mov_b32 s15, s42
; CHECK-NEXT: v_mov_b32_e32 v31, v41
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_lshlrev_b16_e32 v1, 15, v42
; CHECK-NEXT: v_and_b32_e32 v1, v1, v43
; CHECK-NEXT: buffer_load_dword v43, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; CHECK-NEXT: v_or_b32_e32 v0, v1, v0
; CHECK-NEXT: v_readlane_b32 s45, v40, 13
; CHECK-NEXT: v_readlane_b32 s44, v40, 12
; CHECK-NEXT: v_readlane_b32 s43, v40, 11
; CHECK-NEXT: v_readlane_b32 s42, v40, 10
; CHECK-NEXT: v_readlane_b32 s41, v40, 9
; CHECK-NEXT: v_readlane_b32 s40, v40, 8
; CHECK-NEXT: v_readlane_b32 s39, v40, 7
; CHECK-NEXT: v_readlane_b32 s38, v40, 6
; CHECK-NEXT: v_readlane_b32 s37, v40, 5
; CHECK-NEXT: v_readlane_b32 s36, v40, 4
; CHECK-NEXT: v_readlane_b32 s35, v40, 3
; CHECK-NEXT: v_readlane_b32 s34, v40, 2
; CHECK-NEXT: v_readlane_b32 s31, v40, 1
; CHECK-NEXT: v_readlane_b32 s30, v40, 0
; CHECK-NEXT: v_readlane_b32 s4, v40, 14
; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[6:7]
; CHECK-NEXT: s_addk_i32 s32, 0xf800
; CHECK-NEXT: s_mov_b32 s33, s4
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]		; CHECK-NEXT: s_setpc_b64 s[30:31]
%call = tail call fast half @_Z4pownDhi(half %x, i32 %y)		%call = tail call fast half @_Z4pownDhi(half %x, i32 %y)
ret half %call		ret half %call
}		}

define float @test_pown_fast_f32(float %x, i32 %y) {		define float @test_pown_fast_f32(float %x, i32 %y) {
; CHECK-LABEL: test_pown_fast_f32:		; CHECK-LABEL: test_pown_fast_f32:
; CHECK: ; %bb.0:		; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s16, s33		; CHECK-NEXT: s_mov_b32 s4, 0x800000
; CHECK-NEXT: s_mov_b32 s33, s32		; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1		; CHECK-NEXT: v_mov_b32_e32 v3, 0x4f800000
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill		; CHECK-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc
; CHECK-NEXT: s_mov_b64 exec, s[18:19]		; CHECK-NEXT: v_mul_f32_e64 v3, |v0|, v3
; CHECK-NEXT: v_writelane_b32 v40, s16, 14		; CHECK-NEXT: v_log_f32_e32 v3, v3
; CHECK-NEXT: v_writelane_b32 v40, s30, 0		; CHECK-NEXT: v_cvt_f32_i32_e32 v4, v1
; CHECK-NEXT: v_writelane_b32 v40, s31, 1		; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000
; CHECK-NEXT: v_writelane_b32 v40, s34, 2		; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; CHECK-NEXT: v_writelane_b32 v40, s35, 3		; CHECK-NEXT: v_sub_f32_e32 v2, v3, v2
; CHECK-NEXT: v_writelane_b32 v40, s36, 4		; CHECK-NEXT: v_mul_f32_e32 v3, v2, v4
; CHECK-NEXT: v_writelane_b32 v40, s37, 5		; CHECK-NEXT: s_mov_b32 s4, 0xc2fc0000
; CHECK-NEXT: v_writelane_b32 v40, s38, 6		; CHECK-NEXT: v_mov_b32_e32 v5, 0x42800000
; CHECK-NEXT: v_writelane_b32 v40, s39, 7		; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, s4, v3
; CHECK-NEXT: s_addk_i32 s32, 0x800		; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
; CHECK-NEXT: v_writelane_b32 v40, s40, 8		; CHECK-NEXT: v_fma_f32 v2, v2, v4, v3
; CHECK-NEXT: v_writelane_b32 v40, s41, 9		; CHECK-NEXT: v_exp_f32_e32 v2, v2
; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5]		; CHECK-NEXT: v_mov_b32_e32 v3, 0x1f800000
; CHECK-NEXT: s_getpc_b64 s[4:5]		; CHECK-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc
; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2f@gotpcrel32@lo+4		; CHECK-NEXT: v_lshlrev_b32_e32 v1, 31, v1
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2f@gotpcrel32@hi+12		; CHECK-NEXT: v_mul_f32_e32 v2, v2, v3
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0		; CHECK-NEXT: v_and_or_b32 v0, v1, v0, v2
; CHECK-NEXT: v_writelane_b32 v40, s42, 10
; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT: v_writelane_b32 v40, s43, 11
; CHECK-NEXT: v_mov_b32_e32 v43, v0
; CHECK-NEXT: v_writelane_b32 v40, s44, 12
; CHECK-NEXT: v_and_b32_e32 v0, 0x7fffffff, v43
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: v_writelane_b32 v40, s45, 13
; CHECK-NEXT: v_mov_b32_e32 v41, v31
; CHECK-NEXT: s_mov_b32 s42, s15
; CHECK-NEXT: s_mov_b32 s43, s14
; CHECK-NEXT: s_mov_b32 s44, s13
; CHECK-NEXT: s_mov_b32 s45, s12
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT: v_mov_b32_e32 v42, v1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2f@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2f@gotpcrel32@hi+12
; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v42
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
; CHECK-NEXT: s_mov_b32 s12, s45
; CHECK-NEXT: s_mov_b32 s13, s44
; CHECK-NEXT: s_mov_b32 s14, s43
; CHECK-NEXT: s_mov_b32 s15, s42
; CHECK-NEXT: v_mov_b32_e32 v31, v41
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 31, v42
; CHECK-NEXT: v_and_or_b32 v0, v1, v43, v0
; CHECK-NEXT: buffer_load_dword v43, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; CHECK-NEXT: v_readlane_b32 s45, v40, 13
; CHECK-NEXT: v_readlane_b32 s44, v40, 12
; CHECK-NEXT: v_readlane_b32 s43, v40, 11
; CHECK-NEXT: v_readlane_b32 s42, v40, 10
; CHECK-NEXT: v_readlane_b32 s41, v40, 9
; CHECK-NEXT: v_readlane_b32 s40, v40, 8
; CHECK-NEXT: v_readlane_b32 s39, v40, 7
; CHECK-NEXT: v_readlane_b32 s38, v40, 6
; CHECK-NEXT: v_readlane_b32 s37, v40, 5
; CHECK-NEXT: v_readlane_b32 s36, v40, 4
; CHECK-NEXT: v_readlane_b32 s35, v40, 3
; CHECK-NEXT: v_readlane_b32 s34, v40, 2
; CHECK-NEXT: v_readlane_b32 s31, v40, 1
; CHECK-NEXT: v_readlane_b32 s30, v40, 0
; CHECK-NEXT: v_readlane_b32 s4, v40, 14
; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[6:7]
; CHECK-NEXT: s_addk_i32 s32, 0xf800
; CHECK-NEXT: s_mov_b32 s33, s4
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]		; CHECK-NEXT: s_setpc_b64 s[30:31]
%call = tail call fast float @_Z4pownfi(float %x, i32 %y)		%call = tail call fast float @_Z4pownfi(float %x, i32 %y)
ret float %call		ret float %call
}		}

define double @test_pown_fast_f64(double %x, i32 %y) {		define double @test_pown_fast_f64(double %x, i32 %y) {
; CHECK-LABEL: test_pown_fast_f64:		; CHECK-LABEL: test_pown_fast_f64:
; CHECK: ; %bb.0:		; CHECK: ; %bb.0:
▲ Show 20 Lines • Show All 89 Lines • ▼ Show 20 Lines	; CHECK-NEXT: s_setpc_b64 s[30:31]
%call = tail call fast double @_Z4powndi(double %x, i32 %y)		%call = tail call fast double @_Z4powndi(double %x, i32 %y)
ret double %call		ret double %call
}		}

define half @test_pown_fast_f16_known_even(half %x, i32 %y.arg) {		define half @test_pown_fast_f16_known_even(half %x, i32 %y.arg) {
; CHECK-LABEL: test_pown_fast_f16_known_even:		; CHECK-LABEL: test_pown_fast_f16_known_even:
; CHECK: ; %bb.0:		; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s16, s33		; CHECK-NEXT: v_lshlrev_b32_e32 v1, 1, v1
; CHECK-NEXT: s_mov_b32 s33, s32		; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1
; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1		; CHECK-NEXT: v_log_f16_e64 v0, \|v0\|
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b64 exec, s[18:19]
; CHECK-NEXT: v_writelane_b32 v40, s16, 14
; CHECK-NEXT: v_writelane_b32 v40, s30, 0
; CHECK-NEXT: v_writelane_b32 v40, s31, 1
; CHECK-NEXT: v_writelane_b32 v40, s34, 2
; CHECK-NEXT: v_writelane_b32 v40, s35, 3
; CHECK-NEXT: v_writelane_b32 v40, s36, 4
; CHECK-NEXT: v_writelane_b32 v40, s37, 5
; CHECK-NEXT: v_writelane_b32 v40, s38, 6
; CHECK-NEXT: v_writelane_b32 v40, s39, 7
; CHECK-NEXT: s_addk_i32 s32, 0x400
; CHECK-NEXT: v_writelane_b32 v40, s40, 8
; CHECK-NEXT: v_writelane_b32 v40, s41, 9
; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5]
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2Dh@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2Dh@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: v_writelane_b32 v40, s42, 10
; CHECK-NEXT: v_writelane_b32 v40, s43, 11
; CHECK-NEXT: v_writelane_b32 v40, s44, 12
; CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT: v_writelane_b32 v40, s45, 13
; CHECK-NEXT: v_mov_b32_e32 v41, v31
; CHECK-NEXT: s_mov_b32 s42, s15
; CHECK-NEXT: s_mov_b32 s43, s14
; CHECK-NEXT: s_mov_b32 s44, s13
; CHECK-NEXT: s_mov_b32 s45, s12
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT: v_lshlrev_b32_e32 v42, 1, v1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v42
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2Dh@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2Dh@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1		; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
; CHECK-NEXT: v_mul_f16_e32 v0, v0, v1		; CHECK-NEXT: v_mul_f16_e32 v0, v0, v1
; CHECK-NEXT: s_mov_b32 s12, s45		; CHECK-NEXT: v_exp_f16_e32 v0, v0
; CHECK-NEXT: s_mov_b32 s13, s44
; CHECK-NEXT: s_mov_b32 s14, s43
; CHECK-NEXT: s_mov_b32 s15, s42
; CHECK-NEXT: v_mov_b32_e32 v31, v41
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: v_readlane_b32 s45, v40, 13
; CHECK-NEXT: v_readlane_b32 s44, v40, 12
; CHECK-NEXT: v_readlane_b32 s43, v40, 11
; CHECK-NEXT: v_readlane_b32 s42, v40, 10
; CHECK-NEXT: v_readlane_b32 s41, v40, 9
; CHECK-NEXT: v_readlane_b32 s40, v40, 8
; CHECK-NEXT: v_readlane_b32 s39, v40, 7
; CHECK-NEXT: v_readlane_b32 s38, v40, 6
; CHECK-NEXT: v_readlane_b32 s37, v40, 5
; CHECK-NEXT: v_readlane_b32 s36, v40, 4
; CHECK-NEXT: v_readlane_b32 s35, v40, 3
; CHECK-NEXT: v_readlane_b32 s34, v40, 2
; CHECK-NEXT: v_readlane_b32 s31, v40, 1
; CHECK-NEXT: v_readlane_b32 s30, v40, 0
; CHECK-NEXT: v_readlane_b32 s4, v40, 14
; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[6:7]
; CHECK-NEXT: s_addk_i32 s32, 0xfc00
; CHECK-NEXT: s_mov_b32 s33, s4
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]		; CHECK-NEXT: s_setpc_b64 s[30:31]
%y = shl i32 %y.arg, 1		%y = shl i32 %y.arg, 1
%call = tail call fast half @_Z4pownDhi(half %x, i32 %y)		%call = tail call fast half @_Z4pownDhi(half %x, i32 %y)
ret half %call		ret half %call
}		}

define float @test_pown_fast_f32_known_even(float %x, i32 %y.arg) {		define float @test_pown_fast_f32_known_even(float %x, i32 %y.arg) {
; CHECK-LABEL: test_pown_fast_f32_known_even:		; CHECK-LABEL: test_pown_fast_f32_known_even:
; CHECK: ; %bb.0:		; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s16, s33		; CHECK-NEXT: s_mov_b32 s4, 0x800000
; CHECK-NEXT: s_mov_b32 s33, s32		; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, \|v0\|, s4
; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1		; CHECK-NEXT: v_mov_b32_e32 v3, 0x4f800000
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill		; CHECK-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc
; CHECK-NEXT: s_mov_b64 exec, s[18:19]		; CHECK-NEXT: v_mul_f32_e64 v0, \|v0\|, v3
; CHECK-NEXT: v_writelane_b32 v40, s16, 14		; CHECK-NEXT: v_lshlrev_b32_e32 v1, 1, v1
; CHECK-NEXT: v_writelane_b32 v40, s30, 0		; CHECK-NEXT: v_log_f32_e32 v0, v0
; CHECK-NEXT: v_writelane_b32 v40, s31, 1		; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1
; CHECK-NEXT: v_writelane_b32 v40, s34, 2		; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000
; CHECK-NEXT: v_writelane_b32 v40, s35, 3		; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; CHECK-NEXT: v_writelane_b32 v40, s36, 4		; CHECK-NEXT: v_sub_f32_e32 v0, v0, v2
; CHECK-NEXT: v_writelane_b32 v40, s37, 5		; CHECK-NEXT: v_mul_f32_e32 v2, v0, v1
; CHECK-NEXT: v_writelane_b32 v40, s38, 6		; CHECK-NEXT: s_mov_b32 s4, 0xc2fc0000
; CHECK-NEXT: v_writelane_b32 v40, s39, 7		; CHECK-NEXT: v_mov_b32_e32 v3, 0x42800000
; CHECK-NEXT: s_addk_i32 s32, 0x400		; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, s4, v2
; CHECK-NEXT: v_writelane_b32 v40, s40, 8		; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc
; CHECK-NEXT: v_writelane_b32 v40, s41, 9		; CHECK-NEXT: v_fma_f32 v0, v0, v1, v2
; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5]		; CHECK-NEXT: v_exp_f32_e32 v0, v0
; CHECK-NEXT: s_getpc_b64 s[4:5]		; CHECK-NEXT: v_mov_b32_e32 v1, 0x1f800000
; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2f@gotpcrel32@lo+4		; CHECK-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2f@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: v_writelane_b32 v40, s42, 10
; CHECK-NEXT: v_writelane_b32 v40, s43, 11
; CHECK-NEXT: v_writelane_b32 v40, s44, 12
; CHECK-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT: v_writelane_b32 v40, s45, 13
; CHECK-NEXT: v_mov_b32_e32 v41, v31
; CHECK-NEXT: s_mov_b32 s42, s15
; CHECK-NEXT: s_mov_b32 s43, s14
; CHECK-NEXT: s_mov_b32 s44, s13
; CHECK-NEXT: s_mov_b32 s45, s12
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT: v_lshlrev_b32_e32 v42, 1, v1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2f@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2f@gotpcrel32@hi+12
; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v42
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1		; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
; CHECK-NEXT: s_mov_b32 s12, s45
; CHECK-NEXT: s_mov_b32 s13, s44
; CHECK-NEXT: s_mov_b32 s14, s43
; CHECK-NEXT: s_mov_b32 s15, s42
; CHECK-NEXT: v_mov_b32_e32 v31, v41
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: v_readlane_b32 s45, v40, 13
; CHECK-NEXT: v_readlane_b32 s44, v40, 12
; CHECK-NEXT: v_readlane_b32 s43, v40, 11
; CHECK-NEXT: v_readlane_b32 s42, v40, 10
; CHECK-NEXT: v_readlane_b32 s41, v40, 9
; CHECK-NEXT: v_readlane_b32 s40, v40, 8
; CHECK-NEXT: v_readlane_b32 s39, v40, 7
; CHECK-NEXT: v_readlane_b32 s38, v40, 6
; CHECK-NEXT: v_readlane_b32 s37, v40, 5
; CHECK-NEXT: v_readlane_b32 s36, v40, 4
; CHECK-NEXT: v_readlane_b32 s35, v40, 3
; CHECK-NEXT: v_readlane_b32 s34, v40, 2
; CHECK-NEXT: v_readlane_b32 s31, v40, 1
; CHECK-NEXT: v_readlane_b32 s30, v40, 0
; CHECK-NEXT: v_readlane_b32 s4, v40, 14
; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[6:7]
; CHECK-NEXT: s_addk_i32 s32, 0xfc00
; CHECK-NEXT: s_mov_b32 s33, s4
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]		; CHECK-NEXT: s_setpc_b64 s[30:31]
%y = shl i32 %y.arg, 1		%y = shl i32 %y.arg, 1
%call = tail call fast float @_Z4pownfi(float %x, i32 %y)		%call = tail call fast float @_Z4pownfi(float %x, i32 %y)
ret float %call		ret float %call
}		}

define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) {		define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) {
; CHECK-LABEL: test_pown_fast_f64_known_even:		; CHECK-LABEL: test_pown_fast_f64_known_even:
▲ Show 20 Lines • Show All 85 Lines • ▼ Show 20 Lines	; CHECK-NEXT: s_setpc_b64 s[30:31]
%call = tail call fast double @_Z4powndi(double %x, i32 %y)		%call = tail call fast double @_Z4powndi(double %x, i32 %y)
ret double %call		ret double %call
}		}

define half @test_pown_fast_f16_known_odd(half %x, i32 %y.arg) {		define half @test_pown_fast_f16_known_odd(half %x, i32 %y.arg) {
; CHECK-LABEL: test_pown_fast_f16_known_odd:		; CHECK-LABEL: test_pown_fast_f16_known_odd:
; CHECK: ; %bb.0:		; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s16, s33		; CHECK-NEXT: v_or_b32_e32 v1, 1, v1
; CHECK-NEXT: s_mov_b32 s33, s32		; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1
; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1		; CHECK-NEXT: v_log_f16_e64 v2, \|v0\|
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill		; CHECK-NEXT: v_and_b32_e32 v0, 0xffff8000, v0
; CHECK-NEXT: s_mov_b64 exec, s[18:19]
; CHECK-NEXT: v_writelane_b32 v40, s16, 14
; CHECK-NEXT: v_writelane_b32 v40, s30, 0
; CHECK-NEXT: v_writelane_b32 v40, s31, 1
; CHECK-NEXT: v_writelane_b32 v40, s34, 2
; CHECK-NEXT: v_writelane_b32 v40, s35, 3
; CHECK-NEXT: v_writelane_b32 v40, s36, 4
; CHECK-NEXT: v_writelane_b32 v40, s37, 5
; CHECK-NEXT: v_writelane_b32 v40, s38, 6
; CHECK-NEXT: v_writelane_b32 v40, s39, 7
; CHECK-NEXT: s_addk_i32 s32, 0x800
; CHECK-NEXT: v_writelane_b32 v40, s40, 8
; CHECK-NEXT: v_writelane_b32 v40, s41, 9
; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5]
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2Dh@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2Dh@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: v_writelane_b32 v40, s42, 10
; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT: v_writelane_b32 v40, s43, 11
; CHECK-NEXT: v_mov_b32_e32 v42, v0
; CHECK-NEXT: v_writelane_b32 v40, s44, 12
; CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v42
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: v_writelane_b32 v40, s45, 13
; CHECK-NEXT: v_mov_b32_e32 v41, v31
; CHECK-NEXT: s_mov_b32 s42, s15
; CHECK-NEXT: s_mov_b32 s43, s14
; CHECK-NEXT: s_mov_b32 s44, s13
; CHECK-NEXT: s_mov_b32 s45, s12
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT: v_or_b32_e32 v43, 1, v1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v43
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2Dh@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2Dh@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1		; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]		; CHECK-NEXT: v_mul_f16_e32 v1, v2, v1
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]		; CHECK-NEXT: v_exp_f16_e32 v1, v1
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]		; CHECK-NEXT: v_or_b32_e32 v0, v0, v1
; CHECK-NEXT: v_mul_f16_e32 v0, v0, v1
; CHECK-NEXT: s_mov_b32 s12, s45
; CHECK-NEXT: s_mov_b32 s13, s44
; CHECK-NEXT: s_mov_b32 s14, s43
; CHECK-NEXT: s_mov_b32 s15, s42
; CHECK-NEXT: v_mov_b32_e32 v31, v41
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_and_b32_e32 v1, 0xffff8000, v42
; CHECK-NEXT: buffer_load_dword v43, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; CHECK-NEXT: v_or_b32_e32 v0, v1, v0
; CHECK-NEXT: v_readlane_b32 s45, v40, 13
; CHECK-NEXT: v_readlane_b32 s44, v40, 12
; CHECK-NEXT: v_readlane_b32 s43, v40, 11
; CHECK-NEXT: v_readlane_b32 s42, v40, 10
; CHECK-NEXT: v_readlane_b32 s41, v40, 9
; CHECK-NEXT: v_readlane_b32 s40, v40, 8
; CHECK-NEXT: v_readlane_b32 s39, v40, 7
; CHECK-NEXT: v_readlane_b32 s38, v40, 6
; CHECK-NEXT: v_readlane_b32 s37, v40, 5
; CHECK-NEXT: v_readlane_b32 s36, v40, 4
; CHECK-NEXT: v_readlane_b32 s35, v40, 3
; CHECK-NEXT: v_readlane_b32 s34, v40, 2
; CHECK-NEXT: v_readlane_b32 s31, v40, 1
; CHECK-NEXT: v_readlane_b32 s30, v40, 0
; CHECK-NEXT: v_readlane_b32 s4, v40, 14
; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[6:7]
; CHECK-NEXT: s_addk_i32 s32, 0xf800
; CHECK-NEXT: s_mov_b32 s33, s4
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]		; CHECK-NEXT: s_setpc_b64 s[30:31]
%y = or i32 %y.arg, 1		%y = or i32 %y.arg, 1
%call = tail call fast half @_Z4pownDhi(half %x, i32 %y)		%call = tail call fast half @_Z4pownDhi(half %x, i32 %y)
ret half %call		ret half %call
}		}

define float @test_pown_fast_f32_known_odd(float %x, i32 %y.arg) {		define float @test_pown_fast_f32_known_odd(float %x, i32 %y.arg) {
; CHECK-LABEL: test_pown_fast_f32_known_odd:		; CHECK-LABEL: test_pown_fast_f32_known_odd:
; CHECK: ; %bb.0:		; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)		; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s16, s33		; CHECK-NEXT: s_mov_b32 s4, 0x800000
; CHECK-NEXT: s_mov_b32 s33, s32		; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, \|v0\|, s4
; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1		; CHECK-NEXT: v_mov_b32_e32 v3, 0x4f800000
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill		; CHECK-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc
; CHECK-NEXT: s_mov_b64 exec, s[18:19]		; CHECK-NEXT: v_mul_f32_e64 v3, \|v0\|, v3
; CHECK-NEXT: v_writelane_b32 v40, s16, 14		; CHECK-NEXT: v_or_b32_e32 v1, 1, v1
; CHECK-NEXT: v_writelane_b32 v40, s30, 0		; CHECK-NEXT: v_log_f32_e32 v3, v3
; CHECK-NEXT: v_writelane_b32 v40, s31, 1		; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1
; CHECK-NEXT: v_writelane_b32 v40, s34, 2		; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000
; CHECK-NEXT: v_writelane_b32 v40, s35, 3		; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; CHECK-NEXT: v_writelane_b32 v40, s36, 4		; CHECK-NEXT: v_sub_f32_e32 v2, v3, v2
; CHECK-NEXT: v_writelane_b32 v40, s37, 5		; CHECK-NEXT: v_mul_f32_e32 v3, v2, v1
; CHECK-NEXT: v_writelane_b32 v40, s38, 6		; CHECK-NEXT: s_mov_b32 s4, 0xc2fc0000
; CHECK-NEXT: v_writelane_b32 v40, s39, 7		; CHECK-NEXT: v_mov_b32_e32 v4, 0x42800000
; CHECK-NEXT: s_addk_i32 s32, 0x800		; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, s4, v3
; CHECK-NEXT: v_writelane_b32 v40, s40, 8		; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc
; CHECK-NEXT: v_writelane_b32 v40, s41, 9		; CHECK-NEXT: v_fma_f32 v1, v2, v1, v3
; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5]		; CHECK-NEXT: v_exp_f32_e32 v1, v1
; CHECK-NEXT: s_getpc_b64 s[4:5]		; CHECK-NEXT: v_mov_b32_e32 v2, 0x1f800000
; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2f@gotpcrel32@lo+4		; CHECK-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2f@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: v_writelane_b32 v40, s42, 10
; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT: v_writelane_b32 v40, s43, 11
; CHECK-NEXT: v_mov_b32_e32 v42, v0
; CHECK-NEXT: v_writelane_b32 v40, s44, 12
; CHECK-NEXT: v_and_b32_e32 v0, 0x7fffffff, v42
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: v_writelane_b32 v40, s45, 13
; CHECK-NEXT: v_mov_b32_e32 v41, v31
; CHECK-NEXT: s_mov_b32 s42, s15
; CHECK-NEXT: s_mov_b32 s43, s14
; CHECK-NEXT: s_mov_b32 s44, s13
; CHECK-NEXT: s_mov_b32 s45, s12
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT: v_or_b32_e32 v43, 1, v1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2f@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2f@gotpcrel32@hi+12
; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v43
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
; CHECK-NEXT: s_mov_b32 s12, s45
; CHECK-NEXT: s_mov_b32 s13, s44
; CHECK-NEXT: s_mov_b32 s14, s43
; CHECK-NEXT: s_mov_b32 s15, s42
; CHECK-NEXT: v_mov_b32_e32 v31, v41
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: s_brev_b32 s4, 1		; CHECK-NEXT: s_brev_b32 s4, 1
; CHECK-NEXT: v_and_or_b32 v0, v42, s4, v0		; CHECK-NEXT: v_mul_f32_e32 v1, v1, v2
; CHECK-NEXT: buffer_load_dword v43, off, s[0:3], s33 ; 4-byte Folded Reload		; CHECK-NEXT: v_and_or_b32 v0, v0, s4, v1
; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; CHECK-NEXT: v_readlane_b32 s45, v40, 13
; CHECK-NEXT: v_readlane_b32 s44, v40, 12
; CHECK-NEXT: v_readlane_b32 s43, v40, 11
; CHECK-NEXT: v_readlane_b32 s42, v40, 10
; CHECK-NEXT: v_readlane_b32 s41, v40, 9
; CHECK-NEXT: v_readlane_b32 s40, v40, 8
; CHECK-NEXT: v_readlane_b32 s39, v40, 7
; CHECK-NEXT: v_readlane_b32 s38, v40, 6
; CHECK-NEXT: v_readlane_b32 s37, v40, 5
; CHECK-NEXT: v_readlane_b32 s36, v40, 4
; CHECK-NEXT: v_readlane_b32 s35, v40, 3
; CHECK-NEXT: v_readlane_b32 s34, v40, 2
; CHECK-NEXT: v_readlane_b32 s31, v40, 1
; CHECK-NEXT: v_readlane_b32 s30, v40, 0
; CHECK-NEXT: v_readlane_b32 s4, v40, 14
; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[6:7]
; CHECK-NEXT: s_addk_i32 s32, 0xf800
; CHECK-NEXT: s_mov_b32 s33, s4
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]		; CHECK-NEXT: s_setpc_b64 s[30:31]
%y = or i32 %y.arg, 1		%y = or i32 %y.arg, 1
%call = tail call fast float @_Z4pownfi(float %x, i32 %y)		%call = tail call fast float @_Z4pownfi(float %x, i32 %y)
ret float %call		ret float %call
}		}

define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {		define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
; CHECK-LABEL: test_pown_fast_f64_known_odd:		; CHECK-LABEL: test_pown_fast_f64_known_odd:
▲ Show 20 Lines • Show All 93 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll

Show First 20 Lines • Show All 1,068 Lines • ▼ Show 20 Lines
;		;
%pow = tail call afn nnan float @_Z3powff(float %x, float %y)		%pow = tail call afn nnan float @_Z3powff(float %x, float %y)
ret float %pow		ret float %pow
}		}

define float @test_pow_afn_f32_nnan_ninf_x_known_positive(float nofpclass(ninf nnorm nsub) %x, float %y) {		define float @test_pow_afn_f32_nnan_ninf_x_known_positive(float nofpclass(ninf nnorm nsub) %x, float %y) {
; CHECK-LABEL: define float @test_pow_afn_f32_nnan_ninf_x_known_positive		; CHECK-LABEL: define float @test_pow_afn_f32_nnan_ninf_x_known_positive
; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]], float [[Y:%.*]]) {		; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]], float [[Y:%.*]]) {
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @_Z4log2f(float [[X]])		; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[X]])
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y]]		; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @_Z4exp2f(float [[__YLOGX]])		; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
; CHECK-NEXT: ret float [[__EXP2]]		; CHECK-NEXT: ret float [[__EXP2]]
;		;
%pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y)		%pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y)
ret float %pow		ret float %pow
}		}

define <2 x float> @test_pow_afn_v2f32_nnan_x_known_positive(<2 x float> nofpclass(ninf nnorm nsub) %x, <2 x float> %y) {		define <2 x float> @test_pow_afn_v2f32_nnan_x_known_positive(<2 x float> nofpclass(ninf nnorm nsub) %x, <2 x float> %y) {
; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_x_known_positive		; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_x_known_positive
; CHECK-SAME: (<2 x float> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x float> [[Y:%.*]]) {		; CHECK-SAME: (<2 x float> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x float> [[Y:%.*]]) {
; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> [[Y]])		; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> [[Y]])
; CHECK-NEXT: ret <2 x float> [[POW]]		; CHECK-NEXT: ret <2 x float> [[POW]]
;		;
%pow = tail call afn nnan <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y)		%pow = tail call afn nnan <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y)
ret <2 x float> %pow		ret <2 x float> %pow
}		}

define <2 x float> @test_pow_afn_v2f32_nnan_ninf_x_known_positive(<2 x float> nofpclass(ninf nnorm nsub) %x, <2 x float> %y) {		define <2 x float> @test_pow_afn_v2f32_nnan_ninf_x_known_positive(<2 x float> nofpclass(ninf nnorm nsub) %x, <2 x float> %y) {
; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_ninf_x_known_positive		; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_ninf_x_known_positive
; CHECK-SAME: (<2 x float> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x float> [[Y:%.*]]) {		; CHECK-SAME: (<2 x float> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x float> [[Y:%.*]]) {
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @_Z4log2Dv2_f(<2 x float> [[X]])		; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @llvm.log2.v2f32(<2 x float> [[X]])
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[Y]]		; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[Y]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @_Z4exp2Dv2_f(<2 x float> [[__YLOGX]])		; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]])
; CHECK-NEXT: ret <2 x float> [[__EXP2]]		; CHECK-NEXT: ret <2 x float> [[__EXP2]]
;		;
%pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y)		%pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y)
ret <2 x float> %pow		ret <2 x float> %pow
}		}

define float @test_pow_f32_x_known_positive(float nofpclass(ninf nnorm nsub) %x, float %y) {		define float @test_pow_f32_x_known_positive(float nofpclass(ninf nnorm nsub) %x, float %y) {
; CHECK-LABEL: define float @test_pow_f32_x_known_positive		; CHECK-LABEL: define float @test_pow_f32_x_known_positive
▲ Show 20 Lines • Show All 127 Lines • ▼ Show 20 Lines
;		;
%pow = tail call afn nnan half @_Z3powDhDh(half %x, half %y)		%pow = tail call afn nnan half @_Z3powDhDh(half %x, half %y)
ret half %pow		ret half %pow
}		}

define half @test_pow_afn_f16_nnan_ninf_x_known_positive(half nofpclass(ninf nnorm nsub) %x, half %y) {		define half @test_pow_afn_f16_nnan_ninf_x_known_positive(half nofpclass(ninf nnorm nsub) %x, half %y) {
; CHECK-LABEL: define half @test_pow_afn_f16_nnan_ninf_x_known_positive		; CHECK-LABEL: define half @test_pow_afn_f16_nnan_ninf_x_known_positive
; CHECK-SAME: (half nofpclass(ninf nsub nnorm) [[X:%.*]], half [[Y:%.*]]) {		; CHECK-SAME: (half nofpclass(ninf nsub nnorm) [[X:%.*]], half [[Y:%.*]]) {
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn half @_Z4log2Dh(half [[X]])		; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn half @llvm.log2.f16(half [[X]])
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn half [[__LOG2]], [[Y]]		; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn half [[__LOG2]], [[Y]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn half @_Z4exp2Dh(half [[__YLOGX]])		; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn half @llvm.exp2.f16(half [[__YLOGX]])
; CHECK-NEXT: ret half [[__EXP2]]		; CHECK-NEXT: ret half [[__EXP2]]
;		;
%pow = tail call afn nnan ninf half @_Z3powDhDh(half %x, half %y)		%pow = tail call afn nnan ninf half @_Z3powDhDh(half %x, half %y)
ret half %pow		ret half %pow
}		}

define <2 x half> @test_pow_afn_v2f16_nnan_x_known_positive(<2 x half> nofpclass(ninf nnorm nsub) %x, <2 x half> %y) {		define <2 x half> @test_pow_afn_v2f16_nnan_x_known_positive(<2 x half> nofpclass(ninf nnorm nsub) %x, <2 x half> %y) {
; CHECK-LABEL: define <2 x half> @test_pow_afn_v2f16_nnan_x_known_positive		; CHECK-LABEL: define <2 x half> @test_pow_afn_v2f16_nnan_x_known_positive
; CHECK-SAME: (<2 x half> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x half> [[Y:%.*]]) {		; CHECK-SAME: (<2 x half> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x half> [[Y:%.*]]) {
; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn <2 x half> @_Z4powrDv2_DhS_(<2 x half> [[X]], <2 x half> [[Y]])		; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn <2 x half> @_Z4powrDv2_DhS_(<2 x half> [[X]], <2 x half> [[Y]])
; CHECK-NEXT: ret <2 x half> [[POW]]		; CHECK-NEXT: ret <2 x half> [[POW]]
;		;
%pow = tail call afn nnan <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> %y)		%pow = tail call afn nnan <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> %y)
ret <2 x half> %pow		ret <2 x half> %pow
}		}

define <2 x half> @test_pow_afn_v2f16_nnan_ninf_x_known_positive(<2 x half> nofpclass(ninf nnorm nsub) %x, <2 x half> %y) {		define <2 x half> @test_pow_afn_v2f16_nnan_ninf_x_known_positive(<2 x half> nofpclass(ninf nnorm nsub) %x, <2 x half> %y) {
; CHECK-LABEL: define <2 x half> @test_pow_afn_v2f16_nnan_ninf_x_known_positive		; CHECK-LABEL: define <2 x half> @test_pow_afn_v2f16_nnan_ninf_x_known_positive
; CHECK-SAME: (<2 x half> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x half> [[Y:%.*]]) {		; CHECK-SAME: (<2 x half> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x half> [[Y:%.*]]) {
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x half> @_Z4log2Dv2_Dh(<2 x half> [[X]])		; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x half> @llvm.log2.v2f16(<2 x half> [[X]])
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x half> [[__LOG2]], [[Y]]		; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x half> [[__LOG2]], [[Y]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x half> @_Z4exp2Dv2_Dh(<2 x half> [[__YLOGX]])		; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x half> @llvm.exp2.v2f16(<2 x half> [[__YLOGX]])
; CHECK-NEXT: ret <2 x half> [[__EXP2]]		; CHECK-NEXT: ret <2 x half> [[__EXP2]]
;		;
%pow = tail call afn nnan ninf <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> %y)		%pow = tail call afn nnan ninf <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> %y)
ret <2 x half> %pow		ret <2 x half> %pow
}		}

define half @test_pow_f16_x_known_positive(half nofpclass(ninf nnorm nsub) %x, half %y) {		define half @test_pow_f16_x_known_positive(half nofpclass(ninf nnorm nsub) %x, half %y) {
; CHECK-LABEL: define half @test_pow_f16_x_known_positive		; CHECK-LABEL: define half @test_pow_f16_x_known_positive
▲ Show 20 Lines • Show All 501 Lines • ▼ Show 20 Lines
;		;
%pow = tail call afn nnan ninf float @_Z3powff(float %x, float 10.0)		%pow = tail call afn nnan ninf float @_Z3powff(float %x, float 10.0)
ret float %pow		ret float %pow
}		}

define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_poison(<2 x float> %x) {		define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_poison(<2 x float> %x) {
; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_poison		; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_poison
; CHECK-SAME: (<2 x float> [[X:%.*]]) {		; CHECK-SAME: (<2 x float> [[X:%.*]]) {
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @_Z4log2Dv2_f(<2 x float> [[__FABS]])
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @_Z4exp2Dv2_f(<2 x float> poison)
; CHECK-NEXT: ret <2 x float> poison		; CHECK-NEXT: ret <2 x float> poison
;		;
%pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> poison)		%pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> poison)
ret <2 x float> %pow		ret <2 x float> %pow
}		}

define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_3(<2 x float> %x) {		define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_3(<2 x float> %x) {
; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_3		; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_3
▲ Show 20 Lines • Show All 406 Lines • ▼ Show 20 Lines	;
ret float %pow		ret float %pow
}		}

define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp(float %x, i32 %y) {		define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp(float %x, i32 %y) {
; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp		; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp
; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {		; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float		; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])		; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @_Z4log2f(float [[__FABS]])		; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y_CAST]]		; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y_CAST]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @_Z4exp2f(float [[__YLOGX]])		; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
; CHECK-NEXT: [[__YTOU:%.*]] = fptosi float [[Y_CAST]] to i32		; CHECK-NEXT: [[__YTOU:%.*]] = fptosi float [[Y_CAST]] to i32
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[__YTOU]], 31		; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[__YTOU]], 31
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[X]] to i32		; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP1]]		; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[__EXP2]] to i32		; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[__EXP2]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[__POW_SIGN]], [[TMP2]]		; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[__POW_SIGN]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float		; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float
; CHECK-NEXT: ret float [[TMP4]]		; CHECK-NEXT: ret float [[TMP4]]
▲ Show 20 Lines • Show All 63 Lines • ▼ Show 20 Lines	;
ret float %pow		ret float %pow
}		}

define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp(float %x, i32 %y) {		define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp(float %x, i32 %y) {
; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp		; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp
; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {		; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp i32 [[Y]] to float		; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp i32 [[Y]] to float
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])		; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @_Z4log2f(float [[__FABS]])		; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y_CAST]]		; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y_CAST]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @_Z4exp2f(float [[__YLOGX]])		; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
; CHECK-NEXT: [[__YTOU:%.*]] = fptosi float [[Y_CAST]] to i32		; CHECK-NEXT: [[__YTOU:%.*]] = fptosi float [[Y_CAST]] to i32
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[__YTOU]], 31		; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[__YTOU]], 31
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[X]] to i32		; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP1]]		; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[__EXP2]] to i32		; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[__EXP2]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[__POW_SIGN]], [[TMP2]]		; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[__POW_SIGN]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float		; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float
; CHECK-NEXT: ret float [[TMP4]]		; CHECK-NEXT: ret float [[TMP4]]
Show All 29 Lines	;
ret float %pow		ret float %pow
}		}

define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp_i256(float %x, i256 %y) {		define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp_i256(float %x, i256 %y) {
; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp_i256		; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp_i256
; CHECK-SAME: (float [[X:%.*]], i256 [[Y:%.*]]) {		; CHECK-SAME: (float [[X:%.*]], i256 [[Y:%.*]]) {
; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp i256 [[Y]] to float		; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp i256 [[Y]] to float
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])		; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @_Z4log2f(float [[__FABS]])		; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y_CAST]]		; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y_CAST]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @_Z4exp2f(float [[__YLOGX]])		; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
; CHECK-NEXT: [[__YTOU:%.*]] = fptosi float [[Y_CAST]] to i32		; CHECK-NEXT: [[__YTOU:%.*]] = fptosi float [[Y_CAST]] to i32
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[__YTOU]], 31		; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[__YTOU]], 31
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[X]] to i32		; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP1]]		; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[__EXP2]] to i32		; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[__EXP2]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[__POW_SIGN]], [[TMP2]]		; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[__POW_SIGN]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float		; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float
; CHECK-NEXT: ret float [[TMP4]]		; CHECK-NEXT: ret float [[TMP4]]
;		;
%y.cast = uitofp i256 %y to float		%y.cast = uitofp i256 %y to float
%pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)		%pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)
ret float %pow		ret float %pow
}		}

define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp_i256(float %x, i256 %y) {		define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp_i256(float %x, i256 %y) {
; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp_i256		; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp_i256
; CHECK-SAME: (float [[X:%.*]], i256 [[Y:%.*]]) {		; CHECK-SAME: (float [[X:%.*]], i256 [[Y:%.*]]) {
; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i256 [[Y]] to float		; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i256 [[Y]] to float
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])		; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @_Z4log2f(float [[__FABS]])		; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y_CAST]]		; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y_CAST]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @_Z4exp2f(float [[__YLOGX]])		; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
; CHECK-NEXT: [[__YTOU:%.*]] = fptosi float [[Y_CAST]] to i32		; CHECK-NEXT: [[__YTOU:%.*]] = fptosi float [[Y_CAST]] to i32
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[__YTOU]], 31		; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[__YTOU]], 31
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[X]] to i32		; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP1]]		; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[__EXP2]] to i32		; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[__EXP2]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[__POW_SIGN]], [[TMP2]]		; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[__POW_SIGN]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float		; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float
; CHECK-NEXT: ret float [[TMP4]]		; CHECK-NEXT: ret float [[TMP4]]
;		;
%y.cast = sitofp i256 %y to float		%y.cast = sitofp i256 %y to float
%pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)		%pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)
ret float %pow		ret float %pow
}		}

define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp(<2 x float> %x, <2 x i32> %y) {		define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp(<2 x float> %x, <2 x i32> %y) {
; CHECK-LABEL: define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp		; CHECK-LABEL: define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp
; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {		; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp <2 x i32> [[Y]] to <2 x float>		; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp <2 x i32> [[Y]] to <2 x float>
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])		; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @_Z4log2Dv2_f(<2 x float> [[__FABS]])		; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @llvm.log2.v2f32(<2 x float> [[__FABS]])
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[Y_CAST]]		; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[Y_CAST]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @_Z4exp2Dv2_f(<2 x float> [[__YLOGX]])		; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]])
; CHECK-NEXT: [[__YTOU:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32>		; CHECK-NEXT: [[__YTOU:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32>
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[__YTOU]], <i32 31, i32 31>		; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[__YTOU]], <i32 31, i32 31>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>		; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP1]]		; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>		; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[__POW_SIGN]], [[TMP2]]		; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[__POW_SIGN]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <2 x float>		; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <2 x float>
; CHECK-NEXT: ret <2 x float> [[TMP4]]		; CHECK-NEXT: ret <2 x float> [[TMP4]]
Show All 27 Lines	;
ret <2 x float> %pow		ret <2 x float> %pow
}		}

define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp(<2 x float> %x, <2 x i32> %y) {		define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp(<2 x float> %x, <2 x i32> %y) {
; CHECK-LABEL: define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp		; CHECK-LABEL: define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp
; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {		; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp <2 x i32> [[Y]] to <2 x float>		; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp <2 x i32> [[Y]] to <2 x float>
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])		; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @_Z4log2Dv2_f(<2 x float> [[__FABS]])		; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @llvm.log2.v2f32(<2 x float> [[__FABS]])
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[Y_CAST]]		; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[Y_CAST]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @_Z4exp2Dv2_f(<2 x float> [[__YLOGX]])		; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]])
; CHECK-NEXT: [[__YTOU:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32>		; CHECK-NEXT: [[__YTOU:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32>
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[__YTOU]], <i32 31, i32 31>		; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[__YTOU]], <i32 31, i32 31>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>		; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP1]]		; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>		; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[__POW_SIGN]], [[TMP2]]		; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[__POW_SIGN]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <2 x float>		; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <2 x float>
; CHECK-NEXT: ret <2 x float> [[TMP4]]		; CHECK-NEXT: ret <2 x float> [[TMP4]]
Show All 27 Lines	;
%pow = tail call afn float @_Z3powff(float %x, float %y.cast)		%pow = tail call afn float @_Z3powff(float %x, float %y.cast)
ret float %pow		ret float %pow
}		}

define float @test_pow_afn_nnan_ninf_f32__known_positive_x__known_integral_sitofp(float nofpclass(ninf nsub nnorm) %x, i32 %y) {		define float @test_pow_afn_nnan_ninf_f32__known_positive_x__known_integral_sitofp(float nofpclass(ninf nsub nnorm) %x, i32 %y) {
; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32__known_positive_x__known_integral_sitofp		; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32__known_positive_x__known_integral_sitofp
; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]], i32 [[Y:%.*]]) {		; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float		; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @_Z4log2f(float [[X]])		; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[X]])
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y_CAST]]		; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y_CAST]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @_Z4exp2f(float [[__YLOGX]])		; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
; CHECK-NEXT: ret float [[__EXP2]]		; CHECK-NEXT: ret float [[__EXP2]]
;		;
%y.cast = sitofp i32 %y to float		%y.cast = sitofp i32 %y to float
%pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)		%pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)
ret float %pow		ret float %pow
}		}

define float @test_pow_f32__y_known_integral_trunc_maybe_inf(float %x, float nofpclass(nan) %y.arg) {		define float @test_pow_f32__y_known_integral_trunc_maybe_inf(float %x, float nofpclass(nan) %y.arg) {
▲ Show 20 Lines • Show All 161 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll

Show First 20 Lines • Show All 666 Lines • ▼ Show 20 Lines	entry:
ret <2 x half> %call		ret <2 x half> %call
}		}

define float @test_pown_afn_nnan_ninf_f32(float %x, i32 %y) {		define float @test_pown_afn_nnan_ninf_f32(float %x, i32 %y) {
; CHECK-LABEL: define float @test_pown_afn_nnan_ninf_f32		; CHECK-LABEL: define float @test_pown_afn_nnan_ninf_f32
; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {		; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])		; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @_Z4log2f(float [[__FABS]])		; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[Y]] to float		; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[Y]] to float
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]]		; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @_Z4exp2f(float [[__YLOGX]])		; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[Y]], 31		; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[Y]], 31
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32		; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]		; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32		; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[__POW_SIGN]], [[TMP1]]		; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[__POW_SIGN]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float		; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
; CHECK-NEXT: ret float [[TMP3]]		; CHECK-NEXT: ret float [[TMP3]]
;		;
entry:		entry:
%call = tail call nnan ninf afn float @_Z4pownfi(float %x, i32 %y)		%call = tail call nnan ninf afn float @_Z4pownfi(float %x, i32 %y)
ret float %call		ret float %call
}		}

define <2 x float> @test_pown_afn_nnan_ninf_v2f32(<2 x float> %x, <2 x i32> %y) {		define <2 x float> @test_pown_afn_nnan_ninf_v2f32(<2 x float> %x, <2 x i32> %y) {
; CHECK-LABEL: define <2 x float> @test_pown_afn_nnan_ninf_v2f32		; CHECK-LABEL: define <2 x float> @test_pown_afn_nnan_ninf_v2f32
; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {		; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])		; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @_Z4log2Dv2_f(<2 x float> [[__FABS]])		; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @llvm.log2.v2f32(<2 x float> [[__FABS]])
; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[Y]] to <2 x float>		; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[Y]] to <2 x float>
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]]		; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @_Z4exp2Dv2_f(<2 x float> [[__YLOGX]])		; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]])
; CHECK-NEXT: [[__YTOU:%.*]] = fptosi <2 x float> [[POWNI2F]] to <2 x i32>		; CHECK-NEXT: [[__YTOU:%.*]] = fptosi <2 x float> [[POWNI2F]] to <2 x i32>
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[__YTOU]], <i32 31, i32 31>		; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[__YTOU]], <i32 31, i32 31>
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>		; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP0]]		; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>		; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[__POW_SIGN]], [[TMP1]]		; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[__POW_SIGN]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <2 x float>		; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <2 x float>
; CHECK-NEXT: ret <2 x float> [[TMP3]]		; CHECK-NEXT: ret <2 x float> [[TMP3]]
▲ Show 20 Lines • Show All 49 Lines • ▼ Show 20 Lines	entry:
ret <2 x double> %call		ret <2 x double> %call
}		}

define half @test_pown_afn_nnan_ninf_f16(half %x, i32 %y) {		define half @test_pown_afn_nnan_ninf_f16(half %x, i32 %y) {
; CHECK-LABEL: define half @test_pown_afn_nnan_ninf_f16		; CHECK-LABEL: define half @test_pown_afn_nnan_ninf_f16
; CHECK-SAME: (half [[X:%.*]], i32 [[Y:%.*]]) {		; CHECK-SAME: (half [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn half @llvm.fabs.f16(half [[X]])		; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn half @llvm.fabs.f16(half [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn half @_Z4log2Dh(half [[__FABS]])		; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn half @llvm.log2.f16(half [[__FABS]])
; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[Y]] to half		; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[Y]] to half
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn half [[__LOG2]], [[POWNI2F]]		; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn half [[__LOG2]], [[POWNI2F]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn half @_Z4exp2Dh(half [[__YLOGX]])		; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn half @llvm.exp2.f16(half [[__YLOGX]])
; CHECK-NEXT: [[__YTOU:%.*]] = trunc i32 [[Y]] to i16		; CHECK-NEXT: [[__YTOU:%.*]] = trunc i32 [[Y]] to i16
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i16 [[__YTOU]], 15		; CHECK-NEXT: [[__YEVEN:%.*]] = shl i16 [[__YTOU]], 15
; CHECK-NEXT: [[TMP0:%.*]] = bitcast half [[X]] to i16		; CHECK-NEXT: [[TMP0:%.*]] = bitcast half [[X]] to i16
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i16 [[__YEVEN]], [[TMP0]]		; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i16 [[__YEVEN]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[__EXP2]] to i16		; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[__EXP2]] to i16
; CHECK-NEXT: [[TMP2:%.*]] = or i16 [[__POW_SIGN]], [[TMP1]]		; CHECK-NEXT: [[TMP2:%.*]] = or i16 [[__POW_SIGN]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP2]] to half		; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP2]] to half
; CHECK-NEXT: ret half [[TMP3]]		; CHECK-NEXT: ret half [[TMP3]]
;		;
entry:		entry:
%call = tail call nnan ninf afn half @_Z4pownDhi(half %x, i32 %y)		%call = tail call nnan ninf afn half @_Z4pownDhi(half %x, i32 %y)
ret half %call		ret half %call
}		}

define <2 x half> @test_pown_afn_nnan_ninf_v2f16(<2 x half> %x, <2 x i32> %y) {		define <2 x half> @test_pown_afn_nnan_ninf_v2f16(<2 x half> %x, <2 x i32> %y) {
; CHECK-LABEL: define <2 x half> @test_pown_afn_nnan_ninf_v2f16		; CHECK-LABEL: define <2 x half> @test_pown_afn_nnan_ninf_v2f16
; CHECK-SAME: (<2 x half> [[X:%.*]], <2 x i32> [[Y:%.*]]) {		; CHECK-SAME: (<2 x half> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn <2 x half> @llvm.fabs.v2f16(<2 x half> [[X]])		; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn <2 x half> @llvm.fabs.v2f16(<2 x half> [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x half> @_Z4log2Dv2_Dh(<2 x half> [[__FABS]])		; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x half> @llvm.log2.v2f16(<2 x half> [[__FABS]])
; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[Y]] to <2 x half>		; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[Y]] to <2 x half>
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x half> [[__LOG2]], [[POWNI2F]]		; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x half> [[__LOG2]], [[POWNI2F]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x half> @_Z4exp2Dv2_Dh(<2 x half> [[__YLOGX]])		; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x half> @llvm.exp2.v2f16(<2 x half> [[__YLOGX]])
; CHECK-NEXT: [[__YTOU:%.*]] = fptosi <2 x half> [[POWNI2F]] to <2 x i16>		; CHECK-NEXT: [[__YTOU:%.*]] = fptosi <2 x half> [[POWNI2F]] to <2 x i16>
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i16> [[__YTOU]], <i16 15, i16 15>		; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i16> [[__YTOU]], <i16 15, i16 15>
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x half> [[X]] to <2 x i16>		; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x half> [[X]] to <2 x i16>
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i16> [[__YEVEN]], [[TMP0]]		; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i16> [[__YEVEN]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x half> [[__EXP2]] to <2 x i16>		; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x half> [[__EXP2]] to <2 x i16>
; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i16> [[__POW_SIGN]], [[TMP1]]		; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i16> [[__POW_SIGN]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to <2 x half>		; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to <2 x half>
; CHECK-NEXT: ret <2 x half> [[TMP3]]		; CHECK-NEXT: ret <2 x half> [[TMP3]]
Show All 15 Lines	entry:
ret float %call		ret float %call
}		}

define float @test_pown_fast_f32_strictfp(float %x, i32 %y) #1 {		define float @test_pown_fast_f32_strictfp(float %x, i32 %y) #1 {
; CHECK-LABEL: define float @test_pown_fast_f32_strictfp		; CHECK-LABEL: define float @test_pown_fast_f32_strictfp
; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0:[0-9]+]] {		; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[__FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X]])		; CHECK-NEXT: [[__FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call fast float @_Z4log2f(float [[__FABS]])		; CHECK-NEXT: [[__LOG2:%.*]] = call fast float @llvm.log2.f32(float [[__FABS]])
; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[Y]] to float		; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[Y]] to float
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul fast float [[__LOG2]], [[POWNI2F]]		; CHECK-NEXT: [[__YLOGX:%.*]] = fmul fast float [[__LOG2]], [[POWNI2F]]
; CHECK-NEXT: [[__EXP2:%.*]] = call fast float @_Z4exp2f(float [[__YLOGX]])		; CHECK-NEXT: [[__EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[__YLOGX]])
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[Y]], 31		; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[Y]], 31
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32		; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]		; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32		; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[__POW_SIGN]], [[TMP1]]		; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[__POW_SIGN]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float		; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
; CHECK-NEXT: ret float [[TMP3]]		; CHECK-NEXT: ret float [[TMP3]]
;		;
entry:		entry:
%call = tail call fast float @_Z4pownfi(float %x, i32 %y) #1		%call = tail call fast float @_Z4pownfi(float %x, i32 %y) #1
ret float %call		ret float %call
}		}

define float @test_pown_fast_f32__y_poison(float %x) {		define float @test_pown_fast_f32__y_poison(float %x) {
; CHECK-LABEL: define float @test_pown_fast_f32__y_poison		; CHECK-LABEL: define float @test_pown_fast_f32__y_poison
; CHECK-SAME: (float [[X:%.*]]) {		; CHECK-SAME: (float [[X:%.*]]) {
; CHECK-NEXT: [[__FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call fast float @_Z4log2f(float [[__FABS]])
; CHECK-NEXT: [[__EXP2:%.*]] = call fast float @_Z4exp2f(float poison)
; CHECK-NEXT: ret float poison		; CHECK-NEXT: ret float poison
;		;
%call = tail call fast float @_Z4pownfi(float %x, i32 poison)		%call = tail call fast float @_Z4pownfi(float %x, i32 poison)
ret float %call		ret float %call
}		}

define float @test_pown_afn_nnan_ninf_f32__y_3(float %x) {		define float @test_pown_afn_nnan_ninf_f32__y_3(float %x) {
; CHECK-LABEL: define float @test_pown_afn_nnan_ninf_f32__y_3		; CHECK-LABEL: define float @test_pown_afn_nnan_ninf_f32__y_3
▲ Show 20 Lines • Show All 210 Lines • ▼ Show 20 Lines	entry:
ret float %call		ret float %call
}		}

define float @test_pown_afn_ninf_nnan_f32__x_known_positive(float nofpclass(ninf nsub nnorm) %x, i32 %y) {		define float @test_pown_afn_ninf_nnan_f32__x_known_positive(float nofpclass(ninf nsub nnorm) %x, i32 %y) {
; CHECK-LABEL: define float @test_pown_afn_ninf_nnan_f32__x_known_positive		; CHECK-LABEL: define float @test_pown_afn_ninf_nnan_f32__x_known_positive
; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]], i32 [[Y:%.*]]) {		; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])		; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @_Z4log2f(float [[__FABS]])		; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]])
; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[Y]] to float		; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[Y]] to float
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]]		; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @_Z4exp2f(float [[__YLOGX]])		; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[Y]], 31		; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[Y]], 31
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32		; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]		; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32		; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[__POW_SIGN]], [[TMP1]]		; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[__POW_SIGN]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float		; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
; CHECK-NEXT: ret float [[TMP3]]		; CHECK-NEXT: ret float [[TMP3]]
;		;
▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines
}		}

define float @test_fast_pown_f32_y_known_even(float %x, i32 %y.arg) {		define float @test_fast_pown_f32_y_known_even(float %x, i32 %y.arg) {
; CHECK-LABEL: define float @test_fast_pown_f32_y_known_even		; CHECK-LABEL: define float @test_fast_pown_f32_y_known_even
; CHECK-SAME: (float [[X:%.*]], i32 [[Y_ARG:%.*]]) {		; CHECK-SAME: (float [[X:%.*]], i32 [[Y_ARG:%.*]]) {
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[Y:%.*]] = shl i32 [[Y_ARG]], 1		; CHECK-NEXT: [[Y:%.*]] = shl i32 [[Y_ARG]], 1
; CHECK-NEXT: [[__FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X]])		; CHECK-NEXT: [[__FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call fast float @_Z4log2f(float [[__FABS]])		; CHECK-NEXT: [[__LOG2:%.*]] = call fast float @llvm.log2.f32(float [[__FABS]])
; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[Y]] to float		; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[Y]] to float
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul fast float [[__LOG2]], [[POWNI2F]]		; CHECK-NEXT: [[__YLOGX:%.*]] = fmul fast float [[__LOG2]], [[POWNI2F]]
; CHECK-NEXT: [[__EXP2:%.*]] = call fast float @_Z4exp2f(float [[__YLOGX]])		; CHECK-NEXT: [[__EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[__YLOGX]])
; CHECK-NEXT: ret float [[__EXP2]]		; CHECK-NEXT: ret float [[__EXP2]]
;		;
entry:		entry:
%y = shl i32 %y.arg, 1		%y = shl i32 %y.arg, 1
%call = tail call fast float @_Z4pownfi(float %x, i32 %y)		%call = tail call fast float @_Z4pownfi(float %x, i32 %y)
ret float %call		ret float %call
}		}

define float @test_fast_pown_f32_known_positive_y_known_even(float nofpclass(ninf nsub nnorm) %x, i32 %y.arg) {		define float @test_fast_pown_f32_known_positive_y_known_even(float nofpclass(ninf nsub nnorm) %x, i32 %y.arg) {
; CHECK-LABEL: define float @test_fast_pown_f32_known_positive_y_known_even		; CHECK-LABEL: define float @test_fast_pown_f32_known_positive_y_known_even
; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]], i32 [[Y_ARG:%.*]]) {		; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]], i32 [[Y_ARG:%.*]]) {
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[Y:%.*]] = shl i32 [[Y_ARG]], 1		; CHECK-NEXT: [[Y:%.*]] = shl i32 [[Y_ARG]], 1
; CHECK-NEXT: [[__FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X]])		; CHECK-NEXT: [[__FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call fast float @_Z4log2f(float [[__FABS]])		; CHECK-NEXT: [[__LOG2:%.*]] = call fast float @llvm.log2.f32(float [[__FABS]])
; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[Y]] to float		; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[Y]] to float
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul fast float [[__LOG2]], [[POWNI2F]]		; CHECK-NEXT: [[__YLOGX:%.*]] = fmul fast float [[__LOG2]], [[POWNI2F]]
; CHECK-NEXT: [[__EXP2:%.*]] = call fast float @_Z4exp2f(float [[__YLOGX]])		; CHECK-NEXT: [[__EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[__YLOGX]])
; CHECK-NEXT: ret float [[__EXP2]]		; CHECK-NEXT: ret float [[__EXP2]]
;		;
entry:		entry:
%y = shl i32 %y.arg, 1		%y = shl i32 %y.arg, 1
%call = tail call fast float @_Z4pownfi(float %x, i32 %y)		%call = tail call fast float @_Z4pownfi(float %x, i32 %y)
ret float %call		ret float %call
}		}

attributes #0 = { nobuiltin }		attributes #0 = { nobuiltin }
attributes #1 = { strictfp }		attributes #1 = { strictfp }

llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll

Show All 19 Lines
declare <3 x half> @_Z4powrDv3_DhS_(<3 x half>, <3 x half>)		declare <3 x half> @_Z4powrDv3_DhS_(<3 x half>, <3 x half>)
declare <4 x half> @_Z4powrDv4_DhS_(<4 x half>, <4 x half>)		declare <4 x half> @_Z4powrDv4_DhS_(<4 x half>, <4 x half>)
declare <8 x half> @_Z4powrDv8_DhS_(<8 x half>, <8 x half>)		declare <8 x half> @_Z4powrDv8_DhS_(<8 x half>, <8 x half>)
declare <16 x half> @_Z4powrDv16_DhS_(<16 x half>, <16 x half>)		declare <16 x half> @_Z4powrDv16_DhS_(<16 x half>, <16 x half>)

define float @test_powr_fast_f32(float %x, float %y) {		define float @test_powr_fast_f32(float %x, float %y) {
; CHECK-LABEL: define float @test_powr_fast_f32		; CHECK-LABEL: define float @test_powr_fast_f32
; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {		; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
; CHECK-NEXT: [[__LOG2:%.*]] = call fast float @_Z4log2f(float [[X]])		; CHECK-NEXT: [[__LOG2:%.*]] = call fast float @llvm.log2.f32(float [[X]])
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul fast float [[__LOG2]], [[Y]]		; CHECK-NEXT: [[__YLOGX:%.*]] = fmul fast float [[__LOG2]], [[Y]]
; CHECK-NEXT: [[__EXP2:%.*]] = call fast float @_Z4exp2f(float [[__YLOGX]])		; CHECK-NEXT: [[__EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[__YLOGX]])
; CHECK-NEXT: ret float [[__EXP2]]		; CHECK-NEXT: ret float [[__EXP2]]
;		;
%powr = tail call fast float @_Z4powrff(float %x, float %y)		%powr = tail call fast float @_Z4powrff(float %x, float %y)
ret float %powr		ret float %powr
}		}

define <2 x float> @test_powr_fast_v2f32(<2 x float> %x, <2 x float> %y) {		define <2 x float> @test_powr_fast_v2f32(<2 x float> %x, <2 x float> %y) {
; CHECK-LABEL: define <2 x float> @test_powr_fast_v2f32		; CHECK-LABEL: define <2 x float> @test_powr_fast_v2f32
; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) {		; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) {
; CHECK-NEXT: [[__LOG2:%.*]] = call fast <2 x float> @_Z4log2Dv2_f(<2 x float> [[X]])		; CHECK-NEXT: [[__LOG2:%.*]] = call fast <2 x float> @llvm.log2.v2f32(<2 x float> [[X]])
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul fast <2 x float> [[__LOG2]], [[Y]]		; CHECK-NEXT: [[__YLOGX:%.*]] = fmul fast <2 x float> [[__LOG2]], [[Y]]
; CHECK-NEXT: [[__EXP2:%.*]] = call fast <2 x float> @_Z4exp2Dv2_f(<2 x float> [[__YLOGX]])		; CHECK-NEXT: [[__EXP2:%.*]] = call fast <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]])
; CHECK-NEXT: ret <2 x float> [[__EXP2]]		; CHECK-NEXT: ret <2 x float> [[__EXP2]]
;		;
%powr = tail call fast <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> %y)		%powr = tail call fast <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> %y)
ret <2 x float> %powr		ret <2 x float> %powr
}		}

define float @test_powr_afn_f32(float %x, float %y) {		define float @test_powr_afn_f32(float %x, float %y) {
; CHECK-LABEL: define float @test_powr_afn_f32		; CHECK-LABEL: define float @test_powr_afn_f32
▲ Show 20 Lines • Show All 393 Lines • ▼ Show 20 Lines
;		;
%powr = tail call afn nnan float @_Z4powrff(float %x, float %y)		%powr = tail call afn nnan float @_Z4powrff(float %x, float %y)
ret float %powr		ret float %powr
}		}

define float @test_powr_afn_f32_noinline(float %x, float %y) {		define float @test_powr_afn_f32_noinline(float %x, float %y) {
; CHECK-LABEL: define float @test_powr_afn_f32_noinline		; CHECK-LABEL: define float @test_powr_afn_f32_noinline
; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {		; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
; CHECK-NEXT: [[POWR:%.*]] = tail call afn float @_Z4powrff(float [[X]], float [[Y]]) #[[ATTR3:[0-9]+]]		; CHECK-NEXT: [[POWR:%.*]] = tail call afn float @_Z4powrff(float [[X]], float [[Y]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT: ret float [[POWR]]		; CHECK-NEXT: ret float [[POWR]]
;		;
%powr = tail call afn float @_Z4powrff(float %x, float %y) #1		%powr = tail call afn float @_Z4powrff(float %x, float %y) #1
ret float %powr		ret float %powr
}		}

define float @test_powr_afn_f32_nnan_noinline(float %x, float %y) {		define float @test_powr_afn_f32_nnan_noinline(float %x, float %y) {
; CHECK-LABEL: define float @test_powr_afn_f32_nnan_noinline		; CHECK-LABEL: define float @test_powr_afn_f32_nnan_noinline
; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {		; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
; CHECK-NEXT: [[POWR:%.*]] = tail call nnan afn float @_Z4powrff(float [[X]], float [[Y]]) #[[ATTR3]]		; CHECK-NEXT: [[POWR:%.*]] = tail call nnan afn float @_Z4powrff(float [[X]], float [[Y]]) #[[ATTR4]]
; CHECK-NEXT: ret float [[POWR]]		; CHECK-NEXT: ret float [[POWR]]
;		;
%powr = tail call afn nnan float @_Z4powrff(float %x, float %y) #1		%powr = tail call afn nnan float @_Z4powrff(float %x, float %y) #1
ret float %powr		ret float %powr
}		}

define float @test_powr_afn_f32_strictfp(float %x, float %y) #2 {		define float @test_powr_afn_f32_strictfp(float %x, float %y) #2 {
; CHECK-LABEL: define float @test_powr_afn_f32_strictfp		; CHECK-LABEL: define float @test_powr_afn_f32_strictfp
; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1:[0-9]+]] {		; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[POWR:%.*]] = tail call nnan nsz afn float @_Z4powrff(float [[X]], float [[Y]]) #[[ATTR1]]		; CHECK-NEXT: [[POWR:%.*]] = tail call nnan nsz afn float @_Z4powrff(float [[X]], float [[Y]]) #[[ATTR1]]
; CHECK-NEXT: ret float [[POWR]]		; CHECK-NEXT: ret float [[POWR]]
;		;
%powr = tail call afn nsz nnan float @_Z4powrff(float %x, float %y) #2		%powr = tail call afn nsz nnan float @_Z4powrff(float %x, float %y) #2
ret float %powr		ret float %powr
}		}

define float @test_powr_fast_f32_nobuiltin(float %x, float %y) {		define float @test_powr_fast_f32_nobuiltin(float %x, float %y) {
; CHECK-LABEL: define float @test_powr_fast_f32_nobuiltin		; CHECK-LABEL: define float @test_powr_fast_f32_nobuiltin
; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {		; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
; CHECK-NEXT: [[POWR:%.*]] = tail call fast float @_Z4powrff(float [[X]], float [[Y]]) #[[ATTR4:[0-9]+]]		; CHECK-NEXT: [[POWR:%.*]] = tail call fast float @_Z4powrff(float [[X]], float [[Y]]) #[[ATTR5:[0-9]+]]
; CHECK-NEXT: ret float [[POWR]]		; CHECK-NEXT: ret float [[POWR]]
;		;
%powr = tail call fast float @_Z4powrff(float %x, float %y) #3		%powr = tail call fast float @_Z4powrff(float %x, float %y) #3
ret float %powr		ret float %powr
}		}

define float @test_powr_afn_f32_poison(float %x) {		define float @test_powr_afn_f32_poison(float %x) {
; CHECK-LABEL: define float @test_powr_afn_f32_poison		; CHECK-LABEL: define float @test_powr_afn_f32_poison
▲ Show 20 Lines • Show All 514 Lines • ▼ Show 20 Lines
;		;
%powr = tail call afn nnan float @_Z4powrff(float %x, float %y)		%powr = tail call afn nnan float @_Z4powrff(float %x, float %y)
ret float %powr		ret float %powr
}		}

define float @test_powr_afn_f32_nnan_ninf_x_known_positive(float nofpclass(ninf nnorm nsub) %x, float %y) {		define float @test_powr_afn_f32_nnan_ninf_x_known_positive(float nofpclass(ninf nnorm nsub) %x, float %y) {
; CHECK-LABEL: define float @test_powr_afn_f32_nnan_ninf_x_known_positive		; CHECK-LABEL: define float @test_powr_afn_f32_nnan_ninf_x_known_positive
; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]], float [[Y:%.*]]) {		; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]], float [[Y:%.*]]) {
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @_Z4log2f(float [[X]])		; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[X]])
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y]]		; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @_Z4exp2f(float [[__YLOGX]])		; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
; CHECK-NEXT: ret float [[__EXP2]]		; CHECK-NEXT: ret float [[__EXP2]]
;		;
%powr = tail call afn nnan ninf float @_Z4powrff(float %x, float %y)		%powr = tail call afn nnan ninf float @_Z4powrff(float %x, float %y)
ret float %powr		ret float %powr
}		}

define <2 x float> @test_powr_afn_v2f32_nnan_x_known_positive(<2 x float> nofpclass(ninf nnorm nsub) %x, <2 x float> %y) {		define <2 x float> @test_powr_afn_v2f32_nnan_x_known_positive(<2 x float> nofpclass(ninf nnorm nsub) %x, <2 x float> %y) {
; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_nnan_x_known_positive		; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_nnan_x_known_positive
; CHECK-SAME: (<2 x float> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x float> [[Y:%.*]]) {		; CHECK-SAME: (<2 x float> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x float> [[Y:%.*]]) {
; CHECK-NEXT: [[POWR:%.*]] = tail call nnan afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> [[Y]])		; CHECK-NEXT: [[POWR:%.*]] = tail call nnan afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> [[Y]])
; CHECK-NEXT: ret <2 x float> [[POWR]]		; CHECK-NEXT: ret <2 x float> [[POWR]]
;		;
%powr = tail call afn nnan <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> %y)		%powr = tail call afn nnan <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> %y)
ret <2 x float> %powr		ret <2 x float> %powr
}		}

define <2 x float> @test_powr_afn_v2f32_nnan_ninf_x_known_positive(<2 x float> nofpclass(ninf nnorm nsub) %x, <2 x float> %y) {		define <2 x float> @test_powr_afn_v2f32_nnan_ninf_x_known_positive(<2 x float> nofpclass(ninf nnorm nsub) %x, <2 x float> %y) {
; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_nnan_ninf_x_known_positive		; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_nnan_ninf_x_known_positive
; CHECK-SAME: (<2 x float> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x float> [[Y:%.*]]) {		; CHECK-SAME: (<2 x float> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x float> [[Y:%.*]]) {
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @_Z4log2Dv2_f(<2 x float> [[X]])		; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @llvm.log2.v2f32(<2 x float> [[X]])
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[Y]]		; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[Y]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @_Z4exp2Dv2_f(<2 x float> [[__YLOGX]])		; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]])
; CHECK-NEXT: ret <2 x float> [[__EXP2]]		; CHECK-NEXT: ret <2 x float> [[__EXP2]]
;		;
%powr = tail call afn nnan ninf <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> %y)		%powr = tail call afn nnan ninf <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> %y)
ret <2 x float> %powr		ret <2 x float> %powr
}		}

define float @test_powr_f32_x_known_positive(float nofpclass(ninf nnorm nsub) %x, float %y) {		define float @test_powr_f32_x_known_positive(float nofpclass(ninf nnorm nsub) %x, float %y) {
; CHECK-LABEL: define float @test_powr_f32_x_known_positive		; CHECK-LABEL: define float @test_powr_f32_x_known_positive
▲ Show 20 Lines • Show All 58 Lines • ▼ Show 20 Lines	;
%powr = tail call afn float @_Z4powrff(float %x, float %y.cast)		%powr = tail call afn float @_Z4powrff(float %x, float %y.cast)
ret float %powr		ret float %powr
}		}

define float @test_powr_afn_nnan_ninf_f32_known_integral_sitofp(float %x, i32 %y) {		define float @test_powr_afn_nnan_ninf_f32_known_integral_sitofp(float %x, i32 %y) {
; CHECK-LABEL: define float @test_powr_afn_nnan_ninf_f32_known_integral_sitofp		; CHECK-LABEL: define float @test_powr_afn_nnan_ninf_f32_known_integral_sitofp
; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {		; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float		; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @_Z4log2f(float [[X]])		; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[X]])
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y_CAST]]		; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y_CAST]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @_Z4exp2f(float [[__YLOGX]])		; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
; CHECK-NEXT: ret float [[__EXP2]]		; CHECK-NEXT: ret float [[__EXP2]]
;		;
%y.cast = sitofp i32 %y to float		%y.cast = sitofp i32 %y to float
%powr = tail call afn nnan ninf float @_Z4powrff(float %x, float %y.cast)		%powr = tail call afn nnan ninf float @_Z4powrff(float %x, float %y.cast)
ret float %powr		ret float %powr
}		}

define float @test_powr_f32_known_integral_uitofp(float %x, i32 %y) {		define float @test_powr_f32_known_integral_uitofp(float %x, i32 %y) {
Show All 19 Lines	;
%powr = tail call afn float @_Z4powrff(float %x, float %y.cast)		%powr = tail call afn float @_Z4powrff(float %x, float %y.cast)
ret float %powr		ret float %powr
}		}

define float @test_powr_afn_nnan_ninf_f32_known_integral_uitofp(float %x, i32 %y) {		define float @test_powr_afn_nnan_ninf_f32_known_integral_uitofp(float %x, i32 %y) {
; CHECK-LABEL: define float @test_powr_afn_nnan_ninf_f32_known_integral_uitofp		; CHECK-LABEL: define float @test_powr_afn_nnan_ninf_f32_known_integral_uitofp
; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {		; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp i32 [[Y]] to float		; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp i32 [[Y]] to float
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @_Z4log2f(float [[X]])		; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[X]])
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y_CAST]]		; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y_CAST]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @_Z4exp2f(float [[__YLOGX]])		; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
; CHECK-NEXT: ret float [[__EXP2]]		; CHECK-NEXT: ret float [[__EXP2]]
;		;
%y.cast = uitofp i32 %y to float		%y.cast = uitofp i32 %y to float
%powr = tail call afn nnan ninf float @_Z4powrff(float %x, float %y.cast)		%powr = tail call afn nnan ninf float @_Z4powrff(float %x, float %y.cast)
ret float %powr		ret float %powr
}		}

define <2 x float> @test_powr_afn_nnan_ninf_v2f32_known_integral_sitofp(<2 x float> %x, <2 x i32> %y) {		define <2 x float> @test_powr_afn_nnan_ninf_v2f32_known_integral_sitofp(<2 x float> %x, <2 x i32> %y) {
; CHECK-LABEL: define <2 x float> @test_powr_afn_nnan_ninf_v2f32_known_integral_sitofp		; CHECK-LABEL: define <2 x float> @test_powr_afn_nnan_ninf_v2f32_known_integral_sitofp
; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {		; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp <2 x i32> [[Y]] to <2 x float>		; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp <2 x i32> [[Y]] to <2 x float>
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @_Z4log2Dv2_f(<2 x float> [[X]])		; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @llvm.log2.v2f32(<2 x float> [[X]])
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[Y_CAST]]		; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[Y_CAST]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @_Z4exp2Dv2_f(<2 x float> [[__YLOGX]])		; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]])
; CHECK-NEXT: ret <2 x float> [[__EXP2]]		; CHECK-NEXT: ret <2 x float> [[__EXP2]]
;		;
%y.cast = sitofp <2 x i32> %y to <2 x float>		%y.cast = sitofp <2 x i32> %y to <2 x float>
%powr = tail call afn nnan ninf <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> %y.cast)		%powr = tail call afn nnan ninf <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> %y.cast)
ret <2 x float> %powr		ret <2 x float> %powr
}		}

define <2 x float> @test_powr_v2f32_known_integral_uitofp(<2 x float> %x, <2 x i32> %y) {		define <2 x float> @test_powr_v2f32_known_integral_uitofp(<2 x float> %x, <2 x i32> %y) {
Show All 19 Lines	;
%powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> %y.cast)		%powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> %y.cast)
ret <2 x float> %powr		ret <2 x float> %powr
}		}

define <2 x float> @test_powr_afn_nnan_ninf_v2f32_known_integral_uitofp(<2 x float> %x, <2 x i32> %y) {		define <2 x float> @test_powr_afn_nnan_ninf_v2f32_known_integral_uitofp(<2 x float> %x, <2 x i32> %y) {
; CHECK-LABEL: define <2 x float> @test_powr_afn_nnan_ninf_v2f32_known_integral_uitofp		; CHECK-LABEL: define <2 x float> @test_powr_afn_nnan_ninf_v2f32_known_integral_uitofp
; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {		; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp <2 x i32> [[Y]] to <2 x float>		; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp <2 x i32> [[Y]] to <2 x float>
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @_Z4log2Dv2_f(<2 x float> [[X]])		; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @llvm.log2.v2f32(<2 x float> [[X]])
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[Y_CAST]]		; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[Y_CAST]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @_Z4exp2Dv2_f(<2 x float> [[__YLOGX]])		; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]])
; CHECK-NEXT: ret <2 x float> [[__EXP2]]		; CHECK-NEXT: ret <2 x float> [[__EXP2]]
;		;
%y.cast = uitofp <2 x i32> %y to <2 x float>		%y.cast = uitofp <2 x i32> %y to <2 x float>
%powr = tail call afn nnan ninf <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> %y.cast)		%powr = tail call afn nnan ninf <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> %y.cast)
ret <2 x float> %powr		ret <2 x float> %powr
}		}

attributes #0 = { minsize }		attributes #0 = { minsize }
attributes #1 = { noinline }		attributes #1 = { noinline }
attributes #2 = { strictfp }		attributes #2 = { strictfp }
attributes #3 = { nobuiltin }		attributes #3 = { nobuiltin }

llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll

Show First 20 Lines • Show All 345 Lines • ▼ Show 20 Lines	entry:
store float %call, ptr addrspace(1) %a, align 4		store float %call, ptr addrspace(1) %a, align 4
ret void		ret void
}		}

declare half @_Z4pownDhi(half, i32)		declare half @_Z4pownDhi(half, i32)

; GCN-LABEL: {{^}}define half @test_pown_f16(		; GCN-LABEL: {{^}}define half @test_pown_f16(
; GCN-NATIVE: %__fabs = tail call fast half @llvm.fabs.f16(half %x)		; GCN-NATIVE: %__fabs = tail call fast half @llvm.fabs.f16(half %x)
; GCN-NATIVE: %__log2 = tail call fast half @_Z4log2Dh(half %__fabs)		; GCN-NATIVE: %__log2 = tail call fast half @llvm.log2.f16(half %__fabs)
; GCN-NATIVE: %pownI2F = sitofp i32 %y to half		; GCN-NATIVE: %pownI2F = sitofp i32 %y to half
; GCN-NATIVE: %__ylogx = fmul fast half %__log2, %pownI2F		; GCN-NATIVE: %__ylogx = fmul fast half %__log2, %pownI2F
; GCN-NATIVE: %__exp2 = tail call fast half @_Z4exp2Dh(half %__ylogx)		; GCN-NATIVE: %__exp2 = tail call fast half @llvm.exp2.f16(half %__ylogx)
; GCN-NATIVE: %__ytou = trunc i32 %y to i16		; GCN-NATIVE: %__ytou = trunc i32 %y to i16
; GCN-NATIVE: %__yeven = shl i16 %__ytou, 15		; GCN-NATIVE: %__yeven = shl i16 %__ytou, 15
; GCN-NATIVE: %0 = bitcast half %x to i16		; GCN-NATIVE: %0 = bitcast half %x to i16
; GCN-NATIVE: %__pow_sign = and i16 %__yeven, %0		; GCN-NATIVE: %__pow_sign = and i16 %__yeven, %0
; GCN-NATIVE: %1 = bitcast half %__exp2 to i16		; GCN-NATIVE: %1 = bitcast half %__exp2 to i16
; GCN-NATIVE: %2 = or i16 %__pow_sign, %1		; GCN-NATIVE: %2 = or i16 %__pow_sign, %1
; GCN-NATIVE: %3 = bitcast i16 %2 to half		; GCN-NATIVE: %3 = bitcast i16 %2 to half
define half @test_pown_f16(half %x, i32 %y) {		define half @test_pown_f16(half %x, i32 %y) {
entry:		entry:
%call = call fast half @_Z4pownDhi(half %x, i32 %y)		%call = call fast half @_Z4pownDhi(half %x, i32 %y)
ret half %call		ret half %call
}		}

declare float @_Z4pownfi(float, i32)		declare float @_Z4pownfi(float, i32)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow		; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow
; GCN-POSTLINK: call fast float @_Z3powff(float %tmp, float 1.013000e+03)		; GCN: %__fabs = tail call fast float @llvm.fabs.f32(float %tmp)
; GCN-PRELINK: %__fabs = tail call fast float @llvm.fabs.f32(float %tmp)		; GCN: %__log2 = tail call fast float @llvm.log2.f32(float %__fabs)
; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %__fabs)		; GCN: %__ylogx = fmul fast float %__log2, 1.013000e+03
; GCN-PRELINK: %__ylogx = fmul fast float %__log2, 1.013000e+03		; GCN: %__exp2 = tail call fast float @llvm.exp2.f32(float %__ylogx)
; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)		; GCN: %[[r0:.*]] = bitcast float %tmp to i32
; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32		; GCN: %__pow_sign = and i32 %[[r0]], -2147483648
; GCN-PRELINK: %__pow_sign = and i32 %[[r0]], -2147483648		; GCN: %[[r1:.*]] = bitcast float %__exp2 to i32
; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32		; GCN: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]		; GCN: store i32 %[[r2]], ptr addrspace(1) %a, align 4
; GCN-PRELINK: store i32 %[[r2]], ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_pow(ptr addrspace(1) nocapture %a) {		define amdgpu_kernel void @test_pow(ptr addrspace(1) nocapture %a) {
entry:		entry:
%tmp = load float, ptr addrspace(1) %a, align 4		%tmp = load float, ptr addrspace(1) %a, align 4
%call = call fast float @_Z3powff(float %tmp, float 1.013000e+03)		%call = call fast float @_Z3powff(float %tmp, float 1.013000e+03)
store float %call, ptr addrspace(1) %a, align 4		store float %call, ptr addrspace(1) %a, align 4
ret void		ret void
}		}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr		; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr
; GCN-POSTLINK: call fast float @_Z4powrff(float %tmp, float %tmp1)		; GCN: %__log2 = tail call fast float @llvm.log2.f32(float %tmp)
; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %tmp)		; GCN: %__ylogx = fmul fast float %tmp1, %__log2
; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %tmp1		; GCN: %__exp2 = tail call fast float @llvm.exp2.f32(float %__ylogx)
; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)		; GCN: store float %__exp2, ptr addrspace(1) %a, align 4
; GCN-PRELINK: store float %__exp2, ptr addrspace(1) %a, align 4
; GCN-NATIVE: %__log2 = tail call fast float @_Z11native_log2f(float %tmp)
; GCN-NATIVE: %__ylogx = fmul fast float %__log2, %tmp1
; GCN-NATIVE: %__exp2 = tail call fast float @_Z11native_exp2f(float %__ylogx)
; GCN-NATIVE: store float %__exp2, ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_powr(ptr addrspace(1) nocapture %a) {		define amdgpu_kernel void @test_powr(ptr addrspace(1) nocapture %a) {
entry:		entry:
%tmp = load float, ptr addrspace(1) %a, align 4		%tmp = load float, ptr addrspace(1) %a, align 4
%arrayidx1 = getelementptr inbounds float, ptr addrspace(1) %a, i64 1		%arrayidx1 = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
%tmp1 = load float, ptr addrspace(1) %arrayidx1, align 4		%tmp1 = load float, ptr addrspace(1) %arrayidx1, align 4
%call = call fast float @_Z4powrff(float %tmp, float %tmp1)		%call = call fast float @_Z4powrff(float %tmp, float %tmp1)
store float %call, ptr addrspace(1) %a, align 4		store float %call, ptr addrspace(1) %a, align 4
ret void		ret void
}		}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown		; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown
; GCN-POSTLINK: call fast float @_Z4pownfi(float %tmp, i32 %conv)		; GCN: %conv = fptosi float %tmp1 to i32
; GCN-PRELINK: %conv = fptosi float %tmp1 to i32		; GCN: %__fabs = tail call fast float @llvm.fabs.f32(float %tmp)
; GCN-PRELINK: %__fabs = tail call fast float @llvm.fabs.f32(float %tmp)		; GCN: %__log2 = tail call fast float @llvm.log2.f32(float %__fabs)
; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %__fabs)		; GCN: %pownI2F = sitofp i32 %conv to float
; GCN-PRELINK: %pownI2F = sitofp i32 %conv to float		; GCN: %__ylogx = fmul fast float %__log2, %pownI2F
; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %pownI2F		; GCN: %__exp2 = tail call fast float @llvm.exp2.f32(float %__ylogx)
; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)		; GCN: %__yeven = shl i32 %conv, 31
; GCN-PRELINK: %__yeven = shl i32 %conv, 31		; GCN: %[[r0:.*]] = bitcast float %tmp to i32
; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32		; GCN: %__pow_sign = and i32 %__yeven, %[[r0]]
; GCN-PRELINK: %__pow_sign = and i32 %__yeven, %[[r0]]		; GCN: %[[r1:.*]] = bitcast float %__exp2 to i32
; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32		; GCN: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]		; GCN: store i32 %[[r2]], ptr addrspace(1) %a, align 4
; GCN-PRELINK: store i32 %[[r2]], ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_pown(ptr addrspace(1) nocapture %a) {		define amdgpu_kernel void @test_pown(ptr addrspace(1) nocapture %a) {
entry:		entry:
%tmp = load float, ptr addrspace(1) %a, align 4		%tmp = load float, ptr addrspace(1) %a, align 4
%arrayidx1 = getelementptr inbounds float, ptr addrspace(1) %a, i64 1		%arrayidx1 = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
%tmp1 = load float, ptr addrspace(1) %arrayidx1, align 4		%tmp1 = load float, ptr addrspace(1) %arrayidx1, align 4
%conv = fptosi float %tmp1 to i32		%conv = fptosi float %tmp1 to i32
%call = call fast float @_Z4pownfi(float %tmp, i32 %conv)		%call = call fast float @_Z4pownfi(float %tmp, i32 %conv)
store float %call, ptr addrspace(1) %a, align 4		store float %call, ptr addrspace(1) %a, align 4
ret void		ret void
}		}

declare half @_Z3powDhDh(half, half)		declare half @_Z3powDhDh(half, half)
declare <2 x half> @_Z3powDv2_DhS_(<2 x half>, <2 x half>)		declare <2 x half> @_Z3powDv2_DhS_(<2 x half>, <2 x half>)

; GCN-LABEL: define half @test_pow_fast_f16__y_13(half %x)		; GCN-LABEL: define half @test_pow_fast_f16__y_13(half %x)
; GCN-PRELINK: %__fabs = tail call fast half @llvm.fabs.f16(half %x)		; GCN: %__fabs = tail call fast half @llvm.fabs.f16(half %x)
; GCN-PRELINK: %__log2 = tail call fast half @_Z4log2Dh(half %__fabs)		; GCN: %__log2 = tail call fast half @llvm.log2.f16(half %__fabs)
; GCN-PRELINK: %__ylogx = fmul fast half %__log2, 0xH4A80		; GCN: %__ylogx = fmul fast half %__log2, 0xH4A80
; GCN-PRELINK: %__exp2 = tail call fast half @_Z4exp2Dh(half %__ylogx)		; GCN: %__exp2 = tail call fast half @llvm.exp2.f16(half %__ylogx)
; GCN-PRELINK: %1 = bitcast half %x to i16		; GCN: %1 = bitcast half %x to i16
; GCN-PRELINK: %__pow_sign = and i16 %1, -32768		; GCN: %__pow_sign = and i16 %1, -32768
; GCN-PRELINK: %2 = bitcast half %__exp2 to i16		; GCN: %2 = bitcast half %__exp2 to i16
; GCN-PRELINK: %3 = or i16 %__pow_sign, %2		; GCN: %3 = or i16 %__pow_sign, %2
; GCN-PRELINK: %4 = bitcast i16 %3 to half		; GCN: %4 = bitcast i16 %3 to half
define half @test_pow_fast_f16__y_13(half %x) {		define half @test_pow_fast_f16__y_13(half %x) {
%powr = tail call fast half @_Z3powDhDh(half %x, half 13.0)		%powr = tail call fast half @_Z3powDhDh(half %x, half 13.0)
ret half %powr		ret half %powr
}		}

; GCN-LABEL: define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x)		; GCN-LABEL: define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x)
; GCN-PRELINK: %__fabs = tail call fast <2 x half> @llvm.fabs.v2f16(<2 x half> %x)		; GCN: %__fabs = tail call fast <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
; GCN-PRELINK: %__log2 = tail call fast <2 x half> @_Z4log2Dv2_Dh(<2 x half> %__fabs)		; GCN: %__log2 = tail call fast <2 x half> @llvm.log2.v2f16(<2 x half> %__fabs)
; GCN-PRELINK: %__ylogx = fmul fast <2 x half> %__log2, <half 0xH4A80, half 0xH4A80>		; GCN: %__ylogx = fmul fast <2 x half> %__log2, <half 0xH4A80, half 0xH4A80>
; GCN-PRELINK: %__exp2 = tail call fast <2 x half> @_Z4exp2Dv2_Dh(<2 x half> %__ylogx)		; GCN: %__exp2 = tail call fast <2 x half> @llvm.exp2.v2f16(<2 x half> %__ylogx)
; GCN-PRELINK: %1 = bitcast <2 x half> %x to <2 x i16>		; GCN: %1 = bitcast <2 x half> %x to <2 x i16>
; GCN-PRELINK: %__pow_sign = and <2 x i16> %1, <i16 -32768, i16 -32768>		; GCN: %__pow_sign = and <2 x i16> %1, <i16 -32768, i16 -32768>
; GCN-PRELINK: %2 = bitcast <2 x half> %__exp2 to <2 x i16>		; GCN: %2 = bitcast <2 x half> %__exp2 to <2 x i16>
; GCN-PRELINK: %3 = or <2 x i16> %__pow_sign, %2		; GCN: %3 = or <2 x i16> %__pow_sign, %2
; GCN-PRELINK: %4 = bitcast <2 x i16> %3 to <2 x half>		; GCN: %4 = bitcast <2 x i16> %3 to <2 x half>
define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x) {		define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x) {
%powr = tail call fast <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> <half 13.0, half 13.0>)		%powr = tail call fast <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> <half 13.0, half 13.0>)
ret <2 x half> %powr		ret <2 x half> %powr
}		}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_1		; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_1
; GCN: %tmp = load float, ptr addrspace(1) %arrayidx, align 4		; GCN: %tmp = load float, ptr addrspace(1) %arrayidx, align 4
; GCN: store float %tmp, ptr addrspace(1) %a, align 4		; GCN: store float %tmp, ptr addrspace(1) %a, align 4
▲ Show 20 Lines • Show All 195 Lines • ▼ Show 20 Lines	entry:
%call = call fast float @_Z5log10f(float %tmp)		%call = call fast float @_Z5log10f(float %tmp)
store float %call, ptr addrspace(1) %a, align 4		store float %call, ptr addrspace(1) %a, align 4
ret void		ret void
}		}

declare float @_Z5log10f(float)		declare float @_Z5log10f(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_powr		; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_powr
; GCN-NATIVE: %tmp1 = load float, ptr addrspace(1) %arrayidx1, align 4		; GCN: %tmp1 = load float, ptr addrspace(1) %arrayidx1, align 4
; GCN-NATIVE: %__log2 = tail call fast float @_Z11native_log2f(float %tmp)		; GCN: %__log2 = tail call fast float @llvm.log2.f32(float %tmp)
; GCN-NATIVE: %__ylogx = fmul fast float %__log2, %tmp1		; GCN: %__ylogx = fmul fast float %tmp1, %__log2
; GCN-NATIVE: %__exp2 = tail call fast float @_Z11native_exp2f(float %__ylogx)		; GCN: %__exp2 = tail call fast float @llvm.exp2.f32(float %__ylogx)
; GCN-NATIVE: store float %__exp2, ptr addrspace(1) %a, align 4		; GCN: store float %__exp2, ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_use_native_powr(ptr addrspace(1) nocapture %a) {		define amdgpu_kernel void @test_use_native_powr(ptr addrspace(1) nocapture %a) {
entry:		entry:
%tmp = load float, ptr addrspace(1) %a, align 4		%tmp = load float, ptr addrspace(1) %a, align 4
%arrayidx1 = getelementptr inbounds float, ptr addrspace(1) %a, i64 1		%arrayidx1 = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
%tmp1 = load float, ptr addrspace(1) %arrayidx1, align 4		%tmp1 = load float, ptr addrspace(1) %arrayidx1, align 4
%call = call fast float @_Z4powrff(float %tmp, float %tmp1)		%call = call fast float @_Z4powrff(float %tmp, float %tmp1)
store float %call, ptr addrspace(1) %a, align 4		store float %call, ptr addrspace(1) %a, align 4
ret void		ret void
▲ Show 20 Lines • Show All 162 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU: Use exp2 and log2 intrinsics directly for f16/f32 (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 553950

llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp

llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll

llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll

llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll

llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll

llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll

AMDGPU: Use exp2 and log2 intrinsics directly for f16/f32
Closed, Public