Diff 218667

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Show First 20 Lines • Show All 3,290 Lines • ▼ Show 20 Lines	case Intrinsic::aarch64_crypto_aese: {
if (match(KeyArg, m_ZeroInt()) &&		if (match(KeyArg, m_ZeroInt()) &&
match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {		match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
II->setArgOperand(0, Data);		II->setArgOperand(0, Data);
II->setArgOperand(1, Key);		II->setArgOperand(1, Key);
return II;		return II;
}		}
break;		break;
}		}
		case Intrinsic::arm_mve_minv_u:
		dmgreen (Unsubmitted, Not Done): This looks like something else entirely! It's setting the range metadata on vminv's?
		simon_tatham (Author, Unsubmitted, Done): D'oh! Well spotted. That's what I get for writing my commit messages at the last minute.
		case Intrinsic::arm_mve_minv_s: {
		  // Handles both VMINV flavours (the arm_mve_minv_u label directly above
		  // shares this body). Two independent improvements are attempted; if
		  // either one changes the call, the call itself is returned.
		  Type *VectorTy = II->getArgOperand(1)->getType();
		  const unsigned EltBits =
		      VectorTy->getVectorElementType()->getScalarSizeInBits();

		  // 1. Only the low EltBits bits of operand 0 are demanded, so let the
		  //    demanded-bits machinery simplify whatever feeds that operand.
		  KnownBits ScalarKnown(32);
		  bool Changed = SimplifyDemandedBits(
		      II, 0, APInt::getLowBitsSet(32, EltBits), ScalarKnown, 0);

		  // 2. For elements narrower than 32 bits, attach !range metadata (built
		  //    from i32 constants) unless the call already carries some.
		  if (EltBits < 32 && !II->getMetadata(LLVMContext::MD_range)) {
		    // Unsigned: [0, 2^EltBits). Signed: the same interval shifted down by
		    // 2^(EltBits-1); the lower bound wraps around in uint32_t, which is
		    // the two's-complement encoding of the negative bound.
		    const uint32_t Span = (uint32_t)1 << EltBits;
		    const uint32_t Bias =
		        (IID == Intrinsic::arm_mve_minv_s) ? (Span >> 1) : 0;
		    const uint32_t Lo = 0 - Bias;
		    const uint32_t Hi = Span - Bias;

		    LLVMContext &Ctx = II->getContext();
		    Type *I32Ty = Type::getInt32Ty(Ctx);
		    Metadata *Bounds[] = {
		        ConstantAsMetadata::get(ConstantInt::get(I32Ty, Lo)),
		        ConstantAsMetadata::get(ConstantInt::get(I32Ty, Hi))};
		    II->setMetadata(LLVMContext::MD_range, MDNode::get(Ctx, Bounds));
		    Changed = true;
		  }

		  if (!Changed)
		    break;
		  return II;
		}
		case Intrinsic::arm_mve_vadc:
		case Intrinsic::arm_mve_vadc_predicated: {
		  // Only bit 29 of the carry operand is demanded from this call, so ask
		  // the demanded-bits machinery to simplify whatever computes the other
		  // bits. The carry is operand 3 of the predicated form, operand 2
		  // otherwise.
		  unsigned CarryOp =
		      (II->getIntrinsicID() == Intrinsic::arm_mve_vadc_predicated) ? 3 : 2;
		  Value *CarryArg = II->getArgOperand(CarryOp);
		  unsigned CarryWidth = CarryArg->getType()->getScalarSizeInBits();
		dmgreen (Unsubmitted, Not Done): Is this ever not 32? If so the 32 below should be the same value
		simon_tatham (Author, Unsubmitted, Done): It should always be 32, because that intrinsic argument represents a value you can VMSR into FPSCR. I'll get rid of CarryWidth completely and make it an assertion.

		  // Size the KnownBits from CarryWidth too, so its width can never
		  // disagree with the width of the demanded mask built just below.
		  KnownBits CarryKnown(CarryWidth);
		  // The depth argument is left to its default (0), the value that was
		  // previously passed explicitly.
		  if (SimplifyDemandedBits(II, CarryOp, APInt::getOneBitSet(CarryWidth, 29),
		                           CarryKnown))
		dmgreen (Unsubmitted, Done): I think the last parameter is depth, and the default is 0? If so it might as well be left off.
		    return II;
		  break;
		}
case Intrinsic::amdgcn_rcp: {		case Intrinsic::amdgcn_rcp: {
Value *Src = II->getArgOperand(0);		Value *Src = II->getArgOperand(0);

// TODO: Move to ConstantFolding/InstSimplify?		// TODO: Move to ConstantFolding/InstSimplify?
if (isa<UndefValue>(Src))		if (isa<UndefValue>(Src))
return replaceInstUsesWith(CI, Src);		return replaceInstUsesWith(CI, Src);

if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {		if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
▲ Show 20 Lines • Show All 1,481 Lines • Show Last 20 Lines

llvm/test/CodeGen/Thumb2/mve-intrinsics/vadc-multiple.ll

This file was added.

				; RUN: opt -instcombine -S %s | FileCheck --check-prefix=IR %s
				; RUN: opt -instcombine %s | llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -O3 -o - | FileCheck --check-prefix=ASM %s

				%struct.foo = type { [2 x <4 x i32>] }

				; Function Attrs: noinline nounwind optnone
				; Chains two llvm.arm.mve.vadc calls. Between them, the carry leaving the
				; first call is moved from bit 29 down to bit 0 and then straight back up
				; to bit 29 before it enters the second call.
				define arm_aapcs_vfpcc i32 @test_vadciq_multiple(%struct.foo %a, %struct.foo %b, i32 %carry) {
				entry:
				; Split each two-vector struct argument into its two <4 x i32> halves.
				%a.0 = extractvalue %struct.foo %a, 0, 0
				%a.1 = extractvalue %struct.foo %a, 0, 1
				%b.0 = extractvalue %struct.foo %b, 0, 0
				%b.1 = extractvalue %struct.foo %b, 0, 1

				; Place the incoming carry at bit 29 of the i32 operand of the first call.
				%fpscr.in.0 = shl i32 %carry, 29
				%outpair.0 = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32> %a.0, <4 x i32> %b.0, i32 %fpscr.in.0)
				%fpscr.out.0 = extractvalue { <4 x i32>, i32 } %outpair.0, 1
				; Reduce the first call's i32 result to a 0/1 carry taken from bit 29 ...
				%shifted.out.0 = lshr i32 %fpscr.out.0, 29
				%carry.out.0 = and i32 1, %shifted.out.0
				; ... and shift it back to bit 29 for the second call. The IR checks
				; below expect instcombine to remove this round trip.
				%fpscr.in.1 = shl i32 %carry.out.0, 29
				%outpair.1 = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32> %a.1, <4 x i32> %b.1, i32 %fpscr.in.1)
				dmgreenUnsubmitted Done Reply Inline Actions Can you include a test for arm_mve_vadc_predicated? dmgreen: Can you include a test for arm_mve_vadc_predicated?
				; Return bit 29 of the second call's i32 result as a 0/1 value.
				%fpscr.out.1 = extractvalue { <4 x i32>, i32 } %outpair.1, 1
				%shifted.out.1 = lshr i32 %fpscr.out.1, 29
				%carry.out.1 = and i32 1, %shifted.out.1
				ret i32 %carry.out.1
				}

				declare { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32>, <4 x i32>, i32)

				; Expect the transformation in between the two intrinsics, where the
				; fpscr-formatted output value is turned back into just the carry bit
				; at bit 0 and then back again for the next call, to be optimized away
				; completely in InstCombine, so that the FPSCR output from one
				; intrinsic is passed straight on to the next:

				; IR: %outpair.0 = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32> %a.0, <4 x i32> %b.0, i32 %fpscr.in.0)
				; IR: %fpscr.out.0 = extractvalue { <4 x i32>, i32 } %outpair.0, 1
				; IR: %outpair.1 = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32> %a.1, <4 x i32> %b.1, i32 %fpscr.out.0)

				; And this is the assembly language we expect at the end of it, with
				; the two vadc.i32 instructions right next to each other, and the
				; second one implicitly reusing the FPSCR written by the first.

				; ASM: test_vadciq_multiple:
				; ASM: lsls r0, r0, #29
				; ASM-NEXT: vmsr fpscr_nzcvqc, r0
				; ASM-NEXT: vadc.i32 q0, q0, q2
				; ASM-NEXT: vadc.i32 q0, q1, q3
				; ASM-NEXT: vmrs r0, fpscr_nzcvqc
				; ASM-NEXT: ubfx r0, r0, #29, #1
				; ASM-NEXT: bx lr

This is an archive of the discontinued LLVM Phabricator instance.

[InstCombine] Known-bits optimization for ARM MVE VADC.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 218667

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

llvm/test/CodeGen/Thumb2/mve-intrinsics/vadc-multiple.ll

This is an archive of the discontinued LLVM Phabricator instance.

[InstCombine] Known-bits optimization for ARM MVE VADC.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 218667

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

llvm/test/CodeGen/Thumb2/mve-intrinsics/vadc-multiple.ll

[InstCombine] Known-bits optimization for ARM MVE VADC.
ClosedPublic