This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
lib/Target/AMDGPU/
-
Target/
-
AMDGPU/
1/3
AMDGPUInstCombineIntrinsic.cpp
-
test/Transforms/InstCombine/AMDGPU/
-
Transforms/
-
InstCombine/
-
AMDGPU/
-
amdgcn-intrinsics.ll

Differential D151340

AMDGPU: Refine undef handling for llvm.amdgcn.class intrinsic
ClosedPublic

Authored by arsenm on May 24 2023, 8:29 AM.

Download Raw Diff

Details

Reviewers

foad
rampitec

Group Reviewers

Restricted Project

Summary

This barely matters since 99% are converted to the generic intrinsic now,
and the only real difference is the target intrinsic supports a variable
test mask. Start propagating poison. Prefer folding to a defined result (false)
for an undef test mask. Propagate undef for the first operand.

Diff Detail

Event Timeline

arsenm created this revision.May 24 2023, 8:29 AM

Herald added a project: Restricted Project. · View Herald TranscriptMay 24 2023, 8:29 AM

Herald added subscribers: nlopes, StephenFan, kerbowa and 6 others. · View Herald Transcript

arsenm requested review of this revision.May 24 2023, 8:29 AM

Herald added a project: Restricted Project. · View Herald TranscriptMay 24 2023, 8:29 AM

Herald added a subscriber: wdng. · View Herald Transcript

arsenm added a parent revision: D143420: AMDGPU: Replace certain llvm.amdgcn.class uses with llvm.is.fpclass.May 24 2023, 8:29 AM

Harbormaster completed remote builds in B234212: Diff 525193.May 24 2023, 8:30 AM

arsenm updated this revision to Diff 525194.May 24 2023, 8:31 AM

arsenm updated this revision to Diff 525195.

Harbormaster completed remote builds in B234213: Diff 525194.May 24 2023, 8:32 AM

Harbormaster completed remote builds in B234214: Diff 525195.

LGTM

This revision is now accepted and ready to land.May 24 2023, 9:59 AM

foad added inline comments.May 24 2023, 1:29 PM

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
471	I don't think this is sound, e.g. if the RHS is 0 (but not a ConstantInt) then the result should be 0, not undef. Perhaps you could fold it to `RHS != 0`?

arsenm added inline comments.May 24 2023, 11:25 PM

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
471	icmp ne x, undef folds to undef, so that would be the same thing

foad added inline comments.May 24 2023, 11:54 PM

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
471	No we're talking about the case where LHS is undef.

Compare with undef

Harbormaster completed remote builds in B234627: Diff 525775.May 25 2023, 2:25 PM

foad accepted this revision.May 26 2023, 1:10 AM

8609df7c6e91301af72080caab01b2edcef78b33

Revision Contents

Path

Size

llvm/

lib/

Target/

AMDGPU/

AMDGPUInstCombineIntrinsic.cpp

16 lines

test/

Transforms/

InstCombine/

AMDGPU/

amdgcn-intrinsics.ll

17 lines

Diff 525775

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

Show First 20 Lines • Show All 454 Lines • ▼ Show 20 Lines	if (CMask) {
II.getModule(), Intrinsic::is_fpclass, Src0->getType()));		II.getModule(), Intrinsic::is_fpclass, Src0->getType()));

// Clamp any excess bits, as they're illegal for the generic intrinsic.		// Clamp any excess bits, as they're illegal for the generic intrinsic.
II.setArgOperand(1, ConstantInt::get(Src1->getType(),		II.setArgOperand(1, ConstantInt::get(Src1->getType(),
CMask->getZExtValue() & fcAllFlags));		CMask->getZExtValue() & fcAllFlags));
return &II;		return &II;
}		}

// FIXME: Should propagate poison.		// Propagate poison.
if (isa<UndefValue>(Src0))		if (isa<PoisonValue>(Src0) || isa<PoisonValue>(Src1))
return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));		return IC.replaceInstUsesWith(II, PoisonValue::get(II.getType()));

if (isa<UndefValue>(Src1)) {		// llvm.amdgcn.class(_, undef) -> false
		if (IC.getSimplifyQuery().isUndefValue(Src1))
return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), false));		return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), false));
}

		// llvm.amdgcn.class(undef, mask) -> mask != 0
		foadUnsubmitted Not Done Reply Inline Actions I don't think this is sound, e.g if the RHS is 0 (but not a ConstantInt) then the result should be 0, not undef. Perhaps you could fold it to `RHS != 0`? foad: I don't think this is sound, e.g if the RHS is 0 (but not a ConstantInt) then the result should…
		arsenmAuthorUnsubmitted Done Reply Inline Actions icmp ne x, undef folds to undef, so that would be the same thing arsenm: icmp ne x, undef folds to undef, so that would be the same thing
		foadUnsubmitted Not Done Reply Inline Actions No we're talking about the case where LHS is undef. foad: No we're talking about the case where LHS is undef.
		if (IC.getSimplifyQuery().isUndefValue(Src0)) {
		Value *CmpMask = IC.Builder.CreateICmpNE(
		Src1, ConstantInt::getNullValue(Src1->getType()));
		return IC.replaceInstUsesWith(II, CmpMask);
		}
break;		break;
}		}
case Intrinsic::amdgcn_cvt_pkrtz: {		case Intrinsic::amdgcn_cvt_pkrtz: {
Value *Src0 = II.getArgOperand(0);		Value *Src0 = II.getArgOperand(0);
Value *Src1 = II.getArgOperand(1);		Value *Src1 = II.getArgOperand(1);
if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {		if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {		if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
const fltSemantics &HalfSem =		const fltSemantics &HalfSem =
▲ Show 20 Lines • Show All 728 Lines • Show Last 20 Lines

llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 563 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: ret i1 false			; CHECK-NEXT: ret i1 false
	;			;
	%val = call i1 @llvm.amdgcn.class.f32(float %x, i32 undef)			%val = call i1 @llvm.amdgcn.class.f32(float %x, i32 undef)
	ret i1 %val			ret i1 %val
	}			}

	define i1 @test_class_poison_poison_f32(float %x) nounwind {			define i1 @test_class_poison_poison_f32(float %x) nounwind {
	; CHECK-LABEL: @test_class_poison_poison_f32(			; CHECK-LABEL: @test_class_poison_poison_f32(
	; CHECK-NEXT: ret i1 undef			; CHECK-NEXT: ret i1 poison
	;			;
	%val = call i1 @llvm.amdgcn.class.f32(float poison, i32 poison)			%val = call i1 @llvm.amdgcn.class.f32(float poison, i32 poison)
	ret i1 %val			ret i1 %val
	}			}
	define i1 @test_class_val_poison_f32(float %arg) nounwind {			define i1 @test_class_val_poison_f32(float %arg) nounwind {
	; CHECK-LABEL: @test_class_val_poison_f32(			; CHECK-LABEL: @test_class_val_poison_f32(
	; CHECK-NEXT: ret i1 false			; CHECK-NEXT: ret i1 poison
	;			;
	%val = call i1 @llvm.amdgcn.class.f32(float %arg, i32 poison)			%val = call i1 @llvm.amdgcn.class.f32(float %arg, i32 poison)
	ret i1 %val			ret i1 %val
	}			}

	define i1 @test_class_poison_val_f32(i32 %arg) nounwind {			define i1 @test_class_poison_val_f32(i32 %arg) nounwind {
	; CHECK-LABEL: @test_class_poison_val_f32(			; CHECK-LABEL: @test_class_poison_val_f32(
	; CHECK-NEXT: ret i1 undef			; CHECK-NEXT: ret i1 poison
	;			;
	%val = call i1 @llvm.amdgcn.class.f32(float poison, i32 %arg)			%val = call i1 @llvm.amdgcn.class.f32(float poison, i32 %arg)
	ret i1 %val			ret i1 %val
	}			}

	define i1 @test_class_over_max_mask_f32(float %x) nounwind {			define i1 @test_class_over_max_mask_f32(float %x) nounwind {
	; CHECK-LABEL: @test_class_over_max_mask_f32(			; CHECK-LABEL: @test_class_over_max_mask_f32(
	; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 1)			; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 1)
	Show All 38 Lines
	define i1 @test_class_undef_val_f32() nounwind {			define i1 @test_class_undef_val_f32() nounwind {
	; CHECK-LABEL: @test_class_undef_val_f32(			; CHECK-LABEL: @test_class_undef_val_f32(
	; CHECK-NEXT: ret i1 undef			; CHECK-NEXT: ret i1 undef
	;			;
	%val = call i1 @llvm.amdgcn.class.f32(float undef, i32 4)			%val = call i1 @llvm.amdgcn.class.f32(float undef, i32 4)
	ret i1 %val			ret i1 %val
	}			}

				define i1 @test_class_undef_val_f32_var(i32 %arg) nounwind {
				; CHECK-LABEL: @test_class_undef_val_f32_var(
				; CHECK-NEXT: [[VAL:%.*]] = icmp ne i32 [[ARG:%.*]], 0
				; CHECK-NEXT: ret i1 [[VAL]]
				;
				%val = call i1 @llvm.amdgcn.class.f32(float undef, i32 %arg)
				ret i1 %val
				}

	define i1 @test_class_val_undef_f32(float %arg) nounwind {			define i1 @test_class_val_undef_f32(float %arg) nounwind {
	; CHECK-LABEL: @test_class_val_undef_f32(			; CHECK-LABEL: @test_class_val_undef_f32(
	; CHECK-NEXT: ret i1 false			; CHECK-NEXT: ret i1 false
	;			;
	%val = call i1 @llvm.amdgcn.class.f32(float %arg, i32 undef)			%val = call i1 @llvm.amdgcn.class.f32(float %arg, i32 undef)
	ret i1 %val			ret i1 %val
	}			}

	define i1 @test_class_undef_undef_f32() nounwind {			define i1 @test_class_undef_undef_f32() nounwind {
	; CHECK-LABEL: @test_class_undef_undef_f32(			; CHECK-LABEL: @test_class_undef_undef_f32(
	; CHECK-NEXT: ret i1 undef			; CHECK-NEXT: ret i1 false
	;			;
	%val = call i1 @llvm.amdgcn.class.f32(float undef, i32 undef)			%val = call i1 @llvm.amdgcn.class.f32(float undef, i32 undef)
	ret i1 %val			ret i1 %val
	}			}

	define i1 @test_class_var_mask_f32(float %x, i32 %mask) nounwind {			define i1 @test_class_var_mask_f32(float %x, i32 %mask) nounwind {
	; CHECK-LABEL: @test_class_var_mask_f32(			; CHECK-LABEL: @test_class_var_mask_f32(
	; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.amdgcn.class.f32(float [[X:%.*]], i32 [[MASK:%.*]])			; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.amdgcn.class.f32(float [[X:%.*]], i32 [[MASK:%.*]])
	▲ Show 20 Lines • Show All 5,046 Lines • Show Last 20 Lines