Diff 284762

llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h

Show First 20 Lines • Show All 347 Lines • ▼ Show 20 Lines	public:
LegalizeResult lowerInsert(MachineInstr &MI);		LegalizeResult lowerInsert(MachineInstr &MI);
LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI);		LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI);
LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI);		LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI);
LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI);		LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI);
LegalizeResult lowerShlSat(MachineInstr &MI);		LegalizeResult lowerShlSat(MachineInstr &MI);
LegalizeResult lowerBswap(MachineInstr &MI);		LegalizeResult lowerBswap(MachineInstr &MI);
LegalizeResult lowerBitreverse(MachineInstr &MI);		LegalizeResult lowerBitreverse(MachineInstr &MI);
LegalizeResult lowerReadWriteRegister(MachineInstr &MI);		LegalizeResult lowerReadWriteRegister(MachineInstr &MI);
		LegalizeResult lowerSMULH_UMULH(MachineInstr &MI);
		Lint: Pre-merge checks Inline Actions clang-tidy: warning: invalid case style for function 'lowerSMULH_UMULH' [readability-identifier-naming] not useful Lint: Pre-merge checks: clang-tidy: warning: invalid case style for function 'lowerSMULH_UMULH' [readability-identifier…
};		};

/// Helper function that creates a libcall to the given \p Name using the given		/// Helper function that creates a libcall to the given \p Name using the given
/// calling convention \p CC.		/// calling convention \p CC.
LegalizerHelper::LegalizeResult		LegalizerHelper::LegalizeResult
createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,		createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
const CallLowering::ArgInfo &Result,		const CallLowering::ArgInfo &Result,
ArrayRef<CallLowering::ArgInfo> Args, CallingConv::ID CC);		ArrayRef<CallLowering::ArgInfo> Args, CallingConv::ID CC);
Show All 15 Lines

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Show First 20 Lines • Show All 1,755 Lines • ▼ Show 20 Lines	LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
MI.eraseFromParent();		MI.eraseFromParent();
return Legalized;		return Legalized;
}		}

LegalizerHelper::LegalizeResult		LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {		LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
switch (MI.getOpcode()) {		switch (MI.getOpcode()) {
default:		default:
return UnableToLegalize;		return UnableToLegalize;
		arsenmUnsubmitted Not Done Reply Inline Actions no auto. Can't this use anyext? arsenm: no auto. Can't this use anyext?
		pdhaliwalAuthorUnsubmitted Done Reply Inline Actions I am a bit doubtful if G_ANYEXT would work here. From docs, it doesn't take care of higher bits. pdhaliwal: I am a bit doubtful if G_ANYEXT would work here. From docs, it doesn't take care of [[ https…
case TargetOpcode::G_EXTRACT:		case TargetOpcode::G_EXTRACT:
return widenScalarExtract(MI, TypeIdx, WideTy);		return widenScalarExtract(MI, TypeIdx, WideTy);
case TargetOpcode::G_INSERT:		case TargetOpcode::G_INSERT:
return widenScalarInsert(MI, TypeIdx, WideTy);		return widenScalarInsert(MI, TypeIdx, WideTy);
		arsenmUnsubmitted Not Done Reply Inline Actions Something seems off to me about introducing a full multiply, and in whatever type the user requested. I think this only works if WideTy == 2 * OriginalType. Can you produce a mulh in the wider type? This seems more like a lowering arsenm: Something seems off to me about introducing a full multiply, and in whatever type the user…
		pdhaliwalAuthorUnsubmitted Done Reply Inline Actions Yes, it would only work when WideTy == 2 * OriginalType. And now if I think again it is more of a lowering operation than widening as user is not always free to choose the wider type. pdhaliwal: Yes, it would only work when WideTy == 2 * OriginalType. And now if I think again it is more…
case TargetOpcode::G_MERGE_VALUES:		case TargetOpcode::G_MERGE_VALUES:
		arsenmUnsubmitted Done Reply Inline Actions Use Register, I would worry about introducing a copy of MachineOperand here arsenm: Use Register, I would worry about introducing a copy of MachineOperand here
return widenScalarMergeValues(MI, TypeIdx, WideTy);		return widenScalarMergeValues(MI, TypeIdx, WideTy);
case TargetOpcode::G_UNMERGE_VALUES:		case TargetOpcode::G_UNMERGE_VALUES:
		arsenmUnsubmitted Done Reply Inline Actions LLT not auto arsenm: LLT not auto
return widenScalarUnmergeValues(MI, TypeIdx, WideTy);		return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
case TargetOpcode::G_UADDO:		case TargetOpcode::G_UADDO:
case TargetOpcode::G_USUBO: {		case TargetOpcode::G_USUBO: {
if (TypeIdx == 1)		if (TypeIdx == 1)
		arsenmUnsubmitted Done Reply Inline Actions ShiftAmt? arsenm: ShiftAmt?
		arsenmUnsubmitted Done Reply Inline Actions Why isn't the shift amount WideTy.getSizeInBits() - Size? I don't understand - IsSigned arsenm: Why isn't the shift amount WideTy.getSizeInBits() - Size? I don't understand - IsSigned
		pdhaliwalAuthorUnsubmitted Done Reply Inline Actions To accommodate the sign bit in case of signed operation. pdhaliwal: To accommodate the sign bit in case of signed operation.
return UnableToLegalize; // TODO		return UnableToLegalize; // TODO
auto LHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(2));		auto LHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(2));
auto RHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(3));		auto RHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(3));
unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO		unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
? TargetOpcode::G_ADD		? TargetOpcode::G_ADD
: TargetOpcode::G_SUB;		: TargetOpcode::G_SUB;
// Do the arithmetic in the larger type.		// Do the arithmetic in the larger type.
auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});		auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());		LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
APInt Mask =		APInt Mask =
APInt::getLowBitsSet(WideTy.getSizeInBits(), OrigTy.getSizeInBits());		APInt::getLowBitsSet(WideTy.getSizeInBits(), OrigTy.getSizeInBits());
auto AndOp = MIRBuilder.buildAnd(		auto AndOp = MIRBuilder.buildAnd(
WideTy, NewOp, MIRBuilder.buildConstant(WideTy, Mask));		WideTy, NewOp, MIRBuilder.buildConstant(WideTy, Mask));
// There is no overflow if the AndOp is the same as NewOp.		// There is no overflow if the AndOp is the same as NewOp.
MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, AndOp);		MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, AndOp);
// Now trunc the NewOp to the original result.		// Now trunc the NewOp to the original result.
MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);		MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
MI.eraseFromParent();		MI.eraseFromParent();
return Legalized;		return Legalized;
}		}
case TargetOpcode::G_SADDSAT:		case TargetOpcode::G_SADDSAT:
		arsenmUnsubmitted Done Reply Inline Actions Extra newline arsenm: Extra newline
case TargetOpcode::G_SSUBSAT:		case TargetOpcode::G_SSUBSAT:
case TargetOpcode::G_SSHLSAT:		case TargetOpcode::G_SSHLSAT:
case TargetOpcode::G_UADDSAT:		case TargetOpcode::G_UADDSAT:
case TargetOpcode::G_USUBSAT:		case TargetOpcode::G_USUBSAT:
case TargetOpcode::G_USHLSAT:		case TargetOpcode::G_USHLSAT:
return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);		return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
case TargetOpcode::G_CTTZ:		case TargetOpcode::G_CTTZ:
case TargetOpcode::G_CTTZ_ZERO_UNDEF:		case TargetOpcode::G_CTTZ_ZERO_UNDEF:
▲ Show 20 Lines • Show All 899 Lines • ▼ Show 20 Lines	case TargetOpcode::G_UREM: {
auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));		auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);		MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
MI.eraseFromParent();		MI.eraseFromParent();
return Legalized;		return Legalized;
}		}
case TargetOpcode::G_SADDO:		case TargetOpcode::G_SADDO:
case TargetOpcode::G_SSUBO:		case TargetOpcode::G_SSUBO:
return lowerSADDO_SSUBO(MI);		return lowerSADDO_SSUBO(MI);
		case TargetOpcode::G_UMULH:
		case TargetOpcode::G_SMULH:
		return lowerSMULH_UMULH(MI);
case TargetOpcode::G_SMULO:		case TargetOpcode::G_SMULO:
case TargetOpcode::G_UMULO: {		case TargetOpcode::G_UMULO: {
// Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the		// Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
// result.		// result.
Register Res = MI.getOperand(0).getReg();		Register Res = MI.getOperand(0).getReg();
Register Overflow = MI.getOperand(1).getReg();		Register Overflow = MI.getOperand(1).getReg();
Register LHS = MI.getOperand(2).getReg();		Register LHS = MI.getOperand(2).getReg();
Register RHS = MI.getOperand(3).getReg();		Register RHS = MI.getOperand(3).getReg();
▲ Show 20 Lines • Show All 3,275 Lines • ▼ Show 20 Lines	LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
if (IsRead)		if (IsRead)
MIRBuilder.buildCopy(ValReg, PhysReg);		MIRBuilder.buildCopy(ValReg, PhysReg);
else		else
MIRBuilder.buildCopy(PhysReg, ValReg);		MIRBuilder.buildCopy(PhysReg, ValReg);

MI.eraseFromParent();		MI.eraseFromParent();
return Legalized;		return Legalized;
}		}

		LegalizerHelper::LegalizeResult
		LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
		Lint: Pre-merge checks Inline Actions clang-tidy: warning: invalid case style for function 'lowerSMULH_UMULH' [readability-identifier-naming] not useful Lint: Pre-merge checks: clang-tidy: warning: invalid case style for function 'lowerSMULH_UMULH' [readability-identifier…
		bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
		unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
		foadUnsubmitted Not Done Reply Inline Actions I agree that anyext would not work here. foad: I agree that anyext would not work here.
		Register Result = MI.getOperand(0).getReg();
		LLT OrigTy = MRI.getType(Result);
		bool IsVector = OrigTy.isVector();
		foadUnsubmitted Done Reply Inline Actions Would this lowering also work for vector types, if you used LLT::scalarOrVector here? foad: Would this lowering also work for vector types, if you used LLT::scalarOrVector here?
		auto SizeInBits = OrigTy.getScalarSizeInBits();
		auto NumElements = IsVector ? OrigTy.getNumElements() : 1;
		LLT WideElementTy = LLT::scalar(SizeInBits * 2);
		LLT WideTy = LLT::scalarOrVector(NumElements, WideElementTy);
		foadUnsubmitted Not Done Reply Inline Actions Actually it would be neater to use `LLT::changeElementSize`. foad: Actually it would be neater to use `LLT::changeElementSize`.

		auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
		auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
		foadUnsubmitted Done Reply Inline Actions As Matt said you definitely should not subtract IsSigned here. foad: As Matt said you definitely should not subtract IsSigned here.
		pdhaliwalAuthorUnsubmitted Done Reply Inline Actions I got confused in signed binary multiplication. For this operation, it is not required to subtract IsSigned. pdhaliwal: I got confused in signed binary multiplication. For this operation, it is not required to…
		auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
		unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;

		auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
		auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
		MIRBuilder.buildTrunc(Result, Shifted);

		MI.eraseFromParent();
		return Legalized;
		}

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Show First 20 Lines • Show All 530 Lines • ▼ Show 20 Lines	AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,

getActionDefinitionsBuilder({G_SDIV, G_UDIV, G_SREM, G_UREM})		getActionDefinitionsBuilder({G_SDIV, G_UDIV, G_SREM, G_UREM})
.customFor({S32, S64})		.customFor({S32, S64})
.clampScalar(0, S32, S64)		.clampScalar(0, S32, S64)
.widenScalarToNextPow2(0, 32)		.widenScalarToNextPow2(0, 32)
.scalarize(0);		.scalarize(0);

getActionDefinitionsBuilder({G_UMULH, G_SMULH})		getActionDefinitionsBuilder({G_UMULH, G_SMULH})
.legalFor({S32})		.legalFor({S32})
.clampScalar(0, S32, S32)		.maxScalar(0, S32)
.scalarize(0);		.scalarize(0)
		.lower();
		arsenmUnsubmitted Done Reply Inline Actions The expansion can fully use packed instructions with VOP3P instructions. This should try to clamp the number of elements for 16-bit cases if available before scalarizing arsenm: The expansion can fully use packed instructions with VOP3P instructions. This should try to…

// Report legal for any types we can handle anywhere. For the cases only legal		// Report legal for any types we can handle anywhere. For the cases only legal
		arsenmUnsubmitted Not Done Reply Inline Actions This isn't the right logic, the intent is to go down to 2 elements for cases that can promote to <2 x i16>. s8 isn't special here arsenm: This isn't the right logic, the intent is to go down to 2 elements for cases that can promote…
		arsenmUnsubmitted Done Reply Inline Actions Put the actions on separate lines arsenm: Put the actions on separate lines
// on the SALU, RegBankSelect will be able to re-legalize.		// on the SALU, RegBankSelect will be able to re-legalize.
		arsenmUnsubmitted Not Done Reply Inline Actions This should be unnecessary arsenm: This should be unnecessary
		pdhaliwalAuthorUnsubmitted Done Reply Inline Actions If I drop this, the <2 x s32> case starts generating worse code. This is due to lowering coming into the picture which promotes the 32-bit mulh to 64-bit mul and then legalizing 64-bit mul. I can use VOP3P instruction only for S8. For others, I need to specify the scalarization. pdhaliwal: If I drop this, the <2 x s32> case starts generating worse code. This is due to lowering coming…
		arsenmUnsubmitted Not Done Reply Inline Actions This should be an unconditional scalarize. The scalarization shouldn't cause a 64-bit multiply to be used arsenm: This should be an unconditional scalarize. The scalarization shouldn't cause a 64-bit multiply…
		pdhaliwalAuthorUnsubmitted Done Reply Inline Actions Hmm, unconditional scalarize would remove the possibility of using vector path for <2 x s8>. This is a bit different from other operations like MUL, ADD where <2 x s16> would have been legal and unconditional scalarization would have worked. The whole point of having the scalarization conditional is because <2 x s8> can easily use <2 x s16> MUL from lowering path. And as <2 x s16> is legal for AMDGPU, the lowering will correctly use vector operations. Unconditional scalarization would simply make logic of using vector ops void. pdhaliwal: Hmm, unconditional scalarize would remove the possibility of using vector path for <2 x s8>.
		arsenmUnsubmitted Done Reply Inline Actions You already handled this case with the first fewerElementsIf, the second one just handles everything else. It doesn't need to specify not -s8 arsenm: You already handled this case with the first fewerElementsIf, the second one just handles…
getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})		getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
.legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16})		.legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16})
		arsenmUnsubmitted Done Reply Inline Actions Separate lines arsenm: Separate lines
.clampScalar(0, S32, S64)		.clampScalar(0, S32, S64)
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))		.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
.fewerElementsIf(vectorWiderThan(0, 64), fewerEltsToSize64Vector(0))		.fewerElementsIf(vectorWiderThan(0, 64), fewerEltsToSize64Vector(0))
.widenScalarToNextPow2(0)		.widenScalarToNextPow2(0)
.scalarize(0);		.scalarize(0);

getActionDefinitionsBuilder({G_UADDO, G_USUBO,		getActionDefinitionsBuilder({G_UADDO, G_USUBO,
G_UADDE, G_SADDE, G_USUBE, G_SSUBE})		G_UADDE, G_SADDE, G_USUBE, G_SSUBE})
▲ Show 20 Lines • Show All 3,992 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulh.mir

Show All 32 Lines	bb.0:
; CHECK: [[SMULH1:%[0-9]+]]:_(s32) = G_SMULH [[UV1]], [[UV3]]		; CHECK: [[SMULH1:%[0-9]+]]:_(s32) = G_SMULH [[UV1]], [[UV3]]
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMULH]](s32), [[SMULH1]](s32)		; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMULH]](s32), [[SMULH1]](s32)
; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)		; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1		%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3		%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
%2:_(<2 x s32>) = G_SMULH %0, %1		%2:_(<2 x s32>) = G_SMULH %0, %1
$vgpr0_vgpr1 = COPY %2		$vgpr0_vgpr1 = COPY %2
...		...

		arsenmUnsubmitted Done Reply Inline Actions Should add <2 x s16>, <3 x s16> and <4 x s16> cases arsenm: Should add <2 x s16>, <3 x s16> and <4 x s16> cases
		---
		name: test_smulh_s16
		body: \|
		bb.0:
		liveins: $vgpr0, $vgpr1

		; CHECK-LABEL: name: test_smulh_s16
		; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
		; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
		; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
		; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16
		; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
		; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16
		; CHECK: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]]
		; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
		; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[MUL]], [[C]](s32)
		; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ASHR]](s32)
		; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16
		; CHECK: $vgpr0 = COPY [[SEXT_INREG2]](s32)
		%0:_(s32) = COPY $vgpr0
		%1:_(s32) = COPY $vgpr1
		%2:_(s16) = G_TRUNC %0
		%3:_(s16) = G_TRUNC %1
		%4:_(s16) = G_SMULH %2, %3
		%5:_(s32) = G_SEXT %4
		$vgpr0 = COPY %5
		...
		arsenmUnsubmitted Not Done Reply Inline Actions Can you add an 8 and 24-bit test? arsenm: Can you add an 8 and 24-bit test?
		pdhaliwalAuthorUnsubmitted Done Reply Inline Actions 24-bit case won't work as it requires 48-bit MUL op which is not working yet. pdhaliwal: 24-bit case won't work as it requires 48-bit MUL op which is not working yet.

		---
		name: test_smulh_s8
		body: \|
		bb.0:
		liveins: $vgpr0, $vgpr1

		; CHECK-LABEL: name: test_smulh_s8
		; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
		; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
		; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
		; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
		; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16)
		; CHECK: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16)
		; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
		; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16)
		; CHECK: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C]](s16)
		; CHECK: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[ASHR]], [[ASHR1]]
		; CHECK: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[MUL]], [[C]](s16)
		; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR2]](s16)
		; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 8
		; CHECK: $vgpr0 = COPY [[SEXT_INREG]](s32)
		%0:_(s32) = COPY $vgpr0
		%1:_(s32) = COPY $vgpr1
		%2:_(s8) = G_TRUNC %0
		%3:_(s8) = G_TRUNC %1
		%4:_(s8) = G_SMULH %2, %3
		%5:_(s32) = G_SEXT %4
		$vgpr0 = COPY %5
		...

		---
		name: test_smulh_v2s16
		body: \|
		bb.0:
		liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
		; CHECK-LABEL: name: test_smulh_v2s16
		; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
		; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
		; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
		; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
		; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
		; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16
		; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
		; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16
		; CHECK: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG]], [[SEXT_INREG1]]
		; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
		; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[MUL]], [[C]](s32)
		; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
		; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16
		; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
		; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16
		; CHECK: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG2]], [[SEXT_INREG3]]
		; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[MUL1]], [[C]](s32)
		; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
		; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ASHR]](s32)
		; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
		; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ASHR1]](s32)
		; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
		; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
		; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
		; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
		; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
		; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
		; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
		; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY8]], 16
		; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
		; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY9]], 16
		; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG4]](s32), [[SEXT_INREG5]](s32)
		; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
		%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
		%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
		%2:_(<2 x s16>) = G_TRUNC %0
		%3:_(<2 x s16>) = G_TRUNC %1
		%4:_(<2 x s16>) = G_SMULH %2, %3
		%5:_(<2 x s32>) = G_SEXT %4
		$vgpr0_vgpr1 = COPY %5
		...

		---
		name: test_smulh_v2s8
		body: \|
		bb.0:
		liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
		; CHECK-LABEL: name: test_smulh_v2s8
		; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
		; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
		; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
		; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
		; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
		; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
		; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16)
		; CHECK: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16)
		; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
		; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16)
		; CHECK: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C]](s16)
		; CHECK: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[ASHR]], [[ASHR1]]
		; CHECK: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[MUL]], [[C]](s16)
		; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
		; CHECK: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C]](s16)
		; CHECK: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[C]](s16)
		; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
		; CHECK: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C]](s16)
		; CHECK: [[ASHR4:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[C]](s16)
		; CHECK: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[ASHR3]], [[ASHR4]]
		; CHECK: [[ASHR5:%[0-9]+]]:_(s16) = G_ASHR [[MUL1]], [[C]](s16)
		; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR2]](s16)
		; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR5]](s16)
		; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32)
		; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 8
		; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32)
		; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 8
		; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
		; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
		%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
		%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
		%2:_(<2 x s8>) = G_TRUNC %0
		%3:_(<2 x s8>) = G_TRUNC %1
		%4:_(<2 x s8>) = G_SMULH %2, %3
		%5:_(<2 x s32>) = G_SEXT %4
		$vgpr0_vgpr1 = COPY %5
		...

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir

Show First 20 Lines • Show All 72 Lines • ▼ Show 20 Lines	bb.0:
; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]		; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32)		; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32)
; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)		; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1		%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3		%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(s64) = G_UMULH %0, %1		%2:_(s64) = G_UMULH %0, %1
$vgpr0_vgpr1 = COPY %2		$vgpr0_vgpr1 = COPY %2
...		...

		---
		name: test_umulh_s16
		body: \|
		bb.0:
		liveins: $vgpr0, $vgpr1

		; CHECK-LABEL: name: test_umulh_s16
		; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
		; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
		; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
		; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
		; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
		; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
		; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
		; CHECK: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
		; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
		; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C1]](s32)
		; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
		; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
		; CHECK: $vgpr0 = COPY [[AND2]](s32)
		%0:_(s32) = COPY $vgpr0
		%1:_(s32) = COPY $vgpr1
		%2:_(s16) = G_TRUNC %0
		%3:_(s16) = G_TRUNC %1
		%4:_(s16) = G_UMULH %2, %3
		%5:_(s32) = G_ZEXT %4
		$vgpr0 = COPY %5
		...
		arsenmUnsubmitted Not Done Reply Inline Actions Can you add an 8 and 24-bit test? arsenm: Can you add an 8 and 24-bit test?
		pdhaliwalAuthorUnsubmitted Done Reply Inline Actions Added 8-bit case. But, 24-bit case won't work as it requires 48-bit MUL op which is not working yet. pdhaliwal: Added 8-bit case. But, 24-bit case won't work as it requires 48-bit MUL op which is not working…

		---
		name: test_umulh_s8
		body: \|
		bb.0:
		liveins: $vgpr0, $vgpr1

		; CHECK-LABEL: name: test_umulh_s8
		; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
		; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
		; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
		; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
		; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
		; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
		; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
		; CHECK: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]]
		; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
		; CHECK: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C1]](s16)
		; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
		; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16)
		; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]]
		; CHECK: $vgpr0 = COPY [[AND2]](s32)
		%0:_(s32) = COPY $vgpr0
		%1:_(s32) = COPY $vgpr1
		%2:_(s8) = G_TRUNC %0
		%3:_(s8) = G_TRUNC %1
		%4:_(s8) = G_UMULH %2, %3
		%5:_(s32) = G_ZEXT %4
		$vgpr0 = COPY %5
		...

		---
		name: test_umulh_v2s16
		body: \|
		bb.0:
		liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
		; CHECK-LABEL: name: test_umulh_v2s16
		; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
		; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
		; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
		; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
		; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
		; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
		; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
		; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
		; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
		; CHECK: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
		; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
		; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C1]](s32)
		; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
		; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
		; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
		; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C]]
		; CHECK: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND2]], [[AND3]]
		; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C1]](s32)
		; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
		; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]]
		; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
		; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C]]
		; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
		; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]]
		; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
		; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
		; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
		; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
		; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C]]
		; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
		; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C]]
		; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32)
		; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
		%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
		%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
		%2:_(<2 x s16>) = G_TRUNC %0
		%3:_(<2 x s16>) = G_TRUNC %1
		%4:_(<2 x s16>) = G_UMULH %2, %3
		%5:_(<2 x s32>) = G_ZEXT %4
		$vgpr0_vgpr1 = COPY %5
		...

		---
		name: test_umulh_v2s8
		body: \|
		bb.0:
		liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
		; CHECK-LABEL: name: test_umulh_v2s8
		; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
		; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
		; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
		; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
		; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
		; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
		; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
		; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
		; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
		; CHECK: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]]
		; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
		; CHECK: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C1]](s16)
		; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
		; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]]
		; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
		; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]]
		; CHECK: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[AND2]], [[AND3]]
		; CHECK: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[MUL1]], [[C1]](s16)
		; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16)
		; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16)
		; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
		; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32)
		; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
		; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32)
		; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
		; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32)
		; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
		%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
		%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
		%2:_(<2 x s8>) = G_TRUNC %0
		%3:_(<2 x s8>) = G_TRUNC %1
		%4:_(<2 x s8>) = G_UMULH %2, %3
		%5:_(<2 x s32>) = G_ZEXT %4
		$vgpr0_vgpr1 = COPY %5
		...
		arsenmUnsubmitted Done Reply Inline Actions Shouldn't use implicit uses of s8 values. I'm trying to fix implicit uses with illegal register types because we can't ultimately legalize these arsenm: Shouldn't use implicit uses of s8 values. I'm trying to fix implicit uses with illegal register…

This is an archive of the discontinued LLVM Phabricator instance.

[GlobalISel][AMDGPU] Lower G_SMULH/G_UMULH
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 284762

llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulh.mir

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir

This is an archive of the discontinued LLVM Phabricator instance.

[GlobalISel][AMDGPU] Lower G_SMULH/G_UMULHClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 284762

llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulh.mir

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir

[GlobalISel][AMDGPU] Lower G_SMULH/G_UMULH
ClosedPublic