Diff 381825

llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp

Show First 20 Lines • Show All 471 Lines • ▼ Show 20 Lines	static Value *insertValues(IRBuilder<> &Builder,

Value *NewVal = UndefValue::get(Ty);		Value *NewVal = UndefValue::get(Ty);
for (int I = 0, E = Values.size(); I != E; ++I)		for (int I = 0, E = Values.size(); I != E; ++I)
NewVal = Builder.CreateInsertElement(NewVal, Values[I], I);		NewVal = Builder.CreateInsertElement(NewVal, Values[I], I);

return NewVal;		return NewVal;
}		}

		// Returns 24-bit or 48-bit (as per `NumBits`) mul of `LHS` and `RHS`.
		static Value *getMul24(IRBuilder<> &Builder, Value *LHS, Value *RHS,
		unsigned NumBits, bool IsSigned) {
		Module *Mod = Builder.GetInsertBlock()->getModule();

		if ((!IsSigned && NumBits <= 32) || (IsSigned && NumBits <= 30)) {
		Intrinsic::ID ID =
		IsSigned ? Intrinsic::amdgcn_mul_i24 : Intrinsic::amdgcn_mul_u24;
		foad [Unsubmitted, Done]: This would be neater with `IsSigned ? ... : ...`
		return Builder.CreateCall(Intrinsic::getDeclaration(Mod, ID), {LHS, RHS});
		}

		assert((!IsSigned && NumBits <= 48) || (IsSigned && NumBits <= 46));

		Intrinsic::ID LoID =
		IsSigned ? Intrinsic::amdgcn_mul_i24 : Intrinsic::amdgcn_mul_u24;
		foad [Unsubmitted, Done]: This would be neater with `IsSigned ? ... : ...`
		Intrinsic::ID HiID =
		IsSigned ? Intrinsic::amdgcn_mulhi_i24 : Intrinsic::amdgcn_mulhi_u24;

		Value *Lo =
		Builder.CreateCall(Intrinsic::getDeclaration(Mod, LoID), {LHS, RHS});
		Value *Hi =
		Builder.CreateCall(Intrinsic::getDeclaration(Mod, HiID), {LHS, RHS});

		foad [Unsubmitted, Done]: Can you use CreateIntrinsic to create all the calls?
		IntegerType *I64Ty = Builder.getInt64Ty();
		Lo = Builder.CreateZExtOrTrunc(Lo, I64Ty);
		Hi = Builder.CreateZExtOrTrunc(Hi, I64Ty);

		return Builder.CreateOr(Lo, Builder.CreateShl(Hi, 32));
		}

bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {		bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
if (I.getOpcode() != Instruction::Mul)		if (I.getOpcode() != Instruction::Mul)
return false;		return false;

Type *Ty = I.getType();		Type *Ty = I.getType();
unsigned Size = Ty->getScalarSizeInBits();		unsigned Size = Ty->getScalarSizeInBits();
if (Size <= 16 && ST->has16BitInsts())		if (Size <= 16 && ST->has16BitInsts())
return false;		return false;

// Prefer scalar if this could be s_mul_i32		// Prefer scalar if this could be s_mul_i32
if (DA->isUniform(&I))		if (DA->isUniform(&I))
return false;		return false;

Value *LHS = I.getOperand(0);		Value *LHS = I.getOperand(0);
Value *RHS = I.getOperand(1);		Value *RHS = I.getOperand(1);
IRBuilder<> Builder(&I);		IRBuilder<> Builder(&I);
Builder.SetCurrentDebugLocation(I.getDebugLoc());		Builder.SetCurrentDebugLocation(I.getDebugLoc());

Intrinsic::ID IntrID = Intrinsic::not_intrinsic;

unsigned LHSBits = 0, RHSBits = 0;		unsigned LHSBits = 0, RHSBits = 0;
		bool IsSigned = false;

if (ST->hasMulU24() && (LHSBits = numBitsUnsigned(LHS, Size)) <= 24 &&		if (ST->hasMulU24() && (LHSBits = numBitsUnsigned(LHS, Size)) <= 24 &&
		foad [Unsubmitted, Not Done]: Do all subtargets that have mul_u24 also have mulhi_u24, and the same for i24?
		abinavpp [Author, Unsubmitted, Done]: I guess so. We're doing the same thing in AMDGPUISelLowering.cpp. For amdgcn, I can see a corresponding 24-bit mulhi instruction for GFX6 and above. For r600, I can see that in HD6900 and Evergreen. I haven't looked at all the subtargets. The subtarget initialization should take care of this.
(RHSBits = numBitsUnsigned(RHS, Size)) <= 24) {		(RHSBits = numBitsUnsigned(RHS, Size)) <= 24) {
// The mul24 instruction yields the low-order 32 bits. If the original		// If the original result and the destination is wider than 48 bits, the
// result and the destination is wider than 32 bits, the mul24 would		// mul48 (i.e. mul24, mul24hi pair) would truncate the result.
// truncate the result.		if (Size > 48 && LHSBits + RHSBits > 48)
		abinavpp [Author, Unsubmitted, Done]: I think we were incorrect in doing the `LHSBits + RHSBits > ...` check in D111523. We did not consider the case where an operand has > 24 known bits but the sum of known bits is within limits. The mul24 instruction works only on the low-order 24 bits of its operands.
		abinavpp [Author, Unsubmitted, Done]: Ignore this comment. I missed the operand width check in the if above.
		abinavpp [Author, Unsubmitted, Done]: Just realized that this if and the similar one for signed below are not required.
if (Size > 32 && LHSBits + RHSBits > 32)
return false;		return false;

IntrID = Intrinsic::amdgcn_mul_u24;		IsSigned = false;

} else if (ST->hasMulI24() &&		} else if (ST->hasMulI24() &&
(LHSBits = numBitsSigned(LHS, Size)) < 24 &&		(LHSBits = numBitsSigned(LHS, Size)) < 24 &&
(RHSBits = numBitsSigned(RHS, Size)) < 24) {		(RHSBits = numBitsSigned(RHS, Size)) < 24) {
// The original result is positive if its destination is wider than 32 bits		// The original result is positive if its destination is wider than 48 bits
// and its highest set bit is at bit 31. Generating mul24 and sign-extending		// and its highest set bit is at bit 47. Generating mul48 and sign-extending
// it would yield a negative value.		// it would yield a negative value.
if (Size > 32 && LHSBits + RHSBits > 30)		if (Size > 48 && LHSBits + RHSBits > 46)
return false;		return false;

IntrID = Intrinsic::amdgcn_mul_i24;		IsSigned = true;
} else		} else
return false;		return false;

SmallVector<Value *, 4> LHSVals;		SmallVector<Value *, 4> LHSVals;
SmallVector<Value *, 4> RHSVals;		SmallVector<Value *, 4> RHSVals;
SmallVector<Value *, 4> ResultVals;		SmallVector<Value *, 4> ResultVals;
extractValues(Builder, LHSVals, LHS);		extractValues(Builder, LHSVals, LHS);
extractValues(Builder, RHSVals, RHS);		extractValues(Builder, RHSVals, RHS);


IntegerType *I32Ty = Builder.getInt32Ty();		IntegerType *I32Ty = Builder.getInt32Ty();
FunctionCallee Intrin = Intrinsic::getDeclaration(Mod, IntrID);
for (int I = 0, E = LHSVals.size(); I != E; ++I) {		for (int I = 0, E = LHSVals.size(); I != E; ++I) {
Value *LHS, *RHS;		Value *LHS, *RHS;
if (IntrID == Intrinsic::amdgcn_mul_u24) {		if (!IsSigned) {
		foad [Unsubmitted, Done]: I prefer not to have a negated condition for an "if" if there's an "else" as well, otherwise the condition for the "else" is a double negative which is harder to understand.
LHS = Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty);		LHS = Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty);
RHS = Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty);		RHS = Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty);
} else {		} else {
LHS = Builder.CreateSExtOrTrunc(LHSVals[I], I32Ty);		LHS = Builder.CreateSExtOrTrunc(LHSVals[I], I32Ty);
RHS = Builder.CreateSExtOrTrunc(RHSVals[I], I32Ty);		RHS = Builder.CreateSExtOrTrunc(RHSVals[I], I32Ty);
}		}

Value *Result = Builder.CreateCall(Intrin, {LHS, RHS});		Value *Result = getMul24(Builder, LHS, RHS, LHSBits + RHSBits, IsSigned);

if (IntrID == Intrinsic::amdgcn_mul_u24) {		if (!IsSigned) {
		foad [Unsubmitted, Done]: Same.
ResultVals.push_back(Builder.CreateZExtOrTrunc(Result,		ResultVals.push_back(Builder.CreateZExtOrTrunc(Result,
LHSVals[I]->getType()));		LHSVals[I]->getType()));
} else {		} else {
ResultVals.push_back(Builder.CreateSExtOrTrunc(Result,		ResultVals.push_back(Builder.CreateSExtOrTrunc(Result,
LHSVals[I]->getType()));		LHSVals[I]->getType()));
}		}
}		}

▲ Show 20 Lines • Show All 898 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll

	Show All 23 Lines
	}			}

	define i32 @smul24_i32(i32 %lhs, i32 %rhs) {			define i32 @smul24_i32(i32 %lhs, i32 %rhs) {
	; SI-LABEL: @smul24_i32(			; SI-LABEL: @smul24_i32(
	; SI-NEXT: [[SHL_LHS:%.*]] = shl i32 [[LHS:%.*]], 8			; SI-NEXT: [[SHL_LHS:%.*]] = shl i32 [[LHS:%.*]], 8
	; SI-NEXT: [[LHS24:%.*]] = ashr i32 [[SHL_LHS]], 8			; SI-NEXT: [[LHS24:%.*]] = ashr i32 [[SHL_LHS]], 8
	; SI-NEXT: [[LSHR_RHS:%.*]] = shl i32 [[RHS:%.*]], 8			; SI-NEXT: [[LSHR_RHS:%.*]] = shl i32 [[RHS:%.*]], 8
	; SI-NEXT: [[RHS24:%.*]] = ashr i32 [[LHS]], 8			; SI-NEXT: [[RHS24:%.*]] = ashr i32 [[LHS]], 8
	; SI-NEXT: [[MUL:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[LHS24]], i32 [[RHS24]])			; SI-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[LHS24]], i32 [[RHS24]])
				; SI-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.mulhi.i24(i32 [[LHS24]], i32 [[RHS24]])
				; SI-NEXT: [[TMP3:%.*]] = zext i32 [[TMP1]] to i64
				; SI-NEXT: [[TMP4:%.*]] = zext i32 [[TMP2]] to i64
				; SI-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 32
				; SI-NEXT: [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
				; SI-NEXT: [[MUL:%.*]] = trunc i64 [[TMP6]] to i32
	; SI-NEXT: ret i32 [[MUL]]			; SI-NEXT: ret i32 [[MUL]]
	;			;
	; VI-LABEL: @smul24_i32(			; VI-LABEL: @smul24_i32(
	; VI-NEXT: [[SHL_LHS:%.*]] = shl i32 [[LHS:%.*]], 8			; VI-NEXT: [[SHL_LHS:%.*]] = shl i32 [[LHS:%.*]], 8
	; VI-NEXT: [[LHS24:%.*]] = ashr i32 [[SHL_LHS]], 8			; VI-NEXT: [[LHS24:%.*]] = ashr i32 [[SHL_LHS]], 8
	; VI-NEXT: [[LSHR_RHS:%.*]] = shl i32 [[RHS:%.*]], 8			; VI-NEXT: [[LSHR_RHS:%.*]] = shl i32 [[RHS:%.*]], 8
	; VI-NEXT: [[RHS24:%.*]] = ashr i32 [[LHS]], 8			; VI-NEXT: [[RHS24:%.*]] = ashr i32 [[LHS]], 8
	; VI-NEXT: [[MUL:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[LHS24]], i32 [[RHS24]])			; VI-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[LHS24]], i32 [[RHS24]])
				; VI-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.mulhi.i24(i32 [[LHS24]], i32 [[RHS24]])
				; VI-NEXT: [[TMP3:%.*]] = zext i32 [[TMP1]] to i64
				; VI-NEXT: [[TMP4:%.*]] = zext i32 [[TMP2]] to i64
				; VI-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 32
				; VI-NEXT: [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
				; VI-NEXT: [[MUL:%.*]] = trunc i64 [[TMP6]] to i32
	; VI-NEXT: ret i32 [[MUL]]			; VI-NEXT: ret i32 [[MUL]]
	;			;
	; DISABLED-LABEL: @smul24_i32(			; DISABLED-LABEL: @smul24_i32(
	; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl i32 [[LHS:%.*]], 8			; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl i32 [[LHS:%.*]], 8
	; DISABLED-NEXT: [[LHS24:%.*]] = ashr i32 [[SHL_LHS]], 8			; DISABLED-NEXT: [[LHS24:%.*]] = ashr i32 [[SHL_LHS]], 8
	; DISABLED-NEXT: [[LSHR_RHS:%.*]] = shl i32 [[RHS:%.*]], 8			; DISABLED-NEXT: [[LSHR_RHS:%.*]] = shl i32 [[RHS:%.*]], 8
	; DISABLED-NEXT: [[RHS24:%.*]] = ashr i32 [[LHS]], 8			; DISABLED-NEXT: [[RHS24:%.*]] = ashr i32 [[LHS]], 8
	; DISABLED-NEXT: [[MUL:%.*]] = mul i32 [[LHS24]], [[RHS24]]			; DISABLED-NEXT: [[MUL:%.*]] = mul i32 [[LHS24]], [[RHS24]]
	Show All 13 Lines
	; SI-NEXT: [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], <i32 8, i32 8>			; SI-NEXT: [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], <i32 8, i32 8>
	; SI-NEXT: [[LSHR_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], <i32 8, i32 8>			; SI-NEXT: [[LSHR_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], <i32 8, i32 8>
	; SI-NEXT: [[RHS24:%.*]] = ashr <2 x i32> [[LHS]], <i32 8, i32 8>			; SI-NEXT: [[RHS24:%.*]] = ashr <2 x i32> [[LHS]], <i32 8, i32 8>
	; SI-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0			; SI-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0
	; SI-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1			; SI-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1
	; SI-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0			; SI-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0
	; SI-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[RHS24]], i64 1			; SI-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[RHS24]], i64 1
	; SI-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP3]])			; SI-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP3]])
	; SI-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP2]], i32 [[TMP4]])			; SI-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mulhi.i24(i32 [[TMP1]], i32 [[TMP3]])
	; SI-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i64 0			; SI-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
	; SI-NEXT: [[MUL:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1			; SI-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
				; SI-NEXT: [[TMP9:%.*]] = shl i64 [[TMP8]], 32
				; SI-NEXT: [[TMP10:%.*]] = or i64 [[TMP7]], [[TMP9]]
				; SI-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP10]] to i32
				; SI-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP2]], i32 [[TMP4]])
				; SI-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.mulhi.i24(i32 [[TMP2]], i32 [[TMP4]])
				; SI-NEXT: [[TMP14:%.*]] = zext i32 [[TMP12]] to i64
				; SI-NEXT: [[TMP15:%.*]] = zext i32 [[TMP13]] to i64
				; SI-NEXT: [[TMP16:%.*]] = shl i64 [[TMP15]], 32
				; SI-NEXT: [[TMP17:%.*]] = or i64 [[TMP14]], [[TMP16]]
				; SI-NEXT: [[TMP18:%.*]] = trunc i64 [[TMP17]] to i32
				; SI-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> undef, i32 [[TMP11]], i64 0
				; SI-NEXT: [[MUL:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[TMP18]], i64 1
	; SI-NEXT: ret <2 x i32> [[MUL]]			; SI-NEXT: ret <2 x i32> [[MUL]]
	;			;
	; VI-LABEL: @smul24_v2i32(			; VI-LABEL: @smul24_v2i32(
	; VI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], <i32 8, i32 8>			; VI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], <i32 8, i32 8>
	; VI-NEXT: [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], <i32 8, i32 8>			; VI-NEXT: [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], <i32 8, i32 8>
	; VI-NEXT: [[LSHR_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], <i32 8, i32 8>			; VI-NEXT: [[LSHR_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], <i32 8, i32 8>
	; VI-NEXT: [[RHS24:%.*]] = ashr <2 x i32> [[LHS]], <i32 8, i32 8>			; VI-NEXT: [[RHS24:%.*]] = ashr <2 x i32> [[LHS]], <i32 8, i32 8>
	; VI-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0			; VI-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0
	; VI-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1			; VI-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1
	; VI-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0			; VI-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0
	; VI-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[RHS24]], i64 1			; VI-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[RHS24]], i64 1
	; VI-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP3]])			; VI-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP3]])
	; VI-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP2]], i32 [[TMP4]])			; VI-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mulhi.i24(i32 [[TMP1]], i32 [[TMP3]])
	; VI-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i64 0			; VI-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
	; VI-NEXT: [[MUL:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1			; VI-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
				; VI-NEXT: [[TMP9:%.*]] = shl i64 [[TMP8]], 32
				; VI-NEXT: [[TMP10:%.*]] = or i64 [[TMP7]], [[TMP9]]
				; VI-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP10]] to i32
				; VI-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP2]], i32 [[TMP4]])
				; VI-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.mulhi.i24(i32 [[TMP2]], i32 [[TMP4]])
				; VI-NEXT: [[TMP14:%.*]] = zext i32 [[TMP12]] to i64
				; VI-NEXT: [[TMP15:%.*]] = zext i32 [[TMP13]] to i64
				; VI-NEXT: [[TMP16:%.*]] = shl i64 [[TMP15]], 32
				; VI-NEXT: [[TMP17:%.*]] = or i64 [[TMP14]], [[TMP16]]
				; VI-NEXT: [[TMP18:%.*]] = trunc i64 [[TMP17]] to i32
				; VI-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> undef, i32 [[TMP11]], i64 0
				; VI-NEXT: [[MUL:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[TMP18]], i64 1
	; VI-NEXT: ret <2 x i32> [[MUL]]			; VI-NEXT: ret <2 x i32> [[MUL]]
	;			;
	; DISABLED-LABEL: @smul24_v2i32(			; DISABLED-LABEL: @smul24_v2i32(
	; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], <i32 8, i32 8>			; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], <i32 8, i32 8>
	; DISABLED-NEXT: [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], <i32 8, i32 8>			; DISABLED-NEXT: [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], <i32 8, i32 8>
	; DISABLED-NEXT: [[LSHR_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], <i32 8, i32 8>			; DISABLED-NEXT: [[LSHR_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], <i32 8, i32 8>
	; DISABLED-NEXT: [[RHS24:%.*]] = ashr <2 x i32> [[LHS]], <i32 8, i32 8>			; DISABLED-NEXT: [[RHS24:%.*]] = ashr <2 x i32> [[LHS]], <i32 8, i32 8>
	; DISABLED-NEXT: [[MUL:%.*]] = mul <2 x i32> [[LHS24]], [[RHS24]]			; DISABLED-NEXT: [[MUL:%.*]] = mul <2 x i32> [[LHS24]], [[RHS24]]
	; DISABLED-NEXT: ret <2 x i32> [[MUL]]			; DISABLED-NEXT: ret <2 x i32> [[MUL]]
	;			;
	%shl.lhs = shl <2 x i32> %lhs, <i32 8, i32 8>			%shl.lhs = shl <2 x i32> %lhs, <i32 8, i32 8>
	%lhs24 = ashr <2 x i32> %shl.lhs, <i32 8, i32 8>			%lhs24 = ashr <2 x i32> %shl.lhs, <i32 8, i32 8>
	%lshr.rhs = shl <2 x i32> %rhs, <i32 8, i32 8>			%lshr.rhs = shl <2 x i32> %rhs, <i32 8, i32 8>
	%rhs24 = ashr <2 x i32> %lhs, <i32 8, i32 8>			%rhs24 = ashr <2 x i32> %lhs, <i32 8, i32 8>
	%mul = mul <2 x i32> %lhs24, %rhs24			%mul = mul <2 x i32> %lhs24, %rhs24
	ret <2 x i32> %mul			ret <2 x i32> %mul
	}			}

	define i32 @umul24_i32(i32 %lhs, i32 %rhs) {			define i32 @umul24_i32(i32 %lhs, i32 %rhs) {
	; SI-LABEL: @umul24_i32(			; SI-LABEL: @umul24_i32(
	; SI-NEXT: [[LHS24:%.*]] = and i32 [[LHS:%.*]], 16777215			; SI-NEXT: [[LHS24:%.*]] = and i32 [[LHS:%.*]], 16777215
	; SI-NEXT: [[RHS24:%.*]] = and i32 [[RHS:%.*]], 16777215			; SI-NEXT: [[RHS24:%.*]] = and i32 [[RHS:%.*]], 16777215
	; SI-NEXT: [[MUL:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[LHS24]], i32 [[RHS24]])			; SI-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[LHS24]], i32 [[RHS24]])
				; SI-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.mulhi.u24(i32 [[LHS24]], i32 [[RHS24]])
				; SI-NEXT: [[TMP3:%.*]] = zext i32 [[TMP1]] to i64
				; SI-NEXT: [[TMP4:%.*]] = zext i32 [[TMP2]] to i64
				; SI-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 32
				; SI-NEXT: [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
				; SI-NEXT: [[MUL:%.*]] = trunc i64 [[TMP6]] to i32
	; SI-NEXT: ret i32 [[MUL]]			; SI-NEXT: ret i32 [[MUL]]
	;			;
	; VI-LABEL: @umul24_i32(			; VI-LABEL: @umul24_i32(
	; VI-NEXT: [[LHS24:%.*]] = and i32 [[LHS:%.*]], 16777215			; VI-NEXT: [[LHS24:%.*]] = and i32 [[LHS:%.*]], 16777215
	; VI-NEXT: [[RHS24:%.*]] = and i32 [[RHS:%.*]], 16777215			; VI-NEXT: [[RHS24:%.*]] = and i32 [[RHS:%.*]], 16777215
	; VI-NEXT: [[MUL:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[LHS24]], i32 [[RHS24]])			; VI-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[LHS24]], i32 [[RHS24]])
				; VI-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.mulhi.u24(i32 [[LHS24]], i32 [[RHS24]])
				; VI-NEXT: [[TMP3:%.*]] = zext i32 [[TMP1]] to i64
				; VI-NEXT: [[TMP4:%.*]] = zext i32 [[TMP2]] to i64
				; VI-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 32
				; VI-NEXT: [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
				; VI-NEXT: [[MUL:%.*]] = trunc i64 [[TMP6]] to i32
	; VI-NEXT: ret i32 [[MUL]]			; VI-NEXT: ret i32 [[MUL]]
	;			;
	; DISABLED-LABEL: @umul24_i32(			; DISABLED-LABEL: @umul24_i32(
	; DISABLED-NEXT: [[LHS24:%.*]] = and i32 [[LHS:%.*]], 16777215			; DISABLED-NEXT: [[LHS24:%.*]] = and i32 [[LHS:%.*]], 16777215
	; DISABLED-NEXT: [[RHS24:%.*]] = and i32 [[RHS:%.*]], 16777215			; DISABLED-NEXT: [[RHS24:%.*]] = and i32 [[RHS:%.*]], 16777215
	; DISABLED-NEXT: [[MUL:%.*]] = mul i32 [[LHS24]], [[RHS24]]			; DISABLED-NEXT: [[MUL:%.*]] = mul i32 [[LHS24]], [[RHS24]]
	; DISABLED-NEXT: ret i32 [[MUL]]			; DISABLED-NEXT: ret i32 [[MUL]]
	;			;
	%lhs24 = and i32 %lhs, 16777215			%lhs24 = and i32 %lhs, 16777215
	%rhs24 = and i32 %rhs, 16777215			%rhs24 = and i32 %rhs, 16777215
	%mul = mul i32 %lhs24, %rhs24			%mul = mul i32 %lhs24, %rhs24
	ret i32 %mul			ret i32 %mul
	}			}

	define <2 x i32> @umul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {			define <2 x i32> @umul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
	; SI-LABEL: @umul24_v2i32(			; SI-LABEL: @umul24_v2i32(
	; SI-NEXT: [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], <i32 16777215, i32 16777215>			; SI-NEXT: [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], <i32 16777215, i32 16777215>
	; SI-NEXT: [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], <i32 16777215, i32 16777215>			; SI-NEXT: [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], <i32 16777215, i32 16777215>
	; SI-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0			; SI-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0
	; SI-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1			; SI-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1
	; SI-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0			; SI-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0
	; SI-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[RHS24]], i64 1			; SI-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[RHS24]], i64 1
	; SI-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP3]])			; SI-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP3]])
	; SI-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP2]], i32 [[TMP4]])			; SI-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mulhi.u24(i32 [[TMP1]], i32 [[TMP3]])
	; SI-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i64 0			; SI-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
	; SI-NEXT: [[MUL:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1			; SI-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
				; SI-NEXT: [[TMP9:%.*]] = shl i64 [[TMP8]], 32
				; SI-NEXT: [[TMP10:%.*]] = or i64 [[TMP7]], [[TMP9]]
				; SI-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP10]] to i32
				; SI-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP2]], i32 [[TMP4]])
				; SI-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.mulhi.u24(i32 [[TMP2]], i32 [[TMP4]])
				; SI-NEXT: [[TMP14:%.*]] = zext i32 [[TMP12]] to i64
				; SI-NEXT: [[TMP15:%.*]] = zext i32 [[TMP13]] to i64
				; SI-NEXT: [[TMP16:%.*]] = shl i64 [[TMP15]], 32
				; SI-NEXT: [[TMP17:%.*]] = or i64 [[TMP14]], [[TMP16]]
				; SI-NEXT: [[TMP18:%.*]] = trunc i64 [[TMP17]] to i32
				; SI-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> undef, i32 [[TMP11]], i64 0
				; SI-NEXT: [[MUL:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[TMP18]], i64 1
	; SI-NEXT: ret <2 x i32> [[MUL]]			; SI-NEXT: ret <2 x i32> [[MUL]]
	;			;
	; VI-LABEL: @umul24_v2i32(			; VI-LABEL: @umul24_v2i32(
	; VI-NEXT: [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], <i32 16777215, i32 16777215>			; VI-NEXT: [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], <i32 16777215, i32 16777215>
	; VI-NEXT: [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], <i32 16777215, i32 16777215>			; VI-NEXT: [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], <i32 16777215, i32 16777215>
	; VI-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0			; VI-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0
	; VI-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1			; VI-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1
	; VI-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0			; VI-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0
	; VI-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[RHS24]], i64 1			; VI-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[RHS24]], i64 1
	; VI-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP3]])			; VI-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP3]])
	; VI-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP2]], i32 [[TMP4]])			; VI-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mulhi.u24(i32 [[TMP1]], i32 [[TMP3]])
	; VI-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i64 0			; VI-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
	; VI-NEXT: [[MUL:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1			; VI-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
				; VI-NEXT: [[TMP9:%.*]] = shl i64 [[TMP8]], 32
				; VI-NEXT: [[TMP10:%.*]] = or i64 [[TMP7]], [[TMP9]]
				; VI-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP10]] to i32
				; VI-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP2]], i32 [[TMP4]])
				; VI-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.mulhi.u24(i32 [[TMP2]], i32 [[TMP4]])
				; VI-NEXT: [[TMP14:%.*]] = zext i32 [[TMP12]] to i64
				; VI-NEXT: [[TMP15:%.*]] = zext i32 [[TMP13]] to i64
				; VI-NEXT: [[TMP16:%.*]] = shl i64 [[TMP15]], 32
				; VI-NEXT: [[TMP17:%.*]] = or i64 [[TMP14]], [[TMP16]]
				; VI-NEXT: [[TMP18:%.*]] = trunc i64 [[TMP17]] to i32
				; VI-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> undef, i32 [[TMP11]], i64 0
				; VI-NEXT: [[MUL:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[TMP18]], i64 1
	; VI-NEXT: ret <2 x i32> [[MUL]]			; VI-NEXT: ret <2 x i32> [[MUL]]
	;			;
	; DISABLED-LABEL: @umul24_v2i32(			; DISABLED-LABEL: @umul24_v2i32(
	; DISABLED-NEXT: [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], <i32 16777215, i32 16777215>			; DISABLED-NEXT: [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], <i32 16777215, i32 16777215>
	; DISABLED-NEXT: [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], <i32 16777215, i32 16777215>			; DISABLED-NEXT: [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], <i32 16777215, i32 16777215>
	; DISABLED-NEXT: [[MUL:%.*]] = mul <2 x i32> [[LHS24]], [[RHS24]]			; DISABLED-NEXT: [[MUL:%.*]] = mul <2 x i32> [[LHS24]], [[RHS24]]
	; DISABLED-NEXT: ret <2 x i32> [[MUL]]			; DISABLED-NEXT: ret <2 x i32> [[MUL]]
	;			;
	%lhs24 = and <2 x i32> %lhs, <i32 16777215, i32 16777215>			%lhs24 = and <2 x i32> %lhs, <i32 16777215, i32 16777215>
	%rhs24 = and <2 x i32> %rhs, <i32 16777215, i32 16777215>			%rhs24 = and <2 x i32> %rhs, <i32 16777215, i32 16777215>
	%mul = mul <2 x i32> %lhs24, %rhs24			%mul = mul <2 x i32> %lhs24, %rhs24
	ret <2 x i32> %mul			ret <2 x i32> %mul
	}			}

	define i64 @smul24_i64(i64 %lhs, i64 %rhs) {			define i64 @smul24_i64(i64 %lhs, i64 %rhs) {
	; SI-LABEL: @smul24_i64(			; SI-LABEL: @smul24_i64(
	; SI-NEXT: [[SHL_LHS:%.*]] = shl i64 [[LHS:%.*]], 40			; SI-NEXT: [[SHL_LHS:%.*]] = shl i64 [[LHS:%.*]], 40
	; SI-NEXT: [[LHS24:%.*]] = ashr i64 [[SHL_LHS]], 40			; SI-NEXT: [[LHS24:%.*]] = ashr i64 [[SHL_LHS]], 40
	; SI-NEXT: [[LSHR_RHS:%.*]] = shl i64 [[RHS:%.*]], 40			; SI-NEXT: [[LSHR_RHS:%.*]] = shl i64 [[RHS:%.*]], 40
	; SI-NEXT: [[RHS24:%.*]] = ashr i64 [[LHS]], 40			; SI-NEXT: [[RHS24:%.*]] = ashr i64 [[LHS]], 40
	; SI-NEXT: [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]			; SI-NEXT: [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
				; SI-NEXT: [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
				; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
				; SI-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mulhi.i24(i32 [[TMP1]], i32 [[TMP2]])
				; SI-NEXT: [[TMP5:%.*]] = zext i32 [[TMP3]] to i64
				; SI-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
				; SI-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 32
				; SI-NEXT: [[MUL:%.*]] = or i64 [[TMP5]], [[TMP7]]
	; SI-NEXT: ret i64 [[MUL]]			; SI-NEXT: ret i64 [[MUL]]
	;			;
	; VI-LABEL: @smul24_i64(			; VI-LABEL: @smul24_i64(
	; VI-NEXT: [[SHL_LHS:%.*]] = shl i64 [[LHS:%.*]], 40			; VI-NEXT: [[SHL_LHS:%.*]] = shl i64 [[LHS:%.*]], 40
	; VI-NEXT: [[LHS24:%.*]] = ashr i64 [[SHL_LHS]], 40			; VI-NEXT: [[LHS24:%.*]] = ashr i64 [[SHL_LHS]], 40
	; VI-NEXT: [[LSHR_RHS:%.*]] = shl i64 [[RHS:%.*]], 40			; VI-NEXT: [[LSHR_RHS:%.*]] = shl i64 [[RHS:%.*]], 40
	; VI-NEXT: [[RHS24:%.*]] = ashr i64 [[LHS]], 40			; VI-NEXT: [[RHS24:%.*]] = ashr i64 [[LHS]], 40
	; VI-NEXT: [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]			; VI-NEXT: [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
				; VI-NEXT: [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
				; VI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
				; VI-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mulhi.i24(i32 [[TMP1]], i32 [[TMP2]])
				; VI-NEXT: [[TMP5:%.*]] = zext i32 [[TMP3]] to i64
				; VI-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
				; VI-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 32
				; VI-NEXT: [[MUL:%.*]] = or i64 [[TMP5]], [[TMP7]]
	; VI-NEXT: ret i64 [[MUL]]			; VI-NEXT: ret i64 [[MUL]]
	;			;
	; DISABLED-LABEL: @smul24_i64(			; DISABLED-LABEL: @smul24_i64(
	; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl i64 [[LHS:%.*]], 40			; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl i64 [[LHS:%.*]], 40
	; DISABLED-NEXT: [[LHS24:%.*]] = ashr i64 [[SHL_LHS]], 40			; DISABLED-NEXT: [[LHS24:%.*]] = ashr i64 [[SHL_LHS]], 40
	; DISABLED-NEXT: [[LSHR_RHS:%.*]] = shl i64 [[RHS:%.*]], 40			; DISABLED-NEXT: [[LSHR_RHS:%.*]] = shl i64 [[RHS:%.*]], 40
	; DISABLED-NEXT: [[RHS24:%.*]] = ashr i64 [[LHS]], 40			; DISABLED-NEXT: [[RHS24:%.*]] = ashr i64 [[LHS]], 40
	; DISABLED-NEXT: [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]			; DISABLED-NEXT: [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]
	▲ Show 20 Lines • Show All 47 Lines • ▼ Show 20 Lines
	}			}

	define i64 @smul24_i64_3(i64 %lhs, i64 %rhs) {			define i64 @smul24_i64_3(i64 %lhs, i64 %rhs) {
	; SI-LABEL: @smul24_i64_3(			; SI-LABEL: @smul24_i64_3(
	; SI-NEXT: [[LHS_TRUNC:%.*]] = trunc i64 [[LHS:%.*]] to i16			; SI-NEXT: [[LHS_TRUNC:%.*]] = trunc i64 [[LHS:%.*]] to i16
	; SI-NEXT: [[LHS24:%.*]] = sext i16 [[LHS_TRUNC]] to i64			; SI-NEXT: [[LHS24:%.*]] = sext i16 [[LHS_TRUNC]] to i64
	; SI-NEXT: [[RHS_TRUNC:%.*]] = trunc i64 [[RHS:%.*]] to i17			; SI-NEXT: [[RHS_TRUNC:%.*]] = trunc i64 [[RHS:%.*]] to i17
	; SI-NEXT: [[RHS24:%.*]] = sext i17 [[RHS_TRUNC]] to i64			; SI-NEXT: [[RHS24:%.*]] = sext i17 [[RHS_TRUNC]] to i64
	; SI-NEXT: [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]			; SI-NEXT: [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
				; SI-NEXT: [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
				; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
				; SI-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mulhi.i24(i32 [[TMP1]], i32 [[TMP2]])
				; SI-NEXT: [[TMP5:%.*]] = zext i32 [[TMP3]] to i64
				; SI-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
				; SI-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 32
				; SI-NEXT: [[MUL:%.*]] = or i64 [[TMP5]], [[TMP7]]
	; SI-NEXT: ret i64 [[MUL]]			; SI-NEXT: ret i64 [[MUL]]
	;			;
	; VI-LABEL: @smul24_i64_3(			; VI-LABEL: @smul24_i64_3(
	; VI-NEXT: [[LHS_TRUNC:%.*]] = trunc i64 [[LHS:%.*]] to i16			; VI-NEXT: [[LHS_TRUNC:%.*]] = trunc i64 [[LHS:%.*]] to i16
	; VI-NEXT: [[LHS24:%.*]] = sext i16 [[LHS_TRUNC]] to i64			; VI-NEXT: [[LHS24:%.*]] = sext i16 [[LHS_TRUNC]] to i64
	; VI-NEXT: [[RHS_TRUNC:%.*]] = trunc i64 [[RHS:%.*]] to i17			; VI-NEXT: [[RHS_TRUNC:%.*]] = trunc i64 [[RHS:%.*]] to i17
	; VI-NEXT: [[RHS24:%.*]] = sext i17 [[RHS_TRUNC]] to i64			; VI-NEXT: [[RHS24:%.*]] = sext i17 [[RHS_TRUNC]] to i64
	; VI-NEXT: [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]			; VI-NEXT: [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
				; VI-NEXT: [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
				; VI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
				; VI-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mulhi.i24(i32 [[TMP1]], i32 [[TMP2]])
				; VI-NEXT: [[TMP5:%.*]] = zext i32 [[TMP3]] to i64
				; VI-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
				; VI-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 32
				; VI-NEXT: [[MUL:%.*]] = or i64 [[TMP5]], [[TMP7]]
	; VI-NEXT: ret i64 [[MUL]]			; VI-NEXT: ret i64 [[MUL]]
	;			;
	; DISABLED-LABEL: @smul24_i64_3(			; DISABLED-LABEL: @smul24_i64_3(
	; DISABLED-NEXT: [[LHS_TRUNC:%.*]] = trunc i64 [[LHS:%.*]] to i16			; DISABLED-NEXT: [[LHS_TRUNC:%.*]] = trunc i64 [[LHS:%.*]] to i16
	; DISABLED-NEXT: [[LHS24:%.*]] = sext i16 [[LHS_TRUNC]] to i64			; DISABLED-NEXT: [[LHS24:%.*]] = sext i16 [[LHS_TRUNC]] to i64
	; DISABLED-NEXT: [[RHS_TRUNC:%.*]] = trunc i64 [[RHS:%.*]] to i17			; DISABLED-NEXT: [[RHS_TRUNC:%.*]] = trunc i64 [[RHS:%.*]] to i17
	; DISABLED-NEXT: [[RHS24:%.*]] = sext i17 [[RHS_TRUNC]] to i64			; DISABLED-NEXT: [[RHS24:%.*]] = sext i17 [[RHS_TRUNC]] to i64
	; DISABLED-NEXT: [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]			; DISABLED-NEXT: [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]
	; DISABLED-NEXT: ret i64 [[MUL]]			; DISABLED-NEXT: ret i64 [[MUL]]
	;			;
	%lhs.trunc = trunc i64 %lhs to i16			%lhs.trunc = trunc i64 %lhs to i16
	%lhs24 = sext i16 %lhs.trunc to i64			%lhs24 = sext i16 %lhs.trunc to i64
	%rhs.trunc = trunc i64 %rhs to i17			%rhs.trunc = trunc i64 %rhs to i17
	%rhs24 = sext i17 %rhs.trunc to i64			%rhs24 = sext i17 %rhs.trunc to i64
	%mul = mul i64 %lhs24, %rhs24			%mul = mul i64 %lhs24, %rhs24
	ret i64 %mul			ret i64 %mul
	}			}

				define i64 @smul24_i64_4(i64 %lhs, i64 %rhs) {
				; SI-LABEL: @smul24_i64_4(
				; SI-NEXT: [[LHS_TRUNC:%.*]] = trunc i64 [[LHS:%.*]] to i24
				; SI-NEXT: [[LHS24:%.*]] = sext i24 [[LHS_TRUNC]] to i64
				; SI-NEXT: [[RHS_TRUNC:%.*]] = trunc i64 [[RHS:%.*]] to i25
				; SI-NEXT: [[RHS24:%.*]] = sext i25 [[RHS_TRUNC]] to i64
				; SI-NEXT: [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]
				; SI-NEXT: ret i64 [[MUL]]
				;
				; VI-LABEL: @smul24_i64_4(
				; VI-NEXT: [[LHS_TRUNC:%.*]] = trunc i64 [[LHS:%.*]] to i24
				; VI-NEXT: [[LHS24:%.*]] = sext i24 [[LHS_TRUNC]] to i64
				; VI-NEXT: [[RHS_TRUNC:%.*]] = trunc i64 [[RHS:%.*]] to i25
				; VI-NEXT: [[RHS24:%.*]] = sext i25 [[RHS_TRUNC]] to i64
				; VI-NEXT: [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]
				; VI-NEXT: ret i64 [[MUL]]
				;
				; DISABLED-LABEL: @smul24_i64_4(
				; DISABLED-NEXT: [[LHS_TRUNC:%.*]] = trunc i64 [[LHS:%.*]] to i24
				; DISABLED-NEXT: [[LHS24:%.*]] = sext i24 [[LHS_TRUNC]] to i64
				; DISABLED-NEXT: [[RHS_TRUNC:%.*]] = trunc i64 [[RHS:%.*]] to i25
				; DISABLED-NEXT: [[RHS24:%.*]] = sext i25 [[RHS_TRUNC]] to i64
				; DISABLED-NEXT: [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]
				; DISABLED-NEXT: ret i64 [[MUL]]
				;
				%lhs.trunc = trunc i64 %lhs to i24
				%lhs24 = sext i24 %lhs.trunc to i64
				%rhs.trunc = trunc i64 %rhs to i25
				%rhs24 = sext i25 %rhs.trunc to i64
				%mul = mul i64 %lhs24, %rhs24
				ret i64 %mul
				}

	define i64 @umul24_i64(i64 %lhs, i64 %rhs) {			define i64 @umul24_i64(i64 %lhs, i64 %rhs) {
	; SI-LABEL: @umul24_i64(			; SI-LABEL: @umul24_i64(
	; SI-NEXT: [[LHS24:%.*]] = and i64 [[LHS:%.*]], 16777215			; SI-NEXT: [[LHS24:%.*]] = and i64 [[LHS:%.*]], 16777215
	; SI-NEXT: [[RHS24:%.*]] = and i64 [[RHS:%.*]], 16777215			; SI-NEXT: [[RHS24:%.*]] = and i64 [[RHS:%.*]], 16777215
	; SI-NEXT: [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]			; SI-NEXT: [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
				; SI-NEXT: [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
				; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
				; SI-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mulhi.u24(i32 [[TMP1]], i32 [[TMP2]])
				; SI-NEXT: [[TMP5:%.*]] = zext i32 [[TMP3]] to i64
				; SI-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
				; SI-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 32
				; SI-NEXT: [[MUL:%.*]] = or i64 [[TMP5]], [[TMP7]]
	; SI-NEXT: ret i64 [[MUL]]			; SI-NEXT: ret i64 [[MUL]]
	;			;
	; VI-LABEL: @umul24_i64(			; VI-LABEL: @umul24_i64(
	; VI-NEXT: [[LHS24:%.*]] = and i64 [[LHS:%.*]], 16777215			; VI-NEXT: [[LHS24:%.*]] = and i64 [[LHS:%.*]], 16777215
	; VI-NEXT: [[RHS24:%.*]] = and i64 [[RHS:%.*]], 16777215			; VI-NEXT: [[RHS24:%.*]] = and i64 [[RHS:%.*]], 16777215
	; VI-NEXT: [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]			; VI-NEXT: [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
				; VI-NEXT: [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
				; VI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
				; VI-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mulhi.u24(i32 [[TMP1]], i32 [[TMP2]])
				; VI-NEXT: [[TMP5:%.*]] = zext i32 [[TMP3]] to i64
				; VI-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
				; VI-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 32
				; VI-NEXT: [[MUL:%.*]] = or i64 [[TMP5]], [[TMP7]]
	; VI-NEXT: ret i64 [[MUL]]			; VI-NEXT: ret i64 [[MUL]]
	;			;
	; DISABLED-LABEL: @umul24_i64(			; DISABLED-LABEL: @umul24_i64(
	; DISABLED-NEXT: [[LHS24:%.*]] = and i64 [[LHS:%.*]], 16777215			; DISABLED-NEXT: [[LHS24:%.*]] = and i64 [[LHS:%.*]], 16777215
	; DISABLED-NEXT: [[RHS24:%.*]] = and i64 [[RHS:%.*]], 16777215			; DISABLED-NEXT: [[RHS24:%.*]] = and i64 [[RHS:%.*]], 16777215
	; DISABLED-NEXT: [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]			; DISABLED-NEXT: [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]
	; DISABLED-NEXT: ret i64 [[MUL]]			; DISABLED-NEXT: ret i64 [[MUL]]
	;			;
	Show All 38 Lines
	; SI-LABEL: @smul24_i31(			; SI-LABEL: @smul24_i31(
	; SI-NEXT: [[SHL_LHS:%.*]] = shl i31 [[LHS:%.*]], 7			; SI-NEXT: [[SHL_LHS:%.*]] = shl i31 [[LHS:%.*]], 7
	; SI-NEXT: [[LHS24:%.*]] = ashr i31 [[SHL_LHS]], 7			; SI-NEXT: [[LHS24:%.*]] = ashr i31 [[SHL_LHS]], 7
	; SI-NEXT: [[LSHR_RHS:%.*]] = shl i31 [[RHS:%.*]], 7			; SI-NEXT: [[LSHR_RHS:%.*]] = shl i31 [[RHS:%.*]], 7
	; SI-NEXT: [[RHS24:%.*]] = ashr i31 [[LHS]], 7			; SI-NEXT: [[RHS24:%.*]] = ashr i31 [[LHS]], 7
	; SI-NEXT: [[TMP1:%.*]] = sext i31 [[LHS24]] to i32			; SI-NEXT: [[TMP1:%.*]] = sext i31 [[LHS24]] to i32
	; SI-NEXT: [[TMP2:%.*]] = sext i31 [[RHS24]] to i32			; SI-NEXT: [[TMP2:%.*]] = sext i31 [[RHS24]] to i32
	; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])			; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
	; SI-NEXT: [[MUL:%.*]] = trunc i32 [[TMP3]] to i31			; SI-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mulhi.i24(i32 [[TMP1]], i32 [[TMP2]])
				; SI-NEXT: [[TMP5:%.*]] = zext i32 [[TMP3]] to i64
				; SI-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
				; SI-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 32
				; SI-NEXT: [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
				; SI-NEXT: [[MUL:%.*]] = trunc i64 [[TMP8]] to i31
	; SI-NEXT: ret i31 [[MUL]]			; SI-NEXT: ret i31 [[MUL]]
	;			;
	; VI-LABEL: @smul24_i31(			; VI-LABEL: @smul24_i31(
	; VI-NEXT: [[SHL_LHS:%.*]] = shl i31 [[LHS:%.*]], 7			; VI-NEXT: [[SHL_LHS:%.*]] = shl i31 [[LHS:%.*]], 7
	; VI-NEXT: [[LHS24:%.*]] = ashr i31 [[SHL_LHS]], 7			; VI-NEXT: [[LHS24:%.*]] = ashr i31 [[SHL_LHS]], 7
	; VI-NEXT: [[LSHR_RHS:%.*]] = shl i31 [[RHS:%.*]], 7			; VI-NEXT: [[LSHR_RHS:%.*]] = shl i31 [[RHS:%.*]], 7
	; VI-NEXT: [[RHS24:%.*]] = ashr i31 [[LHS]], 7			; VI-NEXT: [[RHS24:%.*]] = ashr i31 [[LHS]], 7
	; VI-NEXT: [[TMP1:%.*]] = sext i31 [[LHS24]] to i32			; VI-NEXT: [[TMP1:%.*]] = sext i31 [[LHS24]] to i32
	; VI-NEXT: [[TMP2:%.*]] = sext i31 [[RHS24]] to i32			; VI-NEXT: [[TMP2:%.*]] = sext i31 [[RHS24]] to i32
	; VI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])			; VI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
	; VI-NEXT: [[MUL:%.*]] = trunc i32 [[TMP3]] to i31			; VI-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mulhi.i24(i32 [[TMP1]], i32 [[TMP2]])
				; VI-NEXT: [[TMP5:%.*]] = zext i32 [[TMP3]] to i64
				; VI-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
				; VI-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 32
				; VI-NEXT: [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
				; VI-NEXT: [[MUL:%.*]] = trunc i64 [[TMP8]] to i31
	; VI-NEXT: ret i31 [[MUL]]			; VI-NEXT: ret i31 [[MUL]]
	;			;
	; DISABLED-LABEL: @smul24_i31(			; DISABLED-LABEL: @smul24_i31(
	; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl i31 [[LHS:%.*]], 7			; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl i31 [[LHS:%.*]], 7
	; DISABLED-NEXT: [[LHS24:%.*]] = ashr i31 [[SHL_LHS]], 7			; DISABLED-NEXT: [[LHS24:%.*]] = ashr i31 [[SHL_LHS]], 7
	; DISABLED-NEXT: [[LSHR_RHS:%.*]] = shl i31 [[RHS:%.*]], 7			; DISABLED-NEXT: [[LSHR_RHS:%.*]] = shl i31 [[RHS:%.*]], 7
	; DISABLED-NEXT: [[RHS24:%.*]] = ashr i31 [[LHS]], 7			; DISABLED-NEXT: [[RHS24:%.*]] = ashr i31 [[LHS]], 7
	; DISABLED-NEXT: [[MUL:%.*]] = mul i31 [[LHS24]], [[RHS24]]			; DISABLED-NEXT: [[MUL:%.*]] = mul i31 [[LHS24]], [[RHS24]]
	Show All 9 Lines

	define i31 @umul24_i31(i31 %lhs, i31 %rhs) {			define i31 @umul24_i31(i31 %lhs, i31 %rhs) {
	; SI-LABEL: @umul24_i31(			; SI-LABEL: @umul24_i31(
	; SI-NEXT: [[LHS24:%.*]] = and i31 [[LHS:%.*]], 16777215			; SI-NEXT: [[LHS24:%.*]] = and i31 [[LHS:%.*]], 16777215
	; SI-NEXT: [[RHS24:%.*]] = and i31 [[RHS:%.*]], 16777215			; SI-NEXT: [[RHS24:%.*]] = and i31 [[RHS:%.*]], 16777215
	; SI-NEXT: [[TMP1:%.*]] = zext i31 [[LHS24]] to i32			; SI-NEXT: [[TMP1:%.*]] = zext i31 [[LHS24]] to i32
	; SI-NEXT: [[TMP2:%.*]] = zext i31 [[RHS24]] to i32			; SI-NEXT: [[TMP2:%.*]] = zext i31 [[RHS24]] to i32
	; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])			; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
	; SI-NEXT: [[MUL:%.*]] = trunc i32 [[TMP3]] to i31			; SI-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mulhi.u24(i32 [[TMP1]], i32 [[TMP2]])
				; SI-NEXT: [[TMP5:%.*]] = zext i32 [[TMP3]] to i64
				; SI-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
				; SI-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 32
				; SI-NEXT: [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
				; SI-NEXT: [[MUL:%.*]] = trunc i64 [[TMP8]] to i31
	; SI-NEXT: ret i31 [[MUL]]			; SI-NEXT: ret i31 [[MUL]]
	;			;
	; VI-LABEL: @umul24_i31(			; VI-LABEL: @umul24_i31(
	; VI-NEXT: [[LHS24:%.*]] = and i31 [[LHS:%.*]], 16777215			; VI-NEXT: [[LHS24:%.*]] = and i31 [[LHS:%.*]], 16777215
	; VI-NEXT: [[RHS24:%.*]] = and i31 [[RHS:%.*]], 16777215			; VI-NEXT: [[RHS24:%.*]] = and i31 [[RHS:%.*]], 16777215
	; VI-NEXT: [[TMP1:%.*]] = zext i31 [[LHS24]] to i32			; VI-NEXT: [[TMP1:%.*]] = zext i31 [[LHS24]] to i32
	; VI-NEXT: [[TMP2:%.*]] = zext i31 [[RHS24]] to i32			; VI-NEXT: [[TMP2:%.*]] = zext i31 [[RHS24]] to i32
	; VI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])			; VI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
	; VI-NEXT: [[MUL:%.*]] = trunc i32 [[TMP3]] to i31			; VI-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mulhi.u24(i32 [[TMP1]], i32 [[TMP2]])
				; VI-NEXT: [[TMP5:%.*]] = zext i32 [[TMP3]] to i64
				; VI-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
				; VI-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 32
				; VI-NEXT: [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
				; VI-NEXT: [[MUL:%.*]] = trunc i64 [[TMP8]] to i31
	; VI-NEXT: ret i31 [[MUL]]			; VI-NEXT: ret i31 [[MUL]]
	;			;
	; DISABLED-LABEL: @umul24_i31(			; DISABLED-LABEL: @umul24_i31(
	; DISABLED-NEXT: [[LHS24:%.*]] = and i31 [[LHS:%.*]], 16777215			; DISABLED-NEXT: [[LHS24:%.*]] = and i31 [[LHS:%.*]], 16777215
	; DISABLED-NEXT: [[RHS24:%.*]] = and i31 [[RHS:%.*]], 16777215			; DISABLED-NEXT: [[RHS24:%.*]] = and i31 [[RHS:%.*]], 16777215
	; DISABLED-NEXT: [[MUL:%.*]] = mul i31 [[LHS24]], [[RHS24]]			; DISABLED-NEXT: [[MUL:%.*]] = mul i31 [[LHS24]], [[RHS24]]
	; DISABLED-NEXT: ret i31 [[MUL]]			; DISABLED-NEXT: ret i31 [[MUL]]
	;			;
	Show All 9 Lines
	; SI-NEXT: [[RHS24:%.*]] = and <2 x i31> [[RHS:%.*]], <i31 16777215, i31 16777215>			; SI-NEXT: [[RHS24:%.*]] = and <2 x i31> [[RHS:%.*]], <i31 16777215, i31 16777215>
	; SI-NEXT: [[TMP1:%.*]] = extractelement <2 x i31> [[LHS24]], i64 0			; SI-NEXT: [[TMP1:%.*]] = extractelement <2 x i31> [[LHS24]], i64 0
	; SI-NEXT: [[TMP2:%.*]] = extractelement <2 x i31> [[LHS24]], i64 1			; SI-NEXT: [[TMP2:%.*]] = extractelement <2 x i31> [[LHS24]], i64 1
	; SI-NEXT: [[TMP3:%.*]] = extractelement <2 x i31> [[RHS24]], i64 0			; SI-NEXT: [[TMP3:%.*]] = extractelement <2 x i31> [[RHS24]], i64 0
	; SI-NEXT: [[TMP4:%.*]] = extractelement <2 x i31> [[RHS24]], i64 1			; SI-NEXT: [[TMP4:%.*]] = extractelement <2 x i31> [[RHS24]], i64 1
	; SI-NEXT: [[TMP5:%.*]] = zext i31 [[TMP1]] to i32			; SI-NEXT: [[TMP5:%.*]] = zext i31 [[TMP1]] to i32
	; SI-NEXT: [[TMP6:%.*]] = zext i31 [[TMP3]] to i32			; SI-NEXT: [[TMP6:%.*]] = zext i31 [[TMP3]] to i32
	; SI-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP5]], i32 [[TMP6]])			; SI-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP5]], i32 [[TMP6]])
	; SI-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i31			; SI-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mulhi.u24(i32 [[TMP5]], i32 [[TMP6]])
	; SI-NEXT: [[TMP9:%.*]] = zext i31 [[TMP2]] to i32			; SI-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
	; SI-NEXT: [[TMP10:%.*]] = zext i31 [[TMP4]] to i32			; SI-NEXT: [[TMP10:%.*]] = zext i32 [[TMP8]] to i64
	; SI-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP9]], i32 [[TMP10]])			; SI-NEXT: [[TMP11:%.*]] = shl i64 [[TMP10]], 32
	; SI-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i31			; SI-NEXT: [[TMP12:%.*]] = or i64 [[TMP9]], [[TMP11]]
	; SI-NEXT: [[TMP13:%.*]] = insertelement <2 x i31> undef, i31 [[TMP8]], i64 0			; SI-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i31
	; SI-NEXT: [[MUL:%.*]] = insertelement <2 x i31> [[TMP13]], i31 [[TMP12]], i64 1			; SI-NEXT: [[TMP14:%.*]] = zext i31 [[TMP2]] to i32
				; SI-NEXT: [[TMP15:%.*]] = zext i31 [[TMP4]] to i32
				; SI-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP14]], i32 [[TMP15]])
				; SI-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.mulhi.u24(i32 [[TMP14]], i32 [[TMP15]])
				; SI-NEXT: [[TMP18:%.*]] = zext i32 [[TMP16]] to i64
				; SI-NEXT: [[TMP19:%.*]] = zext i32 [[TMP17]] to i64
				; SI-NEXT: [[TMP20:%.*]] = shl i64 [[TMP19]], 32
				; SI-NEXT: [[TMP21:%.*]] = or i64 [[TMP18]], [[TMP20]]
				; SI-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i31
				; SI-NEXT: [[TMP23:%.*]] = insertelement <2 x i31> undef, i31 [[TMP13]], i64 0
				; SI-NEXT: [[MUL:%.*]] = insertelement <2 x i31> [[TMP23]], i31 [[TMP22]], i64 1
	; SI-NEXT: ret <2 x i31> [[MUL]]			; SI-NEXT: ret <2 x i31> [[MUL]]
	;			;
	; VI-LABEL: @umul24_v2i31(			; VI-LABEL: @umul24_v2i31(
	; VI-NEXT: [[LHS24:%.*]] = and <2 x i31> [[LHS:%.*]], <i31 16777215, i31 16777215>			; VI-NEXT: [[LHS24:%.*]] = and <2 x i31> [[LHS:%.*]], <i31 16777215, i31 16777215>
	; VI-NEXT: [[RHS24:%.*]] = and <2 x i31> [[RHS:%.*]], <i31 16777215, i31 16777215>			; VI-NEXT: [[RHS24:%.*]] = and <2 x i31> [[RHS:%.*]], <i31 16777215, i31 16777215>
	; VI-NEXT: [[TMP1:%.*]] = extractelement <2 x i31> [[LHS24]], i64 0			; VI-NEXT: [[TMP1:%.*]] = extractelement <2 x i31> [[LHS24]], i64 0
	; VI-NEXT: [[TMP2:%.*]] = extractelement <2 x i31> [[LHS24]], i64 1			; VI-NEXT: [[TMP2:%.*]] = extractelement <2 x i31> [[LHS24]], i64 1
	; VI-NEXT: [[TMP3:%.*]] = extractelement <2 x i31> [[RHS24]], i64 0			; VI-NEXT: [[TMP3:%.*]] = extractelement <2 x i31> [[RHS24]], i64 0
	; VI-NEXT: [[TMP4:%.*]] = extractelement <2 x i31> [[RHS24]], i64 1			; VI-NEXT: [[TMP4:%.*]] = extractelement <2 x i31> [[RHS24]], i64 1
	; VI-NEXT: [[TMP5:%.*]] = zext i31 [[TMP1]] to i32			; VI-NEXT: [[TMP5:%.*]] = zext i31 [[TMP1]] to i32
	; VI-NEXT: [[TMP6:%.*]] = zext i31 [[TMP3]] to i32			; VI-NEXT: [[TMP6:%.*]] = zext i31 [[TMP3]] to i32
	; VI-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP5]], i32 [[TMP6]])			; VI-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP5]], i32 [[TMP6]])
	; VI-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i31			; VI-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mulhi.u24(i32 [[TMP5]], i32 [[TMP6]])
	; VI-NEXT: [[TMP9:%.*]] = zext i31 [[TMP2]] to i32			; VI-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
	; VI-NEXT: [[TMP10:%.*]] = zext i31 [[TMP4]] to i32			; VI-NEXT: [[TMP10:%.*]] = zext i32 [[TMP8]] to i64
	; VI-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP9]], i32 [[TMP10]])			; VI-NEXT: [[TMP11:%.*]] = shl i64 [[TMP10]], 32
	; VI-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i31			; VI-NEXT: [[TMP12:%.*]] = or i64 [[TMP9]], [[TMP11]]
	; VI-NEXT: [[TMP13:%.*]] = insertelement <2 x i31> undef, i31 [[TMP8]], i64 0			; VI-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i31
	; VI-NEXT: [[MUL:%.*]] = insertelement <2 x i31> [[TMP13]], i31 [[TMP12]], i64 1			; VI-NEXT: [[TMP14:%.*]] = zext i31 [[TMP2]] to i32
				; VI-NEXT: [[TMP15:%.*]] = zext i31 [[TMP4]] to i32
				; VI-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP14]], i32 [[TMP15]])
				; VI-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.mulhi.u24(i32 [[TMP14]], i32 [[TMP15]])
				; VI-NEXT: [[TMP18:%.*]] = zext i32 [[TMP16]] to i64
				; VI-NEXT: [[TMP19:%.*]] = zext i32 [[TMP17]] to i64
				; VI-NEXT: [[TMP20:%.*]] = shl i64 [[TMP19]], 32
				; VI-NEXT: [[TMP21:%.*]] = or i64 [[TMP18]], [[TMP20]]
				; VI-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i31
				; VI-NEXT: [[TMP23:%.*]] = insertelement <2 x i31> undef, i31 [[TMP13]], i64 0
				; VI-NEXT: [[MUL:%.*]] = insertelement <2 x i31> [[TMP23]], i31 [[TMP22]], i64 1
	; VI-NEXT: ret <2 x i31> [[MUL]]			; VI-NEXT: ret <2 x i31> [[MUL]]
	;			;
	; DISABLED-LABEL: @umul24_v2i31(			; DISABLED-LABEL: @umul24_v2i31(
	; DISABLED-NEXT: [[LHS24:%.*]] = and <2 x i31> [[LHS:%.*]], <i31 16777215, i31 16777215>			; DISABLED-NEXT: [[LHS24:%.*]] = and <2 x i31> [[LHS:%.*]], <i31 16777215, i31 16777215>
	; DISABLED-NEXT: [[RHS24:%.*]] = and <2 x i31> [[RHS:%.*]], <i31 16777215, i31 16777215>			; DISABLED-NEXT: [[RHS24:%.*]] = and <2 x i31> [[RHS:%.*]], <i31 16777215, i31 16777215>
	; DISABLED-NEXT: [[MUL:%.*]] = mul <2 x i31> [[LHS24]], [[RHS24]]			; DISABLED-NEXT: [[MUL:%.*]] = mul <2 x i31> [[LHS24]], [[RHS24]]
	; DISABLED-NEXT: ret <2 x i31> [[MUL]]			; DISABLED-NEXT: ret <2 x i31> [[MUL]]
	;			;
	Show All 11 Lines
	; SI-NEXT: [[RHS24:%.*]] = ashr <2 x i31> [[LHS]], <i31 8, i31 8>			; SI-NEXT: [[RHS24:%.*]] = ashr <2 x i31> [[LHS]], <i31 8, i31 8>
	; SI-NEXT: [[TMP1:%.*]] = extractelement <2 x i31> [[LHS24]], i64 0			; SI-NEXT: [[TMP1:%.*]] = extractelement <2 x i31> [[LHS24]], i64 0
	; SI-NEXT: [[TMP2:%.*]] = extractelement <2 x i31> [[LHS24]], i64 1			; SI-NEXT: [[TMP2:%.*]] = extractelement <2 x i31> [[LHS24]], i64 1
	; SI-NEXT: [[TMP3:%.*]] = extractelement <2 x i31> [[RHS24]], i64 0			; SI-NEXT: [[TMP3:%.*]] = extractelement <2 x i31> [[RHS24]], i64 0
	; SI-NEXT: [[TMP4:%.*]] = extractelement <2 x i31> [[RHS24]], i64 1			; SI-NEXT: [[TMP4:%.*]] = extractelement <2 x i31> [[RHS24]], i64 1
	; SI-NEXT: [[TMP5:%.*]] = sext i31 [[TMP1]] to i32			; SI-NEXT: [[TMP5:%.*]] = sext i31 [[TMP1]] to i32
	; SI-NEXT: [[TMP6:%.*]] = sext i31 [[TMP3]] to i32			; SI-NEXT: [[TMP6:%.*]] = sext i31 [[TMP3]] to i32
	; SI-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP5]], i32 [[TMP6]])			; SI-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP5]], i32 [[TMP6]])
	; SI-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i31			; SI-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mulhi.i24(i32 [[TMP5]], i32 [[TMP6]])
	; SI-NEXT: [[TMP9:%.*]] = sext i31 [[TMP2]] to i32			; SI-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
	; SI-NEXT: [[TMP10:%.*]] = sext i31 [[TMP4]] to i32			; SI-NEXT: [[TMP10:%.*]] = zext i32 [[TMP8]] to i64
	; SI-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP9]], i32 [[TMP10]])			; SI-NEXT: [[TMP11:%.*]] = shl i64 [[TMP10]], 32
	; SI-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i31			; SI-NEXT: [[TMP12:%.*]] = or i64 [[TMP9]], [[TMP11]]
	; SI-NEXT: [[TMP13:%.*]] = insertelement <2 x i31> undef, i31 [[TMP8]], i64 0			; SI-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i31
	; SI-NEXT: [[MUL:%.*]] = insertelement <2 x i31> [[TMP13]], i31 [[TMP12]], i64 1			; SI-NEXT: [[TMP14:%.*]] = sext i31 [[TMP2]] to i32
				; SI-NEXT: [[TMP15:%.*]] = sext i31 [[TMP4]] to i32
				; SI-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP14]], i32 [[TMP15]])
				; SI-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.mulhi.i24(i32 [[TMP14]], i32 [[TMP15]])
				; SI-NEXT: [[TMP18:%.*]] = zext i32 [[TMP16]] to i64
				; SI-NEXT: [[TMP19:%.*]] = zext i32 [[TMP17]] to i64
				; SI-NEXT: [[TMP20:%.*]] = shl i64 [[TMP19]], 32
				; SI-NEXT: [[TMP21:%.*]] = or i64 [[TMP18]], [[TMP20]]
				; SI-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i31
				; SI-NEXT: [[TMP23:%.*]] = insertelement <2 x i31> undef, i31 [[TMP13]], i64 0
				; SI-NEXT: [[MUL:%.*]] = insertelement <2 x i31> [[TMP23]], i31 [[TMP22]], i64 1
	; SI-NEXT: ret <2 x i31> [[MUL]]			; SI-NEXT: ret <2 x i31> [[MUL]]
	;			;
	; VI-LABEL: @smul24_v2i31(			; VI-LABEL: @smul24_v2i31(
	; VI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i31> [[LHS:%.*]], <i31 8, i31 8>			; VI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i31> [[LHS:%.*]], <i31 8, i31 8>
	; VI-NEXT: [[LHS24:%.*]] = ashr <2 x i31> [[SHL_LHS]], <i31 8, i31 8>			; VI-NEXT: [[LHS24:%.*]] = ashr <2 x i31> [[SHL_LHS]], <i31 8, i31 8>
	; VI-NEXT: [[LSHR_RHS:%.*]] = shl <2 x i31> [[RHS:%.*]], <i31 8, i31 8>			; VI-NEXT: [[LSHR_RHS:%.*]] = shl <2 x i31> [[RHS:%.*]], <i31 8, i31 8>
	; VI-NEXT: [[RHS24:%.*]] = ashr <2 x i31> [[LHS]], <i31 8, i31 8>			; VI-NEXT: [[RHS24:%.*]] = ashr <2 x i31> [[LHS]], <i31 8, i31 8>
	; VI-NEXT: [[TMP1:%.*]] = extractelement <2 x i31> [[LHS24]], i64 0			; VI-NEXT: [[TMP1:%.*]] = extractelement <2 x i31> [[LHS24]], i64 0
	; VI-NEXT: [[TMP2:%.*]] = extractelement <2 x i31> [[LHS24]], i64 1			; VI-NEXT: [[TMP2:%.*]] = extractelement <2 x i31> [[LHS24]], i64 1
	; VI-NEXT: [[TMP3:%.*]] = extractelement <2 x i31> [[RHS24]], i64 0			; VI-NEXT: [[TMP3:%.*]] = extractelement <2 x i31> [[RHS24]], i64 0
	; VI-NEXT: [[TMP4:%.*]] = extractelement <2 x i31> [[RHS24]], i64 1			; VI-NEXT: [[TMP4:%.*]] = extractelement <2 x i31> [[RHS24]], i64 1
	; VI-NEXT: [[TMP5:%.*]] = sext i31 [[TMP1]] to i32			; VI-NEXT: [[TMP5:%.*]] = sext i31 [[TMP1]] to i32
	; VI-NEXT: [[TMP6:%.*]] = sext i31 [[TMP3]] to i32			; VI-NEXT: [[TMP6:%.*]] = sext i31 [[TMP3]] to i32
	; VI-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP5]], i32 [[TMP6]])			; VI-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP5]], i32 [[TMP6]])
	; VI-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i31			; VI-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mulhi.i24(i32 [[TMP5]], i32 [[TMP6]])
	; VI-NEXT: [[TMP9:%.*]] = sext i31 [[TMP2]] to i32			; VI-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
	; VI-NEXT: [[TMP10:%.*]] = sext i31 [[TMP4]] to i32			; VI-NEXT: [[TMP10:%.*]] = zext i32 [[TMP8]] to i64
	; VI-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP9]], i32 [[TMP10]])			; VI-NEXT: [[TMP11:%.*]] = shl i64 [[TMP10]], 32
	; VI-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i31			; VI-NEXT: [[TMP12:%.*]] = or i64 [[TMP9]], [[TMP11]]
	; VI-NEXT: [[TMP13:%.*]] = insertelement <2 x i31> undef, i31 [[TMP8]], i64 0			; VI-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i31
	; VI-NEXT: [[MUL:%.*]] = insertelement <2 x i31> [[TMP13]], i31 [[TMP12]], i64 1			; VI-NEXT: [[TMP14:%.*]] = sext i31 [[TMP2]] to i32
				; VI-NEXT: [[TMP15:%.*]] = sext i31 [[TMP4]] to i32
				; VI-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP14]], i32 [[TMP15]])
				; VI-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.mulhi.i24(i32 [[TMP14]], i32 [[TMP15]])
				; VI-NEXT: [[TMP18:%.*]] = zext i32 [[TMP16]] to i64
				; VI-NEXT: [[TMP19:%.*]] = zext i32 [[TMP17]] to i64
				; VI-NEXT: [[TMP20:%.*]] = shl i64 [[TMP19]], 32
				; VI-NEXT: [[TMP21:%.*]] = or i64 [[TMP18]], [[TMP20]]
				; VI-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i31
				; VI-NEXT: [[TMP23:%.*]] = insertelement <2 x i31> undef, i31 [[TMP13]], i64 0
				; VI-NEXT: [[MUL:%.*]] = insertelement <2 x i31> [[TMP23]], i31 [[TMP22]], i64 1
	; VI-NEXT: ret <2 x i31> [[MUL]]			; VI-NEXT: ret <2 x i31> [[MUL]]
	;			;
	; DISABLED-LABEL: @smul24_v2i31(			; DISABLED-LABEL: @smul24_v2i31(
	; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl <2 x i31> [[LHS:%.*]], <i31 8, i31 8>			; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl <2 x i31> [[LHS:%.*]], <i31 8, i31 8>
	; DISABLED-NEXT: [[LHS24:%.*]] = ashr <2 x i31> [[SHL_LHS]], <i31 8, i31 8>			; DISABLED-NEXT: [[LHS24:%.*]] = ashr <2 x i31> [[SHL_LHS]], <i31 8, i31 8>
	; DISABLED-NEXT: [[LSHR_RHS:%.*]] = shl <2 x i31> [[RHS:%.*]], <i31 8, i31 8>			; DISABLED-NEXT: [[LSHR_RHS:%.*]] = shl <2 x i31> [[RHS:%.*]], <i31 8, i31 8>
	; DISABLED-NEXT: [[RHS24:%.*]] = ashr <2 x i31> [[LHS]], <i31 8, i31 8>			; DISABLED-NEXT: [[RHS24:%.*]] = ashr <2 x i31> [[LHS]], <i31 8, i31 8>
	; DISABLED-NEXT: [[MUL:%.*]] = mul <2 x i31> [[LHS24]], [[RHS24]]			; DISABLED-NEXT: [[MUL:%.*]] = mul <2 x i31> [[LHS24]], [[RHS24]]
	; DISABLED-NEXT: ret <2 x i31> [[MUL]]			; DISABLED-NEXT: ret <2 x i31> [[MUL]]
	;			;
	%shl.lhs = shl <2 x i31> %lhs, <i31 8, i31 8>			%shl.lhs = shl <2 x i31> %lhs, <i31 8, i31 8>
	%lhs24 = ashr <2 x i31> %shl.lhs, <i31 8, i31 8>			%lhs24 = ashr <2 x i31> %shl.lhs, <i31 8, i31 8>
	%lshr.rhs = shl <2 x i31> %rhs, <i31 8, i31 8>			%lshr.rhs = shl <2 x i31> %rhs, <i31 8, i31 8>
	%rhs24 = ashr <2 x i31> %lhs, <i31 8, i31 8>			%rhs24 = ashr <2 x i31> %lhs, <i31 8, i31 8>
	%mul = mul <2 x i31> %lhs24, %rhs24			%mul = mul <2 x i31> %lhs24, %rhs24
	ret <2 x i31> %mul			ret <2 x i31> %mul
	}			}

	define i33 @smul24_i33(i33 %lhs, i33 %rhs) {			define i33 @smul24_i33(i33 %lhs, i33 %rhs) {
	; SI-LABEL: @smul24_i33(			; SI-LABEL: @smul24_i33(
	; SI-NEXT: [[SHL_LHS:%.*]] = shl i33 [[LHS:%.*]], 9			; SI-NEXT: [[SHL_LHS:%.*]] = shl i33 [[LHS:%.*]], 9
	; SI-NEXT: [[LHS24:%.*]] = ashr i33 [[SHL_LHS]], 9			; SI-NEXT: [[LHS24:%.*]] = ashr i33 [[SHL_LHS]], 9
	; SI-NEXT: [[LSHR_RHS:%.*]] = shl i33 [[RHS:%.*]], 9			; SI-NEXT: [[LSHR_RHS:%.*]] = shl i33 [[RHS:%.*]], 9
	; SI-NEXT: [[RHS24:%.*]] = ashr i33 [[LHS]], 9			; SI-NEXT: [[RHS24:%.*]] = ashr i33 [[LHS]], 9
	; SI-NEXT: [[MUL:%.*]] = mul i33 [[LHS24]], [[RHS24]]			; SI-NEXT: [[TMP1:%.*]] = trunc i33 [[LHS24]] to i32
				; SI-NEXT: [[TMP2:%.*]] = trunc i33 [[RHS24]] to i32
				; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
				; SI-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mulhi.i24(i32 [[TMP1]], i32 [[TMP2]])
				; SI-NEXT: [[TMP5:%.*]] = zext i32 [[TMP3]] to i64
				; SI-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
				; SI-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 32
				; SI-NEXT: [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
				; SI-NEXT: [[MUL:%.*]] = trunc i64 [[TMP8]] to i33
	; SI-NEXT: ret i33 [[MUL]]			; SI-NEXT: ret i33 [[MUL]]
	;			;
	; VI-LABEL: @smul24_i33(			; VI-LABEL: @smul24_i33(
	; VI-NEXT: [[SHL_LHS:%.*]] = shl i33 [[LHS:%.*]], 9			; VI-NEXT: [[SHL_LHS:%.*]] = shl i33 [[LHS:%.*]], 9
	; VI-NEXT: [[LHS24:%.*]] = ashr i33 [[SHL_LHS]], 9			; VI-NEXT: [[LHS24:%.*]] = ashr i33 [[SHL_LHS]], 9
	; VI-NEXT: [[LSHR_RHS:%.*]] = shl i33 [[RHS:%.*]], 9			; VI-NEXT: [[LSHR_RHS:%.*]] = shl i33 [[RHS:%.*]], 9
	; VI-NEXT: [[RHS24:%.*]] = ashr i33 [[LHS]], 9			; VI-NEXT: [[RHS24:%.*]] = ashr i33 [[LHS]], 9
	; VI-NEXT: [[MUL:%.*]] = mul i33 [[LHS24]], [[RHS24]]			; VI-NEXT: [[TMP1:%.*]] = trunc i33 [[LHS24]] to i32
				; VI-NEXT: [[TMP2:%.*]] = trunc i33 [[RHS24]] to i32
				; VI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
				; VI-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mulhi.i24(i32 [[TMP1]], i32 [[TMP2]])
				; VI-NEXT: [[TMP5:%.*]] = zext i32 [[TMP3]] to i64
				; VI-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
				; VI-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 32
				; VI-NEXT: [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
				; VI-NEXT: [[MUL:%.*]] = trunc i64 [[TMP8]] to i33
	; VI-NEXT: ret i33 [[MUL]]			; VI-NEXT: ret i33 [[MUL]]
	;			;
	; DISABLED-LABEL: @smul24_i33(			; DISABLED-LABEL: @smul24_i33(
	; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl i33 [[LHS:%.*]], 9			; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl i33 [[LHS:%.*]], 9
	; DISABLED-NEXT: [[LHS24:%.*]] = ashr i33 [[SHL_LHS]], 9			; DISABLED-NEXT: [[LHS24:%.*]] = ashr i33 [[SHL_LHS]], 9
	; DISABLED-NEXT: [[LSHR_RHS:%.*]] = shl i33 [[RHS:%.*]], 9			; DISABLED-NEXT: [[LSHR_RHS:%.*]] = shl i33 [[RHS:%.*]], 9
	; DISABLED-NEXT: [[RHS24:%.*]] = ashr i33 [[LHS]], 9			; DISABLED-NEXT: [[RHS24:%.*]] = ashr i33 [[LHS]], 9
	; DISABLED-NEXT: [[MUL:%.*]] = mul i33 [[LHS24]], [[RHS24]]			; DISABLED-NEXT: [[MUL:%.*]] = mul i33 [[LHS24]], [[RHS24]]
	; DISABLED-NEXT: ret i33 [[MUL]]			; DISABLED-NEXT: ret i33 [[MUL]]
	;			;
	%shl.lhs = shl i33 %lhs, 9			%shl.lhs = shl i33 %lhs, 9
	%lhs24 = ashr i33 %shl.lhs, 9			%lhs24 = ashr i33 %shl.lhs, 9
	%lshr.rhs = shl i33 %rhs, 9			%lshr.rhs = shl i33 %rhs, 9
	%rhs24 = ashr i33 %lhs, 9			%rhs24 = ashr i33 %lhs, 9
	%mul = mul i33 %lhs24, %rhs24			%mul = mul i33 %lhs24, %rhs24
	ret i33 %mul			ret i33 %mul
	}			}

	define i33 @umul24_i33(i33 %lhs, i33 %rhs) {			define i33 @umul24_i33(i33 %lhs, i33 %rhs) {
	; SI-LABEL: @umul24_i33(			; SI-LABEL: @umul24_i33(
	; SI-NEXT: [[LHS24:%.*]] = and i33 [[LHS:%.*]], 16777215			; SI-NEXT: [[LHS24:%.*]] = and i33 [[LHS:%.*]], 16777215
	; SI-NEXT: [[RHS24:%.*]] = and i33 [[RHS:%.*]], 16777215			; SI-NEXT: [[RHS24:%.*]] = and i33 [[RHS:%.*]], 16777215
	; SI-NEXT: [[MUL:%.*]] = mul i33 [[LHS24]], [[RHS24]]			; SI-NEXT: [[TMP1:%.*]] = trunc i33 [[LHS24]] to i32
				; SI-NEXT: [[TMP2:%.*]] = trunc i33 [[RHS24]] to i32
				; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
				; SI-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mulhi.u24(i32 [[TMP1]], i32 [[TMP2]])
				; SI-NEXT: [[TMP5:%.*]] = zext i32 [[TMP3]] to i64
				; SI-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
				; SI-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 32
				; SI-NEXT: [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
				; SI-NEXT: [[MUL:%.*]] = trunc i64 [[TMP8]] to i33
	; SI-NEXT: ret i33 [[MUL]]			; SI-NEXT: ret i33 [[MUL]]
	;			;
	; VI-LABEL: @umul24_i33(			; VI-LABEL: @umul24_i33(
	; VI-NEXT: [[LHS24:%.*]] = and i33 [[LHS:%.*]], 16777215			; VI-NEXT: [[LHS24:%.*]] = and i33 [[LHS:%.*]], 16777215
	; VI-NEXT: [[RHS24:%.*]] = and i33 [[RHS:%.*]], 16777215			; VI-NEXT: [[RHS24:%.*]] = and i33 [[RHS:%.*]], 16777215
	; VI-NEXT: [[MUL:%.*]] = mul i33 [[LHS24]], [[RHS24]]			; VI-NEXT: [[TMP1:%.*]] = trunc i33 [[LHS24]] to i32
				; VI-NEXT: [[TMP2:%.*]] = trunc i33 [[RHS24]] to i32
				; VI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
				; VI-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mulhi.u24(i32 [[TMP1]], i32 [[TMP2]])
				; VI-NEXT: [[TMP5:%.*]] = zext i32 [[TMP3]] to i64
				; VI-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
				; VI-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 32
				; VI-NEXT: [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
				; VI-NEXT: [[MUL:%.*]] = trunc i64 [[TMP8]] to i33
	; VI-NEXT: ret i33 [[MUL]]			; VI-NEXT: ret i33 [[MUL]]
	;			;
	; DISABLED-LABEL: @umul24_i33(			; DISABLED-LABEL: @umul24_i33(
	; DISABLED-NEXT: [[LHS24:%.*]] = and i33 [[LHS:%.*]], 16777215			; DISABLED-NEXT: [[LHS24:%.*]] = and i33 [[LHS:%.*]], 16777215
	; DISABLED-NEXT: [[RHS24:%.*]] = and i33 [[RHS:%.*]], 16777215			; DISABLED-NEXT: [[RHS24:%.*]] = and i33 [[RHS:%.*]], 16777215
	; DISABLED-NEXT: [[MUL:%.*]] = mul i33 [[LHS24]], [[RHS24]]			; DISABLED-NEXT: [[MUL:%.*]] = mul i33 [[LHS24]], [[RHS24]]
	; DISABLED-NEXT: ret i33 [[MUL]]			; DISABLED-NEXT: ret i33 [[MUL]]
	;			;
	▲ Show 20 Lines • Show All 62 Lines • ▼ Show 20 Lines
	}			}

	define <2 x i33> @smul24_v2i33(<2 x i33> %lhs, <2 x i33> %rhs) {			define <2 x i33> @smul24_v2i33(<2 x i33> %lhs, <2 x i33> %rhs) {
	; SI-LABEL: @smul24_v2i33(			; SI-LABEL: @smul24_v2i33(
	; SI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i33> [[LHS:%.*]], <i33 9, i33 9>			; SI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i33> [[LHS:%.*]], <i33 9, i33 9>
	; SI-NEXT: [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]], <i33 9, i33 9>			; SI-NEXT: [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]], <i33 9, i33 9>
	; SI-NEXT: [[LSHR_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]], <i33 9, i33 9>			; SI-NEXT: [[LSHR_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]], <i33 9, i33 9>
	; SI-NEXT: [[RHS24:%.*]] = ashr <2 x i33> [[LHS]], <i33 9, i33 9>			; SI-NEXT: [[RHS24:%.*]] = ashr <2 x i33> [[LHS]], <i33 9, i33 9>
	; SI-NEXT: [[MUL:%.*]] = mul <2 x i33> [[LHS24]], [[RHS24]]			; SI-NEXT: [[TMP1:%.*]] = extractelement <2 x i33> [[LHS24]], i64 0
				; SI-NEXT: [[TMP2:%.*]] = extractelement <2 x i33> [[LHS24]], i64 1
				; SI-NEXT: [[TMP3:%.*]] = extractelement <2 x i33> [[RHS24]], i64 0
				; SI-NEXT: [[TMP4:%.*]] = extractelement <2 x i33> [[RHS24]], i64 1
				; SI-NEXT: [[TMP5:%.*]] = trunc i33 [[TMP1]] to i32
				; SI-NEXT: [[TMP6:%.*]] = trunc i33 [[TMP3]] to i32
				; SI-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP5]], i32 [[TMP6]])
				; SI-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mulhi.i24(i32 [[TMP5]], i32 [[TMP6]])
				; SI-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
				; SI-NEXT: [[TMP10:%.*]] = zext i32 [[TMP8]] to i64
				; SI-NEXT: [[TMP11:%.*]] = shl i64 [[TMP10]], 32
				; SI-NEXT: [[TMP12:%.*]] = or i64 [[TMP9]], [[TMP11]]
				; SI-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i33
				; SI-NEXT: [[TMP14:%.*]] = trunc i33 [[TMP2]] to i32
				; SI-NEXT: [[TMP15:%.*]] = trunc i33 [[TMP4]] to i32
				; SI-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP14]], i32 [[TMP15]])
				; SI-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.mulhi.i24(i32 [[TMP14]], i32 [[TMP15]])
				; SI-NEXT: [[TMP18:%.*]] = zext i32 [[TMP16]] to i64
				; SI-NEXT: [[TMP19:%.*]] = zext i32 [[TMP17]] to i64
				; SI-NEXT: [[TMP20:%.*]] = shl i64 [[TMP19]], 32
				; SI-NEXT: [[TMP21:%.*]] = or i64 [[TMP18]], [[TMP20]]
				; SI-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i33
				; SI-NEXT: [[TMP23:%.*]] = insertelement <2 x i33> undef, i33 [[TMP13]], i64 0
				; SI-NEXT: [[MUL:%.*]] = insertelement <2 x i33> [[TMP23]], i33 [[TMP22]], i64 1
	; SI-NEXT: ret <2 x i33> [[MUL]]			; SI-NEXT: ret <2 x i33> [[MUL]]
	;			;
	; VI-LABEL: @smul24_v2i33(			; VI-LABEL: @smul24_v2i33(
	; VI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i33> [[LHS:%.*]], <i33 9, i33 9>			; VI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i33> [[LHS:%.*]], <i33 9, i33 9>
	; VI-NEXT: [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]], <i33 9, i33 9>			; VI-NEXT: [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]], <i33 9, i33 9>
	; VI-NEXT: [[LSHR_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]], <i33 9, i33 9>			; VI-NEXT: [[LSHR_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]], <i33 9, i33 9>
	; VI-NEXT: [[RHS24:%.*]] = ashr <2 x i33> [[LHS]], <i33 9, i33 9>			; VI-NEXT: [[RHS24:%.*]] = ashr <2 x i33> [[LHS]], <i33 9, i33 9>
	; VI-NEXT: [[MUL:%.*]] = mul <2 x i33> [[LHS24]], [[RHS24]]			; VI-NEXT: [[TMP1:%.*]] = extractelement <2 x i33> [[LHS24]], i64 0
				; VI-NEXT: [[TMP2:%.*]] = extractelement <2 x i33> [[LHS24]], i64 1
				; VI-NEXT: [[TMP3:%.*]] = extractelement <2 x i33> [[RHS24]], i64 0
				; VI-NEXT: [[TMP4:%.*]] = extractelement <2 x i33> [[RHS24]], i64 1
				; VI-NEXT: [[TMP5:%.*]] = trunc i33 [[TMP1]] to i32
				; VI-NEXT: [[TMP6:%.*]] = trunc i33 [[TMP3]] to i32
				; VI-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP5]], i32 [[TMP6]])
				; VI-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mulhi.i24(i32 [[TMP5]], i32 [[TMP6]])
				; VI-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
				; VI-NEXT: [[TMP10:%.*]] = zext i32 [[TMP8]] to i64
				; VI-NEXT: [[TMP11:%.*]] = shl i64 [[TMP10]], 32
				; VI-NEXT: [[TMP12:%.*]] = or i64 [[TMP9]], [[TMP11]]
				; VI-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i33
				; VI-NEXT: [[TMP14:%.*]] = trunc i33 [[TMP2]] to i32
				; VI-NEXT: [[TMP15:%.*]] = trunc i33 [[TMP4]] to i32
				; VI-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP14]], i32 [[TMP15]])
				; VI-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.mulhi.i24(i32 [[TMP14]], i32 [[TMP15]])
				; VI-NEXT: [[TMP18:%.*]] = zext i32 [[TMP16]] to i64
				; VI-NEXT: [[TMP19:%.*]] = zext i32 [[TMP17]] to i64
				; VI-NEXT: [[TMP20:%.*]] = shl i64 [[TMP19]], 32
				; VI-NEXT: [[TMP21:%.*]] = or i64 [[TMP18]], [[TMP20]]
				; VI-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i33
				; VI-NEXT: [[TMP23:%.*]] = insertelement <2 x i33> undef, i33 [[TMP13]], i64 0
				; VI-NEXT: [[MUL:%.*]] = insertelement <2 x i33> [[TMP23]], i33 [[TMP22]], i64 1
	; VI-NEXT: ret <2 x i33> [[MUL]]			; VI-NEXT: ret <2 x i33> [[MUL]]
	;			;
	; DISABLED-LABEL: @smul24_v2i33(			; DISABLED-LABEL: @smul24_v2i33(
	; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl <2 x i33> [[LHS:%.*]], <i33 9, i33 9>			; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl <2 x i33> [[LHS:%.*]], <i33 9, i33 9>
	; DISABLED-NEXT: [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]], <i33 9, i33 9>			; DISABLED-NEXT: [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]], <i33 9, i33 9>
	; DISABLED-NEXT: [[LSHR_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]], <i33 9, i33 9>			; DISABLED-NEXT: [[LSHR_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]], <i33 9, i33 9>
	; DISABLED-NEXT: [[RHS24:%.*]] = ashr <2 x i33> [[LHS]], <i33 9, i33 9>			; DISABLED-NEXT: [[RHS24:%.*]] = ashr <2 x i33> [[LHS]], <i33 9, i33 9>
	; DISABLED-NEXT: [[MUL:%.*]] = mul <2 x i33> [[LHS24]], [[RHS24]]			; DISABLED-NEXT: [[MUL:%.*]] = mul <2 x i33> [[LHS24]], [[RHS24]]
	Show All 9 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Enable 48-bit mul in AMDGPUCodeGenPrepare.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 381825

llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp

llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Enable 48-bit mul in AMDGPUCodeGenPrepare.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 381825

llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp

llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll

[AMDGPU] Enable 48-bit mul in AMDGPUCodeGenPrepare.
ClosedPublic