Diff 72435

lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp

Show All 33 Lines
class AMDGPUCodeGenPrepare : public FunctionPass,		class AMDGPUCodeGenPrepare : public FunctionPass,
public InstVisitor<AMDGPUCodeGenPrepare, bool> {		public InstVisitor<AMDGPUCodeGenPrepare, bool> {
const GCNTargetMachine *TM;		const GCNTargetMachine *TM;
const SISubtarget *ST;		const SISubtarget *ST;
DivergenceAnalysis *DA;		DivergenceAnalysis *DA;
Module *Mod;		Module *Mod;
bool HasUnsafeFPMath;		bool HasUnsafeFPMath;

		/// \brief Copies exact/nsw/nuw flags (if any) from binary operator \p I to
		/// binary operator \p V.
		///
		/// \returns Binary operator \p V.
		Value copyFlags(const BinaryOperator &I, Value V) const;

		/// \returns Equivalent 16 bit integer type for given 32 bit integer type
		/// \p T.
		Type getI16Ty(IRBuilder<> &B, const Type T) const;

		[Review comment, tstellarAMD, Done] CmpInst has an isSigned() member function that you can use instead.
		/// \returns Equivalent 32 bit integer type for given 16 bit integer type
		/// \p T.
		Type getI32Ty(IRBuilder<> &B, const Type T) const;

		/// \returns True if the base element of type \p T is 16 bit integer, false
		/// otherwise.
		bool isI16Ty(const Type *T) const;

		/// \returns True if the base element of type \p T is 32 bit integer, false
		/// otherwise.
		bool isI32Ty(const Type *T) const;
		[Review comment, tstellarAMD, Done] Same thing here.

		/// \returns True if binary operation \p I is a signed binary operation, false
		/// otherwise.
		bool isSigned(const BinaryOperator &I) const;

		/// \returns True if the condition of 'select' operation \p I comes from a
		/// signed 'icmp' operation, false otherwise.
		bool isSigned(const SelectInst &I) const;

		/// \brief Promotes uniform 16 bit binary operation \p I to equivalent 32 bit
		/// binary operation by sign or zero extending operands to 32 bits, replacing
		/// 16 bit operation with equivalent 32 bit operation, and truncating the
		/// result of 32 bit operation back to 16 bits. 16 bit division operation is
		/// not promoted.
		///
		/// \returns True if 16 bit binary operation is promoted to equivalent 32 bit
		/// binary operation, false otherwise.
		bool promoteUniformI16OpToI32Op(BinaryOperator &I) const;

		/// \brief Promotes uniform 16 bit 'icmp' operation \p I to 32 bit 'icmp'
		/// operation by sign or zero extending operands to 32 bits, and replacing 16
		/// bit operation with 32 bit operation.
		///
		/// \returns True.
		bool promoteUniformI16OpToI32Op(ICmpInst &I) const;

		/// \brief Promotes uniform 16 bit 'select' operation \p I to 32 bit 'select'
		/// operation by sign or zero extending operands to 32 bits, replacing 16 bit
		/// operation with 32 bit operation, and truncating the result of 32 bit
		/// operation back to 16 bits.
		///
		/// \returns True.
		bool promoteUniformI16OpToI32Op(SelectInst &I) const;

public:		public:
static char ID;		static char ID;
AMDGPUCodeGenPrepare(const TargetMachine *TM = nullptr) :		AMDGPUCodeGenPrepare(const TargetMachine *TM = nullptr) :
FunctionPass(ID),		FunctionPass(ID),
TM(static_cast<const GCNTargetMachine *>(TM)),		TM(static_cast<const GCNTargetMachine *>(TM)),
ST(nullptr),		ST(nullptr),
DA(nullptr),		DA(nullptr),
Mod(nullptr),		Mod(nullptr),
HasUnsafeFPMath(false) { }		HasUnsafeFPMath(false) { }

bool visitFDiv(BinaryOperator &I);		bool visitFDiv(BinaryOperator &I);

bool visitInstruction(Instruction &I) {		bool visitInstruction(Instruction &I) { return false; }
return false;		bool visitBinaryOperator(BinaryOperator &I);
}		bool visitICmpInst(ICmpInst &I);
		bool visitSelectInst(SelectInst &I);

bool doInitialization(Module &M) override;		bool doInitialization(Module &M) override;
bool runOnFunction(Function &F) override;		bool runOnFunction(Function &F) override;

const char *getPassName() const override {		const char *getPassName() const override {
return "AMDGPU IR optimizations";		return "AMDGPU IR optimizations";
}		}

void getAnalysisUsage(AnalysisUsage &AU) const override {		void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DivergenceAnalysis>();		AU.addRequired<DivergenceAnalysis>();
AU.setPreservesAll();		AU.setPreservesAll();
}		}
};		};

} // End anonymous namespace		} // End anonymous namespace

		Value *AMDGPUCodeGenPrepare::copyFlags(
		const BinaryOperator &I, Value *V) const {
		assert(isa<BinaryOperator>(V) && "V must be binary operator");
		[Review comment, arsenm, Done] I don't think you need this assert. It's not like the code will be incorrect if this is too aggressive.

		BinaryOperator *BinOp = cast<BinaryOperator>(V);
		if (isa<OverflowingBinaryOperator>(BinOp)) {
		BinOp->setHasNoSignedWrap(I.hasNoSignedWrap());
		BinOp->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
		} else if (isa<PossiblyExactOperator>(BinOp)) {
		BinOp->setIsExact(I.isExact());
		}

		return V;
		}

		/// Maps a 32 bit integer type to the 16 bit integer type of the same shape.
		///
		/// \p T must satisfy isI32Ty(). A scalar integer maps to i16; otherwise \p T
		/// is treated as a vector and maps to a vector of i16 with the same number of
		/// elements.
		///
		/// The return type and \p T are pointers: the body dereferences \p T and
		/// returns the pointer produced by the builder, and callers store the result
		/// in a `Type *`.
		Type *AMDGPUCodeGenPrepare::getI16Ty(IRBuilder<> &B, const Type *T) const {
		  assert(isI32Ty(T) && "T must be 32 bits");

		  if (T->isIntegerTy())
		    return B.getInt16Ty();
		  return VectorType::get(B.getInt16Ty(), cast<VectorType>(T)->getNumElements());
		}

		Type AMDGPUCodeGenPrepare::getI32Ty(IRBuilder<> &B, const Type T) const {
		assert(isI16Ty(T) && "T must be 16 bits");
		[Review comment, arsenm, Done] You shouldn't need this.

		if (T->isIntegerTy())
		return B.getInt32Ty();
		return VectorType::get(B.getInt32Ty(), cast<VectorType>(T)->getNumElements());
		}

		bool AMDGPUCodeGenPrepare::isI16Ty(const Type *T) const {
		if (T->isIntegerTy(16))
		return true;
		[Review comment, arsenm, Done] Ditto.
		if (!T->isVectorTy())
		return false;
		return cast<VectorType>(T)->getElementType()->isIntegerTy(16);
		}

		bool AMDGPUCodeGenPrepare::isI32Ty(const Type *T) const {
		if (T->isIntegerTy(32))
		return true;
		if (!T->isVectorTy())
		return false;
		return cast<VectorType>(T)->getElementType()->isIntegerTy(32);
		}

		/// Reports whether the operands of \p I must be sign extended (rather than
		/// zero extended) when the operation is widened.
		///
		/// Signed division and remainder interpret their operands as signed values.
		/// Arithmetic shift right is included as well: it replicates the sign bit of
		/// its first operand, so a zero extended operand would shift zeros into the
		/// low 16 bits and the truncated result would differ from the 16 bit
		/// operation (e.g. 0x8000 ashr 1 must yield 0xC000, not 0x4000).
		bool AMDGPUCodeGenPrepare::isSigned(const BinaryOperator &I) const {
		  switch (I.getOpcode()) {
		  case Instruction::AShr:
		  case Instruction::SDiv:
		  case Instruction::SRem:
		    return true;
		  default:
		    return false;
		  }
		}

		bool AMDGPUCodeGenPrepare::isSigned(const SelectInst &I) const {
		return isa<ICmpInst>(I.getOperand(0)) ?
		[Review comment, arsenm, Done] Ditto.
		cast<ICmpInst>(I.getOperand(0))->isSigned() : false;
		}

		/// Rewrites the 16 bit binary operation \p I as: extend both operands to
		/// 32 bits, perform the same opcode at 32 bits (with the flags of \p I copied
		/// over by copyFlags), and truncate the result back to 16 bits. All uses of
		/// \p I are redirected to the truncated value and \p I is erased.
		///
		/// \returns False (leaving \p I untouched) for sdiv/udiv, true otherwise.
		bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32Op(BinaryOperator &I) const {
		  assert(isI16Ty(I.getType()) && "Op must be 16 bits");

		  // Division is deliberately not promoted.
		  const Instruction::BinaryOps Opc = I.getOpcode();
		  if (Opc == Instruction::SDiv || Opc == Instruction::UDiv)
		    return false;

		  // New instructions are inserted before I and inherit its debug location.
		  IRBuilder<> Builder(&I);
		  Builder.SetCurrentDebugLocation(I.getDebugLoc());

		  Type *WideTy = getI32Ty(Builder, I.getType());

		  // The kind of extension is decided once and applied to both operands.
		  const bool SignExtend = isSigned(I);
		  auto Widen = [&](Value *Op) -> Value * {
		    return SignExtend ? Builder.CreateSExt(Op, WideTy)
		                      : Builder.CreateZExt(Op, WideTy);
		  };
		  Value *WideLHS = Widen(I.getOperand(0));
		  Value *WideRHS = Widen(I.getOperand(1));

		  Value *WideRes = copyFlags(I, Builder.CreateBinOp(Opc, WideLHS, WideRHS));
		  Value *NarrowRes =
		      Builder.CreateTrunc(WideRes, getI16Ty(Builder, WideRes->getType()));

		  I.replaceAllUsesWith(NarrowRes);
		  I.eraseFromParent();
		  return true;
		}

		/// Rewrites the 16 bit 'icmp' \p I as a 32 bit 'icmp' with the same
		/// predicate. Operands are sign extended when the predicate is signed and
		/// zero extended otherwise. All uses of \p I are redirected to the new
		/// compare and \p I is erased.
		///
		/// \returns True.
		bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32Op(ICmpInst &I) const {
		  Value *const LHS = I.getOperand(0);
		  Value *const RHS = I.getOperand(1);
		  assert(isI16Ty(LHS->getType()) && "Op0 must be 16 bits");
		  assert(isI16Ty(RHS->getType()) && "Op1 must be 16 bits");

		  // New instructions are inserted before I and inherit its debug location.
		  IRBuilder<> Builder(&I);
		  Builder.SetCurrentDebugLocation(I.getDebugLoc());

		  Type *WideLHSTy = getI32Ty(Builder, LHS->getType());
		  Type *WideRHSTy = getI32Ty(Builder, RHS->getType());

		  const bool SignExtend = I.isSigned();
		  Value *WideLHS = SignExtend ? Builder.CreateSExt(LHS, WideLHSTy)
		                              : Builder.CreateZExt(LHS, WideLHSTy);
		  Value *WideRHS = SignExtend ? Builder.CreateSExt(RHS, WideRHSTy)
		                              : Builder.CreateZExt(RHS, WideRHSTy);

		  Value *WideCmp = Builder.CreateICmp(I.getPredicate(), WideLHS, WideRHS);

		  I.replaceAllUsesWith(WideCmp);
		  I.eraseFromParent();
		  return true;
		}

		bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32Op(SelectInst &I) const {
		assert(isI16Ty(I.getType()) && "Op must be 16 bits");

		IRBuilder<> Builder(&I);
		Builder.SetCurrentDebugLocation(I.getDebugLoc());

		Type *I32Ty = getI32Ty(Builder, I.getType());
		Value *ExtOp1 = nullptr;
		Value *ExtOp2 = nullptr;
		Value *ExtRes = nullptr;
		Value *TruncRes = nullptr;

		if (isSigned(I)) {
		ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
		ExtOp2 = Builder.CreateSExt(I.getOperand(2), I32Ty);
		} else {
		ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
		ExtOp2 = Builder.CreateZExt(I.getOperand(2), I32Ty);
		}
		[Review comment, tstellarAMD, Not Done] I think you can always zero extend for select, since you will be discarding the high bits with the truncate.
		[Review comment, kzhuravl (author), Not Done] The min/max tests fail when always zero extending.
		[Review comment, tstellarAMD, Not Done] OK, I just noticed Matt's comment below. What happens if you always sign extend? If you get no regressions from that, I think that would be better.
		[Review comment, kzhuravl (author), Not Done] Some min patterns do not get matched, which causes the min lit test to fail.
		ExtRes = Builder.CreateSelect(I.getOperand(0), ExtOp1, ExtOp2);
		TruncRes = Builder.CreateTrunc(ExtRes, getI16Ty(Builder, ExtRes->getType()));

		I.replaceAllUsesWith(TruncRes);
		I.eraseFromParent();

		return true;
		}

static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv) {		static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv) {
const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);		const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);
if (!CNum)		if (!CNum)
return false;		return false;

// Reciprocal f32 is handled separately without denormals.		// Reciprocal f32 is handled separately without denormals.
return UnsafeDiv \|\| CNum->isExactlyValue(+1.0);		return UnsafeDiv \|\| CNum->isExactlyValue(+1.0);
}		}
▲ Show 20 Lines • Show All 63 Lines • ▼ Show 20 Lines	if (NewFDiv) {
FDiv.replaceAllUsesWith(NewFDiv);		FDiv.replaceAllUsesWith(NewFDiv);
NewFDiv->takeName(&FDiv);		NewFDiv->takeName(&FDiv);
FDiv.eraseFromParent();		FDiv.eraseFromParent();
}		}

return true;		return true;
}		}

static bool hasUnsafeFPMath(const Function &F) {		static bool hasUnsafeFPMath(const Function &F) {
		[Review comment, arsenm, Done] You can't use zext to promote any operation; you must use sext for the signed operations.
		[Review comment, arsenm, Done] This also won't get selects for the min/max pattern.
Attribute Attr = F.getFnAttribute("unsafe-fp-math");		Attribute Attr = F.getFnAttribute("unsafe-fp-math");
return Attr.getValueAsString() == "true";		return Attr.getValueAsString() == "true";
}		}

		[Review comment, arsenm, Done] This should check the type first since it is cheaper. We should also investigate whether this should be done for smaller types that will be legalized to i16.
		bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
		bool Changed = false;

		// TODO: Should we promote smaller types that will be legalized to i16?
		if (!ST->has16BitInsts() && isI16Ty(I.getType()) && DA->isUniform(&I))
		[Review comment, tstellarAMD, Done] I just noticed that this condition is wrong. We should only be doing the promotion when the target supports 16-bit operations, not when it does not support them.
		Changed \|= promoteUniformI16OpToI32Op(I);
		[Review comment, arsenm, Done] I think all of these should be skipped if the target doesn't have i16 instructions.

		return Changed;
		}

		/// Visitor hook for 'icmp'. Promotes \p I to a 32 bit compare when the
		/// subtarget reports no 16 bit instructions, both operands are 16 bit
		/// integers (or vectors of them), and divergence analysis reports \p I as
		/// uniform.
		///
		/// \returns True if \p I was replaced, false otherwise.
		bool AMDGPUCodeGenPrepare::visitICmpInst(ICmpInst &I) {
		  // TODO: Should we promote smaller types that will be legalized to i16?
		  // NOTE(review): promotion currently fires on subtargets *without* 16 bit
		  // instructions; confirm that this polarity is the intended one.
		  if (ST->has16BitInsts())
		    return false;
		  if (!isI16Ty(I.getOperand(0)->getType()) ||
		      !isI16Ty(I.getOperand(1)->getType()))
		    return false;
		  if (!DA->isUniform(&I))
		    return false;

		  return promoteUniformI16OpToI32Op(I);
		}

		/// Visitor hook for 'select'. Promotes \p I to a 32 bit select when the
		/// subtarget reports no 16 bit instructions, the result is a 16 bit integer
		/// (or a vector of them), and divergence analysis reports \p I as uniform.
		///
		/// \returns True if \p I was replaced, false otherwise.
		bool AMDGPUCodeGenPrepare::visitSelectInst(SelectInst &I) {
		  // TODO: Should we promote smaller types that will be legalized to i16?
		  // NOTE(review): promotion currently fires on subtargets *without* 16 bit
		  // instructions; confirm that this polarity is the intended one.
		  if (ST->has16BitInsts())
		    return false;
		  if (!isI16Ty(I.getType()))
		    return false;
		  if (!DA->isUniform(&I))
		    return false;

		  return promoteUniformI16OpToI32Op(I);
		}

bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {		bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
Mod = &M;		Mod = &M;
return false;		return false;
}		}

bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {		bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
if (!TM \|\| skipFunction(F))		if (!TM \|\| skipFunction(F))
return false;		return false;
Show All 29 Lines

lib/Target/AMDGPU/SIISelLowering.cpp

	Show First 20 Lines • Show All 534 Lines • ▼ Show 20 Lines
	bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,			bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
	Type *Ty) const {			Type *Ty) const {
	// FIXME: Could be smarter if called for vector constants.			// FIXME: Could be smarter if called for vector constants.
	return true;			return true;
	}			}

	bool SITargetLowering::isTypeDesirableForOp(unsigned Op, EVT VT) const {			bool SITargetLowering::isTypeDesirableForOp(unsigned Op, EVT VT) const {

				// i16 is not desirable unless it is a load or a store.
				if (VT == MVT::i16 && Op != ISD::LOAD && Op != ISD::STORE)
				return false;

	// SimplifySetCC uses this function to determine whether or not it should			// SimplifySetCC uses this function to determine whether or not it should
	// create setcc with i1 operands. We don't have instructions for i1 setcc.			// create setcc with i1 operands. We don't have instructions for i1 setcc.
	if (VT == MVT::i1 && Op == ISD::SETCC)			if (VT == MVT::i1 && Op == ISD::SETCC)
	return false;			return false;

	return TargetLowering::isTypeDesirableForOp(Op, VT);			return TargetLowering::isTypeDesirableForOp(Op, VT);
	}			}

	▲ Show 20 Lines • Show All 3,331 Lines • Show Last 20 Lines

test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll

This file was added.

				; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s \| FileCheck %s
				; RUN: opt -S -amdgpu-codegenprepare %s \| FileCheck -check-prefix=NOOP %s
				; Make sure this doesn't crash with no triple

				; NOOP-LABEL: @noop_fdiv_fpmath(
				; NOOP: %md.25ulp = fdiv float %a, %b, !fpmath !0
				define void @noop_fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #3 {
				%md.25ulp = fdiv float %a, %b, !fpmath !0
				store volatile float %md.25ulp, float addrspace(1)* %out
				ret void
				}

				; CHECK-LABEL: @fdiv_fpmath(
				; CHECK: %no.md = fdiv float %a, %b{{$}}
				; CHECK: %md.half.ulp = fdiv float %a, %b, !fpmath !1
				; CHECK: %md.1ulp = fdiv float %a, %b, !fpmath !2
				; CHECK: %md.25ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
				; CHECK: %md.3ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !3
				; CHECK: %fast.md.25ulp = call fast float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
				; CHECK: arcp.md.25ulp = call arcp float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
				define void @fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #1 {
				%no.md = fdiv float %a, %b
				store volatile float %no.md, float addrspace(1)* %out

				%md.half.ulp = fdiv float %a, %b, !fpmath !1
				store volatile float %md.half.ulp, float addrspace(1)* %out

				%md.1ulp = fdiv float %a, %b, !fpmath !2
				store volatile float %md.1ulp, float addrspace(1)* %out

				%md.25ulp = fdiv float %a, %b, !fpmath !0
				store volatile float %md.25ulp, float addrspace(1)* %out

				%md.3ulp = fdiv float %a, %b, !fpmath !3
				store volatile float %md.3ulp, float addrspace(1)* %out

				%fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
				store volatile float %fast.md.25ulp, float addrspace(1)* %out

				%arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
				store volatile float %arcp.md.25ulp, float addrspace(1)* %out

				ret void
				}

				; CHECK-LABEL: @rcp_fdiv_fpmath(
				; CHECK: %no.md = fdiv float 1.000000e+00, %x{{$}}
				; CHECK: %md.25ulp = fdiv float 1.000000e+00, %x, !fpmath !0
				; CHECK: %md.half.ulp = fdiv float 1.000000e+00, %x, !fpmath !1
				; CHECK: %arcp.no.md = fdiv arcp float 1.000000e+00, %x{{$}}
				; CHECK: %arcp.25ulp = fdiv arcp float 1.000000e+00, %x, !fpmath !0
				; CHECK: %fast.no.md = fdiv fast float 1.000000e+00, %x{{$}}
				; CHECK: %fast.25ulp = fdiv fast float 1.000000e+00, %x, !fpmath !0
				define void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #1 {
				%no.md = fdiv float 1.0, %x
				store volatile float %no.md, float addrspace(1)* %out

				%md.25ulp = fdiv float 1.0, %x, !fpmath !0
				store volatile float %md.25ulp, float addrspace(1)* %out

				%md.half.ulp = fdiv float 1.0, %x, !fpmath !1
				store volatile float %md.half.ulp, float addrspace(1)* %out

				%arcp.no.md = fdiv arcp float 1.0, %x
				store volatile float %arcp.no.md, float addrspace(1)* %out

				%arcp.25ulp = fdiv arcp float 1.0, %x, !fpmath !0
				store volatile float %arcp.25ulp, float addrspace(1)* %out

				%fast.no.md = fdiv fast float 1.0, %x
				store volatile float %fast.no.md, float addrspace(1)* %out

				%fast.25ulp = fdiv fast float 1.0, %x, !fpmath !0
				store volatile float %fast.25ulp, float addrspace(1)* %out

				ret void
				}

				; CHECK-LABEL: @fdiv_fpmath_vector(
				; CHECK: %no.md = fdiv <2 x float> %a, %b{{$}}
				; CHECK: %md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1
				; CHECK: %md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2

				; CHECK: %[[A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
				; CHECK: %[[B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
				; CHECK: %[[FDIV0:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A0]], float %[[B0]]), !fpmath !0
				; CHECK: %[[INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FDIV0]], i64 0
				; CHECK: %[[A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
				; CHECK: %[[B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
				; CHECK: %[[FDIV1:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A1]], float %[[B1]]), !fpmath !0
				; CHECK: %md.25ulp = insertelement <2 x float> %[[INS0]], float %[[FDIV1]], i64 1
				define void @fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #1 {
				%no.md = fdiv <2 x float> %a, %b
				store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out

				%md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1
				store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out

				%md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2
				store volatile <2 x float> %md.1ulp, <2 x float> addrspace(1)* %out

				%md.25ulp = fdiv <2 x float> %a, %b, !fpmath !0
				store volatile <2 x float> %md.25ulp, <2 x float> addrspace(1)* %out

				ret void
				}

				; CHECK-LABEL: @rcp_fdiv_fpmath_vector(
				; CHECK: %no.md = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}}
				; CHECK: %md.half.ulp = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !1
				; CHECK: %arcp.no.md = fdiv arcp <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}}
				; CHECK: %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}}

				; CHECK: extractelement <2 x float> %x
				; CHECK: fdiv arcp float 1.000000e+00, %{{[0-9]+}}, !fpmath !0
				; CHECK: extractelement <2 x float> %x
				; CHECK: fdiv arcp float 1.000000e+00, %{{[0-9]+}}, !fpmath !0
				; CHECK: store volatile <2 x float> %arcp.25ulp

				; CHECK: fdiv fast float 1.000000e+00, %{{[0-9]+}}, !fpmath !0
				; CHECK: fdiv fast float 1.000000e+00, %{{[0-9]+}}, !fpmath !0
				; CHECK: store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
				define void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 {
				%no.md = fdiv <2 x float> <float 1.0, float 1.0>, %x
				store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out

				%md.half.ulp = fdiv <2 x float> <float 1.0, float 1.0>, %x, !fpmath !1
				store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out

				%arcp.no.md = fdiv arcp <2 x float> <float 1.0, float 1.0>, %x
				store volatile <2 x float> %arcp.no.md, <2 x float> addrspace(1)* %out

				%fast.no.md = fdiv fast <2 x float> <float 1.0, float 1.0>, %x
				store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out

				%arcp.25ulp = fdiv arcp <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0
				store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out

				%fast.25ulp = fdiv fast <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0
				store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out

				ret void
				}

				; CHECK-LABEL: @rcp_fdiv_fpmath_vector_nonsplat(
				; CHECK: %no.md = fdiv <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x
				; CHECK: %arcp.no.md = fdiv arcp <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x
				; CHECK: %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x{{$}}

				; CHECK: %[[X0:[0-9]+]] = extractelement <2 x float> %x, i64 0
				; CHECK: fdiv arcp float 1.000000e+00, %[[X0]], !fpmath !0
				; CHECK: %[[X1:[0-9]+]] = extractelement <2 x float> %x, i64 1
				; CHECK: fdiv arcp float 2.000000e+00, %[[X1]], !fpmath !0
				; CHECK: store volatile <2 x float> %arcp.25ulp

				; CHECK: %[[X0:[0-9]+]] = extractelement <2 x float> %x, i64 0
				; CHECK: fdiv fast float 1.000000e+00, %[[X0]], !fpmath !0
				; CHECK: %[[X1:[0-9]+]] = extractelement <2 x float> %x, i64 1
				; CHECK: fdiv fast float 2.000000e+00, %[[X1]], !fpmath !0
				; CHECK: store volatile <2 x float> %fast.25ulp
				define void @rcp_fdiv_fpmath_vector_nonsplat(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 {
				%no.md = fdiv <2 x float> <float 1.0, float 2.0>, %x
				store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out

				%arcp.no.md = fdiv arcp <2 x float> <float 1.0, float 2.0>, %x
				store volatile <2 x float> %arcp.no.md, <2 x float> addrspace(1)* %out

				%fast.no.md = fdiv fast <2 x float> <float 1.0, float 2.0>, %x
				store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out

				%arcp.25ulp = fdiv arcp <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0
				store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out

				%fast.25ulp = fdiv fast <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0
				store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out

				ret void
				}

				; FIXME: Should be able to get fdiv for 1.0 component
				; CHECK-LABEL: @rcp_fdiv_fpmath_vector_partial_constant(
				; CHECK: call arcp float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0
				; CHECK: call arcp float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0
				; CHECK: store volatile <2 x float> %arcp.25ulp

				; CHECK: call fast float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0
				; CHECK: call fast float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0
				; CHECK: store volatile <2 x float> %fast.25ulp
				define void @rcp_fdiv_fpmath_vector_partial_constant(<2 x float> addrspace(1)* %out, <2 x float> %x, <2 x float> %y) #1 {
				%x.insert = insertelement <2 x float> %x, float 1.0, i32 0

				%arcp.25ulp = fdiv arcp <2 x float> %x.insert, %y, !fpmath !0
				store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out

				%fast.25ulp = fdiv fast <2 x float> %x.insert, %y, !fpmath !0
				store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out

				ret void
				}

				; CHECK-LABEL: @fdiv_fpmath_f32_denormals(
				; CHECK: %no.md = fdiv float %a, %b{{$}}
				; CHECK: %md.half.ulp = fdiv float %a, %b, !fpmath !1
				; CHECK: %md.1ulp = fdiv float %a, %b, !fpmath !2
				; CHECK: %md.25ulp = fdiv float %a, %b, !fpmath !0
				; CHECK: %md.3ulp = fdiv float %a, %b, !fpmath !3
				; CHECK: call fast float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
				; CHECK: call arcp float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
				define void @fdiv_fpmath_f32_denormals(float addrspace(1)* %out, float %a, float %b) #2 {
				%no.md = fdiv float %a, %b
				store volatile float %no.md, float addrspace(1)* %out

				%md.half.ulp = fdiv float %a, %b, !fpmath !1
				store volatile float %md.half.ulp, float addrspace(1)* %out

				%md.1ulp = fdiv float %a, %b, !fpmath !2
				store volatile float %md.1ulp, float addrspace(1)* %out

				%md.25ulp = fdiv float %a, %b, !fpmath !0
				store volatile float %md.25ulp, float addrspace(1)* %out

				%md.3ulp = fdiv float %a, %b, !fpmath !3
				store volatile float %md.3ulp, float addrspace(1)* %out

				%fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
				store volatile float %fast.md.25ulp, float addrspace(1)* %out

				%arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
				store volatile float %arcp.md.25ulp, float addrspace(1)* %out

				ret void
				}

				attributes #0 = { nounwind optnone noinline }
				attributes #1 = { nounwind }
				attributes #2 = { nounwind "target-features"="+fp32-denormals" }

				; CHECK: !0 = !{float 2.500000e+00}
				; CHECK: !1 = !{float 5.000000e-01}
				; CHECK: !2 = !{float 1.000000e+00}
				; CHECK: !3 = !{float 3.000000e+00}

				!0 = !{float 2.500000e+00}
				!1 = !{float 5.000000e-01}
				!2 = !{float 1.000000e+00}
				!3 = !{float 3.000000e+00}

test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll

This file was added.

				; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s \| FileCheck -check-prefix=SI %s
				; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -amdgpu-codegenprepare %s \| FileCheck -check-prefix=VI %s

				; VI-NOT: zext
				; VI-NOT: sext
				; VI-NOT: trunc

				; SI-LABEL: @add_i16(
				; SI: %[[A_32:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32:[0-9]+]] = zext i16 %b to i32
				; SI: %[[R_32:[0-9]+]] = add i32 %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
				; SI: ret i16 %[[R_16]]
				define i16 @add_i16(i16 %a, i16 %b) {
				%r = add i16 %a, %b
				ret i16 %r
				}

				; SI-LABEL: @add_nsw_i16(
				; SI: %[[A_32:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32:[0-9]+]] = zext i16 %b to i32
				; SI: %[[R_32:[0-9]+]] = add nsw i32 %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
				; SI: ret i16 %[[R_16]]
				define i16 @add_nsw_i16(i16 %a, i16 %b) {
				%r = add nsw i16 %a, %b
				ret i16 %r
				}

				; SI-LABEL: @add_nuw_i16(
				; SI: %[[A_32:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32:[0-9]+]] = zext i16 %b to i32
				; SI: %[[R_32:[0-9]+]] = add nuw i32 %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
				; SI: ret i16 %[[R_16]]
				define i16 @add_nuw_i16(i16 %a, i16 %b) {
				%r = add nuw i16 %a, %b
				ret i16 %r
				}

				; SI-LABEL: @add_nuw_nsw_i16(
				; SI: %[[A_32:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32:[0-9]+]] = zext i16 %b to i32
				; SI: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
				; SI: ret i16 %[[R_16]]
				define i16 @add_nuw_nsw_i16(i16 %a, i16 %b) {
				%r = add nuw nsw i16 %a, %b
				ret i16 %r
				}

				; SI-LABEL: @sub_i16(
				; SI: %[[A_32:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32:[0-9]+]] = zext i16 %b to i32
				; SI: %[[R_32:[0-9]+]] = sub i32 %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
				; SI: ret i16 %[[R_16]]
				define i16 @sub_i16(i16 %a, i16 %b) {
				%r = sub i16 %a, %b
				ret i16 %r
				}

				; SI-LABEL: @sub_nsw_i16(
				; SI: %[[A_32:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32:[0-9]+]] = zext i16 %b to i32
				; SI: %[[R_32:[0-9]+]] = sub nsw i32 %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
				; SI: ret i16 %[[R_16]]
				define i16 @sub_nsw_i16(i16 %a, i16 %b) {
				%r = sub nsw i16 %a, %b
				ret i16 %r
				}

				; SI-LABEL: @sub_nuw_i16(
				; SI: %[[A_32:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32:[0-9]+]] = zext i16 %b to i32
				; SI: %[[R_32:[0-9]+]] = sub nuw i32 %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
				; SI: ret i16 %[[R_16]]
				define i16 @sub_nuw_i16(i16 %a, i16 %b) {
				%r = sub nuw i16 %a, %b
				ret i16 %r
				}

				; SI-LABEL: @sub_nuw_nsw_i16(
				; SI: %[[A_32:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32:[0-9]+]] = zext i16 %b to i32
				; SI: %[[R_32:[0-9]+]] = sub nuw nsw i32 %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
				; SI: ret i16 %[[R_16]]
				define i16 @sub_nuw_nsw_i16(i16 %a, i16 %b) {
				%r = sub nuw nsw i16 %a, %b
				ret i16 %r
				}

				; SI-LABEL: @mul_i16(
				; SI: %[[A_32:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32:[0-9]+]] = zext i16 %b to i32
				; SI: %[[R_32:[0-9]+]] = mul i32 %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
				; SI: ret i16 %[[R_16]]
				define i16 @mul_i16(i16 %a, i16 %b) {
				%r = mul i16 %a, %b
				ret i16 %r
				}

				; SI-LABEL: @mul_nsw_i16(
				; SI: %[[A_32:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32:[0-9]+]] = zext i16 %b to i32
				; SI: %[[R_32:[0-9]+]] = mul nsw i32 %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
				; SI: ret i16 %[[R_16]]
				define i16 @mul_nsw_i16(i16 %a, i16 %b) {
				%r = mul nsw i16 %a, %b
				ret i16 %r
				}

				; SI-LABEL: @mul_nuw_i16(
				; SI: %[[A_32:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32:[0-9]+]] = zext i16 %b to i32
				; SI: %[[R_32:[0-9]+]] = mul nuw i32 %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
				; SI: ret i16 %[[R_16]]
				define i16 @mul_nuw_i16(i16 %a, i16 %b) {
				%r = mul nuw i16 %a, %b
				ret i16 %r
				}

				; SI-LABEL: @mul_nuw_nsw_i16(
				; SI: %[[A_32:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32:[0-9]+]] = zext i16 %b to i32
				; SI: %[[R_32:[0-9]+]] = mul nuw nsw i32 %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
				; SI: ret i16 %[[R_16]]
				define i16 @mul_nuw_nsw_i16(i16 %a, i16 %b) {
				%r = mul nuw nsw i16 %a, %b
				ret i16 %r
				}

				; SI-LABEL: @urem_i16(
				; SI: %[[A_32:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32:[0-9]+]] = zext i16 %b to i32
				; SI: %[[R_32:[0-9]+]] = urem i32 %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
				; SI: ret i16 %[[R_16]]
				define i16 @urem_i16(i16 %a, i16 %b) {
				%r = urem i16 %a, %b
				ret i16 %r
				}

				; SI-LABEL: @srem_i16(
				; SI: %[[A_32:[0-9]+]] = sext i16 %a to i32
				; SI: %[[B_32:[0-9]+]] = sext i16 %b to i32
				; SI: %[[R_32:[0-9]+]] = srem i32 %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
				; SI: ret i16 %[[R_16]]
				; Test input: i16 signed remainder. The expectations above want sext (not zext)
				; of both operands, a 32-bit srem, and a trunc back to i16.
				define i16 @srem_i16(i16 %a, i16 %b) {
				%r = srem i16 %a, %b
				ret i16 %r
				}

				; SI-LABEL: @shl_i16(
				; SI: %[[A_32:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32:[0-9]+]] = zext i16 %b to i32
				; SI: %[[R_32:[0-9]+]] = shl i32 %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
				; SI: ret i16 %[[R_16]]
				; Test input: plain i16 shl (no wrap flags). The expectations above want both
				; operands zero-extended to i32, a 32-bit shl, and a trunc back to i16.
				define i16 @shl_i16(i16 %a, i16 %b) {
				%r = shl i16 %a, %b
				ret i16 %r
				}

				; SI-LABEL: @shl_nsw_i16(
				; SI: %[[A_32:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32:[0-9]+]] = zext i16 %b to i32
				; SI: %[[R_32:[0-9]+]] = shl nsw i32 %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
				; SI: ret i16 %[[R_16]]
				; Test input: i16 shl carrying nsw. The expectations above want the flag carried
				; over to the widened 32-bit shl of the zero-extended operands.
				define i16 @shl_nsw_i16(i16 %a, i16 %b) {
				%r = shl nsw i16 %a, %b
				ret i16 %r
				}

				; SI-LABEL: @shl_nuw_i16(
				; SI: %[[A_32:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32:[0-9]+]] = zext i16 %b to i32
				; SI: %[[R_32:[0-9]+]] = shl nuw i32 %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
				; SI: ret i16 %[[R_16]]
				; Test input: i16 shl carrying nuw. The expectations above want the flag carried
				; over to the widened 32-bit shl of the zero-extended operands.
				define i16 @shl_nuw_i16(i16 %a, i16 %b) {
				%r = shl nuw i16 %a, %b
				ret i16 %r
				}

				; SI-LABEL: @shl_nuw_nsw_i16(
				; SI: %[[A_32:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32:[0-9]+]] = zext i16 %b to i32
				; SI: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
				; SI: ret i16 %[[R_16]]
				; Test input: i16 shl carrying both nuw and nsw. The expectations above want
				; both flags carried over to the widened 32-bit shl of the zero-extended operands.
				define i16 @shl_nuw_nsw_i16(i16 %a, i16 %b) {
				%r = shl nuw nsw i16 %a, %b
				ret i16 %r
				}

				; SI-LABEL: @lshr_i16(
				; SI: %[[A_32:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32:[0-9]+]] = zext i16 %b to i32
				; SI: %[[R_32:[0-9]+]] = lshr i32 %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
				; SI: ret i16 %[[R_16]]
				; Test input: plain i16 lshr (logical shift right). The expectations above want
				; both operands zero-extended to i32, a 32-bit lshr, and a trunc back to i16.
				define i16 @lshr_i16(i16 %a, i16 %b) {
				%r = lshr i16 %a, %b
				ret i16 %r
				}

				; SI-LABEL: @lshr_exact_i16(
				; SI: %[[A_32:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32:[0-9]+]] = zext i16 %b to i32
				; SI: %[[R_32:[0-9]+]] = lshr exact i32 %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
				; SI: ret i16 %[[R_16]]
				; Test input: i16 lshr carrying the exact flag. The expectations above want the
				; flag carried over to the widened 32-bit lshr of the zero-extended operands.
				define i16 @lshr_exact_i16(i16 %a, i16 %b) {
				%r = lshr exact i16 %a, %b
				ret i16 %r
				}

				; SI-LABEL: @ashr_i16(
				; SI: %[[A_32:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32:[0-9]+]] = zext i16 %b to i32
				; SI: %[[R_32:[0-9]+]] = ashr i32 %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
				; SI: ret i16 %[[R_16]]
				; Test input: plain i16 ashr (arithmetic shift right).
				; NOTE(review): the expectations above zero-extend %a before the 32-bit ashr.
				; For %a = 0x8000, %b = 1 the i16 result is 0xC000, but zext + ashr i32 + trunc
				; gives 0x4000. The shifted operand presumably needs sext - confirm in the pass.
				define i16 @ashr_i16(i16 %a, i16 %b) {
				%r = ashr i16 %a, %b
				ret i16 %r
				}

				; SI-LABEL: @ashr_exact_i16(
				; SI: %[[A_32:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32:[0-9]+]] = zext i16 %b to i32
				; SI: %[[R_32:[0-9]+]] = ashr exact i32 %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
				; SI: ret i16 %[[R_16]]
				; Test input: i16 ashr carrying the exact flag; the expectations above want the
				; flag carried over to the widened 32-bit ashr.
				; NOTE(review): the expectations above zero-extend %a before the 32-bit ashr.
				; For %a = 0x8000, %b = 1 the i16 result is 0xC000, but zext + ashr i32 + trunc
				; gives 0x4000. The shifted operand presumably needs sext - confirm in the pass.
				define i16 @ashr_exact_i16(i16 %a, i16 %b) {
				%r = ashr exact i16 %a, %b
				ret i16 %r
				}

				; SI-LABEL: @and_i16(
				; SI: %[[A_32:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32:[0-9]+]] = zext i16 %b to i32
				; SI: %[[R_32:[0-9]+]] = and i32 %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
				; SI: ret i16 %[[R_16]]
				; Test input: i16 bitwise and. The expectations above want both operands
				; zero-extended to i32, a 32-bit and, and a trunc back to i16.
				define i16 @and_i16(i16 %a, i16 %b) {
				%r = and i16 %a, %b
				ret i16 %r
				}

				; SI-LABEL: @or_i16(
				; SI: %[[A_32:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32:[0-9]+]] = zext i16 %b to i32
				; SI: %[[R_32:[0-9]+]] = or i32 %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
				; SI: ret i16 %[[R_16]]
				; Test input: i16 bitwise or. The expectations above want both operands
				; zero-extended to i32, a 32-bit or, and a trunc back to i16.
				define i16 @or_i16(i16 %a, i16 %b) {
				%r = or i16 %a, %b
				ret i16 %r
				}

				; SI-LABEL: @xor_i16(
				; SI: %[[A_32:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32:[0-9]+]] = zext i16 %b to i32
				; SI: %[[R_32:[0-9]+]] = xor i32 %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
				; SI: ret i16 %[[R_16]]
				; Test input: i16 bitwise xor. The expectations above want both operands
				; zero-extended to i32, a 32-bit xor, and a trunc back to i16.
				define i16 @xor_i16(i16 %a, i16 %b) {
				%r = xor i16 %a, %b
				ret i16 %r
				}

				; SI-LABEL: @select_eq_i16(
				; SI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
				; SI: %[[CMP:[0-9]+]] = icmp eq i32 %[[A_32_0]], %[[B_32_0]]
				; SI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
				; SI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
				; SI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
				; SI: ret i16 %[[SEL_16]]
				; Test input: i16 select whose condition is an 'eq' compare of the same two
				; operands. The expectations above want the icmp and the select each widened to
				; i32 using zext, followed by a trunc of the selected value back to i16.
				define i16 @select_eq_i16(i16 %a, i16 %b) {
				%cmp = icmp eq i16 %a, %b
				%sel = select i1 %cmp, i16 %a, i16 %b
				ret i16 %sel
				}

				; SI-LABEL: @select_ne_i16(
				; SI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
				; SI: %[[CMP:[0-9]+]] = icmp ne i32 %[[A_32_0]], %[[B_32_0]]
				; SI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
				; SI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
				; SI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
				; SI: ret i16 %[[SEL_16]]
				; Test input: i16 select whose condition is an 'ne' compare of the same two
				; operands. The expectations above want the icmp and the select each widened to
				; i32 using zext, followed by a trunc of the selected value back to i16.
				define i16 @select_ne_i16(i16 %a, i16 %b) {
				%cmp = icmp ne i16 %a, %b
				%sel = select i1 %cmp, i16 %a, i16 %b
				ret i16 %sel
				}

				; SI-LABEL: @select_ugt_i16(
				; SI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
				; SI: %[[CMP:[0-9]+]] = icmp ugt i32 %[[A_32_0]], %[[B_32_0]]
				; SI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
				; SI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
				; SI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
				; SI: ret i16 %[[SEL_16]]
				; Test input: i16 select whose condition is an unsigned 'ugt' compare of the
				; same two operands. The expectations above want the icmp and the select each
				; widened to i32 using zext, then a trunc of the selected value back to i16.
				define i16 @select_ugt_i16(i16 %a, i16 %b) {
				%cmp = icmp ugt i16 %a, %b
				%sel = select i1 %cmp, i16 %a, i16 %b
				ret i16 %sel
				}

				; SI-LABEL: @select_uge_i16(
				; SI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
				; SI: %[[CMP:[0-9]+]] = icmp uge i32 %[[A_32_0]], %[[B_32_0]]
				; SI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
				; SI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
				; SI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
				; SI: ret i16 %[[SEL_16]]
				; Test input: i16 select whose condition is an unsigned 'uge' compare of the
				; same two operands. The expectations above want the icmp and the select each
				; widened to i32 using zext, then a trunc of the selected value back to i16.
				define i16 @select_uge_i16(i16 %a, i16 %b) {
				%cmp = icmp uge i16 %a, %b
				%sel = select i1 %cmp, i16 %a, i16 %b
				ret i16 %sel
				}

				; SI-LABEL: @select_ult_i16(
				; SI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
				; SI: %[[CMP:[0-9]+]] = icmp ult i32 %[[A_32_0]], %[[B_32_0]]
				; SI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
				; SI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
				; SI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
				; SI: ret i16 %[[SEL_16]]
				; Test input: i16 select whose condition is an unsigned 'ult' compare of the
				; same two operands. The expectations above want the icmp and the select each
				; widened to i32 using zext, then a trunc of the selected value back to i16.
				define i16 @select_ult_i16(i16 %a, i16 %b) {
				%cmp = icmp ult i16 %a, %b
				%sel = select i1 %cmp, i16 %a, i16 %b
				ret i16 %sel
				}

				; SI-LABEL: @select_ule_i16(
				; SI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
				; SI: %[[CMP:[0-9]+]] = icmp ule i32 %[[A_32_0]], %[[B_32_0]]
				; SI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
				; SI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
				; SI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
				; SI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
				; SI: ret i16 %[[SEL_16]]
				; Test input: i16 select whose condition is an unsigned 'ule' compare of the
				; same two operands. The expectations above want the icmp and the select each
				; widened to i32 using zext, then a trunc of the selected value back to i16.
				define i16 @select_ule_i16(i16 %a, i16 %b) {
				%cmp = icmp ule i16 %a, %b
				%sel = select i1 %cmp, i16 %a, i16 %b
				ret i16 %sel
				}

				; SI-LABEL: @select_sgt_i16(
				; SI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32
				; SI: %[[B_32_0:[0-9]+]] = sext i16 %b to i32
				; SI: %[[CMP:[0-9]+]] = icmp sgt i32 %[[A_32_0]], %[[B_32_0]]
				; SI: %[[A_32_1:[0-9]+]] = sext i16 %a to i32
				; SI: %[[B_32_1:[0-9]+]] = sext i16 %b to i32
				; SI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
				; SI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
				; SI: ret i16 %[[SEL_16]]
				; Test input: i16 select whose condition is a signed 'sgt' compare of the same
				; two operands. The expectations above want the icmp and the select each widened
				; to i32 using sext, then a trunc of the selected value back to i16.
				define i16 @select_sgt_i16(i16 %a, i16 %b) {
				%cmp = icmp sgt i16 %a, %b
				%sel = select i1 %cmp, i16 %a, i16 %b
				ret i16 %sel
				}

				; SI-LABEL: @select_sge_i16(
				; SI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32
				; SI: %[[B_32_0:[0-9]+]] = sext i16 %b to i32
				; SI: %[[CMP:[0-9]+]] = icmp sge i32 %[[A_32_0]], %[[B_32_0]]
				; SI: %[[A_32_1:[0-9]+]] = sext i16 %a to i32
				; SI: %[[B_32_1:[0-9]+]] = sext i16 %b to i32
				; SI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
				; SI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
				; SI: ret i16 %[[SEL_16]]
				; Test input: i16 select whose condition is a signed 'sge' compare of the same
				; two operands. The expectations above want the icmp and the select each widened
				; to i32 using sext, then a trunc of the selected value back to i16.
				define i16 @select_sge_i16(i16 %a, i16 %b) {
				%cmp = icmp sge i16 %a, %b
				%sel = select i1 %cmp, i16 %a, i16 %b
				ret i16 %sel
				}

				; SI-LABEL: @select_slt_i16(
				; SI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32
				; SI: %[[B_32_0:[0-9]+]] = sext i16 %b to i32
				; SI: %[[CMP:[0-9]+]] = icmp slt i32 %[[A_32_0]], %[[B_32_0]]
				; SI: %[[A_32_1:[0-9]+]] = sext i16 %a to i32
				; SI: %[[B_32_1:[0-9]+]] = sext i16 %b to i32
				; SI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
				; SI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
				; SI: ret i16 %[[SEL_16]]
				; Test input: i16 select whose condition is a signed 'slt' compare of the same
				; two operands. The expectations above want the icmp and the select each widened
				; to i32 using sext, then a trunc of the selected value back to i16.
				define i16 @select_slt_i16(i16 %a, i16 %b) {
				%cmp = icmp slt i16 %a, %b
				%sel = select i1 %cmp, i16 %a, i16 %b
				ret i16 %sel
				}

				; SI-LABEL: @select_sle_i16(
				; SI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32
				; SI: %[[B_32_0:[0-9]+]] = sext i16 %b to i32
				; SI: %[[CMP:[0-9]+]] = icmp sle i32 %[[A_32_0]], %[[B_32_0]]
				; SI: %[[A_32_1:[0-9]+]] = sext i16 %a to i32
				; SI: %[[B_32_1:[0-9]+]] = sext i16 %b to i32
				; SI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
				; SI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
				; SI: ret i16 %[[SEL_16]]
				; Test input: i16 select whose condition is a signed 'sle' compare of the same
				; two operands. The expectations above want the icmp and the select each widened
				; to i32 using sext, then a trunc of the selected value back to i16.
				define i16 @select_sle_i16(i16 %a, i16 %b) {
				%cmp = icmp sle i16 %a, %b
				%sel = select i1 %cmp, i16 %a, i16 %b
				ret i16 %sel
				}

				; SI-LABEL: @add_3xi16(
				; SI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[R_32:[0-9]+]] = add <3 x i32> %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[R_16]]
				; Test input: plain <3 x i16> add (no wrap flags). The expectations above want
				; both operands zero-extended to <3 x i32>, a 32-bit add, and a trunc back.
				define <3 x i16> @add_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%r = add <3 x i16> %a, %b
				ret <3 x i16> %r
				}

				; SI-LABEL: @add_nsw_3xi16(
				; SI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[R_32:[0-9]+]] = add nsw <3 x i32> %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[R_16]]
				; Test input: <3 x i16> add carrying nsw. The expectations above want the flag
				; carried over to the widened <3 x i32> add of the zero-extended operands.
				define <3 x i16> @add_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%r = add nsw <3 x i16> %a, %b
				ret <3 x i16> %r
				}

				; SI-LABEL: @add_nuw_3xi16(
				; SI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[R_32:[0-9]+]] = add nuw <3 x i32> %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[R_16]]
				; Test input: <3 x i16> add carrying nuw. The expectations above want the flag
				; carried over to the widened <3 x i32> add of the zero-extended operands.
				define <3 x i16> @add_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%r = add nuw <3 x i16> %a, %b
				ret <3 x i16> %r
				}

				; SI-LABEL: @add_nuw_nsw_3xi16(
				; SI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[R_16]]
				; Test input: <3 x i16> add carrying both nuw and nsw. The expectations above
				; want both flags carried over to the widened <3 x i32> add of the zero-extended
				; operands.
				define <3 x i16> @add_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%r = add nuw nsw <3 x i16> %a, %b
				ret <3 x i16> %r
				}

				; SI-LABEL: @sub_3xi16(
				; SI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[R_32:[0-9]+]] = sub <3 x i32> %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[R_16]]
				; Test input: plain <3 x i16> sub (no wrap flags). The expectations above want
				; both operands zero-extended to <3 x i32>, a 32-bit sub, and a trunc back.
				define <3 x i16> @sub_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%r = sub <3 x i16> %a, %b
				ret <3 x i16> %r
				}

				; SI-LABEL: @sub_nsw_3xi16(
				; SI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[R_32:[0-9]+]] = sub nsw <3 x i32> %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[R_16]]
				; Test input: <3 x i16> sub carrying nsw. The expectations above want the flag
				; carried over to the widened <3 x i32> sub of the zero-extended operands.
				define <3 x i16> @sub_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%r = sub nsw <3 x i16> %a, %b
				ret <3 x i16> %r
				}

				; SI-LABEL: @sub_nuw_3xi16(
				; SI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[R_32:[0-9]+]] = sub nuw <3 x i32> %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[R_16]]
				; Test input: <3 x i16> sub carrying nuw. The expectations above want the flag
				; carried over to the widened <3 x i32> sub of the zero-extended operands.
				define <3 x i16> @sub_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%r = sub nuw <3 x i16> %a, %b
				ret <3 x i16> %r
				}

				; SI-LABEL: @sub_nuw_nsw_3xi16(
				; SI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[R_32:[0-9]+]] = sub nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[R_16]]
				; Test input: <3 x i16> sub carrying both nuw and nsw. The expectations above
				; want both flags carried over to the widened <3 x i32> sub of the zero-extended
				; operands.
				define <3 x i16> @sub_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%r = sub nuw nsw <3 x i16> %a, %b
				ret <3 x i16> %r
				}

				; SI-LABEL: @mul_3xi16(
				; SI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[R_32:[0-9]+]] = mul <3 x i32> %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[R_16]]
				; Test input: plain <3 x i16> mul (no wrap flags). The expectations above want
				; both operands zero-extended to <3 x i32>, a 32-bit mul, and a trunc back.
				define <3 x i16> @mul_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%r = mul <3 x i16> %a, %b
				ret <3 x i16> %r
				}

				; SI-LABEL: @mul_nsw_3xi16(
				; SI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[R_32:[0-9]+]] = mul nsw <3 x i32> %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[R_16]]
				; Test input: <3 x i16> mul carrying nsw.
				; NOTE(review): the expectations above keep nsw on the <3 x i32> mul of
				; zero-extended operands. For lanes where %a = %b = -1 the i16 mul does not wrap
				; (result 1), yet 65535 * 65535 exceeds the signed i32 range, so the widened
				; mul nsw would be poison. Confirm whether nsw may be carried over here.
				define <3 x i16> @mul_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%r = mul nsw <3 x i16> %a, %b
				ret <3 x i16> %r
				}

				; SI-LABEL: @mul_nuw_3xi16(
				; SI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[R_32:[0-9]+]] = mul nuw <3 x i32> %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[R_16]]
				; Test input: <3 x i16> mul carrying nuw. The expectations above want the flag
				; carried over to the widened <3 x i32> mul of the zero-extended operands.
				define <3 x i16> @mul_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%r = mul nuw <3 x i16> %a, %b
				ret <3 x i16> %r
				}

				; SI-LABEL: @mul_nuw_nsw_3xi16(
				; SI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[R_32:[0-9]+]] = mul nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[R_16]]
				; Test input: <3 x i16> mul carrying both nuw and nsw. The expectations above
				; want both flags carried over to the widened <3 x i32> mul of the zero-extended
				; operands.
				define <3 x i16> @mul_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%r = mul nuw nsw <3 x i16> %a, %b
				ret <3 x i16> %r
				}

				; SI-LABEL: @urem_3xi16(
				; SI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[R_32:[0-9]+]] = urem <3 x i32> %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[R_16]]
				; Test input: <3 x i16> unsigned remainder. The expectations above want zext of
				; both operands to <3 x i32>, a 32-bit urem, and a trunc back.
				define <3 x i16> @urem_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%r = urem <3 x i16> %a, %b
				ret <3 x i16> %r
				}

				; SI-LABEL: @srem_3xi16(
				; SI: %[[A_32:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
				; SI: %[[R_32:[0-9]+]] = srem <3 x i32> %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[R_16]]
				; Test input: <3 x i16> signed remainder. The expectations above want sext (not
				; zext) of both operands to <3 x i32>, a 32-bit srem, and a trunc back.
				define <3 x i16> @srem_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%r = srem <3 x i16> %a, %b
				ret <3 x i16> %r
				}

				; SI-LABEL: @shl_3xi16(
				; SI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[R_32:[0-9]+]] = shl <3 x i32> %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[R_16]]
				; Test input: plain <3 x i16> shl (no wrap flags). The expectations above want
				; both operands zero-extended to <3 x i32>, a 32-bit shl, and a trunc back.
				define <3 x i16> @shl_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%r = shl <3 x i16> %a, %b
				ret <3 x i16> %r
				}

				; SI-LABEL: @shl_nsw_3xi16(
				; SI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[R_32:[0-9]+]] = shl nsw <3 x i32> %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[R_16]]
				; Test input: <3 x i16> shl carrying nsw. The expectations above want the flag
				; carried over to the widened <3 x i32> shl of the zero-extended operands.
				define <3 x i16> @shl_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%r = shl nsw <3 x i16> %a, %b
				ret <3 x i16> %r
				}

				; SI-LABEL: @shl_nuw_3xi16(
				; SI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[R_32:[0-9]+]] = shl nuw <3 x i32> %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[R_16]]
				; Test input: <3 x i16> shl carrying nuw. The expectations above want the flag
				; carried over to the widened <3 x i32> shl of the zero-extended operands.
				define <3 x i16> @shl_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%r = shl nuw <3 x i16> %a, %b
				ret <3 x i16> %r
				}

				; SI-LABEL: @shl_nuw_nsw_3xi16(
				; SI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[R_16]]
				; Test input: <3 x i16> shl carrying both nuw and nsw. The expectations above
				; want both flags carried over to the widened <3 x i32> shl of the zero-extended
				; operands.
				define <3 x i16> @shl_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%r = shl nuw nsw <3 x i16> %a, %b
				ret <3 x i16> %r
				}

				; SI-LABEL: @lshr_3xi16(
				; SI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[R_32:[0-9]+]] = lshr <3 x i32> %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[R_16]]
				; Test input: plain <3 x i16> lshr (logical shift right). The expectations above
				; want both operands zero-extended to <3 x i32>, a 32-bit lshr, and a trunc back.
				define <3 x i16> @lshr_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%r = lshr <3 x i16> %a, %b
				ret <3 x i16> %r
				}

				; SI-LABEL: @lshr_exact_3xi16(
				; SI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[R_32:[0-9]+]] = lshr exact <3 x i32> %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[R_16]]
				; Test input: <3 x i16> lshr carrying the exact flag. The expectations above want
				; the flag carried over to the widened <3 x i32> lshr of the zero-extended
				; operands.
				define <3 x i16> @lshr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%r = lshr exact <3 x i16> %a, %b
				ret <3 x i16> %r
				}

				; SI-LABEL: @ashr_3xi16(
				; SI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[R_32:[0-9]+]] = ashr <3 x i32> %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[R_16]]
				; Test input: plain <3 x i16> ashr (arithmetic shift right).
				; NOTE(review): the expectations above zero-extend %a before the 32-bit ashr.
				; For a lane with %a = 0x8000, %b = 1 the i16 result is 0xC000, but
				; zext + ashr i32 + trunc gives 0x4000. The shifted operand presumably needs
				; sext - confirm in the pass.
				define <3 x i16> @ashr_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%r = ashr <3 x i16> %a, %b
				ret <3 x i16> %r
				}

				; SI-LABEL: @ashr_exact_3xi16(
				; SI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[R_32:[0-9]+]] = ashr exact <3 x i32> %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[R_16]]
				; Test input: <3 x i16> ashr carrying the exact flag; the expectations above
				; want the flag carried over to the widened <3 x i32> ashr.
				; NOTE(review): the expectations above zero-extend %a before the 32-bit ashr.
				; For a lane with %a = 0x8000, %b = 1 the i16 result is 0xC000, but
				; zext + ashr i32 + trunc gives 0x4000. The shifted operand presumably needs
				; sext - confirm in the pass.
				define <3 x i16> @ashr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%r = ashr exact <3 x i16> %a, %b
				ret <3 x i16> %r
				}

				; SI-LABEL: @and_3xi16(
				; SI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[R_32:[0-9]+]] = and <3 x i32> %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[R_16]]
				; Test input: <3 x i16> bitwise and. The expectations above want both operands
				; zero-extended to <3 x i32>, a 32-bit and, and a trunc back.
				define <3 x i16> @and_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%r = and <3 x i16> %a, %b
				ret <3 x i16> %r
				}

				; SI-LABEL: @or_3xi16(
				; SI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[R_32:[0-9]+]] = or <3 x i32> %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[R_16]]
				; Test input: <3 x i16> bitwise or. The expectations above want both operands
				; zero-extended to <3 x i32>, a 32-bit or, and a trunc back.
				define <3 x i16> @or_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%r = or <3 x i16> %a, %b
				ret <3 x i16> %r
				}

				; SI-LABEL: @xor_3xi16(
				; SI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[R_32:[0-9]+]] = xor <3 x i32> %[[A_32]], %[[B_32]]
				; SI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[R_16]]
				; Test input: <3 x i16> bitwise xor. The expectations above want both operands
				; zero-extended to <3 x i32>, a 32-bit xor, and a trunc back.
				define <3 x i16> @xor_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%r = xor <3 x i16> %a, %b
				ret <3 x i16> %r
				}

				; SI-LABEL: @select_eq_3xi16(
				; SI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[CMP:[0-9]+]] = icmp eq <3 x i32> %[[A_32_0]], %[[B_32_0]]
				; SI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
				; SI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[SEL_16]]
				; Test input: <3 x i16> select whose condition is an 'eq' compare of the same
				; two operands. The expectations above want the icmp and the select each widened
				; to <3 x i32> using zext, then a trunc of the selected value back to <3 x i16>.
				define <3 x i16> @select_eq_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%cmp = icmp eq <3 x i16> %a, %b
				%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
				ret <3 x i16> %sel
				}

				; SI-LABEL: @select_ne_3xi16(
				; SI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[CMP:[0-9]+]] = icmp ne <3 x i32> %[[A_32_0]], %[[B_32_0]]
				; SI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
				; SI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[SEL_16]]
				; Test input: <3 x i16> select whose condition is an 'ne' compare of the same
				; two operands. The expectations above want the icmp and the select each widened
				; to <3 x i32> using zext, then a trunc of the selected value back to <3 x i16>.
				define <3 x i16> @select_ne_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%cmp = icmp ne <3 x i16> %a, %b
				%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
				ret <3 x i16> %sel
				}

				; SI-LABEL: @select_ugt_3xi16(
				; SI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[CMP:[0-9]+]] = icmp ugt <3 x i32> %[[A_32_0]], %[[B_32_0]]
				; SI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
				; SI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[SEL_16]]
				; Test input: <3 x i16> select whose condition is an unsigned 'ugt' compare of
				; the same two operands. The expectations above want the icmp and the select each
				; widened to <3 x i32> using zext, then a trunc of the selected value back.
				define <3 x i16> @select_ugt_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%cmp = icmp ugt <3 x i16> %a, %b
				%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
				ret <3 x i16> %sel
				}

				; SI-LABEL: @select_uge_3xi16(
				; SI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[CMP:[0-9]+]] = icmp uge <3 x i32> %[[A_32_0]], %[[B_32_0]]
				; SI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
				; SI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[SEL_16]]
				; Test input: <3 x i16> select whose condition is an unsigned 'uge' compare of
				; the same two operands. The expectations above want the icmp and the select each
				; widened to <3 x i32> using zext, then a trunc of the selected value back.
				define <3 x i16> @select_uge_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%cmp = icmp uge <3 x i16> %a, %b
				%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
				ret <3 x i16> %sel
				}

				; SI-LABEL: @select_ult_3xi16(
				; SI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[CMP:[0-9]+]] = icmp ult <3 x i32> %[[A_32_0]], %[[B_32_0]]
				; SI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
				; SI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[SEL_16]]
				; Test input: <3 x i16> select whose condition is an unsigned 'ult' compare of
				; the same two operands. The expectations above want the icmp and the select each
				; widened to <3 x i32> using zext, then a trunc of the selected value back.
				define <3 x i16> @select_ult_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%cmp = icmp ult <3 x i16> %a, %b
				%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
				ret <3 x i16> %sel
				}

				; SI-LABEL: @select_ule_3xi16(
				; SI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[CMP:[0-9]+]] = icmp ule <3 x i32> %[[A_32_0]], %[[B_32_0]]
				; SI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
				; SI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
				; SI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[SEL_16]]
				; Test input: <3 x i16> select whose condition is an unsigned 'ule' compare of
				; the same two operands. The expectations above want the icmp and the select each
				; widened to <3 x i32> using zext, then a trunc of the selected value back.
				define <3 x i16> @select_ule_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%cmp = icmp ule <3 x i16> %a, %b
				%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
				ret <3 x i16> %sel
				}

				; SI-LABEL: @select_sgt_3xi16(
				; SI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
				; SI: %[[CMP:[0-9]+]] = icmp sgt <3 x i32> %[[A_32_0]], %[[B_32_0]]
				; SI: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
				; SI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
				; SI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[SEL_16]]
				; Test input: <3 x i16> select whose condition is a signed 'sgt' compare of the
				; same two operands. The expectations above want the icmp and the select each
				; widened to <3 x i32> using sext, then a trunc of the selected value back.
				define <3 x i16> @select_sgt_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%cmp = icmp sgt <3 x i16> %a, %b
				%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
				ret <3 x i16> %sel
				}

				; SI-LABEL: @select_sge_3xi16(
				; SI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
				; SI: %[[CMP:[0-9]+]] = icmp sge <3 x i32> %[[A_32_0]], %[[B_32_0]]
				; SI: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
				; SI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
				; SI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[SEL_16]]
				; Test input: <3 x i16> select whose condition is a signed 'sge' compare of the
				; same two operands. The expectations above want the icmp and the select each
				; widened to <3 x i32> using sext, then a trunc of the selected value back.
				define <3 x i16> @select_sge_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%cmp = icmp sge <3 x i16> %a, %b
				%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
				ret <3 x i16> %sel
				}

				; SI-LABEL: @select_slt_3xi16(
				; SI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
				; SI: %[[CMP:[0-9]+]] = icmp slt <3 x i32> %[[A_32_0]], %[[B_32_0]]
				; SI: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
				; SI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
				; SI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[SEL_16]]
				; Test input: <3 x i16> select whose condition is a signed 'slt' compare of the
				; same two operands. The expectations above want the icmp and the select each
				; widened to <3 x i32> using sext, then a trunc of the selected value back.
				define <3 x i16> @select_slt_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%cmp = icmp slt <3 x i16> %a, %b
				%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
				ret <3 x i16> %sel
				}

				; SI-LABEL: @select_sle_3xi16(
				; SI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
				; SI: %[[CMP:[0-9]+]] = icmp sle <3 x i32> %[[A_32_0]], %[[B_32_0]]
				; SI: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
				; SI: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
				; SI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
				; SI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
				; SI: ret <3 x i16> %[[SEL_16]]
				; Test input: <3 x i16> select whose condition is a signed 'sle' compare of the
				; same two operands. The expectations above want the icmp and the select each
				; widened to <3 x i32> using sext, then a trunc of the selected value back.
				define <3 x i16> @select_sle_3xi16(<3 x i16> %a, <3 x i16> %b) {
				%cmp = icmp sle <3 x i16> %a, %b
				%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
				ret <3 x i16> %sel
				}

test/CodeGen/AMDGPU/amdgpu-codegenprepare.ll

This file was deleted.

	; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s \| FileCheck %s
	; RUN: opt -S -amdgpu-codegenprepare %s \| FileCheck -check-prefix=NOOP %s
	; Make sure this doesn't crash with no triple

	; NOOP-LABEL: @noop_fdiv_fpmath(
	; NOOP: %md.25ulp = fdiv float %a, %b, !fpmath !0
	; Test input for the run line that invokes the pass without a triple. The
	; expectation above wants the !fpmath-tagged fdiv to come out unchanged.
	; NOTE(review): !0 and attribute group #3 are defined outside this chunk.
	define void @noop_fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #3 {
	%md.25ulp = fdiv float %a, %b, !fpmath !0
	store volatile float %md.25ulp, float addrspace(1)* %out
	ret void
	}

	; CHECK-LABEL: @fdiv_fpmath(
	; CHECK: %no.md = fdiv float %a, %b{{$}}
	; CHECK: %md.half.ulp = fdiv float %a, %b, !fpmath !1
	; CHECK: %md.1ulp = fdiv float %a, %b, !fpmath !2
	; CHECK: %md.25ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
	; CHECK: %md.3ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !3
	; CHECK: %fast.md.25ulp = call fast float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
	; CHECK: %arcp.md.25ulp = call arcp float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
	; Test input: one scalar fdiv per combination of !fpmath tag and fast-math flag,
	; each result written to %out with a volatile store. Per the expectations above,
	; the fdivs with no tag, !1 and !2 stay as fdiv, while those tagged !0 or !3
	; become calls to llvm.amdgcn.fdiv.fast that keep their fast-math flags and
	; their !fpmath tag.
	; NOTE(review): !0-!3 and attribute group #1 are defined outside this chunk; the
	; value names suggest 2.5, 0.5, 1 and 3 ULP respectively - confirm.
	define void @fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #1 {
	%no.md = fdiv float %a, %b
	store volatile float %no.md, float addrspace(1)* %out

	%md.half.ulp = fdiv float %a, %b, !fpmath !1
	store volatile float %md.half.ulp, float addrspace(1)* %out

	%md.1ulp = fdiv float %a, %b, !fpmath !2
	store volatile float %md.1ulp, float addrspace(1)* %out

	%md.25ulp = fdiv float %a, %b, !fpmath !0
	store volatile float %md.25ulp, float addrspace(1)* %out

	%md.3ulp = fdiv float %a, %b, !fpmath !3
	store volatile float %md.3ulp, float addrspace(1)* %out

	%fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
	store volatile float %fast.md.25ulp, float addrspace(1)* %out

	%arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
	store volatile float %arcp.md.25ulp, float addrspace(1)* %out

	ret void
	}

	; CHECK-LABEL: @rcp_fdiv_fpmath(
	; CHECK: %no.md = fdiv float 1.000000e+00, %x{{$}}
	; CHECK: %md.25ulp = fdiv float 1.000000e+00, %x, !fpmath !0
	; CHECK: %md.half.ulp = fdiv float 1.000000e+00, %x, !fpmath !1
	; CHECK: %arcp.no.md = fdiv arcp float 1.000000e+00, %x{{$}}
	; CHECK: %arcp.25ulp = fdiv arcp float 1.000000e+00, %x, !fpmath !0
	; CHECK: %fast.no.md = fdiv fast float 1.000000e+00, %x{{$}}
	; CHECK: %fast.25ulp = fdiv fast float 1.000000e+00, %x, !fpmath !0
	; Test input: reciprocal-style fdivs (constant 1.0 numerator) with and without
	; an !fpmath tag and with arcp/fast flags. The expectations above want every one
	; of them left as an fdiv, i.e. no llvm.amdgcn.fdiv.fast call is introduced.
	; NOTE(review): !0, !1 and attribute group #1 are defined outside this chunk.
	define void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #1 {
	%no.md = fdiv float 1.0, %x
	store volatile float %no.md, float addrspace(1)* %out

	%md.25ulp = fdiv float 1.0, %x, !fpmath !0
	store volatile float %md.25ulp, float addrspace(1)* %out

	%md.half.ulp = fdiv float 1.0, %x, !fpmath !1
	store volatile float %md.half.ulp, float addrspace(1)* %out

	%arcp.no.md = fdiv arcp float 1.0, %x
	store volatile float %arcp.no.md, float addrspace(1)* %out

	%arcp.25ulp = fdiv arcp float 1.0, %x, !fpmath !0
	store volatile float %arcp.25ulp, float addrspace(1)* %out

	%fast.no.md = fdiv fast float 1.0, %x
	store volatile float %fast.no.md, float addrspace(1)* %out

	%fast.25ulp = fdiv fast float 1.0, %x, !fpmath !0
	store volatile float %fast.25ulp, float addrspace(1)* %out

	ret void
	}

	; CHECK-LABEL: @fdiv_fpmath_vector(
	; CHECK: %no.md = fdiv <2 x float> %a, %b{{$}}
	; CHECK: %md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1
	; CHECK: %md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2

	; CHECK: %[[A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
	; CHECK: %[[B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
	; CHECK: %[[FDIV0:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A0]], float %[[B0]]), !fpmath !0
	; CHECK: %[[INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FDIV0]], i64 0
	; CHECK: %[[A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
	; CHECK: %[[B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
	; CHECK: %[[FDIV1:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A1]], float %[[B1]]), !fpmath !0
	; CHECK: %md.25ulp = insertelement <2 x float> %[[INS0]], float %[[FDIV1]], i64 1
	; <2 x float> variant of the scalar accuracy test. According to the
	; expectations above, the divisions without metadata, with 0.5 and with 1.0
	; stay as vector fdiv, while the 2.5 case is split per element into
	; extractelement / @llvm.amdgcn.fdiv.fast / insertelement, with the final
	; insertelement taking over the name %md.25ulp.
	define void @fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #1 {
	; No accuracy metadata.
	%no.md = fdiv <2 x float> %a, %b
	store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out

	; !fpmath !1 (0.5).
	%md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1
	store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out

	; !fpmath !2 (1.0).
	%md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2
	store volatile <2 x float> %md.1ulp, <2 x float> addrspace(1)* %out

	; !fpmath !0 (2.5): the only division here expected to be scalarized.
	%md.25ulp = fdiv <2 x float> %a, %b, !fpmath !0
	store volatile <2 x float> %md.25ulp, <2 x float> addrspace(1)* %out

	ret void
	}

	; CHECK-LABEL: @rcp_fdiv_fpmath_vector(
	; CHECK: %no.md = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}}
	; CHECK: %md.half.ulp = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !1
	; CHECK: %arcp.no.md = fdiv arcp <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}}
	; CHECK: %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}}

	; CHECK: extractelement <2 x float> %x
	; CHECK: fdiv arcp float 1.000000e+00, %{{[0-9]+}}, !fpmath !0
	; CHECK: extractelement <2 x float> %x
	; CHECK: fdiv arcp float 1.000000e+00, %{{[0-9]+}}, !fpmath !0
	; CHECK: store volatile <2 x float> %arcp.25ulp

	; CHECK: fdiv fast float 1.000000e+00, %{{[0-9]+}}, !fpmath !0
	; CHECK: fdiv fast float 1.000000e+00, %{{[0-9]+}}, !fpmath !0
	; CHECK: store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
	; Vector reciprocal with a splat <1.0, 1.0> numerator. Per the expectations
	; above, the first four divisions stay as vector fdiv. The two divisions
	; carrying !fpmath !0 (2.5) are broken into per-element scalar
	; 'fdiv 1.0, elt' instructions that keep their arcp/fast flag and the
	; metadata.
	define void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 {
	; No flags, no metadata.
	%no.md = fdiv <2 x float> <float 1.0, float 1.0>, %x
	store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out

	; !fpmath !1 (0.5).
	%md.half.ulp = fdiv <2 x float> <float 1.0, float 1.0>, %x, !fpmath !1
	store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out

	; 'arcp' without metadata.
	%arcp.no.md = fdiv arcp <2 x float> <float 1.0, float 1.0>, %x
	store volatile <2 x float> %arcp.no.md, <2 x float> addrspace(1)* %out

	; 'fast' without metadata.
	%fast.no.md = fdiv fast <2 x float> <float 1.0, float 1.0>, %x
	store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out

	; 'arcp' with !fpmath !0 (2.5): expected to be split per element.
	%arcp.25ulp = fdiv arcp <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0
	store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out

	; 'fast' with !fpmath !0 (2.5): expected to be split per element.
	%fast.25ulp = fdiv fast <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0
	store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out

	ret void
	}

	; CHECK-LABEL: @rcp_fdiv_fpmath_vector_nonsplat(
	; CHECK: %no.md = fdiv <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x
	; CHECK: %arcp.no.md = fdiv arcp <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x
	; CHECK: %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x{{$}}

	; CHECK: %[[X0:[0-9]+]] = extractelement <2 x float> %x, i64 0
	; CHECK: fdiv arcp float 1.000000e+00, %[[X0]], !fpmath !0
	; CHECK: %[[X1:[0-9]+]] = extractelement <2 x float> %x, i64 1
	; CHECK: fdiv arcp float 2.000000e+00, %[[X1]], !fpmath !0
	; CHECK: store volatile <2 x float> %arcp.25ulp

	; CHECK: %[[X0:[0-9]+]] = extractelement <2 x float> %x, i64 0
	; CHECK: fdiv fast float 1.000000e+00, %[[X0]], !fpmath !0
	; CHECK: %[[X1:[0-9]+]] = extractelement <2 x float> %x, i64 1
	; CHECK: fdiv fast float 2.000000e+00, %[[X1]], !fpmath !0
	; CHECK: store volatile <2 x float> %fast.25ulp
	; Same shape as the splat reciprocal test, but the constant numerator is
	; <1.0, 2.0>, so the two lanes differ. The expectations above keep the
	; divisions without metadata as vector fdiv; the !fpmath !0 (2.5) divisions
	; are split into scalar 'fdiv 1.0, x[0]' and 'fdiv 2.0, x[1]' with flags and
	; metadata preserved.
	define void @rcp_fdiv_fpmath_vector_nonsplat(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 {
	; No flags, no metadata.
	%no.md = fdiv <2 x float> <float 1.0, float 2.0>, %x
	store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out

	; 'arcp' without metadata.
	%arcp.no.md = fdiv arcp <2 x float> <float 1.0, float 2.0>, %x
	store volatile <2 x float> %arcp.no.md, <2 x float> addrspace(1)* %out

	; 'fast' without metadata.
	%fast.no.md = fdiv fast <2 x float> <float 1.0, float 2.0>, %x
	store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out

	; 'arcp' with !fpmath !0 (2.5): expected to be split per element.
	%arcp.25ulp = fdiv arcp <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0
	store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out

	; 'fast' with !fpmath !0 (2.5): expected to be split per element.
	%fast.25ulp = fdiv fast <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0
	store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out

	ret void
	}

	; FIXME: Should be able to get fdiv for 1.0 component
	; CHECK-LABEL: @rcp_fdiv_fpmath_vector_partial_constant(
	; CHECK: call arcp float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0
	; CHECK: call arcp float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0
	; CHECK: store volatile <2 x float> %arcp.25ulp

	; CHECK: call fast float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0
	; CHECK: call fast float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0
	; CHECK: store volatile <2 x float> %fast.25ulp
	; Numerator is only partially constant: lane 0 of %x is overwritten with 1.0
	; through insertelement, lane 1 stays variable. The expectations above
	; currently require both lanes of each division to become
	; @llvm.amdgcn.fdiv.fast calls; the FIXME above records that the 1.0 lane
	; should ideally be emitted as a plain fdiv instead.
	define void @rcp_fdiv_fpmath_vector_partial_constant(<2 x float> addrspace(1)* %out, <2 x float> %x, <2 x float> %y) #1 {
	; Build the numerator <1.0, x[1]>.
	%x.insert = insertelement <2 x float> %x, float 1.0, i32 0

	; 'arcp' with !fpmath !0 (2.5).
	%arcp.25ulp = fdiv arcp <2 x float> %x.insert, %y, !fpmath !0
	store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out

	; 'fast' with !fpmath !0 (2.5).
	%fast.25ulp = fdiv fast <2 x float> %x.insert, %y, !fpmath !0
	store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out

	ret void
	}

	; CHECK-LABEL: @fdiv_fpmath_f32_denormals(
	; CHECK: %no.md = fdiv float %a, %b{{$}}
	; CHECK: %md.half.ulp = fdiv float %a, %b, !fpmath !1
	; CHECK: %md.1ulp = fdiv float %a, %b, !fpmath !2
	; CHECK: %md.25ulp = fdiv float %a, %b, !fpmath !0
	; CHECK: %md.3ulp = fdiv float %a, %b, !fpmath !3
	; CHECK: call fast float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
	; CHECK: call arcp float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
	; Repeats the scalar accuracy test under attribute group #2, which adds the
	; "+fp32-denormals" target feature. Per the expectations above, metadata
	; alone no longer triggers the rewrite: the five divisions without
	; fast-math flags stay as fdiv, and only the 'fast' and 'arcp' divisions
	; become @llvm.amdgcn.fdiv.fast calls.
	define void @fdiv_fpmath_f32_denormals(float addrspace(1)* %out, float %a, float %b) #2 {
	; No accuracy metadata.
	%no.md = fdiv float %a, %b
	store volatile float %no.md, float addrspace(1)* %out

	; !fpmath !1 (0.5).
	%md.half.ulp = fdiv float %a, %b, !fpmath !1
	store volatile float %md.half.ulp, float addrspace(1)* %out

	; !fpmath !2 (1.0).
	%md.1ulp = fdiv float %a, %b, !fpmath !2
	store volatile float %md.1ulp, float addrspace(1)* %out

	; !fpmath !0 (2.5).
	%md.25ulp = fdiv float %a, %b, !fpmath !0
	store volatile float %md.25ulp, float addrspace(1)* %out

	; !fpmath !3 (3.0).
	%md.3ulp = fdiv float %a, %b, !fpmath !3
	store volatile float %md.3ulp, float addrspace(1)* %out

	; 'fast' with !fpmath !0 (2.5): expected to become the intrinsic call.
	%fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
	store volatile float %fast.md.25ulp, float addrspace(1)* %out

	; 'arcp' with !fpmath !0 (2.5): expected to become the intrinsic call.
	%arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
	store volatile float %arcp.md.25ulp, float addrspace(1)* %out

	ret void
	}

	attributes #0 = { nounwind optnone noinline }
	attributes #1 = { nounwind }
	attributes #2 = { nounwind "target-features"="+fp32-denormals" }

	; CHECK: !0 = !{float 2.500000e+00}
	; CHECK: !1 = !{float 5.000000e-01}
	; CHECK: !2 = !{float 1.000000e+00}
	; CHECK: !3 = !{float 3.000000e+00}

	!0 = !{float 2.500000e+00}
	!1 = !{float 5.000000e-01}
	!2 = !{float 1.000000e+00}
	!3 = !{float 3.000000e+00}

test/CodeGen/AMDGPU/ctlz.ll

	; RUN: llc -march=amdgcn -verify-machineinstrs < %s \| FileCheck -check-prefix=SI -check-prefix=FUNC %s			; RUN: llc -march=amdgcn -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
	; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s \| FileCheck -check-prefix=SI -check-prefix=FUNC %s			; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
	; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s \| FileCheck -check-prefix=EG -check-prefix=FUNC %s			; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s \| FileCheck -check-prefix=EG -check-prefix=FUNC %s

	declare i7 @llvm.ctlz.i7(i7, i1) nounwind readnone			declare i7 @llvm.ctlz.i7(i7, i1) nounwind readnone
	declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone			declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
	declare i16 @llvm.ctlz.i16(i16, i1) nounwind readnone			declare i16 @llvm.ctlz.i16(i16, i1) nounwind readnone

	declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone			declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
	declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone			declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
	declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone			declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone

	declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone			declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
	declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readnone			declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readnone
	declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) nounwind readnone			declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) nounwind readnone

	declare i32 @llvm.r600.read.tidig.x() nounwind readnone			declare i32 @llvm.r600.read.tidig.x() nounwind readnone

	; FUNC-LABEL: {{^}}s_ctlz_i32:			; FUNC-LABEL: {{^}}s_ctlz_i32:
	; SI: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb\|0x2c}}			; GCN: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb\|0x2c}}
	; SI-DAG: s_flbit_i32_b32 [[CTLZ:s[0-9]+]], [[VAL]]			; GCN-DAG: s_flbit_i32_b32 [[CTLZ:s[0-9]+]], [[VAL]]
	; SI-DAG: v_cmp_eq_i32_e64 [[CMPZ:s\[[0-9]+:[0-9]+\]]], [[VAL]], 0{{$}}			; GCN-DAG: v_cmp_eq_i32_e64 [[CMPZ:s\[[0-9]+:[0-9]+\]]], [[VAL]], 0{{$}}
	; SI-DAG: v_mov_b32_e32 [[VCTLZ:v[0-9]+]], [[CTLZ]]			; GCN-DAG: v_mov_b32_e32 [[VCTLZ:v[0-9]+]], [[CTLZ]]
	; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], [[VCTLZ]], 32, [[CMPZ]]			; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], [[VCTLZ]], 32, [[CMPZ]]
	; SI: buffer_store_dword [[RESULT]]			; GCN: buffer_store_dword [[RESULT]]
	; SI: s_endpgm			; GCN: s_endpgm

	; EG: FFBH_UINT			; EG: FFBH_UINT
	; EG: CNDE_INT			; EG: CNDE_INT
	define void @s_ctlz_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {			define void @s_ctlz_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
	%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone			%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
	store i32 %ctlz, i32 addrspace(1)* %out, align 4			store i32 %ctlz, i32 addrspace(1)* %out, align 4
	ret void			ret void
	}			}

	; FUNC-LABEL: {{^}}v_ctlz_i32:			; FUNC-LABEL: {{^}}v_ctlz_i32:
	; SI: buffer_load_dword [[VAL:v[0-9]+]],			; GCN: buffer_load_dword [[VAL:v[0-9]+]],
	; SI-DAG: v_ffbh_u32_e32 [[CTLZ:v[0-9]+]], [[VAL]]			; GCN-DAG: v_ffbh_u32_e32 [[CTLZ:v[0-9]+]], [[VAL]]
	; SI-DAG: v_cmp_eq_i32_e32 vcc, 0, [[CTLZ]]			; GCN-DAG: v_cmp_eq_i32_e32 vcc, 0, [[CTLZ]]
	; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], [[CTLZ]], 32, vcc			; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], [[CTLZ]], 32, vcc
	; SI: buffer_store_dword [[RESULT]],			; GCN: buffer_store_dword [[RESULT]],
	; SI: s_endpgm			; GCN: s_endpgm

	; EG: FFBH_UINT			; EG: FFBH_UINT
	; EG: CNDE_INT			; EG: CNDE_INT
	define void @v_ctlz_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {			define void @v_ctlz_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
	%val = load i32, i32 addrspace(1)* %valptr, align 4			%val = load i32, i32 addrspace(1)* %valptr, align 4
	%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone			%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
	store i32 %ctlz, i32 addrspace(1)* %out, align 4			store i32 %ctlz, i32 addrspace(1)* %out, align 4
	ret void			ret void
	}			}

	; FUNC-LABEL: {{^}}v_ctlz_v2i32:			; FUNC-LABEL: {{^}}v_ctlz_v2i32:
	; SI: buffer_load_dwordx2			; GCN: buffer_load_dwordx2
	; SI: v_ffbh_u32_e32			; GCN: v_ffbh_u32_e32
	; SI: v_ffbh_u32_e32			; GCN: v_ffbh_u32_e32
	; SI: buffer_store_dwordx2			; GCN: buffer_store_dwordx2
	; SI: s_endpgm			; GCN: s_endpgm

	; EG: FFBH_UINT			; EG: FFBH_UINT
	; EG: CNDE_INT			; EG: CNDE_INT
	; EG: FFBH_UINT			; EG: FFBH_UINT
	; EG: CNDE_INT			; EG: CNDE_INT
	define void @v_ctlz_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {			define void @v_ctlz_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
	%val = load <2 x i32>, <2 x i32> addrspace(1)* %valptr, align 8			%val = load <2 x i32>, <2 x i32> addrspace(1)* %valptr, align 8
	%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 false) nounwind readnone			%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 false) nounwind readnone
	store <2 x i32> %ctlz, <2 x i32> addrspace(1)* %out, align 8			store <2 x i32> %ctlz, <2 x i32> addrspace(1)* %out, align 8
	ret void			ret void
	}			}

	; FUNC-LABEL: {{^}}v_ctlz_v4i32:			; FUNC-LABEL: {{^}}v_ctlz_v4i32:
	; SI: buffer_load_dwordx4			; GCN: buffer_load_dwordx4
	; SI: v_ffbh_u32_e32			; GCN: v_ffbh_u32_e32
	; SI: v_ffbh_u32_e32			; GCN: v_ffbh_u32_e32
	; SI: v_ffbh_u32_e32			; GCN: v_ffbh_u32_e32
	; SI: v_ffbh_u32_e32			; GCN: v_ffbh_u32_e32
	; SI: buffer_store_dwordx4			; GCN: buffer_store_dwordx4
	; SI: s_endpgm			; GCN: s_endpgm


	; EG-DAG: FFBH_UINT			; EG-DAG: FFBH_UINT
	; EG-DAG: CNDE_INT			; EG-DAG: CNDE_INT

	; EG-DAG: FFBH_UINT			; EG-DAG: FFBH_UINT
	; EG-DAG: CNDE_INT			; EG-DAG: CNDE_INT

	; EG-DAG: FFBH_UINT			; EG-DAG: FFBH_UINT
	; EG-DAG: CNDE_INT			; EG-DAG: CNDE_INT

	; EG-DAG: FFBH_UINT			; EG-DAG: FFBH_UINT
	; EG-DAG: CNDE_INT			; EG-DAG: CNDE_INT
	define void @v_ctlz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {			define void @v_ctlz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
	%val = load <4 x i32>, <4 x i32> addrspace(1)* %valptr, align 16			%val = load <4 x i32>, <4 x i32> addrspace(1)* %valptr, align 16
	%ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 false) nounwind readnone			%ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 false) nounwind readnone
	store <4 x i32> %ctlz, <4 x i32> addrspace(1)* %out, align 16			store <4 x i32> %ctlz, <4 x i32> addrspace(1)* %out, align 16
	ret void			ret void
	}			}

	; FUNC-LABEL: {{^}}v_ctlz_i8:			; FUNC-LABEL: {{^}}v_ctlz_i8:
	; SI: buffer_load_ubyte [[VAL:v[0-9]+]],			; GCN: buffer_load_ubyte [[VAL:v[0-9]+]],
	; SI-DAG: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]			; GCN-DAG: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
	; SI-DAG: v_cmp_eq_i32_e32 vcc, 0, [[CTLZ]]			; GCN-DAG: v_cmp_eq_i32_e32 vcc, 0, [[CTLZ]]
	; SI-DAG: v_cndmask_b32_e64 [[CORRECTED_FFBH:v[0-9]+]], [[FFBH]], 32, vcc			; GCN-DAG: v_cndmask_b32_e64 [[CORRECTED_FFBH:v[0-9]+]], [[FFBH]], 32, vcc
	; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xffffffe8, [[CORRECTED_FFBH]]			; GCN: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xffffffe8, [[CORRECTED_FFBH]]
	; SI: buffer_store_byte [[RESULT]],			; GCN: buffer_store_byte [[RESULT]],
	define void @v_ctlz_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {			define void @v_ctlz_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
	%val = load i8, i8 addrspace(1)* %valptr			%val = load i8, i8 addrspace(1)* %valptr
	%ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 false) nounwind readnone			%ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 false) nounwind readnone
	store i8 %ctlz, i8 addrspace(1)* %out			store i8 %ctlz, i8 addrspace(1)* %out
	ret void			ret void
	}			}

	; FUNC-LABEL: {{^}}s_ctlz_i64:			; FUNC-LABEL: {{^}}s_ctlz_i64:
	; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb\|0x2c}}			; GCN: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb\|0x2c}}
	; SI-DAG: v_cmp_eq_i32_e64 vcc, s[[HI]], 0{{$}}			; GCN-DAG: v_cmp_eq_i32_e64 vcc, s[[HI]], 0{{$}}
	; SI-DAG: s_flbit_i32_b32 [[FFBH_LO:s[0-9]+]], s[[LO]]			; GCN-DAG: s_flbit_i32_b32 [[FFBH_LO:s[0-9]+]], s[[LO]]
	; SI-DAG: s_add_i32 [[ADD:s[0-9]+]], [[FFBH_LO]], 32			; GCN-DAG: s_add_i32 [[ADD:s[0-9]+]], [[FFBH_LO]], 32
	; SI-DAG: s_flbit_i32_b32 [[FFBH_HI:s[0-9]+]], s[[HI]]			; GCN-DAG: s_flbit_i32_b32 [[FFBH_HI:s[0-9]+]], s[[HI]]
	; SI-DAG: v_mov_b32_e32 [[VFFBH_LO:v[0-9]+]], [[ADD]]			; GCN-DAG: v_mov_b32_e32 [[VFFBH_LO:v[0-9]+]], [[ADD]]
	; SI-DAG: v_mov_b32_e32 [[VFFBH_HI:v[0-9]+]], [[FFBH_HI]]			; GCN-DAG: v_mov_b32_e32 [[VFFBH_HI:v[0-9]+]], [[FFBH_HI]]
	; SI-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[VFFBH_HI]], [[VFFBH_LO]]			; GCN-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[VFFBH_HI]], [[VFFBH_LO]]
	; SI-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}			; GCN-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
	; SI: {{buffer\|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI]]{{\]}}			; GCN: {{buffer\|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI]]{{\]}}
	define void @s_ctlz_i64(i64 addrspace(1)* noalias %out, i64 %val) nounwind {			define void @s_ctlz_i64(i64 addrspace(1)* noalias %out, i64 %val) nounwind {
	%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)			%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
	store i64 %ctlz, i64 addrspace(1)* %out			store i64 %ctlz, i64 addrspace(1)* %out
	ret void			ret void
	}			}

	; FUNC-LABEL: {{^}}s_ctlz_i64_trunc:			; FUNC-LABEL: {{^}}s_ctlz_i64_trunc:
	define void @s_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 %val) nounwind {			define void @s_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
	%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)			%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
	%trunc = trunc i64 %ctlz to i32			%trunc = trunc i64 %ctlz to i32
	store i32 %trunc, i32 addrspace(1)* %out			store i32 %trunc, i32 addrspace(1)* %out
	ret void			ret void
	}			}

	; FUNC-LABEL: {{^}}v_ctlz_i64:			; FUNC-LABEL: {{^}}v_ctlz_i64:
	; SI-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}			; GCN-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
	; SI-DAG: {{buffer\|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}			; GCN-DAG: {{buffer\|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
	; SI-DAG: v_cmp_eq_i32_e64 [[CMPHI:s\[[0-9]+:[0-9]+\]]], 0, v[[HI]]			; GCN-DAG: v_cmp_eq_i32_e64 [[CMPHI:s\[[0-9]+:[0-9]+\]]], 0, v[[HI]]
	; SI-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]]			; GCN-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]]
	; SI-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]			; GCN-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
	; SI-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]]			; GCN-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]]
	; SI-DAG: v_cndmask_b32_e64 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[ADD]], [[CMPHI]]			; GCN-DAG: v_cndmask_b32_e64 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[ADD]], [[CMPHI]]
	; SI-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[HI]], v[[LO]]			; GCN-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[HI]], v[[LO]]
	; SI-DAG: v_cmp_eq_i32_e32 vcc, 0, [[OR]]			; GCN-DAG: v_cmp_eq_i32_e32 vcc, 0, [[OR]]
	; SI-DAG: v_cndmask_b32_e64 v[[CLTZ_LO:[0-9]+]], v[[CTLZ:[0-9]+]], 64, vcc			; GCN-DAG: v_cndmask_b32_e64 v[[CLTZ_LO:[0-9]+]], v[[CTLZ:[0-9]+]], 64, vcc
	; SI: {{buffer\|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CLTZ_LO]]:[[CTLZ_HI]]{{\]}}			; GCN: {{buffer\|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CLTZ_LO]]:[[CTLZ_HI]]{{\]}}
	define void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {			define void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
	%tid = call i32 @llvm.r600.read.tidig.x()			%tid = call i32 @llvm.r600.read.tidig.x()
	%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid			%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
	%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid			%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
	%val = load i64, i64 addrspace(1)* %in.gep			%val = load i64, i64 addrspace(1)* %in.gep
	%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)			%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
	store i64 %ctlz, i64 addrspace(1)* %out.gep			store i64 %ctlz, i64 addrspace(1)* %out.gep
	ret void			ret void
	}			}

	; FUNC-LABEL: {{^}}v_ctlz_i64_trunc:			; FUNC-LABEL: {{^}}v_ctlz_i64_trunc:
	define void @v_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {			define void @v_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
	%tid = call i32 @llvm.r600.read.tidig.x()			%tid = call i32 @llvm.r600.read.tidig.x()
	%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid			%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
	%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid			%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
	%val = load i64, i64 addrspace(1)* %in.gep			%val = load i64, i64 addrspace(1)* %in.gep
	%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)			%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
	%trunc = trunc i64 %ctlz to i32			%trunc = trunc i64 %ctlz to i32
	store i32 %trunc, i32 addrspace(1)* %out.gep			store i32 %trunc, i32 addrspace(1)* %out.gep
	ret void			ret void
	}			}

	; FUNC-LABEL: {{^}}v_ctlz_i32_sel_eq_neg1:			; FUNC-LABEL: {{^}}v_ctlz_i32_sel_eq_neg1:
	; SI: buffer_load_dword [[VAL:v[0-9]+]],			; GCN: buffer_load_dword [[VAL:v[0-9]+]],
	; SI: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]			; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
	; SI: buffer_store_dword [[RESULT]],			; GCN: buffer_store_dword [[RESULT]],
	; SI: s_endpgm			; GCN: s_endpgm
	define void @v_ctlz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {			define void @v_ctlz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
	%val = load i32, i32 addrspace(1)* %valptr			%val = load i32, i32 addrspace(1)* %valptr
	%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone			%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
	%cmp = icmp eq i32 %val, 0			%cmp = icmp eq i32 %val, 0
	%sel = select i1 %cmp, i32 -1, i32 %ctlz			%sel = select i1 %cmp, i32 -1, i32 %ctlz
	store i32 %sel, i32 addrspace(1)* %out			store i32 %sel, i32 addrspace(1)* %out
	ret void			ret void
	}			}

	; FUNC-LABEL: {{^}}v_ctlz_i32_sel_ne_neg1:			; FUNC-LABEL: {{^}}v_ctlz_i32_sel_ne_neg1:
	; SI: buffer_load_dword [[VAL:v[0-9]+]],			; GCN: buffer_load_dword [[VAL:v[0-9]+]],
	; SI: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]			; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
	; SI: buffer_store_dword [[RESULT]],			; GCN: buffer_store_dword [[RESULT]],
	; SI: s_endpgm			; GCN: s_endpgm
	define void @v_ctlz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {			define void @v_ctlz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
	%val = load i32, i32 addrspace(1)* %valptr			%val = load i32, i32 addrspace(1)* %valptr
	%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone			%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
	%cmp = icmp ne i32 %val, 0			%cmp = icmp ne i32 %val, 0
	%sel = select i1 %cmp, i32 %ctlz, i32 -1			%sel = select i1 %cmp, i32 %ctlz, i32 -1
	store i32 %sel, i32 addrspace(1)* %out			store i32 %sel, i32 addrspace(1)* %out
	ret void			ret void
	}			}

	; TODO: Should be able to eliminate select here as well.			; TODO: Should be able to eliminate select here as well.
	; FUNC-LABEL: {{^}}v_ctlz_i32_sel_eq_bitwidth:			; FUNC-LABEL: {{^}}v_ctlz_i32_sel_eq_bitwidth:
	; SI: buffer_load_dword			; GCN: buffer_load_dword
	; SI: v_ffbh_u32_e32			; GCN: v_ffbh_u32_e32
	; SI: v_cmp			; GCN: v_cmp
	; SI: v_cndmask			; GCN: v_cndmask
	; SI: s_endpgm			; GCN: s_endpgm
	define void @v_ctlz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {			define void @v_ctlz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
	%val = load i32, i32 addrspace(1)* %valptr			%val = load i32, i32 addrspace(1)* %valptr
	%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone			%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
	%cmp = icmp eq i32 %ctlz, 32			%cmp = icmp eq i32 %ctlz, 32
	%sel = select i1 %cmp, i32 -1, i32 %ctlz			%sel = select i1 %cmp, i32 -1, i32 %ctlz
	store i32 %sel, i32 addrspace(1)* %out			store i32 %sel, i32 addrspace(1)* %out
	ret void			ret void
	}			}

	; FUNC-LABEL: {{^}}v_ctlz_i32_sel_ne_bitwidth:			; FUNC-LABEL: {{^}}v_ctlz_i32_sel_ne_bitwidth:
	; SI: buffer_load_dword			; GCN: buffer_load_dword
	; SI: v_ffbh_u32_e32			; GCN: v_ffbh_u32_e32
	; SI: v_cmp			; GCN: v_cmp
	; SI: v_cndmask			; GCN: v_cndmask
	; SI: s_endpgm			; GCN: s_endpgm
	define void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {			define void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
	%val = load i32, i32 addrspace(1)* %valptr			%val = load i32, i32 addrspace(1)* %valptr
	%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone			%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
	%cmp = icmp ne i32 %ctlz, 32			%cmp = icmp ne i32 %ctlz, 32
	%sel = select i1 %cmp, i32 %ctlz, i32 -1			%sel = select i1 %cmp, i32 %ctlz, i32 -1
	store i32 %sel, i32 addrspace(1)* %out			store i32 %sel, i32 addrspace(1)* %out
	ret void			ret void
	}			}

	; FUNC-LABEL: {{^}}v_ctlz_i8_sel_eq_neg1:			; FUNC-LABEL: {{^}}v_ctlz_i8_sel_eq_neg1:
	; SI: buffer_load_ubyte [[VAL:v[0-9]+]],			; GCN: buffer_load_ubyte [[VAL:v[0-9]+]],
	; SI: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]			; GCN: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
	; SI: buffer_store_byte [[FFBH]],			; GCN: buffer_store_byte [[FFBH]],
	define void @v_ctlz_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {			define void @v_ctlz_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
	%val = load i8, i8 addrspace(1)* %valptr			%val = load i8, i8 addrspace(1)* %valptr
	%ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 false) nounwind readnone			%ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 false) nounwind readnone
	%cmp = icmp eq i8 %val, 0			%cmp = icmp eq i8 %val, 0
	%sel = select i1 %cmp, i8 -1, i8 %ctlz			%sel = select i1 %cmp, i8 -1, i8 %ctlz
	store i8 %sel, i8 addrspace(1)* %out			store i8 %sel, i8 addrspace(1)* %out
	ret void			ret void
	}			}

	; FUNC-LABEL: {{^}}v_ctlz_i16_sel_eq_neg1:			; FUNC-LABEL: {{^}}v_ctlz_i16_sel_eq_neg1:
	; SI: buffer_load_ushort [[VAL:v[0-9]+]],			; VI: buffer_load_ushort [[VAL:v[0-9]+]],
	; SI: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]			; VI: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
	; SI: buffer_store_short [[FFBH]],			; VI: buffer_store_short [[FFBH]],
				[Inline review comment] tstellarAMD: Did you mean to change this?
	define void @v_ctlz_i16_sel_eq_neg1(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %valptr) nounwind {			define void @v_ctlz_i16_sel_eq_neg1(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %valptr) nounwind {
	%val = load i16, i16 addrspace(1)* %valptr			%val = load i16, i16 addrspace(1)* %valptr
	%ctlz = call i16 @llvm.ctlz.i16(i16 %val, i1 false) nounwind readnone			%ctlz = call i16 @llvm.ctlz.i16(i16 %val, i1 false) nounwind readnone
	%cmp = icmp eq i16 %val, 0			%cmp = icmp eq i16 %val, 0
	%sel = select i1 %cmp, i16 -1, i16 %ctlz			%sel = select i1 %cmp, i16 -1, i16 %ctlz
	store i16 %sel, i16 addrspace(1)* %out			store i16 %sel, i16 addrspace(1)* %out
	ret void			ret void
	}			}

	; FUNC-LABEL: {{^}}v_ctlz_i7_sel_eq_neg1:			; FUNC-LABEL: {{^}}v_ctlz_i7_sel_eq_neg1:
	; SI: buffer_load_ubyte [[VAL:v[0-9]+]],			; GCN: buffer_load_ubyte [[VAL:v[0-9]+]],
	; SI: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]			; GCN: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
	; SI: v_and_b32_e32 [[TRUNC:v[0-9]+]], 0x7f, [[FFBH]]			; GCN: v_and_b32_e32 [[TRUNC:v[0-9]+]], 0x7f, [[FFBH]]
	; SI: buffer_store_byte [[TRUNC]],			; GCN: buffer_store_byte [[TRUNC]],
	define void @v_ctlz_i7_sel_eq_neg1(i7 addrspace(1)* noalias %out, i7 addrspace(1)* noalias %valptr) nounwind {			define void @v_ctlz_i7_sel_eq_neg1(i7 addrspace(1)* noalias %out, i7 addrspace(1)* noalias %valptr) nounwind {
	%val = load i7, i7 addrspace(1)* %valptr			%val = load i7, i7 addrspace(1)* %valptr
	%ctlz = call i7 @llvm.ctlz.i7(i7 %val, i1 false) nounwind readnone			%ctlz = call i7 @llvm.ctlz.i7(i7 %val, i1 false) nounwind readnone
	%cmp = icmp eq i7 %val, 0			%cmp = icmp eq i7 %val, 0
	%sel = select i1 %cmp, i7 -1, i7 %ctlz			%sel = select i1 %cmp, i7 -1, i7 %ctlz
	store i7 %sel, i7 addrspace(1)* %out			store i7 %sel, i7 addrspace(1)* %out
	ret void			ret void
	}			}

test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll

This file was added.

				; RUN: llc -march=amdgcn -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
				; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s

				declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
				declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone

				; FUNC-LABEL: {{^}}test_umul24_i32:
				; GCN: v_mul_u32_u24
				; Multiplies two i32 kernel arguments after clearing the top 8 bits of
				; each with a shl/lshr-by-8 pair, so both factors fit in 24 bits. The
				; expectation above is a single v_mul_u32_u24 on both runs.
				define void @test_umul24_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
				entry:
				; Zero the upper 8 bits of %a.
				%0 = shl i32 %a, 8
				%a_24 = lshr i32 %0, 8
				; Zero the upper 8 bits of %b.
				%1 = shl i32 %b, 8
				%b_24 = lshr i32 %1, 8
				%2 = mul i32 %a_24, %b_24
				store i32 %2, i32 addrspace(1)* %out
				ret void
				}

				; FUNC-LABEL: {{^}}test_umul24_i16_sext:
				; SI: s_mul_i32 [[SI_MUL:s[0-9]]], s{{[0-9]}}, s{{[0-9]}}
				; SI: s_sext_i32_i16 s{{[0-9]}}, [[SI_MUL]]
				; VI: v_mul_u32_u24_e{{(32\|64)}} [[VI_MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
				; VI: v_bfe_i32 v{{[0-9]}}, [[VI_MUL]], 0, 16
				; i16 multiply of two kernel arguments whose product is sign-extended to
				; i32 before being stored. The expectations above differ per run: the
				; default-CPU run wants s_mul_i32 followed by s_sext_i32_i16, the tonga
				; run wants v_mul_u32_u24 followed by a 16-bit v_bfe_i32.
				define void @test_umul24_i16_sext(i32 addrspace(1)* %out, i16 %a, i16 %b) {
				entry:
				%mul = mul i16 %a, %b
				%ext = sext i16 %mul to i32
				store i32 %ext, i32 addrspace(1)* %out
				ret void
				}

				; FUNC-LABEL: {{^}}test_umul24_i16_vgpr_sext:
				; GCN: v_mul_u32_u24_e{{(32\|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
				; GCN: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 16
				; Same multiply-then-sign-extend pattern, but the two i16 factors are
				; loaded from %in at offsets given by the workitem ids in x and y instead
				; of being kernel arguments. Both runs expect v_mul_u32_u24 followed by a
				; 16-bit v_bfe_i32.
				define void @test_umul24_i16_vgpr_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
				%tid.x = call i32 @llvm.amdgcn.workitem.id.x()
				%tid.y = call i32 @llvm.amdgcn.workitem.id.y()
				; Element addresses in[tid.x] and in[tid.y].
				%ptr_a = getelementptr i16, i16 addrspace(1)* %in, i32 %tid.x
				%ptr_b = getelementptr i16, i16 addrspace(1)* %in, i32 %tid.y
				%a = load i16, i16 addrspace(1)* %ptr_a
				%b = load i16, i16 addrspace(1)* %ptr_b
				%mul = mul i16 %a, %b
				%val = sext i16 %mul to i32
				store i32 %val, i32 addrspace(1)* %out
				ret void
				}

				; 16-bit multiply of two kernel arguments whose result is zero-extended to
				; 32 bits before being stored.
				; The default-cpu run (SI prefix) expects s_mul_i32, then s_and_b32, then a
				; v_mov_b32. The tonga run (VI prefix) expects s_and_b32, then
				; v_mul_u32_u24, then v_and_b32.
				; FUNC-LABEL: {{^}}test_umul24_i16:
				; SI: s_mul_i32
				; SI: s_and_b32
				; SI: v_mov_b32_e32
				; VI: s_and_b32
				; VI: v_mul_u32_u24_e32
				; VI: v_and_b32_e32
				define void @test_umul24_i16(i32 addrspace(1)* %out, i16 %a, i16 %b) {
				entry:
				%mul = mul i16 %a, %b
				%ext = zext i16 %mul to i32
				store i32 %ext, i32 addrspace(1)* %out
				ret void
				}

				; Zero-extended 16-bit multiply where the operands are loaded from %in at
				; offsets given by the x and y workitem ids rather than passed as kernel
				; arguments (presumably to make them per-lane values, as the name suggests).
				; Both run lines expect v_mul_u32_u24 followed by a v_and_b32.
				; FUNC-LABEL: {{^}}test_umul24_i16_vgpr:
				; GCN: v_mul_u32_u24_e32
				; GCN: v_and_b32_e32
				define void @test_umul24_i16_vgpr(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
				%tid.x = call i32 @llvm.amdgcn.workitem.id.x()
				%tid.y = call i32 @llvm.amdgcn.workitem.id.y()
				%ptr_a = getelementptr i16, i16 addrspace(1)* %in, i32 %tid.x
				%ptr_b = getelementptr i16, i16 addrspace(1)* %in, i32 %tid.y
				%a = load i16, i16 addrspace(1)* %ptr_a
				%b = load i16, i16 addrspace(1)* %ptr_b
				%mul = mul i16 %a, %b
				%val = zext i16 %mul to i32
				store i32 %val, i32 addrspace(1)* %out
				ret void
				}

				; 8-bit multiply of two kernel arguments whose result is sign-extended to
				; 32 bits. Both run lines expect v_mul_u32_u24 followed by a v_bfe_i32 that
				; extracts 8 bits starting at bit 0 of the product.
				; FUNC-LABEL: {{^}}test_umul24_i8:
				; GCN: v_mul_u32_u24_e{{(32\|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
				; GCN: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 8
				define void @test_umul24_i8(i32 addrspace(1)* %out, i8 %a, i8 %b) {
				entry:
				%mul = mul i8 %a, %b
				%ext = sext i8 %mul to i32
				store i32 %ext, i32 addrspace(1)* %out
				ret void
				}

				; High half of a 24-bit x 24-bit multiply, written with i32 inputs: each
				; argument is masked with 16777215 (0xffffff), zero-extended to i64 and
				; multiplied; the result is shifted right by 32 and truncated to i32.
				; Expected code: a single v_mul_hi_u32_u24 with no "and" emitted before it,
				; and the store of its result on the very next line.
				; FUNC-LABEL: {{^}}test_umulhi24_i32_i64:
				; GCN-NOT: and
				; GCN: v_mul_hi_u32_u24_e32 [[RESULT:v[0-9]+]],
				; GCN-NEXT: buffer_store_dword [[RESULT]]
				define void @test_umulhi24_i32_i64(i32 addrspace(1)* %out, i32 %a, i32 %b) {
				entry:
				%a.24 = and i32 %a, 16777215
				%b.24 = and i32 %b, 16777215
				%a.24.i64 = zext i32 %a.24 to i64
				%b.24.i64 = zext i32 %b.24 to i64
				%mul48 = mul i64 %a.24.i64, %b.24.i64
				%mul48.hi = lshr i64 %mul48, 32
				%mul24hi = trunc i64 %mul48.hi to i32
				store i32 %mul24hi, i32 addrspace(1)* %out
				ret void
				}

				; High half of a 24-bit x 24-bit multiply, written with i64 inputs: each
				; i64 argument is masked with 16777215 (0xffffff) and the two are multiplied;
				; the product is shifted right by 32 and truncated to i32.
				; Expected code: a single v_mul_hi_u32_u24 with no "and" emitted before it,
				; and the store of its result on the very next line.
				; FUNC-LABEL: {{^}}test_umulhi24:
				; GCN-NOT: and
				; GCN: v_mul_hi_u32_u24_e32 [[RESULT:v[0-9]+]],
				; GCN-NEXT: buffer_store_dword [[RESULT]]
				define void @test_umulhi24(i32 addrspace(1)* %out, i64 %a, i64 %b) {
				entry:
				%a.24 = and i64 %a, 16777215
				%b.24 = and i64 %b, 16777215
				%mul48 = mul i64 %a.24, %b.24
				%mul48.hi = lshr i64 %mul48, 32
				%mul24.hi = trunc i64 %mul48.hi to i32
				store i32 %mul24.hi, i32 addrspace(1)* %out
				ret void
				}

				; Multiply with 24-bit inputs and 64-bit output.
				; Each i64 argument is shifted left by 40 and then logically right by 40,
				; leaving only its low 24 bits; the full 64-bit product is stored.
				; Expected code: v_mul_u32_u24 and v_mul_hi_u32_u24 in either order (the
				; -DAG checks do not fix the order), no "and" or "lshr" before them, and a
				; two-dword store.
				; FUNC-LABEL: {{^}}test_umul24_i64:
				; GCN-NOT: and
				; GCN-NOT: lshr
				; GCN-DAG: v_mul_u32_u24_e32
				; GCN-DAG: v_mul_hi_u32_u24_e32
				; GCN: buffer_store_dwordx2
				define void @test_umul24_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
				entry:
				%tmp0 = shl i64 %a, 40
				%a_24 = lshr i64 %tmp0, 40
				%tmp1 = shl i64 %b, 40
				%b_24 = lshr i64 %tmp1, 40
				%tmp2 = mul i64 %a_24, %b_24
				store i64 %tmp2, i64 addrspace(1)* %out
				ret void
				}

				; FIXME: Should be able to eliminate the and.
				; 64-bit square of a value reduced to its low 24 bits (shl by 40 followed
				; by lshr by 40); the same value feeds both multiply operands.
				; The checks record the current output: the loaded argument is still masked
				; with 0xffffff by an s_and_b32, and that masked register is used for both
				; operands of v_mul_hi_u32_u24 and v_mul_u32_u24 (either order).
				; FUNC-LABEL: {{^}}test_umul24_i64_square:
				; GCN: s_load_dword [[A:s[0-9]+]]
				; GCN: s_and_b32 [[TRUNC:s[0-9]+]], [[A]], 0xffffff{{$}}
				; GCN-DAG: v_mul_hi_u32_u24_e64 v{{[0-9]+}}, [[TRUNC]], [[TRUNC]]
				; GCN-DAG: v_mul_u32_u24_e64 v{{[0-9]+}}, [[TRUNC]], [[TRUNC]]
				define void @test_umul24_i64_square(i64 addrspace(1)* %out, i64 %a) {
				entry:
				%tmp0 = shl i64 %a, 40
				%a.24 = lshr i64 %tmp0, 40
				%tmp2 = mul i64 %a.24, %a.24
				store i64 %tmp2, i64 addrspace(1)* %out
				ret void
				}

				; High 16 bits of a 16-bit x 16-bit multiply done in i32: both arguments are
				; masked with 65535, multiplied as i32, shifted right by 16 and truncated
				; to i16 for the store.
				; Expected code: two s_and_b32 (one mask per operand), a v_mul_u32_u24, and
				; a v_lshrrev_b32 by 16 of that product.
				; FUNC-LABEL: {{^}}test_umulhi16_i32:
				; GCN: s_and_b32
				; GCN: s_and_b32
				; GCN: v_mul_u32_u24_e32 [[MUL24:v[0-9]+]]
				; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, [[MUL24]]
				define void @test_umulhi16_i32(i16 addrspace(1)* %out, i32 %a, i32 %b) {
				entry:
				%a.16 = and i32 %a, 65535
				%b.16 = and i32 %b, 65535
				%mul = mul i32 %a.16, %b.16
				%hi = lshr i32 %mul, 16
				%mulhi = trunc i32 %hi to i16
				store i16 %mulhi, i16 addrspace(1)* %out
				ret void
				}

				; 24-bit multiply carried out in the non-power-of-two type i33: each i33
				; argument is shifted left by 9 and logically right by 9, leaving its low
				; 24 bits; the i33 product is zero-extended to i64 and stored.
				; Expected code: two scalar dword loads, no "and"/"lshr" before the
				; multiplies, v_mul_u32_u24 for the low dword and v_mul_hi_u32_u24 for the
				; high dword, the high dword masked with 1 (only bit 32 of an i33 value lies
				; in the upper dword), and a two-dword store of the low/masked-high pair.
				; FUNC-LABEL: {{^}}test_umul24_i33:
				; GCN: s_load_dword s
				; GCN: s_load_dword s
				; GCN-NOT: and
				; GCN-NOT: lshr
				; GCN-DAG: v_mul_u32_u24_e32 v[[MUL_LO:[0-9]+]],
				; GCN-DAG: v_mul_hi_u32_u24_e32 v[[MUL_HI:[0-9]+]],
				; GCN-DAG: v_and_b32_e32 v[[HI:[0-9]+]], 1, v[[MUL_HI]]
				; GCN: buffer_store_dwordx2 v{{\[}}[[MUL_LO]]:[[HI]]{{\]}}
				define void @test_umul24_i33(i64 addrspace(1)* %out, i33 %a, i33 %b) {
				entry:
				%tmp0 = shl i33 %a, 9
				%a_24 = lshr i33 %tmp0, 9
				%tmp1 = shl i33 %b, 9
				%b_24 = lshr i33 %tmp1, 9
				%tmp2 = mul i33 %a_24, %b_24
				%ext = zext i33 %tmp2 to i64
				store i64 %ext, i64 addrspace(1)* %out
				ret void
				}

				; Upper part of a 24-bit multiply carried out in i33: operands are reduced
				; to their low 24 bits (shl by 9, lshr by 9), multiplied as i33, and the
				; product is shifted right by 32 and truncated to i32, i.e. only bit 32 of
				; the i33 product is kept.
				; Expected code: two scalar dword loads, no "and"/"lshr" before the
				; multiply, then v_mul_hi_u32_u24, a mask of its result with 1, and the
				; dword store on three consecutive lines.
				; FUNC-LABEL: {{^}}test_umulhi24_i33:
				; GCN: s_load_dword s
				; GCN: s_load_dword s
				; GCN-NOT: and
				; GCN-NOT: lshr
				; GCN: v_mul_hi_u32_u24_e32 v[[MUL_HI:[0-9]+]],
				; GCN-NEXT: v_and_b32_e32 v[[HI:[0-9]+]], 1, v[[MUL_HI]]
				; GCN-NEXT: buffer_store_dword v[[HI]]
				define void @test_umulhi24_i33(i32 addrspace(1)* %out, i33 %a, i33 %b) {
				entry:
				%tmp0 = shl i33 %a, 9
				%a_24 = lshr i33 %tmp0, 9
				%tmp1 = shl i33 %b, 9
				%b_24 = lshr i33 %tmp1, 9
				%tmp2 = mul i33 %a_24, %b_24
				%hi = lshr i33 %tmp2, 32
				%trunc = trunc i33 %hi to i32
				store i32 %trunc, i32 addrspace(1)* %out
				ret void
				}

test/CodeGen/AMDGPU/mul_uint24-r600.ll

This file was added.

				; RUN: llc -march=r600 -mcpu=cayman < %s \| FileCheck -check-prefix=EG -check-prefix=FUNC %s
				; RUN: llc -march=r600 -mcpu=redwood < %s \| FileCheck -check-prefix=EG -check-prefix=FUNC %s

				; 32-bit multiply whose operands are both known to fit in 24 bits: each
				; kernel argument is shifted left by 8 and then logically right by 8, which
				; clears its top 8 bits. Both r600 run lines expect a MUL_UINT24 that reads
				; its operands from KC0[2].Z and KC0[2].W.
				; FUNC-LABEL: {{^}}test_umul24_i32:
				; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W
				define void @test_umul24_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
				entry:
				%0 = shl i32 %a, 8
				%a_24 = lshr i32 %0, 8
				%1 = shl i32 %b, 8
				%b_24 = lshr i32 %1, 8
				%2 = mul i32 %a_24, %b_24
				store i32 %2, i32 addrspace(1)* %out
				ret void
				}

				; The result must be sign-extended.
				; 16-bit multiply of two kernel arguments followed by a sext to i32.
				; Expected code: MUL_UINT24 writing some channel of a T register, then a
				; BFE_INT that reads that same channel through PV with offset 0.0 and a
				; width supplied as a literal, and the literal value 16.
				; FUNC-LABEL: {{^}}test_umul24_i16_sext:
				; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]]
				; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
				; EG: 16
				define void @test_umul24_i16_sext(i32 addrspace(1)* %out, i16 %a, i16 %b) {
				entry:
				%mul = mul i16 %a, %b
				%ext = sext i16 %mul to i32
				store i32 %ext, i32 addrspace(1)* %out
				ret void
				}

				; The result must be sign-extended.
				; 8-bit multiply of two kernel arguments followed by a sext to i32.
				; Expected code: MUL_UINT24 writing some channel of a T register, then a
				; BFE_INT that reads that same channel through PV. Unlike the i16 variant,
				; the literal width operand is not checked here.
				; FUNC-LABEL: {{^}}test_umul24_i8:
				; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]]
				; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
				define void @test_umul24_i8(i32 addrspace(1)* %out, i8 %a, i8 %b) {
				entry:
				%mul = mul i8 %a, %b
				%ext = sext i8 %mul to i32
				store i32 %ext, i32 addrspace(1)* %out
				ret void
				}

				; High half of a 24-bit x 24-bit multiply, written with i32 inputs: each
				; argument is masked with 16777215 (0xffffff), zero-extended to i64 and
				; multiplied; the result is shifted right by 32 and truncated to i32.
				; Expected code: a MULHI_UINT24 reading its operands from KC0[2].Z and
				; KC0[2].W.
				; FUNC-LABEL: {{^}}test_umulhi24_i32_i64:
				; EG: MULHI_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W
				define void @test_umulhi24_i32_i64(i32 addrspace(1)* %out, i32 %a, i32 %b) {
				entry:
				%a.24 = and i32 %a, 16777215
				%b.24 = and i32 %b, 16777215
				%a.24.i64 = zext i32 %a.24 to i64
				%b.24.i64 = zext i32 %b.24 to i64
				%mul48 = mul i64 %a.24.i64, %b.24.i64
				%mul48.hi = lshr i64 %mul48, 32
				%mul24hi = trunc i64 %mul48.hi to i32
				store i32 %mul24hi, i32 addrspace(1)* %out
				ret void
				}

				; High half of a 24-bit x 24-bit multiply, written with i64 inputs: each
				; i64 argument is masked with 16777215 (0xffffff) and the two are multiplied;
				; the product is shifted right by 32 and truncated to i32.
				; Expected code: a MULHI_UINT24 reading its operands from KC0[2].W and
				; KC0[3].Y (presumably the low dwords of the two i64 arguments - confirm
				; against the kernel argument layout).
				; FUNC-LABEL: {{^}}test_umulhi24:
				; EG: MULHI_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
				define void @test_umulhi24(i32 addrspace(1)* %out, i64 %a, i64 %b) {
				entry:
				%a.24 = and i64 %a, 16777215
				%b.24 = and i64 %b, 16777215
				%mul48 = mul i64 %a.24, %b.24
				%mul48.hi = lshr i64 %mul48, 32
				%mul24.hi = trunc i64 %mul48.hi to i32
				store i32 %mul24.hi, i32 addrspace(1)* %out
				ret void
				}

				; Multiply with 24-bit inputs and 64-bit output.
				; Each i64 argument is shifted left by 40 and then logically right by 40,
				; leaving only its low 24 bits; the full 64-bit product is stored.
				; Both halves of the product are checked: a MUL_UINT24 for the low dword
				; and a MULHI instruction for the high dword. The two checks use the -DAG
				; form so that the order in which the instructions are emitted does not
				; matter. A check line only takes effect when the prefix is followed by a
				; colon; any other punctuation turns it into a plain comment.
				; FUNC-LABEL: {{^}}test_umul24_i64:
				; EG-DAG: MUL_UINT24
				; EG-DAG: MULHI
				define void @test_umul24_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
				entry:
				%tmp0 = shl i64 %a, 40
				%a_24 = lshr i64 %tmp0, 40
				%tmp1 = shl i64 %b, 40
				%b_24 = lshr i64 %tmp1, 40
				%tmp2 = mul i64 %a_24, %b_24
				store i64 %tmp2, i64 addrspace(1)* %out
				ret void
				}

test/CodeGen/AMDGPU/mul_uint24.ll

This file was deleted.

	; RUN: llc -march=amdgcn -verify-machineinstrs < %s \| FileCheck -check-prefix=SI -check-prefix=FUNC %s
	; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s \| FileCheck -check-prefix=SI -check-prefix=FUNC %s
	; RUN: llc -march=r600 -mcpu=redwood < %s \| FileCheck -check-prefix=EG -check-prefix=FUNC %s
	; RUN: llc -march=r600 -mcpu=cayman < %s \| FileCheck -check-prefix=EG -check-prefix=FUNC %s

	; FUNC-LABEL: {{^}}test_umul24_i32:
	; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W
	; SI: v_mul_u32_u24
	define void @test_umul24_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
	entry:
	%0 = shl i32 %a, 8
	%a_24 = lshr i32 %0, 8
	%1 = shl i32 %b, 8
	%b_24 = lshr i32 %1, 8
	%2 = mul i32 %a_24, %b_24
	store i32 %2, i32 addrspace(1)* %out
	ret void
	}

	; FUNC-LABEL: {{^}}test_umul24_i16_sext:
	; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]]
	; The result must be sign-extended
	; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
	; EG: 16

	; SI: v_mul_u32_u24_e{{(32\|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
	; SI: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 16
	define void @test_umul24_i16_sext(i32 addrspace(1)* %out, i16 %a, i16 %b) {
	entry:
	%mul = mul i16 %a, %b
	%ext = sext i16 %mul to i32
	store i32 %ext, i32 addrspace(1)* %out
	ret void
	}

	; FUNC-LABEL: {{^}}test_umul24_i16:
	; SI: s_and_b32
	; SI: v_mul_u32_u24_e32
	; SI: v_and_b32_e32
	define void @test_umul24_i16(i32 addrspace(1)* %out, i16 %a, i16 %b) {
	entry:
	%mul = mul i16 %a, %b
	%ext = zext i16 %mul to i32
	store i32 %ext, i32 addrspace(1)* %out
	ret void
	}

	; FUNC-LABEL: {{^}}test_umul24_i8:
	; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]]
	; The result must be sign-extended
	; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
	; SI: v_mul_u32_u24_e{{(32\|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
	; SI: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 8

	define void @test_umul24_i8(i32 addrspace(1)* %out, i8 %a, i8 %b) {
	entry:
	%mul = mul i8 %a, %b
	%ext = sext i8 %mul to i32
	store i32 %ext, i32 addrspace(1)* %out
	ret void
	}

	; FUNC-LABEL: {{^}}test_umulhi24_i32_i64:
	; SI-NOT: and
	; SI: v_mul_hi_u32_u24_e32 [[RESULT:v[0-9]+]],
	; SI-NEXT: buffer_store_dword [[RESULT]]

	; EG: MULHI_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W
	define void @test_umulhi24_i32_i64(i32 addrspace(1)* %out, i32 %a, i32 %b) {
	entry:
	%a.24 = and i32 %a, 16777215
	%b.24 = and i32 %b, 16777215
	%a.24.i64 = zext i32 %a.24 to i64
	%b.24.i64 = zext i32 %b.24 to i64
	%mul48 = mul i64 %a.24.i64, %b.24.i64
	%mul48.hi = lshr i64 %mul48, 32
	%mul24hi = trunc i64 %mul48.hi to i32
	store i32 %mul24hi, i32 addrspace(1)* %out
	ret void
	}

	; FUNC-LABEL: {{^}}test_umulhi24:
	; SI-NOT: and
	; SI: v_mul_hi_u32_u24_e32 [[RESULT:v[0-9]+]],
	; SI-NEXT: buffer_store_dword [[RESULT]]

	; EG: MULHI_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
	define void @test_umulhi24(i32 addrspace(1)* %out, i64 %a, i64 %b) {
	entry:
	%a.24 = and i64 %a, 16777215
	%b.24 = and i64 %b, 16777215
	%mul48 = mul i64 %a.24, %b.24
	%mul48.hi = lshr i64 %mul48, 32
	%mul24.hi = trunc i64 %mul48.hi to i32
	store i32 %mul24.hi, i32 addrspace(1)* %out
	ret void
	}

	; Multiply with 24-bit inputs and 64-bit output
	; FUNC-LABEL: {{^}}test_umul24_i64:
	; EG; MUL_UINT24
	; EG: MULHI

	; SI-NOT: and
	; SI-NOT: lshr

	; SI-DAG: v_mul_u32_u24_e32
	; SI-DAG: v_mul_hi_u32_u24_e32

	; SI: buffer_store_dwordx2
	define void @test_umul24_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
	entry:
	%tmp0 = shl i64 %a, 40
	%a_24 = lshr i64 %tmp0, 40
	%tmp1 = shl i64 %b, 40
	%b_24 = lshr i64 %tmp1, 40
	%tmp2 = mul i64 %a_24, %b_24
	store i64 %tmp2, i64 addrspace(1)* %out
	ret void
	}

	; FIXME: Should be able to eliminate the and
	; FUNC-LABEL: {{^}}test_umul24_i64_square:
	; SI: s_load_dword [[A:s[0-9]+]]
	; SI: s_and_b32 [[TRUNC:s[0-9]+]], [[A]], 0xffffff{{$}}
	; SI-DAG: v_mul_hi_u32_u24_e64 v{{[0-9]+}}, [[TRUNC]], [[TRUNC]]
	; SI-DAG: v_mul_u32_u24_e64 v{{[0-9]+}}, [[TRUNC]], [[TRUNC]]
	define void @test_umul24_i64_square(i64 addrspace(1)* %out, i64 %a) {
	entry:
	%tmp0 = shl i64 %a, 40
	%a.24 = lshr i64 %tmp0, 40
	%tmp2 = mul i64 %a.24, %a.24
	store i64 %tmp2, i64 addrspace(1)* %out
	ret void
	}

	; FUNC-LABEL: {{^}}test_umulhi16_i32:
	; SI: s_and_b32
	; SI: s_and_b32
	; SI: v_mul_u32_u24_e32 [[MUL24:v[0-9]+]]
	; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, [[MUL24]]
	define void @test_umulhi16_i32(i16 addrspace(1)* %out, i32 %a, i32 %b) {
	entry:
	%a.16 = and i32 %a, 65535
	%b.16 = and i32 %b, 65535
	%mul = mul i32 %a.16, %b.16
	%hi = lshr i32 %mul, 16
	%mulhi = trunc i32 %hi to i16
	store i16 %mulhi, i16 addrspace(1)* %out
	ret void
	}

	; FUNC-LABEL: {{^}}test_umul24_i33:
	; SI: s_load_dword s
	; SI: s_load_dword s

	; SI-NOT: and
	; SI-NOT: lshr

	; SI-DAG: v_mul_u32_u24_e32 v[[MUL_LO:[0-9]+]],
	; SI-DAG: v_mul_hi_u32_u24_e32 v[[MUL_HI:[0-9]+]],
	; SI-DAG: v_and_b32_e32 v[[HI:[0-9]+]], 1, v[[MUL_HI]]
	; SI: buffer_store_dwordx2 v{{\[}}[[MUL_LO]]:[[HI]]{{\]}}
	define void @test_umul24_i33(i64 addrspace(1)* %out, i33 %a, i33 %b) {
	entry:
	%tmp0 = shl i33 %a, 9
	%a_24 = lshr i33 %tmp0, 9
	%tmp1 = shl i33 %b, 9
	%b_24 = lshr i33 %tmp1, 9
	%tmp2 = mul i33 %a_24, %b_24
	%ext = zext i33 %tmp2 to i64
	store i64 %ext, i64 addrspace(1)* %out
	ret void
	}

	; FUNC-LABEL: {{^}}test_umulhi24_i33:
	; SI: s_load_dword s
	; SI: s_load_dword s

	; SI-NOT: and
	; SI-NOT: lshr

	; SI: v_mul_hi_u32_u24_e32 v[[MUL_HI:[0-9]+]],
	; SI-NEXT: v_and_b32_e32 v[[HI:[0-9]+]], 1, v[[MUL_HI]]
	; SI-NEXT: buffer_store_dword v[[HI]]
	define void @test_umulhi24_i33(i32 addrspace(1)* %out, i33 %a, i33 %b) {
	entry:
	%tmp0 = shl i33 %a, 9
	%a_24 = lshr i33 %tmp0, 9
	%tmp1 = shl i33 %b, 9
	%b_24 = lshr i33 %tmp1, 9
	%tmp2 = mul i33 %a_24, %b_24
	%hi = lshr i33 %tmp2, 32
	%trunc = trunc i33 %hi to i32
	store i32 %trunc, i32 addrspace(1)* %out
	ret void
	}

test/CodeGen/AMDGPU/sad.ll

	; RUN: llc -march=amdgcn -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN %s			; RUN: llc -march=amdgcn -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
				; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s

	; GCN-LABEL: {{^}}v_sad_u32_pat1:			; FUNC-LABEL: {{^}}v_sad_u32_pat1:
	; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}			; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
	define void @v_sad_u32_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {			define void @v_sad_u32_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
	%icmp0 = icmp ugt i32 %a, %b			%icmp0 = icmp ugt i32 %a, %b
	%t0 = select i1 %icmp0, i32 %a, i32 %b			%t0 = select i1 %icmp0, i32 %a, i32 %b

	%icmp1 = icmp ule i32 %a, %b			%icmp1 = icmp ule i32 %a, %b
	%t1 = select i1 %icmp1, i32 %a, i32 %b			%t1 = select i1 %icmp1, i32 %a, i32 %b

	%ret0 = sub i32 %t0, %t1			%ret0 = sub i32 %t0, %t1
	%ret = add i32 %ret0, %c			%ret = add i32 %ret0, %c

	store i32 %ret, i32 addrspace(1)* %out			store i32 %ret, i32 addrspace(1)* %out
	ret void			ret void
	}			}

	; GCN-LABEL: {{^}}v_sad_u32_constant_pat1:			; FUNC-LABEL: {{^}}v_sad_u32_constant_pat1:
	; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, 20			; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, 20
	define void @v_sad_u32_constant_pat1(i32 addrspace(1)* %out, i32 %a) {			define void @v_sad_u32_constant_pat1(i32 addrspace(1)* %out, i32 %a) {
	%icmp0 = icmp ugt i32 %a, 90			%icmp0 = icmp ugt i32 %a, 90
	%t0 = select i1 %icmp0, i32 %a, i32 90			%t0 = select i1 %icmp0, i32 %a, i32 90

	%icmp1 = icmp ule i32 %a, 90			%icmp1 = icmp ule i32 %a, 90
	%t1 = select i1 %icmp1, i32 %a, i32 90			%t1 = select i1 %icmp1, i32 %a, i32 90

	%ret0 = sub i32 %t0, %t1			%ret0 = sub i32 %t0, %t1
	%ret = add i32 %ret0, 20			%ret = add i32 %ret0, 20

	store i32 %ret, i32 addrspace(1)* %out			store i32 %ret, i32 addrspace(1)* %out
	ret void			ret void
	}			}

	; GCN-LABEL: {{^}}v_sad_u32_pat2:			; FUNC-LABEL: {{^}}v_sad_u32_pat2:
	; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}			; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
	define void @v_sad_u32_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {			define void @v_sad_u32_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
	%icmp0 = icmp ugt i32 %a, %b			%icmp0 = icmp ugt i32 %a, %b
	%sub0 = sub i32 %a, %b			%sub0 = sub i32 %a, %b
	%sub1 = sub i32 %b, %a			%sub1 = sub i32 %b, %a
	%ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1			%ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1

	%ret = add i32 %ret0, %c			%ret = add i32 %ret0, %c

	store i32 %ret, i32 addrspace(1)* %out			store i32 %ret, i32 addrspace(1)* %out
	ret void			ret void
	}			}

	; GCN-LABEL: {{^}}v_sad_u32_multi_use_sub_pat1:			; FUNC-LABEL: {{^}}v_sad_u32_multi_use_sub_pat1:
	; GCN: s_max_u32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}			; GCN: s_max_u32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
	; GCN: s_min_u32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}			; GCN: s_min_u32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
	; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}			; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
	; GCN: s_add_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}			; GCN: s_add_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
	define void @v_sad_u32_multi_use_sub_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {			define void @v_sad_u32_multi_use_sub_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
	%icmp0 = icmp ugt i32 %a, %b			%icmp0 = icmp ugt i32 %a, %b
	%t0 = select i1 %icmp0, i32 %a, i32 %b			%t0 = select i1 %icmp0, i32 %a, i32 %b

	%icmp1 = icmp ule i32 %a, %b			%icmp1 = icmp ule i32 %a, %b
	%t1 = select i1 %icmp1, i32 %a, i32 %b			%t1 = select i1 %icmp1, i32 %a, i32 %b

	%ret0 = sub i32 %t0, %t1			%ret0 = sub i32 %t0, %t1
	store volatile i32 %ret0, i32 *undef			store volatile i32 %ret0, i32 *undef
	%ret = add i32 %ret0, %c			%ret = add i32 %ret0, %c

	store i32 %ret, i32 addrspace(1)* %out			store i32 %ret, i32 addrspace(1)* %out
	ret void			ret void
	}			}

	; GCN-LABEL: {{^}}v_sad_u32_multi_use_add_pat1:			; FUNC-LABEL: {{^}}v_sad_u32_multi_use_add_pat1:
	; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}			; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
	define void @v_sad_u32_multi_use_add_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {			define void @v_sad_u32_multi_use_add_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
	%icmp0 = icmp ugt i32 %a, %b			%icmp0 = icmp ugt i32 %a, %b
	%t0 = select i1 %icmp0, i32 %a, i32 %b			%t0 = select i1 %icmp0, i32 %a, i32 %b

	%icmp1 = icmp ule i32 %a, %b			%icmp1 = icmp ule i32 %a, %b
	%t1 = select i1 %icmp1, i32 %a, i32 %b			%t1 = select i1 %icmp1, i32 %a, i32 %b

	%ret0 = sub i32 %t0, %t1			%ret0 = sub i32 %t0, %t1
	%ret = add i32 %ret0, %c			%ret = add i32 %ret0, %c
	store volatile i32 %ret, i32 *undef			store volatile i32 %ret, i32 *undef
	store i32 %ret, i32 addrspace(1)* %out			store i32 %ret, i32 addrspace(1)* %out
	ret void			ret void
	}			}

	; GCN-LABEL: {{^}}v_sad_u32_multi_use_max_pat1:			; FUNC-LABEL: {{^}}v_sad_u32_multi_use_max_pat1:
	; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}			; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
	define void @v_sad_u32_multi_use_max_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {			define void @v_sad_u32_multi_use_max_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
	%icmp0 = icmp ugt i32 %a, %b			%icmp0 = icmp ugt i32 %a, %b
	%t0 = select i1 %icmp0, i32 %a, i32 %b			%t0 = select i1 %icmp0, i32 %a, i32 %b
	store volatile i32 %t0, i32 *undef			store volatile i32 %t0, i32 *undef

	%icmp1 = icmp ule i32 %a, %b			%icmp1 = icmp ule i32 %a, %b
	%t1 = select i1 %icmp1, i32 %a, i32 %b			%t1 = select i1 %icmp1, i32 %a, i32 %b

	%ret0 = sub i32 %t0, %t1			%ret0 = sub i32 %t0, %t1
	%ret = add i32 %ret0, %c			%ret = add i32 %ret0, %c

	store i32 %ret, i32 addrspace(1)* %out			store i32 %ret, i32 addrspace(1)* %out
	ret void			ret void
	}			}

	; GCN-LABEL: {{^}}v_sad_u32_multi_use_min_pat1:			; FUNC-LABEL: {{^}}v_sad_u32_multi_use_min_pat1:
	; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}			; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
	define void @v_sad_u32_multi_use_min_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {			define void @v_sad_u32_multi_use_min_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
	%icmp0 = icmp ugt i32 %a, %b			%icmp0 = icmp ugt i32 %a, %b
	%t0 = select i1 %icmp0, i32 %a, i32 %b			%t0 = select i1 %icmp0, i32 %a, i32 %b

	%icmp1 = icmp ule i32 %a, %b			%icmp1 = icmp ule i32 %a, %b
	%t1 = select i1 %icmp1, i32 %a, i32 %b			%t1 = select i1 %icmp1, i32 %a, i32 %b

	store volatile i32 %t1, i32 *undef			store volatile i32 %t1, i32 *undef

	%ret0 = sub i32 %t0, %t1			%ret0 = sub i32 %t0, %t1
	%ret = add i32 %ret0, %c			%ret = add i32 %ret0, %c

	store i32 %ret, i32 addrspace(1)* %out			store i32 %ret, i32 addrspace(1)* %out
	ret void			ret void
	}			}

	; GCN-LABEL: {{^}}v_sad_u32_multi_use_sub_pat2:			; FUNC-LABEL: {{^}}v_sad_u32_multi_use_sub_pat2:
	; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}			; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
	define void @v_sad_u32_multi_use_sub_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {			define void @v_sad_u32_multi_use_sub_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
	%icmp0 = icmp ugt i32 %a, %b			%icmp0 = icmp ugt i32 %a, %b
	%sub0 = sub i32 %a, %b			%sub0 = sub i32 %a, %b
	store volatile i32 %sub0, i32 *undef			store volatile i32 %sub0, i32 *undef
	%sub1 = sub i32 %b, %a			%sub1 = sub i32 %b, %a
	%ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1			%ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1

	%ret = add i32 %ret0, %c			%ret = add i32 %ret0, %c

	store i32 %ret, i32 addrspace(1)* %out			store i32 %ret, i32 addrspace(1)* %out
	ret void			ret void
	}			}

	; GCN-LABEL: {{^}}v_sad_u32_multi_use_select_pat2:			; FUNC-LABEL: {{^}}v_sad_u32_multi_use_select_pat2:
	; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}			; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
	; GCN: v_cmp_gt_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}			; GCN: v_cmp_gt_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
	; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}			; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
	define void @v_sad_u32_multi_use_select_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {			define void @v_sad_u32_multi_use_select_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
	%icmp0 = icmp ugt i32 %a, %b			%icmp0 = icmp ugt i32 %a, %b
	%sub0 = sub i32 %a, %b			%sub0 = sub i32 %a, %b
	%sub1 = sub i32 %b, %a			%sub1 = sub i32 %b, %a
	%ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1			%ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1
	store volatile i32 %ret0, i32 *undef			store volatile i32 %ret0, i32 *undef

	%ret = add i32 %ret0, %c			%ret = add i32 %ret0, %c

	store i32 %ret, i32 addrspace(1)* %out			store i32 %ret, i32 addrspace(1)* %out
	ret void			ret void
	}			}

	; GCN-LABEL: {{^}}v_sad_u32_vector_pat1:			; FUNC-LABEL: {{^}}v_sad_u32_vector_pat1:
	; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}			; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
	; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}			; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
	; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}			; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
	; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}			; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
	define void @v_sad_u32_vector_pat1(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {			define void @v_sad_u32_vector_pat1(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
	%icmp0 = icmp ugt <4 x i32> %a, %b			%icmp0 = icmp ugt <4 x i32> %a, %b
	%t0 = select <4 x i1> %icmp0, <4 x i32> %a, <4 x i32> %b			%t0 = select <4 x i1> %icmp0, <4 x i32> %a, <4 x i32> %b

	%icmp1 = icmp ule <4 x i32> %a, %b			%icmp1 = icmp ule <4 x i32> %a, %b
	%t1 = select <4 x i1> %icmp1, <4 x i32> %a, <4 x i32> %b			%t1 = select <4 x i1> %icmp1, <4 x i32> %a, <4 x i32> %b

	%ret0 = sub <4 x i32> %t0, %t1			%ret0 = sub <4 x i32> %t0, %t1
	%ret = add <4 x i32> %ret0, %c			%ret = add <4 x i32> %ret0, %c

	store <4 x i32> %ret, <4 x i32> addrspace(1)* %out			store <4 x i32> %ret, <4 x i32> addrspace(1)* %out
	ret void			ret void
	}			}

	; GCN-LABEL: {{^}}v_sad_u32_vector_pat2:			; FUNC-LABEL: {{^}}v_sad_u32_vector_pat2:
	; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}			; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
	; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}			; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
	; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}			; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
	; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}			; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
	define void @v_sad_u32_vector_pat2(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {			define void @v_sad_u32_vector_pat2(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
	%icmp0 = icmp ugt <4 x i32> %a, %b			%icmp0 = icmp ugt <4 x i32> %a, %b
	%sub0 = sub <4 x i32> %a, %b			%sub0 = sub <4 x i32> %a, %b
	%sub1 = sub <4 x i32> %b, %a			%sub1 = sub <4 x i32> %b, %a
	%ret0 = select <4 x i1> %icmp0, <4 x i32> %sub0, <4 x i32> %sub1			%ret0 = select <4 x i1> %icmp0, <4 x i32> %sub0, <4 x i32> %sub1

	%ret = add <4 x i32> %ret0, %c			%ret = add <4 x i32> %ret0, %c

	store <4 x i32> %ret, <4 x i32> addrspace(1)* %out			store <4 x i32> %ret, <4 x i32> addrspace(1)* %out
	ret void			ret void
	}			}

	; GCN-LABEL: {{^}}v_sad_u32_i16_pat1:			; FUNC-LABEL: {{^}}v_sad_u32_i16_pat1:
	; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}			; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
	define void @v_sad_u32_i16_pat1(i16 addrspace(1)* %out, i16 %a, i16 %b, i16 %c) {			define void @v_sad_u32_i16_pat1(i16 addrspace(1)* %out, i16 %a, i16 %b, i16 %c) {

	%icmp0 = icmp ugt i16 %a, %b			%icmp0 = icmp ugt i16 %a, %b
	%t0 = select i1 %icmp0, i16 %a, i16 %b			%t0 = select i1 %icmp0, i16 %a, i16 %b

	%icmp1 = icmp ule i16 %a, %b			%icmp1 = icmp ule i16 %a, %b
	%t1 = select i1 %icmp1, i16 %a, i16 %b			%t1 = select i1 %icmp1, i16 %a, i16 %b

	%ret0 = sub i16 %t0, %t1			%ret0 = sub i16 %t0, %t1
	%ret = add i16 %ret0, %c			%ret = add i16 %ret0, %c

	store i16 %ret, i16 addrspace(1)* %out			store i16 %ret, i16 addrspace(1)* %out
	ret void			ret void
	}			}

	; GCN-LABEL: {{^}}v_sad_u32_i16_pat2:			; FUNC-LABEL: {{^}}v_sad_u32_i16_pat2:
	; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}			; VI: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
	define void @v_sad_u32_i16_pat2(i16 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b, i16 zeroext %c) {			define void @v_sad_u32_i16_pat2(i16 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b, i16 zeroext %c) {
	%icmp0 = icmp ugt i16 %a, %b			%icmp0 = icmp ugt i16 %a, %b
	%sub0 = sub i16 %a, %b			%sub0 = sub i16 %a, %b
	%sub1 = sub i16 %b, %a			%sub1 = sub i16 %b, %a
	%ret0 = select i1 %icmp0, i16 %sub0, i16 %sub1			%ret0 = select i1 %icmp0, i16 %sub0, i16 %sub1

	%ret = add i16 %ret0, %c			%ret = add i16 %ret0, %c

	store i16 %ret, i16 addrspace(1)* %out			store i16 %ret, i16 addrspace(1)* %out
	ret void			ret void
	}			}

	; GCN-LABEL: {{^}}v_sad_u32_i8_pat1:			; FUNC-LABEL: {{^}}v_sad_u32_2xi16_pat2:
				; GCN: v_sad_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
				; GCN: v_sad_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
				define void @v_sad_u32_2xi16_pat2(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b, <2 x i16> %c) {
				%icmp0 = icmp ugt <2 x i16> %a, %b
				%sub0 = sub <2 x i16> %a, %b
				%sub1 = sub <2 x i16> %b, %a
				%ret0 = select <2 x i1> %icmp0, <2 x i16> %sub0, <2 x i16> %sub1

				%ret = add <2 x i16> %ret0, %c

				store <2 x i16> %ret, <2 x i16> addrspace(1)* %out
				ret void
				}

				; FUNC-LABEL: {{^}}v_sad_u32_i8_pat1:
	; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}			; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
	define void @v_sad_u32_i8_pat1(i8 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) {			define void @v_sad_u32_i8_pat1(i8 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) {
	%icmp0 = icmp ugt i8 %a, %b			%icmp0 = icmp ugt i8 %a, %b
	%t0 = select i1 %icmp0, i8 %a, i8 %b			%t0 = select i1 %icmp0, i8 %a, i8 %b

	%icmp1 = icmp ule i8 %a, %b			%icmp1 = icmp ule i8 %a, %b
	%t1 = select i1 %icmp1, i8 %a, i8 %b			%t1 = select i1 %icmp1, i8 %a, i8 %b

	%ret0 = sub i8 %t0, %t1			%ret0 = sub i8 %t0, %t1
	%ret = add i8 %ret0, %c			%ret = add i8 %ret0, %c

	store i8 %ret, i8 addrspace(1)* %out			store i8 %ret, i8 addrspace(1)* %out
	ret void			ret void
	}			}

	; GCN-LABEL: {{^}}v_sad_u32_i8_pat2:			; FUNC-LABEL: {{^}}v_sad_u32_i8_pat2:
	; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}			; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
	define void @v_sad_u32_i8_pat2(i8 addrspace(1)* %out, i8 zeroext %a, i8 zeroext %b, i8 zeroext %c) {			define void @v_sad_u32_i8_pat2(i8 addrspace(1)* %out, i8 zeroext %a, i8 zeroext %b, i8 zeroext %c) {
	%icmp0 = icmp ugt i8 %a, %b			%icmp0 = icmp ugt i8 %a, %b
	%sub0 = sub i8 %a, %b			%sub0 = sub i8 %a, %b
	%sub1 = sub i8 %b, %a			%sub1 = sub i8 %b, %a
	%ret0 = select i1 %icmp0, i8 %sub0, i8 %sub1			%ret0 = select i1 %icmp0, i8 %sub0, i8 %sub1

	%ret = add i8 %ret0, %c			%ret = add i8 %ret0, %c

	store i8 %ret, i8 addrspace(1)* %out			store i8 %ret, i8 addrspace(1)* %out
	ret void			ret void
	}			}

	; GCN-LABEL: {{^}}v_sad_u32_mismatched_operands_pat1:			; FUNC-LABEL: {{^}}v_sad_u32_mismatched_operands_pat1:
	; GCN: v_cmp_le_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}			; GCN: v_cmp_le_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
	; GCN: s_max_u32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}			; GCN: s_max_u32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
	; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}			; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
	; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}			; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
	define void @v_sad_u32_mismatched_operands_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {			define void @v_sad_u32_mismatched_operands_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
	%icmp0 = icmp ugt i32 %a, %b			%icmp0 = icmp ugt i32 %a, %b
	%t0 = select i1 %icmp0, i32 %a, i32 %b			%t0 = select i1 %icmp0, i32 %a, i32 %b

	%icmp1 = icmp ule i32 %a, %b			%icmp1 = icmp ule i32 %a, %b
	%t1 = select i1 %icmp1, i32 %a, i32 %d			%t1 = select i1 %icmp1, i32 %a, i32 %d

	%ret0 = sub i32 %t0, %t1			%ret0 = sub i32 %t0, %t1
	%ret = add i32 %ret0, %c			%ret = add i32 %ret0, %c

	store i32 %ret, i32 addrspace(1)* %out			store i32 %ret, i32 addrspace(1)* %out
	ret void			ret void
	}			}

	; GCN-LABEL: {{^}}v_sad_u32_mismatched_operands_pat2:			; FUNC-LABEL: {{^}}v_sad_u32_mismatched_operands_pat2:
	; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}			; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
	; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}			; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
	; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}			; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
	define void @v_sad_u32_mismatched_operands_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {			define void @v_sad_u32_mismatched_operands_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
	%icmp0 = icmp ugt i32 %a, %b			%icmp0 = icmp ugt i32 %a, %b
	%sub0 = sub i32 %a, %d			%sub0 = sub i32 %a, %d
	%sub1 = sub i32 %b, %a			%sub1 = sub i32 %b, %a
	%ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1			%ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1

	%ret = add i32 %ret0, %c			%ret = add i32 %ret0, %c

	store i32 %ret, i32 addrspace(1)* %out			store i32 %ret, i32 addrspace(1)* %out
	ret void			ret void
	}			}

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Promote uniform i16 ops to i32 ops for targets that have 16 bit instructions
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 72435

lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp

lib/Target/AMDGPU/SIISelLowering.cpp

test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll

test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll

test/CodeGen/AMDGPU/amdgpu-codegenprepare.ll

test/CodeGen/AMDGPU/ctlz.ll

test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll

test/CodeGen/AMDGPU/mul_uint24-r600.ll

test/CodeGen/AMDGPU/mul_uint24.ll

test/CodeGen/AMDGPU/sad.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Promote uniform i16 ops to i32 ops for targets that have 16 bit instructions
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 72435

lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp

lib/Target/AMDGPU/SIISelLowering.cpp

test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll

test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll

test/CodeGen/AMDGPU/amdgpu-codegenprepare.ll

test/CodeGen/AMDGPU/ctlz.ll

test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll

test/CodeGen/AMDGPU/mul_uint24-r600.ll

test/CodeGen/AMDGPU/mul_uint24.ll

test/CodeGen/AMDGPU/sad.ll

[AMDGPU] Promote uniform i16 ops to i32 ops for targets that have 16 bit instructions
ClosedPublic