Diff 193103

llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.h

	Show First 20 Lines • Show All 550 Lines • ▼ Show 20 Lines
	private:			private:
	const NVPTXSubtarget &STI; // cache the subtarget here			const NVPTXSubtarget &STI; // cache the subtarget here
	SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;			SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;

	SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;			SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
	SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;			SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
	SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;			SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;

				SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
				SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const;
				SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;

	SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;			SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
	SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;			SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;

	SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;			SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
	SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;			SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
	SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;			SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;

	SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;			SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
	Show All 14 Lines

llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp

Show First 20 Lines • Show All 540 Lines • ▼ Show 20 Lines	NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
// There's no neg.f16 instruction. Expand to (0-x).		// There's no neg.f16 instruction. Expand to (0-x).
setOperationAction(ISD::FNEG, MVT::f16, Expand);		setOperationAction(ISD::FNEG, MVT::f16, Expand);
setOperationAction(ISD::FNEG, MVT::v2f16, Expand);		setOperationAction(ISD::FNEG, MVT::v2f16, Expand);

// (would be) Library functions.		// (would be) Library functions.

// These map to conversion instructions for scalar FP types.		// These map to conversion instructions for scalar FP types.
for (const auto &Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FRINT,		for (const auto &Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FRINT,
ISD::FROUND, ISD::FTRUNC}) {		ISD::FTRUNC}) {
setOperationAction(Op, MVT::f16, Legal);		setOperationAction(Op, MVT::f16, Legal);
setOperationAction(Op, MVT::f32, Legal);		setOperationAction(Op, MVT::f32, Legal);
setOperationAction(Op, MVT::f64, Legal);		setOperationAction(Op, MVT::f64, Legal);
setOperationAction(Op, MVT::v2f16, Expand);		setOperationAction(Op, MVT::v2f16, Expand);
}		}

		setOperationAction(ISD::FROUND, MVT::f16, Promote);
		setOperationAction(ISD::FROUND, MVT::v2f16, Expand);
		setOperationAction(ISD::FROUND, MVT::f32, Custom);
		setOperationAction(ISD::FROUND, MVT::f64, Custom);


// 'Expand' implements FCOPYSIGN without calling an external library.		// 'Expand' implements FCOPYSIGN without calling an external library.
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);		setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v2f16, Expand);		setOperationAction(ISD::FCOPYSIGN, MVT::v2f16, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);		setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);		setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);

// These map to corresponding instructions for f32/f64. f16 must be		// These map to corresponding instructions for f32/f64. f16 must be
// promoted to f32. v2f16 is expanded to f16, which is then promoted		// promoted to f32. v2f16 is expanded to f16, which is then promoted
▲ Show 20 Lines • Show All 1,499 Lines • ▼ Show 20 Lines	else {
SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);		SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);		SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);

SDValue Ops[2] = { Lo, Hi };		SDValue Ops[2] = { Lo, Hi };
return DAG.getMergeValues(Ops, dl);		return DAG.getMergeValues(Ops, dl);
}		}
}		}

		/// Entry point for lowering an FROUND node: picks the width-specific
		/// lowering from the node's result type. f32 is handled by LowerFROUND32 and
		/// f64 by LowerFROUND64; any other type is treated as unreachable.
		SDValue NVPTXTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const {
		  const EVT ResultTy = Op.getValueType();

		  if (ResultTy == MVT::f64)
		    return LowerFROUND64(Op, DAG);
		  if (ResultTy == MVT::f32)
		    return LowerFROUND32(Op, DAG);

		  llvm_unreachable("unhandled type");
		}

		// This is the rounding method used in CUDA libdevice, in C-like code:
		// float roundf(float A)
		// {
		//   float RoundedA = (float) (int) ( A > 0 ? (A + 0.5f) : (A - 0.5f));
		//   RoundedA = abs(A) > 0x1.0p23 ? A : RoundedA;
		//   return abs(A) < 0.5 ? (float)(int)A : RoundedA;
		// }
		//
		// Op is the FROUND node; its operand 0 is the value to round and the result
		// has the node's value type. Returns the SELECT node that yields the rounded
		// value.
		SDValue NVPTXTargetLowering::LowerFROUND32(SDValue Op,
		                                           SelectionDAG &DAG) const {
		  SDLoc SL(Op);
		  SDValue A = Op.getOperand(0);
		  EVT VT = Op.getValueType();

		  SDValue AbsA = DAG.getNode(ISD::FABS, SL, VT, A);

		  // RoundedA = (float) (int) ( A > 0 ? (A + 0.5f) : (A - 0.5f))
		  // Implemented by copying A's sign bit onto the bit pattern of 0.5f and
		  // adding the result to A, then truncating toward zero.
		  SDValue Bitcast = DAG.getNode(ISD::BITCAST, SL, MVT::i32, A);
		  // Both masks are unsigned: 0x80000000 does not fit in a 32-bit signed int,
		  // and a negative int would be sign-extended when widened for getConstant.
		  const unsigned SignBitMask = 0x80000000u;
		  SDValue Sign = DAG.getNode(ISD::AND, SL, MVT::i32, Bitcast,
		                             DAG.getConstant(SignBitMask, SL, MVT::i32));
		  const unsigned PointFiveInBits = 0x3F000000u; // bit pattern of 0.5f
		  SDValue PointFiveWithSignRaw =
		      DAG.getNode(ISD::OR, SL, MVT::i32, Sign,
		                  DAG.getConstant(PointFiveInBits, SL, MVT::i32));
		  SDValue PointFiveWithSign =
		      DAG.getNode(ISD::BITCAST, SL, VT, PointFiveWithSignRaw);
		  SDValue AdjustedA = DAG.getNode(ISD::FADD, SL, VT, A, PointFiveWithSign);
		  SDValue RoundedA = DAG.getNode(ISD::FTRUNC, SL, VT, AdjustedA);

		  // RoundedA = abs(A) > 0x1.0p23 ? A : RoundedA;
		  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
		  const double TwoPow23 = 8388608.0; // 2^23, written out instead of pow()
		  SDValue IsLarge = DAG.getSetCC(
		      SL, SetCCVT, AbsA, DAG.getConstantFP(TwoPow23, SL, VT), ISD::SETOGT);
		  RoundedA = DAG.getNode(ISD::SELECT, SL, VT, IsLarge, A, RoundedA);

		  // return abs(A) < 0.5 ? (float)(int)A : RoundedA;
		  SDValue IsSmall = DAG.getSetCC(SL, SetCCVT, AbsA,
		                                 DAG.getConstantFP(0.5, SL, VT), ISD::SETOLT);
		  SDValue RoundedAForSmallA = DAG.getNode(ISD::FTRUNC, SL, VT, A);
		  return DAG.getNode(ISD::SELECT, SL, VT, IsSmall, RoundedAForSmallA, RoundedA);
		}

		// The implementation of round(double) is similar to that of round(float) in
		// that they both separate the value range into three regions and use a method
		// specific to the region to round the values. However, round(double) first
		// calculates the round of the absolute value and then adds the sign back while
		// round(float) directly rounds the value with sign.
		//
		// Op is the FROUND node; its operand 0 is the value to round and the result
		// has the node's value type. Returns the SELECT node that yields the rounded
		// value.
		SDValue NVPTXTargetLowering::LowerFROUND64(SDValue Op,
		                                           SelectionDAG &DAG) const {
		  SDLoc SL(Op);
		  SDValue A = Op.getOperand(0);
		  EVT VT = Op.getValueType();

		  SDValue AbsA = DAG.getNode(ISD::FABS, SL, VT, A);

		  // double RoundedA = trunc(abs(A) + 0.5);
		  SDValue AdjustedA = DAG.getNode(ISD::FADD, SL, VT, AbsA,
		                                  DAG.getConstantFP(0.5, SL, VT));
		  SDValue RoundedA = DAG.getNode(ISD::FTRUNC, SL, VT, AdjustedA);

		  // RoundedA = abs(A) < 0.5 ? (double)0 : RoundedA;
		  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
		  SDValue IsSmall = DAG.getSetCC(SL, SetCCVT, AbsA,
		                                 DAG.getConstantFP(0.5, SL, VT), ISD::SETOLT);
		  RoundedA = DAG.getNode(ISD::SELECT, SL, VT, IsSmall,
		                         DAG.getConstantFP(0.0, SL, VT), RoundedA);

		  // Add sign to RoundedA. (A stray FTRUNC node that used to be built here and
		  // immediately discarded has been dropped; nothing consumed its result.)
		  RoundedA = DAG.getNode(ISD::FCOPYSIGN, SL, VT, RoundedA, A);

		  // RoundedA = abs(A) > 0x1.0p52 ? A : RoundedA;
		  const double TwoPow52 = 4503599627370496.0; // 2^52, written out instead of pow()
		  SDValue IsLarge = DAG.getSetCC(
		      SL, SetCCVT, AbsA, DAG.getConstantFP(TwoPow52, SL, VT), ISD::SETOGT);
		  return DAG.getNode(ISD::SELECT, SL, VT, IsLarge, A, RoundedA);
		}



SDValue		SDValue
NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {		NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {		switch (Op.getOpcode()) {
case ISD::RETURNADDR:		case ISD::RETURNADDR:
return SDValue();		return SDValue();
case ISD::FRAMEADDR:		case ISD::FRAMEADDR:
return SDValue();		return SDValue();
case ISD::GlobalAddress:		case ISD::GlobalAddress:
Show All 14 Lines	case ISD::LOAD:
return LowerLOAD(Op, DAG);		return LowerLOAD(Op, DAG);
case ISD::SHL_PARTS:		case ISD::SHL_PARTS:
return LowerShiftLeftParts(Op, DAG);		return LowerShiftLeftParts(Op, DAG);
case ISD::SRA_PARTS:		case ISD::SRA_PARTS:
case ISD::SRL_PARTS:		case ISD::SRL_PARTS:
return LowerShiftRightParts(Op, DAG);		return LowerShiftRightParts(Op, DAG);
case ISD::SELECT:		case ISD::SELECT:
return LowerSelect(Op, DAG);		return LowerSelect(Op, DAG);
		case ISD::FROUND:
		return LowerFROUND(Op, DAG);
default:		default:
llvm_unreachable("Custom lowering not defined for operation");		llvm_unreachable("Custom lowering not defined for operation");
}		}
}		}

SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const {		SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const {
SDValue Op0 = Op->getOperand(0);		SDValue Op0 = Op->getOperand(0);
SDValue Op1 = Op->getOperand(1);		SDValue Op1 = Op->getOperand(1);
▲ Show 20 Lines • Show All 2,693 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td

Show First 20 Lines • Show All 2,996 Lines • ▼ Show 20 Lines	def : Pat<(ffloor Float16Regs:$a),
(CVT_f16_f16 Float16Regs:$a, CvtRMI)>;		(CVT_f16_f16 Float16Regs:$a, CvtRMI)>;
def : Pat<(ffloor Float32Regs:$a),		def : Pat<(ffloor Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>, Requires<[doF32FTZ]>;		(CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(ffloor Float32Regs:$a),		def : Pat<(ffloor Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRMI)>, Requires<[doNoF32FTZ]>;		(CVT_f32_f32 Float32Regs:$a, CvtRMI)>, Requires<[doNoF32FTZ]>;
def : Pat<(ffloor Float64Regs:$a),		def : Pat<(ffloor Float64Regs:$a),
(CVT_f64_f64 Float64Regs:$a, CvtRMI)>;		(CVT_f64_f64 Float64Regs:$a, CvtRMI)>;

def : Pat<(f16 (fround Float16Regs:$a)),
(CVT_f16_f16 Float16Regs:$a, CvtRNI)>;
def : Pat<(fround Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(f32 (fround Float32Regs:$a)),
(CVT_f32_f32 Float32Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>;
def : Pat<(f64 (fround Float64Regs:$a)),
(CVT_f64_f64 Float64Regs:$a, CvtRNI)>;

def : Pat<(ftrunc Float16Regs:$a),		def : Pat<(ftrunc Float16Regs:$a),
(CVT_f16_f16 Float16Regs:$a, CvtRZI)>;		(CVT_f16_f16 Float16Regs:$a, CvtRZI)>;
def : Pat<(ftrunc Float32Regs:$a),		def : Pat<(ftrunc Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;		(CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(ftrunc Float32Regs:$a),		def : Pat<(ftrunc Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRZI)>, Requires<[doNoF32FTZ]>;		(CVT_f32_f32 Float32Regs:$a, CvtRZI)>, Requires<[doNoF32FTZ]>;
def : Pat<(ftrunc Float64Regs:$a),		def : Pat<(ftrunc Float64Regs:$a),
(CVT_f64_f64 Float64Regs:$a, CvtRZI)>;		(CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
▲ Show 20 Lines • Show All 119 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/NVPTX/f16-instructions.ll

	Show First 20 Lines • Show All 1,101 Lines • ▼ Show 20 Lines
	; CHECK: st.param.b16 [func_retval0+0], [[R]];			; CHECK: st.param.b16 [func_retval0+0], [[R]];
	; CHECK: ret;			; CHECK: ret;
	define half @test_nearbyint(half %a) #0 {			define half @test_nearbyint(half %a) #0 {
	%r = call half @llvm.nearbyint.f16(half %a)			%r = call half @llvm.nearbyint.f16(half %a)
	ret half %r			ret half %r
	}			}

	; CHECK-LABEL: test_round(			; CHECK-LABEL: test_round(
	; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_round_param_0];			; CHECK: ld.param.b16 {{.*}}, [test_round_param_0];
	; CHECK: cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]];			; check the use of sign mask and 0.5 to implement round
	; CHECK: st.param.b16 [func_retval0+0], [[R]];			; CHECK: and.b32 [[R:%r[0-9]+]], {{.*}}, -2147483648;
				; CHECK: or.b32 {{.*}}, [[R]], 1056964608;
				; CHECK: st.param.b16 [func_retval0+0], {{.*}};
	; CHECK: ret;			; CHECK: ret;
	define half @test_round(half %a) #0 {			define half @test_round(half %a) #0 {
	%r = call half @llvm.round.f16(half %a)			%r = call half @llvm.round.f16(half %a)
	ret half %r			ret half %r
	}			}

	; CHECK-LABEL: test_fmuladd(			; CHECK-LABEL: test_fmuladd(
	; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fmuladd_param_0];			; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fmuladd_param_0];
	Show All 18 Lines

llvm/trunk/test/CodeGen/NVPTX/f16x2-instructions.ll

	Show First 20 Lines • Show All 1,372 Lines • ▼ Show 20 Lines
	; CHECK: st.param.b32 [func_retval0+0], [[R]];			; CHECK: st.param.b32 [func_retval0+0], [[R]];
	; CHECK: ret;			; CHECK: ret;
	define <2 x half> @test_nearbyint(<2 x half> %a) #0 {			define <2 x half> @test_nearbyint(<2 x half> %a) #0 {
	%r = call <2 x half> @llvm.nearbyint.f16(<2 x half> %a)			%r = call <2 x half> @llvm.nearbyint.f16(<2 x half> %a)
	ret <2 x half> %r			ret <2 x half> %r
	}			}

	; CHECK-LABEL: test_round(			; CHECK-LABEL: test_round(
	; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_round_param_0];			; CHECK: ld.param.b32 {{.*}}, [test_round_param_0];
	; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];			; check the use of sign mask and 0.5 to implement round
	; CHECK-DAG: cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]];			; CHECK: and.b32 [[R1:%r[0-9]+]], {{.*}}, -2147483648;
	; CHECK-DAG: cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]];			; CHECK: or.b32 {{.*}}, [[R1]], 1056964608;
	; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}			; CHECK: and.b32 [[R2:%r[0-9]+]], {{.*}}, -2147483648;
	; CHECK: st.param.b32 [func_retval0+0], [[R]];			; CHECK: or.b32 {{.*}}, [[R2]], 1056964608;
				; CHECK: st.param.b32 [func_retval0+0], {{.*}};
	; CHECK: ret;			; CHECK: ret;
	define <2 x half> @test_round(<2 x half> %a) #0 {			define <2 x half> @test_round(<2 x half> %a) #0 {
	%r = call <2 x half> @llvm.round.f16(<2 x half> %a)			%r = call <2 x half> @llvm.round.f16(<2 x half> %a)
	ret <2 x half> %r			ret <2 x half> %r
	}			}

	; CHECK-LABEL: test_fmuladd(			; CHECK-LABEL: test_fmuladd(
	; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fmuladd_param_0];			; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fmuladd_param_0];
	▲ Show 20 Lines • Show All 45 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/NVPTX/math-intrins.ll

Show First 20 Lines • Show All 68 Lines • ▼ Show 20 Lines	define double @floor_double(double %a) {
%b = call double @llvm.floor.f64(double %a)		%b = call double @llvm.floor.f64(double %a)
ret double %b		ret double %b
}		}

; ---- round ----		; ---- round ----

; CHECK-LABEL: round_float		; CHECK-LABEL: round_float
define float @round_float(float %a) {		define float @round_float(float %a) {
; CHECK: cvt.rni.f32.f32		; check the use of sign mask and 0.5 to implement round
		; CHECK: and.b32 [[R1:%r[0-9]+]], {{.*}}, -2147483648;
		; CHECK: or.b32 {{.*}}, [[R1]], 1056964608;
%b = call float @llvm.round.f32(float %a)		%b = call float @llvm.round.f32(float %a)
ret float %b		ret float %b
}		}

; CHECK-LABEL: round_float_ftz		; CHECK-LABEL: round_float_ftz
define float @round_float_ftz(float %a) #1 {		define float @round_float_ftz(float %a) #1 {
; CHECK: cvt.rni.ftz.f32.f32		; check the use of sign mask and 0.5 to implement round
		; CHECK: and.b32 [[R1:%r[0-9]+]], {{.*}}, -2147483648;
		; CHECK: or.b32 {{.*}}, [[R1]], 1056964608;
%b = call float @llvm.round.f32(float %a)		%b = call float @llvm.round.f32(float %a)
ret float %b		ret float %b
}		}

; CHECK-LABEL: round_double		; CHECK-LABEL: round_double
define double @round_double(double %a) {		define double @round_double(double %a) {
; CHECK: cvt.rni.f64.f64		; check the use of 0.5 to implement round
		; CHECK: setp.lt.f64 {{.*}}, [[R:%fd[0-9]+]], 0d3FE0000000000000;
		; CHECK: add.rn.f64 {{.*}}, [[R]], 0d3FE0000000000000;
%b = call double @llvm.round.f64(double %a)		%b = call double @llvm.round.f64(double %a)
ret double %b		ret double %b
}		}

; ---- nearbyint ----		; ---- nearbyint ----

; CHECK-LABEL: nearbyint_float		; CHECK-LABEL: nearbyint_float
define float @nearbyint_float(float %a) {		define float @nearbyint_float(float %a) {
▲ Show 20 Lines • Show All 187 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[NVPTX] Fix the codegen for llvm.round.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 193103

llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.h

llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp

llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td

llvm/trunk/test/CodeGen/NVPTX/f16-instructions.ll

llvm/trunk/test/CodeGen/NVPTX/f16x2-instructions.ll

llvm/trunk/test/CodeGen/NVPTX/math-intrins.ll

This is an archive of the discontinued LLVM Phabricator instance.

[NVPTX] Fix the codegen for llvm.round. ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 193103

llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.h

llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp

llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td

llvm/trunk/test/CodeGen/NVPTX/f16-instructions.ll

llvm/trunk/test/CodeGen/NVPTX/f16x2-instructions.ll

llvm/trunk/test/CodeGen/NVPTX/math-intrins.ll

[NVPTX] Fix the codegen for llvm.round.
ClosedPublic