Diff 525963

llvm/lib/Target/RISCV/RISCVISelLowering.h

Show First 20 Lines • Show All 832 Lines • ▼ Show 20 Lines	SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG,
unsigned RISCVISDOpc) const;		unsigned RISCVISDOpc) const;
SDValue lowerVPStridedLoad(SDValue Op, SelectionDAG &DAG) const;		SDValue lowerVPStridedLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPStridedStore(SDValue Op, SelectionDAG &DAG) const;		SDValue lowerVPStridedStore(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG,		SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG,
unsigned ExtendOpc) const;		unsigned ExtendOpc) const;
SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;		SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;		SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;

		SDValue lowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;
		SDValue lowerBF16_TO_FP(SDValue Op, SelectionDAG &DAG) const;

SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const;		SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) const;		SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) const;

SDValue lowerStrictFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const;		SDValue lowerStrictFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const;

SDValue lowerVectorStrictFSetcc(SDValue Op, SelectionDAG &DAG) const;		SDValue lowerVectorStrictFSetcc(SDValue Op, SelectionDAG &DAG) const;

SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;		SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;
▲ Show 20 Lines • Show All 83 Lines • Show Last 20 Lines

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 419 Lines • ▼ Show 20 Lines	if (Subtarget.hasStdExtFOrZfinx()) {
setOperationAction(FPRndMode, MVT::f32,		setOperationAction(FPRndMode, MVT::f32,
Subtarget.hasStdExtZfa() ? Legal : Custom);		Subtarget.hasStdExtZfa() ? Legal : Custom);
setCondCodeAction(FPCCToExpand, MVT::f32, Expand);		setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);		setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT, MVT::f32, Custom);		setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Expand);		setOperationAction(ISD::BR_CC, MVT::f32, Expand);
setOperationAction(FPOpToExpand, MVT::f32, Expand);		setOperationAction(FPOpToExpand, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);		setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
		setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
		craig.topperUnsubmitted Not Done Reply Inline Actions This wasn't needed by D151663? Is there a test case missing from D151663 that requires this? craig.topper: This wasn't needed by D151663? Is there a test case missing from D151663 that requires this?
		joshua-arch1AuthorUnsubmitted Done Reply Inline Actions Maybe I can remove this part. In my first version of this patch, the Expand action was needed for LoadExt and TruncStore. joshua-arch1: Maybe I can remove this part. In my first version of this patch, the Expand action was needed for…
setTruncStoreAction(MVT::f32, MVT::f16, Expand);		setTruncStoreAction(MVT::f32, MVT::f16, Expand);
		setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
		setOperationAction(ISD::FP_TO_BF16, MVT::f32, Custom);
		setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
		craig.topperUnsubmitted Not Done Reply Inline Actions D151663 uses setOperationAction(ISD::FP_TO_BF16, MVT::f32, Subtarget.isSoftFPABI() ? LibCall : Custom); Is that difference important? craig.topper: D151663 uses ``` setOperationAction(ISD::FP_TO_BF16, MVT::f32…
		joshua-arch1AuthorUnsubmitted Done Reply Inline Actions Since we also use a libcall in the lowerFP_TO_BF16 function, I don't think we need to use "Libcall" here for SoftABI. joshua-arch1: Since we also use a libcall in the lowerFP_TO_BF16 function, I don't think we need to use…
setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);		setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);

if (Subtarget.hasStdExtZfa())		if (Subtarget.hasStdExtZfa())
setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);		setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
}		}

if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())		if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
setOperationAction(ISD::BITCAST, MVT::i32, Custom);		setOperationAction(ISD::BITCAST, MVT::i32, Custom);
Show All 18 Lines	if (Subtarget.hasStdExtDOrZdinx()) {
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);		setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
setOperationAction(ISD::SELECT, MVT::f64, Custom);		setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Expand);		setOperationAction(ISD::BR_CC, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);		setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);		setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setOperationAction(FPOpToExpand, MVT::f64, Expand);		setOperationAction(FPOpToExpand, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);		setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);		setTruncStoreAction(MVT::f64, MVT::f16, Expand);
		setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
		setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
		setOperationAction(ISD::FP_TO_BF16, MVT::f64, Custom);
		setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);		setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
}		}

if (Subtarget.is64Bit()) {		if (Subtarget.is64Bit()) {
setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT,		setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT,
ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT},		ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT},
MVT::i32, Custom);		MVT::i32, Custom);
setOperationAction(ISD::LROUND, MVT::i32, Custom);		setOperationAction(ISD::LROUND, MVT::i32, Custom);
▲ Show 20 Lines • Show All 1,949 Lines • ▼ Show 20 Lines	Res = DAG.getNode(RISCVISD::VSELECT_VL, DL, DstContainerVT, IsNan, SplatZero,
Res, VL);		Res, VL);

if (DstVT.isFixedLengthVector())		if (DstVT.isFixedLengthVector())
Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);		Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);

return Res;		return Res;
}		}

		// Custom lowering for ISD::FP_TO_BF16. The rounding itself is delegated to
		// the runtime routine that RTLIB::getFPROUND selects for (operand type ->
		// bf16); the call's result is typed f32 here and is then moved into an
		// integer value that is returned to the legalizer.
		SDValue RISCVTargetLowering::lowerFP_TO_BF16(SDValue Op,
		SelectionDAG &DAG) const {
		SDLoc DL(Op);
		craig.topperUnsubmitted Done Reply Inline Actions This is an incorrect conversion. We can't truncate the mantissa. It would turn some nan encodings into infinity. Probably other issues I haven't thought of yet. craig.topper: This is an incorrect conversion. We can't truncate the mantissa. It would turn some nan…
		craig.topperUnsubmitted Done Reply Inline Actions We need to use `truncsfbf2` craig.topper: We need to use `truncsfbf2`
		MakeLibCallOptions CallOptions;
		RTLIB::Libcall LC =
		RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
		SDValue Res =
		makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
		// 64-bit subtargets produce an i64 through FMV_X_ANYEXTW_RV64; otherwise
		// the f32 call result is bitcast to i32.
		if (Subtarget.is64Bit())
		return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);

		return DAG.getBitcast(MVT::i32, Res);
		}

		// Custom lowering for ISD::BF16_TO_FP. The operand's bits are shifted left
		// by 16 in XLenVT, the shifted pattern is reinterpreted as f32, and an
		// FP_EXTEND is appended when the requested result type is not f32.
		SDValue RISCVTargetLowering::lowerBF16_TO_FP(SDValue Op,
		SelectionDAG &DAG) const {
		// Always expand bf16 to f32 casts, they lower to ext + shift.
		//
		// Note that the operand of this code can be bf16 or an integer type in case
		// bf16 is not supported on the target and was softened.
		SDLoc DL(Op);
		SDValue Op0;
		SDValue Res;
		craig.topperUnsubmitted Done Reply Inline Actions If the libcall doesn't exist, you can cast it to i16, any_extend to i32, and shift it left by 16 and bitcast it to f32. craig.topper: If the libcall doesn't exist, you can cast it to i16, any_extend to i32, and shift it left by…
		craig.topperUnsubmitted Done Reply Inline Actions Which is what the default lowering in LegalizeDAG.cpp does. craig.topper: Which is what the default lowering in LegalizeDAG.cpp does.
		joshua-arch1AuthorUnsubmitted Done Reply Inline Actions We cannot directly use the default lowering in LegalizeDAG.cpp. It uses a bitcast from i32 to f32. However, i32 is not a legal type in RV64 backend. joshua-arch1: We cannot directly use the default lowering in LegalizeDAG.cpp. It uses a bitcast from i32 to…
		MVT XLenVT = Subtarget.getXLenVT();
		// A bf16-typed operand is reinterpreted as i16 and any-extended to XLenVT.
		// Any other operand is used unchanged.
		// NOTE(review): the else path presumably receives an XLenVT integer after
		// softening -- confirm, since the shift below is built in XLenVT.
		if (Op.getOperand(0).getValueType() == MVT::bf16) {
		Op0 =
		craig.topperUnsubmitted Not Done Reply Inline Actions Why wasn't this part needed in D151663? craig.topper: Why wasn't this part needed in D151663?
		joshua-arch1AuthorUnsubmitted Done Reply Inline Actions Although bf16 type isn't legal, that does not mean this condition will never happen. joshua-arch1: Although bf16 type isn't legal, that does not mean this condition will never happen.
		DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT,
		DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op.getOperand(0)));
		} else {
		Op0 = Op.getOperand(0);
		}

		// Move the 16 payload bits into bits [31:16] of the integer value.
		SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, Op0,
		DAG.getShiftAmountConstant(16, XLenVT, DL));
		craig.topperUnsubmitted Done Reply Inline Actions No need for else after an `if` that returns craig.topper: No need for else after an `if` that returns

		// 64-bit subtargets go through the FMV_W_X_RV64 target node to obtain the
		// f32; otherwise a plain BITCAST of the shifted value is used.
		if (Subtarget.is64Bit()) {
		Res = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Shift);
		} else {
		Res = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Shift);
		}

		// Add fp_extend in case the output is bigger than f32.
		if (Op.getValueType() != MVT::f32)
		return DAG.getNode(ISD::FP_EXTEND, DL, Op.getValueType(), Res);

		return Res;
		}

static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {		static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
switch (Opc) {		switch (Opc) {
case ISD::FROUNDEVEN:		case ISD::FROUNDEVEN:
		craig.topperUnsubmitted Not Done Reply Inline Actions Doesn't this need to be i64 on 64 bit target? craig.topper: Doesn't this need to be i64 on 64 bit target?
case ISD::STRICT_FROUNDEVEN:		case ISD::STRICT_FROUNDEVEN:
		craig.topperUnsubmitted Not Done Reply Inline Actions This creates an MVT::i16 after type legalization which is illegal. craig.topper: This creates an MVT::i16 after type legalization which is illegal.
		joshua-arch1AuthorUnsubmitted Done Reply Inline Actions In fact, without Zfbfmin enabled, bf16 type isn't legal and this condition will never happen. If Zfbfmin is enabled, maybe we can use FCVT.S.BF16 to directly convert BF16 value to an FP32 value. joshua-arch1: In fact, without Zfbfmin enabled, bf16 type isn't legal and this condition will never happen.
case ISD::VP_FROUNDEVEN:		case ISD::VP_FROUNDEVEN:
return RISCVFPRndMode::RNE;		return RISCVFPRndMode::RNE;
case ISD::FTRUNC:		case ISD::FTRUNC:
case ISD::STRICT_FTRUNC:		case ISD::STRICT_FTRUNC:
case ISD::VP_FROUNDTOZERO:		case ISD::VP_FROUNDTOZERO:
return RISCVFPRndMode::RTZ;		return RISCVFPRndMode::RTZ;
		craig.topperUnsubmitted Done Reply Inline Actions You can share the shift code between both paths by using XLenVT. craig.topper: You can share the shift code between both paths by using XLenVT.
case ISD::FFLOOR:		case ISD::FFLOOR:
case ISD::STRICT_FFLOOR:		case ISD::STRICT_FFLOOR:
		craig.topperUnsubmitted Done Reply Inline Actions Use getShiftAmountConstant craig.topper: Use getShiftAmountConstant
case ISD::VP_FFLOOR:		case ISD::VP_FFLOOR:
return RISCVFPRndMode::RDN;		return RISCVFPRndMode::RDN;
case ISD::FCEIL:		case ISD::FCEIL:
case ISD::STRICT_FCEIL:		case ISD::STRICT_FCEIL:
case ISD::VP_FCEIL:		case ISD::VP_FCEIL:
return RISCVFPRndMode::RUP;		return RISCVFPRndMode::RUP;
case ISD::FROUND:		case ISD::FROUND:
case ISD::STRICT_FROUND:		case ISD::STRICT_FROUND:
case ISD::VP_FROUND:		case ISD::VP_FROUND:
return RISCVFPRndMode::RMM;		return RISCVFPRndMode::RMM;
case ISD::FRINT:		case ISD::FRINT:
return RISCVFPRndMode::DYN;		return RISCVFPRndMode::DYN;
}		}

		craig.topperUnsubmitted Done Reply Inline Actions No need for `else` after `if` returns craig.topper: No need for `else` after `if` returns
return RISCVFPRndMode::Invalid;		return RISCVFPRndMode::Invalid;
}		}

// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND		// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to		// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
// the integer domain and back. Taking care to avoid converting values that are		// the integer domain and back. Taking care to avoid converting values that are
// nan or already correct.		// nan or already correct.
static SDValue		static SDValue
▲ Show 20 Lines • Show All 2,332 Lines • ▼ Show 20 Lines	if (IsStrict) {
return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);		return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
}		}
Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);		Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
return convertFromScalableVector(VT, Src, DAG, Subtarget);		return convertFromScalableVector(VT, Src, DAG, Subtarget);
}		}
case ISD::FP_TO_SINT_SAT:		case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:		case ISD::FP_TO_UINT_SAT:
return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);		return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
		case ISD::FP_TO_BF16:
		return lowerFP_TO_BF16(Op, DAG);
		case ISD::BF16_TO_FP:
		return lowerBF16_TO_FP(Op, DAG);
case ISD::FTRUNC:		case ISD::FTRUNC:
case ISD::FCEIL:		case ISD::FCEIL:
case ISD::FFLOOR:		case ISD::FFLOOR:
case ISD::FNEARBYINT:		case ISD::FNEARBYINT:
case ISD::FRINT:		case ISD::FRINT:
case ISD::FROUND:		case ISD::FROUND:
case ISD::FROUNDEVEN:		case ISD::FROUNDEVEN:
return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);		return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
▲ Show 20 Lines • Show All 11,281 Lines • ▼ Show 20 Lines	bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
return Subtarget.enableUnalignedVectorMem();		return Subtarget.enableUnalignedVectorMem();
}		}

bool RISCVTargetLowering::splitValueIntoRegisterParts(		bool RISCVTargetLowering::splitValueIntoRegisterParts(
SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,		SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {		unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
bool IsABIRegCopy = CC.has_value();		bool IsABIRegCopy = CC.has_value();
EVT ValueVT = Val.getValueType();		EVT ValueVT = Val.getValueType();
if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {		if (IsABIRegCopy && (ValueVT == MVT::f16 \|\| ValueVT == MVT::bf16) &&
// Cast the f16 to i16, extend to i32, pad with ones to make a float nan,		PartVT == MVT::f32) {
		// Cast the f16/bf16 to i16, extend to i32, pad with ones to make a float nan,
// and cast to f32.		// and cast to f32.
Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);		Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);		Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,		Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
DAG.getConstant(0xFFFF0000, DL, MVT::i32));		DAG.getConstant(0xFFFF0000, DL, MVT::i32));
Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);		Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
Parts[0] = Val;		Parts[0] = Val;
return true;		return true;
Show All 35 Lines	bool RISCVTargetLowering::splitValueIntoRegisterParts(
}		}
return false;		return false;
}		}

SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(		SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,		SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {		MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
bool IsABIRegCopy = CC.has_value();		bool IsABIRegCopy = CC.has_value();
if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {		if (IsABIRegCopy && (ValueVT == MVT::f16 \|\| ValueVT == MVT::bf16) &&
		PartVT == MVT::f32) {
SDValue Val = Parts[0];		SDValue Val = Parts[0];

// Cast the f32 to i32, truncate to i16, and cast back to f16.		// Cast the f32 to i32, truncate to i16, and cast back to f16/bf16.
Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);		Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);		Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
Val = DAG.getNode(ISD::BITCAST, DL, MVT::f16, Val);		Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
return Val;		return Val;
}		}

if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {		if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
LLVMContext &Context = *DAG.getContext();		LLVMContext &Context = *DAG.getContext();
SDValue Val = Parts[0];		SDValue Val = Parts[0];
EVT ValueEltVT = ValueVT.getVectorElementType();		EVT ValueEltVT = ValueVT.getVectorElementType();
EVT PartEltVT = PartVT.getVectorElementType();		EVT PartEltVT = PartVT.getVectorElementType();
▲ Show 20 Lines • Show All 289 Lines • Show Last 20 Lines

llvm/test/CodeGen/RISCV/bfloat.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				craig.topperUnsubmitted Not Done Reply Inline Actions This file already exists in trunk. Not sure if it is the same or not? craig.topper: This file already exists in trunk. Not sure if it is the same or not?
				; RUN: llc < %s -mtriple=riscv64 -mattr=+f,+d -verify-machineinstrs\| FileCheck %s --check-prefixes=RV64
				; RUN: llc < %s -mtriple=riscv32 -mattr=+f,+d -verify-machineinstrs\| FileCheck %s --check-prefixes=RV32

				; bfloat addition through memory: both operands are loaded as halfwords,
				; widened to f32 by a 16-bit left shift, added with fadd.s, narrowed by a
				; call to __truncsfbf2 and stored back as a halfword.
				define void @add(ptr %pa, ptr %pb, ptr %pc) nounwind {
				; RV64-LABEL: add:
				; RV64: # %bb.0:
				; RV64-NEXT: addi sp, sp, -16
				; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
				; RV64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
				; RV64-NEXT: lhu a1, 0(a1)
				; RV64-NEXT: lhu a0, 0(a0)
				; RV64-NEXT: mv s0, a2
				; RV64-NEXT: slli a1, a1, 16
				; RV64-NEXT: fmv.w.x fa5, a1
				; RV64-NEXT: slli a0, a0, 16
				; RV64-NEXT: fmv.w.x fa4, a0
				; RV64-NEXT: fadd.s fa0, fa4, fa5
				; RV64-NEXT: call __truncsfbf2@plt
				; RV64-NEXT: fmv.x.w a0, fa0
				; RV64-NEXT: sh a0, 0(s0)
				; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
				; RV64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
				; RV64-NEXT: addi sp, sp, 16
				; RV64-NEXT: ret
				;
				; RV32-LABEL: add:
				; RV32: # %bb.0:
				; RV32-NEXT: addi sp, sp, -16
				; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
				; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
				; RV32-NEXT: lhu a1, 0(a1)
				; RV32-NEXT: lhu a0, 0(a0)
				; RV32-NEXT: mv s0, a2
				; RV32-NEXT: slli a1, a1, 16
				; RV32-NEXT: fmv.w.x fa5, a1
				; RV32-NEXT: slli a0, a0, 16
				; RV32-NEXT: fmv.w.x fa4, a0
				; RV32-NEXT: fadd.s fa0, fa4, fa5
				; RV32-NEXT: call __truncsfbf2@plt
				; RV32-NEXT: fmv.x.w a0, fa0
				; RV32-NEXT: sh a0, 0(s0)
				; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
				; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
				; RV32-NEXT: addi sp, sp, 16
				; RV32-NEXT: ret
				%a = load bfloat, ptr %pa
				%b = load bfloat, ptr %pb
				%add = fadd bfloat %a, %b
				store bfloat %add, ptr %pc
				ret void
				}

				; bfloat addition with the operands and the result passed in FP registers
				; (fa0/fa1). After the __truncsfbf2 call the result bits are OR-ed with the
				; constant built by "lui 1048560" (0xFFFF0000 in the low 32 bits) before
				; being moved back into fa0.
				define bfloat @add2(bfloat %a, bfloat %b) nounwind {
				; RV64-LABEL: add2:
				; RV64: # %bb.0:
				; RV64-NEXT: addi sp, sp, -16
				; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
				; RV64-NEXT: fmv.x.w a0, fa0
				; RV64-NEXT: lui a1, 16
				; RV64-NEXT: addiw a1, a1, -1
				; RV64-NEXT: and a0, a0, a1
				; RV64-NEXT: fmv.x.w a2, fa1
				; RV64-NEXT: and a1, a2, a1
				; RV64-NEXT: slli a1, a1, 16
				; RV64-NEXT: fmv.w.x fa5, a1
				; RV64-NEXT: slli a0, a0, 16
				; RV64-NEXT: fmv.w.x fa4, a0
				; RV64-NEXT: fadd.s fa0, fa4, fa5
				; RV64-NEXT: call __truncsfbf2@plt
				; RV64-NEXT: fmv.x.w a0, fa0
				; RV64-NEXT: lui a1, 1048560
				; RV64-NEXT: or a0, a0, a1
				; RV64-NEXT: fmv.w.x fa0, a0
				; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
				; RV64-NEXT: addi sp, sp, 16
				; RV64-NEXT: ret
				;
				; RV32-LABEL: add2:
				; RV32: # %bb.0:
				; RV32-NEXT: addi sp, sp, -16
				; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
				; RV32-NEXT: fmv.x.w a0, fa0
				; RV32-NEXT: fmv.x.w a1, fa1
				; RV32-NEXT: slli a1, a1, 16
				; RV32-NEXT: fmv.w.x fa5, a1
				; RV32-NEXT: slli a0, a0, 16
				; RV32-NEXT: fmv.w.x fa4, a0
				; RV32-NEXT: fadd.s fa0, fa4, fa5
				; RV32-NEXT: call __truncsfbf2@plt
				; RV32-NEXT: fmv.x.w a0, fa0
				; RV32-NEXT: lui a1, 1048560
				; RV32-NEXT: or a0, a0, a1
				; RV32-NEXT: fmv.w.x fa0, a0
				; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
				; RV32-NEXT: addi sp, sp, 16
				; RV32-NEXT: ret
				%add = fadd bfloat %a, %b
				ret bfloat %add
				}

				; Adds the constant 1.0 to a bfloat loaded from memory. The constant is
				; expected to be materialized directly as an f32 bit pattern
				; ("lui 260096", i.e. 0x3F800000) rather than converted at run time.
				define void @add_constant(ptr %pa, ptr %pc) nounwind {
				; RV64-LABEL: add_constant:
				; RV64: # %bb.0:
				; RV64-NEXT: addi sp, sp, -16
				; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
				; RV64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
				; RV64-NEXT: lhu a0, 0(a0)
				; RV64-NEXT: mv s0, a1
				; RV64-NEXT: slli a0, a0, 16
				; RV64-NEXT: fmv.w.x fa5, a0
				; RV64-NEXT: lui a0, 260096
				; RV64-NEXT: fmv.w.x fa4, a0
				; RV64-NEXT: fadd.s fa0, fa5, fa4
				; RV64-NEXT: call __truncsfbf2@plt
				; RV64-NEXT: fmv.x.w a0, fa0
				; RV64-NEXT: sh a0, 0(s0)
				; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
				; RV64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
				; RV64-NEXT: addi sp, sp, 16
				; RV64-NEXT: ret
				;
				; RV32-LABEL: add_constant:
				; RV32: # %bb.0:
				; RV32-NEXT: addi sp, sp, -16
				; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
				; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
				; RV32-NEXT: lhu a0, 0(a0)
				; RV32-NEXT: mv s0, a1
				; RV32-NEXT: slli a0, a0, 16
				; RV32-NEXT: fmv.w.x fa5, a0
				; RV32-NEXT: lui a0, 260096
				; RV32-NEXT: fmv.w.x fa4, a0
				; RV32-NEXT: fadd.s fa0, fa5, fa4
				; RV32-NEXT: call __truncsfbf2@plt
				; RV32-NEXT: fmv.x.w a0, fa0
				; RV32-NEXT: sh a0, 0(s0)
				; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
				; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
				; RV32-NEXT: addi sp, sp, 16
				; RV32-NEXT: ret
				%a = load bfloat, ptr %pa
				%add = fadd bfloat %a, 1.0
				store bfloat %add, ptr %pc
				ret void
				}

				; Register-argument variant of the constant addition: the bfloat comes in
				; through fa0, 1.0 is added in f32, and the __truncsfbf2 result is OR-ed
				; with the "lui 1048560" constant before being returned in fa0.
				define bfloat @add_constant2(bfloat %a) nounwind {
				; RV64-LABEL: add_constant2:
				; RV64: # %bb.0:
				; RV64-NEXT: addi sp, sp, -16
				; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
				; RV64-NEXT: fmv.x.w a0, fa0
				; RV64-NEXT: slli a0, a0, 48
				; RV64-NEXT: srli a0, a0, 48
				; RV64-NEXT: slli a0, a0, 16
				; RV64-NEXT: fmv.w.x fa5, a0
				; RV64-NEXT: lui a0, 260096
				; RV64-NEXT: fmv.w.x fa4, a0
				; RV64-NEXT: fadd.s fa0, fa5, fa4
				; RV64-NEXT: call __truncsfbf2@plt
				; RV64-NEXT: fmv.x.w a0, fa0
				; RV64-NEXT: lui a1, 1048560
				; RV64-NEXT: or a0, a0, a1
				; RV64-NEXT: fmv.w.x fa0, a0
				; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
				; RV64-NEXT: addi sp, sp, 16
				; RV64-NEXT: ret
				;
				; RV32-LABEL: add_constant2:
				; RV32: # %bb.0:
				; RV32-NEXT: addi sp, sp, -16
				; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
				; RV32-NEXT: fmv.x.w a0, fa0
				; RV32-NEXT: slli a0, a0, 16
				; RV32-NEXT: fmv.w.x fa5, a0
				; RV32-NEXT: lui a0, 260096
				; RV32-NEXT: fmv.w.x fa4, a0
				; RV32-NEXT: fadd.s fa0, fa5, fa4
				; RV32-NEXT: call __truncsfbf2@plt
				; RV32-NEXT: fmv.x.w a0, fa0
				; RV32-NEXT: lui a1, 1048560
				; RV32-NEXT: or a0, a0, a1
				; RV32-NEXT: fmv.w.x fa0, a0
				; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
				; RV32-NEXT: addi sp, sp, 16
				; RV32-NEXT: ret
				%add = fadd bfloat %a, 1.0
				ret bfloat %add
				}

				; Storing the bfloat constant 1.0 is expected to become a plain integer
				; halfword store of 0x3F80 (lui 4 followed by an add of -128), with no
				; floating-point instructions or library calls.
				define void @store_constant(ptr %pc) nounwind {
				; RV64-LABEL: store_constant:
				; RV64: # %bb.0:
				; RV64-NEXT: lui a1, 4
				; RV64-NEXT: addiw a1, a1, -128
				; RV64-NEXT: sh a1, 0(a0)
				; RV64-NEXT: ret
				;
				; RV32-LABEL: store_constant:
				; RV32: # %bb.0:
				; RV32-NEXT: lui a1, 4
				; RV32-NEXT: addi a1, a1, -128
				; RV32-NEXT: sh a1, 0(a0)
				; RV32-NEXT: ret
				store bfloat 1.0, ptr %pc
				ret void
				}

				; An fpext to float immediately followed by an fptrunc back to bfloat is
				; expected to fold away: only a halfword load and a halfword store remain,
				; with no shift, FP move or library call.
				define void @fold_ext_trunc(ptr %pa, ptr %pc) nounwind {
				; RV64-LABEL: fold_ext_trunc:
				; RV64: # %bb.0:
				; RV64-NEXT: lh a0, 0(a0)
				; RV64-NEXT: sh a0, 0(a1)
				; RV64-NEXT: ret
				;
				; RV32-LABEL: fold_ext_trunc:
				; RV32: # %bb.0:
				; RV32-NEXT: lh a0, 0(a0)
				; RV32-NEXT: sh a0, 0(a1)
				; RV32-NEXT: ret
				%a = load bfloat, ptr %pa
				%ext = fpext bfloat %a to float
				%trunc = fptrunc float %ext to bfloat
				store bfloat %trunc, ptr %pc
				ret void
				}

				; Register-argument variant of the fpext/fptrunc fold: no conversion code
				; is expected, only the move out of fa0, the OR with the "lui 1048560"
				; constant, and the move back into fa0.
				define bfloat @fold_ext_trunc2(bfloat %a) nounwind {
				; RV64-LABEL: fold_ext_trunc2:
				; RV64: # %bb.0:
				; RV64-NEXT: fmv.x.w a0, fa0
				; RV64-NEXT: lui a1, 1048560
				; RV64-NEXT: or a0, a0, a1
				; RV64-NEXT: fmv.w.x fa0, a0
				; RV64-NEXT: ret
				;
				; RV32-LABEL: fold_ext_trunc2:
				; RV32: # %bb.0:
				; RV32-NEXT: fmv.x.w a0, fa0
				; RV32-NEXT: lui a1, 1048560
				; RV32-NEXT: or a0, a0, a1
				; RV32-NEXT: fmv.w.x fa0, a0
				; RV32-NEXT: ret
				%ext = fpext bfloat %a to float
				%trunc = fptrunc float %ext to bfloat
				ret bfloat %trunc
				}

				; double -> bfloat -> double round trip through memory: each loaded double
				; is narrowed with a call to __truncdfbf2, the two bfloat values are added
				; in f32 (fadd.s plus a __truncsfbf2 call), and the sum is widened back with
				; a 16-bit shift followed by fcvt.d.s before the double store.
				define void @add_double(ptr %pa, ptr %pb, ptr %pc) nounwind {
				; RV64-LABEL: add_double:
				; RV64: # %bb.0:
				; RV64-NEXT: addi sp, sp, -32
				; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
				; RV64-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
				; RV64-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
				; RV64-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
				; RV64-NEXT: fld fa0, 0(a0)
				; RV64-NEXT: mv s0, a2
				; RV64-NEXT: mv s1, a1
				; RV64-NEXT: call __truncdfbf2@plt
				; RV64-NEXT: fmv.x.w a0, fa0
				; RV64-NEXT: fld fa0, 0(s1)
				; RV64-NEXT: lui a1, 16
				; RV64-NEXT: addiw s1, a1, -1
				; RV64-NEXT: and s2, a0, s1
				; RV64-NEXT: call __truncdfbf2@plt
				; RV64-NEXT: fmv.x.w a0, fa0
				; RV64-NEXT: and a0, a0, s1
				; RV64-NEXT: slli a0, a0, 16
				; RV64-NEXT: fmv.w.x fa5, a0
				; RV64-NEXT: slli s2, s2, 16
				; RV64-NEXT: fmv.w.x fa4, s2
				; RV64-NEXT: fadd.s fa0, fa4, fa5
				; RV64-NEXT: call __truncsfbf2@plt
				; RV64-NEXT: fmv.x.w a0, fa0
				; RV64-NEXT: and a0, a0, s1
				; RV64-NEXT: slli a0, a0, 16
				; RV64-NEXT: fmv.w.x fa5, a0
				; RV64-NEXT: fcvt.d.s fa5, fa5
				; RV64-NEXT: fsd fa5, 0(s0)
				; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
				; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
				; RV64-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
				; RV64-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
				; RV64-NEXT: addi sp, sp, 32
				; RV64-NEXT: ret
				;
				; RV32-LABEL: add_double:
				; RV32: # %bb.0:
				; RV32-NEXT: addi sp, sp, -16
				; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
				; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
				; RV32-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
				; RV32-NEXT: fld fa0, 0(a0)
				; RV32-NEXT: mv s0, a2
				; RV32-NEXT: mv s1, a1
				; RV32-NEXT: call __truncdfbf2@plt
				; RV32-NEXT: fld fa5, 0(s1)
				; RV32-NEXT: fmv.x.w s1, fa0
				; RV32-NEXT: fmv.d fa0, fa5
				; RV32-NEXT: call __truncdfbf2@plt
				; RV32-NEXT: fmv.x.w a0, fa0
				; RV32-NEXT: slli a0, a0, 16
				; RV32-NEXT: fmv.w.x fa5, a0
				; RV32-NEXT: slli s1, s1, 16
				; RV32-NEXT: fmv.w.x fa4, s1
				; RV32-NEXT: fadd.s fa0, fa4, fa5
				; RV32-NEXT: call __truncsfbf2@plt
				; RV32-NEXT: fmv.x.w a0, fa0
				; RV32-NEXT: slli a0, a0, 16
				; RV32-NEXT: fmv.w.x fa5, a0
				; RV32-NEXT: fcvt.d.s fa5, fa5
				; RV32-NEXT: fsd fa5, 0(s0)
				; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
				; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
				; RV32-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
				; RV32-NEXT: addi sp, sp, 16
				; RV32-NEXT: ret
				%la = load double, ptr %pa
				%a = fptrunc double %la to bfloat
				%lb = load double, ptr %pb
				%b = fptrunc double %lb to bfloat
				%add = fadd bfloat %a, %b
				%dadd = fpext bfloat %add to double
				store double %dadd, ptr %pc
				ret void
				}

				; Register-argument variant of the double round trip: both double arguments
				; are narrowed with __truncdfbf2, added in f32, narrowed again with
				; __truncsfbf2, and the sum is widened with fcvt.d.s into the fa0 return
				; register.
				define double @add_double2(double %da, double %db) nounwind {
				; RV64-LABEL: add_double2:
				; RV64: # %bb.0:
				; RV64-NEXT: addi sp, sp, -32
				; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
				; RV64-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
				; RV64-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
				; RV64-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
				; RV64-NEXT: fmv.d fs0, fa1
				; RV64-NEXT: call __truncdfbf2@plt
				; RV64-NEXT: fmv.x.w a0, fa0
				; RV64-NEXT: lui a1, 16
				; RV64-NEXT: addiw s0, a1, -1
				; RV64-NEXT: and s1, a0, s0
				; RV64-NEXT: fmv.d fa0, fs0
				; RV64-NEXT: call __truncdfbf2@plt
				; RV64-NEXT: fmv.x.w a0, fa0
				; RV64-NEXT: and a0, a0, s0
				; RV64-NEXT: slli a0, a0, 16
				; RV64-NEXT: fmv.w.x fa5, a0
				; RV64-NEXT: slli s1, s1, 16
				; RV64-NEXT: fmv.w.x fa4, s1
				; RV64-NEXT: fadd.s fa0, fa4, fa5
				; RV64-NEXT: call __truncsfbf2@plt
				; RV64-NEXT: fmv.x.w a0, fa0
				; RV64-NEXT: and a0, a0, s0
				; RV64-NEXT: slli a0, a0, 16
				; RV64-NEXT: fmv.w.x fa5, a0
				; RV64-NEXT: fcvt.d.s fa0, fa5
				; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
				; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
				; RV64-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
				; RV64-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
				; RV64-NEXT: addi sp, sp, 32
				; RV64-NEXT: ret
				;
				; RV32-LABEL: add_double2:
				; RV32: # %bb.0:
				; RV32-NEXT: addi sp, sp, -16
				; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
				; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
				; RV32-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
				; RV32-NEXT: fmv.d fs0, fa1
				; RV32-NEXT: call __truncdfbf2@plt
				; RV32-NEXT: fmv.x.w s0, fa0
				; RV32-NEXT: fmv.d fa0, fs0
				; RV32-NEXT: call __truncdfbf2@plt
				; RV32-NEXT: fmv.x.w a0, fa0
				; RV32-NEXT: slli a0, a0, 16
				; RV32-NEXT: fmv.w.x fa5, a0
				; RV32-NEXT: slli s0, s0, 16
				; RV32-NEXT: fmv.w.x fa4, s0
				; RV32-NEXT: fadd.s fa0, fa4, fa5
				; RV32-NEXT: call __truncsfbf2@plt
				; RV32-NEXT: fmv.x.w a0, fa0
				; RV32-NEXT: slli a0, a0, 16
				; RV32-NEXT: fmv.w.x fa5, a0
				; RV32-NEXT: fcvt.d.s fa0, fa5
				; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
				; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
				; RV32-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
				; RV32-NEXT: addi sp, sp, 16
				; RV32-NEXT: ret
				%a = fptrunc double %da to bfloat
				%b = fptrunc double %db to bfloat
				%add = fadd bfloat %a, %b
				%dadd = fpext bfloat %add to double
				ret double %dadd
				}

This is an archive of the discontinued LLVM Phabricator instance.

[RISCV][BF16] Make backend type bf16 to follow the psABI
AbandonedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 525963

llvm/lib/Target/RISCV/RISCVISelLowering.h

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

llvm/test/CodeGen/RISCV/bfloat.ll

This is an archive of the discontinued LLVM Phabricator instance.

[RISCV][BF16] Make backend type bf16 to follow the psABI
AbandonedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 525963

llvm/lib/Target/RISCV/RISCVISelLowering.h

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

llvm/test/CodeGen/RISCV/bfloat.ll

[RISCV][BF16] Make backend type bf16 to follow the psABI
AbandonedPublic