diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -383,6 +383,9 @@ EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override; + void AdjustInstrPostInstrSelection(MachineInstr &MI, + SDNode *Node) const override; + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -8015,6 +8015,22 @@ } } +void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, + SDNode *Node) const { + // Add FRM dependency to any instructions with dynamic rounding mode. + unsigned Opc = MI.getOpcode(); + auto Idx = RISCV::getNamedOperandIdx(Opc, RISCV::OpName::frm); + if (Idx < 0) + return; + if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN) + return; + // If the instruction already reads FRM, don't add another read. + if (MI.readsRegister(RISCV::FRM)) + return; + MI.addOperand( + MachineOperand::CreateReg(RISCV::FRM, /*isDef=*/ false, /*isImp=*/ true)); +} + // Calling Convention Implementation. // The expectations for frontend ABI lowering vary from target to target. 
// Ideally, an LLVM frontend would be able to avoid worrying about many ABI diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td --- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td @@ -277,14 +277,14 @@ bits<5> rs3; bits<5> rs2; bits<5> rs1; - bits<3> funct3; + bits<3> frm; bits<5> rd; let Inst{31-27} = rs3; let Inst{26-25} = funct2; let Inst{24-20} = rs2; let Inst{19-15} = rs1; - let Inst{14-12} = funct3; + let Inst{14-12} = frm; let Inst{11-7} = rd; let Opcode = opcode.Value; } @@ -312,13 +312,13 @@ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> { bits<5> rs2; bits<5> rs1; - bits<3> funct3; + bits<3> frm; bits<5> rd; let Inst{31-25} = funct7; let Inst{24-20} = rs2; let Inst{19-15} = rs1; - let Inst{14-12} = funct3; + let Inst{14-12} = frm; let Inst{11-7} = rd; let Opcode = opcode.Value; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -18,6 +18,7 @@ #include "llvm/IR/DiagnosticInfo.h" #define GET_INSTRINFO_HEADER +#define GET_INSTRINFO_OPERAND_ENUM #include "RISCVGenInstrInfo.inc" namespace llvm { @@ -181,6 +182,10 @@ }; namespace RISCV { + +// Implemented in RISCVGenInstrInfo.inc +int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex); + // Special immediate for AVL operand of V pseudo instructions to indicate VLMax. 
static constexpr int64_t VLMaxSentinel = -1LL; } // namespace RISCV diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -35,6 +35,7 @@ #include "RISCVGenCompressInstEmitter.inc" #define GET_INSTRINFO_CTOR_DTOR +#define GET_INSTRINFO_NAMED_OPS #include "RISCVGenInstrInfo.inc" static cl::opt<bool> PreferWholeRegisterMove( diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -59,12 +59,13 @@ // Instruction class templates //===----------------------------------------------------------------------===// -let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1 in +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1, + UseNamedOperandTable = 1, hasPostISelHook = 1 in class FPFMA_rrr_frm<RISCVOpcode opcode, bits<2> funct2, string opcodestr, RegisterClass rty> : RVInstR4Frm<funct2, opcode, (outs rty:$rd), - (ins rty:$rs1, rty:$rs2, rty:$rs3, frmarg:$funct3), - opcodestr, "$rd, $rs1, $rs2, $rs3, $funct3">; + (ins rty:$rs1, rty:$rs2, rty:$rs3, frmarg:$frm), + opcodestr, "$rd, $rs1, $rs2, $rs3, $frm">; class FPFMADynFrmAlias<FPFMA_rrr_frm Inst, string OpcodeStr, RegisterClass rty> @@ -77,11 +78,12 @@ : RVInstR<funct7, funct3, OPC_OP_FP, (outs rty:$rd), (ins rty:$rs1, rty:$rs2), opcodestr, "$rd, $rs1, $rs2">; -let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1 in +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1, + UseNamedOperandTable = 1, hasPostISelHook = 1 in class FPALU_rr_frm<bits<7> funct7, string opcodestr, RegisterClass rty> : RVInstRFrm<funct7, OPC_OP_FP, (outs rty:$rd), - (ins rty:$rs1, rty:$rs2, frmarg:$funct3), opcodestr, - "$rd, $rs1, $rs2, $funct3">; + (ins rty:$rs1, rty:$rs2, frmarg:$frm), 
opcodestr, + "$rd, $rs1, $rs2, $frm">; class FPALUDynFrmAlias<FPALU_rr_frm Inst, string OpcodeStr, RegisterClass rty> @@ -96,12 +98,13 @@ let rs2 = rs2val; } -let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1 in +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1, + UseNamedOperandTable = 1, hasPostISelHook = 1 in class FPUnaryOp_r_frm<bits<7> funct7, bits<5> rs2val, RegisterClass rdty, RegisterClass rs1ty, string opcodestr> : RVInstRFrm<funct7, OPC_OP_FP, (outs rdty:$rd), - (ins rs1ty:$rs1, frmarg:$funct3), opcodestr, - "$rd, $rs1, $funct3"> { + (ins rs1ty:$rs1, frmarg:$frm), opcodestr, + "$rd, $rs1, $frm"> { let rs2 = rs2val; } diff --git a/llvm/test/CodeGen/RISCV/frm-dependency.ll b/llvm/test/CodeGen/RISCV/frm-dependency.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/frm-dependency.ll @@ -0,0 +1,124 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+f,+d -stop-after=finalize-isel < %s \ +; RUN: | FileCheck -check-prefixes=RV32IF %s +; RUN: llc -mtriple=riscv64 -mattr=+f,+d -stop-after=finalize-isel < %s \ +; RUN: | FileCheck -check-prefixes=RV64IF %s + +; Make sure an implicit FRM dependency is added to instructions with dynamic +; rounding. 
+ +define float @fadd_s(float %a, float %b) nounwind { + ; RV32IF-LABEL: name: fadd_s + ; RV32IF: bb.0 (%ir-block.0): + ; RV32IF-NEXT: liveins: $x10, $x11 + ; RV32IF-NEXT: {{ $}} + ; RV32IF-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x11 + ; RV32IF-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x10 + ; RV32IF-NEXT: [[FMV_W_X:%[0-9]+]]:fpr32 = FMV_W_X [[COPY]] + ; RV32IF-NEXT: [[FMV_W_X1:%[0-9]+]]:fpr32 = FMV_W_X [[COPY1]] + ; RV32IF-NEXT: %4:fpr32 = nofpexcept FADD_S killed [[FMV_W_X1]], killed [[FMV_W_X]], 7, implicit $frm + ; RV32IF-NEXT: [[FMV_X_W:%[0-9]+]]:gpr = FMV_X_W killed %4 + ; RV32IF-NEXT: $x10 = COPY [[FMV_X_W]] + ; RV32IF-NEXT: PseudoRET implicit $x10 + ; RV64IF-LABEL: name: fadd_s + ; RV64IF: bb.0 (%ir-block.0): + ; RV64IF-NEXT: liveins: $x10, $x11 + ; RV64IF-NEXT: {{ $}} + ; RV64IF-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x11 + ; RV64IF-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x10 + ; RV64IF-NEXT: [[FMV_W_X:%[0-9]+]]:fpr32 = FMV_W_X [[COPY]] + ; RV64IF-NEXT: [[FMV_W_X1:%[0-9]+]]:fpr32 = FMV_W_X [[COPY1]] + ; RV64IF-NEXT: %4:fpr32 = nofpexcept FADD_S killed [[FMV_W_X1]], killed [[FMV_W_X]], 7, implicit $frm + ; RV64IF-NEXT: [[FMV_X_W:%[0-9]+]]:gpr = FMV_X_W killed %4 + ; RV64IF-NEXT: $x10 = COPY [[FMV_X_W]] + ; RV64IF-NEXT: PseudoRET implicit $x10 + %1 = fadd float %a, %b + ret float %1 +} + +declare float @llvm.fma.f32(float, float, float) + +define float @fmadd_s(float %a, float %b, float %c) nounwind { + ; RV32IF-LABEL: name: fmadd_s + ; RV32IF: bb.0 (%ir-block.0): + ; RV32IF-NEXT: liveins: $x10, $x11, $x12 + ; RV32IF-NEXT: {{ $}} + ; RV32IF-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x12 + ; RV32IF-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; RV32IF-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x10 + ; RV32IF-NEXT: [[FMV_W_X:%[0-9]+]]:fpr32 = FMV_W_X [[COPY]] + ; RV32IF-NEXT: [[FMV_W_X1:%[0-9]+]]:fpr32 = FMV_W_X [[COPY1]] + ; RV32IF-NEXT: [[FMV_W_X2:%[0-9]+]]:fpr32 = FMV_W_X [[COPY2]] + ; RV32IF-NEXT: %6:fpr32 = nofpexcept FMADD_S killed [[FMV_W_X2]], killed [[FMV_W_X1]], killed [[FMV_W_X]], 7, 
implicit $frm + ; RV32IF-NEXT: [[FMV_X_W:%[0-9]+]]:gpr = FMV_X_W killed %6 + ; RV32IF-NEXT: $x10 = COPY [[FMV_X_W]] + ; RV32IF-NEXT: PseudoRET implicit $x10 + ; RV64IF-LABEL: name: fmadd_s + ; RV64IF: bb.0 (%ir-block.0): + ; RV64IF-NEXT: liveins: $x10, $x11, $x12 + ; RV64IF-NEXT: {{ $}} + ; RV64IF-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x12 + ; RV64IF-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; RV64IF-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x10 + ; RV64IF-NEXT: [[FMV_W_X:%[0-9]+]]:fpr32 = FMV_W_X [[COPY]] + ; RV64IF-NEXT: [[FMV_W_X1:%[0-9]+]]:fpr32 = FMV_W_X [[COPY1]] + ; RV64IF-NEXT: [[FMV_W_X2:%[0-9]+]]:fpr32 = FMV_W_X [[COPY2]] + ; RV64IF-NEXT: %6:fpr32 = nofpexcept FMADD_S killed [[FMV_W_X2]], killed [[FMV_W_X1]], killed [[FMV_W_X]], 7, implicit $frm + ; RV64IF-NEXT: [[FMV_X_W:%[0-9]+]]:gpr = FMV_X_W killed %6 + ; RV64IF-NEXT: $x10 = COPY [[FMV_X_W]] + ; RV64IF-NEXT: PseudoRET implicit $x10 + %1 = call float @llvm.fma.f32(float %a, float %b, float %c) + ret float %1 +} + +; This uses rtz instead of dyn rounding mode so shouldn't have an FRM dependency. +define i32 @fcvt_w_s(float %a) nounwind { + ; RV32IF-LABEL: name: fcvt_w_s + ; RV32IF: bb.0 (%ir-block.0): + ; RV32IF-NEXT: liveins: $x10 + ; RV32IF-NEXT: {{ $}} + ; RV32IF-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; RV32IF-NEXT: [[FMV_W_X:%[0-9]+]]:fpr32 = FMV_W_X [[COPY]] + ; RV32IF-NEXT: %2:gpr = nofpexcept FCVT_W_S killed [[FMV_W_X]], 1 + ; RV32IF-NEXT: $x10 = COPY %2 + ; RV32IF-NEXT: PseudoRET implicit $x10 + ; RV64IF-LABEL: name: fcvt_w_s + ; RV64IF: bb.0 (%ir-block.0): + ; RV64IF-NEXT: liveins: $x10 + ; RV64IF-NEXT: {{ $}} + ; RV64IF-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; RV64IF-NEXT: [[FMV_W_X:%[0-9]+]]:fpr32 = FMV_W_X [[COPY]] + ; RV64IF-NEXT: %2:gpr = nofpexcept FCVT_W_S killed [[FMV_W_X]], 1 + ; RV64IF-NEXT: $x10 = COPY %2 + ; RV64IF-NEXT: PseudoRET implicit $x10 + %1 = fptosi float %a to i32 + ret i32 %1 +} + +; This doesn't use a rounding mode since i32 can be represented exactly as a +; double. 
+define double @fcvt_d_w(i32 %a) nounwind { + ; RV32IF-LABEL: name: fcvt_d_w + ; RV32IF: bb.0 (%ir-block.0): + ; RV32IF-NEXT: liveins: $x10 + ; RV32IF-NEXT: {{ $}} + ; RV32IF-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; RV32IF-NEXT: %1:fpr64 = nofpexcept FCVT_D_W [[COPY]] + ; RV32IF-NEXT: FSD killed %1, %stack.0, 0 :: (store (s64) into %stack.0) + ; RV32IF-NEXT: [[LW:%[0-9]+]]:gpr = LW %stack.0, 0 :: (load (s32) from %stack.0, align 8) + ; RV32IF-NEXT: [[LW1:%[0-9]+]]:gpr = LW %stack.0, 4 :: (load (s32) from %stack.0 + 4, basealign 8) + ; RV32IF-NEXT: $x10 = COPY [[LW]] + ; RV32IF-NEXT: $x11 = COPY [[LW1]] + ; RV32IF-NEXT: PseudoRET implicit $x10, implicit $x11 + ; RV64IF-LABEL: name: fcvt_d_w + ; RV64IF: bb.0 (%ir-block.0): + ; RV64IF-NEXT: liveins: $x10 + ; RV64IF-NEXT: {{ $}} + ; RV64IF-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; RV64IF-NEXT: %1:fpr64 = nofpexcept FCVT_D_W [[COPY]] + ; RV64IF-NEXT: [[FMV_X_D:%[0-9]+]]:gpr = FMV_X_D killed %1 + ; RV64IF-NEXT: $x10 = COPY [[FMV_X_D]] + ; RV64IF-NEXT: PseudoRET implicit $x10 + %1 = sitofp i32 %a to double + ret double %1 +}