diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -157,6 +157,13 @@
     }
     break;
   }
+  case RISCVISD::READ_CYCLE_WIDE:
+    // Select the ReadCycleWide pseudo (two i32 results plus a chain); the
+    // pseudo is expanded afterwards by the target's custom inserter.
+    ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ReadCycleWide, DL, MVT::i32,
+                                             MVT::i32, MVT::Other,
+                                             Node->getOperand(0)));
+    return;
   }
 
   // Select the default instruction.
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -48,7 +48,10 @@
   // This is a more convenient semantic for producing dagcombines that remove
   // unnecessary GPR->FPR->GPR moves.
   FMV_W_X_RV64,
-  FMV_X_ANYEXTW_RV64
+  FMV_X_ANYEXTW_RV64,
+  // READ_CYCLE_WIDE - A read of the 64-bit cycle CSR on a 32-bit target
+  // (returns (Lo, Hi)). It takes a chain operand.
+  READ_CYCLE_WIDE
 };
 }
 
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -180,6 +180,10 @@
 
   setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
 
+  // READCYCLECOUNTER will use RDCYCLE[H]
+  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
+                     Subtarget.is64Bit() ? Legal : Custom);
+
   if (Subtarget.hasStdExtA()) {
     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
     setMinCmpXchgSizeInBits(32);
@@ -836,6 +840,21 @@
   switch (N->getOpcode()) {
   default:
     llvm_unreachable("Don't know how to custom type legalize this operation!");
+  case ISD::READCYCLECOUNTER: {
+    assert(!Subtarget.is64Bit() &&
+           "READCYCLECOUNTER only has custom type legalization on riscv32");
+
+    // Produce the counter as two i32 halves plus a chain; the pseudo selected
+    // for READ_CYCLE_WIDE is later expanded into the rdcycle[h] retry loop.
+    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
+    SDValue RCW =
+        DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
+
+    Results.push_back(RCW);             // Lo
+    Results.push_back(RCW.getValue(1)); // Hi
+    Results.push_back(RCW.getValue(2)); // Chain
+    break;
+  }
   case ISD::SHL:
   case ISD::SRA:
   case ISD::SRL:
@@ -1034,6 +1053,78 @@
   return 1;
 }
 
+/// Expand the RV32 ReadCycleWide pseudo into a loop that re-reads the cycle
+/// CSR halves until the high word is stable. Returns the continuation block.
+static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
+                                                  MachineBasicBlock *BB) {
+  assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
+
+  // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
+  // Should the count have wrapped while it was being read, we need to try
+  // again.
+  // ...
+  // read:
+  // rdcycleh x3 # load high word of cycle
+  // rdcycle  x2 # load low word of cycle
+  // rdcycleh x4 # load high word of cycle
+  // bne x3, x4, read # check if high word reads match, otherwise try again
+  // ...
+
+  MachineFunction &MF = *BB->getParent();
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction::iterator It = ++BB->getIterator();
+
+  // LoopMBB holds the read/retry loop; DoneMBB receives the instructions
+  // that followed the pseudo in BB. Both are placed directly after BB.
+  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
+  MF.insert(It, LoopMBB);
+
+  MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
+  MF.insert(It, DoneMBB);
+
+  // Transfer the remainder of BB and its successor edges to DoneMBB.
+  DoneMBB->splice(DoneMBB->begin(), BB,
+                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
+  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+  // No branch is emitted here: BB falls through into LoopMBB.
+  BB->addSuccessor(LoopMBB);
+
+  MachineRegisterInfo &RegInfo = MF.getRegInfo();
+  unsigned ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
+  unsigned LoReg = MI.getOperand(0).getReg();
+  unsigned HiReg = MI.getOperand(1).getReg();
+  DebugLoc DL = MI.getDebugLoc();
+
+  // CSR numbers of the low and high halves of the cycle counter.
+  const unsigned CycleCSR = 0xC00;
+  const unsigned CycleHCSR = 0xC80;
+
+  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
+      .addImm(CycleHCSR)
+      .addReg(RISCV::X0);
+  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
+      .addImm(CycleCSR)
+      .addReg(RISCV::X0);
+  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
+      .addImm(CycleHCSR)
+      .addReg(RISCV::X0);
+
+  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
+      .addReg(HiReg)
+      .addReg(ReadAgainReg)
+      .addMBB(LoopMBB);
+
+  LoopMBB->addSuccessor(LoopMBB);
+  LoopMBB->addSuccessor(DoneMBB);
+
+  MI.eraseFromParent();
+
+  // Continue in the block that now holds the code that followed the pseudo.
+  return DoneMBB;
+}
+
 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
                                              MachineBasicBlock *BB) {
   assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
@@ -1237,6 +1328,8 @@
   switch (MI.getOpcode()) {
   default:
     llvm_unreachable("Unexpected instr type to insert");
+  case RISCV::ReadCycleWide:
+    return emitReadCycleWidePseudo(MI, BB);
   case RISCV::Select_GPR_Using_CC_GPR:
   case RISCV::Select_FPR32_Using_CC_GPR:
   case RISCV::Select_FPR64_Using_CC_GPR:
@@ -2306,6 +2399,8 @@
     return "RISCVISD::FMV_W_X_RV64";
   case RISCVISD::FMV_X_ANYEXTW_RV64:
     return "RISCVISD::FMV_X_ANYEXTW_RV64";
+  case RISCVISD::READ_CYCLE_WIDE:
+    return "RISCVISD::READ_CYCLE_WIDE";
   }
   return nullptr;
 }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1056,6 +1056,18 @@
 defm : StPat;
 } // Predicates = [IsRV64]
 
+/// readcyclecounter
+// On RV64, we can directly use "rdcycle".
+let Predicates = [IsRV64] in
+def : Pat<(readcyclecounter), (CSRRS 0xC00, X0)>;
+// On RV32, ReadCycleWide is inserted and expanded to the suggested
+// rdcycle[h] loop. Each read observes a fresh counter value, so the pseudo is
+// marked as having side effects to keep separate reads from being merged.
+let Predicates = [IsRV32], usesCustomInserter = 1, hasSideEffects = 1,
+    mayLoad = 0, mayStore = 0, hasNoSchedulingInfo = 1 in
+def ReadCycleWide : Pseudo<(outs GPR:$lo, GPR:$hi), (ins), [], "#ReadCycleWide",
+                           "($hi, $lo)">;
+
 //===----------------------------------------------------------------------===//
 // Standard extensions
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/RISCV/readcyclecounter.ll b/llvm/test/CodeGen/RISCV/readcyclecounter.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/readcyclecounter.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV64I %s
+
+; Verify that we lower @llvm.readcyclecounter() correctly.
+
+declare i64 @llvm.readcyclecounter()
+
+define i64 @test_builtin_readcyclecounter() nounwind {
+; RV32I-LABEL: test_builtin_readcyclecounter:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    rdcycleh a1
+; RV32I-NEXT:    rdcycle a0
+; RV32I-NEXT:    rdcycleh a2
+; RV32I-NEXT:    bne a1, a2, .LBB0_1
+; RV32I-NEXT:  # %bb.2:
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: test_builtin_readcyclecounter:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    rdcycle a0
+; RV64I-NEXT:    ret
+  %1 = tail call i64 @llvm.readcyclecounter()
+  ret i64 %1
+}