diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -9560,6 +9560,111 @@
   return BB;
 }
 
+static MachineBasicBlock *
+EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
+                          MachineBasicBlock *ThisMBB,
+                          const RISCVSubtarget &Subtarget) {
+  // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
+  // Without this, custom-inserter would have generated:
+  //
+  //   A
+  //   | \
+  //   |  B
+  //   | /
+  //   C
+  //   | \
+  //   |  D
+  //   | /
+  //   E
+  //
+  // A: X = ...; Y = ...
+  // B: empty
+  // C: Z = PHI [X, A], [Y, B]
+  // D: empty
+  // E: PHI [X, C], [Z, D]
+  //
+  // If we lower both Select_FPRX_ in a single step, we can instead generate:
+  //
+  //   A
+  //   | \
+  //   |  C
+  //   | /|
+  //   |/ |
+  //   |  |
+  //   |  D
+  //   | /
+  //   E
+  //
+  // A: X = ...; Y = ...
+  // D: empty
+  // E: PHI [X, A], [X, C], [Y, D]
+
+  const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
+  const DebugLoc &DL = First.getDebugLoc();
+  const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
+  MachineFunction *F = ThisMBB->getParent();
+  MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineFunction::iterator It = ++ThisMBB->getIterator();
+  F->insert(It, FirstMBB);
+  F->insert(It, SecondMBB);
+  F->insert(It, SinkMBB);
+
+  // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
+  SinkMBB->splice(SinkMBB->begin(), ThisMBB,
+                  std::next(MachineBasicBlock::iterator(First)),
+                  ThisMBB->end());
+  SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
+
+  // Fallthrough block for ThisMBB.
+  ThisMBB->addSuccessor(FirstMBB);
+  // Fallthrough block for FirstMBB.
+  FirstMBB->addSuccessor(SecondMBB);
+  ThisMBB->addSuccessor(SinkMBB);
+  FirstMBB->addSuccessor(SinkMBB);
+  // This is fallthrough.
+  SecondMBB->addSuccessor(SinkMBB);
+
+  auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
+  Register fLHS = First.getOperand(1).getReg();
+  Register fRHS = First.getOperand(2).getReg();
+  // Insert appropriate branch.
+  BuildMI(ThisMBB, DL, TII.getBrCond(FirstCC))
+      .addReg(fLHS)
+      .addReg(fRHS)
+      .addMBB(SinkMBB);
+  BuildMI(ThisMBB, DL, TII.get(RISCV::PseudoBR)).addMBB(FirstMBB);
+
+  Register sLHS = Second.getOperand(1).getReg();
+  Register sRHS = Second.getOperand(2).getReg();
+  Register Op1Reg4 = First.getOperand(4).getReg();
+  Register Op1Reg5 = First.getOperand(5).getReg();
+
+  // Insert appropriate branch.
+  BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
+      .addReg(sLHS)
+      .addReg(sRHS)
+      .addMBB(SinkMBB);
+
+  BuildMI(FirstMBB, DL, TII.get(RISCV::PseudoBR)).addMBB(SecondMBB);
+
+  Register DestReg = Second.getOperand(0).getReg();
+  Register Op2Reg4 = Second.getOperand(4).getReg();
+  BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
+      .addReg(Op1Reg4)
+      .addMBB(ThisMBB)
+      .addReg(Op2Reg4)
+      .addMBB(FirstMBB)
+      .addReg(Op1Reg5)
+      .addMBB(SecondMBB);
+
+  // Now remove the Select_FPRX_s.
+  First.eraseFromParent();
+  Second.eraseFromParent();
+  return SinkMBB;
+}
+
 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
                                            MachineBasicBlock *BB,
                                            const RISCVSubtarget &Subtarget) {
@@ -9587,6 +9692,10 @@
   // previous selects in the sequence.
   // These conditions could be further relaxed. See the X86 target for a
   // related approach and more information.
+  //
+  // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
+  // is checked here and handled by a separate function -
+  // EmitLoweredCascadedSelect.
   Register LHS = MI.getOperand(1).getReg();
   Register RHS = MI.getOperand(2).getReg();
   auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
@@ -9596,6 +9705,14 @@
   SelectDests.insert(MI.getOperand(0).getReg());
 
   MachineInstr *LastSelectPseudo = &MI;
+  MachineBasicBlock::iterator NextMIIt = MachineBasicBlock::iterator(MI);
+  ++NextMIIt;
+  if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
+      NextMIIt != BB->end() && NextMIIt->getOpcode() == MI.getOpcode() &&
+      NextMIIt->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
+      NextMIIt->getOperand(5).isKill()) {
+    return EmitLoweredCascadedSelect(MI, *NextMIIt, BB, Subtarget);
+  }
 
   for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
        SequenceMBBI != E; ++SequenceMBBI) {
diff --git a/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll b/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll
--- a/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll
+++ b/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll
@@ -533,3 +533,79 @@
   %ret = add i32 %cond1, %cond2
   ret i32 %ret
 }
+
+define dso_local float @CascadedSelect(float noundef %a) local_unnamed_addr #0 {
+; RV32I-LABEL: CascadedSelect:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    fmv.w.x ft0, a0
+; RV32I-NEXT:    fmv.w.x ft1, zero
+; RV32I-NEXT:    flt.s a0, ft0, ft1
+; RV32I-NEXT:    bnez a0, .LBB8_3
+; RV32I-NEXT:  # %bb.1: # %entry
+; RV32I-NEXT:    lui a0, %hi(.LCPI8_0)
+; RV32I-NEXT:    flw ft1, %lo(.LCPI8_0)(a0)
+; RV32I-NEXT:    flt.s a0, ft1, ft0
+; RV32I-NEXT:    bnez a0, .LBB8_3
+; RV32I-NEXT:  # %bb.2: # %entry
+; RV32I-NEXT:    fmv.s ft1, ft0
+; RV32I-NEXT:  .LBB8_3: # %entry
+; RV32I-NEXT:    fmv.x.w a0, ft1
+; RV32I-NEXT:    ret
+;
+; RV32IBT-LABEL: CascadedSelect:
+; RV32IBT:       # %bb.0: # %entry
+; RV32IBT-NEXT:    fmv.w.x ft0, a0
+; RV32IBT-NEXT:    fmv.w.x ft1, zero
+; RV32IBT-NEXT:    flt.s a0, ft0, ft1
+; RV32IBT-NEXT:    bnez a0, .LBB8_3
+; RV32IBT-NEXT:  # %bb.1: # %entry
+; RV32IBT-NEXT:    lui a0, %hi(.LCPI8_0)
+; RV32IBT-NEXT:    flw ft1, %lo(.LCPI8_0)(a0)
+; RV32IBT-NEXT:    flt.s a0, ft1, ft0
+; RV32IBT-NEXT:    bnez a0, .LBB8_3
+; RV32IBT-NEXT:  # %bb.2: # %entry
+; RV32IBT-NEXT:    fmv.s ft1, ft0
+; RV32IBT-NEXT:  .LBB8_3: # %entry
+; RV32IBT-NEXT:    fmv.x.w a0, ft1
+; RV32IBT-NEXT:    ret
+;
+; RV64I-LABEL: CascadedSelect:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    fmv.w.x ft0, a0
+; RV64I-NEXT:    fmv.w.x ft1, zero
+; RV64I-NEXT:    flt.s a0, ft0, ft1
+; RV64I-NEXT:    bnez a0, .LBB8_3
+; RV64I-NEXT:  # %bb.1: # %entry
+; RV64I-NEXT:    lui a0, %hi(.LCPI8_0)
+; RV64I-NEXT:    flw ft1, %lo(.LCPI8_0)(a0)
+; RV64I-NEXT:    flt.s a0, ft1, ft0
+; RV64I-NEXT:    bnez a0, .LBB8_3
+; RV64I-NEXT:  # %bb.2: # %entry
+; RV64I-NEXT:    fmv.s ft1, ft0
+; RV64I-NEXT:  .LBB8_3: # %entry
+; RV64I-NEXT:    fmv.x.w a0, ft1
+; RV64I-NEXT:    ret
+;
+; RV64IBT-LABEL: CascadedSelect:
+; RV64IBT:       # %bb.0: # %entry
+; RV64IBT-NEXT:    fmv.w.x ft0, a0
+; RV64IBT-NEXT:    fmv.w.x ft1, zero
+; RV64IBT-NEXT:    flt.s a0, ft0, ft1
+; RV64IBT-NEXT:    bnez a0, .LBB8_3
+; RV64IBT-NEXT:  # %bb.1: # %entry
+; RV64IBT-NEXT:    lui a0, %hi(.LCPI8_0)
+; RV64IBT-NEXT:    flw ft1, %lo(.LCPI8_0)(a0)
+; RV64IBT-NEXT:    flt.s a0, ft1, ft0
+; RV64IBT-NEXT:    bnez a0, .LBB8_3
+; RV64IBT-NEXT:  # %bb.2: # %entry
+; RV64IBT-NEXT:    fmv.s ft1, ft0
+; RV64IBT-NEXT:  .LBB8_3: # %entry
+; RV64IBT-NEXT:    fmv.x.w a0, ft1
+; RV64IBT-NEXT:    ret
+entry:
+  %cmp = fcmp ogt float %a, 1.000000e+00
+  %cmp1 = fcmp olt float %a, 0.000000e+00
+  %.a = select i1 %cmp1, float 0.000000e+00, float %a
+  %retval.0 = select i1 %cmp, float 1.000000e+00, float %.a
+  ret float %retval.0
+}
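
Note: the IR in the new CascadedSelect test is the canonical two-fcmp/two-select form of a floating-point clamp. The C++ sketch below is only illustrative (the function itself is an assumption; the committed test contains just the IR above), and shows the kind of source that typically produces the pattern this patch targets:

// Hypothetical source for the CascadedSelect pattern above: clamp a to [0.0, 1.0].
// At -O1 and above, Clang normally canonicalizes the nested conditional into two
// fcmp + select pairs, which emitSelectPseudo can now hand off to
// EmitLoweredCascadedSelect, yielding the single branch cascade with one
// three-way PHI seen in the CHECK lines.
float CascadedSelect(float a) {
  return a > 1.0f ? 1.0f : (a < 0.0f ? 0.0f : a);
}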