Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2128,9 +2128,7 @@
   /// for use by selects and conditional branches. With multiple condition
   /// registers, the code generator will not aggressively sink comparisons into
   /// the blocks of their users.
-  void setHasMultipleConditionRegisters(bool hasManyRegs = true) {
-    HasMultipleConditionRegisters = hasManyRegs;
-  }
+  void setHasMultipleConditionRegisters(bool hasManyRegs = true);
 
   /// Tells the code generator that the target has BitExtract instructions.
   /// The code generator will aggressively sink "shift"s into the blocks of
Index: llvm/lib/CodeGen/TargetLoweringBase.cpp
===================================================================
--- llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -98,6 +98,12 @@
     cl::desc("Don't mutate strict-float node to a legalize node"),
     cl::init(false), cl::Hidden);
 
+static cl::opt<bool> HasMultipleConditionRegistersOverride(
+    "has-multiple-condition-registers", cl::init(false),
+    cl::desc("The target has multiple condition registers and doesn't need to "
+             "sink compare instructions to save virtual registers."),
+    cl::Hidden);
+
 static bool darwinHasSinCos(const Triple &TT) {
   assert(TT.isOSDarwin() && "should be called with darwin triple");
   // Don't bother with 32 bit x86.
@@ -700,7 +706,7 @@
   MaxGluedStoresPerMemcpy = 0;
   MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize =
       MaxStoresPerMemmoveOptSize = MaxLoadsPerMemcmpOptSize = 4;
-  HasMultipleConditionRegisters = false;
+  HasMultipleConditionRegisters = HasMultipleConditionRegistersOverride;
   HasExtractBitsInsn = false;
   JumpIsExpensive = JumpIsExpensiveOverride;
   PredictableSelectIsExpensive = false;
@@ -952,6 +958,12 @@
   JumpIsExpensive = isExpensive;
 }
 
+void TargetLoweringBase::setHasMultipleConditionRegisters(bool hasManyRegs) {
+  // If the command-line option was specified, ignore this request.
+  if (!HasMultipleConditionRegistersOverride.getNumOccurrences())
+    HasMultipleConditionRegisters = hasManyRegs;
+}
+
 TargetLoweringBase::LegalizeKind
 TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
   // If this is a simple type, use the ComputeRegisterProp mechanism.
Index: llvm/test/CodeGen/RISCV/multiple-cond-regs.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/RISCV/multiple-cond-regs.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=riscv32 -has-multiple-condition-registers=0 | FileCheck %s --check-prefix=RV32-NO-MCR
+; RUN: llc < %s -mtriple=riscv32 -has-multiple-condition-registers=1 | FileCheck %s --check-prefix=RV32-MCR
+
+; Setting -has-multiple-condition-registers=1/0 switches the hasMultipleConditionRegisters property of
+; target lowering on and off. This property controls how select is lowered and whether compare instructions
+; are sunk. The following tests show that disabling it lets compares fold into the branches that use them,
+; but generates more branches when lowering select.
+
+define void @foo_select(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %x, i32 %y) nounwind {
+; RV32-NO-MCR-LABEL: foo_select:
+; RV32-NO-MCR:       # %bb.0:
+; RV32-NO-MCR-NEXT:    beq a1, a3, .LBB0_3
+; RV32-NO-MCR-NEXT:  # %bb.1:
+; RV32-NO-MCR-NEXT:    beq a1, a2, .LBB0_4
+; RV32-NO-MCR-NEXT:  .LBB0_2:
+; RV32-NO-MCR-NEXT:    sw a4, 0(a0)
+; RV32-NO-MCR-NEXT:    ret
+; RV32-NO-MCR-NEXT:  .LBB0_3:
+; RV32-NO-MCR-NEXT:    mv a4, a5
+; RV32-NO-MCR-NEXT:    bne a1, a2, .LBB0_2
+; RV32-NO-MCR-NEXT:  .LBB0_4:
+; RV32-NO-MCR-NEXT:    mv a4, a5
+; RV32-NO-MCR-NEXT:    sw a4, 0(a0)
+; RV32-NO-MCR-NEXT:    ret
+;
+; RV32-MCR-LABEL: foo_select:
+; RV32-MCR:       # %bb.0:
+; RV32-MCR-NEXT:    xor a2, a1, a2
+; RV32-MCR-NEXT:    snez a2, a2
+; RV32-MCR-NEXT:    xor a1, a1, a3
+; RV32-MCR-NEXT:    snez a1, a1
+; RV32-MCR-NEXT:    and a1, a2, a1
+; RV32-MCR-NEXT:    bnez a1, .LBB0_2
+; RV32-MCR-NEXT:  # %bb.1:
+; RV32-MCR-NEXT:    mv a4, a5
+; RV32-MCR-NEXT:  .LBB0_2:
+; RV32-MCR-NEXT:    sw a4, 0(a0)
+; RV32-MCR-NEXT:    ret
+  %icmp0 = icmp ne i32 %a, %b
+  %icmp1 = icmp ne i32 %a, %c
+  %and = and i1 %icmp0, %icmp1 ; the select condition is the AND of two compares
+  %select = select i1 %and, i32 %x, i32 %y
+  store i32 %select, i32 addrspace(1)* %out
+  ret void
+}
+
+define dso_local signext i16 @foo_cmp(i16* %a, i16* %b) nounwind {
+; RV32-NO-MCR-LABEL: foo_cmp:
+; RV32-NO-MCR:       # %bb.0: # %entry
+; RV32-NO-MCR-NEXT:    lh a0, 0(a0)
+; RV32-NO-MCR-NEXT:    bltz a0, .LBB1_3
+; RV32-NO-MCR-NEXT:  # %bb.1: # %.LBB0_1
+; RV32-NO-MCR-NEXT:    beqz a1, .LBB1_3
+; RV32-NO-MCR-NEXT:  # %bb.2: # %.LBB0_2
+; RV32-NO-MCR-NEXT:    ret
+; RV32-NO-MCR-NEXT:  .LBB1_3: # %return
+; RV32-NO-MCR-NEXT:    mv a0, zero
+; RV32-NO-MCR-NEXT:    ret
+;
+; RV32-MCR-LABEL: foo_cmp:
+; RV32-MCR:       # %bb.0: # %entry
+; RV32-MCR-NEXT:    lh a0, 0(a0)
+; RV32-MCR-NEXT:    bltz a0, .LBB1_3
+; RV32-MCR-NEXT:  # %bb.1: # %.LBB0_1
+; RV32-MCR-NEXT:    seqz a1, a1
+; RV32-MCR-NEXT:    bnez a1, .LBB1_3
+; RV32-MCR-NEXT:  # %bb.2: # %.LBB0_2
+; RV32-MCR-NEXT:    ret
+; RV32-MCR-NEXT:  .LBB1_3: # %return
+; RV32-MCR-NEXT:    mv a0, zero
+; RV32-MCR-NEXT:    ret
+entry:
+  %0 = load i16, i16* %a
+  %cmp = icmp sgt i16 %0, -1
+  %tobool.not = icmp eq i16* %b, null ; defined in entry, but only used by the branch in %.LBB0_1
+  br i1 %cmp, label %.LBB0_1, label %return
+
+.LBB0_1:
+  br i1 %tobool.not, label %return, label %.LBB0_2
+
+.LBB0_2:
+  ret i16 %0
+
+return:
+  ret i16 0
+}
+