diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -449,9 +449,18 @@ : SubtargetFeature<"no-default-unroll", "EnableDefaultUnroll", "false", "Disable default unroll preference.">; +// This is a more advanced version of CMOVBranchOpt where ALU operations can be +// predicated. +def TuneShortForwardBranchOpt + : SubtargetFeature<"short-forward-branch-opt", "HasShortForwardBranchOpt", + "true", "Enable short forward branch optimization">; +def HasShortForwardBranchOpt : Predicate<"Subtarget->hasShortForwardBranchOpt()">; +def NoShortForwardBranchOpt : Predicate<"!Subtarget->hasShortForwardBranchOpt()">; + def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7", "SiFive 7-Series processors", - [TuneNoDefaultUnroll]>; + [TuneNoDefaultUnroll, + TuneShortForwardBranchOpt]>; // Assume that lock-free native-width atomics are available, even if the target // and operating system combination would not usually provide them. The user diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp --- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -45,6 +45,8 @@ bool expandMBB(MachineBasicBlock &MBB); bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI); + bool expandCCOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); bool expandVSetVL(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); bool expandVMSET_VMCLR(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned Opcode); @@ -82,6 +84,8 @@ // expanded instructions for each pseudo is correct in the Size field of the // tablegen definition for the pseudo. 
   switch (MBBI->getOpcode()) {
+  case RISCV::PseudoCCMOVGPR:
+    return expandCCOp(MBB, MBBI, NextMBBI);
   case RISCV::PseudoVSETVLI:
   case RISCV::PseudoVSETVLIX0:
   case RISCV::PseudoVSETIVLI:
@@ -133,6 +137,60 @@
   return false;
 }
 
+bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MBBI,
+                                   MachineBasicBlock::iterator &NextMBBI) {
+  assert(MBBI->getOpcode() == RISCV::PseudoCCMOVGPR && "Unexpected opcode");
+
+  MachineFunction *MF = MBB.getParent();
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+
+  MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  MachineBasicBlock *MergeBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+  MF->insert(++MBB.getIterator(), TrueBB);
+  MF->insert(++TrueBB->getIterator(), MergeBB);
+
+  // We want to copy the "true" value when the condition is true which means
+  // we need to invert the branch condition to jump over TrueBB when the
+  // condition is false.
+  auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
+  CC = RISCVCC::getOppositeBranchCondition(CC);
+
+  // Insert branch instruction.
+  BuildMI(MBB, MBBI, DL, TII->getBrCond(CC))
+      .addReg(MI.getOperand(1).getReg())
+      .addReg(MI.getOperand(2).getReg())
+      .addMBB(MergeBB);
+
+  Register DestReg = MI.getOperand(0).getReg();
+  assert(MI.getOperand(4).getReg() == DestReg);
+
+  // Add MV.
+  BuildMI(TrueBB, DL, TII->get(RISCV::ADDI), DestReg)
+      .add(MI.getOperand(5))
+      .addImm(0);
+
+  TrueBB->addSuccessor(MergeBB);
+
+  MergeBB->splice(MergeBB->end(), &MBB, MI, MBB.end());
+  MergeBB->transferSuccessors(&MBB);
+
+  MBB.addSuccessor(TrueBB);
+  MBB.addSuccessor(MergeBB);
+
+  NextMBBI = MBB.end();
+  MI.eraseFromParent();
+
+  // Make sure live-ins are correctly attached to this new basic block.
+ LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *TrueBB); + computeAndAddLiveIns(LiveRegs, *MergeBB); + + return true; +} + bool RISCVExpandPseudo::expandVSetVL(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { assert(MBBI->getNumExplicitOperands() == 3 && MBBI->getNumOperands() >= 5 && diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -9426,9 +9426,11 @@ // (select (x in [0,1] != 0), (z ^ y), y ) -> (-x & z ) ^ y // (select (x in [0,1] == 0), y, (z | y) ) -> (-x & z ) | y // (select (x in [0,1] != 0), (z | y), y ) -> (-x & z ) | y + // NOTE: We only do this if the target does not have the short forward + // branch optimization. APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); - if (isNullConstant(RHS) && ISD::isIntEqualitySetCC(CCVal) && - DAG.MaskedValueIsZero(LHS, Mask)) { + if (!Subtarget.hasShortForwardBranchOpt() && isNullConstant(RHS) && + ISD::isIntEqualitySetCC(CCVal) && DAG.MaskedValueIsZero(LHS, Mask)) { unsigned Opcode; SDValue Src1, Src2; // true if FalseV is XOR or OR operator and one of its operands @@ -9480,35 +9482,36 @@ return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0), {LHS, RHS, CC, TrueV, FalseV}); - // (select c, -1, y) -> -c | y - if (isAllOnesConstant(TrueV)) { - SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal); - SDValue Neg = DAG.getNegative(C, DL, VT); - return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV); - } - // (select c, y, -1) -> -!c | y - if (isAllOnesConstant(FalseV)) { - SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, - ISD::getSetCCInverse(CCVal, VT)); - SDValue Neg = DAG.getNegative(C, DL, VT); - return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV); - } + if (!Subtarget.hasShortForwardBranchOpt()) { + // (select c, -1, y) -> -c | y + if (isAllOnesConstant(TrueV)) { + SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal); + SDValue Neg = 
DAG.getNegative(C, DL, VT); + return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV); + } + // (select c, y, -1) -> -!c | y + if (isAllOnesConstant(FalseV)) { + SDValue C = + DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT)); + SDValue Neg = DAG.getNegative(C, DL, VT); + return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV); + } - // (select c, 0, y) -> -!c & y - if (isNullConstant(TrueV)) { - SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, - ISD::getSetCCInverse(CCVal, VT)); - SDValue Neg = DAG.getNegative(C, DL, VT); - return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV); - } - // (select c, y, 0) -> -c & y - if (isNullConstant(FalseV)) { - SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal); - SDValue Neg = DAG.getNegative(C, DL, VT); - return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV); + // (select c, 0, y) -> -!c & y + if (isNullConstant(TrueV)) { + SDValue C = + DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT)); + SDValue Neg = DAG.getNegative(C, DL, VT); + return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV); + } + // (select c, y, 0) -> -c & y + if (isNullConstant(FalseV)) { + SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal); + SDValue Neg = DAG.getNegative(C, DL, VT); + return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV); + } } - return SDValue(); } case RISCVISD::BR_CC: { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1616,6 +1616,9 @@ return false; switch (MI.getOpcode()) { + case RISCV::PseudoCCMOVGPR: + // Operands 4 and 5 are commutable. + return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5); case CASE_VFMA_SPLATS(FMADD): case CASE_VFMA_SPLATS(FMSUB): case CASE_VFMA_SPLATS(FMACC): @@ -1761,6 +1764,15 @@ }; switch (MI.getOpcode()) { + case RISCV::PseudoCCMOVGPR: { + // CCMOV can be commuted by inverting the condition. 
+    auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
+    CC = RISCVCC::getOppositeBranchCondition(CC);
+    auto &WorkingMI = cloneIfNew(MI);
+    WorkingMI.getOperand(3).setImm(CC);
+    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI*/ false,
+                                                   OpIdx1, OpIdx2);
+  }
   case CASE_VFMA_SPLATS(FMACC):
   case CASE_VFMA_SPLATS(FMADD):
   case CASE_VFMA_SPLATS(FMSAC):
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1313,6 +1313,20 @@
                                    node:$falsev), [{}], IntCCtoRISCVCC>;
 
+let Predicates = [HasShortForwardBranchOpt],
+    Constraints = "$dst = $falsev", isCommutable = 1, Size = 8 in {
+// This instruction moves $truev to $dst when the condition is true. It will
+// be expanded to control flow in RISCVExpandPseudoInsts.
+def PseudoCCMOVGPR : Pseudo<(outs GPR:$dst),
+                            (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                             GPR:$falsev, GPR:$truev),
+                            [(set GPR:$dst,
+                              (riscv_selectcc_frag:$cc GPR:$lhs, GPR:$rhs,
+                                                       cond, GPR:$truev,
+                                                       GPR:$falsev))]>,
+                     Sched<[WriteSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB]>;
+}
+
 multiclass SelectCC_GPR_rrirr<RegisterClass valty> {
   let usesCustomInserter = 1 in
   def _Using_CC_GPR : Pseudo<(outs valty:$dst),
@@ -1329,6 +1343,7 @@
             (IntCCtoRISCVCC $cc), valty:$truev, valty:$falsev)>;
 }
 
+let Predicates = [NoShortForwardBranchOpt] in
 defm Select_GPR : SelectCC_GPR_rrirr<GPR>;
 
 class SelectCompressOpt<CondCode Cond>
     : Pat<(riscv_selectcc_frag:$select GPR:$lhs, simm12_no6:$Constant, Cond,
diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
--- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
@@ -246,4 +246,5 @@
 defm : UnsupportedSchedZbkb;
 defm : UnsupportedSchedZbkx;
 defm : UnsupportedSchedZfh;
+defm : UnsupportedSchedSFB;
 }
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -44,6 +44,12 @@
 def : WriteRes<WriteJal, [SiFive7PipeB]>;
 def : WriteRes<WriteJalr, [SiFive7PipeB]>;
 
+// Short forward branch
+def : WriteRes<WriteSFB, [SiFive7PipeA, SiFive7PipeB]> {
+  let Latency = 3;
+  let NumMicroOps = 2;
+}
+
 // Integer arithmetic and logic
 let Latency = 3 in {
 def : WriteRes<WriteIALU, [SiFive7PipeAB]>;
@@ -223,6 +229,8 @@
 def : ReadAdvance<ReadFClass32, 0>;
 def : ReadAdvance<ReadFClass64, 0>;
 
+def : ReadAdvance<ReadSFB, 0>;
+
 //===----------------------------------------------------------------------===//
 // Unsupported extensions
 defm : UnsupportedSchedV;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td
--- a/llvm/lib/Target/RISCV/RISCVSchedule.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedule.td
@@ -105,6 +105,10 @@
 def WriteFST32 : SchedWrite; // Floating point sp store
 def WriteFST64 : SchedWrite; // Floating point dp store
 
+// short forward branch for Bullet
+def WriteSFB : SchedWrite;
+def ReadSFB : SchedRead;
+
 /// Define scheduler resources associated with use operands.
 def ReadJmp : SchedRead;
 def ReadJalr : SchedRead;
@@ -229,6 +233,14 @@
 } // Unsupported = true
 }
 
+multiclass UnsupportedSchedSFB {
+let Unsupported = true in {
+def : WriteRes<WriteSFB, []>;
+
+def : ReadAdvance<ReadSFB, 0>;
+} // Unsupported = true
+}
+
 // Include the scheduler resources for other instruction extensions.
include "RISCVScheduleZb.td"
include "RISCVScheduleV.td"
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -95,6 +95,7 @@
   bool EnableRVCHintInstrs = true;
   bool EnableDefaultUnroll = true;
   bool EnableSaveRestore = false;
+  bool HasShortForwardBranchOpt = false;
   bool EnableUnalignedScalarMem = false;
   bool HasLUIADDIFusion = false;
   bool HasForcedAtomics = false;
@@ -190,6 +191,7 @@
   bool enableRVCHintInstrs() const { return EnableRVCHintInstrs; }
   bool enableDefaultUnroll() const { return EnableDefaultUnroll; }
   bool enableSaveRestore() const { return EnableSaveRestore; }
+  bool hasShortForwardBranchOpt() const { return HasShortForwardBranchOpt; }
   bool enableUnalignedScalarMem() const { return EnableUnalignedScalarMem; }
   bool hasLUIADDIFusion() const { return HasLUIADDIFusion; }
   bool hasForcedAtomics() const { return HasForcedAtomics; }
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
@@ -0,0 +1,388 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+c -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=NOSFB %s
+; RUN: llc -mtriple=riscv64 -mcpu=sifive-u74 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=SFB %s
+
+; The conditional move optimization in sifive-7-series requires that only a
+; single c.mv instruction appears in the branch shadow.
+
+; The sifive-7-series can predicate a mv.
+ +define signext i32 @test1(i32 signext %x, i32 signext %y, i32 signext %z) { +; NOSFB-LABEL: test1: +; NOSFB: # %bb.0: +; NOSFB-NEXT: beqz a2, .LBB0_2 +; NOSFB-NEXT: # %bb.1: +; NOSFB-NEXT: mv a0, a1 +; NOSFB-NEXT: .LBB0_2: +; NOSFB-NEXT: ret +; +; SFB-LABEL: test1: +; SFB: # %bb.0: +; SFB-NEXT: beqz a2, .LBB0_2 +; SFB-NEXT: # %bb.1: +; SFB-NEXT: mv a0, a1 +; SFB-NEXT: .LBB0_2: +; SFB-NEXT: ret + %c = icmp eq i32 %z, 0 + %b = select i1 %c, i32 %x, i32 %y + ret i32 %b +} + +; Same as above with select operands swapped. +define signext i32 @test2(i32 signext %x, i32 signext %y, i32 signext %z) { +; NOSFB-LABEL: test2: +; NOSFB: # %bb.0: +; NOSFB-NEXT: beqz a2, .LBB1_2 +; NOSFB-NEXT: # %bb.1: +; NOSFB-NEXT: mv a1, a0 +; NOSFB-NEXT: .LBB1_2: +; NOSFB-NEXT: mv a0, a1 +; NOSFB-NEXT: ret +; +; SFB-LABEL: test2: +; SFB: # %bb.0: +; SFB-NEXT: bnez a2, .LBB1_2 +; SFB-NEXT: # %bb.1: +; SFB-NEXT: mv a0, a1 +; SFB-NEXT: .LBB1_2: +; SFB-NEXT: ret + %c = icmp eq i32 %z, 0 + %b = select i1 %c, i32 %y, i32 %x + ret i32 %b +} + +; Make sure we don't share the same basic block for two selects with the same +; condition this would break the predication. +define signext i32 @test3(i32 signext %v, i32 signext %w, i32 signext %x, i32 signext %y, i32 signext %z) { +; NOSFB-LABEL: test3: +; NOSFB: # %bb.0: +; NOSFB-NEXT: beqz a4, .LBB2_2 +; NOSFB-NEXT: # %bb.1: +; NOSFB-NEXT: mv a1, a0 +; NOSFB-NEXT: mv a2, a3 +; NOSFB-NEXT: .LBB2_2: +; NOSFB-NEXT: addw a0, a1, a2 +; NOSFB-NEXT: ret +; +; SFB-LABEL: test3: +; SFB: # %bb.0: +; SFB-NEXT: bnez a4, .LBB2_2 +; SFB-NEXT: # %bb.1: +; SFB-NEXT: mv a0, a1 +; SFB-NEXT: .LBB2_2: +; SFB-NEXT: beqz a4, .LBB2_4 +; SFB-NEXT: # %bb.3: +; SFB-NEXT: mv a2, a3 +; SFB-NEXT: .LBB2_4: +; SFB-NEXT: addw a0, a0, a2 +; SFB-NEXT: ret + %c = icmp eq i32 %z, 0 + %a = select i1 %c, i32 %w, i32 %v + %b = select i1 %c, i32 %x, i32 %y + %d = add i32 %a, %b + ret i32 %d +} + +; Test with false value 0. 
+define signext i32 @test4(i32 signext %x, i32 signext %z) { +; NOSFB-LABEL: test4: +; NOSFB: # %bb.0: +; NOSFB-NEXT: snez a1, a1 +; NOSFB-NEXT: addi a1, a1, -1 +; NOSFB-NEXT: and a0, a0, a1 +; NOSFB-NEXT: ret +; +; SFB-LABEL: test4: +; SFB: # %bb.0: +; SFB-NEXT: beqz a1, .LBB3_2 +; SFB-NEXT: # %bb.1: +; SFB-NEXT: li a0, 0 +; SFB-NEXT: .LBB3_2: +; SFB-NEXT: ret + %c = icmp eq i32 %z, 0 + %b = select i1 %c, i32 %x, i32 0 + ret i32 %b +} + +; Same as above with select operands swapped. +define signext i32 @test5(i32 signext %x, i32 signext %z) { +; NOSFB-LABEL: test5: +; NOSFB: # %bb.0: +; NOSFB-NEXT: seqz a1, a1 +; NOSFB-NEXT: addi a1, a1, -1 +; NOSFB-NEXT: and a0, a0, a1 +; NOSFB-NEXT: ret +; +; SFB-LABEL: test5: +; SFB: # %bb.0: +; SFB-NEXT: bnez a1, .LBB4_2 +; SFB-NEXT: # %bb.1: +; SFB-NEXT: li a0, 0 +; SFB-NEXT: .LBB4_2: +; SFB-NEXT: ret + %c = icmp eq i32 %z, 0 + %b = select i1 %c, i32 0, i32 %x + ret i32 %b +} + +; Test with false value -1. +define signext i32 @test6(i32 signext %x, i32 signext %z) { +; NOSFB-LABEL: test6: +; NOSFB: # %bb.0: +; NOSFB-NEXT: seqz a1, a1 +; NOSFB-NEXT: addi a1, a1, -1 +; NOSFB-NEXT: or a0, a0, a1 +; NOSFB-NEXT: ret +; +; SFB-LABEL: test6: +; SFB: # %bb.0: +; SFB-NEXT: li a2, -1 +; SFB-NEXT: beqz a1, .LBB5_2 +; SFB-NEXT: # %bb.1: +; SFB-NEXT: mv a0, a2 +; SFB-NEXT: .LBB5_2: +; SFB-NEXT: ret + %c = icmp eq i32 %z, 0 + %b = select i1 %c, i32 %x, i32 -1 + ret i32 %b +} + +; Same as above with select operands swapped. 
+define signext i32 @test7(i32 signext %x, i32 signext %z) { +; NOSFB-LABEL: test7: +; NOSFB: # %bb.0: +; NOSFB-NEXT: snez a1, a1 +; NOSFB-NEXT: addi a1, a1, -1 +; NOSFB-NEXT: or a0, a0, a1 +; NOSFB-NEXT: ret +; +; SFB-LABEL: test7: +; SFB: # %bb.0: +; SFB-NEXT: li a2, -1 +; SFB-NEXT: bnez a1, .LBB6_2 +; SFB-NEXT: # %bb.1: +; SFB-NEXT: mv a0, a2 +; SFB-NEXT: .LBB6_2: +; SFB-NEXT: ret + %c = icmp eq i32 %z, 0 + %b = select i1 %c, i32 -1, i32 %x + ret i32 %b +} + +define i16 @select_xor_1(i16 %A, i8 %cond) { +; NOSFB-LABEL: select_xor_1: +; NOSFB: # %bb.0: # %entry +; NOSFB-NEXT: andi a1, a1, 1 +; NOSFB-NEXT: negw a1, a1 +; NOSFB-NEXT: andi a1, a1, 43 +; NOSFB-NEXT: xor a0, a0, a1 +; NOSFB-NEXT: ret +; +; SFB-LABEL: select_xor_1: +; SFB: # %bb.0: # %entry +; SFB-NEXT: andi a1, a1, 1 +; SFB-NEXT: xori a2, a0, 43 +; SFB-NEXT: beqz a1, .LBB7_2 +; SFB-NEXT: # %bb.1: # %entry +; SFB-NEXT: mv a0, a2 +; SFB-NEXT: .LBB7_2: # %entry +; SFB-NEXT: ret +entry: + %and = and i8 %cond, 1 + %cmp10 = icmp eq i8 %and, 0 + %0 = xor i16 %A, 43 + %1 = select i1 %cmp10, i16 %A, i16 %0 + ret i16 %1 +} + +; Equivalent to above, but with icmp ne (and %cond, 1), 1 instead of +; icmp eq (and %cond, 1), 0 +define i16 @select_xor_1b(i16 %A, i8 %cond) { +; NOSFB-LABEL: select_xor_1b: +; NOSFB: # %bb.0: # %entry +; NOSFB-NEXT: andi a1, a1, 1 +; NOSFB-NEXT: negw a1, a1 +; NOSFB-NEXT: andi a1, a1, 43 +; NOSFB-NEXT: xor a0, a0, a1 +; NOSFB-NEXT: ret +; +; SFB-LABEL: select_xor_1b: +; SFB: # %bb.0: # %entry +; SFB-NEXT: andi a1, a1, 1 +; SFB-NEXT: xori a2, a0, 43 +; SFB-NEXT: beqz a1, .LBB8_2 +; SFB-NEXT: # %bb.1: # %entry +; SFB-NEXT: mv a0, a2 +; SFB-NEXT: .LBB8_2: # %entry +; SFB-NEXT: ret +entry: + %and = and i8 %cond, 1 + %cmp10 = icmp ne i8 %and, 1 + %0 = xor i16 %A, 43 + %1 = select i1 %cmp10, i16 %A, i16 %0 + ret i16 %1 +} + +define i32 @select_xor_2(i32 %A, i32 %B, i8 %cond) { +; NOSFB-LABEL: select_xor_2: +; NOSFB: # %bb.0: # %entry +; NOSFB-NEXT: andi a2, a2, 1 +; NOSFB-NEXT: neg a2, a2 +; 
NOSFB-NEXT: and a1, a1, a2 +; NOSFB-NEXT: xor a0, a0, a1 +; NOSFB-NEXT: ret +; +; SFB-LABEL: select_xor_2: +; SFB: # %bb.0: # %entry +; SFB-NEXT: andi a2, a2, 1 +; SFB-NEXT: xor a1, a1, a0 +; SFB-NEXT: beqz a2, .LBB9_2 +; SFB-NEXT: # %bb.1: # %entry +; SFB-NEXT: mv a0, a1 +; SFB-NEXT: .LBB9_2: # %entry +; SFB-NEXT: ret +entry: + %and = and i8 %cond, 1 + %cmp10 = icmp eq i8 %and, 0 + %0 = xor i32 %B, %A + %1 = select i1 %cmp10, i32 %A, i32 %0 + ret i32 %1 +} + +; Equivalent to above, but with icmp ne (and %cond, 1), 1 instead of +; icmp eq (and %cond, 1), 0 +define i32 @select_xor_2b(i32 %A, i32 %B, i8 %cond) { +; NOSFB-LABEL: select_xor_2b: +; NOSFB: # %bb.0: # %entry +; NOSFB-NEXT: andi a2, a2, 1 +; NOSFB-NEXT: neg a2, a2 +; NOSFB-NEXT: and a1, a1, a2 +; NOSFB-NEXT: xor a0, a0, a1 +; NOSFB-NEXT: ret +; +; SFB-LABEL: select_xor_2b: +; SFB: # %bb.0: # %entry +; SFB-NEXT: andi a2, a2, 1 +; SFB-NEXT: xor a1, a1, a0 +; SFB-NEXT: beqz a2, .LBB10_2 +; SFB-NEXT: # %bb.1: # %entry +; SFB-NEXT: mv a0, a1 +; SFB-NEXT: .LBB10_2: # %entry +; SFB-NEXT: ret +entry: + %and = and i8 %cond, 1 + %cmp10 = icmp ne i8 %and, 1 + %0 = xor i32 %B, %A + %1 = select i1 %cmp10, i32 %A, i32 %0 + ret i32 %1 +} + +define i32 @select_or(i32 %A, i32 %B, i8 %cond) { +; NOSFB-LABEL: select_or: +; NOSFB: # %bb.0: # %entry +; NOSFB-NEXT: andi a2, a2, 1 +; NOSFB-NEXT: neg a2, a2 +; NOSFB-NEXT: and a1, a1, a2 +; NOSFB-NEXT: or a0, a0, a1 +; NOSFB-NEXT: ret +; +; SFB-LABEL: select_or: +; SFB: # %bb.0: # %entry +; SFB-NEXT: andi a2, a2, 1 +; SFB-NEXT: or a1, a1, a0 +; SFB-NEXT: beqz a2, .LBB11_2 +; SFB-NEXT: # %bb.1: # %entry +; SFB-NEXT: mv a0, a1 +; SFB-NEXT: .LBB11_2: # %entry +; SFB-NEXT: ret +entry: + %and = and i8 %cond, 1 + %cmp10 = icmp eq i8 %and, 0 + %0 = or i32 %B, %A + %1 = select i1 %cmp10, i32 %A, i32 %0 + ret i32 %1 +} + +; Equivalent to above, but with icmp ne (and %cond, 1), 1 instead of +; icmp eq (and %cond, 1), 0 +define i32 @select_or_b(i32 %A, i32 %B, i8 %cond) { +; NOSFB-LABEL: 
select_or_b: +; NOSFB: # %bb.0: # %entry +; NOSFB-NEXT: andi a2, a2, 1 +; NOSFB-NEXT: neg a2, a2 +; NOSFB-NEXT: and a1, a1, a2 +; NOSFB-NEXT: or a0, a0, a1 +; NOSFB-NEXT: ret +; +; SFB-LABEL: select_or_b: +; SFB: # %bb.0: # %entry +; SFB-NEXT: andi a2, a2, 1 +; SFB-NEXT: or a1, a1, a0 +; SFB-NEXT: beqz a2, .LBB12_2 +; SFB-NEXT: # %bb.1: # %entry +; SFB-NEXT: mv a0, a1 +; SFB-NEXT: .LBB12_2: # %entry +; SFB-NEXT: ret +entry: + %and = and i8 %cond, 1 + %cmp10 = icmp ne i8 %and, 1 + %0 = or i32 %B, %A + %1 = select i1 %cmp10, i32 %A, i32 %0 + ret i32 %1 +} + +define i32 @select_or_1(i32 %A, i32 %B, i32 %cond) { +; NOSFB-LABEL: select_or_1: +; NOSFB: # %bb.0: # %entry +; NOSFB-NEXT: andi a2, a2, 1 +; NOSFB-NEXT: neg a2, a2 +; NOSFB-NEXT: and a1, a1, a2 +; NOSFB-NEXT: or a0, a0, a1 +; NOSFB-NEXT: ret +; +; SFB-LABEL: select_or_1: +; SFB: # %bb.0: # %entry +; SFB-NEXT: andi a2, a2, 1 +; SFB-NEXT: or a1, a1, a0 +; SFB-NEXT: beqz a2, .LBB13_2 +; SFB-NEXT: # %bb.1: # %entry +; SFB-NEXT: mv a0, a1 +; SFB-NEXT: .LBB13_2: # %entry +; SFB-NEXT: ret +entry: + %and = and i32 %cond, 1 + %cmp10 = icmp eq i32 %and, 0 + %0 = or i32 %B, %A + %1 = select i1 %cmp10, i32 %A, i32 %0 + ret i32 %1 +} + +; Equivalent to above, but with icmp ne (and %cond, 1), 1 instead of +; icmp eq (and %cond, 1), 0 +define i32 @select_or_1b(i32 %A, i32 %B, i32 %cond) { +; NOSFB-LABEL: select_or_1b: +; NOSFB: # %bb.0: # %entry +; NOSFB-NEXT: andi a2, a2, 1 +; NOSFB-NEXT: neg a2, a2 +; NOSFB-NEXT: and a1, a1, a2 +; NOSFB-NEXT: or a0, a0, a1 +; NOSFB-NEXT: ret +; +; SFB-LABEL: select_or_1b: +; SFB: # %bb.0: # %entry +; SFB-NEXT: andi a2, a2, 1 +; SFB-NEXT: or a1, a1, a0 +; SFB-NEXT: beqz a2, .LBB14_2 +; SFB-NEXT: # %bb.1: # %entry +; SFB-NEXT: mv a0, a1 +; SFB-NEXT: .LBB14_2: # %entry +; SFB-NEXT: ret +entry: + %and = and i32 %cond, 1 + %cmp10 = icmp ne i32 %and, 1 + %0 = or i32 %B, %A + %1 = select i1 %cmp10, i32 %A, i32 %0 + ret i32 %1 +}