diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -52,6 +52,7 @@
   bool SelectSLOIW(SDValue N, SDValue &RS1, SDValue &Shamt);
   bool SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt);
   bool SelectRORIW(SDValue N, SDValue &RS1, SDValue &Shamt);
+  bool SelectFSRIW(SDValue N, SDValue &RS1, SDValue &RS2, SDValue &Shamt);
 
   // Include the pieces autogenerated from the target description.
 #include "RISCVGenDAGISel.inc"
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -459,6 +459,55 @@
   return false;
 }
 
+// Check that it is a FSRIW (i32 Funnel Shift Right Immediate on RV64).
+// We first check that it is the right node tree:
+//
+// (SIGN_EXTEND_INREG (OR (SHL (AssertSext RS1, i32), VC2),
+//                        (SRL (AND (AssertSext RS2, i32), VC3), VC1)))
+//
+// Then we check that the constant operands respect these constraints:
+//
+// VC2 == 32 - VC1
+// VC3 == maskLeadingOnes(VC2)
+//
+// where VC1 is the Shamt we need, VC2 is the complement of VC1 with respect
+// to 32, and VC3 is a 32-bit mask of (32 - VC1) leading ones.
+
+bool RISCVDAGToDAGISel::SelectFSRIW(SDValue N, SDValue &RS1, SDValue &RS2,
+                                    SDValue &Shamt) {
+  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+      Subtarget->getXLenVT() == MVT::i64 &&
+      cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
+    if (N.getOperand(0).getOpcode() == ISD::OR) {
+      SDValue Or = N.getOperand(0);
+      if (Or.getOperand(0).getOpcode() == ISD::SHL &&
+          Or.getOperand(1).getOpcode() == ISD::SRL) {
+        SDValue Shl = Or.getOperand(0);
+        SDValue Srl = Or.getOperand(1);
+        if (Srl.getOperand(0).getOpcode() == ISD::AND) {
+          SDValue And = Srl.getOperand(0);
+          if (isa<ConstantSDNode>(Srl.getOperand(1)) &&
+              isa<ConstantSDNode>(Shl.getOperand(1)) &&
+              isa<ConstantSDNode>(And.getOperand(1))) {
+            uint32_t VC1 = Srl.getConstantOperandVal(1);
+            uint32_t VC2 = Shl.getConstantOperandVal(1);
+            uint32_t VC3 = And.getConstantOperandVal(1);
+            if (VC2 == (32 - VC1) &&
+                VC3 == maskLeadingOnes<uint32_t>(VC2)) {
+              RS1 = Shl.getOperand(0);
+              RS2 = And.getOperand(0);
+              Shamt = CurDAG->getTargetConstant(VC1, SDLoc(N),
+                                                Srl.getOperand(1).getValueType());
+              return true;
+            }
+          }
+        }
+      }
+    }
+  }
+  return false;
+}
+
 // Merge an ADDI into the offset of a load/store instruction where possible.
 // (load (addi base, off1), off2) -> (load base, off1+off2)
 // (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -166,6 +166,11 @@
   if (Subtarget.hasStdExtZbp())
     setOperationAction(ISD::BITREVERSE, XLenVT, Legal);
 
+  if (Subtarget.hasStdExtZbt()) {
+    setOperationAction(ISD::FSHL, XLenVT, Legal);
+    setOperationAction(ISD::FSHR, XLenVT, Legal);
+  }
+
   ISD::CondCode FPCCToExtend[] = {
       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
       ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
@@ -643,6 +643,7 @@
 def SLOIWPat : ComplexPattern<i64, 2, "SelectSLOIW">;
 def SROIWPat : ComplexPattern<i64, 2, "SelectSROIW">;
 def RORIWPat : ComplexPattern<i64, 2, "SelectRORIW">;
+def FSRIWPat : ComplexPattern<i64, 3, "SelectFSRIW">;
 
 let Predicates = [HasStdExtZbbOrZbp] in {
 def : Pat<(and GPR:$rs1, (not GPR:$rs2)), (ANDN GPR:$rs1, GPR:$rs2)>;
@@ -804,6 +805,19 @@
 def : Pat<(bitreverse GPR:$rs1), (GREVI GPR:$rs1, (i64 63))>;
 } // Predicates = [HasStdExtZbp, IsRV64]
 
+let Predicates = [HasStdExtZbt] in {
+def : Pat<(or (and (xor GPR:$rs2, -1), GPR:$rs3), (and GPR:$rs2, GPR:$rs1)),
+          (CMIX GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(riscv_selectcc GPR:$rs2, (XLenVT 0), (XLenVT 17), GPR:$rs3, GPR:$rs1),
+          (CMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(fshl GPR:$rs1, GPR:$rs2, GPR:$rs3),
+          (FSL GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(fshr GPR:$rs1, GPR:$rs2, GPR:$rs3),
+          (FSR GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(fshr GPR:$rs1, GPR:$rs2, uimmlog2xlen:$shamt),
+          (FSRI GPR:$rs1, GPR:$rs2, uimmlog2xlen:$shamt)>;
+} // Predicates = [HasStdExtZbt]
+
 let Predicates = [HasStdExtZbb] in {
 def : Pat<(ctlz GPR:$rs1), (CLZ GPR:$rs1)>;
 def : Pat<(cttz GPR:$rs1), (CTZ GPR:$rs1)>;
@@ -1004,6 +1018,31 @@
 def : Pat<(sra (bitreverse GPR:$rs1), (i64 32)), (GREVIW GPR:$rs1, (i64 31))>;
 } // Predicates = [HasStdExtZbp, IsRV64]
 
+let Predicates = [HasStdExtZbt, IsRV64] in {
+def : Pat<(riscv_selectcc (and (assertsexti32 GPR:$rs3), 31),
+                          (i64 0),
+                          (i64 17),
+                          (assertsexti32 GPR:$rs1),
+                          (or (riscv_sllw (assertsexti32 GPR:$rs1),
+                                          (and (assertsexti32 GPR:$rs3), 31)),
+                              (riscv_srlw (assertsexti32 GPR:$rs2),
+                                          (sub (i64 32),
+                                               (assertsexti32 GPR:$rs3))))),
+          (FSLW GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(riscv_selectcc (and (assertsexti32 GPR:$rs3), 31),
+                          (i64 0),
+                          (i64 17),
+                          (assertsexti32 GPR:$rs2),
+                          (or (riscv_sllw (assertsexti32 GPR:$rs1),
+                                          (sub (i64 32),
+                                               (assertsexti32 GPR:$rs3))),
+                              (riscv_srlw (assertsexti32 GPR:$rs2),
+                                          (and (assertsexti32 GPR:$rs3), 31)))),
+          (FSRW GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(FSRIWPat GPR:$rs1, GPR:$rs2, uimmlog2xlen:$shamt),
+          (FSRIW GPR:$rs1, GPR:$rs2, uimmlog2xlen:$shamt)>;
+} // Predicates = [HasStdExtZbt, IsRV64]
+
 let Predicates = [HasStdExtZbb, IsRV64] in {
 def : Pat<(add (ctlz (and GPR:$rs1, (i64 0xFFFFFFFF))), (i64 -32)),
           (CLZW GPR:$rs1)>;
diff --git a/llvm/test/CodeGen/RISCV/rv32Zbt.ll b/llvm/test/CodeGen/RISCV/rv32Zbt.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv32Zbt.ll
@@ -0,0 +1,569 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv32
-mattr=+experimental-b -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IB +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbt -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IBT + +define i32 @cmix_i32(i32 %a, i32 %b, i32 %c) nounwind { +; RV32I-LABEL: cmix_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: and a0, a1, a0 +; RV32I-NEXT: not a1, a1 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: cmix_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: cmix a0, a1, a0, a2 +; RV32IB-NEXT: ret +; +; RV32IBT-LABEL: cmix_i32: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: cmix a0, a1, a0, a2 +; RV32IBT-NEXT: ret + %and = and i32 %b, %a + %neg = xor i32 %b, -1 + %and1 = and i32 %neg, %c + %or = or i32 %and1, %and + ret i32 %or +} + +define i64 @cmix_i64(i64 %a, i64 %b, i64 %c) nounwind { +; RV32I-LABEL: cmix_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: and a1, a3, a1 +; RV32I-NEXT: and a0, a2, a0 +; RV32I-NEXT: not a2, a2 +; RV32I-NEXT: not a3, a3 +; RV32I-NEXT: and a3, a3, a5 +; RV32I-NEXT: and a2, a2, a4 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: cmix_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: cmix a0, a2, a0, a4 +; RV32IB-NEXT: cmix a1, a3, a1, a5 +; RV32IB-NEXT: ret +; +; RV32IBT-LABEL: cmix_i64: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: cmix a0, a2, a0, a4 +; RV32IBT-NEXT: cmix a1, a3, a1, a5 +; RV32IBT-NEXT: ret + %and = and i64 %b, %a + %neg = xor i64 %b, -1 + %and1 = and i64 %neg, %c + %or = or i64 %and1, %and + ret i64 %or +} + +define i32 @cmov_i32(i32 %a, i32 %b, i32 %c) nounwind { +; RV32I-LABEL: cmov_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: beqz a1, .LBB2_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: .LBB2_2: +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: cmov_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: cmov a0, a1, a0, a2 +; RV32IB-NEXT: ret +; +; RV32IBT-LABEL: cmov_i32: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: cmov a0, a1, a0, a2 +; RV32IBT-NEXT: ret + %tobool.not = icmp eq i32 %b, 0 + %cond = select i1 %tobool.not, i32 %c, i32 %a + ret i32 %cond +} + +define i64 @cmov_i64(i64 %a, i64 %b, i64 %c) nounwind { +; RV32I-LABEL: cmov_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: or a2, a2, a3 +; RV32I-NEXT: beqz a2, .LBB3_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a4, a0 +; RV32I-NEXT: mv a5, a1 +; RV32I-NEXT: .LBB3_2: +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: cmov_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: or a2, a2, a3 +; RV32IB-NEXT: cmov a0, a2, a0, a4 +; RV32IB-NEXT: cmov a1, a2, a1, a5 +; RV32IB-NEXT: ret +; +; RV32IBT-LABEL: cmov_i64: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: or a2, a2, a3 +; RV32IBT-NEXT: cmov a0, a2, a0, a4 +; RV32IBT-NEXT: cmov a1, a2, a1, a5 +; RV32IBT-NEXT: ret + %tobool.not = icmp eq i64 %b, 0 + %cond = select i1 %tobool.not, i64 %c, i64 %a + ret i64 %cond +} + +declare i32 @llvm.fshl.i32(i32, i32, i32) + +define i32 @fshl_i32(i32 %a, i32 %b, i32 %c) nounwind { +; RV32I-LABEL: fshl_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: andi a3, a2, 31 +; RV32I-NEXT: beqz a3, .LBB4_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: sll a0, a0, a2 +; RV32I-NEXT: addi a2, zero, 32 +; RV32I-NEXT: sub a2, a2, a3 +; RV32I-NEXT: srl a1, a1, a2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: .LBB4_2: +; RV32I-NEXT: ret +; +; RV32IB-LABEL: fshl_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: fsl a0, a0, a2, a1 +; RV32IB-NEXT: ret +; +; RV32IBT-LABEL: fshl_i32: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: fsl a0, a0, a2, a1 +; RV32IBT-NEXT: 
ret + %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) + ret i32 %1 +} + +; As we are not matching directly i64 code patterns on RV32 some i64 patterns +; don't have yet an efficient pattern-matching with bit manipulation +; instructions on RV32. +; This test is presented here in case future expansions of the experimental-b +; extension introduce instructions that can match more efficiently this pattern. + +declare i64 @llvm.fshl.i64(i64, i64, i64) + +define i64 @fshl_i64(i64 %a, i64 %b, i64 %c) nounwind { +; RV32I-LABEL: fshl_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: andi t1, a4, 63 +; RV32I-NEXT: addi a6, t1, -32 +; RV32I-NEXT: addi a7, zero, 31 +; RV32I-NEXT: bltz a6, .LBB5_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: sll t0, a0, a6 +; RV32I-NEXT: j .LBB5_3 +; RV32I-NEXT: .LBB5_2: +; RV32I-NEXT: sll t0, a1, a4 +; RV32I-NEXT: sub t2, a7, t1 +; RV32I-NEXT: srli a5, a0, 1 +; RV32I-NEXT: srl a5, a5, t2 +; RV32I-NEXT: or t0, t0, a5 +; RV32I-NEXT: .LBB5_3: +; RV32I-NEXT: addi a5, zero, 32 +; RV32I-NEXT: sub t4, a5, t1 +; RV32I-NEXT: addi a5, zero, 64 +; RV32I-NEXT: sub t2, a5, t1 +; RV32I-NEXT: bltz t4, .LBB5_5 +; RV32I-NEXT: # %bb.4: +; RV32I-NEXT: mv t3, zero +; RV32I-NEXT: bnez t1, .LBB5_6 +; RV32I-NEXT: j .LBB5_7 +; RV32I-NEXT: .LBB5_5: +; RV32I-NEXT: srl t3, a3, t2 +; RV32I-NEXT: beqz t1, .LBB5_7 +; RV32I-NEXT: .LBB5_6: +; RV32I-NEXT: or a1, t0, t3 +; RV32I-NEXT: .LBB5_7: +; RV32I-NEXT: bltz t4, .LBB5_10 +; RV32I-NEXT: # %bb.8: +; RV32I-NEXT: srl a2, a3, t4 +; RV32I-NEXT: bgez a6, .LBB5_11 +; RV32I-NEXT: .LBB5_9: +; RV32I-NEXT: sll a3, a0, a4 +; RV32I-NEXT: bnez t1, .LBB5_12 +; RV32I-NEXT: j .LBB5_13 +; RV32I-NEXT: .LBB5_10: +; RV32I-NEXT: srl a2, a2, t2 +; RV32I-NEXT: sub a5, a7, t2 +; RV32I-NEXT: slli a3, a3, 1 +; RV32I-NEXT: sll a3, a3, a5 +; RV32I-NEXT: or a2, a2, a3 +; RV32I-NEXT: bltz a6, .LBB5_9 +; RV32I-NEXT: .LBB5_11: +; RV32I-NEXT: mv a3, zero +; RV32I-NEXT: beqz t1, .LBB5_13 +; RV32I-NEXT: .LBB5_12: +; RV32I-NEXT: or a0, a3, a2 +; RV32I-NEXT: .LBB5_13: +; RV32I-NEXT: ret +; +; RV32IB-LABEL: fshl_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: andi t1, a4, 63 +; RV32IB-NEXT: addi a6, t1, -32 +; RV32IB-NEXT: addi a7, zero, 31 +; RV32IB-NEXT: bltz a6, .LBB5_2 +; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT: sll t0, a0, a6 +; RV32IB-NEXT: j .LBB5_3 +; RV32IB-NEXT: .LBB5_2: +; RV32IB-NEXT: sll t0, a1, a4 +; RV32IB-NEXT: sub t2, a7, t1 +; RV32IB-NEXT: srli a5, a0, 1 +; RV32IB-NEXT: srl a5, a5, t2 +; RV32IB-NEXT: or t0, t0, a5 +; RV32IB-NEXT: .LBB5_3: +; RV32IB-NEXT: addi a5, zero, 32 +; RV32IB-NEXT: sub t4, a5, t1 +; RV32IB-NEXT: addi a5, zero, 64 +; RV32IB-NEXT: sub t2, a5, t1 +; RV32IB-NEXT: bltz t4, .LBB5_7 +; RV32IB-NEXT: # %bb.4: +; RV32IB-NEXT: mv t3, zero +; RV32IB-NEXT: or t0, t0, t3 +; RV32IB-NEXT: bgez t4, .LBB5_8 +; RV32IB-NEXT: .LBB5_5: +; RV32IB-NEXT: srl a2, a2, t2 +; RV32IB-NEXT: sub a5, a7, t2 +; RV32IB-NEXT: slli a3, a3, 1 +; RV32IB-NEXT: sll a3, a3, a5 +; RV32IB-NEXT: or a2, a2, a3 +; RV32IB-NEXT: cmov a1, t1, t0, a1 +; RV32IB-NEXT: bgez a6, .LBB5_9 +; RV32IB-NEXT: .LBB5_6: +; RV32IB-NEXT: sll a3, a0, a4 +; RV32IB-NEXT: j .LBB5_10 +; RV32IB-NEXT: .LBB5_7: +; RV32IB-NEXT: srl t3, a3, t2 +; RV32IB-NEXT: or t0, t0, t3 +; RV32IB-NEXT: bltz t4, .LBB5_5 +; RV32IB-NEXT: .LBB5_8: +; RV32IB-NEXT: srl a2, a3, t4 +; RV32IB-NEXT: cmov a1, t1, t0, a1 +; RV32IB-NEXT: bltz a6, .LBB5_6 +; RV32IB-NEXT: .LBB5_9: +; RV32IB-NEXT: mv a3, zero +; RV32IB-NEXT: .LBB5_10: +; RV32IB-NEXT: or a2, a3, a2 +; RV32IB-NEXT: cmov a0, t1, a2, a0 +; RV32IB-NEXT: ret +; +; RV32IBT-LABEL: fshl_i64: +; RV32IBT: # 
%bb.0: +; RV32IBT-NEXT: andi t1, a4, 63 +; RV32IBT-NEXT: addi a6, t1, -32 +; RV32IBT-NEXT: addi a7, zero, 31 +; RV32IBT-NEXT: bltz a6, .LBB5_2 +; RV32IBT-NEXT: # %bb.1: +; RV32IBT-NEXT: sll t0, a0, a6 +; RV32IBT-NEXT: j .LBB5_3 +; RV32IBT-NEXT: .LBB5_2: +; RV32IBT-NEXT: sll t0, a1, a4 +; RV32IBT-NEXT: sub t2, a7, t1 +; RV32IBT-NEXT: srli a5, a0, 1 +; RV32IBT-NEXT: srl a5, a5, t2 +; RV32IBT-NEXT: or t0, t0, a5 +; RV32IBT-NEXT: .LBB5_3: +; RV32IBT-NEXT: addi a5, zero, 32 +; RV32IBT-NEXT: sub t4, a5, t1 +; RV32IBT-NEXT: addi a5, zero, 64 +; RV32IBT-NEXT: sub t2, a5, t1 +; RV32IBT-NEXT: bltz t4, .LBB5_7 +; RV32IBT-NEXT: # %bb.4: +; RV32IBT-NEXT: mv t3, zero +; RV32IBT-NEXT: or t0, t0, t3 +; RV32IBT-NEXT: bgez t4, .LBB5_8 +; RV32IBT-NEXT: .LBB5_5: +; RV32IBT-NEXT: srl a2, a2, t2 +; RV32IBT-NEXT: sub a5, a7, t2 +; RV32IBT-NEXT: slli a3, a3, 1 +; RV32IBT-NEXT: sll a3, a3, a5 +; RV32IBT-NEXT: or a2, a2, a3 +; RV32IBT-NEXT: cmov a1, t1, t0, a1 +; RV32IBT-NEXT: bgez a6, .LBB5_9 +; RV32IBT-NEXT: .LBB5_6: +; RV32IBT-NEXT: sll a3, a0, a4 +; RV32IBT-NEXT: j .LBB5_10 +; RV32IBT-NEXT: .LBB5_7: +; RV32IBT-NEXT: srl t3, a3, t2 +; RV32IBT-NEXT: or t0, t0, t3 +; RV32IBT-NEXT: bltz t4, .LBB5_5 +; RV32IBT-NEXT: .LBB5_8: +; RV32IBT-NEXT: srl a2, a3, t4 +; RV32IBT-NEXT: cmov a1, t1, t0, a1 +; RV32IBT-NEXT: bltz a6, .LBB5_6 +; RV32IBT-NEXT: .LBB5_9: +; RV32IBT-NEXT: mv a3, zero +; RV32IBT-NEXT: .LBB5_10: +; RV32IBT-NEXT: or a2, a3, a2 +; RV32IBT-NEXT: cmov a0, t1, a2, a0 +; RV32IBT-NEXT: ret + %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c) + ret i64 %1 +} + +declare i32 @llvm.fshr.i32(i32, i32, i32) + +define i32 @fshr_i32(i32 %a, i32 %b, i32 %c) nounwind { +; RV32I-LABEL: fshr_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: andi a3, a2, 31 +; RV32I-NEXT: beqz a3, .LBB6_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: srl a1, a1, a2 +; RV32I-NEXT: addi a2, zero, 32 +; RV32I-NEXT: sub a2, a2, a3 +; RV32I-NEXT: sll a0, a0, a2 +; RV32I-NEXT: or a1, a0, a1 +; RV32I-NEXT: .LBB6_2: +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: fshr_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: fsr a0, a0, a2, a1 +; RV32IB-NEXT: ret +; +; RV32IBT-LABEL: fshr_i32: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: fsr a0, a0, a2, a1 +; RV32IBT-NEXT: ret + %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c) + ret i32 %1 +} + +; As we are not matching directly i64 code patterns on RV32 some i64 patterns +; don't have yet an efficient pattern-matching with bit manipulation +; instructions on RV32. +; This test is presented here in case future expansions of the experimental-b +; extension introduce instructions that can match more efficiently this pattern. 
+ +declare i64 @llvm.fshr.i64(i64, i64, i64) + +define i64 @fshr_i64(i64 %a, i64 %b, i64 %c) nounwind { +; RV32I-LABEL: fshr_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: mv t1, a3 +; RV32I-NEXT: mv a6, a2 +; RV32I-NEXT: andi a5, a4, 63 +; RV32I-NEXT: addi t2, a5, -32 +; RV32I-NEXT: addi a7, zero, 31 +; RV32I-NEXT: bltz t2, .LBB7_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: srl t0, t1, t2 +; RV32I-NEXT: j .LBB7_3 +; RV32I-NEXT: .LBB7_2: +; RV32I-NEXT: srl t0, a6, a4 +; RV32I-NEXT: sub a3, a7, a5 +; RV32I-NEXT: slli a2, t1, 1 +; RV32I-NEXT: sll a2, a2, a3 +; RV32I-NEXT: or t0, t0, a2 +; RV32I-NEXT: .LBB7_3: +; RV32I-NEXT: addi a2, zero, 32 +; RV32I-NEXT: sub a3, a2, a5 +; RV32I-NEXT: addi a2, zero, 64 +; RV32I-NEXT: sub a2, a2, a5 +; RV32I-NEXT: bltz a3, .LBB7_5 +; RV32I-NEXT: # %bb.4: +; RV32I-NEXT: mv t3, zero +; RV32I-NEXT: bnez a5, .LBB7_6 +; RV32I-NEXT: j .LBB7_7 +; RV32I-NEXT: .LBB7_5: +; RV32I-NEXT: sll t3, a0, a2 +; RV32I-NEXT: beqz a5, .LBB7_7 +; RV32I-NEXT: .LBB7_6: +; RV32I-NEXT: or a6, t3, t0 +; RV32I-NEXT: .LBB7_7: +; RV32I-NEXT: bltz a3, .LBB7_10 +; RV32I-NEXT: # %bb.8: +; RV32I-NEXT: sll a0, a0, a3 +; RV32I-NEXT: bgez t2, .LBB7_11 +; RV32I-NEXT: .LBB7_9: +; RV32I-NEXT: srl a1, t1, a4 +; RV32I-NEXT: bnez a5, .LBB7_12 +; RV32I-NEXT: j .LBB7_13 +; RV32I-NEXT: .LBB7_10: +; RV32I-NEXT: sll a1, a1, a2 +; RV32I-NEXT: sub a2, a7, a2 +; RV32I-NEXT: srli a0, a0, 1 +; RV32I-NEXT: srl a0, a0, a2 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: bltz t2, .LBB7_9 +; RV32I-NEXT: .LBB7_11: +; RV32I-NEXT: mv a1, zero +; RV32I-NEXT: beqz a5, .LBB7_13 +; RV32I-NEXT: .LBB7_12: +; RV32I-NEXT: or t1, a0, a1 +; RV32I-NEXT: .LBB7_13: +; RV32I-NEXT: mv a0, a6 +; RV32I-NEXT: mv a1, t1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: fshr_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: andi t1, a4, 63 +; RV32IB-NEXT: addi a6, t1, -32 +; RV32IB-NEXT: addi a7, zero, 31 +; RV32IB-NEXT: bltz a6, .LBB7_2 +; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT: srl t0, a3, a6 +; RV32IB-NEXT: j .LBB7_3 +; RV32IB-NEXT: .LBB7_2: +; RV32IB-NEXT: srl t0, a2, a4 +; RV32IB-NEXT: sub t2, a7, t1 +; RV32IB-NEXT: slli a5, a3, 1 +; RV32IB-NEXT: sll a5, a5, t2 +; RV32IB-NEXT: or t0, t0, a5 +; RV32IB-NEXT: .LBB7_3: +; RV32IB-NEXT: addi a5, zero, 32 +; RV32IB-NEXT: sub t4, a5, t1 +; RV32IB-NEXT: addi a5, zero, 64 +; RV32IB-NEXT: sub t2, a5, t1 +; RV32IB-NEXT: bltz t4, .LBB7_7 +; RV32IB-NEXT: # %bb.4: +; RV32IB-NEXT: mv t3, zero +; RV32IB-NEXT: or t0, t3, t0 +; RV32IB-NEXT: bgez t4, .LBB7_8 +; RV32IB-NEXT: .LBB7_5: +; RV32IB-NEXT: sll a1, a1, t2 +; RV32IB-NEXT: sub a5, a7, t2 +; RV32IB-NEXT: srli a0, a0, 1 +; RV32IB-NEXT: srl a0, a0, a5 +; RV32IB-NEXT: or a1, a1, a0 +; RV32IB-NEXT: cmov a0, t1, t0, a2 +; RV32IB-NEXT: bgez a6, .LBB7_9 +; RV32IB-NEXT: .LBB7_6: +; RV32IB-NEXT: srl a2, a3, a4 +; RV32IB-NEXT: j .LBB7_10 +; RV32IB-NEXT: .LBB7_7: +; RV32IB-NEXT: sll t3, a0, t2 +; RV32IB-NEXT: or t0, t3, t0 +; RV32IB-NEXT: bltz t4, .LBB7_5 +; RV32IB-NEXT: .LBB7_8: +; RV32IB-NEXT: sll a1, a0, t4 +; RV32IB-NEXT: cmov a0, t1, t0, a2 +; RV32IB-NEXT: bltz a6, .LBB7_6 +; RV32IB-NEXT: .LBB7_9: +; RV32IB-NEXT: mv a2, zero +; RV32IB-NEXT: .LBB7_10: +; RV32IB-NEXT: or a1, a1, a2 +; RV32IB-NEXT: cmov a1, t1, a1, a3 +; RV32IB-NEXT: ret +; +; RV32IBT-LABEL: fshr_i64: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: andi t1, a4, 63 +; RV32IBT-NEXT: addi a6, t1, -32 +; RV32IBT-NEXT: addi a7, zero, 31 +; RV32IBT-NEXT: bltz a6, .LBB7_2 +; RV32IBT-NEXT: # %bb.1: +; RV32IBT-NEXT: srl t0, a3, a6 +; RV32IBT-NEXT: j .LBB7_3 +; RV32IBT-NEXT: .LBB7_2: +; RV32IBT-NEXT: srl t0, a2, a4 +; RV32IBT-NEXT: sub t2, a7, t1 
+; RV32IBT-NEXT: slli a5, a3, 1 +; RV32IBT-NEXT: sll a5, a5, t2 +; RV32IBT-NEXT: or t0, t0, a5 +; RV32IBT-NEXT: .LBB7_3: +; RV32IBT-NEXT: addi a5, zero, 32 +; RV32IBT-NEXT: sub t4, a5, t1 +; RV32IBT-NEXT: addi a5, zero, 64 +; RV32IBT-NEXT: sub t2, a5, t1 +; RV32IBT-NEXT: bltz t4, .LBB7_7 +; RV32IBT-NEXT: # %bb.4: +; RV32IBT-NEXT: mv t3, zero +; RV32IBT-NEXT: or t0, t3, t0 +; RV32IBT-NEXT: bgez t4, .LBB7_8 +; RV32IBT-NEXT: .LBB7_5: +; RV32IBT-NEXT: sll a1, a1, t2 +; RV32IBT-NEXT: sub a5, a7, t2 +; RV32IBT-NEXT: srli a0, a0, 1 +; RV32IBT-NEXT: srl a0, a0, a5 +; RV32IBT-NEXT: or a1, a1, a0 +; RV32IBT-NEXT: cmov a0, t1, t0, a2 +; RV32IBT-NEXT: bgez a6, .LBB7_9 +; RV32IBT-NEXT: .LBB7_6: +; RV32IBT-NEXT: srl a2, a3, a4 +; RV32IBT-NEXT: j .LBB7_10 +; RV32IBT-NEXT: .LBB7_7: +; RV32IBT-NEXT: sll t3, a0, t2 +; RV32IBT-NEXT: or t0, t3, t0 +; RV32IBT-NEXT: bltz t4, .LBB7_5 +; RV32IBT-NEXT: .LBB7_8: +; RV32IBT-NEXT: sll a1, a0, t4 +; RV32IBT-NEXT: cmov a0, t1, t0, a2 +; RV32IBT-NEXT: bltz a6, .LBB7_6 +; RV32IBT-NEXT: .LBB7_9: +; RV32IBT-NEXT: mv a2, zero +; RV32IBT-NEXT: .LBB7_10: +; RV32IBT-NEXT: or a1, a1, a2 +; RV32IBT-NEXT: cmov a1, t1, a1, a3 +; RV32IBT-NEXT: ret + %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c) + ret i64 %1 +} + +define i32 @fshri_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: fshri_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: srli a1, a1, 5 +; RV32I-NEXT: slli a0, a0, 27 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: fshri_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: fsri a0, a0, a1, 5 +; RV32IB-NEXT: ret +; +; RV32IBT-LABEL: fshri_i32: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: fsri a0, a0, a1, 5 +; RV32IBT-NEXT: ret + %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 5) + ret i32 %1 +} + +define i64 @fshri_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: fshri_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a1, a3, 27 +; RV32I-NEXT: srli a2, a2, 5 +; RV32I-NEXT: or a2, a2, a1 +; RV32I-NEXT: srli a1, a3, 5 +; RV32I-NEXT: slli a0, a0, 27 +; RV32I-NEXT: or a1, a0, a1 +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: fshri_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: addi a1, zero, 27 +; RV32IB-NEXT: fsl a2, a3, a1, a2 +; RV32IB-NEXT: fsl a1, a0, a1, a3 +; RV32IB-NEXT: mv a0, a2 +; RV32IB-NEXT: ret +; +; RV32IBT-LABEL: fshri_i64: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT: addi a1, zero, 27 +; RV32IBT-NEXT: fsl a2, a3, a1, a2 +; RV32IBT-NEXT: fsl a1, a0, a1, a3 +; RV32IBT-NEXT: mv a0, a2 +; RV32IBT-NEXT: ret + %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 5) + ret i64 %1 +} diff --git a/llvm/test/CodeGen/RISCV/rv64Zbt.ll b/llvm/test/CodeGen/RISCV/rv64Zbt.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64Zbt.ll @@ -0,0 +1,266 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv64 -mattr=+experimental-b -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IB +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbt -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IBT + +define signext i32 @cmix_i32(i32 signext %a, i32 signext %b, i32 signext %c) nounwind { +; RV64I-LABEL: cmix_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: and a0, a1, a0 +; RV64I-NEXT: not a1, a1 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: cmix_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: cmix a0, a1, a0, a2 +; RV64IB-NEXT: ret +; +; 
RV64IBT-LABEL: cmix_i32: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: cmix a0, a1, a0, a2 +; RV64IBT-NEXT: ret + %and = and i32 %b, %a + %neg = xor i32 %b, -1 + %and1 = and i32 %neg, %c + %or = or i32 %and1, %and + ret i32 %or +} + +define i64 @cmix_i64(i64 %a, i64 %b, i64 %c) nounwind { +; RV64I-LABEL: cmix_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: and a0, a1, a0 +; RV64I-NEXT: not a1, a1 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: cmix_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: cmix a0, a1, a0, a2 +; RV64IB-NEXT: ret +; +; RV64IBT-LABEL: cmix_i64: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: cmix a0, a1, a0, a2 +; RV64IBT-NEXT: ret + %and = and i64 %b, %a + %neg = xor i64 %b, -1 + %and1 = and i64 %neg, %c + %or = or i64 %and1, %and + ret i64 %or +} + +define signext i32 @cmov_i32(i32 signext %a, i32 signext %b, i32 signext %c) nounwind { +; RV64I-LABEL: cmov_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: beqz a1, .LBB2_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: .LBB2_2: +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: cmov_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: cmov a0, a1, a0, a2 +; RV64IB-NEXT: ret +; +; RV64IBT-LABEL: cmov_i32: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: cmov a0, a1, a0, a2 +; RV64IBT-NEXT: ret + %tobool.not = icmp eq i32 %b, 0 + %cond = select i1 %tobool.not, i32 %c, i32 %a + ret i32 %cond +} + +define i64 @cmov_i64(i64 %a, i64 %b, i64 %c) nounwind { +; RV64I-LABEL: cmov_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: beqz a1, .LBB3_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: .LBB3_2: +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: cmov_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: cmov a0, a1, a0, a2 +; RV64IB-NEXT: ret +; +; RV64IBT-LABEL: cmov_i64: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: cmov a0, a1, a0, a2 +; RV64IBT-NEXT: ret + %tobool.not = icmp eq i64 %b, 0 + %cond = select i1 %tobool.not, i64 %c, i64 %a + ret i64 %cond +} + +declare i32 @llvm.fshl.i32(i32, i32, i32) + +define signext i32 @fshl_i32(i32 signext %a, i32 signext %b, i32 signext %c) nounwind { +; RV64I-LABEL: fshl_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: andi a3, a2, 31 +; RV64I-NEXT: beqz a3, .LBB4_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: addi a4, zero, 32 +; RV64I-NEXT: sub a2, a4, a2 +; RV64I-NEXT: srlw a1, a1, a2 +; RV64I-NEXT: sllw a0, a0, a3 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: .LBB4_2: +; RV64I-NEXT: ret +; +; RV64IB-LABEL: fshl_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: fslw a0, a0, a2, a1 +; RV64IB-NEXT: ret +; +; RV64IBT-LABEL: fshl_i32: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: fslw a0, a0, a2, a1 +; RV64IBT-NEXT: ret + %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) + ret i32 %1 +} + +declare i64 @llvm.fshl.i64(i64, i64, i64) + +define i64 @fshl_i64(i64 %a, i64 %b, i64 %c) nounwind { +; RV64I-LABEL: fshl_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: andi a3, a2, 63 +; RV64I-NEXT: beqz a3, .LBB5_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: sll a0, a0, a2 +; RV64I-NEXT: addi a2, zero, 64 +; RV64I-NEXT: sub a2, a2, a3 +; RV64I-NEXT: srl a1, a1, a2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: .LBB5_2: +; RV64I-NEXT: ret +; +; RV64IB-LABEL: fshl_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: fsl a0, a0, a2, a1 +; RV64IB-NEXT: ret +; +; RV64IBT-LABEL: fshl_i64: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: fsl a0, a0, a2, a1 +; RV64IBT-NEXT: ret + %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c) + ret i64 %1 +} + +declare i32 @llvm.fshr.i32(i32, i32, i32) + +define signext i32 @fshr_i32(i32 signext %a, i32 signext %b, 
i32 signext %c) nounwind { +; RV64I-LABEL: fshr_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: andi a3, a2, 31 +; RV64I-NEXT: beqz a3, .LBB6_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: srlw a1, a1, a3 +; RV64I-NEXT: addi a3, zero, 32 +; RV64I-NEXT: sub a2, a3, a2 +; RV64I-NEXT: sllw a0, a0, a2 +; RV64I-NEXT: or a1, a0, a1 +; RV64I-NEXT: .LBB6_2: +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: fshr_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: fsrw a0, a0, a2, a1 +; RV64IB-NEXT: ret +; +; RV64IBT-LABEL: fshr_i32: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: fsrw a0, a0, a2, a1 +; RV64IBT-NEXT: ret + %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c) + ret i32 %1 +} + +declare i64 @llvm.fshr.i64(i64, i64, i64) + +define i64 @fshr_i64(i64 %a, i64 %b, i64 %c) nounwind { +; RV64I-LABEL: fshr_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: andi a3, a2, 63 +; RV64I-NEXT: beqz a3, .LBB7_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: srl a1, a1, a2 +; RV64I-NEXT: addi a2, zero, 64 +; RV64I-NEXT: sub a2, a2, a3 +; RV64I-NEXT: sll a0, a0, a2 +; RV64I-NEXT: or a1, a0, a1 +; RV64I-NEXT: .LBB7_2: +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: fshr_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: fsr a0, a0, a2, a1 +; RV64IB-NEXT: ret +; +; RV64IBT-LABEL: fshr_i64: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: fsr a0, a0, a2, a1 +; RV64IBT-NEXT: ret + %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c) + ret i64 %1 +} + +define signext i32 @fshri_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: fshri_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: srliw a1, a1, 5 +; RV64I-NEXT: slli a0, a0, 27 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: fshri_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: fsriw a0, a0, a1, 5 +; RV64IB-NEXT: ret +; +; RV64IBT-LABEL: fshri_i32: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: fsriw a0, a0, a1, 5 +; RV64IBT-NEXT: ret + %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 5) + ret i32 %1 +} + +define i64 @fshri_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: fshri_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: srli a1, a1, 5 +; RV64I-NEXT: slli a0, a0, 59 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: fshri_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: fsri a0, a0, a1, 5 +; RV64IB-NEXT: ret +; +; RV64IBT-LABEL: fshri_i64: +; RV64IBT: # %bb.0: +; RV64IBT-NEXT: fsri a0, a0, a1, 5 +; RV64IBT-NEXT: ret + %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 5) + ret i64 %1 +}
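
Reviewer note (not part of the patch): a minimal standalone C++ sketch of the immediate constraints that SelectFSRIW checks, worked through for the shift amount 5 used by the fshri_i32/fshri_i64 tests above. The helper maskLeadingOnes32 is a local stand-in assumed to behave like llvm::maskLeadingOnes<uint32_t> from llvm/Support/MathExtras.h; it is only here so the sketch compiles without LLVM headers.

// Illustration of the FSRIW immediate constraints described in the comment
// above SelectFSRIW: VC2 == 32 - VC1 and VC3 == maskLeadingOnes(VC2).
#include <cassert>
#include <cstdint>
#include <cstdio>

// Local stand-in for llvm::maskLeadingOnes<uint32_t>(N): a 32-bit value with
// the N most significant bits set (N is expected to be in [0, 32]).
static uint32_t maskLeadingOnes32(unsigned N) {
  return N == 0 ? 0 : ~uint32_t(0) << (32 - N);
}

int main() {
  // fshr(i32 %a, i32 %b, 5), as in the fshri_i32 test: the DAG matched as
  // FSRIW carries VC1 on the SRL, VC2 on the SHL and VC3 on the AND.
  uint32_t VC1 = 5;                      // Shamt encoded into FSRIW.
  uint32_t VC2 = 32 - VC1;               // Shift amount of the SHL of RS1.
  uint32_t VC3 = maskLeadingOnes32(VC2); // Mask applied to RS2 before the SRL.
  assert(VC2 == 27 && VC3 == 0xFFFFFFE0u);
  printf("VC1=%u VC2=%u VC3=0x%08X\n", VC1, VC2, VC3);
  return 0;
}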