diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -47,9 +47,11 @@
   bool SelectSLOI(SDValue N, SDValue &RS1, SDValue &Shamt);
   bool SelectSROI(SDValue N, SDValue &RS1, SDValue &Shamt);
+  bool SelectRORI(SDValue N, SDValue &RS1, SDValue &Shamt);
   bool SelectSLLIUW(SDValue N, SDValue &RS1, SDValue &Shamt);
   bool SelectSLOIW(SDValue N, SDValue &RS1, SDValue &Shamt);
   bool SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt);
+  bool SelectRORIW(SDValue N, SDValue &RS1, SDValue &Shamt);

   // Include the pieces autogenerated from the target description.
 #include "RISCVGenDAGISel.inc"
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -272,6 +272,44 @@
   return false;
 }

+// Check that it is a RORI (Rotate Right Immediate). We first check that
+// it is the right node tree:
+//
+//  (ROTL RS1, VC)
+//
+// The compiler translates immediate right rotations given by calls to the
+// rotateright32/rotateright64 intrinsics into left rotations (ROTL).
+// Since a left rotation can easily be emulated as a right rotation by
+// negating the constant, there is no encoding for ROLI.
+// We therefore select an immediate left rotation as RORI with the
+// complementary constant:
+//
+//  Shamt == XLen - VC
+
+bool RISCVDAGToDAGISel::SelectRORI(SDValue N, SDValue &RS1, SDValue &Shamt) {
+  MVT XLenVT = Subtarget->getXLenVT();
+  if (N.getOpcode() == ISD::ROTL) {
+    if (isa<ConstantSDNode>(N.getOperand(1))) {
+      if (XLenVT == MVT::i64) {
+        uint64_t VC = N.getConstantOperandVal(1);
+        Shamt = CurDAG->getTargetConstant((64 - VC), SDLoc(N),
+                                          N.getOperand(1).getValueType());
+        RS1 = N.getOperand(0);
+        return true;
+      }
+      if (XLenVT == MVT::i32) {
+        uint32_t VC = N.getConstantOperandVal(1);
+        Shamt = CurDAG->getTargetConstant((32 - VC), SDLoc(N),
+                                          N.getOperand(1).getValueType());
+        RS1 = N.getOperand(0);
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+
 // Check that it is a SLLIUW (Shift Logical Left Immediate Unsigned i32
 // on RV64).
 // SLLIUW is the same as SLLI except for the fact that it clears the bits
@@ -374,6 +412,53 @@
   return false;
 }

+// Check that it is a RORIW (i32 Right Rotate Immediate on RV64).
+// We first check that it is the right node tree:
+//
+//  (SIGN_EXTEND_INREG (OR (SHL (AssertSext RS1, i32), VC2),
+//                         (SRL (AND (AssertSext RS2, i32), VC3), VC1)))
+//
+// Then we check that the constant operands respect these constraints:
+//
+// VC2 == 32 - VC1
+// VC3 == maskLeadingOnes<uint32_t>(VC2)
+//
+// where VC1 is the Shamt we need, VC2 is the complement of Shamt over 32,
+// and VC3 is a 32-bit mask of (32 - VC1) leading ones.
+
+bool RISCVDAGToDAGISel::SelectRORIW(SDValue N, SDValue &RS1, SDValue &Shamt) {
+  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+      Subtarget->getXLenVT() == MVT::i64 &&
+      cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
+    if (N.getOperand(0).getOpcode() == ISD::OR) {
+      SDValue Or = N.getOperand(0);
+      if (Or.getOperand(0).getOpcode() == ISD::SHL &&
+          Or.getOperand(1).getOpcode() == ISD::SRL) {
+        SDValue Shl = Or.getOperand(0);
+        SDValue Srl = Or.getOperand(1);
+        if (Srl.getOperand(0).getOpcode() == ISD::AND) {
+          SDValue And = Srl.getOperand(0);
+          if (isa<ConstantSDNode>(Srl.getOperand(1)) &&
+              isa<ConstantSDNode>(Shl.getOperand(1)) &&
+              isa<ConstantSDNode>(And.getOperand(1))) {
+            uint32_t VC1 = Srl.getConstantOperandVal(1);
+            uint32_t VC2 = Shl.getConstantOperandVal(1);
+            uint32_t VC3 = And.getConstantOperandVal(1);
+            if (VC2 == (32 - VC1) &&
+                VC3 == maskLeadingOnes<uint32_t>(VC2)) {
+              RS1 = Shl.getOperand(0);
+              Shamt = CurDAG->getTargetConstant(VC1, SDLoc(N),
+                                                Srl.getOperand(1).getValueType());
+              return true;
+            }
+          }
+        }
+      }
+    }
+  }
+  return false;
+}
+
 // Merge an ADDI into the offset of a load/store instruction where possible.
 // (load (addi base, off1), off2) -> (load base, off1+off2)
 // (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -149,8 +149,10 @@
   setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
   setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
-  setOperationAction(ISD::ROTL, XLenVT, Expand);
-  setOperationAction(ISD::ROTR, XLenVT, Expand);
+  if (!(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp())) {
+    setOperationAction(ISD::ROTL, XLenVT, Expand);
+    setOperationAction(ISD::ROTR, XLenVT, Expand);
+  }

   if (!Subtarget.hasStdExtZbp())
     setOperationAction(ISD::BSWAP, XLenVT, Expand);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
@@ -638,21 +638,46 @@
//===----------------------------------------------------------------------===//
 def SLOIPat : ComplexPattern<XLenVT, 2, "SelectSLOI">;
 def SROIPat : ComplexPattern<XLenVT, 2, "SelectSROI">;
+def RORIPat : ComplexPattern<XLenVT, 2, "SelectRORI">;
 def SLLIUWPat : ComplexPattern<i64, 2, "SelectSLLIUW">;
 def SLOIWPat : ComplexPattern<i64, 2, "SelectSLOIW">;
 def SROIWPat : ComplexPattern<i64, 2, "SelectSROIW">;
+def RORIWPat : ComplexPattern<i64, 2, "SelectRORIW">;
+
+let Predicates = [HasStdExtZbbOrZbp] in {
+def : Pat<(and GPR:$rs1, (not GPR:$rs2)), (ANDN GPR:$rs1, GPR:$rs2)>;
+def : Pat<(or GPR:$rs1, (not GPR:$rs2)), (ORN GPR:$rs1, GPR:$rs2)>;
+def : Pat<(xor GPR:$rs1, (not GPR:$rs2)), (XNOR GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasStdExtZbbOrZbp]

 let Predicates = [HasStdExtZbb] in {
 def : Pat<(xor (shl (xor GPR:$rs1, -1), GPR:$rs2), -1),
           (SLO GPR:$rs1, GPR:$rs2)>;
 def : Pat<(xor (srl (xor GPR:$rs1, -1), GPR:$rs2), -1),
           (SRO GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasStdExtZbb]
+
+let Predicates = [HasStdExtZbbOrZbp] in {
+def : Pat<(rotl GPR:$rs1, GPR:$rs2), (ROL GPR:$rs1, GPR:$rs2)>;
+def : Pat<(fshl GPR:$rs1, GPR:$rs1, GPR:$rs2), (ROL GPR:$rs1, GPR:$rs2)>;
+def : Pat<(rotr GPR:$rs1, GPR:$rs2), (ROR GPR:$rs1, GPR:$rs2)>;
+def : Pat<(fshr GPR:$rs1, GPR:$rs1, GPR:$rs2), (ROR GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasStdExtZbbOrZbp]
+
+let Predicates = [HasStdExtZbb] in {
 def : Pat<(SLOIPat GPR:$rs1, uimmlog2xlen:$shamt),
           (SLOI GPR:$rs1, uimmlog2xlen:$shamt)>;
 def : Pat<(SROIPat GPR:$rs1, uimmlog2xlen:$shamt),
           (SROI GPR:$rs1, uimmlog2xlen:$shamt)>;
 } // Predicates = [HasStdExtZbb]
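As a concrete illustration of the complement logic used by SelectRORI and the RORIPat pattern added below: the sketch is not part of the patch, the function name is illustrative, and it assumes the same riscv32 llc invocations as the rv32Zbbp.ll test added later in this diff.

; A rotate-left-by-8 reaches instruction selection as (rotl a0, 8) once ROTL
; is legal under Zbb/Zbp; SelectRORI is then expected to rewrite it as a right
; rotation by the complementary amount, i.e. "rori a0, a0, 24" (32 - 8 = 24).
declare i32 @llvm.fshl.i32(i32, i32, i32)

define i32 @rotl_by_8(i32 %a) nounwind {
  %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 8)
  ret i32 %1
}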
+// There's no encoding for roli in the current version of the 'B' extension +// (v0.92) as it can be implemented with rori by negating the immediate. +// For this reason we pattern-match only against rori[w]. +let Predicates = [HasStdExtZbbOrZbp] in +def : Pat<(RORIPat GPR:$rs1, uimmlog2xlen:$shamt), + (RORI GPR:$rs1, uimmlog2xlen:$shamt)>; + let Predicates = [HasStdExtZbp, IsRV32] in { def : Pat<(or (or (and (srl GPR:$rs1, (i32 1)), (i32 0x55555555)), GPR:$rs1), (and (shl GPR:$rs1, (i32 1)), (i32 0xAAAAAAAA))), @@ -772,6 +797,23 @@ (MAXU GPR:$rs1, GPR:$rs2)>; } // Predicates = [HasStdExtZbb] +let Predicates = [HasStdExtZbbOrZbp, IsRV32] in +def : Pat<(or (and GPR:$rs1, 0x0000FFFF), (shl GPR:$rs2, (i32 16))), + (PACK GPR:$rs1, GPR:$rs2)>; +let Predicates = [HasStdExtZbbOrZbp, IsRV64] in +def : Pat<(or (and GPR:$rs1, 0x00000000FFFFFFFF), (shl GPR:$rs2, (i64 32))), + (PACK GPR:$rs1, GPR:$rs2)>; +let Predicates = [HasStdExtZbbOrZbp, IsRV32] in +def : Pat<(or (and GPR:$rs2, 0xFFFF0000), (srl GPR:$rs1, (i32 16))), + (PACKU GPR:$rs1, GPR:$rs2)>; +let Predicates = [HasStdExtZbbOrZbp, IsRV64] in +def : Pat<(or (and GPR:$rs2, 0xFFFFFFFF00000000), (srl GPR:$rs1, (i64 32))), + (PACKU GPR:$rs1, GPR:$rs2)>; +let Predicates = [HasStdExtZbbOrZbp] in +def : Pat<(or (and (shl GPR:$rs2, (XLenVT 8)), 0xFF00), + (and GPR:$rs1, 0x00FF)), + (PACKH GPR:$rs1, GPR:$rs2)>; + let Predicates = [HasStdExtZbp, IsRV32] in { def : Pat<(or (or (and (shl GPR:$rs1, (i32 8)), (i32 0x00FF0000)), (and GPR:$rs1, (i32 0xFF0000FF))), @@ -831,12 +873,30 @@ (SLOW GPR:$rs1, GPR:$rs2)>; def : Pat<(xor (riscv_srlw (xor GPR:$rs1, -1), GPR:$rs2), -1), (SROW GPR:$rs1, GPR:$rs2)>; +} // Predicates = [HasStdExtZbb, IsRV64] + +let Predicates = [HasStdExtZbbOrZbp, IsRV64] in { +def : Pat<(or (riscv_sllw (assertsexti32 GPR:$rs1), (assertsexti32 GPR:$rs2)), + (riscv_srlw (assertsexti32 GPR:$rs1), + (sub (i64 0), (assertsexti32 GPR:$rs2)))), + (ROLW GPR:$rs1, GPR:$rs2)>; +def : Pat<(or (riscv_sllw (assertsexti32 GPR:$rs1), + (sub (i64 0), (assertsexti32 GPR:$rs2))), + (riscv_srlw (assertsexti32 GPR:$rs1), (assertsexti32 GPR:$rs2))), + (RORW GPR:$rs1, GPR:$rs2)>; +} // Predicates = [HasStdExtZbbOrZbp, IsRV64] + +let Predicates = [HasStdExtZbb, IsRV64] in { def : Pat<(SLOIWPat GPR:$rs1, uimmlog2xlen:$shamt), (SLOIW GPR:$rs1, uimmlog2xlen:$shamt)>; def : Pat<(SROIWPat GPR:$rs1, uimmlog2xlen:$shamt), (SROIW GPR:$rs1, uimmlog2xlen:$shamt)>; } // Predicates = [HasStdExtZbb, IsRV64] +let Predicates = [HasStdExtZbbOrZbp, IsRV64] in +def : Pat<(RORIWPat GPR:$rs1, uimmlog2xlen:$shamt), + (RORIW GPR:$rs1, uimmlog2xlen:$shamt)>; + let Predicates = [HasStdExtZbp, IsRV64] in { def : Pat<(sext_inreg (or (or (and (srl GPR:$rs1, (i64 1)), (i64 0x55555555)), GPR:$rs1), @@ -898,3 +958,14 @@ // RV64 CTZ def : Pat<(ctpop (and GPR:$rs1, (i64 0xFFFFFFFF))), (PCNTW GPR:$rs1)>; } // Predicates = [HasStdExtZbb, IsRV64] + +let Predicates = [HasStdExtZbbOrZbp, IsRV64] in { +def : Pat<(sext_inreg (or (shl (assertsexti32 GPR:$rs2), (i64 16)), + (and (assertsexti32 GPR:$rs1), 0x000000000000FFFF)), + i32), + (PACKW GPR:$rs1, GPR:$rs2)>; +def : Pat<(or (and (assertsexti32 GPR:$rs2), 0xFFFFFFFFFFFF0000), + (srl (and (assertsexti32 GPR:$rs1), 0x00000000FFFF0000), + (i64 16))), + (PACKUW GPR:$rs1, GPR:$rs2)>; +} // Predicates = [HasStdExtZbbOrZbp, IsRV64] diff --git a/llvm/test/CodeGen/RISCV/rv32Zbbp.ll b/llvm/test/CodeGen/RISCV/rv32Zbbp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv32Zbbp.ll @@ -0,0 +1,892 @@ +; NOTE: Assertions have been autogenerated 
by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -mattr=+experimental-b -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IB +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbb -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IBB +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbp -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IBP + +define i32 @andn_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: andn_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: not a1, a1 +; RV32I-NEXT: and a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: andn_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: andn a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: andn_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: andn a0, a0, a1 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: andn_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: andn a0, a0, a1 +; RV32IBP-NEXT: ret + %neg = xor i32 %b, -1 + %and = and i32 %neg, %a + ret i32 %and +} + +define i64 @andn_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: andn_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: not a3, a3 +; RV32I-NEXT: not a2, a2 +; RV32I-NEXT: and a0, a2, a0 +; RV32I-NEXT: and a1, a3, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: andn_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: andn a0, a0, a2 +; RV32IB-NEXT: andn a1, a1, a3 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: andn_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: andn a0, a0, a2 +; RV32IBB-NEXT: andn a1, a1, a3 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: andn_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: andn a0, a0, a2 +; RV32IBP-NEXT: andn a1, a1, a3 +; RV32IBP-NEXT: ret + %neg = xor i64 %b, -1 + %and = and i64 %neg, %a + ret i64 %and +} + +define i32 @orn_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: orn_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: not a1, a1 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: orn_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: orn a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: orn_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: orn a0, a0, a1 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: orn_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: orn a0, a0, a1 +; RV32IBP-NEXT: ret + %neg = xor i32 %b, -1 + %or = or i32 %neg, %a + ret i32 %or +} + +define i64 @orn_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: orn_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: not a3, a3 +; RV32I-NEXT: not a2, a2 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: orn_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: orn a0, a0, a2 +; RV32IB-NEXT: orn a1, a1, a3 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: orn_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: orn a0, a0, a2 +; RV32IBB-NEXT: orn a1, a1, a3 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: orn_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: orn a0, a0, a2 +; RV32IBP-NEXT: orn a1, a1, a3 +; RV32IBP-NEXT: ret + %neg = xor i64 %b, -1 + %or = or i64 %neg, %a + ret i64 %or +} + +define i32 @xnor_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: xnor_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: not a0, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: xnor_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: xnor a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: xnor_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: xnor a0, a0, a1 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: xnor_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: xnor a0, a0, a1 +; RV32IBP-NEXT: ret + %neg = xor i32 %a, -1 + %xor = xor i32 %neg, %b + ret i32 
%xor +} + +define i64 @xnor_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: xnor_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: xor a1, a1, a3 +; RV32I-NEXT: xor a0, a0, a2 +; RV32I-NEXT: not a0, a0 +; RV32I-NEXT: not a1, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: xnor_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: xnor a0, a0, a2 +; RV32IB-NEXT: xnor a1, a1, a3 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: xnor_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: xnor a0, a0, a2 +; RV32IBB-NEXT: xnor a1, a1, a3 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: xnor_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: xnor a0, a0, a2 +; RV32IBP-NEXT: xnor a1, a1, a3 +; RV32IBP-NEXT: ret + %neg = xor i64 %a, -1 + %xor = xor i64 %neg, %b + ret i64 %xor +} + +declare i32 @llvm.fshl.i32(i32, i32, i32) + +define i32 @rol_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: rol_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: sll a2, a0, a1 +; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: srl a0, a0, a1 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: rol_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: rol a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: rol_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: rol a0, a0, a1 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: rol_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: rol a0, a0, a1 +; RV32IBP-NEXT: ret + %or = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %b) + ret i32 %or +} + +; As we are not matching directly i64 code patterns on RV32 some i64 patterns +; don't have yet any matching bit manipulation instructions on RV32. +; This test is presented here in case future expansions of the experimental-b +; extension introduce instructions suitable for this pattern. + +declare i64 @llvm.fshl.i64(i64, i64, i64) + +define i64 @rol_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: rol_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: andi a3, a2, 63 +; RV32I-NEXT: addi t1, a3, -32 +; RV32I-NEXT: addi a6, zero, 31 +; RV32I-NEXT: bltz t1, .LBB7_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: sll a7, a0, t1 +; RV32I-NEXT: j .LBB7_3 +; RV32I-NEXT: .LBB7_2: +; RV32I-NEXT: sll a4, a1, a2 +; RV32I-NEXT: sub a3, a6, a3 +; RV32I-NEXT: srli a5, a0, 1 +; RV32I-NEXT: srl a3, a5, a3 +; RV32I-NEXT: or a7, a4, a3 +; RV32I-NEXT: .LBB7_3: +; RV32I-NEXT: neg a4, a2 +; RV32I-NEXT: andi a5, a4, 63 +; RV32I-NEXT: addi a3, a5, -32 +; RV32I-NEXT: bltz a3, .LBB7_7 +; RV32I-NEXT: # %bb.4: +; RV32I-NEXT: mv t0, zero +; RV32I-NEXT: bgez a3, .LBB7_8 +; RV32I-NEXT: .LBB7_5: +; RV32I-NEXT: srl a3, a0, a4 +; RV32I-NEXT: sub a4, a6, a5 +; RV32I-NEXT: slli a1, a1, 1 +; RV32I-NEXT: sll a1, a1, a4 +; RV32I-NEXT: or a4, a3, a1 +; RV32I-NEXT: or a1, a7, t0 +; RV32I-NEXT: bgez t1, .LBB7_9 +; RV32I-NEXT: .LBB7_6: +; RV32I-NEXT: sll a0, a0, a2 +; RV32I-NEXT: or a0, a0, a4 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB7_7: +; RV32I-NEXT: srl t0, a1, a4 +; RV32I-NEXT: bltz a3, .LBB7_5 +; RV32I-NEXT: .LBB7_8: +; RV32I-NEXT: srl a4, a1, a3 +; RV32I-NEXT: or a1, a7, t0 +; RV32I-NEXT: bltz t1, .LBB7_6 +; RV32I-NEXT: .LBB7_9: +; RV32I-NEXT: or a0, zero, a4 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: rol_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: andi a3, a2, 63 +; RV32IB-NEXT: addi t1, a3, -32 +; RV32IB-NEXT: addi a6, zero, 31 +; RV32IB-NEXT: bltz t1, .LBB7_2 +; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT: sll a7, a0, t1 +; RV32IB-NEXT: j .LBB7_3 +; RV32IB-NEXT: .LBB7_2: +; RV32IB-NEXT: sll a4, a1, a2 +; RV32IB-NEXT: sub a3, a6, a3 +; RV32IB-NEXT: srli a5, a0, 1 +; RV32IB-NEXT: srl a3, a5, a3 +; RV32IB-NEXT: or a7, a4, a3 +; RV32IB-NEXT: .LBB7_3: +; RV32IB-NEXT: neg a4, a2 +; RV32IB-NEXT: andi a5, a4, 63 +; 
RV32IB-NEXT: addi a3, a5, -32 +; RV32IB-NEXT: bltz a3, .LBB7_7 +; RV32IB-NEXT: # %bb.4: +; RV32IB-NEXT: mv t0, zero +; RV32IB-NEXT: bgez a3, .LBB7_8 +; RV32IB-NEXT: .LBB7_5: +; RV32IB-NEXT: srl a3, a0, a4 +; RV32IB-NEXT: sub a4, a6, a5 +; RV32IB-NEXT: slli a1, a1, 1 +; RV32IB-NEXT: sll a1, a1, a4 +; RV32IB-NEXT: or a4, a3, a1 +; RV32IB-NEXT: or a1, a7, t0 +; RV32IB-NEXT: bgez t1, .LBB7_9 +; RV32IB-NEXT: .LBB7_6: +; RV32IB-NEXT: sll a0, a0, a2 +; RV32IB-NEXT: or a0, a0, a4 +; RV32IB-NEXT: ret +; RV32IB-NEXT: .LBB7_7: +; RV32IB-NEXT: srl t0, a1, a4 +; RV32IB-NEXT: bltz a3, .LBB7_5 +; RV32IB-NEXT: .LBB7_8: +; RV32IB-NEXT: srl a4, a1, a3 +; RV32IB-NEXT: or a1, a7, t0 +; RV32IB-NEXT: bltz t1, .LBB7_6 +; RV32IB-NEXT: .LBB7_9: +; RV32IB-NEXT: or a0, zero, a4 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: rol_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: andi a3, a2, 63 +; RV32IBB-NEXT: addi t1, a3, -32 +; RV32IBB-NEXT: addi a6, zero, 31 +; RV32IBB-NEXT: bltz t1, .LBB7_2 +; RV32IBB-NEXT: # %bb.1: +; RV32IBB-NEXT: sll a7, a0, t1 +; RV32IBB-NEXT: j .LBB7_3 +; RV32IBB-NEXT: .LBB7_2: +; RV32IBB-NEXT: sll a4, a1, a2 +; RV32IBB-NEXT: sub a3, a6, a3 +; RV32IBB-NEXT: srli a5, a0, 1 +; RV32IBB-NEXT: srl a3, a5, a3 +; RV32IBB-NEXT: or a7, a4, a3 +; RV32IBB-NEXT: .LBB7_3: +; RV32IBB-NEXT: neg a4, a2 +; RV32IBB-NEXT: andi a5, a4, 63 +; RV32IBB-NEXT: addi a3, a5, -32 +; RV32IBB-NEXT: bltz a3, .LBB7_7 +; RV32IBB-NEXT: # %bb.4: +; RV32IBB-NEXT: mv t0, zero +; RV32IBB-NEXT: bgez a3, .LBB7_8 +; RV32IBB-NEXT: .LBB7_5: +; RV32IBB-NEXT: srl a3, a0, a4 +; RV32IBB-NEXT: sub a4, a6, a5 +; RV32IBB-NEXT: slli a1, a1, 1 +; RV32IBB-NEXT: sll a1, a1, a4 +; RV32IBB-NEXT: or a4, a3, a1 +; RV32IBB-NEXT: or a1, a7, t0 +; RV32IBB-NEXT: bgez t1, .LBB7_9 +; RV32IBB-NEXT: .LBB7_6: +; RV32IBB-NEXT: sll a0, a0, a2 +; RV32IBB-NEXT: or a0, a0, a4 +; RV32IBB-NEXT: ret +; RV32IBB-NEXT: .LBB7_7: +; RV32IBB-NEXT: srl t0, a1, a4 +; RV32IBB-NEXT: bltz a3, .LBB7_5 +; RV32IBB-NEXT: .LBB7_8: +; RV32IBB-NEXT: srl a4, a1, a3 +; RV32IBB-NEXT: or a1, a7, t0 +; RV32IBB-NEXT: bltz t1, .LBB7_6 +; RV32IBB-NEXT: .LBB7_9: +; RV32IBB-NEXT: or a0, zero, a4 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: rol_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: andi a3, a2, 63 +; RV32IBP-NEXT: addi t1, a3, -32 +; RV32IBP-NEXT: addi a6, zero, 31 +; RV32IBP-NEXT: bltz t1, .LBB7_2 +; RV32IBP-NEXT: # %bb.1: +; RV32IBP-NEXT: sll a7, a0, t1 +; RV32IBP-NEXT: j .LBB7_3 +; RV32IBP-NEXT: .LBB7_2: +; RV32IBP-NEXT: sll a4, a1, a2 +; RV32IBP-NEXT: sub a3, a6, a3 +; RV32IBP-NEXT: srli a5, a0, 1 +; RV32IBP-NEXT: srl a3, a5, a3 +; RV32IBP-NEXT: or a7, a4, a3 +; RV32IBP-NEXT: .LBB7_3: +; RV32IBP-NEXT: neg a4, a2 +; RV32IBP-NEXT: andi a5, a4, 63 +; RV32IBP-NEXT: addi a3, a5, -32 +; RV32IBP-NEXT: bltz a3, .LBB7_7 +; RV32IBP-NEXT: # %bb.4: +; RV32IBP-NEXT: mv t0, zero +; RV32IBP-NEXT: bgez a3, .LBB7_8 +; RV32IBP-NEXT: .LBB7_5: +; RV32IBP-NEXT: srl a3, a0, a4 +; RV32IBP-NEXT: sub a4, a6, a5 +; RV32IBP-NEXT: slli a1, a1, 1 +; RV32IBP-NEXT: sll a1, a1, a4 +; RV32IBP-NEXT: or a4, a3, a1 +; RV32IBP-NEXT: or a1, a7, t0 +; RV32IBP-NEXT: bgez t1, .LBB7_9 +; RV32IBP-NEXT: .LBB7_6: +; RV32IBP-NEXT: sll a0, a0, a2 +; RV32IBP-NEXT: or a0, a0, a4 +; RV32IBP-NEXT: ret +; RV32IBP-NEXT: .LBB7_7: +; RV32IBP-NEXT: srl t0, a1, a4 +; RV32IBP-NEXT: bltz a3, .LBB7_5 +; RV32IBP-NEXT: .LBB7_8: +; RV32IBP-NEXT: srl a4, a1, a3 +; RV32IBP-NEXT: or a1, a7, t0 +; RV32IBP-NEXT: bltz t1, .LBB7_6 +; RV32IBP-NEXT: .LBB7_9: +; RV32IBP-NEXT: or a0, zero, a4 +; RV32IBP-NEXT: ret + %or = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %b) 
+ ret i64 %or +} + +declare i32 @llvm.fshr.i32(i32, i32, i32) + +define i32 @ror_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: ror_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: srl a2, a0, a1 +; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: sll a0, a0, a1 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: ror_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: ror a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: ror_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: ror a0, a0, a1 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: ror_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: ror a0, a0, a1 +; RV32IBP-NEXT: ret + %or = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %b) + ret i32 %or +} + +; As we are not matching directly i64 code patterns on RV32 some i64 patterns +; don't have yet any matching bit manipulation instructions on RV32. +; This test is presented here in case future expansions of the experimental-b +; extension introduce instructions suitable for this pattern. + +declare i64 @llvm.fshr.i64(i64, i64, i64) + +define i64 @ror_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: ror_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: andi a3, a2, 63 +; RV32I-NEXT: addi t1, a3, -32 +; RV32I-NEXT: addi a6, zero, 31 +; RV32I-NEXT: bltz t1, .LBB9_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: srl a7, a1, t1 +; RV32I-NEXT: j .LBB9_3 +; RV32I-NEXT: .LBB9_2: +; RV32I-NEXT: srl a4, a0, a2 +; RV32I-NEXT: sub a3, a6, a3 +; RV32I-NEXT: slli a5, a1, 1 +; RV32I-NEXT: sll a3, a5, a3 +; RV32I-NEXT: or a7, a4, a3 +; RV32I-NEXT: .LBB9_3: +; RV32I-NEXT: neg a4, a2 +; RV32I-NEXT: andi a5, a4, 63 +; RV32I-NEXT: addi a3, a5, -32 +; RV32I-NEXT: bltz a3, .LBB9_7 +; RV32I-NEXT: # %bb.4: +; RV32I-NEXT: mv t0, zero +; RV32I-NEXT: bgez a3, .LBB9_8 +; RV32I-NEXT: .LBB9_5: +; RV32I-NEXT: sll a3, a1, a4 +; RV32I-NEXT: sub a4, a6, a5 +; RV32I-NEXT: srli a0, a0, 1 +; RV32I-NEXT: srl a0, a0, a4 +; RV32I-NEXT: or a4, a3, a0 +; RV32I-NEXT: or a0, t0, a7 +; RV32I-NEXT: bgez t1, .LBB9_9 +; RV32I-NEXT: .LBB9_6: +; RV32I-NEXT: srl a1, a1, a2 +; RV32I-NEXT: or a1, a4, a1 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB9_7: +; RV32I-NEXT: sll t0, a0, a4 +; RV32I-NEXT: bltz a3, .LBB9_5 +; RV32I-NEXT: .LBB9_8: +; RV32I-NEXT: sll a4, a0, a3 +; RV32I-NEXT: or a0, t0, a7 +; RV32I-NEXT: bltz t1, .LBB9_6 +; RV32I-NEXT: .LBB9_9: +; RV32I-NEXT: or a1, a4, zero +; RV32I-NEXT: ret +; +; RV32IB-LABEL: ror_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: andi a3, a2, 63 +; RV32IB-NEXT: addi t1, a3, -32 +; RV32IB-NEXT: addi a6, zero, 31 +; RV32IB-NEXT: bltz t1, .LBB9_2 +; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT: srl a7, a1, t1 +; RV32IB-NEXT: j .LBB9_3 +; RV32IB-NEXT: .LBB9_2: +; RV32IB-NEXT: srl a4, a0, a2 +; RV32IB-NEXT: sub a3, a6, a3 +; RV32IB-NEXT: slli a5, a1, 1 +; RV32IB-NEXT: sll a3, a5, a3 +; RV32IB-NEXT: or a7, a4, a3 +; RV32IB-NEXT: .LBB9_3: +; RV32IB-NEXT: neg a4, a2 +; RV32IB-NEXT: andi a5, a4, 63 +; RV32IB-NEXT: addi a3, a5, -32 +; RV32IB-NEXT: bltz a3, .LBB9_7 +; RV32IB-NEXT: # %bb.4: +; RV32IB-NEXT: mv t0, zero +; RV32IB-NEXT: bgez a3, .LBB9_8 +; RV32IB-NEXT: .LBB9_5: +; RV32IB-NEXT: sll a3, a1, a4 +; RV32IB-NEXT: sub a4, a6, a5 +; RV32IB-NEXT: srli a0, a0, 1 +; RV32IB-NEXT: srl a0, a0, a4 +; RV32IB-NEXT: or a4, a3, a0 +; RV32IB-NEXT: or a0, t0, a7 +; RV32IB-NEXT: bgez t1, .LBB9_9 +; RV32IB-NEXT: .LBB9_6: +; RV32IB-NEXT: srl a1, a1, a2 +; RV32IB-NEXT: or a1, a4, a1 +; RV32IB-NEXT: ret +; RV32IB-NEXT: .LBB9_7: +; RV32IB-NEXT: sll t0, a0, a4 +; RV32IB-NEXT: bltz a3, .LBB9_5 +; RV32IB-NEXT: .LBB9_8: +; RV32IB-NEXT: sll a4, a0, a3 +; RV32IB-NEXT: or a0, t0, a7 +; RV32IB-NEXT: bltz t1, 
.LBB9_6 +; RV32IB-NEXT: .LBB9_9: +; RV32IB-NEXT: or a1, a4, zero +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: ror_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: andi a3, a2, 63 +; RV32IBB-NEXT: addi t1, a3, -32 +; RV32IBB-NEXT: addi a6, zero, 31 +; RV32IBB-NEXT: bltz t1, .LBB9_2 +; RV32IBB-NEXT: # %bb.1: +; RV32IBB-NEXT: srl a7, a1, t1 +; RV32IBB-NEXT: j .LBB9_3 +; RV32IBB-NEXT: .LBB9_2: +; RV32IBB-NEXT: srl a4, a0, a2 +; RV32IBB-NEXT: sub a3, a6, a3 +; RV32IBB-NEXT: slli a5, a1, 1 +; RV32IBB-NEXT: sll a3, a5, a3 +; RV32IBB-NEXT: or a7, a4, a3 +; RV32IBB-NEXT: .LBB9_3: +; RV32IBB-NEXT: neg a4, a2 +; RV32IBB-NEXT: andi a5, a4, 63 +; RV32IBB-NEXT: addi a3, a5, -32 +; RV32IBB-NEXT: bltz a3, .LBB9_7 +; RV32IBB-NEXT: # %bb.4: +; RV32IBB-NEXT: mv t0, zero +; RV32IBB-NEXT: bgez a3, .LBB9_8 +; RV32IBB-NEXT: .LBB9_5: +; RV32IBB-NEXT: sll a3, a1, a4 +; RV32IBB-NEXT: sub a4, a6, a5 +; RV32IBB-NEXT: srli a0, a0, 1 +; RV32IBB-NEXT: srl a0, a0, a4 +; RV32IBB-NEXT: or a4, a3, a0 +; RV32IBB-NEXT: or a0, t0, a7 +; RV32IBB-NEXT: bgez t1, .LBB9_9 +; RV32IBB-NEXT: .LBB9_6: +; RV32IBB-NEXT: srl a1, a1, a2 +; RV32IBB-NEXT: or a1, a4, a1 +; RV32IBB-NEXT: ret +; RV32IBB-NEXT: .LBB9_7: +; RV32IBB-NEXT: sll t0, a0, a4 +; RV32IBB-NEXT: bltz a3, .LBB9_5 +; RV32IBB-NEXT: .LBB9_8: +; RV32IBB-NEXT: sll a4, a0, a3 +; RV32IBB-NEXT: or a0, t0, a7 +; RV32IBB-NEXT: bltz t1, .LBB9_6 +; RV32IBB-NEXT: .LBB9_9: +; RV32IBB-NEXT: or a1, a4, zero +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: ror_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: andi a3, a2, 63 +; RV32IBP-NEXT: addi t1, a3, -32 +; RV32IBP-NEXT: addi a6, zero, 31 +; RV32IBP-NEXT: bltz t1, .LBB9_2 +; RV32IBP-NEXT: # %bb.1: +; RV32IBP-NEXT: srl a7, a1, t1 +; RV32IBP-NEXT: j .LBB9_3 +; RV32IBP-NEXT: .LBB9_2: +; RV32IBP-NEXT: srl a4, a0, a2 +; RV32IBP-NEXT: sub a3, a6, a3 +; RV32IBP-NEXT: slli a5, a1, 1 +; RV32IBP-NEXT: sll a3, a5, a3 +; RV32IBP-NEXT: or a7, a4, a3 +; RV32IBP-NEXT: .LBB9_3: +; RV32IBP-NEXT: neg a4, a2 +; RV32IBP-NEXT: andi a5, a4, 63 +; RV32IBP-NEXT: addi a3, a5, -32 +; RV32IBP-NEXT: bltz a3, .LBB9_7 +; RV32IBP-NEXT: # %bb.4: +; RV32IBP-NEXT: mv t0, zero +; RV32IBP-NEXT: bgez a3, .LBB9_8 +; RV32IBP-NEXT: .LBB9_5: +; RV32IBP-NEXT: sll a3, a1, a4 +; RV32IBP-NEXT: sub a4, a6, a5 +; RV32IBP-NEXT: srli a0, a0, 1 +; RV32IBP-NEXT: srl a0, a0, a4 +; RV32IBP-NEXT: or a4, a3, a0 +; RV32IBP-NEXT: or a0, t0, a7 +; RV32IBP-NEXT: bgez t1, .LBB9_9 +; RV32IBP-NEXT: .LBB9_6: +; RV32IBP-NEXT: srl a1, a1, a2 +; RV32IBP-NEXT: or a1, a4, a1 +; RV32IBP-NEXT: ret +; RV32IBP-NEXT: .LBB9_7: +; RV32IBP-NEXT: sll t0, a0, a4 +; RV32IBP-NEXT: bltz a3, .LBB9_5 +; RV32IBP-NEXT: .LBB9_8: +; RV32IBP-NEXT: sll a4, a0, a3 +; RV32IBP-NEXT: or a0, t0, a7 +; RV32IBP-NEXT: bltz t1, .LBB9_6 +; RV32IBP-NEXT: .LBB9_9: +; RV32IBP-NEXT: or a1, a4, zero +; RV32IBP-NEXT: ret + %or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %b) + ret i64 %or +} + +define i32 @rori_i32(i32 %a) nounwind { +; RV32I-LABEL: rori_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: slli a0, a0, 31 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: rori_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: rori a0, a0, 1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: rori_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: rori a0, a0, 1 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: rori_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: rori a0, a0, 1 +; RV32IBP-NEXT: ret + %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 31) + ret i32 %1 +} + +define i64 @rori_i64(i64 %a) nounwind { +; RV32I-LABEL: rori_i64: +; RV32I: # %bb.0: +; 
RV32I-NEXT: slli a2, a1, 31 +; RV32I-NEXT: srli a3, a0, 1 +; RV32I-NEXT: or a2, a3, a2 +; RV32I-NEXT: srli a1, a1, 1 +; RV32I-NEXT: slli a0, a0, 31 +; RV32I-NEXT: or a1, a0, a1 +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: rori_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: addi a3, zero, 31 +; RV32IB-NEXT: fsl a2, a1, a3, a0 +; RV32IB-NEXT: fsl a1, a0, a3, a1 +; RV32IB-NEXT: mv a0, a2 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: rori_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: slli a2, a1, 31 +; RV32IBB-NEXT: srli a3, a0, 1 +; RV32IBB-NEXT: or a2, a3, a2 +; RV32IBB-NEXT: srli a1, a1, 1 +; RV32IBB-NEXT: slli a0, a0, 31 +; RV32IBB-NEXT: or a1, a0, a1 +; RV32IBB-NEXT: mv a0, a2 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: rori_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: slli a2, a1, 31 +; RV32IBP-NEXT: srli a3, a0, 1 +; RV32IBP-NEXT: or a2, a3, a2 +; RV32IBP-NEXT: srli a1, a1, 1 +; RV32IBP-NEXT: slli a0, a0, 31 +; RV32IBP-NEXT: or a1, a0, a1 +; RV32IBP-NEXT: mv a0, a2 +; RV32IBP-NEXT: ret + %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 63) + ret i64 %1 +} + +define i32 @pack_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: pack_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a2, 16 +; RV32I-NEXT: addi a2, a2, -1 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: pack_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: pack a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: pack_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: pack a0, a0, a1 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: pack_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: pack a0, a0, a1 +; RV32IBP-NEXT: ret + %shl = and i32 %a, 65535 + %shl1 = shl i32 %b, 16 + %or = or i32 %shl1, %shl + ret i32 %or +} + +; As we are not matching directly i64 code patterns on RV32 some i64 patterns +; don't have yet any matching bit manipulation instructions on RV32. +; This test is presented here in case future expansions of the experimental-b +; extension introduce instructions suitable for this pattern. + +define i64 @pack_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: pack_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: mv a1, a2 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: pack_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: mv a1, a2 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: pack_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: mv a1, a2 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: pack_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: mv a1, a2 +; RV32IBP-NEXT: ret + %shl = and i64 %a, 4294967295 + %shl1 = shl i64 %b, 32 + %or = or i64 %shl1, %shl + ret i64 %or +} + +define i32 @packu_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: packu_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: srli a0, a0, 16 +; RV32I-NEXT: lui a2, 1048560 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: packu_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: packu a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: packu_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: packu a0, a0, a1 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: packu_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: packu a0, a0, a1 +; RV32IBP-NEXT: ret + %shr = lshr i32 %a, 16 + %shr1 = and i32 %b, -65536 + %or = or i32 %shr1, %shr + ret i32 %or +} + +; As we are not matching directly i64 code patterns on RV32 some i64 patterns +; don't have yet any matching bit manipulation instructions on RV32. +; This test is presented here in case future expansions of the experimental-b +; extension introduce instructions suitable for this pattern. 
+ +define i64 @packu_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: packu_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: packu_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: mv a0, a1 +; RV32IB-NEXT: mv a1, a3 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: packu_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: mv a0, a1 +; RV32IBB-NEXT: mv a1, a3 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: packu_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: mv a0, a1 +; RV32IBP-NEXT: mv a1, a3 +; RV32IBP-NEXT: ret + %shr = lshr i64 %a, 32 + %shr1 = and i64 %b, -4294967296 + %or = or i64 %shr1, %shr + ret i64 %or +} + +define i32 @packh_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: packh_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: andi a0, a0, 255 +; RV32I-NEXT: slli a1, a1, 8 +; RV32I-NEXT: lui a2, 16 +; RV32I-NEXT: addi a2, a2, -256 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: packh_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: packh a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: packh_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: packh a0, a0, a1 +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: packh_i32: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: packh a0, a0, a1 +; RV32IBP-NEXT: ret + %and = and i32 %a, 255 + %and1 = shl i32 %b, 8 + %shl = and i32 %and1, 65280 + %or = or i32 %shl, %and + ret i32 %or +} + +define i64 @packh_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: packh_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: andi a0, a0, 255 +; RV32I-NEXT: slli a1, a2, 8 +; RV32I-NEXT: lui a2, 16 +; RV32I-NEXT: addi a2, a2, -256 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: mv a1, zero +; RV32I-NEXT: ret +; +; RV32IB-LABEL: packh_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: packh a0, a0, a2 +; RV32IB-NEXT: mv a1, zero +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: packh_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: packh a0, a0, a2 +; RV32IBB-NEXT: mv a1, zero +; RV32IBB-NEXT: ret +; +; RV32IBP-LABEL: packh_i64: +; RV32IBP: # %bb.0: +; RV32IBP-NEXT: packh a0, a0, a2 +; RV32IBP-NEXT: mv a1, zero +; RV32IBP-NEXT: ret + %and = and i64 %a, 255 + %and1 = shl i64 %b, 8 + %shl = and i64 %and1, 65280 + %or = or i64 %shl, %and + ret i64 %or +} diff --git a/llvm/test/CodeGen/RISCV/rv64Zbbp.ll b/llvm/test/CodeGen/RISCV/rv64Zbbp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64Zbbp.ll @@ -0,0 +1,517 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv64 -mattr=+experimental-b -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IB +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbb -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IBB +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbp -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IBP + +define signext i32 @andn_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: andn_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: not a1, a1 +; RV64I-NEXT: and a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: andn_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: andn a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: andn_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: andn a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: andn_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: andn a0, a0, a1 +; RV64IBP-NEXT: ret + %neg = xor i32 %b, -1 + %and = and i32 %neg, %a + ret i32 
%and +} + +define i64 @andn_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: andn_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: not a1, a1 +; RV64I-NEXT: and a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: andn_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: andn a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: andn_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: andn a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: andn_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: andn a0, a0, a1 +; RV64IBP-NEXT: ret + %neg = xor i64 %b, -1 + %and = and i64 %neg, %a + ret i64 %and +} + +define signext i32 @orn_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: orn_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: not a1, a1 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: orn_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: orn a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: orn_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: orn a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: orn_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: orn a0, a0, a1 +; RV64IBP-NEXT: ret + %neg = xor i32 %b, -1 + %or = or i32 %neg, %a + ret i32 %or +} + +define i64 @orn_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: orn_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: not a1, a1 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: orn_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: orn a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: orn_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: orn a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: orn_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: orn a0, a0, a1 +; RV64IBP-NEXT: ret + %neg = xor i64 %b, -1 + %or = or i64 %neg, %a + ret i64 %or +} + +define signext i32 @xnor_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: xnor_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: xnor_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: xnor a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: xnor_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: xnor a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: xnor_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: xnor a0, a0, a1 +; RV64IBP-NEXT: ret + %neg = xor i32 %a, -1 + %xor = xor i32 %neg, %b + ret i32 %xor +} + +define i64 @xnor_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: xnor_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: xnor_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: xnor a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: xnor_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: xnor a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: xnor_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: xnor a0, a0, a1 +; RV64IBP-NEXT: ret + %neg = xor i64 %a, -1 + %xor = xor i64 %neg, %b + ret i64 %xor +} + +declare i32 @llvm.fshl.i32(i32, i32, i32) + +define signext i32 @rol_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: rol_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: sllw a2, a0, a1 +; RV64I-NEXT: neg a1, a1 +; RV64I-NEXT: srlw a0, a0, a1 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: rol_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: rolw a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: rol_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: rolw a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: rol_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: rolw a0, a0, a1 +; RV64IBP-NEXT: ret + %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %b) + ret i32 %1 +} + +declare i64 @llvm.fshl.i64(i64, i64, i64) + +define i64 @rol_i64(i64 
%a, i64 %b) nounwind { +; RV64I-LABEL: rol_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: sll a2, a0, a1 +; RV64I-NEXT: neg a1, a1 +; RV64I-NEXT: srl a0, a0, a1 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: rol_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: rol a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: rol_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: rol a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: rol_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: rol a0, a0, a1 +; RV64IBP-NEXT: ret + %or = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %b) + ret i64 %or +} + +declare i32 @llvm.fshr.i32(i32, i32, i32) + +define signext i32 @ror_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: ror_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: srlw a2, a0, a1 +; RV64I-NEXT: neg a1, a1 +; RV64I-NEXT: sllw a0, a0, a1 +; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: ror_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: rorw a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: ror_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: rorw a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: ror_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: rorw a0, a0, a1 +; RV64IBP-NEXT: ret + %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %b) + ret i32 %1 +} + +declare i64 @llvm.fshr.i64(i64, i64, i64) + +define i64 @ror_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: ror_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: srl a2, a0, a1 +; RV64I-NEXT: neg a1, a1 +; RV64I-NEXT: sll a0, a0, a1 +; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: ror_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: ror a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: ror_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: ror a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: ror_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: ror a0, a0, a1 +; RV64IBP-NEXT: ret + %or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %b) + ret i64 %or +} + +define signext i32 @rori_i32(i32 signext %a) nounwind { +; RV64I-LABEL: rori_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: srliw a1, a0, 1 +; RV64I-NEXT: slli a0, a0, 31 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: rori_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: fsriw a0, a0, a0, 1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: rori_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: roriw a0, a0, 1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: rori_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: roriw a0, a0, 1 +; RV64IBP-NEXT: ret + %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 31) + ret i32 %1 +} + +define i64 @rori_i64(i64 %a) nounwind { +; RV64I-LABEL: rori_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: slli a0, a0, 63 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: rori_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: rori a0, a0, 1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: rori_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: rori a0, a0, 1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: rori_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: rori a0, a0, 1 +; RV64IBP-NEXT: ret + %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 63) + ret i64 %1 +} + +define signext i32 @pack_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: pack_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a2, 16 +; RV64I-NEXT: addiw a2, a2, -1 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: pack_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: packw a0, a0, a1 +; 
RV64IB-NEXT: ret +; +; RV64IBB-LABEL: pack_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: packw a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: pack_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: packw a0, a0, a1 +; RV64IBP-NEXT: ret + %shl = and i32 %a, 65535 + %shl1 = shl i32 %b, 16 + %or = or i32 %shl1, %shl + ret i32 %or +} + +define i64 @pack_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: pack_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: pack_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: pack a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: pack_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: pack a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: pack_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: pack a0, a0, a1 +; RV64IBP-NEXT: ret + %shl = and i64 %a, 4294967295 + %shl1 = shl i64 %b, 32 + %or = or i64 %shl1, %shl + ret i64 %or +} + +define signext i32 @packu_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: packu_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: srliw a0, a0, 16 +; RV64I-NEXT: lui a2, 1048560 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: packu_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: packuw a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: packu_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: packuw a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: packu_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: packuw a0, a0, a1 +; RV64IBP-NEXT: ret + %shr = lshr i32 %a, 16 + %shr1 = and i32 %b, -65536 + %or = or i32 %shr1, %shr + ret i32 %or +} + +define i64 @packu_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: packu_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: addi a2, zero, -1 +; RV64I-NEXT: slli a2, a2, 32 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: packu_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: packu a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: packu_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: packu a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: packu_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: packu a0, a0, a1 +; RV64IBP-NEXT: ret + %shr = lshr i64 %a, 32 + %shr1 = and i64 %b, -4294967296 + %or = or i64 %shr1, %shr + ret i64 %or +} + +define signext i32 @packh_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: packh_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: andi a0, a0, 255 +; RV64I-NEXT: slli a1, a1, 8 +; RV64I-NEXT: lui a2, 16 +; RV64I-NEXT: addiw a2, a2, -256 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: packh_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: packh a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: packh_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: packh a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: packh_i32: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: packh a0, a0, a1 +; RV64IBP-NEXT: ret + %and = and i32 %a, 255 + %and1 = shl i32 %b, 8 + %shl = and i32 %and1, 65280 + %or = or i32 %shl, %and + ret i32 %or +} + +define i64 @packh_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: packh_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: andi a0, a0, 255 +; RV64I-NEXT: slli a1, a1, 8 +; RV64I-NEXT: lui a2, 16 +; RV64I-NEXT: addiw a2, a2, -256 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: packh_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: packh a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: packh_i64: 
+; RV64IBB: # %bb.0: +; RV64IBB-NEXT: packh a0, a0, a1 +; RV64IBB-NEXT: ret +; +; RV64IBP-LABEL: packh_i64: +; RV64IBP: # %bb.0: +; RV64IBP-NEXT: packh a0, a0, a1 +; RV64IBP-NEXT: ret + %and = and i64 %a, 255 + %and1 = shl i64 %b, 8 + %shl = and i64 %and1, 65280 + %or = or i64 %shl, %and + ret i64 %or +}
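The RORIW constraints can also be checked on a worked example. The sketch below is not part of the patch; the function name is illustrative and it assumes the same riscv64 RUN lines as rv64Zbbp.ll above. Rotating an i32 left by 8 on RV64 legalizes into a (sign_extend_inreg (or (shl x, 8), (srl (and x, 0xFF000000), 24))) tree, so SelectRORIW sees VC1 = 24, VC2 = 32 - 24 = 8, and VC3 = maskLeadingOnes<uint32_t>(8) = 0xFF000000.

declare i32 @llvm.fshl.i32(i32, i32, i32)

define signext i32 @roriw_by_24(i32 signext %a) nounwind {
; Under -mattr=+experimental-zbb or +experimental-zbp this rotate-left-by-8
; is expected to select "roriw a0, a0, 24" (rotate right by the complement).
  %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 8)
  ret i32 %1
}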