Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -150,7 +150,9 @@ setOperationAction(ISD::ROTL, XLenVT, Expand); setOperationAction(ISD::ROTR, XLenVT, Expand); - setOperationAction(ISD::BSWAP, XLenVT, Expand); + + if (!Subtarget.hasStdExtZbp()) + setOperationAction(ISD::BSWAP, XLenVT, Expand); if (!Subtarget.hasStdExtZbb()) { setOperationAction(ISD::CTTZ, XLenVT, Expand); @@ -158,6 +160,9 @@ setOperationAction(ISD::CTPOP, XLenVT, Expand); } + if (Subtarget.hasStdExtZbp()) + setOperationAction(ISD::BITREVERSE, XLenVT, Legal); + ISD::CondCode FPCCToExtend[] = { ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT, Index: llvm/lib/Target/RISCV/RISCVInstrInfoB.td =================================================================== --- llvm/lib/Target/RISCV/RISCVInstrInfoB.td +++ llvm/lib/Target/RISCV/RISCVInstrInfoB.td @@ -642,6 +642,99 @@ (SLO GPR:$rs1, GPR:$rs2)>; def : Pat<(xor (srl (xor GPR:$rs1, -1), GPR:$rs2), -1), (SRO GPR:$rs1, GPR:$rs2)>; +} // Predicates = [HasStdExtZbb] + +let Predicates = [HasStdExtZbp, IsRV32] in { +def : Pat<(or (or (and (srl GPR:$rs1, (i32 1)), (i32 0x55555555)), GPR:$rs1), + (and (shl GPR:$rs1, (i32 1)), (i32 0xAAAAAAAA))), + (GORCI GPR:$rs1, (i32 1))>; +def : Pat<(or (or (and (srl GPR:$rs1, (i32 2)), (i32 0x33333333)), GPR:$rs1), + (and (shl GPR:$rs1, (i32 2)), (i32 0xCCCCCCCC))), + (GORCI GPR:$rs1, (i32 2))>; +def : Pat<(or (or (and (srl GPR:$rs1, (i32 4)), (i32 0x0F0F0F0F)), GPR:$rs1), + (and (shl GPR:$rs1, (i32 4)), (i32 0xF0F0F0F0))), + (GORCI GPR:$rs1, (i32 4))>; +def : Pat<(or (or (and (srl GPR:$rs1, (i32 8)), (i32 0x00FF00FF)), GPR:$rs1), + (and (shl GPR:$rs1, (i32 8)), (i32 0xFF00FF00))), + (GORCI GPR:$rs1, (i32 8))>; +def : Pat<(or (or (srl GPR:$rs1, (i32 16)), GPR:$rs1), + (shl GPR:$rs1, 
(i32 16))), + (GORCI GPR:$rs1, (i32 16))>; +} // Predicates = [HasStdExtZbp, IsRV32] + +let Predicates = [HasStdExtZbp, IsRV64] in { +def : Pat<(or (or (and (srl GPR:$rs1, (i64 1)), (i64 0x5555555555555555)), + GPR:$rs1), + (and (shl GPR:$rs1, (i64 1)), (i64 0xAAAAAAAAAAAAAAAA))), + (GORCI GPR:$rs1, (i64 1))>; +def : Pat<(or (or (and (srl GPR:$rs1, (i64 2)), (i64 0x3333333333333333)), + GPR:$rs1), + (and (shl GPR:$rs1, (i64 2)), (i64 0xCCCCCCCCCCCCCCCC))), + (GORCI GPR:$rs1, (i64 2))>; +def : Pat<(or (or (and (srl GPR:$rs1, (i64 4)), (i64 0x0F0F0F0F0F0F0F0F)), + GPR:$rs1), + (and (shl GPR:$rs1, (i64 4)), (i64 0xF0F0F0F0F0F0F0F0))), + (GORCI GPR:$rs1, (i64 4))>; +def : Pat<(or (or (and (srl GPR:$rs1, (i64 8)), (i64 0x00FF00FF00FF00FF)), + GPR:$rs1), + (and (shl GPR:$rs1, (i64 8)), (i64 0xFF00FF00FF00FF00))), + (GORCI GPR:$rs1, (i64 8))>; +def : Pat<(or (or (and (srl GPR:$rs1, (i64 16)), (i64 0x0000FFFF0000FFFF)), + GPR:$rs1), + (and (shl GPR:$rs1, (i64 16)), (i64 0xFFFF0000FFFF0000))), + (GORCI GPR:$rs1, (i64 16))>; +def : Pat<(or (or (srl GPR:$rs1, (i64 32)), GPR:$rs1), + (shl GPR:$rs1, (i64 32))), + (GORCI GPR:$rs1, (i64 32))>; +} // Predicates = [HasStdExtZbp, IsRV64] + +let Predicates = [HasStdExtZbp, IsRV32] in { +def : Pat<(or (and (shl GPR:$rs1, (i32 1)), (i32 0xAAAAAAAA)), + (and (srl GPR:$rs1, (i32 1)), (i32 0x55555555))), + (GREVI GPR:$rs1, (i32 1))>; +def : Pat<(or (and (shl GPR:$rs1, (i32 2)), (i32 0xCCCCCCCC)), + (and (srl GPR:$rs1, (i32 2)), (i32 0x33333333))), + (GREVI GPR:$rs1, (i32 2))>; +def : Pat<(or (and (shl GPR:$rs1, (i32 4)), (i32 0xF0F0F0F0)), + (and (srl GPR:$rs1, (i32 4)), (i32 0x0F0F0F0F))), + (GREVI GPR:$rs1, (i32 4))>; +def : Pat<(or (and (shl GPR:$rs1, (i32 8)), (i32 0xFF00FF00)), + (and (srl GPR:$rs1, (i32 8)), (i32 0x00FF00FF))), + (GREVI GPR:$rs1, (i32 8))>; +def : Pat<(rotr (bswap GPR:$rs1), (i32 16)), (GREVI GPR:$rs1, (i32 8))>; +def : Pat<(or (shl GPR:$rs1, (i32 16)), (srl GPR:$rs1, (i32 16))), + (GREVI GPR:$rs1, (i32 16))>; +def 
: Pat<(rotl GPR:$rs1, (i32 16)), (GREVI GPR:$rs1, (i32 16))>; +def : Pat<(bswap GPR:$rs1), (GREVI GPR:$rs1, (i32 24))>; +def : Pat<(bitreverse GPR:$rs1), (GREVI GPR:$rs1, (i32 31))>; +} // Predicates = [HasStdExtZbp, IsRV32] + +let Predicates = [HasStdExtZbp, IsRV64] in { +def : Pat<(or (and (shl GPR:$rs1, (i64 1)), (i64 0xAAAAAAAAAAAAAAAA)), + (and (srl GPR:$rs1, (i64 1)), (i64 0x5555555555555555))), + (GREVI GPR:$rs1, (i64 1))>; +def : Pat<(or (and (shl GPR:$rs1, (i64 2)), (i64 0xCCCCCCCCCCCCCCCC)), + (and (srl GPR:$rs1, (i64 2)), (i64 0x3333333333333333))), + (GREVI GPR:$rs1, (i64 2))>; +def : Pat<(or (and (shl GPR:$rs1, (i64 4)), (i64 0xF0F0F0F0F0F0F0F0)), + (and (srl GPR:$rs1, (i64 4)), (i64 0x0F0F0F0F0F0F0F0F))), + (GREVI GPR:$rs1, (i64 4))>; +def : Pat<(or (and (shl GPR:$rs1, (i64 8)), (i64 0xFF00FF00FF00FF00)), + (and (srl GPR:$rs1, (i64 8)), (i64 0x00FF00FF00FF00FF))), + (GREVI GPR:$rs1, (i64 8))>; +def : Pat<(or (and (shl GPR:$rs1, (i64 16)), (i64 0xFFFF0000FFFF0000)), + (and (srl GPR:$rs1, (i64 16)), (i64 0x0000FFFF0000FFFF))), + (GREVI GPR:$rs1, (i64 16))>; +def : Pat<(or (shl GPR:$rs1, (i64 16)), (srl GPR:$rs1, (i64 16))), + (GREVI GPR:$rs1, (i64 16))>; +def : Pat<(or (shl GPR:$rs1, (i64 32)), (srl GPR:$rs1, (i64 32))), + (GREVI GPR:$rs1, (i64 32))>; +def : Pat<(rotl GPR:$rs1, (i64 32)), (GREVI GPR:$rs1, (i64 32))>; +def : Pat<(bswap GPR:$rs1), (GREVI GPR:$rs1, (i64 56))>; +def : Pat<(bitreverse GPR:$rs1), (GREVI GPR:$rs1, (i64 63))>; +} // Predicates = [HasStdExtZbp, IsRV64] + +let Predicates = [HasStdExtZbb] in { def : Pat<(ctlz GPR:$rs1), (CLZ GPR:$rs1)>; def : Pat<(cttz GPR:$rs1), (CTZ GPR:$rs1)>; def : Pat<(ctpop GPR:$rs1), (PCNT GPR:$rs1)>; @@ -672,6 +765,48 @@ (MAXU GPR:$rs1, GPR:$rs2)>; } // Predicates = [HasStdExtZbb] +let Predicates = [HasStdExtZbp, IsRV32] in { +def : Pat<(or (or (and (shl GPR:$rs1, (i32 8)), (i32 0x00FF0000)), + (and GPR:$rs1, (i32 0xFF0000FF))), + (and (srl GPR:$rs1, (i32 8)), (i32 0x0000FF00))), + (SHFLI GPR:$rs1, (i32 
8))>; +def : Pat<(or (or (and (shl GPR:$rs1, (i32 4)), (i32 0x0F000F00)), + (and GPR:$rs1, (i32 0xF00FF00F))), + (and (srl GPR:$rs1, (i32 4)), (i32 0x00F000F0))), + (SHFLI GPR:$rs1, (i32 4))>; +def : Pat<(or (or (and (shl GPR:$rs1, (i32 2)), (i32 0x30303030)), + (and GPR:$rs1, (i32 0xC3C3C3C3))), + (and (srl GPR:$rs1, (i32 2)), (i32 0x0C0C0C0C))), + (SHFLI GPR:$rs1, (i32 2))>; +def : Pat<(or (or (and (shl GPR:$rs1, (i32 1)), (i32 0x44444444)), + (and GPR:$rs1, (i32 0x99999999))), + (and (srl GPR:$rs1, (i32 1)), (i32 0x22222222))), + (SHFLI GPR:$rs1, (i32 1))>; +} // Predicates = [HasStdExtZbp, IsRV32] + +let Predicates = [HasStdExtZbp, IsRV64] in { +def : Pat<(or (or (and (shl GPR:$rs1, (i64 16)), (i64 0x0000FFFF00000000)), + (and GPR:$rs1, (i64 0xFFFF00000000FFFF))), + (and (srl GPR:$rs1, (i64 16)), (i64 0x00000000FFFF0000))), + (SHFLI GPR:$rs1, (i64 16))>; +def : Pat<(or (or (and (shl GPR:$rs1, (i64 8)), (i64 0x00FF000000FF0000)), + (and GPR:$rs1, (i64 0xFF0000FFFF0000FF))), + (and (srl GPR:$rs1, (i64 8)), (i64 0x0000FF000000FF00))), + (SHFLI GPR:$rs1, (i64 8))>; +def : Pat<(or (or (and (shl GPR:$rs1, (i64 4)), (i64 0x0F000F000F000F00)), + (and GPR:$rs1, (i64 0xF00FF00FF00FF00F))), + (and (srl GPR:$rs1, (i64 4)), (i64 0x00F000F000F000F0))), + (SHFLI GPR:$rs1, (i64 4))>; +def : Pat<(or (or (and (shl GPR:$rs1, (i64 2)), (i64 0x3030303030303030)), + (and GPR:$rs1, (i64 0xC3C3C3C3C3C3C3C3))), + (and (srl GPR:$rs1, (i64 2)), (i64 0x0C0C0C0C0C0C0C0C))), + (SHFLI GPR:$rs1, (i64 2))>; +def : Pat<(or (or (and (shl GPR:$rs1, (i64 1)), (i64 0x4444444444444444)), + (and GPR:$rs1, (i64 0x9999999999999999))), + (and (srl GPR:$rs1, (i64 1)), (i64 0x2222222222222222))), + (SHFLI GPR:$rs1, (i64 1))>; +} // Predicates = [HasStdExtZbp, IsRV64] + let Predicates = [HasStdExtZbb, IsRV64] in { def : Pat<(and (add GPR:$rs, simm12:$simm12), 0xFFFFFFFF), (ADDIWU GPR:$rs, simm12:$simm12)>; Index: llvm/test/CodeGen/RISCV/rv32Zbp.ll 
=================================================================== --- /dev/null +++ llvm/test/CodeGen/RISCV/rv32Zbp.ll @@ -0,0 +1,251 @@ +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -mattr=+experimental-b -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IB +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbp -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IB + +define i32 @gorc1(i32 %a) nounwind { +; RV32I-NOT: orc.p a0, a0 +; +; RV32IB-LABEL: gorc1: +; RV32IB: # %bb.0: +; RV32IB-NEXT: orc.p a0, a0 +; RV32IB-NEXT: ret + %and = shl i32 %a, 1 + %shl = and i32 %and, -1431655766 + %and1 = lshr i32 %a, 1 + %shr = and i32 %and1, 1431655765 + %or = or i32 %shr, %a + %or2 = or i32 %or, %shl + ret i32 %or2 +} + +define i32 @gorc2(i32 %a) nounwind { +; RV32I-NOT: orc2.n a0, a0 +; +; RV32IB-LABEL: gorc2: +; RV32IB: # %bb.0: +; RV32IB-NEXT: orc2.n a0, a0 +; RV32IB-NEXT: ret + %and = shl i32 %a, 2 + %shl = and i32 %and, -858993460 + %and1 = lshr i32 %a, 2 + %shr = and i32 %and1, 858993459 + %or = or i32 %shr, %a + %or2 = or i32 %or, %shl + ret i32 %or2 +} + +define i32 @gorc4(i32 %a) nounwind { +; RV32I-NOT: orc4.b a0, a0 +; +; RV32IB-LABEL: gorc4: +; RV32IB: # %bb.0: +; RV32IB-NEXT: orc4.b a0, a0 +; RV32IB-NEXT: ret + %and = shl i32 %a, 4 + %shl = and i32 %and, -252645136 + %and1 = lshr i32 %a, 4 + %shr = and i32 %and1, 252645135 + %or = or i32 %shr, %a + %or2 = or i32 %or, %shl + ret i32 %or2 +} + +define i32 @gorc8(i32 %a) nounwind { +; RV32I-NOT: orc8.h a0, a0 +; +; RV32IB-LABEL: gorc8: +; RV32IB: # %bb.0: +; RV32IB-NEXT: orc8.h a0, a0 +; RV32IB-NEXT: ret + %and = shl i32 %a, 8 + %shl = and i32 %and, -16711936 + %and1 = lshr i32 %a, 8 + %shr = and i32 %and1, 16711935 + %or = or i32 %shr, %a + %or2 = or i32 %or, %shl + ret i32 %or2 +} + +define i32 @gorc16(i32 %a) nounwind { +; RV32I-NOT: orc16 a0, a0 +; +; RV32IB-LABEL: gorc16: +; RV32IB: # 
%bb.0: +; RV32IB-NEXT: orc16 a0, a0 +; RV32IB-NEXT: ret + %shl = shl i32 %a, 16 + %shr = lshr i32 %a, 16 + %or = or i32 %shr, %a + %or2 = or i32 %or, %shl + ret i32 %or2 +} + +define i32 @grev1(i32 %a) nounwind { +; RV32I-NOT: rev.p a0, a0 +; +; RV32IB-LABEL: grev1: +; RV32IB: # %bb.0: +; RV32IB-NEXT: rev.p a0, a0 +; RV32IB-NEXT: ret + %and = shl i32 %a, 1 + %shl = and i32 %and, -1431655766 + %and1 = lshr i32 %a, 1 + %shr = and i32 %and1, 1431655765 + %or = or i32 %shl, %shr + ret i32 %or +} + +define i32 @grev2(i32 %a) nounwind { +; RV32I-NOT: rev2.n a0, a0 +; +; RV32IB-LABEL: grev2: +; RV32IB: # %bb.0: +; RV32IB-NEXT: rev2.n a0, a0 +; RV32IB-NEXT: ret + %and = shl i32 %a, 2 + %shl = and i32 %and, -858993460 + %and1 = lshr i32 %a, 2 + %shr = and i32 %and1, 858993459 + %or = or i32 %shl, %shr + ret i32 %or +} + +define i32 @grev4(i32 %a) nounwind { +; RV32I-NOT: rev4.b a0, a0 +; +; RV32IB-LABEL: grev4: +; RV32IB: # %bb.0: +; RV32IB-NEXT: rev4.b a0, a0 +; RV32IB-NEXT: ret + %and = shl i32 %a, 4 + %shl = and i32 %and, -252645136 + %and1 = lshr i32 %a, 4 + %shr = and i32 %and1, 252645135 + %or = or i32 %shl, %shr + ret i32 %or +} + +define i32 @grev8(i32 %a) nounwind { +; RV32I-NOT: rev8.h a0, a0 +; +; RV32IB-LABEL: grev8: +; RV32IB: # %bb.0: +; RV32IB-NEXT: rev8.h a0, a0 +; RV32IB-NEXT: ret + %and = shl i32 %a, 8 + %shl = and i32 %and, -16711936 + %and1 = lshr i32 %a, 8 + %shr = and i32 %and1, 16711935 + %or = or i32 %shl, %shr + ret i32 %or +} + +define i32 @grev16(i32 %a) nounwind { +; RV32I-NOT: rev16 a0, a0 +; +; RV32IB-LABEL: grev16: +; RV32IB: # %bb.0: +; RV32IB-NEXT: rev16 a0, a0 +; RV32IB-NEXT: ret + %shl = shl i32 %a, 16 + %shr = lshr i32 %a, 16 + %or = or i32 %shl, %shr + ret i32 %or +} + +declare i32 @llvm.bswap.i32(i32) + +define i32 @bswap_i32(i32 %a) nounwind { +; RV32I-NOT: rev8 a0, a0 +; +; RV32IB-LABEL: bswap_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: rev8 a0, a0 +; RV32IB-NEXT: ret + %1 = tail call i32 @llvm.bswap.i32(i32 %a) + ret i32 %1 +} + 
+declare i32 @llvm.bitreverse.i32(i32) + +define i32 @bitreverse_i32(i32 %a) nounwind { +; RV32I-NOT: rev a0, a0 +; +; RV32IB-LABEL: bitreverse_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: rev a0, a0 +; RV32IB-NEXT: ret + %1 = tail call i32 @llvm.bitreverse.i32(i32 %a) + ret i32 %1 +} + +define i32 @shfl1(i32 %a, i32 %b) nounwind { +; RV32I-NOT: zip.n a0, a0 +; +; RV32IB-LABEL: shfl1: +; RV32IB: # %bb.0: +; RV32IB-NEXT: zip.n a0, a0 +; RV32IB-NEXT: ret + %and = and i32 %a, -1717986919 + %shl = shl i32 %a, 1 + %and1 = and i32 %shl, 1145324612 + %or = or i32 %and1, %and + %shr = lshr i32 %a, 1 + %and2 = and i32 %shr, 572662306 + %or3 = or i32 %or, %and2 + ret i32 %or3 +} + +define i32 @shfl2(i32 %a, i32 %b) nounwind { +; RV32I-NOT: zip2.b a0, a0 +; +; RV32IB-LABEL: shfl2: +; RV32IB: # %bb.0: +; RV32IB-NEXT: zip2.b a0, a0 +; RV32IB-NEXT: ret + %and = and i32 %a, -1010580541 + %shl = shl i32 %a, 2 + %and1 = and i32 %shl, 808464432 + %or = or i32 %and1, %and + %shr = lshr i32 %a, 2 + %and2 = and i32 %shr, 202116108 + %or3 = or i32 %or, %and2 + ret i32 %or3 +} + +define i32 @shfl4(i32 %a, i32 %b) nounwind { +; RV32I-NOT: zip4.h a0, a0 +; +; RV32IB-LABEL: shfl4: +; RV32IB: # %bb.0: +; RV32IB-NEXT: zip4.h a0, a0 +; RV32IB-NEXT: ret + %and = and i32 %a, -267390961 + %shl = shl i32 %a, 4 + %and1 = and i32 %shl, 251662080 + %or = or i32 %and1, %and + %shr = lshr i32 %a, 4 + %and2 = and i32 %shr, 15728880 + %or3 = or i32 %or, %and2 + ret i32 %or3 +} + +define i32 @shfl8(i32 %a, i32 %b) nounwind { +; RV32I-NOT: zip8 a0, a0 +; +; RV32IB-LABEL: shfl8: +; RV32IB: # %bb.0: +; RV32IB-NEXT: zip8 a0, a0 +; RV32IB-NEXT: ret + %and = and i32 %a, -16776961 + %shl = shl i32 %a, 8 + %and1 = and i32 %shl, 16711680 + %or = or i32 %and1, %and + %shr = lshr i32 %a, 8 + %and2 = and i32 %shr, 65280 + %or3 = or i32 %or, %and2 + ret i32 %or3 +} Index: llvm/test/CodeGen/RISCV/rv64Zbp.ll =================================================================== --- /dev/null +++ 
llvm/test/CodeGen/RISCV/rv64Zbp.ll @@ -0,0 +1,299 @@ +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv64 -mattr=+experimental-b -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IB +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbp -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IB + +define i64 @gorc1(i64 %a) nounwind { +; RV64I-NOT: orc.p a0, a0 +; +; RV64IB-LABEL: gorc1: +; RV64IB: # %bb.0: +; RV64IB-NEXT: orc.p a0, a0 +; RV64IB-NEXT: ret + %and = shl i64 %a, 1 + %shl = and i64 %and, -6148914691236517206 + %and1 = lshr i64 %a, 1 + %shr = and i64 %and1, 6148914691236517205 + %or = or i64 %shr, %a + %or2 = or i64 %or, %shl + ret i64 %or2 +} + +define i64 @gorc2(i64 %a) nounwind { +; RV64I-NOT: orc2.n a0, a0 +; +; RV64IB-LABEL: gorc2: +; RV64IB: # %bb.0: +; RV64IB-NEXT: orc2.n a0, a0 +; RV64IB-NEXT: ret + %and = shl i64 %a, 2 + %shl = and i64 %and, -3689348814741910324 + %and1 = lshr i64 %a, 2 + %shr = and i64 %and1, 3689348814741910323 + %or = or i64 %shr, %a + %or2 = or i64 %or, %shl + ret i64 %or2 +} + +define i64 @gorc4(i64 %a) nounwind { +; RV64I-NOT: orc4.b a0, a0 +; +; RV64IB-LABEL: gorc4: +; RV64IB: # %bb.0: +; RV64IB-NEXT: orc4.b a0, a0 +; RV64IB-NEXT: ret + %and = shl i64 %a, 4 + %shl = and i64 %and, -1085102592571150096 + %and1 = lshr i64 %a, 4 + %shr = and i64 %and1, 1085102592571150095 + %or = or i64 %shr, %a + %or2 = or i64 %or, %shl + ret i64 %or2 +} + +define i64 @gorc8(i64 %a) nounwind { +; RV64I-NOT: orc8.h a0, a0 +; +; RV64IB-LABEL: gorc8: +; RV64IB: # %bb.0: +; RV64IB-NEXT: orc8.h a0, a0 +; RV64IB-NEXT: ret + %and = shl i64 %a, 8 + %shl = and i64 %and, -71777214294589696 + %and1 = lshr i64 %a, 8 + %shr = and i64 %and1, 71777214294589695 + %or = or i64 %shr, %a + %or2 = or i64 %or, %shl + ret i64 %or2 +} + +define i64 @gorc16(i64 %a) nounwind { +; RV64I-NOT: orc16.w a0, a0 +; +; RV64IB-LABEL: gorc16: +; RV64IB: # %bb.0: +; 
RV64IB-NEXT: orc16.w a0, a0 +; RV64IB-NEXT: ret + %and = shl i64 %a, 16 + %shl = and i64 %and, -281470681808896 + %and1 = lshr i64 %a, 16 + %shr = and i64 %and1, 281470681808895 + %or = or i64 %shr, %a + %or2 = or i64 %or, %shl + ret i64 %or2 +} + +define i64 @gorc32(i64 %a) nounwind { +; RV64I-NOT: orc32 a0, a0 +; +; RV64IB-LABEL: gorc32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: orc32 a0, a0 +; RV64IB-NEXT: ret + %shl = shl i64 %a, 32 + %shr = lshr i64 %a, 32 + %or = or i64 %shr, %a + %or2 = or i64 %or, %shl + ret i64 %or2 +} + +define i64 @grev1(i64 %a) nounwind { +; RV64I-NOT: rev.p a0, a0 +; +; RV64IB-LABEL: grev1: +; RV64IB: # %bb.0: +; RV64IB-NEXT: rev.p a0, a0 +; RV64IB-NEXT: ret + %and = shl i64 %a, 1 + %shl = and i64 %and, -6148914691236517206 + %and1 = lshr i64 %a, 1 + %shr = and i64 %and1, 6148914691236517205 + %or = or i64 %shl, %shr + ret i64 %or +} + +define i64 @grev2(i64 %a) nounwind { +; RV64I-NOT: rev2.n a0, a0 +; +; RV64IB-LABEL: grev2: +; RV64IB: # %bb.0: +; RV64IB-NEXT: rev2.n a0, a0 +; RV64IB-NEXT: ret + %and = shl i64 %a, 2 + %shl = and i64 %and, -3689348814741910324 + %and1 = lshr i64 %a, 2 + %shr = and i64 %and1, 3689348814741910323 + %or = or i64 %shl, %shr + ret i64 %or +} + +define i64 @grev4(i64 %a) nounwind { +; RV64I-NOT: rev4.b a0, a0 +; +; RV64IB-LABEL: grev4: +; RV64IB: # %bb.0: +; RV64IB-NEXT: rev4.b a0, a0 +; RV64IB-NEXT: ret + %and = shl i64 %a, 4 + %shl = and i64 %and, -1085102592571150096 + %and1 = lshr i64 %a, 4 + %shr = and i64 %and1, 1085102592571150095 + %or = or i64 %shl, %shr + ret i64 %or +} + +define i64 @grev8(i64 %a) nounwind { +; RV64I-NOT: rev8.h a0, a0 +; +; RV64IB-LABEL: grev8: +; RV64IB: # %bb.0: +; RV64IB-NEXT: rev8.h a0, a0 +; RV64IB-NEXT: ret + %and = shl i64 %a, 8 + %shl = and i64 %and, -71777214294589696 + %and1 = lshr i64 %a, 8 + %shr = and i64 %and1, 71777214294589695 + %or = or i64 %shl, %shr + ret i64 %or +} + +define i64 @grev16(i64 %a) nounwind { +; RV64I-NOT: rev16.w a0, a0 +; +; RV64IB-LABEL: grev16: +; 
RV64IB: # %bb.0: +; RV64IB-NEXT: rev16.w a0, a0 +; RV64IB-NEXT: ret + %and = shl i64 %a, 16 + %shl = and i64 %and, -281470681808896 + %and1 = lshr i64 %a, 16 + %shr = and i64 %and1, 281470681808895 + %or = or i64 %shl, %shr + ret i64 %or +} + +define i64 @grev32(i64 %a) nounwind { +; RV64I-NOT: rev32 a0, a0 +; +; RV64IB-LABEL: grev32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: rev32 a0, a0 +; RV64IB-NEXT: ret + %shl = shl i64 %a, 32 + %shr = lshr i64 %a, 32 + %or = or i64 %shl, %shr + ret i64 %or +} + +declare i64 @llvm.bswap.i64(i64) + +define i64 @bswap_i64(i64 %a) nounwind { +; RV64I-NOT: rev8 a0, a0 +; +; RV64IB-LABEL: bswap_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: rev8 a0, a0 +; RV64IB-NEXT: ret + %1 = call i64 @llvm.bswap.i64(i64 %a) + ret i64 %1 +} + +declare i64 @llvm.bitreverse.i64(i64) + +define i64 @bitreverse_i64(i64 %a) nounwind { +; RV64I-NOT: rev a0, a0 +; +; RV64IB-LABEL: bitreverse_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: rev a0, a0 +; RV64IB-NEXT: ret + %1 = call i64 @llvm.bitreverse.i64(i64 %a) + ret i64 %1 +} + +define i64 @shfl1(i64 %a, i64 %b) nounwind { +; RV64I-NOT: zip.n a0, a0 +; +; RV64IB-LABEL: shfl1: +; RV64IB: # %bb.0: +; RV64IB-NEXT: zip.n a0, a0 +; RV64IB-NEXT: ret + %and = and i64 %a, -7378697629483820647 + %shl = shl i64 %a, 1 + %and1 = and i64 %shl, 4919131752989213764 + %or = or i64 %and1, %and + %shr = lshr i64 %a, 1 + %and2 = and i64 %shr, 2459565876494606882 + %or3 = or i64 %or, %and2 + ret i64 %or3 +} + +define i64 @shfl2(i64 %a, i64 %b) nounwind { +; RV64I-NOT: zip2.b a0, a0 +; +; RV64IB-LABEL: shfl2: +; RV64IB: # %bb.0: +; RV64IB-NEXT: zip2.b a0, a0 +; RV64IB-NEXT: ret + %and = and i64 %a, -4340410370284600381 + %shl = shl i64 %a, 2 + %and1 = and i64 %shl, 3472328296227680304 + %or = or i64 %and1, %and + %shr = lshr i64 %a, 2 + %and2 = and i64 %shr, 868082074056920076 + %or3 = or i64 %or, %and2 + ret i64 %or3 +} + +define i64 @shfl4(i64 %a, i64 %b) nounwind { +; RV64I-NOT: zip4.h a0, a0 +; +; RV64IB-LABEL: shfl4: +; RV64IB: # %bb.0: +; 
RV64IB-NEXT: zip4.h a0, a0 +; RV64IB-NEXT: ret + %and = and i64 %a, -1148435428713435121 + %shl = shl i64 %a, 4 + %and1 = and i64 %shl, 1080880403494997760 + %or = or i64 %and1, %and + %shr = lshr i64 %a, 4 + %and2 = and i64 %shr, 67555025218437360 + %or3 = or i64 %or, %and2 + ret i64 %or3 +} + +define i64 @shfl8(i64 %a, i64 %b) nounwind { +; RV64I-NOT: zip8.w a0, a0 +; +; RV64IB-LABEL: shfl8: +; RV64IB: # %bb.0: +; RV64IB-NEXT: zip8.w a0, a0 +; RV64IB-NEXT: ret + %and = and i64 %a, -72056494543077121 + %shl = shl i64 %a, 8 + %and1 = and i64 %shl, 71776119077928960 + %or = or i64 %and1, %and + %shr = lshr i64 %a, 8 + %and2 = and i64 %shr, 280375465148160 + %or3 = or i64 %or, %and2 + ret i64 %or3 +} + +define i64 @shfl16(i64 %a, i64 %b) nounwind { +; RV64I-NOT: zip16 a0, a0 +; +; RV64IB-LABEL: shfl16: +; RV64IB: # %bb.0: +; RV64IB-NEXT: zip16 a0, a0 +; RV64IB-NEXT: ret + %and = and i64 %a, -281474976645121 + %shl = shl i64 %a, 16 + %and1 = and i64 %shl, 281470681743360 + %or = or i64 %and1, %and + %shr = lshr i64 %a, 16 + %and2 = and i64 %shr, 4294901760 + %or3 = or i64 %or, %and2 + ret i64 %or3 +}