diff --git a/clang/include/clang/Basic/BuiltinsRISCV.def b/clang/include/clang/Basic/BuiltinsRISCV.def --- a/clang/include/clang/Basic/BuiltinsRISCV.def +++ b/clang/include/clang/Basic/BuiltinsRISCV.def @@ -18,8 +18,8 @@ // Zbb extension TARGET_BUILTIN(__builtin_riscv_orc_b_32, "ZiZi", "nc", "zbb") TARGET_BUILTIN(__builtin_riscv_orc_b_64, "WiWi", "nc", "zbb,64bit") -TARGET_BUILTIN(__builtin_riscv_clz_32, "ZiZi", "nc", "zbb") -TARGET_BUILTIN(__builtin_riscv_clz_64, "WiWi", "nc", "zbb,64bit") +TARGET_BUILTIN(__builtin_riscv_clz_32, "ZiZi", "nc", "zbb|xtheadbb") +TARGET_BUILTIN(__builtin_riscv_clz_64, "WiWi", "nc", "zbb|xtheadbb,64bit") TARGET_BUILTIN(__builtin_riscv_ctz_32, "ZiZi", "nc", "zbb") TARGET_BUILTIN(__builtin_riscv_ctz_64, "WiWi", "nc", "zbb,64bit") diff --git a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-xtheadbb.c b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-xtheadbb.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-xtheadbb.c @@ -0,0 +1,28 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple riscv32 -target-feature +xtheadbb -emit-llvm %s -o - \ +// RUN: | FileCheck %s -check-prefix=RV32XTHEADBB + +// RV32XTHEADBB-LABEL: @clz_32( +// RV32XTHEADBB-NEXT: entry: +// RV32XTHEADBB-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// RV32XTHEADBB-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR]], align 4 +// RV32XTHEADBB-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// RV32XTHEADBB-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false) +// RV32XTHEADBB-NEXT: ret i32 [[TMP1]] +// +int clz_32(int a) { + return __builtin_riscv_clz_32(a); +} + +// RV32XTHEADBB-LABEL: @clo_32( +// RV32XTHEADBB-NEXT: entry: +// RV32XTHEADBB-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// RV32XTHEADBB-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR]], align 4 +// RV32XTHEADBB-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// RV32XTHEADBB-NEXT: [[NOT:%.*]] = xor i32 [[TMP0]], -1 +// RV32XTHEADBB-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[NOT]], i1 false) +// RV32XTHEADBB-NEXT: ret i32 [[TMP1]] +// +int clo_32(int a) { + return __builtin_riscv_clz_32(~a); +} diff --git a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-xtheadbb.c b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-xtheadbb.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-xtheadbb.c @@ -0,0 +1,53 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple riscv64 -target-feature +xtheadbb -emit-llvm %s -o - \ +// RUN: | FileCheck %s -check-prefix=RV64XTHEADBB + +// RV64XTHEADBB-LABEL: @clz_32( +// RV64XTHEADBB-NEXT: entry: +// RV64XTHEADBB-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// RV64XTHEADBB-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR]], align 4 +// RV64XTHEADBB-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// RV64XTHEADBB-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false) +// RV64XTHEADBB-NEXT: ret i32 [[TMP1]] +// +int clz_32(int a) { + return __builtin_riscv_clz_32(a); +} + +// RV64XTHEADBB-LABEL: @clo_32( +// RV64XTHEADBB-NEXT: entry: +// RV64XTHEADBB-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// RV64XTHEADBB-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR]], align 4 +// RV64XTHEADBB-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// RV64XTHEADBB-NEXT: [[NOT:%.*]] = xor i32 [[TMP0]], -1 +// RV64XTHEADBB-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[NOT]], i1 false) +// RV64XTHEADBB-NEXT: ret 
i32 [[TMP1]] +// +int clo_32(int a) { + return __builtin_riscv_clz_32(~a); +} + +// RV64XTHEADBB-LABEL: @clz_64( +// RV64XTHEADBB-NEXT: entry: +// RV64XTHEADBB-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// RV64XTHEADBB-NEXT: store i64 [[A:%.*]], ptr [[A_ADDR]], align 8 +// RV64XTHEADBB-NEXT: [[TMP0:%.*]] = load i64, ptr [[A_ADDR]], align 8 +// RV64XTHEADBB-NEXT: [[TMP1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[TMP0]], i1 false) +// RV64XTHEADBB-NEXT: ret i64 [[TMP1]] +// +long clz_64(long a) { + return __builtin_riscv_clz_64(a); +} + +// RV64XTHEADBB-LABEL: @clo_64( +// RV64XTHEADBB-NEXT: entry: +// RV64XTHEADBB-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// RV64XTHEADBB-NEXT: store i64 [[A:%.*]], ptr [[A_ADDR]], align 8 +// RV64XTHEADBB-NEXT: [[TMP0:%.*]] = load i64, ptr [[A_ADDR]], align 8 +// RV64XTHEADBB-NEXT: [[NOT:%.*]] = xor i64 [[TMP0]], -1 +// RV64XTHEADBB-NEXT: [[TMP1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[NOT]], i1 false) +// RV64XTHEADBB-NEXT: ret i64 [[TMP1]] +// +long clo_64(long a) { + return __builtin_riscv_clz_64(~a); +} diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst --- a/llvm/docs/RISCVUsage.rst +++ b/llvm/docs/RISCVUsage.rst @@ -172,6 +172,9 @@ ``XTHeadBa`` LLVM implements `the THeadBa (address-generation) vendor-defined instructions specified in `_ by T-HEAD of Alibaba. Instructions are prefixed with `th.` as described in the specification. +``XTHeadBb`` + LLVM implements `the THeadBb (basic bit-manipulation) vendor-defined instructions specified in `_ by T-HEAD of Alibaba. Instructions are prefixed with `th.` as described in the specification. + ``XTHeadBs`` LLVM implements `the THeadBs (single-bit operations) vendor-defined instructions specified in `_ by T-HEAD of Alibaba. Instructions are prefixed with `th.` as described in the specification. diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -109,6 +109,7 @@ * vsetvli intrinsics no longer have side effects. They may now be combined, moved, deleted, etc. by optimizations. * Adds support for the vendor-defined XTHeadBa (address-generation) extension. +* Adds support for the vendor-defined XTHeadBb (basic bit-manipulation) extension. * Adds support for the vendor-defined XTHeadBs (single-bit) extension. 
Changes to the WebAssembly Backend diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -110,6 +110,7 @@ // vendor-defined ('X') extensions {"xtheadba", RISCVExtensionVersion{1, 0}}, + {"xtheadbb", RISCVExtensionVersion{1, 0}}, {"xtheadbs", RISCVExtensionVersion{1, 0}}, {"xtheadvdot", RISCVExtensionVersion{1, 0}}, {"xventanacondops", RISCVExtensionVersion{1, 0}}, diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -478,6 +478,13 @@ if (Result != MCDisassembler::Fail) return Result; } + if (STI.getFeatureBits()[RISCV::FeatureVendorXTHeadBb]) { + LLVM_DEBUG(dbgs() << "Trying XTHeadBb custom opcode table:\n"); + Result = decodeInstruction(DecoderTableTHeadBb32, MI, Insn, Address, this, + STI); + if (Result != MCDisassembler::Fail) + return Result; + } if (STI.getFeatureBits()[RISCV::FeatureVendorXTHeadBs]) { LLVM_DEBUG(dbgs() << "Trying XTHeadBs custom opcode table:\n"); Result = decodeInstruction(DecoderTableTHeadBs32, MI, Insn, Address, this, diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp @@ -352,15 +352,20 @@ } } - // Perform optimization with rori in the Zbb extension. - if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZbb]) { + // Perform optimization with rori in the Zbb and th.srri in the XTheadBb + // extension. + if (Res.size() > 2 && (ActiveFeatures[RISCV::FeatureStdExtZbb] || + ActiveFeatures[RISCV::FeatureVendorXTHeadBb])) { if (unsigned Rotate = extractRotateInfo(Val)) { RISCVMatInt::InstSeq TmpSeq; uint64_t NegImm12 = ((uint64_t)Val >> (64 - Rotate)) | ((uint64_t)Val << Rotate); assert(isInt<12>(NegImm12)); TmpSeq.emplace_back(RISCV::ADDI, NegImm12); - TmpSeq.emplace_back(RISCV::RORI, Rotate); + TmpSeq.emplace_back(ActiveFeatures[RISCV::FeatureStdExtZbb] + ? RISCV::RORI + : RISCV::TH_SRRI, + Rotate); Res = TmpSeq; } } @@ -405,6 +410,7 @@ case RISCV::RORI: case RISCV::BSETI: case RISCV::BCLRI: + case RISCV::TH_SRRI: return RISCVMatInt::RegImm; } } diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -470,6 +470,13 @@ AssemblerPredicate<(all_of FeatureVendorXTHeadBa), "'xtheadba' (T-Head address calculation instructions)">; +def FeatureVendorXTHeadBb + : SubtargetFeature<"xtheadbb", "HasVendorXTHeadBb", "true", + "'xtheadbb' (T-Head basic bit-manipulation instructions)">; +def HasVendorXTHeadBb : Predicate<"Subtarget->hasVendorXTHeadBb()">, + AssemblerPredicate<(all_of FeatureVendorXTHeadBb), + "'xtheadbb' (T-Head basic bit-manipulation instructions)">; + def FeatureVendorXTHeadBs : SubtargetFeature<"xtheadbs", "HasVendorXTHeadBs", "true", "'xtheadbs' (T-Head single-bit instructions)">; diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -1104,11 +1104,15 @@ bool IsANDIOrZExt = isInt<12>(C2) || (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb()); + // With XTHeadBb, we can use TH.EXTU. 
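+    // For example, (and x, 0xFFFF) can then be selected to th.extu x, 15, 0
+    // via the TH_EXTU pattern in RISCVInstrInfoXTHead.td.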
+ IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb(); if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse())) break; // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or // the constant is a simm32. bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba(); + // With XTHeadBb, we can use TH.EXTU. + IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb(); if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse())) break; @@ -2386,6 +2390,8 @@ case RISCV::FCVT_S_WU: case RISCV::FCVT_D_W: case RISCV::FCVT_D_WU: + case RISCV::TH_REVW: + case RISCV::TH_SRRIW: if (Bits < 32) return false; break; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -233,7 +233,7 @@ setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); - if (!Subtarget.hasStdExtZbb()) + if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb()) setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand); if (Subtarget.is64Bit()) { @@ -280,7 +280,8 @@ setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT, Custom); - if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) { + if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() || + Subtarget.hasVendorXTHeadBb()) { if (Subtarget.is64Bit()) setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom); } else { @@ -290,7 +291,8 @@ // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll // pattern match it directly in isel. setOperationAction(ISD::BSWAP, XLenVT, - (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) + (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() || + Subtarget.hasVendorXTHeadBb()) ? Legal : Expand); // Zbkb can use rev8+brev8 to implement bitreverse. @@ -309,6 +311,15 @@ setOperationAction({ISD::CTTZ, ISD::CTLZ, ISD::CTPOP}, XLenVT, Expand); } + if (Subtarget.hasVendorXTHeadBb()) { + setOperationAction({ISD::CTLZ}, XLenVT, Legal); + + // We need the custom lowering to make sure that the resulting sequence + // for the 32bit case is efficient on 64bit targets. 
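+    // For example, (i32 (ctlz x)) on RV64 lowers to a not/slli/th.ff0
+    // sequence, as checked by test_ctlz_i32 in ctlz-cttz-ctpop.ll below.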
+    if (Subtarget.is64Bit())
+      setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
+  }
+
   if (Subtarget.is64Bit())
     setOperationAction(ISD::ABS, MVT::i32, Custom);
@@ -1212,7 +1223,7 @@
 }
 
 bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
-  return Subtarget.hasStdExtZbb();
+  return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb();
 }
 
 bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
@@ -54,6 +54,38 @@
   let Inst{26-25} = uimm2;
 }
 
+let Predicates = [HasVendorXTHeadBb], DecoderNamespace = "THeadBb",
+    hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+class THShift_ri<bits<5> funct5, bits<3> funct3, string opcodestr>
+    : RVInstIShift<funct5, funct3, OPC_CUSTOM_0, (outs GPR:$rd),
+                   (ins GPR:$rs1, uimmlog2xlen:$shamt),
+                   opcodestr, "$rd, $rs1, $shamt">;
+
+class THBitfieldExtract_rii<bits<3> funct3, string opcodestr>
+    : RVInstI<funct3, OPC_CUSTOM_0, (outs GPR:$rd),
+              (ins GPR:$rs1, uimm6:$msb, uimm6:$lsb), opcodestr,
+              "$rd, $rs1, $msb, $lsb"> {
+  bits<6> msb;
+  bits<6> lsb;
+  let Inst{31-26} = msb;
+  let Inst{25-20} = lsb;
+}
+
+class THRev_r<bits<5> funct5, bits<2> funct2, string opcodestr>
+    : RVInstR4<funct2, 0b001, OPC_CUSTOM_0, (outs GPR:$rd), (ins GPR:$rs1),
+               opcodestr, "$rd, $rs1"> {
+  let rs3 = funct5;
+  let rs2 = 0;
+}
+}
+
+let Predicates = [HasVendorXTHeadBb, IsRV64], DecoderNamespace = "THeadBb",
+    hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class THShiftW_ri<bits<7> funct7, bits<3> funct3, string opcodestr>
+    : RVInstIShiftW<funct7, funct3, OPC_CUSTOM_0, (outs GPR:$rd),
+                    (ins GPR:$rs1, uimm5:$shamt),
+                    opcodestr, "$rd, $rs1, $shamt">;
+
 //===----------------------------------------------------------------------===//
 // Combination of instruction classes.
 // Use these multiclasses to define instructions more easily.
@@ -75,6 +107,21 @@
            Sched<[WriteSHXADD, ReadSHXADD, ReadSHXADD]>;
 } // Predicates = [HasVendorXTHeadBa]
 
+let Predicates = [HasVendorXTHeadBb] in {
+def TH_SRRI : THShift_ri<0b00010, 0b001, "th.srri">;
+def TH_EXT : THBitfieldExtract_rii<0b010, "th.ext">;
+def TH_EXTU : THBitfieldExtract_rii<0b011, "th.extu">;
+def TH_FF0 : THRev_r<0b10000, 0b10, "th.ff0">;
+def TH_FF1 : THRev_r<0b10000, 0b11, "th.ff1">;
+def TH_REV : THRev_r<0b10000, 0b01, "th.rev">;
+def TH_TSTNBZ : THRev_r<0b10000, 0b00, "th.tstnbz">;
+} // Predicates = [HasVendorXTHeadBb]
+
+let Predicates = [HasVendorXTHeadBb, IsRV64], IsSignExtendingOpW = 1 in {
+def TH_SRRIW : THShiftW_ri<0b0001010, 0b001, "th.srriw">;
+def TH_REVW : THRev_r<0b10010, 0b00, "th.revw">;
+} // Predicates = [HasVendorXTHeadBb, IsRV64]
+
 let Predicates = [HasVendorXTHeadBs], DecoderNamespace = "THeadBs" in {
 let IsSignExtendingOpW = 1 in
 def TH_TST : RVBShift_ri<0b10001, 0b001, OPC_CUSTOM_0, "th.tst">,
@@ -230,6 +277,49 @@
                         (TH_ADDSL GPR:$r, GPR:$r, 2), 2), 3)>;
 } // Predicates = [HasVendorXTHeadBa]
 
+let Predicates = [HasVendorXTHeadBb] in {
+def : PatGprImm<rotr, TH_SRRI, uimmlog2xlen>;
+// There's no encoding for a rotate-left-immediate in X-THead-Bb, as
+// it can be implemented with th.srri by negating the immediate.
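+// For example, (rotl x, 8) is selected as th.srri x, XLEN - 8, i.e.
+// th.srri x, 56 on RV64 and th.srri x, 24 on RV32.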
+def : Pat<(rotl GPR:$rs1, uimmlog2xlen:$shamt),
+          (TH_SRRI GPR:$rs1, (ImmSubFromXLen uimmlog2xlen:$shamt))>;
+def : Pat<(rotr GPR:$rs1, GPR:$rs2),
+          (OR (SRL GPR:$rs1, GPR:$rs2),
+              (SLL GPR:$rs1, (SUB X0, GPR:$rs2)))>;
+def : Pat<(rotl GPR:$rs1, GPR:$rs2),
+          (OR (SLL GPR:$rs1, GPR:$rs2),
+              (SRL GPR:$rs1, (SUB X0, GPR:$rs2)))>;
+//def : Pat<(and GPR:$rs1, 1), (TH_EXTU GPR:$rs1, 0, 0)>;
+//def : Pat<(and GPR:$rs1, 0xff), (TH_EXTU GPR:$rs1, 7, 0)>;
+def : Pat<(and GPR:$rs1, 0xffff), (TH_EXTU GPR:$rs1, 15, 0)>;
+def : Pat<(and GPR:$rs1, 0xffffffff), (TH_EXTU GPR:$rs1, 31, 0)>;
+def : Pat<(sext_inreg GPR:$rs1, i32), (TH_EXT GPR:$rs1, 31, 0)>;
+def : Pat<(sext_inreg GPR:$rs1, i16), (TH_EXT GPR:$rs1, 15, 0)>;
+def : Pat<(sext_inreg GPR:$rs1, i8), (TH_EXT GPR:$rs1, 7, 0)>;
+def : Pat<(sext_inreg GPR:$rs1, i1), (TH_EXT GPR:$rs1, 0, 0)>;
+def : PatGpr<ctlz, TH_FF1>;
+def : Pat<(ctlz (xor GPR:$rs1, -1)), (TH_FF0 GPR:$rs1)>;
+def : PatGpr<bswap, TH_REV>;
+} // Predicates = [HasVendorXTHeadBb]
+
+let Predicates = [HasVendorXTHeadBb, IsRV64] in {
+def : PatGprImm<riscv_rorw, TH_SRRIW, uimm5>;
+def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2),
+          (TH_SRRIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>;
+def : Pat<(riscv_rorw i64:$rs1, i64:$rs2),
+          (OR (SRLW i64:$rs1, i64:$rs2),
+              (SLLW i64:$rs1, (SUB X0, i64:$rs2)))>;
+def : Pat<(riscv_rolw i64:$rs1, i64:$rs2),
+          (OR (SLLW i64:$rs1, i64:$rs2),
+              (SRLW i64:$rs1, (SUB X0, i64:$rs2)))>;
+def : Pat<(sra (bswap i64:$rs1), (i64 32)),
+          (TH_REVW i64:$rs1)>;
+def : Pat<(binop_allwusers<srl> (bswap i64:$rs1), (i64 32)),
+          (TH_REVW i64:$rs1)>;
+def : Pat<(riscv_clzw i64:$rs1),
+          (TH_FF0 (SLLI (XORI i64:$rs1, -1), 32))>;
+} // Predicates = [HasVendorXTHeadBb, IsRV64]
+
 let Predicates = [HasVendorXTHeadBs] in {
 def : Pat<(and (srl GPR:$rs1, uimmlog2xlen:$shamt), 1),
           (TH_TST GPR:$rs1, uimmlog2xlen:$shamt)>;
diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll
--- a/llvm/test/CodeGen/RISCV/attributes.ll
+++ b/llvm/test/CodeGen/RISCV/attributes.ll
@@ -89,6 +89,7 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+svinval %s -o - | FileCheck --check-prefixes=CHECK,RV64SVINVAL %s
 ; RUN: llc -mtriple=riscv64 -mattr=+xventanacondops %s -o - | FileCheck --check-prefixes=CHECK,RV64XVENTANACONDOPS %s
 ; RUN: llc -mtriple=riscv64 -mattr=+xtheadba %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADBA %s
+; RUN: llc -mtriple=riscv64 -mattr=+xtheadbb %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADBB %s
 ; RUN: llc -mtriple=riscv64 -mattr=+xtheadbs %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADBS %s
 ; RUN: llc -mtriple=riscv64 -mattr=+xtheadvdot %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADVDOT %s
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zawrs %s -o - | FileCheck --check-prefixes=CHECK,RV64ZAWRS %s
@@ -187,6 +188,7 @@
 ; RV64SVINVAL: .attribute 5, "rv64i2p0_svinval1p0"
 ; RV64XVENTANACONDOPS: .attribute 5, "rv64i2p0_xventanacondops1p0"
 ; RV64XTHEADBA: .attribute 5, "rv64i2p0_xtheadba1p0"
+; RV64XTHEADBB: .attribute 5, "rv64i2p0_xtheadbb1p0"
 ; RV64XTHEADBS: .attribute 5, "rv64i2p0_xtheadbs1p0"
 ; RV64XTHEADVDOT: .attribute 5, "rv64i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_xtheadvdot1p0"
 ; RV64ZTSO: .attribute 5, "rv64i2p0_ztso0p1"
diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
--- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
@@ -11,6 +11,10 @@
 ; RUN:   | FileCheck %s -check-prefix=RV32ZBB
 ; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs
< %s \ ; RUN: | FileCheck %s -check-prefix=RV64ZBB +; RUN: llc -mtriple=riscv32 -mattr=+xtheadbb -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32XTHEADBB +; RUN: llc -mtriple=riscv64 -mattr=+xtheadbb -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64XTHEADBB declare i8 @llvm.cttz.i8(i8, i1) declare i16 @llvm.cttz.i16(i16, i1) @@ -83,6 +87,38 @@ ; RV64ZBB-NEXT: ori a0, a0, 256 ; RV64ZBB-NEXT: ctz a0, a0 ; RV64ZBB-NEXT: ret +; +; RV32XTHEADBB-LABEL: test_cttz_i8: +; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: andi a1, a0, 255 +; RV32XTHEADBB-NEXT: beqz a1, .LBB0_2 +; RV32XTHEADBB-NEXT: # %bb.1: # %cond.false +; RV32XTHEADBB-NEXT: addi a1, a0, -1 +; RV32XTHEADBB-NEXT: not a0, a0 +; RV32XTHEADBB-NEXT: and a0, a0, a1 +; RV32XTHEADBB-NEXT: th.ff1 a0, a0 +; RV32XTHEADBB-NEXT: li a1, 32 +; RV32XTHEADBB-NEXT: sub a0, a1, a0 +; RV32XTHEADBB-NEXT: ret +; RV32XTHEADBB-NEXT: .LBB0_2: +; RV32XTHEADBB-NEXT: li a0, 8 +; RV32XTHEADBB-NEXT: ret +; +; RV64XTHEADBB-LABEL: test_cttz_i8: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: andi a1, a0, 255 +; RV64XTHEADBB-NEXT: beqz a1, .LBB0_2 +; RV64XTHEADBB-NEXT: # %bb.1: # %cond.false +; RV64XTHEADBB-NEXT: addi a1, a0, -1 +; RV64XTHEADBB-NEXT: not a0, a0 +; RV64XTHEADBB-NEXT: and a0, a0, a1 +; RV64XTHEADBB-NEXT: th.ff1 a0, a0 +; RV64XTHEADBB-NEXT: li a1, 64 +; RV64XTHEADBB-NEXT: sub a0, a1, a0 +; RV64XTHEADBB-NEXT: ret +; RV64XTHEADBB-NEXT: .LBB0_2: +; RV64XTHEADBB-NEXT: li a0, 8 +; RV64XTHEADBB-NEXT: ret %tmp = call i8 @llvm.cttz.i8(i8 %a, i1 false) ret i8 %tmp } @@ -161,6 +197,38 @@ ; RV64ZBB-NEXT: or a0, a0, a1 ; RV64ZBB-NEXT: ctz a0, a0 ; RV64ZBB-NEXT: ret +; +; RV32XTHEADBB-LABEL: test_cttz_i16: +; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: slli a1, a0, 16 +; RV32XTHEADBB-NEXT: beqz a1, .LBB1_2 +; RV32XTHEADBB-NEXT: # %bb.1: # %cond.false +; RV32XTHEADBB-NEXT: addi a1, a0, -1 +; RV32XTHEADBB-NEXT: not a0, a0 +; RV32XTHEADBB-NEXT: and a0, a0, a1 +; RV32XTHEADBB-NEXT: th.ff1 a0, a0 +; RV32XTHEADBB-NEXT: li a1, 32 +; RV32XTHEADBB-NEXT: sub a0, a1, a0 +; RV32XTHEADBB-NEXT: ret +; RV32XTHEADBB-NEXT: .LBB1_2: +; RV32XTHEADBB-NEXT: li a0, 16 +; RV32XTHEADBB-NEXT: ret +; +; RV64XTHEADBB-LABEL: test_cttz_i16: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: slli a1, a0, 48 +; RV64XTHEADBB-NEXT: beqz a1, .LBB1_2 +; RV64XTHEADBB-NEXT: # %bb.1: # %cond.false +; RV64XTHEADBB-NEXT: addi a1, a0, -1 +; RV64XTHEADBB-NEXT: not a0, a0 +; RV64XTHEADBB-NEXT: and a0, a0, a1 +; RV64XTHEADBB-NEXT: th.ff1 a0, a0 +; RV64XTHEADBB-NEXT: li a1, 64 +; RV64XTHEADBB-NEXT: sub a0, a1, a0 +; RV64XTHEADBB-NEXT: ret +; RV64XTHEADBB-NEXT: .LBB1_2: +; RV64XTHEADBB-NEXT: li a0, 16 +; RV64XTHEADBB-NEXT: ret %tmp = call i16 @llvm.cttz.i16(i16 %a, i1 false) ret i16 %tmp } @@ -261,6 +329,37 @@ ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: ctzw a0, a0 ; RV64ZBB-NEXT: ret +; +; RV32XTHEADBB-LABEL: test_cttz_i32: +; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: beqz a0, .LBB2_2 +; RV32XTHEADBB-NEXT: # %bb.1: # %cond.false +; RV32XTHEADBB-NEXT: addi a1, a0, -1 +; RV32XTHEADBB-NEXT: not a0, a0 +; RV32XTHEADBB-NEXT: and a0, a0, a1 +; RV32XTHEADBB-NEXT: th.ff1 a0, a0 +; RV32XTHEADBB-NEXT: li a1, 32 +; RV32XTHEADBB-NEXT: sub a0, a1, a0 +; RV32XTHEADBB-NEXT: ret +; RV32XTHEADBB-NEXT: .LBB2_2: +; RV32XTHEADBB-NEXT: li a0, 32 +; RV32XTHEADBB-NEXT: ret +; +; RV64XTHEADBB-LABEL: test_cttz_i32: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: sext.w a1, a0 +; RV64XTHEADBB-NEXT: beqz a1, .LBB2_2 +; RV64XTHEADBB-NEXT: # %bb.1: # %cond.false +; RV64XTHEADBB-NEXT: addi a1, a0, -1 +; 
RV64XTHEADBB-NEXT: not a0, a0 +; RV64XTHEADBB-NEXT: and a0, a0, a1 +; RV64XTHEADBB-NEXT: th.ff1 a0, a0 +; RV64XTHEADBB-NEXT: li a1, 64 +; RV64XTHEADBB-NEXT: sub a0, a1, a0 +; RV64XTHEADBB-NEXT: ret +; RV64XTHEADBB-NEXT: .LBB2_2: +; RV64XTHEADBB-NEXT: li a0, 32 +; RV64XTHEADBB-NEXT: ret %tmp = call i32 @llvm.cttz.i32(i32 %a, i1 false) ret i32 %tmp } @@ -408,6 +507,42 @@ ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: ctz a0, a0 ; RV64ZBB-NEXT: ret +; +; RV32XTHEADBB-LABEL: test_cttz_i64: +; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: bnez a0, .LBB3_2 +; RV32XTHEADBB-NEXT: # %bb.1: +; RV32XTHEADBB-NEXT: addi a0, a1, -1 +; RV32XTHEADBB-NEXT: not a1, a1 +; RV32XTHEADBB-NEXT: and a0, a1, a0 +; RV32XTHEADBB-NEXT: th.ff1 a0, a0 +; RV32XTHEADBB-NEXT: li a1, 64 +; RV32XTHEADBB-NEXT: j .LBB3_3 +; RV32XTHEADBB-NEXT: .LBB3_2: +; RV32XTHEADBB-NEXT: addi a1, a0, -1 +; RV32XTHEADBB-NEXT: not a0, a0 +; RV32XTHEADBB-NEXT: and a0, a0, a1 +; RV32XTHEADBB-NEXT: th.ff1 a0, a0 +; RV32XTHEADBB-NEXT: li a1, 32 +; RV32XTHEADBB-NEXT: .LBB3_3: +; RV32XTHEADBB-NEXT: sub a0, a1, a0 +; RV32XTHEADBB-NEXT: li a1, 0 +; RV32XTHEADBB-NEXT: ret +; +; RV64XTHEADBB-LABEL: test_cttz_i64: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: beqz a0, .LBB3_2 +; RV64XTHEADBB-NEXT: # %bb.1: # %cond.false +; RV64XTHEADBB-NEXT: addi a1, a0, -1 +; RV64XTHEADBB-NEXT: not a0, a0 +; RV64XTHEADBB-NEXT: and a0, a0, a1 +; RV64XTHEADBB-NEXT: th.ff1 a0, a0 +; RV64XTHEADBB-NEXT: li a1, 64 +; RV64XTHEADBB-NEXT: sub a0, a1, a0 +; RV64XTHEADBB-NEXT: ret +; RV64XTHEADBB-NEXT: .LBB3_2: +; RV64XTHEADBB-NEXT: li a0, 64 +; RV64XTHEADBB-NEXT: ret %tmp = call i64 @llvm.cttz.i64(i64 %a, i1 false) ret i64 %tmp } @@ -456,6 +591,26 @@ ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: ctz a0, a0 ; RV64ZBB-NEXT: ret +; +; RV32XTHEADBB-LABEL: test_cttz_i8_zero_undef: +; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: addi a1, a0, -1 +; RV32XTHEADBB-NEXT: not a0, a0 +; RV32XTHEADBB-NEXT: and a0, a0, a1 +; RV32XTHEADBB-NEXT: th.ff1 a0, a0 +; RV32XTHEADBB-NEXT: li a1, 32 +; RV32XTHEADBB-NEXT: sub a0, a1, a0 +; RV32XTHEADBB-NEXT: ret +; +; RV64XTHEADBB-LABEL: test_cttz_i8_zero_undef: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: addi a1, a0, -1 +; RV64XTHEADBB-NEXT: not a0, a0 +; RV64XTHEADBB-NEXT: and a0, a0, a1 +; RV64XTHEADBB-NEXT: th.ff1 a0, a0 +; RV64XTHEADBB-NEXT: li a1, 64 +; RV64XTHEADBB-NEXT: sub a0, a1, a0 +; RV64XTHEADBB-NEXT: ret %tmp = call i8 @llvm.cttz.i8(i8 %a, i1 true) ret i8 %tmp } @@ -518,6 +673,26 @@ ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: ctz a0, a0 ; RV64ZBB-NEXT: ret +; +; RV32XTHEADBB-LABEL: test_cttz_i16_zero_undef: +; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: addi a1, a0, -1 +; RV32XTHEADBB-NEXT: not a0, a0 +; RV32XTHEADBB-NEXT: and a0, a0, a1 +; RV32XTHEADBB-NEXT: th.ff1 a0, a0 +; RV32XTHEADBB-NEXT: li a1, 32 +; RV32XTHEADBB-NEXT: sub a0, a1, a0 +; RV32XTHEADBB-NEXT: ret +; +; RV64XTHEADBB-LABEL: test_cttz_i16_zero_undef: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: addi a1, a0, -1 +; RV64XTHEADBB-NEXT: not a0, a0 +; RV64XTHEADBB-NEXT: and a0, a0, a1 +; RV64XTHEADBB-NEXT: th.ff1 a0, a0 +; RV64XTHEADBB-NEXT: li a1, 64 +; RV64XTHEADBB-NEXT: sub a0, a1, a0 +; RV64XTHEADBB-NEXT: ret %tmp = call i16 @llvm.cttz.i16(i16 %a, i1 true) ret i16 %tmp } @@ -596,6 +771,26 @@ ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: ctzw a0, a0 ; RV64ZBB-NEXT: ret +; +; RV32XTHEADBB-LABEL: test_cttz_i32_zero_undef: +; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: addi a1, a0, -1 +; RV32XTHEADBB-NEXT: not a0, a0 +; RV32XTHEADBB-NEXT: and a0, a0, a1 +; RV32XTHEADBB-NEXT: th.ff1 a0, a0 +; 
RV32XTHEADBB-NEXT: li a1, 32 +; RV32XTHEADBB-NEXT: sub a0, a1, a0 +; RV32XTHEADBB-NEXT: ret +; +; RV64XTHEADBB-LABEL: test_cttz_i32_zero_undef: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: addi a1, a0, -1 +; RV64XTHEADBB-NEXT: not a0, a0 +; RV64XTHEADBB-NEXT: and a0, a0, a1 +; RV64XTHEADBB-NEXT: th.ff1 a0, a0 +; RV64XTHEADBB-NEXT: li a1, 64 +; RV64XTHEADBB-NEXT: sub a0, a1, a0 +; RV64XTHEADBB-NEXT: ret %tmp = call i32 @llvm.cttz.i32(i32 %a, i1 true) ret i32 %tmp } @@ -723,6 +918,37 @@ ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: ctz a0, a0 ; RV64ZBB-NEXT: ret +; +; RV32XTHEADBB-LABEL: test_cttz_i64_zero_undef: +; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: bnez a0, .LBB7_2 +; RV32XTHEADBB-NEXT: # %bb.1: +; RV32XTHEADBB-NEXT: addi a0, a1, -1 +; RV32XTHEADBB-NEXT: not a1, a1 +; RV32XTHEADBB-NEXT: and a0, a1, a0 +; RV32XTHEADBB-NEXT: th.ff1 a0, a0 +; RV32XTHEADBB-NEXT: li a1, 64 +; RV32XTHEADBB-NEXT: j .LBB7_3 +; RV32XTHEADBB-NEXT: .LBB7_2: +; RV32XTHEADBB-NEXT: addi a1, a0, -1 +; RV32XTHEADBB-NEXT: not a0, a0 +; RV32XTHEADBB-NEXT: and a0, a0, a1 +; RV32XTHEADBB-NEXT: th.ff1 a0, a0 +; RV32XTHEADBB-NEXT: li a1, 32 +; RV32XTHEADBB-NEXT: .LBB7_3: +; RV32XTHEADBB-NEXT: sub a0, a1, a0 +; RV32XTHEADBB-NEXT: li a1, 0 +; RV32XTHEADBB-NEXT: ret +; +; RV64XTHEADBB-LABEL: test_cttz_i64_zero_undef: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: addi a1, a0, -1 +; RV64XTHEADBB-NEXT: not a0, a0 +; RV64XTHEADBB-NEXT: and a0, a0, a1 +; RV64XTHEADBB-NEXT: th.ff1 a0, a0 +; RV64XTHEADBB-NEXT: li a1, 64 +; RV64XTHEADBB-NEXT: sub a0, a1, a0 +; RV64XTHEADBB-NEXT: ret %tmp = call i64 @llvm.cttz.i64(i64 %a, i1 true) ret i64 %tmp } @@ -801,6 +1027,20 @@ ; RV64ZBB-NEXT: clz a0, a0 ; RV64ZBB-NEXT: addi a0, a0, -56 ; RV64ZBB-NEXT: ret +; +; RV32XTHEADBB-LABEL: test_ctlz_i8: +; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: andi a0, a0, 255 +; RV32XTHEADBB-NEXT: th.ff1 a0, a0 +; RV32XTHEADBB-NEXT: addi a0, a0, -24 +; RV32XTHEADBB-NEXT: ret +; +; RV64XTHEADBB-LABEL: test_ctlz_i8: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: andi a0, a0, 255 +; RV64XTHEADBB-NEXT: th.ff1 a0, a0 +; RV64XTHEADBB-NEXT: addi a0, a0, -56 +; RV64XTHEADBB-NEXT: ret %tmp = call i8 @llvm.ctlz.i8(i8 %a, i1 false) ret i8 %tmp } @@ -897,6 +1137,20 @@ ; RV64ZBB-NEXT: clz a0, a0 ; RV64ZBB-NEXT: addi a0, a0, -48 ; RV64ZBB-NEXT: ret +; +; RV32XTHEADBB-LABEL: test_ctlz_i16: +; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: th.extu a0, a0, 15, 0 +; RV32XTHEADBB-NEXT: th.ff1 a0, a0 +; RV32XTHEADBB-NEXT: addi a0, a0, -16 +; RV32XTHEADBB-NEXT: ret +; +; RV64XTHEADBB-LABEL: test_ctlz_i16: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: th.extu a0, a0, 15, 0 +; RV64XTHEADBB-NEXT: th.ff1 a0, a0 +; RV64XTHEADBB-NEXT: addi a0, a0, -48 +; RV64XTHEADBB-NEXT: ret %tmp = call i16 @llvm.ctlz.i16(i16 %a, i1 false) ret i16 %tmp } @@ -1081,6 +1335,18 @@ ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: clzw a0, a0 ; RV64ZBB-NEXT: ret +; +; RV32XTHEADBB-LABEL: test_ctlz_i32: +; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: th.ff1 a0, a0 +; RV32XTHEADBB-NEXT: ret +; +; RV64XTHEADBB-LABEL: test_ctlz_i32: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: not a0, a0 +; RV64XTHEADBB-NEXT: slli a0, a0, 32 +; RV64XTHEADBB-NEXT: th.ff0 a0, a0 +; RV64XTHEADBB-NEXT: ret %tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 false) ret i32 %tmp } @@ -1344,6 +1610,24 @@ ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: clz a0, a0 ; RV64ZBB-NEXT: ret +; +; RV32XTHEADBB-LABEL: test_ctlz_i64: +; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: bnez a1, .LBB11_2 +; RV32XTHEADBB-NEXT: # %bb.1: +; RV32XTHEADBB-NEXT: th.ff1 a0, a0 +; 
RV32XTHEADBB-NEXT: addi a0, a0, 32 +; RV32XTHEADBB-NEXT: li a1, 0 +; RV32XTHEADBB-NEXT: ret +; RV32XTHEADBB-NEXT: .LBB11_2: +; RV32XTHEADBB-NEXT: th.ff1 a0, a1 +; RV32XTHEADBB-NEXT: li a1, 0 +; RV32XTHEADBB-NEXT: ret +; +; RV64XTHEADBB-LABEL: test_ctlz_i64: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: th.ff1 a0, a0 +; RV64XTHEADBB-NEXT: ret %tmp = call i64 @llvm.ctlz.i64(i64 %a, i1 false) ret i64 %tmp } @@ -1410,6 +1694,20 @@ ; RV64ZBB-NEXT: clz a0, a0 ; RV64ZBB-NEXT: addi a0, a0, -56 ; RV64ZBB-NEXT: ret +; +; RV32XTHEADBB-LABEL: test_ctlz_i8_zero_undef: +; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: andi a0, a0, 255 +; RV32XTHEADBB-NEXT: th.ff1 a0, a0 +; RV32XTHEADBB-NEXT: addi a0, a0, -24 +; RV32XTHEADBB-NEXT: ret +; +; RV64XTHEADBB-LABEL: test_ctlz_i8_zero_undef: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: andi a0, a0, 255 +; RV64XTHEADBB-NEXT: th.ff1 a0, a0 +; RV64XTHEADBB-NEXT: addi a0, a0, -56 +; RV64XTHEADBB-NEXT: ret %tmp = call i8 @llvm.ctlz.i8(i8 %a, i1 true) ret i8 %tmp } @@ -1496,6 +1794,20 @@ ; RV64ZBB-NEXT: clz a0, a0 ; RV64ZBB-NEXT: addi a0, a0, -48 ; RV64ZBB-NEXT: ret +; +; RV32XTHEADBB-LABEL: test_ctlz_i16_zero_undef: +; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: th.extu a0, a0, 15, 0 +; RV32XTHEADBB-NEXT: th.ff1 a0, a0 +; RV32XTHEADBB-NEXT: addi a0, a0, -16 +; RV32XTHEADBB-NEXT: ret +; +; RV64XTHEADBB-LABEL: test_ctlz_i16_zero_undef: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: th.extu a0, a0, 15, 0 +; RV64XTHEADBB-NEXT: th.ff1 a0, a0 +; RV64XTHEADBB-NEXT: addi a0, a0, -48 +; RV64XTHEADBB-NEXT: ret %tmp = call i16 @llvm.ctlz.i16(i16 %a, i1 true) ret i16 %tmp } @@ -1658,6 +1970,18 @@ ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: clzw a0, a0 ; RV64ZBB-NEXT: ret +; +; RV32XTHEADBB-LABEL: test_ctlz_i32_zero_undef: +; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: th.ff1 a0, a0 +; RV32XTHEADBB-NEXT: ret +; +; RV64XTHEADBB-LABEL: test_ctlz_i32_zero_undef: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: not a0, a0 +; RV64XTHEADBB-NEXT: slli a0, a0, 32 +; RV64XTHEADBB-NEXT: th.ff0 a0, a0 +; RV64XTHEADBB-NEXT: ret %tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 true) ret i32 %tmp } @@ -1911,6 +2235,24 @@ ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: clz a0, a0 ; RV64ZBB-NEXT: ret +; +; RV32XTHEADBB-LABEL: test_ctlz_i64_zero_undef: +; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: bnez a1, .LBB15_2 +; RV32XTHEADBB-NEXT: # %bb.1: +; RV32XTHEADBB-NEXT: th.ff1 a0, a0 +; RV32XTHEADBB-NEXT: addi a0, a0, 32 +; RV32XTHEADBB-NEXT: li a1, 0 +; RV32XTHEADBB-NEXT: ret +; RV32XTHEADBB-NEXT: .LBB15_2: +; RV32XTHEADBB-NEXT: th.ff1 a0, a1 +; RV32XTHEADBB-NEXT: li a1, 0 +; RV32XTHEADBB-NEXT: ret +; +; RV64XTHEADBB-LABEL: test_ctlz_i64_zero_undef: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: th.ff1 a0, a0 +; RV64XTHEADBB-NEXT: ret %tmp = call i64 @llvm.ctlz.i64(i64 %a, i1 true) ret i64 %tmp } @@ -1955,6 +2297,34 @@ ; RV64ZBB-NEXT: andi a0, a0, 255 ; RV64ZBB-NEXT: cpopw a0, a0 ; RV64ZBB-NEXT: ret +; +; RV32XTHEADBB-LABEL: test_ctpop_i8: +; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: srli a1, a0, 1 +; RV32XTHEADBB-NEXT: andi a1, a1, 85 +; RV32XTHEADBB-NEXT: sub a0, a0, a1 +; RV32XTHEADBB-NEXT: andi a1, a0, 51 +; RV32XTHEADBB-NEXT: srli a0, a0, 2 +; RV32XTHEADBB-NEXT: andi a0, a0, 51 +; RV32XTHEADBB-NEXT: add a0, a1, a0 +; RV32XTHEADBB-NEXT: srli a1, a0, 4 +; RV32XTHEADBB-NEXT: add a0, a0, a1 +; RV32XTHEADBB-NEXT: andi a0, a0, 15 +; RV32XTHEADBB-NEXT: ret +; +; RV64XTHEADBB-LABEL: test_ctpop_i8: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: srli a1, a0, 1 +; RV64XTHEADBB-NEXT: andi a1, a1, 85 +; 
RV64XTHEADBB-NEXT: subw a0, a0, a1 +; RV64XTHEADBB-NEXT: andi a1, a0, 51 +; RV64XTHEADBB-NEXT: srli a0, a0, 2 +; RV64XTHEADBB-NEXT: andi a0, a0, 51 +; RV64XTHEADBB-NEXT: add a0, a1, a0 +; RV64XTHEADBB-NEXT: srli a1, a0, 4 +; RV64XTHEADBB-NEXT: add a0, a0, a1 +; RV64XTHEADBB-NEXT: andi a0, a0, 15 +; RV64XTHEADBB-NEXT: ret %1 = call i8 @llvm.ctpop.i8(i8 %a) ret i8 %1 } @@ -2013,6 +2383,48 @@ ; RV64ZBB-NEXT: zext.h a0, a0 ; RV64ZBB-NEXT: cpopw a0, a0 ; RV64ZBB-NEXT: ret +; +; RV32XTHEADBB-LABEL: test_ctpop_i16: +; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: srli a1, a0, 1 +; RV32XTHEADBB-NEXT: lui a2, 5 +; RV32XTHEADBB-NEXT: addi a2, a2, 1365 +; RV32XTHEADBB-NEXT: and a1, a1, a2 +; RV32XTHEADBB-NEXT: sub a0, a0, a1 +; RV32XTHEADBB-NEXT: lui a1, 3 +; RV32XTHEADBB-NEXT: addi a1, a1, 819 +; RV32XTHEADBB-NEXT: and a2, a0, a1 +; RV32XTHEADBB-NEXT: srli a0, a0, 2 +; RV32XTHEADBB-NEXT: and a0, a0, a1 +; RV32XTHEADBB-NEXT: add a0, a2, a0 +; RV32XTHEADBB-NEXT: srli a1, a0, 4 +; RV32XTHEADBB-NEXT: add a0, a0, a1 +; RV32XTHEADBB-NEXT: andi a1, a0, 15 +; RV32XTHEADBB-NEXT: slli a0, a0, 20 +; RV32XTHEADBB-NEXT: srli a0, a0, 28 +; RV32XTHEADBB-NEXT: add a0, a1, a0 +; RV32XTHEADBB-NEXT: ret +; +; RV64XTHEADBB-LABEL: test_ctpop_i16: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: srli a1, a0, 1 +; RV64XTHEADBB-NEXT: lui a2, 5 +; RV64XTHEADBB-NEXT: addiw a2, a2, 1365 +; RV64XTHEADBB-NEXT: and a1, a1, a2 +; RV64XTHEADBB-NEXT: sub a0, a0, a1 +; RV64XTHEADBB-NEXT: lui a1, 3 +; RV64XTHEADBB-NEXT: addiw a1, a1, 819 +; RV64XTHEADBB-NEXT: and a2, a0, a1 +; RV64XTHEADBB-NEXT: srli a0, a0, 2 +; RV64XTHEADBB-NEXT: and a0, a0, a1 +; RV64XTHEADBB-NEXT: add a0, a2, a0 +; RV64XTHEADBB-NEXT: srli a1, a0, 4 +; RV64XTHEADBB-NEXT: add a0, a0, a1 +; RV64XTHEADBB-NEXT: andi a1, a0, 15 +; RV64XTHEADBB-NEXT: slli a0, a0, 52 +; RV64XTHEADBB-NEXT: srli a0, a0, 60 +; RV64XTHEADBB-NEXT: add a0, a1, a0 +; RV64XTHEADBB-NEXT: ret %1 = call i16 @llvm.ctpop.i16(i16 %a) ret i16 %1 } @@ -2131,6 +2543,62 @@ ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: cpopw a0, a0 ; RV64ZBB-NEXT: ret +; +; RV32XTHEADBB-LABEL: test_ctpop_i32: +; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: addi sp, sp, -16 +; RV32XTHEADBB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32XTHEADBB-NEXT: srli a1, a0, 1 +; RV32XTHEADBB-NEXT: lui a2, 349525 +; RV32XTHEADBB-NEXT: addi a2, a2, 1365 +; RV32XTHEADBB-NEXT: and a1, a1, a2 +; RV32XTHEADBB-NEXT: sub a0, a0, a1 +; RV32XTHEADBB-NEXT: lui a1, 209715 +; RV32XTHEADBB-NEXT: addi a1, a1, 819 +; RV32XTHEADBB-NEXT: and a2, a0, a1 +; RV32XTHEADBB-NEXT: srli a0, a0, 2 +; RV32XTHEADBB-NEXT: and a0, a0, a1 +; RV32XTHEADBB-NEXT: add a0, a2, a0 +; RV32XTHEADBB-NEXT: srli a1, a0, 4 +; RV32XTHEADBB-NEXT: add a0, a0, a1 +; RV32XTHEADBB-NEXT: lui a1, 61681 +; RV32XTHEADBB-NEXT: addi a1, a1, -241 +; RV32XTHEADBB-NEXT: and a0, a0, a1 +; RV32XTHEADBB-NEXT: lui a1, 4112 +; RV32XTHEADBB-NEXT: addi a1, a1, 257 +; RV32XTHEADBB-NEXT: call __mulsi3@plt +; RV32XTHEADBB-NEXT: srli a0, a0, 24 +; RV32XTHEADBB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32XTHEADBB-NEXT: addi sp, sp, 16 +; RV32XTHEADBB-NEXT: ret +; +; RV64XTHEADBB-LABEL: test_ctpop_i32: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: addi sp, sp, -16 +; RV64XTHEADBB-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64XTHEADBB-NEXT: srli a1, a0, 1 +; RV64XTHEADBB-NEXT: lui a2, 349525 +; RV64XTHEADBB-NEXT: addiw a2, a2, 1365 +; RV64XTHEADBB-NEXT: and a1, a1, a2 +; RV64XTHEADBB-NEXT: sub a0, a0, a1 +; RV64XTHEADBB-NEXT: lui a1, 209715 +; RV64XTHEADBB-NEXT: addiw a1, a1, 819 +; RV64XTHEADBB-NEXT: and 
a2, a0, a1 +; RV64XTHEADBB-NEXT: srli a0, a0, 2 +; RV64XTHEADBB-NEXT: and a0, a0, a1 +; RV64XTHEADBB-NEXT: add a0, a2, a0 +; RV64XTHEADBB-NEXT: srli a1, a0, 4 +; RV64XTHEADBB-NEXT: add a0, a0, a1 +; RV64XTHEADBB-NEXT: lui a1, 61681 +; RV64XTHEADBB-NEXT: addiw a1, a1, -241 +; RV64XTHEADBB-NEXT: and a0, a0, a1 +; RV64XTHEADBB-NEXT: lui a1, 4112 +; RV64XTHEADBB-NEXT: addiw a1, a1, 257 +; RV64XTHEADBB-NEXT: call __muldi3@plt +; RV64XTHEADBB-NEXT: srliw a0, a0, 24 +; RV64XTHEADBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64XTHEADBB-NEXT: addi sp, sp, 16 +; RV64XTHEADBB-NEXT: ret %1 = call i32 @llvm.ctpop.i32(i32 %a) ret i32 %1 } @@ -2295,6 +2763,91 @@ ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: cpop a0, a0 ; RV64ZBB-NEXT: ret +; +; RV32XTHEADBB-LABEL: test_ctpop_i64: +; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: addi sp, sp, -32 +; RV32XTHEADBB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32XTHEADBB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32XTHEADBB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32XTHEADBB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32XTHEADBB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32XTHEADBB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill +; RV32XTHEADBB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill +; RV32XTHEADBB-NEXT: mv s0, a0 +; RV32XTHEADBB-NEXT: srli a0, a1, 1 +; RV32XTHEADBB-NEXT: lui a2, 349525 +; RV32XTHEADBB-NEXT: addi s2, a2, 1365 +; RV32XTHEADBB-NEXT: and a0, a0, s2 +; RV32XTHEADBB-NEXT: sub a1, a1, a0 +; RV32XTHEADBB-NEXT: lui a0, 209715 +; RV32XTHEADBB-NEXT: addi s3, a0, 819 +; RV32XTHEADBB-NEXT: and a0, a1, s3 +; RV32XTHEADBB-NEXT: srli a1, a1, 2 +; RV32XTHEADBB-NEXT: and a1, a1, s3 +; RV32XTHEADBB-NEXT: add a0, a0, a1 +; RV32XTHEADBB-NEXT: srli a1, a0, 4 +; RV32XTHEADBB-NEXT: add a0, a0, a1 +; RV32XTHEADBB-NEXT: lui a1, 61681 +; RV32XTHEADBB-NEXT: addi s4, a1, -241 +; RV32XTHEADBB-NEXT: and a0, a0, s4 +; RV32XTHEADBB-NEXT: lui a1, 4112 +; RV32XTHEADBB-NEXT: addi s1, a1, 257 +; RV32XTHEADBB-NEXT: mv a1, s1 +; RV32XTHEADBB-NEXT: call __mulsi3@plt +; RV32XTHEADBB-NEXT: srli s5, a0, 24 +; RV32XTHEADBB-NEXT: srli a0, s0, 1 +; RV32XTHEADBB-NEXT: and a0, a0, s2 +; RV32XTHEADBB-NEXT: sub s0, s0, a0 +; RV32XTHEADBB-NEXT: and a0, s0, s3 +; RV32XTHEADBB-NEXT: srli s0, s0, 2 +; RV32XTHEADBB-NEXT: and a1, s0, s3 +; RV32XTHEADBB-NEXT: add a0, a0, a1 +; RV32XTHEADBB-NEXT: srli a1, a0, 4 +; RV32XTHEADBB-NEXT: add a0, a0, a1 +; RV32XTHEADBB-NEXT: and a0, a0, s4 +; RV32XTHEADBB-NEXT: mv a1, s1 +; RV32XTHEADBB-NEXT: call __mulsi3@plt +; RV32XTHEADBB-NEXT: srli a0, a0, 24 +; RV32XTHEADBB-NEXT: add a0, a0, s5 +; RV32XTHEADBB-NEXT: li a1, 0 +; RV32XTHEADBB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32XTHEADBB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32XTHEADBB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32XTHEADBB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32XTHEADBB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32XTHEADBB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload +; RV32XTHEADBB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload +; RV32XTHEADBB-NEXT: addi sp, sp, 32 +; RV32XTHEADBB-NEXT: ret +; +; RV64XTHEADBB-LABEL: test_ctpop_i64: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: addi sp, sp, -16 +; RV64XTHEADBB-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64XTHEADBB-NEXT: lui a1, %hi(.LCPI19_0) +; RV64XTHEADBB-NEXT: ld a1, %lo(.LCPI19_0)(a1) +; RV64XTHEADBB-NEXT: lui a2, %hi(.LCPI19_1) +; RV64XTHEADBB-NEXT: ld a2, %lo(.LCPI19_1)(a2) +; RV64XTHEADBB-NEXT: srli a3, a0, 1 +; RV64XTHEADBB-NEXT: and a1, a3, a1 +; RV64XTHEADBB-NEXT: sub a0, a0, a1 +; 
RV64XTHEADBB-NEXT: and a1, a0, a2 +; RV64XTHEADBB-NEXT: srli a0, a0, 2 +; RV64XTHEADBB-NEXT: and a0, a0, a2 +; RV64XTHEADBB-NEXT: lui a2, %hi(.LCPI19_2) +; RV64XTHEADBB-NEXT: ld a2, %lo(.LCPI19_2)(a2) +; RV64XTHEADBB-NEXT: add a0, a1, a0 +; RV64XTHEADBB-NEXT: srli a1, a0, 4 +; RV64XTHEADBB-NEXT: add a0, a0, a1 +; RV64XTHEADBB-NEXT: and a0, a0, a2 +; RV64XTHEADBB-NEXT: lui a1, %hi(.LCPI19_3) +; RV64XTHEADBB-NEXT: ld a1, %lo(.LCPI19_3)(a1) +; RV64XTHEADBB-NEXT: call __muldi3@plt +; RV64XTHEADBB-NEXT: srli a0, a0, 56 +; RV64XTHEADBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64XTHEADBB-NEXT: addi sp, sp, 16 +; RV64XTHEADBB-NEXT: ret %1 = call i64 @llvm.ctpop.i64(i64 %a) ret i64 %1 } @@ -2337,6 +2890,30 @@ ; RV64ZBB-NEXT: cpopw a0, a0 ; RV64ZBB-NEXT: andi a0, a0, 1 ; RV64ZBB-NEXT: ret +; +; RV32XTHEADBB-LABEL: test_parity_i8: +; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: andi a0, a0, 255 +; RV32XTHEADBB-NEXT: srli a1, a0, 4 +; RV32XTHEADBB-NEXT: xor a0, a0, a1 +; RV32XTHEADBB-NEXT: srli a1, a0, 2 +; RV32XTHEADBB-NEXT: xor a0, a0, a1 +; RV32XTHEADBB-NEXT: srli a1, a0, 1 +; RV32XTHEADBB-NEXT: xor a0, a0, a1 +; RV32XTHEADBB-NEXT: andi a0, a0, 1 +; RV32XTHEADBB-NEXT: ret +; +; RV64XTHEADBB-LABEL: test_parity_i8: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: andi a0, a0, 255 +; RV64XTHEADBB-NEXT: srli a1, a0, 4 +; RV64XTHEADBB-NEXT: xor a0, a0, a1 +; RV64XTHEADBB-NEXT: srli a1, a0, 2 +; RV64XTHEADBB-NEXT: xor a0, a0, a1 +; RV64XTHEADBB-NEXT: srli a1, a0, 1 +; RV64XTHEADBB-NEXT: xor a0, a0, a1 +; RV64XTHEADBB-NEXT: andi a0, a0, 1 +; RV64XTHEADBB-NEXT: ret %1 = call i8 @llvm.ctpop.i8(i8 %a) %2 = and i8 %1, 1 ret i8 %2 @@ -2386,6 +2963,34 @@ ; RV64ZBB-NEXT: cpopw a0, a0 ; RV64ZBB-NEXT: andi a0, a0, 1 ; RV64ZBB-NEXT: ret +; +; RV32XTHEADBB-LABEL: test_parity_i16: +; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: th.extu a0, a0, 15, 0 +; RV32XTHEADBB-NEXT: srli a1, a0, 8 +; RV32XTHEADBB-NEXT: xor a0, a0, a1 +; RV32XTHEADBB-NEXT: srli a1, a0, 4 +; RV32XTHEADBB-NEXT: xor a0, a0, a1 +; RV32XTHEADBB-NEXT: srli a1, a0, 2 +; RV32XTHEADBB-NEXT: xor a0, a0, a1 +; RV32XTHEADBB-NEXT: srli a1, a0, 1 +; RV32XTHEADBB-NEXT: xor a0, a0, a1 +; RV32XTHEADBB-NEXT: andi a0, a0, 1 +; RV32XTHEADBB-NEXT: ret +; +; RV64XTHEADBB-LABEL: test_parity_i16: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: th.extu a0, a0, 15, 0 +; RV64XTHEADBB-NEXT: srli a1, a0, 8 +; RV64XTHEADBB-NEXT: xor a0, a0, a1 +; RV64XTHEADBB-NEXT: srli a1, a0, 4 +; RV64XTHEADBB-NEXT: xor a0, a0, a1 +; RV64XTHEADBB-NEXT: srli a1, a0, 2 +; RV64XTHEADBB-NEXT: xor a0, a0, a1 +; RV64XTHEADBB-NEXT: srli a1, a0, 1 +; RV64XTHEADBB-NEXT: xor a0, a0, a1 +; RV64XTHEADBB-NEXT: andi a0, a0, 1 +; RV64XTHEADBB-NEXT: ret %1 = call i16 @llvm.ctpop.i16(i16 %a) %2 = and i16 %1, 1 ret i16 %2 @@ -2435,6 +3040,37 @@ ; RV64ZBB-NEXT: cpopw a0, a0 ; RV64ZBB-NEXT: andi a0, a0, 1 ; RV64ZBB-NEXT: ret +; +; RV32XTHEADBB-LABEL: test_parity_i32: +; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: srli a1, a0, 16 +; RV32XTHEADBB-NEXT: xor a0, a0, a1 +; RV32XTHEADBB-NEXT: srli a1, a0, 8 +; RV32XTHEADBB-NEXT: xor a0, a0, a1 +; RV32XTHEADBB-NEXT: srli a1, a0, 4 +; RV32XTHEADBB-NEXT: xor a0, a0, a1 +; RV32XTHEADBB-NEXT: srli a1, a0, 2 +; RV32XTHEADBB-NEXT: xor a0, a0, a1 +; RV32XTHEADBB-NEXT: srli a1, a0, 1 +; RV32XTHEADBB-NEXT: xor a0, a0, a1 +; RV32XTHEADBB-NEXT: andi a0, a0, 1 +; RV32XTHEADBB-NEXT: ret +; +; RV64XTHEADBB-LABEL: test_parity_i32: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: th.extu a1, a0, 31, 0 +; RV64XTHEADBB-NEXT: srliw a0, a0, 16 +; RV64XTHEADBB-NEXT: xor a0, a1, a0 
+; RV64XTHEADBB-NEXT: srli a1, a0, 8 +; RV64XTHEADBB-NEXT: xor a0, a0, a1 +; RV64XTHEADBB-NEXT: srli a1, a0, 4 +; RV64XTHEADBB-NEXT: xor a0, a0, a1 +; RV64XTHEADBB-NEXT: srli a1, a0, 2 +; RV64XTHEADBB-NEXT: xor a0, a0, a1 +; RV64XTHEADBB-NEXT: srli a1, a0, 1 +; RV64XTHEADBB-NEXT: xor a0, a0, a1 +; RV64XTHEADBB-NEXT: andi a0, a0, 1 +; RV64XTHEADBB-NEXT: ret %1 = call i32 @llvm.ctpop.i32(i32 %a) %2 = and i32 %1, 1 ret i32 %2 @@ -2488,6 +3124,40 @@ ; RV64ZBB-NEXT: cpop a0, a0 ; RV64ZBB-NEXT: andi a0, a0, 1 ; RV64ZBB-NEXT: ret +; +; RV32XTHEADBB-LABEL: test_parity_i64: +; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: xor a0, a0, a1 +; RV32XTHEADBB-NEXT: srli a1, a0, 16 +; RV32XTHEADBB-NEXT: xor a0, a0, a1 +; RV32XTHEADBB-NEXT: srli a1, a0, 8 +; RV32XTHEADBB-NEXT: xor a0, a0, a1 +; RV32XTHEADBB-NEXT: srli a1, a0, 4 +; RV32XTHEADBB-NEXT: xor a0, a0, a1 +; RV32XTHEADBB-NEXT: srli a1, a0, 2 +; RV32XTHEADBB-NEXT: xor a0, a0, a1 +; RV32XTHEADBB-NEXT: srli a1, a0, 1 +; RV32XTHEADBB-NEXT: xor a0, a0, a1 +; RV32XTHEADBB-NEXT: andi a0, a0, 1 +; RV32XTHEADBB-NEXT: li a1, 0 +; RV32XTHEADBB-NEXT: ret +; +; RV64XTHEADBB-LABEL: test_parity_i64: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: srli a1, a0, 32 +; RV64XTHEADBB-NEXT: xor a0, a0, a1 +; RV64XTHEADBB-NEXT: srli a1, a0, 16 +; RV64XTHEADBB-NEXT: xor a0, a0, a1 +; RV64XTHEADBB-NEXT: srli a1, a0, 8 +; RV64XTHEADBB-NEXT: xor a0, a0, a1 +; RV64XTHEADBB-NEXT: srli a1, a0, 4 +; RV64XTHEADBB-NEXT: xor a0, a0, a1 +; RV64XTHEADBB-NEXT: srli a1, a0, 2 +; RV64XTHEADBB-NEXT: xor a0, a0, a1 +; RV64XTHEADBB-NEXT: srli a1, a0, 1 +; RV64XTHEADBB-NEXT: xor a0, a0, a1 +; RV64XTHEADBB-NEXT: andi a0, a0, 1 +; RV64XTHEADBB-NEXT: ret %1 = call i64 @llvm.ctpop.i64(i64 %a) %2 = and i64 %1, 1 ret i64 %2 diff --git a/llvm/test/CodeGen/RISCV/imm.ll b/llvm/test/CodeGen/RISCV/imm.ll --- a/llvm/test/CodeGen/RISCV/imm.ll +++ b/llvm/test/CodeGen/RISCV/imm.ll @@ -9,6 +9,8 @@ ; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64IZBB ; RUN: llc -mtriple=riscv64 -riscv-disable-using-constant-pool-for-large-ints -mattr=+zbs \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64IZBS +; RUN: llc -mtriple=riscv64 -riscv-disable-using-constant-pool-for-large-ints -mattr=+xtheadbb \ +; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64IXTHEADBB ; Materializing constants @@ -41,6 +43,11 @@ ; RV64IZBS: # %bb.0: ; RV64IZBS-NEXT: li a0, 0 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: zero: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: li a0, 0 +; RV64IXTHEADBB-NEXT: ret ret i32 0 } @@ -69,6 +76,11 @@ ; RV64IZBS: # %bb.0: ; RV64IZBS-NEXT: li a0, 2047 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: pos_small: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: li a0, 2047 +; RV64IXTHEADBB-NEXT: ret ret i32 2047 } @@ -97,6 +109,11 @@ ; RV64IZBS: # %bb.0: ; RV64IZBS-NEXT: li a0, -2048 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: neg_small: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: li a0, -2048 +; RV64IXTHEADBB-NEXT: ret ret i32 -2048 } @@ -130,6 +147,12 @@ ; RV64IZBS-NEXT: lui a0, 423811 ; RV64IZBS-NEXT: addiw a0, a0, -1297 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: pos_i32: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: lui a0, 423811 +; RV64IXTHEADBB-NEXT: addiw a0, a0, -1297 +; RV64IXTHEADBB-NEXT: ret ret i32 1735928559 } @@ -163,6 +186,12 @@ ; RV64IZBS-NEXT: lui a0, 912092 ; RV64IZBS-NEXT: addiw a0, a0, -273 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: neg_i32: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: lui a0, 912092 +; 
RV64IXTHEADBB-NEXT: addiw a0, a0, -273 +; RV64IXTHEADBB-NEXT: ret ret i32 -559038737 } @@ -191,6 +220,11 @@ ; RV64IZBS: # %bb.0: ; RV64IZBS-NEXT: lui a0, 16 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: pos_i32_hi20_only: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: lui a0, 16 +; RV64IXTHEADBB-NEXT: ret ret i32 65536 ; 0x10000 } @@ -219,6 +253,11 @@ ; RV64IZBS: # %bb.0: ; RV64IZBS-NEXT: lui a0, 1048560 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: neg_i32_hi20_only: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: lui a0, 1048560 +; RV64IXTHEADBB-NEXT: ret ret i32 -65536 ; -0x10000 } @@ -254,6 +293,12 @@ ; RV64IZBS-NEXT: lui a0, 32 ; RV64IZBS-NEXT: addiw a0, a0, -64 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: imm_left_shifted_addi: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: lui a0, 32 +; RV64IXTHEADBB-NEXT: addiw a0, a0, -64 +; RV64IXTHEADBB-NEXT: ret ret i32 131008 ; 0x1FFC0 } @@ -289,6 +334,12 @@ ; RV64IZBS-NEXT: lui a0, 524288 ; RV64IZBS-NEXT: addiw a0, a0, -1 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: imm_right_shifted_addi: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: lui a0, 524288 +; RV64IXTHEADBB-NEXT: addiw a0, a0, -1 +; RV64IXTHEADBB-NEXT: ret ret i32 2147483647 ; 0x7FFFFFFF } @@ -324,6 +375,12 @@ ; RV64IZBS-NEXT: lui a0, 56 ; RV64IZBS-NEXT: addiw a0, a0, 580 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: imm_right_shifted_lui: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: lui a0, 56 +; RV64IXTHEADBB-NEXT: addiw a0, a0, 580 +; RV64IXTHEADBB-NEXT: ret ret i32 229956 ; 0x38244 } @@ -356,6 +413,12 @@ ; RV64IZBS: # %bb.0: ; RV64IZBS-NEXT: bseti a0, zero, 31 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: imm64_1: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: li a0, 1 +; RV64IXTHEADBB-NEXT: slli a0, a0, 31 +; RV64IXTHEADBB-NEXT: ret ret i64 2147483648 ; 0x8000_0000 } @@ -389,6 +452,12 @@ ; RV64IZBS-NEXT: li a0, -1 ; RV64IZBS-NEXT: srli a0, a0, 32 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: imm64_2: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: li a0, -1 +; RV64IXTHEADBB-NEXT: srli a0, a0, 32 +; RV64IXTHEADBB-NEXT: ret ret i64 4294967295 ; 0xFFFF_FFFF } @@ -421,6 +490,12 @@ ; RV64IZBS: # %bb.0: ; RV64IZBS-NEXT: bseti a0, zero, 32 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: imm64_3: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: li a0, 1 +; RV64IXTHEADBB-NEXT: slli a0, a0, 32 +; RV64IXTHEADBB-NEXT: ret ret i64 4294967296 ; 0x1_0000_0000 } @@ -453,6 +528,12 @@ ; RV64IZBS: # %bb.0: ; RV64IZBS-NEXT: bseti a0, zero, 63 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: imm64_4: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: li a0, -1 +; RV64IXTHEADBB-NEXT: slli a0, a0, 63 +; RV64IXTHEADBB-NEXT: ret ret i64 9223372036854775808 ; 0x8000_0000_0000_0000 } @@ -485,6 +566,12 @@ ; RV64IZBS: # %bb.0: ; RV64IZBS-NEXT: bseti a0, zero, 63 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: imm64_5: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: li a0, -1 +; RV64IXTHEADBB-NEXT: slli a0, a0, 63 +; RV64IXTHEADBB-NEXT: ret ret i64 -9223372036854775808 ; 0x8000_0000_0000_0000 } @@ -523,6 +610,13 @@ ; RV64IZBS-NEXT: addiw a0, a0, -1329 ; RV64IZBS-NEXT: slli a0, a0, 35 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: imm64_6: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: lui a0, 9321 +; RV64IXTHEADBB-NEXT: addiw a0, a0, -1329 +; RV64IXTHEADBB-NEXT: slli a0, a0, 35 +; RV64IXTHEADBB-NEXT: ret ret i64 1311768464867721216 ; 0x1234_5678_0000_0000 } @@ -569,6 +663,15 @@ ; RV64IZBS-NEXT: slli a0, a0, 24 ; RV64IZBS-NEXT: addi a0, a0, 15 ; RV64IZBS-NEXT: ret +; 
+; RV64IXTHEADBB-LABEL: imm64_7: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: li a0, 7 +; RV64IXTHEADBB-NEXT: slli a0, a0, 36 +; RV64IXTHEADBB-NEXT: addi a0, a0, 11 +; RV64IXTHEADBB-NEXT: slli a0, a0, 24 +; RV64IXTHEADBB-NEXT: addi a0, a0, 15 +; RV64IXTHEADBB-NEXT: ret ret i64 8070450532432478223 ; 0x7000_0000_0B00_000F } @@ -629,6 +732,18 @@ ; RV64IZBS-NEXT: slli a0, a0, 13 ; RV64IZBS-NEXT: addi a0, a0, -272 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: imm64_8: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: lui a0, 583 +; RV64IXTHEADBB-NEXT: addiw a0, a0, -1875 +; RV64IXTHEADBB-NEXT: slli a0, a0, 14 +; RV64IXTHEADBB-NEXT: addi a0, a0, -947 +; RV64IXTHEADBB-NEXT: slli a0, a0, 12 +; RV64IXTHEADBB-NEXT: addi a0, a0, 1511 +; RV64IXTHEADBB-NEXT: slli a0, a0, 13 +; RV64IXTHEADBB-NEXT: addi a0, a0, -272 +; RV64IXTHEADBB-NEXT: ret ret i64 1311768467463790320 ; 0x1234_5678_9ABC_DEF0 } @@ -658,6 +773,11 @@ ; RV64IZBS: # %bb.0: ; RV64IZBS-NEXT: li a0, -1 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: imm64_9: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: li a0, -1 +; RV64IXTHEADBB-NEXT: ret ret i64 -1 } @@ -694,6 +814,12 @@ ; RV64IZBS-NEXT: lui a0, 262145 ; RV64IZBS-NEXT: slli a0, a0, 1 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: imm_left_shifted_lui_1: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: lui a0, 262145 +; RV64IXTHEADBB-NEXT: slli a0, a0, 1 +; RV64IXTHEADBB-NEXT: ret ret i64 2147491840 ; 0x8000_2000 } @@ -727,6 +853,12 @@ ; RV64IZBS-NEXT: lui a0, 262145 ; RV64IZBS-NEXT: slli a0, a0, 2 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: imm_left_shifted_lui_2: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: lui a0, 262145 +; RV64IXTHEADBB-NEXT: slli a0, a0, 2 +; RV64IXTHEADBB-NEXT: ret ret i64 4294983680 ; 0x1_0000_4000 } @@ -761,6 +893,12 @@ ; RV64IZBS-NEXT: lui a0, 4097 ; RV64IZBS-NEXT: slli a0, a0, 20 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: imm_left_shifted_lui_3: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: lui a0, 4097 +; RV64IXTHEADBB-NEXT: slli a0, a0, 20 +; RV64IXTHEADBB-NEXT: ret ret i64 17596481011712 ; 0x1001_0000_0000 } @@ -799,6 +937,12 @@ ; RV64IZBS-NEXT: lui a0, 983056 ; RV64IZBS-NEXT: srli a0, a0, 16 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: imm_right_shifted_lui_1: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: lui a0, 983056 +; RV64IXTHEADBB-NEXT: srli a0, a0, 16 +; RV64IXTHEADBB-NEXT: ret ret i64 281474976706561 ; 0xFFFF_FFFF_F001 } @@ -837,6 +981,13 @@ ; RV64IZBS-NEXT: slli a0, a0, 12 ; RV64IZBS-NEXT: srli a0, a0, 24 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: imm_right_shifted_lui_2: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: lui a0, 1044481 +; RV64IXTHEADBB-NEXT: slli a0, a0, 12 +; RV64IXTHEADBB-NEXT: srli a0, a0, 24 +; RV64IXTHEADBB-NEXT: ret ret i64 1099511623681 ; 0xFF_FFFF_F001 } @@ -877,6 +1028,13 @@ ; RV64IZBS-NEXT: slli a0, a0, 20 ; RV64IZBS-NEXT: addi a0, a0, -3 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: imm_decoupled_lui_addi: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: lui a0, 4097 +; RV64IXTHEADBB-NEXT: slli a0, a0, 20 +; RV64IXTHEADBB-NEXT: addi a0, a0, -3 +; RV64IXTHEADBB-NEXT: ret ret i64 17596481011709 ; 0x1000_FFFF_FFFD } @@ -925,6 +1083,15 @@ ; RV64IZBS-NEXT: slli a0, a0, 25 ; RV64IZBS-NEXT: addi a0, a0, -1 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: imm_end_xori_1: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: li a0, -1 +; RV64IXTHEADBB-NEXT: slli a0, a0, 36 +; RV64IXTHEADBB-NEXT: addi a0, a0, 1 +; RV64IXTHEADBB-NEXT: slli a0, a0, 25 +; RV64IXTHEADBB-NEXT: addi a0, a0, -1 +; 
RV64IXTHEADBB-NEXT: ret ret i64 -2305843009180139521 ; 0xE000_0000_01FF_FFFF } @@ -974,6 +1141,15 @@ ; RV64IZBS-NEXT: slli a0, a0, 12 ; RV64IZBS-NEXT: addi a0, a0, 2047 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: imm_end_2addi_1: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: li a0, -2047 +; RV64IXTHEADBB-NEXT: slli a0, a0, 27 +; RV64IXTHEADBB-NEXT: addi a0, a0, -1 +; RV64IXTHEADBB-NEXT: slli a0, a0, 12 +; RV64IXTHEADBB-NEXT: addi a0, a0, 2047 +; RV64IXTHEADBB-NEXT: ret ret i64 -1125350151030785 ; 0xFFFC_007F_FFFF_F7FF } @@ -1030,6 +1206,17 @@ ; RV64IZBS-NEXT: bseti a0, a0, 62 ; RV64IZBS-NEXT: bseti a0, a0, 63 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: imm_2reg_1: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: li a0, -1 +; RV64IXTHEADBB-NEXT: slli a0, a0, 35 +; RV64IXTHEADBB-NEXT: addi a0, a0, 9 +; RV64IXTHEADBB-NEXT: slli a0, a0, 13 +; RV64IXTHEADBB-NEXT: addi a0, a0, 837 +; RV64IXTHEADBB-NEXT: slli a0, a0, 12 +; RV64IXTHEADBB-NEXT: addi a0, a0, 1656 +; RV64IXTHEADBB-NEXT: ret ret i64 -1152921504301427080 ; 0xF000_0000_1234_5678 } @@ -1064,6 +1251,12 @@ ; RV64IZBS-NEXT: li a1, -1 ; RV64IZBS-NEXT: sh a1, 0(a0) ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: imm_store_i16_neg1: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: li a1, -1 +; RV64IXTHEADBB-NEXT: sh a1, 0(a0) +; RV64IXTHEADBB-NEXT: ret store i16 -1, ptr %p ret void } @@ -1099,6 +1292,12 @@ ; RV64IZBS-NEXT: li a1, -1 ; RV64IZBS-NEXT: sw a1, 0(a0) ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: imm_store_i32_neg1: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: li a1, -1 +; RV64IXTHEADBB-NEXT: sw a1, 0(a0) +; RV64IXTHEADBB-NEXT: ret store i32 -1, ptr %p ret void } @@ -1140,6 +1339,14 @@ ; RV64IZBS-NEXT: addiw a0, a0, -795 ; RV64IZBS-NEXT: bseti a0, a0, 32 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: imm_5372288229: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: lui a0, 160 +; RV64IXTHEADBB-NEXT: addiw a0, a0, 437 +; RV64IXTHEADBB-NEXT: slli a0, a0, 13 +; RV64IXTHEADBB-NEXT: addi a0, a0, -795 +; RV64IXTHEADBB-NEXT: ret ret i64 5372288229 } @@ -1180,6 +1387,14 @@ ; RV64IZBS-NEXT: addiw a0, a0, 795 ; RV64IZBS-NEXT: bclri a0, a0, 32 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: imm_neg_5372288229: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: lui a0, 1048416 +; RV64IXTHEADBB-NEXT: addiw a0, a0, -437 +; RV64IXTHEADBB-NEXT: slli a0, a0, 13 +; RV64IXTHEADBB-NEXT: addi a0, a0, 795 +; RV64IXTHEADBB-NEXT: ret ret i64 -5372288229 } @@ -1220,6 +1435,14 @@ ; RV64IZBS-NEXT: addiw a0, a0, -1325 ; RV64IZBS-NEXT: bseti a0, a0, 33 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: imm_8953813715: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: lui a0, 267 +; RV64IXTHEADBB-NEXT: addiw a0, a0, -637 +; RV64IXTHEADBB-NEXT: slli a0, a0, 13 +; RV64IXTHEADBB-NEXT: addi a0, a0, -1325 +; RV64IXTHEADBB-NEXT: ret ret i64 8953813715 } @@ -1260,6 +1483,14 @@ ; RV64IZBS-NEXT: addiw a0, a0, 1325 ; RV64IZBS-NEXT: bclri a0, a0, 33 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: imm_neg_8953813715: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: lui a0, 1048309 +; RV64IXTHEADBB-NEXT: addiw a0, a0, 637 +; RV64IXTHEADBB-NEXT: slli a0, a0, 13 +; RV64IXTHEADBB-NEXT: addi a0, a0, 1325 +; RV64IXTHEADBB-NEXT: ret ret i64 -8953813715 } @@ -1301,6 +1532,14 @@ ; RV64IZBS-NEXT: slli a0, a0, 12 ; RV64IZBS-NEXT: addi a0, a0, 1711 ; RV64IZBS-NEXT: ret +; +; RV64IXTHEADBB-LABEL: imm_16116864687: +; RV64IXTHEADBB: # %bb.0: +; RV64IXTHEADBB-NEXT: lui a0, 961 +; RV64IXTHEADBB-NEXT: addiw a0, a0, -1475 +; RV64IXTHEADBB-NEXT: slli a0, a0, 12 +; 
+; RV64IXTHEADBB-NEXT: addi a0, a0, 1711
+; RV64IXTHEADBB-NEXT: ret
 ret i64 16116864687
 }
@@ -1342,6 +1581,14 @@
 ; RV64IZBS-NEXT: slli a0, a0, 12
 ; RV64IZBS-NEXT: addi a0, a0, -1711
 ; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_16116864687:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 1047615
+; RV64IXTHEADBB-NEXT: addiw a0, a0, 1475
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, -1711
+; RV64IXTHEADBB-NEXT: ret
 ret i64 -16116864687
 }
@@ -1380,6 +1627,13 @@
 ; RV64IZBS-NEXT: slli a0, a0, 2
 ; RV64IZBS-NEXT: addi a0, a0, -1093
 ; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_2344336315:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 143087
+; RV64IXTHEADBB-NEXT: slli a0, a0, 2
+; RV64IXTHEADBB-NEXT: addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT: ret
 ret i64 2344336315 ; 0x8bbbbbbb
 }
@@ -1427,6 +1681,16 @@
 ; RV64IZBS-NEXT: addiw a0, a0, -1093
 ; RV64IZBS-NEXT: bseti a0, a0, 46
 ; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_70370820078523:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 256
+; RV64IXTHEADBB-NEXT: addiw a0, a0, 31
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, -273
+; RV64IXTHEADBB-NEXT: slli a0, a0, 14
+; RV64IXTHEADBB-NEXT: addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT: ret
 ret i64 70370820078523 ; 0x40007bbbbbbb
 }
@@ -1477,6 +1741,17 @@
 ; RV64IZBS-NEXT: addiw a0, a0, -1093
 ; RV64IZBS-NEXT: bseti a0, a0, 63
 ; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_9223372034778874949:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a0, -1
+; RV64IXTHEADBB-NEXT: slli a0, a0, 37
+; RV64IXTHEADBB-NEXT: addi a0, a0, 31
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, -273
+; RV64IXTHEADBB-NEXT: slli a0, a0, 14
+; RV64IXTHEADBB-NEXT: addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT: ret
 ret i64 -9223372034778874949 ; 0x800000007bbbbbbb
 }
@@ -1528,6 +1803,17 @@
 ; RV64IZBS-NEXT: bseti a0, a0, 46
 ; RV64IZBS-NEXT: bseti a0, a0, 63
 ; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_9223301666034697285:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 917505
+; RV64IXTHEADBB-NEXT: slli a0, a0, 8
+; RV64IXTHEADBB-NEXT: addi a0, a0, 31
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, -273
+; RV64IXTHEADBB-NEXT: slli a0, a0, 14
+; RV64IXTHEADBB-NEXT: addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT: ret
 ret i64 -9223301666034697285 ; 0x800040007bbbbbbb
 }
@@ -1566,6 +1852,13 @@
 ; RV64IZBS-NEXT: slli a0, a0, 2
 ; RV64IZBS-NEXT: addi a0, a0, -1093
 ; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_2219066437:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 913135
+; RV64IXTHEADBB-NEXT: slli a0, a0, 2
+; RV64IXTHEADBB-NEXT: addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT: ret
 ret i64 -2219066437 ; 0xffffffff7bbbbbbb
 }
@@ -1608,6 +1901,14 @@
 ; RV64IZBS-NEXT: addiw a0, a0, -1093
 ; RV64IZBS-NEXT: bclri a0, a0, 43
 ; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_8798043653189:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 917475
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -273
+; RV64IXTHEADBB-NEXT: slli a0, a0, 14
+; RV64IXTHEADBB-NEXT: addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT: ret
 ret i64 -8798043653189 ; 0xfffff7ff8bbbbbbb
 }
@@ -1653,6 +1954,15 @@
 ; RV64IZBS-NEXT: addiw a0, a0, -1093
 ; RV64IZBS-NEXT: bclri a0, a0, 63
 ; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_9223372034904144827:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 1048343
+; RV64IXTHEADBB-NEXT: addiw a0, a0, 1911
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, 1911
+; RV64IXTHEADBB-NEXT: srli a0, a0, 1
+; RV64IXTHEADBB-NEXT: ret
 ret i64 9223372034904144827 ; 0x7fffffff8bbbbbbb
 }
@@ -1705,6 +2015,17 @@
 ; RV64IZBS-NEXT: bclri a0, a0, 44
 ; RV64IZBS-NEXT: bclri a0, a0, 63
 ; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_9223354442718100411:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 524287
+; RV64IXTHEADBB-NEXT: slli a0, a0, 6
+; RV64IXTHEADBB-NEXT: addi a0, a0, -29
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, -273
+; RV64IXTHEADBB-NEXT: slli a0, a0, 14
+; RV64IXTHEADBB-NEXT: addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT: ret
 ret i64 9223354442718100411 ; 0x7fffefff8bbbbbbb
 }
@@ -1743,6 +2064,13 @@
 ; RV64IZBS-NEXT: addiw a0, a0, 1365
 ; RV64IZBS-NEXT: slli a0, a0, 1
 ; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_2863311530:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 349525
+; RV64IXTHEADBB-NEXT: addiw a0, a0, 1365
+; RV64IXTHEADBB-NEXT: slli a0, a0, 1
+; RV64IXTHEADBB-NEXT: ret
 ret i64 2863311530 ; #0xaaaaaaaa
 }
@@ -1781,6 +2109,13 @@
 ; RV64IZBS-NEXT: addiw a0, a0, -1365
 ; RV64IZBS-NEXT: slli a0, a0, 1
 ; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_2863311530:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 699051
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -1365
+; RV64IXTHEADBB-NEXT: slli a0, a0, 1
+; RV64IXTHEADBB-NEXT: ret
 ret i64 -2863311530 ; #0xffffffff55555556
 }
@@ -1818,6 +2153,13 @@
 ; RV64IZBS-NEXT: li a0, 1365
 ; RV64IZBS-NEXT: bseti a0, a0, 31
 ; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_2147486378:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a0, 1
+; RV64IXTHEADBB-NEXT: slli a0, a0, 31
+; RV64IXTHEADBB-NEXT: addi a0, a0, 1365
+; RV64IXTHEADBB-NEXT: ret
 ret i64 2147485013
 }
@@ -1852,6 +2194,12 @@
 ; RV64IZBS-NEXT: lui a0, 524288
 ; RV64IZBS-NEXT: addi a0, a0, -1365
 ; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_2147485013:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 524288
+; RV64IXTHEADBB-NEXT: addi a0, a0, -1365
+; RV64IXTHEADBB-NEXT: ret
 ret i64 -2147485013
 }
@@ -1894,6 +2242,14 @@
 ; RV64IZBS-NEXT: slli a0, a0, 24
 ; RV64IZBS-NEXT: addi a0, a0, 1979
 ; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_12900924131259:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 188
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -1093
+; RV64IXTHEADBB-NEXT: slli a0, a0, 24
+; RV64IXTHEADBB-NEXT: addi a0, a0, 1979
+; RV64IXTHEADBB-NEXT: ret
 ret i64 12900924131259
 }
@@ -1930,6 +2286,13 @@
 ; RV64IZBS-NEXT: addiw a0, a0, -1093
 ; RV64IZBS-NEXT: slli a0, a0, 16
 ; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_50394234880:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 188
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -1093
+; RV64IXTHEADBB-NEXT: slli a0, a0, 16
+; RV64IXTHEADBB-NEXT: ret
 ret i64 50394234880
 }
@@ -1976,6 +2339,15 @@
 ; RV64IZBS-NEXT: slli a0, a0, 12
 ; RV64IZBS-NEXT: addi a0, a0, 1911
 ; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_12900936431479:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 192239
+; RV64IXTHEADBB-NEXT: slli a0, a0, 2
+; RV64IXTHEADBB-NEXT: addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, 1911
+; RV64IXTHEADBB-NEXT: ret
 ret i64 12900936431479
 }
@@ -2022,6 +2394,15 @@
 ; RV64IZBS-NEXT: addi a0, a0, 1365
 ; RV64IZBS-NEXT: slli a0, a0, 1
 ; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_12900918536874:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 384477
+; RV64IXTHEADBB-NEXT: addiw a0, a0, 1365
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, 1365
+; RV64IXTHEADBB-NEXT: slli a0, a0, 1
+; RV64IXTHEADBB-NEXT: ret
 ret i64 12900918536874
 }
@@ -2071,6 +2452,16 @@
 ; RV64IZBS-NEXT: slli a0, a0, 12
 ; RV64IZBS-NEXT: addi a0, a0, 273
 ; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_12900925247761:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 188
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -1093
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, 273
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, 273
+; RV64IXTHEADBB-NEXT: ret
 ret i64 12900925247761
 }
@@ -2112,6 +2503,14 @@
 ; RV64IZBS-NEXT: slli a0, a0, 12
 ; RV64IZBS-NEXT: addi a0, a0, 1
 ; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_7158272001:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 427
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -1367
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, 1
+; RV64IXTHEADBB-NEXT: ret
 ret i64 7158272001 ; 0x0000_0001_aaaa_9001
 }
@@ -2153,6 +2552,14 @@
 ; RV64IZBS-NEXT: slli a0, a0, 12
 ; RV64IZBS-NEXT: addi a0, a0, 1
 ; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_12884889601:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 768
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -3
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, 1
+; RV64IXTHEADBB-NEXT: ret
 ret i64 12884889601 ; 0x0000_0002_ffff_d001
 }
@@ -2193,6 +2600,14 @@
 ; RV64IZBS-NEXT: addiw a0, a0, 1
 ; RV64IZBS-NEXT: bclri a0, a0, 31
 ; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_3435982847:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 1048371
+; RV64IXTHEADBB-NEXT: addiw a0, a0, 817
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, 1
+; RV64IXTHEADBB-NEXT: ret
 ret i64 -3435982847 ; 0xffff_ffff_3333_1001
 }
@@ -2233,6 +2648,14 @@
 ; RV64IZBS-NEXT: addiw a0, a0, 1
 ; RV64IZBS-NEXT: bclri a0, a0, 32
 ; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_5726842879:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 1048235
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -1419
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, 1
+; RV64IXTHEADBB-NEXT: ret
 ret i64 -5726842879 ; 0xffff_fffe_aaa7_5001
 }
@@ -2273,6 +2696,14 @@
 ; RV64IZBS-NEXT: addiw a0, a0, 1
 ; RV64IZBS-NEXT: bclri a0, a0, 33
 ; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: imm_neg_10307948543:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 1047962
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -1645
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: addi a0, a0, 1
+; RV64IXTHEADBB-NEXT: ret
 ret i64 -10307948543 ; 0xffff_fffd_9999_3001
 }
@@ -2310,6 +2741,12 @@
 ; RV64IZBS-NEXT: slli a0, a0, 43
 ; RV64IZBS-NEXT: addi a0, a0, -1
 ; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: li_rori_1:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a0, -18
+; RV64IXTHEADBB-NEXT: th.srri a0, a0, 21
+; RV64IXTHEADBB-NEXT: ret
 ret i64 -149533581377537
 }
@@ -2347,6 +2784,12 @@
 ; RV64IZBS-NEXT: slli a0, a0, 60
 ; RV64IZBS-NEXT: addi a0, a0, -6
 ; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: li_rori_2:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a0, -86
+; RV64IXTHEADBB-NEXT: th.srri a0, a0, 4
+; RV64IXTHEADBB-NEXT: ret
 ret i64 -5764607523034234886
 }
@@ -2384,6 +2827,12 @@
 ; RV64IZBS-NEXT: slli a0, a0, 27
 ; RV64IZBS-NEXT: addi a0, a0, -1
 ; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: li_rori_3:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a0, -18
+; RV64IXTHEADBB-NEXT: th.srri a0, a0, 37
+; RV64IXTHEADBB-NEXT: ret
 ret i64 -2281701377
 }
@@ -2420,6 +2869,13 @@
 ; RV64IZBS-NEXT: lui a0, 1045887
 ; RV64IZBS-NEXT: bclri a0, a0, 31
 ; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: PR54812:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: lui a0, 1048447
+; RV64IXTHEADBB-NEXT: addiw a0, a0, 1407
+; RV64IXTHEADBB-NEXT: slli a0, a0, 12
+; RV64IXTHEADBB-NEXT: ret
 ret i64 -2158497792;
 }
@@ -2452,6 +2908,12 @@
 ; RV64IZBS: # %bb.0:
 ; RV64IZBS-NEXT: bseti a0, zero, 11
 ; RV64IZBS-NEXT: ret
+;
+; RV64IXTHEADBB-LABEL: pos_2048:
+; RV64IXTHEADBB: # %bb.0:
+; RV64IXTHEADBB-NEXT: li a0, 1
+; RV64IXTHEADBB-NEXT: slli a0, a0, 11
+; RV64IXTHEADBB-NEXT: ret
 ret i32 2048
 }
diff --git a/llvm/test/CodeGen/RISCV/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/rotl-rotr.ll
--- a/llvm/test/CodeGen/RISCV/rotl-rotr.ll
+++ b/llvm/test/CodeGen/RISCV/rotl-rotr.ll
@@ -7,6 +7,10 @@
 ; RUN: | FileCheck %s -check-prefix=RV32ZBB
 ; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s \
 ; RUN: | FileCheck %s -check-prefix=RV64ZBB
+; RUN: llc -mtriple=riscv32 -mattr=+xtheadbb -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32XTHEADBB
+; RUN: llc -mtriple=riscv64 -mattr=+xtheadbb -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64XTHEADBB
 ; NOTE: -enable-legalize-types-checking is on one command line due to a previous
 ; assertion failure on an expensive checks build for @rotr_32_mask_multiple.
@@ -40,6 +44,22 @@
 ; RV64ZBB: # %bb.0:
 ; RV64ZBB-NEXT: rolw a0, a0, a1
 ; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotl_32:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: sll a2, a0, a1
+; RV32XTHEADBB-NEXT: neg a1, a1
+; RV32XTHEADBB-NEXT: srl a0, a0, a1
+; RV32XTHEADBB-NEXT: or a0, a2, a0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotl_32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: sllw a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: srlw a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
 %z = sub i32 32, %y
 %b = shl i32 %x, %y
 %c = lshr i32 %x, %z
@@ -73,6 +93,22 @@
 ; RV64ZBB: # %bb.0:
 ; RV64ZBB-NEXT: rorw a0, a0, a1
 ; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotr_32:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: srl a2, a0, a1
+; RV32XTHEADBB-NEXT: neg a1, a1
+; RV32XTHEADBB-NEXT: sll a0, a0, a1
+; RV32XTHEADBB-NEXT: or a0, a2, a0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotr_32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: srlw a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: sllw a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
 %z = sub i32 32, %y
 %b = lshr i32 %x, %y
 %c = shl i32 %x, %z
@@ -177,6 +213,56 @@
 ; RV64ZBB: # %bb.0:
 ; RV64ZBB-NEXT: rol a0, a0, a1
 ; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotl_64:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: sll a4, a0, a2
+; RV32XTHEADBB-NEXT: addi a3, a2, -32
+; RV32XTHEADBB-NEXT: slti a5, a3, 0
+; RV32XTHEADBB-NEXT: neg a5, a5
+; RV32XTHEADBB-NEXT: bltz a3, .LBB2_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: mv a3, a4
+; RV32XTHEADBB-NEXT: j .LBB2_3
+; RV32XTHEADBB-NEXT: .LBB2_2:
+; RV32XTHEADBB-NEXT: sll a3, a1, a2
+; RV32XTHEADBB-NEXT: not a6, a2
+; RV32XTHEADBB-NEXT: srli a7, a0, 1
+; RV32XTHEADBB-NEXT: srl a6, a7, a6
+; RV32XTHEADBB-NEXT: or a3, a3, a6
+; RV32XTHEADBB-NEXT: .LBB2_3:
+; RV32XTHEADBB-NEXT: and a4, a5, a4
+; RV32XTHEADBB-NEXT: neg a7, a2
+; RV32XTHEADBB-NEXT: li a5, 32
+; RV32XTHEADBB-NEXT: sub a6, a5, a2
+; RV32XTHEADBB-NEXT: srl a5, a1, a7
+; RV32XTHEADBB-NEXT: bltz a6, .LBB2_5
+; RV32XTHEADBB-NEXT: # %bb.4:
+; RV32XTHEADBB-NEXT: mv a0, a5
+; RV32XTHEADBB-NEXT: j .LBB2_6
+; RV32XTHEADBB-NEXT: .LBB2_5:
+; RV32XTHEADBB-NEXT: srl a0, a0, a7
+; RV32XTHEADBB-NEXT: li a7, 64
+; RV32XTHEADBB-NEXT: sub a2, a7, a2
+; RV32XTHEADBB-NEXT: not a2, a2
+; RV32XTHEADBB-NEXT: slli a1, a1, 1
+; RV32XTHEADBB-NEXT: sll a1, a1, a2
+; RV32XTHEADBB-NEXT: or a0, a0, a1
+; RV32XTHEADBB-NEXT: .LBB2_6:
+; RV32XTHEADBB-NEXT: slti a1, a6, 0
+; RV32XTHEADBB-NEXT: neg a1, a1
+; RV32XTHEADBB-NEXT: and a1, a1, a5
+; RV32XTHEADBB-NEXT: or a1, a3, a1
+; RV32XTHEADBB-NEXT: or a0, a4, a0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotl_64:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: sll a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: srl a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
 %z = sub i64 64, %y
 %b = shl i64 %x, %y
 %c = lshr i64 %x, %z
@@ -281,6 +367,56 @@
 ; RV64ZBB: # %bb.0:
 ; RV64ZBB-NEXT: ror a0, a0, a1
 ; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotr_64:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: srl a4, a1, a2
+; RV32XTHEADBB-NEXT: addi a3, a2, -32
+; RV32XTHEADBB-NEXT: slti a5, a3, 0
+; RV32XTHEADBB-NEXT: neg a5, a5
+; RV32XTHEADBB-NEXT: bltz a3, .LBB3_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: mv a3, a4
+; RV32XTHEADBB-NEXT: j .LBB3_3
+; RV32XTHEADBB-NEXT: .LBB3_2:
+; RV32XTHEADBB-NEXT: srl a3, a0, a2
+; RV32XTHEADBB-NEXT: not a6, a2
+; RV32XTHEADBB-NEXT: slli a7, a1, 1
+; RV32XTHEADBB-NEXT: sll a6, a7, a6
+; RV32XTHEADBB-NEXT: or a3, a3, a6
+; RV32XTHEADBB-NEXT: .LBB3_3:
+; RV32XTHEADBB-NEXT: and a4, a5, a4
+; RV32XTHEADBB-NEXT: neg a7, a2
+; RV32XTHEADBB-NEXT: li a5, 32
+; RV32XTHEADBB-NEXT: sub a6, a5, a2
+; RV32XTHEADBB-NEXT: sll a5, a0, a7
+; RV32XTHEADBB-NEXT: bltz a6, .LBB3_5
+; RV32XTHEADBB-NEXT: # %bb.4:
+; RV32XTHEADBB-NEXT: mv a1, a5
+; RV32XTHEADBB-NEXT: j .LBB3_6
+; RV32XTHEADBB-NEXT: .LBB3_5:
+; RV32XTHEADBB-NEXT: sll a1, a1, a7
+; RV32XTHEADBB-NEXT: li a7, 64
+; RV32XTHEADBB-NEXT: sub a2, a7, a2
+; RV32XTHEADBB-NEXT: not a2, a2
+; RV32XTHEADBB-NEXT: srli a0, a0, 1
+; RV32XTHEADBB-NEXT: srl a0, a0, a2
+; RV32XTHEADBB-NEXT: or a1, a1, a0
+; RV32XTHEADBB-NEXT: .LBB3_6:
+; RV32XTHEADBB-NEXT: slti a0, a6, 0
+; RV32XTHEADBB-NEXT: neg a0, a0
+; RV32XTHEADBB-NEXT: and a0, a0, a5
+; RV32XTHEADBB-NEXT: or a0, a3, a0
+; RV32XTHEADBB-NEXT: or a1, a4, a1
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotr_64:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: srl a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: sll a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
 %z = sub i64 64, %y
 %b = lshr i64 %x, %y
 %c = shl i64 %x, %z
@@ -314,6 +450,22 @@
 ; RV64ZBB: # %bb.0:
 ; RV64ZBB-NEXT: rolw a0, a0, a1
 ; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotl_32_mask:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: sll a2, a0, a1
+; RV32XTHEADBB-NEXT: neg a1, a1
+; RV32XTHEADBB-NEXT: srl a0, a0, a1
+; RV32XTHEADBB-NEXT: or a0, a2, a0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotl_32_mask:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: sllw a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: srlw a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
 %z = sub i32 0, %y
 %and = and i32 %z, 31
 %b = shl i32 %x, %y
@@ -348,6 +500,22 @@
 ; RV64ZBB: # %bb.0:
 ; RV64ZBB-NEXT: rolw a0, a0, a1
 ; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotl_32_mask_and_63_and_31:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: sll a2, a0, a1
+; RV32XTHEADBB-NEXT: neg a1, a1
+; RV32XTHEADBB-NEXT: srl a0, a0, a1
+; RV32XTHEADBB-NEXT: or a0, a2, a0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotl_32_mask_and_63_and_31:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: sllw a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: srlw a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
 %a = and i32 %y, 63
 %b = shl i32 %x, %a
 %c = sub i32 0, %y
@@ -385,6 +553,22 @@
 ; RV64ZBB: # %bb.0:
 ; RV64ZBB-NEXT: rolw a0, a0, a1
 ; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotl_32_mask_or_64_or_32:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: sll a2, a0, a1
+; RV32XTHEADBB-NEXT: neg a1, a1
+; RV32XTHEADBB-NEXT: srl a0, a0, a1
+; RV32XTHEADBB-NEXT: or a0, a2, a0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotl_32_mask_or_64_or_32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: sllw a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: srlw a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
 %a = or i32 %y, 64
 %b = shl i32 %x, %a
 %c = sub i32 0, %y
@@ -420,6 +604,22 @@
 ; RV64ZBB: # %bb.0:
 ; RV64ZBB-NEXT: rorw a0, a0, a1
 ; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotr_32_mask:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: srl a2, a0, a1
+; RV32XTHEADBB-NEXT: neg a1, a1
+; RV32XTHEADBB-NEXT: sll a0, a0, a1
+; RV32XTHEADBB-NEXT: or a0, a2, a0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotr_32_mask:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: srlw a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: sllw a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
 %z = sub i32 0, %y
 %and = and i32 %z, 31
 %b = lshr i32 %x, %y
@@ -454,6 +654,22 @@
 ; RV64ZBB: # %bb.0:
 ; RV64ZBB-NEXT: rorw a0, a0, a1
 ; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotr_32_mask_and_63_and_31:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: srl a2, a0, a1
+; RV32XTHEADBB-NEXT: neg a1, a1
+; RV32XTHEADBB-NEXT: sll a0, a0, a1
+; RV32XTHEADBB-NEXT: or a0, a2, a0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotr_32_mask_and_63_and_31:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: srlw a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: sllw a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
 %a = and i32 %y, 63
 %b = lshr i32 %x, %a
 %c = sub i32 0, %y
@@ -491,6 +707,22 @@
 ; RV64ZBB: # %bb.0:
 ; RV64ZBB-NEXT: rorw a0, a0, a1
 ; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotr_32_mask_or_64_or_32:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: srl a2, a0, a1
+; RV32XTHEADBB-NEXT: neg a1, a1
+; RV32XTHEADBB-NEXT: sll a0, a0, a1
+; RV32XTHEADBB-NEXT: or a0, a2, a0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotr_32_mask_or_64_or_32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: srlw a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: sllw a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
 %a = or i32 %y, 64
 %b = lshr i32 %x, %a
 %c = sub i32 0, %y
@@ -593,6 +825,54 @@
 ; RV64ZBB: # %bb.0:
 ; RV64ZBB-NEXT: rol a0, a0, a1
 ; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotl_64_mask:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: addi a5, a2, -32
+; RV32XTHEADBB-NEXT: sll a4, a0, a2
+; RV32XTHEADBB-NEXT: bltz a5, .LBB10_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: mv a3, a4
+; RV32XTHEADBB-NEXT: j .LBB10_3
+; RV32XTHEADBB-NEXT: .LBB10_2:
+; RV32XTHEADBB-NEXT: sll a3, a1, a2
+; RV32XTHEADBB-NEXT: not a6, a2
+; RV32XTHEADBB-NEXT: srli a7, a0, 1
+; RV32XTHEADBB-NEXT: srl a6, a7, a6
+; RV32XTHEADBB-NEXT: or a3, a3, a6
+; RV32XTHEADBB-NEXT: .LBB10_3:
+; RV32XTHEADBB-NEXT: slti a5, a5, 0
+; RV32XTHEADBB-NEXT: neg a5, a5
+; RV32XTHEADBB-NEXT: and a4, a5, a4
+; RV32XTHEADBB-NEXT: neg a6, a2
+; RV32XTHEADBB-NEXT: srl a2, a1, a6
+; RV32XTHEADBB-NEXT: andi a5, a6, 63
+; RV32XTHEADBB-NEXT: addi a7, a5, -32
+; RV32XTHEADBB-NEXT: slti t0, a7, 0
+; RV32XTHEADBB-NEXT: neg t0, t0
+; RV32XTHEADBB-NEXT: and a2, t0, a2
+; RV32XTHEADBB-NEXT: bltz a7, .LBB10_5
+; RV32XTHEADBB-NEXT: # %bb.4:
+; RV32XTHEADBB-NEXT: srl a0, a1, a5
+; RV32XTHEADBB-NEXT: j .LBB10_6
+; RV32XTHEADBB-NEXT: .LBB10_5:
+; RV32XTHEADBB-NEXT: srl a0, a0, a6
+; RV32XTHEADBB-NEXT: not a5, a5
+; RV32XTHEADBB-NEXT: slli a1, a1, 1
+; RV32XTHEADBB-NEXT: sll a1, a1, a5
+; RV32XTHEADBB-NEXT: or a0, a0, a1
+; RV32XTHEADBB-NEXT: .LBB10_6:
+; RV32XTHEADBB-NEXT: or a0, a4, a0
+; RV32XTHEADBB-NEXT: or a1, a3, a2
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotl_64_mask:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: sll a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: srl a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
 %z = sub i64 0, %y
 %and = and i64 %z, 63
 %b = shl i64 %x, %y
@@ -696,6 +976,55 @@
 ; RV64ZBB: # %bb.0:
 ; RV64ZBB-NEXT: rol a0, a0, a1
 ; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotl_64_mask_and_127_and_63:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: andi a3, a2, 127
+; RV32XTHEADBB-NEXT: addi a4, a3, -32
+; RV32XTHEADBB-NEXT: bltz a4, .LBB11_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: sll a3, a0, a3
+; RV32XTHEADBB-NEXT: j .LBB11_3
+; RV32XTHEADBB-NEXT: .LBB11_2:
+; RV32XTHEADBB-NEXT: sll a5, a1, a2
+; RV32XTHEADBB-NEXT: srli a6, a0, 1
+; RV32XTHEADBB-NEXT: not a3, a3
+; RV32XTHEADBB-NEXT: srl a3, a6, a3
+; RV32XTHEADBB-NEXT: or a3, a5, a3
+; RV32XTHEADBB-NEXT: .LBB11_3:
+; RV32XTHEADBB-NEXT: sll a5, a0, a2
+; RV32XTHEADBB-NEXT: slti a4, a4, 0
+; RV32XTHEADBB-NEXT: neg a4, a4
+; RV32XTHEADBB-NEXT: and a4, a4, a5
+; RV32XTHEADBB-NEXT: neg a6, a2
+; RV32XTHEADBB-NEXT: srl a2, a1, a6
+; RV32XTHEADBB-NEXT: andi a5, a6, 63
+; RV32XTHEADBB-NEXT: addi a7, a5, -32
+; RV32XTHEADBB-NEXT: slti t0, a7, 0
+; RV32XTHEADBB-NEXT: neg t0, t0
+; RV32XTHEADBB-NEXT: and a2, t0, a2
+; RV32XTHEADBB-NEXT: bltz a7, .LBB11_5
+; RV32XTHEADBB-NEXT: # %bb.4:
+; RV32XTHEADBB-NEXT: srl a0, a1, a5
+; RV32XTHEADBB-NEXT: j .LBB11_6
+; RV32XTHEADBB-NEXT: .LBB11_5:
+; RV32XTHEADBB-NEXT: srl a0, a0, a6
+; RV32XTHEADBB-NEXT: not a5, a5
+; RV32XTHEADBB-NEXT: slli a1, a1, 1
+; RV32XTHEADBB-NEXT: sll a1, a1, a5
+; RV32XTHEADBB-NEXT: or a0, a0, a1
+; RV32XTHEADBB-NEXT: .LBB11_6:
+; RV32XTHEADBB-NEXT: or a0, a4, a0
+; RV32XTHEADBB-NEXT: or a1, a3, a2
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotl_64_mask_and_127_and_63:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: sll a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: srl a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
 %a = and i64 %y, 127
 %b = shl i64 %x, %a
 %c = sub i64 0, %y
@@ -736,6 +1065,22 @@
 ; RV64ZBB: # %bb.0:
 ; RV64ZBB-NEXT: rol a0, a0, a1
 ; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotl_64_mask_or_128_or_64:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: sll a3, a0, a2
+; RV32XTHEADBB-NEXT: neg a0, a2
+; RV32XTHEADBB-NEXT: srl a0, a1, a0
+; RV32XTHEADBB-NEXT: mv a1, a3
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotl_64_mask_or_128_or_64:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: sll a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: srl a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
 %a = or i64 %y, 128
 %b = shl i64 %x, %a
 %c = sub i64 0, %y
@@ -838,6 +1183,54 @@
 ; RV64ZBB: # %bb.0:
 ; RV64ZBB-NEXT: ror a0, a0, a1
 ; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotr_64_mask:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: srl a4, a1, a2
+; RV32XTHEADBB-NEXT: addi a3, a2, -32
+; RV32XTHEADBB-NEXT: slti a5, a3, 0
+; RV32XTHEADBB-NEXT: neg a5, a5
+; RV32XTHEADBB-NEXT: bltz a3, .LBB13_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: mv a3, a4
+; RV32XTHEADBB-NEXT: j .LBB13_3
+; RV32XTHEADBB-NEXT: .LBB13_2:
+; RV32XTHEADBB-NEXT: srl a3, a0, a2
+; RV32XTHEADBB-NEXT: not a6, a2
+; RV32XTHEADBB-NEXT: slli a7, a1, 1
+; RV32XTHEADBB-NEXT: sll a6, a7, a6
+; RV32XTHEADBB-NEXT: or a3, a3, a6
+; RV32XTHEADBB-NEXT: .LBB13_3:
+; RV32XTHEADBB-NEXT: neg a6, a2
+; RV32XTHEADBB-NEXT: andi t0, a6, 63
+; RV32XTHEADBB-NEXT: addi a7, t0, -32
+; RV32XTHEADBB-NEXT: and a2, a5, a4
+; RV32XTHEADBB-NEXT: bltz a7, .LBB13_5
+; RV32XTHEADBB-NEXT: # %bb.4:
+; RV32XTHEADBB-NEXT: sll a1, a0, t0
+; RV32XTHEADBB-NEXT: j .LBB13_6
+; RV32XTHEADBB-NEXT: .LBB13_5:
+; RV32XTHEADBB-NEXT: sll a1, a1, a6
+; RV32XTHEADBB-NEXT: not a4, t0
+; RV32XTHEADBB-NEXT: srli a5, a0, 1
+; RV32XTHEADBB-NEXT: srl a4, a5, a4
+; RV32XTHEADBB-NEXT: or a1, a1, a4
+; RV32XTHEADBB-NEXT: .LBB13_6:
+; RV32XTHEADBB-NEXT: sll a0, a0, a6
+; RV32XTHEADBB-NEXT: slti a4, a7, 0
+; RV32XTHEADBB-NEXT: neg a4, a4
+; RV32XTHEADBB-NEXT: and a0, a4, a0
+; RV32XTHEADBB-NEXT: or a0, a3, a0
+; RV32XTHEADBB-NEXT: or a1, a2, a1
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotr_64_mask:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: srl a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: sll a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
 %z = sub i64 0, %y
 %and = and i64 %z, 63
 %b = lshr i64 %x, %y
@@ -941,6 +1334,55 @@
 ; RV64ZBB: # %bb.0:
 ; RV64ZBB-NEXT: ror a0, a0, a1
 ; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotr_64_mask_and_127_and_63:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: srl a4, a1, a2
+; RV32XTHEADBB-NEXT: andi a3, a2, 127
+; RV32XTHEADBB-NEXT: addi a6, a3, -32
+; RV32XTHEADBB-NEXT: slti a5, a6, 0
+; RV32XTHEADBB-NEXT: neg a5, a5
+; RV32XTHEADBB-NEXT: bltz a6, .LBB14_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: srl a3, a1, a3
+; RV32XTHEADBB-NEXT: j .LBB14_3
+; RV32XTHEADBB-NEXT: .LBB14_2:
+; RV32XTHEADBB-NEXT: srl a6, a0, a2
+; RV32XTHEADBB-NEXT: slli a7, a1, 1
+; RV32XTHEADBB-NEXT: not a3, a3
+; RV32XTHEADBB-NEXT: sll a3, a7, a3
+; RV32XTHEADBB-NEXT: or a3, a6, a3
+; RV32XTHEADBB-NEXT: .LBB14_3:
+; RV32XTHEADBB-NEXT: neg a6, a2
+; RV32XTHEADBB-NEXT: andi t0, a6, 63
+; RV32XTHEADBB-NEXT: addi a7, t0, -32
+; RV32XTHEADBB-NEXT: and a2, a5, a4
+; RV32XTHEADBB-NEXT: bltz a7, .LBB14_5
+; RV32XTHEADBB-NEXT: # %bb.4:
+; RV32XTHEADBB-NEXT: sll a1, a0, t0
+; RV32XTHEADBB-NEXT: j .LBB14_6
+; RV32XTHEADBB-NEXT: .LBB14_5:
+; RV32XTHEADBB-NEXT: sll a1, a1, a6
+; RV32XTHEADBB-NEXT: not a4, t0
+; RV32XTHEADBB-NEXT: srli a5, a0, 1
+; RV32XTHEADBB-NEXT: srl a4, a5, a4
+; RV32XTHEADBB-NEXT: or a1, a1, a4
+; RV32XTHEADBB-NEXT: .LBB14_6:
+; RV32XTHEADBB-NEXT: sll a0, a0, a6
+; RV32XTHEADBB-NEXT: slti a4, a7, 0
+; RV32XTHEADBB-NEXT: neg a4, a4
+; RV32XTHEADBB-NEXT: and a0, a4, a0
+; RV32XTHEADBB-NEXT: or a0, a3, a0
+; RV32XTHEADBB-NEXT: or a1, a2, a1
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotr_64_mask_and_127_and_63:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: srl a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: sll a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
 %a = and i64 %y, 127
 %b = lshr i64 %x, %a
 %c = sub i64 0, %y
@@ -981,6 +1423,22 @@
 ; RV64ZBB: # %bb.0:
 ; RV64ZBB-NEXT: ror a0, a0, a1
 ; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotr_64_mask_or_128_or_64:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: srl a3, a1, a2
+; RV32XTHEADBB-NEXT: neg a1, a2
+; RV32XTHEADBB-NEXT: sll a1, a0, a1
+; RV32XTHEADBB-NEXT: mv a0, a3
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotr_64_mask_or_128_or_64:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: srl a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: sll a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
 %a = or i64 %y, 128
 %b = lshr i64 %x, %a
 %c = sub i64 0, %y
@@ -1026,6 +1484,27 @@
 ; RV64ZBB-NEXT: sllw a1, a1, a2
 ; RV64ZBB-NEXT: addw a0, a0, a1
 ; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotl_32_mask_shared:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: andi a3, a2, 31
+; RV32XTHEADBB-NEXT: sll a4, a0, a3
+; RV32XTHEADBB-NEXT: neg a3, a3
+; RV32XTHEADBB-NEXT: srl a0, a0, a3
+; RV32XTHEADBB-NEXT: or a0, a4, a0
+; RV32XTHEADBB-NEXT: sll a1, a1, a2
+; RV32XTHEADBB-NEXT: add a0, a0, a1
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotl_32_mask_shared:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: sllw a3, a0, a2
+; RV64XTHEADBB-NEXT: neg a4, a2
+; RV64XTHEADBB-NEXT: srlw a0, a0, a4
+; RV64XTHEADBB-NEXT: or a0, a3, a0
+; RV64XTHEADBB-NEXT: sllw a1, a1, a2
+; RV64XTHEADBB-NEXT: addw a0, a0, a1
+; RV64XTHEADBB-NEXT: ret
 %maskedamt = and i32 %amt, 31
 %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %maskedamt)
 %2 = shl i32 %b, %maskedamt
@@ -1141,6 +1620,62 @@
 ; RV64ZBB-NEXT: sll a1, a1, a2
 ; RV64ZBB-NEXT: add a0, a0, a1
 ; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotl_64_mask_shared:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: slli a5, a4, 26
+; RV32XTHEADBB-NEXT: srli a5, a5, 31
+; RV32XTHEADBB-NEXT: mv a7, a0
+; RV32XTHEADBB-NEXT: bnez a5, .LBB17_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: mv a7, a1
+; RV32XTHEADBB-NEXT: .LBB17_2:
+; RV32XTHEADBB-NEXT: andi a6, a4, 63
+; RV32XTHEADBB-NEXT: sll t0, a7, a4
+; RV32XTHEADBB-NEXT: bnez a5, .LBB17_4
+; RV32XTHEADBB-NEXT: # %bb.3:
+; RV32XTHEADBB-NEXT: mv a1, a0
+; RV32XTHEADBB-NEXT: .LBB17_4:
+; RV32XTHEADBB-NEXT: srli a0, a1, 1
+; RV32XTHEADBB-NEXT: not t1, a4
+; RV32XTHEADBB-NEXT: srl a0, a0, t1
+; RV32XTHEADBB-NEXT: or a5, t0, a0
+; RV32XTHEADBB-NEXT: sll a1, a1, a4
+; RV32XTHEADBB-NEXT: srli a0, a7, 1
+; RV32XTHEADBB-NEXT: srl a7, a0, t1
+; RV32XTHEADBB-NEXT: addi a0, a6, -32
+; RV32XTHEADBB-NEXT: or a1, a1, a7
+; RV32XTHEADBB-NEXT: bltz a0, .LBB17_6
+; RV32XTHEADBB-NEXT: # %bb.5:
+; RV32XTHEADBB-NEXT: sll a3, a2, a6
+; RV32XTHEADBB-NEXT: j .LBB17_7
+; RV32XTHEADBB-NEXT: .LBB17_6:
+; RV32XTHEADBB-NEXT: sll a3, a3, a4
+; RV32XTHEADBB-NEXT: srli a7, a2, 1
+; RV32XTHEADBB-NEXT: not a6, a6
+; RV32XTHEADBB-NEXT: srl a6, a7, a6
+; RV32XTHEADBB-NEXT: or a3, a3, a6
+; RV32XTHEADBB-NEXT: .LBB17_7:
+; RV32XTHEADBB-NEXT: sll a2, a2, a4
+; RV32XTHEADBB-NEXT: slti a0, a0, 0
+; RV32XTHEADBB-NEXT: neg a0, a0
+; RV32XTHEADBB-NEXT: and a0, a0, a2
+; RV32XTHEADBB-NEXT: add a0, a1, a0
+; RV32XTHEADBB-NEXT: sltu a1, a0, a1
+; RV32XTHEADBB-NEXT: add a3, a5, a3
+; RV32XTHEADBB-NEXT: add a1, a3, a1
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotl_64_mask_shared:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: andi a3, a2, 63
+; RV64XTHEADBB-NEXT: sll a4, a0, a3
+; RV64XTHEADBB-NEXT: neg a3, a3
+; RV64XTHEADBB-NEXT: srl a0, a0, a3
+; RV64XTHEADBB-NEXT: or a0, a4, a0
+; RV64XTHEADBB-NEXT: sll a1, a1, a2
+; RV64XTHEADBB-NEXT: add a0, a0, a1
+; RV64XTHEADBB-NEXT: ret
 %maskedamt = and i64 %amt, 63
 %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %maskedamt)
 %2 = shl i64 %b, %maskedamt
@@ -1183,6 +1718,27 @@
 ; RV64ZBB-NEXT: sllw a1, a1, a2
 ; RV64ZBB-NEXT: addw a0, a0, a1
 ; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotr_32_mask_shared:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: andi a3, a2, 31
+; RV32XTHEADBB-NEXT: srl a4, a0, a3
+; RV32XTHEADBB-NEXT: neg a3, a3
+; RV32XTHEADBB-NEXT: sll a0, a0, a3
+; RV32XTHEADBB-NEXT: or a0, a4, a0
+; RV32XTHEADBB-NEXT: sll a1, a1, a2
+; RV32XTHEADBB-NEXT: add a0, a0, a1
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotr_32_mask_shared:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: srlw a3, a0, a2
+; RV64XTHEADBB-NEXT: neg a4, a2
+; RV64XTHEADBB-NEXT: sllw a0, a0, a4
+; RV64XTHEADBB-NEXT: or a0, a3, a0
+; RV64XTHEADBB-NEXT: sllw a1, a1, a2
+; RV64XTHEADBB-NEXT: addw a0, a0, a1
+; RV64XTHEADBB-NEXT: ret
 %maskedamt = and i32 %amt, 31
 %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %maskedamt)
 %2 = shl i32 %b, %maskedamt
@@ -1296,6 +1852,61 @@
 ; RV64ZBB-NEXT: sll a1, a1, a2
 ; RV64ZBB-NEXT: add a0, a0, a1
 ; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotr_64_mask_shared:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: andi a7, a4, 32
+; RV32XTHEADBB-NEXT: mv a6, a1
+; RV32XTHEADBB-NEXT: beqz a7, .LBB19_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: mv a6, a0
+; RV32XTHEADBB-NEXT: .LBB19_2:
+; RV32XTHEADBB-NEXT: andi a5, a4, 63
+; RV32XTHEADBB-NEXT: srl t0, a6, a4
+; RV32XTHEADBB-NEXT: beqz a7, .LBB19_4
+; RV32XTHEADBB-NEXT: # %bb.3:
+; RV32XTHEADBB-NEXT: mv a0, a1
+; RV32XTHEADBB-NEXT: .LBB19_4:
+; RV32XTHEADBB-NEXT: slli a1, a0, 1
+; RV32XTHEADBB-NEXT: not a7, a4
+; RV32XTHEADBB-NEXT: sll a1, a1, a7
+; RV32XTHEADBB-NEXT: or a1, a1, t0
+; RV32XTHEADBB-NEXT: srl t0, a0, a4
+; RV32XTHEADBB-NEXT: slli a6, a6, 1
+; RV32XTHEADBB-NEXT: sll a6, a6, a7
+; RV32XTHEADBB-NEXT: addi a0, a5, -32
+; RV32XTHEADBB-NEXT: or a6, a6, t0
+; RV32XTHEADBB-NEXT: bltz a0, .LBB19_6
+; RV32XTHEADBB-NEXT: # %bb.5:
+; RV32XTHEADBB-NEXT: sll a3, a2, a5
+; RV32XTHEADBB-NEXT: j .LBB19_7
+; RV32XTHEADBB-NEXT: .LBB19_6:
+; RV32XTHEADBB-NEXT: sll a3, a3, a4
+; RV32XTHEADBB-NEXT: srli a7, a2, 1
+; RV32XTHEADBB-NEXT: not a5, a5
+; RV32XTHEADBB-NEXT: srl a5, a7, a5
+; RV32XTHEADBB-NEXT: or a3, a3, a5
+; RV32XTHEADBB-NEXT: .LBB19_7:
+; RV32XTHEADBB-NEXT: sll a2, a2, a4
+; RV32XTHEADBB-NEXT: slti a0, a0, 0
+; RV32XTHEADBB-NEXT: neg a0, a0
+; RV32XTHEADBB-NEXT: and a0, a0, a2
+; RV32XTHEADBB-NEXT: add a0, a6, a0
+; RV32XTHEADBB-NEXT: sltu a2, a0, a6
+; RV32XTHEADBB-NEXT: add a1, a1, a3
+; RV32XTHEADBB-NEXT: add a1, a1, a2
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotr_64_mask_shared:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: andi a3, a2, 63
+; RV64XTHEADBB-NEXT: srl a4, a0, a3
+; RV64XTHEADBB-NEXT: neg a3, a3
+; RV64XTHEADBB-NEXT: sll a0, a0, a3
+; RV64XTHEADBB-NEXT: or a0, a4, a0
+; RV64XTHEADBB-NEXT: sll a1, a1, a2
+; RV64XTHEADBB-NEXT: add a0, a0, a1
+; RV64XTHEADBB-NEXT: ret
 %maskedamt = and i64 %amt, 63
 %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %maskedamt)
 %2 = shl i64 %b, %maskedamt
@@ -1342,6 +1953,32 @@
 ; RV64ZBB-NEXT: rolw a1, a1, a2
 ; RV64ZBB-NEXT: addw a0, a0, a1
 ; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotl_32_mask_multiple:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: andi a2, a2, 31
+; RV32XTHEADBB-NEXT: sll a3, a0, a2
+; RV32XTHEADBB-NEXT: neg a4, a2
+; RV32XTHEADBB-NEXT: srl a0, a0, a4
+; RV32XTHEADBB-NEXT: or a0, a3, a0
+; RV32XTHEADBB-NEXT: sll a2, a1, a2
+; RV32XTHEADBB-NEXT: srl a1, a1, a4
+; RV32XTHEADBB-NEXT: or a1, a2, a1
+; RV32XTHEADBB-NEXT: add a0, a0, a1
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotl_32_mask_multiple:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: andi a2, a2, 31
+; RV64XTHEADBB-NEXT: sllw a3, a0, a2
+; RV64XTHEADBB-NEXT: neg a4, a2
+; RV64XTHEADBB-NEXT: srlw a0, a0, a4
+; RV64XTHEADBB-NEXT: or a0, a3, a0
+; RV64XTHEADBB-NEXT: sllw a2, a1, a2
+; RV64XTHEADBB-NEXT: srlw a1, a1, a4
+; RV64XTHEADBB-NEXT: or a1, a2, a1
+; RV64XTHEADBB-NEXT: addw a0, a0, a1
+; RV64XTHEADBB-NEXT: ret
 %maskedamt = and i32 %amt, 31
 %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %maskedamt)
 %2 = tail call i32 @llvm.fshl.i32(i32 %b, i32 %b, i32 %maskedamt)
@@ -1458,6 +2095,64 @@
 ; RV64ZBB-NEXT: rol a1, a1, a2
 ; RV64ZBB-NEXT: add a0, a0, a1
 ; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotl_64_mask_multiple:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: slli a5, a4, 26
+; RV32XTHEADBB-NEXT: srli a5, a5, 31
+; RV32XTHEADBB-NEXT: mv a6, a1
+; RV32XTHEADBB-NEXT: bnez a5, .LBB21_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: mv a6, a0
+; RV32XTHEADBB-NEXT: .LBB21_2:
+; RV32XTHEADBB-NEXT: bnez a5, .LBB21_4
+; RV32XTHEADBB-NEXT: # %bb.3:
+; RV32XTHEADBB-NEXT: mv a0, a1
+; RV32XTHEADBB-NEXT: .LBB21_4:
+; RV32XTHEADBB-NEXT: sll a7, a6, a4
+; RV32XTHEADBB-NEXT: srli t0, a0, 1
+; RV32XTHEADBB-NEXT: not a1, a4
+; RV32XTHEADBB-NEXT: srl t0, t0, a1
+; RV32XTHEADBB-NEXT: sll t1, a0, a4
+; RV32XTHEADBB-NEXT: srli a0, a6, 1
+; RV32XTHEADBB-NEXT: srl t2, a0, a1
+; RV32XTHEADBB-NEXT: mv a0, a3
+; RV32XTHEADBB-NEXT: bnez a5, .LBB21_6
+; RV32XTHEADBB-NEXT: # %bb.5:
+; RV32XTHEADBB-NEXT: mv a0, a2
+; RV32XTHEADBB-NEXT: .LBB21_6:
+; RV32XTHEADBB-NEXT: or a6, a7, t0
+; RV32XTHEADBB-NEXT: or a7, t1, t2
+; RV32XTHEADBB-NEXT: sll t0, a0, a4
+; RV32XTHEADBB-NEXT: bnez a5, .LBB21_8
+; RV32XTHEADBB-NEXT: # %bb.7:
+; RV32XTHEADBB-NEXT: mv a2, a3
+; RV32XTHEADBB-NEXT: .LBB21_8:
+; RV32XTHEADBB-NEXT: srli a3, a2, 1
+; RV32XTHEADBB-NEXT: srl a3, a3, a1
+; RV32XTHEADBB-NEXT: or a3, t0, a3
+; RV32XTHEADBB-NEXT: sll a2, a2, a4
+; RV32XTHEADBB-NEXT: srli a0, a0, 1
+; RV32XTHEADBB-NEXT: srl a0, a0, a1
+; RV32XTHEADBB-NEXT: or a0, a2, a0
+; RV32XTHEADBB-NEXT: add a1, a7, a0
+; RV32XTHEADBB-NEXT: add a0, a6, a3
+; RV32XTHEADBB-NEXT: sltu a2, a0, a6
+; RV32XTHEADBB-NEXT: add a1, a1, a2
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotl_64_mask_multiple:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: andi a2, a2, 63
+; RV64XTHEADBB-NEXT: sll a3, a0, a2
+; RV64XTHEADBB-NEXT: neg a4, a2
+; RV64XTHEADBB-NEXT: srl a0, a0, a4
+; RV64XTHEADBB-NEXT: or a0, a3, a0
+; RV64XTHEADBB-NEXT: sll a2, a1, a2
+; RV64XTHEADBB-NEXT: srl a1, a1, a4
+; RV64XTHEADBB-NEXT: or a1, a2, a1
+; RV64XTHEADBB-NEXT: add a0, a0, a1
+; RV64XTHEADBB-NEXT: ret
 %maskedamt = and i64 %amt, 63
 %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %maskedamt)
 %2 = tail call i64 @llvm.fshl.i64(i64 %b, i64 %b, i64 %maskedamt)
@@ -1503,6 +2198,32 @@
 ; RV64ZBB-NEXT: rorw a1, a1, a2
 ; RV64ZBB-NEXT: addw a0, a0, a1
 ; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotr_32_mask_multiple:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: andi a2, a2, 31
+; RV32XTHEADBB-NEXT: srl a3, a0, a2
+; RV32XTHEADBB-NEXT: neg a4, a2
+; RV32XTHEADBB-NEXT: sll a0, a0, a4
+; RV32XTHEADBB-NEXT: or a0, a3, a0
+; RV32XTHEADBB-NEXT: srl a2, a1, a2
+; RV32XTHEADBB-NEXT: sll a1, a1, a4
+; RV32XTHEADBB-NEXT: or a1, a2, a1
+; RV32XTHEADBB-NEXT: add a0, a0, a1
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotr_32_mask_multiple:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: andi a2, a2, 31
+; RV64XTHEADBB-NEXT: srlw a3, a0, a2
+; RV64XTHEADBB-NEXT: neg a4, a2
+; RV64XTHEADBB-NEXT: sllw a0, a0, a4
+; RV64XTHEADBB-NEXT: or a0, a3, a0
+; RV64XTHEADBB-NEXT: srlw a2, a1, a2
+; RV64XTHEADBB-NEXT: sllw a1, a1, a4
+; RV64XTHEADBB-NEXT: or a1, a2, a1
+; RV64XTHEADBB-NEXT: addw a0, a0, a1
+; RV64XTHEADBB-NEXT: ret
 %maskedamt = and i32 %amt, 31
 %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %maskedamt)
 %2 = tail call i32 @llvm.fshr.i32(i32 %b, i32 %b, i32 %maskedamt)
@@ -1617,6 +2338,63 @@
 ; RV64ZBB-NEXT: ror a1, a1, a2
 ; RV64ZBB-NEXT: add a0, a0, a1
 ; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotr_64_mask_multiple:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: andi a5, a4, 32
+; RV32XTHEADBB-NEXT: mv a6, a0
+; RV32XTHEADBB-NEXT: beqz a5, .LBB23_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: mv a6, a1
+; RV32XTHEADBB-NEXT: .LBB23_2:
+; RV32XTHEADBB-NEXT: beqz a5, .LBB23_4
+; RV32XTHEADBB-NEXT: # %bb.3:
+; RV32XTHEADBB-NEXT: mv a1, a0
+; RV32XTHEADBB-NEXT: .LBB23_4:
+; RV32XTHEADBB-NEXT: srl a7, a6, a4
+; RV32XTHEADBB-NEXT: slli t0, a1, 1
+; RV32XTHEADBB-NEXT: not a0, a4
+; RV32XTHEADBB-NEXT: sll t0, t0, a0
+; RV32XTHEADBB-NEXT: srl t1, a1, a4
+; RV32XTHEADBB-NEXT: slli a6, a6, 1
+; RV32XTHEADBB-NEXT: sll t2, a6, a0
+; RV32XTHEADBB-NEXT: mv a6, a2
+; RV32XTHEADBB-NEXT: beqz a5, .LBB23_6
+; RV32XTHEADBB-NEXT: # %bb.5:
+; RV32XTHEADBB-NEXT: mv a6, a3
+; RV32XTHEADBB-NEXT: .LBB23_6:
+; RV32XTHEADBB-NEXT: or a1, t0, a7
+; RV32XTHEADBB-NEXT: or a7, t2, t1
+; RV32XTHEADBB-NEXT: srl t0, a6, a4
+; RV32XTHEADBB-NEXT: beqz a5, .LBB23_8
+; RV32XTHEADBB-NEXT: # %bb.7:
+; RV32XTHEADBB-NEXT: mv a3, a2
+; RV32XTHEADBB-NEXT: .LBB23_8:
+; RV32XTHEADBB-NEXT: slli a2, a3, 1
+; RV32XTHEADBB-NEXT: sll a2, a2, a0
+; RV32XTHEADBB-NEXT: or a2, a2, t0
+; RV32XTHEADBB-NEXT: srl a3, a3, a4
+; RV32XTHEADBB-NEXT: slli a6, a6, 1
+; RV32XTHEADBB-NEXT: sll a0, a6, a0
+; RV32XTHEADBB-NEXT: or a0, a0, a3
+; RV32XTHEADBB-NEXT: add a7, a7, a0
+; RV32XTHEADBB-NEXT: add a0, a1, a2
+; RV32XTHEADBB-NEXT: sltu a1, a0, a1
+; RV32XTHEADBB-NEXT: add a1, a7, a1
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotr_64_mask_multiple:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: andi a2, a2, 63
+; RV64XTHEADBB-NEXT: srl a3, a0, a2
+; RV64XTHEADBB-NEXT: neg a4, a2
+; RV64XTHEADBB-NEXT: sll a0, a0, a4
+; RV64XTHEADBB-NEXT: or a0, a3, a0
+; RV64XTHEADBB-NEXT: srl a2, a1, a2
+; RV64XTHEADBB-NEXT: sll a1, a1, a4
+; RV64XTHEADBB-NEXT: or a1, a2, a1
+; RV64XTHEADBB-NEXT: add a0, a0, a1
+; RV64XTHEADBB-NEXT: ret
 %maskedamt = and i64 %amt, 63
 %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %maskedamt)
 %2 = tail call i64 @llvm.fshr.i64(i64 %b, i64 %b, i64 %maskedamt)
@@ -1721,6 +2499,56 @@
 ; RV64ZBB: # %bb.0:
 ; RV64ZBB-NEXT: rol a0, a0, a1
 ; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotl_64_zext:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: neg a4, a2
+; RV32XTHEADBB-NEXT: sll a5, a0, a2
+; RV32XTHEADBB-NEXT: addi a3, a2, -32
+; RV32XTHEADBB-NEXT: slti a6, a3, 0
+; RV32XTHEADBB-NEXT: neg a6, a6
+; RV32XTHEADBB-NEXT: bltz a3, .LBB24_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: mv a3, a5
+; RV32XTHEADBB-NEXT: j .LBB24_3
+; RV32XTHEADBB-NEXT: .LBB24_2:
+; RV32XTHEADBB-NEXT: sll a3, a1, a2
+; RV32XTHEADBB-NEXT: not a7, a2
+; RV32XTHEADBB-NEXT: srli t0, a0, 1
+; RV32XTHEADBB-NEXT: srl a7, t0, a7
+; RV32XTHEADBB-NEXT: or a3, a3, a7
+; RV32XTHEADBB-NEXT: .LBB24_3:
+; RV32XTHEADBB-NEXT: and a5, a6, a5
+; RV32XTHEADBB-NEXT: li a6, 32
+; RV32XTHEADBB-NEXT: sub a7, a6, a2
+; RV32XTHEADBB-NEXT: srl a6, a1, a4
+; RV32XTHEADBB-NEXT: bltz a7, .LBB24_5
+; RV32XTHEADBB-NEXT: # %bb.4:
+; RV32XTHEADBB-NEXT: mv a0, a6
+; RV32XTHEADBB-NEXT: j .LBB24_6
+; RV32XTHEADBB-NEXT: .LBB24_5:
+; RV32XTHEADBB-NEXT: li t0, 64
+; RV32XTHEADBB-NEXT: sub a2, t0, a2
+; RV32XTHEADBB-NEXT: srl a0, a0, a4
+; RV32XTHEADBB-NEXT: not a2, a2
+; RV32XTHEADBB-NEXT: slli a1, a1, 1
+; RV32XTHEADBB-NEXT: sll a1, a1, a2
+; RV32XTHEADBB-NEXT: or a0, a0, a1
+; RV32XTHEADBB-NEXT: .LBB24_6:
+; RV32XTHEADBB-NEXT: slti a1, a7, 0
+; RV32XTHEADBB-NEXT: neg a1, a1
+; RV32XTHEADBB-NEXT: and a1, a1, a6
+; RV32XTHEADBB-NEXT: or a1, a3, a1
+; RV32XTHEADBB-NEXT: or a0, a5, a0
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotl_64_zext:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: sll a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: srl a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
 %z = sub i32 64, %y
 %zext = zext i32 %z to i64
 %zexty = zext i32 %y to i64
@@ -1827,6 +2655,56 @@
 ; RV64ZBB: # %bb.0:
 ; RV64ZBB-NEXT: ror a0, a0, a1
 ; RV64ZBB-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: rotr_64_zext:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: neg a4, a2
+; RV32XTHEADBB-NEXT: srl a5, a1, a2
+; RV32XTHEADBB-NEXT: addi a3, a2, -32
+; RV32XTHEADBB-NEXT: slti a6, a3, 0
+; RV32XTHEADBB-NEXT: neg a6, a6
+; RV32XTHEADBB-NEXT: bltz a3, .LBB25_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: mv a3, a5
+; RV32XTHEADBB-NEXT: j .LBB25_3
+; RV32XTHEADBB-NEXT: .LBB25_2:
+; RV32XTHEADBB-NEXT: srl a3, a0, a2
+; RV32XTHEADBB-NEXT: not a7, a2
+; RV32XTHEADBB-NEXT: slli t0, a1, 1
+; RV32XTHEADBB-NEXT: sll a7, t0, a7
+; RV32XTHEADBB-NEXT: or a3, a3, a7
+; RV32XTHEADBB-NEXT: .LBB25_3:
+; RV32XTHEADBB-NEXT: and a5, a6, a5
+; RV32XTHEADBB-NEXT: li a6, 32
+; RV32XTHEADBB-NEXT: sub a7, a6, a2
+; RV32XTHEADBB-NEXT: sll a6, a0, a4
+; RV32XTHEADBB-NEXT: bltz a7, .LBB25_5
+; RV32XTHEADBB-NEXT: # %bb.4:
+; RV32XTHEADBB-NEXT: mv a1, a6
+; RV32XTHEADBB-NEXT: j .LBB25_6
+; RV32XTHEADBB-NEXT: .LBB25_5:
+; RV32XTHEADBB-NEXT: li t0, 64
+; RV32XTHEADBB-NEXT: sub a2, t0, a2
+; RV32XTHEADBB-NEXT: sll a1, a1, a4
+; RV32XTHEADBB-NEXT: not a2, a2
+; RV32XTHEADBB-NEXT: srli a0, a0, 1
+; RV32XTHEADBB-NEXT: srl a0, a0, a2
+; RV32XTHEADBB-NEXT: or a1, a1, a0
+; RV32XTHEADBB-NEXT: .LBB25_6:
+; RV32XTHEADBB-NEXT: slti a0, a7, 0
+; RV32XTHEADBB-NEXT: neg a0, a0
+; RV32XTHEADBB-NEXT: and a0, a0, a6
+; RV32XTHEADBB-NEXT: or a0, a3, a0
+; RV32XTHEADBB-NEXT: or a1, a5, a1
+; RV32XTHEADBB-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: rotr_64_zext:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: srl a2, a0, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
+; RV64XTHEADBB-NEXT: sll a0, a0, a1
+; RV64XTHEADBB-NEXT: or a0, a2, a0
+; RV64XTHEADBB-NEXT: ret
 %z = sub i32 64, %y
 %zext = zext i32 %z to i64
 %zexty = zext i32 %y to i64
diff --git a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
@@ -0,0 +1,453 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefixes=RV32I
+; RUN: llc -mtriple=riscv32 -mattr=+xtheadbb -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefixes=RV32XTHEADBB
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+
+define i32 @ctlz_i32(i32 %a) nounwind {
+; RV32I-LABEL: ctlz_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beqz a0, .LBB0_2
+; RV32I-NEXT: # %bb.1: # %cond.false
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 2
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 8
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: not a0, a0
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: addi a2, a2, 1365
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: lui a1, 209715
+; RV32I-NEXT: addi a1, a1, 819
+; RV32I-NEXT: and a2, a0, a1
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: add a0, a2, a0
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: lui a1, 61681
+; RV32I-NEXT: addi a1, a1, -241
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: lui a1, 4112
+; RV32I-NEXT: addi a1, a1, 257
+; RV32I-NEXT: call __mulsi3@plt
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB0_2:
+; RV32I-NEXT: li a0, 32
+; RV32I-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: ctlz_i32:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: th.ff1 a0, a0
+; RV32XTHEADBB-NEXT: ret
+ %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
+ ret i32 %1
+}
+
+declare i64 @llvm.ctlz.i64(i64, i1)
+
+define i64 @ctlz_i64(i64 %a) nounwind {
+; RV32I-LABEL: ctlz_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a1
+; RV32I-NEXT: mv s2, a0
+; RV32I-NEXT: srli a0, a1, 1
+; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 2
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 8
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: not a0, a0
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: addi s4, a2, 1365
+; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: lui a1, 209715
+; RV32I-NEXT: addi s5, a1, 819
+; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: and a0, a0, s5
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: lui a1, 61681
+; RV32I-NEXT: addi s6, a1, -241
+; RV32I-NEXT: and a0, a0, s6
+; RV32I-NEXT: lui a1, 4112
+; RV32I-NEXT: addi s3, a1, 257
+; RV32I-NEXT: mv a1, s3
+; RV32I-NEXT: call __mulsi3@plt
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: srli a0, s2, 1
+; RV32I-NEXT: or a0, s2, a0
+; RV32I-NEXT: srli a1, a0, 2
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 8
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: not a0, a0
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: and a0, a0, s5
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: and a0, a0, s6
+; RV32I-NEXT: mv a1, s3
+; RV32I-NEXT: call __mulsi3@plt
+; RV32I-NEXT: bnez s0, .LBB1_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: j .LBB1_3
+; RV32I-NEXT: .LBB1_2:
+; RV32I-NEXT: srli a0, s1, 24
+; RV32I-NEXT: .LBB1_3:
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: ctlz_i64:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: bnez a1, .LBB1_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: th.ff1 a0, a0
+; RV32XTHEADBB-NEXT: addi a0, a0, 32
+; RV32XTHEADBB-NEXT: li a1, 0
+; RV32XTHEADBB-NEXT: ret
+; RV32XTHEADBB-NEXT: .LBB1_2:
+; RV32XTHEADBB-NEXT: th.ff1 a0, a1
+; RV32XTHEADBB-NEXT: li a1, 0
+; RV32XTHEADBB-NEXT: ret
+ %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
+ ret i64 %1
+}
+
+declare i32 @llvm.cttz.i32(i32, i1)
+
+define i32 @cttz_i32(i32 %a) nounwind {
+; RV32I-LABEL: cttz_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beqz a0, .LBB2_2
+; RV32I-NEXT: # %bb.1: # %cond.false
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: neg a1, a0
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: lui a1, 30667
+; RV32I-NEXT: addi a1, a1, 1329
+; RV32I-NEXT: call __mulsi3@plt
+; RV32I-NEXT: srli a0, a0, 27
+; RV32I-NEXT: lui a1, %hi(.LCPI2_0)
+; RV32I-NEXT: addi a1, a1, %lo(.LCPI2_0)
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: lbu a0, 0(a0)
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB2_2:
+; RV32I-NEXT: li a0, 32
+; RV32I-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: cttz_i32:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: beqz a0, .LBB2_2
+; RV32XTHEADBB-NEXT: # %bb.1: # %cond.false
+; RV32XTHEADBB-NEXT: addi a1, a0, -1
+; RV32XTHEADBB-NEXT: not a0, a0
+; RV32XTHEADBB-NEXT: and a0, a0, a1
+; RV32XTHEADBB-NEXT: th.ff1 a0, a0
+; RV32XTHEADBB-NEXT: li a1, 32
+; RV32XTHEADBB-NEXT: sub a0, a1, a0
+; RV32XTHEADBB-NEXT: ret
+; RV32XTHEADBB-NEXT: .LBB2_2:
+; RV32XTHEADBB-NEXT: li a0, 32
+; RV32XTHEADBB-NEXT: ret
+ %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+ ret i32 %1
+}
+
+declare i64 @llvm.cttz.i64(i64, i1)
+
+define i64 @cttz_i64(i64 %a) nounwind {
+; RV32I-LABEL: cttz_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s2, a1
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: and a0, s0, a0
+; RV32I-NEXT: lui a1, 30667
+; RV32I-NEXT: addi s3, a1, 1329
+; RV32I-NEXT: mv a1, s3
+; RV32I-NEXT: call __mulsi3@plt
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lui a0, %hi(.LCPI3_0)
+; RV32I-NEXT: addi s4, a0, %lo(.LCPI3_0)
+; RV32I-NEXT: neg a0, s2
+; RV32I-NEXT: and a0, s2, a0
+; RV32I-NEXT: mv a1, s3
+; RV32I-NEXT: call __mulsi3@plt
+; RV32I-NEXT: bnez s2, .LBB3_3
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: li a0, 32
+; RV32I-NEXT: beqz s0, .LBB3_4
+; RV32I-NEXT: .LBB3_2:
+; RV32I-NEXT: srli s1, s1, 27
+; RV32I-NEXT: add s1, s4, s1
+; RV32I-NEXT: lbu a0, 0(s1)
+; RV32I-NEXT: j .LBB3_5
+; RV32I-NEXT: .LBB3_3:
+; RV32I-NEXT: srli a0, a0, 27
+; RV32I-NEXT: add a0, s4, a0
+; RV32I-NEXT: lbu a0, 0(a0)
+; RV32I-NEXT: bnez s0, .LBB3_2
+; RV32I-NEXT: .LBB3_4:
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: .LBB3_5:
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: cttz_i64:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: bnez a0, .LBB3_2
+; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: addi a0, a1, -1
+; RV32XTHEADBB-NEXT: not a1, a1
+; RV32XTHEADBB-NEXT: and a0, a1, a0
+; RV32XTHEADBB-NEXT: th.ff1 a0, a0
+; RV32XTHEADBB-NEXT: li a1, 64
+; RV32XTHEADBB-NEXT: j .LBB3_3
+; RV32XTHEADBB-NEXT: .LBB3_2:
+; RV32XTHEADBB-NEXT: addi a1, a0, -1
+; RV32XTHEADBB-NEXT: not a0, a0
+; RV32XTHEADBB-NEXT: and a0, a0, a1
+; RV32XTHEADBB-NEXT: th.ff1 a0, a0
+; RV32XTHEADBB-NEXT: li a1, 32
+; RV32XTHEADBB-NEXT: .LBB3_3:
+; RV32XTHEADBB-NEXT: sub a0, a1, a0
+; RV32XTHEADBB-NEXT: li a1, 0
+; RV32XTHEADBB-NEXT: ret
+ %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false)
+ ret i64 %1
+}
+
+define i32 @sextb_i32(i32 %a) nounwind {
+; RV32I-LABEL: sextb_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srai a0, a0, 24
+; RV32I-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: sextb_i32:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: th.ext a0, a0, 7, 0
+; RV32XTHEADBB-NEXT: ret
+ %shl = shl i32 %a, 24
+ %shr = ashr exact i32 %shl, 24
+ ret i32 %shr
+}
+
+define i64 @sextb_i64(i64 %a) nounwind {
+; RV32I-LABEL: sextb_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a1, a0, 24
+; RV32I-NEXT: srai a0, a1, 24
+; RV32I-NEXT: srai a1, a1, 31
+; RV32I-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: sextb_i64:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: th.ext a0, a0, 7, 0
+; RV32XTHEADBB-NEXT: srai a1, a0, 31
+; RV32XTHEADBB-NEXT: ret
+ %shl = shl i64 %a, 56
+ %shr = ashr exact i64 %shl, 56
+ ret i64 %shr
+}
+
+define i32 @sexth_i32(i32 %a) nounwind {
+; RV32I-LABEL: sexth_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srai a0, a0, 16
+; RV32I-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: sexth_i32:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: th.ext a0, a0, 15, 0
+; RV32XTHEADBB-NEXT: ret
+ %shl = shl i32 %a, 16
+ %shr = ashr exact i32 %shl, 16
+ ret i32 %shr
+}
+
+define i64 @sexth_i64(i64 %a) nounwind {
+; RV32I-LABEL: sexth_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: srai a0, a1, 16
+; RV32I-NEXT: srai a1, a1, 31
+; RV32I-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: sexth_i64:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: th.ext a0, a0, 15, 0
+; RV32XTHEADBB-NEXT: srai a1, a0, 31
+; RV32XTHEADBB-NEXT: ret
+ %shl = shl i64 %a, 48
+ %shr = ashr exact i64 %shl, 48
+ ret i64 %shr
+}
+
+define i32 @zexth_i32(i32 %a) nounwind {
+; RV32I-LABEL: zexth_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli a0, a0, 16
+; RV32I-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: zexth_i32:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: th.extu a0, a0, 15, 0
+; RV32XTHEADBB-NEXT: ret
+ %and = and i32 %a, 65535
+ ret i32 %and
+}
+
+define i64 @zexth_i64(i64 %a) nounwind {
+; RV32I-LABEL: zexth_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli a0, a0, 16
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: zexth_i64:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: th.extu a0, a0, 15, 0
+; RV32XTHEADBB-NEXT: li a1, 0
+; RV32XTHEADBB-NEXT: ret
+ %and = and i64 %a, 65535
+ ret i64 %and
+}
+
+declare i32 @llvm.bswap.i32(i32)
+
+define i32 @bswap_i32(i32 %a) nounwind {
+; RV32I-LABEL: bswap_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: srli a1, a0, 8
+; RV32I-NEXT: lui a2, 16
+; RV32I-NEXT: addi a2, a2, -256
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: srli a3, a0, 24
+; RV32I-NEXT: or a1, a1, a3
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: slli a2, a2, 8
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: bswap_i32:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: th.rev a0, a0
+; RV32XTHEADBB-NEXT: ret
+ %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+ ret i32 %1
+}
+
+declare i64 @llvm.bswap.i64(i64)
+
+define i64 @bswap_i64(i64 %a) {
+; RV32I-LABEL: bswap_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: srli a2, a1, 8
+; RV32I-NEXT: lui a3, 16
+; RV32I-NEXT: addi a3, a3, -256
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: srli a4, a1, 24
+; RV32I-NEXT: or a2, a2, a4
+; RV32I-NEXT: and a4, a1, a3
+; RV32I-NEXT: slli a4, a4, 8
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: or a1, a1, a4
+; RV32I-NEXT: or a2, a1, a2
+; RV32I-NEXT: srli a1, a0, 8
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: srli a4, a0, 24
+; RV32I-NEXT: or a1, a1, a4
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: slli a3, a3, 8
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: or a1, a0, a1
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: bswap_i64:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: th.rev a2, a1
+; RV32XTHEADBB-NEXT: th.rev a1, a0
+; RV32XTHEADBB-NEXT: mv a0, a2
+; RV32XTHEADBB-NEXT: ret
+ %1 = call i64 @llvm.bswap.i64(i64 %a)
+ ret i64 %1
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
@@ -0,0 +1,768 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+xtheadbb -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64XTHEADBB
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+
+define signext i32 @ctlz_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: ctlz_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: beqz a0, .LBB0_2
+; RV64I-NEXT: # %bb.1: # %cond.false
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: srliw a1, a0, 1
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srliw a1, a0, 2
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srliw a1, a0, 4
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srliw a1, a0, 8
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srliw a1, a0, 16
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: not a0, a0
+; RV64I-NEXT: srli a1, a0, 1
+; RV64I-NEXT: lui a2, 349525
+; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: lui a1, 209715
+; RV64I-NEXT: addiw a1, a1, 819
+; RV64I-NEXT: and a2, a0, a1
RV64I-NEXT: srli a0, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lui a1, 61681 +; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: call __muldi3@plt +; RV64I-NEXT: srliw a0, a0, 24 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; RV64I-NEXT: .LBB0_2: +; RV64I-NEXT: li a0, 32 +; RV64I-NEXT: ret +; +; RV64XTHEADBB-LABEL: ctlz_i32: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: not a0, a0 +; RV64XTHEADBB-NEXT: slli a0, a0, 32 +; RV64XTHEADBB-NEXT: th.ff0 a0, a0 +; RV64XTHEADBB-NEXT: ret + %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false) + ret i32 %1 +} + +define signext i32 @log2_i32(i32 signext %a) nounwind { +; RV64I-LABEL: log2_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: beqz a0, .LBB1_2 +; RV64I-NEXT: # %bb.1: # %cond.false +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: srliw a1, a0, 1 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 4 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 8 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 16 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 349525 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: and a2, a0, a1 +; RV64I-NEXT: srli a0, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lui a1, 61681 +; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: call __muldi3@plt +; RV64I-NEXT: srliw a0, a0, 24 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: j .LBB1_3 +; RV64I-NEXT: .LBB1_2: +; RV64I-NEXT: li a0, 32 +; RV64I-NEXT: .LBB1_3: # %cond.end +; RV64I-NEXT: li a1, 31 +; RV64I-NEXT: sub a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64XTHEADBB-LABEL: log2_i32: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: not a0, a0 +; RV64XTHEADBB-NEXT: slli a0, a0, 32 +; RV64XTHEADBB-NEXT: th.ff0 a0, a0 +; RV64XTHEADBB-NEXT: li a1, 31 +; RV64XTHEADBB-NEXT: sub a0, a1, a0 +; RV64XTHEADBB-NEXT: ret + %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false) + %2 = sub i32 31, %1 + ret i32 %2 +} + +define signext i32 @log2_ceil_i32(i32 signext %a) nounwind { +; RV64I-LABEL: log2_ceil_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: addiw a0, a0, -1 +; RV64I-NEXT: li s0, 32 +; RV64I-NEXT: li a1, 32 +; RV64I-NEXT: beqz a0, .LBB2_2 +; RV64I-NEXT: # %bb.1: # %cond.false +; RV64I-NEXT: srliw a1, a0, 1 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 4 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 8 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 16 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 349525 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: addiw a1, a1, 819 
+; RV64I-NEXT: and a2, a0, a1 +; RV64I-NEXT: srli a0, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lui a1, 61681 +; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: call __muldi3@plt +; RV64I-NEXT: srliw a1, a0, 24 +; RV64I-NEXT: .LBB2_2: # %cond.end +; RV64I-NEXT: sub a0, s0, a1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64XTHEADBB-LABEL: log2_ceil_i32: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: addiw a0, a0, -1 +; RV64XTHEADBB-NEXT: not a0, a0 +; RV64XTHEADBB-NEXT: slli a0, a0, 32 +; RV64XTHEADBB-NEXT: th.ff0 a0, a0 +; RV64XTHEADBB-NEXT: li a1, 32 +; RV64XTHEADBB-NEXT: sub a0, a1, a0 +; RV64XTHEADBB-NEXT: ret + %1 = sub i32 %a, 1 + %2 = call i32 @llvm.ctlz.i32(i32 %1, i1 false) + %3 = sub i32 32, %2 + ret i32 %3 +} + +define signext i32 @findLastSet_i32(i32 signext %a) nounwind { +; RV64I-LABEL: findLastSet_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: srliw a0, a0, 1 +; RV64I-NEXT: or a0, s0, a0 +; RV64I-NEXT: srliw a1, a0, 2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 4 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 8 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 16 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 349525 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: and a2, a0, a1 +; RV64I-NEXT: srli a0, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lui a1, 61681 +; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: call __muldi3@plt +; RV64I-NEXT: srliw a0, a0, 24 +; RV64I-NEXT: xori a0, a0, 31 +; RV64I-NEXT: snez a1, s0 +; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64XTHEADBB-LABEL: findLastSet_i32: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: not a1, a0 +; RV64XTHEADBB-NEXT: slli a1, a1, 32 +; RV64XTHEADBB-NEXT: th.ff0 a1, a1 +; RV64XTHEADBB-NEXT: xori a1, a1, 31 +; RV64XTHEADBB-NEXT: snez a0, a0 +; RV64XTHEADBB-NEXT: addi a0, a0, -1 +; RV64XTHEADBB-NEXT: or a0, a0, a1 +; RV64XTHEADBB-NEXT: ret + %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 true) + %2 = xor i32 31, %1 + %3 = icmp eq i32 %a, 0 + %4 = select i1 %3, i32 -1, i32 %2 + ret i32 %4 +} + +define i32 @ctlz_lshr_i32(i32 signext %a) { +; RV64I-LABEL: ctlz_lshr_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: srliw a0, a0, 1 +; RV64I-NEXT: beqz a0, .LBB4_2 +; RV64I-NEXT: # %bb.1: # %cond.false +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: srliw a1, a0, 1 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 4 +; RV64I-NEXT: or a0, a0, a1 +; 
RV64I-NEXT: srliw a1, a0, 8 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 16 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 349525 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: and a2, a0, a1 +; RV64I-NEXT: srli a0, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lui a1, 61681 +; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: call __muldi3@plt +; RV64I-NEXT: srliw a0, a0, 24 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; RV64I-NEXT: .LBB4_2: +; RV64I-NEXT: li a0, 32 +; RV64I-NEXT: ret +; +; RV64XTHEADBB-LABEL: ctlz_lshr_i32: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: srliw a0, a0, 1 +; RV64XTHEADBB-NEXT: not a0, a0 +; RV64XTHEADBB-NEXT: slli a0, a0, 32 +; RV64XTHEADBB-NEXT: th.ff0 a0, a0 +; RV64XTHEADBB-NEXT: ret + %1 = lshr i32 %a, 1 + %2 = call i32 @llvm.ctlz.i32(i32 %1, i1 false) + ret i32 %2 +} + +declare i64 @llvm.ctlz.i64(i64, i1) + +define i64 @ctlz_i64(i64 %a) nounwind { +; RV64I-LABEL: ctlz_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: beqz a0, .LBB5_2 +; RV64I-NEXT: # %bb.1: # %cond.false +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 8 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 16 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 32 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: lui a1, %hi(.LCPI5_0) +; RV64I-NEXT: ld a1, %lo(.LCPI5_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI5_1) +; RV64I-NEXT: ld a2, %lo(.LCPI5_1)(a2) +; RV64I-NEXT: srli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: and a1, a0, a2 +; RV64I-NEXT: srli a0, a0, 2 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a2, %hi(.LCPI5_2) +; RV64I-NEXT: ld a2, %lo(.LCPI5_2)(a2) +; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI5_3) +; RV64I-NEXT: ld a1, %lo(.LCPI5_3)(a1) +; RV64I-NEXT: call __muldi3@plt +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; RV64I-NEXT: .LBB5_2: +; RV64I-NEXT: li a0, 64 +; RV64I-NEXT: ret +; +; RV64XTHEADBB-LABEL: ctlz_i64: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: th.ff1 a0, a0 +; RV64XTHEADBB-NEXT: ret + %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false) + ret i64 %1 +} + +declare i32 @llvm.cttz.i32(i32, i1) + +define signext i32 @cttz_i32(i32 signext %a) nounwind { +; RV64I-LABEL: cttz_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: beqz a0, .LBB6_2 +; RV64I-NEXT: # %bb.1: # %cond.false +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 30667 +; RV64I-NEXT: addiw a1, a1, 1329 +; RV64I-NEXT: call __muldi3@plt +; RV64I-NEXT: srliw a0, a0, 27 +; RV64I-NEXT: lui a1, %hi(.LCPI6_0) +; RV64I-NEXT: addi a1, a1, %lo(.LCPI6_0) +; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: lbu 
a0, 0(a0)
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB6_2:
+; RV64I-NEXT: li a0, 32
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: cttz_i32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: beqz a0, .LBB6_2
+; RV64XTHEADBB-NEXT: # %bb.1: # %cond.false
+; RV64XTHEADBB-NEXT: addi a1, a0, -1
+; RV64XTHEADBB-NEXT: not a0, a0
+; RV64XTHEADBB-NEXT: and a0, a0, a1
+; RV64XTHEADBB-NEXT: th.ff1 a0, a0
+; RV64XTHEADBB-NEXT: li a1, 64
+; RV64XTHEADBB-NEXT: sub a0, a1, a0
+; RV64XTHEADBB-NEXT: ret
+; RV64XTHEADBB-NEXT: .LBB6_2:
+; RV64XTHEADBB-NEXT: li a0, 32
+; RV64XTHEADBB-NEXT: ret
+ %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+ ret i32 %1
+}
+
+define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: cttz_zero_undef_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: lui a1, 30667
+; RV64I-NEXT: addiw a1, a1, 1329
+; RV64I-NEXT: call __muldi3@plt
+; RV64I-NEXT: srliw a0, a0, 27
+; RV64I-NEXT: lui a1, %hi(.LCPI7_0)
+; RV64I-NEXT: addi a1, a1, %lo(.LCPI7_0)
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: lbu a0, 0(a0)
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: cttz_zero_undef_i32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: addi a1, a0, -1
+; RV64XTHEADBB-NEXT: not a0, a0
+; RV64XTHEADBB-NEXT: and a0, a0, a1
+; RV64XTHEADBB-NEXT: th.ff1 a0, a0
+; RV64XTHEADBB-NEXT: li a1, 64
+; RV64XTHEADBB-NEXT: sub a0, a1, a0
+; RV64XTHEADBB-NEXT: ret
+ %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
+ ret i32 %1
+}
+
+define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: findFirstSet_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: neg a0, a0
+; RV64I-NEXT: and a0, s0, a0
+; RV64I-NEXT: lui a1, 30667
+; RV64I-NEXT: addiw a1, a1, 1329
+; RV64I-NEXT: call __muldi3@plt
+; RV64I-NEXT: srliw a0, a0, 27
+; RV64I-NEXT: lui a1, %hi(.LCPI8_0)
+; RV64I-NEXT: addi a1, a1, %lo(.LCPI8_0)
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: lbu a0, 0(a0)
+; RV64I-NEXT: snez a1, s0
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: findFirstSet_i32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: addi a1, a0, -1
+; RV64XTHEADBB-NEXT: not a2, a0
+; RV64XTHEADBB-NEXT: and a1, a2, a1
+; RV64XTHEADBB-NEXT: th.ff1 a1, a1
+; RV64XTHEADBB-NEXT: li a2, 64
+; RV64XTHEADBB-NEXT: sub a2, a2, a1
+; RV64XTHEADBB-NEXT: snez a0, a0
+; RV64XTHEADBB-NEXT: addi a0, a0, -1
+; RV64XTHEADBB-NEXT: or a0, a0, a2
+; RV64XTHEADBB-NEXT: ret
+ %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
+ %2 = icmp eq i32 %a, 0
+ %3 = select i1 %2, i32 -1, i32 %1
+ ret i32 %3
+}
+
+define signext i32 @ffs_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: ffs_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: neg a0, a0
+; RV64I-NEXT: and a0, s0, a0
+; RV64I-NEXT: lui a1, 30667
+;
RV64I-NEXT: addiw a1, a1, 1329 +; RV64I-NEXT: call __muldi3@plt +; RV64I-NEXT: srliw a0, a0, 27 +; RV64I-NEXT: lui a1, %hi(.LCPI9_0) +; RV64I-NEXT: addi a1, a1, %lo(.LCPI9_0) +; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: lbu a0, 0(a0) +; RV64I-NEXT: addi a0, a0, 1 +; RV64I-NEXT: seqz a1, s0 +; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: and a0, a1, a0 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64XTHEADBB-LABEL: ffs_i32: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: addi a1, a0, -1 +; RV64XTHEADBB-NEXT: not a2, a0 +; RV64XTHEADBB-NEXT: and a1, a2, a1 +; RV64XTHEADBB-NEXT: th.ff1 a1, a1 +; RV64XTHEADBB-NEXT: li a2, 65 +; RV64XTHEADBB-NEXT: sub a2, a2, a1 +; RV64XTHEADBB-NEXT: seqz a0, a0 +; RV64XTHEADBB-NEXT: addi a0, a0, -1 +; RV64XTHEADBB-NEXT: and a0, a0, a2 +; RV64XTHEADBB-NEXT: ret + %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true) + %2 = add i32 %1, 1 + %3 = icmp eq i32 %a, 0 + %4 = select i1 %3, i32 0, i32 %2 + ret i32 %4 +} + +declare i64 @llvm.cttz.i64(i64, i1) + +define i64 @cttz_i64(i64 %a) nounwind { +; RV64I-LABEL: cttz_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: beqz a0, .LBB10_2 +; RV64I-NEXT: # %bb.1: # %cond.false +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, %hi(.LCPI10_0) +; RV64I-NEXT: ld a1, %lo(.LCPI10_0)(a1) +; RV64I-NEXT: call __muldi3@plt +; RV64I-NEXT: srli a0, a0, 58 +; RV64I-NEXT: lui a1, %hi(.LCPI10_1) +; RV64I-NEXT: addi a1, a1, %lo(.LCPI10_1) +; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: lbu a0, 0(a0) +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; RV64I-NEXT: .LBB10_2: +; RV64I-NEXT: li a0, 64 +; RV64I-NEXT: ret +; +; RV64XTHEADBB-LABEL: cttz_i64: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: beqz a0, .LBB10_2 +; RV64XTHEADBB-NEXT: # %bb.1: # %cond.false +; RV64XTHEADBB-NEXT: addi a1, a0, -1 +; RV64XTHEADBB-NEXT: not a0, a0 +; RV64XTHEADBB-NEXT: and a0, a0, a1 +; RV64XTHEADBB-NEXT: th.ff1 a0, a0 +; RV64XTHEADBB-NEXT: li a1, 64 +; RV64XTHEADBB-NEXT: sub a0, a1, a0 +; RV64XTHEADBB-NEXT: ret +; RV64XTHEADBB-NEXT: .LBB10_2: +; RV64XTHEADBB-NEXT: li a0, 64 +; RV64XTHEADBB-NEXT: ret + %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false) + ret i64 %1 +} + +define signext i32 @sextb_i32(i32 signext %a) nounwind { +; RV64I-LABEL: sextb_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 56 +; RV64I-NEXT: srai a0, a0, 56 +; RV64I-NEXT: ret +; +; RV64XTHEADBB-LABEL: sextb_i32: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: th.ext a0, a0, 7, 0 +; RV64XTHEADBB-NEXT: ret + %shl = shl i32 %a, 24 + %shr = ashr exact i32 %shl, 24 + ret i32 %shr +} + +define i64 @sextb_i64(i64 %a) nounwind { +; RV64I-LABEL: sextb_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 56 +; RV64I-NEXT: srai a0, a0, 56 +; RV64I-NEXT: ret +; +; RV64XTHEADBB-LABEL: sextb_i64: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: th.ext a0, a0, 7, 0 +; RV64XTHEADBB-NEXT: ret + %shl = shl i64 %a, 56 + %shr = ashr exact i64 %shl, 56 + ret i64 %shr +} + +define signext i32 @sexth_i32(i32 signext %a) nounwind { +; RV64I-LABEL: sexth_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srai a0, a0, 48 +; RV64I-NEXT: ret +; +; RV64XTHEADBB-LABEL: sexth_i32: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: th.ext a0, a0, 15, 0 +; RV64XTHEADBB-NEXT: ret + %shl = shl i32 %a, 16 + %shr = ashr exact i32 %shl, 16 + ret i32 %shr +} + 
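+; Per the XTheadBb spec, th.ext rd, rs1, msb, lsb extracts rs1[msb:lsb] and
+; sign-extends it into rd, so each slli/srai pair in the sext tests collapses
+; to one instruction; th.extu is the zero-extending form used by the zexth
+; tests below.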
+define i64 @sexth_i64(i64 %a) nounwind { +; RV64I-LABEL: sexth_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srai a0, a0, 48 +; RV64I-NEXT: ret +; +; RV64XTHEADBB-LABEL: sexth_i64: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: th.ext a0, a0, 15, 0 +; RV64XTHEADBB-NEXT: ret + %shl = shl i64 %a, 48 + %shr = ashr exact i64 %shl, 48 + ret i64 %shr +} + +define i32 @zexth_i32(i32 %a) nounwind { +; RV64I-LABEL: zexth_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srli a0, a0, 48 +; RV64I-NEXT: ret +; +; RV64XTHEADBB-LABEL: zexth_i32: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: th.extu a0, a0, 15, 0 +; RV64XTHEADBB-NEXT: ret + %and = and i32 %a, 65535 + ret i32 %and +} + +define i64 @zexth_i64(i64 %a) nounwind { +; RV64I-LABEL: zexth_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srli a0, a0, 48 +; RV64I-NEXT: ret +; +; RV64XTHEADBB-LABEL: zexth_i64: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: th.extu a0, a0, 15, 0 +; RV64XTHEADBB-NEXT: ret + %and = and i64 %a, 65535 + ret i64 %and +} + +declare i32 @llvm.bswap.i32(i32) + +define signext i32 @bswap_i32(i32 signext %a) nounwind { +; RV64I-LABEL: bswap_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: srli a1, a0, 8 +; RV64I-NEXT: lui a2, 16 +; RV64I-NEXT: addiw a2, a2, -256 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srliw a3, a0, 24 +; RV64I-NEXT: or a1, a1, a3 +; RV64I-NEXT: and a2, a0, a2 +; RV64I-NEXT: slli a2, a2, 8 +; RV64I-NEXT: slliw a0, a0, 24 +; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64XTHEADBB-LABEL: bswap_i32: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: th.revw a0, a0 +; RV64XTHEADBB-NEXT: ret + %1 = tail call i32 @llvm.bswap.i32(i32 %a) + ret i32 %1 +} + +; Similar to bswap_i32 but the result is not sign extended. 
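+; As modeled here, th.revw byte-reverses the low 32 bits and sign-extends the
+; result like other *W instructions, so neither the sext nor the store case
+; needs a separate sext.w.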
+define void @bswap_i32_nosext(i32 signext %a, ptr %x) nounwind { +; RV64I-LABEL: bswap_i32_nosext: +; RV64I: # %bb.0: +; RV64I-NEXT: srli a2, a0, 8 +; RV64I-NEXT: lui a3, 16 +; RV64I-NEXT: addiw a3, a3, -256 +; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: srliw a4, a0, 24 +; RV64I-NEXT: or a2, a2, a4 +; RV64I-NEXT: and a3, a0, a3 +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: sw a0, 0(a1) +; RV64I-NEXT: ret +; +; RV64XTHEADBB-LABEL: bswap_i32_nosext: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: th.revw a0, a0 +; RV64XTHEADBB-NEXT: sw a0, 0(a1) +; RV64XTHEADBB-NEXT: ret + %1 = tail call i32 @llvm.bswap.i32(i32 %a) + store i32 %1, ptr %x + ret void +} + +declare i64 @llvm.bswap.i64(i64) + +define i64 @bswap_i64(i64 %a) { +; RV64I-LABEL: bswap_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: srli a1, a0, 40 +; RV64I-NEXT: lui a2, 16 +; RV64I-NEXT: addiw a2, a2, -256 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a3, a0, 56 +; RV64I-NEXT: or a1, a1, a3 +; RV64I-NEXT: srli a3, a0, 24 +; RV64I-NEXT: lui a4, 4080 +; RV64I-NEXT: and a3, a3, a4 +; RV64I-NEXT: srli a5, a0, 8 +; RV64I-NEXT: srliw a5, a5, 24 +; RV64I-NEXT: slli a5, a5, 24 +; RV64I-NEXT: or a3, a5, a3 +; RV64I-NEXT: or a1, a3, a1 +; RV64I-NEXT: and a4, a0, a4 +; RV64I-NEXT: slli a4, a4, 24 +; RV64I-NEXT: srliw a3, a0, 24 +; RV64I-NEXT: slli a3, a3, 32 +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: and a2, a0, a2 +; RV64I-NEXT: slli a2, a2, 40 +; RV64I-NEXT: slli a0, a0, 56 +; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64XTHEADBB-LABEL: bswap_i64: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: th.rev a0, a0 +; RV64XTHEADBB-NEXT: ret + %1 = call i64 @llvm.bswap.i64(i64 %a) + ret i64 %1 +}
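+
+; Per the XTheadBb spec, th.rev byte-reverses the full 64-bit register, so the
+; long RV64I shift-and-mask expansion above becomes a single instruction.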