diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst
--- a/llvm/docs/RISCVUsage.rst
+++ b/llvm/docs/RISCVUsage.rst
@@ -169,6 +169,9 @@
 The current vendor extensions supported are:
 
+``XTHeadBa``
+  LLVM implements `the THeadBa (address-generation) vendor-defined instructions specified in `_ by T-HEAD of Alibaba. Instructions are prefixed with `th.` as described in the specification.
+
 ``XTHeadVdot``
   LLVM implements `version 1.0.0 of the THeadV-family custom instructions specification `_ by T-HEAD of Alibaba. All instructions are prefixed with `th.` as described in the specification, and the riscv-toolchain-convention document linked above.
diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -109,6 +109,7 @@
     {"svinval", RISCVExtensionVersion{1, 0}},
 
     // vendor-defined ('X') extensions
+    {"xtheadba", RISCVExtensionVersion{1, 0}},
     {"xtheadvdot", RISCVExtensionVersion{1, 0}},
     {"xventanacondops", RISCVExtensionVersion{1, 0}},
 };
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -463,6 +463,13 @@
                        AssemblerPredicate<(all_of FeatureVendorXVentanaCondOps),
                                           "'XVentanaCondOps' (Ventana Conditional Ops)">;
 
+def FeatureVendorXTHeadBa
+    : SubtargetFeature<"xtheadba", "HasVendorXTHeadBa", "true",
+                       "'xtheadba' (T-Head address calculation instructions)">;
+def HasVendorXTHeadBa : Predicate<"Subtarget->hasVendorXTHeadBa()">,
+                        AssemblerPredicate<(all_of FeatureVendorXTHeadBa),
+                                           "'xtheadba' (T-Head address calculation instructions)">;
+
 def FeatureVendorXTHeadVdot
     : SubtargetFeature<"xtheadvdot", "HasVendorXTHeadVdot", "true",
                        "'xtheadvdot' (T-Head Vector Extensions for Dot)",
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 // Instruction class templates
 //===----------------------------------------------------------------------===//
+
 class THInstVdotVV<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins,
                    string opcodestr, string argstr>
     : RVInstVV<funct6, opv, outs, ins, opcodestr, argstr> {
@@ -43,6 +44,16 @@
                opcodestr, "$vd, $rs1, $vs2$vm">;
 } // hasSideEffects = 0, mayLoad = 0, mayStore = 0
 
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class THShiftALU_rri<bits<3> funct3, string opcodestr>
+    : RVInstR<0, funct3, OPC_CUSTOM_0, (outs GPR:$rd),
+              (ins GPR:$rs1, GPR:$rs2, uimm2:$uimm2),
+              opcodestr, "$rd, $rs1, $rs2, $uimm2"> {
+  bits<2> uimm2;
+  let Inst{31-27} = 0;
+  let Inst{26-25} = uimm2;
+}
+
 //===----------------------------------------------------------------------===//
 // Combination of instruction classes.
 // Use these multiclasses to define instructions more easily.
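A reviewer's note, not part of the patch: `THShiftALU_rri` above describes an R-type encoding in the CUSTOM_0 opcode space, with the 2-bit shift amount occupying bits 26-25 (the low bits of the funct7 field). A minimal, self-contained C++ sketch of the semantics and encoding; the helper names are ours, and the field positions are read off the class definition:

```cpp
#include <cstdint>

// th.addsl semantics: rd = rs1 + (rs2 << uimm2), with uimm2 in [0, 3].
uint64_t th_addsl(uint64_t rs1, uint64_t rs2, unsigned uimm2) {
  return rs1 + (rs2 << uimm2);
}

// Assemble the 32-bit word: Inst{31-27} = 0, Inst{26-25} = uimm2,
// funct3 = 0b001, opcode = OPC_CUSTOM_0 (0b0001011). Register operands
// are encoded by number (t0 = x5, t1 = x6, t2 = x7, ...).
uint32_t encodeTHAddsl(unsigned rd, unsigned rs1, unsigned rs2,
                       unsigned uimm2) {
  return (uimm2 << 25) | (rs2 << 20) | (rs1 << 15) | (0b001u << 12) |
         (rd << 7) | 0b0001011u;
}
// encodeTHAddsl(5, 6, 7, 1) == 0x0273128B, which matches the byte list
// [0x8b,0x12,0x73,0x02] in the MC test at the end of this patch.
```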
@@ -59,6 +70,11 @@
 //===----------------------------------------------------------------------===//
 // Instructions
 //===----------------------------------------------------------------------===//
+let Predicates = [HasVendorXTHeadBa] in {
+def TH_ADDSL : THShiftALU_rri<0b001, "th.addsl">,
+               Sched<[WriteSHXADD, ReadSHXADD, ReadSHXADD]>;
+} // Predicates = [HasVendorXTHeadBa]
+
 let Predicates = [HasVendorXTHeadVdot],
     Constraints = "@earlyclobber $vd",
     RVVConstraint = WidenV in {
@@ -134,6 +150,80 @@
 //===----------------------------------------------------------------------===//
 // Pseudo-instructions and codegen patterns
 //===----------------------------------------------------------------------===//
+let Predicates = [HasVendorXTHeadBa] in {
+def : Pat<(add GPR:$rs1, (shl GPR:$rs2, uimm2:$uimm2)),
+          (TH_ADDSL GPR:$rs1, GPR:$rs2, uimm2:$uimm2)>;
+
+// Reuse complex patterns from StdExtZba
+def : Pat<(add sh1add_op:$rs1, non_imm12:$rs2),
+          (TH_ADDSL GPR:$rs2, sh1add_op:$rs1, 1)>;
+def : Pat<(add sh2add_op:$rs1, non_imm12:$rs2),
+          (TH_ADDSL GPR:$rs2, sh2add_op:$rs1, 2)>;
+def : Pat<(add sh3add_op:$rs1, non_imm12:$rs2),
+          (TH_ADDSL GPR:$rs2, sh3add_op:$rs1, 3)>;
+
+def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 6)), GPR:$rs2),
+          (TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 1), 1)>;
+def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 10)), GPR:$rs2),
+          (TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 2), 1)>;
+def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 18)), GPR:$rs2),
+          (TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 3), 1)>;
+def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 12)), GPR:$rs2),
+          (TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 1), 2)>;
+def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 20)), GPR:$rs2),
+          (TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 2), 2)>;
+def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 36)), GPR:$rs2),
+          (TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 3), 2)>;
+def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 24)), GPR:$rs2),
+          (TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 1), 3)>;
+def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 40)), GPR:$rs2),
+          (TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 2), 3)>;
+def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 72)), GPR:$rs2),
+          (TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 3), 3)>;
+
+def : Pat<(add GPR:$r, CSImm12MulBy4:$i),
+          (TH_ADDSL GPR:$r, (ADDI X0, (SimmShiftRightBy2XForm CSImm12MulBy4:$i)), 2)>;
+def : Pat<(add GPR:$r, CSImm12MulBy8:$i),
+          (TH_ADDSL GPR:$r, (ADDI X0, (SimmShiftRightBy3XForm CSImm12MulBy8:$i)), 3)>;
+
+def : Pat<(mul GPR:$r, C3LeftShift:$i),
+          (SLLI (TH_ADDSL GPR:$r, GPR:$r, 1),
+                (TrailingZeros C3LeftShift:$i))>;
+def : Pat<(mul GPR:$r, C5LeftShift:$i),
+          (SLLI (TH_ADDSL GPR:$r, GPR:$r, 2),
+                (TrailingZeros C5LeftShift:$i))>;
+def : Pat<(mul GPR:$r, C9LeftShift:$i),
+          (SLLI (TH_ADDSL GPR:$r, GPR:$r, 3),
+                (TrailingZeros C9LeftShift:$i))>;
+
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 11)),
+          (TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 2), 1)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 19)),
+          (TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 3), 1)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 13)),
+          (TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 1), 2)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 21)),
+          (TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 2), 2)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 37)),
+          (TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 3), 2)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 25)),
+          (TH_ADDSL (TH_ADDSL GPR:$r, GPR:$r, 2), (TH_ADDSL GPR:$r, GPR:$r, 2), 2)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 41)),
+          (TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 2), 3)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 73)),
+          (TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 3), 3)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 27)),
+          (TH_ADDSL (TH_ADDSL GPR:$r, GPR:$r, 3), (TH_ADDSL GPR:$r, GPR:$r, 3), 1)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 45)),
+          (TH_ADDSL (TH_ADDSL GPR:$r, GPR:$r, 3), (TH_ADDSL GPR:$r, GPR:$r, 3), 2)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 81)),
+          (TH_ADDSL (TH_ADDSL GPR:$r, GPR:$r, 3), (TH_ADDSL GPR:$r, GPR:$r, 3), 3)>;
+
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 200)),
+          (SLLI (TH_ADDSL (TH_ADDSL GPR:$r, GPR:$r, 2),
+                          (TH_ADDSL GPR:$r, GPR:$r, 2), 2), 3)>;
+} // Predicates = [HasVendorXTHeadBa]
+
 defm PseudoTHVdotVMAQA      : VPseudoVMAQA_VV_VX;
 defm PseudoTHVdotVMAQAU     : VPseudoVMAQA_VV_VX;
 defm PseudoTHVdotVMAQASU    : VPseudoVMAQA_VV_VX;
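Another note for reviewers, not part of the patch: every `mul_oneuse`/`mul_const_oneuse` pattern above instantiates one identity, namely that constants of the form (2^a + 1), or products of two such factors, optionally times a power of two, decompose into one or two `th.addsl` ops plus at most one `slli`. A hedged C++ sketch of the arithmetic for the multiply-by-6-and-add case (function name ours, mirroring the `addmul6` test below):

```cpp
#include <cstdint>

// 6*a + b == b + ((a + (a << 1)) << 1): two th.addsl instructions.
uint64_t addmul6(uint64_t a, uint64_t b) {
  uint64_t t = a + (a << 1); // th.addsl t, a, a, 1 -> t = 3*a
  return b + (t << 1);       // th.addsl r, b, t, 1 -> r = b + 6*a
}
```

The same shape covers 10/12/18/20/24/36/40/72 (inner factor 3, 5, or 9; outer shift 1, 2, or 3), and the C3/C5/C9LeftShift patterns peel the residual power of two off into an `slli`.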
diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll
--- a/llvm/test/CodeGen/RISCV/attributes.ll
+++ b/llvm/test/CodeGen/RISCV/attributes.ll
@@ -86,6 +86,7 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+svpbmt %s -o - | FileCheck --check-prefix=RV64SVPBMT %s
 ; RUN: llc -mtriple=riscv64 -mattr=+svinval %s -o - | FileCheck --check-prefix=RV64SVINVAL %s
 ; RUN: llc -mtriple=riscv64 -mattr=+xventanacondops %s -o - | FileCheck --check-prefix=RV64XVENTANACONDOPS %s
+; RUN: llc -mtriple=riscv64 -mattr=+xtheadba %s -o - | FileCheck --check-prefix=RV64XTHEADBA %s
 ; RUN: llc -mtriple=riscv64 -mattr=+xtheadvdot %s -o - | FileCheck --check-prefix=RV64XTHEADVDOT %s
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zawrs %s -o - | FileCheck --check-prefix=RV64ZAWRS %s
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-ztso %s -o - | FileCheck --check-prefix=RV64ZTSO %s
@@ -180,6 +181,7 @@
 ; RV64SVPBMT: .attribute 5, "rv64i2p0_svpbmt1p0"
 ; RV64SVINVAL: .attribute 5, "rv64i2p0_svinval1p0"
 ; RV64XVENTANACONDOPS: .attribute 5, "rv64i2p0_xventanacondops1p0"
+; RV64XTHEADBA: .attribute 5, "rv64i2p0_xtheadba1p0"
 ; RV64XTHEADVDOT: .attribute 5, "rv64i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_xtheadvdot1p0"
 ; RV64ZTSO: .attribute 5, "rv64i2p0_ztso0p1"
 ; RV64ZCA: .attribute 5, "rv64i2p0_zca1p0"
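For orientation before the new CodeGen test, commentary rather than patch content: the `sh1add`/`sh2add`/`sh3add` functions below are scaled-index loads, the bread-and-butter use of an address-generation extension. In C terms, roughly (names ours):

```cpp
#include <cstdint>

// p[i] with 2-byte elements: the address is p + (i << 1). With XTHeadBa
// this folds to a single th.addsl; plain RV64I needs slli + add. The
// sh2add and sh3add tests below are the 4- and 8-byte analogues.
int16_t sh1add(int64_t i, const int16_t *p) { return p[i]; }
```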
diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadba.ll b/llvm/test/CodeGen/RISCV/rv64xtheadba.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64xtheadba.ll
@@ -0,0 +1,319 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub
+; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefixes=CHECK,RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+m,+xtheadba -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefixes=CHECK,RV64XTHEADBA
+
+define signext i16 @sh1add(i64 %0, ptr %1) {
+; RV64I-LABEL: sh1add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 1
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lh a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBA-LABEL: sh1add:
+; RV64XTHEADBA:       # %bb.0:
+; RV64XTHEADBA-NEXT:    th.addsl a0, a1, a0, 1
+; RV64XTHEADBA-NEXT:    lh a0, 0(a0)
+; RV64XTHEADBA-NEXT:    ret
+  %3 = getelementptr inbounds i16, ptr %1, i64 %0
+  %4 = load i16, ptr %3
+  ret i16 %4
+}
+
+define signext i32 @sh2add(i64 %0, ptr %1) {
+; RV64I-LABEL: sh2add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 2
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    lw a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBA-LABEL: sh2add:
+; RV64XTHEADBA:       # %bb.0:
+; RV64XTHEADBA-NEXT:    th.addsl a0, a1, a0, 2
+; RV64XTHEADBA-NEXT:    lw a0, 0(a0)
+; RV64XTHEADBA-NEXT:    ret
+  %3 = getelementptr inbounds i32, ptr %1, i64 %0
+  %4 = load i32, ptr %3
+  ret i32 %4
+}
+
+define i64 @sh3add(i64 %0, ptr %1) {
+; RV64I-LABEL: sh3add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 3
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBA-LABEL: sh3add:
+; RV64XTHEADBA:       # %bb.0:
+; RV64XTHEADBA-NEXT:    th.addsl a0, a1, a0, 3
+; RV64XTHEADBA-NEXT:    ld a0, 0(a0)
+; RV64XTHEADBA-NEXT:    ret
+  %3 = getelementptr inbounds i64, ptr %1, i64 %0
+  %4 = load i64, ptr %3
+  ret i64 %4
+}
+
+; Type legalization inserts a sext_inreg after the first add. That add will be
+; selected as sh2add which does not sign extend. SimplifyDemandedBits is unable
+; to remove the sext_inreg because it has multiple uses. The ashr will use the
+; sext_inreg to become sraiw. This leaves the sext_inreg only used by the shl.
+; If the shl is selected as sllw, we don't need the sext_inreg.
+define i64 @sh2add_extra_sext(i32 %x, i32 %y, i32 %z) {
+; RV64I-LABEL: sh2add_extra_sext:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    sllw a1, a2, a0
+; RV64I-NEXT:    sraiw a0, a0, 2
+; RV64I-NEXT:    mul a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBA-LABEL: sh2add_extra_sext:
+; RV64XTHEADBA:       # %bb.0:
+; RV64XTHEADBA-NEXT:    th.addsl a0, a1, a0, 2
+; RV64XTHEADBA-NEXT:    sllw a1, a2, a0
+; RV64XTHEADBA-NEXT:    sraiw a0, a0, 2
+; RV64XTHEADBA-NEXT:    mul a0, a1, a0
+; RV64XTHEADBA-NEXT:    ret
+  %a = shl i32 %x, 2
+  %b = add i32 %a, %y
+  %c = shl i32 %z, %b
+  %d = ashr i32 %b, 2
+  %e = sext i32 %c to i64
+  %f = sext i32 %d to i64
+  %g = mul i64 %e, %f
+  ret i64 %g
+}
+
+define i64 @addmul6(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul6:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 6
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBA-LABEL: addmul6:
+; RV64XTHEADBA:       # %bb.0:
+; RV64XTHEADBA-NEXT:    th.addsl a0, a0, a0, 1
+; RV64XTHEADBA-NEXT:    th.addsl a0, a1, a0, 1
+; RV64XTHEADBA-NEXT:    ret
+  %c = mul i64 %a, 6
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @addmul10(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul10:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 10
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBA-LABEL: addmul10:
+; RV64XTHEADBA:       # %bb.0:
+; RV64XTHEADBA-NEXT:    th.addsl a0, a0, a0, 2
+; RV64XTHEADBA-NEXT:    th.addsl a0, a1, a0, 1
+; RV64XTHEADBA-NEXT:    ret
+  %c = mul i64 %a, 10
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @addmul12(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul12:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 12
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBA-LABEL: addmul12:
+; RV64XTHEADBA:       # %bb.0:
+; RV64XTHEADBA-NEXT:    th.addsl a0, a0, a0, 1
+; RV64XTHEADBA-NEXT:    th.addsl a0, a1, a0, 2
+; RV64XTHEADBA-NEXT:    ret
+  %c = mul i64 %a, 12
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @addmul18(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul18:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 18
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBA-LABEL: addmul18:
+; RV64XTHEADBA:       # %bb.0:
+; RV64XTHEADBA-NEXT:    th.addsl a0, a0, a0, 3
+; RV64XTHEADBA-NEXT:    th.addsl a0, a1, a0, 1
+; RV64XTHEADBA-NEXT:    ret
+  %c = mul i64 %a, 18
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @addmul20(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul20:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 20
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBA-LABEL: addmul20:
+; RV64XTHEADBA:       # %bb.0:
+; RV64XTHEADBA-NEXT:    th.addsl a0, a0, a0, 2
+; RV64XTHEADBA-NEXT:    th.addsl a0, a1, a0, 2
+; RV64XTHEADBA-NEXT:    ret
+  %c = mul i64 %a, 20
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @addmul24(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul24:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 24
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBA-LABEL: addmul24:
+; RV64XTHEADBA:       # %bb.0:
+; RV64XTHEADBA-NEXT:    th.addsl a0, a0, a0, 1
+; RV64XTHEADBA-NEXT:    th.addsl a0, a1, a0, 3
+; RV64XTHEADBA-NEXT:    ret
+  %c = mul i64 %a, 24
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @addmul36(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul36:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 36
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBA-LABEL: addmul36:
+; RV64XTHEADBA:       # %bb.0:
+; RV64XTHEADBA-NEXT:    th.addsl a0, a0, a0, 3
+; RV64XTHEADBA-NEXT:    th.addsl a0, a1, a0, 2
+; RV64XTHEADBA-NEXT:    ret
+  %c = mul i64 %a, 36
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @addmul40(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul40:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 40
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBA-LABEL: addmul40:
+; RV64XTHEADBA:       # %bb.0:
+; RV64XTHEADBA-NEXT:    th.addsl a0, a0, a0, 2
+; RV64XTHEADBA-NEXT:    th.addsl a0, a1, a0, 3
+; RV64XTHEADBA-NEXT:    ret
+  %c = mul i64 %a, 40
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @addmul72(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul72:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 72
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBA-LABEL: addmul72:
+; RV64XTHEADBA:       # %bb.0:
+; RV64XTHEADBA-NEXT:    th.addsl a0, a0, a0, 3
+; RV64XTHEADBA-NEXT:    th.addsl a0, a1, a0, 3
+; RV64XTHEADBA-NEXT:    ret
+  %c = mul i64 %a, 72
+  %d = add i64 %c, %b
+  ret i64 %d
+}
+
+define i64 @mul96(i64 %a) {
+; RV64I-LABEL: mul96:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 96
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBA-LABEL: mul96:
+; RV64XTHEADBA:       # %bb.0:
+; RV64XTHEADBA-NEXT:    th.addsl a0, a0, a0, 1
+; RV64XTHEADBA-NEXT:    slli a0, a0, 5
+; RV64XTHEADBA-NEXT:    ret
+  %c = mul i64 %a, 96
+  ret i64 %c
+}
+
+define i64 @mul160(i64 %a) {
+; RV64I-LABEL: mul160:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 160
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBA-LABEL: mul160:
+; RV64XTHEADBA:       # %bb.0:
+; RV64XTHEADBA-NEXT:    th.addsl a0, a0, a0, 2
+; RV64XTHEADBA-NEXT:    slli a0, a0, 5
+; RV64XTHEADBA-NEXT:    ret
+  %c = mul i64 %a, 160
+  ret i64 %c
+}
+
+define i64 @mul200(i64 %a) {
+; RV64I-LABEL: mul200:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 200
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBA-LABEL: mul200:
+; RV64XTHEADBA:       # %bb.0:
+; RV64XTHEADBA-NEXT:    th.addsl a0, a0, a0, 2
+; RV64XTHEADBA-NEXT:    th.addsl a0, a0, a0, 2
+; RV64XTHEADBA-NEXT:    slli a0, a0, 3
+; RV64XTHEADBA-NEXT:    ret
+  %c = mul i64 %a, 200
+  ret i64 %c
+}
+
+define i64 @mul288(i64 %a) {
+; RV64I-LABEL: mul288:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 288
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64XTHEADBA-LABEL: mul288:
+; RV64XTHEADBA:       # %bb.0:
+; RV64XTHEADBA-NEXT:    th.addsl a0, a0, a0, 3
+; RV64XTHEADBA-NEXT:    slli a0, a0, 5
+; RV64XTHEADBA-NEXT:    ret
+  %c = mul i64 %a, 288
+  ret i64 %c
+}
+
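The `mul96`/`mul160`/`mul200`/`mul288` tests above exercise the multiply-only patterns, where the whole constant is rewritten as shift-adds and the residual power of two ends as an `slli`. A hedged C++ sketch of the arithmetic (names ours):

```cpp
#include <cstdint>

// 96 = 3 << 5: one th.addsl plus one slli.
uint64_t mul96(uint64_t a) {
  uint64_t t = a + (a << 1); // th.addsl t, a, a, 1 -> 3*a
  return t << 5;             // slli -> 96*a
}

// 200 = (5 * 5) << 3: two th.addsl plus one slli.
uint64_t mul200(uint64_t a) {
  uint64_t t = a + (a << 2); // th.addsl t, a, a, 2 -> 5*a
  t = t + (t << 2);          // th.addsl t, t, t, 2 -> 25*a
  return t << 3;             // slli -> 200*a
}
```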
diff --git a/llvm/test/MC/RISCV/rv64xtheadba-invalid.s b/llvm/test/MC/RISCV/rv64xtheadba-invalid.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv64xtheadba-invalid.s
@@ -0,0 +1,8 @@
+# RUN: not llvm-mc -triple riscv64 -mattr=+xtheadba < %s 2>&1 | FileCheck %s
+
+# Too few operands
+th.addsl t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction
+# Too few operands
+th.addsl t0, t1, t2 # CHECK: :[[@LINE]]:1: error: too few operands for instruction
+# Immediate operand out of range
+th.addsl t0, t1, t2, 4 # CHECK: :[[@LINE]]:22: error: immediate must be an integer in the range [0, 3]
diff --git a/llvm/test/MC/RISCV/rv64xtheadba-valid.s b/llvm/test/MC/RISCV/rv64xtheadba-valid.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv64xtheadba-valid.s
@@ -0,0 +1,16 @@
+# With the XTHeadBa extension:
+# RUN: llvm-mc %s -triple=riscv64 -mattr=+xtheadba -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+xtheadba < %s \
+# RUN:     | llvm-objdump --mattr=+xtheadba -d -r - \
+# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+
+# CHECK-ASM-AND-OBJ: th.addsl t0, t1, t2, 1
+# CHECK-ASM: encoding: [0x8b,0x12,0x73,0x02]
+th.addsl t0, t1, t2, 1
+# CHECK-ASM-AND-OBJ: th.addsl t0, t1, t2, 2
+# CHECK-ASM: encoding: [0x8b,0x12,0x73,0x04]
+th.addsl t0, t1, t2, 2
+# CHECK-ASM-AND-OBJ: th.addsl t0, t1, t2, 3
+# CHECK-ASM: encoding: [0x8b,0x12,0x73,0x06]
+th.addsl t0, t1, t2, 3
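As a last cross-check, commentary rather than patch content: the three encodings in the valid MC test differ only in bits 26-25, exactly where `THShiftALU_rri` places `uimm2`. A small self-contained C++ verification (helper name ours; the words are the little-endian reading of the byte lists above):

```cpp
#include <cassert>
#include <cstdint>

// Extract the uimm2 shift amount from a th.addsl encoding (bits 26-25).
unsigned decodeUimm2(uint32_t insn) { return (insn >> 25) & 0x3; }

int main() {
  assert(decodeUimm2(0x0273128Bu) == 1); // th.addsl t0, t1, t2, 1
  assert(decodeUimm2(0x0473128Bu) == 2); // th.addsl t0, t1, t2, 2
  assert(decodeUimm2(0x0673128Bu) == 3); // th.addsl t0, t1, t2, 3
  return 0;
}
```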