diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst --- a/llvm/docs/RISCVUsage.rst +++ b/llvm/docs/RISCVUsage.rst @@ -169,6 +169,9 @@ The current vendor extensions supported are: +``XTHeadBa`` + LLVM implements `the THeadBa (address-generation) vendor-defined instructions specified in `_ by T-HEAD of Alibaba. Instructions are prefixed with `th.` as described in the specification. + ``XTHeadVdot`` LLVM implements `version 1.0.0 of the THeadV-family custom instructions specification `_ by T-HEAD of Alibaba. All instructions are prefixed with `th.` as described in the specification, and the riscv-toolchain-convention document linked above. diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -108,6 +108,7 @@ * Zca, Zcf, and Zcd extensions were upgraded to version 1.0.1. * vsetvli intrinsics no longer have side effects. They may now be combined, moved, deleted, etc. by optimizations. +* Adds support for the vendor-defined XTHeadBa (address-generation) extension. Changes to the WebAssembly Backend ---------------------------------- diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -109,6 +109,7 @@ {"svinval", RISCVExtensionVersion{1, 0}}, // vendor-defined ('X') extensions + {"xtheadba", RISCVExtensionVersion{1, 0}}, {"xtheadvdot", RISCVExtensionVersion{1, 0}}, {"xventanacondops", RISCVExtensionVersion{1, 0}}, }; diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -471,6 +471,13 @@ if (Result != MCDisassembler::Fail) return Result; } + if (STI.getFeatureBits()[RISCV::FeatureVendorXTHeadBa]) { + LLVM_DEBUG(dbgs() << "Trying XTHeadBa custom opcode table:\n"); + Result = decodeInstruction(DecoderTableTHeadBa32, MI, Insn, Address, this, + STI); + if (Result != MCDisassembler::Fail) + return Result; + } if (STI.getFeatureBits()[RISCV::FeatureVendorXTHeadVdot]) { LLVM_DEBUG(dbgs() << "Trying XTHeadVdot custom opcode table:\n"); Result = diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -463,6 +463,13 @@ AssemblerPredicate<(all_of FeatureVendorXVentanaCondOps), "'XVentanaCondOps' (Ventana Conditional Ops)">; +def FeatureVendorXTHeadBa + : SubtargetFeature<"xtheadba", "HasVendorXTHeadBa", "true", + "'xtheadba' (T-Head address calculation instructions)">; +def HasVendorXTHeadBa : Predicate<"Subtarget->hasVendorXTHeadBa()">, + AssemblerPredicate<(all_of FeatureVendorXTHeadBa), + "'xtheadba' (T-Head address calculation instructions)">; + def FeatureVendorXTHeadVdot : SubtargetFeature<"xtheadvdot", "HasVendorXTHeadVdot", "true", "'xtheadvdot' (T-Head Vector Extensions for Dot)", diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td @@ -43,6 +43,17 @@ opcodestr, "$vd, $rs1, $vs2$vm">; } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 +let Predicates = [HasVendorXTHeadBa], DecoderNamespace = "THeadBa", + hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class THShiftALU_rri<bits<3> funct3, string opcodestr> + : 
RVInstR<0, funct3, OPC_CUSTOM_0, (outs GPR:$rd), + (ins GPR:$rs1, GPR:$rs2, uimm2:$uimm2), + opcodestr, "$rd, $rs1, $rs2, $uimm2"> { + bits<2> uimm2; + let Inst{31-27} = 0; + let Inst{26-25} = uimm2; +} + //===----------------------------------------------------------------------===// // Combination of instruction classes. // Use these multiclasses to define instructions more easily. @@ -59,6 +70,11 @@ //===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// +let Predicates = [HasVendorXTHeadBa] in { +def TH_ADDSL : THShiftALU_rri<0b001, "th.addsl">, + Sched<[WriteSHXADD, ReadSHXADD, ReadSHXADD]>; +} // Predicates = [HasVendorXTHeadBa] + let Predicates = [HasVendorXTHeadVdot], Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in { @@ -134,6 +150,80 @@ //===----------------------------------------------------------------------===// // Pseudo-instructions and codegen patterns //===----------------------------------------------------------------------===// +let Predicates = [HasVendorXTHeadBa] in { +def : Pat<(add GPR:$rs1, (shl GPR:$rs2, uimm2:$uimm2)), + (TH_ADDSL GPR:$rs1, GPR:$rs2, uimm2:$uimm2)>; + +// Reuse complex patterns from StdExtZba +def : Pat<(add sh1add_op:$rs1, non_imm12:$rs2), + (TH_ADDSL GPR:$rs2, sh1add_op:$rs1, 1)>; +def : Pat<(add sh2add_op:$rs1, non_imm12:$rs2), + (TH_ADDSL GPR:$rs2, sh2add_op:$rs1, 2)>; +def : Pat<(add sh3add_op:$rs1, non_imm12:$rs2), + (TH_ADDSL GPR:$rs2, sh3add_op:$rs1, 3)>; + +def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 6)), GPR:$rs2), + (TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 1), 1)>; +def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 10)), GPR:$rs2), + (TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 2), 1)>; +def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 18)), GPR:$rs2), + (TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 3), 1)>; +def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 12)), GPR:$rs2), + (TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 1), 2)>; +def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 20)), GPR:$rs2), + (TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 2), 2)>; +def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 36)), GPR:$rs2), + (TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 3), 2)>; +def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 24)), GPR:$rs2), + (TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 1), 3)>; +def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 40)), GPR:$rs2), + (TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 2), 3)>; +def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 72)), GPR:$rs2), + (TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 3), 3)>; + +def : Pat<(add GPR:$r, CSImm12MulBy4:$i), + (TH_ADDSL GPR:$r, (ADDI X0, (SimmShiftRightBy2XForm CSImm12MulBy4:$i)), 2)>; +def : Pat<(add GPR:$r, CSImm12MulBy8:$i), + (TH_ADDSL GPR:$r, (ADDI X0, (SimmShiftRightBy3XForm CSImm12MulBy8:$i)), 3)>; + +def : Pat<(mul GPR:$r, C3LeftShift:$i), + (SLLI (TH_ADDSL GPR:$r, GPR:$r, 1), + (TrailingZeros C3LeftShift:$i))>; +def : Pat<(mul GPR:$r, C5LeftShift:$i), + (SLLI (TH_ADDSL GPR:$r, GPR:$r, 2), + (TrailingZeros C5LeftShift:$i))>; +def : Pat<(mul GPR:$r, C9LeftShift:$i), + (SLLI (TH_ADDSL GPR:$r, GPR:$r, 3), + (TrailingZeros C9LeftShift:$i))>; + +def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 11)), + (TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 2), 1)>; +def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 19)), + (TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 3), 1)>; +def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 13)), + (TH_ADDSL 
GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 1), 2)>; +def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 21)), + (TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 2), 2)>; +def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 37)), + (TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 3), 2)>; +def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 25)), + (TH_ADDSL (TH_ADDSL GPR:$r, GPR:$r, 2), (TH_ADDSL GPR:$r, GPR:$r, 2), 2)>; +def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 41)), + (TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 2), 3)>; +def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 73)), + (TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 3), 3)>; +def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 27)), + (TH_ADDSL (TH_ADDSL GPR:$r, GPR:$r, 3), (TH_ADDSL GPR:$r, GPR:$r, 3), 1)>; +def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 45)), + (TH_ADDSL (TH_ADDSL GPR:$r, GPR:$r, 3), (TH_ADDSL GPR:$r, GPR:$r, 3), 2)>; +def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 81)), + (TH_ADDSL (TH_ADDSL GPR:$r, GPR:$r, 3), (TH_ADDSL GPR:$r, GPR:$r, 3), 3)>; + +def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 200)), + (SLLI (TH_ADDSL (TH_ADDSL GPR:$r, GPR:$r, 2), + (TH_ADDSL GPR:$r, GPR:$r, 2), 2), 3)>; +} // Predicates = [HasVendorXTHeadBa] + defm PseudoTHVdotVMAQA : VPseudoVMAQA_VV_VX; defm PseudoTHVdotVMAQAU : VPseudoVMAQA_VV_VX; defm PseudoTHVdotVMAQASU : VPseudoVMAQA_VV_VX; diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -86,6 +86,7 @@ ; RUN: llc -mtriple=riscv64 -mattr=+svpbmt %s -o - | FileCheck --check-prefix=RV64SVPBMT %s ; RUN: llc -mtriple=riscv64 -mattr=+svinval %s -o - | FileCheck --check-prefix=RV64SVINVAL %s ; RUN: llc -mtriple=riscv64 -mattr=+xventanacondops %s -o - | FileCheck --check-prefix=RV64XVENTANACONDOPS %s +; RUN: llc -mtriple=riscv64 -mattr=+xtheadba %s -o - | FileCheck --check-prefix=RV64XTHEADBA %s ; RUN: llc -mtriple=riscv64 -mattr=+xtheadvdot %s -o - | FileCheck --check-prefix=RV64XTHEADVDOT %s ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zawrs %s -o - | FileCheck --check-prefix=RV64ZAWRS %s ; RUN: llc -mtriple=riscv64 -mattr=+experimental-ztso %s -o - | FileCheck --check-prefix=RV64ZTSO %s @@ -180,6 +181,7 @@ ; RV64SVPBMT: .attribute 5, "rv64i2p0_svpbmt1p0" ; RV64SVINVAL: .attribute 5, "rv64i2p0_svinval1p0" ; RV64XVENTANACONDOPS: .attribute 5, "rv64i2p0_xventanacondops1p0" +; RV64XTHEADBA: .attribute 5, "rv64i2p0_xtheadba1p0" ; RV64XTHEADVDOT: .attribute 5, "rv64i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_xtheadvdot1p0" ; RV64ZTSO: .attribute 5, "rv64i2p0_ztso0p1" ; RV64ZCA: .attribute 5, "rv64i2p0_zca1p0" diff --git a/llvm/test/CodeGen/RISCV/rv32xtheadba.ll b/llvm/test/CodeGen/RISCV/rv32xtheadba.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv32xtheadba.ll @@ -0,0 +1,323 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub +; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32I +; RUN: llc -mtriple=riscv32 -mattr=+m,+xtheadba -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32XTHEADBA + +define signext i16 @th_addsl_1(i64 %0, ptr %1) { +; RV32I-LABEL: th_addsl_1: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 1 +; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: lh a0, 0(a0) +; RV32I-NEXT: ret +; +; RV32XTHEADBA-LABEL: th_addsl_1: +; RV32XTHEADBA: # %bb.0: +; RV32XTHEADBA-NEXT: th.addsl a0, a2, a0, 1 +; RV32XTHEADBA-NEXT: 
lh a0, 0(a0) +; RV32XTHEADBA-NEXT: ret + %3 = getelementptr inbounds i16, ptr %1, i64 %0 + %4 = load i16, ptr %3 + ret i16 %4 +} + +define signext i32 @th_addsl_2(i64 %0, ptr %1) { +; RV32I-LABEL: th_addsl_2: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: ret +; +; RV32XTHEADBA-LABEL: th_addsl_2: +; RV32XTHEADBA: # %bb.0: +; RV32XTHEADBA-NEXT: th.addsl a0, a2, a0, 2 +; RV32XTHEADBA-NEXT: lw a0, 0(a0) +; RV32XTHEADBA-NEXT: ret + %3 = getelementptr inbounds i32, ptr %1, i64 %0 + %4 = load i32, ptr %3 + ret i32 %4 +} + +define i64 @th_addsl_3(i64 %0, ptr %1) { +; RV32I-LABEL: th_addsl_3: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 3 +; RV32I-NEXT: add a2, a2, a0 +; RV32I-NEXT: lw a0, 0(a2) +; RV32I-NEXT: lw a1, 4(a2) +; RV32I-NEXT: ret +; +; RV32XTHEADBA-LABEL: th_addsl_3: +; RV32XTHEADBA: # %bb.0: +; RV32XTHEADBA-NEXT: th.addsl a1, a2, a0, 3 +; RV32XTHEADBA-NEXT: lw a0, 0(a1) +; RV32XTHEADBA-NEXT: lw a1, 4(a1) +; RV32XTHEADBA-NEXT: ret + %3 = getelementptr inbounds i64, ptr %1, i64 %0 + %4 = load i64, ptr %3 + ret i64 %4 +} + +; On RV32, i32 is a legal type, so no sext_inreg is inserted after the first +; add and the shifts remain plain sll/srai. The add is still selected as +; th.addsl. See rv64xtheadba.ll for the RV64 variant of this test, where type +; legalization inserts a sext_inreg with multiple uses. +define i64 @th_addsl_2_extra_sext(i32 %x, i32 %y, i32 %z) { +; RV32I-LABEL: th_addsl_2_extra_sext: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: sll a1, a2, a0 +; RV32I-NEXT: srai a2, a0, 2 +; RV32I-NEXT: mul a0, a1, a2 +; RV32I-NEXT: mulh a1, a1, a2 +; RV32I-NEXT: ret +; +; RV32XTHEADBA-LABEL: th_addsl_2_extra_sext: +; RV32XTHEADBA: # %bb.0: +; RV32XTHEADBA-NEXT: th.addsl a0, a1, a0, 2 +; RV32XTHEADBA-NEXT: sll a1, a2, a0 +; RV32XTHEADBA-NEXT: srai a2, a0, 2 +; RV32XTHEADBA-NEXT: mul a0, a1, a2 +; RV32XTHEADBA-NEXT: mulh a1, a1, a2 +; RV32XTHEADBA-NEXT: ret + %a = shl i32 %x, 2 + %b = add i32 %a, %y + %c = shl i32 %z, %b + %d = ashr i32 %b, 2 + %e = sext i32 %c to i64 + %f = sext i32 %d to i64 + %g = mul i64 %e, %f + ret i64 %g +} + +define i32 @addmul6(i32 %a, i32 %b) { +; RV32I-LABEL: addmul6: +; RV32I: # %bb.0: +; RV32I-NEXT: li a2, 6 +; RV32I-NEXT: mul a0, a0, a2 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32XTHEADBA-LABEL: addmul6: +; RV32XTHEADBA: # %bb.0: +; RV32XTHEADBA-NEXT: th.addsl a0, a0, a0, 1 +; RV32XTHEADBA-NEXT: th.addsl a0, a1, a0, 1 +; RV32XTHEADBA-NEXT: ret + %c = mul i32 %a, 6 + %d = add i32 %c, %b + ret i32 %d +} + +define i32 @addmul10(i32 %a, i32 %b) { +; RV32I-LABEL: addmul10: +; RV32I: # %bb.0: +; RV32I-NEXT: li a2, 10 +; RV32I-NEXT: mul a0, a0, a2 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32XTHEADBA-LABEL: addmul10: +; RV32XTHEADBA: # %bb.0: +; RV32XTHEADBA-NEXT: th.addsl a0, a0, a0, 2 +; RV32XTHEADBA-NEXT: th.addsl a0, a1, a0, 1 +; RV32XTHEADBA-NEXT: ret + %c = mul i32 %a, 10 + %d = add i32 %c, %b + ret i32 %d +} + +define i32 @addmul12(i32 %a, i32 %b) { +; RV32I-LABEL: addmul12: +; RV32I: # %bb.0: +; RV32I-NEXT: li a2, 12 +; RV32I-NEXT: mul a0, a0, a2 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32XTHEADBA-LABEL: addmul12: +; RV32XTHEADBA: # %bb.0: +; RV32XTHEADBA-NEXT: th.addsl a0, a0, a0, 1 +; RV32XTHEADBA-NEXT: th.addsl a0, a1, a0, 2 +; RV32XTHEADBA-NEXT: ret + %c = mul i32 %a, 12 + %d = add 
i32 %c, %b + ret i32 %d +} + +define i32 @addmul18(i32 %a, i32 %b) { +; RV32I-LABEL: addmul18: +; RV32I: # %bb.0: +; RV32I-NEXT: li a2, 18 +; RV32I-NEXT: mul a0, a0, a2 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32XTHEADBA-LABEL: addmul18: +; RV32XTHEADBA: # %bb.0: +; RV32XTHEADBA-NEXT: th.addsl a0, a0, a0, 3 +; RV32XTHEADBA-NEXT: th.addsl a0, a1, a0, 1 +; RV32XTHEADBA-NEXT: ret + %c = mul i32 %a, 18 + %d = add i32 %c, %b + ret i32 %d +} + +define i32 @addmul20(i32 %a, i32 %b) { +; RV32I-LABEL: addmul20: +; RV32I: # %bb.0: +; RV32I-NEXT: li a2, 20 +; RV32I-NEXT: mul a0, a0, a2 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32XTHEADBA-LABEL: addmul20: +; RV32XTHEADBA: # %bb.0: +; RV32XTHEADBA-NEXT: th.addsl a0, a0, a0, 2 +; RV32XTHEADBA-NEXT: th.addsl a0, a1, a0, 2 +; RV32XTHEADBA-NEXT: ret + %c = mul i32 %a, 20 + %d = add i32 %c, %b + ret i32 %d +} + +define i32 @addmul24(i32 %a, i32 %b) { +; RV32I-LABEL: addmul24: +; RV32I: # %bb.0: +; RV32I-NEXT: li a2, 24 +; RV32I-NEXT: mul a0, a0, a2 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32XTHEADBA-LABEL: addmul24: +; RV32XTHEADBA: # %bb.0: +; RV32XTHEADBA-NEXT: th.addsl a0, a0, a0, 1 +; RV32XTHEADBA-NEXT: th.addsl a0, a1, a0, 3 +; RV32XTHEADBA-NEXT: ret + %c = mul i32 %a, 24 + %d = add i32 %c, %b + ret i32 %d +} + +define i32 @addmul36(i32 %a, i32 %b) { +; RV32I-LABEL: addmul36: +; RV32I: # %bb.0: +; RV32I-NEXT: li a2, 36 +; RV32I-NEXT: mul a0, a0, a2 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32XTHEADBA-LABEL: addmul36: +; RV32XTHEADBA: # %bb.0: +; RV32XTHEADBA-NEXT: th.addsl a0, a0, a0, 3 +; RV32XTHEADBA-NEXT: th.addsl a0, a1, a0, 2 +; RV32XTHEADBA-NEXT: ret + %c = mul i32 %a, 36 + %d = add i32 %c, %b + ret i32 %d +} + +define i32 @addmul40(i32 %a, i32 %b) { +; RV32I-LABEL: addmul40: +; RV32I: # %bb.0: +; RV32I-NEXT: li a2, 40 +; RV32I-NEXT: mul a0, a0, a2 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32XTHEADBA-LABEL: addmul40: +; RV32XTHEADBA: # %bb.0: +; RV32XTHEADBA-NEXT: th.addsl a0, a0, a0, 2 +; RV32XTHEADBA-NEXT: th.addsl a0, a1, a0, 3 +; RV32XTHEADBA-NEXT: ret + %c = mul i32 %a, 40 + %d = add i32 %c, %b + ret i32 %d +} + +define i32 @addmul72(i32 %a, i32 %b) { +; RV32I-LABEL: addmul72: +; RV32I: # %bb.0: +; RV32I-NEXT: li a2, 72 +; RV32I-NEXT: mul a0, a0, a2 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32XTHEADBA-LABEL: addmul72: +; RV32XTHEADBA: # %bb.0: +; RV32XTHEADBA-NEXT: th.addsl a0, a0, a0, 3 +; RV32XTHEADBA-NEXT: th.addsl a0, a1, a0, 3 +; RV32XTHEADBA-NEXT: ret + %c = mul i32 %a, 72 + %d = add i32 %c, %b + ret i32 %d +} + +define i32 @mul96(i32 %a) { +; RV32I-LABEL: mul96: +; RV32I: # %bb.0: +; RV32I-NEXT: li a1, 96 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32XTHEADBA-LABEL: mul96: +; RV32XTHEADBA: # %bb.0: +; RV32XTHEADBA-NEXT: th.addsl a0, a0, a0, 1 +; RV32XTHEADBA-NEXT: slli a0, a0, 5 +; RV32XTHEADBA-NEXT: ret + %c = mul i32 %a, 96 + ret i32 %c +} + +define i32 @mul160(i32 %a) { +; RV32I-LABEL: mul160: +; RV32I: # %bb.0: +; RV32I-NEXT: li a1, 160 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32XTHEADBA-LABEL: mul160: +; RV32XTHEADBA: # %bb.0: +; RV32XTHEADBA-NEXT: th.addsl a0, a0, a0, 2 +; RV32XTHEADBA-NEXT: slli a0, a0, 5 +; RV32XTHEADBA-NEXT: ret + %c = mul i32 %a, 160 + ret i32 %c +} + +define i32 @mul200(i32 %a) { +; RV32I-LABEL: mul200: +; RV32I: # %bb.0: +; RV32I-NEXT: li a1, 200 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32XTHEADBA-LABEL: mul200: +; RV32XTHEADBA: # %bb.0: +; RV32XTHEADBA-NEXT: th.addsl a0, 
a0, a0, 2 +; RV32XTHEADBA-NEXT: th.addsl a0, a0, a0, 2 +; RV32XTHEADBA-NEXT: slli a0, a0, 3 +; RV32XTHEADBA-NEXT: ret + %c = mul i32 %a, 200 + ret i32 %c +} + +define i32 @mul288(i32 %a) { +; RV32I-LABEL: mul288: +; RV32I: # %bb.0: +; RV32I-NEXT: li a1, 288 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32XTHEADBA-LABEL: mul288: +; RV32XTHEADBA: # %bb.0: +; RV32XTHEADBA-NEXT: th.addsl a0, a0, a0, 3 +; RV32XTHEADBA-NEXT: slli a0, a0, 5 +; RV32XTHEADBA-NEXT: ret + %c = mul i32 %a, 288 + ret i32 %c +} + diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadba.ll b/llvm/test/CodeGen/RISCV/rv64xtheadba.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64xtheadba.ll @@ -0,0 +1,319 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub +; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64I +; RUN: llc -mtriple=riscv64 -mattr=+m,+xtheadba -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64XTHEADBA + +define signext i16 @th_addsl_1(i64 %0, ptr %1) { +; RV64I-LABEL: th_addsl_1: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 1 +; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: lh a0, 0(a0) +; RV64I-NEXT: ret +; +; RV64XTHEADBA-LABEL: th_addsl_1: +; RV64XTHEADBA: # %bb.0: +; RV64XTHEADBA-NEXT: th.addsl a0, a1, a0, 1 +; RV64XTHEADBA-NEXT: lh a0, 0(a0) +; RV64XTHEADBA-NEXT: ret + %3 = getelementptr inbounds i16, ptr %1, i64 %0 + %4 = load i16, ptr %3 + ret i16 %4 +} + +define signext i32 @th_addsl_2(i64 %0, ptr %1) { +; RV64I-LABEL: th_addsl_2: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 2 +; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: lw a0, 0(a0) +; RV64I-NEXT: ret +; +; RV64XTHEADBA-LABEL: th_addsl_2: +; RV64XTHEADBA: # %bb.0: +; RV64XTHEADBA-NEXT: th.addsl a0, a1, a0, 2 +; RV64XTHEADBA-NEXT: lw a0, 0(a0) +; RV64XTHEADBA-NEXT: ret + %3 = getelementptr inbounds i32, ptr %1, i64 %0 + %4 = load i32, ptr %3 + ret i32 %4 +} + +define i64 @th_addsl_3(i64 %0, ptr %1) { +; RV64I-LABEL: th_addsl_3: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 3 +; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: ld a0, 0(a0) +; RV64I-NEXT: ret +; +; RV64XTHEADBA-LABEL: th_addsl_3: +; RV64XTHEADBA: # %bb.0: +; RV64XTHEADBA-NEXT: th.addsl a0, a1, a0, 3 +; RV64XTHEADBA-NEXT: ld a0, 0(a0) +; RV64XTHEADBA-NEXT: ret + %3 = getelementptr inbounds i64, ptr %1, i64 %0 + %4 = load i64, ptr %3 + ret i64 %4 +} + +; Type legalization inserts a sext_inreg after the first add. That add will be +; selected as th.addsl which does not sign extend. SimplifyDemandedBits is unable +; to remove the sext_inreg because it has multiple uses. The ashr will use the +; sext_inreg to become sraiw. This leaves the sext_inreg only used by the shl. +; If the shl is selected as sllw, we don't need the sext_inreg. 
+define i64 @th_addsl_2_extra_sext(i32 %x, i32 %y, i32 %z) { +; RV64I-LABEL: th_addsl_2_extra_sext: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: sllw a1, a2, a0 +; RV64I-NEXT: sraiw a0, a0, 2 +; RV64I-NEXT: mul a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64XTHEADBA-LABEL: th_addsl_2_extra_sext: +; RV64XTHEADBA: # %bb.0: +; RV64XTHEADBA-NEXT: th.addsl a0, a1, a0, 2 +; RV64XTHEADBA-NEXT: sllw a1, a2, a0 +; RV64XTHEADBA-NEXT: sraiw a0, a0, 2 +; RV64XTHEADBA-NEXT: mul a0, a1, a0 +; RV64XTHEADBA-NEXT: ret + %a = shl i32 %x, 2 + %b = add i32 %a, %y + %c = shl i32 %z, %b + %d = ashr i32 %b, 2 + %e = sext i32 %c to i64 + %f = sext i32 %d to i64 + %g = mul i64 %e, %f + ret i64 %g +} + +define i64 @addmul6(i64 %a, i64 %b) { +; RV64I-LABEL: addmul6: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 6 +; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64XTHEADBA-LABEL: addmul6: +; RV64XTHEADBA: # %bb.0: +; RV64XTHEADBA-NEXT: th.addsl a0, a0, a0, 1 +; RV64XTHEADBA-NEXT: th.addsl a0, a1, a0, 1 +; RV64XTHEADBA-NEXT: ret + %c = mul i64 %a, 6 + %d = add i64 %c, %b + ret i64 %d +} + +define i64 @addmul10(i64 %a, i64 %b) { +; RV64I-LABEL: addmul10: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 10 +; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64XTHEADBA-LABEL: addmul10: +; RV64XTHEADBA: # %bb.0: +; RV64XTHEADBA-NEXT: th.addsl a0, a0, a0, 2 +; RV64XTHEADBA-NEXT: th.addsl a0, a1, a0, 1 +; RV64XTHEADBA-NEXT: ret + %c = mul i64 %a, 10 + %d = add i64 %c, %b + ret i64 %d +} + +define i64 @addmul12(i64 %a, i64 %b) { +; RV64I-LABEL: addmul12: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 12 +; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64XTHEADBA-LABEL: addmul12: +; RV64XTHEADBA: # %bb.0: +; RV64XTHEADBA-NEXT: th.addsl a0, a0, a0, 1 +; RV64XTHEADBA-NEXT: th.addsl a0, a1, a0, 2 +; RV64XTHEADBA-NEXT: ret + %c = mul i64 %a, 12 + %d = add i64 %c, %b + ret i64 %d +} + +define i64 @addmul18(i64 %a, i64 %b) { +; RV64I-LABEL: addmul18: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 18 +; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64XTHEADBA-LABEL: addmul18: +; RV64XTHEADBA: # %bb.0: +; RV64XTHEADBA-NEXT: th.addsl a0, a0, a0, 3 +; RV64XTHEADBA-NEXT: th.addsl a0, a1, a0, 1 +; RV64XTHEADBA-NEXT: ret + %c = mul i64 %a, 18 + %d = add i64 %c, %b + ret i64 %d +} + +define i64 @addmul20(i64 %a, i64 %b) { +; RV64I-LABEL: addmul20: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 20 +; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64XTHEADBA-LABEL: addmul20: +; RV64XTHEADBA: # %bb.0: +; RV64XTHEADBA-NEXT: th.addsl a0, a0, a0, 2 +; RV64XTHEADBA-NEXT: th.addsl a0, a1, a0, 2 +; RV64XTHEADBA-NEXT: ret + %c = mul i64 %a, 20 + %d = add i64 %c, %b + ret i64 %d +} + +define i64 @addmul24(i64 %a, i64 %b) { +; RV64I-LABEL: addmul24: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 24 +; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64XTHEADBA-LABEL: addmul24: +; RV64XTHEADBA: # %bb.0: +; RV64XTHEADBA-NEXT: th.addsl a0, a0, a0, 1 +; RV64XTHEADBA-NEXT: th.addsl a0, a1, a0, 3 +; RV64XTHEADBA-NEXT: ret + %c = mul i64 %a, 24 + %d = add i64 %c, %b + ret i64 %d +} + +define i64 @addmul36(i64 %a, i64 %b) { +; RV64I-LABEL: addmul36: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 36 +; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64XTHEADBA-LABEL: addmul36: +; RV64XTHEADBA: # %bb.0: +; 
RV64XTHEADBA-NEXT: th.addsl a0, a0, a0, 3 +; RV64XTHEADBA-NEXT: th.addsl a0, a1, a0, 2 +; RV64XTHEADBA-NEXT: ret + %c = mul i64 %a, 36 + %d = add i64 %c, %b + ret i64 %d +} + +define i64 @addmul40(i64 %a, i64 %b) { +; RV64I-LABEL: addmul40: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 40 +; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64XTHEADBA-LABEL: addmul40: +; RV64XTHEADBA: # %bb.0: +; RV64XTHEADBA-NEXT: th.addsl a0, a0, a0, 2 +; RV64XTHEADBA-NEXT: th.addsl a0, a1, a0, 3 +; RV64XTHEADBA-NEXT: ret + %c = mul i64 %a, 40 + %d = add i64 %c, %b + ret i64 %d +} + +define i64 @addmul72(i64 %a, i64 %b) { +; RV64I-LABEL: addmul72: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 72 +; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64XTHEADBA-LABEL: addmul72: +; RV64XTHEADBA: # %bb.0: +; RV64XTHEADBA-NEXT: th.addsl a0, a0, a0, 3 +; RV64XTHEADBA-NEXT: th.addsl a0, a1, a0, 3 +; RV64XTHEADBA-NEXT: ret + %c = mul i64 %a, 72 + %d = add i64 %c, %b + ret i64 %d +} + +define i64 @mul96(i64 %a) { +; RV64I-LABEL: mul96: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, 96 +; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64XTHEADBA-LABEL: mul96: +; RV64XTHEADBA: # %bb.0: +; RV64XTHEADBA-NEXT: th.addsl a0, a0, a0, 1 +; RV64XTHEADBA-NEXT: slli a0, a0, 5 +; RV64XTHEADBA-NEXT: ret + %c = mul i64 %a, 96 + ret i64 %c +} + +define i64 @mul160(i64 %a) { +; RV64I-LABEL: mul160: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, 160 +; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64XTHEADBA-LABEL: mul160: +; RV64XTHEADBA: # %bb.0: +; RV64XTHEADBA-NEXT: th.addsl a0, a0, a0, 2 +; RV64XTHEADBA-NEXT: slli a0, a0, 5 +; RV64XTHEADBA-NEXT: ret + %c = mul i64 %a, 160 + ret i64 %c +} + +define i64 @mul200(i64 %a) { +; RV64I-LABEL: mul200: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, 200 +; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64XTHEADBA-LABEL: mul200: +; RV64XTHEADBA: # %bb.0: +; RV64XTHEADBA-NEXT: th.addsl a0, a0, a0, 2 +; RV64XTHEADBA-NEXT: th.addsl a0, a0, a0, 2 +; RV64XTHEADBA-NEXT: slli a0, a0, 3 +; RV64XTHEADBA-NEXT: ret + %c = mul i64 %a, 200 + ret i64 %c +} + +define i64 @mul288(i64 %a) { +; RV64I-LABEL: mul288: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, 288 +; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64XTHEADBA-LABEL: mul288: +; RV64XTHEADBA: # %bb.0: +; RV64XTHEADBA-NEXT: th.addsl a0, a0, a0, 3 +; RV64XTHEADBA-NEXT: slli a0, a0, 5 +; RV64XTHEADBA-NEXT: ret + %c = mul i64 %a, 288 + ret i64 %c +} + diff --git a/llvm/test/MC/RISCV/XTHeadBa-invalid.s b/llvm/test/MC/RISCV/XTHeadBa-invalid.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/RISCV/XTHeadBa-invalid.s @@ -0,0 +1,11 @@ +# RUN: not llvm-mc -triple riscv32 -mattr=+xtheadba < %s 2>&1 | FileCheck %s +# RUN: not llvm-mc -triple riscv64 -mattr=+xtheadba < %s 2>&1 | FileCheck %s + +# Too few operands +th.addsl t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction +# Too few operands +th.addsl t0, t1, t2 # CHECK: :[[@LINE]]:1: error: too few operands for instruction +# Immediate operand out of range +th.addsl t0, t1, t2, 4 # CHECK: :[[@LINE]]:22: error: immediate must be an integer in the range [0, 3] +# Immediate operand out of range +th.addsl t0, t1, t2, -1 # CHECK: :[[@LINE]]:22: error: immediate must be an integer in the range [0, 3] diff --git a/llvm/test/MC/RISCV/XTHeadBa-valid.s b/llvm/test/MC/RISCV/XTHeadBa-valid.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/RISCV/XTHeadBa-valid.s @@ -0,0 +1,24 @@ +# With XTHeadBa (address generation) 
extension: +# RUN: llvm-mc %s -triple=riscv32 -mattr=+xtheadba -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc %s -triple=riscv64 -mattr=+xtheadba -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+xtheadba < %s \ +# RUN: | llvm-objdump --mattr=+xtheadba -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+xtheadba < %s \ +# RUN: | llvm-objdump --mattr=+xtheadba -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: th.addsl t0, t1, t2, 0 +# CHECK-ASM: encoding: [0x8b,0x12,0x73,0x00] +th.addsl t0, t1, t2, 0 +# CHECK-ASM-AND-OBJ: th.addsl t0, t1, t2, 1 +# CHECK-ASM: encoding: [0x8b,0x12,0x73,0x02] +th.addsl t0, t1, t2, 1 +# CHECK-ASM-AND-OBJ: th.addsl t0, t1, t2, 2 +# CHECK-ASM: encoding: [0x8b,0x12,0x73,0x04] +th.addsl t0, t1, t2, 2 +# CHECK-ASM-AND-OBJ: th.addsl t0, t1, t2, 3 +# CHECK-ASM: encoding: [0x8b,0x12,0x73,0x06] +th.addsl t0, t1, t2, 3
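
Note on the encodings: the byte lists asserted in XTHeadBa-valid.s follow directly from the field layout in THShiftALU_rri (Inst{31-27} = 0, Inst{26-25} = uimm2, funct3 = 0b001, opcode = custom-0). A minimal standalone C++ sketch that recomputes them, assuming the standard register numbering t0=x5, t1=x6, t2=x7; the helper name encodeThAddsl is made up for illustration and is not part of the patch:

#include <cstdint>
#include <cstdio>

// Assemble th.addsl rd, rs1, rs2, uimm2 per the THShiftALU_rri field layout.
static uint32_t encodeThAddsl(uint32_t rd, uint32_t rs1, uint32_t rs2,
                              uint32_t uimm2) {
  uint32_t insn = 0;
  insn |= uimm2 << 25;   // Inst{26-25} = uimm2 (Inst{31-27} = 0)
  insn |= rs2 << 20;     // Inst{24-20} = rs2
  insn |= rs1 << 15;     // Inst{19-15} = rs1
  insn |= 0b001u << 12;  // Inst{14-12} = funct3 for th.addsl
  insn |= rd << 7;       // Inst{11-7}  = rd
  insn |= 0b0001011u;    // Inst{6-0}   = custom-0 opcode (0x0b)
  return insn;
}

int main() {
  // Prints 0x0073128b, 0x0273128b, 0x0473128b, 0x0673128b, i.e. the
  // little-endian bytes [0x8b,0x12,0x73,0x00/0x02/0x04/0x06] checked above.
  for (uint32_t uimm2 = 0; uimm2 < 4; ++uimm2)
    printf("th.addsl t0, t1, t2, %u -> 0x%08x\n", uimm2,
           (unsigned)encodeThAddsl(5, 6, 7, uimm2));
}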
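The codegen patterns in RISCVInstrInfoXTHead.td all rest on the identity th.addsl rd, rs1, rs2, n == rs1 + (rs2 << n), which gives multiply-by-3/5/9 in one instruction and composes for the larger factors. A short standalone C++ sketch spelling out a few of the decompositions used above (the addsl helper is a stand-in for the instruction semantics, not an LLVM API):

#include <cassert>
#include <cstdint>

// th.addsl rd, rs1, rs2, n computes rs1 + (rs2 << n).
static uint64_t addsl(uint64_t rs1, uint64_t rs2, unsigned n) {
  return rs1 + (rs2 << n);
}

int main() {
  uint64_t a = 123, b = 456;
  // addmul6: b + 6*a = th.addsl(b, th.addsl(a, a, 1), 1)
  assert(addsl(b, addsl(a, a, 1), 1) == b + 6 * a);
  // mul by 13: 13*a = th.addsl(a, th.addsl(a, a, 1), 2) = a + (3*a << 2)
  assert(addsl(a, addsl(a, a, 1), 2) == 13 * a);
  // mul by 200: (25*a) << 3, with 25*a built from two th.addsl
  uint64_t t = addsl(a, a, 2);               // 5*a
  assert((addsl(t, t, 2) << 3) == 200 * a);  // (5*5*a) << 3
  return 0;
}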