diff --git a/clang/lib/Basic/Targets/BPF.h b/clang/lib/Basic/Targets/BPF.h --- a/clang/lib/Basic/Targets/BPF.h +++ b/clang/lib/Basic/Targets/BPF.h @@ -106,7 +106,7 @@ void fillValidCPUList(SmallVectorImpl &Values) const override; bool setCPU(const std::string &Name) override { - if (Name == "v3") { + if (Name == "v3" || Name == "v4") { HasAlu32 = true; } diff --git a/clang/lib/Basic/Targets/BPF.cpp b/clang/lib/Basic/Targets/BPF.cpp --- a/clang/lib/Basic/Targets/BPF.cpp +++ b/clang/lib/Basic/Targets/BPF.cpp @@ -32,7 +32,7 @@ } static constexpr llvm::StringLiteral ValidCPUNames[] = {"generic", "v1", "v2", - "v3", "probe"}; + "v3", "v4", "probe"}; bool BPFTargetInfo::isValidCPUName(StringRef Name) const { return llvm::is_contained(ValidCPUNames, Name); diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c --- a/clang/test/Misc/target-invalid-cpu-note.c +++ b/clang/test/Misc/target-invalid-cpu-note.c @@ -73,7 +73,7 @@ // RUN: not %clang_cc1 -triple bpf--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix BPF // BPF: error: unknown target CPU 'not-a-cpu' -// BPF-NEXT: note: valid target CPU values are: generic, v1, v2, v3, probe{{$}} +// BPF-NEXT: note: valid target CPU values are: generic, v1, v2, v3, v4, probe{{$}} // RUN: not %clang_cc1 -triple avr--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AVR // AVR: error: unknown target CPU 'not-a-cpu' diff --git a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp --- a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp +++ b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp @@ -227,6 +227,7 @@ .Case("if", true) .Case("call", true) .Case("goto", true) + .Case("gotol", true) .Case("*", true) .Case("exit", true) .Case("lock", true) @@ -241,13 +242,20 @@ .Case("u32", true) .Case("u16", true) .Case("u8", true) + .Case("s32", true) + .Case("s16", true) + .Case("s8", true) .Case("be64", true) 
.Case("be32", true) .Case("be16", true) .Case("le64", true) .Case("le32", true) .Case("le16", true) + .Case("bswap16", true) + .Case("bswap32", true) + .Case("bswap64", true) .Case("goto", true) + .Case("gotol", true) .Case("ll", true) .Case("skb", true) .Case("s", true) diff --git a/llvm/lib/Target/BPF/BPF.td b/llvm/lib/Target/BPF/BPF.td --- a/llvm/lib/Target/BPF/BPF.td +++ b/llvm/lib/Target/BPF/BPF.td @@ -30,6 +30,7 @@ def : Proc<"v1", []>; def : Proc<"v2", []>; def : Proc<"v3", [ALU32]>; +def : Proc<"v4", [ALU32]>; def : Proc<"probe", []>; def BPFInstPrinter : AsmWriter { @@ -45,7 +46,7 @@ int Variant = 0; string Name = "BPF"; string BreakCharacters = "."; - string TokenizingCharacters = "#()[]=:.<>!+*"; + string TokenizingCharacters = "#()[]=:.<>!+*%/"; } def BPF : Target { diff --git a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp --- a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp +++ b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp @@ -192,15 +192,17 @@ default: break; case ISD::SDIV: { - DebugLoc Empty; - const DebugLoc &DL = Node->getDebugLoc(); - if (DL != Empty) - errs() << "Error at line " << DL.getLine() << ": "; - else - errs() << "Error: "; - errs() << "Unsupport signed division for DAG: "; - Node->print(errs(), CurDAG); - errs() << "Please convert to unsigned div/mod.\n"; + if (!Subtarget->getCPUv4_sdiv()) { + DebugLoc Empty; + const DebugLoc &DL = Node->getDebugLoc(); + if (DL != Empty) + errs() << "Error at line " << DL.getLine() << ": "; + else + errs() << "Error: "; + errs() << "Unsupport signed division for DAG: "; + Node->print(errs(), CurDAG); + errs() << "Please convert to unsigned div/mod.\n"; + } break; } case ISD::INTRINSIC_W_CHAIN: { diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h --- a/llvm/lib/Target/BPF/BPFISelLowering.h +++ b/llvm/lib/Target/BPF/BPFISelLowering.h @@ -71,6 +71,7 @@ bool HasAlu32; bool HasJmp32; bool HasJmpExt; + bool HasCPUv4_movsx; SDValue 
LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp --- a/llvm/lib/Target/BPF/BPFISelLowering.cpp +++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp @@ -102,7 +102,8 @@ setOperationAction(ISD::SDIVREM, VT, Expand); setOperationAction(ISD::UDIVREM, VT, Expand); - setOperationAction(ISD::SREM, VT, Expand); + if (!STI.getCPUv4_sdiv()) + setOperationAction(ISD::SREM, VT, Expand); setOperationAction(ISD::MULHU, VT, Expand); setOperationAction(ISD::MULHS, VT, Expand); setOperationAction(ISD::UMUL_LOHI, VT, Expand); @@ -141,9 +142,11 @@ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Expand); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand); + if (!STI.getCPUv4_ldsx()) { + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand); + } } setBooleanContents(ZeroOrOneBooleanContent); @@ -183,6 +186,7 @@ HasAlu32 = STI.getHasAlu32(); HasJmp32 = STI.getHasJmp32(); HasJmpExt = STI.getHasJmpExt(); + HasCPUv4_movsx = STI.getCPUv4_movsx(); } bool BPFTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { @@ -673,11 +677,15 @@ Register PromotedReg0 = RegInfo.createVirtualRegister(RC); Register PromotedReg1 = RegInfo.createVirtualRegister(RC); Register PromotedReg2 = RegInfo.createVirtualRegister(RC); - BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg); - BuildMI(BB, DL, TII.get(BPF::SLL_ri), PromotedReg1) - .addReg(PromotedReg0).addImm(32); - BuildMI(BB, DL, TII.get(RShiftOp), PromotedReg2) - .addReg(PromotedReg1).addImm(32); + if (HasCPUv4_movsx) { + BuildMI(BB, DL, TII.get(BPF::MOVSX_rr_32), PromotedReg0).addReg(Reg); + } 
else { + BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg); + BuildMI(BB, DL, TII.get(BPF::SLL_ri), PromotedReg1) + .addReg(PromotedReg0).addImm(32); + BuildMI(BB, DL, TII.get(RShiftOp), PromotedReg2) + .addReg(PromotedReg1).addImm(32); + } return PromotedReg2; } diff --git a/llvm/lib/Target/BPF/BPFInstrFormats.td b/llvm/lib/Target/BPF/BPFInstrFormats.td --- a/llvm/lib/Target/BPF/BPFInstrFormats.td +++ b/llvm/lib/Target/BPF/BPFInstrFormats.td @@ -90,6 +90,7 @@ def BPF_ABS : BPFModeModifer<0x1>; def BPF_IND : BPFModeModifer<0x2>; def BPF_MEM : BPFModeModifer<0x3>; +def BPF_MEMSX : BPFModeModifer<0x4>; def BPF_ATOMIC : BPFModeModifer<0x6>; class BPFAtomicFlag val> { diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td --- a/llvm/lib/Target/BPF/BPFInstrInfo.td +++ b/llvm/lib/Target/BPF/BPFInstrInfo.td @@ -53,6 +53,12 @@ def BPFIsBigEndian : Predicate<"!CurDAG->getDataLayout().isLittleEndian()">; def BPFHasALU32 : Predicate<"Subtarget->getHasAlu32()">; def BPFNoALU32 : Predicate<"!Subtarget->getHasAlu32()">; +def BPFHasCPUv4_ldsx : Predicate<"Subtarget->getCPUv4_ldsx()">; +def BPFHasCPUv4_movsx : Predicate<"Subtarget->getCPUv4_movsx()">; +def BPFHasCPUv4_bswap : Predicate<"Subtarget->getCPUv4_bswap()">; +def BPFHasCPUv4_sdiv : Predicate<"Subtarget->getCPUv4_sdiv()">; +def BPFNoCPUv4_movsx : Predicate<"!Subtarget->getCPUv4_movsx()">; +def BPFNoCPUv4_bswap : Predicate<"!Subtarget->getCPUv4_bswap()">; def brtarget : Operand { let PrintMethod = "printBrTargetOperand"; @@ -240,18 +246,19 @@ } // ALU instructions -class ALU_RI pattern> : TYPE_ALU_JMP { bits<4> dst; bits<32> imm; let Inst{51-48} = dst; + let Inst{47-32} = off; let Inst{31-0} = imm; let BPFClass = Class; } -class ALU_RR pattern> : TYPE_ALU_JMP { bits<4> dst; @@ -259,26 +266,27 @@ let Inst{55-52} = src; let Inst{51-48} = dst; + let Inst{47-32} = off; let BPFClass = Class; } -multiclass ALU { - def _rr : ALU_RR { + def _rr : ALU_RR; - def _ri : ALU_RI; - def 
_rr_32 : ALU_RR; - def _ri_32 : ALU_RI>=", srl>; - defm XOR : ALU>=", sra>; -} - defm MUL : ALU; - defm MOD : ALU>=", srl>; + defm XOR : ALU>=", sra>; +} + defm MUL : ALU; + defm MOD : ALU; + defm SMOD : ALU; -def MOV_rr : ALU_RR; -def MOV_ri : ALU_RI; -def MOV_rr_32 : ALU_RR; -def MOV_ri_32 : ALU_RI; + +let Predicates = [BPFHasCPUv4_movsx] in { + def MOVSX_rr_8 : ALU_RR; + def MOVSX_rr_16 : ALU_RR; + def MOVSX_rr_32 : ALU_RR; + def MOVSX_rr_32_8 : ALU_RR; + def MOVSX_rr_32_16 : ALU_RR; +} } def FI_ri @@ -421,8 +457,8 @@ def STD : STOREi64; // LOAD instructions -class LOAD Pattern> - : TYPE_LD_ST Pattern> + : TYPE_LD_ST - : LOAD; +class LOADi64 + : LOAD; let isCodeGenOnly = 1 in { def CORE_MEM : TYPE_LD_ST; let Constraints = "$dst = $src" in { - def CORE_SHIFT : ALU_RR; - def LDH : LOADi64; - def LDB : LOADi64; + def LDW : LOADi64; + def LDH : LOADi64; + def LDB : LOADi64; +} + +let Predicates = [BPFHasCPUv4_ldsx] in { + def LDWSX : LOADi64; + def LDHSX : LOADi64; + def LDBSX : LOADi64; } -def LDD : LOADi64; +def LDD : LOADi64; class BRANCH Pattern> : TYPE_ALU_JMP Pattern> + : TYPE_ALU_JMP { + bits<32> BrDst; + + let Inst{31-0} = BrDst; + let BPFClass = BPF_JMP32; +} + class CALL : TYPE_ALU_JMP; + def JMPL : BRANCH_LONG; } // Jump and link @@ -835,7 +890,7 @@ } // bswap16, bswap32, bswap64 -class BSWAP SizeOp, string OpcodeStr, BPFSrcType SrcType, list Pattern> +class BSWAP SizeOp, string OpcodeStr, BPFSrcType SrcType, list Pattern> : TYPE_ALU_JMP; + def BSWAP32 : BSWAP; + def BSWAP64 : BSWAP; + } + + let Predicates = [BPFNoCPUv4_bswap] in { let Predicates = [BPFIsLittleEndian] in { - def BE16 : BSWAP<16, "be16", BPF_TO_BE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 48)))]>; - def BE32 : BSWAP<32, "be32", BPF_TO_BE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 32)))]>; - def BE64 : BSWAP<64, "be64", BPF_TO_BE, [(set GPR:$dst, (bswap GPR:$src))]>; + def BE16 : BSWAP; + def BE32 : BSWAP; + def BE64 : BSWAP; } let Predicates = [BPFIsBigEndian] in { - def LE16 : 
BSWAP<16, "le16", BPF_TO_LE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 48)))]>; - def LE32 : BSWAP<32, "le32", BPF_TO_LE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 32)))]>; - def LE64 : BSWAP<64, "le64", BPF_TO_LE, [(set GPR:$dst, (bswap GPR:$src))]>; + def LE16 : BSWAP; + def LE32 : BSWAP; + def LE64 : BSWAP; } + } } let Defs = [R0, R1, R2, R3, R4, R5], Uses = [R6], hasSideEffects = 1, @@ -898,13 +961,15 @@ def LD_IND_W : LOAD_IND; let isCodeGenOnly = 1 in { - def MOV_32_64 : ALU_RR; } -def : Pat<(i64 (sext GPR32:$src)), - (SRA_ri (SLL_ri (MOV_32_64 GPR32:$src), 32), 32)>; +let Predicates = [BPFNoCPUv4_movsx] in { + def : Pat<(i64 (sext GPR32:$src)), + (SRA_ri (SLL_ri (MOV_32_64 GPR32:$src), 32), 32)>; +} def : Pat<(i64 (zext GPR32:$src)), (MOV_32_64 GPR32:$src)>; @@ -940,8 +1005,8 @@ def STB32 : STOREi32; } -class LOAD32 Pattern> - : TYPE_LD_ST Pattern> + : TYPE_LD_ST - : LOAD32; +class LOADi32 + : LOAD32; let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in { - def LDW32 : LOADi32; - def LDH32 : LOADi32; - def LDB32 : LOADi32; + def LDW32 : LOADi32; + def LDH32 : LOADi32; + def LDB32 : LOADi32; +} + +let Predicates = [BPFHasCPUv4_ldsx], DecoderNamespace = "BPFALU32" in { + def LDH32SX : LOADi32; + def LDB32SX : LOADi32; } let Predicates = [BPFHasALU32] in { @@ -973,6 +1043,7 @@ (STW32 (EXTRACT_SUBREG GPR:$src, sub_32), ADDRri:$dst)>; def : Pat<(i32 (extloadi8 ADDRri:$src)), (i32 (LDB32 ADDRri:$src))>; def : Pat<(i32 (extloadi16 ADDRri:$src)), (i32 (LDH32 ADDRri:$src))>; + def : Pat<(i64 (zextloadi8 ADDRri:$src)), (SUBREG_TO_REG (i64 0), (LDB32 ADDRri:$src), sub_32)>; def : Pat<(i64 (zextloadi16 ADDRri:$src)), diff --git a/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/llvm/lib/Target/BPF/BPFMIPeephole.cpp --- a/llvm/lib/Target/BPF/BPFMIPeephole.cpp +++ b/llvm/lib/Target/BPF/BPFMIPeephole.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #include +#include using namespace llvm; @@ -301,7 +302,9 @@ static 
char ID; MachineFunction *MF; + const BPFInstrInfo *TII; const TargetRegisterInfo *TRI; + bool IsCPUv4; BPFMIPreEmitPeephole() : MachineFunctionPass(ID) { initializeBPFMIPreEmitPeepholePass(*PassRegistry::getPassRegistry()); @@ -311,7 +314,11 @@ // Initialize class variables. void initialize(MachineFunction &MFParm); + bool in16BitRange(int Num); bool eliminateRedundantMov(); + bool adjustBranch(); + + std::map ReverseCondOpMap; public: @@ -322,14 +329,20 @@ initialize(MF); - return eliminateRedundantMov(); + bool Changed; + Changed = eliminateRedundantMov(); + if (IsCPUv4) + Changed = adjustBranch() || Changed; + return Changed; } }; // Initialize class variables. void BPFMIPreEmitPeephole::initialize(MachineFunction &MFParm) { MF = &MFParm; + TII = MF->getSubtarget().getInstrInfo(); TRI = MF->getSubtarget().getRegisterInfo(); + IsCPUv4 = MF->getSubtarget().getCPUv4_ja(); LLVM_DEBUG(dbgs() << "*** BPF PreEmit peephole pass ***\n\n"); } @@ -374,6 +387,215 @@ return Eliminated; } +bool BPFMIPreEmitPeephole::in16BitRange(int Num) { + // Well, the cut-off is not precisely at 16bit range since + // new insns are added during the transformation. So let us be + // a little bit conservative and only accept half of that range. + return Num >= (INT16_MIN >> 1) && Num <= (INT16_MAX >> 1); +} + +// Before cpu=v4, only 16bit branch target offset (-0x8000 to 0x7fff) +// is supported for both unconditional (JMP) and conditional (JEQ, JSGT, +// etc.) branches. In certain cases, e.g., full unrolling, the branch +// target offset might exceed 16bit range. If this happens, llvm +// will generate incorrect code as the offset is truncated to 16bit. +// +// To fix this rare case, a new insn JMPL is introduced. This new +// insn supports 32bit branch target offset. The compiler +// does not use this insn during insn selection. Rather, BPF backend +// will estimate the branch target offset and do JMP -> JMPL and +// JEQ -> JEQ + JMPL conversion if the estimated branch target offset +// is beyond 16bit. 
+bool BPFMIPreEmitPeephole::adjustBranch() { + bool Changed = false; + int CurrNumInsns = 0; + std::map<MachineBasicBlock *, int> SoFarNumInsns; + std::map<MachineBasicBlock *, MachineBasicBlock *> FollowThroughBB; + std::vector<MachineBasicBlock *> MBBs; + + MachineBasicBlock *PrevBB = nullptr; + for (MachineBasicBlock &MBB : *MF) { + // MBB.size() is the number of insns in this basic block, including some + // debug info, e.g., DEBUG_VALUE, so we may over-count a little bit. + // Typically we have way more normal insns than DEBUG_VALUE insns. + // Also, if we indeed need to convert conditional branch like JEQ to + // JEQ + JMPL, we actually introduced some new insns like below. + CurrNumInsns += (int)MBB.size(); + SoFarNumInsns[&MBB] = CurrNumInsns; + if (PrevBB != nullptr) + FollowThroughBB[PrevBB] = &MBB; + PrevBB = &MBB; + // A list of original BBs to make later traversal easier. + MBBs.push_back(&MBB); + } + FollowThroughBB[PrevBB] = nullptr; + + for (unsigned i = 0; i < MBBs.size(); i++) { + // We have four cases here: + // (1). no terminator, simple follow through. + // (2). jmp to another bb. + // (3). conditional jmp to another bb or follow through. + // (4). conditional jmp followed by an unconditional jmp. + MachineInstr *CondJmp = nullptr, *UncondJmp = nullptr; + + MachineBasicBlock *MBB = MBBs[i]; + for (MachineInstr &Term : MBB->terminators()) { + if (Term.isConditionalBranch()) { + assert(CondJmp == nullptr); + CondJmp = &Term; + } else if (Term.isUnconditionalBranch()) { + assert(UncondJmp == nullptr); + UncondJmp = &Term; + } + } + + // (1). no terminator, simple follow through. + if (!CondJmp && !UncondJmp) + continue; + + MachineBasicBlock *CondTargetBB, *JmpBB; + CurrNumInsns = SoFarNumInsns[MBB]; + + // (2). jmp to another bb. + if (!CondJmp && UncondJmp) { + JmpBB = UncondJmp->getOperand(0).getMBB(); + if (in16BitRange(SoFarNumInsns[JmpBB] - JmpBB->size() - CurrNumInsns)) + continue; + + // replace this insn with a JMPL. 
+ BuildMI(MBB, UncondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(JmpBB); + UncondJmp->eraseFromParent(); + Changed = true; + continue; + } + + const BasicBlock *TermBB = MBB->getBasicBlock(); + int Dist; + + // (3). conditional jmp to another bb or follow through. + if (!UncondJmp) { + CondTargetBB = CondJmp->getOperand(2).getMBB(); + MachineBasicBlock *FollowBB = FollowThroughBB[MBB]; + Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns; + if (in16BitRange(Dist)) + continue; + + // We have + // B2: ... + // if (cond) goto B5 + // B3: ... + // where B2 -> B5 is beyond 16bit range. + // + // We do not have 32bit cond jmp insn. So we try to do + // the following. + // B2: ... + // if (cond) goto New_B1 + // New_B0: goto B3 + // New_B1: gotol B5 + // B3: ... + // Basically two new basic blocks are created. + MachineBasicBlock *New_B0 = MF->CreateMachineBasicBlock(TermBB); + MachineBasicBlock *New_B1 = MF->CreateMachineBasicBlock(TermBB); + + // Insert New_B0 and New_B1 into function block list. + MachineFunction::iterator MBB_I = ++MBB->getIterator(); + MF->insert(MBB_I, New_B0); + MF->insert(MBB_I, New_B1); + + // replace B2 cond jump + if (CondJmp->getOperand(1).isReg()) + BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode())) + .addReg(CondJmp->getOperand(0).getReg()) + .addReg(CondJmp->getOperand(1).getReg()) + .addMBB(New_B1); + else + BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode())) + .addReg(CondJmp->getOperand(0).getReg()) + .addImm(CondJmp->getOperand(1).getImm()) + .addMBB(New_B1); + + // it is possible that CondTargetBB and FollowBB are the same. But the + // above Dist checking should have already filtered out this case. + MBB->removeSuccessor(CondTargetBB); + MBB->removeSuccessor(FollowBB); + MBB->addSuccessor(New_B0); + MBB->addSuccessor(New_B1); + + // Populate insns in New_B0 and New_B1. 
+ BuildMI(New_B0, CondJmp->getDebugLoc(), TII->get(BPF::JMP)).addMBB(FollowBB); + BuildMI(New_B1, CondJmp->getDebugLoc(), TII->get(BPF::JMPL)) + .addMBB(CondTargetBB); + + New_B0->addSuccessor(FollowBB); + New_B1->addSuccessor(CondTargetBB); + CondJmp->eraseFromParent(); + Changed = true; + continue; + } + + // (4). conditional jmp followed by an unconditional jmp. + CondTargetBB = CondJmp->getOperand(2).getMBB(); + JmpBB = UncondJmp->getOperand(0).getMBB(); + + // We have + // B2: ... + // if (cond) goto B5 + // JMP B7 + // B3: ... + // + // If only B2->B5 is out of 16bit range, we can do + // B2: ... + // if (cond) goto New_B + // JMP B7 + // New_B: gotol B5 + // B3: ... + // + // If only 'JMP B7' is out of 16bit range, we can replace + // 'JMP B7' with 'JMPL B7'. + // + // If both B2->B5 and 'JMP B7' are out of range, just do + // both the above transformations. + Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns; + if (!in16BitRange(Dist)) { + MachineBasicBlock *New_B = MF->CreateMachineBasicBlock(TermBB); + + // Insert New_B into function block list. + MF->insert(++MBB->getIterator(), New_B); + + // replace B2 cond jump + if (CondJmp->getOperand(1).isReg()) + BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode())) + .addReg(CondJmp->getOperand(0).getReg()) + .addReg(CondJmp->getOperand(1).getReg()) + .addMBB(New_B); + else + BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode())) + .addReg(CondJmp->getOperand(0).getReg()) + .addImm(CondJmp->getOperand(1).getImm()) + .addMBB(New_B); + + if (CondTargetBB != JmpBB) + MBB->removeSuccessor(CondTargetBB); + MBB->addSuccessor(New_B); + + // Populate insn in New_B. 
+ BuildMI(New_B, CondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(CondTargetBB); + + New_B->addSuccessor(CondTargetBB); + CondJmp->eraseFromParent(); + Changed = true; + } + + if (!in16BitRange(SoFarNumInsns[JmpBB] - JmpBB->size() - CurrNumInsns)) { + BuildMI(MBB, UncondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(JmpBB); + UncondJmp->eraseFromParent(); + Changed = true; + } + } + + return Changed; +} + } // end default namespace INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole", diff --git a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp --- a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp +++ b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp @@ -96,7 +96,9 @@ bool BPFMISimplifyPatchable::isLoadInst(unsigned Opcode) { return Opcode == BPF::LDD || Opcode == BPF::LDW || Opcode == BPF::LDH || Opcode == BPF::LDB || Opcode == BPF::LDW32 || Opcode == BPF::LDH32 || - Opcode == BPF::LDB32; + Opcode == BPF::LDB32 || Opcode == BPF::LDWSX || Opcode == BPF::LDHSX || + Opcode == BPF::LDBSX || Opcode == BPF::LDH32SX || + Opcode == BPF::LDB32SX; } void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI, @@ -119,7 +121,9 @@ unsigned COREOp; if (Opcode == BPF::LDB || Opcode == BPF::LDH || Opcode == BPF::LDW || Opcode == BPF::LDD || Opcode == BPF::STB || Opcode == BPF::STH || - Opcode == BPF::STW || Opcode == BPF::STD) + Opcode == BPF::STW || Opcode == BPF::STD || Opcode == BPF::LDWSX || + Opcode == BPF::LDHSX || Opcode == BPF::LDBSX || Opcode == BPF::LDH32SX || + Opcode == BPF::LDB32SX) COREOp = BPF::CORE_MEM; else if (Opcode == BPF::LDB32 || Opcode == BPF::LDH32 || Opcode == BPF::LDW32 || Opcode == BPF::STB32 || diff --git a/llvm/lib/Target/BPF/BPFSubtarget.h b/llvm/lib/Target/BPF/BPFSubtarget.h --- a/llvm/lib/Target/BPF/BPFSubtarget.h +++ b/llvm/lib/Target/BPF/BPFSubtarget.h @@ -56,6 +56,9 @@ // whether we should enable MCAsmInfo DwarfUsesRelocationsAcrossSections bool UseDwarfRIS; + // whether cpu v4 insns are enabled. 
+ bool CPUv4_ldsx, CPUv4_movsx, CPUv4_bswap, CPUv4_sdiv, CPUv4_ja; + public: // This constructor initializes the data members to match that // of the specified triple. @@ -71,6 +74,11 @@ bool getHasJmp32() const { return HasJmp32; } bool getHasAlu32() const { return HasAlu32; } bool getUseDwarfRIS() const { return UseDwarfRIS; } + bool getCPUv4_ldsx() const { return CPUv4_ldsx; } + bool getCPUv4_movsx() const { return CPUv4_movsx; } + bool getCPUv4_bswap() const { return CPUv4_bswap; } + bool getCPUv4_sdiv() const { return CPUv4_sdiv; } + bool getCPUv4_ja() const { return CPUv4_ja; } const BPFInstrInfo *getInstrInfo() const override { return &InstrInfo; } const BPFFrameLowering *getFrameLowering() const override { diff --git a/llvm/lib/Target/BPF/BPFSubtarget.cpp b/llvm/lib/Target/BPF/BPFSubtarget.cpp --- a/llvm/lib/Target/BPF/BPFSubtarget.cpp +++ b/llvm/lib/Target/BPF/BPFSubtarget.cpp @@ -23,6 +23,17 @@ #define GET_SUBTARGETINFO_CTOR #include "BPFGenSubtargetInfo.inc" +static cl::opt Disable_CPUv4_ldsx("disable-cpuv4-ldsx", cl::Hidden, cl::init(false), + cl::desc("Disable ldsx insns in cpuv4")); +static cl::opt Disable_CPUv4_movsx("disable-cpuv4-movsx", cl::Hidden, cl::init(false), + cl::desc("Disable movsx insns in cpuv4")); +static cl::opt Disable_CPUv4_bswap("disable-cpuv4-bswap", cl::Hidden, cl::init(false), + cl::desc("Disable bswap insns in cpuv4")); +static cl::opt Disable_CPUv4_sdiv("disable-cpuv4-sdiv-smod", cl::Hidden, cl::init(false), + cl::desc("Disable sdiv/smod insns in cpuv4")); +static cl::opt Disable_CPUv4_ja("disable-cpuv4-ja", cl::Hidden, cl::init(false), + cl::desc("Disable 32-bit offset ja insn in cpuv4")); + void BPFSubtarget::anchor() {} BPFSubtarget &BPFSubtarget::initializeSubtargetDependencies(StringRef CPU, @@ -38,6 +49,11 @@ HasJmp32 = false; HasAlu32 = false; UseDwarfRIS = false; + CPUv4_ldsx = false; + CPUv4_movsx = false; + CPUv4_bswap = false; + CPUv4_sdiv = false; + CPUv4_ja = false; } void 
BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { @@ -55,6 +71,17 @@ HasAlu32 = true; return; } + if (CPU == "v4") { + HasJmpExt = true; + HasJmp32 = true; + HasAlu32 = true; + CPUv4_ldsx = !Disable_CPUv4_ldsx; + CPUv4_movsx = !Disable_CPUv4_movsx; + CPUv4_bswap = !Disable_CPUv4_bswap; + CPUv4_sdiv = !Disable_CPUv4_sdiv; + CPUv4_ja = !Disable_CPUv4_ja; + return; + } } BPFSubtarget::BPFSubtarget(const Triple &TT, const std::string &CPU, diff --git a/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp --- a/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp +++ b/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp @@ -57,8 +57,7 @@ BPF_ABS = 0x1, BPF_IND = 0x2, BPF_MEM = 0x3, - BPF_LEN = 0x4, - BPF_MSH = 0x5, + BPF_MEMSX = 0x4, BPF_ATOMIC = 0x6 }; @@ -178,7 +177,7 @@ uint8_t InstMode = getInstMode(Insn); if ((InstClass == BPF_LDX || InstClass == BPF_STX) && getInstSize(Insn) != BPF_DW && - (InstMode == BPF_MEM || InstMode == BPF_ATOMIC) && + (InstMode == BPF_MEM || InstMode == BPF_MEMSX || InstMode == BPF_ATOMIC) && STI.hasFeature(BPF::ALU32)) Result = decodeInstruction(DecoderTableBPFALU3264, Instr, Insn, Address, this, STI); diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp --- a/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp +++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp @@ -6,12 +6,14 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/BPFMCFixups.h" #include "MCTargetDesc/BPFMCTargetDesc.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/Support/EndianStream.h" #include @@ -41,7 +43,10 @@ return false; } - unsigned getNumFixupKinds() const override { 
return 1; } + unsigned getNumFixupKinds() const override { + return BPF::NumTargetFixupKinds; + } + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; bool writeNopData(raw_ostream &OS, uint64_t Count, const MCSubtargetInfo *STI) const override; @@ -49,6 +54,20 @@ } // end anonymous namespace +const MCFixupKindInfo & +BPFAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo Infos[BPF::NumTargetFixupKinds] = { + { "FK_BPF_PCRel_4", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + }; + + if (Kind < FirstTargetFixupKind) + return MCAsmBackend::getFixupKindInfo(Kind); + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return Infos[Kind - FirstTargetFixupKind]; +} + bool BPFAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, const MCSubtargetInfo *STI) const { if ((Count % 8) != 0) @@ -85,6 +104,11 @@ Data[Fixup.getOffset() + 1] = 0x1; support::endian::write32be(&Data[Fixup.getOffset() + 4], Value); } + } else if (Fixup.getTargetKind() == BPF::FK_BPF_PCRel_4) { + // The input Value represents the number of bytes. + Value = (uint32_t)((Value - 8) / 8); + support::endian::write(&Data[Fixup.getOffset() + 4], Value, + Endian); } else { assert(Fixup.getKind() == FK_PCRel_2); diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp --- a/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp +++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp @@ -10,6 +10,8 @@ // //===----------------------------------------------------------------------===// + +#include "BPF.h" #include "MCTargetDesc/BPFInstPrinter.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" @@ -100,8 +102,13 @@ raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); if (Op.isImm()) { - int16_t Imm = Op.getImm(); - O << ((Imm >= 0) ? 
"+" : "") << formatImm(Imm); + if (MI->getOpcode() == BPF::JMPL) { + int32_t Imm = Op.getImm(); + O << ((Imm >= 0) ? "+" : "") << formatImm(Imm); + } else { + int16_t Imm = Op.getImm(); + O << ((Imm >= 0) ? "+" : "") << formatImm(Imm); + } } else if (Op.isExpr()) { printExpr(Op.getExpr(), O); } else { diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp --- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp +++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/BPFMCFixups.h" #include "MCTargetDesc/BPFMCTargetDesc.h" #include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCCodeEmitter.h" @@ -95,6 +96,8 @@ Fixups.push_back(MCFixup::create(0, Expr, FK_PCRel_4)); else if (MI.getOpcode() == BPF::LD_imm64) Fixups.push_back(MCFixup::create(0, Expr, FK_SecRel_8)); + else if (MI.getOpcode() == BPF::JMPL) + Fixups.push_back(MCFixup::create(0, Expr, (MCFixupKind)BPF::FK_BPF_PCRel_4)); else // bb label Fixups.push_back(MCFixup::create(0, Expr, FK_PCRel_2)); diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCFixups.h b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCFixups.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCFixups.h @@ -0,0 +1,27 @@ +//=======-- BPFMCFixups.h - BPF-specific fixup entries ------*- C++ -*-=======// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCFIXUPS_H +#define LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCFIXUPS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { +namespace BPF { +enum FixupKind { + // 32-bit PC-relative fixup, emitted for the branch target of BPF::JMPL. + FK_BPF_PCRel_4 = FirstTargetFixupKind, + + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind +}; +} // end namespace BPF +} // end namespace llvm + +#endif diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp --- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp +++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp @@ -79,12 +79,15 @@ bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, uint64_t &Target) const override { // The target is the 3rd operand of cond inst and the 1st of uncond inst. 
- int16_t Imm; + int32_t Imm; if (isConditionalBranch(Inst)) { - Imm = Inst.getOperand(2).getImm(); - } else if (isUnconditionalBranch(Inst)) - Imm = Inst.getOperand(0).getImm(); - else + Imm = (short)Inst.getOperand(2).getImm(); + } else if (isUnconditionalBranch(Inst)) { + if (Inst.getOpcode() == BPF::JMP) + Imm = (short)Inst.getOperand(0).getImm(); + else + Imm = (int)Inst.getOperand(0).getImm(); + } else return false; Target = Addr + Size + Imm * Size; diff --git a/llvm/test/CodeGen/BPF/bswap.ll b/llvm/test/CodeGen/BPF/bswap.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/BPF/bswap.ll @@ -0,0 +1,47 @@ +; RUN: llc -march=bpfel -mcpu=v4 -verify-machineinstrs -show-mc-encoding < %s | FileCheck %s +; Source: +; long foo(int a, int b, long c) { +; a = __builtin_bswap16(a); +; b = __builtin_bswap32(b); +; c = __builtin_bswap64(c); +; return a + b + c; +; } +; Compilation flags: +; clang -target bpf -O2 -S -emit-llvm t.c + +; Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none) +define dso_local i64 @foo(i32 noundef %a, i32 noundef %b, i64 noundef %c) local_unnamed_addr #0 { +entry: + %conv = trunc i32 %a to i16 + %0 = tail call i16 @llvm.bswap.i16(i16 %conv) + %conv1 = zext i16 %0 to i32 + %1 = tail call i32 @llvm.bswap.i32(i32 %b) + %2 = tail call i64 @llvm.bswap.i64(i64 %c) + %add = add nsw i32 %1, %conv1 + %conv2 = sext i32 %add to i64 + %add3 = add nsw i64 %2, %conv2 + ret i64 %add3 +} + +; CHECK: r1 = bswap16 r1 # encoding: [0xd7,0x01,0x00,0x00,0x10,0x00,0x00,0x00] +; CHECK: r2 = bswap32 r2 # encoding: [0xd7,0x02,0x00,0x00,0x20,0x00,0x00,0x00] +; CHECK: r0 = bswap64 r0 # encoding: [0xd7,0x00,0x00,0x00,0x40,0x00,0x00,0x00] + +; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i16 @llvm.bswap.i16(i16) #1 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.bswap.i32(i32) #1 + +; Function Attrs: 
mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i64 @llvm.bswap.i64(i64) #1 + +attributes #0 = { mustprogress nofree nosync nounwind willreturn memory(none) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #1 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project.git a2913a8a2bfe572d2f1bfea950ab9b0848373648)"} diff --git a/llvm/test/CodeGen/BPF/ldsx.ll b/llvm/test/CodeGen/BPF/ldsx.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/BPF/ldsx.ll @@ -0,0 +1,104 @@ +; RUN: llc -march=bpfel -mcpu=v4 -verify-machineinstrs -show-mc-encoding < %s | FileCheck %s +; Source: +; int f1(char *p) { +; return *p; +; } +; int f2(short *p) { +; return *p; +; } +; int f3(int *p) { +; return *p; +; } +; long f4(char *p) { +; return *p; +; } +; long f5(short *p) { +; return *p; +; } +; long f6(int *p) { +; return *p; +; } +; long f7(long *p) { +; return *p; +; } +; Compilation flags: +; clang -target bpf -O2 -S -emit-llvm -Xclang -disable-llvm-passes t.c + +; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn +define dso_local i32 @f1(ptr nocapture noundef readonly %p) local_unnamed_addr #0 { +entry: + %0 = load i8, ptr %p, align 1, !tbaa !3 + %conv = sext i8 %0 to i32 +; CHECK: w0 = *(s8 *)(r1 + 0) # encoding: [0x91,0x10,0x00,0x00,0x00,0x00,0x00,0x00] + ret i32 %conv +} + +; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn +define dso_local i32 @f2(ptr nocapture noundef readonly %p) local_unnamed_addr #0 { +entry: + %0 = load i16, ptr %p, align 2, !tbaa !6 + %conv = sext i16 %0 to i32 +; CHECK: w0 = *(s16 *)(r1 + 0) # encoding: 
[0x89,0x10,0x00,0x00,0x00,0x00,0x00,0x00] + ret i32 %conv +} + +; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn +define dso_local i32 @f3(ptr nocapture noundef readonly %p) local_unnamed_addr #0 { +entry: + %0 = load i32, ptr %p, align 4, !tbaa !8 +; CHECK: w0 = *(u32 *)(r1 + 0) # encoding: [0x61,0x10,0x00,0x00,0x00,0x00,0x00,0x00] + ret i32 %0 +} + +; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn +define dso_local i64 @f4(ptr nocapture noundef readonly %p) local_unnamed_addr #0 { +entry: + %0 = load i8, ptr %p, align 1, !tbaa !3 + %conv = sext i8 %0 to i64 + ret i64 %conv +; CHECK: r0 = *(s8 *)(r1 + 0) # encoding: [0x91,0x10,0x00,0x00,0x00,0x00,0x00,0x00] +} + +; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn +define dso_local i64 @f5(ptr nocapture noundef readonly %p) local_unnamed_addr #0 { +entry: + %0 = load i16, ptr %p, align 2, !tbaa !6 + %conv = sext i16 %0 to i64 + ret i64 %conv +; CHECK: r0 = *(s16 *)(r1 + 0) # encoding: [0x89,0x10,0x00,0x00,0x00,0x00,0x00,0x00] +} + +; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn +define dso_local i64 @f6(ptr nocapture noundef readonly %p) local_unnamed_addr #0 { +entry: + %0 = load i32, ptr %p, align 4, !tbaa !8 + %conv = sext i32 %0 to i64 + ret i64 %conv +; CHECK: r0 = *(s32 *)(r1 + 0) # encoding: [0x81,0x10,0x00,0x00,0x00,0x00,0x00,0x00] +} + +; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn +define dso_local i64 @f7(ptr nocapture noundef readonly %p) local_unnamed_addr #0 { +entry: + %0 = load i64, ptr %p, align 8, !tbaa !10 + ret i64 %0 +; CHECK: r0 = *(u64 *)(r1 + 0) # encoding: [0x79,0x10,0x00,0x00,0x00,0x00,0x00,0x00] +} + +attributes #0 = { argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn "frame-pointer"="all" "min-legal-vector-width"="0" 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{!"clang version 16.0.0 (https://github.com/llvm/llvm-project.git 68665544c7d59735e9c0bb32b08829c006c7c594)"} +!3 = !{!4, !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!7, !7, i64 0} +!7 = !{!"short", !4, i64 0} +!8 = !{!9, !9, i64 0} +!9 = !{!"int", !4, i64 0} +!10 = !{!11, !11, i64 0} +!11 = !{!"long", !4, i64 0} diff --git a/llvm/test/CodeGen/BPF/movsx.ll b/llvm/test/CodeGen/BPF/movsx.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/BPF/movsx.ll @@ -0,0 +1,79 @@ +; RUN: llc -march=bpfel -mcpu=v4 -verify-machineinstrs -show-mc-encoding < %s | FileCheck %s +; Source: +; short f1(char a) { +; return a; +; } +; int f2(char a) { +; return a; +; } +; long f3(char a) { +; return a; +; } +; int f4(short a) { +; return a; +; } +; long f5(short a) { +; return a; +; } +; long f6(int a) { +; return a; +; } +; Compilation flags: +; clang -target bpf -O2 -S -emit-llvm t.c + +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +define dso_local i16 @f1(i8 noundef signext %a) local_unnamed_addr #0 { +entry: + %conv = sext i8 %a to i16 + ret i16 %conv +} +; CHECK: w0 = w1 # encoding: [0xbc,0x10,0x00,0x00,0x00,0x00,0x00,0x00] + +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +define dso_local i32 @f2(i8 noundef signext %a) local_unnamed_addr #0 { +entry: + %conv = sext i8 %a to i32 + ret i32 %conv +} +; CHECK: w0 = w1 # encoding: [0xbc,0x10,0x00,0x00,0x00,0x00,0x00,0x00] + +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +define dso_local i64 @f3(i8 noundef signext %a) local_unnamed_addr #0 { +entry: + %conv = sext i8 %a to i64 + ret i64 %conv +} +; CHECK: r0 = (s32)w1 # encoding: 
[0xbf,0x10,0x20,0x00,0x00,0x00,0x00,0x00] + +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +define dso_local i32 @f4(i16 noundef signext %a) local_unnamed_addr #0 { +entry: + %conv = sext i16 %a to i32 + ret i32 %conv +} +; CHECK: w0 = w1 # encoding: [0xbc,0x10,0x00,0x00,0x00,0x00,0x00,0x00] + +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +define dso_local i64 @f5(i16 noundef signext %a) local_unnamed_addr #0 { +entry: + %conv = sext i16 %a to i64 + ret i64 %conv +} +; CHECK: r0 = (s32)w1 # encoding: [0xbf,0x10,0x20,0x00,0x00,0x00,0x00,0x00] + +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +define dso_local i64 @f6(i32 noundef %a) local_unnamed_addr #0 { +entry: + %conv = sext i32 %a to i64 + ret i64 %conv +} +; CHECK: r0 = (s32)w1 # encoding: [0xbf,0x10,0x20,0x00,0x00,0x00,0x00,0x00] + +attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project.git a2913a8a2bfe572d2f1bfea950ab9b0848373648)"} diff --git a/llvm/test/CodeGen/BPF/sdiv_smod.ll b/llvm/test/CodeGen/BPF/sdiv_smod.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/BPF/sdiv_smod.ll @@ -0,0 +1,77 @@ +; RUN: llc -march=bpfel -mcpu=v4 -verify-machineinstrs -show-mc-encoding < %s | FileCheck %s +; Source: +; int foo(int a, int b, int c) { +; return a/b + a%c; +; } +; long bar(long a, long b, long c) { +; return a/b + a%c; +; } +; Compilation flags: +; clang -target bpf -O2 -S -emit-llvm -Xclang -disable-llvm-passes t.c + +; Function Attrs: nounwind +define dso_local i32 @foo(i32 noundef %a, i32 noundef %b, i32 noundef %c) #0 { +entry: + %a.addr = alloca i32, align 4 
+ %b.addr = alloca i32, align 4 + %c.addr = alloca i32, align 4 + store i32 %a, ptr %a.addr, align 4, !tbaa !3 + store i32 %b, ptr %b.addr, align 4, !tbaa !3 + store i32 %c, ptr %c.addr, align 4, !tbaa !3 + %0 = load i32, ptr %a.addr, align 4, !tbaa !3 + %1 = load i32, ptr %b.addr, align 4, !tbaa !3 + %div = sdiv i32 %0, %1 + %2 = load i32, ptr %a.addr, align 4, !tbaa !3 + %3 = load i32, ptr %c.addr, align 4, !tbaa !3 + %rem = srem i32 %2, %3 + %add = add nsw i32 %div, %rem + ret i32 %add +} + +; CHECK: w0 = w1 +; CHECK-NEXT: *(u32 *)(r10 - 8) = w2 +; CHECK-NEXT: *(u32 *)(r10 - 4) = w0 +; CHECK-NEXT: *(u32 *)(r10 - 12) = w3 +; CHECK-NEXT: w1 s%= w3 # encoding: [0x9c,0x31,0x01,0x00,0x00,0x00,0x00,0x00] +; CHECK-NEXT: w0 s/= w2 # encoding: [0x3c,0x20,0x01,0x00,0x00,0x00,0x00,0x00] + +; Function Attrs: nounwind +define dso_local i64 @bar(i64 noundef %a, i64 noundef %b, i64 noundef %c) #0 { +entry: + %a.addr = alloca i64, align 8 + %b.addr = alloca i64, align 8 + %c.addr = alloca i64, align 8 + store i64 %a, ptr %a.addr, align 8, !tbaa !7 + store i64 %b, ptr %b.addr, align 8, !tbaa !7 + store i64 %c, ptr %c.addr, align 8, !tbaa !7 + %0 = load i64, ptr %a.addr, align 8, !tbaa !7 + %1 = load i64, ptr %b.addr, align 8, !tbaa !7 + %div = sdiv i64 %0, %1 + %2 = load i64, ptr %a.addr, align 8, !tbaa !7 + %3 = load i64, ptr %c.addr, align 8, !tbaa !7 + %rem = srem i64 %2, %3 + %add = add nsw i64 %div, %rem + ret i64 %add +} + +; CHECK: r0 = r1 +; CHECK-NEXT: *(u64 *)(r10 - 16) = r2 +; CHECK-NEXT: *(u64 *)(r10 - 8) = r0 +; CHECK-NEXT: *(u64 *)(r10 - 24) = r3 +; CHECK-NEXT: r1 s%= r3 # encoding: [0x9f,0x31,0x01,0x00,0x00,0x00,0x00,0x00] +; CHECK-NEXT: r0 s/= r2 # encoding: [0x3f,0x20,0x01,0x00,0x00,0x00,0x00,0x00] + +attributes #0 = { nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{!"clang 
version 17.0.0 (https://github.com/llvm/llvm-project.git 569bd3b841e3167ddd7c6ceeddb282d3c280e761)"} +!3 = !{!4, !4, i64 0} +!4 = !{!"int", !5, i64 0} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C/C++ TBAA"} +!7 = !{!8, !8, i64 0} +!8 = !{!"long", !5, i64 0}