diff --git a/clang/lib/Basic/Targets/BPF.h b/clang/lib/Basic/Targets/BPF.h
--- a/clang/lib/Basic/Targets/BPF.h
+++ b/clang/lib/Basic/Targets/BPF.h
@@ -106,7 +106,7 @@ void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
   bool setCPU(const std::string &Name) override {
-    if (Name == "v3") {
+    if (Name == "v3" || Name == "v4") {
       HasAlu32 = true;
     }
diff --git a/clang/lib/Basic/Targets/BPF.cpp b/clang/lib/Basic/Targets/BPF.cpp
--- a/clang/lib/Basic/Targets/BPF.cpp
+++ b/clang/lib/Basic/Targets/BPF.cpp
@@ -32,7 +32,7 @@ }
 static constexpr llvm::StringLiteral ValidCPUNames[] = {"generic", "v1", "v2",
-                                                        "v3", "probe"};
+                                                        "v3", "v4", "probe"};
 bool BPFTargetInfo::isValidCPUName(StringRef Name) const {
   return llvm::is_contained(ValidCPUNames, Name);
diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c
--- a/clang/test/Misc/target-invalid-cpu-note.c
+++ b/clang/test/Misc/target-invalid-cpu-note.c
@@ -73,7 +73,7 @@
 // RUN: not %clang_cc1 -triple bpf--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix BPF
 // BPF: error: unknown target CPU 'not-a-cpu'
-// BPF-NEXT: note: valid target CPU values are: generic, v1, v2, v3, probe{{$}}
+// BPF-NEXT: note: valid target CPU values are: generic, v1, v2, v3, v4, probe{{$}}
 // RUN: not %clang_cc1 -triple avr--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AVR
 // AVR: error: unknown target CPU 'not-a-cpu'
diff --git a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
--- a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
+++ b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -229,6 +229,7 @@
       .Case("if", true)
       .Case("call", true)
       .Case("goto", true)
+      .Case("gotol", true)
       .Case("*", true)
       .Case("exit", true)
       .Case("lock", true)
@@ -243,13 +244,20 @@
       .Case("u32", true)
       .Case("u16", true)
       .Case("u8", true)
+      .Case("s32", true)
+      .Case("s16", true)
+      .Case("s8", true)
       .Case("be64", true)
       .Case("be32", true)
       .Case("be16", true)
       .Case("le64", true)
       .Case("le32", true)
       .Case("le16", true)
+      .Case("bswap16", true)
+      .Case("bswap32", true)
+      .Case("bswap64", true)
       .Case("goto", true)
+      .Case("gotol", true)
       .Case("ll", true)
       .Case("skb", true)
       .Case("s", true)
diff --git a/llvm/lib/Target/BPF/BPF.td b/llvm/lib/Target/BPF/BPF.td
--- a/llvm/lib/Target/BPF/BPF.td
+++ b/llvm/lib/Target/BPF/BPF.td
@@ -30,6 +30,7 @@
 def : Proc<"v1", []>;
 def : Proc<"v2", []>;
 def : Proc<"v3", [ALU32]>;
+def : Proc<"v4", [ALU32]>;
 def : Proc<"probe", []>;
 def BPFInstPrinter : AsmWriter {
@@ -45,7 +46,7 @@ int Variant = 0;
   string Name = "BPF";
   string BreakCharacters = ".";
-  string TokenizingCharacters = "#()[]=:.<>!+*";
+  string TokenizingCharacters = "#()[]=:.<>!+*%/";
 }
 def BPF : Target {
diff --git a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
--- a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -192,15 +192,17 @@
   default:
     break;
   case ISD::SDIV: {
-    DebugLoc Empty;
-    const DebugLoc &DL = Node->getDebugLoc();
-    if (DL != Empty)
-      errs() << "Error at line " << DL.getLine() << ": ";
-    else
-      errs() << "Error: ";
-    errs() << "Unsupport signed division for DAG: ";
-    Node->print(errs(), CurDAG);
-    errs() << "Please convert to unsigned div/mod.\n";
+    if (!Subtarget->getCPUv4()) {
+      DebugLoc Empty;
+      const DebugLoc &DL = Node->getDebugLoc();
+      if (DL != Empty)
+        errs() << "Error at line " << DL.getLine() << ": ";
+      else
+        errs() << "Error: ";
+      errs() << "Unsupported signed
division for DAG: "; + Node->print(errs(), CurDAG); + errs() << "Please convert to unsigned div/mod.\n"; + } break; } case ISD::INTRINSIC_W_CHAIN: { diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp --- a/llvm/lib/Target/BPF/BPFISelLowering.cpp +++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp @@ -102,7 +102,8 @@ setOperationAction(ISD::SDIVREM, VT, Expand); setOperationAction(ISD::UDIVREM, VT, Expand); - setOperationAction(ISD::SREM, VT, Expand); + if (!STI.getCPUv4()) + setOperationAction(ISD::SREM, VT, Expand); setOperationAction(ISD::MULHU, VT, Expand); setOperationAction(ISD::MULHS, VT, Expand); setOperationAction(ISD::UMUL_LOHI, VT, Expand); @@ -141,9 +142,11 @@ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Expand); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand); + if (!STI.getCPUv4()) { + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand); + } } setBooleanContents(ZeroOrOneBooleanContent); diff --git a/llvm/lib/Target/BPF/BPFInstrFormats.td b/llvm/lib/Target/BPF/BPFInstrFormats.td --- a/llvm/lib/Target/BPF/BPFInstrFormats.td +++ b/llvm/lib/Target/BPF/BPFInstrFormats.td @@ -90,6 +90,7 @@ def BPF_ABS : BPFModeModifer<0x1>; def BPF_IND : BPFModeModifer<0x2>; def BPF_MEM : BPFModeModifer<0x3>; +def BPF_MEMS : BPFModeModifer<0x4>; def BPF_ATOMIC : BPFModeModifer<0x6>; class BPFAtomicFlag val> { diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td --- a/llvm/lib/Target/BPF/BPFInstrInfo.td +++ b/llvm/lib/Target/BPF/BPFInstrInfo.td @@ -53,6 +53,8 @@ def BPFIsBigEndian : Predicate<"!CurDAG->getDataLayout().isLittleEndian()">; def BPFHasALU32 : Predicate<"Subtarget->getHasAlu32()">; def BPFNoALU32 : Predicate<"!Subtarget->getHasAlu32()">; +def BPFHasCPUv4 : Predicate<"Subtarget->getCPUv4()">; +def BPFNoCPUv4 : Predicate<"!Subtarget->getCPUv4()">; def brtarget : Operand { let PrintMethod = "printBrTargetOperand"; @@ -240,18 +242,19 @@ } // ALU instructions -class ALU_RI pattern> : TYPE_ALU_JMP { bits<4> dst; bits<32> imm; let Inst{51-48} = dst; + let Inst{47-32} = off; let Inst{31-0} = imm; let BPFClass = Class; } -class ALU_RR pattern> : TYPE_ALU_JMP { bits<4> dst; @@ -259,26 +262,27 @@ let Inst{55-52} = src; let Inst{51-48} = dst; + let Inst{47-32} = off; let BPFClass = Class; } -multiclass ALU { - def _rr : ALU_RR { + def _rr : ALU_RR; - def _ri : ALU_RI; - def _rr_32 : ALU_RR; - def _ri_32 : ALU_RI>=", srl>; - defm XOR : ALU>=", sra>; -} - defm MUL : ALU; - defm MOD : ALU>=", srl>; + defm XOR : ALU>=", sra>; +} + defm MUL : ALU; + defm MOD : ALU; + defm SMOD : ALU; -def MOV_rr : ALU_RR; -def MOV_ri : ALU_RI; -def MOV_rr_32 : ALU_RR; -def MOV_ri_32 : ALU_RI; + +let Predicates = [BPFHasCPUv4] in { + def MOVS_rr_8 : ALU_RR; + def MOVS_rr_16 : ALU_RR; + def MOVS_rr_32 : ALU_RR; + def MOVS_rr_32_8 : ALU_RR; + def MOVS_rr_32_16 : ALU_RR; +} } def FI_ri @@ -421,8 +453,8 @@ def STD : STOREi64; // LOAD instructions -class LOAD Pattern> - : TYPE_LD_ST Pattern> + : TYPE_LD_ST - : LOAD; +class LOADi64 + : LOAD; let isCodeGenOnly = 1 in { def CORE_MEM : TYPE_LD_ST; let Constraints = "$dst = $src" in { - def CORE_SHIFT : ALU_RR; - def LDH : LOADi64; - def LDB : LOADi64; + def LDW : LOADi64; + def LDH : LOADi64; + def LDB : LOADi64; +} + +let 
Predicates = [BPFHasCPUv4] in { + def LDWS : LOADi64; + def LDHS : LOADi64; + def LDBS : LOADi64; } -def LDD : LOADi64; +def LDD : LOADi64; class BRANCH Pattern> : TYPE_ALU_JMP Pattern> + : TYPE_ALU_JMP { + bits<32> BrDst; + + let Inst{31-0} = BrDst; + let BPFClass = BPF_JMP32; +} + class CALL : TYPE_ALU_JMP; + def JMPL : BRANCH_LONG; } // Jump and link @@ -835,7 +886,7 @@ } // bswap16, bswap32, bswap64 -class BSWAP SizeOp, string OpcodeStr, BPFSrcType SrcType, list Pattern> +class BSWAP SizeOp, string OpcodeStr, BPFSrcType SrcType, list Pattern> : TYPE_ALU_JMP; + def BSWAP32 : BSWAP; + def BSWAP64 : BSWAP; + } + + let Predicates = [BPFNoCPUv4] in { let Predicates = [BPFIsLittleEndian] in { - def BE16 : BSWAP<16, "be16", BPF_TO_BE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 48)))]>; - def BE32 : BSWAP<32, "be32", BPF_TO_BE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 32)))]>; - def BE64 : BSWAP<64, "be64", BPF_TO_BE, [(set GPR:$dst, (bswap GPR:$src))]>; + def BE16 : BSWAP; + def BE32 : BSWAP; + def BE64 : BSWAP; } let Predicates = [BPFIsBigEndian] in { - def LE16 : BSWAP<16, "le16", BPF_TO_LE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 48)))]>; - def LE32 : BSWAP<32, "le32", BPF_TO_LE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 32)))]>; - def LE64 : BSWAP<64, "le64", BPF_TO_LE, [(set GPR:$dst, (bswap GPR:$src))]>; + def LE16 : BSWAP; + def LE32 : BSWAP; + def LE64 : BSWAP; } + } } let Defs = [R0, R1, R2, R3, R4, R5], Uses = [R6], hasSideEffects = 1, @@ -898,7 +957,7 @@ def LD_IND_W : LOAD_IND; let isCodeGenOnly = 1 in { - def MOV_32_64 : ALU_RR; } @@ -940,8 +999,8 @@ def STB32 : STOREi32; } -class LOAD32 Pattern> - : TYPE_LD_ST Pattern> + : TYPE_LD_ST - : LOAD32; +class LOADi32 + : LOAD32; let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in { - def LDW32 : LOADi32; - def LDH32 : LOADi32; - def LDB32 : LOADi32; + def LDW32 : LOADi32; + def LDH32 : LOADi32; + def LDB32 : LOADi32; +} + +let Predicates = [BPFHasCPUv4], DecoderNamespace = "BPFALU32" in { + def LDH32S : LOADi32; + def LDB32S : LOADi32; } let Predicates = [BPFHasALU32] in { @@ -973,6 +1037,7 @@ (STW32 (EXTRACT_SUBREG GPR:$src, sub_32), ADDRri:$dst)>; def : Pat<(i32 (extloadi8 ADDRri:$src)), (i32 (LDB32 ADDRri:$src))>; def : Pat<(i32 (extloadi16 ADDRri:$src)), (i32 (LDH32 ADDRri:$src))>; + def : Pat<(i64 (zextloadi8 ADDRri:$src)), (SUBREG_TO_REG (i64 0), (LDB32 ADDRri:$src), sub_32)>; def : Pat<(i64 (zextloadi16 ADDRri:$src)), diff --git a/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/llvm/lib/Target/BPF/BPFMIPeephole.cpp --- a/llvm/lib/Target/BPF/BPFMIPeephole.cpp +++ b/llvm/lib/Target/BPF/BPFMIPeephole.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #include +#include <map> using namespace llvm; @@ -301,7 +302,9 @@ static char ID; MachineFunction *MF; + const BPFInstrInfo *TII; const TargetRegisterInfo *TRI; + bool IsCPUv4; BPFMIPreEmitPeephole() : MachineFunctionPass(ID) { initializeBPFMIPreEmitPeepholePass(*PassRegistry::getPassRegistry()); } @@ -311,7 +314,12 @@ // Initialize class variables. void initialize(MachineFunction &MFParm); + bool in16BitRange(int Num); + void buildReverseCondOpMap(); bool eliminateRedundantMov(); + bool adjustBranch(); + + std::map<unsigned, unsigned> ReverseCondOpMap; public: @@ -322,14 +330,20 @@ initialize(MF); - return eliminateRedundantMov(); + bool Changed; + Changed = eliminateRedundantMov(); + if (IsCPUv4) + Changed = adjustBranch() || Changed; + return Changed; } }; // Initialize class variables.
void BPFMIPreEmitPeephole::initialize(MachineFunction &MFParm) {
   MF = &MFParm;
+  TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
   TRI = MF->getSubtarget().getRegisterInfo();
+  IsCPUv4 = MF->getSubtarget<BPFSubtarget>().getCPUv4();
   LLVM_DEBUG(dbgs() << "*** BPF PreEmit peephole pass ***\n\n");
 }
@@ -374,6 +388,256 @@
   return Eliminated;
 }
+// FIXME: this is very crude, do we have a better way to do this?
+void BPFMIPreEmitPeephole::buildReverseCondOpMap() {
+  ReverseCondOpMap[BPF::JEQ_rr] = BPF::JNE_rr;
+  ReverseCondOpMap[BPF::JNE_rr] = BPF::JEQ_rr;
+  ReverseCondOpMap[BPF::JUGT_rr] = BPF::JULE_rr;
+  ReverseCondOpMap[BPF::JULE_rr] = BPF::JUGT_rr;
+  ReverseCondOpMap[BPF::JUGE_rr] = BPF::JULT_rr;
+  ReverseCondOpMap[BPF::JULT_rr] = BPF::JUGE_rr;
+  ReverseCondOpMap[BPF::JSGT_rr] = BPF::JSLE_rr;
+  ReverseCondOpMap[BPF::JSLE_rr] = BPF::JSGT_rr;
+  ReverseCondOpMap[BPF::JSGE_rr] = BPF::JSLT_rr;
+  ReverseCondOpMap[BPF::JSLT_rr] = BPF::JSGE_rr;
+
+  ReverseCondOpMap[BPF::JEQ_ri] = BPF::JNE_ri;
+  ReverseCondOpMap[BPF::JNE_ri] = BPF::JEQ_ri;
+  ReverseCondOpMap[BPF::JUGT_ri] = BPF::JULE_ri;
+  ReverseCondOpMap[BPF::JULE_ri] = BPF::JUGT_ri;
+  ReverseCondOpMap[BPF::JUGE_ri] = BPF::JULT_ri;
+  ReverseCondOpMap[BPF::JULT_ri] = BPF::JUGE_ri;
+  ReverseCondOpMap[BPF::JSGT_ri] = BPF::JSLE_ri;
+  ReverseCondOpMap[BPF::JSLE_ri] = BPF::JSGT_ri;
+  ReverseCondOpMap[BPF::JSGE_ri] = BPF::JSLT_ri;
+  ReverseCondOpMap[BPF::JSLT_ri] = BPF::JSGE_ri;
+
+  ReverseCondOpMap[BPF::JEQ_rr_32] = BPF::JNE_rr_32;
+  ReverseCondOpMap[BPF::JNE_rr_32] = BPF::JEQ_rr_32;
+  ReverseCondOpMap[BPF::JUGT_rr_32] = BPF::JULE_rr_32;
+  ReverseCondOpMap[BPF::JULE_rr_32] = BPF::JUGT_rr_32;
+  ReverseCondOpMap[BPF::JUGE_rr_32] = BPF::JULT_rr_32;
+  ReverseCondOpMap[BPF::JULT_rr_32] = BPF::JUGE_rr_32;
+  ReverseCondOpMap[BPF::JSGT_rr_32] = BPF::JSLE_rr_32;
+  ReverseCondOpMap[BPF::JSLE_rr_32] = BPF::JSGT_rr_32;
+  ReverseCondOpMap[BPF::JSGE_rr_32] = BPF::JSLT_rr_32;
+  ReverseCondOpMap[BPF::JSLT_rr_32] = BPF::JSGE_rr_32;
+
+  ReverseCondOpMap[BPF::JEQ_ri_32] = BPF::JNE_ri_32;
+  ReverseCondOpMap[BPF::JNE_ri_32] = BPF::JEQ_ri_32;
+  ReverseCondOpMap[BPF::JUGT_ri_32] = BPF::JULE_ri_32;
+  ReverseCondOpMap[BPF::JULE_ri_32] = BPF::JUGT_ri_32;
+  ReverseCondOpMap[BPF::JUGE_ri_32] = BPF::JULT_ri_32;
+  ReverseCondOpMap[BPF::JULT_ri_32] = BPF::JUGE_ri_32;
+  ReverseCondOpMap[BPF::JSGT_ri_32] = BPF::JSLE_ri_32;
+  ReverseCondOpMap[BPF::JSLE_ri_32] = BPF::JSGT_ri_32;
+  ReverseCondOpMap[BPF::JSGE_ri_32] = BPF::JSLT_ri_32;
+  ReverseCondOpMap[BPF::JSLT_ri_32] = BPF::JSGE_ri_32;
+}
+
+bool BPFMIPreEmitPeephole::in16BitRange(int Num) {
+  // Well, the cut-off is not precisely at the 16bit range since
+  // new insns are added during the transformation. So let us
+  // be a little bit conservative.
+  return Num >= (INT16_MIN >> 1) && Num <= (INT16_MAX >> 1);
+}
+
+// Before cpu=v4, only a 16bit branch target offset (-0x8000 to 0x7fff)
+// is supported for both unconditional (JMP) and conditional (JEQ, JSGT,
+// etc.) branches. In certain cases, e.g., full unrolling, the branch
+// target offset might exceed the 16bit range. If this happens, LLVM
+// will generate incorrect code as the offset is truncated to 16bit.
+//
+// To fix this rare case, a new insn JMPL is introduced. This new
+// insn supports a 32bit branch target offset. The compiler
+// does not use this insn during insn selection. Rather, the BPF backend
+// will estimate the branch target offset and do JMP -> JMPL and
+// JEQ -> JEQ + JMPL conversion if the estimated branch target offset
+// is beyond 16bit.
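[Editor's note: a minimal C sketch, not part of the patch, of the range check this comment describes; the name fits_16bit_branch is hypothetical and offsets are counted in instructions, as in the pass.]

    #include <stdbool.h>
    #include <stdint.h>

    /* Accept only half of the architectural [-0x8000, 0x7fff] window: the
       JEQ -> JEQ + JMPL rewrite inserts new insns after offsets have been
       estimated, so the check stays conservative (mirrors in16BitRange()
       above). */
    static bool fits_16bit_branch(int insn_off) {
        return insn_off >= (INT16_MIN >> 1) && insn_off <= (INT16_MAX >> 1);
    }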
+bool BPFMIPreEmitPeephole::adjustBranch() {
+  bool Changed = false;
+  int CurrNumInsns = 0;
+  std::map<MachineBasicBlock *, int> SoFarNumInsns;
+  std::map<MachineBasicBlock *, MachineBasicBlock *> FollowThroughBB;
+  std::vector<MachineBasicBlock *> MBBs;
+
+  buildReverseCondOpMap();
+
+  MachineBasicBlock *PrevBB = nullptr;
+  for (MachineBasicBlock &MBB : *MF) {
+    // MBB.size() is the number of insns in this basic block, including some
+    // debug info, e.g., DEBUG_VALUE, so we may over-count a little bit.
+    // Typically we have way more normal insns than DEBUG_VALUE insns.
+    // Also, if we indeed need to convert a conditional branch like JEQ to
+    // JEQ + JMPL, we actually introduce some new insns like below.
+    CurrNumInsns += (int)MBB.size();
+    SoFarNumInsns[&MBB] = CurrNumInsns;
+    if (PrevBB != nullptr)
+      FollowThroughBB[PrevBB] = &MBB;
+    PrevBB = &MBB;
+    // A list of original BBs to make later traversal easier.
+    MBBs.push_back(&MBB);
+  }
+  FollowThroughBB[PrevBB] = nullptr;
+
+  for (unsigned i = 0; i < MBBs.size(); i++) {
+    // We have four cases here:
+    //  (1). no terminator, simple follow through.
+    //  (2). jmp to another bb.
+    //  (3). conditional jmp to another bb or follow through.
+    //  (4). conditional jmp followed by an unconditional jmp.
+    MachineInstr *CondJmp = nullptr, *UncondJmp = nullptr;
+
+    MachineBasicBlock *MBB = MBBs[i];
+    for (MachineInstr &Term : MBB->terminators()) {
+      if (Term.isConditionalBranch()) {
+        assert(CondJmp == nullptr);
+        CondJmp = &Term;
+      } else if (Term.isUnconditionalBranch()) {
+        assert(UncondJmp == nullptr);
+        UncondJmp = &Term;
+      }
+    }
+
+    // (1). no terminator, simple follow through.
+    if (!CondJmp && !UncondJmp)
+      continue;
+
+    MachineBasicBlock *CondTargetBB, *JmpBB;
+    CurrNumInsns = SoFarNumInsns[MBB];
+
+    // (2). jmp to another bb.
+    if (!CondJmp && UncondJmp) {
+      JmpBB = UncondJmp->getOperand(0).getMBB();
+      if (in16BitRange(SoFarNumInsns[JmpBB] - JmpBB->size() - CurrNumInsns))
+        continue;
+
+      // Replace this insn with a JMPL.
+      BuildMI(MBB, UncondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(JmpBB);
+      UncondJmp->eraseFromParent();
+      Changed = true;
+      continue;
+    }
+
+    const BasicBlock *TermBB = MBB->getBasicBlock();
+    int Dist;
+
+    // (3). conditional jmp to another bb or follow through.
+    if (!UncondJmp) {
+      CondTargetBB = CondJmp->getOperand(2).getMBB();
+      MachineBasicBlock *FollowBB = FollowThroughBB[MBB];
+      Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns;
+      if (in16BitRange(Dist))
+        continue;
+
+      // We have
+      //   B2: ...
+      //       if (cond) goto B5
+      //   B3: ...
+      // where B2 -> B5 is beyond the 16bit range.
+      //
+      // We do not have a 32bit cond jmp insn. So we try to do
+      // the following.
+      //   B2:     ...
+      //           if (!cond) goto B3
+      //   New_B0: gotol B5
+      //   B3:     ...
+      // Basically a new basic block is created.
+      MachineBasicBlock *New_B0 = MF->CreateMachineBasicBlock(TermBB);
+
+      // Insert New_B0 into the function block list.
+      MachineFunction::iterator MBB_I = ++MBB->getIterator();
+      MF->insert(MBB_I, New_B0);
+
+      // Replace B2's cond jump with a reversed-condition jump to the
+      // follow-through block.
+      unsigned ReverseOpcode = ReverseCondOpMap[CondJmp->getOpcode()];
+      if (CondJmp->getOperand(1).isReg())
+        BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp),
+                CondJmp->getDebugLoc(), TII->get(ReverseOpcode))
+            .addReg(CondJmp->getOperand(0).getReg())
+            .addReg(CondJmp->getOperand(1).getReg())
+            .addMBB(FollowBB);
+      else
+        BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp),
+                CondJmp->getDebugLoc(), TII->get(ReverseOpcode))
+            .addReg(CondJmp->getOperand(0).getReg())
+            .addImm(CondJmp->getOperand(1).getImm())
+            .addMBB(FollowBB);
+
+      MBB->removeSuccessor(CondTargetBB);
+      MBB->addSuccessor(New_B0);
+
+      // Populate insns in New_B0.
+      BuildMI(New_B0, CondJmp->getDebugLoc(), TII->get(BPF::JMPL))
+          .addMBB(CondTargetBB);
+
+      New_B0->addSuccessor(CondTargetBB);
+      CondJmp->eraseFromParent();
+      Changed = true;
+      continue;
+    }
+
+    // (4). conditional jmp followed by an unconditional jmp.
+    CondTargetBB = CondJmp->getOperand(2).getMBB();
+    JmpBB = UncondJmp->getOperand(0).getMBB();
+
+    // We have
+    //   B2: ...
+    //       if (cond) goto B5
+    //       JMP B7
+    //   B3: ...
+    //
+    // If only B2->B5 is out of the 16bit range, we can do
+    //   B2: ...
+    //       if (cond) goto New_B
+    //       JMP B7
+    //   New_B: gotol B5
+    //   B3: ...
+    //
+    // If only 'JMP B7' is out of the 16bit range, we can replace
+    // 'JMP B7' with 'JMPL B7'.
+    //
+    // If both B2->B5 and 'JMP B7' are out of range, just do
+    // both the above transformations.
+    Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns;
+    if (!in16BitRange(Dist)) {
+      MachineBasicBlock *New_B = MF->CreateMachineBasicBlock(TermBB);
+
+      // Insert New_B into the function block list.
+      MF->insert(++MBB->getIterator(), New_B);
+
+      // Replace B2's cond jump so that it targets New_B.
+      if (CondJmp->getOperand(1).isReg())
+        BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp),
+                CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
+            .addReg(CondJmp->getOperand(0).getReg())
+            .addReg(CondJmp->getOperand(1).getReg())
+            .addMBB(New_B);
+      else
+        BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp),
+                CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
+            .addReg(CondJmp->getOperand(0).getReg())
+            .addImm(CondJmp->getOperand(1).getImm())
+            .addMBB(New_B);
+
+      if (CondTargetBB != JmpBB)
+        MBB->removeSuccessor(CondTargetBB);
+      MBB->addSuccessor(New_B);
+
+      // Populate insn in New_B.
+ BuildMI(New_B, CondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(CondTargetBB); + + New_B->addSuccessor(CondTargetBB); + CondJmp->eraseFromParent(); + Changed = true; + } + + if (!in16BitRange(SoFarNumInsns[JmpBB] - CurrNumInsns)) { + BuildMI(MBB, UncondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(JmpBB); + UncondJmp->eraseFromParent(); + Changed = true; + } + } + + return Changed; +} + } // end default namespace INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole", diff --git a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp --- a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp +++ b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp @@ -96,7 +96,8 @@ bool BPFMISimplifyPatchable::isLoadInst(unsigned Opcode) { return Opcode == BPF::LDD || Opcode == BPF::LDW || Opcode == BPF::LDH || Opcode == BPF::LDB || Opcode == BPF::LDW32 || Opcode == BPF::LDH32 || - Opcode == BPF::LDB32; + Opcode == BPF::LDB32 || Opcode == BPF::LDWS || Opcode == BPF::LDHS || + Opcode == BPF::LDBS || Opcode == BPF::LDH32S || Opcode == BPF::LDB32S; } void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI, @@ -119,7 +120,9 @@ unsigned COREOp; if (Opcode == BPF::LDB || Opcode == BPF::LDH || Opcode == BPF::LDW || Opcode == BPF::LDD || Opcode == BPF::STB || Opcode == BPF::STH || - Opcode == BPF::STW || Opcode == BPF::STD) + Opcode == BPF::STW || Opcode == BPF::STD || Opcode == BPF::LDWS || + Opcode == BPF::LDHS || Opcode == BPF::LDBS || Opcode == BPF::LDH32S || + Opcode == BPF::LDB32S) COREOp = BPF::CORE_MEM; else if (Opcode == BPF::LDB32 || Opcode == BPF::LDH32 || Opcode == BPF::LDW32 || Opcode == BPF::STB32 || diff --git a/llvm/lib/Target/BPF/BPFSubtarget.h b/llvm/lib/Target/BPF/BPFSubtarget.h --- a/llvm/lib/Target/BPF/BPFSubtarget.h +++ b/llvm/lib/Target/BPF/BPFSubtarget.h @@ -56,6 +56,9 @@ // whether we should enable MCAsmInfo DwarfUsesRelocationsAcrossSections bool UseDwarfRIS; + // whether cpu v4 is enabled. + bool CPUv4; + public: // This constructor initializes the data members to match that // of the specified triple. 
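[Editor's note: as the BPFSubtarget.cpp hunk below shows, cpu=v4 is a strict superset of cpu=v3. A hedged C sketch of the cumulative feature mapping; the struct and function names are hypothetical.]

    #include <stdbool.h>
    #include <string.h>

    struct bpf_features { bool jmp_ext, jmp32, alu32, cpu_v4; };

    /* Each CPU level keeps everything the previous level enabled. */
    static struct bpf_features features_for(const char *cpu) {
        struct bpf_features f = {false, false, false, false};
        if (!strcmp(cpu, "v3")) f.jmp_ext = f.jmp32 = f.alu32 = true;
        if (!strcmp(cpu, "v4")) f.jmp_ext = f.jmp32 = f.alu32 = f.cpu_v4 = true;
        return f;
    }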
@@ -71,6 +74,7 @@ bool getHasJmp32() const { return HasJmp32; } bool getHasAlu32() const { return HasAlu32; } bool getUseDwarfRIS() const { return UseDwarfRIS; } + bool getCPUv4() const { return CPUv4; } const BPFInstrInfo *getInstrInfo() const override { return &InstrInfo; } const BPFFrameLowering *getFrameLowering() const override { diff --git a/llvm/lib/Target/BPF/BPFSubtarget.cpp b/llvm/lib/Target/BPF/BPFSubtarget.cpp --- a/llvm/lib/Target/BPF/BPFSubtarget.cpp +++ b/llvm/lib/Target/BPF/BPFSubtarget.cpp @@ -38,6 +38,7 @@ HasJmp32 = false; HasAlu32 = false; UseDwarfRIS = false; + CPUv4 = false; } void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { @@ -55,6 +56,13 @@ HasAlu32 = true; return; } + if (CPU == "v4") { + HasJmpExt = true; + HasJmp32 = true; + HasAlu32 = true; + CPUv4 = true; + return; + } } BPFSubtarget::BPFSubtarget(const Triple &TT, const std::string &CPU, diff --git a/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp --- a/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp +++ b/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp @@ -57,8 +57,7 @@ BPF_ABS = 0x1, BPF_IND = 0x2, BPF_MEM = 0x3, - BPF_LEN = 0x4, - BPF_MSH = 0x5, + BPF_MEMS = 0x4, BPF_ATOMIC = 0x6 }; @@ -178,7 +177,7 @@ uint8_t InstMode = getInstMode(Insn); if ((InstClass == BPF_LDX || InstClass == BPF_STX) && getInstSize(Insn) != BPF_DW && - (InstMode == BPF_MEM || InstMode == BPF_ATOMIC) && + (InstMode == BPF_MEM || InstMode == BPF_MEMS || InstMode == BPF_ATOMIC) && STI.hasFeature(BPF::ALU32)) Result = decodeInstruction(DecoderTableBPFALU3264, Instr, Insn, Address, this, STI); diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp --- a/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp +++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp @@ -6,12 +6,14 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/BPFMCFixups.h" #include "MCTargetDesc/BPFMCTargetDesc.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/Support/EndianStream.h" #include @@ -41,7 +43,10 @@ return false; } - unsigned getNumFixupKinds() const override { return 1; } + unsigned getNumFixupKinds() const override { + return BPF::NumTargetFixupKinds; + } + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; bool writeNopData(raw_ostream &OS, uint64_t Count, const MCSubtargetInfo *STI) const override; @@ -49,6 +54,20 @@ } // end anonymous namespace +const MCFixupKindInfo & +BPFAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo Infos[BPF::NumTargetFixupKinds] = { + { "FK_BPF_PCRel_4", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + }; + + if (Kind < FirstTargetFixupKind) + return MCAsmBackend::getFixupKindInfo(Kind); + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return Infos[Kind - FirstTargetFixupKind]; +} + bool BPFAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, const MCSubtargetInfo *STI) const { if ((Count % 8) != 0) @@ -85,6 +104,11 @@ Data[Fixup.getOffset() + 1] = 0x1; support::endian::write32be(&Data[Fixup.getOffset() + 4], Value); } + } else if (Fixup.getTargetKind() == BPF::FK_BPF_PCRel_4) { + // The input Value represents the number of 
bytes.
+    Value = (uint32_t)((Value - 8) / 8);
+    support::endian::write<uint32_t>(&Data[Fixup.getOffset() + 4], Value,
+                                     Endian);
   } else {
     assert(Fixup.getKind() == FK_PCRel_2);
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
@@ -10,6 +10,8 @@
 //
 //===----------------------------------------------------------------------===//
+
+#include "BPF.h"
 #include "MCTargetDesc/BPFInstPrinter.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCExpr.h"
@@ -100,8 +102,13 @@ raw_ostream &O) {
   const MCOperand &Op = MI->getOperand(OpNo);
   if (Op.isImm()) {
-    int16_t Imm = Op.getImm();
-    O << ((Imm >= 0) ? "+" : "") << formatImm(Imm);
+    if (MI->getOpcode() == BPF::JMPL) {
+      int32_t Imm = Op.getImm();
+      O << ((Imm >= 0) ? "+" : "") << formatImm(Imm);
+    } else {
+      int16_t Imm = Op.getImm();
+      O << ((Imm >= 0) ? "+" : "") << formatImm(Imm);
+    }
   } else if (Op.isExpr()) {
     printExpr(Op.getExpr(), O);
   } else {
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
@@ -10,6 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
+#include "MCTargetDesc/BPFMCFixups.h"
 #include "MCTargetDesc/BPFMCTargetDesc.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/MC/MCCodeEmitter.h"
@@ -95,6 +96,8 @@
     Fixups.push_back(MCFixup::create(0, Expr, FK_PCRel_4));
   else if (MI.getOpcode() == BPF::LD_imm64)
     Fixups.push_back(MCFixup::create(0, Expr, FK_SecRel_8));
+  else if (MI.getOpcode() == BPF::JMPL)
+    Fixups.push_back(MCFixup::create(0, Expr, (MCFixupKind)BPF::FK_BPF_PCRel_4));
   else
     // bb label
     Fixups.push_back(MCFixup::create(0, Expr, FK_PCRel_2));
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCFixups.h b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCFixups.h
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCFixups.h
@@ -0,0 +1,27 @@
+//=======-- BPFMCFixups.h - BPF-specific fixup entries ------*- C++ -*-=======//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCFIXUPS_H
+#define LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCFIXUPS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace BPF {
+enum FixupKind {
+  // A 32-bit pc-relative fixup, used for the gotol (JMPL) branch target.
+  FK_BPF_PCRel_4 = FirstTargetFixupKind,
+
+  // Marker
+  LastTargetFixupKind,
+  NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+} // end namespace BPF
+} // end namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
@@ -79,12 +79,15 @@
   bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
                       uint64_t &Target) const override {
     // The target is the 3rd operand of cond inst and the 1st of uncond inst.
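[Editor's note: branch immediates count instructions, not bytes. A small C sketch, not part of the patch, of the two conversions done at this MC layer; the function names are hypothetical, the formulas come from the BPFAsmBackend.cpp fixup code above and evaluateBranch() below.]

    #include <stdint.h>

    /* Asm backend: pc-relative byte distance -> 32-bit insn-count field for
       FK_BPF_PCRel_4; the -8 skips the gotol insn itself. */
    static uint32_t pcrel_bytes_to_field(uint64_t byte_dist) {
        return (uint32_t)((byte_dist - 8) / 8);
    }

    /* Instruction analysis: field -> absolute target address, with Size = 8.
       E.g. Addr = 0x1000, Imm = 3 gives 0x1000 + 8 + 3 * 8 = 0x1020. */
    static uint64_t branch_target(uint64_t addr, uint64_t size, int32_t imm) {
        return addr + size + (uint64_t)imm * size;
    }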
- int16_t Imm; + int32_t Imm; if (isConditionalBranch(Inst)) { - Imm = Inst.getOperand(2).getImm(); - } else if (isUnconditionalBranch(Inst)) - Imm = Inst.getOperand(0).getImm(); - else + Imm = (short)Inst.getOperand(2).getImm(); + } else if (isUnconditionalBranch(Inst)) { + if (Inst.getOpcode() == BPF::JMP) + Imm = (short)Inst.getOperand(0).getImm(); + else + Imm = (int)Inst.getOperand(0).getImm(); + } else return false; Target = Addr + Size + Imm * Size; diff --git a/llvm/test/CodeGen/BPF/bswap.ll b/llvm/test/CodeGen/BPF/bswap.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/BPF/bswap.ll @@ -0,0 +1,72 @@ +; RUN: llc -march=bpfel -mcpu=v4 -verify-machineinstrs -show-mc-encoding < %s | FileCheck %s +; Source: +; long foo(int a, int b, long c) { +; a = __builtin_bswap16(a); +; b = __builtin_bswap32(b); +; c = __builtin_bswap64(c); +; return a + b + c; +; } +; Compilation flags: +; clang -target bpf -O2 -S -emit-llvm -Xclang -disable-llvm-passes t.c + +; Function Attrs: nounwind +define dso_local i64 @foo(i32 noundef %a, i32 noundef %b, i64 noundef %c) #0 { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %c.addr = alloca i64, align 8 + store i32 %a, ptr %a.addr, align 4, !tbaa !3 + store i32 %b, ptr %b.addr, align 4, !tbaa !3 + store i64 %c, ptr %c.addr, align 8, !tbaa !7 + %0 = load i32, ptr %a.addr, align 4, !tbaa !3 + %conv = trunc i32 %0 to i16 + %1 = call i16 @llvm.bswap.i16(i16 %conv) + %conv1 = zext i16 %1 to i32 + store i32 %conv1, ptr %a.addr, align 4, !tbaa !3 + %2 = load i32, ptr %b.addr, align 4, !tbaa !3 + %3 = call i32 @llvm.bswap.i32(i32 %2) + store i32 %3, ptr %b.addr, align 4, !tbaa !3 + %4 = load i64, ptr %c.addr, align 8, !tbaa !7 + %5 = call i64 @llvm.bswap.i64(i64 %4) + store i64 %5, ptr %c.addr, align 8, !tbaa !7 + %6 = load i32, ptr %a.addr, align 4, !tbaa !3 + %7 = load i32, ptr %b.addr, align 4, !tbaa !3 + %add = add nsw i32 %6, %7 + %conv2 = sext i32 %add to i64 + %8 = load i64, ptr %c.addr, align 8, !tbaa !7 + %add3 = add nsw i64 %conv2, %8 + ret i64 %add3 +} + +; CHECK: w0 = w1 +; CHECK-NEXT: r0 = bswap16 r0 # encoding: [0xd7,0x00,0x00,0x00,0x10,0x00,0x00,0x00] +; CHECK-NEXT: r2 = bswap32 r2 # encoding: [0xd7,0x02,0x00,0x00,0x20,0x00,0x00,0x00] +; CHECK-NEXT: *(u32 *)(r10 - 8) = w2 +; CHECK-NEXT: *(u32 *)(r10 - 4) = w0 +; CHECK-NEXT: w0 += w2 +; CHECK-NEXT: r3 = bswap64 r3 # encoding: [0xd7,0x03,0x00,0x00,0x40,0x00,0x00,0x00] + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i16 @llvm.bswap.i16(i16) #1 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.bswap.i32(i32) #1 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i64 @llvm.bswap.i64(i64) #1 + +attributes #0 = { nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project.git 569bd3b841e3167ddd7c6ceeddb282d3c280e761)"} +!3 = !{!4, !4, i64 0} +!4 = !{!"int", !5, i64 0} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C/C++ TBAA"} +!7 = !{!8, !8, i64 0} +!8 = !{!"long", !5, i64 0} diff --git a/llvm/test/CodeGen/BPF/sdiv_smod.ll b/llvm/test/CodeGen/BPF/sdiv_smod.ll new file mode 100644 
--- /dev/null +++ b/llvm/test/CodeGen/BPF/sdiv_smod.ll @@ -0,0 +1,77 @@ +; RUN: llc -march=bpfel -mcpu=v4 -verify-machineinstrs -show-mc-encoding < %s | FileCheck %s +; Source: +; int foo(int a, int b, int c) { +; return a/b + a%c; +; } +; long bar(long a, long b, long c) { +; return a/b + a%c; +; } +; Compilation flags: +; clang -target bpf -O2 -S -emit-llvm -Xclang -disable-llvm-passes t.c + +; Function Attrs: nounwind +define dso_local i32 @foo(i32 noundef %a, i32 noundef %b, i32 noundef %c) #0 { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %c.addr = alloca i32, align 4 + store i32 %a, ptr %a.addr, align 4, !tbaa !3 + store i32 %b, ptr %b.addr, align 4, !tbaa !3 + store i32 %c, ptr %c.addr, align 4, !tbaa !3 + %0 = load i32, ptr %a.addr, align 4, !tbaa !3 + %1 = load i32, ptr %b.addr, align 4, !tbaa !3 + %div = sdiv i32 %0, %1 + %2 = load i32, ptr %a.addr, align 4, !tbaa !3 + %3 = load i32, ptr %c.addr, align 4, !tbaa !3 + %rem = srem i32 %2, %3 + %add = add nsw i32 %div, %rem + ret i32 %add +} + +; CHECK: w0 = w1 +; CHECK-NEXT: *(u32 *)(r10 - 8) = w2 +; CHECK-NEXT: *(u32 *)(r10 - 4) = w0 +; CHECK-NEXT: *(u32 *)(r10 - 12) = w3 +; CHECK-NEXT: w1 s%= w3 # encoding: [0x9c,0x31,0x01,0x00,0x00,0x00,0x00,0x00] +; CHECK-NEXT: w0 s/= w2 # encoding: [0x3c,0x20,0x01,0x00,0x00,0x00,0x00,0x00] + +; Function Attrs: nounwind +define dso_local i64 @bar(i64 noundef %a, i64 noundef %b, i64 noundef %c) #0 { +entry: + %a.addr = alloca i64, align 8 + %b.addr = alloca i64, align 8 + %c.addr = alloca i64, align 8 + store i64 %a, ptr %a.addr, align 8, !tbaa !7 + store i64 %b, ptr %b.addr, align 8, !tbaa !7 + store i64 %c, ptr %c.addr, align 8, !tbaa !7 + %0 = load i64, ptr %a.addr, align 8, !tbaa !7 + %1 = load i64, ptr %b.addr, align 8, !tbaa !7 + %div = sdiv i64 %0, %1 + %2 = load i64, ptr %a.addr, align 8, !tbaa !7 + %3 = load i64, ptr %c.addr, align 8, !tbaa !7 + %rem = srem i64 %2, %3 + %add = add nsw i64 %div, %rem + ret i64 %add +} + +; CHECK: r0 = r1 +; CHECK-NEXT: *(u64 *)(r10 - 16) = r2 +; CHECK-NEXT: *(u64 *)(r10 - 8) = r0 +; CHECK-NEXT: *(u64 *)(r10 - 24) = r3 +; CHECK-NEXT: r1 s%= r3 # encoding: [0x9f,0x31,0x01,0x00,0x00,0x00,0x00,0x00] +; CHECK-NEXT: r0 s/= r2 # encoding: [0x3f,0x20,0x01,0x00,0x00,0x00,0x00,0x00] + +attributes #0 = { nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project.git 569bd3b841e3167ddd7c6ceeddb282d3c280e761)"} +!3 = !{!4, !4, i64 0} +!4 = !{!"int", !5, i64 0} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C/C++ TBAA"} +!7 = !{!8, !8, i64 0} +!8 = !{!"long", !5, i64 0} diff --git a/llvm/test/CodeGen/BPF/sext_ld.ll b/llvm/test/CodeGen/BPF/sext_ld.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/BPF/sext_ld.ll @@ -0,0 +1,104 @@ +; RUN: llc -march=bpfel -mcpu=v4 -verify-machineinstrs -show-mc-encoding < %s | FileCheck %s +; Source: +; int f1(char *p) { +; return *p; +; } +; int f2(short *p) { +; return *p; +; } +; int f3(int *p) { +; return *p; +; } +; long f4(char *p) { +; return *p; +; } +; long f5(short *p) { +; return *p; +; } +; long f6(int *p) { +; return *p; +; } +; long f7(long *p) { +; return *p; +; } +; Compilation flags: +; clang -target bpf -O2 -S -emit-llvm -Xclang -disable-llvm-passes t.c + +; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly 
willreturn +define dso_local i32 @f1(ptr nocapture noundef readonly %p) local_unnamed_addr #0 { +entry: + %0 = load i8, ptr %p, align 1, !tbaa !3 + %conv = sext i8 %0 to i32 +; CHECK: w0 = *(s8 *)(r1 + 0) # encoding: [0x91,0x10,0x00,0x00,0x00,0x00,0x00,0x00] + ret i32 %conv +} + +; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn +define dso_local i32 @f2(ptr nocapture noundef readonly %p) local_unnamed_addr #0 { +entry: + %0 = load i16, ptr %p, align 2, !tbaa !6 + %conv = sext i16 %0 to i32 +; CHECK: w0 = *(s16 *)(r1 + 0) # encoding: [0x89,0x10,0x00,0x00,0x00,0x00,0x00,0x00] + ret i32 %conv +} + +; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn +define dso_local i32 @f3(ptr nocapture noundef readonly %p) local_unnamed_addr #0 { +entry: + %0 = load i32, ptr %p, align 4, !tbaa !8 +; CHECK: w0 = *(u32 *)(r1 + 0) # encoding: [0x61,0x10,0x00,0x00,0x00,0x00,0x00,0x00] + ret i32 %0 +} + +; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn +define dso_local i64 @f4(ptr nocapture noundef readonly %p) local_unnamed_addr #0 { +entry: + %0 = load i8, ptr %p, align 1, !tbaa !3 + %conv = sext i8 %0 to i64 + ret i64 %conv +; CHECK: r0 = *(s8 *)(r1 + 0) # encoding: [0x91,0x10,0x00,0x00,0x00,0x00,0x00,0x00] +} + +; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn +define dso_local i64 @f5(ptr nocapture noundef readonly %p) local_unnamed_addr #0 { +entry: + %0 = load i16, ptr %p, align 2, !tbaa !6 + %conv = sext i16 %0 to i64 + ret i64 %conv +; CHECK: r0 = *(s16 *)(r1 + 0) # encoding: [0x89,0x10,0x00,0x00,0x00,0x00,0x00,0x00] +} + +; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn +define dso_local i64 @f6(ptr nocapture noundef readonly %p) local_unnamed_addr #0 { +entry: + %0 = load i32, ptr %p, align 4, !tbaa !8 + %conv = sext i32 %0 to i64 + ret i64 %conv +; CHECK: r0 = *(s32 *)(r1 + 0) # encoding: [0x81,0x10,0x00,0x00,0x00,0x00,0x00,0x00] +} + +; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn +define dso_local i64 @f7(ptr nocapture noundef readonly %p) local_unnamed_addr #0 { +entry: + %0 = load i64, ptr %p, align 8, !tbaa !10 + ret i64 %0 +; CHECK: r0 = *(u64 *)(r1 + 0) # encoding: [0x79,0x10,0x00,0x00,0x00,0x00,0x00,0x00] +} + +attributes #0 = { argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{!"clang version 16.0.0 (https://github.com/llvm/llvm-project.git 68665544c7d59735e9c0bb32b08829c006c7c594)"} +!3 = !{!4, !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!7, !7, i64 0} +!7 = !{!"short", !4, i64 0} +!8 = !{!9, !9, i64 0} +!9 = !{!"int", !4, i64 0} +!10 = !{!11, !11, i64 0} +!11 = !{!"long", !4, i64 0} diff --git a/llvm/test/CodeGen/BPF/sext_mov.ll b/llvm/test/CodeGen/BPF/sext_mov.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/BPF/sext_mov.ll @@ -0,0 +1,109 @@ +; RUN: llc -march=bpfel -mcpu=v4 -verify-machineinstrs -show-mc-encoding < %s | FileCheck %s +; Source: +; short f1(int a) { +; return (char)a; +; } +; int f2(int a) { +; return (char)a; +; } +; long f3(int a) { +; return (char)a; +; } +; int 
f4(int a) { +; return (short)a; +; } +; long f5(int a) { +; return (short)a; +; } +; long f6(long a) { +; return (int)a; +; } +; Compilation flags: +; clang -target bpf -O2 -S -emit-llvm -Xclang -disable-llvm-passes t.c + +; Function Attrs: nounwind +define dso_local i16 @f1(i32 noundef %a) #0 { +entry: + %a.addr = alloca i32, align 4 + store i32 %a, ptr %a.addr, align 4, !tbaa !3 + %0 = load i32, ptr %a.addr, align 4, !tbaa !3 + %conv = trunc i32 %0 to i8 + %conv1 = sext i8 %conv to i16 + ret i16 %conv1 +} +; CHECK: w0 = (s8)w1 # encoding: [0xbc,0x10,0x08,0x00,0x00,0x00,0x00,0x00] + +; Function Attrs: nounwind +define dso_local i32 @f2(i32 noundef %a) #0 { +entry: + %a.addr = alloca i32, align 4 + store i32 %a, ptr %a.addr, align 4, !tbaa !3 + %0 = load i32, ptr %a.addr, align 4, !tbaa !3 + %conv = trunc i32 %0 to i8 + %conv1 = sext i8 %conv to i32 + ret i32 %conv1 +} +; CHECK: w0 = (s8)w1 # encoding: [0xbc,0x10,0x08,0x00,0x00,0x00,0x00,0x00] + +; Function Attrs: nounwind +define dso_local i64 @f3(i32 noundef %a) #0 { +entry: + %a.addr = alloca i32, align 4 + store i32 %a, ptr %a.addr, align 4, !tbaa !3 + %0 = load i32, ptr %a.addr, align 4, !tbaa !3 + %conv = trunc i32 %0 to i8 + %conv1 = sext i8 %conv to i64 + ret i64 %conv1 +} +; CHECK: r0 = (s8)r1 # encoding: [0xbf,0x10,0x08,0x00,0x00,0x00,0x00,0x00] + +; Function Attrs: nounwind +define dso_local i32 @f4(i32 noundef %a) #0 { +entry: + %a.addr = alloca i32, align 4 + store i32 %a, ptr %a.addr, align 4, !tbaa !3 + %0 = load i32, ptr %a.addr, align 4, !tbaa !3 + %conv = trunc i32 %0 to i16 + %conv1 = sext i16 %conv to i32 + ret i32 %conv1 +} +; CHECK: w0 = (s16)w1 # encoding: [0xbc,0x10,0x10,0x00,0x00,0x00,0x00,0x00] + +; Function Attrs: nounwind +define dso_local i64 @f5(i32 noundef %a) #0 { +entry: + %a.addr = alloca i32, align 4 + store i32 %a, ptr %a.addr, align 4, !tbaa !3 + %0 = load i32, ptr %a.addr, align 4, !tbaa !3 + %conv = trunc i32 %0 to i16 + %conv1 = sext i16 %conv to i64 + ret i64 %conv1 +} +; CHECK: r0 = (s16)r1 # encoding: [0xbf,0x10,0x10,0x00,0x00,0x00,0x00,0x00] + +; Function Attrs: nounwind +define dso_local i64 @f6(i64 noundef %a) #0 { +entry: + %a.addr = alloca i64, align 8 + store i64 %a, ptr %a.addr, align 8, !tbaa !7 + %0 = load i64, ptr %a.addr, align 8, !tbaa !7 + %conv = trunc i64 %0 to i32 + %conv1 = sext i32 %conv to i64 + ret i64 %conv1 +} +; CHECK: r0 = (s32)r1 # encoding: [0xbf,0x10,0x20,0x00,0x00,0x00,0x00,0x00] + +attributes #0 = { nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project.git 569bd3b841e3167ddd7c6ceeddb282d3c280e761)"} +!3 = !{!4, !4, i64 0} +!4 = !{!"int", !5, i64 0} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C/C++ TBAA"} +!7 = !{!8, !8, i64 0} +!8 = !{!"long", !5, i64 0}
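[Editor's note: for quick reference, a C source sketch stitched together from the test sources above — not part of the patch — that exercises each new cpu=v4 feature when built with clang -target bpf -O2 -mcpu=v4.]

    /* bswap builtins -> BSWAP16/32/64; signed '/' and '%' -> sdiv/smod
       (printed as s/= and s%=); narrow loads and casts -> sign-extending
       loads (*(s8 *) etc.) and moves ((s16) etc.). */
    long test_v4(int a, int b, long c, char *p) {
        long r = 0;
        r += __builtin_bswap16((unsigned short)a);      /* BSWAP16 */
        r += __builtin_bswap32((unsigned)b);            /* BSWAP32 */
        r += (long)__builtin_bswap64((unsigned long)c); /* BSWAP64 */
        r += a / b + a % b; /* sdiv + smod, no "unsupported" error on v4 */
        r += *p;            /* sign-extending byte load: *(s8 *)(r1 + 0) */
        r += (short)a;      /* sign-extending move: w0 = (s16)w1 */
        return r;
    }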