diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -437,6 +437,25 @@
 class LOADi64 : LOAD;
+let isCodeGenOnly = 1 in {
+  def CORE_MEM : TYPE_LD_ST;
+  def CORE_ALU32_MEM : TYPE_LD_ST;
+  let Constraints = "$dst = $src" in {
+    def CORE_SHIFT : ALU_RR;
+  }
+}
 let Predicates = [BPFNoALU32] in {
   def LDW : LOADi64;
diff --git a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
--- a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
+++ b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
@@ -53,6 +53,19 @@
   void initialize(MachineFunction &MFParm);
 
   bool removeLD(void);
+  void processCandidate(MachineRegisterInfo *MRI, MachineBasicBlock &MBB,
+                        MachineInstr &MI, Register &SrcReg, Register &DstReg,
+                        const GlobalValue *GVal);
+  void processDstReg(MachineRegisterInfo *MRI, Register &DstReg,
+                     Register &SrcReg, const GlobalValue *GVal,
+                     bool doSrcRegProp);
+  void processInst(MachineRegisterInfo *MRI, MachineInstr *Inst,
+                   MachineOperand *RelocOp, const GlobalValue *GVal);
+  void checkADDrr(MachineRegisterInfo *MRI, MachineOperand *RelocOp,
+                  const GlobalValue *GVal);
+  void checkShift(MachineRegisterInfo *MRI, MachineBasicBlock &MBB,
+                  MachineOperand *RelocOp, const GlobalValue *GVal,
+                  unsigned Opcode);
 
 public:
   // Main entry point for this pass.
@@ -71,6 +84,172 @@
   LLVM_DEBUG(dbgs() << "*** BPF simplify patchable insts pass ***\n\n");
 }
 
+// Fold a relocated offset that feeds an ADD_rr into the loads and stores
+// that use the ADD_rr result as their base address:
+//   %1 = ADD_rr %2, %3   (RelocOp is %2 or %3; the other one is the base)
+//   %4 = LDW %1, 0       ==> CORE_MEM(%4, LDW, base, GVal)
+// Each matching load/store is replaced by a CORE_[ALU32_]MEM pseudo carrying
+// the original opcode, the base register and the relocation global.
+void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI,
+    MachineOperand *RelocOp, const GlobalValue *GVal) {
+  const MachineInstr *Inst = RelocOp->getParent();
+  const MachineOperand *Op1 = &Inst->getOperand(1);
+  const MachineOperand *Op2 = &Inst->getOperand(2);
+  const MachineOperand *BaseOp = (RelocOp == Op1) ? Op2 : Op1;
+
+  // Go through all uses of %1 as in %1 = ADD_rr %2, %3
+  const MachineOperand &Op0 = Inst->getOperand(0);
+  auto Begin = MRI->use_begin(Op0.getReg()), End = MRI->use_end();
+  decltype(End) NextI;
+  for (auto I = Begin; I != End; I = NextI) {
+    NextI = std::next(I);
+    // The candidate needs to have a unique definition.
+    if (!MRI->getUniqueVRegDef(I->getReg()))
+      continue;
+
+    MachineInstr *DefInst = I->getParent();
+    unsigned Opcode = DefInst->getOpcode();
+    unsigned COREOp;
+    if (Opcode == BPF::LDB || Opcode == BPF::LDH || Opcode == BPF::LDW ||
+        Opcode == BPF::LDD || Opcode == BPF::STB || Opcode == BPF::STH ||
+        Opcode == BPF::STW || Opcode == BPF::STD)
+      COREOp = BPF::CORE_MEM;
+    else if (Opcode == BPF::LDB32 || Opcode == BPF::LDH32 ||
+             Opcode == BPF::LDW32 || Opcode == BPF::STB32 ||
+             Opcode == BPF::STH32 || Opcode == BPF::STW32)
+      COREOp = BPF::CORE_ALU32_MEM;
+    else
+      continue;
+
+    // It must be a form of %4 = *(type *)(%1 + 0) or *(type *)(%1 + 0) = %4.
+    const MachineOperand &ImmOp = DefInst->getOperand(2);
+    if (!ImmOp.isImm() || ImmOp.getImm() != 0)
+      continue;
+
+    // Reject the form:
+    //   %1 = ADD_rr %2, %3
+    //   *(type *)(%2 + 0) = %1
+    // where %1 is the stored value (operand #0), not the base address.
+    if (Opcode == BPF::STB || Opcode == BPF::STH || Opcode == BPF::STW ||
+        Opcode == BPF::STD || Opcode == BPF::STB32 || Opcode == BPF::STH32 ||
+        Opcode == BPF::STW32) {
+      const MachineOperand &Opnd = DefInst->getOperand(0);
+      if (Opnd.isReg() && Opnd.getReg() == I->getReg())
+        continue;
+    }
+
+    BuildMI(*DefInst->getParent(), *DefInst, DefInst->getDebugLoc(), TII->get(COREOp))
+        .add(DefInst->getOperand(0)).addImm(Opcode).add(*BaseOp)
+        .addGlobalAddress(GVal);
+    DefInst->eraseFromParent();
+  }
+}
+
+// Replace a register shift whose shift amount (operand #2) is the relocated
+// value with a CORE_SHIFT pseudo carrying the immediate-form opcode, the
+// shifted register and the relocation global.
+void BPFMISimplifyPatchable::checkShift(MachineRegisterInfo *MRI,
+    MachineBasicBlock &MBB, MachineOperand *RelocOp, const GlobalValue *GVal,
+    unsigned Opcode) {
+  // Relocation operand should be the operand #2.
+  MachineInstr *Inst = RelocOp->getParent();
+  if (RelocOp != &Inst->getOperand(2))
+    return;
+
+  BuildMI(MBB, *Inst, Inst->getDebugLoc(), TII->get(BPF::CORE_SHIFT))
+      .add(Inst->getOperand(0)).addImm(Opcode)
+      .add(Inst->getOperand(1)).addGlobalAddress(GVal);
+  Inst->eraseFromParent();
+}
+
+// Rewrite the users of DstReg for a candidate instruction MI. A 32-bit
+// DstReg is redefined as a sub-register COPY of SrcReg (after visiting its
+// SUBREG_TO_REG users); otherwise SrcReg is propagated to all uses of DstReg.
+void BPFMISimplifyPatchable::processCandidate(MachineRegisterInfo *MRI,
+    MachineBasicBlock &MBB, MachineInstr &MI, Register &SrcReg,
+    Register &DstReg, const GlobalValue *GVal) {
+  if (MRI->getRegClass(DstReg) == &BPF::GPR32RegClass) {
+    // We can optimize such a pattern:
+    //  %1:gpr = LD_imm64 @"llvm.s:0:4$0:2"
+    //  %2:gpr32 = LDW32 %1:gpr, 0
+    //  %3:gpr = SUBREG_TO_REG 0, %2:gpr32, %subreg.sub_32
+    //  %4:gpr = ADD_rr %0:gpr, %3:gpr
+    //  or similar patterns below for non-alu32 case.
+    auto Begin = MRI->use_begin(DstReg), End = MRI->use_end();
+    decltype(End) NextI;
+    for (auto I = Begin; I != End; I = NextI) {
+      NextI = std::next(I);
+      if (!MRI->getUniqueVRegDef(I->getReg()))
+        continue;
+
+      unsigned Opcode = I->getParent()->getOpcode();
+      if (Opcode == BPF::SUBREG_TO_REG) {
+        Register TmpReg = I->getParent()->getOperand(0).getReg();
+        processDstReg(MRI, TmpReg, DstReg, GVal, false);
+      }
+    }
+
+    BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::COPY), DstReg)
+        .addReg(SrcReg, 0, BPF::sub_32);
+    return;
+  }
+
+  // All uses of DstReg replaced by SrcReg
+  processDstReg(MRI, DstReg, SrcReg, GVal, true);
+}
+
+// Visit every use of DstReg, optionally rewriting it to SrcReg first, and
+// try to attach the relocation to the using instruction.
+void BPFMISimplifyPatchable::processDstReg(MachineRegisterInfo *MRI,
+    Register &DstReg, Register &SrcReg, const GlobalValue *GVal,
+    bool doSrcRegProp) {
+  auto Begin = MRI->use_begin(DstReg), End = MRI->use_end();
+  decltype(End) NextI;
+  for (auto I = Begin; I != End; I = NextI) {
+    NextI = std::next(I);
+    if (doSrcRegProp)
+      I->setReg(SrcReg);
+
+    // The candidate needs to have a unique definition.
+    if (MRI->getUniqueVRegDef(I->getReg()))
+      processInst(MRI, I->getParent(), &*I, GVal);
+  }
+}
+
+// Check to see whether we could do some optimization
+// to attach relocation to downstream dependent instructions.
+// Two kinds of patterns are recognized below:
+// Pattern 1:
+//   %1 = LD_imm64 @"llvm.b:0:4$0:1"  <== patch_imm = 4
+//   %2 = LDD %1, 0  <== this insn will be removed
+//   %3 = ADD_rr %0, %2
+//   %4 = LDW[32] %3, 0 OR STW[32] %4, %3, 0
+//   The `%4 = ...` will be transformed to
+//      CORE_[ALU32_]MEM(%4, mem_opcode, %0, @"llvm.b:0:4$0:1")
+//   and later on, BTF emit phase will translate to
+//      %4 = LDW[32] %0, 4 OR STW[32] %4, %0, 4
+//   and attach a relocation to it.
+// Pattern 2:
+//    %15 = LD_imm64 @"llvm.t:5:63$0:2" <== relocation type 5
+//    %16 = LDW %15, 0   <== this insn will be removed
+//    %17 = SRA_rr %14, %16
+//    The `%17 = ...` will be transformed to
+//       %17 = CORE_SHIFT(SRA_ri, %14, @"llvm.t:5:63$0:2")
+//    and later on, BTF emit phase will translate to
+//       %r4 = SRA_ri %r4, 63
+void BPFMISimplifyPatchable::processInst(MachineRegisterInfo *MRI,
+    MachineInstr *Inst, MachineOperand *RelocOp, const GlobalValue *GVal) {
+  unsigned Opcode = Inst->getOpcode();
+  if (Opcode == BPF::ADD_rr)
+    checkADDrr(MRI, RelocOp, GVal);
+  else if (Opcode == BPF::SLL_rr)
+    checkShift(MRI, *Inst->getParent(), RelocOp, GVal, BPF::SLL_ri);
+  else if (Opcode == BPF::SRA_rr)
+    checkShift(MRI, *Inst->getParent(), RelocOp, GVal, BPF::SRA_ri);
+  else if (Opcode == BPF::SRL_rr)
+    checkShift(MRI, *Inst->getParent(), RelocOp, GVal, BPF::SRL_ri);
+}
+
 /// Remove unneeded Load instructions.
bool BPFMISimplifyPatchable::removeLD() { MachineRegisterInfo *MRI = &MF->getRegInfo(); @@ -105,10 +258,11 @@ continue; bool IsCandidate = false; + const GlobalValue *GVal = nullptr; if (DefInst->getOpcode() == BPF::LD_imm64) { const MachineOperand &MO = DefInst->getOperand(1); if (MO.isGlobal()) { - const GlobalValue *GVal = MO.getGlobal(); + GVal = MO.getGlobal(); auto *GVar = dyn_cast(GVal); if (GVar) { // Global variables representing structure offset or @@ -124,17 +278,7 @@ if (!IsCandidate) continue; - if (MRI->getRegClass(DstReg) == &BPF::GPR32RegClass) { - BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::COPY), DstReg) - .addReg(SrcReg, 0, BPF::sub_32); - } else { - auto Begin = MRI->use_begin(DstReg), End = MRI->use_end(); - decltype(End) NextI; - for (auto I = Begin; I != End; I = NextI) { - NextI = std::next(I); - I->setReg(SrcReg); - } - } + processCandidate(MRI, MBB, MI, SrcReg, DstReg, GVal); ToErase = &MI; Changed = true; diff --git a/llvm/lib/Target/BPF/BTFDebug.h b/llvm/lib/Target/BPF/BTFDebug.h --- a/llvm/lib/Target/BPF/BTFDebug.h +++ b/llvm/lib/Target/BPF/BTFDebug.h @@ -223,7 +223,7 @@ uint32_t ColumnNum; ///< the column number }; -/// Represent one offset relocation. +/// Represent one field relocation. struct BTFFieldReloc { const MCSymbol *Label; ///< MCSymbol identifying insn for the reloc uint32_t TypeID; ///< Type ID @@ -296,15 +296,15 @@ /// Generate types for function prototypes. void processFuncPrototypes(); - /// Generate one offset relocation record. - void generateFieldReloc(const MachineInstr *MI, const MCSymbol *ORSym, - DIType *RootTy, StringRef AccessPattern); + /// Generate one field relocation record. + void generateFieldReloc(const MCSymbol *ORSym, DIType *RootTy, + StringRef AccessPattern); /// Populating unprocessed struct type. unsigned populateStructType(const DIType *Ty); - /// Process LD_imm64 instructions. - void processLDimm64(const MachineInstr *MI); + /// Process the relocation operand of an LD_imm64 or CORE_* instruction. 
+ void processReloc(const MachineOperand &MO); /// Emit common header of .BTF and .BTF.ext sections. void emitCommonHeader(); diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp --- a/llvm/lib/Target/BPF/BTFDebug.cpp +++ b/llvm/lib/Target/BPF/BTFDebug.cpp @@ -937,9 +937,8 @@ } /// Generate a struct member field relocation. -void BTFDebug::generateFieldReloc(const MachineInstr *MI, - const MCSymbol *ORSym, DIType *RootTy, - StringRef AccessPattern) { +void BTFDebug::generateFieldReloc(const MCSymbol *ORSym, DIType *RootTy, + StringRef AccessPattern) { unsigned RootId = populateStructType(RootTy); size_t FirstDollar = AccessPattern.find_first_of('$'); size_t FirstColon = AccessPattern.find_first_of(':'); @@ -959,33 +958,8 @@ FieldRelocTable[SecNameOff].push_back(FieldReloc); } -void BTFDebug::processLDimm64(const MachineInstr *MI) { - // If the insn is an LD_imm64, the following two cases - // will generate an .BTF.ext record. - // - // If the insn is "r2 = LD_imm64 @__BTF_...", - // add this insn into the .BTF.ext FieldReloc subsection. - // Relocation looks like: - // . SecName: - // . InstOffset - // . TypeID - // . OffSetNameOff - // Later, the insn is replaced with "r2 = " - // where "" equals to the offset based on current - // type definitions. - // - // If the insn is "r2 = LD_imm64 @VAR" and VAR is - // a patchable external global, add this insn into the .BTF.ext - // ExternReloc subsection. - // Relocation looks like: - // . SecName: - // . InstOffset - // . ExternNameOff - // Later, the insn is replaced with "r2 = " or - // "LD_imm64 r2, " where "" = 0. 
- +void BTFDebug::processReloc(const MachineOperand &MO) { // check whether this is a candidate or not - const MachineOperand &MO = MI->getOperand(1); if (MO.isGlobal()) { const GlobalValue *GVal = MO.getGlobal(); auto *GVar = dyn_cast(GVal); @@ -995,7 +969,7 @@ MDNode *MDN = GVar->getMetadata(LLVMContext::MD_preserve_access_index); DIType *Ty = dyn_cast(MDN); - generateFieldReloc(MI, ORSym, Ty, GVar->getName()); + generateFieldReloc(ORSym, Ty, GVar->getName()); } } } @@ -1020,8 +994,25 @@ return; } - if (MI->getOpcode() == BPF::LD_imm64) - processLDimm64(MI); + if (MI->getOpcode() == BPF::LD_imm64) { + // If the insn is "r2 = LD_imm64 @global", + // add this insn into the .BTF.ext FieldReloc subsection. + // Relocation looks like: + // . SecName: + // . InstOffset + // . TypeID + // . OffSetNameOff + // . RelocType + // Later, the insn is replaced with "r2 = imm" + // where "imm" equals the offset based on current + // type definitions. + processReloc(MI->getOperand(1)); + } else if (MI->getOpcode() == BPF::CORE_MEM || + MI->getOpcode() == BPF::CORE_ALU32_MEM || + MI->getOpcode() == BPF::CORE_SHIFT) { + // A load, store or shift insn carries the relocation in operand 3. + processReloc(MI->getOperand(3)); + } // Skip this instruction if no DebugLoc or the DebugLoc // is the same as the previous instruction. 
@@ -1148,6 +1139,25 @@ return true; } } } + } else if (MI->getOpcode() == BPF::CORE_MEM || + MI->getOpcode() == BPF::CORE_ALU32_MEM || + MI->getOpcode() == BPF::CORE_SHIFT) { + const MachineOperand &MO = MI->getOperand(3); + if (MO.isGlobal()) { + const GlobalValue *GVal = MO.getGlobal(); + auto *GVar = dyn_cast(GVal); + if (GVar && GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr)) { + uint32_t Imm = PatchImms[GVar->getName().str()]; + OutMI.setOpcode(MI->getOperand(1).getImm()); + if (MI->getOperand(0).isImm()) + OutMI.addOperand(MCOperand::createImm(MI->getOperand(0).getImm())); + else + OutMI.addOperand(MCOperand::createReg(MI->getOperand(0).getReg())); + OutMI.addOperand(MCOperand::createReg(MI->getOperand(2).getReg())); + OutMI.addOperand(MCOperand::createImm(Imm)); + return true; + } + } } return false; } diff --git a/llvm/test/CodeGen/BPF/CORE/offset-reloc-end-load.ll b/llvm/test/CodeGen/BPF/CORE/offset-reloc-end-load.ll --- a/llvm/test/CodeGen/BPF/CORE/offset-reloc-end-load.ll +++ b/llvm/test/CodeGen/BPF/CORE/offset-reloc-end-load.ll @@ -20,10 +20,8 @@ } ; CHECK-LABEL: test -; CHECK: r2 = 4 -; CHECK: r1 += r2 -; CHECK-ALU64: r0 = *(u32 *)(r1 + 0) -; CHECK-ALU32: w0 = *(u32 *)(r1 + 0) +; CHECK-ALU64: r0 = *(u32 *)(r1 + 4) +; CHECK-ALU32: w0 = *(u32 *)(r1 + 4) ; CHECK: exit ; ; CHECK: .long 1 # BTF_KIND_STRUCT(id = 2) diff --git a/llvm/test/CodeGen/BPF/CORE/offset-reloc-fieldinfo-1.ll b/llvm/test/CodeGen/BPF/CORE/offset-reloc-fieldinfo-1.ll --- a/llvm/test/CodeGen/BPF/CORE/offset-reloc-fieldinfo-1.ll +++ b/llvm/test/CodeGen/BPF/CORE/offset-reloc-fieldinfo-1.ll @@ -73,8 +73,9 @@ ; CHECK: r{{[0-9]+}} = 4 ; CHECK: r{{[0-9]+}} = 4 -; CHECK: r{{[0-9]+}} = 51 -; CHECK: r{{[0-9]+}} = 60 +; CHECK: r{{[0-9]+}} <<= 51 +; CHECK: r{{[0-9]+}} s>>= 60 +; CHECK: r{{[0-9]+}} >>= 60 ; CHECK: r{{[0-9]+}} = 1 ; CHECK: .byte 115 # string offset=1 @@ -83,7 +84,7 @@ ; CHECK: .long 16 # FieldReloc ; CHECK-NEXT: .long 30 # Field reloc section string offset=30 -; CHECK-NEXT: .long 5 +; 
CHECK-NEXT: .long 6 ; CHECK-NEXT: .long .Ltmp{{[0-9]+}} ; CHECK-NEXT: .long 2 ; CHECK-NEXT: .long 73 @@ -103,6 +104,10 @@ ; CHECK-NEXT: .long .Ltmp{{[0-9]+}} ; CHECK-NEXT: .long 2 ; CHECK-NEXT: .long 73 +; CHECK-NEXT: .long 5 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long 73 ; CHECK-NEXT: .long 3 ; Function Attrs: argmemonly nounwind willreturn diff --git a/llvm/test/CodeGen/BPF/CORE/offset-reloc-fieldinfo-2.ll b/llvm/test/CodeGen/BPF/CORE/offset-reloc-fieldinfo-2.ll --- a/llvm/test/CodeGen/BPF/CORE/offset-reloc-fieldinfo-2.ll +++ b/llvm/test/CodeGen/BPF/CORE/offset-reloc-fieldinfo-2.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK,CHECK-EL %s -; RUN: llc -march=bpfeb -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK,CHECK-EB %s -; RUN: llc -march=bpfel -mattr=+alu32 -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK,CHECK-EL %s -; RUN: llc -march=bpfeb -mattr=+alu32 -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK,CHECK-EB %s +; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK,CHECK-EL,CHECK64 %s +; RUN: llc -march=bpfeb -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK,CHECK-EB,CHECK64 %s +; RUN: llc -march=bpfel -mattr=+alu32 -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK,CHECK-EL,CHECK32 %s +; RUN: llc -march=bpfeb -mattr=+alu32 -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK,CHECK-EB,CHECK32 %s ; Source code: ; struct s { ; int a; @@ -114,9 +114,10 @@ ; CHECK: r{{[0-9]+}} = 4 ; CHECK: r{{[0-9]+}} = 4 -; CHECK-EL: r{{[0-9]+}} = 51 -; CHECK-EB: r{{[0-9]+}} = 41 -; CHECK: r{{[0-9]+}} = 60 +; CHECK-EL: r{{[0-9]+}} <<= 51 +; CHECK-EB: r{{[0-9]+}} <<= 41 +; CHECK: r{{[0-9]+}} s>>= 60 +; CHECK: r{{[0-9]+}} >>= 60 ; CHECK: r{{[0-9]+}} = 1 ; CHECK: .long 1 # BTF_KIND_STRUCT(id = 2) @@ -126,7 +127,8 @@ ; CHECK: .long 16 # FieldReloc ; CHECK-NEXT: .long 30 # Field reloc section string offset=30 -; CHECK-NEXT: .long 5 
+; CHECK32: .long 6 +; CHECK64: .long 7 ; CHECK-NEXT: .long .Ltmp{{[0-9]+}} ; CHECK-NEXT: .long 2 ; CHECK-NEXT: .long 36 @@ -135,6 +137,10 @@ ; CHECK-NEXT: .long 2 ; CHECK-NEXT: .long 36 ; CHECK-NEXT: .long 1 +; CHECK64: .long .Ltmp{{[0-9]+}} +; CHECK64: .long 2 +; CHECK64: .long 36 +; CHECK64: .long 0 ; CHECK-NEXT: .long .Ltmp{{[0-9]+}} ; CHECK-NEXT: .long 2 ; CHECK-NEXT: .long 36 @@ -146,6 +152,10 @@ ; CHECK-NEXT: .long .Ltmp{{[0-9]+}} ; CHECK-NEXT: .long 2 ; CHECK-NEXT: .long 36 +; CHECK-NEXT: .long 5 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long 36 ; CHECK-NEXT: .long 3 ; Function Attrs: nounwind readnone