diff --git a/llvm/include/llvm/MC/MCInstrAnalysis.h b/llvm/include/llvm/MC/MCInstrAnalysis.h --- a/llvm/include/llvm/MC/MCInstrAnalysis.h +++ b/llvm/include/llvm/MC/MCInstrAnalysis.h @@ -14,6 +14,7 @@ #ifndef LLVM_MC_MCINSTRANALYSIS_H #define LLVM_MC_MCINSTRANALYSIS_H +#include "llvm/ADT/Triple.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrInfo.h" @@ -22,7 +23,6 @@ namespace llvm { class MCRegisterInfo; -class Triple; class MCInstrAnalysis { protected: @@ -146,6 +146,30 @@ return false; } + /// Given an instruction evaluate its operands to glean information which can + /// be useful for multi-instruction patterns. An example is RISC-V's function + /// calls, whose target is calculated from two (not necessarily adjacent) + /// instructions. Assumes a linear scan of disassembly. + virtual bool evaluateInst(const MCInst &Inst, uint64_t Addr, uint64_t Size, + uint64_t &Target) { + if (isCall(Inst) || isUnconditionalBranch(Inst) || + isConditionalBranch(Inst)) + return evaluateBranch(Inst, Addr, Size, Target); + + return false; + } + + /// For the evaluateInst call, reset any known values. + /// NewObject is set to true when a new object is being analyzed, in which + /// case Arch is used to indicate the architecture of the incoming object. + virtual void resetAnalysis(bool NewObject = false, + Triple::ArchType Arch = Triple::UnknownArch) {} + + /// For the evaluateInst call, set a target's known GP register for GP-based + /// analysis. This value is cached until resetAnalysis is called with + /// NewObject=true. + virtual void setGPForAnalysis(uint64_t Addr) {} + /// Given a branch instruction try to get the address the branch /// targets. Return true on success, and the address in Target. 
virtual bool
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -17,6 +17,7 @@
 #include "RISCVTargetStreamer.h"
 #include "TargetInfo/RISCVTargetInfo.h"
 #include "Utils/RISCVBaseInfo.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/Register.h"
 #include "llvm/MC/MCAsmInfo.h"
@@ -96,11 +97,190 @@
 
 namespace {
 
+// Cache of RISC-V Register Values used by RISCVMCInstrAnalysis.
+// A register is either known ("good") or unknown. X0 is always known and
+// reads as zero.
+class RISCVRegCache {
+  uint64_t GPRKnownValues[32] = {0};
+  bool GPRGoodValues[32] = {true, false};
+  uint64_t KnownGPValue = 0;
+  bool GPIsSet = false;
+
+public:
+  // Record Value as the known contents of Reg.
+  void setReg(unsigned Reg, uint64_t Value) {
+    // Ignore writes to X0
+    if (Reg == RISCV::X0)
+      return;
+    Reg -= RISCV::X0;
+    GPRKnownValues[Reg] = Value;
+    GPRGoodValues[Reg] = true;
+  }
+  // Return the known contents of Reg, or None when they are unknown.
+  Optional<uint64_t> getReg(unsigned Reg) const {
+    Reg -= RISCV::X0;
+    if (GPRGoodValues[Reg])
+      return GPRKnownValues[Reg];
+    return None;
+  }
+  // Invalidate all known register values.
+  // If Full is false, then do not invalidate the cached value of the global
+  // pointer
+  void invalidate(bool Full = false) {
+    for (unsigned i = 1; i < 32; i++) {
+      GPRKnownValues[i] = 0;
+      GPRGoodValues[i] = false;
+    }
+    if (Full) {
+      GPIsSet = false;
+      KnownGPValue = 0;
+    } else if (GPIsSet) {
+      GPRKnownValues[3] = KnownGPValue;
+      GPRGoodValues[3] = true;
+    }
+  }
+  // Mark the contents of Reg as unknown.
+  void invalidateReg(unsigned Reg) {
+    if (Reg == RISCV::X0)
+      return;
+    Reg -= RISCV::X0;
+    GPRGoodValues[Reg] = false;
+  }
+  // Record Addr as the global pointer value held in X3.
+  void setGP(uint64_t Addr) {
+    GPRKnownValues[3] = Addr;
+    GPRGoodValues[3] = true;
+    GPIsSet = true;
+    KnownGPValue = Addr;
+  }
+};
+
 class RISCVMCInstrAnalysis : public MCInstrAnalysis {
+  RISCVRegCache RegCache;
+  // Updated by resetAnalysis whenever a new object is analysed.
+  bool IsRV64 = false;
+
+  // Sign-extend the low 32 bits of Value to 64 bits.
+  static uint64_t signExtend32(uint64_t Value) {
+    return static_cast<uint64_t>(static_cast<int64_t>(
+        static_cast<int32_t>(static_cast<uint32_t>(Value))));
+  }
+
+  // Truncate Value to the register width of the object being analysed.
+  uint64_t truncToXLen(uint64_t Value) const {
+    return IsRV64 ? Value : (Value & 0xffffffff);
+  }
+
+  // Invalidate the cached value of every GPR that Inst explicitly defines.
+  void invalidateDefs(const MCInst &Inst) {
+    auto &Desc = Info->get(Inst.getOpcode());
+    for (unsigned i = 0, e = Desc.getNumDefs(); i < e; i++) {
+      auto &Op = Inst.getOperand(i);
+      if (Op.isReg() && Op.getReg() >= RISCV::X0 && Op.getReg() <= RISCV::X31)
+        RegCache.invalidateReg(Op.getReg());
+    }
+  }
+
 public:
   explicit RISCVMCInstrAnalysis(const MCInstrInfo *Info)
       : MCInstrAnalysis(Info) {}
 
+  void resetAnalysis(bool NewObject, Triple::ArchType Arch) override {
+    // Reset the register cache, using NewObject to determine whether any
+    // cached GP is valid
+    RegCache.invalidate(NewObject);
+    if (NewObject)
+      IsRV64 = Arch == Triple::riscv64;
+  }
+
+  void setGPForAnalysis(uint64_t Addr) override { RegCache.setGP(Addr); }
+
+  // Track register values across a linear scan and report in Target the
+  // address computed by Inst, returning true when one is known.
+  bool evaluateInst(const MCInst &Inst, uint64_t Addr, uint64_t Size,
+                    uint64_t &Target) override {
+    // First evaluate branches that evaluateBranch supports
+    if ((isCall(Inst) || isUnconditionalBranch(Inst) ||
+         isConditionalBranch(Inst)) &&
+        evaluateBranch(Inst, Addr, Size, Target)) {
+      RegCache.invalidate();
+      return true;
+    }
+
+    switch (Inst.getOpcode()) {
+    default:
+      break;
+    case RISCV::AUIPC:
+    case RISCV::LUI:
+    case RISCV::C_LUI: {
+      unsigned Reg = Inst.getOperand(0).getReg();
+      // The immediate supplies bits [31:12] of a 32-bit value that is
+      // sign-extended to the register width.
+      uint64_t Value = signExtend32(
+          static_cast<uint64_t>(Inst.getOperand(1).getImm()) << 12);
+      if (Inst.getOpcode() == RISCV::AUIPC)
+        Value += Addr;
+      RegCache.setReg(Reg, truncToXLen(Value));
+      return false;
+    }
+    case RISCV::ADDI: {
+      unsigned DstReg = Inst.getOperand(0).getReg();
+      unsigned SrcReg = Inst.getOperand(1).getReg();
+      if (auto SrcVal = RegCache.getReg(SrcReg)) {
+        Target = truncToXLen(*SrcVal + Inst.getOperand(2).getImm());
+        RegCache.setReg(DstReg, Target);
+        return true;
+      }
+      break;
+    }
+    case RISCV::JALR: {
+      unsigned SrcReg = Inst.getOperand(1).getReg();
+      bool Known = false;
+      if (auto SrcVal = RegCache.getReg(SrcReg)) {
+        // JALR clears the least-significant bit of the computed address.
+        Target = truncToXLen(*SrcVal + Inst.getOperand(2).getImm()) &
+                 ~UINT64_C(1);
+        Known = true;
+      }
+      // This is a jump to a new BB whether or not its target is known, so
+      // invalidate the whole cache
+      RegCache.invalidate();
+      return Known;
+    }
+    case RISCV::LB:
+    case RISCV::LH:
+    case RISCV::LW:
+    case RISCV::LBU:
+    case RISCV::LHU:
+    case RISCV::LWU:
+    case RISCV::LD:
+    case RISCV::FLW:
+    case RISCV::FLD:
+    case RISCV::SB:
+    case RISCV::SH:
+    case RISCV::SW:
+    case RISCV::FSW:
+    case RISCV::SD:
+    case RISCV::FSD: {
+      unsigned SrcReg = Inst.getOperand(1).getReg();
+      bool Known = false;
+      if (auto SrcVal = RegCache.getReg(SrcReg)) {
+        Target = truncToXLen(*SrcVal + Inst.getOperand(2).getImm());
+        Known = true;
+      }
+      // The address is computed before a load overwrites its destination,
+      // whose new value is unknown, so drop it from the cache.
+      invalidateDefs(Inst);
+      return Known;
+    }
+    }
+
+    // For all other instructions, it is no longer safe to assume the value of
+    // any destination register, so invalidate these.
+    invalidateDefs(Inst);
+    return false;
+  }
+
   bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
                       uint64_t &Target) const override {
     if (isConditionalBranch(Inst)) {
diff --git a/llvm/test/MC/Disassembler/RISCV/branch-targets.txt b/llvm/test/MC/Disassembler/RISCV/branch-targets.txt
--- a/llvm/test/MC/Disassembler/RISCV/branch-targets.txt
+++ b/llvm/test/MC/Disassembler/RISCV/branch-targets.txt
@@ -15,13 +15,13 @@
   bnez a0, label1
   bnez a0, label2
 # CHECK-LABEL: <label1>:
-# CHECK-NEXT: jal zero, 0
-# CHECK-NEXT: jal zero, 20
-# CHECK-NEXT: bne a0, zero, -8
-# CHECK-NEXT: bne a0, zero, 12
-# CHECK-NEXT: c.j -16
-# CHECK-NEXT: c.j 6
-# CHECK-NEXT: c.bnez a0, -20
-# CHECK-NEXT: c.bnez a0, 2
+# CHECK-NEXT: jal zero, 0 #0
+# CHECK-NEXT: jal zero, 20 #18
+# CHECK-NEXT: bne a0, zero, -8 #0
+# CHECK-NEXT: bne a0, zero, 12 #18
+# CHECK-NEXT: c.j -16 #0
+# CHECK-NEXT: c.j 6 #18
+# CHECK-NEXT: c.bnez a0, -20 #0
+# CHECK-NEXT: c.bnez a0, 2 #18
 
 label2:
diff --git a/llvm/test/MC/RISCV/rv64-relax-all.s b/llvm/test/MC/RISCV/rv64-relax-all.s
--- a/llvm/test/MC/RISCV/rv64-relax-all.s
+++ b/llvm/test/MC/RISCV/rv64-relax-all.s
@@ -6,10 +6,10 @@
 
 NEAR:
 
-# INSTR: c.beqz a0, 0
-# RELAX-INSTR: beq a0, zero, 0
+# INSTR: c.beqz a0, 0 #0
+# RELAX-INSTR: beq a0, zero, 0 #0
 c.beqz a0, NEAR
 
-# INSTR: c.j -2
-# RELAX-INSTR: jal zero, -4
+# INSTR: c.j -2 #0
+# RELAX-INSTR: jal zero, -4 #0
 c.j NEAR
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -1188,7 +1188,7 @@
 static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
                               MCContext &Ctx, MCDisassembler *PrimaryDisAsm,
                               MCDisassembler *SecondaryDisAsm,
-                              const MCInstrAnalysis *MIA, MCInstPrinter *IP,
+                              MCInstrAnalysis *MIA, MCInstPrinter *IP,
                               const MCSubtargetInfo *PrimarySTI,
                               const MCSubtargetInfo *SecondarySTI,
                               PrettyPrinter &PIP,
@@ -1199,6 +1199,9 @@
   if (isArmElf(Obj))
     PrimaryIsThumb = STI->checkFeatures("+thumb-mode");
 
+  if (MIA)
+    MIA->resetAnalysis(true, Obj->getArch());
+
   std::map<SectionRef, std::vector<RelocationRef>> RelocMap;
   if (InlineRelocs)
     RelocMap = getRelocsMap(*Obj);
@@ -1230,6 +1233,12 @@
       continue;
     }
 
+    if (MIA &&
+        (Obj->getArch() == Triple::riscv32 ||
+         Obj->getArch() == Triple::riscv64) &&
+        Name == "__global_pointer$")
+      MIA->setGPForAnalysis(unwrapOrError(Symbol.getAddress(), FileName));
+
     section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName);
     if (SecI != Obj->section_end())
       AllSymbols[*SecI].push_back(createSymbolInfo(Obj, Symbol));
@@ -1302,6 +1311,9 @@
     if (!SectSize)
       continue;
 
+    if (MIA)
+      MIA->resetAnalysis();
+
     // Get the list of all the symbols in this section.
     SectionSymbolsTy &Symbols = AllSymbols[Section];
     std::vector<MappingSymbolPair> MappingSymbols;
@@ -1514,11 +1526,9 @@
           // If disassembly has failed, avoid analysing invalid/incomplete
           // instruction information. Otherwise, try to resolve the target of a
           // call, tail call, etc. to a specific symbol.
-          if (Disassembled && MIA &&
-              (MIA->isCall(Inst) || MIA->isUnconditionalBranch(Inst) ||
-               MIA->isConditionalBranch(Inst))) {
+          if (Disassembled && MIA) {
             uint64_t Target;
-            if (MIA->evaluateBranch(Inst, SectionAddr + Index, Size, Target)) {
+            if (MIA->evaluateInst(Inst, SectionAddr + Index, Size, Target)) {
               // In a relocatable object, the target's section must reside in
               // the same section as the call instruction or it is accessed
               // through a relocation.
@@ -1565,6 +1575,12 @@
                 }
               }
 
+              // For RISC-V it is not possible to print this until the MIA
+              // analysis is complete
+              if (Obj->getArch() == Triple::riscv32 ||
+                  Obj->getArch() == Triple::riscv64)
+                outs() << " #" << Twine::utohexstr(Target);
+
               if (TargetSym != nullptr) {
                 uint64_t TargetAddress = TargetSym->Addr;
                 std::string TargetName = TargetSym->Name.str();
@@ -1682,7 +1698,7 @@
     SecondaryDisAsm.reset(TheTarget->createMCDisassembler(*SecondarySTI, Ctx));
   }
 
-  std::unique_ptr<const MCInstrAnalysis> MIA(
+  std::unique_ptr<MCInstrAnalysis> MIA(
       TheTarget->createMCInstrAnalysis(MII.get()));
 
   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();