Index: llvm/trunk/lib/Target/Hexagon/HexagonFixupHwLoops.cpp =================================================================== --- llvm/trunk/lib/Target/Hexagon/HexagonFixupHwLoops.cpp +++ llvm/trunk/lib/Target/Hexagon/HexagonFixupHwLoops.cpp @@ -6,9 +6,8 @@ // License. See LICENSE.TXT for details. // // The loop start address in the LOOPn instruction is encoded as a distance -// from the LOOPn instruction itself. If the start address is too far from -// the LOOPn instruction, the loop needs to be set up manually, i.e. via -// direct transfers to SAn and LCn. +// from the LOOPn instruction itself. If the start address is too far from +// the LOOPn instruction, the instruction needs to use a constant extender. // This pass will identify and convert such LOOPn instructions to a proper // form. //===----------------------------------------------------------------------===// @@ -21,12 +20,15 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/PassSupport.h" #include "llvm/Target/TargetInstrInfo.h" using namespace llvm; +static cl::opt MaxLoopRange( + "hexagon-loop-range", cl::Hidden, cl::init(200), + cl::desc("Restrict range of loopN instructions (testing only)")); + namespace llvm { void initializeHexagonFixupHwLoopsPass(PassRegistry&); } @@ -52,20 +54,15 @@ } private: - /// \brief Maximum distance between the loop instr and the basic block. - /// Just an estimate. - static const unsigned MAX_LOOP_DISTANCE = 200; - /// \brief Check the offset between each loop instruction and /// the loop basic block to determine if we can use the LOOP instruction /// or if we need to set the LC/SA registers explicitly. bool fixupLoopInstrs(MachineFunction &MF); - /// \brief Add the instruction to set the LC and SA registers explicitly. - void convertLoopInstr(MachineFunction &MF, - MachineBasicBlock::iterator &MII, - RegScavenger &RS); - + /// \brief Replace loop instruction with the constant extended + /// version if the loop label is too far from the loop instruction. + void useExtLoopInstr(MachineFunction &MF, + MachineBasicBlock::iterator &MII); }; char HexagonFixupHwLoops::ID = 0; @@ -78,20 +75,18 @@ return new HexagonFixupHwLoops(); } - /// \brief Returns true if the instruction is a hardware loop instruction. static bool isHardwareLoop(const MachineInstr *MI) { return MI->getOpcode() == Hexagon::J2_loop0r || - MI->getOpcode() == Hexagon::J2_loop0i; + MI->getOpcode() == Hexagon::J2_loop0i || + MI->getOpcode() == Hexagon::J2_loop1r || + MI->getOpcode() == Hexagon::J2_loop1i; } - bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) { - bool Changed = fixupLoopInstrs(MF); - return Changed; + return fixupLoopInstrs(MF); } - /// \brief For Hexagon, if the loop label is to far from the /// loop instruction then we need to set the LC0 and SA0 registers /// explicitly instead of using LOOP(start,count). This function @@ -105,41 +100,49 @@ // Offset of the current instruction from the start. unsigned InstOffset = 0; // Map for each basic block to it's first instruction. - DenseMap BlockToInstOffset; + DenseMap BlockToInstOffset; + + const HexagonInstrInfo *HII = + static_cast(MF.getSubtarget().getInstrInfo()); // First pass - compute the offset of each basic block. - for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end(); - MBB != MBBe; ++MBB) { - BlockToInstOffset[MBB] = InstOffset; - InstOffset += (MBB->size() * 4); + for (const MachineBasicBlock &MBB : MF) { + if (MBB.getAlignment()) { + // Although we don't know the exact layout of the final code, we need + // to account for alignment padding somehow. This heuristic pads each + // aligned basic block according to the alignment value. + int ByteAlign = (1u << MBB.getAlignment()) - 1; + InstOffset = (InstOffset + ByteAlign) & ~(ByteAlign); + } + + BlockToInstOffset[&MBB] = InstOffset; + for (const MachineInstr &MI : MBB) + InstOffset += HII->getSize(&MI); } - // Second pass - check each loop instruction to see if it needs to - // be converted. + // Second pass - check each loop instruction to see if it needs to be + // converted. InstOffset = 0; bool Changed = false; - RegScavenger RS; - - // Loop over all the basic blocks. - for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end(); - MBB != MBBe; ++MBB) { - InstOffset = BlockToInstOffset[MBB]; - RS.enterBasicBlock(MBB); + for (MachineBasicBlock &MBB : MF) { + InstOffset = BlockToInstOffset[&MBB]; // Loop over all the instructions. - MachineBasicBlock::iterator MIE = MBB->end(); - MachineBasicBlock::iterator MII = MBB->begin(); + MachineBasicBlock::iterator MII = MBB.begin(); + MachineBasicBlock::iterator MIE = MBB.end(); while (MII != MIE) { + InstOffset += HII->getSize(&*MII); + if (MII->isDebugValue()) { + ++MII; + continue; + } if (isHardwareLoop(MII)) { - RS.forward(MII); assert(MII->getOperand(0).isMBB() && "Expect a basic block as loop operand"); - int Sub = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()]; - unsigned Dist = Sub > 0 ? Sub : -Sub; - if (Dist > MAX_LOOP_DISTANCE) { - // Convert to explicity setting LC0 and SA0. - convertLoopInstr(MF, MII, RS); - MII = MBB->erase(MII); + int diff = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()]; + if ((unsigned)abs(diff) > MaxLoopRange) { + useExtLoopInstr(MF, MII); + MII = MBB.erase(MII); Changed = true; } else { ++MII; @@ -147,39 +150,38 @@ } else { ++MII; } - InstOffset += 4; } } return Changed; } - -/// \brief convert a loop instruction to a sequence of instructions that -/// set the LC0 and SA0 register explicitly. -void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF, - MachineBasicBlock::iterator &MII, - RegScavenger &RS) { +/// \brief Replace loop instructions with the constant extended version. +void HexagonFixupHwLoops::useExtLoopInstr(MachineFunction &MF, + MachineBasicBlock::iterator &MII) { const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); MachineBasicBlock *MBB = MII->getParent(); DebugLoc DL = MII->getDebugLoc(); - unsigned Scratch = RS.scavengeRegister(&Hexagon::IntRegsRegClass, MII, 0); - - // First, set the LC0 with the trip count. - if (MII->getOperand(1).isReg()) { - // Trip count is a register - BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrrcr), Hexagon::LC0) - .addReg(MII->getOperand(1).getReg()); - } else { - // Trip count is an immediate. - BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrsi), Scratch) - .addImm(MII->getOperand(1).getImm()); - BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrrcr), Hexagon::LC0) - .addReg(Scratch); + MachineInstrBuilder MIB; + unsigned newOp; + switch (MII->getOpcode()) { + case Hexagon::J2_loop0r: + newOp = Hexagon::J2_loop0rext; + break; + case Hexagon::J2_loop0i: + newOp = Hexagon::J2_loop0iext; + break; + case Hexagon::J2_loop1r: + newOp = Hexagon::J2_loop1rext; + break; + case Hexagon::J2_loop1i: + newOp = Hexagon::J2_loop1iext; + break; + default: + llvm_unreachable("Invalid Hardware Loop Instruction."); } - // Then, set the SA0 with the loop start address. - BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrsi), Scratch) - .addMBB(MII->getOperand(0).getMBB()); - BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrrcr), Hexagon::SA0) - .addReg(Scratch); + MIB = BuildMI(*MBB, MII, DL, TII->get(newOp)); + + for (unsigned i = 0; i < MII->getNumOperands(); ++i) + MIB.addOperand(MII->getOperand(i)); } Index: llvm/trunk/lib/Target/Hexagon/HexagonInstrInfo.h =================================================================== --- llvm/trunk/lib/Target/Hexagon/HexagonInstrInfo.h +++ llvm/trunk/lib/Target/Hexagon/HexagonInstrInfo.h @@ -1,3 +1,4 @@ + //===- HexagonInstrInfo.h - Hexagon Instruction Information -----*- C++ -*-===// // // The LLVM Compiler Infrastructure @@ -203,7 +204,8 @@ void immediateExtend(MachineInstr *MI) const; - bool isConstExtended(MachineInstr *MI) const; + bool isConstExtended(const MachineInstr *MI) const; + unsigned getSize(const MachineInstr *MI) const; int getDotNewPredJumpOp(MachineInstr *MI, const MachineBranchProbabilityInfo *MBPI) const; unsigned getAddrMode(const MachineInstr* MI) const; Index: llvm/trunk/lib/Target/Hexagon/HexagonInstrInfo.cpp =================================================================== --- llvm/trunk/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ llvm/trunk/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -1742,14 +1742,14 @@ return false; } -bool HexagonInstrInfo::isConstExtended(MachineInstr *MI) const { +bool HexagonInstrInfo::isConstExtended(const MachineInstr *MI) const { const uint64_t F = MI->getDesc().TSFlags; unsigned isExtended = (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask; if (isExtended) // Instruction must be extended. return true; - unsigned isExtendable = (F >> HexagonII::ExtendablePos) - & HexagonII::ExtendableMask; + unsigned isExtendable = + (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask; if (!isExtendable) return false; @@ -1785,6 +1785,27 @@ return (ImmValue < MinValue || ImmValue > MaxValue); } +// Return the number of bytes required to encode the instruction. +// Hexagon instructions are fixed length, 4 bytes, unless they +// use a constant extender, which requires another 4 bytes. +// For debug instructions and prolog labels, return 0. +unsigned HexagonInstrInfo::getSize(const MachineInstr *MI) const { + + if (MI->isDebugValue() || MI->isPosition()) + return 0; + + unsigned Size = MI->getDesc().getSize(); + if (!Size) + // Assume the default insn size in case it cannot be determined + // for whatever reason. + Size = HEXAGON_INSTR_SIZE; + + if (isConstExtended(MI) || isExtended(MI)) + Size += HEXAGON_INSTR_SIZE; + + return Size; +} + // Returns the opcode to use when converting MI, which is a conditional jump, // into a conditional instruction which uses the .new value of the predicate. // We also use branch probabilities to add a hint to the jump. Index: llvm/trunk/lib/Target/Hexagon/HexagonOperands.td =================================================================== --- llvm/trunk/lib/Target/Hexagon/HexagonOperands.td +++ llvm/trunk/lib/Target/Hexagon/HexagonOperands.td @@ -483,7 +483,9 @@ def jumptablebase : Operand; def brtarget : Operand; -def brtargetExt : Operand; +def brtargetExt : Operand { + let PrintMethod = "printExtBrtarget"; +} def calltarget : Operand; def bblabel : Operand; Index: llvm/trunk/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h =================================================================== --- llvm/trunk/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h +++ llvm/trunk/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h @@ -56,6 +56,7 @@ void printGlobalOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; void printJumpTable(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; + void printExtBrtarget(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; void printConstantPool(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; Index: llvm/trunk/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp =================================================================== --- llvm/trunk/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp +++ llvm/trunk/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp @@ -249,3 +249,17 @@ printOperand(MI, OpNo, O); O << ')'; } + +void HexagonInstPrinter::printExtBrtarget(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { + const MCOperand &MO = MI->getOperand(OpNo); + const MCInstrDesc &MII = getMII().get(MI->getOpcode()); + + assert((isExtendable(MII.TSFlags) || isExtended(MII.TSFlags)) && + "Expecting an extendable operand"); + + if (MO.isExpr() || isExtended(MII.TSFlags)) { + O << "##"; + } + printOperand(MI, OpNo, O); +} Index: llvm/trunk/test/CodeGen/Hexagon/hwloop-range.ll =================================================================== --- llvm/trunk/test/CodeGen/Hexagon/hwloop-range.ll +++ llvm/trunk/test/CodeGen/Hexagon/hwloop-range.ll @@ -0,0 +1,36 @@ +; RUN: llc -march=hexagon -hexagon-loop-range=0 < %s | FileCheck %s + +; Test that the loop start address operand uses a constant extender +; if the offset is out of range. + +; CHECK: loop0(##.LBB +; CHECK: endloop0 + +@g = external global i32, align 4 + +define void @test(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 { +entry: + %cmp6 = icmp slt i32 %n, 1 + br i1 %cmp6, label %for.end, label %for.body.preheader + +for.body.preheader: + br label %for.body + +for.body: + %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.07 + %0 = load i32, i32* %arrayidx, align 4 + %1 = load i32, i32* @g, align 4 + %mul = mul nsw i32 %1, %0 + %arrayidx1 = getelementptr inbounds i32, i32* %a, i32 %i.07 + store i32 %mul, i32* %arrayidx1, align 4 + %inc = add nuw nsw i32 %i.07, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +}