Index: lib/Target/Hexagon/HexagonHardwareLoops.cpp =================================================================== --- lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -369,10 +369,10 @@ bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false); if (NotAnalyzed) return false; - - unsigned CSz = Cond.size(); - assert (CSz == 1 || CSz == 2); - unsigned PredR = Cond[CSz-1].getReg(); + + unsigned PredR, PredPos, PredRegFlags; + if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags)) + return false; MachineInstr *PredI = MRI->getVRegDef(PredR); if (!PredI->isCompare()) @@ -491,8 +491,10 @@ // to put imm(0), followed by P in the vector Cond. // If TB is not the header, it means that the "not-taken" path must lead // to the header. - bool Negated = (Cond.size() > 1) ^ (TB != Header); - unsigned PredReg = Cond[Cond.size()-1].getReg(); + bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header); + unsigned PredReg, PredPos, PredRegFlags; + if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags)) + return nullptr; MachineInstr *CondI = MRI->getVRegDef(PredReg); unsigned CondOpc = CondI->getOpcode(); Index: lib/Target/Hexagon/HexagonInstrInfo.h =================================================================== --- lib/Target/Hexagon/HexagonInstrInfo.h +++ lib/Target/Hexagon/HexagonInstrInfo.h @@ -32,9 +32,10 @@ virtual void anchor(); const HexagonRegisterInfo RI; const HexagonSubtarget &Subtarget; - typedef unsigned Opcode_t; public: + typedef unsigned Opcode_t; + explicit HexagonInstrInfo(HexagonSubtarget &ST); /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As @@ -185,6 +186,7 @@ bool isConditionalStore(const MachineInstr* MI) const; bool isNewValueInst(const MachineInstr* MI) const; bool isNewValue(const MachineInstr* MI) const; + bool isNewValue(Opcode_t Opcode) const; bool isDotNewInst(const MachineInstr* MI) const; int GetDotOldOp(const int opc) const; int GetDotNewOp(const MachineInstr* MI) const; @@ -200,6 +202,7 @@ bool isNewValueStore(const MachineInstr* MI) const; bool isNewValueStore(unsigned Opcode) const; bool isNewValueJump(const MachineInstr* MI) const; + bool isNewValueJump(Opcode_t Opcode) const; bool isNewValueJumpCandidate(const MachineInstr *MI) const; @@ -217,7 +220,11 @@ bool NonExtEquivalentExists (const MachineInstr *MI) const; short getNonExtOpcode(const MachineInstr *MI) const; bool PredOpcodeHasJMP_c(Opcode_t Opcode) const; - bool PredOpcodeHasNot(Opcode_t Opcode) const; + bool predOpcodeHasNot(const SmallVectorImpl &Cond) const; + bool isEndLoopN(Opcode_t Opcode) const; + bool getPredReg(const SmallVectorImpl &Cond, + unsigned &PredReg, unsigned &PredRegPos, + unsigned &PredRegFlags) const; int getCondOpcode(int Opc, bool sense) const; }; Index: lib/Target/Hexagon/HexagonInstrInfo.cpp =================================================================== --- lib/Target/Hexagon/HexagonInstrInfo.cpp +++ lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -115,72 +115,172 @@ return 0; } +// Find the hardware loop instruction used to set-up the specified loop. +// On Hexagon, we have two instructions used to set-up the hardware loop +// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions +// to indicate the end of a loop. +static MachineInstr * +findLoopInstr(MachineBasicBlock *BB, int EndLoopOp, + SmallPtrSet &Visited) { + int LOOPi; + int LOOPr; + if (EndLoopOp == Hexagon::ENDLOOP0) { + LOOPi = Hexagon::J2_loop0i; + LOOPr = Hexagon::J2_loop0r; + } else { // EndLoopOp == Hexagon::EndLOOP1 + LOOPi = Hexagon::J2_loop1i; + LOOPr = Hexagon::J2_loop1r; + } -unsigned -HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl &Cond, - DebugLoc DL) const{ - - int BOpc = Hexagon::J2_jump; - int BccOpc = Hexagon::J2_jumpt; - - assert(TBB && "InsertBranch must not be told to insert a fallthrough"); - - int regPos = 0; - // Check if ReverseBranchCondition has asked to reverse this branch - // If we want to reverse the branch an odd number of times, we want - // JMP_f. - if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) { - BccOpc = Hexagon::J2_jumpf; - regPos = 1; + // The loop set-up instruction will be in a predecessor block + for (MachineBasicBlock::pred_iterator PB = BB->pred_begin(), + PE = BB->pred_end(); PB != PE; ++PB) { + // If this has been visited, already skip it. + if (!Visited.insert(*PB).second) + continue; + if (*PB == BB) + continue; + for (MachineBasicBlock::reverse_instr_iterator I = (*PB)->instr_rbegin(), + E = (*PB)->instr_rend(); I != E; ++I) { + int Opc = I->getOpcode(); + if (Opc == LOOPi || Opc == LOOPr) + return &*I; + // We've reached a different loop, which means the loop0 has been removed. + if (Opc == EndLoopOp) + return 0; } + // Check the predecessors for the LOOP instruction. + MachineInstr *loop = findLoopInstr(*PB, EndLoopOp, Visited); + if (loop) + return loop; + } + return 0; +} - if (!FBB) { - if (Cond.empty()) { - // Due to a bug in TailMerging/CFG Optimization, we need to add a - // special case handling of a predicated jump followed by an - // unconditional jump. If not, Tail Merging and CFG Optimization go - // into an infinite loop. - MachineBasicBlock *NewTBB, *NewFBB; - SmallVector Cond; - MachineInstr *Term = MBB.getFirstTerminator(); - if (isPredicated(Term) && !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond, - false)) { - MachineBasicBlock *NextBB = - std::next(MachineFunction::iterator(&MBB)); - if (NewTBB == NextBB) { - ReverseBranchCondition(Cond); - RemoveBranch(MBB); - return InsertBranch(MBB, TBB, nullptr, Cond, DL); - } +unsigned HexagonInstrInfo::InsertBranch( + MachineBasicBlock &MBB,MachineBasicBlock *TBB, MachineBasicBlock *FBB, + const SmallVectorImpl &Cond, DebugLoc DL) const { + + Opcode_t BOpc = Hexagon::J2_jump; + Opcode_t BccOpc = Hexagon::J2_jumpt; + + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + + // Check if ReverseBranchCondition has asked to reverse this branch + // If we want to reverse the branch an odd number of times, we want + // J2_jumpf. + if (!Cond.empty() && Cond[0].isImm()) + BccOpc = Cond[0].getImm(); + + if (!FBB) { + if (Cond.empty()) { + // Due to a bug in TailMerging/CFG Optimization, we need to add a + // special case handling of a predicated jump followed by an + // unconditional jump. If not, Tail Merging and CFG Optimization go + // into an infinite loop. + MachineBasicBlock *NewTBB, *NewFBB; + SmallVector Cond; + MachineInstr *Term = MBB.getFirstTerminator(); + if (Term != MBB.end() && isPredicated(Term) && + !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond, false)) { + MachineBasicBlock *NextBB = + std::next(MachineFunction::iterator(&MBB)); + if (NewTBB == NextBB) { + ReverseBranchCondition(Cond); + RemoveBranch(MBB); + return InsertBranch(MBB, TBB, nullptr, Cond, DL); } - BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); - } else { - // If Cond[0] is a basic block, insert ENDLOOP0. - if (Cond[0].isMBB()) - BuildMI(&MBB, DL, get(Hexagon::ENDLOOP0)).addMBB(Cond[0].getMBB()); - else - BuildMI(&MBB, DL, - get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB); } - return 1; + BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); + } else if (isEndLoopN(Cond[0].getImm())) { + int EndLoopOp = Cond[0].getImm(); + assert(Cond[1].isMBB()); + // Since we're adding an ENDLOOP, there better be a LOOP instruction. + // Check for it, and change the BB target if needed. + SmallPtrSet VisitedBBs; + MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs); + assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP"); + Loop->getOperand(0).setMBB(TBB); + // Add the ENDLOOP after the finding the LOOP0. + BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB); + } else if (isNewValueJump(Cond[0].getImm())) { + assert((Cond.size() == 3) && "Only supporting rr/ri version of nvjump"); + // New value jump + // (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset) + // (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset) + unsigned Flags1 = getUndefRegState(Cond[1].isUndef()); + DEBUG(dbgs() << "\nInserting NVJump for BB#" << MBB.getNumber();); + if (Cond[2].isReg()) { + unsigned Flags2 = getUndefRegState(Cond[2].isUndef()); + BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1). + addReg(Cond[2].getReg(), Flags2).addMBB(TBB); + } else if(Cond[2].isImm()) { + BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1). + addImm(Cond[2].getImm()).addMBB(TBB); + } else + llvm_unreachable("Invalid condition for branching"); + } else { + assert((Cond.size() == 2) && "Malformed cond vector"); + const MachineOperand &RO = Cond[1]; + unsigned Flags = getUndefRegState(RO.isUndef()); + BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB); } - - // We don't handle ENDLOOP0 with a conditional branch in AnalyzeBranch. - BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB); - BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB); - return 2; -} - - + return 1; + } + assert((!Cond.empty()) && + "Cond. cannot be empty when multiple branchings are required"); + assert((!isNewValueJump(Cond[0].getImm())) && + "NV-jump cannot be inserted with another branch"); + // Special case for hardware loops. The condition is a basic block. + if (isEndLoopN(Cond[0].getImm())) { + int EndLoopOp = Cond[0].getImm(); + assert(Cond[1].isMBB()); + // Since we're adding an ENDLOOP, there better be a LOOP instruction. + // Check for it, and change the BB target if needed. + SmallPtrSet VisitedBBs; + MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs); + assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP"); + Loop->getOperand(0).setMBB(TBB); + // Add the ENDLOOP after the finding the LOOP0. + BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB); + } else { + const MachineOperand &RO = Cond[1]; + unsigned Flags = getUndefRegState(RO.isUndef()); + BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB); + } + BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB); + + return 2; +} + + +/// This function can analyze one/two way branching only and should (mostly) be +/// called by target independent side. +/// First entry is always the opcode of the branching instruction, except when +/// the Cond vector is supposed to be empty, e.g., when AnalyzeBranch fails, a +/// BB with only unconditional jump. Subsequent entries depend upon the opcode, +/// e.g. Jump_c p will have +/// Cond[0] = Jump_c +/// Cond[1] = p +/// HW-loop ENDLOOP: +/// Cond[0] = ENDLOOP +/// Cond[1] = MBB +/// New value jump: +/// Cond[0] = Hexagon::CMPEQri_f_Jumpnv_t_V4 -- specific opcode +/// Cond[1] = R +/// Cond[2] = Imm +/// @note Related function is \fn findInstrPredicate which fills in +/// Cond. vector when a predicated instruction is passed to it. +/// We follow same protocol in that case too. +/// bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl &Cond, - bool AllowModify) const { + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const { TBB = nullptr; FBB = nullptr; + Cond.clear(); // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::instr_iterator I = MBB.instr_end(); @@ -202,6 +302,7 @@ do { --I; if (I->isEHLabel()) + // Don't analyze EH branches. return true; } while (I != MBB.instr_begin()); @@ -216,7 +317,7 @@ bool JumpToBlock = I->getOpcode() == Hexagon::J2_jump && I->getOperand(0).isMBB(); - // Delete the JMP if it's equivalent to a fall-through. + // Delete the J2_jump if it's equivalent to a fall-through. if (AllowModify && JumpToBlock && MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { DEBUG(dbgs()<< "\nErasing the jump to successor block\n";); @@ -257,7 +358,7 @@ return true; bool LastOpcodeHasJMP_c = PredOpcodeHasJMP_c(LastOpcode); - bool LastOpcodeHasNot = PredOpcodeHasNot(LastOpcode); + bool LastOpcodeHasNVJump = isNewValueJump(LastInst); // If there is only one terminator instruction, process it. if (LastInst && !SecondLastInst) { @@ -265,34 +366,54 @@ TBB = LastInst->getOperand(0).getMBB(); return false; } - if (LastOpcode == Hexagon::ENDLOOP0) { + if (isEndLoopN(LastOpcode)) { TBB = LastInst->getOperand(0).getMBB(); + Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); Cond.push_back(LastInst->getOperand(0)); return false; } if (LastOpcodeHasJMP_c) { TBB = LastInst->getOperand(1).getMBB(); - if (LastOpcodeHasNot) { - Cond.push_back(MachineOperand::CreateImm(0)); - } + Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); + Cond.push_back(LastInst->getOperand(0)); + return false; + } + // Only supporting rr/ri versions of new-value jumps. + if (LastOpcodeHasNVJump && (LastInst->getNumExplicitOperands() == 3)) { + TBB = LastInst->getOperand(2).getMBB(); + Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); Cond.push_back(LastInst->getOperand(0)); + Cond.push_back(LastInst->getOperand(1)); return false; } + DEBUG(dbgs() << "\nCant analyze BB#" << MBB.getNumber() + << " with one jump\n";); // Otherwise, don't know what this is. return true; } bool SecLastOpcodeHasJMP_c = PredOpcodeHasJMP_c(SecLastOpcode); - bool SecLastOpcodeHasNot = PredOpcodeHasNot(SecLastOpcode); + bool SecLastOpcodeHasNVJump = isNewValueJump(SecondLastInst); if (SecLastOpcodeHasJMP_c && (LastOpcode == Hexagon::J2_jump)) { TBB = SecondLastInst->getOperand(1).getMBB(); - if (SecLastOpcodeHasNot) - Cond.push_back(MachineOperand::CreateImm(0)); + Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode())); Cond.push_back(SecondLastInst->getOperand(0)); FBB = LastInst->getOperand(0).getMBB(); return false; } + // Only supporting rr/ri versions of new-value jumps. + if (SecLastOpcodeHasNVJump && + (SecondLastInst->getNumExplicitOperands() == 3) && + (LastOpcode == Hexagon::J2_jump)) { + TBB = SecondLastInst->getOperand(2).getMBB(); + Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode())); + Cond.push_back(SecondLastInst->getOperand(0)); + Cond.push_back(SecondLastInst->getOperand(1)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + // If the block ends with two Hexagon:JMPs, handle it. The second one is not // executed, so remove it. if (SecLastOpcode == Hexagon::J2_jump && LastOpcode == Hexagon::J2_jump) { @@ -303,53 +424,40 @@ return false; } - // If the block ends with an ENDLOOP, and JMP, handle it. - if (SecLastOpcode == Hexagon::ENDLOOP0 && - LastOpcode == Hexagon::J2_jump) { + // If the block ends with an ENDLOOP, and J2_jump, handle it. + if (isEndLoopN(SecLastOpcode) && LastOpcode == Hexagon::J2_jump) { TBB = SecondLastInst->getOperand(0).getMBB(); + Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode())); Cond.push_back(SecondLastInst->getOperand(0)); FBB = LastInst->getOperand(0).getMBB(); return false; } - + DEBUG(dbgs() << "\nCant analyze BB#" << MBB.getNumber() + << " with two jumps";); // Otherwise, can't handle this. return true; } - unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + DEBUG(dbgs() << "\nRemoving branches out of BB#" << MBB.getNumber()); MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) return 0; - --I; - unsigned Opc1 = I->getOpcode(); - switch (Opc1) { - case Hexagon::J2_jump: - case Hexagon::J2_jumpt: - case Hexagon::J2_jumpf: - case Hexagon::ENDLOOP0: - I->eraseFromParent(); - break; - default: - return 0; - } - - I = MBB.end(); - - if (I == MBB.begin()) return 1; - --I; - unsigned Opc2 = I->getOpcode(); - switch (Opc2) { - case Hexagon::J2_jumpt: - case Hexagon::J2_jumpf: - case Hexagon::ENDLOOP0: - I->eraseFromParent(); - return 2; - default: - return 1; + unsigned Count = 0; + while (I != MBB.begin()) { + --I; + if (I->isDebugValue()) + continue; + // Only removing branches from end of MBB. + if (!I->isBranch()) + return Count; + if (Count && (I->getOpcode() == Hexagon::J2_jump)) + llvm_unreachable("Malformed basic block: unconditional branch not last"); + MBB.erase(&MBB.back()); + I = MBB.end(); + ++Count; } + return Count; } - /// \brief For a comparison instruction, return the source registers in /// \p SrcReg and \p SrcReg2 if having two register operands, and the value it /// compares against in CmpValue. Return true if the comparison instruction @@ -361,31 +469,39 @@ // Set mask and the first source register. switch (Opc) { - case Hexagon::C2_cmpeqp: - case Hexagon::C2_cmpeqi: case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpeqp: + case Hexagon::C2_cmpgt: case Hexagon::C2_cmpgtp: - case Hexagon::C2_cmpgtup: - case Hexagon::C2_cmpgtui: case Hexagon::C2_cmpgtu: + case Hexagon::C2_cmpgtup: + case Hexagon::C4_cmpneq: + case Hexagon::C4_cmplte: + case Hexagon::C4_cmplteu: + case Hexagon::C2_cmpeqi: case Hexagon::C2_cmpgti: - case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgtui: + case Hexagon::C4_cmpneqi: + case Hexagon::C4_cmplteui: + case Hexagon::C4_cmpltei: SrcReg = MI->getOperand(1).getReg(); Mask = ~0; break; - case Hexagon::A4_cmpbeqi: case Hexagon::A4_cmpbeq: - case Hexagon::A4_cmpbgtui: - case Hexagon::A4_cmpbgtu: case Hexagon::A4_cmpbgt: + case Hexagon::A4_cmpbgtu: + case Hexagon::A4_cmpbeqi: + case Hexagon::A4_cmpbgti: + case Hexagon::A4_cmpbgtui: SrcReg = MI->getOperand(1).getReg(); Mask = 0xFF; break; - case Hexagon::A4_cmpheqi: case Hexagon::A4_cmpheq: - case Hexagon::A4_cmphgtui: - case Hexagon::A4_cmphgtu: case Hexagon::A4_cmphgt: + case Hexagon::A4_cmphgtu: + case Hexagon::A4_cmpheqi: + case Hexagon::A4_cmphgti: + case Hexagon::A4_cmphgtui: SrcReg = MI->getOperand(1).getReg(); Mask = 0xFFFF; break; @@ -393,27 +509,35 @@ // Set the value/second source register. switch (Opc) { - case Hexagon::C2_cmpeqp: case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpeqp: + case Hexagon::C2_cmpgt: case Hexagon::C2_cmpgtp: - case Hexagon::C2_cmpgtup: case Hexagon::C2_cmpgtu: - case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgtup: case Hexagon::A4_cmpbeq: - case Hexagon::A4_cmpbgtu: case Hexagon::A4_cmpbgt: + case Hexagon::A4_cmpbgtu: case Hexagon::A4_cmpheq: - case Hexagon::A4_cmphgtu: case Hexagon::A4_cmphgt: + case Hexagon::A4_cmphgtu: + case Hexagon::C4_cmpneq: + case Hexagon::C4_cmplte: + case Hexagon::C4_cmplteu: SrcReg2 = MI->getOperand(2).getReg(); return true; case Hexagon::C2_cmpeqi: case Hexagon::C2_cmpgtui: case Hexagon::C2_cmpgti: + case Hexagon::C4_cmpneqi: + case Hexagon::C4_cmplteui: + case Hexagon::C4_cmpltei: case Hexagon::A4_cmpbeqi: + case Hexagon::A4_cmpbgti: case Hexagon::A4_cmpbgtui: case Hexagon::A4_cmpheqi: + case Hexagon::A4_cmphgti: case Hexagon::A4_cmphgtui: SrcReg2 = 0; Value = MI->getOperand(2).getImm(); @@ -731,6 +855,16 @@ return false; } +bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask); +} + +bool HexagonInstrInfo::isNewValue(Opcode_t Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask); +} + bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const { return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4; } @@ -881,148 +1015,50 @@ bool HexagonInstrInfo:: PredicateInstruction(MachineInstr *MI, const SmallVectorImpl &Cond) const { + if (Cond.empty() || isEndLoopN(Cond[0].getImm())) { + DEBUG(dbgs() << "\nCannot predicate:"; MI->dump();); + return false; + } int Opc = MI->getOpcode(); assert (isPredicable(MI) && "Expected predicable instruction"); - bool invertJump = (!Cond.empty() && Cond[0].isImm() && - (Cond[0].getImm() == 0)); - - // This will change MI's opcode to its predicate version. - // However, its operand list is still the old one, i.e. the - // non-predicate one. - MI->setDesc(get(getCondOpcode(Opc, invertJump))); - - int oper = -1; - unsigned int GAIdx = 0; - - // Indicates whether the current MI has a GlobalAddress operand - bool hasGAOpnd = false; - std::vector tmpOpnds; - - // Indicates whether we need to shift operands to right. - bool needShift = true; - - // The predicate is ALWAYS the FIRST input operand !!! - if (MI->getNumOperands() == 0) { - // The non-predicate version of MI does not take any operands, - // i.e. no outs and no ins. In this condition, the predicate - // operand will be directly placed at Operands[0]. No operand - // shift is needed. - // Example: BARRIER - needShift = false; - oper = -1; - } - else if ( MI->getOperand(MI->getNumOperands()-1).isReg() - && MI->getOperand(MI->getNumOperands()-1).isDef() - && !MI->getOperand(MI->getNumOperands()-1).isImplicit()) { - // The non-predicate version of MI does not have any input operands. - // In this condition, we extend the length of Operands[] by one and - // copy the original last operand to the newly allocated slot. - // At this moment, it is just a place holder. Later, we will put - // predicate operand directly into it. No operand shift is needed. - // Example: r0=BARRIER (this is a faked insn used here for illustration) - MI->addOperand(MI->getOperand(MI->getNumOperands()-1)); - needShift = false; - oper = MI->getNumOperands() - 2; - } - else { - // We need to right shift all input operands by one. Duplicate the - // last operand into the newly allocated slot. - MI->addOperand(MI->getOperand(MI->getNumOperands()-1)); - } + bool invertJump = predOpcodeHasNot(Cond); - if (needShift) - { - // Operands[ MI->getNumOperands() - 2 ] has been copied into - // Operands[ MI->getNumOperands() - 1 ], so we start from - // Operands[ MI->getNumOperands() - 3 ]. - // oper is a signed int. - // It is ok if "MI->getNumOperands()-3" is -3, -2, or -1. - for (oper = MI->getNumOperands() - 3; oper >= 0; --oper) - { - MachineOperand &MO = MI->getOperand(oper); - - // Opnd[0] Opnd[1] Opnd[2] Opnd[3] Opnd[4] Opnd[5] Opnd[6] Opnd[7] - // - // /\~ - // /||\~ - // || - // Predicate Operand here - if (MO.isReg() && !MO.isUse() && !MO.isImplicit()) { - break; - } - if (MO.isReg()) { - MI->getOperand(oper+1).ChangeToRegister(MO.getReg(), MO.isDef(), - MO.isImplicit(), MO.isKill(), - MO.isDead(), MO.isUndef(), - MO.isDebug()); - } - else if (MO.isImm()) { - MI->getOperand(oper+1).ChangeToImmediate(MO.getImm()); - } - else if (MO.isGlobal()) { - // MI can not have more than one GlobalAddress operand. - assert(hasGAOpnd == false && "MI can only have one GlobalAddress opnd"); - - // There is no member function called "ChangeToGlobalAddress" in the - // MachineOperand class (not like "ChangeToRegister" and - // "ChangeToImmediate"). So we have to remove them from Operands[] list - // first, and then add them back after we have inserted the predicate - // operand. tmpOpnds[] is to remember these operands before we remove - // them. - tmpOpnds.push_back(MO); - - // Operands[oper] is a GlobalAddress operand; - // Operands[oper+1] has been copied into Operands[oper+2]; - hasGAOpnd = true; - GAIdx = oper; - continue; - } - else { - llvm_unreachable("Unexpected operand type"); - } - } - } + // We have to predicate MI "in place", i.e. after this function returns, + // MI will need to be transformed into a predicated form. To avoid com- + // plicated manipulations with the operands (handling tied operands, + // etc.), build a new temporary instruction, then overwrite MI with it. - int regPos = invertJump ? 1 : 0; - MachineOperand PredMO = Cond[regPos]; + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned PredOpc = getCondOpcode(Opc, invertJump); + MachineInstrBuilder T = BuildMI(B, MI, DL, get(PredOpc)); + unsigned NOp = 0, NumOps = MI->getNumOperands(); + while (NOp < NumOps) { + MachineOperand &Op = MI->getOperand(NOp); + if (!Op.isReg() || !Op.isDef() || Op.isImplicit()) + break; + T.addOperand(Op); + NOp++; + } - // [oper] now points to the last explicit Def. Predicate operand must be - // located at [oper+1]. See diagram above. - // This assumes that the predicate is always the first operand, - // i.e. Operands[0+numResults], in the set of inputs - // It is better to have an assert here to check this. But I don't know how - // to write this assert because findFirstPredOperandIdx() would return -1 - if (oper < -1) oper = -1; + unsigned PredReg, PredRegPos, PredRegFlags; + bool GotPredReg = getPredReg(Cond, PredReg, PredRegPos, PredRegFlags); + assert(GotPredReg); + T.addReg(PredReg, PredRegFlags); + while (NOp < NumOps) + T.addOperand(MI->getOperand(NOp++)); - MI->getOperand(oper+1).ChangeToRegister(PredMO.getReg(), PredMO.isDef(), - PredMO.isImplicit(), false, - PredMO.isDead(), PredMO.isUndef(), - PredMO.isDebug()); + MI->setDesc(get(PredOpc)); + while (unsigned n = MI->getNumOperands()) + MI->RemoveOperand(n-1); + for (unsigned i = 0, n = T->getNumOperands(); i < n; ++i) + MI->addOperand(T->getOperand(i)); - MachineRegisterInfo &RegInfo = MI->getParent()->getParent()->getRegInfo(); - RegInfo.clearKillFlags(PredMO.getReg()); + MachineBasicBlock::instr_iterator TI = &*T; + B.erase(TI); - if (hasGAOpnd) - { - unsigned int i; - - // Operands[GAIdx] is the original GlobalAddress operand, which is - // already copied into tmpOpnds[0]. - // Operands[GAIdx] now stores a copy of Operands[GAIdx-1] - // Operands[GAIdx+1] has already been copied into Operands[GAIdx+2], - // so we start from [GAIdx+2] - for (i = GAIdx + 2; i < MI->getNumOperands(); ++i) - tmpOpnds.push_back(MI->getOperand(i)); - - // Remove all operands in range [ (GAIdx+1) ... (MI->getNumOperands()-1) ] - // It is very important that we always remove from the end of Operands[] - // MI->getNumOperands() is at least 2 if program goes to here. - for (i = MI->getNumOperands() - 1; i > GAIdx; --i) - MI->RemoveOperand(i); - - for (i = 0; i < tmpOpnds.size(); ++i) - MI->addOperand(tmpOpnds[i]); - } + MachineRegisterInfo &MRI = B.getParent()->getRegInfo(); + MRI.clearKillFlags(PredReg); return true; } @@ -1135,17 +1171,20 @@ // // We indicate that we want to reverse the branch by -// inserting a 0 at the beginning of the Cond vector. +// inserting the reversed branching opcode. // -bool HexagonInstrInfo:: -ReverseBranchCondition(SmallVectorImpl &Cond) const { - if (!Cond.empty() && Cond[0].isMBB()) +bool HexagonInstrInfo::ReverseBranchCondition( + SmallVectorImpl &Cond) const { + if (Cond.empty()) return true; - if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) { - Cond.erase(Cond.begin()); - } else { - Cond.insert(Cond.begin(), MachineOperand::CreateImm(0)); - } + assert(Cond[0].isImm() && "First entry in the cond vector not imm-val"); + Opcode_t opcode = Cond[0].getImm(); + //unsigned temp; + assert(get(opcode).isBranch() && "Should be a branching condition."); + if (isEndLoopN(opcode)) + return true; + Opcode_t NewOpcode = getInvertedPredicatedOpcode(opcode); + Cond[0].setImm(NewOpcode); return false; } @@ -1583,13 +1622,12 @@ return false; } -bool HexagonInstrInfo::isPostIncrement (const MachineInstr* MI) const { - return (getAddrMode(MI) == HexagonII::PostInc); +bool HexagonInstrInfo::isNewValueJump(Opcode_t Opcode) const { + return isNewValue(Opcode) && get(Opcode).isBranch() && isPredicated(Opcode); } -bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const { - const uint64_t F = MI->getDesc().TSFlags; - return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask); +bool HexagonInstrInfo::isPostIncrement (const MachineInstr* MI) const { + return (getAddrMode(MI) == HexagonII::PostInc); } // Returns true, if any one of the operands is a dot new @@ -1944,8 +1982,36 @@ (Opcode == Hexagon::J2_jumpf); } -bool HexagonInstrInfo::PredOpcodeHasNot(Opcode_t Opcode) const { - return (Opcode == Hexagon::J2_jumpf) || - (Opcode == Hexagon::J2_jumpfnewpt) || - (Opcode == Hexagon::J2_jumpfnew); +bool HexagonInstrInfo::predOpcodeHasNot( + const SmallVectorImpl &Cond) const { + if (Cond.empty() || !isPredicated(Cond[0].getImm())) + return false; + return !isPredicatedTrue(Cond[0].getImm()); +} + +bool HexagonInstrInfo::isEndLoopN(Opcode_t Opcode) const { + return (Opcode == Hexagon::ENDLOOP0 || + Opcode == Hexagon::ENDLOOP1); } + +bool HexagonInstrInfo::getPredReg(const SmallVectorImpl &Cond, + unsigned &PredReg, unsigned &PredRegPos, + unsigned &PredRegFlags) const { + if (Cond.empty()) + return false; + assert(Cond.size() == 2); + if (isNewValueJump(Cond[0].getImm()) || Cond[1].isMBB()) { + DEBUG(dbgs() << "No predregs for new-value jumps/endloop"); + return false; + } + PredReg = Cond[1].getReg(); + PredRegPos = 1; + // See IfConversion.cpp why we add RegState::Implicit | RegState::Undef + PredRegFlags = 0; + if (Cond[1].isImplicit()) + PredRegFlags = RegState::Implicit; + if (Cond[1].isUndef()) + PredRegFlags |= RegState::Undef; + return true; +} + Index: test/CodeGen/Hexagon/alu64.ll =================================================================== --- /dev/null +++ test/CodeGen/Hexagon/alu64.ll @@ -0,0 +1,599 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; CHECK-LABEL: @test00 +; CHECK: p0 = cmp.eq(r1:0, r3:2) +define i32 @test00(i64 %Rs, i64 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.C2.cmpeqp(i64 %Rs, i64 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test01 +; CHECK: p0 = cmp.gt(r1:0, r3:2) +define i32 @test01(i64 %Rs, i64 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.C2.cmpgtp(i64 %Rs, i64 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test02 +; CHECK: p0 = cmp.gtu(r1:0, r3:2) +define i32 @test02(i64 %Rs, i64 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.C2.cmpgtup(i64 %Rs, i64 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test10 +; CHECK: r0 = cmp.eq(r0, r1) +define i32 @test10(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A4.rcmpeq(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test11 +; CHECK: r0 = !cmp.eq(r0, r1) +define i32 @test11(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A4.rcmpneq(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test12 +; CHECK: r0 = cmp.eq(r0, #23) +define i32 @test12(i32 %Rs) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A4.rcmpeqi(i32 %Rs, i32 23) + ret i32 %0 +} + +; CHECK-LABEL: @test13 +; CHECK: r0 = !cmp.eq(r0, #47) +define i32 @test13(i32 %Rs) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A4.rcmpneqi(i32 %Rs, i32 47) + ret i32 %0 +} + +; CHECK-LABEL: @test20 +; CHECK: p0 = cmpb.eq(r0, r1) +define i32 @test20(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A4.cmpbeq(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test21 +; CHECK: p0 = cmpb.gt(r0, r1) +define i32 @test21(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A4.cmpbgt(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test22 +; CHECK: p0 = cmpb.gtu(r0, r1) +define i32 @test22(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A4.cmpbgtu(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test23 +; CHECK: p0 = cmpb.eq(r0, #56) +define i32 @test23(i32 %Rs) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A4.cmpbeqi(i32 %Rs, i32 56) + ret i32 %0 +} + +; CHECK-LABEL: @test24 +; CHECK: p0 = cmpb.gt(r0, #29) +define i32 @test24(i32 %Rs) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A4.cmpbgti(i32 %Rs, i32 29) + ret i32 %0 +} + +; CHECK-LABEL: @test25 +; CHECK: p0 = cmpb.gtu(r0, #111) +define i32 @test25(i32 %Rs) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A4.cmpbgtui(i32 %Rs, i32 111) + ret i32 %0 +} + +; CHECK-LABEL: @test30 +; CHECK: p0 = cmph.eq(r0, r1) +define i32 @test30(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A4.cmpheq(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test31 +; CHECK: p0 = cmph.gt(r0, r1) +define i32 @test31(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A4.cmphgt(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test32 +; CHECK: p0 = cmph.gtu(r0, r1) +define i32 @test32(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A4.cmphgtu(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test33 +; CHECK: p0 = cmph.eq(r0, #-123) +define i32 @test33(i32 %Rs) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A4.cmpheqi(i32 %Rs, i32 -123) + ret i32 %0 +} + +; CHECK-LABEL: @test34 +; CHECK: p0 = cmph.gt(r0, #-3) +define i32 @test34(i32 %Rs) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A4.cmphgti(i32 %Rs, i32 -3) + ret i32 %0 +} + +; CHECK-LABEL: @test35 +; CHECK: p0 = cmph.gtu(r0, #13) +define i32 @test35(i32 %Rs) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A4.cmphgtui(i32 %Rs, i32 13) + ret i32 %0 +} + +; CHECK-LABEL: @test40 +; CHECK: r1:0 = vmux(p0, r3:2, r5:4) +define i64 @test40(i32 %Pu, i64 %Rs, i64 %Rt) #0 { +entry: + %0 = tail call i64 @llvm.hexagon.C2.vmux(i32 %Pu, i64 %Rs, i64 %Rt) + ret i64 %0 +} + +; CHECK-LABEL: @test41 +; CHECK: p0 = any8(vcmpb.eq(r1:0, r3:2)) +define i32 @test41(i64 %Rs, i64 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A4.vcmpbeq.any(i64 %Rs, i64 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test50 +; CHECK: r1:0 = add(r1:0, r3:2) +define i64 @test50(i64 %Rs, i64 %Rt) #0 { +entry: + %0 = tail call i64 @llvm.hexagon.A2.addp(i64 %Rs, i64 %Rt) + ret i64 %0 +} + +; CHECK-LABEL: @test51 +; CHECK: r1:0 = add(r1:0, r3:2):sat +define i64 @test51(i64 %Rs, i64 %Rt) #0 { +entry: + %0 = tail call i64 @llvm.hexagon.A2.addpsat(i64 %Rs, i64 %Rt) + ret i64 %0 +} + +; CHECK-LABEL: @test52 +; CHECK: r1:0 = sub(r1:0, r3:2) +define i64 @test52(i64 %Rs, i64 %Rt) #0 { +entry: + %0 = tail call i64 @llvm.hexagon.A2.subp(i64 %Rs, i64 %Rt) + ret i64 %0 +} + +; CHECK-LABEL: @test53 +; CHECK: r1:0 = add(r0, r3:2) +define i64 @test53(i32 %Rs, i64 %Rt) #0 { +entry: + %0 = tail call i64 @llvm.hexagon.A2.addsp(i32 %Rs, i64 %Rt) + ret i64 %0 +} + +; CHECK-LABEL: @test54 +; CHECK: r1:0 = and(r1:0, r3:2) +define i64 @test54(i64 %Rs, i64 %Rt) #0 { +entry: + %0 = tail call i64 @llvm.hexagon.A2.andp(i64 %Rs, i64 %Rt) + ret i64 %0 +} + +; CHECK-LABEL: @test55 +; CHECK: r1:0 = or(r1:0, r3:2) +define i64 @test55(i64 %Rs, i64 %Rt) #0 { +entry: + %0 = tail call i64 @llvm.hexagon.A2.orp(i64 %Rs, i64 %Rt) + ret i64 %0 +} + +; CHECK-LABEL: @test56 +; CHECK: r1:0 = xor(r1:0, r3:2) +define i64 @test56(i64 %Rs, i64 %Rt) #0 { +entry: + %0 = tail call i64 @llvm.hexagon.A2.xorp(i64 %Rs, i64 %Rt) + ret i64 %0 +} + +; CHECK-LABEL: @test57 +; CHECK: r1:0 = and(r1:0, ~r3:2) +define i64 @test57(i64 %Rs, i64 %Rt) #0 { +entry: + %0 = tail call i64 @llvm.hexagon.A4.andnp(i64 %Rs, i64 %Rt) + ret i64 %0 +} + +; CHECK-LABEL: @test58 +; CHECK: r1:0 = or(r1:0, ~r3:2) +define i64 @test58(i64 %Rs, i64 %Rt) #0 { +entry: + %0 = tail call i64 @llvm.hexagon.A4.ornp(i64 %Rs, i64 %Rt) + ret i64 %0 +} + +; CHECK-LABEL: @test60 +; CHECK: r0 = add(r0.l, r1.l) +define i32 @test60(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A2.addh.l16.ll(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test61 +; CHECK: r0 = add(r0.l, r1.h) +define i32 @test61(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A2.addh.l16.hl(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test62 +; CHECK: r0 = add(r0.l, r1.l):sat +define i32 @test62(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A2.addh.l16.sat.ll(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test63 +; CHECK: r0 = add(r0.l, r1.h):sat +define i32 @test63(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A2.addh.l16.sat.hl(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test64 +; CHECK: r0 = add(r0.l, r1.l):<<16 +define i32 @test64(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A2.addh.h16.ll(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test65 +; CHECK: r0 = add(r0.l, r1.h):<<16 +define i32 @test65(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A2.addh.h16.lh(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test66 +; CHECK: r0 = add(r0.h, r1.l):<<16 +define i32 @test66(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A2.addh.h16.hl(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test67 +; CHECK: r0 = add(r0.h, r1.h):<<16 +define i32 @test67(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A2.addh.h16.hh(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test68 +; CHECK: r0 = add(r0.l, r1.l):sat:<<16 +define i32 @test68(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A2.addh.h16.sat.ll(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test69 +; CHECK: r0 = add(r0.l, r1.h):sat:<<16 +define i32 @test69(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A2.addh.h16.sat.lh(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test6A +; CHECK: r0 = add(r0.h, r1.l):sat:<<16 +define i32 @test6A(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A2.addh.h16.sat.hl(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test6B +; CHECK: r0 = add(r0.h, r1.h):sat:<<16 +define i32 @test6B(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A2.addh.h16.sat.hh(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test70 +; CHECK: r0 = sub(r0.l, r1.l) +define i32 @test70(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A2.subh.l16.ll(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test71 +; CHECK: r0 = sub(r0.l, r1.h) +define i32 @test71(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A2.subh.l16.hl(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test72 +; CHECK: r0 = sub(r0.l, r1.l):sat +define i32 @test72(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A2.subh.l16.sat.ll(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test73 +; CHECK: r0 = sub(r0.l, r1.h):sat +define i32 @test73(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A2.subh.l16.sat.hl(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test74 +; CHECK: r0 = sub(r0.l, r1.l):<<16 +define i32 @test74(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A2.subh.h16.ll(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test75 +; CHECK: r0 = sub(r0.l, r1.h):<<16 +define i32 @test75(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A2.subh.h16.lh(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test76 +; CHECK: r0 = sub(r0.h, r1.l):<<16 +define i32 @test76(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A2.subh.h16.hl(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test77 +; CHECK: r0 = sub(r0.h, r1.h):<<16 +define i32 @test77(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A2.subh.h16.hh(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test78 +; CHECK: r0 = sub(r0.l, r1.l):sat:<<16 +define i32 @test78(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A2.subh.h16.sat.ll(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test79 +; CHECK: r0 = sub(r0.l, r1.h):sat:<<16 +define i32 @test79(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A2.subh.h16.sat.lh(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test7A +; CHECK: r0 = sub(r0.h, r1.l):sat:<<16 +define i32 @test7A(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A2.subh.h16.sat.hl(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test7B +; CHECK: r0 = sub(r0.h, r1.h):sat:<<16 +define i32 @test7B(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A2.subh.h16.sat.hh(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test90 +; CHECK: r0 = and(#1, asl(r0, #2)) +define i32 @test90(i32 %Rs) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.S4.andi.asl.ri(i32 1, i32 %Rs, i32 2) + ret i32 %0 +} + +; CHECK-LABEL: @test91 +; CHECK: r0 = or(#1, asl(r0, #2)) +define i32 @test91(i32 %Rs) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.S4.ori.asl.ri(i32 1, i32 %Rs, i32 2) + ret i32 %0 +} + +; CHECK-LABEL: @test92 +; CHECK: r0 = add(#1, asl(r0, #2)) +define i32 @test92(i32 %Rs) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.S4.addi.asl.ri(i32 1, i32 %Rs, i32 2) + ret i32 %0 +} + +; CHECK-LABEL: @test93 +; CHECK: r0 = sub(#1, asl(r0, #2)) +define i32 @test93(i32 %Rs) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.S4.subi.asl.ri(i32 1, i32 %Rs, i32 2) + ret i32 %0 +} + +; CHECK-LABEL: @test94 +; CHECK: r0 = and(#1, lsr(r0, #2)) +define i32 @test94(i32 %Rs) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.S4.andi.lsr.ri(i32 1, i32 %Rs, i32 2) + ret i32 %0 +} + +; CHECK-LABEL: @test95 +; CHECK: r0 = or(#1, lsr(r0, #2)) +define i32 @test95(i32 %Rs) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.S4.ori.lsr.ri(i32 1, i32 %Rs, i32 2) + ret i32 %0 +} + +; CHECK-LABEL: @test96 +; CHECK: r0 = add(#1, lsr(r0, #2)) +define i32 @test96(i32 %Rs) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.S4.addi.lsr.ri(i32 1, i32 %Rs, i32 2) + ret i32 %0 +} + +; CHECK-LABEL: @test97 +; CHECK: r0 = sub(#1, lsr(r0, #2)) +define i32 @test97(i32 %Rs) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.S4.subi.lsr.ri(i32 1, i32 %Rs, i32 2) + ret i32 %0 +} + +; CHECK-LABEL: @test100 +; CHECK: r1:0 = bitsplit(r0, r1) +define i64 @test100(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i64 @llvm.hexagon.A4.bitsplit(i32 %Rs, i32 %Rt) + ret i64 %0 +} + +; CHECK-LABEL: @test101 +; CHECK: r0 = modwrap(r0, r1) +define i32 @test101(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.A4.modwrapu(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test102 +; CHECK: r0 = parity(r1:0, r3:2) +define i32 @test102(i64 %Rs, i64 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.S2.parityp(i64 %Rs, i64 %Rt) + ret i32 %0 +} + +; CHECK-LABEL: @test103 +; CHECK: r0 = parity(r0, r1) +define i32 @test103(i32 %Rs, i32 %Rt) #0 { +entry: + %0 = tail call i32 @llvm.hexagon.S4.parity(i32 %Rs, i32 %Rt) + ret i32 %0 +} + +declare i32 @llvm.hexagon.C2.cmpeqp(i64, i64) #1 +declare i32 @llvm.hexagon.C2.cmpgtp(i64, i64) #1 +declare i32 @llvm.hexagon.C2.cmpgtup(i64, i64) #1 +declare i32 @llvm.hexagon.A4.rcmpeq(i32, i32) #1 +declare i32 @llvm.hexagon.A4.rcmpneq(i32, i32) #1 +declare i32 @llvm.hexagon.A4.rcmpeqi(i32, i32) #1 +declare i32 @llvm.hexagon.A4.rcmpneqi(i32, i32) #1 +declare i32 @llvm.hexagon.A4.cmpbeq(i32, i32) #1 +declare i32 @llvm.hexagon.A4.cmpbgt(i32, i32) #1 +declare i32 @llvm.hexagon.A4.cmpbgtu(i32, i32) #1 +declare i32 @llvm.hexagon.A4.cmpbeqi(i32, i32) #1 +declare i32 @llvm.hexagon.A4.cmpbgti(i32, i32) #1 +declare i32 @llvm.hexagon.A4.cmpbgtui(i32, i32) #1 +declare i32 @llvm.hexagon.A4.cmpheq(i32, i32) #1 +declare i32 @llvm.hexagon.A4.cmphgt(i32, i32) #1 +declare i32 @llvm.hexagon.A4.cmphgtu(i32, i32) #1 +declare i32 @llvm.hexagon.A4.cmpheqi(i32, i32) #1 +declare i32 @llvm.hexagon.A4.cmphgti(i32, i32) #1 +declare i32 @llvm.hexagon.A4.cmphgtui(i32, i32) #1 +declare i64 @llvm.hexagon.C2.vmux(i32, i64, i64) #1 +declare i32 @llvm.hexagon.A4.vcmpbeq.any(i64, i64) #1 +declare i64 @llvm.hexagon.A2.addp(i64, i64) #1 +declare i64 @llvm.hexagon.A2.addpsat(i64, i64) #1 +declare i64 @llvm.hexagon.A2.subp(i64, i64) #1 +declare i64 @llvm.hexagon.A2.addsp(i32, i64) #1 +declare i64 @llvm.hexagon.A2.andp(i64, i64) #1 +declare i64 @llvm.hexagon.A2.orp(i64, i64) #1 +declare i64 @llvm.hexagon.A2.xorp(i64, i64) #1 +declare i64 @llvm.hexagon.A4.ornp(i64, i64) #1 +declare i64 @llvm.hexagon.A4.andnp(i64, i64) #1 +declare i32 @llvm.hexagon.A2.addh.l16.ll(i32, i32) #1 +declare i32 @llvm.hexagon.A2.addh.l16.hl(i32, i32) #1 +declare i32 @llvm.hexagon.A2.addh.l16.sat.ll(i32, i32) #1 +declare i32 @llvm.hexagon.A2.addh.l16.sat.hl(i32, i32) #1 +declare i32 @llvm.hexagon.A2.addh.h16.ll(i32, i32) #1 +declare i32 @llvm.hexagon.A2.addh.h16.lh(i32, i32) #1 +declare i32 @llvm.hexagon.A2.addh.h16.hl(i32, i32) #1 +declare i32 @llvm.hexagon.A2.addh.h16.hh(i32, i32) #1 +declare i32 @llvm.hexagon.A2.addh.h16.sat.ll(i32, i32) #1 +declare i32 @llvm.hexagon.A2.addh.h16.sat.lh(i32, i32) #1 +declare i32 @llvm.hexagon.A2.addh.h16.sat.hl(i32, i32) #1 +declare i32 @llvm.hexagon.A2.addh.h16.sat.hh(i32, i32) #1 +declare i32 @llvm.hexagon.A2.subh.l16.ll(i32, i32) #1 +declare i32 @llvm.hexagon.A2.subh.l16.hl(i32, i32) #1 +declare i32 @llvm.hexagon.A2.subh.l16.sat.ll(i32, i32) #1 +declare i32 @llvm.hexagon.A2.subh.l16.sat.hl(i32, i32) #1 +declare i32 @llvm.hexagon.A2.subh.h16.ll(i32, i32) #1 +declare i32 @llvm.hexagon.A2.subh.h16.lh(i32, i32) #1 +declare i32 @llvm.hexagon.A2.subh.h16.hl(i32, i32) #1 +declare i32 @llvm.hexagon.A2.subh.h16.hh(i32, i32) #1 +declare i32 @llvm.hexagon.A2.subh.h16.sat.ll(i32, i32) #1 +declare i32 @llvm.hexagon.A2.subh.h16.sat.lh(i32, i32) #1 +declare i32 @llvm.hexagon.A2.subh.h16.sat.hl(i32, i32) #1 +declare i32 @llvm.hexagon.A2.subh.h16.sat.hh(i32, i32) #1 +declare i64 @llvm.hexagon.A4.bitsplit(i32, i32) #1 +declare i32 @llvm.hexagon.A4.modwrapu(i32, i32) #1 +declare i32 @llvm.hexagon.S2.parityp(i64, i64) #1 +declare i32 @llvm.hexagon.S4.parity(i32, i32) #1 +declare i32 @llvm.hexagon.S4.andi.asl.ri(i32, i32, i32) #1 +declare i32 @llvm.hexagon.S4.ori.asl.ri(i32, i32, i32) #1 +declare i32 @llvm.hexagon.S4.addi.asl.ri(i32, i32, i32) #1 +declare i32 @llvm.hexagon.S4.subi.asl.ri(i32, i32, i32) #1 +declare i32 @llvm.hexagon.S4.andi.lsr.ri(i32, i32, i32) #1 +declare i32 @llvm.hexagon.S4.ori.lsr.ri(i32, i32, i32) #1 +declare i32 @llvm.hexagon.S4.addi.lsr.ri(i32, i32, i32) #1 +declare i32 @llvm.hexagon.S4.subi.lsr.ri(i32, i32, i32) #1 + +attributes #0 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } Index: test/CodeGen/Hexagon/remove-endloop.ll =================================================================== --- /dev/null +++ test/CodeGen/Hexagon/remove-endloop.ll @@ -0,0 +1,56 @@ +; RUN: llc -march=hexagon -O2 < %s | FileCheck %s + +define void @foo(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind optsize { +entry: + %cmp = icmp sgt i32 %n, 100 + br i1 %cmp, label %for.body.preheader, label %for.cond4.preheader + +; CHECK: endloop0 +; CHECK: endloop0 +; CHECK-NOT: endloop0 + +for.body.preheader: + br label %for.body + +for.cond4.preheader: + %cmp113 = icmp sgt i32 %n, 0 + br i1 %cmp113, label %for.body7.preheader, label %if.end + +for.body7.preheader: + br label %for.body7 + +for.body: + %arrayidx.phi = phi i32* [ %arrayidx.inc, %for.body ], [ %B, %for.body.preheader ] + %arrayidx3.phi = phi i32* [ %arrayidx3.inc, %for.body ], [ %A, %for.body.preheader ] + %i.014 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] + %0 = load i32, i32* %arrayidx.phi, align 4 + %sub = add nsw i32 %0, -1 + store i32 %sub, i32* %arrayidx3.phi, align 4 + %inc = add nsw i32 %i.014, 1 + %exitcond = icmp eq i32 %inc, %n + %arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1 + %arrayidx3.inc = getelementptr i32, i32* %arrayidx3.phi, i32 1 + br i1 %exitcond, label %if.end.loopexit, label %for.body + +for.body7: + %arrayidx8.phi = phi i32* [ %arrayidx8.inc, %for.body7 ], [ %B, %for.body7.preheader ] + %arrayidx9.phi = phi i32* [ %arrayidx9.inc, %for.body7 ], [ %A, %for.body7.preheader ] + %i.117 = phi i32 [ %inc11, %for.body7 ], [ 0, %for.body7.preheader ] + %1 = load i32, i32* %arrayidx8.phi, align 4 + %add = add nsw i32 %1, 1 + store i32 %add, i32* %arrayidx9.phi, align 4 + %inc11 = add nsw i32 %i.117, 1 + %exitcond18 = icmp eq i32 %inc11, %n + %arrayidx8.inc = getelementptr i32, i32* %arrayidx8.phi, i32 1 + %arrayidx9.inc = getelementptr i32, i32* %arrayidx9.phi, i32 1 + br i1 %exitcond18, label %if.end.loopexit21, label %for.body7 + +if.end.loopexit: + br label %if.end + +if.end.loopexit21: + br label %if.end + +if.end: + ret void +}