Index: llvm/trunk/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp +++ llvm/trunk/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp @@ -340,7 +340,7 @@ return ClauseFile(MIb, std::move(ClauseContent)); } - void getLiteral(MachineInstr *MI, std::vector &Lits) const { + void getLiteral(MachineInstr *MI, std::vector &Lits) const { static const unsigned LiteralRegs[] = { AMDGPU::ALU_LITERAL_X, AMDGPU::ALU_LITERAL_Y, @@ -349,19 +349,28 @@ }; const SmallVector, 3 > Srcs = TII->getSrcs(MI); - for (unsigned i = 0, e = Srcs.size(); i < e; ++i) { - if (Srcs[i].first->getReg() != AMDGPU::ALU_LITERAL_X) + for (const auto &Src:Srcs) { + if (Src.first->getReg() != AMDGPU::ALU_LITERAL_X) continue; - int64_t Imm = Srcs[i].second; - std::vector::iterator It = - std::find(Lits.begin(), Lits.end(), Imm); + int64_t Imm = Src.second; + std::vector::iterator It = + std::find_if(Lits.begin(), Lits.end(), + [&](MachineOperand* val) + { return val->isImm() && (val->getImm() == Imm);}); + + // Get corresponding Operand + MachineOperand &Operand = MI->getOperand( + TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::literal)); + if (It != Lits.end()) { + // Reuse existing literal reg unsigned Index = It - Lits.begin(); - Srcs[i].first->setReg(LiteralRegs[Index]); + Src.first->setReg(LiteralRegs[Index]); } else { + // Allocate new literal reg assert(Lits.size() < 4 && "Too many literals in Instruction Group"); - Srcs[i].first->setReg(LiteralRegs[Lits.size()]); - Lits.push_back(Imm); + Src.first->setReg(LiteralRegs[Lits.size()]); + Lits.push_back(&Operand); } } } @@ -394,14 +403,13 @@ } if (!I->isBundle() && !TII->isALUInstr(I->getOpcode())) break; - std::vector Literals; + std::vectorLiterals; if (I->isBundle()) { MachineInstr *DeleteMI = I; MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); while (++BI != E && BI->isBundledWithPred()) { BI->unbundleFromPred(); - for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = BI->getOperand(i); + for (MachineOperand &MO : BI->operands()) { if (MO.isReg() && MO.isInternalRead()) MO.setIsInternalRead(false); } @@ -415,13 +423,22 @@ ClauseContent.push_back(I); I++; } - for (unsigned i = 0, e = Literals.size(); i < e; i+=2) { - unsigned literal0 = Literals[i]; - unsigned literal2 = (i + 1 < e)?Literals[i + 1]:0; - MachineInstr *MILit = BuildMI(MBB, I, I->getDebugLoc(), - TII->get(AMDGPU::LITERALS)) - .addImm(literal0) - .addImm(literal2); + for (unsigned i = 0, e = Literals.size(); i < e; i += 2) { + MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(), + TII->get(AMDGPU::LITERALS)); + if (Literals[i]->isImm()) { + MILit.addImm(Literals[i]->getImm()); + } else { + MILit.addImm(0); + } + if (i + 1 < e) { + if (Literals[i + 1]->isImm()) { + MILit.addImm(Literals[i + 1]->getImm()); + } else { + MILit.addImm(0); + } + } else + MILit.addImm(0); ClauseContent.push_back(MILit); } }