Index: llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp =================================================================== --- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -1311,12 +1311,6 @@ E = ++MachineBasicBlock::iterator(Divergent->MI); I != E; ++I) RemovePredicate(&*I); - // Check if the instruction defining vpr is a vcmp so it can be combined - // with the VPST This should be the divergent instruction - MachineInstr *VCMP = VCMPOpcodeToVPT(Divergent->MI->getOpcode()) != 0 - ? Divergent->MI - : nullptr; - unsigned Size = 0; auto E = MachineBasicBlock::reverse_iterator(Divergent->MI); auto I = MachineBasicBlock::reverse_iterator(Insts.back().MI); @@ -1329,6 +1323,25 @@ MachineInstrBuilder MIB; LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *Block.getPredicateThen()); + + // Check if the instruction defining vpr is a vcmp so it can be combined + // with the VPST. This should be the divergent instruction + MachineInstr *VCMP = VCMPOpcodeToVPT(Divergent->MI->getOpcode()) != 0 + ? Divergent->MI + : nullptr; + if (VCMP) { + MachineOperand Op1 = VCMP->getOperand(1); + MachineOperand Op2 = VCMP->getOperand(2); + MachineInstr *Op1Def = Op1.isReg() ? RDA->getUniqueReachingMIDef(VCMP, Op1.getReg()) : nullptr; + MachineInstr *Op2Def = Op2.isReg() ? RDA->getUniqueReachingMIDef(VCMP, Op2.getReg()) : nullptr; + + // Make sure that the VCMP operands will have the same value at the insertion point + if (Op1.isReg() && RDA->getUniqueReachingMIDef(InsertAt, Op1.getReg()) != Op1Def) + VCMP = nullptr; + else if (Op2.isReg() && RDA->getUniqueReachingMIDef(InsertAt, Op2.getReg()) != Op2Def) + VCMP = nullptr; + } + if (VCMP) { // Combine the VPST and VCMP into a VPT MIB =