diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -157,7 +157,7 @@ SmallVector, 16> BackTrace; // For each opcode, keep a list of potential CSE instructions. - DenseMap> CSEMap; + DenseMap> CSEMap; enum { SpeculateFalse = 0, @@ -259,13 +259,12 @@ MachineInstr *ExtractHoistableLoad(MachineInstr *MI); - const MachineInstr * - LookForDuplicate(const MachineInstr *MI, - std::vector &PrevMIs); + MachineInstr *LookForDuplicate(const MachineInstr *MI, + std::vector &PrevMIs); bool EliminateCSE( MachineInstr *MI, - DenseMap>::iterator &CI); + DenseMap>::iterator &CI); bool MayCSE(MachineInstr *MI); @@ -1405,10 +1404,10 @@ /// Find an instruction amount PrevMIs that is a duplicate of MI. /// Return this instruction if it's found. -const MachineInstr* +MachineInstr* MachineLICMBase::LookForDuplicate(const MachineInstr *MI, - std::vector &PrevMIs) { - for (const MachineInstr *PrevMI : PrevMIs) + std::vector &PrevMIs) { + for (MachineInstr *PrevMI : PrevMIs) if (TII->produceSameValue(*MI, *PrevMI, (PreRegAlloc ? MRI : nullptr))) return PrevMI; @@ -1420,13 +1419,13 @@ /// the existing instruction rather than hoisting the instruction to the /// preheader. bool MachineLICMBase::EliminateCSE(MachineInstr *MI, - DenseMap>::iterator &CI) { + DenseMap>::iterator &CI) { // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate // the undef property onto uses. 
if (CI == CSEMap.end() || MI->isImplicitDef()) return false; - if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) { + if (MachineInstr *Dup = LookForDuplicate(MI, CI->second)) { LLVM_DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup); // Replace virtual registers defined by MI by their counterparts defined @@ -1466,6 +1465,8 @@ Register DupReg = Dup->getOperand(Idx).getReg(); MRI->replaceRegWith(Reg, DupReg); MRI->clearKillFlags(DupReg); + // Clear Dup's dead flag, if any: DupReg now stands in for Reg. + Dup->getOperand(Idx).setIsDead(false); } MI->eraseFromParent(); @@ -1479,7 +1480,7 @@ /// the loop. bool MachineLICMBase::MayCSE(MachineInstr *MI) { unsigned Opcode = MI->getOpcode(); - DenseMap>::iterator + DenseMap>::iterator CI = CSEMap.find(Opcode); // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate // the undef property onto uses. @@ -1534,7 +1535,7 @@ // Look for opportunity to CSE the hoisted instruction. unsigned Opcode = MI->getOpcode(); - DenseMap>::iterator + DenseMap>::iterator CI = CSEMap.find(Opcode); if (!EliminateCSE(MI, CI)) { // Otherwise, splice the instruction to the preheader. diff --git a/llvm/test/CodeGen/PowerPC/machinelicm-cse-dead-flag.mir b/llvm/test/CodeGen/PowerPC/machinelicm-cse-dead-flag.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/machinelicm-cse-dead-flag.mir @@ -0,0 +1,45 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -run-pass early-machinelicm -mtriple=powerpc64le-unknown-linux-gnu \ +# RUN: -verify-machineinstrs %s -o - | FileCheck %s +--- +name: deadFlagAfterCSE +# This case tests that after the dead %3 is CSE-ed with hoisted %5 in the MachineLICM +# pass, the dead flag on %3 will be cleared. 
+tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: deadFlagAfterCSE + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $x3, $x4 + ; CHECK: [[COPY:%[0-9]+]]:g8rc = COPY $x3 + ; CHECK: [[COPY1:%[0-9]+]]:g8rc = COPY $x4 + ; CHECK: [[ADD8_:%[0-9]+]]:g8rc_and_g8rc_nox0 = ADD8 [[COPY]], [[COPY1]] + ; CHECK: [[ADDI8_:%[0-9]+]]:g8rc_and_g8rc_nox0 = ADDI8 [[ADD8_]], 100 + ; CHECK: B %bb.1 + ; CHECK: bb.1: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[PHI:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADD8_]], %bb.0, %5, %bb.1 + ; CHECK: STDX [[PHI]], [[ADDI8_]], [[ADD8_]] + ; CHECK: [[ADDI8_1:%[0-9]+]]:g8rc = nuw ADDI8 [[PHI]], 1 + ; CHECK: B %bb.1 + ; CHECK: bb.2: + ; CHECK: BLR8 implicit $lr8, implicit $rm + bb.0: + liveins: $x3, $x4 + %0:g8rc = COPY $x3 + %1:g8rc = COPY $x4 + %2:g8rc_and_g8rc_nox0 = ADD8 %0, %1 + dead %3:g8rc_and_g8rc_nox0 = ADDI8 %2, 100 + B %bb.1 + + bb.1: + %4:g8rc_and_g8rc_nox0 = PHI %2, %bb.0, %6, %bb.1 + %5:g8rc_and_g8rc_nox0 = ADDI8 %2, 100 + STDX %4, %5, %2 + %6:g8rc = nuw ADDI8 %4, 1 + B %bb.1 + + bb.2: + BLR8 implicit $lr8, implicit $rm +... +