diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp
--- a/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -557,6 +557,61 @@
   return true;
 }
 
+/// \return true iff the two registers are known to have the same value.
+///
+/// Identical registers trivially match. Otherwise both must be virtual
+/// registers whose defining instructions have no unmodeled side effects, do
+/// not access memory, read no physical registers, are accepted by
+/// TargetInstrInfo::produceSameValue, and define \p TReg and \p FReg through
+/// the same operand index.
+static bool hasSameValue(const MachineRegisterInfo &MRI,
+                         const TargetInstrInfo *TII, Register TReg,
+                         Register FReg) {
+  if (TReg == FReg)
+    return true;
+
+  // Only virtual registers have a defining instruction we can inspect.
+  if (!TReg.isVirtual() || !FReg.isVirtual())
+    return false;
+
+  const MachineInstr *TDef = MRI.getUniqueVRegDef(TReg);
+  const MachineInstr *FDef = MRI.getUniqueVRegDef(FReg);
+  if (!TDef || !FDef)
+    return false;
+
+  // A definition is only comparable if executing it at another point would
+  // give the same result: no unmodeled side effects, no memory access (a
+  // store in between could change a loaded value), and no physical register
+  // inputs (they may be redefined between the two definitions).
+  auto IsComparableDef = [](const MachineInstr &MI) {
+    if (MI.hasUnmodeledSideEffects() || MI.mayLoadOrStore())
+      return false;
+    for (const MachineOperand &MO : MI.operands())
+      if (MO.isReg() && MO.isUse() && MO.getReg().isPhysical())
+        return false;
+    return true;
+  };
+  if (!IsComparableDef(*TDef) || !IsComparableDef(*FDef))
+    return false;
+
+  if (!TII->produceSameValue(*TDef, *FDef, &MRI))
+    return false;
+
+  // One instruction (or two equivalent ones) may define several registers
+  // holding different values, so TReg and FReg must additionally be defined
+  // by corresponding operands.
+  auto DefOperandIdx = [](const MachineInstr &MI, Register Reg) -> int {
+    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+      const MachineOperand &MO = MI.getOperand(I);
+      if (MO.isReg() && MO.isDef() && MO.getReg() == Reg)
+        return static_cast<int>(I);
+    }
+    return -1;
+  };
+  const int TIdx = DefOperandIdx(*TDef, TReg);
+  return TIdx != -1 && TIdx == DefOperandIdx(*FDef, FReg);
+}
+
 /// replacePHIInstrs - Completely replace PHI instructions with selects.
 /// This is possible when the only Tail predecessors are the if-converted
 /// blocks.
@@ -571,7 +626,14 @@
     PHIInfo &PI = PHIs[i];
     LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI);
     Register DstReg = PI.PHI->getOperand(0).getReg();
-    TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg);
+    if (hasSameValue(*MRI, TII, PI.TReg, PI.FReg)) {
+      // We do not need the select instruction if both incoming values are
+      // equal, but we do need a COPY.
+      BuildMI(*Head, FirstTerm, HeadDL, TII->get(TargetOpcode::COPY), DstReg)
+        .addReg(PI.TReg);
+    } else {
+      TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg);
+    }
     LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm));
     PI.PHI->eraseFromParent();
     PI.PHI = nullptr;
@@ -592,7 +654,7 @@
     unsigned DstReg = 0;
 
     LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI);
-    if (PI.TReg == PI.FReg) {
+    if (hasSameValue(*MRI, TII, PI.TReg, PI.FReg)) {
       // We do not need the select instruction if both incoming values are
       // equal.
DstReg = PI.TReg; diff --git a/llvm/test/CodeGen/AArch64/early-ifcvt-same-value.mir b/llvm/test/CodeGen/AArch64/early-ifcvt-same-value.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/early-ifcvt-same-value.mir @@ -0,0 +1,128 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-- -mcpu=apple-a7 -run-pass=early-ifcvt -stress-early-ifcvt -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: fmov0_diamond1 +tracksRegLiveness: true +registers: + - { id: 0, class: fpr32, preferred-register: '' } + - { id: 1, class: fpr32, preferred-register: '' } + - { id: 2, class: fpr32, preferred-register: '' } + - { id: 3, class: fpr32, preferred-register: '' } + - { id: 4, class: fpr32, preferred-register: '' } + - { id: 5, class: gpr32common, preferred-register: '' } + - { id: 6, class: gpr32, preferred-register: '' } + - { id: 7, class: fpr32, preferred-register: '' } + - { id: 8, class: fpr32, preferred-register: '' } +liveins: + - { reg: '$s1', virtual-reg: '%4' } + - { reg: '$w0', virtual-reg: '%5' } +body: | + ; CHECK-LABEL: name: fmov0_diamond1 + ; CHECK: bb.0.entry: + ; CHECK: liveins: $s1, $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1 + ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv + ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0 + ; CHECK: [[FMOVS0_1:%[0-9]+]]:fpr32 = FMOVS0 + ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[FMOVS0_]] + ; CHECK: $s0 = COPY [[COPY2]] + ; CHECK: RET_ReallyLR implicit $s0 + bb.0.entry: + successors: %bb.1, %bb.2 + liveins: $s1, $w0 + + %5:gpr32common = COPY $w0 + %4:fpr32 = COPY $s1 + %6:gpr32 = SUBSWri %5, 1, 0, implicit-def $nzcv + Bcc 1, %bb.2, implicit $nzcv + B %bb.1 + + bb.1: + successors: %bb.3 + + %0:fpr32 = FMOVS0 + B %bb.3 + + bb.2: + successors: %bb.3 + + %1:fpr32 = FMOVS0 + + bb.3: + %2:fpr32 = PHI %1, %bb.2, %0, %bb.1 + $s0 = COPY %2 + RET_ReallyLR implicit $s0 + +... 
+--- +name: fmov0_diamond2 +tracksRegLiveness: true +registers: + - { id: 0, class: fpr32, preferred-register: '' } + - { id: 1, class: fpr32, preferred-register: '' } + - { id: 2, class: fpr32, preferred-register: '' } + - { id: 3, class: fpr32, preferred-register: '' } + - { id: 4, class: fpr32, preferred-register: '' } + - { id: 5, class: gpr32common, preferred-register: '' } + - { id: 6, class: gpr32, preferred-register: '' } + - { id: 7, class: fpr32, preferred-register: '' } + - { id: 8, class: fpr32, preferred-register: '' } +liveins: + - { reg: '$s1', virtual-reg: '%4' } + - { reg: '$w0', virtual-reg: '%5' } +body: | + ; CHECK-LABEL: name: fmov0_diamond2 + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: liveins: $s1, $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1 + ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv + ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0 + ; CHECK: [[FMOVS0_1:%[0-9]+]]:fpr32 = FMOVS0 + ; CHECK: B %bb.4 + ; CHECK: bb.1: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: [[DEF:%[0-9]+]]:fpr32 = IMPLICIT_DEF + ; CHECK: B %bb.4 + ; CHECK: bb.4: + ; CHECK: [[PHI:%[0-9]+]]:fpr32 = PHI [[FMOVS0_]], %bb.0, [[DEF]], %bb.1 + ; CHECK: $s0 = COPY [[PHI]] + ; CHECK: RET_ReallyLR implicit $s0 + bb.0.entry: + successors: %bb.1, %bb.2 + liveins: $s1, $w0 + + %5:gpr32common = COPY $w0 + %4:fpr32 = COPY $s1 + %6:gpr32 = SUBSWri %5, 1, 0, implicit-def $nzcv + Bcc 1, %bb.2, implicit $nzcv + B %bb.1 + + bb.4: + successors: %bb.3 + + ; Make sure we also handle the case when there are extra predecessors on + ; the tail block. + %3:fpr32 = IMPLICIT_DEF + B %bb.3 + + bb.1: + successors: %bb.3 + + %0:fpr32 = FMOVS0 + B %bb.3 + + bb.2: + successors: %bb.3 + + %1:fpr32 = FMOVS0 + + bb.3: + %2:fpr32 = PHI %1, %bb.2, %0, %bb.1, %3, %bb.4 + $s0 = COPY %2 + RET_ReallyLR implicit $s0 + +...