diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp
--- a/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -557,6 +557,67 @@
   return true;
 }
 
+/// \return the index of the operand of \p Def that defines \p VReg.
+///
+/// Every operand is inspected rather than only Def.defs(), because that range
+/// covers just the explicit defs and a register defined any other way would
+/// never be found. \p Def is required to define \p VReg.
+static unsigned operandIndex(const MachineInstr &Def, Register VReg) {
+  for (unsigned Idx = 0, E = Def.getNumOperands(); Idx != E; ++Idx) {
+    const MachineOperand &MO = Def.getOperand(Idx);
+    if (MO.isReg() && MO.isDef() && MO.getReg() == VReg)
+      return Idx;
+  }
+  llvm_unreachable("matching operand not found");
+}
+
+/// \return true iff the two registers are known to have the same value.
+///
+/// Identical registers trivially qualify. Otherwise both must be virtual with
+/// a unique definition; \p TReg's definition must have no unmodeled side
+/// effects, no memory access except a dereferenceable invariant load, and no
+/// physical register uses; \p TII must report both definitions as producing
+/// the same value(s); and the registers must come from corresponding operands.
+static bool hasSameValue(const MachineRegisterInfo &MRI,
+                         const TargetInstrInfo *TII, Register TReg,
+                         Register FReg) {
+  if (TReg == FReg)
+    return true;
+
+  if (!TReg.isVirtual() || !FReg.isVirtual())
+    return false;
+
+  const MachineInstr *TDef = MRI.getUniqueVRegDef(TReg);
+  const MachineInstr *FDef = MRI.getUniqueVRegDef(FReg);
+  if (!TDef || !FDef)
+    return false;
+
+  // If there are side-effects, all bets are off.
+  if (TDef->hasUnmodeledSideEffects())
+    return false;
+
+  // If the instruction could modify memory, or there may be some intervening
+  // store between the two, we can't consider them to be equal.
+  if (TDef->mayLoadOrStore() && !TDef->isDereferenceableInvariantLoad(nullptr))
+    return false;
+
+  // We also can't guarantee that they are the same if, for example, the
+  // instructions are both a copy from a physical reg, because some other
+  // instruction may have modified the value in that reg between the two
+  // defining insts.
+  if (any_of(TDef->uses(), [](const MachineOperand &MO) {
+        return MO.isReg() && MO.getReg().isPhysical();
+      }))
+    return false;
+
+  // Check whether the two defining instructions produce the same value(s).
+  if (!TII->produceSameValue(*TDef, *FDef, &MRI))
+    return false;
+
+  // Further, check that the two defs come from corresponding operands.
+  return operandIndex(*TDef, TReg) == operandIndex(*FDef, FReg);
+}
+
 /// replacePHIInstrs - Completely replace PHI instructions with selects.
 /// This is possible when the only Tail predecessors are the if-converted
 /// blocks.
@@ -571,7 +632,15 @@
     PHIInfo &PI = PHIs[i];
     LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI);
     Register DstReg = PI.PHI->getOperand(0).getReg();
-    TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg);
+    if (hasSameValue(*MRI, TII, PI.TReg, PI.FReg)) {
+      // No select is needed when both incoming values are known equal, but a
+      // COPY must still define DstReg since the PHI is erased below.
+      BuildMI(*Head, FirstTerm, HeadDL, TII->get(TargetOpcode::COPY), DstReg)
+          .addReg(PI.TReg);
+    } else {
+      TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg,
+                        PI.FReg);
+    }
     LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm));
     PI.PHI->eraseFromParent();
     PI.PHI = nullptr;
@@ -592,7 +661,7 @@
     unsigned DstReg = 0;
 
     LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI);
-    if (PI.TReg == PI.FReg) {
+    if (hasSameValue(*MRI, TII, PI.TReg, PI.FReg)) {
       // We do not need the select instruction if both incoming values are
       // equal.
       DstReg = PI.TReg;
diff --git a/llvm/test/CodeGen/AArch64/early-ifcvt-same-value.mir b/llvm/test/CodeGen/AArch64/early-ifcvt-same-value.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/early-ifcvt-same-value.mir
@@ -0,0 +1,250 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-- -run-pass=early-ifcvt -stress-early-ifcvt -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: fmov0
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr32, preferred-register: '' }
+  - { id: 1, class: fpr32, preferred-register: '' }
+  - { id: 2, class: fpr32, preferred-register: '' }
+  - { id: 3, class: fpr32, preferred-register: '' }
+  - { id: 4, class: fpr32, preferred-register: '' }
+  - { id: 5, class: gpr32common, preferred-register: '' }
+  - { id: 6, class: gpr32, preferred-register: '' }
+  - { id: 7, class: fpr32, preferred-register: '' }
+  - { id: 8, class: fpr32, preferred-register: '' }
+liveins:
+  - { reg: '$s1', virtual-reg: '%4' }
+  - { reg: '$w0', virtual-reg: '%5' }
+body: |
+  ; CHECK-LABEL: name: fmov0
+  ; CHECK: bb.0.entry:
+  ; CHECK: liveins: $s1, $w0
+  ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0
+  ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
+  ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv
+  ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
+  ; CHECK: [[FMOVS0_1:%[0-9]+]]:fpr32 = FMOVS0
+  ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[FMOVS0_]]
+  ; CHECK: $s0 = COPY [[COPY2]]
+  ; CHECK: RET_ReallyLR implicit $s0
+  bb.0.entry:
+    successors: %bb.1, %bb.2
+    liveins: $s1, $w0
+    ; Both arms define the same FMOVS0, so a COPY replaces the select.
+    %5:gpr32common = COPY $w0
+    %4:fpr32 = COPY $s1
+    %6:gpr32 = SUBSWri %5, 1, 0, implicit-def $nzcv
+    Bcc 1, %bb.2, implicit $nzcv
+    B %bb.1
+
+  bb.1:
+    successors: %bb.3
+
+    %0:fpr32 = FMOVS0
+    B %bb.3
+
+  bb.2:
+    successors: %bb.3
+
+    %1:fpr32 = FMOVS0
+
+  bb.3:
+    %2:fpr32 = PHI %1, %bb.2, %0, %bb.1
+    $s0 = COPY %2
+    RET_ReallyLR implicit $s0
+
+...
+---
+name: fmov0_extrapred
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr32, preferred-register: '' }
+  - { id: 1, class: fpr32, preferred-register: '' }
+  - { id: 2, class: fpr32, preferred-register: '' }
+  - { id: 3, class: fpr32, preferred-register: '' }
+  - { id: 4, class: fpr32, preferred-register: '' }
+  - { id: 5, class: gpr32common, preferred-register: '' }
+  - { id: 6, class: gpr32, preferred-register: '' }
+  - { id: 7, class: fpr32, preferred-register: '' }
+  - { id: 8, class: fpr32, preferred-register: '' }
+liveins:
+  - { reg: '$s1', virtual-reg: '%4' }
+  - { reg: '$w0', virtual-reg: '%5' }
+body: |
+  ; CHECK-LABEL: name: fmov0_extrapred
+  ; CHECK: bb.0.entry:
+  ; CHECK: successors: %bb.4(0x80000000)
+  ; CHECK: liveins: $s1, $w0
+  ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0
+  ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
+  ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv
+  ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
+  ; CHECK: [[FMOVS0_1:%[0-9]+]]:fpr32 = FMOVS0
+  ; CHECK: B %bb.4
+  ; CHECK: bb.1:
+  ; CHECK: successors: %bb.4(0x80000000)
+  ; CHECK: [[DEF:%[0-9]+]]:fpr32 = IMPLICIT_DEF
+  ; CHECK: B %bb.4
+  ; CHECK: bb.4:
+  ; CHECK: [[PHI:%[0-9]+]]:fpr32 = PHI [[FMOVS0_]], %bb.0, [[DEF]], %bb.1
+  ; CHECK: $s0 = COPY [[PHI]]
+  ; CHECK: RET_ReallyLR implicit $s0
+  bb.0.entry:
+    successors: %bb.1, %bb.2
+    liveins: $s1, $w0
+
+    %5:gpr32common = COPY $w0
+    %4:fpr32 = COPY $s1
+    %6:gpr32 = SUBSWri %5, 1, 0, implicit-def $nzcv
+    Bcc 1, %bb.2, implicit $nzcv
+    B %bb.1
+
+  bb.4:
+    successors: %bb.3
+
+    ; bb.4 is an extra predecessor of the tail block bb.3, so the PHI there
+    ; survives; %0 and %1 must still collapse into one incoming value.
+    %3:fpr32 = IMPLICIT_DEF
+    B %bb.3
+
+  bb.1:
+    successors: %bb.3
+
+    %0:fpr32 = FMOVS0
+    B %bb.3
+
+  bb.2:
+    successors: %bb.3
+
+    %1:fpr32 = FMOVS0
+
+  bb.3:
+    %2:fpr32 = PHI %1, %bb.2, %0, %bb.1, %3, %bb.4
+    $s0 = COPY %2
+    RET_ReallyLR implicit $s0
+
+...
+---
+name: copy_physreg
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr32, preferred-register: '' }
+  - { id: 1, class: fpr32, preferred-register: '' }
+  - { id: 2, class: fpr32, preferred-register: '' }
+  - { id: 3, class: fpr32, preferred-register: '' }
+  - { id: 4, class: fpr32, preferred-register: '' }
+  - { id: 5, class: gpr32common, preferred-register: '' }
+  - { id: 6, class: gpr32, preferred-register: '' }
+  - { id: 7, class: fpr32, preferred-register: '' }
+  - { id: 8, class: fpr32, preferred-register: '' }
+  - { id: 9, class: fpr32, preferred-register: '' }
+  - { id: 10, class: fpr32, preferred-register: '' }
+liveins:
+  - { reg: '$s1', virtual-reg: '%4' }
+  - { reg: '$w0', virtual-reg: '%5' }
+body: |
+  ; CHECK-LABEL: name: copy_physreg
+  ; CHECK: bb.0.entry:
+  ; CHECK: liveins: $s1, $w0
+  ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0
+  ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
+  ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv
+  ; CHECK: [[DEF:%[0-9]+]]:fpr32 = IMPLICIT_DEF implicit-def $s1
+  ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s1
+  ; CHECK: [[DEF1:%[0-9]+]]:fpr32 = IMPLICIT_DEF implicit-def $s1
+  ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1
+  ; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[COPY2]], [[COPY3]], 1, implicit $nzcv
+  ; CHECK: $s0 = COPY [[FCSELSrrr]]
+  ; CHECK: RET_ReallyLR implicit $s0
+  bb.0.entry:
+    successors: %bb.1, %bb.2
+    liveins: $s1, $w0
+
+    %5:gpr32common = COPY $w0
+    %4:fpr32 = COPY $s1
+    %6:gpr32 = SUBSWri %5, 1, 0, implicit-def $nzcv
+    Bcc 1, %bb.2, implicit $nzcv
+    B %bb.1
+
+  bb.1:
+    successors: %bb.3
+    ; $s1 is redefined in each arm, so the COPYs from it must stay distinct.
+    %9:fpr32 = IMPLICIT_DEF implicit-def $s1
+    %0:fpr32 = COPY $s1
+    B %bb.3
+
+  bb.2:
+    successors: %bb.3
+
+    %10:fpr32 = IMPLICIT_DEF implicit-def $s1
+    %1:fpr32 = COPY $s1
+
+  bb.3:
+    %2:fpr32 = PHI %1, %bb.2, %0, %bb.1
+    $s0 = COPY %2
+    RET_ReallyLR implicit $s0
+
+...
+---
+name: same_def_different_operand
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr32, preferred-register: '' }
+  - { id: 1, class: fpr32, preferred-register: '' }
+  - { id: 2, class: gpr64common, preferred-register: '' }
+  - { id: 3, class: fpr32, preferred-register: '' }
+  - { id: 4, class: fpr32, preferred-register: '' }
+  - { id: 5, class: gpr32common, preferred-register: '' }
+  - { id: 6, class: gpr32, preferred-register: '' }
+  - { id: 7, class: fpr32, preferred-register: '' }
+  - { id: 8, class: fpr32, preferred-register: '' }
+  - { id: 9, class: gpr64common, preferred-register: '' }
+  - { id: 10, class: gpr64, preferred-register: '' }
+  - { id: 11, class: gpr64common, preferred-register: '' }
+liveins:
+  - { reg: '$s1', virtual-reg: '%4' }
+  - { reg: '$w0', virtual-reg: '%5' }
+  - { reg: '$x2', virtual-reg: '%9' }
+body: |
+  ; CHECK-LABEL: name: same_def_different_operand
+  ; CHECK: bb.0.entry:
+  ; CHECK: liveins: $s1, $w0, $x2
+  ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+  ; CHECK: early-clobber %11:gpr64common, %10:gpr64 = LDRXpre [[COPY]], 16
+  ; CHECK: [[COPY1:%[0-9]+]]:gpr32common = COPY $w0
+  ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s1
+  ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY1]], 1, 0, implicit-def $nzcv
+  ; CHECK: [[CSELXr:%[0-9]+]]:gpr64common = CSELXr %11, %10, 1, implicit $nzcv
+  ; CHECK: $x2 = COPY [[CSELXr]]
+  ; CHECK: RET_ReallyLR implicit $x2
+  bb.0.entry:
+    successors: %bb.1, %bb.2
+    liveins: $s1, $w0, $x2
+    ; %11 and %10 are different results of one LDRXpre; keep the select.
+    %9:gpr64common = COPY $x0
+    early-clobber %11:gpr64common, %10:gpr64 = LDRXpre %9:gpr64common, 16
+
+    %5:gpr32common = COPY $w0
+    %4:fpr32 = COPY $s1
+    %6:gpr32 = SUBSWri %5, 1, 0, implicit-def $nzcv
+    Bcc 1, %bb.2, implicit $nzcv
+    B %bb.1
+
+  bb.1:
+    successors: %bb.3
+
+    B %bb.3
+
+  bb.2:
+    successors: %bb.3
+
+    B %bb.3
+
+  bb.3:
+    %2:gpr64common = PHI %11, %bb.2, %10, %bb.1
+    $x2 = COPY %2
+    RET_ReallyLR implicit $x2
+
+...