diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h @@ -114,17 +114,20 @@ const InstructionMapping & getSameKindOfOperandsMapping(const MachineInstr &MI) const; - /// Returns true if the output of \p MI must be stored on a FPR register. + /// Maximum recursion depth for hasFPConstraints. + const unsigned MaxFPRSearchDepth = 2; + + /// \returns true if \p MI only uses and defines FPRs. bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI) const; + const TargetRegisterInfo &TRI, unsigned Depth = 0) const; - /// Returns true if the source registers of \p MI must all be FPRs. + /// \returns true if \p MI only uses FPRs. bool onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI) const; + const TargetRegisterInfo &TRI, unsigned Depth = 0) const; - /// Returns true if the destination register of \p MI must be a FPR. + /// \returns true if \p MI only defines FPRs. 
bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI) const; + const TargetRegisterInfo &TRI, unsigned Depth = 0) const; public: AArch64RegisterBankInfo(const TargetRegisterInfo &TRI); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -466,9 +466,10 @@ getValueMapping(RBIdx, Size), NumOperands); } -bool AArch64RegisterBankInfo::hasFPConstraints( - const MachineInstr &MI, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI) const { +bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + unsigned Depth) const { unsigned Op = MI.getOpcode(); // Do we have an explicit floating point instruction? @@ -480,14 +481,30 @@ if (Op != TargetOpcode::COPY && !MI.isPHI()) return false; - // MI is copy-like. Return true if it outputs an FPR. - return getRegBank(MI.getOperand(0).getReg(), MRI, TRI) == - &AArch64::FPRRegBank; + // Check if we already know the register bank. + auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI); + if (RB == &AArch64::FPRRegBank) + return true; + if (RB == &AArch64::GPRRegBank) + return false; + + // We don't know anything. + // + // If we have a phi, we may be able to infer that it will be assigned an FPR + // based on its inputs. 
+ if (!MI.isPHI() || Depth > MaxFPRSearchDepth) + return false; + + return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) { + return Op.isReg() && + onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1); + }); } bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI) const { + const TargetRegisterInfo &TRI, + unsigned Depth) const { switch (MI.getOpcode()) { case TargetOpcode::G_FPTOSI: case TargetOpcode::G_FPTOUI: @@ -496,12 +513,13 @@ default: break; } - return hasFPConstraints(MI, MRI, TRI); + return hasFPConstraints(MI, MRI, TRI, Depth); } -bool AArch64RegisterBankInfo::onlyDefinesFP( - const MachineInstr &MI, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI) const { +bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + unsigned Depth) const { switch (MI.getOpcode()) { case AArch64::G_DUP: case TargetOpcode::G_SITOFP: @@ -512,7 +530,7 @@ default: break; } - return hasFPConstraints(MI, MRI, TRI); + return hasFPConstraints(MI, MRI, TRI, Depth); } const RegisterBankInfo::InstructionMapping & diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir @@ -101,3 +101,263 @@ %4:_(s32) = G_SITOFP %2 %6:_(s32) = G_SELECT %1(s1), %3, %4 %8:_(s32) = G_FPTOSI %6 + +... 
+--- +name: load_used_by_phi_fpr +legalized: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: load_used_by_phi_fpr + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: liveins: $x0, $s0, $s1, $w0, $w1 + ; CHECK: %cond_wide:gpr(s32) = COPY $w0 + ; CHECK: %cond:gpr(s1) = G_TRUNC %cond_wide(s32) + ; CHECK: %fpr_copy:fpr(s32) = COPY $s0 + ; CHECK: %ptr:gpr(p0) = COPY $x0 + ; CHECK: G_BRCOND %cond(s1), %bb.1 + ; CHECK: G_BR %bb.2 + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: %load:fpr(s32) = G_LOAD %ptr(p0) :: (load 4) + ; CHECK: G_BR %bb.2 + ; CHECK: bb.2: + ; CHECK: %phi:fpr(s32) = G_PHI %fpr_copy(s32), %bb.0, %load(s32), %bb.1 + ; CHECK: $s0 = COPY %phi(s32) + ; CHECK: RET_ReallyLR implicit $s0 + bb.0: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $x0, $s0, $s1, $w0, $w1 + %cond_wide:_(s32) = COPY $w0 + %cond:_(s1) = G_TRUNC %cond_wide(s32) + %fpr_copy:_(s32) = COPY $s0 + %ptr:_(p0) = COPY $x0 + G_BRCOND %cond(s1), %bb.1 + G_BR %bb.2 + bb.1: + successors: %bb.2 + %load:_(s32) = G_LOAD %ptr(p0) :: (load 4) + G_BR %bb.2 + bb.2: + %phi:_(s32) = G_PHI %fpr_copy(s32), %bb.0, %load(s32), %bb.1 + $s0 = COPY %phi(s32) + RET_ReallyLR implicit $s0 + +... 
+--- +name: load_used_by_phi_gpr +legalized: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: load_used_by_phi_gpr + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: liveins: $x0, $s0, $s1, $w0, $w1 + ; CHECK: %cond_wide:gpr(s32) = COPY $w0 + ; CHECK: %cond:gpr(s1) = G_TRUNC %cond_wide(s32) + ; CHECK: %gpr_copy:gpr(s32) = COPY $w1 + ; CHECK: %ptr:gpr(p0) = COPY $x0 + ; CHECK: G_BRCOND %cond(s1), %bb.1 + ; CHECK: G_BR %bb.2 + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4) + ; CHECK: G_BR %bb.2 + ; CHECK: bb.2: + ; CHECK: %phi:gpr(s32) = G_PHI %gpr_copy(s32), %bb.0, %load(s32), %bb.1 + ; CHECK: $s0 = COPY %phi(s32) + ; CHECK: RET_ReallyLR implicit $s0 + bb.0: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $x0, $s0, $s1, $w0, $w1 + %cond_wide:_(s32) = COPY $w0 + %cond:_(s1) = G_TRUNC %cond_wide(s32) + %gpr_copy:_(s32) = COPY $w1 + %ptr:_(p0) = COPY $x0 + G_BRCOND %cond(s1), %bb.1 + G_BR %bb.2 + bb.1: + successors: %bb.2 + %load:_(s32) = G_LOAD %ptr(p0) :: (load 4) + G_BR %bb.2 + bb.2: + %phi:_(s32) = G_PHI %gpr_copy(s32), %bb.0, %load(s32), %bb.1 + $s0 = COPY %phi(s32) + RET_ReallyLR implicit $s0 + +... 
+--- +name: select_used_by_phi_fpr +legalized: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: select_used_by_phi_fpr + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: liveins: $s0, $s1, $w0, $w1 + ; CHECK: %cond_wide:gpr(s32) = COPY $w0 + ; CHECK: %cond:gpr(s1) = G_TRUNC %cond_wide(s32) + ; CHECK: %fpr_copy:fpr(s32) = COPY $s0 + ; CHECK: %gpr_copy:gpr(s32) = COPY $w1 + ; CHECK: G_BRCOND %cond(s1), %bb.1 + ; CHECK: G_BR %bb.2 + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY %gpr_copy(s32) + ; CHECK: %select:fpr(s32) = G_SELECT %cond(s1), %fpr_copy, [[COPY]] + ; CHECK: G_BR %bb.2 + ; CHECK: bb.2: + ; CHECK: %phi:fpr(s32) = G_PHI %fpr_copy(s32), %bb.0, %select(s32), %bb.1 + ; CHECK: $w0 = COPY %phi(s32) + ; CHECK: RET_ReallyLR implicit $w0 + ; The G_SELECT and G_PHI should end up with the same register bank. + ; + bb.0: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $s0, $s1, $w0, $w1 + %cond_wide:_(s32) = COPY $w0 + %cond:_(s1) = G_TRUNC %cond_wide(s32) + %fpr_copy:_(s32) = COPY $s0 + %gpr_copy:_(s32) = COPY $w1 + G_BRCOND %cond(s1), %bb.1 + G_BR %bb.2 + bb.1: + successors: %bb.2 + %select:_(s32) = G_SELECT %cond(s1), %fpr_copy, %gpr_copy + G_BR %bb.2 + bb.2: + %phi:_(s32) = G_PHI %fpr_copy(s32), %bb.0, %select(s32), %bb.1 + $w0 = COPY %phi(s32) + RET_ReallyLR implicit $w0 + +... 
+--- +name: select_used_by_phi_gpr +legalized: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: select_used_by_phi_gpr + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: liveins: $s0, $s1, $w0, $w1 + ; CHECK: %cond_wide:gpr(s32) = COPY $w0 + ; CHECK: %cond:gpr(s1) = G_TRUNC %cond_wide(s32) + ; CHECK: %fpr_copy:fpr(s32) = COPY $s0 + ; CHECK: %gpr_copy:gpr(s32) = COPY $w1 + ; CHECK: G_BRCOND %cond(s1), %bb.1 + ; CHECK: G_BR %bb.2 + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %fpr_copy(s32) + ; CHECK: %select:gpr(s32) = G_SELECT %cond(s1), [[COPY]], %gpr_copy + ; CHECK: G_BR %bb.2 + ; CHECK: bb.2: + ; CHECK: %phi:gpr(s32) = G_PHI %gpr_copy(s32), %bb.0, %select(s32), %bb.1 + ; CHECK: $s0 = COPY %phi(s32) + ; CHECK: RET_ReallyLR implicit $s0 + ; The G_SELECT and G_PHI should end up with the same register bank. + ; + bb.0: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $s0, $s1, $w0, $w1 + %cond_wide:_(s32) = COPY $w0 + %cond:_(s1) = G_TRUNC %cond_wide(s32) + %fpr_copy:_(s32) = COPY $s0 + %gpr_copy:_(s32) = COPY $w1 + G_BRCOND %cond(s1), %bb.1 + G_BR %bb.2 + bb.1: + successors: %bb.2 + %select:_(s32) = G_SELECT %cond(s1), %fpr_copy, %gpr_copy + G_BR %bb.2 + bb.2: + %phi:_(s32) = G_PHI %gpr_copy(s32), %bb.0, %select(s32), %bb.1 + $s0 = COPY %phi(s32) + RET_ReallyLR implicit $s0 + + +... 
+--- +name: unmerge_used_by_phi_fpr +legalized: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: unmerge_used_by_phi_fpr + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: liveins: $x0, $s0, $s1, $w0, $w1 + ; CHECK: %cond_wide:gpr(s32) = COPY $w0 + ; CHECK: %cond:gpr(s1) = G_TRUNC %cond_wide(s32) + ; CHECK: %fpr_copy:fpr(s32) = COPY $s0 + ; CHECK: %unmerge_src:gpr(s64) = COPY $x0 + ; CHECK: G_BRCOND %cond(s1), %bb.1 + ; CHECK: G_BR %bb.2 + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY %unmerge_src(s64) + ; CHECK: %unmerge_1:fpr(s32), %unmerge_2:fpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: G_BR %bb.2 + ; CHECK: bb.2: + ; CHECK: %phi:fpr(s32) = G_PHI %fpr_copy(s32), %bb.0, %unmerge_1(s32), %bb.1 + ; CHECK: $s0 = COPY %phi(s32) + ; CHECK: RET_ReallyLR implicit $s0 + bb.0: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $x0, $s0, $s1, $w0, $w1 + %cond_wide:_(s32) = COPY $w0 + %cond:_(s1) = G_TRUNC %cond_wide(s32) + %fpr_copy:_(s32) = COPY $s0 + %unmerge_src:_(s64) = COPY $x0 + G_BRCOND %cond(s1), %bb.1 + G_BR %bb.2 + bb.1: + successors: %bb.2 + %unmerge_1:_(s32), %unmerge_2:_(s32) = G_UNMERGE_VALUES %unmerge_src(s64) + G_BR %bb.2 + bb.2: + %phi:_(s32) = G_PHI %fpr_copy(s32), %bb.0, %unmerge_1(s32), %bb.1 + $s0 = COPY %phi(s32) + RET_ReallyLR implicit $s0 + +... 
+--- +name: unmerge_used_by_phi_gpr +legalized: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: unmerge_used_by_phi_gpr + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: liveins: $x0, $s0, $s1, $w0, $w1 + ; CHECK: %cond_wide:gpr(s32) = COPY $w0 + ; CHECK: %cond:gpr(s1) = G_TRUNC %cond_wide(s32) + ; CHECK: %gpr_copy:gpr(s32) = COPY $w1 + ; CHECK: %unmerge_src:gpr(s64) = COPY $x0 + ; CHECK: G_BRCOND %cond(s1), %bb.1 + ; CHECK: G_BR %bb.2 + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: %unmerge_1:gpr(s32), %unmerge_2:gpr(s32) = G_UNMERGE_VALUES %unmerge_src(s64) + ; CHECK: G_BR %bb.2 + ; CHECK: bb.2: + ; CHECK: %phi:gpr(s32) = G_PHI %gpr_copy(s32), %bb.0, %unmerge_1(s32), %bb.1 + ; CHECK: $s0 = COPY %phi(s32) + ; CHECK: RET_ReallyLR implicit $s0 + bb.0: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $x0, $s0, $s1, $w0, $w1 + %cond_wide:_(s32) = COPY $w0 + %cond:_(s1) = G_TRUNC %cond_wide(s32) + %gpr_copy:_(s32) = COPY $w1 + %unmerge_src:_(s64) = COPY $x0 + G_BRCOND %cond(s1), %bb.1 + G_BR %bb.2 + bb.1: + successors: %bb.2 + %unmerge_1:_(s32), %unmerge_2:_(s32) = G_UNMERGE_VALUES %unmerge_src(s64) + G_BR %bb.2 + bb.2: + %phi:_(s32) = G_PHI %gpr_copy(s32), %bb.0, %unmerge_1(s32), %bb.1 + $s0 = COPY %phi(s32) + RET_ReallyLR implicit $s0