Index: llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h =================================================================== --- llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h +++ llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h @@ -117,6 +117,14 @@ bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const; + /// Returns true if the source registers of \p MI must all be FPRs. + bool onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const; + + /// Returns true if the destination register of \p MI must be a FPR. + bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const; + public: AArch64RegisterBankInfo(const TargetRegisterInfo &TRI); Index: llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp +++ llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp @@ -476,6 +476,35 @@ &AArch64::FPRRegBank; } +bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const { + switch (MI.getOpcode()) { + case TargetOpcode::G_FPTOSI: + case TargetOpcode::G_FPTOUI: + case TargetOpcode::G_FCMP: + return true; + default: + break; + } + return hasFPConstraints(MI, MRI, TRI); +} + +bool AArch64RegisterBankInfo::onlyDefinesFP( + const MachineInstr &MI, const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const { + switch (MI.getOpcode()) { + case TargetOpcode::G_SITOFP: + case TargetOpcode::G_UITOFP: + case TargetOpcode::G_EXTRACT_VECTOR_ELT: + case TargetOpcode::G_INSERT_VECTOR_ELT: + return true; + default: + break; + } + return hasFPConstraints(MI, MRI, TRI); +} + const RegisterBankInfo::InstructionMapping & AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { const unsigned Opc = MI.getOpcode(); @@ -642,7 +671,7 @@ // assume this was a floating point load in the IR. // If it was not, we would have had a bitcast before // reaching that instruction. - if (hasFPConstraints(UseMI, MRI, TRI)) { + if (onlyUsesFP(UseMI, MRI, TRI)) { OpRegBankIdx[0] = PMI_FirstFPR; break; } @@ -655,7 +684,7 @@ if (!VReg) break; MachineInstr *DefMI = MRI.getVRegDef(VReg); - if (hasFPConstraints(*DefMI, MRI, TRI)) + if (onlyDefinesFP(*DefMI, MRI, TRI)) OpRegBankIdx[0] = PMI_FirstFPR; break; } @@ -687,7 +716,7 @@ // fpr = G_FOO %z ... if (any_of( MRI.use_instructions(MI.getOperand(0).getReg()), - [&](MachineInstr &MI) { return hasFPConstraints(MI, MRI, TRI); })) + [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) ++NumFP; // Check if the defs of the source values always produce floating point @@ -707,7 +736,7 @@ unsigned VReg = MI.getOperand(Idx).getReg(); MachineInstr *DefMI = MRI.getVRegDef(VReg); if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank || - hasFPConstraints(*DefMI, MRI, TRI)) + onlyDefinesFP(*DefMI, MRI, TRI)) ++NumFP; } @@ -729,9 +758,8 @@ // UNMERGE into scalars from a vector should always use FPR. // Likewise if any of the uses are FP instructions. if (SrcTy.isVector() || - any_of( - MRI.use_instructions(MI.getOperand(0).getReg()), - [&](MachineInstr &MI) { return hasFPConstraints(MI, MRI, TRI); })) { + any_of(MRI.use_instructions(MI.getOperand(0).getReg()), + [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) { // Set the register bank of every operand to FPR. for (unsigned Idx = 0, NumOperands = MI.getNumOperands(); Idx < NumOperands; ++Idx) Index: llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir @@ -0,0 +1,104 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O0 -mtriple arm64-- -run-pass=regbankselect %s -o - | FileCheck %s + +# Check that we correctly assign register banks based off of instructions which +# only use or only define FPRs. +# +# For example, G_SITOFP takes in a GPR, but only ever produces values on FPRs. +# Some instructions can have inputs/outputs on either FPRs or GPRs. If one of +# those instructions takes in the result of a G_SITOFP as a source, we should +# put that source on a FPR. +# +# Similarly, G_FPTOSI can only take in a value on a FPR. So, if the result of +# an instruction is consumed by a G_FPTOSI, we should put the instruction on +# FPRs. + +--- +name: load_only_uses_fp +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + ; CHECK-LABEL: name: load_only_uses_fp + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0 + ; CHECK: [[C:%[0-9]+]]:fpr(s32) = G_FCONSTANT float 2.000000e+00 + ; CHECK: [[LOAD:%[0-9]+]]:fpr(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; CHECK: [[FCMP:%[0-9]+]]:gpr(s32) = G_FCMP floatpred(uno), [[C]](s32), [[LOAD]] + ; CHECK: $w0 = COPY [[FCMP]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(p0) = COPY $x0 + %1:_(s32) = G_FCONSTANT float 2.0 + %2:_(s32) = G_LOAD %0 :: (load 4) + %3:_(s32) = G_FCMP floatpred(uno), %1, %2 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 +... +--- +name: unmerge_only_uses_fp + +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + ; CHECK-LABEL: name: unmerge_only_uses_fp + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr(s64) = COPY [[COPY]](s64) + ; CHECK: [[UV:%[0-9]+]]:fpr(s32), [[UV1:%[0-9]+]]:fpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK: [[FCMP:%[0-9]+]]:gpr(s32) = G_FCMP floatpred(uno), [[UV]](s32), [[UV1]] + ; CHECK: $w0 = COPY [[FCMP]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(s64) = COPY $x0 + %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0(s64) + %3:_(s32) = G_FCMP floatpred(uno), %1, %2 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: store_defined_by_fp +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $w1 + ; CHECK-LABEL: name: store_defined_by_fp + ; CHECK: liveins: $x0, $w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr(s32) = COPY $w1 + ; CHECK: [[SITOFP:%[0-9]+]]:fpr(s32) = G_SITOFP [[COPY1]](s32) + ; CHECK: G_STORE [[SITOFP]](s32), [[COPY]](p0) :: (store 4) + %0:_(p0) = COPY $x0 + %1:_(s32) = COPY $w1 + %2:_(s32) = G_SITOFP %1 + G_STORE %2, %0 :: (store 4) + +... +--- +name: select_defined_by_fp_using_fp +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1, $w2 + ; CHECK-LABEL: name: select_defined_by_fp_using_fp + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 + ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC %2(s32) + ; CHECK: [[COPY1:%[0-9]+]]:gpr(s32) = COPY $w1 + ; CHECK: [[COPY2:%[0-9]+]]:gpr(s32) = COPY $w2 + ; CHECK: [[SITOFP:%[0-9]+]]:fpr(s32) = G_SITOFP [[COPY1]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:fpr(s1) = COPY [[TRUNC]](s1) + ; CHECK: [[COPY4:%[0-9]+]]:fpr(s32) = COPY [[COPY2]](s32) + ; CHECK: [[SELECT:%[0-9]+]]:fpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[SITOFP]] + ; CHECK: [[FPTOSI:%[0-9]+]]:gpr(s32) = G_FPTOSI [[SELECT]](s32) + %0:_(s32) = COPY $w0 + %1:_(s1) = G_TRUNC %3(s32) + %2:_(s32) = COPY $w1 + %3:_(s32) = COPY $w2 + %4:_(s32) = G_SITOFP %2 + %6:_(s32) = G_SELECT %1(s1), %3, %4 + %8:_(s32) = G_FPTOSI %6