Index: llvm/trunk/lib/Target/AArch64/AArch64RegisterBankInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64RegisterBankInfo.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64RegisterBankInfo.cpp @@ -679,15 +679,58 @@ // If the destination is FPR, preserve that. if (OpRegBankIdx[0] != PMI_FirstGPR) break; + + // If we're taking in vectors, we have no choice but to put everything on + // FPRs. LLT SrcTy = MRI.getType(MI.getOperand(2).getReg()); - if (SrcTy.isVector() || - any_of(MRI.use_instructions(MI.getOperand(0).getReg()), - [&](MachineInstr &MI) { return HasFPConstraints(MI); })) { - // Set the register bank of every operand to FPR. - for (unsigned Idx = 0, NumOperands = MI.getNumOperands(); - Idx < NumOperands; ++Idx) + if (SrcTy.isVector()) { + for (unsigned Idx = 0; Idx < 4; ++Idx) OpRegBankIdx[Idx] = PMI_FirstFPR; + break; + } + + // Try to minimize the number of copies. If we have more floating point + // constrained values than not, then we'll put everything on FPR. Otherwise, + // everything has to be on GPR. + unsigned NumFP = 0; + + // Check if the uses of the result always produce floating point values. + // + // For example: + // + // %z = G_SELECT %cond %x %y + // fpr = G_FOO %z ... + if (any_of(MRI.use_instructions(MI.getOperand(0).getReg()), + [&](MachineInstr &MI) { return HasFPConstraints(MI); })) + ++NumFP; + + // Check if the defs of the source values always produce floating point + // values. + // + // For example: + // + // %x = G_SOMETHING_ALWAYS_FLOAT %a ... + // %z = G_SELECT %cond %x %y + // + // Also check whether or not the sources have already been decided to be + // FPR. Keep track of this. + // + // This doesn't check the condition, since it's just whatever is in NZCV. + // This isn't passed explicitly in a register to fcsel/csel. 
+ for (unsigned Idx = 2; Idx < 4; ++Idx) { + unsigned VReg = MI.getOperand(Idx).getReg(); + MachineInstr *DefMI = MRI.getVRegDef(VReg); + if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank || + HasFPConstraints(*DefMI)) + ++NumFP; } + + // If we have more FP constraints than not, then move everything over to + // FPR. + if (NumFP >= 2) + for (unsigned Idx = 0; Idx < 4; ++Idx) + OpRegBankIdx[Idx] = PMI_FirstFPR; + break; } case TargetOpcode::G_UNMERGE_VALUES: { Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/regbank-select.mir =================================================================== --- llvm/trunk/test/CodeGen/AArch64/GlobalISel/regbank-select.mir +++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/regbank-select.mir @@ -58,3 +58,133 @@ %4:_(s64) = G_SELECT %0(s1), %1, %2 $d0 = COPY %4(s64) RET_ReallyLR implicit $d0 + +... +--- +name: two_fpr_inputs_gpr_output +alignment: 2 +legalized: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $d0, $d1, $w0 + ; CHECK-LABEL: name: two_fpr_inputs_gpr_output + ; CHECK: liveins: $d0, $d1, $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 + ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:fpr(s64) = COPY $d0 + ; CHECK: [[COPY2:%[0-9]+]]:fpr(s64) = COPY $d1 + ; CHECK: [[COPY3:%[0-9]+]]:fpr(s1) = COPY [[TRUNC]](s1) + ; CHECK: [[SELECT:%[0-9]+]]:fpr(s64) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY2]] + ; CHECK: $x0 = COPY [[SELECT]](s64) + ; CHECK: RET_ReallyLR implicit $x0 + + ; Verify that the G_SELECT only has FPRs. + ; The only difference between fcsel and csel is the register banks. So, + ; if we have two FPR inputs and a GPR output, we should do a floating point + ; select anyway. This will cost one copy for the output, but that's less + ; than doing two to put the inputs on GPRs. 
+ + %3:_(s32) = COPY $w0 + %0:_(s1) = G_TRUNC %3(s32) + %1:_(s64) = COPY $d0 + %2:_(s64) = COPY $d1 + %4:_(s64) = G_SELECT %0(s1), %1, %2 + $x0 = COPY %4(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: one_fpr_input_fpr_output +alignment: 2 +legalized: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $d0, $x1, $w0 + ; CHECK-LABEL: name: one_fpr_input_fpr_output + ; CHECK: liveins: $d0, $x1, $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 + ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:fpr(s64) = COPY $d0 + ; CHECK: [[COPY2:%[0-9]+]]:gpr(s64) = COPY $x1 + ; CHECK: [[COPY3:%[0-9]+]]:fpr(s1) = COPY [[TRUNC]](s1) + ; CHECK: [[COPY4:%[0-9]+]]:fpr(s64) = COPY [[COPY2]](s64) + ; CHECK: [[SELECT:%[0-9]+]]:fpr(s64) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY4]] + ; CHECK: $d0 = COPY [[SELECT]](s64) + ; CHECK: RET_ReallyLR implicit $d0 + + ; Same idea as the above test. If the output is an FPR, and one of the + ; inputs is an FPR, then it's fewer copies to just do a FCSEL. + + %3:_(s32) = COPY $w0 + %0:_(s1) = G_TRUNC %3(s32) + %1:_(s64) = COPY $d0 + %2:_(s64) = COPY $x1 + %4:_(s64) = G_SELECT %0(s1), %1, %2 + $d0 = COPY %4(s64) + RET_ReallyLR implicit $d0 + +... +--- +name: one_fpr_input_gpr_output +alignment: 2 +legalized: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $d0, $x1, $w0 + ; CHECK-LABEL: name: one_fpr_input_gpr_output + ; CHECK: liveins: $d0, $x1, $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 + ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:fpr(s64) = COPY $d0 + ; CHECK: [[COPY2:%[0-9]+]]:gpr(s64) = COPY $x1 + ; CHECK: [[COPY3:%[0-9]+]]:gpr(s64) = COPY [[COPY1]](s64) + ; CHECK: [[SELECT:%[0-9]+]]:gpr(s64) = G_SELECT [[TRUNC]](s1), [[COPY3]], [[COPY2]] + ; CHECK: $x0 = COPY [[SELECT]](s64) + ; CHECK: RET_ReallyLR implicit $x0 + + ; Now we have more GPR registers on the G_SELECT. 
It's cheaper here to put + ; everything on GPR. + + %3:_(s32) = COPY $w0 + %0:_(s1) = G_TRUNC %3(s32) + %1:_(s64) = COPY $d0 + %2:_(s64) = COPY $x1 + %4:_(s64) = G_SELECT %0(s1), %1, %2 + $x0 = COPY %4(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: two_gpr_input_fpr_output +alignment: 2 +legalized: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1, $w0 + ; CHECK-LABEL: name: two_gpr_input_fpr_output + ; CHECK: liveins: $x0, $x1, $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 + ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:gpr(s64) = COPY $x0 + ; CHECK: [[COPY2:%[0-9]+]]:gpr(s64) = COPY $x1 + ; CHECK: [[SELECT:%[0-9]+]]:gpr(s64) = G_SELECT [[TRUNC]](s1), [[COPY1]], [[COPY2]] + ; CHECK: $d0 = COPY [[SELECT]](s64) + ; CHECK: RET_ReallyLR implicit $d0 + + ; Same as above. The G_SELECT should get all GPRs. + + %3:_(s32) = COPY $w0 + %0:_(s1) = G_TRUNC %3(s32) + %1:_(s64) = COPY $x0 + %2:_(s64) = COPY $x1 + %4:_(s64) = G_SELECT %0(s1), %1, %2 + $d0 = COPY %4(s64) + RET_ReallyLR implicit $d0