Index: llvm/trunk/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -679,15 +679,58 @@
     // If the destination is FPR, preserve that.
     if (OpRegBankIdx[0] != PMI_FirstGPR)
       break;
+
+    // If we're taking in vectors, we have no choice but to put everything on
+    // FPRs.
     LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
-    if (SrcTy.isVector() ||
-        any_of(MRI.use_instructions(MI.getOperand(0).getReg()),
-               [&](MachineInstr &MI) { return HasFPConstraints(MI); })) {
-      // Set the register bank of every operand to FPR.
-      for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
-           Idx < NumOperands; ++Idx)
+    if (SrcTy.isVector()) {
+      for (unsigned Idx = 0; Idx < 4; ++Idx)
         OpRegBankIdx[Idx] = PMI_FirstFPR;
+      break;
+    }
+
+    // Try to minimize the number of copies. If we have more floating point
+    // constrained values than not, then we'll put everything on FPR. Otherwise,
+    // everything has to be on GPR.
+    unsigned NumFP = 0;
+
+    // Check if any use of the result has floating point constraints.
+    //
+    // For example:
+    //
+    // %z = G_SELECT %cond %x %y
+    // fpr = G_FOO %z ...
+    if (any_of(MRI.use_instructions(MI.getOperand(0).getReg()),
+               [&](MachineInstr &MI) { return HasFPConstraints(MI); }))
+      ++NumFP;
+
+    // Check if the defs of the source values always produce floating point
+    // values.
+    //
+    // For example:
+    //
+    // %x = G_SOMETHING_ALWAYS_FLOAT %a ...
+    // %z = G_SELECT %cond %x %y
+    //
+    // Also check whether or not the sources have already been decided to be
+    // FPR. Keep track of this.
+    //
+    // This doesn't check the condition, since it's just whatever is in NZCV.
+    // This isn't passed explicitly in a register to fcsel/csel.
+    for (unsigned Idx = 2; Idx < 4; ++Idx) {
+      unsigned VReg = MI.getOperand(Idx).getReg();
+      MachineInstr *DefMI = MRI.getVRegDef(VReg);
+      if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
+          HasFPConstraints(*DefMI))
+        ++NumFP;
     }
+
+    // If we have more FP constraints than not, then move everything over to
+    // FPR.
+    if (NumFP >= 2)
+      for (unsigned Idx = 0; Idx < 4; ++Idx)
+        OpRegBankIdx[Idx] = PMI_FirstFPR;
+
     break;
   }
   case TargetOpcode::G_UNMERGE_VALUES: {
Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/regbank-select.mir
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/regbank-select.mir
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/regbank-select.mir
@@ -58,3 +58,133 @@
     %4:_(s64) = G_SELECT %0(s1), %1, %2
     $d0 = COPY %4(s64)
     RET_ReallyLR implicit $d0
+
+...
+---
+name:            two_fpr_inputs_gpr_output
+alignment:       2
+legalized:       true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0, $d1, $w0
+    ; CHECK-LABEL: name: two_fpr_inputs_gpr_output
+    ; CHECK: liveins: $d0, $d1, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+    ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr(s64) = COPY $d0
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr(s64) = COPY $d1
+    ; CHECK: [[COPY3:%[0-9]+]]:fpr(s1) = COPY [[TRUNC]](s1)
+    ; CHECK: [[SELECT:%[0-9]+]]:fpr(s64) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY2]]
+    ; CHECK: $x0 = COPY [[SELECT]](s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+
+    ; Verify that the G_SELECT only has FPRs.
+    ; The only difference between fcsel and csel is the register banks. So,
+    ; if we have two FPR inputs and a GPR output, we should do a floating point
+    ; select anyway. This will cost one copy for the output, but that's less
+    ; than doing two to put the inputs on GPRs.
+
+    %3:_(s32) = COPY $w0
+    %0:_(s1) = G_TRUNC %3(s32)
+    %1:_(s64) = COPY $d0
+    %2:_(s64) = COPY $d1
+    %4:_(s64) = G_SELECT %0(s1), %1, %2
+    $x0 = COPY %4(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            one_fpr_input_fpr_output
+alignment:       2
+legalized:       true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0, $x1, $w0
+    ; CHECK-LABEL: name: one_fpr_input_fpr_output
+    ; CHECK: liveins: $d0, $x1, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+    ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr(s64) = COPY $d0
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr(s64) = COPY $x1
+    ; CHECK: [[COPY3:%[0-9]+]]:fpr(s1) = COPY [[TRUNC]](s1)
+    ; CHECK: [[COPY4:%[0-9]+]]:fpr(s64) = COPY [[COPY2]](s64)
+    ; CHECK: [[SELECT:%[0-9]+]]:fpr(s64) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY4]]
+    ; CHECK: $d0 = COPY [[SELECT]](s64)
+    ; CHECK: RET_ReallyLR implicit $d0
+
+    ; Same idea as the above test. If the output is an FPR, and one of the
+    ; inputs is an FPR, then it's fewer copies to just do a FCSEL.
+
+    %3:_(s32) = COPY $w0
+    %0:_(s1) = G_TRUNC %3(s32)
+    %1:_(s64) = COPY $d0
+    %2:_(s64) = COPY $x1
+    %4:_(s64) = G_SELECT %0(s1), %1, %2
+    $d0 = COPY %4(s64)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            one_fpr_input_gpr_output
+alignment:       2
+legalized:       true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0, $x1, $w0
+    ; CHECK-LABEL: name: one_fpr_input_gpr_output
+    ; CHECK: liveins: $d0, $x1, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+    ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr(s64) = COPY $d0
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr(s64) = COPY $x1
+    ; CHECK: [[COPY3:%[0-9]+]]:gpr(s64) = COPY [[COPY1]](s64)
+    ; CHECK: [[SELECT:%[0-9]+]]:gpr(s64) = G_SELECT [[TRUNC]](s1), [[COPY3]], [[COPY2]]
+    ; CHECK: $x0 = COPY [[SELECT]](s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+
+    ; Now we have more GPR registers on the G_SELECT. It's cheaper here to put
+    ; everything on GPR.
+
+    %3:_(s32) = COPY $w0
+    %0:_(s1) = G_TRUNC %3(s32)
+    %1:_(s64) = COPY $d0
+    %2:_(s64) = COPY $x1
+    %4:_(s64) = G_SELECT %0(s1), %1, %2
+    $x0 = COPY %4(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            two_gpr_input_fpr_output
+alignment:       2
+legalized:       true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $x0, $x1, $w0
+    ; CHECK-LABEL: name: two_gpr_input_fpr_output
+    ; CHECK: liveins: $x0, $x1, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+    ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr(s64) = COPY $x0
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr(s64) = COPY $x1
+    ; CHECK: [[SELECT:%[0-9]+]]:gpr(s64) = G_SELECT [[TRUNC]](s1), [[COPY1]], [[COPY2]]
+    ; CHECK: $d0 = COPY [[SELECT]](s64)
+    ; CHECK: RET_ReallyLR implicit $d0
+
+    ; Same as above. The G_SELECT should get all GPRs.
+
+    %3:_(s32) = COPY $w0
+    %0:_(s1) = G_TRUNC %3(s32)
+    %1:_(s64) = COPY $x0
+    %2:_(s64) = COPY $x1
+    %4:_(s64) = G_SELECT %0(s1), %1, %2
+    $d0 = COPY %4(s64)
+    RET_ReallyLR implicit $d0