Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7729,36 +7729,59 @@
     SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
                                              Chain, &Flag, CS.getInstruction());
 
-    // FIXME: Why don't we do this for inline asms with MRVs?
-    if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
-      EVT ResultType = TLI.getValueType(DAG.getDataLayout(), CS.getType());
-
-      // If the type of the inline asm call site return value is different but
-      // has same size as the type of the asm output bitcast it.  One example
-      // of this is for vectors with different width / number of elements.
-      // This can happen for register classes that can contain multiple
-      // different value types.  The preg or vreg allocated may not have the
-      // same VT as was expected.
-      //
-      // This can also happen for a return value that disagrees with the
-      // register class it is put in, eg. a double in a general-purpose
-      // register on a 32-bit machine.
-      if (ResultType != Val.getValueType() &&
-          ResultType.getSizeInBits() == Val.getValueSizeInBits()) {
-        Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(),
-                          ResultType, Val);
-
-      } else if (ResultType != Val.getValueType() &&
-                 ResultType.isInteger() && Val.getValueType().isInteger()) {
-        // If a result value was tied to an input value, the computed result may
-        // have a wider width than the expected result.  Extract the relevant
-        // portion.
-        Val = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultType, Val);
-      }
+    llvm::Type *CSResultType = CS.getType();
+    unsigned numRet;
+    ArrayRef<Type *> ResultTypes;
+    SmallVector<SDValue, 1> ResultValues(1);
+    if (CSResultType->isSingleValueType()) {
+      numRet = 1;
+      ResultValues[0] = Val;
+      ResultTypes = makeArrayRef(CSResultType);
+    } else {
+      numRet = CSResultType->getNumContainedTypes();
+      assert(Val->getNumOperands() == numRet &&
+             "Mismatch in number of output operands in asm result");
+      ResultTypes = CSResultType->subtypes();
+      ArrayRef<SDUse> ValueUses = Val->ops();
+      ResultValues.resize(numRet);
+      std::transform(ValueUses.begin(), ValueUses.end(), ResultValues.begin(),
+                     [](const SDUse &u) -> SDValue { return u.get(); });
+    }
+    SmallVector<EVT, 1> ResultVTs(numRet);
+    for (unsigned i = 0; i < numRet; i++) {
+      EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), ResultTypes[i]);
+      SDValue Val = ResultValues[i];
+      if (ResultTypes[i]->isSized()) {
+        // If the type of the inline asm call site return value is different but
+        // has the same size as the type of the asm output, bitcast it.  One
+        // example of this is for vectors with different width / number of
+        // elements.  This can happen for register classes that can contain
+        // multiple different value types.  The preg or vreg allocated may not
+        // have the same VT as was expected.
+        //
+        // This can also happen for a return value that disagrees with the
+        // register class it is put in, eg. a double in a general-purpose
+        // register on a 32-bit machine.
+        if (ResultVT != Val.getValueType() &&
+            ResultVT.getSizeInBits() == Val.getValueSizeInBits()) {
+          Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultVT, Val);
+
+        } else if (ResultVT != Val.getValueType() && ResultVT.isInteger() &&
+                   Val.getValueType().isInteger()) {
+          // If a result value was tied to an input value, the computed result
+          // may have a wider width than the expected result.  Extract the
+          // relevant portion.
+          Val = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultVT, Val);
+        }
 
-      assert(ResultType == Val.getValueType() && "Asm result value mismatch!");
+        assert(ResultVT == Val.getValueType() && "Asm result value mismatch!");
+      }
+      ResultVTs[i] = ResultVT;
+      ResultValues[i] = Val;
     }
 
+    Val = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
+                      DAG.getVTList(ResultVTs), ResultValues);
     setValue(CS.getInstruction(), Val);
     // Don't need to use this as a chain in this case.
     if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
Index: test/CodeGen/ARM/inline-asm-operand-implicit-cast.ll
===================================================================
--- test/CodeGen/ARM/inline-asm-operand-implicit-cast.ll
+++ test/CodeGen/ARM/inline-asm-operand-implicit-cast.ll
@@ -35,6 +35,32 @@
   ret double %1
 }
 
+; Check support for returning several floats in GPRs (soft-float)
+define arm_aapcscc float @zerobits_float_convoluted_soft() #0 {
+; CHECK-LABEL: zerobits_float_convoluted_soft
+; CHECK: mov r0, #0
+; CHECK-NEXT: mov r1, #0
+  %1 = call { float, float } asm "mov $0, #0; mov $1, #0", "=r,=r"()
+  %asmresult = extractvalue { float, float } %1, 0
+  %asmresult1 = extractvalue { float, float } %1, 1
+  %add = fadd float %asmresult, %asmresult1
+  ret float %add
+}
+
+; Check support for returning several doubles in GPRs (soft-float)
+define double @zerobits_double_convoluted_soft() #0 {
+; CHECK-LABEL: zerobits_double_convoluted_soft
+; CHECK: mov r0, #0
+; CHECK-NEXT: mov r1, #0
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: mov r3, #0
+  %1 = call { double, double } asm "mov ${0:Q}, #0; mov ${0:R}, #0; mov ${1:Q}, #0; mov ${1:R}, #0", "=r,=r"()
+  %asmresult = extractvalue { double, double } %1, 0
+  %asmresult1 = extractvalue { double, double } %1, 1
+  %add = fadd double %asmresult, %asmresult1
+  ret double %add
+}
+
 attributes #0 = { nounwind "target-features"="+d16,+vfp2,+vfp3,-fp-only-sp" "use-soft-float"="true" }
 
 
@@ -79,4 +105,34 @@
   ret double %1
 }
 
+; Check support for returning several floats in GPRs (hard-float)
+define float @zerobits_float_convoluted_hard() #1 {
+; CHECK-LABEL: zerobits_float_convoluted_hard
+; CHECK: mov r0, #0
+; CHECK-NEXT: mov r1, #0
+; CHECK: vmov {{s[[:digit:]]+}}, r1
+; CHECK-NEXT: vmov {{s[[:digit:]]+}}, r0
+  %1 = call { float, float } asm "mov $0, #0; mov $1, #0", "=r,=r"()
+  %asmresult = extractvalue { float, float } %1, 0
+  %asmresult1 = extractvalue { float, float } %1, 1
+  %add = fadd float %asmresult, %asmresult1
+  ret float %add
+}
+
+; Check support for returning several doubles in GPRs (hard-float)
+define double @zerobits_double_convoluted_hard() #1 {
+; CHECK-LABEL: zerobits_double_convoluted_hard
+; CHECK: mov r0, #0
+; CHECK-NEXT: mov r1, #0
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: mov r3, #0
+; CHECK: vmov {{d[[:digit:]]+}}, r2, r3
+; CHECK-NEXT: vmov {{d[[:digit:]]+}}, r0, r1
+  %1 = call { double, double } asm "mov ${0:Q}, #0; mov ${0:R}, #0; mov ${1:Q}, #0; mov ${1:R}, #0", "=r,=r"()
+  %asmresult = extractvalue { double, double } %1, 0
+  %asmresult1 = extractvalue { double, double } %1, 1
+  %add = fadd double %asmresult, %asmresult1
+  ret double %add
+}
+
 attributes #1 = { nounwind "target-features"="+d16,+vfp2,+vfp3,-fp-only-sp" "use-soft-float"="false" }