Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13301,6 +13301,19 @@
   if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
     return SDValue();
 
+  // A target may want to call DAG.UnrollVectorOp() on a node which is going
+  // to be widened and then expanded. This is better done before type
+  // legalization, because then only two scalar operations result (an
+  // infinite loop would result if this function re-vectorized the op).
+  if (!LegalTypes && TLI.isOperationExpand(Opcode, VT)) {
+    LLVMContext &Context = *DAG.getContext();
+    if (TLI.getTypeAction(Context, VT) == TargetLowering::TypeWidenVector) {
+      EVT WideVT = TLI.getTypeToTransformTo(Context, VT);
+      if (TLI.isOperationExpand(Opcode, WideVT))
+        return SDValue();
+    }
+  }
+
   // Just because the floating-point vector type is legal does not necessarily
   // mean that the corresponding integer vector type is.
   if (!isTypeLegal(NVT))
Index: lib/Target/SystemZ/SystemZISelLowering.cpp
===================================================================
--- lib/Target/SystemZ/SystemZISelLowering.cpp
+++ lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -449,6 +449,14 @@
   setTargetDAGCombine(ISD::SRL);
   setTargetDAGCombine(ISD::ROTL);
 
+  // Scalarize v2f32 early, to avoid later expansion to 4 operations (see
+  // comment in PerformDAGCombine).
+  SmallVector<ISD::NodeType, 12> FP32Ops =
+    {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV, ISD::FREM, ISD::SINT_TO_FP,
+     ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT};
+  for (auto Op : FP32Ops)
+    setTargetDAGCombine(Op);
+
   // Handle intrinsics.
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -5184,7 +5192,18 @@
 SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
   switch(N->getOpcode()) {
-  default: break;
+  default:
+    // Z13 can handle fp32 vectors in registers and memory, but does not
+    // support any vector operations on them. v2f32 is widened to v4f32 and
+    // kept in a single vector register, but any operation on v2f32 should
+    // be scalarized before type legalization, or else four scalar operations
+    // (one per widened element) will be emitted instead of two.
+    if (N->getValueType(0) == MVT::v2f32 ||
+        ((N->getOpcode() == ISD::FP_TO_SINT || N->getOpcode() == ISD::FP_TO_UINT) &&
+         (N->getOperand(0)->getValueType(0) == MVT::v2f32)))
+      return DCI.DAG.UnrollVectorOp(N, 2);
+
+    break;
   case ISD::SIGN_EXTEND:        return combineSIGN_EXTEND(N, DCI);
   case SystemZISD::MERGE_HIGH:
   case SystemZISD::MERGE_LOW:   return combineMERGE(N, DCI);
Index: test/CodeGen/SystemZ/fp32-vec-conv.ll
===================================================================
--- /dev/null
+++ test/CodeGen/SystemZ/fp32-vec-conv.ll
@@ -0,0 +1,41 @@
+; Test that converting to or from a vector of two floats only generates two
+; conversion instructions (and not four).
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+
+define <2 x float> @fun0(<2 x i32> %val1) {
+; CHECK-LABEL: fun0:
+; CHECK: 	celfbr
+; CHECK: 	celfbr
+; CHECK-NOT: 	celfbr
+  %z = uitofp <2 x i32> %val1 to <2 x float>
+  ret <2 x float> %z
+}
+
+define <2 x float> @fun1(<2 x i32> %val1) {
+; CHECK-LABEL: fun1:
+; CHECK: 	cefbr
+; CHECK: 	cefbr
+; CHECK-NOT: 	cefbr
+  %z = sitofp <2 x i32> %val1 to <2 x float>
+  ret <2 x float> %z
+}
+
+define <2 x i32> @fun2(<2 x float> %val1) {
+; CHECK-LABEL: fun2:
+; CHECK: 	cfebr
+; CHECK: 	cfebr
+; CHECK-NOT: 	cfebr
+  %z = fptosi <2 x float> %val1 to <2 x i32>
+  ret <2 x i32> %z
+}
+
+define <2 x i32> @fun3(<2 x float> %val1) {
+; CHECK-LABEL: fun3:
+; CHECK: 	clfebr
+; CHECK: 	clfebr
+; CHECK-NOT: 	clfebr
+  %z = fptoui <2 x float> %val1 to <2 x i32>
+  ret <2 x i32> %z
+}
Index: test/CodeGen/SystemZ/fp32-vec-ops.ll
===================================================================
--- /dev/null
+++ test/CodeGen/SystemZ/fp32-vec-ops.ll
@@ -0,0 +1,49 @@
+; Test that an fp operation on a vector of two floats only generates two
+; scalar operations (and not four).
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+define <2 x float> @fun0(<2 x float> %val1, <2 x float> %val2) {
+; CHECK-LABEL: fun0:
+; CHECK: 	aebr
+; CHECK: 	aebr
+; CHECK-NOT: 	aebr
+  %ret = fadd <2 x float> %val1, %val2
+  ret <2 x float> %ret
+}
+
+define <2 x float> @fun1(<2 x float> %val1, <2 x float> %val2) {
+; CHECK-LABEL: fun1:
+; CHECK: 	sebr
+; CHECK: 	sebr
+; CHECK-NOT: 	sebr
+  %ret = fsub <2 x float> %val1, %val2
+  ret <2 x float> %ret
+}
+
+define <2 x float> @fun2(<2 x float> %val1, <2 x float> %val2) {
+; CHECK-LABEL: fun2:
+; CHECK: 	meebr
+; CHECK: 	meebr
+; CHECK-NOT: 	meebr
+  %ret = fmul <2 x float> %val1, %val2
+  ret <2 x float> %ret
+}
+
+define <2 x float> @fun3(<2 x float> %val1, <2 x float> %val2) {
+; CHECK-LABEL: fun3:
+; CHECK: 	debr
+; CHECK: 	debr
+; CHECK-NOT: 	debr
+  %ret = fdiv <2 x float> %val1, %val2
+  ret <2 x float> %ret
+}
+
+define <2 x float> @fun4(<2 x float> %val1, <2 x float> %val2) {
+; CHECK-LABEL: fun4:
+; CHECK: 	brasl	%r14, fmodf@PLT
+; CHECK: 	brasl	%r14, fmodf@PLT
+; CHECK-NOT: 	brasl	%r14, fmodf@PLT
+  %ret = frem <2 x float> %val1, %val2
+  ret <2 x float> %ret
+}