Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13301,6 +13301,19 @@
   if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
     return SDValue();
 
+  // A target may want to call DAG.UnrollVectorOp() on a node which is going
+  // to be widened and then expanded. This is better done before type
+  // legalization, because then only two scalar operations result (an
+  // infinite loop would result if this function re-vectorized the op).
+  if (!LegalTypes && TLI.isOperationExpand(Opcode, VT)) {
+    LLVMContext &Context = *DAG.getContext();
+    if (TLI.getTypeAction(Context, VT) == TargetLowering::TypeWidenVector) {
+      EVT WideVT = TLI.getTypeToTransformTo(Context, VT);
+      if (TLI.isOperationExpand(Opcode, WideVT))
+        return SDValue();
+    }
+  }
+
   // Just because the floating-point vector type is legal does not necessarily
   // mean that the corresponding integer vector type is.
   if (!isTypeLegal(NVT))
Index: lib/Target/SystemZ/SystemZISelLowering.cpp
===================================================================
--- lib/Target/SystemZ/SystemZISelLowering.cpp
+++ lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -449,6 +449,14 @@
   setTargetDAGCombine(ISD::SRL);
   setTargetDAGCombine(ISD::ROTL);
 
+  // Scalarize v2f32 early, to avoid later expansion to 4 operations (see
+  // comment in PerformDAGCombine).
+  static const ISD::NodeType FP32Ops[] = {
+      ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV, ISD::FREM, ISD::SINT_TO_FP,
+      ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT};
+  for (ISD::NodeType Op : FP32Ops)
+    setTargetDAGCombine(Op);
+
   // Handle intrinsics.
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -5184,7 +5192,24 @@
 SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
   switch(N->getOpcode()) {
-  default: break;
+  default:
+    // Z13 can handle fp32 vectors in registers and memory, but does not
+    // support any vector operations on them. v2f32 is widened to v4f32 and
+    // kept in a single vector register, but any operations on v2f32 should
+    // be scalarized before type legalization, or else all four operations
+    // will actually be emitted.
+    //
+    // FP-to-integer conversions produce an integer vector, so for those the
+    // type of the source operand is checked instead of the result type. Use
+    // the operand's SDValue type (not result 0 of the defining node), since
+    // the operand may be a secondary result of a multi-result node.
+    if (N->getValueType(0) == MVT::v2f32 ||
+        ((N->getOpcode() == ISD::FP_TO_SINT ||
+          N->getOpcode() == ISD::FP_TO_UINT) &&
+         N->getOperand(0).getValueType() == MVT::v2f32))
+      return DCI.DAG.UnrollVectorOp(N, 2);
+
+    break;
   case ISD::SIGN_EXTEND:        return combineSIGN_EXTEND(N, DCI);
   case SystemZISD::MERGE_HIGH:
   case SystemZISD::MERGE_LOW:   return combineMERGE(N, DCI);
Index: test/CodeGen/SystemZ/fp32-vec-conv.ll
===================================================================
--- /dev/null
+++ test/CodeGen/SystemZ/fp32-vec-conv.ll
@@ -0,0 +1,41 @@
+; Test that a vector of two floats only generates two instructions (and not
+; four).
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+
+define <2 x float> @fun0(<2 x i32> %val1) {
+; CHECK-LABEL: fun0:
+; CHECK: celfbr
+; CHECK: celfbr
+; CHECK-NOT: celfbr
+  %z = uitofp <2 x i32> %val1 to <2 x float>
+  ret <2 x float> %z
+}
+
+define <2 x float> @fun1(<2 x i32> %val1) {
+; CHECK-LABEL: fun1:
+; CHECK: cefbr
+; CHECK: cefbr
+; CHECK-NOT: cefbr
+  %z = sitofp <2 x i32> %val1 to <2 x float>
+  ret <2 x float> %z
+}
+
+define <2 x i32> @fun2(<2 x float> %val1) {
+; CHECK-LABEL: fun2:
+; CHECK: cfebr
+; CHECK: cfebr
+; CHECK-NOT: cfebr
+  %z = fptosi <2 x float> %val1 to <2 x i32>
+  ret <2 x i32> %z
+}
+
+define <2 x i32> @fun3(<2 x float> %val1) {
+; CHECK-LABEL: fun3:
+; CHECK: clfebr
+; CHECK: clfebr
+; CHECK-NOT: clfebr
+  %z = fptoui <2 x float> %val1 to <2 x i32>
+  ret <2 x i32> %z
+}
Index: test/CodeGen/SystemZ/fp32-vec-ops.ll
===================================================================
--- /dev/null
+++ test/CodeGen/SystemZ/fp32-vec-ops.ll
@@ -0,0 +1,49 @@
+; Test that a vector of two floats only generates two instructions (and not
+; four).
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+define <2 x float> @fun0(<2 x float> %val1, <2 x float> %val2) {
+; CHECK-LABEL: fun0:
+; CHECK: aebr
+; CHECK: aebr
+; CHECK-NOT: aebr
+  %ret = fadd <2 x float> %val1, %val2
+  ret <2 x float> %ret
+}
+
+define <2 x float> @fun1(<2 x float> %val1, <2 x float> %val2) {
+; CHECK-LABEL: fun1:
+; CHECK: sebr
+; CHECK: sebr
+; CHECK-NOT: sebr
+  %ret = fsub <2 x float> %val1, %val2
+  ret <2 x float> %ret
+}
+
+define <2 x float> @fun2(<2 x float> %val1, <2 x float> %val2) {
+; CHECK-LABEL: fun2:
+; CHECK: meebr
+; CHECK: meebr
+; CHECK-NOT: meebr
+  %ret = fmul <2 x float> %val1, %val2
+  ret <2 x float> %ret
+}
+
+define <2 x float> @fun3(<2 x float> %val1, <2 x float> %val2) {
+; CHECK-LABEL: fun3:
+; CHECK: debr
+; CHECK: debr
+; CHECK-NOT: debr
+  %ret = fdiv <2 x float> %val1, %val2
+  ret <2 x float> %ret
+}
+
+define <2 x float> @fun4(<2 x float> %val1, <2 x float> %val2) {
+; CHECK-LABEL: fun4:
+; CHECK: brasl %r14, fmodf@PLT
+; CHECK: brasl %r14, fmodf@PLT
+; CHECK-NOT: brasl %r14, fmodf@PLT
+  %ret = frem <2 x float> %val1, %val2
+  ret <2 x float> %ret
+}