diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -642,6 +642,7 @@
   SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineFP_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue combineINT_TO_FP(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineSELECT_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -641,6 +641,8 @@
   setTargetDAGCombine(ISD::FP_ROUND);
   setTargetDAGCombine(ISD::STRICT_FP_ROUND);
   setTargetDAGCombine(ISD::FP_EXTEND);
+  setTargetDAGCombine(ISD::SINT_TO_FP);
+  setTargetDAGCombine(ISD::UINT_TO_FP);
   setTargetDAGCombine(ISD::STRICT_FP_EXTEND);
   setTargetDAGCombine(ISD::BSWAP);
   setTargetDAGCombine(ISD::SDIV);
@@ -6081,6 +6083,32 @@
   return SDValue();
 }
 
+SDValue SystemZTargetLowering::combineINT_TO_FP(
+    SDNode *N, DAGCombinerInfo &DCI) const {
+  if (DCI.Level != BeforeLegalizeTypes)
+    return SDValue();
+  unsigned Opcode = N->getOpcode();
+  EVT OutVT = N->getValueType(0);
+  SelectionDAG &DAG = DCI.DAG;
+  SDValue Op = N->getOperand(0);
+  unsigned OutScalarBits = OutVT.getScalarSizeInBits();
+  unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
+
+  // Insert an extension before type-legalization to avoid scalarization, e.g.:
+  // v2f64 = uint_to_fp v2i16
+  // =>
+  // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
+  if (OutVT.isVector() && OutScalarBits > InScalarBits) {
+    MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(OutVT.getScalarSizeInBits()),
+                                 OutVT.getVectorNumElements());
+    unsigned ExtOpcode =
+        (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
+    SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
+    return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
+  }
+  return SDValue();
+}
+
 SDValue SystemZTargetLowering::combineBSWAP(
     SDNode *N, DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -6408,6 +6436,8 @@
   case ISD::FP_ROUND:           return combineFP_ROUND(N, DCI);
   case ISD::STRICT_FP_EXTEND:
   case ISD::FP_EXTEND:          return combineFP_EXTEND(N, DCI);
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:         return combineINT_TO_FP(N, DCI);
   case ISD::BSWAP:              return combineBSWAP(N, DCI);
   case SystemZISD::BR_CCMASK:   return combineBR_CCMASK(N, DCI);
   case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
diff --git a/llvm/test/CodeGen/SystemZ/vec-move-23.ll b/llvm/test/CodeGen/SystemZ/vec-move-23.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vec-move-23.ll
@@ -0,0 +1,131 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s -check-prefixes=CHECK,Z14
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s -check-prefixes=CHECK,Z15
+;
+; Check that int-to-fp conversions from a narrower type get a vector extension.
+
+define void @fun0(<2 x i8> %Src, <2 x double>* %Dst) {
+; CHECK-LABEL: fun0:
+; CHECK: vuphb %v0, %v24
+; CHECK-NEXT: vuphh %v0, %v0
+; CHECK-NEXT: vuphf %v0, %v0
+; CHECK-NEXT: vcdgb %v0, %v0, 0, 0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+  %c = sitofp <2 x i8> %Src to <2 x double>
+  store <2 x double> %c, <2 x double>* %Dst
+  ret void
+}
+
+define void @fun1(<2 x i16> %Src, <2 x double>* %Dst) {
+; CHECK-LABEL: fun1:
+; CHECK: vuphh %v0, %v24
+; CHECK-NEXT: vuphf %v0, %v0
+; CHECK-NEXT: vcdgb %v0, %v0, 0, 0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+  %c = sitofp <2 x i16> %Src to <2 x double>
+  store <2 x double> %c, <2 x double>* %Dst
+  ret void
+}
+
+define void @fun2(<2 x i32> %Src, <2 x double>* %Dst) {
+; CHECK-LABEL: fun2:
+; CHECK: vuphf %v0, %v24
+; CHECK-NEXT: vcdgb %v0, %v0, 0, 0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+  %c = sitofp <2 x i32> %Src to <2 x double>
+  store <2 x double> %c, <2 x double>* %Dst
+  ret void
+}
+
+define void @fun3(<4 x i16> %Src, <4 x float>* %Dst) {
+; CHECK-LABEL: fun3:
+
+; Z14: vuphh %v0, %v24
+; Z14-NEXT: vlgvf %r0, %v0, 3
+; Z14-NEXT: cefbr %f1, %r0
+; Z14-NEXT: vlgvf %r0, %v0, 2
+; Z14-NEXT: cefbr %f2, %r0
+; Z14-NEXT: vlgvf %r0, %v0, 1
+; Z14-NEXT: vmrhf %v1, %v2, %v1
+; Z14-NEXT: cefbr %f2, %r0
+; Z14-NEXT: vlgvf %r0, %v0, 0
+; Z14-NEXT: cefbr %f0, %r0
+; Z14-NEXT: vmrhf %v0, %v0, %v2
+; Z14-NEXT: vmrhg %v0, %v0, %v1
+; Z14-NEXT: vst %v0, 0(%r2), 3
+; Z14-NEXT: br %r14
+
+; Z15: vuphh %v0, %v24
+; Z15-NEXT: vcefb %v0, %v0, 0, 0
+; Z15-NEXT: vst %v0, 0(%r2), 3
+; Z15-NEXT: br %r14
+  %c = sitofp <4 x i16> %Src to <4 x float>
+  store <4 x float> %c, <4 x float>* %Dst
+  ret void
+}
+
+define void @fun4(<2 x i8> %Src, <2 x double>* %Dst) {
+; CHECK-LABEL: fun4:
+; CHECK: vuplhb %v0, %v24
+; CHECK-NEXT: vuplhh %v0, %v0
+; CHECK-NEXT: vuplhf %v0, %v0
+; CHECK-NEXT: vcdlgb %v0, %v0, 0, 0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+  %c = uitofp <2 x i8> %Src to <2 x double>
+  store <2 x double> %c, <2 x double>* %Dst
+  ret void
+}
+
+define void @fun5(<2 x i16> %Src, <2 x double>* %Dst) {
+; CHECK-LABEL: fun5:
+; CHECK: vuplhh %v0, %v24
+; CHECK-NEXT: vuplhf %v0, %v0
+; CHECK-NEXT: vcdlgb %v0, %v0, 0, 0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+  %c = uitofp <2 x i16> %Src to <2 x double>
+  store <2 x double> %c, <2 x double>* %Dst
+  ret void
+}
+
+define void @fun6(<2 x i32> %Src, <2 x double>* %Dst) {
+; CHECK-LABEL: fun6:
+; CHECK: vuplhf %v0, %v24
+; CHECK-NEXT: vcdlgb %v0, %v0, 0, 0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+  %c = uitofp <2 x i32> %Src to <2 x double>
+  store <2 x double> %c, <2 x double>* %Dst
+  ret void
+}
+
+define void @fun7(<4 x i16> %Src, <4 x float>* %Dst) {
+; CHECK-LABEL: fun7:
+
+; Z14: vuplhh %v0, %v24
+; Z14-NEXT: vlgvf %r0, %v0, 3
+; Z14-NEXT: celfbr %f1, 0, %r0, 0
+; Z14-NEXT: vlgvf %r0, %v0, 2
+; Z14-NEXT: celfbr %f2, 0, %r0, 0
+; Z14-NEXT: vlgvf %r0, %v0, 1
+; Z14-NEXT: vmrhf %v1, %v2, %v1
+; Z14-NEXT: celfbr %f2, 0, %r0, 0
+; Z14-NEXT: vlgvf %r0, %v0, 0
+; Z14-NEXT: celfbr %f0, 0, %r0, 0
+; Z14-NEXT: vmrhf %v0, %v0, %v2
+; Z14-NEXT: vmrhg %v0, %v0, %v1
+; Z14-NEXT: vst %v0, 0(%r2), 3
+; Z14-NEXT: br %r14
+
+; Z15: vuplhh %v0, %v24
+; Z15-NEXT: vcelfb %v0, %v0, 0, 0
+; Z15-NEXT: vst %v0, 0(%r2), 3
+; Z15-NEXT: br %r14
+  %c = uitofp <4 x i16> %Src to <4 x float>
+  store <4 x float> %c, <4 x float>* %Dst
+  ret void
+}
+