diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4321,11 +4321,54 @@
     Results.push_back(ExpandLibCall(LC, Node, false));
     break;
   }
+  // Strict FP conversions: select the libcall from the operand and result
+  // types, then call it with operand 1 as the argument.
+  case ISD::STRICT_FP_TO_SINT:
+  case ISD::STRICT_FP_TO_UINT:
+  case ISD::STRICT_SINT_TO_FP:
+  case ISD::STRICT_UINT_TO_FP:
+  case ISD::STRICT_FP_EXTEND:
+  case ISD::STRICT_FP_ROUND:
   case ISD::STRICT_FP_TO_FP16: {
-    RTLIB::Libcall LC =
-        RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::f16);
-    assert(LC != RTLIB::UNKNOWN_LIBCALL &&
-           "Unable to expand strict_fp_to_fp16");
+    RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+    switch (Opc) {
+    default:
+      llvm_unreachable("Unable to legalize as libcall");
+    case ISD::STRICT_FP_TO_FP16:
+      LC = RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::f16);
+      break;
+    case ISD::STRICT_FP_TO_SINT:
+      LC = RTLIB::getFPTOSINT(Node->getOperand(1).getValueType(),
+                              Node->getValueType(0));
+      break;
+    case ISD::STRICT_FP_TO_UINT:
+      LC = RTLIB::getFPTOUINT(Node->getOperand(1).getValueType(),
+                              Node->getValueType(0));
+      break;
+    case ISD::STRICT_SINT_TO_FP:
+      LC = RTLIB::getSINTTOFP(Node->getOperand(1).getValueType(),
+                              Node->getValueType(0));
+      break;
+    case ISD::STRICT_UINT_TO_FP:
+      LC = RTLIB::getUINTTOFP(Node->getOperand(1).getValueType(),
+                              Node->getValueType(0));
+      break;
+    case ISD::STRICT_FP_EXTEND:
+      LC = RTLIB::getFPEXT(Node->getOperand(1).getValueType(),
+                           Node->getValueType(0));
+      break;
+    case ISD::STRICT_FP_ROUND:
+      LC = RTLIB::getFPROUND(Node->getOperand(1).getValueType(),
+                             Node->getValueType(0));
+      break;
+    }
+    assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+           "No libcall available for this strict FP conversion");
     TargetLowering::MakeLibCallOptions CallOptions;
+    // The signed conversions need sign extension of their integer operand or
+    // result, like the non-strict FP_TO_SINT/SINT_TO_FP cases, which pass
+    // `true` to ExpandLibCall.
+    CallOptions.setSExt(Opc == ISD::STRICT_FP_TO_SINT ||
+                        Opc == ISD::STRICT_SINT_TO_FP);
     std::pair<SDValue, SDValue> Tmp =
         TLI.makeLibCall(DAG, LC, Node->getValueType(0), Node->getOperand(1),
@@ -4334,6 +4377,63 @@
     Results.push_back(Tmp.second);
     break;
   }
+  // Non-strict conversions expand straight to the libcall selected from the
+  // operand and result types.
+  case ISD::FP_TO_SINT: {
+    Results.push_back(
+        ExpandLibCall(RTLIB::getFPTOSINT(Node->getOperand(0).getValueType(),
+                                         Node->getValueType(0)),
+                      Node, true));
+    break;
+  }
+  case ISD::FP_TO_UINT: {
+    Results.push_back(
+        ExpandLibCall(RTLIB::getFPTOUINT(Node->getOperand(0).getValueType(),
+                                         Node->getValueType(0)),
+                      Node, false));
+    break;
+  }
+  case ISD::SINT_TO_FP: {
+    Results.push_back(
+        ExpandLibCall(RTLIB::getSINTTOFP(Node->getOperand(0).getValueType(),
+                                         Node->getValueType(0)),
+                      Node, true));
+    break;
+  }
+  case ISD::UINT_TO_FP: {
+    Results.push_back(
+        ExpandLibCall(RTLIB::getUINTTOFP(Node->getOperand(0).getValueType(),
+                                         Node->getValueType(0)),
+                      Node, false));
+    break;
+  }
+  case ISD::FP_EXTEND: {
+    Results.push_back(
+        ExpandLibCall(RTLIB::getFPEXT(Node->getOperand(0).getValueType(),
+                                      Node->getValueType(0)),
+                      Node, false));
+    break;
+  }
+  case ISD::FP_ROUND: {
+    // X = FP_ROUND(Y, TRUNC)
+    // TRUNC is a flag, which is always an integer that is zero or one.
+    // If TRUNC is 0, this is a normal rounding, if it is 1, this FP_ROUND
+    // is known to not change the value of Y.
+    // We can only expand it into libcall if the TRUNC is 0.
+    const ConstantSDNode *TRUNC =
+        dyn_cast<ConstantSDNode>(Node->getOperand(1));
+    assert(TRUNC && TRUNC->isNullValue() &&
+           "Unable to expand as libcall if it is not normal rounding");
+    (void)TRUNC; // Only read by the assert above.
+    TargetLowering::MakeLibCallOptions CallOptions;
+    std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(
+        DAG,
+        RTLIB::getFPROUND(Node->getOperand(0).getValueType(),
+                          Node->getValueType(0)),
+        Node->getValueType(0), Node->getOperand(0), CallOptions, SDLoc(Node));
+    Results.push_back(Tmp.first);
+    break;
+  }
   case ISD::FSUB:
   case ISD::STRICT_FSUB:
     ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -679,6 +679,9 @@
       return (Kind != SelectSupportKind::ScalarCondVectorVal);
     }
 
+    bool isSupportedCastOperation(unsigned Opcode, EVT SrcVT,
+                                  EVT DestVT) const override;
+
     /// getPreferredVectorAction - The code we generate when vector types are
     /// legalized by promoting the integer element type is often much worse
     /// than code we generate if we widen the type for applicable vector types.
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1194,6 +1194,9 @@
       setOperationAction(ISD::FSQRT, MVT::f128, Expand);
       setOperationAction(ISD::FMA, MVT::f128, Expand);
       setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
+
+      setTruncStoreAction(MVT::f128, MVT::f64, Expand);
+      setTruncStoreAction(MVT::f128, MVT::f32, Expand);
     }
 
     if (Subtarget.hasP9Altivec()) {
@@ -1387,6 +1390,37 @@
   PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
 }
 
+/// On subtargets without P9 vector support, answer false for the conversions
+/// listed below when their floating-point source (round / to-int) or result
+/// (extend / from-int) type is f128, and true otherwise for those opcodes.
+/// Every other query is forwarded to TargetLowering::isSupportedCastOperation.
+bool PPCTargetLowering::isSupportedCastOperation(unsigned Opcode, EVT SrcVT,
+                                                 EVT DestVT) const {
+  if (Subtarget.hasP9Vector())
+    return TargetLowering::isSupportedCastOperation(Opcode, SrcVT, DestVT);
+
+  switch (Opcode) {
+  // Conversions decided by their source type.
+  case ISD::FP_ROUND:
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::STRICT_FP_ROUND:
+  case ISD::STRICT_FP_TO_SINT:
+  case ISD::STRICT_FP_TO_UINT:
+    return SrcVT != MVT::f128;
+  // Conversions decided by their result type.
+  case ISD::FP_EXTEND:
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+  case ISD::STRICT_FP_EXTEND:
+  case ISD::STRICT_SINT_TO_FP:
+  case ISD::STRICT_UINT_TO_FP:
+    return DestVT != MVT::f128;
+  default:
+    return TargetLowering::isSupportedCastOperation(Opcode, SrcVT, DestVT);
+  }
+}
+
 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
 /// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) { diff --git a/llvm/test/CodeGen/PowerPC/f128-conv.ll b/llvm/test/CodeGen/PowerPC/f128-conv.ll --- a/llvm/test/CodeGen/PowerPC/f128-conv.ll +++ b/llvm/test/CodeGen/PowerPC/f128-conv.ll @@ -3,7 +3,7 @@ ; RUN: -ppc-vsr-nums-as-vr -verify-machineinstrs -ppc-asm-full-reg-names < %s \ ; RUN: | FileCheck %s ; RUN: llc -relocation-model=pic -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \ -; RUN: -ppc-vsr-nums-as-vr -verify-machineinstrs -ppc-asm-full-reg-names < %s \ +; RUN: -ppc-vsr-nums-as-vr -verify-machineinstrs -ppc-asm-full-reg-names -ppc-set-fp128-legal < %s \ ; RUN: | FileCheck %s -check-prefix=CHECK-P8 @mem = global [5 x i64] [i64 56, i64 63, i64 3, i64 5, i64 6], align 8 @@ -35,8 +35,7 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatdikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -86,8 +85,7 @@ ; CHECK-P8-NEXT: mr r4, r5 ; CHECK-P8-NEXT: bl __floattikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -127,8 +125,7 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatdikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -166,8 +163,7 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatdikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld 
r30, -16(r1) # 8-byte Folded Reload @@ -203,8 +199,7 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatundikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -254,8 +249,7 @@ ; CHECK-P8-NEXT: mr r4, r5 ; CHECK-P8-NEXT: bl __floatuntikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -295,8 +289,7 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatundikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -334,8 +327,7 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatundikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -376,10 +368,8 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatdikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r5, r3 ; CHECK-P8-NEXT: mr r3, r30 -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r5, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -423,10 +413,8 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatundikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r5, r3 ; CHECK-P8-NEXT: mr r3, r30 -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r5, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; 
CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -465,8 +453,7 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -502,8 +489,7 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -545,8 +531,7 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -583,8 +568,7 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -620,8 +604,7 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -663,8 +646,7 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte 
Folded Reload @@ -705,8 +687,7 @@ ; CHECK-P8-NEXT: clrldi r3, r3, 32 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -744,8 +725,7 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -777,12 +757,12 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) +; CHECK-P8-NEXT: lhz r4, 0(r4) ; CHECK-P8-NEXT: mr r30, r3 -; CHECK-P8-NEXT: lhz r3, 0(r4) +; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -817,14 +797,14 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) +; CHECK-P8-NEXT: addis r4, r2, .LC4@toc@ha ; CHECK-P8-NEXT: mr r30, r3 -; CHECK-P8-NEXT: addis r3, r2, .LC4@toc@ha -; CHECK-P8-NEXT: ld r3, .LC4@toc@l(r3) -; CHECK-P8-NEXT: lhz r3, 6(r3) +; CHECK-P8-NEXT: ld r4, .LC4@toc@l(r4) +; CHECK-P8-NEXT: lhz r4, 6(r4) +; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -865,8 +845,7 @@ ; CHECK-P8-NEXT: clrldi r3, r3, 32 ; CHECK-P8-NEXT: bl __floatsikf 
; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -906,8 +885,7 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -938,12 +916,12 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) +; CHECK-P8-NEXT: lbz r4, 0(r4) ; CHECK-P8-NEXT: mr r30, r3 -; CHECK-P8-NEXT: lbz r3, 0(r4) +; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -978,14 +956,14 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) +; CHECK-P8-NEXT: addis r4, r2, .LC5@toc@ha ; CHECK-P8-NEXT: mr r30, r3 -; CHECK-P8-NEXT: addis r3, r2, .LC5@toc@ha -; CHECK-P8-NEXT: ld r3, .LC5@toc@l(r3) -; CHECK-P8-NEXT: lbz r3, 2(r3) +; CHECK-P8-NEXT: ld r4, .LC5@toc@l(r4) +; CHECK-P8-NEXT: lbz r4, 2(r4) +; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1026,8 +1004,7 @@ ; CHECK-P8-NEXT: clrldi r3, r3, 32 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; 
CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1070,9 +1047,7 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: ld r5, 0(r3) -; CHECK-P8-NEXT: ld r4, 8(r3) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: bl __trunckfdf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -1108,9 +1083,7 @@ ; CHECK-P8-NEXT: addis r4, r2, .LC6@toc@ha ; CHECK-P8-NEXT: mr r30, r3 ; CHECK-P8-NEXT: ld r4, .LC6@toc@l(r4) -; CHECK-P8-NEXT: ld r5, 0(r4) -; CHECK-P8-NEXT: ld r4, 8(r4) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: lvx v2, 0, r4 ; CHECK-P8-NEXT: bl __trunckfdf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: stfdx f1, 0, r30 @@ -1153,9 +1126,7 @@ ; CHECK-P8-NEXT: addis r4, r2, .LC7@toc@ha ; CHECK-P8-NEXT: mr r29, r3 ; CHECK-P8-NEXT: ld r4, .LC7@toc@l(r4) -; CHECK-P8-NEXT: ld r5, 0(r4) -; CHECK-P8-NEXT: ld r4, 8(r4) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: lvx v2, 0, r4 ; CHECK-P8-NEXT: bl __trunckfdf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: sldi r3, r30, 3 @@ -1195,14 +1166,9 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: ld r9, 0(r3) -; CHECK-P8-NEXT: ld r7, 8(r3) -; CHECK-P8-NEXT: ld r8, 0(r4) -; CHECK-P8-NEXT: ld r6, 8(r4) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: mr r30, r5 -; CHECK-P8-NEXT: mr r3, r9 -; CHECK-P8-NEXT: mr r4, r7 -; CHECK-P8-NEXT: mr r5, r8 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __trunckfdf2 @@ -1240,9 +1206,7 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: ld r5, 0(r3) -; CHECK-P8-NEXT: ld r4, 8(r3) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: bl __trunckfsf2 ; 
CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -1279,9 +1243,7 @@ ; CHECK-P8-NEXT: addis r4, r2, .LC6@toc@ha ; CHECK-P8-NEXT: mr r30, r3 ; CHECK-P8-NEXT: ld r4, .LC6@toc@l(r4) -; CHECK-P8-NEXT: ld r5, 0(r4) -; CHECK-P8-NEXT: ld r4, 8(r4) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: lvx v2, 0, r4 ; CHECK-P8-NEXT: bl __trunckfsf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: stfsx f1, 0, r30 @@ -1325,9 +1287,8 @@ ; CHECK-P8-NEXT: addis r4, r2, .LC7@toc@ha ; CHECK-P8-NEXT: mr r29, r3 ; CHECK-P8-NEXT: ld r4, .LC7@toc@l(r4) -; CHECK-P8-NEXT: ld r5, 48(r4) -; CHECK-P8-NEXT: ld r4, 56(r4) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: addi r4, r4, 48 +; CHECK-P8-NEXT: lvx v2, 0, r4 ; CHECK-P8-NEXT: bl __trunckfsf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: sldi r3, r30, 2 @@ -1368,14 +1329,9 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: ld r9, 0(r3) -; CHECK-P8-NEXT: ld r7, 8(r3) -; CHECK-P8-NEXT: ld r8, 0(r4) -; CHECK-P8-NEXT: ld r6, 8(r4) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: mr r30, r5 -; CHECK-P8-NEXT: mr r3, r9 -; CHECK-P8-NEXT: mr r4, r7 -; CHECK-P8-NEXT: mr r5, r8 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __trunckfsf2 @@ -1444,10 +1400,9 @@ ; CHECK-P8-NEXT: lfdx f1, 0, r3 ; CHECK-P8-NEXT: bl __extenddfkf2 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: addis r5, r2, .LC8@toc@ha -; CHECK-P8-NEXT: ld r5, .LC8@toc@l(r5) -; CHECK-P8-NEXT: std r4, 8(r5) -; CHECK-P8-NEXT: std r3, 0(r5) +; CHECK-P8-NEXT: addis r3, r2, .LC8@toc@ha +; CHECK-P8-NEXT: ld r3, .LC8@toc@l(r3) +; CHECK-P8-NEXT: stvx v2, 0, r3 ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 @@ -1482,10 +1437,9 @@ ; CHECK-P8-NEXT: lfdx f1, r3, r4 ; CHECK-P8-NEXT: bl __extenddfkf2 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: addis r5, r2, .LC8@toc@ha -; CHECK-P8-NEXT: ld r5, .LC8@toc@l(r5) -; CHECK-P8-NEXT: std r4, 8(r5) -; 
CHECK-P8-NEXT: std r3, 0(r5) +; CHECK-P8-NEXT: addis r3, r2, .LC8@toc@ha +; CHECK-P8-NEXT: ld r3, .LC8@toc@l(r3) +; CHECK-P8-NEXT: stvx v2, 0, r3 ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 @@ -1524,9 +1478,8 @@ ; CHECK-P8-NEXT: mr r29, r3 ; CHECK-P8-NEXT: bl __extenddfkf2 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: sldi r5, r30, 4 -; CHECK-P8-NEXT: stdux r3, r29, r5 -; CHECK-P8-NEXT: std r4, 8(r29) +; CHECK-P8-NEXT: sldi r3, r30, 4 +; CHECK-P8-NEXT: stvx v2, r29, r3 ; CHECK-P8-NEXT: addi r1, r1, 64 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1562,8 +1515,7 @@ ; CHECK-P8-NEXT: mr r30, r4 ; CHECK-P8-NEXT: bl __extenddfkf2 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1622,10 +1574,9 @@ ; CHECK-P8-NEXT: lfsx f1, 0, r3 ; CHECK-P8-NEXT: bl __extendsfkf2 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: addis r5, r2, .LC8@toc@ha -; CHECK-P8-NEXT: ld r5, .LC8@toc@l(r5) -; CHECK-P8-NEXT: std r4, 8(r5) -; CHECK-P8-NEXT: std r3, 0(r5) +; CHECK-P8-NEXT: addis r3, r2, .LC8@toc@ha +; CHECK-P8-NEXT: ld r3, .LC8@toc@l(r3) +; CHECK-P8-NEXT: stvx v2, 0, r3 ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 @@ -1660,10 +1611,9 @@ ; CHECK-P8-NEXT: lfsx f1, r3, r4 ; CHECK-P8-NEXT: bl __extendsfkf2 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: addis r5, r2, .LC8@toc@ha -; CHECK-P8-NEXT: ld r5, .LC8@toc@l(r5) -; CHECK-P8-NEXT: std r4, 8(r5) -; CHECK-P8-NEXT: std r3, 0(r5) +; CHECK-P8-NEXT: addis r3, r2, .LC8@toc@ha +; CHECK-P8-NEXT: ld r3, .LC8@toc@l(r3) +; CHECK-P8-NEXT: stvx v2, 0, r3 ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 @@ -1702,9 +1652,8 @@ ; CHECK-P8-NEXT: mr r29, r3 ; CHECK-P8-NEXT: bl __extendsfkf2 ; CHECK-P8-NEXT: nop -; 
CHECK-P8-NEXT: sldi r5, r30, 4 -; CHECK-P8-NEXT: stdux r3, r29, r5 -; CHECK-P8-NEXT: std r4, 8(r29) +; CHECK-P8-NEXT: sldi r3, r30, 4 +; CHECK-P8-NEXT: stvx v2, r29, r3 ; CHECK-P8-NEXT: addi r1, r1, 64 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1740,8 +1689,7 @@ ; CHECK-P8-NEXT: mr r30, r4 ; CHECK-P8-NEXT: bl __extendsfkf2 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1779,8 +1727,7 @@ ; CHECK-P8-NEXT: extsw r3, r3 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1816,8 +1763,7 @@ ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: bl __floatdikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1855,8 +1801,7 @@ ; CHECK-P8-NEXT: extsw r3, r3 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1892,8 +1837,7 @@ ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: bl __floatdikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1931,8 +1875,7 @@ ; CHECK-P8-NEXT: clrldi r3, r3, 32 ; CHECK-P8-NEXT: bl 
__floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1968,8 +1911,7 @@ ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: bl __floatundikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -2007,8 +1949,7 @@ ; CHECK-P8-NEXT: clrldi r3, r3, 32 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -2044,8 +1985,7 @@ ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: bl __floatundikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -2082,9 +2022,7 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: ld r5, 0(r3) -; CHECK-P8-NEXT: ld r4, 8(r3) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: bl __fixkfti ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -2121,9 +2059,7 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: ld r5, 0(r3) -; CHECK-P8-NEXT: ld r4, 8(r3) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: bl __fixunskfti ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 diff --git a/llvm/test/CodeGen/PowerPC/f128-rounding.ll 
b/llvm/test/CodeGen/PowerPC/f128-rounding.ll --- a/llvm/test/CodeGen/PowerPC/f128-rounding.ll +++ b/llvm/test/CodeGen/PowerPC/f128-rounding.ll @@ -2,7 +2,7 @@ ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs \ ; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s ; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs \ -; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s \ +; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -ppc-set-fp128-legal < %s | FileCheck %s \ ; RUN: -check-prefix=CHECK-P8 define void @qp_trunc(fp128* nocapture readonly %a, fp128* nocapture %res) { @@ -22,15 +22,11 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: ld r5, 0(r3) -; CHECK-P8-NEXT: ld r6, 8(r3) +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 -; CHECK-P8-NEXT: mr r3, r5 -; CHECK-P8-NEXT: mr r4, r6 ; CHECK-P8-NEXT: bl __truncieee128 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r3, 0(r30) -; CHECK-P8-NEXT: std r4, 8(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -61,15 +57,11 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: ld r5, 0(r3) -; CHECK-P8-NEXT: ld r6, 8(r3) +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 -; CHECK-P8-NEXT: mr r3, r5 -; CHECK-P8-NEXT: mr r4, r6 ; CHECK-P8-NEXT: bl __rintieee128 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r3, 0(r30) -; CHECK-P8-NEXT: std r4, 8(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -100,15 +92,11 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, 
-48(r1) -; CHECK-P8-NEXT: ld r5, 0(r3) -; CHECK-P8-NEXT: ld r6, 8(r3) +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 -; CHECK-P8-NEXT: mr r3, r5 -; CHECK-P8-NEXT: mr r4, r6 ; CHECK-P8-NEXT: bl __nearbyintieee128 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r3, 0(r30) -; CHECK-P8-NEXT: std r4, 8(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -139,15 +127,11 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: ld r5, 0(r3) -; CHECK-P8-NEXT: ld r6, 8(r3) +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 -; CHECK-P8-NEXT: mr r3, r5 -; CHECK-P8-NEXT: mr r4, r6 ; CHECK-P8-NEXT: bl __roundieee128 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r3, 0(r30) -; CHECK-P8-NEXT: std r4, 8(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -178,15 +162,11 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: ld r5, 0(r3) -; CHECK-P8-NEXT: ld r6, 8(r3) +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 -; CHECK-P8-NEXT: mr r3, r5 -; CHECK-P8-NEXT: mr r4, r6 ; CHECK-P8-NEXT: bl __floorieee128 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r3, 0(r30) -; CHECK-P8-NEXT: std r4, 8(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -217,15 +197,11 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: ld r5, 0(r3) -; CHECK-P8-NEXT: ld r6, 8(r3) +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 -; CHECK-P8-NEXT: mr r3, r5 -; CHECK-P8-NEXT: mr r4, r6 ; 
CHECK-P8-NEXT: bl __ceilieee128 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r3, 0(r30) -; CHECK-P8-NEXT: std r4, 8(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll --- a/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ -; RUN: < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 | FileCheck %s\ +; RUN: < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 -ppc-set-fp128-legal | FileCheck %s\ ; RUN: -check-prefix=P8 ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ ; RUN: < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s \ @@ -661,6 +661,7 @@ ; P8-NEXT: stdu r1, -112(r1) ; P8-NEXT: .cfi_def_cfa_offset 112 ; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: clrldi r3, r3, 32 ; P8-NEXT: bl __floatsikf ; P8-NEXT: nop ; P8-NEXT: addi r1, r1, 112