diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -914,6 +914,11 @@
     /// the immediate into a register.
     bool isLegalAddImmediate(int64_t Imm) const override;
 
+    /// isLegalizedAsLibCall - Return true if \p N is an FP conversion node
+    /// (FP round/extend, FP <-> integer, or a strict variant) whose source
+    /// or result type is f128 and the subtarget lacks P9 vector support.
+    bool isLegalizedAsLibCall(SDNode *N) const override;
+
     /// isTruncateFree - Return true if it's free to truncate a value of
     /// type Ty1 to type Ty2. e.g. On PPC it's free to truncate a i64 value in
     /// register X1 to i32 by referencing its sub-register R1.
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1197,6 +1197,9 @@
       setOperationAction(ISD::FSQRT, MVT::f128, Expand);
       setOperationAction(ISD::FMA, MVT::f128, Expand);
       setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
+
+      setTruncStoreAction(MVT::f128, MVT::f64, Expand);
+      setTruncStoreAction(MVT::f128, MVT::f32, Expand);
     }
 
     if (Subtarget.hasP9Altivec()) {
@@ -1379,6 +1382,45 @@
   PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
 }
 
+bool PPCTargetLowering::isLegalizedAsLibCall(SDNode *N) const {
+  // With P9 vector support no node is reported as a libcall.
+  if (Subtarget.hasP9Vector())
+    return false;
+
+  // The source value is read from operand 1 of a strict FP node and from
+  // operand 0 of any other node.
+  unsigned ValueOpNo = 0;
+  if (N->isStrictFPOpcode())
+    ValueOpNo = 1;
+  const EVT ResultVT = N->getValueType(0);
+
+  bool UsesLibCall = false;
+  switch (N->getOpcode()) {
+  default:
+    break;
+  // These are flagged when the source operand has type f128.
+  case ISD::FP_ROUND:
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::STRICT_FP_ROUND:
+  case ISD::STRICT_FP_TO_SINT:
+  case ISD::STRICT_FP_TO_UINT:
+    UsesLibCall = N->getOperand(ValueOpNo).getValueType() == MVT::f128;
+    break;
+  // These are flagged when the first result has type f128.
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+  case ISD::FP_EXTEND:
+  case ISD::STRICT_SINT_TO_FP:
+  case ISD::STRICT_UINT_TO_FP:
+  case ISD::STRICT_FP_EXTEND:
+    UsesLibCall = ResultVT == MVT::f128;
+    break;
+  }
+
+  return UsesLibCall;
+}
+
 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
 /// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) { diff --git a/llvm/test/CodeGen/PowerPC/f128-conv.ll b/llvm/test/CodeGen/PowerPC/f128-conv.ll --- a/llvm/test/CodeGen/PowerPC/f128-conv.ll +++ b/llvm/test/CodeGen/PowerPC/f128-conv.ll @@ -3,7 +3,7 @@ ; RUN: -ppc-vsr-nums-as-vr -verify-machineinstrs -ppc-asm-full-reg-names < %s \ ; RUN: | FileCheck %s ; RUN: llc -relocation-model=pic -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \ -; RUN: -ppc-vsr-nums-as-vr -verify-machineinstrs -ppc-asm-full-reg-names < %s \ +; RUN: -ppc-vsr-nums-as-vr -verify-machineinstrs -ppc-asm-full-reg-names -ppc-set-fp128-legal < %s \ ; RUN: | FileCheck %s -check-prefix=CHECK-P8 @mem = global [5 x i64] [i64 56, i64 63, i64 3, i64 5, i64 6], align 8 @@ -35,8 +35,7 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatdikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -86,8 +85,7 @@ ; CHECK-P8-NEXT: mr r4, r5 ; CHECK-P8-NEXT: bl __floattitf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -127,8 +125,7 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatdikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -166,8 +163,7 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatdikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld 
r30, -16(r1) # 8-byte Folded Reload @@ -203,8 +199,7 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatundikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -254,8 +249,7 @@ ; CHECK-P8-NEXT: mr r4, r5 ; CHECK-P8-NEXT: bl __floatuntitf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -295,8 +289,7 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatundikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -334,8 +327,7 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatundikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -376,10 +368,8 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatdikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r5, r3 ; CHECK-P8-NEXT: mr r3, r30 -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r5, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -423,10 +413,8 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatundikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r5, r3 ; CHECK-P8-NEXT: mr r3, r30 -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r5, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; 
CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -465,8 +453,7 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -502,8 +489,7 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -545,8 +531,7 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -583,8 +568,7 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -620,8 +604,7 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -663,8 +646,7 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte 
Folded Reload @@ -705,8 +687,7 @@ ; CHECK-P8-NEXT: clrldi r3, r3, 32 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -744,8 +725,7 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -777,12 +757,12 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) +; CHECK-P8-NEXT: lhz r4, 0(r4) ; CHECK-P8-NEXT: mr r30, r3 -; CHECK-P8-NEXT: lhz r3, 0(r4) +; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -817,14 +797,14 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) +; CHECK-P8-NEXT: addis r4, r2, .LC4@toc@ha ; CHECK-P8-NEXT: mr r30, r3 -; CHECK-P8-NEXT: addis r3, r2, .LC4@toc@ha -; CHECK-P8-NEXT: ld r3, .LC4@toc@l(r3) -; CHECK-P8-NEXT: lhz r3, 6(r3) +; CHECK-P8-NEXT: ld r4, .LC4@toc@l(r4) +; CHECK-P8-NEXT: lhz r4, 6(r4) +; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -865,8 +845,7 @@ ; CHECK-P8-NEXT: clrldi r3, r3, 32 ; CHECK-P8-NEXT: bl __floatsikf 
; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -906,8 +885,7 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -938,12 +916,12 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) +; CHECK-P8-NEXT: lbz r4, 0(r4) ; CHECK-P8-NEXT: mr r30, r3 -; CHECK-P8-NEXT: lbz r3, 0(r4) +; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -978,14 +956,14 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) +; CHECK-P8-NEXT: addis r4, r2, .LC5@toc@ha ; CHECK-P8-NEXT: mr r30, r3 -; CHECK-P8-NEXT: addis r3, r2, .LC5@toc@ha -; CHECK-P8-NEXT: ld r3, .LC5@toc@l(r3) -; CHECK-P8-NEXT: lbz r3, 2(r3) +; CHECK-P8-NEXT: ld r4, .LC5@toc@l(r4) +; CHECK-P8-NEXT: lbz r4, 2(r4) +; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1026,8 +1004,7 @@ ; CHECK-P8-NEXT: clrldi r3, r3, 32 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; 
CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1070,9 +1047,7 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: ld r5, 0(r3) -; CHECK-P8-NEXT: ld r4, 8(r3) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: bl __trunckfdf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -1108,9 +1083,7 @@ ; CHECK-P8-NEXT: addis r4, r2, .LC6@toc@ha ; CHECK-P8-NEXT: mr r30, r3 ; CHECK-P8-NEXT: ld r4, .LC6@toc@l(r4) -; CHECK-P8-NEXT: ld r5, 0(r4) -; CHECK-P8-NEXT: ld r4, 8(r4) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: lvx v2, 0, r4 ; CHECK-P8-NEXT: bl __trunckfdf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: stfdx f1, 0, r30 @@ -1153,9 +1126,7 @@ ; CHECK-P8-NEXT: addis r4, r2, .LC7@toc@ha ; CHECK-P8-NEXT: mr r29, r3 ; CHECK-P8-NEXT: ld r4, .LC7@toc@l(r4) -; CHECK-P8-NEXT: ld r5, 0(r4) -; CHECK-P8-NEXT: ld r4, 8(r4) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: lvx v2, 0, r4 ; CHECK-P8-NEXT: bl __trunckfdf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: sldi r3, r30, 3 @@ -1195,14 +1166,9 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: ld r9, 0(r3) -; CHECK-P8-NEXT: ld r7, 8(r3) -; CHECK-P8-NEXT: ld r8, 0(r4) -; CHECK-P8-NEXT: ld r6, 8(r4) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: mr r30, r5 -; CHECK-P8-NEXT: mr r3, r9 -; CHECK-P8-NEXT: mr r4, r7 -; CHECK-P8-NEXT: mr r5, r8 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __trunckfdf2 @@ -1240,9 +1206,7 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: ld r5, 0(r3) -; CHECK-P8-NEXT: ld r4, 8(r3) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: bl __trunckfsf2 ; 
CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -1279,9 +1243,7 @@ ; CHECK-P8-NEXT: addis r4, r2, .LC6@toc@ha ; CHECK-P8-NEXT: mr r30, r3 ; CHECK-P8-NEXT: ld r4, .LC6@toc@l(r4) -; CHECK-P8-NEXT: ld r5, 0(r4) -; CHECK-P8-NEXT: ld r4, 8(r4) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: lvx v2, 0, r4 ; CHECK-P8-NEXT: bl __trunckfsf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: stfsx f1, 0, r30 @@ -1325,9 +1287,8 @@ ; CHECK-P8-NEXT: addis r4, r2, .LC7@toc@ha ; CHECK-P8-NEXT: mr r29, r3 ; CHECK-P8-NEXT: ld r4, .LC7@toc@l(r4) -; CHECK-P8-NEXT: ld r5, 48(r4) -; CHECK-P8-NEXT: ld r4, 56(r4) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: addi r4, r4, 48 +; CHECK-P8-NEXT: lvx v2, 0, r4 ; CHECK-P8-NEXT: bl __trunckfsf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: sldi r3, r30, 2 @@ -1368,14 +1329,9 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: ld r9, 0(r3) -; CHECK-P8-NEXT: ld r7, 8(r3) -; CHECK-P8-NEXT: ld r8, 0(r4) -; CHECK-P8-NEXT: ld r6, 8(r4) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: mr r30, r5 -; CHECK-P8-NEXT: mr r3, r9 -; CHECK-P8-NEXT: mr r4, r7 -; CHECK-P8-NEXT: mr r5, r8 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __trunckfsf2 @@ -1444,10 +1400,9 @@ ; CHECK-P8-NEXT: lfdx f1, 0, r3 ; CHECK-P8-NEXT: bl __extenddfkf2 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: addis r5, r2, .LC8@toc@ha -; CHECK-P8-NEXT: ld r5, .LC8@toc@l(r5) -; CHECK-P8-NEXT: std r4, 8(r5) -; CHECK-P8-NEXT: std r3, 0(r5) +; CHECK-P8-NEXT: addis r3, r2, .LC8@toc@ha +; CHECK-P8-NEXT: ld r3, .LC8@toc@l(r3) +; CHECK-P8-NEXT: stvx v2, 0, r3 ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 @@ -1482,10 +1437,9 @@ ; CHECK-P8-NEXT: lfdx f1, r3, r4 ; CHECK-P8-NEXT: bl __extenddfkf2 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: addis r5, r2, .LC8@toc@ha -; CHECK-P8-NEXT: ld r5, .LC8@toc@l(r5) -; CHECK-P8-NEXT: std r4, 8(r5) -; 
CHECK-P8-NEXT: std r3, 0(r5) +; CHECK-P8-NEXT: addis r3, r2, .LC8@toc@ha +; CHECK-P8-NEXT: ld r3, .LC8@toc@l(r3) +; CHECK-P8-NEXT: stvx v2, 0, r3 ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 @@ -1524,9 +1478,8 @@ ; CHECK-P8-NEXT: mr r29, r3 ; CHECK-P8-NEXT: bl __extenddfkf2 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: sldi r5, r30, 4 -; CHECK-P8-NEXT: stdux r3, r29, r5 -; CHECK-P8-NEXT: std r4, 8(r29) +; CHECK-P8-NEXT: sldi r3, r30, 4 +; CHECK-P8-NEXT: stvx v2, r29, r3 ; CHECK-P8-NEXT: addi r1, r1, 64 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1562,8 +1515,7 @@ ; CHECK-P8-NEXT: mr r30, r4 ; CHECK-P8-NEXT: bl __extenddfkf2 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1622,10 +1574,9 @@ ; CHECK-P8-NEXT: lfsx f1, 0, r3 ; CHECK-P8-NEXT: bl __extendsfkf2 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: addis r5, r2, .LC8@toc@ha -; CHECK-P8-NEXT: ld r5, .LC8@toc@l(r5) -; CHECK-P8-NEXT: std r4, 8(r5) -; CHECK-P8-NEXT: std r3, 0(r5) +; CHECK-P8-NEXT: addis r3, r2, .LC8@toc@ha +; CHECK-P8-NEXT: ld r3, .LC8@toc@l(r3) +; CHECK-P8-NEXT: stvx v2, 0, r3 ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 @@ -1660,10 +1611,9 @@ ; CHECK-P8-NEXT: lfsx f1, r3, r4 ; CHECK-P8-NEXT: bl __extendsfkf2 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: addis r5, r2, .LC8@toc@ha -; CHECK-P8-NEXT: ld r5, .LC8@toc@l(r5) -; CHECK-P8-NEXT: std r4, 8(r5) -; CHECK-P8-NEXT: std r3, 0(r5) +; CHECK-P8-NEXT: addis r3, r2, .LC8@toc@ha +; CHECK-P8-NEXT: ld r3, .LC8@toc@l(r3) +; CHECK-P8-NEXT: stvx v2, 0, r3 ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 @@ -1702,9 +1652,8 @@ ; CHECK-P8-NEXT: mr r29, r3 ; CHECK-P8-NEXT: bl __extendsfkf2 ; CHECK-P8-NEXT: nop -; 
CHECK-P8-NEXT: sldi r5, r30, 4 -; CHECK-P8-NEXT: stdux r3, r29, r5 -; CHECK-P8-NEXT: std r4, 8(r29) +; CHECK-P8-NEXT: sldi r3, r30, 4 +; CHECK-P8-NEXT: stvx v2, r29, r3 ; CHECK-P8-NEXT: addi r1, r1, 64 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1740,8 +1689,7 @@ ; CHECK-P8-NEXT: mr r30, r4 ; CHECK-P8-NEXT: bl __extendsfkf2 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1779,8 +1727,7 @@ ; CHECK-P8-NEXT: extsw r3, r3 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1816,8 +1763,7 @@ ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: bl __floatdikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1855,8 +1801,7 @@ ; CHECK-P8-NEXT: extsw r3, r3 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1892,8 +1837,7 @@ ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: bl __floatdikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1931,8 +1875,7 @@ ; CHECK-P8-NEXT: clrldi r3, r3, 32 ; CHECK-P8-NEXT: bl 
__floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1968,8 +1911,7 @@ ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: bl __floatundikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -2007,8 +1949,7 @@ ; CHECK-P8-NEXT: clrldi r3, r3, 32 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -2044,8 +1985,7 @@ ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: bl __floatundikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: std r3, 0(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -2082,9 +2022,7 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: ld r5, 0(r3) -; CHECK-P8-NEXT: ld r4, 8(r3) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: bl __fixtfti ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -2121,9 +2059,7 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: ld r5, 0(r3) -; CHECK-P8-NEXT: ld r4, 8(r3) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: bl __fixunstfti ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 diff --git a/llvm/test/CodeGen/PowerPC/f128-rounding.ll 
b/llvm/test/CodeGen/PowerPC/f128-rounding.ll --- a/llvm/test/CodeGen/PowerPC/f128-rounding.ll +++ b/llvm/test/CodeGen/PowerPC/f128-rounding.ll @@ -2,7 +2,7 @@ ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs \ ; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s ; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs \ -; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s \ +; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s -ppc-set-fp128-legal | FileCheck %s \ ; RUN: -check-prefix=CHECK-P8 define void @qp_trunc(fp128* nocapture readonly %a, fp128* nocapture %res) { @@ -22,15 +22,11 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: ld r5, 0(r3) -; CHECK-P8-NEXT: ld r6, 8(r3) +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 -; CHECK-P8-NEXT: mr r3, r5 -; CHECK-P8-NEXT: mr r4, r6 ; CHECK-P8-NEXT: bl truncl ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r3, 0(r30) -; CHECK-P8-NEXT: std r4, 8(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -61,15 +57,11 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: ld r5, 0(r3) -; CHECK-P8-NEXT: ld r6, 8(r3) +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 -; CHECK-P8-NEXT: mr r3, r5 -; CHECK-P8-NEXT: mr r4, r6 ; CHECK-P8-NEXT: bl rintl ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r3, 0(r30) -; CHECK-P8-NEXT: std r4, 8(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -100,15 +92,11 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; 
CHECK-P8-NEXT: ld r5, 0(r3) -; CHECK-P8-NEXT: ld r6, 8(r3) +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 -; CHECK-P8-NEXT: mr r3, r5 -; CHECK-P8-NEXT: mr r4, r6 ; CHECK-P8-NEXT: bl nearbyintl ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r3, 0(r30) -; CHECK-P8-NEXT: std r4, 8(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -139,15 +127,11 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: ld r5, 0(r3) -; CHECK-P8-NEXT: ld r6, 8(r3) +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 -; CHECK-P8-NEXT: mr r3, r5 -; CHECK-P8-NEXT: mr r4, r6 ; CHECK-P8-NEXT: bl roundl ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r3, 0(r30) -; CHECK-P8-NEXT: std r4, 8(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -178,15 +162,11 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: ld r5, 0(r3) -; CHECK-P8-NEXT: ld r6, 8(r3) +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 -; CHECK-P8-NEXT: mr r3, r5 -; CHECK-P8-NEXT: mr r4, r6 ; CHECK-P8-NEXT: bl floorl ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r3, 0(r30) -; CHECK-P8-NEXT: std r4, 8(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -217,15 +197,11 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: ld r5, 0(r3) -; CHECK-P8-NEXT: ld r6, 8(r3) +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 -; CHECK-P8-NEXT: mr r3, r5 -; CHECK-P8-NEXT: mr r4, r6 ; CHECK-P8-NEXT: bl ceill ; CHECK-P8-NEXT: nop -; 
CHECK-P8-NEXT: std r3, 0(r30) -; CHECK-P8-NEXT: std r4, 8(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll --- a/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll @@ -6,7 +6,7 @@ ; RUN: < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s \ ; RUN: -check-prefix=P9 ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ -; RUN: < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr8 -mattr=-vsx \ +; RUN: < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr8 -ppc-set-fp128-legal -mattr=-vsx \ ; RUN: | FileCheck %s -check-prefix=NOVSX declare i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128, metadata) @@ -681,6 +681,7 @@ ; NOVSX-NEXT: stdu r1, -32(r1) ; NOVSX-NEXT: .cfi_def_cfa_offset 32 ; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: clrldi r3, r3, 32 ; NOVSX-NEXT: bl __floatsikf ; NOVSX-NEXT: nop ; NOVSX-NEXT: addi r1, r1, 32