diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -121,11 +121,6 @@ static cl::opt UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden); -// TODO - Remove this option if soft fp128 has been fully supported . -static cl::opt - EnableSoftFP128("enable-soft-fp128", - cl::desc("temp option to enable soft fp128"), cl::Hidden); - STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumSiblingCalls, "Number of sibling calls"); STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM"); @@ -1114,6 +1109,25 @@ addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass); } + if (Subtarget.hasAltivec()) { + addRegisterClass(MVT::f128, &PPC::VRRCRegClass); + for (MVT FPT : MVT::fp_valuetypes()) + setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand); + + // Expand the SELECT to SELECT_CC + setOperationAction(ISD::SELECT, MVT::f128, Expand); + + setTruncStoreAction(MVT::f128, MVT::f64, Expand); + setTruncStoreAction(MVT::f128, MVT::f32, Expand); + + // No implementation for these ops for PowerPC. + setOperationAction(ISD::FSIN, MVT::f128, Expand); + setOperationAction(ISD::FCOS, MVT::f128, Expand); + setOperationAction(ISD::FPOW, MVT::f128, Expand); + setOperationAction(ISD::FPOWI, MVT::f128, Expand); + setOperationAction(ISD::FREM, MVT::f128, Expand); + } + if (Subtarget.hasP9Vector()) { setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); @@ -1125,15 +1139,12 @@ setOperationAction(ISD::SRL, MVT::v1i128, Legal); setOperationAction(ISD::SRA, MVT::v1i128, Expand); - addRegisterClass(MVT::f128, &PPC::VRRCRegClass); setOperationAction(ISD::FADD, MVT::f128, Legal); setOperationAction(ISD::FSUB, MVT::f128, Legal); setOperationAction(ISD::FDIV, MVT::f128, Legal); setOperationAction(ISD::FMUL, MVT::f128, Legal); setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal); - // No extending loads to f128 on PPC. - for (MVT FPT : MVT::fp_valuetypes()) - setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand); + setOperationAction(ISD::FMA, MVT::f128, Legal); setCondCodeAction(ISD::SETULT, MVT::f128, Expand); setCondCodeAction(ISD::SETUGT, MVT::f128, Expand); @@ -1149,18 +1160,9 @@ setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal); setOperationAction(ISD::FROUND, MVT::f128, Legal); - setOperationAction(ISD::SELECT, MVT::f128, Expand); setOperationAction(ISD::FP_ROUND, MVT::f64, Legal); setOperationAction(ISD::FP_ROUND, MVT::f32, Legal); - setTruncStoreAction(MVT::f128, MVT::f64, Expand); - setTruncStoreAction(MVT::f128, MVT::f32, Expand); setOperationAction(ISD::BITCAST, MVT::i128, Custom); - // No implementation for these ops for PowerPC. 
- setOperationAction(ISD::FSIN, MVT::f128, Expand); - setOperationAction(ISD::FCOS, MVT::f128, Expand); - setOperationAction(ISD::FPOW, MVT::f128, Expand); - setOperationAction(ISD::FPOWI, MVT::f128, Expand); - setOperationAction(ISD::FREM, MVT::f128, Expand); // Handle constrained floating-point operations of fp128 setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal); @@ -1183,15 +1185,9 @@ setOperationAction(ISD::BSWAP, MVT::v4i32, Legal); setOperationAction(ISD::BSWAP, MVT::v2i64, Legal); setOperationAction(ISD::BSWAP, MVT::v1i128, Legal); - } else if (Subtarget.hasAltivec() && EnableSoftFP128) { - addRegisterClass(MVT::f128, &PPC::VRRCRegClass); - - for (MVT FPT : MVT::fp_valuetypes()) - setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand); - + } else if (Subtarget.hasAltivec()) { setOperationAction(ISD::LOAD, MVT::f128, Promote); setOperationAction(ISD::STORE, MVT::f128, Promote); - AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32); AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32); @@ -1199,23 +1195,14 @@ // fp_to_uint and int_to_fp. setOperationAction(ISD::FADD, MVT::f128, LibCall); setOperationAction(ISD::FSUB, MVT::f128, LibCall); - setOperationAction(ISD::FMUL, MVT::f128, Expand); setOperationAction(ISD::FDIV, MVT::f128, Expand); setOperationAction(ISD::FNEG, MVT::f128, Expand); setOperationAction(ISD::FABS, MVT::f128, Expand); - setOperationAction(ISD::FSIN, MVT::f128, Expand); - setOperationAction(ISD::FCOS, MVT::f128, Expand); - setOperationAction(ISD::FPOW, MVT::f128, Expand); - setOperationAction(ISD::FPOWI, MVT::f128, Expand); - setOperationAction(ISD::FREM, MVT::f128, Expand); setOperationAction(ISD::FSQRT, MVT::f128, Expand); setOperationAction(ISD::FMA, MVT::f128, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); - setTruncStoreAction(MVT::f128, MVT::f64, Expand); - setTruncStoreAction(MVT::f128, MVT::f32, Expand); - // Expand the fp_extend if the target type is fp128. 
setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Expand); @@ -1226,9 +1213,6 @@ setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom); } - // Expand the SELECT to SELECT_CC - setOperationAction(ISD::SELECT, MVT::f128, Expand); - setOperationAction(ISD::SETCC, MVT::f128, LibCall); setOperationAction(ISD::STRICT_FSETCC, MVT::f128, LibCall); setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, LibCall); @@ -14490,6 +14474,9 @@ (Op1VT == MVT::i32 || Op1VT == MVT::i64 || (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8))); + if (ResVT == MVT::f128 && !Subtarget.hasP9Vector()) + return SDValue(); + if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() || cast(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt) return SDValue(); diff --git a/llvm/test/CodeGen/PowerPC/f128-aggregates.ll b/llvm/test/CodeGen/PowerPC/f128-aggregates.ll --- a/llvm/test/CodeGen/PowerPC/f128-aggregates.ll +++ b/llvm/test/CodeGen/PowerPC/f128-aggregates.ll @@ -30,9 +30,8 @@ ; ; CHECK-P8-LABEL: testArray_01: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: ld r5, 32(r3) -; CHECK-P8-NEXT: ld r4, 40(r3) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: addi r3, r3, 32 +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: blr entry: @@ -60,9 +59,9 @@ ; CHECK-P8-LABEL: testArray_02: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r3, r2, .LC0@toc@ha -; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r3) -; CHECK-P8-NEXT: ld r3, 32(r4) -; CHECK-P8-NEXT: ld r4, 40(r4) +; CHECK-P8-NEXT: ld r3, .LC0@toc@l(r3) +; CHECK-P8-NEXT: addi r3, r3, 32 +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: blr entry: @@ -103,8 +102,7 @@ ; ; CHECK-P8-LABEL: testStruct_02: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: ld r3, 144(r1) -; CHECK-P8-NEXT: ld r4, 152(r1) +; CHECK-P8-NEXT: vmr v2, v9 ; CHECK-P8-NEXT: blr entry: @@ -144,14 +142,13 @@ ; ; CHECK-P8-LABEL: testStruct_03: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: ld r11, 128(r1) -; CHECK-P8-NEXT: ld r12, 136(r1) +; CHECK-P8-NEXT: li r11, 96 +; CHECK-P8-NEXT: addi r12, r1, 32 ; CHECK-P8-NEXT: std r3, 32(r1) ; CHECK-P8-NEXT: std r4, 40(r1) ; CHECK-P8-NEXT: std r5, 48(r1) ; CHECK-P8-NEXT: std r6, 56(r1) -; CHECK-P8-NEXT: mr r3, r11 -; CHECK-P8-NEXT: mr r4, r12 +; CHECK-P8-NEXT: lvx v2, r12, r11 ; CHECK-P8-NEXT: std r7, 64(r1) ; CHECK-P8-NEXT: std r8, 72(r1) ; CHECK-P8-NEXT: std r9, 80(r1) @@ -179,8 +176,7 @@ ; ; CHECK-P8-LABEL: testStruct_04: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mr r4, r10 -; CHECK-P8-NEXT: mr r3, r9 +; CHECK-P8-NEXT: vmr v2, v5 ; CHECK-P8-NEXT: blr entry: @@ -240,8 +236,7 @@ ; ; CHECK-P8-LABEL: testHUnion_03: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mr r4, r6 -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: vmr v2, v3 ; CHECK-P8-NEXT: blr entry: @@ -263,8 +258,7 @@ ; ; CHECK-P8-LABEL: testHUnion_04: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mr r4, r8 -; CHECK-P8-NEXT: mr r3, r7 +; CHECK-P8-NEXT: vmr v2, v4 ; CHECK-P8-NEXT: blr entry: @@ -291,8 +285,10 @@ ; ; CHECK-P8-LABEL: testMixedAggregate: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mr r4, r8 -; CHECK-P8-NEXT: mr r3, r7 +; CHECK-P8-NEXT: addi r3, r1, -16 +; CHECK-P8-NEXT: std r8, -8(r1) +; CHECK-P8-NEXT: std r7, -16(r1) +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: blr entry: @@ -315,8 +311,10 @@ ; ; CHECK-P8-LABEL: testMixedAggregate_02: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mr r4, r6 -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: addi r3, r1, -16 +; CHECK-P8-NEXT: std r6, -8(r1) +; CHECK-P8-NEXT: std r5, -16(r1) +; 
CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: blr entry: @@ -352,44 +350,40 @@ ; CHECK-P8-LABEL: testMixedAggregate_03: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: mflr r0 -; CHECK-P8-NEXT: .cfi_def_cfa_offset 64 +; CHECK-P8-NEXT: std r0, 16(r1) +; CHECK-P8-NEXT: stdu r1, -96(r1) +; CHECK-P8-NEXT: .cfi_def_cfa_offset 96 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: .cfi_offset r28, -32 -; CHECK-P8-NEXT: .cfi_offset r29, -24 ; CHECK-P8-NEXT: .cfi_offset r30, -16 -; CHECK-P8-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r0, 16(r1) -; CHECK-P8-NEXT: stdu r1, -64(r1) +; CHECK-P8-NEXT: .cfi_offset v31, -32 +; CHECK-P8-NEXT: li r4, 64 +; CHECK-P8-NEXT: std r30, 80(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: extsw r3, r3 ; CHECK-P8-NEXT: mr r30, r10 -; CHECK-P8-NEXT: mr r29, r6 -; CHECK-P8-NEXT: mr r28, r5 +; CHECK-P8-NEXT: stvx v31, r1, r4 # 16-byte Folded Spill +; CHECK-P8-NEXT: addi r4, r1, 48 +; CHECK-P8-NEXT: std r6, 56(r1) +; CHECK-P8-NEXT: std r5, 48(r1) +; CHECK-P8-NEXT: lvx v31, 0, r4 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r5, r3 -; CHECK-P8-NEXT: mr r6, r4 -; CHECK-P8-NEXT: mr r3, r28 -; CHECK-P8-NEXT: mr r4, r29 +; CHECK-P8-NEXT: vmr v3, v2 +; CHECK-P8-NEXT: vmr v2, v31 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r29, r3 ; CHECK-P8-NEXT: mr r3, r30 -; CHECK-P8-NEXT: mr r28, r4 +; CHECK-P8-NEXT: vmr v31, v2 ; CHECK-P8-NEXT: bl __floatdikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r5, r3 -; CHECK-P8-NEXT: mr r6, r4 -; CHECK-P8-NEXT: mr r3, r29 -; CHECK-P8-NEXT: mr r4, r28 +; CHECK-P8-NEXT: vmr v3, v2 +; CHECK-P8-NEXT: vmr v2, v31 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: addi r1, r1, 64 +; CHECK-P8-NEXT: li r3, 64 +; CHECK-P8-NEXT: ld r30, 80(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-P8-NEXT: addi r1, r1, 96 ; CHECK-P8-NEXT: ld r0, 16(r1) -; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-P8-NEXT: mtlr r0 ; CHECK-P8-NEXT: blr entry: @@ -438,16 +432,17 @@ ; ; CHECK-P8-LABEL: testNestedAggregate: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: std r3, 32(r1) -; CHECK-P8-NEXT: std r4, 40(r1) -; CHECK-P8-NEXT: mr r3, r7 -; CHECK-P8-NEXT: mr r4, r8 +; CHECK-P8-NEXT: li r11, 32 ; CHECK-P8-NEXT: std r8, 72(r1) ; CHECK-P8-NEXT: std r7, 64(r1) -; CHECK-P8-NEXT: std r5, 48(r1) -; CHECK-P8-NEXT: std r6, 56(r1) ; CHECK-P8-NEXT: std r9, 80(r1) ; CHECK-P8-NEXT: std r10, 88(r1) +; CHECK-P8-NEXT: addi r7, r1, 32 +; CHECK-P8-NEXT: lvx v2, r7, r11 +; CHECK-P8-NEXT: std r3, 32(r1) +; CHECK-P8-NEXT: std r4, 40(r1) +; CHECK-P8-NEXT: std r5, 48(r1) +; CHECK-P8-NEXT: std r6, 56(r1) ; CHECK-P8-NEXT: blr entry: @@ -470,6 +465,10 @@ ; ; CHECK-P8-LABEL: testUnion_01: ; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: addi r5, r1, -16 +; CHECK-P8-NEXT: std r4, -8(r1) +; CHECK-P8-NEXT: std r3, -16(r1) +; CHECK-P8-NEXT: lvx v2, 0, r5 ; CHECK-P8-NEXT: blr entry: @@ -492,6 +491,10 @@ ; ; CHECK-P8-LABEL: testUnion_02: ; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: addi r5, r1, -16 +; CHECK-P8-NEXT: std r4, -8(r1) +; CHECK-P8-NEXT: std r3, -16(r1) +; CHECK-P8-NEXT: lvx v2, 0, r5 ; CHECK-P8-NEXT: blr entry: @@ -514,8 +517,10 @@ ; ; CHECK-P8-LABEL: testUnion_03: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mr r4, r8 
-; CHECK-P8-NEXT: mr r3, r7 +; CHECK-P8-NEXT: addi r3, r1, -16 +; CHECK-P8-NEXT: std r8, -8(r1) +; CHECK-P8-NEXT: std r7, -16(r1) +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: blr entry: @@ -577,39 +582,44 @@ ; CHECK-P8-LABEL: sum_float128: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: mflr r0 -; CHECK-P8-NEXT: std r0, 16(r1) -; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: .cfi_def_cfa_offset 48 +; CHECK-P8-NEXT: .cfi_def_cfa_offset 64 ; CHECK-P8-NEXT: .cfi_offset lr, 16 +; CHECK-P8-NEXT: .cfi_offset r30, -16 +; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: std r0, 16(r1) +; CHECK-P8-NEXT: stdu r1, -64(r1) +; CHECK-P8-NEXT: addis r11, r2, .LCPI17_0@toc@ha ; CHECK-P8-NEXT: cmpwi r3, 1 -; CHECK-P8-NEXT: std r4, 88(r1) -; CHECK-P8-NEXT: std r5, 96(r1) -; CHECK-P8-NEXT: std r6, 104(r1) -; CHECK-P8-NEXT: std r7, 112(r1) -; CHECK-P8-NEXT: std r8, 120(r1) -; CHECK-P8-NEXT: std r9, 128(r1) -; CHECK-P8-NEXT: std r10, 136(r1) +; CHECK-P8-NEXT: std r4, 104(r1) +; CHECK-P8-NEXT: std r5, 112(r1) +; CHECK-P8-NEXT: std r6, 120(r1) +; CHECK-P8-NEXT: std r7, 128(r1) +; CHECK-P8-NEXT: addi r3, r11, .LCPI17_0@toc@l +; CHECK-P8-NEXT: std r8, 136(r1) +; CHECK-P8-NEXT: std r9, 144(r1) +; CHECK-P8-NEXT: std r10, 152(r1) ; CHECK-P8-NEXT: blt cr0, .LBB17_2 ; CHECK-P8-NEXT: # %bb.1: # %if.end -; CHECK-P8-NEXT: ld r3, 88(r1) -; CHECK-P8-NEXT: ld r4, 96(r1) -; CHECK-P8-NEXT: li r5, 0 -; CHECK-P8-NEXT: li r6, 0 +; CHECK-P8-NEXT: addi r30, r1, 104 +; CHECK-P8-NEXT: lvx v3, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r30 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: ld r5, 104(r1) -; CHECK-P8-NEXT: ld r6, 112(r1) -; CHECK-P8-NEXT: addi r7, r1, 120 -; CHECK-P8-NEXT: std r7, 40(r1) +; CHECK-P8-NEXT: li r3, 16 +; CHECK-P8-NEXT: lxvd2x vs0, r30, r3 +; CHECK-P8-NEXT: addi r3, r1, 136 +; CHECK-P8-NEXT: std r3, 40(r1) +; CHECK-P8-NEXT: xxswapd v3, vs0 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: b .LBB17_3 ; CHECK-P8-NEXT: .LBB17_2: -; CHECK-P8-NEXT: li r3, 0 -; CHECK-P8-NEXT: li r4, 0 +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: .LBB17_3: # %cleanup -; CHECK-P8-NEXT: addi r1, r1, 48 +; CHECK-P8-NEXT: addi r1, r1, 64 ; CHECK-P8-NEXT: ld r0, 16(r1) +; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-P8-NEXT: mtlr r0 ; CHECK-P8-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/f128-arith.ll b/llvm/test/CodeGen/PowerPC/f128-arith.ll --- a/llvm/test/CodeGen/PowerPC/f128-arith.ll +++ b/llvm/test/CodeGen/PowerPC/f128-arith.ll @@ -2,7 +2,7 @@ ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs \ ; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s ; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs \ -; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -enable-soft-fp128 | FileCheck %s \ +; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s \ ; RUN: -check-prefix=CHECK-P8 ; Function Attrs: norecurse nounwind diff --git a/llvm/test/CodeGen/PowerPC/f128-bitcast.ll b/llvm/test/CodeGen/PowerPC/f128-bitcast.ll --- a/llvm/test/CodeGen/PowerPC/f128-bitcast.ll +++ b/llvm/test/CodeGen/PowerPC/f128-bitcast.ll @@ -22,6 +22,8 @@ ; ; CHECK-P8-LABEL: getPart1: ; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr entry: %0 = bitcast fp128 %in to i128 @@ -43,7 +45,7 @@ ; ; CHECK-P8-LABEL: getPart2: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mr r3, r4 +; 
CHECK-P8-NEXT: mfvsrd r3, v2 ; CHECK-P8-NEXT: blr entry: %0 = bitcast fp128 %in to i128 @@ -70,11 +72,8 @@ ; ; CHECK-P8-LABEL: checkBitcast: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mtfprd f0, r3 -; CHECK-P8-NEXT: mtfprd f1, r4 -; CHECK-P8-NEXT: xxmrghd v3, vs1, vs0 -; CHECK-P8-NEXT: xxswapd vs0, v3 -; CHECK-P8-NEXT: vaddudm v2, v3, v2 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: vaddudm v2, v2, v3 ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r7 diff --git a/llvm/test/CodeGen/PowerPC/f128-compare.ll b/llvm/test/CodeGen/PowerPC/f128-compare.ll --- a/llvm/test/CodeGen/PowerPC/f128-compare.ll +++ b/llvm/test/CodeGen/PowerPC/f128-compare.ll @@ -2,7 +2,7 @@ ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs \ ; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s ; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs \ -; RUN: -enable-soft-fp128 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s \ +; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s \ ; RUN: -check-prefix=CHECK-P8 @a_qp = common global fp128 0xL00000000000000000000000000000000, align 16 diff --git a/llvm/test/CodeGen/PowerPC/f128-conv.ll b/llvm/test/CodeGen/PowerPC/f128-conv.ll --- a/llvm/test/CodeGen/PowerPC/f128-conv.ll +++ b/llvm/test/CodeGen/PowerPC/f128-conv.ll @@ -4,7 +4,7 @@ ; RUN: | FileCheck %s ; RUN: llc -relocation-model=pic -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \ ; RUN: -ppc-vsr-nums-as-vr -verify-machineinstrs -ppc-asm-full-reg-names < %s \ -; RUN: -enable-soft-fp128 | FileCheck %s -check-prefix=CHECK-P8 +; RUN: | FileCheck %s -check-prefix=CHECK-P8 @mem = global [5 x i64] [i64 56, i64 63, i64 3, i64 5, i64 6], align 8 @umem = global [5 x i64] [i64 560, i64 100, i64 34, i64 2, i64 5], align 8 diff --git a/llvm/test/CodeGen/PowerPC/f128-fma.ll b/llvm/test/CodeGen/PowerPC/f128-fma.ll --- a/llvm/test/CodeGen/PowerPC/f128-fma.ll +++ b/llvm/test/CodeGen/PowerPC/f128-fma.ll @@ -18,40 +18,30 @@ ; CHECK-P8-LABEL: qpFmadd: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: mflr r0 -; CHECK-P8-NEXT: .cfi_def_cfa_offset 64 +; CHECK-P8-NEXT: std r0, 16(r1) +; CHECK-P8-NEXT: stdu r1, -80(r1) +; CHECK-P8-NEXT: .cfi_def_cfa_offset 80 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: .cfi_offset r28, -32 -; CHECK-P8-NEXT: .cfi_offset r29, -24 ; CHECK-P8-NEXT: .cfi_offset r30, -16 -; CHECK-P8-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r0, 16(r1) -; CHECK-P8-NEXT: stdu r1, -64(r1) -; CHECK-P8-NEXT: ld r7, 0(r3) -; CHECK-P8-NEXT: ld r8, 8(r3) -; CHECK-P8-NEXT: ld r9, 0(r4) -; CHECK-P8-NEXT: ld r10, 8(r4) -; CHECK-P8-NEXT: mr r28, r6 -; CHECK-P8-NEXT: ld r30, 0(r5) -; CHECK-P8-NEXT: ld r29, 8(r5) -; CHECK-P8-NEXT: mr r3, r7 -; CHECK-P8-NEXT: mr r4, r8 -; CHECK-P8-NEXT: mr r5, r9 -; CHECK-P8-NEXT: mr r6, r10 +; CHECK-P8-NEXT: .cfi_offset v31, -32 +; CHECK-P8-NEXT: li r7, 48 +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: std r30, 64(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: mr r30, r6 +; CHECK-P8-NEXT: stvx v31, r1, r7 # 16-byte Folded Spill +; CHECK-P8-NEXT: lvx v31, 0, r5 ; CHECK-P8-NEXT: bl __mulkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r5, r30 -; CHECK-P8-NEXT: mr r6, r29 +; CHECK-P8-NEXT: vmr v3, v31 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r3, 
0(r28) -; CHECK-P8-NEXT: std r4, 8(r28) -; CHECK-P8-NEXT: addi r1, r1, 64 +; CHECK-P8-NEXT: li r3, 48 +; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-P8-NEXT: addi r1, r1, 80 ; CHECK-P8-NEXT: ld r0, 16(r1) -; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-P8-NEXT: mtlr r0 ; CHECK-P8-NEXT: blr fp128* nocapture readonly %c, fp128* nocapture %res) { @@ -79,40 +69,31 @@ ; CHECK-P8-LABEL: qpFmadd_02: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: mflr r0 -; CHECK-P8-NEXT: .cfi_def_cfa_offset 64 +; CHECK-P8-NEXT: std r0, 16(r1) +; CHECK-P8-NEXT: stdu r1, -80(r1) +; CHECK-P8-NEXT: .cfi_def_cfa_offset 80 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: .cfi_offset r28, -32 -; CHECK-P8-NEXT: .cfi_offset r29, -24 ; CHECK-P8-NEXT: .cfi_offset r30, -16 -; CHECK-P8-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r0, 16(r1) -; CHECK-P8-NEXT: stdu r1, -64(r1) -; CHECK-P8-NEXT: ld r7, 0(r5) -; CHECK-P8-NEXT: ld r8, 8(r5) -; CHECK-P8-NEXT: ld r30, 0(r3) -; CHECK-P8-NEXT: ld r29, 8(r3) -; CHECK-P8-NEXT: mr r28, r6 -; CHECK-P8-NEXT: ld r3, 0(r4) -; CHECK-P8-NEXT: ld r4, 8(r4) -; CHECK-P8-NEXT: mr r5, r7 -; CHECK-P8-NEXT: mr r6, r8 +; CHECK-P8-NEXT: .cfi_offset v31, -32 +; CHECK-P8-NEXT: li r7, 48 +; CHECK-P8-NEXT: lvx v2, 0, r4 +; CHECK-P8-NEXT: lvx v3, 0, r5 +; CHECK-P8-NEXT: std r30, 64(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: mr r30, r6 +; CHECK-P8-NEXT: stvx v31, r1, r7 # 16-byte Folded Spill +; CHECK-P8-NEXT: lvx v31, 0, r3 ; CHECK-P8-NEXT: bl __mulkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r5, r3 -; CHECK-P8-NEXT: mr r6, r4 -; CHECK-P8-NEXT: mr r3, r30 -; CHECK-P8-NEXT: mr r4, r29 +; CHECK-P8-NEXT: vmr v3, v2 +; CHECK-P8-NEXT: vmr v2, v31 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r3, 0(r28) -; CHECK-P8-NEXT: std r4, 8(r28) -; CHECK-P8-NEXT: addi r1, r1, 64 +; CHECK-P8-NEXT: li r3, 48 +; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-P8-NEXT: addi r1, r1, 80 ; CHECK-P8-NEXT: ld r0, 16(r1) -; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-P8-NEXT: mtlr r0 ; CHECK-P8-NEXT: blr fp128* nocapture readonly %b, @@ -149,23 +130,16 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -64(r1) -; CHECK-P8-NEXT: ld r9, 0(r3) -; CHECK-P8-NEXT: ld r7, 8(r3) -; CHECK-P8-NEXT: ld r8, 0(r4) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: mr r30, r6 -; CHECK-P8-NEXT: ld r6, 8(r4) ; CHECK-P8-NEXT: mr r29, r5 -; CHECK-P8-NEXT: mr r3, r9 -; CHECK-P8-NEXT: mr r4, r7 -; CHECK-P8-NEXT: mr r5, r8 ; CHECK-P8-NEXT: bl __mulkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: ld r5, 0(r29) -; CHECK-P8-NEXT: ld r6, 8(r29) +; CHECK-P8-NEXT: lvx v3, 0, r29 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r3, 0(r30) -; CHECK-P8-NEXT: std r4, 8(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 64 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte 
Folded Reload @@ -198,43 +172,37 @@ ; CHECK-P8-LABEL: qpFnmadd: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: mflr r0 -; CHECK-P8-NEXT: .cfi_def_cfa_offset 64 +; CHECK-P8-NEXT: std r0, 16(r1) +; CHECK-P8-NEXT: stdu r1, -96(r1) +; CHECK-P8-NEXT: .cfi_def_cfa_offset 96 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: .cfi_offset r28, -32 -; CHECK-P8-NEXT: .cfi_offset r29, -24 ; CHECK-P8-NEXT: .cfi_offset r30, -16 -; CHECK-P8-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r0, 16(r1) -; CHECK-P8-NEXT: stdu r1, -64(r1) -; CHECK-P8-NEXT: ld r7, 0(r5) -; CHECK-P8-NEXT: ld r8, 8(r5) -; CHECK-P8-NEXT: ld r30, 0(r3) -; CHECK-P8-NEXT: ld r29, 8(r3) -; CHECK-P8-NEXT: mr r28, r6 -; CHECK-P8-NEXT: ld r3, 0(r4) -; CHECK-P8-NEXT: ld r4, 8(r4) -; CHECK-P8-NEXT: mr r5, r7 -; CHECK-P8-NEXT: mr r6, r8 +; CHECK-P8-NEXT: .cfi_offset v31, -32 +; CHECK-P8-NEXT: li r7, 64 +; CHECK-P8-NEXT: lvx v2, 0, r4 +; CHECK-P8-NEXT: lvx v3, 0, r5 +; CHECK-P8-NEXT: std r30, 80(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: mr r30, r6 +; CHECK-P8-NEXT: stvx v31, r1, r7 # 16-byte Folded Spill +; CHECK-P8-NEXT: lvx v31, 0, r3 ; CHECK-P8-NEXT: bl __mulkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r5, r3 -; CHECK-P8-NEXT: mr r6, r4 -; CHECK-P8-NEXT: mr r3, r30 -; CHECK-P8-NEXT: mr r4, r29 +; CHECK-P8-NEXT: vmr v3, v2 +; CHECK-P8-NEXT: vmr v2, v31 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: li r5, 1 -; CHECK-P8-NEXT: std r3, 0(r28) -; CHECK-P8-NEXT: sldi r5, r5, 63 -; CHECK-P8-NEXT: xor r4, r4, r5 -; CHECK-P8-NEXT: std r4, 8(r28) -; CHECK-P8-NEXT: addi r1, r1, 64 +; CHECK-P8-NEXT: addi r3, r1, 48 +; CHECK-P8-NEXT: stvx v2, 0, r3 +; CHECK-P8-NEXT: lbz r4, 63(r1) +; CHECK-P8-NEXT: xori r4, r4, 128 +; CHECK-P8-NEXT: stb r4, 63(r1) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: li r3, 64 +; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: ld r30, 80(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: addi r1, r1, 96 ; CHECK-P8-NEXT: ld r0, 16(r1) -; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-P8-NEXT: mtlr r0 ; CHECK-P8-NEXT: blr fp128* nocapture readonly %b, @@ -264,35 +232,31 @@ ; CHECK-P8-LABEL: qpFnmadd_02: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: mflr r0 -; CHECK-P8-NEXT: .cfi_def_cfa_offset 64 +; CHECK-P8-NEXT: .cfi_def_cfa_offset 80 ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: .cfi_offset r29, -24 ; CHECK-P8-NEXT: .cfi_offset r30, -16 ; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) -; CHECK-P8-NEXT: stdu r1, -64(r1) -; CHECK-P8-NEXT: ld r9, 0(r3) -; CHECK-P8-NEXT: ld r7, 8(r3) -; CHECK-P8-NEXT: ld r8, 0(r4) +; CHECK-P8-NEXT: stdu r1, -80(r1) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: mr r30, r6 -; CHECK-P8-NEXT: ld r6, 8(r4) ; CHECK-P8-NEXT: mr r29, r5 -; CHECK-P8-NEXT: mr r3, r9 -; CHECK-P8-NEXT: mr r4, r7 -; CHECK-P8-NEXT: mr r5, r8 ; CHECK-P8-NEXT: bl __mulkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: ld r5, 0(r29) -; CHECK-P8-NEXT: ld r6, 8(r29) +; CHECK-P8-NEXT: lvx v3, 0, r29 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: li r5, 1 -; CHECK-P8-NEXT: std r3, 0(r30) -; CHECK-P8-NEXT: sldi r5, r5, 63 -; CHECK-P8-NEXT: xor 
r4, r4, r5 -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: addi r1, r1, 64 +; CHECK-P8-NEXT: addi r3, r1, 32 +; CHECK-P8-NEXT: stvx v2, 0, r3 +; CHECK-P8-NEXT: lbz r4, 47(r1) +; CHECK-P8-NEXT: xori r4, r4, 128 +; CHECK-P8-NEXT: stb r4, 47(r1) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: addi r1, r1, 80 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload @@ -325,40 +289,31 @@ ; CHECK-P8-LABEL: qpFmsub: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: mflr r0 -; CHECK-P8-NEXT: .cfi_def_cfa_offset 64 +; CHECK-P8-NEXT: std r0, 16(r1) +; CHECK-P8-NEXT: stdu r1, -80(r1) +; CHECK-P8-NEXT: .cfi_def_cfa_offset 80 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: .cfi_offset r28, -32 -; CHECK-P8-NEXT: .cfi_offset r29, -24 ; CHECK-P8-NEXT: .cfi_offset r30, -16 -; CHECK-P8-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r0, 16(r1) -; CHECK-P8-NEXT: stdu r1, -64(r1) -; CHECK-P8-NEXT: ld r7, 0(r5) -; CHECK-P8-NEXT: ld r8, 8(r5) -; CHECK-P8-NEXT: ld r30, 0(r3) -; CHECK-P8-NEXT: ld r29, 8(r3) -; CHECK-P8-NEXT: mr r28, r6 -; CHECK-P8-NEXT: ld r3, 0(r4) -; CHECK-P8-NEXT: ld r4, 8(r4) -; CHECK-P8-NEXT: mr r5, r7 -; CHECK-P8-NEXT: mr r6, r8 +; CHECK-P8-NEXT: .cfi_offset v31, -32 +; CHECK-P8-NEXT: li r7, 48 +; CHECK-P8-NEXT: lvx v2, 0, r4 +; CHECK-P8-NEXT: lvx v3, 0, r5 +; CHECK-P8-NEXT: std r30, 64(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: mr r30, r6 +; CHECK-P8-NEXT: stvx v31, r1, r7 # 16-byte Folded Spill +; CHECK-P8-NEXT: lvx v31, 0, r3 ; CHECK-P8-NEXT: bl __mulkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r5, r3 -; CHECK-P8-NEXT: mr r6, r4 -; CHECK-P8-NEXT: mr r3, r30 -; CHECK-P8-NEXT: mr r4, r29 +; CHECK-P8-NEXT: vmr v3, v2 +; CHECK-P8-NEXT: vmr v2, v31 ; CHECK-P8-NEXT: bl __subkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r3, 0(r28) -; CHECK-P8-NEXT: std r4, 8(r28) -; CHECK-P8-NEXT: addi r1, r1, 64 +; CHECK-P8-NEXT: li r3, 48 +; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-P8-NEXT: addi r1, r1, 80 ; CHECK-P8-NEXT: ld r0, 16(r1) -; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-P8-NEXT: mtlr r0 ; CHECK-P8-NEXT: blr fp128* nocapture readonly %b, @@ -395,23 +350,16 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -64(r1) -; CHECK-P8-NEXT: ld r9, 0(r3) -; CHECK-P8-NEXT: ld r7, 8(r3) -; CHECK-P8-NEXT: ld r8, 0(r4) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: mr r30, r6 -; CHECK-P8-NEXT: ld r6, 8(r4) ; CHECK-P8-NEXT: mr r29, r5 -; CHECK-P8-NEXT: mr r3, r9 -; CHECK-P8-NEXT: mr r4, r7 -; CHECK-P8-NEXT: mr r5, r8 ; CHECK-P8-NEXT: bl __mulkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: ld r5, 0(r29) -; CHECK-P8-NEXT: ld r6, 8(r29) +; CHECK-P8-NEXT: lvx v3, 0, r29 ; CHECK-P8-NEXT: bl __subkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: std r3, 0(r30) -; CHECK-P8-NEXT: std r4, 8(r30) +; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 64 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -445,43 +393,37 @@ ; CHECK-P8-LABEL: qpFnmsub: ; CHECK-P8: # %bb.0: # %entry ; 
CHECK-P8-NEXT: mflr r0 -; CHECK-P8-NEXT: .cfi_def_cfa_offset 64 +; CHECK-P8-NEXT: std r0, 16(r1) +; CHECK-P8-NEXT: stdu r1, -96(r1) +; CHECK-P8-NEXT: .cfi_def_cfa_offset 96 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: .cfi_offset r28, -32 -; CHECK-P8-NEXT: .cfi_offset r29, -24 ; CHECK-P8-NEXT: .cfi_offset r30, -16 -; CHECK-P8-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r0, 16(r1) -; CHECK-P8-NEXT: stdu r1, -64(r1) -; CHECK-P8-NEXT: ld r7, 0(r5) -; CHECK-P8-NEXT: ld r8, 8(r5) -; CHECK-P8-NEXT: ld r30, 0(r3) -; CHECK-P8-NEXT: ld r29, 8(r3) -; CHECK-P8-NEXT: mr r28, r6 -; CHECK-P8-NEXT: ld r3, 0(r4) -; CHECK-P8-NEXT: ld r4, 8(r4) -; CHECK-P8-NEXT: mr r5, r7 -; CHECK-P8-NEXT: mr r6, r8 +; CHECK-P8-NEXT: .cfi_offset v31, -32 +; CHECK-P8-NEXT: li r7, 64 +; CHECK-P8-NEXT: lvx v2, 0, r4 +; CHECK-P8-NEXT: lvx v3, 0, r5 +; CHECK-P8-NEXT: std r30, 80(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: mr r30, r6 +; CHECK-P8-NEXT: stvx v31, r1, r7 # 16-byte Folded Spill +; CHECK-P8-NEXT: lvx v31, 0, r3 ; CHECK-P8-NEXT: bl __mulkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r5, r3 -; CHECK-P8-NEXT: mr r6, r4 -; CHECK-P8-NEXT: mr r3, r30 -; CHECK-P8-NEXT: mr r4, r29 +; CHECK-P8-NEXT: vmr v3, v2 +; CHECK-P8-NEXT: vmr v2, v31 ; CHECK-P8-NEXT: bl __subkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: li r5, 1 -; CHECK-P8-NEXT: std r3, 0(r28) -; CHECK-P8-NEXT: sldi r5, r5, 63 -; CHECK-P8-NEXT: xor r4, r4, r5 -; CHECK-P8-NEXT: std r4, 8(r28) -; CHECK-P8-NEXT: addi r1, r1, 64 +; CHECK-P8-NEXT: addi r3, r1, 48 +; CHECK-P8-NEXT: stvx v2, 0, r3 +; CHECK-P8-NEXT: lbz r4, 63(r1) +; CHECK-P8-NEXT: xori r4, r4, 128 +; CHECK-P8-NEXT: stb r4, 63(r1) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: li r3, 64 +; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: ld r30, 80(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: addi r1, r1, 96 ; CHECK-P8-NEXT: ld r0, 16(r1) -; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-P8-NEXT: mtlr r0 ; CHECK-P8-NEXT: blr fp128* nocapture readonly %b, @@ -511,35 +453,31 @@ ; CHECK-P8-LABEL: qpFnmsub_02: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: mflr r0 -; CHECK-P8-NEXT: .cfi_def_cfa_offset 64 +; CHECK-P8-NEXT: .cfi_def_cfa_offset 80 ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: .cfi_offset r29, -24 ; CHECK-P8-NEXT: .cfi_offset r30, -16 ; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) -; CHECK-P8-NEXT: stdu r1, -64(r1) -; CHECK-P8-NEXT: ld r9, 0(r3) -; CHECK-P8-NEXT: ld r7, 8(r3) -; CHECK-P8-NEXT: ld r8, 0(r4) +; CHECK-P8-NEXT: stdu r1, -80(r1) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: mr r30, r6 -; CHECK-P8-NEXT: ld r6, 8(r4) ; CHECK-P8-NEXT: mr r29, r5 -; CHECK-P8-NEXT: mr r3, r9 -; CHECK-P8-NEXT: mr r4, r7 -; CHECK-P8-NEXT: mr r5, r8 ; CHECK-P8-NEXT: bl __mulkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: ld r5, 0(r29) -; CHECK-P8-NEXT: ld r6, 8(r29) +; CHECK-P8-NEXT: lvx v3, 0, r29 ; CHECK-P8-NEXT: bl __subkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: li r5, 1 -; CHECK-P8-NEXT: std r3, 0(r30) -; CHECK-P8-NEXT: sldi r5, r5, 63 -; CHECK-P8-NEXT: xor r4, r4, r5 -; CHECK-P8-NEXT: std r4, 8(r30) -; CHECK-P8-NEXT: addi r1, r1, 64 +; CHECK-P8-NEXT: 
addi r3, r1, 32 +; CHECK-P8-NEXT: stvx v2, 0, r3 +; CHECK-P8-NEXT: lbz r4, 47(r1) +; CHECK-P8-NEXT: xori r4, r4, 128 +; CHECK-P8-NEXT: stb r4, 47(r1) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: addi r1, r1, 80 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/PowerPC/f128-passByValue.ll b/llvm/test/CodeGen/PowerPC/f128-passByValue.ll --- a/llvm/test/CodeGen/PowerPC/f128-passByValue.ll +++ b/llvm/test/CodeGen/PowerPC/f128-passByValue.ll @@ -16,10 +16,9 @@ ; ; CHECK-P8-LABEL: loadConstant: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lis r3, 1 -; CHECK-P8-NEXT: ori r3, r3, 5 -; CHECK-P8-NEXT: sldi r4, r3, 46 -; CHECK-P8-NEXT: li r3, 0 +; CHECK-P8-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; CHECK-P8-NEXT: addi r3, r3, .LCPI0_0@toc@l +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: blr entry: ret fp128 0xL00000000000000004001400000000000 @@ -45,10 +44,9 @@ ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: lis r5, 1 -; CHECK-P8-NEXT: ori r5, r5, 5 -; CHECK-P8-NEXT: sldi r6, r5, 46 -; CHECK-P8-NEXT: li r5, 0 +; CHECK-P8-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-P8-NEXT: addi r3, r3, .LCPI1_0@toc@l +; CHECK-P8-NEXT: lvx v3, 0, r3 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -136,12 +134,10 @@ ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: sldi r4, r4, 4 -; CHECK-P8-NEXT: ld r7, 0(r3) -; CHECK-P8-NEXT: add r6, r3, r4 -; CHECK-P8-NEXT: ld r4, 8(r3) -; CHECK-P8-NEXT: ld r5, -16(r6) -; CHECK-P8-NEXT: ld r6, -8(r6) -; CHECK-P8-NEXT: mr r3, r7 +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: add r4, r3, r4 +; CHECK-P8-NEXT: addi r4, r4, -16 +; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -183,139 +179,115 @@ ; CHECK-P8-LABEL: maxVecParam: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: mflr r0 -; CHECK-P8-NEXT: .cfi_def_cfa_offset 208 -; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: .cfi_offset r14, -144 -; CHECK-P8-NEXT: .cfi_offset r15, -136 -; CHECK-P8-NEXT: .cfi_offset r16, -128 -; CHECK-P8-NEXT: .cfi_offset r17, -120 -; CHECK-P8-NEXT: .cfi_offset r18, -112 -; CHECK-P8-NEXT: .cfi_offset r19, -104 -; CHECK-P8-NEXT: .cfi_offset r20, -96 -; CHECK-P8-NEXT: .cfi_offset r21, -88 -; CHECK-P8-NEXT: .cfi_offset r22, -80 -; CHECK-P8-NEXT: .cfi_offset r23, -72 -; CHECK-P8-NEXT: .cfi_offset r24, -64 -; CHECK-P8-NEXT: .cfi_offset r25, -56 -; CHECK-P8-NEXT: .cfi_offset r26, -48 -; CHECK-P8-NEXT: .cfi_offset r27, -40 -; CHECK-P8-NEXT: .cfi_offset r28, -32 -; CHECK-P8-NEXT: .cfi_offset r29, -24 -; CHECK-P8-NEXT: .cfi_offset r30, -16 -; CHECK-P8-NEXT: .cfi_offset r31, -8 -; CHECK-P8-NEXT: std r14, -144(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r15, -136(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r16, -128(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r17, -120(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r18, -112(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r19, -104(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r20, -96(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r21, -88(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r22, -80(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r23, -72(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r24, -64(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r25, -56(r1) # 8-byte 
Folded Spill -; CHECK-P8-NEXT: std r26, -48(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r27, -40(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r31, -8(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) -; CHECK-P8-NEXT: stdu r1, -208(r1) -; CHECK-P8-NEXT: mr r17, r7 -; CHECK-P8-NEXT: ld r7, 432(r1) -; CHECK-P8-NEXT: ld r26, 400(r1) -; CHECK-P8-NEXT: ld r25, 408(r1) -; CHECK-P8-NEXT: ld r24, 384(r1) -; CHECK-P8-NEXT: mr r20, r10 -; CHECK-P8-NEXT: ld r23, 392(r1) -; CHECK-P8-NEXT: ld r22, 368(r1) -; CHECK-P8-NEXT: ld r21, 376(r1) -; CHECK-P8-NEXT: ld r16, 352(r1) -; CHECK-P8-NEXT: mr r19, r9 -; CHECK-P8-NEXT: mr r18, r8 -; CHECK-P8-NEXT: ld r15, 360(r1) -; CHECK-P8-NEXT: ld r14, 336(r1) -; CHECK-P8-NEXT: ld r31, 344(r1) -; CHECK-P8-NEXT: ld r30, 320(r1) -; CHECK-P8-NEXT: std r7, 56(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: ld r7, 440(r1) -; CHECK-P8-NEXT: ld r29, 328(r1) -; CHECK-P8-NEXT: ld r28, 304(r1) -; CHECK-P8-NEXT: ld r27, 312(r1) -; CHECK-P8-NEXT: std r7, 48(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: ld r7, 416(r1) -; CHECK-P8-NEXT: std r7, 40(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: ld r7, 424(r1) -; CHECK-P8-NEXT: std r7, 32(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: stdu r1, -224(r1) +; CHECK-P8-NEXT: .cfi_def_cfa_offset 224 +; CHECK-P8-NEXT: .cfi_offset lr, 16 +; CHECK-P8-NEXT: .cfi_offset v21, -176 +; CHECK-P8-NEXT: .cfi_offset v22, -160 +; CHECK-P8-NEXT: .cfi_offset v23, -144 +; CHECK-P8-NEXT: .cfi_offset v24, -128 +; CHECK-P8-NEXT: .cfi_offset v25, -112 +; CHECK-P8-NEXT: .cfi_offset v26, -96 +; CHECK-P8-NEXT: .cfi_offset v27, -80 +; CHECK-P8-NEXT: .cfi_offset v28, -64 +; CHECK-P8-NEXT: .cfi_offset v29, -48 +; CHECK-P8-NEXT: .cfi_offset v30, -32 +; CHECK-P8-NEXT: .cfi_offset v31, -16 +; CHECK-P8-NEXT: li r3, 48 +; CHECK-P8-NEXT: stvx v21, r1, r3 # 16-byte Folded Spill +; CHECK-P8-NEXT: li r3, 64 +; CHECK-P8-NEXT: vmr v21, v4 +; CHECK-P8-NEXT: stvx v22, r1, r3 # 16-byte Folded Spill +; CHECK-P8-NEXT: li r3, 80 +; CHECK-P8-NEXT: vmr v22, v5 +; CHECK-P8-NEXT: stvx v23, r1, r3 # 16-byte Folded Spill +; CHECK-P8-NEXT: li r3, 96 +; CHECK-P8-NEXT: vmr v23, v6 +; CHECK-P8-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill +; CHECK-P8-NEXT: li r3, 112 +; CHECK-P8-NEXT: vmr v24, v7 +; CHECK-P8-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill +; CHECK-P8-NEXT: li r3, 128 +; CHECK-P8-NEXT: vmr v25, v8 +; CHECK-P8-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill +; CHECK-P8-NEXT: li r3, 144 +; CHECK-P8-NEXT: vmr v26, v9 +; CHECK-P8-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill +; CHECK-P8-NEXT: li r3, 160 +; CHECK-P8-NEXT: vmr v27, v10 +; CHECK-P8-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-P8-NEXT: li r3, 176 +; CHECK-P8-NEXT: vmr v28, v11 +; CHECK-P8-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-P8-NEXT: li r3, 192 +; CHECK-P8-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-P8-NEXT: li r3, 208 +; CHECK-P8-NEXT: vmr v30, v12 +; CHECK-P8-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-P8-NEXT: addi r3, r1, 448 +; CHECK-P8-NEXT: vmr v31, v13 +; CHECK-P8-NEXT: lvx v29, 0, r3 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r5, r17 -; CHECK-P8-NEXT: mr r6, r18 +; CHECK-P8-NEXT: vmr v3, v21 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r5, r19 -; CHECK-P8-NEXT: mr r6, r20 +; CHECK-P8-NEXT: vmr v3, v22 ; CHECK-P8-NEXT: bl __addkf3 ; 
CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r5, r28 -; CHECK-P8-NEXT: mr r6, r27 +; CHECK-P8-NEXT: vmr v3, v23 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r5, r30 -; CHECK-P8-NEXT: mr r6, r29 +; CHECK-P8-NEXT: vmr v3, v24 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r5, r14 -; CHECK-P8-NEXT: mr r6, r31 +; CHECK-P8-NEXT: vmr v3, v25 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r5, r16 -; CHECK-P8-NEXT: mr r6, r15 +; CHECK-P8-NEXT: vmr v3, v26 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r5, r22 -; CHECK-P8-NEXT: mr r6, r21 +; CHECK-P8-NEXT: vmr v3, v27 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r5, r24 -; CHECK-P8-NEXT: mr r6, r23 +; CHECK-P8-NEXT: vmr v3, v28 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r5, r26 -; CHECK-P8-NEXT: mr r6, r25 +; CHECK-P8-NEXT: vmr v3, v30 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: ld r5, 40(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r6, 32(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: vmr v3, v31 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: ld r5, 56(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r6, 48(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: vmr v3, v29 ; CHECK-P8-NEXT: bl __subkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: addi r1, r1, 208 +; CHECK-P8-NEXT: li r3, 208 +; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-P8-NEXT: li r3, 192 +; CHECK-P8-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-P8-NEXT: li r3, 176 +; CHECK-P8-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-P8-NEXT: li r3, 160 +; CHECK-P8-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-P8-NEXT: li r3, 144 +; CHECK-P8-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; CHECK-P8-NEXT: li r3, 128 +; CHECK-P8-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; CHECK-P8-NEXT: li r3, 112 +; CHECK-P8-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; CHECK-P8-NEXT: li r3, 96 +; CHECK-P8-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; CHECK-P8-NEXT: li r3, 80 +; CHECK-P8-NEXT: lvx v23, r1, r3 # 16-byte Folded Reload +; CHECK-P8-NEXT: li r3, 64 +; CHECK-P8-NEXT: lvx v22, r1, r3 # 16-byte Folded Reload +; CHECK-P8-NEXT: li r3, 48 +; CHECK-P8-NEXT: lvx v21, r1, r3 # 16-byte Folded Reload +; CHECK-P8-NEXT: addi r1, r1, 224 ; CHECK-P8-NEXT: ld r0, 16(r1) -; CHECK-P8-NEXT: ld r31, -8(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r28, -32(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r27, -40(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r26, -48(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r25, -56(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r24, -64(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r23, -72(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r22, -80(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r21, -88(r1) # 8-byte Folded Reload ; CHECK-P8-NEXT: mtlr r0 -; CHECK-P8-NEXT: ld r20, -96(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r19, -104(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r18, -112(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r17, -120(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r16, -128(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r15, -136(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r14, -144(r1) # 8-byte Folded Reload ; CHECK-P8-NEXT: blr fp128 %p6, fp128 %p7, fp128 %p8, fp128 %p9, fp128 %p10, 
fp128 %p11, fp128 %p12, fp128 %p13) { @@ -349,37 +321,31 @@ ; CHECK-P8-LABEL: mixParam_01: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: mflr r0 -; CHECK-P8-NEXT: .cfi_def_cfa_offset 64 +; CHECK-P8-NEXT: std r0, 16(r1) +; CHECK-P8-NEXT: stdu r1, -80(r1) +; CHECK-P8-NEXT: .cfi_def_cfa_offset 80 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: .cfi_offset r28, -32 -; CHECK-P8-NEXT: .cfi_offset r29, -24 ; CHECK-P8-NEXT: .cfi_offset r30, -16 -; CHECK-P8-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r0, 16(r1) -; CHECK-P8-NEXT: stdu r1, -64(r1) +; CHECK-P8-NEXT: .cfi_offset v31, -32 +; CHECK-P8-NEXT: li r3, 48 +; CHECK-P8-NEXT: std r30, 64(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: mr r30, r5 -; CHECK-P8-NEXT: mr r5, r6 -; CHECK-P8-NEXT: mr r6, r7 +; CHECK-P8-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r29, r3 ; CHECK-P8-NEXT: mr r3, r30 -; CHECK-P8-NEXT: mr r28, r4 +; CHECK-P8-NEXT: vmr v31, v2 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r5, r3 -; CHECK-P8-NEXT: mr r6, r4 -; CHECK-P8-NEXT: mr r3, r29 -; CHECK-P8-NEXT: mr r4, r28 +; CHECK-P8-NEXT: vmr v3, v2 +; CHECK-P8-NEXT: vmr v2, v31 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: addi r1, r1, 64 +; CHECK-P8-NEXT: li r3, 48 +; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-P8-NEXT: addi r1, r1, 80 ; CHECK-P8-NEXT: ld r0, 16(r1) -; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-P8-NEXT: mtlr r0 ; CHECK-P8-NEXT: blr entry: @@ -401,37 +367,31 @@ ; CHECK-P8-LABEL: mixParam_01f: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: mflr r0 -; CHECK-P8-NEXT: .cfi_def_cfa_offset 64 +; CHECK-P8-NEXT: std r0, 16(r1) +; CHECK-P8-NEXT: stdu r1, -80(r1) +; CHECK-P8-NEXT: .cfi_def_cfa_offset 80 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: .cfi_offset r28, -32 -; CHECK-P8-NEXT: .cfi_offset r29, -24 ; CHECK-P8-NEXT: .cfi_offset r30, -16 -; CHECK-P8-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r0, 16(r1) -; CHECK-P8-NEXT: stdu r1, -64(r1) -; CHECK-P8-NEXT: mr r30, r5 -; CHECK-P8-NEXT: mr r5, r6 -; CHECK-P8-NEXT: mr r6, r7 +; CHECK-P8-NEXT: .cfi_offset v31, -32 +; CHECK-P8-NEXT: li r4, 48 +; CHECK-P8-NEXT: std r30, 64(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: mr r30, r3 +; CHECK-P8-NEXT: stvx v31, r1, r4 # 16-byte Folded Spill ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r29, r3 ; CHECK-P8-NEXT: mr r3, r30 -; CHECK-P8-NEXT: mr r28, r4 +; CHECK-P8-NEXT: vmr v31, v2 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r5, r3 -; CHECK-P8-NEXT: mr r6, r4 -; CHECK-P8-NEXT: mr r3, r29 -; CHECK-P8-NEXT: mr r4, r28 +; CHECK-P8-NEXT: vmr v3, v2 +; CHECK-P8-NEXT: vmr v2, v31 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: addi r1, r1, 64 +; CHECK-P8-NEXT: li r3, 48 +; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-P8-NEXT: addi r1, r1, 80 ; CHECK-P8-NEXT: ld r0, 16(r1) -; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; 
CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-P8-NEXT: mtlr r0 ; CHECK-P8-NEXT: blr entry: @@ -461,48 +421,41 @@ ; CHECK-P8-LABEL: mixParam_02: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: mflr r0 -; CHECK-P8-NEXT: .cfi_def_cfa_offset 64 +; CHECK-P8-NEXT: std r0, 16(r1) +; CHECK-P8-NEXT: stdu r1, -80(r1) +; CHECK-P8-NEXT: .cfi_def_cfa_offset 80 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: .cfi_offset r29, -32 -; CHECK-P8-NEXT: .cfi_offset r30, -24 ; CHECK-P8-NEXT: .cfi_offset f31, -8 -; CHECK-P8-NEXT: std r29, -32(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r30, -24(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r0, 16(r1) -; CHECK-P8-NEXT: stdu r1, -64(r1) -; CHECK-P8-NEXT: mr r11, r4 -; CHECK-P8-NEXT: lwz r4, 160(r1) -; CHECK-P8-NEXT: add r5, r7, r9 +; CHECK-P8-NEXT: .cfi_offset v31, -32 +; CHECK-P8-NEXT: li r3, 48 +; CHECK-P8-NEXT: add r4, r7, r9 +; CHECK-P8-NEXT: vmr v4, v2 +; CHECK-P8-NEXT: stfd f31, 72(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-P8-NEXT: lwz r3, 176(r1) +; CHECK-P8-NEXT: add r4, r4, r10 ; CHECK-P8-NEXT: fmr f31, f1 -; CHECK-P8-NEXT: add r5, r5, r10 -; CHECK-P8-NEXT: add r4, r5, r4 -; CHECK-P8-NEXT: clrldi r4, r4, 32 -; CHECK-P8-NEXT: std r4, 0(r6) -; CHECK-P8-NEXT: mr r6, r3 -; CHECK-P8-NEXT: ld r5, 0(r8) -; CHECK-P8-NEXT: ld r4, 8(r8) -; CHECK-P8-NEXT: mr r3, r5 -; CHECK-P8-NEXT: mr r5, r6 -; CHECK-P8-NEXT: mr r6, r11 +; CHECK-P8-NEXT: add r3, r4, r3 +; CHECK-P8-NEXT: clrldi r3, r3, 32 +; CHECK-P8-NEXT: std r3, 0(r6) +; CHECK-P8-NEXT: lvx v3, 0, r8 +; CHECK-P8-NEXT: vmr v2, v3 +; CHECK-P8-NEXT: vmr v3, v4 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: fmr f1, f31 -; CHECK-P8-NEXT: mr r30, r3 -; CHECK-P8-NEXT: mr r29, r4 +; CHECK-P8-NEXT: vmr v31, v2 ; CHECK-P8-NEXT: bl __extenddfkf2 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r5, r3 -; CHECK-P8-NEXT: mr r6, r4 -; CHECK-P8-NEXT: mr r3, r30 -; CHECK-P8-NEXT: mr r4, r29 +; CHECK-P8-NEXT: vmr v3, v2 +; CHECK-P8-NEXT: vmr v2, v31 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: addi r1, r1, 64 +; CHECK-P8-NEXT: li r3, 48 +; CHECK-P8-NEXT: lfd f31, 72(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-P8-NEXT: addi r1, r1, 80 ; CHECK-P8-NEXT: ld r0, 16(r1) -; CHECK-P8-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r30, -24(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r29, -32(r1) # 8-byte Folded Reload ; CHECK-P8-NEXT: mtlr r0 ; CHECK-P8-NEXT: blr i16 signext %p4, fp128* nocapture readonly %p5, @@ -541,47 +494,40 @@ ; CHECK-P8-LABEL: mixParam_02f: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: mflr r0 -; CHECK-P8-NEXT: .cfi_def_cfa_offset 64 +; CHECK-P8-NEXT: std r0, 16(r1) +; CHECK-P8-NEXT: stdu r1, -80(r1) +; CHECK-P8-NEXT: .cfi_def_cfa_offset 80 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: .cfi_offset r29, -32 -; CHECK-P8-NEXT: .cfi_offset r30, -24 ; CHECK-P8-NEXT: .cfi_offset f31, -8 -; CHECK-P8-NEXT: std r29, -32(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r30, -24(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r0, 16(r1) -; CHECK-P8-NEXT: stdu r1, -64(r1) -; CHECK-P8-NEXT: mr r11, r4 -; CHECK-P8-NEXT: add r4, r6, r8 -; CHECK-P8-NEXT: mr r6, r3 +; CHECK-P8-NEXT: .cfi_offset v31, -32 +; CHECK-P8-NEXT: add r4, r4, r6 +; CHECK-P8-NEXT: vmr v4, v2 +; 
CHECK-P8-NEXT: li r9, 48 +; CHECK-P8-NEXT: stfd f31, 72(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: add r4, r4, r7 +; CHECK-P8-NEXT: stvx v31, r1, r9 # 16-byte Folded Spill ; CHECK-P8-NEXT: fmr f31, f1 -; CHECK-P8-NEXT: add r4, r4, r9 -; CHECK-P8-NEXT: add r4, r4, r10 +; CHECK-P8-NEXT: add r4, r4, r8 ; CHECK-P8-NEXT: clrldi r4, r4, 32 -; CHECK-P8-NEXT: std r4, 0(r5) -; CHECK-P8-NEXT: ld r5, 0(r7) -; CHECK-P8-NEXT: ld r4, 8(r7) -; CHECK-P8-NEXT: mr r3, r5 -; CHECK-P8-NEXT: mr r5, r6 -; CHECK-P8-NEXT: mr r6, r11 +; CHECK-P8-NEXT: std r4, 0(r3) +; CHECK-P8-NEXT: lvx v3, 0, r5 +; CHECK-P8-NEXT: vmr v2, v3 +; CHECK-P8-NEXT: vmr v3, v4 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: fmr f1, f31 -; CHECK-P8-NEXT: mr r30, r3 -; CHECK-P8-NEXT: mr r29, r4 +; CHECK-P8-NEXT: vmr v31, v2 ; CHECK-P8-NEXT: bl __extenddfkf2 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r5, r3 -; CHECK-P8-NEXT: mr r6, r4 -; CHECK-P8-NEXT: mr r3, r30 -; CHECK-P8-NEXT: mr r4, r29 +; CHECK-P8-NEXT: vmr v3, v2 +; CHECK-P8-NEXT: vmr v2, v31 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: addi r1, r1, 64 +; CHECK-P8-NEXT: li r3, 48 +; CHECK-P8-NEXT: lfd f31, 72(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-P8-NEXT: addi r1, r1, 80 ; CHECK-P8-NEXT: ld r0, 16(r1) -; CHECK-P8-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r30, -24(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r29, -32(r1) # 8-byte Folded Reload ; CHECK-P8-NEXT: mtlr r0 ; CHECK-P8-NEXT: blr i16 signext %p4, fp128* nocapture readonly %p5, @@ -620,40 +566,35 @@ ; CHECK-P8-LABEL: mixParam_03: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: mflr r0 -; CHECK-P8-NEXT: .cfi_def_cfa_offset 64 +; CHECK-P8-NEXT: std r0, 16(r1) +; CHECK-P8-NEXT: stdu r1, -80(r1) +; CHECK-P8-NEXT: .cfi_def_cfa_offset 80 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: .cfi_offset r28, -32 -; CHECK-P8-NEXT: .cfi_offset r29, -24 ; CHECK-P8-NEXT: .cfi_offset r30, -16 -; CHECK-P8-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r0, 16(r1) -; CHECK-P8-NEXT: stdu r1, -64(r1) -; CHECK-P8-NEXT: ld r6, 168(r1) -; CHECK-P8-NEXT: std r4, 8(r9) -; CHECK-P8-NEXT: std r3, 0(r9) +; CHECK-P8-NEXT: .cfi_offset v31, -32 +; CHECK-P8-NEXT: ld r4, 184(r1) +; CHECK-P8-NEXT: li r3, 48 +; CHECK-P8-NEXT: stvx v2, 0, r9 +; CHECK-P8-NEXT: std r30, 64(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: mr r30, r5 +; CHECK-P8-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill ; CHECK-P8-NEXT: mr r3, r10 -; CHECK-P8-NEXT: mr r28, r5 -; CHECK-P8-NEXT: stvx v2, 0, r6 -; CHECK-P8-NEXT: ld r30, 0(r9) -; CHECK-P8-NEXT: ld r29, 8(r9) +; CHECK-P8-NEXT: stvx v3, 0, r4 +; CHECK-P8-NEXT: lvx v31, 0, r9 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r5, r3 -; CHECK-P8-NEXT: mr r6, r4 -; CHECK-P8-NEXT: mr r3, r30 -; CHECK-P8-NEXT: mr r4, r29 +; CHECK-P8-NEXT: vmr v3, v2 +; CHECK-P8-NEXT: vmr v2, v31 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __trunckfdf2 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stfdx f1, 0, r28 -; CHECK-P8-NEXT: addi r1, r1, 64 +; CHECK-P8-NEXT: li r3, 48 +; CHECK-P8-NEXT: stfdx f1, 0, r30 +; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-P8-NEXT: addi r1, r1, 80 ; CHECK-P8-NEXT: ld r0, 16(r1) -; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld 
r29, -24(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-P8-NEXT: mtlr r0 ; CHECK-P8-NEXT: blr fp128* nocapture %f2, i32 signext %i1, i8 zeroext %c1, @@ -686,39 +627,34 @@ ; CHECK-P8-LABEL: mixParam_03f: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: mflr r0 -; CHECK-P8-NEXT: .cfi_def_cfa_offset 64 +; CHECK-P8-NEXT: std r0, 16(r1) +; CHECK-P8-NEXT: stdu r1, -80(r1) +; CHECK-P8-NEXT: .cfi_def_cfa_offset 80 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: .cfi_offset r28, -32 -; CHECK-P8-NEXT: .cfi_offset r29, -24 ; CHECK-P8-NEXT: .cfi_offset r30, -16 -; CHECK-P8-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-P8-NEXT: std r0, 16(r1) -; CHECK-P8-NEXT: stdu r1, -64(r1) -; CHECK-P8-NEXT: std r4, 8(r6) -; CHECK-P8-NEXT: std r3, 0(r6) -; CHECK-P8-NEXT: mr r3, r7 -; CHECK-P8-NEXT: mr r28, r5 -; CHECK-P8-NEXT: stvx v2, 0, r9 -; CHECK-P8-NEXT: ld r30, 0(r6) -; CHECK-P8-NEXT: ld r29, 8(r6) +; CHECK-P8-NEXT: .cfi_offset v31, -32 +; CHECK-P8-NEXT: li r6, 48 +; CHECK-P8-NEXT: stvx v2, 0, r4 +; CHECK-P8-NEXT: stvx v3, 0, r7 +; CHECK-P8-NEXT: std r30, 64(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: mr r30, r3 +; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: stvx v31, r1, r6 # 16-byte Folded Spill +; CHECK-P8-NEXT: lvx v31, 0, r4 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: mr r5, r3 -; CHECK-P8-NEXT: mr r6, r4 -; CHECK-P8-NEXT: mr r3, r30 -; CHECK-P8-NEXT: mr r4, r29 +; CHECK-P8-NEXT: vmr v3, v2 +; CHECK-P8-NEXT: vmr v2, v31 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __trunckfdf2 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stfdx f1, 0, r28 -; CHECK-P8-NEXT: addi r1, r1, 64 +; CHECK-P8-NEXT: li r3, 48 +; CHECK-P8-NEXT: stfdx f1, 0, r30 +; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-P8-NEXT: addi r1, r1, 80 ; CHECK-P8-NEXT: ld r0, 16(r1) -; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; CHECK-P8-NEXT: ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-P8-NEXT: mtlr r0 ; CHECK-P8-NEXT: blr fp128* nocapture %f2, i32 signext %i1, i8 zeroext %c1, @@ -757,15 +693,15 @@ ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: mflr r0 ; CHECK-P8-NEXT: std r0, 16(r1) -; CHECK-P8-NEXT: stdu r1, -32(r1) -; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 +; CHECK-P8-NEXT: stdu r1, -96(r1) +; CHECK-P8-NEXT: .cfi_def_cfa_offset 96 ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: bl in ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl out ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: li r3, 0 -; CHECK-P8-NEXT: addi r1, r1, 32 +; CHECK-P8-NEXT: addi r1, r1, 96 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 ; CHECK-P8-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/f128-rounding.ll b/llvm/test/CodeGen/PowerPC/f128-rounding.ll --- a/llvm/test/CodeGen/PowerPC/f128-rounding.ll +++ b/llvm/test/CodeGen/PowerPC/f128-rounding.ll @@ -2,7 +2,7 @@ ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs \ ; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s ; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs \ -; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s -enable-soft-fp128 | FileCheck %s \ +; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s \ ; RUN: -check-prefix=CHECK-P8 define void @qp_trunc(fp128* 
nocapture readonly %a, fp128* nocapture %res) { diff --git a/llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll b/llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll --- a/llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll +++ b/llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll @@ -28,9 +28,7 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: ld r5, 0(r3) -; CHECK-P8-NEXT: ld r4, 8(r3) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: bl __fixkfdi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -67,9 +65,8 @@ ; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha ; CHECK-P8-NEXT: mr r30, r3 ; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) -; CHECK-P8-NEXT: ld r5, 32(r4) -; CHECK-P8-NEXT: ld r4, 40(r4) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: addi r4, r4, 32 +; CHECK-P8-NEXT: lvx v2, 0, r4 ; CHECK-P8-NEXT: bl __fixkfdi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: std r3, 0(r30) @@ -109,12 +106,10 @@ ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha -; CHECK-P8-NEXT: ld r7, 0(r3) -; CHECK-P8-NEXT: ld r6, .LC0@toc@l(r4) -; CHECK-P8-NEXT: ld r4, 8(r3) -; CHECK-P8-NEXT: mr r3, r7 -; CHECK-P8-NEXT: ld r5, 16(r6) -; CHECK-P8-NEXT: ld r6, 24(r6) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-P8-NEXT: addi r4, r4, 16 +; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixkfdi @@ -154,14 +149,9 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: ld r9, 0(r3) -; CHECK-P8-NEXT: ld r7, 8(r3) -; CHECK-P8-NEXT: ld r8, 0(r4) -; CHECK-P8-NEXT: ld r6, 8(r4) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: mr r30, r5 -; CHECK-P8-NEXT: mr r3, r9 -; CHECK-P8-NEXT: mr r4, r7 -; CHECK-P8-NEXT: mr r5, r8 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixkfdi @@ -210,9 +200,8 @@ ; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha ; CHECK-P8-NEXT: mr r29, r3 ; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) -; CHECK-P8-NEXT: ld r5, 32(r4) -; CHECK-P8-NEXT: ld r4, 40(r4) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: addi r4, r4, 32 +; CHECK-P8-NEXT: lvx v2, 0, r4 ; CHECK-P8-NEXT: bl __fixkfdi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: sldi r4, r30, 3 @@ -251,9 +240,7 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: ld r5, 0(r3) -; CHECK-P8-NEXT: ld r4, 8(r3) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: bl __fixunskfdi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -290,9 +277,8 @@ ; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha ; CHECK-P8-NEXT: mr r30, r3 ; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) -; CHECK-P8-NEXT: ld r5, 32(r4) -; CHECK-P8-NEXT: ld r4, 40(r4) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: addi r4, r4, 32 +; CHECK-P8-NEXT: lvx v2, 0, r4 ; CHECK-P8-NEXT: bl __fixunskfdi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: std r3, 0(r30) @@ -332,12 +318,10 @@ ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha -; CHECK-P8-NEXT: ld r7, 0(r3) -; CHECK-P8-NEXT: ld r6, .LC0@toc@l(r4) -; CHECK-P8-NEXT: ld r4, 8(r3) -; CHECK-P8-NEXT: mr r3, r7 -; CHECK-P8-NEXT: ld r5, 16(r6) -; CHECK-P8-NEXT: ld r6, 24(r6) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) +; 
CHECK-P8-NEXT: addi r4, r4, 16 +; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixunskfdi @@ -377,14 +361,9 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: ld r9, 0(r3) -; CHECK-P8-NEXT: ld r7, 8(r3) -; CHECK-P8-NEXT: ld r8, 0(r4) -; CHECK-P8-NEXT: ld r6, 8(r4) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: mr r30, r5 -; CHECK-P8-NEXT: mr r3, r9 -; CHECK-P8-NEXT: mr r4, r7 -; CHECK-P8-NEXT: mr r5, r8 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixunskfdi @@ -433,9 +412,7 @@ ; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha ; CHECK-P8-NEXT: mr r29, r3 ; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) -; CHECK-P8-NEXT: ld r5, 0(r4) -; CHECK-P8-NEXT: ld r4, 8(r4) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: lvx v2, 0, r4 ; CHECK-P8-NEXT: bl __fixunskfdi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: sldi r4, r30, 3 @@ -475,9 +452,7 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: ld r5, 0(r3) -; CHECK-P8-NEXT: ld r4, 8(r3) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: extsw r3, r3 @@ -515,9 +490,8 @@ ; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha ; CHECK-P8-NEXT: mr r30, r3 ; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) -; CHECK-P8-NEXT: ld r5, 32(r4) -; CHECK-P8-NEXT: ld r4, 40(r4) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: addi r4, r4, 32 +; CHECK-P8-NEXT: lvx v2, 0, r4 ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: stw r3, 0(r30) @@ -558,12 +532,10 @@ ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha -; CHECK-P8-NEXT: ld r7, 0(r3) -; CHECK-P8-NEXT: ld r6, .LC0@toc@l(r4) -; CHECK-P8-NEXT: ld r4, 8(r3) -; CHECK-P8-NEXT: mr r3, r7 -; CHECK-P8-NEXT: ld r5, 16(r6) -; CHECK-P8-NEXT: ld r6, 24(r6) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-P8-NEXT: addi r4, r4, 16 +; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixkfsi @@ -604,14 +576,9 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: ld r9, 0(r3) -; CHECK-P8-NEXT: ld r7, 8(r3) -; CHECK-P8-NEXT: ld r8, 0(r4) -; CHECK-P8-NEXT: ld r6, 8(r4) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: mr r30, r5 -; CHECK-P8-NEXT: mr r3, r9 -; CHECK-P8-NEXT: mr r4, r7 -; CHECK-P8-NEXT: mr r5, r8 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixkfsi @@ -650,9 +617,7 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: ld r5, 0(r3) -; CHECK-P8-NEXT: ld r4, 8(r3) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: bl __fixunskfsi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -689,9 +654,8 @@ ; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha ; CHECK-P8-NEXT: mr r30, r3 ; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) -; CHECK-P8-NEXT: ld r5, 32(r4) -; CHECK-P8-NEXT: ld r4, 40(r4) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: addi r4, r4, 32 +; CHECK-P8-NEXT: lvx v2, 0, r4 ; CHECK-P8-NEXT: bl __fixunskfsi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: stw r3, 0(r30) @@ -732,12 +696,10 @@ ; CHECK-P8-NEXT: 
.cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha -; CHECK-P8-NEXT: ld r7, 0(r3) -; CHECK-P8-NEXT: ld r6, .LC0@toc@l(r4) -; CHECK-P8-NEXT: ld r4, 8(r3) -; CHECK-P8-NEXT: mr r3, r7 -; CHECK-P8-NEXT: ld r5, 16(r6) -; CHECK-P8-NEXT: ld r6, 24(r6) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-P8-NEXT: addi r4, r4, 16 +; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixunskfsi @@ -777,14 +739,9 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: ld r9, 0(r3) -; CHECK-P8-NEXT: ld r7, 8(r3) -; CHECK-P8-NEXT: ld r8, 0(r4) -; CHECK-P8-NEXT: ld r6, 8(r4) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: mr r30, r5 -; CHECK-P8-NEXT: mr r3, r9 -; CHECK-P8-NEXT: mr r4, r7 -; CHECK-P8-NEXT: mr r5, r8 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixunskfsi @@ -824,9 +781,7 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: ld r5, 0(r3) -; CHECK-P8-NEXT: ld r4, 8(r3) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: extsw r3, r3 @@ -863,9 +818,8 @@ ; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha ; CHECK-P8-NEXT: mr r30, r3 ; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) -; CHECK-P8-NEXT: ld r5, 32(r4) -; CHECK-P8-NEXT: ld r4, 40(r4) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: addi r4, r4, 32 +; CHECK-P8-NEXT: lvx v2, 0, r4 ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: sth r3, 0(r30) @@ -905,12 +859,10 @@ ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha -; CHECK-P8-NEXT: ld r7, 0(r3) -; CHECK-P8-NEXT: ld r6, .LC0@toc@l(r4) -; CHECK-P8-NEXT: ld r4, 8(r3) -; CHECK-P8-NEXT: mr r3, r7 -; CHECK-P8-NEXT: ld r5, 16(r6) -; CHECK-P8-NEXT: ld r6, 24(r6) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-P8-NEXT: addi r4, r4, 16 +; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixkfsi @@ -950,14 +902,9 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: ld r9, 0(r3) -; CHECK-P8-NEXT: ld r7, 8(r3) -; CHECK-P8-NEXT: ld r8, 0(r4) -; CHECK-P8-NEXT: ld r6, 8(r4) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: mr r30, r5 -; CHECK-P8-NEXT: mr r3, r9 -; CHECK-P8-NEXT: mr r4, r7 -; CHECK-P8-NEXT: mr r5, r8 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixkfsi @@ -995,9 +942,7 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: ld r5, 0(r3) -; CHECK-P8-NEXT: ld r4, 8(r3) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -1033,9 +978,8 @@ ; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha ; CHECK-P8-NEXT: mr r30, r3 ; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) -; CHECK-P8-NEXT: ld r5, 32(r4) -; CHECK-P8-NEXT: ld r4, 40(r4) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: addi r4, r4, 32 +; CHECK-P8-NEXT: lvx v2, 0, r4 ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: sth r3, 0(r30) @@ -1075,12 +1019,10 @@ 
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha -; CHECK-P8-NEXT: ld r7, 0(r3) -; CHECK-P8-NEXT: ld r6, .LC0@toc@l(r4) -; CHECK-P8-NEXT: ld r4, 8(r3) -; CHECK-P8-NEXT: mr r3, r7 -; CHECK-P8-NEXT: ld r5, 16(r6) -; CHECK-P8-NEXT: ld r6, 24(r6) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-P8-NEXT: addi r4, r4, 16 +; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixkfsi @@ -1119,14 +1061,9 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: ld r9, 0(r3) -; CHECK-P8-NEXT: ld r7, 8(r3) -; CHECK-P8-NEXT: ld r8, 0(r4) -; CHECK-P8-NEXT: ld r6, 8(r4) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: mr r30, r5 -; CHECK-P8-NEXT: mr r3, r9 -; CHECK-P8-NEXT: mr r4, r7 -; CHECK-P8-NEXT: mr r5, r8 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixkfsi @@ -1164,9 +1101,7 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: ld r5, 0(r3) -; CHECK-P8-NEXT: ld r4, 8(r3) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: extsw r3, r3 @@ -1203,9 +1138,8 @@ ; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha ; CHECK-P8-NEXT: mr r30, r3 ; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) -; CHECK-P8-NEXT: ld r5, 32(r4) -; CHECK-P8-NEXT: ld r4, 40(r4) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: addi r4, r4, 32 +; CHECK-P8-NEXT: lvx v2, 0, r4 ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: stb r3, 0(r30) @@ -1245,12 +1179,10 @@ ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha -; CHECK-P8-NEXT: ld r7, 0(r3) -; CHECK-P8-NEXT: ld r6, .LC0@toc@l(r4) -; CHECK-P8-NEXT: ld r4, 8(r3) -; CHECK-P8-NEXT: mr r3, r7 -; CHECK-P8-NEXT: ld r5, 16(r6) -; CHECK-P8-NEXT: ld r6, 24(r6) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-P8-NEXT: addi r4, r4, 16 +; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixkfsi @@ -1290,14 +1222,9 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: ld r9, 0(r3) -; CHECK-P8-NEXT: ld r7, 8(r3) -; CHECK-P8-NEXT: ld r8, 0(r4) -; CHECK-P8-NEXT: ld r6, 8(r4) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: mr r30, r5 -; CHECK-P8-NEXT: mr r3, r9 -; CHECK-P8-NEXT: mr r4, r7 -; CHECK-P8-NEXT: mr r5, r8 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixkfsi @@ -1335,9 +1262,7 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: ld r5, 0(r3) -; CHECK-P8-NEXT: ld r4, 8(r3) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -1373,9 +1298,8 @@ ; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha ; CHECK-P8-NEXT: mr r30, r3 ; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) -; CHECK-P8-NEXT: ld r5, 32(r4) -; CHECK-P8-NEXT: ld r4, 40(r4) -; CHECK-P8-NEXT: mr r3, r5 +; CHECK-P8-NEXT: addi r4, r4, 32 +; CHECK-P8-NEXT: lvx v2, 0, r4 ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: stb r3, 0(r30) 
@@ -1415,12 +1339,10 @@ ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha -; CHECK-P8-NEXT: ld r7, 0(r3) -; CHECK-P8-NEXT: ld r6, .LC0@toc@l(r4) -; CHECK-P8-NEXT: ld r4, 8(r3) -; CHECK-P8-NEXT: mr r3, r7 -; CHECK-P8-NEXT: ld r5, 16(r6) -; CHECK-P8-NEXT: ld r6, 24(r6) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-P8-NEXT: addi r4, r4, 16 +; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixkfsi @@ -1459,14 +1381,9 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: ld r9, 0(r3) -; CHECK-P8-NEXT: ld r7, 8(r3) -; CHECK-P8-NEXT: ld r8, 0(r4) -; CHECK-P8-NEXT: ld r6, 8(r4) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: mr r30, r5 -; CHECK-P8-NEXT: mr r3, r9 -; CHECK-P8-NEXT: mr r4, r7 -; CHECK-P8-NEXT: mr r5, r8 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixkfsi diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll --- a/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll @@ -6,7 +6,7 @@ ; RUN: < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s \ ; RUN: -check-prefix=P9 ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ -; RUN: < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr8 -enable-soft-fp128 -mattr=-vsx \ +; RUN: < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr8 -mattr=-vsx \ ; RUN: | FileCheck %s -check-prefix=NOVSX declare i1 @llvm.experimental.constrained.fptosi.i1.f128(fp128, metadata) @@ -183,7 +183,7 @@ ; P8-NEXT: stdu r1, -112(r1) ; P8-NEXT: .cfi_def_cfa_offset 112 ; P8-NEXT: .cfi_offset lr, 16 -; P8-NEXT: bl __fixunskfsi +; P8-NEXT: bl __fixkfsi ; P8-NEXT: nop ; P8-NEXT: addi r1, r1, 112 ; P8-NEXT: ld r0, 16(r1) @@ -786,7 +786,7 @@ ; P8-NEXT: stdu r1, -112(r1) ; P8-NEXT: .cfi_def_cfa_offset 112 ; P8-NEXT: .cfi_offset lr, 16 -; P8-NEXT: bl __floatunsikf +; P8-NEXT: bl __floatsikf ; P8-NEXT: nop ; P8-NEXT: addi r1, r1, 112 ; P8-NEXT: ld r0, 16(r1) diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-f128.ll b/llvm/test/CodeGen/PowerPC/fp-strict-f128.ll --- a/llvm/test/CodeGen/PowerPC/fp-strict-f128.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-f128.ll @@ -154,15 +154,18 @@ ; CHECK-P8: # %bb.0: ; CHECK-P8-NEXT: mflr r0 ; CHECK-P8-NEXT: std r0, 16(r1) -; CHECK-P8-NEXT: stdu r1, -32(r1) -; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 +; CHECK-P8-NEXT: stdu r1, -48(r1) +; CHECK-P8-NEXT: .cfi_def_cfa_offset 48 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: li r9, 1 -; CHECK-P8-NEXT: sldi r9, r9, 63 -; CHECK-P8-NEXT: xor r8, r8, r9 +; CHECK-P8-NEXT: addi r3, r1, 32 +; CHECK-P8-NEXT: stvx v4, 0, r3 +; CHECK-P8-NEXT: lbz r4, 47(r1) +; CHECK-P8-NEXT: xori r4, r4, 128 +; CHECK-P8-NEXT: stb r4, 47(r1) +; CHECK-P8-NEXT: lvx v4, 0, r3 ; CHECK-P8-NEXT: bl fmal ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: addi r1, r1, 32 +; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 ; CHECK-P8-NEXT: blr @@ -185,15 +188,18 @@ ; CHECK-P8: # %bb.0: ; CHECK-P8-NEXT: mflr r0 ; CHECK-P8-NEXT: std r0, 16(r1) -; CHECK-P8-NEXT: stdu r1, -32(r1) -; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 +; CHECK-P8-NEXT: stdu r1, -48(r1) +; CHECK-P8-NEXT: .cfi_def_cfa_offset 48 ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: bl fmal ; 
CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: li r5, 1 -; CHECK-P8-NEXT: sldi r5, r5, 63 -; CHECK-P8-NEXT: xor r4, r4, r5 -; CHECK-P8-NEXT: addi r1, r1, 32 +; CHECK-P8-NEXT: addi r3, r1, 32 +; CHECK-P8-NEXT: stvx v2, 0, r3 +; CHECK-P8-NEXT: lbz r4, 47(r1) +; CHECK-P8-NEXT: xori r4, r4, 128 +; CHECK-P8-NEXT: stb r4, 47(r1) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 ; CHECK-P8-NEXT: blr @@ -215,21 +221,26 @@ ; CHECK-P8-LABEL: fnmsub_f128: ; CHECK-P8: # %bb.0: ; CHECK-P8-NEXT: mflr r0 -; CHECK-P8-NEXT: .cfi_def_cfa_offset 48 -; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: .cfi_offset r30, -16 -; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) -; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: li r9, 1 -; CHECK-P8-NEXT: sldi r30, r9, 63 -; CHECK-P8-NEXT: xor r8, r8, r30 +; CHECK-P8-NEXT: stdu r1, -64(r1) +; CHECK-P8-NEXT: .cfi_def_cfa_offset 64 +; CHECK-P8-NEXT: .cfi_offset lr, 16 +; CHECK-P8-NEXT: addi r3, r1, 32 +; CHECK-P8-NEXT: stvx v4, 0, r3 +; CHECK-P8-NEXT: lbz r4, 47(r1) +; CHECK-P8-NEXT: xori r4, r4, 128 +; CHECK-P8-NEXT: stb r4, 47(r1) +; CHECK-P8-NEXT: lvx v4, 0, r3 ; CHECK-P8-NEXT: bl fmal ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: xor r4, r4, r30 -; CHECK-P8-NEXT: addi r1, r1, 48 +; CHECK-P8-NEXT: addi r3, r1, 48 +; CHECK-P8-NEXT: stvx v2, 0, r3 +; CHECK-P8-NEXT: lbz r4, 63(r1) +; CHECK-P8-NEXT: xori r4, r4, 128 +; CHECK-P8-NEXT: stb r4, 63(r1) +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: addi r1, r1, 64 ; CHECK-P8-NEXT: ld r0, 16(r1) -; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-P8-NEXT: mtlr r0 ; CHECK-P8-NEXT: blr %neg = fneg fp128 %f2 diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp.ll b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp.ll --- a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp.ll @@ -1701,36 +1701,32 @@ ; P8: # %bb.0: ; P8-NEXT: mflr r0 ; P8-NEXT: std r0, 16(r1) -; P8-NEXT: stdu r1, -160(r1) -; P8-NEXT: std r26, 112(r1) # 8-byte Folded Spill -; P8-NEXT: std r27, 120(r1) # 8-byte Folded Spill -; P8-NEXT: std r28, 128(r1) # 8-byte Folded Spill -; P8-NEXT: std r29, 136(r1) # 8-byte Folded Spill -; P8-NEXT: mr r29, r5 -; P8-NEXT: mr r28, r4 -; P8-NEXT: mr r27, r3 -; P8-NEXT: std r30, 144(r1) # 8-byte Folded Spill -; P8-NEXT: mr r30, r6 +; P8-NEXT: stdu r1, -176(r1) +; P8-NEXT: li r3, 128 +; P8-NEXT: std r30, 160(r1) # 8-byte Folded Spill +; P8-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; P8-NEXT: li r3, 144 +; P8-NEXT: vmr v30, v2 +; P8-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; P8-NEXT: vmr v31, v3 ; P8-NEXT: bl __unordkf2 ; P8-NEXT: nop +; P8-NEXT: vmr v2, v30 ; P8-NEXT: cntlzw r3, r3 -; P8-NEXT: mr r4, r28 -; P8-NEXT: mr r5, r29 -; P8-NEXT: mr r6, r30 -; P8-NEXT: srwi r26, r3, 5 -; P8-NEXT: mr r3, r27 +; P8-NEXT: vmr v3, v31 +; P8-NEXT: srwi r30, r3, 5 ; P8-NEXT: bl __eqkf2 ; P8-NEXT: nop ; P8-NEXT: cntlzw r3, r3 -; P8-NEXT: ld r30, 144(r1) # 8-byte Folded Reload -; P8-NEXT: ld r29, 136(r1) # 8-byte Folded Reload -; P8-NEXT: ld r28, 128(r1) # 8-byte Folded Reload -; P8-NEXT: ld r27, 120(r1) # 8-byte Folded Reload +; P8-NEXT: li r4, 144 ; P8-NEXT: srwi r3, r3, 5 +; P8-NEXT: lxvd2x v31, r1, r4 # 16-byte Folded Reload +; P8-NEXT: li r4, 128 ; P8-NEXT: xori r3, r3, 1 -; P8-NEXT: and r3, r26, r3 -; P8-NEXT: ld r26, 112(r1) # 8-byte Folded Reload -; P8-NEXT: addi r1, r1, 160 +; P8-NEXT: lxvd2x v30, r1, r4 # 16-byte Folded Reload +; P8-NEXT: and r3, r30, r3 +; P8-NEXT: ld r30, 
160(r1) # 8-byte Folded Reload +; P8-NEXT: addi r1, r1, 176 ; P8-NEXT: ld r0, 16(r1) ; P8-NEXT: mtlr r0 ; P8-NEXT: blr @@ -1749,38 +1745,34 @@ ; NOVSX-LABEL: fcmp_one_f128: ; NOVSX: # %bb.0: ; NOVSX-NEXT: mflr r0 -; NOVSX-NEXT: std r26, -48(r1) # 8-byte Folded Spill -; NOVSX-NEXT: std r27, -40(r1) # 8-byte Folded Spill -; NOVSX-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; NOVSX-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; NOVSX-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; NOVSX-NEXT: std r0, 16(r1) -; NOVSX-NEXT: stdu r1, -80(r1) -; NOVSX-NEXT: mr r30, r6 -; NOVSX-NEXT: mr r29, r5 -; NOVSX-NEXT: mr r28, r4 -; NOVSX-NEXT: mr r27, r3 +; NOVSX-NEXT: stdu r1, -96(r1) +; NOVSX-NEXT: li r3, 48 +; NOVSX-NEXT: std r30, 80(r1) # 8-byte Folded Spill +; NOVSX-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; NOVSX-NEXT: li r3, 64 +; NOVSX-NEXT: vmr v30, v2 +; NOVSX-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; NOVSX-NEXT: vmr v31, v3 ; NOVSX-NEXT: bl __unordkf2 ; NOVSX-NEXT: nop +; NOVSX-NEXT: vmr v2, v30 ; NOVSX-NEXT: cntlzw r3, r3 -; NOVSX-NEXT: mr r4, r28 -; NOVSX-NEXT: mr r5, r29 -; NOVSX-NEXT: mr r6, r30 -; NOVSX-NEXT: srwi r26, r3, 5 -; NOVSX-NEXT: mr r3, r27 +; NOVSX-NEXT: vmr v3, v31 +; NOVSX-NEXT: srwi r30, r3, 5 ; NOVSX-NEXT: bl __eqkf2 ; NOVSX-NEXT: nop ; NOVSX-NEXT: cntlzw r3, r3 +; NOVSX-NEXT: li r4, 64 ; NOVSX-NEXT: srwi r3, r3, 5 +; NOVSX-NEXT: lvx v31, r1, r4 # 16-byte Folded Reload +; NOVSX-NEXT: li r4, 48 ; NOVSX-NEXT: xori r3, r3, 1 -; NOVSX-NEXT: and r3, r26, r3 -; NOVSX-NEXT: addi r1, r1, 80 +; NOVSX-NEXT: lvx v30, r1, r4 # 16-byte Folded Reload +; NOVSX-NEXT: and r3, r30, r3 +; NOVSX-NEXT: ld r30, 80(r1) # 8-byte Folded Reload +; NOVSX-NEXT: addi r1, r1, 96 ; NOVSX-NEXT: ld r0, 16(r1) -; NOVSX-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; NOVSX-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; NOVSX-NEXT: ld r28, -32(r1) # 8-byte Folded Reload -; NOVSX-NEXT: ld r27, -40(r1) # 8-byte Folded Reload -; NOVSX-NEXT: ld r26, -48(r1) # 8-byte Folded Reload ; NOVSX-NEXT: mtlr r0 ; NOVSX-NEXT: blr %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"one", metadata !"fpexcept.strict") #0 @@ -1959,36 +1951,32 @@ ; P8: # %bb.0: ; P8-NEXT: mflr r0 ; P8-NEXT: std r0, 16(r1) -; P8-NEXT: stdu r1, -160(r1) -; P8-NEXT: std r26, 112(r1) # 8-byte Folded Spill -; P8-NEXT: std r27, 120(r1) # 8-byte Folded Spill -; P8-NEXT: std r28, 128(r1) # 8-byte Folded Spill -; P8-NEXT: std r29, 136(r1) # 8-byte Folded Spill -; P8-NEXT: mr r29, r5 -; P8-NEXT: mr r28, r4 -; P8-NEXT: mr r27, r3 -; P8-NEXT: std r30, 144(r1) # 8-byte Folded Spill -; P8-NEXT: mr r30, r6 +; P8-NEXT: stdu r1, -176(r1) +; P8-NEXT: li r3, 128 +; P8-NEXT: std r30, 160(r1) # 8-byte Folded Spill +; P8-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; P8-NEXT: li r3, 144 +; P8-NEXT: vmr v30, v2 +; P8-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; P8-NEXT: vmr v31, v3 ; P8-NEXT: bl __eqkf2 ; P8-NEXT: nop +; P8-NEXT: vmr v2, v30 ; P8-NEXT: cntlzw r3, r3 -; P8-NEXT: mr r4, r28 -; P8-NEXT: mr r5, r29 -; P8-NEXT: mr r6, r30 -; P8-NEXT: srwi r26, r3, 5 -; P8-NEXT: mr r3, r27 +; P8-NEXT: vmr v3, v31 +; P8-NEXT: srwi r30, r3, 5 ; P8-NEXT: bl __unordkf2 ; P8-NEXT: nop ; P8-NEXT: cntlzw r3, r3 -; P8-NEXT: ld r30, 144(r1) # 8-byte Folded Reload -; P8-NEXT: ld r29, 136(r1) # 8-byte Folded Reload -; P8-NEXT: ld r28, 128(r1) # 8-byte Folded Reload -; P8-NEXT: ld r27, 120(r1) # 8-byte Folded Reload +; P8-NEXT: li r4, 144 ; P8-NEXT: srwi r3, r3, 5 +; P8-NEXT: lxvd2x v31, r1, r4 # 16-byte Folded Reload +; 
P8-NEXT: li r4, 128 ; P8-NEXT: xori r3, r3, 1 -; P8-NEXT: or r3, r3, r26 -; P8-NEXT: ld r26, 112(r1) # 8-byte Folded Reload -; P8-NEXT: addi r1, r1, 160 +; P8-NEXT: lxvd2x v30, r1, r4 # 16-byte Folded Reload +; P8-NEXT: or r3, r3, r30 +; P8-NEXT: ld r30, 160(r1) # 8-byte Folded Reload +; P8-NEXT: addi r1, r1, 176 ; P8-NEXT: ld r0, 16(r1) ; P8-NEXT: mtlr r0 ; P8-NEXT: blr @@ -2004,38 +1992,34 @@ ; NOVSX-LABEL: fcmp_ueq_f128: ; NOVSX: # %bb.0: ; NOVSX-NEXT: mflr r0 -; NOVSX-NEXT: std r26, -48(r1) # 8-byte Folded Spill -; NOVSX-NEXT: std r27, -40(r1) # 8-byte Folded Spill -; NOVSX-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; NOVSX-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; NOVSX-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; NOVSX-NEXT: std r0, 16(r1) -; NOVSX-NEXT: stdu r1, -80(r1) -; NOVSX-NEXT: mr r30, r6 -; NOVSX-NEXT: mr r29, r5 -; NOVSX-NEXT: mr r28, r4 -; NOVSX-NEXT: mr r27, r3 +; NOVSX-NEXT: stdu r1, -96(r1) +; NOVSX-NEXT: li r3, 48 +; NOVSX-NEXT: std r30, 80(r1) # 8-byte Folded Spill +; NOVSX-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; NOVSX-NEXT: li r3, 64 +; NOVSX-NEXT: vmr v30, v2 +; NOVSX-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; NOVSX-NEXT: vmr v31, v3 ; NOVSX-NEXT: bl __eqkf2 ; NOVSX-NEXT: nop +; NOVSX-NEXT: vmr v2, v30 ; NOVSX-NEXT: cntlzw r3, r3 -; NOVSX-NEXT: mr r4, r28 -; NOVSX-NEXT: mr r5, r29 -; NOVSX-NEXT: mr r6, r30 -; NOVSX-NEXT: srwi r26, r3, 5 -; NOVSX-NEXT: mr r3, r27 +; NOVSX-NEXT: vmr v3, v31 +; NOVSX-NEXT: srwi r30, r3, 5 ; NOVSX-NEXT: bl __unordkf2 ; NOVSX-NEXT: nop ; NOVSX-NEXT: cntlzw r3, r3 +; NOVSX-NEXT: li r4, 64 ; NOVSX-NEXT: srwi r3, r3, 5 +; NOVSX-NEXT: lvx v31, r1, r4 # 16-byte Folded Reload +; NOVSX-NEXT: li r4, 48 ; NOVSX-NEXT: xori r3, r3, 1 -; NOVSX-NEXT: or r3, r3, r26 -; NOVSX-NEXT: addi r1, r1, 80 +; NOVSX-NEXT: lvx v30, r1, r4 # 16-byte Folded Reload +; NOVSX-NEXT: or r3, r3, r30 +; NOVSX-NEXT: ld r30, 80(r1) # 8-byte Folded Reload +; NOVSX-NEXT: addi r1, r1, 96 ; NOVSX-NEXT: ld r0, 16(r1) -; NOVSX-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; NOVSX-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; NOVSX-NEXT: ld r28, -32(r1) # 8-byte Folded Reload -; NOVSX-NEXT: ld r27, -40(r1) # 8-byte Folded Reload -; NOVSX-NEXT: ld r26, -48(r1) # 8-byte Folded Reload ; NOVSX-NEXT: mtlr r0 ; NOVSX-NEXT: blr %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"ueq", metadata !"fpexcept.strict") #0 @@ -2305,36 +2289,32 @@ ; P8: # %bb.0: ; P8-NEXT: mflr r0 ; P8-NEXT: std r0, 16(r1) -; P8-NEXT: stdu r1, -160(r1) -; P8-NEXT: std r26, 112(r1) # 8-byte Folded Spill -; P8-NEXT: std r27, 120(r1) # 8-byte Folded Spill -; P8-NEXT: std r28, 128(r1) # 8-byte Folded Spill -; P8-NEXT: std r29, 136(r1) # 8-byte Folded Spill -; P8-NEXT: mr r29, r5 -; P8-NEXT: mr r28, r4 -; P8-NEXT: mr r27, r3 -; P8-NEXT: std r30, 144(r1) # 8-byte Folded Spill -; P8-NEXT: mr r30, r6 +; P8-NEXT: stdu r1, -176(r1) +; P8-NEXT: li r3, 128 +; P8-NEXT: std r30, 160(r1) # 8-byte Folded Spill +; P8-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; P8-NEXT: li r3, 144 +; P8-NEXT: vmr v30, v2 +; P8-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; P8-NEXT: vmr v31, v3 ; P8-NEXT: bl __unordkf2 ; P8-NEXT: nop +; P8-NEXT: vmr v2, v30 ; P8-NEXT: cntlzw r3, r3 -; P8-NEXT: mr r4, r28 -; P8-NEXT: mr r5, r29 -; P8-NEXT: mr r6, r30 -; P8-NEXT: srwi r26, r3, 5 -; P8-NEXT: mr r3, r27 +; P8-NEXT: vmr v3, v31 +; P8-NEXT: srwi r30, r3, 5 ; P8-NEXT: bl __eqkf2 ; P8-NEXT: nop ; P8-NEXT: cntlzw r3, r3 -; P8-NEXT: ld r30, 144(r1) # 8-byte Folded Reload -; P8-NEXT: 
ld r29, 136(r1) # 8-byte Folded Reload -; P8-NEXT: ld r28, 128(r1) # 8-byte Folded Reload -; P8-NEXT: ld r27, 120(r1) # 8-byte Folded Reload +; P8-NEXT: li r4, 144 ; P8-NEXT: srwi r3, r3, 5 +; P8-NEXT: lxvd2x v31, r1, r4 # 16-byte Folded Reload +; P8-NEXT: li r4, 128 ; P8-NEXT: xori r3, r3, 1 -; P8-NEXT: and r3, r26, r3 -; P8-NEXT: ld r26, 112(r1) # 8-byte Folded Reload -; P8-NEXT: addi r1, r1, 160 +; P8-NEXT: lxvd2x v30, r1, r4 # 16-byte Folded Reload +; P8-NEXT: and r3, r30, r3 +; P8-NEXT: ld r30, 160(r1) # 8-byte Folded Reload +; P8-NEXT: addi r1, r1, 176 ; P8-NEXT: ld r0, 16(r1) ; P8-NEXT: mtlr r0 ; P8-NEXT: blr @@ -2353,38 +2333,34 @@ ; NOVSX-LABEL: fcmps_one_f128: ; NOVSX: # %bb.0: ; NOVSX-NEXT: mflr r0 -; NOVSX-NEXT: std r26, -48(r1) # 8-byte Folded Spill -; NOVSX-NEXT: std r27, -40(r1) # 8-byte Folded Spill -; NOVSX-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; NOVSX-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; NOVSX-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; NOVSX-NEXT: std r0, 16(r1) -; NOVSX-NEXT: stdu r1, -80(r1) -; NOVSX-NEXT: mr r30, r6 -; NOVSX-NEXT: mr r29, r5 -; NOVSX-NEXT: mr r28, r4 -; NOVSX-NEXT: mr r27, r3 +; NOVSX-NEXT: stdu r1, -96(r1) +; NOVSX-NEXT: li r3, 48 +; NOVSX-NEXT: std r30, 80(r1) # 8-byte Folded Spill +; NOVSX-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; NOVSX-NEXT: li r3, 64 +; NOVSX-NEXT: vmr v30, v2 +; NOVSX-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; NOVSX-NEXT: vmr v31, v3 ; NOVSX-NEXT: bl __unordkf2 ; NOVSX-NEXT: nop +; NOVSX-NEXT: vmr v2, v30 ; NOVSX-NEXT: cntlzw r3, r3 -; NOVSX-NEXT: mr r4, r28 -; NOVSX-NEXT: mr r5, r29 -; NOVSX-NEXT: mr r6, r30 -; NOVSX-NEXT: srwi r26, r3, 5 -; NOVSX-NEXT: mr r3, r27 +; NOVSX-NEXT: vmr v3, v31 +; NOVSX-NEXT: srwi r30, r3, 5 ; NOVSX-NEXT: bl __eqkf2 ; NOVSX-NEXT: nop ; NOVSX-NEXT: cntlzw r3, r3 +; NOVSX-NEXT: li r4, 64 ; NOVSX-NEXT: srwi r3, r3, 5 +; NOVSX-NEXT: lvx v31, r1, r4 # 16-byte Folded Reload +; NOVSX-NEXT: li r4, 48 ; NOVSX-NEXT: xori r3, r3, 1 -; NOVSX-NEXT: and r3, r26, r3 -; NOVSX-NEXT: addi r1, r1, 80 +; NOVSX-NEXT: lvx v30, r1, r4 # 16-byte Folded Reload +; NOVSX-NEXT: and r3, r30, r3 +; NOVSX-NEXT: ld r30, 80(r1) # 8-byte Folded Reload +; NOVSX-NEXT: addi r1, r1, 96 ; NOVSX-NEXT: ld r0, 16(r1) -; NOVSX-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; NOVSX-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; NOVSX-NEXT: ld r28, -32(r1) # 8-byte Folded Reload -; NOVSX-NEXT: ld r27, -40(r1) # 8-byte Folded Reload -; NOVSX-NEXT: ld r26, -48(r1) # 8-byte Folded Reload ; NOVSX-NEXT: mtlr r0 ; NOVSX-NEXT: blr %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"one", metadata !"fpexcept.strict") #0 @@ -2563,36 +2539,32 @@ ; P8: # %bb.0: ; P8-NEXT: mflr r0 ; P8-NEXT: std r0, 16(r1) -; P8-NEXT: stdu r1, -160(r1) -; P8-NEXT: std r26, 112(r1) # 8-byte Folded Spill -; P8-NEXT: std r27, 120(r1) # 8-byte Folded Spill -; P8-NEXT: std r28, 128(r1) # 8-byte Folded Spill -; P8-NEXT: std r29, 136(r1) # 8-byte Folded Spill -; P8-NEXT: mr r29, r5 -; P8-NEXT: mr r28, r4 -; P8-NEXT: mr r27, r3 -; P8-NEXT: std r30, 144(r1) # 8-byte Folded Spill -; P8-NEXT: mr r30, r6 +; P8-NEXT: stdu r1, -176(r1) +; P8-NEXT: li r3, 128 +; P8-NEXT: std r30, 160(r1) # 8-byte Folded Spill +; P8-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; P8-NEXT: li r3, 144 +; P8-NEXT: vmr v30, v2 +; P8-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; P8-NEXT: vmr v31, v3 ; P8-NEXT: bl __eqkf2 ; P8-NEXT: nop +; P8-NEXT: vmr v2, v30 ; P8-NEXT: cntlzw r3, r3 -; P8-NEXT: mr r4, r28 -; P8-NEXT: mr r5, r29 
-; P8-NEXT: mr r6, r30 -; P8-NEXT: srwi r26, r3, 5 -; P8-NEXT: mr r3, r27 +; P8-NEXT: vmr v3, v31 +; P8-NEXT: srwi r30, r3, 5 ; P8-NEXT: bl __unordkf2 ; P8-NEXT: nop ; P8-NEXT: cntlzw r3, r3 -; P8-NEXT: ld r30, 144(r1) # 8-byte Folded Reload -; P8-NEXT: ld r29, 136(r1) # 8-byte Folded Reload -; P8-NEXT: ld r28, 128(r1) # 8-byte Folded Reload -; P8-NEXT: ld r27, 120(r1) # 8-byte Folded Reload +; P8-NEXT: li r4, 144 ; P8-NEXT: srwi r3, r3, 5 +; P8-NEXT: lxvd2x v31, r1, r4 # 16-byte Folded Reload +; P8-NEXT: li r4, 128 ; P8-NEXT: xori r3, r3, 1 -; P8-NEXT: or r3, r3, r26 -; P8-NEXT: ld r26, 112(r1) # 8-byte Folded Reload -; P8-NEXT: addi r1, r1, 160 +; P8-NEXT: lxvd2x v30, r1, r4 # 16-byte Folded Reload +; P8-NEXT: or r3, r3, r30 +; P8-NEXT: ld r30, 160(r1) # 8-byte Folded Reload +; P8-NEXT: addi r1, r1, 176 ; P8-NEXT: ld r0, 16(r1) ; P8-NEXT: mtlr r0 ; P8-NEXT: blr @@ -2608,38 +2580,34 @@ ; NOVSX-LABEL: fcmps_ueq_f128: ; NOVSX: # %bb.0: ; NOVSX-NEXT: mflr r0 -; NOVSX-NEXT: std r26, -48(r1) # 8-byte Folded Spill -; NOVSX-NEXT: std r27, -40(r1) # 8-byte Folded Spill -; NOVSX-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; NOVSX-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; NOVSX-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; NOVSX-NEXT: std r0, 16(r1) -; NOVSX-NEXT: stdu r1, -80(r1) -; NOVSX-NEXT: mr r30, r6 -; NOVSX-NEXT: mr r29, r5 -; NOVSX-NEXT: mr r28, r4 -; NOVSX-NEXT: mr r27, r3 +; NOVSX-NEXT: stdu r1, -96(r1) +; NOVSX-NEXT: li r3, 48 +; NOVSX-NEXT: std r30, 80(r1) # 8-byte Folded Spill +; NOVSX-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; NOVSX-NEXT: li r3, 64 +; NOVSX-NEXT: vmr v30, v2 +; NOVSX-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; NOVSX-NEXT: vmr v31, v3 ; NOVSX-NEXT: bl __eqkf2 ; NOVSX-NEXT: nop +; NOVSX-NEXT: vmr v2, v30 ; NOVSX-NEXT: cntlzw r3, r3 -; NOVSX-NEXT: mr r4, r28 -; NOVSX-NEXT: mr r5, r29 -; NOVSX-NEXT: mr r6, r30 -; NOVSX-NEXT: srwi r26, r3, 5 -; NOVSX-NEXT: mr r3, r27 +; NOVSX-NEXT: vmr v3, v31 +; NOVSX-NEXT: srwi r30, r3, 5 ; NOVSX-NEXT: bl __unordkf2 ; NOVSX-NEXT: nop ; NOVSX-NEXT: cntlzw r3, r3 +; NOVSX-NEXT: li r4, 64 ; NOVSX-NEXT: srwi r3, r3, 5 +; NOVSX-NEXT: lvx v31, r1, r4 # 16-byte Folded Reload +; NOVSX-NEXT: li r4, 48 ; NOVSX-NEXT: xori r3, r3, 1 -; NOVSX-NEXT: or r3, r3, r26 -; NOVSX-NEXT: addi r1, r1, 80 +; NOVSX-NEXT: lvx v30, r1, r4 # 16-byte Folded Reload +; NOVSX-NEXT: or r3, r3, r30 +; NOVSX-NEXT: ld r30, 80(r1) # 8-byte Folded Reload +; NOVSX-NEXT: addi r1, r1, 96 ; NOVSX-NEXT: ld r0, 16(r1) -; NOVSX-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; NOVSX-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; NOVSX-NEXT: ld r28, -32(r1) # 8-byte Folded Reload -; NOVSX-NEXT: ld r27, -40(r1) # 8-byte Folded Reload -; NOVSX-NEXT: ld r26, -48(r1) # 8-byte Folded Reload ; NOVSX-NEXT: mtlr r0 ; NOVSX-NEXT: blr %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"ueq", metadata !"fpexcept.strict") #0 diff --git a/llvm/test/CodeGen/PowerPC/store_fptoi.ll b/llvm/test/CodeGen/PowerPC/store_fptoi.ll --- a/llvm/test/CodeGen/PowerPC/store_fptoi.ll +++ b/llvm/test/CodeGen/PowerPC/store_fptoi.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -ppc-vsr-nums-as-vr \ ; RUN: -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown -ppc-vsr-nums-as-vr \ @@ -9,254 +10,365 @@ ; Function Attrs: norecurse nounwind define void @qpConv2sdw(fp128* nocapture readonly %a, i64* nocapture %b) 
{ +; CHECK-LABEL: qpConv2sdw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxv 2, 0(3) +; CHECK-NEXT: xscvqpsdz 2, 2 +; CHECK-NEXT: stxsd 2, 0(4) +; CHECK-NEXT: blr +; +; CHECK-PWR8-LABEL: qpConv2sdw: +; CHECK-PWR8: # %bb.0: # %entry +; CHECK-PWR8-NEXT: mflr 0 +; CHECK-PWR8-NEXT: .cfi_def_cfa_offset 48 +; CHECK-PWR8-NEXT: .cfi_offset lr, 16 +; CHECK-PWR8-NEXT: .cfi_offset r30, -16 +; CHECK-PWR8-NEXT: std 30, -16(1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: std 0, 16(1) +; CHECK-PWR8-NEXT: stdu 1, -48(1) +; CHECK-PWR8-NEXT: lvx 2, 0, 3 +; CHECK-PWR8-NEXT: mr 30, 4 +; CHECK-PWR8-NEXT: bl __fixkfdi +; CHECK-PWR8-NEXT: nop +; CHECK-PWR8-NEXT: std 3, 0(30) +; CHECK-PWR8-NEXT: addi 1, 1, 48 +; CHECK-PWR8-NEXT: ld 0, 16(1) +; CHECK-PWR8-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; CHECK-PWR8-NEXT: mtlr 0 +; CHECK-PWR8-NEXT: blr entry: %0 = load fp128, fp128* %a, align 16 %conv = fptosi fp128 %0 to i64 store i64 %conv, i64* %b, align 8 ret void -; CHECK-LABEL: qpConv2sdw -; CHECK: lxv [[LD:[0-9]+]], 0(3) -; CHECK-NEXT: xscvqpsdz [[CONV:[0-9]+]], [[LD]] -; CHECK-NEXT: stxsd [[CONV]], 0(4) -; CHECK-NEXT: blr -; CHECK-PWR8-LABEL: qpConv2sdw -; CHECK-PWR8: bl __fixkfdi -; CHECK-PWR8: blr } ; Function Attrs: norecurse nounwind define void @qpConv2sw(fp128* nocapture readonly %a, i32* nocapture %b) { +; CHECK-LABEL: qpConv2sw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxv 2, 0(3) +; CHECK-NEXT: xscvqpswz 2, 2 +; CHECK-NEXT: stxsiwx 2, 0, 4 +; CHECK-NEXT: blr +; +; CHECK-PWR8-LABEL: qpConv2sw: +; CHECK-PWR8: # %bb.0: # %entry +; CHECK-PWR8-NEXT: mflr 0 +; CHECK-PWR8-NEXT: .cfi_def_cfa_offset 48 +; CHECK-PWR8-NEXT: .cfi_offset lr, 16 +; CHECK-PWR8-NEXT: .cfi_offset r30, -16 +; CHECK-PWR8-NEXT: std 30, -16(1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: std 0, 16(1) +; CHECK-PWR8-NEXT: stdu 1, -48(1) +; CHECK-PWR8-NEXT: lvx 2, 0, 3 +; CHECK-PWR8-NEXT: mr 30, 4 +; CHECK-PWR8-NEXT: bl __fixkfsi +; CHECK-PWR8-NEXT: nop +; CHECK-PWR8-NEXT: stw 3, 0(30) +; CHECK-PWR8-NEXT: addi 1, 1, 48 +; CHECK-PWR8-NEXT: ld 0, 16(1) +; CHECK-PWR8-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; CHECK-PWR8-NEXT: mtlr 0 +; CHECK-PWR8-NEXT: blr entry: %0 = load fp128, fp128* %a, align 16 %conv = fptosi fp128 %0 to i32 store i32 %conv, i32* %b, align 4 ret void -; CHECK-LABEL: qpConv2sw -; CHECK: lxv [[LD:[0-9]+]], 0(3) -; CHECK-NEXT: xscvqpswz [[CONV:[0-9]+]], [[LD]] -; CHECK-NEXT: stxsiwx [[CONV]], 0, 4 -; CHECK-NEXT: blr -; CHECK-PWR8-LABEL: qpConv2sw -; CHECK-PWR8: bl __fixkfsi -; CHECK-PWR8: blr } ; Function Attrs: norecurse nounwind define void @qpConv2udw(fp128* nocapture readonly %a, i64* nocapture %b) { +; CHECK-LABEL: qpConv2udw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxv 2, 0(3) +; CHECK-NEXT: xscvqpudz 2, 2 +; CHECK-NEXT: stxsd 2, 0(4) +; CHECK-NEXT: blr +; +; CHECK-PWR8-LABEL: qpConv2udw: +; CHECK-PWR8: # %bb.0: # %entry +; CHECK-PWR8-NEXT: mflr 0 +; CHECK-PWR8-NEXT: .cfi_def_cfa_offset 48 +; CHECK-PWR8-NEXT: .cfi_offset lr, 16 +; CHECK-PWR8-NEXT: .cfi_offset r30, -16 +; CHECK-PWR8-NEXT: std 30, -16(1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: std 0, 16(1) +; CHECK-PWR8-NEXT: stdu 1, -48(1) +; CHECK-PWR8-NEXT: lvx 2, 0, 3 +; CHECK-PWR8-NEXT: mr 30, 4 +; CHECK-PWR8-NEXT: bl __fixunskfdi +; CHECK-PWR8-NEXT: nop +; CHECK-PWR8-NEXT: std 3, 0(30) +; CHECK-PWR8-NEXT: addi 1, 1, 48 +; CHECK-PWR8-NEXT: ld 0, 16(1) +; CHECK-PWR8-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; CHECK-PWR8-NEXT: mtlr 0 +; CHECK-PWR8-NEXT: blr entry: %0 = load fp128, fp128* %a, align 16 %conv = fptoui fp128 %0 to i64 store i64 %conv, i64* %b, align 8 
ret void -; CHECK-LABEL: qpConv2udw -; CHECK: lxv [[LD:[0-9]+]], 0(3) -; CHECK-NEXT: xscvqpudz [[CONV:[0-9]+]], [[LD]] -; CHECK-NEXT: stxsd [[CONV]], 0(4) -; CHECK-NEXT: blr -; CHECK-PWR8-LABEL: qpConv2udw -; CHECK-PWR8: bl __fixunskfdi -; CHECK-PWR8: blr } ; Function Attrs: norecurse nounwind define void @qpConv2uw(fp128* nocapture readonly %a, i32* nocapture %b) { +; CHECK-LABEL: qpConv2uw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxv 2, 0(3) +; CHECK-NEXT: xscvqpuwz 2, 2 +; CHECK-NEXT: stxsiwx 2, 0, 4 +; CHECK-NEXT: blr +; +; CHECK-PWR8-LABEL: qpConv2uw: +; CHECK-PWR8: # %bb.0: # %entry +; CHECK-PWR8-NEXT: mflr 0 +; CHECK-PWR8-NEXT: .cfi_def_cfa_offset 48 +; CHECK-PWR8-NEXT: .cfi_offset lr, 16 +; CHECK-PWR8-NEXT: .cfi_offset r30, -16 +; CHECK-PWR8-NEXT: std 30, -16(1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: std 0, 16(1) +; CHECK-PWR8-NEXT: stdu 1, -48(1) +; CHECK-PWR8-NEXT: lvx 2, 0, 3 +; CHECK-PWR8-NEXT: mr 30, 4 +; CHECK-PWR8-NEXT: bl __fixunskfsi +; CHECK-PWR8-NEXT: nop +; CHECK-PWR8-NEXT: stw 3, 0(30) +; CHECK-PWR8-NEXT: addi 1, 1, 48 +; CHECK-PWR8-NEXT: ld 0, 16(1) +; CHECK-PWR8-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; CHECK-PWR8-NEXT: mtlr 0 +; CHECK-PWR8-NEXT: blr entry: %0 = load fp128, fp128* %a, align 16 %conv = fptoui fp128 %0 to i32 store i32 %conv, i32* %b, align 4 ret void -; CHECK-LABEL: qpConv2uw -; CHECK: lxv [[LD:[0-9]+]], 0(3) -; CHECK-NEXT: xscvqpuwz [[CONV:[0-9]+]], [[LD]] -; CHECK-NEXT: stxsiwx [[CONV]], 0, 4 -; CHECK-NEXT: blr -; CHECK-PWR8-LABEL: qpConv2uw -; CHECK-PWR8: bl __fixunskfsi -; CHECK-PWR8: blr } ; Function Attrs: norecurse nounwind define void @dpConv2sdw(double* nocapture readonly %a, i64* nocapture %b) { +; CHECK-LABEL: dpConv2sdw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lfd 0, 0(3) +; CHECK-NEXT: xscvdpsxds 2, 0 +; CHECK-NEXT: stxsd 2, 0(4) +; CHECK-NEXT: blr +; +; CHECK-PWR8-LABEL: dpConv2sdw: +; CHECK-PWR8: # %bb.0: # %entry +; CHECK-PWR8-NEXT: lfdx 0, 0, 3 +; CHECK-PWR8-NEXT: xscvdpsxds 0, 0 +; CHECK-PWR8-NEXT: stxsdx 0, 0, 4 +; CHECK-PWR8-NEXT: blr entry: %0 = load double, double* %a, align 8 %conv = fptosi double %0 to i64 store i64 %conv, i64* %b, align 8 ret void -; CHECK-LABEL: dpConv2sdw -; CHECK: lfd [[LD:[0-9]+]], 0(3) -; CHECK: xscvdpsxds [[CONV:[0-9]+]], [[LD]] -; CHECK-NEXT: stxsd [[CONV]], 0(4) -; CHECK-NEXT: blr -; CHECK-PWR8-LABEL: dpConv2sdw -; CHECK-PWR8: lfdx [[LD:[0-9]+]], 0, 3 -; CHECK-PWR8-NEXT: xscvdpsxds [[CONV:[0-9]+]], [[LD]] -; CHECK-PWR8-NEXT: stxsdx [[CONV]], 0, 4 -; CHECK-PWR8-NEXT: blr } ; Function Attrs: norecurse nounwind define void @dpConv2sw(double* nocapture readonly %a, i32* nocapture %b) { +; CHECK-LABEL: dpConv2sw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lfd 0, 0(3) +; CHECK-NEXT: xscvdpsxws 0, 0 +; CHECK-NEXT: stfiwx 0, 0, 4 +; CHECK-NEXT: blr +; +; CHECK-PWR8-LABEL: dpConv2sw: +; CHECK-PWR8: # %bb.0: # %entry +; CHECK-PWR8-NEXT: lfdx 0, 0, 3 +; CHECK-PWR8-NEXT: xscvdpsxws 0, 0 +; CHECK-PWR8-NEXT: stfiwx 0, 0, 4 +; CHECK-PWR8-NEXT: blr entry: %0 = load double, double* %a, align 8 %conv = fptosi double %0 to i32 store i32 %conv, i32* %b, align 4 ret void -; CHECK-LABEL: dpConv2sw -; CHECK: lfd [[LD:[0-9]+]], 0(3) -; CHECK-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] -; CHECK-NEXT: stfiwx [[CONV]], 0, 4 -; CHECK-NEXT: blr -; CHECK-PWR8-LABEL: dpConv2sw -; CHECK-PWR8: lfdx [[LD:[0-9]+]], 0, 3 -; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] -; CHECK-PWR8-NEXT: stfiwx [[CONV]], 0, 4 -; CHECK-PWR8-NEXT: blr } ; Function Attrs: norecurse nounwind define void @dpConv2shw(double* nocapture readonly 
%a, i16* nocapture %b) { +; CHECK-LABEL: dpConv2shw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lfd 0, 0(3) +; CHECK-NEXT: xscvdpsxws 0, 0 +; CHECK-NEXT: stxsihx 0, 0, 4 +; CHECK-NEXT: blr +; +; CHECK-PWR8-LABEL: dpConv2shw: +; CHECK-PWR8: # %bb.0: # %entry +; CHECK-PWR8-NEXT: lfdx 0, 0, 3 +; CHECK-PWR8-NEXT: xscvdpsxws 0, 0 +; CHECK-PWR8-NEXT: mffprwz 3, 0 +; CHECK-PWR8-NEXT: sth 3, 0(4) +; CHECK-PWR8-NEXT: blr entry: %0 = load double, double* %a, align 8 %conv = fptosi double %0 to i16 store i16 %conv, i16* %b, align 2 ret void -; CHECK-LABEL: dpConv2shw -; CHECK: lfd [[LD:[0-9]+]], 0(3) -; CHECK-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] -; CHECK-NEXT: stxsihx [[CONV]], 0, 4 -; CHECK-NEXT: blr -; CHECK-PWR8-LABEL: dpConv2shw -; CHECK-PWR8: lfdx [[LD:[0-9]+]], 0, 3 -; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] -; CHECK-PWR8-NEXT: mffprwz [[REG:[0-9]+]], [[CONV]] -; CHECK-PWR8-NEXT: sth [[REG]], 0(4) -; CHECK-PWR8-NEXT: blr } ; Function Attrs: norecurse nounwind define void @dpConv2sb(double* nocapture readonly %a, i8* nocapture %b) { +; CHECK-LABEL: dpConv2sb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lfd 0, 0(3) +; CHECK-NEXT: xscvdpsxws 0, 0 +; CHECK-NEXT: stxsibx 0, 0, 4 +; CHECK-NEXT: blr +; +; CHECK-PWR8-LABEL: dpConv2sb: +; CHECK-PWR8: # %bb.0: # %entry +; CHECK-PWR8-NEXT: lfdx 0, 0, 3 +; CHECK-PWR8-NEXT: xscvdpsxws 0, 0 +; CHECK-PWR8-NEXT: mffprwz 3, 0 +; CHECK-PWR8-NEXT: stb 3, 0(4) +; CHECK-PWR8-NEXT: blr entry: %0 = load double, double* %a, align 8 %conv = fptosi double %0 to i8 store i8 %conv, i8* %b, align 1 ret void -; CHECK-LABEL: dpConv2sb -; CHECK: lfd [[LD:[0-9]+]], 0(3) -; CHECK-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] -; CHECK-NEXT: stxsibx [[CONV]], 0, 4 -; CHECK-NEXT: blr -; CHECK-PWR8-LABEL: dpConv2sb -; CHECK-PWR8: lfdx [[LD:[0-9]+]], 0, 3 -; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] -; CHECK-PWR8-NEXT: mffprwz [[REG:[0-9]+]], [[CONV]] -; CHECK-PWR8-NEXT: stb [[REG]], 0(4) -; CHECK-PWR8-NEXT: blr } ; Function Attrs: norecurse nounwind define void @spConv2sdw(float* nocapture readonly %a, i64* nocapture %b) { +; CHECK-LABEL: spConv2sdw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lfs 0, 0(3) +; CHECK-NEXT: xscvdpsxds 2, 0 +; CHECK-NEXT: stxsd 2, 0(4) +; CHECK-NEXT: blr +; +; CHECK-PWR8-LABEL: spConv2sdw: +; CHECK-PWR8: # %bb.0: # %entry +; CHECK-PWR8-NEXT: lfsx 0, 0, 3 +; CHECK-PWR8-NEXT: xscvdpsxds 0, 0 +; CHECK-PWR8-NEXT: stxsdx 0, 0, 4 +; CHECK-PWR8-NEXT: blr entry: %0 = load float, float* %a, align 4 %conv = fptosi float %0 to i64 store i64 %conv, i64* %b, align 8 ret void -; CHECK-LABEL: spConv2sdw -; CHECK: lfs [[LD:[0-9]+]], 0(3) -; CHECK-NEXT: xscvdpsxds [[CONV:[0-9]+]], [[LD]] -; CHECK-NEXT: stxsd [[CONV]], 0(4) -; CHECK-NEXT: blr -; CHECK-PWR8-LABEL: spConv2sdw -; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3 -; CHECK-PWR8-NEXT: xscvdpsxds [[CONV:[0-9]+]], [[LD]] -; CHECK-PWR8-NEXT: stxsdx [[CONV]], 0, 4 -; CHECK-PWR8-NEXT: blr } ; Function Attrs: norecurse nounwind define void @spConv2sw(float* nocapture readonly %a, i32* nocapture %b) { +; CHECK-LABEL: spConv2sw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lfs 0, 0(3) +; CHECK-NEXT: xscvdpsxws 0, 0 +; CHECK-NEXT: stfiwx 0, 0, 4 +; CHECK-NEXT: blr +; +; CHECK-PWR8-LABEL: spConv2sw: +; CHECK-PWR8: # %bb.0: # %entry +; CHECK-PWR8-NEXT: lfsx 0, 0, 3 +; CHECK-PWR8-NEXT: xscvdpsxws 0, 0 +; CHECK-PWR8-NEXT: stfiwx 0, 0, 4 +; CHECK-PWR8-NEXT: blr entry: %0 = load float, float* %a, align 4 %conv = fptosi float %0 to i32 store i32 %conv, i32* %b, align 4 ret void -; CHECK-LABEL: spConv2sw -; CHECK: lfs 
[[LD:[0-9]+]], 0(3) -; CHECK-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] -; CHECK-NEXT: stfiwx [[CONV]], 0, 4 -; CHECK-NEXT: blr -; CHECK-PWR8-LABEL: spConv2sw -; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3 -; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] -; CHECK-PWR8-NEXT: stfiwx [[CONV]], 0, 4 -; CHECK-PWR8-NEXT: blr } ; Function Attrs: norecurse nounwind define void @spConv2shw(float* nocapture readonly %a, i16* nocapture %b) { +; CHECK-LABEL: spConv2shw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lfs 0, 0(3) +; CHECK-NEXT: xscvdpsxws 0, 0 +; CHECK-NEXT: stxsihx 0, 0, 4 +; CHECK-NEXT: blr +; +; CHECK-PWR8-LABEL: spConv2shw: +; CHECK-PWR8: # %bb.0: # %entry +; CHECK-PWR8-NEXT: lfsx 0, 0, 3 +; CHECK-PWR8-NEXT: xscvdpsxws 0, 0 +; CHECK-PWR8-NEXT: mffprwz 3, 0 +; CHECK-PWR8-NEXT: sth 3, 0(4) +; CHECK-PWR8-NEXT: blr entry: %0 = load float, float* %a, align 4 %conv = fptosi float %0 to i16 store i16 %conv, i16* %b, align 2 ret void -; CHECK-LABEL: spConv2shw -; CHECK: lfs [[LD:[0-9]+]], 0(3) -; CHECK-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] -; CHECK-NEXT: stxsihx [[CONV]], 0, 4 -; CHECK-NEXT: blr -; CHECK-PWR8-LABEL: spConv2shw -; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3 -; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] -; CHECK-PWR8-NEXT: mffprwz [[REG:[0-9]+]], [[CONV]] -; CHECK-PWR8-NEXT: sth [[REG]], 0(4) -; CHECK-PWR8-NEXT: blr } ; Function Attrs: norecurse nounwind define void @spConv2sb(float* nocapture readonly %a, i8* nocapture %b) { +; CHECK-LABEL: spConv2sb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lfs 0, 0(3) +; CHECK-NEXT: xscvdpsxws 0, 0 +; CHECK-NEXT: stxsibx 0, 0, 4 +; CHECK-NEXT: blr +; +; CHECK-PWR8-LABEL: spConv2sb: +; CHECK-PWR8: # %bb.0: # %entry +; CHECK-PWR8-NEXT: lfsx 0, 0, 3 +; CHECK-PWR8-NEXT: xscvdpsxws 0, 0 +; CHECK-PWR8-NEXT: mffprwz 3, 0 +; CHECK-PWR8-NEXT: stb 3, 0(4) +; CHECK-PWR8-NEXT: blr entry: %0 = load float, float* %a, align 4 %conv = fptosi float %0 to i8 store i8 %conv, i8* %b, align 1 ret void -; CHECK-LABEL: spConv2sb -; CHECK: lfs [[LD:[0-9]+]], 0(3) -; CHECK-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] -; CHECK-NEXT: stxsibx [[CONV]], 0, 4 -; CHECK-NEXT: blr -; CHECK-PWR8-LABEL: spConv2sb -; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3 -; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] -; CHECK-PWR8-NEXT: mffprwz [[REG:[0-9]+]], [[CONV]] -; CHECK-PWR8-NEXT: stb [[REG]], 0(4) -; CHECK-PWR8-NEXT: blr } ; Function Attrs: norecurse nounwind define void @dpConv2sdw_x(double* nocapture readonly %a, i64* nocapture %b, +; CHECK-LABEL: dpConv2sdw_x: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lfd 0, 0(3) +; CHECK-NEXT: sldi 3, 5, 3 +; CHECK-NEXT: xscvdpsxds 0, 0 +; CHECK-NEXT: stxsdx 0, 4, 3 +; CHECK-NEXT: blr +; +; CHECK-PWR8-LABEL: dpConv2sdw_x: +; CHECK-PWR8: # %bb.0: # %entry +; CHECK-PWR8-NEXT: lfdx 0, 0, 3 +; CHECK-PWR8-NEXT: sldi 3, 5, 3 +; CHECK-PWR8-NEXT: xscvdpsxds 0, 0 +; CHECK-PWR8-NEXT: stxsdx 0, 4, 3 +; CHECK-PWR8-NEXT: blr i32 signext %idx) { entry: %0 = load double, double* %a, align 8 @@ -266,23 +378,26 @@ store i64 %conv, i64* %arrayidx, align 8 ret void -; CHECK-LABEL: dpConv2sdw_x -; CHECK: lfd [[LD:[0-9]+]], 0(3) -; CHECK-NEXT: sldi [[REG:[0-9]+]], 5, 3 -; CHECK-NEXT: xscvdpsxds [[CONV:[0-9]+]], [[LD]] -; CHECK-NEXT: stxsdx [[CONV]], 4, [[REG]] -; CHECK-NEXT: blr -; CHECK-PWR8-LABEL: dpConv2sdw_x -; CHECK-PWR8: lfdx [[LD:[0-9]+]], 0, 3 -; CHECK-PWR8: sldi [[REG:[0-9]+]], 5, 3 -; CHECK-PWR8-NEXT: xscvdpsxds [[CONV:[0-9]+]], [[LD]] -; CHECK-PWR8-NEXT: stxsdx [[CONV]], 4, [[REG]] -; CHECK-PWR8-NEXT: blr } ; Function Attrs: norecurse nounwind define 
void @dpConv2sw_x(double* nocapture readonly %a, i32* nocapture %b, +; CHECK-LABEL: dpConv2sw_x: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lfd 0, 0(3) +; CHECK-NEXT: sldi 3, 5, 2 +; CHECK-NEXT: xscvdpsxws 0, 0 +; CHECK-NEXT: stfiwx 0, 4, 3 +; CHECK-NEXT: blr +; +; CHECK-PWR8-LABEL: dpConv2sw_x: +; CHECK-PWR8: # %bb.0: # %entry +; CHECK-PWR8-NEXT: lfdx 0, 0, 3 +; CHECK-PWR8-NEXT: sldi 3, 5, 2 +; CHECK-PWR8-NEXT: xscvdpsxws 0, 0 +; CHECK-PWR8-NEXT: stfiwx 0, 4, 3 +; CHECK-PWR8-NEXT: blr i32 signext %idx) { entry: %0 = load double, double* %a, align 8 @@ -292,23 +407,27 @@ store i32 %conv, i32* %arrayidx, align 4 ret void -; CHECK-LABEL: dpConv2sw_x -; CHECK: lfd [[LD:[0-9]+]], 0(3) -; CHECK-NEXT: sldi [[REG:[0-9]+]], 5, 2 -; CHECK-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] -; CHECK-NEXT: stfiwx [[CONV]], 4, [[REG]] -; CHECK-NEXT: blr -; CHECK-PWR8-LABEL: dpConv2sw_x -; CHECK-PWR8: lfdx [[LD:[0-9]+]], 0, 3 -; CHECK-PWR8-NEXT: sldi [[REG:[0-9]+]], 5, 2 -; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] -; CHECK-PWR8-NEXT: stfiwx [[CONV]], 4, [[REG]] -; CHECK-PWR8-NEXT: blr } ; Function Attrs: norecurse nounwind define void @dpConv2shw_x(double* nocapture readonly %a, i16* nocapture %b, +; CHECK-LABEL: dpConv2shw_x: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lfd 0, 0(3) +; CHECK-NEXT: sldi 3, 5, 1 +; CHECK-NEXT: xscvdpsxws 0, 0 +; CHECK-NEXT: stxsihx 0, 4, 3 +; CHECK-NEXT: blr +; +; CHECK-PWR8-LABEL: dpConv2shw_x: +; CHECK-PWR8: # %bb.0: # %entry +; CHECK-PWR8-NEXT: lfdx 0, 0, 3 +; CHECK-PWR8-NEXT: sldi 5, 5, 1 +; CHECK-PWR8-NEXT: xscvdpsxws 0, 0 +; CHECK-PWR8-NEXT: mffprwz 3, 0 +; CHECK-PWR8-NEXT: sthx 3, 4, 5 +; CHECK-PWR8-NEXT: blr i32 signext %idx) { entry: %0 = load double, double* %a, align 8 @@ -318,24 +437,25 @@ store i16 %conv, i16* %arrayidx, align 2 ret void -; CHECK-LABEL: dpConv2shw_x -; CHECK: lfd [[LD:[0-9]+]], 0(3) -; CHECK-NEXT: sldi [[REG:[0-9]+]], 5, 1 -; CHECK-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] -; CHECK-NEXT: stxsihx [[CONV]], 4, [[REG]] -; CHECK-NEXT: blr -; CHECK-PWR8-LABEL: dpConv2shw_x -; CHECK-PWR8: lfdx [[LD:[0-9]+]], 0, 3 -; CHECK-PWR8-NEXT: sldi [[REG:[0-9]+]], 5, 1 -; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] -; CHECK-PWR8-NEXT: mffprwz [[REG:[0-9]+]], [[CONV]] -; CHECK-PWR8-NEXT: sthx [[REG]], 4, 5 -; CHECK-PWR8-NEXT: blr } ; Function Attrs: norecurse nounwind define void @dpConv2sb_x(double* nocapture readonly %a, i8* nocapture %b, +; CHECK-LABEL: dpConv2sb_x: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lfd 0, 0(3) +; CHECK-NEXT: xscvdpsxws 0, 0 +; CHECK-NEXT: stxsibx 0, 4, 5 +; CHECK-NEXT: blr +; +; CHECK-PWR8-LABEL: dpConv2sb_x: +; CHECK-PWR8: # %bb.0: # %entry +; CHECK-PWR8-NEXT: lfdx 0, 0, 3 +; CHECK-PWR8-NEXT: xscvdpsxws 0, 0 +; CHECK-PWR8-NEXT: mffprwz 3, 0 +; CHECK-PWR8-NEXT: stbx 3, 4, 5 +; CHECK-PWR8-NEXT: blr i32 signext %idx) { entry: %0 = load double, double* %a, align 8 @@ -345,22 +465,26 @@ store i8 %conv, i8* %arrayidx, align 1 ret void -; CHECK-LABEL: dpConv2sb_x -; CHECK: lfd [[LD:[0-9]+]], 0(3) -; CHECK-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] -; CHECK-NEXT: stxsibx [[CONV]], 4, 5 -; CHECK-NEXT: blr -; CHECK-PWR8-LABEL: dpConv2sb_x -; CHECK-PWR8: lfdx [[LD:[0-9]+]], 0, 3 -; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] -; CHECK-PWR8-NEXT: mffprwz [[REG:[0-9]+]], [[CONV]] -; CHECK-PWR8-NEXT: stbx [[REG]], 4, 5 -; CHECK-PWR8-NEXT: blr } ; Function Attrs: norecurse nounwind define void @spConv2sdw_x(float* nocapture readonly %a, i64* nocapture %b, +; CHECK-LABEL: spConv2sdw_x: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lfs 0, 
0(3) +; CHECK-NEXT: xscvdpsxds 0, 0 +; CHECK-NEXT: sldi 5, 5, 3 +; CHECK-NEXT: stxsdx 0, 4, 5 +; CHECK-NEXT: blr +; +; CHECK-PWR8-LABEL: spConv2sdw_x: +; CHECK-PWR8: # %bb.0: # %entry +; CHECK-PWR8-NEXT: lfsx 0, 0, 3 +; CHECK-PWR8-NEXT: sldi 3, 5, 3 +; CHECK-PWR8-NEXT: xscvdpsxds 0, 0 +; CHECK-PWR8-NEXT: stxsdx 0, 4, 3 +; CHECK-PWR8-NEXT: blr i32 signext %idx) { entry: %0 = load float, float* %a, align 4 @@ -370,23 +494,26 @@ store i64 %conv, i64* %arrayidx, align 8 ret void -; CHECK-LABEL: spConv2sdw_x -; CHECK: lfs [[LD:[0-9]+]], 0(3) -; CHECK-DAG: sldi [[REG:[0-9]+]], 5, 3 -; CHECK-DAG: xscvdpsxds [[CONV:[0-9]+]], [[LD]] -; CHECK-NEXT: stxsdx [[CONV]], 4, [[REG]] -; CHECK-NEXT: blr -; CHECK-PWR8-LABEL: spConv2sdw_x -; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3 -; CHECK-PWR8-NEXT: sldi [[REG:[0-9]+]], 5, 3 -; CHECK-PWR8-NEXT: xscvdpsxds [[CONV:[0-9]+]], [[LD]] -; CHECK-PWR8-NEXT: stxsdx [[CONV]], 4, [[REG]] -; CHECK-PWR8-NEXT: blr } ; Function Attrs: norecurse nounwind define void @spConv2sw_x(float* nocapture readonly %a, i32* nocapture %b, +; CHECK-LABEL: spConv2sw_x: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lfs 0, 0(3) +; CHECK-NEXT: xscvdpsxws 0, 0 +; CHECK-NEXT: sldi 5, 5, 2 +; CHECK-NEXT: stfiwx 0, 4, 5 +; CHECK-NEXT: blr +; +; CHECK-PWR8-LABEL: spConv2sw_x: +; CHECK-PWR8: # %bb.0: # %entry +; CHECK-PWR8-NEXT: lfsx 0, 0, 3 +; CHECK-PWR8-NEXT: sldi 3, 5, 2 +; CHECK-PWR8-NEXT: xscvdpsxws 0, 0 +; CHECK-PWR8-NEXT: stfiwx 0, 4, 3 +; CHECK-PWR8-NEXT: blr i32 signext %idx) { entry: %0 = load float, float* %a, align 4 @@ -396,23 +523,27 @@ store i32 %conv, i32* %arrayidx, align 4 ret void -; CHECK-LABEL: spConv2sw_x -; CHECK: lfs [[LD:[0-9]+]], 0(3) -; CHECK-DAG: sldi [[REG:[0-9]+]], 5, 2 -; CHECK-DAG: xscvdpsxws [[CONV:[0-9]+]], [[LD]] -; CHECK-NEXT: stfiwx [[CONV]], 4, [[REG]] -; CHECK-NEXT: blr -; CHECK-PWR8-LABEL: spConv2sw_x -; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3 -; CHECK-PWR8-NEXT: sldi [[REG:[0-9]+]], 5, 2 -; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] -; CHECK-PWR8-NEXT: stfiwx [[CONV]], 4, [[REG]] -; CHECK-PWR8-NEXT: blr } ; Function Attrs: norecurse nounwind define void @spConv2shw_x(float* nocapture readonly %a, i16* nocapture %b, +; CHECK-LABEL: spConv2shw_x: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lfs 0, 0(3) +; CHECK-NEXT: xscvdpsxws 0, 0 +; CHECK-NEXT: sldi 5, 5, 1 +; CHECK-NEXT: stxsihx 0, 4, 5 +; CHECK-NEXT: blr +; +; CHECK-PWR8-LABEL: spConv2shw_x: +; CHECK-PWR8: # %bb.0: # %entry +; CHECK-PWR8-NEXT: lfsx 0, 0, 3 +; CHECK-PWR8-NEXT: sldi 5, 5, 1 +; CHECK-PWR8-NEXT: xscvdpsxws 0, 0 +; CHECK-PWR8-NEXT: mffprwz 3, 0 +; CHECK-PWR8-NEXT: sthx 3, 4, 5 +; CHECK-PWR8-NEXT: blr i32 signext %idx) { entry: %0 = load float, float* %a, align 4 @@ -422,24 +553,25 @@ store i16 %conv, i16* %arrayidx, align 2 ret void -; CHECK-LABEL: spConv2shw_x -; CHECK: lfs [[LD:[0-9]+]], 0(3) -; CHECK-DAG: sldi [[REG:[0-9]+]], 5, 1 -; CHECK-DAG: xscvdpsxws [[CONV:[0-9]+]], [[LD]] -; CHECK-NEXT: stxsihx [[CONV]], 4, [[REG]] -; CHECK-NEXT: blr -; CHECK-PWR8-LABEL: spConv2shw_x -; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3 -; CHECK-PWR8-NEXT: sldi [[REG:[0-9]+]], 5, 1 -; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]] -; CHECK-PWR8-NEXT: mffprwz [[REG2:[0-9]+]], [[CONV]] -; CHECK-PWR8-NEXT: sthx [[REG2]], 4, [[REG]] -; CHECK-PWR8-NEXT: blr } ; Function Attrs: norecurse nounwind define void @spConv2sb_x(float* nocapture readonly %a, i8* nocapture %b, +; CHECK-LABEL: spConv2sb_x: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lfs 0, 0(3) +; CHECK-NEXT: xscvdpsxws 0, 0 +; CHECK-NEXT: stxsibx 0, 4, 5 +; 
CHECK-NEXT: blr
+;
+; CHECK-PWR8-LABEL: spConv2sb_x:
+; CHECK-PWR8: # %bb.0: # %entry
+; CHECK-PWR8-NEXT: lfsx 0, 0, 3
+; CHECK-PWR8-NEXT: xscvdpsxws 0, 0
+; CHECK-PWR8-NEXT: mffprwz 3, 0
+; CHECK-PWR8-NEXT: stbx 3, 4, 5
+; CHECK-PWR8-NEXT: blr
     i32 signext %idx) {
 entry:
   %0 = load float, float* %a, align 4
@@ -449,18 +581,7 @@
   store i8 %conv, i8* %arrayidx, align 1
   ret void
-; CHECK-LABEL: spConv2sb_x
-; CHECK: lfs [[LD:[0-9]+]], 0(3)
-; CHECK-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]]
-; CHECK-NEXT: stxsibx [[CONV]], 4, 5
-; CHECK-NEXT: blr
-; CHECK-PWR8-LABEL: spConv2sb_x
-; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3
-; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]]
-; CHECK-PWR8-NEXT: mffprwz [[REG:[0-9]+]], [[CONV]]
-; CHECK-PWR8-NEXT: stbx [[REG]], 4, 5
-; CHECK-PWR8-NEXT: blr
 }
 ; ==========================================
@@ -469,178 +590,217 @@
 ; Function Attrs: norecurse nounwind
 define void @dpConv2udw(double* nocapture readonly %a, i64* nocapture %b) {
+; CHECK-LABEL: dpConv2udw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lfd 0, 0(3)
+; CHECK-NEXT: xscvdpuxds 2, 0
+; CHECK-NEXT: stxsd 2, 0(4)
+; CHECK-NEXT: blr
+;
+; CHECK-PWR8-LABEL: dpConv2udw:
+; CHECK-PWR8: # %bb.0: # %entry
+; CHECK-PWR8-NEXT: lfdx 0, 0, 3
+; CHECK-PWR8-NEXT: xscvdpuxds 0, 0
+; CHECK-PWR8-NEXT: stxsdx 0, 0, 4
+; CHECK-PWR8-NEXT: blr
 entry:
   %0 = load double, double* %a, align 8
   %conv = fptoui double %0 to i64
   store i64 %conv, i64* %b, align 8
   ret void
-; CHECK-LABEL: dpConv2udw
-; CHECK: lfd [[LD:[0-9]+]], 0(3)
-; CHECK: xscvdpuxds [[CONV:[0-9]+]], [[LD]]
-; CHECK-NEXT: stxsd [[CONV]], 0(4)
-; CHECK-NEXT: blr
-; CHECK-PWR8-LABEL: dpConv2udw
-; CHECK-PWR8: lfdx [[LD:[0-9]+]], 0, 3
-; CHECK-PWR8-NEXT: xscvdpuxds [[CONV:[0-9]+]], [[LD]]
-; CHECK-PWR8-NEXT: stxsdx [[CONV]], 0, 4
-; CHECK-PWR8-NEXT: blr
 }
 ; Function Attrs: norecurse nounwind
 define void @dpConv2uw(double* nocapture readonly %a, i32* nocapture %b) {
+; CHECK-LABEL: dpConv2uw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lfd 0, 0(3)
+; CHECK-NEXT: xscvdpuxws 0, 0
+; CHECK-NEXT: stfiwx 0, 0, 4
+; CHECK-NEXT: blr
+;
+; CHECK-PWR8-LABEL: dpConv2uw:
+; CHECK-PWR8: # %bb.0: # %entry
+; CHECK-PWR8-NEXT: lfdx 0, 0, 3
+; CHECK-PWR8-NEXT: xscvdpuxws 0, 0
+; CHECK-PWR8-NEXT: stfiwx 0, 0, 4
+; CHECK-PWR8-NEXT: blr
 entry:
   %0 = load double, double* %a, align 8
   %conv = fptoui double %0 to i32
   store i32 %conv, i32* %b, align 4
   ret void
-; CHECK-LABEL: dpConv2uw
-; CHECK: lfd [[LD:[0-9]+]], 0(3)
-; CHECK-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]]
-; CHECK-NEXT: stfiwx [[CONV]], 0, 4
-; CHECK-NEXT: blr
-; CHECK-PWR8-LABEL: dpConv2uw
-; CHECK-PWR8: lfdx [[LD:[0-9]+]], 0, 3
-; CHECK-PWR8-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]]
-; CHECK-PWR8-NEXT: stfiwx [[CONV]], 0, 4
-; CHECK-PWR8-NEXT: blr
 }
 ; Function Attrs: norecurse nounwind
 define void @dpConv2uhw(double* nocapture readonly %a, i16* nocapture %b) {
+; CHECK-LABEL: dpConv2uhw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lfd 0, 0(3)
+; CHECK-NEXT: xscvdpuxws 0, 0
+; CHECK-NEXT: stxsihx 0, 0, 4
+; CHECK-NEXT: blr
+;
+; CHECK-PWR8-LABEL: dpConv2uhw:
+; CHECK-PWR8: # %bb.0: # %entry
+; CHECK-PWR8-NEXT: lfdx 0, 0, 3
+; CHECK-PWR8-NEXT: xscvdpsxws 0, 0
+; CHECK-PWR8-NEXT: mffprwz 3, 0
+; CHECK-PWR8-NEXT: sth 3, 0(4)
+; CHECK-PWR8-NEXT: blr
 entry:
   %0 = load double, double* %a, align 8
   %conv = fptoui double %0 to i16
   store i16 %conv, i16* %b, align 2
   ret void
-; CHECK-LABEL: dpConv2uhw
-; CHECK: lfd [[LD:[0-9]+]], 0(3)
-; CHECK-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]]
-; CHECK-NEXT: stxsihx [[CONV]], 0, 4
-; CHECK-NEXT: blr
-; CHECK-PWR8-LABEL: dpConv2uhw
-; CHECK-PWR8: lfdx [[LD:[0-9]+]], 0, 3
-; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]]
-; CHECK-PWR8-NEXT: mffprwz [[REG:[0-9]+]], [[CONV]]
-; CHECK-PWR8-NEXT: sth [[REG]], 0(4)
-; CHECK-PWR8-NEXT: blr
 }
 ; Function Attrs: norecurse nounwind
 define void @dpConv2ub(double* nocapture readonly %a, i8* nocapture %b) {
+; CHECK-LABEL: dpConv2ub:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lfd 0, 0(3)
+; CHECK-NEXT: xscvdpuxws 0, 0
+; CHECK-NEXT: stxsibx 0, 0, 4
+; CHECK-NEXT: blr
+;
+; CHECK-PWR8-LABEL: dpConv2ub:
+; CHECK-PWR8: # %bb.0: # %entry
+; CHECK-PWR8-NEXT: lfdx 0, 0, 3
+; CHECK-PWR8-NEXT: xscvdpsxws 0, 0
+; CHECK-PWR8-NEXT: mffprwz 3, 0
+; CHECK-PWR8-NEXT: stb 3, 0(4)
+; CHECK-PWR8-NEXT: blr
 entry:
   %0 = load double, double* %a, align 8
   %conv = fptoui double %0 to i8
   store i8 %conv, i8* %b, align 1
   ret void
-; CHECK-LABEL: dpConv2ub
-; CHECK: lfd [[LD:[0-9]+]], 0(3)
-; CHECK-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]]
-; CHECK-NEXT: stxsibx [[CONV]], 0, 4
-; CHECK-NEXT: blr
-; CHECK-PWR8-LABEL: dpConv2ub
-; CHECK-PWR8: lfdx [[LD:[0-9]+]], 0, 3
-; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]]
-; CHECK-PWR8-NEXT: mffprwz [[REG:[0-9]+]], [[CONV]]
-; CHECK-PWR8-NEXT: stb [[REG]], 0(4)
-; CHECK-PWR8-NEXT: blr
 }
 ; Function Attrs: norecurse nounwind
 define void @spConv2udw(float* nocapture readonly %a, i64* nocapture %b) {
+; CHECK-LABEL: spConv2udw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lfs 0, 0(3)
+; CHECK-NEXT: xscvdpuxds 2, 0
+; CHECK-NEXT: stxsd 2, 0(4)
+; CHECK-NEXT: blr
+;
+; CHECK-PWR8-LABEL: spConv2udw:
+; CHECK-PWR8: # %bb.0: # %entry
+; CHECK-PWR8-NEXT: lfsx 0, 0, 3
+; CHECK-PWR8-NEXT: xscvdpuxds 0, 0
+; CHECK-PWR8-NEXT: stxsdx 0, 0, 4
+; CHECK-PWR8-NEXT: blr
 entry:
   %0 = load float, float* %a, align 4
   %conv = fptoui float %0 to i64
   store i64 %conv, i64* %b, align 8
   ret void
-; CHECK-LABEL: spConv2udw
-; CHECK: lfs [[LD:[0-9]+]], 0(3)
-; CHECK-NEXT: xscvdpuxds [[CONV:[0-9]+]], [[LD]]
-; CHECK-NEXT: stxsd [[CONV]], 0(4)
-; CHECK-NEXT: blr
-; CHECK-PWR8-LABEL: spConv2udw
-; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3
-; CHECK-PWR8-NEXT: xscvdpuxds [[CONV:[0-9]+]], [[LD]]
-; CHECK-PWR8-NEXT: stxsdx [[CONV]], 0, 4
-; CHECK-PWR8-NEXT: blr
 }
 ; Function Attrs: norecurse nounwind
 define void @spConv2uw(float* nocapture readonly %a, i32* nocapture %b) {
+; CHECK-LABEL: spConv2uw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lfs 0, 0(3)
+; CHECK-NEXT: xscvdpuxws 0, 0
+; CHECK-NEXT: stfiwx 0, 0, 4
+; CHECK-NEXT: blr
+;
+; CHECK-PWR8-LABEL: spConv2uw:
+; CHECK-PWR8: # %bb.0: # %entry
+; CHECK-PWR8-NEXT: lfsx 0, 0, 3
+; CHECK-PWR8-NEXT: xscvdpuxws 0, 0
+; CHECK-PWR8-NEXT: stfiwx 0, 0, 4
+; CHECK-PWR8-NEXT: blr
 entry:
   %0 = load float, float* %a, align 4
   %conv = fptoui float %0 to i32
   store i32 %conv, i32* %b, align 4
   ret void
-; CHECK-LABEL: spConv2uw
-; CHECK: lfs [[LD:[0-9]+]], 0(3)
-; CHECK-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]]
-; CHECK-NEXT: stfiwx [[CONV]], 0, 4
-; CHECK-NEXT: blr
-; CHECK-PWR8-LABEL: spConv2uw
-; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3
-; CHECK-PWR8-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]]
-; CHECK-PWR8-NEXT: stfiwx [[CONV]], 0, 4
-; CHECK-PWR8-NEXT: blr
 }
 ; Function Attrs: norecurse nounwind
 define void @spConv2uhw(float* nocapture readonly %a, i16* nocapture %b) {
+; CHECK-LABEL: spConv2uhw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lfs 0, 0(3)
+; CHECK-NEXT: xscvdpuxws 0, 0
+; CHECK-NEXT: stxsihx 0, 0, 4
+; CHECK-NEXT: blr
+;
+; CHECK-PWR8-LABEL: spConv2uhw:
+; CHECK-PWR8: # %bb.0: # %entry
+; CHECK-PWR8-NEXT: lfsx 0, 0, 3
+; CHECK-PWR8-NEXT: xscvdpsxws 0, 0
+; CHECK-PWR8-NEXT: mffprwz 3, 0
+; CHECK-PWR8-NEXT: sth 3, 0(4)
+; CHECK-PWR8-NEXT: blr
 entry:
   %0 = load float, float* %a, align 4
   %conv = fptoui float %0 to i16
   store i16 %conv, i16* %b, align 2
   ret void
-; CHECK-LABEL: spConv2uhw
-; CHECK: lfs [[LD:[0-9]+]], 0(3)
-; CHECK-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]]
-; CHECK-NEXT: stxsihx [[CONV]], 0, 4
-; CHECK-NEXT: blr
-; CHECK-PWR8-LABEL: spConv2uhw
-; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3
-; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]]
-; CHECK-PWR8-NEXT: mffprwz [[REG:[0-9]+]], [[CONV]]
-; CHECK-PWR8-NEXT: sth [[REG]], 0(4)
-; CHECK-PWR8-NEXT: blr
 }
 ; Function Attrs: norecurse nounwind
 define void @spConv2ub(float* nocapture readonly %a, i8* nocapture %b) {
+; CHECK-LABEL: spConv2ub:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lfs 0, 0(3)
+; CHECK-NEXT: xscvdpuxws 0, 0
+; CHECK-NEXT: stxsibx 0, 0, 4
+; CHECK-NEXT: blr
+;
+; CHECK-PWR8-LABEL: spConv2ub:
+; CHECK-PWR8: # %bb.0: # %entry
+; CHECK-PWR8-NEXT: lfsx 0, 0, 3
+; CHECK-PWR8-NEXT: xscvdpsxws 0, 0
+; CHECK-PWR8-NEXT: mffprwz 3, 0
+; CHECK-PWR8-NEXT: stb 3, 0(4)
+; CHECK-PWR8-NEXT: blr
 entry:
   %0 = load float, float* %a, align 4
   %conv = fptoui float %0 to i8
   store i8 %conv, i8* %b, align 1
   ret void
-; CHECK-LABEL: spConv2ub
-; CHECK: lfs [[LD:[0-9]+]], 0(3)
-; CHECK-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]]
-; CHECK-NEXT: stxsibx [[CONV]], 0, 4
-; CHECK-NEXT: blr
-; CHECK-PWR8-LABEL: spConv2ub
-; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3
-; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]]
-; CHECK-PWR8-NEXT: mffprwz [[REG:[0-9]+]], [[CONV]]
-; CHECK-PWR8-NEXT: stb [[REG]], 0(4)
-; CHECK-PWR8-NEXT: blr
 }
 ; Function Attrs: norecurse nounwind
 define void @dpConv2udw_x(double* nocapture readonly %a, i64* nocapture %b,
+; CHECK-LABEL: dpConv2udw_x:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lfd 0, 0(3)
+; CHECK-NEXT: sldi 3, 5, 3
+; CHECK-NEXT: xscvdpuxds 0, 0
+; CHECK-NEXT: stxsdx 0, 4, 3
+; CHECK-NEXT: blr
+;
+; CHECK-PWR8-LABEL: dpConv2udw_x:
+; CHECK-PWR8: # %bb.0: # %entry
+; CHECK-PWR8-NEXT: lfdx 0, 0, 3
+; CHECK-PWR8-NEXT: sldi 3, 5, 3
+; CHECK-PWR8-NEXT: xscvdpuxds 0, 0
+; CHECK-PWR8-NEXT: stxsdx 0, 4, 3
+; CHECK-PWR8-NEXT: blr
     i32 zeroext %idx) {
 entry:
   %0 = load double, double* %a, align 8
@@ -650,23 +810,26 @@
   store i64 %conv, i64* %arrayidx, align 8
   ret void
-; CHECK-LABEL: dpConv2udw_x
-; CHECK: lfd [[LD:[0-9]+]], 0(3)
-; CHECK-NEXT: sldi [[REG:[0-9]+]], 5, 3
-; CHECK-NEXT: xscvdpuxds [[CONV:[0-9]+]], [[LD]]
-; CHECK-NEXT: stxsdx [[CONV]], 4, [[REG]]
-; CHECK-NEXT: blr
-; CHECK-PWR8-LABEL: dpConv2udw_x
-; CHECK-PWR8: lfdx [[LD:[0-9]+]], 0, 3
-; CHECK-PWR8: sldi [[REG:[0-9]+]], 5, 3
-; CHECK-PWR8-NEXT: xscvdpuxds [[CONV:[0-9]+]], [[LD]]
-; CHECK-PWR8-NEXT: stxsdx [[CONV]], 4, [[REG]]
-; CHECK-PWR8-NEXT: blr
 }
 ; Function Attrs: norecurse nounwind
 define void @dpConv2uw_x(double* nocapture readonly %a, i32* nocapture %b,
+; CHECK-LABEL: dpConv2uw_x:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lfd 0, 0(3)
+; CHECK-NEXT: sldi 3, 5, 2
+; CHECK-NEXT: xscvdpuxws 0, 0
+; CHECK-NEXT: stfiwx 0, 4, 3
+; CHECK-NEXT: blr
+;
+; CHECK-PWR8-LABEL: dpConv2uw_x:
+; CHECK-PWR8: # %bb.0: # %entry
+; CHECK-PWR8-NEXT: lfdx 0, 0, 3
+; CHECK-PWR8-NEXT: sldi 3, 5, 2
+; CHECK-PWR8-NEXT: xscvdpuxws 0, 0
+; CHECK-PWR8-NEXT: stfiwx 0, 4, 3
+; CHECK-PWR8-NEXT: blr
    i32 zeroext %idx) {
 entry:
   %0 = load double, double* %a, align 8
@@ -676,23 +839,27 @@
   store i32 %conv, i32* %arrayidx, align 4
   ret void
-; CHECK-LABEL: dpConv2uw_x
-; CHECK: lfd [[LD:[0-9]+]], 0(3)
-; CHECK-NEXT: sldi [[REG:[0-9]+]], 5, 2
-; CHECK-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]]
-; CHECK-NEXT: stfiwx [[CONV]], 4, [[REG]]
-; CHECK-NEXT: blr
-; CHECK-PWR8-LABEL: dpConv2uw_x
-; CHECK-PWR8: lfdx [[LD:[0-9]+]], 0, 3
-; CHECK-PWR8-NEXT: sldi [[REG:[0-9]+]], 5, 2
-; CHECK-PWR8-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]]
-; CHECK-PWR8-NEXT: stfiwx [[CONV]], 4, [[REG]]
-; CHECK-PWR8-NEXT: blr
 }
 ; Function Attrs: norecurse nounwind
 define void @dpConv2uhw_x(double* nocapture readonly %a, i16* nocapture %b,
+; CHECK-LABEL: dpConv2uhw_x:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lfd 0, 0(3)
+; CHECK-NEXT: sldi 3, 5, 1
+; CHECK-NEXT: xscvdpuxws 0, 0
+; CHECK-NEXT: stxsihx 0, 4, 3
+; CHECK-NEXT: blr
+;
+; CHECK-PWR8-LABEL: dpConv2uhw_x:
+; CHECK-PWR8: # %bb.0: # %entry
+; CHECK-PWR8-NEXT: lfdx 0, 0, 3
+; CHECK-PWR8-NEXT: sldi 5, 5, 1
+; CHECK-PWR8-NEXT: xscvdpsxws 0, 0
+; CHECK-PWR8-NEXT: mffprwz 3, 0
+; CHECK-PWR8-NEXT: sthx 3, 4, 5
+; CHECK-PWR8-NEXT: blr
    i32 zeroext %idx) {
 entry:
   %0 = load double, double* %a, align 8
@@ -702,24 +869,25 @@
   store i16 %conv, i16* %arrayidx, align 2
   ret void
-; CHECK-LABEL: dpConv2uhw_x
-; CHECK: lfd [[LD:[0-9]+]], 0(3)
-; CHECK-NEXT: sldi [[REG:[0-9]+]], 5, 1
-; CHECK-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]]
-; CHECK-NEXT: stxsihx [[CONV]], 4, [[REG]]
-; CHECK-NEXT: blr
-; CHECK-PWR8-LABEL: dpConv2uhw_x
-; CHECK-PWR8: lfdx [[LD:[0-9]+]], 0, 3
-; CHECK-PWR8-NEXT: sldi [[REG:[0-9]+]], 5, 1
-; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]]
-; CHECK-PWR8-NEXT: mffprwz [[REG:[0-9]+]], [[CONV]]
-; CHECK-PWR8-NEXT: sthx [[REG]], 4, 5
-; CHECK-PWR8-NEXT: blr
 }
 ; Function Attrs: norecurse nounwind
 define void @dpConv2ub_x(double* nocapture readonly %a, i8* nocapture %b,
+; CHECK-LABEL: dpConv2ub_x:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lfd 0, 0(3)
+; CHECK-NEXT: xscvdpuxws 0, 0
+; CHECK-NEXT: stxsibx 0, 4, 5
+; CHECK-NEXT: blr
+;
+; CHECK-PWR8-LABEL: dpConv2ub_x:
+; CHECK-PWR8: # %bb.0: # %entry
+; CHECK-PWR8-NEXT: lfdx 0, 0, 3
+; CHECK-PWR8-NEXT: xscvdpsxws 0, 0
+; CHECK-PWR8-NEXT: mffprwz 3, 0
+; CHECK-PWR8-NEXT: stbx 3, 4, 5
+; CHECK-PWR8-NEXT: blr
    i32 zeroext %idx) {
 entry:
   %0 = load double, double* %a, align 8
@@ -729,22 +897,26 @@
   store i8 %conv, i8* %arrayidx, align 1
   ret void
-; CHECK-LABEL: dpConv2ub_x
-; CHECK: lfd [[LD:[0-9]+]], 0(3)
-; CHECK-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]]
-; CHECK-NEXT: stxsibx [[CONV]], 4, 5
-; CHECK-NEXT: blr
-; CHECK-PWR8-LABEL: dpConv2ub_x
-; CHECK-PWR8: lfdx [[LD:[0-9]+]], 0, 3
-; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]]
-; CHECK-PWR8-NEXT: mffprwz [[REG:[0-9]+]], [[CONV]]
-; CHECK-PWR8-NEXT: stbx [[REG]], 4, 5
-; CHECK-PWR8-NEXT: blr
 }
 ; Function Attrs: norecurse nounwind
 define void @spConv2udw_x(float* nocapture readonly %a, i64* nocapture %b,
+; CHECK-LABEL: spConv2udw_x:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lfs 0, 0(3)
+; CHECK-NEXT: xscvdpuxds 0, 0
+; CHECK-NEXT: sldi 5, 5, 3
+; CHECK-NEXT: stxsdx 0, 4, 5
+; CHECK-NEXT: blr
+;
+; CHECK-PWR8-LABEL: spConv2udw_x:
+; CHECK-PWR8: # %bb.0: # %entry
+; CHECK-PWR8-NEXT: lfsx 0, 0, 3
+; CHECK-PWR8-NEXT: sldi 3, 5, 3
+; CHECK-PWR8-NEXT: xscvdpuxds 0, 0
+; CHECK-PWR8-NEXT: stxsdx 0, 4, 3
+; CHECK-PWR8-NEXT: blr
    i32 zeroext %idx) {
 entry:
   %0 = load float, float* %a, align 4
@@ -754,23 +926,26 @@
   store i64 %conv, i64* %arrayidx, align 8
   ret void
-; CHECK-LABEL: spConv2udw_x
-; CHECK: lfs [[LD:[0-9]+]], 0(3)
-; CHECK-DAG: sldi [[REG:[0-9]+]], 5, 3
-; CHECK-DAG: xscvdpuxds [[CONV:[0-9]+]], [[LD]]
-; CHECK-NEXT: stxsdx [[CONV]], 4, [[REG]]
-; CHECK-NEXT: blr
-; CHECK-PWR8-LABEL: spConv2udw_x
-; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3
-; CHECK-PWR8-NEXT: sldi [[REG:[0-9]+]], 5, 3
-; CHECK-PWR8-NEXT: xscvdpuxds [[CONV:[0-9]+]], [[LD]]
-; CHECK-PWR8-NEXT: stxsdx [[CONV]], 4, [[REG]]
-; CHECK-PWR8-NEXT: blr
 }
 ; Function Attrs: norecurse nounwind
 define void @spConv2uw_x(float* nocapture readonly %a, i32* nocapture %b,
+; CHECK-LABEL: spConv2uw_x:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lfs 0, 0(3)
+; CHECK-NEXT: xscvdpuxws 0, 0
+; CHECK-NEXT: sldi 5, 5, 2
+; CHECK-NEXT: stfiwx 0, 4, 5
+; CHECK-NEXT: blr
+;
+; CHECK-PWR8-LABEL: spConv2uw_x:
+; CHECK-PWR8: # %bb.0: # %entry
+; CHECK-PWR8-NEXT: lfsx 0, 0, 3
+; CHECK-PWR8-NEXT: sldi 3, 5, 2
+; CHECK-PWR8-NEXT: xscvdpuxws 0, 0
+; CHECK-PWR8-NEXT: stfiwx 0, 4, 3
+; CHECK-PWR8-NEXT: blr
    i32 zeroext %idx) {
 entry:
   %0 = load float, float* %a, align 4
@@ -780,23 +955,27 @@
   store i32 %conv, i32* %arrayidx, align 4
   ret void
-; CHECK-LABEL: spConv2uw_x
-; CHECK: lfs [[LD:[0-9]+]], 0(3)
-; CHECK-DAG: sldi [[REG:[0-9]+]], 5, 2
-; CHECK-DAG: xscvdpuxws [[CONV:[0-9]+]], [[LD]]
-; CHECK-NEXT: stfiwx [[CONV]], 4, [[REG]]
-; CHECK-NEXT: blr
-; CHECK-PWR8-LABEL: spConv2uw_x
-; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3
-; CHECK-PWR8-NEXT: sldi [[REG:[0-9]+]], 5, 2
-; CHECK-PWR8-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]]
-; CHECK-PWR8-NEXT: stfiwx [[CONV]], 4, [[REG]]
-; CHECK-PWR8-NEXT: blr
 }
 ; Function Attrs: norecurse nounwind
 define void @spConv2uhw_x(float* nocapture readonly %a, i16* nocapture %b,
+; CHECK-LABEL: spConv2uhw_x:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lfs 0, 0(3)
+; CHECK-NEXT: xscvdpuxws 0, 0
+; CHECK-NEXT: sldi 5, 5, 1
+; CHECK-NEXT: stxsihx 0, 4, 5
+; CHECK-NEXT: blr
+;
+; CHECK-PWR8-LABEL: spConv2uhw_x:
+; CHECK-PWR8: # %bb.0: # %entry
+; CHECK-PWR8-NEXT: lfsx 0, 0, 3
+; CHECK-PWR8-NEXT: sldi 5, 5, 1
+; CHECK-PWR8-NEXT: xscvdpsxws 0, 0
+; CHECK-PWR8-NEXT: mffprwz 3, 0
+; CHECK-PWR8-NEXT: sthx 3, 4, 5
+; CHECK-PWR8-NEXT: blr
    i32 zeroext %idx) {
 entry:
   %0 = load float, float* %a, align 4
@@ -806,24 +985,25 @@
   store i16 %conv, i16* %arrayidx, align 2
   ret void
-; CHECK-LABEL: spConv2uhw_x
-; CHECK: lfs [[LD:[0-9]+]], 0(3)
-; CHECK-DAG: sldi [[REG:[0-9]+]], 5, 1
-; CHECK-DAG: xscvdpuxws [[CONV:[0-9]+]], [[LD]]
-; CHECK-NEXT: stxsihx [[CONV]], 4, [[REG]]
-; CHECK-NEXT: blr
-; CHECK-PWR8-LABEL: spConv2uhw_x
-; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3
-; CHECK-PWR8-NEXT: sldi [[REG:[0-9]+]], 5, 1
-; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]]
-; CHECK-PWR8-NEXT: mffprwz [[REG2:[0-9]+]], [[CONV]]
-; CHECK-PWR8-NEXT: sthx [[REG2]], 4, [[REG]]
-; CHECK-PWR8-NEXT: blr
 }
 ; Function Attrs: norecurse nounwind
 define void @spConv2ub_x(float* nocapture readonly %a, i8* nocapture %b,
+; CHECK-LABEL: spConv2ub_x:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lfs 0, 0(3)
+; CHECK-NEXT: xscvdpuxws 0, 0
+; CHECK-NEXT: stxsibx 0, 4, 5
+; CHECK-NEXT: blr
+;
+; CHECK-PWR8-LABEL: spConv2ub_x:
+; CHECK-PWR8: # %bb.0: # %entry
+; CHECK-PWR8-NEXT: lfsx 0, 0, 3
+; CHECK-PWR8-NEXT: xscvdpsxws 0, 0
+; CHECK-PWR8-NEXT: mffprwz 3, 0
+; CHECK-PWR8-NEXT: stbx 3, 4, 5
+; CHECK-PWR8-NEXT: blr
    i32 zeroext %idx) {
 entry:
   %0 = load float, float* %a, align 4
@@ -833,16 +1013,5 @@
   store i8 %conv, i8* %arrayidx, align 1
   ret void
-; CHECK-LABEL: spConv2ub_x
-; CHECK: lfs [[LD:[0-9]+]], 0(3)
-; CHECK-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]]
-; CHECK-NEXT: stxsibx [[CONV]], 4, 5
-; CHECK-NEXT: blr
-; CHECK-PWR8-LABEL: spConv2ub_x
-; CHECK-PWR8: lfsx [[LD:[0-9]+]], 0, 3
-; CHECK-PWR8-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]]
-; CHECK-PWR8-NEXT: mffprwz [[REG:[0-9]+]], [[CONV]]
-; CHECK-PWR8-NEXT: stbx [[REG]], 4, 5
-; CHECK-PWR8-NEXT: blr
 }