Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h
+++ lib/Target/PowerPC/PPCISelLowering.h
@@ -189,6 +189,9 @@
       /// Direct move from a GPR to a VSX register (zero)
       MTVSRZ,

+      /// Direct move of 2 consecutive GPRs to a VSX register.
+      BUILD_FP128,
+
       /// Extract a subvector from signed integer vector and convert to FP.
       /// It is primarily used to convert a (widened) illegal integer vector
       /// type to a legal floating point vector type.
@@ -1065,6 +1068,7 @@

     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;

+    SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
     SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const;
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -805,6 +805,7 @@
         setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
         setTruncStoreAction(MVT::f128, MVT::f64, Expand);
         setTruncStoreAction(MVT::f128, MVT::f32, Expand);
+        setOperationAction(ISD::BITCAST, MVT::i128, Custom);
       }
     }

@@ -1259,6 +1260,7 @@
   case PPCISD::QVESPLATI:       return "PPCISD::QVESPLATI";
   case PPCISD::QBFLT:           return "PPCISD::QBFLT";
   case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
+  case PPCISD::BUILD_FP128:     return "PPCISD::BUILD_FP128";
   }
   return nullptr;
 }
@@ -7651,6 +7653,25 @@
   return !(IsSplat && IsLoad);
 }

+// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
+SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
+
+  SDLoc dl(Op);
+
+  if (!EnableQuadPrecision ||
+      (Op->getValueType(0) != MVT::f128) ||
+      (Op->getOperand(0).getOpcode() != ISD::BUILD_PAIR) ||
+      (Op->getOperand(0).getOperand(0).getValueType() != MVT::i64) ||
+      (Op->getOperand(0).getOperand(1).getValueType() != MVT::i64))
+    return SDValue();
+
+  SDValue Op0 = Op->getOperand(0);
+  SDValue Val = DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128,
+                            Op0.getOperand(0), Op0.getOperand(1));
+
+  return Val;
+}
+
 // If this is a case we can't handle, return null and let the default
 // expansion code take care of it.  If we CAN select this case, and if it
 // selects to a single instruction, return Op.  Otherwise, if we can codegen
@@ -9445,6 +9466,8 @@
   // For counter-based loop handling.
   case ISD::INTRINSIC_W_CHAIN:  return SDValue();

+  case ISD::BITCAST:            return LowerBITCAST(Op, DAG);
+
   // Frame & Return address.
   case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
Index: lib/Target/PowerPC/PPCInstrInfo.td
===================================================================
--- lib/Target/PowerPC/PPCInstrInfo.td
+++ lib/Target/PowerPC/PPCInstrInfo.td
@@ -218,6 +218,13 @@
 def PPCsra        : SDNode<"PPCISD::SRA"       , SDTIntShiftOp>;
 def PPCshl        : SDNode<"PPCISD::SHL"       , SDTIntShiftOp>;

+// Move 2 i64 values into a VSX register
+def PPCbuild_fp128: SDNode<"PPCISD::BUILD_FP128",
+                           SDTypeProfile<1, 2,
+                             [SDTCisFP<0>, SDTCisSameSizeAs<1,2>,
+                              SDTCisSameAs<1,2>]>,
+                           []>;
+
 // These are target-independent nodes, but have target-specific formats.
 def callseq_start   : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart,
                              [SDNPHasChain, SDNPOutGlue]>;
Index: lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- lib/Target/PowerPC/PPCInstrVSX.td
+++ lib/Target/PowerPC/PPCInstrVSX.td
@@ -3382,6 +3382,17 @@
   def : Pat<(f32 (fpround f128:$src)),
             (f32 (XSRSP (XSCVQPDPO $src)))>;
 } // end HasP9Vector, AddedComplexity
+let AddedComplexity = 400 in {
+  let Predicates = [IsISA3_0, HasP9Vector, HasDirectMove, IsBigEndian] in {
+    def : Pat<(f128 (PPCbuild_fp128 i64:$rB, i64:$rA)),
+              (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>;
+  }
+  let Predicates = [IsISA3_0, HasP9Vector, HasDirectMove, IsLittleEndian] in {
+    def : Pat<(f128 (PPCbuild_fp128 i64:$rA, i64:$rB)),
+              (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>;
+  }
+}
+
 let Predicates = [HasP9Vector] in {
   let isPseudo = 1 in {
     let mayStore = 1 in {
Index: test/CodeGen/PowerPC/f128-aggregates.ll
===================================================================
--- /dev/null
+++ test/CodeGen/PowerPC/f128-aggregates.ll
@@ -0,0 +1,207 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
+; RUN:   -enable-ppc-quad-precision -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-unknown \
+; RUN:   -enable-ppc-quad-precision -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK-BE %s
+
+%struct.MixedC = type { i32, %struct.SA, float, [12 x i8] }
+%struct.SA = type { double, fp128, <4 x float> }
+
+; Function Attrs: norecurse nounwind readnone
+define fp128 @testMixedAggregate([3 x i128] %a.coerce) {
+; CHECK-LABEL: testMixedAggregate:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mtvsrdd 34, 8, 7
+; CHECK-NEXT:    blr
+
+; CHECK-BE-LABEL: testMixedAggregate:
+; CHECK-BE:         mtvsrdd 34, 8, 7
+; CHECK-BE-NEXT:    blr
+entry:
+  %a.coerce.fca.2.extract = extractvalue [3 x i128] %a.coerce, 2
+  %0 = bitcast i128 %a.coerce.fca.2.extract to fp128
+  ret fp128 %0
+}
+
+; Function Attrs: norecurse nounwind readnone
+define fp128 @testMixedAggregate_02([4 x i128] %a.coerce) {
+; CHECK-LABEL: testMixedAggregate_02:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mtvsrdd 34, 6, 5
+; CHECK-NEXT:    blr
+
+; CHECK-BE-LABEL: testMixedAggregate_02:
+; CHECK-BE:         mtvsrdd 34, 6, 5
+; CHECK-BE-NEXT:    blr
+entry:
+  %a.coerce.fca.1.extract = extractvalue [4 x i128] %a.coerce, 1
+  %0 = bitcast i128 %a.coerce.fca.1.extract to fp128
+  ret fp128 %0
+}
+
+; Function Attrs: norecurse nounwind readnone
+define fp128 @testMixedAggregate_03([4 x i128] %sa.coerce) {
+; CHECK-LABEL: testMixedAggregate_03:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mtvsrwa 34, 3
+; CHECK-NEXT:    mtvsrdd 35, 6, 5
+; CHECK-NEXT:    mtvsrd 36, 10
+; CHECK-NEXT:    xscvsdqp 2, 2
+; CHECK-NEXT:    xscvsdqp 19, 4
+; CHECK-NEXT:    xsaddqp 2, 3, 2
+; CHECK-NEXT:    xsaddqp 2, 2, 19
+; CHECK-NEXT:    blr
+entry:
+  %sa.coerce.fca.0.extract = extractvalue [4 x i128] %sa.coerce, 0
+  %sa.sroa.0.0.extract.trunc = trunc i128 %sa.coerce.fca.0.extract to i32
+  %sa.coerce.fca.1.extract = extractvalue [4 x i128] %sa.coerce, 1
+  %sa.coerce.fca.3.extract = extractvalue [4 x i128] %sa.coerce, 3
+  %sa.sroa.6.48.extract.shift = lshr i128 %sa.coerce.fca.3.extract, 64
+  %sa.sroa.6.48.extract.trunc = trunc i128 %sa.sroa.6.48.extract.shift to i64
+  %conv = sitofp i32 %sa.sroa.0.0.extract.trunc to fp128
+  %0 = bitcast i128 %sa.coerce.fca.1.extract to fp128
+  %add = fadd fp128 %0, %conv
+  %conv2 = sitofp i64 %sa.sroa.6.48.extract.trunc to fp128
+  %add3 = fadd fp128 %add, %conv2
+  ret fp128 %add3
+}
+
+
+; Function Attrs: norecurse nounwind readonly
+define fp128 @testNestedAggregate(%struct.MixedC* byval nocapture readonly align 16 %a) {
+; CHECK-LABEL: testNestedAggregate:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    std 8, 72(1)
+; CHECK-NEXT:    std 7, 64(1)
+; CHECK-NEXT:    std 10, 88(1)
+; CHECK-NEXT:    std 9, 80(1)
+; CHECK-NEXT:    lxv 34, 64(1)
+; CHECK-NEXT:    std 6, 56(1)
+; CHECK-NEXT:    std 5, 48(1)
+; CHECK-NEXT:    std 4, 40(1)
+; CHECK-NEXT:    std 3, 32(1)
+; CHECK-NEXT:    blr
+
+; CHECK-BE-LABEL: testNestedAggregate:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    std 8, 88(1)
+; CHECK-BE-NEXT:    std 7, 80(1)
+; CHECK-BE-NEXT:    std 10, 104(1)
+; CHECK-BE-NEXT:    std 9, 96(1)
+; CHECK-BE-NEXT:    lxv 34, 80(1)
+; CHECK-BE-NEXT:    std 6, 72(1)
+; CHECK-BE-NEXT:    std 5, 64(1)
+; CHECK-BE-NEXT:    std 4, 56(1)
+; CHECK-BE-NEXT:    std 3, 48(1)
+; CHECK-BE-NEXT:    blr
+entry:
+  %c = getelementptr inbounds %struct.MixedC, %struct.MixedC* %a, i64 0, i32 1, i32 1
+  %0 = load fp128, fp128* %c, align 16
+  ret fp128 %0
+}
+
+; Function Attrs: norecurse nounwind readnone
+define fp128 @testUnion_01([1 x i128] %a.coerce) {
+; CHECK-LABEL: testUnion_01:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mtvsrdd 34, 4, 3
+; CHECK-NEXT:    blr
+
+; CHECK-BE-LABEL: testUnion_01:
+; CHECK-BE:         mtvsrdd 34, 4, 3
+; CHECK-BE-NEXT:    blr
+entry:
+  %a.coerce.fca.0.extract = extractvalue [1 x i128] %a.coerce, 0
+  %0 = bitcast i128 %a.coerce.fca.0.extract to fp128
+  ret fp128 %0
+}
+
+; Function Attrs: norecurse nounwind readnone
+define fp128 @testUnion_02([1 x i128] %a.coerce) {
+; CHECK-LABEL: testUnion_02:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mtvsrdd 34, 4, 3
+; CHECK-NEXT:    blr
+
+; CHECK-BE-LABEL: testUnion_02:
+; CHECK-BE:         mtvsrdd 34, 4, 3
+; CHECK-BE-NEXT:    blr
+entry:
+  %a.coerce.fca.0.extract = extractvalue [1 x i128] %a.coerce, 0
+  %0 = bitcast i128 %a.coerce.fca.0.extract to fp128
+  ret fp128 %0
+}
+
+; Function Attrs: norecurse nounwind readnone
+define fp128 @testUnion_03([4 x i128] %a.coerce) {
+; CHECK-LABEL: testUnion_03:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mtvsrdd 34, 8, 7
+; CHECK-NEXT:    blr
+
+; CHECK-BE-LABEL: testUnion_03:
+; CHECK-BE:         mtvsrdd 34, 8, 7
+; CHECK-BE-NEXT:    blr
+entry:
+  %a.coerce.fca.2.extract = extractvalue [4 x i128] %a.coerce, 2
+  %0 = bitcast i128 %a.coerce.fca.2.extract to fp128
+  ret fp128 %0
+}
+
+; Function Attrs: nounwind
+define fp128 @sum_float128(i32 signext %count, ...) {
+; CHECK-LABEL: sum_float128:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis 11, 2, .LCPI7_0@toc@ha
+; CHECK-NEXT:    cmpwi 0, 3, 1
+; CHECK-NEXT:    std 10, 88(1)
+; CHECK-NEXT:    std 9, 80(1)
+; CHECK-NEXT:    std 8, 72(1)
+; CHECK-NEXT:    std 7, 64(1)
+; CHECK-NEXT:    std 6, 56(1)
+; CHECK-NEXT:    std 5, 48(1)
+; CHECK-NEXT:    std 4, 40(1)
+; CHECK-NEXT:    addi 11, 11, .LCPI7_0@toc@l
+; CHECK-NEXT:    lxvx 34, 0, 11
+; CHECK-NEXT:    bltlr 0
+; CHECK-NEXT:  # %bb.1: # %if.end
+; CHECK-NEXT:    addi 3, 1, 40
+; CHECK-NEXT:    lxvx 35, 0, 3
+; CHECK-NEXT:    lxv 51, 16(3)
+; CHECK-NEXT:    addi 3, 1, 72
+; CHECK-NEXT:    std 3, -8(1)
+; CHECK-NEXT:    xsaddqp 2, 3, 2
+; CHECK-NEXT:    xsaddqp 2, 2, 19
+; CHECK-NEXT:    blr
+entry:
+  %ap = alloca i8*, align 8
+  %0 = bitcast i8** %ap to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0) #2
+  %cmp = icmp slt i32 %count, 1
+  br i1 %cmp, label %cleanup, label %if.end
+
+if.end:                                           ; preds = %entry
+  call void @llvm.va_start(i8* nonnull %0)
+  %argp.cur = load i8*, i8** %ap, align 8
+  %argp.next = getelementptr inbounds i8, i8* %argp.cur, i64 16
+  %1 = bitcast i8* %argp.cur to fp128*
+  %2 = load fp128, fp128* %1, align 8
+  %add = fadd fp128 %2, 0xL00000000000000000000000000000000
+  %argp.next3 = getelementptr inbounds i8, i8* %argp.cur, i64 32
+  store i8* %argp.next3, i8** %ap, align 8
+  %3 = bitcast i8* %argp.next to fp128*
+  %4 = load fp128, fp128* %3, align 8
+  %add4 = fadd fp128 %add, %4
+  call void @llvm.va_end(i8* nonnull %0)
+  br label %cleanup
+
+cleanup:                                          ; preds = %entry, %if.end
+  %retval.0 = phi fp128 [ %add4, %if.end ], [ 0xL00000000000000000000000000000000, %entry ]
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0) #2
+  ret fp128 %retval.0
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+declare void @llvm.va_start(i8*) #2
+declare void @llvm.va_end(i8*) #2
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
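
For reviewers who want to exercise the new path from C, a minimal sketch follows. It is not part of the patch; the struct and function names are made up, and it assumes clang coerces the mixed aggregate into a [2 x i128] array passed in GPR pairs, the same way it does for the structs in f128-aggregates.ll above, with the backend option forwarded through the driver (e.g. -mllvm -enable-ppc-quad-precision) on a pwr9 target. Under those assumptions, reading the __float128 member back in the callee should now select a single mtvsrdd, as in the CHECK lines above, rather than taking the default bitcast expansion.

/* Illustrative only -- not part of this patch. */
struct WithFp128 {
  long tag;          /* shares the first i128 slot with padding */
  __float128 value;  /* 16-byte aligned quad-precision member */
};

/* The coerced [2 x i128] argument arrives in GPR pairs; extracting
   'value' yields the bitcast-of-BUILD_PAIR pattern that LowerBITCAST
   now turns into PPCISD::BUILD_FP128 (one mtvsrdd). */
__float128 getValue(struct WithFp128 s) {
  return s.value;
}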