Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -806,6 +806,11 @@
       setTruncStoreAction(MVT::f128, MVT::f64, Expand);
       setTruncStoreAction(MVT::f128, MVT::f32, Expand);
       setOperationAction(ISD::BITCAST, MVT::i128, Custom);
+      // No implementation for these ops for PowerPC.
+      setOperationAction(ISD::FSIN , MVT::f128, Expand);
+      setOperationAction(ISD::FCOS , MVT::f128, Expand);
+      setOperationAction(ISD::FPOW, MVT::f128, Expand);
+      setOperationAction(ISD::FPOWI, MVT::f128, Expand);
     }
   }
 
@@ -1044,6 +1049,20 @@
     setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
   }
 
+  if (EnableQuadPrecision) {
+    setLibcallName(RTLIB::LOG_F128, "logf128");
+    setLibcallName(RTLIB::LOG2_F128, "log2f128");
+    setLibcallName(RTLIB::LOG10_F128, "log10f128");
+    setLibcallName(RTLIB::EXP_F128, "expf128");
+    setLibcallName(RTLIB::EXP2_F128, "exp2f128");
+    setLibcallName(RTLIB::SIN_F128, "sinf128");
+    setLibcallName(RTLIB::COS_F128, "cosf128");
+    setLibcallName(RTLIB::POW_F128, "powf128");
+    setLibcallName(RTLIB::FMIN_F128, "fminf128");
+    setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
+    setLibcallName(RTLIB::POWI_F128, "__powikf2");
+  }
+
   // With 32 condition bits, we don't need to sink (and duplicate) compares
   // aggressively in CodeGenPrep.
   if (Subtarget.useCRBits()) {
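The renaming above is the heart of the change: the default RTLIB names for the F128 calls are the long double entry points (sinl, cosl, and so on), and on PowerPC long double is the IBM double-double format rather than IEEE-754 binary128, so the defaults would resolve to the wrong routines. A minimal C sketch of what the new names are expected to bind to, assuming glibc's *f128 entry points (added in glibc 2.26) and a toolchain that accepts __float128 — both are assumptions about the environment, not something this patch provides:

    /* Sketch only, not part of the patch: assumes glibc exports
     * sinf128() and the compiler accepts the __float128 type. */
    extern __float128 sinf128(__float128);

    __float128 qp_sin_c(const __float128 *a) {
      /* The IR equivalent, @llvm.sin.f128, is marked Expand above, so
       * legalization emits the same "bl sinf128" the tests check for. */
      return sinf128(*a);
    }
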
Index: test/CodeGen/PowerPC/f128-arith.ll
===================================================================
--- test/CodeGen/PowerPC/f128-arith.ll
+++ test/CodeGen/PowerPC/f128-arith.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
-; RUN:   -enable-ppc-quad-precision < %s | FileCheck %s
+; RUN:   -enable-ppc-quad-precision -ppc-asm-full-reg-names \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
 ; Function Attrs: norecurse nounwind
 define void @qpAdd(fp128* nocapture readonly %a, fp128* nocapture %res) {
@@ -175,3 +176,266 @@
 ; CHECK: xscvdpqp
 ; CHECK: blr
 }
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+define fp128 @qp_sin(fp128* nocapture readonly %a) {
+; CHECK-LABEL: qp_sin:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    lxv vs34, 0(r3)
+; CHECK-NEXT:    bl sinf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi r1, r1, 32
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  %0 = load fp128, fp128* %a, align 16
+  %1 = tail call fp128 @llvm.sin.f128(fp128 %0)
+  ret fp128 %1
+}
+declare fp128 @llvm.sin.f128(fp128 %Val)
+
+define fp128 @qp_cos(fp128* nocapture readonly %a) {
+; CHECK-LABEL: qp_cos:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    lxv vs34, 0(r3)
+; CHECK-NEXT:    bl cosf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi r1, r1, 32
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  %0 = load fp128, fp128* %a, align 16
+  %1 = tail call fp128 @llvm.cos.f128(fp128 %0)
+  ret fp128 %1
+}
+declare fp128 @llvm.cos.f128(fp128 %Val)
+
+define fp128 @qp_log(fp128* nocapture readonly %a) {
+; CHECK-LABEL: qp_log:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    lxv vs34, 0(r3)
+; CHECK-NEXT:    bl logf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi r1, r1, 32
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  %0 = load fp128, fp128* %a, align 16
+  %1 = tail call fp128 @llvm.log.f128(fp128 %0)
+  ret fp128 %1
+}
+declare fp128 @llvm.log.f128(fp128 %Val)
+
+define fp128 @qp_log10(fp128* nocapture readonly %a) {
+; CHECK-LABEL: qp_log10:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    lxv vs34, 0(r3)
+; CHECK-NEXT:    bl log10f128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi r1, r1, 32
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  %0 = load fp128, fp128* %a, align 16
+  %1 = tail call fp128 @llvm.log10.f128(fp128 %0)
+  ret fp128 %1
+}
+declare fp128 @llvm.log10.f128(fp128 %Val)
+
+define fp128 @qp_log2(fp128* nocapture readonly %a) {
+; CHECK-LABEL: qp_log2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    lxv vs34, 0(r3)
+; CHECK-NEXT:    bl log2f128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi r1, r1, 32
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  %0 = load fp128, fp128* %a, align 16
+  %1 = tail call fp128 @llvm.log2.f128(fp128 %0)
+  ret fp128 %1
+}
+declare fp128 @llvm.log2.f128(fp128 %Val)
+
+define fp128 @qp_minnum(fp128* nocapture readonly %a,
+; CHECK-LABEL: qp_minnum:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    lxv vs34, 0(r3)
+; CHECK-NEXT:    lxv vs35, 0(r4)
+; CHECK-NEXT:    bl fminf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi r1, r1, 32
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+                        fp128* nocapture readonly %b) {
+entry:
+  %0 = load fp128, fp128* %a, align 16
+  %1 = load fp128, fp128* %b, align 16
+  %2 = tail call fp128 @llvm.minnum.f128(fp128 %0, fp128 %1)
+  ret fp128 %2
+}
+declare fp128 @llvm.minnum.f128(fp128 %Val0, fp128 %Val1)
+
+define fp128 @qp_maxnum(fp128* nocapture readonly %a,
+; CHECK-LABEL: qp_maxnum:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    lxv vs34, 0(r3)
+; CHECK-NEXT:    lxv vs35, 0(r4)
+; CHECK-NEXT:    bl fmaxf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi r1, r1, 32
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+                        fp128* nocapture readonly %b) {
+entry:
+  %0 = load fp128, fp128* %a, align 16
+  %1 = load fp128, fp128* %b, align 16
+  %2 = tail call fp128 @llvm.maxnum.f128(fp128 %0, fp128 %1)
+  ret fp128 %2
+}
+declare fp128 @llvm.maxnum.f128(fp128 %Val0, fp128 %Val1)
+
+define fp128 @qp_pow(fp128* nocapture readonly %a,
+; CHECK-LABEL: qp_pow:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    lxv vs34, 0(r3)
+; CHECK-NEXT:    lxv vs35, 0(r4)
+; CHECK-NEXT:    bl powf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi r1, r1, 32
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+                     fp128* nocapture readonly %b) {
+entry:
+  %0 = load fp128, fp128* %a, align 16
+  %1 = load fp128, fp128* %b, align 16
+  %2 = tail call fp128 @llvm.pow.f128(fp128 %0, fp128 %1)
+  ret fp128 %2
+}
+declare fp128 @llvm.pow.f128(fp128 %Val, fp128 %Power)
+
+define fp128 @qp_exp(fp128* nocapture readonly %a) {
+; CHECK-LABEL: qp_exp:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    lxv vs34, 0(r3)
+; CHECK-NEXT:    bl expf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi r1, r1, 32
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  %0 = load fp128, fp128* %a, align 16
+  %1 = tail call fp128 @llvm.exp.f128(fp128 %0)
+  ret fp128 %1
+}
+declare fp128 @llvm.exp.f128(fp128 %Val)
+
+define fp128 @qp_exp2(fp128* nocapture readonly %a) {
+; CHECK-LABEL: qp_exp2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    lxv vs34, 0(r3)
+; CHECK-NEXT:    bl exp2f128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi r1, r1, 32
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  %0 = load fp128, fp128* %a, align 16
+  %1 = tail call fp128 @llvm.exp2.f128(fp128 %0)
+  ret fp128 %1
+}
+declare fp128 @llvm.exp2.f128(fp128 %Val)
+
+define void @qp_powi(fp128* nocapture readonly %a, i32* nocapture readonly %b,
+                     fp128* nocapture %res) {
+; CHECK-LABEL: qp_powi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset r30, -16
+; CHECK-NEXT:    lwz r12, 0(r4)
+; CHECK-NEXT:    lxv vs34, 0(r3)
+; CHECK-NEXT:    std r30, 32(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r30, r5
+; CHECK-NEXT:    mr r5, r12
+; CHECK-NEXT:    bl __powikf2
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    stxv vs34, 0(r30)
+; CHECK-NEXT:    ld r30, 32(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    addi r1, r1, 48
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  %0 = load fp128, fp128* %a, align 16
+  %1 = load i32, i32* %b, align 8
+  %2 = tail call fp128 @llvm.powi.f128(fp128 %0, i32 %1)
+  store fp128 %2, fp128* %res, align 16
+  ret void
+}
+declare fp128 @llvm.powi.f128(fp128 %Val, i32 %power)
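
Since powi has no libm entry point, POWI_F128 is routed to the compiler runtime instead: in the naming scheme libgcc and compiler-rt use on PowerPC, the kf infix denotes IEEE-754 binary128 (while tf denotes IBM double-double), hence __powikf2. Below is a sketch of the prototype this lowering assumes, modeled on the other __powi*f2 helpers; the exact signature is an assumption, as the patch only supplies the symbol name:

    /* Assumed prototype, following the __powisf2/__powidf2 pattern in
     * compiler-rt; the "kf" suffix selects the binary128 variant. */
    __float128 __powikf2(__float128 a, int b);

This is consistent with the qp_powi check lines: the fp128 argument travels in v2 (printed as vs34), and the i32 exponent lands in r5 because the fp128 occupies the first two doubleword argument slots under the ELFv2 ABI.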