diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -485,6 +485,9 @@
         case Intrinsic::experimental_constrained_sin:
         case Intrinsic::experimental_constrained_cos:
           return true;
+        // There is no corresponding FMA instruction for PPC double double.
+        // Thus, we need to disable CTR loop generation for this type.
+        case Intrinsic::fmuladd:
         case Intrinsic::copysign:
           if (CI->getArgOperand(0)->getType()->getScalarType()->
               isPPC_FP128Ty())
diff --git a/llvm/test/CodeGen/PowerPC/disable-ctr-ppcf128.ll b/llvm/test/CodeGen/PowerPC/disable-ctr-ppcf128.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/disable-ctr-ppcf128.ll
@@ -0,0 +1,113 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-asm-full-reg-names \
+; RUN:   -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=LE
+; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-asm-full-reg-names \
+; RUN:   -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE
+; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-asm-full-reg-names \
+; RUN:   -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=LE
+; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-asm-full-reg-names \
+; RUN:   -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE
+
+declare ppc_fp128 @llvm.fmuladd.ppcf128(ppc_fp128, ppc_fp128, ppc_fp128) #2
+
+define ppc_fp128 @test_ctr0() {
+; LE-LABEL: test_ctr0:
+; LE:       # %bb.0: # %bb
+; LE-NEXT:    mflr r0
+; LE-NEXT:    .cfi_def_cfa_offset 48
+; LE-NEXT:    .cfi_offset lr, 16
+; LE-NEXT:    .cfi_offset r30, -16
+; LE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; LE-NEXT:    std r0, 16(r1)
+; LE-NEXT:    stdu r1, -48(r1)
+; LE-NEXT:    xxlxor f1, f1, f1
+; LE-NEXT:    li r30, 0
+; LE-NEXT:    xxlxor f2, f2, f2
+; LE-NEXT:    .p2align 5
+; LE-NEXT:  .LBB0_1: # %bb6
+; LE-NEXT:    #
+; LE-NEXT:    xxlxor f3, f3, f3
+; LE-NEXT:    xxlxor f4, f4, f4
+; LE-NEXT:    bl __gcc_qadd
+; LE-NEXT:    nop
+; LE-NEXT:    addi r30, r30, 4
+; LE-NEXT:    cmpldi r30, 0
+; LE-NEXT:    bne cr0, .LBB0_1
+; LE-NEXT:  # %bb.2: # %bb14
+; LE-NEXT:    addi r1, r1, 48
+; LE-NEXT:    ld r0, 16(r1)
+; LE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; LE-NEXT:    mtlr r0
+; LE-NEXT:    blr
+;
+; P9BE-LABEL: test_ctr0:
+; P9BE:       # %bb.0: # %bb
+; P9BE-NEXT:    mflr r0
+; P9BE-NEXT:    std r0, 16(r1)
+; P9BE-NEXT:    stdu r1, -128(r1)
+; P9BE-NEXT:    .cfi_def_cfa_offset 128
+; P9BE-NEXT:    .cfi_offset lr, 16
+; P9BE-NEXT:    .cfi_offset r30, -16
+; P9BE-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
+; P9BE-NEXT:    xxlxor f1, f1, f1
+; P9BE-NEXT:    li r30, 0
+; P9BE-NEXT:    xxlxor f2, f2, f2
+; P9BE-NEXT:    .p2align 5
+; P9BE-NEXT:  .LBB0_1: # %bb6
+; P9BE-NEXT:    #
+; P9BE-NEXT:    xxlxor f3, f3, f3
+; P9BE-NEXT:    xxlxor f4, f4, f4
+; P9BE-NEXT:    bl __gcc_qadd
+; P9BE-NEXT:    nop
+; P9BE-NEXT:    addi r30, r30, 4
+; P9BE-NEXT:    cmpldi r30, 0
+; P9BE-NEXT:    bne cr0, .LBB0_1
+; P9BE-NEXT:  # %bb.2: # %bb14
+; P9BE-NEXT:    ld r30, 112(r1) # 8-byte Folded Reload
+; P9BE-NEXT:    addi r1, r1, 128
+; P9BE-NEXT:    ld r0, 16(r1)
+; P9BE-NEXT:    mtlr r0
+; P9BE-NEXT:    blr
+;
+; P8BE-LABEL: test_ctr0:
+; P8BE:       # %bb.0: # %bb
+; P8BE-NEXT:    mflr r0
+; P8BE-NEXT:    std r0, 16(r1)
+; P8BE-NEXT:    stdu r1, -128(r1)
+; P8BE-NEXT:    .cfi_def_cfa_offset 128
+; P8BE-NEXT:    .cfi_offset lr, 16
+; P8BE-NEXT:    .cfi_offset r30, -16
+; P8BE-NEXT:    xxlxor f1, f1, f1
+; P8BE-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
+; P8BE-NEXT:    li r30, 0
+; P8BE-NEXT:    xxlxor f2, f2, f2
+; P8BE-NEXT:    .p2align 5
+; P8BE-NEXT:  .LBB0_1: # %bb6
+; P8BE-NEXT:    #
+; P8BE-NEXT:    xxlxor f3, f3, f3
+; P8BE-NEXT:    xxlxor f4, f4, f4
+; P8BE-NEXT:    bl __gcc_qadd
+; P8BE-NEXT:    nop
+; P8BE-NEXT:    addi r30, r30, 4
+; P8BE-NEXT:    cmpldi r30, 0
+; P8BE-NEXT:    bne cr0, .LBB0_1
+; P8BE-NEXT:  # %bb.2: # %bb14
+; P8BE-NEXT:    ld r30, 112(r1) # 8-byte Folded Reload
+; P8BE-NEXT:    addi r1, r1, 128
+; P8BE-NEXT:    ld r0, 16(r1)
+; P8BE-NEXT:    mtlr r0
+; P8BE-NEXT:    blr
+bb:
+  br label %bb6
+
+bb6:                                              ; preds = %bb6, %bb
+  %i = phi ppc_fp128 [ %i8, %bb6 ], [ 0xM00000000000000000000000000000000, %bb ]
+  %i7 = phi i64 [ %i9, %bb6 ], [ 0, %bb ]
+  %i8 = tail call ppc_fp128 @llvm.fmuladd.ppcf128(ppc_fp128 0xM00000000000000000000000000000000, ppc_fp128 0xM00000000000000000000000000000000, ppc_fp128 %i) #4
+  %i9 = add i64 %i7, -4
+  %i10 = icmp eq i64 %i9, 0
+  br i1 %i10, label %bb14, label %bb6
+
+bb14:                                             ; preds = %bb6
+  ret ppc_fp128 %i8
+}
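
For context, a minimal sketch of why the guard is needed: ppc_fp128 (IBM double double) has no hardware FMA, so @llvm.fmuladd.ppcf128 is expanded into runtime-library calls (visible as `bl __gcc_qadd` in the checks above), and a call inside the loop body may clobber the CTR register. The loop therefore has to stay a compare-and-branch loop (cmpldi/bne) rather than become a hardware CTR loop (mtctr/bdnz). The reduced .ll below is a hypothetical reproducer along the lines of the test, not part of the patch; the function name, value names, and trip count are illustrative.

; reduced.ll -- hypothetical reproducer; compile with e.g.
;   llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 reduced.ll -o -
; and verify the loop back edge is `bne`, not `bdnz`.
declare ppc_fp128 @llvm.fmuladd.ppcf128(ppc_fp128, ppc_fp128, ppc_fp128)

define ppc_fp128 @sum_fma(ppc_fp128 %a, ppc_fp128 %b, i64 %n) {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %acc = phi ppc_fp128 [ 0xM00000000000000000000000000000000, %entry ], [ %fma, %loop ]
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  ; Lowers to libgcc calls (__gcc_qmul/__gcc_qadd) instead of an FMA
  ; instruction, so forming a CTR loop around it would be unsafe.
  %fma = call ppc_fp128 @llvm.fmuladd.ppcf128(ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %acc)
  %iv.next = add i64 %iv, 1
  %done = icmp eq i64 %iv.next, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret ppc_fp128 %fma
}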