diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -457,6 +457,14 @@
           break;
         }

+        // There is no corresponding FMA instruction for PPC double double.
+        // Thus, we need to disable CTR loop generation for this type.
+        case Intrinsic::fmuladd:
+          if (CI->getArgOperand(0)->getType()->getScalarType()->
+                isPPC_FP128Ty())
+            return true;
+          break;
+
         // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
         // because, although it does clobber the counter register, the
         // control can't then return to inside the loop unless there is also
diff --git a/llvm/test/CodeGen/PowerPC/disable-ctr-ppcf128.ll b/llvm/test/CodeGen/PowerPC/disable-ctr-ppcf128.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/disable-ctr-ppcf128.ll
@@ -0,0 +1,160 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-asm-full-reg-names \
+; RUN:   -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=LE
+; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-asm-full-reg-names \
+; RUN:   -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE
+; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-asm-full-reg-names \
+; RUN:   -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=LE
+; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-asm-full-reg-names \
+; RUN:   -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE
+
+%0 = type { %1 }
+%1 = type { i8*, i64, i32, i8, i8, i8, i8, %2 }
+%2 = type { %3 }
+%3 = type { i64, i64, i64 }
+
+$test_ctr0 = comdat any
+
+; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
+declare ppc_fp128 @llvm.fmuladd.ppcf128(ppc_fp128, ppc_fp128, ppc_fp128) #2
+
+define linkonce_odr hidden ppc_fp128 @test_ctr0(i32 %arg, i32 %arg1, %0* %arg3, %0* %arg4) local_unnamed_addr #0 comdat {
+; LE-LABEL: test_ctr0:
+; LE:       # %bb.0: # %bb
+; LE-NEXT:    mflr r0
+; LE-NEXT:    .cfi_def_cfa_offset 48
+; LE-NEXT:    .cfi_offset lr, 16
+; LE-NEXT:    .cfi_offset r30, -16
+; LE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; LE-NEXT:    std r0, 16(r1)
+; LE-NEXT:    stdu r1, -48(r1)
+; LE-NEXT:    xxlxor f1, f1, f1
+; LE-NEXT:    li r30, 0
+; LE-NEXT:    xxlxor f2, f2, f2
+; LE-NEXT:    .p2align 4
+; LE-NEXT:  .LBB0_1: # %bb6
+; LE-NEXT:    #
+; LE-NEXT:    xxlxor f3, f3, f3
+; LE-NEXT:    xxlxor f4, f4, f4
+; LE-NEXT:    bl __gcc_qadd
+; LE-NEXT:    nop
+; LE-NEXT:    xxlxor f3, f3, f3
+; LE-NEXT:    xxlxor f4, f4, f4
+; LE-NEXT:    bl __gcc_qadd
+; LE-NEXT:    nop
+; LE-NEXT:    xxlxor f3, f3, f3
+; LE-NEXT:    xxlxor f4, f4, f4
+; LE-NEXT:    bl __gcc_qadd
+; LE-NEXT:    nop
+; LE-NEXT:    xxlxor f3, f3, f3
+; LE-NEXT:    xxlxor f4, f4, f4
+; LE-NEXT:    bl __gcc_qadd
+; LE-NEXT:    nop
+; LE-NEXT:    addi r30, r30, 4
+; LE-NEXT:    cmpldi r30, 0
+; LE-NEXT:    bne cr0, .LBB0_1
+; LE-NEXT:  # %bb.2: # %bb14
+; LE-NEXT:    addi r1, r1, 48
+; LE-NEXT:    ld r0, 16(r1)
+; LE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; LE-NEXT:    mtlr r0
+; LE-NEXT:    blr
+;
+; P9BE-LABEL: test_ctr0:
+; P9BE:       # %bb.0: # %bb
+; P9BE-NEXT:    mflr r0
+; P9BE-NEXT:    std r0, 16(r1)
+; P9BE-NEXT:    stdu r1, -128(r1)
+; P9BE-NEXT:    .cfi_def_cfa_offset 128
+; P9BE-NEXT:    .cfi_offset lr, 16
+; P9BE-NEXT:    .cfi_offset r30, -16
+; P9BE-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
+; P9BE-NEXT:    xxlxor f1, f1, f1
+; P9BE-NEXT:    li r30, 0
+; P9BE-NEXT:    xxlxor f2, f2, f2
+; P9BE-NEXT:    .p2align 4
+; P9BE-NEXT:  .LBB0_1: # %bb6
+; P9BE-NEXT:    #
+; P9BE-NEXT:    xxlxor f3, f3, f3
+; P9BE-NEXT:    xxlxor f4, f4, f4
+; P9BE-NEXT:    bl __gcc_qadd
+; P9BE-NEXT:    nop
+; P9BE-NEXT:    xxlxor f3, f3, f3
+; P9BE-NEXT:    xxlxor f4, f4, f4
+; P9BE-NEXT:    bl __gcc_qadd
+; P9BE-NEXT:    nop
+; P9BE-NEXT:    xxlxor f3, f3, f3
+; P9BE-NEXT:    xxlxor f4, f4, f4
+; P9BE-NEXT:    bl __gcc_qadd
+; P9BE-NEXT:    nop
+; P9BE-NEXT:    xxlxor f3, f3, f3
+; P9BE-NEXT:    xxlxor f4, f4, f4
+; P9BE-NEXT:    bl __gcc_qadd
+; P9BE-NEXT:    nop
+; P9BE-NEXT:    addi r30, r30, 4
+; P9BE-NEXT:    cmpldi r30, 0
+; P9BE-NEXT:    bne cr0, .LBB0_1
+; P9BE-NEXT:  # %bb.2: # %bb14
+; P9BE-NEXT:    ld r30, 112(r1) # 8-byte Folded Reload
+; P9BE-NEXT:    addi r1, r1, 128
+; P9BE-NEXT:    ld r0, 16(r1)
+; P9BE-NEXT:    mtlr r0
+; P9BE-NEXT:    blr
+;
+; P8BE-LABEL: test_ctr0:
+; P8BE:       # %bb.0: # %bb
+; P8BE-NEXT:    mflr r0
+; P8BE-NEXT:    std r0, 16(r1)
+; P8BE-NEXT:    stdu r1, -128(r1)
+; P8BE-NEXT:    .cfi_def_cfa_offset 128
+; P8BE-NEXT:    .cfi_offset lr, 16
+; P8BE-NEXT:    .cfi_offset r30, -16
+; P8BE-NEXT:    xxlxor f1, f1, f1
+; P8BE-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
+; P8BE-NEXT:    li r30, 0
+; P8BE-NEXT:    xxlxor f2, f2, f2
+; P8BE-NEXT:    .p2align 4
+; P8BE-NEXT:  .LBB0_1: # %bb6
+; P8BE-NEXT:    #
+; P8BE-NEXT:    xxlxor f3, f3, f3
+; P8BE-NEXT:    xxlxor f4, f4, f4
+; P8BE-NEXT:    bl __gcc_qadd
+; P8BE-NEXT:    nop
+; P8BE-NEXT:    xxlxor f3, f3, f3
+; P8BE-NEXT:    xxlxor f4, f4, f4
+; P8BE-NEXT:    bl __gcc_qadd
+; P8BE-NEXT:    nop
+; P8BE-NEXT:    xxlxor f3, f3, f3
+; P8BE-NEXT:    xxlxor f4, f4, f4
+; P8BE-NEXT:    bl __gcc_qadd
+; P8BE-NEXT:    nop
+; P8BE-NEXT:    xxlxor f3, f3, f3
+; P8BE-NEXT:    xxlxor f4, f4, f4
+; P8BE-NEXT:    bl __gcc_qadd
+; P8BE-NEXT:    nop
+; P8BE-NEXT:    addi r30, r30, 4
+; P8BE-NEXT:    cmpldi r30, 0
+; P8BE-NEXT:    bne cr0, .LBB0_1
+; P8BE-NEXT:  # %bb.2: # %bb14
+; P8BE-NEXT:    ld r30, 112(r1) # 8-byte Folded Reload
+; P8BE-NEXT:    addi r1, r1, 128
+; P8BE-NEXT:    ld r0, 16(r1)
+; P8BE-NEXT:    mtlr r0
+; P8BE-NEXT:    blr
+bb:
+  br label %bb6
+
+bb6:                                              ; preds = %bb6, %bb
+  %i = phi ppc_fp128 [ %i11, %bb6 ], [ 0xM00000000000000000000000000000000, %bb ]
+  %i7 = phi i64 [ %i12, %bb6 ], [ 0, %bb ]
+  %i8 = tail call ppc_fp128 @llvm.fmuladd.ppcf128(ppc_fp128 0xM00000000000000000000000000000000, ppc_fp128 0xM00000000000000000000000000000000, ppc_fp128 %i) #4
+  %i9 = tail call ppc_fp128 @llvm.fmuladd.ppcf128(ppc_fp128 0xM00000000000000000000000000000000, ppc_fp128 0xM00000000000000000000000000000000, ppc_fp128 %i8) #4
+  %i10 = tail call ppc_fp128 @llvm.fmuladd.ppcf128(ppc_fp128 0xM00000000000000000000000000000000, ppc_fp128 0xM00000000000000000000000000000000, ppc_fp128 %i9) #4
+  %i11 = tail call ppc_fp128 @llvm.fmuladd.ppcf128(ppc_fp128 0xM00000000000000000000000000000000, ppc_fp128 0xM00000000000000000000000000000000, ppc_fp128 %i10) #4
+  %i12 = add i64 %i7, -4
+  %i13 = icmp eq i64 %i12, 0
+  br i1 %i13, label %bb14, label %bb6
+
+bb14:                                             ; preds = %bb6
+  ret ppc_fp128 %i11
+}
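For reference, a minimal standalone sketch (not part of the patch) of the condition the new case tests: a call blocks CTR loop generation exactly when it is the llvm.fmuladd intrinsic operating on ppc_fp128, since PPC double double has no hardware FMA and the operation is lowered through library calls such as __gcc_qadd, as the checks in the test above show. The helper name isPPCF128FMulAdd below is hypothetical and used only for illustration; the patch performs the same check inline inside the intrinsic switch.

#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Casting.h"

// Hypothetical illustrative helper: true when the call is llvm.fmuladd on a
// ppc_fp128 (scalar or vector element) value.
static bool isPPCF128FMulAdd(const llvm::CallInst *CI) {
  const auto *II = llvm::dyn_cast<llvm::IntrinsicInst>(CI);
  if (!II || II->getIntrinsicID() != llvm::Intrinsic::fmuladd)
    return false;
  // Same type query the patch uses: the scalar type of the first operand.
  return II->getArgOperand(0)->getType()->getScalarType()->isPPC_FP128Ty();
}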