Index: llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -406,6 +406,30 @@
           case Intrinsic::loop_decrement:
             return true;
 
+          // Binary operations on 128-bit values will use CTR.
+          case Intrinsic::experimental_constrained_fadd:
+          case Intrinsic::experimental_constrained_fsub:
+          case Intrinsic::experimental_constrained_fmul:
+          case Intrinsic::experimental_constrained_fdiv:
+          case Intrinsic::experimental_constrained_frem:
+            if (F->getType()->getScalarType()->isFP128Ty() ||
+                F->getType()->getScalarType()->isPPC_FP128Ty())
+              return true;
+            break;
+
+          case Intrinsic::experimental_constrained_fptosi:
+          case Intrinsic::experimental_constrained_fptoui:
+          case Intrinsic::experimental_constrained_sitofp:
+          case Intrinsic::experimental_constrained_uitofp: {
+            Type *SrcType = CI->getArgOperand(0)->getType()->getScalarType();
+            Type *DstType = CI->getType()->getScalarType();
+            if (SrcType->isPPC_FP128Ty() || DstType->isPPC_FP128Ty() ||
+                isLargeIntegerTy(!TM.isPPC64(), SrcType) ||
+                isLargeIntegerTy(!TM.isPPC64(), DstType))
+              return true;
+            break;
+          }
+
           // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
           // because, although it does clobber the counter register, the
           // control can't then return to inside the loop unless there is also
@@ -424,6 +448,15 @@
           case Intrinsic::pow:
           case Intrinsic::sin:
           case Intrinsic::cos:
+          case Intrinsic::experimental_constrained_powi:
+          case Intrinsic::experimental_constrained_log:
+          case Intrinsic::experimental_constrained_log2:
+          case Intrinsic::experimental_constrained_log10:
+          case Intrinsic::experimental_constrained_exp:
+          case Intrinsic::experimental_constrained_exp2:
+          case Intrinsic::experimental_constrained_pow:
+          case Intrinsic::experimental_constrained_sin:
+          case Intrinsic::experimental_constrained_cos:
             return true;
           case Intrinsic::copysign:
             if (CI->getArgOperand(0)->getType()->getScalarType()->
@@ -445,6 +478,54 @@
           case Intrinsic::llround: Opcode = ISD::LLROUND; break;
           case Intrinsic::minnum: Opcode = ISD::FMINNUM; break;
           case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break;
+          case Intrinsic::experimental_constrained_fcmp:
+            Opcode = ISD::STRICT_FSETCC;
+            break;
+          case Intrinsic::experimental_constrained_fcmps:
+            Opcode = ISD::STRICT_FSETCCS;
+            break;
+          case Intrinsic::experimental_constrained_fma:
+            Opcode = ISD::STRICT_FMA;
+            break;
+          case Intrinsic::experimental_constrained_sqrt:
+            Opcode = ISD::STRICT_FSQRT;
+            break;
+          case Intrinsic::experimental_constrained_floor:
+            Opcode = ISD::STRICT_FFLOOR;
+            break;
+          case Intrinsic::experimental_constrained_ceil:
+            Opcode = ISD::STRICT_FCEIL;
+            break;
+          case Intrinsic::experimental_constrained_trunc:
+            Opcode = ISD::STRICT_FTRUNC;
+            break;
+          case Intrinsic::experimental_constrained_rint:
+            Opcode = ISD::STRICT_FRINT;
+            break;
+          case Intrinsic::experimental_constrained_lrint:
+            Opcode = ISD::STRICT_LRINT;
+            break;
+          case Intrinsic::experimental_constrained_llrint:
+            Opcode = ISD::STRICT_LLRINT;
+            break;
+          case Intrinsic::experimental_constrained_nearbyint:
+            Opcode = ISD::STRICT_FNEARBYINT;
+            break;
+          case Intrinsic::experimental_constrained_round:
+            Opcode = ISD::STRICT_FROUND;
+            break;
+          case Intrinsic::experimental_constrained_lround:
+            Opcode = ISD::STRICT_LROUND;
+            break;
+          case Intrinsic::experimental_constrained_llround:
+            Opcode = ISD::STRICT_LLROUND;
+            break;
+          case Intrinsic::experimental_constrained_minnum:
+            Opcode = ISD::STRICT_FMINNUM;
+            break;
+          case Intrinsic::experimental_constrained_maxnum:
+            Opcode = ISD::STRICT_FMAXNUM;
+            break;
           case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break;
           case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break;
           }
Index: llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple powerpc64le < %s | FileCheck %s
+
+; Check that constrained ops are converted to calls.
+define void @test(double* %cast) {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0: # %root
+; CHECK-NEXT:    mflr 0
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset r29, -24
+; CHECK-NEXT:    .cfi_offset r30, -16
+; CHECK-NEXT:    std 29, -24(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 0, 16(1)
+; CHECK-NEXT:    stdu 1, -64(1)
+; CHECK-NEXT:    li 30, 0
+; CHECK-NEXT:    addi 29, 3, -8
+; CHECK-NEXT:    .p2align 5
+; CHECK-NEXT:  .LBB0_1: # %for.body
+; CHECK-NEXT:    #
+; CHECK-NEXT:    lfdu 1, 8(29)
+; CHECK-NEXT:    bl cos
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi 30, 30, 8
+; CHECK-NEXT:    stfdx 1, 0, 29
+; CHECK-NEXT:    cmpldi 30, 2040
+; CHECK-NEXT:    bne 0, .LBB0_1
+; CHECK-NEXT:  # %bb.2: # %exit
+; CHECK-NEXT:    addi 1, 1, 64
+; CHECK-NEXT:    ld 0, 16(1)
+; CHECK-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 29, -24(1) # 8-byte Folded Reload
+; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:    blr
+root:
+  br label %for.body
+
+exit:
+  ret void
+
+for.body:
+  %i = phi i64 [ 0, %root ], [ %next, %for.body ]
+  %idx = getelementptr inbounds double, double* %cast, i64 %i
+  %val = load double, double* %idx
+  %cos = tail call nnan ninf nsz arcp double @llvm.experimental.constrained.cos.f64(double %val, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  store double %cos, double* %idx, align 8
+  %next = add nuw nsw i64 %i, 1
+  %cond = icmp eq i64 %next, 255
+  br i1 %cond, label %exit, label %for.body
+}
+
+; Check that constrained ops are converted to native instructions.
+define void @test2(double* %cast) {
+; CHECK-LABEL: test2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li 4, 255
+; CHECK-NEXT:    addi 3, 3, -8
+; CHECK-NEXT:    mtctr 4
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  .LBB1_1: # %for.body
+; CHECK-NEXT:    #
+; CHECK-NEXT:    lfdu 0, 8(3)
+; CHECK-NEXT:    xssqrtdp 0, 0
+; CHECK-NEXT:    stfdx 0, 0, 3
+; CHECK-NEXT:    bdnz .LBB1_1
+; CHECK-NEXT:  # %bb.2: # %exit
+; CHECK-NEXT:    blr
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %next, %for.body ]
+  %idx = getelementptr inbounds double, double* %cast, i64 %i
+  %val = load double, double* %idx
+  %cos = tail call nnan ninf nsz arcp double @llvm.experimental.constrained.sqrt.f64(double %val, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  store double %cos, double* %idx, align 8
+  %next = add nuw nsw i64 %i, 1
+  %cond = icmp eq i64 %next, 255
+  br i1 %cond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
+declare double @llvm.experimental.constrained.cos.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)