diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -653,18 +653,30 @@
       }
 
       return true;
-    } else if (isa<BinaryOperator>(J) &&
-               (J->getType()->getScalarType()->isFP128Ty() ||
-                J->getType()->getScalarType()->isPPC_FP128Ty())) {
-      // Most operations on f128 or ppc_f128 values become calls.
-      return true;
-    } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
-               isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
-      CastInst *CI = cast<CastInst>(J);
-      if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
-          CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
-          isLargeIntegerTy(!TM.isPPC64(), CI->getSrcTy()->getScalarType()) ||
-          isLargeIntegerTy(!TM.isPPC64(), CI->getDestTy()->getScalarType()))
+    } else if (const auto *CI = dyn_cast<CastInst>(J)) {
+      if (!ST->hasP9Vector() && (CI->getSrcTy()->getScalarType()->isFP128Ty() ||
+                                 CI->getDestTy()->getScalarType()->isFP128Ty()))
+        return true;
+      // FIXME: ppc_fp128 to i32 and i32/u32 to ppc_fp128 don't require a call.
+      if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) || isa<FPToSIInst>(J) ||
+          isa<FPToUIInst>(J))
+        if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
+            CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
+            isLargeIntegerTy(!TM.isPPC64(), CI->getSrcTy()->getScalarType()) ||
+            isLargeIntegerTy(!TM.isPPC64(), CI->getDestTy()->getScalarType()))
+          return true;
+    } else if (J->getType()->getScalarType()->isPPC_FP128Ty()) {
+      // There are no native instructions for ppc_fp128, but some operations can
+      // be lowered into separate instructions for high and low parts.
+      if (!isa<CastInst>(J) && !isa<LoadInst>(J) && !isa<StoreInst>(J) &&
+          !isa<FCmpInst>(J) && J->getOpcode() != Instruction::FNeg)
+        return true;
+    } else if (J->getType()->getScalarType()->isFP128Ty()) {
+      // Since Power9, we have native instructions for all the operations.
+      // Before that, load and store are legal because fp128 uses vector
+      // registers. fneg is also an exception that doesn't require a call.
+      if (!ST->hasP9Vector() && !isa<LoadInst>(J) && !isa<StoreInst>(J) &&
+          J->getOpcode() != Instruction::FNeg)
         return true;
     } else if (isLargeIntegerTy(!TM.isPPC64(),
                                 J->getType()->getScalarType()) &&
diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll b/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll
--- a/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll
+++ b/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll
@@ -1,5 +1,7 @@
-; RUN: llc -verify-machineinstrs -stop-after=hardware-loops -mcpu=pwr9 \
-; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -stop-after=hardware-loops -mcpu=pwr9 \
+; RUN:   -mtriple=powerpc64le-unknown-unknown | FileCheck %s --check-prefix=P9
+; RUN: llc < %s -verify-machineinstrs -stop-after=hardware-loops -mcpu=pwr8 \
+; RUN:   -mtriple=powerpc64le-unknown-unknown | FileCheck %s --check-prefix=P8
 
 @a = internal global fp128 0xL00000000000000000000000000000000, align 16
 @x = internal global [4 x fp128] zeroinitializer, align 16
@@ -24,9 +26,93 @@
 for.end:                                          ; preds = %for.body
   ret void
 
-; CHECK-LABEL: fmul_ctrloop_fp128
-; CHECK-NOT:     call void @llvm.set.loop.iterations.i64(i64 4)
-; CHECK-NOT:     call i1 @llvm.loop.decrement.i64(i64 1)
+; P9-LABEL: fmul_ctrloop_fp128
+; P9: call void @llvm.set.loop.iterations.i64(i64 4)
+; P9: call i1 @llvm.loop.decrement.i64(i64 1)
+
+; P8-LABEL: fmul_ctrloop_fp128
+; P8-NOT: call void @llvm.set.loop.iterations.i64(i64 4)
+; P8-NOT: call i1 @llvm.loop.decrement.i64(i64 1)
+}
+
+define void @fneg_ctrloop_fp128() { ; fneg of an fp128 value inside a 4-iteration counted loop
+entry:
+  %src = load fp128, fp128* @a, align 16 ; loaded once, before the loop
+  br label %loop
+
+loop:
+  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
+  %negated = fneg fp128 %src ; the operation under test
+  %slot = getelementptr inbounds [4 x fp128], [4 x fp128]* @y, i64 0, i64 %idx
+  store fp128 %negated, fp128* %slot, align 16
+  %idx.next = add nuw nsw i64 %idx, 1
+  %done = icmp eq i64 %idx.next, 4 ; trip count of 4 is what the checks below look for
+  br i1 %done, label %exit, label %loop
+
+exit:
+  ret void
+
+; P9-LABEL: fneg_ctrloop_fp128
+; P9: call void @llvm.set.loop.iterations.i64(i64 4)
+; P9: call i1 @llvm.loop.decrement.i64(i64 1)
+
+; P8-LABEL: fneg_ctrloop_fp128
+; P8: call void @llvm.set.loop.iterations.i64(i64 4)
+; P8: call i1 @llvm.loop.decrement.i64(i64 1)
+}
+
+define void @fpext_ctrloop_fp128(double* %a) { ; double-to-fp128 fpext inside a 4-iteration counted loop
+entry:
+  br label %loop
+
+loop:
+  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
+  %src.addr = getelementptr inbounds double, double* %a, i64 %idx
+  %narrow = load double, double* %src.addr, align 8
+  %wide = fpext double %narrow to fp128 ; the operation under test
+  %dst.addr = getelementptr inbounds [4 x fp128], [4 x fp128]* @y, i64 0, i64 %idx
+  store fp128 %wide, fp128* %dst.addr, align 16
+  %idx.next = add nuw nsw i64 %idx, 1
+  %done = icmp eq i64 %idx.next, 4 ; trip count of 4 is what the checks below look for
+  br i1 %done, label %exit, label %loop
+
+exit:
+  ret void
+
+; P9-LABEL: fpext_ctrloop_fp128
+; P9: call void @llvm.set.loop.iterations.i64(i64 4)
+; P9: call i1 @llvm.loop.decrement.i64(i64 1)
+
+; P8-LABEL: fpext_ctrloop_fp128
+; P8-NOT: call void @llvm.set.loop.iterations.i64(i64 4)
+; P8-NOT: call i1 @llvm.loop.decrement.i64(i64 1)
+}
+
+define void @fptrunc_ctrloop_fp128(double* %a) { ; fp128-to-double fptrunc inside a 4-iteration counted loop
+entry:
+  br label %for.body
+
+for.body:
+  %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds [4 x fp128], [4 x fp128]* @x, i64 0, i64 %i.06
+  %0 = load fp128, fp128* %arrayidx, align 16
+  %trunc = fptrunc fp128 %0 to double ; the operation under test
+  %arrayidx1 = getelementptr inbounds double, double* %a, i64 %i.06
+  store double %trunc, double* %arrayidx1, align 8 ; 8-byte element stride: a 16-byte alignment claim cannot hold for consecutive elements
+  %inc = add nuw nsw i64 %i.06, 1
+  %exitcond = icmp eq i64 %inc, 4 ; trip count of 4 is what the checks below look for
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+
+; P9-LABEL: fptrunc_ctrloop_fp128
+; P9: call void @llvm.set.loop.iterations.i64(i64 4)
+; P9: call i1 @llvm.loop.decrement.i64(i64 1)
+
+; P8-LABEL: fptrunc_ctrloop_fp128
+; P8-NOT: call void @llvm.set.loop.iterations.i64(i64 4)
+; P8-NOT: call i1 @llvm.loop.decrement.i64(i64 1)
 }
 
 declare void @obfuscate(i8*, ...) local_unnamed_addr #2