Index: lib/CodeGen/PPCGCodeGeneration.cpp
===================================================================
--- lib/CodeGen/PPCGCodeGeneration.cpp
+++ lib/CodeGen/PPCGCodeGeneration.cpp
@@ -2611,6 +2611,36 @@
     return isl_ast_expr_ge(Iterations, MinComputeExpr);
   }
 
+  /// Check whether any instruction operand in @p BB is a function pointer.
+  ///
+  /// An operand counts when its type is a pointer to a function type.
+  bool ContainsFnPtrValInBlock(const BasicBlock *BB) {
+    for (const Instruction &I : *BB) {
+      for (Value *Operand : I.operands())
+        if (auto *PtrTy = dyn_cast<PointerType>(Operand->getType()))
+          if (isa<FunctionType>(PtrTy->getElementType()))
+            return true;
+    }
+    return false;
+  }
+
+  /// Return whether ContainsFnPtrValInBlock holds for any block of @p S.
+  bool ContainsFnPtr(const Scop &S) {
+    for (const auto &Statement : S) {
+      if (Statement.isBlockStmt()) {
+        if (ContainsFnPtrValInBlock(Statement.getBasicBlock()))
+          return true;
+        continue;
+      }
+      assert(Statement.isRegionStmt() &&
+             "Stmt was neither block nor region statement");
+      for (const BasicBlock *RegionBB : Statement.getRegion()->blocks())
+        if (ContainsFnPtrValInBlock(RegionBB))
+          return true;
+    }
+    return false;
+  }
+
   /// Generate code for a given GPU AST described by @p Root.
   ///
   /// @param Root An isl_ast_node pointing to the root of the GPU AST.
@@ -2681,6 +2711,14 @@
     if (S->hasInvariantAccesses())
       return false;
 
+    // We currently do not support functions inside kernels, as code
+    // generation will need to offload function calls to the kernel.
+    // This may lead to a kernel trying to call a function on the host.
+    // This also allows us to prevent codegen from trying to take the
+    // address of an intrinsic function to send to the kernel.
+    if (ContainsFnPtr(CurrentScop))
+      return false;
+
     auto PPCGScop = createPPCGScop();
     auto PPCGProg = createPPCGProg(PPCGScop);
     auto PPCGGen = generateGPU(PPCGScop, PPCGProg);
Index: test/GPGPU/intrinsic-not-copied-to-kernel.ll
===================================================================
--- /dev/null
+++ test/GPGPU/intrinsic-not-copied-to-kernel.ll
@@ -0,0 +1,71 @@
+; RUN: opt %loadPolly -S -polly-codegen-ppcg < %s | FileCheck %s
+
+;CHECK:       %conv = fpext float %0 to double
+;CHECK-NEXT:  %1 = tail call double @llvm.pow.f64(double %conv, double 3.000000e+00)
+;CHECK-NEXT:  %conv6 = fptrunc double %1 to float
+
+; REQUIRES: pollyacc
+
+; static const int N = 1000;
+; void f(float A[N][N], int n, float B[N][N]) {
+;   for(int i = 0; i < n; i++) {
+;     for(int j = 0; j < n; j++) {
+;       B[i][j] = pow(A[i][j], 3);
+;     }
+;
+;   }
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+define void @f([1000 x float]* %A, i32 %n, [1000 x float]* %B) {
+entry:
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  %cmp3 = icmp sgt i32 %n, 0 ; skip the whole loop nest unless n > 0
+  br i1 %cmp3, label %for.cond1.preheader.lr.ph, label %for.end13
+
+for.cond1.preheader.lr.ph:                        ; preds = %entry.split
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.cond1.preheader.lr.ph, %for.inc11
+  %indvars.iv5 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next6, %for.inc11 ] ; outer induction variable (i)
+  %cmp21 = icmp sgt i32 %n, 0 ; enter the inner loop only if n > 0
+  br i1 %cmp21, label %for.body3.lr.ph, label %for.inc11
+
+for.body3.lr.ph:                                  ; preds = %for.cond1.preheader
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3.lr.ph, %for.body3
+  %indvars.iv = phi i64 [ 0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ] ; inner induction variable (j)
+  %arrayidx5 = getelementptr inbounds [1000 x float], [1000 x float]* %A, i64 %indvars.iv5, i64 %indvars.iv
+  %0 = load float, float* %arrayidx5, align 4 ; A[i][j]
+  %conv = fpext float %0 to double
+  %1 = tail call double @llvm.pow.f64(double %conv, double 3.000000e+00) ; pow(A[i][j], 3); must remain in the output
+  %conv6 = fptrunc double %1 to float
+  %arrayidx10 = getelementptr inbounds [1000 x float], [1000 x float]* %B, i64 %indvars.iv5, i64 %indvars.iv
+  store float %conv6, float* %arrayidx10, align 4 ; B[i][j] = truncated pow result
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %wide.trip.count = zext i32 %n to i64
+  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count ; continue while j + 1 != n
+  br i1 %exitcond, label %for.body3, label %for.cond1.for.inc11_crit_edge
+
+for.cond1.for.inc11_crit_edge:                    ; preds = %for.body3
+  br label %for.inc11
+
+for.inc11:                                        ; preds = %for.cond1.for.inc11_crit_edge, %for.cond1.preheader
+  %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1
+  %wide.trip.count7 = zext i32 %n to i64
+  %exitcond8 = icmp ne i64 %indvars.iv.next6, %wide.trip.count7 ; continue while i + 1 != n
+  br i1 %exitcond8, label %for.cond1.preheader, label %for.cond.for.end13_crit_edge
+
+for.cond.for.end13_crit_edge:                     ; preds = %for.inc11
+  br label %for.end13
+
+for.end13:                                        ; preds = %for.cond.for.end13_crit_edge, %entry.split
+  ret void
+}
+
+declare double @llvm.pow.f64(double, double)