Index: tools/GPURuntime/GPUJIT.c
===================================================================
--- tools/GPURuntime/GPUJIT.c
+++ tools/GPURuntime/GPUJIT.c
@@ -941,6 +941,10 @@
 typedef CUresult CUDAAPI CuMemAllocFcnTy(CUdeviceptr *, size_t);
 static CuMemAllocFcnTy *CuMemAllocFcnPtr;
 
+typedef CUresult CUDAAPI CuMemAllocManagedFcnTy(CUdeviceptr *, size_t,
+                                                unsigned int);
+static CuMemAllocManagedFcnTy *CuMemAllocManagedFcnPtr;
+
 typedef CUresult CUDAAPI CuLaunchKernelFcnTy(
     CUfunction F, unsigned int GridDimX, unsigned int GridDimY,
     unsigned int gridDimZ, unsigned int blockDimX, unsigned int BlockDimY,
@@ -1081,6 +1085,9 @@
   CuMemAllocFcnPtr =
       (CuMemAllocFcnTy *)getAPIHandleCUDA(HandleCuda, "cuMemAlloc_v2");
 
+  CuMemAllocManagedFcnPtr = (CuMemAllocManagedFcnTy *)getAPIHandleCUDA(
+      HandleCuda, "cuMemAllocManaged");
+
   CuMemFreeFcnPtr =
       (CuMemFreeFcnTy *)getAPIHandleCUDA(HandleCuda, "cuMemFree_v2");
 
@@ -1445,7 +1452,7 @@
   // If not, we pass it along to the underlying allocator.
   // This is a hack, and can be removed if the underlying issue is fixed.
   if (isManagedPtr(mem)) {
-    if (cudaFree(mem) != cudaSuccess) {
+    if (CuMemFreeFcnPtr((size_t)mem) != CUDA_SUCCESS) {
       fprintf(stderr, "cudaFree failed.\n");
       exit(-1);
     }
@@ -1465,15 +1472,18 @@
     fprintf(stderr, "cudaMallocManaged called with size 0. "
                     "Promoting to size 1");
   size = max(size, 1);
-  polly_initContextCUDA();
-  dump_function();
-  void *a;
-  if (cudaMallocManaged(&a, size, cudaMemAttachGlobal) != cudaSuccess) {
+  PollyGPUContext *_ = polly_initContextCUDA();
+  assert(_ && "polly_initContextCUDA failed");
+
+  void *newMemPtr;
+  const CUresult Res = CuMemAllocManagedFcnPtr((CUdeviceptr *)&newMemPtr, size,
+                                               CU_MEM_ATTACH_GLOBAL);
+  if (Res != CUDA_SUCCESS) {
     fprintf(stderr, "cudaMallocManaged failed for size: %zu\n", size);
     exit(-1);
   }
-  addManagedPtr(a);
-  return a;
+  addManagedPtr(newMemPtr);
+  return newMemPtr;
 }
 
 static void freeDeviceMemoryCUDA(PollyGPUDevicePtr *Allocation) {
@@ -1531,6 +1541,7 @@
 }
 
 static void freeContextCUDA(PollyGPUContext *Context) {
+
   dump_function();
 
   CUDAContext *Ctx = (CUDAContext *)Context->Context;