Index: lib/CodeGen/PPCGCodeGeneration.cpp =================================================================== --- lib/CodeGen/PPCGCodeGeneration.cpp +++ lib/CodeGen/PPCGCodeGeneration.cpp @@ -1273,12 +1273,17 @@ /// /// @param is64Bit Are we looking for a 64 bit architecture? static std::string computeNVPTXDataLayout(bool is64Bit) { - std::string Ret = "e"; + std::string Ret = ""; - if (!is64Bit) - Ret += "-p:32:32"; - - Ret += "-i64:64-v16:16-v32:32-n16:32:64"; + if (!is64Bit) { + Ret += "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:" + "64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:" + "64-v128:128:128-n16:32:64"; + } else { + Ret += "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:" + "64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:" + "64-v128:128:128-n16:32:64"; + } return Ret; } @@ -1298,7 +1303,8 @@ const ScopArrayInfo *SAI = ScopArrayInfo::getFromId(Id); Args.push_back(SAI->getElementType()); } else { - Args.push_back(Builder.getInt8PtrTy()); + static const int UseGlobalMemory = 1; + Args.push_back(Builder.getInt8PtrTy(UseGlobalMemory)); } } Index: test/GPGPU/cuda-annotations.ll =================================================================== --- test/GPGPU/cuda-annotations.ll +++ test/GPGPU/cuda-annotations.ll @@ -4,13 +4,13 @@ ; REQUIRES: pollyacc -; KERNEL: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %n) #0 { +; KERNEL: define ptx_kernel void @kernel_0(i8 addrspace(1)* %MemRef_A, i64 %n) #0 { ; KERNEL: !nvvm.annotations = !{!0} -; KERNEL: !0 = !{void (i8*, i64)* @kernel_0, !"maxntidx", i32 32, !"maxntidy", i32 1, !"maxntidz", i32 1} +; KERNEL: !0 = !{void (i8 addrspace(1)*, i64)* @kernel_0, !"maxntidx", i32 32, !"maxntidy", i32 1, !"maxntidz", i32 1} -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @foo(i64* %A, i64 %n) { bb: Index: test/GPGPU/double-parallel-loop.ll =================================================================== --- test/GPGPU/double-parallel-loop.ll +++ test/GPGPU/double-parallel-loop.ll @@ -212,7 +212,7 @@ ; A[i][j] += i * j; ; } ; -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @double_parallel_loop([1024 x float]* %A) { bb: Index: test/GPGPU/host-control-flow.ll =================================================================== --- test/GPGPU/host-control-flow.ll +++ test/GPGPU/host-control-flow.ll @@ -42,7 +42,7 @@ ; IR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar, 98 ; IR-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit -; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %c0) +; KERNEL-IR: define ptx_kernel void @kernel_0(i8 addrspace(1)* %MemRef_A, i64 %c0) ; KERNEL-IR-LABEL: entry: ; KERNEL-IR-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() ; KERNEL-IR-NEXT: %b0 = zext i32 %0 to i64 @@ -65,35 +65,35 @@ ; KERNEL-IR-NEXT: br label %polly.stmt.for.body3 ; KERNEL-IR-LABEL: polly.stmt.for.body3: ; preds = %polly.then -; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A = bitcast i8* %MemRef_A to float* +; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)* ; KERNEL-IR-NEXT: %pexp.pdiv_r = urem i64 %c0, 2 ; KERNEL-IR-NEXT: %polly.access.mul.MemRef_A = mul nsw i64 %pexp.pdiv_r, 100 ; KERNEL-IR-NEXT: %7 = mul nsw i64 32, %b0 ; KERNEL-IR-NEXT: %8 = add nsw i64 %7, %t0 ; KERNEL-IR-NEXT: %polly.access.add.MemRef_A = add nsw i64 %polly.access.mul.MemRef_A, %8 -; KERNEL-IR-NEXT: %polly.access.MemRef_A = getelementptr float, float* %polly.access.cast.MemRef_A, i64 %polly.access.add.MemRef_A -; KERNEL-IR-NEXT: %tmp_p_scalar_ = load float, float* %polly.access.MemRef_A, align 4 -; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A1 = bitcast i8* %MemRef_A to float* +; KERNEL-IR-NEXT: %polly.access.MemRef_A = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A, i64 %polly.access.add.MemRef_A +; KERNEL-IR-NEXT: %tmp_p_scalar_ = load float, float addrspace(1)* %polly.access.MemRef_A, align 4 +; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A1 = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)* ; KERNEL-IR-NEXT: %pexp.pdiv_r2 = urem i64 %c0, 2 ; KERNEL-IR-NEXT: %polly.access.mul.MemRef_A3 = mul nsw i64 %pexp.pdiv_r2, 100 ; KERNEL-IR-NEXT: %9 = mul nsw i64 32, %b0 ; KERNEL-IR-NEXT: %10 = add nsw i64 %9, %t0 ; KERNEL-IR-NEXT: %11 = add nsw i64 %10, 1 ; KERNEL-IR-NEXT: %polly.access.add.MemRef_A4 = add nsw i64 %polly.access.mul.MemRef_A3, %11 -; KERNEL-IR-NEXT: %polly.access.MemRef_A5 = getelementptr float, float* %polly.access.cast.MemRef_A1, i64 %polly.access.add.MemRef_A4 -; KERNEL-IR-NEXT: %tmp2_p_scalar_ = load float, float* %polly.access.MemRef_A5, align 4 +; KERNEL-IR-NEXT: %polly.access.MemRef_A5 = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A1, i64 %polly.access.add.MemRef_A4 +; KERNEL-IR-NEXT: %tmp2_p_scalar_ = load float, float addrspace(1)* %polly.access.MemRef_A5, align 4 ; KERNEL-IR-NEXT: %p_add = fadd float %tmp_p_scalar_, %tmp2_p_scalar_ -; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A6 = bitcast i8* %MemRef_A to float* +; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A6 = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)* ; KERNEL-IR-NEXT: %pexp.pdiv_r7 = urem i64 %c0, 2 ; KERNEL-IR-NEXT: %polly.access.mul.MemRef_A8 = mul nsw i64 %pexp.pdiv_r7, 100 ; KERNEL-IR-NEXT: %12 = mul nsw i64 32, %b0 ; KERNEL-IR-NEXT: %13 = add nsw i64 %12, %t0 ; KERNEL-IR-NEXT: %14 = add nsw i64 %13, 2 ; KERNEL-IR-NEXT: %polly.access.add.MemRef_A9 = add nsw i64 %polly.access.mul.MemRef_A8, %14 -; KERNEL-IR-NEXT: %polly.access.MemRef_A10 = getelementptr float, float* %polly.access.cast.MemRef_A6, i64 %polly.access.add.MemRef_A9 -; KERNEL-IR-NEXT: %tmp3_p_scalar_ = load float, float* %polly.access.MemRef_A10, align 4 +; KERNEL-IR-NEXT: %polly.access.MemRef_A10 = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A6, i64 %polly.access.add.MemRef_A9 +; KERNEL-IR-NEXT: %tmp3_p_scalar_ = load float, float addrspace(1)* %polly.access.MemRef_A10, align 4 ; KERNEL-IR-NEXT: %p_add12 = fadd float %p_add, %tmp3_p_scalar_ -; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A11 = bitcast i8* %MemRef_A to float* +; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A11 = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)* ; KERNEL-IR-NEXT: %15 = add nsw i64 %c0, 1 ; KERNEL-IR-NEXT: %pexp.pdiv_r12 = urem i64 %15, 2 ; KERNEL-IR-NEXT: %polly.access.mul.MemRef_A13 = mul nsw i64 %pexp.pdiv_r12, 100 @@ -101,10 +101,10 @@ ; KERNEL-IR-NEXT: %17 = add nsw i64 %16, %t0 ; KERNEL-IR-NEXT: %18 = add nsw i64 %17, 1 ; KERNEL-IR-NEXT: %polly.access.add.MemRef_A14 = add nsw i64 %polly.access.mul.MemRef_A13, %18 -; KERNEL-IR-NEXT: %polly.access.MemRef_A15 = getelementptr float, float* %polly.access.cast.MemRef_A11, i64 %polly.access.add.MemRef_A14 -; KERNEL-IR-NEXT: %tmp4_p_scalar_ = load float, float* %polly.access.MemRef_A15, align 4 +; KERNEL-IR-NEXT: %polly.access.MemRef_A15 = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A11, i64 %polly.access.add.MemRef_A14 +; KERNEL-IR-NEXT: %tmp4_p_scalar_ = load float, float addrspace(1)* %polly.access.MemRef_A15, align 4 ; KERNEL-IR-NEXT: %p_add17 = fadd float %tmp4_p_scalar_, %p_add12 -; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A16 = bitcast i8* %MemRef_A to float* +; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A16 = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)* ; KERNEL-IR-NEXT: %19 = add nsw i64 %c0, 1 ; KERNEL-IR-NEXT: %pexp.pdiv_r17 = urem i64 %19, 2 ; KERNEL-IR-NEXT: %polly.access.mul.MemRef_A18 = mul nsw i64 %pexp.pdiv_r17, 100 @@ -112,15 +112,15 @@ ; KERNEL-IR-NEXT: %21 = add nsw i64 %20, %t0 ; KERNEL-IR-NEXT: %22 = add nsw i64 %21, 1 ; KERNEL-IR-NEXT: %polly.access.add.MemRef_A19 = add nsw i64 %polly.access.mul.MemRef_A18, %22 -; KERNEL-IR-NEXT: %polly.access.MemRef_A20 = getelementptr float, float* %polly.access.cast.MemRef_A16, i64 %polly.access.add.MemRef_A19 -; KERNEL-IR-NEXT: store float %p_add17, float* %polly.access.MemRef_A20, align 4 +; KERNEL-IR-NEXT: %polly.access.MemRef_A20 = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A16, i64 %polly.access.add.MemRef_A19 +; KERNEL-IR-NEXT: store float %p_add17, float addrspace(1)* %polly.access.MemRef_A20, align 4 ; KERNEL-IR-NEXT: br label %polly.merge ; KERNEL-IR-LABEL: polly.else: ; preds = %polly.cond ; KERNEL-IR-NEXT: br label %polly.merge ; KERNEL-IR-NEXT: } -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @foo([100 x float]* %A) { entry: Index: test/GPGPU/host-statement.ll =================================================================== --- test/GPGPU/host-statement.ll +++ test/GPGPU/host-statement.ll @@ -10,7 +10,7 @@ ; REQUIRES: pollyacc -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" target triple = "x86_64-unknown-linux-gnu" declare void @llvm.lifetime.start(i64, i8* nocapture) #0 Index: test/GPGPU/invalid-kernel.ll =================================================================== --- test/GPGPU/invalid-kernel.ll +++ test/GPGPU/invalid-kernel.ll @@ -46,7 +46,7 @@ ; IR: br i1 false, label %polly.start, label %bb1 -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @foo(i64* %A, i64* %B) { bb: Index: test/GPGPU/kernel-params-only-some-arrays.ll =================================================================== --- test/GPGPU/kernel-params-only-some-arrays.ll +++ test/GPGPU/kernel-params-only-some-arrays.ll @@ -18,10 +18,10 @@ ; KERNEL: ; ModuleID = 'kernel_0' ; KERNEL-NEXT: source_filename = "kernel_0" -; KERNEL-NEXT: target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" +; KERNEL-NEXT: target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" ; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda" -; KERNEL: define ptx_kernel void @kernel_0(i8* %MemRef_A) +; KERNEL: define ptx_kernel void @kernel_0(i8 addrspace(1)* %MemRef_A) ; KERNEL-NEXT: entry: ; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() ; KERNEL-NEXT: %b0 = zext i32 %0 to i64 @@ -33,10 +33,10 @@ ; KERNEL: ; ModuleID = 'kernel_1' ; KERNEL-NEXT: source_filename = "kernel_1" -; KERNEL-NEXT: target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" +; KERNEL-NEXT: target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" ; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda" -; KERNEL: define ptx_kernel void @kernel_1(i8* %MemRef_B) +; KERNEL: define ptx_kernel void @kernel_1(i8 addrspace(1)* %MemRef_B) ; KERNEL-NEXT: entry: ; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() ; KERNEL-NEXT: %b0 = zext i32 %0 to i64 @@ -59,7 +59,7 @@ ; IR-NEXT: [[DATA:%.*]] = bitcast i8** %polly_launch_1_param_0 to i8* ; IR-NEXT: store i8* [[DATA]], i8** [[SLOT]] -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @kernel_params_only_some_arrays(float* %A, float* %B) { entry: Index: test/GPGPU/kernel-params-scop-parameter.ll =================================================================== --- test/GPGPU/kernel-params-scop-parameter.ll +++ test/GPGPU/kernel-params-scop-parameter.ll @@ -9,9 +9,9 @@ ; A[i] += 42; ; } -; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %n) +; KERNEL-IR: define ptx_kernel void @kernel_0(i8 addrspace(1)* %MemRef_A, i64 %n) -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @kernel_params_scop_parameter(float* %A, i64 %n) { bb: Index: test/GPGPU/mostly-sequential.ll =================================================================== --- test/GPGPU/mostly-sequential.ll +++ test/GPGPU/mostly-sequential.ll @@ -49,7 +49,7 @@ ; Verify that we identified this kernel as non-profitable. ; IR: br i1 false, label %polly.start, label %bb3 -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @foo(float* %A) { bb: Index: test/GPGPU/non-read-only-scalars.ll =================================================================== --- test/GPGPU/non-read-only-scalars.ll +++ test/GPGPU/non-read-only-scalars.ll @@ -87,15 +87,15 @@ ; CODE-NEXT: Stmt_bb20(c0); ; KERNEL-IR: store float %p_tmp23, float* %sum.0.phiops -; KERNEL-IR-NEXT: [[REGA:%.+]] = bitcast i8* %MemRef_sum_0__phi to float* +; KERNEL-IR-NEXT: [[REGA:%.+]] = addrspacecast i8 addrspace(1)* %MemRef_sum_0__phi to float* ; KERNEL-IR-NEXT: [[REGB:%.+]] = load float, float* %sum.0.phiops ; KERNEL-IR-NEXT: store float [[REGB]], float* [[REGA]] -; KERNEL-IR-NEXT: [[REGC:%.+]] = bitcast i8* %MemRef_sum_0 to float* +; KERNEL-IR-NEXT: [[REGC:%.+]] = addrspacecast i8 addrspace(1)* %MemRef_sum_0 to float* ; KERNEL-IR-NEXT: [[REGD:%.+]] = load float, float* %sum.0.s2a ; KERNEL-IR-NEXT: store float [[REGD]], float* [[REGC]] ; KERNEL-IR-NEXT: ret void -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" @.str = private unnamed_addr constant [4 x i8] c"%f\0A\00", align 1 Index: test/GPGPU/non-zero-array-offset.ll =================================================================== --- test/GPGPU/non-zero-array-offset.ll +++ test/GPGPU/non-zero-array-offset.ll @@ -77,7 +77,7 @@ ; } ; #endif ; -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @foo(float* %A, float* %B) { bb: Index: test/GPGPU/only-part-of-array-modified.ll =================================================================== --- test/GPGPU/only-part-of-array-modified.ll +++ test/GPGPU/only-part-of-array-modified.ll @@ -12,7 +12,7 @@ ; CODE: cudaCheckReturn(cudaMemcpy(dev_MemRef_B, MemRef_B, (1024) * sizeof(i32), cudaMemcpyHostToDevice)); ; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (2047) * sizeof(i32), cudaMemcpyHostToDevice)); -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @foo(float* %A, float* %B) { bb: Index: test/GPGPU/parametric-loop-bound.ll =================================================================== --- test/GPGPU/parametric-loop-bound.ll +++ test/GPGPU/parametric-loop-bound.ll @@ -35,7 +35,7 @@ ; IR-NEXT: [[REGB:%.+]] = bitcast i64* %polly_launch_0_param_1 to i8* ; IR-NEXT: store i8* [[REGB]], i8** [[REGA]] -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @foo(i64* %A, i64 %n) { bb: Index: test/GPGPU/phi-nodes-in-kernel.ll =================================================================== --- test/GPGPU/phi-nodes-in-kernel.ll +++ test/GPGPU/phi-nodes-in-kernel.ll @@ -11,7 +11,7 @@ ; REQUIRES: pollyacc -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" target triple = "x86_64-unknown-linux-gnu" ; CODE: # host @@ -49,10 +49,10 @@ ; KERNEL-IR: entry: ; KERNEL-IR-NEXT: %out_l.055.s2a = alloca i32 ; KERNEL-IR-NEXT: %out_l.055.phiops = alloca i32 -; KERNEL-IR-NEXT: %1 = bitcast i8* %MemRef_out_l_055__phi to i32* +; KERNEL-IR-NEXT: %1 = addrspacecast i8 addrspace(1)* %MemRef_out_l_055__phi to i32* ; KERNEL-IR-NEXT: %2 = load i32, i32* %1 ; KERNEL-IR-NEXT: store i32 %2, i32* %out_l.055.phiops -; KERNEL-IR-NEXT: %3 = bitcast i8* %MemRef_out_l_055 to i32* +; KERNEL-IR-NEXT: %3 = addrspacecast i8 addrspace(1)* %MemRef_out_l_055 to i32* ; KERNEL-IR-NEXT: %4 = load i32, i32* %3 ; KERNEL-IR-NEXT: store i32 %4, i32* %out_l.055.s2a Index: test/GPGPU/private-memory.ll =================================================================== --- test/GPGPU/private-memory.ll +++ test/GPGPU/private-memory.ll @@ -28,19 +28,19 @@ ; KERNEL: %polly.access.cast.private_array = bitcast [1 x float]* %private_array to float* ; KERNEL-NEXT: %polly.access.private_array = getelementptr float, float* %polly.access.cast.private_array, i64 0 -; KERNEL-NEXT: %polly.access.cast.MemRef_A = bitcast i8* %MemRef_A to float* -; KERNEL-NEXT: %polly.access.MemRef_A = getelementptr float, float* %polly.access.cast.MemRef_A, i64 %t0 -; KERNEL-NEXT: %shared.read = load float, float* %polly.access.MemRef_A +; KERNEL-NEXT: %polly.access.cast.MemRef_A = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)* +; KERNEL-NEXT: %polly.access.MemRef_A = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A, i64 %t0 +; KERNEL-NEXT: %shared.read = load float, float addrspace(1)* %polly.access.MemRef_A ; KERNEL-NEXT: store float %shared.read, float* %polly.access.private_array ; KERNEL: %polly.access.cast.private_array5 = bitcast [1 x float]* %private_array to float* ; KERNEL-NEXT: %polly.access.private_array6 = getelementptr float, float* %polly.access.cast.private_array5, i64 0 -; KERNEL-NEXT: %polly.access.cast.MemRef_A7 = bitcast i8* %MemRef_A to float* -; KERNEL-NEXT: %polly.access.MemRef_A8 = getelementptr float, float* %polly.access.cast.MemRef_A7, i64 %t0 +; KERNEL-NEXT: %polly.access.cast.MemRef_A7 = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)* +; KERNEL-NEXT: %polly.access.MemRef_A8 = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A7, i64 %t0 ; KERNEL-NEXT: %shared.write = load float, float* %polly.access.private_array6 -; KERNEL-NEXT: store float %shared.write, float* %polly.access.MemRef_A8 +; KERNEL-NEXT: store float %shared.write, float addrspace(1)* %polly.access.MemRef_A8 -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @add(float* %A) { bb: Index: test/GPGPU/region-stmt.ll =================================================================== --- test/GPGPU/region-stmt.ll +++ test/GPGPU/region-stmt.ll @@ -39,7 +39,7 @@ ; } ; source_filename = "/tmp/test.c" -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @foo(float* %A, float* %B) { entry: Index: test/GPGPU/remove-dead-instructions-in-stmt-2.ll =================================================================== --- test/GPGPU/remove-dead-instructions-in-stmt-2.ll +++ test/GPGPU/remove-dead-instructions-in-stmt-2.ll @@ -4,10 +4,10 @@ ; REQUIRES: pollyacc -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" target triple = "x86_64-unknown-linux-gnu" -; KERNEL-IR: store i32 0, i32* %polly.access.MemRef_sum_c, align 4 +; KERNEL-IR: store i32 0, i32 addrspace(1)* %polly.access.MemRef_sum_c, align 4 ; KERNEL-IR-NEXT: br label %polly.merge define void @kernel_dynprog([50 x [50 x i32]]* %sum_c) { Index: test/GPGPU/remove-dead-instructions-in-stmt.ll =================================================================== --- test/GPGPU/remove-dead-instructions-in-stmt.ll +++ test/GPGPU/remove-dead-instructions-in-stmt.ll @@ -10,7 +10,7 @@ ; condition. This code referred to CPU registers and consequently resulted ; in invalid bitcode. -; KERNEL-IR: store i32 0, i32* %polly.access.MemRef_sum_c, align 4 +; KERNEL-IR: store i32 0, i32 addrspace(1)* %polly.access.MemRef_sum_c, align 4 ; KERNEL-IR-NEXT: br label %polly.merge target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/GPGPU/run-time-check.ll =================================================================== --- test/GPGPU/run-time-check.ll +++ test/GPGPU/run-time-check.ll @@ -12,7 +12,7 @@ ; IR: %tmp = icmp slt i64 %i.0, %n ; IR-NEXT: br i1 %tmp, label %bb2, label %polly.merge_new_and_old -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @foo(i64 %n, [32 x float]* %A) { bb: Index: test/GPGPU/scalar-param-and-value-32-bit.ll =================================================================== --- test/GPGPU/scalar-param-and-value-32-bit.ll +++ test/GPGPU/scalar-param-and-value-32-bit.ll @@ -9,7 +9,7 @@ ; A[j + n] += 42; ; } -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" ; CHECK: define ptx_kernel void @kernel_0(i8* %MemRef_A, i32 %n) Index: test/GPGPU/scalar-param-and-value-use.ll =================================================================== --- test/GPGPU/scalar-param-and-value-use.ll +++ test/GPGPU/scalar-param-and-value-use.ll @@ -10,7 +10,7 @@ ; A[i][j] += A[i + 1][j + 1]; ; } -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" ; This test case failed at some point as %n was only available in this kernel ; when referenced through an isl_id in an isl ast expression, but not when Index: test/GPGPU/scalar-parameter-fp128.ll =================================================================== --- test/GPGPU/scalar-parameter-fp128.ll +++ test/GPGPU/scalar-parameter-fp128.ll @@ -11,7 +11,7 @@ ; A[i] += b; ; } ; -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @fp128(fp128* %A, fp128 %b) { bb: Index: test/GPGPU/scalar-parameter-half.ll =================================================================== --- test/GPGPU/scalar-parameter-half.ll +++ test/GPGPU/scalar-parameter-half.ll @@ -9,7 +9,7 @@ ; A[i] += b; ; } ; -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @half(half* %A, half %b) { bb: Index: test/GPGPU/scalar-parameter-i120.ll =================================================================== --- test/GPGPU/scalar-parameter-i120.ll +++ test/GPGPU/scalar-parameter-i120.ll @@ -11,7 +11,7 @@ ; A[i] += b; ; } ; -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @i120(i120* %A, i120 %b) { bb: Index: test/GPGPU/scalar-parameter-i128.ll =================================================================== --- test/GPGPU/scalar-parameter-i128.ll +++ test/GPGPU/scalar-parameter-i128.ll @@ -12,7 +12,7 @@ ; A[i] += b; ; } ; -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @i128(i128* %A, i128 %b) { bb: Index: test/GPGPU/scalar-parameter-i3000.ll =================================================================== --- test/GPGPU/scalar-parameter-i3000.ll +++ test/GPGPU/scalar-parameter-i3000.ll @@ -12,7 +12,7 @@ ; A[i] += b; ; } ; -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @i3000(i3000* %A, i3000 %b) { bb: Index: test/GPGPU/scalar-parameter-i80.ll =================================================================== --- test/GPGPU/scalar-parameter-i80.ll +++ test/GPGPU/scalar-parameter-i80.ll @@ -12,7 +12,7 @@ ; A[i] += b; ; } ; -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @i80(i80* %A, i80 %b) { bb: Index: test/GPGPU/scalar-parameter-ppc_fp128.ll =================================================================== --- test/GPGPU/scalar-parameter-ppc_fp128.ll +++ test/GPGPU/scalar-parameter-ppc_fp128.ll @@ -11,7 +11,7 @@ ; A[i] += b; ; } ; -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @ppc_fp128(ppc_fp128* %A, ppc_fp128 %b) { bb: Index: test/GPGPU/scalar-parameter-x86_fp80.ll =================================================================== --- test/GPGPU/scalar-parameter-x86_fp80.ll +++ test/GPGPU/scalar-parameter-x86_fp80.ll @@ -11,7 +11,7 @@ ; A[i] += b; ; } ; -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @fp128(fp128* %A, fp128 %b) { bb: Index: test/GPGPU/scalar-parameter.ll =================================================================== --- test/GPGPU/scalar-parameter.ll +++ test/GPGPU/scalar-parameter.ll @@ -12,7 +12,7 @@ ; REQUIRES: pollyacc,nvptx -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" ; KERNEL: define ptx_kernel void @kernel_0(i8* %MemRef_A, float %MemRef_b) @@ -39,7 +39,7 @@ ; A[i] += b; ; } ; -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @float(float* %A, float %b) { bb: @@ -93,7 +93,7 @@ ; A[i] += b; ; } ; -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @double(double* %A, double %b) { bb: @@ -142,7 +142,7 @@ ; A[i] += b; ; } ; -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @i1(i1* %A, i1 %b) { bb: @@ -191,7 +191,7 @@ ; A[i] += b; ; } ; -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @i3(i3* %A, i3 %b) { bb: @@ -240,7 +240,7 @@ ; A[i] += b; ; } ; -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @i8(i8* %A, i8 %b) { bb: @@ -300,7 +300,7 @@ ; A[i] += b; ; } ; -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @i32(i32* %A, i32 %b) { bb: @@ -349,7 +349,7 @@ ; A[i] += b; ; } ; -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @i60(i60* %A, i60 %b) { bb: @@ -398,7 +398,7 @@ ; A[i] += b; ; } ; -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @i64(i64* %A, i64 %b) { bb: Index: test/GPGPU/scheduler-timeout.ll =================================================================== --- test/GPGPU/scheduler-timeout.ll +++ test/GPGPU/scheduler-timeout.ll @@ -4,7 +4,7 @@ ; REQUIRES: pollyacc -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" target triple = "x86_64-unknown-linux-gnu" ; This test case took at some point forever to schedule, as the isl scheduler Index: test/GPGPU/shared-memory-scalar.ll =================================================================== --- test/GPGPU/shared-memory-scalar.ll +++ test/GPGPU/shared-memory-scalar.ll @@ -23,7 +23,7 @@ ; the scalar is not stored any more in shared memory. We still leave this ; test case as documentation if we every forget to mark scalars as read-only. -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @add(float* %A, float %alpha) { bb: Index: test/GPGPU/shared-memory-two-dimensional.ll =================================================================== --- test/GPGPU/shared-memory-two-dimensional.ll +++ test/GPGPU/shared-memory-two-dimensional.ll @@ -36,12 +36,12 @@ ; KERNEL: %polly.access.mul.MemRef_b = mul nsw i64 %polly.indvar, 8 ; KERNEL-NEXT: %polly.access.add.MemRef_b = add nsw i64 %polly.access.mul.MemRef_b, %t0 -; KERNEL-NEXT: %polly.access.MemRef_b = getelementptr float, float* %polly.access.cast.MemRef_b, i64 %polly.access.add.MemRef_b -; KERNEL-NEXT: %shared.read = load float, float* %polly.access.MemRef_b +; KERNEL-NEXT: %polly.access.MemRef_b = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_b, i64 %polly.access.add.MemRef_b +; KERNEL-NEXT: %shared.read = load float, float addrspace(1)* %polly.access.MemRef_b ; KERNEL-NEXT: store float %shared.read, float addrspace(3)* %polly.access.shared_MemRef_b -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @foo(float* %A, [8 x float]* %b) { bb: Index: test/GPGPU/shared-memory.ll =================================================================== --- test/GPGPU/shared-memory.ll +++ test/GPGPU/shared-memory.ll @@ -29,19 +29,19 @@ ; KERNEL: @shared_MemRef_A = internal addrspace(3) global [32 x float] zeroinitializer, align 4 ; KERNEL: %polly.access.shared_MemRef_A = getelementptr float, float addrspace(3)* getelementptr inbounds ([32 x float], [32 x float] addrspace(3)* @shared_MemRef_A, i32 0, i32 0), i64 %t0 -; KERNEL-NEXT: %polly.access.cast.MemRef_A = bitcast i8* %MemRef_A to float* -; KERNEL-NEXT: %polly.access.MemRef_A = getelementptr float, float* %polly.access.cast.MemRef_A, i64 %t0 -; KERNEL-NEXT: %shared.read = load float, float* %polly.access.MemRef_A +; KERNEL-NEXT: %polly.access.cast.MemRef_A = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)* +; KERNEL-NEXT: %polly.access.MemRef_A = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A, i64 %t0 +; KERNEL-NEXT: %shared.read = load float, float addrspace(1)* %polly.access.MemRef_A ; KERNEL-NEXT: store float %shared.read, float addrspace(3)* %polly.access.shared_MemRef_A ; KERNEL: %polly.access.shared_MemRef_A3 = getelementptr float, float addrspace(3)* getelementptr inbounds ([32 x float], [32 x float] addrspace(3)* @shared_MemRef_A, i32 0, i32 0), i64 %t0 -; KERNEL-NEXT: %polly.access.cast.MemRef_A4 = bitcast i8* %MemRef_A to float* -; KERNEL-NEXT: %polly.access.MemRef_A5 = getelementptr float, float* %polly.access.cast.MemRef_A4, i64 %t0 +; KERNEL-NEXT: %polly.access.cast.MemRef_A4 = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)* +; KERNEL-NEXT: %polly.access.MemRef_A5 = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A4, i64 %t0 ; KERNEL-NEXT: %shared.write = load float, float addrspace(3)* %polly.access.shared_MemRef_A3 -; KERNEL-NEXT: store float %shared.write, float* %polly.access.MemRef_A5 +; KERNEL-NEXT: store float %shared.write, float addrspace(1)* %polly.access.MemRef_A5 -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" define void @add(float* %A) { bb: Index: test/GPGPU/size-cast.ll =================================================================== --- test/GPGPU/size-cast.ll +++ test/GPGPU/size-cast.ll @@ -34,7 +34,7 @@ ; IR-NEXT: mul i64 ; IR-NEXT: @polly_allocateMemoryForDevice -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" target triple = "x86_64-unknown-linux-gnu" define void @hoge(i32 %arg, i32 %arg1, [1000 x double]* %arg2, double* %arg3) { Index: test/GPGPU/untouched-arrays.ll =================================================================== --- test/GPGPU/untouched-arrays.ll +++ test/GPGPU/untouched-arrays.ll @@ -23,7 +23,7 @@ ; CODE-NEXT: Stmt_bb33(t0, 0); -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" target triple = "x86_64-unknown-linux-gnu" %struct.hoge = type { [23 x i16], [22 x i16], [14 x i16], [13 x i16] }