; ModuleID = 'copy_3D_mat'
;
; Host module for a 3-D array copy with a GPU offload path.
;
; Contents:
;   * @kernel_0 / @kernel_0_name - PTX text and entry-point name handed to
;     polly_getKernel.
;   * @julia_copy_3D_mat_64495   - the copy routine.  A run-time check chooses
;     between the original sequential loop nest and the offloaded version that
;     drives the polly_* runtime entry points declared further down.
;   * @jlcall_copy_3D_mat_64494  - wrapper that unpacks two arguments from an
;     argument array and forwards them to the routine above.
source_filename = "copy_3D_mat"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; PTX source of the device kernel.  The entry point declares eight .u64
; parameters (kernel_0_param_0 .. kernel_0_param_7) but only loads parameters
; 0-6; kernel_0_param_7 is never read.  Each thread loads 64-bit words through
; the pointer derived from param_0 and stores them through the pointer derived
; from param_1.  The string is kept byte-for-byte; its length is part of the
; array type and of the getelementptr in @julia_copy_3D_mat_64495.
@kernel_0 = private unnamed_addr constant [4098 x i8] c"//\0A// Generated by LLVM NVPTX Back-End\0A//\0A\0A.version 3.2\0A.target sm_30\0A.address_size 64\0A\0A\09// .globl\09kernel_0\0A\0A.visible .entry kernel_0(\0A\09.param .u64 kernel_0_param_0,\0A\09.param .u64 kernel_0_param_1,\0A\09.param .u64 kernel_0_param_2,\0A\09.param .u64 kernel_0_param_3,\0A\09.param .u64 kernel_0_param_4,\0A\09.param .u64 kernel_0_param_5,\0A\09.param .u64 kernel_0_param_6,\0A\09.param .u64 kernel_0_param_7\0A)\0A.maxntid 32, 4, 4\0A{\0A\09.reg .pred \09%p<9>;\0A\09.reg .b32 \09%r<6>;\0A\09.reg .b64 \09%rd<116>;\0A\0A\09ld.param.u64 \09%rd50, [kernel_0_param_4];\0A\09ld.param.u64 \09%rd52, [kernel_0_param_0];\0A\09ld.param.u64 \09%rd53, [kernel_0_param_1];\0A\09mov.u32 \09%r1, %ctaid.x;\0A\09ld.param.u64 \09%rd54, [kernel_0_param_2];\0A\09ld.param.u64 \09%rd55, [kernel_0_param_3];\0A\09mov.u32 \09%r2, %ctaid.y;\0A\09ld.param.u64 \09%rd56, [kernel_0_param_5];\0A\09mov.u32 \09%r3, %tid.x;\0A\09cvt.u64.u32 \09%rd57, %r3;\0A\09ld.param.u64 \09%rd58, [kernel_0_param_6];\0A\09mov.u32 \09%r4, %tid.y;\0A\09cvt.u64.u32 \09%rd59, %r4;\0A\09mov.u32 \09%r5, %tid.z;\0A\09cvt.u64.u32 \09%rd60, %r5;\0A\09mul.wide.u32 \09%rd61, %r1, 32;\0A\09add.s64 \09%rd62, %rd50, -1;\0A\09sub.s64 \09%rd63, %rd62, %rd61;\0A\09shr.u64 \09%rd1, %rd63, 13;\0A\09add.s64 \09%rd2, %rd61, %rd57;\0A\09mul.wide.u32 \09%rd64, %r2, 32;\0A\09add.s64 \09%rd65, %rd54, -1;\0A\09sub.s64 \09%rd66, %rd65, %rd64;\0A\09shr.u64 \09%rd3, %rd66, 13;\0A\09add.s64 \09%rd67, %rd55, -1;\0A\09shr.u64 \09%rd4, %rd67, 5;\0A\09mul.wide.u32 \09%rd68, %r2, 8;\0A\09neg.s64 \09%rd5, %rd68;\0A\09sub.s64 \09%rd69, %rd65, %rd59;\0A\09shr.s64 \09%rd6, %rd69, 2;\0A\09sub.s64 \09%rd70, %rd67, %rd60;\0A\09shr.s64 \09%rd7, %rd70, 2;\0A\09mul.lo.s64 \09%rd71, %rd54, %rd60;\0A\09add.s64 \09%rd72, %rd71, %rd64;\0A\09add.s64 \09%rd73, %rd72, %rd59;\0A\09mul.lo.s64 \09%rd74, %rd50, %rd73;\0A\09add.s64 \09%rd75, %rd74, %rd61;\0A\09add.s64 \09%rd76, %rd75, %rd57;\0A\09shl.b64 \09%rd77, %rd76, 3;\0A\09add.s64 \09%rd102, %rd53, %rd77;\0A\09shl.b64 \09%rd9, %rd50, 16;\0A\09mul.lo.s64 \09%rd78, %rd50, %rd54;\0A\09shl.b64 \09%rd10, %rd78, 8;\0A\09shl.b64 \09%rd11, %rd50, 5;\0A\09shl.b64 \09%rd12, %rd78, 5;\0A\09mul.lo.s64 \09%rd79, %rd56, %rd60;\0A\09add.s64 \09%rd80, %rd79, %rd64;\0A\09add.s64 \09%rd81, %rd80, %rd59;\0A\09mul.lo.s64 \09%rd82, %rd58, %rd81;\0A\09add.s64 \09%rd83, %rd82, %rd61;\0A\09add.s64 \09%rd84, %rd83, %rd57;\0A\09shl.b64 \09%rd85, %rd84, 3;\0A\09add.s64 \09%rd101, %rd52, %rd85;\0A\09shl.b64 \09%rd14, %rd58, 16;\0A\09mul.lo.s64 \09%rd86, %rd58, %rd56;\0A\09shl.b64 \09%rd15, %rd86, 8;\0A\09shl.b64 \09%rd16, %rd58, 5;\0A\09shl.b64 \09%rd17, %rd86, 5;\0A\09mov.u64 \09%rd103, 0;\0ALBB0_1:\0A\09shl.b64 \09%rd87, %rd103, 13;\0A\09add.s64 \09%rd88, %rd2, %rd87;\0A\09setp.ge.s64 \09%p1, %rd88, %rd50;\0A\09@%p1 bra \09LBB0_8;\0A\09mov.u64 \09%rd106, 0;\0A\09mov.u64 \09%rd104, %rd101;\0A\09mov.u64 \09%rd105, %rd102;\0ALBB0_3:\0A\09shl.b64 \09%rd90, %rd106, 11;\0A\09sub.s64 \09%rd91, %rd5, %rd90;\0A\09add.s64 \09%rd92, %rd91, %rd6;\0A\09min.s64 \09%rd93, %rd92, 7;\0A\09setp.lt.s64 \09%p2, %rd93, 0;\0A\09@%p2 bra \09LBB0_13;\0A\09add.s64 \09%rd27, %rd93, -1;\0A\09mov.u64 \09%rd109, 0;\0A\09mov.u64 \09%rd107, %rd104;\0A\09mov.u64 \09%rd108, %rd105;\0ALBB0_5:\0A\09shl.b64 \09%rd95, %rd109, 3;\0A\09sub.s64 \09%rd96, %rd7, %rd95;\0A\09min.s64 \09%rd97, %rd96, 7;\0A\09setp.lt.s64 \09%p3, %rd97, 0;\0A\09@%p3 bra \09LBB0_12;\0A\09add.s64 \09%rd31, %rd97, -1;\0A\09mov.u64 \09%rd112, 0;\0A\09mov.u64 \09%rd110, %rd107;\0A\09mov.u64 \09%rd111, %rd108;\0ALBB0_7:\0A\09mov.u64 \09%rd115, -1;\0A\09mov.u64 \09%rd113, %rd110;\0A\09mov.u64 \09%rd114, %rd111;\0ALBB0_10:\0A\09ld.global.u64 \09%rd100, [%rd113];\0A\09st.global.u64 \09[%rd114], %rd100;\0A\09add.s64 \09%rd115, %rd115, 1;\0A\09add.s64 \09%rd114, %rd114, %rd12;\0A\09add.s64 \09%rd113, %rd113, %rd17;\0A\09setp.gt.s64 \09%p4, %rd115, %rd31;\0A\09@%p4 bra \09LBB0_11;\0A\09bra.uni \09LBB0_10;\0ALBB0_11:\0A\09add.s64 \09%rd38, %rd112, 1;\0A\09add.s64 \09%rd111, %rd111, %rd11;\0A\09add.s64 \09%rd110, %rd110, %rd16;\0A\09setp.gt.s64 \09%p5, %rd112, %rd27;\0A\09mov.u64 \09%rd112, %rd38;\0A\09@%p5 bra \09LBB0_12;\0A\09bra.uni \09LBB0_7;\0ALBB0_12:\0A\09add.s64 \09%rd32, %rd109, 1;\0A\09add.s64 \09%rd108, %rd108, %rd10;\0A\09add.s64 \09%rd107, %rd107, %rd15;\0A\09setp.eq.s64 \09%p6, %rd109, %rd4;\0A\09mov.u64 \09%rd109, %rd32;\0A\09@%p6 bra \09LBB0_13;\0A\09bra.uni \09LBB0_5;\0ALBB0_13:\0A\09add.s64 \09%rd47, %rd106, 1;\0A\09add.s64 \09%rd105, %rd105, %rd9;\0A\09add.s64 \09%rd104, %rd104, %rd14;\0A\09setp.eq.s64 \09%p7, %rd106, %rd3;\0A\09mov.u64 \09%rd106, %rd47;\0A\09@%p7 bra \09LBB0_8;\0A\09bra.uni \09LBB0_3;\0ALBB0_8:\0A\09add.s64 \09%rd21, %rd103, 1;\0A\09add.s64 \09%rd102, %rd102, 65536;\0A\09add.s64 \09%rd101, %rd101, 65536;\0A\09setp.eq.s64 \09%p8, %rd103, %rd1;\0A\09mov.u64 \09%rd103, %rd21;\0A\09@%p8 bra \09LBB0_9;\0A\09bra.uni \09LBB0_1;\0ALBB0_9:\0A\09ret;\0A}\0A\0A\0A\00"

; NUL-terminated entry-point name passed to polly_getKernel.
@kernel_0_name = private unnamed_addr constant [9 x i8] c"kernel_0\00"

; Copies a 3-D array of 64-bit elements from the second argument into the first.
;
; Arguments (names from the debug metadata):
;   %0 - "a" (!15), destination descriptor.  Slot 0 is read as the data pointer
;        (%3); slots 3, 4 and 5 are read as i64 extents %6 ("ni"), %9 ("nj")
;        and %20 ("nk").
;   %1 - "b" (!16), source descriptor.  Slot 0 is the data pointer (%11); slots
;        3 and 4 are read as i64 extents %14 and %17.
;
; Semantics of the original loop nest (blocks if / if14 / if15), for
; i in 1..ni, j in 1..nj, k in 1..nk:
;   a.data[((k-1)*%9  + (j-1))*%6  + (i-1)] =
;   b.data[((k-1)*%17 + (j-1))*%14 + (i-1)]
; Elements are moved as raw i64 bit patterns.
;
; Control flow: when ni < 1 the function returns immediately.  Otherwise the
; run-time check in polly.split_new_and_old selects either the offloaded
; version (polly.start ...) or the sequential loop nest (if.pre_entry_bb ...).
;
; Function Attrs: sspstrong
define void @julia_copy_3D_mat_64495(i8** dereferenceable(40), i8** dereferenceable(40)) #0 !dbg !5 {
top:
  ; Kernel-launch argument area: an array of eight pointers, each pointing at a
  ; stack slot that holds the actual argument value.
  %polly_launch_0_params = alloca [8 x i8*]
  %polly_launch_0_param_0 = alloca i8*
  %polly_launch_0_param_1 = alloca i8*
  %polly_launch_0_param_2 = alloca i64
  %polly_launch_0_param_3 = alloca i64
  %polly_launch_0_param_4 = alloca i64
  %polly_launch_0_param_5 = alloca i64
  %polly_launch_0_param_6 = alloca i64
  ; FIX: this slot used to have type void (metadata, i64, metadata, metadata)*
  ; and was filled with the address of @llvm.dbg.value.  Intrinsics may only be
  ; used as the callee of a call/invoke, so that store made the module invalid.
  ; The kernel declares an eighth .u64 parameter but never loads it, so a plain
  ; 64-bit placeholder keeps the launch signature intact.
  %polly_launch_0_param_7 = alloca i64
  %polly_launch_0_params_i8ptr = bitcast [8 x i8*]* %polly_launch_0_params to i8*
  br label %top.split, !dbg !24

top.split:                                        ; preds = %top
  call void @llvm.dbg.value(metadata i8** null, i64 0, metadata !15, metadata !25), !dbg !24
  call void @llvm.dbg.value(metadata i8** null, i64 0, metadata !16, metadata !25), !dbg !24
  call void @llvm.dbg.value(metadata i8** %0, i64 0, metadata !15, metadata !25), !dbg !24
  ; Destination "a": data pointer (%3) and extents %6 (slot 3), %9 (slot 4).
  %2 = bitcast i8** %0 to double**
  %3 = load double*, double** %2, align 8, !tbaa !26
  %4 = getelementptr i8*, i8** %0, i64 3
  %5 = bitcast i8** %4 to i64*
  %6 = load i64, i64* %5, align 8, !tbaa !26
  %7 = getelementptr i8*, i8** %0, i64 4
  %8 = bitcast i8** %7 to i64*
  %9 = load i64, i64* %8, align 8, !tbaa !26
  call void @llvm.dbg.value(metadata i8** %1, i64 0, metadata !16, metadata !25), !dbg !24
  ; Source "b": data pointer (%11) and extents %14 (slot 3), %17 (slot 4).
  %10 = bitcast i8** %1 to double**
  %11 = load double*, double** %10, align 8, !tbaa !26
  %12 = getelementptr i8*, i8** %1, i64 3
  %13 = bitcast i8** %12 to i64*
  %14 = load i64, i64* %13, align 8, !tbaa !26
  %15 = getelementptr i8*, i8** %1, i64 4
  %16 = bitcast i8** %15 to i64*
  %17 = load i64, i64* %16, align 8, !tbaa !26
  ; Third extent of "a" (slot 5), described as "nk" by !23.
  %18 = getelementptr i8*, i8** %0, i64 5, !dbg !29
  %19 = bitcast i8** %18 to i64*, !dbg !29
  %20 = load i64, i64* %19, align 8, !dbg !29, !tbaa !26
  call void @llvm.dbg.value(metadata i64 %6, i64 0, metadata !21, metadata !25), !dbg !24
  call void @llvm.dbg.value(metadata i64 %6, i64 0, metadata !21, metadata !25), !dbg !24
  ; %22 = max(ni, 0) is the trip bound of the outer loop; ni < 1 skips all work.
  %21 = icmp sgt i64 %6, 0, !dbg !39
  %22 = select i1 %21, i64 %6, i64 0, !dbg !39
  %23 = icmp slt i64 %6, 1, !dbg !39
  br i1 %23, label %L55, label %if.lr.ph, !dbg !39

if.lr.ph:                                         ; preds = %top.split
  br label %polly.split_new_and_old, !dbg !39

L31.L22.loopexit_crit_edge:                       ; preds = %L31.loopexit
  br label %L22.loopexit, !dbg !40

L22.loopexit:                                     ; preds = %L31.L22.loopexit_crit_edge, %if
  ; Latch of the outer (i) loop: exit once the counter equals max(ni, 0).
  %24 = icmp eq i64 %"#temp#2.026", %22, !dbg !39
  br i1 %24, label %L22.L55_crit_edge, label %if, !dbg !39

polly.split_new_and_old:                          ; preds = %if.lr.ph
  ; Run-time check guarding the offloaded version.  All of the following must
  ; hold, and none of the checked arithmetic below may overflow:
  ;   * %17 >= %9 and %14 >= %6 (source extents cover the destination extents),
  ;   * not (%6 <= 0),
  ;   * the accessed ranges of the two buffers do not overlap (%42),
  ;   * 27 * %6 * %9 * %20 >= 2621440 (%49; presumably a minimum-work
  ;     threshold - confirm against the generator's settings).
  %25 = icmp sge i64 %17, %9
  %26 = icmp sge i64 %14, %6
  %27 = and i1 %25, %26
  %28 = icmp sle i64 %6, 0
  %29 = sext i1 %28 to i64
  %30 = icmp eq i64 0, %29
  %31 = and i1 %27, %30
  ; End of the accessed source range: %11 + ((%20-1)*%17 + (%9-1))*%14 + %6.
  %polly.access.cast. = bitcast double* %11 to i64*
  %32 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %20, i64 1)
  %.obit = extractvalue { i64, i1 } %32, 1
  %polly.overflow.state = or i1 false, %.obit
  %.res = extractvalue { i64, i1 } %32, 0
  %polly.access.mul. = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %.res, i64 %17)
  %polly.access.mul..obit = extractvalue { i64, i1 } %polly.access.mul., 1
  %polly.overflow.state27 = or i1 %polly.overflow.state, %polly.access.mul..obit
  %polly.access.mul..res = extractvalue { i64, i1 } %polly.access.mul., 0
  %33 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %9, i64 1)
  %.obit28 = extractvalue { i64, i1 } %33, 1
  %polly.overflow.state29 = or i1 %polly.overflow.state27, %.obit28
  %.res30 = extractvalue { i64, i1 } %33, 0
  %polly.access.add. = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res, i64 %.res30)
  %polly.access.add..obit = extractvalue { i64, i1 } %polly.access.add., 1
  %polly.overflow.state31 = or i1 %polly.overflow.state29, %polly.access.add..obit
  %polly.access.add..res = extractvalue { i64, i1 } %polly.access.add., 0
  %polly.access.mul.32 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %polly.access.add..res, i64 %14)
  %polly.access.mul..obit33 = extractvalue { i64, i1 } %polly.access.mul.32, 1
  %polly.overflow.state34 = or i1 %polly.overflow.state31, %polly.access.mul..obit33
  %polly.access.mul..res35 = extractvalue { i64, i1 } %polly.access.mul.32, 0
  %polly.access.add.36 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res35, i64 %6)
  %polly.access.add..obit37 = extractvalue { i64, i1 } %polly.access.add.36, 1
  %polly.overflow.state38 = or i1 %polly.overflow.state34, %polly.access.add..obit37
  %polly.access.add..res39 = extractvalue { i64, i1 } %polly.access.add.36, 0
  %polly.access. = getelementptr i64, i64* %polly.access.cast., i64 %polly.access.add..res39
  ; Start of the destination range: %3 + 0.
  %polly.access.cast.40 = bitcast double* %3 to i64*
  %polly.access.mul.41 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 0, i64 %9)
  %polly.access.mul..obit42 = extractvalue { i64, i1 } %polly.access.mul.41, 1
  %polly.overflow.state43 = or i1 %polly.overflow.state38, %polly.access.mul..obit42
  %polly.access.mul..res44 = extractvalue { i64, i1 } %polly.access.mul.41, 0
  %polly.access.add.45 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res44, i64 0)
  %polly.access.add..obit46 = extractvalue { i64, i1 } %polly.access.add.45, 1
  %polly.overflow.state47 = or i1 %polly.overflow.state43, %polly.access.add..obit46
  %polly.access.add..res48 = extractvalue { i64, i1 } %polly.access.add.45, 0
  %polly.access.mul.49 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %polly.access.add..res48, i64 %6)
  %polly.access.mul..obit50 = extractvalue { i64, i1 } %polly.access.mul.49, 1
  %polly.overflow.state51 = or i1 %polly.overflow.state47, %polly.access.mul..obit50
  %polly.access.mul..res52 = extractvalue { i64, i1 } %polly.access.mul.49, 0
  %polly.access.add.53 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res52, i64 0)
  %polly.access.add..obit54 = extractvalue { i64, i1 } %polly.access.add.53, 1
  %polly.overflow.state55 = or i1 %polly.overflow.state51, %polly.access.add..obit54
  %polly.access.add..res56 = extractvalue { i64, i1 } %polly.access.add.53, 0
  %polly.access.57 = getelementptr i64, i64* %polly.access.cast.40, i64 %polly.access.add..res56
  ; %36: the source range ends at or before the destination range begins.
  %34 = ptrtoint i64* %polly.access. to i64
  %35 = ptrtoint i64* %polly.access.57 to i64
  %36 = icmp ule i64 %34, %35
  ; End of the accessed destination range: %3 + ((%20-1)*%9 + (%9-1))*%6 + %6.
  %polly.access.cast.58 = bitcast double* %3 to i64*
  %37 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %20, i64 1)
  %.obit59 = extractvalue { i64, i1 } %37, 1
  %polly.overflow.state60 = or i1 %polly.overflow.state55, %.obit59
  %.res61 = extractvalue { i64, i1 } %37, 0
  %polly.access.mul.62 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %.res61, i64 %9)
  %polly.access.mul..obit63 = extractvalue { i64, i1 } %polly.access.mul.62, 1
  %polly.overflow.state64 = or i1 %polly.overflow.state60, %polly.access.mul..obit63
  %polly.access.mul..res65 = extractvalue { i64, i1 } %polly.access.mul.62, 0
  %38 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %9, i64 1)
  %.obit66 = extractvalue { i64, i1 } %38, 1
  %polly.overflow.state67 = or i1 %polly.overflow.state64, %.obit66
  %.res68 = extractvalue { i64, i1 } %38, 0
  %polly.access.add.69 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res65, i64 %.res68)
  %polly.access.add..obit70 = extractvalue { i64, i1 } %polly.access.add.69, 1
  %polly.overflow.state71 = or i1 %polly.overflow.state67, %polly.access.add..obit70
  %polly.access.add..res72 = extractvalue { i64, i1 } %polly.access.add.69, 0
  %polly.access.mul.73 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %polly.access.add..res72, i64 %6)
  %polly.access.mul..obit74 = extractvalue { i64, i1 } %polly.access.mul.73, 1
  %polly.overflow.state75 = or i1 %polly.overflow.state71, %polly.access.mul..obit74
  %polly.access.mul..res76 = extractvalue { i64, i1 } %polly.access.mul.73, 0
  %polly.access.add.77 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res76, i64 %6)
  %polly.access.add..obit78 = extractvalue { i64, i1 } %polly.access.add.77, 1
  %polly.overflow.state79 = or i1 %polly.overflow.state75, %polly.access.add..obit78
  %polly.access.add..res80 = extractvalue { i64, i1 } %polly.access.add.77, 0
  %polly.access.81 = getelementptr i64, i64* %polly.access.cast.58, i64 %polly.access.add..res80
  ; Start of the source range: %11 + 0.
  %polly.access.cast.82 = bitcast double* %11 to i64*
  %polly.access.mul.83 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 0, i64 %17)
  %polly.access.mul..obit84 = extractvalue { i64, i1 } %polly.access.mul.83, 1
  %polly.overflow.state85 = or i1 %polly.overflow.state79, %polly.access.mul..obit84
  %polly.access.mul..res86 = extractvalue { i64, i1 } %polly.access.mul.83, 0
  %polly.access.add.87 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res86, i64 0)
  %polly.access.add..obit88 = extractvalue { i64, i1 } %polly.access.add.87, 1
  %polly.overflow.state89 = or i1 %polly.overflow.state85, %polly.access.add..obit88
  %polly.access.add..res90 = extractvalue { i64, i1 } %polly.access.add.87, 0
  %polly.access.mul.91 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %polly.access.add..res90, i64 %14)
  %polly.access.mul..obit92 = extractvalue { i64, i1 } %polly.access.mul.91, 1
  %polly.overflow.state93 = or i1 %polly.overflow.state89, %polly.access.mul..obit92
  %polly.access.mul..res94 = extractvalue { i64, i1 } %polly.access.mul.91, 0
  %polly.access.add.95 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res94, i64 0)
  %polly.access.add..obit96 = extractvalue { i64, i1 } %polly.access.add.95, 1
  %polly.overflow.state97 = or i1 %polly.overflow.state93, %polly.access.add..obit96
  %polly.access.add..res98 = extractvalue { i64, i1 } %polly.access.add.95, 0
  %polly.access.99 = getelementptr i64, i64* %polly.access.cast.82, i64 %polly.access.add..res98
  ; %41: the destination range ends at or before the source range begins.
  ; %42: the two ranges are disjoint in one order or the other.
  %39 = ptrtoint i64* %polly.access.81 to i64
  %40 = ptrtoint i64* %polly.access.99 to i64
  %41 = icmp ule i64 %39, %40
  %42 = or i1 %36, %41
  %43 = and i1 %31, %42
  ; Work estimate 27 * %6 * %9 * %20, computed with overflow tracking.
  %44 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %6)
  %.obit100 = extractvalue { i64, i1 } %44, 1
  %polly.overflow.state101 = or i1 %polly.overflow.state97, %.obit100
  %.res102 = extractvalue { i64, i1 } %44, 0
  %45 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %.res102, i64 %9)
  %.obit103 = extractvalue { i64, i1 } %45, 1
  %polly.overflow.state104 = or i1 %polly.overflow.state101, %.obit103
  %.res105 = extractvalue { i64, i1 } %45, 0
  %46 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %.res105, i64 %20)
  %.obit106 = extractvalue { i64, i1 } %46, 1
  %polly.overflow.state107 = or i1 %polly.overflow.state104, %.obit106
  %.res108 = extractvalue { i64, i1 } %46, 0
  %47 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 27, i64 %.res108)
  %.obit109 = extractvalue { i64, i1 } %47, 1
  %polly.overflow.state110 = or i1 %polly.overflow.state107, %.obit109
  %.res111 = extractvalue { i64, i1 } %47, 0
  %48 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 0, i64 %.res111)
  %.obit112 = extractvalue { i64, i1 } %48, 1
  %polly.overflow.state113 = or i1 %polly.overflow.state110, %.obit112
  %.res114 = extractvalue { i64, i1 } %48, 0
  %49 = icmp sge i64 %.res114, 2621440
  %50 = and i1 %43, %49
  ; Any overflow recorded above forces the fallback to the sequential loops.
  %polly.rtc.overflown = xor i1 %polly.overflow.state113, true
  %polly.rtc.result = and i1 %50, %polly.rtc.overflown
  br i1 %polly.rtc.result, label %polly.start, label %if.pre_entry_bb

if.pre_entry_bb:                                  ; preds = %polly.split_new_and_old
  br label %if, !dbg !39

if:                                               ; preds = %if.pre_entry_bb, %L22.loopexit
  ; Outer loop header; the counter is "i" (!20) and starts at 1.
  %"#temp#2.026" = phi i64 [ %51, %L22.loopexit ], [ 1, %if.pre_entry_bb ]
  %51 = add i64 %"#temp#2.026", 1, !dbg !39
  call void @llvm.dbg.value(metadata i64 %9, i64 0, metadata !22, metadata !25), !dbg !24
  call void @llvm.dbg.value(metadata i64 %9, i64 0, metadata !22, metadata !25), !dbg !24
  %52 = icmp sgt i64 %9, 0, !dbg !40
  %53 = select i1 %52, i64 %9, i64 0, !dbg !40
  %54 = icmp slt i64 %9, 1, !dbg !40
  br i1 %54, label %L22.loopexit, label %if14.lr.ph, !dbg !40

if14.lr.ph:                                       ; preds = %if
  br label %if14, !dbg !40

L22.L55_crit_edge:                                ; preds = %L22.loopexit
  br label %polly.merge_new_and_old, !dbg !39

polly.merge_new_and_old:                          ; preds = %polly.exiting, %L22.L55_crit_edge
  ; Join point of the offloaded version and the sequential version.
  br label %L55, !dbg !41

L55:                                              ; preds = %polly.merge_new_and_old, %top.split
  ret void, !dbg !41

L40.L31.loopexit_crit_edge:                       ; preds = %if15
  br label %L31.loopexit, !dbg !42

L31.loopexit:                                     ; preds = %L40.L31.loopexit_crit_edge, %if14
  ; Latch of the middle (j) loop: exit once the counter equals max(nj, 0).
  %55 = icmp eq i64 %"#temp#1.025", %53, !dbg !40
  br i1 %55, label %L31.L22.loopexit_crit_edge, label %if14, !dbg !40

if14:                                             ; preds = %if14.lr.ph, %L31.loopexit
  ; Middle loop header; the counter is "j" (!19) and starts at 1.
  %"#temp#1.025" = phi i64 [ 1, %if14.lr.ph ], [ %56, %L31.loopexit ]
  %56 = add i64 %"#temp#1.025", 1, !dbg !40
  call void @llvm.dbg.value(metadata i64 %20, i64 0, metadata !23, metadata !25), !dbg !24
  call void @llvm.dbg.value(metadata i64 %20, i64 0, metadata !23, metadata !25), !dbg !24
  %57 = icmp sgt i64 %20, 0, !dbg !42
  %58 = select i1 %57, i64 %20, i64 0, !dbg !42
  %59 = icmp slt i64 %20, 1, !dbg !42
  br i1 %59, label %L31.loopexit, label %if15.lr.ph, !dbg !42

if15.lr.ph:                                       ; preds = %if14
  br label %if15, !dbg !42

if15:                                             ; preds = %if15.lr.ph, %if15
  ; Innermost loop; the counter is "k" (!17) and starts at 1.
  %"#temp#.024" = phi i64 [ 1, %if15.lr.ph ], [ %60, %if15 ]
  %60 = add i64 %"#temp#.024", 1, !dbg !42
  call void @llvm.dbg.value(metadata i64 %"#temp#2.026", i64 0, metadata !20, metadata !25), !dbg !24
  call void @llvm.dbg.value(metadata i64 %"#temp#1.025", i64 0, metadata !19, metadata !25), !dbg !24
  call void @llvm.dbg.value(metadata i64 %"#temp#.024", i64 0, metadata !17, metadata !25), !dbg !24
  ; Source element index: ((k-1)*%17 + (j-1))*%14 + (i-1).
  %61 = add i64 %"#temp#1.025", -1, !dbg !41
  %62 = add i64 %"#temp#.024", -1, !dbg !41
  %63 = mul i64 %62, %17, !dbg !41
  %tmp = add i64 %61, %63
  %tmp21 = mul i64 %tmp, %14
  %64 = add i64 %"#temp#2.026", -1, !dbg !41
  %65 = add i64 %64, %tmp21, !dbg !41
  %66 = getelementptr double, double* %11, i64 %65, !dbg !41
  %67 = bitcast double* %66 to i64*, !dbg !41
  %68 = load i64, i64* %67, align 8, !dbg !41, !tbaa !43
  call void @llvm.dbg.value(metadata i64 %"#temp#2.026", i64 0, metadata !20, metadata !25), !dbg !24
  call void @llvm.dbg.value(metadata i64 %"#temp#1.025", i64 0, metadata !19, metadata !25), !dbg !24
  call void @llvm.dbg.value(metadata i64 %"#temp#.024", i64 0, metadata !17, metadata !25), !dbg !24
  ; Destination element index: ((k-1)*%9 + (j-1))*%6 + (i-1).
  %69 = mul i64 %62, %9, !dbg !41
  %tmp22 = add i64 %61, %69
  %tmp23 = mul i64 %tmp22, %6
  %70 = add i64 %64, %tmp23, !dbg !41
  %71 = getelementptr double, double* %3, i64 %70, !dbg !41
  %72 = bitcast double* %71 to i64*, !dbg !41
  store i64 %68, i64* %72, align 8, !dbg !41, !tbaa !43
  %73 = icmp eq i64 %"#temp#.024", %58, !dbg !42
  br i1 %73, label %L40.L31.loopexit_crit_edge, label %if15, !dbg !42

polly.start:                                      ; preds = %polly.split_new_and_old
  br label %polly.acc.initialize

polly.acc.initialize:                             ; preds = %polly.start
  ; Create the runtime context and allocate one device buffer per array:
  ; MemRef0 is sized 8 * %20 * %17 * %14 bytes (source extents) and
  ; MemRef1 is sized 8 * %20 * %9 * %6 bytes (destination extents).
  %74 = call i8* @polly_initContext()
  %75 = mul nsw i64 %20, %17
  %76 = mul nsw i64 %75, %14
  %77 = mul i64 8, %76
  %p_dev_array_MemRef0 = call i8* @polly_allocateMemoryForDevice(i64 %77)
  %78 = mul nsw i64 %20, %9
  %79 = mul nsw i64 %78, %6
  %80 = mul i64 8, %79
  %p_dev_array_MemRef1 = call i8* @polly_allocateMemoryForDevice(i64 %80)
  br label %polly.cond

polly.cond:                                       ; preds = %polly.acc.initialize
  ; Only launch when every destination extent is at least 1.
  %81 = icmp sge i64 %9, 1
  %82 = icmp sge i64 %20, 1
  %83 = and i1 %81, %82
  %84 = icmp sge i64 %6, 1
  %85 = and i1 %83, %84
  br i1 %85, label %polly.then, label %polly.else

polly.merge:                                      ; preds = %polly.else, %polly.merge116
  ; Release both device buffers and the context on every offload path.
  call void @polly_freeDeviceMemory(i8* %p_dev_array_MemRef0)
  call void @polly_freeDeviceMemory(i8* %p_dev_array_MemRef1)
  call void @polly_freeContext(i8* %74)
  br label %polly.exiting

polly.exiting:                                    ; preds = %polly.merge
  br label %polly.merge_new_and_old

polly.then:                                       ; preds = %polly.cond
  br label %polly.cond115

polly.cond115:                                    ; preds = %polly.then
  ; The host-to-device upload of the source is skipped when %17 or %14 is < 1.
  %86 = icmp sge i64 %17, 1
  %87 = icmp sge i64 %14, 1
  %88 = and i1 %86, %87
  br i1 %88, label %polly.then117, label %polly.else118

polly.merge116:                                   ; preds = %polly.else118, %polly.then117
  ; Fill the argument array.  Slot order matches kernel_0_param_0..7:
  ;   0: device pointer of MemRef0 (source)   1: device pointer of MemRef1 (dest)
  ;   2: %9   3: %20   4: %6   5: %17   6: %14   7: unused placeholder
  %89 = call i8* @polly_getDevicePtr(i8* %p_dev_array_MemRef0)
  %90 = getelementptr [8 x i8*], [8 x i8*]* %polly_launch_0_params, i64 0, i64 0
  store i8* %89, i8** %polly_launch_0_param_0
  %91 = bitcast i8** %polly_launch_0_param_0 to i8*
  store i8* %91, i8** %90
  %92 = call i8* @polly_getDevicePtr(i8* %p_dev_array_MemRef1)
  %93 = getelementptr [8 x i8*], [8 x i8*]* %polly_launch_0_params, i64 0, i64 1
  store i8* %92, i8** %polly_launch_0_param_1
  %94 = bitcast i8** %polly_launch_0_param_1 to i8*
  store i8* %94, i8** %93
  store i64 %9, i64* %polly_launch_0_param_2
  %95 = getelementptr [8 x i8*], [8 x i8*]* %polly_launch_0_params, i64 0, i64 2
  %96 = bitcast i64* %polly_launch_0_param_2 to i8*
  store i8* %96, i8** %95
  store i64 %20, i64* %polly_launch_0_param_3
  %97 = getelementptr [8 x i8*], [8 x i8*]* %polly_launch_0_params, i64 0, i64 3
  %98 = bitcast i64* %polly_launch_0_param_3 to i8*
  store i8* %98, i8** %97
  store i64 %6, i64* %polly_launch_0_param_4
  %99 = getelementptr [8 x i8*], [8 x i8*]* %polly_launch_0_params, i64 0, i64 4
  %100 = bitcast i64* %polly_launch_0_param_4 to i8*
  store i8* %100, i8** %99
  store i64 %17, i64* %polly_launch_0_param_5
  %101 = getelementptr [8 x i8*], [8 x i8*]* %polly_launch_0_params, i64 0, i64 5
  %102 = bitcast i64* %polly_launch_0_param_5 to i8*
  store i8* %102, i8** %101
  store i64 %14, i64* %polly_launch_0_param_6
  %103 = getelementptr [8 x i8*], [8 x i8*]* %polly_launch_0_params, i64 0, i64 6
  %104 = bitcast i64* %polly_launch_0_param_6 to i8*
  store i8* %104, i8** %103
  ; FIX: store a zero placeholder instead of the address of @llvm.dbg.value.
  store i64 0, i64* %polly_launch_0_param_7
  %105 = getelementptr [8 x i8*], [8 x i8*]* %polly_launch_0_params, i64 0, i64 7
  %106 = bitcast i64* %polly_launch_0_param_7 to i8*
  store i8* %106, i8** %105
  %107 = call i8* @polly_getKernel(i8* getelementptr inbounds ([4098 x i8], [4098 x i8]* @kernel_0, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @kernel_0_name, i32 0, i32 0))
  ; First launch dimension: 256 when %6 >= 8161, otherwise (%6 + 31) >> 5.
  %108 = icmp sge i64 %6, 8161
  %109 = add nsw i64 %6, 31
  %polly.fdiv_q.shr = ashr i64 %109, 5
  %110 = select i1 %108, i64 256, i64 %polly.fdiv_q.shr
  %111 = trunc i64 %110 to i32
  ; Second launch dimension: 256 when %9 >= 8162, otherwise (%9 + 31) >> 5.
  %112 = icmp sge i64 %9, 8162
  %113 = add nsw i64 %9, 31
  %polly.fdiv_q.shr119 = ashr i64 %113, 5
  %114 = select i1 %112, i64 256, i64 %polly.fdiv_q.shr119
  %115 = trunc i64 %114 to i32
  ; The constants 32, 4, 4 match the kernel's ".maxntid 32, 4, 4"; presumably
  ; the two computed values are the grid size and 32/4/4 the block size -
  ; confirm against the polly_launchKernel definition.
  call void @polly_launchKernel(i8* %107, i32 %111, i32 %115, i32 32, i32 4, i32 4, i8* %polly_launch_0_params_i8ptr)
  call void @polly_freeKernel(i8* %107)
  ; Download 8 * %20 * %9 * %6 bytes from MemRef1 into the destination buffer.
  %116 = mul nsw i64 %20, %9
  %117 = mul nsw i64 %116, %6
  %118 = mul i64 8, %117
  %119 = bitcast double* %3 to i8*
  call void @polly_copyFromDeviceToHost(i8* %p_dev_array_MemRef1, i8* %119, i64 %118)
  br label %polly.merge

polly.else:                                       ; preds = %polly.cond
  br label %polly.merge

polly.then117:                                    ; preds = %polly.cond115
  ; Upload 8 * %20 * %17 * %14 bytes of the source buffer into MemRef0.
  %120 = mul nsw i64 %20, %17
  %121 = mul nsw i64 %120, %14
  %122 = mul i64 8, %121
  %123 = bitcast double* %11 to i8*
  call void @polly_copyFromHostToDevice(i8* %123, i8* %p_dev_array_MemRef0, i64 %122)
  br label %polly.merge116

polly.else118:                                    ; preds = %polly.cond115
  br label %polly.merge116
}

; Wrapper around @julia_copy_3D_mat_64495: loads the first two entries of the
; argument array %1 and forwards them.  The first and third parameters are not
; used.  Returns a constant pointer baked in as an absolute address, so the
; module is only meaningful inside the process that produced it.
define i8** @jlcall_copy_3D_mat_64494(i8**, i8***, i32) #1 {
top:
  br label %top.split

top.split:                                        ; preds = %top
  %3 = load i8**, i8*** %1, align 8
  %4 = getelementptr i8**, i8*** %1, i64 1
  %5 = load i8**, i8*** %4, align 8
  call void @julia_copy_3D_mat_64495(i8** %3, i8** %5)
  ret i8** inttoptr (i64 140448624738320 to i8**)
}

declare i8**** @jl_get_ptls_states()

; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #2

; Function Attrs: noreturn
declare void @jl_error(i8*) #3

; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32, i1) #4

; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #4

declare void @jl_enter_handler(i8*)

; Function Attrs: returns_twice
declare i32 @__sigsetjmp(i8*, i32) #5

; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2

; Function Attrs: nounwind readnone
declare { i64, i1 } @llvm.ssub.with.overflow.i64(i64, i64) #2

; Function Attrs: nounwind readnone
declare { i64, i1 } @llvm.smul.with.overflow.i64(i64, i64) #2

; Function Attrs: nounwind readnone
declare { i64, i1 } @llvm.sadd.with.overflow.i64(i64, i64) #2

; External runtime entry points used by the offloaded version above.
declare i8* @polly_initContext()

declare i8* @polly_allocateMemoryForDevice(i64)

declare void @polly_copyFromHostToDevice(i8*, i8*, i64)

declare i8* @polly_getDevicePtr(i8*)

declare i8* @polly_getKernel(i8*, i8*)

declare void @polly_launchKernel(i8*, i32, i32, i32, i32, i32, i8*)

declare void @polly_freeKernel(i8*)

declare void @polly_copyFromDeviceToHost(i8*, i8*, i64)

declare void @polly_freeDeviceMemory(i8*)

declare void @polly_freeContext(i8*)

attributes #0 = { sspstrong "no-frame-pointer-elim"="true" }
attributes #1 = { "no-frame-pointer-elim"="true" }
attributes #2 = { nounwind readnone }
attributes #3 = { noreturn }
attributes #4 = { argmemonly nounwind }
attributes #5 = { returns_twice }

!llvm.module.flags = !{!0, !1}
!llvm.dbg.cu = !{!2}

!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 1, !"Debug Info Version", i32 3}
!2 = distinct !DICompileUnit(language: DW_LANG_C89, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4)
!3 = !DIFile(filename: "REPL[2]", directory: ".")
!4 = !{}
!5 = distinct !DISubprogram(name: "copy_3D_mat", linkageName: "julia_copy_3D_mat_64495", scope: null, file: !3, type: !6, isLocal: false, isDefinition: true, isOptimized: true, unit: !2, variables: !12)
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !8}
!8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64, align: 64)
!9 = !DICompositeType(tag: DW_TAG_structure_type, name: "jl_value_t", file: !10, line: 71, align: 64, elements: !11)
!10 = !DIFile(filename: "julia.h", directory: "")
!11 = !{!8}
!12 = !{!13, !15, !16, !17, !19, !20, !21, !22, !23}
!13 = !DILocalVariable(name: "#self#", arg: 1, scope: !5, file: !3, line: 2, type: !14)
!14 = !DICompositeType(tag: DW_TAG_structure_type, name: "#copy_3D_mat", align: 8, elements: !4, runtimeLang: DW_LANG_Julia, identifier: "#copy_3D_mat_64447")
!15 = !DILocalVariable(name: "a", arg: 2, scope: !5, file: !3, line: 2, type: !8)
!16 = !DILocalVariable(name: "b", arg: 3, scope: !5, file: !3, line: 2, type: !8)
!17 = !DILocalVariable(name: "k", scope: !5, file: !3, line: 2, type: !18)
; NOTE(review): "Int64" is described as DW_ATE_unsigned although every compare
; on the variables of this type above is signed (icmp sgt/slt/sge); presumably
; this is how the producer encodes all integer types - confirm before changing.
!18 = !DIBasicType(name: "Int64", size: 64, encoding: DW_ATE_unsigned)
!19 = !DILocalVariable(name: "j", scope: !5, file: !3, line: 2, type: !18)
!20 = !DILocalVariable(name: "i", scope: !5, file: !3, line: 2, type: !18)
!21 = !DILocalVariable(name: "ni", scope: !5, file: !3, line: 2, type: !18)
!22 = !DILocalVariable(name: "nj", scope: !5, file: !3, line: 2, type: !18)
!23 = !DILocalVariable(name: "nk", scope: !5, file: !3, line: 2, type: !18)
!24 = !DILocation(line: 2, scope: !5)
!25 = !DIExpression()
!26 = !{!27, !27, i64 0, i64 1}
!27 = !{!"jtbaa_const", !28, i64 0}
!28 = !{!"jtbaa"}
!29 = !DILocation(line: 80, scope: !30, inlinedAt: !33)
!30 = distinct !DISubprogram(name: "_size;", linkageName: "_size", scope: !31, file: !31, type: !32, isLocal: false, isDefinition: true, isOptimized: true, unit: !2, variables: !4)
!31 = !DIFile(filename: "array.jl", directory: ".")
!32 = !DISubroutineType(types: !4)
!33 = !DILocation(line: 80, scope: !34, inlinedAt: !35)
!34 = distinct !DISubprogram(name: "_size;", linkageName: "_size", scope: !31, file: !31, type: !32, isLocal: false, isDefinition: true, isOptimized: true, unit: !2, variables: !4)
!35 = !DILocation(line: 80, scope: !36, inlinedAt: !37)
!36 = distinct !DISubprogram(name: "_size;", linkageName: "_size", scope: !31, file: !31, type: !32, isLocal: false, isDefinition: true, isOptimized: true, unit: !2, variables: !4)
!37 = !DILocation(line: 76, scope: !38, inlinedAt: !24)
!38 = distinct !DISubprogram(name: "size;", linkageName: "size", scope: !31, file: !31, type: !32, isLocal: false, isDefinition: true, isOptimized: true, unit: !2, variables: !4)
!39 = !DILocation(line: 3, scope: !5)
!40 = !DILocation(line: 4, scope: !5)
!41 = !DILocation(line: 6, scope: !5)
!42 = !DILocation(line: 5, scope: !5)
!43 = !{!44, !44, i64 0}
!44 = !{!"jtbaa_arraybuf", !45, i64 0}
!45 = !{!"jtbaa_data", !28, i64 0}