; ModuleID = 'square_mat'
source_filename = "square_mat"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Kernel image and kernel name handed to @polly_getKernel below.
; The image is a single zero byte, i.e. it carries no device code.
@kernel_0 = private unnamed_addr constant [1 x i8] zeroinitializer
@kernel_0_name = private unnamed_addr constant [9 x i8] c"kernel_0\00"

; julia_square_mat_64434(%0, %1)
;
; Debug metadata names %0 "a" and %1 "b". From each argument the function
; loads a double* from word 0; it also loads i64 values from words 3 and 4 of
; %0 (%6 "ni", %14 "nj") and from word 3 of %1 (%11).
;
; Host loop nest (blocks %if / %if3), with i = 1..max(%6,0), j = 1..max(%14,0):
;     a_data[(i-1) + (j-1)*%6] = b_data[(i-1) + (j-1)*%11] ^ 2
;
; A second, polly.* version of the same region (device alloc / copy / kernel
; launch) is also present, but it is guarded by `br i1 false`, so only the
; host loop nest executes.
;
; Function Attrs: sspstrong
define void @julia_square_mat_64434(i8** dereferenceable(40), i8** dereferenceable(40)) #0 !dbg !5 {
top:
  ; Storage for the kernel-launch argument array: five slots, each pointing at
  ; the stack cell holding one argument value. No slot is created for
  ; @llvm.dbg.value: the address of an intrinsic may not be taken, and debug
  ; intrinsics are not data that a kernel can receive.
  %polly_launch_0_params = alloca [5 x i8*]
  %polly_launch_0_param_0 = alloca i8*
  %polly_launch_0_param_1 = alloca i8*
  %polly_launch_0_param_2 = alloca i64
  %polly_launch_0_param_3 = alloca i64
  %polly_launch_0_param_4 = alloca i64
  %polly_launch_0_params_i8ptr = bitcast [5 x i8*]* %polly_launch_0_params to i8*
  br label %top.split, !dbg !22

top.split:                                        ; preds = %top
  call void @llvm.dbg.value(metadata i8** null, i64 0, metadata !15, metadata !23), !dbg !22
  call void @llvm.dbg.value(metadata i8** null, i64 0, metadata !16, metadata !23), !dbg !22
  call void @llvm.dbg.value(metadata i8** %0, i64 0, metadata !15, metadata !23), !dbg !22
  ; %3 = data pointer of %0, %6 = word 3 of %0
  %2 = bitcast i8** %0 to double**
  %3 = load double*, double** %2, align 8, !tbaa !24
  %4 = getelementptr i8*, i8** %0, i64 3
  %5 = bitcast i8** %4 to i64*
  %6 = load i64, i64* %5, align 8, !tbaa !24
  call void @llvm.dbg.value(metadata i8** %1, i64 0, metadata !16, metadata !23), !dbg !22
  ; %8 = data pointer of %1, %11 = word 3 of %1
  %7 = bitcast i8** %1 to double**
  %8 = load double*, double** %7, align 8, !tbaa !24
  %9 = getelementptr i8*, i8** %1, i64 3
  %10 = bitcast i8** %9 to i64*
  %11 = load i64, i64* %10, align 8, !tbaa !24
  ; %14 = word 4 of %0
  %12 = getelementptr i8*, i8** %0, i64 4, !dbg !22
  %13 = bitcast i8** %12 to i64*, !dbg !22
  %14 = load i64, i64* %13, align 8, !dbg !22, !tbaa !24
  call void @llvm.dbg.value(metadata i64 %6, i64 0, metadata !20, metadata !23), !dbg !22
  call void @llvm.dbg.value(metadata i64 %6, i64 0, metadata !20, metadata !23), !dbg !22
  ; %16 = max(%6, 0) is the outer trip bound; skip everything when %6 < 1.
  %15 = icmp sgt i64 %6, 0, !dbg !27
  %16 = select i1 %15, i64 %6, i64 0, !dbg !27
  %17 = icmp slt i64 %6, 1, !dbg !27
  br i1 %17, label %L33, label %if.lr.ph, !dbg !27

if.lr.ph:                                         ; preds = %top.split
  br label %polly.split_new_and_old, !dbg !27

L20.L11.loopexit_crit_edge:                       ; preds = %if3
  br label %L11.loopexit, !dbg !28

L11.loopexit:                                     ; preds = %L20.L11.loopexit_crit_edge, %if
  ; Outer loop latch: stop once the outer counter equals max(%6, 0).
  %18 = icmp eq i64 %"#temp#1.010", %16, !dbg !27
  br i1 %18, label %L11.L33_crit_edge, label %if, !dbg !27

polly.split_new_and_old:                          ; preds = %if.lr.ph
  ; Run-time check. It combines:
  ;   - %23: %11 >= %6 and %6 > 0
  ;   - %32: the two accessed address ranges are ordered one after the other
  ;          (end of one <= start of the other, either way round)
  ;   - %38: 21 * %6 * %14 >= 2621440
  ;   - no signed overflow in any of the checked arithmetic
  ; The result, %polly.rtc.result, has no user: the branch closing this block
  ; tests the constant false.
  %19 = icmp sge i64 %11, %6
  %20 = icmp sle i64 %6, 0
  %21 = sext i1 %20 to i64
  %22 = icmp eq i64 0, %21
  %23 = and i1 %19, %22
  ; One-past-the-end address of the %8 accesses: %8 + ((%14 - 1) * %11 + %6)
  %24 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %14, i64 1)
  %.obit = extractvalue { i64, i1 } %24, 1
  %polly.overflow.state = or i1 false, %.obit
  %.res = extractvalue { i64, i1 } %24, 0
  %polly.access.mul. = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %.res, i64 %11)
  %polly.access.mul..obit = extractvalue { i64, i1 } %polly.access.mul., 1
  %polly.overflow.state11 = or i1 %polly.overflow.state, %polly.access.mul..obit
  %polly.access.mul..res = extractvalue { i64, i1 } %polly.access.mul., 0
  %polly.access.add. = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res, i64 %6)
  %polly.access.add..obit = extractvalue { i64, i1 } %polly.access.add., 1
  %polly.overflow.state12 = or i1 %polly.overflow.state11, %polly.access.add..obit
  %polly.access.add..res = extractvalue { i64, i1 } %polly.access.add., 0
  %polly.access. = getelementptr double, double* %8, i64 %polly.access.add..res
  ; First address of the %3 accesses: %3 + (0 * %6 + 0)
  %polly.access.mul.13 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 0, i64 %6)
  %polly.access.mul..obit14 = extractvalue { i64, i1 } %polly.access.mul.13, 1
  %polly.overflow.state15 = or i1 %polly.overflow.state12, %polly.access.mul..obit14
  %polly.access.mul..res16 = extractvalue { i64, i1 } %polly.access.mul.13, 0
  %polly.access.add.17 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res16, i64 0)
  %polly.access.add..obit18 = extractvalue { i64, i1 } %polly.access.add.17, 1
  %polly.overflow.state19 = or i1 %polly.overflow.state15, %polly.access.add..obit18
  %polly.access.add..res20 = extractvalue { i64, i1 } %polly.access.add.17, 0
  %polly.access.21 = getelementptr double, double* %3, i64 %polly.access.add..res20
  %25 = ptrtoint double* %polly.access. to i64
  %26 = ptrtoint double* %polly.access.21 to i64
  %27 = icmp ule i64 %25, %26
  ; One-past-the-end address of the %3 accesses: %3 + ((%14 - 1) * %6 + %6)
  %28 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %14, i64 1)
  %.obit22 = extractvalue { i64, i1 } %28, 1
  %polly.overflow.state23 = or i1 %polly.overflow.state19, %.obit22
  %.res24 = extractvalue { i64, i1 } %28, 0
  %polly.access.mul.25 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %.res24, i64 %6)
  %polly.access.mul..obit26 = extractvalue { i64, i1 } %polly.access.mul.25, 1
  %polly.overflow.state27 = or i1 %polly.overflow.state23, %polly.access.mul..obit26
  %polly.access.mul..res28 = extractvalue { i64, i1 } %polly.access.mul.25, 0
  %polly.access.add.29 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res28, i64 %6)
  %polly.access.add..obit30 = extractvalue { i64, i1 } %polly.access.add.29, 1
  %polly.overflow.state31 = or i1 %polly.overflow.state27, %polly.access.add..obit30
  %polly.access.add..res32 = extractvalue { i64, i1 } %polly.access.add.29, 0
  %polly.access.33 = getelementptr double, double* %3, i64 %polly.access.add..res32
  ; First address of the %8 accesses: %8 + (0 * %11 + 0)
  %polly.access.mul.34 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 0, i64 %11)
  %polly.access.mul..obit35 = extractvalue { i64, i1 } %polly.access.mul.34, 1
  %polly.overflow.state36 = or i1 %polly.overflow.state31, %polly.access.mul..obit35
  %polly.access.mul..res37 = extractvalue { i64, i1 } %polly.access.mul.34, 0
  %polly.access.add.38 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %polly.access.mul..res37, i64 0)
  %polly.access.add..obit39 = extractvalue { i64, i1 } %polly.access.add.38, 1
  %polly.overflow.state40 = or i1 %polly.overflow.state36, %polly.access.add..obit39
  %polly.access.add..res41 = extractvalue { i64, i1 } %polly.access.add.38, 0
  %polly.access.42 = getelementptr double, double* %8, i64 %polly.access.add..res41
  %29 = ptrtoint double* %polly.access.33 to i64
  %30 = ptrtoint double* %polly.access.42 to i64
  %31 = icmp ule i64 %29, %30
  %32 = or i1 %27, %31
  %33 = and i1 %23, %32
  ; Size threshold: 21 * (1 * %6) * %14 >= 2621440
  %34 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %6)
  %.obit43 = extractvalue { i64, i1 } %34, 1
  %polly.overflow.state44 = or i1 %polly.overflow.state40, %.obit43
  %.res45 = extractvalue { i64, i1 } %34, 0
  %35 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %.res45, i64 %14)
  %.obit46 = extractvalue { i64, i1 } %35, 1
  %polly.overflow.state47 = or i1 %polly.overflow.state44, %.obit46
  %.res48 = extractvalue { i64, i1 } %35, 0
  %36 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 21, i64 %.res48)
  %.obit49 = extractvalue { i64, i1 } %36, 1
  %polly.overflow.state50 = or i1 %polly.overflow.state47, %.obit49
  %.res51 = extractvalue { i64, i1 } %36, 0
  %37 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 0, i64 %.res51)
  %.obit52 = extractvalue { i64, i1 } %37, 1
  %polly.overflow.state53 = or i1 %polly.overflow.state50, %.obit52
  %.res54 = extractvalue { i64, i1 } %37, 0
  %38 = icmp sge i64 %.res54, 2621440
  %39 = and i1 %33, %38
  %polly.rtc.overflown = xor i1 %polly.overflow.state53, true
  %polly.rtc.result = and i1 %39, %polly.rtc.overflown
  ; Constant-false condition: control always continues to the host loop nest.
  ; NOTE(review): presumably forced off because @kernel_0 holds no device
  ; code - confirm before wiring %polly.rtc.result back in.
  br i1 false, label %polly.start, label %if.pre_entry_bb

if.pre_entry_bb:                                  ; preds = %polly.split_new_and_old
  br label %if, !dbg !27

if:                                               ; preds = %if.pre_entry_bb, %L11.loopexit
  ; Outer loop header; counter starts at 1 (debug variable "i").
  %"#temp#1.010" = phi i64 [ %40, %L11.loopexit ], [ 1, %if.pre_entry_bb ]
  %40 = add i64 %"#temp#1.010", 1, !dbg !27
  call void @llvm.dbg.value(metadata i64 %14, i64 0, metadata !21, metadata !23), !dbg !22
  call void @llvm.dbg.value(metadata i64 %14, i64 0, metadata !21, metadata !23), !dbg !22
  ; %42 = max(%14, 0) is the inner trip bound; skip the inner loop when %14 < 1.
  %41 = icmp sgt i64 %14, 0, !dbg !28
  %42 = select i1 %41, i64 %14, i64 0, !dbg !28
  %43 = icmp slt i64 %14, 1, !dbg !28
  br i1 %43, label %L11.loopexit, label %if3.lr.ph, !dbg !28

if3.lr.ph:                                        ; preds = %if
  br label %if3, !dbg !28

L11.L33_crit_edge:                                ; preds = %L11.loopexit
  br label %polly.merge_new_and_old, !dbg !27

polly.merge_new_and_old:                          ; preds = %polly.exiting, %L11.L33_crit_edge
  br label %L33, !dbg !29

L33:                                              ; preds = %polly.merge_new_and_old, %top.split
  ret void, !dbg !29

if3:                                              ; preds = %if3.lr.ph, %if3
  ; Inner loop body; counter starts at 1 (debug variable "j").
  %"#temp#.09" = phi i64 [ 1, %if3.lr.ph ], [ %44, %if3 ]
  %44 = add i64 %"#temp#.09", 1, !dbg !28
  call void @llvm.dbg.value(metadata i64 %"#temp#1.010", i64 0, metadata !19, metadata !23), !dbg !22
  call void @llvm.dbg.value(metadata i64 %"#temp#.09", i64 0, metadata !17, metadata !23), !dbg !22
  ; Zero-based indices, then load %8[(i-1) + (j-1) * %11]
  %45 = add i64 %"#temp#1.010", -1, !dbg !29
  %46 = add i64 %"#temp#.09", -1, !dbg !29
  %47 = mul i64 %46, %11, !dbg !29
  %48 = add i64 %45, %47, !dbg !29
  %49 = getelementptr double, double* %8, i64 %48, !dbg !29
  %50 = load double, double* %49, align 8, !dbg !29, !tbaa !30
  call void @llvm.dbg.value(metadata i64 %"#temp#1.010", i64 0, metadata !19, metadata !23), !dbg !22
  call void @llvm.dbg.value(metadata i64 %"#temp#.09", i64 0, metadata !17, metadata !23), !dbg !22
  %51 = fmul double %50, %50, !dbg !29
  call void @llvm.dbg.value(metadata i64 %"#temp#1.010", i64 0, metadata !19, metadata !23), !dbg !22
  call void @llvm.dbg.value(metadata i64 %"#temp#.09", i64 0, metadata !17, metadata !23), !dbg !22
  ; Store the square to %3[(i-1) + (j-1) * %6]
  %52 = mul i64 %46, %6, !dbg !29
  %53 = add i64 %45, %52, !dbg !29
  %54 = getelementptr double, double* %3, i64 %53, !dbg !29
  store double %51, double* %54, align 8, !dbg !29, !tbaa !30
  %55 = icmp eq i64 %"#temp#.09", %42, !dbg !28
  br i1 %55, label %L20.L11.loopexit_crit_edge, label %if3, !dbg !28

polly.start:                                      ; preds = %polly.split_new_and_old
  br label %polly.acc.initialize

polly.acc.initialize:                             ; preds = %polly.start
  ; Create a context and two device allocations:
  ; MemRef0 is 8 * %14 * %11 bytes, MemRef1 is 8 * %14 * %6 bytes.
  %56 = call i8* @polly_initContext()
  %57 = mul nsw i64 %14, %11
  %58 = mul i64 8, %57
  %p_dev_array_MemRef0 = call i8* @polly_allocateMemoryForDevice(i64 %58)
  %59 = mul nsw i64 %14, %6
  %60 = mul i64 8, %59
  %p_dev_array_MemRef1 = call i8* @polly_allocateMemoryForDevice(i64 %60)
  br label %polly.cond

polly.cond:                                       ; preds = %polly.acc.initialize
  %61 = icmp sge i64 %14, 1
  %62 = icmp sge i64 %6, 1
  %63 = and i1 %61, %62
  br i1 %63, label %polly.then, label %polly.else

polly.merge:                                      ; preds = %polly.else, %polly.merge56
  ; Release both device allocations and the context.
  call void @polly_freeDeviceMemory(i8* %p_dev_array_MemRef1)
  call void @polly_freeDeviceMemory(i8* %p_dev_array_MemRef0)
  call void @polly_freeContext(i8* %56)
  br label %polly.exiting

polly.exiting:                                    ; preds = %polly.merge
  br label %polly.merge_new_and_old

polly.then:                                       ; preds = %polly.cond
  br label %polly.cond55

polly.cond55:                                     ; preds = %polly.then
  %64 = icmp sge i64 %11, 1
  br i1 %64, label %polly.then57, label %polly.else58

polly.merge56:                                    ; preds = %polly.else58, %polly.then57
  ; Fill the launch array: slot k holds the address of the stack cell that
  ; contains argument k (two device pointers, then %14, %6, %11).
  %65 = call i8* @polly_getDevicePtr(i8* %p_dev_array_MemRef0)
  %66 = getelementptr [5 x i8*], [5 x i8*]* %polly_launch_0_params, i64 0, i64 0
  store i8* %65, i8** %polly_launch_0_param_0
  %67 = bitcast i8** %polly_launch_0_param_0 to i8*
  store i8* %67, i8** %66
  %68 = call i8* @polly_getDevicePtr(i8* %p_dev_array_MemRef1)
  %69 = getelementptr [5 x i8*], [5 x i8*]* %polly_launch_0_params, i64 0, i64 1
  store i8* %68, i8** %polly_launch_0_param_1
  %70 = bitcast i8** %polly_launch_0_param_1 to i8*
  store i8* %70, i8** %69
  store i64 %14, i64* %polly_launch_0_param_2
  %71 = getelementptr [5 x i8*], [5 x i8*]* %polly_launch_0_params, i64 0, i64 2
  %72 = bitcast i64* %polly_launch_0_param_2 to i8*
  store i8* %72, i8** %71
  store i64 %6, i64* %polly_launch_0_param_3
  %73 = getelementptr [5 x i8*], [5 x i8*]* %polly_launch_0_params, i64 0, i64 3
  %74 = bitcast i64* %polly_launch_0_param_3 to i8*
  store i8* %74, i8** %73
  store i64 %11, i64* %polly_launch_0_param_4
  %75 = getelementptr [5 x i8*], [5 x i8*]* %polly_launch_0_params, i64 0, i64 4
  %76 = bitcast i64* %polly_launch_0_param_4 to i8*
  store i8* %76, i8** %75
  %77 = call i8* @polly_getKernel(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @kernel_0, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @kernel_0_name, i32 0, i32 0))
  ; First two i32 launch arguments: 256 once the extent reaches the threshold,
  ; otherwise (extent + 31) >> 5. The remaining three are the constants
  ; 32, 16, 1. NOTE(review): presumably grid x/y followed by block x/y/z -
  ; confirm against the runtime's polly_launchKernel definition.
  %78 = icmp sge i64 %6, 8161
  %79 = add nsw i64 %6, 31
  %polly.fdiv_q.shr = ashr i64 %79, 5
  %80 = select i1 %78, i64 256, i64 %polly.fdiv_q.shr
  %81 = trunc i64 %80 to i32
  %82 = icmp sge i64 %14, 8162
  %83 = add nsw i64 %14, 31
  %polly.fdiv_q.shr59 = ashr i64 %83, 5
  %84 = select i1 %82, i64 256, i64 %polly.fdiv_q.shr59
  %85 = trunc i64 %84 to i32
  call void @polly_launchKernel(i8* %77, i32 %81, i32 %85, i32 32, i32 16, i32 1, i8* %polly_launch_0_params_i8ptr)
  call void @polly_freeKernel(i8* %77)
  ; Copy 8 * %14 * %6 bytes from MemRef1 back into the %3 buffer.
  %86 = mul nsw i64 %14, %6
  %87 = mul i64 8, %86
  %88 = bitcast double* %3 to i8*
  call void @polly_copyFromDeviceToHost(i8* %p_dev_array_MemRef1, i8* %88, i64 %87)
  br label %polly.merge

polly.else:                                       ; preds = %polly.cond
  br label %polly.merge

polly.then57:                                     ; preds = %polly.cond55
  ; Copy 8 * %14 * %11 bytes from the %8 buffer into MemRef0.
  %89 = mul nsw i64 %14, %11
  %90 = mul i64 8, %89
  %91 = bitcast double* %8 to i8*
  call void @polly_copyFromHostToDevice(i8* %91, i8* %p_dev_array_MemRef0, i64 %90)
  br label %polly.merge56

polly.else58:                                     ; preds = %polly.cond55
  br label %polly.merge56
}

; jlcall_square_mat_64433(%0, %1, %2)
;
; Wrapper: loads the i8** values at slots 0 and 1 of %1, passes them to
; @julia_square_mat_64434, and returns a fixed address materialised from an
; integer constant. %0 and %2 are not read.
; NOTE(review): the returned constant looks like a process-specific runtime
; address - confirm before reusing this module in another process.
define i8** @jlcall_square_mat_64433(i8**, i8***, i32) #1 {
top:
  br label %top.split

top.split:                                        ; preds = %top
  %3 = load i8**, i8*** %1, align 8
  %4 = getelementptr i8**, i8*** %1, i64 1
  %5 = load i8**, i8*** %4, align 8
  call void @julia_square_mat_64434(i8** %3, i8** %5)
  ret i8** inttoptr (i64 140429972488208 to i8**)
}

declare i8**** @jl_get_ptls_states()

; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #2

; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32, i1) #3

; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #3

declare void @jl_enter_handler(i8*)

; Function Attrs: returns_twice
declare i32 @__sigsetjmp(i8*, i32) #4

; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2

; Function Attrs: nounwind readnone
declare { i64, i1 } @llvm.ssub.with.overflow.i64(i64, i64) #2

; Function Attrs: nounwind readnone
declare { i64, i1 } @llvm.smul.with.overflow.i64(i64, i64) #2

; Function Attrs: nounwind readnone
declare { i64, i1 } @llvm.sadd.with.overflow.i64(i64, i64) #2

; External runtime entry points used by the polly.* blocks above.
declare i8* @polly_initContext()

declare i8* @polly_allocateMemoryForDevice(i64)

declare void @polly_copyFromHostToDevice(i8*, i8*, i64)

declare i8* @polly_getDevicePtr(i8*)

declare i8* @polly_getKernel(i8*, i8*)

declare void @polly_launchKernel(i8*, i32, i32, i32, i32, i32, i8*)

declare void @polly_freeKernel(i8*)

declare void @polly_copyFromDeviceToHost(i8*, i8*, i64)

declare void @polly_freeDeviceMemory(i8*)

declare void @polly_freeContext(i8*)

attributes #0 = { sspstrong "no-frame-pointer-elim"="true" }
attributes #1 = { "no-frame-pointer-elim"="true" }
attributes #2 = { nounwind readnone }
attributes #3 = { argmemonly nounwind }
attributes #4 = { returns_twice }

!llvm.module.flags = !{!0, !1}
!llvm.dbg.cu = !{!2}

!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 1, !"Debug Info Version", i32 3}
!2 = distinct !DICompileUnit(language: DW_LANG_C89, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4)
!3 = !DIFile(filename: "REPL[1]", directory: ".")
!4 = !{}
!5 = distinct !DISubprogram(name: "square_mat", linkageName: "julia_square_mat_64434", scope: null, file: !3, type: !6, isLocal: false, isDefinition: true, isOptimized: true, unit: !2, variables: !12)
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !8}
!8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64, align: 64)
!9 = !DICompositeType(tag: DW_TAG_structure_type, name: "jl_value_t", file: !10, line: 71, align: 64, elements: !11)
!10 = !DIFile(filename: "julia.h", directory: "")
!11 = !{!8}
!12 = !{!13, !15, !16, !17, !19, !20, !21}
!13 = !DILocalVariable(name: "#self#", arg: 1, scope: !5, file: !3, line: 2, type: !14)
!14 = !DICompositeType(tag: DW_TAG_structure_type, name: "#square_mat", align: 8, elements: !4, runtimeLang: DW_LANG_Julia, identifier: "#square_mat_64353")
!15 = !DILocalVariable(name: "a", arg: 2, scope: !5, file: !3, line: 2, type: !8)
!16 = !DILocalVariable(name: "b", arg: 3, scope: !5, file: !3, line: 2, type: !8)
!17 = !DILocalVariable(name: "j", scope: !5, file: !3, line: 2, type: !18)
!18 = !DIBasicType(name: "Int64", size: 64, encoding: DW_ATE_unsigned)
!19 = !DILocalVariable(name: "i", scope: !5, file: !3, line: 2, type: !18)
!20 = !DILocalVariable(name: "ni", scope: !5, file: !3, line: 2, type: !18)
!21 = !DILocalVariable(name: "nj", scope: !5, file: !3, line: 2, type: !18)
!22 = !DILocation(line: 2, scope: !5)
!23 = !DIExpression()
!24 = !{!25, !25, i64 0, i64 1}
!25 = !{!"jtbaa_const", !26, i64 0}
!26 = !{!"jtbaa"}
!27 = !DILocation(line: 3, scope: !5)
!28 = !DILocation(line: 4, scope: !5)
!29 = !DILocation(line: 5, scope: !5)
!30 = !{!31, !31, i64 0}
!31 = !{!"jtbaa_arraybuf", !32, i64 0}
!32 = !{!"jtbaa_data", !26, i64 0}