Index: lib/Analysis/DependenceInfo.cpp
===================================================================
--- lib/Analysis/DependenceInfo.cpp
+++ lib/Analysis/DependenceInfo.cpp
@@ -774,6 +774,100 @@
   return IsValid;
 }
 
+// Find the maximum number of input or output dimensions among all isl_maps
+// in an isl_union_map. Used as a callback for isl_union_map_foreach_map.
+//
+// @param map  The map to inspect; ownership is taken and the map is freed.
+// @param user Pointer to the unsigned running maximum, updated in place.
+// @see unifyDepsMap
+static isl_stat findMaxDimsCount(__isl_take isl_map *map, void *user) {
+  assert(user && "Expected valid pointer to store max dimension count.");
+  unsigned *MaxDims = static_cast<unsigned *>(user);
+
+  unsigned NInDims = isl_map_dim(map, isl_dim_in);
+  unsigned NOutDims = isl_map_dim(map, isl_dim_out);
+  isl_map_free(map);
+
+  *MaxDims = std::max({*MaxDims, NInDims, NOutDims});
+
+  return isl_stat_ok;
+}
+
+namespace {
+// Helper struct carrying the state shared with appendInputDimsAndUnion.
+// Kept in an anonymous namespace so the type has internal linkage.
+// @see appendInputDimsAndUnion
+struct AlignInputsMapInfo {
+  // Number of input & output dimensions that every isl_map is padded to.
+  unsigned NExpectedDims = 0;
+
+  // Final isl_union_map that is the union of all the padded isl_maps
+  // appendInputDimsAndUnion has been run on; nullptr until the first map
+  // has been added.
+  isl_union_map *out = nullptr;
+};
+} // end anonymous namespace
+
+// Pad an isl_map with dimensions such that it has the expected number of
+// input & output dimensions, then add it to the accumulated union. Used as
+// a callback for isl_union_map_foreach_map.
+//
+// @param map  The map to pad; ownership is taken.
+// @param user Pointer to the AlignInputsMapInfo being filled in.
+// @see unifyDepsMap
+static isl_stat appendInputDimsAndUnion(__isl_take isl_map *map, void *user) {
+  AlignInputsMapInfo *info = static_cast<AlignInputsMapInfo *>(user);
+  assert(info && "expected valid user pointer");
+
+  unsigned InN = isl_map_dim(map, isl_dim_in);
+  unsigned OutN = isl_map_dim(map, isl_dim_out);
+
+  assert(
+      InN <= info->NExpectedDims &&
+      "Found map with more than maximum number of expected input dimensions.");
+
+  assert(
+      OutN <= info->NExpectedDims &&
+      "Found map with more than maximum number of expected output dimensions.");
+
+  // TODO: I don't fully understand the implications of doing this,
+  // since isl_map_add_dims flattens the map. However, I believe that it
+  // is okay to flatten dependences over a schedule.
+  map = isl_map_add_dims(map, isl_dim_in, info->NExpectedDims - InN);
+  map = isl_map_add_dims(map, isl_dim_out, info->NExpectedDims - OutN);
+
+  isl_union_map *Padded = isl_union_map_from_map(map);
+  info->out = info->out ? isl_union_map_union(info->out, Padded) : Padded;
+
+  return isl_stat_ok;
+}
+
+// The isl_maps that make up Deps may have different numbers of dimensions
+// per isl_map. We need to force them to have the same number of dimensions
+// since we wish to unify this into an isl_map. So, we figure out what
+// the maximum number of dimensions is, and we pad each isl_map's input
+// and output dimensions to be the same number throughout the isl_union_map.
+//
+// We expect the number of input and output dimensions for each map in Deps
+// to be equal, since they represent dependences.
+//
+// NOTE(review): if Deps contains no maps, neither callback runs and info.out
+// is still nullptr when it is handed to isl_map_from_union_map; presumably
+// the caller filters out empty Deps first -- confirm.
+//
+// Postcondition:
+//  forall m, n \in out,
+//    dim_in(n) = dim_in(m) = dim_out(n) = dim_out(m)
+// @see Dependences::isParallel
+static __isl_give isl_map *unifyDepsMap(__isl_take isl_union_map *Deps) {
+  AlignInputsMapInfo info;
+  isl_union_map_foreach_map(Deps, &findMaxDimsCount, &info.NExpectedDims);
+  isl_union_map_foreach_map(Deps, &appendInputDimsAndUnion, &info);
+  isl_union_map_free(Deps);
+
+  return isl_map_from_union_map(info.out);
+}
+
 // Check if the current scheduling dimension is parallel.
 //
 // We check for parallelism by verifying that the loop does not carry any
@@ -802,7 +896,7 @@
     return true;
   }
 
-  ScheduleDeps = isl_map_from_union_map(Deps);
+  ScheduleDeps = unifyDepsMap(Deps);
   Dimension = isl_map_dim(ScheduleDeps, isl_dim_out) - 1;
 
   for (unsigned i = 0; i < Dimension; i++)
Index: test/DependenceInfo/is_parallel_irregular_deps.ll
===================================================================
--- /dev/null
+++ test/DependenceInfo/is_parallel_irregular_deps.ll
@@ -0,0 +1,474 @@
+; Test file taken from Polybench's LU implementation:
+; polybench-c-4.2.1-beta/linear-algebra/solvers/lu/lu.c
+
+; This contains dependences which have unequal number of input and output dimensions.
+; We check that these are cleaned up within Dependences::isParallel().
+ +; RUN: opt %loadPolly -polyhedral-info -polly-check-parallel -analyze %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.11.0" + +%struct.__sFILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } +%struct.__sFILEX = type opaque +%struct.__sbuf = type { i8*, i32 } + +@.str = private unnamed_addr constant [1 x i8] zeroinitializer, align 1 +@__stderrp = external global %struct.__sFILE*, align 8 +@.str.1 = private unnamed_addr constant [23 x i8] c"==BEGIN DUMP_ARRAYS==\0A\00", align 1 +@.str.2 = private unnamed_addr constant [15 x i8] c"begin dump: %s\00", align 1 +@.str.3 = private unnamed_addr constant [2 x i8] c"A\00", align 1 +@.str.4 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1 +@.str.5 = private unnamed_addr constant [8 x i8] c"%0.2lf \00", align 1 +@.str.6 = private unnamed_addr constant [17 x i8] c"\0Aend dump: %s\0A\00", align 1 +@.str.7 = private unnamed_addr constant [23 x i8] c"==END DUMP_ARRAYS==\0A\00", align 1 + +; Function Attrs: noinline nounwind ssp uwtable +define i32 @main(i32 %argc, i8** %argv) #0 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + %call = tail call i8* @polybench_alloc_data(i64 4000000, i32 8) #2 + %arraydecay = bitcast i8* %call to [2000 x double]* + tail call void @init_array(i32 2000, [2000 x double]* %arraydecay) + %arraydecay1 = bitcast i8* %call to [2000 x double]* + tail call void @kernel_lu(i32 2000, [2000 x double]* %arraydecay1) + %cmp = icmp sgt i32 %argc, 42 + br i1 %cmp, label %land.lhs.true, label %if.end + +land.lhs.true: ; preds = %entry.split + %0 = load i8*, i8** %argv, align 8 + %strcmpload = load i8, i8* %0, align 1 + %tobool = icmp eq i8 %strcmpload, 0 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %land.lhs.true + %arraydecay3 
= bitcast i8* %call to [2000 x double]* + tail call void @print_array(i32 2000, [2000 x double]* %arraydecay3) + br label %if.end + +if.end: ; preds = %land.lhs.true, %if.then, %entry.split + tail call void @free(i8* %call) + ret i32 0 +} + +declare i8* @polybench_alloc_data(i64, i32) #1 + +; Function Attrs: noinline nounwind ssp uwtable +define internal void @init_array(i32 %n, [2000 x double]* %A) #0 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + %cmp20 = icmp sgt i32 %n, 0 + br i1 %cmp20, label %for.cond1.preheader.lr.ph, label %for.end25 + +for.cond1.preheader.lr.ph: ; preds = %entry.split + %0 = sext i32 %n to i64 + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond1.preheader.lr.ph, %for.end18 + %indvars.iv61 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next62, %for.end18 ] + %indvars.iv59 = phi i64 [ 1, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next60, %for.end18 ] + br i1 false, label %for.cond8.preheader, label %for.body3.lr.ph + +for.body3.lr.ph: ; preds = %for.cond1.preheader + br label %for.body3 + +for.cond1.for.cond8.preheader_crit_edge: ; preds = %for.body3 + br label %for.cond8.preheader + +for.cond8.preheader: ; preds = %for.cond1.for.cond8.preheader_crit_edge, %for.cond1.preheader + %indvars.iv.next62 = add nuw nsw i64 %indvars.iv61, 1 + %cmp918 = icmp slt i64 %indvars.iv.next62, %0 + br i1 %cmp918, label %for.body11.lr.ph, label %for.end18 + +for.body11.lr.ph: ; preds = %for.cond8.preheader + br label %for.body11 + +for.body3: ; preds = %for.body3, %for.body3.lr.ph + %indvars.iv46 = phi i64 [ %indvars.iv.next47, %for.body3 ], [ 0, %for.body3.lr.ph ] + %1 = sub nsw i64 0, %indvars.iv46 + %2 = trunc i64 %1 to i32 + %rem = srem i32 %2, %n + %conv = sitofp i32 %rem to double + %conv4 = sitofp i32 %n to double + %div = fdiv double %conv, %conv4 + %add = fadd double %div, 1.000000e+00 + %arrayidx6 = getelementptr inbounds [2000 x double], [2000 x double]* %A, i64 %indvars.iv61, i64 
%indvars.iv46 + store double %add, double* %arrayidx6, align 8 + %indvars.iv.next47 = add nuw nsw i64 %indvars.iv46, 1 + %exitcond52 = icmp ne i64 %indvars.iv.next47, %indvars.iv59 + br i1 %exitcond52, label %for.body3, label %for.cond1.for.cond8.preheader_crit_edge + +for.body11: ; preds = %for.body11.lr.ph, %for.body11 + %indvars.iv55 = phi i64 [ %indvars.iv59, %for.body11.lr.ph ], [ %indvars.iv.next56, %for.body11 ] + %arrayidx15 = getelementptr inbounds [2000 x double], [2000 x double]* %A, i64 %indvars.iv61, i64 %indvars.iv55 + store double 0.000000e+00, double* %arrayidx15, align 8 + %indvars.iv.next56 = add nuw nsw i64 %indvars.iv55, 1 + %wide.trip.count57 = zext i32 %n to i64 + %exitcond58 = icmp ne i64 %indvars.iv.next56, %wide.trip.count57 + br i1 %exitcond58, label %for.body11, label %for.cond8.for.end18_crit_edge + +for.cond8.for.end18_crit_edge: ; preds = %for.body11 + br label %for.end18 + +for.end18: ; preds = %for.cond8.for.end18_crit_edge, %for.cond8.preheader + %arrayidx22 = getelementptr inbounds [2000 x double], [2000 x double]* %A, i64 %indvars.iv61, i64 %indvars.iv61 + store double 1.000000e+00, double* %arrayidx22, align 8 + %indvars.iv.next60 = add nuw nsw i64 %indvars.iv59, 1 + %wide.trip.count63 = zext i32 %n to i64 + %exitcond64 = icmp ne i64 %indvars.iv.next62, %wide.trip.count63 + br i1 %exitcond64, label %for.cond1.preheader, label %for.cond.for.end25_crit_edge + +for.cond.for.end25_crit_edge: ; preds = %for.end18 + br label %for.end25 + +for.end25: ; preds = %for.cond.for.end25_crit_edge, %entry.split + %call = tail call i8* @polybench_alloc_data(i64 4000000, i32 8) #2 + %3 = bitcast i8* %call to [2000 x [2000 x double]]* + %cmp2713 = icmp sgt i32 %n, 0 + br i1 %cmp2713, label %for.cond30.preheader.lr.ph, label %for.cond44.preheader + +for.cond30.preheader.lr.ph: ; preds = %for.end25 + br label %for.cond30.preheader + +for.cond30.preheader: ; preds = %for.cond30.preheader.lr.ph, %for.inc41 + %indvars.iv42 = phi i64 [ 0, 
%for.cond30.preheader.lr.ph ], [ %indvars.iv.next43, %for.inc41 ] + %cmp3111 = icmp sgt i32 %n, 0 + br i1 %cmp3111, label %for.body33.lr.ph, label %for.inc41 + +for.body33.lr.ph: ; preds = %for.cond30.preheader + br label %for.body33 + +for.cond26.for.cond44.preheader_crit_edge: ; preds = %for.inc41 + br label %for.cond44.preheader + +for.cond44.preheader: ; preds = %for.cond26.for.cond44.preheader_crit_edge, %for.end25 + %cmp459 = icmp sgt i32 %n, 0 + br i1 %cmp459, label %for.cond48.preheader.lr.ph, label %for.cond78.preheader + +for.cond48.preheader.lr.ph: ; preds = %for.cond44.preheader + br label %for.cond48.preheader + +for.body33: ; preds = %for.body33.lr.ph, %for.body33 + %indvars.iv38 = phi i64 [ 0, %for.body33.lr.ph ], [ %indvars.iv.next39, %for.body33 ] + %arrayidx37 = getelementptr inbounds [2000 x [2000 x double]], [2000 x [2000 x double]]* %3, i64 0, i64 %indvars.iv42, i64 %indvars.iv38 + store double 0.000000e+00, double* %arrayidx37, align 8 + %indvars.iv.next39 = add nuw nsw i64 %indvars.iv38, 1 + %wide.trip.count40 = zext i32 %n to i64 + %exitcond41 = icmp ne i64 %indvars.iv.next39, %wide.trip.count40 + br i1 %exitcond41, label %for.body33, label %for.cond30.for.inc41_crit_edge + +for.cond30.for.inc41_crit_edge: ; preds = %for.body33 + br label %for.inc41 + +for.inc41: ; preds = %for.cond30.for.inc41_crit_edge, %for.cond30.preheader + %indvars.iv.next43 = add nuw nsw i64 %indvars.iv42, 1 + %wide.trip.count44 = zext i32 %n to i64 + %exitcond45 = icmp ne i64 %indvars.iv.next43, %wide.trip.count44 + br i1 %exitcond45, label %for.cond30.preheader, label %for.cond26.for.cond44.preheader_crit_edge + +for.cond48.preheader: ; preds = %for.cond48.preheader.lr.ph, %for.inc75 + %indvars.iv34 = phi i64 [ 0, %for.cond48.preheader.lr.ph ], [ %indvars.iv.next35, %for.inc75 ] + %cmp497 = icmp sgt i32 %n, 0 + br i1 %cmp497, label %for.cond52.preheader.lr.ph, label %for.inc75 + +for.cond52.preheader.lr.ph: ; preds = %for.cond48.preheader + br label 
%for.cond52.preheader + +for.cond44.for.cond78.preheader_crit_edge: ; preds = %for.inc75 + br label %for.cond78.preheader + +for.cond78.preheader: ; preds = %for.cond44.for.cond78.preheader_crit_edge, %for.cond44.preheader + %cmp793 = icmp sgt i32 %n, 0 + br i1 %cmp793, label %for.cond82.preheader.lr.ph, label %for.end99 + +for.cond82.preheader.lr.ph: ; preds = %for.cond78.preheader + br label %for.cond82.preheader + +for.cond52.preheader: ; preds = %for.cond52.preheader.lr.ph, %for.inc72 + %indvars.iv30 = phi i64 [ 0, %for.cond52.preheader.lr.ph ], [ %indvars.iv.next31, %for.inc72 ] + %cmp535 = icmp sgt i32 %n, 0 + br i1 %cmp535, label %for.body55.lr.ph, label %for.inc72 + +for.body55.lr.ph: ; preds = %for.cond52.preheader + br label %for.body55 + +for.body55: ; preds = %for.body55.lr.ph, %for.body55 + %indvars.iv26 = phi i64 [ 0, %for.body55.lr.ph ], [ %indvars.iv.next27, %for.body55 ] + %arrayidx59 = getelementptr inbounds [2000 x double], [2000 x double]* %A, i64 %indvars.iv30, i64 %indvars.iv34 + %4 = load double, double* %arrayidx59, align 8 + %arrayidx63 = getelementptr inbounds [2000 x double], [2000 x double]* %A, i64 %indvars.iv26, i64 %indvars.iv34 + %5 = load double, double* %arrayidx63, align 8 + %mul = fmul double %4, %5 + %arrayidx67 = getelementptr inbounds [2000 x [2000 x double]], [2000 x [2000 x double]]* %3, i64 0, i64 %indvars.iv30, i64 %indvars.iv26 + %6 = load double, double* %arrayidx67, align 8 + %add68 = fadd double %6, %mul + store double %add68, double* %arrayidx67, align 8 + %indvars.iv.next27 = add nuw nsw i64 %indvars.iv26, 1 + %wide.trip.count28 = zext i32 %n to i64 + %exitcond29 = icmp ne i64 %indvars.iv.next27, %wide.trip.count28 + br i1 %exitcond29, label %for.body55, label %for.cond52.for.inc72_crit_edge + +for.cond52.for.inc72_crit_edge: ; preds = %for.body55 + br label %for.inc72 + +for.inc72: ; preds = %for.cond52.for.inc72_crit_edge, %for.cond52.preheader + %indvars.iv.next31 = add nuw nsw i64 %indvars.iv30, 1 + 
%wide.trip.count32 = zext i32 %n to i64 + %exitcond33 = icmp ne i64 %indvars.iv.next31, %wide.trip.count32 + br i1 %exitcond33, label %for.cond52.preheader, label %for.cond48.for.inc75_crit_edge + +for.cond48.for.inc75_crit_edge: ; preds = %for.inc72 + br label %for.inc75 + +for.inc75: ; preds = %for.cond48.for.inc75_crit_edge, %for.cond48.preheader + %indvars.iv.next35 = add nuw nsw i64 %indvars.iv34, 1 + %wide.trip.count36 = zext i32 %n to i64 + %exitcond37 = icmp ne i64 %indvars.iv.next35, %wide.trip.count36 + br i1 %exitcond37, label %for.cond48.preheader, label %for.cond44.for.cond78.preheader_crit_edge + +for.cond82.preheader: ; preds = %for.cond82.preheader.lr.ph, %for.inc97 + %indvars.iv22 = phi i64 [ 0, %for.cond82.preheader.lr.ph ], [ %indvars.iv.next23, %for.inc97 ] + %cmp831 = icmp sgt i32 %n, 0 + br i1 %cmp831, label %for.body85.lr.ph, label %for.inc97 + +for.body85.lr.ph: ; preds = %for.cond82.preheader + br label %for.body85 + +for.body85: ; preds = %for.body85.lr.ph, %for.body85 + %indvars.iv = phi i64 [ 0, %for.body85.lr.ph ], [ %indvars.iv.next, %for.body85 ] + %arrayidx89 = getelementptr inbounds [2000 x [2000 x double]], [2000 x [2000 x double]]* %3, i64 0, i64 %indvars.iv22, i64 %indvars.iv + %7 = bitcast double* %arrayidx89 to i64* + %8 = load i64, i64* %7, align 8 + %arrayidx93 = getelementptr inbounds [2000 x double], [2000 x double]* %A, i64 %indvars.iv22, i64 %indvars.iv + %9 = bitcast double* %arrayidx93 to i64* + store i64 %8, i64* %9, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %wide.trip.count = zext i32 %n to i64 + %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.body85, label %for.cond82.for.inc97_crit_edge + +for.cond82.for.inc97_crit_edge: ; preds = %for.body85 + br label %for.inc97 + +for.inc97: ; preds = %for.cond82.for.inc97_crit_edge, %for.cond82.preheader + %indvars.iv.next23 = add nuw nsw i64 %indvars.iv22, 1 + %wide.trip.count24 = zext i32 %n to i64 + %exitcond25 = 
icmp ne i64 %indvars.iv.next23, %wide.trip.count24 + br i1 %exitcond25, label %for.cond82.preheader, label %for.cond78.for.end99_crit_edge + +for.cond78.for.end99_crit_edge: ; preds = %for.inc97 + br label %for.end99 + +for.end99: ; preds = %for.cond78.for.end99_crit_edge, %for.cond78.preheader + tail call void @free(i8* %call) + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define internal void @kernel_lu(i32 %n, [2000 x double]* %A) #0 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.cond1.preheader.lr.ph, label %for.end56 + +for.cond1.preheader.lr.ph: ; preds = %entry.split + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond1.preheader.lr.ph, %for.inc54 + %indvars.iv31 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next32, %for.inc54 ] + %cmp23 = icmp sgt i64 %indvars.iv31, 0 + br i1 %cmp23, label %for.cond4.preheader.lr.ph, label %for.cond28.preheader + +for.cond4.preheader.lr.ph: ; preds = %for.cond1.preheader + br label %for.cond4.preheader + +for.cond1.for.cond28.preheader_crit_edge: ; preds = %for.end + br label %for.cond28.preheader + +for.cond28.preheader: ; preds = %for.cond1.for.cond28.preheader_crit_edge, %for.cond1.preheader + br i1 true, label %for.cond31.preheader.lr.ph, label %for.inc54 + +for.cond31.preheader.lr.ph: ; preds = %for.cond28.preheader + br label %for.cond31.preheader + +for.cond4.preheader: ; preds = %for.cond4.preheader.lr.ph, %for.end + %indvars.iv14 = phi i64 [ 0, %for.cond4.preheader.lr.ph ], [ %indvars.iv.next15, %for.end ] + %cmp51 = icmp sgt i64 %indvars.iv14, 0 + br i1 %cmp51, label %for.body6.lr.ph, label %for.end + +for.body6.lr.ph: ; preds = %for.cond4.preheader + br label %for.body6 + +for.body6: ; preds = %for.body6.lr.ph, %for.body6 + %indvars.iv = phi i64 [ 0, %for.body6.lr.ph ], [ %indvars.iv.next, %for.body6 ] + %arrayidx8 = getelementptr inbounds [2000 x double], [2000 x double]* %A, i64 
%indvars.iv31, i64 %indvars.iv + %0 = load double, double* %arrayidx8, align 8 + %arrayidx12 = getelementptr inbounds [2000 x double], [2000 x double]* %A, i64 %indvars.iv, i64 %indvars.iv14 + %1 = load double, double* %arrayidx12, align 8 + %mul = fmul double %0, %1 + %arrayidx16 = getelementptr inbounds [2000 x double], [2000 x double]* %A, i64 %indvars.iv31, i64 %indvars.iv14 + %2 = load double, double* %arrayidx16, align 8 + %sub = fsub double %2, %mul + store double %sub, double* %arrayidx16, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, %indvars.iv14 + br i1 %exitcond, label %for.body6, label %for.cond4.for.end_crit_edge + +for.cond4.for.end_crit_edge: ; preds = %for.body6 + br label %for.end + +for.end: ; preds = %for.cond4.for.end_crit_edge, %for.cond4.preheader + %arrayidx20 = getelementptr inbounds [2000 x double], [2000 x double]* %A, i64 %indvars.iv14, i64 %indvars.iv14 + %3 = load double, double* %arrayidx20, align 8 + %arrayidx24 = getelementptr inbounds [2000 x double], [2000 x double]* %A, i64 %indvars.iv31, i64 %indvars.iv14 + %4 = load double, double* %arrayidx24, align 8 + %div = fdiv double %4, %3 + store double %div, double* %arrayidx24, align 8 + %indvars.iv.next15 = add nuw nsw i64 %indvars.iv14, 1 + %exitcond19 = icmp ne i64 %indvars.iv.next15, %indvars.iv31 + br i1 %exitcond19, label %for.cond4.preheader, label %for.cond1.for.cond28.preheader_crit_edge + +for.cond31.preheader: ; preds = %for.cond31.preheader.lr.ph, %for.inc51 + %indvars.iv27 = phi i64 [ %indvars.iv31, %for.cond31.preheader.lr.ph ], [ %indvars.iv.next28, %for.inc51 ] + %cmp325 = icmp sgt i64 %indvars.iv31, 0 + br i1 %cmp325, label %for.body33.lr.ph, label %for.inc51 + +for.body33.lr.ph: ; preds = %for.cond31.preheader + br label %for.body33 + +for.body33: ; preds = %for.body33.lr.ph, %for.body33 + %indvars.iv20 = phi i64 [ 0, %for.body33.lr.ph ], [ %indvars.iv.next21, %for.body33 ] + %arrayidx37 = getelementptr 
inbounds [2000 x double], [2000 x double]* %A, i64 %indvars.iv31, i64 %indvars.iv20 + %5 = load double, double* %arrayidx37, align 8 + %arrayidx41 = getelementptr inbounds [2000 x double], [2000 x double]* %A, i64 %indvars.iv20, i64 %indvars.iv27 + %6 = load double, double* %arrayidx41, align 8 + %mul42 = fmul double %5, %6 + %arrayidx46 = getelementptr inbounds [2000 x double], [2000 x double]* %A, i64 %indvars.iv31, i64 %indvars.iv27 + %7 = load double, double* %arrayidx46, align 8 + %sub47 = fsub double %7, %mul42 + store double %sub47, double* %arrayidx46, align 8 + %indvars.iv.next21 = add nuw nsw i64 %indvars.iv20, 1 + %exitcond24 = icmp ne i64 %indvars.iv.next21, %indvars.iv31 + br i1 %exitcond24, label %for.body33, label %for.cond31.for.inc51_crit_edge + +for.cond31.for.inc51_crit_edge: ; preds = %for.body33 + br label %for.inc51 + +for.inc51: ; preds = %for.cond31.for.inc51_crit_edge, %for.cond31.preheader + %indvars.iv.next28 = add nuw nsw i64 %indvars.iv27, 1 + %wide.trip.count29 = zext i32 %n to i64 + %exitcond30 = icmp ne i64 %indvars.iv.next28, %wide.trip.count29 + br i1 %exitcond30, label %for.cond31.preheader, label %for.cond28.for.inc54_crit_edge + +for.cond28.for.inc54_crit_edge: ; preds = %for.inc51 + br label %for.inc54 + +for.inc54: ; preds = %for.cond28.for.inc54_crit_edge, %for.cond28.preheader + %indvars.iv.next32 = add nuw nsw i64 %indvars.iv31, 1 + %wide.trip.count33 = zext i32 %n to i64 + %exitcond34 = icmp ne i64 %indvars.iv.next32, %wide.trip.count33 + br i1 %exitcond34, label %for.cond1.preheader, label %for.cond.for.end56_crit_edge + +for.cond.for.end56_crit_edge: ; preds = %for.inc54 + br label %for.end56 + +for.end56: ; preds = %for.cond.for.end56_crit_edge, %entry.split + ret void +} + +declare i32 @strcmp(i8*, i8*) #1 + +; Function Attrs: noinline nounwind ssp uwtable +define internal void @print_array(i32 %n, [2000 x double]* %A) #0 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + %0 = load %struct.__sFILE*, 
%struct.__sFILE** @__stderrp, align 8 + %1 = tail call i64 @fwrite(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), i64 22, i64 1, %struct.__sFILE* %0) + %2 = load %struct.__sFILE*, %struct.__sFILE** @__stderrp, align 8 + %call1 = tail call i32 (%struct.__sFILE*, i8*, ...) @fprintf(%struct.__sFILE* %2, i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str.2, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.3, i64 0, i64 0)) #2 + %cmp4 = icmp sgt i32 %n, 0 + br i1 %cmp4, label %for.cond2.preheader.lr.ph, label %for.end12 + +for.cond2.preheader.lr.ph: ; preds = %entry.split + %3 = sext i32 %n to i64 + br label %for.cond2.preheader + +for.cond2.preheader: ; preds = %for.cond2.preheader.lr.ph, %for.inc10 + %indvars.iv8 = phi i64 [ 0, %for.cond2.preheader.lr.ph ], [ %indvars.iv.next9, %for.inc10 ] + %cmp31 = icmp sgt i32 %n, 0 + br i1 %cmp31, label %for.body4.lr.ph, label %for.inc10 + +for.body4.lr.ph: ; preds = %for.cond2.preheader + br label %for.body4 + +for.body4: ; preds = %for.body4.lr.ph, %if.end + %indvars.iv = phi i64 [ 0, %for.body4.lr.ph ], [ %indvars.iv.next, %if.end ] + %4 = mul nsw i64 %indvars.iv8, %3 + %5 = add nsw i64 %indvars.iv, %4 + %6 = trunc i64 %5 to i32 + %rem = srem i32 %6, 20 + %cmp5 = icmp eq i32 %rem, 0 + br i1 %cmp5, label %if.then, label %if.end + +if.then: ; preds = %for.body4 + %7 = load %struct.__sFILE*, %struct.__sFILE** @__stderrp, align 8 + %fputc = tail call i32 @fputc(i32 10, %struct.__sFILE* %7) + br label %if.end + +if.end: ; preds = %if.then, %for.body4 + %8 = load %struct.__sFILE*, %struct.__sFILE** @__stderrp, align 8 + %arrayidx8 = getelementptr inbounds [2000 x double], [2000 x double]* %A, i64 %indvars.iv8, i64 %indvars.iv + %9 = load double, double* %arrayidx8, align 8 + %call9 = tail call i32 (%struct.__sFILE*, i8*, ...) 
@fprintf(%struct.__sFILE* %8, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str.5, i64 0, i64 0), double %9) #2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %wide.trip.count = zext i32 %n to i64 + %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.body4, label %for.cond2.for.inc10_crit_edge + +for.cond2.for.inc10_crit_edge: ; preds = %if.end + br label %for.inc10 + +for.inc10: ; preds = %for.cond2.for.inc10_crit_edge, %for.cond2.preheader + %indvars.iv.next9 = add nuw nsw i64 %indvars.iv8, 1 + %wide.trip.count11 = zext i32 %n to i64 + %exitcond12 = icmp ne i64 %indvars.iv.next9, %wide.trip.count11 + br i1 %exitcond12, label %for.cond2.preheader, label %for.cond.for.end12_crit_edge + +for.cond.for.end12_crit_edge: ; preds = %for.inc10 + br label %for.end12 + +for.end12: ; preds = %for.cond.for.end12_crit_edge, %entry.split + %10 = load %struct.__sFILE*, %struct.__sFILE** @__stderrp, align 8 + %call13 = tail call i32 (%struct.__sFILE*, i8*, ...) @fprintf(%struct.__sFILE* %10, i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str.6, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.3, i64 0, i64 0)) #2 + %11 = load %struct.__sFILE*, %struct.__sFILE** @__stderrp, align 8 + %12 = tail call i64 @fwrite(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.7, i64 0, i64 0), i64 22, i64 1, %struct.__sFILE* %11) + ret void +} + +declare void @free(i8*) #1 + +declare i32 @fprintf(%struct.__sFILE*, i8*, ...) 
#1 + +; Function Attrs: nounwind +declare i64 @fwrite(i8* nocapture, i64, i64, %struct.__sFILE* nocapture) #2 + +; Function Attrs: nounwind +declare i32 @fputc(i32, %struct.__sFILE* nocapture) #2 + +attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"PIC Level", i32 2} +!1 = !{!"clang version 5.0.0 (http://llvm.org/git/clang.git 88423634c5b7688be89586484a68e5a1167b1280) (http://llvm.org/git/llvm.git 405d07a3a61aff7681568173e5e44c09fc7ff945)"} Index: test/DependenceInfo/is_parallel_irregular_deps_2.ll =================================================================== --- /dev/null +++ test/DependenceInfo/is_parallel_irregular_deps_2.ll @@ -0,0 +1,485 @@ +; Test file taken from Polybench: +; polybench-c-4.2.1-beta/linear-algebra/kernels/2mm/2mm.c + +; This contains dependences that contain isl_maps which do not share the same +; underlying space. We check that this is fixed by Dependences::isParallel(). 
+ +; RUN: opt %loadPolly -polyhedral-info -polly-check-parallel -analyze %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.11.0" + +%struct.__sFILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } +%struct.__sFILEX = type opaque +%struct.__sbuf = type { i8*, i32 } + +@.str = private unnamed_addr constant [1 x i8] zeroinitializer, align 1 +@__stderrp = external global %struct.__sFILE*, align 8 +@.str.1 = private unnamed_addr constant [23 x i8] c"==BEGIN DUMP_ARRAYS==\0A\00", align 1 +@.str.2 = private unnamed_addr constant [15 x i8] c"begin dump: %s\00", align 1 +@.str.3 = private unnamed_addr constant [2 x i8] c"D\00", align 1 +@.str.4 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1 +@.str.5 = private unnamed_addr constant [8 x i8] c"%0.2lf \00", align 1 +@.str.6 = private unnamed_addr constant [17 x i8] c"\0Aend dump: %s\0A\00", align 1 +@.str.7 = private unnamed_addr constant [23 x i8] c"==END DUMP_ARRAYS==\0A\00", align 1 + +; Function Attrs: noinline nounwind ssp uwtable +define i32 @main(i32 %argc, i8** %argv) #0 { +entry: + %alpha = alloca double, align 8 + %beta = alloca double, align 8 + br label %entry.split + +entry.split: ; preds = %entry + %call = tail call i8* @polybench_alloc_data(i64 720000, i32 8) #2 + %call2 = tail call i8* @polybench_alloc_data(i64 880000, i32 8) #2 + %call4 = tail call i8* @polybench_alloc_data(i64 990000, i32 8) #2 + %call6 = tail call i8* @polybench_alloc_data(i64 1080000, i32 8) #2 + %call8 = tail call i8* @polybench_alloc_data(i64 960000, i32 8) #2 + %arraydecay = bitcast i8* %call2 to [1100 x double]* + %arraydecay9 = bitcast i8* %call4 to [900 x double]* + %arraydecay10 = bitcast i8* %call6 to [1200 x double]* + %arraydecay11 = bitcast i8* %call8 to [1200 x double]* + call void 
@init_array(i32 800, i32 900, i32 1100, i32 1200, double* nonnull %alpha, double* nonnull %beta, [1100 x double]* %arraydecay, [900 x double]* %arraydecay9, [1200 x double]* %arraydecay10, [1200 x double]* %arraydecay11) + %0 = load double, double* %alpha, align 8 + %1 = load double, double* %beta, align 8 + %arraydecay12 = bitcast i8* %call to [900 x double]* + %arraydecay13 = bitcast i8* %call2 to [1100 x double]* + %arraydecay14 = bitcast i8* %call4 to [900 x double]* + %arraydecay15 = bitcast i8* %call6 to [1200 x double]* + %arraydecay16 = bitcast i8* %call8 to [1200 x double]* + call void @kernel_2mm(i32 800, i32 900, i32 1100, i32 1200, double %0, double %1, [900 x double]* %arraydecay12, [1100 x double]* %arraydecay13, [900 x double]* %arraydecay14, [1200 x double]* %arraydecay15, [1200 x double]* %arraydecay16) + %cmp = icmp sgt i32 %argc, 42 + br i1 %cmp, label %land.lhs.true, label %if.end + +land.lhs.true: ; preds = %entry.split + %2 = load i8*, i8** %argv, align 8 + %strcmpload = load i8, i8* %2, align 1 + %tobool = icmp eq i8 %strcmpload, 0 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %land.lhs.true + %arraydecay18 = bitcast i8* %call8 to [1200 x double]* + call void @print_array(i32 800, i32 1200, [1200 x double]* %arraydecay18) + br label %if.end + +if.end: ; preds = %land.lhs.true, %if.then, %entry.split + call void @free(i8* %call) + call void @free(i8* %call2) + call void @free(i8* %call4) + call void @free(i8* %call6) + call void @free(i8* %call8) + ret i32 0 +} + +declare i8* @polybench_alloc_data(i64, i32) #1 + +; Function Attrs: noinline nounwind ssp uwtable +define internal void @init_array(i32 %ni, i32 %nj, i32 %nk, i32 %nl, double* %alpha, double* %beta, [1100 x double]* %A, [900 x double]* %B, [1200 x double]* %C, [1200 x double]* %D) #0 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + store double 1.500000e+00, double* %alpha, align 8 + store double 1.200000e+00, double* %beta, align 8 + %cmp18 
= icmp sgt i32 %ni, 0 + br i1 %cmp18, label %for.cond1.preheader.lr.ph, label %for.cond10.preheader + +for.cond1.preheader.lr.ph: ; preds = %entry.split + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond1.preheader.lr.ph, %for.inc7 + %indvars.iv53 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next54, %for.inc7 ] + %cmp216 = icmp sgt i32 %nk, 0 + br i1 %cmp216, label %for.body3.lr.ph, label %for.inc7 + +for.body3.lr.ph: ; preds = %for.cond1.preheader + br label %for.body3 + +for.cond.for.cond10.preheader_crit_edge: ; preds = %for.inc7 + br label %for.cond10.preheader + +for.cond10.preheader: ; preds = %for.cond.for.cond10.preheader_crit_edge, %entry.split + %cmp1113 = icmp sgt i32 %nk, 0 + br i1 %cmp1113, label %for.cond14.preheader.lr.ph, label %for.cond34.preheader + +for.cond14.preheader.lr.ph: ; preds = %for.cond10.preheader + br label %for.cond14.preheader + +for.body3: ; preds = %for.body3.lr.ph, %for.body3 + %indvars.iv47 = phi i64 [ 0, %for.body3.lr.ph ], [ %indvars.iv.next48, %for.body3 ] + %0 = mul nuw nsw i64 %indvars.iv47, %indvars.iv53 + %1 = add nuw nsw i64 %0, 1 + %2 = trunc i64 %1 to i32 + %rem = srem i32 %2, %ni + %conv = sitofp i32 %rem to double + %conv4 = sitofp i32 %ni to double + %div = fdiv double %conv, %conv4 + %arrayidx6 = getelementptr inbounds [1100 x double], [1100 x double]* %A, i64 %indvars.iv53, i64 %indvars.iv47 + store double %div, double* %arrayidx6, align 8 + %indvars.iv.next48 = add nuw nsw i64 %indvars.iv47, 1 + %wide.trip.count51 = zext i32 %nk to i64 + %exitcond52 = icmp ne i64 %indvars.iv.next48, %wide.trip.count51 + br i1 %exitcond52, label %for.body3, label %for.cond1.for.inc7_crit_edge + +for.cond1.for.inc7_crit_edge: ; preds = %for.body3 + br label %for.inc7 + +for.inc7: ; preds = %for.cond1.for.inc7_crit_edge, %for.cond1.preheader + %indvars.iv.next54 = add nuw nsw i64 %indvars.iv53, 1 + %wide.trip.count55 = zext i32 %ni to i64 + %exitcond56 = icmp ne i64 %indvars.iv.next54, 
%wide.trip.count55 + br i1 %exitcond56, label %for.cond1.preheader, label %for.cond.for.cond10.preheader_crit_edge + +for.cond14.preheader: ; preds = %for.cond14.preheader.lr.ph, %for.inc31 + %indvars.iv43 = phi i64 [ 0, %for.cond14.preheader.lr.ph ], [ %indvars.iv.next44, %for.inc31 ] + %cmp1511 = icmp sgt i32 %nj, 0 + br i1 %cmp1511, label %for.body17.lr.ph, label %for.inc31 + +for.body17.lr.ph: ; preds = %for.cond14.preheader + br label %for.body17 + +for.cond10.for.cond34.preheader_crit_edge: ; preds = %for.inc31 + br label %for.cond34.preheader + +for.cond34.preheader: ; preds = %for.cond10.for.cond34.preheader_crit_edge, %for.cond10.preheader + %cmp358 = icmp sgt i32 %nj, 0 + br i1 %cmp358, label %for.cond38.preheader.lr.ph, label %for.cond59.preheader + +for.cond38.preheader.lr.ph: ; preds = %for.cond34.preheader + br label %for.cond38.preheader + +for.body17: ; preds = %for.body17.lr.ph, %for.body17 + %indvars.iv38 = phi i64 [ 0, %for.body17.lr.ph ], [ %indvars.iv.next39, %for.body17 ] + %indvars.iv.next39 = add nuw nsw i64 %indvars.iv38, 1 + %3 = mul nuw nsw i64 %indvars.iv.next39, %indvars.iv43 + %4 = trunc i64 %3 to i32 + %rem20 = srem i32 %4, %nj + %conv21 = sitofp i32 %rem20 to double + %conv22 = sitofp i32 %nj to double + %div23 = fdiv double %conv21, %conv22 + %arrayidx27 = getelementptr inbounds [900 x double], [900 x double]* %B, i64 %indvars.iv43, i64 %indvars.iv38 + store double %div23, double* %arrayidx27, align 8 + %wide.trip.count41 = zext i32 %nj to i64 + %exitcond42 = icmp ne i64 %indvars.iv.next39, %wide.trip.count41 + br i1 %exitcond42, label %for.body17, label %for.cond14.for.inc31_crit_edge + +for.cond14.for.inc31_crit_edge: ; preds = %for.body17 + br label %for.inc31 + +for.inc31: ; preds = %for.cond14.for.inc31_crit_edge, %for.cond14.preheader + %indvars.iv.next44 = add nuw nsw i64 %indvars.iv43, 1 + %wide.trip.count45 = zext i32 %nk to i64 + %exitcond46 = icmp ne i64 %indvars.iv.next44, %wide.trip.count45 + br i1 %exitcond46, label 
%for.cond14.preheader, label %for.cond10.for.cond34.preheader_crit_edge + +for.cond38.preheader: ; preds = %for.cond38.preheader.lr.ph, %for.inc56 + %indvars.iv34 = phi i64 [ 0, %for.cond38.preheader.lr.ph ], [ %indvars.iv.next35, %for.inc56 ] + %cmp396 = icmp sgt i32 %nl, 0 + br i1 %cmp396, label %for.body41.lr.ph, label %for.inc56 + +for.body41.lr.ph: ; preds = %for.cond38.preheader + br label %for.body41 + +for.cond34.for.cond59.preheader_crit_edge: ; preds = %for.inc56 + br label %for.cond59.preheader + +for.cond59.preheader: ; preds = %for.cond34.for.cond59.preheader_crit_edge, %for.cond34.preheader + %cmp603 = icmp sgt i32 %ni, 0 + br i1 %cmp603, label %for.cond63.preheader.lr.ph, label %for.end82 + +for.cond63.preheader.lr.ph: ; preds = %for.cond59.preheader + br label %for.cond63.preheader + +for.body41: ; preds = %for.body41.lr.ph, %for.body41 + %indvars.iv27 = phi i64 [ 0, %for.body41.lr.ph ], [ %indvars.iv.next28, %for.body41 ] + %5 = add nuw nsw i64 %indvars.iv27, 3 + %6 = mul nuw nsw i64 %5, %indvars.iv34 + %7 = add nuw nsw i64 %6, 1 + %8 = trunc i64 %7 to i32 + %rem45 = srem i32 %8, %nl + %conv46 = sitofp i32 %rem45 to double + %conv47 = sitofp i32 %nl to double + %div48 = fdiv double %conv46, %conv47 + %arrayidx52 = getelementptr inbounds [1200 x double], [1200 x double]* %C, i64 %indvars.iv34, i64 %indvars.iv27 + store double %div48, double* %arrayidx52, align 8 + %indvars.iv.next28 = add nuw nsw i64 %indvars.iv27, 1 + %wide.trip.count32 = zext i32 %nl to i64 + %exitcond33 = icmp ne i64 %indvars.iv.next28, %wide.trip.count32 + br i1 %exitcond33, label %for.body41, label %for.cond38.for.inc56_crit_edge + +for.cond38.for.inc56_crit_edge: ; preds = %for.body41 + br label %for.inc56 + +for.inc56: ; preds = %for.cond38.for.inc56_crit_edge, %for.cond38.preheader + %indvars.iv.next35 = add nuw nsw i64 %indvars.iv34, 1 + %wide.trip.count36 = zext i32 %nj to i64 + %exitcond37 = icmp ne i64 %indvars.iv.next35, %wide.trip.count36 + br i1 %exitcond37, label 
%for.cond38.preheader, label %for.cond34.for.cond59.preheader_crit_edge + +for.cond63.preheader: ; preds = %for.cond63.preheader.lr.ph, %for.inc80 + %indvars.iv23 = phi i64 [ 0, %for.cond63.preheader.lr.ph ], [ %indvars.iv.next24, %for.inc80 ] + %cmp641 = icmp sgt i32 %nl, 0 + br i1 %cmp641, label %for.body66.lr.ph, label %for.inc80 + +for.body66.lr.ph: ; preds = %for.cond63.preheader + br label %for.body66 + +for.body66: ; preds = %for.body66.lr.ph, %for.body66 + %indvars.iv = phi i64 [ 0, %for.body66.lr.ph ], [ %indvars.iv.next, %for.body66 ] + %9 = add nuw nsw i64 %indvars.iv, 2 + %10 = mul nuw nsw i64 %9, %indvars.iv23 + %11 = trunc i64 %10 to i32 + %rem69 = srem i32 %11, %nk + %conv70 = sitofp i32 %rem69 to double + %conv71 = sitofp i32 %nk to double + %div72 = fdiv double %conv70, %conv71 + %arrayidx76 = getelementptr inbounds [1200 x double], [1200 x double]* %D, i64 %indvars.iv23, i64 %indvars.iv + store double %div72, double* %arrayidx76, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %wide.trip.count = zext i32 %nl to i64 + %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.body66, label %for.cond63.for.inc80_crit_edge + +for.cond63.for.inc80_crit_edge: ; preds = %for.body66 + br label %for.inc80 + +for.inc80: ; preds = %for.cond63.for.inc80_crit_edge, %for.cond63.preheader + %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1 + %wide.trip.count25 = zext i32 %ni to i64 + %exitcond26 = icmp ne i64 %indvars.iv.next24, %wide.trip.count25 + br i1 %exitcond26, label %for.cond63.preheader, label %for.cond59.for.end82_crit_edge + +for.cond59.for.end82_crit_edge: ; preds = %for.inc80 + br label %for.end82 + +for.end82: ; preds = %for.cond59.for.end82_crit_edge, %for.cond59.preheader + ret void +} + +; Function Attrs: noinline nounwind ssp uwtable +define internal void @kernel_2mm(i32 %ni, i32 %nj, i32 %nk, i32 %nl, double %alpha, double %beta, [900 x double]* %tmp, [1100 x double]* %A, [900 x double]* %B, 
[1200 x double]* %C, [1200 x double]* %D) #0 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + %cmp11 = icmp sgt i32 %ni, 0 + br i1 %cmp11, label %for.cond4.preheader.lr.ph, label %for.cond31.preheader + +for.cond4.preheader.lr.ph: ; preds = %entry.split + br label %for.cond4.preheader + +for.cond4.preheader: ; preds = %for.cond4.preheader.lr.ph, %for.inc28 + %indvars.iv29 = phi i64 [ 0, %for.cond4.preheader.lr.ph ], [ %indvars.iv.next30, %for.inc28 ] + %cmp59 = icmp sgt i32 %nj, 0 + br i1 %cmp59, label %for.body6.lr.ph, label %for.inc28 + +for.body6.lr.ph: ; preds = %for.cond4.preheader + br label %for.body6 + +for.cond.for.cond31.preheader_crit_edge: ; preds = %for.inc28 + br label %for.cond31.preheader + +for.cond31.preheader: ; preds = %for.cond.for.cond31.preheader_crit_edge, %entry.split + %cmp325 = icmp sgt i32 %ni, 0 + br i1 %cmp325, label %for.cond34.preheader.lr.ph, label %for.end67 + +for.cond34.preheader.lr.ph: ; preds = %for.cond31.preheader + br label %for.cond34.preheader + +for.body6: ; preds = %for.body6.lr.ph, %for.inc25 + %indvars.iv25 = phi i64 [ 0, %for.body6.lr.ph ], [ %indvars.iv.next26, %for.inc25 ] + %arrayidx8 = getelementptr inbounds [900 x double], [900 x double]* %tmp, i64 %indvars.iv29, i64 %indvars.iv25 + store double 0.000000e+00, double* %arrayidx8, align 8 + %cmp107 = icmp sgt i32 %nk, 0 + br i1 %cmp107, label %for.body11.lr.ph, label %for.inc25 + +for.body11.lr.ph: ; preds = %for.body6 + br label %for.body11 + +for.body11: ; preds = %for.body11.lr.ph, %for.body11 + %indvars.iv21 = phi i64 [ 0, %for.body11.lr.ph ], [ %indvars.iv.next22, %for.body11 ] + %arrayidx15 = getelementptr inbounds [1100 x double], [1100 x double]* %A, i64 %indvars.iv29, i64 %indvars.iv21 + %0 = load double, double* %arrayidx15, align 8 + %mul = fmul double %0, %alpha + %arrayidx19 = getelementptr inbounds [900 x double], [900 x double]* %B, i64 %indvars.iv21, i64 %indvars.iv25 + %1 = load double, double* %arrayidx19, align 8 + %mul20 = 
fmul double %mul, %1 + %arrayidx24 = getelementptr inbounds [900 x double], [900 x double]* %tmp, i64 %indvars.iv29, i64 %indvars.iv25 + %2 = load double, double* %arrayidx24, align 8 + %add = fadd double %2, %mul20 + store double %add, double* %arrayidx24, align 8 + %indvars.iv.next22 = add nuw nsw i64 %indvars.iv21, 1 + %wide.trip.count23 = zext i32 %nk to i64 + %exitcond24 = icmp ne i64 %indvars.iv.next22, %wide.trip.count23 + br i1 %exitcond24, label %for.body11, label %for.cond9.for.inc25_crit_edge + +for.cond9.for.inc25_crit_edge: ; preds = %for.body11 + br label %for.inc25 + +for.inc25: ; preds = %for.cond9.for.inc25_crit_edge, %for.body6 + %indvars.iv.next26 = add nuw nsw i64 %indvars.iv25, 1 + %wide.trip.count27 = zext i32 %nj to i64 + %exitcond28 = icmp ne i64 %indvars.iv.next26, %wide.trip.count27 + br i1 %exitcond28, label %for.body6, label %for.cond4.for.inc28_crit_edge + +for.cond4.for.inc28_crit_edge: ; preds = %for.inc25 + br label %for.inc28 + +for.inc28: ; preds = %for.cond4.for.inc28_crit_edge, %for.cond4.preheader + %indvars.iv.next30 = add nuw nsw i64 %indvars.iv29, 1 + %wide.trip.count31 = zext i32 %ni to i64 + %exitcond32 = icmp ne i64 %indvars.iv.next30, %wide.trip.count31 + br i1 %exitcond32, label %for.cond4.preheader, label %for.cond.for.cond31.preheader_crit_edge + +for.cond34.preheader: ; preds = %for.cond34.preheader.lr.ph, %for.inc65 + %indvars.iv17 = phi i64 [ 0, %for.cond34.preheader.lr.ph ], [ %indvars.iv.next18, %for.inc65 ] + %cmp353 = icmp sgt i32 %nl, 0 + br i1 %cmp353, label %for.body36.lr.ph, label %for.inc65 + +for.body36.lr.ph: ; preds = %for.cond34.preheader + br label %for.body36 + +for.body36: ; preds = %for.body36.lr.ph, %for.inc62 + %indvars.iv13 = phi i64 [ 0, %for.body36.lr.ph ], [ %indvars.iv.next14, %for.inc62 ] + %arrayidx40 = getelementptr inbounds [1200 x double], [1200 x double]* %D, i64 %indvars.iv17, i64 %indvars.iv13 + %3 = load double, double* %arrayidx40, align 8 + %mul41 = fmul double %3, %beta + store 
double %mul41, double* %arrayidx40, align 8 + %cmp431 = icmp sgt i32 %nj, 0 + br i1 %cmp431, label %for.body44.lr.ph, label %for.inc62 + +for.body44.lr.ph: ; preds = %for.body36 + br label %for.body44 + +for.body44: ; preds = %for.body44.lr.ph, %for.body44 + %indvars.iv = phi i64 [ 0, %for.body44.lr.ph ], [ %indvars.iv.next, %for.body44 ] + %arrayidx48 = getelementptr inbounds [900 x double], [900 x double]* %tmp, i64 %indvars.iv17, i64 %indvars.iv + %4 = load double, double* %arrayidx48, align 8 + %arrayidx52 = getelementptr inbounds [1200 x double], [1200 x double]* %C, i64 %indvars.iv, i64 %indvars.iv13 + %5 = load double, double* %arrayidx52, align 8 + %mul53 = fmul double %4, %5 + %arrayidx57 = getelementptr inbounds [1200 x double], [1200 x double]* %D, i64 %indvars.iv17, i64 %indvars.iv13 + %6 = load double, double* %arrayidx57, align 8 + %add58 = fadd double %6, %mul53 + store double %add58, double* %arrayidx57, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %wide.trip.count = zext i32 %nj to i64 + %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.body44, label %for.cond42.for.inc62_crit_edge + +for.cond42.for.inc62_crit_edge: ; preds = %for.body44 + br label %for.inc62 + +for.inc62: ; preds = %for.cond42.for.inc62_crit_edge, %for.body36 + %indvars.iv.next14 = add nuw nsw i64 %indvars.iv13, 1 + %wide.trip.count15 = zext i32 %nl to i64 + %exitcond16 = icmp ne i64 %indvars.iv.next14, %wide.trip.count15 + br i1 %exitcond16, label %for.body36, label %for.cond34.for.inc65_crit_edge + +for.cond34.for.inc65_crit_edge: ; preds = %for.inc62 + br label %for.inc65 + +for.inc65: ; preds = %for.cond34.for.inc65_crit_edge, %for.cond34.preheader + %indvars.iv.next18 = add nuw nsw i64 %indvars.iv17, 1 + %wide.trip.count19 = zext i32 %ni to i64 + %exitcond20 = icmp ne i64 %indvars.iv.next18, %wide.trip.count19 + br i1 %exitcond20, label %for.cond34.preheader, label %for.cond31.for.end67_crit_edge + 
+for.cond31.for.end67_crit_edge: ; preds = %for.inc65 + br label %for.end67 + +for.end67: ; preds = %for.cond31.for.end67_crit_edge, %for.cond31.preheader + ret void +} + +declare i32 @strcmp(i8*, i8*) #1 + +; Function Attrs: noinline nounwind ssp uwtable +define internal void @print_array(i32 %ni, i32 %nl, [1200 x double]* %D) #0 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + %0 = load %struct.__sFILE*, %struct.__sFILE** @__stderrp, align 8 + %1 = tail call i64 @fwrite(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), i64 22, i64 1, %struct.__sFILE* %0) + %2 = load %struct.__sFILE*, %struct.__sFILE** @__stderrp, align 8 + %call1 = tail call i32 (%struct.__sFILE*, i8*, ...) @fprintf(%struct.__sFILE* %2, i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str.2, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.3, i64 0, i64 0)) #2 + %cmp4 = icmp sgt i32 %ni, 0 + br i1 %cmp4, label %for.cond2.preheader.lr.ph, label %for.end12 + +for.cond2.preheader.lr.ph: ; preds = %entry.split + %3 = sext i32 %ni to i64 + br label %for.cond2.preheader + +for.cond2.preheader: ; preds = %for.cond2.preheader.lr.ph, %for.inc10 + %indvars.iv8 = phi i64 [ 0, %for.cond2.preheader.lr.ph ], [ %indvars.iv.next9, %for.inc10 ] + %cmp31 = icmp sgt i32 %nl, 0 + br i1 %cmp31, label %for.body4.lr.ph, label %for.inc10 + +for.body4.lr.ph: ; preds = %for.cond2.preheader + br label %for.body4 + +for.body4: ; preds = %for.body4.lr.ph, %if.end + %indvars.iv = phi i64 [ 0, %for.body4.lr.ph ], [ %indvars.iv.next, %if.end ] + %4 = mul nsw i64 %indvars.iv8, %3 + %5 = add nsw i64 %indvars.iv, %4 + %6 = trunc i64 %5 to i32 + %rem = srem i32 %6, 20 + %cmp5 = icmp eq i32 %rem, 0 + br i1 %cmp5, label %if.then, label %if.end + +if.then: ; preds = %for.body4 + %7 = load %struct.__sFILE*, %struct.__sFILE** @__stderrp, align 8 + %fputc = tail call i32 @fputc(i32 10, %struct.__sFILE* %7) + br label %if.end + +if.end: ; preds = %if.then, %for.body4 + %8 
= load %struct.__sFILE*, %struct.__sFILE** @__stderrp, align 8 + %arrayidx8 = getelementptr inbounds [1200 x double], [1200 x double]* %D, i64 %indvars.iv8, i64 %indvars.iv + %9 = load double, double* %arrayidx8, align 8 + %call9 = tail call i32 (%struct.__sFILE*, i8*, ...) @fprintf(%struct.__sFILE* %8, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str.5, i64 0, i64 0), double %9) #2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %wide.trip.count = zext i32 %nl to i64 + %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.body4, label %for.cond2.for.inc10_crit_edge + +for.cond2.for.inc10_crit_edge: ; preds = %if.end + br label %for.inc10 + +for.inc10: ; preds = %for.cond2.for.inc10_crit_edge, %for.cond2.preheader + %indvars.iv.next9 = add nuw nsw i64 %indvars.iv8, 1 + %wide.trip.count11 = zext i32 %ni to i64 + %exitcond12 = icmp ne i64 %indvars.iv.next9, %wide.trip.count11 + br i1 %exitcond12, label %for.cond2.preheader, label %for.cond.for.end12_crit_edge + +for.cond.for.end12_crit_edge: ; preds = %for.inc10 + br label %for.end12 + +for.end12: ; preds = %for.cond.for.end12_crit_edge, %entry.split + %10 = load %struct.__sFILE*, %struct.__sFILE** @__stderrp, align 8 + %call13 = tail call i32 (%struct.__sFILE*, i8*, ...) @fprintf(%struct.__sFILE* %10, i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str.6, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.3, i64 0, i64 0)) #2 + %11 = load %struct.__sFILE*, %struct.__sFILE** @__stderrp, align 8 + %12 = tail call i64 @fwrite(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.7, i64 0, i64 0), i64 22, i64 1, %struct.__sFILE* %11) + ret void +} + +declare void @free(i8*) #1 + +declare i32 @fprintf(%struct.__sFILE*, i8*, ...) 
#1 + +; Function Attrs: nounwind +declare i64 @fwrite(i8* nocapture, i64, i64, %struct.__sFILE* nocapture) #2 + +; Function Attrs: nounwind +declare i32 @fputc(i32, %struct.__sFILE* nocapture) #2 + +attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"PIC Level", i32 2} +!1 = !{!"clang version 5.0.0 (http://llvm.org/git/clang.git 88423634c5b7688be89586484a68e5a1167b1280) (http://llvm.org/git/llvm.git 405d07a3a61aff7681568173e5e44c09fc7ff945)"} \ No newline at end of file