Index: lib/Transform/ScheduleOptimizer.cpp =================================================================== --- lib/Transform/ScheduleOptimizer.cpp +++ lib/Transform/ScheduleOptimizer.cpp @@ -1014,6 +1014,14 @@ int Car = floor( (FirstCacheLevelAssociativity - 1) / (1 + static_cast(MicroKernelParams.Nr) / MicroKernelParams.Mr)); + + // Car can be computed to be zero since it is floored to an int. + // On Mac OS, division by 0 does not raise a signal. This causes negative + // tile sizes to be computed. Prevent the division by a zero-valued Cac by + // returning early if this happens. + if (Car == 0) + return {1, 1, 1}; + auto ElementSize = getMatMulAlignTypeSize(MMI); assert(ElementSize > 0 && "The element size of the matrix multiplication " "operands should be greater than zero."); @@ -1022,8 +1030,13 @@ double Cac = static_cast(Kc * ElementSize * SecondCacheLevelAssociativity) / SecondCacheLevelSize; + int Mc = floor((SecondCacheLevelAssociativity - 2) / Cac); int Nc = PollyPatternMatchingNcQuotient * MicroKernelParams.Nr; + + assert(Mc > 0 && Nc > 0 && Kc > 0 && + "Matrix block sizes should be " + " greater than zero"); return {Mc, Nc, Kc}; } Index: test/ScheduleOptimizer/ensure-correct-tile-sizes.ll =================================================================== --- /dev/null +++ test/ScheduleOptimizer/ensure-correct-tile-sizes.ll @@ -0,0 +1,84 @@ +; Test that Polly does not crash due to configurations that can lead to +; incorrect tile size computations. +; The parameters are set up such that Car in `getMacroKernelParams` is evaluated +; to 0. 
+; +; RUN: opt -polly-process-unprofitable -polly-remarks-minimal -polly-opt-isl -polly-pattern-matching-based-opts=true -polly-target-throughput-vector-fma=1 -polly-target-latency-vector-fma=1 -polly-ast -polly-target-vector-register-bitwidth=4096 -polly-target-1st-cache-level-associativity=3 < %s +; +; static const int N = 3000; +; +; void f(int A[N][N], int B[N][N], int C[N][N]) { +; +; for (int i = 0; i < N; i++) { +; for (int j = 0; j < N; j++) { +; A[i][j] = 0; +; for (int k = 0; k < N; k++) { +; A[i][j] += B[i][k] * C[k][j]; +; } +; } +; } +; } +; +source_filename = "testbed.c" +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +define void @f([3000 x i32]* %A, [3000 x i32]* %B, [3000 x i32]* %C) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc24, %entry + %indvars.iv4 = phi i64 [ %indvars.iv.next5, %for.inc24 ], [ 0, %entry ] + %exitcond6 = icmp ne i64 %indvars.iv4, 3000 + br i1 %exitcond6, label %for.body, label %for.end26 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc21, %for.body + %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc21 ], [ 0, %for.body ] + %exitcond3 = icmp ne i64 %indvars.iv1, 3000 + br i1 %exitcond3, label %for.body3, label %for.end23 + +for.body3: ; preds = %for.cond1 + %arrayidx5 = getelementptr inbounds [3000 x i32], [3000 x i32]* %A, i64 %indvars.iv4, i64 %indvars.iv1 + store i32 0, i32* %arrayidx5, align 4 + br label %for.cond6 + +for.cond6: ; preds = %for.inc, %for.body3 + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body3 ] + %exitcond = icmp ne i64 %indvars.iv, 3000 + br i1 %exitcond, label %for.body8, label %for.end + +for.body8: ; preds = %for.cond6 + %arrayidx12 = getelementptr inbounds [3000 x i32], [3000 x i32]* %B, i64 %indvars.iv4, i64 %indvars.iv + %tmp = load i32, i32* %arrayidx12, align 4 + %arrayidx16 = getelementptr inbounds [3000 x i32], [3000 x i32]* %C, i64 %indvars.iv, i64 %indvars.iv1 + %tmp7 = load i32, i32* %arrayidx16, 
align 4 + %mul = mul nsw i32 %tmp, %tmp7 + %arrayidx20 = getelementptr inbounds [3000 x i32], [3000 x i32]* %A, i64 %indvars.iv4, i64 %indvars.iv1 + %tmp8 = load i32, i32* %arrayidx20, align 4 + %add = add nsw i32 %tmp8, %mul + store i32 %add, i32* %arrayidx20, align 4 + br label %for.inc + +for.inc: ; preds = %for.body8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond6 + +for.end: ; preds = %for.cond6 + br label %for.inc21 + +for.inc21: ; preds = %for.end + %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1 + br label %for.cond1 + +for.end23: ; preds = %for.cond1 + br label %for.inc24 + +for.inc24: ; preds = %for.end23 + %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1 + br label %for.cond + +for.end26: ; preds = %for.cond + ret void +}