Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -579,7 +579,8 @@ UDomain = isl_union_set_from_set(getStatement()->getDomain()); USchedule = isl_union_map_intersect_domain(USchedule, UDomain); Schedule = isl_map_from_union_map(USchedule); - ScheduledAccRel = isl_map_apply_domain(getAddressFunction(), Schedule); + ScheduledAccRel = isl_map_apply_domain(getAccessRelation(), Schedule); + ScheduledAccRel = isl_map_lexmin(ScheduledAccRel); return isl_pw_multi_aff_from_map(ScheduledAccRel); } Index: test/Isl/CodeGen/pattern-matching-based-opts.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/pattern-matching-based-opts.ll @@ -0,0 +1,113 @@ +; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true -polly-target-througput-vector-fma=1 -polly-target-latency-vector-fma=7 -polly-target-cache-level-associativity=8,8 -polly-target-cache-level-sizes=32768,262144 -polly-codegen -S < %s | FileCheck %s +; +; /* C := alpha*A*B + beta*C */ +; /* _PB_NK % Kc != 0 */ +; for (i = 0; i < _PB_NI; i++) +; for (j = 0; j < _PB_NJ; j++) +; { +; C[i][j] *= beta; +; for (k = 0; k < _PB_NK; ++k) +; C[i][j] += alpha * A[i][k] * B[k][j]; +; } +; +; CHECK:polly.loop_header121: ; preds = %polly.loop_header121, %polly.loop_preheader122 +; CHECK-NEXT: %polly.indvar125 = phi i64 [ %47, %polly.loop_preheader122 ], [ %polly.indvar_next126, %polly.loop_header121 ] +; CHECK-NEXT: %polly.access.cast.arg6129 = bitcast [1023 x double]* %arg6 to double* +; CHECK-NEXT: %polly.access.mul.arg6130 = mul nsw i64 %polly.indvar116, 1023 +; CHECK-NEXT: %polly.access.add.arg6131 = add nsw i64 %polly.access.mul.arg6130, %polly.indvar125 +; CHECK-NEXT: %polly.access.arg6132 = getelementptr double, double* %polly.access.cast.arg6129, i64 %polly.access.add.arg6131 +; CHECK-NEXT: %polly.access.arg6132.load = load double, double* 
%polly.access.arg6132 +; CHECK-NEXT: %polly.access.cast.Packed_A = bitcast [8160 x [3 x double]]* %Packed_A to double* +; CHECK-NEXT: %52 = mul nsw i64 -170, %polly.indvar83 +; CHECK-NEXT: %53 = mul nsw i64 8160, %polly.indvar108 +; CHECK-NEXT: %54 = sub nsw i64 %52, %53 +; CHECK-NEXT: %55 = mul nsw i64 170, %polly.indvar116 +; CHECK-NEXT: %56 = add nsw i64 %54, %55 +; CHECK-NEXT: %57 = add nsw i64 %56, %polly.indvar125 +; CHECK-NEXT: %58 = add nsw i64 %polly.indvar116, 1 +; CHECK-NEXT: %pexp.p_div_q133 = udiv i64 %58, 3 +; CHECK-NEXT: %59 = mul nsw i64 170, %pexp.p_div_q133 +; CHECK-NEXT: %60 = add nsw i64 %57, %59 +; CHECK-NEXT: %61 = sub nsw i64 0, %polly.indvar116 +; CHECK-NEXT: %62 = add nsw i64 %61, 1057 +; CHECK-NEXT: %pexp.pdiv_r134 = urem i64 %62, 3 +; CHECK-NEXT: %63 = mul nsw i64 167, %polly.indvar116 +; CHECK-NEXT: %64 = add nsw i64 %pexp.pdiv_r134, %63 +; CHECK-NEXT: %65 = add nsw i64 %64, 165 +; CHECK-NEXT: %pexp.p_div_q135 = udiv i64 %65, 167 +; CHECK-NEXT: %66 = mul nsw i64 170, %pexp.p_div_q135 +; CHECK-NEXT: %67 = sub nsw i64 %60, %66 +; CHECK-NEXT: %polly.access.mul.Packed_A = mul nsw i64 %67, 3 +; CHECK-NEXT: %68 = sub nsw i64 0, %polly.indvar116 +; CHECK-NEXT: %69 = add nsw i64 %68, 1057 +; CHECK-NEXT: %pexp.pdiv_r136 = urem i64 %69, 3 +; CHECK-NEXT: %70 = sub nsw i64 0, %pexp.pdiv_r136 +; CHECK-NEXT: %71 = mul nsw i64 3, %polly.indvar116 +; CHECK-NEXT: %72 = sub nsw i64 %70, %71 +; CHECK-NEXT: %73 = sub nsw i64 0, %polly.indvar116 +; CHECK-NEXT: %74 = add nsw i64 %73, 1057 +; CHECK-NEXT: %pexp.pdiv_r137 = urem i64 %74, 3 +; CHECK-NEXT: %75 = mul nsw i64 167, %polly.indvar116 +; CHECK-NEXT: %76 = add nsw i64 %pexp.pdiv_r137, %75 +; CHECK-NEXT: %77 = add nsw i64 %76, 165 +; CHECK-NEXT: %pexp.p_div_q138 = udiv i64 %77, 167 +; CHECK-NEXT: %78 = mul nsw i64 3, %pexp.p_div_q138 +; CHECK-NEXT: %79 = add nsw i64 %72, %78 +; CHECK-NEXT: %80 = add nsw i64 %79, 1 +; CHECK-NEXT: %polly.access.add.Packed_A = add nsw i64 %polly.access.mul.Packed_A, %80 +; 
CHECK-NEXT: %polly.access.Packed_A = getelementptr double, double* %polly.access.cast.Packed_A, i64 %polly.access.add.Packed_A
+; CHECK-NEXT: store double %polly.access.arg6132.load, double* %polly.access.Packed_A
+; CHECK-NEXT: %polly.indvar_next126 = add nsw i64 %polly.indvar125, 1
+; CHECK-NEXT: %polly.adjust_ub127 = sub i64 %51, 1
+; CHECK-NEXT: %polly.loop_cond128 = icmp sle i64 %polly.indvar125, %polly.adjust_ub127
+; CHECK-NEXT: br i1 %polly.loop_cond128, label %polly.loop_header121, label %polly.loop_exit123
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+define internal void @kernel_gemm(i32 %arg, i32 %arg1, i32 %arg2, double %arg3, double %arg4, [1056 x double]* %arg5, [1023 x double]* %arg6, [1056 x double]* %arg7) #0 { ; triple loop nest: %arg5[i][j] is scaled by %arg4, then accumulates %arg3 * %arg6[i][k] * %arg7[k][j]; %arg, %arg1 and %arg2 are not referenced in the body
+bb:
+  br label %bb8
+
+bb8: ; preds = %bb29, %bb
+  %tmp = phi i64 [ 0, %bb ], [ %tmp30, %bb29 ] ; outermost induction variable (i), starts at 0
+  br label %bb9
+
+bb9: ; preds = %bb26, %bb8
+  %tmp10 = phi i64 [ 0, %bb8 ], [ %tmp27, %bb26 ] ; middle induction variable (j), starts at 0
+  %tmp11 = getelementptr inbounds [1056 x double], [1056 x double]* %arg5, i64 %tmp, i64 %tmp10 ; address of %arg5[i][j]; reused by the innermost loop below
+  %tmp12 = load double, double* %tmp11, align 8
+  %tmp13 = fmul double %tmp12, %arg4 ; scale the element by %arg4 before the innermost loop runs
+  store double %tmp13, double* %tmp11, align 8 ; write the scaled value back to %arg5[i][j]
+  br label %Copy_0
+
+Copy_0: ; preds = %Copy_0, %bb9
+  %tmp15 = phi i64 [ 0, %bb9 ], [ %tmp24, %Copy_0 ] ; innermost induction variable (k), starts at 0
+  %tmp16 = getelementptr inbounds [1023 x double], [1023 x double]* %arg6, i64 %tmp, i64 %tmp15 ; address of %arg6[i][k]
+  %tmp17 = load double, double* %tmp16, align 8
+  %tmp18 = fmul double %tmp17, %arg3 ; %arg6[i][k] * %arg3
+  %tmp19 = getelementptr inbounds [1056 x double], [1056 x double]* %arg7, i64 %tmp15, i64 %tmp10 ; address of %arg7[k][j]
+  %tmp20 = load double, double* %tmp19, align 8
+  %tmp21 = fmul double %tmp18, %tmp20 ; (%arg6[i][k] * %arg3) * %arg7[k][j]
+  %tmp22 = load double, double* %tmp11, align 8 ; reload the current %arg5[i][j]
+  %tmp23 = fadd double %tmp22, %tmp21 ; accumulate the product into the running value
+  store double %tmp23, double* %tmp11, align 8 ; store the updated %arg5[i][j]
+  %tmp24 = add nuw nsw i64 %tmp15, 1
+  %tmp25 = icmp ne i64 %tmp24, 1023 ; innermost loop covers k = 0..1022
+  br i1 %tmp25, label %Copy_0, label %bb26
+
+bb26: ; preds = %Copy_0
+  %tmp27 = add nuw nsw i64 %tmp10, 1
+  %tmp28 = icmp ne i64 %tmp27, 1056 ; middle loop covers j = 0..1055
+  br i1 %tmp28, label %bb9, label %bb29
+
+bb29: ; preds = %bb26
+  %tmp30 = add nuw nsw i64 %tmp, 1
+  %tmp31 = icmp ne i64 %tmp30, 1056 ; outermost loop covers i = 0..1055
+  br i1 %tmp31, label %bb8, label %bb32
+
+bb32: ; preds = %bb29
+  ret void
+}
+
+attributes #0 = { nounwind uwtable "target-cpu"="x86-64" "target-features"="+aes,+avx,+cmov,+cx16,+fxsr,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" }