Index: lib/Analysis/ScopBuilder.cpp =================================================================== --- lib/Analysis/ScopBuilder.cpp +++ lib/Analysis/ScopBuilder.cpp @@ -2119,14 +2119,28 @@ joinOrderedPHIs(UnionFind, ModeledInsts); // The list of instructions for statement (statement represented by the leader - // instruction). The order of statements instructions is reversed such that - // the epilogue is first. This makes it easier to ensure that the epilogue is - // the last statement. + // instruction). MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList; + // Order of instruction groups needs to be preserved w.r.t. the original order + // of ordered instructions in basic block, rather than order of leader + // instructions. We scan the ordered instructions and insert elements for + // the leaders in a standalone loop. + for (Instruction &Inst : *BB) { + if (!isOrderedInstruction(&Inst)) + continue; + + auto LeaderIt = UnionFind.findLeader(&Inst); + if (LeaderIt == UnionFind.member_end()) + continue; + + // Insert element for the leader instruction. + (void)LeaderToInstList[*LeaderIt]; + } + // Collect the instructions of all leaders. UnionFind's member iterator // unfortunately are not in any specific order. - for (Instruction &Inst : reverse(*BB)) { + for (Instruction &Inst : *BB) { auto LeaderIt = UnionFind.findLeader(&Inst); if (LeaderIt == UnionFind.member_end()) continue; @@ -2140,13 +2154,12 @@ // Finally build the statements. int Count = 0; long BBIdx = scop->getNextStmtIdx(); - for (auto &Instructions : reverse(LeaderToInstList)) { + for (auto &Instructions : LeaderToInstList) { std::vector<Instruction *> &InstList = Instructions.second; // If there is no main instruction, make the first statement the main. bool IsMain = (MainInst ? 
MainLeader == Instructions.first : Count == 0); - std::reverse(InstList.begin(), InstList.end()); std::string Name = makeStmtName(BB, BBIdx, Count, IsMain); scop->addScopStmt(BB, Name, L, std::move(InstList)); Count += 1; Index: test/ScopInfo/preserve-equiv-class-order-in-basic_block.ll =================================================================== --- /dev/null +++ test/ScopInfo/preserve-equiv-class-order-in-basic_block.ll @@ -0,0 +1,110 @@ +; RUN: opt %loadPolly -polly-stmt-granularity=scalar-indep -polly-print-instructions -polly-scops -analyze < %s | FileCheck %s -match-full-lines + +target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" + +$"??_C@_03NBBHOIGN@?$CFX?6?$AA@" = comdat any + +@b = dso_local local_unnamed_addr global i32 1, align 4 +@e = dso_local local_unnamed_addr global i32 3, align 4 +@a = common dso_local local_unnamed_addr global [56 x i32] zeroinitializer, align 16 +@f = common dso_local local_unnamed_addr global i16 0, align 2 +@d = common dso_local local_unnamed_addr global i8 0, align 1 +@"??_C@_03NBBHOIGN@?$CFX?6?$AA@" = linkonce_odr dso_local unnamed_addr constant [4 x i8] c"%X\0A\00", comdat, align 1 + +; Function Attrs: nounwind uwtable +define dso_local i32 @main() local_unnamed_addr #0 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body, %entry.split + %indvars.iv = phi i64 [ 0, %entry.split ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds [56 x i32], [56 x i32]* @a, i64 0, i64 %indvars.iv + %0 = trunc i64 %indvars.iv to i32 + store i32 %0, i32* %arrayidx, align 4, !tbaa !3 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 56 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + %1 = load i32, i32* @e, align 4, !tbaa !3 + store i32 2, i32* @e, align 4, !tbaa !3 + %2 = trunc i32 %1 to i16 + %conv = and i16 %2, 1 + %tobool = icmp eq i16 %conv, 0 + br label 
%for.body3 + +for.body3: ; preds = %for.end, %for.inc11 + %storemerge20 = phi i32 [ 2, %for.end ], [ %dec12, %for.inc11 ] + %3 = load i8, i8* @d, align 1 + %cmp6 = icmp eq i8 %3, 8 + %or.cond = or i1 %tobool, %cmp6 + br i1 %or.cond, label %for.inc11, label %for.inc11.loopexit + +for.inc11.loopexit: ; preds = %for.body3 + store i32 0, i32* @b, align 4, !tbaa !3 + store i8 8, i8* @d, align 1, !tbaa !7 + br label %for.inc11 + +for.inc11: ; preds = %for.inc11.loopexit, %for.body3 + %dec12 = add nsw i32 %storemerge20, -1 + %cmp2 = icmp sgt i32 %storemerge20, -18 + br i1 %cmp2, label %for.body3, label %for.end13 + +for.end13: ; preds = %for.inc11 + store i16 %conv, i16* @f, align 2, !tbaa !8 + store i32 -19, i32* @e, align 4, !tbaa !3 + %4 = load i32, i32* @b, align 4, !tbaa !3 + tail call void bitcast (void (...)* @printf to void (i8*, i32)*)(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @"??_C@_03NBBHOIGN@?$CFX?6?$AA@", i64 0, i64 0), i32 %4) #2 + ret i32 0 +} + +declare dso_local void @printf(...) 
local_unnamed_addr #1 + +attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 2} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 8.0.0 (trunk 334718) (llvm/trunk 338031)"} +!3 = !{!4, !4, i64 0} +!4 = !{!"int", !5, i64 0} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C/C++ TBAA"} +!7 = !{!5, !5, i64 0} +!8 = !{!9, !9, i64 0} +!9 = !{!"short", !5, i64 0} + +; CHECK: Stmt_for_end_a +; CHECK-NEXT: Domain := +; CHECK-NEXT: { Stmt_for_end_a[] }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: { Stmt_for_end_a[] -> [1, 0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_end_a[] -> MemRef_e[0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_for_end_a[] -> MemRef_tobool[] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_for_end_a[] -> MemRef_conv[] }; +; CHECK-NEXT: Instructions { +; CHECK-NEXT: %1 = load i32, i32* @e, align 4, !tbaa !3 +; CHECK-NEXT: %2 = trunc i32 %1 to i16 +; 
CHECK-NEXT: %conv = and i16 %2, 1 +; CHECK-NEXT: %tobool = icmp eq i16 %conv, 0 +; CHECK-NEXT: } +; CHECK-NEXT: Stmt_for_end +; CHECK-NEXT: Domain := +; CHECK-NEXT: { Stmt_for_end[] }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: { Stmt_for_end[] -> [2, 0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_end[] -> MemRef_e[0] }; +; CHECK-NEXT: Instructions { +; CHECK-NEXT: store i32 2, i32* @e, align 4, !tbaa !3 +; CHECK-NEXT: }