This is an archive of the discontinued LLVM Phabricator instance.

[MemCpyOptimizer] Change required analysis order for BasicAA/PhiValuesAnalysis
ClosedPublic

Authored by dmgreen on Sep 2 2020, 6:58 AM.

Details

Summary

This is a followup to D74494, which made a number of changes including the apparently innocuous reordering of required passes in MemCpyOptimizer. This however altered the creation order of BasicAA vs Phi Values analysis, meaning BasicAA did not pick up PhiValues as a cached result. Instead if we require MemoryDependence first it will require PhiValuesAnalysis allowing BasicAA to use it for better results.

I don't claim this is an excellent design, but it fixes a nasty little regressions we got from D74494 where a query later in JumpThreading was getting worse results.

Diff Detail

Event Timeline

dmgreen created this revision.Sep 2 2020, 6:58 AM
Herald added a project: Restricted Project. · View Herald TranscriptSep 2 2020, 6:58 AM
dmgreen requested review of this revision.Sep 2 2020, 6:58 AM
nikic added a comment.Sep 2 2020, 8:48 AM

Is it feasible to add a phase ordering test case for this?

asbirlea accepted this revision.Sep 2 2020, 12:11 PM

I realized I accidentally reordered passes, but didn't realize the impact of not using phi values in basicaa.

This revision is now accepted and ready to land.Sep 2 2020, 12:11 PM

Thanks

Is it feasible to add a phase ordering test case for this?

I had a go at creating a test. The original is a fairly large printf style state machine, and the reduced case doesn't look a lot better to me. It's easy enough to show that there are differences, but the test feels like it will be more trouble than it's worth! It would look something like this, let me know if you think it would still be useful:

diff --git a/llvm/test/Transforms/PhaseOrdering/phivaluesorder.ll b/llvm/test/Transforms/PhaseOrdering/phivaluesorder.ll
index e2ad840970a..b8f9de2df12 100644
--- a/llvm/test/Transforms/PhaseOrdering/phivaluesorder.ll
+++ b/llvm/test/Transforms/PhaseOrdering/phivaluesorder.ll
@@ -1,98 +1,102 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -O2 -S < %s | FileCheck %s
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv8.1m.main-arm-none-eabi"
 
 define void @q(i8* %s) {
 ; CHECK-LABEL: @q(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[E:%.*]] = alloca [2 x i32], align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i8, i8* [[S:%.*]], align 1
 ; CHECK-NEXT:    [[TOBOOL_NOT4:%.*]] = icmp eq i8 [[TMP0]], 0
-; CHECK-NEXT:    br i1 [[TOBOOL_NOT4]], label [[FOR_COND_PREHEADER:%.*]], label [[SW_BB_I:%.*]]
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT4]], label [[FOR_COND_PREHEADER:%.*]], label [[SW_BB_I_PREHEADER_PREHEADER:%.*]]
+; CHECK:       sw.bb.i.preheader.preheader:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[E]], i32 0, i32 1
+; CHECK-NEXT:    br label [[SW_BB_I:%.*]]
 ; CHECK:       sw.bb.i:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi i8 [ [[DOTBE:%.*]], [[SW_BB_I_BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[K_015_I:%.*]] = phi i8* [ [[K_015_I_BE:%.*]], [[SW_BB_I_BACKEDGE]] ], [ [[S]], [[ENTRY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i8 [ [[TMP0]], [[SW_BB_I_PREHEADER_PREHEADER]] ], [ [[DOTBE:%.*]], [[SW_BB_I_BACKEDGE:%.*]] ]
+; CHECK-NEXT:    [[K_015_I:%.*]] = phi i8* [ [[S]], [[SW_BB_I_PREHEADER_PREHEADER]] ], [ [[K_015_I_BE:%.*]], [[SW_BB_I_BACKEDGE]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], 57
 ; CHECK-NEXT:    [[TOBOOL4_NOT_I:%.*]] = icmp eq i8 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[TOBOOL4_NOT_I]], label [[FOR_INC_I:%.*]], label [[FOR_INC_THREAD_I:%.*]]
-; CHECK:       for.inc.thread.i:
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* inttoptr (i32 4 to i32*), align 4
-; CHECK-NEXT:    [[INC_I:%.*]] = add nsw i32 [[TMP3]], 1
-; CHECK-NEXT:    store i32 [[INC_I]], i32* inttoptr (i32 4 to i32*), align 4
-; CHECK-NEXT:    [[INCDEC_PTR18_I:%.*]] = getelementptr inbounds i8, i8* [[K_015_I]], i32 1
-; CHECK-NEXT:    br label [[P_EXIT:%.*]]
+; CHECK-NEXT:    br i1 [[TOBOOL4_NOT_I]], label [[FOR_INC_I:%.*]], label [[P_EXIT:%.*]]
 ; CHECK:       for.inc.i:
 ; CHECK-NEXT:    [[INCDEC_PTR_I:%.*]] = getelementptr inbounds i8, i8* [[K_015_I]], i32 1
-; CHECK-NEXT:    [[TMP4:%.*]] = load i8, i8* [[INCDEC_PTR_I]], align 1
-; CHECK-NEXT:    [[TOBOOL_NOT_I_NOT:%.*]] = icmp eq i8 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TOBOOL_NOT_I_NOT]], label [[P_EXIT]], label [[SW_BB_I_BACKEDGE]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i8, i8* [[INCDEC_PTR_I]], align 1
+; CHECK-NEXT:    [[TOBOOL_NOT_I_NOT:%.*]] = icmp eq i8 [[TMP3]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT_I_NOT]], label [[P_EXIT_THREAD:%.*]], label [[SW_BB_I_BACKEDGE]]
 ; CHECK:       sw.bb.i.backedge:
-; CHECK-NEXT:    [[DOTBE]] = phi i8 [ [[TMP4]], [[FOR_INC_I]] ], [ [[TMP6:%.*]], [[P_EXIT]] ]
-; CHECK-NEXT:    [[K_015_I_BE]] = phi i8* [ [[INCDEC_PTR_I]], [[FOR_INC_I]] ], [ [[K_0_LCSSA_I:%.*]], [[P_EXIT]] ]
+; CHECK-NEXT:    [[DOTBE]] = phi i8 [ [[TMP3]], [[FOR_INC_I]] ], [ [[DOTPR:%.*]], [[P_EXIT]] ]
+; CHECK-NEXT:    [[K_015_I_BE]] = phi i8* [ [[INCDEC_PTR_I]], [[FOR_INC_I]] ], [ [[INCDEC_PTR18_I:%.*]], [[P_EXIT]] ]
 ; CHECK-NEXT:    br label [[SW_BB_I]]
+; CHECK:       p.exit.thread:
+; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[E]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
+; CHECK-NEXT:    [[INC8:%.*]] = add nsw i32 [[TMP4]], 1
+; CHECK-NEXT:    store i32 [[INC8]], i32* [[ARRAYIDX7]], align 4
+; CHECK-NEXT:    br label [[FOR_COND_PREHEADER]]
 ; CHECK:       p.exit:
-; CHECK-NEXT:    [[M_0_LCSSA_I:%.*]] = phi i32 [ 1, [[FOR_INC_THREAD_I]] ], [ 0, [[FOR_INC_I]] ]
-; CHECK-NEXT:    [[K_0_LCSSA_I]] = phi i8* [ [[INCDEC_PTR18_I]], [[FOR_INC_THREAD_I]] ], [ [[INCDEC_PTR_I]], [[FOR_INC_I]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[E]], i32 0, i32 [[M_0_LCSSA_I]]
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP5]], 1
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* inttoptr (i32 4 to i32*), align 4
+; CHECK-NEXT:    [[INC_I:%.*]] = add nsw i32 [[TMP5]], 1
+; CHECK-NEXT:    store i32 [[INC_I]], i32* inttoptr (i32 4 to i32*), align 4
+; CHECK-NEXT:    [[INCDEC_PTR18_I]] = getelementptr inbounds i8, i8* [[K_015_I]], i32 1
+; CHECK-NEXT:    [[DOTPR]] = load i8, i8* [[INCDEC_PTR18_I]], align 1
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP6]], 1
 ; CHECK-NEXT:    store i32 [[INC]], i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[TMP6]] = load i8, i8* [[K_0_LCSSA_I]], align 1
-; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP6]], 0
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i8 [[DOTPR]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[FOR_COND_PREHEADER]], label [[SW_BB_I_BACKEDGE]]
 ; CHECK:       for.cond.preheader:
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK:       for.cond:
 ; CHECK-NEXT:    br label [[FOR_COND]]
 ;
 entry:
   %e = alloca [2 x i32], align 4
   %0 = bitcast [2 x i32]* %e to i8*
   %1 = load i8, i8* %s, align 1
   %tobool.not4 = icmp eq i8 %1, 0
   br i1 %tobool.not4, label %for.cond.preheader, label %sw.bb.i.preheader
 
 sw.bb.i.preheader:                                ; preds = %entry, %p.exit
   %2 = phi i8 [ %8, %p.exit ], [ %1, %entry ]
   %g.05 = phi i8* [ %k.0.lcssa.i, %p.exit ], [ %s, %entry ]
   br label %sw.bb.i
 
 for.cond.preheader:                               ; preds = %p.exit, %entry
   br label %for.cond
 
 sw.bb.i:                                          ; preds = %sw.bb.i.preheader, %for.inc.i
   %3 = phi i8 [ %6, %for.inc.i ], [ %2, %sw.bb.i.preheader ]
   %k.015.i = phi i8* [ %incdec.ptr.i, %for.inc.i ], [ %g.05, %sw.bb.i.preheader ]
   %4 = and i8 %3, 57
   %tobool4.not.i = icmp eq i8 %4, 0
   br i1 %tobool4.not.i, label %for.inc.i, label %for.inc.thread.i
 
 for.inc.thread.i:                                 ; preds = %sw.bb.i
   %5 = load i32, i32* inttoptr (i32 4 to i32*), align 4
   %inc.i = add nsw i32 %5, 1
   store i32 %inc.i, i32* inttoptr (i32 4 to i32*), align 4
   %incdec.ptr18.i = getelementptr inbounds i8, i8* %k.015.i, i32 1
   br label %p.exit
 
 for.inc.i:                                        ; preds = %sw.bb.i
   %incdec.ptr.i = getelementptr inbounds i8, i8* %k.015.i, i32 1
   %6 = load i8, i8* %incdec.ptr.i, align 1
   %tobool.not.i.not = icmp eq i8 %6, 0
   br i1 %tobool.not.i.not, label %p.exit, label %sw.bb.i
 
 p.exit:                                           ; preds = %for.inc.i, %for.inc.thread.i
   %m.0.lcssa.i = phi i32 [ 1, %for.inc.thread.i ], [ 0, %for.inc.i ]
   %k.0.lcssa.i = phi i8* [ %incdec.ptr18.i, %for.inc.thread.i ], [ %incdec.ptr.i, %for.inc.i ]
   %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* %e, i32 0, i32 %m.0.lcssa.i
   %7 = load i32, i32* %arrayidx, align 4
   %inc = add nsw i32 %7, 1
   store i32 %inc, i32* %arrayidx, align 4
   %8 = load i8, i8* %k.0.lcssa.i, align 1
   %tobool.not = icmp eq i8 %8, 0
   br i1 %tobool.not, label %for.cond.preheader, label %sw.bb.i.preheader
 
 for.cond:                                         ; preds = %for.cond.preheader, %for.cond
   br label %for.cond
 }