Index: include/llvm/Transforms/Scalar/SimplifyCFG.h
===================================================================
--- include/llvm/Transforms/Scalar/SimplifyCFG.h
+++ include/llvm/Transforms/Scalar/SimplifyCFG.h
@@ -31,16 +31,16 @@
   SimplifyCFGOptions Options;
 
 public:
-  /// The default constructor sets the pass options to create optimal IR,
-  /// rather than canonical IR. That is, by default we do transformations that
-  /// are likely to improve performance but make analysis more difficult.
-  /// FIXME: This is inverted from what most instantiations of the pass should
-  /// be.
+  /// The default constructor sets the pass options to create canonical IR,
+  /// rather than optimal IR. That is, by default we bypass transformations that
+  /// are likely to improve performance but make analysis for other passes more
+  /// difficult.
   SimplifyCFGPass()
       : SimplifyCFGPass(SimplifyCFGOptions()
-                            .forwardSwitchCondToPhi(true)
-                            .convertSwitchToLookupTable(true)
-                            .needCanonicalLoops(false)) {}
+                            .forwardSwitchCondToPhi(false)
+                            .convertSwitchToLookupTable(false)
+                            .needCanonicalLoops(true)) {}
+
 
   /// Construct a pass with optional optimizations.
   SimplifyCFGPass(const SimplifyCFGOptions &PassOptions);
Index: lib/Passes/PassBuilder.cpp
===================================================================
--- lib/Passes/PassBuilder.cpp
+++ lib/Passes/PassBuilder.cpp
@@ -751,8 +751,13 @@
   // Optimize parallel scalar instruction chains into SIMD instructions.
   OptimizePM.addPass(SLPVectorizerPass());
 
-  // Cleanup after all of the vectorizers.
-  OptimizePM.addPass(SimplifyCFGPass());
+  // Cleanup after all of the vectorizers. Simplification passes like CVP and
+  // GVN, loop transforms, and others have already run, so it's now better to
+  // convert to more optimized IR using more aggressive simplify CFG options.
+  OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions().
+                                     forwardSwitchCondToPhi(true).
+                                     convertSwitchToLookupTable(true).
+                                     needCanonicalLoops(false)));
   OptimizePM.addPass(InstCombinePass());
 
   // Unroll small loops to hide loop backedge latency and saturate any parallel
Index: test/Transforms/PhaseOrdering/simplifycfg-options.ll
===================================================================
--- test/Transforms/PhaseOrdering/simplifycfg-options.ll
+++ test/Transforms/PhaseOrdering/simplifycfg-options.ll
@@ -1,63 +1,34 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -O1 -S < %s | FileCheck %s --check-prefix=OLDPM
-; RUN: opt -passes='default<O1>' -S < %s | FileCheck %s --check-prefix=NEWPM
+; RUN: opt -O1 -S < %s | FileCheck %s --check-prefix=ALL --check-prefix=OLDPM
+; RUN: opt -passes='default<O1>' -S < %s | FileCheck %s --check-prefix=ALL --check-prefix=NEWPM
 
 ; Don't simplify unconditional branches from empty blocks in simplifyCFG
 ; until late in the pipeline because it can destroy canonical loop structure.
-; FIXME: The new pass manager is not limiting simplifycfg at any point in the pipeline,
-; so it performs a transformation before loop optimizations that is avoided in the old PM.
-
 
 define i1 @PR33605(i32 %a, i32 %b, i32* %c) {
-; OLDPM-LABEL: @PR33605(
-; OLDPM-NEXT:  for.body:
-; OLDPM-NEXT:    [[OR:%.*]] = or i32 [[B:%.*]], [[A:%.*]]
-; OLDPM-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 1
-; OLDPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; OLDPM-NEXT:    [[CMP:%.*]] = icmp eq i32 [[OR]], [[TMP0]]
-; OLDPM-NEXT:    br i1 [[CMP]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
-; OLDPM:       if.then:
-; OLDPM-NEXT:    store i32 [[OR]], i32* [[ARRAYIDX]], align 4
-; OLDPM-NEXT:    tail call void @foo()
-; OLDPM-NEXT:    br label [[IF_END]]
-; OLDPM:       if.end:
-; OLDPM-NEXT:    [[CHANGED_1_OFF0:%.*]] = phi i1 [ true, [[IF_THEN]] ], [ false, [[FOR_BODY:%.*]] ]
-; OLDPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[C]], align 4
-; OLDPM-NEXT:    [[CMP_1:%.*]] = icmp eq i32 [[OR]], [[TMP1]]
-; OLDPM-NEXT:    br i1 [[CMP_1]], label [[IF_END_1:%.*]], label [[IF_THEN_1:%.*]]
-; OLDPM:       if.then.1:
-; OLDPM-NEXT:    store i32 [[OR]], i32* [[C]], align 4
-; OLDPM-NEXT:    tail call void @foo()
-; OLDPM-NEXT:    br label [[IF_END_1]]
-; OLDPM:       if.end.1:
-; OLDPM-NEXT:    [[CHANGED_1_OFF0_1:%.*]] = phi i1 [ true, [[IF_THEN_1]] ], [ [[CHANGED_1_OFF0]], [[IF_END]] ]
-; OLDPM-NEXT:    ret i1 [[CHANGED_1_OFF0_1]]
-;
-; NEWPM-LABEL: @PR33605(
-; NEWPM-NEXT:  entry:
-; NEWPM-NEXT:    [[OR:%.*]] = or i32 [[B:%.*]], [[A:%.*]]
-; NEWPM-NEXT:    br label [[FOR_COND_OUTER:%.*]]
-; NEWPM:       for.cond.outer:
-; NEWPM-NEXT:    [[I_0_PH:%.*]] = phi i32 [ [[DEC:%.*]], [[IF_THEN:%.*]] ], [ 2, [[ENTRY:%.*]] ]
-; NEWPM-NEXT:    [[CHANGED_0_OFF0_PH:%.*]] = phi i1 [ true, [[IF_THEN]] ], [ false, [[ENTRY]] ]
-; NEWPM-NEXT:    br label [[FOR_COND:%.*]]
-; NEWPM:       for.cond:
-; NEWPM-NEXT:    [[I_0:%.*]] = phi i32 [ [[DEC]], [[FOR_BODY:%.*]] ], [ [[I_0_PH]], [[FOR_COND_OUTER]] ]
-; NEWPM-NEXT:    [[DEC]] = add nsw i32 [[I_0]], -1
-; NEWPM-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[I_0]], 0
-; NEWPM-NEXT:    br i1 [[TOBOOL]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
-; NEWPM:       for.cond.cleanup:
-; NEWPM-NEXT:    ret i1 [[CHANGED_0_OFF0_PH]]
-; NEWPM:       for.body:
-; NEWPM-NEXT:    [[IDXPROM:%.*]] = sext i32 [[DEC]] to i64
-; NEWPM-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[IDXPROM]]
-; NEWPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; NEWPM-NEXT:    [[CMP:%.*]] = icmp eq i32 [[OR]], [[TMP0]]
-; NEWPM-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_THEN]]
-; NEWPM:       if.then:
-; NEWPM-NEXT:    store i32 [[OR]], i32* [[ARRAYIDX]], align 4
-; NEWPM-NEXT:    tail call void @foo()
-; NEWPM-NEXT:    br label [[FOR_COND_OUTER]]
+; ALL-LABEL: @PR33605(
+; ALL-NEXT:  for.body:
+; ALL-NEXT:    [[OR:%.*]] = or i32 [[B:%.*]], [[A:%.*]]
+; ALL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 1
+; ALL-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[OR]], [[TMP0]]
+; ALL-NEXT:    br i1 [[CMP]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; ALL:       if.then:
+; ALL-NEXT:    store i32 [[OR]], i32* [[ARRAYIDX]], align 4
+; ALL-NEXT:    tail call void @foo()
+; ALL-NEXT:    br label [[IF_END]]
+; ALL:       if.end:
+; ALL-NEXT:    [[CHANGED_1_OFF0:%.*]] = phi i1 [ true, [[IF_THEN]] ], [ false, [[FOR_BODY:%.*]] ]
+; ALL-NEXT:    [[TMP1:%.*]] = load i32, i32* [[C]], align 4
+; ALL-NEXT:    [[CMP_1:%.*]] = icmp eq i32 [[OR]], [[TMP1]]
+; ALL-NEXT:    br i1 [[CMP_1]], label [[IF_END_1:%.*]], label [[IF_THEN_1:%.*]]
+; ALL:       if.then.1:
+; ALL-NEXT:    store i32 [[OR]], i32* [[C]], align 4
+; ALL-NEXT:    tail call void @foo()
+; ALL-NEXT:    br label [[IF_END_1]]
+; ALL:       if.end.1:
+; ALL-NEXT:    [[CHANGED_1_OFF0_1:%.*]] = phi i1 [ true, [[IF_THEN_1]] ], [ [[CHANGED_1_OFF0]], [[IF_END]] ]
+; ALL-NEXT:    ret i1 [[CHANGED_1_OFF0_1]]
 ;
 entry:
   br label %for.cond
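
For downstream users building their own new-pass-manager pipelines, the same late-pipeline behavior can be requested explicitly. The snippet below is a minimal sketch, not part of the patch: it mirrors the PassBuilder.cpp hunk above, and the helper name addLateSimplifyCFG and the assumption that the caller owns a FunctionPassManager are illustrative only.

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"

using namespace llvm;

// Schedule an "optimizing" SimplifyCFG run late in a custom pipeline. With
// this patch, a default-constructed SimplifyCFGPass produces canonical IR, so
// the aggressive options have to be opted into explicitly, just as
// PassBuilder now does after the vectorizers.
static void addLateSimplifyCFG(FunctionPassManager &FPM) {
  FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
                                  .forwardSwitchCondToPhi(true)
                                  .convertSwitchToLookupTable(true)
                                  .needCanonicalLoops(false)));
}

Placing this after the loop passes and vectorizers in a custom pipeline keeps the earlier, analysis-friendly canonical form available to those passes while still getting the switch-to-lookup-table and related optimizations before codegen.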