diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -1792,7 +1792,7 @@ template T *find_singleton(R &&Range, Predicate P, bool AllowRepeats = false) { T *RC = nullptr; - for (auto *A : Range) { + for (auto &&A : Range) { if (T *PRC = P(A, AllowRepeats)) { if (RC) { if (!AllowRepeats || PRC != RC) diff --git a/mlir/include/mlir/Conversion/ControlFlowToSCF/ControlFlowToSCF.h b/mlir/include/mlir/Conversion/ControlFlowToSCF/ControlFlowToSCF.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Conversion/ControlFlowToSCF/ControlFlowToSCF.h @@ -0,0 +1,26 @@ +//===- ControlFlowToSCF.h - ControlFlow to SCF -------------*- C++ ------*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Define conversions from the ControlFlow dialect to the SCF dialect. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_CONVERSION_CONTROLFLOWTOSCF_CONTROLFLOWTOSCF_H
+#define MLIR_CONVERSION_CONTROLFLOWTOSCF_CONTROLFLOWTOSCF_H
+
+#include <memory>
+
+namespace mlir {
+class Pass;
+
+#define GEN_PASS_DECL_LIFTCONTROLFLOWTOSCFPASS
+#include "mlir/Conversion/Passes.h.inc"
+
+} // namespace mlir
+
+#endif // MLIR_CONVERSION_CONTROLFLOWTOSCF_CONTROLFLOWTOSCF_H
diff --git a/mlir/include/mlir/Conversion/Passes.h b/mlir/include/mlir/Conversion/Passes.h
--- a/mlir/include/mlir/Conversion/Passes.h
+++ b/mlir/include/mlir/Conversion/Passes.h
@@ -22,6 +22,7 @@
 #include "mlir/Conversion/ComplexToSPIRV/ComplexToSPIRVPass.h"
 #include "mlir/Conversion/ComplexToStandard/ComplexToStandard.h"
 #include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
+#include "mlir/Conversion/ControlFlowToSCF/ControlFlowToSCF.h"
 #include "mlir/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRV.h"
 #include "mlir/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRVPass.h"
 #include "mlir/Conversion/ConvertToLLVM/ToLLVMPass.h"
diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -283,6 +283,31 @@
   ];
 }
 
+//===----------------------------------------------------------------------===//
+// ControlFlowToSCF
+//===----------------------------------------------------------------------===//
+
+def LiftControlFlowToSCFPass : Pass<"lift-cf-to-scf"> {
+  let summary = "Lift ControlFlow dialect to SCF dialect";
+  let description = [{
+    Lifts ControlFlow operations to SCF dialect operations.
+
+    This pass is prefixed with "lift" instead of "convert" as it is not always
+    guaranteed to replace all ControlFlow ops.
+    If a region contains only a single kind of return-like operation, all
+    ControlFlow operations will be replaced successfully.
+    Otherwise a single ControlFlow switch branching to one block per return-like
+    operation kind remains.
+  }];
+
+  let dependentDialects = ["scf::SCFDialect",
+                           "arith::ArithDialect",
+                           "ub::UBDialect",
+                           // TODO: This is only necessary until we have a
+                           //       ub.unreachable op.
+                           "func::FuncDialect"];
+}
+
+//===----------------------------------------------------------------------===//
+// ControlFlowToSPIRV
+//===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/IR/Block.h b/mlir/include/mlir/IR/Block.h
--- a/mlir/include/mlir/IR/Block.h
+++ b/mlir/include/mlir/IR/Block.h
@@ -59,6 +59,10 @@
   /// the specified block.
   void insertBefore(Block *block);
 
+  /// Insert this block (which must not already be in a region) right after
+  /// the specified block.
+  void insertAfter(Block *block);
+
   /// Unlink this block from its current region and insert it right before the
   /// specific block.
   void moveBefore(Block *block);
diff --git a/mlir/include/mlir/IR/ValueRange.h b/mlir/include/mlir/IR/ValueRange.h
--- a/mlir/include/mlir/IR/ValueRange.h
+++ b/mlir/include/mlir/IR/ValueRange.h
@@ -167,6 +167,14 @@
     return operator OperandRange()[index];
   }
 
+  OperandRange::iterator begin() const {
+    return static_cast<OperandRange>(*this).begin();
+  }
+
+  OperandRange::iterator end() const {
+    return static_cast<OperandRange>(*this).end();
+  }
+
 private:
   /// Update the length of this range to the one provided.
   void updateLength(unsigned newLength);
diff --git a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h
--- a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h
+++ b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h
@@ -82,6 +82,11 @@
   /// Get the range of operands that are simply forwarded to the successor.
   OperandRange getForwardedOperands() const { return forwardedOperands; }
 
+  /// Get the range of operands that are simply forwarded to the successor.
+ MutableOperandRange getMutableForwardedOperands() const { + return forwardedOperands; + } + /// Get a slice of the operands forwarded to the successor. The given range /// must not contain any operands produced by the operation. MutableOperandRange slice(unsigned subStart, unsigned subLen) const { diff --git a/mlir/include/mlir/Transforms/CFGToSCF.h b/mlir/include/mlir/Transforms/CFGToSCF.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Transforms/CFGToSCF.h @@ -0,0 +1,154 @@ +//===- CFGToSCF.h - Control Flow Graph to Structured Control Flow *- C++ -*===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header file defines a generic `transformCFGToSCF` function that can be +// used to lift any dialect operations implementing control flow graph +// operations to any dialect implementing structured control flow operations. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TRANSFORMS_CFGTOSCF_H +#define MLIR_TRANSFORMS_CFGTOSCF_H + +#include "mlir/IR/Builders.h" +#include "mlir/IR/Dominance.h" +#include "mlir/IR/Operation.h" + +namespace mlir { + +/// Interface that should be implemented by any caller of `transformCFGToSCF`. +/// The transformation requires the caller to 1) create switch-like control +/// flow operations for intermediate transformations and 2) to create +/// the desired structured control flow ops. +class CFGToSCFInterface { +public: + virtual ~CFGToSCFInterface() = default; + + /// Creates a structured control flow operation branching to one of `regions`. + /// It replaces `controlFlowCondOp` and must have `resultTypes` as results. 
+ /// `regions` contains the list of branch regions corresponding to each + /// successor of `controlFlowCondOp`. Their bodies must simply be taken and + /// left as is. + /// Returns failure if incapable of converting the control flow graph + /// operation. + virtual FailureOr createStructuredBranchRegionOp( + OpBuilder &builder, Operation *controlFlowCondOp, TypeRange resultTypes, + MutableArrayRef regions) = 0; + + /// Creates a return-like terminator for a branch region of the op returned + /// by `createStructuredBranchRegionOp`. `branchRegionOp` is the operation + /// returned by `createStructuredBranchRegionOp` while `results` are the + /// values that should be returned by the branch region. + virtual LogicalResult + createStructuredBranchRegionTerminatorOp(Location loc, OpBuilder &builder, + Operation *branchRegionOp, + ValueRange results) = 0; + + /// Creates a structured control flow operation representing a do-while loop. + /// The do-while loop is expected to have the exact same result types as the + /// types of the iteration values. + /// `loopBody` is the body of the loop. The implementation of this + /// function must create a suitable terminator op at the end of the last block + /// in `loopBody` which continues the loop if `condition` is 1 and exits the + /// loop if 0. `loopValuesNextIter` are the values that have to be passed as + /// the iteration values for the next iteration if continuing, or the result + /// of the loop if exiting. + /// `condition` is guaranteed to be of the same type as values returned by + /// `getCFGSwitchValue` with either 0 or 1 as value. + /// + /// `loopValuesInit` are the values used to initialize the iteration + /// values of the loop. + /// Returns failure if incapable of creating a loop op. 
+ virtual FailureOr createStructuredDoWhileLoopOp( + OpBuilder &builder, Operation *replacedOp, ValueRange loopValuesInit, + Value condition, ValueRange loopValuesNextIter, Region &&loopBody) = 0; + + /// Creates a constant operation with a result representing `value` that is + /// suitable as flag for `createCFGSwitchOp`. + virtual Value getCFGSwitchValue(Location loc, OpBuilder &builder, + unsigned value) = 0; + + /// Creates a switch CFG branch operation branching to one of + /// `caseDestinations` or `defaultDest`. This is used by the transformation + /// for intermediate transformations before lifting to structured control + /// flow. The switch op branches based on `flag` which is guaranteed to be of + /// the same type as values returned by `getCFGSwitchValue`. Note: + /// `caseValues` and other related ranges may be empty to represent an + /// unconditional branch. + virtual void createCFGSwitchOp(Location loc, OpBuilder &builder, Value flag, + ArrayRef caseValues, + BlockRange caseDestinations, + ArrayRef caseArguments, + Block *defaultDest, + ValueRange defaultArgs) = 0; + + /// Creates a constant operation returning an undefined instance of `type`. + /// This is required by the transformation as the lifting process might create + /// control-flow paths where an SSA-value is undefined. + virtual Value getUndefValue(Location loc, OpBuilder &builder, Type type) = 0; + + /// Creates a return-like terminator indicating unreachable. + /// This is required when the transformation encounters a statically known + /// infinite loop. Since structured control flow ops are not terminators, + /// after lifting an infinite loop, a terminator has to be placed after to + /// possibly satisfy the terminator requirement of the region originally + /// passed to `transformCFGToSCF`. 
+ /// + /// `region` is guaranteed to be the region originally passed to + /// `transformCFGToSCF` and the op is guaranteed to always be an op in a block + /// directly nested under `region` after the transformation. + /// + /// Returns failure if incapable of creating an unreachable terminator. + virtual FailureOr + createUnreachableTerminator(Location loc, OpBuilder &builder, + Region ®ion) = 0; + + /// Helper function to create an unconditional branch using + /// `createCFGSwitchOp`. + void createSingleDestinationBranch(Location loc, OpBuilder &builder, + Value dummyFlag, Block *destination, + ValueRange arguments) { + createCFGSwitchOp(loc, builder, dummyFlag, {}, {}, {}, destination, + arguments); + } + + /// Helper function to create a conditional branch using + /// `createCFGSwitchOp`. + void createConditionalBranch(Location loc, OpBuilder &builder, + Value condition, Block *trueDest, + ValueRange trueArgs, Block *falseDest, + ValueRange falseArgs) { + createCFGSwitchOp(loc, builder, condition, {0}, {falseDest}, {falseArgs}, + trueDest, trueArgs); + } +}; + +/// Transformation lifting any dialect implementing control flow graph +/// operations to a dialect implementing structured control flow operations. +/// `region` is the region that should be transformed. +/// The implementation of `interface` is responsible for the conversion of the +/// control flow operations to the structured control flow operations. +/// +/// If the region contains only a single kind of return-like operation, all +/// control flow graph operations will be converted successfully. +/// Otherwise a single control flow graph operation branching to one block +/// per return-like operation kind remains. +/// +/// The transformation currently requires that all control flow graph operations +/// have no side effects, implement the BranchOpInterface and does not have any +/// operation produced successor operands. 
+/// Returns failure if any of the preconditions are violated or if any of the +/// methods of `interface` failed. The IR is left in an unspecified state. +/// +/// Otherwise, returns true or false if any changes to the IR have been made. +FailureOr transformCFGToSCF(Region ®ion, CFGToSCFInterface &interface, + DominanceInfo &dominanceInfo); + +} // namespace mlir + +#endif // MLIR_TRANSFORMS_CFGTOSCF_H diff --git a/mlir/lib/Conversion/CMakeLists.txt b/mlir/lib/Conversion/CMakeLists.txt --- a/mlir/lib/Conversion/CMakeLists.txt +++ b/mlir/lib/Conversion/CMakeLists.txt @@ -12,6 +12,7 @@ add_subdirectory(ComplexToSPIRV) add_subdirectory(ComplexToStandard) add_subdirectory(ControlFlowToLLVM) +add_subdirectory(ControlFlowToSCF) add_subdirectory(ControlFlowToSPIRV) add_subdirectory(ConvertToLLVM) add_subdirectory(FuncToLLVM) diff --git a/mlir/lib/Conversion/ControlFlowToSCF/CMakeLists.txt b/mlir/lib/Conversion/ControlFlowToSCF/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/mlir/lib/Conversion/ControlFlowToSCF/CMakeLists.txt @@ -0,0 +1,23 @@ +add_mlir_conversion_library(MLIRControlFlowToSCF + ControlFlowToSCF.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Conversion/ControlFlowToSCF + + DEPENDS + MLIRConversionPassIncGen + intrinsics_gen + + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC + MLIRAnalysis + MLIRArithDialect + MLIRControlFlowDialect + MLIRFuncDialect + MLIRSCFDialect + MLIRUBDialect + MLIRPass + MLIRTransformUtils +) diff --git a/mlir/lib/Conversion/ControlFlowToSCF/ControlFlowToSCF.cpp b/mlir/lib/Conversion/ControlFlowToSCF/ControlFlowToSCF.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Conversion/ControlFlowToSCF/ControlFlowToSCF.cpp @@ -0,0 +1,189 @@ +//===- ControlFlowToSCF.h - ControlFlow to SCF -------------*- C++ ------*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Define conversions from the ControlFlow dialect to the SCF dialect. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Conversion/ControlFlowToSCF/ControlFlowToSCF.h" + +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/ControlFlow/IR/ControlFlow.h" +#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/SCF/IR/SCF.h" +#include "mlir/Dialect/UB/IR/UBOps.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/CFGToSCF.h" + +namespace mlir { +#define GEN_PASS_DEF_LIFTCONTROLFLOWTOSCFPASS +#include "mlir/Conversion/Passes.h.inc" +} // namespace mlir + +using namespace mlir; + +namespace { + +class ControlFlowToSCFTransformation : public CFGToSCFInterface { +public: + FailureOr createStructuredBranchRegionOp( + OpBuilder &builder, Operation *controlFlowCondOp, TypeRange resultTypes, + MutableArrayRef regions) override { + if (auto condBrOp = dyn_cast(controlFlowCondOp)) { + assert(regions.size() == 2); + auto ifOp = builder.create( + controlFlowCondOp->getLoc(), resultTypes, condBrOp.getCondition()); + ifOp.getThenRegion().takeBody(regions[0]); + ifOp.getElseRegion().takeBody(regions[1]); + return ifOp.getOperation(); + } + + if (auto switchOp = dyn_cast(controlFlowCondOp)) { + // `getCFGSwitchValue` returns an i32 that we need to convert to index + // fist. 
+      auto cast = builder.create<arith::IndexCastUIOp>(
+          controlFlowCondOp->getLoc(), builder.getIndexType(),
+          switchOp.getFlag());
+      SmallVector<int64_t> cases;
+      if (auto caseValues = switchOp.getCaseValues())
+        llvm::append_range(
+            cases, llvm::map_range(*caseValues, [](const llvm::APInt &apInt) {
+              return apInt.getZExtValue();
+            }));
+
+      assert(regions.size() == cases.size() + 1);
+
+      auto indexSwitchOp = builder.create<scf::IndexSwitchOp>(
+          controlFlowCondOp->getLoc(), resultTypes, cast, cases, cases.size());
+
+      indexSwitchOp.getDefaultRegion().takeBody(regions[0]);
+      for (auto &&[targetRegion, sourceRegion] :
+           llvm::zip(indexSwitchOp.getCaseRegions(), llvm::drop_begin(regions)))
+        targetRegion.takeBody(sourceRegion);
+
+      return indexSwitchOp.getOperation();
+    }
+
+    controlFlowCondOp->emitOpError(
+        "Cannot convert unknown control flow op to structured control flow");
+    return failure();
+  }
+
+  LogicalResult
+  createStructuredBranchRegionTerminatorOp(Location loc, OpBuilder &builder,
+                                           Operation *branchRegionOp,
+                                           ValueRange results) override {
+    builder.create<scf::YieldOp>(loc, results);
+    return success();
+  }
+
+  FailureOr<Operation *>
+  createStructuredDoWhileLoopOp(OpBuilder &builder, Operation *replacedOp,
+                                ValueRange loopVariablesInit, Value condition,
+                                ValueRange loopVariablesNextIter,
+                                Region &&loopBody) override {
+    Location loc = replacedOp->getLoc();
+    auto whileOp = builder.create<scf::WhileOp>(
+        loc, loopVariablesInit.getTypes(), loopVariablesInit);
+
+    whileOp.getBefore().takeBody(loopBody);
+
+    builder.setInsertionPointToEnd(&whileOp.getBefore().back());
+    // `getCFGSwitchValue` returns a i32. We therefore need to truncate the
+    // condition to i1 first. It is guaranteed to be either 0 or 1 already.
+ builder.create( + loc, + builder.create(loc, builder.getI1Type(), condition), + loopVariablesNextIter); + + auto *afterBlock = new Block; + whileOp.getAfter().push_back(afterBlock); + afterBlock->addArguments( + loopVariablesInit.getTypes(), + SmallVector(loopVariablesInit.size(), loc)); + builder.setInsertionPointToEnd(afterBlock); + builder.create(loc, afterBlock->getArguments()); + + return whileOp.getOperation(); + } + + Value getCFGSwitchValue(Location loc, OpBuilder &builder, + unsigned int value) override { + return builder.create(loc, + builder.getI32IntegerAttr(value)); + } + + void createCFGSwitchOp(Location loc, OpBuilder &builder, Value flag, + ArrayRef caseValues, + BlockRange caseDestinations, + ArrayRef caseArguments, Block *defaultDest, + ValueRange defaultArgs) override { + builder.create(loc, flag, defaultDest, defaultArgs, + llvm::to_vector_of(caseValues), + caseDestinations, caseArguments); + } + + Value getUndefValue(Location loc, OpBuilder &builder, Type type) override { + return builder.create(loc, type, nullptr); + } + + FailureOr createUnreachableTerminator(Location loc, + OpBuilder &builder, + Region ®ion) override { + + // TODO: This should create a `ub.unreachable` op. Once such an operation + // exists to make the pass can be made independent of the func + // dialect. For now just return poison values. 
+ auto funcOp = dyn_cast(region.getParentOp()); + if (!funcOp) + return emitError(loc, "Expected '") + << func::FuncOp::getOperationName() << "' as top level operation"; + + return builder + .create( + loc, llvm::map_to_vector(funcOp.getResultTypes(), + [&](Type type) { + return getUndefValue(loc, builder, type); + })) + .getOperation(); + } +}; + +struct LiftControlFlowToSCF + : public impl::LiftControlFlowToSCFPassBase { + + using Base::Base; + + void runOnOperation() override { + ControlFlowToSCFTransformation transformation; + + bool changed = false; + WalkResult result = getOperation()->walk([&](func::FuncOp funcOp) { + if (funcOp.getBody().empty()) + return WalkResult::advance(); + + FailureOr changedFunc = transformCFGToSCF( + funcOp.getBody(), transformation, + funcOp != getOperation() ? getChildAnalysis(funcOp) + : getAnalysis()); + if (failed(changedFunc)) + return WalkResult::interrupt(); + + changed |= *changedFunc; + return WalkResult::advance(); + }); + if (result.wasInterrupted()) + return signalPassFailure(); + + if (!changed) + markAllAnalysesPreserved(); + } +}; +} // namespace diff --git a/mlir/lib/IR/Block.cpp b/mlir/lib/IR/Block.cpp --- a/mlir/lib/IR/Block.cpp +++ b/mlir/lib/IR/Block.cpp @@ -42,6 +42,12 @@ block->getParent()->getBlocks().insert(block->getIterator(), this); } +void Block::insertAfter(Block *block) { + assert(!getParent() && "already inserted into a block!"); + assert(block->getParent() && "cannot insert before a block without a parent"); + block->getParent()->getBlocks().insertAfter(block->getIterator(), this); +} + /// Unlink this block from its current region and insert it right before the /// specific block. 
void Block::moveBefore(Block *block) { diff --git a/mlir/lib/Transforms/Utils/CFGToSCF.cpp b/mlir/lib/Transforms/Utils/CFGToSCF.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Transforms/Utils/CFGToSCF.cpp @@ -0,0 +1,1327 @@ +//===- CFGToSCF.h - Control Flow Graph to Structured Control Flow *- C++ -*===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This code is an implementation of: +// Helge Bahmann, Nico Reissmann, Magnus Jahre, and Jan Christian Meyer. 2015. +// Perfect Reconstructability of Control Flow from Demand Dependence Graphs. ACM +// Trans. Archit. Code Optim. 11, 4, Article 66 (January 2015), 25 pages. +// https://doi.org/10.1145/2693261 +// +// It defines an algorithm to translate any control flow graph with a single +// entry and single exit block into structured control flow operations +// consisting of regions of do-while loops and operations conditionally +// dispatching to one out of multiple regions before continuing after the +// operation. This includes control flow graphs containing irreducible +// control flow. +// +// The implementation here additionally supports the transformation on +// regions with multiple exit blocks. This is implemented by first +// transforming all occurrences of return-like operations to branch to a +// single exit block containing an instance of that return-like operation. +// If there are multiple kinds of return-like operations, multiple exit +// blocks are created. In that case the transformation leaves behind a +// conditional control flow graph operation that dispatches to the given regions +// terminating with different kinds of return-like operations each. 
+// +// If the function only contains a single kind of return-like operations, +// it is guaranteed that all control flow graph ops will be lifted to structured +// control flow, and that no more control flow graph ops remain after the +// operation. +// +// The algorithm to lift CFGs consists of two transformations applied after each +// other on any single-entry, single-exit region: +// 1) Lifting cycles to structured control flow loops +// 2) Lifting conditional branches to structured control flow branches +// These are then applied recursively on any new single-entry single-exit +// regions created by the transformation until no more CFG operations remain. +// +// The first part of cycle lifting is to detect any cycles in the CFG. +// This is done using an algorithm for iterating over SCCs. Every SCC +// representing a cycle is then transformed into a structured loop with a single +// entry block and a single latch containing the only back edge to the entry +// block and the only edge to an exit block outside the loop. Rerouting control +// flow to create single entry and exit blocks is achieved via a multiplexer +// construct that can be visualized as follows: +// +-----+ +-----+ +-----+ +// | bb0 | | bb1 |...| bbN | +// +--+--+ +--+--+ +-+---+ +// | | | +// | v | +// | +------+ | +// | ++ ++<----+ +// | | Region | +// +>| |<----+ +// ++ ++ | +// +------+------+ +// +// The above transforms to: +// +-----+ +-----+ +-----+ +// | bb0 | | bb1 |...| bbN | +// +-----+ +--|--+ ++----+ +// | v | +// +->+-----+<---+ +// | bbM |<-------+ +// +---+-+ | +// +---+ | +----+ | +// | v | | +// | +------+ | | +// | ++ ++<-+ | +// +->| Region | | +// ++ ++ | +// +------+-------+ +// +// bbM in the above is the multiplexer block, and any block previously branching +// to an entry block of the region are redirected to it. This includes any +// branches from within the region. 
Using a block argument, bbM then dispatches +// to the correct entry block of the region dependent on the predecessor. +// +// A similar transformation is done to create the latch block with the single +// back edge and loop exit edge. +// +// The above form has the advantage that bbM now acts as the loop header +// of the loop body. After the transformation on the latch, this results in a +// structured loop that can then be lifted to structured control flow. The +// conditional branches created in bbM are later lifted to conditional +// branches. +// +// Lifting conditional branches is done by analyzing the *first* conditional +// branch encountered in the entry region. The algorithm then identifies +// all blocks that are dominated by a specific control flow edge and +// the region where control flow continues: +// +-----+ +// +-----+ bb0 +----+ +// v +-----+ v +// Region 1 +-+-+ ... +-+-+ Region n +// +---+ +---+ +// ... ... +// | | +// | +---+ | +// +---->++ ++<---+ +// | | +// ++ ++ Region T +// +---+ +// Every region following bb0 consists of 0 or more blocks that eventually +// branch to Region T. If there are multiple entry blocks into Region T, a +// single entry block is created using a multiplexer block as shown above. +// Region 1 to Region n are then lifted together with the conditional control +// flow operation terminating bb0 into a structured conditional operation +// followed by the operations of the entry block of Region T. 
+//===----------------------------------------------------------------------===// + +#include "mlir/Transforms/CFGToSCF.h" + +#include "mlir/IR/RegionGraphTraits.h" +#include "mlir/Interfaces/ControlFlowInterfaces.h" +#include "mlir/Interfaces/SideEffectInterfaces.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" + +using namespace mlir; + +/// Returns the mutable operand range used to transfer operands from `block` to +/// its successor with the given index. The returned range being mutable allows +/// us to modify the operands being transferred. +static MutableOperandRange +getMutableSuccessorOperands(Block *block, unsigned successorIndex) { + auto branchOpInterface = cast(block->getTerminator()); + SuccessorOperands succOps = + branchOpInterface.getSuccessorOperands(successorIndex); + return succOps.getMutableForwardedOperands(); +} + +/// Appends all the block arguments from `other` to the block arguments of +/// `block`, copying their types and locations. +static void addBlockArgumentsFromOther(Block *block, Block *other) { + for (BlockArgument arg : other->getArguments()) + block->addArgument(arg.getType(), arg.getLoc()); +} + +namespace { + +/// Class representing an edge in the CFG. Consists of a from-block, a successor +/// and corresponding successor operands passed to the block arguments of the +/// successor. +class Edge { + Block *fromBlock; + unsigned successorIndex; + +public: + /// Constructs a new edge from `fromBlock` to the successor corresponding to + /// `successorIndex`. + Edge(Block *fromBlock, unsigned int successorIndex) + : fromBlock(fromBlock), successorIndex(successorIndex) {} + + /// Returns the from-block. + Block *getFromBlock() const { return fromBlock; } + + /// Returns the successor of the edge. 
+ Block *getSuccessor() const { + return fromBlock->getSuccessor(successorIndex); + } + + /// Sets the successor of the edge, adjusting the terminator in the + /// from-block. + void setSuccessor(Block *block) const { + fromBlock->getTerminator()->setSuccessor(block, successorIndex); + } + + /// Returns the arguments of this edge that are passed to the block arguments + /// of the successor. + MutableOperandRange getSuccessorOperands() const { + return getMutableSuccessorOperands(fromBlock, successorIndex); + } +}; + +/// Structure containing the entry, exit and back edges of a cycle. A cycle is a +/// generalization of a loop that may have multiple entry edges. See also +/// https://llvm.org/docs/CycleTerminology.html. +struct CycleEdges { + /// All edges from a block outside the cycle to a block inside the cycle. + /// The targets of these edges are entry blocks. + SmallVector entryEdges; + /// All edges from a block inside the cycle to a block outside the cycle. + SmallVector exitEdges; + /// All edges from a block inside the cycle to an entry block. + SmallVector backEdges; +}; + +/// Class used to orchestrate creation of so-called edge multiplexers. +/// This class creates a new basic block and routes all inputs edges +/// to this basic block before branching to their original target. +/// The purpose of this transformation is to create single-entry, +/// single-exit regions. +class EdgeMultiplexer { +public: + /// Creates a new edge multiplexer capable of redirecting all edges to one of + /// the `entryBlocks`. This creates the multiplexer basic block with + /// appropriate block arguments after the first entry block. `extraArgs` + /// contains the types of possible extra block arguments passed to the + /// multiplexer block that are added to the successor operands of every + /// outgoing edge. 
+  ///
+  /// NOTE: This does not yet redirect edges to branch to the
+  /// multiplexer block nor code dispatching from the multiplexer code
+  /// to the original successors.
+  /// See `redirectEdge` and `createSwitch`.
+  // NOTE(review): angle-bracket template arguments (e.g. on `ArrayRef`,
+  // `function_ref`, `SmallVector` and `SmallMapVector`) appear to have been
+  // stripped from this patch in transit; restore them from the upstream file
+  // before applying — TODO confirm.
+  static EdgeMultiplexer create(Location loc, ArrayRef entryBlocks,
+                                function_ref getSwitchValue,
+                                function_ref getUndefValue,
+                                TypeRange extraArgs = {}) {
+    assert(!entryBlocks.empty() && "Require at least one entry block");
+
+    auto *multiplexerBlock = new Block;
+    multiplexerBlock->insertAfter(entryBlocks.front());
+
+    // To implement the multiplexer block, we have to add the block arguments of
+    // every distinct successor block to the multiplexer block. When redirecting
+    // edges, block arguments designated for blocks that aren't branched to will
+    // be assigned the `getUndefValue`. The amount of block arguments and their
+    // offset is saved in the map for `redirectEdge` to transform the edges.
+    llvm::SmallMapVector blockArgMapping;
+    for (Block *entryBlock : entryBlocks) {
+      auto [iter, inserted] = blockArgMapping.insert(
+          {entryBlock, multiplexerBlock->getNumArguments()});
+      if (inserted)
+        addBlockArgumentsFromOther(multiplexerBlock, entryBlock);
+    }
+
+    // If we have more than one successor, we have to additionally add a
+    // discriminator value, denoting which successor to jump to.
+    // When redirecting edges, an appropriate value will be passed using
+    // `getSwitchValue`.
+    Value discriminator;
+    if (blockArgMapping.size() > 1)
+      discriminator =
+          multiplexerBlock->addArgument(getSwitchValue(0).getType(), loc);
+
+    multiplexerBlock->addArguments(
+        extraArgs, SmallVector(extraArgs.size(), loc));
+
+    return EdgeMultiplexer(multiplexerBlock, getSwitchValue, getUndefValue,
+                           std::move(blockArgMapping), discriminator);
+  }
+
+  /// Returns the created multiplexer block.
+  Block *getMultiplexerBlock() const { return multiplexerBlock; }
+
+  /// Redirects `edge` to branch to the multiplexer block before continuing to
+  /// its original target. The edge's successor must have originally been part
+  /// of the entry blocks array passed to the `create` function. `extraArgs`
+  /// must be used to pass along any additional values corresponding to
+  /// `extraArgs` in `create`.
+  void redirectEdge(Edge edge, ValueRange extraArgs = {}) const {
+    const auto *result = blockArgMapping.find(edge.getSuccessor());
+    assert(result != blockArgMapping.end() &&
+           "Edge was not originally passed to `create` method.");
+
+    MutableOperandRange successorOperands = edge.getSuccessorOperands();
+
+    // Extra arguments are always appended at the end of the block arguments.
+    unsigned extraArgsBeginIndex =
+        multiplexerBlock->getNumArguments() - extraArgs.size();
+    // If a discriminator exists, it is right before the extra arguments.
+    std::optional discriminatorIndex =
+        discriminator ? extraArgsBeginIndex - 1 : std::optional{};
+
+    SmallVector newSuccOperands(multiplexerBlock->getNumArguments());
+    for (BlockArgument argument : multiplexerBlock->getArguments()) {
+      unsigned index = argument.getArgNumber();
+      if (index >= result->second &&
+          index < result->second + edge.getSuccessor()->getNumArguments()) {
+        // Original block arguments to the entry block.
+        newSuccOperands[index] = successorOperands[index - result->second];
+        continue;
+      }
+
+      // Discriminator value if it exists.
+      if (index == discriminatorIndex) {
+        newSuccOperands[index] =
+            getSwitchValue(result - blockArgMapping.begin());
+        continue;
+      }
+
+      // Followed by the extra arguments.
+      if (index >= extraArgsBeginIndex) {
+        newSuccOperands[index] = extraArgs[index - extraArgsBeginIndex];
+        continue;
+      }
+
+      // Otherwise undef values for any unused block arguments used by other
+      // entry blocks.
+      newSuccOperands[index] = getUndefValue(argument.getType());
+    }
+
+    edge.setSuccessor(multiplexerBlock);
+    successorOperands.assign(newSuccOperands);
+  }
+
+  /// Creates a switch op using `builder` which dispatches to the original
+  /// successors of the edges passed to `create` minus the ones in `excluded`.
+  /// The builder's insertion point has to be in a block dominated by the
+  /// multiplexer block.
+  void createSwitch(
+      Location loc, OpBuilder &builder, CFGToSCFInterface &interface,
+      const SmallPtrSetImpl &excluded = SmallPtrSet{}) {
+    // We create the switch by creating a case for all entries and then
+    // splitting off the last entry as a default case.
+
+    SmallVector caseArguments;
+    SmallVector caseValues;
+    SmallVector caseDestinations;
+    for (auto &&[index, pair] : llvm::enumerate(blockArgMapping)) {
+      auto &&[succ, offset] = pair;
+      if (excluded.contains(succ))
+        continue;
+
+      caseValues.push_back(index);
+      caseArguments.push_back(multiplexerBlock->getArguments().slice(
+          offset, succ->getNumArguments()));
+      caseDestinations.push_back(succ);
+    }
+
+    // If we don't have a discriminator due to only having one entry we have to
+    // create a dummy flag for the switch.
+    Value realDiscriminator = discriminator;
+    if (!realDiscriminator || caseArguments.size() == 1)
+      realDiscriminator = getSwitchValue(0);
+
+    caseValues.pop_back();
+    Block *defaultDest = caseDestinations.pop_back_val();
+    ValueRange defaultArgs = caseArguments.pop_back_val();
+
+    interface.createCFGSwitchOp(loc, builder, realDiscriminator, caseValues,
+                                caseDestinations, caseArguments, defaultDest,
+                                defaultArgs);
+  }
+
+private:
+  /// Newly created multiplexer block.
+  Block *multiplexerBlock;
+  /// Callback used to create a constant suitable as flag for
+  /// the interfaces `createCFGSwitchOp`.
+  function_ref getSwitchValue;
+  /// Callback used to create undefined values of a given type.
+  function_ref getUndefValue;
+
+  /// Mapping of the block arguments of an entry block to the corresponding
+  /// block arguments in the multiplexer block. Block arguments of an entry
+  /// block are simply appended to the multiplexer block. This map simply
+  /// contains the offset to the range in the multiplexer block.
+  llvm::SmallMapVector blockArgMapping;
+  /// Discriminator value used in the multiplexer block to dispatch to the
+  /// correct entry block. Null value if not required due to only having one
+  /// entry block.
+  Value discriminator;
+
+  EdgeMultiplexer(Block *multiplexerBlock,
+                  function_ref getSwitchValue,
+                  function_ref getUndefValue,
+                  llvm::SmallMapVector &&entries,
+                  Value dispatchFlag)
+      : multiplexerBlock(multiplexerBlock), getSwitchValue(getSwitchValue),
+        getUndefValue(getUndefValue), blockArgMapping(std::move(entries)),
+        discriminator(dispatchFlag) {}
+};
+
+/// Alternative implementation of DenseMapInfo using the operation
+/// equivalence infrastructure to check whether two 'return-like' operations are
+/// equivalent in the context of this transformation. This means that both
+/// operations are of the same kind, have the same amount of operands and types
+/// and the same attributes and properties. The operands themselves don't have
+/// to be equivalent.
+// NOTE(review): template arguments were stripped from this patch in transit
+// (e.g. `DenseMapInfo<Operation *>`, `const_cast<Operation *>`,
+// `SmallSetVector<Block *, 4>`); restore from upstream before applying —
+// TODO confirm.
+struct ReturnLikeOpEquivalence : public llvm::DenseMapInfo {
+  static unsigned getHashValue(const Operation *opC) {
+    return OperationEquivalence::computeHash(
+        const_cast(opC),
+        /*hashOperands=*/OperationEquivalence::ignoreHashValue,
+        /*hashResults=*/OperationEquivalence::ignoreHashValue,
+        OperationEquivalence::IgnoreLocations);
+  }
+
+  static bool isEqual(const Operation *lhs, const Operation *rhs) {
+    if (lhs == rhs)
+      return true;
+    if (lhs == getTombstoneKey() || lhs == getEmptyKey() ||
+        rhs == getTombstoneKey() || rhs == getEmptyKey())
+      return false;
+    return OperationEquivalence::isEquivalentTo(
+        const_cast(lhs), const_cast(rhs),
+        OperationEquivalence::ignoreValueEquivalence, nullptr,
+        OperationEquivalence::IgnoreLocations);
+  }
+};
+
+/// Utility-class for transforming a region to only have one single block for
+/// every return-like operation.
+class ReturnLikeExitCombiner {
+public:
+  ReturnLikeExitCombiner(Region &topLevelRegion, CFGToSCFInterface &interface)
+      : topLevelRegion(topLevelRegion), interface(interface) {}
+
+  /// Transforms `returnLikeOp` to a branch to the only block in the
+  /// region with an instance of `returnLikeOp`s kind.
+  void combineExit(Operation *returnLikeOp,
+                   function_ref getSwitchValue) {
+    auto [iter, inserted] =
+        returnLikeToCombinedExit.insert({returnLikeOp, nullptr});
+    // First-seen op of its equivalence class is already the canonical exit.
+    if (!inserted && iter->first == returnLikeOp)
+      return;
+
+    Block *exitBlock = iter->second;
+    if (inserted) {
+      exitBlock = new Block;
+      iter->second = exitBlock;
+      topLevelRegion.push_back(exitBlock);
+      exitBlock->addArguments(
+          returnLikeOp->getOperandTypes(),
+          SmallVector(returnLikeOp->getNumOperands(),
+                      returnLikeOp->getLoc()));
+    }
+
+    auto builder = OpBuilder::atBlockTerminator(returnLikeOp->getBlock());
+    interface.createSingleDestinationBranch(returnLikeOp->getLoc(), builder,
+                                            getSwitchValue(0), exitBlock,
+                                            returnLikeOp->getOperands());
+
+    if (!inserted) {
+      returnLikeOp->erase();
+      return;
+    }
+
+    returnLikeOp->moveBefore(exitBlock, exitBlock->end());
+    returnLikeOp->setOperands(exitBlock->getArguments());
+  }
+
+private:
+  /// Mapping of return-like operation to block. All return-like operations
+  /// of the same kind with the same attributes, properties and types are seen
+  /// as equivalent. First occurrence seen is kept in the map.
+  llvm::SmallDenseMap
+      returnLikeToCombinedExit;
+  Region &topLevelRegion;
+  CFGToSCFInterface &interface;
+};
+
+} // namespace
+
+/// Returns a range of all edges from `block` to each of its successors.
+static auto successorEdges(Block *block) {
+  return llvm::map_range(llvm::seq(block->getNumSuccessors()),
+                         [=](unsigned index) { return Edge(block, index); });
+}
+
+/// Calculates entry, exit and back edges of the given cycle.
+static CycleEdges
+calculateCycleEdges(const llvm::SmallSetVector &cycles) {
+  CycleEdges result;
+  SmallPtrSet entryBlocks;
+
+  // First identify all exit and entry edges by checking whether any successors
+  // or predecessors are from outside the cycles.
+  for (Block *block : cycles) {
+    for (auto pred = block->pred_begin(); pred != block->pred_end(); pred++) {
+      if (cycles.contains(*pred))
+        continue;
+
+      result.entryEdges.emplace_back(*pred, pred.getSuccessorIndex());
+      entryBlocks.insert(block);
+    }
+
+    for (auto &&[succIndex, succ] : llvm::enumerate(block->getSuccessors())) {
+      if (cycles.contains(succ))
+        continue;
+
+      result.exitEdges.emplace_back(block, succIndex);
+    }
+  }
+
+  // With the entry blocks identified, find all the back edges.
+  for (Block *block : cycles) {
+    for (auto &&[succIndex, succ] : llvm::enumerate(block->getSuccessors())) {
+      if (!entryBlocks.contains(succ))
+        continue;
+
+      result.backEdges.emplace_back(block, succIndex);
+    }
+  }
+
+  return result;
+}
+
+/// Creates a single entry block out of multiple entry edges using an edge
+/// multiplexer and returns it.
+static EdgeMultiplexer
+createSingleEntryBlock(Location loc, ArrayRef entryEdges,
+                       function_ref getSwitchValue,
+                       function_ref getUndefValue,
+                       CFGToSCFInterface &interface) {
+  auto result = EdgeMultiplexer::create(
+      loc, llvm::map_to_vector(entryEdges, std::mem_fn(&Edge::getSuccessor)),
+      getSwitchValue, getUndefValue);
+
+  auto builder = OpBuilder::atBlockBegin(result.getMultiplexerBlock());
+  result.createSwitch(loc, builder, interface);
+
+  for (Edge edge : entryEdges)
+    result.redirectEdge(edge);
+
+  return result;
+}
+
+namespace {
+/// Special loop properties of a structured loop.
+/// A structured loop is a loop satisfying all of the following:
+/// * Has at most one entry, one exit and one back edge.
+/// * The back edge originates from the same block as the exit edge.
+struct StructuredLoopProperties {
+  /// Block containing both the single exit edge and the single back edge.
+  Block *latch;
+  /// Loop condition of type equal to a value returned by `getSwitchValue`.
+  Value condition;
+  /// Exit block which is the only successor of the loop.
+  Block *exitBlock;
+};
+} // namespace
+
+/// Transforms a loop into a structured loop with only a single back edge and
+/// exiting edge, originating from the same block.
+static FailureOr createSingleExitingLatch(
+    Location loc, ArrayRef backEdges, ArrayRef exitEdges,
+    function_ref getSwitchValue,
+    function_ref getUndefValue, CFGToSCFInterface &interface,
+    ReturnLikeExitCombiner &exitCombiner) {
+  assert(llvm::all_equal(
+             llvm::map_range(backEdges, std::mem_fn(&Edge::getSuccessor))) &&
+         "All repetition edges must lead to the single loop header");
+
+  // First create the multiplexer block, which will be our latch, for all back
+  // edges and exit edges. We pass an additional argument to the multiplexer
+  // block which indicates whether the latch was reached from what was
+  // originally a back edge or an exit block.
+  // This is later used to branch using the new only back edge.
+  SmallVector successors;
+  llvm::append_range(
+      successors, llvm::map_range(backEdges, std::mem_fn(&Edge::getSuccessor)));
+  llvm::append_range(
+      successors, llvm::map_range(exitEdges, std::mem_fn(&Edge::getSuccessor)));
+  auto multiplexer =
+      EdgeMultiplexer::create(loc, successors, getSwitchValue, getUndefValue,
+                              /*extraArgs=*/getSwitchValue(0).getType());
+
+  auto *latchBlock = multiplexer.getMultiplexerBlock();
+
+  // Create a separate exit block that comes right after the latch.
+  auto *exitBlock = new Block;
+  exitBlock->insertAfter(latchBlock);
+
+  // Since this is a loop, all back edges point to the same loop header.
+  Block *loopHeader = backEdges.front().getSuccessor();
+
+  // Create the new only back edge to the loop header. Branch to the
+  // exit block otherwise.
+  Value shouldRepeat = latchBlock->getArguments().back();
+  {
+    auto builder = OpBuilder::atBlockBegin(latchBlock);
+    interface.createConditionalBranch(
+        builder.getUnknownLoc(), builder, shouldRepeat, loopHeader,
+        latchBlock->getArguments().take_front(loopHeader->getNumArguments()),
+        /*falseDest=*/exitBlock,
+        /*falseArgs=*/{});
+  }
+
+  {
+    auto builder = OpBuilder::atBlockBegin(exitBlock);
+    if (!exitEdges.empty()) {
+      // Create the switch dispatching to what were originally the multiple exit
+      // blocks. The loop header has to explicitly be excluded in the below
+      // switch as we would otherwise be creating a new loop again. All back
+      // edges leading to the loop header have already been handled in the
+      // switch above. The remaining edges can only jump to blocks outside the
+      // loop.
+
+      SmallPtrSet excluded = {loopHeader};
+      multiplexer.createSwitch(loc, builder, interface, excluded);
+    } else {
+      // A loop without an exit edge is a statically known infinite loop.
+      // Since structured control flow ops are not terminator ops, the caller
+      // has to create a fitting return-like unreachable terminator operation.
+      FailureOr terminator = interface.createUnreachableTerminator(
+          loc, builder, *latchBlock->getParent());
+      if (failed(terminator))
+        return failure();
+      // Combine the just created terminator operation in the case that an
+      // occurrence of it existed in input IR.
+      exitCombiner.combineExit(*terminator, getSwitchValue);
+    }
+  }
+
+  // Redirecting back edges with `shouldRepeat` as 1.
+  for (Edge backEdge : backEdges)
+    multiplexer.redirectEdge(backEdge, /*extraArgs=*/getSwitchValue(1));
+
+  // Redirecting exit edges with `shouldRepeat` as 0.
+  for (Edge exitEdge : exitEdges)
+    multiplexer.redirectEdge(exitEdge, /*extraArgs=*/getSwitchValue(0));
+
+  return StructuredLoopProperties{latchBlock, /*condition=*/shouldRepeat,
+                                  exitBlock};
+}
+
+/// Transforms a structured loop into a loop in reduce form.
+///
+/// Reduce form is defined as a structured loop where:
+/// (0) No values defined within the loop body are used outside the loop body.
+/// (1) The block arguments and successor operands of the exit block are equal
+///     to the block arguments of the loop header and the successor operands
+///     of the back edge.
+///
+/// This is required for many structured control flow ops as they tend
+/// to not have separate "loop result arguments" and "loop iteration arguments"
+/// at the end of the block. Rather, the "loop iteration arguments" from the
+/// last iteration are the result of the loop.
+///
+/// Note that the requirement of (0) is shared with LCSSA form in LLVM. However,
+/// due to this being a structured loop instead of a general loop, we do not
+/// require complicated dominance algorithms nor SSA updating making this
+/// implementation easier than creating a generic LCSSA transformation pass.
+// NOTE(review): template arguments were stripped from this patch in transit
+// (e.g. `SmallVector<Value>`, `function_ref<Value(Type)>`); restore from
+// upstream before applying — TODO confirm.
+static SmallVector
+transformToReduceLoop(Block *loopHeader, Block *exitBlock,
+                      const llvm::SmallSetVector &loopBlocks,
+                      function_ref getUndefValue,
+                      DominanceInfo &dominanceInfo) {
+  Block *latch = exitBlock->getSinglePredecessor();
+  assert(latch &&
+         "Exit block must have only latch as predecessor at this point");
+  assert(exitBlock->getNumArguments() == 0 &&
+         "Exit block mustn't have any block arguments at this point");
+
+  unsigned loopHeaderIndex = 0;
+  unsigned exitBlockIndex = 1;
+  if (latch->getSuccessor(loopHeaderIndex) != loopHeader)
+    std::swap(loopHeaderIndex, exitBlockIndex);
+
+  assert(latch->getSuccessor(loopHeaderIndex) == loopHeader);
+  assert(latch->getSuccessor(exitBlockIndex) == exitBlock);
+
+  MutableOperandRange exitBlockSuccessorOperands =
+      getMutableSuccessorOperands(latch, exitBlockIndex);
+  // Save the values as a vector, not a `MutableOperandRange` as the latter gets
+  // invalidated when mutating the operands through a different
+  // `MutableOperandRange` of the same operation.
+  SmallVector loopHeaderSuccessorOperands =
+      llvm::to_vector(getMutableSuccessorOperands(latch, loopHeaderIndex));
+
+  // Add all values used in the next iteration to the exit block. Replace
+  // any uses that are outside the loop with the newly created exit block.
+  for (Value arg : loopHeaderSuccessorOperands) {
+    BlockArgument exitArg = exitBlock->addArgument(arg.getType(), arg.getLoc());
+    exitBlockSuccessorOperands.append(arg);
+    arg.replaceUsesWithIf(exitArg, [&](OpOperand &use) {
+      return !loopBlocks.contains(use.getOwner()->getBlock());
+    });
+  }
+
+  // Loop below might add block arguments to the latch and loop header.
+  // Save the block arguments prior to the loop to not process these.
+  SmallVector latchBlockArgumentsPrior =
+      llvm::to_vector(latch->getArguments());
+  SmallVector loopHeaderArgumentsPrior =
+      llvm::to_vector(loopHeader->getArguments());
+
+  // Go over all values defined within the loop body. If any of them are used
+  // outside the loop body, create a block argument on the exit block and loop
+  // header and replace the outside uses with the exit block argument.
+  // The loop header block argument is added to satisfy requirement (1) in the
+  // reduce form condition.
+  for (Block *loopBlock : loopBlocks) {
+    // Cache dominance queries for loopBlock.
+    // There are likely to be many duplicate queries as there can be many value
+    // definitions within a block.
+    llvm::SmallDenseMap dominanceCache;
+    // Returns true if `loopBlock` dominates `block`.
+    auto loopBlockDominates = [&](Block *block) {
+      auto [iter, inserted] = dominanceCache.insert({block, false});
+      if (!inserted)
+        return iter->second;
+      iter->second = dominanceInfo.dominates(loopBlock, block);
+      return iter->second;
+    };
+
+    auto checkValue = [&](Value value) {
+      Value blockArgument;
+      for (OpOperand &use : llvm::make_early_inc_range(value.getUses())) {
+        if (loopBlocks.contains(use.getOwner()->getBlock()))
+          continue;
+
+        // Block argument is only created the first time it is required.
+        if (!blockArgument) {
+          blockArgument =
+              exitBlock->addArgument(value.getType(), value.getLoc());
+          loopHeader->addArgument(value.getType(), value.getLoc());
+
+          // `value` might be defined in a block that does not dominate `latch`
+          // but previously dominated an exit block with a use.
+          // In this case, add a block argument to the latch and go through all
+          // predecessors. If the value dominates the predecessor, pass the
+          // value as a successor operand, otherwise pass undef.
+          // The above is unnecessary if the value is a block argument of the
+          // latch or if `value` dominates all predecessors.
+          Value argument = value;
+          if (value.getParentBlock() != latch &&
+              llvm::any_of(latch->getPredecessors(), [&](Block *pred) {
+                return !loopBlockDominates(pred);
+              })) {
+            argument = latch->addArgument(value.getType(), value.getLoc());
+            for (auto iter = latch->pred_begin(); iter != latch->pred_end();
+                 ++iter) {
+              Value succOperand = value;
+              if (!loopBlockDominates(*iter))
+                succOperand = getUndefValue(value.getType());
+
+              getMutableSuccessorOperands(*iter, iter.getSuccessorIndex())
+                  .append(succOperand);
+            }
+          }
+
+          loopHeaderSuccessorOperands.push_back(argument);
+          for (Edge edge : successorEdges(latch))
+            edge.getSuccessorOperands().append(argument);
+        }
+
+        use.set(blockArgument);
+      }
+    };
+
+    if (loopBlock == latch)
+      llvm::for_each(latchBlockArgumentsPrior, checkValue);
+    else if (loopBlock == loopHeader)
+      llvm::for_each(loopHeaderArgumentsPrior, checkValue);
+    else
+      llvm::for_each(loopBlock->getArguments(), checkValue);
+
+    for (Operation &op : *loopBlock)
+      llvm::for_each(op.getResults(), checkValue);
+  }
+
+  // New block arguments may have been added to the loop header.
+  // Adjust the entry edges to pass undef values to these.
+  for (auto iter = loopHeader->pred_begin(); iter != loopHeader->pred_end();
+       ++iter) {
+    // Latch successor arguments have already been handled.
+    if (*iter == latch)
+      continue;
+
+    MutableOperandRange succOps =
+        getMutableSuccessorOperands(*iter, iter.getSuccessorIndex());
+    succOps.append(llvm::map_to_vector(
+        loopHeader->getArguments().drop_front(succOps.size()),
+        [&](BlockArgument arg) { return getUndefValue(arg.getType()); }));
+  }
+
+  return loopHeaderSuccessorOperands;
+}
+
+/// Transforms all outer-most cycles in the region with the region entry
+/// `regionEntry` into structured loops. Returns the entry blocks of any newly
+/// created regions potentially requiring further transformations.
+// NOTE(review): template arguments were stripped from this patch in transit
+// (e.g. `FailureOr<SmallVector<Block *>>`, `SmallSetVector<Block *, 4>`,
+// `FailureOr<Operation *>`); restore from upstream before applying — TODO
+// confirm.
+static FailureOr> transformCyclesToSCFLoops(
+    Block *regionEntry, function_ref getSwitchValue,
+    function_ref getUndefValue, CFGToSCFInterface &interface,
+    DominanceInfo &dominanceInfo, ReturnLikeExitCombiner &exitCombiner) {
+  SmallVector newSubRegions;
+  auto scc = llvm::scc_begin(regionEntry);
+  while (!scc.isAtEnd()) {
+    if (!scc.hasCycle()) {
+      ++scc;
+      continue;
+    }
+
+    // Save the set and increment the SCC iterator early to avoid our
+    // modifications breaking the SCC iterator.
+    llvm::SmallSetVector cycleBlockSet(scc->begin(), scc->end());
+    ++scc;
+
+    CycleEdges edges = calculateCycleEdges(cycleBlockSet);
+    Block *loopHeader = edges.entryEdges.front().getSuccessor();
+    // First turn the cycle into a loop by creating a single entry block if
+    // needed.
+    if (edges.entryEdges.size() > 1) {
+      EdgeMultiplexer multiplexer = createSingleEntryBlock(
+          loopHeader->getTerminator()->getLoc(), edges.entryEdges,
+          getSwitchValue, getUndefValue, interface);
+
+      for (Edge edge : edges.backEdges)
+        multiplexer.redirectEdge(edge);
+
+      loopHeader = multiplexer.getMultiplexerBlock();
+    }
+    cycleBlockSet.insert(loopHeader);
+
+    // Then turn it into a structured loop by creating a single latch.
+    FailureOr loopProperties =
+        createSingleExitingLatch(
+            edges.backEdges.front().getFromBlock()->getTerminator()->getLoc(),
+            edges.backEdges, edges.exitEdges, getSwitchValue, getUndefValue,
+            interface, exitCombiner);
+    if (failed(loopProperties))
+      return failure();
+
+    Block *latchBlock = loopProperties->latch;
+    Block *exitBlock = loopProperties->exitBlock;
+    cycleBlockSet.insert(latchBlock);
+    cycleBlockSet.insert(loopHeader);
+
+    // Finally, turn it into reduce form.
+    SmallVector iterationValues = transformToReduceLoop(
+        loopHeader, exitBlock, cycleBlockSet, getUndefValue, dominanceInfo);
+
+    // Create a block acting as replacement for the loop header and insert
+    // the structured loop into it.
+    auto *newLoopParentBlock = new Block;
+    newLoopParentBlock->insertBefore(loopHeader);
+    addBlockArgumentsFromOther(newLoopParentBlock, loopHeader);
+
+    Region::BlockListType &blocks = regionEntry->getParent()->getBlocks();
+    Region loopBody;
+    // Make sure the loop header is the entry block.
+    loopBody.push_back(blocks.remove(loopHeader));
+    for (Block *block : cycleBlockSet)
+      if (block != latchBlock && block != loopHeader)
+        loopBody.push_back(blocks.remove(block));
+    // And the latch is the last block.
+    loopBody.push_back(blocks.remove(latchBlock));
+
+    Operation *oldTerminator = latchBlock->getTerminator();
+    oldTerminator->remove();
+
+    auto builder = OpBuilder::atBlockBegin(newLoopParentBlock);
+    FailureOr structuredLoopOp =
+        interface.createStructuredDoWhileLoopOp(
+            builder, oldTerminator, newLoopParentBlock->getArguments(),
+            loopProperties->condition, iterationValues, std::move(loopBody));
+    if (failed(structuredLoopOp))
+      return failure();
+    oldTerminator->erase();
+
+    newSubRegions.push_back(loopHeader);
+
+    for (auto &&[oldValue, newValue] : llvm::zip(
+             exitBlock->getArguments(), (*structuredLoopOp)->getResults()))
+      oldValue.replaceAllUsesWith(newValue);
+
+    loopHeader->replaceAllUsesWith(newLoopParentBlock);
+    // Merge the exit block right after the loop operation.
+    newLoopParentBlock->getOperations().splice(newLoopParentBlock->end(),
+                                               exitBlock->getOperations());
+    exitBlock->erase();
+  }
+  return newSubRegions;
+}
+
+/// Makes sure the branch region only has a single exit. This is required by the
+/// recursive part of the algorithm, as it expects the CFG to be single-entry
+/// and single-exit. This is done by simply creating an empty block if there
+/// is more than one block with an edge to the continuation block. All blocks
+/// with edges to the continuation are then redirected to this block. A region
+/// terminator is later placed into the block.
+static void createSingleExitBranchRegion( + ArrayRef branchRegion, Block *continuation, + SmallVectorImpl>> &createdEmptyBlocks, + Region &conditionalRegion) { + Block *singleExitBlock = nullptr; + std::optional previousEdgeToContinuation; + Region::BlockListType &parentBlockList = + branchRegion.front()->getParent()->getBlocks(); + for (Block *block : branchRegion) { + for (Edge edge : successorEdges(block)) { + if (edge.getSuccessor() != continuation) + continue; + + if (!previousEdgeToContinuation) { + previousEdgeToContinuation = edge; + continue; + } + + // If this is not the first edge to the continuation we create the + // single exit block and redirect the edges. + if (!singleExitBlock) { + singleExitBlock = new Block; + addBlockArgumentsFromOther(singleExitBlock, continuation); + previousEdgeToContinuation->setSuccessor(singleExitBlock); + createdEmptyBlocks.emplace_back(singleExitBlock, + singleExitBlock->getArguments()); + } + + edge.setSuccessor(singleExitBlock); + } + + conditionalRegion.push_back(parentBlockList.remove(block)); + } + + if (singleExitBlock) + conditionalRegion.push_back(singleExitBlock); +} + +/// Returns true if this block is an exit block of the region. +static bool isRegionExitBlock(Block *block) { + return block->getNumSuccessors() == 0; +} + +/// Transforms the first occurrence of conditional control flow in `regionEntry` +/// into conditionally executed regions. Returns the entry block of the created +/// regions and the region after the conditional control flow. +static FailureOr> transformToStructuredCFBranches( + Block *regionEntry, function_ref getSwitchValue, + function_ref getUndefValue, CFGToSCFInterface &interface, + DominanceInfo &dominanceInfo) { + // Trivial region. + if (regionEntry->getNumSuccessors() == 0) + return SmallVector{}; + + if (regionEntry->getNumSuccessors() == 1) { + // Single successor we can just splice together. 
+ Block *successor = regionEntry->getSuccessor(0); + for (auto &&[oldValue, newValue] : + llvm::zip(successor->getArguments(), + getMutableSuccessorOperands(regionEntry, 0))) + oldValue.replaceAllUsesWith(newValue); + regionEntry->getTerminator()->erase(); + + regionEntry->getOperations().splice(regionEntry->end(), + successor->getOperations()); + successor->erase(); + return SmallVector{regionEntry}; + } + + // Split the CFG into "#numSuccessor + 1" regions. + // For every edge to a successor, the blocks it solely dominates are + // determined and become the region following that edge. + // The last region is the continuation that follows the branch regions. + SmallPtrSet notContinuation; + notContinuation.insert(regionEntry); + SmallVector> successorBranchRegions( + regionEntry->getNumSuccessors()); + for (auto &&[blockList, succ] : + llvm::zip(successorBranchRegions, regionEntry->getSuccessors())) { + // If the region entry is not the only predecessor, then the edge does not + // dominate the block it leads to. + if (succ->getSinglePredecessor() != regionEntry) + continue; + + // Otherwise get all blocks it dominates in DFS/pre-order. + DominanceInfoNode *node = dominanceInfo.getNode(succ); + for (DominanceInfoNode *curr : llvm::depth_first(node)) { + blockList.push_back(curr->getBlock()); + notContinuation.insert(curr->getBlock()); + } + } + + // Finds all relevant edges and checks the shape of the control flow graph at + // this point. + // Branch regions may either: + // * Be post-dominated by the continuation + // * Be post-dominated by a return-like op + // * Dominate a return-like op and have an edge to the continuation. + // + // The control flow graph may then be one of three cases: + // 1) All branch regions are post-dominated by the continuation. This is the + // usual case. If there are multiple entry blocks into the continuation a + // single entry block has to be created. A structured control flow op + // can then be created from the branch regions. 
+ // + // 2) No branch region has an edge to a continuation: + // +-----+ + // +-----+ bb0 +----+ + // v +-----+ v + // Region 1 +-+--+ ... +-+--+ Region n + // |ret1| |ret2| + // +----+ +----+ + // + // This can only occur if every region ends with a different kind of + // return-like op. In that case the control flow operation must stay as we are + // unable to create a single exit-block. We can nevertheless process all its + // successors as they single-entry, single-exit regions. + // + // 3) Only some branch regions are post-dominated by the continuation. + // The other branch regions may either be post-dominated by a return-like op + // or lead to either the continuation or return-like op. + // In this case we also create a single entry block like in 1) that also + // includes all edges to the return-like op: + // +-----+ + // +-----+ bb0 +----+ + // v +-----+ v + // Region 1 +-+-+ ... +-+-+ Region n + // +---+ +---+ + // +---+ |... ... + // |ret|<-+ | | + // +---+ | +---+ | + // +---->++ ++<---+ + // | | + // ++ ++ Region T + // +---+ + // This transforms to: + // +-----+ + // +-----+ bb0 +----+ + // v +-----+ v + // Region 1 +-+-+ ... +-+-+ Region n + // +---+ +---+ + // ... +-----+ ... + // +---->+ bbM +<---+ + // +-----+ + // +-----+ | + // | v + // +---+ | +---+ + // |ret+<---+ ++ ++ + // +---+ | | + // ++ ++ Region T + // +---+ + // + // bb0 to bbM is now a single-entry, single-exit region that applies to case + // 1). The control flow op at the end of bbM will trigger case 2. + SmallVector continuationEdges; + bool continuationPostDominatesAllRegions = true; + bool noSuccessorHasContinuationEdge = true; + for (auto &&[entryEdge, branchRegion] : + llvm::zip(successorEdges(regionEntry), successorBranchRegions)) { + + // If the branch region is empty then the branch target itself is part of + // the continuation. 
+ if (branchRegion.empty()) { + continuationEdges.push_back(entryEdge); + noSuccessorHasContinuationEdge = false; + continue; + } + + for (Block *block : branchRegion) { + if (isRegionExitBlock(block)) { + // If a return-like op is part of the branch region then the + // continuation no longer post-dominates the branch region. + // Add all its incoming edges to edge list to create the single-exit + // block for all branch regions. + continuationPostDominatesAllRegions = false; + for (auto iter = block->pred_begin(); iter != block->pred_end(); + ++iter) { + continuationEdges.emplace_back(*iter, iter.getSuccessorIndex()); + } + continue; + } + + for (Edge edge : successorEdges(block)) { + if (notContinuation.contains(edge.getSuccessor())) + continue; + + continuationEdges.push_back(edge); + noSuccessorHasContinuationEdge = false; + } + } + } + + // case 2) Keep the control flow op but process its successors further. + if (noSuccessorHasContinuationEdge) + return llvm::to_vector(regionEntry->getSuccessors()); + + Block *continuation = llvm::find_singleton( + continuationEdges, [](Edge edge, bool) { return edge.getSuccessor(); }, + /*AllowRepeats=*/true); + + // In case 3) or if not all continuation edges have the same entry block, + // create a single entry block as continuation for all branch regions. + if (!continuation || !continuationPostDominatesAllRegions) { + EdgeMultiplexer multiplexer = createSingleEntryBlock( + continuationEdges.front().getFromBlock()->getTerminator()->getLoc(), + continuationEdges, getSwitchValue, getUndefValue, interface); + continuation = multiplexer.getMultiplexerBlock(); + } + + // Trigger reprocess of case 3) after creating the single entry block. + if (!continuationPostDominatesAllRegions) { + // Unlike in the general case, we are explicitly revisiting the same region + // entry again after having changed its control flow edges and dominance. + // We have to therefore explicitly invalidate the dominance tree. 
+ dominanceInfo.invalidate(regionEntry->getParent()); + return SmallVector{regionEntry}; + } + + SmallVector newSubRegions; + + // Empty blocks with the values they return to the parent op. + SmallVector>> createdEmptyBlocks; + + // Create the branch regions. + std::vector conditionalRegions(successorBranchRegions.size()); + for (auto &&[branchRegion, entryEdge, conditionalRegion] : + llvm::zip(successorBranchRegions, successorEdges(regionEntry), + conditionalRegions)) { + if (branchRegion.empty()) { + // If no block is part of the branch region, we create a dummy block to + // place the region terminator into. + createdEmptyBlocks.emplace_back( + new Block, llvm::to_vector(entryEdge.getSuccessorOperands())); + conditionalRegion.push_back(createdEmptyBlocks.back().first); + continue; + } + + createSingleExitBranchRegion(branchRegion, continuation, createdEmptyBlocks, + conditionalRegion); + + // The entries of the branch regions may only have redundant block arguments + // since the edge to the branch region is always dominating. 
+ Block *subRegionEntryBlock = &conditionalRegion.front(); + for (auto &&[oldValue, newValue] : + llvm::zip(subRegionEntryBlock->getArguments(), + entryEdge.getSuccessorOperands())) + oldValue.replaceAllUsesWith(newValue); + + subRegionEntryBlock->eraseArguments(0, + subRegionEntryBlock->getNumArguments()); + newSubRegions.push_back(subRegionEntryBlock); + } + + Operation *structuredCondOp; + { + auto opBuilder = OpBuilder::atBlockTerminator(regionEntry); + FailureOr result = interface.createStructuredBranchRegionOp( + opBuilder, regionEntry->getTerminator(), + continuation->getArgumentTypes(), conditionalRegions); + if (failed(result)) + return failure(); + structuredCondOp = *result; + regionEntry->getTerminator()->erase(); + } + + for (auto &&[block, valueRange] : createdEmptyBlocks) { + auto builder = OpBuilder::atBlockEnd(block); + LogicalResult result = interface.createStructuredBranchRegionTerminatorOp( + structuredCondOp->getLoc(), builder, structuredCondOp, valueRange); + if (failed(result)) + return failure(); + } + + // Any leftover users of the continuation must be from unconditional branches + // in a branch region. There can only be at most one per branch region as + // all branch regions have been made single-entry single-exit above. + // Replace them with the region terminator. + for (Operation *user : llvm::make_early_inc_range(continuation->getUsers())) { + assert(user->getNumSuccessors() == 1); + auto builder = OpBuilder::atBlockTerminator(user->getBlock()); + LogicalResult result = interface.createStructuredBranchRegionTerminatorOp( + user->getLoc(), builder, structuredCondOp, + static_cast( + getMutableSuccessorOperands(user->getBlock(), 0))); + if (failed(result)) + return failure(); + user->erase(); + } + + for (auto &&[oldValue, newValue] : + llvm::zip(continuation->getArguments(), structuredCondOp->getResults())) + oldValue.replaceAllUsesWith(newValue); + + // Splice together the continuations operations with the region entry. 
+  regionEntry->getOperations().splice(regionEntry->end(),
+                                      continuation->getOperations());
+
+  continuation->erase();
+
+  // After splicing the continuation, the region has to be reprocessed as it has
+  // new successors.
+  newSubRegions.push_back(regionEntry);
+
+  return newSubRegions;
+}
+
+/// Transforms the region to only have a single block for every kind of
+/// return-like operation that all previous occurrences of the return-like op
+/// branch to. If the region only contains a single kind of return-like
+/// operation, it creates a single-entry and single-exit region.
+static ReturnLikeExitCombiner createSingleExitBlocksForReturnLike(
+    Region &region, function_ref<Value(unsigned)> getSwitchValue,
+    CFGToSCFInterface &interface) {
+  ReturnLikeExitCombiner exitCombiner(region, interface);
+
+  for (Block &block : region.getBlocks()) {
+    if (block.getNumSuccessors() != 0)
+      continue;
+    exitCombiner.combineExit(block.getTerminator(), getSwitchValue);
+  }
+
+  return exitCombiner;
+}
+
+/// Checks all preconditions of the transformation prior to any transformations.
+/// Returns failure if any precondition is violated.
+static LogicalResult checkTransformationPreconditions(Region &region) {
+  for (Block &block : region.getBlocks())
+    if (block.hasNoPredecessors() && !block.isEntryBlock())
+      return block.front().emitOpError(
+          "transformation does not support unreachable blocks");
+
+  WalkResult result = region.walk([](Operation *operation) {
+    if (operation->getNumSuccessors() == 0)
+      return WalkResult::advance();
+
+    // This transformation requires all ops with successors to implement the
+    // branch op interface. It is impossible to adjust their block arguments
+    // otherwise.
+    auto branchOpInterface = dyn_cast<BranchOpInterface>(operation);
+    if (!branchOpInterface) {
+      operation->emitOpError("transformation does not support terminators with "
+                             "successors not implementing BranchOpInterface");
+      return WalkResult::interrupt();
+    }
+    // Branch operations must have no side effects. Replacing them would not be
+    // valid otherwise.
+    if (!isMemoryEffectFree(branchOpInterface)) {
+      branchOpInterface->emitOpError(
+          "transformation does not support terminators with side effects");
+      return WalkResult::interrupt();
+    }
+
+    for (unsigned index : llvm::seq(operation->getNumSuccessors())) {
+      SuccessorOperands succOps = branchOpInterface.getSuccessorOperands(index);
+
+      // We cannot support operations with operation-produced successor operands
+      // as it is currently not possible to pass them to any block arguments
+      // other than the first. This breaks creating multiplexer blocks and would
+      // likely need special handling elsewhere too.
+      if (succOps.getProducedOperandCount() == 0)
+        continue;
+
+      branchOpInterface->emitOpError("transformation does not support "
+                                     "operations with operation-produced "
+                                     "successor operands");
+      return WalkResult::interrupt();
+    }
+    return WalkResult::advance();
+  });
+  return failure(result.wasInterrupted());
+}
+
+FailureOr<bool> mlir::transformCFGToSCF(Region &region,
+                                        CFGToSCFInterface &interface,
+                                        DominanceInfo &dominanceInfo) {
+  if (region.empty() || region.hasOneBlock())
+    return false;
+
+  if (failed(checkTransformationPreconditions(region)))
+    return failure();
+
+  DenseMap<Type, Value> typedUndefCache;
+  auto getUndefValue = [&](Type type) {
+    auto [iter, inserted] = typedUndefCache.insert({type, nullptr});
+    if (!inserted)
+      return iter->second;
+
+    auto constantBuilder = OpBuilder::atBlockBegin(&region.front());
+
+    iter->second =
+        interface.getUndefValue(region.getLoc(), constantBuilder, type);
+    return iter->second;
+  };
+
+  // The transformation only creates all values in the range of 0 to
+  // max(#numSuccessors). Therefore using a vector instead of a map.
+  SmallVector<Value> switchValueCache;
+  auto getSwitchValue = [&](unsigned value) {
+    if (value < switchValueCache.size())
+      if (switchValueCache[value])
+        return switchValueCache[value];
+
+    auto constantBuilder = OpBuilder::atBlockBegin(&region.front());
+
+    switchValueCache.resize(
+        std::max<size_t>(switchValueCache.size(), value + 1));
+
+    switchValueCache[value] =
+        interface.getCFGSwitchValue(region.getLoc(), constantBuilder, value);
+    return switchValueCache[value];
+  };
+
+  ReturnLikeExitCombiner exitCombiner =
+      createSingleExitBlocksForReturnLike(region, getSwitchValue, interface);
+
+  // Invalidate any dominance tree on the region as the exit combiner has
+  // added new blocks and edges.
+  dominanceInfo.invalidate(&region);
+
+  SmallVector<Block *> workList = {&region.front()};
+  while (!workList.empty()) {
+    Block *current = workList.pop_back_val();
+
+    // Turn all top-level cycles in the CFG to structured control flow first.
+    // After this transformation, the remaining CFG ops form a DAG.
+    FailureOr<SmallVector<Block *>> newRegions =
+        transformCyclesToSCFLoops(current, getSwitchValue, getUndefValue,
+                                  interface, dominanceInfo, exitCombiner);
+    if (failed(newRegions))
+      return failure();
+
+    // Add the newly created subregions to the worklist. These are the
+    // bodies of the loops.
+    llvm::append_range(workList, *newRegions);
+    // Invalidate the dominance tree as blocks have been moved, created and
+    // added during the cycle to structured loop transformation.
+    if (!newRegions->empty())
+      dominanceInfo.invalidate(current->getParent());
+
+    newRegions = transformToStructuredCFBranches(
+        current, getSwitchValue, getUndefValue, interface, dominanceInfo);
+    if (failed(newRegions))
+      return failure();
+    // Invalidating the dominance tree is generally not required by the
+    // transformation above as the new region entries correspond to unaffected
+    // subtrees in the dominator tree. Only its parent nodes have changed but
+    // won't be visited again.
+ llvm::append_range(workList, *newRegions); + } + + return true; +} diff --git a/mlir/lib/Transforms/Utils/CMakeLists.txt b/mlir/lib/Transforms/Utils/CMakeLists.txt --- a/mlir/lib/Transforms/Utils/CMakeLists.txt +++ b/mlir/lib/Transforms/Utils/CMakeLists.txt @@ -1,4 +1,5 @@ add_mlir_library(MLIRTransformUtils + CFGToSCF.cpp CommutativityUtils.cpp ControlFlowSinkUtils.cpp DialectConversion.cpp @@ -15,6 +16,7 @@ LINK_LIBS PUBLIC MLIRAnalysis + MLIRControlFlowInterfaces MLIRLoopLikeInterface MLIRSideEffectInterfaces MLIRRewrite diff --git a/mlir/test/Conversion/ControlFlowToSCF/test.mlir b/mlir/test/Conversion/ControlFlowToSCF/test.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Conversion/ControlFlowToSCF/test.mlir @@ -0,0 +1,680 @@ +// RUN: mlir-opt --lift-cf-to-scf -split-input-file %s | FileCheck %s + +func.func @simple_if() { + %cond = "test.test1"() : () -> i1 + cf.cond_br %cond, ^bb1, ^bb2 +^bb1: + "test.test2"() : () -> () + cf.br ^bb3 +^bb2: + "test.test3"() : () -> () + cf.br ^bb3 +^bb3: + "test.test4"() : () -> () + return +} + +// CHECK-LABEL: func @simple_if +// CHECK: %[[COND:.*]] = "test.test1"() +// CHECK-NEXT: scf.if %[[COND]] +// CHECK-NEXT: "test.test2"() +// CHECK-NEXT: else +// CHECK-NEXT: "test.test3"() +// CHECK-NEXT: } +// CHECK-NEXT: "test.test4"() +// CHECK-NEXT: return + +// ----- + +func.func @if_with_block_args() -> index { + %cond = "test.test1"() : () -> i1 + cf.cond_br %cond, ^bb1, ^bb2 +^bb1: + %1 = "test.test2"() : () -> (index) + cf.br ^bb3(%1: index) +^bb2: + %2 = "test.test3"() : () -> (index) + cf.br ^bb3(%2: index) +^bb3(%3: index): + "test.test4"() : () -> () + return %3 : index +} + +// CHECK-LABEL: func @if_with_block_args +// CHECK: %[[COND:.*]] = "test.test1"() +// CHECK-NEXT: %[[RES:.*]] = scf.if %[[COND]] +// CHECK-NEXT: %[[VAL1:.*]] = "test.test2"() +// CHECK-NEXT: scf.yield %[[VAL1]] +// CHECK-NEXT: else +// CHECK-NEXT: %[[VAL2:.*]] = "test.test3"() +// CHECK-NEXT: scf.yield %[[VAL2]] +// CHECK: "test.test4"() 
+// CHECK-NEXT: return %[[RES]] + +// ----- + +func.func @empty_else() { + %cond = "test.test1"() : () -> i1 + cf.cond_br %cond, ^bb1, ^bb3 +^bb1: + "test.test2"() : () -> () + cf.br ^bb3 +^bb3: + "test.test4"() : () -> () + return +} + +// CHECK-LABEL: func @empty_else +// CHECK: %[[COND:.*]] = "test.test1"() +// CHECK-NEXT: scf.if %[[COND]] +// CHECK-NEXT: "test.test2"() +// CHECK: else +// CHECK-NEXT: } +// CHECK-NEXT: "test.test4"() +// CHECK-NEXT: return + +// ----- + +func.func @while_loop() { + "test.test1"() : () -> () + cf.br ^bb1 +^bb1: + %cond = "test.test2"() : () -> i1 + cf.cond_br %cond, ^bb2, ^bb3 +^bb2: + "test.test3"() : () -> () + cf.br ^bb1 +^bb3: + "test.test4"() : () -> () + return +} + +// CHECK-LABEL: func @while_loop +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 +// CHECK-NEXT: "test.test1"() +// CHECK-NEXT: scf.while +// CHECK-NEXT: %[[COND:.*]] = "test.test2"() +// CHECK-NEXT: %[[RES:.*]]:2 = scf.if %[[COND]] +// CHECK-NEXT: "test.test3"() +// CHECK-NEXT: scf.yield %[[C0]], %[[C1]] +// CHECK-NEXT: else +// CHECK-NEXT: scf.yield %[[C1]], %[[C0]] +// CHECK: %[[COND:.*]] = arith.trunci %[[RES]]#1 +// CHECK-NEXT: scf.condition(%[[COND]]) +// CHECK-NEXT: do +// CHECK-NEXT: scf.yield +// CHECK: "test.test4"() +// CHECK-NEXT: return + +// ----- + +func.func @while_loop_with_block_args() -> i64{ + %1 = "test.test1"() : () -> index + cf.br ^bb1(%1: index) +^bb1(%2: index): + %cond:2 = "test.test2"() : () -> (i1, i64) + cf.cond_br %cond#0, ^bb2(%cond#1: i64), ^bb3(%cond#1: i64) +^bb2(%3: i64): + %4 = "test.test3"(%3) : (i64) -> index + cf.br ^bb1(%4: index) +^bb3(%5: i64): + "test.test4"() : () -> () + return %5 : i64 +} + +// CHECK-LABEL: func @while_loop_with_block_args +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 +// CHECK-DAG: %[[POISON_i64:.*]] = ub.poison : i64 +// CHECK-DAG: %[[POISON_index:.*]] = ub.poison : index +// CHECK-NEXT: %[[VAL1:.*]] = 
"test.test1"() +// CHECK-NEXT: %[[RES:.*]]:2 = scf.while (%[[ARG0:.*]] = %[[VAL1]], %[[ARG1:.*]] = %[[POISON_i64]]) +// CHECK-NEXT: %[[COND:.*]]:2 = "test.test2"() +// CHECK-NEXT: %[[IF_VALS:.*]]:4 = scf.if %[[COND]]#0 +// CHECK-NEXT: %[[VAL:.*]] = "test.test3"(%[[COND]]#1) +// CHECK-NEXT: scf.yield %[[VAL]], %[[POISON_i64]], %[[C0]], %[[C1]] +// CHECK-NEXT: else +// CHECK-NEXT: scf.yield %[[POISON_index]], %[[COND]]#1, %[[C1]], %[[C0]] +// CHECK: %[[TRUNC:.*]] = arith.trunci %[[IF_VALS]]#3 +// CHECK-NEXT: scf.condition(%[[TRUNC]]) %[[IF_VALS]]#0, %[[IF_VALS]]#1 +// CHECK-NEXT: do +// CHECK-NEXT: ^{{.+}}( +// CHECK-SAME: [[ARG0:[[:alnum:]]+]]: +// CHECK-SAME: [[ARG1:[[:alnum:]]+]]: +// CHECK-NEXT: scf.yield %[[ARG0]], %[[ARG1]] +// CHECK: "test.test4"() : () -> () +// CHECK-NEXT: return %[[RES]]#1 : i64 + +// ----- + +func.func @multi_exit_loop() { + "test.test1"() : () -> () + cf.br ^bb1 +^bb1: + %cond = "test.test2"() : () -> i1 + cf.cond_br %cond, ^bb2, ^bb3 +^bb2: + %cond2 = "test.test3"() : () -> i1 + cf.cond_br %cond2, ^bb3, ^bb1 +^bb3: + "test.test4"() : () -> () + return +} + +// CHECK-LABEL: func @multi_exit_loop +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 +// CHECK-NEXT: "test.test1"() +// CHECK-NEXT: scf.while +// CHECK-NEXT: %[[COND:.*]] = "test.test2"() +// CHECK-NEXT: %[[IF_PAIR:.*]]:2 = scf.if %[[COND]] +// CHECK-NEXT: %[[COND2:.*]] = "test.test3"() +// CHECK-NEXT: %[[IF_PAIR2:.*]]:2 = scf.if %[[COND2]] +// CHECK-NEXT: scf.yield %[[C1]], %[[C0]] +// CHECK-NEXT: else +// CHECK-NEXT: scf.yield %[[C0]], %[[C1]] +// CHECK: scf.yield %[[IF_PAIR2]]#0, %[[IF_PAIR2]]#1 +// CHECK: %[[TRUNC:.*]] = arith.trunci %[[IF_PAIR]]#1 +// CHECK-NEXT: scf.condition(%[[TRUNC]]) +// CHECK-NEXT: do +// CHECK-NEXT: scf.yield +// CHECK: "test.test4"() +// CHECK-NEXT: return + +// ----- + +func.func private @foo(%arg: f32) -> f32 +func.func private @bar(%arg: f32) + +func.func @switch_with_fallthrough(%flag: i32, %arg1 : f32, %arg2 
: f32) { + cf.switch %flag : i32, [ + default: ^bb1(%arg1 : f32), + 0: ^bb2(%arg2 : f32), + 1: ^bb3 + ] + +^bb1(%arg3 : f32): + %0 = call @foo(%arg3) : (f32) -> f32 + cf.br ^bb2(%0 : f32) + +^bb2(%arg4 : f32): + call @bar(%arg4) : (f32) -> () + cf.br ^bb3 + +^bb3: + return +} + +// CHECK-LABEL: func @switch_with_fallthrough +// CHECK-SAME: %[[ARG0:[[:alnum:]]+]] +// CHECK-SAME: %[[ARG1:[[:alnum:]]+]] +// CHECK-SAME: %[[ARG2:[[:alnum:]]+]] +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 +// CHECK-DAG: %[[POISON:.*]] = ub.poison +// CHECK-NEXT: %[[INDEX_CAST:.*]] = arith.index_castui %[[ARG0]] +// CHECK-NEXT: %[[SWITCH_PAIR:.*]]:2 = scf.index_switch %[[INDEX_CAST]] +// CHECK-NEXT: case 0 +// CHECK-NEXT: scf.yield %[[ARG2]], %[[C0]] +// CHECK: case 1 +// CHECK-NEXT: scf.yield %[[POISON]], %[[C1]] +// CHECK: default +// CHECK-NEXT: %[[RES:.*]] = func.call @foo(%[[ARG1]]) +// CHECK-NEXT: scf.yield %[[RES]], %[[C0]] +// CHECK: %[[INDEX_CAST:.*]] = arith.index_castui %[[SWITCH_PAIR]]#1 +// CHECK-NEXT: scf.index_switch %[[INDEX_CAST]] +// CHECK-NEXT: case 0 +// CHECK-NEXT: call @bar(%[[SWITCH_PAIR]]#0) +// CHECK-NEXT: scf.yield +// CHECK: default { +// CHECK-NEXT: } +// CHECK-NEXT: return + +// ----- + +func.func private @bar(%arg: f32) -> (i1, f32) + +func.func @already_structured_loop(%arg: f32) -> f32 { + cf.br ^bb0 + +^bb0: + %cond, %value = call @bar(%arg) : (f32) -> (i1, f32) + cf.cond_br %cond, ^bb1, ^bb0 + +^bb1: + return %value : f32 +} + +// CHECK-LABEL: @already_structured_loop +// CHECK-SAME: %[[ARG:.*]]: f32 +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 +// CHECK-DAG: %[[POISON:.*]] = ub.poison +// CHECK-NEXT: %[[RES:.*]] = scf.while (%[[ARG1:.*]] = %[[POISON]]) +// CHECK-NEXT: %[[CALL_PAIR:.*]]:2 = func.call @bar(%[[ARG]]) +// CHECK-NEXT: %[[IF_PAIR:.*]]:2 = scf.if %[[CALL_PAIR]]#0 +// CHECK-NEXT: scf.yield %[[C1]], %[[C0]] +// CHECK-NEXT: else +// CHECK-NEXT: scf.yield 
%[[C0]], %[[C1]] +// CHECK: %[[TRUNC:.*]] = arith.trunci %[[IF_PAIR]]#1 +// CHECK-NEXT: scf.condition(%[[TRUNC]]) %[[CALL_PAIR]]#1 +// CHECK: return %[[RES]] + +// ----- + +func.func private @bar(%arg: f32) -> (i1, f32) + +// This test makes sure that the exit block using an iteration variable works +// correctly. + +func.func @exit_loop_iter_use(%arg: f32) -> f32 { + cf.br ^bb0(%arg : f32) + +^bb0(%arg1: f32): + %cond, %value = call @bar(%arg1) : (f32) -> (i1, f32) + cf.cond_br %cond, ^bb1, ^bb0(%value : f32) + +^bb1: + return %arg1 : f32 +} + +// CHECK-LABEL: @exit_loop_iter_use +// CHECK-SAME: %[[ARG:.*]]: +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 +// CHECK-DAG: %[[POISON:.*]] = ub.poison +// CHECK-NEXT: %[[RES:.*]]:2 = scf.while +// CHECK-SAME: %[[ARG1:.*]] = %[[ARG]] +// CHECK-SAME: %[[ARG2:.*]] = %[[POISON]] +// CHECK-NEXT: %[[CALL_PAIR:.*]]:2 = func.call @bar(%[[ARG1]]) +// CHECK-NEXT: %[[IF_RES:.*]]:3 = scf.if %[[CALL_PAIR]]#0 +// CHECK-NEXT: scf.yield %[[POISON]], %[[C1]], %[[C0]] +// CHECK-NEXT: else +// CHECK-NEXT: scf.yield %[[CALL_PAIR]]#1, %[[C0]], %[[C1]] +// CHECK: %[[TRUNC:.*]] = arith.trunci %[[IF_RES]]#2 +// CHECK-NEXT: scf.condition(%[[TRUNC]]) %[[IF_RES]]#0, %[[ARG1]] +// CHECK: return %[[RES]]#1 + +// ----- + +func.func private @bar(%arg: f32) -> f32 + +func.func @infinite_loop(%arg: f32) -> f32 { + cf.br ^bb1(%arg: f32) + +^bb1(%arg1: f32): + %0 = call @bar(%arg1) : (f32) -> f32 + cf.br ^bb1(%0 : f32) +} + +// CHECK-LABEL: @infinite_loop +// CHECK-SAME: %[[ARG:.*]]: +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 +// CHECK-NEXT: scf.while +// CHECK-SAME: %[[ARG1:.*]] = %[[ARG]] +// CHECK-NEXT: %[[CALL:.*]] = func.call @bar(%[[ARG1]]) +// CHECK-NEXT: %[[TRUNC:.*]] = arith.trunci %[[C1]] +// CHECK-NEXT: scf.condition(%[[TRUNC]]) %[[CALL]] +// CHECK: %[[POISON:.*]] = ub.poison +// CHECK: return %[[POISON]] + +// ----- + +func.func @multi_return() -> i32 { + 
%cond = "test.test1"() : () -> i1 + cf.cond_br %cond, ^bb1, ^bb3 +^bb1: + %0 = "test.test2"() : () -> i32 + return %0 : i32 +^bb3: + %1 = "test.test4"() : () -> i32 + return %1 : i32 +} + +// CHECK-LABEL: func @multi_return +// CHECK: %[[COND:.*]] = "test.test1"() +// CHECK-NEXT: %[[RES:.*]] = scf.if %[[COND]] +// CHECK-NEXT: %[[VAL2:.*]] = "test.test2"() +// CHECK: scf.yield %[[VAL2]] +// CHECK-NEXT: else +// CHECK-NEXT: %[[VAL4:.*]] = "test.test4"() +// CHECK: scf.yield %[[VAL4]] +// CHECK: return %[[RES]] + +// ----- + +func.func private @bar(%arg: f32) -> f32 + +func.func @conditional_infinite_loop(%arg: f32, %cond: i1) -> f32 { + cf.cond_br %cond, ^bb1(%arg: f32), ^bb2 + +^bb1(%arg1: f32): + %0 = call @bar(%arg1) : (f32) -> f32 + cf.br ^bb1(%0 : f32) + +^bb2: + return %arg : f32 +} + +// CHECK-LABEL: @conditional_infinite_loop +// CHECK-SAME: %[[ARG0:[[:alnum:]]+]]: +// CHECK-SAME: %[[ARG1:[[:alnum:]]+]]: +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 +// CHECK-NEXT: %[[RES:.*]] = scf.if %[[ARG1]] +// CHECK-NEXT: scf.while +// CHECK-SAME: %[[ARG2:.*]] = %[[ARG0]] +// CHECK-NEXT: %[[CALL:.*]] = func.call @bar(%[[ARG2]]) +// CHECK-NEXT: %[[TRUNC:.*]] = arith.trunci %[[C1]] +// CHECK-NEXT: scf.condition(%[[TRUNC]]) %[[CALL]] +// CHECK: else +// CHECK: scf.yield %[[ARG0]] +// CHECK: return %[[RES]] + +// ----- + +// Different return-like terminators lead one control flow op remaining in the top level region. +// Each of the blocks the control flow op leads to are transformed into regions nevertheless. 
+ +func.func private @bar(%arg: i32) + +func.func @mixing_return_like(%cond: i1, %cond2: i1, %cond3: i1) { + %0 = arith.constant 0 : i32 + %1 = arith.constant 1 : i32 + cf.cond_br %cond, ^bb1, ^bb3 + +^bb1: + cf.cond_br %cond2, ^bb2(%0 : i32), ^bb2(%1 : i32) +^bb2(%arg: i32): + call @bar(%arg) : (i32) -> () + "test.returnLike"() : () -> () + +^bb3: + cf.cond_br %cond3, ^bb4(%1 : i32), ^bb4(%0 : i32) +^bb4(%arg2: i32): + call @bar(%arg2) : (i32) -> () + return +} + +// CHECK-LABEL: @mixing_return_like +// CHECK-SAME: %[[COND1:[[:alnum:]]+]]: +// CHECK-SAME: %[[COND2:[[:alnum:]]+]]: +// CHECK-SAME: %[[COND3:[[:alnum:]]+]]: +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 +// CHECK: cf.cond_br %[[COND1]], ^[[BB1:.*]], ^[[BB2:[[:alnum:]]+]] + +// CHECK: ^[[BB1]]: +// CHECK: scf.if +// CHECK-NOT: cf.{{(switch|(cond_)?br)}} +// CHECK: call @bar +// CHECK: "test.returnLike" + +// CHECK: ^[[BB2]]: +// CHECK: scf.if +// CHECK-NOT: cf.{{(switch|(cond_)?br)}} +// CHECK: call @bar +// CHECK: return + +// ----- + +// cf.switch here only has some successors with different return-like ops. +// This test makes sure that if there are at least two successors branching to +// the same region, that this region gets properly turned to structured control +// flow. 
+ +func.func @some_successors_with_different_return(%flag: i32) -> i32 { + %0 = arith.constant 5 : i32 + %1 = arith.constant 6 : i32 + cf.switch %flag : i32, [ + default: ^bb1, + 0: ^bb3(%0 : i32), + 1: ^bb3(%1 : i32) + ] + +^bb1: + "test.returnLike"() : () -> () + +^bb3(%arg: i32): + cf.br ^bb3(%arg : i32) +} + +// CHECK-LABEL: @some_successors_with_different_return +// CHECK-SAME: %[[FLAG:[[:alnum:]]+]]: +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : i32 +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : i32 +// CHECK-DAG: %[[POISON:.*]] = ub.poison +// CHECK-DAG: %[[C5:.*]] = arith.constant 5 : i32 +// CHECK-DAG: %[[C6:.*]] = arith.constant 6 : i32 +// CHECK: %[[INDEX_CAST:.*]] = arith.index_castui %[[FLAG]] +// CHECK-NEXT: %[[INDEX_SWITCH:.*]]:2 = scf.index_switch %[[INDEX_CAST]] +// CHECK: case 0 +// CHECK-NEXT: scf.yield %[[C5]], %[[C1]] +// CHECK: case 1 +// CHECK-NEXT: scf.yield %[[C6]], %[[C1]] +// CHECK: default +// CHECK-NEXT: scf.yield %[[POISON]], %[[C0]] +// CHECK: cf.switch %[[INDEX_SWITCH]]#1 +// CHECK-NEXT: default: ^[[BB2:[[:alnum:]]+]] +// CHECK-SAME: %[[INDEX_SWITCH]]#0 +// CHECK-NEXT: 0: ^[[BB1:[[:alnum:]]+]] +// CHECK-NEXT: ] + +// CHECK: ^[[BB2]]{{.*}}: +// CHECK: scf.while +// CHECK-NOT: cf.{{(switch|(cond_)?br)}} +// CHECK: return + +// CHECK: ^[[BB1]]: +// CHECK-NEXT: "test.returnLike" + +// ----- + +func.func @select_like(%cond: i1) -> i32 { + %0 = arith.constant 0 : i32 + %1 = arith.constant 1 : i32 + cf.cond_br %cond, ^bb0(%0 : i32), ^bb0(%1 : i32) +^bb0(%arg: i32): + return %arg : i32 +} + +// CHECK-LABEL: func @select_like +// CHECK-SAME: %[[COND:.*]]: i1 +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 +// CHECK-NEXT: %[[RES:.*]] = scf.if %[[COND]] +// CHECK-NEXT: scf.yield %[[C0]] +// CHECK-NEXT: else +// CHECK-NEXT: scf.yield %[[C1]] +// CHECK: return %[[RES]] + +// ----- + +func.func @return_like_dominated_by_condition(%cond1: i1, %cond2: i1, %cond3: i1) { + cf.cond_br %cond1, ^bb1, ^bb2 + 
+^bb1: + return + +^bb2: + cf.cond_br %cond2, ^bb3, ^bb4 + +^bb3: + "test.unreachable"() : () -> () + +^bb4: + cf.cond_br %cond3, ^bb3, ^bb5 + +^bb5: + return +} + +// CHECK-LABEL: func @return_like_dominated_by_condition +// CHECK-SAME: %[[COND1:[[:alnum:]]+]] +// CHECK-SAME: %[[COND2:[[:alnum:]]+]] +// CHECK-SAME: %[[COND3:[[:alnum:]]+]] +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 +// CHECK: %[[IF:.*]] = scf.if %[[COND1]] +// CHECK-NEXT: scf.yield +// CHECK: else +// CHECK: scf.if %[[COND2]] +// CHECK-NEXT: scf.yield +// CHECK: else +// CHECK: scf.if %[[COND3]] +// CHECK-NEXT: scf.yield +// CHECK-NEXT: else +// CHECK-NEXT: scf.yield + +// CHECK: cf.switch %[[IF]] +// CHECK-NEXT: default: ^[[BB1:.*]], +// CHECK-NEXT: 0: ^[[BB2:[[:alnum:]]+]] + +// CHECK: ^[[BB2]]: +// CHECK-NEXT: return + +// CHECK: ^[[BB1]]: +// CHECK-NEXT: "test.unreachable" + +// ----- + +func.func @dominator_issue(%cond: i1, %cond2: i1) -> i32 { + cf.cond_br %cond, ^bb1, ^bb2 + +^bb1: + "test.unreachable"() : () -> () + +^bb2: + %value = "test.def"() : () -> i32 + cf.cond_br %cond2, ^bb1, ^bb4 + +^bb4: + return %value : i32 +} + +// CHECK-LABEL: func @dominator_issue +// CHECK-SAME: %[[COND1:[[:alnum:]]+]] +// CHECK-SAME: %[[COND2:[[:alnum:]]+]] +// CHECK: %[[IF:.*]]:2 = scf.if %[[COND1]] +// CHECK: else +// CHECK: %[[VALUE:.*]] = "test.def" +// CHECK: %[[IF_RES:.*]]:2 = scf.if %[[COND2]] +// CHECK: else +// CHECK-NEXT: scf.yield %[[VALUE]] +// CHECK: scf.yield %[[IF_RES]]#0 +// CHECK: cf.switch %[[IF]]#1 +// CHECK-NEXT: default: ^[[BB2:[[:alnum:]]+]]( +// CHECK-SAME: %[[IF]]#0 +// CHECK-NEXT: 0: ^[[BB1:[[:alnum:]]+]] +// CHECK: ^[[BB1]]: +// CHECK-NEXT: "test.unreachable" +// CHECK: ^[[BB2]] +// CHECK-SAME: %[[ARG:[[:alnum:]]+]] +// CHECK-NEXT: return %[[ARG]] + +// ----- + +// Test that %value gets properly passed to ^bb4. 
+ +func.func private @bar(i32) + +func.func @dominator_issue_loop(%cond: i1, %cond2: i1) -> i32 { + %0 = arith.constant 5 : i32 + cf.br ^bb0 + +^bb0: + cf.cond_br %cond, ^bb1, ^bb3 + +^bb1: + %value = "test.def"() : () -> i32 + cf.cond_br %cond2, ^bb0, ^bb4 + +^bb3: + return %0 : i32 + +^bb4: + return %value : i32 +} + +// CHECK-LABEL: func @dominator_issue_loop +// CHECK-SAME: %[[COND1:[[:alnum:]]+]] +// CHECK-SAME: %[[COND2:[[:alnum:]]+]] +// CHECK: %[[WHILE:.*]]:2 = scf.while +// CHECK: %[[IF:.*]]:3 = scf.if %[[COND1]] +// CHECK: %[[DEF:.*]] = "test.def" +// CHECK: %[[IF2:.*]]:3 = scf.if %[[COND2]] +// CHECK: scf.yield +// CHECK-SAME: %[[DEF]] +// CHECK: else +// CHECK: scf.yield %{{.*}}, %{{.*}}, %[[DEF]] +// CHECK: scf.yield %[[IF2]]#0, %[[IF2]]#1, %[[IF2]]#2 +// CHECK: scf.condition(%{{.*}}) %[[IF]]#2, %[[IF]]#0 + +// CHECK: %[[SWITCH:.*]] = scf.index_switch +// CHECK: scf.yield %[[WHILE]]#0 +// CHECK: return %[[SWITCH]] + +// ----- + +// Multi entry loops generally produce code in dire need of canonicalization. 
+ +func.func private @comp1(%arg: i32) -> i1 +func.func private @comp2(%arg: i32) -> i1 +func.func private @foo(%arg: i32) + +func.func @multi_entry_loop(%cond: i1) { + %0 = arith.constant 6 : i32 + %1 = arith.constant 5 : i32 + cf.cond_br %cond, ^bb0, ^bb1 + +^bb0: + %exit = call @comp1(%0) : (i32) -> i1 + cf.cond_br %exit, ^bb2(%0 : i32), ^bb1 + +^bb1: + %exit2 = call @comp2(%1) : (i32) -> i1 + cf.cond_br %exit2, ^bb2(%1 : i32), ^bb0 + +^bb2(%arg3 : i32): + call @foo(%arg3) : (i32) -> () + return +} + +// CHECK-LABEL: func @multi_entry_loop +// CHECK-SAME: %[[ARG0:[[:alnum:]]+]] +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 +// CHECK-DAG: %[[UB:.*]] = ub.poison +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 +// CHECK-DAG: %[[C6:.*]] = arith.constant 6 +// CHECK-DAG: %[[C5:.*]] = arith.constant 5 +// CHECK: %[[IF:.*]]:{{.*}} = scf.if %[[ARG0]] +// CHECK-NEXT: scf.yield %[[C1]], %[[UB]] +// CHECK-NEXT: else +// CHECK-NEXT: scf.yield %[[C0]], %[[UB]] +// CHECK: %[[WHILE:.*]]:{{.*}} = scf.while +// CHECK-SAME: %[[ARG1:.*]] = %[[IF]]#0 +// CHECK-SAME: %[[ARG2:.*]] = %[[IF]]#1 +// CHECK-NEXT: %[[FLAG:.*]] = arith.index_castui %[[ARG1]] +// CHECK-NEXT: %[[SWITCH:.*]]:{{.*}} = scf.index_switch %[[FLAG]] +// CHECK-NEXT: case 0 +// CHECK-NEXT: %[[EXIT:.*]] = func.call @comp2(%[[C5]]) +// CHECK-NEXT: %[[IF_RES:.*]]:{{.*}} = scf.if %[[EXIT]] +// CHECK-NEXT: scf.yield %[[UB]], %[[C5]], %[[C1]], %[[C0]] +// CHECK-NEXT: else +// CHECK-NEXT: scf.yield %[[C1]], %[[UB]], %[[C0]], %[[C1]] +// CHECK: scf.yield %[[IF_RES]]#0, %[[IF_RES]]#1, %[[IF_RES]]#2, %[[IF_RES]]#3 +// CHECK: default +// CHECK-NEXT: %[[EXIT:.*]] = func.call @comp1(%[[C6]]) +// CHECK-NEXT: %[[IF_RES:.*]]:{{.*}} = scf.if %[[EXIT]] +// CHECK-NEXT: scf.yield %[[UB]], %[[C6]], %[[C1]], %[[C0]] +// CHECK-NEXT: else +// CHECK-NEXT: scf.yield %[[C0]], %[[UB]], %[[C0]], %[[C1]] +// CHECK: scf.yield %[[IF_RES]]#0, %[[IF_RES]]#1, %[[IF_RES]]#2, %[[IF_RES]]#3 +// CHECK: %[[COND:.*]] = arith.trunci %[[SWITCH]]#3 +// CHECK-NEXT: 
scf.condition(%[[COND]]) %[[SWITCH]]#0, %[[SWITCH]]#1 +// CHECK: do +// CHECK: scf.yield +// CHECK: call @foo(%[[WHILE]]#1) +// CHECK-NEXT: return