diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
--- a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
+++ b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
@@ -83,6 +83,9 @@
     return valueMapping.lookup(value);
   }
 
+  /// Looks up a list of remapped values.
+  SmallVector<llvm::Value *> lookupValues(ValueRange values);
+
   /// Stores the mapping between an MLIR block and LLVM IR basic block.
   void mapBlock(Block *mlir, llvm::BasicBlock *llvm) {
     auto result = blockMapping.try_emplace(mlir, llvm);
@@ -110,6 +113,10 @@
     return branchMapping.lookup(op);
   }
 
+  /// Removes the mapping for blocks contained in the region and values defined
+  /// in these blocks.
+  void forgetMapping(Region &region);
+
   /// Returns the LLVM metadata corresponding to a reference to an mlir LLVM
   /// dialect access group operation.
   llvm::MDNode *getAccessGroup(Operation &opInst,
@@ -142,9 +149,6 @@
   /// Converts the type from MLIR LLVM dialect to LLVM.
   llvm::Type *convertType(Type type);
 
-  /// Looks up remapped a list of remapped values.
-  SmallVector<llvm::Value *, 8> lookupValues(ValueRange values);
-
   /// Returns the MLIR context of the module being translated.
   MLIRContext &getContext() { return *mlirModule->getContext(); }
 
@@ -217,7 +221,7 @@
   /// translated makes the frame available when translating ops within that
   /// region.
   template <typename T, typename... Args>
-  void stackPush(Args &&... args) {
+  void stackPush(Args &&...args) {
     static_assert(
         std::is_base_of<StackFrame, T>::value,
         "can only push instances of StackFrame on ModuleTranslation stack");
diff --git a/mlir/lib/Target/LLVMIR/CMakeLists.txt b/mlir/lib/Target/LLVMIR/CMakeLists.txt
--- a/mlir/lib/Target/LLVMIR/CMakeLists.txt
+++ b/mlir/lib/Target/LLVMIR/CMakeLists.txt
@@ -28,8 +28,6 @@
 
   LINK_LIBS PUBLIC
   MLIRLLVMIR
-  MLIROpenACC
-  MLIROpenMP
   MLIRLLVMIRTransforms
   MLIRTranslation
   )
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h"
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/IR/Operation.h"
 #include "mlir/Support/LLVM.h"
 #include "mlir/Target/LLVMIR/ModuleTranslation.h"
@@ -33,6 +34,19 @@
       : allocaInsertPoint(allocaIP) {}
   llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
 };
+
+/// ModuleTranslation stack frame containing the partial mapping between MLIR
+/// values and their LLVM IR equivalents.
+class OpenMPVarMappingStackFrame
+    : public LLVM::ModuleTranslation::StackFrameBase<
+          OpenMPVarMappingStackFrame> {
+public:
+  explicit OpenMPVarMappingStackFrame(
+      const DenseMap<Value, llvm::Value *> &mapping)
+      : mapping(mapping) {}
+
+  DenseMap<Value, llvm::Value *> mapping;
+};
 } // namespace
 
 /// Find the insertion point for allocas given the current insertion point for
@@ -62,22 +76,66 @@
 /// Converts the given region that appears within an OpenMP dialect operation to
 /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the
 /// region, and a branch from any block with an successor-less OpenMP terminator
-/// to `continuationBlock`.
-static void convertOmpOpRegions(Region &region, StringRef blockName,
-                                llvm::BasicBlock &sourceBlock,
-                                llvm::BasicBlock &continuationBlock,
-                                llvm::IRBuilderBase &builder,
-                                LLVM::ModuleTranslation &moduleTranslation,
-                                LogicalResult &bodyGenStatus) {
+/// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes
+/// of the continuation block if provided.
+static void convertOmpOpRegions(
+    Region &region, StringRef blockName, llvm::BasicBlock &sourceBlock,
+    llvm::BasicBlock &continuationBlock, llvm::IRBuilderBase &builder,
+    LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus,
+    SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) {
   llvm::LLVMContext &llvmContext = builder.getContext();
   for (Block &bb : region) {
     llvm::BasicBlock *llvmBB = llvm::BasicBlock::Create(
-        llvmContext, blockName, builder.GetInsertBlock()->getParent());
+        llvmContext, blockName, builder.GetInsertBlock()->getParent(),
+        builder.GetInsertBlock()->getNextNode());
     moduleTranslation.mapBlock(&bb, llvmBB);
   }
 
   llvm::Instruction *sourceTerminator = sourceBlock.getTerminator();
 
+  // Terminators (namely YieldOp) may be forwarding values from the region that
+  // need to be available in the continuation block. Collect the types of these
+  // operands in preparation of creating PHI nodes.
+  SmallVector<llvm::Type *> continuationBlockPHITypes;
+  bool operandsProcessed = false;
+  unsigned numYields = 0;
+  for (Block &bb : region.getBlocks()) {
+    if (omp::YieldOp yield = dyn_cast<omp::YieldOp>(bb.getTerminator())) {
+      if (!operandsProcessed) {
+        for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
+          continuationBlockPHITypes.push_back(
+              moduleTranslation.convertType(yield->getOperand(i).getType()));
+        }
+        operandsProcessed = true;
+      } else {
+        assert(continuationBlockPHITypes.size() == yield->getNumOperands() &&
+               "mismatching number of values yielded from the region");
+        for (unsigned i = 0, e = yield->getNumOperands(); i < e; ++i) {
+          llvm::Type *operandType =
+              moduleTranslation.convertType(yield->getOperand(i).getType());
+          (void)operandType;
+          assert(continuationBlockPHITypes[i] == operandType &&
+                 "values of mismatching types yielded from the region");
+        }
+      }
+      numYields++;
+    }
+  }
+
+  // Insert PHI nodes in the continuation block for any values forwarded by the
+  // terminators in this region.
+  if (!continuationBlockPHITypes.empty())
+    assert(
+        continuationBlockPHIs &&
+        "expected continuation block PHIs if converted regions yield values");
+  if (continuationBlockPHIs) {
+    llvm::IRBuilderBase::InsertPointGuard guard(builder);
+    continuationBlockPHIs->reserve(continuationBlockPHITypes.size());
+    builder.SetInsertPoint(&continuationBlock, continuationBlock.begin());
+    for (llvm::Type *ty : continuationBlockPHITypes)
+      continuationBlockPHIs->push_back(builder.CreatePHI(ty, numYields));
+  }
+
   // Convert blocks one by one in topological order to ensure
   // defs are converted before uses.
   SetVector<Block *> blocks =
@@ -108,12 +166,24 @@
     // ModuleTranslation class to set up the correct insertion point. This is
     // also consistent with MLIR's idiom of handling special region terminators
     // in the same code that handles the region-owning operation.
-    if (isa<omp::TerminatorOp, omp::YieldOp>(bb->getTerminator()))
+    Operation *terminator = bb->getTerminator();
+    if (isa<omp::TerminatorOp, omp::YieldOp>(terminator)) {
       builder.CreateBr(&continuationBlock);
+
+      for (unsigned i = 0, e = terminator->getNumOperands(); i < e; ++i)
+        (*continuationBlockPHIs)[i]->addIncoming(
+            moduleTranslation.lookupValue(terminator->getOperand(i)), llvmBB);
+    }
   }
-  // Finally, after all blocks have been traversed and values mapped,
-  // connect the PHI nodes to the results of preceding blocks.
+  // After all blocks have been traversed and values mapped, connect the PHI
+  // nodes to the results of preceding blocks.
   LLVM::detail::connectPHINodes(region, moduleTranslation);
+
+  // Remove the blocks and values defined in this region from the mapping since
+  // they are not visible outside of this region. This allows the same region to
+  // be converted several times, that is cloned, without clashes, and slightly
+  // speeds up the lookups.
+  moduleTranslation.forgetMapping(region);
 }
 
 /// Converts the OpenMP parallel operation to LLVM IR.
@@ -243,6 +313,167 @@
   return success();
 }
 
+/// Returns a reduction declaration that corresponds to the given reduction
+/// operation in the given container. Currently only supports reductions inside
+/// WsLoopOp but can be easily extended.
+static omp::ReductionDeclareOp findReductionDecl(omp::WsLoopOp container,
+                                                 omp::ReductionOp reduction) {
+  SymbolRefAttr reductionSymbol;
+  for (unsigned i = 0, e = container.getNumReductionVars(); i < e; ++i) {
+    if (container.reduction_vars()[i] != reduction.accumulator())
+      continue;
+    reductionSymbol = (*container.reductions())[i].cast<SymbolRefAttr>();
+    break;
+  }
+  assert(reductionSymbol &&
+         "reduction operation must be associated with a declaration");
+
+  return SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>(
+      container, reductionSymbol);
+}
+
+/// Populates `reductions` with reduction declarations used in the given loop.
+static void
+collectReductionDecls(omp::WsLoopOp loop,
+                      SmallVectorImpl<omp::ReductionDeclareOp> &reductions) {
+  Optional<ArrayAttr> attr = loop.reductions();
+  if (!attr)
+    return;
+
+  reductions.reserve(reductions.size() + loop.getNumReductionVars());
+  for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {
+    reductions.push_back(
+        SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>(
+            loop, symbolRef));
+  }
+}
+
+/// Translates the blocks contained in the given region and appends them at
+/// the current insertion point of `builder`. The operations of the entry block
+/// are appended to the current insertion block, which is not expected to have a
+/// terminator. If set, `continuationBlockArgs` is populated with translated
+/// values that correspond to the values omp.yield'ed from the region.
+static LogicalResult inlineConvertOmpRegions(
+    Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
+    LLVM::ModuleTranslation &moduleTranslation,
+    SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) {
+  if (region.empty())
+    return success();
+
+  // Special case for single-block regions that don't create additional blocks:
+  // insert operations without creating additional blocks.
+  if (llvm::hasSingleElement(region)) {
+    moduleTranslation.mapBlock(&region.front(), builder.GetInsertBlock());
+    if (failed(moduleTranslation.convertBlock(
+            region.front(), /*ignoreArguments=*/true, builder)))
+      return failure();
+
+    // The continuation arguments are simply the translated terminator operands.
+    if (continuationBlockArgs)
+      llvm::append_range(
+          *continuationBlockArgs,
+          moduleTranslation.lookupValues(region.front().back().getOperands()));
+
+    // Drop the mapping that is no longer necessary so that the same region can
+    // be processed multiple times.
+    moduleTranslation.forgetMapping(region);
+    return success();
+  }
+
+  // Create the continuation block manually instead of calling splitBlock
+  // because the current insertion block may not have a terminator.
+  llvm::BasicBlock *continuationBlock =
+      llvm::BasicBlock::Create(builder.getContext(), blockName + ".cont",
+                               builder.GetInsertBlock()->getParent(),
+                               builder.GetInsertBlock()->getNextNode());
+  builder.CreateBr(continuationBlock);
+
+  LogicalResult bodyGenStatus = success();
+  SmallVector<llvm::PHINode *> phis;
+  convertOmpOpRegions(region, blockName, *builder.GetInsertBlock(),
+                      *continuationBlock, builder, moduleTranslation,
+                      bodyGenStatus, &phis);
+  if (failed(bodyGenStatus))
+    return failure();
+  if (continuationBlockArgs)
+    llvm::append_range(*continuationBlockArgs, phis);
+  builder.SetInsertPoint(continuationBlock,
+                         continuationBlock->getFirstInsertionPt());
+  return success();
+}
+
+namespace {
+/// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to
+/// store lambdas with capture.
+using OwningReductionGen = std::function<llvm::OpenMPIRBuilder::InsertPointTy(
+    llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *,
+    llvm::Value *&)>;
+using OwningAtomicReductionGen =
+    std::function<llvm::OpenMPIRBuilder::InsertPointTy(
+        llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *)>;
+} // namespace
+
+/// Create an OpenMPIRBuilder-compatible reduction generator for the given
+/// reduction declaration. The generator uses `builder` but ignores its
+/// insertion point.
+static OwningReductionGen
+makeReductionGen(omp::ReductionDeclareOp decl, llvm::IRBuilderBase &builder,
+                 LLVM::ModuleTranslation &moduleTranslation) {
+  // The lambda is mutable because we need access to non-const methods of decl
+  // (which aren't actually mutating it), and we must capture decl by-value to
+  // avoid the dangling reference after the parent function returns.
+  OwningReductionGen gen =
+      [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint,
+                llvm::Value *lhs, llvm::Value *rhs,
+                llvm::Value *&result) mutable {
+        Region &reductionRegion = decl.reductionRegion();
+        moduleTranslation.mapValue(reductionRegion.front().getArgument(0), lhs);
+        moduleTranslation.mapValue(reductionRegion.front().getArgument(1), rhs);
+        builder.restoreIP(insertPoint);
+        SmallVector<llvm::Value *> phis;
+        if (failed(inlineConvertOmpRegions(reductionRegion,
+                                           "omp.reduction.nonatomic.body",
+                                           builder, moduleTranslation, &phis)))
+          return llvm::OpenMPIRBuilder::InsertPointTy();
+        assert(phis.size() == 1);
+        result = phis[0];
+        return builder.saveIP();
+      };
+  return gen;
+}
+
+/// Create an OpenMPIRBuilder-compatible atomic reduction generator for the
+/// given reduction declaration. The generator uses `builder` but ignores its
+/// insertion point. Returns null if there is no atomic region available in the
+/// reduction declaration.
+static OwningAtomicReductionGen
+makeAtomicReductionGen(omp::ReductionDeclareOp decl,
+                       llvm::IRBuilderBase &builder,
+                       LLVM::ModuleTranslation &moduleTranslation) {
+  if (decl.atomicReductionRegion().empty())
+    return OwningAtomicReductionGen();
+
+  // The lambda is mutable because we need access to non-const methods of decl
+  // (which aren't actually mutating it), and we must capture decl by-value to
+  // avoid the dangling reference after the parent function returns.
+  OwningAtomicReductionGen atomicGen =
+      [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint,
+                llvm::Value *lhs, llvm::Value *rhs) mutable {
+        Region &atomicRegion = decl.atomicReductionRegion();
+        moduleTranslation.mapValue(atomicRegion.front().getArgument(0), lhs);
+        moduleTranslation.mapValue(atomicRegion.front().getArgument(1), rhs);
+        builder.restoreIP(insertPoint);
+        SmallVector<llvm::Value *> phis;
+        if (failed(inlineConvertOmpRegions(atomicRegion,
+                                           "omp.reduction.atomic.body", builder,
+                                           moduleTranslation, &phis)))
+          return llvm::OpenMPIRBuilder::InsertPointTy();
+        assert(phis.empty());
+        return builder.saveIP();
+      };
+  return atomicGen;
+}
+
 /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
 static LogicalResult
 convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
@@ -258,6 +489,57 @@
     schedule =
         *omp::symbolizeClauseScheduleKind(loop.schedule_val().getValue());
 
+  // Find the loop configuration.
+  llvm::Value *step = moduleTranslation.lookupValue(loop.step()[0]);
+  llvm::Type *ivType = step->getType();
+  llvm::Value *chunk =
+      loop.schedule_chunk_var()
+          ? moduleTranslation.lookupValue(loop.schedule_chunk_var())
+          : llvm::ConstantInt::get(ivType, 1);
+
+  SmallVector<omp::ReductionDeclareOp> reductionDecls;
+  collectReductionDecls(loop, reductionDecls);
+  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
+      findAllocaInsertPoint(builder, moduleTranslation);
+
+  // Allocate space for privatized reduction variables.
+  SmallVector<llvm::Value *> privateReductionVariables;
+  DenseMap<Value, llvm::Value *> reductionVariableMap;
+  unsigned numReductions = loop.getNumReductionVars();
+  privateReductionVariables.reserve(numReductions);
+  if (numReductions != 0) {
+    llvm::IRBuilderBase::InsertPointGuard guard(builder);
+    builder.restoreIP(allocaIP);
+    for (unsigned i = 0; i < numReductions; ++i) {
+      auto reductionType =
+          loop.reduction_vars()[i].getType().cast<LLVM::LLVMPointerType>();
+      llvm::Value *var = builder.CreateAlloca(
+          moduleTranslation.convertType(reductionType.getElementType()));
+      privateReductionVariables.push_back(var);
+      reductionVariableMap.try_emplace(loop.reduction_vars()[i], var);
+    }
+  }
+
+  // Store the mapping between reduction variables and their private copies on
+  // ModuleTranslation stack. It can then be recovered when translating
+  // omp.reduction operations in a separate call.
+  LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
+      moduleTranslation, reductionVariableMap);
+
+  // Before the loop, store the initial values of reductions into reduction
+  // variables. Although this could be done after allocas, we don't want to
+  // interfere with the alloca insertion point.
+  for (unsigned i = 0; i < numReductions; ++i) {
+    SmallVector<llvm::Value *> phis;
+    if (failed(inlineConvertOmpRegions(reductionDecls[i].initializerRegion(),
+                                       "omp.reduction.neutral", builder,
+                                       moduleTranslation, &phis)))
+      return failure();
+    assert(phis.size() == 1 && "expected one value to be yielded from the "
+                               "reduction neutral element declaration region");
+    builder.CreateStore(phis[0], privateReductionVariables[i]);
+  }
+
   // Set up the source location value for OpenMP runtime.
   llvm::DISubprogram *subprogram =
       builder.GetInsertBlock()->getParent()->getSubprogram();
@@ -329,14 +611,7 @@
   llvm::CanonicalLoopInfo *loopInfo =
       ompBuilder->collapseLoops(diLoc, loopInfos, {});
 
-  // Find the loop configuration.
-  llvm::Type *ivType = loopInfo->getIndVar()->getType();
-  llvm::Value *chunk =
-      loop.schedule_chunk_var()
-          ? moduleTranslation.lookupValue(loop.schedule_chunk_var())
-          : llvm::ConstantInt::get(ivType, 1);
-  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
-      findAllocaInsertPoint(builder, moduleTranslation);
+  allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
   if (schedule == omp::ClauseScheduleKind::Static) {
     ompBuilder->applyStaticWorkshareLoop(ompLoc.DL, loopInfo, allocaIP,
                                          !loop.nowait(), chunk);
@@ -369,6 +644,98 @@
   // potential further loop transformations. Use the insertion point stored
   // before collapsing loops instead.
   builder.restoreIP(afterIP);
+
+  // Process the reductions if required.
+  if (numReductions == 0)
+    return success();
+
+  // Create the reduction generators. We need to own them here because
+  // ReductionInfo only accepts references to the generators.
+  SmallVector<OwningReductionGen> owningReductionGens;
+  SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
+  for (unsigned i = 0; i < numReductions; ++i) {
+    owningReductionGens.push_back(
+        makeReductionGen(reductionDecls[i], builder, moduleTranslation));
+    owningAtomicReductionGens.push_back(
+        makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));
+  }
+
+  // Collect the reduction information.
+  SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;
+  reductionInfos.reserve(numReductions);
+  for (unsigned i = 0; i < numReductions; ++i) {
+    llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr;
+    if (owningAtomicReductionGens[i])
+      atomicGen = owningAtomicReductionGens[i];
+    reductionInfos.push_back(
+        {moduleTranslation.lookupValue(loop.reduction_vars()[i]),
+         privateReductionVariables[i], owningReductionGens[i], atomicGen});
+  }
+
+  // The call to createReductions below expects the block to have a
+  // terminator. Create an unreachable instruction to serve as terminator
+  // and remove it later.
+  llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
+  builder.SetInsertPoint(tempTerminator);
+  llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
+      ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
+                                   loop.nowait());
+  if (!contInsertPoint.getBlock())
+    return loop->emitOpError() << "failed to convert reductions";
+  auto nextInsertionPoint =
+      ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for);
+  tempTerminator->eraseFromParent();
+  builder.restoreIP(nextInsertionPoint);
+
+  return success();
+}
+
+/// Converts an OpenMP reduction operation using OpenMPIRBuilder. Expects the
+/// mapping between reduction variables and their private equivalents to have
+/// been stored on the ModuleTranslation stack. Currently only supports
+/// reduction within WsLoopOp, but can be easily extended.
+static LogicalResult
+convertOmpReductionOp(omp::ReductionOp reductionOp,
+                      llvm::IRBuilderBase &builder,
+                      LLVM::ModuleTranslation &moduleTranslation) {
+  // Find the declaration that corresponds to the reduction op.
+  auto reductionContainer = reductionOp->getParentOfType<omp::WsLoopOp>();
+  omp::ReductionDeclareOp declaration =
+      findReductionDecl(reductionContainer, reductionOp);
+  assert(declaration && "could not find reduction declaration");
+
+  // Retrieve the mapping between reduction variables and their private
+  // equivalents.
+  const DenseMap<Value, llvm::Value *> *reductionVariableMap = nullptr;
+  moduleTranslation.stackWalk<OpenMPVarMappingStackFrame>(
+      [&](const OpenMPVarMappingStackFrame &frame) {
+        reductionVariableMap = &frame.mapping;
+        return WalkResult::interrupt();
+      });
+  assert(reductionVariableMap && "couldn't find private reduction variables");
+
+  // Translate the reduction operation by emitting the body of the corresponding
+  // reduction declaration.
+  Region &reductionRegion = declaration.reductionRegion();
+  llvm::Value *privateReductionVar =
+      reductionVariableMap->lookup(reductionOp.accumulator());
+  llvm::Value *reductionVal = builder.CreateLoad(
+      moduleTranslation.convertType(reductionOp.operand().getType()),
+      privateReductionVar);
+
+  moduleTranslation.mapValue(reductionRegion.front().getArgument(0),
+                             reductionVal);
+  moduleTranslation.mapValue(
+      reductionRegion.front().getArgument(1),
+      moduleTranslation.lookupValue(reductionOp.operand()));
+
+  SmallVector<llvm::Value *> phis;
+  if (failed(inlineConvertOmpRegions(reductionRegion, "omp.reduction.body",
+                                     builder, moduleTranslation, &phis)))
+    return failure();
+  assert(phis.size() == 1 && "expected one value to be yielded from "
+                             "the reduction body declaration region");
+  builder.CreateStore(phis[0], privateReductionVar);
   return success();
 }
 
@@ -426,6 +793,9 @@
       .Case([&](omp::ParallelOp) {
         return convertOmpParallel(*op, builder, moduleTranslation);
       })
+      .Case([&](omp::ReductionOp reductionOp) {
+        return convertOmpReductionOp(reductionOp, builder, moduleTranslation);
+      })
       .Case([&](omp::MasterOp) {
         return convertOmpMaster(*op, builder, moduleTranslation);
       })
@@ -435,13 +805,14 @@
       .Case([&](omp::WsLoopOp) {
         return convertOmpWsLoop(*op, builder, moduleTranslation);
       })
-      .Case<omp::YieldOp, omp::TerminatorOp>([](auto op) {
-        // `yield` and `terminator` can be just omitted. The block structure was
-        // created in the function that handles their parent operation.
-        assert(op->getNumOperands() == 0 &&
-               "unexpected OpenMP terminator with operands");
-        return success();
-      })
+      .Case<omp::YieldOp, omp::TerminatorOp, omp::ReductionDeclareOp>(
+          [](auto op) {
+            // `yield` and `terminator` can be just omitted. The block structure
+            // was created in the function that handles their parent operation.
+            // `reduction.declare` will be used by reductions and is not
+            // converted directly, skip it.
+            return success();
+          })
       .Default([&](Operation *inst) {
         return inst->emitError("unsupported OpenMP operation: ")
                << inst->getName();
diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -224,6 +224,31 @@
     ompBuilder->finalize();
 }
 
+void ModuleTranslation::forgetMapping(Region &region) {
+  SmallVector<Region *> toProcess;
+  toProcess.push_back(&region);
+  while (!toProcess.empty()) {
+    Region *current = toProcess.pop_back_val();
+    for (Block &block : *current) {
+      blockMapping.erase(&block);
+      for (Value arg : block.getArguments())
+        valueMapping.erase(arg);
+      for (Operation &op : block) {
+        for (Value value : op.getResults())
+          valueMapping.erase(value);
+        if (op.hasSuccessors())
+          branchMapping.erase(&op);
+        if (isa<LLVM::GlobalOp>(op))
+          globalsMapping.erase(&op);
+        accessGroupMetadataMapping.erase(&op);
+        llvm::append_range(
+            toProcess,
+            llvm::map_range(op.getRegions(), [](Region &r) { return &r; }));
+      }
+    }
+  }
+}
+
 /// Get the SSA value passed to the current block from the terminator operation
 /// of its predecessor.
 static Value getPHISourceValue(Block *current, Block *pred,
@@ -686,15 +711,6 @@
   return success();
 }
 
-/// Check whether the module contains only supported ops directly in its body.
-static LogicalResult checkSupportedModuleOps(Operation *m) {
-  for (Operation &o : getModuleBody(m).getOperations())
-    if (!isa<LLVM::LLVMFuncOp, LLVM::GlobalOp, LLVM::MetadataOp>(&o) &&
-        !o.hasTrait<OpTrait::IsTerminator>())
-      return o.emitOpError("unsupported module-level operation");
-  return success();
-}
-
 LogicalResult ModuleTranslation::convertFunctionSignatures() {
   // Declare all functions first because there may be function calls that form a
   // call graph with cycles, or global initializers that reference functions.
@@ -850,10 +866,9 @@
   return typeTranslator.translateType(type);
 }
 
-/// A helper to look up remapped operands in the value remapping table.`
-SmallVector<llvm::Value *, 8>
-ModuleTranslation::lookupValues(ValueRange values) {
-  SmallVector<llvm::Value *, 8> remapped;
+/// A helper to look up remapped operands in the value remapping table.
+SmallVector<llvm::Value *> ModuleTranslation::lookupValues(ValueRange values) {
+  SmallVector<llvm::Value *> remapped;
   remapped.reserve(values.size());
   for (Value v : values)
     remapped.push_back(lookupValue(v));
@@ -900,8 +915,6 @@
                               StringRef name) {
   if (!satisfiesLLVMModule(module))
     return nullptr;
-  if (failed(checkSupportedModuleOps(module)))
-    return nullptr;
   std::unique_ptr<llvm::Module> llvmModule =
       prepareLLVMModule(module, llvmContext, name);
 
@@ -918,6 +931,17 @@
     return nullptr;
   if (failed(translator.convertFunctions()))
     return nullptr;
+
+  // Convert other top-level operations if possible.
+  llvm::IRBuilder<> llvmBuilder(llvmContext);
+  for (Operation &o : getModuleBody(module).getOperations()) {
+    if (!isa<LLVM::LLVMFuncOp, LLVM::GlobalOp, LLVM::MetadataOp>(&o) &&
+        !o.hasTrait<OpTrait::IsTerminator>() &&
+        failed(translator.convertOperation(o, llvmBuilder))) {
+      return nullptr;
+    }
+  }
+
   if (llvm::verifyModule(*translator.llvmModule, &llvm::errs()))
     return nullptr;
 
diff --git a/mlir/test/Target/LLVMIR/llvmir-invalid.mlir b/mlir/test/Target/LLVMIR/llvmir-invalid.mlir
--- a/mlir/test/Target/LLVMIR/llvmir-invalid.mlir
+++ b/mlir/test/Target/LLVMIR/llvmir-invalid.mlir
@@ -1,6 +1,6 @@
 // RUN: mlir-translate -verify-diagnostics -split-input-file -mlir-to-llvmir %s
 
-// expected-error @+1 {{unsupported module-level operation}}
+// expected-error @+1 {{cannot be converted to LLVM IR}}
 func @foo() {
   llvm.return
 }
diff --git a/mlir/test/Target/LLVMIR/openmp-reduction.mlir b/mlir/test/Target/LLVMIR/openmp-reduction.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-reduction.mlir
@@ -0,0 +1,418 @@
+// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
+
+// Only check the overall shape of the code and the presence of relevant
+// runtime calls. Actual IR checking is done at the OpenMPIRBuilder level.
+
+omp.reduction.declare @add_f32 : f32
+init {
+^bb0(%arg: f32):
+  %0 = llvm.mlir.constant(0.0 : f32) : f32
+  omp.yield (%0 : f32)
+}
+combiner {
+^bb1(%arg0: f32, %arg1: f32):
+  %1 = llvm.fadd %arg0, %arg1 : f32
+  omp.yield (%1 : f32)
+}
+atomic {
+^bb2(%arg2: !llvm.ptr<f32>, %arg3: !llvm.ptr<f32>):
+  %2 = llvm.load %arg3 : !llvm.ptr<f32>
+  llvm.atomicrmw fadd %arg2, %2 monotonic : f32
+  omp.yield
+}
+
+// CHECK-LABEL: @simple_reduction
+llvm.func @simple_reduction(%lb : i64, %ub : i64, %step : i64) {
+  %c1 = llvm.mlir.constant(1 : i32) : i32
+  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
+  omp.parallel {
+    omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step)
+    reduction(@add_f32 -> %0 : !llvm.ptr<f32>) {
+      %1 = llvm.mlir.constant(2.0 : f32) : f32
+      omp.reduction %1, %0 : !llvm.ptr<f32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  llvm.return
+}
+
+// Call to the outlined function.
+// CHECK: call void {{.*}} @__kmpc_fork_call
+// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]
+
+// Outlined function.
+// CHECK: define internal void @[[OUTLINED]]
+
+// Private reduction variable and its initialization.
+// CHECK: %[[PRIVATE:.+]] = alloca float
+// CHECK: store float 0.000000e+00, float* %[[PRIVATE]]
+
+// Call to the reduction function.
+// CHECK: call i32 @__kmpc_reduce
+// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]
+
+// Atomic reduction.
+// CHECK: %[[PARTIAL:.+]] = load float, float* %[[PRIVATE]]
+// CHECK: atomicrmw fadd float* %{{.*}}, float %[[PARTIAL]]
+
+// Non-atomic reduction:
+// CHECK: fadd float
+// CHECK: call void @__kmpc_end_reduce
+// CHECK: br label %[[FINALIZE:.+]]
+
+// CHECK: [[FINALIZE]]:
+// CHECK: call void @__kmpc_barrier
+
+// Update of the private variable using the reduction region
+// (the body block currently comes after all the other blocks).
+// CHECK: %[[PARTIAL:.+]] = load float, float* %[[PRIVATE]]
+// CHECK: %[[UPDATED:.+]] = fadd float %[[PARTIAL]], 2.000000e+00
+// CHECK: store float %[[UPDATED]], float* %[[PRIVATE]]
+
+// Reduction function.
+// CHECK: define internal void @[[REDFUNC]]
+// CHECK: fadd float
+
+// -----
+
+omp.reduction.declare @add_f32 : f32
+init {
+^bb0(%arg: f32):
+  %0 = llvm.mlir.constant(0.0 : f32) : f32
+  omp.yield (%0 : f32)
+}
+combiner {
+^bb1(%arg0: f32, %arg1: f32):
+  %1 = llvm.fadd %arg0, %arg1 : f32
+  omp.yield (%1 : f32)
+}
+atomic {
+^bb2(%arg2: !llvm.ptr<f32>, %arg3: !llvm.ptr<f32>):
+  %2 = llvm.load %arg3 : !llvm.ptr<f32>
+  llvm.atomicrmw fadd %arg2, %2 monotonic : f32
+  omp.yield
+}
+
+// When the same reduction declaration is used several times, its regions
+// are translated several times, which shouldn't lead to value/block
+// remapping assertions.
+// CHECK-LABEL: @reuse_declaration
+llvm.func @reuse_declaration(%lb : i64, %ub : i64, %step : i64) {
+  %c1 = llvm.mlir.constant(1 : i32) : i32
+  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
+  %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
+  omp.parallel {
+    omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step)
+    reduction(@add_f32 -> %0 : !llvm.ptr<f32>, @add_f32 -> %2 : !llvm.ptr<f32>) {
+      %1 = llvm.mlir.constant(2.0 : f32) : f32
+      omp.reduction %1, %0 : !llvm.ptr<f32>
+      omp.reduction %1, %2 : !llvm.ptr<f32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  llvm.return
+}
+
+// Call to the outlined function.
+// CHECK: call void {{.*}} @__kmpc_fork_call
+// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]
+
+// Outlined function.
+// CHECK: define internal void @[[OUTLINED]]
+
+// Private reduction variables and their initialization.
+// CHECK: %[[PRIVATE1:.+]] = alloca float
+// CHECK: %[[PRIVATE2:.+]] = alloca float
+// CHECK: store float 0.000000e+00, float* %[[PRIVATE1]]
+// CHECK: store float 0.000000e+00, float* %[[PRIVATE2]]
+
+// Call to the reduction function.
+// CHECK: call i32 @__kmpc_reduce
+// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]
+
+// Atomic reduction.
+// CHECK: %[[PARTIAL1:.+]] = load float, float* %[[PRIVATE1]]
+// CHECK: atomicrmw fadd float* %{{.*}}, float %[[PARTIAL1]]
+// CHECK: %[[PARTIAL2:.+]] = load float, float* %[[PRIVATE2]]
+// CHECK: atomicrmw fadd float* %{{.*}}, float %[[PARTIAL2]]
+
+// Non-atomic reduction:
+// CHECK: fadd float
+// CHECK: fadd float
+// CHECK: call void @__kmpc_end_reduce
+// CHECK: br label %[[FINALIZE:.+]]
+
+// CHECK: [[FINALIZE]]:
+// CHECK: call void @__kmpc_barrier
+
+// Update of the private variables using the reduction region
+// (the body block currently comes after all the other blocks).
+// CHECK: %[[PARTIAL1:.+]] = load float, float* %[[PRIVATE1]]
+// CHECK: %[[UPDATED1:.+]] = fadd float %[[PARTIAL1]], 2.000000e+00
+// CHECK: store float %[[UPDATED1]], float* %[[PRIVATE1]]
+// CHECK: %[[PARTIAL2:.+]] = load float, float* %[[PRIVATE2]]
+// CHECK: %[[UPDATED2:.+]] = fadd float %[[PARTIAL2]], 2.000000e+00
+// CHECK: store float %[[UPDATED2]], float* %[[PRIVATE2]]
+
+// Reduction function.
+// CHECK: define internal void @[[REDFUNC]]
+// CHECK: fadd float
+// CHECK: fadd float
+
+
+// -----
+
+omp.reduction.declare @add_f32 : f32
+init {
+^bb0(%arg: f32):
+  %0 = llvm.mlir.constant(0.0 : f32) : f32
+  omp.yield (%0 : f32)
+}
+combiner {
+^bb1(%arg0: f32, %arg1: f32):
+  %1 = llvm.fadd %arg0, %arg1 : f32
+  omp.yield (%1 : f32)
+}
+atomic {
+^bb2(%arg2: !llvm.ptr<f32>, %arg3: !llvm.ptr<f32>):
+  %2 = llvm.load %arg3 : !llvm.ptr<f32>
+  llvm.atomicrmw fadd %arg2, %2 monotonic : f32
+  omp.yield
+}
+
+// It's okay not to reference the reduction variable in the body.
+// CHECK-LABEL: @missing_omp_reduction
+llvm.func @missing_omp_reduction(%lb : i64, %ub : i64, %step : i64) {
+  %c1 = llvm.mlir.constant(1 : i32) : i32
+  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
+  %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
+  omp.parallel {
+    omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step)
+    reduction(@add_f32 -> %0 : !llvm.ptr<f32>, @add_f32 -> %2 : !llvm.ptr<f32>) {
+      %1 = llvm.mlir.constant(2.0 : f32) : f32
+      omp.reduction %1, %0 : !llvm.ptr<f32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  llvm.return
+}
+
+// Call to the outlined function.
+// CHECK: call void {{.*}} @__kmpc_fork_call
+// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]
+
+// Outlined function.
+// CHECK: define internal void @[[OUTLINED]]
+
+// Private reduction variables and their initialization.
+// CHECK: %[[PRIVATE1:.+]] = alloca float
+// CHECK: %[[PRIVATE2:.+]] = alloca float
+// CHECK: store float 0.000000e+00, float* %[[PRIVATE1]]
+// CHECK: store float 0.000000e+00, float* %[[PRIVATE2]]
+
+// Call to the reduction function.
+// CHECK: call i32 @__kmpc_reduce
+// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]
+
+// Atomic reduction.
+// CHECK: %[[PARTIAL1:.+]] = load float, float* %[[PRIVATE1]]
+// CHECK: atomicrmw fadd float* %{{.*}}, float %[[PARTIAL1]]
+// CHECK: %[[PARTIAL2:.+]] = load float, float* %[[PRIVATE2]]
+// CHECK: atomicrmw fadd float* %{{.*}}, float %[[PARTIAL2]]
+
+// Non-atomic reduction:
+// CHECK: fadd float
+// CHECK: fadd float
+// CHECK: call void @__kmpc_end_reduce
+// CHECK: br label %[[FINALIZE:.+]]
+
+// CHECK: [[FINALIZE]]:
+// CHECK: call void @__kmpc_barrier
+
+// Update of the private variable using the reduction region
+// (the body block currently comes after all the other blocks).
+// CHECK: %[[PARTIAL1:.+]] = load float, float* %[[PRIVATE1]]
+// CHECK: %[[UPDATED1:.+]] = fadd float %[[PARTIAL1]], 2.000000e+00
+// CHECK: store float %[[UPDATED1]], float* %[[PRIVATE1]]
+// CHECK-NOT: %{{.*}} = load float, float* %[[PRIVATE2]]
+// CHECK-NOT: %{{.*}} = fadd float %[[PARTIAL2]], 2.000000e+00
+
+// Reduction function.
+// CHECK: define internal void @[[REDFUNC]]
+// CHECK: fadd float
+// CHECK: fadd float
+
+// -----
+
+omp.reduction.declare @add_f32 : f32
+init {
+^bb0(%arg: f32):
+  %0 = llvm.mlir.constant(0.0 : f32) : f32
+  omp.yield (%0 : f32)
+}
+combiner {
+^bb1(%arg0: f32, %arg1: f32):
+  %1 = llvm.fadd %arg0, %arg1 : f32
+  omp.yield (%1 : f32)
+}
+atomic {
+^bb2(%arg2: !llvm.ptr<f32>, %arg3: !llvm.ptr<f32>):
+  %2 = llvm.load %arg3 : !llvm.ptr<f32>
+  llvm.atomicrmw fadd %arg2, %2 monotonic : f32
+  omp.yield
+}
+
+// It's okay to refer to the same reduction variable more than once in the
+// body.
+// CHECK-LABEL: @double_reference
+llvm.func @double_reference(%lb : i64, %ub : i64, %step : i64) {
+  %c1 = llvm.mlir.constant(1 : i32) : i32
+  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
+  omp.parallel {
+    omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step)
+    reduction(@add_f32 -> %0 : !llvm.ptr<f32>) {
+      %1 = llvm.mlir.constant(2.0 : f32) : f32
+      omp.reduction %1, %0 : !llvm.ptr<f32>
+      omp.reduction %1, %0 : !llvm.ptr<f32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  llvm.return
+}
+
+// Call to the outlined function.
+// CHECK: call void {{.*}} @__kmpc_fork_call
+// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]
+
+// Outlined function.
+// CHECK: define internal void @[[OUTLINED]]
+
+// Private reduction variable and its initialization.
+// CHECK: %[[PRIVATE:.+]] = alloca float
+// CHECK: store float 0.000000e+00, float* %[[PRIVATE]]
+
+// Call to the reduction function.
+// CHECK: call i32 @__kmpc_reduce
+// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]
+
+// Atomic reduction.
+// CHECK: %[[PARTIAL:.+]] = load float, float* %[[PRIVATE]]
+// CHECK: atomicrmw fadd float* %{{.*}}, float %[[PARTIAL]]
+
+// Non-atomic reduction:
+// CHECK: fadd float
+// CHECK: call void @__kmpc_end_reduce
+// CHECK: br label %[[FINALIZE:.+]]
+
+// CHECK: [[FINALIZE]]:
+// CHECK: call void @__kmpc_barrier
+
+// Update of the private variable using the reduction region
+// (the body block currently comes after all the other blocks).
+// CHECK: %[[PARTIAL:.+]] = load float, float* %[[PRIVATE]]
+// CHECK: %[[UPDATED:.+]] = fadd float %[[PARTIAL]], 2.000000e+00
+// CHECK: store float %[[UPDATED]], float* %[[PRIVATE]]
+// CHECK: %[[PARTIAL:.+]] = load float, float* %[[PRIVATE]]
+// CHECK: %[[UPDATED:.+]] = fadd float %[[PARTIAL]], 2.000000e+00
+// CHECK: store float %[[UPDATED]], float* %[[PRIVATE]]
+
+// Reduction function.
+// CHECK: define internal void @[[REDFUNC]]
+// CHECK: fadd float
+
+// -----
+
+omp.reduction.declare @add_f32 : f32
+init {
+^bb0(%arg: f32):
+  %0 = llvm.mlir.constant(0.0 : f32) : f32
+  omp.yield (%0 : f32)
+}
+combiner {
+^bb1(%arg0: f32, %arg1: f32):
+  %1 = llvm.fadd %arg0, %arg1 : f32
+  omp.yield (%1 : f32)
+}
+atomic {
+^bb2(%arg2: !llvm.ptr<f32>, %arg3: !llvm.ptr<f32>):
+  %2 = llvm.load %arg3 : !llvm.ptr<f32>
+  llvm.atomicrmw fadd %arg2, %2 monotonic : f32
+  omp.yield
+}
+
+omp.reduction.declare @mul_f32 : f32
+init {
+^bb0(%arg: f32):
+  %0 = llvm.mlir.constant(1.0 : f32) : f32
+  omp.yield (%0 : f32)
+}
+combiner {
+^bb1(%arg0: f32, %arg1: f32):
+  %1 = llvm.fmul %arg0, %arg1 : f32
+  omp.yield (%1 : f32)
+}
+
+// CHECK-LABEL: @no_atomic
+llvm.func @no_atomic(%lb : i64, %ub : i64, %step : i64) {
+  %c1 = llvm.mlir.constant(1 : i32) : i32
+  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
+  %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
+  omp.parallel {
+    omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step)
+    reduction(@add_f32 -> %0 : !llvm.ptr<f32>, @mul_f32 -> %2 : !llvm.ptr<f32>) {
+      %1 = llvm.mlir.constant(2.0 : f32) : f32
+      omp.reduction %1, %0 : !llvm.ptr<f32>
+      omp.reduction %1, %2 : !llvm.ptr<f32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  llvm.return
+}
+
+// Call to the outlined function.
+// CHECK: call void {{.*}} @__kmpc_fork_call
+// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]
+
+// Outlined function.
+// CHECK: define internal void @[[OUTLINED]]
+
+// Private reduction variables and their initialization.
+// CHECK: %[[PRIVATE1:.+]] = alloca float
+// CHECK: %[[PRIVATE2:.+]] = alloca float
+// CHECK: store float 0.000000e+00, float* %[[PRIVATE1]]
+// CHECK: store float 1.000000e+00, float* %[[PRIVATE2]]
+
+// Call to the reduction function.
+// CHECK: call i32 @__kmpc_reduce
+// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]
+
+// Atomic reduction not provided.
+// CHECK: unreachable
+
+// Non-atomic reduction:
+// CHECK: fadd float
+// CHECK: fmul float
+// CHECK: call void @__kmpc_end_reduce
+// CHECK: br label %[[FINALIZE:.+]]
+
+// CHECK: [[FINALIZE]]:
+// CHECK: call void @__kmpc_barrier
+
+// Update of the private variables using the reduction region
+// (the body block currently comes after all the other blocks).
+// CHECK: %[[PARTIAL1:.+]] = load float, float* %[[PRIVATE1]]
+// CHECK: %[[UPDATED1:.+]] = fadd float %[[PARTIAL1]], 2.000000e+00
+// CHECK: store float %[[UPDATED1]], float* %[[PRIVATE1]]
+// CHECK: %[[PARTIAL2:.+]] = load float, float* %[[PRIVATE2]]
+// CHECK: %[[UPDATED2:.+]] = fmul float %[[PARTIAL2]], 2.000000e+00
+// CHECK: store float %[[UPDATED2]], float* %[[PRIVATE2]]
+
+// Reduction function.
+// CHECK: define internal void @[[REDFUNC]]
+// CHECK: fadd float
+// CHECK: fmul float