Index: flang/lib/Lower/OpenMP.cpp
===================================================================
--- flang/lib/Lower/OpenMP.cpp
+++ flang/lib/Lower/OpenMP.cpp
@@ -60,28 +60,21 @@
   return sym;
 }
 
-template <typename T>
 static void
-createPrivateVarSyms(Fortran::lower::AbstractConverter &converter,
-                     const T *clause,
-                     [[maybe_unused]] Block *lastPrivBlock = nullptr) {
-  const Fortran::parser::OmpObjectList &ompObjectList = clause->v;
-  for (const Fortran::parser::OmpObject &ompObject : ompObjectList.v) {
-    Fortran::semantics::Symbol *sym = getOmpObjectSymbol(ompObject);
-    // Privatization for symbols which are pre-determined (like loop index
-    // variables) happen separately, for everything else privatize here.
-    if (sym->test(Fortran::semantics::Symbol::Flag::OmpPreDetermined))
-      continue;
-    bool success = converter.createHostAssociateVarClone(*sym);
-    (void)success;
-    assert(success && "Privatization failed due to existing binding");
-    if constexpr (std::is_same_v<T, Fortran::parser::OmpClause::Firstprivate>) {
-      converter.copyHostAssociateVar(*sym);
-    } else if constexpr (std::is_same_v<
-                             T, Fortran::parser::OmpClause::Lastprivate>) {
-      converter.copyHostAssociateVar(*sym, lastPrivBlock);
-    }
-  }
+privatizeSymbol(Fortran::lower::AbstractConverter &converter,
+                const Fortran::semantics::Symbol *sym,
+                [[maybe_unused]] mlir::Block *lastPrivBlock = nullptr) {
+  // Privatization for symbols which are pre-determined (like loop index
+  // variables) happen separately, for everything else privatize here.
+  if (sym->test(Fortran::semantics::Symbol::Flag::OmpPreDetermined))
+    return;
+  bool success = converter.createHostAssociateVarClone(*sym);
+  (void)success;
+  assert(success && "Privatization failed due to existing binding");
+  if (sym->test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate))
+    converter.copyHostAssociateVar(*sym);
+  if (sym->test(Fortran::semantics::Symbol::Flag::OmpLastPrivate))
+    converter.copyHostAssociateVar(*sym, lastPrivBlock);
 }
 
 template <typename Op>
@@ -90,10 +83,7 @@
                           Fortran::lower::pft::Evaluation &eval) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   auto insPt = firOpBuilder.saveInsertionPoint();
-  firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());
-  bool hasFirstPrivateOp = false;
-  bool hasLastPrivateOp = false;
-  // Symbols in private and/or firstprivate clauses.
+  // Symbols in private, firstprivate, and/or lastprivate clauses.
   llvm::SetVector<const Fortran::semantics::Symbol *> privatizedSymbols;
   auto collectOmpObjectListSymbol =
       [&](const Fortran::parser::OmpObjectList &ompObjectList,
@@ -105,7 +95,8 @@
       };
   // We need just one ICmpOp for multiple LastPrivate clauses.
   mlir::arith::CmpIOp cmpOp;
-
+  mlir::Block *lastPrivBlock = nullptr;
+  bool hasLastPrivateOp = false;
   for (const Fortran::parser::OmpClause &clause : opClauseList.v) {
     if (const auto &privateClause =
             std::get_if<Fortran::parser::OmpClause::Private>(&clause.u)) {
@@ -114,15 +105,14 @@
                    std::get_if<Fortran::parser::OmpClause::Firstprivate>(
                        &clause.u)) {
       collectOmpObjectListSymbol(firstPrivateClause->v, privatizedSymbols);
-      hasFirstPrivateOp = true;
     } else if (const auto &lastPrivateClause =
                    std::get_if<Fortran::parser::OmpClause::Lastprivate>(
                        &clause.u)) {
       // TODO: Add lastprivate support for sections construct, simd construct
       if (std::is_same_v<Op, omp::WsLoopOp>) {
         omp::WsLoopOp *wsLoopOp = dyn_cast<omp::WsLoopOp>(&op);
-        fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
-        auto insPt = firOpBuilder.saveInsertionPoint();
+        mlir::Operation *lastOper = wsLoopOp->region().back().getTerminator();
+        firOpBuilder.setInsertionPoint(lastOper);
 
         // Our goal here is to introduce the following control flow
         // just before exiting the worksharing loop.
@@ -146,10 +136,6 @@
         //    omp.yield
         // }
 
-        Operation *lastOper = wsLoopOp->region().back().getTerminator();
-
-        firOpBuilder.setInsertionPoint(lastOper);
-
         // TODO: The following will not work when there is collapse present.
         // Have to modify this in future.
         for (const Fortran::parser::OmpClause &clause : opClauseList.v)
@@ -158,7 +144,7 @@
             TODO(converter.getCurrentLocation(),
                  "Collapse clause with lastprivate");
         // Only generate the compare once in presence of multiple LastPrivate
-        // clauses
+        // clauses.
         if (!hasLastPrivateOp) {
           cmpOp = firOpBuilder.create<mlir::arith::CmpIOp>(
               wsLoopOp->getLoc(), mlir::arith::CmpIPredicate::eq,
@@ -167,14 +153,12 @@
         }
         mlir::scf::IfOp ifOp = firOpBuilder.create<mlir::scf::IfOp>(
             wsLoopOp->getLoc(), cmpOp, /*else*/ false);
-
-        firOpBuilder.restoreInsertionPoint(insPt);
-        createPrivateVarSyms(converter, lastPrivateClause,
-                             &(ifOp.getThenRegion().front()));
+        lastPrivBlock = &ifOp.getThenRegion().front();
       } else {
         TODO(converter.getCurrentLocation(),
-             "lastprivate clause in constructs other than work-share loop");
+             "lastprivate clause in constructs other than worksharing-loop");
       }
+      collectOmpObjectListSymbol(lastPrivateClause->v, privatizedSymbols);
       hasLastPrivateOp = true;
     }
   }
@@ -215,30 +199,31 @@
     }
   }
 
-  auto privatizeSymbol = [&](const Fortran::semantics::Symbol *sym) {
-    // Privatization for symbols which are pre-determined (like loop index
-    // variables) happen separately, for everything else privatize here.
-    if (sym->test(Fortran::semantics::Symbol::Flag::OmpPreDetermined))
-      return;
-    bool success = converter.createHostAssociateVarClone(*sym);
-    (void)success;
-    assert(success && "Privatization failed due to existing binding");
-    if (sym->test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate)) {
-      converter.copyHostAssociateVar(*sym);
-      hasFirstPrivateOp = true;
-    }
-  };
-
-  for (auto sym : privatizedSymbols)
-    privatizeSymbol(sym);
+  bool needBarrier = false;
+  firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());
+  for (auto sym : privatizedSymbols) {
+    privatizeSymbol(converter, sym, lastPrivBlock);
+    if (sym->test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate) &&
+        sym->test(Fortran::semantics::Symbol::Flag::OmpLastPrivate))
+      needBarrier = true;
+  }
 
   for (auto sym : defaultSymbols)
     if (!symbolsInNestedRegions.contains(sym) &&
         !symbolsInParentRegions.contains(sym) &&
         !privatizedSymbols.contains(sym))
-      privatizeSymbol(sym);
-  if (hasFirstPrivateOp)
+      privatizeSymbol(converter, sym);
+
+  // Emit implicit barrier to synchronize threads and avoid data races on
+  // initialization of firstprivate variables and post-update of lastprivate
+  // variables.
+  // FIXME: Emit barrier for lastprivate clause when 'sections' directive has
+  // 'nowait' clause. Otherwise, emit barrier when 'sections' directive has
+  // both firstprivate and lastprivate clause.
+  // Emit implicit barrier for linear clause. Maybe on somewhere else.
+  if (needBarrier)
     firOpBuilder.create<mlir::omp::BarrierOp>(converter.getCurrentLocation());
+
   firOpBuilder.restoreInsertionPoint(insPt);
   return hasLastPrivateOp;
 }
Index: flang/test/Lower/OpenMP/default-clause.f90
===================================================================
--- flang/test/Lower/OpenMP/default-clause.f90
+++ flang/test/Lower/OpenMP/default-clause.f90
@@ -16,7 +16,6 @@
 !CHECK: fir.store %[[const]] to %[[PRIVATE_X]] : !fir.ref<i32>
 !CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFEy"}
 !CHECK: %[[PRIVATE_W:.*]] = fir.alloca i32 {bindc_name = "w", pinned, uniq_name = "_QFEw"}
-!CHECK: omp.barrier
 !CHECK: %[[const:.*]] = arith.constant 2 : i32
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_Y]] : !fir.ref<i32>
 !CHECK: %[[result:.*]] = arith.muli %[[const]], %[[temp]] : i32
@@ -65,7 +64,6 @@
 !CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFEx"}
 !CHECK: %[[temp:.*]] = fir.load %[[X]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[PRIVATE_X]] : !fir.ref<i32>
-!CHECK: omp.barrier
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_Y]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[PRIVATE_X]] : !fir.ref<i32>
 !CHECK: omp.terminator
@@ -83,7 +81,6 @@
 !CHECK: %[[PRIVATE_W:.*]] = fir.alloca i32 {bindc_name = "w", pinned, uniq_name = "_QFEw"}
 !CHECK: %[[temp:.*]] = fir.load %[[W]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[PRIVATE_W]] : !fir.ref<i32>
-!CHECK: omp.barrier
 !CHECK: %[[const:.*]] = arith.constant 2 : i32
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_Y]] : !fir.ref<i32>
 !CHECK: %[[result:.*]] = arith.muli %[[const]], %[[temp]] : i32
@@ -115,7 +112,6 @@
 !CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFEx"}
 !CHECK: %[[temp:.*]] = fir.load %[[X]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[PRIVATE_X]] : !fir.ref<i32>
-!CHECK: omp.barrier
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[PRIVATE_W]] : !fir.ref<i32>
 !CHECK: omp.terminator
@@ -149,7 +145,6 @@
 !CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"}
 !CHECK: %[[PRIVATE_Z:.*]] = fir.alloca i32 {bindc_name = "z", pinned, uniq_name = "_QFnested_default_clause_testsEz"}
 !CHECK: %[[PRIVATE_K:.*]] = fir.alloca i32 {bindc_name = "k", pinned, uniq_name = "_QFnested_default_clause_testsEk"}
-!CHECK: omp.barrier
 !CHECK: omp.parallel {
 !CHECK: %[[INNER_PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"}
 !CHECK: %[[INNER_PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFnested_default_clause_testsEx"}
@@ -167,7 +162,6 @@
 !CHECK: %[[INNER_PRIVATE_K:.*]] = fir.alloca i32 {bindc_name = "k", pinned, uniq_name = "_QFnested_default_clause_testsEk"}
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_K]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[INNER_PRIVATE_K]] : !fir.ref<i32>
-!CHECK: omp.barrier
 !CHECK: %[[const:.*]] = arith.constant 30 : i32
 !CHECK: fir.store %[[const]] to %[[PRIVATE_Y]] : !fir.ref<i32>
 !CHECK: %[[const:.*]] = arith.constant 40 : i32
@@ -207,7 +201,6 @@
 !CHECK: %[[INNER_PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"}
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_Y]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[INNER_PRIVATE_Y]] : !fir.ref<i32>
-!CHECK: omp.barrier
 !CHECK: %[[temp:.*]] = fir.load %[[INNER_PRIVATE_Y]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[PRIVATE_INNER_X]] : !fir.ref<i32>
 !CHECK: omp.terminator
@@ -243,7 +236,6 @@
 !CHECK: %[[INNER_PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"}
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_Y]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[INNER_PRIVATE_Y]] : !fir.ref<i32>
-!CHECK: omp.barrier
 !CHECK: %[[temp:.*]] = fir.load %[[INNER_PRIVATE_Y]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[INNER_PRIVATE_X]] : !fir.ref<i32>
 !CHECK: omp.terminator
@@ -273,7 +265,6 @@
 !CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"}
 !CHECK: %[[temp:.*]] = fir.load %[[Y]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[PRIVATE_Y]] : !fir.ref<i32>
-!CHECK: omp.barrier
 !CHECK: omp.single {
 !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_Y]] : !fir.ref<i32>
 !CHECK: fir.store %[[temp]] to %[[PRIVATE_X]] : !fir.ref<i32>
Index: flang/test/Lower/OpenMP/omp-parallel-firstprivate-clause-scalar.f90
===================================================================
--- flang/test/Lower/OpenMP/omp-parallel-firstprivate-clause-scalar.f90
+++ flang/test/Lower/OpenMP/omp-parallel-firstprivate-clause-scalar.f90
@@ -11,7 +11,6 @@
 !FIRDialect:     %[[ARG2_PVT:.*]] = fir.alloca !fir.complex<8> {bindc_name = "arg2", pinned, uniq_name = "_QFfirstprivate_complexEarg2"}
 !FIRDialect:     %[[ARG2_VAL:.*]] = fir.load %[[ARG2]] : !fir.ref<!fir.complex<8>>
 !FIRDialect:     fir.store %[[ARG2_VAL]] to %[[ARG2_PVT]] : !fir.ref<!fir.complex<8>>
-!FIRDialect:     omp.barrier
 !FIRDialect:     fir.call @_QPfoo(%[[ARG1_PVT]], %[[ARG2_PVT]]) : (!fir.ref<!fir.complex<4>>, !fir.ref<!fir.complex<8>>) -> ()
 !FIRDialect:     omp.terminator
 !FIRDialect:   }
@@ -46,7 +45,6 @@
 !FIRDialect:    %[[ARG6_PVT:.*]] = fir.alloca i128 {bindc_name = "arg6", pinned, uniq_name = "_QFfirstprivate_integerEarg6"}
 !FIRDialect:    %[[ARG6_VAL:.*]] = fir.load %[[ARG6]] : !fir.ref<i128>
 !FIRDialect:    fir.store %[[ARG6_VAL]] to %[[ARG6_PVT]] : !fir.ref<i128>
-!FIRDialect:    omp.barrier
 !FIRDialect:    fir.call @_QPbar(%[[ARG1_PVT]], %[[ARG2_PVT]], %[[ARG3_PVT]], %[[ARG4_PVT]], %[[ARG5_PVT]], %[[ARG6_PVT]]) : (!fir.ref<i32>, !fir.ref<i8>, !fir.ref<i16>, !fir.ref<i32>, !fir.ref<i64>, !fir.ref<i128>) -> ()
 !FIRDialect:    omp.terminator
 !FIRDialect:  }
@@ -82,7 +80,6 @@
 !FIRDialect:     %[[ARG5_PVT:.*]] = fir.alloca !fir.logical<8> {bindc_name = "arg5", pinned, uniq_name = "_QFfirstprivate_logicalEarg5"}
 !FIRDialect:     %[[ARG5_VAL:.*]] = fir.load %[[ARG5]] : !fir.ref<!fir.logical<8>>
 !FIRDialect:     fir.store %[[ARG5_VAL]] to %[[ARG5_PVT]] : !fir.ref<!fir.logical<8>>
-!FIRDialect:     omp.barrier
 !FIRDialect:     fir.call @_QPbaz(%[[ARG1_PVT]], %[[ARG2_PVT]], %[[ARG3_PVT]], %[[ARG4_PVT]], %[[ARG5_PVT]]) : (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<1>>, !fir.ref<!fir.logical<2>>, !fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<8>>) -> ()
 !FIRDialect:     omp.terminator
 !FIRDialect:   }
@@ -120,7 +117,6 @@
 !FIRDialect:     %[[ARG6_PVT:.*]] = fir.alloca f128 {bindc_name = "arg6", pinned, uniq_name = "_QFfirstprivate_realEarg6"}
 !FIRDialect:     %[[ARG6_VAL:.*]] = fir.load %[[ARG6]] : !fir.ref<f128>
 !FIRDialect:     fir.store %[[ARG6_VAL]] to %[[ARG6_PVT]] : !fir.ref<f128>
-!FIRDialect:     omp.barrier
 !FIRDialect:     fir.call @_QPqux(%[[ARG1_PVT]], %[[ARG2_PVT]], %[[ARG3_PVT]], %[[ARG4_PVT]], %[[ARG5_PVT]], %[[ARG6_PVT]]) : (!fir.ref<f32>, !fir.ref<f16>, !fir.ref<f32>, !fir.ref<f64>, !fir.ref<f80>, !fir.ref<f128>) -> ()
 !FIRDialect:     omp.terminator
 !FIRDialect:   }
@@ -149,8 +145,6 @@
 !FIRDialect:             %[[B_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "b", pinned, uniq_name = "_QFmultiple_firstprivateEb"}
 !FIRDialect:             %[[B:.*]] = fir.load %[[B_ADDR]] : !fir.ref<i32>
 !FIRDialect:             fir.store %[[B]] to %[[B_PRIV_ADDR]] : !fir.ref<i32>
-!FIRDialect:             omp.barrier
-!FIRDialect-NOT:         omp.barrier
 !FIRDialect:             fir.call @_QPquux(%[[A_PRIV_ADDR]], %[[B_PRIV_ADDR]]) : (!fir.ref<i32>, !fir.ref<i32>) -> ()
 !FIRDialect:             omp.terminator
 !FIRDialect:           }
Index: flang/test/Lower/OpenMP/omp-parallel-lastprivate-clause-scalar.f90
===================================================================
--- flang/test/Lower/OpenMP/omp-parallel-lastprivate-clause-scalar.f90
+++ flang/test/Lower/OpenMP/omp-parallel-lastprivate-clause-scalar.f90
@@ -1,5 +1,4 @@
 ! This test checks lowering of `FIRSTPRIVATE` clause for scalar types.
-! TODO: Add a test for same var being first and lastprivate when support is there.
 
 ! RUN: bbc -fopenmp -emit-fir %s -o - | FileCheck %s
 ! RUN: flang-new -fc1 -fopenmp -emit-fir %s -o - | FileCheck %s
@@ -84,24 +83,21 @@
 end subroutine
 
 !CHECK: func.func @_QPmult_lastprivate_int(%[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "arg1"}, %[[ARG2:.*]]: !fir.ref<i32> {fir.bindc_name = "arg2"}) {
-!CHECK-DAG: omp.parallel  {
+!CHECK: omp.parallel  {
 !CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1"
 !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2"
 !CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
 
 ! Testing last iteration check
-!CHECK-DAG: %[[IV_CMP1:.*]] = arith.cmpi eq, %[[INDX_WS]]
-!CHECK-DAG: scf.if %[[IV_CMP1]] {
-! Testing lastprivate val update
-!CHECK-NEXT: %[[CLONE_LD1:.*]] = fir.load %[[CLONE1]] : !fir.ref<i32>
-!CHECK-NEXT: fir.store %[[CLONE_LD1]] to %[[ARG1]] : !fir.ref<i32>
-!CHECK-DAG: }
-!CHECK-DAG: scf.if %[[IV_CMP1]] {
+!CHECK: %[[IV_CMP1:.*]] = arith.cmpi eq, %[[INDX_WS]]
+!CHECK-NEXT: scf.if %[[IV_CMP1]] {
 ! Testing lastprivate val update
-!CHECK-NEXT: %[[CLONE_LD2:.*]] = fir.load %[[CLONE2]] : !fir.ref<i32>
-!CHECK-NEXT: fir.store %[[CLONE_LD2]] to %[[ARG2]] : !fir.ref<i32>
-!CHECK-DAG: }
-!CHECK-DAG: omp.yield
+!CHECK-DAG: %[[CLONE_LD1:.*]] = fir.load %[[CLONE1]] : !fir.ref<i32>
+!CHECK-DAG: fir.store %[[CLONE_LD1]] to %[[ARG1]] : !fir.ref<i32>
+!CHECK-DAG: %[[CLONE_LD2:.*]] = fir.load %[[CLONE2]] : !fir.ref<i32>
+!CHECK-DAG: fir.store %[[CLONE_LD2]] to %[[ARG2]] : !fir.ref<i32>
+!CHECK: }
+!CHECK: omp.yield
 
 subroutine mult_lastprivate_int(arg1, arg2)
         integer :: arg1, arg2
@@ -118,21 +114,21 @@
 end subroutine
 
 !CHECK: func.func @_QPmult_lastprivate_int2(%[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "arg1"}, %[[ARG2:.*]]: !fir.ref<i32> {fir.bindc_name = "arg2"}) {
-!CHECK-DAG: omp.parallel  {
+!CHECK: omp.parallel  {
 !CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1"
 !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2"
 !CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
 
-! Testing last iteration check
-!CHECK-DAG: %[[IV_CMP1:.*]] = arith.cmpi eq, %[[INDX_WS]]
-!CHECK-DAG: scf.if %[[IV_CMP1]] {
-! Testing lastprivate val update
-!CHECK-NEXT: %[[CLONE_LD2:.*]] = fir.load %[[CLONE2]] : !fir.ref<i32>
-!CHECK-NEXT: fir.store %[[CLONE_LD2]] to %[[ARG2]] : !fir.ref<i32>
-!CHECK-NEXT: %[[CLONE_LD1:.*]] = fir.load %[[CLONE1]] : !fir.ref<i32>
-!CHECK-NEXT: fir.store %[[CLONE_LD1]] to %[[ARG1]] : !fir.ref<i32>
-!CHECK-NEXT: }
-!CHECK-NEXT: omp.yield
+!Testing last iteration check
+!CHECK: %[[IV_CMP1:.*]] = arith.cmpi eq, %[[INDX_WS]]
+!CHECK-NEXT: scf.if %[[IV_CMP1]] {
+!Testing lastprivate val update
+!CHECK-DAG: %[[CLONE_LD2:.*]] = fir.load %[[CLONE2]] : !fir.ref<i32>
+!CHECK-DAG: fir.store %[[CLONE_LD2]] to %[[ARG2]] : !fir.ref<i32>
+!CHECK-DAG: %[[CLONE_LD1:.*]] = fir.load %[[CLONE1]] : !fir.ref<i32>
+!CHECK-DAG: fir.store %[[CLONE_LD1]] to %[[ARG1]] : !fir.ref<i32>
+!CHECK: }
+!CHECK: omp.yield
 
 subroutine mult_lastprivate_int2(arg1, arg2)
         integer :: arg1, arg2
@@ -149,19 +145,19 @@
 end subroutine
 
 !CHECK: func.func @_QPfirstpriv_lastpriv_int(%[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "arg1"}, %[[ARG2:.*]]: !fir.ref<i32> {fir.bindc_name = "arg2"}) {
-!CHECK-DAG: omp.parallel  {
-! Lastprivate Allocation
-!CHECK-NEXT: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2"
-!CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1"
+!CHECK: omp.parallel  {
 ! Firstprivate update
-!CHECK-NEXT: %[[FPV_LD:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
-!CHECK-NEXT: fir.store %[[FPV_LD]] to %[[CLONE1]] : !fir.ref<i32>
-!CHECK-NEXT: omp.barrier
+!CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1"
+!CHECK-DAG: %[[FPV_LD:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+!CHECK-DAG: fir.store %[[FPV_LD]] to %[[CLONE1]] : !fir.ref<i32>
+! Lastprivate Allocation
+!CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2"
+!CHECK-NOT: omp.barrier
 !CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
 
 ! Testing last iteration check
-!CHECK-DAG: %[[IV_CMP1:.*]] = arith.cmpi eq, %[[INDX_WS]]
-!CHECK-DAG: scf.if %[[IV_CMP1]] {
+!CHECK: %[[IV_CMP1:.*]] = arith.cmpi eq, %[[INDX_WS]]
+!CHECK-NEXT: scf.if %[[IV_CMP1]] {
 ! Testing lastprivate val update
 !CHECK-NEXT: %[[CLONE_LD:.*]] = fir.load %[[CLONE2]] : !fir.ref<i32>
 !CHECK-NEXT: fir.store %[[CLONE_LD]] to %[[ARG2]] : !fir.ref<i32>
@@ -182,4 +178,32 @@
 print *, arg1, arg2
 end subroutine
 
+!CHECK: func.func @_QPfirstpriv_lastpriv_int2(%[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "arg1"}) {
+!CHECK: omp.parallel  {
+! Firstprivate update
+!CHECK-NEXT: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1"
+!CHECK-NEXT: %[[FPV_LD:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+!CHECK-NEXT: fir.store %[[FPV_LD]] to %[[CLONE1]] : !fir.ref<i32>
+!CHECK-NEXT: omp.barrier
+!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
+! Testing last iteration check
+!CHECK: %[[IV_CMP1:.*]] = arith.cmpi eq, %[[INDX_WS]]
+!CHECK-NEXT: scf.if %[[IV_CMP1]] {
+! Testing lastprivate val update
+!CHECK-NEXT: %[[CLONE_LD:.*]] = fir.load %[[CLONE1]] : !fir.ref<i32>
+!CHECK-NEXT: fir.store %[[CLONE_LD]] to %[[ARG1]] : !fir.ref<i32>
+!CHECK-NEXT: }
+!CHECK-NEXT: omp.yield
 
+subroutine firstpriv_lastpriv_int2(arg1)
+        integer :: arg1
+!$OMP PARALLEL 
+!$OMP DO FIRSTPRIVATE(arg1) LASTPRIVATE(arg1)
+do n = 1, 5
+        arg1 = 2
+        print *, arg1
+end do
+!$OMP END DO
+!$OMP END PARALLEL
+print *, arg1
+end subroutine
Index: flang/test/Lower/OpenMP/omp-parallel-wsloop-firstpriv.f90
===================================================================
--- flang/test/Lower/OpenMP/omp-parallel-wsloop-firstpriv.f90
+++ flang/test/Lower/OpenMP/omp-parallel-wsloop-firstpriv.f90
@@ -13,7 +13,6 @@
   ! CHECK-NEXT: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "a", pinned
   ! CHECK-NEXT: %[[LD:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
   ! CHECK-NEXT: fir.store %[[LD]] to %[[CLONE]] : !fir.ref<i32>
-  ! CHECK-NEXT: omp.barrier
   ! CHECK-NEXT: %[[REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
   ! CHECK: %[[LB:.*]] = arith.constant 1 : i32
   ! CHECK-NEXT: %[[UB:.*]] = fir.load %[[CLONE]] : !fir.ref<i32>
@@ -43,7 +42,6 @@
   ! CHECK-NEXT: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "n", pinned
   ! CHECK-NEXT: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
   ! CHECK-NEXT: fir.store %[[LD1]] to %[[CLONE1]] : !fir.ref<i32>
-  ! CHECK-NEXT: omp.barrier
   ! CHECK-NEXT: %[[REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 
 
Index: flang/test/Lower/OpenMP/omp-parallel-wsloop.f90
===================================================================
--- flang/test/Lower/OpenMP/omp-parallel-wsloop.f90
+++ flang/test/Lower/OpenMP/omp-parallel-wsloop.f90
@@ -87,7 +87,6 @@
   ! CHECK:    %[[PRIVATE_NT_REF:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_do_with_privatisation_clausesEnt"}
   ! CHECK:    %[[NT_VAL:.*]] = fir.load %[[NT_REF]] : !fir.ref<i32>
   ! CHECK:    fir.store %[[NT_VAL]] to %[[PRIVATE_NT_REF]] : !fir.ref<i32>
-  ! CHECK:    omp.barrier
   ! CHECK:    %[[WS_LB:.*]] = arith.constant 1 : i32
   ! CHECK:    %[[WS_UB:.*]] = arith.constant 9 : i32
   ! CHECK:    %[[WS_STEP:.*]] = arith.constant 1 : i32
@@ -138,7 +137,6 @@
 ! CHECK:             %[[NT_ADDR:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_private_doEnt"}
 ! CHECK:             %[[NT:.*]] = fir.load %[[VAL_1]] : !fir.ref<i32>
 ! CHECK:             fir.store %[[NT]] to %[[NT_ADDR]] : !fir.ref<i32>
-! CHECK:             omp.barrier
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 9 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
@@ -153,8 +151,7 @@
 ! CHECK:         }
 
 !===============================================================================
-! Checking for the following construct - multiple firstprivate clauses must emit
-! only one barrier
+! Checking for the following construct
 !   !$omp parallel
 !   !$omp do firstprivate(...) firstprivate(...)
 !===============================================================================
@@ -182,7 +179,6 @@
 ! CHECK:             %[[B_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "b", pinned, uniq_name = "_QFomp_parallel_multiple_firstprivate_doEb"}
 ! CHECK:             %[[B:.*]] = fir.load %[[B_ADDR]] : !fir.ref<i32>
 ! CHECK:             fir.store %[[B]] to %[[B_PRIV_ADDR]] : !fir.ref<i32>
-! CHECK:             omp.barrier
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 1 : i32
@@ -224,7 +220,6 @@
 ! CHECK:             %[[NT_ADDR:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_do_privateEnt"}
 ! CHECK:             %[[NT:.*]] = fir.load %[[VAL_1]] : !fir.ref<i32>
 ! CHECK:             fir.store %[[NT]] to %[[NT_ADDR]] : !fir.ref<i32>
-! CHECK:             omp.barrier
 ! CHECK:             %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 ! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 9 : i32
@@ -240,8 +235,7 @@
 ! CHECK:         }
 
 !===============================================================================
-! Checking for the following construct - multiple firstprivate clauses must emit
-! only one barrier
+! Checking for the following construct
 !   !$omp parallel
 !   !$omp do firstprivate(...) firstprivate(...)
 !===============================================================================
@@ -268,8 +262,6 @@
 ! CHECK:             %[[B_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "b", pinned, uniq_name = "_QFomp_parallel_do_multiple_firstprivateEb"}
 ! CHECK:             %[[B:.*]] = fir.load %[[B_ADDR]] : !fir.ref<i32>
 ! CHECK:             fir.store %[[B]] to %[[B_PRIV_ADDR]] : !fir.ref<i32>
-! CHECK:             omp.barrier
-! CHECK-NOT:         omp.barrier
 ! CHECK:             %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 10 : i32