Index: llvm/lib/CodeGen/MachinePipeliner.cpp
===================================================================
--- llvm/lib/CodeGen/MachinePipeliner.cpp
+++ llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -165,6 +165,13 @@
     cl::desc(
         "Use the experimental peeling code generator for software pipelining"));
 
+// Some targets guarantee def-use ordering within a single cycle. This flag
+// permits scheduling a physical-register def and its use in the same cycle.
+// It defaults to false; such targets enable it unless set on the command line.
+cl::opt<bool> AllowDefUseInSameCycle(
+    "allow-def-use-in-same-cycle", cl::init(false), cl::Hidden,
+    cl::desc("Allow def-use to be scheduled in same cycle"));
+
 namespace llvm {
 
 // A command line option to enable the CopyToPhi DAG mutation.
@@ -2788,7 +2795,10 @@
         if (Register::isPhysicalRegister(SI.getReg())) {
           if (stageScheduled(SI.getSUnit()) != StageDef)
             return false;
-          if (InstrToCycle[SI.getSUnit()] <= CycleDef)
+          if (InstrToCycle[SI.getSUnit()] < CycleDef)
+            return false;
+          if (!AllowDefUseInSameCycle &&
+              (InstrToCycle[SI.getSUnit()] == CycleDef))
             return false;
         }
   }
Index: llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
===================================================================
--- llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -117,6 +117,8 @@
                                         cl::init(true),
                                         cl::desc("Enable instsimplify"));
 
+extern cl::opt<bool> AllowDefUseInSameCycle;
+
 /// HexagonTargetMachineModule - Note that this is used on hosts that
 /// cannot link in a library unless there are references into the
 /// library.  In particular, it seems that it is not possible to get
@@ -239,6 +241,8 @@
           (HexagonNoOpt ? CodeGenOpt::None : OL)),
       TLOF(std::make_unique<HexagonTargetObjectFile>()) {
   initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry());
+  if (!AllowDefUseInSameCycle.getPosition())
+    AllowDefUseInSameCycle = true;
   initAsmInfo();
 }
 
Index: llvm/test/CodeGen/Hexagon/swp-same-cycle.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/Hexagon/swp-same-cycle.ll
@@ -0,0 +1,44 @@
+; RUN: llc -O2 -march=hexagon -mcpu=hexagonv66 -mattr=-hvx -enable-pipeliner -debug-only=pipeliner -stop-after=pipeliner < %s \
+; RUN: 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+; The two inline asm statements form a def-use chain through physical register
+; r7. With allow-def-use-in-same-cycle (enabled by default for Hexagon), the
+; pipeliner must still be able to find a schedule for this loop.
+; CHECK-NOT: No schedule found, return
+
+define void @foo(i16* nocapture %trans, i16* nocapture %Output_word) nounwind {
+entry:
+  %arrayidx = getelementptr inbounds i16, i16* %Output_word, i32 11
+  store i16 0, i16* %arrayidx, align 2
+  br label %for.body
+
+for.body:
+  %state.014 = phi i32 [ 0, %entry ], [ %or12, %for.body ]
+  %i.013 = phi i32 [ 188, %entry ], [ %dec, %for.body ]
+  %and = and i32 %state.014, 15
+  %arrayidx1 = getelementptr inbounds i16, i16* %trans, i32 %i.013
+  %0 = load i16, i16* %arrayidx1, align 2
+  %conv = zext i16 %0 to i32
+  %shr = lshr i32 %conv, %and
+  %shr3 = ashr i32 %i.013, 4
+  %arrayidx4 = getelementptr inbounds i16, i16* %Output_word, i32 %shr3
+  %1 = load i16, i16* %arrayidx4, align 2
+  %conv5 = zext i16 %1 to i32
+  %shl = shl nuw nsw i32 %conv5, 1
+  %and6 = and i32 %state.014, 1
+  %or = or i32 %shl, %and6
+  %conv7 = trunc i32 %or to i16
+  %res = tail call i32 asm "$0 = insert($1,#31,#1);", "={r7},r"(i16 %1)
+  call void asm "memh($0+$1<<#1) = $2;", "r,r,{r7}"(i32 %or, i32 %shr3, i32 %res)
+  %and2 = shl i32 %shr, 3
+  %shl10 = and i32 %and2, 8
+  %shr11 = ashr i32 %state.014, 1
+  %or12 = or i32 %shl10, %shr11
+  %dec = add nsw i32 %i.013, -1
+  %cmp = icmp sgt i32 %i.013, 0
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}