Index: llvm/lib/CodeGen/MachinePipeliner.cpp
===================================================================
--- llvm/lib/CodeGen/MachinePipeliner.cpp
+++ llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -165,6 +165,13 @@
     cl::desc(
         "Use the experimental peeling code generator for software pipelining"));
 
+// Some targets guarantee def-use ordering within a single cycle, so a def
+// and its use may be scheduled in the same cycle. This flag allows such
+// schedules. It defaults to false; targets with the guarantee set it to true.
+cl::opt<bool> AllowDefUseInSameCycle(
+    "allow-def-use-in-same-cycle", cl::init(false), cl::Hidden,
+    cl::desc("Allow def-use to be scheduled in same cycle"));
+
 namespace llvm {
 
 // A command line option to enable the CopyToPhi DAG mutation.
@@ -2788,7 +2795,13 @@
         if (Register::isPhysicalRegister(SI.getReg())) {
           if (stageScheduled(SI.getSUnit()) != StageDef)
             return false;
-          if (InstrToCycle[SI.getSUnit()] <= CycleDef)
+          // The use must never be scheduled before the def. Sharing a cycle
+          // with the def is only accepted when the target guarantees def-use
+          // ordering within a cycle (AllowDefUseInSameCycle).
+          const int CycleUse = InstrToCycle[SI.getSUnit()];
+          if (CycleUse < CycleDef)
+            return false;
+          if (CycleUse == CycleDef && !AllowDefUseInSameCycle)
             return false;
         }
   }
Index: llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
===================================================================
--- llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -117,6 +117,9 @@
     cl::init(true),
     cl::desc("Enable instsimplify"));
 
+// Defined in llvm/lib/CodeGen/MachinePipeliner.cpp.
+extern cl::opt<bool> AllowDefUseInSameCycle;
+
 /// HexagonTargetMachineModule - Note that this is used on hosts that
 /// cannot link in a library unless there are references into the
 /// library. In particular, it seems that it is not possible to get
@@ -239,6 +242,11 @@
           (HexagonNoOpt ? CodeGenOpt::None : OL)),
       TLOF(std::make_unique<HexagonTargetObjectFile>()) {
   initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry());
+  // Hexagon opts in to same-cycle def-use scheduling by default. Leave the
+  // value alone when the flag was given explicitly on the command line
+  // (getPosition() is nonzero once the option has been seen).
+  if (!AllowDefUseInSameCycle.getPosition())
+    AllowDefUseInSameCycle = true;
   initAsmInfo();
 }
 
Index: llvm/test/CodeGen/Hexagon/swp-same-cycle.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/Hexagon/swp-same-cycle.ll
@@ -0,0 +1,41 @@
+; REQUIRES: asserts
+; RUN: llc -O2 -march=hexagon -mcpu=hexagonv66 -mattr=-hvx -enable-pipeliner -debug-only=pipeliner -stop-after=pipeliner < %s \
+; RUN: 2>&1 | FileCheck %s
+; CHECK-NOT: No schedule found, return
+
+define void @foo(i16* nocapture %trans, i16* nocapture %Output_word) nounwind {
+entry:
+  %arrayidx = getelementptr inbounds i16, i16* %Output_word, i32 11
+  store i16 0, i16* %arrayidx, align 2
+  br label %for.body
+
+for.body:
+  %state.014 = phi i32 [ 0, %entry ], [ %or12, %for.body ]
+  %i.013 = phi i32 [ 188, %entry ], [ %dec, %for.body ]
+  %and = and i32 %state.014, 15
+  %arrayidx1 = getelementptr inbounds i16, i16* %trans, i32 %i.013
+  %0 = load i16, i16* %arrayidx1, align 2
+  %conv = zext i16 %0 to i32
+  %shr = lshr i32 %conv, %and
+  %shr3 = ashr i32 %i.013, 4
+  %arrayidx4 = getelementptr inbounds i16, i16* %Output_word, i32 %shr3
+  %1 = load i16, i16* %arrayidx4, align 2
+  %conv5 = zext i16 %1 to i32
+  %shl = shl nuw nsw i32 %conv5, 1
+  %and6 = and i32 %state.014, 1
+  %or = or i32 %shl, %and6
+  %conv7 = trunc i32 %or to i16
+  %res = tail call i32 asm "$0 = insert($1,#31,#1);", "={r7},r"(i16 %1)
+  call void asm "memh($0+$1<<#1) = $2;", "r,r,{r7}"(i32 %or, i32 %shr3, i32 %res)
+  %and2 = shl i32 %shr, 3
+  %shl10 = and i32 %and2, 8
+  %shr11 = ashr i32 %state.014, 1
+  %or12 = or i32 %shl10, %shr11
+  ;%dec = tail call i32 asm "$0 = add($1,#-1);", "={r4},{r4}"(i32 %i.013)
+  %dec = add nsw i32 %i.013, -1
+  %cmp = icmp sgt i32 %i.013, 0
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}