Page MenuHomePhabricator

D64192.diff
No OneTemporary

File Metadata

Created
Thu, Oct 17, 4:34 PM

D64192.diff

Index: llvm/trunk/lib/CodeGen/MachinePipeliner.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/MachinePipeliner.cpp
+++ llvm/trunk/lib/CodeGen/MachinePipeliner.cpp
@@ -3559,6 +3559,14 @@
if (Pos < MoveUse)
MoveUse = Pos;
}
+ // We did not handle HW dependences in the previous for loop,
+ // and we normally set Latency = 0 for Anti deps,
+ // so we may have nodes in the same cycle with Anti dependences on HW regs.
+ else if (S.getKind() == SDep::Anti && stageScheduled(*I) == StageInst1) {
+ OrderBeforeUse = true;
+ if ((MoveUse == 0) || (Pos < MoveUse))
+ MoveUse = Pos;
+ }
}
for (auto &P : SU->Preds) {
if (P.getSUnit() != *I)
Index: llvm/trunk/test/CodeGen/PowerPC/sms-grp-order.ll
===================================================================
--- llvm/trunk/test/CodeGen/PowerPC/sms-grp-order.ll
+++ llvm/trunk/test/CodeGen/PowerPC/sms-grp-order.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs\
+; RUN: -mcpu=pwr9 --ppc-enable-pipeliner | FileCheck %s
+
+define void @lame_encode_buffer_interleaved() local_unnamed_addr {
+; CHECK-LABEL: lame_encode_buffer_interleaved:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lhz 3, 0(0)
+; CHECK-NEXT: li 5, 1
+; CHECK-NEXT: sldi 5, 5, 62
+; CHECK-NEXT: lhz 4, 0(3)
+; CHECK-NEXT: mtctr 5
+; CHECK-NEXT: .p2align 5
+; CHECK-NEXT: .LBB0_1: #
+; CHECK-NEXT: extsh 3, 3
+; CHECK-NEXT: extsh 4, 4
+; CHECK-NEXT: srawi 3, 3, 1
+; CHECK-NEXT: addze 3, 3
+; CHECK-NEXT: srawi 4, 4, 1
+; CHECK-NEXT: addze 4, 4
+; CHECK-NEXT: bdnz .LBB0_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: sth 3, 0(0)
+; CHECK-NEXT: sth 4, 0(3)
+; CHECK-NEXT: blr
+ br label %1
+
+1: ; preds = %1, %0
+ %2 = phi i64 [ 0, %0 ], [ %13, %1 ]
+ %3 = load i16, i16* null, align 2
+ %4 = load i16, i16* undef, align 2
+ %5 = sext i16 %3 to i32
+ %6 = sext i16 %4 to i32
+ %7 = add nsw i32 0, %5
+ %8 = add nsw i32 0, %6
+ %9 = sdiv i32 %7, 2
+ %10 = sdiv i32 %8, 2
+ %11 = trunc i32 %9 to i16
+ %12 = trunc i32 %10 to i16
+ store i16 %11, i16* null, align 2
+ store i16 %12, i16* undef, align 2
+ %13 = add i64 %2, 4
+ %14 = icmp eq i64 %13, 0
+ br i1 %14, label %15, label %1
+
+15: ; preds = %1
+ ret void
+}

Event Timeline