Index: llvm/lib/CodeGen/MachineScheduler.cpp =================================================================== --- llvm/lib/CodeGen/MachineScheduler.cpp +++ llvm/lib/CodeGen/MachineScheduler.cpp @@ -478,7 +478,11 @@ RegionEnd != MBB->begin(); RegionEnd = I) { // Avoid decrementing RegionEnd for blocks with no terminator. - if (RegionEnd != MBB->end() || + if (RegionEnd != MBB->end()) + --RegionEnd; + + // Step RegionEnd back over consecutive sched boundaries preceding it. + while (RegionEnd != MBB->begin() && isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) { --RegionEnd; } @@ -489,6 +493,19 @@ I = RegionEnd; for (;I != MBB->begin(); --I) { MachineInstr &MI = *std::prev(I); + // If a CFI instruction is treated as a sched boundary it splits the + // scheduling region, so CodeGen differs with and without CFI + // instructions (with/without --strip-debug). Splice the CFI instruction + // in front of RegionEnd and make it the new RegionEnd so that it does + // not break up scheduling regions. + if (MI.isCFIInstruction()) { + MBB->splice(RegionEnd, MBB, MI); + --RegionEnd; + // prev(I), the CFI instruction, has been moved away, so advance I here; + // the --I of the enclosing for-loop then leaves I where it was. + ++I; + continue; + } if (isSchedBoundary(&MI, &*MBB, MF, TII)) break; if (!MI.isDebugInstr()) { Index: llvm/test/CodeGen/AArch64/machine-scheduler-with-cfi-instruction.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/machine-scheduler-with-cfi-instruction.mir @@ -0,0 +1,93 @@ +# RUN: llc -mtriple=aarch64-linux-gnu-- -run-pass=postmisched -o - %s | FileCheck %s + +# A cfi_instruction affects the PostRAScheduler and can change the generated code. +# This test checks that cfi_instructions are moved to RegionEnd (before RET) +# and that the region is still scheduled normally. 
+# Regression test for https://bugs.llvm.org/show_bug.cgi?id=37240 + +--- | + ; ModuleID = 'test.ll' + source_filename = "test.ll" + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + target triple = "aarch64-linux-gnu" + + @X1 = global i32 0, align 4 + @X2 = global i32 0, align 4 + @X3 = global i32 0, align 4 + @X4 = global i32 0, align 4 + + ; Function Attrs: nounwind + define void @test(i32 %i) #0 { + entry: + %0 = load i32, i32* @X1, align 4 + %x1 = add i32 %0, 1 + %x2 = add i32 %0, 2 + %x3 = add i32 %0, 3 + %x4 = add i32 %0, 4 + tail call void @foo() + store i32 %x1, i32* @X1, align 4 + store i32 %x2, i32* @X2, align 4 + store i32 %x3, i32* @X3, align 4 + store i32 %x4, i32* @X4, align 4 + ret void + } + + declare void @foo() + + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #0 + + attributes #0 = { nounwind } + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!3, !4, !5} + + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 7.0.0 (trunk 330790) (llvm/trunk 330787)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2) + !1 = !DIFile(filename: "test.c", directory: "") + !2 = !{} + !3 = !{i32 2, !"Dwarf Version", i32 4} + !4 = !{i32 2, !"Debug Info Version", i32 3} + !5 = !{i32 1, !"wchar_size", i32 4} + +... 
+--- +name: test +alignment: 2 +tracksRegLiveness: true +frameInfo: + stackSize: 48 + maxAlignment: 8 + adjustsStack: true + hasCalls: true + maxCallFrameSize: 0 +stack: + - { id: 0, type: spill-slot, offset: -8, size: 8, alignment: 8, callee-saved-register: '$x19' } + - { id: 1, type: spill-slot, offset: -16, size: 8, alignment: 8, callee-saved-register: '$x20' } + - { id: 2, type: spill-slot, offset: -24, size: 8, alignment: 8, callee-saved-register: '$x21' } + - { id: 3, type: spill-slot, offset: -32, size: 8, alignment: 8, callee-saved-register: '$x22' } + - { id: 4, type: spill-slot, offset: -48, size: 8, alignment: 8, callee-saved-register: '$lr' } +machineFunctionInfo: {} +body: | + ; CHECK: bb.0.entry + ; CHECK: frame-setup STPXi killed $x20, killed $x19, $sp, 4 + ; CHECK-NEXT: renamable $x19 = ADRP target-flags(aarch64-page) @X1 + ; CHECK-NEXT: renamable $w8 = LDRWui renamable $x19 + ; CHECK-NEXT: frame-setup STPXi $x22, $x21, $sp, 2 + ; CHECK-NEXT: renamable $w20 = ADDWri killed renamable $w8, 1, 0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 48 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w19, -8 + ; CHECK-NEXT: RET undef $lr + + bb.0.entry: + liveins: $lr, $x21, $x22, $x19, $x20 + + frame-setup STPXi killed $x22, killed $x21, $sp, 2 :: (store 8 into %stack.3), (store 8 into %stack.2) + frame-setup STPXi killed $x20, killed $x19, $sp, 4 :: (store 8 into %stack.1), (store 8 into %stack.0) + frame-setup CFI_INSTRUCTION def_cfa_offset 48 + frame-setup CFI_INSTRUCTION offset $w19, -8 + renamable $x19 = ADRP target-flags(aarch64-page) @X1 + renamable $w8 = LDRWui renamable $x19, target-flags(aarch64-pageoff, aarch64-nc) @X1 :: (dereferenceable load 4 from @X1) + renamable $w20 = ADDWri renamable $w8, 1, 0 + RET undef $lr + +...