Index: llvm/lib/CodeGen/MachineSink.cpp =================================================================== --- llvm/lib/CodeGen/MachineSink.cpp +++ llvm/lib/CodeGen/MachineSink.cpp @@ -18,12 +18,14 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CFG.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" @@ -429,6 +431,16 @@ AA = &getAnalysis().getAAResults(); RegClassInfo.runOnMachineFunction(MF); + // MachineSink currently uses MachineLoopInfo, which only recognizes natural + // loops. As such, we could sink instructions into irreducible cycles, which + // would be unprofitable. + // WARNING: The current implementation of hasStoreBetween() is incorrect for + // sinking into irreducible cycles (PR53990); this bailout is currently + // necessary for correctness, not just profitability. 
+ ReversePostOrderTraversal RPOT(&*MF.begin()); + if (containsIrreducibleCFG(RPOT, *LI)) + return false; + bool EverMadeChange = false; while (true) { Index: llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll =================================================================== --- llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll +++ llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll @@ -24,7 +24,7 @@ ; CHECK-NEXT: movq %r15, %rdi ; CHECK-NEXT: callq l ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: jne .LBB0_10 +; CHECK-NEXT: jne .LBB0_9 ; CHECK-NEXT: # %bb.1: # %if.end ; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: cmpl $0, e(%rip) @@ -44,21 +44,19 @@ ; CHECK-NEXT: callq i ; CHECK-NEXT: movl %eax, %ebp ; CHECK-NEXT: orl %r14d, %ebp -; CHECK-NEXT: testl %r13d, %r13d -; CHECK-NEXT: je .LBB0_6 -; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: andl $4, %ebx -; CHECK-NEXT: jmp .LBB0_3 -; CHECK-NEXT: .LBB0_6: # %if.end12 +; CHECK-NEXT: testl %r13d, %r13d +; CHECK-NEXT: jne .LBB0_3 +; CHECK-NEXT: # %bb.5: # %if.end12 ; CHECK-NEXT: testl %ebp, %ebp -; CHECK-NEXT: je .LBB0_9 -; CHECK-NEXT: # %bb.7: # %if.then14 +; CHECK-NEXT: je .LBB0_8 +; CHECK-NEXT: # %bb.6: # %if.then14 ; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: jmp .LBB0_10 +; CHECK-NEXT: jmp .LBB0_9 ; CHECK-NEXT: .Ltmp0: # Block address taken -; CHECK-NEXT: # %bb.8: # %if.then20.critedge +; CHECK-NEXT: # %bb.7: # %if.then20.critedge ; CHECK-NEXT: movl j(%rip), %edi ; CHECK-NEXT: movslq %eax, %rcx ; CHECK-NEXT: movl $1, %esi @@ -71,9 +69,9 @@ ; CHECK-NEXT: popq %r15 ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: jmp k # TAILCALL -; CHECK-NEXT: .LBB0_9: # %if.else +; CHECK-NEXT: .LBB0_8: # %if.else ; CHECK-NEXT: incq 0 -; CHECK-NEXT: .LBB0_10: # %cleanup +; CHECK-NEXT: .LBB0_9: # %cleanup ; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r12 Index: llvm/test/CodeGen/X86/pr38795.ll 
=================================================================== --- llvm/test/CodeGen/X86/pr38795.ll +++ llvm/test/CodeGen/X86/pr38795.ll @@ -32,13 +32,14 @@ ; CHECK-NEXT: # implicit-def: $ebp ; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_16: # %for.inc +; CHECK-NEXT: .LBB0_15: # %for.inc ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: movb %dh, %dl ; CHECK-NEXT: .LBB0_1: # %for.cond ; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB0_20 Depth 2 +; CHECK-NEXT: # Child Loop BB0_19 Depth 2 ; CHECK-NEXT: cmpb $8, %dl ; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: ja .LBB0_3 @@ -55,7 +56,7 @@ ; CHECK-NEXT: movb %cl, %dh ; CHECK-NEXT: movl $0, h ; CHECK-NEXT: cmpb $8, %dl -; CHECK-NEXT: jg .LBB0_8 +; CHECK-NEXT: jg .LBB0_9 ; CHECK-NEXT: # %bb.5: # %if.then13 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl %eax, %esi @@ -64,12 +65,10 @@ ; CHECK-NEXT: calll printf ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: # implicit-def: $eax -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload -; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload ; CHECK-NEXT: movb %dh, %dl -; CHECK-NEXT: jne .LBB0_16 +; CHECK-NEXT: jne .LBB0_15 ; CHECK-NEXT: jmp .LBB0_6 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_3: # %if.then @@ -78,82 +77,82 @@ ; CHECK-NEXT: calll printf ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload ; CHECK-NEXT: # implicit-def: $eax +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; CHECK-NEXT: jmp .LBB0_6 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_9: # %if.end21 +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: # 
implicit-def: $ebp +; CHECK-NEXT: jmp .LBB0_10 +; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_6: # %for.cond35 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: movb %dl, %dh ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: je .LBB0_7 -; CHECK-NEXT: .LBB0_11: # %af +; CHECK-NEXT: movl %edi, %esi +; CHECK-NEXT: movl $0, %edi +; CHECK-NEXT: movb %cl, %dl +; CHECK-NEXT: je .LBB0_19 +; CHECK-NEXT: # %bb.7: # %af ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_12 -; CHECK-NEXT: .LBB0_17: # %if.end39 +; CHECK-NEXT: jne .LBB0_8 +; CHECK-NEXT: .LBB0_16: # %if.end39 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: je .LBB0_19 -; CHECK-NEXT: # %bb.18: # %if.then41 +; CHECK-NEXT: je .LBB0_18 +; CHECK-NEXT: # %bb.17: # %if.then41 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $fn, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $.str, (%esp) ; CHECK-NEXT: calll printf -; CHECK-NEXT: .LBB0_19: # %for.end46 +; CHECK-NEXT: .LBB0_18: # %for.end46 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: movl %esi, %edi ; CHECK-NEXT: # implicit-def: $dl ; CHECK-NEXT: # implicit-def: $dh ; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: jmp .LBB0_20 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_8: # %if.end21 -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: jmp .LBB0_9 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: movb %dl, %dh -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_20: # %for.cond47 +; CHECK-NEXT: .LBB0_19: # %for.cond47 ; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_20 -; CHECK-NEXT: # %bb.21: # %for.cond47 -; 
CHECK-NEXT: # in Loop: Header=BB0_20 Depth=2 +; CHECK-NEXT: jne .LBB0_19 +; CHECK-NEXT: # %bb.20: # %for.cond47 +; CHECK-NEXT: # in Loop: Header=BB0_19 Depth=2 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_20 -; CHECK-NEXT: .LBB0_9: # %ae +; CHECK-NEXT: jne .LBB0_19 +; CHECK-NEXT: .LBB0_10: # %ae ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_10 -; CHECK-NEXT: # %bb.13: # %if.end26 +; CHECK-NEXT: jne .LBB0_11 +; CHECK-NEXT: # %bb.12: # %if.end26 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: je .LBB0_16 -; CHECK-NEXT: # %bb.14: # %if.end26 +; CHECK-NEXT: je .LBB0_15 +; CHECK-NEXT: # %bb.13: # %if.end26 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %ebp, %ebp -; CHECK-NEXT: jne .LBB0_16 -; CHECK-NEXT: # %bb.15: # %if.then31 +; CHECK-NEXT: jne .LBB0_15 +; CHECK-NEXT: # %bb.14: # %if.then31 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: jmp .LBB0_16 +; CHECK-NEXT: jmp .LBB0_15 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_10: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: .LBB0_11: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: movl %edi, %esi ; CHECK-NEXT: # implicit-def: $eax ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: je .LBB0_17 -; CHECK-NEXT: .LBB0_12: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: je .LBB0_16 +; CHECK-NEXT: .LBB0_8: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: # implicit-def: $edi ; CHECK-NEXT: # implicit-def: $cl -; CHECK-NEXT: # kill: killed $cl ; CHECK-NEXT: # implicit-def: $dl ; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: jne .LBB0_11 -; CHECK-NEXT: jmp .LBB0_7 +; CHECK-NEXT: jmp .LBB0_6 entry: br label %for.cond Index: llvm/test/CodeGen/X86/pr53990-incorrect-machine-sink.ll 
=================================================================== --- llvm/test/CodeGen/X86/pr53990-incorrect-machine-sink.ll +++ llvm/test/CodeGen/X86/pr53990-incorrect-machine-sink.ll @@ -7,18 +7,15 @@ ; CHECK-LABEL: test: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: pushq %r15 ; CHECK-NEXT: pushq %r14 ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: pushq %rax ; CHECK-NEXT: movq %rdx, %rbx -; CHECK-NEXT: movq %rsi, %r14 -; CHECK-NEXT: movl %edi, %r15d +; CHECK-NEXT: movl %edi, %r14d +; CHECK-NEXT: movq (%rsi), %rbp ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: jmpq *.LJTI0_0(,%rax,8) ; CHECK-NEXT: .LBB0_1: # %split.3 -; CHECK-NEXT: movq (%r14), %rbp -; CHECK-NEXT: testb $1, %r15b +; CHECK-NEXT: testb $1, %r14b ; CHECK-NEXT: je .LBB0_3 ; CHECK-NEXT: # %bb.2: # %clobber ; CHECK-NEXT: callq clobber@PLT Index: llvm/test/CodeGen/X86/x86-shrink-wrapping.ll =================================================================== --- llvm/test/CodeGen/X86/x86-shrink-wrapping.ll +++ llvm/test/CodeGen/X86/x86-shrink-wrapping.ll @@ -1377,6 +1377,8 @@ ; ENABLE-NEXT: pushq %rbx ; ENABLE-NEXT: pushq %rax ; ENABLE-NEXT: .cfi_offset %rbx, -24 +; ENABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax +; ENABLE-NEXT: movl (%rax), %edi ; ENABLE-NEXT: movq _irreducibleCFGf@GOTPCREL(%rip), %rax ; ENABLE-NEXT: cmpb $0, (%rax) ; ENABLE-NEXT: je LBB16_2 @@ -1386,24 +1388,20 @@ ; ENABLE-NEXT: jmp LBB16_1 ; ENABLE-NEXT: LBB16_2: ## %split ; ENABLE-NEXT: movq _irreducibleCFGb@GOTPCREL(%rip), %rax +; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: cmpl $0, (%rax) -; ENABLE-NEXT: je LBB16_3 -; ENABLE-NEXT: ## %bb.4: ## %for.body4.i -; ENABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax -; ENABLE-NEXT: movl (%rax), %edi +; ENABLE-NEXT: je LBB16_4 +; ENABLE-NEXT: ## %bb.3: ## %for.body4.i ; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: xorl %eax, %eax ; ENABLE-NEXT: callq _something -; ENABLE-NEXT: jmp LBB16_5 -; ENABLE-NEXT: LBB16_3: -; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: 
.p2align 4, 0x90 -; ENABLE-NEXT: LBB16_5: ## %for.inc +; ENABLE-NEXT: LBB16_4: ## %for.inc ; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 ; ENABLE-NEXT: incl %ebx ; ENABLE-NEXT: cmpl $7, %ebx -; ENABLE-NEXT: jl LBB16_5 -; ENABLE-NEXT: ## %bb.6: ## %fn1.exit +; ENABLE-NEXT: jl LBB16_4 +; ENABLE-NEXT: ## %bb.5: ## %fn1.exit ; ENABLE-NEXT: xorl %eax, %eax ; ENABLE-NEXT: addq $8, %rsp ; ENABLE-NEXT: popq %rbx @@ -1420,6 +1418,8 @@ ; DISABLE-NEXT: pushq %rbx ; DISABLE-NEXT: pushq %rax ; DISABLE-NEXT: .cfi_offset %rbx, -24 +; DISABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax +; DISABLE-NEXT: movl (%rax), %edi ; DISABLE-NEXT: movq _irreducibleCFGf@GOTPCREL(%rip), %rax ; DISABLE-NEXT: cmpb $0, (%rax) ; DISABLE-NEXT: je LBB16_2 @@ -1429,24 +1429,20 @@ ; DISABLE-NEXT: jmp LBB16_1 ; DISABLE-NEXT: LBB16_2: ## %split ; DISABLE-NEXT: movq _irreducibleCFGb@GOTPCREL(%rip), %rax +; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: cmpl $0, (%rax) -; DISABLE-NEXT: je LBB16_3 -; DISABLE-NEXT: ## %bb.4: ## %for.body4.i -; DISABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax -; DISABLE-NEXT: movl (%rax), %edi +; DISABLE-NEXT: je LBB16_4 +; DISABLE-NEXT: ## %bb.3: ## %for.body4.i ; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: xorl %eax, %eax ; DISABLE-NEXT: callq _something -; DISABLE-NEXT: jmp LBB16_5 -; DISABLE-NEXT: LBB16_3: -; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: .p2align 4, 0x90 -; DISABLE-NEXT: LBB16_5: ## %for.inc +; DISABLE-NEXT: LBB16_4: ## %for.inc ; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 ; DISABLE-NEXT: incl %ebx ; DISABLE-NEXT: cmpl $7, %ebx -; DISABLE-NEXT: jl LBB16_5 -; DISABLE-NEXT: ## %bb.6: ## %fn1.exit +; DISABLE-NEXT: jl LBB16_4 +; DISABLE-NEXT: ## %bb.5: ## %fn1.exit ; DISABLE-NEXT: xorl %eax, %eax ; DISABLE-NEXT: addq $8, %rsp ; DISABLE-NEXT: popq %rbx