diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -8507,6 +8507,11 @@ This instruction should only be used to implement the "goto" feature of gcc style inline assembly. Any other usage is an error in the IR verifier. +To support outputs along indirect edges, LLVM may need to split critical edges. +This means that basic blocks may be synthesized; a label in ``indirect labels`` +as seen from inline asm may not compare equal to the same label when it is +passed as one of the ``function args``. + Arguments: """""""""" diff --git a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h --- a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h +++ b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h @@ -714,7 +714,7 @@ void CodeGenPassBuilder::addISelPrepare(AddIRPass &addPass) const { derived().addPreISel(addPass); - //addPass(CallBrPrepare()); // TODO(ndesaulniers): impl pass + addPass(CallBrPrepare()); // Add both the safe stack and the stack protection passes: each of them will // only protect functions that have corresponding attributes. addPass(SafeStackPass()); diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -627,8 +627,9 @@ // it is an entry block or landing pad. 
for (const auto &LI : MBB->liveins()) { if (isAllocatable(LI.PhysReg) && !MBB->isEHPad() && - MBB->getIterator() != MBB->getParent()->begin()) { - report("MBB has allocatable live-in, but isn't entry or landing-pad.", MBB); + MBB->getIterator() != MBB->getParent()->begin() && + !MBB->isInlineAsmBrIndirectTarget()) { + report("MBB has allocatable live-in, but isn't entry, landing-pad, or inlineasm-br-indirect-target.", MBB); report_context(LI.PhysReg); } } diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -977,7 +977,7 @@ if (requiresCodeGenSCCOrder()) addPass(new DummyCGSCCPass); - //addPass(createCallBrPass()); // TODO(ndesaulniers): impl pass + addPass(createCallBrPass()); // Add both the safe stack and the stack protection passes: each of them will // only protect functions that have corresponding attributes. diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll --- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll @@ -29,6 +29,7 @@ ; CHECK-NEXT: AArch64 Stack Tagging ; CHECK-NEXT: SME ABI Pass ; CHECK-NEXT: Exception handling preparation +; CHECK-NEXT: Prepare callbr ; CHECK-NEXT: Safe Stack instrumentation pass ; CHECK-NEXT: Insert stack protectors ; CHECK-NEXT: Module Verifier diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -99,6 +99,7 @@ ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Merge internal globals +; CHECK-NEXT: Prepare callbr ; CHECK-NEXT: Safe Stack instrumentation pass ; CHECK-NEXT: Insert stack protectors ; CHECK-NEXT: Module Verifier diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll --- 
a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -93,6 +93,7 @@ ; GCN-O0-NEXT: Loop-Closed SSA Form Pass ; GCN-O0-NEXT: DummyCGSCCPass ; GCN-O0-NEXT: FunctionPass Manager +; GCN-O0-NEXT: Prepare callbr ; GCN-O0-NEXT: Safe Stack instrumentation pass ; GCN-O0-NEXT: Insert stack protectors ; GCN-O0-NEXT: Dominator Tree Construction @@ -281,6 +282,7 @@ ; GCN-O1-NEXT: Loop-Closed SSA Form Pass ; GCN-O1-NEXT: DummyCGSCCPass ; GCN-O1-NEXT: FunctionPass Manager +; GCN-O1-NEXT: Prepare callbr ; GCN-O1-NEXT: Safe Stack instrumentation pass ; GCN-O1-NEXT: Insert stack protectors ; GCN-O1-NEXT: Dominator Tree Construction @@ -573,6 +575,7 @@ ; GCN-O1-OPTS-NEXT: Loop-Closed SSA Form Pass ; GCN-O1-OPTS-NEXT: DummyCGSCCPass ; GCN-O1-OPTS-NEXT: FunctionPass Manager +; GCN-O1-OPTS-NEXT: Prepare callbr ; GCN-O1-OPTS-NEXT: Safe Stack instrumentation pass ; GCN-O1-OPTS-NEXT: Insert stack protectors ; GCN-O1-OPTS-NEXT: Dominator Tree Construction @@ -874,6 +877,7 @@ ; GCN-O2-NEXT: Analysis if a function is memory bound ; GCN-O2-NEXT: DummyCGSCCPass ; GCN-O2-NEXT: FunctionPass Manager +; GCN-O2-NEXT: Prepare callbr ; GCN-O2-NEXT: Safe Stack instrumentation pass ; GCN-O2-NEXT: Insert stack protectors ; GCN-O2-NEXT: Dominator Tree Construction @@ -1187,6 +1191,7 @@ ; GCN-O3-NEXT: Analysis if a function is memory bound ; GCN-O3-NEXT: DummyCGSCCPass ; GCN-O3-NEXT: FunctionPass Manager +; GCN-O3-NEXT: Prepare callbr ; GCN-O3-NEXT: Safe Stack instrumentation pass ; GCN-O3-NEXT: Insert stack protectors ; GCN-O3-NEXT: Dominator Tree Construction diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -66,6 +66,7 @@ ; CHECK-NEXT: Transform predicated vector loops to use MVE tail predication ; CHECK-NEXT: A No-Op Barrier Pass ; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Prepare callbr ; CHECK-NEXT: Safe Stack instrumentation 
pass ; CHECK-NEXT: Insert stack protectors ; CHECK-NEXT: Module Verifier diff --git a/llvm/test/CodeGen/PowerPC/O0-pipeline.ll b/llvm/test/CodeGen/PowerPC/O0-pipeline.ll --- a/llvm/test/CodeGen/PowerPC/O0-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O0-pipeline.ll @@ -30,6 +30,7 @@ ; CHECK-NEXT: Scalarize Masked Memory Intrinsics ; CHECK-NEXT: Expand reduction intrinsics ; CHECK-NEXT: Exception handling preparation +; CHECK-NEXT: Prepare callbr ; CHECK-NEXT: Safe Stack instrumentation pass ; CHECK-NEXT: Insert stack protectors ; CHECK-NEXT: Module Verifier diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll --- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll @@ -80,6 +80,7 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Hardware Loop Insertion +; CHECK-NEXT: Prepare callbr ; CHECK-NEXT: Safe Stack instrumentation pass ; CHECK-NEXT: Insert stack protectors ; CHECK-NEXT: Module Verifier diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll --- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll @@ -31,6 +31,7 @@ ; CHECK-NEXT: Scalarize Masked Memory Intrinsics ; CHECK-NEXT: Expand reduction intrinsics ; CHECK-NEXT: Exception handling preparation +; CHECK-NEXT: Prepare callbr ; CHECK-NEXT: Safe Stack instrumentation pass ; CHECK-NEXT: Insert stack protectors ; CHECK-NEXT: Module Verifier diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -66,6 +66,7 @@ ; CHECK-NEXT: Exception handling preparation ; CHECK-NEXT: A No-Op Barrier Pass ; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Prepare callbr ; CHECK-NEXT: Safe Stack instrumentation pass ; CHECK-NEXT: Insert stack protectors ; CHECK-NEXT: Module Verifier diff --git 
a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll --- a/llvm/test/CodeGen/X86/O0-pipeline.ll +++ b/llvm/test/CodeGen/X86/O0-pipeline.ll @@ -32,6 +32,7 @@ ; CHECK-NEXT: Expand reduction intrinsics ; CHECK-NEXT: Expand indirectbr instructions ; CHECK-NEXT: Exception handling preparation +; CHECK-NEXT: Prepare callbr ; CHECK-NEXT: Safe Stack instrumentation pass ; CHECK-NEXT: Insert stack protectors ; CHECK-NEXT: Module Verifier diff --git a/llvm/test/CodeGen/X86/callbr-asm-outputs-pred-succ.ll b/llvm/test/CodeGen/X86/callbr-asm-outputs-pred-succ.ll --- a/llvm/test/CodeGen/X86/callbr-asm-outputs-pred-succ.ll +++ b/llvm/test/CodeGen/X86/callbr-asm-outputs-pred-succ.ll @@ -22,7 +22,7 @@ ; Check the first INLINEASM_BR target block is predecessed by the block with ; the first INLINEASM_BR. -; CHECK: bb.4 (%ir-block.11, machine-block-address-taken, inlineasm-br-indirect-target): +; CHECK: bb.4 (%ir-block.12, machine-block-address-taken, inlineasm-br-indirect-target): ; CHECK-NEXT: predecessors: %bb.0 @.str = private unnamed_addr constant [26 x i8] c"inline asm#1 returned %d\0A\00", align 1 diff --git a/llvm/test/CodeGen/X86/callbr-asm-outputs.ll b/llvm/test/CodeGen/X86/callbr-asm-outputs.ll --- a/llvm/test/CodeGen/X86/callbr-asm-outputs.ll +++ b/llvm/test/CodeGen/X86/callbr-asm-outputs.ll @@ -38,36 +38,46 @@ ; CHECK-NEXT: pushl %esi ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi -; CHECK-NEXT: movl $-1, %eax ; CHECK-NEXT: cmpl %edi, %esi -; CHECK-NEXT: jge .LBB1_2 +; CHECK-NEXT: jge .LBB1_3 ; CHECK-NEXT: # %bb.1: # %if.then ; CHECK-NEXT: #APP ; CHECK-NEXT: testl %esi, %esi ; CHECK-NEXT: testl %edi, %esi -; CHECK-NEXT: jne .LBB1_4 +; CHECK-NEXT: jne .LBB1_2 ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: jmp .LBB1_3 -; CHECK-NEXT: .LBB1_2: # %if.else +; CHECK-NEXT: jmp .LBB1_4 +; CHECK-NEXT: .LBB1_2: # Block address taken +; CHECK-NEXT: # %if.then.label_true_crit_edge +; CHECK-NEXT: # Label of block must be emitted +; 
CHECK-NEXT: jmp .LBB1_8 +; CHECK-NEXT: .LBB1_3: # %if.else ; CHECK-NEXT: #APP ; CHECK-NEXT: testl %esi, %edi ; CHECK-NEXT: testl %esi, %edi -; CHECK-NEXT: jne .LBB1_5 +; CHECK-NEXT: jne .LBB1_9 ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: .LBB1_3: +; CHECK-NEXT: .LBB1_4: ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: addl %edi, %eax -; CHECK-NEXT: .LBB1_5: # Block address taken -; CHECK-NEXT: # %return -; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: .LBB1_5: # %return ; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %edi ; CHECK-NEXT: retl -; CHECK-NEXT: .LBB1_4: # Block address taken -; CHECK-NEXT: # %label_true +; CHECK-NEXT: .LBB1_7: # Block address taken +; CHECK-NEXT: # %if.else.label_true_crit_edge ; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: .LBB1_8: # %label_true ; CHECK-NEXT: movl $-2, %eax ; CHECK-NEXT: jmp .LBB1_5 +; CHECK-NEXT: .LBB1_9: # Block address taken +; CHECK-NEXT: # %if.else.return_crit_edge +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: .LBB1_6: # Block address taken +; CHECK-NEXT: # %if.then.return_crit_edge +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: movl $-1, %eax +; CHECK-NEXT: jmp .LBB1_5 entry: %cmp = icmp slt i32 %out1, %out2 br i1 %cmp, label %if.then, label %if.else @@ -109,23 +119,25 @@ ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: # %bb.2: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: jmp .LBB2_5 +; CHECK-NEXT: jmp .LBB2_4 ; CHECK-NEXT: .LBB2_3: # %false ; CHECK-NEXT: #APP ; CHECK-NEXT: .short %eax ; CHECK-NEXT: .short %edx ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: # %bb.4: ; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: .LBB2_5: # %asm.fallthrough +; CHECK-NEXT: .LBB2_4: # %asm.fallthrough ; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %edi ; CHECK-NEXT: retl +; CHECK-NEXT: .LBB2_5: # Block address taken +; CHECK-NEXT: # %true.indirect_crit_edge +; CHECK-NEXT: # Label of block must be emitted ; CHECK-NEXT: .LBB2_6: # Block address taken -; CHECK-NEXT: # %indirect +; CHECK-NEXT: # %false.indirect_crit_edge 
; CHECK-NEXT: # Label of block must be emitted ; CHECK-NEXT: movl $42, %eax -; CHECK-NEXT: jmp .LBB2_5 +; CHECK-NEXT: jmp .LBB2_4 entry: br i1 %cmp, label %true, label %false @@ -148,31 +160,37 @@ define i32 @test4(i32 %out1, i32 %out2) { ; CHECK-LABEL: test4: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl $-1, %eax -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: #APP -; CHECK-NEXT: testl %ecx, %ecx -; CHECK-NEXT: testl %edx, %ecx +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: testl %ecx, %eax ; CHECK-NEXT: jne .LBB3_3 ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: # %bb.1: # %asm.fallthrough ; CHECK-NEXT: #APP -; CHECK-NEXT: testl %ecx, %edx -; CHECK-NEXT: testl %ecx, %edx -; CHECK-NEXT: jne .LBB3_4 +; CHECK-NEXT: testl %eax, %ecx +; CHECK-NEXT: testl %eax, %ecx +; CHECK-NEXT: jne .LBB3_5 ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: # %bb.2: # %asm.fallthrough2 -; CHECK-NEXT: addl %edx, %ecx -; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: retl ; CHECK-NEXT: .LBB3_4: # Block address taken -; CHECK-NEXT: # %return +; CHECK-NEXT: # %entry.return_crit_edge +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: .LBB3_5: # Block address taken +; CHECK-NEXT: # %asm.fallthrough.return_crit_edge ; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: movl $-1, %eax ; CHECK-NEXT: retl +; CHECK-NEXT: .LBB3_6: # Block address taken +; CHECK-NEXT: # %asm.fallthrough.label_true_crit_edge +; CHECK-NEXT: # Label of block must be emitted ; CHECK-NEXT: .LBB3_3: # Block address taken -; CHECK-NEXT: # %label_true +; CHECK-NEXT: # %entry.label_true_crit_edge ; CHECK-NEXT: # Label of block must be emitted ; CHECK-NEXT: movl $-2, %eax -; CHECK-NEXT: jmp .LBB3_4 +; CHECK-NEXT: retl entry: %0 = callbr { i32, i32 } asm sideeffect "testl $0, $0; testl $1, $2; jne ${3:l}", "=r,=r,r,!i,!i"(i32 %out1) to label %asm.fallthrough [label %label_true, label %return] @@ -206,7 +224,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: #APP ; 
CHECK-NEXT: #NO_APP -; CHECK-NEXT: .LBB4_1: # Block address taken +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB4_2: # Block address taken +; CHECK-NEXT: # %._crit_edge ; CHECK-NEXT: # Label of block must be emitted ; CHECK-NEXT: retl %1 = call i32 @llvm.read_register.i32(metadata !3) diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -72,6 +72,7 @@ ; CHECK-NEXT: CodeGen Prepare ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Exception handling preparation +; CHECK-NEXT: Prepare callbr ; CHECK-NEXT: Safe Stack instrumentation pass ; CHECK-NEXT: Insert stack protectors ; CHECK-NEXT: Module Verifier