diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -181,6 +181,16 @@ ``$prefix/lib/clang/$CLANG_MAJOR_VERSION`` and can be queried using ``clang -print-resource-dir``, just like before. +- Indirect edges of asm goto statements under certain circumstances may not be + split. In previous releases of clang, that means for the following input the + two inputs may have compared equal in the inline assembly. This is no longer + guaranteed (and necessary to support outputs along indirect edges, which is + still not yet supported). + + .. code-block:: c + + foo: asm goto ("# %0 %1"::"i"(&&foo)::foo); + What's New in Clang |release|? ============================== Some of the major new features and improvements to Clang are listed diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -8473,6 +8473,11 @@ The output values of a '``callbr``' instruction are available only to the '``fallthrough``' block, not to any '``indirect``' blocks(s). +To support outputs along indirect edges, LLVM may need to split critical edges. +This means that basic blocks may be synthesized; ``indirect labels`` from +inline asm may not compare equal to the label when passed as a +``function args``. + The only use of this today is to implement the "goto" feature of gcc inline assembly where additional labels can be provided as locations for the inline assembly to jump to. diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h --- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -505,6 +505,13 @@ BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr); +// Split critical edges from CallBrInsts when the CallBrInst has outputs. When +// we take an indirect jump from the CallBrInst, we need somewhere to copy any +// Physical Register outputs back to virtual register outputs, but we must do +// so before any phi instructions (phi's must occur first in a BasicBlock). +// Does not split the default edge. +bool SplitCallBrCriticalEdges(Function &F); + /// Given a set of incoming and outgoing blocks, create a "hub" such that every /// edge from an incoming block InBB to an outgoing block OutBB is now split /// into two edges, one from InBB to the hub and another from the hub to diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -568,6 +568,10 @@ // to help generate sane code for PHIs involving such edges. EverMadeChange |= SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true); + // CallBr may have phyreg outputs. The use of these may be a PHI node, so we + // need somewhere to put the vreg copies after the INLINEASM_BR but before + // the phis. + EverMadeChange |= SplitCallBrCriticalEdges(F); // If we are optimzing huge function, we need to consider the build time. // Because the basic algorithm's complex is near O(N!). diff --git a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp --- a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -16,6 +16,7 @@ #include "llvm/Transforms/Utils/BreakCriticalEdges.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BlockFrequencyInfo.h" @@ -24,14 +25,21 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/PostDominators.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/InitializePasses.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ValueMapper.h" + +#include + using namespace llvm; #define DEBUG_TYPE "break-crit-edges" @@ -463,3 +471,49 @@ return Changed; } + +bool llvm::SplitCallBrCriticalEdges(Function &F) { + bool Changed = false; + CriticalEdgeSplittingOptions Options; + Options.setMergeIdenticalEdges(); + SmallPtrSet Visited; + SmallVector UniqueCriticalSuccessorIndices; + + for (Instruction &I : instructions(F)) { + if (auto *CBR = dyn_cast(&I)) { + // If the CallBrInst has no output, then we do not need to split any + // critical edges. + if (CBR->getFunctionType()->getReturnType()->isVoidTy()) + continue; + + Visited.clear(); + UniqueCriticalSuccessorIndices.clear(); + + // Do one pass to find the indices of unique indirect successors BEFORE + // attempting to split critical edges (otherwise the BasicBlock inserted + // in the Visited Set below will not catch duplicates; it would get + // updated to the new synthetic block while iterating this loop). Start + // at index 1 to avoid the default destination. + for (unsigned i = 1, e = CBR->getNumSuccessors(); i != e; ++i) { + // The same destination may appear multiple times in a callbr + // indirect label list. Also, a phi may list the same predecessor + // multiple times as long as they use the same value. + // + // The CriticalEdgeSplittingOptions option MergeIdenticalEdges will + // handle this properly for us, but we don't want to call + // SplitCriticalEdge on such repeated destinations more than once. + if (!Visited.insert(CBR->getSuccessor(i)).second) + continue; + if (isCriticalEdge(CBR, i)) + UniqueCriticalSuccessorIndices.push_back(i); + } + for (unsigned i : UniqueCriticalSuccessorIndices) { + BasicBlock *Synth = SplitKnownCriticalEdge(CBR, i, Options); + assert(Synth && "Failed to split a known critical edge from callbr"); + if (Synth) + Changed = true; + } + } + } + return Changed; +} diff --git a/llvm/test/CodeGen/X86/callbr-asm-outputs.ll b/llvm/test/CodeGen/X86/callbr-asm-outputs.ll --- a/llvm/test/CodeGen/X86/callbr-asm-outputs.ll +++ b/llvm/test/CodeGen/X86/callbr-asm-outputs.ll @@ -38,36 +38,46 @@ ; CHECK-NEXT: pushl %esi ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi -; CHECK-NEXT: movl $-1, %eax ; CHECK-NEXT: cmpl %edi, %esi -; CHECK-NEXT: jge .LBB1_2 +; CHECK-NEXT: jge .LBB1_3 ; CHECK-NEXT: # %bb.1: # %if.then ; CHECK-NEXT: #APP ; CHECK-NEXT: testl %esi, %esi ; CHECK-NEXT: testl %edi, %esi -; CHECK-NEXT: jne .LBB1_4 +; CHECK-NEXT: jne .LBB1_2 ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: jmp .LBB1_3 -; CHECK-NEXT: .LBB1_2: # %if.else +; CHECK-NEXT: jmp .LBB1_4 +; CHECK-NEXT: .LBB1_2: # Block address taken +; CHECK-NEXT: # %if.then.label_true_crit_edge +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: jmp .LBB1_8 +; CHECK-NEXT: .LBB1_3: # %if.else ; CHECK-NEXT: #APP ; CHECK-NEXT: testl %esi, %edi ; CHECK-NEXT: testl %esi, %edi -; CHECK-NEXT: jne .LBB1_5 +; CHECK-NEXT: jne .LBB1_9 ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: .LBB1_3: +; CHECK-NEXT: .LBB1_4: ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: addl %edi, %eax -; CHECK-NEXT: .LBB1_5: # Block address taken -; CHECK-NEXT: # %return -; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: .LBB1_5: # %return ; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %edi ; CHECK-NEXT: retl -; CHECK-NEXT: .LBB1_4: # Block address taken -; CHECK-NEXT: # %label_true +; CHECK-NEXT: .LBB1_7: # Block address taken +; CHECK-NEXT: # %if.else.label_true_crit_edge ; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: .LBB1_8: # %label_true ; CHECK-NEXT: movl $-2, %eax ; CHECK-NEXT: jmp .LBB1_5 +; CHECK-NEXT: .LBB1_9: # Block address taken +; CHECK-NEXT: # %if.else.return_crit_edge +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: .LBB1_6: # Block address taken +; CHECK-NEXT: # %if.then.return_crit_edge +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: movl $-1, %eax +; CHECK-NEXT: jmp .LBB1_5 entry: %cmp = icmp slt i32 %out1, %out2 br i1 %cmp, label %if.then, label %if.else @@ -122,7 +132,10 @@ ; CHECK-NEXT: popl %edi ; CHECK-NEXT: retl ; CHECK-NEXT: .LBB2_6: # Block address taken -; CHECK-NEXT: # %indirect +; CHECK-NEXT: # %true.indirect_crit_edge +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: .LBB2_7: # Block address taken +; CHECK-NEXT: # %false.indirect_crit_edge ; CHECK-NEXT: # Label of block must be emitted ; CHECK-NEXT: movl $42, %eax ; CHECK-NEXT: jmp .LBB2_5 @@ -148,31 +161,37 @@ define i32 @test4(i32 %out1, i32 %out2) { ; CHECK-LABEL: test4: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl $-1, %eax -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: #APP -; CHECK-NEXT: testl %ecx, %ecx -; CHECK-NEXT: testl %edx, %ecx +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: testl %ecx, %eax ; CHECK-NEXT: jne .LBB3_3 ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: # %bb.1: # %asm.fallthrough ; CHECK-NEXT: #APP -; CHECK-NEXT: testl %ecx, %edx -; CHECK-NEXT: testl %ecx, %edx -; CHECK-NEXT: jne .LBB3_4 +; CHECK-NEXT: testl %eax, %ecx +; CHECK-NEXT: testl %eax, %ecx +; CHECK-NEXT: jne .LBB3_5 ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: # %bb.2: # %asm.fallthrough2 -; CHECK-NEXT: addl %edx, %ecx -; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: retl ; CHECK-NEXT: .LBB3_4: # Block address taken -; CHECK-NEXT: # %return +; CHECK-NEXT: # %entry.return_crit_edge +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: .LBB3_5: # Block address taken +; CHECK-NEXT: # %asm.fallthrough.return_crit_edge ; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: movl $-1, %eax ; CHECK-NEXT: retl +; CHECK-NEXT: .LBB3_6: # Block address taken +; CHECK-NEXT: # %asm.fallthrough.label_true_crit_edge +; CHECK-NEXT: # Label of block must be emitted ; CHECK-NEXT: .LBB3_3: # Block address taken -; CHECK-NEXT: # %label_true +; CHECK-NEXT: # %entry.label_true_crit_edge ; CHECK-NEXT: # Label of block must be emitted ; CHECK-NEXT: movl $-2, %eax -; CHECK-NEXT: jmp .LBB3_4 +; CHECK-NEXT: retl entry: %0 = callbr { i32, i32 } asm sideeffect "testl $0, $0; testl $1, $2; jne ${3:l}", "=r,=r,r,!i,!i"(i32 %out1) to label %asm.fallthrough [label %label_true, label %return] @@ -206,7 +225,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: .LBB4_1: # Block address taken +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB4_2: # Block address taken +; CHECK-NEXT: # %._crit_edge ; CHECK-NEXT: # Label of block must be emitted ; CHECK-NEXT: retl %1 = call i32 @llvm.read_register.i32(metadata !3) diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/callbr.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/callbr.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/callbr.ll @@ -0,0 +1,165 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -codegenprepare %s -o - | FileCheck %s + +target triple = "aarch64-linux-gnu" + +; Don't split edges unless they are critical and callbr produces output. +; Here we have neither. +define i32 @dont_split0() { +; CHECK-LABEL: @dont_split0( +; CHECK-NEXT: entry: +; CHECK-NEXT: callbr void asm "", "!i"() +; CHECK-NEXT: to label [[X:%.*]] [label %y] +; CHECK: x: +; CHECK-NEXT: ret i32 42 +; CHECK: y: +; CHECK-NEXT: ret i32 0 +; +entry: + callbr void asm "", "!i"() + to label %x [label %y] + +x: + ret i32 42 + +y: + ret i32 0 +} + +; Don't split edges unless they are critical and callbr produces output. +; Here we have output, but no critical edge. +define i32 @dont_split1() { +; CHECK-LABEL: @dont_split1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = callbr i32 asm "", "=r,!i"() +; CHECK-NEXT: to label [[X:%.*]] [label %y] +; CHECK: x: +; CHECK-NEXT: ret i32 42 +; CHECK: y: +; CHECK-NEXT: ret i32 [[TMP0]] +; +entry: + %0 = callbr i32 asm "", "=r,!i"() + to label %x [label %y] + +x: + ret i32 42 + +y: + ret i32 %0 +} + +; Don't split edges unless they are critical and callbr produces output. +; Here we have a critical edge along an indirect branch, but no output. +define i32 @dont_split2() { +; CHECK-LABEL: @dont_split2( +; CHECK-NEXT: entry: +; CHECK-NEXT: callbr void asm "", "!i"() +; CHECK-NEXT: to label [[X:%.*]] [label %y] +; CHECK: x: +; CHECK-NEXT: br label [[Y:%.*]] +; CHECK: y: +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ 0, [[X]] ], [ 42, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[TMP0]] +; +entry: + callbr void asm "", "!i"() + to label %x [label %y] + +x: + br label %y + +y: + %0 = phi i32 [ 0, %x ], [ 42, %entry ] + ret i32 %0 +} + +; Don't split edges unless they are critical and callbr produces output. +; Here we have output and a critical edge along an indirect branch. +define i32 @split_me0() { +; CHECK-LABEL: @split_me0( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = callbr i32 asm "", "=r,!i"() +; CHECK-NEXT: to label [[X:%.*]] [label %entry.y_crit_edge] +; CHECK: entry.y_crit_edge: +; CHECK-NEXT: br label [[Y:%.*]] +; CHECK: x: +; CHECK-NEXT: br label [[Y]] +; CHECK: y: +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[TMP0]], [[ENTRY_Y_CRIT_EDGE:%.*]] ], [ 42, [[X]] ] +; CHECK-NEXT: ret i32 [[TMP1]] +; +entry: + %0 = callbr i32 asm "", "=r,!i"() + to label %x [label %y] + +x: + br label %y + +y: + %1 = phi i32 [ %0, %entry ], [ 42, %x ] + ret i32 %1 +} + +; Here we have output and a critical edge along an indirect branch. +; Ensure that if we repeat the indirect destination, that we only split it +; once. +define i32 @split_me1(i1 %z) { +; CHECK-LABEL: @split_me1( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[Z:%.*]], label [[W:%.*]], label [[V:%.*]] +; CHECK: w: +; CHECK-NEXT: [[TMP0:%.*]] = callbr i32 asm "", "=r,!i,!i"() +; CHECK-NEXT: to label [[X:%.*]] [label [[W_V_CRIT_EDGE:%.*]], label %w.v_crit_edge] +; CHECK: w.v_crit_edge: +; CHECK-NEXT: br label [[V]] +; CHECK: x: +; CHECK-NEXT: ret i32 42 +; CHECK: v: +; CHECK-NEXT: ret i32 0 +; +entry: + br i1 %z, label %w, label %v + +w: + %0 = callbr i32 asm "", "=r,!i,!i"() + to label %x [label %v, label %v] + +x: + ret i32 42 + +v: + ret i32 0 +} + +; A more interessting case of @split_me1. Check that we still only split the +; critical edge from w to v once. +define i32 @split_me2(i1 %z) { +; CHECK-LABEL: @split_me2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[Z:%.*]], label [[W:%.*]], label [[V:%.*]] +; CHECK: w: +; CHECK-NEXT: [[TMP0:%.*]] = callbr i32 asm "", "=r,!i,!i"() +; CHECK-NEXT: to label [[X:%.*]] [label [[W_V_CRIT_EDGE:%.*]], label %w.v_crit_edge] +; CHECK: w.v_crit_edge: +; CHECK-NEXT: br label [[V]] +; CHECK: x: +; CHECK-NEXT: ret i32 42 +; CHECK: v: +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[TMP0]], [[W_V_CRIT_EDGE]] ], [ 42, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[TMP1]] +; +entry: + br i1 %z, label %w, label %v + +w: + %0 = callbr i32 asm "", "=r,!i,!i"() + to label %x [label %v, label %v] + +x: + ret i32 42 + +v: + %1 = phi i32 [ %0, %w ], [ 42, %entry ], [ %0, %w ] + ret i32 %1 +}