diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -1580,25 +1580,8 @@ ASM Goto with Output Constraints ================================ -.. note:: - - Clang's implementation of ASM goto differs from `GCC's - implementation `_ in - that Clang doesn't yet support outputs on the indirect branch. Use of an - output on the indirect branch may result in undefined behavior and should be - avoided. E.g., in the following `z` isn't valid when used and may have - different values depending on optimization levels. (Clang may not warn about - such constructs.) - - .. code-block:: c++ - - int foo(int x) { - int y, z; - asm goto(... : "=r"(y), "=r"(z): "r"(x) : : err); - return y; - err: - return z; - } +Outputs may be used along any branches from the ``asm goto`` whether the +branches are taken or not. When using tied-outputs (i.e. outputs that are inputs and outputs, not just outputs) with the `+r` constraint, there is a hidden input that's created diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -192,6 +192,17 @@ these definitions were allowed. Note that such definitions are ODR violations if the header is included more than once. +- Indirect edges of asm goto statements under certain circumstances may now be + split. In previous releases of clang, that means for the following code the + two inputs may have compared equal in the inline assembly. This is no longer + guaranteed (and necessary to support outputs along indirect edges, which is + now supported as of this release). This change is more consistent with the + behavior of GCC. + + .. code-block:: c + + foo: asm goto ("# %0 %1"::"i"(&&foo)::foo); + What's New in Clang |release|? ============================== Some of the major new features and improvements to Clang are listed @@ -201,6 +212,9 @@ Major New Features ------------------ +- Support for outputs from asm goto statements along indirect edges has been + added. This fixes + `Issue 53562 RegResults; + llvm::CallBrInst *CBR; + llvm::DenseMap> + CBRRegResults; if (IsGCCAsmGoto) { - llvm::CallBrInst *Result = - Builder.CreateCallBr(IA, Fallthrough, Transfer, Args); + CBR = Builder.CreateCallBr(IA, Fallthrough, Transfer, Args); EmitBlock(Fallthrough); - UpdateAsmCallInst(*Result, HasSideEffect, false, ReadOnly, ReadNone, + UpdateAsmCallInst(*CBR, HasSideEffect, false, ReadOnly, ReadNone, InNoMergeAttributedStmt, S, ResultRegTypes, ArgElemTypes, *this, RegResults); + // Because we are emitting code top to bottom, we don't have enough + // information at this point to know precisely whether we have a critical + // edge. If we have outputs, split all indirect destinations. + if (!RegResults.empty()) { + unsigned i = 0; + for (llvm::BasicBlock *Dest : CBR->getIndirectDests()) { + llvm::Twine SynthName = Dest->getName() + ".split"; + llvm::BasicBlock *SynthBB = createBasicBlock(SynthName); + llvm::IRBuilderBase::InsertPointGuard IPG(Builder); + Builder.SetInsertPoint(SynthBB); + + if (ResultRegTypes.size() == 1) { + CBRRegResults[SynthBB].push_back(CBR); + } else { + for (unsigned j = 0, e = ResultRegTypes.size(); j != e; ++j) { + llvm::Value *Tmp = Builder.CreateExtractValue(CBR, j, "asmresult"); + CBRRegResults[SynthBB].push_back(Tmp); + } + } + + EmitBranch(Dest); + EmitBlock(SynthBB); + CBR->setIndirectDest(i++, SynthBB); + } + } } else if (HasUnwindClobber) { llvm::CallBase *Result = EmitCallOrInvoke(IA, Args, ""); UpdateAsmCallInst(*Result, HasSideEffect, true, ReadOnly, ReadNone, @@ -2832,6 +2860,19 @@ EmitAsmStores(*this, S, RegResults, ResultRegTypes, ResultTruncRegTypes, ResultRegDests, ResultRegQualTys, ResultTypeRequiresCast, ResultRegIsFlagReg); + + // If this is an asm goto with outputs, repeat EmitAsmStores, but with a + // different insertion point; one for each indirect destination and with + // CBRRegResults rather than RegResults. + if (IsGCCAsmGoto && !CBRRegResults.empty()) { + for (llvm::BasicBlock *Succ : CBR->getIndirectDests()) { + llvm::IRBuilderBase::InsertPointGuard IPG(Builder); + Builder.SetInsertPoint(Succ, --(Succ->end())); + EmitAsmStores(*this, S, CBRRegResults[Succ], ResultRegTypes, + ResultTruncRegTypes, ResultRegDests, ResultRegQualTys, + ResultTypeRequiresCast, ResultRegIsFlagReg); + } + } } LValue CodeGenFunction::InitCapturedStruct(const CapturedStmt &S) { diff --git a/clang/test/CodeGen/asm-goto.c b/clang/test/CodeGen/asm-goto.c --- a/clang/test/CodeGen/asm-goto.c +++ b/clang/test/CodeGen/asm-goto.c @@ -22,12 +22,12 @@ int test2(int cond) { // CHECK-LABEL: define{{.*}} i32 @test2( // CHECK: callbr i32 asm sideeffect - // CHECK: to label %asm.fallthrough [label %label_true, label %loop] + // CHECK: to label %asm.fallthrough [label %label_true.split, label %loop.split] // CHECK-LABEL: asm.fallthrough: asm volatile goto("testl %0, %0; jne %l2;" : "=r"(cond) : "r"(cond) :: label_true, loop); asm volatile goto("testl %0, %0; jne %l3;" : "=r"(cond) : "r"(cond) :: label_true, loop); // CHECK: callbr i32 asm sideeffect - // CHECK: to label %asm.fallthrough1 [label %label_true, label %loop] + // CHECK: to label %asm.fallthrough1 [label %label_true.split2, label %loop.split3] // CHECK-LABEL: asm.fallthrough1: return 0; loop: @@ -39,13 +39,13 @@ int test3(int out1, int out2) { // CHECK-LABEL: define{{.*}} i32 @test3( // CHECK: callbr { i32, i32 } asm sideeffect - // CHECK: to label %asm.fallthrough [label %label_true, label %loop] + // CHECK: to label %asm.fallthrough [label %label_true.split, label %loop.split] // CHECK-LABEL: asm.fallthrough: asm volatile goto("testl %0, %0; jne %l3;" : "=r"(out1), "=r"(out2) : "r"(out1) :: label_true, loop); asm volatile goto("testl %0, %0; jne %l4;" : "=r"(out1), "=r"(out2) : "r"(out1) :: label_true, loop); // CHECK: callbr { i32, i32 } asm sideeffect - // CHECK: to label %asm.fallthrough2 [label %label_true, label %loop] - // CHECK-LABEL: asm.fallthrough2: + // CHECK: to label %asm.fallthrough6 [label %label_true.split11, label %loop.split14] + // CHECK-LABEL: asm.fallthrough6: return 0; loop: return 0; @@ -56,15 +56,15 @@ int test4(int out1, int out2) { // CHECK-LABEL: define{{.*}} i32 @test4( // CHECK: callbr { i32, i32 } asm sideeffect "jne ${5:l}", "={si},={di},r,0,1,!i,!i - // CHECK: to label %asm.fallthrough [label %label_true, label %loop] + // CHECK: to label %asm.fallthrough [label %label_true.split, label %loop.split] // CHECK-LABEL: asm.fallthrough: if (out1 < out2) asm volatile goto("jne %l5" : "+S"(out1), "+D"(out2) : "r"(out1) :: label_true, loop); else asm volatile goto("jne %l7" : "+S"(out1), "+D"(out2) : "r"(out1), "r"(out2) :: label_true, loop); // CHECK: callbr { i32, i32 } asm sideeffect "jne ${7:l}", "={si},={di},r,r,0,1,!i,!i - // CHECK: to label %asm.fallthrough2 [label %label_true, label %loop] - // CHECK-LABEL: asm.fallthrough2: + // CHECK: to label %asm.fallthrough6 [label %label_true.split11, label %loop.split14] + // CHECK-LABEL: asm.fallthrough6: return out1 + out2; loop: return -1; @@ -75,7 +75,7 @@ int test5(int addr, int size, int limit) { // CHECK-LABEL: define{{.*}} i32 @test5( // CHECK: callbr i32 asm "add $1,$0 ; jc ${4:l} ; cmp $2,$0 ; ja ${4:l} ; ", "=r,imr,imr,0,!i - // CHECK: to label %asm.fallthrough [label %t_err] + // CHECK: to label %asm.fallthrough [label %t_err.split] // CHECK-LABEL: asm.fallthrough: asm goto( "add %1,%0 ; " @@ -93,7 +93,7 @@ int test6(int out1) { // CHECK-LABEL: define{{.*}} i32 @test6( // CHECK: callbr i32 asm sideeffect "testl $0, $0; testl $1, $1; jne ${3:l}", "={si},r,0,!i,!i,{{.*}} - // CHECK: to label %asm.fallthrough [label %label_true, label %landing] + // CHECK: to label %asm.fallthrough [label %label_true.split, label %landing.split] // CHECK-LABEL: asm.fallthrough: // CHECK-LABEL: landing: int out2 = 42; @@ -111,7 +111,7 @@ void *test7(void) { // CHECK-LABEL: define{{.*}} ptr @test7( // CHECK: %1 = callbr ptr asm "# $0\0A\09# ${2:l}", "=r,0,!i,~{dirflag},~{fpsr},~{flags}"(ptr %0) - // CHECK-NEXT: to label %asm.fallthrough [label %foo] + // CHECK-NEXT: to label %asm.fallthrough [label %foo.split] void *p = &&foo; asm goto ("# %0\n\t# %l2":"+r"(p):::foo); foo: @@ -123,7 +123,7 @@ void *test8(void) { // CHECK-LABEL: define{{.*}} ptr @test8( // CHECK: %1 = callbr ptr asm "# $0\0A\09# ${2:l}", "=r,0,!i,~{dirflag},~{fpsr},~{flags}"(ptr %0) - // CHECK-NEXT: to label %asm.fallthrough [label %foo] + // CHECK-NEXT: to label %asm.fallthrough [label %foo.split] void *p = &&foo; asm goto ("# %0\n\t# %l[foo]":"+r"(p):::foo); foo: diff --git a/clang/test/CodeGen/asm-goto2.c b/clang/test/CodeGen/asm-goto2.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/asm-goto2.c @@ -0,0 +1,156 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: x86-registered-target +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -O0 -emit-llvm %s -o - | FileCheck %s + +// CHECK-LABEL: @test0( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RET:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP0:%.*]] = callbr i32 asm "", "=r,!i,~{dirflag},~{fpsr},~{flags}"() #[[ATTR1:[0-9]+]] +// CHECK-NEXT: to label [[ASM_FALLTHROUGH:%.*]] [label %z.split], !srcloc !2 +// CHECK: asm.fallthrough: +// CHECK-NEXT: store i32 [[TMP0]], ptr [[RET]], align 4 +// CHECK-NEXT: store i32 42, ptr [[RET]], align 4 +// CHECK-NEXT: br label [[Z:%.*]] +// CHECK: z: +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[RET]], align 4 +// CHECK-NEXT: ret i32 [[TMP1]] +// CHECK: z.split: +// CHECK-NEXT: store i32 [[TMP0]], ptr [[RET]], align 4 +// CHECK-NEXT: br label [[Z]] +// +int test0 (void) { + int ret; + asm goto ("" : "=r"(ret):::z); + ret = 42; +z: + return ret; +} + +// CHECK-LABEL: @test1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RET:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP0:%.*]] = callbr { i32, i32 } asm "", "=r,=r,!i,~{dirflag},~{fpsr},~{flags}"() #[[ATTR1]] +// CHECK-NEXT: to label [[ASM_FALLTHROUGH:%.*]] [label %z.split], !srcloc !3 +// CHECK: asm.fallthrough: +// CHECK-NEXT: [[ASMRESULT:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 +// CHECK-NEXT: [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1 +// CHECK-NEXT: store i32 [[ASMRESULT]], ptr [[RET]], align 4 +// CHECK-NEXT: store i32 [[ASMRESULT1]], ptr [[B]], align 4 +// CHECK-NEXT: store i32 42, ptr [[RET]], align 4 +// CHECK-NEXT: br label [[Z:%.*]] +// CHECK: z: +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[RET]], align 4 +// CHECK-NEXT: ret i32 [[TMP1]] +// CHECK: z.split: +// CHECK-NEXT: [[ASMRESULT2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 +// CHECK-NEXT: [[ASMRESULT3:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1 +// CHECK-NEXT: store i32 [[ASMRESULT2]], ptr [[RET]], align 4 +// CHECK-NEXT: store i32 [[ASMRESULT3]], ptr [[B]], align 4 +// CHECK-NEXT: br label [[Z]] +// +int test1 (void) { + int ret, b; + asm goto ("" : "=r"(ret), "=r"(b):::z); + ret = 42; +z: + return ret; +} + +// CHECK-LABEL: @test2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RET:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP0:%.*]] = callbr { i32, i32 } asm "", "=r,=r,!i,~{dirflag},~{fpsr},~{flags}"() #[[ATTR1]] +// CHECK-NEXT: to label [[ASM_FALLTHROUGH:%.*]] [label %z.split], !srcloc !4 +// CHECK: asm.fallthrough: +// CHECK-NEXT: [[ASMRESULT:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 +// CHECK-NEXT: [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1 +// CHECK-NEXT: store i32 [[ASMRESULT]], ptr [[RET]], align 4 +// CHECK-NEXT: store i32 [[ASMRESULT1]], ptr [[B]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = callbr { i32, i32 } asm "", "=r,=r,!i,~{dirflag},~{fpsr},~{flags}"() #[[ATTR1]] +// CHECK-NEXT: to label [[ASM_FALLTHROUGH4:%.*]] [label %z.split9], !srcloc !5 +// CHECK: asm.fallthrough4: +// CHECK-NEXT: [[ASMRESULT5:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0 +// CHECK-NEXT: [[ASMRESULT6:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1 +// CHECK-NEXT: store i32 [[ASMRESULT5]], ptr [[RET]], align 4 +// CHECK-NEXT: store i32 [[ASMRESULT6]], ptr [[B]], align 4 +// CHECK-NEXT: br label [[Z:%.*]] +// CHECK: z: +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[RET]], align 4 +// CHECK-NEXT: ret i32 [[TMP2]] +// CHECK: z.split: +// CHECK-NEXT: [[ASMRESULT2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 +// CHECK-NEXT: [[ASMRESULT3:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1 +// CHECK-NEXT: store i32 [[ASMRESULT2]], ptr [[RET]], align 4 +// CHECK-NEXT: store i32 [[ASMRESULT3]], ptr [[B]], align 4 +// CHECK-NEXT: br label [[Z]] +// CHECK: z.split9: +// CHECK-NEXT: [[ASMRESULT7:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0 +// CHECK-NEXT: [[ASMRESULT8:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1 +// CHECK-NEXT: store i32 [[ASMRESULT7]], ptr [[RET]], align 4 +// CHECK-NEXT: store i32 [[ASMRESULT8]], ptr [[B]], align 4 +// CHECK-NEXT: br label [[Z]] +// +int test2 (void) { + int ret, b; + asm goto ("" : "=r"(ret), "=r"(b):::z); + asm goto ("" : "=r"(ret), "=r"(b):::z); +z: + return ret; +} +// CHECK-LABEL: @test3( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[OUT1_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[OUT1:%.*]], ptr [[OUT1_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = callbr i32 asm "", "=r,!i,!i,~{dirflag},~{fpsr},~{flags}"() #[[ATTR1]] +// CHECK-NEXT: to label [[ASM_FALLTHROUGH:%.*]] [label [[LABEL_TRUE_SPLIT:%.*]], label %loop.split], !srcloc !6 +// CHECK: asm.fallthrough: +// CHECK-NEXT: store i32 [[TMP0]], ptr [[OUT1_ADDR]], align 4 +// CHECK-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK-NEXT: br label [[RETURN:%.*]] +// CHECK: label_true.split: +// CHECK-NEXT: store i32 [[TMP0]], ptr [[OUT1_ADDR]], align 4 +// CHECK-NEXT: br label [[LABEL_TRUE:%.*]] +// CHECK: loop.split: +// CHECK-NEXT: store i32 [[TMP0]], ptr [[OUT1_ADDR]], align 4 +// CHECK-NEXT: br label [[LOOP:%.*]] +// CHECK: loop: +// CHECK-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK-NEXT: br label [[RETURN]] +// CHECK: label_true: +// CHECK-NEXT: store i32 1, ptr [[RETVAL]], align 4 +// CHECK-NEXT: br label [[RETURN]] +// CHECK: return: +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK-NEXT: ret i32 [[TMP1]] +// +int test3 (int out1) { + asm goto("" : "=r"(out1)::: label_true, loop); + return 0; +loop: + return 0; +label_true: + return 1; +} + +// CHECK-LABEL: @test4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK-NEXT: br label [[FOO:%.*]] +// CHECK: foo: +// CHECK-NEXT: [[TMP0:%.*]] = callbr i32 asm "", "=r,!i,~{dirflag},~{fpsr},~{flags}"() #[[ATTR1]] +// CHECK-NEXT: to label [[ASM_FALLTHROUGH:%.*]] [label %foo.split], !srcloc !7 +// CHECK: asm.fallthrough: +// CHECK-NEXT: store i32 [[TMP0]], ptr [[X]], align 4 +// CHECK-NEXT: ret void +// CHECK: foo.split: +// CHECK-NEXT: store i32 [[TMP0]], ptr [[X]], align 4 +// CHECK-NEXT: br label [[FOO]] +// +void test4 (void) { + int x; +foo: + asm goto ("" : "=r"(x):::foo); +} diff --git a/clang/test/Modules/asm-goto.c b/clang/test/Modules/asm-goto.c --- a/clang/test/Modules/asm-goto.c +++ b/clang/test/Modules/asm-goto.c @@ -5,7 +5,7 @@ // CHECK-LABEL: define {{.*}} @foo( // CHECK: callbr {{.*}} "=r,!i{{.*}}() -// CHECK-NEXT: to label %asm.fallthrough [label %indirect] +// CHECK-NEXT: to label %asm.fallthrough [label %indirect.split] int bar(void) { return foo();