Index: docs/LangRef.rst =================================================================== --- docs/LangRef.rst +++ docs/LangRef.rst @@ -1278,6 +1278,9 @@ This function attribute indicates that the function never returns normally. This produces undefined behavior at runtime if the function ever does dynamically return. +``notail`` + This function attribute indicates that tail call optimization must not + be performed on calls to the function. ``nounwind`` This function attribute indicates that the function never raises an exception. If the function does raise an exception, its runtime Index: include/llvm/Bitcode/LLVMBitCodes.h =================================================================== --- include/llvm/Bitcode/LLVMBitCodes.h +++ include/llvm/Bitcode/LLVMBitCodes.h @@ -418,7 +418,8 @@ ATTR_KIND_SAFESTACK = 44, ATTR_KIND_ARGMEMONLY = 45, ATTR_KIND_SWIFT_SELF = 46, - ATTR_KIND_SWIFT_ERROR = 47 + ATTR_KIND_SWIFT_ERROR = 47, + ATTR_KIND_NO_TAIL = 48 }; enum ComdatSelectionKindCodes { Index: include/llvm/IR/Attributes.h =================================================================== --- include/llvm/IR/Attributes.h +++ include/llvm/IR/Attributes.h @@ -93,6 +93,7 @@ DereferenceableOrNull, ///< Pointer is either null or dereferenceable NoRedZone, ///< Disable redzone NoReturn, ///< Mark the function as not returning + NoTail, ///< No tail call optimization to this function NoUnwind, ///< Function doesn't unwind stack OptimizeForSize, ///< opt_size OptimizeNone, ///< Function must not be optimized. 
Index: include/llvm/IR/Instructions.h =================================================================== --- include/llvm/IR/Instructions.h +++ include/llvm/IR/Instructions.h @@ -1583,6 +1583,8 @@ addAttribute(AttributeSet::FunctionIndex, Attribute::NoInline); } + bool isNoTail() const { return hasFnAttr(Attribute::NoTail); } + /// \brief Return true if the call can return twice bool canReturnTwice() const { return hasFnAttr(Attribute::ReturnsTwice); Index: lib/AsmParser/LLLexer.cpp =================================================================== --- lib/AsmParser/LLLexer.cpp +++ lib/AsmParser/LLLexer.cpp @@ -618,6 +618,7 @@ KEYWORD(nonnull); KEYWORD(noredzone); KEYWORD(noreturn); + KEYWORD(notail); KEYWORD(nounwind); KEYWORD(optnone); KEYWORD(optsize); Index: lib/AsmParser/LLParser.cpp =================================================================== --- lib/AsmParser/LLParser.cpp +++ lib/AsmParser/LLParser.cpp @@ -1000,6 +1000,7 @@ case lltok::kw_nonlazybind: B.addAttribute(Attribute::NonLazyBind); break; case lltok::kw_noredzone: B.addAttribute(Attribute::NoRedZone); break; case lltok::kw_noreturn: B.addAttribute(Attribute::NoReturn); break; + case lltok::kw_notail: B.addAttribute(Attribute::NoTail); break; case lltok::kw_nounwind: B.addAttribute(Attribute::NoUnwind); break; case lltok::kw_optnone: B.addAttribute(Attribute::OptimizeNone); break; case lltok::kw_optsize: B.addAttribute(Attribute::OptimizeForSize); break; @@ -1330,6 +1331,7 @@ case lltok::kw_nonlazybind: case lltok::kw_noredzone: case lltok::kw_noreturn: + case lltok::kw_notail: case lltok::kw_nounwind: case lltok::kw_optnone: case lltok::kw_optsize: @@ -1410,6 +1412,7 @@ case lltok::kw_noduplicate: case lltok::kw_noimplicitfloat: case lltok::kw_noinline: + case lltok::kw_notail: case lltok::kw_nonlazybind: case lltok::kw_noredzone: case lltok::kw_noreturn: Index: lib/AsmParser/LLToken.h =================================================================== --- lib/AsmParser/LLToken.h +++ 
lib/AsmParser/LLToken.h @@ -126,6 +126,7 @@ kw_nonnull, kw_noredzone, kw_noreturn, + kw_notail, kw_nounwind, kw_optnone, kw_optsize, Index: lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- lib/Bitcode/Reader/BitcodeReader.cpp +++ lib/Bitcode/Reader/BitcodeReader.cpp @@ -1194,6 +1194,8 @@ return Attribute::NoRedZone; case bitc::ATTR_KIND_NO_RETURN: return Attribute::NoReturn; + case bitc::ATTR_KIND_NO_TAIL: + return Attribute::NoTail; case bitc::ATTR_KIND_NO_UNWIND: return Attribute::NoUnwind; case bitc::ATTR_KIND_OPTIMIZE_FOR_SIZE: Index: lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- lib/Bitcode/Writer/BitcodeWriter.cpp +++ lib/Bitcode/Writer/BitcodeWriter.cpp @@ -210,6 +210,8 @@ return bitc::ATTR_KIND_NO_RED_ZONE; case Attribute::NoReturn: return bitc::ATTR_KIND_NO_RETURN; + case Attribute::NoTail: + return bitc::ATTR_KIND_NO_TAIL; case Attribute::NoUnwind: return bitc::ATTR_KIND_NO_UNWIND; case Attribute::OptimizeForSize: Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5826,7 +5826,7 @@ // Check if we can potentially perform a tail call. More detailed checking is // be done within LowerCallTo, after more information about the call is known. 
- LowerCallTo(&I, Callee, I.isTailCall()); + LowerCallTo(&I, Callee, I.isTailCall() && !I.isNoTail()); } namespace { Index: lib/IR/Attributes.cpp =================================================================== --- lib/IR/Attributes.cpp +++ lib/IR/Attributes.cpp @@ -232,6 +232,8 @@ return "noredzone"; if (hasAttribute(Attribute::NoReturn)) return "noreturn"; + if (hasAttribute(Attribute::NoTail)) + return "notail"; if (hasAttribute(Attribute::NoUnwind)) return "nounwind"; if (hasAttribute(Attribute::OptimizeNone)) @@ -442,6 +444,7 @@ case Attribute::JumpTable: return 1ULL << 45; case Attribute::Convergent: return 1ULL << 46; case Attribute::SafeStack: return 1ULL << 47; + case Attribute::NoTail: return 1ULL << 48; case Attribute::Dereferenceable: llvm_unreachable("dereferenceable attribute not supported in raw format"); break; Index: lib/IR/Verifier.cpp =================================================================== --- lib/IR/Verifier.cpp +++ lib/IR/Verifier.cpp @@ -1271,7 +1271,8 @@ I->getKindAsEnum() == Attribute::OptimizeNone || I->getKindAsEnum() == Attribute::JumpTable || I->getKindAsEnum() == Attribute::Convergent || - I->getKindAsEnum() == Attribute::ArgMemOnly) { + I->getKindAsEnum() == Attribute::ArgMemOnly || + I->getKindAsEnum() == Attribute::NoTail) { if (!isFunction) { CheckFailed("Attribute '" + I->getAsString() + "' only applies to functions!", V); Index: lib/Transforms/Scalar/TailRecursionElimination.cpp =================================================================== --- lib/Transforms/Scalar/TailRecursionElimination.cpp +++ lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -304,7 +304,9 @@ if (!CI || CI->isTailCall()) continue; - if (CI->doesNotAccessMemory()) { + bool IsNoTail = CI->isNoTail(); + + if (!IsNoTail && CI->doesNotAccessMemory()) { // A call to a readnone function whose arguments are all things computed // outside this function can be marked tail. 
Even if you stored the // alloca address into a global, a readnone function can't load the @@ -332,7 +334,7 @@ } } - if (Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) { + if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) { DeferredTails.push_back(CI); } else { AllCallsAreTailCalls = false; Index: test/Bindings/llvm-c/invalid-bitcode.test =================================================================== --- test/Bindings/llvm-c/invalid-bitcode.test +++ test/Bindings/llvm-c/invalid-bitcode.test @@ -1,3 +1,3 @@ ; RUN: not llvm-c-test --module-dump < %S/Inputs/invalid.ll.bc 2>&1 | FileCheck %s -CHECK: Error parsing bitcode: Unknown attribute kind (48) +CHECK: Error parsing bitcode: Unknown attribute kind (63) Index: test/Bitcode/attributes.ll =================================================================== --- test/Bitcode/attributes.ll +++ test/Bitcode/attributes.ll @@ -204,7 +204,7 @@ ; CHECK: define void @f34() { call void @nobuiltin() nobuiltin -; CHECK: call void @nobuiltin() #27 +; CHECK: call void @nobuiltin() #28 ret void; } @@ -272,6 +272,11 @@ ret void } +; CHECK: define void @f47() #27 +define void @f47() notail { + ret void +} + ; CHECK: attributes #0 = { noreturn } ; CHECK: attributes #1 = { nounwind } ; CHECK: attributes #2 = { readnone } @@ -299,4 +304,5 @@ ; CHECK: attributes #24 = { jumptable } ; CHECK: attributes #25 = { convergent } ; CHECK: attributes #26 = { argmemonly } -; CHECK: attributes #27 = { nobuiltin } +; CHECK: attributes #27 = { notail } +; CHECK: attributes #28 = { nobuiltin } Index: test/Bitcode/compatibility.ll =================================================================== --- test/Bitcode/compatibility.ll +++ test/Bitcode/compatibility.ll @@ -501,6 +501,8 @@ ; CHECK: declare void @f.uwtable() #30 declare void @f.kvpair() "cpu"="cortex-a8" ; CHECK:declare void @f.kvpair() #31 +declare void @f.notail() notail +; CHECK: declare void @f.notail() #32 ; Functions -- section declare void 
@f.section() section "80" @@ -559,7 +561,7 @@ ; Functions -- Personality constant declare void @llvm.donothing() nounwind readnone -; CHECK: declare void @llvm.donothing() #32 +; CHECK: declare void @llvm.donothing() #33 define void @f.no_personality() personality i8 3 { ; CHECK: define void @f.no_personality() personality i8 3 invoke void @llvm.donothing() to label %normal unwind label %exception @@ -1125,7 +1127,7 @@ ; CHECK: select <2 x i1> , <2 x i8> , <2 x i8> call void @f.nobuiltin() builtin - ; CHECK: call void @f.nobuiltin() #36 + ; CHECK: call void @f.nobuiltin() #37 call fastcc noalias i32* @f.noalias() noinline ; CHECK: call fastcc noalias i32* @f.noalias() #12 @@ -1345,11 +1347,12 @@ ; CHECK: attributes #29 = { "thunk" } ; CHECK: attributes #30 = { uwtable } ; CHECK: attributes #31 = { "cpu"="cortex-a8" } -; CHECK: attributes #32 = { nounwind readnone } -; CHECK: attributes #33 = { nounwind readonly argmemonly } -; CHECK: attributes #34 = { nounwind argmemonly } -; CHECK: attributes #35 = { nounwind readonly } -; CHECK: attributes #36 = { builtin } +; CHECK: attributes #32 = { notail } +; CHECK: attributes #33 = { nounwind readnone } +; CHECK: attributes #34 = { nounwind readonly argmemonly } +; CHECK: attributes #35 = { nounwind argmemonly } +; CHECK: attributes #36 = { nounwind readonly } +; CHECK: attributes #37 = { builtin } ;; Metadata Index: test/Bitcode/invalid.ll =================================================================== --- test/Bitcode/invalid.ll +++ test/Bitcode/invalid.ll @@ -1,6 +1,6 @@ ; RUN: not llvm-dis < %s.bc 2>&1 | FileCheck %s -; CHECK: llvm-dis{{(\.EXE|\.exe)?}}: error: Unknown attribute kind (48) +; CHECK: llvm-dis{{(\.EXE|\.exe)?}}: error: Unknown attribute kind (63) ; invalid.ll.bc has an invalid attribute number. 
; The test checks that LLVM reports the error and doesn't access freed memory Index: test/CodeGen/X86/attr-notail.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/attr-notail.ll @@ -0,0 +1,55 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +; CHECK-LABEL: {{\_?}}caller0: +; CHECK: callq {{\_?}}callee0 +; CHECK: jmp {{\_?}}callee3 +; CHECK: callq {{\_?}}callee1 +; CHECK: jmp {{\_?}}callee2 + +define i32 @callee0(i32 %a) #0 { +entry: + %add = add nsw i32 %a, 1 + ret i32 %add +} + +define i32 @caller0(i32 %a) { +entry: + %cmp = icmp sgt i32 %a, 1 + br i1 %cmp, label %if.then, label %if.end + +if.then: + %call = tail call i32 @callee0(i32 %a) + br label %return + +if.end: + %cmp1 = icmp slt i32 %a, 0 + br i1 %cmp1, label %if.then.2, label %if.end.4 + +if.then.2: + %call3 = tail call i32 @callee1(i32 %a) + br label %return + +if.end.4: + %cmp2 = icmp eq i32 %a, 0 + br i1 %cmp2, label %if.then.3, label %if.end.5 + +if.then.3: + %call5 = tail call i32 @callee2(i32 %a) + br label %return + +if.end.5: + %call6 = musttail call i32 @callee3(i32 %a) + ret i32 %call6 + +return: + %retval.0 = phi i32 [ %call, %if.then ], [ %call3, %if.then.2 ], [ %call5, %if.then.3 ] + ret i32 %retval.0 +} + +declare i32 @callee1(i32) #0 + +declare i32 @callee2(i32) + +declare i32 @callee3(i32) #0 + +attributes #0 = { notail } Index: test/Transforms/TailCallElim/notail.ll =================================================================== --- /dev/null +++ test/Transforms/TailCallElim/notail.ll @@ -0,0 +1,15 @@ +; RUN: opt < %s -tailcallelim -S | FileCheck %s + +; Check that callee isn't marked as "tail". + +; CHECK: %{{[a-z]+}} = call i32 @callee(i32 %{{[a-z]+}}) + +define i32 @caller(i32 %a) { +entry: + %call = call i32 @callee(i32 %a) + ret i32 %call +} + +declare i32 @callee(i32) #0 + +attributes #0 = { notail }