Index: lib/CodeGen/Analysis.cpp
===================================================================
--- lib/CodeGen/Analysis.cpp
+++ lib/CodeGen/Analysis.cpp
@@ -501,6 +501,30 @@
   return true;
 }
 
+// Return true if the instruction is a call to the llvm.frameaddress or
+// llvm.returnaddress intrinsic.
+static bool isFrameAddressOrReturnAddressCall(const Instruction *I) {
+  if (const CallInst *CI = dyn_cast<CallInst>(I))
+    if (Function *IntrinsicF = CI->getCalledFunction()) {
+      Intrinsic::ID ID = IntrinsicF->getIntrinsicID();
+      if ((ID == Intrinsic::frameaddress) ||
+          (ID == Intrinsic::returnaddress))
+        return true;
+    }
+  return false;
+}
+
+// Return the frame depth, i.e. the first call argument, of a call to the
+// llvm.frameaddress or llvm.returnaddress intrinsic (the IR forms of
+// __builtin_frame_address and __builtin_return_address).
+// Return -1 if the instruction is not a call to either intrinsic.
+static int getFrameDepthArg(const Instruction *I) {
+  if (!isFrameAddressOrReturnAddressCall(I))
+    return -1;
+  const CallInst *CI = cast<CallInst>(I);
+  Constant *C = cast<Constant>(*CI->arg_begin());
+  return C->getUniqueInteger().getZExtValue();
+}
 
 /// Test if the given instruction is in a position to be optimized
 /// with a tail-call. This roughly means that it's in a block with
@@ -513,6 +537,7 @@
   const BasicBlock *ExitBB = I->getParent();
   const Instruction *Term = ExitBB->getTerminator();
   const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
+  const Function *CalleeFn = CS.getCalledFunction();
 
   // The block must end in a return statement or unreachable.
   //
@@ -545,6 +570,25 @@
         return false;
     }
 
+  // Do not do tail call if the callee function contains __builtin_frame_address
+  // or __builtin_return_address.
+  //
+  // Enabling tail call may remove the frame pointer and return address
+  // restoration in caller which will make the above two builtin functions get
+  // incorrect value if the depth parameter > 0.
+  // E.g.
+  // void __attribute__((noinline))
+  // *callee (char *p) { return __builtin_frame_address (1); }
+  // void
+  // *caller (void) { char * save = (char*) alloca (4);
+  // return callee (save); }
+  if (CalleeFn != nullptr) {
+    for (const BasicBlock &BB : *CalleeFn)
+      for (const Instruction &I : BB)
+        if (getFrameDepthArg(&I) > 0)
+          return false;
+  }
+
   const Function *F = ExitBB->getParent();
   return returnTypeIsEligibleForTailCall(
       F, I, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering());
Index: test/CodeGen/RISCV/builtin-frame-address.ll
===================================================================
--- /dev/null
+++ test/CodeGen/RISCV/builtin-frame-address.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32I
+
+; The test case checks that tail call optimization will be suppressed for
+; @llvm.frameaddress with depth > 0.
+; Otherwise, @llvm.frameaddress(i32 1) will get wrong frame address.
+
+; Tail call will be suppressed in caller1 because callee1 contains
+; @llvm.frameaddress(i32 1).
+define i8* @caller1() {
+; RV32I-LABEL: caller1:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp)
+; RV32I-NEXT: call callee1
+; RV32I-NEXT: lw ra, 12(sp)
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+entry:
+  %call = tail call i8* @callee1(i8* undef)
+  ret i8* %call
+}
+
+define i8* @callee1(i8* nocapture readnone %p) {
+; RV32I-LABEL: callee1:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp)
+; RV32I-NEXT: sw s0, 8(sp)
+; RV32I-NEXT: addi s0, sp, 16
+; RV32I-NEXT: lw a0, -8(s0)
+; RV32I-NEXT: lw s0, 8(sp)
+; RV32I-NEXT: lw ra, 12(sp)
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+entry:
+  %0 = tail call i8* @llvm.frameaddress(i32 1)
+  ret i8* %0
+}
+
+; Tail call won't be suppressed in caller0 because callee0 contains
+; @llvm.frameaddress(i32 0) which will not backtrace to caller0's stack.
+define i8* @caller0() {
+; RV32I-LABEL: caller0:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: tail callee0
+entry:
+  %call = tail call i8* @callee0(i8* undef)
+  ret i8* %call
+}
+
+define i8* @callee0(i8* nocapture readnone %p) {
+; RV32I-LABEL: callee0:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp)
+; RV32I-NEXT: sw s0, 8(sp)
+; RV32I-NEXT: addi s0, sp, 16
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw s0, 8(sp)
+; RV32I-NEXT: lw ra, 12(sp)
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+entry:
+  %0 = tail call i8* @llvm.frameaddress(i32 0)
+  ret i8* %0
+}
+
+declare i8* @llvm.frameaddress(i32)
Index: test/CodeGen/RISCV/builtin-return-address.ll
===================================================================
--- /dev/null
+++ test/CodeGen/RISCV/builtin-return-address.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32I
+
+; The test case checks that tail call optimization will be suppressed for
+; @llvm.returnaddress with depth > 0.
+; Otherwise, @llvm.returnaddress(i32 1) will get wrong return address.
+
+; Tail call will be suppressed in caller1 because callee1 contains
+; @llvm.returnaddress(i32 1).
+define i8* @caller1() {
+; RV32I-LABEL: caller1:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp)
+; RV32I-NEXT: call callee1
+; RV32I-NEXT: lw ra, 12(sp)
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+entry:
+  %call = tail call i8* @callee1(i8* undef)
+  ret i8* %call
+}
+
+define i8* @callee1(i8* nocapture readnone %p) {
+; RV32I-LABEL: callee1:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp)
+; RV32I-NEXT: sw s0, 8(sp)
+; RV32I-NEXT: addi s0, sp, 16
+; RV32I-NEXT: lw a0, -8(s0)
+; RV32I-NEXT: lw a0, -4(a0)
+; RV32I-NEXT: lw s0, 8(sp)
+; RV32I-NEXT: lw ra, 12(sp)
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+entry:
+  %0 = tail call i8* @llvm.returnaddress(i32 1)
+  ret i8* %0
+}
+
+; Tail call won't be suppressed in caller0 because callee0 contains
+; @llvm.returnaddress(i32 0) which will not backtrace to caller0's stack.
+define i8* @caller0() {
+; RV32I-LABEL: caller0:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: tail callee0
+entry:
+  %call = tail call i8* @callee0(i8* undef)
+  ret i8* %call
+}
+
+define i8* @callee0(i8* nocapture readnone %p) {
+; RV32I-LABEL: callee0:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: mv a0, ra
+; RV32I-NEXT: ret
+entry:
+  %0 = tail call i8* @llvm.returnaddress(i32 0)
+  ret i8* %0
+}
+
+declare i8* @llvm.returnaddress(i32)