Index: llvm/trunk/include/llvm/CodeGen/Analysis.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/Analysis.h +++ llvm/trunk/include/llvm/CodeGen/Analysis.h @@ -105,11 +105,21 @@ /// This function only tests target-independent requirements. bool isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM); +/// Test if given that the input instruction is in the tail call position, if +/// there is an attribute mismatch between the caller and the callee that will +/// inhibit tail call optimizations. +/// \p AllowDifferingSizes is an output parameter which, if forming a tail call +/// is permitted, determines whether it's permitted only if the size of the +/// caller's and callee's return types match exactly. +bool attributesPermitTailCall(const Function *F, const Instruction *I, + const ReturnInst *Ret, + const TargetLoweringBase &TLI, + bool *AllowDifferingSizes = nullptr); + /// Test if given that the input instruction is in the tail call position if the /// return type or any attributes of the function will inhibit tail call /// optimization. -bool returnTypeIsEligibleForTailCall(const Function *F, - const Instruction *I, +bool returnTypeIsEligibleForTailCall(const Function *F, const Instruction *I, const ReturnInst *Ret, const TargetLoweringBase &TLI); Index: llvm/trunk/lib/CodeGen/Analysis.cpp =================================================================== --- llvm/trunk/lib/CodeGen/Analysis.cpp +++ llvm/trunk/lib/CodeGen/Analysis.cpp @@ -525,19 +525,15 @@ F, I, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering()); } -bool llvm::returnTypeIsEligibleForTailCall(const Function *F, - const Instruction *I, - const ReturnInst *Ret, - const TargetLoweringBase &TLI) { - // If the block ends with a void return or unreachable, it doesn't matter - // what the call's return type is. - if (!Ret || Ret->getNumOperands() == 0) return true; +bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I, + const ReturnInst *Ret, + const TargetLoweringBase &TLI, + bool *AllowDifferingSizes) { + // ADS may be null, so don't write to it directly. + bool DummyADS; + bool &ADS = AllowDifferingSizes ? *AllowDifferingSizes : DummyADS; + ADS = true; - // If the return value is undef, it doesn't matter what the call's - // return type is. - if (isa(Ret->getOperand(0))) return true; - - // Make sure the attributes attached to each return are compatible. AttrBuilder CallerAttrs(F->getAttributes(), AttributeSet::ReturnIndex); AttrBuilder CalleeAttrs(cast(I)->getAttributes(), @@ -548,19 +544,18 @@ CallerAttrs = CallerAttrs.removeAttribute(Attribute::NoAlias); CalleeAttrs = CalleeAttrs.removeAttribute(Attribute::NoAlias); - bool AllowDifferingSizes = true; if (CallerAttrs.contains(Attribute::ZExt)) { if (!CalleeAttrs.contains(Attribute::ZExt)) return false; - AllowDifferingSizes = false; + ADS = false; CallerAttrs.removeAttribute(Attribute::ZExt); CalleeAttrs.removeAttribute(Attribute::ZExt); } else if (CallerAttrs.contains(Attribute::SExt)) { if (!CalleeAttrs.contains(Attribute::SExt)) return false; - AllowDifferingSizes = false; + ADS = false; CallerAttrs.removeAttribute(Attribute::SExt); CalleeAttrs.removeAttribute(Attribute::SExt); } @@ -568,7 +563,24 @@ // If they're still different, there's some facet we don't understand // (currently only "inreg", but in future who knows). It may be OK but the // only safe option is to reject the tail call. - if (CallerAttrs != CalleeAttrs) + return CallerAttrs == CalleeAttrs; +} + +bool llvm::returnTypeIsEligibleForTailCall(const Function *F, + const Instruction *I, + const ReturnInst *Ret, + const TargetLoweringBase &TLI) { + // If the block ends with a void return or unreachable, it doesn't matter + // what the call's return type is. + if (!Ret || Ret->getNumOperands() == 0) return true; + + // If the return value is undef, it doesn't matter what the call's + // return type is. + if (isa(Ret->getOperand(0))) return true; + + // Make sure the attributes attached to each return are compatible. + bool AllowDifferingSizes; + if (!attributesPermitTailCall(F, I, Ret, TLI, &AllowDifferingSizes)) return false; const Value *RetVal = Ret->getOperand(0), *CallVal = I; Index: llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp +++ llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -1983,14 +1984,6 @@ if (PN && PN->getParent() != BB) return false; - // It's not safe to eliminate the sign / zero extension of the return value. - // See llvm::isInTailCallPosition(). - const Function *F = BB->getParent(); - AttributeSet CallerAttrs = F->getAttributes(); - if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) || - CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) - return false; - // Make sure there are no instructions between the PHI and return, or that the // return is the first instruction in the block. if (PN) { @@ -2010,13 +2003,15 @@ /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail /// call. + const Function *F = BB->getParent(); SmallVector TailCalls; if (PN) { for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { CallInst *CI = dyn_cast(PN->getIncomingValue(I)); // Make sure the phi value is indeed produced by the tail call. if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) && - TLI->mayBeEmittedAsTailCall(CI)) + TLI->mayBeEmittedAsTailCall(CI) && + attributesPermitTailCall(F, CI, RetI, *TLI)) TailCalls.push_back(CI); } } else { @@ -2033,7 +2028,8 @@ continue; CallInst *CI = dyn_cast(&*RI); - if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI)) + if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) && + attributesPermitTailCall(F, CI, RetI, *TLI)) TailCalls.push_back(CI); } } Index: llvm/trunk/test/CodeGen/X86/tailcall-cgp-dup.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/tailcall-cgp-dup.ll +++ llvm/trunk/test/CodeGen/X86/tailcall-cgp-dup.ll @@ -85,3 +85,23 @@ %retval.0.i = bitcast i8* %retval.0.in.i to %0* ret %0* %retval.0.i } + + +; Correctly handle zext returns. +declare zeroext i1 @foo_i1() + +; CHECK-LABEL: zext_i1 +; CHECK: jmp _foo_i1 +define zeroext i1 @zext_i1(i1 %k) { +entry: + br i1 %k, label %land.end, label %land.rhs + +land.rhs: ; preds = %entry + %call1 = tail call zeroext i1 @foo_i1() + br label %land.end + +land.end: ; preds = %entry, %land.rhs + %0 = phi i1 [ false, %entry ], [ %call1, %land.rhs ] + ret i1 %0 +} +