Index: lib/Transforms/IPO/FunctionAttrs.cpp
===================================================================
--- lib/Transforms/IPO/FunctionAttrs.cpp
+++ lib/Transforms/IPO/FunctionAttrs.cpp
@@ -23,6 +23,7 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -774,6 +775,311 @@
   return true;
 }
 
+/// Tests whether a function is "printf-like".
+///
+/// A function is "printf-like" if it uses varargs and the only use of the
+/// variable argument list is to pass it to a vsprintf-like function. We know
+/// vsprintf and friends do not capture or write their variadic arguments,
+/// so we can propagate readonly+nocapture to all calls of F.
+///
+/// \param F         The function to inspect.
+/// \param TLI       Used to recognise the v*printf library call.
+/// \param FormatStr Set, on success only, to the value passed as the format
+///                  string operand of the v*printf call inside \p F.
+/// \returns true if \p F is printf-like.
+static bool isPrintfLikeFunction(Function *F, const TargetLibraryInfo &TLI,
+                                 Value *&FormatStr) {
+  if (!F->isVarArg())
+    return false;
+
+  // We search for a call to @llvm.vastart(i8* %x). If %x isn't an alloca we
+  // bail out.
+  // Then we search for all uses of %x, following through bitcasts. We use a
+  // CaptureTracker to identify uses of %x as call operands, then later check
+  // that the operand is to a call to a known library function such as vsprintf.
+
+  // Find the va_start call. Only the entry block is searched.
+  Instruction *VaStartInst = nullptr;
+  for (auto &I : F->getEntryBlock())
+    if (auto *Intr = dyn_cast<IntrinsicInst>(&I))
+      if (Intr->getIntrinsicID() == Intrinsic::vastart) {
+        if (VaStartInst)
+          // Multiple va_starts seen - we only support one.
+          return false;
+        VaStartInst = Intr;
+      }
+  if (!VaStartInst)
+    return false;
+
+  const DataLayout &DL = F->getParent()->getDataLayout();
+  Value *UnderlyingVaStart =
+      GetUnderlyingObject(VaStartInst->getOperand(0), DL);
+
+  if (!isa<AllocaInst>(UnderlyingVaStart))
+    return false;
+
+  // Walks the uses of the va_list alloca. On completion either Captured is
+  // set (the va_list is used in a way we do not understand) or Call is the
+  // single call operand that the loaded va_list value flows into.
+  struct ValistCaptureTracker : public CaptureTracker {
+    bool Captured = false;
+    const Use *Call = nullptr;
+
+    ~ValistCaptureTracker() {}
+
+    void tooManyUses() override { Captured = true; }
+
+    bool shouldExplore(const Use *U) override {
+      if (auto *LI = dyn_cast<LoadInst>(U->getUser())) {
+        // The value has been loaded. Is it (only) used in a call?
+        Value *V = LI;
+        // Spool through single-use instructions if needed.
+        while (V->hasOneUse() && !isa<CallInst>(*V->user_begin()))
+          V = *V->user_begin();
+        if (V->hasOneUse() && isa<CallInst>(*V->user_begin())) {
+          if (Call)
+            // Multiple calls seen!
+            Captured = true;
+          else
+            Call = &*V->use_begin();
+        } else {
+          // Loaded value is used in multiple places or not in a call.
+          Captured = true;
+        }
+      }
+
+      return !Captured;
+    }
+    bool captured(const Use *U) override {
+      if (auto *I = dyn_cast<IntrinsicInst>(U->getUser()))
+        if (I->getIntrinsicID() == Intrinsic::vastart ||
+            I->getIntrinsicID() == Intrinsic::vaend)
+          // Not captured, keep going.
+          return false;
+
+      if (Operator::getOpcode(U->getUser()) == Instruction::BitCast)
+        // Not captured, keep going.
+        return false;
+
+      Captured = true;
+      return true;
+    }
+  };
+  ValistCaptureTracker Tracker;
+  PointerMayBeCaptured(UnderlyingVaStart, &Tracker);
+
+  if (Tracker.Captured || !Tracker.Call)
+    return false;
+
+  // Now, is the call to a known va_list function?
+  CallInst *VaListCall = cast<CallInst>(Tracker.Call->getUser());
+  Function *CalledF = VaListCall->getCalledFunction();
+  if (!CalledF)
+    return false;
+
+  LibFunc::Func TheLibFunc;
+  if (!(TLI.getLibFunc(CalledF->getName(), TheLibFunc) && TLI.has(TheLibFunc)))
+    return false;
+
+  unsigned FormatArgNo;
+  switch (TheLibFunc) {
+  default:
+    return false;
+  case LibFunc::vprintf:
+    FormatArgNo = 0;
+    break;
+  case LibFunc::vsprintf:
+  case LibFunc::vfprintf:
+    FormatArgNo = 1;
+    break;
+  case LibFunc::vsnprintf:
+    FormatArgNo = 2;
+    break;
+  }
+
+  // In all four functions above the va_list is the argument that immediately
+  // follows the format string. The lookup above is keyed on the callee's name,
+  // so verify that the call really has that many arguments and that the
+  // va_list flows into the va_list position rather than some other operand.
+  const unsigned VaListArgNo = FormatArgNo + 1;
+  if (VaListCall->getNumArgOperands() <= VaListArgNo ||
+      Tracker.Call->getOperandNo() != VaListArgNo)
+    return false;
+
+  FormatStr = VaListCall->getArgOperand(FormatArgNo);
+  return true;
+}
+
+/// If \c GV is a constant string, return it as a StringRef otherwise return
+/// an empty string.
+///
+/// \c GV may be null, in which case an empty string is returned.
+static StringRef getStringFromGV(GlobalVariable *GV) {
+  // The contents can only be trusted if the global is never written to and
+  // its initializer is the definitive one. This must be established before
+  // asking for the initializer, which a mere declaration does not have.
+  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
+    return StringRef();
+  if (auto *CDS = dyn_cast<ConstantDataSequential>(GV->getInitializer()))
+    if (CDS->isString())
+      return CDS->getAsString();
+  return StringRef();
+}
+
+/// \c &S[I] is a printf modifier-specifier sequence, for example "08s". Skip
+/// over the modifiers ('0' and '8' in this example) and return the index of the
+/// specifier ('s' in this example).
+///
+/// Only modifiers that do not consume an argument are skipped: the flags
+/// ('-', '+', ' ', '#', '0'), field-width digits and the precision dot. Any
+/// other character, including '*' and length modifiers, is reported as the
+/// specifier so that the caller can reject it.
+///
+/// Return ~0U on failure, i.e. if the string ends before a specifier is found.
+static unsigned getPrintfSpecifierCharSkippingModifiers(StringRef S,
+                                                        unsigned I) {
+  for (unsigned E = S.size(); I != E; ++I) {
+    switch (S[I]) {
+    case '0': case '1': case '2': case '3': case '4':
+    case '5': case '6': case '7': case '8': case '9':
+    case ' ': case '-': case '+': case '#': case '.':
+      continue;
+    default:
+      return I;
+    }
+  }
+  return ~0U;
+}
+
+/// \c FormatStrVal is a Value providing a printf format string during the call
+/// \c CI. Try and resolve the Value to a StringRef, parse it, and return the
+/// format specifiers.
+///
+/// For example, given @g = constant c"a: %s, %02d", it would return {'s', 'd'}.
+///
+/// On failure return an empty list.
+// NOTE(review): the SmallVector template arguments were lost in this copy of
+// the patch. The element type follows from the push_back of a char below; the
+// inline capacity of 8 is a placeholder - confirm against the original.
+static SmallVector<char, 8> resolvePrintfFormatStr(Value *FormatStrVal,
+                                                   CallInst *CI) {
+  SmallVector<char, 8> V;
+  if (!FormatStrVal)
+    return V;
+  FormatStrVal = FormatStrVal->stripPointerCasts();
+
+  // We support FormatStrVal coming directly from a constant global, or coming
+  // from an operand to CI, which itself is a constant global.
+
+  StringRef Str;
+  if (auto *GV = dyn_cast<GlobalVariable>(FormatStrVal)) {
+    Str = getStringFromGV(GV);
+  } else if (auto *A = dyn_cast<Argument>(FormatStrVal)) {
+    // Map the formal argument onto the matching actual operand of CI. Guard
+    // the index so a call with fewer operands than expected is not read out
+    // of range.
+    if (A->getArgNo() < CI->getNumArgOperands()) {
+      Value *Op = CI->getArgOperand(A->getArgNo());
+      Str = getStringFromGV(dyn_cast<GlobalVariable>(Op->stripPointerCasts()));
+    }
+  }
+
+  // Process the format string, extracting known specifiers and ignoring
+  // modifiers. An unresolved format leaves Str empty, so the loop is skipped
+  // and the empty list is returned.
+  for (unsigned I = 0, E = Str.size(); I < E; ++I) {
+    if (Str[I] != '%')
+      continue;
+    // Skip over '%'. A '%' that ends the string introduces no specifier;
+    // treat it like any other malformed format and abort completely.
+    if (++I == E)
+      return SmallVector<char, 8>();
+    if (Str[I] == '%')
+      // Escape.
+      continue;
+
+    // This doesn't have to be 100% accurate. As long as it knows enough about
+    // format modifiers to catch the obvious/standard cases, that is good
+    // enough.
+    I = getPrintfSpecifierCharSkippingModifiers(Str, I);
+    if (I == ~0U)
+      return SmallVector<char, 8>();
+
+    switch (Str[I]) {
+    default:
+      // Unrecognized format specifier - abort completely.
+      return SmallVector<char, 8>();
+    case 's': case 'd': case 'u': case 'f': case 'F': case 'p':
+    case 'g': case 'G': case 'a': case 'A': case 'e': case 'E':
+    case 'x': case 'X': case 'o': case 'i': case 'c':
+      V.push_back(Str[I]);
+      break;
+    }
+  }
+  return V;
+}
+
+/// Return the attributes inferrable for this parameter due to \c Specifier.
+/// The only specifier implemented currently is '%s', which implies nocapture
+/// and readonly.
+///
+/// \c ParamIdx is the attribute index the attributes are attached to. An empty
+/// set is returned for every other specifier.
+static AttributeSet getAttributesForPrintfSpecifier(char Specifier,
+                                                    unsigned ParamIdx,
+                                                    LLVMContext &C) {
+  AttributeSet AS;
+  if (Specifier != 's')
+    return AS;
+  AS = AS.addAttribute(C, ParamIdx, Attribute::NoCapture);
+  AS = AS.addAttribute(C, ParamIdx, Attribute::ReadOnly);
+  return AS;
+}
+
+/// Deduce attributes for variadic arguments in the SCC.
+static bool addVarArgAttrs(const SCCNodeSet &SCCNodes,
+                           const TargetLibraryInfo &TLI) {
+  // Look for functions whose variadic arguments only flow to libcalls known
+  // not to capture or write them, and add nocapture+readonly to their
+  // callsites where applicable.
+  //
+  // This applies to any pointer type variadic operand. Unfortunately many
+  // printf format specifiers can actually capture the pointer; e.g.
+  //
+  //   sprintf(buf, "%p", &foo); sscanf(buf, "%p", &bar);
+  //
+  // And some can actually write to their operands;
+  //
+  //   sprintf(buf, "%n", &foo);
+  //
+  // So here we only handle a specific format specifier - "%s". %s is one
+  // of the most used format specifiers and takes a pointer argument
+  // but does not serialize the pointer value itself.
+  //
+  // Knowing that a string does not get written or get captured can be useful
+  // for such things as performing SROA on an aggregate:
+  //
+  //   struct a { const char str[32]; void (*f)(); } A = ...;
+  //   my_printf("xyz: %s\n", A.str);
+  //
+  // Without knowing that A.str is nocapture and readonly, we would never be
+  // able to devirtualize calls to A.f(), even if "A" was determined to be
+  // constant.
+  //
+  // FIXME: This can be extended to work for vscanf and friends, adding
+  // nocapture (but not readonly).
+  bool Changed = false;
+  for (Function *F : SCCNodes) {
+    if (F->isDeclaration() || F->mayBeOverridden() || !F->isVarArg())
+      continue;
+
+    // Only direct calls to F are callsites of F. A call that merely passes F
+    // as an operand is also a CallInst user of F, but its operands belong to
+    // a different callee and must not be interpreted or annotated here.
+    auto getDirectCall = [F](User *U) -> CallInst * {
+      auto *CI = dyn_cast<CallInst>(U);
+      return (CI && CI->getCalledFunction() == F) ? CI : nullptr;
+    };
+
+    // "isPrintfLikeFunction()" can possibly be expensive. Do an early exit
+    // check: does the function even have any (variadic) pointer operands?
+    unsigned NumNonVarArgs = F->getFunctionType()->getNumParams();
+    bool HasPointerArg = false;
+    for (auto *U : F->users()) {
+      CallInst *CI = getDirectCall(U);
+      if (!CI)
+        continue;
+      for (unsigned I = NumNonVarArgs; I < CI->getNumArgOperands(); ++I)
+        if (CI->getArgOperand(I)->getType()->isPointerTy()) {
+          HasPointerArg = true;
+          break;
+        }
+      // One pointer operand is enough; stop scanning the remaining users.
+      if (HasPointerArg)
+        break;
+    }
+    if (!HasPointerArg)
+      continue;
+
+    Value *FormatStrVal = nullptr;
+    if (!isPrintfLikeFunction(F, TLI, FormatStrVal) || !FormatStrVal)
+      continue;
+
+    LLVMContext &C = F->getContext();
+    for (auto *U : F->users()) {
+      CallInst *CI = getDirectCall(U);
+      if (!CI)
+        continue;
+
+      // Format[J] is the specifier consumed by the J'th variadic operand. The
+      // list is empty when the format string could not be resolved or parsed.
+      const auto Format = resolvePrintfFormatStr(FormatStrVal, CI);
+      for (unsigned I = NumNonVarArgs, J = 0;
+           I < CI->getNumArgOperands() && J < Format.size(); ++I, ++J) {
+        if (!CI->getArgOperand(I)->getType()->isPointerTy())
+          continue;
+        // Attribute indices for parameters are offset by one from the
+        // operand index, hence I + 1.
+        AttributeSet AddAttrs =
+            getAttributesForPrintfSpecifier(Format[J], I + 1, C);
+        if (AddAttrs.isEmpty())
+          continue;
+        AttributeSet Attrs = CI->getAttributes().addAttributes(C, I + 1, AddAttrs);
+        if (Attrs != CI->getAttributes()) {
+          CI->setAttributes(Attrs);
+          Changed = true;
+        }
+      }
+    }
+  }
+  return Changed;
+}
+
 /// Deduce noalias attributes for the SCC.
static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) { // Check each function in turn, determining which functions return noalias @@ -1890,7 +2196,8 @@ Changed |= addNoAliasAttrs(SCCNodes); Changed |= addNonNullAttrs(SCCNodes, *TLI); } - + + Changed |= addVarArgAttrs(SCCNodes, *TLI); Changed |= addNoRecurseAttrs(SCC, Revisit); return Changed; } Index: test/Transforms/FunctionAttrs/varargs.ll =================================================================== --- /dev/null +++ test/Transforms/FunctionAttrs/varargs.ll @@ -0,0 +1,63 @@ +; RUN: opt < %s -functionattrs -S | FileCheck %s + +target triple = "armv7--linux-gnueabihf" + +%struct.__va_list = type { i8* } + +declare i32 @vsprintf(i8* readonly nocapture, i8* readonly nocapture, [1 x i32]) + +; @f() is "printf-like", so any varargs operands to it should be readonly+nocapture. +define i32 @f(i8* nocapture %a, i8* nocapture readonly %b, ...) #0 { + %v = alloca %struct.__va_list, align 4 + %1 = bitcast %struct.__va_list* %v to i8* + call void @llvm.lifetime.start(i64 4, i8* %1) + call void @llvm.va_start(i8* %1) + %2 = bitcast %struct.__va_list* %v to i32* + %3 = load i32, i32* %2, align 4 + %4 = insertvalue [1 x i32] undef, i32 %3, 0 + %5 = call arm_aapcscc i32 @vsprintf(i8* %a, i8* %b, [1 x i32] %4) #1 + call void @llvm.va_end(i8* %1) + call void @llvm.lifetime.end(i64 4, i8* %1) + ret i32 %5 +} + +; CHECK-LABEL: @percent_s +; CHECK: call i32 (i8*, i8*, ...) @f(i8* %a, i8* {{.*}}, i8* nocapture readonly %c, i32 %d) +define i32 @percent_s(i8* %a, i8* %b, i8* %c, i32 %d) { + %x = call i32 (i8*,i8*, ...) @f(i8* %a, i8* bitcast ([8 x i8]* @b to i8*), i8* %c, i32 %d) + ret i32 %x +} + +; This function uses %d, which can serialize a pointer. +; CHECK-LABEL: @percent_d +; CHECK: call i32 (i8*, i8*, ...) @f(i8* %a, i8* {{.*}}, i8* %c, i32 %d) +define i32 @percent_d(i8* %a, i8* %b, i8* %c, i32 %d) { + %x = call i32 (i8*,i8*, ...) 
@f(i8* %a, i8* bitcast ([8 x i8]* @c to i8*), i8* %c, i32 %d) + ret i32 %x +} + +; This function uses %p, which can serialize a pointer. It also has a percent-escape (%%) +; in it, so check it doesn't get confused. +; CHECK-LABEL: @percent_p +; CHECK: call i32 (i8*, i8*, ...) @f(i8* %a, i8* {{.*}}, i8* %c, i8* nocapture readonly %d) +define i32 @percent_p(i8* %a, i8* %b, i8* %c, i8* %d) { + %x = call i32 (i8*,i8*, ...) @f(i8* %a, i8* bitcast ([11 x i8]* @d to i8*), i8* %c, i8* %d) + ret i32 %x +} + +; CHECK-LABEL: @percent_s_mod +; CHECK: call i32 (i8*, i8*, ...) @f(i8* %a, i8* {{.*}}, i8* %c, i8* nocapture readonly %d) +define i32 @percent_s_mod(i8* %a, i8* %b, i8* %c, i8* %d) { + %x = call i32 (i8*,i8*, ...) @f(i8* %a, i8* bitcast ([11 x i8]* @e to i8*), i8* %c, i8* %d) + ret i32 %x +} + +@b = constant [8 x i8] c"a: %s %d" +@c = constant [8 x i8] c"a: %d %d" +@d = constant [11 x i8] c"a: %% %d %s" +@e = constant [11 x i8] c"a: %0d % 2s" + +declare void @llvm.lifetime.start(i64, i8* nocapture) +declare void @llvm.lifetime.end(i64, i8* nocapture) +declare void @llvm.va_start(i8*) +declare void @llvm.va_end(i8*)