Index: include/llvm/Transforms/Instrumentation/PoisonChecking.h =================================================================== --- include/llvm/Transforms/Instrumentation/PoisonChecking.h +++ include/llvm/Transforms/Instrumentation/PoisonChecking.h @@ -0,0 +1,25 @@ +//===- PoisonChecking.h - ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + + +#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_POISON_CHECKING_H +#define LLVM_TRANSFORMS_INSTRUMENTATION_POISON_CHECKING_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +struct PoisonCheckingPass : public PassInfoMixin { + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +} + + +#endif // LLVM_TRANSFORMS_INSTRUMENTATION_POISON_CHECKING_H Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -100,6 +100,7 @@ #include "llvm/Transforms/Instrumentation/InstrProfiling.h" #include "llvm/Transforms/Instrumentation/MemorySanitizer.h" #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" +#include "llvm/Transforms/Instrumentation/PoisonChecking.h" #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" #include "llvm/Transforms/Scalar/ADCE.h" #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" Index: lib/Passes/PassRegistry.def =================================================================== --- lib/Passes/PassRegistry.def +++ lib/Passes/PassRegistry.def @@ -86,6 +86,7 @@ MODULE_PASS("verify", VerifierPass()) MODULE_PASS("asan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/false, false, true, false)) MODULE_PASS("kasan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/true, false, true, false)) +MODULE_PASS("poison-checking", PoisonCheckingPass()) #undef MODULE_PASS #ifndef CGSCC_ANALYSIS Index: lib/Transforms/Instrumentation/CMakeLists.txt =================================================================== --- lib/Transforms/Instrumentation/CMakeLists.txt +++ lib/Transforms/Instrumentation/CMakeLists.txt @@ -12,6 +12,7 @@ InstrProfiling.cpp PGOInstrumentation.cpp PGOMemOPSizeOpt.cpp + PoisonChecking.cpp SanitizerCoverage.cpp ThreadSanitizer.cpp HWAddressSanitizer.cpp Index: lib/Transforms/Instrumentation/PoisonChecking.cpp =================================================================== --- lib/Transforms/Instrumentation/PoisonChecking.cpp +++ lib/Transforms/Instrumentation/PoisonChecking.cpp @@ -0,0 +1,243 @@ +//===- PoisonChecking.cpp - -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implements a transform pass which instruments IR such that poison semantics +// are made explicit. That is, it provides a (possibly partial) executable +// semantics for every instruction w.r.t. poison as specified in the LLVM +// LangRef. There are obvious parallels to the sanitizer tools, but this pass +// is focused purely on the semantics of LLVM IR, not any particular source +// language. If you're looking for something to see if your C/C++ contains +// UB, this is not it. +// +// The rewritten semantics of each instruction will include the following +// components: +// +// 1) The original instruction, unmodified. +// 2) A propagation rule which translates dynamic information about the poison +// state of each input to whether the dynamic output of the instruction +// produces poison. +// 3) A flag validation rule which validates any poison producing flags on the +// instruction itself (e.g. checks for overflow on nsw). +// 4) A check rule which traps (to a handler function) if this instruction must +// execute undefined behavior given the poison state of it's inputs. +// +// At the moment, the UB detection is done in a best effort manner; that is, +// the resulting code may produce a false negative result (not report UB when +// it actually exists according to the LangRef spec), but should never produce +// a false positive (report UB where it doesn't exist). The intention is to +// eventually support a "strict" mode which never dynamically reports a false +// negative at the cost of rejecting some valid inputs to translation. +// +// Use cases for this pass include: +// - Understanding (and testing!) the implications of the definition of poison +// from the LangRef. +// - Validating the output of a IR fuzzer to ensure that all programs produced +// are well defined on the specific input used. +// - Finding/confirming poison specific miscompiles by checking the poison +// status of an input/IR pair is the same before and after an optimization +// transform. +// - Checking that a bugpoint reduction does not introduce UB which didn't +// exist in the original program being reduced. +// +// The major sources of inprecision (false positives) are currently: +// - Most validation rules not yet implemented for instructions with poison +// relavant flags. At the moment, only nsw/nuw on add/sub are supported. +// - UB which is control dependent on a branch on poison is not yet +// reported. Currently, only data flow dependence is modeled. +// - Poison which is propagated through memory is not modeled. As such, +// storing poison to memory and then reloading it will cause a false negative +// as we consider the reloaded value to not be poisoned. +// - Poison propagation across function boundaries is not modeled. At the +// moment, all arguments and return values are assumed not to be poison. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Instrumentation/PoisonChecking.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "poison-checking" + +static Value *buildOrChain(IRBuilder<> &B, ArrayRef Ops) { + if (Ops.size() == 0) + return B.getFalse(); + Value *Accum = Ops[0]; + for (unsigned i = 1; i < Ops.size(); i++) + Accum = B.CreateOr(Accum, Ops[i]); + return Accum; +} + +class RewriteVisitor + : public InstVisitor { +public: + Value* visitInstruction(Instruction&) { return nullptr; } + + Value* visitAdd(BinaryOperator &BO) { + auto &I = cast(BO); + Value *LHS = I.getOperand(0); + Value *RHS = I.getOperand(1); + IRBuilder<> B(&I); + SmallVector Checks; + if (I.hasNoSignedWrap()) { + auto *OverflowOp = + B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS); + Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); + } + if (I.hasNoUnsignedWrap()) { + auto *OverflowOp = + B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS); + Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); + } + return buildOrChain(B, Checks); + } + + Value *visitSub(BinaryOperator &BO) { + auto &I = cast(BO); + Value *LHS = I.getOperand(0); + Value *RHS = I.getOperand(1); + IRBuilder<> B(&I); + SmallVector Checks; + if (I.hasNoSignedWrap()) { + auto *OverflowOp = + B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS); + Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); + } + if (I.hasNoUnsignedWrap()) { + auto *OverflowOp = + B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS); + Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); + } + return buildOrChain(B, Checks); + } +}; + +static Value* generatePoisonFlagChecks(Instruction &I) { + // TODO: really should have: assert(hasPoisonFlags(I)); + // but we don't have that predicate yet + RewriteVisitor RV; + return RV.visit(I); +} + +static Value *getPoisonFor(DenseMap &ValToPoison, Value *V) { + if (ValToPoison.count(V)) + return ValToPoison[V]; + if (isa(V)) { + return ConstantInt::getFalse(V->getContext()); + } + // Return false for unknwon values - this implements a non-strict mode where + // unhandled IR constructs are simply considered to never produce poison. At + // some point in the future, we probably want a "strict mode" for testing if + // nothing else. + return ConstantInt::getFalse(V->getContext()); +} + +static bool rewrite(Function &F) { + auto * const Int1Ty = Type::getInt1Ty(F.getContext()); + + DenseMap ValToPoison; + + for (BasicBlock &BB : F) + for (auto I = BB.begin(); isa(&*I); I++) { + auto *OldPHI = cast(&*I); + auto *NewPHI = PHINode::Create(Int1Ty, + OldPHI->getNumIncomingValues()); + for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) + NewPHI->addIncoming(UndefValue::get(Int1Ty), + OldPHI->getIncomingBlock(i)); + NewPHI->insertBefore(OldPHI); + ValToPoison[OldPHI] = NewPHI; + } + + for (BasicBlock &BB : F) + for (Instruction &I : BB) { + if (isa(I)) continue; + + IRBuilder<> B(cast(&I)); + if (Value *Op = const_cast(getGuaranteedNonFullPoisonOp(&I))) + B.CreateCall(I.getModule()->getFunction("trap"), + {getPoisonFor(ValToPoison, Op)}); + + // TODO: Remove, just here for easy examination of function local output + if (auto *RI = dyn_cast(&I)) { + if (RI->getNumOperands() == 0) + continue; + Value *Op = RI->getOperand(0); + B.CreateCall(RI->getModule()->getFunction("trap"), + {getPoisonFor(ValToPoison, Op)}); + } + + SmallVector Checks; + if (propagatesFullPoison(&I)) + for (Use &U : I.operands()) + Checks.push_back(getPoisonFor(ValToPoison, U.get())); + + if (auto *Check = generatePoisonFlagChecks(I)) + Checks.push_back(Check); + ValToPoison[&I] = buildOrChain(B, Checks); + } + + for (BasicBlock &BB : F) + for (auto I = BB.begin(); isa(&*I); I++) { + auto *OldPHI = cast(&*I); + if (!ValToPoison.count(OldPHI)) + continue; // skip the newly inserted phis + auto *NewPHI = cast(ValToPoison[OldPHI]); + for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) { + auto *OldVal = OldPHI->getIncomingValue(i); + NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal)); + } + } + return true; +} + + +PreservedAnalyses PoisonCheckingPass::run(Module &M, + ModuleAnalysisManager &AM) { + bool Changed = false; + for (auto &F : M) + Changed |= rewrite(F); + + if (!Changed) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + return PA; +} + +PreservedAnalyses PoisonCheckingPass::run(Function &F, + FunctionAnalysisManager &AM) { + if (!rewrite(F)) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + return PA; +} + + +/* Major TODO Items: + - Control dependent poison UB + - Strict mode - (i.e. must analyze every operand) + - Poison through memory + - Function ABIs + + Minor TODO items: + - Add propagation rules for and/or instructions + - Add hasPoisonFlags predicate to ValueTracking + - Add poison check rules for: + - mul, exact flags, out of bounds operands + - inbounds (can't be strict due to unknown allocation sizes) + - fmf and fp casts + */