Index: include/llvm/FuzzMutate/IRMutator.h =================================================================== --- /dev/null +++ include/llvm/FuzzMutate/IRMutator.h @@ -0,0 +1,106 @@ +//===-- IRMutator.h - Mutation engine for fuzzing IR ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Provides the IRMutator class, which drives mutations on IR based on a +// configurable set of strategies. Some common strategies are also included +// here. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZMUTATE_IRMUTATOR_H +#define LLVM_FUZZMUTATE_IRMUTATOR_H + +#include "llvm/FuzzMutate/OpDescriptor.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { +class BasicBlock; +class Function; +class Instruction; +class Module; + +struct RandomIRBuilder; + +/// Base class for describing how to mutate a module. mutation functions for +/// each IR unit forward to the contained unit. +class IRMutationStrategy { +public: + virtual ~IRMutationStrategy() = default; + + /// Provide a weight to bias towards choosing this strategy for a mutation. + /// + /// The value of the weight is arbitrary, but a good default is "the number of + /// distinct ways in which this strategy can mutate a unit". This can also be + /// used to prefer strategies that shrink the overall size of the result when + /// we start getting close to \c MaxSize. + virtual uint64_t getWeight(size_t CurrentSize, size_t MaxSize, + uint64_t CurrentWeight) = 0; + + /// @{ + /// Mutators for each IR unit. By default these forward to a contained + /// instance of the next smaller unit. + virtual void mutate(Module &M, RandomIRBuilder &IB); + virtual void mutate(Function &F, RandomIRBuilder &IB); + virtual void mutate(BasicBlock &BB, RandomIRBuilder &IB); + virtual void mutate(Instruction &I, RandomIRBuilder &IB) { + llvm_unreachable("Strategy does not implement any mutators"); + } + /// @} +}; + +using TypeGetter = std::function; + +/// Entry point for configuring and running IR mutations. +class IRMutator { + std::vector AllowedTypes; + std::vector> Strategies; + +public: + IRMutator(std::vector &&AllowedTypes, + std::vector> &&Strategies) + : AllowedTypes(std::move(AllowedTypes)), + Strategies(std::move(Strategies)) {} + + void mutateModule(Module &M, int Seed, size_t CurSize, size_t MaxSize); +}; + +/// Strategy that injects operations into the function. +class InjectorIRStrategy : public IRMutationStrategy { + std::vector Operations; + + fuzzerop::OpDescriptor chooseOperation(Value *Src, RandomIRBuilder &IB); + +public: + InjectorIRStrategy(std::vector &&Operations) + : Operations(std::move(Operations)) {} + static std::vector getDefaultOps(); + + uint64_t getWeight(size_t CurrentSize, size_t MaxSize, + uint64_t CurrentWeight) override { + return Operations.size(); + } + + using IRMutationStrategy::mutate; + void mutate(Function &F, RandomIRBuilder &IB) override; + void mutate(BasicBlock &BB, RandomIRBuilder &IB) override; +}; + +class InstDeleterIRStrategy : public IRMutationStrategy { +public: + uint64_t getWeight(size_t CurrentSize, size_t MaxSize, + uint64_t CurrentWeight) override; + + using IRMutationStrategy::mutate; + void mutate(Function &F, RandomIRBuilder &IB) override; + void mutate(Instruction &Inst, RandomIRBuilder &IB) override; +}; + +} // end llvm namespace + +#endif // LLVM_FUZZMUTATE_IRMUTATOR_H Index: include/llvm/FuzzMutate/OpDescriptor.h =================================================================== --- /dev/null +++ include/llvm/FuzzMutate/OpDescriptor.h @@ -0,0 +1,193 @@ +//===-- OpDescriptor.h ------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Provides the fuzzerop::Descriptor class and related tools for describing +// operations an IR fuzzer can work with. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZMUTATE_OPDESCRIPTOR_H +#define LLVM_FUZZMUTATE_OPDESCRIPTOR_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include + +namespace llvm { +namespace fuzzerop { + +/// @{ +/// Populate a small list of potentially interesting constants of a given type. +void makeConstantsWithType(Type *T, std::vector &Cs); +std::vector makeConstantsWithType(Type *T); +/// @} + +/// A matcher/generator for finding suitable values for the next source in an +/// operation's partially completed argument list. +/// +/// Given that we're building some operation X and may have already filled some +/// subset of its operands, this predicate determines if some value New is +/// suitable for the next operand or generates a set of values that are +/// suitable. +class SourcePred { +public: + /// Given a list of already selected operands, returns whether a given new + /// operand is suitable for the next operand. + using PredT = std::function Cur, const Value *New)>; + /// Given a list of already selected operands and a set of valid base types + /// for a fuzzer, generates a list of constants that could be used for the + /// next operand. + using MakeT = std::function( + ArrayRef Cur, ArrayRef BaseTypes)>; + +private: + PredT Pred; + MakeT Make; + +public: + /// Create a fully general source predicate. + SourcePred(PredT Pred, MakeT Make) : Pred(Pred), Make(Make) {} + SourcePred(PredT Pred, NoneType) : Pred(Pred) { + Make = [Pred](ArrayRef Cur, ArrayRef BaseTypes) { + // Default filter just calls Pred on each of the base types. + std::vector Result; + for (Type *T : BaseTypes) { + Constant *V = UndefValue::get(T); + if (Pred(Cur, V)) + makeConstantsWithType(T, Result); + } + if (Result.empty()) + report_fatal_error("Predicate does not match for base types"); + return Result; + }; + } + + /// Returns true if \c New is compatible for the argument after \c Cur + bool matches(ArrayRef Cur, const Value *New) { + return Pred(Cur, New); + } + + /// Generates a list of potential values for the argument after \c Cur. + std::vector generate(ArrayRef Cur, + ArrayRef BaseTypes) { + return Make(Cur, BaseTypes); + } +}; + +/// A description of some operation we can build while fuzzing IR. +struct OpDescriptor { + unsigned Weight; + SmallVector SourcePreds; + std::function, Instruction *)> BuilderFunc; +}; + +static inline SourcePred onlyType(Type *Only) { + auto Pred = [Only](ArrayRef, const Value *V) { + return V->getType() == Only; + }; + auto Make = [Only](ArrayRef, ArrayRef) { + return makeConstantsWithType(Only); + }; + return {Pred, Make}; +} + +static inline SourcePred anyType() { + auto Pred = [](ArrayRef, const Value *V) { + return !V->getType()->isVoidTy(); + }; + auto Make = None; + return {Pred, Make}; +} + +static inline SourcePred anyIntType() { + auto Pred = [](ArrayRef, const Value *V) { + return V->getType()->isIntegerTy(); + }; + auto Make = None; + return {Pred, Make}; +} + +static inline SourcePred anyFloatType() { + auto Pred = [](ArrayRef, const Value *V) { + return V->getType()->isFloatingPointTy(); + }; + auto Make = None; + return {Pred, Make}; +} + +static inline SourcePred anyPtrType() { + auto Pred = [](ArrayRef, const Value *V) { + return V->getType()->isPointerTy(); + }; + auto Make = [](ArrayRef, ArrayRef Ts) { + std::vector Result; + // TODO: Should these point at something? + for (Type *T : Ts) + Result.push_back(UndefValue::get(PointerType::getUnqual(T))); + return Result; + }; + return {Pred, Make}; +} + +static inline SourcePred anyAggregateType() { + auto Pred = [](ArrayRef, const Value *V) { + return V->getType()->isAggregateType(); + }; + // TODO: For now we only find aggregates in BaseTypes. It might be better to + // manufacture them out of the base types in some cases. + auto Find = None; + return {Pred, Find}; +} + +static inline SourcePred anyVectorType() { + auto Pred = [](ArrayRef, const Value *V) { + return V->getType()->isVectorTy(); + }; + // TODO: For now we only find vectors in BaseTypes. It might be better to + // manufacture vectors out of the base types, but it's tricky to be sure + // that's actually a reasonable type. + auto Make = None; + return {Pred, Make}; +} + +/// Match values that have the same type as the first source. +static inline SourcePred matchFirstType() { + auto Pred = [](ArrayRef Cur, const Value *V) { + assert(!Cur.empty() && "No first source yet"); + return V->getType() == Cur[0]->getType(); + }; + auto Make = [](ArrayRef Cur, ArrayRef) { + assert(!Cur.empty() && "No first source yet"); + return makeConstantsWithType(Cur[0]->getType()); + }; + return {Pred, Make}; +} + +/// Match values that have the first source's scalar type. +static inline SourcePred matchScalarOfFirstType() { + auto Pred = [](ArrayRef Cur, const Value *V) { + assert(!Cur.empty() && "No first source yet"); + return V->getType() == Cur[0]->getType()->getScalarType(); + }; + auto Make = [](ArrayRef Cur, ArrayRef) { + assert(!Cur.empty() && "No first source yet"); + return makeConstantsWithType(Cur[0]->getType()->getScalarType()); + }; + return {Pred, Make}; +} + +} // end fuzzerop namespace +} // end llvm namespace + +#endif // LLVM_FUZZMUTATE_OPDESCRIPTOR_H Index: include/llvm/FuzzMutate/Operations.h =================================================================== --- /dev/null +++ include/llvm/FuzzMutate/Operations.h @@ -0,0 +1,54 @@ +//===-- Operations.h - ----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implementations of common fuzzer operation descriptors for building an IR +// mutator. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZMUTATE_OPERATIONS_H +#define LLVM_FUZZMUTATE_OPERATIONS_H + +#include "llvm/FuzzMutate/OpDescriptor.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" + +namespace llvm { + +/// Getters for the default sets of operations, per general category. +/// @{ +void describeFuzzerIntOps(std::vector &Ops); +void describeFuzzerFloatOps(std::vector &Ops); +void describeFuzzerControlFlowOps(std::vector &Ops); +void describeFuzzerPointerOps(std::vector &Ops); +void describeFuzzerAggregateOps(std::vector &Ops); +void describeFuzzerVectorOps(std::vector &Ops); +/// @} + +namespace fuzzerop { + +/// Descriptors for individual operations. +/// @{ +OpDescriptor binOpDescriptor(unsigned Weight, Instruction::BinaryOps Op); +OpDescriptor cmpOpDescriptor(unsigned Weight, Instruction::OtherOps CmpOp, + CmpInst::Predicate Pred); +OpDescriptor splitBlockDescriptor(unsigned Weight); +OpDescriptor gepDescriptor(unsigned Weight); +OpDescriptor extractValueDescriptor(unsigned Weight); +OpDescriptor insertValueDescriptor(unsigned Weight); +OpDescriptor extractElementDescriptor(unsigned Weight); +OpDescriptor insertElementDescriptor(unsigned Weight); +OpDescriptor shuffleVectorDescriptor(unsigned Weight); +/// @} + +} // end fuzzerop namespace + +} // end llvm namespace + +#endif // LLVM_FUZZMUTATE_OPERATIONS_H Index: include/llvm/FuzzMutate/Random.h =================================================================== --- /dev/null +++ include/llvm/FuzzMutate/Random.h @@ -0,0 +1,97 @@ +//===--- Random.h - Utilities for random sampling -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Utilities for random sampling. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZMUTATE_RANDOM_H +#define LLVM_FUZZMUTATE_RANDOM_H + +#include +#include "llvm/Support/raw_ostream.h" +namespace llvm { + +/// Return a uniformly distributed random value between \c Min and \c Max +template T uniform(GenT &Gen, T Min, T Max) { + return std::uniform_int_distribution(Min, Max)(Gen); +} + +/// Return a uniformly distributed random value of type \c T +template T uniform(GenT &Gen) { + return uniform(Gen, std::numeric_limits::min(), + std::numeric_limits::max()); +} + +/// Randomly selects an item by sampling into a set with an unknown number of +/// elements, which may each be weighted to be more likely choices. +template class ReservoirSampler { + GenT &RandGen; + typename std::remove_const::type Selection = {}; + uint64_t TotalWeight = 0; + +public: + ReservoirSampler(GenT &RandGen) : RandGen(RandGen) {} + + uint64_t totalWeight() const { return TotalWeight; } + bool isEmpty() const { return TotalWeight == 0; } + + const T &getSelection() const { + assert(!isEmpty() && "Nothing selected"); + return Selection; + } + + explicit operator bool() const { return !isEmpty();} + const T &operator*() const { return getSelection(); } + + /// Sample each item in \c Items with unit weight + template ReservoirSampler &sample(RangeT &&Items) { + for (auto &I : Items) + sample(I, 1); + return *this; + } + + /// Sample a single item with the given weight. + ReservoirSampler &sample(const T &Item, uint64_t Weight) { + if (!Weight) + // If the weight is zero, do nothing. + return *this; + TotalWeight += Weight; + // Consider switching from the current element to this one. + if (uniform(RandGen, 1, TotalWeight) <= Weight) + Selection = Item; + return *this; + } +}; + +template ()))>::type> +ReservoirSampler makeSampler(GenT &RandGen, RangeT &&Items) { + ReservoirSampler RS(RandGen); + RS.sample(Items); + return RS; +} + +template +ReservoirSampler makeSampler(GenT &RandGen, const T &Item, + uint64_t Weight) { + ReservoirSampler RS(RandGen); + RS.sample(Item, Weight); + return RS; +} + +template +ReservoirSampler makeSampler(GenT &RandGen) { + return ReservoirSampler(RandGen); +} + +} // End llvm namespace + +#endif // LLVM_FUZZMUTATE_RANDOM_H Index: include/llvm/FuzzMutate/RandomIRBuilder.h =================================================================== --- /dev/null +++ include/llvm/FuzzMutate/RandomIRBuilder.h @@ -0,0 +1,62 @@ +//===-- Mutator.h - Utils for randomly mutation IR --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Provides the Mutator class, which is used to mutate IR for fuzzing. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZMUTATE_RANDOMIRBUILDER_H +#define LLVM_FUZZMUTATE_RANDOMIRBUILDER_H + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/FuzzMutate/IRMutator.h" +#include "llvm/FuzzMutate/Random.h" +#include + +namespace llvm { + +using RandomEngine = std::mt19937; + +struct RandomIRBuilder { + RandomEngine Rand; + SmallVector KnownTypes; + + RandomIRBuilder(int Seed, ArrayRef AllowedTypes) + : Rand(Seed), KnownTypes(AllowedTypes.begin(), AllowedTypes.end()) {} + + // TODO: Try to make this a bit less of a random mishmash of functions. + + /// Find a "source" for some operation, which will be used in one of the + /// operation's operands. This either selects an instruction in \c Insts or + /// returns some new arbitrary Value. + Value *findOrCreateSource(BasicBlock &BB, ArrayRef Insts); + /// Find a "source" for some operation, which will be used in one of the + /// operation's operands. This either selects an instruction in \c Insts that + /// matches \c Pred, or returns some new Value that matches \c Pred. The + /// values in \c Srcs should be source operands that have already been + /// selected. + Value *findOrCreateSource(BasicBlock &BB, ArrayRef Insts, + ArrayRef Srcs, fuzzerop::SourcePred Pred); + /// Create some Value suitable as a source for some operation. + Value *newSource(BasicBlock &BB, ArrayRef Insts, + ArrayRef Srcs, fuzzerop::SourcePred Pred); + /// Find a viable user for \c V in \c Insts, which should all be contained in + /// \c BB. This may also create some new instruction in \c BB and use that. + void connectToSink(BasicBlock &BB, ArrayRef Insts, Value *V); + /// Create a user for \c V in \c BB. + void newSink(BasicBlock &BB, ArrayRef Insts, Value *V); + Value *findPointer(BasicBlock &BB, ArrayRef Insts, + ArrayRef Srcs, fuzzerop::SourcePred Pred); + Type *chooseType(LLVMContext &Context, ArrayRef Srcs, + fuzzerop::SourcePred Pred); +}; + +} // end llvm namespace + +#endif // LLVM_FUZZMUTATE_RANDOMIRBUILDER_H Index: lib/CMakeLists.txt =================================================================== --- lib/CMakeLists.txt +++ lib/CMakeLists.txt @@ -2,6 +2,7 @@ # CMakeLists.txt add_subdirectory(IR) +add_subdirectory(FuzzMutate) add_subdirectory(IRReader) add_subdirectory(CodeGen) add_subdirectory(BinaryFormat) Index: lib/FuzzMutate/CMakeLists.txt =================================================================== --- /dev/null +++ lib/FuzzMutate/CMakeLists.txt @@ -0,0 +1,12 @@ +add_llvm_library(LLVMFuzzMutate + IRMutator.cpp + OpDescriptor.cpp + Operations.cpp + RandomIRBuilder.cpp + + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/FuzzMutate + + DEPENDS + intrinsics_gen + ) Index: lib/FuzzMutate/IRMutator.cpp =================================================================== --- /dev/null +++ lib/FuzzMutate/IRMutator.cpp @@ -0,0 +1,183 @@ +//===-- IRMutator.cpp -----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/FuzzMutate/IRMutator.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/FuzzMutate/Operations.h" +#include "llvm/FuzzMutate/Random.h" +#include "llvm/FuzzMutate/RandomIRBuilder.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Scalar/DCE.h" + +using namespace llvm; + +static void createEmptyFunction(Module &M) { + // TODO: Some arguments and a return value would probably be more interesting. + LLVMContext &Context = M.getContext(); + Function *F = Function::Create(FunctionType::get(Type::getVoidTy(Context), {}, + /*isVarArg=*/false), + GlobalValue::ExternalLinkage, "f", &M); + BasicBlock *BB = BasicBlock::Create(Context, "BB", F); + ReturnInst::Create(Context, BB); +} + +void IRMutationStrategy::mutate(Module &M, RandomIRBuilder &IB) { + if (M.empty()) + createEmptyFunction(M); + + auto RS = makeSampler(IB.Rand); + for (Function &F : M) + if (!F.isDeclaration()) + RS.sample(&F, /*Weight=*/1); + mutate(*RS.getSelection(), IB); +} + +void IRMutationStrategy::mutate(Function &F, RandomIRBuilder &IB) { + mutate(*makeSampler(IB.Rand, make_pointer_range(F)).getSelection(), IB); +} + +void IRMutationStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) { + mutate(*makeSampler(IB.Rand, make_pointer_range(BB)).getSelection(), IB); +} + +void IRMutator::mutateModule(Module &M, int Seed, size_t CurSize, + size_t MaxSize) { + std::vector Types; + for (const auto &Getter : AllowedTypes) + Types.push_back(Getter(M.getContext())); + RandomIRBuilder IB(Seed, Types); + + auto RS = makeSampler(IB.Rand); + for (const auto &Strategy : Strategies) + RS.sample(Strategy.get(), + Strategy->getWeight(CurSize, MaxSize, RS.totalWeight())); + auto Strategy = RS.getSelection(); + + Strategy->mutate(M, IB); +} + +static void eliminateDeadCode(Function &F) { + FunctionPassManager FPM; + FPM.addPass(DCEPass()); + FunctionAnalysisManager FAM; + FAM.registerPass([&] { return TargetLibraryAnalysis(); }); + FPM.run(F, FAM); +} + +void InjectorIRStrategy::mutate(Function &F, RandomIRBuilder &IB) { + IRMutationStrategy::mutate(F, IB); + eliminateDeadCode(F); +} + +std::vector InjectorIRStrategy::getDefaultOps() { + std::vector Ops; + describeFuzzerIntOps(Ops); + describeFuzzerFloatOps(Ops); + describeFuzzerControlFlowOps(Ops); + describeFuzzerPointerOps(Ops); + describeFuzzerAggregateOps(Ops); + describeFuzzerVectorOps(Ops); + return Ops; +} + +fuzzerop::OpDescriptor +InjectorIRStrategy::chooseOperation(Value *Src, RandomIRBuilder &IB) { + auto OpMatchesPred = [&Src](fuzzerop::OpDescriptor &Op) { + return Op.SourcePreds[0].matches({}, Src); + }; + auto RS = makeSampler(IB.Rand, make_filter_range(Operations, OpMatchesPred)); + if (RS.isEmpty()) + report_fatal_error("No available operations for src type"); + return *RS; +} + +void InjectorIRStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) { + SmallVector Insts; + for (auto I = BB.getFirstInsertionPt(), E = BB.end(); I != E; ++I) + Insts.push_back(&*I); + + // Choose an insertion point for our new instruction. + size_t IP = uniform(IB.Rand, 0, Insts.size() - 1); + + auto InstsBefore = makeArrayRef(Insts).slice(0, IP); + auto InstsAfter = makeArrayRef(Insts).slice(IP); + + // Choose a source, which will be used to constrain the operation selection. + SmallVector Srcs; + Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore)); + + // Choose an operation that's constrained to be valid for the type of the + // source, collect any other sources it needs, and then build it. + fuzzerop::OpDescriptor OpDesc = chooseOperation(Srcs[0], IB); + for (const auto &Pred : makeArrayRef(OpDesc.SourcePreds).slice(1)) + Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred)); + if (Value *Op = OpDesc.BuilderFunc(Srcs, Insts[IP])) { + // Find a sink and wire up the results of the operation. + IB.connectToSink(BB, InstsAfter, Op); + } +} + +uint64_t InstDeleterIRStrategy::getWeight(size_t CurrentSize, size_t MaxSize, + uint64_t CurrentWeight) { + // If we have less than 200 bytes, panic and try to always delete. + if (CurrentSize > MaxSize - 200) + return CurrentWeight ? CurrentWeight * 100 : 1; + // Draw a line starting from when we only have 1k left and increasing linearly + // to double the current weight. + int Line = (-2 * CurrentWeight) * (MaxSize - CurrentSize + 1000); + // Clamp negative weights to zero. + if (Line < 0) + return 0; + return Line; +} + +void InstDeleterIRStrategy::mutate(Function &F, RandomIRBuilder &IB) { + auto RS = makeSampler(IB.Rand); + // Avoid terminators so we don't have to worry about keeping the CFG coherent. + for (Instruction &Inst : instructions(F)) + if (!Inst.isTerminator()) + RS.sample(&Inst, /*Weight=*/1); + assert(!RS.isEmpty() && "No instructions to delete"); + // Delete the instruction. + mutate(*RS.getSelection(), IB); + // Clean up any dead code that's left over after removing the instruction. + eliminateDeadCode(F); +} + +void InstDeleterIRStrategy::mutate(Instruction &Inst, RandomIRBuilder &IB) { + assert(!Inst.isTerminator() && "Deleting terminators invalidates CFG"); + + if (Inst.getType()->isVoidTy()) { + // Instructions with void type (ie, store) have no uses to worry about. Just + // erase it and move on. + Inst.eraseFromParent(); + return; + } + + // Otherwise we need to find some other value with the right type to keep the + // users happy. + auto Pred = fuzzerop::onlyType(Inst.getType()); + auto RS = makeSampler(IB.Rand); + SmallVector InstsBefore; + BasicBlock *BB = Inst.getParent(); + for (auto I = BB->getFirstInsertionPt(), E = Inst.getIterator(); I != E; + ++I) { + if (Pred.matches({}, &*I)) + RS.sample(&*I, /*Weight=*/1); + InstsBefore.push_back(&*I); + } + if (!RS) + RS.sample(IB.newSource(*BB, InstsBefore, {}, Pred), /*Weight=*/1); + + Inst.replaceAllUsesWith(RS.getSelection()); +} Index: lib/FuzzMutate/LLVMBuild.txt =================================================================== --- /dev/null +++ lib/FuzzMutate/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/FuzzMutate/LLVMBuild.txt ---------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = FuzzMutate +parent = Libraries +required_libraries = Analysis Core IR Support Transforms Index: lib/FuzzMutate/OpDescriptor.cpp =================================================================== --- /dev/null +++ lib/FuzzMutate/OpDescriptor.cpp @@ -0,0 +1,38 @@ +//===-- OpDescriptor.cpp --------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/FuzzMutate/OpDescriptor.h" +#include "llvm/IR/Constants.h" + +using namespace llvm; +using namespace fuzzerop; + +void fuzzerop::makeConstantsWithType(Type *T, std::vector &Cs) { + if (auto *IntTy = dyn_cast(T)) { + uint64_t W = IntTy->getBitWidth(); + Cs.push_back(ConstantInt::get(IntTy, APInt::getMaxValue(W))); + Cs.push_back(ConstantInt::get(IntTy, APInt::getMinValue(W))); + Cs.push_back(ConstantInt::get(IntTy, APInt::getSignedMaxValue(W))); + Cs.push_back(ConstantInt::get(IntTy, APInt::getSignedMinValue(W))); + Cs.push_back(ConstantInt::get(IntTy, APInt::getOneBitSet(W, W / 2))); + } else if (T->isFloatingPointTy()) { + auto &Ctx = T->getContext(); + auto &Sem = T->getFltSemantics(); + Cs.push_back(ConstantFP::get(Ctx, APFloat::getZero(Sem))); + Cs.push_back(ConstantFP::get(Ctx, APFloat::getLargest(Sem))); + Cs.push_back(ConstantFP::get(Ctx, APFloat::getSmallest(Sem))); + } else + Cs.push_back(UndefValue::get(T)); +} + +std::vector fuzzerop::makeConstantsWithType(Type *T) { + std::vector Result; + makeConstantsWithType(T, Result); + return Result; +} Index: lib/FuzzMutate/Operations.cpp =================================================================== --- /dev/null +++ lib/FuzzMutate/Operations.cpp @@ -0,0 +1,312 @@ +//===-- Operations.cpp ----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/FuzzMutate/Operations.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" + +using namespace llvm; +using namespace fuzzerop; + +void llvm::describeFuzzerIntOps(std::vector &Ops) { + Ops.push_back(binOpDescriptor(1, Instruction::Add)); + Ops.push_back(binOpDescriptor(1, Instruction::Sub)); + Ops.push_back(binOpDescriptor(1, Instruction::Mul)); + Ops.push_back(binOpDescriptor(1, Instruction::SDiv)); + Ops.push_back(binOpDescriptor(1, Instruction::UDiv)); + Ops.push_back(binOpDescriptor(1, Instruction::SRem)); + Ops.push_back(binOpDescriptor(1, Instruction::URem)); + Ops.push_back(binOpDescriptor(1, Instruction::Shl)); + Ops.push_back(binOpDescriptor(1, Instruction::LShr)); + Ops.push_back(binOpDescriptor(1, Instruction::AShr)); + Ops.push_back(binOpDescriptor(1, Instruction::And)); + Ops.push_back(binOpDescriptor(1, Instruction::Or)); + Ops.push_back(binOpDescriptor(1, Instruction::Xor)); + + Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_EQ)); + Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_NE)); + Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_UGT)); + Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_UGE)); + Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_ULT)); + Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_ULE)); + Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_SGT)); + Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_SGE)); + Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_SLT)); + Ops.push_back(cmpOpDescriptor(1, Instruction::ICmp, CmpInst::ICMP_SLE)); +} + +void llvm::describeFuzzerFloatOps(std::vector &Ops) { + Ops.push_back(binOpDescriptor(1, Instruction::FAdd)); + Ops.push_back(binOpDescriptor(1, Instruction::FSub)); + Ops.push_back(binOpDescriptor(1, Instruction::FMul)); + Ops.push_back(binOpDescriptor(1, Instruction::FDiv)); + Ops.push_back(binOpDescriptor(1, Instruction::FRem)); + + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_FALSE)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_OEQ)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_OGT)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_OGE)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_OLT)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_OLE)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_ONE)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_ORD)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_UNO)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_UEQ)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_UGT)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_UGE)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_ULT)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_ULE)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_UNE)); + Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_TRUE)); +} + +void llvm::describeFuzzerControlFlowOps( + std::vector &Ops) { + Ops.push_back(splitBlockDescriptor(1)); +} + +void llvm::describeFuzzerPointerOps(std::vector &Ops) { + Ops.push_back(gepDescriptor(1)); +} + +void llvm::describeFuzzerAggregateOps( + std::vector &Ops) { + Ops.push_back(extractValueDescriptor(1)); + Ops.push_back(insertValueDescriptor(1)); +} + +void llvm::describeFuzzerVectorOps(std::vector &Ops) { + Ops.push_back(extractElementDescriptor(1)); + Ops.push_back(insertElementDescriptor(1)); + Ops.push_back(shuffleVectorDescriptor(1)); +} + +OpDescriptor llvm::fuzzerop::binOpDescriptor(unsigned Weight, + Instruction::BinaryOps Op) { + auto buildOp = [Op](ArrayRef Srcs, Instruction *Inst) { + return BinaryOperator::Create(Op, Srcs[0], Srcs[1], "B", Inst); + }; + switch (Op) { + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::SDiv: + case Instruction::UDiv: + case Instruction::SRem: + case Instruction::URem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + return {Weight, {anyIntType(), matchFirstType()}, buildOp}; + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + return {Weight, {anyFloatType(), matchFirstType()}, buildOp}; + case Instruction::BinaryOpsEnd: + llvm_unreachable("Value out of range of enum"); + } + llvm_unreachable("Covered switch"); +} + +OpDescriptor llvm::fuzzerop::cmpOpDescriptor(unsigned Weight, + Instruction::OtherOps CmpOp, + CmpInst::Predicate Pred) { + auto buildOp = [CmpOp, Pred](ArrayRef Srcs, Instruction *Inst) { + return CmpInst::Create(CmpOp, Pred, Srcs[0], Srcs[1], "C", Inst); + }; + + switch (CmpOp) { + case Instruction::ICmp: + return {Weight, {anyIntType(), matchFirstType()}, buildOp}; + case Instruction::FCmp: + return {Weight, {anyFloatType(), matchFirstType()}, buildOp}; + default: + llvm_unreachable("CmpOp must be ICmp or FCmp"); + } +} + +OpDescriptor llvm::fuzzerop::splitBlockDescriptor(unsigned Weight) { + auto buildSplitBlock = [](ArrayRef Srcs, Instruction *Inst) { + BasicBlock *Block = Inst->getParent(); + BasicBlock *Next = Block->splitBasicBlock(Inst, "BB"); + if (Block != &Block->getParent()->getEntryBlock()) { + // Loop back on this block by replacing the unconditional forward branch + // with a conditional with a backedge. + BranchInst::Create(Block, Next, Srcs[0], Block->getTerminator()); + Block->getTerminator()->eraseFromParent(); + + // We need values for each phi in the block. Since there isn't a good way + // to do a variable number of input values currently, we just fill them + // with undef. + for (PHINode &PHI : Block->phis()) + PHI.addIncoming(UndefValue::get(PHI.getType()), Block); + } + return nullptr; + }; + SourcePred isInt1Ty{[](ArrayRef, const Value *V) { + return V->getType()->isIntegerTy(1); + }, + None}; + return {Weight, {isInt1Ty}, buildSplitBlock}; +} + +OpDescriptor llvm::fuzzerop::gepDescriptor(unsigned Weight) { + auto buildGEP = [](ArrayRef Srcs, Instruction *Inst) { + Type *Ty = cast(Srcs[0]->getType())->getElementType(); + auto Indices = makeArrayRef(Srcs).drop_front(1); + return GetElementPtrInst::Create(Ty, Srcs[0], Indices, "G", Inst); + }; + // TODO: Handle aggregates and vectors + // TODO: Support multiple indices. + // TODO: Try to avoid meaningless accesses. + return {Weight, {anyPtrType(), anyIntType()}, buildGEP}; +} + +static uint64_t getAggregateNumElements(Type *T) { + assert(T->isAggregateType() && "Not a struct or array"); + if (isa(T)) + return T->getStructNumElements(); + return T->getArrayNumElements(); +} + +static SourcePred validExtractValueIndex() { + auto Pred = [](ArrayRef Cur, const Value *V) { + if (auto *CI = dyn_cast(V)) + if (!CI->uge(getAggregateNumElements(Cur[0]->getType()))) + return true; + return false; + }; + auto Make = [](ArrayRef Cur, ArrayRef Ts) { + std::vector Result; + auto *Int32Ty = Type::getInt32Ty(Cur[0]->getContext()); + uint64_t N = getAggregateNumElements(Cur[0]->getType()); + // Create indices at the start, end, and middle, but avoid dups. + Result.push_back(ConstantInt::get(Int32Ty, 0)); + if (N > 1) + Result.push_back(ConstantInt::get(Int32Ty, N - 1)); + if (N > 2) + Result.push_back(ConstantInt::get(Int32Ty, N / 2)); + return Result; + }; + return {Pred, Make}; +} + +OpDescriptor llvm::fuzzerop::extractValueDescriptor(unsigned Weight) { + auto buildExtract = [](ArrayRef Srcs, Instruction *Inst) { + // TODO: It's pretty inefficient to shuffle this all through constants. + unsigned Idx = cast(Srcs[1])->getZExtValue(); + return ExtractValueInst::Create(Srcs[0], {Idx}, "E", Inst); + }; + // TODO: Should we handle multiple indices? + return {Weight, {anyAggregateType(), validExtractValueIndex()}, buildExtract}; +} + +static SourcePred matchScalarInAggregate() { + auto Pred = [](ArrayRef Cur, const Value *V) { + if (isa(Cur[0]->getType())) + return V->getType() == Cur[0]->getType(); + auto *STy = cast(Cur[0]->getType()); + for (int I = 0, E = STy->getNumElements(); I < E; ++I) + if (STy->getTypeAtIndex(I) == V->getType()) + return true; + return false; + }; + auto Make = [](ArrayRef Cur, ArrayRef) { + if (isa(Cur[0]->getType())) + return makeConstantsWithType(Cur[0]->getType()); + std::vector Result; + auto *STy = cast(Cur[0]->getType()); + for (int I = 0, E = STy->getNumElements(); I < E; ++I) + makeConstantsWithType(STy->getTypeAtIndex(I), Result); + return Result; + }; + return {Pred, Make}; +} + +static SourcePred validInsertValueIndex() { + auto Pred = [](ArrayRef Cur, const Value *V) { + auto *CTy = cast(Cur[0]->getType()); + if (auto *CI = dyn_cast(V)) + if (CI->getBitWidth() == 32) + if (CTy->getTypeAtIndex(CI->getZExtValue()) == V->getType()) + return true; + return false; + }; + auto Make = [](ArrayRef Cur, ArrayRef Ts) { + std::vector Result; + auto *Int32Ty = Type::getInt32Ty(Cur[0]->getContext()); + auto *CTy = cast(Cur[0]->getType()); + for (int I = 0, E = getAggregateNumElements(CTy); I < E; ++I) + if (CTy->getTypeAtIndex(I) == Cur[1]->getType()) + Result.push_back(ConstantInt::get(Int32Ty, I)); + return Result; + }; + return {Pred, Make}; +} + +OpDescriptor llvm::fuzzerop::insertValueDescriptor(unsigned Weight) { + auto buildInsert = [](ArrayRef Srcs, Instruction *Inst) { + // TODO: It's pretty inefficient to shuffle this all through constants. + unsigned Idx = cast(Srcs[2])->getZExtValue(); + return InsertValueInst::Create(Srcs[0], Srcs[1], {Idx}, "I", Inst); + }; + return { + Weight, + {anyAggregateType(), matchScalarInAggregate(), validInsertValueIndex()}, + buildInsert}; +} + +OpDescriptor llvm::fuzzerop::extractElementDescriptor(unsigned Weight) { + auto buildExtract = [](ArrayRef Srcs, Instruction *Inst) { + return ExtractElementInst::Create(Srcs[0], Srcs[1], "E", Inst); + }; + // TODO: Try to avoid undefined accesses. + return {Weight, {anyVectorType(), anyIntType()}, buildExtract}; +} + +OpDescriptor llvm::fuzzerop::insertElementDescriptor(unsigned Weight) { + auto buildInsert = [](ArrayRef Srcs, Instruction *Inst) { + return InsertElementInst::Create(Srcs[0], Srcs[1], Srcs[2], "I", Inst); + }; + // TODO: Try to avoid undefined accesses. + return {Weight, + {anyVectorType(), matchScalarOfFirstType(), anyIntType()}, + buildInsert}; +} + +static SourcePred validShuffleVectorIndex() { + auto Pred = [](ArrayRef Cur, const Value *V) { + return ShuffleVectorInst::isValidOperands(Cur[0], Cur[1], V); + }; + auto Make = [](ArrayRef Cur, ArrayRef Ts) { + auto *FirstTy = cast(Cur[0]->getType()); + auto *Int32Ty = Type::getInt32Ty(Cur[0]->getContext()); + // TODO: It's straighforward to make up reasonable values, but listing them + // exhaustively would be insane. Come up with a couple of sensible ones. + return std::vector{ + UndefValue::get(VectorType::get(Int32Ty, FirstTy->getNumElements()))}; + }; + return {Pred, Make}; +} + +OpDescriptor llvm::fuzzerop::shuffleVectorDescriptor(unsigned Weight) { + auto buildShuffle = [](ArrayRef Srcs, Instruction *Inst) { + return new ShuffleVectorInst(Srcs[0], Srcs[1], Srcs[2], "S", Inst); + }; + return {Weight, + {anyVectorType(), matchFirstType(), validShuffleVectorIndex()}, + buildShuffle}; +} Index: lib/FuzzMutate/RandomIRBuilder.cpp =================================================================== --- /dev/null +++ lib/FuzzMutate/RandomIRBuilder.cpp @@ -0,0 +1,140 @@ +//===-- RandomIRBuilder.cpp -----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/FuzzMutate/RandomIRBuilder.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/FuzzMutate/Random.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" + +using namespace llvm; +using namespace fuzzerop; + +Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB, + ArrayRef Insts) { + return findOrCreateSource(BB, Insts, {}, anyType()); +} + +Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB, + ArrayRef Insts, + ArrayRef Srcs, + SourcePred Pred) { + auto MatchesPred = [&Srcs, &Pred](Instruction *Inst) { + return Pred.matches(Srcs, Inst); + }; + auto RS = makeSampler(Rand, make_filter_range(Insts, MatchesPred)); + // Also consider choosing no source, meaning we want a new one. + RS.sample(nullptr, /*Weight=*/1); + if (Instruction *Src = RS.getSelection()) + return Src; + return newSource(BB, Insts, Srcs, Pred); +} + +Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef Insts, + ArrayRef Srcs, SourcePred Pred) { + // Generate some constants to choose from. + auto RS = makeSampler(Rand); + RS.sample(Pred.generate(Srcs, KnownTypes)); + assert(!RS.isEmpty() && "Failed to generate sources"); + + // If we can find a pointer to load from, use it half the time. + Value *Ptr = findPointer(BB, Insts, Srcs, Pred); + if (Ptr) + RS.sample(Ptr, RS.totalWeight()); + + Value *Result = RS.getSelection(); + if (Result != Ptr) + return Result; + + // If we choose the pointer, we need to create a load. + auto IP = BB.getFirstInsertionPt(); + if (auto *I = dyn_cast(Ptr)) + IP = ++I->getIterator(); + return new LoadInst(Ptr, "L", &*IP); +} + +static bool isCompatibleReplacement(const Instruction *I, const Use &Operand, + const Value *Replacement) { + if (Operand->getType() != Replacement->getType()) + return false; + switch (I->getOpcode()) { + case Instruction::GetElementPtr: + case Instruction::ExtractElement: + case Instruction::ExtractValue: + // TODO: We could potentially validate these, but for now just leave indices + // alone. + if (Operand.getOperandNo() > 1) + return false; + break; + case Instruction::InsertValue: + case Instruction::InsertElement: + if (Operand.getOperandNo() > 2) + return false; + break; + default: + break; + } + return true; +} + +void RandomIRBuilder::connectToSink(BasicBlock &BB, + ArrayRef Insts, Value *V) { + auto RS = makeSampler(Rand); + for (auto &I : Insts) { + if (isa(I)) + // TODO: Replacing operands of intrinsics would be interesting, but + // there's no easy way to verify that a given replacement is valid given + // that intrinsics can impose arbitrary constraints. + continue; + for (Use &U : I->operands()) + if (isCompatibleReplacement(I, U, V)) + RS.sample(&U, 1); + } + // Also consider choosing no sink, meaning we want a new one. + RS.sample(nullptr, /*Weight=*/1); + + if (Use *Sink = RS.getSelection()) { + User *U = Sink->getUser(); + unsigned OpNo = Sink->getOperandNo(); + U->setOperand(OpNo, V); + return; + } + newSink(BB, Insts, V); +} + +void RandomIRBuilder::newSink(BasicBlock &BB, ArrayRef Insts, + Value *V) { + Value *Ptr = findPointer(BB, Insts, {V}, matchFirstType()); + if (!Ptr) { + if (uniform(Rand)) + Ptr = new AllocaInst(V->getType(), 0, "A", &*BB.getFirstInsertionPt()); + else + Ptr = UndefValue::get(PointerType::get(V->getType(), 0)); + } + + new StoreInst(V, Ptr, Insts.back()); +} + +Value *RandomIRBuilder::findPointer(BasicBlock &BB, + ArrayRef Insts, + ArrayRef Srcs, SourcePred Pred) { + auto IsMatchingPtr = [&Srcs, &Pred](Instruction *Inst) { + if (auto PtrTy = dyn_cast(Inst->getType())) + // TODO: Check if this is horribly expensive. + return Pred.matches(Srcs, UndefValue::get(PtrTy->getElementType())); + return false; + }; + if (auto RS = makeSampler(Rand, make_filter_range(Insts, IsMatchingPtr))) + return RS.getSelection(); + return nullptr; +} Index: unittests/CMakeLists.txt =================================================================== --- unittests/CMakeLists.txt +++ unittests/CMakeLists.txt @@ -12,6 +12,7 @@ add_subdirectory(CodeGen) add_subdirectory(DebugInfo) add_subdirectory(ExecutionEngine) +add_subdirectory(FuzzMutate) add_subdirectory(IR) add_subdirectory(LineEditor) add_subdirectory(Linker) Index: unittests/FuzzMutate/CMakeLists.txt =================================================================== --- /dev/null +++ unittests/FuzzMutate/CMakeLists.txt @@ -0,0 +1,10 @@ +set(LLVM_LINK_COMPONENTS + Core + FuzzMutate + Support + ) + +add_llvm_unittest(FuzzMutateTests + OperationsTest.cpp + ReservoirSamplerTest.cpp + ) Index: unittests/FuzzMutate/OperationsTest.cpp =================================================================== --- /dev/null +++ unittests/FuzzMutate/OperationsTest.cpp @@ -0,0 +1,323 @@ +//===- OperationsTest.cpp - Tests for fuzzer operations -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/FuzzMutate/Operations.h" +#include "llvm/FuzzMutate/OpDescriptor.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Verifier.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include + +// Define some pretty printers to help with debugging failures. +namespace llvm { +void PrintTo(Type *T, ::std::ostream *OS) { + raw_os_ostream ROS(*OS); + T->print(ROS); +} + +void PrintTo(BasicBlock *BB, ::std::ostream *OS) { + raw_os_ostream ROS(*OS); + ROS << BB << " (" << BB->getName() << ")"; +} + +void PrintTo(Value *V, ::std::ostream *OS) { + raw_os_ostream ROS(*OS); + ROS << V << " ("; + V->print(ROS); + ROS << ")"; +} +void PrintTo(Constant *C, ::std::ostream *OS) { PrintTo(cast(C), OS); } + +} // namespace llvm + +using namespace llvm; + +using testing::AllOf; +using testing::AnyOf; +using testing::ElementsAre; +using testing::Eq; +using testing::Ge; +using testing::Each; +using testing::Truly; +using testing::NotNull; +using testing::PrintToString; +using testing::SizeIs; + +MATCHER_P(TypesMatch, V, "has type " + PrintToString(V->getType())) { + return arg->getType() == V->getType(); +} +MATCHER_P(HasType, T, "") { return arg->getType() == T; } + +TEST(OperationsTest, SourcePreds) { + using namespace llvm::fuzzerop; + + LLVMContext Ctx; + + Constant *i1 = ConstantInt::getFalse(Ctx); + Constant *i8 = ConstantInt::get(Type::getInt8Ty(Ctx), 3); + Constant *i16 = ConstantInt::get(Type::getInt16Ty(Ctx), 1 << 15); + Constant *i32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0); + Constant *i64 = ConstantInt::get(Type::getInt64Ty(Ctx), + std::numeric_limits::max()); + Constant *f16 = ConstantFP::getInfinity(Type::getHalfTy(Ctx)); + Constant *f32 = ConstantFP::get(Type::getFloatTy(Ctx), 0.0); + Constant *f64 = ConstantFP::get(Type::getDoubleTy(Ctx), 123.45); + Constant *s = + ConstantStruct::get(StructType::create(Ctx, "OpaqueStruct")); + Constant *a = + ConstantArray::get(ArrayType::get(i32->getType(), 2), {i32, i32}); + Constant *v8i8 = ConstantVector::getSplat(8, i8); + Constant *v4f16 = ConstantVector::getSplat(4, f16); + Constant *p0i32 = + ConstantPointerNull::get(PointerType::get(i32->getType(), 0)); + + auto OnlyI32 = onlyType(i32->getType()); + EXPECT_TRUE(OnlyI32.matches({}, i32)); + EXPECT_FALSE(OnlyI32.matches({}, i64)); + EXPECT_FALSE(OnlyI32.matches({}, p0i32)); + EXPECT_FALSE(OnlyI32.matches({}, a)); + + EXPECT_THAT(OnlyI32.generate({}, {}), + AllOf(SizeIs(Ge(1u)), Each(TypesMatch(i32)))); + + auto AnyType = anyType(); + EXPECT_TRUE(AnyType.matches({}, i1)); + EXPECT_TRUE(AnyType.matches({}, f64)); + EXPECT_TRUE(AnyType.matches({}, s)); + EXPECT_TRUE(AnyType.matches({}, v8i8)); + EXPECT_TRUE(AnyType.matches({}, p0i32)); + + EXPECT_THAT( + AnyType.generate({}, {i32->getType(), f16->getType(), v8i8->getType()}), + Each(AnyOf(TypesMatch(i32), TypesMatch(f16), TypesMatch(v8i8)))); + + auto AnyInt = anyIntType(); + EXPECT_TRUE(AnyInt.matches({}, i1)); + EXPECT_TRUE(AnyInt.matches({}, i64)); + EXPECT_FALSE(AnyInt.matches({}, f32)); + EXPECT_FALSE(AnyInt.matches({}, v4f16)); + + EXPECT_THAT( + AnyInt.generate({}, {i32->getType(), f16->getType(), v8i8->getType()}), + AllOf(SizeIs(Ge(1u)), Each(TypesMatch(i32)))); + + auto AnyFP = anyFloatType(); + EXPECT_TRUE(AnyFP.matches({}, f16)); + EXPECT_TRUE(AnyFP.matches({}, f32)); + EXPECT_FALSE(AnyFP.matches({}, i16)); + EXPECT_FALSE(AnyFP.matches({}, p0i32)); + EXPECT_FALSE(AnyFP.matches({}, v4f16)); + + EXPECT_THAT( + AnyFP.generate({}, {i32->getType(), f16->getType(), v8i8->getType()}), + AllOf(SizeIs(Ge(1u)), Each(TypesMatch(f16)))); + + auto AnyPtr = anyPtrType(); + EXPECT_TRUE(AnyPtr.matches({}, p0i32)); + EXPECT_FALSE(AnyPtr.matches({}, i8)); + EXPECT_FALSE(AnyPtr.matches({}, a)); + EXPECT_FALSE(AnyPtr.matches({}, v8i8)); + + auto isPointer = [](Value *V) { return V->getType()->isPointerTy(); }; + EXPECT_THAT( + AnyPtr.generate({}, {i32->getType(), f16->getType(), v8i8->getType()}), + AllOf(SizeIs(Ge(3u)), Each(Truly(isPointer)))); + + auto AnyVec = anyVectorType(); + EXPECT_TRUE(AnyVec.matches({}, v8i8)); + EXPECT_TRUE(AnyVec.matches({}, v4f16)); + EXPECT_FALSE(AnyVec.matches({}, i8)); + EXPECT_FALSE(AnyVec.matches({}, a)); + EXPECT_FALSE(AnyVec.matches({}, s)); + + EXPECT_THAT(AnyVec.generate({}, {v8i8->getType()}), + ElementsAre(TypesMatch(v8i8))); + + auto First = matchFirstType(); + EXPECT_TRUE(First.matches({i8}, i8)); + EXPECT_TRUE(First.matches({s, a}, s)); + EXPECT_FALSE(First.matches({f16}, f32)); + EXPECT_FALSE(First.matches({v4f16, f64}, f64)); + + EXPECT_THAT(First.generate({i8}, {}), Each(TypesMatch(i8))); + EXPECT_THAT(First.generate({f16}, {i8->getType()}), + Each(TypesMatch(f16))); + EXPECT_THAT(First.generate({v8i8, i32}, {}), Each(TypesMatch(v8i8))); +} + +TEST(OperationsTest, SplitBlock) { + LLVMContext Ctx; + + Module M("M", Ctx); + Function *F = Function::Create(FunctionType::get(Type::getVoidTy(Ctx), {}, + /*isVarArg=*/false), + GlobalValue::ExternalLinkage, "f", &M); + auto SBOp = fuzzerop::splitBlockDescriptor(1); + + // Create a block with only a return and split it on the return. + auto *BB = BasicBlock::Create(Ctx, "BB", F); + auto *RI = ReturnInst::Create(Ctx, BB); + SBOp.BuilderFunc({UndefValue::get(Type::getInt1Ty(Ctx))}, RI); + + // We should end up with an unconditional branch from BB to BB1, and the + // return ends up in BB1. + auto *UncondBr = cast(BB->getTerminator()); + ASSERT_TRUE(UncondBr->isUnconditional()); + auto *BB1 = UncondBr->getSuccessor(0); + ASSERT_THAT(RI->getParent(), Eq(BB1)); + + // Now add an instruction to BB1 and split on that. + auto *AI = new AllocaInst(Type::getInt8Ty(Ctx), 0, "a", RI); + Value *Cond = ConstantInt::getFalse(Ctx); + SBOp.BuilderFunc({Cond}, AI); + + // We should end up with a loop back on BB1 and the instruction we split on + // moves to BB2. + auto *CondBr = cast(BB1->getTerminator()); + EXPECT_THAT(CondBr->getCondition(), Eq(Cond)); + ASSERT_THAT(CondBr->getNumSuccessors(), Eq(2u)); + ASSERT_THAT(CondBr->getSuccessor(0), Eq(BB1)); + auto *BB2 = CondBr->getSuccessor(1); + EXPECT_THAT(AI->getParent(), Eq(BB2)); + EXPECT_THAT(RI->getParent(), Eq(BB2)); + + EXPECT_FALSE(verifyModule(M, &errs())); +} + +TEST(OperationsTest, SplitBlockWithPhis) { + LLVMContext Ctx; + + Type *Int8Ty = Type::getInt8Ty(Ctx); + + Module M("M", Ctx); + Function *F = Function::Create(FunctionType::get(Type::getVoidTy(Ctx), {}, + /*isVarArg=*/false), + GlobalValue::ExternalLinkage, "f", &M); + auto SBOp = fuzzerop::splitBlockDescriptor(1); + + // Create 3 blocks with an if-then branch. + auto *BB1 = BasicBlock::Create(Ctx, "BB1", F); + auto *BB2 = BasicBlock::Create(Ctx, "BB2", F); + auto *BB3 = BasicBlock::Create(Ctx, "BB3", F); + BranchInst::Create(BB2, BB3, ConstantInt::getFalse(Ctx), BB1); + BranchInst::Create(BB3, BB2); + + // Set up phi nodes selecting values for the incoming edges. + auto *PHI1 = PHINode::Create(Int8Ty, /*NumReservedValues=*/2, "p1", BB3); + PHI1->addIncoming(ConstantInt::get(Int8Ty, 0), BB1); + PHI1->addIncoming(ConstantInt::get(Int8Ty, 1), BB2); + auto *PHI2 = PHINode::Create(Int8Ty, /*NumReservedValues=*/2, "p2", BB3); + PHI2->addIncoming(ConstantInt::get(Int8Ty, 1), BB1); + PHI2->addIncoming(ConstantInt::get(Int8Ty, 0), BB2); + auto *RI = ReturnInst::Create(Ctx, BB3); + + // Now we split the block with PHI nodes, making sure they're all updated. + Value *Cond = ConstantInt::getFalse(Ctx); + SBOp.BuilderFunc({Cond}, RI); + + // Make sure the PHIs are updated with a value for the third incoming edge. + EXPECT_THAT(PHI1->getNumIncomingValues(), Eq(3u)); + EXPECT_THAT(PHI2->getNumIncomingValues(), Eq(3u)); + EXPECT_FALSE(verifyModule(M, &errs())); +} + +TEST(OperationsTest, GEP) { + LLVMContext Ctx; + + Type *Int8PtrTy = Type::getInt8PtrTy(Ctx); + Type *Int32Ty = Type::getInt32Ty(Ctx); + + Module M("M", Ctx); + Function *F = Function::Create(FunctionType::get(Type::getVoidTy(Ctx), {}, + /*isVarArg=*/false), + GlobalValue::ExternalLinkage, "f", &M); + auto *BB = BasicBlock::Create(Ctx, "BB", F); + auto *RI = ReturnInst::Create(Ctx, BB); + + auto GEPOp = fuzzerop::gepDescriptor(1); + EXPECT_TRUE(GEPOp.SourcePreds[0].matches({}, UndefValue::get(Int8PtrTy))); + EXPECT_TRUE(GEPOp.SourcePreds[1].matches({UndefValue::get(Int8PtrTy)}, + ConstantInt::get(Int32Ty, 0))); + + GEPOp.BuilderFunc({UndefValue::get(Int8PtrTy), ConstantInt::get(Int32Ty, 0)}, + RI); + EXPECT_FALSE(verifyModule(M, &errs())); +} + +TEST(OperationsTest, ExtractAndInsertValue) { + LLVMContext Ctx; + + Type *Int8PtrTy = Type::getInt8PtrTy(Ctx); + Type *Int32Ty = Type::getInt32Ty(Ctx); + Type *Int64Ty = Type::getInt64Ty(Ctx); + + Type *StructTy = StructType::create(Ctx, {Int8PtrTy, Int32Ty}); + Type *OpaqueTy = StructType::create(Ctx, "OpaqueStruct"); + Type *ArrayTy = ArrayType::get(Int64Ty, 4); + Type *VectorTy = VectorType::get(Int32Ty, 2); + + auto EVOp = fuzzerop::extractValueDescriptor(1); + auto IVOp = fuzzerop::insertValueDescriptor(1); + + // Sanity check the source preds. + Constant *SVal = UndefValue::get(StructTy); + Constant *OVal = UndefValue::get(OpaqueTy); + Constant *AVal = UndefValue::get(ArrayTy); + Constant *VVal = UndefValue::get(VectorTy); + + EXPECT_TRUE(EVOp.SourcePreds[0].matches({}, SVal)); + EXPECT_TRUE(EVOp.SourcePreds[0].matches({}, OVal)); + EXPECT_TRUE(EVOp.SourcePreds[0].matches({}, AVal)); + EXPECT_FALSE(EVOp.SourcePreds[0].matches({}, VVal)); + EXPECT_TRUE(IVOp.SourcePreds[0].matches({}, SVal)); + EXPECT_TRUE(IVOp.SourcePreds[0].matches({}, OVal)); + EXPECT_TRUE(IVOp.SourcePreds[0].matches({}, AVal)); + EXPECT_FALSE(IVOp.SourcePreds[0].matches({}, VVal)); + + // Make sure we're range checking appropriately. + EXPECT_TRUE( + EVOp.SourcePreds[1].matches({SVal}, ConstantInt::get(Int32Ty, 0))); + EXPECT_TRUE( + EVOp.SourcePreds[1].matches({SVal}, ConstantInt::get(Int32Ty, 1))); + EXPECT_FALSE( + EVOp.SourcePreds[1].matches({SVal}, ConstantInt::get(Int32Ty, 2))); + EXPECT_FALSE( + EVOp.SourcePreds[1].matches({OVal}, ConstantInt::get(Int32Ty, 0))); + EXPECT_FALSE( + EVOp.SourcePreds[1].matches({OVal}, ConstantInt::get(Int32Ty, 65536))); + EXPECT_TRUE( + EVOp.SourcePreds[1].matches({AVal}, ConstantInt::get(Int32Ty, 0))); + EXPECT_TRUE( + EVOp.SourcePreds[1].matches({AVal}, ConstantInt::get(Int32Ty, 3))); + EXPECT_FALSE( + EVOp.SourcePreds[1].matches({AVal}, ConstantInt::get(Int32Ty, 4))); + + EXPECT_THAT( + EVOp.SourcePreds[1].generate({SVal}, {}), + ElementsAre(ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 1))); + + // InsertValue should accept any type in the struct, but only in positions + // where it makes sense. + EXPECT_TRUE(IVOp.SourcePreds[1].matches({SVal}, UndefValue::get(Int8PtrTy))); + EXPECT_TRUE(IVOp.SourcePreds[1].matches({SVal}, UndefValue::get(Int32Ty))); + EXPECT_FALSE(IVOp.SourcePreds[1].matches({SVal}, UndefValue::get(Int64Ty))); + EXPECT_FALSE(IVOp.SourcePreds[2].matches({SVal, UndefValue::get(Int32Ty)}, + ConstantInt::get(Int32Ty, 0))); + EXPECT_TRUE(IVOp.SourcePreds[2].matches({SVal, UndefValue::get(Int32Ty)}, + ConstantInt::get(Int32Ty, 1))); + + EXPECT_THAT(IVOp.SourcePreds[1].generate({SVal}, {}), + Each(AnyOf(HasType(Int32Ty), HasType(Int8PtrTy)))); + EXPECT_THAT( + IVOp.SourcePreds[2].generate({SVal, ConstantInt::get(Int32Ty, 0)}, {}), + ElementsAre(ConstantInt::get(Int32Ty, 1))); +} Index: unittests/FuzzMutate/ReservoirSamplerTest.cpp =================================================================== --- /dev/null +++ unittests/FuzzMutate/ReservoirSamplerTest.cpp @@ -0,0 +1,69 @@ +//===- ReservoirSampler.cpp - Tests for the ReservoirSampler --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/FuzzMutate/Random.h" +#include "gtest/gtest.h" +#include + +using namespace llvm; + +TEST(ReservoirSamplerTest, OneItem) { + std::mt19937 Rand; + auto Sampler = makeSampler(Rand, 7, 1); + ASSERT_FALSE(Sampler.isEmpty()); + ASSERT_EQ(7, Sampler.getSelection()); +} + +TEST(ReservoirSamplerTest, NoWeight) { + std::mt19937 Rand; + auto Sampler = makeSampler(Rand, 7, 0); + ASSERT_TRUE(Sampler.isEmpty()); +} + +TEST(ReservoirSamplerTest, Uniform) { + std::mt19937 Rand; + + // Run three chi-squared tests to check that the distribution is reasonably + // uniform. + std::vector Items = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + + int Failures = 0; + for (int Run = 0; Run < 3; ++Run) { + std::vector Counts(Items.size(), 0); + + // We need $np_s > 5$ at minimum, but we're better off going a couple of + // orders of magnitude larger. + int N = Items.size() * 5 * 100; + for (int I = 0; I < N; ++I) { + auto Sampler = makeSampler(Rand, Items); + Counts[Sampler.getSelection()] += 1; + } + + // Knuth. TAOCP Vol. 2, 3.3.1 (8): + // $V = \frac{1}{n} \sum_{s=1}^{k} \left(\frac{Y_s^2}{p_s}\right) - n$ + double Ps = 1.0 / Items.size(); + double Sum = 0.0; + for (int Ys : Counts) + Sum += Ys * Ys / Ps; + double V = (Sum / N) - N; + + assert(Items.size() == 10 && "Our chi-squared values assume 10 items"); + // Since we have 10 items, there are 9 degrees of freedom and the table of + // chi-squared values is as follows: + // + // | p=1% | 5% | 25% | 50% | 75% | 95% | 99% | + // v=9 | 2.088 | 3.325 | 5.899 | 8.343 | 11.39 | 16.92 | 21.67 | + // + // Check that we're in the likely range of results. + //if (V < 2.088 || V > 21.67) + if (V < 2.088 || V > 21.67) + ++Failures; + } + EXPECT_LT(Failures, 3) << "Non-uniform distribution?"; +}