diff --git a/llvm/include/llvm/FuzzMutate/RandomIRBuilder.h b/llvm/include/llvm/FuzzMutate/RandomIRBuilder.h --- a/llvm/include/llvm/FuzzMutate/RandomIRBuilder.h +++ b/llvm/include/llvm/FuzzMutate/RandomIRBuilder.h @@ -23,6 +23,10 @@ class LLVMContext; class Type; class Value; +class Module; +class Function; +class GlobalVariable; +class AllocaInst; namespace fuzzerop { class SourcePred; } @@ -38,6 +42,23 @@ // TODO: Try to make this a bit less of a random mishmash of functions. + /// Create a stack memory at the head of the function, store \c Init to the + /// memory if provided. + AllocaInst *createStackMemory(Function *F, Type *Ty, Value *Init = nullptr); + /// Find or create a global variable. It will be initialized by random + /// constants that satisfies \c Pred. \c DidCreate will report whether we + /// found a global variable or created a new one. + GlobalVariable *findOrCreateGlobalVariable(Module *M, ArrayRef Srcs, + fuzzerop::SourcePred Pred, + bool *DidCreate = nullptr); + enum SourceType { + SrcFromInstInCurBlock, + FunctionArgument, + InstInDominator, + SrcFromGlobalVariable, + NewConstOrStack, + EndOfValueSource, + }; /// Find a "source" for some operation, which will be used in one of the /// operation's operands. This either selects an instruction in \c Insts or /// returns some new arbitrary Value. @@ -54,11 +75,22 @@ Value *newSource(BasicBlock &BB, ArrayRef Insts, ArrayRef Srcs, fuzzerop::SourcePred Pred, bool allowConstant = true); + + enum SinkType { + /// TODO: Also consider pointers in function argument. + SinkToInstInCurBlock, + PointersInDominator, + InstInDominatee, + NewStore, + SinkToGlobalVariable, + EndOfValueSink, + }; /// Find a viable user for \c V in \c Insts, which should all be contained in /// \c BB. This may also create some new instruction in \c BB and use that. - void connectToSink(BasicBlock &BB, ArrayRef Insts, Value *V); + Instruction *connectToSink(BasicBlock &BB, ArrayRef Insts, + Value *V); /// Create a user for \c V in \c BB. - void newSink(BasicBlock &BB, ArrayRef Insts, Value *V); + Instruction *newSink(BasicBlock &BB, ArrayRef Insts, Value *V); Value *findPointer(BasicBlock &BB, ArrayRef Insts, ArrayRef Srcs, fuzzerop::SourcePred Pred); Type *chooseType(LLVMContext &Context, ArrayRef Srcs, diff --git a/llvm/lib/FuzzMutate/RandomIRBuilder.cpp b/llvm/lib/FuzzMutate/RandomIRBuilder.cpp --- a/llvm/lib/FuzzMutate/RandomIRBuilder.cpp +++ b/llvm/lib/FuzzMutate/RandomIRBuilder.cpp @@ -13,12 +13,92 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" using namespace llvm; using namespace fuzzerop; +/// Return a vector of Blocks that dominates this block, excluding current +/// block. +template +static std::vector getDominators(BasicBlock *BB) { + std::vector ret; + DomTree DT(*BB->getParent()); + DomTreeNode *Node = DT[BB]->getIDom(); + while (Node && Node->getBlock()) { + ret.push_back(Node->getBlock()); + // Get parent block. + Node = Node->getIDom(); + } + return ret; +} + +/// Return a vector of Blocks that is dominated by this block, excluding current +/// block +template +static std::vector getDominatees(BasicBlock *BB) { + DomTree DT(*BB->getParent()); + std::vector ret; + for (DomTreeNode *Child : DT[BB]->children()) + ret.push_back(Child->getBlock()); + uint64_t Idx = 0; + while (Idx < ret.size()) { + DomTreeNode *Node = DT[ret[Idx]]; + Idx++; + for (DomTreeNode *Child : Node->children()) + ret.push_back(Child->getBlock()); + } + return ret; +} + +AllocaInst *RandomIRBuilder::createStackMemory(Function *F, Type *Ty, + Value *Init) { + /// TODO: For all Allocas, maybe allocate an array. + BasicBlock *EntryBB = &F->getEntryBlock(); + DataLayout DL(F->getParent()); + AllocaInst *Alloca = new AllocaInst(Ty, DL.getProgramAddressSpace(), "A", + &*EntryBB->getFirstInsertionPt()); + if (Init) + new StoreInst(Init, Alloca, Alloca->getNextNode()); + return Alloca; +} + +GlobalVariable * +RandomIRBuilder::findOrCreateGlobalVariable(Module *M, ArrayRef Srcs, + fuzzerop::SourcePred Pred, + bool *DidCreate) { + auto MatchesPred = [&Srcs, &Pred](GlobalVariable *GV) { + // Can't directly compare GV's type, as it would be a pointer to the actual + // type. + return Pred.matches(Srcs, GV->getInitializer()); + }; + SmallVector GlobalVars; + for (GlobalVariable &GV : M->globals()) { + GlobalVars.push_back(&GV); + } + auto RS = makeSampler(Rand, make_filter_range(GlobalVars, MatchesPred)); + RS.sample(nullptr, 1); + GlobalVariable *GV = RS.getSelection(); + if (!GV) { + if (DidCreate) + *DidCreate = true; + using LinkageTypes = GlobalVariable::LinkageTypes; + auto TRS = makeSampler(Rand); + /// FIXME: This might be incorrect for operands that needs to be constant. + /// We shouldn't generate a constnat and save it. + TRS.sample(Pred.generate(Srcs, KnownTypes)); + Constant *Init = TRS.getSelection(); + Type *Ty = Init->getType(); + GV = new GlobalVariable(*M, Ty, false, LinkageTypes::ExternalLinkage, Init, + "G"); + } + return GV; +} + Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB, ArrayRef Insts) { return findOrCreateSource(BB, Insts, {}, anyType()); @@ -29,15 +109,90 @@ ArrayRef Srcs, SourcePred Pred, bool allowConstant) { - auto MatchesPred = [&Srcs, &Pred](Instruction *Inst) { - return Pred.matches(Srcs, Inst); - }; - auto RS = makeSampler(Rand, make_filter_range(Insts, MatchesPred)); - // Also consider choosing no source, meaning we want a new one. - RS.sample(nullptr, /*Weight=*/1); - if (Instruction *Src = RS.getSelection()) - return Src; - return newSource(BB, Insts, Srcs, Pred, allowConstant); + auto MatchesPred = [&Srcs, &Pred](Value *V) { return Pred.matches(Srcs, V); }; + SmallVector SrcTys; + for (uint64_t i = 0; i < EndOfValueSource; i++) + SrcTys.push_back(i); + std::shuffle(SrcTys.begin(), SrcTys.end(), Rand); + for (uint64_t SrcTy : SrcTys) { + switch (SrcTy) { + case SrcFromInstInCurBlock: { + auto RS = makeSampler(Rand, make_filter_range(Insts, MatchesPred)); + if (!RS.isEmpty()) { + return RS.getSelection(); + } + break; + } + case FunctionArgument: { + Function *F = BB.getParent(); + // Somehow I can't use iterators to init these vectors, it will have type + // mismatch. + std::vector Args; + for (uint64_t i = 0; i < F->arg_size(); i++) { + Args.push_back(F->getArg(i)); + } + auto RS = makeSampler(Rand, make_filter_range(Args, MatchesPred)); + if (!RS.isEmpty()) { + return RS.getSelection(); + } + break; + } + case InstInDominator: { + auto Dominators = getDominators(&BB); + std::shuffle(Dominators.begin(), Dominators.end(), Rand); + for (BasicBlock *Dom : Dominators) { + // Somehow I can't use iterators to init these vectors, it will have + // type mismatch. + std::vector Instructions; + for (Instruction &I : *Dom) { + Instructions.push_back(&I); + } + auto RS = + makeSampler(Rand, make_filter_range(Instructions, MatchesPred)); + // Also consider choosing no source, meaning we want a new one. + if (!RS.isEmpty()) { + return RS.getSelection(); + } + } + break; + } + case SrcFromGlobalVariable: { + Module *M = BB.getParent()->getParent(); + bool DidCreate = false; + GlobalVariable *GV = + findOrCreateGlobalVariable(M, Srcs, Pred, &DidCreate); + Type *Ty = GV->getInitializer()->getType(); + LoadInst *LoadGV = nullptr; + if (BB.getTerminator()) { + LoadGV = new LoadInst(Ty, GV, "LGV", &*BB.getFirstInsertionPt()); + } else { + LoadGV = new LoadInst(Ty, GV, "LGV", &BB); + } + // Because we might be generating new values, we have to check if it + // matches again. + if (DidCreate) { + if (Pred.matches(Srcs, LoadGV)) { + return LoadGV; + } else { + LoadGV->eraseFromParent(); + // If no one is using this GlobalVariable, delete it too. + if (GV->hasNUses(0)) { + GV->eraseFromParent(); + } + } + } + break; + } + case NewConstOrStack: { + return newSource(BB, Insts, Srcs, Pred, allowConstant); + } + default: + case EndOfValueSource: { + llvm_unreachable("EndOfValueSource executed"); + } + } + } + llvm_unreachable("Can't find a source"); } Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef Insts, @@ -76,12 +231,7 @@ if (!allowConstant && isa(newSrc)) { Type *Ty = newSrc->getType(); Function *F = BB.getParent(); - BasicBlock *EntryBB = &F->getEntryBlock(); - /// TODO: For all Allocas, maybe allocate an array. - DataLayout DL(BB.getParent()->getParent()); - AllocaInst *Alloca = new AllocaInst(Ty, DL.getProgramAddressSpace(), "A", - EntryBB->getTerminator()); - new StoreInst(newSrc, Alloca, EntryBB->getTerminator()); + AllocaInst *Alloca = createStackMemory(F, Ty, newSrc); if (BB.getTerminator()) { newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L", BB.getTerminator()); } else { @@ -125,42 +275,103 @@ return true; } -void RandomIRBuilder::connectToSink(BasicBlock &BB, - ArrayRef Insts, Value *V) { - auto RS = makeSampler(Rand); - for (auto &I : Insts) { - if (isa(I)) - // TODO: Replacing operands of intrinsics would be interesting, but - // there's no easy way to verify that a given replacement is valid given - // that intrinsics can impose arbitrary constraints. - continue; - for (Use &U : I->operands()) - if (isCompatibleReplacement(I, U, V)) - RS.sample(&U, 1); - } - // Also consider choosing no sink, meaning we want a new one. - RS.sample(nullptr, /*Weight=*/1); - - if (Use *Sink = RS.getSelection()) { - User *U = Sink->getUser(); - unsigned OpNo = Sink->getOperandNo(); - U->setOperand(OpNo, V); - return; +Instruction *RandomIRBuilder::connectToSink(BasicBlock &BB, + ArrayRef Insts, + Value *V) { + SmallVector SinkTys; + for (uint64_t i = 0; i < EndOfValueSink; i++) + SinkTys.push_back(i); + std::shuffle(SinkTys.begin(), SinkTys.end(), Rand); + auto findSinkAndConnect = + [this, V](ArrayRef Instructions) -> Instruction * { + auto RS = makeSampler(Rand); + for (auto &I : Instructions) { + if (isa(I)) + // TODO: Replacing operands of intrinsics would be interesting, + // but there's no easy way to verify that a given replacement is + // valid given that intrinsics can impose arbitrary constraints. + continue; + for (Use &U : I->operands()) + if (isCompatibleReplacement(I, U, V)) + RS.sample(&U, 1); + } + if (!RS.isEmpty()) { + Use *Sink = RS.getSelection(); + User *U = Sink->getUser(); + unsigned OpNo = Sink->getOperandNo(); + U->setOperand(OpNo, V); + return cast(U); + } + return nullptr; + }; + Instruction *Sink = nullptr; + for (uint64_t SinkTy : SinkTys) { + switch (SinkTy) { + case SinkToInstInCurBlock: + Sink = findSinkAndConnect(Insts); + if (Sink) + return Sink; + break; + case PointersInDominator: { + auto Dominators = getDominators(&BB); + std::shuffle(Dominators.begin(), Dominators.end(), Rand); + for (BasicBlock *Dom : Dominators) { + for (Instruction &I : *Dom) { + if (PointerType *PtrTy = dyn_cast(I.getType())) { + if (PtrTy->isOpaqueOrPointeeTypeMatches(V->getType())) { + return new StoreInst(V, &I, Insts.back()); + } + } + } + } + break; + } + case InstInDominatee: { + auto Dominatees = getDominatees(&BB); + std::shuffle(Dominatees.begin(), Dominatees.end(), Rand); + for (BasicBlock *Dominee : Dominatees) { + std::vector Instructions; + for (Instruction &I : *Dominee) + Instructions.push_back(&I); + Sink = findSinkAndConnect(Instructions); + if (Sink) { + return Sink; + } + } + break; + } + case NewStore: + /// TODO: allocate a new stack memory. + return newSink(BB, Insts, V); + break; + case SinkToGlobalVariable: { + Module *M = BB.getParent()->getParent(); + GlobalVariable *GV = + findOrCreateGlobalVariable(M, {}, fuzzerop::onlyType(V->getType())); + return new StoreInst(V, GV, Insts.back()); + break; + } + case EndOfValueSink: + default: + llvm_unreachable("EndOfValueSink executed"); + }; } - newSink(BB, Insts, V); + llvm_unreachable("Can't find a sink"); } -void RandomIRBuilder::newSink(BasicBlock &BB, ArrayRef Insts, - Value *V) { +Instruction *RandomIRBuilder::newSink(BasicBlock &BB, + ArrayRef Insts, Value *V) { Value *Ptr = findPointer(BB, Insts, {V}, matchFirstType()); if (!Ptr) { - if (uniform(Rand, 0, 1)) - Ptr = new AllocaInst(V->getType(), 0, "A", &*BB.getFirstInsertionPt()); - else + if (uniform(Rand, 0, 1)) { + Type *Ty = V->getType(); + Ptr = createStackMemory(BB.getParent(), Ty, UndefValue::get(Ty)); + } else { Ptr = UndefValue::get(PointerType::get(V->getType(), 0)); + } } - new StoreInst(V, Ptr, Insts.back()); + return new StoreInst(V, Ptr, Insts.back()); } Value *RandomIRBuilder::findPointer(BasicBlock &BB, diff --git a/llvm/unittests/FuzzMutate/RandomIRBuilderTest.cpp b/llvm/unittests/FuzzMutate/RandomIRBuilderTest.cpp --- a/llvm/unittests/FuzzMutate/RandomIRBuilderTest.cpp +++ b/llvm/unittests/FuzzMutate/RandomIRBuilderTest.cpp @@ -15,6 +15,7 @@ #include "llvm/FuzzMutate/Operations.h" #include "llvm/FuzzMutate/Random.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" @@ -259,7 +260,11 @@ for (int i = 0; i < 10; ++i) { Value *V = IB.findOrCreateSource(BB, {FuncPtr, OpaquePtr}); - ASSERT_FALSE(isa(V)); + // To make sure we are allowed to load from a global variable + if (LoadInst *LI = dyn_cast(V)) { + ASSERT_TRUE(LI->getOperand(0) != FuncPtr); + ASSERT_TRUE(LI->getOperand(0) != OpaquePtr); + } } } @@ -338,4 +343,134 @@ } } +TEST(RandomIRBuilderTest, createStackMemory) { + LLVMContext Ctx; + const char *SourceCode = "\n\ + define void @test(i1 %C1, i1 %C2, i32 %I, i32 %J) { \n\ + Entry: \n\ + ret void \n\ + }"; + std::vector Types = {Type::getInt32Ty(Ctx), Type::getInt64Ty(Ctx)}; + RandomIRBuilder IB(Seed, Types); + std::unique_ptr M1 = parseAssembly(SourceCode, Ctx); + Function &F1 = *M1->getFunction("test"); + IB.createStackMemory(&F1, Types[0]); + ASSERT_FALSE(verifyModule(*M1, &errs())); + + std::unique_ptr M2 = parseAssembly(SourceCode, Ctx); + Function &F2 = *M2->getFunction("test"); + IB.createStackMemory(&F2, Types[1], + ConstantInt::get(Types[1], APInt(64, 42))); + ASSERT_FALSE(verifyModule(*M2, &errs())); +} + +TEST(RandomIRBuilderTest, findOrCreateGlobalVariable) { + LLVMContext Ctx; + const char *SourceCode = "\n\ + @G = global i32 1 \n\ + "; + std::vector Types = {Type::getInt32Ty(Ctx), Type::getInt64Ty(Ctx)}; + RandomIRBuilder IB(Seed, Types); + std::unique_ptr M1 = parseAssembly(SourceCode, Ctx); + bool DidCreate = false; + IB.findOrCreateGlobalVariable(&*M1, {}, fuzzerop::onlyType(Types[0])); + ASSERT_FALSE(verifyModule(*M1, &errs())); + unsigned NumGV = M1->getNumNamedValues(); + IB.findOrCreateGlobalVariable(&*M1, {}, fuzzerop::onlyType(Types[0]), + &DidCreate); + ASSERT_FALSE(verifyModule(*M1, &errs())); + ASSERT_EQ(M1->getNumNamedValues(), NumGV + DidCreate); + + DidCreate = false; + std::unique_ptr M2 = parseAssembly(SourceCode, Ctx); + IB.findOrCreateGlobalVariable(&*M1, {}, fuzzerop::onlyType(Types[1]), + &DidCreate); + ASSERT_FALSE(verifyModule(*M2, &errs())); + ASSERT_TRUE(DidCreate); +} + +/// Checks if the source and sink we find for an instruction has correct +/// domination relation. +TEST(RandomIRBuilderTest, findSourceAndSink) { + const char *Source = "\n\ + define i64 @test(i1 %0, i1 %1, i1 %2, i32 %3, i32 %4) { \n\ + Entry: \n\ + %A = alloca i32, i32 8, align 4 \n\ + %E.1 = and i32 %3, %4 \n\ + %E.2 = add i32 %4 , 1 \n\ + %A.GEP.1 = getelementptr i32, ptr %A, i32 0 \n\ + %A.GEP.2 = getelementptr i32, ptr %A.GEP.1, i32 1 \n\ + %L.2 = load i32, ptr %A.GEP.2 \n\ + %L.1 = load i32, ptr %A.GEP.1 \n\ + %E.3 = sub i32 %E.2, %L.1 \n\ + %Cond.1 = icmp eq i32 %E.3, %E.2 \n\ + %Cond.2 = and i1 %0, %1 \n\ + %Cond = or i1 %Cond.1, %Cond.2 \n\ + br i1 %Cond, label %BB0, label %BB1 \n\ + BB0: \n\ + %Add = add i32 %L.1, %L.2 \n\ + %Sub = sub i32 %L.1, %L.2 \n\ + %Sub.1 = sub i32 %Sub, 12 \n\ + %Cast.1 = bitcast i32 %4 to float \n\ + %Add.2 = add i32 %3, 1 \n\ + %Cast.2 = bitcast i32 %Add.2 to float \n\ + %FAdd = fadd float %Cast.1, %Cast.2 \n\ + %Add.3 = add i32 %L.2, %L.1 \n\ + %Cast.3 = bitcast float %FAdd to i32 \n\ + %Sub.2 = sub i32 %Cast.3, %Sub.1 \n\ + %SExt = sext i32 %Cast.3 to i64 \n\ + %A.GEP.3 = getelementptr i64, ptr %A, i32 1 \n\ + store i64 %SExt, ptr %A.GEP.3 \n\ + br label %Exit \n\ + BB1: \n\ + %PHI.1 = phi i32 [0, %Entry] \n\ + %SExt.1 = sext i1 %Cond.2 to i32 \n\ + %SExt.2 = sext i1 %Cond.1 to i32 \n\ + %E.164 = zext i32 %E.1 to i64 \n\ + %E.264 = zext i32 %E.2 to i64 \n\ + %E.1264 = mul i64 %E.164, %E.264 \n\ + %E.12 = trunc i64 %E.1264 to i32 \n\ + %A.GEP.4 = getelementptr i32, ptr %A, i32 2 \n\ + %A.GEP.5 = getelementptr i32, ptr %A.GEP.4, i32 2 \n\ + store i32 %E.12, ptr %A.GEP.5 \n\ + br label %Exit \n\ + Exit: \n\ + %PHI.2 = phi i32 [%Add, %BB0], [%E.3, %BB1] \n\ + %PHI.3 = phi i64 [%SExt, %BB0], [%E.1264, %BB1] \n\ + %ZExt = zext i32 %PHI.2 to i64 \n\ + %Add.5 = add i64 %PHI.3, 3 \n\ + ret i64 %Add.5 \n\ + }"; + LLVMContext Ctx; + std::vector Types = {Type::getInt1Ty(Ctx), Type::getInt32Ty(Ctx), + Type::getInt64Ty(Ctx)}; + std::mt19937 mt(Seed); + std::uniform_int_distribution RandInt(INT_MIN, INT_MAX); + + // Get a random instruction, try to find source and sink, make sure it is + // dominated. + for (int i = 0; i < 100; i++) { + RandomIRBuilder IB(RandInt(mt), Types); + std::unique_ptr M = parseAssembly(Source, Ctx); + Function &F = *M->getFunction("test"); + DominatorTree DT(F); + BasicBlock *BB = makeSampler(IB.Rand, make_pointer_range(F)).getSelection(); + SmallVector Insts; + for (auto I = BB->getFirstInsertionPt(), E = BB->end(); I != E; ++I) + Insts.push_back(&*I); + // Choose an insertion point for our new instruction. + size_t IP = uniform(IB.Rand, 1, Insts.size() - 2); + + auto InstsBefore = makeArrayRef(Insts).slice(0, IP); + auto InstsAfter = makeArrayRef(Insts).slice(IP); + Value *Src = IB.findOrCreateSource( + *BB, InstsBefore, {}, fuzzerop::onlyType(Types[i % Types.size()])); + ASSERT_TRUE(DT.dominates(Src, Insts[IP + 1])); + Instruction *Sink = IB.connectToSink(*BB, InstsAfter, Insts[IP - 1]); + if (!DT.dominates(Insts[IP - 1], Sink)) { + errs() << *Insts[IP - 1] << "\n" << *Sink << "\n "; + } + ASSERT_TRUE(DT.dominates(Insts[IP - 1], Sink)); + } +} } // namespace