Index: llvm/trunk/include/llvm/Transforms/Instrumentation.h
===================================================================
--- llvm/trunk/include/llvm/Transforms/Instrumentation.h
+++ llvm/trunk/include/llvm/Transforms/Instrumentation.h
@@ -123,6 +123,7 @@
   enum Type {
     ESAN_None = 0,
     ESAN_CacheFrag,
+    ESAN_WorkingSet,
   } ToolType;
 };
 
Index: llvm/trunk/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
+++ llvm/trunk/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
@@ -42,6 +42,9 @@
 static cl::opt<bool> ClToolCacheFrag("esan-cache-frag", cl::init(false),
                                      cl::desc("Detect data cache fragmentation"),
                                      cl::Hidden);
+static cl::opt<bool>
+    ClToolWorkingSet("esan-working-set", cl::init(false),
+                     cl::desc("Measure the working set size"), cl::Hidden);
 // Each new tool will get its own opt flag here.
 // These are converted to EfficiencySanitizerOptions for use
 // in the code.
@@ -65,12 +68,31 @@
 static const char *const EsanInitName = "__esan_init";
 static const char *const EsanExitName = "__esan_exit";
 
+// We must keep these Shadow* constants consistent with the esan runtime.
+// FIXME: Try to place these shadow constants, the names of the __esan_*
+// interface functions, and the ToolType enum into a header shared between
+// llvm and compiler-rt.
+static const uint64_t ShadowMask = 0x00000fffffffffffull;
+static const uint64_t ShadowOffs[3] = { // Indexed by scale
+  0x0000130000000000ull,
+  0x0000220000000000ull,
+  0x0000440000000000ull,
+};
+// This array is indexed by the ToolType enum.
+static const int ShadowScale[] = {
+  0, // ESAN_None.
+  2, // ESAN_CacheFrag: 4B:1B, so 4 to 1 == >>2.
+  6, // ESAN_WorkingSet: 64B:1B, so 64 to 1 == >>6.
+};
+
 namespace {
 
 static EfficiencySanitizerOptions
 OverrideOptionsFromCL(EfficiencySanitizerOptions Options) {
   if (ClToolCacheFrag)
     Options.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag;
+  else if (ClToolWorkingSet)
+    Options.ToolType = EfficiencySanitizerOptions::ESAN_WorkingSet;
 
   // Direct opt invocation with no params will have the default ESAN_None.
   // We run the default tool in that case.
@@ -100,11 +122,14 @@
   bool instrumentMemIntrinsic(MemIntrinsic *MI);
   bool shouldIgnoreMemoryAccess(Instruction *I);
   int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL);
+  Value *appToShadow(Value *Shadow, IRBuilder<> &IRB);
   bool instrumentFastpath(Instruction *I, const DataLayout &DL, bool IsStore,
                           Value *Addr, unsigned Alignment);
   // Each tool has its own fastpath routine:
   bool instrumentFastpathCacheFrag(Instruction *I, const DataLayout &DL,
                                    Value *Addr, unsigned Alignment);
+  bool instrumentFastpathWorkingSet(Instruction *I, const DataLayout &DL,
+                                    Value *Addr, unsigned Alignment);
 
   EfficiencySanitizerOptions Options;
   LLVMContext *Ctx;
@@ -226,11 +251,33 @@
   return true;
 }
 
+// Emits, via IRB, the computation that turns an application address into the
+// shadow address for the current tool.  On entry Shadow holds the application
+// address as an integer; the value returned is the resulting integer.
+Value *EfficiencySanitizer::appToShadow(Value *Shadow, IRBuilder<> &IRB) {
+  // Shadow = ((App & Mask) + Offs) >> Scale
+  Shadow = IRB.CreateAnd(Shadow, ConstantInt::get(IntptrTy, ShadowMask));
+  uint64_t Offs;
+  int Scale = ShadowScale[Options.ToolType];
+  if (Scale <= 2)
+    Offs = ShadowOffs[Scale];
+  else
+    Offs = ShadowOffs[0] << Scale;
+  Shadow = IRB.CreateAdd(Shadow, ConstantInt::get(IntptrTy, Offs));
+  if (Scale > 0)
+    Shadow = IRB.CreateLShr(Shadow, Scale);
+  return Shadow;
+}
+
 bool EfficiencySanitizer::shouldIgnoreMemoryAccess(Instruction *I) {
   if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
     // We'd like to know about cache fragmentation in vtable accesses and
     // constant data references, so we do not currently ignore anything.
     return false;
+  } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) {
+    // TODO: the instrumentation disturbs the data layout on the stack, so we
+    // may want to add an option to ignore stack references (if we can
+    // distinguish them) to reduce overhead.
   }
   // TODO(bruening): future tools will be returning true for some cases.
   return false;
@@ -309,6 +356,11 @@
   Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
   const uint32_t TypeSizeBytes = DL.getTypeStoreSizeInBits(OrigTy) / 8;
   Value *OnAccessFunc = nullptr;
+
+  // Convert 0 to the default alignment.
+  if (Alignment == 0)
+    Alignment = DL.getPrefTypeAlignment(OrigTy);
+
   if (IsStore)
     NumInstrumentedStores++;
   else
@@ -384,6 +436,8 @@
                                              Value *Addr, unsigned Alignment) {
   if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
     return instrumentFastpathCacheFrag(I, DL, Addr, Alignment);
+  } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) {
+    return instrumentFastpathWorkingSet(I, DL, Addr, Alignment);
   }
   return false;
 }
@@ -395,3 +449,59 @@
   // TODO(bruening): implement a fastpath for aligned accesses
   return false;
 }
+
+// Inlines the working set tool's shadow update in front of I for an access
+// that cannot straddle a cache line.  Returns false, without emitting any
+// instructions, when the access might cross a line; returns true otherwise.
+bool EfficiencySanitizer::instrumentFastpathWorkingSet(
+    Instruction *I, const DataLayout &DL, Value *Addr, unsigned Alignment) {
+  assert(ShadowScale[Options.ToolType] == 6); // The code below assumes this
+  IRBuilder<> IRB(I);
+  Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
+  const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
+  // Bail to the slowpath if the access might touch multiple cache lines.
+  // An access aligned to its size is guaranteed to be intra-cache-line.
+  // TypeSize is in bits: getMemoryAccessFuncIndex has already ruled out a
+  // size larger than 16 bytes (128 bits), which is well within the 64-byte
+  // cache lines that this tool and our shadow memory setup assume.
+  assert(TypeSize <= 128);
+  if (!(TypeSize == 8 ||
+        (Alignment % (TypeSize / 8)) == 0))
+    return false;
+
+  // We inline instrumentation to set the corresponding shadow bits for
+  // each cache line touched by the application.  Here we handle a single
+  // load or store where we've already ruled out the possibility that it
+  // might touch more than one cache line and thus we simply update the
+  // shadow memory for a single cache line.
+  // Our shadow memory model is fine with races when manipulating shadow values.
+  // We generate the following code:
+  //
+  //   const char BitMask = 0x81;
+  //   char *ShadowAddr = appToShadow(AppAddr);
+  //   if ((*ShadowAddr & BitMask) != BitMask)
+  //     *ShadowAddr |= BitMask;
+  //
+  Value *AddrPtr = IRB.CreatePointerCast(Addr, IntptrTy);
+  Value *ShadowPtr = appToShadow(AddrPtr, IRB);
+  Type *ShadowTy = IntegerType::get(*Ctx, 8U);
+  Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
+  // The bottom bit is used for the current sampling period's working set.
+  // The top bit is used for the total working set.  We set both on each
+  // memory access, if they are not already set.
+  Value *ValueMask = ConstantInt::get(ShadowTy, 0x81); // 10000001B
+
+  Value *OldValue = IRB.CreateLoad(IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
+  // The AND and CMP will be turned into a TEST instruction by the compiler.
+  Value *Cmp = IRB.CreateICmpNE(IRB.CreateAnd(OldValue, ValueMask), ValueMask);
+  TerminatorInst *CmpTerm = SplitBlockAndInsertIfThen(Cmp, I, false);
+  // FIXME: do I need to call SetCurrentDebugLocation?
+  IRB.SetInsertPoint(CmpTerm);
+  // We use OR to set the shadow bits to avoid corrupting the middle 6 bits,
+  // which are used by the runtime library.
+  Value *NewVal = IRB.CreateOr(OldValue, ValueMask);
+  IRB.CreateStore(NewVal, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
+  IRB.SetInsertPoint(I);
+
+  return true;
+}
Index: llvm/trunk/test/Instrumentation/EfficiencySanitizer/working_set_basic.ll
===================================================================
--- llvm/trunk/test/Instrumentation/EfficiencySanitizer/working_set_basic.ll
+++ llvm/trunk/test/Instrumentation/EfficiencySanitizer/working_set_basic.ll
@@ -0,0 +1,164 @@
+; Test basic EfficiencySanitizer working set instrumentation.
+;
+; RUN: opt < %s -esan -esan-working-set -S | FileCheck %s
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Intra-cache-line
+
+define i8 @aligned1(i8* %a) {
+entry:
+  %tmp1 = load i8, i8* %a, align 1
+  ret i8 %tmp1
+; CHECK: @llvm.global_ctors = {{.*}}@esan.module_ctor
+; CHECK: %0 = ptrtoint i8* %a to i64
+; CHECK-NEXT: %1 = and i64 %0, 17592186044415
+; CHECK-NEXT: %2 = add i64 %1, 1337006139375616
+; CHECK-NEXT: %3 = lshr i64 %2, 6
+; CHECK-NEXT: %4 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: %5 = load i8, i8* %4
+; CHECK-NEXT: %6 = and i8 %5, -127
+; CHECK-NEXT: %7 = icmp ne i8 %6, -127
+; CHECK-NEXT: br i1 %7, label %8, label %11
+; CHECK: %9 = or i8 %5, -127
+; CHECK-NEXT: %10 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: store i8 %9, i8* %10
+; CHECK-NEXT: br label %11
+; CHECK: %tmp1 = load i8, i8* %a, align 1
+; CHECK-NEXT: ret i8 %tmp1
+}
+
+define i16 @aligned2(i16* %a) {
+entry:
+  %tmp1 = load i16, i16* %a, align 2
+  ret i16 %tmp1
+; CHECK: %0 = ptrtoint i16* %a to i64
+; CHECK-NEXT: %1 = and i64 %0, 17592186044415
+; CHECK-NEXT: %2 = add i64 %1, 1337006139375616
+; CHECK-NEXT: %3 = lshr i64 %2, 6
+; CHECK-NEXT: %4 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: %5 = load i8, i8* %4
+; CHECK-NEXT: %6 = and i8 %5, -127
+; CHECK-NEXT: %7 = icmp ne i8 %6, -127
+; CHECK-NEXT: br i1 %7, label %8, label %11
+; CHECK: %9 = or i8 %5, -127
+; CHECK-NEXT: %10 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: store i8 %9, i8* %10
+; CHECK-NEXT: br label %11
+; CHECK: %tmp1 = load i16, i16* %a, align 2
+; CHECK-NEXT: ret i16 %tmp1
+}
+
+define i32 @aligned4(i32* %a) {
+entry:
+  %tmp1 = load i32, i32* %a, align 4
+  ret i32 %tmp1
+; CHECK: %0 = ptrtoint i32* %a to i64
+; CHECK-NEXT: %1 = and i64 %0, 17592186044415
+; CHECK-NEXT: %2 = add i64 %1, 1337006139375616
+; CHECK-NEXT: %3 = lshr i64 %2, 6
+; CHECK-NEXT: %4 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: %5 = load i8, i8* %4
+; CHECK-NEXT: %6 = and i8 %5, -127
+; CHECK-NEXT: %7 = icmp ne i8 %6, -127
+; CHECK-NEXT: br i1 %7, label %8, label %11
+; CHECK: %9 = or i8 %5, -127
+; CHECK-NEXT: %10 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: store i8 %9, i8* %10
+; CHECK-NEXT: br label %11
+; CHECK: %tmp1 = load i32, i32* %a, align 4
+; CHECK-NEXT: ret i32 %tmp1
+}
+
+define i64 @aligned8(i64* %a) {
+entry:
+  %tmp1 = load i64, i64* %a, align 8
+  ret i64 %tmp1
+; CHECK: %0 = ptrtoint i64* %a to i64
+; CHECK-NEXT: %1 = and i64 %0, 17592186044415
+; CHECK-NEXT: %2 = add i64 %1, 1337006139375616
+; CHECK-NEXT: %3 = lshr i64 %2, 6
+; CHECK-NEXT: %4 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: %5 = load i8, i8* %4
+; CHECK-NEXT: %6 = and i8 %5, -127
+; CHECK-NEXT: %7 = icmp ne i8 %6, -127
+; CHECK-NEXT: br i1 %7, label %8, label %11
+; CHECK: %9 = or i8 %5, -127
+; CHECK-NEXT: %10 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: store i8 %9, i8* %10
+; CHECK-NEXT: br label %11
+; CHECK: %tmp1 = load i64, i64* %a, align 8
+; CHECK-NEXT: ret i64 %tmp1
+}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Not guaranteed to be intra-cache-line
+
+define i16 @unaligned2(i16* %a) {
+entry:
+  %tmp1 = load i16, i16* %a, align 1
+  ret i16 %tmp1
+; CHECK: %0 = bitcast i16* %a to i8*
+; CHECK-NEXT: call void @__esan_unaligned_load2(i8* %0)
+; CHECK-NEXT: %tmp1 = load i16, i16* %a, align 1
+; CHECK-NEXT: ret i16 %tmp1
+}
+
+define i32 @unaligned4(i32* %a) {
+entry:
+  %tmp1 = load i32, i32* %a, align 2
+  ret i32 %tmp1
+; CHECK: %0 = bitcast i32* %a to i8*
+; CHECK-NEXT: call void @__esan_unaligned_load4(i8* %0)
+; CHECK-NEXT: %tmp1 = load i32, i32* %a, align 2
+; CHECK-NEXT: ret i32 %tmp1
+}
+
+define i64 @unaligned8(i64* %a) {
+entry:
+  %tmp1 = load i64, i64* %a, align 4
+  ret i64 %tmp1
+; CHECK: %0 = bitcast i64* %a to i8*
+; CHECK-NEXT: call void @__esan_unaligned_load8(i8* %0)
+; CHECK-NEXT: %tmp1 = load i64, i64* %a, align 4
+; CHECK-NEXT: ret i64 %tmp1
+}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Ensure that esan converts intrinsics to calls:
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1)
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1)
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+
+define void @memCpyTest(i8* nocapture %x, i8* nocapture %y) {
+entry:
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %x, i8* %y, i64 16, i32 4, i1 false)
+  ret void
+; CHECK: define void @memCpyTest
+; CHECK: call i8* @memcpy
+; CHECK: ret void
+}
+
+define void @memMoveTest(i8* nocapture %x, i8* nocapture %y) {
+entry:
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %x, i8* %y, i64 16, i32 4, i1 false)
+  ret void
+; CHECK: define void @memMoveTest
+; CHECK: call i8* @memmove
+; CHECK: ret void
+}
+
+define void @memSetTest(i8* nocapture %x) {
+entry:
+  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 77, i64 16, i32 4, i1 false)
+  ret void
+; CHECK: define void @memSetTest
+; CHECK: call i8* @memset
+; CHECK: ret void
+}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Top-level:
+
+; CHECK: define internal void @esan.module_ctor()
+; CHECK: call void @__esan_init(i32 2, i64 ptrtoint (i64* @0 to i64))