Index: llvm/trunk/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
+++ llvm/trunk/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
@@ -57,6 +57,14 @@
     "esan-instrument-memintrinsics", cl::init(true),
    cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden);
 
+// Experiments show that the performance difference can be 2x or more,
+// and accuracy loss is typically negligible, so we turn this on by default.
+static cl::opt<bool> ClAssumeIntraCacheLine(
+    "esan-assume-intra-cache-line", cl::init(true),
+    cl::desc("Assume each memory access touches just one cache line, for "
+             "better performance but with a potential loss of accuracy."),
+    cl::Hidden);
+
 STATISTIC(NumInstrumentedLoads, "Number of instrumented loads");
 STATISTIC(NumInstrumentedStores, "Number of instrumented stores");
 STATISTIC(NumFastpaths, "Number of instrumented fastpaths");
@@ -65,6 +73,8 @@
 STATISTIC(NumIgnoredStructs, "Number of ignored structs");
 STATISTIC(NumIgnoredGEPs, "Number of ignored GEP instructions");
 STATISTIC(NumInstrumentedGEPs, "Number of instrumented GEP instructions");
+STATISTIC(NumAssumedIntraCacheLine,
+          "Number of accesses assumed to be intra-cache-line");
 
 static const uint64_t EsanCtorAndDtorPriority = 0;
 static const char *const EsanModuleCtorName = "esan.module_ctor";
@@ -715,8 +725,12 @@
   // (and our shadow memory setup assumes 64-byte cache lines).
   assert(TypeSize <= 64);
   if (!(TypeSize == 8 ||
-        (Alignment % (TypeSize / 8)) == 0))
-    return false;
+        (Alignment % (TypeSize / 8)) == 0)) {
+    if (ClAssumeIntraCacheLine)
+      ++NumAssumedIntraCacheLine;
+    else
+      return false;
+  }
 
   // We inline instrumentation to set the corresponding shadow bits for
   // each cache line touched by the application.  Here we handle a single
Index: llvm/trunk/test/Instrumentation/EfficiencySanitizer/working_set_basic.ll
===================================================================
--- llvm/trunk/test/Instrumentation/EfficiencySanitizer/working_set_basic.ll
+++ llvm/trunk/test/Instrumentation/EfficiencySanitizer/working_set_basic.ll
@@ -91,15 +91,27 @@
 }
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Not guaranteed to be intra-cache-line
+; Not guaranteed to be intra-cache-line, but our defaults are to
+; assume they are:
 
 define i16 @unaligned2(i16* %a) {
 entry:
   %tmp1 = load i16, i16* %a, align 1
   ret i16 %tmp1
-; CHECK: %0 = bitcast i16* %a to i8*
-; CHECK-NEXT: call void @__esan_unaligned_load2(i8* %0)
-; CHECK-NEXT: %tmp1 = load i16, i16* %a, align 1
+; CHECK: %0 = ptrtoint i16* %a to i64
+; CHECK-NEXT: %1 = and i64 %0, 17592186044415
+; CHECK-NEXT: %2 = add i64 %1, 1337006139375616
+; CHECK-NEXT: %3 = lshr i64 %2, 6
+; CHECK-NEXT: %4 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: %5 = load i8, i8* %4
+; CHECK-NEXT: %6 = and i8 %5, -127
+; CHECK-NEXT: %7 = icmp ne i8 %6, -127
+; CHECK-NEXT: br i1 %7, label %8, label %11
+; CHECK: %9 = or i8 %5, -127
+; CHECK-NEXT: %10 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: store i8 %9, i8* %10
+; CHECK-NEXT: br label %11
+; CHECK: %tmp1 = load i16, i16* %a, align 1
 ; CHECK-NEXT: ret i16 %tmp1
 }
 
@@ -107,9 +119,20 @@
 entry:
   %tmp1 = load i32, i32* %a, align 2
   ret i32 %tmp1
-; CHECK: %0 = bitcast i32* %a to i8*
-; CHECK-NEXT: call void @__esan_unaligned_load4(i8* %0)
-; CHECK-NEXT: %tmp1 = load i32, i32* %a, align 2
+; CHECK: %0 = ptrtoint i32* %a to i64
+; CHECK-NEXT: %1 = and i64 %0, 17592186044415
+; CHECK-NEXT: %2 = add i64 %1, 1337006139375616
+; CHECK-NEXT: %3 = lshr i64 %2, 6
+; CHECK-NEXT: %4 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: %5 = load i8, i8* %4
+; CHECK-NEXT: %6 = and i8 %5, -127
+; CHECK-NEXT: %7 = icmp ne i8 %6, -127
+; CHECK-NEXT: br i1 %7, label %8, label %11
+; CHECK: %9 = or i8 %5, -127
+; CHECK-NEXT: %10 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: store i8 %9, i8* %10
+; CHECK-NEXT: br label %11
+; CHECK: %tmp1 = load i32, i32* %a, align 2
 ; CHECK-NEXT: ret i32 %tmp1
 }
 
@@ -117,9 +140,20 @@
 entry:
   %tmp1 = load i64, i64* %a, align 4
   ret i64 %tmp1
-; CHECK: %0 = bitcast i64* %a to i8*
-; CHECK-NEXT: call void @__esan_unaligned_load8(i8* %0)
-; CHECK-NEXT: %tmp1 = load i64, i64* %a, align 4
+; CHECK: %0 = ptrtoint i64* %a to i64
+; CHECK-NEXT: %1 = and i64 %0, 17592186044415
+; CHECK-NEXT: %2 = add i64 %1, 1337006139375616
+; CHECK-NEXT: %3 = lshr i64 %2, 6
+; CHECK-NEXT: %4 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: %5 = load i8, i8* %4
+; CHECK-NEXT: %6 = and i8 %5, -127
+; CHECK-NEXT: %7 = icmp ne i8 %6, -127
+; CHECK-NEXT: br i1 %7, label %8, label %11
+; CHECK: %9 = or i8 %5, -127
+; CHECK-NEXT: %10 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: store i8 %9, i8* %10
+; CHECK-NEXT: br label %11
+; CHECK: %tmp1 = load i64, i64* %a, align 4
 ; CHECK-NEXT: ret i64 %tmp1
 }
 
Index: llvm/trunk/test/Instrumentation/EfficiencySanitizer/working_set_strict.ll
===================================================================
--- llvm/trunk/test/Instrumentation/EfficiencySanitizer/working_set_strict.ll
+++ llvm/trunk/test/Instrumentation/EfficiencySanitizer/working_set_strict.ll
@@ -0,0 +1,125 @@
+; Test EfficiencySanitizer working set instrumentation without aggressive
+; optimization flags.
+;
+; RUN: opt < %s -esan -esan-working-set -esan-assume-intra-cache-line=0 -S | FileCheck %s
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Intra-cache-line
+
+define i8 @aligned1(i8* %a) {
+entry:
+  %tmp1 = load i8, i8* %a, align 1
+  ret i8 %tmp1
+; CHECK: @llvm.global_ctors = {{.*}}@esan.module_ctor
+; CHECK: %0 = ptrtoint i8* %a to i64
+; CHECK-NEXT: %1 = and i64 %0, 17592186044415
+; CHECK-NEXT: %2 = add i64 %1, 1337006139375616
+; CHECK-NEXT: %3 = lshr i64 %2, 6
+; CHECK-NEXT: %4 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: %5 = load i8, i8* %4
+; CHECK-NEXT: %6 = and i8 %5, -127
+; CHECK-NEXT: %7 = icmp ne i8 %6, -127
+; CHECK-NEXT: br i1 %7, label %8, label %11
+; CHECK: %9 = or i8 %5, -127
+; CHECK-NEXT: %10 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: store i8 %9, i8* %10
+; CHECK-NEXT: br label %11
+; CHECK: %tmp1 = load i8, i8* %a, align 1
+; CHECK-NEXT: ret i8 %tmp1
+}
+
+define i16 @aligned2(i16* %a) {
+entry:
+  %tmp1 = load i16, i16* %a, align 2
+  ret i16 %tmp1
+; CHECK: %0 = ptrtoint i16* %a to i64
+; CHECK-NEXT: %1 = and i64 %0, 17592186044415
+; CHECK-NEXT: %2 = add i64 %1, 1337006139375616
+; CHECK-NEXT: %3 = lshr i64 %2, 6
+; CHECK-NEXT: %4 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: %5 = load i8, i8* %4
+; CHECK-NEXT: %6 = and i8 %5, -127
+; CHECK-NEXT: %7 = icmp ne i8 %6, -127
+; CHECK-NEXT: br i1 %7, label %8, label %11
+; CHECK: %9 = or i8 %5, -127
+; CHECK-NEXT: %10 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: store i8 %9, i8* %10
+; CHECK-NEXT: br label %11
+; CHECK: %tmp1 = load i16, i16* %a, align 2
+; CHECK-NEXT: ret i16 %tmp1
+}
+
+define i32 @aligned4(i32* %a) {
+entry:
+  %tmp1 = load i32, i32* %a, align 4
+  ret i32 %tmp1
+; CHECK: %0 = ptrtoint i32* %a to i64
+; CHECK-NEXT: %1 = and i64 %0, 17592186044415
+; CHECK-NEXT: %2 = add i64 %1, 1337006139375616
+; CHECK-NEXT: %3 = lshr i64 %2, 6
+; CHECK-NEXT: %4 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: %5 = load i8, i8* %4
+; CHECK-NEXT: %6 = and i8 %5, -127
+; CHECK-NEXT: %7 = icmp ne i8 %6, -127
+; CHECK-NEXT: br i1 %7, label %8, label %11
+; CHECK: %9 = or i8 %5, -127
+; CHECK-NEXT: %10 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: store i8 %9, i8* %10
+; CHECK-NEXT: br label %11
+; CHECK: %tmp1 = load i32, i32* %a, align 4
+; CHECK-NEXT: ret i32 %tmp1
+}
+
+define i64 @aligned8(i64* %a) {
+entry:
+  %tmp1 = load i64, i64* %a, align 8
+  ret i64 %tmp1
+; CHECK: %0 = ptrtoint i64* %a to i64
+; CHECK-NEXT: %1 = and i64 %0, 17592186044415
+; CHECK-NEXT: %2 = add i64 %1, 1337006139375616
+; CHECK-NEXT: %3 = lshr i64 %2, 6
+; CHECK-NEXT: %4 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: %5 = load i8, i8* %4
+; CHECK-NEXT: %6 = and i8 %5, -127
+; CHECK-NEXT: %7 = icmp ne i8 %6, -127
+; CHECK-NEXT: br i1 %7, label %8, label %11
+; CHECK: %9 = or i8 %5, -127
+; CHECK-NEXT: %10 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: store i8 %9, i8* %10
+; CHECK-NEXT: br label %11
+; CHECK: %tmp1 = load i64, i64* %a, align 8
+; CHECK-NEXT: ret i64 %tmp1
+}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Not guaranteed to be intra-cache-line
+
+define i16 @unaligned2(i16* %a) {
+entry:
+  %tmp1 = load i16, i16* %a, align 1
+  ret i16 %tmp1
+; CHECK: %0 = bitcast i16* %a to i8*
+; CHECK-NEXT: call void @__esan_unaligned_load2(i8* %0)
+; CHECK-NEXT: %tmp1 = load i16, i16* %a, align 1
+; CHECK-NEXT: ret i16 %tmp1
+}
+
+define i32 @unaligned4(i32* %a) {
+entry:
+  %tmp1 = load i32, i32* %a, align 2
+  ret i32 %tmp1
+; CHECK: %0 = bitcast i32* %a to i8*
+; CHECK-NEXT: call void @__esan_unaligned_load4(i8* %0)
+; CHECK-NEXT: %tmp1 = load i32, i32* %a, align 2
+; CHECK-NEXT: ret i32 %tmp1
+}
+
+define i64 @unaligned8(i64* %a) {
+entry:
+  %tmp1 = load i64, i64* %a, align 4
+  ret i64 %tmp1
+; CHECK: %0 = bitcast i64* %a to i8*
+; CHECK-NEXT: call void @__esan_unaligned_load8(i8* %0)
+; CHECK-NEXT: %tmp1 = load i64, i64* %a, align 4
+; CHECK-NEXT: ret i64 %tmp1
+}
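
For reference, here is a minimal C++ sketch of what the inlined working-set fastpath in the CHECK lines above computes, reconstructed purely from those patterns; the constants come from the IR, while the helper names and the standalone-function framing are illustrative and not code from the pass:

#include <cstdint>

// Map an application address to its working-set shadow byte: mask to
// 2^44 - 1 (17592186044415), add the shadow offset (1337006139375616),
// then shift right by 6 since each shadow byte covers a 64-byte cache line.
static uint8_t *appToShadow(uint64_t App) {
  uint64_t Shadow =
      ((App & 0x00000fffffffffffULL) + 0x0004c00000000000ULL) >> 6;
  return reinterpret_cast<uint8_t *>(Shadow);
}

// Set the access bits (0x81, the -127 i8 constant in the IR above) for the
// cache line containing App, storing only if they are not already all set.
static void markCacheLineAccessed(uint64_t App) {
  uint8_t *Byte = appToShadow(App);
  if ((*Byte & 0x81) != 0x81)
    *Byte |= 0x81;
}

With -esan-assume-intra-cache-line now defaulting to true, the potentially line-straddling accesses in working_set_basic.ll take this same single-cache-line fastpath instead of calling the __esan_unaligned_load* slowpath, which working_set_strict.ll still expects when the option is set to 0.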