diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
--- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -505,6 +505,11 @@
   bool init(Module &M);
 
+  /// Advances \p OriginAddr to point to the next 32-bit origin and then loads
+  /// from it. Returns the loaded origin value.
+  Value *loadNextOrigin(Instruction *Pos, Align OriginAlign,
+                        Value **OriginAddr);
+
   /// Returns whether fast8 or fast16 mode has been specified.
   bool hasFastLabelsEnabled();
 
@@ -2094,6 +2099,14 @@
   return Alignment < MinOriginAlignment || !DFS.hasLoadSizeForFastPath(Size);
 }
 
+Value *DataFlowSanitizer::loadNextOrigin(Instruction *Pos, Align OriginAlign,
+                                         Value **OriginAddr) {
+  IRBuilder<> IRB(Pos);
+  *OriginAddr =
+      IRB.CreateGEP(OriginTy, *OriginAddr, ConstantInt::get(IntptrTy, 1));
+  return IRB.CreateAlignedLoad(OriginTy, *OriginAddr, OriginAlign);
+}
+
 std::pair<Value *, Value *> DFSanFunction::loadFast16ShadowFast(
     Value *ShadowAddr, Value *OriginAddr, uint64_t Size, Align ShadowAlign,
     Align OriginAlign, Value *FirstOrigin, Instruction *Pos) {
@@ -2125,19 +2138,39 @@
   Value *CombinedWideShadow =
       IRB.CreateAlignedLoad(WideShadowTy, WideAddr, ShadowAlign);
-  if (ShouldTrackOrigins) {
-    Shadows.push_back(CombinedWideShadow);
-    Origins.push_back(FirstOrigin);
-  }
+  unsigned WideShadowBitWidth = WideShadowTy->getIntegerBitWidth();
+  const uint64_t BytesPerWideShadow = WideShadowBitWidth / DFS.ShadowWidthBits;
+
+  auto AppendWideShadowAndOrigin = [&](Value *WideShadow, Value *Origin) {
+    if (BytesPerWideShadow > 4) {
+      assert(BytesPerWideShadow == 8);
+      // The wide shadow relates to two origin pointers: one for the first four
+      // application bytes, and one for the last four. We use a left shift to
+      // get just the shadow bytes that correspond to the first origin pointer,
+      // and then the entire shadow for the second origin pointer (which will be
+      // chosen by combineOrigins() iff the least-significant half of the wide
+      // shadow was empty but the other half was not).
+      Value *WideShadowLo = IRB.CreateShl(
+          WideShadow, ConstantInt::get(WideShadowTy, WideShadowBitWidth / 2));
+      Shadows.push_back(WideShadow);
+      Origins.push_back(DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr));
+
+      Shadows.push_back(WideShadowLo);
+      Origins.push_back(Origin);
+    } else {
+      Shadows.push_back(WideShadow);
+      Origins.push_back(Origin);
+    }
+  };
+
+  if (ShouldTrackOrigins)
+    AppendWideShadowAndOrigin(CombinedWideShadow, FirstOrigin);
+
   // First OR all the WideShadows (i.e., 64bit or 32bit shadow chunks)
   // linearly; then OR individual shadows within the combined WideShadow by
   // binary ORing. This is fewer instructions than ORing shadows individually,
   // since it needs logN shift/or instructions (N being the bytes of the
   // combined wide shadow).
-  unsigned WideShadowBitWidth = WideShadowTy->getIntegerBitWidth();
-  const uint64_t BytesPerWideShadow = WideShadowBitWidth / DFS.ShadowWidthBits;
-
   for (uint64_t ByteOfs = BytesPerWideShadow; ByteOfs < Size;
        ByteOfs += BytesPerWideShadow) {
     WideAddr = IRB.CreateGEP(WideShadowTy, WideAddr,
@@ -2146,11 +2179,8 @@
         IRB.CreateAlignedLoad(WideShadowTy, WideAddr, ShadowAlign);
     CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, NextWideShadow);
     if (ShouldTrackOrigins) {
-      Shadows.push_back(NextWideShadow);
-      OriginAddr = IRB.CreateGEP(DFS.OriginTy, OriginAddr,
-                                 ConstantInt::get(DFS.IntptrTy, 1));
-      Origins.push_back(
-          IRB.CreateAlignedLoad(DFS.OriginTy, OriginAddr, OriginAlign));
+      Value *NextOrigin = DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr);
+      AppendWideShadowAndOrigin(NextWideShadow, NextOrigin);
     }
   }
   for (unsigned Width = WideShadowBitWidth / 2; Width >= DFS.ShadowWidthBits;
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_load.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_load.ll
--- a/llvm/test/Instrumentation/DataFlowSanitizer/origin_load.ll
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_load.ll
@@ -191,6 +191,9 @@
   ; CHECK16-NEXT: %[[#ORIGIN:]] = select i1 %[[#SHADOW_NZ]], i32 %[[#ORIGIN2]], i32 %[[#ORIGIN]]
 
   ; COMM: On fast8, no need to OR the wide shadow but one more shift is needed.
+  ; CHECK8-NEXT: %[[#WIDE_SHADOW_LO:]] = shl i64 %[[#WIDE_SHADOW]], 32
+  ; CHECK8-NEXT: %[[#ORIGIN_PTR2:]] = getelementptr i32, i32* %[[#ORIGIN_PTR]], i64 1
+  ; CHECK8-NEXT: %[[#ORIGIN2:]] = load i32, i32* %[[#ORIGIN_PTR2]], align 8
   ; CHECK8-NEXT: %[[#WIDE_SHADOW_SHIFTED:]] = lshr i64 %[[#WIDE_SHADOW]], 32
   ; CHECK8-NEXT: %[[#WIDE_SHADOW:]] = or i64 %[[#WIDE_SHADOW]], %[[#WIDE_SHADOW_SHIFTED]]
   ; CHECK8-NEXT: %[[#WIDE_SHADOW_SHIFTED:]] = lshr i64 %[[#WIDE_SHADOW]], 16
@@ -198,6 +201,8 @@
   ; CHECK8-NEXT: %[[#WIDE_SHADOW_SHIFTED:]] = lshr i64 %[[#WIDE_SHADOW]], 8
   ; CHECK8-NEXT: %[[#WIDE_SHADOW:]] = or i64 %[[#WIDE_SHADOW]], %[[#WIDE_SHADOW_SHIFTED]]
   ; CHECK8-NEXT: %[[#SHADOW:]] = trunc i64 %[[#WIDE_SHADOW]] to i[[#SBITS]]
+  ; CHECK8-NEXT: %[[#SHADOW_NZ:]] = icmp ne i64 %[[#WIDE_SHADOW_LO]], 0
+  ; CHECK8-NEXT: %[[#ORIGIN:]] = select i1 %[[#SHADOW_NZ]], i32 %[[#ORIGIN]], i32 %[[#ORIGIN2]]
 
   ; COMBINE_LOAD_PTR-NEXT: %[[#SHADOW:]] = or i[[#SBITS]] %[[#SHADOW]], %[[#PS]]
   ; COMBINE_LOAD_PTR-NEXT: %[[#NZ:]] = icmp ne i[[#SBITS]] %[[#PS]], 0
@@ -250,13 +255,16 @@
   ; CHECK-NEXT: %[[#ORIGIN:]] = load i32, i32* %[[#ORIGIN_PTR]], align 8
   ; CHECK-NEXT: %[[#WIDE_SHADOW_PTR:]] = bitcast i[[#SBITS]]* %[[#SHADOW_PTR]] to i64*
   ; CHECK-NEXT: %[[#WIDE_SHADOW:]] = load i64, i64* %[[#WIDE_SHADOW_PTR]], align [[#SBYTES]]
+  ; CHECK8-NEXT: %[[#WIDE_SHADOW_LO:]] = shl i64 %[[#WIDE_SHADOW]], 32
+  ; CHECK8-NEXT: %[[#ORIGIN_PTR2:]] = getelementptr i32, i32* %[[#ORIGIN_PTR]], i64 1
+  ; CHECK8-NEXT: %[[#ORIGIN2:]] = load i32, i32* %[[#ORIGIN_PTR2]], align 8
   ; CHECK-NEXT: %[[#WIDE_SHADOW_PTR2:]] = getelementptr i64, i64* %[[#WIDE_SHADOW_PTR]], i64 1
   ; CHECK-NEXT: %[[#WIDE_SHADOW2:]] = load i64, i64* %[[#WIDE_SHADOW_PTR2]], align [[#SBYTES]]
   ; CHECK-NEXT: %[[#WIDE_SHADOW:]] = or i64 %[[#WIDE_SHADOW]], %[[#WIDE_SHADOW2]]
-  ; CHECK-NEXT: %[[#ORIGIN_PTR2:]] = getelementptr i32, i32* %[[#ORIGIN_PTR]], i64 1
-  ; CHECK-NEXT: %[[#ORIGIN2:]] = load i32, i32* %[[#ORIGIN_PTR2]], align 8
 
   ; COMM: On fast16, we need to OR 4x64bits for the wide shadow, before ORing its bytes.
+  ; CHECK16-NEXT: %[[#ORIGIN_PTR2:]] = getelementptr i32, i32* %[[#ORIGIN_PTR]], i64 1
+  ; CHECK16-NEXT: %[[#ORIGIN2:]] = load i32, i32* %[[#ORIGIN_PTR2]], align 8
   ; CHECK16-NEXT: %[[#WIDE_SHADOW_PTR3:]] = getelementptr i64, i64* %[[#WIDE_SHADOW_PTR2]], i64 1
   ; CHECK16-NEXT: %[[#WIDE_SHADOW3:]] = load i64, i64* %[[#WIDE_SHADOW_PTR3]], align [[#SBYTES]]
   ; CHECK16-NEXT: %[[#WIDE_SHADOW:]] = or i64 %[[#WIDE_SHADOW]], %[[#WIDE_SHADOW3]]
@@ -280,6 +288,11 @@
   ; CHECK16-NEXT: %[[#ORIGIN:]] = select i1 %[[#SHADOW4_NZ]], i32 %[[#ORIGIN4]], i32 %[[#ORIGIN]]
 
   ; COMM: On fast8, we need to OR 2x64bits for the wide shadow, before ORing its bytes (one more shift).
+  ; CHECK8-NEXT: %[[#ORIGIN_PTR3:]] = getelementptr i32, i32* %[[#ORIGIN_PTR2]], i64 1
+  ; CHECK8-NEXT: %[[#ORIGIN3:]] = load i32, i32* %[[#ORIGIN_PTR3]], align 8
+  ; CHECK8-NEXT: %[[#WIDE_SHADOW2_LO:]] = shl i64 %[[#WIDE_SHADOW2]], 32
+  ; CHECK8-NEXT: %[[#ORIGIN_PTR4:]] = getelementptr i32, i32* %[[#ORIGIN_PTR3]], i64 1
+  ; CHECK8-NEXT: %[[#ORIGIN4:]] = load i32, i32* %[[#ORIGIN_PTR4]], align 8
   ; CHECK8-NEXT: %[[#WIDE_SHADOW_SHIFTED:]] = lshr i64 %[[#WIDE_SHADOW]], 32
   ; CHECK8-NEXT: %[[#WIDE_SHADOW:]] = or i64 %[[#WIDE_SHADOW]], %[[#WIDE_SHADOW_SHIFTED]]
   ; CHECK8-NEXT: %[[#WIDE_SHADOW_SHIFTED:]] = lshr i64 %[[#WIDE_SHADOW]], 16
@@ -287,8 +300,12 @@
   ; CHECK8-NEXT: %[[#WIDE_SHADOW_SHIFTED:]] = lshr i64 %[[#WIDE_SHADOW]], 8
   ; CHECK8-NEXT: %[[#WIDE_SHADOW:]] = or i64 %[[#WIDE_SHADOW]], %[[#WIDE_SHADOW_SHIFTED]]
   ; CHECK8-NEXT: %[[#SHADOW:]] = trunc i64 %[[#WIDE_SHADOW]] to i[[#SBITS]]
+  ; CHECK8-NEXT: %[[#SHADOW_LO_NZ:]] = icmp ne i64 %[[#WIDE_SHADOW_LO]], 0
+  ; CHECK8-NEXT: %[[#ORIGIN:]] = select i1 %[[#SHADOW_LO_NZ]], i32 %[[#ORIGIN]], i32 %[[#ORIGIN2]]
   ; CHECK8-NEXT: %[[#SHADOW2_NZ:]] = icmp ne i64 %[[#WIDE_SHADOW2]], 0
-  ; CHECK8-NEXT: %[[#ORIGIN:]] = select i1 %[[#SHADOW2_NZ]], i32 %[[#ORIGIN2]], i32 %[[#ORIGIN]]
+  ; CHECK8-NEXT: %[[#ORIGIN:]] = select i1 %[[#SHADOW2_NZ]], i32 %[[#ORIGIN4]], i32 %[[#ORIGIN]]
+  ; CHECK8-NEXT: %[[#SHADOW2_LO_NZ:]] = icmp ne i64 %[[#WIDE_SHADOW2_LO]], 0
+  ; CHECK8-NEXT: %[[#ORIGIN:]] = select i1 %[[#SHADOW2_LO_NZ]], i32 %[[#ORIGIN3]], i32 %[[#ORIGIN]]
 
   ; COMBINE_LOAD_PTR-NEXT: %[[#SHADOW:]] = or i[[#SBITS]] %[[#SHADOW]], %[[#PS]]
   ; COMBINE_LOAD_PTR-NEXT: %[[#NZ:]] = icmp ne i[[#SBITS]] %[[#PS]], 0
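
Note: the following is a minimal sketch of the origin-selection semantics the
new fast8 instrumentation encodes, written as plain standalone C++ rather than
as generated IR. chooseOrigin and its parameter names are illustrative, not
LLVM identifiers, and it assumes a little-endian target where a 64-bit wide
shadow covers eight application bytes (the first four in the least-significant
half) while origins are recorded per four bytes.

  #include <cstdint>

  // WideShadow holds one shadow byte per application byte (8 bytes total);
  // FirstOrigin describes application bytes 0-3, SecondOrigin bytes 4-7.
  uint32_t chooseOrigin(uint64_t WideShadow, uint32_t FirstOrigin,
                        uint32_t SecondOrigin) {
    // Shifting left by 32 discards the second half's shadow, leaving a value
    // that is nonzero iff the first four application bytes are tainted.
    uint64_t WideShadowLo = WideShadow << 32;
    if (WideShadowLo != 0)
      return FirstOrigin;  // first half tainted: its origin wins
    if (WideShadow != 0)
      return SecondOrigin; // only the second half is tainted
    return 0;              // nothing tainted; the origin value is unused
  }

This mirrors the (WideShadow, next origin) and (WideShadowLo, first origin)
pairs pushed by AppendWideShadowAndOrigin: combineOrigins() takes the first
origin whenever the least-significant half of the wide shadow is nonzero, and
falls back to the second origin iff only the other half is.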