diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp --- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -502,6 +502,8 @@ void initializeCallbackFunctions(Module &M); void initializeRuntimeFunctions(Module &M); void injectMetadataGlobals(Module &M); + Value *loadNextOrigin(Instruction *Pos, Align OriginAlign, + Value **OriginAddr); bool init(Module &M); @@ -2094,6 +2096,14 @@ return Alignment < MinOriginAlignment || !DFS.hasLoadSizeForFastPath(Size); } +Value *DataFlowSanitizer::loadNextOrigin(Instruction *Pos, Align OriginAlign, + Value **OriginAddr) { + IRBuilder<> IRB(Pos); + *OriginAddr = + IRB.CreateGEP(OriginTy, *OriginAddr, ConstantInt::get(IntptrTy, 1)); + return IRB.CreateAlignedLoad(OriginTy, *OriginAddr, OriginAlign); +} + std::pair DFSanFunction::loadFast16ShadowFast( Value *ShadowAddr, Value *OriginAddr, uint64_t Size, Align ShadowAlign, Align OriginAlign, Value *FirstOrigin, Instruction *Pos) { @@ -2125,9 +2135,31 @@ Value *CombinedWideShadow = IRB.CreateAlignedLoad(WideShadowTy, WideAddr, ShadowAlign); + unsigned WideShadowBitWidth = WideShadowTy->getIntegerBitWidth(); + const uint64_t BytesPerWideShadow = WideShadowBitWidth / DFS.ShadowWidthBits; + if (ShouldTrackOrigins) { - Shadows.push_back(CombinedWideShadow); - Origins.push_back(FirstOrigin); + if (BytesPerWideShadow > 4) { + assert(BytesPerWideShadow == 8); + // The wide shadow relates to two origin pointers: one for the first four + // application bytes, and one for the latest four. We use a left shift to + // get just the shadow bytes that correspond to the first origin pointer, + // and then the entire shadow for the second origin pointer (which will be + // chosen by combineOrigins() iff the least-significant half of the wide + // shadow was empty but the other half was not). + Value *CombinedWideShadowLo = + IRB.CreateShl(CombinedWideShadow, + ConstantInt::get(DFS.IntptrTy, WideShadowBitWidth / 2)); + Value *SecondOrigin = DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr); + Shadows.push_back(CombinedWideShadow); + Origins.push_back(SecondOrigin); + + Shadows.push_back(CombinedWideShadowLo); + Origins.push_back(FirstOrigin); + } else { + Shadows.push_back(CombinedWideShadow); + Origins.push_back(FirstOrigin); + } } // First OR all the WideShadows (i.e., 64bit or 32bit shadow chunks) linearly; @@ -2135,9 +2167,6 @@ // This is fewer instructions than ORing shadows individually, since it // needs logN shift/or instructions (N being the bytes of the combined wide // shadow). - unsigned WideShadowBitWidth = WideShadowTy->getIntegerBitWidth(); - const uint64_t BytesPerWideShadow = WideShadowBitWidth / DFS.ShadowWidthBits; - for (uint64_t ByteOfs = BytesPerWideShadow; ByteOfs < Size; ByteOfs += BytesPerWideShadow) { WideAddr = IRB.CreateGEP(WideShadowTy, WideAddr, @@ -2146,11 +2175,21 @@ IRB.CreateAlignedLoad(WideShadowTy, WideAddr, ShadowAlign); CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, NextWideShadow); if (ShouldTrackOrigins) { - Shadows.push_back(NextWideShadow); - OriginAddr = IRB.CreateGEP(DFS.OriginTy, OriginAddr, - ConstantInt::get(DFS.IntptrTy, 1)); - Origins.push_back( - IRB.CreateAlignedLoad(DFS.OriginTy, OriginAddr, OriginAlign)); + if (BytesPerWideShadow > 4) { + Value *NextWideShadowLo = IRB.CreateShl( + NextWideShadow, + ConstantInt::get(DFS.IntptrTy, WideShadowBitWidth / 2)); + Value *NextOriginLo = DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr); + + Shadows.push_back(NextWideShadow); + Origins.push_back(DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr)); + + Shadows.push_back(NextWideShadowLo); + Origins.push_back(NextOriginLo); + } else { + Shadows.push_back(NextWideShadow); + Origins.push_back(DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr)); + } } } for (unsigned Width = WideShadowBitWidth / 2; Width >= DFS.ShadowWidthBits; diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_load.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_load.ll --- a/llvm/test/Instrumentation/DataFlowSanitizer/origin_load.ll +++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_load.ll @@ -191,6 +191,9 @@ ; CHECK16-NEXT: %[[#ORIGIN:]] = select i1 %[[#SHADOW_NZ]], i32 %[[#ORIGIN2]], i32 %[[#ORIGIN]] ; COMM: On fast8, no need to OR the wide shadow but one more shift is needed. + ; CHECK8-NEXT: %[[#WIDE_SHADOW_LO:]] = shl i64 %[[#WIDE_SHADOW]], 32 + ; CHECK8-NEXT: %[[#ORIGIN_PTR2:]] = getelementptr i32, i32* %[[#ORIGIN_PTR]], i64 1 + ; CHECK8-NEXT: %[[#ORIGIN2:]] = load i32, i32* %[[#ORIGIN_PTR2]], align 8 ; CHECK8-NEXT: %[[#WIDE_SHADOW_SHIFTED:]] = lshr i64 %[[#WIDE_SHADOW]], 32 ; CHECK8-NEXT: %[[#WIDE_SHADOW:]] = or i64 %[[#WIDE_SHADOW]], %[[#WIDE_SHADOW_SHIFTED]] ; CHECK8-NEXT: %[[#WIDE_SHADOW_SHIFTED:]] = lshr i64 %[[#WIDE_SHADOW]], 16 @@ -198,6 +201,8 @@ ; CHECK8-NEXT: %[[#WIDE_SHADOW_SHIFTED:]] = lshr i64 %[[#WIDE_SHADOW]], 8 ; CHECK8-NEXT: %[[#WIDE_SHADOW:]] = or i64 %[[#WIDE_SHADOW]], %[[#WIDE_SHADOW_SHIFTED]] ; CHECK8-NEXT: %[[#SHADOW:]] = trunc i64 %[[#WIDE_SHADOW]] to i[[#SBITS]] + ; CHECK8-NEXT: %[[#SHADOW_NZ:]] = icmp ne i64 %[[#WIDE_SHADOW_LO]], 0 + ; CHECK8-NEXT: %[[#ORIGIN:]] = select i1 %[[#SHADOW_NZ]], i32 %[[#ORIGIN]], i32 %[[#ORIGIN2]] ; COMBINE_LOAD_PTR-NEXT: %[[#SHADOW:]] = or i[[#SBITS]] %[[#SHADOW]], %[[#PS]] ; COMBINE_LOAD_PTR-NEXT: %[[#NZ:]] = icmp ne i[[#SBITS]] %[[#PS]], 0 @@ -250,13 +255,16 @@ ; CHECK-NEXT: %[[#ORIGIN:]] = load i32, i32* %[[#ORIGIN_PTR]], align 8 ; CHECK-NEXT: %[[#WIDE_SHADOW_PTR:]] = bitcast i[[#SBITS]]* %[[#SHADOW_PTR]] to i64* ; CHECK-NEXT: %[[#WIDE_SHADOW:]] = load i64, i64* %[[#WIDE_SHADOW_PTR]], align [[#SBYTES]] + ; CHECK8-NEXT: %[[#WIDE_SHADOW_LO:]] = shl i64 %[[#WIDE_SHADOW]], 32 + ; CHECK8-NEXT: %[[#ORIGIN_PTR2:]] = getelementptr i32, i32* %[[#ORIGIN_PTR]], i64 1 + ; CHECK8-NEXT: %[[#ORIGIN2:]] = load i32, i32* %[[#ORIGIN_PTR2]], align 8 ; CHECK-NEXT: %[[#WIDE_SHADOW_PTR2:]] = getelementptr i64, i64* %[[#WIDE_SHADOW_PTR]], i64 1 ; CHECK-NEXT: %[[#WIDE_SHADOW2:]] = load i64, i64* %[[#WIDE_SHADOW_PTR2]], align [[#SBYTES]] ; CHECK-NEXT: %[[#WIDE_SHADOW:]] = or i64 %[[#WIDE_SHADOW]], %[[#WIDE_SHADOW2]] - ; CHECK-NEXT: %[[#ORIGIN_PTR2:]] = getelementptr i32, i32* %[[#ORIGIN_PTR]], i64 1 - ; CHECK-NEXT: %[[#ORIGIN2:]] = load i32, i32* %[[#ORIGIN_PTR2]], align 8 ; COMM: On fast16, we need to OR 4x64bits for the wide shadow, before ORing its bytes. + ; CHECK16-NEXT: %[[#ORIGIN_PTR2:]] = getelementptr i32, i32* %[[#ORIGIN_PTR]], i64 1 + ; CHECK16-NEXT: %[[#ORIGIN2:]] = load i32, i32* %[[#ORIGIN_PTR2]], align 8 ; CHECK16-NEXT: %[[#WIDE_SHADOW_PTR3:]] = getelementptr i64, i64* %[[#WIDE_SHADOW_PTR2]], i64 1 ; CHECK16-NEXT: %[[#WIDE_SHADOW3:]] = load i64, i64* %[[#WIDE_SHADOW_PTR3]], align [[#SBYTES]] ; CHECK16-NEXT: %[[#WIDE_SHADOW:]] = or i64 %[[#WIDE_SHADOW]], %[[#WIDE_SHADOW3]] @@ -280,6 +288,11 @@ ; CHECK16-NEXT: %[[#ORIGIN:]] = select i1 %[[#SHADOW4_NZ]], i32 %[[#ORIGIN4]], i32 %[[#ORIGIN]] ; COMM: On fast8, we need to OR 2x64bits for the wide shadow, before ORing its bytes (one more shift). + ; CHECK8-NEXT: %[[#WIDE_SHADOW2_LO:]] = shl i64 %[[#WIDE_SHADOW2]], 32 + ; CHECK8-NEXT: %[[#ORIGIN_PTR3:]] = getelementptr i32, i32* %[[#ORIGIN_PTR2]], i64 1 + ; CHECK8-NEXT: %[[#ORIGIN3:]] = load i32, i32* %[[#ORIGIN_PTR3]], align 8 + ; CHECK8-NEXT: %[[#ORIGIN_PTR4:]] = getelementptr i32, i32* %[[#ORIGIN_PTR3]], i64 1 + ; CHECK8-NEXT: %[[#ORIGIN4:]] = load i32, i32* %[[#ORIGIN_PTR4]], align 8 ; CHECK8-NEXT: %[[#WIDE_SHADOW_SHIFTED:]] = lshr i64 %[[#WIDE_SHADOW]], 32 ; CHECK8-NEXT: %[[#WIDE_SHADOW:]] = or i64 %[[#WIDE_SHADOW]], %[[#WIDE_SHADOW_SHIFTED]] ; CHECK8-NEXT: %[[#WIDE_SHADOW_SHIFTED:]] = lshr i64 %[[#WIDE_SHADOW]], 16 @@ -287,8 +300,12 @@ ; CHECK8-NEXT: %[[#WIDE_SHADOW_SHIFTED:]] = lshr i64 %[[#WIDE_SHADOW]], 8 ; CHECK8-NEXT: %[[#WIDE_SHADOW:]] = or i64 %[[#WIDE_SHADOW]], %[[#WIDE_SHADOW_SHIFTED]] ; CHECK8-NEXT: %[[#SHADOW:]] = trunc i64 %[[#WIDE_SHADOW]] to i[[#SBITS]] + ; CHECK8-NEXT: %[[#SHADOW_LO_NZ:]] = icmp ne i64 %[[#WIDE_SHADOW_LO]], 0 + ; CHECK8-NEXT: %[[#ORIGIN:]] = select i1 %[[#SHADOW_LO_NZ]], i32 %[[#ORIGIN]], i32 %[[#ORIGIN2]] ; CHECK8-NEXT: %[[#SHADOW2_NZ:]] = icmp ne i64 %[[#WIDE_SHADOW2]], 0 - ; CHECK8-NEXT: %[[#ORIGIN:]] = select i1 %[[#SHADOW2_NZ]], i32 %[[#ORIGIN2]], i32 %[[#ORIGIN]] + ; CHECK8-NEXT: %[[#ORIGIN:]] = select i1 %[[#SHADOW2_NZ]], i32 %[[#ORIGIN4]], i32 %[[#ORIGIN]] + ; CHECK8-NEXT: %[[#SHADOW2_LO_NZ:]] = icmp ne i64 %[[#WIDE_SHADOW2_LO]], 0 + ; CHECK8-NEXT: %[[#ORIGIN:]] = select i1 %[[#SHADOW2_LO_NZ]], i32 %[[#ORIGIN3]], i32 %[[#ORIGIN]] ; COMBINE_LOAD_PTR-NEXT: %[[#SHADOW:]] = or i[[#SBITS]] %[[#SHADOW]], %[[#PS]] ; COMBINE_LOAD_PTR-NEXT: %[[#NZ:]] = icmp ne i[[#SBITS]] %[[#PS]], 0