diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
--- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -87,6 +87,7 @@
 #include "llvm/IR/Value.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/Alignment.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -434,8 +435,8 @@
   Value *getShadowOffset(Value *Addr, IRBuilder<> &IRB);
   Value *getShadowAddress(Value *Addr, Instruction *Pos);
-  // std::pair<Value *, Value *>
-  // getShadowOriginAddress(Value *Addr, Align InstAlignment, Instruction *Pos);
+  std::pair<Value *, Value *>
+  getShadowOriginAddress(Value *Addr, Align InstAlignment, Instruction *Pos);
   bool isInstrumented(const Function *F);
   bool isInstrumented(const GlobalAlias *GA);
   FunctionType *getArgsFunctionType(FunctionType *T);
@@ -508,6 +509,8 @@
   DenseMap<Value *, Value *> ValShadowMap;
   DenseMap<Value *, Value *> ValOriginMap;
   DenseMap<AllocaInst *, AllocaInst *> AllocaShadowMap;
+  DenseMap<AllocaInst *, AllocaInst *> AllocaOriginMap;
+
   std::vector<std::pair<PHINode *, PHINode *>> PHIFixups;
   DenseSet<Instruction *> SkipInsts;
   std::vector<Value *> NonZeroChecks;
@@ -574,8 +577,9 @@
   Value *combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
                                    Instruction *Pos);
   Value *combineOperandShadows(Instruction *Inst);
-  Value *loadShadow(Value *ShadowAddr, uint64_t Size, uint64_t Align,
-                    Instruction *Pos);
+  std::pair<Value *, Value *> loadShadowOrigin(Value *ShadowAddr, uint64_t Size,
+                                               Align InstAlignment,
+                                               Instruction *Pos);
   void storePrimitiveShadow(Value *Addr, uint64_t Size, Align Alignment,
                             Value *PrimitiveShadow, Instruction *Pos);
   /// Applies PrimitiveShadow to all primitive subtypes of T, returning
@@ -617,8 +621,20 @@
                             Align ShadowAlign, Instruction *Pos);
 
   /// The fast path of loading shadow in fast-16-label mode.
-  Value *loadFast16ShadowFast(Value *ShadowAddr, uint64_t Size,
-                              Align ShadowAlign, Instruction *Pos);
+  std::pair<Value *, Value *>
+  loadFast16ShadowFast(Value *ShadowAddr, Value *OriginAddr, uint64_t Size,
+                       Align ShadowAlign, Align OriginAlign, Value *FirstOrigin,
+                       Instruction *Pos);
+
+  Align getOriginAlign(Align InstAlignment);
+
+  /// Because 4 contiguous bytes share one 4-byte origin, the most accurate
+  /// load is __dfsan_load_label_and_origin, which returns the union of all
+  /// loaded labels and the origin of the first tainted label. However, that
+  /// is an additional call with many instructions. To keep common cases fast,
+  /// this predicate checks whether labels and origins can be loaded without
+  /// the callback.
+  bool useCallbackLoadLabelAndOrigin(uint64_t Size, Align InstAlignment);
 };
 
 class DFSanVisitor : public InstVisitor<DFSanVisitor> {
@@ -1689,7 +1705,7 @@
   return IRB.CreateAnd(IRB.CreatePtrToInt(Addr, IntptrTy),
                        IRB.CreatePtrToInt(ShadowPtrMaskValue, IntptrTy));
 }
-/*
+
 std::pair<Value *, Value *>
 DataFlowSanitizer::getShadowOriginAddress(Value *Addr, Align InstAlignment,
                                           Instruction *Pos) {
@@ -1712,7 +1728,7 @@
   }
   return {ShadowPtr, OriginPtr};
 }
-*/
+
 Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) {
   // Returns (Addr & shadow_mask) x 2
   IRBuilder<> IRB(Pos);
@@ -1886,17 +1902,51 @@
   return Align(Alignment.value() * DFS.ShadowWidthBytes);
 }
 
-Value *DFSanFunction::loadFast16ShadowFast(Value *ShadowAddr, uint64_t Size,
-                                           Align ShadowAlign,
-                                           Instruction *Pos) {
+Align DFSanFunction::getOriginAlign(Align InstAlignment) {
+  const Align Alignment = llvm::assumeAligned(InstAlignment.value());
+  return Align(std::max(kMinOriginAlignment, Alignment));
+}
+
+bool DFSanFunction::useCallbackLoadLabelAndOrigin(uint64_t Size,
+                                                  Align InstAlignment) {
+  assert(Size != 0);
+  // * if Size == 1, it is sufficient to load its origin aligned at 4.
+  // * if Size == 2, we assume that in most cases Addr % 2 == 0, so it is
+  //   sufficient to load its origin aligned at 4. If not, origins may be
+  //   lost, but that should not happen very often.
+  // * if align >= 4, Addr must be aligned to 4, otherwise the load is UB.
+  //   When Size % 4 == 0, it is more efficient to load origins without
+  //   callbacks.
+  // * Otherwise we use __dfsan_load_label_and_origin.
+  // This should ensure that common cases run efficiently.
+  if (Size <= 2)
+    return false;
+
+  const Align Alignment = llvm::assumeAligned(InstAlignment.value());
+  if (Alignment >= kMinOriginAlignment &&
+      Size % (64 / DFS.ShadowWidthBits) == 0)
+    return false;
+
+  return true;
+}
+
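A quick standalone model of the predicate above (not part of the patch), with ShadowWidthBits and kMinOriginAlignment hard-coded to the values the new test assumes (16-bit shadows, 4-byte origins); it only illustrates which loads end up on the callback path:

  // Sketch only: mirrors useCallbackLoadLabelAndOrigin under the stated assumptions.
  #include <cassert>
  #include <cstdint>

  bool useCallbackLoadLabelAndOrigin(uint64_t Size, uint64_t InstAlignment) {
    assert(Size != 0);
    const uint64_t ShadowWidthBits = 16;   // assumption: fast-16-label mode
    const uint64_t MinOriginAlignment = 4; // assumption: one origin per 4 bytes
    if (Size <= 2)
      return false; // one aligned 4-byte origin slot covers the whole load
    if (InstAlignment >= MinOriginAlignment && Size % (64 / ShadowWidthBits) == 0)
      return false; // aligned, whole 4-byte groups: keep the inline fast path
    return true;    // e.g. an 8-byte load aligned to 2, or a 3-byte load
  }

This matches the tests below: load64 (align 8) stays on the inline path, while load64_align2 and load17 go through __dfsan_load_label_and_origin.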
+std::pair<Value *, Value *> DFSanFunction::loadFast16ShadowFast(
+    Value *ShadowAddr, Value *OriginAddr, uint64_t Size, Align ShadowAlign,
+    Align OriginAlign, Value *FirstOrigin, Instruction *Pos) {
   // First OR all the WideShadows, then OR individual shadows within the
   // combined WideShadow. This is fewer instructions than ORing shadows
   // individually.
+  const bool ShouldTrackOrigins = DFS.shouldTrackOrigins();
+  std::vector<Value *> Shadows;
+  std::vector<Value *> Origins;
   IRBuilder<> IRB(Pos);
   Value *WideAddr =
       IRB.CreateBitCast(ShadowAddr, Type::getInt64PtrTy(*DFS.Ctx));
   Value *CombinedWideShadow =
       IRB.CreateAlignedLoad(IRB.getInt64Ty(), WideAddr, ShadowAlign);
+  if (ShouldTrackOrigins) {
+    Shadows.push_back(CombinedWideShadow);
+    Origins.push_back(FirstOrigin);
+  }
   for (uint64_t Ofs = 64 / DFS.ShadowWidthBits; Ofs != Size;
        Ofs += 64 / DFS.ShadowWidthBits) {
     WideAddr = IRB.CreateGEP(Type::getInt64Ty(*DFS.Ctx), WideAddr,
@@ -1904,12 +1954,23 @@
     Value *NextWideShadow =
         IRB.CreateAlignedLoad(IRB.getInt64Ty(), WideAddr, ShadowAlign);
     CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, NextWideShadow);
+    if (ShouldTrackOrigins) {
+      Shadows.push_back(NextWideShadow);
+      OriginAddr = IRB.CreateGEP(DFS.OriginTy, OriginAddr,
+                                 ConstantInt::get(DFS.IntptrTy, 1));
+      Origins.push_back(
+          IRB.CreateAlignedLoad(DFS.OriginTy, OriginAddr, OriginAlign));
+    }
   }
   for (unsigned Width = 32; Width >= DFS.ShadowWidthBits; Width >>= 1) {
     Value *ShrShadow = IRB.CreateLShr(CombinedWideShadow, Width);
     CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, ShrShadow);
   }
-  return IRB.CreateTrunc(CombinedWideShadow, DFS.PrimitiveShadowTy);
+  return {IRB.CreateTrunc(CombinedWideShadow, DFS.PrimitiveShadowTy),
+          ShouldTrackOrigins
+              ? combineOrigins(Shadows, Origins, Pos,
+                               ConstantInt::getSigned(IRB.getInt64Ty(), 0))
+              : DFS.ZeroOrigin};
 }
 
 Value *DFSanFunction::loadLegacyShadowFast(Value *ShadowAddr, uint64_t Size,
                                            Align ShadowAlign,
                                            Instruction *Pos) {
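For reference, a scalar model of the reduction this emits (not part of the patch, and it assumes 16-bit shadows as in fast-16-label mode): OR the 64-bit wide shadows together, then fold the combined word so its low 16 bits become the union of every 16-bit lane:

  // Sketch only: the OR/lshr folding done by loadFast16ShadowFast, on plain integers.
  #include <cstddef>
  #include <cstdint>

  uint16_t combineWideShadows(const uint64_t *WideShadows, std::size_t NumWords) {
    uint64_t Combined = 0;
    for (std::size_t I = 0; I < NumWords; ++I) // first OR all the wide shadows
      Combined |= WideShadows[I];
    for (unsigned Width = 32; Width >= 16; Width >>= 1) // then fold the lanes
      Combined |= Combined >> Width;
    return static_cast<uint16_t>(Combined); // union of every 16-bit shadow
  }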
@@ -1982,17 +2043,27 @@
 // Generates IR to load shadow corresponding to bytes [Addr, Addr+Size), where
 // Addr has alignment Align, and take the union of each of those shadows. The
 // returned shadow always has primitive type.
-Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
-                                 Instruction *Pos) {
+std::pair<Value *, Value *> DFSanFunction::loadShadowOrigin(Value *Addr,
+                                                            uint64_t Size,
+                                                            Align InstAlignment,
+                                                            Instruction *Pos) {
+  const bool ShouldTrackOrigins = DFS.shouldTrackOrigins();
+
+  // Non-escaped loads.
   if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
-    const auto i = AllocaShadowMap.find(AI);
-    if (i != AllocaShadowMap.end()) {
+    const auto SI = AllocaShadowMap.find(AI);
+    if (SI != AllocaShadowMap.end()) {
       IRBuilder<> IRB(Pos);
-      return IRB.CreateLoad(DFS.PrimitiveShadowTy, i->second);
+      Value *ShadowLI = IRB.CreateLoad(DFS.PrimitiveShadowTy, SI->second);
+      const auto OI = AllocaOriginMap.find(AI);
+      assert(!ShouldTrackOrigins || OI != AllocaOriginMap.end());
+      return {ShadowLI, ShouldTrackOrigins
+                            ? IRB.CreateLoad(DFS.OriginTy, OI->second)
+                            : nullptr};
     }
   }
 
-  const llvm::Align ShadowAlign(Align * DFS.ShadowWidthBytes);
+  // Load from constant addresses.
   SmallVector<const Value *, 2> Objs;
   getUnderlyingObjects(Addr, Objs);
   bool AllConstants = true;
@@ -2006,33 +2077,65 @@
     break;
   }
   if (AllConstants)
-    return DFS.ZeroPrimitiveShadow;
+    return {DFS.ZeroPrimitiveShadow,
+            ShouldTrackOrigins ? DFS.ZeroOrigin : nullptr};
+
+  if (Size == 0)
+    return {DFS.ZeroPrimitiveShadow,
+            ShouldTrackOrigins ? DFS.ZeroOrigin : nullptr};
+
+  // Use callback to load if this is not an optimizable case for origin
+  // tracking.
+  if (ShouldTrackOrigins &&
+      useCallbackLoadLabelAndOrigin(Size, InstAlignment)) {
+    IRBuilder<> IRB(Pos);
+    CallInst *Call =
+        IRB.CreateCall(DFS.DFSanLoadLabelAndOriginFn,
+                       {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
+                        ConstantInt::get(DFS.IntptrTy, Size)});
+    Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
+    return {IRB.CreateTrunc(IRB.CreateLShr(Call, DFS.OriginWidthBits),
+                            DFS.PrimitiveShadowTy),
+            IRB.CreateTrunc(Call, DFS.OriginTy)};
+  }
+
+  // Other cases that support loading shadows or origins in a fast way.
+  Value *ShadowAddr, *OriginAddr;
+  std::tie(ShadowAddr, OriginAddr) =
+      DFS.getShadowOriginAddress(Addr, InstAlignment, Pos);
+
+  const Align ShadowAlign = getShadowAlign(InstAlignment);
+  const Align OriginAlign = getOriginAlign(InstAlignment);
+  Value *Origin = nullptr;
+  if (ShouldTrackOrigins) {
+    IRBuilder<> IRB(Pos);
+    Origin = IRB.CreateAlignedLoad(DFS.OriginTy, OriginAddr, OriginAlign);
+  }
 
-  Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
   switch (Size) {
-  case 0:
-    return DFS.ZeroPrimitiveShadow;
   case 1: {
     LoadInst *LI = new LoadInst(DFS.PrimitiveShadowTy, ShadowAddr, "", Pos);
     LI->setAlignment(ShadowAlign);
-    return LI;
+    return {LI, Origin};
   }
   case 2: {
     IRBuilder<> IRB(Pos);
     Value *ShadowAddr1 = IRB.CreateGEP(DFS.PrimitiveShadowTy, ShadowAddr,
                                        ConstantInt::get(DFS.IntptrTy, 1));
-    return combineShadows(
-        IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr, ShadowAlign),
-        IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr1, ShadowAlign),
-        Pos);
+    Value *Load =
+        IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr, ShadowAlign);
+    Value *Load1 =
+        IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr1, ShadowAlign);
+    return {combineShadows(Load, Load1, Pos), Origin};
   }
   }
 
   if (ClFast16Labels && Size % (64 / DFS.ShadowWidthBits) == 0)
-    return loadFast16ShadowFast(ShadowAddr, Size, ShadowAlign, Pos);
+    return loadFast16ShadowFast(ShadowAddr, OriginAddr, Size, ShadowAlign,
+                                OriginAlign, Origin, Pos);
 
   if (!AvoidNewBlocks && Size % (64 / DFS.ShadowWidthBits) == 0)
-    return loadLegacyShadowFast(ShadowAddr, Size, ShadowAlign, Pos);
+    return {loadLegacyShadowFast(ShadowAddr, Size, ShadowAlign, Pos), Origin};
 
   IRBuilder<> IRB(Pos);
   FunctionCallee &UnionLoadFn =
@@ -2040,7 +2143,7 @@
   CallInst *FallbackCall = IRB.CreateCall(
       UnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
   FallbackCall->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
-  return FallbackCall;
+  return {FallbackCall, Origin};
 }
 
 static AtomicOrdering addAcquireOrdering(AtomicOrdering AO) {
@@ -2065,6 +2168,7 @@
   uint64_t Size = DL.getTypeStoreSize(LI.getType());
   if (Size == 0) {
     DFSF.setShadow(&LI, DFSF.DFS.getZeroShadow(&LI));
+    DFSF.setOrigin(&LI, DFSF.DFS.ZeroOrigin);
     return;
   }
 
@@ -2076,13 +2180,24 @@
   if (LI.isAtomic())
     LI.setOrdering(addAcquireOrdering(LI.getOrdering()));
 
-  Align Alignment = ClPreserveAlignment ? LI.getAlign() : Align(1);
   Instruction *Pos = LI.isAtomic() ? LI.getNextNode() : &LI;
-  Value *PrimitiveShadow =
-      DFSF.loadShadow(LI.getPointerOperand(), Size, Alignment.value(), Pos);
+  std::vector<Value *> Shadows;
+  std::vector<Value *> Origins;
+  Value *PrimitiveShadow, *Origin;
+  std::tie(PrimitiveShadow, Origin) =
+      DFSF.loadShadowOrigin(LI.getPointerOperand(), Size, LI.getAlign(), Pos);
+  const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
+  if (ShouldTrackOrigins) {
+    Shadows.push_back(PrimitiveShadow);
+    Origins.push_back(Origin);
+  }
   if (ClCombinePointerLabelsOnLoad) {
     Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());
     PrimitiveShadow = DFSF.combineShadows(PrimitiveShadow, PtrShadow, Pos);
+    if (ShouldTrackOrigins) {
+      Shadows.push_back(PtrShadow);
+      Origins.push_back(DFSF.getOrigin(LI.getPointerOperand()));
+    }
   }
   if (!DFSF.DFS.isZeroShadow(PrimitiveShadow))
     DFSF.NonZeroChecks.push_back(PrimitiveShadow);
@@ -2090,6 +2205,11 @@
   Value *Shadow =
       DFSF.expandFromPrimitiveShadow(LI.getType(), PrimitiveShadow, Pos);
   DFSF.setShadow(&LI, Shadow);
+
+  if (ShouldTrackOrigins) {
+    DFSF.setOrigin(&LI, DFSF.combineOrigins(Shadows, Origins, Pos));
+  }
+
   if (ClEventCallbacks) {
     IRBuilder<> IRB(Pos);
     Value *Addr8 = IRB.CreateBitCast(LI.getPointerOperand(), DFSF.DFS.Int8Ptr);
@@ -2327,8 +2447,13 @@
   if (AllLoadsStores) {
     IRBuilder<> IRB(&I);
     DFSF.AllocaShadowMap[&I] = IRB.CreateAlloca(DFSF.DFS.PrimitiveShadowTy);
+    if (DFSF.DFS.shouldTrackOrigins()) {
+      DFSF.AllocaOriginMap[&I] =
+          IRB.CreateAlloca(DFSF.DFS.OriginTy, nullptr, "_dfsa");
+    }
   }
   DFSF.setShadow(&I, DFSF.DFS.ZeroPrimitiveShadow);
+  DFSF.setOrigin(&I, DFSF.DFS.ZeroOrigin);
 }
 
 void DFSanVisitor::visitSelectInst(SelectInst &I) {
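Before the new test file: a simplified scalar model (not part of the patch) of how combineOrigins picks a single origin for the shadow/origin pairs collected above. It mirrors the select chains checked in the tests below, where a later operand's origin wins only if that operand's shadow is non-zero; the helper name and scalar types are illustrative only:

  // Sketch only: "last tainted operand wins" origin selection.
  #include <cstddef>
  #include <cstdint>
  #include <vector>

  uint32_t combineOrigins(const std::vector<uint16_t> &Shadows,
                          const std::vector<uint32_t> &Origins) {
    uint32_t Result = Origins[0]; // start from the first operand's origin
    for (std::size_t I = 1; I < Shadows.size(); ++I)
      if (Shadows[I] != 0)        // operand is tainted: take its origin instead
        Result = Origins[I];
    return Result;
  }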
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_ldst.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_ldst.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_ldst.ll
@@ -0,0 +1,251 @@
+; RUN: opt < %s -dfsan -dfsan-track-origins=1 -dfsan-fast-16-labels=true -S | FileCheck %s --check-prefixes=CHECK_META,CHECK
+; RUN: opt < %s -dfsan -dfsan-track-origins=1 -dfsan-fast-16-labels=true -dfsan-combine-pointer-labels-on-load=false -S | FileCheck %s --check-prefixes=CHECK_META,NO_COMBINE_LOAD_PTR
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK_META: @__dfsan_shadow_width_bits = weak_odr constant i32 [[#SBITS:]]
+; CHECK_META: @__dfsan_shadow_width_bytes = weak_odr constant i32 [[#SBYTES:]]
+
+define {} @load0({}* %p) {
+  ; CHECK: @"dfs$load0"
+  ; CHECK-NEXT: %a = load {}, {}* %p, align 1
+  ; CHECK-NEXT: store {} zeroinitializer, {}* bitcast ([100 x i64]* @__dfsan_retval_tls to {}*), align [[#SBYTES]]
+  ; CHECK-NEXT: store i32 0, i32* @__dfsan_retval_origin_tls, align 4
+  ; CHECK-NEXT: ret {} %a
+
+  %a = load {}, {}* %p
+  ret {} %a
+}
+
+define i16 @load_non_escaped_alloca() {
+  ; CHECK: @"dfs$load_non_escaped_alloca"
+  ; CHECK: [[S_ALLOCA:%.*]] = alloca i[[#SBITS]], align [[#SBYTES]]
+  ; CHECK: [[O_ALLOCA:%.*]] = alloca i32, align 4
+  ; CHECK: [[SHADOW:%.*]] = load i[[#SBITS]], i[[#SBITS]]* [[S_ALLOCA]], align [[#SBYTES]]
+  ; CHECK: [[ORIGIN:%.*]] = load i32, i32* [[O_ALLOCA]], align 4
+  ; CHECK: %a = load i16, i16* %p, align 2
+  ; CHECK: store i[[#SBITS]] [[SHADOW]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[#SBYTES]]
+  ; CHECK: store i32 [[ORIGIN]], i32* @__dfsan_retval_origin_tls, align 4
+
+  %p = alloca i16
+  %a = load i16, i16* %p
+  ret i16 %a
+}
+
+define i16* @load_escaped_alloca() {
+  ; CHECK: @"dfs$load_escaped_alloca"
+  ; CHECK: [[INTP:%.*]] = ptrtoint i[[#SBITS]]* %p to i64
+  ; CHECK: [[OFFSET:%.*]] = and i64 [[INTP]], -123145302310913
+  ; CHECK: [[SHADOW_ADDR:%.*]] = mul i64 [[OFFSET]], 2
+  ; CHECK: [[SHADOW_PTR0:%.*]] = inttoptr i64 [[SHADOW_ADDR]] to i[[#SBITS]]*
+  ; CHECK: [[ORIGIN_OFFSET:%.*]] = add i64 [[OFFSET]], 35184372088832
+  ; CHECK: [[ORIGIN_ADDR:%.*]] = and i64 [[ORIGIN_OFFSET]], -4
+  ; CHECK: [[ORIGIN_PTR:%.*]] = inttoptr i64 [[ORIGIN_ADDR]] to i32*
+  ; CHECK: {{%.*}} = load i32, i32* [[ORIGIN_PTR]], align 4
+  ; CHECK: [[SHADOW_PTR1:%.*]] = getelementptr i[[#SBITS]], i[[#SBITS]]* [[SHADOW_PTR0]], i64 1
+  ; CHECK: [[SHADOW0:%.*]] = load i[[#SBITS]], i[[#SBITS]]* [[SHADOW_PTR0]], align [[#SBYTES]]
+  ; CHECK: [[SHADOW1:%.*]] = load i[[#SBITS]], i[[#SBITS]]* [[SHADOW_PTR1]], align [[#SBYTES]]
+  ; CHECK: {{%.*}} = or i[[#SBITS]] [[SHADOW0]], [[SHADOW1]]
+  ; CHECK: %a = load i16, i16* %p, align 2
+  ; CHECK: store i[[#SBITS]] 0, i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[#SBYTES]]
+  ; CHECK: store i32 0, i32* @__dfsan_retval_origin_tls, align 4
+
+  %p = alloca i16
+  %a = load i16, i16* %p
+  ret i16* %p
+}
+
+@X = constant i1 1
+define i1 @load_global() {
+  ; CHECK: @"dfs$load_global"
+  ; CHECK: %a = load i1, i1* @X, align 1
+  ; CHECK: store i[[#SBITS]] 0, i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[#SBYTES]]
+  ; CHECK: store i32 0, i32* @__dfsan_retval_origin_tls, align 4
+
+  %a = load i1, i1* @X
+  ret i1 %a
+}
+
+define i1 @load1(i1* %p) {
+  ; CHECK: @"dfs$load1"
+  ; CHECK: [[PO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4
+  ; CHECK: [[PS:%.*]] = load i[[#SBITS]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_arg_tls to i[[#SBITS]]*), align [[#SBYTES]]
+  ; CHECK: [[INTP:%.*]] = ptrtoint {{.*}} %p to i64
+  ; CHECK: [[OFFSET:%.*]] = and i64 [[INTP]], -123145302310913
+  ; CHECK: [[SHADOW_ADDR:%.*]] = mul i64 [[OFFSET]], 2
+  ; CHECK: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_ADDR]] to i[[#SBITS]]*
+  ; CHECK: [[ORIGIN_OFFSET:%.*]] = add i64 [[OFFSET]], 35184372088832
+  ; CHECK: [[ORIGIN_ADDR:%.*]] = and i64 [[ORIGIN_OFFSET]], -4
+  ; CHECK: [[ORIGIN_PTR:%.*]] = inttoptr i64 [[ORIGIN_ADDR]] to i32*
+  ; CHECK: [[AO:%.*]] = load i32, i32* [[ORIGIN_PTR]], align 4
+  ; CHECK: [[AS:%.*]] = load i[[#SBITS]], i[[#SBITS]]* [[SHADOW_PTR]], align [[#SBYTES]]
+  ; CHECK: [[RS:%.*]] = or i[[#SBITS]] [[AS]], [[PS]]
+  ; CHECK: [[PS_NZ:%.*]] = icmp ne i[[#SBITS]] [[PS]], 0
+  ; CHECK: [[RO:%.*]] = select i1 [[PS_NZ]], i32 [[PO]], i32 [[AO]]
+  ; CHECK: %a = load i1, i1* %p, align 1
+  ; CHECK: store i[[#SBITS]] [[RS]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[#SBYTES]]
+  ; CHECK: store i32 [[RO]], i32* @__dfsan_retval_origin_tls, align 4
+
+  %a = load i1, i1* %p
+  ret i1 %a
+}
+
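A standalone sketch (not part of the test) of the address arithmetic the CHECK lines above encode for x86-64: mask the application address, double the offset for the 16-bit shadow, and add the origin base before rounding down to a 4-byte origin slot. The sample address is invented for illustration:

  // Sketch only: shadow/origin addressing using the constants in the CHECK lines.
  #include <cstdint>
  #include <cstdio>

  int main() {
    const uint64_t Addr = 0x710000001006ULL;           // hypothetical app address
    const uint64_t Offset = Addr & ~0x700000000000ULL; // and i64 ..., -123145302310913
    const uint64_t ShadowAddr = Offset * 2;            // 2 shadow bytes per app byte
    const uint64_t OriginAddr =
        (Offset + 0x200000000000ULL) & ~3ULL;          // add 35184372088832, and -4
    std::printf("shadow at 0x%llx, origin at 0x%llx\n",
                (unsigned long long)ShadowAddr, (unsigned long long)OriginAddr);
    return 0;
  }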
+define i16 @load16(i1 %i, i16* %p) {
+  ; CHECK: @"dfs$load16"
+  ; CHECK: [[PO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
+  ; CHECK: [[PS:%.*]] = load i[[#SBITS]], i[[#SBITS]]* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i[[#SBITS]]*), align [[#SBYTES]]
+  ; CHECK: [[INTP:%.*]] = ptrtoint {{.*}} %p to i64
+  ; CHECK: [[OFFSET:%.*]] = and i64 [[INTP]], -123145302310913
+  ; CHECK: [[SHADOW_ADDR:%.*]] = mul i64 [[OFFSET]], 2
+  ; CHECK: [[SHADOW_PTR0:%.*]] = inttoptr i64 [[SHADOW_ADDR]] to i[[#SBITS]]*
+  ; CHECK: [[ORIGIN_OFFSET:%.*]] = add i64 [[OFFSET]], 35184372088832
+  ; CHECK: [[ORIGIN_ADDR:%.*]] = and i64 [[ORIGIN_OFFSET]], -4
+  ; CHECK: [[ORIGIN_PTR:%.*]] = inttoptr i64 [[ORIGIN_ADDR]] to i32*
+  ; CHECK: [[AO:%.*]] = load i32, i32* [[ORIGIN_PTR]], align 4
+  ; CHECK: [[SHADOW_PTR1:%.*]] = getelementptr i[[#SBITS]], i[[#SBITS]]* [[SHADOW_PTR0]], i64 1
+  ; CHECK: [[SHADOW0:%.*]] = load i[[#SBITS]], i[[#SBITS]]* [[SHADOW_PTR0]], align [[#SBYTES]]
+  ; CHECK: [[SHADOW1:%.*]] = load i[[#SBITS]], i[[#SBITS]]* [[SHADOW_PTR1]], align [[#SBYTES]]
+  ; CHECK: [[AS:%.*]] = or i[[#SBITS]] [[SHADOW0]], [[SHADOW1]]
+  ; CHECK: [[RS:%.*]] = or i[[#SBITS]] [[AS]], [[PS]]
+  ; CHECK: [[PS_NZ:%.*]] = icmp ne i[[#SBITS]] [[PS]], 0
+  ; CHECK: [[RO:%.*]] = select i1 [[PS_NZ]], i32 [[PO]], i32 [[AO]]
+  ; CHECK: %a = load i16, i16* %p, align 2
+  ; CHECK: store i[[#SBITS]] [[RS]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[#SBYTES]]
+  ; CHECK: store i32 [[RO]], i32* @__dfsan_retval_origin_tls, align 4
+
+  %a = load i16, i16* %p
+  ret i16 %a
+}
+
+define i32 @load32(i32* %p) {
+  ; CHECK: @"dfs$load32"
+
+  ; NO_COMBINE_LOAD_PTR: @"dfs$load32"
+  ; NO_COMBINE_LOAD_PTR: [[INTP:%.*]] = ptrtoint i32* %p to i64
+  ; NO_COMBINE_LOAD_PTR: [[OFFSET:%.*]] = and i64 [[INTP]], -123145302310913
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW_ADDR:%.*]] = mul i64 [[OFFSET]], 2
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_ADDR]] to i[[#SBITS]]*
+  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_ADDR:%.*]] = add i64 [[OFFSET]], 35184372088832
+  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_PTR:%.*]] = inttoptr i64 [[ORIGIN_ADDR]] to i32*
+  ; NO_COMBINE_LOAD_PTR: [[AO:%.*]] = load i32, i32* [[ORIGIN_PTR]], align 4
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW_PTR64:%.*]] = bitcast i[[#SBITS]]* [[SHADOW_PTR]] to i64*
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW64:%.*]] = load i64, i64* [[SHADOW_PTR64]], align [[#SBYTES]]
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW64_H32:%.*]] = lshr i64 [[SHADOW64]], 32
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW64_HL32:%.*]] = or i64 [[SHADOW64]], [[SHADOW64_H32]]
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW64_HL32_H16:%.*]] = lshr i64 [[SHADOW64_HL32]], 16
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW64_HL32_HL16:%.*]] = or i64 [[SHADOW64_HL32]], [[SHADOW64_HL32_H16]]
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW:%.*]] = trunc i64 [[SHADOW64_HL32_HL16]] to i[[#SBITS]]
+  ; NO_COMBINE_LOAD_PTR: %a = load i32, i32* %p, align 4
+  ; NO_COMBINE_LOAD_PTR: store i[[#SBITS]] [[SHADOW]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[#SBYTES]]
+  ; NO_COMBINE_LOAD_PTR: store i32 [[AO]], i32* @__dfsan_retval_origin_tls, align 4
+
+  %a = load i32, i32* %p
+  ret i32 %a
+}
+
+define i64 @load64(i64* %p) {
+  ; CHECK: @"dfs$load64"
+
+  ; NO_COMBINE_LOAD_PTR: @"dfs$load64"
+  ; NO_COMBINE_LOAD_PTR: [[INTP:%.*]] = ptrtoint i64* %p to i64
+  ; NO_COMBINE_LOAD_PTR: [[OFFSET:%.*]] = and i64 [[INTP]], -123145302310913
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW_ADDR:%.*]] = mul i64 [[OFFSET]], 2
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_ADDR]] to i[[#SBITS]]*
+  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_ADDR:%.*]] = add i64 [[OFFSET]], 35184372088832
+  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_PTR_0:%.*]] = inttoptr i64 [[ORIGIN_ADDR]] to i32*
+  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_0:%.*]] = load i32, i32* [[ORIGIN_PTR_0]], align 8
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW_PTR_0:%.*]] = bitcast i[[#SBITS]]* [[SHADOW_PTR]] to i64*
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW_0:%.*]] = load i64, i64* [[SHADOW_PTR_0]], align [[#SBYTES]]
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW_PTR_1:%.*]] = getelementptr i64, i64* [[SHADOW_PTR_0]], i64 1
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW_1:%.*]] = load i64, i64* [[SHADOW_PTR_1]], align [[#SBYTES]]
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW64:%.*]] = or i64 [[SHADOW_0]], [[SHADOW_1]]
+  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_PTR_1:%.*]] = getelementptr i32, i32* [[ORIGIN_PTR_0]], i64 1
+  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_1:%.*]] = load i32, i32* [[ORIGIN_PTR_1]], align 8
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW64_H32:%.*]] = lshr i64 [[SHADOW64]], 32
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW64_HL32:%.*]] = or i64 [[SHADOW64]], [[SHADOW64_H32]]
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW64_HL32_H16:%.*]] = lshr i64 [[SHADOW64_HL32]], 16
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW64_HL32_HL16:%.*]] = or i64 [[SHADOW64_HL32]], [[SHADOW64_HL32_H16]]
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW:%.*]] = trunc i64 [[SHADOW64_HL32_HL16]] to i[[#SBITS]]
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW_1_NZ:%.*]] = icmp ne i64 [[SHADOW_1]], 0
+  ; NO_COMBINE_LOAD_PTR: [[ORIGIN:%.*]] = select i1 [[SHADOW_1_NZ]], i32 [[ORIGIN_1]], i32 [[ORIGIN_0]]
+  ; NO_COMBINE_LOAD_PTR: %a = load i64, i64* %p, align 8
+  ; NO_COMBINE_LOAD_PTR: store i[[#SBITS]] [[SHADOW]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[#SBYTES]]
+  ; NO_COMBINE_LOAD_PTR: store i32 [[ORIGIN]], i32* @__dfsan_retval_origin_tls, align 4
+
+  %a = load i64, i64* %p
+  ret i64 %a
+}
+
+define i64 @load64_align2(i64* %p) {
+  ; CHECK: @"dfs$load64_align2"
+
+  ; NO_COMBINE_LOAD_PTR: @"dfs$load64_align2"
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[INTP:%.*]] = bitcast i64* %p to i8*
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[LABEL_ORIGIN:%.*]] = call zeroext i64 @__dfsan_load_label_and_origin(i8* [[INTP]], i64 8)
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[LABEL_ORIGIN_H32:%.*]] = lshr i64 [[LABEL_ORIGIN]], 32
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[LABEL:%.*]] = trunc i64 [[LABEL_ORIGIN_H32]] to i[[#SBITS]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[ORIGIN:%.*]] = trunc i64 [[LABEL_ORIGIN]] to i32
+  ; NO_COMBINE_LOAD_PTR-NEXT: %a = load i64, i64* %p, align [[#SBYTES]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: store i[[#SBITS]] [[LABEL]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[#SBYTES]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: store i32 [[ORIGIN]], i32* @__dfsan_retval_origin_tls, align 4
+
+  %a = load i64, i64* %p, align 2
+  ret i64 %a
+}
+
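A small sketch (not part of the test) of how the packed i64 returned by __dfsan_load_label_and_origin is consumed, mirroring the lshr/trunc pair checked above: the label union lives in the upper 32 bits and the origin of the first tainted byte in the lower 32 bits:

  // Sketch only: splitting the packed label/origin return value.
  #include <cstdint>

  struct LabelOrigin {
    uint16_t Label;  // union of the labels of all loaded bytes
    uint32_t Origin; // origin of the first tainted byte
  };

  LabelOrigin unpackLabelAndOrigin(uint64_t Packed) {
    LabelOrigin R;
    R.Label = static_cast<uint16_t>(Packed >> 32); // lshr i64 ..., 32 ; trunc
    R.Origin = static_cast<uint32_t>(Packed);      // trunc i64 ... to i32
    return R;
  }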
+define i92 @load92(i92* %p) {
+  ; CHECK: @"dfs$load92"
+
+  ; NO_COMBINE_LOAD_PTR: @"dfs$load92"
+  ; NO_COMBINE_LOAD_PTR: [[INTP:%.*]] = ptrtoint i92* %p to i64
+  ; NO_COMBINE_LOAD_PTR: [[OFFSET:%.*]] = and i64 [[INTP]], -123145302310913
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW_ADDR:%.*]] = mul i64 [[OFFSET]], 2
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_ADDR]] to i[[#SBITS]]*
+  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_ADDR:%.*]] = add i64 [[OFFSET]], 35184372088832
+  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_PTR_0:%.*]] = inttoptr i64 [[ORIGIN_ADDR]] to i32*
+  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_0:%.*]] = load i32, i32* [[ORIGIN_PTR_0]], align 8
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW_PTR_0:%.*]] = bitcast i[[#SBITS]]* [[SHADOW_PTR]] to i64*
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW_0:%.*]] = load i64, i64* [[SHADOW_PTR_0]], align [[#SBYTES]]
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW_PTR_1:%.*]] = getelementptr i64, i64* [[SHADOW_PTR_0]], i64 1
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW_1:%.*]] = load i64, i64* [[SHADOW_PTR_1]], align [[#SBYTES]]
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW_01:%.*]] = or i64 [[SHADOW_0]], [[SHADOW_1]]
+  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_PTR_1:%.*]] = getelementptr i32, i32* [[ORIGIN_PTR_0]], i64 1
+  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_1:%.*]] = load i32, i32* [[ORIGIN_PTR_1]], align 8
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW_PTR_2:%.*]] = getelementptr i64, i64* [[SHADOW_PTR_1]], i64 1
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW_2:%.*]] = load i64, i64* [[SHADOW_PTR_2]], align [[#SBYTES]]
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW64:%.*]] = or i64 [[SHADOW_01]], [[SHADOW_2]]
+  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_PTR_2:%.*]] = getelementptr i32, i32* [[ORIGIN_PTR_1]], i64 1
+  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_2:%.*]] = load i32, i32* [[ORIGIN_PTR_2]], align 8
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW64_H32:%.*]] = lshr i64 [[SHADOW64]], 32
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW64_HL32:%.*]] = or i64 [[SHADOW64]], [[SHADOW64_H32]]
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW64_HL32_H16:%.*]] = lshr i64 [[SHADOW64_HL32]], 16
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW64_HL32_HL16:%.*]] = or i64 [[SHADOW64_HL32]], [[SHADOW64_HL32_H16]]
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW:%.*]] = trunc i64 [[SHADOW64_HL32_HL16]] to i[[#SBITS]]
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW_1_NZ:%.*]] = icmp ne i64 [[SHADOW_1]], 0
+  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_10:%.*]] = select i1 [[SHADOW_1_NZ]], i32 [[ORIGIN_1]], i32 [[ORIGIN_0]]
+  ; NO_COMBINE_LOAD_PTR: [[SHADOW_2_NZ:%.*]] = icmp ne i64 [[SHADOW_2]], 0
+  ; NO_COMBINE_LOAD_PTR: [[ORIGIN:%.*]] = select i1 [[SHADOW_2_NZ]], i32 [[ORIGIN_2]], i32 [[ORIGIN_10]]
+  ; NO_COMBINE_LOAD_PTR: %a = load i92, i92* %p, align 8
+  ; NO_COMBINE_LOAD_PTR: store i[[#SBITS]] [[SHADOW]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[#SBYTES]]
+  ; NO_COMBINE_LOAD_PTR: store i32 [[ORIGIN]], i32* @__dfsan_retval_origin_tls, align 4
+
+  %a = load i92, i92* %p
+  ret i92 %a
+}
+
+define i17 @load17(i17* %p) {
+  ; CHECK: @"dfs$load17"
+
+  ; NO_COMBINE_LOAD_PTR: @"dfs$load17"
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[INTP:%.*]] = bitcast i17* %p to i8*
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[LABEL_ORIGIN:%.*]] = call zeroext i64 @__dfsan_load_label_and_origin(i8* [[INTP]], i64 3)
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[LABEL_ORIGIN_H32:%.*]] = lshr i64 [[LABEL_ORIGIN]], 32
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[LABEL:%.*]] = trunc i64 [[LABEL_ORIGIN_H32]] to i[[#SBITS]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[ORIGIN:%.*]] = trunc i64 [[LABEL_ORIGIN]] to i32
+  ; NO_COMBINE_LOAD_PTR-NEXT: %a = load i17, i17* %p, align 4
+  ; NO_COMBINE_LOAD_PTR-NEXT: store i[[#SBITS]] [[LABEL]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[#SBYTES]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: store i32 [[ORIGIN]], i32* @__dfsan_retval_origin_tls, align 4
+
+  %a = load i17, i17* %p, align 4
+  ret i17 %a
+}