Index: lib/Transforms/Instrumentation/MemorySanitizer.cpp =================================================================== --- lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -373,6 +373,7 @@ friend struct MemorySanitizerVisitor; friend struct VarArgAMD64Helper; friend struct VarArgMIPS64Helper; + friend struct VarArgAArch64Helper; }; } // anonymous namespace @@ -3049,6 +3050,242 @@ } }; + +/// \brief AArch64-specific implementation of VarArgHelper. +struct VarArgAArch64Helper : public VarArgHelper { + static const unsigned kAArch64GrArgSize = 56; + static const unsigned kAArch64VrArgSize = 128; + + static const unsigned AArch64GrBegOffset = 0; + static const unsigned AArch64GrEndOffset = kAArch64GrArgSize; + // Make VR space aligned to 16 bytes. + static const unsigned AArch64VrBegOffset = AArch64GrEndOffset + 8; + static const unsigned AArch64VrEndOffset = AArch64VrBegOffset + + kAArch64VrArgSize; + static const unsigned AArch64VAEndOffset = AArch64VrEndOffset; + + Function &F; + MemorySanitizer &MS; + MemorySanitizerVisitor &MSV; + Value *VAArgTLSCopy; + Value *VAArgOverflowSize; + + SmallVector VAStartInstrumentationList; + + VarArgAArch64Helper(Function &F, MemorySanitizer &MS, + MemorySanitizerVisitor &MSV) + : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(nullptr), + VAArgOverflowSize(nullptr) {} + + enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory }; + + ArgKind classifyArgument(Value* arg) { + Type *T = arg->getType(); + if (T->isFPOrFPVectorTy()) + return AK_FloatingPoint; + if ((T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64) + || (T->isPointerTy())) + return AK_GeneralPurpose; + return AK_Memory; + } + + // The instrumentation stores the argument shadow in a non ABI-specific + // format because it does not know which argument is named (since Clang, + // like x86_64 case, lowers the va_args in the frontend and this pass only + // sees the low level code that deals with va_list internals). + // The first seven GR registers are saved in the first 56 bytes of the + // va_arg tls arra, followers by the first 8 FP/SIMD registers, and then + // the remaining arguments. + // Using constant offset within the va_arg TLS array allows fast copy + // in the finalize instrumentation. + void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override { + unsigned GrOffset = AArch64GrBegOffset; + unsigned VrOffset = AArch64VrBegOffset; + unsigned OverflowOffset = AArch64VAEndOffset; + + const DataLayout &DL = F.getParent()->getDataLayout(); + for (CallSite::arg_iterator ArgIt = CS.arg_begin() + 1, End = CS.arg_end(); + ArgIt != End; ++ArgIt) { + Value *A = *ArgIt; + ArgKind AK = classifyArgument(A); + if (AK == AK_GeneralPurpose && GrOffset >= AArch64GrEndOffset) + AK = AK_Memory; + if (AK == AK_FloatingPoint && VrOffset >= AArch64VrEndOffset) + AK = AK_Memory; + Value *Base; + switch (AK) { + case AK_GeneralPurpose: + Base = getShadowPtrForVAArgument(A->getType(), IRB, GrOffset); + GrOffset += 8; + break; + case AK_FloatingPoint: + Base = getShadowPtrForVAArgument(A->getType(), IRB, VrOffset); + VrOffset += 16; + break; + case AK_Memory: + uint64_t ArgSize = DL.getTypeAllocSize(A->getType()); + Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset); + OverflowOffset += RoundUpToAlignment(ArgSize, 8); + break; + } + IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment); + } + Constant *OverflowSize = + ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AArch64VAEndOffset); + IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS); + } + + /// Compute the shadow address for a given va_arg. + Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, + int ArgOffset) { + Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy); + Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset)); + return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0), + "_msarg"); + } + + void visitVAStartInst(VAStartInst &I) override { + IRBuilder<> IRB(&I); + VAStartInstrumentationList.push_back(&I); + Value *VAListTag = I.getArgOperand(0); + Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB); + // Unpoison the whole __va_list_tag. + // FIXME: magic ABI constants (size of va_list). + IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), + /* size */32, /* alignment */8, false); + } + + void visitVACopyInst(VACopyInst &I) override { + IRBuilder<> IRB(&I); + Value *VAListTag = I.getArgOperand(0); + Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB); + // Unpoison the whole __va_list_tag. + // FIXME: magic ABI constants (size of va_list). + IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), + /* size */32, /* alignment */8, false); + } + + // Retrieve a va_list field of 'void*' size. + Value* getVAField64(IRBuilder<> &IRB, Value *VAListTag, int offset) { + Value *SaveAreaPtrPtr = + IRB.CreateIntToPtr( + IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), + ConstantInt::get(MS.IntptrTy, offset)), + Type::getInt64PtrTy(*MS.C)); + return IRB.CreateLoad(SaveAreaPtrPtr); + } + + // Retrieve a va_list field of 'int' size. + Value* getVAField32(IRBuilder<> &IRB, Value *VAListTag, int offset) { + Value *SaveAreaPtr = + IRB.CreateIntToPtr( + IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), + ConstantInt::get(MS.IntptrTy, offset)), + Type::getInt32PtrTy(*MS.C)); + Value *SaveArea32 = IRB.CreateLoad(SaveAreaPtr); + return IRB.CreateSExt(SaveArea32, MS.IntptrTy); + } + + void finalizeInstrumentation() override { + assert(!VAArgOverflowSize && !VAArgTLSCopy && + "finalizeInstrumentation called twice"); + if (!VAStartInstrumentationList.empty()) { + // If there is a va_start in this function, make a backup copy of + // va_arg_tls somewhere in the function entry block. + IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI()); + VAArgOverflowSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS); + Value *CopySize = + IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AArch64VAEndOffset), + VAArgOverflowSize); + VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize); + IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8); + } + + Value *GrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64GrArgSize); + Value *VrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64VrArgSize); + + // Instrument va_start, copy va_list shadow from the backup copy of + // the TLS contents. + for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) { + CallInst *OrigInst = VAStartInstrumentationList[i]; + IRBuilder<> IRB(OrigInst->getNextNode()); + + Value *VAListTag = OrigInst->getArgOperand(0); + + // The variadic ABI for AArch64 creates two areas to save the incoming + // argument registers (one for 64-bit general register xn-x7 and another + // for 128-bit FP/SIMD vn-v7). + // We need then to propagate the shadow arguments on both regions + // 'va::__gr_top + va::__gr_offs' and 'va::__vr_top + va::__vr_offs'. + // The remaning arguments are saved on shadow for 'va::stack'. + // One caveat is it requires only to propagate the non-named arguments, + // however on the call site instrumentation 'all' the arguments are + // saved. So to copy the shadow values from the va_arg TLS array + // we need to adjust the offset for both GR and VR fields based on + // the __{gr,vr}_offs value (since they are stores based on incoming + // named arguments). + + // Read the stack pointer from the va_list. + Value *StackSaveAreaPtr = getVAField64(IRB, VAListTag, 0); + + // Read both the __gr_top and __gr_off and add them up. + Value *GrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 8); + Value *GrOffSaveArea = getVAField32(IRB, VAListTag, 24); + + Value *GrRegSaveAreaPtr = IRB.CreateAdd(GrTopSaveAreaPtr, GrOffSaveArea); + + // Read both the __vr_top and __vr_off and add them up. + Value *VrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 16); + Value *VrOffSaveArea = getVAField32(IRB, VAListTag, 28); + + Value *VrRegSaveAreaPtr = IRB.CreateAdd(VrTopSaveAreaPtr, VrOffSaveArea); + + // It does not know how many named arguments is being used and, on the + // callsite all the arguments were saved. Since __gr_off is defined as + // '0 - ((8 - named_gr) * 8)', the idea is to just propagate the variadic + // argument by ignoring the bytes of shadow from named arguments. + Value *GrRegSaveAreaShadowPtrOff = + IRB.CreateAdd(GrArgSize, GrOffSaveArea); + + Value *GrRegSaveAreaShadowPtr = + MSV.getShadowPtr(GrRegSaveAreaPtr, IRB.getInt8Ty(), IRB); + + Value *GrSrcPtr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy, + GrRegSaveAreaShadowPtrOff); + Value *GrCopySize = IRB.CreateSub(GrArgSize, GrRegSaveAreaShadowPtrOff); + + IRB.CreateMemCpy(GrRegSaveAreaShadowPtr, GrSrcPtr, GrCopySize, 8); + + // Again, but for FP/SIMD values. + Value *VrRegSaveAreaShadowPtrOff = + IRB.CreateAdd(VrArgSize, VrOffSaveArea); + + Value *VrRegSaveAreaShadowPtr = + MSV.getShadowPtr(VrRegSaveAreaPtr, IRB.getInt8Ty(), IRB); + + Value *VrSrcPtr = IRB.CreateInBoundsGEP( + IRB.getInt8Ty(), + IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy, + IRB.getInt32(AArch64VrBegOffset)), + VrRegSaveAreaShadowPtrOff); + Value *VrCopySize = IRB.CreateSub(VrArgSize, VrRegSaveAreaShadowPtrOff); + + IRB.CreateMemCpy(VrRegSaveAreaShadowPtr, VrSrcPtr, VrCopySize, 8); + + // And finally for remaining arguments. + Value *StackSaveAreaShadowPtr = + MSV.getShadowPtr(StackSaveAreaPtr, IRB.getInt8Ty(), IRB); + + Value *StackSrcPtr = + IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy, + IRB.getInt32(AArch64VAEndOffset)); + + IRB.CreateMemCpy(StackSaveAreaShadowPtr, StackSrcPtr, + VAArgOverflowSize, 16); + } + } +}; + /// \brief A no-op implementation of VarArgHelper. struct VarArgNoOpHelper : public VarArgHelper { VarArgNoOpHelper(Function &F, MemorySanitizer &MS, @@ -3073,6 +3310,8 @@ else if (TargetTriple.getArch() == llvm::Triple::mips64 || TargetTriple.getArch() == llvm::Triple::mips64el) return new VarArgMIPS64Helper(Func, Msan, Visitor); + else if (TargetTriple.getArch() == llvm::Triple::aarch64) + return new VarArgAArch64Helper(Func, Msan, Visitor); else return new VarArgNoOpHelper(Func, Msan, Visitor); } Index: test/Instrumentation/MemorySanitizer/AArch64/vararg.ll =================================================================== --- /dev/null +++ test/Instrumentation/MemorySanitizer/AArch64/vararg.ll @@ -0,0 +1,75 @@ +; RUN: opt < %s -msan -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +%struct.__va_list = type { i8*, i8*, i8*, i32, i32 } + +define i32 @foo(i32 %guard, ...) { + %vl = alloca %struct.__va_list, align 8 + %1 = bitcast %struct.__va_list* %vl to i8* + call void @llvm.lifetime.start(i64 32, i8* %1) + call void @llvm.va_start(i8* %1) + call void @llvm.va_end(i8* %1) + call void @llvm.lifetime.end(i64 32, i8* %1) + ret i32 0 +} + +; First check if the variadic shadow values are saved in stack with correct +; size (192 is total of general purpose registers size, 56, rounded to 16 +; plus total of floating-point registers size, 128). + +; CHECK-LABEL: @foo +; CHECK: [[A:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls +; CHECK: [[B:%.*]] = add i64 192, [[A]] +; CHECK: alloca {{.*}} [[B]] + +; We expect three memcpy operations: one for the general purpose registers, +; one for floating-point/SIMD ones, and one for thre remaining arguments. + +; Propagate the GR shadow values on for the va_list::__gp_top, adjust the +; offset in the __msan_va_arg_tls based on va_list:__gp_off, and finally +; issue the memcpy. +; CHECK: [[GRP:%.*]] = getelementptr inbounds i8, i8* {{%.*}}, i64 {{%.*}} +; CHECK: [[GRSIZE:%.*]] = sub i64 56, {{%.*}} +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{%.*}}, i8* [[GRP]], i64 [[GRSIZE]], i32 8, i1 false) + +; Propagate the VR shadow values on for the va_list::__vr_top, adjust the +; offset in the __msan_va_arg_tls based on va_list:__vr_off, and finally +; issue the memcpy. +; CHECK: [[VRP:%.*]] = getelementptr inbounds i8, i8* {{%.*}}, i64 {{%.*}} +; CHECK: [[VRSIZE:%.*]] = sub i64 128, {{%.*}} +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{%.*}}, i8* [[VRP]], i64 [[VRSIZE]], i32 8, i1 false) + +; Copy the remaining shadow values on the va_list::__stack position (it is +; on the constant offset of 192 from __msan_va_arg_tls). +; CHECK: [[STACK:%.*]] = getelementptr inbounds i8, i8* {{%.*}}, i32 192 +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{%.*}}, i8* [[STACK]], i64 {{%.*}}, i32 16, i1 false) + +declare void @llvm.lifetime.start(i64, i8* nocapture) #1 +declare void @llvm.va_start(i8*) #2 +declare void @llvm.va_end(i8*) #2 +declare void @llvm.lifetime.end(i64, i8* nocapture) #1 + +define i32 @bar() { + %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i32 2, double 3.000000e+00, + double 4.000000e+00, i32 5, i32 6, + double 7.000000e+00, i32 8, i32 9, i32 10, i32 11) + ret i32 %1 +} + +; Save the incoming shadow value from the arguments in the __msan_va_arg_tls +; array. General purpose registers are saved at positions from 0 to 56, Floating +; point and SIMD are saved from 64 to 192, and the remaining from 192. +; CHECK-LABEL: @bar +; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 8 +; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 64 +; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 80 +; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 16 +; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 24 +; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 96 +; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 32 +; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 40 +; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 48 +; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 192 +; CHECK: store {{.*}} 8, {{.*}} @__msan_va_arg_overflow_size_tls