Index: llvm/trunk/lib/Transforms/Instrumentation/MemorySanitizer.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ llvm/trunk/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -379,6 +379,7 @@
   friend struct VarArgAMD64Helper;
   friend struct VarArgMIPS64Helper;
   friend struct VarArgAArch64Helper;
+  friend struct VarArgPowerPC64Helper;
 };
 
 } // anonymous namespace
@@ -3374,6 +3375,163 @@
   }
 };
 
+/// \brief PowerPC64-specific implementation of VarArgHelper.
+struct VarArgPowerPC64Helper : public VarArgHelper {
+  Function &F;
+  MemorySanitizer &MS;
+  MemorySanitizerVisitor &MSV;
+  Value *VAArgTLSCopy;
+  Value *VAArgSize;
+
+  SmallVector<CallInst *, 16> VAStartInstrumentationList;
+
+  VarArgPowerPC64Helper(Function &F, MemorySanitizer &MS,
+                        MemorySanitizerVisitor &MSV)
+    : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(nullptr),
+      VAArgSize(nullptr) {}
+
+  void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {
+    // For PowerPC, we need to deal with alignment of stack arguments -
+    // they are mostly aligned to 8 bytes, but vectors and i128 arrays
+    // are aligned to 16 bytes, byvals can be aligned to 8 or 16 bytes,
+    // and QPX vectors are aligned to 32 bytes. For that reason, we
+    // compute the current offset from the stack pointer (which is always
+    // properly aligned) and the offset of the first vararg, then subtract
+    // them.
+    unsigned VAArgBase;
+    llvm::Triple TargetTriple(F.getParent()->getTargetTriple());
+    // The parameter save area starts 48 bytes from the frame pointer for
+    // ABIv1, and 32 bytes for ABIv2. This is usually determined by the
+    // target endianness, but in theory could be overridden by a function
+    // attribute. For simplicity, we ignore it here (it'd only matter for
+    // QPX vectors).
+    if (TargetTriple.getArch() == llvm::Triple::ppc64)
+      VAArgBase = 48;
+    else
+      VAArgBase = 32;
+    unsigned VAArgOffset = VAArgBase;
+    const DataLayout &DL = F.getParent()->getDataLayout();
+    for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
+         ArgIt != End; ++ArgIt) {
+      Value *A = *ArgIt;
+      unsigned ArgNo = CS.getArgumentNo(ArgIt);
+      bool IsFixed = ArgNo < CS.getFunctionType()->getNumParams();
+      bool IsByVal = CS.paramHasAttr(ArgNo + 1, Attribute::ByVal);
+      if (IsByVal) {
+        assert(A->getType()->isPointerTy());
+        Type *RealTy = A->getType()->getPointerElementType();
+        uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
+        uint64_t ArgAlign = CS.getParamAlignment(ArgNo + 1);
+        if (ArgAlign < 8)
+          ArgAlign = 8;
+        VAArgOffset = alignTo(VAArgOffset, ArgAlign);
+        if (!IsFixed) {
+          Value *Base = getShadowPtrForVAArgument(RealTy, IRB,
+                                                  VAArgOffset - VAArgBase);
+          IRB.CreateMemCpy(Base, MSV.getShadowPtr(A, IRB.getInt8Ty(), IRB),
+                           ArgSize, kShadowTLSAlignment);
+        }
+        VAArgOffset += alignTo(ArgSize, 8);
+      } else {
+        Value *Base;
+        uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
+        uint64_t ArgAlign = 8;
+        if (A->getType()->isArrayTy()) {
+          // Arrays are aligned to element size, except for long double
+          // arrays, which are aligned to 8 bytes.
+          Type *ElementTy = A->getType()->getArrayElementType();
+          if (!ElementTy->isPPC_FP128Ty())
+            ArgAlign = DL.getTypeAllocSize(ElementTy);
+        } else if (A->getType()->isVectorTy()) {
+          // Vectors are naturally aligned.
+          ArgAlign = DL.getTypeAllocSize(A->getType());
+        }
+        if (ArgAlign < 8)
+          ArgAlign = 8;
+        VAArgOffset = alignTo(VAArgOffset, ArgAlign);
+        if (DL.isBigEndian()) {
+          // Adjust the shadow for arguments smaller than 8 bytes to match
+          // the placement of the bits on a big-endian system.
+          if (ArgSize < 8)
+            VAArgOffset += (8 - ArgSize);
+        }
+        if (!IsFixed) {
+          Base = getShadowPtrForVAArgument(A->getType(), IRB,
+                                           VAArgOffset - VAArgBase);
+          IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
+        }
+        VAArgOffset += ArgSize;
+        VAArgOffset = alignTo(VAArgOffset, 8);
+      }
+      if (IsFixed)
+        VAArgBase = VAArgOffset;
+    }
+
+    Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(),
+                                                VAArgOffset - VAArgBase);
+    // Here we use VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creating
+    // a new class member; it holds the total size of all varargs.
+    IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
+  }
+
+  /// \brief Compute the shadow address for a given va_arg.
+  Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
+                                   int ArgOffset) {
+    Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
+    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+    return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
+                              "_msarg");
+  }
+
+  void visitVAStartInst(VAStartInst &I) override {
+    IRBuilder<> IRB(&I);
+    VAStartInstrumentationList.push_back(&I);
+    Value *VAListTag = I.getArgOperand(0);
+    Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
+    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+                     /* size */ 8, /* alignment */ 8, false);
+  }
+
+  void visitVACopyInst(VACopyInst &I) override {
+    IRBuilder<> IRB(&I);
+    Value *VAListTag = I.getArgOperand(0);
+    Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
+    // Unpoison the whole __va_list_tag.
+    // FIXME: magic ABI constants.
+    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+                     /* size */ 8, /* alignment */ 8, false);
+  }
+
+  void finalizeInstrumentation() override {
+    assert(!VAArgSize && !VAArgTLSCopy &&
+           "finalizeInstrumentation called twice");
+    IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
+    VAArgSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
+    Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
+                                    VAArgSize);
+
+    if (!VAStartInstrumentationList.empty()) {
+      // If there is a va_start in this function, make a backup copy of
+      // va_arg_tls somewhere in the function entry block.
+      VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+      IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8);
+    }
+
+    // Instrument va_start.
+    // Copy va_list shadow from the backup copy of the TLS contents.
+    for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
+      CallInst *OrigInst = VAStartInstrumentationList[i];
+      IRBuilder<> IRB(OrigInst->getNextNode());
+      Value *VAListTag = OrigInst->getArgOperand(0);
+      Value *RegSaveAreaPtrPtr =
+          IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+                             Type::getInt64PtrTy(*MS.C));
+      Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr);
+      Value *RegSaveAreaShadowPtr =
+          MSV.getShadowPtr(RegSaveAreaPtr, IRB.getInt8Ty(), IRB);
+      IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy, CopySize, 8);
+    }
+  }
+};
+
 /// \brief A no-op implementation of VarArgHelper.
 struct VarArgNoOpHelper : public VarArgHelper {
   VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
@@ -3400,6 +3558,9 @@
     return new VarArgMIPS64Helper(Func, Msan, Visitor);
   else if (TargetTriple.getArch() == llvm::Triple::aarch64)
     return new VarArgAArch64Helper(Func, Msan, Visitor);
+  else if (TargetTriple.getArch() == llvm::Triple::ppc64 ||
+           TargetTriple.getArch() == llvm::Triple::ppc64le)
+    return new VarArgPowerPC64Helper(Func, Msan, Visitor);
   else
     return new VarArgNoOpHelper(Func, Msan, Visitor);
 }
Index: llvm/trunk/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64.ll
===================================================================
--- llvm/trunk/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64.ll
+++ llvm/trunk/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64.ll
@@ -0,0 +1,113 @@
+; RUN: opt < %s -msan -S | FileCheck %s
+
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64--linux"
+
+define i32 @foo(i32 %guard, ...) {
+  %vl = alloca i8*, align 8
+  %1 = bitcast i8** %vl to i8*
+  call void @llvm.lifetime.start(i64 32, i8* %1)
+  call void @llvm.va_start(i8* %1)
+  call void @llvm.va_end(i8* %1)
+  call void @llvm.lifetime.end(i64 32, i8* %1)
+  ret i32 0
+}
+
+; First, check allocation of the save area.
+
+; CHECK-LABEL: @foo
+; CHECK: [[A:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls
+; CHECK: [[B:%.*]] = add i64 0, [[A]]
+; CHECK: [[C:%.*]] = alloca {{.*}} [[B]]
+
+; CHECK: [[STACK:%.*]] = bitcast {{.*}} @__msan_va_arg_tls to i8*
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[C]], i8* [[STACK]], i64 [[B]], i32 8, i1 false)
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+declare void @llvm.va_start(i8*) #2
+declare void @llvm.va_end(i8*) #2
+declare void @llvm.lifetime.end(i64, i8* nocapture) #1
+
+define i32 @bar() {
+  %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00)
+  ret i32 %1
+}
+
+; Save the incoming shadow value from the arguments in the __msan_va_arg_tls
+; array. The first argument is stored at position 4, since it's
+; right-justified.
+; CHECK-LABEL: @bar
+; CHECK: store i32 0, i32* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_va_arg_tls to i64), i64 4) to i32*), align 8
+; CHECK: store i64 0, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_va_arg_tls to i64), i64 8) to i64*), align 8
+; CHECK: store i64 0, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_va_arg_tls to i64), i64 16) to i64*), align 8
+; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls
+
+; Check vector argument.
+define i32 @bar2() {
+  %1 = call i32 (i32, ...) @foo(i32 0, <2 x i64> <i64 1, i64 2>)
+  ret i32 %1
+}
+
+; The vector is at offset 16 of the parameter save area, but __msan_va_arg_tls
+; corresponds to offset 8+ of the parameter save area - so the offset from
+; __msan_va_arg_tls is actually misaligned.
+; CHECK-LABEL: @bar2
+; CHECK: store <2 x i64> zeroinitializer, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_va_arg_tls to i64), i64 8) to <2 x i64>*), align 8
+; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls
+
+; Check QPX vector argument.
+define i32 @bar3() "target-features"="+qpx" {
+  %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i32 2, <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>)
+  ret i32 %1
+}
+
+; This one is even stranger: the parameter save area starts at offset 48 from
+; the (32-byte aligned) stack pointer, and the vector parameter is 96 bytes
+; from the stack pointer, so its offset from the parameter save area is
+; misaligned.
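+; To make that concrete (an illustrative walk-through of the offsets, not
+; output FileCheck verifies): the fixed i32 guard occupies the doubleword at
+; offset 48, so the vararg area starts at 56; the two vararg i32s are
+; right-justified at 60 and 68 (shadow offsets 4 and 12), and the QPX vector
+; is realigned to 32 bytes at offset 96 (shadow offset 96 - 56 = 40), for a
+; total vararg size of 128 - 56 = 72, matching the checks below.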
+; CHECK-LABEL: @bar3
+; CHECK: store i32 0, i32* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_va_arg_tls to i64), i64 4) to i32*), align 8
+; CHECK: store i32 0, i32* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_va_arg_tls to i64), i64 12) to i32*), align 8
+; CHECK: store <4 x i64> zeroinitializer, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_va_arg_tls to i64), i64 40) to <4 x i64>*), align 8
+; CHECK: store {{.*}} 72, {{.*}} @__msan_va_arg_overflow_size_tls
+
+; Check i64 array.
+define i32 @bar4() {
+  %1 = call i32 (i32, ...) @foo(i32 0, [2 x i64] [i64 1, i64 2])
+  ret i32 %1
+}
+
+; CHECK-LABEL: @bar4
+; CHECK: store [2 x i64] zeroinitializer, [2 x i64]* bitcast ([100 x i64]* @__msan_va_arg_tls to [2 x i64]*), align 8
+; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls
+
+; Check i128 array.
+define i32 @bar5() {
+  %1 = call i32 (i32, ...) @foo(i32 0, [2 x i128] [i128 1, i128 2])
+  ret i32 %1
+}
+
+; CHECK-LABEL: @bar5
+; CHECK: store [2 x i128] zeroinitializer, [2 x i128]* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_va_arg_tls to i64), i64 8) to [2 x i128]*), align 8
+; CHECK: store {{.*}} 40, {{.*}} @__msan_va_arg_overflow_size_tls
+
+; Check 8-aligned byval.
+define i32 @bar6([2 x i64]* %arg) {
+  %1 = call i32 (i32, ...) @foo(i32 0, [2 x i64]* byval align 8 %arg)
+  ret i32 %1
+}
+
+; CHECK-LABEL: @bar6
+; CHECK: [[SHADOW:%[0-9]+]] = bitcast [2 x i64]* bitcast ([100 x i64]* @__msan_va_arg_tls to [2 x i64]*) to i8*
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHADOW]], i8* {{.*}}, i64 16, i32 8, i1 false)
+; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls
+
+; Check 16-aligned byval.
+define i32 @bar7([4 x i64]* %arg) {
+  %1 = call i32 (i32, ...) @foo(i32 0, [4 x i64]* byval align 16 %arg)
+  ret i32 %1
+}
+
+; CHECK-LABEL: @bar7
+; CHECK: [[SHADOW:%[0-9]+]] = bitcast [4 x i64]* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_va_arg_tls to i64), i64 8) to [4 x i64]*) to i8*
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHADOW]], i8* {{.*}}, i64 32, i32 8, i1 false)
+; CHECK: store {{.*}} 40, {{.*}} @__msan_va_arg_overflow_size_tls
Index: llvm/trunk/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64le.ll
===================================================================
--- llvm/trunk/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64le.ll
+++ llvm/trunk/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64le.ll
@@ -0,0 +1,97 @@
+; RUN: opt < %s -msan -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le--linux"
+
+define i32 @foo(i32 %guard, ...) {
+  %vl = alloca i8*, align 8
+  %1 = bitcast i8** %vl to i8*
+  call void @llvm.lifetime.start(i64 32, i8* %1)
+  call void @llvm.va_start(i8* %1)
+  call void @llvm.va_end(i8* %1)
+  call void @llvm.lifetime.end(i64 32, i8* %1)
+  ret i32 0
+}
+
+; First, check allocation of the save area.
+
+; CHECK-LABEL: @foo
+; CHECK: [[A:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls
+; CHECK: [[B:%.*]] = add i64 0, [[A]]
+; CHECK: [[C:%.*]] = alloca {{.*}} [[B]]
+
+; CHECK: [[STACK:%.*]] = bitcast {{.*}} @__msan_va_arg_tls to i8*
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[C]], i8* [[STACK]], i64 [[B]], i32 8, i1 false)
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+declare void @llvm.va_start(i8*) #2
+declare void @llvm.va_end(i8*) #2
+declare void @llvm.lifetime.end(i64, i8* nocapture) #1
+
+define i32 @bar() {
+  %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00)
+  ret i32 %1
+}
+
+; Save the incoming shadow value from the arguments in the __msan_va_arg_tls
+; array.
+; CHECK-LABEL: @bar
+; CHECK: store i32 0, i32* bitcast ([100 x i64]* @__msan_va_arg_tls to i32*), align 8
+; CHECK: store i64 0, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_va_arg_tls to i64), i64 8) to i64*), align 8
+; CHECK: store i64 0, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_va_arg_tls to i64), i64 16) to i64*), align 8
+; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls
+
+; Check vector argument.
+define i32 @bar2() {
+  %1 = call i32 (i32, ...) @foo(i32 0, <2 x i64> <i64 1, i64 2>)
+  ret i32 %1
+}
+
+; The vector is at offset 16 of the parameter save area, but __msan_va_arg_tls
+; corresponds to offset 8+ of the parameter save area - so the offset from
+; __msan_va_arg_tls is actually misaligned.
+; CHECK-LABEL: @bar2
+; CHECK: store <2 x i64> zeroinitializer, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_va_arg_tls to i64), i64 8) to <2 x i64>*), align 8
+; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls
+
+; Check i64 array.
+define i32 @bar4() {
+  %1 = call i32 (i32, ...) @foo(i32 0, [2 x i64] [i64 1, i64 2])
+  ret i32 %1
+}
+
+; CHECK-LABEL: @bar4
+; CHECK: store [2 x i64] zeroinitializer, [2 x i64]* bitcast ([100 x i64]* @__msan_va_arg_tls to [2 x i64]*), align 8
+; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls
+
+; Check i128 array.
+define i32 @bar5() {
+  %1 = call i32 (i32, ...) @foo(i32 0, [2 x i128] [i128 1, i128 2])
+  ret i32 %1
+}
+
+; CHECK-LABEL: @bar5
+; CHECK: store [2 x i128] zeroinitializer, [2 x i128]* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_va_arg_tls to i64), i64 8) to [2 x i128]*), align 8
+; CHECK: store {{.*}} 40, {{.*}} @__msan_va_arg_overflow_size_tls
+
+; Check 8-aligned byval.
+define i32 @bar6([2 x i64]* %arg) {
+  %1 = call i32 (i32, ...) @foo(i32 0, [2 x i64]* byval align 8 %arg)
+  ret i32 %1
+}
+
+; CHECK-LABEL: @bar6
+; CHECK: [[SHADOW:%[0-9]+]] = bitcast [2 x i64]* bitcast ([100 x i64]* @__msan_va_arg_tls to [2 x i64]*) to i8*
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHADOW]], i8* {{.*}}, i64 16, i32 8, i1 false)
+; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls
+
+; Check 16-aligned byval.
+define i32 @bar7([4 x i64]* %arg) {
+  %1 = call i32 (i32, ...) @foo(i32 0, [4 x i64]* byval align 16 %arg)
+  ret i32 %1
+}
+
+; CHECK-LABEL: @bar7
+; CHECK: [[SHADOW:%[0-9]+]] = bitcast [4 x i64]* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_va_arg_tls to i64), i64 8) to [4 x i64]*) to i8*
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHADOW]], i8* {{.*}}, i64 32, i32 8, i1 false)
+; CHECK: store {{.*}} 40, {{.*}} @__msan_va_arg_overflow_size_tls
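+
+; To make the little-endian layout concrete (an illustrative walk-through of
+; the offsets, not output FileCheck verifies): for @bar above, the parameter
+; save area starts at offset 32 (ABIv2) and the fixed i32 guard occupies the
+; doubleword at 32, so the vararg area starts at 40; the vararg i32, i64 and
+; double land at offsets 40, 48 and 56 (shadow offsets 0, 8 and 16; there is
+; no right-justification on little-endian), for a total vararg size of 24.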