Index: lib/Transforms/Instrumentation/MemorySanitizer.cpp
===================================================================
--- lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3249,8 +3249,10 @@
   // An unfortunate workaround for asymmetric lowering of va_arg stuff.
   // See a comment in visitCallSite for more details.
   static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7
-  static const unsigned AMD64FpEndOffset = 176;
+  static const unsigned AMD64FpEndOffsetSSE = 176;
+  static const unsigned AMD64FpEndOffsetNoSSE = 48;
+  unsigned AMD64FpEndOffset;
 
   Function &F;
   MemorySanitizer &MS;
   MemorySanitizerVisitor &MSV;
@@ -3262,7 +3264,18 @@
   enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
 
   VarArgAMD64Helper(Function &F, MemorySanitizer &MS,
-                    MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
+                    MemorySanitizerVisitor &MSV)
+      : F(F), MS(MS), MSV(MSV) {
+    AMD64FpEndOffset = AMD64FpEndOffsetSSE;
+    for (const auto &Attr : F.getAttributes().getFnAttributes()) {
+      if (Attr.isStringAttribute() &&
+          (Attr.getKindAsString() == "target-features")) {
+        if (Attr.getValueAsString().contains("-sse"))
+          AMD64FpEndOffset = AMD64FpEndOffsetNoSSE;
+        break;
+      }
+    }
+  }
 
   ArgKind classifyArgument(Value* arg) {
     // A very rough approximation of X86_64 argument classification rules.
Index: test/Instrumentation/MemorySanitizer/msan_basic.ll
===================================================================
--- test/Instrumentation/MemorySanitizer/msan_basic.ll
+++ test/Instrumentation/MemorySanitizer/msan_basic.ll
@@ -915,6 +915,26 @@
 ; CHECK: call void (i32, ...) @VAArgStructFn
 ; CHECK: ret void
 
+; Same code compiled without SSE (see attributes below).
+; The register save area is only 48 bytes instead of 176.
+define void @VAArgStructNoSSE(%struct.StructByVal* nocapture %s) sanitize_memory #0 {
+entry:
+  %agg.tmp2 = alloca %struct.StructByVal, align 8
+  %0 = bitcast %struct.StructByVal* %s to i8*
+  %agg.tmp.sroa.0.0..sroa_cast = bitcast %struct.StructByVal* %s to i64*
+  %agg.tmp.sroa.0.0.copyload = load i64, i64* %agg.tmp.sroa.0.0..sroa_cast, align 4
+  %agg.tmp.sroa.2.0..sroa_idx = getelementptr inbounds %struct.StructByVal, %struct.StructByVal* %s, i64 0, i32 2
+  %agg.tmp.sroa.2.0..sroa_cast = bitcast i32* %agg.tmp.sroa.2.0..sroa_idx to i64*
+  %agg.tmp.sroa.2.0.copyload = load i64, i64* %agg.tmp.sroa.2.0..sroa_cast, align 4
+  %1 = bitcast %struct.StructByVal* %agg.tmp2 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 %0, i64 16, i1 false)
+  call void (i32, ...) @VAArgStructFn(i32 undef, i64 %agg.tmp.sroa.0.0.copyload, i64 %agg.tmp.sroa.2.0.copyload, i64 %agg.tmp.sroa.0.0.copyload, i64 %agg.tmp.sroa.2.0.copyload, %struct.StructByVal* byval align 8 %agg.tmp2)
+  ret void
+}
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+x87,-aes,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+; CHECK: bitcast { i32, i32, i32, i32 }* {{.*}}@__msan_va_arg_tls {{.*}}, i64 48
+
 declare i32 @InnerTailCall(i32 %a)
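
For reference, the 48- and 176-byte boundaries fall out of the System V AMD64 ABI
register save area: six 8-byte general-purpose argument registers come first,
followed by eight 16-byte vector registers that are only saved when SSE is
available. A minimal sketch of that arithmetic (the constant names here are
illustrative, not part of the patch):

  // Layout of the x86_64 va_arg register save area (System V AMD64 ABI).
  // NumGpArgRegs/NumXmmArgRegs are illustrative names, not from the patch.
  constexpr unsigned NumGpArgRegs  = 6; // rdi, rsi, rdx, rcx, r8, r9
  constexpr unsigned NumXmmArgRegs = 8; // xmm0..xmm7, present only with SSE
  static_assert(NumGpArgRegs * 8 == 48,
                "GP area ends at AMD64GpEndOffset");
  static_assert(NumGpArgRegs * 8 + NumXmmArgRegs * 16 == 176,
                "FP area ends at AMD64FpEndOffsetSSE");

With SSE disabled no XMM registers are saved, so the save area ends at the GP
boundary; this is why AMD64FpEndOffsetNoSSE equals AMD64GpEndOffset. A
"target-features" string containing "-sse", as in the test attributes above,
is typically what clang emits when building with -mno-sse.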