diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -3438,17 +3438,21 @@ GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset, QualType SourceTy, unsigned SourceOffset) const { const llvm::DataLayout &TD = getDataLayout(); + unsigned SourceSize = + (unsigned)getContext().getTypeSize(SourceTy) / 8 - SourceOffset; llvm::Type *T0 = getFPTypeAtOffset(IRType, IROffset, TD); if (!T0 || T0->isDoubleTy()) return llvm::Type::getDoubleTy(getVMContext()); // Get the adjacent FP type. - llvm::Type *T1 = - getFPTypeAtOffset(IRType, IROffset + TD.getTypeAllocSize(T0), TD); + llvm::Type *T1 = nullptr; + unsigned T0Size = TD.getTypeAllocSize(T0); + if (SourceSize > T0Size) + T1 = getFPTypeAtOffset(IRType, IROffset + T0Size, TD); if (T1 == nullptr) { // Check if IRType is a half + float. float type will be in IROffset+4 due // to its alignment. - if (T0->isHalfTy()) + if (T0->isHalfTy() && SourceSize > 4) T1 = getFPTypeAtOffset(IRType, IROffset + 4, TD); // If we can't get a second FP type, return a simple half or float. // avx512fp16-abi.c:pr51813_2 shows it works to return float for @@ -3461,7 +3465,9 @@ return llvm::FixedVectorType::get(T0, 2); if (T0->isHalfTy() && T1->isHalfTy()) { - llvm::Type *T2 = getFPTypeAtOffset(IRType, IROffset + 4, TD); + llvm::Type *T2 = nullptr; + if (SourceSize > 4) + T2 = getFPTypeAtOffset(IRType, IROffset + 4, TD); if (T2 == nullptr) return llvm::FixedVectorType::get(T0, 2); return llvm::FixedVectorType::get(T0, 4); diff --git a/clang/test/CodeGen/X86/va-arg-sse.c b/clang/test/CodeGen/X86/va-arg-sse.c --- a/clang/test/CodeGen/X86/va-arg-sse.c +++ b/clang/test/CodeGen/X86/va-arg-sse.c @@ -34,16 +34,16 @@ // CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load i8*, i8** [[TMP0]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[REG_SAVE_AREA]], i32 [[FP_OFFSET]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 16 -// CHECK-NEXT: [[TMP3:%.*]] = bitcast %struct.S* [[TMP]] to { <2 x float>, <2 x float> }* +// CHECK-NEXT: [[TMP3:%.*]] = bitcast %struct.S* [[TMP]] to { <2 x float>, float }* // CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP1]] to <2 x float>* // CHECK-NEXT: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[TMP4]], align 16 -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds { <2 x float>, <2 x float> }, { <2 x float>, <2 x float> }* [[TMP3]], i32 0, i32 0 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds { <2 x float>, float }, { <2 x float>, float }* [[TMP3]], i32 0, i32 0 // CHECK-NEXT: store <2 x float> [[TMP5]], <2 x float>* [[TMP6]], align 4 -// CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP2]] to <2 x float>* -// CHECK-NEXT: [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[TMP7]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds { <2 x float>, <2 x float> }, { <2 x float>, <2 x float> }* [[TMP3]], i32 0, i32 1 -// CHECK-NEXT: store <2 x float> [[TMP8]], <2 x float>* [[TMP9]], align 4 -// CHECK-NEXT: [[TMP10:%.*]] = bitcast { <2 x float>, <2 x float> }* [[TMP3]] to %struct.S* +// CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP2]] to float* +// CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[TMP7]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds { <2 x float>, float }, { <2 x float>, float }* [[TMP3]], i32 0, i32 1 +// CHECK-NEXT: store float [[TMP8]], float* [[TMP9]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = bitcast { <2 x float>, float }* [[TMP3]] to %struct.S* // CHECK-NEXT: [[TMP11:%.*]] = add i32 [[FP_OFFSET]], 32 // CHECK-NEXT: store i32 [[TMP11]], i32* [[FP_OFFSET_P]], align 4 // CHECK-NEXT: br label [[VAARG_END:%.*]]