diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -3438,17 +3438,21 @@ GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset, QualType SourceTy, unsigned SourceOffset) const { const llvm::DataLayout &TD = getDataLayout(); + unsigned SourceSize = + (unsigned)getContext().getTypeSize(SourceTy) / 8 - SourceOffset; llvm::Type *T0 = getFPTypeAtOffset(IRType, IROffset, TD); if (!T0 || T0->isDoubleTy()) return llvm::Type::getDoubleTy(getVMContext()); // Get the adjacent FP type. - llvm::Type *T1 = - getFPTypeAtOffset(IRType, IROffset + TD.getTypeAllocSize(T0), TD); + llvm::Type *T1 = nullptr; + unsigned T0Size = TD.getTypeAllocSize(T0); + if (SourceSize > T0Size) + T1 = getFPTypeAtOffset(IRType, IROffset + T0Size, TD); if (T1 == nullptr) { // Check if IRType is a half + float. float type will be in IROffset+4 due // to its alignment. - if (T0->isHalfTy()) + if (T0->isHalfTy() && SourceSize > 4) T1 = getFPTypeAtOffset(IRType, IROffset + 4, TD); // If we can't get a second FP type, return a simple half or float. // avx512fp16-abi.c:pr51813_2 shows it works to return float for @@ -3461,7 +3465,9 @@ return llvm::FixedVectorType::get(T0, 2); if (T0->isHalfTy() && T1->isHalfTy()) { - llvm::Type *T2 = getFPTypeAtOffset(IRType, IROffset + 4, TD); + llvm::Type *T2 = nullptr; + if (SourceSize > 4) + T2 = getFPTypeAtOffset(IRType, IROffset + 4, TD); if (T2 == nullptr) return llvm::FixedVectorType::get(T0, 2); return llvm::FixedVectorType::get(T0, 4); diff --git a/clang/test/CodeGen/X86/va-arg-sse.c b/clang/test/CodeGen/X86/va-arg-sse.c --- a/clang/test/CodeGen/X86/va-arg-sse.c +++ b/clang/test/CodeGen/X86/va-arg-sse.c @@ -17,23 +17,30 @@ // CHECK-NEXT: [[FITS_IN_FP:%.*]] = icmp ult i32 [[FP_OFFSET]], 145 // CHECK-NEXT: br i1 [[FITS_IN_FP]], label [[VAARG_IN_REG:%.*]], label [[VAARG_IN_MEM:%.*]] // CHECK: vaarg.in_reg: -// CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[FP_OFFSET]], 32 -// CHECK-NEXT: store i32 [[TMP1]], i32* [[FP_OFFSET_P]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* [[AP]], i64 0, i64 0, i32 3 +// CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load i8*, i8** [[TMP1]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[FP_OFFSET]] to i64 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, i8* [[REG_SAVE_AREA]], i64 [[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TMP3]], i64 16 +// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to float* +// CHECK-NEXT: [[TMP6:%.*]] = load float, float* [[TMP5]], align 16 +// CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw i32 [[FP_OFFSET]], 32 +// CHECK-NEXT: store i32 [[TMP7]], i32* [[FP_OFFSET_P]], align 4 // CHECK-NEXT: br label [[VAARG_END:%.*]] // CHECK: vaarg.in_mem: // CHECK-NEXT: [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* [[AP]], i64 0, i64 0, i32 2 // CHECK-NEXT: [[OVERFLOW_ARG_AREA:%.*]] = load i8*, i8** [[OVERFLOW_ARG_AREA_P]], align 8 // CHECK-NEXT: [[DOTSROA_GEP:%.*]] = getelementptr inbounds i8, i8* [[OVERFLOW_ARG_AREA]], i64 8 -// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[DOTSROA_GEP]] to float* +// CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[DOTSROA_GEP]] to float* // CHECK-NEXT: [[OVERFLOW_ARG_AREA_NEXT:%.*]] = getelementptr i8, i8* [[OVERFLOW_ARG_AREA]], i64 16 // CHECK-NEXT: store i8* [[OVERFLOW_ARG_AREA_NEXT]], i8** [[OVERFLOW_ARG_AREA_P]], align 8 -// CHECK-NEXT: [[VAARG_ADDR_SROA_PHI_SROA_SPECULATE_LOAD_VAARG_IN_MEM:%.*]] = load float, float* [[TMP2]], align 4, !tbaa.struct !2 +// CHECK-NEXT: [[VAARG_ADDR_SROA_PHI_SROA_SPECULATE_LOAD_VAARG_IN_MEM:%.*]] = load float, float* [[TMP8]], align 4, !tbaa.struct !2 // CHECK-NEXT: br label [[VAARG_END]] // CHECK: vaarg.end: -// CHECK-NEXT: [[VAARG_ADDR_SROA_PHI_SROA_SPECULATED:%.*]] = phi float [ undef, [[VAARG_IN_REG]] ], [ [[VAARG_ADDR_SROA_PHI_SROA_SPECULATE_LOAD_VAARG_IN_MEM]], [[VAARG_IN_MEM]] ] +// CHECK-NEXT: [[VAARG_ADDR_SROA_PHI_SROA_SPECULATED:%.*]] = phi float [ [[TMP6]], [[VAARG_IN_REG]] ], [ [[VAARG_ADDR_SROA_PHI_SROA_SPECULATE_LOAD_VAARG_IN_MEM]], [[VAARG_IN_MEM]] ] // CHECK-NEXT: call void @llvm.va_end(i8* nonnull [[TMP0]]) -// CHECK-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([5 x %struct.S], [5 x %struct.S]* @a, i64 0, i64 2, i32 0, i64 2), align 16, !tbaa [[TBAA6:![0-9]+]] -// CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[TMP3]], [[VAARG_ADDR_SROA_PHI_SROA_SPECULATED]] +// CHECK-NEXT: [[TMP9:%.*]] = load float, float* getelementptr inbounds ([5 x %struct.S], [5 x %struct.S]* @a, i64 0, i64 2, i32 0, i64 2), align 16, !tbaa [[TBAA6:![0-9]+]] +// CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[TMP9]], [[VAARG_ADDR_SROA_PHI_SROA_SPECULATED]] // CHECK-NEXT: [[RETVAL_0:%.*]] = zext i1 [[CMP]] to i32 // CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull [[TMP0]]) #[[ATTR3]] // CHECK-NEXT: ret i32 [[RETVAL_0]]