diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -3438,17 +3438,25 @@
 GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset,
                    QualType SourceTy, unsigned SourceOffset) const {
   const llvm::DataLayout &TD = getDataLayout();
+  // Number of bytes of SourceTy that remain from SourceOffset onwards
+  // (getTypeSize reports bits). The lookups of neighbouring FP elements below
+  // are skipped when they would start at or past the end of the source type.
+  unsigned SourceSize =
+      (unsigned)getContext().getTypeSize(SourceTy) / 8 - SourceOffset;
   llvm::Type *T0 = getFPTypeAtOffset(IRType, IROffset, TD);
   if (!T0 || T0->isDoubleTy())
     return llvm::Type::getDoubleTy(getVMContext());
 
   // Get the adjacent FP type.
-  llvm::Type *T1 =
-      getFPTypeAtOffset(IRType, IROffset + TD.getTypeAllocSize(T0), TD);
+  llvm::Type *T1 = nullptr;
+  unsigned T0Size = TD.getTypeAllocSize(T0);
+  // Only look past T0 if the source type extends beyond it.
+  if (SourceSize > T0Size)
+    T1 = getFPTypeAtOffset(IRType, IROffset + T0Size, TD);
   if (T1 == nullptr) {
     // Check if IRType is a half + float. float type will be in IROffset+4 due
     // to its alignment.
-    if (T0->isHalfTy())
+    if (T0->isHalfTy() && SourceSize > 4)
       T1 = getFPTypeAtOffset(IRType, IROffset + 4, TD);
     // If we can't get a second FP type, return a simple half or float.
     // avx512fp16-abi.c:pr51813_2 shows it works to return float for
@@ -3461,7 +3469,10 @@
     return llvm::FixedVectorType::get(T0, 2);
 
   if (T0->isHalfTy() && T1->isHalfTy()) {
-    llvm::Type *T2 = getFPTypeAtOffset(IRType, IROffset + 4, TD);
+    llvm::Type *T2 = nullptr;
+    // Only look at IROffset + 4 if the source type extends beyond it.
+    if (SourceSize > 4)
+      T2 = getFPTypeAtOffset(IRType, IROffset + 4, TD);
     if (T2 == nullptr)
       return llvm::FixedVectorType::get(T0, 2);
     return llvm::FixedVectorType::get(T0, 4);
diff --git a/clang/test/CodeGen/X86/va-arg-sse.c b/clang/test/CodeGen/X86/va-arg-sse.c
--- a/clang/test/CodeGen/X86/va-arg-sse.c
+++ b/clang/test/CodeGen/X86/va-arg-sse.c
@@ -34,16 +34,16 @@
 // CHECK-NEXT:    [[REG_SAVE_AREA:%.*]] = load i8*, i8** [[TMP0]], align 16
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, i8* [[REG_SAVE_AREA]], i32 [[FP_OFFSET]]
 // CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 16
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast %struct.S* [[TMP]] to { <2 x float>, <2 x float> }*
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast %struct.S* [[TMP]] to { <2 x float>, float }*
 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
 // CHECK-NEXT:    [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[TMP4]], align 16
-// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds { <2 x float>, <2 x float> }, { <2 x float>, <2 x float> }* [[TMP3]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds { <2 x float>, float }, { <2 x float>, float }* [[TMP3]], i32 0, i32 0
 // CHECK-NEXT:    store <2 x float> [[TMP5]], <2 x float>* [[TMP6]], align 4
-// CHECK-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP2]] to <2 x float>*
-// CHECK-NEXT:    [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[TMP7]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds { <2 x float>, <2 x float> }, { <2 x float>, <2 x float> }* [[TMP3]], i32 0, i32 1
-// CHECK-NEXT:    store <2 x float> [[TMP8]], <2 x float>* [[TMP9]], align 4
-// CHECK-NEXT:    [[TMP10:%.*]] = bitcast { <2 x float>, <2 x float> }* [[TMP3]] to %struct.S*
+// CHECK-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP2]] to float*
+// CHECK-NEXT:    [[TMP8:%.*]] = load float, float* [[TMP7]], align 16
+// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds { <2 x float>, float }, { <2 x float>, float }* [[TMP3]], i32 0, i32 1
+// CHECK-NEXT:    store float [[TMP8]], float* [[TMP9]], align 4
+// CHECK-NEXT:    [[TMP10:%.*]] = bitcast { <2 x float>, float }* [[TMP3]] to %struct.S*
 // CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[FP_OFFSET]], 32
 // CHECK-NEXT:    store i32 [[TMP11]], i32* [[FP_OFFSET_P]], align 4
 // CHECK-NEXT:    br label [[VAARG_END:%.*]]