diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -1963,6 +1963,8 @@
   if (LV.isVectorElt()) {
     llvm::LoadInst *Load = Builder.CreateLoad(LV.getVectorAddress(),
                                               LV.isVolatileQualified());
+    applyNoundefToLoadInst(CGM.getCodeGenOpts().EnableNoundefLoadAttr,
+                           LV.getType(), Load);
     return RValue::get(Builder.CreateExtractElement(Load, LV.getVectorIdx(),
                                                     "vecext"));
   }
@@ -1986,6 +1988,8 @@
     }
     llvm::LoadInst *Load =
         Builder.CreateLoad(LV.getMatrixAddress(), LV.isVolatileQualified());
+    applyNoundefToLoadInst(CGM.getCodeGenOpts().EnableNoundefLoadAttr,
+                           LV.getType(), Load);
     return RValue::get(Builder.CreateExtractElement(Load, Idx, "matrixext"));
   }
 
@@ -2033,6 +2037,9 @@
 RValue CodeGenFunction::EmitLoadOfExtVectorElementLValue(LValue LV) {
   llvm::Value *Vec = Builder.CreateLoad(LV.getExtVectorAddress(),
                                         LV.isVolatileQualified());
+  // Vec is always a LoadInst here; cast<> asserts it instead of yielding null.
+  applyNoundefToLoadInst(CGM.getCodeGenOpts().EnableNoundefLoadAttr,
+                         LV.getType(), cast<llvm::LoadInst>(Vec));
 
   const llvm::Constant *Elts = LV.getExtVectorElts();
 
diff --git a/clang/test/CodeGen/matrix-type-operators.c b/clang/test/CodeGen/matrix-type-operators.c
--- a/clang/test/CodeGen/matrix-type-operators.c
+++ b/clang/test/CodeGen/matrix-type-operators.c
@@ -1217,7 +1217,7 @@
 
 void insert_compound_stmt(dx5x5_t a) {
   // CHECK-LABEL: define{{.*}} void @insert_compound_stmt(<25 x double> noundef %a)
-  // CHECK:        [[A:%.*]] = load <25 x double>, <25 x double>* [[A_PTR:%.*]], align 8{{$}}
+  // CHECK:        [[A:%.*]] = load <25 x double>, <25 x double>* [[A_PTR:%.*]], align 8, !noundef [[NOUNDEF:![0-9]+]]{{$}}
   // CHECK-NEXT:   [[EXT:%.*]] = extractelement <25 x double> [[A]], i64 17
   // CHECK-NEXT:   [[SUB:%.*]] = fsub double [[EXT]], 1.000000e+00
   // CHECK-NEXT:   [[A2:%.*]] = load <25 x double>, <25 x double>* [[A_PTR]], align 8{{$}}
@@ -1245,7 +1245,7 @@
   // CHECK-NEXT:    [[MAT_PTR:%.*]] = bitcast [6 x float]* %mat to <6 x float>*
   // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
   // OPT-NEXT:      call void @llvm.assume(i1 [[CMP]])
-  // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_PTR]], align 4{{$}}
+  // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_PTR]], align 4, !noundef [[NOUNDEF]]{{$}}
   // CHECK-NEXT:    [[EXT:%.*]] = extractelement <6 x float> [[MAT]], i64 [[IDX2]]
   // CHECK-NEXT:    [[SUM:%.*]] = fadd float [[EXT]], {{.*}}
   // OPT-NEXT:      [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
diff --git a/clang/test/CodeGen/vector-noundef.c b/clang/test/CodeGen/vector-noundef.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/vector-noundef.c
@@ -0,0 +1,107 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -no-opaque-pointers -flax-vector-conversions=none -ffreestanding -triple x86_64-gnu-linux -target-feature +avx512f -O0 -enable-noundef-load-analysis -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -no-opaque-pointers -flax-vector-conversions=none -ffreestanding -triple x86_64-gnu-linux -target-feature +avx512f -O0 -no-enable-noundef-load-analysis -emit-llvm -o - %s | FileCheck %s --check-prefix=DISABLE
+
+#include <immintrin.h>
+
+// CHECK-LABEL: @test_mm_mask_div_ss(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[__A_ADDR_I:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    [[__B_ADDR_I:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    [[__W_ADDR_I:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    [[__U_ADDR_I:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[__A_ADDR_I2:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    [[__B_ADDR_I2:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    [[__W_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    [[__U_ADDR:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[__A_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    [[__B_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    store <4 x float> [[__W:%.*]], <4 x float>* [[__W_ADDR]], align 16
+// CHECK-NEXT:    store i8 [[__U:%.*]], i8* [[__U_ADDR]], align 1
+// CHECK-NEXT:    store <4 x float> [[__A:%.*]], <4 x float>* [[__A_ADDR]], align 16
+// CHECK-NEXT:    store <4 x float> [[__B:%.*]], <4 x float>* [[__B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[__W_ADDR]], align 16, !noundef [[NOUNDEF2:![0-9]+]]
+// CHECK-NEXT:    [[TMP1:%.*]] = load i8, i8* [[__U_ADDR]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR]], align 16, !noundef [[NOUNDEF2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR]], align 16, !noundef [[NOUNDEF2]]
+// CHECK-NEXT:    store <4 x float> [[TMP0]], <4 x float>* [[__W_ADDR_I]], align 16
+// CHECK-NEXT:    store i8 [[TMP1]], i8* [[__U_ADDR_I]], align 1
+// CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* [[__A_ADDR_I2]], align 16
+// CHECK-NEXT:    store <4 x float> [[TMP3]], <4 x float>* [[__B_ADDR_I2]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I2]], align 16, !noundef [[NOUNDEF2]]
+// CHECK-NEXT:    [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I2]], align 16, !noundef [[NOUNDEF2]]
+// CHECK-NEXT:    store <4 x float> [[TMP4]], <4 x float>* [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    store <4 x float> [[TMP5]], <4 x float>* [[__B_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I]], align 16, !noundef [[NOUNDEF2]]
+// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <4 x float> [[TMP6]], i32 0
+// CHECK-NEXT:    [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I]], align 16, !noundef [[NOUNDEF2]]
+// CHECK-NEXT:    [[VECEXT1_I:%.*]] = extractelement <4 x float> [[TMP7]], i32 0
+// CHECK-NEXT:    [[DIV_I:%.*]] = fdiv float [[VECEXT1_I]], [[VECEXT_I]]
+// CHECK-NEXT:    [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    [[VECINS_I:%.*]] = insertelement <4 x float> [[TMP8]], float [[DIV_I]], i32 0
+// CHECK-NEXT:    store <4 x float> [[VECINS_I]], <4 x float>* [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I]], align 16, !noundef [[NOUNDEF2]]
+// CHECK-NEXT:    store <4 x float> [[TMP9]], <4 x float>* [[__A_ADDR_I2]], align 16
+// CHECK-NEXT:    [[TMP10:%.*]] = load i8, i8* [[__U_ADDR_I]], align 1
+// CHECK-NEXT:    [[TMP11:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I2]], align 16, !noundef [[NOUNDEF2]]
+// CHECK-NEXT:    [[TMP12:%.*]] = load <4 x float>, <4 x float>* [[__W_ADDR_I]], align 16, !noundef [[NOUNDEF2]]
+// CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x float> [[TMP11]], i64 0
+// CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x float> [[TMP12]], i64 0
+// CHECK-NEXT:    [[TMP15:%.*]] = bitcast i8 [[TMP10]] to <8 x i1>
+// CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP15]], i64 0
+// CHECK-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP13]], float [[TMP14]]
+// CHECK-NEXT:    [[TMP18:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP17]], i64 0
+// CHECK-NEXT:    ret <4 x float> [[TMP18]]
+//
+// DISABLE-LABEL: @test_mm_mask_div_ss(
+// DISABLE-NEXT:  entry:
+// DISABLE-NEXT:    [[__A_ADDR_I:%.*]] = alloca <4 x float>, align 16
+// DISABLE-NEXT:    [[__B_ADDR_I:%.*]] = alloca <4 x float>, align 16
+// DISABLE-NEXT:    [[__W_ADDR_I:%.*]] = alloca <4 x float>, align 16
+// DISABLE-NEXT:    [[__U_ADDR_I:%.*]] = alloca i8, align 1
+// DISABLE-NEXT:    [[__A_ADDR_I2:%.*]] = alloca <4 x float>, align 16
+// DISABLE-NEXT:    [[__B_ADDR_I2:%.*]] = alloca <4 x float>, align 16
+// DISABLE-NEXT:    [[__W_ADDR:%.*]] = alloca <4 x float>, align 16
+// DISABLE-NEXT:    [[__U_ADDR:%.*]] = alloca i8, align 1
+// DISABLE-NEXT:    [[__A_ADDR:%.*]] = alloca <4 x float>, align 16
+// DISABLE-NEXT:    [[__B_ADDR:%.*]] = alloca <4 x float>, align 16
+// DISABLE-NEXT:    store <4 x float> [[__W:%.*]], <4 x float>* [[__W_ADDR]], align 16
+// DISABLE-NEXT:    store i8 [[__U:%.*]], i8* [[__U_ADDR]], align 1
+// DISABLE-NEXT:    store <4 x float> [[__A:%.*]], <4 x float>* [[__A_ADDR]], align 16
+// DISABLE-NEXT:    store <4 x float> [[__B:%.*]], <4 x float>* [[__B_ADDR]], align 16
+// DISABLE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[__W_ADDR]], align 16
+// DISABLE-NEXT:    [[TMP1:%.*]] = load i8, i8* [[__U_ADDR]], align 1
+// DISABLE-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR]], align 16
+// DISABLE-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR]], align 16
+// DISABLE-NEXT:    store <4 x float> [[TMP0]], <4 x float>* [[__W_ADDR_I]], align 16
+// DISABLE-NEXT:    store i8 [[TMP1]], i8* [[__U_ADDR_I]], align 1
+// DISABLE-NEXT:    store <4 x float> [[TMP2]], <4 x float>* [[__A_ADDR_I2]], align 16
+// DISABLE-NEXT:    store <4 x float> [[TMP3]], <4 x float>* [[__B_ADDR_I2]], align 16
+// DISABLE-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I2]], align 16
+// DISABLE-NEXT:    [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I2]], align 16
+// DISABLE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* [[__A_ADDR_I]], align 16
+// DISABLE-NEXT:    store <4 x float> [[TMP5]], <4 x float>* [[__B_ADDR_I]], align 16
+// DISABLE-NEXT:    [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I]], align 16
+// DISABLE-NEXT:    [[VECEXT_I:%.*]] = extractelement <4 x float> [[TMP6]], i32 0
+// DISABLE-NEXT:    [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I]], align 16
+// DISABLE-NEXT:    [[VECEXT1_I:%.*]] = extractelement <4 x float> [[TMP7]], i32 0
+// DISABLE-NEXT:    [[DIV_I:%.*]] = fdiv float [[VECEXT1_I]], [[VECEXT_I]]
+// DISABLE-NEXT:    [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I]], align 16
+// DISABLE-NEXT:    [[VECINS_I:%.*]] = insertelement <4 x float> [[TMP8]], float [[DIV_I]], i32 0
+// DISABLE-NEXT:    store <4 x float> [[VECINS_I]], <4 x float>* [[__A_ADDR_I]], align 16
+// DISABLE-NEXT:    [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I]], align 16
+// DISABLE-NEXT:    store <4 x float> [[TMP9]], <4 x float>* [[__A_ADDR_I2]], align 16
+// DISABLE-NEXT:    [[TMP10:%.*]] = load i8, i8* [[__U_ADDR_I]], align 1
+// DISABLE-NEXT:    [[TMP11:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I2]], align 16
+// DISABLE-NEXT:    [[TMP12:%.*]] = load <4 x float>, <4 x float>* [[__W_ADDR_I]], align 16
+// DISABLE-NEXT:    [[TMP13:%.*]] = extractelement <4 x float> [[TMP11]], i64 0
+// DISABLE-NEXT:    [[TMP14:%.*]] = extractelement <4 x float> [[TMP12]], i64 0
+// DISABLE-NEXT:    [[TMP15:%.*]] = bitcast i8 [[TMP10]] to <8 x i1>
+// DISABLE-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP15]], i64 0
+// DISABLE-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP13]], float [[TMP14]]
+// DISABLE-NEXT:    [[TMP18:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP17]], i64 0
+// DISABLE-NEXT:    ret <4 x float> [[TMP18]]
+//
+__m128 test_mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  return _mm_mask_div_ss(__W,__U,__A,__B); // IR for this call is pinned by the CHECK/DISABLE lines above
+}
diff --git a/clang/test/CodeGenCXX/vector-noundef.cpp b/clang/test/CodeGenCXX/vector-noundef.cpp
--- a/clang/test/CodeGenCXX/vector-noundef.cpp
+++ b/clang/test/CodeGenCXX/vector-noundef.cpp
@@ -6,6 +6,9 @@
 using VecOfThreeChars __attribute__((ext_vector_type(3))) = char;
 using VecOfThreeUChars __attribute__((ext_vector_type(3))) = unsigned char;
 
+using VecOfFourFloats __attribute__((ext_vector_type(4))) = float;
+using VecOfTwoFloats __attribute__((ext_vector_type(2))) = float;
+
 // CHECK-LABEL: @_Z15getElement4BoolRDv4_b(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 8
@@ -82,3 +85,33 @@
 {
   return a[0];
 }
+
+// CHECK-LABEL: @_Z16vectorSubsectionRDv2_fRDv4_f(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VEC2_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[VEC4_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[VEC2:%.*]], ptr [[VEC2_ADDR]], align 8
+// CHECK-NEXT:    store ptr [[VEC4:%.*]], ptr [[VEC4_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[VEC4_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[TMP0]], align 16, !noundef [[NOUNDEF2]]
+// CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[VEC2_ADDR]], align 8
+// CHECK-NEXT:    store <2 x float> [[TMP2]], ptr [[TMP3]], align 8
+// CHECK-NEXT:    ret void
+//
+// DISABLE-LABEL: @_Z16vectorSubsectionRDv2_fRDv4_f(
+// DISABLE-NEXT:  entry:
+// DISABLE-NEXT:    [[VEC2_ADDR:%.*]] = alloca ptr, align 8
+// DISABLE-NEXT:    [[VEC4_ADDR:%.*]] = alloca ptr, align 8
+// DISABLE-NEXT:    store ptr [[VEC2:%.*]], ptr [[VEC2_ADDR]], align 8
+// DISABLE-NEXT:    store ptr [[VEC4:%.*]], ptr [[VEC4_ADDR]], align 8
+// DISABLE-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[VEC4_ADDR]], align 8
+// DISABLE-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[TMP0]], align 16
+// DISABLE-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
+// DISABLE-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[VEC2_ADDR]], align 8
+// DISABLE-NEXT:    store <2 x float> [[TMP2]], ptr [[TMP3]], align 8
+// DISABLE-NEXT:    ret void
+//
+void vectorSubsection(VecOfTwoFloats& vec2, VecOfFourFloats& vec4) {
+    vec2 = vec4.xy; // .xy swizzle: whole vec4 is loaded, then lanes 0 and 1 are stored to vec2
+}