diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -1963,6 +1963,8 @@ if (LV.isVectorElt()) { llvm::LoadInst *Load = Builder.CreateLoad(LV.getVectorAddress(), LV.isVolatileQualified()); + applyNoundefToLoadInst(CGM.getCodeGenOpts().EnableNoundefLoadAttr, + LV.getType(), Load); return RValue::get(Builder.CreateExtractElement(Load, LV.getVectorIdx(), "vecext")); } @@ -1986,6 +1988,8 @@ } llvm::LoadInst *Load = Builder.CreateLoad(LV.getMatrixAddress(), LV.isVolatileQualified()); + applyNoundefToLoadInst(CGM.getCodeGenOpts().EnableNoundefLoadAttr, + LV.getType(), Load); return RValue::get(Builder.CreateExtractElement(Load, Idx, "matrixext")); } @@ -2033,6 +2037,8 @@ RValue CodeGenFunction::EmitLoadOfExtVectorElementLValue(LValue LV) { llvm::Value *Vec = Builder.CreateLoad(LV.getExtVectorAddress(), LV.isVolatileQualified()); + applyNoundefToLoadInst(CGM.getCodeGenOpts().EnableNoundefLoadAttr, + LV.getType(), dyn_cast<llvm::LoadInst>(Vec)); const llvm::Constant *Elts = LV.getExtVectorElts(); diff --git a/clang/test/CodeGen/matrix-type-operators.c b/clang/test/CodeGen/matrix-type-operators.c --- a/clang/test/CodeGen/matrix-type-operators.c +++ b/clang/test/CodeGen/matrix-type-operators.c @@ -1217,7 +1217,7 @@ void insert_compound_stmt(dx5x5_t a) { // CHECK-LABEL: define{{.*}} void @insert_compound_stmt(<25 x double> noundef %a) - // CHECK: [[A:%.*]] = load <25 x double>, <25 x double>* [[A_PTR:%.*]], align 8{{$}} + // CHECK: [[A:%.*]] = load <25 x double>, <25 x double>* [[A_PTR:%.*]], align 8, !noundef [[NOUNDEF:![0-9]+]]{{$}} // CHECK-NEXT: [[EXT:%.*]] = extractelement <25 x double> [[A]], i64 17 // CHECK-NEXT: [[SUB:%.*]] = fsub double [[EXT]], 1.000000e+00 // CHECK-NEXT: [[A2:%.*]] = load <25 x double>, <25 x double>* [[A_PTR]], align 8{{$}} @@ -1245,7 +1245,7 @@ // CHECK-NEXT: [[MAT_PTR:%.*]] = bitcast [6 x float]* %mat to <6 x float>* // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6 // 
OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) - // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_PTR]], align 4{{$}} + // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_PTR]], align 4, !noundef [[NOUNDEF]]{{$}} // CHECK-NEXT: [[EXT:%.*]] = extractelement <6 x float> [[MAT]], i64 [[IDX2]] // CHECK-NEXT: [[SUM:%.*]] = fadd float [[EXT]], {{.*}} // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6 diff --git a/clang/test/CodeGen/vector-noundef.c b/clang/test/CodeGen/vector-noundef.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/vector-noundef.c @@ -0,0 +1,107 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -no-opaque-pointers -flax-vector-conversions=none -ffreestanding -triple x86_64-gnu-linux -target-feature +avx512f -O0 -enable-noundef-load-analysis -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -no-opaque-pointers -flax-vector-conversions=none -ffreestanding -triple x86_64-gnu-linux -target-feature +avx512f -O0 -no-enable-noundef-load-analysis -emit-llvm -o - %s | FileCheck %s --check-prefix=DISABLE + +#include <immintrin.h> + +// CHECK-LABEL: @test_mm_mask_div_ss( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: [[__B_ADDR_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: [[__W_ADDR_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: [[__U_ADDR_I:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[__A_ADDR_I2:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: [[__B_ADDR_I2:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: [[__U_ADDR:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[__A_ADDR:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: [[__B_ADDR:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: store <4 x float> [[__W:%.*]], <4 x float>* [[__W_ADDR]], align 16 +// CHECK-NEXT: store i8 [[__U:%.*]], i8* [[__U_ADDR]], align 1 +// CHECK-NEXT: 
store <4 x float> [[__A:%.*]], <4 x float>* [[__A_ADDR]], align 16 +// CHECK-NEXT: store <4 x float> [[__B:%.*]], <4 x float>* [[__B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[__W_ADDR]], align 16, !noundef [[NOUNDEF2:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[__U_ADDR]], align 1 +// CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR]], align 16, !noundef [[NOUNDEF2]] +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR]], align 16, !noundef [[NOUNDEF2]] +// CHECK-NEXT: store <4 x float> [[TMP0]], <4 x float>* [[__W_ADDR_I]], align 16 +// CHECK-NEXT: store i8 [[TMP1]], i8* [[__U_ADDR_I]], align 1 +// CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[__A_ADDR_I2]], align 16 +// CHECK-NEXT: store <4 x float> [[TMP3]], <4 x float>* [[__B_ADDR_I2]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I2]], align 16, !noundef [[NOUNDEF2]] +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I2]], align 16, !noundef [[NOUNDEF2]] +// CHECK-NEXT: store <4 x float> [[TMP4]], <4 x float>* [[__A_ADDR_I]], align 16 +// CHECK-NEXT: store <4 x float> [[TMP5]], <4 x float>* [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I]], align 16, !noundef [[NOUNDEF2]] +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[TMP6]], i32 0 +// CHECK-NEXT: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I]], align 16, !noundef [[NOUNDEF2]] +// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <4 x float> [[TMP7]], i32 0 +// CHECK-NEXT: [[DIV_I:%.*]] = fdiv float [[VECEXT1_I]], [[VECEXT_I]] +// CHECK-NEXT: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <4 x float> [[TMP8]], float [[DIV_I]], i32 0 +// CHECK-NEXT: store <4 x float> [[VECINS_I]], <4 x float>* [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load <4 
x float>, <4 x float>* [[__A_ADDR_I]], align 16, !noundef [[NOUNDEF2]] +// CHECK-NEXT: store <4 x float> [[TMP9]], <4 x float>* [[__A_ADDR_I2]], align 16 +// CHECK-NEXT: [[TMP10:%.*]] = load i8, i8* [[__U_ADDR_I]], align 1 +// CHECK-NEXT: [[TMP11:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I2]], align 16, !noundef [[NOUNDEF2]] +// CHECK-NEXT: [[TMP12:%.*]] = load <4 x float>, <4 x float>* [[__W_ADDR_I]], align 16, !noundef [[NOUNDEF2]] +// CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP11]], i64 0 +// CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[TMP12]], i64 0 +// CHECK-NEXT: [[TMP15:%.*]] = bitcast i8 [[TMP10]] to <8 x i1> +// CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP15]], i64 0 +// CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP13]], float [[TMP14]] +// CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP17]], i64 0 +// CHECK-NEXT: ret <4 x float> [[TMP18]] +// +// DISABLE-LABEL: @test_mm_mask_div_ss( +// DISABLE-NEXT: entry: +// DISABLE-NEXT: [[__A_ADDR_I:%.*]] = alloca <4 x float>, align 16 +// DISABLE-NEXT: [[__B_ADDR_I:%.*]] = alloca <4 x float>, align 16 +// DISABLE-NEXT: [[__W_ADDR_I:%.*]] = alloca <4 x float>, align 16 +// DISABLE-NEXT: [[__U_ADDR_I:%.*]] = alloca i8, align 1 +// DISABLE-NEXT: [[__A_ADDR_I2:%.*]] = alloca <4 x float>, align 16 +// DISABLE-NEXT: [[__B_ADDR_I2:%.*]] = alloca <4 x float>, align 16 +// DISABLE-NEXT: [[__W_ADDR:%.*]] = alloca <4 x float>, align 16 +// DISABLE-NEXT: [[__U_ADDR:%.*]] = alloca i8, align 1 +// DISABLE-NEXT: [[__A_ADDR:%.*]] = alloca <4 x float>, align 16 +// DISABLE-NEXT: [[__B_ADDR:%.*]] = alloca <4 x float>, align 16 +// DISABLE-NEXT: store <4 x float> [[__W:%.*]], <4 x float>* [[__W_ADDR]], align 16 +// DISABLE-NEXT: store i8 [[__U:%.*]], i8* [[__U_ADDR]], align 1 +// DISABLE-NEXT: store <4 x float> [[__A:%.*]], <4 x float>* [[__A_ADDR]], align 16 +// DISABLE-NEXT: store <4 x float> [[__B:%.*]], <4 x float>* [[__B_ADDR]], align 
16 +// DISABLE-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[__W_ADDR]], align 16 +// DISABLE-NEXT: [[TMP1:%.*]] = load i8, i8* [[__U_ADDR]], align 1 +// DISABLE-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR]], align 16 +// DISABLE-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR]], align 16 +// DISABLE-NEXT: store <4 x float> [[TMP0]], <4 x float>* [[__W_ADDR_I]], align 16 +// DISABLE-NEXT: store i8 [[TMP1]], i8* [[__U_ADDR_I]], align 1 +// DISABLE-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[__A_ADDR_I2]], align 16 +// DISABLE-NEXT: store <4 x float> [[TMP3]], <4 x float>* [[__B_ADDR_I2]], align 16 +// DISABLE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I2]], align 16 +// DISABLE-NEXT: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I2]], align 16 +// DISABLE-NEXT: store <4 x float> [[TMP4]], <4 x float>* [[__A_ADDR_I]], align 16 +// DISABLE-NEXT: store <4 x float> [[TMP5]], <4 x float>* [[__B_ADDR_I]], align 16 +// DISABLE-NEXT: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I]], align 16 +// DISABLE-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[TMP6]], i32 0 +// DISABLE-NEXT: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I]], align 16 +// DISABLE-NEXT: [[VECEXT1_I:%.*]] = extractelement <4 x float> [[TMP7]], i32 0 +// DISABLE-NEXT: [[DIV_I:%.*]] = fdiv float [[VECEXT1_I]], [[VECEXT_I]] +// DISABLE-NEXT: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I]], align 16 +// DISABLE-NEXT: [[VECINS_I:%.*]] = insertelement <4 x float> [[TMP8]], float [[DIV_I]], i32 0 +// DISABLE-NEXT: store <4 x float> [[VECINS_I]], <4 x float>* [[__A_ADDR_I]], align 16 +// DISABLE-NEXT: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I]], align 16 +// DISABLE-NEXT: store <4 x float> [[TMP9]], <4 x float>* [[__A_ADDR_I2]], align 16 +// DISABLE-NEXT: [[TMP10:%.*]] = load i8, i8* [[__U_ADDR_I]], align 1 +// DISABLE-NEXT: [[TMP11:%.*]] = load <4 x float>, <4 x float>* 
[[__A_ADDR_I2]], align 16 +// DISABLE-NEXT: [[TMP12:%.*]] = load <4 x float>, <4 x float>* [[__W_ADDR_I]], align 16 +// DISABLE-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP11]], i64 0 +// DISABLE-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[TMP12]], i64 0 +// DISABLE-NEXT: [[TMP15:%.*]] = bitcast i8 [[TMP10]] to <8 x i1> +// DISABLE-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP15]], i64 0 +// DISABLE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP13]], float [[TMP14]] +// DISABLE-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP17]], i64 0 +// DISABLE-NEXT: ret <4 x float> [[TMP18]] +// +__m128 test_mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { + return _mm_mask_div_ss(__W,__U,__A,__B); +} diff --git a/clang/test/CodeGenCXX/vector-noundef.cpp b/clang/test/CodeGenCXX/vector-noundef.cpp --- a/clang/test/CodeGenCXX/vector-noundef.cpp +++ b/clang/test/CodeGenCXX/vector-noundef.cpp @@ -6,6 +6,9 @@ using VecOfThreeChars __attribute__((ext_vector_type(3))) = char; using VecOfThreeUChars __attribute__((ext_vector_type(3))) = unsigned char; +using VecOfFourFloats __attribute__((ext_vector_type(4))) = float; +using VecOfTwoFloats __attribute__((ext_vector_type(2))) = float; + // CHECK-LABEL: @_Z15getElement4BoolRDv4_b( // CHECK-NEXT: entry: // CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 @@ -82,3 +85,33 @@ { return a[0]; } + +// CHECK-LABEL: @_Z16vectorSubsectionRDv2_fRDv4_f( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VEC2_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[VEC4_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[VEC2:%.*]], ptr [[VEC2_ADDR]], align 8 +// CHECK-NEXT: store ptr [[VEC4:%.*]], ptr [[VEC4_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC4_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[TMP0]], align 16, !noundef [[NOUNDEF2]] +// CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> <i32 0, i32 1> +// 
CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VEC2_ADDR]], align 8 +// CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[TMP3]], align 8 +// CHECK-NEXT: ret void +// +// DISABLE-LABEL: @_Z16vectorSubsectionRDv2_fRDv4_f( +// DISABLE-NEXT: entry: +// DISABLE-NEXT: [[VEC2_ADDR:%.*]] = alloca ptr, align 8 +// DISABLE-NEXT: [[VEC4_ADDR:%.*]] = alloca ptr, align 8 +// DISABLE-NEXT: store ptr [[VEC2:%.*]], ptr [[VEC2_ADDR]], align 8 +// DISABLE-NEXT: store ptr [[VEC4:%.*]], ptr [[VEC4_ADDR]], align 8 +// DISABLE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC4_ADDR]], align 8 +// DISABLE-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[TMP0]], align 16 +// DISABLE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> <i32 0, i32 1> +// DISABLE-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VEC2_ADDR]], align 8 +// DISABLE-NEXT: store <2 x float> [[TMP2]], ptr [[TMP3]], align 8 +// DISABLE-NEXT: ret void +// +void vectorSubsection(VecOfTwoFloats& vec2, VecOfFourFloats& vec4) { + vec2 = vec4.xy; +}