Index: clang/lib/CodeGen/CGExpr.cpp =================================================================== --- clang/lib/CodeGen/CGExpr.cpp +++ clang/lib/CodeGen/CGExpr.cpp @@ -3919,10 +3919,44 @@ // Propagate the alignment from the array itself to the result. QualType arrayType = Array->getType(); - Addr = emitArraySubscriptGEP( - *this, ArrayLV.getAddress(*this), {CGM.getSize(CharUnits::Zero()), Idx}, - E->getType(), !getLangOpts().isSignedOverflowDefined(), SignedIndices, - E->getExprLoc(), &arrayType, E->getBase()); + + Address ArrayLVAddr = ArrayLV.getAddress(*this); + + if (!getLangOpts().isSignedOverflowDefined() && + // ISO/IEC 9899:TC3, 6.5.6.8 + (getLangOpts().C99 || getLangOpts().CPlusPlus) && + getContext().getAsConstantArrayType(arrayType)) { + auto *CAT = getContext().getAsConstantArrayType(arrayType); + uint64_t BoundedRegionSize = CAT->getSize().getZExtValue() * + getContext().getTypeSize(CAT->getElementType()) / 8; + + Address BeginOff = emitArraySubscriptGEP( + *this, ArrayLVAddr, + {CGM.getSize(CharUnits::Zero()), CGM.getSize(CharUnits::Zero())}, + E->getType(), !getLangOpts().isSignedOverflowDefined(), SignedIndices, + E->getExprLoc(), &arrayType, E->getBase()); + + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::memory_region_decl, + BeginOff.getPointer()->getType()); + llvm::Value *Call = Builder.CreateCall(F, + {BeginOff.getPointer(), + llvm::ConstantInt::get(Int64Ty, 0), + llvm::ConstantInt::get(Int64Ty, BoundedRegionSize)}, + "arrayidx.bounded"); + Address RetAddr(Call, BeginOff.getElementType(), + ArrayLVAddr.getAlignment()); + + Addr = emitArraySubscriptGEP( + *this, RetAddr, {Idx}, + E->getType(), !getLangOpts().isSignedOverflowDefined(), SignedIndices, + E->getExprLoc(), &arrayType, E->getBase()); + } else { + Addr = emitArraySubscriptGEP( + *this, ArrayLVAddr, {CGM.getSize(CharUnits::Zero()), Idx}, + E->getType(), !getLangOpts().isSignedOverflowDefined(), SignedIndices, + E->getExprLoc(), &arrayType, E->getBase()); + } + EltBaseInfo = ArrayLV.getBaseInfo(); EltTBAAInfo = CGM.getTBAAInfoForSubobject(ArrayLV, E->getType()); } else { Index: clang/test/CodeGen/2005-01-02-ConstantInits.c =================================================================== --- clang/test/CodeGen/2005-01-02-ConstantInits.c +++ clang/test/CodeGen/2005-01-02-ConstantInits.c @@ -7,13 +7,6 @@ struct X { int a[2]; }; extern int bar(); -//. -// CHECK: @test.i23 = internal global i32 4, align 4 -// CHECK: @i = global i32 4, align 4 -// CHECK: @Arr = global [100 x i32] zeroinitializer, align 16 -// CHECK: @foo2.X = internal global ptr getelementptr (i8, ptr @Arr, i64 196), align 8 -// CHECK: @foo2.i23 = internal global i32 0, align 4 -//. // CHECK-LABEL: define {{[^@]+}}@test // CHECK-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: @@ -34,12 +27,15 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 -// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr, ...) @bar(ptr noundef getelementptr inbounds ([100 x i32], ptr @Arr, i64 0, i64 49)) +// CHECK-NEXT: [[ARRAYIDX_BOUNDED:%.*]] = call ptr @llvm.memory.region.decl.p0(ptr @Arr, i64 0, i64 400) +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX_BOUNDED]], i64 49 +// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr, ...) 
@bar(ptr noundef [[ARRAYIDX]]) // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4 // CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP0]] to i64 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr @Arr, i64 0, i64 [[IDXPROM]] -// CHECK-NEXT: [[CALL1:%.*]] = call i32 (ptr, ...) @bar(ptr noundef [[ARRAYIDX]]) -// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]] +// CHECK-NEXT: [[ARRAYIDX_BOUNDED1:%.*]] = call ptr @llvm.memory.region.decl.p0(ptr @Arr, i64 0, i64 400) +// CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX_BOUNDED1]], i64 [[IDXPROM]] +// CHECK-NEXT: [[CALL3:%.*]] = call i32 (ptr, ...) @bar(ptr noundef [[ARRAYIDX2]]) +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL3]] // CHECK-NEXT: ret i32 [[ADD]] // int foo(int i) { return bar(&Arr[49])+bar(&Arr[i]); } Index: clang/test/CodeGen/X86/va-arg-sse.c =================================================================== --- clang/test/CodeGen/X86/va-arg-sse.c +++ clang/test/CodeGen/X86/va-arg-sse.c @@ -22,28 +22,30 @@ // CHECK-NEXT: store i32 0, ptr [[K]], align 4 // CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[AP]], i64 0, i64 0 // CHECK-NEXT: call void @llvm.va_start(ptr [[ARRAYDECAY]]) -// CHECK-NEXT: store ptr getelementptr inbounds ([5 x %struct.S], ptr @a, i64 0, i64 2), ptr [[P]], align 8 -// CHECK-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[AP]], i64 0, i64 0 -// CHECK-NEXT: [[FP_OFFSET_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY2]], i32 0, i32 1 +// CHECK-NEXT: [[ARRAYIDX_BOUNDED:%.*]] = call ptr @llvm.memory.region.decl.p0(ptr @a, i64 0, i64 60) +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYIDX_BOUNDED]], i64 2 +// CHECK-NEXT: store ptr [[ARRAYIDX]], ptr [[P]], align 8 +// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[AP]], i64 0, i64 0 +// CHECK-NEXT: [[FP_OFFSET_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 1 // CHECK-NEXT: [[FP_OFFSET:%.*]] = load i32, ptr [[FP_OFFSET_P]], align 4 // CHECK-NEXT: [[FITS_IN_FP:%.*]] = icmp ule i32 [[FP_OFFSET]], 144 // CHECK-NEXT: br i1 [[FITS_IN_FP]], label [[VAARG_IN_REG:%.*]], label [[VAARG_IN_MEM:%.*]] // CHECK: vaarg.in_reg: -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY2]], i32 0, i32 3 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3 // CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 [[FP_OFFSET]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 -// CHECK-NEXT: [[TMP5:%.*]] = load <2 x float>, ptr [[TMP1]], align 16 -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds { <2 x float>, float }, ptr [[TMP]], i32 0, i32 0 -// CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[TMP6]], align 4 -// CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP2]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds { <2 x float>, float }, ptr [[TMP]], i32 0, i32 1 -// CHECK-NEXT: store float [[TMP8]], ptr [[TMP9]], align 4 -// CHECK-NEXT: [[TMP11:%.*]] = add i32 [[FP_OFFSET]], 32 -// CHECK-NEXT: store i32 [[TMP11]], ptr [[FP_OFFSET_P]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, ptr [[TMP1]], align 16 +// CHECK-NEXT: 
[[TMP4:%.*]] = getelementptr inbounds { <2 x float>, float }, ptr [[TMP]], i32 0, i32 0 +// CHECK-NEXT: store <2 x float> [[TMP3]], ptr [[TMP4]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[TMP2]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds { <2 x float>, float }, ptr [[TMP]], i32 0, i32 1 +// CHECK-NEXT: store float [[TMP5]], ptr [[TMP6]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = add i32 [[FP_OFFSET]], 32 +// CHECK-NEXT: store i32 [[TMP7]], ptr [[FP_OFFSET_P]], align 4 // CHECK-NEXT: br label [[VAARG_END:%.*]] // CHECK: vaarg.in_mem: -// CHECK-NEXT: [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY2]], i32 0, i32 2 +// CHECK-NEXT: [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2 // CHECK-NEXT: [[OVERFLOW_ARG_AREA:%.*]] = load ptr, ptr [[OVERFLOW_ARG_AREA_P]], align 8 // CHECK-NEXT: [[OVERFLOW_ARG_AREA_NEXT:%.*]] = getelementptr i8, ptr [[OVERFLOW_ARG_AREA]], i32 16 // CHECK-NEXT: store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr [[OVERFLOW_ARG_AREA_P]], align 8 @@ -51,20 +53,24 @@ // CHECK: vaarg.end: // CHECK-NEXT: [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP]], [[VAARG_IN_REG]] ], [ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ] // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARG]], ptr align 4 [[VAARG_ADDR]], i64 12, i1 false) -// CHECK-NEXT: [[ARRAYDECAY3:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[AP]], i64 0, i64 0 -// CHECK-NEXT: call void @llvm.va_end(ptr [[ARRAYDECAY3]]) -// CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[P]], align 8 -// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne ptr [[TMP15]], null +// CHECK-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[AP]], i64 0, i64 0 +// CHECK-NEXT: call void @llvm.va_end(ptr [[ARRAYDECAY2]]) +// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[P]], align 8 +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne ptr [[TMP8]], null // CHECK-NEXT: br i1 [[TOBOOL]], label [[LAND_LHS_TRUE:%.*]], label [[IF_END:%.*]] // CHECK: land.lhs.true: -// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[P]], align 8 -// CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP16]], i32 0, i32 0 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x float], ptr [[A]], i64 0, i64 2 -// CHECK-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARG]], i32 0, i32 0 -// CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [3 x float], ptr [[A5]], i64 0, i64 2 -// CHECK-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -// CHECK-NEXT: [[CMP:%.*]] = fcmp une float [[TMP17]], [[TMP18]] +// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[P]], align 8 +// CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP9]], i32 0, i32 0 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [3 x float], ptr [[A]], i64 0, i64 0 +// CHECK-NEXT: [[ARRAYIDX_BOUNDED4:%.*]] = call ptr @llvm.memory.region.decl.p0(ptr [[ARRAYIDX3]], i64 0, i64 12) +// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[ARRAYIDX_BOUNDED4]], i64 2 +// CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX5]], align 4 +// CHECK-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARG]], i32 0, i32 0 +// CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [3 x float], ptr [[A6]], i64 0, i64 0 +// CHECK-NEXT: [[ARRAYIDX_BOUNDED8:%.*]] = call ptr @llvm.memory.region.decl.p0(ptr [[ARRAYIDX7]], 
i64 0, i64 12) +// CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[ARRAYIDX_BOUNDED8]], i64 2 +// CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX9]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = fcmp une float [[TMP10]], [[TMP11]] // CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END]] // CHECK: if.then: // CHECK-NEXT: store i32 0, ptr [[RETVAL]], align 4 @@ -73,8 +79,8 @@ // CHECK-NEXT: store i32 1, ptr [[RETVAL]], align 4 // CHECK-NEXT: br label [[RETURN]] // CHECK: return: -// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK-NEXT: ret i32 [[TMP19]] +// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK-NEXT: ret i32 [[TMP12]] // int check (int z, ...) { Index: clang/test/CodeGen/builtin-align-array.c =================================================================== --- clang/test/CodeGen/builtin-align-array.c +++ clang/test/CodeGen/builtin-align-array.c @@ -7,23 +7,29 @@ // CHECK-LABEL: @test_array( // CHECK-NEXT: entry: // CHECK-NEXT: [[BUF:%.*]] = alloca [1024 x i8], align 16 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 44 -// CHECK-NEXT: [[INTPTR:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 0 +// CHECK-NEXT: [[ARRAYIDX_BOUNDED:%.*]] = call ptr @llvm.memory.region.decl.p0(ptr [[ARRAYIDX]], i64 0, i64 1024) +// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX_BOUNDED]], i64 44 +// CHECK-NEXT: [[INTPTR:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 // CHECK-NEXT: [[ALIGNED_INTPTR:%.*]] = and i64 [[INTPTR]], -16 // CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[ALIGNED_INTPTR]], [[INTPTR]] -// CHECK-NEXT: [[ALIGNED_RESULT:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX]], i64 [[DIFF]] +// CHECK-NEXT: [[ALIGNED_RESULT:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX1]], i64 [[DIFF]] // CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ALIGNED_RESULT]], i64 16) ] // CHECK-NEXT: [[CALL:%.*]] = call i32 @func(ptr noundef [[ALIGNED_RESULT]]) -// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 22 -// CHECK-NEXT: [[INTPTR2:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 -// CHECK-NEXT: [[OVER_BOUNDARY:%.*]] = add i64 [[INTPTR2]], 31 -// CHECK-NEXT: [[ALIGNED_INTPTR4:%.*]] = and i64 [[OVER_BOUNDARY]], -32 -// CHECK-NEXT: [[DIFF5:%.*]] = sub i64 [[ALIGNED_INTPTR4]], [[INTPTR2]] -// CHECK-NEXT: [[ALIGNED_RESULT6:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX1]], i64 [[DIFF5]] -// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ALIGNED_RESULT6]], i64 32) ] -// CHECK-NEXT: [[CALL7:%.*]] = call i32 @func(ptr noundef [[ALIGNED_RESULT6]]) -// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 16 -// CHECK-NEXT: [[SRC_ADDR:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 +// CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 0 +// CHECK-NEXT: [[ARRAYIDX_BOUNDED3:%.*]] = call ptr @llvm.memory.region.decl.p0(ptr [[ARRAYIDX2]], i64 0, i64 1024) +// CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX_BOUNDED3]], i64 22 +// CHECK-NEXT: [[INTPTR5:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 +// CHECK-NEXT: [[OVER_BOUNDARY:%.*]] = add i64 [[INTPTR5]], 31 +// CHECK-NEXT: [[ALIGNED_INTPTR7:%.*]] = and i64 [[OVER_BOUNDARY]], -32 +// CHECK-NEXT: [[DIFF8:%.*]] = sub i64 [[ALIGNED_INTPTR7]], [[INTPTR5]] +// CHECK-NEXT: 
[[ALIGNED_RESULT9:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX4]], i64 [[DIFF8]] +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ALIGNED_RESULT9]], i64 32) ] +// CHECK-NEXT: [[CALL10:%.*]] = call i32 @func(ptr noundef [[ALIGNED_RESULT9]]) +// CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 0 +// CHECK-NEXT: [[ARRAYIDX_BOUNDED12:%.*]] = call ptr @llvm.memory.region.decl.p0(ptr [[ARRAYIDX11]], i64 0, i64 1024) +// CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX_BOUNDED12]], i64 16 +// CHECK-NEXT: [[SRC_ADDR:%.*]] = ptrtoint ptr [[ARRAYIDX13]] to i64 // CHECK-NEXT: [[SET_BITS:%.*]] = and i64 [[SRC_ADDR]], 63 // CHECK-NEXT: [[IS_ALIGNED:%.*]] = icmp eq i64 [[SET_BITS]], 0 // CHECK-NEXT: [[CONV:%.*]] = zext i1 [[IS_ALIGNED]] to i32 @@ -39,21 +45,25 @@ // CHECK-LABEL: @test_array_should_not_mask( // CHECK-NEXT: entry: // CHECK-NEXT: [[BUF:%.*]] = alloca [1024 x i8], align 32 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 64 -// CHECK-NEXT: [[INTPTR:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 0 +// CHECK-NEXT: [[ARRAYIDX_BOUNDED:%.*]] = call ptr @llvm.memory.region.decl.p0(ptr [[ARRAYIDX]], i64 0, i64 1024) +// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX_BOUNDED]], i64 64 +// CHECK-NEXT: [[INTPTR:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 // CHECK-NEXT: [[ALIGNED_INTPTR:%.*]] = and i64 [[INTPTR]], -16 // CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[ALIGNED_INTPTR]], [[INTPTR]] -// CHECK-NEXT: [[ALIGNED_RESULT:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX]], i64 [[DIFF]] +// CHECK-NEXT: [[ALIGNED_RESULT:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX1]], i64 [[DIFF]] // CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ALIGNED_RESULT]], i64 16) ] // CHECK-NEXT: [[CALL:%.*]] = call i32 @func(ptr noundef [[ALIGNED_RESULT]]) -// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 32 -// CHECK-NEXT: [[INTPTR2:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 -// CHECK-NEXT: [[OVER_BOUNDARY:%.*]] = add i64 [[INTPTR2]], 31 -// CHECK-NEXT: [[ALIGNED_INTPTR4:%.*]] = and i64 [[OVER_BOUNDARY]], -32 -// CHECK-NEXT: [[DIFF5:%.*]] = sub i64 [[ALIGNED_INTPTR4]], [[INTPTR2]] -// CHECK-NEXT: [[ALIGNED_RESULT6:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX1]], i64 [[DIFF5]] -// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ALIGNED_RESULT6]], i64 32) ] -// CHECK-NEXT: [[CALL7:%.*]] = call i32 @func(ptr noundef [[ALIGNED_RESULT6]]) +// CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 0 +// CHECK-NEXT: [[ARRAYIDX_BOUNDED3:%.*]] = call ptr @llvm.memory.region.decl.p0(ptr [[ARRAYIDX2]], i64 0, i64 1024) +// CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX_BOUNDED3]], i64 32 +// CHECK-NEXT: [[INTPTR5:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 +// CHECK-NEXT: [[OVER_BOUNDARY:%.*]] = add i64 [[INTPTR5]], 31 +// CHECK-NEXT: [[ALIGNED_INTPTR7:%.*]] = and i64 [[OVER_BOUNDARY]], -32 +// CHECK-NEXT: [[DIFF8:%.*]] = sub i64 [[ALIGNED_INTPTR7]], [[INTPTR5]] +// CHECK-NEXT: [[ALIGNED_RESULT9:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX4]], i64 [[DIFF8]] +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ALIGNED_RESULT9]], i64 32) ] +// CHECK-NEXT: [[CALL10:%.*]] = call i32 @func(ptr noundef [[ALIGNED_RESULT9]]) // CHECK-NEXT: ret i32 
1 // int test_array_should_not_mask(void) { Index: clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp =================================================================== --- clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp +++ clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp @@ -31,7 +31,9 @@ // CHECK-NEXT: store i32 1, ptr [[LV1_ASCAST]], align 4 // CHECK-NEXT: store i32 2, ptr [[LV2_ASCAST]], align 4 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[LA_ASCAST]], i64 0, i64 0 -// CHECK-NEXT: store i32 3, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[ARRAYIDX_BOUNDED:%.*]] = call ptr @llvm.memory.region.decl.p0(ptr [[ARRAYIDX]], i64 0, i64 400) +// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX_BOUNDED]], i64 0 +// CHECK-NEXT: store i32 3, ptr [[ARRAYIDX1]], align 4 // CHECK-NEXT: store ptr [[LV1_ASCAST]], ptr [[LP1_ASCAST]], align 8 // CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[LA_ASCAST]], i64 0, i64 0 // CHECK-NEXT: store ptr [[ARRAYDECAY]], ptr [[LP2_ASCAST]], align 8 @@ -64,7 +66,25 @@ class A { int x; public: +// CHECK-LABEL: @_ZN1AC1Ev( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr +// CHECK-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @_ZN1AC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK-NEXT: ret void +// A():x(0) {} +// CHECK-LABEL: @_ZN1AD1Ev( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr +// CHECK-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @_ZN1AD2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4:[0-9]+]] +// CHECK-NEXT: ret void +// ~A() { destroy(x); } Index: llvm/include/llvm/Analysis/PtrUseVisitor.h =================================================================== --- llvm/include/llvm/Analysis/PtrUseVisitor.h +++ llvm/include/llvm/Analysis/PtrUseVisitor.h @@ -285,6 +285,10 @@ } } + void visitMemRegDeclInst(MemRegDeclInst &I) { + enqueueUsers(I); + } + // Generically, arguments to calls and invokes escape the pointer to some // other function. Mark that. 
   void visitCallBase(CallBase &CB) {
Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -683,6 +683,7 @@
     case Intrinsic::invariant_end:
     case Intrinsic::launder_invariant_group:
     case Intrinsic::strip_invariant_group:
+    case Intrinsic::memory_region_decl:
     case Intrinsic::is_constant:
     case Intrinsic::lifetime_start:
     case Intrinsic::lifetime_end:
Index: llvm/include/llvm/IR/InstVisitor.h
===================================================================
--- llvm/include/llvm/IR/InstVisitor.h
+++ llvm/include/llvm/IR/InstVisitor.h
@@ -10,6 +10,7 @@
 #ifndef LLVM_IR_INSTVISITOR_H
 #define LLVM_IR_INSTVISITOR_H
 
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -216,6 +217,7 @@
   RetTy visitVAStartInst(VAStartInst &I) { DELEGATE(IntrinsicInst); }
   RetTy visitVAEndInst(VAEndInst &I) { DELEGATE(IntrinsicInst); }
   RetTy visitVACopyInst(VACopyInst &I) { DELEGATE(IntrinsicInst); }
+  RetTy visitMemRegDeclInst(MemRegDeclInst &I) { DELEGATE(IntrinsicInst); }
   RetTy visitIntrinsicInst(IntrinsicInst &I) { DELEGATE(CallInst); }
   RetTy visitCallInst(CallInst &I) { DELEGATE(CallBase); }
   RetTy visitInvokeInst(InvokeInst &I) { DELEGATE(CallBase); }
@@ -298,6 +300,8 @@
       case Intrinsic::vastart: DELEGATE(VAStartInst);
       case Intrinsic::vaend: DELEGATE(VAEndInst);
       case Intrinsic::vacopy: DELEGATE(VACopyInst);
+      case Intrinsic::memory_region_decl:
+        DELEGATE(MemRegDeclInst);
       case Intrinsic::not_intrinsic: break;
       }
     }
Index: llvm/include/llvm/IR/IntrinsicInst.h
===================================================================
--- llvm/include/llvm/IR/IntrinsicInst.h
+++ llvm/include/llvm/IR/IntrinsicInst.h
@@ -23,6 +23,7 @@
 #ifndef LLVM_IR_INTRINSICINST_H
 #define LLVM_IR_INTRINSICINST_H
 
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -100,6 +101,7 @@
     case Intrinsic::invariant_end:
     case Intrinsic::lifetime_start:
     case Intrinsic::lifetime_end:
+    case Intrinsic::memory_region_decl:
     case Intrinsic::experimental_noalias_scope_decl:
     case Intrinsic::objectsize:
     case Intrinsic::ptr_annotation:
@@ -1387,6 +1389,18 @@
   Value *getSrc() const { return const_cast<Value *>(getArgOperand(1)); }
 };
 
+class MemRegDeclInst : public IntrinsicInst {
+public:
+  static bool classof(const IntrinsicInst *I) {
+    return I->getIntrinsicID() == Intrinsic::memory_region_decl;
+  }
+  static bool classof(const Value *V) {
+    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+  }
+
+  Value *getPtr() const { return const_cast<Value *>(getArgOperand(0)); }
+};
+
 /// A base class for all instrprof intrinsics.
 class InstrProfInstBase : public IntrinsicInst {
 public:
Index: llvm/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/include/llvm/IR/Intrinsics.td
+++ llvm/include/llvm/IR/Intrinsics.td
@@ -1544,6 +1544,14 @@
                 [LLVMMatchType<0>],
                 [IntrSpeculatable, IntrNoMem, IntrWillReturn]>;
 
+// Declares that the returned pointer (which is the first argument), and any
+// pointer (transitively) derived from it through its def-use chain, points
+// into the memory region [ptr+begin_offset, ptr+end_offset), or is poison
+// otherwise.
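+//
+// For example (illustrative IR only; the value names are arbitrary), a front
+// end can bound an index into a 100-element i32 array like so:
+//
+//   %bounded = call ptr @llvm.memory.region.decl.p0(ptr %arr, i64 0, i64 400)
+//   %elt = getelementptr inbounds i32, ptr %bounded, i64 %i
+//
+// Accesses through %elt (or any pointer derived from it) may then be assumed
+// to stay within the 400-byte region starting at %arr, or %elt is poison.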
+def int_memory_region_decl : DefaultAttrsIntrinsic<[llvm_anyptr_ty],
+    [LLVMMatchType<0> /*ptr*/, llvm_i64_ty /*begin_offset*/, llvm_i64_ty /*end_offset*/],
+    [IntrNoMem, IntrSpeculatable, ReadNone<ArgIndex<0>>]>;
+
 //===------------------------ Stackmap Intrinsics -------------------------===//
 //
 def int_experimental_stackmap : DefaultAttrsIntrinsic<[],
Index: llvm/lib/Analysis/AliasSetTracker.cpp
===================================================================
--- llvm/lib/Analysis/AliasSetTracker.cpp
+++ llvm/lib/Analysis/AliasSetTracker.cpp
@@ -412,6 +412,7 @@
     case Intrinsic::experimental_noalias_scope_decl:
     case Intrinsic::sideeffect:
     case Intrinsic::pseudoprobe:
+    case Intrinsic::memory_region_decl:
       return;
     }
   }
Index: llvm/lib/Analysis/BasicAliasAnalysis.cpp
===================================================================
--- llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1506,6 +1506,30 @@
     if (isEscapeSource(O2) &&
         AAQI.CI->isNotCapturedBeforeOrAt(O1, cast<Instruction>(O2)))
       return AliasResult::NoAlias;
+
+    // If an underlying value is a call to the memory region declaration
+    // intrinsic, look through the call to its pointer operand and infer an
+    // upper bound on the MemoryLocation size from the region's end offset.
+    auto *CB1 = dyn_cast<CallBase>(O1), *CB2 = dyn_cast<CallBase>(O2);
+    bool FoundMemRegDecl = false;
+    if (CB1 && CB1->getIntrinsicID() == Intrinsic::memory_region_decl) {
+      FoundMemRegDecl = true;
+      V1 = CB1->getArgOperand(0);
+      if (auto *End1 = dyn_cast<ConstantInt>(CB1->getArgOperand(2)))
+        if (auto End1Val = End1->getZExtValue();
+            End1Val > 0 && V1Size.hasValue() && End1Val < V1Size.getValue())
+          V1Size = LocationSize::upperBound(End1Val);
+    }
+    if (CB2 && CB2->getIntrinsicID() == Intrinsic::memory_region_decl) {
+      FoundMemRegDecl = true;
+      V2 = CB2->getArgOperand(0);
+      if (auto *End2 = dyn_cast<ConstantInt>(CB2->getArgOperand(2)))
+        if (auto End2Val = End2->getZExtValue();
+            End2Val > 0 && V2Size.hasValue() && End2Val < V2Size.getValue())
+          V2Size = LocationSize::upperBound(End2Val);
+    }
+    if (FoundMemRegDecl)
+      return aliasCheck(V1, V1Size, V2, V2Size, AAQI, CtxI);
   }
 
   // If the size of one access is larger than the entire object on the other
Index: llvm/lib/Analysis/ConstantFolding.cpp
===================================================================
--- llvm/lib/Analysis/ConstantFolding.cpp
+++ llvm/lib/Analysis/ConstantFolding.cpp
@@ -1500,6 +1500,7 @@
   case Intrinsic::fshr:
   case Intrinsic::launder_invariant_group:
   case Intrinsic::strip_invariant_group:
+  case Intrinsic::memory_region_decl:
   case Intrinsic::masked_load:
   case Intrinsic::get_active_lane_mask:
   case Intrinsic::abs:
Index: llvm/lib/Analysis/InlineCost.cpp
===================================================================
--- llvm/lib/Analysis/InlineCost.cpp
+++ llvm/lib/Analysis/InlineCost.cpp
@@ -2245,6 +2245,7 @@
     return false;
   case Intrinsic::launder_invariant_group:
   case Intrinsic::strip_invariant_group:
+  case Intrinsic::memory_region_decl:
     if (auto *SROAArg = getSROAArgForValueOrNull(II->getOperand(0)))
       SROAArgValues[II] = SROAArg;
     return true;
Index: llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
===================================================================
--- llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -160,6 +160,9 @@
     // These intrinsics don't really modify the memory, but returning Mod
     // will allow them to be handled conservatively.
     return ModRefInfo::Mod;
+  case Intrinsic::memory_region_decl:
+    Loc = MemoryLocation::getForArgument(II, 0, TLI);
+    return ModRefInfo::Ref;
   case Intrinsic::masked_load:
     Loc = MemoryLocation::getForArgument(II, 0, TLI);
     return ModRefInfo::Ref;
Index: llvm/lib/Analysis/MemoryLocation.cpp
===================================================================
--- llvm/lib/Analysis/MemoryLocation.cpp
+++ llvm/lib/Analysis/MemoryLocation.cpp
@@ -194,6 +194,15 @@
             cast<ConstantInt>(II->getArgOperand(0))->getZExtValue()),
         AATags);
 
+  case Intrinsic::memory_region_decl:
+    assert(ArgIdx == 0 && "Invalid argument index");
+    return MemoryLocation(
+        Arg,
+        LocationSize::precise(
+            cast<ConstantInt>(II->getArgOperand(2))->getZExtValue() -
+            cast<ConstantInt>(II->getArgOperand(1))->getZExtValue()),
+        AATags);
+
   case Intrinsic::masked_load:
     assert(ArgIdx == 0 && "Invalid argument index");
     return MemoryLocation(
Index: llvm/lib/Analysis/MemorySSA.cpp
===================================================================
--- llvm/lib/Analysis/MemorySSA.cpp
+++ llvm/lib/Analysis/MemorySSA.cpp
@@ -294,6 +294,7 @@
     switch (II->getIntrinsicID()) {
     case Intrinsic::invariant_start:
     case Intrinsic::invariant_end:
+    case Intrinsic::memory_region_decl:
    case Intrinsic::assume:
     case Intrinsic::experimental_noalias_scope_decl:
     case Intrinsic::pseudoprobe:
Index: llvm/lib/Analysis/ObjCARCInstKind.cpp
===================================================================
--- llvm/lib/Analysis/ObjCARCInstKind.cpp
+++ llvm/lib/Analysis/ObjCARCInstKind.cpp
@@ -181,6 +181,7 @@
     case Intrinsic::lifetime_end:
     case Intrinsic::invariant_start:
     case Intrinsic::invariant_end:
+    case Intrinsic::memory_region_decl:
     // Don't let dbg info affect our results.
     case Intrinsic::dbg_declare:
     case Intrinsic::dbg_value:
Index: llvm/lib/Analysis/ValueTracking.cpp
===================================================================
--- llvm/lib/Analysis/ValueTracking.cpp
+++ llvm/lib/Analysis/ValueTracking.cpp
@@ -5772,6 +5772,7 @@
     switch (Call->getIntrinsicID()) {
     case Intrinsic::launder_invariant_group:
     case Intrinsic::strip_invariant_group:
+    case Intrinsic::memory_region_decl:
     case Intrinsic::aarch64_irg:
     case Intrinsic::aarch64_tagp:
     // The amdgcn_make_buffer_rsrc function does not alter the address of the
Index: llvm/lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2391,7 +2391,8 @@
     }
   case Intrinsic::launder_invariant_group:
-  case Intrinsic::strip_invariant_group: {
+  case Intrinsic::strip_invariant_group:
+  case Intrinsic::memory_region_decl: {
     Value *ArgVal = II->getArgOperand(0);
     auto it = LargeOffsetGEPMap.find(II);
     if (it != LargeOffsetGEPMap.end()) {
Index: llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1373,6 +1373,7 @@
   case Intrinsic::launder_invariant_group:
   case Intrinsic::strip_invariant_group:
+  case Intrinsic::memory_region_decl:
   case Intrinsic::expect: {
     Register ResultReg = getRegForValue(II->getArgOperand(0));
     if (!ResultReg)
Index: llvm/lib/IR/Value.cpp
===================================================================
--- llvm/lib/IR/Value.cpp
+++ llvm/lib/IR/Value.cpp
@@ -675,6 +675,10 @@
         V = Call->getArgOperand(0);
         continue;
       }
+      if (Call->getIntrinsicID() == Intrinsic::memory_region_decl) {
+        V = Call->getArgOperand(0);
+        continue;
+      }
     }
     return V;
   }
Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2231,6 +2231,23 @@
     if (Instruction *I = visitGEPOfGEP(GEP, Src))
       return I;
 
+  if (auto *SrcIntrCall = dyn_cast<IntrinsicInst>(PtrOp); SrcIntrCall &&
+      SrcIntrCall->getIntrinsicID() == Intrinsic::memory_region_decl)
+    if (auto *Src = dyn_cast<GEPOperator>(SrcIntrCall->getArgOperand(0)))
+      if (Instruction *I = visitGEPOfGEP(GEP, Src)) {
+        I->insertInto(GEP.getParent(), GEP.getParent()->getFirstInsertionPt());
+        llvm::Instruction *Call = Builder.CreateCall(
+            Intrinsic::getDeclaration(
+                GEP.getModule(),
+                Intrinsic::memory_region_decl,
+                { SrcIntrCall->getType() }),
+            {I,
+             SrcIntrCall->getArgOperand(1),
+             SrcIntrCall->getArgOperand(2)},
+            "arrayidx.bounded");
+        return replaceInstUsesWith(GEP, Call);
+      }
+
   // Skip if GEP source element type is scalable. The type alloc size is unknown
   // at compile-time.
   if (GEP.getNumIndices() == 1 && !IsGEPSrcEleScalable) {
@@ -2393,6 +2410,7 @@
         continue;
       case Intrinsic::launder_invariant_group:
       case Intrinsic::strip_invariant_group:
+      case Intrinsic::memory_region_decl:
         Users.emplace_back(I);
         Worklist.push_back(I);
         continue;
Index: llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -770,6 +770,7 @@
       case Intrinsic::invariant_end:
       case Intrinsic::launder_invariant_group:
      case Intrinsic::assume:
+      case Intrinsic::memory_region_decl:
        return true;
      case Intrinsic::dbg_declare:
      case Intrinsic::dbg_label:
Index: llvm/lib/Transforms/Scalar/SROA.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/SROA.cpp
+++ llvm/lib/Transforms/Scalar/SROA.cpp
@@ -928,6 +928,13 @@
     return Base::visitAddrSpaceCastInst(ASC);
   }
 
+  void visitMemRegDeclInst(MemRegDeclInst &I) {
+    if (I.use_empty())
+      return markAsDead(I);
+
+    return Base::visitMemRegDeclInst(I);
+  }
+
   void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
     if (GEPI.use_empty())
       return markAsDead(GEPI);
@@ -3748,6 +3755,11 @@
     return false;
   }
 
+  bool visitMemRegDeclInst(MemRegDeclInst &I) {
+    enqueueUsers(I);
+    return false;
+  }
+
   // Fold gep (select cond, ptr1, ptr2) => select cond, gep(ptr1), gep(ptr2)
   bool foldGEPSelect(GetElementPtrInst &GEPI) {
     if (!GEPI.hasAllConstantIndices())
Index: llvm/lib/Transforms/Utils/Local.cpp
===================================================================
--- llvm/lib/Transforms/Utils/Local.cpp
+++ llvm/lib/Transforms/Utils/Local.cpp
@@ -467,6 +467,9 @@
         II->getIntrinsicID() == Intrinsic::launder_invariant_group)
       return true;
 
+    if (II->getIntrinsicID() == Intrinsic::memory_region_decl)
+      return true;
+
     if (II->isLifetimeStartOrEnd()) {
       auto *Arg = II->getArgOperand(1);
       // Lifetime intrinsics are dead when their right-hand is undef.
Index: llvm/test/Transforms/InstCombine/gep-mem-reg-decl.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/gep-mem-reg-decl.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S < %s -passes=instcombine | FileCheck %s
+
+%struct.S = type { [1024 x i32], [1024 x i32] }
+
+declare ptr @llvm.memory.region.decl.p0(ptr readnone, i64, i64)
+
+; test that a GEP of a GEP can be combined in the presence of
+; intermediate intrinsic calls
+define i32 @test_gep_of_gep(ptr noundef %s, i64 %i) {
+; CHECK-LABEL: @test_gep_of_gep(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[S:%.*]], i64 0, i32 1, i64 [[I:%.*]]
+; CHECK-NEXT:    [[ARRAYIDX_BOUNDED:%.*]] = call ptr @llvm.memory.region.decl.p0(ptr nonnull [[ARRAYIDX21]], i64 0, i64 4096)
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX_BOUNDED]], align 4
+; CHECK-NEXT:    ret i32 [[TMP0]]
+;
+entry:
+  %arrayidx.bounded = call ptr @llvm.memory.region.decl.p0(ptr %s, i64 0, i64 4096)
+  %B = getelementptr inbounds %struct.S, ptr %s, i32 0, i32 1
+  %arrayidx.bounded1 = call ptr @llvm.memory.region.decl.p0(ptr %B, i64 0, i64 4096)
+  %arrayidx2 = getelementptr inbounds [1024 x i32], ptr %arrayidx.bounded1, i64 0, i64 %i
+  %0 = load i32, ptr %arrayidx2, align 4
+  ret i32 %0
+}
+
+; ensure that InstructionCombining.cpp:isAllocSiteRemovable()
+; does not think that pointers may escape through the intrinsic
+define i32 @test_erase_alloc_site(i32 %i) {
+; CHECK-LABEL: @test_erase_alloc_site(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %arr = alloca [1000 x i32], align 8
+  %arrayidx.bounded = call ptr @llvm.memory.region.decl.p0(ptr %arr, i64 0, i64 8000)
+  %arrayidx1 = getelementptr inbounds i32, ptr %arrayidx.bounded, i32 %i
+  store i32 1, ptr %arrayidx1, align 8
+  ret i32 0
+}
+
+; ensure that we can constant-fold a call to the intrinsic,
+; thereby allowing isAllocSiteRemovable() to properly recognize
+; a redundant alloca
+define i32 @test_constant_fold_intrinsic() {
+; CHECK-LABEL: @test_constant_fold_intrinsic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i32 1
+;
+entry:
  %s = alloca { i32 }, align 8
+  store i32 1, ptr %s, align 8
+  %arrayidx.bounded = call ptr @llvm.memory.region.decl.p0(ptr %s, i64 0, i64 8)
+  %0 = load i32, ptr %arrayidx.bounded, align 8
+  ret i32 %0
+}
Index: llvm/test/Transforms/SROA/mem-reg-decl.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SROA/mem-reg-decl.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG
+; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG
+
+declare ptr @llvm.memory.region.decl.p0(ptr readnone, i64, i64)
+
+; ensure that SROA can "see through" the intrinsic call
+define i32 @test1() {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i32 1
+;
+entry:
+  %s = alloca { i32 }, align 8
+  store i32 1, ptr %s, align 8
+  %arrayidx.bounded = call ptr @llvm.memory.region.decl.p0(ptr %s, i64 0, i64 8)
+  %0 = load i32, ptr %arrayidx.bounded, align 8
+  ret i32 %0
+}
+
+; variation of the above test
+define i32 @test2() {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 undef, undef
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+entry:
+  %s = alloca [1024 x i32], align 4
+  %t = alloca [1024 x i32], align 4
+  %arrayidx.bounded = call ptr @llvm.memory.region.decl.p0(ptr %s, i64 0, i64 4096)
+  %0 = load i32, ptr %arrayidx.bounded, align 4
+  %arrayidx.bounded1 = call ptr @llvm.memory.region.decl.p0(ptr %t, i64 0, i64 4096)
+  %arrayidx2 = getelementptr inbounds i32, ptr %arrayidx.bounded1, i32 1
+  %1 = load i32, ptr %arrayidx2, align 4
+  %add = add nsw i32 %0, %1
+  ret i32 %add
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-MODIFY-CFG: {{.*}}
+; CHECK-PRESERVE-CFG: {{.*}}
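+
+; Illustrative sketch of the BasicAliasAnalysis change in this patch (names
+; are arbitrary and nothing here is FileCheck'd): the region's end offset
+; gives an upper bound on how far an access through the returned pointer can
+; reach, so for
+;   %p = call ptr @llvm.memory.region.decl.p0(ptr %base, i64 0, i64 16)
+; an otherwise unknown-sized location based on %p is treated as covering at
+; most 16 bytes starting at %base when it is compared against other locations.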