Index: include/llvm/IR/LLVMContext.h =================================================================== --- include/llvm/IR/LLVMContext.h +++ include/llvm/IR/LLVMContext.h @@ -58,7 +58,9 @@ MD_noalias = 8, // "noalias", MD_nontemporal = 9, // "nontemporal" MD_mem_parallel_loop_access = 10, // "llvm.mem.parallel_loop_access" - MD_nonnull = 11 // "nonnull" + MD_nonnull = 11, // "nonnull" + MD_dereferenceable = 12, // "dereferenceable" + MD_dereferenceable_or_null = 13 // "dereferenceable_or_null" }; /// getMDKindID - Return a unique non-zero ID for the specified metadata kind. Index: lib/Analysis/ValueTracking.cpp =================================================================== --- lib/Analysis/ValueTracking.cpp +++ lib/Analysis/ValueTracking.cpp @@ -2872,6 +2872,20 @@ DerefBytes = CS.getDereferenceableOrNullBytes(0); CheckForNonNull = true; } + } else if (const LoadInst *LI = dyn_cast<LoadInst>(BV)) { + MDNode *MD = LI->getMetadata(LLVMContext::MD_dereferenceable); + if (MD) { + ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0)); + DerefBytes = CI->getLimitedValue(); + } + if (!DerefBytes.getBoolValue()) { + MDNode *MD = LI->getMetadata(LLVMContext::MD_dereferenceable_or_null); + if (MD) { + ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0)); + DerefBytes = CI->getLimitedValue(); + } + CheckForNonNull = true; + } } if (DerefBytes.getBoolValue()) Index: lib/IR/LLVMContext.cpp =================================================================== --- lib/IR/LLVMContext.cpp +++ lib/IR/LLVMContext.cpp @@ -88,11 +88,22 @@ "mem_parallel_loop_access kind id drifted"); (void)MemParallelLoopAccessID; - // Create the 'nonnull' metadata kind. unsigned NonNullID = getMDKindID("nonnull"); assert(NonNullID == MD_nonnull && "nonnull kind id drifted"); (void)NonNullID; + + // Create the 'dereferenceable' metadata kind. 
+ unsigned DereferenceableID = getMDKindID("dereferenceable"); + assert(DereferenceableID == MD_dereferenceable && + "dereferenceable kind id drifted"); + (void)DereferenceableID; + + // Create the 'dereferenceable_or_null' metadata kind. + unsigned DereferenceableOrNullID = getMDKindID("dereferenceable_or_null"); + assert(DereferenceableOrNullID == MD_dereferenceable_or_null && + "dereferenceable_or_null kind id drifted"); + (void)DereferenceableOrNullID; } LLVMContext::~LLVMContext() { delete pImpl; } Index: test/Analysis/ValueTracking/memory-dereferenceable.ll =================================================================== --- test/Analysis/ValueTracking/memory-dereferenceable.ll +++ test/Analysis/ValueTracking/memory-dereferenceable.ll @@ -8,6 +8,7 @@ declare zeroext i1 @return_i1() @globalstr = global [6 x i8] c"hello\00" +@globali32ptr = external global i32* define void @test(i32 addrspace(1)* dereferenceable(8) %dparam) gc "statepoint-example" { ; CHECK: The following are dereferenceable: @@ -15,6 +16,7 @@ ; CHECK: %alloca ; CHECK: %dparam ; CHECK: %relocate +; CHECK: %dload ; CHECK-NOT: %nparam entry: %globalptr = getelementptr inbounds [6 x i8], [6 x i8]* @globalstr, i32 0, i32 0 @@ -27,8 +29,12 @@ %load4 = load i32, i32 addrspace(1)* %relocate %nparam = getelementptr i32, i32 addrspace(1)* %dparam, i32 5 %load5 = load i32, i32 addrspace(1)* %nparam + %dload = load i32*, i32** @globali32ptr, !dereferenceable !0 + %load6 = load i32, i32* %dload ret void } declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()*, i32, i32, ...) 
declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) + +!0 = !{i32 4} \ No newline at end of file Index: test/Transforms/LICM/hoist-deref-load.ll =================================================================== --- test/Transforms/LICM/hoist-deref-load.ll +++ test/Transforms/LICM/hoist-deref-load.ll @@ -250,5 +250,139 @@ ret void } -attributes #0 = { nounwind uwtable } +; This test represents the following function: +; void test1(int * __restrict__ a, int * __restrict__ b, int **cptr, int n) { +; c = *cptr; +; for (int i = 0; i < n; ++i) +; if (a[i] > 0) +; a[i] = (*c)*b[i]; +; } +; and we want to hoist the load of %c out of the loop. This can be done only +; because of the dereferenceable metadata on the c = *cptr load. + +; CHECK-LABEL: @test7 +; CHECK: load i32, i32* %c, align 4 +; CHECK: for.body: + +define void @test7(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32** nocapture readonly %cptr, i32 %n) #0 { +entry: + %c = load i32*, i32** %cptr, !dereferenceable !0 + %cmp11 = icmp sgt i32 %n, 0 + br i1 %cmp11, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.inc + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %0, 0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %1 = load i32, i32* %c, align 4 + %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv + %2 = load i32, i32* %arrayidx3, align 4 + %mul = mul nsw i32 %2, %1 + store i32 %mul, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.inc, %entry + ret void +} + +; This test represents the following 
function: +; void test1(int * __restrict__ a, int * __restrict__ b, int **cptr, int n) { +; c = *cptr; +; if (c != null) +; for (int i = 0; i < n; ++i) +; if (a[i] > 0) +; a[i] = (*c)*b[i]; +; } +; and we want to hoist the load of %c out of the loop. This can be done only +; because of the dereferenceable_or_null metadata on the c = *cptr load and there +; is a null check on %c. + +; CHECK-LABEL: @test8 +; CHECK: load i32, i32* %c, align 4 +; CHECK: for.body: + +define void @test8(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32** nocapture readonly %cptr, i32 %n) #0 { +entry: + %c = load i32*, i32** %cptr, !dereferenceable_or_null !0 + %not_null = icmp ne i32* %c, null + br i1 %not_null, label %not.null, label %for.end + +not.null: + %cmp11 = icmp sgt i32 %n, 0 + br i1 %cmp11, label %for.body, label %for.end + +for.body: ; preds = %not.null, %for.inc + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %not.null ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %0, 0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %1 = load i32, i32* %c, align 4 + %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv + %2 = load i32, i32* %arrayidx3, align 4 + %mul = mul nsw i32 %2, %1 + store i32 %mul, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.inc, %entry, %not.null + ret void +} + +; This is the same as @test8, but without the null check on %c. +; Without this check, we should not hoist the load of %c. 
+ +; CHECK-LABEL: @test9 +; CHECK: if.then: +; CHECK: load i32, i32* %c, align 4 + +define void @test9(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32** nocapture readonly %cptr, i32 %n) #0 { +entry: + %c = load i32*, i32** %cptr, !dereferenceable_or_null !0 + %cmp11 = icmp sgt i32 %n, 0 + br i1 %cmp11, label %for.body, label %for.end +for.body: ; preds = %entry, %for.inc + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %0, 0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %1 = load i32, i32* %c, align 4 + %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv + %2 = load i32, i32* %arrayidx3, align 4 + %mul = mul nsw i32 %2, %1 + store i32 %mul, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.inc, %entry + ret void +} + +attributes #0 = { nounwind uwtable } +!0 = !{i32 4}