Index: llvm/trunk/docs/LangRef.rst =================================================================== --- llvm/trunk/docs/LangRef.rst +++ llvm/trunk/docs/LangRef.rst @@ -5658,7 +5658,7 @@ :: - <result> = load [volatile] <ty>, <ty>* <pointer>[, align <alignment>][, !nontemporal !<index>][, !invariant.load !<index>][, !nonnull !<index>] + <result> = load [volatile] <ty>, <ty>* <pointer>[, align <alignment>][, !nontemporal !<index>][, !invariant.load !<index>][, !nonnull !<index>][, !dereferenceable !<index>][, !dereferenceable_or_null !<index>] <result> = load atomic [volatile] <ty>* <pointer> [singlethread] <ordering>, align <alignment> !<index> = !{ i32 1 } @@ -5723,6 +5723,25 @@ on parameters and return values. This metadata can only be applied to loads of a pointer type. +The optional ``!dereferenceable`` metadata must reference a single +metadata name ``<index>`` corresponding to a metadata node with one ``i64`` +entry. The existence of the ``!dereferenceable`` metadata on the instruction +tells the optimizer that the value loaded is known to be dereferenceable. +The number of bytes known to be dereferenceable is specified by the integer +value in the metadata node. This is analogous to the ``dereferenceable`` +attribute on parameters and return values. This metadata can only be applied +to loads of a pointer type. + +The optional ``!dereferenceable_or_null`` metadata must reference a single +metadata name ``<index>`` corresponding to a metadata node with one ``i64`` +entry. The existence of the ``!dereferenceable_or_null`` metadata on the +instruction tells the optimizer that the value loaded is known to be either +dereferenceable or null. +The number of bytes known to be dereferenceable is specified by the integer +value in the metadata node. This is analogous to the ``dereferenceable_or_null`` +attribute on parameters and return values. This metadata can only be applied +to loads of a pointer type. 
+ Semantics: """""""""" Index: llvm/trunk/include/llvm/IR/LLVMContext.h =================================================================== --- llvm/trunk/include/llvm/IR/LLVMContext.h +++ llvm/trunk/include/llvm/IR/LLVMContext.h @@ -58,7 +58,9 @@ MD_noalias = 8, // "noalias", MD_nontemporal = 9, // "nontemporal" MD_mem_parallel_loop_access = 10, // "llvm.mem.parallel_loop_access" - MD_nonnull = 11 // "nonnull" + MD_nonnull = 11, // "nonnull" + MD_dereferenceable = 12, // "dereferenceable" + MD_dereferenceable_or_null = 13 // "dereferenceable_or_null" }; /// getMDKindID - Return a unique non-zero ID for the specified metadata kind. Index: llvm/trunk/lib/Analysis/ValueTracking.cpp =================================================================== --- llvm/trunk/lib/Analysis/ValueTracking.cpp +++ llvm/trunk/lib/Analysis/ValueTracking.cpp @@ -2885,6 +2885,19 @@ DerefBytes = CS.getDereferenceableOrNullBytes(0); CheckForNonNull = true; } + } else if (const LoadInst *LI = dyn_cast<LoadInst>(BV)) { + if (MDNode *MD = LI->getMetadata(LLVMContext::MD_dereferenceable)) { + ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0)); + DerefBytes = CI->getLimitedValue(); + } + if (!DerefBytes.getBoolValue()) { + if (MDNode *MD = + LI->getMetadata(LLVMContext::MD_dereferenceable_or_null)) { + ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0)); + DerefBytes = CI->getLimitedValue(); + } + CheckForNonNull = true; + } } if (DerefBytes.getBoolValue()) Index: llvm/trunk/lib/IR/LLVMContext.cpp =================================================================== --- llvm/trunk/lib/IR/LLVMContext.cpp +++ llvm/trunk/lib/IR/LLVMContext.cpp @@ -88,11 +88,22 @@ "mem_parallel_loop_access kind id drifted"); (void)MemParallelLoopAccessID; - // Create the 'nonnull' metadata kind. unsigned NonNullID = getMDKindID("nonnull"); assert(NonNullID == MD_nonnull && "nonnull kind id drifted"); (void)NonNullID; + + // Create the 'dereferenceable' metadata kind. 
+ unsigned DereferenceableID = getMDKindID("dereferenceable"); + assert(DereferenceableID == MD_dereferenceable && + "dereferenceable kind id drifted"); + (void)DereferenceableID; + + // Create the 'dereferenceable_or_null' metadata kind. + unsigned DereferenceableOrNullID = getMDKindID("dereferenceable_or_null"); + assert(DereferenceableOrNullID == MD_dereferenceable_or_null && + "dereferenceable_or_null kind id drifted"); + (void)DereferenceableOrNullID; } LLVMContext::~LLVMContext() { delete pImpl; } Index: llvm/trunk/test/Analysis/ValueTracking/memory-dereferenceable.ll =================================================================== --- llvm/trunk/test/Analysis/ValueTracking/memory-dereferenceable.ll +++ llvm/trunk/test/Analysis/ValueTracking/memory-dereferenceable.ll @@ -8,6 +8,7 @@ declare zeroext i1 @return_i1() @globalstr = global [6 x i8] c"hello\00" +@globali32ptr = external global i32* define void @test(i32 addrspace(1)* dereferenceable(8) %dparam) gc "statepoint-example" { ; CHECK: The following are dereferenceable: @@ -15,7 +16,12 @@ ; CHECK: %alloca ; CHECK: %dparam ; CHECK: %relocate +; CHECK: %d4_load +; CHECK: %d_or_null_non_null_load ; CHECK-NOT: %nparam +; CHECK-NOT: %nd_load +; CHECK-NOT: %d2_load +; CHECK-NOT: %d_or_null_load entry: %globalptr = getelementptr inbounds [6 x i8], [6 x i8]* @globalstr, i32 0, i32 0 %load1 = load i8, i8* %globalptr @@ -27,8 +33,33 @@ %load4 = load i32, i32 addrspace(1)* %relocate %nparam = getelementptr i32, i32 addrspace(1)* %dparam, i32 5 %load5 = load i32, i32 addrspace(1)* %nparam + + ; Load from a non-dereferenceable load (no !dereferenceable metadata) + %nd_load = load i32*, i32** @globali32ptr + %load6 = load i32, i32* %nd_load + + ; Load from a dereferenceable load + %d4_load = load i32*, i32** @globali32ptr, !dereferenceable !0 + %load7 = load i32, i32* %d4_load + + ; Load from an offset not covered by the dereferenceable portion + %d2_load = load i32*, i32** @globali32ptr, !dereferenceable !1 + %load8 = load 
i32, i32* %d2_load + + ; Load from a potentially null pointer with dereferenceable_or_null + %d_or_null_load = load i32*, i32** @globali32ptr, !dereferenceable_or_null !0 + %load9 = load i32, i32* %d_or_null_load + + ; Load from a non-null pointer with dereferenceable_or_null + %d_or_null_non_null_load = load i32*, i32** @globali32ptr, !nonnull !2, !dereferenceable_or_null !0 + %load10 = load i32, i32* %d_or_null_non_null_load + ret void } declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...) -declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) \ No newline at end of file +declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) + +!0 = !{i64 4} +!1 = !{i64 2} +!2 = !{} Index: llvm/trunk/test/Transforms/LICM/hoist-deref-load.ll =================================================================== --- llvm/trunk/test/Transforms/LICM/hoist-deref-load.ll +++ llvm/trunk/test/Transforms/LICM/hoist-deref-load.ll @@ -254,5 +254,139 @@ ret i1 %not_null } -attributes #0 = { nounwind uwtable } +; This test represents the following function: +; void test7(int * __restrict__ a, int *b, int **cptr, int n) { +; c = *cptr; +; for (int i = 0; i < n; ++i) +; if (a[i] > 0) +; a[i] = (*c)*b[i]; +; } +; and we want to hoist the load of %c out of the loop. This can be done only +; because of the dereferenceable metadata on the c = *cptr load. 
+ +; CHECK-LABEL: @test7 +; CHECK: load i32, i32* %c, align 4 +; CHECK: for.body: + +define void @test7(i32* noalias %a, i32* %b, i32** %cptr, i32 %n) #0 { +entry: + %c = load i32*, i32** %cptr, !dereferenceable !0 + %cmp11 = icmp sgt i32 %n, 0 + br i1 %cmp11, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.inc + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %0, 0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %1 = load i32, i32* %c, align 4 + %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv + %2 = load i32, i32* %arrayidx3, align 4 + %mul = mul nsw i32 %2, %1 + store i32 %mul, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.inc, %entry + ret void +} + +; This test represents the following function: +; void test8(int * __restrict__ a, int *b, int **cptr, int n) { +; c = *cptr; +; if (c != null) +; for (int i = 0; i < n; ++i) +; if (a[i] > 0) +; a[i] = (*c)*b[i]; +; } +; and we want to hoist the load of %c out of the loop. This can be done only +; because of the dereferenceable_or_null metadata on the c = *cptr load and +; the null check on %c. 
+ +; CHECK-LABEL: @test8 +; CHECK: load i32, i32* %c, align 4 +; CHECK: for.body: + +define void @test8(i32* noalias %a, i32* %b, i32** %cptr, i32 %n) #0 { +entry: + %c = load i32*, i32** %cptr, !dereferenceable_or_null !0 + %not_null = icmp ne i32* %c, null + br i1 %not_null, label %not.null, label %for.end + +not.null: + %cmp11 = icmp sgt i32 %n, 0 + br i1 %cmp11, label %for.body, label %for.end + +for.body: ; preds = %not.null, %for.inc + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %not.null ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %0, 0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %1 = load i32, i32* %c, align 4 + %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv + %2 = load i32, i32* %arrayidx3, align 4 + %mul = mul nsw i32 %2, %1 + store i32 %mul, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.inc, %entry, %not.null + ret void +} + +; This is the same as @test8, but without the null check on %c. +; Without this check, we should not hoist the load of %c. 
+ +; CHECK-LABEL: @test9 +; CHECK: if.then: +; CHECK: load i32, i32* %c, align 4 + +define void @test9(i32* noalias %a, i32* %b, i32** %cptr, i32 %n) #0 { +entry: + %c = load i32*, i32** %cptr, !dereferenceable_or_null !0 + %cmp11 = icmp sgt i32 %n, 0 + br i1 %cmp11, label %for.body, label %for.end +for.body: ; preds = %entry, %for.inc + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %0, 0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %1 = load i32, i32* %c, align 4 + %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv + %2 = load i32, i32* %arrayidx3, align 4 + %mul = mul nsw i32 %2, %1 + store i32 %mul, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.inc, %entry + ret void +} + +attributes #0 = { nounwind uwtable } +!0 = !{i64 4}