Index: llvm/lib/Transforms/IPO/GlobalOpt.cpp =================================================================== --- llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -699,6 +699,14 @@ // checked. if (PHIs.insert(PN).second && !AllUsesOfValueWillTrapIfNull(PN, PHIs)) return false; + } else if (isa(U) && isa(U->getOperand(0)) && + isa(U->getOperand(1))) { + assert(isa( + cast(U->getOperand(0))->getPointerOperand()) && + "Should be GlobalVariable"); + // This and only this kind of ICmpInst is to be replaced with the + // comparing of the value of the created global init bool later in + // optimizeGlobalAddressOfMalloc for the global variable. } else { //cerr << "NONTRAPPING USE: " << *U; return false; @@ -940,9 +948,13 @@ // Loop over all uses of GV, processing them in turn. while (!GV->use_empty()) { if (StoreInst *SI = dyn_cast(GV->user_back())) { - // The global is initialized when the store to it occurs. - new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, - Align(1), SI->getOrdering(), SI->getSyncScopeID(), SI); + // The global is initialized when the store to it occurs. If the stored + // value is null value, the global bool is set to false, otherwise true. + new StoreInst(ConstantInt::getBool( + GV->getContext(), + !isa(SI->getValueOperand())), + InitBool, false, Align(1), SI->getOrdering(), + SI->getSyncScopeID(), SI); SI->eraseFromParent(); continue; } @@ -957,11 +969,9 @@ } // Replace the cmp X, 0 with a use of the bool value. - // Sink the load to where the compare was, if atomic rules allow us to. Value *LV = new LoadInst(InitBool->getValueType(), InitBool, InitBool->getName() + ".val", false, Align(1), - LI->getOrdering(), LI->getSyncScopeID(), - LI->isUnordered() ? (Instruction *)ICI : LI); + LI->getOrdering(), LI->getSyncScopeID(), LI); InitBoolUsed = true; switch (ICI->getPredicate()) { default: llvm_unreachable("Unknown ICmp Predicate!"); Index: llvm/test/Transforms/GlobalOpt/malloc-promote-4.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/GlobalOpt/malloc-promote-4.ll @@ -0,0 +1,51 @@ +; RUN: opt -S -globalopt -o - < %s | FileCheck %s + +; CHECK: [[G_INIT:@.*]] = internal unnamed_addr global i1 false +@g = internal global i32* null, align 8 + +; CHECK-LABEL: define {{.*}} @f1( +; CHECK-NEXT: [[G_INIT_VAL:%.*]] = load i1, i1* [[G_INIT]], align 1 +; CHECK-NEXT: call fastcc void @f2() +; CHECK-NEXT: [[NOTINIT:%.*]] = xor i1 [[G_INIT_VAL]], true +; CHECK-NEXT: br i1 [[NOTINIT]], label [[TMP1:%.*]], label [[TMP2:%.*]] +; +define internal i32 @f1() { + %1 = load i32*, i32** @g, align 8 + call void @f2(); + %2 = icmp eq i32* %1, null + br i1 %2, label %3, label %4 + +3: ; preds = %0 + br label %5 + +4: ; preds = %0 + br label %5 + +5: ; preds = %3, %4 + %6 = phi i32 [ -1, %3 ], [ 1, %4 ] + ret i32 %6 +} + +; CHECK-LABEL: define {{.*}} @f2( +; CHECK-NEXT: store i1 true, i1* [[G_INIT]], align 1 +; CHECK-NEXT: ret void +; +define internal void @f2() { + %1 = call noalias i8* @malloc(i64 4) + %2 = bitcast i8* %1 to i32* + store i32* %2, i32** @g, align 8 + ret void +} + +; CHECK-LABEL: define {{.*}} @main( +; CHECK-NEXT: store i1 false, i1* [[G_INIT]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = call fastcc i32 @f1() +; CHECK-NEXT: ret i32 [[TMP1]] +; +define dso_local i32 @main() { + store i32* null, i32** @g, align 8 + %1 = call i32 @f1() + ret i32 %1 +} + +declare dso_local noalias i8* @malloc(i64) Index: llvm/test/Transforms/GlobalOpt/null-check-is-use-pr35760.ll =================================================================== --- llvm/test/Transforms/GlobalOpt/null-check-is-use-pr35760.ll +++ llvm/test/Transforms/GlobalOpt/null-check-is-use-pr35760.ll @@ -3,10 +3,13 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" +; CHECK: [[_ZL3G_I_INIT:@.*]] = internal unnamed_addr global i1 false @_ZL3g_i = internal global i32* null, align 8 @.str = private unnamed_addr constant [2 x i8] c"0\00", align 1 @.str.1 = private unnamed_addr constant [2 x i8] c"1\00", align 1 +; CHECK-LABEL: define {{.*}} @main( +; CHECK-NEXT: store i1 false, i1* [[_ZL3G_I_INIT]], align 1 define dso_local i32 @main() { store i32* null, i32** @_ZL3g_i, align 8 call void @_ZL13PutsSomethingv() @@ -14,8 +17,11 @@ } ; CHECK-LABEL: define {{.*}} @_ZL13PutsSomethingv() -; CHECK: [[gvLoad:%.*]] = load i32*, i32** @_ZL3g_i -; CHECK-NEXT: icmp eq i32* [[gvLoad]], null +; CHECK-NEXT: [[_ZL3G_I_INIT_VAL:%.*]] = load i1, i1* [[_ZL3G_I_INIT]], align 1 +; CHECK-NEXT: [[NOTINIT:%.*]] = xor i1 [[_ZL3G_I_INIT_VAL]], true +; CHECK-NEXT: br i1 [[NOTINIT]], label %[[TMP1:.*]], label %[[TMP3:.*]] +; CHECK: [[TMP1]]: +; CHECK-NEXT: store i1 true, i1* [[_ZL3G_I_INIT]], align 1 define internal void @_ZL13PutsSomethingv() { %1 = load i32*, i32** @_ZL3g_i, align 8 %2 = icmp eq i32* %1, null Index: llvm/test/Transforms/GlobalOpt/null-check-not-use-pr35760.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/GlobalOpt/null-check-not-use-pr35760.ll @@ -0,0 +1,46 @@ +; RUN: opt -S -globalopt -o - < %s | FileCheck %s + +; No malloc promotion with non-null check. + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@_ZL3g_i = internal global i32* null, align 8 +@_ZL3g_j = global i32* null, align 8 +@.str = private unnamed_addr constant [2 x i8] c"0\00", align 1 +@.str.1 = private unnamed_addr constant [2 x i8] c"1\00", align 1 + +define dso_local i32 @main() { + store i32* null, i32** @_ZL3g_i, align 8 + call void @_ZL13PutsSomethingv() + ret i32 0 +} + +; CHECK-LABEL: define {{.*}} @_ZL13PutsSomethingv() +; CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** @_ZL3g_i, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32*, i32** @_ZL3g_j, align 8 +; CHECK-NEXT: icmp eq i32* [[TMP1]], [[TMP2]] +define internal void @_ZL13PutsSomethingv() { + %1 = load i32*, i32** @_ZL3g_i, align 8 + %2 = load i32*, i32** @_ZL3g_j, align 8 + %cmp = icmp eq i32* %1, %2 + br i1 %cmp, label %3, label %7 + +3: ; preds = %0 + %4 = call noalias i8* @malloc(i64 4) #3 + %5 = bitcast i8* %4 to i32* + store i32* %5, i32** @_ZL3g_i, align 8 + %6 = call i32 @puts(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i64 0, i64 0)) + br label %9 + +7: ; preds = %0 + %8 = call i32 @puts(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0)) + br label %9 + +9: ; preds = %7, %3 + ret void +} + +declare dso_local noalias i8* @malloc(i64) + +declare dso_local i32 @puts(i8* nocapture readonly)