Index: llvm/trunk/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/LICM.cpp +++ llvm/trunk/lib/Transforms/Scalar/LICM.cpp @@ -912,14 +912,23 @@ // // If at least one store is guaranteed to execute, both properties are // satisfied, and promotion is legal. + // // This, however, is not a necessary condition. Even if no store/load is - // guaranteed to execute, we can still establish these properties: - // (p1) by proving that hoisting the load into the preheader is - // safe (i.e. proving dereferenceability on all paths through the loop). We + // guaranteed to execute, we can still establish these properties. + // We can establish (p1) by proving that hoisting the load into the preheader + // is safe (i.e. proving dereferenceability on all paths through the loop). We // can use any access within the alias set to prove dereferenceability, // since they're all must alias. - // (p2) by proving the memory is thread-local, so the memory model + // + // There are two ways establish (p2): + // a) Prove the location is thread-local. In this case the memory model // requirement does not apply, and stores are safe to insert. + // b) Prove a store dominates every exit block. In this case, if an exit + // blocks is reached, the original dynamic path would have taken us through + // the store, so inserting a store into the exit block is safe. Note that this + // is different from the store being guaranteed to execute. For instance, + // if an exception is thrown on the first iteration of the loop, the original + // store is never executed, but the exit blocks are not executed either. bool DereferenceableInPH = false; bool SafeToInsertStore = false; @@ -1001,6 +1010,17 @@ } } + // If a store dominates all exit blocks, it is safe to sink. + // As explained above, if an exit block was executed, a dominating + // store must have been been executed at least once, so we are not + // introducing stores on paths that did not have them. + // Note that this only looks at explicit exit blocks. If we ever + // start sinking stores into unwind edges (see above), this will break. + if (!SafeToInsertStore) + SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) { + return DT->dominates(Store->getParent(), Exit); + }); + // If the store is not guaranteed to execute, we may still get // deref info through it. if (!DereferenceableInPH) { Index: llvm/trunk/test/Transforms/LICM/scalar_promote.ll =================================================================== --- llvm/trunk/test/Transforms/LICM/scalar_promote.ll +++ llvm/trunk/test/Transforms/LICM/scalar_promote.ll @@ -186,6 +186,198 @@ ; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* %gi, align 4, !tbaa !0 } +declare i32 @opaque(i32) argmemonly +declare void @capture(i32*) + +; We can promote even if opaque may throw. +define i32 @test7() { +; CHECK-LABEL: @test7( +; CHECK: entry: +; CHECK-NEXT: %local = alloca +; CHECK-NEXT: call void @capture(i32* %local) +; CHECK-NEXT: load i32, i32* %local +; CHECK-NEXT: br label %loop +; CHECK: exit: +; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2, %loop ] +; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* %local +; CHECK-NEXT: %ret = load i32, i32* %local +; CHECK-NEXT: ret i32 %ret +entry: + %local = alloca i32 + call void @capture(i32* %local) + br label %loop + +loop: + %j = phi i32 [ 0, %entry ], [ %next, %loop ] + %x = load i32, i32* %local + %x2 = call i32 @opaque(i32 %x) ; Note this does not capture %local + store i32 %x2, i32* %local + %next = add i32 %j, 1 + %cond = icmp eq i32 %next, 0 + br i1 %cond, label %exit, label %loop + +exit: + %ret = load i32, i32* %local + ret i32 %ret +} + +; Make sure we don't promote if the store is really control-flow dependent. +define i32 @test7bad() { +; CHECK-LABEL: @test7bad( +; CHECK: entry: +; CHECK-NEXT: %local = alloca +; CHECK-NEXT: call void @capture(i32* %local) +; CHECK-NEXT: br label %loop +; CHECK: if: +; CHECK-NEXT: store i32 %x2, i32* %local +; CHECK-NEXT: br label %else +; CHECK: exit: +; CHECK-NEXT: %ret = load i32, i32* %local +; CHECK-NEXT: ret i32 %ret +entry: + %local = alloca i32 + call void @capture(i32* %local) + br label %loop +loop: + %j = phi i32 [ 0, %entry ], [ %next, %else ] + %x = load i32, i32* %local + %x2 = call i32 @opaque(i32 %x) ; Note this does not capture %local + %cmp = icmp eq i32 %x2, 0 + br i1 %cmp, label %if, label %else + +if: + store i32 %x2, i32* %local + br label %else + +else: + %next = add i32 %j, 1 + %cond = icmp eq i32 %next, 0 + br i1 %cond, label %exit, label %loop + +exit: + %ret = load i32, i32* %local + ret i32 %ret +} + +; Even if neither the load nor the store or guaranteed to execute because +; opaque() may throw, we can still promote - the load not being guaranteed +; doesn't block us, because %local is always dereferenceable. +define i32 @test8() { +; CHECK-LABEL: @test8( +; CHECK: entry: +; CHECK-NEXT: %local = alloca +; CHECK-NEXT: call void @capture(i32* %local) +; CHECK-NEXT: load i32, i32* %local +; CHECK-NEXT: br label %loop +; CHECK: exit: +; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2, %loop ] +; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* %local +; CHECK-NEXT: %ret = load i32, i32* %local +; CHECK-NEXT: ret i32 %ret +entry: + %local = alloca i32 + call void @capture(i32* %local) + br label %loop + +loop: + %j = phi i32 [ 0, %entry ], [ %next, %loop ] + %throwaway = call i32 @opaque(i32 %j) + %x = load i32, i32* %local + %x2 = call i32 @opaque(i32 %x) + store i32 %x2, i32* %local + %next = add i32 %j, 1 + %cond = icmp eq i32 %next, 0 + br i1 %cond, label %exit, label %loop + +exit: + %ret = load i32, i32* %local + ret i32 %ret +} + + +; If the store is "guaranteed modulo exceptions", and the load depends on +; control flow, we can only promote if the pointer is otherwise known to be +; dereferenceable +define i32 @test9() { +; CHECK-LABEL: @test9( +; CHECK: entry: +; CHECK-NEXT: %local = alloca +; CHECK-NEXT: call void @capture(i32* %local) +; CHECK-NEXT: load i32, i32* %local +; CHECK-NEXT: br label %loop +; CHECK: exit: +; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2, %else ] +; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* %local +; CHECK-NEXT: %ret = load i32, i32* %local +; CHECK-NEXT: ret i32 %ret +entry: + %local = alloca i32 + call void @capture(i32* %local) + br label %loop + +loop: + %j = phi i32 [ 0, %entry ], [ %next, %else ] + %j2 = call i32 @opaque(i32 %j) + %cmp = icmp eq i32 %j2, 0 + br i1 %cmp, label %if, label %else + +if: + %x = load i32, i32* %local + br label %else + +else: + %x2 = phi i32 [ 0, %loop ], [ %x, %if] + store i32 %x2, i32* %local + %next = add i32 %j, 1 + %cond = icmp eq i32 %next, 0 + br i1 %cond, label %exit, label %loop + +exit: + %ret = load i32, i32* %local + ret i32 %ret +} + +define i32 @test9bad(i32 %i) { +; CHECK-LABEL: @test9bad( +; CHECK: entry: +; CHECK-NEXT: %local = alloca +; CHECK-NEXT: call void @capture(i32* %local) +; CHECK-NEXT: %notderef = getelementptr +; CHECK-NEXT: br label %loop +; CHECK: if: +; CHECK-NEXT: load i32, i32* %notderef +; CHECK-NEXT: br label %else +; CHECK: exit: +; CHECK-NEXT: %ret = load i32, i32* %notderef +; CHECK-NEXT: ret i32 %ret +entry: + %local = alloca i32 + call void @capture(i32* %local) + %notderef = getelementptr i32, i32* %local, i32 %i + br label %loop + +loop: + %j = phi i32 [ 0, %entry ], [ %next, %else ] + %j2 = call i32 @opaque(i32 %j) + %cmp = icmp eq i32 %j2, 0 + br i1 %cmp, label %if, label %else + +if: + %x = load i32, i32* %notderef + br label %else + +else: + %x2 = phi i32 [ 0, %loop ], [ %x, %if] + store i32 %x2, i32* %notderef + %next = add i32 %j, 1 + %cond = icmp eq i32 %next, 0 + br i1 %cond, label %exit, label %loop + +exit: + %ret = load i32, i32* %notderef + ret i32 %ret +} + !0 = !{!4, !4, i64 0} !1 = !{!"omnipotent char", !2} !2 = !{!"Simple C/C++ TBAA"}