Index: llvm/trunk/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ llvm/trunk/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -335,18 +335,6 @@
     return true;
   }
 
-// Helper function to rewrite srem and sdiv. As a policy choice, we choose not
-// to waste compile time on anything where the operands are local defs. While
-// LVI can sometimes reason about such cases, it's not its primary purpose.
-static bool hasLocalDefs(BinaryOperator *SDI) {
-  for (Value *O : SDI->operands()) {
-    auto *I = dyn_cast<Instruction>(O);
-    if (I && I->getParent() == SDI->getParent())
-      return true;
-  }
-  return false;
-}
-
 static bool hasPositiveOperands(BinaryOperator *SDI, LazyValueInfo *LVI) {
   Constant *Zero = ConstantInt::get(SDI->getType(), 0);
   for (Value *O : SDI->operands()) {
@@ -358,7 +346,7 @@
 }
 
 static bool processSRem(BinaryOperator *SDI, LazyValueInfo *LVI) {
-  if (SDI->getType()->isVectorTy() || hasLocalDefs(SDI) ||
+  if (SDI->getType()->isVectorTy() ||
       !hasPositiveOperands(SDI, LVI))
     return false;
 
@@ -376,7 +364,7 @@
 /// conditions, this can sometimes prove conditions instcombine can't by
 /// exploiting range information.
 static bool processSDiv(BinaryOperator *SDI, LazyValueInfo *LVI) {
-  if (SDI->getType()->isVectorTy() || hasLocalDefs(SDI) ||
+  if (SDI->getType()->isVectorTy() ||
       !hasPositiveOperands(SDI, LVI))
     return false;
 
@@ -391,7 +379,7 @@
 }
 
 static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) {
-  if (SDI->getType()->isVectorTy() || hasLocalDefs(SDI))
+  if (SDI->getType()->isVectorTy())
     return false;
 
   Constant *Zero = ConstantInt::get(SDI->getType(), 0);
@@ -415,7 +403,7 @@
   if (DontProcessAdds)
     return false;
 
-  if (AddOp->getType()->isVectorTy() || hasLocalDefs(AddOp))
+  if (AddOp->getType()->isVectorTy())
     return false;
 
   bool NSW = AddOp->hasNoSignedWrap();
Index: llvm/trunk/test/Transforms/CorrelatedValuePropagation/add.ll
===================================================================
--- llvm/trunk/test/Transforms/CorrelatedValuePropagation/add.ll
+++ llvm/trunk/test/Transforms/CorrelatedValuePropagation/add.ll
@@ -307,3 +307,26 @@
   ret void
 }
 
+; single basic block loop
+; because the loop exit condition is SLT, we can supplement the iv add
+; (iv.next def) with an nsw.
+; CHECK-LABEL: @test16(
+define i32 @test16(i32* %n, i32* %a) {
+preheader:
+  br label %loop
+
+loop:
+; CHECK: %iv.next = add nsw i32 %iv, 1
+  %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+  %acc = phi i32 [ 0, %preheader ], [ %acc.curr, %loop ]
+  %x = load atomic i32, i32* %a unordered, align 8
+  fence acquire
+  %acc.curr = add i32 %acc, %x
+  %iv.next = add i32 %iv, 1
+  %nval = load atomic i32, i32* %n unordered, align 8
+  %cmp = icmp slt i32 %iv.next, %nval
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %acc.curr
+}
Index: llvm/trunk/test/Transforms/CorrelatedValuePropagation/ashr.ll
===================================================================
--- llvm/trunk/test/Transforms/CorrelatedValuePropagation/ashr.ll
+++ llvm/trunk/test/Transforms/CorrelatedValuePropagation/ashr.ll
@@ -54,3 +54,46 @@
 exit:
   ret void
 }
+
+; looping case where loop has exactly one block
+; at the point of ashr, we know that the operand is always greater than 0,
+; because of the guard before it, so we can transform it to lshr.
+declare void @llvm.experimental.guard(i1,...)
+; CHECK-LABEL: @test4
+define void @test4(i32 %n) {
+entry:
+  %cmp = icmp sgt i32 %n, 0
+  br i1 %cmp, label %loop, label %exit
+
+loop:
+; CHECK: lshr i32 %a, 1
+  %a = phi i32 [ %n, %entry ], [ %shr, %loop ]
+  %cond = icmp sgt i32 %a, 2
+  call void(i1,...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+  %shr = ashr i32 %a, 1
+  br i1 %cond, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; same test as above with assume instead of guard.
+declare void @llvm.assume(i1)
+; CHECK-LABEL: @test5
+define void @test5(i32 %n) {
+entry:
+  %cmp = icmp sgt i32 %n, 0
+  br i1 %cmp, label %loop, label %exit
+
+loop:
+; CHECK: lshr i32 %a, 1
+  %a = phi i32 [ %n, %entry ], [ %shr, %loop ]
+  %cond = icmp sgt i32 %a, 4
+  call void @llvm.assume(i1 %cond)
+  %shr = ashr i32 %a, 1
+  %loopcond = icmp sgt i32 %shr, 8
+  br i1 %loopcond, label %loop, label %exit
+
+exit:
+  ret void
+}
Index: llvm/trunk/test/Transforms/CorrelatedValuePropagation/sdiv.ll
===================================================================
--- llvm/trunk/test/Transforms/CorrelatedValuePropagation/sdiv.ll
+++ llvm/trunk/test/Transforms/CorrelatedValuePropagation/sdiv.ll
@@ -52,3 +52,46 @@
 exit:
   ret void
 }
+
+; looping case where loop has exactly one block
+; at the point of sdiv, we know that %a is always greater than 0,
+; because of the guard before it, so we can transform it to udiv.
+declare void @llvm.experimental.guard(i1,...)
+; CHECK-LABEL: @test4
+define void @test4(i32 %n) {
+entry:
+  %cmp = icmp sgt i32 %n, 0
+  br i1 %cmp, label %loop, label %exit
+
+loop:
+; CHECK: udiv i32 %a, 6
+  %a = phi i32 [ %n, %entry ], [ %div, %loop ]
+  %cond = icmp sgt i32 %a, 4
+  call void(i1,...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+  %div = sdiv i32 %a, 6
+  br i1 %cond, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; same test as above with assume instead of guard.
+declare void @llvm.assume(i1)
+; CHECK-LABEL: @test5
+define void @test5(i32 %n) {
+entry:
+  %cmp = icmp sgt i32 %n, 0
+  br i1 %cmp, label %loop, label %exit
+
+loop:
+; CHECK: udiv i32 %a, 6
+  %a = phi i32 [ %n, %entry ], [ %div, %loop ]
+  %cond = icmp sgt i32 %a, 4
+  call void @llvm.assume(i1 %cond)
+  %div = sdiv i32 %a, 6
+  %loopcond = icmp sgt i32 %div, 8
+  br i1 %loopcond, label %loop, label %exit
+
+exit:
+  ret void
+}
Index: llvm/trunk/test/Transforms/CorrelatedValuePropagation/srem.ll
===================================================================
--- llvm/trunk/test/Transforms/CorrelatedValuePropagation/srem.ll
+++ llvm/trunk/test/Transforms/CorrelatedValuePropagation/srem.ll
@@ -19,3 +19,26 @@
 if.end:
   ret void
 }
+
+; looping case where loop has exactly one block
+; at the point of srem, we know that %a is always greater than 0,
+; because of the assume before it, so we can transform it to urem.
+declare void @llvm.assume(i1)
+; CHECK-LABEL: @test4
+define void @test4(i32 %n) {
+entry:
+  %cmp = icmp sgt i32 %n, 0
+  br i1 %cmp, label %loop, label %exit
+
+loop:
+; CHECK: urem i32 %a, 6
+  %a = phi i32 [ %n, %entry ], [ %rem, %loop ]
+  %cond = icmp sgt i32 %a, 4
+  call void @llvm.assume(i1 %cond)
+  %rem = srem i32 %a, 6
+  %loopcond = icmp sgt i32 %rem, 8
+  br i1 %loopcond, label %loop, label %exit
+
+exit:
+  ret void
+}