Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -79,6 +79,13 @@
     cl::desc("Maximum number of elements in atomic memcpy the optimizer is "
              "allowed to unfold"));
 
+static cl::opt<unsigned> GuardWideningWindow(
+    "instcombine-guard-widening-window",
+    cl::init(3),
+    cl::desc("How wide an instruction window to bypass looking for "
+             "another guard"));
+
+
 /// Return the specified type promoted as it would be to pass though a va_arg
 /// area.
 static Type *getPromotedType(Type *Ty) {
@@ -3624,8 +3631,16 @@
   }
   case Intrinsic::experimental_guard: {
-    // Is this guard followed by another guard?
+    // Is this guard followed by another guard? We scan forward over a small
+    // fixed window of instructions to handle common cases with conditions
+    // computed between guards.
     Instruction *NextInst = II->getNextNode();
+    for (int i = 0; i < GuardWideningWindow; i++) {
+      // Note: Using context-free form to avoid compile time blow up
+      if (!isSafeToSpeculativelyExecute(NextInst))
+        break;
+      NextInst = NextInst->getNextNode();
+    }
     Value *NextCond = nullptr;
     if (match(NextInst,
               m_Intrinsic<Intrinsic::experimental_guard>(m_Value(NextCond)))) {
@@ -3636,6 +3651,12 @@
         return eraseInstFromFunction(*NextInst);
 
       // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
+      Instruction* MoveI = II->getNextNode();
+      while (MoveI != NextInst) {
+        auto *Temp = MoveI;
+        MoveI = MoveI->getNextNode();
+        Temp->moveBefore(II);
+      }
       II->setArgOperand(0, Builder.CreateAnd(CurrCond, NextCond));
       return eraseInstFromFunction(*NextInst);
     }
Index: llvm/trunk/test/Transforms/InstCombine/call-guard.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/call-guard.ll
+++ llvm/trunk/test/Transforms/InstCombine/call-guard.ll
@@ -30,3 +30,81 @@ call void(i1, ...)
@llvm.experimental.guard( i1 %C, i32 789 )[ "deopt"() ] ret void } + +; This version tests for the common form where the conditions are +; between the guards +define void @test_guard_adjacent_diff_cond2(i32 %V1, i32 %V2) { +; CHECK-LABEL: @test_guard_adjacent_diff_cond2( +; CHECK-NEXT: %1 = and i32 %V1, %V2 +; CHECK-NEXT: %2 = icmp slt i32 %1, 0 +; CHECK-NEXT: %and = and i32 %V1, 255 +; CHECK-NEXT: %C = icmp ult i32 %and, 129 +; CHECK-NEXT: %3 = and i1 %2, %C +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %3, i32 123) [ "deopt"() ] +; CHECK-NEXT: ret void + %A = icmp slt i32 %V1, 0 + call void(i1, ...) @llvm.experimental.guard( i1 %A, i32 123 )[ "deopt"() ] + %B = icmp slt i32 %V2, 0 + call void(i1, ...) @llvm.experimental.guard( i1 %B, i32 456 )[ "deopt"() ] + %and = and i32 %V1, 255 + %C = icmp sle i32 %and, 128 + call void(i1, ...) @llvm.experimental.guard( i1 %C, i32 789 )[ "deopt"() ] + ret void +} + +; Might not be legal to hoist the load above the first guard since the +; guard might control dereferenceability +define void @negative_load(i32 %V1, i32* %P) { +; CHECK-LABEL: @negative_load +; CHECK: @llvm.experimental.guard +; CHECK: @llvm.experimental.guard + %A = icmp slt i32 %V1, 0 + call void(i1, ...) @llvm.experimental.guard( i1 %A, i32 123 )[ "deopt"() ] + %V2 = load i32, i32* %P + %B = icmp slt i32 %V2, 0 + call void(i1, ...) @llvm.experimental.guard( i1 %B, i32 456 )[ "deopt"() ] + ret void +} + +define void @deref_load(i32 %V1, i32* dereferenceable(4) %P) { +; CHECK-LABEL: @deref_load +; CHECK-NEXT: %V2 = load i32, i32* %P, align 4 +; CHECK-NEXT: %1 = and i32 %V2, %V1 +; CHECK-NEXT: %2 = icmp slt i32 %1, 0 +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %2, i32 123) [ "deopt"() ] + %A = icmp slt i32 %V1, 0 + call void(i1, ...) @llvm.experimental.guard( i1 %A, i32 123 )[ "deopt"() ] + %V2 = load i32, i32* %P + %B = icmp slt i32 %V2, 0 + call void(i1, ...) 
@llvm.experimental.guard( i1 %B, i32 456 )[ "deopt"() ] + ret void +} + +; The divide might fault above the guard +define void @negative_div(i32 %V1, i32 %D) { +; CHECK-LABEL: @negative_div +; CHECK: @llvm.experimental.guard +; CHECK: @llvm.experimental.guard + %A = icmp slt i32 %V1, 0 + call void(i1, ...) @llvm.experimental.guard( i1 %A, i32 123 )[ "deopt"() ] + %V2 = udiv i32 %V1, %D + %B = icmp slt i32 %V2, 0 + call void(i1, ...) @llvm.experimental.guard( i1 %B, i32 456 )[ "deopt"() ] + ret void +} + +; Highlight the limit of the window in a case which would otherwise be mergable +define void @negative_window(i32 %V1, i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK-LABEL: @negative_window +; CHECK: @llvm.experimental.guard +; CHECK: @llvm.experimental.guard + %A = icmp slt i32 %V1, 0 + call void(i1, ...) @llvm.experimental.guard( i1 %A, i32 123 )[ "deopt"() ] + %V2 = add i32 %a, %b + %V3 = add i32 %V2, %c + %V4 = add i32 %V3, %d + %B = icmp slt i32 %V4, 0 + call void(i1, ...) @llvm.experimental.guard( i1 %B, i32 456 )[ "deopt"() ] + ret void +} +