Index: llvm/trunk/lib/CodeGen/ImplicitNullChecks.cpp =================================================================== --- llvm/trunk/lib/CodeGen/ImplicitNullChecks.cpp +++ llvm/trunk/lib/CodeGen/ImplicitNullChecks.cpp @@ -253,7 +253,7 @@ unsigned RegB = MOB.getReg(); - if (TRI->regsOverlap(RegA, RegB)) + if (TRI->regsOverlap(RegA, RegB) && (MOA.isDef() || MOB.isDef())) return false; } } @@ -310,7 +310,7 @@ // lookup due to this condition will fail for any further instruction. for (auto *PrevMI : PrevInsts) for (auto &PrevMO : PrevMI->operands()) - if (PrevMO.isReg() && PrevMO.getReg() && + if (PrevMO.isReg() && PrevMO.getReg() && PrevMO.isDef() && TRI->regsOverlap(PrevMO.getReg(), PointerReg)) return SR_Impossible; @@ -367,7 +367,8 @@ // The Dependency can't be re-defining the base register -- then we won't // get the memory operation on the address we want. This is already // checked in \c IsSuitableMemoryOp. - assert(!TRI->regsOverlap(DependenceMO.getReg(), PointerReg) && + assert(!(DependenceMO.isDef() && + TRI->regsOverlap(DependenceMO.getReg(), PointerReg)) && "Should have been checked before!"); } Index: llvm/trunk/test/CodeGen/X86/implicit-null-check.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/implicit-null-check.ll +++ llvm/trunk/test/CodeGen/X86/implicit-null-check.ll @@ -135,6 +135,33 @@ ret i32 200 } +define i32 @imp_null_check_gep_load_with_use_dep(i32* %x, i32 %a) { +; CHECK-LABEL: imp_null_check_gep_load_with_use_dep: +; CHECK: [[BB0_imp_null_check_gep_load_with_use_dep:L[^:]+]]: +; CHECK: movl (%rdi), %eax +; CHECK: addl %edi, %esi +; CHECK: leal 4(%rax,%rsi), %eax +; CHECK: retq +; CHECK: [[BB1_imp_null_check_gep_load_with_use_dep:LBB5_[0-9]+]]: +; CHECK: movl $42, %eax +; CHECK: retq + + entry: + %c = icmp eq i32* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + + is_null: + ret i32 42 + + not_null: + %x.loc = getelementptr i32, i32* %x, i32 1 + %y = ptrtoint i32* %x.loc to i32 + %b = add i32 %a, %y + %t = load i32, i32* %x + %z = add i32 %t, %b + ret i32 %z +} + !0 = !{} ; CHECK-LABEL: __LLVM_FaultMaps: @@ -147,7 +174,7 @@ ; CHECK-NEXT: .short 0 ; # functions: -; CHECK-NEXT: .long 5 +; CHECK-NEXT: .long 6 ; FunctionAddr: ; CHECK-NEXT: .quad _imp_null_check_add_result @@ -176,6 +203,19 @@ ; CHECK-NEXT: .long [[BB1_imp_null_check_gep_load]]-_imp_null_check_gep_load ; FunctionAddr: +; CHECK-NEXT: .quad _imp_null_check_gep_load_with_use_dep +; NumFaultingPCs +; CHECK-NEXT: .long 1 +; Reserved: +; CHECK-NEXT: .long 0 +; Fault[0].Type: +; CHECK-NEXT: .long 1 +; Fault[0].FaultOffset: +; CHECK-NEXT: .long [[BB0_imp_null_check_gep_load_with_use_dep]]-_imp_null_check_gep_load_with_use_dep +; Fault[0].HandlerOffset: +; CHECK-NEXT: .long [[BB1_imp_null_check_gep_load_with_use_dep]]-_imp_null_check_gep_load_with_use_dep + +; FunctionAddr: ; CHECK-NEXT: .quad _imp_null_check_hoist_over_unrelated_load ; NumFaultingPCs ; CHECK-NEXT: .long 1 @@ -216,12 +256,14 @@ ; OBJDUMP: FaultMap table: ; OBJDUMP-NEXT: Version: 0x1 -; OBJDUMP-NEXT: NumFunctions: 5 +; OBJDUMP-NEXT: NumFunctions: 6 ; OBJDUMP-NEXT: FunctionAddress: 0x000000, NumFaultingPCs: 1 ; OBJDUMP-NEXT: Fault kind: FaultingLoad, faulting PC offset: 0, handling PC offset: 5 ; OBJDUMP-NEXT: FunctionAddress: 0x000000, NumFaultingPCs: 1 ; OBJDUMP-NEXT: Fault kind: FaultingLoad, faulting PC offset: 0, handling PC offset: 7 ; OBJDUMP-NEXT: FunctionAddress: 0x000000, NumFaultingPCs: 1 +; OBJDUMP-NEXT: Fault kind: FaultingLoad, faulting PC offset: 0, handling PC offset: 9 +; OBJDUMP-NEXT: FunctionAddress: 0x000000, NumFaultingPCs: 1 ; OBJDUMP-NEXT: Fault kind: FaultingLoad, faulting PC offset: 0, handling PC offset: 7 ; OBJDUMP-NEXT: FunctionAddress: 0x000000, NumFaultingPCs: 1 ; OBJDUMP-NEXT: Fault kind: FaultingLoad, faulting PC offset: 0, handling PC offset: 3 Index: llvm/trunk/test/CodeGen/X86/implicit-null-checks.mir =================================================================== --- llvm/trunk/test/CodeGen/X86/implicit-null-checks.mir +++ llvm/trunk/test/CodeGen/X86/implicit-null-checks.mir @@ -145,6 +145,35 @@ attributes #0 = { "target-features"="+bmi,+bmi2" } + define i32 @imp_null_check_gep_load_with_use_dep(i32* %x, i32 %a) { + entry: + %c = icmp eq i32* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + + is_null: ; preds = %entry + ret i32 42 + + not_null: ; preds = %entry + %x.loc = getelementptr i32, i32* %x, i32 1 + %y = ptrtoint i32* %x.loc to i32 + %b = add i32 %a, %y + %t = load i32, i32* %x + %z = add i32 %t, %b + ret i32 %z + } + + define i32 @imp_null_check_load_with_base_sep(i32* %x, i32 %a) { + entry: + %c = icmp eq i32* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + + is_null: ; preds = %entry + ret i32 42 + + not_null: ; preds = %entry + ret i32 undef + } + !0 = !{} ... --- @@ -447,8 +476,8 @@ name: use_alternate_load_op # CHECK-LABEL: use_alternate_load_op # CHECK: bb.0.entry: -# CHECK: TEST64rr %rdi, %rdi, implicit-def %eflags -# CHECK-NEXT: JE_1 %bb.2.is_null, implicit killed %eflags +# CHECK: %r10 = FAULTING_LOAD_OP %bb.2.is_null, {{[0-9]+}}, killed %rdi, 1, _, 0, _ +# CHECK-NEXT: JMP_1 %bb.1.not_null # CHECK: bb.1.not_null alignment: 4 @@ -477,3 +506,66 @@ RETQ %eax ... +--- +name: imp_null_check_gep_load_with_use_dep +# CHECK: bb.0.entry: +# CHECK: %eax = FAULTING_LOAD_OP %bb.2.is_null, {{[0-9]+}}, killed %rdi, 1, _, 0, _, implicit-def %rax :: (load 4 from %ir.x) +# CHECK-NEXT: JMP_1 %bb.1.not_null +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '%rdi' } + - { reg: '%rsi' } +body: | + bb.0.entry: + successors: %bb.1.is_null(0x30000000), %bb.2.not_null(0x50000000) + liveins: %rsi, %rdi + + TEST64rr %rdi, %rdi, implicit-def %eflags + JE_1 %bb.1.is_null, implicit %eflags + + bb.2.not_null: + liveins: %rdi, %rsi + + %rsi = ADD64rr %rsi, %rdi, implicit-def dead %eflags + %eax = MOV32rm killed %rdi, 1, _, 0, _, implicit-def %rax :: (load 4 from %ir.x) + %eax = LEA64_32r killed %rax, 1, killed %rsi, 4, _ + RETQ %eax + + bb.1.is_null: + %eax = MOV32ri 42 + RETQ %eax + +... +--- +name: imp_null_check_load_with_base_sep +# CHECK: bb.0.entry: +# CHECK: %rsi = ADD64rr %rsi, %rdi, implicit-def dead %eflags +# CHECK-NEXT: %esi = FAULTING_LOAD_OP %bb.2.is_null, {{[0-9]+}}, killed %esi, %rdi, 1, _, 0, _, implicit-def dead %eflags +# CHECK-NEXT: JMP_1 %bb.1.not_null +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '%rdi' } + - { reg: '%rsi' } +body: | + bb.0.entry: + successors: %bb.1.is_null(0x30000000), %bb.2.not_null(0x50000000) + liveins: %rsi, %rdi + + TEST64rr %rdi, %rdi, implicit-def %eflags + JE_1 %bb.1.is_null, implicit %eflags + + bb.2.not_null: + liveins: %rdi, %rsi + + %rsi = ADD64rr %rsi, %rdi, implicit-def dead %eflags + %esi = AND32rm killed %esi, %rdi, 1, _, 0, _, implicit-def dead %eflags + %eax = MOV32rr %esi + RETQ %eax + + bb.1.is_null: + %eax = MOV32ri 42 + RETQ %eax + +...