Index: lib/CodeGen/ImplicitNullChecks.cpp =================================================================== --- lib/CodeGen/ImplicitNullChecks.cpp +++ lib/CodeGen/ImplicitNullChecks.cpp @@ -25,10 +25,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -177,6 +179,9 @@ // callq throw_NullPointerException // // LblNotNull: + // Inst0 + // Inst1 + // ... // Def = Load (%RAX + ) // ... // @@ -187,6 +192,8 @@ // jmp LblNotNull ;; explicit or fallthrough // // LblNotNull: + // Inst0 + // Inst1 // ... // // LblNull: @@ -194,16 +201,83 @@ // unsigned PointerReg = MBP.LHS.getReg(); - MachineInstr *MemOp = &*NotNullSucc->begin(); - unsigned BaseReg, Offset; - if (TII->getMemOpBaseRegImmOfs(MemOp, BaseReg, Offset, TRI)) - if (MemOp->mayLoad() && !MemOp->isPredicable() && BaseReg == PointerReg && - Offset < PageSize && MemOp->getDesc().getNumDefs() == 1) { - NullCheckList.emplace_back(MemOp, MBP.ConditionDef, &MBB, NotNullSucc, - NullSucc); - return true; + + // As we scan NotNullSucc for a suitable load instruction, we keep track of + // the registers defined and used by the instructions we scan past. This bit + // of information lets us decide if it is legal to reorder the load + // instruction we find (if we do find such an instruction) to before + // NotNullSucc. + DenseSet RegDefs, RegUses; + + // Returns true if it is safe to reorder MI to before NotNullSucc. + auto IsSafeToReorder = [&](MachineInstr *MI) { + // Right now we don't want to worry about LLVM's memory model. This can be + // made more precise later. + for (auto *MMO : MI->memoperands()) + if (!MMO->isUnordered()) + return false; + + for (auto &MO : MI->operands()) { + if (MO.isReg() && MO.getReg()) { + for (unsigned Reg : RegDefs) + if (TRI->regsOverlap(Reg, MO.getReg())) + return false; // We found a write-after-write or read-after-write + + if (MO.isDef()) + for (unsigned Reg : RegUses) + if (TRI->regsOverlap(Reg, MO.getReg())) + return false; // We found a write-after-read + } } + return true; + }; + + for (auto MII = NotNullSucc->begin(), MIE = NotNullSucc->end(); MII != MIE; + ++MII) { + MachineInstr *MI = &*MII; + unsigned BaseReg, Offset; + if (TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI)) + if (MI->mayLoad() && !MI->isPredicable() && BaseReg == PointerReg && + Offset < PageSize && MI->getDesc().getNumDefs() == 1 && + IsSafeToReorder(MI)) { + NullCheckList.emplace_back(MI, MBP.ConditionDef, &MBB, NotNullSucc, + NullSucc); + return true; + } + + // MI did not match our criteria for conversion to a trapping load. Check + // if we can continue looking. + + if (MI->mayStore() || MI->hasUnmodeledSideEffects()) + return false; + + for (auto *MMO : MI->memoperands()) + // Right now we don't want to worry about LLVM's memory model. + if (!MMO->isUnordered()) + return false; + + // It _may_ be okay to reorder a later load instruction across MI. Make a + // note of its operands so that we can make the legality check if we find a + // suitable load instruction: + + auto RecordOperand = [&](const MachineOperand &MO) { + if (!MO.isReg() || !MO.getReg()) + return; + + if (MO.isDef()) + RegDefs.insert(MO.getReg()); + else + RegUses.insert(MO.getReg()); + }; + + for (auto &MO : MI->defs()) + RecordOperand(MO); + + for (auto &IMO : MI->implicit_operands()) + RecordOperand(IMO); + } + return false; } Index: test/CodeGen/X86/implicit-null-check-negative.ll =================================================================== --- test/CodeGen/X86/implicit-null-check-negative.ll +++ test/CodeGen/X86/implicit-null-check-negative.ll @@ -51,4 +51,46 @@ ret i32 %t } +define i32 @imp_null_check_no_hoist_over_acquire_load(i32* %x, i32* %y) { +; We cannot hoist %t1 over %t0 since %t0 is an acquire load + entry: + %c = icmp eq i32* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + + is_null: + ret i32 42 + + not_null: + %t0 = load atomic i32, i32* %y acquire, align 4 + %t1 = load i32, i32* %x + %p = add i32 %t0, %t1 + ret i32 %p +} + +define i32 @imp_null_check_add_result(i32* %x, i32* %y) { +; This will codegen to: +; +; movl (%rsi), %eax +; addl (%rdi), %eax +; +; The load instruction we wish to hoist is the addl, but there is a +; write-after-write hazard preventing that from happening. We could +; get fancy here and exploit the commutativity of addition, but right +; now -implicit-null-checks isn't that smart. +; + + entry: + %c = icmp eq i32* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + + is_null: + ret i32 42 + + not_null: + %t0 = load i32, i32* %y + %t1 = load i32, i32* %x + %p = add i32 %t0, %t1 + ret i32 %p +} + !0 = !{} Index: test/CodeGen/X86/implicit-null-check.ll =================================================================== --- test/CodeGen/X86/implicit-null-check.ll +++ test/CodeGen/X86/implicit-null-check.ll @@ -76,6 +76,31 @@ ret i32 %p1 } +define i32 @imp_null_check_hoist_over_unrelated_load(i32* %x, i32* %y, i32* %z) { +; CHECK-LABEL: _imp_null_check_hoist_over_unrelated_load: +; CHECK: Ltmp7: +; CHECK: movl (%rdi), %eax +; CHECK: movl (%rsi), %ecx +; CHECK: movl %ecx, (%rdx) +; CHECK: retq +; CHECK: Ltmp6: +; CHECK: movl $42, %eax +; CHECK: retq + + entry: + %c = icmp eq i32* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + + is_null: + ret i32 42 + + not_null: + %t0 = load i32, i32* %y + %t1 = load i32, i32* %x + store i32 %t0, i32* %z + ret i32 %t1 +} + !0 = !{} ; CHECK-LABEL: __LLVM_FaultMaps: @@ -88,7 +113,7 @@ ; CHECK-NEXT: .short 0 ; # functions: -; CHECK-NEXT: .long 3 +; CHECK-NEXT: .long 4 ; FunctionAddr: ; CHECK-NEXT: .quad _imp_null_check_add_result @@ -117,6 +142,19 @@ ; CHECK-NEXT: .long Ltmp2-_imp_null_check_gep_load ; FunctionAddr: +; CHECK-NEXT: .quad _imp_null_check_hoist_over_unrelated_load +; NumFaultingPCs +; CHECK-NEXT: .long 1 +; Reserved: +; CHECK-NEXT: .long 0 +; Fault[0].Type: +; CHECK-NEXT: .long 1 +; Fault[0].FaultOffset: +; CHECK-NEXT: .long Ltmp7-_imp_null_check_hoist_over_unrelated_load +; Fault[0].HandlerOffset: +; CHECK-NEXT: .long Ltmp6-_imp_null_check_hoist_over_unrelated_load + +; FunctionAddr: ; CHECK-NEXT: .quad _imp_null_check_load ; NumFaultingPCs ; CHECK-NEXT: .long 1 @@ -131,10 +169,12 @@ ; OBJDUMP: FaultMap table: ; OBJDUMP-NEXT: Version: 0x1 -; OBJDUMP-NEXT: NumFunctions: 3 +; OBJDUMP-NEXT: NumFunctions: 4 ; OBJDUMP-NEXT: FunctionAddress: 0x000000, NumFaultingPCs: 1 ; OBJDUMP-NEXT: Fault kind: FaultingLoad, faulting PC offset: 0, handling PC offset: 5 ; OBJDUMP-NEXT: FunctionAddress: 0x000000, NumFaultingPCs: 1 ; OBJDUMP-NEXT: Fault kind: FaultingLoad, faulting PC offset: 0, handling PC offset: 7 ; OBJDUMP-NEXT: FunctionAddress: 0x000000, NumFaultingPCs: 1 +; OBJDUMP-NEXT: Fault kind: FaultingLoad, faulting PC offset: 0, handling PC offset: 7 +; OBJDUMP-NEXT: FunctionAddress: 0x000000, NumFaultingPCs: 1 ; OBJDUMP-NEXT: Fault kind: FaultingLoad, faulting PC offset: 0, handling PC offset: 3