diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1270,6 +1270,26 @@
     return false;
   }
 
+  /// Return true if MI is a memory op that has a base operand, an optional
+  /// index operand, and a scale. This function also calculates the total
+  /// constant address using both the base and the (optional) index operands.
+  /// This is recorded in ConstantAddr. We also record whether the constant
+  /// address is calculated from the BaseReg (second operand of the pair
+  /// BaseRegUsedInConstAddr) and whether it is calculated from the IndexReg
+  /// (second operand of the pair IndexRegUsedInConstAddr). \p
+  /// IgnoreInstInAddrCalc contains the list of instructions which do not
+  /// contribute to the address calculation (even though they modify the base
+  /// and index operands).
+  virtual bool getConstantAddrFromMemoryOp(
+      const MachineInstr &MI, int64_t &ConstantAddr, int64_t &Scale,
+      std::pair<Register, bool> &BaseRegUsedInConstAddr,
+      std::pair<Register, bool> &IndexRegUsedInConstAddr,
+      const TargetRegisterInfo *TRI,
+      const DenseSet<const MachineInstr *> &IgnoreInstInAddrCalc) const {
+    return false;
+  }
+
   /// Returns true if MI's Def is NullValueReg, and the MI
   /// does not change the Zero value. i.e. cases such as rax = shr rax, X where
   /// NullValueReg = rax. Note that if the NullValueReg is non-zero, this
diff --git a/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/llvm/lib/CodeGen/ImplicitNullChecks.cpp
--- a/llvm/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -161,11 +161,23 @@
     MachineInstr *getOnlyDependency() const { return OnlyDependency; }
   };
 
+  // We can have multiple starting addresses for faulting pages, i.e. page zero
+  // need not be the only faulting page defined by the runtime. This is
+  // collected through optional module-level metadata provided by front-ends.
+  // The values are expected to be non-negative.
+  SmallVector<uint64_t, 2> StartAddressesOfFaultingPage;
+
   const TargetInstrInfo *TII = nullptr;
   const TargetRegisterInfo *TRI = nullptr;
   AliasAnalysis *AA = nullptr;
   MachineFrameInfo *MFI = nullptr;
 
+  // Returns true if some non-zero page is protected.
+  bool canFaultInNonZeroPage() {
+    return StartAddressesOfFaultingPage.size() > 1 ||
+           StartAddressesOfFaultingPage.front() != 0;
+  }
+
   bool analyzeBlockForNullChecks(MachineBasicBlock &MBB,
                                  SmallVectorImpl<NullCheck> &NullCheckList);
   MachineInstr *insertFaultingInstr(MachineInstr *MI, MachineBasicBlock *MBB,
@@ -196,9 +208,10 @@
   /// no sense to continue lookup due to any other instruction will not be able
   /// to be used. \p PrevInsts is the set of instruction seen since
   /// the explicit null check on \p PointerReg.
-  SuitabilityResult isSuitableMemoryOp(const MachineInstr &MI,
-                                       unsigned PointerReg,
-                                       ArrayRef<MachineInstr *> PrevInsts);
+  SuitabilityResult
+  isSuitableMemoryOp(const MachineInstr &MI, unsigned PointerReg,
+                     ArrayRef<MachineInstr *> PrevInsts,
+                     const DenseSet<const MachineInstr *> &InstrPreservesZeroReg);
 
   /// Returns true if \p DependenceMI can clobber the liveIns in NullSucc block
   /// if it was hoisted to the NullCheck block. This is used by caller
@@ -307,6 +320,21 @@
   MFI = &MF.getFrameInfo();
   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
 
+  // Populate the list of faulting page start addresses.
+  // If the module metadata is attached, populate using that; otherwise,
+  // consider only the zeroth page to be the faulting page.
+  Module *M = MF.getFunction().getParent();
+  StartAddressesOfFaultingPage.clear();
+  if (auto *SAFP = M->getNamedMetadata("startaddress_faulting_pages")) {
+    for (MDNode *SANode : SAFP->operands()) {
+      uint64_t SA =
+          mdconst::extract<ConstantInt>(SANode->getOperand(0))->getZExtValue();
+      assert(SA >= 0 && "expected positive starting address!");
+      StartAddressesOfFaultingPage.push_back(SA);
+    }
+  } else
+    StartAddressesOfFaultingPage.push_back(0);
+
   SmallVector<NullCheck, 16> NullCheckList;
 
   for (auto &MBB : MF)
@@ -366,35 +394,69 @@
   return AR_NoAlias;
 }
 
-ImplicitNullChecks::SuitabilityResult
-ImplicitNullChecks::isSuitableMemoryOp(const MachineInstr &MI,
-                                       unsigned PointerReg,
-                                       ArrayRef<MachineInstr *> PrevInsts) {
-  int64_t Offset;
-  bool OffsetIsScalable;
-  const MachineOperand *BaseOp;
+ImplicitNullChecks::SuitabilityResult ImplicitNullChecks::isSuitableMemoryOp(
+    const MachineInstr &MI, unsigned PointerReg,
+    ArrayRef<MachineInstr *> PrevInsts,
+    const DenseSet<const MachineInstr *> &InstrPreservesZeroReg) {
+  int64_t ConstantAddr;
+  std::pair<Register, bool> BaseRegUsedInConstAddr;
+  std::pair<Register, bool> IndexRegUsedInConstAddr;
+  int64_t Scale = 0;
+  if (!MI.mayLoadOrStore() || MI.isPredicable())
+    return SR_Unsuitable;
+
+  // Check whether this is a valid memory op and get the entire constant
+  // address (i.e. not just the offset) for the memory op if we can.
+  if (!TII->getConstantAddrFromMemoryOp(
+          MI, ConstantAddr, Scale, BaseRegUsedInConstAddr,
+          IndexRegUsedInConstAddr, TRI, InstrPreservesZeroReg))
+    return SR_Unsuitable;
 
-  // FIXME: This handles only simple addressing mode.
-  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
-    return SR_Unsuitable;
+  const bool UsedBaseRegIsNotNullChecked =
+      BaseRegUsedInConstAddr.first &&
+      BaseRegUsedInConstAddr.first != PointerReg;
+  const bool UsedIndexRegIsNotNullChecked =
+      Scale != 0 && IndexRegUsedInConstAddr.first &&
+      IndexRegUsedInConstAddr.first != PointerReg;
 
-  // We need the base of the memory instruction to be same as the register
-  // where the null check is performed (i.e. PointerReg).
-  if (!BaseOp->isReg() || BaseOp->getReg() != PointerReg)
+  // At least one of base or index reg should have been null checked.
+  if (UsedBaseRegIsNotNullChecked && UsedIndexRegIsNotNullChecked)
     return SR_Unsuitable;
 
-  // Scalable offsets are a part of scalable vectors (SVE for AArch64). That
-  // target is in-practice unsupported for ImplicitNullChecks.
-  if (OffsetIsScalable)
+  // The register which is not null checked should be part of the constant
+  // address calculation; otherwise we do not know whether ConstantAddr is
+  // just Offset + 0 or Offset + some symbolic address.
+  // This matters because we do not want to incorrectly assume that a load
+  // from "Offset + some symbolic address" falls in the zeroth faulting page
+  // in the "sane offset check" below.
+  //
+  // Also, we do not care about getting the constant address through the
+  // register that is null checked. There are two reasons for this:
+  // 1. If the constant address calculation was done before the null check,
+  //    then the null check will not fail for that register (since it won't
+  //    be null).
+  // 2. If *some modification* of the constant address should have occurred
+  //    in the not_null block, we would have caught it earlier in the checks
+  //    at analyzeBlockForNullChecks (see preservesZeroValueInReg).
+  if (UsedBaseRegIsNotNullChecked && !BaseRegUsedInConstAddr.second)
     return SR_Unsuitable;
-  if (!MI.mayLoadOrStore() || MI.isPredicable())
+  if (UsedIndexRegIsNotNullChecked && !IndexRegUsedInConstAddr.second)
     return SR_Unsuitable;
 
   // We want the mem access to be issued at a sane offset from PointerReg,
   // so that if PointerReg is null then the access reliably page faults.
-  if (!(-PageSize < Offset && Offset < PageSize))
+  if (!llvm::any_of(StartAddressesOfFaultingPage,
+                    [ConstantAddr](uint64_t StartAddress) {
+                      // The zeroth page is special cased: when we have a
+                      // negative offset (greater than -PageSize), the access
+                      // will still fault.
+                      return (StartAddress == 0 && ConstantAddr < 0 &&
+                              ConstantAddr > -PageSize) ||
+                             (ConstantAddr >= StartAddress &&
+                              ConstantAddr < StartAddress + PageSize);
+                    }))
     return SR_Unsuitable;
 
   // Finally, check whether the current memory access aliases with previous one.
@@ -606,13 +668,15 @@
   // because some_cond is always true.
 
   SmallVector<MachineInstr *, 8> InstsSeenSoFar;
+  DenseSet<const MachineInstr *> InstrPreservesZeroReg;
 
   for (auto &MI : *NotNullSucc) {
     if (!canHandle(&MI) || InstsSeenSoFar.size() >= MaxInstsToConsider)
       return false;
 
     MachineInstr *Dependence;
-    SuitabilityResult SR = isSuitableMemoryOp(MI, PointerReg, InstsSeenSoFar);
+    SuitabilityResult SR = isSuitableMemoryOp(MI, PointerReg, InstsSeenSoFar,
+                                              InstrPreservesZeroReg);
     if (SR == SR_Impossible)
       return false;
     if (SR == SR_Suitable &&
@@ -626,6 +690,8 @@
     // PointerReg if it was null, then we cannot move further.
     if (!TII->preservesZeroValueInReg(&MI, PointerReg, TRI))
       return false;
+    if (MI.modifiesRegister(PointerReg, TRI))
+      InstrPreservesZeroReg.insert(&MI);
     InstsSeenSoFar.push_back(&MI);
   }
 
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -317,6 +317,13 @@
                      SmallVectorImpl<MachineOperand> &Cond,
                      bool AllowModify) const override;
 
+  bool getConstantAddrFromMemoryOp(
+      const MachineInstr &MI, int64_t &ConstantAddr, int64_t &Scale,
+      std::pair<Register, bool> &BaseRegUsedInConstAddr,
+      std::pair<Register, bool> &IndexRegUsedInConstAddr,
+      const TargetRegisterInfo *TRI,
+      const DenseSet<const MachineInstr *> &IgnoreInstInAddrCalc) const override;
+
   bool preservesZeroValueInReg(const MachineInstr *MI,
                                const Register NullValueReg,
                                const TargetRegisterInfo *TRI) const override;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -3663,6 +3663,84 @@
   }
 }
 
+bool X86InstrInfo::getConstantAddrFromMemoryOp(
+    const MachineInstr &MemI, int64_t &ConstantAddr, int64_t &Scale,
+    std::pair<Register, bool> &BaseRegUsedInConstAddr,
+    std::pair<Register, bool> &IndexRegUsedInConstAddr,
+    const TargetRegisterInfo *TRI,
+    const DenseSet<const MachineInstr *> &IgnoreInstInAddrCalc) const {
+
+  const MCInstrDesc &Desc = MemI.getDesc();
+  int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
+  if (MemRefBegin < 0)
+    return false;
+
+  MemRefBegin += X86II::getOperandBias(Desc);
+
+  auto *BaseOp = &MemI.getOperand(MemRefBegin + X86::AddrBaseReg);
+  if (!BaseOp->isReg()) // Can be an MO_FrameIndex.
+    return false;
+  auto BaseOpReg = BaseOp->getReg();
+  BaseRegUsedInConstAddr = std::make_pair(BaseOpReg, false);
+
+  const MachineOperand &DispMO = MemI.getOperand(MemRefBegin + X86::AddrDisp);
+  // The displacement can be symbolic.
+  if (!DispMO.isImm())
+    return false;
+
+  ConstantAddr = DispMO.getImm();
+  Scale = MemI.getOperand(MemRefBegin + X86::AddrScaleAmt).getImm();
+  auto *IndexOp = &MemI.getOperand(MemRefBegin + X86::AddrIndexReg);
+  auto IndexOpReg = IndexOp->getReg();
+  IndexRegUsedInConstAddr = std::make_pair(IndexOpReg, false);
+
+  // Check if there are other constant values that make up the address.
+  // TODO: Extend this beyond the current basic block because usually the
+  // immediate is constant hoisted into a dominator MBB and used in a
+  // MOV/OR/ADD.
+  // Updates ConstantAddr if the register's constant value can be safely found.
+  auto UpdateAddrForComplexMode = [&](Register RegUsedInAddr,
+                                      int64_t multiplier) {
+    if (RegUsedInAddr == X86::NoRegister || !multiplier)
+      return;
+    auto It = std::next(MachineBasicBlock::const_reverse_iterator(&MemI)),
+         End = MemI.getParent()->rend();
+    for (; It != End; It++) {
+      const MachineInstr *CurrMI = &*It;
+      if (CurrMI->modifiesRegister(RegUsedInAddr, TRI) &&
+          !IgnoreInstInAddrCalc.count(CurrMI))
+        break;
+    }
+    // No modifications of the register in this block.
+    if (It == End)
+      return;
+
+    const MachineInstr *CurrMI = &*It;
+    // We have found the most recent def of RegUsedInAddr; bail out unless it
+    // is in one of the forms from which we can extract the immediate.
+    // We only support a direct mov, since that means all other previous
+    // values for that register have been invalidated.
+    // TODO: We can also support instructions of the form: rax = add rax, rcx,
+    // where the *most recent* def of rcx is a mov and we know rax to be zero
+    // at the point where the add is done. In other words:
+    //
+    //   mov rcx, 12345 <-- this could be in another dominating block.
+    //   rax = add rax, rcx
+    if (CurrMI->getOpcode() != X86::MOV32ri &&
+        CurrMI->getOpcode() != X86::MOV64ri)
+      return;
+    ConstantAddr += CurrMI->getOperand(1).getImm() * multiplier;
+    if (RegUsedInAddr == BaseOpReg)
+      BaseRegUsedInConstAddr.second = true;
+    else
+      IndexRegUsedInConstAddr.second = true;
+  };
+
+  UpdateAddrForComplexMode(BaseOpReg, 1);
+  UpdateAddrForComplexMode(IndexOpReg, Scale);
+  return true;
+}
+
 bool X86InstrInfo::preservesZeroValueInReg(
     const MachineInstr *MI, const Register NullValueReg,
     const TargetRegisterInfo *TRI) const {
diff --git a/llvm/test/CodeGen/X86/implicit-null-check-negative.ll b/llvm/test/CodeGen/X86/implicit-null-check-negative.ll
--- a/llvm/test/CodeGen/X86/implicit-null-check-negative.ll
+++ b/llvm/test/CodeGen/X86/implicit-null-check-negative.ll
@@ -129,4 +129,7 @@
   %t = load i64, i64* %x.loc
   ret i64 %t
 }
+!startaddress_faulting_pages = !{!1, !2}
 !0 = !{}
+!1 = !{i64 0}
+!2 = !{i64 35184372088832}
diff --git a/llvm/test/CodeGen/X86/implicit-null-check.ll b/llvm/test/CodeGen/X86/implicit-null-check.ll
--- a/llvm/test/CodeGen/X86/implicit-null-check.ll
+++ b/llvm/test/CodeGen/X86/implicit-null-check.ll
@@ -593,14 +593,12 @@
 ; Same as imp_null_check_load_shift_addr but shift is by 3 and this is now
 ; converted into complex addressing.
-; TODO: Can be converted into implicit null check
 define i64 @imp_null_check_load_shift_by_3_addr(i64* %x) {
 ; CHECK-LABEL: imp_null_check_load_shift_by_3_addr:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    testq %rdi, %rdi
-; CHECK-NEXT:    je LBB22_1
+; CHECK-NEXT:  Ltmp18:
+; CHECK-NEXT:    movq 8(,%rdi,8), %rax ## on-fault: LBB22_1
 ; CHECK-NEXT:  ## %bb.2: ## %not_null
-; CHECK-NEXT:    movq 8(,%rdi,8), %rax
 ; CHECK-NEXT:    retq
 ; CHECK-NEXT:  LBB22_1: ## %is_null
 ; CHECK-NEXT:    movl $42, %eax
@@ -621,4 +619,7 @@
   %t = load i64, i64* %x.loc
   ret i64 %t
 }
+!startaddress_faulting_pages = !{!1, !2}
 !0 = !{}
+!1 = !{i64 0}
+!2 = !{i64 35184372088832}
diff --git a/llvm/test/CodeGen/X86/implicit-null-checks.mir b/llvm/test/CodeGen/X86/implicit-null-checks.mir
--- a/llvm/test/CodeGen/X86/implicit-null-checks.mir
+++ b/llvm/test/CodeGen/X86/implicit-null-checks.mir
@@ -4,6 +4,8 @@
   target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
   target triple = "x86_64-apple-macosx"
 
+  ;; Just to avoid confusion here - none of these IR functions actually correspond to the MIR test cases below (other than the names and basic blocks).
+  ;; We need them since we rely on the make.implicit metadata in the pass.
   ;; Positive test
   define i32 @imp_null_check_with_bitwise_op_0(i32* %x, i32 %val) {
   entry:
@@ -377,9 +379,51 @@
     ret i32 undef
   }
 
+  define i32 @load_from_non_zero_faulting_page(i32* %ptr, i32 %val) {
+  entry:
+    %ptr_is_null = icmp eq i32* %ptr, null
+    br i1 %ptr_is_null, label %is_null, label %not_null, !make.implicit !0
+
+  is_null:
+    ret i32 42
+
+  not_null:
+    br i1 undef, label %ret_100, label %ret_200
+
+  ret_100:
+    ret i32 100
+
+  ret_200:
+    ret i32 200
+  }
+
+
+  define i32 @load_from_non_zero_faulting_page_negative_test(i32* %ptr, i32 %val) {
+  entry:
+    %ptr_is_null = icmp eq i32* %ptr, null
+    br i1 %ptr_is_null, label %is_null, label %not_null, !make.implicit !0
+
+  is_null:
+    ret i32 42
+
+  not_null:
+    br i1 undef, label %ret_100, label %ret_200
+
+  ret_100:
+    ret i32 100
+
+  ret_200:
+    ret i32 200
+  }
+
   attributes #0 = { "target-features"="+bmi,+bmi2" }
 
+  !startaddress_faulting_pages = !{!1, !2}
   !0 = !{}
+  !1 = !{i64 0}
+  !2 = !{i64 35184372088832}
+
+  attributes #0 = { "target-features"="+bmi,+bmi2" }
+
 ...
 ---
 name: imp_null_check_with_bitwise_op_0
@@ -1316,3 +1360,88 @@
     RETQ $eax
 
 ...
+---
+name: load_from_non_zero_faulting_page
+# This is a positive test since the address for the memory load is within
+# the valid regions known to be faulting (according to the
+# startaddress_faulting_pages metadata).
+# TODO: General IR test cases will be added once the dependence tracking
+# handles more than one dependence in the implicit null checks pass.
+# CHECK-LABEL: name: load_from_non_zero_faulting_page
+# CHECK: bb.0.entry:
+# CHECK: renamable $rdx = MOV64ri 35184372088832
+# CHECK: $ecx = FAULTING_OP 1, %bb.3, 1724, renamable $rdx, 8, renamable $rax, 4, $noreg
+# CHECK: JMP_1 %bb.1
+# CHECK: bb.1.not_null:
+# CHECK: CMP32ri killed renamable $ecx, 1195, implicit-def $eflags
+# CHECK: JCC_1 %bb.4, 4, implicit $eflags
+liveins:
+  - { reg: '$rax' }
+  - { reg: '$rbx' }
+
+body: |
+  bb.0.entry:
+    liveins: $rax, $rbx
+
+    TEST64rr renamable $rax, renamable $rax, implicit-def $eflags
+    JCC_1 %bb.3, 4, implicit $eflags
+
+  bb.1.not_null:
+    liveins: $rax, $rbx
+
+    renamable $rdx = MOV64ri 35184372088832
+    renamable $ecx = MOV32rm renamable $rdx, 8, renamable $rax, 4, $noreg
+    CMP32ri killed renamable $ecx, 1195, implicit-def $eflags
+    JCC_1 %bb.4, 4, implicit $eflags
+
+  bb.2.ret_200:
+    $eax = MOV32ri 200
+    RETQ $eax
+
+  bb.3.is_null:
+    $eax = MOV32ri 42
+    RETQ $eax
+
+  bb.4.ret_100:
+    $eax = MOV32ri 100
+    RETQ $eax
+
+...
+---
+name: load_from_non_zero_faulting_page_negative_test
+# This is a negative test since the address for the memory load is not within
+# the valid regions known to be faulting (according to the
+# startaddress_faulting_pages metadata).
+
+# CHECK-LABEL: name: load_from_non_zero_faulting_page_negative_test
+# CHECK: bb.0.entry:
+# CHECK: TEST64rr renamable $rax, renamable $rax, implicit-def $eflags
+# CHECK-NOT: FAULTING_OP
+liveins:
+  - { reg: '$rax' }
+  - { reg: '$rbx' }
+
+body: |
+  bb.0.entry:
+    liveins: $rax, $rbx
+
+    TEST64rr renamable $rax, renamable $rax, implicit-def $eflags
+    JCC_1 %bb.3, 4, implicit $eflags
+
+  bb.1.not_null:
+    liveins: $rax, $rbx
+
+    renamable $rdx = MOV64ri 351843720
+    renamable $ecx = MOV32rm renamable $rdx, 8, renamable $rax, 4, $noreg
+    CMP32ri killed renamable $ecx, 1195, implicit-def $eflags
+    JCC_1 %bb.4, 4, implicit $eflags
+
+  bb.2.ret_200:
+    $eax = MOV32ri 200
+    RETQ $eax
+
+  bb.3.is_null:
+    $eax = MOV32ri 42
+    RETQ $eax
+
+  bb.4.ret_100:
+    $eax = MOV32ri 100
+    RETQ $eax
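
A minimal sketch (not part of the patch itself) of the module-level metadata a front-end could attach so that the pass treats pages other than the zeroth one as faulting. The page-start values are illustrative and mirror the tests above: one i64 start address per operand of the named metadata, which the pass reads via getNamedMetadata("startaddress_faulting_pages").

  ; Illustrative only: module-level metadata consumed by ImplicitNullChecks.
  !startaddress_faulting_pages = !{!1, !2}
  !1 = !{i64 0}              ; the zeroth page remains a faulting page
  !2 = !{i64 35184372088832} ; an additional protected page starting at 2^45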