diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp
--- a/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -507,25 +507,31 @@
       Next = std::next(J);
       MachineInstr &MI = *J;
 
-      if (MI.isConditionalBranch()) {
-        MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI);
-        if (!isBlockInRange(MI, *DestBB)) {
-          if (Next != MBB.end() && Next->isConditionalBranch()) {
-            // If there are multiple conditional branches, this isn't an
-            // analyzable block. Split later terminators into a new block so
-            // each one will be analyzable.
-
-            splitBlockBeforeInstr(*Next, DestBB);
-          } else {
-            fixupConditionalBranch(MI);
-            ++NumConditionalRelaxed;
-          }
+      if (!MI.isConditionalBranch())
+        continue;
+
+      if (MI.getOpcode() == TargetOpcode::FAULTING_OP)
+        // FAULTING_OP's destination is not encoded in the instruction stream
+        // and thus never needs to be relaxed.
+        continue;
+
+      MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI);
+      if (!isBlockInRange(MI, *DestBB)) {
+        if (Next != MBB.end() && Next->isConditionalBranch()) {
+          // If there are multiple conditional branches, this isn't an
+          // analyzable block. Split later terminators into a new block so
+          // each one will be analyzable.
+
+          splitBlockBeforeInstr(*Next, DestBB);
+        } else {
+          fixupConditionalBranch(MI);
+          ++NumConditionalRelaxed;
+        }
 
-          Changed = true;
+        Changed = true;
 
-          // This may have modified all of the terminators, so start over.
-          Next = MBB.getFirstTerminator();
-        }
+        // This may have modified all of the terminators, so start over.
+        Next = MBB.getFirstTerminator();
       }
     }
   }
diff --git a/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/llvm/lib/CodeGen/ImplicitNullChecks.cpp
--- a/llvm/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -373,10 +373,14 @@
   bool OffsetIsScalable;
   const MachineOperand *BaseOp;
 
+  // Implementation restriction for faulting_op insertion.
+  // TODO: This could be relaxed if we find a test case which warrants it.
+  if (MI.getDesc().getNumDefs() > 1)
+    return SR_Unsuitable;
   // FIXME: This handles only simple addressing mode.
   if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable,
                                     TRI))
-    return SR_Unsuitable;
+    return SR_Unsuitable;
 
   // We need the base of the memory instruction to be same as the register
   // where the null check is performed (i.e. PointerReg).
@@ -502,9 +506,9 @@
       MBP.Predicate == MachineBranchPredicate::PRED_EQ)))
     return false;
 
-  // If we cannot erase the test instruction itself, then making the null check
-  // implicit does not buy us much.
-  if (!MBP.SingleUseCondition)
+  // If there is a separate condition-generating instruction, we choose not to
+  // transform unless we can remove both the condition and the consuming branch.
+  if (MBP.ConditionDef && !MBP.SingleUseCondition)
     return false;
 
   MachineBasicBlock *NotNullSucc, *NullSucc;
@@ -522,32 +526,34 @@
   if (NotNullSucc->pred_size() != 1)
     return false;
 
-  // To prevent the invalid transformation of the following code:
-  //
-  //   mov %rax, %rcx
-  //   test %rax, %rax
-  //   %rax = ...
-  //   je throw_npe
-  //   mov(%rcx), %r9
-  //   mov(%rax), %r10
-  //
-  // into:
-  //
-  //   mov %rax, %rcx
-  //   %rax = ....
-  //   faulting_load_op("movl (%rax), %r10", throw_npe)
-  //   mov(%rcx), %r9
-  //
-  // we must ensure that there are no instructions between the 'test' and
-  // conditional jump that modify %rax.
   const Register PointerReg = MBP.LHS.getReg();
-  assert(MBP.ConditionDef->getParent() == &MBB && "Should be in basic block");
-
-  for (auto I = MBB.rbegin(); MBP.ConditionDef != &*I; ++I)
-    if (I->modifiesRegister(PointerReg, TRI))
-      return false;
+  if (MBP.ConditionDef) {
+    // To prevent the invalid transformation of the following code:
+    //
+    //   mov %rax, %rcx
+    //   test %rax, %rax
+    //   %rax = ...
+    //   je throw_npe
+    //   mov(%rcx), %r9
+    //   mov(%rax), %r10
+    //
+    // into:
+    //
+    //   mov %rax, %rcx
+    //   %rax = ....
+    //   faulting_load_op("movl (%rax), %r10", throw_npe)
+    //   mov(%rcx), %r9
+    //
+    // we must ensure that there are no instructions between the 'test' and
+    // conditional jump that modify %rax.
+    assert(MBP.ConditionDef->getParent() == &MBB &&
+           "Should be in basic block");
+    for (auto I = MBB.rbegin(); MBP.ConditionDef != &*I; ++I)
+      if (I->modifiesRegister(PointerReg, TRI))
+        return false;
+  }
 
   // Starting with a code fragment like:
   //
   //   test %rax, %rax
@@ -726,9 +732,11 @@
   }
 
   NC.getMemOperation()->eraseFromParent();
-  NC.getCheckOperation()->eraseFromParent();
+  if (auto *CheckOp = NC.getCheckOperation())
+    CheckOp->eraseFromParent();
 
-  // Insert an *unconditional* branch to not-null successor.
+  // Insert an *unconditional* branch to not-null successor - we expect
+  // block placement to remove fallthroughs later.
   TII->insertBranch(*NC.getCheckBlock(), NC.getNotNullSucc(), nullptr,
                     /*Cond=*/None, DL);
 
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -32,6 +32,7 @@
 #include "llvm/BinaryFormat/COFF.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/FaultMaps.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -69,12 +70,13 @@
 class AArch64AsmPrinter : public AsmPrinter {
   AArch64MCInstLower MCInstLowering;
   StackMaps SM;
+  FaultMaps FM;
   const AArch64Subtarget *STI;
 
 public:
   AArch64AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
       : AsmPrinter(TM, std::move(Streamer)), MCInstLowering(OutContext, *this),
-        SM(*this) {}
+        SM(*this), FM(*this) {}
 
   StringRef getPassName() const override { return "AArch64 Assembly Printer"; }
 
@@ -97,6 +99,7 @@
                          const MachineInstr &MI);
   void LowerSTATEPOINT(MCStreamer &OutStreamer, StackMaps &SM,
                        const MachineInstr &MI);
+  void LowerFAULTING_OP(const MachineInstr &MI);
 
   void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI);
   void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
@@ -524,7 +527,11 @@
     // generates code that does this, it is always safe to set.
     OutStreamer->emitAssemblerFlag(MCAF_SubsectionsViaSymbols);
   }
+
+  // Emit stack and fault map information.
   emitStackMaps(SM);
+  FM.serializeToFaultMapSection();
+
 }
 
 void AArch64AsmPrinter::EmitLOHs() {
@@ -970,6 +977,42 @@
   SM.recordStatepoint(*MILabel, MI);
 }
 
+void AArch64AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI) {
+  // FAULTING_LOAD_OP <def>, <faultinfo kind>, <handler MBB>,
+  //                  <opcode>, <operands>
+
+  Register DefRegister = FaultingMI.getOperand(0).getReg();
+  FaultMaps::FaultKind FK =
+      static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
+  MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
+  unsigned Opcode = FaultingMI.getOperand(3).getImm();
+  unsigned OperandsBeginIdx = 4;
+
+  auto &Ctx = OutStreamer->getContext();
+  MCSymbol *FaultingLabel = Ctx.createTempSymbol();
+  OutStreamer->emitLabel(FaultingLabel);
+
+  assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
+  FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);
+
+  MCInst MI;
+  MI.setOpcode(Opcode);
+
+  if (DefRegister != (Register)0)
+    MI.addOperand(MCOperand::createReg(DefRegister));
+
+  for (auto I = FaultingMI.operands_begin() + OperandsBeginIdx,
+            E = FaultingMI.operands_end();
+       I != E; ++I) {
+    MCOperand Dest;
+    lowerOperand(*I, Dest);
+    MI.addOperand(Dest);
+  }
+
+  OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
+  OutStreamer->emitInstruction(MI, getSubtargetInfo());
+}
+
 void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) {
   Register DestReg = MI.getOperand(0).getReg();
   if (STI->hasZeroCycleZeroingFP() && !STI->hasZeroCycleZeroingFPWorkaround()) {
@@ -1254,6 +1297,9 @@
   case TargetOpcode::STATEPOINT:
     return LowerSTATEPOINT(*OutStreamer, SM, *MI);
 
+  case TargetOpcode::FAULTING_OP:
+    return LowerFAULTING_OP(*MI);
+
   case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
     LowerPATCHABLE_FUNCTION_ENTER(*MI);
     return;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -188,6 +188,9 @@
                      MachineBasicBlock *&FBB,
                      SmallVectorImpl<MachineOperand> &Cond,
                      bool AllowModify = false) const override;
+  bool analyzeBranchPredicate(MachineBasicBlock &MBB,
+                              MachineBranchPredicate &MBP,
+                              bool AllowModify) const override;
   unsigned removeBranch(MachineBasicBlock &MBB,
                         int *BytesRemoved = nullptr) const override;
   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -328,6 +328,56 @@
   return true;
 }
 
+bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
+                                              MachineBranchPredicate &MBP,
+                                              bool AllowModify) const {
+  // For the moment, handle only a block which ends with a cb(n)zx followed by
+  // a fallthrough. Why this? Because it is a common form.
+  // TODO: Should we handle b.cc?
+
+  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+  if (I == MBB.end())
+    return true;
+
+  // Skip over SpeculationBarrierEndBB terminators.
+  if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
+      I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
+    --I;
+  }
+
+  if (!isUnpredicatedTerminator(*I))
+    return true;
+
+  // Get the last instruction in the block.
+  MachineInstr *LastInst = &*I;
+  unsigned LastOpc = LastInst->getOpcode();
+  if (!isCondBranchOpcode(LastOpc))
+    return true;
+
+  switch (LastOpc) {
+  default:
+    return true;
+  case AArch64::CBZW:
+  case AArch64::CBZX:
+  case AArch64::CBNZW:
+  case AArch64::CBNZX:
+    break;
+  }
+
+  MBP.TrueDest = LastInst->getOperand(1).getMBB();
+  assert(MBP.TrueDest && "expected!");
+  MBP.FalseDest = MBB.getNextNode();
+
+  MBP.ConditionDef = nullptr;
+  MBP.SingleUseCondition = false;
+
+  MBP.LHS = LastInst->getOperand(0);
+  MBP.RHS = MachineOperand::CreateImm(0);
+  MBP.Predicate = (LastOpc == AArch64::CBNZW || LastOpc == AArch64::CBNZX)
+                      ? MachineBranchPredicate::PRED_NE
+                      : MachineBranchPredicate::PRED_EQ;
+  return false;
+}
+
 bool AArch64InstrInfo::reverseBranchCondition(
     SmallVectorImpl<MachineOperand> &Cond) const {
   if (Cond[0].getImm() != -1) {
diff --git a/llvm/test/CodeGen/AArch64/implicit-null-check.ll b/llvm/test/CodeGen/AArch64/implicit-null-check.ll
--- a/llvm/test/CodeGen/AArch64/implicit-null-check.ll
+++ b/llvm/test/CodeGen/AArch64/implicit-null-check.ll
@@ -5,15 +5,14 @@
 ; file with the same name in the X86 tree, but adjusted to remove patterns
 ; related to memory folding of arithmetic (since aarch64 doesn't), and add
 ; a couple of aarch64 specific tests.
-; NOTE: Currently negative tests as these are being precommitted before
-; the changes to enable.
 
 define i32 @imp_null_check_load_fallthrough(i32* %x) {
 ; CHECK-LABEL: imp_null_check_load_fallthrough:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cbz x0, .LBB0_2
-; CHECK-NEXT:  // %bb.1: // %not_null
-; CHECK-NEXT:    ldr w0, [x0]
+; CHECK-NEXT:  .Ltmp0:
+; CHECK-NEXT:    ldr w0, [x0] // on-fault: .LBB0_2
+; CHECK-NEXT:    b .LBB0_1
+; CHECK-NEXT:  .LBB0_1: // %not_null
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB0_2: // %is_null
 ; CHECK-NEXT:    mov w0, #42
@@ -34,9 +33,10 @@
 define i32 @imp_null_check_load_reorder(i32* %x) {
 ; CHECK-LABEL: imp_null_check_load_reorder:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cbz x0, .LBB1_2
-; CHECK-NEXT:  // %bb.1: // %not_null
-; CHECK-NEXT:    ldr w0, [x0]
+; CHECK-NEXT:  .Ltmp1:
+; CHECK-NEXT:    ldr w0, [x0] // on-fault: .LBB1_2
+; CHECK-NEXT:    b .LBB1_1
+; CHECK-NEXT:  .LBB1_1: // %not_null
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB1_2: // %is_null
 ; CHECK-NEXT:    mov w0, #42
@@ -56,9 +56,10 @@
 define i32 @imp_null_check_unordered_load(i32* %x) {
 ; CHECK-LABEL: imp_null_check_unordered_load:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cbz x0, .LBB2_2
-; CHECK-NEXT:  // %bb.1: // %not_null
-; CHECK-NEXT:    ldr w0, [x0]
+; CHECK-NEXT:  .Ltmp2:
+; CHECK-NEXT:    ldr w0, [x0] // on-fault: .LBB2_2
+; CHECK-NEXT:    b .LBB2_1
+; CHECK-NEXT:  .LBB2_1: // %not_null
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB2_2: // %is_null
 ; CHECK-NEXT:    mov w0, #42
@@ -76,6 +77,8 @@
 }
 
+; TODO: Can be converted into implicit check.
+;; Probably could be implicit, but we're conservative for now.
 define i32 @imp_null_check_seq_cst_load(i32* %x) {
 ; CHECK-LABEL: imp_null_check_seq_cst_load:
 ; CHECK:       // %bb.0: // %entry
@@ -125,9 +128,10 @@
 define i8 @imp_null_check_load_i8(i8* %x) {
 ; CHECK-LABEL: imp_null_check_load_i8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cbz x0, .LBB5_2
-; CHECK-NEXT:  // %bb.1: // %not_null
-; CHECK-NEXT:    ldrb w0, [x0]
+; CHECK-NEXT:  .Ltmp3:
+; CHECK-NEXT:    ldrb w0, [x0] // on-fault: .LBB5_2
+; CHECK-NEXT:    b .LBB5_1
+; CHECK-NEXT:  .LBB5_1: // %not_null
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB5_2: // %is_null
 ; CHECK-NEXT:    mov w0, #42
@@ -176,9 +180,10 @@
 define i32 @imp_null_check_gep_load(i32* %x) {
 ; CHECK-LABEL: imp_null_check_gep_load:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cbz x0, .LBB7_2
-; CHECK-NEXT:  // %bb.1: // %not_null
-; CHECK-NEXT:    ldr w0, [x0, #128]
+; CHECK-NEXT:  .Ltmp4:
+; CHECK-NEXT:    ldr w0, [x0, #128] // on-fault: .LBB7_2
+; CHECK-NEXT:    b .LBB7_1
+; CHECK-NEXT:  .LBB7_1: // %not_null
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB7_2: // %is_null
 ; CHECK-NEXT:    mov w0, #42
@@ -199,9 +204,10 @@
 define i32 @imp_null_check_add_result(i32* %x, i32 %p) {
 ; CHECK-LABEL: imp_null_check_add_result:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cbz x0, .LBB8_2
-; CHECK-NEXT:  // %bb.1: // %not_null
-; CHECK-NEXT:    ldr w8, [x0]
+; CHECK-NEXT:  .Ltmp5:
+; CHECK-NEXT:    ldr w8, [x0] // on-fault: .LBB8_2
+; CHECK-NEXT:    b .LBB8_1
+; CHECK-NEXT:  .LBB8_1: // %not_null
 ; CHECK-NEXT:    add w0, w8, w1
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB8_2: // %is_null
@@ -225,9 +231,10 @@
 define i32 @imp_null_check_hoist_over_udiv(i32* %x, i32 %a, i32 %b) {
 ; CHECK-LABEL: imp_null_check_hoist_over_udiv:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cbz x0, .LBB9_2
-; CHECK-NEXT:  // %bb.1: // %not_null
-; CHECK-NEXT:    ldr w8, [x0]
+; CHECK-NEXT:  .Ltmp6:
+; CHECK-NEXT:    ldr w8, [x0] // on-fault: .LBB9_2
+; CHECK-NEXT:    b .LBB9_1
+; CHECK-NEXT:  .LBB9_1: // %not_null
 ; CHECK-NEXT:    udiv w9, w1, w2
 ; CHECK-NEXT:    add w0, w8, w9
 ; CHECK-NEXT:    ret
@@ -249,6 +256,8 @@
 }
 
+; TODO: We should be able to hoist this - we can on x86, why isn't this
+; working for aarch64? Aliasing?
 define i32 @imp_null_check_hoist_over_unrelated_load(i32* %x, i32* %y, i32* %z) {
 ; CHECK-LABEL: imp_null_check_hoist_over_unrelated_load:
 ; CHECK:       // %bb.0: // %entry
@@ -278,9 +287,10 @@
 define i32 @imp_null_check_gep_load_with_use_dep(i32* %x, i32 %a) {
 ; CHECK-LABEL: imp_null_check_gep_load_with_use_dep:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cbz x0, .LBB11_2
-; CHECK-NEXT:  // %bb.1: // %not_null
-; CHECK-NEXT:    ldr w8, [x0]
+; CHECK-NEXT:  .Ltmp7:
+; CHECK-NEXT:    ldr w8, [x0] // on-fault: .LBB11_2
+; CHECK-NEXT:    b .LBB11_1
+; CHECK-NEXT:  .LBB11_1: // %not_null
 ; CHECK-NEXT:    add w9, w0, w1
 ; CHECK-NEXT:    add w8, w9, w8
 ; CHECK-NEXT:    add w0, w8, #4 // =4
@@ -304,6 +314,8 @@
   ret i32 %z
 }
 
+;; TODO: We could handle this case as we can lift the fence into the
+;; previous block before the conditional without changing behavior.
 define i32 @imp_null_check_load_fence1(i32* %x) {
 ; CHECK-LABEL: imp_null_check_load_fence1:
 ; CHECK:       // %bb.0: // %entry
@@ -328,6 +340,8 @@
   ret i32 %t
 }
 
+;; TODO: We could handle this case as we can lift the fence into the
+;; previous block before the conditional without changing behavior.
 define i32 @imp_null_check_load_fence2(i32* %x) {
 ; CHECK-LABEL: imp_null_check_load_fence2:
 ; CHECK:       // %bb.0: // %entry
@@ -352,6 +366,7 @@
   ret i32 %t
 }
 
+; TODO: We can fold to implicit null here, not sure why this isn't working.
 define void @imp_null_check_store(i32* %x) {
 ; CHECK-LABEL: imp_null_check_store:
 ; CHECK:       // %bb.0: // %entry
@@ -374,6 +389,7 @@
   ret void
 }
 
+;; TODO: can be implicit
 define void @imp_null_check_unordered_store(i32* %x) {
 ; CHECK-LABEL: imp_null_check_unordered_store:
 ; CHECK:       // %bb.0: // %entry
@@ -399,9 +415,10 @@
 define i32 @imp_null_check_neg_gep_load(i32* %x) {
 ; CHECK-LABEL: imp_null_check_neg_gep_load:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cbz x0, .LBB16_2
-; CHECK-NEXT:  // %bb.1: // %not_null
-; CHECK-NEXT:    ldur w0, [x0, #-128]
+; CHECK-NEXT:  .Ltmp8:
+; CHECK-NEXT:    ldur w0, [x0, #-128] // on-fault: .LBB16_2
+; CHECK-NEXT:    b .LBB16_1
+; CHECK-NEXT:  .LBB16_1: // %not_null
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB16_2: // %is_null
 ; CHECK-NEXT:    mov w0, #42