Index: lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -64,6 +64,8 @@ void placeBlockMarker(MachineBasicBlock &MBB); void placeLoopMarker(MachineBasicBlock &MBB); void placeTryMarker(MachineBasicBlock &MBB); + void fixUnwindMismatches(MachineFunction &MF); + void removeUnnecessaryInstrs(MachineFunction &MF); void rewriteDepthImmediates(MachineFunction &MF); void fixEndsAtEndOfFunction(MachineFunction &MF); @@ -78,11 +80,12 @@ // map DenseMap BeginToBottom; - // Helper functions to register scope information created by marker - // instructions. + // Helper functions to register / unregister scope information created by + // marker instructions. void registerScope(MachineInstr *Begin, MachineInstr *End); void registerTryScope(MachineInstr *Begin, MachineInstr *End, MachineBasicBlock *EHPad); + void unregisterScope(MachineInstr *Begin); MachineBasicBlock *getBottom(const MachineInstr *Begin); @@ -167,6 +170,42 @@ return InsertPos; } +// Returns true if this instruction may throw and it unwinds to an EH pad. +static bool HasUnwindDest(const MachineInstr *MI) { + const MachineBasicBlock *MBB = MI->getParent(); + if (!MBB->hasEHPadSuccessor()) + return false; + // In ExceptionPrepare pass, we temporarily added catch_all terminatepad as + // catch terminatepad's successor in order to make them sorted together. So + // even if catch terminatepad has an EH pad successor, it is not an unwind + // edge. + if (WebAssembly::isCatchTerminatePad(*MBB)) + return false; + // rethrows always has an unwind destination until the very end of + // CFGStackify. (rethrows without an unwind destinations are rethrow_to_caller + // instructions at this point, which will be converted to rethrows in + // rewriteDepthImmediates()) + if (MI->getOpcode() == WebAssembly::RETHROW) + return true; + // If not rethrow, this should be a call to have an unwind destination. + if (!MI->isCall()) + return false; + + // Even if this is a call, if there is a rethrow after this, not this call but + // the rethrow is the instruction that unwinds to an EH pad. + auto TermPos = MBB->getFirstTerminator(); + if (TermPos != MBB->end() && WebAssembly::isRethrow(*TermPos)) + return false; + + // At this point, this BB has an EH pad successor, this is a call instruction, + // and there is no rethrow after this. Check if this is the last call + // instruction in the BB. + for (auto Pos = MBB->end(), E = MBB->begin(); Pos != E; --Pos) + if (std::prev(Pos)->isCall()) + return &*std::prev(Pos) == MI; + return false; +} + void WebAssemblyCFGStackify::registerScope(MachineInstr *Begin, MachineInstr *End) { BeginToEnd[Begin] = End; @@ -181,6 +220,23 @@ EHPadToTry[EHPad] = Begin; } +void WebAssemblyCFGStackify::unregisterScope(MachineInstr *Begin) { + assert(BeginToEnd.count(Begin)); + MachineInstr *End = BeginToEnd[Begin]; + assert(EndToBegin.count(End)); + BeginToEnd.erase(Begin); + EndToBegin.erase(End); + MachineBasicBlock *EHPad = TryToEHPad.lookup(Begin); + if (EHPad) { + assert(EHPadToTry.count(EHPad)); + TryToEHPad.erase(Begin); + EHPadToTry.erase(EHPad); + } + MachineBasicBlock *Bottom = BeginToBottom.lookup(Begin); + if (Bottom) + BeginToBottom.erase(Begin); +} + // Given a LOOP/TRY marker, returns its bottom BB. Use cached information if any // to prevent recomputation. MachineBasicBlock * @@ -590,6 +646,557 @@ ScopeTops[Number] = Header; } +void WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { + const auto &TII = *MF.getSubtarget().getInstrInfo(); + SmallVector EHPadStack; + auto *EHInfo = MF.getWasmEHFuncInfo(); + bool Changed = false; + + // Linearing the control flow by placing TRY / END_TRY markers can create + // mismatches in unwind destinations. There are three kinds of mismatches we + // try to solve here. + + // 1. When an exception is not caught by an EH pad, the EH pad it will unwind + // to can be different from the original CFG after placing TRY markers. + // + // For example, there are EH pad BBs A and B. If A starts with a catch + // instruction, which means it catches exceptions with a certain tag, other + // exceptions will not be caught by A and unwind to B. To preserve this + // relationship, after placing markers, the program should be like this: + // try + // try + // catch <-- A + // end + // catch <-- B (it can be catch_all too) + // end + // And also there are EH pad BBs C and D, whose relationship is the same (if + // an exception is not caught by C, it unwinds to D). + // + // But the only thing CFGSort pass guarantees is BBs are sorted in the + // topological order. So it is possible the sorted order is A - C - B - D. In + // that case, after placing markers, the nesting relationship can be: + // try + // try + // try + // catch <-- A + // end + // try + // catch <-- C + // end + // catch <-- B + // end + // catch <-- D + // end + // This happens because when placing markers for EH pad D, its TRY marker + // should be placed above C, but C is within a more deeply nested context, so + // the TRY marker for EH pad D should be placed at the top here. In this case, + // while an exception that is not caught by C should unwind to D, it ends up + // unwind to B instead. + // + // We solve this case by wrapping C's TRY ~ END_TRY scope with one more + // try~end_try with catch_all, and make the exception rethrow to D. + // try + // try + // try + // catch <-- A + // end + // try (new) + // try + // catch <-- C + // end + // catch_all (new) + // rethrow D + // end (new) + // catch <-- B + // end + // catch <-- D + // end + // So if an exception is not caught by C, it should be caught by the newly + // inserted catch_all, and then is rethrown to D, preserving the original + // CFG's semantics. + for (auto &MBB : MF) { + // The catch_all terminatepad is grouped with the catch terminatepad and + // does not have its own try scope. + if (WebAssembly::isCatchAllTerminatePad(MBB)) + continue; + + for (auto &MI : MBB) { + if (MI.getOpcode() == WebAssembly::TRY) + EHPadStack.push_back(TryToEHPad[&MI]); + + if (WebAssembly::isCatch(MI)) + EHPadStack.pop_back(); + + if (MI.getOpcode() != WebAssembly::CATCH_I32 && + MI.getOpcode() != WebAssembly::CATCH_I64) + continue; + + // If the EH pad on the stack top is where we should unwind next, or the + // next unwind destination is the caller and WasmEHFuncInfo does not have + // an entry for the current EH pad, there is no mismatches and we're good. + auto *UnwindBB = EHInfo->hasEHPadUnwindDest(&MBB) + ? EHInfo->getEHPadUnwindDest(&MBB) + : nullptr; + if ((UnwindBB && UnwindBB == EHPadStack.back()) || + (!UnwindBB && EHPadStack.empty())) + continue; + + // Now we have a mismatch to fix. Wrap the current try~end scope with one + // more try ~ catch_all ~ end scope and insert a rethrow. + // - Before: + // bb0: + // try + // bb1: + // catch + // bb2: + // end + // next_inst + // + // - After: + // bb0: + // try (new) + // try + // bb1: + // catch + // bb2: + // end + // bb3: (new) + // catch_all (new) + // rethrow dest (new) + // bb4: (new) + // end (new) + // next_inst + Changed = true; + assert(MBB.isEHPad()); + MachineInstr *Try = EHPadToTry[&MBB], *End = BeginToEnd[Try]; + MachineInstr *OuterTry = + BuildMI(*Try->getParent(), Try, Try->getDebugLoc(), + TII.get(WebAssembly::TRY)) + .addImm(int64_t(WebAssembly::ExprType::Void)); + + MachineBasicBlock *OuterEHPad = MF.CreateMachineBasicBlock(); + OuterEHPad->setIsEHPad(); + OuterEHPad->setIsEHScopeEntry(); + MachineBasicBlock *OuterEndTryBB = End->getParent(); + MF.insert(MachineFunction::iterator(OuterEndTryBB), OuterEHPad); + + OuterEHPad->splice(OuterEHPad->end(), OuterEndTryBB, + OuterEndTryBB->begin(), + std::next(MachineBasicBlock::iterator(End))); + + BuildMI(OuterEHPad, End->getDebugLoc(), TII.get(WebAssembly::CATCH_ALL)); + if (UnwindBB) + BuildMI(OuterEHPad, End->getDebugLoc(), TII.get(WebAssembly::RETHROW)) + .addMBB(UnwindBB); + else + BuildMI(OuterEHPad, End->getDebugLoc(), + TII.get(WebAssembly::RETHROW_TO_CALLER)); + MachineInstr *OuterEndTry = + BuildMI(*OuterEndTryBB, OuterEndTryBB->begin(), End->getDebugLoc(), + TII.get(WebAssembly::END_TRY)); + registerTryScope(OuterTry, OuterEndTry, OuterEHPad); + + // Update WasmEHFuncInfo. + EHInfo->setEHPadUnwindDest(&MBB, OuterEHPad); + EHInfo->setThrowUnwindDest(OuterEHPad, UnwindBB); + + EHPadStack.push_back(OuterEHPad); + } + } + + assert(EHPadStack.empty()); + + // 2. When an instruction may throw, but the EH pad it will unwind to can be + // different from the original CFG. + // + // Example: we have the following CFG: + // bb0: + // call foo() (if it throws, unwind to bb2) + // bb1: + // call bar() (if it throws, unwind to bb3) + // bb2 (ehpad): + // catch + // bb3 (ehpad) + // catch + // + // And the CFG is sorted in ths order. Then after placing TRY markers, it will + // look like + // try + // try + // call foo() + // call bar() + // catch <- ehpad (bb2) + // end + // catch <- ehpad (bb3) + // end + // + // and now if bar() throws, it is going to end up ip in bb2, not bb3, where it + // is supposed to end up. We solve this problem by wrapping the 'call bar()' + // instruction by one more try/catch_all/end_try scope and make it rethrow to + // the right catch block. + // + // try + // try + // call foo() + // try (new) + // call bar() + // catch_all (new) + // rethrow bb3 + // end + // catch <- ehpad (bb2) + // end + // catch <- ehpad (bb3) + // end + for (auto MFI = MF.begin(), MFE = MF.end(); MFI != MFE; ++MFI) { + MachineBasicBlock *MBB = &*MFI; + // The catch_all terminatepad is grouped with the catch terminatepad and + // does not have its own try scope, and it contains no instructions that can + // throw. + if (WebAssembly::isCatchAllTerminatePad(*MBB)) + continue; + + for (auto MBBI = MBB->begin(); MBBI != MBB->end(); ++MBBI) { + MBB = &*MFI; + MachineInstr *MI = &*MBBI; + if (MI->getOpcode() == WebAssembly::TRY) + EHPadStack.push_back(TryToEHPad[MI]); + + if (WebAssembly::isCatch(*MI)) + EHPadStack.pop_back(); + + // Check if this instruction unwinds to another EH pad within this + // function + if (!HasUnwindDest(MI)) + continue; + + // If the EH pad on the stack top is where this instruction should unwind + // next, we're good. + assert(EHInfo->hasThrowUnwindDest(MBB)); + auto *UnwindBB = EHInfo->getThrowUnwindDest(MBB); + assert(UnwindBB); + if (EHPadStack.back() == UnwindBB) + continue; + + // Now we have a mismatch to fix. Wrap the current instruction with one + // more try ~ catch_all ~ end scope and insert a rethrow. + // - Before: + // bb0: + // try + // call @foo <- current instruction (should unwind to not bb2 but bb3) + // next_inst + // bb1: + // catch + // bb2: + // end + // bb3: + // catch + // + //- After: + // bb0: + // try + // try (new) + // call @foo <- current instruction + // bb4: (new) + // catch_all (new) + // rethrow bb3 (new) + // bb5: (new) + // end (new) + // next_inst + // bb2: + // end + // bb3: + // catch + Changed = true; + MachineInstr *NestedTry = + BuildMI(*MBB, *MI, MI->getDebugLoc(), TII.get(WebAssembly::TRY)) + .addImm(int64_t(WebAssembly::ExprType::Void)); + MachineBasicBlock *NestedEHPad = MF.CreateMachineBasicBlock(); + NestedEHPad->setIsEHPad(); + NestedEHPad->setIsEHScopeEntry(); + MachineBasicBlock *NestedEndTryBB = MF.CreateMachineBasicBlock(); + MF.insert(std::next(MachineFunction::iterator(MBB)), NestedEndTryBB); + MF.insert(MachineFunction::iterator(NestedEndTryBB), NestedEHPad); + + NestedEndTryBB->splice(NestedEndTryBB->end(), MBB, + std::next(MachineBasicBlock::iterator(MI)), + MBB->end()); + + BuildMI(NestedEHPad, MI->getDebugLoc(), TII.get(WebAssembly::CATCH_ALL)); + BuildMI(NestedEHPad, MI->getDebugLoc(), TII.get(WebAssembly::RETHROW)) + .addMBB(UnwindBB); + MachineInstr *NestedEndTry = + BuildMI(*NestedEndTryBB, NestedEndTryBB->begin(), MI->getDebugLoc(), + TII.get(WebAssembly::END_TRY)); + registerTryScope(NestedTry, NestedEndTry, NestedEHPad); + + // Update WasmEHFuncInfo. + EHInfo->setThrowUnwindDest(MBB, NestedEHPad); + EHInfo->setThrowUnwindDest(NestedEHPad, UnwindBB); + + // Set the cursor to the end of the newly created nested end_try, effectly + // skipping the the new scope. + MFI = MachineFunction::iterator(NestedEndTryBB); + MBBI = MachineBasicBlock::iterator(NestedEndTry); + } + } + + assert(EHPadStack.empty()); + + // 3. The same as 2, but in this case the instruction should unwind to a + // caller function, not another EH pad. We use a 'rethrow_to_caller' pseudo + // instruction in this case, which will be converted to a normal 'rethrow' + // instruction later in this pass. We handle them in the same way as 2, but if + // there are multiple calls in a BB that may throw and are supposed to unwind + // to the caller, they can be wrapped together in one nested try scope. (In 2, + // this couldn't happen, because may-throwing instruction there had an unwind + // destination, i.e., it was an invoke before, and there could be only one + // invoke within a BB.) + for (auto MFI = MF.begin(), MFE = MF.end(); MFI != MFE; ++MFI) { + MachineBasicBlock &MBB = *MFI; + MachineInstr *RangeBegin = nullptr, *RangeEnd = nullptr; // inclusive + // The catch_all terminatepad is grouped with the catch terminatepad and + // does not have its own try scope, and it contains no instructions that can + // throw. + if (WebAssembly::isCatchAllTerminatePad(MBB)) + continue; + + for (auto MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ++MBBI) { + MachineInstr &MI = *MBBI; + if (MI.getOpcode() == WebAssembly::TRY) + EHPadStack.push_back(TryToEHPad[&MI]); + + if (WebAssembly::isCatch(MI)) + EHPadStack.pop_back(); + + // If the current EH pad stack is empty, we don't care if the current + // instruction throws, because it is anyway going to throw to the caller. + // Also, the case when an instruction has an unwind destination has been + // already handled in above, so skip it. + if (EHPadStack.empty() || !WebAssembly::mayThrow(MI) || + HasUnwindDest(&MI)) + continue; + + // Now MI may throw and if it throws, it should throw up to the caller. + // (i.e., should not be caught by any of catches within this function) + // Record the start and the end of the current range. + if (!RangeBegin) + RangeBegin = &MI; + RangeEnd = &MI; + } + // If no mismatch found, we're good. + if (!RangeBegin) + continue; + + // Now we have one or more mismatched instructions to fix. Wrap the + // instruction range with one more try ~ catch_all ~ end scope and insert a + // rethrow_to_caller. + // - Before: + // bb0: + // try + // call @foo <- should unwind to the caller + // call @bar <- should unwind to the caller + // next_inst + // bb1: + // catch + // bb2: + // end + // + // - After + // bb0: + // try + // try (new) + // call @foo <- should unwind to the caller + // call @bar <- should unwind to the caller + // bb3: (new) + // catch_all (new) + // rethrow_to_caller (new) + // bb4: (new) + // end (new) + // next_inst + // bb1: + // catch + // bb2: + // end + Changed = true; + MachineInstr *NestedTry = + BuildMI(MBB, RangeBegin, RangeBegin->getDebugLoc(), + TII.get(WebAssembly::TRY)) + .addImm(int64_t(WebAssembly::ExprType::Void)); + MachineBasicBlock *NestedEHPad = MF.CreateMachineBasicBlock(); + NestedEHPad->setIsEHPad(); + NestedEHPad->setIsEHScopeEntry(); + MachineBasicBlock *NestedEndTryBB = MF.CreateMachineBasicBlock(); + MF.insert(std::next(MachineFunction::iterator(&MBB)), NestedEndTryBB); + MF.insert(MachineFunction::iterator(NestedEndTryBB), NestedEHPad); + + NestedEndTryBB->splice(NestedEndTryBB->end(), &MBB, + std::next(MachineBasicBlock::iterator(RangeEnd)), + MBB.end()); + + BuildMI(NestedEHPad, RangeEnd->getDebugLoc(), + TII.get(WebAssembly::CATCH_ALL)); + BuildMI(NestedEHPad, RangeEnd->getDebugLoc(), + TII.get(WebAssembly::RETHROW_TO_CALLER)); + MachineInstr *NestedEndTry = + BuildMI(*NestedEndTryBB, NestedEndTryBB->begin(), + RangeEnd->getDebugLoc(), TII.get(WebAssembly::END_TRY)); + registerTryScope(NestedTry, NestedEndTry, NestedEHPad); + + EHInfo->setThrowUnwindDest(&MBB, NestedEHPad); + + MFI = MachineFunction::iterator(NestedEndTryBB); + } + + assert(EHPadStack.empty()); + + // Renumber BBs and recalculate ScopeTop info because new BBs might have been + // created and inserted above. + if (Changed) { + MF.RenumberBlocks(); + ScopeTops.clear(); + ScopeTops.resize(MF.getNumBlockIDs()); + for (auto &MBB : reverse(MF)) { + for (auto &MI : reverse(MBB)) { + if (ScopeTops[MBB.getNumber()]) + break; + switch (MI.getOpcode()) { + case WebAssembly::END_BLOCK: + case WebAssembly::END_LOOP: + case WebAssembly::END_TRY: + ScopeTops[MBB.getNumber()] = EndToBegin[&MI]->getParent(); + break; + } + } + } + + // Because EH pads could have been added above, delete all EH pad successors + // from every BB and reconstruct them using WasmEHFuncInfo. + for (auto &MBB : MF) { + // This will be handled specially later + if (WebAssembly::isCatchTerminatePad(MBB)) + continue; + if (!EHInfo->hasThrowUnwindDest(&MBB)) + continue; + + // Delete all unwind successors and reconstruct them + for (auto *Succ : MBB.successors()) + if (Succ->isEHPad()) + MBB.removeSuccessor(Succ); + + auto *UnwindDest = EHInfo->getThrowUnwindDest(&MBB); + while (UnwindDest) { + MBB.addSuccessor(UnwindDest); + bool CatchAllExists = false; + for (auto &MI : *UnwindDest) { + if (MI.getOpcode() == WebAssembly::CATCH_ALL) + CatchAllExists = true; + if (WebAssembly::isCatch(MI)) + break; + } + if (CatchAllExists || !EHInfo->hasEHPadUnwindDest(UnwindDest)) + break; + UnwindDest = EHInfo->getEHPadUnwindDest(UnwindDest); + } + } + } + + // Fix terminate pad successors. In the current implementation, the terminate + // pad is the only case in which there are both 'catch' and 'catch_all' blocks + // in one try block, in the form of + // try + // ... + // catch + // call @__clang_call_terminate(..) + // catch_all + // call @std::terminate() + // end + // + // So all predecessors of the 'catch' terminatepad here should also be the + // predecessors for 'catch_all' terminatepad. But in ExceptionPrepare pass, we + // considered the 'catch' terminatepad as the only successor of 'catch_all' + // terminatepad, to make them be grouped and sorted together. Fix the + // it here so the predecessor-successor relationship is correct. + for (auto &MBB : MF) { + if (!WebAssembly::isCatchTerminatePad(MBB)) + continue; + MachineBasicBlock *CatchTermPad = &MBB; + MachineBasicBlock *CatchAllTermPad = + &*std::next(MachineFunction::iterator(CatchTermPad)); + assert(WebAssembly::isCatchAllTerminatePad(*CatchAllTermPad)); + for (auto *Pred : CatchTermPad->predecessors()) + Pred->addSuccessor(CatchAllTermPad); + CatchTermPad->removeSuccessor(CatchAllTermPad); + } +} + +void WebAssemblyCFGStackify::removeUnnecessaryInstrs(MachineFunction &MF) { + const auto &TII = *MF.getSubtarget().getInstrInfo(); + + // When there is an unconditional branch right before a catch instruction and + // it branches to the end of end_try marker, we don't need the branch, because + // it there is no exception, the control flow transfers to that point anyway. + // bb0: + // try + // ... + // br bb2 <- Not necessary + // bb1: + // catch + // ... + // bb2: + // end + for (auto &MBB : MF) { + if (!MBB.isEHPad() || WebAssembly::isCatchAllTerminatePad(MBB)) + continue; + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + SmallVector Cond; + + MachineBasicBlock *LPadLayoutPred = + &*std::prev(MachineFunction::iterator(&MBB)); + + MachineBasicBlock *AfterTryBB = BeginToEnd[EHPadToTry[&MBB]]->getParent(); + bool Analyzable = !TII.analyzeBranch(*LPadLayoutPred, TBB, FBB, Cond); + if (Analyzable && ((Cond.empty() && TBB && TBB == AfterTryBB) || + (!Cond.empty() && FBB && FBB == AfterTryBB))) + TII.removeBranch(*LPadLayoutPred); + } + + // When there are block / end_block markers that overlap with try / end_try + // markers, they are not necessary, because try / end_try markers also can + // serve as boundaries for branches. + // block <- Not necessary + // try + // ... + // catch + // ... + // end + // end <- Not necessary + SmallVector ToDelete; + for (auto &MBB : MF) { + for (auto &MI : MBB) { + if (MI.getOpcode() != WebAssembly::TRY) + continue; + + MachineInstr *Try = &MI, *EndTry = BeginToEnd[Try]; + MachineBasicBlock *TryBB = Try->getParent(); + MachineBasicBlock *EndTryBB = EndTry->getParent(); + for (auto B = MachineBasicBlock::iterator(Try), + E = std::next(MachineBasicBlock::iterator(EndTry)); + B != TryBB->begin() && E != EndTryBB->end() && + std::prev(B)->getOpcode() == WebAssembly::BLOCK && + E->getOpcode() == WebAssembly::END_BLOCK; + --B, ++E) { + ToDelete.push_back(&*std::prev(B)); + ToDelete.push_back(&*E); + } + } + } + for (auto *MI : ToDelete) { + MI->eraseFromParent(); + if (MI->getOpcode() == WebAssembly::BLOCK) + unregisterScope(MI); + } +} + static unsigned GetDepth(const SmallVectorImpl &Stack, const MachineBasicBlock *MBB) { @@ -803,12 +1410,22 @@ releaseMemory(); + const MCAsmInfo *MCAI = MF.getTarget().getMCAsmInfo(); + // Liveness is not tracked for VALUE_STACK physreg. MF.getRegInfo().invalidateLiveness(); // Place the BLOCK/LOOP/TRY markers to indicate the beginnings of scopes. placeMarkers(MF); + if (MCAI->getExceptionHandlingType() == ExceptionHandling::Wasm && + MF.getFunction().hasPersonalityFn()) { + // Fix mismatches in unwind destinations induced by linearizing the code. + fixUnwindMismatches(MF); + // Remove unnecessary instructions. + removeUnnecessaryInstrs(MF); + } + // Convert MBB operands in terminators to relative depth immediates. rewriteDepthImmediates(MF);