Index: include/llvm/Analysis/EHPersonalities.h =================================================================== --- include/llvm/Analysis/EHPersonalities.h +++ include/llvm/Analysis/EHPersonalities.h @@ -27,6 +27,7 @@ GNU_C_SjLj, GNU_CXX, GNU_CXX_SjLj, + GNU_CXX_Wasm, GNU_ObjC, MSVC_X86SEH, MSVC_Win64SEH, @@ -74,6 +75,22 @@ llvm_unreachable("invalid enum"); } +/// \brief Returns true if this personality function uses new Windows-style EH +/// instructions: catchswitch, catchpad/ret, and cleanuppad/ret. +inline bool usesWindowsEHInstructions(EHPersonality Pers) { + switch (Pers) { + case EHPersonality::GNU_CXX_Wasm: + case EHPersonality::MSVC_CXX: + case EHPersonality::MSVC_X86SEH: + case EHPersonality::MSVC_Win64SEH: + case EHPersonality::CoreCLR: + return true; + default: + return false; + } + llvm_unreachable("invalid enum"); +} + /// \brief Return true if this personality may be safely removed if there /// are no invoke instructions remaining in the current function. inline bool isNoOpWithoutInvoke(EHPersonality Pers) { Index: include/llvm/CodeGen/Passes.h =================================================================== --- include/llvm/CodeGen/Passes.h +++ include/llvm/CodeGen/Passes.h @@ -329,13 +329,17 @@ /// createWinEHPass - Prepares personality functions used by MSVC on Windows, /// in addition to the Itanium LSDA based personalities. - FunctionPass *createWinEHPass(); + FunctionPass *createWinEHPass(bool DemoteCatchSwitchPHIOnly = false); /// createSjLjEHPreparePass - This pass adapts exception handling code to use /// the GCC-style builtin setjmp/longjmp (sjlj) to handling EH control flow. /// FunctionPass *createSjLjEHPreparePass(); + /// createWasmEHPass - This pass adapts exception handling code to use + /// WebAssembly's exception handling scheme. 
+ FunctionPass *createWasmEHPass(); + /// LocalStackSlotAllocation - This pass assigns local frame indices to stack /// slots relative to one another and allocates base registers to access them /// when it is estimated by the target to be out of range of normal frame Index: include/llvm/IR/IntrinsicsNVVM.td =================================================================== --- include/llvm/IR/IntrinsicsNVVM.td +++ include/llvm/IR/IntrinsicsNVVM.td @@ -3920,7 +3920,9 @@ } multiclass NVVM_WMMA_LD { + defm _m32n8k16_load: NVVM_WMMA_LD_G<"m32n8k16">; defm _m16n16k16_load: NVVM_WMMA_LD_G<"m16n16k16">; + defm _m8n32k16_load: NVVM_WMMA_LD_G<"m8n32k16">; } defm int_nvvm_wmma: NVVM_WMMA_LD; @@ -3947,7 +3949,7 @@ # !if(WithStride, ".stride", "") # "." # Type>; -multiclass NVVM_WMMA_STD_GLT { def _stride: NVVM_WMMA_STD_GLSTS; def NAME: NVVM_WMMA_STD_GLSTS; @@ -3963,7 +3965,9 @@ } multiclass NVVM_WMMA_STD { + defm _m32n8k16_store: NVVM_WMMA_STD_G<"m32n8k16">; defm _m16n16k16_store: NVVM_WMMA_STD_G<"m16n16k16">; + defm _m8n32k16_store: NVVM_WMMA_STD_G<"m8n32k16">; } defm int_nvvm_wmma: NVVM_WMMA_STD; @@ -4033,7 +4037,9 @@ } multiclass NVVM_WMMA_MMA { + defm _m32n8k16_mma : NVVM_WMMA_MMA_G<"m32n8k16">; defm _m16n16k16_mma : NVVM_WMMA_MMA_G<"m16n16k16">; + defm _m8n32k16_mma : NVVM_WMMA_MMA_G<"m8n32k16">; } defm int_nvvm_wmma : NVVM_WMMA_MMA; Index: include/llvm/IR/IntrinsicsWebAssembly.td =================================================================== --- include/llvm/IR/IntrinsicsWebAssembly.td +++ include/llvm/IR/IntrinsicsWebAssembly.td @@ -45,4 +45,17 @@ def int_wasm_get_exception : Intrinsic<[llvm_ptr_ty], [], [IntrHasSideEffects]>; def int_wasm_get_ehselector : Intrinsic<[llvm_i32_ty], [], [IntrHasSideEffects]>; + +// wasm.catch returns the pointer to the exception object caught by wasm 'catch' +// instruction. 
+def int_wasm_catch : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], + [IntrHasSideEffects]>; + +// WebAssembly EH must maintain the landingpads in the order assigned to them +// by WasmEHPrepare pass to generate landingpad table in EHStreamer. This is +// used in order to give them the indices in WasmEHPrepare. +def int_wasm_landingpad_index: Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>; + +// Returns LSDA address of the current function. +def int_wasm_lsda : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; } Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -383,6 +383,7 @@ void initializeVerifierLegacyPassPass(PassRegistry&); void initializeVirtRegMapPass(PassRegistry&); void initializeVirtRegRewriterPass(PassRegistry&); +void initializeWasmEHPreparePass(PassRegistry&); void initializeWholeProgramDevirtPass(PassRegistry&); void initializeWinEHPreparePass(PassRegistry&); void initializeWriteBitcodePassPass(PassRegistry&); Index: lib/Analysis/EHPersonalities.cpp =================================================================== --- lib/Analysis/EHPersonalities.cpp +++ lib/Analysis/EHPersonalities.cpp @@ -25,20 +25,21 @@ if (!F) return EHPersonality::Unknown; return StringSwitch(F->getName()) - .Case("__gnat_eh_personality", EHPersonality::GNU_Ada) - .Case("__gxx_personality_v0", EHPersonality::GNU_CXX) - .Case("__gxx_personality_seh0",EHPersonality::GNU_CXX) - .Case("__gxx_personality_sj0", EHPersonality::GNU_CXX_SjLj) - .Case("__gcc_personality_v0", EHPersonality::GNU_C) - .Case("__gcc_personality_seh0",EHPersonality::GNU_C) - .Case("__gcc_personality_sj0", EHPersonality::GNU_C_SjLj) - .Case("__objc_personality_v0", EHPersonality::GNU_ObjC) - .Case("_except_handler3", EHPersonality::MSVC_X86SEH) - .Case("_except_handler4", EHPersonality::MSVC_X86SEH) - .Case("__C_specific_handler", EHPersonality::MSVC_Win64SEH) - .Case("__CxxFrameHandler3", 
EHPersonality::MSVC_CXX) - .Case("ProcessCLRException", EHPersonality::CoreCLR) - .Case("rust_eh_personality", EHPersonality::Rust) + .Case("__gnat_eh_personality", EHPersonality::GNU_Ada) + .Case("__gxx_personality_v0", EHPersonality::GNU_CXX) + .Case("__gxx_personality_seh0", EHPersonality::GNU_CXX) + .Case("__gxx_personality_sj0", EHPersonality::GNU_CXX_SjLj) + .Case("__gxx_wasm_personality_v0", EHPersonality::GNU_CXX_Wasm) + .Case("__gcc_personality_v0", EHPersonality::GNU_C) + .Case("__gcc_personality_seh0", EHPersonality::GNU_C) + .Case("__gcc_personality_sj0", EHPersonality::GNU_C_SjLj) + .Case("__objc_personality_v0", EHPersonality::GNU_ObjC) + .Case("_except_handler3", EHPersonality::MSVC_X86SEH) + .Case("_except_handler4", EHPersonality::MSVC_X86SEH) + .Case("__C_specific_handler", EHPersonality::MSVC_Win64SEH) + .Case("__CxxFrameHandler3", EHPersonality::MSVC_CXX) + .Case("ProcessCLRException", EHPersonality::CoreCLR) + .Case("rust_eh_personality", EHPersonality::Rust) .Default(EHPersonality::Unknown); } @@ -47,6 +48,7 @@ case EHPersonality::GNU_Ada: return "__gnat_eh_personality"; case EHPersonality::GNU_CXX: return "__gxx_personality_v0"; case EHPersonality::GNU_CXX_SjLj: return "__gxx_personality_sj0"; + case EHPersonality::GNU_CXX_Wasm: return "__gxx_wasm_personality_v0"; case EHPersonality::GNU_C: return "__gcc_personality_v0"; case EHPersonality::GNU_C_SjLj: return "__gcc_personality_sj0"; case EHPersonality::GNU_ObjC: return "__objc_personality_v0"; Index: lib/Analysis/MustExecute.cpp =================================================================== --- lib/Analysis/MustExecute.cpp +++ lib/Analysis/MustExecute.cpp @@ -52,7 +52,7 @@ Function *Fn = CurLoop->getHeader()->getParent(); if (Fn->hasPersonalityFn()) if (Constant *PersonalityFn = Fn->getPersonalityFn()) - if (isFuncletEHPersonality(classifyEHPersonality(PersonalityFn))) + if (usesWindowsEHInstructions(classifyEHPersonality(PersonalityFn))) SafetyInfo->BlockColors = colorEHFunclets(*Fn); 
} Index: lib/CodeGen/CMakeLists.txt =================================================================== --- lib/CodeGen/CMakeLists.txt +++ lib/CodeGen/CMakeLists.txt @@ -157,6 +157,7 @@ UnreachableBlockElim.cpp ValueTypes.cpp VirtRegMap.cpp + WasmEHPrepare.cpp WinEHPrepare.cpp XRayInstrumentation.cpp Index: lib/CodeGen/CodeGen.cpp =================================================================== --- lib/CodeGen/CodeGen.cpp +++ lib/CodeGen/CodeGen.cpp @@ -101,6 +101,7 @@ initializeUnreachableMachineBlockElimPass(Registry); initializeVirtRegMapPass(Registry); initializeVirtRegRewriterPass(Registry); + initializeWasmEHPreparePass(Registry); initializeWinEHPreparePass(Registry); initializeXRayInstrumentationPass(Registry); initializeMIRCanonicalizerPass(Registry); Index: lib/CodeGen/DwarfEHPrepare.cpp =================================================================== --- lib/CodeGen/DwarfEHPrepare.cpp +++ lib/CodeGen/DwarfEHPrepare.cpp @@ -197,7 +197,7 @@ // Check the personality, don't do anything if it's funclet-based. EHPersonality Pers = classifyEHPersonality(Fn.getPersonalityFn()); - if (isFuncletEHPersonality(Pers)) + if (usesWindowsEHInstructions(Pers)) return false; LLVMContext &Ctx = Fn.getContext(); Index: lib/CodeGen/MachineVerifier.cpp =================================================================== --- lib/CodeGen/MachineVerifier.cpp +++ lib/CodeGen/MachineVerifier.cpp @@ -646,7 +646,7 @@ !(AsmInfo && AsmInfo->getExceptionHandlingType() == ExceptionHandling::SjLj && BB && isa(BB->getTerminator())) && - !isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) + !usesWindowsEHInstructions(classifyEHPersonality(F.getPersonalityFn()))) report("MBB has more than one landing pad successor", MBB); // Call AnalyzeBranch. If it succeeds, there several more conditions to check. 
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1348,10 +1348,14 @@ auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX; bool IsCoreCLR = Pers == EHPersonality::CoreCLR; + bool IsWasmCXX = Pers == EHPersonality::GNU_CXX_Wasm; MachineBasicBlock *CatchPadMBB = FuncInfo.MBB; // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues. - if (IsMSVCCXX || IsCoreCLR) + if (IsMSVCCXX || IsCoreCLR || IsWasmCXX) CatchPadMBB->setIsEHFuncletEntry(); + // Wasm does not need catchpads anymore + if (IsWasmCXX) + return; DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other, getControlRoot())); } @@ -1418,6 +1422,7 @@ classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX; bool IsCoreCLR = Personality == EHPersonality::CoreCLR; + bool IsWasmCXX = Personality == EHPersonality::GNU_CXX_Wasm; while (EHPadBB) { const Instruction *Pad = EHPadBB->getFirstNonPHI(); @@ -1437,7 +1442,7 @@ for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) { UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob); // For MSVC++ and the CLR, catchblocks are funclets and need prologues. - if (IsMSVCCXX || IsCoreCLR) + if (IsMSVCCXX || IsCoreCLR || IsWasmCXX) UnwindDests.back().first->setIsEHFuncletEntry(); } NewEHPadBB = CatchSwitch->getUnwindDest(); @@ -6175,6 +6180,12 @@ HasTailCall = true; return nullptr; } + + case Intrinsic::wasm_landingpad_index: { + // TODO + return nullptr; + } + } } @@ -6324,7 +6335,10 @@ DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); // Inform MachineModuleInfo of range. - if (MF.hasEHFunclets()) { + auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); + // There is a platform (e.g. 
wasm) that uses funclet style IR but does not + // actually use outlined funclets and their LSDA info style. + if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) { assert(CLI.CS); WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo(); EHInfo->addIPToStateRange(cast(CLI.CS.getInstruction()), Index: lib/CodeGen/TargetPassConfig.cpp =================================================================== --- lib/CodeGen/TargetPassConfig.cpp +++ lib/CodeGen/TargetPassConfig.cpp @@ -661,7 +661,12 @@ addPass(createDwarfEHPass()); break; case ExceptionHandling::Wasm: - // TODO to prevent warning + // Wasm EH uses Windows EH instructions, but it does not need to demote PHIs + // on catchpads and cleanuppads because it does not outline them into + // funclets. Catchswitch blocks are not lowered in SelectionDAG, so we + // should remove PHIs there. + addPass(createWinEHPass(/*DemoteCatchSwitchPHIOnly=*/true)); + addPass(createWasmEHPass()); break; case ExceptionHandling::None: addPass(createLowerInvokePass()); Index: lib/CodeGen/WasmEHPrepare.cpp =================================================================== --- /dev/null +++ lib/CodeGen/WasmEHPrepare.cpp @@ -0,0 +1,324 @@ +//===-- WasmEHPrepare - Prepare exception handling for WebAssembly -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This transformation is designed for use by code generators which use +// WebAssembly exception handling scheme. +// +// WebAssembly exception handling uses Windows exception IR for the middle level +// representation. This pass does the following transformation for every +// catchpad block: +// (In C-style pseudocode) +// +// - Before: +// catchpad ... +// exn = wasm.get.exception(); +// selector = wasm.get.ehselector(); +// ... 
+// +// - After: +// catchpad ... +// exn = wasm.catch(0); // 0 is a tag for C++ +// wasm.landingpad.index(index); +// // Only add below in case it's not a single catch (...) +// __wasm_lpad_context.lpad_index = index; +// __wasm_lpad_context.lsda = wasm.lsda(); +// _Unwind_CallPersonality(exn); +// int selector = __wasm_lpad_context.selector; +// ... +// +// Also, does the following for a cleanuppad block with a call to +// __clang_call_terminate(): +// - Before: +// cleanuppad ... +// exn = wasm.get.exception(); +// __clang_call_terminate(exn); +// +// - After: +// cleanuppad ... +// exn = wasm.catch(0); // 0 is a tag for C++ +// __clang_call_terminate(exn); +// +// +// * Background: WebAssembly EH instructions +// WebAssembly's try and catch instructions are structured as follows: +// try +// instruction* +// catch (C++ tag) +// instruction* +// ... +// catch_all +// instruction* +// try_end +// +// A catch instruction in WebAssembly does not correspond to a C++ catch clause. +// In WebAssembly, there is a single catch instruction for all C++ exceptions. +// There can be more catch instructions for exceptions in other languages, but +// they are not generated for now. catch_all catches all exceptions including +// foreign exceptions. We turn catchpads into catch (C++ tag) and cleanuppads +// into catch_all, with one exception: cleanuppad with a call to +// __clang_call_terminate should be both in catch (C++ tag) and catch_all. +// +// +// * Background: Direct personality function call +// In WebAssembly EH, the VM is responsible for unwinding stack once an +// exception is thrown. After stack is unwound, the control flow is transferred +// to WebAssembly 'catch' instruction, which returns a caught exception object. +// +// Unwinding stack is not done by libunwind but the VM, so the personality +// function in libcxxabi cannot be called from libunwind during the unwinding +// process. 
So after a catch instruction, we insert a call to a wrapper function +// in libunwind that in turn calls the real personality function. +// +// In Itanium EH, if the personality function decides there is no matching catch +// clause in a call frame and no cleanup action to perform, the unwinder doesn't +// stop there and continues unwinding. But in Wasm EH, the unwinder stops at +// every call frame with a catch instruction, after which the personality +// function is called from the compiler-generated user code here. +// +// In libunwind, we have this struct that serves as a communication channel +// between the compiler-generated user code and the personality function in +// libcxxabi. +// +// struct _Unwind_LandingPadContext { +// uintptr_t lpad_index; +// uintptr_t lsda; +// uintptr_t selector; +// }; +// struct _Unwind_LandingPadContext __wasm_lpad_context = ...; +// +// And this wrapper in libunwind calls the personality function. +// +// _Unwind_Reason_Code _Unwind_CallPersonality(void *exception_ptr) { +// struct _Unwind_Exception *exception_obj = +// (struct _Unwind_Exception *)exception_ptr; +// _Unwind_Reason_Code ret = __gxx_personality_v0( +// 1, _UA_CLEANUP_PHASE, exception_obj->exception_class, exception_obj, +// (struct _Unwind_Context *)__wasm_lpad_context); +// return ret; +// } +// +// We pass a landing pad index, and the address of LSDA for the current function +// to the wrapper function _Unwind_CallPersonality in libunwind, and we retrieve +// the selector after it returns. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "wasmehprepare" + +namespace { +class WasmEHPrepare : public FunctionPass { + Type *LPadContextTy = nullptr; // type of 'struct _Unwind_LandingPadContext' + GlobalVariable *LPadContextGV = nullptr; // __wasm_lpad_context + + // Field addresses of struct _Unwind_LandingPadContext + Value *LPadIndexField = nullptr; // lpad_index field + Value *LSDAField = nullptr; // lsda field + Value *SelectorField = nullptr; // selector field + + Function *CatchF = nullptr; // wasm.catch() intrinsic + Function *LPadIndexF = nullptr; // wasm.landingpad.index() intrinsic + Function *LSDAF = nullptr; // wasm.lsda() intrinsic + Function *GetExnF = nullptr; // wasm.get.exception() intrinsic + Function *GetSelectorF = nullptr; // wasm.get.ehselector() intrinsic + Function *CallPersonalityF = nullptr; // _Unwind_CallPersonality() wrapper + Function *ClangCallTermF = nullptr; // __clang_call_terminate() function + + void prepareEHPad(BasicBlock *BB, unsigned Index); + void prepareTerminateCleanupPad(BasicBlock *BB); // NOTE(review): declared but no definition appears in this file - confirm it is needed + +public: + static char ID; // Pass identification, replacement for typeid + + WasmEHPrepare() : FunctionPass(ID) {} + + bool doInitialization(Module &M) override; + bool runOnFunction(Function &F) override; + + StringRef getPassName() const override { + return "WebAssembly Exception handling preparation"; + } +}; +} // end anonymous namespace + +char WasmEHPrepare::ID = 0; +INITIALIZE_PASS(WasmEHPrepare, DEBUG_TYPE, "Prepare WebAssembly exceptions", + 
false, false); + +FunctionPass *llvm::createWasmEHPass() { return new WasmEHPrepare(); } + +bool WasmEHPrepare::doInitialization(Module &M) { + IRBuilder<> IRB(M.getContext()); + LPadContextTy = StructType::get(IRB.getInt32Ty(), // lpad_index + IRB.getInt8PtrTy(), // lsda + IRB.getInt32Ty() // selector + ); + + // __wasm_lpad_context global variable + LPadContextGV = cast( + M.getOrInsertGlobal("__wasm_lpad_context", LPadContextTy)); + LPadIndexField = IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 0, + "lpad_index_gep"); + LSDAField = + IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 1, "lsda_gep"); + SelectorField = IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 2, + "selector_gep"); + + // wasm.catch() intrinsic, which will be lowered to wasm 'catch' instruction. + CatchF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_catch); + // wasm.landingpad.index() intrinsic, which is to specify landingpad index + LPadIndexF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_landingpad_index); + // wasm.lsda() intrinsic. Returns the address of LSDA table for the current + // function. + LSDAF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_lsda); + // wasm.get.exception() and wasm.get.ehselector() intrinsics. Calls to these + // are generated in clang. 
+ GetExnF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_get_exception); + GetSelectorF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_get_ehselector); + + // _Unwind_CallPersonality() wrapper function, which calls the personality + CallPersonalityF = cast(M.getOrInsertFunction( + "_Unwind_CallPersonality", IRB.getInt32Ty(), IRB.getInt8PtrTy())); + + // __clang_call_terminate() function, which is inserted by clang in case a + // cleanup throws + ClangCallTermF = M.getFunction("__clang_call_terminate"); + + return false; +} + +bool WasmEHPrepare::runOnFunction(Function &F) { + SmallVector CatchPads; + SmallVector CleanupPads; + for (BasicBlock &BB : F) { + if (!BB.isEHPad()) + continue; + if (isa(BB.getFirstNonPHI())) + CatchPads.push_back(&BB); + if (isa(BB.getFirstNonPHI())) + CleanupPads.push_back(&BB); + } + + if (CatchPads.empty() && CleanupPads.empty()) + return false; + assert(F.hasPersonalityFn() && "Personality function not found"); + + unsigned Index = 0; + for (auto *BB : CatchPads) { + CatchPadInst *CPI = cast(BB->getFirstNonPHI()); + // In case of a single catch (...), we don't need to emit LSDA + if (CPI->getNumArgOperands() == 1 && + cast(CPI->getArgOperand(0))->isNullValue()) + prepareEHPad(BB, -1); + else + prepareEHPad(BB, Index++); + } + + if (!ClangCallTermF) + return !CatchPads.empty(); + + // Cleanuppads will turn into catch_all later, but cleanuppads with a call to + // __clang_call_terminate() is a special case. __clang_call_terminate() takes + // an exception object, so we have to duplicate call in both 'catch <C++ tag>' + // and 'catch_all' clauses. Here we only insert a call to catch; the + // duplication will be done later. In catch_all, the exception object will be + // set to null. 
+ for (auto *BB : CleanupPads) + for (auto &I : *BB) + if (auto *CI = dyn_cast(&I)) + if (CI->getCalledValue() == ClangCallTermF) + prepareEHPad(BB, -1); + + return true; +} + +void WasmEHPrepare::prepareEHPad(BasicBlock *BB, unsigned Index) { + assert(BB->isEHPad() && "BB is not an EHPad!"); + IRBuilder<> IRB(BB->getContext()); + + IRB.SetInsertPoint(&*BB->getFirstInsertionPt()); + // The argument to wasm.catch() is the tag for C++ exceptions, which we set to + // 0 for this module. + // Pseudocode: void *exn = wasm.catch(0); + Instruction *Exn = IRB.CreateCall(CatchF, IRB.getInt32(0), "exn"); + // Replace the return value of wasm.get.exception() with the return value from + // wasm.catch(). + Instruction *GetExnCI = nullptr, *GetSelectorCI = nullptr; + for (auto &I : *BB) + if (auto *CI = dyn_cast(&I)) { + if (CI->getCalledValue() == GetExnF) + GetExnCI = CI; + if (CI->getCalledValue() == GetSelectorF) + GetSelectorCI = CI; + } + assert(GetExnCI && "wasm.get.exception() call does not exist"); + GetExnCI->replaceAllUsesWith(Exn); + GetExnCI->eraseFromParent(); + + // In case it is a catchpad with single catch (...) or a cleanuppad, we don't + // need to call personality function because we don't need a selector. + FuncletPadInst *FPI = cast(BB->getFirstNonPHI()); + if (FPI->getNumArgOperands() == 0 || + (FPI->getNumArgOperands() == 1 && + cast(FPI->getArgOperand(0))->isNullValue())) { + if (GetSelectorCI) { + assert(GetSelectorCI->use_empty() && + "wasm.get.ehselector() still has uses!"); + GetSelectorCI->eraseFromParent(); + } + return; + } + IRB.SetInsertPoint(Exn->getNextNode()); + + // This is to create a map of <landingpad EH label, landingpad index> in + // SelectionDAGISel, which is to be used in EHStreamer to emit LSDA tables. 
+ // Pseudocode: wasm.landingpad.index(Index); + IRB.CreateCall(LPadIndexF, IRB.getInt32(Index)); + + // Pseudocode: __wasm_lpad_context.lpad_index = index; + IRB.CreateStore(IRB.getInt32(Index), LPadIndexField, /*isVolatile=*/true); + + // Store LSDA address only if this catchpad belongs to a top-level + // catchswitch. If there is another catchpad that dominates this pad, we don't + // need to store LSDA address again, because they are the same throughout the + // function and have been already stored before. + // TODO Can we not store LSDA address in user function but make libcxxabi + // compute it? + CatchPadInst *CPI = cast(FPI); + if (isa(CPI->getCatchSwitch()->getParentPad())) + // Pseudocode: __wasm_lpad_context.lsda = wasm.lsda(); + IRB.CreateStore(IRB.CreateCall(LSDAF), LSDAField, true); + + // Pseudocode: _Unwind_CallPersonality(exn); + CallInst *PersCI = + IRB.CreateCall(CallPersonalityF, Exn, OperandBundleDef("funclet", CPI)); + PersCI->setDoesNotThrow(); + + // Pseudocode: int selector = __wasm_lpad_context.selector; + Instruction *Selector = IRB.CreateLoad(SelectorField, "selector"); + + // Replace the return value from wasm.get.ehselector() with the selector value + // loaded from __wasm_lpad_context.selector. + assert(GetSelectorCI && "wasm.get.ehselector() call does not exist"); + GetSelectorCI->replaceAllUsesWith(Selector); + GetSelectorCI->eraseFromParent(); +} Index: lib/CodeGen/WinEHPrepare.cpp =================================================================== --- lib/CodeGen/WinEHPrepare.cpp +++ lib/CodeGen/WinEHPrepare.cpp @@ -49,12 +49,17 @@ cl::desc("Do not remove implausible terminators or other similar cleanups"), cl::init(false)); +static cl::opt DemoteCatchSwitchPHIOnlyOpt( + "demote-catchswitch-only", cl::Hidden, + cl::desc("Demote catchswitch BBs only (for wasm EH)"), cl::init(false)); + namespace { class WinEHPrepare : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid. 
- WinEHPrepare() : FunctionPass(ID) {} + WinEHPrepare(bool DemoteCatchSwitchPHIOnly = false) + : FunctionPass(ID), DemoteCatchSwitchPHIOnly(DemoteCatchSwitchPHIOnly) {} bool runOnFunction(Function &Fn) override; @@ -77,12 +82,14 @@ bool prepareExplicitEH(Function &F); void colorFunclets(Function &F); - void demotePHIsOnFunclets(Function &F); + void demotePHIsOnFunclets(Function &F, bool DemoteCatchSwitchPHIOnly); void cloneCommonBlocks(Function &F); void removeImplausibleInstructions(Function &F); void cleanupPreparedFunclets(Function &F); void verifyPreparedFunclets(Function &F); + bool DemoteCatchSwitchPHIOnly; + // All fields are reset by runOnFunction. EHPersonality Personality = EHPersonality::Unknown; @@ -97,7 +104,9 @@ INITIALIZE_PASS(WinEHPrepare, DEBUG_TYPE, "Prepare Windows exceptions", false, false) -FunctionPass *llvm::createWinEHPass() { return new WinEHPrepare(); } +FunctionPass *llvm::createWinEHPass(bool DemoteCatchSwitchPHIOnly) { + return new WinEHPrepare(DemoteCatchSwitchPHIOnly); +} bool WinEHPrepare::runOnFunction(Function &Fn) { if (!Fn.hasPersonalityFn()) @@ -107,7 +116,7 @@ Personality = classifyEHPersonality(Fn.getPersonalityFn()); // Do nothing if this is not a funclet-based personality. - if (!isFuncletEHPersonality(Personality)) + if (!usesWindowsEHInstructions(Personality)) return false; DL = &Fn.getParent()->getDataLayout(); @@ -677,13 +686,17 @@ } } -void WinEHPrepare::demotePHIsOnFunclets(Function &F) { +void WinEHPrepare::demotePHIsOnFunclets(Function &F, + bool DemoteCatchSwitchPHIOnly) { // Strip PHI nodes off of EH pads. 
SmallVector PHINodes; for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) { BasicBlock *BB = &*FI++; if (!BB->isEHPad()) continue; + if (DemoteCatchSwitchPHIOnly && !isa(BB->getFirstNonPHI())) + continue; + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { Instruction *I = &*BI++; auto *PN = dyn_cast(I); @@ -1031,7 +1044,8 @@ cloneCommonBlocks(F); if (!DisableDemotion) - demotePHIsOnFunclets(F); + demotePHIsOnFunclets(F, DemoteCatchSwitchPHIOnly || + DemoteCatchSwitchPHIOnlyOpt); if (!DisableCleanups) { DEBUG(verifyFunction(F)); Index: lib/Target/NVPTX/NVPTX.td =================================================================== --- lib/Target/NVPTX/NVPTX.td +++ lib/Target/NVPTX/NVPTX.td @@ -52,6 +52,8 @@ "Target SM 6.2">; def SM70 : SubtargetFeature<"sm_70", "SmVersion", "70", "Target SM 7.0">; +def SM72 : SubtargetFeature<"sm_72", "SmVersion", "72", + "Target SM 7.2">; // PTX Versions def PTX32 : SubtargetFeature<"ptx32", "PTXVersion", "32", @@ -68,6 +70,8 @@ "Use PTX version 5.0">; def PTX60 : SubtargetFeature<"ptx60", "PTXVersion", "60", "Use PTX version 6.0">; +def PTX61 : SubtargetFeature<"ptx61", "PTXVersion", "61", + "Use PTX version 6.1">; //===----------------------------------------------------------------------===// // NVPTX supported processors. 
@@ -89,6 +93,7 @@ def : Proc<"sm_61", [SM61, PTX50]>; def : Proc<"sm_62", [SM62, PTX50]>; def : Proc<"sm_70", [SM70, PTX60]>; +def : Proc<"sm_72", [SM72, PTX61]>; def NVPTXInstrInfo : InstrInfo { } Index: lib/Target/NVPTX/NVPTXISelLowering.cpp =================================================================== --- lib/Target/NVPTX/NVPTXISelLowering.cpp +++ lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -3329,7 +3329,23 @@ case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col: case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row: case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride: - case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride: { + case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride: + case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col: + case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row: + case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col_stride: + case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row_stride: + case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col: + case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row: + case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col_stride: + case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row_stride: + case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col: + case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row: + case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col_stride: + case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row_stride: + case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col: + case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row: + case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col_stride: + case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row_stride: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::v8f16; Info.ptrVal = I.getArgOperand(0); @@ -3342,7 +3358,15 @@ case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col: case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row: case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride: - case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride: { + case 
Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride: + case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col: + case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row: + case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col_stride: + case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row_stride: + case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col: + case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row: + case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col_stride: + case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row_stride: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::v4f16; Info.ptrVal = I.getArgOperand(0); @@ -3355,7 +3379,15 @@ case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col: case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row: case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride: - case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride: { + case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride: + case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col: + case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row: + case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col_stride: + case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row_stride: + case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col: + case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row: + case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col_stride: + case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row_stride: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::v8f32; Info.ptrVal = I.getArgOperand(0); @@ -3368,7 +3400,15 @@ case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col: case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row: case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride: - case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride: { + case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride: + case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col: + case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row: + case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col_stride: + case 
Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row_stride: + case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col: + case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row: + case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col_stride: + case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row_stride: { Info.opc = ISD::INTRINSIC_VOID; Info.memVT = MVT::v4f16; Info.ptrVal = I.getArgOperand(0); @@ -3381,7 +3421,15 @@ case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col: case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row: case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride: - case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride: { + case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride: + case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col: + case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row: + case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col_stride: + case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row_stride: + case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col: + case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row: + case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col_stride: + case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row_stride: { Info.opc = ISD::INTRINSIC_VOID; Info.memVT = MVT::v8f32; Info.ptrVal = I.getArgOperand(0); Index: lib/Target/NVPTX/NVPTXInstrInfo.td =================================================================== --- lib/Target/NVPTX/NVPTXInstrInfo.td +++ lib/Target/NVPTX/NVPTXInstrInfo.td @@ -142,6 +142,7 @@ def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">; def hasPTX60 : Predicate<"Subtarget->getPTXVersion() >= 60">; +def hasPTX61 : Predicate<"Subtarget->getPTXVersion() >= 61">; def hasSM30 : Predicate<"Subtarget->getSmVersion() >= 30">; def hasSM70 : Predicate<"Subtarget->getSmVersion() >= 70">; Index: lib/Target/NVPTX/NVPTXIntrinsics.td =================================================================== --- lib/Target/NVPTX/NVPTXIntrinsics.td +++ lib/Target/NVPTX/NVPTXIntrinsics.td @@ -7378,7 +7378,11 @@ class WMMA_LOAD_GALSTOS - 
: EmptyNVPTXInst, Requires<[hasPTX60, hasSM70]> { + : EmptyNVPTXInst, + Requires<[!if(!eq(Geometry, "m16n16k16"), + hasPTX60, + hasPTX61), + hasSM70]> { // Pattern (created by WMMA_LOAD_INTR_HELPER below) that matches the intrinsic // for this function. PatFrag IntrMatcher = !cast<PatFrag>("INT_WMMA_" @@ -7420,10 +7424,10 @@ let InOperandList = Ins; let AsmString = "wmma.load." # Abc - # ".sync." - # Layout - # ".m16n16k16" - # Space + # ".sync" + # "." # Layout + # "." # Geometry + # Space # "." # Type # " \t" # !if(!eq(Abc#Type, "cf16"), "{{$r0, $r1, $r2, $r3}}", @@ -7512,7 +7516,9 @@ defm _load_c_f32: WMMA_LOAD_GAT; } +defm INT_WMMA_m32n8k16: WMMA_LOAD_G<"m32n8k16">; defm INT_WMMA_m16n16k16: WMMA_LOAD_G<"m16n16k16">; +defm INT_WMMA_m8n32k16: WMMA_LOAD_G<"m8n32k16">; // // wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32] @@ -7520,7 +7526,11 @@ class WMMA_STORE_D_GLSTSO - : EmptyNVPTXInst, Requires<[hasPTX60, hasSM70]> { + : EmptyNVPTXInst, + Requires<[!if(!eq(Geometry, "m16n16k16"), + hasPTX60, + hasPTX61), + hasSM70]> { PatFrag IntrMatcher = !cast<PatFrag>("INT_WMMA" # "_" # Geometry # "_store_d" # "_" # Type @@ -7641,11 +7651,9 @@ defm _store_d_f32: WMMA_STORE_D_GT; } -// multiclass WMMA_STORE_D { -// defm _m16n16k16: WMMA_STORE_D_G<"m16n16k16">; -// } - +defm INT_WMMA_m32n8k16: WMMA_STORE_D_G<"m32n8k16">; defm INT_WMMA_m16n16k16: WMMA_STORE_D_G<"m16n16k16">; +defm INT_WMMA_m8n32k16: WMMA_STORE_D_G<"m8n32k16">; // WMMA.MMA class WMMA_MMA_GABDCS - : EmptyNVPTXInst, Requires<[hasPTX60, hasSM70]> { + : EmptyNVPTXInst, + Requires<[!if(!eq(Geometry, "m16n16k16"), + hasPTX60, + hasPTX61), + hasSM70]> { Intrinsic Intr = !cast<Intrinsic>("int_nvvm_wmma_" # Geometry # "_mma" @@ -7686,7 +7698,7 @@ let AsmString = "wmma.mma.sync." # ALayout # "." # BLayout - # ".m16n16k16" + # "." # Geometry # "." # DType # "."
# CType # Satfinite # "\n\t\t" @@ -7734,4 +7746,6 @@ defm _row: WMMA_MMA_GA; } +defm INT_WMMA_MMA_m32n8k16 : WMMA_MMA_G<"m32n8k16">; defm INT_WMMA_MMA_m16n16k16 : WMMA_MMA_G<"m16n16k16">; +defm INT_WMMA_MMA_m8n32k16 : WMMA_MMA_G<"m8n32k16">; Index: lib/Target/WebAssembly/CMakeLists.txt =================================================================== --- lib/Target/WebAssembly/CMakeLists.txt +++ lib/Target/WebAssembly/CMakeLists.txt @@ -17,6 +17,7 @@ WebAssemblyCallIndirectFixup.cpp WebAssemblyCFGStackify.cpp WebAssemblyCFGSort.cpp + WebAssemblyExceptionPrepare.cpp WebAssemblyExplicitLocals.cpp WebAssemblyFastISel.cpp WebAssemblyFixIrreducibleControlFlow.cpp Index: lib/Target/WebAssembly/WebAssembly.h =================================================================== --- lib/Target/WebAssembly/WebAssembly.h +++ lib/Target/WebAssembly/WebAssembly.h @@ -47,6 +47,7 @@ FunctionPass *createWebAssemblyExplicitLocals(); FunctionPass *createWebAssemblyFixIrreducibleControlFlow(); FunctionPass *createWebAssemblyCFGSort(); +FunctionPass *createWebAssemblyExceptionPrepare(); FunctionPass *createWebAssemblyCFGStackify(); FunctionPass *createWebAssemblyLowerBrUnless(); FunctionPass *createWebAssemblyRegNumbering(); @@ -69,6 +70,7 @@ void initializeWebAssemblyExplicitLocalsPass(PassRegistry &); void initializeWebAssemblyFixIrreducibleControlFlowPass(PassRegistry &); void initializeWebAssemblyCFGSortPass(PassRegistry &); +void initializeWebAssemblyExceptionPreparePass(PassRegistry &); void initializeWebAssemblyCFGStackifyPass(PassRegistry &); void initializeWebAssemblyLowerBrUnlessPass(PassRegistry &); void initializeWebAssemblyRegNumberingPass(PassRegistry &); Index: lib/Target/WebAssembly/WebAssemblyExceptionPrepare.cpp =================================================================== --- /dev/null +++ lib/Target/WebAssembly/WebAssemblyExceptionPrepare.cpp @@ -0,0 +1,91 @@ +//=== WebAssemblyExceptionPrepare.cpp - WebAssembly Exception Preparation -===// +// +// The LLVM 
Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Does various transformations for exception handling. +/// +//===----------------------------------------------------------------------===// + +// TODO +// 1. Add a 'catch_all' instruction to beginning of cleanup pads +// 2. Make sure 'catch' instruction is the first instruction in BB +// 3. Add 'rethrow' instruction after __cxa_rethrow() function call +// 4. Duplicate a call to __clang_call_terminate in catch_all block + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" +#include "WebAssemblySubtarget.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-exception-prepare" + +namespace { +class WebAssemblyExceptionPrepare final : public MachineFunctionPass { + StringRef getPassName() const override { + return "WebAssembly Prepare Exception"; + } + bool runOnMachineFunction(MachineFunction &MF) override; + + bool replaceFuncletReturnInstructions(MachineFunction &MF); + +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyExceptionPrepare() : MachineFunctionPass(ID) {} +}; +} // end anonymous namespace + +char WebAssemblyExceptionPrepare::ID = 0; +INITIALIZE_PASS(WebAssemblyExceptionPrepare, DEBUG_TYPE, + "WebAssembly Exception Preparation", false, false) + +FunctionPass *llvm::createWebAssemblyExceptionPrepare() { + return new WebAssemblyExceptionPrepare(); +} + +bool WebAssemblyExceptionPrepare::runOnMachineFunction(MachineFunction &MF) { + // TODO More tasks will be added + return replaceFuncletReturnInstructions(MF); +} + +// Replace catchret and cleanupret pseudo instructions to appropriate wasm +// instructions. 
+bool WebAssemblyExceptionPrepare::replaceFuncletReturnInstructions( + MachineFunction &MF) { + bool Changed = false; + const auto &TII = *MF.getSubtarget().getInstrInfo(); + + for (auto &MBB : MF) { + auto Pos = MBB.getFirstTerminator(); + if (Pos == MBB.end()) + continue; + MachineInstr *TI = &*Pos; + + switch (TI->getOpcode()) { + case WebAssembly::CATCHRET: { + MachineBasicBlock *TBB = TI->getOperand(0).getMBB(); + if (!MBB.isLayoutSuccessor(TBB)) + BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::BR)) + .addMBB(TBB); + TI->eraseFromParent(); + Changed = true; + break; + } + case WebAssembly::CLEANUPRET: { + BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::RETHROW)) + .addImm(0); + TI->eraseFromParent(); + Changed = true; + break; + } + } + } + + return Changed; +} Index: lib/Target/WebAssembly/WebAssemblyISelLowering.h =================================================================== --- lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -90,6 +90,7 @@ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCopyToReg(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; }; namespace WebAssembly { Index: lib/Target/WebAssembly/WebAssemblyISelLowering.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -151,6 +151,9 @@ // Trap lowers to wasm unreachable setOperationAction(ISD::TRAP, MVT::Other, Legal); + // Exception handling intrinsics + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + setMaxAtomicSizeInBitsSupported(64); } @@ -737,6 +740,8 @@ return LowerFRAMEADDR(Op, DAG); case ISD::CopyToReg: return LowerCopyToReg(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: + return LowerINTRINSIC_WO_CHAIN(Op, DAG); } } @@ 
-869,6 +874,21 @@ MachinePointerInfo(SV), 0); } +SDValue +WebAssemblyTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + SDLoc DL(Op); + switch (IntNo) { + default: + return SDValue(); // Don't custom lower most intrinsics. + + case Intrinsic::wasm_lsda: + // TODO For now, just return 0 not to crash + return DAG.getConstant(0, DL, Op.getValueType()); + } +} + //===----------------------------------------------------------------------===// // WebAssembly Optimization Hooks //===----------------------------------------------------------------------===// Index: lib/Target/WebAssembly/WebAssemblyInstrControl.td =================================================================== --- lib/Target/WebAssembly/WebAssemblyInstrControl.td +++ lib/Target/WebAssembly/WebAssemblyInstrControl.td @@ -136,9 +136,25 @@ def END_TRY : I<(outs), (ins), [], "end_try", 0x0b>; } // Uses = [VALUE_STACK], Defs = [VALUE_STACK] -} // Defs = [ARGUMENTS] +// Catching an exception: catch / catch_all +let hasCtrlDep = 1 in { +def CATCH_I32 : I<(outs I32:$dst), (ins i32imm:$tag), + [(set I32:$dst, (int_wasm_catch imm:$tag))], + "i32.catch \t$dst, $tag", 0x07>; +def CATCH_I64 : I<(outs I64:$dst), (ins i32imm:$tag), + [(set I64:$dst, (int_wasm_catch imm:$tag))], + "i64.catch \t$dst, $tag", 0x07>; +def CATCH_ALL : I<(outs), (ins), [], "catch_all", 0x05>; +} -// rethrow takes a relative depth as an argument, for which currently only 0 is -// possible for C++. Once other languages need depths other than 0, depths will -// be computed in CFGStackify. -def : Pat<(int_wasm_rethrow), (RETHROW 0)>; +// Pseudo instructions: cleanupret / catchret +// They are not return instructions in wasm, but setting 'isReturn' to true as +// in X86 is necessary for computing funclet membership.
+let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1, + isCodeGenOnly = 1, isReturn = 1 in { + def CLEANUPRET : I<(outs), (ins), [(cleanupret)], "", 0>; + def CATCHRET : I<(outs), (ins bb_op:$dst, bb_op:$from), + [(catchret bb:$dst, bb:$from)], "", 0>; +} + +} // Defs = [ARGUMENTS] Index: lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -30,7 +30,8 @@ WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI) : WebAssemblyGenInstrInfo(WebAssembly::ADJCALLSTACKDOWN, - WebAssembly::ADJCALLSTACKUP), + WebAssembly::ADJCALLSTACKUP, + WebAssembly::CATCHRET), RI(STI.getTargetTriple()) {} bool WebAssemblyInstrInfo::isReallyTriviallyReMaterializable( Index: lib/Target/WebAssembly/WebAssemblyRegStackify.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -160,10 +160,9 @@ // and/or uses the stack pointer value. static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read, bool &Write, bool &Effects, bool &StackPointer) { - assert(!MI.isPosition()); assert(!MI.isTerminator()); - if (MI.isDebugValue()) + if (MI.isPosition() || MI.isDebugValue()) return; // Check for loads. 
Index: lib/Target/WebAssembly/WebAssemblyRegisterInfo.h =================================================================== --- lib/Target/WebAssembly/WebAssemblyRegisterInfo.h +++ lib/Target/WebAssembly/WebAssemblyRegisterInfo.h @@ -45,6 +45,7 @@ const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const override; + const uint32_t *getNoPreservedMask() const override { return nullptr; } }; } // end namespace llvm Index: lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -66,6 +66,7 @@ initializeWebAssemblyExplicitLocalsPass(PR); initializeWebAssemblyFixIrreducibleControlFlowPass(PR); initializeWebAssemblyCFGSortPass(PR); + initializeWebAssemblyExceptionPreparePass(PR); initializeWebAssemblyCFGStackifyPass(PR); initializeWebAssemblyLowerBrUnlessPass(PR); initializeWebAssemblyRegNumberingPass(PR); @@ -324,6 +325,9 @@ // BLOCK and LOOP markers. addPass(createWebAssemblyCFGSort()); + // Do various transformations for exception handling + addPass(createWebAssemblyExceptionPrepare()); + // Insert BLOCK and LOOP markers. addPass(createWebAssemblyCFGStackify()); Index: lib/Target/X86/MCTargetDesc/X86BaseInfo.h =================================================================== --- lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -670,6 +670,10 @@ return 1; return 0; case 2: + // XCHG/XADD have two destinations and two sources. + if (NumOps >= 4 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0 && + Desc.getOperandConstraint(3, MCOI::TIED_TO) == 1) + return 2; // Check for gather. AVX-512 has the second tied operand early. AVX2 // has it as the last op. 
if (NumOps == 9 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0 && Index: lib/Target/X86/X86InstrInfo.td =================================================================== --- lib/Target/X86/X86InstrInfo.td +++ lib/Target/X86/X86InstrInfo.td @@ -1934,56 +1934,69 @@ // Swap between registers. let SchedRW = [WriteALU] in { -let Constraints = "$val = $dst" in { -def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst), (ins GR8:$val, GR8:$src), - "xchg{b}\t{$val, $src|$src, $val}", []>; -def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst), (ins GR16:$val, GR16:$src), - "xchg{w}\t{$val, $src|$src, $val}", []>, +let Constraints = "$src1 = $dst1, $src2 = $dst2", hasSideEffects = 0 in { +def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst1, GR8:$dst2), + (ins GR8:$src1, GR8:$src2), + "xchg{b}\t{$src2, $src1|$src1, $src2}", []>; +def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst1, GR16:$dst2), + (ins GR16:$src1, GR16:$src2), + "xchg{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16; -def XCHG32rr : I<0x87, MRMSrcReg, (outs GR32:$dst), (ins GR32:$val, GR32:$src), - "xchg{l}\t{$val, $src|$src, $val}", []>, +def XCHG32rr : I<0x87, MRMSrcReg, (outs GR32:$dst1, GR32:$dst2), + (ins GR32:$src1, GR32:$src2), + "xchg{l}\t{$src2, $src1|$src1, $src2}", []>, OpSize32; -def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src), - "xchg{q}\t{$val, $src|$src, $val}", []>; +def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst1, GR64:$dst2), + (ins GR64:$src1 ,GR64:$src2), + "xchg{q}\t{$src2, $src1|$src1, $src2}", []>; } // Swap between EAX and other registers. 
+let Constraints = "$src = $dst", hasSideEffects = 0 in { let Uses = [AX], Defs = [AX] in -def XCHG16ar : I<0x90, AddRegFrm, (outs), (ins GR16:$src), +def XCHG16ar : I<0x90, AddRegFrm, (outs GR16:$dst), (ins GR16:$src), "xchg{w}\t{$src, %ax|ax, $src}", []>, OpSize16; let Uses = [EAX], Defs = [EAX] in -def XCHG32ar : I<0x90, AddRegFrm, (outs), (ins GR32:$src), - "xchg{l}\t{$src, %eax|eax, $src}", []>, - OpSize32; +def XCHG32ar : I<0x90, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), + "xchg{l}\t{$src, %eax|eax, $src}", []>, OpSize32; let Uses = [RAX], Defs = [RAX] in -def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src), +def XCHG64ar : RI<0x90, AddRegFrm, (outs GR64:$dst), (ins GR64:$src), "xchg{q}\t{$src, %rax|rax, $src}", []>; +} } // SchedRW -let SchedRW = [WriteALU] in { -def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src), - "xadd{b}\t{$src, $dst|$dst, $src}", []>, TB; -def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), - "xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, - OpSize16; -def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), - "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB, - OpSize32; -def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), - "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB; +let hasSideEffects = 0, Constraints = "$src1 = $dst1, $src2 = $dst2", + Defs = [EFLAGS], SchedRW = [WriteALU] in { +def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst1, GR8:$dst2), + (ins GR8:$src1, GR8:$src2), + "xadd{b}\t{$src2, $src1|$src1, $src2}", []>, TB; +def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst1, GR16:$dst2), + (ins GR16:$src1, GR16:$src2), + "xadd{w}\t{$src2, $src1|$src1, $src2}", []>, TB, OpSize16; +def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst1, GR32:$dst2), + (ins GR32:$src1, GR32:$src2), + "xadd{l}\t{$src2, $src1|$src1, $src2}", []>, TB, OpSize32; +def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst1, GR64:$dst2), + (ins GR64:$src1, GR64:$src2), + "xadd{q}\t{$src2, $src1|$src1, 
$src2}", []>, TB; } // SchedRW -let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { -def XADD8rm : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), - "xadd{b}\t{$src, $dst|$dst, $src}", []>, TB; -def XADD16rm : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), - "xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, +let mayLoad = 1, mayStore = 1, hasSideEffects = 0, Constraints = "$val = $dst", + Defs = [EFLAGS], SchedRW = [WriteALULd, WriteRMW] in { +def XADD8rm : I<0xC0, MRMSrcMem, (outs GR8:$dst), + (ins GR8:$val, i8mem:$ptr), + "xadd{b}\t{$val, $ptr|$ptr, $val}", []>, TB; +def XADD16rm : I<0xC1, MRMSrcMem, (outs GR16:$dst), + (ins GR16:$val, i16mem:$ptr), + "xadd{w}\t{$val, $ptr|$ptr, $val}", []>, TB, OpSize16; -def XADD32rm : I<0xC1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), - "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB, +def XADD32rm : I<0xC1, MRMSrcMem, (outs GR32:$dst), + (ins GR32:$val, i32mem:$ptr), + "xadd{l}\t{$val, $ptr|$ptr, $val}", []>, TB, OpSize32; -def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), - "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB; +def XADD64rm : RI<0xC1, MRMSrcMem, (outs GR64:$dst), + (ins GR64:$val, i64mem:$ptr), + "xadd{q}\t{$val, $ptr|$ptr, $val}", []>, TB; } Index: lib/Target/X86/X86MCInstLower.cpp =================================================================== --- lib/Target/X86/X86MCInstLower.cpp +++ lib/Target/X86/X86MCInstLower.cpp @@ -1146,7 +1146,7 @@ OS.EmitInstruction(MCInstBuilder(Opc), STI); break; case X86::XCHG16ar: - OS.EmitInstruction(MCInstBuilder(Opc).addReg(X86::AX), STI); + OS.EmitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX), STI); break; case X86::NOOPL: case X86::NOOPW: Index: lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- lib/Transforms/InstCombine/InstructionCombining.cpp +++ lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2565,6 +2565,7 @@ return 
false; case EHPersonality::GNU_CXX: case EHPersonality::GNU_CXX_SjLj: + case EHPersonality::GNU_CXX_Wasm: case EHPersonality::GNU_ObjC: case EHPersonality::MSVC_X86SEH: case EHPersonality::MSVC_Win64SEH: Index: lib/Transforms/Instrumentation/GCOVProfiling.cpp =================================================================== --- lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -503,11 +503,11 @@ return false; } -static bool isUsingFuncletBasedEH(Function &F) { +static bool isUsingWindowsEHInstructions(Function &F) { if (!F.hasPersonalityFn()) return false; EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn()); - return isFuncletEHPersonality(Personality); + return usesWindowsEHInstructions(Personality); } static bool shouldKeepInEntry(BasicBlock::iterator It) { @@ -551,7 +551,7 @@ if (!SP) continue; if (!functionHasLines(F)) continue; // TODO: Functions using funclet-based EH are currently not supported. - if (isUsingFuncletBasedEH(F)) continue; + if (isUsingWindowsEHInstructions(F)) continue; // gcov expects every function to start with an entry block that has a // single successor, so split the entry block to make sure of that. @@ -630,7 +630,7 @@ if (!SP) continue; if (!functionHasLines(F)) continue; // TODO: Functions using funclet-based EH are currently not supported. 
- if (isUsingFuncletBasedEH(F)) continue; + if (isUsingWindowsEHInstructions(F)) continue; if (!Result) Result = true; unsigned Edges = 0; Index: lib/Transforms/ObjCARC/ObjCARCContract.cpp =================================================================== --- lib/Transforms/ObjCARC/ObjCARCContract.cpp +++ lib/Transforms/ObjCARC/ObjCARCContract.cpp @@ -534,7 +534,7 @@ DenseMap<BasicBlock *, ColorVector> BlockColors; if (F.hasPersonalityFn() && - isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) + usesWindowsEHInstructions(classifyEHPersonality(F.getPersonalityFn()))) BlockColors = colorEHFunclets(F); DEBUG(llvm::dbgs() << "**** ObjCARC Contract ****\n"); Index: lib/Transforms/ObjCARC/ObjCARCOpts.cpp =================================================================== --- lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -719,7 +719,7 @@ DenseMap<BasicBlock *, ColorVector> BlockColors; if (F.hasPersonalityFn() && - isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) + usesWindowsEHInstructions(classifyEHPersonality(F.getPersonalityFn()))) BlockColors = colorEHFunclets(F); // Visit all objc_* calls in F.
Index: lib/Transforms/Utils/EscapeEnumerator.cpp =================================================================== --- lib/Transforms/Utils/EscapeEnumerator.cpp +++ lib/Transforms/Utils/EscapeEnumerator.cpp @@ -73,7 +73,7 @@ F.setPersonalityFn(PersFn); } - if (isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) { + if (usesWindowsEHInstructions(classifyEHPersonality(F.getPersonalityFn()))) { report_fatal_error("Funclet EH not supported"); } Index: lib/Transforms/Utils/InlineFunction.cpp =================================================================== --- lib/Transforms/Utils/InlineFunction.cpp +++ lib/Transforms/Utils/InlineFunction.cpp @@ -1569,7 +1569,7 @@ Instruction *CallSiteEHPad = nullptr; if (CallerPersonality) { EHPersonality Personality = classifyEHPersonality(CallerPersonality); - if (isFuncletEHPersonality(Personality)) { + if (usesWindowsEHInstructions(Personality)) { Optional<OperandBundleUse> ParentFunclet = CS.getOperandBundle(LLVMContext::OB_funclet); if (ParentFunclet) Index: test/CodeGen/NVPTX/wmma.py =================================================================== --- test/CodeGen/NVPTX/wmma.py +++ test/CodeGen/NVPTX/wmma.py @@ -2,7 +2,7 @@ # generates correct instructions for them.
# RUN: python %s > %t.ll -# RUN: llc < %t.ll -march=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | FileCheck %t.ll +# RUN: llc < %t.ll -march=nvptx64 -mcpu=sm_70 -mattr=+ptx61 | FileCheck %t.ll from itertools import product from string import Template @@ -36,13 +36,15 @@ check_f16_4 = "{{%s}}" % ", *".join(["%hh[0-9]+"] * 4) check_f32_8 = "{{%s}}" % ", *".join(["%f[0-9]+"] * 8) +known_geoms = ["m16n16k16", "m8n32k16", "m32n8k16"] + def gen_wmma_load_tests(): load_template = """ declare ${ret_ty} @${intrinsic}(i8 ${as}* %src ${extra_args}); ; CHECK-LABEL: .func {{.*}}test_${function}( define ${ret_ty} @test_${function}(i8 ${as}* %src ${extra_args}) { -; CHECK ${instruction} +; CHECK: ${instruction} ; CHECK: {${check_result}} ; CHECK: [%rd{{[0-9]+}}]${stride_pattern} %v0 = call ${ret_ty} @${intrinsic}(i8 ${as}* %src ${extra_args}); @@ -51,7 +53,7 @@ ; CHECK-LABEL: .func{{.*}}test_${function}_o( define ${ret_ty} @test_${function}_o(i8 ${as}* %src ${extra_args}) { -; CHECK ${instruction} +; CHECK: ${instruction} ; CHECK: {${check_result}} ; CHECK: [%rd{{[0-9]+}}+128]${stride_pattern} %src1 = getelementptr i8, i8 ${as}* %src, i32 128; @@ -60,9 +62,10 @@ } """ intrinsic_template = "llvm.nvvm.wmma.${geom}.load.${abc}.${layout}${stride}.${itype}.${pspace}" - instruction_template = "wmma.load.${abc}.sync.${geom}.${layout}${space}.${itype}" + instruction_template = "wmma.load.${abc}.sync.${layout}.${geom}${space}.${itype}" - for abc, layout, space, stride, itype in product( + for geom, abc, layout, space, stride, itype in product( + known_geoms, "abc", ["row","col"], ["",".shared",".global"], @@ -77,7 +80,7 @@ "itype" : itype, "pspace" : get_pspace(space), "as" : "addrspace(%d)" % get_aspace(space), - "geom" : "m16n16k16", + "geom" : geom, } if itype == "f32" and abc != "c": @@ -112,7 +115,7 @@ ; CHECK-LABEL: .func {{.*}}test_${function}( define void @test_${function}(i8 ${as}* %src, ${args}${extra_args}) { -; CHECK ${instruction} {{.*}}[%rd{{[0-9+]}} +; CHECK: ${instruction} 
{{.*}}[%rd{{[0-9+]}} ; CHECK: {${check_args}} ; CHECK: ${stride_pattern} call void @${intrinsic}(i8 ${as}* %src, ${args} ${extra_args}); @@ -121,7 +124,7 @@ ; CHECK-LABEL: .func{{.*}}test_${function}_o( define void @test_${function}_o(i8 ${as}* %src, ${args}${extra_args}) { -; CHECK ${instruction} {{.*}}[%rd{{[0-9+]}}+128] +; CHECK: ${instruction} {{.*}}[%rd{{[0-9+]}}+128] ; CHECK: ${check_args} ; CHECK: ${stride_pattern} %src1 = getelementptr i8, i8 ${as}* %src, i32 128; @@ -130,9 +133,10 @@ } """ intrinsic_template = "llvm.nvvm.wmma.${geom}.store.${abc}.${layout}${stride}.${itype}.${pspace}" - instruction_template = "wmma.store.${abc}.sync.${geom}.${layout}${space}.${itype}" + instruction_template = "wmma.store.${abc}.sync.${layout}.${geom}${space}.${itype}" - for abc, layout, space, stride, itype in product( + for geom, abc, layout, space, stride, itype in product( + known_geoms, "d", ["row","col"], ["",".shared",".global"], @@ -147,7 +151,7 @@ "itype" : itype, "pspace" : get_pspace(space), "as" : "addrspace(%d)" % get_aspace(space), - "geom" : "m16n16k16", + "geom" : geom, } test_params = params @@ -174,11 +178,11 @@ ; CHECK-LABEL: .func {{.*}}test_${function}( define ${ret_ty} @test_${function}( ${args}) { -; CHECK ${instruction} {{.*}}[%rd{{[0-9+]}} -; CHECK ${check_d} -; CHECK ${check_ab} -; CHECK ${check_ab} -; CHECK ${check_c} +; CHECK: ${instruction} +; CHECK-NEXT: ${check_d} +; CHECK-NEXT: ${check_ab} +; CHECK-NEXT: ${check_ab} +; CHECK-NEXT: ${check_c} %r = call ${ret_ty} @${intrinsic}( ${args}); ret ${ret_ty} %r; @@ -187,7 +191,8 @@ intrinsic_template = "llvm.nvvm.wmma.${geom}.mma.${alayout}.${blayout}.${dtype}.${ctype}${satf}" instruction_template = "wmma.mma.sync.${alayout}.${blayout}.${geom}.${dtype}.${ctype}${satf}" - for alayout, blayout, ctype, dtype, satf in product( + for geom, alayout, blayout, ctype, dtype, satf in product( + known_geoms, ["row","col"], ["row","col"], ["f16", "f32"], @@ -200,7 +205,7 @@ "ctype" : ctype, "dtype" : dtype, 
"satf" : satf, - "geom" : "m16n16k16", + "geom" : geom, } test_params = params Index: test/CodeGen/RISCV/compress.ll =================================================================== --- test/CodeGen/RISCV/compress.ll +++ test/CodeGen/RISCV/compress.ll @@ -150,20 +150,16 @@ ret i32 -559038737 } -; TODO: c.mv is unnecessary. define i32 @pos_i32_hi20_only() nounwind { ; RV32IC-LABEL: pos_i32_hi20_only: ; RV32IC: c.lui a0, 16 -; RV32IC: c.mv a0, a0 ; RV32IC-NEXT: c.jr ra ret i32 65536 } -; TODO: c.mv is unnecessary. define i32 @neg_i32_hi20_only() nounwind { ; RV32IC-LABEL: neg_i32_hi20_only: ; RV32IC: c.lui a0, 1048560 -; RV32IC: c.mv a0, a0 ; RV32IC-NEXT: c.jr ra ret i32 -65536 } Index: test/CodeGen/WebAssembly/exception.ll =================================================================== --- test/CodeGen/WebAssembly/exception.ll +++ test/CodeGen/WebAssembly/exception.ll @@ -1,22 +1,90 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -disable-wasm-explicit-locals | FileCheck %s +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -exception-model=wasm | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown-wasm" +%struct.Cleanup = type { i8 } + +@_ZTIi = external constant i8* + declare void @llvm.wasm.throw(i32, i8*) -declare void @llvm.wasm.rethrow() -; CHECK-LABEL: throw: +; CHECK-LABEL: test_throw: ; CHECK-NEXT: i32.const $push0=, 0 ; CHECK-NEXT: throw 0, $pop0 -define void @throw() { +define void @test_throw() { call void @llvm.wasm.throw(i32 0, i8* null) ret void } -; CHECK-LABEL: rethrow: -; CHECK-NEXT: rethrow 0 -define void @rethrow() { - call void @llvm.wasm.rethrow() +; CHECK-LABEL: test_catch: +; CHECK: call foo@FUNCTION +; CHECK: i32.catch $push{{.+}}=, 0 +; CHECK: i32.store __wasm_lpad_context +; CHECK: i32.store __wasm_lpad_context+4 +; CHECK: i32.call $push{{.+}}=, _Unwind_CallPersonality@FUNCTION +; CHECK: i32.call $push{{.+}}=, 
__cxa_begin_catch@FUNCTION +; CHECK: call __cxa_end_catch@FUNCTION +; CHECK: br 1 +; CHECK: call __cxa_rethrow@FUNCTION +define void @test_catch() personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) { +entry: + invoke void @foo() + to label %try.cont unwind label %catch.dispatch + +catch.dispatch: ; preds = %entry + %0 = catchswitch within none [label %catch.start] unwind to caller + +catch.start: ; preds = %catch.dispatch + %1 = catchpad within %0 [i8* bitcast (i8** @_ZTIi to i8*)] + %2 = call i8* @llvm.wasm.get.exception() + %3 = call i32 @llvm.wasm.get.ehselector() + %4 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) + %matches = icmp eq i32 %3, %4 + br i1 %matches, label %catch, label %rethrow + +catch: ; preds = %catch.start + %5 = call i8* @__cxa_begin_catch(i8* %2) [ "funclet"(token %1) ] + call void @__cxa_end_catch() [ "funclet"(token %1) ] + catchret from %1 to label %try.cont + +rethrow: ; preds = %catch.start + call void @__cxa_rethrow() [ "funclet"(token %1) ] + unreachable + +try.cont: ; preds = %entry, %catch ret void } + +; CHECK-LABEL: test_cleanup: +; CHECK: call foo@FUNCTION +; CHECK: return +; CHECK: i32.call $push20=, _ZN7CleanupD1Ev@FUNCTION +; CHECK: rethrow 0 +define void @test_cleanup() personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) { +entry: + %c = alloca %struct.Cleanup, align 1 + invoke void @foo() + to label %invoke.cont unwind label %ehcleanup + +invoke.cont: ; preds = %entry + %call = call %struct.Cleanup* @_ZN7CleanupD1Ev(%struct.Cleanup* %c) + ret void + +ehcleanup: ; preds = %entry + %0 = cleanuppad within none [] + %call1 = call %struct.Cleanup* @_ZN7CleanupD1Ev(%struct.Cleanup* %c) [ "funclet"(token %0) ] + cleanupret from %0 unwind to caller +} + +declare void @foo() +declare void @func(i32) +declare i32 @__gxx_wasm_personality_v0(...) 
+declare i8* @llvm.wasm.get.exception() +declare i32 @llvm.wasm.get.ehselector() +declare i32 @llvm.eh.typeid.for(i8*) +declare i8* @__cxa_begin_catch(i8*) +declare void @__cxa_end_catch() +declare void @__cxa_rethrow() +declare void @__clang_call_terminate(i8*) +declare %struct.Cleanup* @_ZN7CleanupD1Ev(%struct.Cleanup* returned) Index: test/CodeGen/WebAssembly/wasmehprepare.ll =================================================================== --- /dev/null +++ test/CodeGen/WebAssembly/wasmehprepare.ll @@ -0,0 +1,317 @@ +; RUN: opt < %s -winehprepare -demote-catchswitch-only -wasmehprepare -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +; CHECK: @__wasm_lpad_context = external global { i32, i8*, i32 } + +@_ZTIi = external constant i8* +%struct.Cleanup = type { i8 } + +; A single 'catch (int)' clause. +; A wasm.catch() call, wasm.lsda() call, and personality call to generate a +; selector should all be generated after the catchpad. 
+define void @test0() personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) { +; CHECK-LABEL: @test0() +entry: + invoke void @foo() + to label %try.cont unwind label %catch.dispatch + +catch.dispatch: ; preds = %entry + %0 = catchswitch within none [label %catch.start] unwind to caller + +catch.start: ; preds = %catch.dispatch + %1 = catchpad within %0 [i8* bitcast (i8** @_ZTIi to i8*)] + %2 = call i8* @llvm.wasm.get.exception() + %3 = call i32 @llvm.wasm.get.ehselector() + %4 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) + %matches = icmp eq i32 %3, %4 + br i1 %matches, label %catch, label %rethrow +; CHECK: catch.start: +; CHECK-NEXT: %[[CATCHPAD:.*]] = catchpad +; CHECK-NEXT: %[[EXN:.*]] = call i8* @llvm.wasm.catch(i32 0) +; CHECK-NEXT: call void @llvm.wasm.landingpad.index(i32 0) +; CHECK-NEXT: store volatile i32 0, i32* getelementptr inbounds ({ i32, i8*, i32 }, { i32, i8*, i32 }* @__wasm_lpad_context, i32 0, i32 0) +; CHECK-NEXT: %[[LSDA:.*]] = call i8* @llvm.wasm.lsda() +; CHECK-NEXT: store volatile i8* %[[LSDA]], i8** getelementptr inbounds ({ i32, i8*, i32 }, { i32, i8*, i32 }* @__wasm_lpad_context, i32 0, i32 1) +; CHECK-NEXT: call i32 @_Unwind_CallPersonality(i8* %[[EXN]]) {{.*}} [ "funclet"(token %[[CATCHPAD]]) ] +; CHECK-NEXT: %[[SELECTOR:.*]] = load i32, i32* getelementptr inbounds ({ i32, i8*, i32 }, { i32, i8*, i32 }* @__wasm_lpad_context, i32 0, i32 2) +; CHECK: icmp eq i32 %[[SELECTOR]] + +catch: ; preds = %catch.start + %5 = call i8* @__cxa_begin_catch(i8* %2) [ "funclet"(token %1) ] + call void @__cxa_end_catch() [ "funclet"(token %1) ] + catchret from %1 to label %try.cont +; CHECK: catch: +; CHECK-NEXT: call i8* @__cxa_begin_catch(i8* %[[EXN]]) + +rethrow: ; preds = %catch.start + call void @__cxa_rethrow() [ "funclet"(token %1) ] + unreachable + +try.cont: ; preds = %entry, %catch + ret void +} + +; Two try-catches, one of them is with a single 'catch (...)' clause. 
+; For the catchpad with a single 'catch (...)', only a wasm.catch() call should +; be generated after the catchpad; wasm.landingpad.index() and personality call +; should NOT be generated. For the other catchpad, the argument of +; wasm.landingpad.index() should be not 1 but 0. +define void @test1() personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) { +; CHECK-LABEL: @test1() +entry: + invoke void @foo() + to label %try.cont unwind label %catch.dispatch + +catch.dispatch: ; preds = %entry + %0 = catchswitch within none [label %catch.start] unwind to caller + +catch.start: ; preds = %catch.dispatch + %1 = catchpad within %0 [i8* null] + %2 = call i8* @llvm.wasm.get.exception() + %3 = call i32 @llvm.wasm.get.ehselector() + %4 = call i8* @__cxa_begin_catch(i8* %2) [ "funclet"(token %1) ] + call void @__cxa_end_catch() [ "funclet"(token %1) ] + catchret from %1 to label %try.cont +; CHECK: catch.start: +; CHECK-NEXT: catchpad within %0 [i8* null] +; CHECK-NEXT: call i8* @llvm.wasm.catch(i32 0) +; CHECK-NOT: call void @llvm.wasm.landingpad.index +; CHECK-NOT: store {{.*}} @__wasm_lpad_context +; CHECK-NOT: call i8* @llvm.wasm.lsda() +; CHECK-NOT: call i32 @_Unwind_CallPersonality +; CHECK-NOT: load {{.*}} @__wasm_lpad_context + +try.cont: ; preds = %entry, %catch.start + invoke void @foo() + to label %try.cont7 unwind label %catch.dispatch2 + +catch.dispatch2: ; preds = %try.cont + %5 = catchswitch within none [label %catch.start3] unwind to caller + +catch.start3: ; preds = %catch.dispatch2 + %6 = catchpad within %5 [i8* bitcast (i8** @_ZTIi to i8*)] + %7 = call i8* @llvm.wasm.get.exception() + %8 = call i32 @llvm.wasm.get.ehselector() + %9 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) + %matches = icmp eq i32 %8, %9 + br i1 %matches, label %catch4, label %rethrow +; CHECK: catch.start3: +; CHECK: call void @llvm.wasm.landingpad.index(i32 0) + +catch4: ; preds = %catch.start3 + %10 = call i8* @__cxa_begin_catch(i8* %7) [ 
"funclet"(token %6) ] + call void @__cxa_end_catch() [ "funclet"(token %6) ] + catchret from %6 to label %try.cont7 + +rethrow: ; preds = %catch.start3 + call void @__cxa_rethrow() [ "funclet"(token %6) ] + unreachable + +try.cont7: ; preds = %try.cont, %catch4 + ret void +} + +; A nested try-catch within a catch. Within the nested catchpad, wasm.lsda() +; call should NOT be generated. +define void @test2() personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) { +; CHECK-LABEL: @test2() +entry: + invoke void @foo() + to label %try.cont9 unwind label %catch.dispatch + +catch.dispatch: ; preds = %entry + %0 = catchswitch within none [label %catch.start] unwind to caller + +catch.start: ; preds = %catch.dispatch + %1 = catchpad within %0 [i8* bitcast (i8** @_ZTIi to i8*)] + %2 = call i8* @llvm.wasm.get.exception() + %3 = call i32 @llvm.wasm.get.ehselector() + %4 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) + %matches = icmp eq i32 %3, %4 + br i1 %matches, label %catch, label %rethrow +; CHECK: catch.start: +; CHECK: call i8* @llvm.wasm.lsda() + +catch: ; preds = %catch.start + %5 = call i8* @__cxa_begin_catch(i8* %2) [ "funclet"(token %1) ] + invoke void @foo() [ "funclet"(token %1) ] + to label %try.cont unwind label %catch.dispatch2 + +catch.dispatch2: ; preds = %catch + %6 = catchswitch within %1 [label %catch.start3] unwind label %ehcleanup + +catch.start3: ; preds = %catch.dispatch2 + %7 = catchpad within %6 [i8* bitcast (i8** @_ZTIi to i8*)] + %8 = call i8* @llvm.wasm.get.exception() + %9 = call i32 @llvm.wasm.get.ehselector() + %10 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) + %matches4 = icmp eq i32 %9, %10 + br i1 %matches4, label %catch6, label %rethrow5 +; CHECK: catch.start3: +; CHECK-NOT: call i8* @llvm.wasm.lsda() + +catch6: ; preds = %catch.start3 + %11 = call i8* @__cxa_begin_catch(i8* %8) [ "funclet"(token %7) ] + call void @__cxa_end_catch() [ "funclet"(token %7) ] + catchret from %7 to label 
%try.cont + +rethrow5: ; preds = %catch.start3 + invoke void @__cxa_rethrow() [ "funclet"(token %7) ] + to label %unreachable unwind label %ehcleanup + +try.cont: ; preds = %catch, %catch6 + call void @__cxa_end_catch() [ "funclet"(token %1) ] + catchret from %1 to label %try.cont9 + +rethrow: ; preds = %catch.start + call void @__cxa_rethrow() [ "funclet"(token %1) ] + unreachable + +try.cont9: ; preds = %entry, %try.cont + ret void + +ehcleanup: ; preds = %rethrow5, %catch.dispatch2 + %12 = cleanuppad within %1 [] + call void @__cxa_end_catch() [ "funclet"(token %12) ] + cleanupret from %12 unwind to caller +; CHECK: ehcleanup: +; CHECK-NEXT: cleanuppad +; CHECK-NOT: call i8* @llvm.wasm.catch(i32 0) +; CHECK-NOT: call void @llvm.wasm.landingpad.index +; CHECK-NOT: store {{.*}} @__wasm_lpad_context +; CHECK-NOT: call i8* @llvm.wasm.lsda() +; CHECK-NOT: call i32 @_Unwind_CallPersonality +; CHECK-NOT: load {{.*}} @__wasm_lpad_context + +unreachable: ; preds = %rethrow5 + unreachable +} + +; A cleanuppad with a call to __clang_call_terminate(). +; A call to wasm.catch() should be generated after the cleanuppad. 
+define hidden void @test3() personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) { +; CHECK-LABEL: @test3 +entry: + invoke void @foo() + to label %try.cont unwind label %catch.dispatch + +catch.dispatch: ; preds = %entry + %0 = catchswitch within none [label %catch.start] unwind to caller + +catch.start: ; preds = %catch.dispatch + %1 = catchpad within %0 [i8* null] + %2 = call i8* @llvm.wasm.get.exception() + %3 = call i32 @llvm.wasm.get.ehselector() + %4 = call i8* @__cxa_begin_catch(i8* %2) [ "funclet"(token %1) ] + invoke void @foo() [ "funclet"(token %1) ] + to label %invoke.cont1 unwind label %ehcleanup + +invoke.cont1: ; preds = %catch.start + call void @__cxa_end_catch() [ "funclet"(token %1) ] + catchret from %1 to label %try.cont + +try.cont: ; preds = %entry, %invoke.cont1 + ret void + +ehcleanup: ; preds = %catch.start + %5 = cleanuppad within %1 [] + invoke void @__cxa_end_catch() [ "funclet"(token %5) ] + to label %invoke.cont2 unwind label %terminate + +invoke.cont2: ; preds = %ehcleanup + cleanupret from %5 unwind to caller + +terminate: ; preds = %ehcleanup + %6 = cleanuppad within %5 [] + %7 = call i8* @llvm.wasm.get.exception() + call void @__clang_call_terminate(i8* %7) [ "funclet"(token %6) ] + unreachable +; CHECK: terminate: +; CHECK-NEXT: cleanuppad +; CHECK-NEXT: %[[EXN:.*]] = call i8* @llvm.wasm.catch(i32 0) +; CHECK-NEXT: call void @__clang_call_terminate(i8* %[[EXN]]) +} + +; PHI demotion test. Only the phi before catchswitch should be demoted; the phi +; before cleanuppad should NOT. 
+define void @test5() personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) { +; CHECK-LABEL: @test5 +entry: + %c = alloca %struct.Cleanup, align 1 + invoke void @foo() + to label %invoke.cont unwind label %ehcleanup + +invoke.cont: ; preds = %entry + invoke void @foo() + to label %invoke.cont1 unwind label %ehcleanup + +invoke.cont1: ; preds = %invoke.cont + %call = call %struct.Cleanup* @_ZN7CleanupD1Ev(%struct.Cleanup* %c) + br label %try.cont + +ehcleanup: ; preds = %invoke.cont, %entry + %num.0 = phi i32 [ 2, %invoke.cont ], [ 1, %entry ] + %0 = cleanuppad within none [] + %call2 = call %struct.Cleanup* @_ZN7CleanupD1Ev(%struct.Cleanup* %c) [ "funclet"(token %0) ] + cleanupret from %0 unwind label %catch.dispatch +; CHECK: ehcleanup: +; CHECK-NEXT: = phi + +catch.dispatch: ; preds = %ehcleanup + %1 = catchswitch within none [label %catch.start] unwind to caller + +catch.start: ; preds = %catch.dispatch + %2 = catchpad within %1 [i8* null] + %3 = call i8* @llvm.wasm.get.exception() + %4 = call i32 @llvm.wasm.get.ehselector() + %5 = call i8* @__cxa_begin_catch(i8* %3) [ "funclet"(token %2) ] + call void @func(i32 %num.0) [ "funclet"(token %2) ] + call void @__cxa_end_catch() [ "funclet"(token %2) ] + catchret from %2 to label %try.cont + +try.cont: ; preds = %catch.start, %invoke.cont1 + invoke void @foo() + to label %invoke.cont3 unwind label %catch.dispatch5 + +invoke.cont3: ; preds = %try.cont + invoke void @foo() + to label %try.cont10 unwind label %catch.dispatch5 + +catch.dispatch5: ; preds = %invoke.cont3, %try.cont + %num.1 = phi i32 [ 2, %invoke.cont3 ], [ 1, %try.cont ] + %6 = catchswitch within none [label %catch.start6] unwind to caller +; CHECK: catch.dispatch5: +; CHECK-NOT: = phi + +catch.start6: ; preds = %catch.dispatch5 + %7 = catchpad within %6 [i8* null] + %8 = call i8* @llvm.wasm.get.exception() + %9 = call i32 @llvm.wasm.get.ehselector() + %10 = call i8* @__cxa_begin_catch(i8* %8) [ "funclet"(token %7) ] + call void 
@func(i32 %num.1) [ "funclet"(token %7) ] + call void @__cxa_end_catch() [ "funclet"(token %7) ] + catchret from %7 to label %try.cont10 + +try.cont10: ; preds = %invoke.cont3, %catch.start6 + ret void +} + +declare void @foo() +declare void @func(i32) +declare %struct.Cleanup* @_ZN7CleanupD1Ev(%struct.Cleanup* returned) +declare i32 @__gxx_wasm_personality_v0(...) +declare i8* @llvm.wasm.get.exception() +declare i32 @llvm.wasm.get.ehselector() +declare i32 @llvm.eh.typeid.for(i8*) +declare i8* @__cxa_begin_catch(i8*) +declare void @__cxa_end_catch() +declare void @__cxa_rethrow() +declare void @__clang_call_terminate(i8*) + +; CHECK-DAG: declare i8* @llvm.wasm.catch(i32) +; CHECK-DAG: declare void @llvm.wasm.landingpad.index(i32) +; CHECK-DAG: declare i8* @llvm.wasm.lsda() +; CHECK-DAG: declare i32 @_Unwind_CallPersonality(i8*) + Index: test/tools/llvm-mca/X86/BtVer2/resources-sse2.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/resources-sse2.s +++ test/tools/llvm-mca/X86/BtVer2/resources-sse2.s @@ -144,15 +144,6 @@ orpd %xmm0, %xmm2 orpd (%rax), %xmm2 -pabsb %xmm0, %xmm2 -pabsb (%rax), %xmm2 - -pabsd %xmm0, %xmm2 -pabsd (%rax), %xmm2 - -pabsw %xmm0, %xmm2 -pabsw (%rax), %xmm2 - packssdw %xmm0, %xmm2 packssdw (%rax), %xmm2 @@ -483,12 +474,6 @@ # CHECK-NEXT: 1 9 2.00 * mulsd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 orpd %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * orpd (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 pabsb %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * pabsb (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 pabsd %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * pabsd (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 pabsw %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * pabsw (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 packssdw %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * packssdw (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 packsswb %xmm0, %xmm2 @@ -658,7 +643,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] -# 
CHECK-NEXT: 17.00 2.00 - 47.50 202.50 115.00 127.00 114.00 - 12.00 43.00 65.50 65.50 10.00 +# CHECK-NEXT: 17.00 2.00 - 47.50 202.50 112.00 124.00 111.00 - 12.00 43.00 62.50 62.50 10.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: @@ -760,12 +745,6 @@ # CHECK-NEXT: - - - - 2.00 - 1.00 1.00 - - - - - - mulsd (%rax), %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - orpd %xmm0, %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - orpd (%rax), %xmm2 -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - pabsb %xmm0, %xmm2 -# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - pabsb (%rax), %xmm2 -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - pabsd %xmm0, %xmm2 -# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - pabsd (%rax), %xmm2 -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - pabsw %xmm0, %xmm2 -# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - pabsw (%rax), %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - packssdw %xmm0, %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - packssdw (%rax), %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - packsswb %xmm0, %xmm2 Index: test/tools/llvm-mca/X86/BtVer2/resources-ssse3.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/resources-ssse3.s +++ test/tools/llvm-mca/X86/BtVer2/resources-ssse3.s @@ -1,42 +1,99 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s +pabsb %mm0, %mm2 +pabsb (%rax), %mm2 + +pabsb %xmm0, %xmm2 +pabsb (%rax), %xmm2 + +pabsd %mm0, %mm2 +pabsd (%rax), %mm2 + +pabsd %xmm0, %xmm2 +pabsd (%rax), %xmm2 + +pabsw %mm0, %mm2 +pabsw (%rax), %mm2 + +pabsw %xmm0, %xmm2 +pabsw (%rax), %xmm2 + +palignr $1, %mm0, %mm2 +palignr $1, (%rax), %mm2 + palignr $1, %xmm0, %xmm2 
palignr $1, (%rax), %xmm2 +phaddd %mm0, %mm2 +phaddd (%rax), %mm2 + phaddd %xmm0, %xmm2 phaddd (%rax), %xmm2 +phaddsw %mm0, %mm2 +phaddsw (%rax), %mm2 + phaddsw %xmm0, %xmm2 phaddsw (%rax), %xmm2 +phaddw %mm0, %mm2 +phaddw (%rax), %mm2 + phaddw %xmm0, %xmm2 phaddw (%rax), %xmm2 +phsubd %mm0, %mm2 +phsubd (%rax), %mm2 + phsubd %xmm0, %xmm2 phsubd (%rax), %xmm2 +phsubsw %mm0, %mm2 +phsubsw (%rax), %mm2 + phsubsw %xmm0, %xmm2 phsubsw (%rax), %xmm2 +phsubw %mm0, %mm2 +phsubw (%rax), %mm2 + phsubw %xmm0, %xmm2 phsubw (%rax), %xmm2 +pmaddubsw %mm0, %mm2 +pmaddubsw (%rax), %mm2 + pmaddubsw %xmm0, %xmm2 pmaddubsw (%rax), %xmm2 +pmulhrsw %mm0, %mm2 +pmulhrsw (%rax), %mm2 + pmulhrsw %xmm0, %xmm2 pmulhrsw (%rax), %xmm2 +pshufb %mm0, %mm2 +pshufb (%rax), %mm2 + pshufb %xmm0, %xmm2 pshufb (%rax), %xmm2 +psignb %mm0, %mm2 +psignb (%rax), %mm2 + psignb %xmm0, %xmm2 psignb (%rax), %xmm2 +psignd %mm0, %mm2 +psignd (%rax), %mm2 + psignd %xmm0, %xmm2 psignd (%rax), %xmm2 +psignw %mm0, %mm2 +psignw (%rax), %mm2 + psignw %xmm0, %xmm2 psignw (%rax), %xmm2 @@ -49,30 +106,68 @@ # CHECK-NEXT: [6]: HasSideEffects # CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 0.50 pabsb %mm0, %mm2 +# CHECK-NEXT: 1 6 1.00 * pabsb (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 pabsb %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pabsb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pabsd %mm0, %mm2 +# CHECK-NEXT: 1 6 1.00 * pabsd (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 pabsd %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pabsd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pabsw %mm0, %mm2 +# CHECK-NEXT: 1 6 1.00 * pabsw (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 pabsw %xmm0, %xmm2 +# CHECK-NEXT: 1 6 1.00 * pabsw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 palignr $1, %mm0, %mm2 +# CHECK-NEXT: 1 6 1.00 * palignr $1, (%rax), %mm2 # CHECK-NEXT: 1 1 0.50 palignr $1, %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * palignr $1, (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 phaddd %mm0, %mm2 +# CHECK-NEXT: 1 6 1.00 * phaddd (%rax), %mm2 # CHECK-NEXT: 1 1 0.50 phaddd %xmm0, 
%xmm2 # CHECK-NEXT: 1 6 1.00 * phaddd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 phaddsw %mm0, %mm2 +# CHECK-NEXT: 1 6 1.00 * phaddsw (%rax), %mm2 # CHECK-NEXT: 1 1 0.50 phaddsw %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * phaddsw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 phaddw %mm0, %mm2 +# CHECK-NEXT: 1 6 1.00 * phaddw (%rax), %mm2 # CHECK-NEXT: 1 1 0.50 phaddw %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * phaddw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 phsubd %mm0, %mm2 +# CHECK-NEXT: 1 6 1.00 * phsubd (%rax), %mm2 # CHECK-NEXT: 1 1 0.50 phsubd %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * phsubd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 phsubsw %mm0, %mm2 +# CHECK-NEXT: 1 6 1.00 * phsubsw (%rax), %mm2 # CHECK-NEXT: 1 1 0.50 phsubsw %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * phsubsw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 phsubw %mm0, %mm2 +# CHECK-NEXT: 1 6 1.00 * phsubw (%rax), %mm2 # CHECK-NEXT: 1 1 0.50 phsubw %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * phsubw (%rax), %xmm2 +# CHECK-NEXT: 1 2 1.00 pmaddubsw %mm0, %mm2 +# CHECK-NEXT: 1 7 1.00 * pmaddubsw (%rax), %mm2 # CHECK-NEXT: 1 2 1.00 pmaddubsw %xmm0, %xmm2 # CHECK-NEXT: 1 7 1.00 * pmaddubsw (%rax), %xmm2 +# CHECK-NEXT: 1 2 1.00 pmulhrsw %mm0, %mm2 +# CHECK-NEXT: 1 7 1.00 * pmulhrsw (%rax), %mm2 # CHECK-NEXT: 1 2 1.00 pmulhrsw %xmm0, %xmm2 # CHECK-NEXT: 1 7 1.00 * pmulhrsw (%rax), %xmm2 +# CHECK-NEXT: 3 2 2.00 pshufb %mm0, %mm2 +# CHECK-NEXT: 3 7 2.00 * pshufb (%rax), %mm2 # CHECK-NEXT: 3 2 2.00 pshufb %xmm0, %xmm2 # CHECK-NEXT: 3 7 2.00 * pshufb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psignb %mm0, %mm2 +# CHECK-NEXT: 1 6 1.00 * psignb (%rax), %mm2 # CHECK-NEXT: 1 1 0.50 psignb %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * psignb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psignd %mm0, %mm2 +# CHECK-NEXT: 1 6 1.00 * psignd (%rax), %mm2 # CHECK-NEXT: 1 1 0.50 psignd %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * psignd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psignw %mm0, %mm2 +# CHECK-NEXT: 1 6 1.00 * psignw (%rax), %mm2 # CHECK-NEXT: 1 1 0.50 psignw %xmm0, %xmm2 # CHECK-NEXT: 1 6 
1.00 * psignw (%rax), %xmm2 @@ -94,34 +189,72 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] -# CHECK-NEXT: - - - - - 15.00 11.00 13.00 - - - 14.00 14.00 4.00 +# CHECK-NEXT: - - - - - 36.00 28.00 32.00 - - - 34.00 34.00 8.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - pabsb %mm0, %mm2 +# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - pabsb (%rax), %mm2 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - pabsb %xmm0, %xmm2 +# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - pabsb (%rax), %xmm2 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - pabsd %mm0, %mm2 +# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - pabsd (%rax), %mm2 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - pabsd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - pabsd (%rax), %xmm2 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - pabsw %mm0, %mm2 +# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - pabsw (%rax), %mm2 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - pabsw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - pabsw (%rax), %xmm2 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - palignr $1, %mm0, %mm2 +# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - palignr $1, (%rax), %mm2 # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - palignr $1, %xmm0, %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - palignr $1, (%rax), %xmm2 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - phaddd %mm0, %mm2 +# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - phaddd (%rax), %mm2 # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - phaddd %xmm0, %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - phaddd (%rax), %xmm2 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 
0.50 0.50 - phaddsw %mm0, %mm2 +# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - phaddsw (%rax), %mm2 # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - phaddsw %xmm0, %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - phaddsw (%rax), %xmm2 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - phaddw %mm0, %mm2 +# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - phaddw (%rax), %mm2 # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - phaddw %xmm0, %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - phaddw (%rax), %xmm2 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - phsubd %mm0, %mm2 +# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - phsubd (%rax), %mm2 # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - phsubd %xmm0, %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - phsubd (%rax), %xmm2 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - phsubsw %mm0, %mm2 +# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - phsubsw (%rax), %mm2 # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - phsubsw %xmm0, %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - phsubsw (%rax), %xmm2 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - phsubw %mm0, %mm2 +# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - phsubw (%rax), %mm2 # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - phsubw %xmm0, %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - phsubw (%rax), %xmm2 +# CHECK-NEXT: - - - - - 1.00 - - - - - - - 1.00 pmaddubsw %mm0, %mm2 +# CHECK-NEXT: - - - - - 1.00 - 1.00 - - - - - 1.00 pmaddubsw (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - 1.00 pmaddubsw %xmm0, %xmm2 # CHECK-NEXT: - - - - - 1.00 - 1.00 - - - - - 1.00 pmaddubsw (%rax), %xmm2 +# CHECK-NEXT: - - - - - 1.00 - - - - - - - 1.00 pmulhrsw %mm0, %mm2 +# CHECK-NEXT: - - - - - 1.00 - 1.00 - - - - - 1.00 pmulhrsw (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - 1.00 pmulhrsw %xmm0, %xmm2 # CHECK-NEXT: - 
- - - - 1.00 - 1.00 - - - - - 1.00 pmulhrsw (%rax), %xmm2 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 2.00 2.00 - pshufb %mm0, %mm2 +# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 2.00 2.00 - pshufb (%rax), %mm2 # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 2.00 2.00 - pshufb %xmm0, %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 2.00 2.00 - pshufb (%rax), %xmm2 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - psignb %mm0, %mm2 +# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - psignb (%rax), %mm2 # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - psignb %xmm0, %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - psignb (%rax), %xmm2 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - psignd %mm0, %mm2 +# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - psignd (%rax), %mm2 # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - psignd %xmm0, %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - psignd (%rax), %xmm2 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - psignw %mm0, %mm2 +# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - psignw (%rax), %mm2 # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - psignw %xmm0, %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - psignw (%rax), %xmm2 Index: tools/opt/opt.cpp =================================================================== --- tools/opt/opt.cpp +++ tools/opt/opt.cpp @@ -410,6 +410,7 @@ initializePostInlineEntryExitInstrumenterPass(Registry); initializeUnreachableBlockElimLegacyPassPass(Registry); initializeExpandReductionsPass(Registry); + initializeWasmEHPreparePass(Registry); initializeWriteBitcodePassPass(Registry); #ifdef LINK_POLLY_INTO_TOOLS