Index: CMakeLists.txt =================================================================== --- CMakeLists.txt +++ CMakeLists.txt @@ -366,6 +366,8 @@ option(LLVM_ENABLE_LIBEDIT "Use libedit if available." ON) +option(LLVM_ENABLE_LIBPFM "Use libpfm for performance counters if available." ON) + option(LLVM_ENABLE_THREADS "Use threads if available." ON) option(LLVM_ENABLE_ZLIB "Use zlib for compression/decompression if available." ON) Index: cmake/config-ix.cmake =================================================================== --- cmake/config-ix.cmake +++ cmake/config-ix.cmake @@ -90,15 +90,7 @@ endif() # Check for libpfm. -check_library_exists(pfm pfm_initialize "" HAVE_LIBPFM_INITIALIZE) -if(HAVE_LIBPFM_INITIALIZE) - check_include_file(perfmon/perf_event.h HAVE_PERFMON_PERF_EVENT_H) - check_include_file(perfmon/pfmlib.h HAVE_PERFMON_PFMLIB_H) - check_include_file(perfmon/pfmlib_perf_event.h HAVE_PERFMON_PFMLIB_PERF_EVENT_H) - if(HAVE_PERFMON_PERF_EVENT_H AND HAVE_PERFMON_PFMLIB_H AND HAVE_PERFMON_PFMLIB_PERF_EVENT_H) - set(HAVE_LIBPFM 1) - endif() -endif() +include(FindLibpfm) if(HAVE_LIBPTHREAD) # We want to find pthreads library and at the moment we do want to Index: cmake/modules/FindLibpfm.cmake =================================================================== --- /dev/null +++ cmake/modules/FindLibpfm.cmake @@ -0,0 +1,23 @@ +# CMake module for finding libpfm4. +# +# If successful, the following variables will be defined: +# HAVE_LIBPFM +# +# Libpfm can be disabled by setting LLVM_ENABLE_LIBPFM to 0. 
+ +include(CheckIncludeFile) +include(CheckLibraryExists) + +if (LLVM_ENABLE_LIBPFM) + check_library_exists(pfm pfm_initialize "" HAVE_LIBPFM_INITIALIZE) + if(HAVE_LIBPFM_INITIALIZE) + check_include_file(perfmon/perf_event.h HAVE_PERFMON_PERF_EVENT_H) + check_include_file(perfmon/pfmlib.h HAVE_PERFMON_PFMLIB_H) + check_include_file(perfmon/pfmlib_perf_event.h HAVE_PERFMON_PFMLIB_PERF_EVENT_H) + if(HAVE_PERFMON_PERF_EVENT_H AND HAVE_PERFMON_PFMLIB_H AND HAVE_PERFMON_PFMLIB_PERF_EVENT_H) + set(HAVE_LIBPFM 1) + endif() + endif() +endif() + + Index: docs/CMake.rst =================================================================== --- docs/CMake.rst +++ docs/CMake.rst @@ -374,6 +374,11 @@ **LLVM_USE_INTEL_JITEVENTS**:BOOL Enable building support for Intel JIT Events API. Defaults to OFF. +**LLVM_ENABLE_LIBPFM**:BOOL + Enable building with libpfm to support hardware counter measurements in LLVM + tools. + Defaults to ON. + **LLVM_ENABLE_ZLIB**:BOOL Enable building with zlib to support compression/uncompression in LLVM tools. Defaults to ON. Index: docs/CommandGuide/llvm-mca.rst =================================================================== --- docs/CommandGuide/llvm-mca.rst +++ docs/CommandGuide/llvm-mca.rst @@ -52,7 +52,7 @@ __asm volatile("# LLVM-MCA-BEGIN foo"); a += 42; __asm volatile("# LLVM-MCA-END"); - a *= b; + a \*= b; return a; } @@ -136,12 +136,6 @@ queue. A value of zero for this flag is ignored, and the default store queue size is used instead. -.. option:: -verbose - - Enable verbose output. In particular, this flag enables a number of extra - statistics and performance counters for the dispatch logic, the reorder - buffer, the retire control unit and the register file. - .. option:: -timeline Enable the timeline view. @@ -170,6 +164,15 @@ dispatch events, as well as static/dynamic dispatch stall events. This view is disabled by default. +.. option:: -scheduler-stats + + Enable extra scheduler statistics. 
This view collects and analyzes instruction + issue events. This view is disabled by default. + +.. option:: -retire-stats + + Enable extra retire control unit statistics. This view is disabled by default. + .. option:: -instruction-info Enable the instruction info view. This is enabled by default. Index: include/llvm/Analysis/EHPersonalities.h =================================================================== --- include/llvm/Analysis/EHPersonalities.h +++ include/llvm/Analysis/EHPersonalities.h @@ -27,6 +27,7 @@ GNU_C_SjLj, GNU_CXX, GNU_CXX_SjLj, + GNU_CXX_Wasm, GNU_ObjC, MSVC_X86SEH, MSVC_Win64SEH, @@ -67,6 +68,7 @@ case EHPersonality::MSVC_X86SEH: case EHPersonality::MSVC_Win64SEH: case EHPersonality::CoreCLR: + case EHPersonality::GNU_CXX_Wasm: return true; default: return false; Index: include/llvm/CodeGen/Passes.h =================================================================== --- include/llvm/CodeGen/Passes.h +++ include/llvm/CodeGen/Passes.h @@ -329,13 +329,17 @@ /// createWinEHPass - Prepares personality functions used by MSVC on Windows, /// in addition to the Itanium LSDA based personalities. - FunctionPass *createWinEHPass(); + FunctionPass *createWinEHPass(bool DemoteCatchSwitchPHIOnly = false); /// createSjLjEHPreparePass - This pass adapts exception handling code to use /// the GCC-style builtin setjmp/longjmp (sjlj) to handling EH control flow. /// FunctionPass *createSjLjEHPreparePass(); + /// createWasmEHPass - This pass adapts exception handling code to use + /// WebAssembly's exception handling scheme. 
+ FunctionPass *createWasmEHPass(); + /// LocalStackSlotAllocation - This pass assigns local frame indices to stack /// slots relative to one another and allocates base registers to access them /// when it is estimated by the target to be out of range of normal frame Index: include/llvm/IR/IntrinsicsWebAssembly.td =================================================================== --- include/llvm/IR/IntrinsicsWebAssembly.td +++ include/llvm/IR/IntrinsicsWebAssembly.td @@ -45,4 +45,17 @@ def int_wasm_get_exception : Intrinsic<[llvm_ptr_ty], [], [IntrHasSideEffects]>; def int_wasm_get_ehselector : Intrinsic<[llvm_i32_ty], [], [IntrHasSideEffects]>; + +// wasm.catch returns the pointer to the exception object caught by wasm 'catch' +// instruction. +def int_wasm_catch : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], + [IntrHasSideEffects]>; + +// WebAssembly EH must maintain the landingpads in the order assigned to them +// by WasmEHPrepare pass to generate landingpad table in EHStreamer. This is +// used in order to give them the indices in WasmEHPrepare. +def int_wasm_landingpad_index: Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>; + +// Returns LSDA address of the current function. 
+def int_wasm_lsda : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; } Index: include/llvm/IR/IntrinsicsX86.td =================================================================== --- include/llvm/IR/IntrinsicsX86.td +++ include/llvm/IR/IntrinsicsX86.td @@ -4793,32 +4793,12 @@ def int_x86_avx512_pmulh_w_512 : GCCBuiltin<"__builtin_ia32_pmulhw512">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty], [IntrNoMem, Commutative]>; - def int_x86_avx512_mask_pmaddw_d_128 : // FIXME: remove this intrinsic - Intrinsic<[llvm_v4i32_ty], - [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmaddw_d_256 : // FIXME: remove this intrinsic - Intrinsic<[llvm_v8i32_ty], - [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmaddw_d_512 : - GCCBuiltin<"__builtin_ia32_pmaddwd512_mask">, - Intrinsic<[llvm_v16i32_ty], - [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmaddubs_w_128 : // FIXME: remove this intrinsic - Intrinsic<[llvm_v8i16_ty], - [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmaddubs_w_256 : // FIXME: remove this intrinsic - Intrinsic<[llvm_v16i16_ty], - [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v16i16_ty, llvm_i16_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmaddubs_w_512 : - GCCBuiltin<"__builtin_ia32_pmaddubsw512_mask">, - Intrinsic<[llvm_v32i16_ty], - [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v32i16_ty, llvm_i32_ty], - [IntrNoMem]>; + def int_x86_avx512_pmaddw_d_512 : GCCBuiltin<"__builtin_ia32_pmaddwd512">, + Intrinsic<[llvm_v16i32_ty], [llvm_v32i16_ty, + llvm_v32i16_ty], [IntrNoMem, Commutative]>; + def int_x86_avx512_pmaddubs_w_512 : GCCBuiltin<"__builtin_ia32_pmaddubsw512">, + Intrinsic<[llvm_v32i16_ty], [llvm_v64i8_ty, + llvm_v64i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_dbpsadbw_128 : GCCBuiltin<"__builtin_ia32_dbpsadbw128_mask">, Index: 
include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -383,6 +383,7 @@ void initializeVerifierLegacyPassPass(PassRegistry&); void initializeVirtRegMapPass(PassRegistry&); void initializeVirtRegRewriterPass(PassRegistry&); +void initializeWasmEHPreparePass(PassRegistry&); void initializeWholeProgramDevirtPass(PassRegistry&); void initializeWinEHPreparePass(PassRegistry&); void initializeWriteBitcodePassPass(PassRegistry&); Index: lib/Analysis/EHPersonalities.cpp =================================================================== --- lib/Analysis/EHPersonalities.cpp +++ lib/Analysis/EHPersonalities.cpp @@ -25,20 +25,21 @@ if (!F) return EHPersonality::Unknown; return StringSwitch(F->getName()) - .Case("__gnat_eh_personality", EHPersonality::GNU_Ada) - .Case("__gxx_personality_v0", EHPersonality::GNU_CXX) - .Case("__gxx_personality_seh0",EHPersonality::GNU_CXX) - .Case("__gxx_personality_sj0", EHPersonality::GNU_CXX_SjLj) - .Case("__gcc_personality_v0", EHPersonality::GNU_C) - .Case("__gcc_personality_seh0",EHPersonality::GNU_C) - .Case("__gcc_personality_sj0", EHPersonality::GNU_C_SjLj) - .Case("__objc_personality_v0", EHPersonality::GNU_ObjC) - .Case("_except_handler3", EHPersonality::MSVC_X86SEH) - .Case("_except_handler4", EHPersonality::MSVC_X86SEH) - .Case("__C_specific_handler", EHPersonality::MSVC_Win64SEH) - .Case("__CxxFrameHandler3", EHPersonality::MSVC_CXX) - .Case("ProcessCLRException", EHPersonality::CoreCLR) - .Case("rust_eh_personality", EHPersonality::Rust) + .Case("__gnat_eh_personality", EHPersonality::GNU_Ada) + .Case("__gxx_personality_v0", EHPersonality::GNU_CXX) + .Case("__gxx_personality_seh0", EHPersonality::GNU_CXX) + .Case("__gxx_personality_sj0", EHPersonality::GNU_CXX_SjLj) + .Case("__gxx_wasm_personality_v0", EHPersonality::GNU_CXX_Wasm) + .Case("__gcc_personality_v0", EHPersonality::GNU_C) + 
.Case("__gcc_personality_seh0", EHPersonality::GNU_C) + .Case("__gcc_personality_sj0", EHPersonality::GNU_C_SjLj) + .Case("__objc_personality_v0", EHPersonality::GNU_ObjC) + .Case("_except_handler3", EHPersonality::MSVC_X86SEH) + .Case("_except_handler4", EHPersonality::MSVC_X86SEH) + .Case("__C_specific_handler", EHPersonality::MSVC_Win64SEH) + .Case("__CxxFrameHandler3", EHPersonality::MSVC_CXX) + .Case("ProcessCLRException", EHPersonality::CoreCLR) + .Case("rust_eh_personality", EHPersonality::Rust) .Default(EHPersonality::Unknown); } @@ -47,6 +48,7 @@ case EHPersonality::GNU_Ada: return "__gnat_eh_personality"; case EHPersonality::GNU_CXX: return "__gxx_personality_v0"; case EHPersonality::GNU_CXX_SjLj: return "__gxx_personality_sj0"; + case EHPersonality::GNU_CXX_Wasm: return "__gxx_wasm_personality_v0"; case EHPersonality::GNU_C: return "__gcc_personality_v0"; case EHPersonality::GNU_C_SjLj: return "__gcc_personality_sj0"; case EHPersonality::GNU_ObjC: return "__objc_personality_v0"; Index: lib/CodeGen/CMakeLists.txt =================================================================== --- lib/CodeGen/CMakeLists.txt +++ lib/CodeGen/CMakeLists.txt @@ -157,6 +157,7 @@ UnreachableBlockElim.cpp ValueTypes.cpp VirtRegMap.cpp + WasmEHPrepare.cpp WinEHPrepare.cpp XRayInstrumentation.cpp Index: lib/CodeGen/CodeGen.cpp =================================================================== --- lib/CodeGen/CodeGen.cpp +++ lib/CodeGen/CodeGen.cpp @@ -101,6 +101,7 @@ initializeUnreachableMachineBlockElimPass(Registry); initializeVirtRegMapPass(Registry); initializeVirtRegRewriterPass(Registry); + initializeWasmEHPreparePass(Registry); initializeWinEHPreparePass(Registry); initializeXRayInstrumentationPass(Registry); initializeMIRCanonicalizerPass(Registry); Index: lib/CodeGen/TargetPassConfig.cpp =================================================================== --- lib/CodeGen/TargetPassConfig.cpp +++ lib/CodeGen/TargetPassConfig.cpp @@ -661,7 +661,12 @@ 
addPass(createDwarfEHPass()); break; case ExceptionHandling::Wasm: - // TODO to prevent warning + // Wasm EH uses Windows EH instructions, but it does not need to demote PHIs + // on catchpads and cleanuppads because it does not outline them into + // funclets. Catchswitch blocks are not lowered in SelectionDAG, so we + // should remove PHIs there. + addPass(createWinEHPass(/*DemoteCatchSwitchPHIOnly=*/false)); + addPass(createWasmEHPass()); break; case ExceptionHandling::None: addPass(createLowerInvokePass()); Index: lib/CodeGen/WasmEHPrepare.cpp =================================================================== --- /dev/null +++ lib/CodeGen/WasmEHPrepare.cpp @@ -0,0 +1,325 @@ +//===-- WasmEHPrepare - Prepare exception handling for WebAssembly -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This transformation is designed for use by code generators which use +// WebAssembly exception handling scheme. +// +// WebAssembly exception handling uses Windows exception IR for the middle level +// representation. This pass does the following transformation for every +// catchpad block: +// (In C-style pseudocode) +// +// - Before: +// catchpad ... +// exn = wasm.get.exception(); +// selector = wasm.get.selector(); +// ... +// +// - After: +// catchpad ... +// exn = wasm.catch(0); // 0 is a tag for C++ +// wasm.landingpad.index(index); +// // Only add below in case it's not a single catch (...) +// __wasm_lpad_context.lpad_index = index; +// __wasm_lpad_context.lsda = wasm.lsda(); +// _Unwind_CallPersonality(exn); +// int selector = __wasm_lpad_context.selector; +// ... +// +// Also, does the following for a cleanuppad block with a call to +// __clang_call_terminate(): +// - Before: +// cleanuppad ... 
+// exn = wasm.get.exception(); +// __clang_call_terminate(exn); +// +// - After: +// cleanuppad ... +// exn = wasm.catch(0); // 0 is a tag for C++ +// __clang_call_terminate(exn); +// +// +// * Background: WebAssembly EH instructions +// WebAssembly's try and catch instructions are structured as follows: +// try +// instruction* +// catch (C++ tag) +// instruction* +// ... +// catch_all +// instruction* +// try_end +// +// A catch instruction in WebAssembly does not correspond to a C++ catch clause. +// In WebAssembly, there is a single catch instruction for all C++ exceptions. +// There can be more catch instructions for exceptions in other languages, but +// they are not generated for now. catch_all catches all exceptions including +// foreign exceptions. We turn catchpads into catch (C++ tag) and cleanuppads +// into catch_all, with one exception: cleanuppad with a call to +// __clang_call_terminate should be both in catch (C++ tag) and catch_all. +// +// +// * Background: Direct personality function call +// In WebAssembly EH, the VM is responsible for unwinding stack once an +// exception is thrown. After stack is unwound, the control flow is transferred +// to WebAssembly 'catch' instruction, which returns a caught exception object. +// +// Unwinding stack is not done by libunwind but the VM, so the personality +// function in libcxxabi cannot be called from libunwind during the unwinding +// process. So after a catch instruction, we insert a call to a wrapper function +// in libunwind that in turn calls the real personality function. +// +// In Itanium EH, if the personality function decides there is no matching catch +// clause in a call frame and no cleanup action to perform, the unwinder doesn't +// stop there and continues unwinding. But in Wasm EH, the unwinder stops at +// every call frame with a catch instruction, after which the personality +// function is called from the compiler-generated user code here. 
+// +// In libunwind, we have this struct that serves as a communication channel +// between the compiler-generated user code and the personality function in +// libcxxabi. +// +// struct _Unwind_LandingPadContext { +// uintptr_t lpad_index; +// uintptr_t lsda; +// uintptr_t selector; +// }; +// struct _Unwind_LandingPadContext __wasm_lpad_context = ...; +// +// And this wrapper in libunwind calls the personality function. +// +// _Unwind_Reason_Code _Unwind_CallPersonality(void *exception_ptr) { +// struct _Unwind_Exception *exception_obj = +// (struct _Unwind_Exception *)exception_ptr; +// _Unwind_Reason_Code ret = __gxx_personality_v0( +// 1, _UA_CLEANUP_PHASE, exception_obj->exception_class, exception_obj, +// (struct _Unwind_Context *)__wasm_lpad_context); +// return ret; +// } +// +// We pass a landing pad index, and the address of LSDA for the current function +// to the wrapper function _Unwind_CallPersonality in libunwind, and we retrieve +// the selector after it returns. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "wasmehprepare" + +namespace { +class WasmEHPrepare : public FunctionPass { + Type *LPadContextTy = nullptr; // type of 'struct _Unwind_LandingPadContext' + GlobalVariable *LPadContextGV = nullptr; // __wasm_lpad_context + + // Field addresses of struct _Unwind_LandingPadContext + Value *LPadIndexField = nullptr; // lpad_index field + Value *LSDAField = nullptr; // lsda field + Value *SelectorField = nullptr; // selector + + Function *CatchF = nullptr; // 
wasm.catch() intrinsic + Function *LPadIndexF = nullptr; // wasm.landingpad.index() intrinsic + Function *LSDAF = nullptr; // wasm.lsda() intrinsic + Function *GetExnF = nullptr; // wasm.get.exception() intrinsic + Function *GetSelectorF = nullptr; // wasm.get.ehselector() intrinsic + Function *CallPersonalityF = nullptr; // _Unwind_CallPersonality() wrapper + Function *ClangCallTermF = nullptr; // __clang_call_terminate() function + + void prepareEHPad(BasicBlock *BB, unsigned Index); + void prepareTerminateCleanupPad(BasicBlock *BB); + +public: + static char ID; // Pass identification, replacement for typeid + + WasmEHPrepare() : FunctionPass(ID) {} + + bool doInitialization(Module &M) override; + bool runOnFunction(Function &F) override; + + StringRef getPassName() const override { + return "WebAssembly Exception handling preparation"; + } +}; +} // end anonymous namespace + +char WasmEHPrepare::ID = 0; +INITIALIZE_PASS(WasmEHPrepare, DEBUG_TYPE, "Prepare WebAssembly exceptions", + false, false); + +FunctionPass *llvm::createWasmEHPass() { return new WasmEHPrepare(); } + +bool WasmEHPrepare::doInitialization(Module &M) { + IRBuilder<> IRB(M.getContext()); + LPadContextTy = StructType::get(IRB.getInt32Ty(), // lpad_index + IRB.getInt8PtrTy(), // lsda + IRB.getInt32Ty() // selector + ); + + // __wasm_lpad_context global variable + LPadContextGV = cast<GlobalVariable>( + M.getOrInsertGlobal("__wasm_lpad_context", LPadContextTy)); + LPadIndexField = IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 0, + "lpad_index_gep"); + LSDAField = + IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 1, "lsda_gep"); + SelectorField = IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 2, + "selector_gep"); + + // wasm.catch() intrinsic, which will be lowered to wasm 'catch' instruction. 
+ CatchF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_catch); + // wasm.landingpad.index() intrinsic, which is to specify landingpad index + LPadIndexF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_landingpad_index); + // wasm.lsda() intrinsic. Returns the address of LSDA table for the current + // function. + LSDAF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_lsda); + // wasm.get.exception() and wasm.get.ehselector() intrinsics. Calls to these + // are generated in clang. + GetExnF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_get_exception); + GetSelectorF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_get_ehselector); + + // _Unwind_CallPersonality() wrapper function, which calls the personality + CallPersonalityF = cast(M.getOrInsertFunction( + "_Unwind_CallPersonality", IRB.getInt32Ty(), IRB.getInt8PtrTy())); + + // __clang_call_terminate() function, which is inserted by clang in case a + // cleanup throws + ClangCallTermF = M.getFunction("__clang_call_terminate"); + + return false; +} + +bool WasmEHPrepare::runOnFunction(Function &F) { + SmallVector CatchPads; + SmallVector CleanupPads; + for (BasicBlock &BB : F) { + if (!BB.isEHPad()) + continue; + if (isa(BB.getFirstNonPHI())) + CatchPads.push_back(&BB); + if (isa(BB.getFirstNonPHI())) + CleanupPads.push_back(&BB); + } + + if (CatchPads.empty() && CleanupPads.empty()) + return false; + assert(F.hasPersonalityFn() && "Personality function not found"); + + unsigned Index = 0; + for (auto *BB : CatchPads) { + CatchPadInst *CPI = cast(BB->getFirstNonPHI()); + // In case of a single catch (...), we don't need to emit LSDA + if (CPI->getNumArgOperands() == 1 && + cast(CPI->getArgOperand(0))->isNullValue()) + prepareEHPad(BB, -1); + else + prepareEHPad(BB, Index++); + } + + if (!ClangCallTermF) + return !CatchPads.empty(); + + // Cleanuppads will turn into catch_all later, but cleanuppads with a call to + // __clang_call_terminate() is a special case. 
__clang_call_terminate() takes + // an exception object, so we have to duplicate call in both 'catch ' + // and 'catch_all' clauses. Here we only insert a call to catch; the + // duplication will be done later. In catch_all, the exception object will be + // set to null. + for (auto *BB : CleanupPads) + for (auto &I : *BB) + if (auto *CI = dyn_cast(&I)) + if (CI->getCalledValue() == ClangCallTermF) + prepareEHPad(BB, -1); + + return true; +} + +void WasmEHPrepare::prepareEHPad(BasicBlock *BB, unsigned Index) { + assert(BB->isEHPad() && "BB is not an EHPad!"); + IRBuilder<> IRB(BB->getContext()); + + IRB.SetInsertPoint(&*BB->getFirstInsertionPt()); + // The argument to wasm.catch() is the tag for C++ exceptions, which we set to + // 0 for this module. + // Pseudocode: void *exn = wasm.catch(0); + Instruction *Exn = IRB.CreateCall(CatchF, IRB.getInt32(0), "exn"); + // Replace the return value of wasm.get.exception() with the return value from + // wasm.catch(). + Instruction *GetExnCI = nullptr; + Instruction *GetSelectorCI = nullptr; + for (auto &I : *BB) + if (auto *CI = dyn_cast(&I)) { + if (CI->getCalledValue() == GetExnF) + GetExnCI = CI; + if (CI->getCalledValue() == GetSelectorF) + GetSelectorCI = CI; + } + assert(GetExnCI && "wasm.get.exception() call does not exist"); + GetExnCI->replaceAllUsesWith(Exn); + GetExnCI->eraseFromParent(); + + // In case it is a catchpad with single catch (...) or a cleanuppad, we don't + // need to call personality function because we don't need a selector. 
+ FuncletPadInst *FPI = cast(BB->getFirstNonPHI()); + if (FPI->getNumArgOperands() == 0 || + (FPI->getNumArgOperands() == 1 && + cast(FPI->getArgOperand(0))->isNullValue())) { + if (GetSelectorCI) { + assert(GetSelectorCI->use_empty() && + "wasm.get.ehselector() still has uses!"); + GetSelectorCI->eraseFromParent(); + } + return; + } + IRB.SetInsertPoint(Exn->getNextNode()); + + // This is to create a map of in + // SelectionDAGISel, which is to be used in EHStreamer to emit LSDA tables. + // Pseudocode: wasm.landingpad.index(Index); + IRB.CreateCall(LPadIndexF, IRB.getInt32(Index)); + + // Pseudocode: __wasm_lpad_context.lpad_index = index; + IRB.CreateStore(IRB.getInt32(Index), LPadIndexField, /*isVolatile=*/true); + + // Store LSDA address only if this catchpad belongs to a top-level + // catchswitch. If there is another catchpad that dominates this pad, we don't + // need to store LSDA address again, because they are the same throughout the + // function and have been already stored before. + // TODO Can we not store LSDA address in user function but make libcxxabi + // compute it? + CatchPadInst *CPI = cast(FPI); + if (isa(CPI->getCatchSwitch()->getParentPad())) + // Pseudocode: __wasm_lpad_context.lsda = wasm.lsda(); + IRB.CreateStore(IRB.CreateCall(LSDAF), LSDAField, true); + + // Pseudocode: _Unwind_CallPersonality(exn); + CallInst *PersCI = + IRB.CreateCall(CallPersonalityF, Exn, OperandBundleDef("funclet", CPI)); + PersCI->setDoesNotThrow(); + + // Pseudocode: int selector = __wasm.landingpad_context.selector; + Instruction *Selector = IRB.CreateLoad(SelectorField, "selector"); + + // Replace the return value from wasm.get.ehselector() with the selector value + // loaded from __wasm_lpad_context.selector. 
+ assert(GetSelectorCI && "wasm.get.ehselector() call does not exist"); + GetSelectorCI->replaceAllUsesWith(Selector); + GetSelectorCI->eraseFromParent(); +} Index: lib/CodeGen/WinEHPrepare.cpp =================================================================== --- lib/CodeGen/WinEHPrepare.cpp +++ lib/CodeGen/WinEHPrepare.cpp @@ -49,12 +49,17 @@ cl::desc("Do not remove implausible terminators or other similar cleanups"), cl::init(false)); +static cl::opt DemoteCatchSwitchPHIOnlyOpt( + "demote-catchswitch-only", cl::Hidden, + cl::desc("Demote catchswitch BBs only (for wasm EH)"), cl::init(false)); + namespace { class WinEHPrepare : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid. - WinEHPrepare() : FunctionPass(ID) {} + WinEHPrepare(bool DemoteCatchSwitchPHIOnly = false) + : FunctionPass(ID), DemoteCatchSwitchPHIOnly(DemoteCatchSwitchPHIOnly) {} bool runOnFunction(Function &Fn) override; @@ -77,12 +82,14 @@ bool prepareExplicitEH(Function &F); void colorFunclets(Function &F); - void demotePHIsOnFunclets(Function &F); + void demotePHIsOnFunclets(Function &F, bool DemoteCatchSwitchPHIOnly); void cloneCommonBlocks(Function &F); void removeImplausibleInstructions(Function &F); void cleanupPreparedFunclets(Function &F); void verifyPreparedFunclets(Function &F); + bool DemoteCatchSwitchPHIOnly; + // All fields are reset by runOnFunction. 
EHPersonality Personality = EHPersonality::Unknown; @@ -97,7 +104,9 @@ INITIALIZE_PASS(WinEHPrepare, DEBUG_TYPE, "Prepare Windows exceptions", false, false) -FunctionPass *llvm::createWinEHPass() { return new WinEHPrepare(); } +FunctionPass *llvm::createWinEHPass(bool DemoteCatchSwitchPHIOnly) { + return new WinEHPrepare(DemoteCatchSwitchPHIOnly); +} bool WinEHPrepare::runOnFunction(Function &Fn) { if (!Fn.hasPersonalityFn()) @@ -677,13 +686,17 @@ } } -void WinEHPrepare::demotePHIsOnFunclets(Function &F) { +void WinEHPrepare::demotePHIsOnFunclets(Function &F, + bool DemoteCatchSwitchPHIOnly) { // Strip PHI nodes off of EH pads. SmallVector PHINodes; for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) { BasicBlock *BB = &*FI++; if (!BB->isEHPad()) continue; + if (DemoteCatchSwitchPHIOnly && !isa(BB->getFirstNonPHI())) + continue; + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { Instruction *I = &*BI++; auto *PN = dyn_cast(I); @@ -1031,7 +1044,8 @@ cloneCommonBlocks(F); if (!DisableDemotion) - demotePHIsOnFunclets(F); + demotePHIsOnFunclets(F, DemoteCatchSwitchPHIOnly || + DemoteCatchSwitchPHIOnlyOpt); if (!DisableCleanups) { DEBUG(verifyFunction(F)); Index: lib/IR/AutoUpgrade.cpp =================================================================== --- lib/IR/AutoUpgrade.cpp +++ lib/IR/AutoUpgrade.cpp @@ -173,6 +173,8 @@ Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0 Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0 Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0 + Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0 + Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0 Name.startswith("avx512.mask.packsswb.") || // Added in 5.0 Name.startswith("avx512.mask.packssdw.") || // Added in 5.0 Name.startswith("avx512.mask.packuswb.") || // Added in 5.0 @@ -1071,6 +1073,24 @@ IID = Intrinsic::x86_avx512_pmulhu_w_512; else llvm_unreachable("Unexpected intrinsic"); + } else if 
(Name.startswith("pmaddw.d.")) { + if (VecWidth == 128) + IID = Intrinsic::x86_sse2_pmadd_wd; + else if (VecWidth == 256) + IID = Intrinsic::x86_avx2_pmadd_wd; + else if (VecWidth == 512) + IID = Intrinsic::x86_avx512_pmaddw_d_512; + else + llvm_unreachable("Unexpected intrinsic"); + } else if (Name.startswith("pmaddubs.w.")) { + if (VecWidth == 128) + IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128; + else if (VecWidth == 256) + IID = Intrinsic::x86_avx2_pmadd_ub_sw; + else if (VecWidth == 512) + IID = Intrinsic::x86_avx512_pmaddubs_w_512; + else + llvm_unreachable("Unexpected intrinsic"); } else if (Name.startswith("packsswb.")) { if (VecWidth == 128) IID = Intrinsic::x86_sse2_packsswb_128; Index: lib/Target/AArch64/AArch64RegisterInfo.td =================================================================== --- lib/Target/AArch64/AArch64RegisterInfo.td +++ lib/Target/AArch64/AArch64RegisterInfo.td @@ -776,7 +776,7 @@ let ParserMethod = "tryParseSVEPredicateVector"; } -def PPRAsmOpAny : PPRAsmOperand<"PredicateAny", "PPR", -1>; +def PPRAsmOpAny : PPRAsmOperand<"PredicateAny", "PPR", 0>; def PPRAsmOp8 : PPRAsmOperand<"PredicateB", "PPR", 8>; def PPRAsmOp16 : PPRAsmOperand<"PredicateH", "PPR", 16>; def PPRAsmOp32 : PPRAsmOperand<"PredicateS", "PPR", 32>; @@ -788,7 +788,7 @@ def PPR32 : PPRRegOp<"s", PPRAsmOp32, PPR>; def PPR64 : PPRRegOp<"d", PPRAsmOp64, PPR>; -def PPRAsmOp3bAny : PPRAsmOperand<"Predicate3bAny", "PPR_3b", -1>; +def PPRAsmOp3bAny : PPRAsmOperand<"Predicate3bAny", "PPR_3b", 0>; def PPRAsmOp3b8 : PPRAsmOperand<"Predicate3bB", "PPR_3b", 8>; def PPRAsmOp3b16 : PPRAsmOperand<"Predicate3bH", "PPR_3b", 16>; def PPRAsmOp3b32 : PPRAsmOperand<"Predicate3bS", "PPR_3b", 32>; @@ -818,10 +818,10 @@ # Width # ", AArch64::ZPRRegClassID>"; let RenderMethod = "addRegOperands"; let ParserMethod = "tryParseSVEDataVector<" - # !if(!eq(Width, -1), "false", "true") # ">"; + # !if(!eq(Width, 0), "false", "true") # ">"; } -def ZPRAsmOpAny : ZPRAsmOperand<"VectorAny", -1>; +def 
ZPRAsmOpAny : ZPRAsmOperand<"VectorAny", 0>; def ZPRAsmOp8 : ZPRAsmOperand<"VectorB", 8>; def ZPRAsmOp16 : ZPRAsmOperand<"VectorH", 16>; def ZPRAsmOp32 : ZPRAsmOperand<"VectorS", 32>; Index: lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp =================================================================== --- lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -86,7 +86,6 @@ bool parseCondCode(OperandVector &Operands, bool invertCondCode); unsigned matchRegisterNameAlias(StringRef Name, RegKind Kind); int tryParseRegister(); - int tryMatchVectorRegister(StringRef &Kind, bool expected); bool parseRegister(OperandVector &Operands); bool parseSymbolicImmVal(const MCExpr *&ImmVal); bool parseVectorList(OperandVector &Operands); @@ -121,8 +120,8 @@ /// } - OperandMatchResultTy tryParseSVERegister(int &Reg, StringRef &Kind, - RegKind MatchKind); + OperandMatchResultTy tryParseVectorRegister(int &Reg, StringRef &Kind, + RegKind MatchKind); OperandMatchResultTy tryParseOptionalShiftExtend(OperandVector &Operands); OperandMatchResultTy tryParseBarrierOperand(OperandVector &Operands); OperandMatchResultTy tryParseMRSSystemRegister(OperandVector &Operands); @@ -822,7 +821,7 @@ template bool isSVEVectorRegOfWidth() const { return isSVEVectorReg() && - (ElementWidth == -1 || Reg.ElementWidth == ElementWidth); + (ElementWidth == 0 || Reg.ElementWidth == ElementWidth); } bool isGPR32as64() const { @@ -1573,8 +1572,11 @@ } static std::unique_ptr - CreateReg(unsigned RegNum, RegKind Kind, unsigned ElementWidth, - SMLoc S, SMLoc E, MCContext &Ctx) { + CreateVectorReg(unsigned RegNum, RegKind Kind, unsigned ElementWidth, + SMLoc S, SMLoc E, MCContext &Ctx) { + assert((Kind == RegKind::NeonVector || Kind == RegKind::SVEDataVector || + Kind == RegKind::SVEPredicateVector) && + "Invalid vector kind"); auto Op = make_unique(k_Register, Ctx); Op->Reg.RegNum = RegNum; Op->Reg.ElementWidth = ElementWidth; @@ -1586,12 +1588,30 @@ 
static std::unique_ptr CreateVectorList(unsigned RegNum, unsigned Count, unsigned NumElements, - char ElementKind, SMLoc S, SMLoc E, MCContext &Ctx) { + unsigned ElementWidth, SMLoc S, SMLoc E, MCContext &Ctx) { auto Op = make_unique(k_VectorList, Ctx); Op->VectorList.RegNum = RegNum; Op->VectorList.Count = Count; Op->VectorList.NumElements = NumElements; - Op->VectorList.ElementKind = ElementKind; + switch (ElementWidth) { + case 0: + Op->VectorList.ElementKind = 0; + break; + case 8: + Op->VectorList.ElementKind = 'b'; + break; + case 16: + Op->VectorList.ElementKind = 'h'; + break; + case 32: + Op->VectorList.ElementKind = 's'; + break; + case 64: + Op->VectorList.ElementKind = 'd'; + break; + default: + llvm_unreachable("Unsupported elementwidth"); + } Op->StartLoc = S; Op->EndLoc = E; return Op; @@ -1839,29 +1859,65 @@ .Default(0); } -static bool isValidVectorKind(StringRef Name) { - return StringSwitch(Name.lower()) - .Case(".8b", true) - .Case(".16b", true) - .Case(".4h", true) - .Case(".8h", true) - .Case(".2s", true) - .Case(".4s", true) - .Case(".1d", true) - .Case(".2d", true) - .Case(".1q", true) - // Accept the width neutral ones, too, for verbose syntax. If those - // aren't used in the right places, the token operand won't match so - // all will work out. - .Case(".b", true) - .Case(".h", true) - .Case(".s", true) - .Case(".d", true) - // Needed for fp16 scalar pairwise reductions - .Case(".2h", true) - // another special case for the ARMv8.2a dot product operand - .Case(".4b", true) - .Default(false); +/// Returns an optional pair of (#elements, element-width) if Suffix +/// is a valid vector kind. Where the number of elements in a vector +/// or the vector width is implicit or explicitly unknown (but still a +/// valid suffix kind), 0 is used. 
+static Optional> parseVectorKind(StringRef Suffix, + RegKind VectorKind) { + std::pair Res = {-1, -1}; + + switch (VectorKind) { + case RegKind::NeonVector: + Res = + StringSwitch>(Suffix.lower()) + .Case("", {0, 0}) + .Case(".1d", {1, 64}) + .Case(".1q", {1, 128}) + // '.2h' needed for fp16 scalar pairwise reductions + .Case(".2h", {2, 16}) + .Case(".2s", {2, 32}) + .Case(".2d", {2, 64}) + // '.4b' is another special case for the ARMv8.2a dot product + // operand + .Case(".4b", {4, 8}) + .Case(".4h", {4, 16}) + .Case(".4s", {4, 32}) + .Case(".8b", {8, 8}) + .Case(".8h", {8, 16}) + .Case(".16b", {16, 8}) + // Accept the width neutral ones, too, for verbose syntax. If those + // aren't used in the right places, the token operand won't match so + // all will work out. + .Case(".b", {0, 8}) + .Case(".h", {0, 16}) + .Case(".s", {0, 32}) + .Case(".d", {0, 64}) + .Default({-1, -1}); + break; + case RegKind::SVEPredicateVector: + case RegKind::SVEDataVector: + Res = StringSwitch>(Suffix.lower()) + .Case("", {0, 0}) + .Case(".b", {0, 8}) + .Case(".h", {0, 16}) + .Case(".s", {0, 32}) + .Case(".d", {0, 64}) + .Case(".q", {0, 128}) + .Default({-1, -1}); + break; + default: + llvm_unreachable("Unsupported RegKind"); + } + + if (Res == std::make_pair(-1, -1)) + return Optional>(); + + return Optional>(Res); +} + +static bool isValidVectorKind(StringRef Suffix, RegKind VectorKind) { + return parseVectorKind(Suffix, VectorKind).hasValue(); } static unsigned matchSVEDataVectorRegName(StringRef Name) { @@ -1922,34 +1978,6 @@ .Default(0); } -static bool isValidSVEKind(StringRef Name) { - return StringSwitch(Name.lower()) - .Case(".b", true) - .Case(".h", true) - .Case(".s", true) - .Case(".d", true) - .Case(".q", true) - .Default(false); -} - -static void parseValidVectorKind(StringRef Name, unsigned &NumElements, - char &ElementKind) { - assert(isValidVectorKind(Name)); - - ElementKind = Name.lower()[Name.size() - 1]; - NumElements = 0; - - if (Name.size() == 2) - return; - - // 
Parse the lane count - Name = Name.drop_front(); - while (isdigit(Name.front())) { - NumElements = 10 * NumElements + (Name.front() - '0'); - Name = Name.drop_front(); - } -} - bool AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { StartLoc = getLoc(); @@ -2018,39 +2046,6 @@ return RegNum; } -/// tryMatchVectorRegister - Try to parse a vector register name with optional -/// kind specifier. If it is a register specifier, eat the token and return it. -int AArch64AsmParser::tryMatchVectorRegister(StringRef &Kind, bool expected) { - MCAsmParser &Parser = getParser(); - if (Parser.getTok().isNot(AsmToken::Identifier)) { - TokError("vector register expected"); - return -1; - } - - StringRef Name = Parser.getTok().getString(); - // If there is a kind specifier, it's separated from the register name by - // a '.'. - size_t Start = 0, Next = Name.find('.'); - StringRef Head = Name.slice(Start, Next); - unsigned RegNum = matchRegisterNameAlias(Head, RegKind::NeonVector); - - if (RegNum) { - if (Next != StringRef::npos) { - Kind = Name.slice(Next, StringRef::npos); - if (!isValidVectorKind(Kind)) { - TokError("invalid vector kind qualifier"); - return -1; - } - } - Parser.Lex(); // Eat the register token. - return RegNum; - } - - if (expected) - TokError("vector register expected"); - return -1; -} - /// tryParseSysCROperand - Try to parse a system instruction CR operand name. OperandMatchResultTy AArch64AsmParser::tryParseSysCROperand(OperandVector &Operands) { @@ -2660,12 +2655,20 @@ SMLoc S = getLoc(); // Check for a vector register specifier first. 
StringRef Kind; - int64_t Reg = tryMatchVectorRegister(Kind, false); - if (Reg == -1) + int Reg = -1; + OperandMatchResultTy Res = + tryParseVectorRegister(Reg, Kind, RegKind::NeonVector); + if (Res != MatchOperand_Success) return true; + + const auto &KindRes = parseVectorKind(Kind, RegKind::NeonVector); + if (!KindRes) + return true; + + unsigned ElementWidth = KindRes->second; Operands.push_back( - AArch64Operand::CreateReg(Reg, RegKind::NeonVector, S, getLoc(), - getContext())); + AArch64Operand::CreateVectorReg(Reg, RegKind::NeonVector, ElementWidth, + S, getLoc(), getContext())); // If there was an explicit qualifier, that goes on as a literal text // operand. @@ -2697,12 +2700,12 @@ return false; } -// tryParseSVEDataVectorRegister - Try to parse a SVE vector register name with +// tryParseVectorRegister - Try to parse a vector register name with // optional kind specifier. If it is a register specifier, eat the token // and return it. OperandMatchResultTy -AArch64AsmParser::tryParseSVERegister(int &Reg, StringRef &Kind, - RegKind MatchKind) { +AArch64AsmParser::tryParseVectorRegister(int &Reg, StringRef &Kind, + RegKind MatchKind) { MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); @@ -2719,8 +2722,8 @@ if (RegNum) { if (Next != StringRef::npos) { Kind = Name.slice(Next, StringRef::npos); - if (!isValidSVEKind(Kind)) { - TokError("invalid sve vector kind qualifier"); + if (!isValidVectorKind(Kind, MatchKind)) { + TokError("invalid vector kind qualifier"); return MatchOperand_ParseFail; } } @@ -2740,25 +2743,18 @@ const SMLoc S = getLoc(); StringRef Kind; int RegNum = -1; - auto Res = tryParseSVERegister(RegNum, Kind, RegKind::SVEPredicateVector); + auto Res = tryParseVectorRegister(RegNum, Kind, RegKind::SVEPredicateVector); if (Res != MatchOperand_Success) return Res; - unsigned ElementWidth = StringSwitch(Kind.lower()) - .Case("", -1) - .Case(".b", 8) - .Case(".h", 16) - .Case(".s", 32) - .Case(".d", 64) - .Case(".q", 128) - 
.Default(0); - - if (!ElementWidth) + const auto &KindRes = parseVectorKind(Kind, RegKind::SVEPredicateVector); + if (!KindRes) return MatchOperand_NoMatch; - Operands.push_back( - AArch64Operand::CreateReg(RegNum, RegKind::SVEPredicateVector, - ElementWidth, S, getLoc(), getContext())); + unsigned ElementWidth = KindRes->second; + Operands.push_back(AArch64Operand::CreateVectorReg( + RegNum, RegKind::SVEPredicateVector, ElementWidth, S, + getLoc(), getContext())); // Not all predicates are followed by a '/m' or '/z'. MCAsmParser &Parser = getParser(); @@ -2884,21 +2880,38 @@ bool AArch64AsmParser::parseVectorList(OperandVector &Operands) { MCAsmParser &Parser = getParser(); assert(Parser.getTok().is(AsmToken::LCurly) && "Token is not a Left Bracket"); + + // Wrapper around parse function + auto ParseVector = [this](int &Reg, StringRef &Kind, SMLoc Loc) { + if (tryParseVectorRegister(Reg, Kind, RegKind::NeonVector) == + MatchOperand_Success) { + if (parseVectorKind(Kind, RegKind::NeonVector)) + return true; + llvm_unreachable("Expected a valid vector kind"); + } + + Error(Loc, "vector register expected"); + return false; + }; + SMLoc S = getLoc(); Parser.Lex(); // Eat left bracket token. StringRef Kind; - int64_t FirstReg = tryMatchVectorRegister(Kind, true); - if (FirstReg == -1) - return true; + int FirstReg = -1; + if (!ParseVector(FirstReg, Kind, getLoc())) + return true; + int64_t PrevReg = FirstReg; unsigned Count = 1; if (parseOptionalToken(AsmToken::Minus)) { SMLoc Loc = getLoc(); StringRef NextKind; - int64_t Reg = tryMatchVectorRegister(NextKind, true); - if (Reg == -1) + + int Reg; + if (!ParseVector(Reg, NextKind, getLoc())) return true; + // Any Kind suffices must match on all regs in the list. 
if (Kind != NextKind) return Error(Loc, "mismatched register size suffix"); @@ -2915,8 +2928,8 @@ while (parseOptionalToken(AsmToken::Comma)) { SMLoc Loc = getLoc(); StringRef NextKind; - int64_t Reg = tryMatchVectorRegister(NextKind, true); - if (Reg == -1) + int Reg; + if (!ParseVector(Reg, NextKind, getLoc())) return true; // Any Kind suffices must match on all regs in the list. if (Kind != NextKind) @@ -2939,12 +2952,14 @@ return Error(S, "invalid number of vectors"); unsigned NumElements = 0; - char ElementKind = 0; - if (!Kind.empty()) - parseValidVectorKind(Kind, NumElements, ElementKind); + unsigned ElementWidth = 0; + if (!Kind.empty()) { + if (const auto &VK = parseVectorKind(Kind, RegKind::NeonVector)) + std::tie(NumElements, ElementWidth) = *VK; + } Operands.push_back(AArch64Operand::CreateVectorList( - FirstReg, Count, NumElements, ElementKind, S, getLoc(), getContext())); + FirstReg, Count, NumElements, ElementWidth, S, getLoc(), getContext())); // If there is an index specifier following the list, parse that too. 
SMLoc SIdx = getLoc(); @@ -4454,8 +4469,13 @@ if (RegNum == -1) { StringRef Kind; RegisterKind = RegKind::NeonVector; - RegNum = tryMatchVectorRegister(Kind, false); - if (!Kind.empty()) + OperandMatchResultTy Res = + tryParseVectorRegister(RegNum, Kind, RegKind::NeonVector); + + if (Res == MatchOperand_ParseFail) + return true; + + if (Res == MatchOperand_Success && !Kind.empty()) return Error(SRegLoc, "vector register without type specifier expected"); } @@ -4463,7 +4483,7 @@ StringRef Kind; RegisterKind = RegKind::SVEDataVector; OperandMatchResultTy Res = - tryParseSVERegister(RegNum, Kind, RegKind::SVEDataVector); + tryParseVectorRegister(RegNum, Kind, RegKind::SVEDataVector); if (Res == MatchOperand_ParseFail) return true; @@ -4477,7 +4497,7 @@ StringRef Kind; RegisterKind = RegKind::SVEPredicateVector; OperandMatchResultTy Res = - tryParseSVERegister(RegNum, Kind, RegKind::SVEPredicateVector); + tryParseVectorRegister(RegNum, Kind, RegKind::SVEPredicateVector); if (Res == MatchOperand_ParseFail) return true; @@ -4722,7 +4742,7 @@ StringRef Kind; OperandMatchResultTy Res = - tryParseSVERegister(RegNum, Kind, RegKind::SVEDataVector); + tryParseVectorRegister(RegNum, Kind, RegKind::SVEDataVector); if (Res != MatchOperand_Success) return Res; @@ -4730,20 +4750,14 @@ if (ParseSuffix && Kind.empty()) return MatchOperand_NoMatch; - unsigned ElementWidth = StringSwitch(Kind.lower()) - .Case("", -1) - .Case(".b", 8) - .Case(".h", 16) - .Case(".s", 32) - .Case(".d", 64) - .Case(".q", 128) - .Default(0); - if (!ElementWidth) + const auto &KindRes = parseVectorKind(Kind, RegKind::SVEDataVector); + if (!KindRes) return MatchOperand_NoMatch; - Operands.push_back( - AArch64Operand::CreateReg(RegNum, RegKind::SVEDataVector, ElementWidth, - S, S, getContext())); + unsigned ElementWidth = KindRes->second; + Operands.push_back(AArch64Operand::CreateVectorReg( + RegNum, RegKind::SVEDataVector, ElementWidth, S, S, + getContext())); return MatchOperand_Success; } Index: 
lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -1047,16 +1047,19 @@ setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); setOperationAction(ISD::SETCC, MVT::i32, Expand); - setOperationAction(ISD::SETCC, MVT::f16, Expand); setOperationAction(ISD::SETCC, MVT::f32, Expand); setOperationAction(ISD::SETCC, MVT::f64, Expand); setOperationAction(ISD::SELECT, MVT::i32, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); setOperationAction(ISD::SELECT, MVT::f64, Custom); - setOperationAction(ISD::SELECT_CC, MVT::f16, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); + if (Subtarget->hasFullFP16()) { + setOperationAction(ISD::SETCC, MVT::f16, Expand); + setOperationAction(ISD::SELECT, MVT::f16, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f16, Custom); + } // Thumb-1 cannot currently select ARMISD::SUBE. if (!Subtarget->isThumb1Only()) @@ -1064,7 +1067,8 @@ setOperationAction(ISD::BRCOND, MVT::Other, Custom); setOperationAction(ISD::BR_CC, MVT::i32, Custom); - setOperationAction(ISD::BR_CC, MVT::f16, Custom); + if (Subtarget->hasFullFP16()) + setOperationAction(ISD::BR_CC, MVT::f16, Custom); setOperationAction(ISD::BR_CC, MVT::f32, Custom); setOperationAction(ISD::BR_CC, MVT::f64, Custom); setOperationAction(ISD::BR_JT, MVT::Other, Custom); @@ -4522,7 +4526,9 @@ // Normalize the fp compare. If RHS is zero we keep it there so we match // CMPFPw0 instead of CMPFP. 
if (Subtarget->hasFPARMv8() && !isFloatingPointZero(RHS) && - (TrueVal.getValueType() == MVT::f32 || TrueVal.getValueType() == MVT::f64)) { + (TrueVal.getValueType() == MVT::f16 || + TrueVal.getValueType() == MVT::f32 || + TrueVal.getValueType() == MVT::f64)) { bool swpCmpOps = false; bool swpVselOps = false; checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps); Index: lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- lib/Target/X86/X86IntrinsicsInfo.h +++ lib/Target/X86/X86IntrinsicsInfo.h @@ -843,18 +843,6 @@ X86ISD::VPERMV, 0), X86_INTRINSIC_DATA(avx512_mask_permvar_si_512, VPERM_2OP_MASK, X86ISD::VPERMV, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaddubs_w_128, INTR_TYPE_2OP_MASK, - X86ISD::VPMADDUBSW, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaddubs_w_256, INTR_TYPE_2OP_MASK, - X86ISD::VPMADDUBSW, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaddubs_w_512, INTR_TYPE_2OP_MASK, - X86ISD::VPMADDUBSW, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaddw_d_128, INTR_TYPE_2OP_MASK, - X86ISD::VPMADDWD, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaddw_d_256, INTR_TYPE_2OP_MASK, - X86ISD::VPMADDWD, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaddw_d_512, INTR_TYPE_2OP_MASK, - X86ISD::VPMADDWD, 0), X86_INTRINSIC_DATA(avx512_mask_pmov_db_128, INTR_TYPE_1OP_MASK, X86ISD::VTRUNC, 0), X86_INTRINSIC_DATA(avx512_mask_pmov_db_256, INTR_TYPE_1OP_MASK, @@ -1454,6 +1442,10 @@ X86_INTRINSIC_DATA(avx512_packsswb_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(avx512_packusdw_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0), X86_INTRINSIC_DATA(avx512_packuswb_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0), + X86_INTRINSIC_DATA(avx512_pmaddubs_w_512, INTR_TYPE_2OP, + X86ISD::VPMADDUBSW, 0), + X86_INTRINSIC_DATA(avx512_pmaddw_d_512, INTR_TYPE_2OP, + X86ISD::VPMADDWD, 0), X86_INTRINSIC_DATA(avx512_pmul_hr_sw_512, INTR_TYPE_2OP, X86ISD::MULHRS, 0), X86_INTRINSIC_DATA(avx512_pmulh_w_512, INTR_TYPE_2OP, ISD::MULHS, 0), X86_INTRINSIC_DATA(avx512_pmulhu_w_512, 
INTR_TYPE_2OP, ISD::MULHU, 0), Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2696,5 +2696,35 @@ return SelectInst::Create(Cmp, Builder.CreateNeg(A), A); } + // Eliminate a bitwise 'not' op of 'not' min/max by inverting the min/max: + // + // %notx = xor i32 %x, -1 + // %cmp1 = icmp sgt i32 %notx, %y + // %smax = select i1 %cmp1, i32 %notx, i32 %y + // %res = xor i32 %smax, -1 + // => + // %noty = xor i32 %y, -1 + // %cmp2 = icmp slt %x, %noty + // %res = select i1 %cmp2, i32 %x, i32 %noty + // + // Same is applicable for smin/umax/umin. + { + Value *LHS, *RHS; + SelectPatternFlavor SPF = matchSelectPattern(Op0, LHS, RHS).Flavor; + if (Op0->hasOneUse() && SelectPatternResult::isMinOrMax(SPF) && + match(Op1, m_AllOnes())) { + + Value *X; + if (match(RHS, m_Not(m_Value(X)))) + std::swap(RHS, LHS); + + if (match(LHS, m_Not(m_Value(X)))) { + Value *NotY = Builder.CreateNot(RHS); + return SelectInst::Create( + Builder.CreateICmp(getInverseMinMaxPred(SPF), X, NotY), X, NotY); + } + } + } + return Changed ? &I : nullptr; } Index: lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- lib/Transforms/InstCombine/InstructionCombining.cpp +++ lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2613,6 +2613,7 @@ return false; case EHPersonality::GNU_CXX: case EHPersonality::GNU_CXX_SjLj: + case EHPersonality::GNU_CXX_Wasm: case EHPersonality::GNU_ObjC: case EHPersonality::MSVC_X86SEH: case EHPersonality::MSVC_Win64SEH: Index: runtimes/CMakeLists.txt =================================================================== --- runtimes/CMakeLists.txt +++ runtimes/CMakeLists.txt @@ -107,6 +107,13 @@ endif() endif() + # Avoid checking whether the compiler is working. 
+ set(LLVM_COMPILER_CHECKED ON) + + # Handle common options used by all runtimes. + include(AddLLVM) + include(HandleLLVMOptions) + set(CMAKE_REQUIRED_FLAGS ${SAFE_CMAKE_REQUIRED_FLAGS}) set(CMAKE_REQUIRED_LIBRARIES ${SAFE_CMAKE_REQUIRED_LIBRARIES}) Index: test/CodeGen/ARM/fp16-instructions.ll =================================================================== --- test/CodeGen/ARM/fp16-instructions.ll +++ test/CodeGen/ARM/fp16-instructions.ll @@ -4,11 +4,11 @@ ; SOFTFP: ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3 -; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16 +; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-A32 ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16 ; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3 -; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16 +; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-T32 ; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16 ; Test fast-isel @@ -703,37 +703,167 @@ ret half %2 ; CHECK-LABEL: select_cc1: + +; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s0 +; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-HARDFP-FULLFP16: vseleq.f16 s0, s{{.}}, s{{.}} + +; CHECK-SOFTFP-FP16-A32: vcmp.f32 s0, s0 +; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-SOFTFP-FP16-A32-NEXT: vmoveq.f32 s{{.}}, s{{.}} + +; CHECK-SOFTFP-FP16-T32: vcmp.f32 s0, s0 +; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr +; CHECK-SOFTFP-FP16-T32: it eq +; CHECK-SOFTFP-FP16-T32: vmoveq.f32 s{{.}}, s{{.}} } +; FIXME: more 
tests need to be added for VSELGE and VSELGT. +; That is, more combinations of immediate operands that can or can't +; be encoded as an FP16 immediate need to be added here. +; ; 36. VSELGE -define half @select_cc2() { +define half @select_cc_ge1() { %1 = fcmp nsz oge half undef, 0xH0001 %2 = select i1 %1, half 0xHC000, half 0xH0002 ret half %2 -; CHECK-LABEL: select_cc2: -; CHECK-HARDFP-FULLFP16: vselge.f16 s0, s{{.}}, s{{.}} +; CHECK-LABEL: select_cc_ge1: + +; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0 +; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}} + +; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0 +; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-SOFTFP-FP16-A32-NEXT: vmovge.f32 s{{.}}, s{{.}} + +; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0 +; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-SOFTFP-FP16-T32-NEXT: it ge +; CHECK-SOFTFP-FP16-T32-NEXT: vmovge.f32 s{{.}}, s{{.}} +} + +; +; FIXME: add fcmp ole, ult here. +; + +define half @select_cc_ge3() { + %1 = fcmp nsz ugt half undef, 0xH0001 + %2 = select i1 %1, half 0xHC000, half 0xH0002 + ret half %2 + +; CHECK-LABEL: select_cc_ge3: + +; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0 +; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}} + +; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0 +; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-SOFTFP-FP16-A32-NEXT: vmovhi.f32 s{{.}}, s{{.}} + +; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0 +; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-SOFTFP-FP16-T32-NEXT: it hi +; CHECK-SOFTFP-FP16-T32-NEXT: vmovhi.f32 s{{.}}, s{{.}} } ; 37. 
VSELGT -define half @select_cc3() { +define half @select_cc_gt1() { %1 = fcmp nsz ogt half undef, 0xH0001 %2 = select i1 %1, half 0xHC000, half 0xH0002 ret half %2 -; CHECK-LABEL: select_cc3: -; CHECK-HARDFP-FULLFP16: vselgt.f16 s0, s{{.}}, s{{.}} +; CHECK-LABEL: select_cc_gt1: + +; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0 +; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}} + +; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0 +; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-SOFTFP-FP16-A32-NEXT: vmovgt.f32 s{{.}}, s{{.}} + +; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0 +; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-SOFTFP-FP16-T32-NEXT: it gt +; CHECK-SOFTFP-FP16-T32-NEXT: vmovgt.f32 s{{.}}, s{{.}} } -; 38. VSELVS -define half @select_cc4() { - %1 = fcmp nsz ueq half undef, 0xH0001 +define half @select_cc_gt2() { + %1 = fcmp nsz uge half undef, 0xH0001 %2 = select i1 %1, half 0xHC000, half 0xH0002 ret half %2 +; CHECK-LABEL: select_cc_gt2: + +; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0 +; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}} + +; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0 +; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-SOFTFP-FP16-A32-NEXT: vmovpl.f32 s{{.}}, s{{.}} + +; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0 +; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-SOFTFP-FP16-T32-NEXT: it pl +; CHECK-SOFTFP-FP16-T32-NEXT: vmovpl.f32 s{{.}}, s{{.}} +} + +; +; FIXME: add fcmp ule, olt here. +; + +; 38. 
VSELVS +define float @select_cc4(float %a.coerce) { +entry: + %0 = bitcast float %a.coerce to i32 + %tmp.0.extract.trunc = trunc i32 %0 to i16 + %1 = bitcast i16 %tmp.0.extract.trunc to half + + %2 = fcmp nsz ueq half %1, 0xH0001 + %3 = select i1 %2, half 0xHC000, half 0xH0002 + + %4 = bitcast half %3 to i16 + %tmp4.0.insert.ext = zext i16 %4 to i32 + %5 = bitcast i32 %tmp4.0.insert.ext to float + ret float %5 + ; CHECK-LABEL: select_cc4: -; CHECK-HARDFP-FULLFP16: vselvs.f16 s0, s{{.}}, s{{.}} + +; CHECK-HARDFP-FULLFP16: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}} +; CHECK-HARDFP-FULLFP16: vldr.16 [[S4:s[0-9]]], .LCPI{{.*}} +; CHECK-HARDFP-FULLFP16: vmov.f16 [[S6:s[0-9]]], #-2.000000e+00 +; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, [[S2]] +; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-HARDFP-FULLFP16-NEXT: vseleq.f16 [[S0:s[0-9]]], [[S6]], [[S4]] +; CHECK-HARDFP-FULLFP16-NEXT: vselvs.f16 s0, [[S6]], [[S0]] + +; CHECK-SOFTFP-FP16-A32: vmov [[S6:s[0-9]]], r0 +; CHECK-SOFTFP-FP16-A32: vldr s0, .LCP{{.*}} +; CHECK-SOFTFP-FP16-A32: vcvtb.f32.f16 [[S6]], [[S6]] +; CHECK-SOFTFP-FP16-A32: vmov.f32 [[S2:s[0-9]]], #-2.000000e+00 +; CHECK-SOFTFP-FP16-A32: vcmp.f32 [[S6]], s0 +; CHECK-SOFTFP-FP16-A32: vldr [[S4:s[0-9]]], .LCPI{{.*}} +; CHECK-SOFTFP-FP16-A32: vmrs APSR_nzcv, fpscr +; CHECK-SOFTFP-FP16-A32: vmoveq.f32 [[S4]], [[S2]] +; CHECK-SOFTFP-FP16-A32-NEXT: vmovvs.f32 [[S4]], [[S2]] +; CHECK-SOFTFP-FP16-A32-NEXT: vcvtb.f16.f32 s0, [[S4]] + +; CHECK-SOFTFP-FP16-T32: vmov [[S6:s[0-9]]], r0 +; CHECK-SOFTFP-FP16-T32: vldr s0, .LCP{{.*}} +; CHECK-SOFTFP-FP16-T32: vcvtb.f32.f16 [[S6]], [[S6]] +; CHECK-SOFTFP-FP16-T32: vmov.f32 [[S2:s[0-9]]], #-2.000000e+00 +; CHECK-SOFTFP-FP16-T32: vcmp.f32 [[S6]], s0 +; CHECK-SOFTFP-FP16-T32: vldr [[S4:s[0-9]]], .LCPI{{.*}} +; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr +; CHECK-SOFTFP-FP16-T32: it eq +; CHECK-SOFTFP-FP16-T32: vmoveq.f32 [[S4]], [[S2]] +; CHECK-SOFTFP-FP16-T32: it vs +; CHECK-SOFTFP-FP16-T32-NEXT: vmovvs.f32 [[S4]], [[S2]] +; 
CHECK-SOFTFP-FP16-T32-NEXT: vcvtb.f16.f32 s0, [[S4]] } ; 39. VSQRT - TODO Index: test/CodeGen/WebAssembly/wasmehprepare.ll =================================================================== --- /dev/null +++ test/CodeGen/WebAssembly/wasmehprepare.ll @@ -0,0 +1,317 @@ +; RUN: opt < %s -winehprepare -demote-catchswitch-only -wasmehprepare -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +; CHECK: @__wasm_lpad_context = external global { i32, i8*, i32 } + +@_ZTIi = external constant i8* +%struct.Cleanup = type { i8 } + +; A single 'catch (int)' clause. +; A wasm.catch() call, wasm.lsda() call, and personality call to generate a +; selector should all be generated after the catchpad. +define void @test0() personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) { +; CHECK-LABEL: @test0() +entry: + invoke void @foo() + to label %try.cont unwind label %catch.dispatch + +catch.dispatch: ; preds = %entry + %0 = catchswitch within none [label %catch.start] unwind to caller + +catch.start: ; preds = %catch.dispatch + %1 = catchpad within %0 [i8* bitcast (i8** @_ZTIi to i8*)] + %2 = call i8* @llvm.wasm.get.exception() + %3 = call i32 @llvm.wasm.get.ehselector() + %4 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) + %matches = icmp eq i32 %3, %4 + br i1 %matches, label %catch, label %rethrow +; CHECK: catch.start: +; CHECK-NEXT: %[[CATCHPAD:.*]] = catchpad +; CHECK-NEXT: %[[EXN:.*]] = call i8* @llvm.wasm.catch(i32 0) +; CHECK-NEXT: call void @llvm.wasm.landingpad.index(i32 0) +; CHECK-NEXT: store volatile i32 0, i32* getelementptr inbounds ({ i32, i8*, i32 }, { i32, i8*, i32 }* @__wasm_lpad_context, i32 0, i32 0) +; CHECK-NEXT: %[[LSDA:.*]] = call i8* @llvm.wasm.lsda() +; CHECK-NEXT: store volatile i8* %[[LSDA]], i8** getelementptr inbounds ({ i32, i8*, i32 }, { i32, i8*, i32 }* @__wasm_lpad_context, i32 0, i32 1) +; CHECK-NEXT: call i32 @_Unwind_CallPersonality(i8* 
%[[EXN]]) {{.*}} [ "funclet"(token %[[CATCHPAD]]) ] +; CHECK-NEXT: %[[SELECTOR:.*]] = load i32, i32* getelementptr inbounds ({ i32, i8*, i32 }, { i32, i8*, i32 }* @__wasm_lpad_context, i32 0, i32 2) +; CHECK: icmp eq i32 %[[SELECTOR]] + +catch: ; preds = %catch.start + %5 = call i8* @__cxa_begin_catch(i8* %2) [ "funclet"(token %1) ] + call void @__cxa_end_catch() [ "funclet"(token %1) ] + catchret from %1 to label %try.cont +; CHECK: catch: +; CHECK-NEXT: call i8* @__cxa_begin_catch(i8* %[[EXN]]) + +rethrow: ; preds = %catch.start + call void @__cxa_rethrow() [ "funclet"(token %1) ] + unreachable + +try.cont: ; preds = %entry, %catch + ret void +} + +; Two try-catches, one of them is with a single 'catch (...)' clause. +; For the catchpad with a single 'catch (...)', only a wasm.catch() call should +; be generated after the catchpad; wasm.landingpad.index() and personality call +; should NOT be generated. For the other catchpad, the argument of +; wasm.landingpad.index() should be not 1 but 0. 
+define void @test1() personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) { +; CHECK-LABEL: @test1() +entry: + invoke void @foo() + to label %try.cont unwind label %catch.dispatch + +catch.dispatch: ; preds = %entry + %0 = catchswitch within none [label %catch.start] unwind to caller + +catch.start: ; preds = %catch.dispatch + %1 = catchpad within %0 [i8* null] + %2 = call i8* @llvm.wasm.get.exception() + %3 = call i32 @llvm.wasm.get.ehselector() + %4 = call i8* @__cxa_begin_catch(i8* %2) [ "funclet"(token %1) ] + call void @__cxa_end_catch() [ "funclet"(token %1) ] + catchret from %1 to label %try.cont +; CHECK: catch.start: +; CHECK-NEXT: catchpad within %0 [i8* null] +; CHECK-NEXT: call i8* @llvm.wasm.catch(i32 0) +; CHECK-NOT: call void @llvm.wasm.landingpad.index +; CHECK-NOT: store {{.*}} @__wasm_lpad_context +; CHECK-NOT: call i8* @llvm.wasm.lsda() +; CHECK-NOT: call i32 @_Unwind_CallPersonality +; CHECK-NOT: load {{.*}} @__wasm_lpad_context + +try.cont: ; preds = %entry, %catch.start + invoke void @foo() + to label %try.cont7 unwind label %catch.dispatch2 + +catch.dispatch2: ; preds = %try.cont + %5 = catchswitch within none [label %catch.start3] unwind to caller + +catch.start3: ; preds = %catch.dispatch2 + %6 = catchpad within %5 [i8* bitcast (i8** @_ZTIi to i8*)] + %7 = call i8* @llvm.wasm.get.exception() + %8 = call i32 @llvm.wasm.get.ehselector() + %9 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) + %matches = icmp eq i32 %8, %9 + br i1 %matches, label %catch4, label %rethrow +; CHECK: catch.start3: +; CHECK: call void @llvm.wasm.landingpad.index(i32 0) + +catch4: ; preds = %catch.start3 + %10 = call i8* @__cxa_begin_catch(i8* %7) [ "funclet"(token %6) ] + call void @__cxa_end_catch() [ "funclet"(token %6) ] + catchret from %6 to label %try.cont7 + +rethrow: ; preds = %catch.start3 + call void @__cxa_rethrow() [ "funclet"(token %6) ] + unreachable + +try.cont7: ; preds = %try.cont, %catch4 + ret void +} + +; A 
nested try-catch within a catch. Within the nested catchpad, wasm.lsda() +; call should NOT be generated. +define void @test2() personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) { +; CHECK-LABEL: @test2() +entry: + invoke void @foo() + to label %try.cont9 unwind label %catch.dispatch + +catch.dispatch: ; preds = %entry + %0 = catchswitch within none [label %catch.start] unwind to caller + +catch.start: ; preds = %catch.dispatch + %1 = catchpad within %0 [i8* bitcast (i8** @_ZTIi to i8*)] + %2 = call i8* @llvm.wasm.get.exception() + %3 = call i32 @llvm.wasm.get.ehselector() + %4 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) + %matches = icmp eq i32 %3, %4 + br i1 %matches, label %catch, label %rethrow +; CHECK: catch.start: +; CHECK: call i8* @llvm.wasm.lsda() + +catch: ; preds = %catch.start + %5 = call i8* @__cxa_begin_catch(i8* %2) [ "funclet"(token %1) ] + invoke void @foo() [ "funclet"(token %1) ] + to label %try.cont unwind label %catch.dispatch2 + +catch.dispatch2: ; preds = %catch + %6 = catchswitch within %1 [label %catch.start3] unwind label %ehcleanup + +catch.start3: ; preds = %catch.dispatch2 + %7 = catchpad within %6 [i8* bitcast (i8** @_ZTIi to i8*)] + %8 = call i8* @llvm.wasm.get.exception() + %9 = call i32 @llvm.wasm.get.ehselector() + %10 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) + %matches4 = icmp eq i32 %9, %10 + br i1 %matches4, label %catch6, label %rethrow5 +; CHECK: catch.start3: +; CHECK-NOT: call i8* @llvm.wasm.lsda() + +catch6: ; preds = %catch.start3 + %11 = call i8* @__cxa_begin_catch(i8* %8) [ "funclet"(token %7) ] + call void @__cxa_end_catch() [ "funclet"(token %7) ] + catchret from %7 to label %try.cont + +rethrow5: ; preds = %catch.start3 + invoke void @__cxa_rethrow() [ "funclet"(token %7) ] + to label %unreachable unwind label %ehcleanup + +try.cont: ; preds = %catch, %catch6 + call void @__cxa_end_catch() [ "funclet"(token %1) ] + catchret from %1 to label %try.cont9 
+ +rethrow: ; preds = %catch.start + call void @__cxa_rethrow() [ "funclet"(token %1) ] + unreachable + +try.cont9: ; preds = %entry, %try.cont + ret void + +ehcleanup: ; preds = %rethrow5, %catch.dispatch2 + %12 = cleanuppad within %1 [] + call void @__cxa_end_catch() [ "funclet"(token %12) ] + cleanupret from %12 unwind to caller +; CHECK: ehcleanup: +; CHECK-NEXT: cleanuppad +; CHECK-NOT: call i8* @llvm.wasm.catch(i32 0) +; CHECK-NOT: call void @llvm.wasm.landingpad.index +; CHECK-NOT: store {{.*}} @__wasm_lpad_context +; CHECK-NOT: call i8* @llvm.wasm.lsda() +; CHECK-NOT: call i32 @_Unwind_CallPersonality +; CHECK-NOT: load {{.*}} @__wasm_lpad_context + +unreachable: ; preds = %rethrow5 + unreachable +} + +; A cleanuppad with a call to __clang_call_terminate(). +; A call to wasm.catch() should be generated after the cleanuppad. +define hidden void @test3() personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) { +; CHECK-LABEL: @test3 +entry: + invoke void @foo() + to label %try.cont unwind label %catch.dispatch + +catch.dispatch: ; preds = %entry + %0 = catchswitch within none [label %catch.start] unwind to caller + +catch.start: ; preds = %catch.dispatch + %1 = catchpad within %0 [i8* null] + %2 = call i8* @llvm.wasm.get.exception() + %3 = call i32 @llvm.wasm.get.ehselector() + %4 = call i8* @__cxa_begin_catch(i8* %2) [ "funclet"(token %1) ] + invoke void @foo() [ "funclet"(token %1) ] + to label %invoke.cont1 unwind label %ehcleanup + +invoke.cont1: ; preds = %catch.start + call void @__cxa_end_catch() [ "funclet"(token %1) ] + catchret from %1 to label %try.cont + +try.cont: ; preds = %entry, %invoke.cont1 + ret void + +ehcleanup: ; preds = %catch.start + %5 = cleanuppad within %1 [] + invoke void @__cxa_end_catch() [ "funclet"(token %5) ] + to label %invoke.cont2 unwind label %terminate + +invoke.cont2: ; preds = %ehcleanup + cleanupret from %5 unwind to caller + +terminate: ; preds = %ehcleanup + %6 = cleanuppad within %5 [] + %7 = call 
i8* @llvm.wasm.get.exception() + call void @__clang_call_terminate(i8* %7) [ "funclet"(token %6) ] + unreachable +; CHECK: terminate: +; CHECK-NEXT: cleanuppad +; CHECK-NEXT: %[[EXN:.*]] = call i8* @llvm.wasm.catch(i32 0) +; CHECK-NEXT: call void @__clang_call_terminate(i8* %[[EXN]]) +} + +; PHI demotion test. Only the phi before catchswitch should be demoted; the phi +; before cleanuppad should NOT. +define void @test5() personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) { +; CHECK-LABEL: @test5 +entry: + %c = alloca %struct.Cleanup, align 1 + invoke void @foo() + to label %invoke.cont unwind label %ehcleanup + +invoke.cont: ; preds = %entry + invoke void @foo() + to label %invoke.cont1 unwind label %ehcleanup + +invoke.cont1: ; preds = %invoke.cont + %call = call %struct.Cleanup* @_ZN7CleanupD1Ev(%struct.Cleanup* %c) + br label %try.cont + +ehcleanup: ; preds = %invoke.cont, %entry + %num.0 = phi i32 [ 2, %invoke.cont ], [ 1, %entry ] + %0 = cleanuppad within none [] + %call2 = call %struct.Cleanup* @_ZN7CleanupD1Ev(%struct.Cleanup* %c) [ "funclet"(token %0) ] + cleanupret from %0 unwind label %catch.dispatch +; CHECK: ehcleanup: +; CHECK-NEXT: = phi + +catch.dispatch: ; preds = %ehcleanup + %1 = catchswitch within none [label %catch.start] unwind to caller + +catch.start: ; preds = %catch.dispatch + %2 = catchpad within %1 [i8* null] + %3 = call i8* @llvm.wasm.get.exception() + %4 = call i32 @llvm.wasm.get.ehselector() + %5 = call i8* @__cxa_begin_catch(i8* %3) [ "funclet"(token %2) ] + call void @func(i32 %num.0) [ "funclet"(token %2) ] + call void @__cxa_end_catch() [ "funclet"(token %2) ] + catchret from %2 to label %try.cont + +try.cont: ; preds = %catch.start, %invoke.cont1 + invoke void @foo() + to label %invoke.cont3 unwind label %catch.dispatch5 + +invoke.cont3: ; preds = %try.cont + invoke void @foo() + to label %try.cont10 unwind label %catch.dispatch5 + +catch.dispatch5: ; preds = %invoke.cont3, %try.cont + %num.1 = phi i32 [ 2, 
%invoke.cont3 ], [ 1, %try.cont ] + %6 = catchswitch within none [label %catch.start6] unwind to caller +; CHECK: catch.dispatch5: +; CHECK-NOT: = phi + +catch.start6: ; preds = %catch.dispatch5 + %7 = catchpad within %6 [i8* null] + %8 = call i8* @llvm.wasm.get.exception() + %9 = call i32 @llvm.wasm.get.ehselector() + %10 = call i8* @__cxa_begin_catch(i8* %8) [ "funclet"(token %7) ] + call void @func(i32 %num.1) [ "funclet"(token %7) ] + call void @__cxa_end_catch() [ "funclet"(token %7) ] + catchret from %7 to label %try.cont10 + +try.cont10: ; preds = %invoke.cont3, %catch.start6 + ret void +} + +declare void @foo() +declare void @func(i32) +declare %struct.Cleanup* @_ZN7CleanupD1Ev(%struct.Cleanup* returned) +declare i32 @__gxx_wasm_personality_v0(...) +declare i8* @llvm.wasm.get.exception() +declare i32 @llvm.wasm.get.ehselector() +declare i32 @llvm.eh.typeid.for(i8*) +declare i8* @__cxa_begin_catch(i8*) +declare void @__cxa_end_catch() +declare void @__cxa_rethrow() +declare void @__clang_call_terminate(i8*) + +; CHECK-DAG: declare i8* @llvm.wasm.catch(i32) +; CHECK-DAG: declare void @llvm.wasm.landingpad.index(i32) +; CHECK-DAG: declare i8* @llvm.wasm.lsda() +; CHECK-DAG: declare i32 @_Unwind_CallPersonality(i8*) + Index: test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll =================================================================== --- test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll @@ -2693,3 +2693,51 @@ %res2 = add <32 x i16> %res, %res1 ret <32 x i16> %res2 } + +declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32) + +define <32 x i16>@test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) { +; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512: +; AVX512BW: ## %bb.0: +; AVX512BW-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm3 +; AVX512BW-NEXT: kmovd %edi, %k1 +; AVX512BW-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} +; 
AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm0 +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512: +; AVX512F-32: # %bb.0: +; AVX512F-32-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm3 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm0 +; AVX512F-32-NEXT: retl + %res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) + %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 -1) + %res2 = add <32 x i16> %res, %res1 + ret <32 x i16> %res2 +} + +declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <16 x i32>, i16) + +define <16 x i32>@test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) { +; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaddw_d_512: +; AVX512BW: ## %bb.0: +; AVX512BW-NEXT: vpmaddwd %zmm1, %zmm0, %zmm3 +; AVX512BW-NEXT: kmovd %edi, %k1 +; AVX512BW-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} +; AVX512BW-NEXT: vpaddd %zmm3, %zmm2, %zmm0 +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaddw_d_512: +; AVX512F-32: # %bb.0: +; AVX512F-32-NEXT: vpmaddwd %zmm1, %zmm0, %zmm3 +; AVX512F-32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vpaddd %zmm3, %zmm2, %zmm0 +; AVX512F-32-NEXT: retl + %res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) + %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 -1) + %res2 = add <16 x i32> %res, %res1 + ret <16 x i32> %res2 +} Index: test/CodeGen/X86/avx512bw-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512bw-intrinsics.ll +++ test/CodeGen/X86/avx512bw-intrinsics.ll @@ 
-1361,51 +1361,55 @@ ret void } -declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32) +declare <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8>, <64 x i8>) -define <32 x i16>@test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) { +define <32 x i16> @test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) { ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512: ; AVX512BW: ## %bb.0: +; AVX512BW-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm3 ; AVX512BW-NEXT: kmovd %edi, %k1 ; AVX512BW-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} -; AVX512BW-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 +; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512: ; AVX512F-32: # %bb.0: +; AVX512F-32-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm3 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 ; AVX512F-32-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} -; AVX512F-32-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm0 ; AVX512F-32-NEXT: retl - %res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) - %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 -1) - %res2 = add <32 x i16> %res, %res1 + %1 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1) + %2 = bitcast i32 %x3 to <32 x i1> + %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2 + %4 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1) + %res2 = add <32 x i16> %3, %4 ret <32 x i16> %res2 } -declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <16 x i32>, i16) +declare <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16>, <32 x i16>) -define <16 x 
i32>@test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) { +define <16 x i32> @test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) { ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaddw_d_512: ; AVX512BW: ## %bb.0: +; AVX512BW-NEXT: vpmaddwd %zmm1, %zmm0, %zmm3 ; AVX512BW-NEXT: kmovd %edi, %k1 ; AVX512BW-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} -; AVX512BW-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpaddd %zmm0, %zmm2, %zmm0 +; AVX512BW-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaddw_d_512: ; AVX512F-32: # %bb.0: +; AVX512F-32-NEXT: vpmaddwd %zmm1, %zmm0, %zmm3 ; AVX512F-32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ; AVX512F-32-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} -; AVX512F-32-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: vpaddd %zmm0, %zmm2, %zmm0 +; AVX512F-32-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ; AVX512F-32-NEXT: retl - %res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) - %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 -1) - %res2 = add <16 x i32> %res, %res1 + %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1) + %2 = bitcast i16 %x3 to <16 x i1> + %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2 + %4 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1) + %res2 = add <16 x i32> %3, %4 ret <16 x i32> %res2 } Index: test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll =================================================================== --- test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll @@ -3946,3 +3946,67 @@ %res2 = add <16 x i16> %res, %res1 ret <16 x i16> %res2 } + +declare <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8>, <16 x i8>, <8 x i16>, i8) + +define 
<8 x i16>@test_int_x86_avx512_mask_pmaddubs_w_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x04,0xd9] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1] +; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) + %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 -1) + %res2 = add <8 x i16> %res, %res1 + ret <8 x i16> %res2 +} + +declare <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8>, <32 x i8>, <16 x i16>, i16) + +define <16 x i16>@test_int_x86_avx512_mask_pmaddubs_w_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x04,0xd9] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1] +; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) + %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 -1) + %res2 = add <16 x i16> %res, %res1 + ret <16 x i16> %res2 +} + +declare <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16>, <8 x i16>, <4 x i32>, i8) + +define <4 x 
i32>@test_int_x86_avx512_mask_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmaddw_d_128: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xd9] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1] +; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc3] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) + %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 -1) + %res2 = add <4 x i32> %res, %res1 + ret <4 x i32> %res2 +} + +declare <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16>, <16 x i16>, <8 x i32>, i8) + +define <8 x i32>@test_int_x86_avx512_mask_pmaddw_d_256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmaddw_d_256: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpmaddwd %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf5,0xd9] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1] +; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) + %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 -1) + %res2 = add <8 x i32> %res, %res1 + ret <8 x i32> %res2 +} Index: test/CodeGen/X86/avx512bwvl-intrinsics.ll =================================================================== --- 
test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -2212,67 +2212,76 @@ ret void } -declare <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16>, <8 x i16>, <4 x i32>, i8) +declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) -define <4 x i32>@test_int_x86_avx512_mask_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) { +define <4 x i32> @test_int_x86_avx512_mask_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmaddw_d_128: ; CHECK: ## %bb.0: +; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xd9] ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1] -; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xc1] -; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) - %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 -1) - %res2 = add <4 x i32> %res, %res1 + %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %x0, <8 x i16> %x1) + %2 = bitcast i8 %x3 to <8 x i1> + %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> + %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x2 + %4 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %x0, <8 x i16> %x1) + %res2 = add <4 x i32> %3, %4 ret <4 x i32> %res2 } -declare <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16>, <16 x i16>, <8 x i32>, i8) +declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) -define <8 x 
i32>@test_int_x86_avx512_mask_pmaddw_d_256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) { +define <8 x i32> @test_int_x86_avx512_mask_pmaddw_d_256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmaddw_d_256: ; CHECK: ## %bb.0: +; CHECK-NEXT: vpmaddwd %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf5,0xd9] ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1] -; CHECK-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf5,0xc1] -; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) - %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 -1) - %res2 = add <8 x i32> %res, %res1 + %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %x0, <16 x i16> %x1) + %2 = bitcast i8 %x3 to <8 x i1> + %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2 + %4 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %x0, <16 x i16> %x1) + %res2 = add <8 x i32> %3, %4 ret <8 x i32> %res2 } -declare <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8>, <16 x i8>, <8 x i16>, i8) +declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) -define <8 x i16>@test_int_x86_avx512_mask_pmaddubs_w_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) { +define <8 x i16> @test_int_x86_avx512_mask_pmaddubs_w_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128: ; CHECK: ## %bb.0: +; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression 
encoding: [0xc4,0xe2,0x79,0x04,0xd9] ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1] -; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x04,0xc1] -; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] +; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) - %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 -1) - %res2 = add <8 x i16> %res, %res1 + %1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %x0, <16 x i8> %x1) + %2 = bitcast i8 %x3 to <8 x i1> + %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2 + %4 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %x0, <16 x i8> %x1) + %res2 = add <8 x i16> %3, %4 ret <8 x i16> %res2 } -declare <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8>, <32 x i8>, <16 x i16>, i16) +declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) -define <16 x i16>@test_int_x86_avx512_mask_pmaddubs_w_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) { +define <16 x i16> @test_int_x86_avx512_mask_pmaddubs_w_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256: ; CHECK: ## %bb.0: +; CHECK-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x04,0xd9] ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1] -; CHECK-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x04,0xc1] -; CHECK-NEXT: vpaddw 
%ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] +; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) - %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 -1) - %res2 = add <16 x i16> %res, %res1 + %1 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %x0, <32 x i8> %x1) + %2 = bitcast i16 %x3 to <16 x i1> + %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2 + %4 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %x0, <32 x i8> %x1) + %res2 = add <16 x i16> %3, %4 ret <16 x i16> %res2 } Index: test/MC/AArch64/SVE/add-diagnostics.s =================================================================== --- test/MC/AArch64/SVE/add-diagnostics.s +++ test/MC/AArch64/SVE/add-diagnostics.s @@ -8,7 +8,7 @@ // Invalid element kind. add z20.h, z2.h, z31.x -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid sve vector kind qualifier +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier // CHECK-NEXT: add z20.h, z2.h, z31.x // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: Index: test/MC/AArch64/SVE/sub-diagnostics.s =================================================================== --- test/MC/AArch64/SVE/sub-diagnostics.s +++ test/MC/AArch64/SVE/sub-diagnostics.s @@ -8,7 +8,7 @@ // Invalid element kind. 
sub z4.h, z27.h, z31.x -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid sve vector kind qualifier +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier // CHECK-NEXT: sub z4.h, z27.h, z31.x // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: Index: test/MC/AArch64/SVE/zip1-diagnostics.s =================================================================== --- test/MC/AArch64/SVE/zip1-diagnostics.s +++ test/MC/AArch64/SVE/zip1-diagnostics.s @@ -2,7 +2,7 @@ // Invalid element kind. zip1 z10.h, z22.h, z31.x -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid sve vector kind qualifier +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier // CHECK-NEXT: zip1 z10.h, z22.h, z31.x // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: Index: test/MC/AArch64/SVE/zip2-diagnostics.s =================================================================== --- test/MC/AArch64/SVE/zip2-diagnostics.s +++ test/MC/AArch64/SVE/zip2-diagnostics.s @@ -2,7 +2,7 @@ // Invalid element kind. zip2 z6.h, z23.h, z31.x -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid sve vector kind qualifier +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier // CHECK-NEXT: zip2 z6.h, z23.h, z31.x // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: Index: test/Transforms/InstCombine/max-of-nots.ll =================================================================== --- test/Transforms/InstCombine/max-of-nots.ll +++ test/Transforms/InstCombine/max-of-nots.ll @@ -238,10 +238,9 @@ ; CHECK-LABEL: @compute_min_pessimization( ; CHECK-NEXT: [[NOT_VALUE:%.*]] = sub i32 3, [[X:%.*]] ; CHECK-NEXT: call void @fake_use(i32 [[NOT_VALUE]]) -; CHECK-NEXT: [[NOT_Y:%.*]] = xor i32 [[Y:%.*]], -1 -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[NOT_VALUE]], [[NOT_Y]] -; CHECK-NEXT: [[NOT_MIN:%.*]] = select i1 [[CMP]], i32 [[NOT_VALUE]], i32 [[NOT_Y]] -; CHECK-NEXT: [[MIN:%.*]] = xor i32 [[NOT_MIN]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: 
[[MIN:%.*]] = select i1 [[TMP2]], i32 [[Y]], i32 [[TMP1]] ; CHECK-NEXT: ret i32 [[MIN]] ; %not_value = sub i32 3, %x Index: test/Transforms/InstCombine/xor.ll =================================================================== --- test/Transforms/InstCombine/xor.ll +++ test/Transforms/InstCombine/xor.ll @@ -575,3 +575,147 @@ %xor = xor i32 %and, %B ret i32 %xor } + +; The tests 39-47 are related to the canonicalization: +; %notx = xor i32 %x, -1 +; %cmp = icmp sgt i32 %notx, %y +; %smax = select i1 %cmp, i32 %notx, i32 %y +; %res = xor i32 %smax, -1 +; => +; %noty = xor i32 %y, -1 +; %cmp2 = icmp slt %x, %noty +; %res = select i1 %cmp2, i32 %x, i32 %noty +; +; Same transformations is valid for smin/umax/umin. + +define i32 @test39(i32 %x) { +; CHECK-LABEL: @test39( +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 255 +; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 255 +; CHECK-NEXT: ret i32 [[RES]] +; + %1 = xor i32 %x, -1 + %2 = icmp sgt i32 %1, -256 + %3 = select i1 %2, i32 %1, i32 -256 + %res = xor i32 %3, -1 + ret i32 %res +} + +define i32 @test40(i32 %x, i32 %y) { +; CHECK-LABEL: @test40( +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[Y:%.*]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP2]], i32 [[X]], i32 [[TMP1]] +; CHECK-NEXT: ret i32 [[RES]] +; + %notx = xor i32 %x, -1 + %cmp1 = icmp sgt i32 %notx, %y + %smax = select i1 %cmp1, i32 %notx, i32 %y + %res = xor i32 %smax, -1 + ret i32 %res +} + +define i32 @test41(i32 %x, i32 %y) { +; CHECK-LABEL: @test41( +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[Y:%.*]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP2]], i32 [[X]], i32 [[TMP1]] +; CHECK-NEXT: ret i32 [[RES]] +; + %notx = xor i32 %x, -1 + %cmp1 = icmp slt i32 %notx, %y + %smin = select i1 %cmp1, i32 %notx, i32 %y + %res = xor i32 %smin, -1 + ret i32 %res +} + +define i32 @test42(i32 %x, i32 %y) { +; CHECK-LABEL: 
@test42( +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[Y:%.*]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP2]], i32 [[X]], i32 [[TMP1]] +; CHECK-NEXT: ret i32 [[RES]] +; + %notx = xor i32 %x, -1 + %cmp1 = icmp ugt i32 %notx, %y + %umax = select i1 %cmp1, i32 %notx, i32 %y + %res = xor i32 %umax, -1 + ret i32 %res +} + +define i32 @test43(i32 %x, i32 %y) { +; CHECK-LABEL: @test43( +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[Y:%.*]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP2]], i32 [[X]], i32 [[TMP1]] +; CHECK-NEXT: ret i32 [[RES]] +; + %notx = xor i32 %x, -1 + %cmp1 = icmp ult i32 %notx, %y + %umin = select i1 %cmp1, i32 %notx, i32 %y + %res = xor i32 %umin, -1 + ret i32 %res +} + +define i32 @test44(i32 %x, i32 %y) { +; CHECK-LABEL: @test44( +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 -4, [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP2]], i32 [[X]], i32 [[TMP1]] +; CHECK-NEXT: ret i32 [[RES]] +; + %z = add i32 %y, 3 ; thwart complexity-based canonicalization + %notx = xor i32 %x, -1 + %cmp1 = icmp ult i32 %z, %notx + %umin = select i1 %cmp1, i32 %z, i32 %notx + %res = xor i32 %umin, -1 + ret i32 %res +} + +define i32 @test45(i32 %x, i32 %y) { +; CHECK-LABEL: @test45( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[Y]], i32 [[X]] +; CHECK-NEXT: ret i32 [[TMP2]] +; + %z = xor i32 %y, -1 + %notx = xor i32 %x, -1 + %cmp1 = icmp ult i32 %z, %notx + %umin = select i1 %cmp1, i32 %z, i32 %notx + %res = xor i32 %umin, -1 + ret i32 %res +} + +; Check that we work with splat vectors also. 
+define <4 x i32> @test46(<4 x i32> %x) { +; CHECK-LABEL: @test46( +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[X]], <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[TMP2]] +; + %1 = xor <4 x i32> %x, + %2 = icmp sgt <4 x i32> %1, + %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> + %4 = xor <4 x i32> %3, + ret <4 x i32> %4 +} + +; Test case when select pattern has more than one use. +define i32 @test47(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: @test47( +; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[NOTX]], [[Y:%.*]] +; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[CMP1]], i32 [[NOTX]], i32 [[Y]] +; CHECK-NEXT: [[UMIN:%.*]] = xor i32 [[UMAX]], -1 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[UMAX]], [[Z:%.*]] +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[ADD]], [[UMIN]] +; CHECK-NEXT: ret i32 [[RES]] +; + %notx = xor i32 %x, -1 + %cmp1 = icmp ugt i32 %notx, %y + %umax = select i1 %cmp1, i32 %notx, i32 %y + %umin = xor i32 %umax, -1 + %add = add i32 %umax, %z + %res = mul i32 %umin, %add + ret i32 %res +} Index: test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s =================================================================== --- test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s +++ test/tools/llvm-mca/AArch64/Exynos/scheduler-queue-usage.s @@ -1,7 +1,13 @@ -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -iterations=1 -verbose -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefix=ALL -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m1 -iterations=1 -verbose -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefix=ALL +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -iterations=1 -scheduler-stats -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefix=ALL +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m1 -iterations=1 -scheduler-stats -resource-pressure=false 
-instruction-info=false < %s | FileCheck %s -check-prefix=ALL - b t + b t + +# ALL: Schedulers - number of cycles where we saw N instructions issued: +# ALL-NEXT: [# issued], [# cycles] +# ALL-NEXT: 0, 1 (50.0%) +# ALL-NEXT: 1, 1 (50.0%) # ALL: Scheduler's queue usage: # ALL-NEXT: No scheduler resources used. + Index: test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s =================================================================== --- /dev/null +++ test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s @@ -0,0 +1,56 @@ +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -resource-pressure=false -retire-stats -iterations=1 < %s | FileCheck %s + + vsqrtps %xmm0, %xmm2 + vaddps %xmm0, %xmm1, %xmm2 + vaddps %xmm0, %xmm1, %xmm2 + vaddps %xmm0, %xmm1, %xmm2 + vaddps %xmm0, %xmm1, %xmm2 + vaddps %xmm0, %xmm1, %xmm2 + vaddps %xmm0, %xmm1, %xmm2 + vaddps %xmm0, %xmm1, %xmm2 + vaddps %xmm0, %xmm1, %xmm2 + vaddps %xmm0, %xmm1, %xmm2 + vaddps %xmm0, %xmm1, %xmm2 + vaddps %xmm0, %xmm1, %xmm2 + vaddps %xmm0, %xmm1, %xmm2 + vaddps %xmm0, %xmm1, %xmm2 + vaddps %xmm0, %xmm1, %xmm2 + vaddps %xmm0, %xmm1, %xmm2 + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 16 +# CHECK-NEXT: Total Cycles: 31 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.52 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 21 21.00 vsqrtps %xmm0, %xmm2 +# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 3 1.00 
vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2 + +# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired: +# CHECK-NEXT: [# retired], [# cycles] +# CHECK-NEXT: 0, 23 (74.2%) +# CHECK-NEXT: 2, 8 (25.8%) + Index: test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s +++ test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s @@ -1,8 +1,31 @@ -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -verbose < %s | FileCheck %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -scheduler-stats < %s | FileCheck %s vmulps (%rsi), %xmm0, %xmm0 add %rsi, %rsi +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 10 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.20 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 7 1.00 * vmulps (%rsi), %xmm0, %xmm0 +# CHECK-NEXT: 1 1 0.50 addq %rsi, %rsi + +# CHECK: Schedulers - number of cycles where we saw N instructions issued: +# CHECK-NEXT: [# issued], [# cycles] +# CHECK-NEXT: 0, 9 (90.0%) +# CHECK-NEXT: 2, 1 (10.0%) + # CHECK: Scheduler's queue usage: # CHECK-NEXT: JALU01, 1/20 # CHECK-NEXT: JFPU01, 1/18 @@ -26,9 +49,10 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] -# CHECK-NEXT: - 1.00 - - 1.00 - 1.00 1.00 - - - - - - +# 
CHECK-NEXT: - 1.00 - - 1.00 - 1.00 1.00 - - - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: # CHECK-NEXT: - - - - 1.00 - 1.00 1.00 - - - - - - vmulps (%rsi), %xmm0, %xmm0 # CHECK-NEXT: - 1.00 - - - - - - - - - - - - addq %rsi, %rsi + Index: tools/llvm-exegesis/CMakeLists.txt =================================================================== --- tools/llvm-exegesis/CMakeLists.txt +++ tools/llvm-exegesis/CMakeLists.txt @@ -10,7 +10,6 @@ add_subdirectory(lib) target_link_libraries(llvm-exegesis PRIVATE LLVMExegesis) -if(HAVE_LIBPFM) +if(LLVM_ENABLE_LIBPFM AND HAVE_LIBPFM) target_link_libraries(llvm-exegesis PRIVATE pfm) endif() - Index: tools/llvm-mca/CMakeLists.txt =================================================================== --- tools/llvm-mca/CMakeLists.txt +++ tools/llvm-mca/CMakeLists.txt @@ -12,7 +12,6 @@ add_llvm_tool(llvm-mca Backend.cpp BackendPrinter.cpp - BackendStatistics.cpp CodeRegion.cpp Dispatch.cpp DispatchStatistics.cpp @@ -25,7 +24,9 @@ llvm-mca.cpp RegisterFileStatistics.cpp ResourcePressureView.cpp + RetireControlUnitStatistics.cpp Scheduler.cpp + SchedulerStatistics.cpp Support.cpp SummaryView.cpp TimelineView.cpp Index: tools/llvm-mca/DispatchStatistics.h =================================================================== --- tools/llvm-mca/DispatchStatistics.h +++ tools/llvm-mca/DispatchStatistics.h @@ -42,11 +42,6 @@ namespace mca { class DispatchStatistics : public View { - const llvm::MCSubtargetInfo &STI; - - using Histogram = llvm::DenseMap; - Histogram DispatchGroupSizePerCycle; - unsigned NumDispatched; unsigned NumCycles; @@ -54,6 +49,9 @@ // is one counter for every generic stall kind (see class HWStallEvent). 
llvm::SmallVector HWStalls; + using Histogram = llvm::DenseMap; + Histogram DispatchGroupSizePerCycle; + void updateHistograms() { DispatchGroupSizePerCycle[NumDispatched]++; NumDispatched = 0; @@ -62,12 +60,9 @@ void printDispatchHistogram(llvm::raw_ostream &OS) const; void printDispatchStalls(llvm::raw_ostream &OS) const; - void printDispatchUnitUsage(llvm::raw_ostream &OS, const Histogram &Stats, - unsigned Cycles) const; public: - DispatchStatistics(const llvm::MCSubtargetInfo &sti) - : STI(sti), NumDispatched(0), NumCycles(0), + DispatchStatistics() : NumDispatched(0), NumCycles(0), HWStalls(HWStallEvent::LastGenericEvent) {} void onInstructionEvent(const HWInstructionEvent &Event) override; @@ -76,10 +71,7 @@ void onCycleEnd(unsigned Cycle) override { updateHistograms(); } - void onStallEvent(const HWStallEvent &Event) override { - if (Event.Type < HWStallEvent::LastGenericEvent) - HWStalls[Event.Type]++; - } + void onStallEvent(const HWStallEvent &Event) override; void printView(llvm::raw_ostream &OS) const override { printDispatchStalls(OS); Index: tools/llvm-mca/DispatchStatistics.cpp =================================================================== --- tools/llvm-mca/DispatchStatistics.cpp +++ tools/llvm-mca/DispatchStatistics.cpp @@ -20,6 +20,11 @@ namespace mca { +void DispatchStatistics::onStallEvent(const HWStallEvent &Event) { + if (Event.Type < HWStallEvent::LastGenericEvent) + HWStalls[Event.Type]++; +} + void DispatchStatistics::onInstructionEvent(const HWInstructionEvent &Event) { if (Event.Type == HWInstructionEvent::Dispatched) ++NumDispatched; Index: tools/llvm-mca/RetireControlUnitStatistics.h =================================================================== --- /dev/null +++ tools/llvm-mca/RetireControlUnitStatistics.h @@ -0,0 +1,61 @@ +//===--------------------- RetireControlUnitStatistics.h ------------------*- +//C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois 
Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines class RetireControlUnitStatistics: a view that knows how +/// to print general statistics related to the retire control unit. +/// +/// Example: +/// ======== +/// +/// Retire Control Unit - number of cycles where we saw N instructions retired: +/// [# retired], [# cycles] +/// 0, 9 (6.9%) +/// 1, 6 (4.6%) +/// 2, 1 (0.8%) +/// 4, 3 (2.3%) +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_RETIRECONTROLUNITSTATISTICS_H +#define LLVM_TOOLS_LLVM_MCA_RETIRECONTROLUNITSTATISTICS_H + +#include "View.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/MC/MCSubtargetInfo.h" + +namespace mca { + +class RetireControlUnitStatistics : public View { + using Histogram = llvm::DenseMap; + Histogram RetiredPerCycle; + + unsigned NumRetired; + unsigned NumCycles; + + void updateHistograms() { + RetiredPerCycle[NumRetired]++; + NumRetired = 0; + } + +public: + RetireControlUnitStatistics() : NumRetired(0), NumCycles(0) {} + + void onInstructionEvent(const HWInstructionEvent &Event) override; + + void onCycleBegin(unsigned Cycle) override { NumCycles++; } + + void onCycleEnd(unsigned Cycle) override { updateHistograms(); } + + void printView(llvm::raw_ostream &OS) const override; +}; +} // namespace mca + +#endif Index: tools/llvm-mca/RetireControlUnitStatistics.cpp =================================================================== --- /dev/null +++ tools/llvm-mca/RetireControlUnitStatistics.cpp @@ -0,0 +1,51 @@ +//===--------------------- RetireControlUnitStatistics.cpp ---------------*- C++ +//-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the RetireControlUnitStatistics interface. +/// +//===----------------------------------------------------------------------===// + +#include "RetireControlUnitStatistics.h" +#include "llvm/Support/Format.h" + +using namespace llvm; + +namespace mca { + +void RetireControlUnitStatistics::onInstructionEvent( + const HWInstructionEvent &Event) { + if (Event.Type == HWInstructionEvent::Retired) + ++NumRetired; +} + +void RetireControlUnitStatistics::printView(llvm::raw_ostream &OS) const { + std::string Buffer; + raw_string_ostream TempStream(Buffer); + TempStream << "\n\nRetire Control Unit - " + << "number of cycles where we saw N instructions retired:\n"; + TempStream << "[# retired], [# cycles]\n"; + + for (const std::pair &Entry : RetiredPerCycle) { + TempStream << " " << Entry.first; + if (Entry.first < 10) + TempStream << ", "; + else + TempStream << ", "; + TempStream << Entry.second << " (" + << format("%.1f", ((double)Entry.second / NumCycles) * 100.0) + << "%)\n"; + } + + TempStream.flush(); + OS << Buffer; +} + +} // namespace mca Index: tools/llvm-mca/SchedulerStatistics.h =================================================================== --- tools/llvm-mca/SchedulerStatistics.h +++ tools/llvm-mca/SchedulerStatistics.h @@ -1,4 +1,4 @@ -//===--------------------- BackendStatistics.h ------------------*- C++ -*-===// +//===--------------------- SchedulerStatistics.h ----------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -8,8 +8,9 @@ //===----------------------------------------------------------------------===// /// \file /// -/// This file implements a printer class for printing generic Backend -/// statistics related to the scheduler and retire unit. +/// This file defines class SchedulerStatistics. 
Class SchedulerStatistics is a +/// View that listens to instruction issue events in order to print general +/// statistics related to the hardware schedulers. /// /// Example: /// ======== @@ -20,13 +21,6 @@ /// 1, 4 (3.1%) /// 2, 8 (6.2%) /// -/// Retire Control Unit - number of cycles where we saw N instructions retired: -/// [# retired], [# cycles] -/// 0, 9 (6.9%) -/// 1, 6 (4.6%) -/// 2, 1 (0.8%) -/// 4, 3 (2.3%) -/// /// Scheduler's queue usage: /// JALU01, 0/20 /// JFPU01, 18/18 @@ -34,8 +28,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_TOOLS_LLVM_MCA_BACKENDSTATISTICS_H -#define LLVM_TOOLS_LLVM_MCA_BACKENDSTATISTICS_H +#ifndef LLVM_TOOLS_LLVM_MCA_SCHEDULERSTATISTICS_H +#define LLVM_TOOLS_LLVM_MCA_SCHEDULERSTATISTICS_H #include "View.h" #include "llvm/ADT/SmallVector.h" @@ -44,15 +38,13 @@ namespace mca { -class BackendStatistics : public View { - const llvm::MCSubtargetInfo &STI; +class SchedulerStatistics : public View { + const llvm::MCSchedModel &SM; using Histogram = llvm::DenseMap; - Histogram RetiredPerCycle; Histogram IssuedPerCycle; unsigned NumIssued; - unsigned NumRetired; unsigned NumCycles; // Tracks the usage of a scheduler's queue. @@ -61,31 +53,19 @@ unsigned MaxUsedSlots; }; - // There is a map entry for each buffered resource in the scheduling model. - // Every time a buffer is consumed/freed, this view updates the corresponding - // entry. 
llvm::DenseMap BufferedResources; void updateHistograms() { IssuedPerCycle[NumIssued]++; - RetiredPerCycle[NumRetired]++; NumIssued = 0; - NumRetired = 0; } - void printRetireUnitStatistics(llvm::raw_ostream &OS) const; void printSchedulerStatistics(llvm::raw_ostream &OS) const; - - void printRCUStatistics(llvm::raw_ostream &OS, const Histogram &Histogram, - unsigned Cycles) const; - void printIssuePerCycle(const Histogram &IssuePerCycle, - unsigned TotalCycles) const; - void printSchedulerUsage(llvm::raw_ostream &OS, - const llvm::MCSchedModel &SM) const; + void printSchedulerUsage(llvm::raw_ostream &OS) const; public: - BackendStatistics(const llvm::MCSubtargetInfo &sti) - : STI(sti), NumIssued(0), NumRetired(0), NumCycles(0) { } + SchedulerStatistics(const llvm::MCSubtargetInfo &STI) + : SM(STI.getSchedModel()), NumIssued(0), NumCycles(0) { } void onInstructionEvent(const HWInstructionEvent &Event) override; @@ -103,8 +83,7 @@ void printView(llvm::raw_ostream &OS) const override { printSchedulerStatistics(OS); - printRetireUnitStatistics(OS); - printSchedulerUsage(OS, STI.getSchedModel()); + printSchedulerUsage(OS); } }; } // namespace mca Index: tools/llvm-mca/SchedulerStatistics.cpp =================================================================== --- tools/llvm-mca/SchedulerStatistics.cpp +++ tools/llvm-mca/SchedulerStatistics.cpp @@ -1,4 +1,4 @@ -//===--------------------- BackendStatistics.cpp ---------------*- C++ -*-===// +//===--------------------- SchedulerStatistics.cpp --------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -8,32 +8,23 @@ //===----------------------------------------------------------------------===// /// \file /// -/// Functionalities used by the BackendPrinter to print out histograms -/// related to number of {issue/retire} per number of cycles. +/// This file implements the SchedulerStatistics interface. 
/// //===----------------------------------------------------------------------===// -#include "BackendStatistics.h" +#include "SchedulerStatistics.h" #include "llvm/Support/Format.h" using namespace llvm; namespace mca { -void BackendStatistics::onInstructionEvent(const HWInstructionEvent &Event) { - switch (Event.Type) { - default: - break; - case HWInstructionEvent::Retired: { - ++NumRetired; - break; - } - case HWInstructionEvent::Issued: +void SchedulerStatistics::onInstructionEvent(const HWInstructionEvent &Event) { + if (Event.Type == HWInstructionEvent::Issued) ++NumIssued; - } } -void BackendStatistics::onReservedBuffers(ArrayRef Buffers) { +void SchedulerStatistics::onReservedBuffers(ArrayRef Buffers) { for (const unsigned Buffer : Buffers) { if (BufferedResources.find(Buffer) != BufferedResources.end()) { BufferUsage &BU = BufferedResources[Buffer]; @@ -47,7 +38,7 @@ } } -void BackendStatistics::onReleasedBuffers(ArrayRef Buffers) { +void SchedulerStatistics::onReleasedBuffers(ArrayRef Buffers) { for (const unsigned Buffer : Buffers) { assert(BufferedResources.find(Buffer) != BufferedResources.end() && "Buffered resource not in map?"); @@ -56,29 +47,8 @@ } } -void BackendStatistics::printRetireUnitStatistics(llvm::raw_ostream &OS) const { - std::string Buffer; - raw_string_ostream TempStream(Buffer); - TempStream << "\n\nRetire Control Unit - " - << "number of cycles where we saw N instructions retired:\n"; - TempStream << "[# retired], [# cycles]\n"; - - for (const std::pair &Entry : RetiredPerCycle) { - TempStream << " " << Entry.first; - if (Entry.first < 10) - TempStream << ", "; - else - TempStream << ", "; - TempStream << Entry.second << " (" - << format("%.1f", ((double)Entry.second / NumCycles) * 100.0) - << "%)\n"; - } - - TempStream.flush(); - OS << Buffer; -} - -void BackendStatistics::printSchedulerStatistics(llvm::raw_ostream &OS) const { +void SchedulerStatistics::printSchedulerStatistics( + llvm::raw_ostream &OS) const { std::string 
Buffer; raw_string_ostream TempStream(Buffer); TempStream << "\n\nSchedulers - number of cycles where we saw N instructions " @@ -94,8 +64,7 @@ OS << Buffer; } -void BackendStatistics::printSchedulerUsage(raw_ostream &OS, - const MCSchedModel &SM) const { +void SchedulerStatistics::printSchedulerUsage(raw_ostream &OS) const { std::string Buffer; raw_string_ostream TempStream(Buffer); TempStream << "\n\nScheduler's queue usage:\n"; Index: tools/llvm-mca/llvm-mca.cpp =================================================================== --- tools/llvm-mca/llvm-mca.cpp +++ tools/llvm-mca/llvm-mca.cpp @@ -22,13 +22,14 @@ //===----------------------------------------------------------------------===// #include "BackendPrinter.h" -#include "BackendStatistics.h" #include "CodeRegion.h" #include "DispatchStatistics.h" #include "InstructionInfoView.h" #include "InstructionTables.h" #include "RegisterFileStatistics.h" #include "ResourcePressureView.h" +#include "RetireControlUnitStatistics.h" +#include "SchedulerStatistics.h" #include "SummaryView.h" #include "TimelineView.h" #include "llvm/MC/MCAsmInfo.h" @@ -99,6 +100,16 @@ cl::desc("Print dispatch statistics"), cl::init(false)); +static cl::opt + PrintSchedulerStats("scheduler-stats", + cl::desc("Print scheduler statistics"), + cl::init(false)); + +static cl::opt + PrintRetireStats("retire-stats", + cl::desc("Print retire control unit statistics"), + cl::init(false)); + static cl::opt PrintResourcePressureView("resource-pressure", cl::desc("Print the resource pressure view"), @@ -430,10 +441,13 @@ llvm::make_unique(*STI, *MCII, S, *IP)); if (PrintDispatchStats) - Printer.addView(llvm::make_unique(*STI)); + Printer.addView(llvm::make_unique()); + + if (PrintSchedulerStats) + Printer.addView(llvm::make_unique(*STI)); - if (PrintModeVerbose) - Printer.addView(llvm::make_unique(*STI)); + if (PrintRetireStats) + Printer.addView(llvm::make_unique()); if (PrintRegisterFileStats) Printer.addView(llvm::make_unique(*STI)); Index: 
tools/opt/opt.cpp =================================================================== --- tools/opt/opt.cpp +++ tools/opt/opt.cpp @@ -414,6 +414,7 @@ initializePostInlineEntryExitInstrumenterPass(Registry); initializeUnreachableBlockElimLegacyPassPass(Registry); initializeExpandReductionsPass(Registry); + initializeWasmEHPreparePass(Registry); initializeWriteBitcodePassPass(Registry); #ifdef LINK_POLLY_INTO_TOOLS Index: unittests/tools/llvm-exegesis/CMakeLists.txt =================================================================== --- unittests/tools/llvm-exegesis/CMakeLists.txt +++ unittests/tools/llvm-exegesis/CMakeLists.txt @@ -22,7 +22,6 @@ ) target_link_libraries(LLVMExegesisTests PRIVATE LLVMExegesis) -if(HAVE_LIBPFM) +if(LLVM_ENABLE_LIBPFM AND HAVE_LIBPFM) target_link_libraries(LLVMExegesisTests PRIVATE pfm) endif() -