diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h index 3751d6f..6f49892 100644 --- a/llvm/include/llvm/IR/CallingConv.h +++ b/llvm/include/llvm/IR/CallingConv.h @@ -1,236 +1,241 @@ //===- llvm/CallingConv.h - LLVM Calling Conventions ------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file defines LLVM's set of calling conventions. // //===----------------------------------------------------------------------===// #ifndef LLVM_IR_CALLINGCONV_H #define LLVM_IR_CALLINGCONV_H namespace llvm { /// CallingConv Namespace - This namespace contains an enum with a value for /// the well-known calling conventions. /// namespace CallingConv { /// LLVM IR allows to use arbitrary numbers as calling convention identifiers. using ID = unsigned; /// A set of enums which specify the assigned numeric values for known llvm /// calling conventions. /// LLVM Calling Convention Representation enum { /// C - The default llvm calling convention, compatible with C. This /// convention is the only calling convention that supports varargs calls. /// As with typical C calling conventions, the callee/caller have to /// tolerate certain amounts of prototype mismatch. C = 0, // Generic LLVM calling conventions. None of these calling conventions // support varargs calls, and all assume that the caller and callee // prototype exactly match. /// Fast - This calling convention attempts to make calls as fast as /// possible (e.g. by passing things in registers). Fast = 8, // Cold - This calling convention attempts to make code in the caller as // efficient as possible under the assumption that the call is not commonly // executed. As such, these calls often preserve all registers so that the // call does not break any live ranges in the caller side. Cold = 9, // GHC - Calling convention used by the Glasgow Haskell Compiler (GHC). GHC = 10, // HiPE - Calling convention used by the High-Performance Erlang Compiler // (HiPE). HiPE = 11, // WebKit JS - Calling convention for stack based JavaScript calls WebKit_JS = 12, // AnyReg - Calling convention for dynamic register based calls (e.g. // stackmap and patchpoint intrinsics). AnyReg = 13, // PreserveMost - Calling convention for runtime calls that preserves most // registers. PreserveMost = 14, // PreserveAll - Calling convention for runtime calls that preserves // (almost) all registers. PreserveAll = 15, // Swift - Calling convention for Swift. Swift = 16, // CXX_FAST_TLS - Calling convention for access functions. CXX_FAST_TLS = 17, // Target - This is the start of the target-specific calling conventions, // e.g. fastcall and thiscall on X86. FirstTargetCC = 64, /// X86_StdCall - stdcall is the calling conventions mostly used by the /// Win32 API. It is basically the same as the C convention with the /// difference in that the callee is responsible for popping the arguments /// from the stack. X86_StdCall = 64, /// X86_FastCall - 'fast' analog of X86_StdCall. Passes first two arguments /// in ECX:EDX registers, others - via stack. Callee is responsible for /// stack cleaning. X86_FastCall = 65, /// ARM_APCS - ARM Procedure Calling Standard calling convention (obsolete, /// but still used on some targets). 
ARM_APCS = 66, /// ARM_AAPCS - ARM Architecture Procedure Calling Standard calling /// convention (aka EABI). Soft float variant. ARM_AAPCS = 67, /// ARM_AAPCS_VFP - Same as ARM_AAPCS, but uses hard floating point ABI. ARM_AAPCS_VFP = 68, /// MSP430_INTR - Calling convention used for MSP430 interrupt routines. MSP430_INTR = 69, /// X86_ThisCall - Similar to X86_StdCall. Passes first argument in ECX, /// others via stack. Callee is responsible for stack cleaning. MSVC uses /// this by default for methods in its ABI. X86_ThisCall = 70, /// PTX_Kernel - Call to a PTX kernel. /// Passes all arguments in parameter space. PTX_Kernel = 71, /// PTX_Device - Call to a PTX device function. /// Passes all arguments in register or parameter space. PTX_Device = 72, /// SPIR_FUNC - Calling convention for SPIR non-kernel device functions. /// No lowering or expansion of arguments. /// Structures are passed as a pointer to a struct with the byval attribute. /// Functions can only call SPIR_FUNC and SPIR_KERNEL functions. /// Functions can only have zero or one return values. /// Variable arguments are not allowed, except for printf. /// How arguments/return values are lowered are not specified. /// Functions are only visible to the devices. SPIR_FUNC = 75, /// SPIR_KERNEL - Calling convention for SPIR kernel functions. /// Inherits the restrictions of SPIR_FUNC, except /// Cannot have non-void return values. /// Cannot have variable arguments. /// Can also be called by the host. /// Is externally visible. SPIR_KERNEL = 76, /// Intel_OCL_BI - Calling conventions for Intel OpenCL built-ins Intel_OCL_BI = 77, /// The C convention as specified in the x86-64 supplement to the /// System V ABI, used on most non-Windows systems. X86_64_SysV = 78, /// The C convention as implemented on Windows/x86-64 and /// AArch64. This convention differs from the more common /// \c X86_64_SysV convention in a number of ways, most notably in /// that XMM registers used to pass arguments are shadowed by GPRs, /// and vice versa. /// On AArch64, this is identical to the normal C (AAPCS) calling /// convention for normal functions, but floats are passed in integer /// registers to variadic functions. Win64 = 79, /// MSVC calling convention that passes vectors and vector aggregates /// in SSE registers. X86_VectorCall = 80, /// Calling convention used by HipHop Virtual Machine (HHVM) to /// perform calls to and from translation cache, and for calling PHP /// functions. /// HHVM calling convention supports tail/sibling call elimination. HHVM = 81, /// HHVM calling convention for invoking C/C++ helpers. HHVM_C = 82, /// X86_INTR - x86 hardware interrupt context. Callee may take one or two /// parameters, where the 1st represents a pointer to hardware context frame /// and the 2nd represents hardware error code, the presence of the later /// depends on the interrupt vector taken. Valid for both 32- and 64-bit /// subtargets. X86_INTR = 83, /// Used for AVR interrupt routines. AVR_INTR = 84, /// Calling convention used for AVR signal routines. AVR_SIGNAL = 85, /// Calling convention used for special AVR rtlib functions /// which have an "optimized" convention to preserve registers. AVR_BUILTIN = 86, /// Calling convention used for Mesa vertex shaders, or AMDPAL last shader /// stage before rasterization (vertex shader if tessellation and geometry /// are not in use, or otherwise copy shader if one is needed). AMDGPU_VS = 87, /// Calling convention used for Mesa/AMDPAL geometry shaders. 
AMDGPU_GS = 88, /// Calling convention used for Mesa/AMDPAL pixel shaders. AMDGPU_PS = 89, /// Calling convention used for Mesa/AMDPAL compute shaders. AMDGPU_CS = 90, /// Calling convention for AMDGPU code object kernels. AMDGPU_KERNEL = 91, /// Register calling convention used for parameters transfer optimization X86_RegCall = 92, /// Calling convention used for Mesa/AMDPAL hull shaders (= tessellation /// control shaders). AMDGPU_HS = 93, /// Calling convention used for special MSP430 rtlib functions /// which have an "optimized" convention using additional registers. MSP430_BUILTIN = 94, /// Calling convention used for AMDPAL vertex shader if tessellation is in /// use. AMDGPU_LS = 95, /// Calling convention used for AMDPAL shader stage before geometry shader /// if geometry is in use. So either the domain (= tessellation evaluation) /// shader if tessellation is in use, or otherwise the vertex shader. AMDGPU_ES = 96, // Calling convention between AArch64 Advanced SIMD functions AArch64_VectorCall = 97, /// Calling convention between AArch64 SVE functions AArch64_SVE_VectorCall = 98, + /// Calling convention for emscripten __invoke_* functions. The first + /// argument is required to be the function ptr being indirectly called. + /// The remainder matches the regular calling convention. + WASM_EmscriptenInvoke = 99, + /// The highest possible calling convention ID. Must be some 2^k - 1. MaxID = 1023 }; } // end namespace CallingConv } // end namespace llvm #endif // LLVM_IR_CALLINGCONV_H diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index a0180fe..8f2f074 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -1,1480 +1,1491 @@ //=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// /// \file /// This file implements the WebAssemblyTargetLowering class. /// //===----------------------------------------------------------------------===// #include "WebAssemblyISelLowering.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyTargetMachine.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/WasmEHFuncInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/Function.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; #define DEBUG_TYPE "wasm-lower" WebAssemblyTargetLowering::WebAssemblyTargetLowering( const TargetMachine &TM, const WebAssemblySubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32; // Booleans always contain 0 or 1. 
setBooleanContents(ZeroOrOneBooleanContent); // Except in SIMD vectors setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // We don't know the microarchitecture here, so just reduce register pressure. setSchedulingPreference(Sched::RegPressure); // Tell ISel that we have a stack pointer. setStackPointerRegisterToSaveRestore( Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32); // Set up the register classes. addRegisterClass(MVT::i32, &WebAssembly::I32RegClass); addRegisterClass(MVT::i64, &WebAssembly::I64RegClass); addRegisterClass(MVT::f32, &WebAssembly::F32RegClass); addRegisterClass(MVT::f64, &WebAssembly::F64RegClass); if (Subtarget->hasSIMD128()) { addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass); addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass); addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass); addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass); } if (Subtarget->hasUnimplementedSIMD128()) { addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass); addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass); } // Compute derived properties from the register classes. computeRegisterProperties(Subtarget->getRegisterInfo()); setOperationAction(ISD::GlobalAddress, MVTPtr, Custom); setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom); setOperationAction(ISD::JumpTable, MVTPtr, Custom); setOperationAction(ISD::BlockAddress, MVTPtr, Custom); setOperationAction(ISD::BRIND, MVT::Other, Custom); // Take the default expansion for va_arg, va_copy, and va_end. There is no // default action for va_start, so we do that custom. setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::VAARG, MVT::Other, Expand); setOperationAction(ISD::VACOPY, MVT::Other, Expand); setOperationAction(ISD::VAEND, MVT::Other, Expand); for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) { // Don't expand the floating-point types to constant pools. setOperationAction(ISD::ConstantFP, T, Legal); // Expand floating-point comparisons. for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE, ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE}) setCondCodeAction(CC, T, Expand); // Expand floating-point library function operators. for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA}) setOperationAction(Op, T, Expand); // Note supported floating-point library function operators that otherwise // default to expand. for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT}) setOperationAction(Op, T, Legal); // Support minimum and maximum, which otherwise default to expand. setOperationAction(ISD::FMINIMUM, T, Legal); setOperationAction(ISD::FMAXIMUM, T, Legal); // WebAssembly currently has no builtin f16 support. setOperationAction(ISD::FP16_TO_FP, T, Expand); setOperationAction(ISD::FP_TO_FP16, T, Expand); setLoadExtAction(ISD::EXTLOAD, T, MVT::f16, Expand); setTruncStoreAction(T, MVT::f16, Expand); } // Expand unavailable integer operations. 
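// (None of these has a single WebAssembly instruction; marking them Expand
// lets the DAG legalizer rewrite them, e.g. BSWAP becomes a shift/and/or
// sequence and SDIVREM becomes separate div and rem operations.)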
for (auto Op : {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU, ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) { for (auto T : {MVT::i32, MVT::i64}) setOperationAction(Op, T, Expand); if (Subtarget->hasSIMD128()) for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) setOperationAction(Op, T, Expand); if (Subtarget->hasUnimplementedSIMD128()) setOperationAction(Op, MVT::v2i64, Expand); } // SIMD-specific configuration if (Subtarget->hasSIMD128()) { // Support saturating add for i8x16 and i16x8 for (auto Op : {ISD::SADDSAT, ISD::UADDSAT}) for (auto T : {MVT::v16i8, MVT::v8i16}) setOperationAction(Op, T, Legal); // Custom lower BUILD_VECTORs to minimize number of replace_lanes for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) setOperationAction(ISD::BUILD_VECTOR, T, Custom); if (Subtarget->hasUnimplementedSIMD128()) for (auto T : {MVT::v2i64, MVT::v2f64}) setOperationAction(ISD::BUILD_VECTOR, T, Custom); // We have custom shuffle lowering to expose the shuffle mask for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom); if (Subtarget->hasUnimplementedSIMD128()) for (auto T: {MVT::v2i64, MVT::v2f64}) setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom); // Custom lowering since wasm shifts must have a scalar shift amount for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL}) { for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) setOperationAction(Op, T, Custom); if (Subtarget->hasUnimplementedSIMD128()) setOperationAction(Op, MVT::v2i64, Custom); } // Custom lower lane accesses to expand out variable indices for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT}) { for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) setOperationAction(Op, T, Custom); if (Subtarget->hasUnimplementedSIMD128()) for (auto T : {MVT::v2i64, MVT::v2f64}) setOperationAction(Op, T, Custom); } // There is no i64x2.mul instruction setOperationAction(ISD::MUL, MVT::v2i64, Expand); // There are no vector select instructions for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT}) { for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) setOperationAction(Op, T, Expand); if (Subtarget->hasUnimplementedSIMD128()) for (auto T : {MVT::v2i64, MVT::v2f64}) setOperationAction(Op, T, Expand); } // Expand integer operations supported for scalars but not SIMD for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR}) { for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) setOperationAction(Op, T, Expand); if (Subtarget->hasUnimplementedSIMD128()) setOperationAction(Op, MVT::v2i64, Expand); } // Expand float operations supported for scalars but not SIMD for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FRINT}) { setOperationAction(Op, MVT::v4f32, Expand); if (Subtarget->hasUnimplementedSIMD128()) setOperationAction(Op, MVT::v2f64, Expand); } // Expand additional SIMD ops that V8 hasn't implemented yet if (!Subtarget->hasUnimplementedSIMD128()) { setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); setOperationAction(ISD::FDIV, MVT::v4f32, Expand); } } // As a special case, these operators use the type to mean the type to // sign-extend from. 
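// For example, the action set on the next line applies to nodes like
//   (sign_extend_inreg i32 %x, ValueType:i1)
// i.e. sign-extending from the low bit, regardless of the result type.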
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); if (!Subtarget->hasSignExt()) { // Sign extends are legal only when extending a vector extract auto Action = Subtarget->hasSIMD128() ? Custom : Expand; for (auto T : {MVT::i8, MVT::i16, MVT::i32}) setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action); } for (auto T : MVT::integer_vector_valuetypes()) setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand); // Dynamic stack allocation: use the default expansion. setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand); setOperationAction(ISD::FrameIndex, MVT::i32, Custom); setOperationAction(ISD::CopyToReg, MVT::Other, Custom); // Expand these forms; we pattern-match the forms that we can handle in isel. for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) for (auto Op : {ISD::BR_CC, ISD::SELECT_CC}) setOperationAction(Op, T, Expand); // We have custom switch handling. setOperationAction(ISD::BR_JT, MVT::Other, Custom); // WebAssembly doesn't have: // - Floating-point extending loads. // - Floating-point truncating stores. // - i1 extending loads. // - extending/truncating SIMD loads/stores setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); for (auto T : MVT::integer_valuetypes()) for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD}) setLoadExtAction(Ext, T, MVT::i1, Promote); if (Subtarget->hasSIMD128()) { for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64}) { for (auto MemT : MVT::vector_valuetypes()) { if (MVT(T) != MemT) { setTruncStoreAction(T, MemT, Expand); for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD}) setLoadExtAction(Ext, T, MemT, Expand); } } } } // Don't do anything clever with build_pairs setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); // Trap lowers to wasm unreachable setOperationAction(ISD::TRAP, MVT::Other, Legal); // Exception handling intrinsics setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); setMaxAtomicSizeInBitsSupported(64); if (Subtarget->hasBulkMemory()) { // Use memory.copy and friends over multiple loads and stores MaxStoresPerMemcpy = 1; MaxStoresPerMemcpyOptSize = 1; MaxStoresPerMemmove = 1; MaxStoresPerMemmoveOptSize = 1; MaxStoresPerMemset = 1; MaxStoresPerMemsetOptSize = 1; } // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is // consistent with the f64 and f128 names. setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2"); setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2"); // Define the emscripten name for return address helper. // TODO: when implementing other WASM backends, make this generic or only do // this on emscripten depending on what they end up doing. setLibcallName(RTLIB::RETURN_ADDRESS, "emscripten_return_address"); // Always convert switches to br_tables unless there is only one case, which // is equivalent to a simple branch. This reduces code size for wasm, and we // defer possible jump table optimizations to the VM. 
setMinimumJumpTableEntries(2); } TargetLowering::AtomicExpansionKind WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { // We have wasm instructions for these switch (AI->getOperation()) { case AtomicRMWInst::Add: case AtomicRMWInst::Sub: case AtomicRMWInst::And: case AtomicRMWInst::Or: case AtomicRMWInst::Xor: case AtomicRMWInst::Xchg: return AtomicExpansionKind::None; default: break; } return AtomicExpansionKind::CmpXChg; } FastISel *WebAssemblyTargetLowering::createFastISel( FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const { return WebAssembly::createFastISel(FuncInfo, LibInfo); } MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/, EVT VT) const { unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1); if (BitWidth > 1 && BitWidth < 8) BitWidth = 8; if (BitWidth > 64) { // The shift will be lowered to a libcall, and compiler-rt libcalls expect // the count to be an i32. BitWidth = 32; assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) && "32-bit shift counts ought to be enough for anyone"); } MVT Result = MVT::getIntegerVT(BitWidth); assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE && "Unable to represent scalar shift amount type"); return Result; } // Lower an fp-to-int conversion operator from the LLVM opcode, which has an // undefined result on invalid/overflow, to the WebAssembly opcode, which // traps on invalid/overflow. static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL, MachineBasicBlock *BB, const TargetInstrInfo &TII, bool IsUnsigned, bool Int64, bool Float64, unsigned LoweredOpcode) { MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); unsigned OutReg = MI.getOperand(0).getReg(); unsigned InReg = MI.getOperand(1).getReg(); unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32; unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32; unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32; unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32; unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32; unsigned Eqz = WebAssembly::EQZ_I32; unsigned And = WebAssembly::AND_I32; int64_t Limit = Int64 ? INT64_MIN : INT32_MIN; int64_t Substitute = IsUnsigned ? 0 : Limit; double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit; auto &Context = BB->getParent()->getFunction().getContext(); Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context); const BasicBlock *LLVMBB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB); MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB); MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB); MachineFunction::iterator It = ++BB->getIterator(); F->insert(It, FalseMBB); F->insert(It, TrueMBB); F->insert(It, DoneMBB); // Transfer the remainder of BB and its successor edges to DoneMBB. 
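// (A sketch of the control flow built below: BB computes the range check and
// ends with a br_if to TrueMBB; FalseMBB performs the trapping conversion,
// TrueMBB materializes the substitute constant, and both feed a PHI in
// DoneMBB, which also receives the remainder of the original block.)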
DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end()); DoneMBB->transferSuccessorsAndUpdatePHIs(BB); BB->addSuccessor(TrueMBB); BB->addSuccessor(FalseMBB); TrueMBB->addSuccessor(DoneMBB); FalseMBB->addSuccessor(DoneMBB); unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg; Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg)); Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg)); CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg)); TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg)); MI.eraseFromParent(); // For signed numbers, we can do a single comparison to determine whether // fabs(x) is within range. if (IsUnsigned) { Tmp0 = InReg; } else { BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg); } BuildMI(BB, DL, TII.get(FConst), Tmp1) .addFPImm(cast(ConstantFP::get(Ty, CmpVal))); BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1); // For unsigned numbers, we have to do a separate comparison with zero. if (IsUnsigned) { Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg)); unsigned SecondCmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); unsigned AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); BuildMI(BB, DL, TII.get(FConst), Tmp1) .addFPImm(cast(ConstantFP::get(Ty, 0.0))); BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1); BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg); CmpReg = AndReg; } BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg); // Create the CFG diamond to select between doing the conversion or using // the substitute value. BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg); BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg); BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB); BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute); BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg) .addReg(FalseReg) .addMBB(FalseMBB) .addReg(TrueReg) .addMBB(TrueMBB); return DoneMBB; } MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter( MachineInstr &MI, MachineBasicBlock *BB) const { const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); switch (MI.getOpcode()) { default: llvm_unreachable("Unexpected instr type to insert"); case WebAssembly::FP_TO_SINT_I32_F32: return LowerFPToInt(MI, DL, BB, TII, false, false, false, WebAssembly::I32_TRUNC_S_F32); case WebAssembly::FP_TO_UINT_I32_F32: return LowerFPToInt(MI, DL, BB, TII, true, false, false, WebAssembly::I32_TRUNC_U_F32); case WebAssembly::FP_TO_SINT_I64_F32: return LowerFPToInt(MI, DL, BB, TII, false, true, false, WebAssembly::I64_TRUNC_S_F32); case WebAssembly::FP_TO_UINT_I64_F32: return LowerFPToInt(MI, DL, BB, TII, true, true, false, WebAssembly::I64_TRUNC_U_F32); case WebAssembly::FP_TO_SINT_I32_F64: return LowerFPToInt(MI, DL, BB, TII, false, false, true, WebAssembly::I32_TRUNC_S_F64); case WebAssembly::FP_TO_UINT_I32_F64: return LowerFPToInt(MI, DL, BB, TII, true, false, true, WebAssembly::I32_TRUNC_U_F64); case WebAssembly::FP_TO_SINT_I64_F64: return LowerFPToInt(MI, DL, BB, TII, false, true, true, WebAssembly::I64_TRUNC_S_F64); case WebAssembly::FP_TO_UINT_I64_F64: return LowerFPToInt(MI, DL, BB, TII, true, true, true, WebAssembly::I64_TRUNC_U_F64); llvm_unreachable("Unexpected instruction to emit with custom inserter"); 
} } const char * WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (static_cast(Opcode)) { case WebAssemblyISD::FIRST_NUMBER: break; #define HANDLE_NODETYPE(NODE) \ case WebAssemblyISD::NODE: \ return "WebAssemblyISD::" #NODE; #include "WebAssemblyISD.def" #undef HANDLE_NODETYPE } return nullptr; } std::pair WebAssemblyTargetLowering::getRegForInlineAsmConstraint( const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { // First, see if this is a constraint that directly corresponds to a // WebAssembly register class. if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': assert(VT != MVT::iPTR && "Pointer MVT not expected here"); if (Subtarget->hasSIMD128() && VT.isVector()) { if (VT.getSizeInBits() == 128) return std::make_pair(0U, &WebAssembly::V128RegClass); } if (VT.isInteger() && !VT.isVector()) { if (VT.getSizeInBits() <= 32) return std::make_pair(0U, &WebAssembly::I32RegClass); if (VT.getSizeInBits() <= 64) return std::make_pair(0U, &WebAssembly::I64RegClass); } break; default: break; } } return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const { // Assume ctz is a relatively cheap operation. return true; } bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const { // Assume clz is a relatively cheap operation. return true; } bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const { // WebAssembly offsets are added as unsigned without wrapping. The // isLegalAddressingMode gives us no way to determine if wrapping could be // happening, so we approximate this by accepting only non-negative offsets. if (AM.BaseOffs < 0) return false; // WebAssembly has no scale register operands. if (AM.Scale != 0) return false; // Everything else is legal. return true; } bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses( EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/, MachineMemOperand::Flags /*Flags*/, bool *Fast) const { // WebAssembly supports unaligned accesses, though it should be declared // with the p2align attribute on loads and stores which do so, and there // may be a performance impact. We tell LLVM they're "fast" because // for the kinds of things that LLVM uses this for (merging adjacent stores // of constants, etc.), WebAssembly implementations will either want the // unaligned access or they'll split anyway. if (Fast) *Fast = true; return true; } bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const { // The current thinking is that wasm engines will perform this optimization, // so we can save on code size. return true; } EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C, EVT VT) const { if (VT.isVector()) return VT.changeVectorElementTypeToInteger(); return TargetLowering::getSetCCResultType(DL, C, VT); } bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const { switch (Intrinsic) { case Intrinsic::wasm_atomic_notify: Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i32; Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.align = Align(4); // atomic.notify instruction does not really load the memory specified with // this argument, but MachineMemOperand should either be load or store, so // we set this to a load. 
// FIXME Volatile isn't really correct, but currently all LLVM atomic // instructions are treated as volatiles in the backend, so we should be // consistent. The same applies for wasm_atomic_wait intrinsics too. Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad; return true; case Intrinsic::wasm_atomic_wait_i32: Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i32; Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.align = Align(4); Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad; return true; case Intrinsic::wasm_atomic_wait_i64: Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i64; Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.align = Align(8); Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad; return true; default: return false; } } //===----------------------------------------------------------------------===// // WebAssembly Lowering private implementation. //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // Lowering Code //===----------------------------------------------------------------------===// static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) { MachineFunction &MF = DAG.getMachineFunction(); DAG.getContext()->diagnose( DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc())); } // Test whether the given calling convention is supported. static bool callingConvSupported(CallingConv::ID CallConv) { // We currently support the language-independent target-independent // conventions. We don't yet have a way to annotate calls with properties like // "cold", and we don't have any call-clobbered registers, so these are mostly // all handled the same. 
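// The WASM_EmscriptenInvoke convention added in this patch is accepted below
// as well: it only constrains argument ordering (function pointer first), not
// register or stack usage, so the only special handling it needs is the sret
// swap in LowerCall.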
return CallConv == CallingConv::C || CallConv == CallingConv::Fast || CallConv == CallingConv::Cold || CallConv == CallingConv::PreserveMost || CallConv == CallingConv::PreserveAll || - CallConv == CallingConv::CXX_FAST_TLS; + CallConv == CallingConv::CXX_FAST_TLS || + CallConv == CallingConv::WASM_EmscriptenInvoke; } SDValue WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc DL = CLI.DL; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; MachineFunction &MF = DAG.getMachineFunction(); auto Layout = MF.getDataLayout(); CallingConv::ID CallConv = CLI.CallConv; if (!callingConvSupported(CallConv)) fail(DL, DAG, "WebAssembly doesn't support language-specific or target-specific " "calling conventions yet"); if (CLI.IsPatchPoint) fail(DL, DAG, "WebAssembly doesn't support patch point yet"); if (CLI.IsTailCall) { bool MustTail = CLI.CS && CLI.CS.isMustTailCall(); if (Subtarget->hasTailCall() && !CLI.IsVarArg) { // Do not tail call unless caller and callee return types match const Function &F = MF.getFunction(); const TargetMachine &TM = getTargetMachine(); Type *RetTy = F.getReturnType(); SmallVector<MVT, 4> CallerRetTys; SmallVector<MVT, 4> CalleeRetTys; computeLegalValueVTs(F, TM, RetTy, CallerRetTys); computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys); bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() && std::equal(CallerRetTys.begin(), CallerRetTys.end(), CalleeRetTys.begin()); if (!TypesMatch) { // musttail in this case would be an LLVM IR validation failure assert(!MustTail); CLI.IsTailCall = false; } } else { CLI.IsTailCall = false; if (MustTail) { if (CLI.IsVarArg) { // The return would pop the argument buffer fail(DL, DAG, "WebAssembly does not support varargs tail calls"); } else { fail(DL, DAG, "WebAssembly 'tail-call' feature not enabled"); } } } } SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; if (Ins.size() > 1) fail(DL, DAG, "WebAssembly doesn't support more than 1 returned value yet"); SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; + + // The generic code may have added an sret argument. If we're lowering an + // invoke function, the ABI requires that the function pointer be the first + // argument, so we may have to swap the arguments.
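+ // Illustration: when the helper's result is returned via sret, generic
+ // lowering would produce the outgoing argument order (sret, fnptr, args...);
+ // the swap below restores the required order (fnptr, sret, args...).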
+ if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 && + Outs[0].Flags.isSRet()) { + std::swap(Outs[0], Outs[1]); + std::swap(OutVals[0], OutVals[1]); + } + unsigned NumFixedArgs = 0; for (unsigned I = 0; I < Outs.size(); ++I) { const ISD::OutputArg &Out = Outs[I]; SDValue &OutVal = OutVals[I]; if (Out.Flags.isNest()) fail(DL, DAG, "WebAssembly hasn't implemented nest arguments"); if (Out.Flags.isInAlloca()) fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments"); if (Out.Flags.isInConsecutiveRegs()) fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments"); if (Out.Flags.isInConsecutiveRegsLast()) fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments"); if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) { auto &MFI = MF.getFrameInfo(); int FI = MFI.CreateStackObject(Out.Flags.getByValSize(), Out.Flags.getByValAlign(), /*isSS=*/false); SDValue SizeNode = DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32); SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout)); Chain = DAG.getMemcpy( Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getByValAlign(), /*isVolatile*/ false, /*AlwaysInline=*/false, /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo()); OutVal = FINode; } // Count the number of fixed args *after* legalization. NumFixedArgs += Out.IsFixed; } bool IsVarArg = CLI.IsVarArg; auto PtrVT = getPointerTy(Layout); // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); if (IsVarArg) { // Outgoing non-fixed arguments are placed in a buffer. First // compute their offsets and the total amount of buffer space needed. for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) { const ISD::OutputArg &Out = Outs[I]; SDValue &Arg = OutVals[I]; EVT VT = Arg.getValueType(); assert(VT != MVT::iPTR && "Legalized args should be concrete"); Type *Ty = VT.getTypeForEVT(*DAG.getContext()); unsigned Align = std::max(Out.Flags.getOrigAlign(), Layout.getABITypeAlignment(Ty)); unsigned Offset = CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Align); CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(), Offset, VT.getSimpleVT(), CCValAssign::Full)); } } unsigned NumBytes = CCInfo.getAlignedCallFrameSize(); SDValue FINode; if (IsVarArg && NumBytes) { // For non-fixed arguments, next emit stores to store the argument values // to the stack buffer at the offsets computed above. int FI = MF.getFrameInfo().CreateStackObject(NumBytes, Layout.getStackAlignment(), /*isSS=*/false); unsigned ValNo = 0; SmallVector Chains; for (SDValue Arg : make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) { assert(ArgLocs[ValNo].getValNo() == ValNo && "ArgLocs should remain in order and only hold varargs args"); unsigned Offset = ArgLocs[ValNo++].getLocMemOffset(); FINode = DAG.getFrameIndex(FI, getPointerTy(Layout)); SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode, DAG.getConstant(Offset, DL, PtrVT)); Chains.push_back( DAG.getStore(Chain, DL, Arg, Add, MachinePointerInfo::getFixedStack(MF, FI, Offset), 0)); } if (!Chains.empty()) Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); } else if (IsVarArg) { FINode = DAG.getIntPtrConstant(0, DL); } if (Callee->getOpcode() == ISD::GlobalAddress) { // If the callee is a GlobalAddress node (quite common, every direct call // is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress // doesn't at MO_GOT which is not needed for direct calls. 
GlobalAddressSDNode* GA = cast(Callee); Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), DL, getPointerTy(DAG.getDataLayout()), GA->getOffset()); Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL, getPointerTy(DAG.getDataLayout()), Callee); } // Compute the operands for the CALLn node. SmallVector Ops; Ops.push_back(Chain); Ops.push_back(Callee); // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs // isn't reliable. Ops.append(OutVals.begin(), IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end()); // Add a pointer to the vararg buffer. if (IsVarArg) Ops.push_back(FINode); SmallVector InTys; for (const auto &In : Ins) { assert(!In.Flags.isByVal() && "byval is not valid for return values"); assert(!In.Flags.isNest() && "nest is not valid for return values"); if (In.Flags.isInAlloca()) fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values"); if (In.Flags.isInConsecutiveRegs()) fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values"); if (In.Flags.isInConsecutiveRegsLast()) fail(DL, DAG, "WebAssembly hasn't implemented cons regs last return values"); // Ignore In.getOrigAlign() because all our arguments are passed in // registers. InTys.push_back(In.VT); } if (CLI.IsTailCall) { // ret_calls do not return values to the current frame SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops); } InTys.push_back(MVT::Other); SDVTList InTyList = DAG.getVTList(InTys); SDValue Res = DAG.getNode(Ins.empty() ? WebAssemblyISD::CALL0 : WebAssemblyISD::CALL1, DL, InTyList, Ops); if (Ins.empty()) { Chain = Res; } else { InVals.push_back(Res); Chain = Res.getValue(1); } return Chain; } bool WebAssemblyTargetLowering::CanLowerReturn( CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/, const SmallVectorImpl &Outs, LLVMContext & /*Context*/) const { // WebAssembly can't currently handle returning tuples. return Outs.size() <= 1; } SDValue WebAssemblyTargetLowering::LowerReturn( SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &DL, SelectionDAG &DAG) const { assert(Outs.size() <= 1 && "WebAssembly can only return up to one value"); if (!callingConvSupported(CallConv)) fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions"); SmallVector RetOps(1, Chain); RetOps.append(OutVals.begin(), OutVals.end()); Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps); // Record the number and types of the return values. 
for (const ISD::OutputArg &Out : Outs) { assert(!Out.Flags.isByVal() && "byval is not valid for return values"); assert(!Out.Flags.isNest() && "nest is not valid for return values"); assert(Out.IsFixed && "non-fixed return value is not valid"); if (Out.Flags.isInAlloca()) fail(DL, DAG, "WebAssembly hasn't implemented inalloca results"); if (Out.Flags.isInConsecutiveRegs()) fail(DL, DAG, "WebAssembly hasn't implemented cons regs results"); if (Out.Flags.isInConsecutiveRegsLast()) fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results"); } return Chain; } SDValue WebAssemblyTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl &InVals) const { if (!callingConvSupported(CallConv)) fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions"); MachineFunction &MF = DAG.getMachineFunction(); auto *MFI = MF.getInfo(); // Set up the incoming ARGUMENTS value, which serves to represent the liveness // of the incoming values before they're represented by virtual registers. MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS); for (const ISD::InputArg &In : Ins) { if (In.Flags.isInAlloca()) fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments"); if (In.Flags.isNest()) fail(DL, DAG, "WebAssembly hasn't implemented nest arguments"); if (In.Flags.isInConsecutiveRegs()) fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments"); if (In.Flags.isInConsecutiveRegsLast()) fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments"); // Ignore In.getOrigAlign() because all our arguments are passed in // registers. InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT, DAG.getTargetConstant(InVals.size(), DL, MVT::i32)) : DAG.getUNDEF(In.VT)); // Record the number and types of arguments. MFI->addParam(In.VT); } // Varargs are copied into a buffer allocated by the caller, and a pointer to // the buffer is passed as an argument. if (IsVarArg) { MVT PtrVT = getPointerTy(MF.getDataLayout()); unsigned VarargVreg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT)); MFI->setVarargBufferVreg(VarargVreg); Chain = DAG.getCopyToReg( Chain, DL, VarargVreg, DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT, DAG.getTargetConstant(Ins.size(), DL, MVT::i32))); MFI->addParam(PtrVT); } // Record the number and types of arguments and results. SmallVector Params; SmallVector Results; computeSignatureVTs(MF.getFunction().getFunctionType(), MF.getFunction(), DAG.getTarget(), Params, Results); for (MVT VT : Results) MFI->addResult(VT); // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify // the param logic here with ComputeSignatureVTs assert(MFI->getParams().size() == Params.size() && std::equal(MFI->getParams().begin(), MFI->getParams().end(), Params.begin())); return Chain; } void WebAssemblyTargetLowering::ReplaceNodeResults( SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { switch (N->getOpcode()) { case ISD::SIGN_EXTEND_INREG: // Do not add any results, signifying that N should not be custom lowered // after all. This happens because simd128 turns on custom lowering for // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an // illegal type. break; default: llvm_unreachable( "ReplaceNodeResults not implemented for this op for WebAssembly!"); } } //===----------------------------------------------------------------------===// // Custom lowering hooks. 
//===----------------------------------------------------------------------===// SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); switch (Op.getOpcode()) { default: llvm_unreachable("unimplemented operation lowering"); return SDValue(); case ISD::FrameIndex: return LowerFrameIndex(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::BlockAddress: case ISD::BRIND: fail(DL, DAG, "WebAssembly hasn't implemented computed gotos"); return SDValue(); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::CopyToReg: return LowerCopyToReg(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: case ISD::INSERT_VECTOR_ELT: return LowerAccessVectorElement(Op, DAG); case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_W_CHAIN: return LowerIntrinsic(Op, DAG); case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::SHL: case ISD::SRA: case ISD::SRL: return LowerShift(Op, DAG); } } SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op, SelectionDAG &DAG) const { SDValue Src = Op.getOperand(2); if (isa(Src.getNode())) { // CopyToReg nodes don't support FrameIndex operands. Other targets select // the FI to some LEA-like instruction, but since we don't have that, we // need to insert some kind of instruction that can take an FI operand and // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy // local.copy between Op and its FI operand. SDValue Chain = Op.getOperand(0); SDLoc DL(Op); unsigned Reg = cast(Op.getOperand(1))->getReg(); EVT VT = Src.getValueType(); SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32 : WebAssembly::COPY_I64, DL, VT, Src), 0); return Op.getNode()->getNumValues() == 1 ? DAG.getCopyToReg(Chain, DL, Reg, Copy) : DAG.getCopyToReg(Chain, DL, Reg, Copy, Op.getNumOperands() == 4 ? Op.getOperand(3) : SDValue()); } return SDValue(); } SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const { int FI = cast(Op)->getIndex(); return DAG.getTargetFrameIndex(FI, Op.getValueType()); } SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); if (!Subtarget->getTargetTriple().isOSEmscripten()) { fail(DL, DAG, "Non-Emscripten WebAssembly hasn't implemented " "__builtin_return_address"); return SDValue(); } if (verifyReturnAddressArgumentIsConstant(Op, DAG)) return SDValue(); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(), {DAG.getConstant(Depth, DL, MVT::i32)}, false, DL) .first; } SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { // Non-zero depths are not supported by WebAssembly currently. Use the // legalizer's default expansion, which is to return 0 (what this function is // documented to do). 
if (Op.getConstantOperandVal(0) > 0) return SDValue(); DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); unsigned FP = Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction()); return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT); } SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); const auto *GA = cast(Op); EVT VT = Op.getValueType(); assert(GA->getTargetFlags() == 0 && "Unexpected target flags on generic GlobalAddressSDNode"); if (GA->getAddressSpace() != 0) fail(DL, DAG, "WebAssembly only expects the 0 address space"); unsigned OperandFlags = 0; if (isPositionIndependent()) { const GlobalValue *GV = GA->getGlobal(); if (getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) { MachineFunction &MF = DAG.getMachineFunction(); MVT PtrVT = getPointerTy(MF.getDataLayout()); const char *BaseName; if (GV->getValueType()->isFunctionTy()) { BaseName = MF.createExternalSymbolName("__table_base"); OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL; } else { BaseName = MF.createExternalSymbolName("__memory_base"); OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL; } SDValue BaseAddr = DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, DAG.getTargetExternalSymbol(BaseName, PtrVT)); SDValue SymAddr = DAG.getNode( WebAssemblyISD::WrapperPIC, DL, VT, DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(), OperandFlags)); return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr); } else { OperandFlags = WebAssemblyII::MO_GOT; } } return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT, DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(), OperandFlags)); } SDValue WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); const auto *ES = cast(Op); EVT VT = Op.getValueType(); assert(ES->getTargetFlags() == 0 && "Unexpected target flags on generic ExternalSymbolSDNode"); return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT, DAG.getTargetExternalSymbol(ES->getSymbol(), VT)); } SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { // There's no need for a Wrapper node because we always incorporate a jump // table operand into a BR_TABLE instruction, rather than ever // materializing it in a register. const JumpTableSDNode *JT = cast(Op); return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(), JT->getTargetFlags()); } SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); SDValue Chain = Op.getOperand(0); const auto *JT = cast(Op.getOperand(1)); SDValue Index = Op.getOperand(2); assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags"); SmallVector Ops; Ops.push_back(Chain); Ops.push_back(Index); MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo(); const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs; // Add an operand for each case. for (auto MBB : MBBs) Ops.push_back(DAG.getBasicBlock(MBB)); // TODO: For now, we just pick something arbitrary for a default case for now. // We really want to sniff out the guard and put in the real default case (and // delete the guard). 
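// (Using MBBs[0] as the placeholder default is harmless: the switch guard
// emitted before BR_JT already ensures the index is in range, so the br_table
// default target is never actually taken.)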
Ops.push_back(DAG.getBasicBlock(MBBs[0])); return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops); } SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout()); auto *MFI = DAG.getMachineFunction().getInfo(); const Value *SV = cast(Op.getOperand(2))->getValue(); SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL, MFI->getVarargBufferVreg(), PtrVT); return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1), MachinePointerInfo(SV), 0); } SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); unsigned IntNo; switch (Op.getOpcode()) { case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: IntNo = cast(Op.getOperand(1))->getZExtValue(); break; case ISD::INTRINSIC_WO_CHAIN: IntNo = cast(Op.getOperand(0))->getZExtValue(); break; default: llvm_unreachable("Invalid intrinsic"); } SDLoc DL(Op); switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. case Intrinsic::wasm_lsda: { EVT VT = Op.getValueType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); auto &Context = MF.getMMI().getContext(); MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") + Twine(MF.getFunctionNumber())); return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT, DAG.getMCSymbol(S, PtrVT)); } case Intrinsic::wasm_throw: { // We only support C++ exceptions for now int Tag = cast(Op.getOperand(2).getNode())->getZExtValue(); if (Tag != CPP_EXCEPTION) llvm_unreachable("Invalid tag!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); const char *SymName = MF.createExternalSymbolName("__cpp_exception"); SDValue SymNode = DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, DAG.getTargetExternalSymbol(SymName, PtrVT)); return DAG.getNode(WebAssemblyISD::THROW, DL, MVT::Other, // outchain type { Op.getOperand(0), // inchain SymNode, // exception symbol Op.getOperand(3) // thrown value }); } } } SDValue WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); // If sign extension operations are disabled, allow sext_inreg only if operand // is a vector extract. SIMD does not depend on sign extension operations, but // allowing sext_inreg in this context lets us have simple patterns to select // extract_lane_s instructions. Expanding sext_inreg everywhere would be // simpler in this file, but would necessitate large and brittle patterns to // undo the expansion and select extract_lane_s instructions. 
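// For example, (sign_extend_inreg (extract_vector_elt v16i8 %v, i), i8) can
// then be selected directly to i8x16.extract_lane_s instead of an extract
// followed by a shift pair.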
assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128()); if (Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT) { const SDValue &Extract = Op.getOperand(0); MVT VecT = Extract.getOperand(0).getSimpleValueType(); MVT ExtractedLaneT = static_cast(Op.getOperand(1).getNode()) ->getVT() .getSimpleVT(); MVT ExtractedVecT = MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits()); if (ExtractedVecT == VecT) return Op; // Bitcast vector to appropriate type to ensure ISel pattern coverage const SDValue &Index = Extract.getOperand(1); unsigned IndexVal = static_cast(Index.getNode())->getZExtValue(); unsigned Scale = ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements(); assert(Scale > 1); SDValue NewIndex = DAG.getConstant(IndexVal * Scale, DL, Index.getValueType()); SDValue NewExtract = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(), DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex); return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract, Op.getOperand(1)); } // Otherwise expand return SDValue(); } SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); const EVT VecT = Op.getValueType(); const EVT LaneT = Op.getOperand(0).getValueType(); const size_t Lanes = Op.getNumOperands(); auto IsConstant = [](const SDValue &V) { return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP; }; // Find the most common operand, which is approximately the best to splat using Entry = std::pair; SmallVector ValueCounts; size_t NumConst = 0, NumDynamic = 0; for (const SDValue &Lane : Op->op_values()) { if (Lane.isUndef()) { continue; } else if (IsConstant(Lane)) { NumConst++; } else { NumDynamic++; } auto CountIt = std::find_if(ValueCounts.begin(), ValueCounts.end(), [&Lane](Entry A) { return A.first == Lane; }); if (CountIt == ValueCounts.end()) { ValueCounts.emplace_back(Lane, 1); } else { CountIt->second++; } } auto CommonIt = std::max_element(ValueCounts.begin(), ValueCounts.end(), [](Entry A, Entry B) { return A.second < B.second; }); assert(CommonIt != ValueCounts.end() && "Unexpected all-undef build_vector"); SDValue SplatValue = CommonIt->first; size_t NumCommon = CommonIt->second; // If v128.const is available, consider using it instead of a splat if (Subtarget->hasUnimplementedSIMD128()) { // {i32,i64,f32,f64}.const opcode, and value const size_t ConstBytes = 1 + std::max(size_t(4), 16 / Lanes); // SIMD prefix and opcode const size_t SplatBytes = 2; const size_t SplatConstBytes = SplatBytes + ConstBytes; // SIMD prefix, opcode, and lane index const size_t ReplaceBytes = 3; const size_t ReplaceConstBytes = ReplaceBytes + ConstBytes; // SIMD prefix, v128.const opcode, and 128-bit value const size_t VecConstBytes = 18; // Initial v128.const and a replace_lane for each non-const operand const size_t ConstInitBytes = VecConstBytes + NumDynamic * ReplaceBytes; // Initial splat and all necessary replace_lanes const size_t SplatInitBytes = IsConstant(SplatValue) // Initial constant splat ? 
(SplatConstBytes + // Constant replace_lanes (NumConst - NumCommon) * ReplaceConstBytes + // Dynamic replace_lanes (NumDynamic * ReplaceBytes)) // Initial dynamic splat : (SplatBytes + // Constant replace_lanes (NumConst * ReplaceConstBytes) + // Dynamic replace_lanes (NumDynamic - NumCommon) * ReplaceBytes); if (ConstInitBytes < SplatInitBytes) { // Create build_vector that will lower to initial v128.const SmallVector ConstLanes; for (const SDValue &Lane : Op->op_values()) { if (IsConstant(Lane)) { ConstLanes.push_back(Lane); } else if (LaneT.isFloatingPoint()) { ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT)); } else { ConstLanes.push_back(DAG.getConstant(0, DL, LaneT)); } } SDValue Result = DAG.getBuildVector(VecT, DL, ConstLanes); // Add replace_lane instructions for non-const lanes for (size_t I = 0; I < Lanes; ++I) { const SDValue &Lane = Op->getOperand(I); if (!Lane.isUndef() && !IsConstant(Lane)) Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane, DAG.getConstant(I, DL, MVT::i32)); } return Result; } } // Use a splat for the initial vector SDValue Result = DAG.getSplatBuildVector(VecT, DL, SplatValue); // Add replace_lane instructions for other values for (size_t I = 0; I < Lanes; ++I) { const SDValue &Lane = Op->getOperand(I); if (Lane != SplatValue) Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane, DAG.getConstant(I, DL, MVT::i32)); } return Result; } SDValue WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); ArrayRef Mask = cast(Op.getNode())->getMask(); MVT VecType = Op.getOperand(0).getSimpleValueType(); assert(VecType.is128BitVector() && "Unexpected shuffle vector type"); size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8; // Space for two vector args and sixteen mask indices SDValue Ops[18]; size_t OpIdx = 0; Ops[OpIdx++] = Op.getOperand(0); Ops[OpIdx++] = Op.getOperand(1); // Expand mask indices to byte indices and materialize them as operands for (int M : Mask) { for (size_t J = 0; J < LaneBytes; ++J) { // Lower undefs (represented by -1 in mask) to zero uint64_t ByteIndex = M == -1 ? 
0 : (uint64_t)M * LaneBytes + J; Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32); } } return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops); } SDValue WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op, SelectionDAG &DAG) const { // Allow constant lane indices, expand variable lane indices SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode(); if (isa(IdxNode) || IdxNode->isUndef()) return Op; else // Perform default expansion return SDValue(); } static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) { EVT LaneT = Op.getSimpleValueType().getVectorElementType(); // 32-bit and 64-bit unrolled shifts will have proper semantics if (LaneT.bitsGE(MVT::i32)) return DAG.UnrollVectorOp(Op.getNode()); // Otherwise mask the shift value to get proper semantics from 32-bit shift SDLoc DL(Op); SDValue ShiftVal = Op.getOperand(1); uint64_t MaskVal = LaneT.getSizeInBits() - 1; SDValue MaskedShiftVal = DAG.getNode( ISD::AND, // mask opcode DL, ShiftVal.getValueType(), // masked value type ShiftVal, // original shift value operand DAG.getConstant(MaskVal, DL, ShiftVal.getValueType()) // mask operand ); return DAG.UnrollVectorOp( DAG.getNode(Op.getOpcode(), // original shift opcode DL, Op.getValueType(), // original return type Op.getOperand(0), // original vector operand, MaskedShiftVal // new masked shift value operand ) .getNode()); } SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); // Only manually lower vector shifts assert(Op.getSimpleValueType().isVector()); // Expand all vector shifts until V8 fixes its implementation // TODO: remove this once V8 is fixed if (!Subtarget->hasUnimplementedSIMD128()) return unrollVectorShift(Op, DAG); // Unroll non-splat vector shifts BuildVectorSDNode *ShiftVec; SDValue SplatVal; if (!(ShiftVec = dyn_cast(Op.getOperand(1).getNode())) || !(SplatVal = ShiftVec->getSplatValue())) return unrollVectorShift(Op, DAG); // All splats except i64x2 const splats are handled by patterns auto *SplatConst = dyn_cast(SplatVal); if (!SplatConst || Op.getSimpleValueType() != MVT::v2i64) return Op; // i64x2 const splats are custom lowered to avoid unnecessary wraps unsigned Opcode; switch (Op.getOpcode()) { case ISD::SHL: Opcode = WebAssemblyISD::VEC_SHL; break; case ISD::SRA: Opcode = WebAssemblyISD::VEC_SHR_S; break; case ISD::SRL: Opcode = WebAssemblyISD::VEC_SHR_U; break; default: llvm_unreachable("unexpected opcode"); } APInt Shift = SplatConst->getAPIntValue().zextOrTrunc(32); return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), DAG.getConstant(Shift, DL, MVT::i32)); } //===----------------------------------------------------------------------===// // WebAssembly Optimization Hooks //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp index a002a70..53aede4 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp @@ -1,1123 +1,1123 @@ //=== WebAssemblyLowerEmscriptenEHSjLj.cpp - Lower exceptions for Emscripten =// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// /// \file /// This file lowers exception-related instructions and setjmp/longjmp /// function calls in order to use Emscripten's JavaScript try and catch /// mechanism. /// /// To handle exceptions and setjmp/longjmps, this scheme relies on JavaScript's /// try and catch syntax and relevant exception-related libraries implemented /// in JavaScript glue code that will be produced by Emscripten. This is similar /// to the current Emscripten asm.js exception handling in fastcomp. For /// fastcomp's EH / SjLj scheme, see these files in the fastcomp LLVM branch: /// (Location: https://github.com/kripken/emscripten-fastcomp) /// lib/Target/JSBackend/NaCl/LowerEmExceptionsPass.cpp /// lib/Target/JSBackend/NaCl/LowerEmSetjmp.cpp /// lib/Target/JSBackend/JSBackend.cpp /// lib/Target/JSBackend/CallHandlers.h /// /// * Exception handling /// This pass lowers invokes and landingpads into library functions in JS glue /// code. Invokes are lowered into function wrappers called invoke wrappers that /// exist on the JS side, which wrap the original function call with JS try-catch. /// If an exception occurs, the cxa_throw() function on the JS side sets some /// variables (see below) so we can check whether an exception occurred from /// wasm code and handle it appropriately. /// /// * Setjmp-longjmp handling /// This pass lowers setjmp to a reasonably performant approach for Emscripten. /// The idea is that each block with a setjmp is broken up into two parts: the /// part containing setjmp and the part right after the setjmp. The latter part /// is either reached from the setjmp, or later from a longjmp. To handle the /// longjmp, all calls that might longjmp are also called using invoke wrappers /// and thus JS try-catch. The JS longjmp() function also sets some variables so /// we can check whether a longjmp occurred from wasm code. Each block with a /// function call that might longjmp is also split up after the longjmp call. /// After the longjmp call, we check whether a longjmp occurred, and if it did, /// which setjmp it corresponds to, and jump to the right post-setjmp block. /// We assume setjmp-longjmp handling always runs after EH handling, which means /// we don't expect any exception-related instructions when SjLj runs. /// FIXME Currently this scheme does not support indirect calls of setjmp, /// because of the limitation of the scheme itself. fastcomp does not support it /// either. /// /// In detail, this pass does the following things: /// /// 1) Assumes the existence of global variables: __THREW__, __threwValue /// __THREW__ and __threwValue will be set in invoke wrappers /// in JS glue code. For what invoke wrappers are, refer to 3). These /// variables are used for both exceptions and setjmp/longjmps. /// __THREW__ indicates whether an exception or a longjmp occurred or not. 0 /// means nothing occurred, 1 means an exception occurred, and other numbers /// mean a longjmp occurred. In the case of a longjmp, the __threwValue variable /// indicates the setjmp buffer that the longjmp corresponds to. /// /// * Exception handling /// /// 2) We assume the existence of setThrew and setTempRet0/getTempRet0 functions /// at link time. /// The global variables in 1) will exist in wasm address space, /// but their values should be set in JS code, so these functions act /// as interfaces to JS glue code. These functions are equivalent to the /// following JS functions, which actually exist in the asm.js version of the JS /// library. /// /// function setThrew(threw, value) { /// if (__THREW__ == 0) { /// __THREW__ = threw; /// __threwValue = value; /// } /// } // /// setTempRet0 is called from __cxa_find_matching_catch() in JS glue code. /// /// In exception handling, getTempRet0 indicates the type of an exception /// caught, and in setjmp/longjmp, it means the second argument to the longjmp /// function. /// /// 3) Lower /// invoke @func(arg1, arg2) to label %invoke.cont unwind label %lpad /// into /// __THREW__ = 0; /// call @__invoke_SIG(func, arg1, arg2) /// %__THREW__.val = __THREW__; /// __THREW__ = 0; /// if (%__THREW__.val == 1) /// goto %lpad /// else /// goto %invoke.cont /// SIG is a mangled string generated based on the LLVM IR-level function /// signature. After LLVM IR types are lowered to the target wasm types, /// the names for these wrappers will change based on wasm types as well, /// as in invoke_vi (function takes an int and returns void). The bodies of /// these wrappers will be generated in JS glue code, and inside those /// wrappers we use JS try-catch to generate actual exception effects. It /// also calls the original callee function. An example wrapper in JS code /// would look like this: /// function invoke_vi(index,a1) { /// try { /// Module["dynCall_vi"](index,a1); // This calls original callee /// } catch(e) { /// if (typeof e !== 'number' && e !== 'longjmp') throw e; /// asm["setThrew"](1, 0); // setThrew is called here /// } /// } /// If an exception is thrown, __THREW__ will be set to true in a wrapper, /// so we can jump to the right BB based on this value. /// /// 4) Lower /// %val = landingpad catch c1 catch c2 catch c3 ... /// ... use %val ... /// into /// %fmc = call @__cxa_find_matching_catch_N(c1, c2, c3, ...) /// %val = {%fmc, getTempRet0()} /// ... use %val ... /// Here N is a number calculated based on the number of clauses. /// setTempRet0 is called from __cxa_find_matching_catch() in JS glue code. /// /// 5) Lower /// resume {%a, %b} /// into /// call @__resumeException(%a) /// where __resumeException() is a function in JS glue code. /// /// 6) Lower /// call @llvm.eh.typeid.for(type) (intrinsic) /// into /// call @llvm_eh_typeid_for(type) /// llvm_eh_typeid_for function will be generated in JS glue code. /// /// * Setjmp / Longjmp handling /// /// In case calls to longjmp() exist /// /// 1) Lower /// longjmp(buf, value) /// into /// emscripten_longjmp_jmpbuf(buf, value) /// emscripten_longjmp_jmpbuf will be lowered to emscripten_longjmp later. /// /// In case calls to setjmp() exist /// /// 2) In the function entry that calls setjmp, initialize setjmpTable and /// setjmpTableSize as follows: /// setjmpTableSize = 4; /// setjmpTable = (int *) malloc(40); /// setjmpTable[0] = 0; /// setjmpTable and setjmpTableSize are used in the saveSetjmp() function in JS /// code. /// /// 3) Lower /// setjmp(buf) /// into /// setjmpTable = saveSetjmp(buf, label, setjmpTable, setjmpTableSize); /// setjmpTableSize = getTempRet0(); /// For each dynamic setjmp call, setjmpTable stores its ID (a number which /// is incrementally assigned from 0) and its label (a unique number that /// represents each callsite of setjmp). When we need more entries in /// setjmpTable, it is reallocated in saveSetjmp() in JS code and it will /// return the new table address, and assign the new table size in /// setTempRet0(). saveSetjmp also stores the setjmp's ID into the buffer /// buf.
A BB with setjmp is split into two after setjmp call in order to /// make the post-setjmp BB the possible destination of longjmp BB. /// /// /// 4) Lower every call that might longjmp into /// __THREW__ = 0; /// call @__invoke_SIG(func, arg1, arg2) /// %__THREW__.val = __THREW__; /// __THREW__ = 0; /// if (%__THREW__.val != 0 & __threwValue != 0) { /// %label = testSetjmp(mem[%__THREW__.val], setjmpTable, /// setjmpTableSize); /// if (%label == 0) /// emscripten_longjmp(%__THREW__.val, __threwValue); /// setTempRet0(__threwValue); /// } else { /// %label = -1; /// } /// longjmp_result = getTempRet0(); /// switch label { /// label 1: goto post-setjmp BB 1 /// label 2: goto post-setjmp BB 2 /// ... /// default: goto splitted next BB /// } /// testSetjmp examines setjmpTable to see if there is a matching setjmp /// call. After calling an invoke wrapper, if a longjmp occurred, __THREW__ /// will be the address of matching jmp_buf buffer and __threwValue be the /// second argument to longjmp. mem[__THREW__.val] is a setjmp ID that is /// stored in saveSetjmp. testSetjmp returns a setjmp label, a unique ID to /// each setjmp callsite. Label 0 means this longjmp buffer does not /// correspond to one of the setjmp callsites in this function, so in this /// case we just chain the longjmp to the caller. (Here we call /// emscripten_longjmp, which is different from emscripten_longjmp_jmpbuf. /// emscripten_longjmp_jmpbuf takes jmp_buf as its first argument, while /// emscripten_longjmp takes an int. Both of them will eventually be lowered /// to emscripten_longjmp in s2wasm, but here we need two signatures - we /// can't translate an int value to a jmp_buf.) /// Label -1 means no longjmp occurred. Otherwise we jump to the right /// post-setjmp BB based on the label. /// ///===----------------------------------------------------------------------===// #include "WebAssembly.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; #define DEBUG_TYPE "wasm-lower-em-ehsjlj" static cl::list EHWhitelist("emscripten-cxx-exceptions-whitelist", cl::desc("The list of function names in which Emscripten-style " "exception handling is enabled (see emscripten " "EMSCRIPTEN_CATCHING_WHITELIST options)"), cl::CommaSeparated); namespace { class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass { static const char *ResumeFName; static const char *EHTypeIDFName; static const char *EmLongjmpFName; static const char *EmLongjmpJmpbufFName; static const char *SaveSetjmpFName; static const char *TestSetjmpFName; static const char *FindMatchingCatchPrefix; static const char *InvokePrefix; bool EnableEH; // Enable exception handling bool EnableSjLj; // Enable setjmp/longjmp handling GlobalVariable *ThrewGV = nullptr; GlobalVariable *ThrewValueGV = nullptr; Function *GetTempRet0Func = nullptr; Function *SetTempRet0Func = nullptr; Function *ResumeF = nullptr; Function *EHTypeIDF = nullptr; Function *EmLongjmpF = nullptr; Function *EmLongjmpJmpbufF = nullptr; Function *SaveSetjmpF = nullptr; Function *TestSetjmpF = nullptr; // __cxa_find_matching_catch_N functions. // Indexed by the number of clauses in an original landingpad instruction. 
DenseMap<int, Function *> FindMatchingCatches; // Map of <function signature string, invoke_ wrappers> StringMap<Function *> InvokeWrappers; // Set of whitelisted function names for exception handling std::set<std::string> EHWhitelistSet; StringRef getPassName() const override { return "WebAssembly Lower Emscripten Exceptions"; } bool runEHOnFunction(Function &F); bool runSjLjOnFunction(Function &F); Function *getFindMatchingCatch(Module &M, unsigned NumClauses); template <typename CallOrInvoke> Value *wrapInvoke(CallOrInvoke *CI); void wrapTestSetjmp(BasicBlock *BB, Instruction *InsertPt, Value *Threw, Value *SetjmpTable, Value *SetjmpTableSize, Value *&Label, Value *&LongjmpResult, BasicBlock *&EndBB); template <typename CallOrInvoke> Function *getInvokeWrapper(CallOrInvoke *CI); bool areAllExceptionsAllowed() const { return EHWhitelistSet.empty(); } bool canLongjmp(Module &M, const Value *Callee) const; void rebuildSSA(Function &F); public: static char ID; WebAssemblyLowerEmscriptenEHSjLj(bool EnableEH = true, bool EnableSjLj = true) : ModulePass(ID), EnableEH(EnableEH), EnableSjLj(EnableSjLj) { EHWhitelistSet.insert(EHWhitelist.begin(), EHWhitelist.end()); } bool runOnModule(Module &M) override; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<DominatorTreeWrapperPass>(); } }; } // End anonymous namespace const char *WebAssemblyLowerEmscriptenEHSjLj::ResumeFName = "__resumeException"; const char *WebAssemblyLowerEmscriptenEHSjLj::EHTypeIDFName = "llvm_eh_typeid_for"; const char *WebAssemblyLowerEmscriptenEHSjLj::EmLongjmpFName = "emscripten_longjmp"; const char *WebAssemblyLowerEmscriptenEHSjLj::EmLongjmpJmpbufFName = "emscripten_longjmp_jmpbuf"; const char *WebAssemblyLowerEmscriptenEHSjLj::SaveSetjmpFName = "saveSetjmp"; const char *WebAssemblyLowerEmscriptenEHSjLj::TestSetjmpFName = "testSetjmp"; const char *WebAssemblyLowerEmscriptenEHSjLj::FindMatchingCatchPrefix = "__cxa_find_matching_catch_"; const char *WebAssemblyLowerEmscriptenEHSjLj::InvokePrefix = "__invoke_"; char WebAssemblyLowerEmscriptenEHSjLj::ID = 0; INITIALIZE_PASS(WebAssemblyLowerEmscriptenEHSjLj, DEBUG_TYPE, "WebAssembly Lower Emscripten Exceptions / Setjmp / Longjmp", false, false) ModulePass *llvm::createWebAssemblyLowerEmscriptenEHSjLj(bool EnableEH, bool EnableSjLj) { return new WebAssemblyLowerEmscriptenEHSjLj(EnableEH, EnableSjLj); } static bool canThrow(const Value *V) { if (const auto *F = dyn_cast<Function>(V)) { // Intrinsics cannot throw if (F->isIntrinsic()) return false; StringRef Name = F->getName(); // leave setjmp and longjmp (mostly) alone, we process them properly later if (Name == "setjmp" || Name == "longjmp") return false; return !F->doesNotThrow(); } // not a function, so an indirect call - can throw, we can't tell return true; } // Get a global variable with the given name. If it doesn't exist declare it, // which will generate an import and assume that it will exist at link time. static GlobalVariable *getGlobalVariableI32(Module &M, IRBuilder<> &IRB, const char *Name) { auto *GV = dyn_cast<GlobalVariable>(M.getOrInsertGlobal(Name, IRB.getInt32Ty())); if (!GV) report_fatal_error(Twine("unable to create global: ") + Name); return GV; } // Simple function name mangler. // This function simply takes LLVM's string representation of parameter types // and concatenates them with '_'. There are non-alphanumeric characters but llc // is ok with it, and we need to postprocess these names after the lowering // phase anyway.
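// For illustration (these are the same names that appear in the tests below):
// an LLVM function type `void (i32)` is mangled to "void_i32", so its wrapper
// is declared as @__invoke_void_i32; `i8* (i8, i8)` becomes "i8*_i8_i8",
// giving @"__invoke_i8*_i8_i8"; and a pointer-to-struct parameter keeps its
// type name, as in @"__invoke_void_%struct.__jmp_buf_tag*_i32".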
static std::string getSignature(FunctionType *FTy) { std::string Sig; raw_string_ostream OS(Sig); OS << *FTy->getReturnType(); for (Type *ParamTy : FTy->params()) OS << "_" << *ParamTy; if (FTy->isVarArg()) OS << "_..."; Sig = OS.str(); Sig.erase(remove_if(Sig, isspace), Sig.end()); // When s2wasm parses .s file, a comma means the end of an argument. So a // mangled function name can contain any character but a comma. std::replace(Sig.begin(), Sig.end(), ',', '.'); return Sig; } // Returns __cxa_find_matching_catch_N function, where N = NumClauses + 2. // This is because a landingpad instruction contains two more arguments, a // personality function and a cleanup bit, and __cxa_find_matching_catch_N // functions are named after the number of arguments in the original landingpad // instruction. Function * WebAssemblyLowerEmscriptenEHSjLj::getFindMatchingCatch(Module &M, unsigned NumClauses) { if (FindMatchingCatches.count(NumClauses)) return FindMatchingCatches[NumClauses]; PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext()); SmallVector Args(NumClauses, Int8PtrTy); FunctionType *FTy = FunctionType::get(Int8PtrTy, Args, false); Function *F = Function::Create(FTy, GlobalValue::ExternalLinkage, FindMatchingCatchPrefix + Twine(NumClauses + 2), &M); FindMatchingCatches[NumClauses] = F; return F; } // Generate invoke wrapper seqence with preamble and postamble // Preamble: // __THREW__ = 0; // Postamble: // %__THREW__.val = __THREW__; __THREW__ = 0; // Returns %__THREW__.val, which indicates whether an exception is thrown (or // whether longjmp occurred), for future use. template Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallOrInvoke *CI) { LLVMContext &C = CI->getModule()->getContext(); // If we are calling a function that is noreturn, we must remove that // attribute. The code we insert here does expect it to return, after we // catch the exception. if (CI->doesNotReturn()) { if (auto *F = dyn_cast(CI->getCalledValue())) F->removeFnAttr(Attribute::NoReturn); CI->removeAttribute(AttributeList::FunctionIndex, Attribute::NoReturn); } IRBuilder<> IRB(C); IRB.SetInsertPoint(CI); // Pre-invoke // __THREW__ = 0; IRB.CreateStore(IRB.getInt32(0), ThrewGV); // Invoke function wrapper in JavaScript SmallVector Args; // Put the pointer to the callee as first argument, so it can be called // within the invoke wrapper later Args.push_back(CI->getCalledValue()); Args.append(CI->arg_begin(), CI->arg_end()); CallInst *NewCall = IRB.CreateCall(getInvokeWrapper(CI), Args); NewCall->takeName(CI); - NewCall->setCallingConv(CI->getCallingConv()); + NewCall->setCallingConv(CallingConv::WASM_EmscriptenInvoke); NewCall->setDebugLoc(CI->getDebugLoc()); // Because we added the pointer to the callee as first argument, all // argument attribute indices have to be incremented by one. SmallVector ArgAttributes; const AttributeList &InvokeAL = CI->getAttributes(); // No attributes for the callee pointer. ArgAttributes.push_back(AttributeSet()); // Copy the argument attributes from the original for (unsigned I = 0, E = CI->getNumArgOperands(); I < E; ++I) ArgAttributes.push_back(InvokeAL.getParamAttributes(I)); AttrBuilder FnAttrs(InvokeAL.getFnAttributes()); if (FnAttrs.contains(Attribute::AllocSize)) { // The allocsize attribute (if any) referes to parameters by index and needs // to be adjusted. 
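// For example (this is what the allocsize test in lower-em-sjlj.ll below
// checks): a callee declared with allocsize(0) ends up with allocsize(1) on
// the __invoke_ call, because the callee pointer now occupies argument slot 0.
// Parameter attributes such as signext/zeroext likewise move one index to the
// right, as the @arg_attributes test in lower-em-exceptions.ll verifies.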
unsigned SizeArg; Optional NEltArg; std::tie(SizeArg, NEltArg) = FnAttrs.getAllocSizeArgs(); SizeArg += 1; if (NEltArg.hasValue()) NEltArg = NEltArg.getValue() + 1; FnAttrs.addAllocSizeAttr(SizeArg, NEltArg); } // Reconstruct the AttributesList based on the vector we constructed. AttributeList NewCallAL = AttributeList::get(C, AttributeSet::get(C, FnAttrs), InvokeAL.getRetAttributes(), ArgAttributes); NewCall->setAttributes(NewCallAL); CI->replaceAllUsesWith(NewCall); // Post-invoke // %__THREW__.val = __THREW__; __THREW__ = 0; Value *Threw = IRB.CreateLoad(IRB.getInt32Ty(), ThrewGV, ThrewGV->getName() + ".val"); IRB.CreateStore(IRB.getInt32(0), ThrewGV); return Threw; } // Get matching invoke wrapper based on callee signature template Function *WebAssemblyLowerEmscriptenEHSjLj::getInvokeWrapper(CallOrInvoke *CI) { Module *M = CI->getModule(); SmallVector ArgTys; Value *Callee = CI->getCalledValue(); FunctionType *CalleeFTy; if (auto *F = dyn_cast(Callee)) CalleeFTy = F->getFunctionType(); else { auto *CalleeTy = cast(Callee->getType())->getElementType(); CalleeFTy = dyn_cast(CalleeTy); } std::string Sig = getSignature(CalleeFTy); if (InvokeWrappers.find(Sig) != InvokeWrappers.end()) return InvokeWrappers[Sig]; // Put the pointer to the callee as first argument ArgTys.push_back(PointerType::getUnqual(CalleeFTy)); // Add argument types ArgTys.append(CalleeFTy->param_begin(), CalleeFTy->param_end()); FunctionType *FTy = FunctionType::get(CalleeFTy->getReturnType(), ArgTys, CalleeFTy->isVarArg()); Function *F = Function::Create(FTy, GlobalValue::ExternalLinkage, InvokePrefix + Sig, M); InvokeWrappers[Sig] = F; return F; } bool WebAssemblyLowerEmscriptenEHSjLj::canLongjmp(Module &M, const Value *Callee) const { if (auto *CalleeF = dyn_cast(Callee)) if (CalleeF->isIntrinsic()) return false; // Attempting to transform inline assembly will result in something like: // call void @__invoke_void(void ()* asm ...) // which is invalid because inline assembly blocks do not have addresses // and can't be passed by pointer. The result is a crash with illegal IR. if (isa(Callee)) return false; // The reason we include malloc/free here is to exclude the malloc/free // calls generated in setjmp prep / cleanup routines. Function *SetjmpF = M.getFunction("setjmp"); Function *MallocF = M.getFunction("malloc"); Function *FreeF = M.getFunction("free"); if (Callee == SetjmpF || Callee == MallocF || Callee == FreeF) return false; // There are functions in JS glue code if (Callee == ResumeF || Callee == EHTypeIDF || Callee == SaveSetjmpF || Callee == TestSetjmpF) return false; // __cxa_find_matching_catch_N functions cannot longjmp if (Callee->getName().startswith(FindMatchingCatchPrefix)) return false; // Exception-catching related functions Function *BeginCatchF = M.getFunction("__cxa_begin_catch"); Function *EndCatchF = M.getFunction("__cxa_end_catch"); Function *AllocExceptionF = M.getFunction("__cxa_allocate_exception"); Function *ThrowF = M.getFunction("__cxa_throw"); Function *TerminateF = M.getFunction("__clang_call_terminate"); if (Callee == BeginCatchF || Callee == EndCatchF || Callee == AllocExceptionF || Callee == ThrowF || Callee == TerminateF || Callee == GetTempRet0Func || Callee == SetTempRet0Func) return false; // Otherwise we don't know return true; } // Generate testSetjmp function call seqence with preamble and postamble. 
// The code this generates is equivalent to the following JavaScript code: // if (%__THREW__.val != 0 & threwValue != 0) { // %label = _testSetjmp(mem[%__THREW__.val], setjmpTable, setjmpTableSize); // if (%label == 0) // emscripten_longjmp(%__THREW__.val, threwValue); // setTempRet0(threwValue); // } else { // %label = -1; // } // %longjmp_result = getTempRet0(); // // As output parameters. returns %label, %longjmp_result, and the BB the last // instruction (%longjmp_result = ...) is in. void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp( BasicBlock *BB, Instruction *InsertPt, Value *Threw, Value *SetjmpTable, Value *SetjmpTableSize, Value *&Label, Value *&LongjmpResult, BasicBlock *&EndBB) { Function *F = BB->getParent(); LLVMContext &C = BB->getModule()->getContext(); IRBuilder<> IRB(C); IRB.SetInsertPoint(InsertPt); // if (%__THREW__.val != 0 & threwValue != 0) IRB.SetInsertPoint(BB); BasicBlock *ThenBB1 = BasicBlock::Create(C, "if.then1", F); BasicBlock *ElseBB1 = BasicBlock::Create(C, "if.else1", F); BasicBlock *EndBB1 = BasicBlock::Create(C, "if.end", F); Value *ThrewCmp = IRB.CreateICmpNE(Threw, IRB.getInt32(0)); Value *ThrewValue = IRB.CreateLoad(IRB.getInt32Ty(), ThrewValueGV, ThrewValueGV->getName() + ".val"); Value *ThrewValueCmp = IRB.CreateICmpNE(ThrewValue, IRB.getInt32(0)); Value *Cmp1 = IRB.CreateAnd(ThrewCmp, ThrewValueCmp, "cmp1"); IRB.CreateCondBr(Cmp1, ThenBB1, ElseBB1); // %label = _testSetjmp(mem[%__THREW__.val], _setjmpTable, _setjmpTableSize); // if (%label == 0) IRB.SetInsertPoint(ThenBB1); BasicBlock *ThenBB2 = BasicBlock::Create(C, "if.then2", F); BasicBlock *EndBB2 = BasicBlock::Create(C, "if.end2", F); Value *ThrewInt = IRB.CreateIntToPtr(Threw, Type::getInt32PtrTy(C), Threw->getName() + ".i32p"); Value *LoadedThrew = IRB.CreateLoad(IRB.getInt32Ty(), ThrewInt, ThrewInt->getName() + ".loaded"); Value *ThenLabel = IRB.CreateCall( TestSetjmpF, {LoadedThrew, SetjmpTable, SetjmpTableSize}, "label"); Value *Cmp2 = IRB.CreateICmpEQ(ThenLabel, IRB.getInt32(0)); IRB.CreateCondBr(Cmp2, ThenBB2, EndBB2); // emscripten_longjmp(%__THREW__.val, threwValue); IRB.SetInsertPoint(ThenBB2); IRB.CreateCall(EmLongjmpF, {Threw, ThrewValue}); IRB.CreateUnreachable(); // setTempRet0(threwValue); IRB.SetInsertPoint(EndBB2); IRB.CreateCall(SetTempRet0Func, ThrewValue); IRB.CreateBr(EndBB1); IRB.SetInsertPoint(ElseBB1); IRB.CreateBr(EndBB1); // longjmp_result = getTempRet0(); IRB.SetInsertPoint(EndBB1); PHINode *LabelPHI = IRB.CreatePHI(IRB.getInt32Ty(), 2, "label"); LabelPHI->addIncoming(ThenLabel, EndBB2); LabelPHI->addIncoming(IRB.getInt32(-1), ElseBB1); // Output parameter assignment Label = LabelPHI; EndBB = EndBB1; LongjmpResult = IRB.CreateCall(GetTempRet0Func, None, "longjmp_result"); } void WebAssemblyLowerEmscriptenEHSjLj::rebuildSSA(Function &F) { DominatorTree &DT = getAnalysis(F).getDomTree(); DT.recalculate(F); // CFG has been changed SSAUpdater SSA; for (BasicBlock &BB : F) { for (Instruction &I : BB) { for (auto UI = I.use_begin(), UE = I.use_end(); UI != UE;) { Use &U = *UI; ++UI; SSA.Initialize(I.getType(), I.getName()); SSA.AddAvailableValue(&BB, &I); auto *User = cast(U.getUser()); if (User->getParent() == &BB) continue; if (auto *UserPN = dyn_cast(User)) if (UserPN->getIncomingBlock(U) == &BB) continue; if (DT.dominates(&I, User)) continue; SSA.RewriteUseAfterInsertions(U); } } } } bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) { LLVM_DEBUG(dbgs() << "********** Lower Emscripten EH & SjLj **********\n"); LLVMContext &C = M.getContext(); IRBuilder<> 
IRB(C); Function *SetjmpF = M.getFunction("setjmp"); Function *LongjmpF = M.getFunction("longjmp"); bool SetjmpUsed = SetjmpF && !SetjmpF->use_empty(); bool LongjmpUsed = LongjmpF && !LongjmpF->use_empty(); bool DoSjLj = EnableSjLj && (SetjmpUsed || LongjmpUsed); // Declare (or get) global variables __THREW__, __threwValue, and // getTempRet0/setTempRet0 function which are used in common for both // exception handling and setjmp/longjmp handling ThrewGV = getGlobalVariableI32(M, IRB, "__THREW__"); ThrewValueGV = getGlobalVariableI32(M, IRB, "__threwValue"); GetTempRet0Func = Function::Create(FunctionType::get(IRB.getInt32Ty(), false), GlobalValue::ExternalLinkage, "getTempRet0", &M); SetTempRet0Func = Function::Create( FunctionType::get(IRB.getVoidTy(), IRB.getInt32Ty(), false), GlobalValue::ExternalLinkage, "setTempRet0", &M); GetTempRet0Func->setDoesNotThrow(); SetTempRet0Func->setDoesNotThrow(); bool Changed = false; // Exception handling if (EnableEH) { // Register __resumeException function FunctionType *ResumeFTy = FunctionType::get(IRB.getVoidTy(), IRB.getInt8PtrTy(), false); ResumeF = Function::Create(ResumeFTy, GlobalValue::ExternalLinkage, ResumeFName, &M); // Register llvm_eh_typeid_for function FunctionType *EHTypeIDTy = FunctionType::get(IRB.getInt32Ty(), IRB.getInt8PtrTy(), false); EHTypeIDF = Function::Create(EHTypeIDTy, GlobalValue::ExternalLinkage, EHTypeIDFName, &M); for (Function &F : M) { if (F.isDeclaration()) continue; Changed |= runEHOnFunction(F); } } // Setjmp/longjmp handling if (DoSjLj) { Changed = true; // We have setjmp or longjmp somewhere if (LongjmpF) { // Replace all uses of longjmp with emscripten_longjmp_jmpbuf, which is // defined in JS code EmLongjmpJmpbufF = Function::Create(LongjmpF->getFunctionType(), GlobalValue::ExternalLinkage, EmLongjmpJmpbufFName, &M); LongjmpF->replaceAllUsesWith(EmLongjmpJmpbufF); } if (SetjmpF) { // Register saveSetjmp function FunctionType *SetjmpFTy = SetjmpF->getFunctionType(); SmallVector Params = {SetjmpFTy->getParamType(0), IRB.getInt32Ty(), Type::getInt32PtrTy(C), IRB.getInt32Ty()}; FunctionType *FTy = FunctionType::get(Type::getInt32PtrTy(C), Params, false); SaveSetjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage, SaveSetjmpFName, &M); // Register testSetjmp function Params = {IRB.getInt32Ty(), Type::getInt32PtrTy(C), IRB.getInt32Ty()}; FTy = FunctionType::get(IRB.getInt32Ty(), Params, false); TestSetjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage, TestSetjmpFName, &M); FTy = FunctionType::get(IRB.getVoidTy(), {IRB.getInt32Ty(), IRB.getInt32Ty()}, false); EmLongjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage, EmLongjmpFName, &M); // Only traverse functions that uses setjmp in order not to insert // unnecessary prep / cleanup code in every function SmallPtrSet SetjmpUsers; for (User *U : SetjmpF->users()) { auto *UI = cast(U); SetjmpUsers.insert(UI->getFunction()); } for (Function *F : SetjmpUsers) runSjLjOnFunction(*F); } } if (!Changed) { // Delete unused global variables and functions if (ResumeF) ResumeF->eraseFromParent(); if (EHTypeIDF) EHTypeIDF->eraseFromParent(); if (EmLongjmpF) EmLongjmpF->eraseFromParent(); if (SaveSetjmpF) SaveSetjmpF->eraseFromParent(); if (TestSetjmpF) TestSetjmpF->eraseFromParent(); return false; } return true; } bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) { Module &M = *F.getParent(); LLVMContext &C = F.getContext(); IRBuilder<> IRB(C); bool Changed = false; SmallVector ToErase; SmallPtrSet LandingPads; bool AllowExceptions = 
areAllExceptionsAllowed() || EHWhitelistSet.count(F.getName()); for (BasicBlock &BB : F) { auto *II = dyn_cast(BB.getTerminator()); if (!II) continue; Changed = true; LandingPads.insert(II->getLandingPadInst()); IRB.SetInsertPoint(II); bool NeedInvoke = AllowExceptions && canThrow(II->getCalledValue()); if (NeedInvoke) { // Wrap invoke with invoke wrapper and generate preamble/postamble Value *Threw = wrapInvoke(II); ToErase.push_back(II); // Insert a branch based on __THREW__ variable Value *Cmp = IRB.CreateICmpEQ(Threw, IRB.getInt32(1), "cmp"); IRB.CreateCondBr(Cmp, II->getUnwindDest(), II->getNormalDest()); } else { // This can't throw, and we don't need this invoke, just replace it with a // call+branch SmallVector Args(II->arg_begin(), II->arg_end()); CallInst *NewCall = IRB.CreateCall(II->getFunctionType(), II->getCalledValue(), Args); NewCall->takeName(II); NewCall->setCallingConv(II->getCallingConv()); NewCall->setDebugLoc(II->getDebugLoc()); NewCall->setAttributes(II->getAttributes()); II->replaceAllUsesWith(NewCall); ToErase.push_back(II); IRB.CreateBr(II->getNormalDest()); // Remove any PHI node entries from the exception destination II->getUnwindDest()->removePredecessor(&BB); } } // Process resume instructions for (BasicBlock &BB : F) { // Scan the body of the basic block for resumes for (Instruction &I : BB) { auto *RI = dyn_cast(&I); if (!RI) continue; // Split the input into legal values Value *Input = RI->getValue(); IRB.SetInsertPoint(RI); Value *Low = IRB.CreateExtractValue(Input, 0, "low"); // Create a call to __resumeException function IRB.CreateCall(ResumeF, {Low}); // Add a terminator to the block IRB.CreateUnreachable(); ToErase.push_back(RI); } } // Process llvm.eh.typeid.for intrinsics for (BasicBlock &BB : F) { for (Instruction &I : BB) { auto *CI = dyn_cast(&I); if (!CI) continue; const Function *Callee = CI->getCalledFunction(); if (!Callee) continue; if (Callee->getIntrinsicID() != Intrinsic::eh_typeid_for) continue; IRB.SetInsertPoint(CI); CallInst *NewCI = IRB.CreateCall(EHTypeIDF, CI->getArgOperand(0), "typeid"); CI->replaceAllUsesWith(NewCI); ToErase.push_back(CI); } } // Look for orphan landingpads, can occur in blocks with no predecessors for (BasicBlock &BB : F) { Instruction *I = BB.getFirstNonPHI(); if (auto *LPI = dyn_cast(I)) LandingPads.insert(LPI); } // Handle all the landingpad for this function together, as multiple invokes // may share a single lp for (LandingPadInst *LPI : LandingPads) { IRB.SetInsertPoint(LPI); SmallVector FMCArgs; for (unsigned I = 0, E = LPI->getNumClauses(); I < E; ++I) { Constant *Clause = LPI->getClause(I); // As a temporary workaround for the lack of aggregate varargs support // in the interface between JS and wasm, break out filter operands into // their component elements. 
if (LPI->isFilter(I)) { auto *ATy = cast(Clause->getType()); for (unsigned J = 0, E = ATy->getNumElements(); J < E; ++J) { Value *EV = IRB.CreateExtractValue(Clause, makeArrayRef(J), "filter"); FMCArgs.push_back(EV); } } else FMCArgs.push_back(Clause); } // Create a call to __cxa_find_matching_catch_N function Function *FMCF = getFindMatchingCatch(M, FMCArgs.size()); CallInst *FMCI = IRB.CreateCall(FMCF, FMCArgs, "fmc"); Value *Undef = UndefValue::get(LPI->getType()); Value *Pair0 = IRB.CreateInsertValue(Undef, FMCI, 0, "pair0"); Value *TempRet0 = IRB.CreateCall(GetTempRet0Func, None, "tempret0"); Value *Pair1 = IRB.CreateInsertValue(Pair0, TempRet0, 1, "pair1"); LPI->replaceAllUsesWith(Pair1); ToErase.push_back(LPI); } // Erase everything we no longer need in this function for (Instruction *I : ToErase) I->eraseFromParent(); return Changed; } bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) { Module &M = *F.getParent(); LLVMContext &C = F.getContext(); IRBuilder<> IRB(C); SmallVector ToErase; // Vector of %setjmpTable values std::vector SetjmpTableInsts; // Vector of %setjmpTableSize values std::vector SetjmpTableSizeInsts; // Setjmp preparation // This instruction effectively means %setjmpTableSize = 4. // We create this as an instruction intentionally, and we don't want to fold // this instruction to a constant 4, because this value will be used in // SSAUpdater.AddAvailableValue(...) later. BasicBlock &EntryBB = F.getEntryBlock(); BinaryOperator *SetjmpTableSize = BinaryOperator::Create( Instruction::Add, IRB.getInt32(4), IRB.getInt32(0), "setjmpTableSize", &*EntryBB.getFirstInsertionPt()); // setjmpTable = (int *) malloc(40); Instruction *SetjmpTable = CallInst::CreateMalloc( SetjmpTableSize, IRB.getInt32Ty(), IRB.getInt32Ty(), IRB.getInt32(40), nullptr, nullptr, "setjmpTable"); // setjmpTable[0] = 0; IRB.SetInsertPoint(SetjmpTableSize); IRB.CreateStore(IRB.getInt32(0), SetjmpTable); SetjmpTableInsts.push_back(SetjmpTable); SetjmpTableSizeInsts.push_back(SetjmpTableSize); // Setjmp transformation std::vector SetjmpRetPHIs; Function *SetjmpF = M.getFunction("setjmp"); for (User *U : SetjmpF->users()) { auto *CI = dyn_cast(U); if (!CI) report_fatal_error("Does not support indirect calls to setjmp"); BasicBlock *BB = CI->getParent(); if (BB->getParent() != &F) // in other function continue; // The tail is everything right after the call, and will be reached once // when setjmp is called, and later when longjmp returns to the setjmp BasicBlock *Tail = SplitBlock(BB, CI->getNextNode()); // Add a phi to the tail, which will be the output of setjmp, which // indicates if this is the first call or a longjmp back. The phi directly // uses the right value based on where we arrive from IRB.SetInsertPoint(Tail->getFirstNonPHI()); PHINode *SetjmpRet = IRB.CreatePHI(IRB.getInt32Ty(), 2, "setjmp.ret"); // setjmp initial call returns 0 SetjmpRet->addIncoming(IRB.getInt32(0), BB); // The proper output is now this, not the setjmp call itself CI->replaceAllUsesWith(SetjmpRet); // longjmp returns to the setjmp will add themselves to this phi SetjmpRetPHIs.push_back(SetjmpRet); // Fix call target // Our index in the function is our place in the array + 1 to avoid index // 0, because index 0 means the longjmp is not ours to handle. 
IRB.SetInsertPoint(CI); Value *Args[] = {CI->getArgOperand(0), IRB.getInt32(SetjmpRetPHIs.size()), SetjmpTable, SetjmpTableSize}; Instruction *NewSetjmpTable = IRB.CreateCall(SaveSetjmpF, Args, "setjmpTable"); Instruction *NewSetjmpTableSize = IRB.CreateCall(GetTempRet0Func, None, "setjmpTableSize"); SetjmpTableInsts.push_back(NewSetjmpTable); SetjmpTableSizeInsts.push_back(NewSetjmpTableSize); ToErase.push_back(CI); } // Update each call that can longjmp so it can return to a setjmp where // relevant. // Because we are creating new BBs while processing and don't want to make // all these newly created BBs candidates again for longjmp processing, we // first make the vector of candidate BBs. std::vector BBs; for (BasicBlock &BB : F) BBs.push_back(&BB); // BBs.size() will change within the loop, so we query it every time for (unsigned I = 0; I < BBs.size(); I++) { BasicBlock *BB = BBs[I]; for (Instruction &I : *BB) { assert(!isa(&I)); auto *CI = dyn_cast(&I); if (!CI) continue; const Value *Callee = CI->getCalledValue(); if (!canLongjmp(M, Callee)) continue; Value *Threw = nullptr; BasicBlock *Tail; if (Callee->getName().startswith(InvokePrefix)) { // If invoke wrapper has already been generated for this call in // previous EH phase, search for the load instruction // %__THREW__.val = __THREW__; // in postamble after the invoke wrapper call LoadInst *ThrewLI = nullptr; StoreInst *ThrewResetSI = nullptr; for (auto I = std::next(BasicBlock::iterator(CI)), IE = BB->end(); I != IE; ++I) { if (auto *LI = dyn_cast(I)) if (auto *GV = dyn_cast(LI->getPointerOperand())) if (GV == ThrewGV) { Threw = ThrewLI = LI; break; } } // Search for the store instruction after the load above // __THREW__ = 0; for (auto I = std::next(BasicBlock::iterator(ThrewLI)), IE = BB->end(); I != IE; ++I) { if (auto *SI = dyn_cast(I)) if (auto *GV = dyn_cast(SI->getPointerOperand())) if (GV == ThrewGV && SI->getValueOperand() == IRB.getInt32(0)) { ThrewResetSI = SI; break; } } assert(Threw && ThrewLI && "Cannot find __THREW__ load after invoke"); assert(ThrewResetSI && "Cannot find __THREW__ store after invoke"); Tail = SplitBlock(BB, ThrewResetSI->getNextNode()); } else { // Wrap call with invoke wrapper and generate preamble/postamble Threw = wrapInvoke(CI); ToErase.push_back(CI); Tail = SplitBlock(BB, CI->getNextNode()); } // We need to replace the terminator in Tail - SplitBlock makes BB go // straight to Tail, we need to check if a longjmp occurred, and go to the // right setjmp-tail if so ToErase.push_back(BB->getTerminator()); // Generate a function call to testSetjmp function and preamble/postamble // code to figure out (1) whether longjmp occurred (2) if longjmp // occurred, which setjmp it corresponds to Value *Label = nullptr; Value *LongjmpResult = nullptr; BasicBlock *EndBB = nullptr; wrapTestSetjmp(BB, CI, Threw, SetjmpTable, SetjmpTableSize, Label, LongjmpResult, EndBB); assert(Label && LongjmpResult && EndBB); // Create switch instruction IRB.SetInsertPoint(EndBB); SwitchInst *SI = IRB.CreateSwitch(Label, Tail, SetjmpRetPHIs.size()); // -1 means no longjmp happened, continue normally (will hit the default // switch case). 0 means a longjmp that is not ours to handle, needs a // rethrow. Otherwise the index is the same as the index in P+1 (to avoid // 0). 
for (unsigned I = 0; I < SetjmpRetPHIs.size(); I++) { SI->addCase(IRB.getInt32(I + 1), SetjmpRetPHIs[I]->getParent()); SetjmpRetPHIs[I]->addIncoming(LongjmpResult, EndBB); } // We are splitting the block here, and must continue to find other calls // in the block - which is now split. so continue to traverse in the Tail BBs.push_back(Tail); } } // Erase everything we no longer need in this function for (Instruction *I : ToErase) I->eraseFromParent(); // Free setjmpTable buffer before each return instruction for (BasicBlock &BB : F) { Instruction *TI = BB.getTerminator(); if (isa(TI)) CallInst::CreateFree(SetjmpTable, TI); } // Every call to saveSetjmp can change setjmpTable and setjmpTableSize // (when buffer reallocation occurs) // entry: // setjmpTableSize = 4; // setjmpTable = (int *) malloc(40); // setjmpTable[0] = 0; // ... // somebb: // setjmpTable = saveSetjmp(buf, label, setjmpTable, setjmpTableSize); // setjmpTableSize = getTempRet0(); // So we need to make sure the SSA for these variables is valid so that every // saveSetjmp and testSetjmp calls have the correct arguments. SSAUpdater SetjmpTableSSA; SSAUpdater SetjmpTableSizeSSA; SetjmpTableSSA.Initialize(Type::getInt32PtrTy(C), "setjmpTable"); SetjmpTableSizeSSA.Initialize(Type::getInt32Ty(C), "setjmpTableSize"); for (Instruction *I : SetjmpTableInsts) SetjmpTableSSA.AddAvailableValue(I->getParent(), I); for (Instruction *I : SetjmpTableSizeInsts) SetjmpTableSizeSSA.AddAvailableValue(I->getParent(), I); for (auto UI = SetjmpTable->use_begin(), UE = SetjmpTable->use_end(); UI != UE;) { // Grab the use before incrementing the iterator. Use &U = *UI; // Increment the iterator before removing the use from the list. ++UI; if (auto *I = dyn_cast(U.getUser())) if (I->getParent() != &EntryBB) SetjmpTableSSA.RewriteUse(U); } for (auto UI = SetjmpTableSize->use_begin(), UE = SetjmpTableSize->use_end(); UI != UE;) { Use &U = *UI; ++UI; if (auto *I = dyn_cast(U.getUser())) if (I->getParent() != &EntryBB) SetjmpTableSizeSSA.RewriteUse(U); } // Finally, our modifications to the cfg can break dominance of SSA variables. // For example, in this code, // if (x()) { .. setjmp() .. } // if (y()) { .. longjmp() .. } // We must split the longjmp block, and it can jump into the block splitted // from setjmp one. But that means that when we split the setjmp block, it's // first part no longer dominates its second part - there is a theoretically // possible control flow path where x() is false, then y() is true and we // reach the second part of the setjmp block, without ever reaching the first // part. So, we rebuild SSA form here. 
rebuildSSA(F); return true; } diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-exceptions-whitelist.ll b/llvm/test/CodeGen/WebAssembly/lower-em-exceptions-whitelist.ll index 5fcc399..ce33a9f 100644 --- a/llvm/test/CodeGen/WebAssembly/lower-em-exceptions-whitelist.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-em-exceptions-whitelist.ll @@ -1,65 +1,65 @@ ; RUN: opt < %s -wasm-lower-em-ehsjlj -emscripten-cxx-exceptions-whitelist=do_catch -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" define void @dont_catch() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { ; CHECK-LABEL: @dont_catch( entry: invoke void @foo() to label %invoke.cont unwind label %lpad ; CHECK: entry: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label %invoke.cont invoke.cont: ; preds = %entry br label %try.cont lpad: ; preds = %entry %0 = landingpad { i8*, i32 } catch i8* null %1 = extractvalue { i8*, i32 } %0, 0 %2 = extractvalue { i8*, i32 } %0, 1 br label %catch catch: ; preds = %lpad %3 = call i8* @__cxa_begin_catch(i8* %1) call void @__cxa_end_catch() br label %try.cont try.cont: ; preds = %catch, %invoke.cont ret void } define void @do_catch() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { ; CHECK-LABEL: @do_catch( entry: invoke void @foo() to label %invoke.cont unwind label %lpad ; CHECK: entry: ; CHECK-NEXT: store i32 0, i32* -; CHECK-NEXT: call void @__invoke_void(void ()* @foo) +; CHECK-NEXT: call cc{{.*}} void @__invoke_void(void ()* @foo) invoke.cont: ; preds = %entry br label %try.cont lpad: ; preds = %entry %0 = landingpad { i8*, i32 } catch i8* null %1 = extractvalue { i8*, i32 } %0, 0 %2 = extractvalue { i8*, i32 } %0, 1 br label %catch catch: ; preds = %lpad %3 = call i8* @__cxa_begin_catch(i8* %1) call void @__cxa_end_catch() br label %try.cont try.cont: ; preds = %catch, %invoke.cont ret void } declare void @foo() declare i32 @__gxx_personality_v0(...) 
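; Note: the RUN line above passes -emscripten-cxx-exceptions-whitelist=do_catch,
; so only @do_catch is lowered to an __invoke_ wrapper call (now carrying the
; non-default calling convention matched by cc{{.*}}); @dont_catch is reduced
; to a plain call to @foo followed by an unconditional branch.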
declare i8* @__cxa_begin_catch(i8*) declare void @__cxa_end_catch() diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-exceptions.ll b/llvm/test/CodeGen/WebAssembly/lower-em-exceptions.ll index e54cb2f..1f98310 100644 --- a/llvm/test/CodeGen/WebAssembly/lower-em-exceptions.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-em-exceptions.ll @@ -1,174 +1,174 @@ ; RUN: opt < %s -wasm-lower-em-ehsjlj -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" @_ZTIi = external constant i8* @_ZTIc = external constant i8* ; CHECK-DAG: __THREW__ = external global i32 ; CHECK-DAG: __threwValue = external global i32 ; Test invoke instruction with clauses (try-catch block) define void @clause() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { ; CHECK-LABEL: @clause( entry: invoke void @foo(i32 3) to label %invoke.cont unwind label %lpad ; CHECK: entry: ; CHECK-NEXT: store i32 0, i32* @__THREW__ -; CHECK-NEXT: call void @__invoke_void_i32(void (i32)* @foo, i32 3) +; CHECK-NEXT: call cc{{.*}} void @__invoke_void_i32(void (i32)* @foo, i32 3) ; CHECK-NEXT: %[[__THREW__VAL:.*]] = load i32, i32* @__THREW__ ; CHECK-NEXT: store i32 0, i32* @__THREW__ ; CHECK-NEXT: %cmp = icmp eq i32 %[[__THREW__VAL]], 1 ; CHECK-NEXT: br i1 %cmp, label %lpad, label %invoke.cont invoke.cont: ; preds = %entry br label %try.cont lpad: ; preds = %entry %0 = landingpad { i8*, i32 } catch i8* bitcast (i8** @_ZTIi to i8*) catch i8* null %1 = extractvalue { i8*, i32 } %0, 0 %2 = extractvalue { i8*, i32 } %0, 1 br label %catch.dispatch ; CHECK: lpad: ; CHECK-NEXT: %[[FMC:.*]] = call i8* @__cxa_find_matching_catch_4(i8* bitcast (i8** @_ZTIi to i8*), i8* null) ; CHECK-NEXT: %[[IVI1:.*]] = insertvalue { i8*, i32 } undef, i8* %[[FMC]], 0 ; CHECK-NEXT: %[[TEMPRET0_VAL:.*]] = call i32 @getTempRet0() ; CHECK-NEXT: %[[IVI2:.*]] = insertvalue { i8*, i32 } %[[IVI1]], i32 %[[TEMPRET0_VAL]], 1 ; CHECK-NEXT: extractvalue { i8*, i32 } %[[IVI2]], 0 ; CHECK-NEXT: %[[CDR:.*]] = extractvalue { i8*, i32 } %[[IVI2]], 1 catch.dispatch: ; preds = %lpad %3 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) %matches = icmp eq i32 %2, %3 br i1 %matches, label %catch1, label %catch ; CHECK: catch.dispatch: ; CHECK-NEXT: %[[TYPEID:.*]] = call i32 @llvm_eh_typeid_for(i8* bitcast (i8** @_ZTIi to i8*)) ; CHECK-NEXT: %matches = icmp eq i32 %[[CDR]], %[[TYPEID]] catch1: ; preds = %catch.dispatch %4 = call i8* @__cxa_begin_catch(i8* %1) %5 = bitcast i8* %4 to i32* %6 = load i32, i32* %5, align 4 call void @__cxa_end_catch() br label %try.cont try.cont: ; preds = %catch, %catch1, %invoke.cont ret void catch: ; preds = %catch.dispatch %7 = call i8* @__cxa_begin_catch(i8* %1) call void @__cxa_end_catch() br label %try.cont } ; Test invoke instruction with filters (functions with throw(...) 
declaration) define void @filter() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { ; CHECK-LABEL: @filter( entry: invoke void @foo(i32 3) to label %invoke.cont unwind label %lpad ; CHECK: entry: ; CHECK-NEXT: store i32 0, i32* @__THREW__ -; CHECK-NEXT: call void @__invoke_void_i32(void (i32)* @foo, i32 3) +; CHECK-NEXT: call cc{{.*}} void @__invoke_void_i32(void (i32)* @foo, i32 3) ; CHECK-NEXT: %[[__THREW__VAL:.*]] = load i32, i32* @__THREW__ ; CHECK-NEXT: store i32 0, i32* @__THREW__ ; CHECK-NEXT: %cmp = icmp eq i32 %[[__THREW__VAL]], 1 ; CHECK-NEXT: br i1 %cmp, label %lpad, label %invoke.cont invoke.cont: ; preds = %entry ret void lpad: ; preds = %entry %0 = landingpad { i8*, i32 } filter [2 x i8*] [i8* bitcast (i8** @_ZTIi to i8*), i8* bitcast (i8** @_ZTIc to i8*)] %1 = extractvalue { i8*, i32 } %0, 0 %2 = extractvalue { i8*, i32 } %0, 1 br label %filter.dispatch ; CHECK: lpad: ; CHECK-NEXT: %[[FMC:.*]] = call i8* @__cxa_find_matching_catch_4(i8* bitcast (i8** @_ZTIi to i8*), i8* bitcast (i8** @_ZTIc to i8*)) ; CHECK-NEXT: %[[IVI1:.*]] = insertvalue { i8*, i32 } undef, i8* %[[FMC]], 0 ; CHECK-NEXT: %[[TEMPRET0_VAL:.*]] = call i32 @getTempRet0() ; CHECK-NEXT: %[[IVI2:.*]] = insertvalue { i8*, i32 } %[[IVI1]], i32 %[[TEMPRET0_VAL]], 1 ; CHECK-NEXT: extractvalue { i8*, i32 } %[[IVI2]], 0 ; CHECK-NEXT: extractvalue { i8*, i32 } %[[IVI2]], 1 filter.dispatch: ; preds = %lpad %ehspec.fails = icmp slt i32 %2, 0 br i1 %ehspec.fails, label %ehspec.unexpected, label %eh.resume ehspec.unexpected: ; preds = %filter.dispatch call void @__cxa_call_unexpected(i8* %1) #4 unreachable eh.resume: ; preds = %filter.dispatch %lpad.val = insertvalue { i8*, i32 } undef, i8* %1, 0 %lpad.val3 = insertvalue { i8*, i32 } %lpad.val, i32 %2, 1 resume { i8*, i32 } %lpad.val3 ; CHECK: eh.resume: ; CHECK-NEXT: insertvalue ; CHECK-NEXT: %[[LPAD_VAL:.*]] = insertvalue ; CHECK-NEXT: %[[LOW:.*]] = extractvalue { i8*, i32 } %[[LPAD_VAL]], 0 ; CHECK-NEXT: call void @__resumeException(i8* %[[LOW]]) ; CHECK-NEXT: unreachable } ; Test if argument attributes indices in newly created call instructions are correct define void @arg_attributes() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { ; CHECK-LABEL: @arg_attributes( entry: %0 = invoke noalias i8* @bar(i8 signext 1, i8 zeroext 2) to label %invoke.cont unwind label %lpad ; CHECK: entry: ; CHECK-NEXT: store i32 0, i32* @__THREW__ -; CHECK-NEXT: %0 = call noalias i8* @"__invoke_i8*_i8_i8"(i8* (i8, i8)* @bar, i8 signext 1, i8 zeroext 2) +; CHECK-NEXT: %0 = call cc{{.*}} noalias i8* @"__invoke_i8*_i8_i8"(i8* (i8, i8)* @bar, i8 signext 1, i8 zeroext 2) invoke.cont: ; preds = %entry br label %try.cont lpad: ; preds = %entry %1 = landingpad { i8*, i32 } catch i8* bitcast (i8** @_ZTIi to i8*) catch i8* null %2 = extractvalue { i8*, i32 } %1, 0 %3 = extractvalue { i8*, i32 } %1, 1 br label %catch.dispatch catch.dispatch: ; preds = %lpad %4 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) %matches = icmp eq i32 %3, %4 br i1 %matches, label %catch1, label %catch catch1: ; preds = %catch.dispatch %5 = call i8* @__cxa_begin_catch(i8* %2) %6 = bitcast i8* %5 to i32* %7 = load i32, i32* %6, align 4 call void @__cxa_end_catch() br label %try.cont try.cont: ; preds = %catch, %catch1, %invoke.cont ret void catch: ; preds = %catch.dispatch %8 = call i8* @__cxa_begin_catch(i8* %2) call void @__cxa_end_catch() br label %try.cont } declare void @foo(i32) declare i8* @bar(i8, i8) declare i32 @__gxx_personality_v0(...) 
declare i32 @llvm.eh.typeid.for(i8*)
declare i8* @__cxa_begin_catch(i8*)
declare void @__cxa_end_catch()
declare void @__cxa_call_unexpected(i8*)

; JS glue functions and invoke wrappers declaration
; CHECK-DAG: declare i32 @getTempRet0()
; CHECK-DAG: declare void @setTempRet0(i32)
; CHECK-DAG: declare void @__resumeException(i8*)
; CHECK-DAG: declare void @__invoke_void_i32(void (i32)*, i32)
; CHECK-DAG: declare i8* @__cxa_find_matching_catch_4(i8*, i8*)
diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-sret.ll b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-sret.ll
new file mode 100644
index 0000000..d93eed8
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-sret.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -asm-verbose=false -enable-emscripten-sjlj -wasm-keep-registers | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+%struct.__jmp_buf_tag = type { [6 x i32], i32, [32 x i32] }
+
+declare i32 @setjmp(%struct.__jmp_buf_tag*) #0
+declare {i32, i32} @returns_struct()
+
+; Test the combination of backend legalization of large return types and the
+; Emscripten sjlj transformation
+define {i32, i32} @legalized_to_sret() {
+entry:
+  %env = alloca [1 x %struct.__jmp_buf_tag], align 16
+  %arraydecay = getelementptr inbounds [1 x %struct.__jmp_buf_tag], [1 x %struct.__jmp_buf_tag]* %env, i32 0, i32 0
+  %call = call i32 @setjmp(%struct.__jmp_buf_tag* %arraydecay) #0
+; This is the function pointer to pass to invoke.
+; It needs to be the first argument (that's what we're testing here)
+; CHECK: i32.const $push[[FPTR:[0-9]+]]=, returns_struct
+; This is the sret stack region (as an offset from the stack pointer local)
+; CHECK: call "__invoke_{i32.i32}", $pop[[FPTR]]
+  %ret = call {i32, i32} @returns_struct()
+  ret {i32, i32} %ret
+}
+
+attributes #0 = { returns_twice }
diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll
index 425c4dd..75f6b7d 100644
--- a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll
+++ b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll
@@ -1,244 +1,244 @@
; RUN: opt < %s -wasm-lower-em-ehsjlj -S | FileCheck %s

target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

%struct.__jmp_buf_tag = type { [6 x i32], i32, [32 x i32] }
@global_var = hidden global i32 0, align 4
; CHECK-DAG: __THREW__ = external global i32
; CHECK-DAG: __threwValue = external global i32

; Test a simple setjmp - longjmp sequence
define hidden void @setjmp_longjmp() {
; CHECK-LABEL: @setjmp_longjmp
entry:
  %buf = alloca [1 x %struct.__jmp_buf_tag], align 16
  %arraydecay = getelementptr inbounds [1 x %struct.__jmp_buf_tag], [1 x %struct.__jmp_buf_tag]* %buf, i32 0, i32 0
  %call = call i32 @setjmp(%struct.__jmp_buf_tag* %arraydecay) #0
  %arraydecay1 = getelementptr inbounds [1 x %struct.__jmp_buf_tag], [1 x %struct.__jmp_buf_tag]* %buf, i32 0, i32 0
  call void @longjmp(%struct.__jmp_buf_tag* %arraydecay1, i32 1) #1
  unreachable
; CHECK: entry:
; CHECK-NEXT: %[[MALLOCCALL:.*]] = tail call i8* @malloc(i32 40)
; CHECK-NEXT: %[[SETJMP_TABLE:.*]] = bitcast i8* %[[MALLOCCALL]] to i32*
; CHECK-NEXT: store i32 0, i32* %[[SETJMP_TABLE]]
; CHECK-NEXT: %[[SETJMP_TABLE_SIZE:.*]] = add i32 4, 0
; CHECK-NEXT: %[[BUF:.*]] = alloca [1 x %struct.__jmp_buf_tag]
; CHECK-NEXT: %[[ARRAYDECAY:.*]] = getelementptr inbounds [1 x %struct.__jmp_buf_tag], [1 x %struct.__jmp_buf_tag]* %[[BUF]], i32 0, i32 0
; CHECK-NEXT: %[[SETJMP_TABLE1:.*]] = call i32* @saveSetjmp(%struct.__jmp_buf_tag* %[[ARRAYDECAY]], i32 1, i32* %[[SETJMP_TABLE]], i32 %[[SETJMP_TABLE_SIZE]])
; CHECK-NEXT: %[[SETJMP_TABLE_SIZE1:.*]] = call i32 @getTempRet0()
; CHECK-NEXT: br label %entry.split

; CHECK: entry.split:
; CHECK-NEXT: phi i32 [ 0, %entry ], [ %[[LONGJMP_RESULT:.*]], %if.end ]
; CHECK-NEXT: %[[ARRAYDECAY1:.*]] = getelementptr inbounds [1 x %struct.__jmp_buf_tag], [1 x %struct.__jmp_buf_tag]* %[[BUF]], i32 0, i32 0
; CHECK-NEXT: store i32 0, i32* @__THREW__
-; CHECK-NEXT: call void @"__invoke_void_%struct.__jmp_buf_tag*_i32"(void (%struct.__jmp_buf_tag*, i32)* @emscripten_longjmp_jmpbuf, %struct.__jmp_buf_tag* %[[ARRAYDECAY1]], i32 1)
+; CHECK-NEXT: call cc{{.*}} void @"__invoke_void_%struct.__jmp_buf_tag*_i32"(void (%struct.__jmp_buf_tag*, i32)* @emscripten_longjmp_jmpbuf, %struct.__jmp_buf_tag* %[[ARRAYDECAY1]], i32 1)
; CHECK-NEXT: %[[__THREW__VAL:.*]] = load i32, i32* @__THREW__
; CHECK-NEXT: store i32 0, i32* @__THREW__
; CHECK-NEXT: %[[CMP0:.*]] = icmp ne i32 %__THREW__.val, 0
; CHECK-NEXT: %[[THREWVALUE_VAL:.*]] = load i32, i32* @__threwValue
; CHECK-NEXT: %[[CMP1:.*]] = icmp ne i32 %[[THREWVALUE_VAL]], 0
; CHECK-NEXT: %[[CMP:.*]] = and i1 %[[CMP0]], %[[CMP1]]
; CHECK-NEXT: br i1 %[[CMP]], label %if.then1, label %if.else1

; CHECK: entry.split.split:
; CHECK-NEXT: unreachable

; CHECK: if.then1:
; CHECK-NEXT: %[[__THREW__VAL_I32P:.*]] = inttoptr i32 %[[__THREW__VAL]] to i32*
; CHECK-NEXT: %[[__THREW__VAL_I32P_LOADED:.*]] = load i32, i32* %[[__THREW__VAL_I32P]]
; CHECK-NEXT: %[[LABEL:.*]] = call i32 @testSetjmp(i32 %[[__THREW__VAL_I32P_LOADED]], i32* %[[SETJMP_TABLE1]], i32 %[[SETJMP_TABLE_SIZE1]])
; CHECK-NEXT: %[[CMP:.*]] = icmp eq i32 %[[LABEL]], 0
; CHECK-NEXT: br i1 %[[CMP]], label %if.then2, label %if.end2

; CHECK: if.else1:
; CHECK-NEXT: br label %if.end

; CHECK: if.end:
; CHECK-NEXT: %[[LABEL_PHI:.*]] = phi i32 [ %[[LABEL:.*]], %if.end2 ], [ -1, %if.else1 ]
; CHECK-NEXT: %[[LONGJMP_RESULT]] = call i32 @getTempRet0()
; CHECK-NEXT: switch i32 %[[LABEL_PHI]], label %entry.split.split [
; CHECK-NEXT:   i32 1, label %entry.split
; CHECK-NEXT: ]

; CHECK: if.then2:
; CHECK-NEXT: call void @emscripten_longjmp(i32 %[[__THREW__VAL]], i32 %[[THREWVALUE_VAL]])
; CHECK-NEXT: unreachable

; CHECK: if.end2:
; CHECK-NEXT: call void @setTempRet0(i32 %[[THREWVALUE_VAL]])
; CHECK-NEXT: br label %if.end
}

; Test a case of a function call (which is not longjmp) after a setjmp
define hidden void @setjmp_other() {
; CHECK-LABEL: @setjmp_other
entry:
  %buf = alloca [1 x %struct.__jmp_buf_tag], align 16
  %arraydecay = getelementptr inbounds [1 x %struct.__jmp_buf_tag], [1 x %struct.__jmp_buf_tag]* %buf, i32 0, i32 0
  %call = call i32 @setjmp(%struct.__jmp_buf_tag* %arraydecay) #0
  call void @foo()
  ret void
; CHECK: entry:
; CHECK: %[[SETJMP_TABLE:.*]] = call i32* @saveSetjmp(
; CHECK: entry.split:
-; CHECK: call void @__invoke_void(void ()* @foo)
+; CHECK: @__invoke_void(void ()* @foo)
; CHECK: entry.split.split:
; CHECK-NEXT: %[[BUF:.*]] = bitcast i32* %[[SETJMP_TABLE]] to i8*
; CHECK-NEXT: tail call void @free(i8* %[[BUF]])
; CHECK-NEXT: ret void
}

; Test a case when a function call is within try-catch, after a setjmp
define hidden void @exception_and_longjmp() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
; CHECK-LABEL: @exception_and_longjmp
entry:
  %buf = alloca [1 x %struct.__jmp_buf_tag], align 16
  %arraydecay = getelementptr inbounds [1 x %struct.__jmp_buf_tag], [1 x %struct.__jmp_buf_tag]* %buf, i32 0, i32 0
  %call = call i32 @setjmp(%struct.__jmp_buf_tag* %arraydecay) #0
  invoke void @foo() to label %try.cont unwind label %lpad
; CHECK: entry.split:
; CHECK: store i32 0, i32* @__THREW__
-; CHECK-NEXT: call void @__invoke_void(void ()* @foo)
+; CHECK-NEXT: call cc{{.*}} void @__invoke_void(void ()* @foo)
; CHECK-NEXT: %[[__THREW__VAL:.*]] = load i32, i32* @__THREW__
; CHECK-NEXT: store i32 0, i32* @__THREW__
; CHECK-NEXT: %[[CMP0:.*]] = icmp ne i32 %[[__THREW__VAL]], 0
; CHECK-NEXT: %[[THREWVALUE_VAL:.*]] = load i32, i32* @__threwValue
; CHECK-NEXT: %[[CMP1:.*]] = icmp ne i32 %[[THREWVALUE_VAL]], 0
; CHECK-NEXT: %[[CMP:.*]] = and i1 %[[CMP0]], %[[CMP1]]
; CHECK-NEXT: br i1 %[[CMP]], label %if.then1, label %if.else1
; CHECK: entry.split.split:
; CHECK-NEXT: %[[CMP:.*]] = icmp eq i32 %[[__THREW__VAL]], 1
; CHECK-NEXT: br i1 %[[CMP]], label %lpad, label %try.cont

lpad: ; preds = %entry
  %0 = landingpad { i8*, i32 } catch i8* null
  %1 = extractvalue { i8*, i32 } %0, 0
  %2 = extractvalue { i8*, i32 } %0, 1
  %3 = call i8* @__cxa_begin_catch(i8* %1) #2
  call void @__cxa_end_catch()
  br label %try.cont

try.cont: ; preds = %entry, %lpad
  ret void
}

; Test SSA validity
define hidden void @ssa(i32 %n) {
; CHECK-LABEL: @ssa
entry:
  %buf = alloca [1 x %struct.__jmp_buf_tag], align 16
  %cmp = icmp sgt i32 %n, 5
  br i1 %cmp, label %if.then, label %if.end
; CHECK: entry:
; CHECK: %[[SETJMP_TABLE0:.*]] = bitcast i8*
; CHECK: %[[SETJMP_TABLE_SIZE0:.*]] = add i32 4, 0

if.then: ; preds = %entry
  %0 = load i32, i32* @global_var, align 4
  %arraydecay = getelementptr inbounds [1 x %struct.__jmp_buf_tag], [1 x %struct.__jmp_buf_tag]* %buf, i32 0, i32 0
  %call = call i32 @setjmp(%struct.__jmp_buf_tag* %arraydecay) #0
  store i32 %0, i32* @global_var, align 4
  br label %if.end
; CHECK: if.then:
; CHECK: %[[VAR0:.*]] = load i32, i32* @global_var, align 4
; CHECK: %[[SETJMP_TABLE1:.*]] = call i32* @saveSetjmp(
; CHECK-NEXT: %[[SETJMP_TABLE_SIZE1:.*]] = call i32 @getTempRet0()
; CHECK: if.then.split:
; CHECK: %[[VAR1:.*]] = phi i32 [ %[[VAR0]], %if.then ], [ %[[VAR2:.*]], %if.end3 ]
; CHECK: %[[SETJMP_TABLE_SIZE2:.*]] = phi i32 [ %[[SETJMP_TABLE_SIZE1]], %if.then ], [ %[[SETJMP_TABLE_SIZE3:.*]], %if.end3 ]
; CHECK: %[[SETJMP_TABLE2:.*]] = phi i32* [ %[[SETJMP_TABLE1]], %if.then ], [ %[[SETJMP_TABLE3:.*]], %if.end3 ]
; CHECK: store i32 %[[VAR1]], i32* @global_var, align 4

if.end: ; preds = %if.then, %entry
  %arraydecay1 = getelementptr inbounds [1 x %struct.__jmp_buf_tag], [1 x %struct.__jmp_buf_tag]* %buf, i32 0, i32 0
  call void @longjmp(%struct.__jmp_buf_tag* %arraydecay1, i32 5) #1
  unreachable
; CHECK: if.end:
; CHECK: %[[VAR2]] = phi i32 [ %[[VAR1]], %if.then.split ], [ undef, %entry ]
; CHECK: %[[SETJMP_TABLE_SIZE3]] = phi i32 [ %[[SETJMP_TABLE_SIZE2]], %if.then.split ], [ %[[SETJMP_TABLE_SIZE0]], %entry ]
; CHECK: %[[SETJMP_TABLE3]] = phi i32* [ %[[SETJMP_TABLE2]], %if.then.split ], [ %[[SETJMP_TABLE0]], %entry ]
}

; Test a case when a function only calls other functions that are neither setjmp nor longjmp
define hidden void @only_other_func() {
entry:
  call void @foo()
  ret void
; CHECK: call void @foo()
}

; Test a case when a function only calls longjmp and not setjmp
define hidden void @only_longjmp() {
entry:
  %buf = alloca [1 x %struct.__jmp_buf_tag], align 16
  %arraydecay = getelementptr inbounds [1 x %struct.__jmp_buf_tag], [1 x %struct.__jmp_buf_tag]* %buf, i32 0, i32 0
  call void @longjmp(%struct.__jmp_buf_tag* %arraydecay, i32 5) #1
  unreachable
; CHECK: %[[ARRAYDECAY:.*]] = getelementptr inbounds
; CHECK-NEXT: call void @emscripten_longjmp_jmpbuf(%struct.__jmp_buf_tag* %[[ARRAYDECAY]], i32 5)
}

; Test inline asm handling
define hidden void @inline_asm() {
; CHECK-LABEL: @inline_asm
entry:
  %env = alloca [1 x %struct.__jmp_buf_tag], align 16
  %arraydecay = getelementptr inbounds [1 x %struct.__jmp_buf_tag], [1 x %struct.__jmp_buf_tag]* %env, i32 0, i32 0
  %call = call i32 @setjmp(%struct.__jmp_buf_tag* %arraydecay) #4
; Inline assembly should not generate __invoke wrappers.
; Doing so would fail as inline assembly cannot be passed as a function pointer.
; CHECK: call void asm sideeffect "", ""()
; CHECK-NOT: __invoke_void
  call void asm sideeffect "", ""()
  ret void
}

; Test that the allocsize attribute is being transformed properly
declare i8 *@allocator(i32, %struct.__jmp_buf_tag*) #3
define hidden i8 *@allocsize() {
; CHECK-LABEL: @allocsize
entry:
  %buf = alloca [1 x %struct.__jmp_buf_tag], align 16
  %arraydecay = getelementptr inbounds [1 x %struct.__jmp_buf_tag], [1 x %struct.__jmp_buf_tag]* %buf, i32 0, i32 0
  %call = call i32 @setjmp(%struct.__jmp_buf_tag* %arraydecay) #0
-; CHECK: i8* @"__invoke_i8*_i32_%struct.__jmp_buf_tag*"([[ARGS:.*]]) #[[ALLOCSIZE_ATTR:[0-9]+]]
+; CHECK: call cc{{.*}} i8* @"__invoke_i8*_i32_%struct.__jmp_buf_tag*"([[ARGS:.*]]) #[[ALLOCSIZE_ATTR:[0-9]+]]
  %alloc = call i8* @allocator(i32 20, %struct.__jmp_buf_tag* %arraydecay) #3
  ret i8 *%alloc
}

declare void @foo()
; Function Attrs: returns_twice
declare i32 @setjmp(%struct.__jmp_buf_tag*) #0
; Function Attrs: noreturn
declare void @longjmp(%struct.__jmp_buf_tag*, i32) #1
declare i32 @__gxx_personality_v0(...)
declare i8* @__cxa_begin_catch(i8*)
declare void @__cxa_end_catch()
declare i8* @malloc(i32)
declare void @free(i8*)

; JS glue functions and invoke wrappers declaration
; CHECK-DAG: declare i32 @getTempRet0()
; CHECK-DAG: declare void @setTempRet0(i32)
; CHECK-DAG: declare i32* @saveSetjmp(%struct.__jmp_buf_tag*, i32, i32*, i32)
; CHECK-DAG: declare i32 @testSetjmp(i32, i32*, i32)
; CHECK-DAG: declare void @emscripten_longjmp_jmpbuf(%struct.__jmp_buf_tag*, i32)
; CHECK-DAG: declare void @emscripten_longjmp(i32, i32)
; CHECK-DAG: declare void @__invoke_void(void ()*)
; CHECK-DAG: declare void @"__invoke_void_%struct.__jmp_buf_tag*_i32"(void (%struct.__jmp_buf_tag*, i32)*, %struct.__jmp_buf_tag*, i32)

attributes #0 = { returns_twice }
attributes #1 = { noreturn }
attributes #2 = { nounwind }
attributes #3 = { allocsize(0) }

; CHECK: attributes #[[ALLOCSIZE_ATTR]] = { allocsize(1) }
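For context (illustrative only, not part of the patch): the @setjmp_longjmp IR test above corresponds roughly to C source of the following shape. Function and variable names here are made up for the example; the point is that, under the Emscripten SJLJ lowering exercised by these tests, the longjmp call site is routed through an __invoke_* wrapper (checked by the CHECK lines above) so the jump can be detected and dispatched back to the matching setjmp.

  #include <setjmp.h>

  /* Minimal setjmp/longjmp pair, similar in shape to the @setjmp_longjmp test.
     The guard keeps the example terminating: setjmp first returns 0, the
     longjmp transfers control back, and setjmp then returns 1. */
  static void setjmp_longjmp_example(void) {
    jmp_buf env;
    if (setjmp(env) == 0)
      longjmp(env, 1);
  }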