diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -22,688 +22,809 @@ namespace X86ISD { // X86 Specific DAG Nodes - enum NodeType : unsigned { - // Start the numbering where the builtin ops leave off. - FIRST_NUMBER = ISD::BUILTIN_OP_END, - - /// Bit scan forward. - BSF, - /// Bit scan reverse. - BSR, - - /// X86 funnel/double shift i16 instructions. These correspond to - /// X86::SHLDW and X86::SHRDW instructions which have different amt - /// modulo rules to generic funnel shifts. - /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD. - FSHL, - FSHR, - - /// Bitwise logical AND of floating point values. This corresponds - /// to X86::ANDPS or X86::ANDPD. - FAND, - - /// Bitwise logical OR of floating point values. This corresponds - /// to X86::ORPS or X86::ORPD. - FOR, - - /// Bitwise logical XOR of floating point values. This corresponds - /// to X86::XORPS or X86::XORPD. - FXOR, - - /// Bitwise logical ANDNOT of floating point values. This - /// corresponds to X86::ANDNPS or X86::ANDNPD. - FANDN, - - /// These operations represent an abstract X86 call - /// instruction, which includes a bunch of information. In particular the - /// operands of these node are: - /// - /// #0 - The incoming token chain - /// #1 - The callee - /// #2 - The number of arg bytes the caller pushes on the stack. - /// #3 - The number of arg bytes the callee pops off the stack. - /// #4 - The value to pass in AL/AX/EAX (optional) - /// #5 - The value to pass in DL/DX/EDX (optional) - /// - /// The result values of these nodes are: - /// - /// #0 - The outgoing token chain - /// #1 - The first register result value (optional) - /// #2 - The second register result value (optional) - /// - CALL, - - /// Same as call except it adds the NoTrack prefix. - NT_CALL, - - /// X86 compare and logical compare instructions. - CMP, FCMP, COMI, UCOMI, - - /// X86 bit-test instructions. - BT, - - /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS - /// operand, usually produced by a CMP instruction. - SETCC, - - /// X86 Select - SELECTS, - - // Same as SETCC except it's materialized with a sbb and the value is all - // one's or all zero's. - SETCC_CARRY, // R = carry_bit ? ~0 : 0 - - /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD. - /// Operands are two FP values to compare; result is a mask of - /// 0s or 1s. Generally DTRT for C/C++ with NaNs. - FSETCC, - - /// X86 FP SETCC, similar to above, but with output as an i1 mask and - /// and a version with SAE. - FSETCCM, FSETCCM_SAE, - - /// X86 conditional moves. Operand 0 and operand 1 are the two values - /// to select from. Operand 2 is the condition code, and operand 3 is the - /// flag operand produced by a CMP or TEST instruction. - CMOV, - - /// X86 conditional branches. Operand 0 is the chain operand, operand 1 - /// is the block to branch if condition is true, operand 2 is the - /// condition code, and operand 3 is the flag operand produced by a CMP - /// or TEST instruction. - BRCOND, - - /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and - /// operand 1 is the target address. - NT_BRIND, - - /// Return with a flag operand. Operand 0 is the chain operand, operand - /// 1 is the number of bytes of stack to pop. - RET_FLAG, - - /// Return from interrupt. Operand 0 is the number of bytes to pop. - IRET, - - /// Repeat fill, corresponds to X86::REP_STOSx. - REP_STOS, - - /// Repeat move, corresponds to X86::REP_MOVSx. - REP_MOVS, - - /// On Darwin, this node represents the result of the popl - /// at function entry, used for PIC code. - GlobalBaseReg, - - /// A wrapper node for TargetConstantPool, TargetJumpTable, - /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress, - /// MCSymbol and TargetBlockAddress. - Wrapper, - - /// Special wrapper used under X86-64 PIC mode for RIP - /// relative displacements. - WrapperRIP, - - /// Copies a 64-bit value from an MMX vector to the low word - /// of an XMM vector, with the high word zero filled. - MOVQ2DQ, - - /// Copies a 64-bit value from the low word of an XMM vector - /// to an MMX vector. - MOVDQ2Q, - - /// Copies a 32-bit value from the low word of a MMX - /// vector to a GPR. - MMX_MOVD2W, - - /// Copies a GPR into the low 32-bit word of a MMX vector - /// and zero out the high word. - MMX_MOVW2D, - - /// Extract an 8-bit value from a vector and zero extend it to - /// i32, corresponds to X86::PEXTRB. - PEXTRB, - - /// Extract a 16-bit value from a vector and zero extend it to - /// i32, corresponds to X86::PEXTRW. - PEXTRW, - - /// Insert any element of a 4 x float vector into any element - /// of a destination 4 x floatvector. - INSERTPS, - - /// Insert the lower 8-bits of a 32-bit value to a vector, - /// corresponds to X86::PINSRB. - PINSRB, - - /// Insert the lower 16-bits of a 32-bit value to a vector, - /// corresponds to X86::PINSRW. - PINSRW, - - /// Shuffle 16 8-bit values within a vector. - PSHUFB, - - /// Compute Sum of Absolute Differences. - PSADBW, - /// Compute Double Block Packed Sum-Absolute-Differences - DBPSADBW, - - /// Bitwise Logical AND NOT of Packed FP values. - ANDNP, - - /// Blend where the selector is an immediate. - BLENDI, - - /// Dynamic (non-constant condition) vector blend where only the sign bits - /// of the condition elements are used. This is used to enforce that the - /// condition mask is not valid for generic VSELECT optimizations. This - /// is also used to implement the intrinsics. - /// Operands are in VSELECT order: MASK, TRUE, FALSE - BLENDV, - - /// Combined add and sub on an FP vector. - ADDSUB, - - // FP vector ops with rounding mode. - FADD_RND, FADDS, FADDS_RND, - FSUB_RND, FSUBS, FSUBS_RND, - FMUL_RND, FMULS, FMULS_RND, - FDIV_RND, FDIVS, FDIVS_RND, - FMAX_SAE, FMAXS_SAE, - FMIN_SAE, FMINS_SAE, - FSQRT_RND, FSQRTS, FSQRTS_RND, - - // FP vector get exponent. - FGETEXP, FGETEXP_SAE, FGETEXPS, FGETEXPS_SAE, - // Extract Normalized Mantissas. - VGETMANT, VGETMANT_SAE, VGETMANTS, VGETMANTS_SAE, - // FP Scale. - SCALEF, SCALEF_RND, - SCALEFS, SCALEFS_RND, - - // Unsigned Integer average. - AVG, - - /// Integer horizontal add/sub. - HADD, - HSUB, - - /// Floating point horizontal add/sub. - FHADD, - FHSUB, - - // Detect Conflicts Within a Vector - CONFLICT, - - /// Floating point max and min. - FMAX, FMIN, - - /// Commutative FMIN and FMAX. - FMAXC, FMINC, - - /// Scalar intrinsic floating point max and min. - FMAXS, FMINS, - - /// Floating point reciprocal-sqrt and reciprocal approximation. - /// Note that these typically require refinement - /// in order to obtain suitable precision. - FRSQRT, FRCP, - - // AVX-512 reciprocal approximations with a little more precision. - RSQRT14, RSQRT14S, RCP14, RCP14S, - - // Thread Local Storage. - TLSADDR, - - // Thread Local Storage. A call to get the start address - // of the TLS block for the current module. - TLSBASEADDR, - - // Thread Local Storage. When calling to an OS provided - // thunk at the address from an earlier relocation. - TLSCALL, - - // Exception Handling helpers. - EH_RETURN, - - // SjLj exception handling setjmp. - EH_SJLJ_SETJMP, + enum NodeType : unsigned { + // Start the numbering where the builtin ops leave off. + FIRST_NUMBER = ISD::BUILTIN_OP_END, + + /// Bit scan forward. + BSF, + /// Bit scan reverse. + BSR, + + /// X86 funnel/double shift i16 instructions. These correspond to + /// X86::SHLDW and X86::SHRDW instructions which have different amt + /// modulo rules to generic funnel shifts. + /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD. + FSHL, + FSHR, + + /// Bitwise logical AND of floating point values. This corresponds + /// to X86::ANDPS or X86::ANDPD. + FAND, + + /// Bitwise logical OR of floating point values. This corresponds + /// to X86::ORPS or X86::ORPD. + FOR, + + /// Bitwise logical XOR of floating point values. This corresponds + /// to X86::XORPS or X86::XORPD. + FXOR, + + /// Bitwise logical ANDNOT of floating point values. This + /// corresponds to X86::ANDNPS or X86::ANDNPD. + FANDN, + + /// These operations represent an abstract X86 call + /// instruction, which includes a bunch of information. In particular the + /// operands of these node are: + /// + /// #0 - The incoming token chain + /// #1 - The callee + /// #2 - The number of arg bytes the caller pushes on the stack. + /// #3 - The number of arg bytes the callee pops off the stack. + /// #4 - The value to pass in AL/AX/EAX (optional) + /// #5 - The value to pass in DL/DX/EDX (optional) + /// + /// The result values of these nodes are: + /// + /// #0 - The outgoing token chain + /// #1 - The first register result value (optional) + /// #2 - The second register result value (optional) + /// + CALL, - // SjLj exception handling longjmp. - EH_SJLJ_LONGJMP, + /// Same as call except it adds the NoTrack prefix. + NT_CALL, - // SjLj exception handling dispatch. - EH_SJLJ_SETUP_DISPATCH, + /// X86 compare and logical compare instructions. + CMP, + FCMP, + COMI, + UCOMI, - /// Tail call return. See X86TargetLowering::LowerCall for - /// the list of operands. - TC_RETURN, + /// X86 bit-test instructions. + BT, - // Vector move to low scalar and zero higher vector elements. - VZEXT_MOVL, + /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS + /// operand, usually produced by a CMP instruction. + SETCC, - // Vector integer truncate. - VTRUNC, - // Vector integer truncate with unsigned/signed saturation. - VTRUNCUS, VTRUNCS, + /// X86 Select + SELECTS, - // Masked version of the above. Used when less than a 128-bit result is - // produced since the mask only applies to the lower elements and can't - // be represented by a select. - // SRC, PASSTHRU, MASK - VMTRUNC, VMTRUNCUS, VMTRUNCS, - - // Vector FP extend. - VFPEXT, VFPEXT_SAE, VFPEXTS, VFPEXTS_SAE, - - // Vector FP round. - VFPROUND, VFPROUND_RND, VFPROUNDS, VFPROUNDS_RND, - - // Masked version of above. Used for v2f64->v4f32. - // SRC, PASSTHRU, MASK - VMFPROUND, - - // 128-bit vector logical left / right shift - VSHLDQ, VSRLDQ, - - // Vector shift elements - VSHL, VSRL, VSRA, - - // Vector variable shift - VSHLV, VSRLV, VSRAV, - - // Vector shift elements by immediate - VSHLI, VSRLI, VSRAI, - - // Shifts of mask registers. - KSHIFTL, KSHIFTR, - - // Bit rotate by immediate - VROTLI, VROTRI, - - // Vector packed double/float comparison. - CMPP, - - // Vector integer comparisons. - PCMPEQ, PCMPGT, - - // v8i16 Horizontal minimum and position. - PHMINPOS, - - MULTISHIFT, - - /// Vector comparison generating mask bits for fp and - /// integer signed and unsigned data types. - CMPM, - // Vector comparison with SAE for FP values - CMPM_SAE, - - // Arithmetic operations with FLAGS results. - ADD, SUB, ADC, SBB, SMUL, UMUL, - OR, XOR, AND, - - // Bit field extract. - BEXTR, - - // Zero High Bits Starting with Specified Bit Position. - BZHI, - - // Parallel extract and deposit. - PDEP, PEXT, - - // X86-specific multiply by immediate. - MUL_IMM, - - // Vector sign bit extraction. - MOVMSK, - - // Vector bitwise comparisons. - PTEST, - - // Vector packed fp sign bitwise comparisons. - TESTP, - - // OR/AND test for masks. - KORTEST, - KTEST, - - // ADD for masks. - KADD, - - // Several flavors of instructions with vector shuffle behaviors. - // Saturated signed/unnsigned packing. - PACKSS, - PACKUS, - // Intra-lane alignr. - PALIGNR, - // AVX512 inter-lane alignr. - VALIGN, - PSHUFD, - PSHUFHW, - PSHUFLW, - SHUFP, - // VBMI2 Concat & Shift. - VSHLD, - VSHRD, - VSHLDV, - VSHRDV, - //Shuffle Packed Values at 128-bit granularity. - SHUF128, - MOVDDUP, - MOVSHDUP, - MOVSLDUP, - MOVLHPS, - MOVHLPS, - MOVSD, - MOVSS, - UNPCKL, - UNPCKH, - VPERMILPV, - VPERMILPI, - VPERMI, - VPERM2X128, - - // Variable Permute (VPERM). - // Res = VPERMV MaskV, V0 - VPERMV, - - // 3-op Variable Permute (VPERMT2). - // Res = VPERMV3 V0, MaskV, V1 - VPERMV3, - - // Bitwise ternary logic. - VPTERNLOG, - // Fix Up Special Packed Float32/64 values. - VFIXUPIMM, VFIXUPIMM_SAE, - VFIXUPIMMS, VFIXUPIMMS_SAE, - // Range Restriction Calculation For Packed Pairs of Float32/64 values. - VRANGE, VRANGE_SAE, VRANGES, VRANGES_SAE, - // Reduce - Perform Reduction Transformation on scalar\packed FP. - VREDUCE, VREDUCE_SAE, VREDUCES, VREDUCES_SAE, - // RndScale - Round FP Values To Include A Given Number Of Fraction Bits. - // Also used by the legacy (V)ROUND intrinsics where we mask out the - // scaling part of the immediate. - VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE, - // Tests Types Of a FP Values for packed types. - VFPCLASS, - // Tests Types Of a FP Values for scalar types. - VFPCLASSS, - - // Broadcast (splat) scalar or element 0 of a vector. If the operand is - // a vector, this node may change the vector length as part of the splat. - VBROADCAST, - // Broadcast mask to vector. - VBROADCASTM, - // Broadcast subvector to vector. - SUBV_BROADCAST, - - /// SSE4A Extraction and Insertion. - EXTRQI, INSERTQI, - - // XOP arithmetic/logical shifts. - VPSHA, VPSHL, - // XOP signed/unsigned integer comparisons. - VPCOM, VPCOMU, - // XOP packed permute bytes. - VPPERM, - // XOP two source permutation. - VPERMIL2, - - // Vector multiply packed unsigned doubleword integers. - PMULUDQ, - // Vector multiply packed signed doubleword integers. - PMULDQ, - // Vector Multiply Packed UnsignedIntegers with Round and Scale. - MULHRS, - - // Multiply and Add Packed Integers. - VPMADDUBSW, VPMADDWD, - - // AVX512IFMA multiply and add. - // NOTE: These are different than the instruction and perform - // op0 x op1 + op2. - VPMADD52L, VPMADD52H, - - // VNNI - VPDPBUSD, - VPDPBUSDS, - VPDPWSSD, - VPDPWSSDS, - - // FMA nodes. - // We use the target independent ISD::FMA for the non-inverted case. - FNMADD, - FMSUB, - FNMSUB, - FMADDSUB, - FMSUBADD, - - // FMA with rounding mode. - FMADD_RND, - FNMADD_RND, - FMSUB_RND, - FNMSUB_RND, - FMADDSUB_RND, - FMSUBADD_RND, - - // Compress and expand. - COMPRESS, - EXPAND, - - // Bits shuffle - VPSHUFBITQMB, - - // Convert Unsigned/Integer to Floating-Point Value with rounding mode. - SINT_TO_FP_RND, UINT_TO_FP_RND, - SCALAR_SINT_TO_FP, SCALAR_UINT_TO_FP, - SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND, - - // Vector float/double to signed/unsigned integer. - CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND, - // Scalar float/double to signed/unsigned integer. - CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND, - - // Vector float/double to signed/unsigned integer with truncation. - CVTTP2SI, CVTTP2UI, CVTTP2SI_SAE, CVTTP2UI_SAE, - // Scalar float/double to signed/unsigned integer with truncation. - CVTTS2SI, CVTTS2UI, CVTTS2SI_SAE, CVTTS2UI_SAE, - - // Vector signed/unsigned integer to float/double. - CVTSI2P, CVTUI2P, - - // Masked versions of above. Used for v2f64->v4f32. - // SRC, PASSTHRU, MASK - MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI, - MCVTSI2P, MCVTUI2P, - - // Vector float to bfloat16. - // Convert TWO packed single data to one packed BF16 data - CVTNE2PS2BF16, - // Convert packed single data to packed BF16 data - CVTNEPS2BF16, - // Masked version of above. - // SRC, PASSTHRU, MASK - MCVTNEPS2BF16, - - // Dot product of BF16 pairs to accumulated into - // packed single precision. - DPBF16PS, - - // Save xmm argument registers to the stack, according to %al. An operator - // is needed so that this can be expanded with control flow. - VASTART_SAVE_XMM_REGS, - - // Windows's _chkstk call to do stack probing. - WIN_ALLOCA, - - // For allocating variable amounts of stack space when using - // segmented stacks. Check if the current stacklet has enough space, and - // falls back to heap allocation if not. - SEG_ALLOCA, - - // For allocating stack space when using stack clash protector. - // Allocation is performed by block, and each block is probed. - PROBED_ALLOCA, - - // Memory barriers. - MEMBARRIER, - MFENCE, - - // Get a random integer and indicate whether it is valid in CF. - RDRAND, - - // Get a NIST SP800-90B & C compliant random integer and - // indicate whether it is valid in CF. - RDSEED, - - // Protection keys - // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX. - // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is - // value for ECX. - RDPKRU, WRPKRU, - - // SSE42 string comparisons. - // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG - // will emit one or two instructions based on which results are used. If - // flags and index/mask this allows us to use a single instruction since - // we won't have to pick and opcode for flags. Instead we can rely on the - // DAG to CSE everything and decide at isel. - PCMPISTR, - PCMPESTR, - - // Test if in transactional execution. - XTEST, - - // ERI instructions. - RSQRT28, RSQRT28_SAE, RSQRT28S, RSQRT28S_SAE, - RCP28, RCP28_SAE, RCP28S, RCP28S_SAE, EXP2, EXP2_SAE, - - // Conversions between float and half-float. - CVTPS2PH, CVTPH2PS, CVTPH2PS_SAE, - - // Masked version of above. - // SRC, RND, PASSTHRU, MASK - MCVTPS2PH, - - // Galois Field Arithmetic Instructions - GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB, - - // LWP insert record. - LWPINS, - - // User level wait - UMWAIT, TPAUSE, - - // Enqueue Stores Instructions - ENQCMD, ENQCMDS, - - // For avx512-vp2intersect - VP2INTERSECT, - - /// X86 strict FP compare instructions. - STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE, - STRICT_FCMPS, - - // Vector packed double/float comparison. - STRICT_CMPP, - - /// Vector comparison generating mask bits for fp and - /// integer signed and unsigned data types. - STRICT_CMPM, - - // Vector float/double to signed/unsigned integer with truncation. - STRICT_CVTTP2SI, STRICT_CVTTP2UI, - - // Vector FP extend. - STRICT_VFPEXT, - - // Vector FP round. - STRICT_VFPROUND, - - // RndScale - Round FP Values To Include A Given Number Of Fraction Bits. - // Also used by the legacy (V)ROUND intrinsics where we mask out the - // scaling part of the immediate. - STRICT_VRNDSCALE, - - // Vector signed/unsigned integer to float/double. - STRICT_CVTSI2P, STRICT_CVTUI2P, - - // Strict FMA nodes. - STRICT_FNMADD, STRICT_FMSUB, STRICT_FNMSUB, - - // Conversions between float and half-float. - STRICT_CVTPS2PH, STRICT_CVTPH2PS, - - // Compare and swap. - LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE, - LCMPXCHG8_DAG, - LCMPXCHG16_DAG, - LCMPXCHG8_SAVE_EBX_DAG, - LCMPXCHG16_SAVE_RBX_DAG, - - /// LOCK-prefixed arithmetic read-modify-write instructions. - /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS) - LADD, LSUB, LOR, LXOR, LAND, - - // Load, scalar_to_vector, and zero extend. - VZEXT_LOAD, - - // extract_vector_elt, store. - VEXTRACT_STORE, - - // scalar broadcast from memory - VBROADCAST_LOAD, - - // Store FP control world into i16 memory. - FNSTCW16m, - - /// This instruction implements FP_TO_SINT with the - /// integer destination in memory and a FP reg source. This corresponds - /// to the X86::FIST*m instructions and the rounding mode change stuff. It - /// has two inputs (token chain and address) and two outputs (int value - /// and token chain). Memory VT specifies the type to store to. - FP_TO_INT_IN_MEM, - - /// This instruction implements SINT_TO_FP with the - /// integer source in memory and FP reg result. This corresponds to the - /// X86::FILD*m instructions. It has two inputs (token chain and address) - /// and two outputs (FP value and token chain). The integer source type is - /// specified by the memory VT. - FILD, + // Same as SETCC except it's materialized with a sbb and the value is all + // one's or all zero's. + SETCC_CARRY, // R = carry_bit ? ~0 : 0 - /// This instruction implements a fp->int store from FP stack - /// slots. This corresponds to the fist instruction. It takes a - /// chain operand, value to store, address, and glue. The memory VT - /// specifies the type to store as. - FIST, + /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD. + /// Operands are two FP values to compare; result is a mask of + /// 0s or 1s. Generally DTRT for C/C++ with NaNs. + FSETCC, - /// This instruction implements an extending load to FP stack slots. - /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain - /// operand, and ptr to load from. The memory VT specifies the type to - /// load from. - FLD, + /// X86 FP SETCC, similar to above, but with output as an i1 mask and + /// and a version with SAE. + FSETCCM, + FSETCCM_SAE, - /// This instruction implements a truncating store from FP stack - /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a - /// chain operand, value to store, address, and glue. The memory VT - /// specifies the type to store as. - FST, - - /// This instruction grabs the address of the next argument - /// from a va_list. (reads and modifies the va_list in memory) - VAARG_64, - - // Vector truncating store with unsigned/signed saturation - VTRUNCSTOREUS, VTRUNCSTORES, - // Vector truncating masked store with unsigned/signed saturation - VMTRUNCSTOREUS, VMTRUNCSTORES, - - // X86 specific gather and scatter - MGATHER, MSCATTER, - - // WARNING: Do not add anything in the end unless you want the node to - // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all - // opcodes will be thought as target memory ops! - }; + /// X86 conditional moves. Operand 0 and operand 1 are the two values + /// to select from. Operand 2 is the condition code, and operand 3 is the + /// flag operand produced by a CMP or TEST instruction. + CMOV, + + /// X86 conditional branches. Operand 0 is the chain operand, operand 1 + /// is the block to branch if condition is true, operand 2 is the + /// condition code, and operand 3 is the flag operand produced by a CMP + /// or TEST instruction. + BRCOND, + + /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and + /// operand 1 is the target address. + NT_BRIND, + + /// Return with a flag operand. Operand 0 is the chain operand, operand + /// 1 is the number of bytes of stack to pop. + RET_FLAG, + + /// Return from interrupt. Operand 0 is the number of bytes to pop. + IRET, + + /// Repeat fill, corresponds to X86::REP_STOSx. + REP_STOS, + + /// Repeat move, corresponds to X86::REP_MOVSx. + REP_MOVS, + + /// On Darwin, this node represents the result of the popl + /// at function entry, used for PIC code. + GlobalBaseReg, + + /// A wrapper node for TargetConstantPool, TargetJumpTable, + /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress, + /// MCSymbol and TargetBlockAddress. + Wrapper, + + /// Special wrapper used under X86-64 PIC mode for RIP + /// relative displacements. + WrapperRIP, + + /// Copies a 64-bit value from an MMX vector to the low word + /// of an XMM vector, with the high word zero filled. + MOVQ2DQ, + + /// Copies a 64-bit value from the low word of an XMM vector + /// to an MMX vector. + MOVDQ2Q, + + /// Copies a 32-bit value from the low word of a MMX + /// vector to a GPR. + MMX_MOVD2W, + + /// Copies a GPR into the low 32-bit word of a MMX vector + /// and zero out the high word. + MMX_MOVW2D, + + /// Extract an 8-bit value from a vector and zero extend it to + /// i32, corresponds to X86::PEXTRB. + PEXTRB, + + /// Extract a 16-bit value from a vector and zero extend it to + /// i32, corresponds to X86::PEXTRW. + PEXTRW, + + /// Insert any element of a 4 x float vector into any element + /// of a destination 4 x floatvector. + INSERTPS, + + /// Insert the lower 8-bits of a 32-bit value to a vector, + /// corresponds to X86::PINSRB. + PINSRB, + + /// Insert the lower 16-bits of a 32-bit value to a vector, + /// corresponds to X86::PINSRW. + PINSRW, + + /// Shuffle 16 8-bit values within a vector. + PSHUFB, + + /// Compute Sum of Absolute Differences. + PSADBW, + /// Compute Double Block Packed Sum-Absolute-Differences + DBPSADBW, + + /// Bitwise Logical AND NOT of Packed FP values. + ANDNP, + + /// Blend where the selector is an immediate. + BLENDI, + + /// Dynamic (non-constant condition) vector blend where only the sign bits + /// of the condition elements are used. This is used to enforce that the + /// condition mask is not valid for generic VSELECT optimizations. This + /// is also used to implement the intrinsics. + /// Operands are in VSELECT order: MASK, TRUE, FALSE + BLENDV, + + /// Combined add and sub on an FP vector. + ADDSUB, + + // FP vector ops with rounding mode. + FADD_RND, + FADDS, + FADDS_RND, + FSUB_RND, + FSUBS, + FSUBS_RND, + FMUL_RND, + FMULS, + FMULS_RND, + FDIV_RND, + FDIVS, + FDIVS_RND, + FMAX_SAE, + FMAXS_SAE, + FMIN_SAE, + FMINS_SAE, + FSQRT_RND, + FSQRTS, + FSQRTS_RND, + + // FP vector get exponent. + FGETEXP, + FGETEXP_SAE, + FGETEXPS, + FGETEXPS_SAE, + // Extract Normalized Mantissas. + VGETMANT, + VGETMANT_SAE, + VGETMANTS, + VGETMANTS_SAE, + // FP Scale. + SCALEF, + SCALEF_RND, + SCALEFS, + SCALEFS_RND, + + // Unsigned Integer average. + AVG, + + /// Integer horizontal add/sub. + HADD, + HSUB, + + /// Floating point horizontal add/sub. + FHADD, + FHSUB, + + // Detect Conflicts Within a Vector + CONFLICT, + + /// Floating point max and min. + FMAX, + FMIN, + + /// Commutative FMIN and FMAX. + FMAXC, + FMINC, + + /// Scalar intrinsic floating point max and min. + FMAXS, + FMINS, + + /// Floating point reciprocal-sqrt and reciprocal approximation. + /// Note that these typically require refinement + /// in order to obtain suitable precision. + FRSQRT, + FRCP, + + // AVX-512 reciprocal approximations with a little more precision. + RSQRT14, + RSQRT14S, + RCP14, + RCP14S, + + // Thread Local Storage. + TLSADDR, + + // Thread Local Storage. A call to get the start address + // of the TLS block for the current module. + TLSBASEADDR, + + // Thread Local Storage. When calling to an OS provided + // thunk at the address from an earlier relocation. + TLSCALL, + + // Exception Handling helpers. + EH_RETURN, + + // SjLj exception handling setjmp. + EH_SJLJ_SETJMP, + + // SjLj exception handling longjmp. + EH_SJLJ_LONGJMP, + + // SjLj exception handling dispatch. + EH_SJLJ_SETUP_DISPATCH, + + /// Tail call return. See X86TargetLowering::LowerCall for + /// the list of operands. + TC_RETURN, + + // Vector move to low scalar and zero higher vector elements. + VZEXT_MOVL, + + // Vector integer truncate. + VTRUNC, + // Vector integer truncate with unsigned/signed saturation. + VTRUNCUS, + VTRUNCS, + + // Masked version of the above. Used when less than a 128-bit result is + // produced since the mask only applies to the lower elements and can't + // be represented by a select. + // SRC, PASSTHRU, MASK + VMTRUNC, + VMTRUNCUS, + VMTRUNCS, + + // Vector FP extend. + VFPEXT, + VFPEXT_SAE, + VFPEXTS, + VFPEXTS_SAE, + + // Vector FP round. + VFPROUND, + VFPROUND_RND, + VFPROUNDS, + VFPROUNDS_RND, + + // Masked version of above. Used for v2f64->v4f32. + // SRC, PASSTHRU, MASK + VMFPROUND, + + // 128-bit vector logical left / right shift + VSHLDQ, + VSRLDQ, + + // Vector shift elements + VSHL, + VSRL, + VSRA, + + // Vector variable shift + VSHLV, + VSRLV, + VSRAV, + + // Vector shift elements by immediate + VSHLI, + VSRLI, + VSRAI, + + // Shifts of mask registers. + KSHIFTL, + KSHIFTR, + + // Bit rotate by immediate + VROTLI, + VROTRI, + + // Vector packed double/float comparison. + CMPP, + + // Vector integer comparisons. + PCMPEQ, + PCMPGT, + + // v8i16 Horizontal minimum and position. + PHMINPOS, + + MULTISHIFT, + + /// Vector comparison generating mask bits for fp and + /// integer signed and unsigned data types. + CMPM, + // Vector comparison with SAE for FP values + CMPM_SAE, + + // Arithmetic operations with FLAGS results. + ADD, + SUB, + ADC, + SBB, + SMUL, + UMUL, + OR, + XOR, + AND, + + // Bit field extract. + BEXTR, + + // Zero High Bits Starting with Specified Bit Position. + BZHI, + + // Parallel extract and deposit. + PDEP, + PEXT, + + // X86-specific multiply by immediate. + MUL_IMM, + + // Vector sign bit extraction. + MOVMSK, + + // Vector bitwise comparisons. + PTEST, + + // Vector packed fp sign bitwise comparisons. + TESTP, + + // OR/AND test for masks. + KORTEST, + KTEST, + + // ADD for masks. + KADD, + + // Several flavors of instructions with vector shuffle behaviors. + // Saturated signed/unnsigned packing. + PACKSS, + PACKUS, + // Intra-lane alignr. + PALIGNR, + // AVX512 inter-lane alignr. + VALIGN, + PSHUFD, + PSHUFHW, + PSHUFLW, + SHUFP, + // VBMI2 Concat & Shift. + VSHLD, + VSHRD, + VSHLDV, + VSHRDV, + // Shuffle Packed Values at 128-bit granularity. + SHUF128, + MOVDDUP, + MOVSHDUP, + MOVSLDUP, + MOVLHPS, + MOVHLPS, + MOVSD, + MOVSS, + UNPCKL, + UNPCKH, + VPERMILPV, + VPERMILPI, + VPERMI, + VPERM2X128, + + // Variable Permute (VPERM). + // Res = VPERMV MaskV, V0 + VPERMV, + + // 3-op Variable Permute (VPERMT2). + // Res = VPERMV3 V0, MaskV, V1 + VPERMV3, + + // Bitwise ternary logic. + VPTERNLOG, + // Fix Up Special Packed Float32/64 values. + VFIXUPIMM, + VFIXUPIMM_SAE, + VFIXUPIMMS, + VFIXUPIMMS_SAE, + // Range Restriction Calculation For Packed Pairs of Float32/64 values. + VRANGE, + VRANGE_SAE, + VRANGES, + VRANGES_SAE, + // Reduce - Perform Reduction Transformation on scalar\packed FP. + VREDUCE, + VREDUCE_SAE, + VREDUCES, + VREDUCES_SAE, + // RndScale - Round FP Values To Include A Given Number Of Fraction Bits. + // Also used by the legacy (V)ROUND intrinsics where we mask out the + // scaling part of the immediate. + VRNDSCALE, + VRNDSCALE_SAE, + VRNDSCALES, + VRNDSCALES_SAE, + // Tests Types Of a FP Values for packed types. + VFPCLASS, + // Tests Types Of a FP Values for scalar types. + VFPCLASSS, + + // Broadcast (splat) scalar or element 0 of a vector. If the operand is + // a vector, this node may change the vector length as part of the splat. + VBROADCAST, + // Broadcast mask to vector. + VBROADCASTM, + // Broadcast subvector to vector. + SUBV_BROADCAST, + + /// SSE4A Extraction and Insertion. + EXTRQI, + INSERTQI, + + // XOP arithmetic/logical shifts. + VPSHA, + VPSHL, + // XOP signed/unsigned integer comparisons. + VPCOM, + VPCOMU, + // XOP packed permute bytes. + VPPERM, + // XOP two source permutation. + VPERMIL2, + + // Vector multiply packed unsigned doubleword integers. + PMULUDQ, + // Vector multiply packed signed doubleword integers. + PMULDQ, + // Vector Multiply Packed UnsignedIntegers with Round and Scale. + MULHRS, + + // Multiply and Add Packed Integers. + VPMADDUBSW, + VPMADDWD, + + // AVX512IFMA multiply and add. + // NOTE: These are different than the instruction and perform + // op0 x op1 + op2. + VPMADD52L, + VPMADD52H, + + // VNNI + VPDPBUSD, + VPDPBUSDS, + VPDPWSSD, + VPDPWSSDS, + + // FMA nodes. + // We use the target independent ISD::FMA for the non-inverted case. + FNMADD, + FMSUB, + FNMSUB, + FMADDSUB, + FMSUBADD, + + // FMA with rounding mode. + FMADD_RND, + FNMADD_RND, + FMSUB_RND, + FNMSUB_RND, + FMADDSUB_RND, + FMSUBADD_RND, + + // Compress and expand. + COMPRESS, + EXPAND, + + // Bits shuffle + VPSHUFBITQMB, + + // Convert Unsigned/Integer to Floating-Point Value with rounding mode. + SINT_TO_FP_RND, + UINT_TO_FP_RND, + SCALAR_SINT_TO_FP, + SCALAR_UINT_TO_FP, + SCALAR_SINT_TO_FP_RND, + SCALAR_UINT_TO_FP_RND, + + // Vector float/double to signed/unsigned integer. + CVTP2SI, + CVTP2UI, + CVTP2SI_RND, + CVTP2UI_RND, + // Scalar float/double to signed/unsigned integer. + CVTS2SI, + CVTS2UI, + CVTS2SI_RND, + CVTS2UI_RND, + + // Vector float/double to signed/unsigned integer with truncation. + CVTTP2SI, + CVTTP2UI, + CVTTP2SI_SAE, + CVTTP2UI_SAE, + // Scalar float/double to signed/unsigned integer with truncation. + CVTTS2SI, + CVTTS2UI, + CVTTS2SI_SAE, + CVTTS2UI_SAE, + + // Vector signed/unsigned integer to float/double. + CVTSI2P, + CVTUI2P, + + // Masked versions of above. Used for v2f64->v4f32. + // SRC, PASSTHRU, MASK + MCVTP2SI, + MCVTP2UI, + MCVTTP2SI, + MCVTTP2UI, + MCVTSI2P, + MCVTUI2P, + + // Vector float to bfloat16. + // Convert TWO packed single data to one packed BF16 data + CVTNE2PS2BF16, + // Convert packed single data to packed BF16 data + CVTNEPS2BF16, + // Masked version of above. + // SRC, PASSTHRU, MASK + MCVTNEPS2BF16, + + // Dot product of BF16 pairs to accumulated into + // packed single precision. + DPBF16PS, + + // Save xmm argument registers to the stack, according to %al. An operator + // is needed so that this can be expanded with control flow. + VASTART_SAVE_XMM_REGS, + + // Windows's _chkstk call to do stack probing. + WIN_ALLOCA, + + // For allocating variable amounts of stack space when using + // segmented stacks. Check if the current stacklet has enough space, and + // falls back to heap allocation if not. + SEG_ALLOCA, + + // For allocating stack space when using stack clash protector. + // Allocation is performed by block, and each block is probed. + PROBED_ALLOCA, + + // Memory barriers. + MEMBARRIER, + MFENCE, + + // Get a random integer and indicate whether it is valid in CF. + RDRAND, + + // Get a NIST SP800-90B & C compliant random integer and + // indicate whether it is valid in CF. + RDSEED, + + // Protection keys + // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX. + // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is + // value for ECX. + RDPKRU, + WRPKRU, + + // SSE42 string comparisons. + // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG + // will emit one or two instructions based on which results are used. If + // flags and index/mask this allows us to use a single instruction since + // we won't have to pick and opcode for flags. Instead we can rely on the + // DAG to CSE everything and decide at isel. + PCMPISTR, + PCMPESTR, + + // Test if in transactional execution. + XTEST, + + // ERI instructions. + RSQRT28, + RSQRT28_SAE, + RSQRT28S, + RSQRT28S_SAE, + RCP28, + RCP28_SAE, + RCP28S, + RCP28S_SAE, + EXP2, + EXP2_SAE, + + // Conversions between float and half-float. + CVTPS2PH, + CVTPH2PS, + CVTPH2PS_SAE, + + // Masked version of above. + // SRC, RND, PASSTHRU, MASK + MCVTPS2PH, + + // Galois Field Arithmetic Instructions + GF2P8AFFINEINVQB, + GF2P8AFFINEQB, + GF2P8MULB, + + // LWP insert record. + LWPINS, + + // User level wait + UMWAIT, + TPAUSE, + + // Enqueue Stores Instructions + ENQCMD, + ENQCMDS, + + // For avx512-vp2intersect + VP2INTERSECT, + + /// X86 strict FP compare instructions. + STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE, + STRICT_FCMPS, + + // Vector packed double/float comparison. + STRICT_CMPP, + + /// Vector comparison generating mask bits for fp and + /// integer signed and unsigned data types. + STRICT_CMPM, + + // Vector float/double to signed/unsigned integer with truncation. + STRICT_CVTTP2SI, + STRICT_CVTTP2UI, + + // Vector FP extend. + STRICT_VFPEXT, + + // Vector FP round. + STRICT_VFPROUND, + + // RndScale - Round FP Values To Include A Given Number Of Fraction Bits. + // Also used by the legacy (V)ROUND intrinsics where we mask out the + // scaling part of the immediate. + STRICT_VRNDSCALE, + + // Vector signed/unsigned integer to float/double. + STRICT_CVTSI2P, + STRICT_CVTUI2P, + + // Strict FMA nodes. + STRICT_FNMADD, + STRICT_FMSUB, + STRICT_FNMSUB, + + // Conversions between float and half-float. + STRICT_CVTPS2PH, + STRICT_CVTPH2PS, + + // Compare and swap. + LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE, + LCMPXCHG8_DAG, + LCMPXCHG16_DAG, + LCMPXCHG8_SAVE_EBX_DAG, + LCMPXCHG16_SAVE_RBX_DAG, + + /// LOCK-prefixed arithmetic read-modify-write instructions. + /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS) + LADD, + LSUB, + LOR, + LXOR, + LAND, + + // Load, scalar_to_vector, and zero extend. + VZEXT_LOAD, + + // extract_vector_elt, store. + VEXTRACT_STORE, + + // scalar broadcast from memory + VBROADCAST_LOAD, + + // Store FP control world into i16 memory. + FNSTCW16m, + + /// This instruction implements FP_TO_SINT with the + /// integer destination in memory and a FP reg source. This corresponds + /// to the X86::FIST*m instructions and the rounding mode change stuff. It + /// has two inputs (token chain and address) and two outputs (int value + /// and token chain). Memory VT specifies the type to store to. + FP_TO_INT_IN_MEM, + + /// This instruction implements SINT_TO_FP with the + /// integer source in memory and FP reg result. This corresponds to the + /// X86::FILD*m instructions. It has two inputs (token chain and address) + /// and two outputs (FP value and token chain). The integer source type is + /// specified by the memory VT. + FILD, + + /// This instruction implements a fp->int store from FP stack + /// slots. This corresponds to the fist instruction. It takes a + /// chain operand, value to store, address, and glue. The memory VT + /// specifies the type to store as. + FIST, + + /// This instruction implements an extending load to FP stack slots. + /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain + /// operand, and ptr to load from. The memory VT specifies the type to + /// load from. + FLD, + + /// This instruction implements a truncating store from FP stack + /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a + /// chain operand, value to store, address, and glue. The memory VT + /// specifies the type to store as. + FST, + + /// This instruction grabs the address of the next argument + /// from a va_list. (reads and modifies the va_list in memory) + VAARG_64, + + // Vector truncating store with unsigned/signed saturation + VTRUNCSTOREUS, + VTRUNCSTORES, + // Vector truncating masked store with unsigned/signed saturation + VMTRUNCSTOREUS, + VMTRUNCSTORES, + + // X86 specific gather and scatter + MGATHER, + MSCATTER, + + // WARNING: Do not add anything in the end unless you want the node to + // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all + // opcodes will be thought as target memory ops! + }; } // end namespace X86ISD /// Define some predicates that are used for node matching.