diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -740,6 +740,15 @@ setOperationAction(ISD::FEXP2, VT, Expand); } + // Handle constrained floating-point operations of vector types. + for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32, + MVT::v2f64, MVT::v4f64, MVT::v8f64 }) { + setOperationAction(ISD::STRICT_FADD, VT, Legal); + setOperationAction(ISD::STRICT_FSUB, VT, Legal); + setOperationAction(ISD::STRICT_FMUL, VT, Legal); + setOperationAction(ISD::STRICT_FDIV, VT, Legal); + } + // First set operation action for all vector types to either promote // (for widening) or expand (for scalarization). Then we will selectively // turn on ones that can be effectively codegen'd. diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -5391,14 +5391,14 @@ NAME#"SD">, XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; } -defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86fadds, X86faddRnds, - SchedWriteFAddSizes, 1>; -defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmuls, X86fmulRnds, - SchedWriteFMulSizes, 1>; -defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubs, X86fsubRnds, - SchedWriteFAddSizes, 0>; -defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivs, X86fdivRnds, - SchedWriteFDivSizes, 0>; +defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds, + SchedWriteFAddSizes, 1>, SIMD_EXC; +defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds, + SchedWriteFMulSizes, 1>, SIMD_EXC; +defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds, + SchedWriteFAddSizes, 0>, SIMD_EXC; +defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds, + SchedWriteFDivSizes, 0>, SIMD_EXC; defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs, SchedWriteFCmpSizes, 0>; defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs, @@ -5546,18 +5546,18 @@ EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; } -defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512, +defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, HasAVX512, SchedWriteFAddSizes, 1>, - avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>; -defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512, + avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>, SIMD_EXC; +defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, HasAVX512, SchedWriteFMulSizes, 1>, - avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>; -defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, + avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>, SIMD_EXC; +defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, HasAVX512, SchedWriteFAddSizes>, - avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>; -defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, + avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>, SIMD_EXC; +defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, HasAVX512, SchedWriteFDivSizes>, - avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>; + avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>, SIMD_EXC; defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, SchedWriteFCmpSizes, 0>, 
avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>; diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td --- a/llvm/lib/Target/X86/X86InstrFormats.td +++ b/llvm/lib/Target/X86/X86InstrFormats.td @@ -227,6 +227,7 @@ class EVEX_V256 { bit hasEVEX_L2 = 0; bit hasVEX_L = 1; } class EVEX_V128 { bit hasEVEX_L2 = 0; bit hasVEX_L = 0; } class NOTRACK { bit hasNoTrackPrefix = 1; } +class SIMD_EXC { list<Register> Uses = [MXCSR]; bit mayRaiseFPException = 1; } // Specify AVX512 8-bit compressed displacement encoding based on the vector // element size in bits (8, 16, 32, 64) and the CDisp8 form. diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -7233,6 +7233,11 @@ bool X86InstrInfo::hasReassociableOperands(const MachineInstr &Inst, const MachineBasicBlock *MBB) const { + // FIXME: Revisit this if the assumption below ever fails: + // Floating-point instructions that are constrained by rounding and exception + // masks are given an implicit MXCSR operand, but we do not need to assume + // that instructions with reassociable operands can have 5 operands. The only + // class that could is [V][U]COMI*, which should never be reassociable. assert((Inst.getNumOperands() == 3 || Inst.getNumOperands() == 4) && "Reassociation needs binary operators"); @@ -7244,9 +7249,11 @@ // that are set by using these particular operands with this operation. if (Inst.getNumOperands() == 4) { assert(Inst.getOperand(3).isReg() && - Inst.getOperand(3).getReg() == X86::EFLAGS && + (Inst.getOperand(3).getReg() == X86::EFLAGS || + Inst.getOperand(3).getReg() == X86::MXCSR) && "Unexpected operand in reassociable instruction"); - if (!Inst.getOperand(3).isDead()) + if (Inst.getOperand(3).getReg() == X86::EFLAGS && + !Inst.getOperand(3).isDead()) return false; } @@ -7667,6 +7674,12 @@ MachineOperand &NewOp1 = NewMI1.getOperand(3); MachineOperand &NewOp2 = NewMI2.getOperand(3); + // Only floating-point instructions use MXCSR as an implicit source register, + // and they never define an implicit EFLAGS operand at the same time.
+ if ((OldOp1.isReg() && OldOp1.getReg() == X86::MXCSR) || + (OldOp2.isReg() && OldOp2.getReg() == X86::MXCSR)) + return; + assert(OldOp1.isReg() && OldOp1.getReg() == X86::EFLAGS && OldOp1.isDead() && "Must have dead EFLAGS operand in reassociable instruction"); assert(OldOp2.isReg() && OldOp2.getReg() == X86::EFLAGS && OldOp2.isDead() && diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -2621,19 +2621,19 @@ } // Binary Arithmetic instructions -defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SchedWriteFAddSizes>, - basic_sse12_fp_binop_s<0x58, "add", fadd, SchedWriteFAddSizes>, - basic_sse12_fp_binop_s_int<0x58, "add", null_frag, SchedWriteFAddSizes>; -defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SchedWriteFMulSizes>, - basic_sse12_fp_binop_s<0x59, "mul", fmul, SchedWriteFMulSizes>, - basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SchedWriteFMulSizes>; +defm ADD : basic_sse12_fp_binop_p<0x58, "add", any_fadd, SchedWriteFAddSizes>, + basic_sse12_fp_binop_s<0x58, "add", any_fadd, SchedWriteFAddSizes>, + basic_sse12_fp_binop_s_int<0x58, "add", null_frag, SchedWriteFAddSizes>, SIMD_EXC; +defm MUL : basic_sse12_fp_binop_p<0x59, "mul", any_fmul, SchedWriteFMulSizes>, + basic_sse12_fp_binop_s<0x59, "mul", any_fmul, SchedWriteFMulSizes>, + basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SchedWriteFMulSizes>, SIMD_EXC; let isCommutable = 0 in { - defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SchedWriteFAddSizes>, - basic_sse12_fp_binop_s<0x5C, "sub", fsub, SchedWriteFAddSizes>, - basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, SchedWriteFAddSizes>; - defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SchedWriteFDivSizes>, - basic_sse12_fp_binop_s<0x5E, "div", fdiv, SchedWriteFDivSizes>, - basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, SchedWriteFDivSizes>; + defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", any_fsub, SchedWriteFAddSizes>, + basic_sse12_fp_binop_s<0x5C, "sub", any_fsub, SchedWriteFAddSizes>, + basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, SchedWriteFAddSizes>, SIMD_EXC; + defm DIV : basic_sse12_fp_binop_p<0x5E, "div", any_fdiv, SchedWriteFDivSizes>, + basic_sse12_fp_binop_s<0x5E, "div", any_fdiv, SchedWriteFDivSizes>, + basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, SchedWriteFDivSizes>, SIMD_EXC; defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SchedWriteFCmpSizes>, basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SchedWriteFCmpSizes>, basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, SchedWriteFCmpSizes>; @@ -2727,15 +2727,15 @@ } } -defm : scalar_math_patterns; -defm : scalar_math_patterns; -defm : scalar_math_patterns; -defm : scalar_math_patterns; +defm : scalar_math_patterns; +defm : scalar_math_patterns; +defm : scalar_math_patterns; +defm : scalar_math_patterns; -defm : scalar_math_patterns; -defm : scalar_math_patterns; -defm : scalar_math_patterns; -defm : scalar_math_patterns; +defm : scalar_math_patterns; +defm : scalar_math_patterns; +defm : scalar_math_patterns; +defm : scalar_math_patterns; /// Unop Arithmetic /// In addition, we also have a special variant of the scalar form here to diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -515,6 +515,9 @@ // Set the floating point control register as reserved. 
Reserved.set(X86::FPCW); + // Set the SIMD floating point control register as reserved. + Reserved.set(X86::MXCSR); + // Set the stack-pointer register and its aliases as reserved. for (MCSubRegIterator I(X86::RSP, this, /*IncludeSelf=*/true); I.isValid(); ++I) diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td --- a/llvm/lib/Target/X86/X86RegisterInfo.td +++ b/llvm/lib/Target/X86/X86RegisterInfo.td @@ -294,6 +294,11 @@ // Floating-point control word def FPCW : X86Reg<"fpcr", 0>; +// SIMD Floating-point control register. +// Note: We only model the current rounding modes and the IEEE masks. +// IEEE flags, FTZ and DAZ are not modeled here. +def MXCSR : X86Reg<"mxcsr", 0>; + // Status flags register. // // Note that some flags that are commonly thought of as part of the status diff --git a/llvm/test/CodeGen/MIR/X86/constant-pool.mir b/llvm/test/CodeGen/MIR/X86/constant-pool.mir --- a/llvm/test/CodeGen/MIR/X86/constant-pool.mir +++ b/llvm/test/CodeGen/MIR/X86/constant-pool.mir @@ -61,12 +61,12 @@ alignment: 4 body: | bb.0.entry: - ; CHECK: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg - ; CHECK-NEXT: $xmm1 = ADDSSrm killed $xmm1, $rip, 1, $noreg, %const.1, $noreg - $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _ - $xmm1 = ADDSSrm killed $xmm1, $rip, 1, _, %const.1, _ + ; CHECK: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg, implicit $mxcsr + ; CHECK-NEXT: $xmm1 = ADDSSrm killed $xmm1, $rip, 1, $noreg, %const.1, $noreg, implicit $mxcsr + $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _, implicit $mxcsr + $xmm1 = ADDSSrm killed $xmm1, $rip, 1, _, %const.1, _, implicit $mxcsr $xmm1 = CVTSS2SDrr killed $xmm1 - $xmm0 = MULSDrr killed $xmm0, killed $xmm1 + $xmm0 = MULSDrr killed $xmm0, killed $xmm1, implicit $mxcsr RETQ $xmm0 ... --- @@ -89,10 +89,10 @@ value: 'float 6.250000e+00' body: | bb.0.entry: - $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _ - $xmm1 = ADDSSrm killed $xmm1, $rip, 1, _, %const.1, _ + $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _, implicit $mxcsr + $xmm1 = ADDSSrm killed $xmm1, $rip, 1, _, %const.1, _, implicit $mxcsr $xmm1 = CVTSS2SDrr killed $xmm1 - $xmm0 = MULSDrr killed $xmm0, killed $xmm1 + $xmm0 = MULSDrr killed $xmm0, killed $xmm1, implicit $mxcsr RETQ $xmm0 ... --- @@ -117,12 +117,12 @@ alignment: 1 body: | bb.0.entry: - ; CHECK: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg - ; CHECK-NEXT: $xmm1 = ADDSSrm killed $xmm1, $rip, 1, $noreg, %const.1, $noreg - $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _ - $xmm1 = ADDSSrm killed $xmm1, $rip, 1, _, %const.1, _ + ; CHECK: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg, implicit $mxcsr + ; CHECK-NEXT: $xmm1 = ADDSSrm killed $xmm1, $rip, 1, $noreg, %const.1, $noreg, implicit $mxcsr + $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _, implicit $mxcsr + $xmm1 = ADDSSrm killed $xmm1, $rip, 1, _, %const.1, _, implicit $mxcsr $xmm1 = CVTSS2SDrr killed $xmm1 - $xmm0 = MULSDrr killed $xmm0, killed $xmm1 + $xmm0 = MULSDrr killed $xmm0, killed $xmm1, implicit $mxcsr RETQ $xmm0 ... 
--- @@ -135,11 +135,11 @@ value: 'float 6.250000e+00' body: | bb.0.entry: - ; CHECK: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.1 - 12, $noreg - ; CHECK-NEXT: $xmm1 = ADDSSrm killed $xmm1, $rip, 1, $noreg, %const.0 + 8, $noreg - $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.1 - 12, _ - $xmm1 = ADDSSrm killed $xmm1, $rip, 1, _, %const.0 + 8, _ + ; CHECK: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.1 - 12, $noreg, implicit $mxcsr + ; CHECK-NEXT: $xmm1 = ADDSSrm killed $xmm1, $rip, 1, $noreg, %const.0 + 8, $noreg, implicit $mxcsr + $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.1 - 12, _, implicit $mxcsr + $xmm1 = ADDSSrm killed $xmm1, $rip, 1, _, %const.0 + 8, _, implicit $mxcsr $xmm1 = CVTSS2SDrr killed $xmm1 - $xmm0 = MULSDrr killed $xmm0, killed $xmm1 + $xmm0 = MULSDrr killed $xmm0, killed $xmm1, implicit $mxcsr RETQ $xmm0 ... diff --git a/llvm/test/CodeGen/MIR/X86/fastmath.mir b/llvm/test/CodeGen/MIR/X86/fastmath.mir --- a/llvm/test/CodeGen/MIR/X86/fastmath.mir +++ b/llvm/test/CodeGen/MIR/X86/fastmath.mir @@ -10,24 +10,24 @@ ; CHECK: %0:fr32 = COPY $xmm0 %0:fr32 = COPY $xmm0 - ; CHECK: %1:fr32 = nnan VMULSSrr %0, %0 - %1:fr32 = nnan VMULSSrr %0, %0 - ; CHECK: %2:fr32 = ninf VMULSSrr %1, %1 - %2:fr32 = ninf VMULSSrr %1, %1 - ; CHECK: %3:fr32 = nsz VMULSSrr %2, %2 - %3:fr32 = nsz VMULSSrr %2, %2 - ; CHECK: %4:fr32 = arcp VMULSSrr %3, %3 - %4:fr32 = arcp VMULSSrr %3, %3 - ; CHECK: %5:fr32 = contract VMULSSrr %4, %4 - %5:fr32 = contract VMULSSrr %4, %4 - ; CHECK: %6:fr32 = afn VMULSSrr %5, %5 - %6:fr32 = afn VMULSSrr %5, %5 - ; CHECK: %7:fr32 = reassoc VMULSSrr %6, %6 - %7:fr32 = reassoc VMULSSrr %6, %6 - ; CHECK: %8:fr32 = nsz arcp contract afn reassoc VMULSSrr %7, %7 - %8:fr32 = nsz arcp contract afn reassoc VMULSSrr %7, %7 - ; CHECK: %9:fr32 = contract afn reassoc VMULSSrr %8, %8 - %9:fr32 = contract afn reassoc VMULSSrr %8, %8 + ; CHECK: %1:fr32 = nnan VMULSSrr %0, %0, implicit $mxcsr + %1:fr32 = nnan VMULSSrr %0, %0, implicit $mxcsr + ; CHECK: %2:fr32 = ninf VMULSSrr %1, %1, implicit $mxcsr + %2:fr32 = ninf VMULSSrr %1, %1, implicit $mxcsr + ; CHECK: %3:fr32 = nsz VMULSSrr %2, %2, implicit $mxcsr + %3:fr32 = nsz VMULSSrr %2, %2, implicit $mxcsr + ; CHECK: %4:fr32 = arcp VMULSSrr %3, %3, implicit $mxcsr + %4:fr32 = arcp VMULSSrr %3, %3, implicit $mxcsr + ; CHECK: %5:fr32 = contract VMULSSrr %4, %4, implicit $mxcsr + %5:fr32 = contract VMULSSrr %4, %4, implicit $mxcsr + ; CHECK: %6:fr32 = afn VMULSSrr %5, %5, implicit $mxcsr + %6:fr32 = afn VMULSSrr %5, %5, implicit $mxcsr + ; CHECK: %7:fr32 = reassoc VMULSSrr %6, %6, implicit $mxcsr + %7:fr32 = reassoc VMULSSrr %6, %6, implicit $mxcsr + ; CHECK: %8:fr32 = nsz arcp contract afn reassoc VMULSSrr %7, %7, implicit $mxcsr + %8:fr32 = nsz arcp contract afn reassoc VMULSSrr %7, %7, implicit $mxcsr + ; CHECK: %9:fr32 = contract afn reassoc VMULSSrr %8, %8, implicit $mxcsr + %9:fr32 = contract afn reassoc VMULSSrr %8, %8, implicit $mxcsr ; CHECK: $xmm0 = COPY %9 $xmm0 = COPY %9 ; CHECK: RET 0, $xmm0 diff --git a/llvm/test/CodeGen/MIR/X86/memory-operands.mir b/llvm/test/CodeGen/MIR/X86/memory-operands.mir --- a/llvm/test/CodeGen/MIR/X86/memory-operands.mir +++ b/llvm/test/CodeGen/MIR/X86/memory-operands.mir @@ -336,10 +336,10 @@ bb.0.entry: liveins: $xmm0 ; CHECK: name: constant_pool_psv - ; CHECK: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg :: (load 8 from constant-pool) - ; CHECK-NEXT: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg :: (load 8 from constant-pool + 8) - $xmm0 = ADDSDrm 
killed $xmm0, $rip, 1, _, %const.0, _ :: (load 8 from constant-pool) - $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _ :: (load 8 from constant-pool + 8) + ; CHECK: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg, implicit $mxcsr :: (load 8 from constant-pool) + ; CHECK-NEXT: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg, implicit $mxcsr :: (load 8 from constant-pool + 8) + $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _, implicit $mxcsr :: (load 8 from constant-pool) + $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _, implicit $mxcsr :: (load 8 from constant-pool + 8) RETQ $xmm0 ... --- diff --git a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir --- a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir +++ b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir @@ -167,14 +167,14 @@ $ymm0 = VPADDWZ256rm $ymm0, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm0 = VPADDWYrr $ymm0, $ymm1 $ymm0 = VPADDWZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VMULPDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VMULPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm0 = VMULPDYrr $ymm0, $ymm1 - $ymm0 = VMULPDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VMULPSYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VMULPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm0 = VMULPSYrr $ymm0, $ymm1 - $ymm0 = VMULPSZ256rr $ymm0, $ymm1 + ; CHECK: $ymm0 = VMULPDYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm0 = VMULPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VMULPDYrr $ymm0, $ymm1, implicit $mxcsr + $ymm0 = VMULPDZ256rr $ymm0, $ymm1, implicit $mxcsr + ; CHECK: $ymm0 = VMULPSYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm0 = VMULPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VMULPSYrr $ymm0, $ymm1, implicit $mxcsr + $ymm0 = VMULPSZ256rr $ymm0, $ymm1, implicit $mxcsr ; CHECK: $ymm0 = VORPDYrm $ymm0, $rip, 1, $rax, 0, $noreg $ymm0 = VORPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm0 = VORPDYrr $ymm0, $ymm1 @@ -315,14 +315,14 @@ $ymm0 = VPXORQZ256rm $ymm0, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm0 = VPXORYrr $ymm0, $ymm1 $ymm0 = VPXORQZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VADDPDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VADDPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm0 = VADDPDYrr $ymm0, $ymm1 - $ymm0 = VADDPDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VADDPSYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VADDPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm0 = VADDPSYrr $ymm0, $ymm1 - $ymm0 = VADDPSZ256rr $ymm0, $ymm1 + ; CHECK: $ymm0 = VADDPDYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm0 = VADDPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VADDPDYrr $ymm0, $ymm1, implicit $mxcsr + $ymm0 = VADDPDZ256rr $ymm0, $ymm1, implicit $mxcsr + ; CHECK: $ymm0 = VADDPSYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm0 = VADDPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VADDPSYrr $ymm0, $ymm1, implicit $mxcsr + $ymm0 = VADDPSZ256rr $ymm0, $ymm1, implicit $mxcsr ; CHECK: $ymm0 = VANDNPDYrm $ymm0, $rip, 1, $rax, 0, $noreg $ymm0 = VANDNPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm0 = VANDNPDYrr $ymm0, $ymm1 @@ -339,14 +339,14 @@ $ymm0 = VANDPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm0 = VANDPSYrr $ymm0, $ymm1 $ymm0 = VANDPSZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VDIVPDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VDIVPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg - ; 
CHECK: $ymm0 = VDIVPDYrr $ymm0, $ymm1 - $ymm0 = VDIVPDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VDIVPSYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VDIVPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm0 = VDIVPSYrr $ymm0, $ymm1 - $ymm0 = VDIVPSZ256rr $ymm0, $ymm1 + ; CHECK: $ymm0 = VDIVPDYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm0 = VDIVPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VDIVPDYrr $ymm0, $ymm1, implicit $mxcsr + $ymm0 = VDIVPDZ256rr $ymm0, $ymm1, implicit $mxcsr + ; CHECK: $ymm0 = VDIVPSYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm0 = VDIVPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VDIVPSYrr $ymm0, $ymm1, implicit $mxcsr + $ymm0 = VDIVPSZ256rr $ymm0, $ymm1, implicit $mxcsr ; CHECK: $ymm0 = VMAXCPDYrm $ymm0, $rip, 1, $rax, 0, $noreg $ymm0 = VMAXCPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm0 = VMAXCPDYrr $ymm0, $ymm1 @@ -419,14 +419,14 @@ $ymm0 = VUNPCKLPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm0 = VUNPCKLPSYrr $ymm0, $ymm1 $ymm0 = VUNPCKLPSZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VSUBPDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VSUBPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm0 = VSUBPDYrr $ymm0, $ymm1 - $ymm0 = VSUBPDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VSUBPSYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VSUBPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm0 = VSUBPSYrr $ymm0, $ymm1 - $ymm0 = VSUBPSZ256rr $ymm0, $ymm1 + ; CHECK: $ymm0 = VSUBPDYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm0 = VSUBPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VSUBPDYrr $ymm0, $ymm1, implicit $mxcsr + $ymm0 = VSUBPDZ256rr $ymm0, $ymm1, implicit $mxcsr + ; CHECK: $ymm0 = VSUBPSYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm0 = VSUBPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VSUBPSYrr $ymm0, $ymm1, implicit $mxcsr + $ymm0 = VSUBPSZ256rr $ymm0, $ymm1, implicit $mxcsr ; CHECK: $ymm0 = VPUNPCKHBWYrm $ymm0, $rip, 1, $rax, 0, $noreg $ymm0 = VPUNPCKHBWZ256rm $ymm0, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm0 = VPUNPCKHBWYrr $ymm0, $ymm1 @@ -1107,14 +1107,14 @@ $xmm0 = VMINPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm0 = VMINPSrr $xmm0, $xmm1 $xmm0 = VMINPSZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VMULPDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMULPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMULPDrr $xmm0, $xmm1 - $xmm0 = VMULPDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VMULPSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMULPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMULPSrr $xmm0, $xmm1 - $xmm0 = VMULPSZ128rr $xmm0, $xmm1 + ; CHECK: $xmm0 = VMULPDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMULPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMULPDrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMULPDZ128rr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMULPSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMULPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMULPSrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMULPSZ128rr $xmm0, $xmm1, implicit $mxcsr ; CHECK: $xmm0 = VORPDrm $xmm0, $rip, 1, $rax, 0, $noreg $xmm0 = VORPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm0 = VORPDrr $xmm0, $xmm1 @@ -1295,14 +1295,14 @@ $xmm0 = VPSUBWZ128rm $xmm0, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm0 = VPSUBWrr $xmm0, $xmm1 $xmm0 = VPSUBWZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = 
VADDPDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VADDPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VADDPDrr $xmm0, $xmm1 - $xmm0 = VADDPDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VADDPSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VADDPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VADDPSrr $xmm0, $xmm1 - $xmm0 = VADDPSZ128rr $xmm0, $xmm1 + ; CHECK: $xmm0 = VADDPDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VADDPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VADDPDrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VADDPDZ128rr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VADDPSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VADDPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VADDPSrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VADDPSZ128rr $xmm0, $xmm1, implicit $mxcsr ; CHECK: $xmm0 = VANDNPDrm $xmm0, $rip, 1, $rax, 0, $noreg $xmm0 = VANDNPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm0 = VANDNPDrr $xmm0, $xmm1 @@ -1319,14 +1319,14 @@ $xmm0 = VANDPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm0 = VANDPSrr $xmm0, $xmm1 $xmm0 = VANDPSZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VDIVPDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VDIVPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VDIVPDrr $xmm0, $xmm1 - $xmm0 = VDIVPDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VDIVPSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VDIVPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VDIVPSrr $xmm0, $xmm1 - $xmm0 = VDIVPSZ128rr $xmm0, $xmm1 + ; CHECK: $xmm0 = VDIVPDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VDIVPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VDIVPDrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VDIVPDZ128rr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VDIVPSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VDIVPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VDIVPSrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VDIVPSZ128rr $xmm0, $xmm1, implicit $mxcsr ; CHECK: $xmm0 = VPXORrm $xmm0, $rip, 1, $rax, 0, $noreg $xmm0 = VPXORDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm0 = VPXORrr $xmm0, $xmm1 @@ -1335,14 +1335,14 @@ $xmm0 = VPXORQZ128rm $xmm0, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm0 = VPXORrr $xmm0, $xmm1 $xmm0 = VPXORQZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VSUBPDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VSUBPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VSUBPDrr $xmm0, $xmm1 - $xmm0 = VSUBPDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VSUBPSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VSUBPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VSUBPSrr $xmm0, $xmm1 - $xmm0 = VSUBPSZ128rr $xmm0, $xmm1 + ; CHECK: $xmm0 = VSUBPDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VSUBPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VSUBPDrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VSUBPDZ128rr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VSUBPSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VSUBPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VSUBPSrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VSUBPSZ128rr $xmm0, $xmm1, implicit $mxcsr ; CHECK: $xmm0 = VXORPDrm $xmm0, $rip, 1, $rax, 0, $noreg $xmm0 = VXORPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm0 = VXORPDrr $xmm0, $xmm1 @@ -1810,38 +1810,38 @@ body: | bb.0: - ; CHECK: $xmm0 = VADDSDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = 
VADDSDZrm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VADDSDrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VADDSDZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VADDSDrr $xmm0, $xmm1 - $xmm0 = VADDSDZrr $xmm0, $xmm1 - ; CHECK: $xmm0 = VADDSDrr_Int $xmm0, $xmm1 - $xmm0 = VADDSDZrr_Int $xmm0, $xmm1 - ; CHECK: $xmm0 = VADDSSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VADDSSZrm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VADDSSrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VADDSSZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VADDSSrr $xmm0, $xmm1 - $xmm0 = VADDSSZrr $xmm0, $xmm1 - ; CHECK: $xmm0 = VADDSSrr_Int $xmm0, $xmm1 - $xmm0 = VADDSSZrr_Int $xmm0, $xmm1 - ; CHECK: $xmm0 = VDIVSDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VDIVSDZrm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VDIVSDrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VDIVSDZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VDIVSDrr $xmm0, $xmm1 - $xmm0 = VDIVSDZrr $xmm0, $xmm1 - ; CHECK: $xmm0 = VDIVSDrr_Int $xmm0, $xmm1 - $xmm0 = VDIVSDZrr_Int $xmm0, $xmm1 - ; CHECK: $xmm0 = VDIVSSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VDIVSSZrm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VDIVSSrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VDIVSSZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VDIVSSrr $xmm0, $xmm1 - $xmm0 = VDIVSSZrr $xmm0, $xmm1 - ; CHECK: $xmm0 = VDIVSSrr_Int $xmm0, $xmm1 - $xmm0 = VDIVSSZrr_Int $xmm0, $xmm1 + ; CHECK: $xmm0 = VADDSDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VADDSDZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VADDSDrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VADDSDZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VADDSDrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VADDSDZrr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VADDSDrr_Int $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VADDSDZrr_Int $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VADDSSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VADDSSZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VADDSSrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VADDSSZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VADDSSrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VADDSSZrr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VADDSSrr_Int $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VADDSSZrr_Int $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VDIVSDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VDIVSDZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VDIVSDrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VDIVSDZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VDIVSDrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VDIVSDZrr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VDIVSDrr_Int $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VDIVSDZrr_Int $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VDIVSSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VDIVSSZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VDIVSSrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VDIVSSZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VDIVSSrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VDIVSSZrr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VDIVSSrr_Int $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VDIVSSZrr_Int $xmm0, 
$xmm1, implicit $mxcsr ; CHECK: $xmm0 = VMAXCSDrm $xmm0, $rip, 1, $rax, 0, $noreg $xmm0 = VMAXCSDZrm $xmm0, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm0 = VMAXCSDrr $xmm0, $xmm1 @@ -1890,38 +1890,38 @@ $xmm0 = VMINSSZrr $xmm0, $xmm1 ; CHECK: $xmm0 = VMINSSrr_Int $xmm0, $xmm1 $xmm0 = VMINSSZrr_Int $xmm0, $xmm1 - ; CHECK: $xmm0 = VMULSDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMULSDZrm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMULSDrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMULSDZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMULSDrr $xmm0, $xmm1 - $xmm0 = VMULSDZrr $xmm0, $xmm1 - ; CHECK: $xmm0 = VMULSDrr_Int $xmm0, $xmm1 - $xmm0 = VMULSDZrr_Int $xmm0, $xmm1 - ; CHECK: $xmm0 = VMULSSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMULSSZrm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMULSSrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VMULSSZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VMULSSrr $xmm0, $xmm1 - $xmm0 = VMULSSZrr $xmm0, $xmm1 - ; CHECK: $xmm0 = VMULSSrr_Int $xmm0, $xmm1 - $xmm0 = VMULSSZrr_Int $xmm0, $xmm1 - ; CHECK: $xmm0 = VSUBSDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VSUBSDZrm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VSUBSDrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VSUBSDZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VSUBSDrr $xmm0, $xmm1 - $xmm0 = VSUBSDZrr $xmm0, $xmm1 - ; CHECK: $xmm0 = VSUBSDrr_Int $xmm0, $xmm1 - $xmm0 = VSUBSDZrr_Int $xmm0, $xmm1 - ; CHECK: $xmm0 = VSUBSSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VSUBSSZrm $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VSUBSSrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VSUBSSZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm0 = VSUBSSrr $xmm0, $xmm1 - $xmm0 = VSUBSSZrr $xmm0, $xmm1 - ; CHECK: $xmm0 = VSUBSSrr_Int $xmm0, $xmm1 - $xmm0 = VSUBSSZrr_Int $xmm0, $xmm1 + ; CHECK: $xmm0 = VMULSDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMULSDZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMULSDrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMULSDZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMULSDrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMULSDZrr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMULSDrr_Int $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMULSDZrr_Int $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMULSSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMULSSZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMULSSrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VMULSSZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMULSSrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMULSSZrr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VMULSSrr_Int $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VMULSSZrr_Int $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VSUBSDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VSUBSDZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VSUBSDrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VSUBSDZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VSUBSDrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VSUBSDZrr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VSUBSDrr_Int $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VSUBSDZrr_Int $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VSUBSSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VSUBSSZrm $xmm0, $rip, 1, $rax, 0, 
$noreg, implicit $mxcsr + ; CHECK: $xmm0 = VSUBSSrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm0 = VSUBSSZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VSUBSSrr $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VSUBSSZrr $xmm0, $xmm1, implicit $mxcsr + ; CHECK: $xmm0 = VSUBSSrr_Int $xmm0, $xmm1, implicit $mxcsr + $xmm0 = VSUBSSZrr_Int $xmm0, $xmm1, implicit $mxcsr ; CHECK: $xmm0 = VFMADD132SDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg $xmm0 = VFMADD132SDZm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VFMADD132SDm_Int $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg @@ -2530,14 +2530,14 @@ $ymm16 = VPADDWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm16 = VPADDWZ256rr $ymm16, $ymm1 $ymm16 = VPADDWZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VMULPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VMULPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm16 = VMULPDZ256rr $ymm16, $ymm1 - $ymm16 = VMULPDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VMULPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VMULPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm16 = VMULPSZ256rr $ymm16, $ymm1 - $ymm16 = VMULPSZ256rr $ymm16, $ymm1 + ; CHECK: $ymm16 = VMULPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm16 = VMULPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VMULPDZ256rr $ymm16, $ymm1, implicit $mxcsr + $ymm16 = VMULPDZ256rr $ymm16, $ymm1, implicit $mxcsr + ; CHECK: $ymm16 = VMULPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm16 = VMULPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VMULPSZ256rr $ymm16, $ymm1, implicit $mxcsr + $ymm16 = VMULPSZ256rr $ymm16, $ymm1, implicit $mxcsr ; CHECK: $ymm16 = VORPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg $ymm16 = VORPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm16 = VORPDZ256rr $ymm16, $ymm1 @@ -2678,14 +2678,14 @@ $ymm16 = VPXORQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm16 = VPXORQZ256rr $ymm16, $ymm1 $ymm16 = VPXORQZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VADDPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VADDPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm16 = VADDPDZ256rr $ymm16, $ymm1 - $ymm16 = VADDPDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VADDPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VADDPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm16 = VADDPSZ256rr $ymm16, $ymm1 - $ymm16 = VADDPSZ256rr $ymm16, $ymm1 + ; CHECK: $ymm16 = VADDPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm16 = VADDPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VADDPDZ256rr $ymm16, $ymm1, implicit $mxcsr + $ymm16 = VADDPDZ256rr $ymm16, $ymm1, implicit $mxcsr + ; CHECK: $ymm16 = VADDPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm16 = VADDPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VADDPSZ256rr $ymm16, $ymm1, implicit $mxcsr + $ymm16 = VADDPSZ256rr $ymm16, $ymm1, implicit $mxcsr ; CHECK: $ymm16 = VANDNPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg $ymm16 = VANDNPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm16 = VANDNPDZ256rr $ymm16, $ymm1 @@ -2702,14 +2702,14 @@ $ymm16 = VANDPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm16 = VANDPSZ256rr $ymm16, $ymm1 $ymm16 = VANDPSZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VDIVPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VDIVPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm16 = VDIVPDZ256rr $ymm16, $ymm1 - $ymm16 = VDIVPDZ256rr 
$ymm16, $ymm1 - ; CHECK: $ymm16 = VDIVPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VDIVPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm16 = VDIVPSZ256rr $ymm16, $ymm1 - $ymm16 = VDIVPSZ256rr $ymm16, $ymm1 + ; CHECK: $ymm16 = VDIVPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm16 = VDIVPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VDIVPDZ256rr $ymm16, $ymm1, implicit $mxcsr + $ymm16 = VDIVPDZ256rr $ymm16, $ymm1, implicit $mxcsr + ; CHECK: $ymm16 = VDIVPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm16 = VDIVPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VDIVPSZ256rr $ymm16, $ymm1, implicit $mxcsr + $ymm16 = VDIVPSZ256rr $ymm16, $ymm1, implicit $mxcsr ; CHECK: $ymm16 = VMAXCPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg $ymm16 = VMAXCPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm16 = VMAXCPDZ256rr $ymm16, $ymm1 @@ -2782,14 +2782,14 @@ $ymm16 = VUNPCKLPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm16 = VUNPCKLPSZ256rr $ymm16, $ymm1 $ymm16 = VUNPCKLPSZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VSUBPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VSUBPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm16 = VSUBPDZ256rr $ymm16, $ymm1 - $ymm16 = VSUBPDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VSUBPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VSUBPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm16 = VSUBPSZ256rr $ymm16, $ymm1 - $ymm16 = VSUBPSZ256rr $ymm16, $ymm1 + ; CHECK: $ymm16 = VSUBPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm16 = VSUBPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VSUBPDZ256rr $ymm16, $ymm1, implicit $mxcsr + $ymm16 = VSUBPDZ256rr $ymm16, $ymm1, implicit $mxcsr + ; CHECK: $ymm16 = VSUBPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $ymm16 = VSUBPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VSUBPSZ256rr $ymm16, $ymm1, implicit $mxcsr + $ymm16 = VSUBPSZ256rr $ymm16, $ymm1, implicit $mxcsr ; CHECK: $ymm16 = VPUNPCKHBWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg $ymm16 = VPUNPCKHBWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg ; CHECK: $ymm16 = VPUNPCKHBWZ256rr $ymm16, $ymm1 @@ -3478,14 +3478,14 @@ $xmm16 = VMINPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm16 = VMINPSZ128rr $xmm16, $xmm1 $xmm16 = VMINPSZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VMULPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VMULPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VMULPDZ128rr $xmm16, $xmm1 - $xmm16 = VMULPDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VMULPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VMULPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VMULPSZ128rr $xmm16, $xmm1 - $xmm16 = VMULPSZ128rr $xmm16, $xmm1 + ; CHECK: $xmm16 = VMULPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VMULPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMULPDZ128rr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VMULPDZ128rr $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = VMULPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VMULPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMULPSZ128rr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VMULPSZ128rr $xmm16, $xmm1, implicit $mxcsr ; CHECK: $xmm16 = VORPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg $xmm16 = VORPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm16 = VORPDZ128rr $xmm16, $xmm1 @@ -3666,14 
+3666,14 @@ $xmm16 = VPSUBWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm16 = VPSUBWZ128rr $xmm16, $xmm1 $xmm16 = VPSUBWZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VADDPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VADDPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VADDPDZ128rr $xmm16, $xmm1 - $xmm16 = VADDPDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VADDPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VADDPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VADDPSZ128rr $xmm16, $xmm1 - $xmm16 = VADDPSZ128rr $xmm16, $xmm1 + ; CHECK: $xmm16 = VADDPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VADDPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VADDPDZ128rr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VADDPDZ128rr $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = VADDPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VADDPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VADDPSZ128rr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VADDPSZ128rr $xmm16, $xmm1, implicit $mxcsr ; CHECK: $xmm16 = VANDNPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg $xmm16 = VANDNPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm16 = VANDNPDZ128rr $xmm16, $xmm1 @@ -3690,14 +3690,14 @@ $xmm16 = VANDPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm16 = VANDPSZ128rr $xmm16, $xmm1 $xmm16 = VANDPSZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VDIVPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VDIVPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VDIVPDZ128rr $xmm16, $xmm1 - $xmm16 = VDIVPDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VDIVPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VDIVPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VDIVPSZ128rr $xmm16, $xmm1 - $xmm16 = VDIVPSZ128rr $xmm16, $xmm1 + ; CHECK: $xmm16 = VDIVPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VDIVPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VDIVPDZ128rr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VDIVPDZ128rr $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = VDIVPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VDIVPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VDIVPSZ128rr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VDIVPSZ128rr $xmm16, $xmm1, implicit $mxcsr ; CHECK: $xmm16 = VPXORDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg $xmm16 = VPXORDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm16 = VPXORDZ128rr $xmm16, $xmm1 @@ -3706,14 +3706,14 @@ $xmm16 = VPXORQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm16 = VPXORQZ128rr $xmm16, $xmm1 $xmm16 = VPXORQZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VSUBPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VSUBPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VSUBPDZ128rr $xmm16, $xmm1 - $xmm16 = VSUBPDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VSUBPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VSUBPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VSUBPSZ128rr $xmm16, $xmm1 - $xmm16 = VSUBPSZ128rr $xmm16, $xmm1 + ; CHECK: $xmm16 = VSUBPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VSUBPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VSUBPDZ128rr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VSUBPDZ128rr $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = VSUBPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VSUBPSZ128rm $xmm16, $rip, 1, 
$rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VSUBPSZ128rr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VSUBPSZ128rr $xmm16, $xmm1, implicit $mxcsr ; CHECK: $xmm16 = VXORPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg $xmm16 = VXORPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm16 = VXORPDZ128rr $xmm16, $xmm1 @@ -4188,38 +4188,38 @@ name: evex_scalar_to_evex_test body: | bb.0: - ; CHECK: $xmm16 = VADDSDZrm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VADDSDZrm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VADDSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VADDSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VADDSDZrr $xmm16, $xmm1 - $xmm16 = VADDSDZrr $xmm16, $xmm1 - ; CHECK: $xmm16 = VADDSDZrr_Int $xmm16, $xmm1 - $xmm16 = VADDSDZrr_Int $xmm16, $xmm1 - ; CHECK: $xmm16 = VADDSSZrm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VADDSSZrm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VADDSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VADDSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VADDSSZrr $xmm16, $xmm1 - $xmm16 = VADDSSZrr $xmm16, $xmm1 - ; CHECK: $xmm16 = VADDSSZrr_Int $xmm16, $xmm1 - $xmm16 = VADDSSZrr_Int $xmm16, $xmm1 - ; CHECK: $xmm16 = VDIVSDZrm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VDIVSDZrm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VDIVSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VDIVSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VDIVSDZrr $xmm16, $xmm1 - $xmm16 = VDIVSDZrr $xmm16, $xmm1 - ; CHECK: $xmm16 = VDIVSDZrr_Int $xmm16, $xmm1 - $xmm16 = VDIVSDZrr_Int $xmm16, $xmm1 - ; CHECK: $xmm16 = VDIVSSZrm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VDIVSSZrm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VDIVSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VDIVSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VDIVSSZrr $xmm16, $xmm1 - $xmm16 = VDIVSSZrr $xmm16, $xmm1 - ; CHECK: $xmm16 = VDIVSSZrr_Int $xmm16, $xmm1 - $xmm16 = VDIVSSZrr_Int $xmm16, $xmm1 + ; CHECK: $xmm16 = VADDSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VADDSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VADDSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VADDSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VADDSDZrr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VADDSDZrr $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = VADDSDZrr_Int $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VADDSDZrr_Int $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = VADDSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VADDSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VADDSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VADDSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VADDSSZrr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VADDSSZrr $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = VADDSSZrr_Int $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VADDSSZrr_Int $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = VDIVSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VDIVSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VDIVSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VDIVSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VDIVSDZrr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VDIVSDZrr $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = 
VDIVSDZrr_Int $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VDIVSDZrr_Int $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = VDIVSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VDIVSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VDIVSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VDIVSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VDIVSSZrr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VDIVSSZrr $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = VDIVSSZrr_Int $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VDIVSSZrr_Int $xmm16, $xmm1, implicit $mxcsr ; CHECK: $xmm16 = VMAXCSDZrm $xmm16, $rip, 1, $rax, 0, $noreg $xmm16 = VMAXCSDZrm $xmm16, $rip, 1, $rax, 0, $noreg ; CHECK: $xmm16 = VMAXCSDZrr $xmm16, $xmm1 @@ -4268,38 +4268,38 @@ $xmm16 = VMINSSZrr $xmm16, $xmm1 ; CHECK: $xmm16 = VMINSSZrr_Int $xmm16, $xmm1 $xmm16 = VMINSSZrr_Int $xmm16, $xmm1 - ; CHECK: $xmm16 = VMULSDZrm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VMULSDZrm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VMULSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VMULSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VMULSDZrr $xmm16, $xmm1 - $xmm16 = VMULSDZrr $xmm16, $xmm1 - ; CHECK: $xmm16 = VMULSDZrr_Int $xmm16, $xmm1 - $xmm16 = VMULSDZrr_Int $xmm16, $xmm1 - ; CHECK: $xmm16 = VMULSSZrm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VMULSSZrm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VMULSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VMULSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VMULSSZrr $xmm16, $xmm1 - $xmm16 = VMULSSZrr $xmm16, $xmm1 - ; CHECK: $xmm16 = VMULSSZrr_Int $xmm16, $xmm1 - $xmm16 = VMULSSZrr_Int $xmm16, $xmm1 - ; CHECK: $xmm16 = VSUBSDZrm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VSUBSDZrm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VSUBSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VSUBSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VSUBSDZrr $xmm16, $xmm1 - $xmm16 = VSUBSDZrr $xmm16, $xmm1 - ; CHECK: $xmm16 = VSUBSDZrr_Int $xmm16, $xmm1 - $xmm16 = VSUBSDZrr_Int $xmm16, $xmm1 - ; CHECK: $xmm16 = VSUBSSZrm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VSUBSSZrm $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VSUBSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VSUBSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg - ; CHECK: $xmm16 = VSUBSSZrr $xmm16, $xmm1 - $xmm16 = VSUBSSZrr $xmm16, $xmm1 - ; CHECK: $xmm16 = VSUBSSZrr_Int $xmm16, $xmm1 - $xmm16 = VSUBSSZrr_Int $xmm16, $xmm1 + ; CHECK: $xmm16 = VMULSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VMULSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMULSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VMULSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMULSDZrr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VMULSDZrr $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = VMULSDZrr_Int $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VMULSDZrr_Int $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = VMULSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VMULSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMULSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VMULSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMULSSZrr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VMULSSZrr $xmm16, $xmm1, implicit $mxcsr + ; CHECK: 
$xmm16 = VMULSSZrr_Int $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VMULSSZrr_Int $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = VSUBSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VSUBSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VSUBSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VSUBSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VSUBSDZrr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VSUBSDZrr $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = VSUBSDZrr_Int $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VSUBSDZrr_Int $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = VSUBSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VSUBSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VSUBSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + $xmm16 = VSUBSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VSUBSSZrr $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VSUBSSZrr $xmm16, $xmm1, implicit $mxcsr + ; CHECK: $xmm16 = VSUBSSZrr_Int $xmm16, $xmm1, implicit $mxcsr + $xmm16 = VSUBSSZrr_Int $xmm16, $xmm1, implicit $mxcsr ; CHECK: $xmm16 = VFMADD132SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg $xmm16 = VFMADD132SDZm $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VFMADD132SDZm_Int $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg diff --git a/llvm/test/CodeGen/X86/fp-strict-avx.ll b/llvm/test/CodeGen/X86/fp-strict-avx.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/fp-strict-avx.ll @@ -0,0 +1,198 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 -O3 | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 -O3 | FileCheck %s --check-prefixes=CHECK,X64 + +declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double>, <4 x double>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata) +declare <8 x float> @llvm.experimental.constrained.fadd.v8f32(<8 x float>, <8 x float>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.fsub.v4f64(<4 x double>, <4 x double>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float>, <4 x float>, metadata, metadata) +declare <8 x float> @llvm.experimental.constrained.fsub.v8f32(<8 x float>, <8 x float>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.fmul.v4f64(<4 x double>, <4 x double>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float>, <4 x float>, metadata, metadata) +declare <8 x float> @llvm.experimental.constrained.fmul.v8f32(<8 x float>, <8 x float>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.fdiv.v4f64(<4 x double>, <4 x double>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float>, <4 x float>, metadata, 
metadata) +declare <8 x float> @llvm.experimental.constrained.fdiv.v8f32(<8 x float>, <8 x float>, metadata, metadata) + +define <2 x double> @f1(<2 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: f1: +; CHECK: # %bb.0: +; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %a, <2 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %ret +} + +define <4 x float> @f2(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: f2: +; CHECK: # %bb.0: +; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> %a, <4 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x float> %ret +} + +define <4 x double> @f3(<4 x double> %a, <4 x double> %b) #0 { +; CHECK-LABEL: f3: +; CHECK: # %bb.0: +; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double> %a, <4 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %ret +} + +define <8 x float> @f4(<8 x float> %a, <8 x float> %b) #0 { +; CHECK-LABEL: f4: +; CHECK: # %bb.0: +; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <8 x float> @llvm.experimental.constrained.fadd.v8f32(<8 x float> %a, <8 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x float> %ret +} + +define <2 x double> @f5(<2 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: f5: +; CHECK: # %bb.0: +; CHECK-NEXT: vsubpd %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double> %a, <2 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %ret +} + +define <4 x float> @f6(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: f6: +; CHECK: # %bb.0: +; CHECK-NEXT: vsubps %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float> %a, <4 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x float> %ret +} + +define <4 x double> @f7(<4 x double> %a, <4 x double> %b) #0 { +; CHECK-LABEL: f7: +; CHECK: # %bb.0: +; CHECK-NEXT: vsubpd %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <4 x double> @llvm.experimental.constrained.fsub.v4f64(<4 x double> %a, <4 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %ret +} + +define <8 x float> @f8(<8 x float> %a, <8 x float> %b) #0 { +; CHECK-LABEL: f8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsubps %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <8 x float> @llvm.experimental.constrained.fsub.v8f32(<8 x float> %a, <8 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x float> %ret +} + +define <2 x double> @f9(<2 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: f9: +; CHECK: # %bb.0: +; CHECK-NEXT: vmulpd %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %a, <2 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %ret +} + +define <4 x float> @f10(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: f10: +; CHECK: # %bb.0: +; CHECK-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; 
CHECK-NEXT: ret{{[l|q]}} + %ret = call <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float> %a, <4 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x float> %ret +} + +define <4 x double> @f11(<4 x double> %a, <4 x double> %b) #0 { +; CHECK-LABEL: f11: +; CHECK: # %bb.0: +; CHECK-NEXT: vmulpd %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <4 x double> @llvm.experimental.constrained.fmul.v4f64(<4 x double> %a, <4 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %ret +} + +define <8 x float> @f12(<8 x float> %a, <8 x float> %b) #0 { +; CHECK-LABEL: f12: +; CHECK: # %bb.0: +; CHECK-NEXT: vmulps %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <8 x float> @llvm.experimental.constrained.fmul.v8f32(<8 x float> %a, <8 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x float> %ret +} + +define <2 x double> @f13(<2 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: f13: +; CHECK: # %bb.0: +; CHECK-NEXT: vdivpd %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double> %a, <2 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %ret +} + +define <4 x float> @f14(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: f14: +; CHECK: # %bb.0: +; CHECK-NEXT: vdivps %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float> %a, <4 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x float> %ret +} + +define <4 x double> @f15(<4 x double> %a, <4 x double> %b) #0 { +; CHECK-LABEL: f15: +; CHECK: # %bb.0: +; CHECK-NEXT: vdivpd %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <4 x double> @llvm.experimental.constrained.fdiv.v4f64(<4 x double> %a, <4 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %ret +} + +define <8 x float> @f16(<8 x float> %a, <8 x float> %b) #0 { +; CHECK-LABEL: f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vdivps %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <8 x float> @llvm.experimental.constrained.fdiv.v8f32(<8 x float> %a, <8 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x float> %ret +} + +attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/X86/fp-strict-avx512.ll b/llvm/test/CodeGen/X86/fp-strict-avx512.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/fp-strict-avx512.ll @@ -0,0 +1,294 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,X64 + +declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double>, <4 x double>, metadata, metadata) +declare <8 x double> @llvm.experimental.constrained.fadd.v8f64(<8 x double>, <8 x double>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata) +declare <8 x float> @llvm.experimental.constrained.fadd.v8f32(<8 x float>, <8 x float>, metadata, metadata) +declare <16 x 
float> @llvm.experimental.constrained.fadd.v16f32(<16 x float>, <16 x float>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.fsub.v4f64(<4 x double>, <4 x double>, metadata, metadata) +declare <8 x double> @llvm.experimental.constrained.fsub.v8f64(<8 x double>, <8 x double>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float>, <4 x float>, metadata, metadata) +declare <8 x float> @llvm.experimental.constrained.fsub.v8f32(<8 x float>, <8 x float>, metadata, metadata) +declare <16 x float> @llvm.experimental.constrained.fsub.v16f32(<16 x float>, <16 x float>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.fmul.v4f64(<4 x double>, <4 x double>, metadata, metadata) +declare <8 x double> @llvm.experimental.constrained.fmul.v8f64(<8 x double>, <8 x double>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float>, <4 x float>, metadata, metadata) +declare <8 x float> @llvm.experimental.constrained.fmul.v8f32(<8 x float>, <8 x float>, metadata, metadata) +declare <16 x float> @llvm.experimental.constrained.fmul.v16f32(<16 x float>, <16 x float>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.fdiv.v4f64(<4 x double>, <4 x double>, metadata, metadata) +declare <8 x double> @llvm.experimental.constrained.fdiv.v8f64(<8 x double>, <8 x double>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float>, <4 x float>, metadata, metadata) +declare <8 x float> @llvm.experimental.constrained.fdiv.v8f32(<8 x float>, <8 x float>, metadata, metadata) +declare <16 x float> @llvm.experimental.constrained.fdiv.v16f32(<16 x float>, <16 x float>, metadata, metadata) + +define <2 x double> @f1(<2 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: f1: +; CHECK: # %bb.0: +; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %a, <2 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %ret +} + +define <4 x float> @f2(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: f2: +; CHECK: # %bb.0: +; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> %a, <4 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x float> %ret +} + +define <4 x double> @f3(<4 x double> %a, <4 x double> %b) #0 { +; CHECK-LABEL: f3: +; CHECK: # %bb.0: +; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double> %a, <4 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %ret +} + +define <8 x float> @f4(<8 x float> %a, <8 x float> %b) #0 { +; CHECK-LABEL: f4: +; CHECK: # %bb.0: +; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <8 x float> @llvm.experimental.constrained.fadd.v8f32(<8 x float> %a, <8 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x float> %ret 
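+ ; A note on the metadata arguments used throughout these tests:
+ ; "round.dynamic" defers to whatever rounding mode MXCSR currently holds, and
+ ; "fpexcept.strict" requires the FP-exception side effects to be preserved, so
+ ; each CHECK above expects the whole vector operation to stay a single packed
+ ; instruction (vaddps here) that implicitly reads MXCSR, rather than being
+ ; scalarized.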
+} + +define <8 x double> @f5(<8 x double> %a, <8 x double> %b) #0 { +; CHECK-LABEL: f5: +; CHECK: # %bb.0: +; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <8 x double> @llvm.experimental.constrained.fadd.v8f64(<8 x double> %a, <8 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x double> %ret +} + +define <16 x float> @f6(<16 x float> %a, <16 x float> %b) #0 { +; CHECK-LABEL: f6: +; CHECK: # %bb.0: +; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %a, <16 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <16 x float> %ret +} + +define <2 x double> @f7(<2 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: f7: +; CHECK: # %bb.0: +; CHECK-NEXT: vsubpd %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double> %a, <2 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %ret +} + +define <4 x float> @f8(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: f8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsubps %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float> %a, <4 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x float> %ret +} + +define <4 x double> @f9(<4 x double> %a, <4 x double> %b) #0 { +; CHECK-LABEL: f9: +; CHECK: # %bb.0: +; CHECK-NEXT: vsubpd %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <4 x double> @llvm.experimental.constrained.fsub.v4f64(<4 x double> %a, <4 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %ret +} + +define <8 x float> @f10(<8 x float> %a, <8 x float> %b) #0 { +; CHECK-LABEL: f10: +; CHECK: # %bb.0: +; CHECK-NEXT: vsubps %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <8 x float> @llvm.experimental.constrained.fsub.v8f32(<8 x float> %a, <8 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x float> %ret +} + +define <8 x double> @f11(<8 x double> %a, <8 x double> %b) #0 { +; CHECK-LABEL: f11: +; CHECK: # %bb.0: +; CHECK-NEXT: vsubpd %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <8 x double> @llvm.experimental.constrained.fsub.v8f64(<8 x double> %a, <8 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x double> %ret +} + +define <16 x float> @f12(<16 x float> %a, <16 x float> %b) #0 { +; CHECK-LABEL: f12: +; CHECK: # %bb.0: +; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <16 x float> @llvm.experimental.constrained.fsub.v16f32(<16 x float> %a, <16 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <16 x float> %ret +} + +define <2 x double> @f13(<2 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: f13: +; CHECK: # %bb.0: +; CHECK-NEXT: vmulpd %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %a, <2 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %ret +} + +define <4 x float> @f14(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: f14: +; CHECK: # %bb.0: +; CHECK-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <4 x float> 
@llvm.experimental.constrained.fmul.v4f32(<4 x float> %a, <4 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x float> %ret +} + +define <4 x double> @f15(<4 x double> %a, <4 x double> %b) #0 { +; CHECK-LABEL: f15: +; CHECK: # %bb.0: +; CHECK-NEXT: vmulpd %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <4 x double> @llvm.experimental.constrained.fmul.v4f64(<4 x double> %a, <4 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %ret +} + +define <8 x float> @f16(<8 x float> %a, <8 x float> %b) #0 { +; CHECK-LABEL: f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmulps %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <8 x float> @llvm.experimental.constrained.fmul.v8f32(<8 x float> %a, <8 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x float> %ret +} + +define <8 x double> @f17(<8 x double> %a, <8 x double> %b) #0 { +; CHECK-LABEL: f17: +; CHECK: # %bb.0: +; CHECK-NEXT: vmulpd %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <8 x double> @llvm.experimental.constrained.fmul.v8f64(<8 x double> %a, <8 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x double> %ret +} + +define <16 x float> @f18(<16 x float> %a, <16 x float> %b) #0 { +; CHECK-LABEL: f18: +; CHECK: # %bb.0: +; CHECK-NEXT: vmulps %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <16 x float> @llvm.experimental.constrained.fmul.v16f32(<16 x float> %a, <16 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <16 x float> %ret +} + +define <2 x double> @f19(<2 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: f19: +; CHECK: # %bb.0: +; CHECK-NEXT: vdivpd %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double> %a, <2 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %ret +} + +define <4 x float> @f20(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: f20: +; CHECK: # %bb.0: +; CHECK-NEXT: vdivps %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float> %a, <4 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x float> %ret +} + +define <4 x double> @f21(<4 x double> %a, <4 x double> %b) #0 { +; CHECK-LABEL: f21: +; CHECK: # %bb.0: +; CHECK-NEXT: vdivpd %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <4 x double> @llvm.experimental.constrained.fdiv.v4f64(<4 x double> %a, <4 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %ret +} + +define <8 x float> @f22(<8 x float> %a, <8 x float> %b) #0 { +; CHECK-LABEL: f22: +; CHECK: # %bb.0: +; CHECK-NEXT: vdivps %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <8 x float> @llvm.experimental.constrained.fdiv.v8f32(<8 x float> %a, <8 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x float> %ret +} + +define <8 x double> @f23(<8 x double> %a, <8 x double> %b) #0 { +; CHECK-LABEL: f23: +; CHECK: # %bb.0: +; CHECK-NEXT: vdivpd %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <8 x double> @llvm.experimental.constrained.fdiv.v8f64(<8 x double> %a, <8 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x double> %ret +} + +define <16 x float> @f24(<16 x float> %a, <16 x float> %b) #0 { +; 
CHECK-LABEL: f24: +; CHECK: # %bb.0: +; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <16 x float> @llvm.experimental.constrained.fdiv.v16f32(<16 x float> %a, <16 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <16 x float> %ret +} + +attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/X86/fp-strict-sse.ll b/llvm/test/CodeGen/X86/fp-strict-sse.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/fp-strict-sse.ll @@ -0,0 +1,102 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK,X64 + +declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float>, <4 x float>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float>, <4 x float>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float>, <4 x float>, metadata, metadata) + +define <2 x double> @f1(<2 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: f1: +; CHECK: # %bb.0: +; CHECK-NEXT: addpd %xmm1, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %a, <2 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %ret +} + +define <4 x float> @f2(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: f2: +; CHECK: # %bb.0: +; CHECK-NEXT: addps %xmm1, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> %a, <4 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x float> %ret +} + +define <2 x double> @f3(<2 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: f3: +; CHECK: # %bb.0: +; CHECK-NEXT: subpd %xmm1, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double> %a, <2 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %ret +} + +define <4 x float> @f4(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: f4: +; CHECK: # %bb.0: +; CHECK-NEXT: subps %xmm1, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float> %a, <4 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x float> %ret +} + +define <2 x double> @f5(<2 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: f5: +; CHECK: # %bb.0: +; CHECK-NEXT: mulpd %xmm1, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %a, <2 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %ret +} + +define <4 x float> @f6(<4 x float> %a, <4 x float> %b) #0 { +; 
CHECK-LABEL: f6: +; CHECK: # %bb.0: +; CHECK-NEXT: mulps %xmm1, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float> %a, <4 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x float> %ret +} + +define <2 x double> @f7(<2 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: f7: +; CHECK: # %bb.0: +; CHECK-NEXT: divpd %xmm1, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double> %a, <2 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %ret +} + +define <4 x float> @f8(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: f8: +; CHECK: # %bb.0: +; CHECK-NEXT: divps %xmm1, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} + %ret = call <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float> %a, <4 x float> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x float> %ret +} + +attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/X86/ipra-reg-usage.ll b/llvm/test/CodeGen/X86/ipra-reg-usage.ll --- a/llvm/test/CodeGen/X86/ipra-reg-usage.ll +++ b/llvm/test/CodeGen/X86/ipra-reg-usage.ll @@ -3,7 +3,7 @@ target triple = "x86_64-unknown-unknown" declare void @bar1() define preserve_allcc void @foo()#0 { -; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $fpcw $fpsw $fs $gs $hip $ip $rip $riz $ss $ssp $bnd0 $bnd1 $bnd2 $bnd3 $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $r11b $r11bh $r11d $r11w $r11wh +; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $fpcw $fpsw $fs $gs $hip $ip $mxcsr $rip $riz $ss $ssp $bnd0 $bnd1 $bnd2 $bnd3 $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $r11b $r11bh $r11d $r11w $r11wh call void @bar1() call void @bar2() ret void diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll 
b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -113,10 +113,10 @@ ; CHECK-LABEL: constrained_vector_fdiv_v4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movapd {{.*#+}} xmm2 = [1.0E+1,1.0E+1] -; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0] -; CHECK-NEXT: divpd %xmm2, %xmm0 ; CHECK-NEXT: movapd {{.*#+}} xmm1 = [3.0E+0,4.0E+0] ; CHECK-NEXT: divpd %xmm2, %xmm1 +; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0] +; CHECK-NEXT: divpd %xmm2, %xmm0 ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fdiv_v4f64: @@ -498,10 +498,10 @@ define <4 x double> @constrained_vector_fmul_v4f64() #0 { ; CHECK-LABEL: constrained_vector_fmul_v4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308] -; CHECK-NEXT: movapd {{.*#+}} xmm0 = [2.0E+0,3.0E+0] -; CHECK-NEXT: mulpd %xmm1, %xmm0 -; CHECK-NEXT: mulpd {{.*}}(%rip), %xmm1 +; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] +; CHECK-NEXT: movapd {{.*#+}} xmm1 = [4.0E+0,5.0E+0] +; CHECK-NEXT: mulpd %xmm0, %xmm1 +; CHECK-NEXT: mulpd {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fmul_v4f64: @@ -544,19 +544,14 @@ define <2 x double> @constrained_vector_fadd_v2f64() #0 { ; CHECK-LABEL: constrained_vector_fadd_v2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: addsd %xmm0, %xmm1 -; CHECK-NEXT: addsd {{.*}}(%rip), %xmm0 -; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] +; CHECK-NEXT: addpd {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fadd_v2f64: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm1 -; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] +; AVX-NEXT: vaddpd {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq entry: %add = call <2 x double> @llvm.experimental.constrained.fadd.v2f64( @@ -603,24 +598,22 @@ define <3 x double> @constrained_vector_fadd_v3f64() #0 { ; CHECK-LABEL: constrained_vector_fadd_v3f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xorpd %xmm2, %xmm2 -; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: addsd %xmm1, %xmm2 -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: addsd %xmm1, %xmm0 +; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] +; CHECK-NEXT: addpd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: xorpd %xmm1, %xmm1 ; CHECK-NEXT: addsd {{.*}}(%rip), %xmm1 -; CHECK-NEXT: movsd %xmm2, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movapd %xmm0, %xmm1 +; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fadd_v3f64: ; AVX: # %bb.0: # %entry ; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm1, %xmm2 -; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovapd 
{{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308] +; AVX-NEXT: vaddpd {{.*}}(%rip), %xmm1, %xmm1 ; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX-NEXT: retq entry: @@ -636,28 +629,16 @@ define <4 x double> @constrained_vector_fadd_v4f64() #0 { ; CHECK-LABEL: constrained_vector_fadd_v4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero -; CHECK-NEXT: addsd %xmm1, %xmm2 -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: addsd %xmm1, %xmm0 -; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero -; CHECK-NEXT: addsd %xmm1, %xmm2 -; CHECK-NEXT: addsd {{.*}}(%rip), %xmm1 -; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] +; CHECK-NEXT: movapd {{.*#+}} xmm1 = [2.0E+0,2.0000000000000001E-1] +; CHECK-NEXT: addpd %xmm0, %xmm1 +; CHECK-NEXT: addpd {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fadd_v4f64: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm1 -; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm2 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm2 -; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX-NEXT: vmovapd {{.*#+}} ymm0 = [1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308] +; AVX-NEXT: vaddpd {{.*}}(%rip), %ymm0, %ymm0 ; AVX-NEXT: retq entry: %add = call <4 x double> @llvm.experimental.constrained.fadd.v4f64( @@ -694,19 +675,14 @@ define <2 x double> @constrained_vector_fsub_v2f64() #0 { ; CHECK-LABEL: constrained_vector_fsub_v2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: movapd %xmm0, %xmm1 -; CHECK-NEXT: subsd {{.*}}(%rip), %xmm1 -; CHECK-NEXT: subsd {{.*}}(%rip), %xmm0 -; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] +; CHECK-NEXT: subpd {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fsub_v2f64: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm0, %xmm1 -; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] +; AVX-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq entry: %sub = call <2 x double> @llvm.experimental.constrained.fsub.v2f64( @@ -756,12 +732,12 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorpd %xmm0, %xmm0 ; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: movapd %xmm1, %xmm2 -; CHECK-NEXT: subsd %xmm0, %xmm2 -; CHECK-NEXT: movapd %xmm1, %xmm0 -; CHECK-NEXT: subsd {{.*}}(%rip), %xmm0 -; CHECK-NEXT: subsd {{.*}}(%rip), %xmm1 -; CHECK-NEXT: movsd %xmm2, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: subsd %xmm0, %xmm1 +; CHECK-NEXT: movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] +; CHECK-NEXT: subpd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movapd %xmm0, %xmm1 +; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) ; CHECK-NEXT: retq ; @@ -770,9 +746,8 @@ ; AVX-NEXT: 
vxorpd %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm1, %xmm2 -; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [-1.7976931348623157E+308,-1.7976931348623157E+308] +; AVX-NEXT: vsubpd {{.*}}(%rip), %xmm1, %xmm1 ; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX-NEXT: retq entry: @@ -788,28 +763,16 @@ define <4 x double> @constrained_vector_fsub_v4f64() #0 { ; CHECK-LABEL: constrained_vector_fsub_v4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: movapd %xmm1, %xmm2 -; CHECK-NEXT: subsd {{.*}}(%rip), %xmm2 -; CHECK-NEXT: movapd %xmm1, %xmm0 -; CHECK-NEXT: subsd {{.*}}(%rip), %xmm0 -; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; CHECK-NEXT: movapd %xmm1, %xmm2 -; CHECK-NEXT: subsd {{.*}}(%rip), %xmm2 -; CHECK-NEXT: subsd {{.*}}(%rip), %xmm1 -; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; CHECK-NEXT: movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] +; CHECK-NEXT: movapd %xmm0, %xmm1 +; CHECK-NEXT: subpd {{.*}}(%rip), %xmm1 +; CHECK-NEXT: subpd {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fsub_v4f64: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm0, %xmm1 -; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm0, %xmm2 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm0, %xmm2 -; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX-NEXT: vmovapd {{.*#+}} ymm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308] +; AVX-NEXT: vsubpd {{.*}}(%rip), %ymm0, %ymm0 ; AVX-NEXT: retq entry: %sub = call <4 x double> @llvm.experimental.constrained.fsub.v4f64(