Index: include/llvm/CodeGen/TargetLowering.h =================================================================== --- include/llvm/CodeGen/TargetLowering.h +++ include/llvm/CodeGen/TargetLowering.h @@ -771,6 +771,10 @@ unsigned EqOpc; switch (Op) { default: llvm_unreachable("Unexpected FP pseudo-opcode"); + case ISD::STRICT_FADD: EqOpc = ISD::FADD; break; + case ISD::STRICT_FSUB: EqOpc = ISD::FSUB; break; + case ISD::STRICT_FMUL: EqOpc = ISD::FMUL; break; + case ISD::STRICT_FDIV: EqOpc = ISD::FDIV; break; case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break; case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break; case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break; Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1090,6 +1090,10 @@ return; } break; + case ISD::STRICT_FADD: + case ISD::STRICT_FSUB: + case ISD::STRICT_FMUL: + case ISD::STRICT_FDIV: case ISD::STRICT_FSQRT: case ISD::STRICT_FMA: case ISD::STRICT_FPOW: Index: lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -293,6 +293,10 @@ switch (Op.getOpcode()) { default: return TranslateLegalizeResults(Op, Result); + case ISD::STRICT_FADD: + case ISD::STRICT_FSUB: + case ISD::STRICT_FMUL: + case ISD::STRICT_FDIV: case ISD::STRICT_FSQRT: case ISD::STRICT_FMA: case ISD::STRICT_FPOW: @@ -725,9 +729,14 @@ return ExpandCTLZ(Op); case ISD::CTTZ_ZERO_UNDEF: return ExpandCTTZ_ZERO_UNDEF(Op); + case ISD::STRICT_FADD: + case ISD::STRICT_FSUB: + case ISD::STRICT_FMUL: + case ISD::STRICT_FDIV: case ISD::STRICT_FSQRT: case ISD::STRICT_FMA: case ISD::STRICT_FPOW: + case ISD::STRICT_FPOWI: case ISD::STRICT_FSIN: case ISD::STRICT_FCOS: case ISD::STRICT_FEXP: @@ -1134,8 +1143,8 @@ SDValue Chain = Op.getOperand(0); SDLoc dl(Op); - SmallVector OpValues; - SmallVector OpChains; + SmallVector OpValues; + SmallVector OpChains; for (unsigned i = 0; i < NumElems; ++i) { SmallVector Opers; SDValue Idx = DAG.getConstant(i, dl, @@ -1146,8 +1155,15 @@ // Now process the remaining operands. for (unsigned j = 1; j < NumOpers; ++j) { - SDValue Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - EltVT, Op.getOperand(j), Idx); + EVT OpVT = Op.getOperand(j).getValueType(); + SDValue Oper; + + if (OpVT.isVector()) + Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + EltVT, Op.getOperand(j), Idx); + else + Oper = Op.getOperand(j); + Opers.push_back(Oper); } Index: test/CodeGen/X86/fp-intrinsics.ll =================================================================== --- test/CodeGen/X86/fp-intrinsics.ll +++ test/CodeGen/X86/fp-intrinsics.ll @@ -33,12 +33,12 @@ ; COMMON: subsd define double @f2(double %a) { entry: - %div = call double @llvm.experimental.constrained.fsub.f64( + %sub = call double @llvm.experimental.constrained.fsub.f64( double %a, double 0.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") - ret double %div + ret double %sub } ; Verify that '-((-a)*b)' isn't simplified to 'a*b' when the rounding mode is Index: test/CodeGen/X86/vector-constrained-fp-intrinsics.ll =================================================================== --- test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -1,12 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck %s +; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck --check-prefix=COMMON --check-prefix=NO-FMA --check-prefix=FMACALL64 --check-prefix=FMACALL32 %s +; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+fma < %s | FileCheck -check-prefix=COMMON --check-prefix=HAS-FMA --check-prefix=FMA64 --check-prefix=FMA32 %s define <2 x double> @constrained_vector_fdiv() { -; CHECK-LABEL: constrained_vector_fdiv: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.000000e+00,2.000000e+00] -; CHECK-NEXT: divpd {{.*}}(%rip), %xmm0 -; CHECK-NEXT: retq +; NO-FMA-LABEL: constrained_vector_fdiv: +; NO-FMA: # %bb.0: # %entry +; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [1.000000e+00,2.000000e+00] +; NO-FMA-NEXT: divpd {{.*}}(%rip), %xmm0 +; NO-FMA-NEXT: retq +; +; HAS-FMA-LABEL: constrained_vector_fdiv: +; HAS-FMA: # %bb.0: # %entry +; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm0 = [1.000000e+00,2.000000e+00] +; HAS-FMA-NEXT: vdivpd {{.*}}(%rip), %xmm0, %xmm0 +; HAS-FMA-NEXT: retq entry: %div = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64( <2 x double> , @@ -17,11 +24,17 @@ } define <2 x double> @constrained_vector_fmul(<2 x double> %a) { -; CHECK-LABEL: constrained_vector_fmul: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.797693e+308,1.797693e+308] -; CHECK-NEXT: mulpd {{.*}}(%rip), %xmm0 -; CHECK-NEXT: retq +; NO-FMA-LABEL: constrained_vector_fmul: +; NO-FMA: # %bb.0: # %entry +; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [1.797693e+308,1.797693e+308] +; NO-FMA-NEXT: mulpd {{.*}}(%rip), %xmm0 +; NO-FMA-NEXT: retq +; +; HAS-FMA-LABEL: constrained_vector_fmul: +; HAS-FMA: # %bb.0: # %entry +; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm0 = [1.797693e+308,1.797693e+308] +; HAS-FMA-NEXT: vmulpd {{.*}}(%rip), %xmm0, %xmm0 +; HAS-FMA-NEXT: retq entry: %mul = call <2 x double> @llvm.experimental.constrained.fmul.v2f64( <2 x double> , @@ -32,11 +45,17 @@ } define <2 x double> @constrained_vector_fadd() { -; CHECK-LABEL: constrained_vector_fadd: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.797693e+308,1.797693e+308] -; CHECK-NEXT: addpd {{.*}}(%rip), %xmm0 -; CHECK-NEXT: retq +; NO-FMA-LABEL: constrained_vector_fadd: +; NO-FMA: # %bb.0: # %entry +; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [1.797693e+308,1.797693e+308] +; NO-FMA-NEXT: addpd {{.*}}(%rip), %xmm0 +; NO-FMA-NEXT: retq +; +; HAS-FMA-LABEL: constrained_vector_fadd: +; HAS-FMA: # %bb.0: # %entry +; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm0 = [1.797693e+308,1.797693e+308] +; HAS-FMA-NEXT: vaddpd {{.*}}(%rip), %xmm0, %xmm0 +; HAS-FMA-NEXT: retq entry: %add = call <2 x double> @llvm.experimental.constrained.fadd.v2f64( <2 x double> , @@ -47,11 +66,17 @@ } define <2 x double> @constrained_vector_fsub() { -; CHECK-LABEL: constrained_vector_fsub: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movapd {{.*#+}} xmm0 = [-1.797693e+308,-1.797693e+308] -; CHECK-NEXT: subpd {{.*}}(%rip), %xmm0 -; CHECK-NEXT: retq +; NO-FMA-LABEL: constrained_vector_fsub: +; NO-FMA: # %bb.0: # %entry +; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [-1.797693e+308,-1.797693e+308] +; NO-FMA-NEXT: subpd {{.*}}(%rip), %xmm0 +; NO-FMA-NEXT: retq +; +; HAS-FMA-LABEL: constrained_vector_fsub: +; HAS-FMA: # %bb.0: # %entry +; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm0 = [-1.797693e+308,-1.797693e+308] +; HAS-FMA-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0 +; HAS-FMA-NEXT: retq entry: %sub = call <2 x double> @llvm.experimental.constrained.fsub.v2f64( <2 x double> , @@ -61,11 +86,102 @@ ret <2 x double> %sub } +define <2 x double> @constrained_vector_fma_v2f64() { +; NO-FMA-LABEL: constrained_vector_fma_v2f64: +; NO-FMA: # %bb.0: # %entry +; NO-FMA-NEXT: subq $24, %rsp +; NO-FMA-NEXT: .cfi_def_cfa_offset 32 +; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; NO-FMA-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero +; NO-FMA-NEXT: callq fma +; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; NO-FMA-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero +; NO-FMA-NEXT: callq fma +; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload +; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0] +; NO-FMA-NEXT: addq $24, %rsp +; NO-FMA-NEXT: .cfi_def_cfa_offset 8 +; NO-FMA-NEXT: retq +; +; HAS-FMA-LABEL: constrained_vector_fma_v2f64: +; HAS-FMA: # %bb.0: # %entry +; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm1 = [1.500000e+00,5.000000e-01] +; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm0 = [3.500000e+00,2.500000e+00] +; HAS-FMA-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem +; HAS-FMA-NEXT: retq +entry: + %fma = call <2 x double> @llvm.experimental.constrained.fma.v2f64( + <2 x double> , + <2 x double> , + <2 x double> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %fma +} + +define <4 x float> @constrained_vector_fma_v4f32() { +; NO-FMA-LABEL: constrained_vector_fma_v4f32: +; NO-FMA: # %bb.0: # %entry +; NO-FMA-NEXT: subq $40, %rsp +; NO-FMA-NEXT: .cfi_def_cfa_offset 48 +; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; NO-FMA-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; NO-FMA-NEXT: callq fmaf +; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; NO-FMA-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; NO-FMA-NEXT: callq fmaf +; NO-FMA-NEXT: unpcklps (%rsp), %xmm0 # 16-byte Folded Reload +; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; NO-FMA-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; NO-FMA-NEXT: callq fmaf +; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; NO-FMA-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; NO-FMA-NEXT: callq fmaf +; NO-FMA-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload +; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0] +; NO-FMA-NEXT: addq $40, %rsp +; NO-FMA-NEXT: .cfi_def_cfa_offset 8 +; NO-FMA-NEXT: retq +; +; HAS-FMA-LABEL: constrained_vector_fma_v4f32: +; HAS-FMA: # %bb.0: # %entry +; HAS-FMA-NEXT: vmovaps {{.*#+}} xmm1 = [3.500000e+00,2.500000e+00,1.500000e+00,5.000000e-01] +; HAS-FMA-NEXT: vmovaps {{.*#+}} xmm0 = [7.500000e+00,6.500000e+00,5.500000e+00,4.500000e+00] +; HAS-FMA-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem +; HAS-FMA-NEXT: retq +entry: + %fma = call <4 x float> @llvm.experimental.constrained.fma.v4f32( + <4 x float> , + <4 x float> , + <4 x float> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <4 x float> %fma +} + define <2 x double> @constrained_vector_sqrt() { -; CHECK-LABEL: constrained_vector_sqrt: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sqrtpd {{.*}}(%rip), %xmm0 -; CHECK-NEXT: retq +; NO-FMA-LABEL: constrained_vector_sqrt: +; NO-FMA: # %bb.0: # %entry +; NO-FMA-NEXT: sqrtpd {{.*}}(%rip), %xmm0 +; NO-FMA-NEXT: retq +; +; HAS-FMA-LABEL: constrained_vector_sqrt: +; HAS-FMA: # %bb.0: # %entry +; HAS-FMA-NEXT: vsqrtpd {{.*}}(%rip), %xmm0 +; HAS-FMA-NEXT: retq entry: %sqrt = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64( <2 x double> , @@ -75,22 +191,39 @@ } define <2 x double> @constrained_vector_pow() { -; CHECK-LABEL: constrained_vector_pow: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: callq pow -; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: callq pow -; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload -; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] -; CHECK-NEXT: addq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; NO-FMA-LABEL: constrained_vector_pow: +; NO-FMA: # %bb.0: # %entry +; NO-FMA-NEXT: subq $24, %rsp +; NO-FMA-NEXT: .cfi_def_cfa_offset 32 +; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; NO-FMA-NEXT: callq pow +; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; NO-FMA-NEXT: callq pow +; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload +; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0] +; NO-FMA-NEXT: addq $24, %rsp +; NO-FMA-NEXT: .cfi_def_cfa_offset 8 +; NO-FMA-NEXT: retq +; +; HAS-FMA-LABEL: constrained_vector_pow: +; HAS-FMA: # %bb.0: # %entry +; HAS-FMA-NEXT: subq $24, %rsp +; HAS-FMA-NEXT: .cfi_def_cfa_offset 32 +; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; HAS-FMA-NEXT: callq pow +; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; HAS-FMA-NEXT: callq pow +; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0] +; HAS-FMA-NEXT: addq $24, %rsp +; HAS-FMA-NEXT: .cfi_def_cfa_offset 8 +; HAS-FMA-NEXT: retq entry: %pow = call <2 x double> @llvm.experimental.constrained.pow.v2f64( <2 x double> , @@ -100,21 +233,79 @@ ret <2 x double> %pow } +define <2 x double> @constrained_vector_powi() { +; NO-FMA-LABEL: constrained_vector_powi: +; NO-FMA: # %bb.0: # %entry +; NO-FMA-NEXT: subq $24, %rsp +; NO-FMA-NEXT: .cfi_def_cfa_offset 32 +; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; NO-FMA-NEXT: movl $3, %edi +; NO-FMA-NEXT: callq __powidf2 +; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; NO-FMA-NEXT: movl $3, %edi +; NO-FMA-NEXT: callq __powidf2 +; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload +; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0] +; NO-FMA-NEXT: addq $24, %rsp +; NO-FMA-NEXT: .cfi_def_cfa_offset 8 +; NO-FMA-NEXT: retq +; +; HAS-FMA-LABEL: constrained_vector_powi: +; HAS-FMA: # %bb.0: # %entry +; HAS-FMA-NEXT: subq $24, %rsp +; HAS-FMA-NEXT: .cfi_def_cfa_offset 32 +; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; HAS-FMA-NEXT: movl $3, %edi +; HAS-FMA-NEXT: callq __powidf2 +; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; HAS-FMA-NEXT: movl $3, %edi +; HAS-FMA-NEXT: callq __powidf2 +; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0] +; HAS-FMA-NEXT: addq $24, %rsp +; HAS-FMA-NEXT: .cfi_def_cfa_offset 8 +; HAS-FMA-NEXT: retq +entry: + %powi = call <2 x double> @llvm.experimental.constrained.powi.v2f64( + <2 x double> , + i32 3, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %powi +} + define <2 x double> @constrained_vector_sin() { -; CHECK-LABEL: constrained_vector_sin: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: callq sin -; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: callq sin -; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload -; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] -; CHECK-NEXT: addq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; NO-FMA-LABEL: constrained_vector_sin: +; NO-FMA: # %bb.0: # %entry +; NO-FMA-NEXT: subq $24, %rsp +; NO-FMA-NEXT: .cfi_def_cfa_offset 32 +; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; NO-FMA-NEXT: callq sin +; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; NO-FMA-NEXT: callq sin +; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload +; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0] +; NO-FMA-NEXT: addq $24, %rsp +; NO-FMA-NEXT: .cfi_def_cfa_offset 8 +; NO-FMA-NEXT: retq +; +; HAS-FMA-LABEL: constrained_vector_sin: +; HAS-FMA: # %bb.0: # %entry +; HAS-FMA-NEXT: subq $24, %rsp +; HAS-FMA-NEXT: .cfi_def_cfa_offset 32 +; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; HAS-FMA-NEXT: callq sin +; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; HAS-FMA-NEXT: callq sin +; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0] +; HAS-FMA-NEXT: addq $24, %rsp +; HAS-FMA-NEXT: .cfi_def_cfa_offset 8 +; HAS-FMA-NEXT: retq entry: %sin = call <2 x double> @llvm.experimental.constrained.sin.v2f64( <2 x double> , @@ -124,20 +315,35 @@ } define <2 x double> @constrained_vector_cos() { -; CHECK-LABEL: constrained_vector_cos: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: callq cos -; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: callq cos -; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload -; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] -; CHECK-NEXT: addq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; NO-FMA-LABEL: constrained_vector_cos: +; NO-FMA: # %bb.0: # %entry +; NO-FMA-NEXT: subq $24, %rsp +; NO-FMA-NEXT: .cfi_def_cfa_offset 32 +; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; NO-FMA-NEXT: callq cos +; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; NO-FMA-NEXT: callq cos +; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload +; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0] +; NO-FMA-NEXT: addq $24, %rsp +; NO-FMA-NEXT: .cfi_def_cfa_offset 8 +; NO-FMA-NEXT: retq +; +; HAS-FMA-LABEL: constrained_vector_cos: +; HAS-FMA: # %bb.0: # %entry +; HAS-FMA-NEXT: subq $24, %rsp +; HAS-FMA-NEXT: .cfi_def_cfa_offset 32 +; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; HAS-FMA-NEXT: callq cos +; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; HAS-FMA-NEXT: callq cos +; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0] +; HAS-FMA-NEXT: addq $24, %rsp +; HAS-FMA-NEXT: .cfi_def_cfa_offset 8 +; HAS-FMA-NEXT: retq entry: %cos = call <2 x double> @llvm.experimental.constrained.cos.v2f64( <2 x double> , @@ -147,20 +353,35 @@ } define <2 x double> @constrained_vector_exp() { -; CHECK-LABEL: constrained_vector_exp: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: callq exp -; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: callq exp -; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload -; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] -; CHECK-NEXT: addq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; NO-FMA-LABEL: constrained_vector_exp: +; NO-FMA: # %bb.0: # %entry +; NO-FMA-NEXT: subq $24, %rsp +; NO-FMA-NEXT: .cfi_def_cfa_offset 32 +; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; NO-FMA-NEXT: callq exp +; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; NO-FMA-NEXT: callq exp +; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload +; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0] +; NO-FMA-NEXT: addq $24, %rsp +; NO-FMA-NEXT: .cfi_def_cfa_offset 8 +; NO-FMA-NEXT: retq +; +; HAS-FMA-LABEL: constrained_vector_exp: +; HAS-FMA: # %bb.0: # %entry +; HAS-FMA-NEXT: subq $24, %rsp +; HAS-FMA-NEXT: .cfi_def_cfa_offset 32 +; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; HAS-FMA-NEXT: callq exp +; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; HAS-FMA-NEXT: callq exp +; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0] +; HAS-FMA-NEXT: addq $24, %rsp +; HAS-FMA-NEXT: .cfi_def_cfa_offset 8 +; HAS-FMA-NEXT: retq entry: %exp = call <2 x double> @llvm.experimental.constrained.exp.v2f64( <2 x double> , @@ -170,20 +391,35 @@ } define <2 x double> @constrained_vector_exp2() { -; CHECK-LABEL: constrained_vector_exp2: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: callq exp2 -; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: callq exp2 -; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload -; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] -; CHECK-NEXT: addq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; NO-FMA-LABEL: constrained_vector_exp2: +; NO-FMA: # %bb.0: # %entry +; NO-FMA-NEXT: subq $24, %rsp +; NO-FMA-NEXT: .cfi_def_cfa_offset 32 +; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; NO-FMA-NEXT: callq exp2 +; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; NO-FMA-NEXT: callq exp2 +; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload +; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0] +; NO-FMA-NEXT: addq $24, %rsp +; NO-FMA-NEXT: .cfi_def_cfa_offset 8 +; NO-FMA-NEXT: retq +; +; HAS-FMA-LABEL: constrained_vector_exp2: +; HAS-FMA: # %bb.0: # %entry +; HAS-FMA-NEXT: subq $24, %rsp +; HAS-FMA-NEXT: .cfi_def_cfa_offset 32 +; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; HAS-FMA-NEXT: callq exp2 +; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; HAS-FMA-NEXT: callq exp2 +; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0] +; HAS-FMA-NEXT: addq $24, %rsp +; HAS-FMA-NEXT: .cfi_def_cfa_offset 8 +; HAS-FMA-NEXT: retq entry: %exp2 = call <2 x double> @llvm.experimental.constrained.exp2.v2f64( <2 x double> , @@ -193,20 +429,35 @@ } define <2 x double> @constrained_vector_log() { -; CHECK-LABEL: constrained_vector_log: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: callq log -; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: callq log -; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload -; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] -; CHECK-NEXT: addq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; NO-FMA-LABEL: constrained_vector_log: +; NO-FMA: # %bb.0: # %entry +; NO-FMA-NEXT: subq $24, %rsp +; NO-FMA-NEXT: .cfi_def_cfa_offset 32 +; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; NO-FMA-NEXT: callq log +; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; NO-FMA-NEXT: callq log +; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload +; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0] +; NO-FMA-NEXT: addq $24, %rsp +; NO-FMA-NEXT: .cfi_def_cfa_offset 8 +; NO-FMA-NEXT: retq +; +; HAS-FMA-LABEL: constrained_vector_log: +; HAS-FMA: # %bb.0: # %entry +; HAS-FMA-NEXT: subq $24, %rsp +; HAS-FMA-NEXT: .cfi_def_cfa_offset 32 +; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; HAS-FMA-NEXT: callq log +; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; HAS-FMA-NEXT: callq log +; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0] +; HAS-FMA-NEXT: addq $24, %rsp +; HAS-FMA-NEXT: .cfi_def_cfa_offset 8 +; HAS-FMA-NEXT: retq entry: %log = call <2 x double> @llvm.experimental.constrained.log.v2f64( <2 x double> , @@ -216,20 +467,35 @@ } define <2 x double> @constrained_vector_log10() { -; CHECK-LABEL: constrained_vector_log10: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: callq log10 -; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: callq log10 -; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload -; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] -; CHECK-NEXT: addq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; NO-FMA-LABEL: constrained_vector_log10: +; NO-FMA: # %bb.0: # %entry +; NO-FMA-NEXT: subq $24, %rsp +; NO-FMA-NEXT: .cfi_def_cfa_offset 32 +; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; NO-FMA-NEXT: callq log10 +; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; NO-FMA-NEXT: callq log10 +; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload +; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0] +; NO-FMA-NEXT: addq $24, %rsp +; NO-FMA-NEXT: .cfi_def_cfa_offset 8 +; NO-FMA-NEXT: retq +; +; HAS-FMA-LABEL: constrained_vector_log10: +; HAS-FMA: # %bb.0: # %entry +; HAS-FMA-NEXT: subq $24, %rsp +; HAS-FMA-NEXT: .cfi_def_cfa_offset 32 +; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; HAS-FMA-NEXT: callq log10 +; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; HAS-FMA-NEXT: callq log10 +; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0] +; HAS-FMA-NEXT: addq $24, %rsp +; HAS-FMA-NEXT: .cfi_def_cfa_offset 8 +; HAS-FMA-NEXT: retq entry: %log10 = call <2 x double> @llvm.experimental.constrained.log10.v2f64( <2 x double> , @@ -239,20 +505,35 @@ } define <2 x double> @constrained_vector_log2() { -; CHECK-LABEL: constrained_vector_log2: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: callq log2 -; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: callq log2 -; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload -; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] -; CHECK-NEXT: addq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; NO-FMA-LABEL: constrained_vector_log2: +; NO-FMA: # %bb.0: # %entry +; NO-FMA-NEXT: subq $24, %rsp +; NO-FMA-NEXT: .cfi_def_cfa_offset 32 +; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; NO-FMA-NEXT: callq log2 +; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; NO-FMA-NEXT: callq log2 +; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload +; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0] +; NO-FMA-NEXT: addq $24, %rsp +; NO-FMA-NEXT: .cfi_def_cfa_offset 8 +; NO-FMA-NEXT: retq +; +; HAS-FMA-LABEL: constrained_vector_log2: +; HAS-FMA: # %bb.0: # %entry +; HAS-FMA-NEXT: subq $24, %rsp +; HAS-FMA-NEXT: .cfi_def_cfa_offset 32 +; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; HAS-FMA-NEXT: callq log2 +; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; HAS-FMA-NEXT: callq log2 +; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0] +; HAS-FMA-NEXT: addq $24, %rsp +; HAS-FMA-NEXT: .cfi_def_cfa_offset 8 +; HAS-FMA-NEXT: retq entry: %log2 = call <2 x double> @llvm.experimental.constrained.log2.v2f64( <2 x double> , @@ -262,20 +543,25 @@ } define <2 x double> @constrained_vector_rint() { -; CHECK-LABEL: constrained_vector_rint: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: callq rint -; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: callq rint -; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload -; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] -; CHECK-NEXT: addq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; NO-FMA-LABEL: constrained_vector_rint: +; NO-FMA: # %bb.0: # %entry +; NO-FMA-NEXT: subq $24, %rsp +; NO-FMA-NEXT: .cfi_def_cfa_offset 32 +; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; NO-FMA-NEXT: callq rint +; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; NO-FMA-NEXT: callq rint +; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload +; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0] +; NO-FMA-NEXT: addq $24, %rsp +; NO-FMA-NEXT: .cfi_def_cfa_offset 8 +; NO-FMA-NEXT: retq +; +; HAS-FMA-LABEL: constrained_vector_rint: +; HAS-FMA: # %bb.0: # %entry +; HAS-FMA-NEXT: vroundpd $4, {{.*}}(%rip), %xmm0 +; HAS-FMA-NEXT: retq entry: %rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64( <2 x double> , @@ -285,20 +571,25 @@ } define <2 x double> @constrained_vector_nearbyint() { -; CHECK-LABEL: constrained_vector_nearbyint: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: callq nearbyint -; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: callq nearbyint -; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload -; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] -; CHECK-NEXT: addq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; NO-FMA-LABEL: constrained_vector_nearbyint: +; NO-FMA: # %bb.0: # %entry +; NO-FMA-NEXT: subq $24, %rsp +; NO-FMA-NEXT: .cfi_def_cfa_offset 32 +; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; NO-FMA-NEXT: callq nearbyint +; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; NO-FMA-NEXT: callq nearbyint +; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload +; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0] +; NO-FMA-NEXT: addq $24, %rsp +; NO-FMA-NEXT: .cfi_def_cfa_offset 8 +; NO-FMA-NEXT: retq +; +; HAS-FMA-LABEL: constrained_vector_nearbyint: +; HAS-FMA: # %bb.0: # %entry +; HAS-FMA-NEXT: vroundpd $12, {{.*}}(%rip), %xmm0 +; HAS-FMA-NEXT: retq entry: %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64( <2 x double> , @@ -312,9 +603,11 @@ declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata) -declare <4 x double> @llvm.experimental.constrained.sqrt.v4f64(<4 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.pow.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.powi.v2f64(<2 x double>, i32, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata)