Index: lib/Target/X86/X86InstrAVX512.td
===================================================================
--- lib/Target/X86/X86InstrAVX512.td
+++ lib/Target/X86/X86InstrAVX512.td
@@ -3362,6 +3362,55 @@
 defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fminRnd, SSE_ALU_ITINS_S, 1>;
 defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxRnd, SSE_ALU_ITINS_S, 1>;
 
+// Also match the pre-AVX512 intrinsics.
+// FIXME: Also support memory folding.
+//
+// Single-precision pattern: selects IntNode applied to two VR128X operands to
+// the instruction SSZIrr. Both operands are copied to FR32 for the
+// instruction and its result is copied back to VR128X.
+// NOTE(review): the pattern itself does not say where the upper vector
+// elements of the result come from - confirm the FR32 round trip is intended.
+multiclass avx512_sse_scalar_ss_intrin_patterns<Intrinsic IntNode,
+                                                Instruction SSZIrr> {
+  let Predicates = [HasAVX512] in {
+    def : Pat<(IntNode VR128X:$src1, VR128X:$src2),
+              (COPY_TO_REGCLASS (SSZIrr (COPY_TO_REGCLASS VR128X:$src1, FR32),
+                                        (COPY_TO_REGCLASS VR128X:$src2, FR32)),
+                                VR128X)>;
+  }
+}
+
+// Double-precision pattern: same shape as the single-precision one, with
+// SDZIrr as the instruction and FR64 as the scalar register class.
+multiclass avx512_sse_scalar_sd_intrin_patterns<Intrinsic IntNode,
+                                                Instruction SDZIrr> {
+  let Predicates = [HasAVX512] in {
+    def : Pat<(IntNode VR128X:$src1, VR128X:$src2),
+              (COPY_TO_REGCLASS (SDZIrr (COPY_TO_REGCLASS VR128X:$src1, FR64),
+                                        (COPY_TO_REGCLASS VR128X:$src2, FR64)),
+                                VR128X)>;
+  }
+}
+
+// Instantiates both patterns for one operation. OpcodeStr selects the
+// instructions V<OpcodeStr>SSZrr and V<OpcodeStr>SDZrr; IntStr selects the
+// intrinsics int_x86_sse_<IntStr>_ss and int_x86_sse2_<IntStr>_sd.
+multiclass avx512_sse_scalar_intrin_patterns<string OpcodeStr, string IntStr> {
+  defm : avx512_sse_scalar_ss_intrin_patterns<
+             !cast<Intrinsic>("int_x86_sse_"#IntStr#"_ss"),
+             !cast<Instruction>("V"#OpcodeStr#"SSZrr")>;
+  defm : avx512_sse_scalar_sd_intrin_patterns<
+             !cast<Intrinsic>("int_x86_sse2_"#IntStr#"_sd"),
+             !cast<Instruction>("V"#OpcodeStr#"SDZrr")>;
+}
+
+defm : avx512_sse_scalar_intrin_patterns<"ADD", "add">;
+defm : avx512_sse_scalar_intrin_patterns<"MUL", "mul">;
+defm : avx512_sse_scalar_intrin_patterns<"SUB", "sub">;
+defm : avx512_sse_scalar_intrin_patterns<"DIV", "div">;
+defm : avx512_sse_scalar_intrin_patterns<"MIN", "min">;
+defm : avx512_sse_scalar_intrin_patterns<"MAX", "max">;
+
 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             X86VectorVTInfo _, bit IsCommutable> {
   defm rr: AVX512_maskable
+define <4 x float> @test_maxss(<4 x float> %a0, <4 x float> %a1) #0 {
+  %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
+  ret <4 x float> %res
+}
+
+; CHECK-LABEL: test_maxsd:
+; CHECK-NEXT: BB#0:
+; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x08,0x5f,0xc1]
+; CHECK-NEXT: retq # encoding: [0xc3]
+define <2 x double> @test_maxsd(<2 x double> %a0, <2 x double> %a1) #0 {
+  %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
+  ret <2 x double> %res
+}
+
+; CHECK-LABEL: test_maxps:
+; CHECK-NEXT: BB#0:
+; CHECK-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5f,0xc1]
+; CHECK-NEXT: retq # encoding: [0xc3]
+define <4 x float> @test_maxps(<4 x float> %a0, <4 x float> %a1) #0 {
+  %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
+  ret <4 x float> %res
+}
+
+; CHECK-LABEL: test_maxpd:
+; CHECK-NEXT: BB#0:
+; CHECK-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5f,0xc1]
+; CHECK-NEXT: retq # encoding: [0xc3]
+define <2 x double> @test_maxpd(<2 x double> %a0, <2 x double> %a1) #0 {
+  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
+  ret <2 x double> %res
+}
+
+declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>)
+declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>)
+
+;====== VMIN ==================================================================;
+
+; CHECK-LABEL: test_minss:
+; CHECK-NEXT: BB#0:
+; CHECK-NEXT: vminss %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7e,0x08,0x5d,0xc1]
+; CHECK-NEXT: retq # encoding: [0xc3]
+define <4 x float> @test_minss(<4 x float> %a0, <4 x float> %a1) #0 {
+  %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
+  ret <4 x float> %res
+}
+
+; CHECK-LABEL: test_minsd:
+; CHECK-NEXT: BB#0:
+; CHECK-NEXT: vminsd %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x08,0x5d,0xc1]
+; CHECK-NEXT: retq # encoding: [0xc3]
+define <2 x double> @test_minsd(<2 x double> %a0, <2 x double> %a1) #0 {
+  %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
+  ret <2 x double> %res
+}
+
+; CHECK-LABEL: test_minps:
+; CHECK-NEXT: BB#0:
+; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5d,0xc1]
+; CHECK-NEXT: retq # encoding: [0xc3]
+define <4 x float> @test_minps(<4 x float> %a0, <4 x float> %a1) #0 {
+  %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
+  ret <4 x float> %res
+}
+
+; CHECK-LABEL: test_minpd:
+; CHECK-NEXT: BB#0:
+; CHECK-NEXT: vminpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5d,0xc1]
+; CHECK-NEXT: retq # encoding: [0xc3]
+define <2 x double> @test_minpd(<2 x double> %a0, <2 x double> %a1) #0 {
+  %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
+  ret <2 x double> %res
+}
+
+declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>)
+declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>)
+
+;====== VADD ==================================================================;
+
+; CHECK-LABEL: test_addss:
+; CHECK-NEXT: BB#0:
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7e,0x08,0x58,0xc1]
+; CHECK-NEXT: retq # encoding: [0xc3]
+define <4 x float> @test_addss(<4 x float> %a0, <4 x float> %a1) #0 {
+  %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1)
+  ret <4 x float> %res
+}
+
+; CHECK-LABEL: test_addsd:
+; CHECK-NEXT: BB#0:
+; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x08,0x58,0xc1]
+; CHECK-NEXT: retq # encoding: [0xc3]
+define <2 x double> @test_addsd(<2 x double> %a0, <2 x double> %a1) #0 {
+  %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
+  ret <2 x double> %res
+}
+
+declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>)
+
+;====== VSUB ==================================================================;
+
+; CHECK-LABEL: test_subss:
+; CHECK-NEXT: BB#0:
+; CHECK-NEXT: vsubss %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7e,0x08,0x5c,0xc1]
+; CHECK-NEXT: retq # encoding: [0xc3]
+define <4 x float> @test_subss(<4 x float> %a0, <4 x float> %a1) #0 {
+  %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1)
+  ret <4 x float> %res
+}
+
+; CHECK-LABEL: test_subsd:
+; CHECK-NEXT: BB#0:
+; CHECK-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x08,0x5c,0xc1]
+; CHECK-NEXT: retq # encoding: [0xc3]
+define <2 x double> @test_subsd(<2 x double> %a0, <2 x double> %a1) #0 {
+  %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1)
+  ret <2 x double> %res
+}
+
+declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>)
+
+;====== VMUL ==================================================================;
+
+; CHECK-LABEL: test_mulss:
+; CHECK-NEXT: BB#0:
+; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7e,0x08,0x59,0xc1]
+; CHECK-NEXT: retq # encoding: [0xc3]
+define <4 x float> @test_mulss(<4 x float> %a0, <4 x float> %a1) #0 {
+  %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1)
+  ret <4 x float> %res
+}
+
+; CHECK-LABEL: test_mulsd:
+; CHECK-NEXT: BB#0:
+; CHECK-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x08,0x59,0xc1]
+; CHECK-NEXT: retq # encoding: [0xc3]
+define <2 x double> @test_mulsd(<2 x double> %a0, <2 x double> %a1) #0 {
+  %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1)
+  ret <2 x double> %res
+}
+
+declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>)
+
+;====== VDIV ==================================================================;
+
+; CHECK-LABEL: test_divss:
+; CHECK-NEXT: BB#0:
+; CHECK-NEXT: vdivss %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7e,0x08,0x5e,0xc1]
+; CHECK-NEXT: retq # encoding: [0xc3]
+define <4 x float> @test_divss(<4 x float> %a0, <4 x float> %a1) #0 {
+  %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1)
+  ret <4 x float> %res
+}
+
+; CHECK-LABEL: test_divsd:
+; CHECK-NEXT: BB#0:
+; CHECK-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x08,0x5e,0xc1]
+; CHECK-NEXT: retq # encoding: [0xc3]
+define <2 x double> @test_divsd(<2 x double> %a0, <2 x double> %a1) #0 {
+  %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
+  ret <2 x double> %res
+}
+
+declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>)
+
+attributes #0 = { nounwind }