Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -31957,6 +31957,7 @@ case 'u': case 'y': case 'x': + case 'v': case 'Y': case 'l': return C_RegisterClass; @@ -32026,6 +32027,10 @@ if (type->isX86_MMXTy() && Subtarget.hasMMX()) weight = CW_SpecificReg; break; + case 'v': + if ((type->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512()) + weight = CW_Register; + LLVM_FALLTHROUGH; case 'x': case 'Y': if (((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) || @@ -32362,17 +32367,23 @@ case 'Y': // SSE_REGS if SSE2 allowed if (!Subtarget.hasSSE2()) break; LLVM_FALLTHROUGH; + case 'v': case 'x': // SSE_REGS if SSE1 allowed or AVX_REGS if AVX allowed if (!Subtarget.hasSSE1()) break; + bool VConstraint = (Constraint[0] == 'v'); switch (VT.SimpleTy) { default: break; // Scalar SSE types. case MVT::f32: case MVT::i32: + if (VConstraint && Subtarget.hasAVX512() && Subtarget.hasVLX()) + return std::make_pair(0U, &X86::FR32XRegClass); return std::make_pair(0U, &X86::FR32RegClass); case MVT::f64: case MVT::i64: + if (VConstraint && Subtarget.hasVLX()) + return std::make_pair(0U, &X86::FR64XRegClass); return std::make_pair(0U, &X86::FR64RegClass); // TODO: Handle f128 and i128 in FR128RegClass after it is tested well. // Vector types. @@ -32382,6 +32393,8 @@ case MVT::v2i64: case MVT::v4f32: case MVT::v2f64: + if (VConstraint && Subtarget.hasVLX()) + return std::make_pair(0U, &X86::VR128XRegClass); return std::make_pair(0U, &X86::VR128RegClass); // AVX types. case MVT::v32i8: @@ -32390,6 +32403,8 @@ case MVT::v4i64: case MVT::v8f32: case MVT::v4f64: + if (VConstraint && Subtarget.hasVLX()) + return std::make_pair(0U, &X86::VR256XRegClass); return std::make_pair(0U, &X86::VR256RegClass); case MVT::v8f64: case MVT::v16f32: Index: test/CodeGen/X86/inline-asm-avx-v-constraint-32bit.ll =================================================================== --- test/CodeGen/X86/inline-asm-avx-v-constraint-32bit.ll +++ test/CodeGen/X86/inline-asm-avx-v-constraint-32bit.ll @@ -0,0 +1,136 @@ +; RUN: not llc < %s -mtriple i386-unknown-linux-gnu -mattr +avx 1> /dev/null 2> %t +; RUN: FileCheck %s --input-file %t + +define <4 x float> @testXMM_1(<4 x float> %_xmm0, i32 %_l) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <4 x float> asm "vmovhlps $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l, <4 x float> %_xmm0) + ret <4 x float> %0 +} + +define <4 x float> @testXMM_2(<4 x float> %_xmm0, i32 %_l) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <4 x float> asm "movapd $1, $0", "=v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l) + ret <4 x float> %0 +} + +define <4 x float> @testXMM_3(<4 x float> %_xmm0, i32 %_l) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <4 x float> asm "vmovapd $1, $0", "=v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l) + ret <4 x float> %0 +} + +define <4 x float> @testXMM_4(<4 x float> %_xmm0, i32 %_l) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <4 x float> asm "vmpsadbw $$0, $1, $2, $0", 
"=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l, <4 x float> %_xmm0) + ret <4 x float> %0 +} + +define <4 x float> @testXMM_5(<4 x float> %_xmm0, i32 %_l) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <4 x float> asm "vminpd $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l, i32 %_l) + ret <4 x float> %0 +} + +define i32 @testXMM_6(i32 returned %_l) { +; CHECK: error: inline assembly requires more registers than available +entry: + tail call void asm sideeffect "vmovd $0, %eax", "v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l) + ret i32 %_l +} + +define <4 x float> @testXMM_7(<4 x float> returned %_xmm0) { +; CHECK: error: inline assembly requires more registers than available +entry: + tail call void asm sideeffect "vmovmskps $0, %eax", "v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(<4 x float> %_xmm0) + ret <4 x float> %_xmm0 +} + +define i32 @testXMM_8(<4 x float> %_xmm0, i32 %_l) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call i32 asm "vmulsd $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l, <4 x float> %_xmm0) + ret i32 %0 +} + +define <4 x float> @testXMM_9(<4 x float> %_xmm0, i32 %_l) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <4 x float> asm "vorpd $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l, <4 x float> %_xmm0) + ret <4 x float> %0 +} + +define <4 x float> @testXMM_10(<4 x float> %_xmm0, i32 %_l) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <4 x float> asm "pabsb $1, $0", "=v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l) + ret <4 x float> %0 +} + +define <4 x float> @testXMM_11(<4 x float> %_xmm0, i32 %_l) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <4 x float> asm "vpabsd $1, $0", "=v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l) + ret <4 x float> %0 +} + +define <8 x float> @testYMM_1(<8 x float> %_ymm0, <8 x float> %_ymm1) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <8 x float> asm "vmovsldup $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm0) + ret <8 x float> %0 +} + +define <8 x float> @testYMM_2(<8 x float> %_ymm0, <8 x float> %_ymm1) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <8 x float> asm "vmovapd $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1) + ret <8 x float> %0 +} + +define <8 x float> @testYMM_3(<8 x float> %_ymm0, <8 x float> %_ymm1) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <8 x float> asm "vminpd $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0) + ret <8 x float> %0 +} + +define <8 x 
float> @testYMM_4(<8 x float> %_ymm0, <8 x float> %_ymm1) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <8 x float> asm "vorpd $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0) + ret <8 x float> %0 +} + +define <8 x float> @testYMM(<8 x float> %_ymm0, <8 x float> %_ymm1) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <8 x float> asm "vmulps $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0) + ret <8 x float> %0 +} + +define <8 x float> @testYMM_6(<8 x float> %_ymm0, <8 x float> %_ymm1) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <8 x float> asm "vmulpd $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0) + ret <8 x float> %0 +} + +define <8 x float> @testYMM_7(<8 x float> %_ymm0, <8 x float> %_ymm1) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <8 x float> asm "vmovups $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1) + ret <8 x float> %0 +} + +define <8 x float> @testYMM_8(<8 x float> %_ymm0, <8 x float> %_ymm1) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <8 x float> asm "vmovupd $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1) + ret <8 x float> %0 +} + Index: test/CodeGen/X86/inline-asm-avx-v-constraint.ll =================================================================== --- test/CodeGen/X86/inline-asm-avx-v-constraint.ll +++ test/CodeGen/X86/inline-asm-avx-v-constraint.ll @@ -0,0 +1,136 @@ +; RUN: llc < %s -march x86-64 -mtriple x86_64-unknown-linux-gnu -mattr +avx | FileCheck %s +; RUN: llc < %s -march x86-64 -mtriple x86_64-unknown-linux-gnu -mattr +avx512f | FileCheck %s + +define <4 x float> @testXMM_1(<4 x float> %_xmm0, i64 %_l) { +; CHECK: vmovhlps %xmm1, %xmm0, %xmm0 +entry: + %0 = tail call <4 x float> asm "vmovhlps $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(i64 %_l, <4 x float> %_xmm0) + ret <4 x float> %0 +} + +define <4 x float> @testXMM_2(<4 x float> %_xmm0, i64 %_l) { +; CHECK: movapd %xmm0, %xmm0 +entry: + %0 = tail call <4 x float> asm "movapd $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(i64 %_l) + ret <4 x float> %0 +} + +define <4 x float> @testXMM_3(<4 x float> %_xmm0, i64 %_l) { +; CHECK: vmovapd %xmm0, %xmm0 +entry: + %0 = tail call <4 x float> asm "vmovapd $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(i64 %_l) + ret <4 x float> %0 +} + +define <4 x float> @testXMM_4(<4 x float> %_xmm0, i64 %_l) { +; CHECK: vmpsadbw $0, %xmm1, %xmm0, %xmm0 +entry: + %0 = tail call <4 x float> asm "vmpsadbw $$0, $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(i64 %_l, <4 x float> %_xmm0) + ret <4 x float> %0 +} + +define <4 x float> @testXMM_5(<4 x float> %_xmm0, i64 %_l) { +; CHECK: vminpd %xmm0, %xmm0, %xmm0 +entry: + %0 = tail call <4 x float> asm "vminpd $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(i64 %_l, i64 %_l) + ret <4 x float> %0 +} + +define i64 @testXMM_6(i64 returned %_l) { +; CHECK: vmovd %xmm0, %eax +entry: + tail call void asm 
sideeffect "vmovd $0, %eax", "v,~{dirflag},~{fpsr},~{flags}"(i64 %_l) + ret i64 %_l +} + +define <4 x float> @testXMM_7(<4 x float> returned %_xmm0) { +; CHECK: vmovmskps %xmm0, %eax +entry: + tail call void asm sideeffect "vmovmskps $0, %rax", "v,~{dirflag},~{fpsr},~{flags}"(<4 x float> %_xmm0) + ret <4 x float> %_xmm0 +} + +define i64 @testXMM_8(<4 x float> %_xmm0, i64 %_l) { +; CHECK: vmulsd %xmm1, %xmm0, %xmm0 +entry: + %0 = tail call i64 asm "vmulsd $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(i64 %_l, <4 x float> %_xmm0) + ret i64 %0 +} + +define <4 x float> @testXMM_9(<4 x float> %_xmm0, i64 %_l) { +; CHECK: vorpd %xmm1, %xmm0, %xmm0 +entry: + %0 = tail call <4 x float> asm "vorpd $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(i64 %_l, <4 x float> %_xmm0) + ret <4 x float> %0 +} + +define <4 x float> @testXMM_10(<4 x float> %_xmm0, i64 %_l) { +; CHECK: pabsb %xmm0, %xmm0 +entry: + %0 = tail call <4 x float> asm "pabsb $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(i64 %_l) + ret <4 x float> %0 +} + +define <4 x float> @testXMM_11(<4 x float> %_xmm0, i64 %_l) { +; CHECK: vpabsd %xmm0, %xmm0 +entry: + %0 = tail call <4 x float> asm "vpabsd $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(i64 %_l) + ret <4 x float> %0 +} + +define <8 x float> @testYMM_1(<8 x float> %_ymm0, <8 x float> %_ymm1) { +; CHECK: vmovsldup %ymm0, %ymm0 +entry: + %0 = tail call <8 x float> asm "vmovsldup $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm0) + ret <8 x float> %0 +} + +define <8 x float> @testYMM_2(<8 x float> %_ymm0, <8 x float> %_ymm1) { +; CHECK: vmovapd %ymm1, %ymm0 +entry: + %0 = tail call <8 x float> asm "vmovapd $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1) + ret <8 x float> %0 +} + +define <8 x float> @testYMM_3(<8 x float> %_ymm0, <8 x float> %_ymm1) { +; CHECK: vminpd %ymm1, %ymm0, %ymm0 +entry: + %0 = tail call <8 x float> asm "vminpd $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0) + ret <8 x float> %0 +} + +define <8 x float> @testYMM_4(<8 x float> %_ymm0, <8 x float> %_ymm1) { +; CHECK: vorpd %ymm1, %ymm0, %ymm0 +entry: + %0 = tail call <8 x float> asm "vorpd $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0) + ret <8 x float> %0 +} + +define <8 x float> @testYMM(<8 x float> %_ymm0, <8 x float> %_ymm1) { +; CHECK: vmulps %ymm1, %ymm0, %ymm0 +entry: + %0 = tail call <8 x float> asm "vmulps $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0) + ret <8 x float> %0 +} + +define <8 x float> @testYMM_6(<8 x float> %_ymm0, <8 x float> %_ymm1) { +; CHECK: vmulpd %ymm1, %ymm0, %ymm0 +entry: + %0 = tail call <8 x float> asm "vmulpd $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0) + ret <8 x float> %0 +} + +define <8 x float> @testYMM_7(<8 x float> %_ymm0, <8 x float> %_ymm1) { +; CHECK: vmovups %ymm1, %ymm0 +entry: + %0 = tail call <8 x float> asm "vmovups $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1) + ret <8 x float> %0 +} + +define <8 x float> @testYMM_8(<8 x float> %_ymm0, <8 x float> %_ymm1) { +; CHECK: vmovupd %ymm1, %ymm0 +entry: + %0 = tail call <8 x float> asm "vmovupd $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1) + ret <8 x float> %0 +} + Index: test/CodeGen/X86/inline-asm-avx512f-v-constraint.ll =================================================================== --- test/CodeGen/X86/inline-asm-avx512f-v-constraint.ll +++ 
test/CodeGen/X86/inline-asm-avx512f-v-constraint.ll @@ -0,0 +1,72 @@ +; RUN: llc < %s -march x86-64 -mtriple x86_64-unknown-linux-gnu -mattr +avx512f | FileCheck %s + +define <16 x float> @testZMM_1(<16 x float> %_zmm0, <16 x float> %_zmm1) { +entry: +; CHECK: vpternlogd $0, %zmm1, %zmm0, %zmm0 + %0 = tail call <16 x float> asm "vpternlogd $$0, $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<16 x float> %_zmm1, <16 x float> %_zmm0) + ret <16 x float> %0 +} + +define <16 x float> @testZMM_2(<16 x float> %_zmm0, <16 x float> %_zmm1) { +entry: +; CHECK: vpabsq %zmm1, %zmm0 + %0 = tail call <16 x float> asm "vpabsq $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<16 x float> %_zmm1) + ret <16 x float> %0 +} + + +define <16 x float> @testZMM_3(<16 x float> %_zmm0, <16 x float> %_zmm1) { +entry: +; CHECK: vpaddd %zmm1, %zmm1, %zmm0 + %0 = tail call <16 x float> asm "vpaddd $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<16 x float> %_zmm1, <16 x float> %_zmm1) + ret <16 x float> %0 +} + + +define <16 x float> @testZMM_4(<16 x float> %_zmm0, <16 x float> %_zmm1) { +entry: +; CHECK: vpaddq %zmm1, %zmm1, %zmm0 + %0 = tail call <16 x float> asm "vpaddq $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<16 x float> %_zmm1, <16 x float> %_zmm1) + ret <16 x float> %0 +} + + +define <16 x float> @testZMM_5(<16 x float> %_zmm0, <16 x float> %_zmm1) { +entry: +; CHECK: vpandd %zmm1, %zmm1, %zmm0 + %0 = tail call <16 x float> asm "vpandd $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<16 x float> %_zmm1, <16 x float> %_zmm1) + ret <16 x float> %0 +} + + +define <16 x float> @testZMM_6(<16 x float> %_zmm0, <16 x float> %_zmm1) { +entry: +; CHECK: vpandnd %zmm1, %zmm1, %zmm0 + %0 = tail call <16 x float> asm "vpandnd $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<16 x float> %_zmm1, <16 x float> %_zmm1) + ret <16 x float> %0 +} + + +define <16 x float> @testZMM_7(<16 x float> %_zmm0, <16 x float> %_zmm1) { +entry: +; CHECK: vpmaxsd %zmm1, %zmm1, %zmm0 + %0 = tail call <16 x float> asm "vpmaxsd $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<16 x float> %_zmm1, <16 x float> %_zmm1) + ret <16 x float> %0 +} + + +define <16 x float> @testZMM_8(<16 x float> %_zmm0, <16 x float> %_zmm1) { +entry: +; CHECK: vmovups %zmm1, %zmm0 + %0 = tail call <16 x float> asm "vmovups $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<16 x float> %_zmm1) + ret <16 x float> %0 +} + + +define <16 x float> @testZMM_9(<16 x float> %_zmm0, <16 x float> %_zmm1) { +entry: +; CHECK: vmovupd %zmm1, %zmm0 + %0 = tail call <16 x float> asm "vmovupd $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<16 x float> %_zmm1) + ret <16 x float> %0 +} + Index: test/CodeGen/X86/inline-asm-avx512vl-v-constraint-32bit.ll =================================================================== --- test/CodeGen/X86/inline-asm-avx512vl-v-constraint-32bit.ll +++ test/CodeGen/X86/inline-asm-avx512vl-v-constraint-32bit.ll @@ -0,0 +1,138 @@ +; RUN: not llc < %s -mtriple i386-unknown-linux-gnu -mattr +avx512vl 1> /dev/null 2> %t +; RUN: FileCheck %s --input-file %t + +define <4 x float> @testXMM_1(<4 x float> %_xmm0, i64 %_l) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <4 x float> asm "vmovhlps $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i64 %_l, <4 x float> %_xmm0) + ret <4 x float> %0 +} + + +define <4 x float> @testXMM_2(<4 x float> %_xmm0, i64 %_l) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = 
tail call <4 x float> asm "vmovapd $1, $0", "=v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i64 %_l) + ret <4 x float> %0 +} + + +define <4 x float> @testXMM_3(<4 x float> %_xmm0, i64 %_l) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <4 x float> asm "vminpd $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i64 %_l, i64 %_l) + ret <4 x float> %0 +} + + +define i64 @testXMM_4(<4 x float> %_xmm0, i64 %_l) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call i64 asm "vmulsd $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i64 %_l, <4 x float> %_xmm0) + ret i64 %0 +} + + +define <4 x float> @testXMM_5(<4 x float> %_xmm0, i64 %_l) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <4 x float> asm "vpabsq $1, $0", "=v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i64 %_l) + ret <4 x float> %0 +} + + +define <4 x float> @testXMM_6(<4 x float> %_xmm0, i64 %_l) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <4 x float> asm "vpandd $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(<4 x float> %_xmm0, i64 %_l) + ret <4 x float> %0 +} + + +define <4 x float> @testXMM_7(<4 x float> %_xmm0, i64 %_l) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <4 x float> asm "vpandnd $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(<4 x float> %_xmm0, i64 %_l) + ret <4 x float> %0 +} + + +define <8 x float> @testYMM_1(<8 x float> %_ymm0, <8 x float> %_ymm1) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <8 x float> asm "vmovsldup $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1) + ret <8 x float> %0 +} + + +define <8 x float> @testYMM_2(<8 x float> %_ymm0, <8 x float> %_ymm1) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <8 x float> asm "vmovapd $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1) + ret <8 x float> %0 +} + + +define <8 x float> @testYMM_3(<8 x float> %_ymm0, <8 x float> %_ymm1) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <8 x float> asm "vminpd $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm1) + ret <8 x float> %0 +} + + +define <8 x float> @testYMM_4(<8 x float> %_ymm0, <8 x float> %_ymm1) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <8 x float> asm "vpabsq $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1) + ret <8 x float> %0 +} + + +define <8 x float> @testYMM_5(<8 x float> %_ymm0, <8 x float> %_ymm1) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <8 x float> asm "vpandd $1, $2, $0", 
"=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0) + ret <8 x float> %0 +} + + +define <8 x float> @testYMM_6(<8 x float> %_ymm0, <8 x float> %_ymm1) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <8 x float> asm "vpandnd $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0) + ret <8 x float> %0 +} + + +define <8 x float> @testYMM_7(<8 x float> %_ymm0, <8 x float> %_ymm1) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <8 x float> asm "vpminud $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0) + ret <8 x float> %0 +} + + +define <8 x float> @testYMM_8(<8 x float> %_ymm0, <8 x float> %_ymm1) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <8 x float> asm "vpmaxsd $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0) + ret <8 x float> %0 +} + + +define <8 x float> @testYMM_9(<8 x float> %_ymm0, <8 x float> %_ymm1) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <8 x float> asm "vmovups $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1) + ret <8 x float> %0 +} + + +define <8 x float> @testYMM_10(<8 x float> %_ymm0, <8 x float> %_ymm1) { +; CHECK: error: inline assembly requires more registers than available +entry: + %0 = tail call <8 x float> asm "vmovupd $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1) + ret <8 x float> %0 +} + Index: test/CodeGen/X86/inline-asm-avx512vl-v-constraint.ll =================================================================== --- test/CodeGen/X86/inline-asm-avx512vl-v-constraint.ll +++ test/CodeGen/X86/inline-asm-avx512vl-v-constraint.ll @@ -0,0 +1,121 @@ +; RUN: llc < %s -march x86-64 -mtriple x86_64-unknown-linux-gnu -mattr +avx512vl | FileCheck %s + +define <4 x float> @testXMM_1(<4 x float> %_xmm0, i64 %_l) { +entry: +; CHECK: vmovhlps %xmm17, %xmm16, %xmm16 + %0 = tail call <4 x float> asm "vmovhlps $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{dirflag},~{fpsr},~{flags}"(i64 %_l, <4 x float> %_xmm0) + ret <4 x float> %0 +} + +define <4 x float> @testXMM_2(<4 x float> %_xmm0, i64 %_l) { +entry: +; CHECK: vmovapd %xmm16, %xmm16 + %0 = tail call <4 x float> asm "vmovapd $1, $0", "=v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{dirflag},~{fpsr},~{flags}"(i64 %_l) + ret <4 x float> %0 +} + +define <4 x float> @testXMM_3(<4 x float> %_xmm0, i64 %_l) { +entry: +; CHECK: vminpd %xmm16, %xmm16, %xmm16 + %0 = tail call <4 x float> asm "vminpd $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{dirflag},~{fpsr},~{flags}"(i64 %_l, i64 %_l) + ret <4 x float> %0 +} + +define i64 @testXMM_4(<4 x float> %_xmm0, i64 %_l) { +entry: 
+; CHECK: vmulsd %xmm17, %xmm16, %xmm16 + %0 = tail call i64 asm "vmulsd $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{dirflag},~{fpsr},~{flags}"(i64 %_l, <4 x float> %_xmm0) + ret i64 %0 +} + +define <4 x float> @testXMM_5(<4 x float> %_xmm0, i64 %_l) { +entry: +; CHECK: vpabsq %xmm16, %xmm16 + %0 = tail call <4 x float> asm "vpabsq $1, $0", "=v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{dirflag},~{fpsr},~{flags}"(i64 %_l) + ret <4 x float> %0 +} + +define <4 x float> @testXMM_6(<4 x float> %_xmm0, i64 %_l) { +entry: +; CHECK: vpandd %xmm16, %xmm17, %xmm16 + %0 = tail call <4 x float> asm "vpandd $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{dirflag},~{fpsr},~{flags}"(<4 x float> %_xmm0, i64 %_l) + ret <4 x float> %0 +} + +define <4 x float> @testXMM_7(<4 x float> %_xmm0, i64 %_l) { +entry: +; CHECK: vpandnd %xmm16, %xmm17, %xmm16 + %0 = tail call <4 x float> asm "vpandnd $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{dirflag},~{fpsr},~{flags}"(<4 x float> %_xmm0, i64 %_l) + ret <4 x float> %0 +} + +define <8 x float> @testYMM_1(<8 x float> %_ymm0, <8 x float> %_ymm1) { +entry: +; CHECK: vmovsldup %ymm16, %ymm16 + %0 = tail call <8 x float> asm "vmovsldup $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{ymm8},~{ymm9},~{ymm10},~{ymm11},~{ymm12},~{ymm13},~{ymm14},~{ymm15},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1) + ret <8 x float> %0 +} + +define <8 x float> @testYMM_2(<8 x float> %_ymm0, <8 x float> %_ymm1) { +entry: +; CHECK: vmovapd %ymm16, %ymm16 + %0 = tail call <8 x float> asm "vmovapd $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{ymm8},~{ymm9},~{ymm10},~{ymm11},~{ymm12},~{ymm13},~{ymm14},~{ymm15},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1) + ret <8 x float> %0 +} + +define <8 x float> @testYMM_3(<8 x float> %_ymm0, <8 x float> %_ymm1) { +entry: +; CHECK: vminpd %ymm16, %ymm16, %ymm16 + %0 = tail call <8 x float> asm "vminpd $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{ymm8},~{ymm9},~{ymm10},~{ymm11},~{ymm12},~{ymm13},~{ymm14},~{ymm15},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm1) + ret <8 x float> %0 +} + +define <8 x float> @testYMM_4(<8 x float> %_ymm0, <8 x float> %_ymm1) { +entry: +; CHECK: vpabsq %ymm16, %ymm16 + %0 = tail call <8 x float> asm "vpabsq $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{ymm8},~{ymm9},~{ymm10},~{ymm11},~{ymm12},~{ymm13},~{ymm14},~{ymm15},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1) + ret <8 x float> %0 +} + +define <8 x float> @testYMM_5(<8 x float> %_ymm0, <8 x float> %_ymm1) { +entry: +; CHECK: vpandd %ymm16, %ymm17, %ymm16 + %0 = tail call <8 x float> asm "vpandd $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{ymm8},~{ymm9},~{ymm10},~{ymm11},~{ymm12},~{ymm13},~{ymm14},~{ymm15},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0) + ret <8 x float> %0 +} + +define <8 x float> @testYMM_6(<8 x float> %_ymm0, <8 x float> %_ymm1) { +entry: +; CHECK: vpandnd %ymm16, %ymm17, %ymm16 + %0 = tail call <8 x 
float> asm "vpandnd $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{ymm8},~{ymm9},~{ymm10},~{ymm11},~{ymm12},~{ymm13},~{ymm14},~{ymm15},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0) + ret <8 x float> %0 +} + +define <8 x float> @testYMM_7(<8 x float> %_ymm0, <8 x float> %_ymm1) { +entry: +; CHECK: vpminud %ymm16, %ymm17, %ymm16 + %0 = tail call <8 x float> asm "vpminud $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{ymm8},~{ymm9},~{ymm10},~{ymm11},~{ymm12},~{ymm13},~{ymm14},~{ymm15},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0) + ret <8 x float> %0 +} + +define <8 x float> @testYMM_8(<8 x float> %_ymm0, <8 x float> %_ymm1) { +entry: +; CHECK: vpmaxsd %ymm16, %ymm17, %ymm16 + %0 = tail call <8 x float> asm "vpmaxsd $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{ymm8},~{ymm9},~{ymm10},~{ymm11},~{ymm12},~{ymm13},~{ymm14},~{ymm15},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0) + ret <8 x float> %0 +} + +define <8 x float> @testYMM_9(<8 x float> %_ymm0, <8 x float> %_ymm1) { +entry: +; CHECK: vmovups %ymm16, %ymm16 + %0 = tail call <8 x float> asm "vmovups $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{ymm8},~{ymm9},~{ymm10},~{ymm11},~{ymm12},~{ymm13},~{ymm14},~{ymm15},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1) + ret <8 x float> %0 +} + +define <8 x float> @testYMM_10(<8 x float> %_ymm0, <8 x float> %_ymm1) { +entry: +; CHECK: vmovupd %ymm16, %ymm16 + %0 = tail call <8 x float> asm "vmovupd $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{ymm8},~{ymm9},~{ymm10},~{ymm11},~{ymm12},~{ymm13},~{ymm14},~{ymm15},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1) + ret <8 x float> %0 +} +