Index: lib/Target/X86/X86InstrInfo.td
===================================================================
--- lib/Target/X86/X86InstrInfo.td
+++ lib/Target/X86/X86InstrInfo.td
@@ -767,6 +767,7 @@
 def NoDQI  : Predicate<"!Subtarget->hasDQI()">;
 def HasBWI : Predicate<"Subtarget->hasBWI()">,
                AssemblerPredicate<"FeatureBWI", "AVX-512 BW ISA">;
+def NoBWI  : Predicate<"!Subtarget->hasBWI()">;
 def HasVLX : Predicate<"Subtarget->hasVLX()">,
                AssemblerPredicate<"FeatureVLX", "AVX-512 VL ISA">;
 def NoVLX  : Predicate<"!Subtarget->hasVLX()">;
Index: lib/Target/X86/X86InstrSSE.td
===================================================================
--- lib/Target/X86/X86InstrSSE.td
+++ lib/Target/X86/X86InstrSSE.td
@@ -2845,8 +2845,8 @@
 multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
                          ValueType OpVT128, ValueType OpVT256,
-                         OpndItins itins, bit IsCommutable = 0> {
-let Predicates = [HasAVX, NoVLX] in
+                         OpndItins itins, bit IsCommutable = 0, Predicate prd> {
+let Predicates = [HasAVX, prd] in
   defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
                              VR128, loadv2i64, i128mem, itins, IsCommutable, 0>,
                              VEX_4V;
@@ -2854,7 +2854,7 @@
   defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
                            memopv2i64, i128mem, itins, IsCommutable, 1>;

-let Predicates = [HasAVX2, NoVLX] in
+let Predicates = [HasAVX2, prd] in
   defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
                                OpVT256, VR256, loadv4i64, i256mem, itins,
                                IsCommutable, 0>, VEX_4V, VEX_L;
@@ -2863,13 +2863,13 @@

 // These are ordered here for pattern ordering requirements with the fp versions

 defm PAND  : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64,
-                           SSE_VEC_BIT_ITINS_P, 1>;
+                           SSE_VEC_BIT_ITINS_P, 1, NoVLX>;
 defm POR   : PDI_binop_all<0xEB, "por", or, v2i64, v4i64,
-                           SSE_VEC_BIT_ITINS_P, 1>;
+                           SSE_VEC_BIT_ITINS_P, 1, NoVLX>;
 defm PXOR  : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64,
-                           SSE_VEC_BIT_ITINS_P, 1>;
+                           SSE_VEC_BIT_ITINS_P, 1, NoVLX>;
 defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
-                           SSE_VEC_BIT_ITINS_P, 0>;
+                           SSE_VEC_BIT_ITINS_P, 0, NoVLX>;

 //===----------------------------------------------------------------------===//
 // SSE 1 & 2 - Logical Instructions
@@ -4010,39 +4010,39 @@
 } // ExeDomain = SSEPackedInt

 defm PADDB   : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8,
-                             SSE_INTALU_ITINS_P, 1>;
+                             SSE_INTALU_ITINS_P, 1, NoBWI>;
 defm PADDW   : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16,
-                             SSE_INTALU_ITINS_P, 1>;
+                             SSE_INTALU_ITINS_P, 1, NoBWI>;
 defm PADDD   : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
-                             SSE_INTALU_ITINS_P, 1>;
+                             SSE_INTALU_ITINS_P, 1, NoVLX>;
 defm PADDQ   : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
-                             SSE_INTALUQ_ITINS_P, 1>;
+                             SSE_INTALUQ_ITINS_P, 1, NoVLX>;
 defm PMULLW  : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
-                             SSE_INTMUL_ITINS_P, 1>;
+                             SSE_INTMUL_ITINS_P, 1, NoBWI>;
 defm PMULHUW : PDI_binop_all<0xE4, "pmulhuw", mulhu, v8i16, v16i16,
-                             SSE_INTMUL_ITINS_P, 1>;
+                             SSE_INTMUL_ITINS_P, 1, NoBWI>;
 defm PMULHW  : PDI_binop_all<0xE5, "pmulhw", mulhs, v8i16, v16i16,
-                             SSE_INTMUL_ITINS_P, 1>;
+                             SSE_INTMUL_ITINS_P, 1, NoBWI>;
 defm PSUBB   : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8,
-                             SSE_INTALU_ITINS_P, 0>;
+                             SSE_INTALU_ITINS_P, 0, NoBWI>;
 defm PSUBW   : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16,
-                             SSE_INTALU_ITINS_P, 0>;
+                             SSE_INTALU_ITINS_P, 0, NoBWI>;
 defm PSUBD   : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
-                             SSE_INTALU_ITINS_P, 0>;
+                             SSE_INTALU_ITINS_P, 0, NoVLX>;
 defm PSUBQ   : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
-                             SSE_INTALUQ_ITINS_P, 0>;
+                             SSE_INTALUQ_ITINS_P, 0, NoVLX>;
 defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8,
-                             SSE_INTALU_ITINS_P, 0>;
+                             SSE_INTALU_ITINS_P, 0, NoBWI>;
 defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16,
-                             SSE_INTALU_ITINS_P, 0>;
+                             SSE_INTALU_ITINS_P, 0, NoBWI>;
 defm PMINUB  : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8,
-                             SSE_INTALU_ITINS_P, 1>;
+                             SSE_INTALU_ITINS_P, 1, NoBWI>;
 defm PMINSW  : PDI_binop_all<0xEA, "pminsw", smin, v8i16, v16i16,
-                             SSE_INTALU_ITINS_P, 1>;
+                             SSE_INTALU_ITINS_P, 1, NoBWI>;
 defm PMAXUB  : PDI_binop_all<0xDE, "pmaxub", umax, v16i8, v32i8,
-                             SSE_INTALU_ITINS_P, 1>;
+                             SSE_INTALU_ITINS_P, 1, NoBWI>;
 defm PMAXSW  : PDI_binop_all<0xEE, "pmaxsw", smax, v8i16, v16i16,
-                             SSE_INTALU_ITINS_P, 1>;
+                             SSE_INTALU_ITINS_P, 1, NoBWI>;

 // Intrinsic forms
 defm PSUBSB  : PDI_binop_all_int<0xE8, "psubsb", int_x86_sse2_psubs_b,
@@ -4239,17 +4239,17 @@
 //===---------------------------------------------------------------------===//

 defm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8,
-                             SSE_INTALU_ITINS_P, 1>;
+                             SSE_INTALU_ITINS_P, 1, NoBWI>;
 defm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16,
-                             SSE_INTALU_ITINS_P, 1>;
+                             SSE_INTALU_ITINS_P, 1, NoBWI>;
 defm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32,
-                             SSE_INTALU_ITINS_P, 1>;
+                             SSE_INTALU_ITINS_P, 1, NoVLX>;
 defm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8,
-                             SSE_INTALU_ITINS_P, 0>;
+                             SSE_INTALU_ITINS_P, 0, NoBWI>;
 defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16,
-                             SSE_INTALU_ITINS_P, 0>;
+                             SSE_INTALU_ITINS_P, 0, NoBWI>;
 defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32,
-                             SSE_INTALU_ITINS_P, 0>;
+                             SSE_INTALU_ITINS_P, 0, NoVLX>;

 //===---------------------------------------------------------------------===//
 // SSE2 - Packed Integer Shuffle Instructions
Index: test/CodeGen/X86/avx-isa-check.ll
===================================================================
--- test/CodeGen/X86/avx-isa-check.ll
+++ test/CodeGen/X86/avx-isa-check.ll
@@ -0,0 +1,215 @@
+; Check AVX2 instructions that are disabled when avx512VL/avx512BW are present.
+
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=core-avx2 -mattr=+avx2 -o /dev/null
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -o /dev/null
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512vl -o /dev/null
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512vl -mattr=+avx512bw -o /dev/null
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=skx -o /dev/null
+
+define <4 x i64> @vpand_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+  ; Force the execution domain with an add.
+  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+  %x = and <4 x i64> %a2, %b
+  ret <4 x i64> %x
+}
+
+define <2 x i64> @vpand_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
+  ; Force the execution domain with an add.
+  %a2 = add <2 x i64> %a, <i64 1, i64 1>
+  %x = and <2 x i64> %a2, %b
+  ret <2 x i64> %x
+}
+
+define <4 x i64> @vpandn_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+  ; Force the execution domain with an add.
+  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+  %y = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
+  %x = and <4 x i64> %a, %y
+  ret <4 x i64> %x
+}
+
+define <2 x i64> @vpandn_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
+  ; Force the execution domain with an add.
+  %a2 = add <2 x i64> %a, <i64 1, i64 1>
+  %y = xor <2 x i64> %a2, <i64 -1, i64 -1>
+  %x = and <2 x i64> %a, %y
+  ret <2 x i64> %x
+}
+
+define <4 x i64> @vpor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+  ; Force the execution domain with an add.
+  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+  %x = or <4 x i64> %a2, %b
+  ret <4 x i64> %x
+}
+
+define <4 x i64> @vpxor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+  ; Force the execution domain with an add.
+  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+  %x = xor <4 x i64> %a2, %b
+  ret <4 x i64> %x
+}
+
+define <2 x i64> @vpor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
+  ; Force the execution domain with an add.
+  %a2 = add <2 x i64> %a, <i64 1, i64 1>
+  %x = or <2 x i64> %a2, %b
+  ret <2 x i64> %x
+}
+
+define <2 x i64> @vpxor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
+  ; Force the execution domain with an add.
+  %a2 = add <2 x i64> %a, <i64 1, i64 1>
+  %x = xor <2 x i64> %a2, %b
+  ret <2 x i64> %x
+}
+
+define <4 x i64> @test_vpaddq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
+  %x = add <4 x i64> %i, %j
+  ret <4 x i64> %x
+}
+
+define <8 x i32> @test_vpaddd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+  %x = add <8 x i32> %i, %j
+  ret <8 x i32> %x
+}
+
+define <16 x i16> @test_vpaddw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+  %x = add <16 x i16> %i, %j
+  ret <16 x i16> %x
+}
+
+define <32 x i8> @test_vpaddb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+  %x = add <32 x i8> %i, %j
+  ret <32 x i8> %x
+}
+
+define <4 x i64> @test_vpsubq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
+  %x = sub <4 x i64> %i, %j
+  ret <4 x i64> %x
+}
+
+define <8 x i32> @test_vpsubd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+  %x = sub <8 x i32> %i, %j
+  ret <8 x i32> %x
+}
+
+define <16 x i16> @test_vpsubw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+  %x = sub <16 x i16> %i, %j
+  ret <16 x i16> %x
+}
+
+define <32 x i8> @test_vpsubb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+  %x = sub <32 x i8> %i, %j
+  ret <32 x i8> %x
+}
+
+define <16 x i16> @test_vpmullw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+  %x = mul <16 x i16> %i, %j
+  ret <16 x i16> %x
+}
+
+define <8 x i32> @test_vpcmpgtd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+  %bincmp = icmp slt <8 x i32> %i, %j
+  %x = sext <8 x i1> %bincmp to <8 x i32>
+  ret <8 x i32> %x
+}
+
+define <32 x i8> @test_vpcmpeqb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+  %bincmp = icmp eq <32 x i8> %i, %j
+  %x = sext <32 x i1> %bincmp to <32 x i8>
+  ret <32 x i8> %x
+}
+
+define <16 x i16> @test_vpcmpeqw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+  %bincmp = icmp eq <16 x i16> %i, %j
+  %x = sext <16 x i1> %bincmp to <16 x i16>
+  ret <16 x i16> %x
+}
+
+define <32 x i8> @test_vpcmpgtb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+  %bincmp = icmp slt <32 x i8> %i, %j
+  %x = sext <32 x i1> %bincmp to <32 x i8>
+  ret <32 x i8> %x
+}
+
+define <16 x i16> @test_vpcmpgtw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+  %bincmp = icmp slt <16 x i16> %i, %j
+  %x = sext <16 x i1> %bincmp to <16 x i16>
+  ret <16 x i16> %x
+}
+
+define <8 x i32> @test_vpcmpeqd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+  %bincmp = icmp eq <8 x i32> %i, %j
+  %x = sext <8 x i1> %bincmp to <8 x i32>
+  ret <8 x i32> %x
+}
+
+define <2 x i64> @test_vpaddq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
+  %x = add <2 x i64> %i, %j
+  ret <2 x i64> %x
+}
+
+define <4 x i32> @test_vpaddd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
+  %x = add <4 x i32> %i, %j
+  ret <4 x i32> %x
+}
+
+define <8 x i16> @test_vpaddw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
+  %x = add <8 x i16> %i, %j
+  ret <8 x i16> %x
+}
+
+define <16 x i8> @test_vpaddb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
+  %x = add <16 x i8> %i, %j
+  ret <16 x i8> %x
+}
+
+define <2 x i64> @test_vpsubq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
+  %x = sub <2 x i64> %i, %j
+  ret <2 x i64> %x
+}
+
+define <4 x i32> @test_vpsubd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
+  %x = sub <4 x i32> %i, %j
+  ret <4 x i32> %x
+}
+
+define <8 x i16> @test_vpsubw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
+  %x = sub <8 x i16> %i, %j
+  ret <8 x i16> %x
+}
+
+define <16 x i8> @test_vpsubb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
+  %x = sub <16 x i8> %i, %j
+  ret <16 x i8> %x
+}
+
+define <8 x i16> @test_vpmullw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
+  %x = mul <8 x i16> %i, %j
+  ret <8 x i16> %x
+}
+
+define <8 x i16> @test_vpcmpgtw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
+  %bincmp = icmp slt <8 x i16> %i, %j
+  %x = sext <8 x i1> %bincmp to <8 x i16>
+  ret <8 x i16> %x
+}
+
+define <16 x i8> @test_vpcmpgtb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
+  %bincmp = icmp slt <16 x i8> %i, %j
+  %x = sext <16 x i1> %bincmp to <16 x i8>
+  ret <16 x i8> %x
+}
+
+define <8 x i16> @test_vpcmpeqw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
+  %bincmp = icmp eq <8 x i16> %i, %j
+  %x = sext <8 x i1> %bincmp to <8 x i16>
+  ret <8 x i16> %x
+}
+
+define <16 x i8> @test_vpcmpeqb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
+  %bincmp = icmp eq <16 x i8> %i, %j
+  %x = sext <16 x i1> %bincmp to <16 x i8>
+  ret <16 x i8> %x
+}
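
The RUN lines in avx-isa-check.ll discard the compiled output (-o /dev/null) and carry no CHECK patterns, so each invocation passes only if instruction selection completes for that feature combination. That is exactly what the predicate split guards: a 256-bit vpaddb has no EVEX form unless AVX-512 BW is available, so gating its VEX pattern on NoVLX instead of NoBWI would leave an +avx512vl target without +avx512bw with no matching pattern, and llc would abort with a selection failure. A hedged sketch of how an encoding assertion could additionally be layered on with FileCheck follows; the SKX prefix is invented here, and the expectation that skx selects the EVEX-encoded vpandq (whose encoding begins with the 0x62 EVEX prefix byte, versus 0xc4/0xc5 for VEX) is an assumption about isel behavior, not something this patch asserts:

; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=skx \
; RUN:   | FileCheck %s --check-prefix=SKX
; SKX-LABEL: vpand_256:
; SKX: vpandq {{.*}} ## encoding: [0x62,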