Index: lib/Target/X86/X86.td
===================================================================
--- lib/Target/X86/X86.td
+++ lib/Target/X86/X86.td
@@ -327,6 +327,12 @@
     : SubtargetFeature<"prefer-avx256", "PreferAVX256", "true",
                        "Prefer 256-bit AVX instructions">;
 
+// This feature is used in combination with prefer-avx256 to disable 512-bit
+// instructions in the legalizer.
+def FeatureNo512BitVectors
+    : SubtargetFeature<"no-512-bit-vectors", "No512BitVectors", "true",
+                       "No 512-bit vectors present in function">;
+
 //===----------------------------------------------------------------------===//
 // Register File Description
 //===----------------------------------------------------------------------===//
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -1137,12 +1137,8 @@
     }
   }
 
-  if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
-    addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
-    addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
-    addRegisterClass(MVT::v8i64,  &X86::VR512RegClass);
-    addRegisterClass(MVT::v8f64,  &X86::VR512RegClass);
-
+  if (!Subtarget.useSoftFloat() &&
+      (Subtarget.use512BitOps() || Subtarget.hasVLX())) {
     addRegisterClass(MVT::v1i1,   &X86::VK1RegClass);
     addRegisterClass(MVT::v8i1,   &X86::VK8RegClass);
     addRegisterClass(MVT::v16i1,  &X86::VK16RegClass);
@@ -1189,6 +1185,13 @@
     for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1,
                      MVT::v16i1, MVT::v32i1, MVT::v64i1 })
       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
+  }
+
+  if (!Subtarget.useSoftFloat() && Subtarget.use512BitOps()) {
+    addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
+    addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
+    addRegisterClass(MVT::v8i64,  &X86::VR512RegClass);
+    addRegisterClass(MVT::v8f64,  &X86::VR512RegClass);
 
     for (MVT VT : MVT::fp_vector_valuetypes())
       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
@@ -1354,7 +1357,7 @@
   }// has AVX-512
 
   if (!Subtarget.useSoftFloat() &&
-      (Subtarget.hasAVX512() || Subtarget.hasVLX())) {
+      (Subtarget.use512BitOps() || Subtarget.hasVLX())) {
     // These operations are handled on non-VLX by artificially widening in
     // isel patterns.
     // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
@@ -1406,14 +1409,11 @@
     }
   }
 
-  if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
-    addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
-    addRegisterClass(MVT::v64i8,  &X86::VR512RegClass);
-
+  if (!Subtarget.useSoftFloat() && Subtarget.hasBWI() &&
+      (Subtarget.use512BitOps() || Subtarget.hasVLX())) {
     addRegisterClass(MVT::v32i1,  &X86::VK32RegClass);
-    addRegisterClass(MVT::v64i1,  &X86::VK64RegClass);
 
-    for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
+    for (auto VT : { MVT::v32i1 }) {
       setOperationAction(ISD::ADD,                VT, Custom);
       setOperationAction(ISD::SUB,                VT, Custom);
       setOperationAction(ISD::MUL,                VT, Custom);
@@ -1429,14 +1429,39 @@
     }
 
     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v32i1, Custom);
-    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v64i1, Custom);
     setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v32i1, Custom);
-    setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v64i1, Custom);
 
     // Extends from v32i1 masks to 256-bit vectors.
     setOperationAction(ISD::SIGN_EXTEND,        MVT::v32i8, Custom);
     setOperationAction(ISD::ZERO_EXTEND,        MVT::v32i8, Custom);
     setOperationAction(ISD::ANY_EXTEND,         MVT::v32i8, Custom);
+  }
+
+  if (!Subtarget.useSoftFloat() && Subtarget.hasBWI() &&
+      Subtarget.use512BitOps()) {
+    addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
+    addRegisterClass(MVT::v64i8,  &X86::VR512RegClass);
+
+    addRegisterClass(MVT::v64i1,  &X86::VK64RegClass);
+
+    for (auto VT : { MVT::v64i1 }) {
+      setOperationAction(ISD::ADD,                VT, Custom);
+      setOperationAction(ISD::SUB,                VT, Custom);
+      setOperationAction(ISD::MUL,                VT, Custom);
+      setOperationAction(ISD::VSELECT,            VT, Expand);
+
+      setOperationAction(ISD::TRUNCATE,           VT, Custom);
+      setOperationAction(ISD::SETCC,              VT, Custom);
+      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+      setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
+      setOperationAction(ISD::SELECT,             VT, Custom);
+      setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
+      setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
+    }
+
+    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v64i1, Custom);
+    setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v64i1, Custom);
+
     // Extends from v64i1 masks to 512-bit vectors.
     setOperationAction(ISD::SIGN_EXTEND,        MVT::v64i8, Custom);
     setOperationAction(ISD::ZERO_EXTEND,        MVT::v64i8, Custom);
@@ -1503,7 +1528,7 @@
   }
 
   if (!Subtarget.useSoftFloat() && Subtarget.hasBWI() &&
-      (Subtarget.hasAVX512() || Subtarget.hasVLX())) {
+      (Subtarget.use512BitOps() || Subtarget.hasVLX())) {
     for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
       setOperationAction(ISD::MLOAD,  VT, Subtarget.hasVLX() ? Legal : Custom);
       setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
Index: lib/Target/X86/X86Subtarget.h
===================================================================
--- lib/Target/X86/X86Subtarget.h
+++ lib/Target/X86/X86Subtarget.h
@@ -351,6 +351,9 @@
   /// Prefer 256-bit AVX instructions over 512-bit instructions.
   bool PreferAVX256;
 
+  /// Indicates there are no 512-bit vectors present in the function.
+  bool No512BitVectors;
+
   /// What processor and OS we're targeting.
   Triple TargetTriple;
 
@@ -566,6 +569,12 @@
 
   bool preferAVX256() const { return PreferAVX256; }
 
+  // If there are no 512-bit vectors and we prefer not to use 512-bit
+  // registers, disable them in the legalizer.
+  bool use512BitOps() const {
+    return hasAVX512() && !(PreferAVX256 && No512BitVectors);
+  }
+
   bool isXRaySupported() const override { return is64Bit(); }
 
   X86ProcFamilyEnum getProcFamily() const { return X86ProcFamily; }
Index: lib/Target/X86/X86Subtarget.cpp
===================================================================
--- lib/Target/X86/X86Subtarget.cpp
+++ lib/Target/X86/X86Subtarget.cpp
@@ -366,6 +366,7 @@
   GatherOverhead = 1024;
   ScatterOverhead = 1024;
   PreferAVX256 = false;
+  No512BitVectors = false;
 }
 
 X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
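
For reference, a minimal standalone sketch (not part of the patch) of the gating
logic added to X86Subtarget.h: use512BitOps() keeps 512-bit types legal on any
AVX-512 subtarget unless both PreferAVX256 and No512BitVectors are set. The free
function and main() below are illustrative only; the parameter names mirror the
subtarget fields from the patch.

#include <cstdio>

// Mirrors X86Subtarget::use512BitOps() with its inputs passed explicitly.
static bool use512BitOps(bool HasAVX512, bool PreferAVX256,
                         bool No512BitVectors) {
  // 512-bit register classes stay registered unless the function both prefers
  // 256-bit AVX and is known to contain no 512-bit vectors.
  return HasAVX512 && !(PreferAVX256 && No512BitVectors);
}

int main() {
  // prefer-avx256 set, but the function still contains 512-bit vectors:
  // 512-bit types remain legal.
  std::printf("%d\n", use512BitOps(true, true, false)); // prints 1
  // prefer-avx256 set and no 512-bit vectors in the function:
  // 512-bit types are never added to the legalizer.
  std::printf("%d\n", use512BitOps(true, true, true));  // prints 0
  return 0;
}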