Review notes (free text ahead of the first "Index:" header; not part of any hunk).

  * X86InstrFormats.td: renumbers the Domain defs. SSEPackedInt 3 -> 1,
    SSEPackedSingle 1 -> 2, SSEPackedDouble 2 -> 3. GenericDomain stays 0.
  * X86InstrInfo.cpp, ReplaceableInstrs: every row is reordered from
    { PackedSingle, PackedDouble, PackedInt } to
    { PackedInt, PackedSingle, PackedDouble }; no opcode is added or dropped.
  * X86InstrInfo.cpp, validDomains: the non-AVX2 value changes 0x6 -> 0xc.
    Presumably bit N of the mask stands for domain N, so this keeps
    "single + double, no int" under the new numbering -- confirm.
  * X86InstrInfo.cpp, assert: "Domain < 3" becomes "Domain > 1". With
    PackedInt moving from 3 to 1 this presumably still rejects only the
    integer domain when AVX2 is absent -- confirm that 0 cannot reach it.
  * NOTE(review): no hunk changes the rows of ReplaceableInstrsAVX2 (it only
    appears as trailing context of the second hunk). If lookupAVX2 indexes
    its columns by domain the same way, that table needs the same reorder;
    verify before landing.
  * NOTE(review): line breaks below are restored from the hunk line counts.
    Leading indentation is reconstructed and in-line column alignment is
    not, so context whitespace may differ from the tree.

Index: lib/Target/X86/X86InstrFormats.td
===================================================================
--- lib/Target/X86/X86InstrFormats.td
+++ lib/Target/X86/X86InstrFormats.td
@@ -88,9 +88,9 @@
   bits<2> Value = val;
 }
 def GenericDomain : Domain<0>;
-def SSEPackedSingle : Domain<1>;
-def SSEPackedDouble : Domain<2>;
-def SSEPackedInt : Domain<3>;
+def SSEPackedInt : Domain<1>;
+def SSEPackedSingle : Domain<2>;
+def SSEPackedDouble : Domain<3>;
 
 // Class specifying the vector form of the decompressed
 // displacement of 8-bit.
Index: lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- lib/Target/X86/X86InstrInfo.cpp
+++ lib/Target/X86/X86InstrInfo.cpp
@@ -5717,43 +5717,43 @@
 // that we don't include here. We don't want to replace instructions selected
 // by intrinsics.
 static const uint16_t ReplaceableInstrs[][3] = {
-  //PackedSingle PackedDouble PackedInt
-  { X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr },
-  { X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm },
-  { X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr },
-  { X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr },
-  { X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm },
-  { X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr },
-  { X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm },
-  { X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr },
-  { X86::ANDPSrm, X86::ANDPDrm, X86::PANDrm },
-  { X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr },
-  { X86::ORPSrm, X86::ORPDrm, X86::PORrm },
-  { X86::ORPSrr, X86::ORPDrr, X86::PORrr },
-  { X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
-  { X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
+  //PackedInt PackedSingle PackedDouble
+  { X86::MOVDQAmr, X86::MOVAPSmr, X86::MOVAPDmr },
+  { X86::MOVDQArm, X86::MOVAPSrm, X86::MOVAPDrm },
+  { X86::MOVDQArr, X86::MOVAPSrr, X86::MOVAPDrr },
+  { X86::MOVDQUmr, X86::MOVUPSmr, X86::MOVUPDmr },
+  { X86::MOVDQUrm, X86::MOVUPSrm, X86::MOVUPDrm },
+  { X86::MOVNTDQmr, X86::MOVNTPSmr, X86::MOVNTPDmr },
+  { X86::PANDNrm, X86::ANDNPSrm, X86::ANDNPDrm },
+  { X86::PANDNrr, X86::ANDNPSrr, X86::ANDNPDrr },
+  { X86::PANDrm, X86::ANDPSrm, X86::ANDPDrm },
+  { X86::PANDrr, X86::ANDPSrr, X86::ANDPDrr },
+  { X86::PORrm, X86::ORPSrm, X86::ORPDrm },
+  { X86::PORrr, X86::ORPSrr, X86::ORPDrr },
+  { X86::PXORrm, X86::XORPSrm, X86::XORPDrm },
+  { X86::PXORrr, X86::XORPSrr, X86::XORPDrr },
   // AVX 128-bit support
-  { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr },
-  { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm },
-  { X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr },
-  { X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr },
-  { X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm },
-  { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
-  { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm },
-  { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr },
-  { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDrm },
-  { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr },
-  { X86::VORPSrm, X86::VORPDrm, X86::VPORrm },
-  { X86::VORPSrr, X86::VORPDrr, X86::VPORrr },
-  { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm },
-  { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr },
+  { X86::VMOVDQAmr, X86::VMOVAPSmr, X86::VMOVAPDmr },
+  { X86::VMOVDQArm, X86::VMOVAPSrm, X86::VMOVAPDrm },
+  { X86::VMOVDQArr, X86::VMOVAPSrr, X86::VMOVAPDrr },
+  { X86::VMOVDQUmr, X86::VMOVUPSmr, X86::VMOVUPDmr },
+  { X86::VMOVDQUrm, X86::VMOVUPSrm, X86::VMOVUPDrm },
+  { X86::VMOVNTDQmr, X86::VMOVNTPSmr, X86::VMOVNTPDmr },
+  { X86::VPANDNrm, X86::VANDNPSrm, X86::VANDNPDrm },
+  { X86::VPANDNrr, X86::VANDNPSrr, X86::VANDNPDrr },
+  { X86::VPANDrm, X86::VANDPSrm, X86::VANDPDrm },
+  { X86::VPANDrr, X86::VANDPSrr, X86::VANDPDrr },
+  { X86::VPORrm, X86::VORPSrm, X86::VORPDrm },
+  { X86::VPORrr, X86::VORPSrr, X86::VORPDrr },
+  { X86::VPXORrm, X86::VXORPSrm, X86::VXORPDrm },
+  { X86::VPXORrr, X86::VXORPSrr, X86::VXORPDrr },
   // AVX 256-bit support
-  { X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr },
-  { X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm },
-  { X86::VMOVAPSYrr, X86::VMOVAPDYrr, X86::VMOVDQAYrr },
-  { X86::VMOVUPSYmr, X86::VMOVUPDYmr, X86::VMOVDQUYmr },
-  { X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm },
-  { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr }
+  { X86::VMOVDQAYmr, X86::VMOVAPSYmr, X86::VMOVAPDYmr },
+  { X86::VMOVDQAYrm, X86::VMOVAPSYrm, X86::VMOVAPDYrm },
+  { X86::VMOVDQAYrr, X86::VMOVAPSYrr, X86::VMOVAPDYrr },
+  { X86::VMOVDQUYmr, X86::VMOVUPSYmr, X86::VMOVUPDYmr },
+  { X86::VMOVDQUYrm, X86::VMOVUPSYrm, X86::VMOVUPDYrm },
+  { X86::VMOVNTDQYmr, X86::VMOVNTPSYmr, X86::VMOVNTPDYmr }
 };
 
 static const uint16_t ReplaceableInstrsAVX2[][3] = {
@@ -5805,7 +5805,7 @@
   if (domain && lookup(MI->getOpcode(), domain))
     validDomains = 0xe;
   else if (domain && lookupAVX2(MI->getOpcode(), domain))
-    validDomains = hasAVX2 ? 0xe : 0x6;
+    validDomains = hasAVX2 ? 0xe : 0xc;
   return std::make_pair(domain, validDomains);
 }
 
@@ -5815,7 +5815,7 @@
   assert(dom && "Not an SSE instruction");
   const uint16_t *table = lookup(MI->getOpcode(), dom);
   if (!table) { // try the other table
-    assert((Subtarget.hasAVX2() || Domain < 3) &&
+    assert((Subtarget.hasAVX2() || Domain > 1) &&
            "256-bit vector operations only available in AVX2");
     table = lookupAVX2(MI->getOpcode(), dom);
   }