Index: lib/Target/X86/X86InstrSSE.td =================================================================== --- lib/Target/X86/X86InstrSSE.td +++ lib/Target/X86/X86InstrSSE.td @@ -7085,14 +7085,14 @@ SS42FI, - Sched<[WriteFAdd]>; + Sched<[WriteCRC32]>; class SS42I_crc32m opc, string asm, RegisterClass RCOut, X86MemOperand x86memop, SDPatternOperator Int> : SS42FI, Sched<[WriteFAddLd, ReadAfterLd]>; + IIC_CRC32_MEM>, Sched<[WriteCRC32Ld, ReadAfterLd]>; let Constraints = "$src1 = $dst" in { def CRC32r32m8 : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem, Index: lib/Target/X86/X86SchedBroadwell.td =================================================================== --- lib/Target/X86/X86SchedBroadwell.td +++ lib/Target/X86/X86SchedBroadwell.td @@ -105,6 +105,7 @@ defm : BWWriteResPair; // Simple integer ALU op. defm : BWWriteResPair; // Integer multiplication. defm : BWWriteResPair; +defm : BWWriteResPair; def : WriteRes { let Latency = 3; } // Integer multiplication, high part. def : WriteRes; // LEA instructions can't fold loads. Index: lib/Target/X86/X86SchedHaswell.td =================================================================== --- lib/Target/X86/X86SchedHaswell.td +++ lib/Target/X86/X86SchedHaswell.td @@ -114,6 +114,7 @@ def : WriteRes { let Latency = 3; } defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // This is for simple LEAs with one or two input operands. // The complex ones can only execute on port 1, and they require two cycles on Index: lib/Target/X86/X86SchedSandyBridge.td =================================================================== --- lib/Target/X86/X86SchedSandyBridge.td +++ lib/Target/X86/X86SchedSandyBridge.td @@ -105,6 +105,7 @@ defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // This is for simple LEAs with one or two input operands. // The complex ones can only execute on port 1, and they require two cycles on @@ -859,8 +860,6 @@ def: InstRW<[SBWriteResGroup21], (instregex "CMPPSrri")>; def: InstRW<[SBWriteResGroup21], (instregex "CMPSDrr")>; def: InstRW<[SBWriteResGroup21], (instregex "CMPSSrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "CRC32r(16|32|64)r8")>; -def: InstRW<[SBWriteResGroup21], (instregex "CRC32r(16|32|64)r64")>; def: InstRW<[SBWriteResGroup21], (instregex "CVTDQ2PSrr")>; def: InstRW<[SBWriteResGroup21], (instregex "CVTPS2DQrr")>; def: InstRW<[SBWriteResGroup21], (instregex "CVTTPS2DQrr")>; @@ -1859,8 +1858,6 @@ } def: InstRW<[SBWriteResGroup72], (instregex "BSF(16|32|64)rm")>; def: InstRW<[SBWriteResGroup72], (instregex "BSR(16|32|64)rm")>; -def: InstRW<[SBWriteResGroup72], (instregex "CRC32r(16|32|64)m64")>; -def: InstRW<[SBWriteResGroup72], (instregex "CRC32r(16|32|64)m8")>; def: InstRW<[SBWriteResGroup72], (instregex "FCOM32m")>; def: InstRW<[SBWriteResGroup72], (instregex "FCOM64m")>; def: InstRW<[SBWriteResGroup72], (instregex "FCOMP32m")>; Index: lib/Target/X86/X86SchedSkylakeClient.td =================================================================== --- lib/Target/X86/X86SchedSkylakeClient.td +++ lib/Target/X86/X86SchedSkylakeClient.td @@ -113,6 +113,7 @@ let ResourceCycles = [1, 1, 10]; } +defm : SKLWriteResPair; def : WriteRes; // LEA instructions can't fold loads. // Integer shifts and rotates. Index: lib/Target/X86/X86SchedSkylakeServer.td =================================================================== --- lib/Target/X86/X86SchedSkylakeServer.td +++ lib/Target/X86/X86SchedSkylakeServer.td @@ -113,6 +113,7 @@ let ResourceCycles = [1, 1, 10]; } +defm : SKXWriteResPair; def : WriteRes; // LEA instructions can't fold loads. // Integer shifts and rotates. Index: lib/Target/X86/X86Schedule.td =================================================================== --- lib/Target/X86/X86Schedule.td +++ lib/Target/X86/X86Schedule.td @@ -105,6 +105,9 @@ defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float. defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion. +// CRC32 instruction. +defm WriteCRC32 : X86SchedWritePair; + // Strings instructions. // Packed Compare Implicit Length Strings, Return Mask defm WritePCmpIStrM : X86SchedWritePair; Index: lib/Target/X86/X86ScheduleBtVer2.td =================================================================== --- lib/Target/X86/X86ScheduleBtVer2.td +++ lib/Target/X86/X86ScheduleBtVer2.td @@ -124,6 +124,7 @@ defm : JWriteResIntPair; defm : JWriteResIntPair; // i8/i16/i32 multiplication defm : JWriteResIntPair; // Worst case (i64 division) +defm : JWriteResIntPair; def : WriteRes { let Latency = 6; @@ -206,22 +207,6 @@ def : InstRW<[JWriteIDiv32], (instrs DIV32r, IDIV32r)>; def : InstRW<[JWriteIDiv32Ld], (instrs DIV32m, IDIV32m)>; -def JWriteCRC32 : SchedWriteRes<[JALU01]> { - let Latency = 3; - let ResourceCycles = [4]; - let NumMicroOps = 3; -} -def : InstRW<[JWriteCRC32], (instrs CRC32r32r8, CRC32r32r16, CRC32r32r32, - CRC32r64r8, CRC32r64r64)>; - -def JWriteCRC32Ld : SchedWriteRes<[JLAGU, JALU01]> { - let Latency = 6; - let ResourceCycles = [1, 4]; - let NumMicroOps = 3; -} -def : InstRW<[JWriteCRC32Ld], (instrs CRC32r32m8, CRC32r32m16, CRC32r32m32, - CRC32r64m8, CRC32r64m64)>; - //////////////////////////////////////////////////////////////////////////////// // Integer shifts and rotates. //////////////////////////////////////////////////////////////////////////////// Index: lib/Target/X86/X86ScheduleSLM.td =================================================================== --- lib/Target/X86/X86ScheduleSLM.td +++ lib/Target/X86/X86ScheduleSLM.td @@ -89,7 +89,8 @@ defm : SMWriteResPair; defm : SMWriteResPair; defm : SMWriteResPair; -defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; // This is for simple LEAs with one or two input operands. // The complex ones can only execute on port 1, and they require two cycles on Index: lib/Target/X86/X86ScheduleZnver1.td =================================================================== --- lib/Target/X86/X86ScheduleZnver1.td +++ lib/Target/X86/X86ScheduleZnver1.td @@ -151,6 +151,7 @@ defm : ZnWriteResPair; defm : ZnWriteResPair; defm : ZnWriteResPair; +defm : ZnWriteResFpuPair; // Treat misc copies as a move. def : InstRW<[WriteMove], (instrs COPY)>; Index: test/CodeGen/X86/sse42-schedule.ll =================================================================== --- test/CodeGen/X86/sse42-schedule.ll +++ test/CodeGen/X86/sse42-schedule.ll @@ -14,7 +14,7 @@ ; GENERIC-LABEL: crc32_32_8: ; GENERIC: # %bb.0: ; GENERIC-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; GENERIC-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] +; GENERIC-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -28,7 +28,7 @@ ; SANDY-LABEL: crc32_32_8: ; SANDY: # %bb.0: ; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] +; SANDY-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -224,7 +224,7 @@ ; GENERIC-LABEL: crc32_64_8: ; GENERIC: # %bb.0: ; GENERIC-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; GENERIC-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] +; GENERIC-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -238,7 +238,7 @@ ; SANDY-LABEL: crc32_64_8: ; SANDY: # %bb.0: ; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] +; SANDY-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] ; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -294,7 +294,7 @@ ; GENERIC-LABEL: crc32_64_64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; GENERIC-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] +; GENERIC-NEXT: crc32q (%rdx), %rdi # sched: [7:1.00] ; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -308,7 +308,7 @@ ; SANDY-LABEL: crc32_64_64: ; SANDY: # %bb.0: ; SANDY-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; SANDY-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] +; SANDY-NEXT: crc32q (%rdx), %rdi # sched: [7:1.00] ; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ;