Index: llvm/trunk/lib/Target/X86/X86InstrSSE.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td @@ -7074,14 +7074,14 @@ SS42FI, - Sched<[WriteFAdd]>; + Sched<[WriteCRC32]>; class SS42I_crc32m opc, string asm, RegisterClass RCOut, X86MemOperand x86memop, SDPatternOperator Int> : SS42FI, Sched<[WriteFAddLd, ReadAfterLd]>; + IIC_CRC32_MEM>, Sched<[WriteCRC32Ld, ReadAfterLd]>; let Constraints = "$src1 = $dst" in { def CRC32r32m8 : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem, Index: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td =================================================================== --- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td +++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td @@ -106,6 +106,7 @@ defm : BWWriteResPair; // Simple integer ALU op. defm : BWWriteResPair; // Integer multiplication. defm : BWWriteResPair; +defm : BWWriteResPair; def : WriteRes { let Latency = 3; } // Integer multiplication, high part. def : WriteRes; // LEA instructions can't fold loads. Index: llvm/trunk/lib/Target/X86/X86SchedHaswell.td =================================================================== --- llvm/trunk/lib/Target/X86/X86SchedHaswell.td +++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td @@ -115,6 +115,7 @@ def : WriteRes { let Latency = 3; } defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // This is for simple LEAs with one or two input operands. // The complex ones can only execute on port 1, and they require two cycles on Index: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td =================================================================== --- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td +++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td @@ -106,6 +106,7 @@ defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // This is for simple LEAs with one or two input operands. // The complex ones can only execute on port 1, and they require two cycles on @@ -678,8 +679,6 @@ def: InstRW<[SBWriteResGroup21], (instregex "ADD_FPrST0", "ADD_FST0r", "ADD_FrST0", - "CRC32r(16|32|64)r8", - "CRC32r(16|32|64)r64", "MMX_CVTPI2PSirr", "MMX_CVTPS2PIirr", "MMX_CVTTPS2PIirr", @@ -1416,9 +1415,7 @@ let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SBWriteResGroup72], (instregex "CRC32r(16|32|64)m64", - "CRC32r(16|32|64)m8", - "FCOM32m", +def: InstRW<[SBWriteResGroup72], (instregex "FCOM32m", "FCOM64m", "FCOMP32m", "FCOMP64m")>; Index: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td =================================================================== --- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td +++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td @@ -106,6 +106,7 @@ defm : SKLWriteResPair; // Simple integer ALU op. defm : SKLWriteResPair; // Integer multiplication. defm : SKLWriteResPair; // Integer division. +defm : SKLWriteResPair; def : WriteRes { let Latency = 3; } // Integer multiplication, high part. def : WriteRes; // LEA instructions can't fold loads. Index: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td =================================================================== --- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td +++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td @@ -106,6 +106,7 @@ defm : SKXWriteResPair; // Simple integer ALU op. defm : SKXWriteResPair; // Integer multiplication. defm : SKXWriteResPair; // Integer division. +defm : SKXWriteResPair; def : WriteRes { let Latency = 3; } // Integer multiplication, high part. def : WriteRes; // LEA instructions can't fold loads. Index: llvm/trunk/lib/Target/X86/X86Schedule.td =================================================================== --- llvm/trunk/lib/Target/X86/X86Schedule.td +++ llvm/trunk/lib/Target/X86/X86Schedule.td @@ -110,6 +110,9 @@ defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float. defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion. +// CRC32 instruction. +defm WriteCRC32 : X86SchedWritePair; + // Strings instructions. // Packed Compare Implicit Length Strings, Return Mask defm WritePCmpIStrM : X86SchedWritePair; Index: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td =================================================================== --- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td +++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td @@ -124,6 +124,7 @@ defm : JWriteResIntPair; defm : JWriteResIntPair; // i8/i16/i32 multiplication defm : JWriteResIntPair; // Worst case (i64 division) +defm : JWriteResIntPair; def : WriteRes { let Latency = 6; @@ -190,22 +191,6 @@ def : InstRW<[JWriteIDiv32], (instrs DIV32r, IDIV32r)>; def : InstRW<[JWriteIDiv32Ld], (instrs DIV32m, IDIV32m)>; -def JWriteCRC32 : SchedWriteRes<[JALU01]> { - let Latency = 3; - let ResourceCycles = [4]; - let NumMicroOps = 3; -} -def : InstRW<[JWriteCRC32], (instrs CRC32r32r8, CRC32r32r16, CRC32r32r32, - CRC32r64r8, CRC32r64r64)>; - -def JWriteCRC32Ld : SchedWriteRes<[JLAGU, JALU01]> { - let Latency = 6; - let ResourceCycles = [1, 4]; - let NumMicroOps = 3; -} -def : InstRW<[JWriteCRC32Ld], (instrs CRC32r32m8, CRC32r32m16, CRC32r32m32, - CRC32r64m8, CRC32r64m64)>; - //////////////////////////////////////////////////////////////////////////////// // Integer shifts and rotates. //////////////////////////////////////////////////////////////////////////////// Index: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td =================================================================== --- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td +++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td @@ -90,7 +90,8 @@ defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; -defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; // This is for simple LEAs with one or two input operands. // The complex ones can only execute on port 1, and they require two cycles on Index: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td =================================================================== --- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td +++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td @@ -151,6 +151,7 @@ defm : ZnWriteResPair; defm : ZnWriteResPair; defm : ZnWriteResPair; +defm : ZnWriteResFpuPair; // Bit counts. defm : ZnWriteResPair; Index: llvm/trunk/test/CodeGen/X86/sse42-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse42-schedule.ll +++ llvm/trunk/test/CodeGen/X86/sse42-schedule.ll @@ -141,7 +141,7 @@ ; GENERIC-LABEL: crc32_32_16: ; GENERIC: # %bb.0: ; GENERIC-NEXT: crc32w %si, %edi # sched: [3:1.00] -; GENERIC-NEXT: crc32w (%rdx), %edi # sched: [7:1.00] +; GENERIC-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -155,14 +155,14 @@ ; SANDY-SSE-LABEL: crc32_32_16: ; SANDY-SSE: # %bb.0: ; SANDY-SSE-NEXT: crc32w %si, %edi # sched: [3:1.00] -; SANDY-SSE-NEXT: crc32w (%rdx), %edi # sched: [7:1.00] +; SANDY-SSE-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; SANDY-SSE-NEXT: movl %edi, %eax # sched: [1:0.33] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: crc32_32_16: ; SANDY: # %bb.0: ; SANDY-NEXT: crc32w %si, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32w (%rdx), %edi # sched: [7:1.00] +; SANDY-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -260,7 +260,7 @@ ; GENERIC-LABEL: crc32_32_32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; GENERIC-NEXT: crc32l (%rdx), %edi # sched: [7:1.00] +; GENERIC-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -274,14 +274,14 @@ ; SANDY-SSE-LABEL: crc32_32_32: ; SANDY-SSE: # %bb.0: ; SANDY-SSE-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; SANDY-SSE-NEXT: crc32l (%rdx), %edi # sched: [7:1.00] +; SANDY-SSE-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] ; SANDY-SSE-NEXT: movl %edi, %eax # sched: [1:0.33] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: crc32_32_32: ; SANDY: # %bb.0: ; SANDY-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32l (%rdx), %edi # sched: [7:1.00] +; SANDY-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ;