Index: lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
===================================================================
--- lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -58,6 +58,10 @@
 void initLLVMToSEHAndCVRegMapping(MCRegisterInfo *MRI);
+
+/// Returns true if this instruction has a LOCK prefix.
+bool hasLockPrefix(const MCInst &MI);
+
 /// Create a X86 MCSubtargetInfo instance. This is exposed so Asm parser, etc.
 /// do not need to go through TargetRegistry.
 MCSubtargetInfo *createX86MCSubtargetInfo(const Triple &TT, StringRef CPU,
Index: lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
===================================================================
--- lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -70,6 +70,10 @@
   return DWARFFlavour::X86_32_Generic;
 }
+bool X86_MC::hasLockPrefix(const MCInst &MI) {
+  return (MI.getFlags() & X86::IP_HAS_LOCK) != 0;
+}
+
 void X86_MC::initLLVMToSEHAndCVRegMapping(MCRegisterInfo *MRI) {
   // FIXME: TableGen these.
   for (unsigned Reg = X86::NoRegister + 1; Reg < X86::NUM_TARGET_REGS; ++Reg) {
Index: lib/Target/X86/X86InstrInfo.h
===================================================================
--- lib/Target/X86/X86InstrInfo.h
+++ lib/Target/X86/X86InstrInfo.h
@@ -527,6 +527,11 @@
 #define GET_INSTRINFO_HELPER_DECLS
 #include "X86GenInstrInfo.inc"
+  /// Returns true if the descriptor of \p MI has the X86II::LOCK flag set.
+  static bool hasLockPrefix(const MachineInstr &MI) {
+    return (MI.getDesc().TSFlags & X86II::LOCK) != 0;
+  }
+
   Optional describeLoadedValue(const MachineInstr &MI) const override;
Index: lib/Target/X86/X86SchedPredicates.td
===================================================================
--- lib/Target/X86/X86SchedPredicates.td
+++ lib/Target/X86/X86SchedPredicates.td
@@ -84,3 +84,60 @@
   CheckImmOperand_s<5, "X86::COND_A">,
   CheckImmOperand_s<5, "X86::COND_BE">
 ]>;
+
+// A predicate used to check if an instruction has a LOCK prefix.
+def CheckLockPrefix : CheckFunctionPredicate<
+  "X86_MC::hasLockPrefix",
+  "X86InstrInfo::hasLockPrefix"
+>;
+
+def IsRegRegCompareAndSwap_8 : CheckOpcode<[ CMPXCHG8rr ]>;
+
+def IsRegMemCompareAndSwap_8 : CheckOpcode<[
+  LCMPXCHG8, CMPXCHG8rm
+]>;
+
+def IsRegRegCompareAndSwap_16_32_64 : CheckOpcode<[
+  CMPXCHG16rr, CMPXCHG32rr, CMPXCHG64rr
+]>;
+
+def IsRegMemCompareAndSwap_16_32_64 : CheckOpcode<[
+  CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm,
+  LCMPXCHG16, LCMPXCHG32, LCMPXCHG64,
+  LCMPXCHG8B, LCMPXCHG16B
+]>;
+
+def IsCompareAndSwap8B : CheckOpcode<[ CMPXCHG8B, LCMPXCHG8B ]>;
+def IsCompareAndSwap16B : CheckOpcode<[ CMPXCHG16B, LCMPXCHG16B ]>;
+
+def IsRegMemCompareAndSwap : CheckOpcode<
+  !listconcat(
+    IsRegMemCompareAndSwap_8.ValidOpcodes,
+    IsRegMemCompareAndSwap_16_32_64.ValidOpcodes
+  )>;
+
+def IsRegRegCompareAndSwap : CheckOpcode<
+  !listconcat(
+    IsRegRegCompareAndSwap_8.ValidOpcodes,
+    IsRegRegCompareAndSwap_16_32_64.ValidOpcodes
+  )>;
+
+def IsAtomicCompareAndSwap_8 : CheckAll<[
+  CheckLockPrefix,
+  IsRegMemCompareAndSwap_8
+]>;
+
+def IsAtomicCompareAndSwap : CheckAll<[
+  CheckLockPrefix,
+  IsRegMemCompareAndSwap
+]>;
+
+def IsAtomicCompareAndSwap8B : CheckAll<[
+  CheckLockPrefix,
+  IsCompareAndSwap8B
+]>;
+
+def IsAtomicCompareAndSwap16B : CheckAll<[
+  CheckLockPrefix,
+  IsCompareAndSwap16B
+]>;
Index: lib/Target/X86/X86ScheduleBtVer2.td
===================================================================
--- lib/Target/X86/X86ScheduleBtVer2.td
+++ lib/Target/X86/X86ScheduleBtVer2.td
@@ -191,10 +191,10 @@
 defm : JWriteResIntPair;
 defm : JWriteResIntPair;
-defm : X86WriteRes;
-defm : X86WriteRes;
-defm : X86WriteRes;
-defm : X86WriteRes;
+defm : X86WriteRes;
+defm : X86WriteRes;
+defm : X86WriteRes;
+defm : X86WriteRes;
 defm : X86WriteRes;
 defm : JWriteResIntPair;
@@ -305,6 +305,73 @@
 // to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
def : WriteRes { let Latency = 1; }
+def JWriteCMPXCHG8rr : SchedWriteRes<[JALU01]> {
+  let Latency = 3;
+  let ResourceCycles = [3];
+  let NumMicroOps = 3;
+}
+
+def JWriteLOCK_CMPXCHG8rm : SchedWriteRes<[JALU01, JLAGU, JSAGU]> {
+  let Latency = 16;
+  let ResourceCycles = [3,16,16];
+  let NumMicroOps = 5;
+}
+
+def JWriteLOCK_CMPXCHGrm : SchedWriteRes<[JALU01, JLAGU, JSAGU]> {
+  let Latency = 17;
+  let ResourceCycles = [3,17,17];
+  let NumMicroOps = 6;
+}
+
+def JWriteCMPXCHG8rm : SchedWriteRes<[JALU01, JLAGU, JSAGU]> {
+  let Latency = 11;
+  let ResourceCycles = [3,1,1];
+  let NumMicroOps = 5;
+}
+
+def JWriteCMPXCHG8B : SchedWriteRes<[JALU01, JLAGU, JSAGU]> {
+  let Latency = 11;
+  let ResourceCycles = [3,1,1];
+  let NumMicroOps = 18;
+}
+
+def JWriteCMPXCHG16B : SchedWriteRes<[JALU01, JLAGU, JSAGU]> {
+  let Latency = 32;
+  let ResourceCycles = [6,1,1];
+  let NumMicroOps = 28;
+}
+
+def JWriteLOCK_CMPXCHG8B : SchedWriteRes<[JALU01, JLAGU, JSAGU]> {
+  let Latency = 19;
+  let ResourceCycles = [3,19,19];
+  let NumMicroOps = 18;
+}
+
+def JWriteLOCK_CMPXCHG16B : SchedWriteRes<[JALU01, JLAGU, JSAGU]> {
+  let Latency = 38;
+  let ResourceCycles = [6,38,38];
+  let NumMicroOps = 28;
+}
+
+def JWriteCMPXCHGVariant : SchedWriteVariant<[
+  SchedVar, [JWriteLOCK_CMPXCHG8B]>,
+  SchedVar, [JWriteLOCK_CMPXCHG16B]>,
+  SchedVar, [JWriteLOCK_CMPXCHG8rm]>,
+  SchedVar, [JWriteLOCK_CMPXCHGrm]>,
+  SchedVar, [JWriteCMPXCHG8B]>,
+  SchedVar, [JWriteCMPXCHG16B]>,
+  SchedVar, [JWriteCMPXCHG8rm]>,
+  SchedVar, [WriteCMPXCHGRMW]>,
+  SchedVar, [JWriteCMPXCHG8rr]>,
+  SchedVar
+]>;
+def : InstRW<[JWriteCMPXCHGVariant], (instrs CMPXCHG8rr, LCMPXCHG8, CMPXCHG8rm,
+                                             CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm,
+                                             LCMPXCHG16, LCMPXCHG32, LCMPXCHG64,
+                                             CMPXCHG8B, CMPXCHG16B,
+                                             LCMPXCHG8B, LCMPXCHG16B)>;
+
+
 ////////////////////////////////////////////////////////////////////////////////
 // Floating point. This covers both scalar and vector operations.
//////////////////////////////////////////////////////////////////////////////// Index: test/tools/llvm-mca/X86/BtVer2/resources-cmpxchg.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/resources-cmpxchg.s +++ test/tools/llvm-mca/X86/BtVer2/resources-cmpxchg.s @@ -15,10 +15,10 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 4 1.00 * * cmpxchg8b (%rax) -# CHECK-NEXT: 2 4 1.00 * * cmpxchg16b (%rax) -# CHECK-NEXT: 2 4 1.00 * * lock cmpxchg8b (%rax) -# CHECK-NEXT: 2 4 1.00 * * lock cmpxchg16b (%rax) +# CHECK-NEXT: 18 11 1.50 * * cmpxchg8b (%rax) +# CHECK-NEXT: 28 32 3.00 * * cmpxchg16b (%rax) +# CHECK-NEXT: 18 19 19.00 * * lock cmpxchg8b (%rax) +# CHECK-NEXT: 28 38 38.00 * * lock cmpxchg16b (%rax) # CHECK: Resources: # CHECK-NEXT: [0] - JALU0 @@ -38,11 +38,11 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] -# CHECK-NEXT: 2.00 2.00 - - - - - 4.00 - 4.00 - - - - +# CHECK-NEXT: 9.00 9.00 - - - - - 59.00 - 59.00 - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - cmpxchg8b (%rax) -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - cmpxchg16b (%rax) -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - lock cmpxchg8b (%rax) -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - lock cmpxchg16b (%rax) +# CHECK-NEXT: 1.50 1.50 - - - - - 1.00 - 1.00 - - - - cmpxchg8b (%rax) +# CHECK-NEXT: 3.00 3.00 - - - - - 1.00 - 1.00 - - - - cmpxchg16b (%rax) +# CHECK-NEXT: 1.50 1.50 - - - - - 19.00 - 19.00 - - - - lock cmpxchg8b (%rax) +# CHECK-NEXT: 3.00 3.00 - - - - - 38.00 - 38.00 - - - - lock cmpxchg16b (%rax) Index: test/tools/llvm-mca/X86/BtVer2/resources-x86_64.s =================================================================== --- 
test/tools/llvm-mca/X86/BtVer2/resources-x86_64.s +++ test/tools/llvm-mca/X86/BtVer2/resources-x86_64.s @@ -1110,18 +1110,18 @@ # CHECK-NEXT: 1 100 0.50 U cmpsw %es:(%rdi), (%rsi) # CHECK-NEXT: 1 100 0.50 U cmpsl %es:(%rdi), (%rsi) # CHECK-NEXT: 1 100 0.50 U cmpsq %es:(%rdi), (%rsi) -# CHECK-NEXT: 1 1 0.50 cmpxchgb %cl, %bl -# CHECK-NEXT: 2 4 1.00 * * cmpxchgb %cl, (%rbx) -# CHECK-NEXT: 2 4 1.00 * * lock cmpxchgb %cl, (%rbx) -# CHECK-NEXT: 1 1 0.50 cmpxchgw %cx, %bx -# CHECK-NEXT: 2 4 1.00 * * cmpxchgw %cx, (%rbx) -# CHECK-NEXT: 2 4 1.00 * * lock cmpxchgw %cx, (%rbx) -# CHECK-NEXT: 1 1 0.50 cmpxchgl %ecx, %ebx -# CHECK-NEXT: 2 4 1.00 * * cmpxchgl %ecx, (%rbx) -# CHECK-NEXT: 2 4 1.00 * * lock cmpxchgl %ecx, (%rbx) -# CHECK-NEXT: 1 1 0.50 cmpxchgq %rcx, %rbx -# CHECK-NEXT: 2 4 1.00 * * cmpxchgq %rcx, (%rbx) -# CHECK-NEXT: 2 4 1.00 * * lock cmpxchgq %rcx, (%rbx) +# CHECK-NEXT: 3 3 1.50 cmpxchgb %cl, %bl +# CHECK-NEXT: 5 11 1.50 * * cmpxchgb %cl, (%rbx) +# CHECK-NEXT: 5 16 16.00 * * lock cmpxchgb %cl, (%rbx) +# CHECK-NEXT: 5 3 1.50 cmpxchgw %cx, %bx +# CHECK-NEXT: 6 11 1.50 * * cmpxchgw %cx, (%rbx) +# CHECK-NEXT: 6 17 17.00 * * lock cmpxchgw %cx, (%rbx) +# CHECK-NEXT: 5 3 1.50 cmpxchgl %ecx, %ebx +# CHECK-NEXT: 6 11 1.50 * * cmpxchgl %ecx, (%rbx) +# CHECK-NEXT: 6 17 17.00 * * lock cmpxchgl %ecx, (%rbx) +# CHECK-NEXT: 5 3 1.50 cmpxchgq %rcx, %rbx +# CHECK-NEXT: 6 11 1.50 * * cmpxchgq %rcx, (%rbx) +# CHECK-NEXT: 6 17 17.00 * * lock cmpxchgq %rcx, (%rbx) # CHECK-NEXT: 1 100 0.50 U cpuid # CHECK-NEXT: 1 1 0.50 decb %dil # CHECK-NEXT: 1 5 1.00 * * decb (%rax) @@ -1705,7 +1705,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] -# CHECK-NEXT: 612.00 662.00 380.00 - - - - 334.00 64.00 235.00 - - - - +# CHECK-NEXT: 624.00 674.00 380.00 - - - - 397.00 64.00 298.00 - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: @@ 
-1916,18 +1916,18 @@ # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - cmpsw %es:(%rdi), (%rsi) # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - cmpsl %es:(%rdi), (%rsi) # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - cmpsq %es:(%rdi), (%rsi) -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - cmpxchgb %cl, %bl -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - cmpxchgb %cl, (%rbx) -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - lock cmpxchgb %cl, (%rbx) -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - cmpxchgw %cx, %bx -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - cmpxchgw %cx, (%rbx) -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - lock cmpxchgw %cx, (%rbx) -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - cmpxchgl %ecx, %ebx -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - cmpxchgl %ecx, (%rbx) -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - lock cmpxchgl %ecx, (%rbx) -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - cmpxchgq %rcx, %rbx -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - cmpxchgq %rcx, (%rbx) -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - lock cmpxchgq %rcx, (%rbx) +# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - cmpxchgb %cl, %bl +# CHECK-NEXT: 1.50 1.50 - - - - - 1.00 - 1.00 - - - - cmpxchgb %cl, (%rbx) +# CHECK-NEXT: 1.50 1.50 - - - - - 16.00 - 16.00 - - - - lock cmpxchgb %cl, (%rbx) +# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - cmpxchgw %cx, %bx +# CHECK-NEXT: 1.50 1.50 - - - - - 1.00 - 1.00 - - - - cmpxchgw %cx, (%rbx) +# CHECK-NEXT: 1.50 1.50 - - - - - 17.00 - 17.00 - - - - lock cmpxchgw %cx, (%rbx) +# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - cmpxchgl %ecx, %ebx +# CHECK-NEXT: 1.50 1.50 - - - - - 1.00 - 1.00 - - - - cmpxchgl %ecx, (%rbx) +# CHECK-NEXT: 1.50 1.50 - - - - - 17.00 - 17.00 - - - - lock cmpxchgl %ecx, (%rbx) +# CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - - cmpxchgq %rcx, %rbx +# CHECK-NEXT: 1.50 1.50 - - - - - 1.00 - 1.00 - - - - cmpxchgq %rcx, (%rbx) +# CHECK-NEXT: 1.50 1.50 - 
- - - - 17.00 - 17.00 - - - - lock cmpxchgq %rcx, (%rbx) # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - cpuid # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - decb %dil # CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - - - decb (%rax)