Index: llvm/trunk/lib/Target/X86/X86.td =================================================================== --- llvm/trunk/lib/Target/X86/X86.td +++ llvm/trunk/lib/Target/X86/X86.td @@ -400,6 +400,10 @@ def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true", "Support movdir64b instruction">; +def FeatureFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true", + "Indicates that the BEXTR instruction is implemented as a single uop " + "with good throughput.">; + //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// @@ -987,6 +991,7 @@ FeatureSlowSHLD, FeatureLAHFSAHF, FeatureFast15ByteNOP, + FeatureFastBEXTR, FeatureFastPartialYMMorZMMWrite ]>; @@ -1042,6 +1047,7 @@ FeatureSlowSHLD, FeatureLAHFSAHF, FeatureFast11ByteNOP, + FeatureFastBEXTR, FeatureMacroFusion ]>; @@ -1074,6 +1080,7 @@ FeatureFSGSBase, FeatureLAHFSAHF, FeatureFast11ByteNOP, + FeatureFastBEXTR, FeatureMacroFusion ]>; @@ -1105,6 +1112,7 @@ FeatureSlowSHLD, FeatureFSGSBase, FeatureLAHFSAHF, + FeatureFastBEXTR, FeatureFast11ByteNOP, FeatureMWAITX, FeatureMacroFusion @@ -1130,6 +1138,7 @@ FeatureFastLZCNT, FeatureLAHFSAHF, FeatureLZCNT, + FeatureFastBEXTR, FeatureFast15ByteNOP, FeatureMacroFusion, FeatureMMX, Index: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp +++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2590,7 +2590,14 @@ SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); - if (!Subtarget->hasBMI() && !Subtarget->hasTBM()) + // If we have TBM we can use an immediate for the control. If we have BMI + // we should only do this if the BEXTR instruction is implemented well. + // Otherwise moving the control into a register makes this more costly. + // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM + // hoisting the move immediate would make it worthwhile with a less optimal + // BEXTR? + if (!Subtarget->hasTBM() && + !(Subtarget->hasBMI() && Subtarget->hasFastBEXTR())) return false; // Must have a shift right. Index: llvm/trunk/lib/Target/X86/X86Subtarget.h =================================================================== --- llvm/trunk/lib/Target/X86/X86Subtarget.h +++ llvm/trunk/lib/Target/X86/X86Subtarget.h @@ -385,6 +385,9 @@ /// Processor supports PCONFIG instruction bool HasPCONFIG = false; + /// Processor has a single uop BEXTR implementation. + bool HasFastBEXTR = false; + /// Use a retpoline thunk rather than indirect calls to block speculative /// execution. bool UseRetpolineIndirectCalls = false; @@ -629,6 +632,7 @@ bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; } bool hasFastLZCNT() const { return HasFastLZCNT; } bool hasFastSHLDRotate() const { return HasFastSHLDRotate; } + bool hasFastBEXTR() const { return HasFastBEXTR; } bool hasMacroFusion() const { return HasMacroFusion; } bool hasERMSB() const { return HasERMSB; } bool hasSlowDivide32() const { return HasSlowDivide32; } Index: llvm/trunk/test/CodeGen/X86/bmi-x86_64.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/bmi-x86_64.ll +++ llvm/trunk/test/CodeGen/X86/bmi-x86_64.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,BMI1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK,BMI2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,BEXTR-SLOW,BMI1,BMI1-SLOW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK,BEXTR-SLOW,BMI2,BMI2-SLOW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+fast-bextr | FileCheck %s --check-prefixes=CHECK,BEXTR-FAST,BMI1,BMI1-FAST +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2,+fast-bextr | FileCheck %s --check-prefixes=CHECK,BEXTR-FAST,BMI2,BMI2-FAST declare i64 @llvm.x86.bmi.bextr.64(i64, i64) @@ -14,11 +16,18 @@ } define i64 @bextr64b(i64 %x) uwtable ssp { -; CHECK-LABEL: bextr64b: -; CHECK: # %bb.0: -; CHECK-NEXT: movl $3076, %eax # imm = 0xC04 -; CHECK-NEXT: bextrl %eax, %edi, %eax -; CHECK-NEXT: retq +; BEXTR-SLOW-LABEL: bextr64b: +; BEXTR-SLOW: # %bb.0: +; BEXTR-SLOW-NEXT: movq %rdi, %rax +; BEXTR-SLOW-NEXT: shrl $4, %eax +; BEXTR-SLOW-NEXT: andl $4095, %eax # imm = 0xFFF +; BEXTR-SLOW-NEXT: retq +; +; BEXTR-FAST-LABEL: bextr64b: +; BEXTR-FAST: # %bb.0: +; BEXTR-FAST-NEXT: movl $3076, %eax # imm = 0xC04 +; BEXTR-FAST-NEXT: bextrl %eax, %edi, %eax +; BEXTR-FAST-NEXT: retq %1 = lshr i64 %x, 4 %2 = and i64 %1, 4095 ret i64 %2 @@ -37,11 +46,18 @@ } define i64 @bextr64b_load(i64* %x) { -; CHECK-LABEL: bextr64b_load: -; CHECK: # %bb.0: -; CHECK-NEXT: movl $3076, %eax # imm = 0xC04 -; CHECK-NEXT: bextrl %eax, (%rdi), %eax -; CHECK-NEXT: retq +; BEXTR-SLOW-LABEL: bextr64b_load: +; BEXTR-SLOW: # %bb.0: +; BEXTR-SLOW-NEXT: movl (%rdi), %eax +; BEXTR-SLOW-NEXT: shrl $4, %eax +; BEXTR-SLOW-NEXT: andl $4095, %eax # imm = 0xFFF +; BEXTR-SLOW-NEXT: retq +; +; BEXTR-FAST-LABEL: bextr64b_load: +; BEXTR-FAST: # %bb.0: +; BEXTR-FAST-NEXT: movl $3076, %eax # imm = 0xC04 +; BEXTR-FAST-NEXT: bextrl %eax, (%rdi), %eax +; BEXTR-FAST-NEXT: retq %1 = load i64, i64* %x, align 8 %2 = lshr i64 %1, 4 %3 = and i64 %2, 4095 @@ -61,11 +77,25 @@ } define i64 @bextr64d(i64 %a) { -; CHECK-LABEL: bextr64d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl $8450, %eax # imm = 0x2102 -; CHECK-NEXT: bextrq %rax, %rdi, %rax -; CHECK-NEXT: retq +; BMI1-SLOW-LABEL: bextr64d: +; BMI1-SLOW: # %bb.0: # %entry +; BMI1-SLOW-NEXT: shrq $2, %rdi +; BMI1-SLOW-NEXT: movl $8448, %eax # imm = 0x2100 +; BMI1-SLOW-NEXT: bextrq %rax, %rdi, %rax +; BMI1-SLOW-NEXT: retq +; +; BMI2-SLOW-LABEL: bextr64d: +; BMI2-SLOW: # %bb.0: # %entry +; BMI2-SLOW-NEXT: shrq $2, %rdi +; BMI2-SLOW-NEXT: movb $33, %al +; BMI2-SLOW-NEXT: bzhiq %rax, %rdi, %rax +; BMI2-SLOW-NEXT: retq +; +; BEXTR-FAST-LABEL: bextr64d: +; BEXTR-FAST: # %bb.0: # %entry +; BEXTR-FAST-NEXT: movl $8450, %eax # imm = 0x2102 +; BEXTR-FAST-NEXT: bextrq %rax, %rdi, %rax +; BEXTR-FAST-NEXT: retq entry: %shr = lshr i64 %a, 2 %and = and i64 %shr, 8589934591 Index: llvm/trunk/test/CodeGen/X86/bmi.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/bmi.ll +++ llvm/trunk/test/CodeGen/X86/bmi.ll @@ -1,8 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1 -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK,X86,BMI2,X86-BMI2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK,X64,BMI2,X64-BMI2 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,X86,X86-SLOW-BEXTR +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK,X86,X86-SLOW-BEXTR +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,X64,X64-SLOW-BEXTR +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK,X64,X64-SLOW-BEXTR +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+fast-bextr | FileCheck %s --check-prefixes=CHECK,X86,X86-FAST-BEXTR +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+fast-bextr | FileCheck %s --check-prefixes=CHECK,X64,X64-FAST-BEXTR define i32 @andn32(i32 %x, i32 %y) { ; X86-LABEL: andn32: @@ -342,17 +344,31 @@ } define i32 @bextr32b(i32 %x) uwtable ssp { -; X86-LABEL: bextr32b: -; X86: # %bb.0: -; X86-NEXT: movl $3076, %eax # imm = 0xC04 -; X86-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-NEXT: retl -; -; X64-LABEL: bextr32b: -; X64: # %bb.0: -; X64-NEXT: movl $3076, %eax # imm = 0xC04 -; X64-NEXT: bextrl %eax, %edi, %eax -; X64-NEXT: retq +; X86-SLOW-BEXTR-LABEL: bextr32b: +; X86-SLOW-BEXTR: # %bb.0: +; X86-SLOW-BEXTR-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SLOW-BEXTR-NEXT: shrl $4, %eax +; X86-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF +; X86-SLOW-BEXTR-NEXT: retl +; +; X64-SLOW-BEXTR-LABEL: bextr32b: +; X64-SLOW-BEXTR: # %bb.0: +; X64-SLOW-BEXTR-NEXT: movl %edi, %eax +; X64-SLOW-BEXTR-NEXT: shrl $4, %eax +; X64-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF +; X64-SLOW-BEXTR-NEXT: retq +; +; X86-FAST-BEXTR-LABEL: bextr32b: +; X86-FAST-BEXTR: # %bb.0: +; X86-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04 +; X86-FAST-BEXTR-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-FAST-BEXTR-NEXT: retl +; +; X64-FAST-BEXTR-LABEL: bextr32b: +; X64-FAST-BEXTR: # %bb.0: +; X64-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04 +; X64-FAST-BEXTR-NEXT: bextrl %eax, %edi, %eax +; X64-FAST-BEXTR-NEXT: retq %1 = lshr i32 %x, 4 %2 = and i32 %1, 4095 ret i32 %2 @@ -376,18 +392,33 @@ } define i32 @bextr32b_load(i32* %x) uwtable ssp { -; X86-LABEL: bextr32b_load: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $3076, %ecx # imm = 0xC04 -; X86-NEXT: bextrl %ecx, (%eax), %eax -; X86-NEXT: retl -; -; X64-LABEL: bextr32b_load: -; X64: # %bb.0: -; X64-NEXT: movl $3076, %eax # imm = 0xC04 -; X64-NEXT: bextrl %eax, (%rdi), %eax -; X64-NEXT: retq +; X86-SLOW-BEXTR-LABEL: bextr32b_load: +; X86-SLOW-BEXTR: # %bb.0: +; X86-SLOW-BEXTR-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SLOW-BEXTR-NEXT: movl (%eax), %eax +; X86-SLOW-BEXTR-NEXT: shrl $4, %eax +; X86-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF +; X86-SLOW-BEXTR-NEXT: retl +; +; X64-SLOW-BEXTR-LABEL: bextr32b_load: +; X64-SLOW-BEXTR: # %bb.0: +; X64-SLOW-BEXTR-NEXT: movl (%rdi), %eax +; X64-SLOW-BEXTR-NEXT: shrl $4, %eax +; X64-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF +; X64-SLOW-BEXTR-NEXT: retq +; +; X86-FAST-BEXTR-LABEL: bextr32b_load: +; X86-FAST-BEXTR: # %bb.0: +; X86-FAST-BEXTR-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-FAST-BEXTR-NEXT: movl $3076, %ecx # imm = 0xC04 +; X86-FAST-BEXTR-NEXT: bextrl %ecx, (%eax), %eax +; X86-FAST-BEXTR-NEXT: retl +; +; X64-FAST-BEXTR-LABEL: bextr32b_load: +; X64-FAST-BEXTR: # %bb.0: +; X64-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04 +; X64-FAST-BEXTR-NEXT: bextrl %eax, (%rdi), %eax +; X64-FAST-BEXTR-NEXT: retq %1 = load i32, i32* %x %2 = lshr i32 %1, 4 %3 = and i32 %2, 4095 Index: llvm/trunk/test/CodeGen/X86/extract-bits.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/extract-bits.ll +++ llvm/trunk/test/CodeGen/X86/extract-bits.ll @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,NOBMI,X86-NOBMI -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1NOTBM,X86-BMI1NOTBM -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1TBM,X86-BMI1TBM -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1TBM,X86-BMI1TBM,BMI1TBMBMI2,X86-BMI1TBMBMI2 -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1NOTBMBMI2,X86-BMI1NOTBMBMI2 -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,NOBMI,X64-NOBMI -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1NOTBM,X64-BMI1NOTBM -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1TBM,X64-BMI1TBM -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1TBM,X64-BMI1TBM,BMI1TBMBMI2,X64-BMI1TBMBMI2 -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1NOTBMBMI2,X64-BMI1NOTBMBMI2 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,NOBMI,X86-NOBMI +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1NOTBM,X86-BMI1NOTBM +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1TBM,X86-BMI1TBM +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1TBM,X86-BMI1TBM,BMI1TBMBMI2,X86-BMI1TBMBMI2 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1NOTBMBMI2,X86-BMI1NOTBMBMI2 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X64,NOBMI,X64-NOBMI +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1NOTBM,X64-BMI1NOTBM +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1TBM,X64-BMI1TBM +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1TBM,X64-BMI1TBM,BMI1TBMBMI2,X64-BMI1TBMBMI2 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1NOTBMBMI2,X64-BMI1NOTBMBMI2 ; *Please* keep in sync with test/CodeGen/AArch64/extract-bits.ll