Skip to content

Commit 3d14158

Browse files
committed Jun 6, 2018
[X86][BMI][TBM] Only demand bottom 16-bits of the BEXTR control op (PR34042)
Only the bottom 16 bits of BEXTR's control op are required (bits 7:0 = INDEX, bits 15:8 = LENGTH).
Differential Revision: https://reviews.llvm.org/D47690
llvm-svn: 334083
1 parent 1b8bfd7 commit 3d14158

8 files changed

+99
-45
lines changed
 

‎llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -1780,10 +1780,10 @@ bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
17801780
}
17811781

17821782
// In static codegen with small code model, we can get the address of a label
1783-
// into a register with 'movl'. TableGen has already made sure we're looking
1784-
// at a label of some kind.
1785-
assert(N->getOpcode() == X86ISD::Wrapper &&
1786-
"Unexpected node type for MOV32ri64");
1783+
// into a register with 'movl'
1784+
if (N->getOpcode() != X86ISD::Wrapper)
1785+
return false;
1786+
17871787
N = N.getOperand(0);
17881788

17891789
// At least GNU as does not accept 'movl' for TPOFF relocations.

‎llvm/lib/Target/X86/X86ISelLowering.cpp

+34
Original file line numberDiff line numberDiff line change
@@ -36834,6 +36834,39 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
3683436834
return SDValue();
3683536835
}
3683636836

36837+
/// DAG combine for X86ISD::BEXTR nodes.
///
/// Narrows the control operand (operand 1) to its low 16 bits:
///  - a constant control value with bits set above bit 15 is replaced by a
///    new BEXTR node using the masked constant;
///  - otherwise SimplifyDemandedBits is asked to simplify the control operand
///    given that only its low 16 bits are demanded.
///
/// \returns the rebuilt node, SDValue(N, 0) when a demanded-bits
/// simplification was committed, or an empty SDValue when nothing changed.
/// \p Subtarget is not read by this function.
static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG,
36838+
TargetLowering::DAGCombinerInfo &DCI,
36839+
const X86Subtarget &Subtarget) {
36840+
// Operand 0 is forwarded unchanged; operand 1 is the control operand that
// this combine narrows.
SDValue Op0 = N->getOperand(0);
36841+
SDValue Op1 = N->getOperand(1);
36842+
EVT VT = N->getValueType(0);
36843+
unsigned NumBits = VT.getSizeInBits();
36844+
36845+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
36846+
// TLO collects whatever replacement SimplifyDemandedBits decides on so that
// it can be committed through DCI further down.
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
36847+
!DCI.isBeforeLegalizeOps());
36848+
36849+
// TODO - Constant Folding.
36850+
if (auto *Cst1 = dyn_cast<ConstantSDNode>(Op1)) {
36851+
// Reduce Cst1 to the bottom 16-bits.
36852+
// NOTE: SimplifyDemandedBits won't do this for constants.
36853+
const APInt &Val1 = Cst1->getAPIntValue();
36854+
APInt MaskedVal1 = Val1 & 0xFFFF;
36855+
// Only rebuild when masking actually cleared something; a constant that
// already fits in 16 bits falls through to the demanded-bits path.
if (MaskedVal1 != Val1)
36856+
return DAG.getNode(X86ISD::BEXTR, SDLoc(N), VT, Op0,
36857+
DAG.getConstant(MaskedVal1, SDLoc(N), VT));
36858+
}
36859+
36860+
// Only bottom 16-bits of the control bits are required.
// (Presumably the low byte is the start index and the next byte the length;
// confirm the exact field layout against the ISA manual.)
36861+
KnownBits Known;
36862+
APInt DemandedMask(APInt::getLowBitsSet(NumBits, 16));
36863+
if (TLI.SimplifyDemandedBits(Op1, DemandedMask, Known, TLO)) {
36864+
DCI.CommitTargetLoweringOpt(TLO);
36865+
// Returning N itself (instead of an empty SDValue) reports that a change
// was made; NOTE(review): relies on the usual DAG-combine convention.
return SDValue(N, 0);
36866+
}
36867+
36868+
// No simplification applied.
return SDValue();
36869+
}
3683736870

3683836871
static bool isNullFPScalarOrVectorConst(SDValue V) {
3683936872
return isNullFPConstant(V) || ISD::isBuildVectorAllZeros(V.getNode());
@@ -39220,6 +39253,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
3922039253
case ISD::AND: return combineAnd(N, DAG, DCI, Subtarget);
3922139254
case ISD::OR: return combineOr(N, DAG, DCI, Subtarget);
3922239255
case ISD::XOR: return combineXor(N, DAG, DCI, Subtarget);
39256+
case X86ISD::BEXTR: return combineBEXTR(N, DAG, DCI, Subtarget);
3922339257
case ISD::LOAD: return combineLoad(N, DAG, DCI, Subtarget);
3922439258
case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget);
3922539259
case ISD::STORE: return combineStore(N, DAG, Subtarget);

‎llvm/lib/Target/X86/X86InstrCompiler.td

-11
Original file line numberDiff line numberDiff line change
@@ -2040,14 +2040,3 @@ let Predicates = [HasBMI, NoTBM] in {
20402040
(MOV32ri64 mov64imm32:$src2),
20412041
sub_32bit))>;
20422042
} // HasBMI, NoTBM
2043-
2044-
let Predicates = [HasTBM] in {
2045-
def : Pat<(X86bextr GR32:$src1, (i32 imm:$src2)),
2046-
(BEXTRI32ri GR32:$src1, imm:$src2)>;
2047-
def : Pat<(X86bextr (loadi32 addr:$src1), (i32 imm:$src2)),
2048-
(BEXTRI32mi addr:$src1, imm:$src2)>;
2049-
def : Pat<(X86bextr GR64:$src1, i64immSExt32:$src2),
2050-
(BEXTRI64ri GR64:$src1, i64immSExt32:$src2)>;
2051-
def : Pat<(X86bextr (loadi64 addr:$src1), i64immSExt32:$src2),
2052-
(BEXTRI64mi addr:$src1, i64immSExt32:$src2)>;
2053-
}

‎llvm/lib/Target/X86/X86InstrInfo.td

+44-26
Original file line numberDiff line numberDiff line change
@@ -2357,16 +2357,16 @@ let Predicates = [HasBMI] in {
23572357
(BLSI64rr GR64:$src)>;
23582358
}
23592359

2360-
multiclass bmi_bextr_bzhi<bits<8> opc, string mnemonic, RegisterClass RC,
2361-
X86MemOperand x86memop, Intrinsic Int,
2362-
PatFrag ld_frag, X86FoldableSchedWrite Sched> {
2360+
multiclass bmi_bextr<bits<8> opc, string mnemonic, RegisterClass RC,
2361+
X86MemOperand x86memop, SDNode OpNode,
2362+
PatFrag ld_frag, X86FoldableSchedWrite Sched> {
23632363
def rr : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
23642364
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2365-
[(set RC:$dst, (Int RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
2365+
[(set RC:$dst, (OpNode RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
23662366
T8PS, VEX, Sched<[Sched]>;
23672367
def rm : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
23682368
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2369-
[(set RC:$dst, (Int (ld_frag addr:$src1), RC:$src2)),
2369+
[(set RC:$dst, (OpNode (ld_frag addr:$src1), RC:$src2)),
23702370
(implicit EFLAGS)]>, T8PS, VEX,
23712371
Sched<[Sched.Folded,
23722372
// x86memop:$src1
@@ -2377,17 +2377,36 @@ multiclass bmi_bextr_bzhi<bits<8> opc, string mnemonic, RegisterClass RC,
23772377
}
23782378

23792379
let Predicates = [HasBMI], Defs = [EFLAGS] in {
2380-
defm BEXTR32 : bmi_bextr_bzhi<0xF7, "bextr{l}", GR32, i32mem,
2381-
int_x86_bmi_bextr_32, loadi32, WriteBEXTR>;
2382-
defm BEXTR64 : bmi_bextr_bzhi<0xF7, "bextr{q}", GR64, i64mem,
2383-
int_x86_bmi_bextr_64, loadi64, WriteBEXTR>, VEX_W;
2380+
defm BEXTR32 : bmi_bextr<0xF7, "bextr{l}", GR32, i32mem,
2381+
X86bextr, loadi32, WriteBEXTR>;
2382+
defm BEXTR64 : bmi_bextr<0xF7, "bextr{q}", GR64, i64mem,
2383+
X86bextr, loadi64, WriteBEXTR>, VEX_W;
2384+
}
2385+
2386+
// Defines the two forms of a BZHI-style instruction that is selected directly
// from the intrinsic `Int`:
//   rr - both sources in registers of class RC;
//   rm - first source loaded from memory via `ld_frag`, second in a register.
// Both forms list EFLAGS as an implicit result in their selection pattern.
multiclass bmi_bzhi<bits<8> opc, string mnemonic, RegisterClass RC,
2387+
X86MemOperand x86memop, Intrinsic Int,
2388+
PatFrag ld_frag, X86FoldableSchedWrite Sched> {
2389+
// Register-register form.
def rr : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
2390+
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2391+
[(set RC:$dst, (Int RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
2392+
T8PS, VEX, Sched<[Sched]>;
2393+
// Register-memory form: $src1 is a memory reference, $src2 stays in a register.
def rm : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
2394+
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2395+
[(set RC:$dst, (Int (ld_frag addr:$src1), RC:$src2)),
2396+
(implicit EFLAGS)]>, T8PS, VEX,
2397+
Sched<[Sched.Folded,
2398+
// x86memop:$src1
// (five ReadDefault entries - presumably one per sub-operand of the
// memory reference; confirm against the X86 memory operand layout)
2399+
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
2400+
ReadDefault,
2401+
// RC:$src2
2402+
ReadAfterLd]>;
23842403
}
23852404

23862405
let Predicates = [HasBMI2], Defs = [EFLAGS] in {
2387-
defm BZHI32 : bmi_bextr_bzhi<0xF5, "bzhi{l}", GR32, i32mem,
2388-
int_x86_bmi_bzhi_32, loadi32, WriteBZHI>;
2389-
defm BZHI64 : bmi_bextr_bzhi<0xF5, "bzhi{q}", GR64, i64mem,
2390-
int_x86_bmi_bzhi_64, loadi64, WriteBZHI>, VEX_W;
2406+
defm BZHI32 : bmi_bzhi<0xF5, "bzhi{l}", GR32, i32mem,
2407+
int_x86_bmi_bzhi_32, loadi32, WriteBZHI>;
2408+
defm BZHI64 : bmi_bzhi<0xF5, "bzhi{q}", GR64, i64mem,
2409+
int_x86_bmi_bzhi_64, loadi64, WriteBZHI>, VEX_W;
23912410
}
23922411

23932412
def CountTrailingOnes : SDNodeXForm<imm, [{
@@ -2507,31 +2526,30 @@ let Predicates = [HasBMI2] in {
25072526
//
25082527
let Predicates = [HasTBM], Defs = [EFLAGS] in {
25092528

2510-
multiclass tbm_ternary_imm_intr<bits<8> opc, RegisterClass RC, string OpcodeStr,
2511-
X86MemOperand x86memop, PatFrag ld_frag,
2512-
Intrinsic Int, Operand immtype,
2513-
SDPatternOperator immoperator,
2514-
X86FoldableSchedWrite Sched> {
2529+
multiclass tbm_ternary_imm<bits<8> opc, RegisterClass RC, string OpcodeStr,
2530+
X86MemOperand x86memop, PatFrag ld_frag,
2531+
SDNode OpNode, Operand immtype,
2532+
SDPatternOperator immoperator,
2533+
X86FoldableSchedWrite Sched> {
25152534
def ri : Ii32<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, immtype:$cntl),
25162535
!strconcat(OpcodeStr,
25172536
"\t{$cntl, $src1, $dst|$dst, $src1, $cntl}"),
2518-
[(set RC:$dst, (Int RC:$src1, immoperator:$cntl))]>,
2537+
[(set RC:$dst, (OpNode RC:$src1, immoperator:$cntl))]>,
25192538
XOP, XOPA, Sched<[Sched]>;
25202539
def mi : Ii32<opc, MRMSrcMem, (outs RC:$dst),
25212540
(ins x86memop:$src1, immtype:$cntl),
25222541
!strconcat(OpcodeStr,
25232542
"\t{$cntl, $src1, $dst|$dst, $src1, $cntl}"),
2524-
[(set RC:$dst, (Int (ld_frag addr:$src1), immoperator:$cntl))]>,
2543+
[(set RC:$dst, (OpNode (ld_frag addr:$src1), immoperator:$cntl))]>,
25252544
XOP, XOPA, Sched<[Sched.Folded]>;
25262545
}
25272546

2528-
defm BEXTRI32 : tbm_ternary_imm_intr<0x10, GR32, "bextr{l}", i32mem, loadi32,
2529-
int_x86_tbm_bextri_u32, i32imm, imm,
2530-
WriteBEXTR>;
2547+
defm BEXTRI32 : tbm_ternary_imm<0x10, GR32, "bextr{l}", i32mem, loadi32,
2548+
X86bextr, i32imm, imm, WriteBEXTR>;
25312549
let ImmT = Imm32S in
2532-
defm BEXTRI64 : tbm_ternary_imm_intr<0x10, GR64, "bextr{q}", i64mem, loadi64,
2533-
int_x86_tbm_bextri_u64, i64i32imm,
2534-
i64immSExt32, WriteBEXTR>, VEX_W;
2550+
defm BEXTRI64 : tbm_ternary_imm<0x10, GR64, "bextr{q}", i64mem, loadi64,
2551+
X86bextr, i64i32imm,
2552+
i64immSExt32, WriteBEXTR>, VEX_W;
25352553

25362554
multiclass tbm_binary_rm<bits<8> opc, Format FormReg, Format FormMem,
25372555
RegisterClass RC, string OpcodeStr,

‎llvm/lib/Target/X86/X86IntrinsicsInfo.h

+4
Original file line numberDiff line numberDiff line change
@@ -1345,6 +1345,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
13451345
X86_INTRINSIC_DATA(avx512_vpmadd52l_uq_128 , IFMA_OP, X86ISD::VPMADD52L, 0),
13461346
X86_INTRINSIC_DATA(avx512_vpmadd52l_uq_256 , IFMA_OP, X86ISD::VPMADD52L, 0),
13471347
X86_INTRINSIC_DATA(avx512_vpmadd52l_uq_512 , IFMA_OP, X86ISD::VPMADD52L, 0),
1348+
X86_INTRINSIC_DATA(bmi_bextr_32, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
1349+
X86_INTRINSIC_DATA(bmi_bextr_64, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
13481350
X86_INTRINSIC_DATA(fma_vfmadd_pd, INTR_TYPE_3OP, ISD::FMA, 0),
13491351
X86_INTRINSIC_DATA(fma_vfmadd_pd_256, INTR_TYPE_3OP, ISD::FMA, 0),
13501352
X86_INTRINSIC_DATA(fma_vfmadd_ps, INTR_TYPE_3OP, ISD::FMA, 0),
@@ -1456,6 +1458,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
14561458
X86_INTRINSIC_DATA(ssse3_pmadd_ub_sw_128, INTR_TYPE_2OP, X86ISD::VPMADDUBSW, 0),
14571459
X86_INTRINSIC_DATA(ssse3_pmul_hr_sw_128, INTR_TYPE_2OP, X86ISD::MULHRS, 0),
14581460
X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
1461+
X86_INTRINSIC_DATA(tbm_bextri_u32, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
1462+
X86_INTRINSIC_DATA(tbm_bextri_u64, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
14591463
X86_INTRINSIC_DATA(vcvtph2ps_128, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
14601464
X86_INTRINSIC_DATA(vcvtph2ps_256, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
14611465
X86_INTRINSIC_DATA(vcvtps2ph_128, INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0),

‎llvm/test/CodeGen/X86/bmi-x86_64.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,8 @@ define i64 @bextr64b_load(i64* %x) {
5252
define i64 @bextr64c(i64 %x, i32 %y) {
5353
; CHECK-LABEL: bextr64c:
5454
; CHECK: # %bb.0:
55-
; CHECK-NEXT: movslq %esi, %rax
56-
; CHECK-NEXT: bextrq %rax, %rdi, %rax
55+
; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
56+
; CHECK-NEXT: bextrq %rsi, %rdi, %rax
5757
; CHECK-NEXT: retq
5858
%tmp0 = sext i32 %y to i64
5959
%tmp1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %x, i64 %tmp0)

‎llvm/test/CodeGen/X86/bmi.ll

+1-2
Original file line numberDiff line numberDiff line change
@@ -404,8 +404,7 @@ define i32 @bextr32c(i32 %x, i16 zeroext %y) {
404404
;
405405
; X64-LABEL: bextr32c:
406406
; X64: # %bb.0:
407-
; X64-NEXT: movswl %si, %eax
408-
; X64-NEXT: bextrl %eax, %edi, %eax
407+
; X64-NEXT: bextrl %esi, %edi, %eax
409408
; X64-NEXT: retq
410409
%tmp0 = sext i16 %y to i32
411410
%tmp1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %tmp0)

‎llvm/test/CodeGen/X86/tbm-intrinsics-x86_64.ll

+10
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,16 @@ entry:
2424
ret i64 %0
2525
}
2626

27+
; The control immediate 549755813887 is 0x7FFFFFFFFF, i.e. wider than 16 bits.
; The expected bextrq immediate below is $65535 (0xFFFF), which is that value
; truncated to its low 16 bits.
define i64 @test_x86_tbm_bextri_u64_bigint(i64 %a) nounwind readnone {
28+
; CHECK-LABEL: test_x86_tbm_bextri_u64_bigint:
29+
; CHECK: # %bb.0: # %entry
30+
; CHECK-NEXT: bextrq $65535, %rdi, %rax # imm = 0xFFFF
31+
; CHECK-NEXT: retq
32+
entry:
33+
%0 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %a, i64 549755813887)
34+
ret i64 %0
35+
}
36+
2737
define i64 @test_x86_tbm_bextri_u64_z(i64 %a, i64 %b) nounwind readnone {
2838
; CHECK-LABEL: test_x86_tbm_bextri_u64_z:
2939
; CHECK: # %bb.0: # %entry

0 commit comments

Comments
 (0)
Please sign in to comment.