@@ -16492,6 +16492,11 @@ static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
16492
16492
const X86Subtarget &Subtarget,
16493
16493
SelectionDAG &DAG, SDLoc dl) {
16494
16494
16495
+ if (isAllOnesConstant(Mask))
16496
+ return DAG.getTargetConstant(1, dl, MaskVT);
16497
+ if (X86::isZeroNode(Mask))
16498
+ return DAG.getTargetConstant(0, dl, MaskVT);
16499
+
16495
16500
if (MaskVT.bitsGT(Mask.getSimpleValueType())) {
16496
16501
// Mask should be extended
16497
16502
Mask = DAG.getNode(ISD::ANY_EXTEND, dl,
@@ -17409,79 +17414,52 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
17409
17414
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
17410
17415
MVT MaskVT = MVT::getVectorVT(MVT::i1,
17411
17416
Index.getSimpleValueType().getVectorNumElements());
17412
- SDValue MaskInReg;
17413
- ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
17414
- if (MaskC)
17415
- MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT);
17416
- else {
17417
- MVT BitcastVT = MVT::getVectorVT(MVT::i1,
17418
- Mask.getSimpleValueType().getSizeInBits());
17419
17417
17420
- // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements
17421
- // are extracted by EXTRACT_SUBVECTOR.
17422
- MaskInReg = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
17423
- DAG.getBitcast(BitcastVT, Mask),
17424
- DAG.getIntPtrConstant(0, dl));
17425
- }
17418
+ SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
17426
17419
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
17427
17420
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
17428
17421
SDValue Segment = DAG.getRegister(0, MVT::i32);
17429
17422
if (Src.getOpcode() == ISD::UNDEF)
17430
17423
Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
17431
- SDValue Ops[] = {Src, MaskInReg , Base, Scale, Index, Disp, Segment, Chain};
17424
+ SDValue Ops[] = {Src, VMask , Base, Scale, Index, Disp, Segment, Chain};
17432
17425
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
17433
17426
SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
17434
17427
return DAG.getMergeValues(RetOps, dl);
17435
17428
}
17436
17429
17437
17430
static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
17438
17431
SDValue Src, SDValue Mask, SDValue Base,
17439
- SDValue Index, SDValue ScaleOp, SDValue Chain) {
17432
+ SDValue Index, SDValue ScaleOp, SDValue Chain,
17433
+ const X86Subtarget &Subtarget) {
17440
17434
SDLoc dl(Op);
17441
17435
auto *C = cast<ConstantSDNode>(ScaleOp);
17442
17436
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
17443
17437
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
17444
17438
SDValue Segment = DAG.getRegister(0, MVT::i32);
17445
17439
MVT MaskVT = MVT::getVectorVT(MVT::i1,
17446
17440
Index.getSimpleValueType().getVectorNumElements());
17447
- SDValue MaskInReg;
17448
- ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
17449
- if (MaskC)
17450
- MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT);
17451
- else {
17452
- MVT BitcastVT = MVT::getVectorVT(MVT::i1,
17453
- Mask.getSimpleValueType().getSizeInBits());
17454
17441
17455
- // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements
17456
- // are extracted by EXTRACT_SUBVECTOR.
17457
- MaskInReg = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
17458
- DAG.getBitcast(BitcastVT, Mask),
17459
- DAG.getIntPtrConstant(0, dl));
17460
- }
17442
+ SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
17461
17443
SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
17462
- SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg , Src, Chain};
17444
+ SDValue Ops[] = {Base, Scale, Index, Disp, Segment, VMask , Src, Chain};
17463
17445
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
17464
17446
return SDValue(Res, 1);
17465
17447
}
17466
17448
17467
17449
static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
17468
17450
SDValue Mask, SDValue Base, SDValue Index,
17469
- SDValue ScaleOp, SDValue Chain) {
17451
+ SDValue ScaleOp, SDValue Chain,
17452
+ const X86Subtarget &Subtarget) {
17470
17453
SDLoc dl(Op);
17471
17454
auto *C = cast<ConstantSDNode>(ScaleOp);
17472
17455
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
17473
17456
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
17474
17457
SDValue Segment = DAG.getRegister(0, MVT::i32);
17475
17458
MVT MaskVT =
17476
17459
MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements());
17477
- SDValue MaskInReg;
17478
- ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
17479
- if (MaskC)
17480
- MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT);
17481
- else
17482
- MaskInReg = DAG.getBitcast(MaskVT, Mask);
17460
+ SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
17483
17461
//SDVTList VTs = DAG.getVTList(MVT::Other);
17484
- SDValue Ops[] = {MaskInReg , Base, Scale, Index, Disp, Segment, Chain};
17462
+ SDValue Ops[] = {VMask , Base, Scale, Index, Disp, Segment, Chain};
17485
17463
SDNode *Res = DAG.getMachineNode(Opc, dl, MVT::Other, Ops);
17486
17464
return SDValue(Res, 0);
17487
17465
}
@@ -17678,7 +17656,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
17678
17656
SDValue Src = Op.getOperand(5);
17679
17657
SDValue Scale = Op.getOperand(6);
17680
17658
return getScatterNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index,
17681
- Scale, Chain);
17659
+ Scale, Chain, Subtarget );
17682
17660
}
17683
17661
case PREFETCH: {
17684
17662
SDValue Hint = Op.getOperand(6);
@@ -17690,7 +17668,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
17690
17668
SDValue Index = Op.getOperand(3);
17691
17669
SDValue Base = Op.getOperand(4);
17692
17670
SDValue Scale = Op.getOperand(5);
17693
- return getPrefetchNode(Opcode, Op, DAG, Mask, Base, Index, Scale, Chain);
17671
+ return getPrefetchNode(Opcode, Op, DAG, Mask, Base, Index, Scale, Chain,
17672
+ Subtarget);
17694
17673
}
17695
17674
// Read Time Stamp Counter (RDTSC) and Processor ID (RDTSCP).
17696
17675
case RDTSC: {
0 commit comments