@@ -129,7 +129,8 @@ class SDWASrcOperand : public SDWAOperand {
129
129
bool getNeg () const { return Neg; }
130
130
bool getSext () const { return Sext; }
131
131
132
- uint64_t getSrcMods () const ;
132
+ uint64_t getSrcMods (const SIInstrInfo *TII,
133
+ const MachineOperand *SrcOp) const ;
133
134
};
134
135
135
136
class SDWADstOperand : public SDWAOperand {
@@ -240,13 +241,24 @@ static bool isSubregOf(const MachineOperand &SubReg,
240
241
return SuperMask.all ();
241
242
}
242
243
243
- uint64_t SDWASrcOperand::getSrcMods () const {
244
+ uint64_t SDWASrcOperand::getSrcMods (const SIInstrInfo *TII,
245
+ const MachineOperand *SrcOp) const {
244
246
uint64_t Mods = 0 ;
247
+ const auto *MI = SrcOp->getParent ();
248
+ if (TII->getNamedOperand (*MI, AMDGPU::OpName::src0) == SrcOp) {
249
+ if (auto *Mod = TII->getNamedOperand (*MI, AMDGPU::OpName::src0_modifiers)) {
250
+ Mods = Mod->getImm ();
251
+ }
252
+ } else if (TII->getNamedOperand (*MI, AMDGPU::OpName::src1) == SrcOp) {
253
+ if (auto *Mod = TII->getNamedOperand (*MI, AMDGPU::OpName::src1_modifiers)) {
254
+ Mods = Mod->getImm ();
255
+ }
256
+ }
245
257
if (Abs || Neg) {
246
258
assert (!Sext &&
247
259
" Float and integer src modifiers can't be set simulteniously" );
248
260
Mods |= Abs ? SISrcMods::ABS : 0 ;
249
- Mods | = Neg ? SISrcMods::NEG : 0 ;
261
+ Mods ^ = Neg ? SISrcMods::NEG : 0 ;
250
262
} else if (Sext) {
251
263
Mods |= SISrcMods::SEXT;
252
264
}
@@ -312,7 +324,7 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
312
324
}
313
325
copyRegOperand (*Src, *getTargetOperand ());
314
326
SrcSel->setImm (getSrcSel ());
315
- SrcMods->setImm (getSrcMods ());
327
+ SrcMods->setImm (getSrcMods (TII, Src ));
316
328
getTargetOperand ()->setIsKill (false );
317
329
return true ;
318
330
}
@@ -409,7 +421,10 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
409
421
switch (Opcode) {
410
422
case AMDGPU::V_LSHRREV_B32_e32:
411
423
case AMDGPU::V_ASHRREV_I32_e32:
412
- case AMDGPU::V_LSHLREV_B32_e32: {
424
+ case AMDGPU::V_LSHLREV_B32_e32:
425
+ case AMDGPU::V_LSHRREV_B32_e64:
426
+ case AMDGPU::V_ASHRREV_I32_e64:
427
+ case AMDGPU::V_LSHLREV_B32_e64: {
413
428
// from: v_lshrrev_b32_e32 v1, 16/24, v0
414
429
// to SDWA src:v0 src_sel:WORD_1/BYTE_3
415
430
@@ -432,7 +447,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
432
447
TRI->isPhysicalRegister (Dst->getReg ()))
433
448
break ;
434
449
435
- if (Opcode == AMDGPU::V_LSHLREV_B32_e32) {
450
+ if (Opcode == AMDGPU::V_LSHLREV_B32_e32 ||
451
+ Opcode == AMDGPU::V_LSHLREV_B32_e64) {
436
452
auto SDWADst = make_unique<SDWADstOperand>(
437
453
Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD);
438
454
DEBUG (dbgs () << " Match: " << MI << " To: " << *SDWADst << ' \n ' );
@@ -441,7 +457,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
441
457
} else {
442
458
auto SDWASrc = make_unique<SDWASrcOperand>(
443
459
Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false , false ,
444
- Opcode == AMDGPU::V_LSHRREV_B32_e32 ? false : true );
460
+ Opcode != AMDGPU::V_LSHRREV_B32_e32 &&
461
+ Opcode != AMDGPU::V_LSHRREV_B32_e64);
445
462
DEBUG (dbgs () << " Match: " << MI << " To: " << *SDWASrc << ' \n ' );
446
463
SDWAOperands[&MI] = std::move (SDWASrc);
447
464
++NumSDWAPatternsFound;
@@ -451,7 +468,10 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
451
468
452
469
case AMDGPU::V_LSHRREV_B16_e32:
453
470
case AMDGPU::V_ASHRREV_I16_e32:
454
- case AMDGPU::V_LSHLREV_B16_e32: {
471
+ case AMDGPU::V_LSHLREV_B16_e32:
472
+ case AMDGPU::V_LSHRREV_B16_e64:
473
+ case AMDGPU::V_ASHRREV_I16_e64:
474
+ case AMDGPU::V_LSHLREV_B16_e64: {
455
475
// from: v_lshrrev_b16_e32 v1, 8, v0
456
476
// to SDWA src:v0 src_sel:BYTE_1
457
477
@@ -472,7 +492,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
472
492
TRI->isPhysicalRegister (Dst->getReg ()))
473
493
break ;
474
494
475
- if (Opcode == AMDGPU::V_LSHLREV_B16_e32) {
495
+ if (Opcode == AMDGPU::V_LSHLREV_B16_e32 ||
496
+ Opcode == AMDGPU::V_LSHLREV_B16_e64) {
476
497
auto SDWADst =
477
498
make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD);
478
499
DEBUG (dbgs () << " Match: " << MI << " To: " << *SDWADst << ' \n ' );
@@ -481,7 +502,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
481
502
} else {
482
503
auto SDWASrc = make_unique<SDWASrcOperand>(
483
504
Src1, Dst, BYTE_1, false , false ,
484
- Opcode == AMDGPU::V_LSHRREV_B16_e32 ? false : true );
505
+ Opcode != AMDGPU::V_LSHRREV_B16_e32 &&
506
+ Opcode != AMDGPU::V_LSHRREV_B16_e64);
485
507
DEBUG (dbgs () << " Match: " << MI << " To: " << *SDWASrc << ' \n ' );
486
508
SDWAOperands[&MI] = std::move (SDWASrc);
487
509
++NumSDWAPatternsFound;
@@ -549,28 +571,33 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
549
571
++NumSDWAPatternsFound;
550
572
break ;
551
573
}
552
- case AMDGPU::V_AND_B32_e32: {
574
+ case AMDGPU::V_AND_B32_e32:
575
+ case AMDGPU::V_AND_B32_e64: {
553
576
// e.g.:
554
577
// from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0
555
578
// to SDWA src:v0 src_sel:WORD_0/BYTE_0
556
579
557
580
MachineOperand *Src0 = TII->getNamedOperand (MI, AMDGPU::OpName::src0);
581
+ MachineOperand *Src1 = TII->getNamedOperand (MI, AMDGPU::OpName::src1);
582
+ auto ValSrc = Src1;
558
583
auto Imm = foldToImm (*Src0);
559
- if (!Imm)
560
- break ;
561
584
562
- if (*Imm != 0x0000ffff && *Imm != 0x000000ff )
585
+ if (!Imm) {
586
+ Imm = foldToImm (*Src1);
587
+ ValSrc = Src0;
588
+ }
589
+
590
+ if (!Imm || (*Imm != 0x0000ffff && *Imm != 0x000000ff ))
563
591
break ;
564
592
565
- MachineOperand *Src1 = TII->getNamedOperand (MI, AMDGPU::OpName::src1);
566
593
MachineOperand *Dst = TII->getNamedOperand (MI, AMDGPU::OpName::vdst);
567
594
568
595
if (TRI->isPhysicalRegister (Src1->getReg ()) ||
569
596
TRI->isPhysicalRegister (Dst->getReg ()))
570
597
break ;
571
598
572
599
auto SDWASrc = make_unique<SDWASrcOperand>(
573
- Src1 , Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
600
+ ValSrc , Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
574
601
DEBUG (dbgs () << " Match: " << MI << " To: " << *SDWASrc << ' \n ' );
575
602
SDWAOperands[&MI] = std::move (SDWASrc);
576
603
++NumSDWAPatternsFound;
@@ -583,28 +610,38 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
583
610
584
611
bool SIPeepholeSDWA::isConvertibleToSDWA (const MachineInstr &MI) const {
585
612
// Check if this instruction has opcode that supports SDWA
586
- return AMDGPU::getSDWAOp (MI.getOpcode ()) != -1 ;
613
+ unsigned Opc = MI.getOpcode ();
614
+ if (AMDGPU::getSDWAOp (Opc) != -1 )
615
+ return true ;
616
+ int Opc32 = AMDGPU::getVOPe32 (Opc);
617
+ if (Opc32 != -1 && AMDGPU::getSDWAOp (Opc32) != -1 )
618
+ return !TII->hasModifiersSet (MI, AMDGPU::OpName::omod) &&
619
+ !TII->getNamedOperand (MI, AMDGPU::OpName::sdst);
620
+ return false ;
587
621
}
588
622
589
623
bool SIPeepholeSDWA::convertToSDWA (MachineInstr &MI,
590
624
const SDWAOperandsVector &SDWAOperands) {
591
625
// Convert to sdwa
592
626
int SDWAOpcode = AMDGPU::getSDWAOp (MI.getOpcode ());
627
+ if (SDWAOpcode == -1 )
628
+ SDWAOpcode = AMDGPU::getSDWAOp (AMDGPU::getVOPe32 (MI.getOpcode ()));
593
629
assert (SDWAOpcode != -1 );
594
630
631
+ // Copy dst, if it is present in original then should also be present in SDWA
632
+ MachineOperand *Dst = TII->getNamedOperand (MI, AMDGPU::OpName::vdst);
633
+ if (!Dst && !TII->isVOPC (MI))
634
+ return false ;
635
+
595
636
const MCInstrDesc &SDWADesc = TII->get (SDWAOpcode);
596
637
597
638
// Create SDWA version of instruction MI and initialize its operands
598
639
MachineInstrBuilder SDWAInst =
599
640
BuildMI (*MI.getParent (), MI, MI.getDebugLoc (), SDWADesc);
600
641
601
- // Copy dst, if it is present in original then should also be present in SDWA
602
- MachineOperand *Dst = TII->getNamedOperand (MI, AMDGPU::OpName::vdst);
603
642
if (Dst) {
604
643
assert (AMDGPU::getNamedOperandIdx (SDWAOpcode, AMDGPU::OpName::vdst) != -1 );
605
644
SDWAInst.add (*Dst);
606
- } else {
607
- assert (TII->isVOPC (MI));
608
645
}
609
646
610
647
// Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and
@@ -614,7 +651,10 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
614
651
Src0 &&
615
652
AMDGPU::getNamedOperandIdx (SDWAOpcode, AMDGPU::OpName::src0) != -1 &&
616
653
AMDGPU::getNamedOperandIdx (SDWAOpcode, AMDGPU::OpName::src0_modifiers) != -1 );
617
- SDWAInst.addImm (0 );
654
+ if (auto *Mod = TII->getNamedOperand (MI, AMDGPU::OpName::src0_modifiers))
655
+ SDWAInst.addImm (Mod->getImm ());
656
+ else
657
+ SDWAInst.addImm (0 );
618
658
SDWAInst.add (*Src0);
619
659
620
660
// Copy src1 if present, initialize src1_modifiers.
@@ -623,10 +663,11 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
623
663
assert (
624
664
AMDGPU::getNamedOperandIdx (SDWAOpcode, AMDGPU::OpName::src1) != -1 &&
625
665
AMDGPU::getNamedOperandIdx (SDWAOpcode, AMDGPU::OpName::src1_modifiers) != -1 );
626
- SDWAInst.addImm (0 );
666
+ if (auto *Mod = TII->getNamedOperand (MI, AMDGPU::OpName::src1_modifiers))
667
+ SDWAInst.addImm (Mod->getImm ());
668
+ else
669
+ SDWAInst.addImm (0 );
627
670
SDWAInst.add (*Src1);
628
- } else {
629
- assert (TII->isVOP1 (MI));
630
671
}
631
672
632
673
if (SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
0 commit comments