@@ -1551,14 +1551,42 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
1551
1551
return true ;
1552
1552
}
1553
1553
case TargetOpcode::G_EXTRACT: {
1554
- LLT SrcTy = MRI.getType (I.getOperand (1 ).getReg ());
1555
- LLT DstTy = MRI.getType (I.getOperand (0 ).getReg ());
1554
+ Register DstReg = I.getOperand (0 ).getReg ();
1555
+ Register SrcReg = I.getOperand (1 ).getReg ();
1556
+ LLT SrcTy = MRI.getType (SrcReg);
1557
+ LLT DstTy = MRI.getType (DstReg);
1556
1558
(void )DstTy;
1557
1559
unsigned SrcSize = SrcTy.getSizeInBits ();
1558
- // Larger extracts are vectors, same-size extracts should be something else
1559
- // by now (either split up or simplified to a COPY).
1560
- if (SrcTy.getSizeInBits () > 64 || Ty.getSizeInBits () > 32 )
1561
- return false ;
1560
+
1561
+ if (SrcTy.getSizeInBits () > 64 ) {
1562
+ // This should be an extract of an s128, which is like a vector extract.
1563
+ if (SrcTy.getSizeInBits () != 128 )
1564
+ return false ;
1565
+ // Only support extracting 64 bits from an s128 at the moment.
1566
+ if (DstTy.getSizeInBits () != 64 )
1567
+ return false ;
1568
+
1569
+ const RegisterBank &SrcRB = *RBI.getRegBank (SrcReg, MRI, TRI);
1570
+ const RegisterBank &DstRB = *RBI.getRegBank (DstReg, MRI, TRI);
1571
+ // Check we have the right regbank always.
1572
+ assert (SrcRB.getID () == AArch64::FPRRegBankID &&
1573
+ DstRB.getID () == AArch64::FPRRegBankID &&
1574
+ " Wrong extract regbank!" );
1575
+
1576
+ // Emit the same code as a vector extract.
1577
+ // Offset must be a multiple of 64.
1578
+ unsigned Offset = I.getOperand (2 ).getImm ();
1579
+ if (Offset % 64 != 0 )
1580
+ return false ;
1581
+ unsigned LaneIdx = Offset / 64 ;
1582
+ MachineIRBuilder MIB (I);
1583
+ MachineInstr *Extract = emitExtractVectorElt (
1584
+ DstReg, DstRB, LLT::scalar (64 ), SrcReg, LaneIdx, MIB);
1585
+ if (!Extract)
1586
+ return false ;
1587
+ I.eraseFromParent ();
1588
+ return true ;
1589
+ }
1562
1590
1563
1591
I.setDesc (TII.get (SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
1564
1592
MachineInstrBuilder (MF, I).addImm (I.getOperand (2 ).getImm () +
@@ -1570,7 +1598,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
1570
1598
return constrainSelectedInstRegOperands (I, TII, TRI, RBI);
1571
1599
}
1572
1600
1573
- Register DstReg = MRI.createGenericVirtualRegister (LLT::scalar (64 ));
1601
+ DstReg = MRI.createGenericVirtualRegister (LLT::scalar (64 ));
1574
1602
MIB.setInsertPt (MIB.getMBB (), std::next (I.getIterator ()));
1575
1603
MIB.buildInstr (TargetOpcode::COPY, {I.getOperand (0 ).getReg ()}, {})
1576
1604
.addReg (DstReg, 0 , AArch64::sub_32);
@@ -1928,6 +1956,16 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
1928
1956
constrainSelectedInstRegOperands (I, TII, TRI, RBI);
1929
1957
return true ;
1930
1958
}
1959
+
1960
+ if (!SrcTy.isVector () && SrcTy.getSizeInBits () == 128 ) {
1961
+ MachineIRBuilder MIB (I);
1962
+ MachineInstr *Extract = emitExtractVectorElt (
1963
+ DstReg, DstRB, LLT::scalar (DstTy.getSizeInBits ()), SrcReg, 0 , MIB);
1964
+ if (!Extract)
1965
+ return false ;
1966
+ I.eraseFromParent ();
1967
+ return true ;
1968
+ }
1931
1969
}
1932
1970
1933
1971
return false ;
@@ -2590,16 +2628,40 @@ bool AArch64InstructionSelector::selectMergeValues(
2590
2628
const LLT DstTy = MRI.getType (I.getOperand (0 ).getReg ());
2591
2629
const LLT SrcTy = MRI.getType (I.getOperand (1 ).getReg ());
2592
2630
assert (!DstTy.isVector () && !SrcTy.isVector () && " invalid merge operation" );
2631
+ const RegisterBank &RB = *RBI.getRegBank (I.getOperand (1 ).getReg (), MRI, TRI);
2593
2632
2594
- // At the moment we only support merging two s32s into an s64.
2595
2633
if (I.getNumOperands () != 3 )
2596
2634
return false ;
2597
- if (DstTy.getSizeInBits () != 64 || SrcTy.getSizeInBits () != 32 )
2598
- return false ;
2599
- const RegisterBank &RB = *RBI.getRegBank (I.getOperand (1 ).getReg (), MRI, TRI);
2635
+
2636
+ // Merging 2 s64s into an s128.
2637
+ if (DstTy == LLT::scalar (128 )) {
2638
+ if (SrcTy.getSizeInBits () != 64 )
2639
+ return false ;
2640
+ MachineIRBuilder MIB (I);
2641
+ Register DstReg = I.getOperand (0 ).getReg ();
2642
+ Register Src1Reg = I.getOperand (1 ).getReg ();
2643
+ Register Src2Reg = I.getOperand (2 ).getReg ();
2644
+ auto Tmp = MIB.buildInstr (TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
2645
+ MachineInstr *InsMI =
2646
+ emitLaneInsert (None, Tmp.getReg (0 ), Src1Reg, /* LaneIdx */ 0 , RB, MIB);
2647
+ if (!InsMI)
2648
+ return false ;
2649
+ MachineInstr *Ins2MI = emitLaneInsert (DstReg, InsMI->getOperand (0 ).getReg (),
2650
+ Src2Reg, /* LaneIdx */ 1 , RB, MIB);
2651
+ if (!Ins2MI)
2652
+ return false ;
2653
+ constrainSelectedInstRegOperands (*InsMI, TII, TRI, RBI);
2654
+ constrainSelectedInstRegOperands (*Ins2MI, TII, TRI, RBI);
2655
+ I.eraseFromParent ();
2656
+ return true ;
2657
+ }
2658
+
2600
2659
if (RB.getID () != AArch64::GPRRegBankID)
2601
2660
return false ;
2602
2661
2662
+ if (DstTy.getSizeInBits () != 64 || SrcTy.getSizeInBits () != 32 )
2663
+ return false ;
2664
+
2603
2665
auto *DstRC = &AArch64::GPR64RegClass;
2604
2666
Register SubToRegDef = MRI.createVirtualRegister (DstRC);
2605
2667
MachineInstr &SubRegMI = *BuildMI (*I.getParent (), I, I.getDebugLoc (),
0 commit comments