Skip to content

Commit 511f7f5

Browse files
committedJul 23, 2019
[AArch64][GlobalISel] Add support for s128 loads, stores, extracts, truncs.
We need to be able to load and store s128 for memcpy inlining, where we want to generate Q register mem ops. Making these legal also requires that we add some support in other instructions. Regbankselect should also know about these since they have no GPR register class that can hold them, so they need special handling to live on the FPR bank. Differential Revision: https://reviews.llvm.org/D65166 llvm-svn: 366857
1 parent 78b1e77 commit 511f7f5

12 files changed

+218
-279
lines changed
 

‎llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp

Lines changed: 73 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1551,14 +1551,42 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
15511551
return true;
15521552
}
15531553
case TargetOpcode::G_EXTRACT: {
1554-
LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
1555-
LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1554+
Register DstReg = I.getOperand(0).getReg();
1555+
Register SrcReg = I.getOperand(1).getReg();
1556+
LLT SrcTy = MRI.getType(SrcReg);
1557+
LLT DstTy = MRI.getType(DstReg);
15561558
(void)DstTy;
15571559
unsigned SrcSize = SrcTy.getSizeInBits();
1558-
// Larger extracts are vectors, same-size extracts should be something else
1559-
// by now (either split up or simplified to a COPY).
1560-
if (SrcTy.getSizeInBits() > 64 || Ty.getSizeInBits() > 32)
1561-
return false;
1560+
1561+
if (SrcTy.getSizeInBits() > 64) {
1562+
// This should be an extract of an s128, which is like a vector extract.
1563+
if (SrcTy.getSizeInBits() != 128)
1564+
return false;
1565+
// Only support extracting 64 bits from an s128 at the moment.
1566+
if (DstTy.getSizeInBits() != 64)
1567+
return false;
1568+
1569+
const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1570+
const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1571+
// Check we have the right regbank always.
1572+
assert(SrcRB.getID() == AArch64::FPRRegBankID &&
1573+
DstRB.getID() == AArch64::FPRRegBankID &&
1574+
"Wrong extract regbank!");
1575+
1576+
// Emit the same code as a vector extract.
1577+
// Offset must be a multiple of 64.
1578+
unsigned Offset = I.getOperand(2).getImm();
1579+
if (Offset % 64 != 0)
1580+
return false;
1581+
unsigned LaneIdx = Offset / 64;
1582+
MachineIRBuilder MIB(I);
1583+
MachineInstr *Extract = emitExtractVectorElt(
1584+
DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
1585+
if (!Extract)
1586+
return false;
1587+
I.eraseFromParent();
1588+
return true;
1589+
}
15621590

15631591
I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
15641592
MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
@@ -1570,7 +1598,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
15701598
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
15711599
}
15721600

1573-
Register DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
1601+
DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
15741602
MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
15751603
MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
15761604
.addReg(DstReg, 0, AArch64::sub_32);
@@ -1928,6 +1956,16 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
19281956
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
19291957
return true;
19301958
}
1959+
1960+
if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
1961+
MachineIRBuilder MIB(I);
1962+
MachineInstr *Extract = emitExtractVectorElt(
1963+
DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
1964+
if (!Extract)
1965+
return false;
1966+
I.eraseFromParent();
1967+
return true;
1968+
}
19311969
}
19321970

19331971
return false;
@@ -2590,16 +2628,40 @@ bool AArch64InstructionSelector::selectMergeValues(
25902628
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
25912629
const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
25922630
assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
2631+
const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
25932632

2594-
// At the moment we only support merging two s32s into an s64.
25952633
if (I.getNumOperands() != 3)
25962634
return false;
2597-
if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
2598-
return false;
2599-
const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
2635+
2636+
// Merging 2 s64s into an s128.
2637+
if (DstTy == LLT::scalar(128)) {
2638+
if (SrcTy.getSizeInBits() != 64)
2639+
return false;
2640+
MachineIRBuilder MIB(I);
2641+
Register DstReg = I.getOperand(0).getReg();
2642+
Register Src1Reg = I.getOperand(1).getReg();
2643+
Register Src2Reg = I.getOperand(2).getReg();
2644+
auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
2645+
MachineInstr *InsMI =
2646+
emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
2647+
if (!InsMI)
2648+
return false;
2649+
MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
2650+
Src2Reg, /* LaneIdx */ 1, RB, MIB);
2651+
if (!Ins2MI)
2652+
return false;
2653+
constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
2654+
constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
2655+
I.eraseFromParent();
2656+
return true;
2657+
}
2658+
26002659
if (RB.getID() != AArch64::GPRRegBankID)
26012660
return false;
26022661

2662+
if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
2663+
return false;
2664+
26032665
auto *DstRC = &AArch64::GPR64RegClass;
26042666
Register SubToRegDef = MRI.createVirtualRegister(DstRC);
26052667
MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),

‎llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -193,14 +193,14 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
193193
.legalIf([=](const LegalityQuery &Query) {
194194
const LLT &Ty0 = Query.Types[0];
195195
const LLT &Ty1 = Query.Types[1];
196-
if (Ty1 != s32 && Ty1 != s64)
196+
if (Ty1 != s32 && Ty1 != s64 && Ty1 != s128)
197197
return false;
198198
if (Ty1 == p0)
199199
return true;
200200
return isPowerOf2_32(Ty0.getSizeInBits()) &&
201201
(Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8);
202202
})
203-
.clampScalar(1, s32, s64)
203+
.clampScalar(1, s32, s128)
204204
.widenScalarToNextPow2(1)
205205
.maxScalarIf(typeInSet(1, {s32}), 0, s16)
206206
.maxScalarIf(typeInSet(1, {s64}), 0, s32)
@@ -238,6 +238,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
238238
{s32, p0, 32, 8},
239239
{s64, p0, 64, 8},
240240
{p0, p0, 64, 8},
241+
{s128, p0, 128, 8},
241242
{v8s8, p0, 64, 8},
242243
{v16s8, p0, 128, 8},
243244
{v4s16, p0, 64, 8},
@@ -267,6 +268,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
267268
{s32, p0, 32, 8},
268269
{s64, p0, 64, 8},
269270
{p0, p0, 64, 8},
271+
{s128, p0, 128, 8},
270272
{v16s8, p0, 128, 8},
271273
{v4s16, p0, 64, 8},
272274
{v8s16, p0, 128, 8},

‎llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -635,6 +635,12 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
635635
// Some of the floating-point instructions have mixed GPR and FPR operands:
636636
// fine-tune the computed mapping.
637637
switch (Opc) {
638+
case TargetOpcode::G_TRUNC: {
639+
LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
640+
if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128)
641+
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
642+
break;
643+
}
638644
case TargetOpcode::G_SITOFP:
639645
case TargetOpcode::G_UITOFP:
640646
if (MRI.getType(MI.getOperand(0).getReg()).isVector())
@@ -793,6 +799,15 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
793799
// Index needs to be a GPR.
794800
OpRegBankIdx[3] = PMI_FirstGPR;
795801
break;
802+
case TargetOpcode::G_EXTRACT: {
803+
// For s128 sources we have to use fpr.
804+
LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
805+
if (SrcTy.getSizeInBits() == 128) {
806+
OpRegBankIdx[0] = PMI_FirstFPR;
807+
OpRegBankIdx[1] = PMI_FirstFPR;
808+
}
809+
break;
810+
}
796811
case TargetOpcode::G_BUILD_VECTOR:
797812
// If the first source operand belongs to a FPR register bank, then make
798813
// sure that we preserve that.

‎llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ define void @nonpow2_load_narrowing() {
205205
ret void
206206
}
207207

208-
; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %4:_(s64) = G_EXTRACT %3:_(s96), 0 (in function: nonpow2_store_narrowing)
208+
; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: %5:fpr32(s32) = G_EXTRACT %21:fpr(s128), 64 (in function: nonpow2_store_narrowing)
209209
; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_store_narrowing
210210
; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_store_narrowing:
211211
define void @nonpow2_store_narrowing(i96* %c) {

‎llvm/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir

Lines changed: 0 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -1,88 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
22
# RUN: llc -mtriple=aarch64-linux-gnu -O0 -run-pass=legalizer %s -o - | FileCheck %s
33

4-
---
5-
name: test_extracts_1
6-
body: |
7-
bb.0:
8-
liveins: $w0
9-
10-
; Low part of extraction takes the entirety of the low register, so
11-
; value stored is forwarded directly from first load.
12-
13-
; CHECK-LABEL: name: test_extracts_1
14-
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
15-
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
16-
; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
17-
; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY2]](p0) :: (load 8, align 16)
18-
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
19-
; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY2]], [[C]](s64)
20-
; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load 8)
21-
; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
22-
; CHECK: G_STORE [[COPY3]](s64), [[COPY2]](p0) :: (store 8)
23-
; CHECK: RET_ReallyLR
24-
%0:_(s64) = COPY $x0
25-
%1:_(s32) = COPY $w1
26-
%2:_(p0) = COPY $x2
27-
%3:_(s128) = G_LOAD %2(p0) :: (load 16)
28-
%4:_(s64) = G_EXTRACT %3(s128), 0
29-
G_STORE %4(s64), %2(p0) :: (store 8)
30-
RET_ReallyLR
31-
...
32-
33-
---
34-
name: test_extracts_2
35-
body: |
36-
bb.0:
37-
liveins: $w0
38-
39-
; Low extraction takes the whole low register. High extraction is real.
40-
; CHECK-LABEL: name: test_extracts_2
41-
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
42-
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
43-
; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
44-
; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY2]](p0) :: (load 8, align 16)
45-
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
46-
; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY2]], [[C]](s64)
47-
; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load 8)
48-
; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
49-
; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD1]](s64), 0
50-
; CHECK: G_STORE [[COPY3]](s64), [[COPY2]](p0) :: (store 8)
51-
; CHECK: G_STORE [[EXTRACT]](s32), [[COPY2]](p0) :: (store 4)
52-
; CHECK: RET_ReallyLR
53-
%0:_(s64) = COPY $x0
54-
%1:_(s32) = COPY $w1
55-
%2:_(p0) = COPY $x2
56-
%3:_(s128) = G_LOAD %2(p0) :: (load 16)
57-
%4:_(s64) = G_EXTRACT %3(s128), 0
58-
%5:_(s32) = G_EXTRACT %3(s128), 64
59-
G_STORE %4(s64), %2(p0) :: (store 8)
60-
G_STORE %5(s32), %2(p0) :: (store 4)
61-
RET_ReallyLR
62-
...
63-
64-
---
65-
name: test_extracts_3
66-
body: |
67-
bb.0:
68-
liveins: $x0, $x1, $x2
69-
70-
71-
; CHECK-LABEL: name: test_extracts_3
72-
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
73-
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
74-
; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s64), 32
75-
; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s64), 0
76-
; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[EXTRACT]](s32), [[EXTRACT1]](s32)
77-
; CHECK: $x0 = COPY [[MV]](s64)
78-
; CHECK: RET_ReallyLR
79-
%0:_(s64) = COPY $x0
80-
%1:_(s64) = COPY $x1
81-
%2:_(s128) = G_MERGE_VALUES %0, %1
82-
%3:_(s64) = G_EXTRACT %2, 32
83-
$x0 = COPY %3
84-
RET_ReallyLR
85-
...
864

875
---
886
name: test_extracts_4

‎llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir

Lines changed: 1 addition & 141 deletions
Original file line numberDiff line numberDiff line change
@@ -1,152 +1,12 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
12
# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s
23

34
--- |
45
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
56
target triple = "aarch64--"
6-
define void @test_inserts_1() { ret void }
7-
define void @test_inserts_2() { ret void }
8-
define void @test_inserts_3() { ret void }
9-
define void @test_inserts_4() { ret void }
10-
define void @test_inserts_5() { ret void }
11-
define void @test_inserts_6() { ret void }
127
define void @test_inserts_nonpow2() { ret void }
138
...
149

15-
---
16-
name: test_inserts_1
17-
body: |
18-
bb.0:
19-
liveins: $w0
20-
21-
; Low part of insertion wipes out the old register entirely, so %0 gets
22-
; forwarded to the G_STORE. Hi part is unchanged so (split) G_LOAD gets
23-
; forwarded.
24-
; CHECK-LABEL: name: test_inserts_1
25-
; CHECK: [[LO:%[0-9]+]]:_(s64) = G_LOAD
26-
; CHECK: [[HI:%[0-9]+]]:_(s64) = G_LOAD
27-
; CHECK: G_STORE %0(s64)
28-
; CHECK: G_STORE [[HI]]
29-
%0:_(s64) = COPY $x0
30-
%1:_(s32) = COPY $w1
31-
%2:_(p0) = COPY $x2
32-
%3:_(s128) = G_LOAD %2(p0) :: (load 16)
33-
%4:_(s128) = G_INSERT %3(s128), %0(s64), 0
34-
G_STORE %4(s128), %2(p0) :: (store 16)
35-
RET_ReallyLR
36-
...
37-
38-
---
39-
name: test_inserts_2
40-
body: |
41-
bb.0:
42-
liveins: $w0
43-
44-
; Low insertion wipes out the old register entirely, so %0 gets forwarded
45-
; to the G_STORE again. Second insertion is real.
46-
; CHECK-LABEL: name: test_inserts_2
47-
; CHECK: [[LO:%[0-9]+]]:_(s64) = G_LOAD
48-
; CHECK: [[HI:%[0-9]+]]:_(s64) = G_LOAD
49-
; CHECK: [[NEWHI:%[0-9]+]]:_(s64) = G_INSERT [[HI]], %1(s32), 0
50-
; CHECK: G_STORE %0(s64)
51-
; CHECK: G_STORE [[NEWHI]]
52-
%0:_(s64) = COPY $x0
53-
%1:_(s32) = COPY $w1
54-
%2:_(p0) = COPY $x2
55-
%3:_(s128) = G_LOAD %2(p0) :: (load 16)
56-
%4:_(s128) = G_INSERT %3(s128), %0(s64), 0
57-
%5:_(s128) = G_INSERT %4(s128), %1(s32), 64
58-
G_STORE %5(s128), %2(p0) :: (store 16)
59-
RET_ReallyLR
60-
...
61-
62-
---
63-
name: test_inserts_3
64-
body: |
65-
bb.0:
66-
liveins: $w0
67-
68-
; I'm not entirely convinced inserting a p0 into an s64 is valid, but it's
69-
; certainly better than the alternative of directly forwarding the value
70-
; which would cause a nasty type mismatch.
71-
; CHECK-LABEL: name: test_inserts_3
72-
; CHECK: [[LO:%[0-9]+]]:_(s64) = G_LOAD
73-
; CHECK: [[HI:%[0-9]+]]:_(s64) = G_LOAD
74-
; CHECK: [[NEWLO:%[0-9]+]]:_(s64) = G_PTRTOINT %0(p0)
75-
; CHECK: G_STORE [[NEWLO]](s64)
76-
; CHECK: G_STORE [[HI]]
77-
%0:_(p0) = COPY $x0
78-
%1:_(s32) = COPY $w1
79-
%2:_(p0) = COPY $x2
80-
%3:_(s128) = G_LOAD %2(p0) :: (load 16)
81-
%4:_(s128) = G_INSERT %3(s128), %0(p0), 0
82-
G_STORE %4(s128), %2(p0) :: (store 16)
83-
RET_ReallyLR
84-
...
85-
86-
---
87-
name: test_inserts_4
88-
body: |
89-
bb.0:
90-
liveins: $w0
91-
92-
; A narrow insert gets surrounded by a G_ANYEXT/G_TRUNC pair.
93-
; CHECK-LABEL: name: test_inserts_4
94-
; CHECK: [[VALEXT:%[0-9]+]]:_(s32) = COPY %2(s32)
95-
; CHECK: [[VAL:%[0-9]+]]:_(s32) = G_INSERT [[VALEXT]], %1(s1), 0
96-
; CHECK: %5:_(s8) = G_TRUNC [[VAL]](s32)
97-
%4:_(s32) = COPY $w0
98-
%0:_(s1) = G_TRUNC %4
99-
%5:_(s32) = COPY $w1
100-
%1:_(s8) = G_TRUNC %5
101-
%2:_(p0) = COPY $x2
102-
%3:_(s8) = G_INSERT %1(s8), %0(s1), 0
103-
G_STORE %3(s8), %2(p0) :: (store 1)
104-
RET_ReallyLR
105-
...
106-
107-
---
108-
name: test_inserts_5
109-
body: |
110-
bb.0:
111-
liveins: $x0, $x1, $x2
112-
113-
114-
; CHECK-LABEL: name: test_inserts_5
115-
; CHECK: [[INS_LO:%[0-9]+]]:_(s32) = G_EXTRACT %2(s64), 0
116-
; CHECK: [[VAL_LO:%[0-9]+]]:_(s64) = G_INSERT %0, [[INS_LO]](s32), 32
117-
; CHECK: [[INS_HI:%[0-9]+]]:_(s32) = G_EXTRACT %2(s64), 32
118-
; CHECK: [[VAL_HI:%[0-9]+]]:_(s64) = G_INSERT %1, [[INS_HI]](s32), 0
119-
; CHECK: %4:_(s128) = G_MERGE_VALUES [[VAL_LO]](s64), [[VAL_HI]](s64)
120-
%0:_(s64) = COPY $x0
121-
%1:_(s64) = COPY $x1
122-
%2:_(s64) = COPY $x2
123-
%3:_(s128) = G_MERGE_VALUES %0, %1
124-
%4:_(s128) = G_INSERT %3, %2, 32
125-
%5:_(s64) = G_TRUNC %4
126-
$x0 = COPY %5
127-
RET_ReallyLR
128-
...
129-
130-
---
131-
name: test_inserts_6
132-
body: |
133-
bb.0:
134-
liveins: $x0, $x1, $x2
135-
136-
137-
; CHECK-LABEL: name: test_inserts_6
138-
; CHECK: [[VAL_LO:%[0-9]+]]:_(s64) = G_INSERT %0, %2(s32), 32
139-
; CHECK: %4:_(s128) = G_MERGE_VALUES [[VAL_LO]](s64), %1(s64)
140-
%0:_(s64) = COPY $x0
141-
%1:_(s64) = COPY $x1
142-
%2:_(s32) = COPY $w2
143-
%3:_(s128) = G_MERGE_VALUES %0, %1
144-
%4:_(s128) = G_INSERT %3, %2, 32
145-
%5:_(s64) = G_TRUNC %4
146-
$x0 = COPY %5
147-
RET_ReallyLR
148-
...
149-
15010
---
15111
name: test_inserts_nonpow2
15212
body: |

‎llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-s128-unaligned.mir

Lines changed: 0 additions & 30 deletions
This file was deleted.

‎llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,8 @@ body: |
8080
; CHECK: [[LOAD5:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8)
8181
; CHECK: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[LOAD5]](<2 x s32>)
8282
; CHECK: $x0 = COPY [[BITCAST]](s64)
83-
; CHECK: [[LOAD6:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8, align 16)
84-
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
85-
; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64)
86-
; CHECK: [[LOAD7:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load 8)
87-
; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[LOAD6]](s64), [[LOAD7]](s64)
88-
; CHECK: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[MV]](s128)
83+
; CHECK: [[LOAD6:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load 16)
84+
; CHECK: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD6]](s128)
8985
; CHECK: $x0 = COPY [[TRUNC]](s64)
9086
%0:_(p0) = COPY $x0
9187
%1:_(s1) = G_LOAD %0(p0) :: (load 1)
@@ -135,10 +131,8 @@ body: |
135131
; CHECK: G_STORE [[PTRTOINT]](s64), [[COPY]](p0) :: (store 8)
136132
; CHECK: G_STORE [[COPY]](p0), [[COPY]](p0) :: (store 8)
137133
; CHECK: [[PTRTOINT1:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY]](p0)
138-
; CHECK: G_STORE [[PTRTOINT1]](s64), [[COPY]](p0) :: (store 8, align 16)
139-
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
140-
; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64)
141-
; CHECK: G_STORE [[PTRTOINT1]](s64), [[GEP]](p0) :: (store 8)
134+
; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[PTRTOINT1]](s64), [[PTRTOINT1]](s64)
135+
; CHECK: G_STORE [[MV]](s128), [[COPY]](p0) :: (store 16)
142136
%0:_(p0) = COPY $x0
143137
%1:_(s32) = COPY $w1
144138
%2:_(s1) = G_TRUNC %1(s32)
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -O0 -run-pass=regbankselect %s -o - | FileCheck %s
3+
---
4+
name: extract_s64_s128
5+
alignment: 2
6+
legalized: true
7+
tracksRegLiveness: true
8+
body: |
9+
bb.1:
10+
liveins: $q0
11+
12+
; CHECK-LABEL: name: extract_s64_s128
13+
; CHECK: liveins: $q0
14+
; CHECK: [[COPY:%[0-9]+]]:fpr(s128) = COPY $q0
15+
; CHECK: [[EXTRACT:%[0-9]+]]:fpr(s64) = G_EXTRACT [[COPY]](s128), 0
16+
; CHECK: $d2 = COPY [[EXTRACT]](s64)
17+
; CHECK: RET_ReallyLR implicit $d2
18+
%0:_(s128) = COPY $q0
19+
%1:_(s64) = G_EXTRACT %0(s128), 0
20+
$d2 = COPY %1(s64)
21+
RET_ReallyLR implicit $d2
22+
23+
...
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -O0 -run-pass=regbankselect %s -o - | FileCheck %s
3+
---
4+
name: trunc_s64_s128
5+
alignment: 2
6+
legalized: true
7+
tracksRegLiveness: true
8+
body: |
9+
bb.1:
10+
liveins: $q0
11+
12+
; CHECK-LABEL: name: trunc_s64_s128
13+
; CHECK: liveins: $q0
14+
; CHECK: [[COPY:%[0-9]+]]:fpr(s128) = COPY $q0
15+
; CHECK: [[TRUNC:%[0-9]+]]:fpr(s64) = G_TRUNC [[COPY]](s128)
16+
; CHECK: $d2 = COPY [[TRUNC]](s64)
17+
; CHECK: RET_ReallyLR implicit $d2
18+
%0:_(s128) = COPY $q0
19+
%1:_(s64) = G_TRUNC %0(s128)
20+
$d2 = COPY %1(s64)
21+
RET_ReallyLR implicit $d2
22+
23+
...
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -O0 -run-pass=instruction-select %s -o - | FileCheck %s
3+
...
4+
---
5+
name: extract_64_128
6+
legalized: true
7+
regBankSelected: true
8+
tracksRegLiveness: true
9+
body: |
10+
bb.0:
11+
liveins: $q0
12+
13+
; CHECK-LABEL: name: extract_64_128
14+
; CHECK: liveins: $q0
15+
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
16+
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[COPY]].dsub
17+
; CHECK: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[COPY]], 1
18+
; CHECK: $d3 = COPY [[COPY1]]
19+
; CHECK: $d4 = COPY [[CPYi64_]]
20+
; CHECK: RET_ReallyLR implicit $d3
21+
%0:fpr(s128) = COPY $q0
22+
%2:fpr(s64) = G_EXTRACT %0(s128), 0
23+
%3:fpr(s64) = G_EXTRACT %0(s128), 64
24+
$d3 = COPY %2(s64)
25+
$d4 = COPY %3(s64)
26+
RET_ReallyLR implicit $d3
27+
28+
...

‎llvm/test/CodeGen/AArch64/GlobalISel/select-trunc.mir

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
define void @trunc_s32_s64() { ret void }
88
define void @trunc_s8_s64() { ret void }
99
define void @trunc_s1_s32() { ret void }
10+
define void @trunc_s64_s128() { ret void }
11+
define void @trunc_s32_s128() { ret void }
1012
...
1113

1214
---
@@ -70,10 +72,52 @@ body: |
7072
7173
; CHECK-LABEL: name: trunc_s1_s32
7274
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
73-
; CHECK: [[COPY2:%[0-9]+]]:gpr32all = COPY [[COPY]]
74-
; CHECK: $w0 = COPY [[COPY2]]
75+
; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]]
76+
; CHECK: $w0 = COPY [[COPY1]]
7577
%0(s32) = COPY $w0
7678
%1(s1) = G_TRUNC %0
7779
%2:gpr(s32) = G_ANYEXT %1
7880
$w0 = COPY %2(s32)
7981
...
82+
83+
---
84+
name: trunc_s64_s128
85+
legalized: true
86+
regBankSelected: true
87+
registers:
88+
- { id: 0, class: fpr }
89+
- { id: 1, class: fpr }
90+
91+
body: |
92+
bb.0:
93+
liveins: $q0
94+
95+
; CHECK-LABEL: name: trunc_s64_s128
96+
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
97+
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[COPY]].dsub
98+
; CHECK: $x0 = COPY [[COPY1]]
99+
%0(s128) = COPY $q0
100+
%1(s64) = G_TRUNC %0
101+
$x0 = COPY %1(s64)
102+
...
103+
104+
---
105+
name: trunc_s32_s128
106+
legalized: true
107+
regBankSelected: true
108+
registers:
109+
- { id: 0, class: fpr }
110+
- { id: 1, class: fpr }
111+
112+
body: |
113+
bb.0:
114+
liveins: $q0
115+
116+
; CHECK-LABEL: name: trunc_s32_s128
117+
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
118+
; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]].ssub
119+
; CHECK: $w0 = COPY [[COPY1]]
120+
%0(s128) = COPY $q0
121+
%1(s32) = G_TRUNC %0
122+
$w0 = COPY %1(s32)
123+
...

0 commit comments

Comments
 (0)
Please sign in to comment.