Skip to content

Commit 33bdb3e

Browse files
committed Mar 7, 2018
[AArch64] add missing pattern for insert_subvector undef
The attached testcase started failing after the patch that defined isExtractSubvectorCheap, with the following pattern-match failure:

    ISEL: Starting pattern match
      Initial Opcode index to 85068
      Match failed at index 85076
    LLVM ERROR: Cannot select: t47: v8i16 = insert_subvector undef:v8i16, t43, Constant:i64<0>

The matcher code generated from this pattern in llvm/lib/Target/AArch64/AArch64InstrInfo.td:

    def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (i32 0)),
              (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;

is in ninja/lib/Target/AArch64/AArch64GenDAGISel.inc. At the location of the error it is:

    /* 85076*/ OPC_CheckChild2Type, MVT::i32,

and it failed to match the type of operand 2: the index constant in the failing node is i64, while the pattern only accepts i32.

Adding another def-pat for i64 fixes the failed def-pat error:

    def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (i64 0)),
              (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;

llvm-svn: 326949
1 parent 7d80da1 commit 33bdb3e

File tree

2 files changed

+40
-14
lines changed

2 files changed

+40
-14
lines changed
 

‎llvm/lib/Target/AArch64/AArch64InstrInfo.td

+19-14
Original file line number | Diff line number | Diff line change
@@ -6183,20 +6183,25 @@ def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))),
61836183

61846184
// A 64-bit subvector insert to the first 128-bit vector position
61856185
// is a subregister copy that needs no instruction.
6186-
def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (i32 0)),
6187-
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
6188-
def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (i32 0)),
6189-
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
6190-
def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (i32 0)),
6191-
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
6192-
def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (i32 0)),
6193-
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
6194-
def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (i32 0)),
6195-
(INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
6196-
def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (i32 0)),
6197-
(INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
6198-
def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (i32 0)),
6199-
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
6186+
multiclass InsertSubvectorUndef<ValueType Ty> {
6187+
def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (Ty 0)),
6188+
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
6189+
def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (Ty 0)),
6190+
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
6191+
def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (Ty 0)),
6192+
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
6193+
def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (Ty 0)),
6194+
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
6195+
def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (Ty 0)),
6196+
(INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
6197+
def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (Ty 0)),
6198+
(INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
6199+
def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (Ty 0)),
6200+
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
6201+
}
6202+
6203+
defm : InsertSubvectorUndef<i32>;
6204+
defm : InsertSubvectorUndef<i64>;
62006205

62016206
// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64
62026207
// or v2f32.
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,21 @@
1+
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s
2+
3+
; Check that this does not ICE.
4+
5+
@d = common dso_local local_unnamed_addr global <4 x i16> zeroinitializer, align 8
6+
7+
define <8 x i16> @c(i32 %e) {
8+
entry:
9+
%0 = load <4 x i16>, <4 x i16>* @d, align 8
10+
%vminv = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %0)
11+
%1 = trunc i32 %vminv to i16
12+
%vecinit3 = insertelement <4 x i16> <i16 undef, i16 undef, i16 0, i16 0>, i16 %1, i32 1
13+
%call = tail call <8 x i16> @c(i32 0) #3
14+
%vgetq_lane = extractelement <8 x i16> %call, i32 0
15+
%vset_lane = insertelement <4 x i16> %vecinit3, i16 %vgetq_lane, i32 0
16+
%call4 = tail call i32 bitcast (i32 (...)* @k to i32 (<4 x i16>)*)(<4 x i16> %vset_lane) #3
17+
ret <8 x i16> undef
18+
}
19+
20+
declare i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16>)
21+
declare i32 @k(...)

0 commit comments

Comments
 (0)
Please sign in to comment.