Skip to content

Commit a89e7a0

Browse files
Kevin Qin authored and committed on Oct 11, 2013
Implement aarch64 neon instruction set AdvSIMD (copy).
llvm-svn: 192410
1 parent bcf3656 commit a89e7a0

File tree

6 files changed

+875
-56
lines changed

6 files changed

+875
-56
lines changed
 

‎llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 194 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -297,15 +297,23 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
297297
setOperationAction(ISD::BUILD_VECTOR, MVT::v1f64, Custom);
298298
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
299299

300+
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
301+
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
300302
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
301303
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i16, Custom);
302304
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
303305
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i32, Custom);
306+
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);
307+
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
304308
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f32, Custom);
305309
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
306310
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1f64, Custom);
307311
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
308312

313+
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Legal);
314+
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Legal);
315+
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Legal);
316+
setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal);
309317
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Legal);
310318
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Legal);
311319
setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal);
@@ -866,12 +874,12 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
866874
return "AArch64ISD::NEON_CMPZ";
867875
case AArch64ISD::NEON_TST:
868876
return "AArch64ISD::NEON_TST";
869-
case AArch64ISD::NEON_DUPIMM:
870-
return "AArch64ISD::NEON_DUPIMM";
871877
case AArch64ISD::NEON_QSHLs:
872878
return "AArch64ISD::NEON_QSHLs";
873879
case AArch64ISD::NEON_QSHLu:
874880
return "AArch64ISD::NEON_QSHLu";
881+
case AArch64ISD::NEON_VDUP:
882+
return "AArch64ISD::NEON_VDUP";
875883
case AArch64ISD::NEON_VDUPLANE:
876884
return "AArch64ISD::NEON_VDUPLANE";
877885
default:
@@ -3342,7 +3350,7 @@ static SDValue PerformShiftCombine(SDNode *N,
33423350
case ISD::SHL:
33433351
if (isVShiftLImm(N->getOperand(1), VT, Cnt)) {
33443352
SDValue RHS =
3345-
DAG.getNode(AArch64ISD::NEON_DUPIMM, SDLoc(N->getOperand(1)), VT,
3353+
DAG.getNode(AArch64ISD::NEON_VDUP, SDLoc(N->getOperand(1)), VT,
33463354
DAG.getConstant(Cnt, MVT::i32));
33473355
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), RHS);
33483356
}
@@ -3352,7 +3360,7 @@ static SDValue PerformShiftCombine(SDNode *N,
33523360
case ISD::SRL:
33533361
if (isVShiftRImm(N->getOperand(1), VT, Cnt)) {
33543362
SDValue RHS =
3355-
DAG.getNode(AArch64ISD::NEON_DUPIMM, SDLoc(N->getOperand(1)), VT,
3363+
DAG.getNode(AArch64ISD::NEON_VDUP, SDLoc(N->getOperand(1)), VT,
33563364
DAG.getConstant(Cnt, MVT::i32));
33573365
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N->getOperand(0), RHS);
33583366
}
@@ -3492,13 +3500,115 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
34923500
}
34933501
}
34943502
}
3503+
3504+
unsigned NumElts = VT.getVectorNumElements();
3505+
bool isOnlyLowElement = true;
3506+
bool usesOnlyOneValue = true;
3507+
bool hasDominantValue = false;
3508+
bool isConstant = true;
3509+
3510+
// Map of the number of times a particular SDValue appears in the
3511+
// element list.
3512+
DenseMap<SDValue, unsigned> ValueCounts;
3513+
SDValue Value;
3514+
for (unsigned i = 0; i < NumElts; ++i) {
3515+
SDValue V = Op.getOperand(i);
3516+
if (V.getOpcode() == ISD::UNDEF)
3517+
continue;
3518+
if (i > 0)
3519+
isOnlyLowElement = false;
3520+
if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
3521+
isConstant = false;
3522+
3523+
ValueCounts.insert(std::make_pair(V, 0));
3524+
unsigned &Count = ValueCounts[V];
3525+
3526+
// Is this value dominant? (takes up more than half of the lanes)
3527+
if (++Count > (NumElts / 2)) {
3528+
hasDominantValue = true;
3529+
Value = V;
3530+
}
3531+
}
3532+
if (ValueCounts.size() != 1)
3533+
usesOnlyOneValue = false;
3534+
if (!Value.getNode() && ValueCounts.size() > 0)
3535+
Value = ValueCounts.begin()->first;
3536+
3537+
if (ValueCounts.size() == 0)
3538+
return DAG.getUNDEF(VT);
3539+
3540+
// Loads are better lowered with insert_vector_elt.
3541+
// Keep going if we are hitting this case.
3542+
if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
3543+
return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
3544+
3545+
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3546+
// Use VDUP for non-constant splats.
3547+
if (hasDominantValue && EltSize <= 64) {
3548+
if (!isConstant) {
3549+
SDValue N;
3550+
3551+
// If we are DUPing a value that comes directly from a vector, we could
3552+
// just use DUPLANE. We can only do this if the lane being extracted
3553+
// is at a constant index, as the DUP from lane instructions only have
3554+
// constant-index forms.
3555+
if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
3556+
isa<ConstantSDNode>(Value->getOperand(1))) {
3557+
N = DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT,
3558+
Value->getOperand(0), Value->getOperand(1));
3559+
} else
3560+
N = DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Value);
3561+
3562+
if (!usesOnlyOneValue) {
3563+
// The dominant value was splatted as 'N', but we now have to insert
3564+
// all differing elements.
3565+
for (unsigned I = 0; I < NumElts; ++I) {
3566+
if (Op.getOperand(I) == Value)
3567+
continue;
3568+
SmallVector<SDValue, 3> Ops;
3569+
Ops.push_back(N);
3570+
Ops.push_back(Op.getOperand(I));
3571+
Ops.push_back(DAG.getConstant(I, MVT::i32));
3572+
N = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, &Ops[0], 3);
3573+
}
3574+
}
3575+
return N;
3576+
}
3577+
if (usesOnlyOneValue && isConstant) {
3578+
return DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Value);
3579+
}
3580+
}
3581+
// If all elements are constants and the case above didn't get hit, fall back
3582+
// to the default expansion, which will generate a load from the constant
3583+
// pool.
3584+
if (isConstant)
3585+
return SDValue();
3586+
3587+
// If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
3588+
// know the default expansion would otherwise fall back on something even
3589+
// worse. For a vector with one or two non-undef values, that's
3590+
// scalar_to_vector for the elements followed by a shuffle (provided the
3591+
// shuffle is valid for the target) and materialization element by element
3592+
// on the stack followed by a load for everything else.
3593+
if (!isConstant && !usesOnlyOneValue) {
3594+
SDValue Vec = DAG.getUNDEF(VT);
3595+
for (unsigned i = 0 ; i < NumElts; ++i) {
3596+
SDValue V = Op.getOperand(i);
3597+
if (V.getOpcode() == ISD::UNDEF)
3598+
continue;
3599+
SDValue LaneIdx = DAG.getConstant(i, MVT::i32);
3600+
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V, LaneIdx);
3601+
}
3602+
return Vec;
3603+
}
34953604
return SDValue();
34963605
}
34973606

34983607
SDValue
34993608
AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
35003609
SelectionDAG &DAG) const {
35013610
SDValue V1 = Op.getOperand(0);
3611+
SDValue V2 = Op.getOperand(1);
35023612
SDLoc dl(Op);
35033613
EVT VT = Op.getValueType();
35043614
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
@@ -3516,9 +3626,89 @@ AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
35163626
// If this is undef splat, generate it via "just" vdup, if possible.
35173627
if (Lane == -1) Lane = 0;
35183628

3629+
// Test if V1 is a SCALAR_TO_VECTOR.
3630+
if (V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
3631+
return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, V1.getOperand(0));
3632+
}
3633+
// Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR.
3634+
if (V1.getOpcode() == ISD::BUILD_VECTOR) {
3635+
bool IsScalarToVector = true;
3636+
for (unsigned i = 0, e = V1.getNumOperands(); i != e; ++i)
3637+
if (V1.getOperand(i).getOpcode() != ISD::UNDEF &&
3638+
i != (unsigned)Lane) {
3639+
IsScalarToVector = false;
3640+
break;
3641+
}
3642+
if (IsScalarToVector)
3643+
return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT,
3644+
V1.getOperand(Lane));
3645+
}
35193646
return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1,
35203647
DAG.getConstant(Lane, MVT::i64));
35213648
}
3649+
// For a shuffle mask like "0, 1, 2, 3, 4, 5, 13, 7", try to generate an insert
3650+
// by element from V2 to V1.
3651+
// If shuffle mask is like "0, 1, 10, 11, 12, 13, 14, 15", V2 would be a
3652+
// better insert target than V1 as fewer inserts are needed, so we count
3653+
// the elements to be inserted for both V1 and V2, and select the one needing fewer as the insert
3654+
// target.
3655+
3656+
// Collect the elements that need to be inserted and their indices.
3657+
SmallVector<int, 8> NV1Elt;
3658+
SmallVector<int, 8> N1Index;
3659+
SmallVector<int, 8> NV2Elt;
3660+
SmallVector<int, 8> N2Index;
3661+
int Length = ShuffleMask.size();
3662+
int V1EltNum = V1.getValueType().getVectorNumElements();
3663+
for (int I = 0; I != Length; ++I) {
3664+
if (ShuffleMask[I] != I) {
3665+
NV1Elt.push_back(ShuffleMask[I]);
3666+
N1Index.push_back(I);
3667+
}
3668+
}
3669+
for (int I = 0; I != Length; ++I) {
3670+
if (ShuffleMask[I] != (I + V1EltNum)) {
3671+
NV2Elt.push_back(ShuffleMask[I]);
3672+
N2Index.push_back(I);
3673+
}
3674+
}
3675+
3676+
// Decide which vector is the insert target. If all lanes mismatch, neither V1 nor V2
3677+
// will be inserted.
3678+
SDValue InsV = V1;
3679+
SmallVector<int, 8> InsMasks = NV1Elt;
3680+
SmallVector<int, 8> InsIndex = N1Index;
3681+
if ((int)NV1Elt.size() != Length || (int)NV2Elt.size() != Length) {
3682+
if (NV1Elt.size() > NV2Elt.size()) {
3683+
InsV = V2;
3684+
InsMasks = NV2Elt;
3685+
InsIndex = N2Index;
3686+
}
3687+
} else {
3688+
InsV = DAG.getNode(ISD::UNDEF, dl, VT);
3689+
}
3690+
3691+
SDValue PassN;
3692+
3693+
for (int I = 0, E = InsMasks.size(); I != E; ++I) {
3694+
SDValue ExtV = V1;
3695+
int Mask = InsMasks[I];
3696+
if (Mask > V1EltNum) {
3697+
ExtV = V2;
3698+
Mask -= V1EltNum;
3699+
}
3700+
// Any value type smaller than i32 is illegal in AArch64, and this lowering
3701+
// function is called after the legalize pass, so we need to legalize
3702+
// the result here.
3703+
EVT EltVT = MVT::i32;
3704+
if(EltSize == 64)
3705+
EltVT = MVT::i64;
3706+
PassN = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ExtV,
3707+
DAG.getConstant(Mask, MVT::i64));
3708+
PassN = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, InsV, PassN,
3709+
DAG.getConstant(InsIndex[I], MVT::i64));
3710+
}
3711+
return PassN;
35223712
}
35233713

35243714
return SDValue();

‎llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -134,13 +134,13 @@ namespace AArch64ISD {
134134
// Vector compare bitwise test
135135
NEON_TST,
136136

137-
// Operation for the immediate in vector shift
138-
NEON_DUPIMM,
139-
140137
// Vector saturating shift
141138
NEON_QSHLs,
142139
NEON_QSHLu,
143140

141+
// Vector dup
142+
NEON_VDUP,
143+
144144
// Vector dup by lane
145145
NEON_VDUPLANE
146146
};
@@ -296,6 +296,10 @@ enum NeonModImmType {
296296
Neon_Mov_Imm,
297297
Neon_Mvn_Imm
298298
};
299+
300+
extern SDValue ScanBUILD_VECTOR(SDValue Op, bool &isOnlyLowElement,
301+
bool &usesOnlyOneValue, bool &hasDominantValue,
302+
bool &isConstant, bool &isUNDEF);
299303
} // namespace llvm
300304

301305
#endif // LLVM_TARGET_AARCH64_ISELLOWERING_H

‎llvm/lib/Target/AArch64/AArch64InstrNEON.td

Lines changed: 263 additions & 49 deletions
Large diffs are not rendered by default.

‎llvm/test/CodeGen/AArch64/neon-copy.ll

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,8 +225,196 @@ define i64 @smovx2s(<2 x i32> %tmp1) {
225225
ret i64 %tmp4
226226
}
227227

228+
define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
229+
;CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
230+
%vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
231+
ret <8 x i8> %vset_lane
232+
}
233+
234+
define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
235+
;CHECK: ins {{v[0-9]+}}.b[14], {{v[0-9]+}}.b[6]
236+
%vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
237+
ret <16 x i8> %vset_lane
238+
}
239+
240+
define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
241+
;CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[0]
242+
%vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
243+
ret <8 x i8> %vset_lane
244+
}
245+
246+
define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
247+
;CHECK: ins {{v[0-9]+}}.b[0], {{v[0-9]+}}.b[15]
248+
%vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
249+
ret <16 x i8> %vset_lane
250+
}
251+
252+
define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
253+
;CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
254+
%vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0
255+
%vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1
256+
%vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2
257+
%vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3
258+
%vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4
259+
%vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5
260+
%vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6
261+
%vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7
262+
ret <8 x i8> %vecinit7.i
263+
}
264+
265+
define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
266+
;CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
267+
%vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0
268+
%vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1
269+
%vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2
270+
%vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3
271+
ret <4 x i16> %vecinit3.i
272+
}
273+
274+
define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
275+
;CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
276+
%vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0
277+
%vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1
278+
ret <2 x i32> %vecinit1.i
279+
}
280+
281+
define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
282+
;CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
283+
%vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0
284+
ret <1 x i64> %vecinit.i
285+
}
228286

287+
define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
288+
;CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
289+
%vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0
290+
%vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1
291+
%vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2
292+
%vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3
293+
%vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4
294+
%vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5
295+
%vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6
296+
%vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7
297+
%vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8
298+
%vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9
299+
%vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10
300+
%vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11
301+
%vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12
302+
%vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13
303+
%vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14
304+
%vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15
305+
ret <16 x i8> %vecinit15.i
306+
}
229307

308+
define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
309+
;CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
310+
%vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0
311+
%vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1
312+
%vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2
313+
%vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3
314+
%vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4
315+
%vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5
316+
%vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6
317+
%vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7
318+
ret <8 x i16> %vecinit7.i
319+
}
230320

321+
define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
322+
;CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
323+
%vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0
324+
%vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1
325+
%vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2
326+
%vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3
327+
ret <4 x i32> %vecinit3.i
328+
}
329+
330+
define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
331+
;CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
332+
%vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0
333+
%vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1
334+
ret <2 x i64> %vecinit1.i
335+
}
231336

337+
define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
338+
;CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
339+
%shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
340+
ret <8 x i8> %shuffle
341+
}
342+
343+
define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
344+
;CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
345+
%shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
346+
ret <4 x i16> %shuffle
347+
}
348+
349+
define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
350+
;CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
351+
%shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
352+
ret <2 x i32> %shuffle
353+
}
354+
355+
define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
356+
;CHECK: {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
357+
%shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
358+
ret <16 x i8> %shuffle
359+
}
360+
361+
define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
362+
;CHECK: {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
363+
%shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
364+
ret <8 x i16> %shuffle
365+
}
366+
367+
define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
368+
;CHECK: {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
369+
%shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
370+
ret <4 x i32> %shuffle
371+
}
372+
373+
define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
374+
;CHECK: {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
375+
%shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
376+
ret <2 x i64> %shuffle
377+
}
378+
379+
define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
380+
;CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
381+
%shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
382+
ret <8 x i8> %shuffle
383+
}
384+
385+
define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
386+
;CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
387+
%shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
388+
ret <4 x i16> %shuffle
389+
}
390+
391+
define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
392+
;CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
393+
%shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
394+
ret <2 x i32> %shuffle
395+
}
396+
397+
define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
398+
;CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
399+
%shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
400+
ret <16 x i8> %shuffle
401+
}
402+
403+
define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
404+
;CHECK: {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
405+
%shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
406+
ret <8 x i16> %shuffle
407+
}
408+
409+
define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
410+
;CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
411+
%shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
412+
ret <4 x i32> %shuffle
413+
}
414+
415+
define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
416+
;CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
417+
%shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
418+
ret <2 x i64> %shuffle
419+
}
232420

‎llvm/test/MC/AArch64/neon-diagnostics.s

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4101,3 +4101,188 @@
41014101
// CHECK-ERROR: error: invalid operand for instruction
41024102
// CHECK-ERROR: st4 {v31.2s-v1.2s}, [x31]
41034103
// CHECK-ERROR: ^
4104+
4105+
ins v2.b[16], w1
4106+
ins v7.h[8], w14
4107+
ins v20.s[5], w30
4108+
ins v1.d[2], x7
4109+
ins v2.b[3], b1
4110+
ins v7.h[2], h14
4111+
ins v20.s[1], s30
4112+
ins v1.d[0], d7
4113+
4114+
// CHECK-ERROR: error: lane number incompatible with layout
4115+
// CHECK-ERROR: ins v2.b[16], w1
4116+
// CHECK-ERROR: ^
4117+
// CHECK-ERROR: error: lane number incompatible with layout
4118+
// CHECK-ERROR: ins v7.h[8], w14
4119+
// CHECK-ERROR: ^
4120+
// CHECK-ERROR: error: lane number incompatible with layout
4121+
// CHECK-ERROR: ins v20.s[5], w30
4122+
// CHECK-ERROR: ^
4123+
// CHECK-ERROR: error: lane number incompatible with layout
4124+
// CHECK-ERROR: ins v1.d[2], x7
4125+
// CHECK-ERROR: ^
4126+
// CHECK-ERROR: error: invalid operand for instruction
4127+
// CHECK-ERROR: ins v2.b[3], b1
4128+
// CHECK-ERROR: ^
4129+
// CHECK-ERROR: error: invalid operand for instruction
4130+
// CHECK-ERROR: ins v7.h[2], h14
4131+
// CHECK-ERROR: ^
4132+
// CHECK-ERROR: error: invalid operand for instruction
4133+
// CHECK-ERROR: ins v20.s[1], s30
4134+
// CHECK-ERROR: ^
4135+
// CHECK-ERROR: error: invalid operand for instruction
4136+
// CHECK-ERROR: ins v1.d[0], d7
4137+
// CHECK-ERROR: ^
4138+
4139+
smov w1, v0.b[16]
4140+
smov w14, v6.h[8]
4141+
smov x1, v0.b[16]
4142+
smov x14, v6.h[8]
4143+
smov x20, v9.s[5]
4144+
smov w1, v0.d[0]
4145+
smov w14, v6.d[1]
4146+
smov x1, v0.d[0]
4147+
smov x14, v6.d[1]
4148+
smov x20, v9.d[0]
4149+
4150+
// CHECK-ERROR error: lane number incompatible with layout
4151+
// CHECK-ERROR smov w1, v0.b[16]
4152+
// CHECK-ERROR ^
4153+
// CHECK-ERROR error: lane number incompatible with layout
4154+
// CHECK-ERROR smov w14, v6.h[8]
4155+
// CHECK-ERROR ^
4156+
// CHECK-ERROR error: lane number incompatible with layout
4157+
// CHECK-ERROR smov x1, v0.b[16]
4158+
// CHECK-ERROR ^
4159+
// CHECK-ERROR error: lane number incompatible with layout
4160+
// CHECK-ERROR smov x14, v6.h[8]
4161+
// CHECK-ERROR ^
4162+
// CHECK-ERROR error: lane number incompatible with layout
4163+
// CHECK-ERROR smov x20, v9.s[5]
4164+
// CHECK-ERROR ^
4165+
// CHECK-ERROR error: invalid operand for instruction
4166+
// CHECK-ERROR smov w1, v0.d[0]
4167+
// CHECK-ERROR ^
4168+
// CHECK-ERROR error: invalid operand for instruction
4169+
// CHECK-ERROR smov w14, v6.d[1]
4170+
// CHECK-ERROR ^
4171+
// CHECK-ERROR error: invalid operand for instruction
4172+
// CHECK-ERROR smov x1, v0.d[0]
4173+
// CHECK-ERROR ^
4174+
// CHECK-ERROR error: invalid operand for instruction
4175+
// CHECK-ERROR smov x14, v6.d[1]
4176+
// CHECK-ERROR ^
4177+
// CHECK-ERROR error: invalid operand for instruction
4178+
// CHECK-ERROR smov x20, v9.d[0]
4179+
// CHECK-ERROR ^
4180+
4181+
umov w1, v0.b[16]
4182+
umov w14, v6.h[8]
4183+
umov w20, v9.s[5]
4184+
umov x7, v18.d[3]
4185+
umov w1, v0.d[0]
4186+
umov s20, v9.s[2]
4187+
umov d7, v18.d[1]
4188+
4189+
// CHECK-ERROR error: lane number incompatible with layout
4190+
// CHECK-ERROR umov w1, v0.b[16]
4191+
// CHECK-ERROR ^
4192+
// CHECK-ERROR error: lane number incompatible with layout
4193+
// CHECK-ERROR umov w14, v6.h[8]
4194+
// CHECK-ERROR ^
4195+
// CHECK-ERROR error: lane number incompatible with layout
4196+
// CHECK-ERROR umov w20, v9.s[5]
4197+
// CHECK-ERROR ^
4198+
// CHECK-ERROR error: lane number incompatible with layout
4199+
// CHECK-ERROR umov x7, v18.d[3]
4200+
// CHECK-ERROR ^
4201+
// CHECK-ERROR error: invalid operand for instruction
4202+
// CHECK-ERROR umov w1, v0.d[0]
4203+
// CHECK-ERROR ^
4204+
// CHECK-ERROR error: invalid operand for instruction
4205+
// CHECK-ERROR umov s20, v9.s[2]
4206+
// CHECK-ERROR ^
4207+
// CHECK-ERROR error: invalid operand for instruction
4208+
// CHECK-ERROR umov d7, v18.d[1]
4209+
// CHECK-ERROR ^
4210+
4211+
Ins v1.h[2], v3.b[6]
4212+
Ins v6.h[7], v7.s[2]
4213+
Ins v15.d[0], v22.s[2]
4214+
Ins v0.d[0], v4.b[1]
4215+
4216+
// CHECK-ERROR error: invalid operand for instruction
4217+
// CHECK-ERROR Ins v1.h[2], v3.b[6]
4218+
// CHECK-ERROR ^
4219+
// CHECK-ERROR error: invalid operand for instruction
4220+
// CHECK-ERROR Ins v6.h[7], v7.s[2]
4221+
// CHECK-ERROR ^
4222+
// CHECK-ERROR error: invalid operand for instruction
4223+
// CHECK-ERROR Ins v15.d[0], v22.s[2]
4224+
// CHECK-ERROR ^
4225+
// CHECK-ERROR error: invalid operand for instruction
4226+
// CHECK-ERROR Ins v0.d[0], v4.b[1]
4227+
// CHECK-ERROR ^
4228+
4229+
dup v1.8h, v2.b[2]
4230+
dup v11.4s, v7.h[7]
4231+
dup v17.2d, v20.s[0]
4232+
dup v1.16b, v2.h[2]
4233+
dup v11.8h, v7.s[3]
4234+
dup v17.4s, v20.d[0]
4235+
dup v5.2d, v1.b[1]
4236+
4237+
// CHECK-ERROR error: invalid operand for instruction
4238+
// CHECK-ERROR dup v1.8h, v2.b[2]
4239+
// CHECK-ERROR ^
4240+
// CHECK-ERROR error: invalid operand for instruction
4241+
// CHECK-ERROR dup v11.4s, v7.h[7]
4242+
// CHECK-ERROR ^
4243+
// CHECK-ERROR error: invalid operand for instruction
4244+
// CHECK-ERROR dup v17.2d, v20.s[0]
4245+
// CHECK-ERROR ^
4246+
// CHECK-ERROR error: invalid operand for instruction
4247+
// CHECK-ERROR dup v1.16b, v2.h[2]
4248+
// CHECK-ERROR ^
4249+
// CHECK-ERROR invalid operand for instruction
4250+
// CHECK-ERROR dup v11.8h, v7.s[3]
4251+
// CHECK-ERROR ^
4252+
// CHECK-ERROR invalid operand for instruction
4253+
// CHECK-ERROR dup v17.4s, v20.d[0]
4254+
// CHECK-ERROR ^
4255+
// CHECK-ERROR invalid operand for instruction
4256+
// CHECK-ERROR dup v5.2d, v1.b[1]
4257+
// CHECK-ERROR ^
4258+
4259+
dup v1.8b, b1
4260+
dup v11.4h, h14
4261+
dup v17.2s, s30
4262+
dup v1.16b, d2
4263+
dup v11.8s, w16
4264+
dup v17.4d, w28
4265+
dup v5.2d, w0
4266+
4267+
// CHECK-ERROR error: invalid operand for instruction
4268+
// CHECK-ERROR dup v1.8b, b1
4269+
// CHECK-ERROR ^
4270+
// CHECK-ERROR error: invalid operand for instruction
4271+
// CHECK-ERROR dup v11.4h, h14
4272+
// CHECK-ERROR ^
4273+
// CHECK-ERROR error: invalid operand for instruction
4274+
// CHECK-ERROR dup v17.2s, s30
4275+
// CHECK-ERROR ^
4276+
// CHECK-ERROR error: invalid operand for instruction
4277+
// CHECK-ERROR dup v1.16b, d2
4278+
// CHECK-ERROR ^
4279+
// CHECK-ERROR error: invalid operand for instruction
4280+
// CHECK-ERROR dup v11.8s, w16
4281+
// CHECK-ERROR ^
4282+
// CHECK-ERROR error: invalid operand for instruction
4283+
// CHECK-ERROR dup v17.4d, w28
4284+
// CHECK-ERROR ^
4285+
// CHECK-ERROR error: invalid operand for instruction
4286+
// CHECK-ERROR dup v5.2d, w0
4287+
// CHECK-ERROR ^
4288+

‎llvm/test/MC/AArch64/neon-simd-copy.s

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,44 @@
6060
// CHECK: ins v15.s[3], v22.s[2] // encoding: [0xcf,0x5e,0x1c,0x6e]
6161
// CHECK: ins v0.d[0], v4.d[1] // encoding: [0x80,0x44,0x08,0x6e]
6262

63+
//------------------------------------------------------------------------------
64+
// Duplicate to all lanes (vector, from element)
65+
//------------------------------------------------------------------------------
66+
dup v1.8b, v2.b[2]
67+
dup v11.4h, v7.h[7]
68+
dup v17.2s, v20.s[0]
69+
dup v1.16b, v2.b[2]
70+
dup v11.8h, v7.h[7]
71+
dup v17.4s, v20.s[0]
72+
dup v5.2d, v1.d[1]
73+
74+
// CHECK: dup v1.8b, v2.b[2] // encoding: [0x41,0x04,0x05,0x0e]
75+
// CHECK: dup v11.4h, v7.h[7] // encoding: [0xeb,0x04,0x1e,0x0e]
76+
// CHECK: dup v17.2s, v20.s[0] // encoding: [0x91,0x06,0x04,0x0e]
77+
// CHECK: dup v1.16b, v2.b[2] // encoding: [0x41,0x04,0x05,0x4e]
78+
// CHECK: dup v11.8h, v7.h[7] // encoding: [0xeb,0x04,0x1e,0x4e]
79+
// CHECK: dup v17.4s, v20.s[0] // encoding: [0x91,0x06,0x04,0x4e]
80+
// CHECK: dup v5.2d, v1.d[1] // encoding: [0x25,0x04,0x18,0x4e]
81+
82+
//------------------------------------------------------------------------------
83+
// Duplicate to all lanes (vector, from main)
84+
//------------------------------------------------------------------------------
85+
dup v1.8b, w1
86+
dup v11.4h, w14
87+
dup v17.2s, w30
88+
dup v1.16b, w2
89+
dup v11.8h, w16
90+
dup v17.4s, w28
91+
dup v5.2d, x0
92+
93+
// CHECK: dup v1.8b, w1 // encoding: [0x21,0x0c,0x01,0x0e]
94+
// CHECK: dup v11.4h, w14 // encoding: [0xcb,0x0d,0x0a,0x0e]
95+
// CHECK: dup v17.2s, w30 // encoding: [0xd1,0x0f,0x14,0x0e]
96+
// CHECK: dup v1.16b, w2 // encoding: [0x41,0x0c,0x01,0x4e]
97+
// CHECK: dup v11.8h, w16 // encoding: [0x0b,0x0e,0x0a,0x4e]
98+
// CHECK: dup v17.4s, w28 // encoding: [0x91,0x0f,0x14,0x4e]
99+
// CHECK: dup v5.2d, x0 // encoding: [0x05,0x0c,0x08,0x4e]
100+
63101

64102

65103

0 commit comments

Comments
 (0)
Please sign in to comment.