Index: lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.h +++ lib/Target/AArch64/AArch64ISelLowering.h @@ -162,6 +162,13 @@ SITOF, UITOF, + /// Natural vector cast. ISD::BITCAST is not natural in the big-endian + /// world w.r.t. vectors, which causes additional REV instructions to be + /// generated to compensate for the byte-swapping. But sometimes we do + /// need to re-interpret the data in SIMD vector registers in big-endian + /// mode without emitting such REV instructions. + NVCAST, + // NEON Load/Store with post-increment base updates LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE, LD3post, Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -5466,13 +5466,13 @@ if (VT.getSizeInBits() == 128) { SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::v2i64, DAG.getConstant(CnstVal, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } // Support the V64 version via subregister insertion. 
SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::f64, DAG.getConstant(CnstVal, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) { @@ -5481,7 +5481,7 @@ SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(0, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) { @@ -5490,7 +5490,7 @@ SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(8, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) { @@ -5499,7 +5499,7 @@ SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(16, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) { @@ -5508,7 +5508,7 @@ SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(24, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) { @@ -5517,7 +5517,7 @@ SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(0, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) { @@ -5526,7 +5526,7 @@ SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(8, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + return 
DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } if (AArch64_AM::isAdvSIMDModImmType7(CnstVal)) { @@ -5535,7 +5535,7 @@ SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(264, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } if (AArch64_AM::isAdvSIMDModImmType8(CnstVal)) { @@ -5544,7 +5544,7 @@ SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(272, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } if (AArch64_AM::isAdvSIMDModImmType9(CnstVal)) { @@ -5552,7 +5552,7 @@ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8; SDValue Mov = DAG.getNode(AArch64ISD::MOVI, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } // The few faces of FMOV... @@ -5561,7 +5561,7 @@ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4f32 : MVT::v2f32; SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } if (AArch64_AM::isAdvSIMDModImmType12(CnstVal) && @@ -5569,7 +5569,7 @@ CnstVal = AArch64_AM::encodeAdvSIMDModImmType12(CnstVal); SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MVT::v2f64, DAG.getConstant(CnstVal, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } // The many faces of MVNI... 
@@ -5580,7 +5580,7 @@ SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(0, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) { @@ -5589,7 +5589,7 @@ SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(8, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) { @@ -5598,7 +5598,7 @@ SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(16, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) { @@ -5607,7 +5607,7 @@ SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(24, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) { @@ -5616,7 +5616,7 @@ SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(0, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) { @@ -5625,7 +5625,7 @@ SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(8, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } if (AArch64_AM::isAdvSIMDModImmType7(CnstVal)) { @@ -5634,7 +5634,7 @@ SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(264, MVT::i32)); - return 
DAG.getNode(ISD::BITCAST, dl, VT, Mov); + return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } if (AArch64_AM::isAdvSIMDModImmType8(CnstVal)) { @@ -5643,7 +5643,7 @@ SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(272, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } } Index: lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.td +++ lib/Target/AArch64/AArch64InstrInfo.td @@ -237,6 +237,7 @@ def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge", SDT_AArch64WrapperLarge>; +def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>; //===----------------------------------------------------------------------===// @@ -4957,6 +4958,59 @@ // b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX // +// Natural vector casts (64 bit) +def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>; + +def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>; + +def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>; + +def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>; + +def : Pat<(v8i8 (AArch64NvCast (v2f32 
FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v4i16 (AArch64NvCast (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v2i32 (AArch64NvCast (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>; + +// Natural vector casts (128 bit) +def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>; + +def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>; + +def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>; + +def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v2f64 (AArch64NvCast (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>; + +def : Pat<(v16i8 (AArch64NvCast (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v8i16 (AArch64NvCast (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v4i32 (AArch64NvCast (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v2i64 (AArch64NvCast (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>; + +def : Pat<(v16i8 (AArch64NvCast (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v8i16 (AArch64NvCast (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v4i32 (AArch64NvCast (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v2i64 (AArch64NvCast (v2f64 
FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v2f64 (AArch64NvCast (v2f64 FPR128:$src))), (v2f64 FPR128:$src)>; + let Predicates = [IsLE] in { def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; Index: test/CodeGen/AArch64/aarch64-be-bv.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/aarch64-be-bv.ll @@ -0,0 +1,634 @@ +; RUN: llc -mtriple=aarch64_be--linux-gnu < %s | FileCheck %s + +@vec_v8i16 = global <8 x i16> + +; CHECK-LABEL: movi_modimm_t1: +define i16 @movi_modimm_t1() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: movi_modimm_t2: +define i16 @movi_modimm_t2() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1, lsl #8 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: movi_modimm_t3: +define i16 @movi_modimm_t3() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1, lsl #16 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: movi_modimm_t4: +define i16 @movi_modimm_t4() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: movi 
v[[REG2:[0-9]+]].4s, #0x1, lsl #24 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: movi_modimm_t5: +define i16 @movi_modimm_t5() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: movi v[[REG2:[0-9]+]].8h, #0x1 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: movi_modimm_t6: +define i16 @movi_modimm_t6() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: movi v[[REG2:[0-9]+]].8h, #0x1, lsl #8 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: movi_modimm_t7: +define i16 @movi_modimm_t7() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1, msl #8 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: movi_modimm_t8: +define i16 @movi_modimm_t8() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1, msl #16 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: movi_modimm_t9: +define i16 @movi_modimm_t9() 
nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: movi v[[REG2:[0-9]+]].16b, #0x1 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: movi_modimm_t10: +define i16 @movi_modimm_t10() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: movi v[[REG2:[0-9]+]].2d, #0x00ffff0000ffff + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: fmov_modimm_t11: +define i16 @fmov_modimm_t11() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: fmov v[[REG2:[0-9]+]].4s, #3.00000000 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: fmov_modimm_t12: +define i16 @fmov_modimm_t12() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: fmov v[[REG2:[0-9]+]].2d, #0.17968750 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: mvni_modimm_t1: +define i16 @mvni_modimm_t1() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + 
ret i16 %el +} + +; CHECK-LABEL: mvni_modimm_t2: +define i16 @mvni_modimm_t2() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1, lsl #8 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: mvni_modimm_t3: +define i16 @mvni_modimm_t3() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1, lsl #16 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: mvni_modimm_t4: +define i16 @mvni_modimm_t4() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1, lsl #24 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: mvni_modimm_t5: +define i16 @mvni_modimm_t5() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].8h, #0x1 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: mvni_modimm_t6: +define i16 @mvni_modimm_t6() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].8h, #0x1, lsl #8 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* 
@vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: mvni_modimm_t7: +define i16 @mvni_modimm_t7() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1, msl #8 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +; CHECK-LABEL: mvni_modimm_t8: +define i16 @mvni_modimm_t8() nounwind { + ; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}] + ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1, msl #16 + ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h + ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0] + %in = load <8 x i16>* @vec_v8i16 + %rv = add <8 x i16> %in, + %el = extractelement <8 x i16> %rv, i32 0 + ret i16 %el +} + +declare i8 @f_v8i8(<8 x i8> %arg) +declare i16 @f_v4i16(<4 x i16> %arg) +declare i32 @f_v2i32(<2 x i32> %arg) +declare i8 @f_v16i8(<16 x i8> %arg) +declare i16 @f_v8i16(<8 x i16> %arg) +declare i32 @f_v4i32(<4 x i32> %arg) + +; CHECK-LABEL: movi_modimm_t1_call: +define void @movi_modimm_t1_call() { + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x8 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b + ; CHECK-NEXT: bl f_v8i8 + call i8 @f_v8i8(<8 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x7 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h + ; CHECK-NEXT: bl f_v4i16 + call i16 @f_v4i16(<4 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x6 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s + ; CHECK-NEXT: bl f_v2i32 + call i32 @f_v2i32(<2 x i32> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v16i8 + call i8 @f_v16i8(<16 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x4 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, 
v[[REG1]].8h + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v8i16 + call i16 @f_v8i16(<8 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x3 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v4i32 + call i32 @f_v4i32(<4 x i32> ) + + ret void +} + +; CHECK-LABEL: movi_modimm_t2_call: +define void @movi_modimm_t2_call() { + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x8, lsl #8 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b + ; CHECK-NEXT: bl f_v8i8 + call i8 @f_v8i8(<8 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x7, lsl #8 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h + ; CHECK-NEXT: bl f_v4i16 + call i16 @f_v4i16(<4 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x6, lsl #8 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s + ; CHECK-NEXT: bl f_v2i32 + call i32 @f_v2i32(<2 x i32> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, lsl #8 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v16i8 + call i8 @f_v16i8(<16 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x4, lsl #8 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v8i16 + call i16 @f_v8i16(<8 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x3, lsl #8 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v4i32 + call i32 @f_v4i32(<4 x i32> ) + + ret void +} + +; CHECK-LABEL: movi_modimm_t3_call: +define void @movi_modimm_t3_call() { + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x8, lsl #16 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b + ; CHECK-NEXT: bl f_v8i8 + call i8 @f_v8i8(<8 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x7, lsl #16 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h + ; 
CHECK-NEXT: bl f_v4i16 + call i16 @f_v4i16(<4 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x6, lsl #16 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s + ; CHECK-NEXT: bl f_v2i32 + call i32 @f_v2i32(<2 x i32> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, lsl #16 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v16i8 + call i8 @f_v16i8(<16 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x4, lsl #16 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v8i16 + call i16 @f_v8i16(<8 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x3, lsl #16 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v4i32 + call i32 @f_v4i32(<4 x i32> ) + + ret void +} + +; CHECK-LABEL: movi_modimm_t4_call: +define void @movi_modimm_t4_call() { + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x8, lsl #24 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b + ; CHECK-NEXT: bl f_v8i8 + call i8 @f_v8i8(<8 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x7, lsl #24 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h + ; CHECK-NEXT: bl f_v4i16 + call i16 @f_v4i16(<4 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x6, lsl #24 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s + ; CHECK-NEXT: bl f_v2i32 + call i32 @f_v2i32(<2 x i32> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, lsl #24 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v16i8 + call i8 @f_v16i8(<16 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x4, lsl #24 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v8i16 + call i16 @f_v8i16(<8 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, 
#0x3, lsl #24 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v4i32 + call i32 @f_v4i32(<4 x i32> ) + + ret void +} + +; CHECK-LABEL: movi_modimm_t5_call: +define void @movi_modimm_t5_call() { + ; CHECK: movi v[[REG1:[0-9]+]].4h, #0x8 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b + ; CHECK-NEXT: bl f_v8i8 + call i8 @f_v8i8(<8 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].4h, #0x7 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h + ; CHECK-NEXT: bl f_v4i16 + call i16 @f_v4i16(<4 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].4h, #0x6 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s + ; CHECK-NEXT: bl f_v2i32 + call i32 @f_v2i32(<2 x i32> ) + ; CHECK: movi v[[REG1:[0-9]+]].8h, #0x5 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v16i8 + call i8 @f_v16i8(<16 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].8h, #0x4 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v8i16 + call i16 @f_v8i16(<8 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].8h, #0x3 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v4i32 + call i32 @f_v4i32(<4 x i32> ) + + ret void +} + +; CHECK-LABEL: movi_modimm_t6_call: +define void @movi_modimm_t6_call() { + ; CHECK: movi v[[REG1:[0-9]+]].4h, #0x8, lsl #8 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b + ; CHECK-NEXT: bl f_v8i8 + call i8 @f_v8i8(<8 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].4h, #0x7, lsl #8 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h + ; CHECK-NEXT: bl f_v4i16 + call i16 @f_v4i16(<4 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].4h, #0x6, lsl #8 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s + ; CHECK-NEXT: bl f_v2i32 + call i32 @f_v2i32(<2 x i32> ) + ; CHECK: 
movi v[[REG1:[0-9]+]].8h, #0x5, lsl #8 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v16i8 + call i8 @f_v16i8(<16 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].8h, #0x4, lsl #8 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v8i16 + call i16 @f_v8i16(<8 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].8h, #0x3, lsl #8 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v4i32 + call i32 @f_v4i32(<4 x i32> ) + + ret void +} + +; CHECK-LABEL: movi_modimm_t7_call: +define void @movi_modimm_t7_call() { + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x8, msl #8 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b + ; CHECK-NEXT: bl f_v8i8 + call i8 @f_v8i8(<8 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x7, msl #8 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h + ; CHECK-NEXT: bl f_v4i16 + call i16 @f_v4i16(<4 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x6, msl #8 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s + ; CHECK-NEXT: bl f_v2i32 + call i32 @f_v2i32(<2 x i32> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, msl #8 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v16i8 + call i8 @f_v16i8(<16 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x4, msl #8 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v8i16 + call i16 @f_v8i16(<8 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x3, msl #8 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v4i32 + call i32 @f_v4i32(<4 x i32> ) + + ret void +} + +; CHECK-LABEL: 
movi_modimm_t8_call: +define void @movi_modimm_t8_call() { + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x8, msl #16 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b + ; CHECK-NEXT: bl f_v8i8 + call i8 @f_v8i8(<8 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x7, msl #16 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h + ; CHECK-NEXT: bl f_v4i16 + call i16 @f_v4i16(<4 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].2s, #0x6, msl #16 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s + ; CHECK-NEXT: bl f_v2i32 + call i32 @f_v2i32(<2 x i32> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, msl #16 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v16i8 + call i8 @f_v16i8(<16 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x4, msl #16 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v8i16 + call i16 @f_v8i16(<8 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x3, msl #16 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v4i32 + call i32 @f_v4i32(<4 x i32> ) + + ret void +} + +; CHECK-LABEL: movi_modimm_t9_call: +define void @movi_modimm_t9_call() { + ; CHECK: movi v[[REG1:[0-9]+]].8b, #0x8 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b + ; CHECK-NEXT: bl f_v8i8 + call i8 @f_v8i8(<8 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].8b, #0x7 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h + ; CHECK-NEXT: bl f_v4i16 + call i16 @f_v4i16(<4 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].8b, #0x6 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s + ; CHECK-NEXT: bl f_v2i32 + call i32 @f_v2i32(<2 x i32> ) + ; CHECK: movi v[[REG1:[0-9]+]].16b, #0x5 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v16i8 + call i8 
@f_v16i8(<16 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].16b, #0x4 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v8i16 + call i16 @f_v8i16(<8 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].16b, #0x3 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v4i32 + call i32 @f_v4i32(<4 x i32> ) + + ret void +} + +; CHECK-LABEL: movi_modimm_t10_call: +define void @movi_modimm_t10_call() { + ; CHECK: movi d[[REG1:[0-9]+]], #0x0000ff000000ff + ; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b + ; CHECK-NEXT: bl f_v8i8 + call i8 @f_v8i8(<8 x i8> ) + ; CHECK: movi d[[REG1:[0-9]+]], #0x00ffff0000ffff + ; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h + ; CHECK-NEXT: bl f_v4i16 + call i16 @f_v4i16(<4 x i16> ) + ; CHECK: movi d[[REG1:[0-9]+]], #0xffffffffffffffff + ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s + ; CHECK-NEXT: bl f_v2i32 + call i32 @f_v2i32(<2 x i32> ) + ; CHECK: movi v[[REG1:[0-9]+]].2d, #0xffffff00ffffff + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v16i8 + call i8 @f_v16i8(<16 x i8> ) + ; CHECK: movi v[[REG1:[0-9]+]].2d, #0xffffffffffff0000 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v8i16 + call i16 @f_v8i16(<8 x i16> ) + ; CHECK: movi v[[REG1:[0-9]+]].2d, #0xffffffff00000000 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v4i32 + call i32 @f_v4i32(<4 x i32> ) + + ret void +} + +; CHECK-LABEL: fmov_modimm_t11_call: +define void @fmov_modimm_t11_call() { + ; CHECK: fmov v[[REG1:[0-9]+]].2s, #4.00000000 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b + ; CHECK-NEXT: bl f_v8i8 + call 
i8 @f_v8i8(<8 x i8> ) + ; CHECK: fmov v[[REG1:[0-9]+]].2s, #3.75000000 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h + ; CHECK-NEXT: bl f_v4i16 + call i16 @f_v4i16(<4 x i16> ) + ; CHECK: fmov v[[REG1:[0-9]+]].2s, #3.50000000 + ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s + ; CHECK-NEXT: bl f_v2i32 + call i32 @f_v2i32(<2 x i32> ) + ; CHECK: fmov v[[REG1:[0-9]+]].4s, #3.25000000 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v16i8 + call i8 @f_v16i8(<16 x i8> ) + ; CHECK: fmov v[[REG1:[0-9]+]].4s, #3.00000000 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v8i16 + call i16 @f_v8i16(<8 x i16> ) + ; CHECK: fmov v[[REG1:[0-9]+]].4s, #2.75000000 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v4i32 + call i32 @f_v4i32(<4 x i32> ) + + ret void +} + +; CHECK-LABEL: fmov_modimm_t12_call: +define void @fmov_modimm_t12_call() { + ; CHECK: fmov v[[REG1:[0-9]+]].2d, #0.18750000 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v16i8 + call i8 @f_v16i8(<16 x i8> ) + ; CHECK: fmov v[[REG1:[0-9]+]].2d, #0.17968750 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v8i16 + call i16 @f_v8i16(<8 x i16> ) + ; CHECK: fmov v[[REG1:[0-9]+]].2d, #0.17187500 + ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s + ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8 + ; CHECK-NEXT: bl f_v4i32 + call i32 @f_v4i32(<4 x i32> ) + + ret void +}