# Reviewer summary of this patch. This preamble sits before the first "Index:" header and is not part of any hunk.
#
# llvm/lib/Target/ARM/ARMISelLowering.cpp
#   Adds "Subtarget->hasNEON() &&" to the condition that guards the BVN->isConstantSplat(...) check,
#   so the code inside that if is only reached when Subtarget->hasNEON() is true.
#
# llvm/lib/Target/ARM/ARMInstrMVE.td
#   Adds five selection patterns inside "let Predicates = [HasMVEInt]":
#     sext_inreg of v4i32 from v4i16           -> MVE_VMOVLs16bh
#     sext_inreg of v8i16 from v8i8            -> MVE_VMOVLs8bh
#     sext_inreg of v4i32 from v4i8            -> MVE_VMOVLs8bh, then MVE_VMOVLs16bh on its result
#     and of v4i32 with ARMvmovImm (i32 0xCFF) -> MVE_VMOVLu16bh   (labelled "zext_inreg 16 -> 32")
#     and of v8i16 with ARMvmovImm (i32 0x8FF) -> MVE_VMOVLu8bh    (labelled "zext_inreg 8 -> 16")
#   NOTE(review): 0xCFF and 0x8FF are presumably the encoded ARMvmovImm forms of the per-lane masks
#   0xFFFF and 0xFF; confirm against the ARMvmovImm operand encoding.
#
# llvm/test/CodeGen/Thumb2/mve-sext.ll (new file, created from /dev/null)
#   Runs llc with -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve and checks:
#     sext v8i8->v8i16 expects vmovlb.s8; sext v4i16->v4i32 expects vmovlb.s16;
#     sext v4i8->v4i32 expects vmovlb.s8 followed by vmovlb.s16;
#     zext v8i8->v8i16 expects vmovlb.u8; zext v4i16->v4i32 expects vmovlb.u16;
#     zext v4i8->v4i32 expects vmov.i32 plus vand (this patch adds no vmovlb pattern for that case);
#     the three trunc tests expect only "bx lr".
#
# NOTE(review): in this copy the lines of each hunk appear to be joined with spaces, and the context
# line "class MVE_VSHLL_imm pattern=[]>" looks like it lost its template parameter list. Verify
# against the original patch before trying to apply it.
Index: llvm/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -11094,7 +11094,7 @@ APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; - if (BVN && + if (BVN && Subtarget->hasNEON() && BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { if (SplatBitSize <= 64) { EVT VbicVT; Index: llvm/lib/Target/ARM/ARMInstrMVE.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrMVE.td +++ llvm/lib/Target/ARM/ARMInstrMVE.td @@ -995,6 +995,23 @@ defm MVE_VMOVLs16 : MVE_VMOVL_shift_half<"vmovl", "s16", 0b10, 0b0>; defm MVE_VMOVLu16 : MVE_VMOVL_shift_half<"vmovl", "u16", 0b10, 0b1>; +let Predicates = [HasMVEInt] in { + def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i16), + (MVE_VMOVLs16bh MQPR:$src)>; + def : Pat<(sext_inreg (v8i16 MQPR:$src), v8i8), + (MVE_VMOVLs8bh MQPR:$src)>; + def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i8), + (MVE_VMOVLs16bh (MVE_VMOVLs8bh MQPR:$src))>; + + // zext_inreg 16 -> 32 + def : Pat<(and (v4i32 MQPR:$src), (v4i32 (ARMvmovImm (i32 0xCFF)))), + (MVE_VMOVLu16bh MQPR:$src)>; + // zext_inreg 8 -> 16 + def : Pat<(and (v8i16 MQPR:$src), (v8i16 (ARMvmovImm (i32 0x8FF)))), + (MVE_VMOVLu8bh MQPR:$src)>; +} + + class MVE_VSHLL_imm pattern=[]> : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$Qm), immops), Index: llvm/test/CodeGen/Thumb2/mve-sext.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/Thumb2/mve-sext.ll @@ -0,0 +1,93 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s + +define arm_aapcs_vfpcc <8 x i16> @sext_v8i8_v8i16(<8 x i8> %src) { +; CHECK-LABEL: sext_v8i8_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.s8 q0, q0 +; 
CHECK-NEXT: bx lr +entry: + %0 = sext <8 x i8> %src to <8 x i16> + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @sext_v4i16_v4i32(<4 x i16> %src) { +; CHECK-LABEL: sext_v4i16_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.s16 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = sext <4 x i16> %src to <4 x i32> + ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @sext_v4i8_v4i32(<4 x i8> %src) { +; CHECK-LABEL: sext_v4i8_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.s8 q0, q0 +; CHECK-NEXT: vmovlb.s16 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = sext <4 x i8> %src to <4 x i32> + ret <4 x i32> %0 +} + + +define arm_aapcs_vfpcc <8 x i16> @zext_v8i8_v8i16(<8 x i8> %src) { +; CHECK-LABEL: zext_v8i8_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.u8 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = zext <8 x i8> %src to <8 x i16> + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @zext_v4i16_v4i32(<4 x i16> %src) { +; CHECK-LABEL: zext_v4i16_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.u16 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = zext <4 x i16> %src to <4 x i32> + ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @zext_v4i8_v4i32(<4 x i8> %src) { +; CHECK-LABEL: zext_v4i8_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i32 q1, #0xff +; CHECK-NEXT: vand q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext <4 x i8> %src to <4 x i32> + ret <4 x i32> %0 +} + + +define arm_aapcs_vfpcc <8 x i8> @trunc_v8i16_v8i8(<8 x i16> %src) { +; CHECK-LABEL: trunc_v8i16_v8i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = trunc <8 x i16> %src to <8 x i8> + ret <8 x i8> %0 +} + +define arm_aapcs_vfpcc <4 x i16> @trunc_v4i32_v4i16(<4 x i32> %src) { +; CHECK-LABEL: trunc_v4i32_v4i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = trunc <4 x i32> %src to <4 x i16> + ret <4 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i8> @trunc_v4i32_v4i8(<4 x i32> %src) { +; CHECK-LABEL: trunc_v4i32_v4i8: +; CHECK: @ 
%bb.0: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = trunc <4 x i32> %src to <4 x i8> + ret <4 x i8> %0 +}