Index: llvm/trunk/lib/Target/ARM/ARMInstrMVE.td
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrMVE.td
+++ llvm/trunk/lib/Target/ARM/ARMInstrMVE.td
@@ -1839,10 +1839,30 @@
 def MVE_VADDi16 : MVE_VADD<"i16", 0b01>;
 def MVE_VADDi32 : MVE_VADD<"i32", 0b10>;
 
+// Map the `add` node on v16i8/v8i16/v4i32 to the matching MVE_VADD instruction.
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v16i8 (add (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+            (v16i8 (MVE_VADDi8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (add (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+            (v8i16 (MVE_VADDi16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (add (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+            (v4i32 (MVE_VADDi32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+}
+
 def MVE_VSUBi8 : MVE_VSUB<"i8", 0b00>;
 def MVE_VSUBi16 : MVE_VSUB<"i16", 0b01>;
 def MVE_VSUBi32 : MVE_VSUB<"i32", 0b10>;
 
+// Map the `sub` node on v16i8/v8i16/v4i32 to the matching MVE_VSUB instruction.
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v16i8 (sub (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+            (v16i8 (MVE_VSUBi8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (sub (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+            (v8i16 (MVE_VSUBi16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (sub (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+            (v4i32 (MVE_VSUBi32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+}
+
 class MVE_VQADDSUB<string iname, string suffix, bit U, bit subtract,
                    bits<2> size, list<dag> pattern=[]>
   : MVE_int<iname, suffix, size, pattern> {
Index: llvm/trunk/test/CodeGen/Thumb2/mve-simple-arith.ll
===================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-simple-arith.ll
+++ llvm/trunk/test/CodeGen/Thumb2/mve-simple-arith.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP
+
+define arm_aapcs_vfpcc <16 x i8> @add_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
+; CHECK-LABEL: add_int8_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vadd.i8 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = add <16 x i8> %src1, %src2
+  ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @add_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: add_int16_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vadd.i16 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = add <8 x i16> %src1, %src2
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @add_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
+; CHECK-LABEL: add_int32_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vadd.i32 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = add nsw <4 x i32> %src1, %src2
+  ret <4 x i32> %0
+}
+
+
+define arm_aapcs_vfpcc <16 x i8> @sub_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
+; CHECK-LABEL: sub_int8_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vsub.i8 q0, q1, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = sub <16 x i8> %src2, %src1
+  ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @sub_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: sub_int16_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vsub.i16 q0, q1, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = sub <8 x i16> %src2, %src1
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @sub_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
+; CHECK-LABEL: sub_int32_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vsub.i32 q0, q1, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = sub nsw <4 x i32> %src2, %src1
+  ret <4 x i32> %0
+}