Index: llvm/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -222,29 +222,33 @@
 }
 
 void ARMTargetLowering::addMVEITypes() {
-  const MVT iTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 };
+  const MVT iTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
 
   for (auto VT : iTypes) {
     addRegisterClass(VT, &ARM::QPRRegClass);
-    for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
-      setOperationAction(Opc, VT, Expand);
-    setOperationAction(ISD::BITCAST, VT, Legal);
-    setOperationAction(ISD::LOAD, VT, Legal);
-    setOperationAction(ISD::STORE, VT, Legal);
   }
+
+  addRegisterClass(MVT::v2i64, &ARM::QPRRegClass);
+  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
+    setOperationAction(Opc, MVT::v2i64, Expand);
+  setOperationAction(ISD::BITCAST, MVT::v2i64, Legal);
+  setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
+  setOperationAction(ISD::STORE, MVT::v2i64, Legal);
 }
 
 void ARMTargetLowering::addMVEFPTypes() {
-  const MVT fTypes[] = { MVT::v2f64, MVT::v4f32, MVT::v8f16 };
+  const MVT fTypes[] = { MVT::v4f32, MVT::v8f16 };
 
   for (MVT VT : fTypes) {
     addRegisterClass(VT, &ARM::QPRRegClass);
-    for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
-      setOperationAction(Opc, VT, Expand);
-    setOperationAction(ISD::BITCAST, VT, Legal);
-    setOperationAction(ISD::LOAD, VT, Legal);
-    setOperationAction(ISD::STORE, VT, Legal);
   }
+
+  addRegisterClass(MVT::v2f64, &ARM::QPRRegClass);
+  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
+    setOperationAction(Opc, MVT::v2f64, Expand);
+  setOperationAction(ISD::BITCAST, MVT::v2f64, Legal);
+  setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
+  setOperationAction(ISD::STORE, MVT::v2f64, Legal);
 }
 
 ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
Index: llvm/lib/Target/ARM/ARMInstrMVE.td
===================================================================
--- llvm/lib/Target/ARM/ARMInstrMVE.td
+++ llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -4074,3 +4074,24 @@
 def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (v16i8 QPR:$src)>;
 def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
 }
+
+// Template classes for pattern matching
+
+multiclass unpred_int_op_rr<SDNode opnode, string RegRegOp> {
+  def i8 : Pat<(v16i8 (opnode (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+               (v16i8 (!cast<Instruction>(RegRegOp#"i8") (v16i8 MQPR:$val1),
+                                                         (v16i8 MQPR:$val2)))>;
+  def i16 : Pat<(v8i16 (opnode (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+                (v8i16 (!cast<Instruction>(RegRegOp#"i16") (v8i16 MQPR:$val1),
+                                                           (v8i16 MQPR:$val2)))>;
+  def i32 : Pat<(v4i32 (opnode (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+                (v4i32 (!cast<Instruction>(RegRegOp#"i32") (v4i32 MQPR:$val1),
+                                                           (v4i32 MQPR:$val2)))>;
+}
+
+// Arithmetic
+
+let Predicates = [HasMVEInt] in {
+  defm Pat_VADDt1i : unpred_int_op_rr<add, "MVE_VADD">;
+  defm Pat_VSUBt1i : unpred_int_op_rr<sub, "MVE_VSUB">;
+}
Index: llvm/test/CodeGen/Thumb2/mve-simple-arith.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-simple-arith.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s
+
+define arm_aapcs_vfpcc <16 x i8> @add_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
+; CHECK-LABEL: add_int8_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vadd.i8 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = add <16 x i8> %src1, %src2
+  ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @add_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: add_int16_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vadd.i16 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = add <8 x i16> %src1, %src2
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @add_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
+; CHECK-LABEL: add_int32_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vadd.i32 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = add nsw <4 x i32> %src1, %src2
+  ret <4 x i32> %0
+}
+
+
+define arm_aapcs_vfpcc <16 x i8> @sub_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
+; CHECK-LABEL: sub_int8_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vsub.i8 q0, q1, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = sub <16 x i8> %src2, %src1
+  ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @sub_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: sub_int16_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vsub.i16 q0, q1, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = sub <8 x i16> %src2, %src1
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @sub_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
+; CHECK-LABEL: sub_int32_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vsub.i32 q0, q1, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = sub nsw <4 x i32> %src2, %src1
+  ret <4 x i32> %0
+}
+