[ARM] MVE: add isel patterns for VMVN, VBIC and VORN

Select MVE_VMVN for (vnotq x), MVE_VBIC for (and x, (vnotq y)) and MVE_VORN
for (or x, (vnotq y)) on v16i8, v8i16 and v4i32 under HasMVEInt, and add a
codegen test for each combination to mve-bitarith.ll.  In the tests the NOT
is written as an xor with an all-ones vector, and the inverted operand is
%src1 (q0), so the expected operand order is "q0, q1, q0".

Index: llvm/lib/Target/ARM/ARMInstrMVE.td
===================================================================
--- llvm/lib/Target/ARM/ARMInstrMVE.td
+++ llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -1538,6 +1538,15 @@
   let Inst{0} = 0b0;
 }
 
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v16i8 (vnotq (v16i8 MQPR:$val1))),
+            (v16i8 (MVE_VMVN (v16i8 MQPR:$val1)))>;
+  def : Pat<(v8i16 (vnotq (v8i16 MQPR:$val1))),
+            (v8i16 (MVE_VMVN (v8i16 MQPR:$val1)))>;
+  def : Pat<(v4i32 (vnotq (v4i32 MQPR:$val1))),
+            (v4i32 (MVE_VMVN (v4i32 MQPR:$val1)))>;
+}
+
 class MVE_bit_ops<string iname, bits<2> bit_21_20, bit bit_28>
   : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
                   iname, "", "$Qd, $Qn, $Qm", ""> {
@@ -1596,6 +1605,20 @@
             (v8i16 (MVE_VEOR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
   def : Pat<(v4i32 (xor (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
             (v4i32 (MVE_VEOR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+
+  def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (vnotq MQPR:$val2))),
+            (v16i8 (MVE_VBIC (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (and (v8i16 MQPR:$val1), (vnotq MQPR:$val2))),
+            (v8i16 (MVE_VBIC (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (vnotq MQPR:$val2))),
+            (v4i32 (MVE_VBIC (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+
+  def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (vnotq MQPR:$val2))),
+            (v16i8 (MVE_VORN (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (vnotq MQPR:$val2))),
+            (v8i16 (MVE_VORN (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (vnotq MQPR:$val2))),
+            (v4i32 (MVE_VORN (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
 }
 
 class MVE_bit_cmode<string iname, string suffix, bits<4> cmode, dag inOps>
Index: llvm/test/CodeGen/Thumb2/mve-bitarith.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/mve-bitarith.ll
+++ llvm/test/CodeGen/Thumb2/mve-bitarith.ll
@@ -93,3 +93,98 @@
   ret <4 x i32> %0
 }
 
+define arm_aapcs_vfpcc <16 x i8> @v_mvn_i8(<16 x i8> %src) {
+; CHECK-LABEL: v_mvn_i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmvn q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = xor <16 x i8> %src, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @v_mvn_i16(<8 x i16> %src) {
+; CHECK-LABEL: v_mvn_i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmvn q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = xor <8 x i16> %src, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @v_mvn_i32(<4 x i32> %src) {
+; CHECK-LABEL: v_mvn_i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmvn q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = xor <4 x i32> %src, <i32 -1, i32 -1, i32 -1, i32 -1>
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <16 x i8> @v_bic_i8(<16 x i8> %src1, <16 x i8> %src2) {
+; CHECK-LABEL: v_bic_i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vbic q0, q1, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = xor <16 x i8> %src1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %1 = and <16 x i8> %src2, %0
+  ret <16 x i8> %1
+}
+
+define arm_aapcs_vfpcc <8 x i16> @v_bic_i16(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: v_bic_i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vbic q0, q1, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = xor <8 x i16> %src1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  %1 = and <8 x i16> %src2, %0
+  ret <8 x i16> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @v_bic_i32(<4 x i32> %src1, <4 x i32> %src2) {
+; CHECK-LABEL: v_bic_i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vbic q0, q1, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = xor <4 x i32> %src1, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %1 = and <4 x i32> %src2, %0
+  ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <16 x i8> @v_or_i8(<16 x i8> %src1, <16 x i8> %src2) {
+; CHECK-LABEL: v_or_i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vorn q0, q1, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = xor <16 x i8> %src1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %1 = or <16 x i8> %src2, %0
+  ret <16 x i8> %1
+}
+
+define arm_aapcs_vfpcc <8 x i16> @v_or_i16(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: v_or_i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vorn q0, q1, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = xor <8 x i16> %src1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  %1 = or <8 x i16> %src2, %0
+  ret <8 x i16> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @v_or_i32(<4 x i32> %src1, <4 x i32> %src2) {
+; CHECK-LABEL: v_or_i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vorn q0, q1, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = xor <4 x i32> %src1, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %1 = or <4 x i32> %src2, %0
+  ret <4 x i32> %1
+}