Summary of this patch (descriptive text ahead of the first file header;
patch tools skip it):

  * ARMInstrMVE.td: under the HasMVEInt predicate, adds nine selection
    patterns that map the generic and / or / xor nodes on v16i8, v8i16 and
    v4i32 values in MQPR to MVE_VAND, MVE_VORR and MVE_VEOR respectively.
  * ARMInstrThumb2.td: the top16Zero pattern leaf now answers false for any
    node whose value type is a vector, before it asks MaskedValueIsZero
    about the high 16 bits of a 32-bit mask.
    NOTE(review): presumably needed because the new vector patterns let
    vector-typed nodes reach this leaf and the 32-bit mask only suits a
    scalar i32 - confirm against the review thread.
  * test/CodeGen/Thumb2/mve-bitarith.ll: new test with nine functions, each
    expecting a single vand / vorr / veor q0, q0, q1 followed by bx lr.
  * test/CodeGen/Thumb2/mve-div-expand.ll and mve-fmath.ll: the expected
    position of "vmov q5, q1" moves from before "vmov q4, q0" to after
    "vmov s2, r0"; nothing else changes in those hunks.

Restoration notes:

  * Line breaks were rebuilt from a whitespace-collapsed copy; every hunk
    matches the line counts in its @@ header.
  * NOTE(review): indentation inside lines was not recoverable and has been
    re-created by convention. Apply with whitespace-insensitive matching
    (for example "patch -l") or confirm against the upstream revision.
  * NOTE(review): the template parameter list of the MVE_bit_cmode context
    line was missing and has been re-created from the names used on the
    following lines (iname, suffix, cmode, inOps) - confirm.

Index: llvm/trunk/lib/Target/ARM/ARMInstrMVE.td
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrMVE.td
+++ llvm/trunk/lib/Target/ARM/ARMInstrMVE.td
@@ -1567,6 +1567,29 @@
         (MVE_VAND MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
 }
 
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+            (v16i8 (MVE_VAND (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (and (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+            (v8i16 (MVE_VAND (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+            (v4i32 (MVE_VAND (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+
+  def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+            (v16i8 (MVE_VORR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+            (v8i16 (MVE_VORR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+            (v4i32 (MVE_VORR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+
+  def : Pat<(v16i8 (xor (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+            (v16i8 (MVE_VEOR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (xor (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+            (v8i16 (MVE_VEOR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (xor (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+            (v4i32 (MVE_VEOR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+}
+
 class MVE_bit_cmode<string iname, string suffix, bits<4> cmode, dag inOps>
   : MVE_p<(outs MQPR:$Qd), inOps, NoItinerary,
           iname, suffix, "$Qd, $imm", vpred_n, "$Qd = $Qd_src"> {
Index: llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td
+++ llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td
@@ -2756,7 +2756,8 @@
 
 // top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
 def top16Zero: PatLeaf<(i32 rGPR:$src), [{
-    return CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
+    return !SDValue(N,0)->getValueType(0).isVector() &&
+           CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
   }]>;
 
 // so_imm_notSext is needed instead of so_imm_not, as the value of imm
Index: llvm/trunk/test/CodeGen/Thumb2/mve-bitarith.ll
===================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-bitarith.ll
+++ llvm/trunk/test/CodeGen/Thumb2/mve-bitarith.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
+
+define arm_aapcs_vfpcc <16 x i8> @and_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
+; CHECK-LABEL: and_int8_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vand q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = and <16 x i8> %src1, %src2
+  ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @and_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: and_int16_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vand q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = and <8 x i16> %src1, %src2
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @and_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
+; CHECK-LABEL: and_int32_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vand q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = and <4 x i32> %src1, %src2
+  ret <4 x i32> %0
+}
+
+
+define arm_aapcs_vfpcc <16 x i8> @or_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
+; CHECK-LABEL: or_int8_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vorr q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = or <16 x i8> %src1, %src2
+  ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @or_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: or_int16_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vorr q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = or <8 x i16> %src1, %src2
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @or_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
+; CHECK-LABEL: or_int32_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vorr q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = or <4 x i32> %src1, %src2
+  ret <4 x i32> %0
+}
+
+
+define arm_aapcs_vfpcc <16 x i8> @xor_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
+; CHECK-LABEL: xor_int8_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = xor <16 x i8> %src1, %src2
+  ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @xor_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: xor_int16_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = xor <8 x i16> %src1, %src2
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @xor_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
+; CHECK-LABEL: xor_int32_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = xor <4 x i32> %src1, %src2
+  ret <4 x i32> %0
+}
+
Index: llvm/trunk/test/CodeGen/Thumb2/mve-div-expand.ll
===================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-div-expand.ll
+++ llvm/trunk/test/CodeGen/Thumb2/mve-div-expand.ll
@@ -870,12 +870,12 @@
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
 ; CHECK-NEXT:    .pad #64
 ; CHECK-NEXT:    sub sp, #64
-; CHECK-NEXT:    vmov q5, q1
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vmov.u16 r0, q1[0]
 ; CHECK-NEXT:    vmov s0, r0
 ; CHECK-NEXT:    vmov.u16 r0, q4[0]
 ; CHECK-NEXT:    vmov s2, r0
+; CHECK-NEXT:    vmov q5, q1
 ; CHECK-NEXT:    vcvtb.f32.f16 s2, s2
 ; CHECK-NEXT:    vstr s2, [sp, #56]
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s0
Index: llvm/trunk/test/CodeGen/Thumb2/mve-fmath.ll
===================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-fmath.ll
+++ llvm/trunk/test/CodeGen/Thumb2/mve-fmath.ll
@@ -1042,12 +1042,12 @@
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
 ; CHECK-NEXT:    .pad #64
 ; CHECK-NEXT:    sub sp, #64
-; CHECK-NEXT:    vmov q5, q1
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    vmov.u16 r0, q1[0]
 ; CHECK-NEXT:    vmov s0, r0
 ; CHECK-NEXT:    vmov.u16 r0, q4[0]
 ; CHECK-NEXT:    vmov s2, r0
+; CHECK-NEXT:    vmov q5, q1
 ; CHECK-NEXT:    vcvtb.f32.f16 s2, s2
 ; CHECK-NEXT:    vstr s2, [sp, #56]
 ; CHECK-NEXT:    vcvtb.f32.f16 s0, s0