Skip to content

Commit

Permalink
[ARM] MVE VPNOT
Browse files Browse the repository at this point in the history
This adds the patterns required to transform xor P0, -1 to a VPNOT. The
instruction operands have to change a little for this, adding an in and an out
VCCR reg and using a custom DecodeMVEVPNOT for the decode.

Differential Revision: https://reviews.llvm.org/D65133

llvm-svn: 367192
  • Loading branch information
davemgreen committed Jul 28, 2019

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
1 parent 9cf344e commit b8b8b46
Showing 6 changed files with 75 additions and 215 deletions.
15 changes: 12 additions & 3 deletions llvm/lib/Target/ARM/ARMInstrMVE.td
Original file line number Diff line number Diff line change
@@ -4609,19 +4609,28 @@ let Predicates = [HasMVEFloat] in {
(v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, 1))>;
}

def MVE_VPNOT : MVE_p<(outs), (ins), NoItinerary,
def MVE_VPNOT : MVE_p<(outs VCCR:$P0), (ins VCCR:$P0_in), NoItinerary,
"vpnot", "", "", vpred_n, "", []> {
let Inst{31-0} = 0b11111110001100010000111101001101;
let Unpredictable{19-17} = 0b111;
let Unpredictable{12} = 0b1;
let Unpredictable{7} = 0b1;
let Unpredictable{5} = 0b1;
let Defs = [P0];
let Uses = [P0];

let Constraints = "";
let DecoderMethod = "DecodeMVEVPNOT";
}

// Instruction-selection patterns that turn a predicate XOR into MVE_VPNOT.
// Each pattern matches an xor of a VCCR predicate with a predicate_cast of
// the i32 constant 65535 (0xFFFF) and replaces it with MVE_VPNOT applied to
// that predicate. There is one pattern per predicate vector type (v4i1, v8i1
// and v16i1), and all three are only active when HasMVEInt holds.
let Predicates = [HasMVEInt] in {
def : Pat<(v4i1 (xor (v4i1 VCCR:$pred), (v4i1 (predicate_cast (i32 65535))))),
(v4i1 (MVE_VPNOT (v4i1 VCCR:$pred)))>;
def : Pat<(v8i1 (xor (v8i1 VCCR:$pred), (v8i1 (predicate_cast (i32 65535))))),
(v8i1 (MVE_VPNOT (v8i1 VCCR:$pred)))>;
def : Pat<(v16i1 (xor (v16i1 VCCR:$pred), (v16i1 (predicate_cast (i32 65535))))),
(v16i1 (MVE_VPNOT (v16i1 VCCR:$pred)))>;
}


class MVE_loltp_start<dag iops, string asm, string ops, bits<2> size>
: t2LOL<(outs GPRlr:$LR), iops, asm, ops> {
bits<4> Rn;
10 changes: 10 additions & 0 deletions llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
Original file line number Diff line number Diff line change
@@ -561,6 +561,8 @@ static DecodeStatus DecodeMVEVCMP(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeMVEOverlappingLongShift(MCInst &Inst, unsigned Insn,
uint64_t Address,
const void *Decoder);
@@ -6579,3 +6581,11 @@ static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn, uint64_t Address,
return MCDisassembler::Fail;
return S;
}

/// Custom decoder for the MVE VPNOT instruction.
///
/// Appends two ARM::VPR register operands to \p Inst and always reports
/// MCDisassembler::Success. \p Insn, \p Address and \p Decoder are accepted
/// to match the decoder-method signature but are not consulted here.
static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn, uint64_t Address,
                                   const void *Decoder) {
  // Both register operands are VPR, so the same operand is added twice.
  for (unsigned OpIdx = 0; OpIdx != 2; ++OpIdx)
    Inst.addOperand(MCOperand::createReg(ARM::VPR));

  return MCDisassembler::Success;
}
85 changes: 17 additions & 68 deletions llvm/test/CodeGen/Thumb2/mve-pred-or.ll
Original file line number Diff line number Diff line change
@@ -5,12 +5,9 @@ define arm_aapcs_vfpcc <4 x i32> @cmpeqz_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: cmpeqz_v4i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcmp.i32 ne, q0, zr
; CHECK-NEXT: movw r1, #65535
; CHECK-NEXT: vpst
; CHECK-NEXT: vcmpt.i32 ne, q1, zr
; CHECK-NEXT: vmrs r0, p0
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
@@ -25,12 +22,9 @@ define arm_aapcs_vfpcc <4 x i32> @cmpnez_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: cmpnez_v4i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcmp.i32 ne, q0, zr
; CHECK-NEXT: movw r1, #65535
; CHECK-NEXT: vpst
; CHECK-NEXT: vcmpt.i32 eq, q1, zr
; CHECK-NEXT: vmrs r0, p0
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
@@ -45,12 +39,9 @@ define arm_aapcs_vfpcc <4 x i32> @cmpsltz_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: cmpsltz_v4i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcmp.i32 ne, q0, zr
; CHECK-NEXT: movw r1, #65535
; CHECK-NEXT: vpst
; CHECK-NEXT: vcmpt.s32 ge, q1, zr
; CHECK-NEXT: vmrs r0, p0
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
@@ -65,12 +56,9 @@ define arm_aapcs_vfpcc <4 x i32> @cmpsgtz_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: cmpsgtz_v4i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcmp.i32 ne, q0, zr
; CHECK-NEXT: movw r1, #65535
; CHECK-NEXT: vpst
; CHECK-NEXT: vcmpt.s32 le, q1, zr
; CHECK-NEXT: vmrs r0, p0
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
@@ -85,12 +73,9 @@ define arm_aapcs_vfpcc <4 x i32> @cmpslez_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: cmpslez_v4i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcmp.i32 ne, q0, zr
; CHECK-NEXT: movw r1, #65535
; CHECK-NEXT: vpst
; CHECK-NEXT: vcmpt.s32 gt, q1, zr
; CHECK-NEXT: vmrs r0, p0
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
@@ -105,12 +90,9 @@ define arm_aapcs_vfpcc <4 x i32> @cmpsgez_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: cmpsgez_v4i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcmp.i32 ne, q0, zr
; CHECK-NEXT: movw r1, #65535
; CHECK-NEXT: vpst
; CHECK-NEXT: vcmpt.s32 lt, q1, zr
; CHECK-NEXT: vmrs r0, p0
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
@@ -139,12 +121,9 @@ define arm_aapcs_vfpcc <4 x i32> @cmpugtz_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: cmpugtz_v4i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcmp.i32 ne, q0, zr
; CHECK-NEXT: movw r1, #65535
; CHECK-NEXT: vpst
; CHECK-NEXT: vcmpt.i32 eq, q1, zr
; CHECK-NEXT: vmrs r0, p0
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
@@ -192,12 +171,9 @@ define arm_aapcs_vfpcc <4 x i32> @cmpeq_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i3
; CHECK-LABEL: cmpeq_v4i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcmp.i32 ne, q0, zr
; CHECK-NEXT: movw r1, #65535
; CHECK-NEXT: vpst
; CHECK-NEXT: vcmpt.i32 ne, q1, q2
; CHECK-NEXT: vmrs r0, p0
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
@@ -212,12 +188,9 @@ define arm_aapcs_vfpcc <4 x i32> @cmpne_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i3
; CHECK-LABEL: cmpne_v4i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcmp.i32 ne, q0, zr
; CHECK-NEXT: movw r1, #65535
; CHECK-NEXT: vpst
; CHECK-NEXT: vcmpt.i32 eq, q1, q2
; CHECK-NEXT: vmrs r0, p0
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
@@ -232,12 +205,9 @@ define arm_aapcs_vfpcc <4 x i32> @cmpslt_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i
; CHECK-LABEL: cmpslt_v4i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcmp.i32 ne, q0, zr
; CHECK-NEXT: movw r1, #65535
; CHECK-NEXT: vpst
; CHECK-NEXT: vcmpt.s32 le, q2, q1
; CHECK-NEXT: vmrs r0, p0
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
@@ -252,12 +222,9 @@ define arm_aapcs_vfpcc <4 x i32> @cmpsgt_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i
; CHECK-LABEL: cmpsgt_v4i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcmp.i32 ne, q0, zr
; CHECK-NEXT: movw r1, #65535
; CHECK-NEXT: vpst
; CHECK-NEXT: vcmpt.s32 le, q1, q2
; CHECK-NEXT: vmrs r0, p0
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
@@ -272,12 +239,9 @@ define arm_aapcs_vfpcc <4 x i32> @cmpsle_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i
; CHECK-LABEL: cmpsle_v4i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcmp.i32 ne, q0, zr
; CHECK-NEXT: movw r1, #65535
; CHECK-NEXT: vpst
; CHECK-NEXT: vcmpt.s32 lt, q2, q1
; CHECK-NEXT: vmrs r0, p0
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
@@ -292,12 +256,9 @@ define arm_aapcs_vfpcc <4 x i32> @cmpsge_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i
; CHECK-LABEL: cmpsge_v4i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcmp.i32 ne, q0, zr
; CHECK-NEXT: movw r1, #65535
; CHECK-NEXT: vpst
; CHECK-NEXT: vcmpt.s32 lt, q1, q2
; CHECK-NEXT: vmrs r0, p0
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
@@ -391,12 +352,9 @@ define arm_aapcs_vfpcc <8 x i16> @cmpeqz_v8i1(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: cmpeqz_v8i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcmp.i16 ne, q0, zr
; CHECK-NEXT: movw r1, #65535
; CHECK-NEXT: vpst
; CHECK-NEXT: vcmpt.i16 ne, q1, zr
; CHECK-NEXT: vmrs r0, p0
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
@@ -411,12 +369,9 @@ define arm_aapcs_vfpcc <8 x i16> @cmpeq_v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1
; CHECK-LABEL: cmpeq_v8i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcmp.i16 ne, q0, zr
; CHECK-NEXT: movw r1, #65535
; CHECK-NEXT: vpst
; CHECK-NEXT: vcmpt.i16 ne, q1, q2
; CHECK-NEXT: vmrs r0, p0
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
@@ -432,12 +387,9 @@ define arm_aapcs_vfpcc <16 x i8> @cmpeqz_v16i1(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: cmpeqz_v16i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcmp.i8 ne, q0, zr
; CHECK-NEXT: movw r1, #65535
; CHECK-NEXT: vpst
; CHECK-NEXT: vcmpt.i8 ne, q1, zr
; CHECK-NEXT: vmrs r0, p0
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
@@ -452,12 +404,9 @@ define arm_aapcs_vfpcc <16 x i8> @cmpeq_v16i1(<16 x i8> %a, <16 x i8> %b, <16 x
; CHECK-LABEL: cmpeq_v16i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcmp.i8 ne, q0, zr
; CHECK-NEXT: movw r1, #65535
; CHECK-NEXT: vpst
; CHECK-NEXT: vcmpt.i8 ne, q1, q2
; CHECK-NEXT: vmrs r0, p0
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
Loading

0 comments on commit b8b8b46

Please sign in to comment.