Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -238,9 +238,6 @@ EORV_PRED, ANDV_PRED, - // Vector bitwise negation - NOT, - // Vector bitwise insertion BIT, Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1734,7 +1734,6 @@ MAKE_CASE(AArch64ISD::FMINNMV_PRED) MAKE_CASE(AArch64ISD::FMUL_PRED) MAKE_CASE(AArch64ISD::FSUB_PRED) - MAKE_CASE(AArch64ISD::NOT) MAKE_CASE(AArch64ISD::BIT) MAKE_CASE(AArch64ISD::CBZ) MAKE_CASE(AArch64ISD::CBNZ) @@ -9615,7 +9614,7 @@ Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS); else Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS); - return DAG.getNode(AArch64ISD::NOT, dl, VT, Fcmeq); + return DAG.getNOT(dl, Fcmeq, VT); } case AArch64CC::EQ: if (IsZero) @@ -9654,7 +9653,7 @@ Cmeq = DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS); else Cmeq = DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS); - return DAG.getNode(AArch64ISD::NOT, dl, VT, Cmeq); + return DAG.getNOT(dl, Cmeq, VT); } case AArch64CC::EQ: if (IsZero) Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -484,7 +484,6 @@ def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>; def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>; -def AArch64not: SDNode<"AArch64ISD::NOT", SDT_AArch64unvec>; def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>; def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>; @@ -504,7 +503,7 @@ def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>; def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>; def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS), - (AArch64not (AArch64cmeqz (and node:$LHS, node:$RHS)))>; + (vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>; def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>; def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>; @@ -3945,19 +3944,11 @@ def : Pat<(AArch64neg (v4i32 V128:$Rn)), (NEGv4i32 V128:$Rn)>; def : Pat<(AArch64neg (v2i64 V128:$Rn)), (NEGv2i64 V128:$Rn)>; -def : Pat<(AArch64not (v8i8 V64:$Rn)), (NOTv8i8 V64:$Rn)>; -def : Pat<(AArch64not (v16i8 V128:$Rn)), (NOTv16i8 V128:$Rn)>; -def : Pat<(AArch64not (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>; -def : Pat<(AArch64not (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>; -def : Pat<(AArch64not (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>; -def : Pat<(AArch64not (v1i64 V64:$Rn)), (NOTv8i8 V64:$Rn)>; -def : Pat<(AArch64not (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>; -def : Pat<(AArch64not (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>; - def : Pat<(vnot (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>; def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>; def : Pat<(vnot (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>; def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>; +def : Pat<(vnot (v1i64 V64:$Rn)), (NOTv8i8 V64:$Rn)>; def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>; defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", int_aarch64_neon_rbit>; Index: llvm/test/Analysis/CostModel/AArch64/vector-select.ll =================================================================== --- llvm/test/Analysis/CostModel/AArch64/vector-select.ll +++ llvm/test/Analysis/CostModel/AArch64/vector-select.ll @@ -40,8 +40,7 @@ ; CODE-LABEL: v4i16_select_ne ; CODE: bb.0 ; CODE-NEXT: cmeq v{{.+}}.4h, v{{.+}}.4h, v{{.+}}.4h -; CODE-NEXT: mvn v{{.+}}.8b, v{{.+}}.8b -; CODE-NEXT: bif v{{.+}}.8b, v{{.+}}.8b, v{{.+}}.8b +; CODE-NEXT: bit v{{.+}}.8b, v{{.+}}.8b, v{{.+}}.8b ; CODE-NEXT: ret define <4 x i16> @v4i16_select_ne(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) { Index: llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll =================================================================== --- llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll +++ llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll @@ -843,8 +843,7 @@ ; CHECK-LABEL: vselect_cmp_ne: ; CHECK: // %bb.0: ; CHECK-NEXT: cmeq v0.8b, v0.8b, v1.8b -; CHECK-NEXT: mvn v0.8b, v0.8b -; CHECK-NEXT: bsl v0.8b, v1.8b, v2.8b +; CHECK-NEXT: bsl v0.8b, v2.8b, v1.8b ; CHECK-NEXT: ret %cmp = icmp ne <8 x i8> %a, %b %d = select <8 x i1> %cmp, <8 x i8> %b, <8 x i8> %c @@ -866,8 +865,7 @@ ; CHECK-LABEL: vselect_cmpz_ne: ; CHECK: // %bb.0: ; CHECK-NEXT: cmeq v0.8b, v0.8b, #0 -; CHECK-NEXT: mvn v0.8b, v0.8b -; CHECK-NEXT: bsl v0.8b, v1.8b, v2.8b +; CHECK-NEXT: bsl v0.8b, v2.8b, v1.8b ; CHECK-NEXT: ret %cmp = icmp ne <8 x i8> %a, zeroinitializer %d = select <8 x i1> %cmp, <8 x i8> %b, <8 x i8> %c @@ -888,12 +886,24 @@ define <8 x i8> @vselect_tst(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { ; CHECK-LABEL: vselect_tst: ; CHECK: // %bb.0: +; CHECK-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-NEXT: cmeq v0.8b, v0.8b, #0 +; CHECK-NEXT: bsl v0.8b, v2.8b, v1.8b +; CHECK-NEXT: ret + %tmp3 = and <8 x i8> %a, %b + %tmp4 = icmp eq <8 x i8> %tmp3, zeroinitializer + %d = select <8 x i1> %tmp4, <8 x i8> %c, <8 x i8> %b + ret <8 x i8> %d +} + +define <8 x i8> @sext_tst(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { +; CHECK-LABEL: sext_tst: +; CHECK: // %bb.0: ; CHECK-NEXT: cmtst v0.8b, v0.8b, v1.8b -; CHECK-NEXT: bsl v0.8b, v1.8b, v2.8b ; CHECK-NEXT: ret - %tmp3 = and <8 x i8> %a, %b - %tmp4 = icmp ne <8 x i8> %tmp3, zeroinitializer - %d = select <8 x i1> %tmp4, <8 x i8> %b, <8 x i8> %c + %tmp3 = and <8 x i8> %a, %b + %tmp4 = icmp ne <8 x i8> %tmp3, zeroinitializer + %d = sext <8 x i1> %tmp4 to <8 x i8> ret <8 x i8> %d } Index: llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll =================================================================== --- llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll +++ llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll @@ -135,9 +135,8 @@ ; CHECK-NEXT: add v1.4s, v3.4s, v1.4s ; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp ne <4 x i32> %srem, @@ -202,9 +201,8 @@ ; CHECK-NEXT: add v1.4s, v3.4s, v1.4s ; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp ne <4 x i32> %srem, Index: llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll =================================================================== --- llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll +++ llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll @@ -118,9 +118,8 @@ ; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s ; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp ne <4 x i32> %urem, @@ -177,9 +176,8 @@ ; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s ; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp ne <4 x i32> %urem, Index: llvm/test/CodeGen/AArch64/vec_umulo.ll =================================================================== --- llvm/test/CodeGen/AArch64/vec_umulo.ll +++ llvm/test/CodeGen/AArch64/vec_umulo.ll @@ -278,14 +278,13 @@ ; CHECK-NEXT: cmeq v1.4s, v2.4s, #0 ; CHECK-NEXT: sturh w8, [x0, #9] ; CHECK-NEXT: lsr w8, w8, #16 -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: strh w9, [x0, #6] ; CHECK-NEXT: sturh w10, [x0, #3] ; CHECK-NEXT: lsr w9, w9, #16 ; CHECK-NEXT: lsr w10, w10, #16 ; CHECK-NEXT: strb w8, [x0, #11] -; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b +; CHECK-NEXT: orn v0.16b, v1.16b, v0.16b ; CHECK-NEXT: lsr w8, w11, #16 ; CHECK-NEXT: strh w11, [x0] ; CHECK-NEXT: strb w9, [x0, #8] @@ -314,15 +313,14 @@ ; CHECK-NEXT: and w9, w9, #0x1 ; CHECK-NEXT: bfi w8, w9, #1, #1 ; CHECK-NEXT: umov w9, v0.h[2] -; CHECK-NEXT: cmeq v1.4h, v1.4h, #0 ; CHECK-NEXT: ushr v2.4h, v0.4h, #1 ; CHECK-NEXT: and w9, w9, #0x1 ; CHECK-NEXT: bfi w8, w9, #2, #1 ; CHECK-NEXT: umov w9, v0.h[3] -; CHECK-NEXT: mvn v0.8b, v1.8b -; CHECK-NEXT: cmeq v1.4h, v2.4h, #0 -; CHECK-NEXT: mvn v1.8b, v1.8b -; CHECK-NEXT: orr v0.8b, v1.8b, v0.8b +; CHECK-NEXT: cmeq v0.4h, v2.4h, #0 +; CHECK-NEXT: cmeq v1.4h, v1.4h, #0 +; CHECK-NEXT: mvn v0.8b, v0.8b +; CHECK-NEXT: orn v0.8b, v0.8b, v1.8b ; CHECK-NEXT: bfi w8, w9, #3, #29 ; CHECK-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-NEXT: and w8, w8, #0xf