diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -444,6 +444,9 @@
   bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
     return VT.isScalarInteger();
   }
+  bool hasAndNot(SDValue V) const override {
+    return V.getValueType().isVector();
+  }
   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &,
                          EVT) const override;
   bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
--- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -927,6 +927,10 @@
                         (and (z_vnot (z_vsra_by_scalar VR128:$x, (i32 shift))),
                              (z_vneg VR128:$x)))),
              (lc (lp VR128:$x))>;
+    def : Pat<(type (xor (and (xor (z_vneg VR128:$x), VR128:$x), (z_vsra_by_scalar VR128:$x, (i32 shift))), (z_vneg VR128:$x))),
+              (lp VR128:$x)>;
+    def : Pat<(type (xor (and (xor VR128:$x, (z_vneg VR128:$x)), (z_vsra_by_scalar VR128:$x, (i32 shift))), VR128:$x)),
+              (lc (lp VR128:$x))>;
   }
 }
 
diff --git a/llvm/test/CodeGen/SystemZ/vec-abs-01.ll b/llvm/test/CodeGen/SystemZ/vec-abs-01.ll
--- a/llvm/test/CodeGen/SystemZ/vec-abs-01.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-abs-01.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; Test v16i8 absolute.
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
@@ -5,8 +6,9 @@
 ; Test with slt.
 define <16 x i8> @f1(<16 x i8> %val) {
 ; CHECK-LABEL: f1:
-; CHECK: vlpb %v24, %v24
-; CHECK: br %r14
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vlpb %v24, %v24
+; CHECK-NEXT:    br %r14
   %cmp = icmp slt <16 x i8> %val, zeroinitializer
   %neg = sub <16 x i8> zeroinitializer, %val
   %ret = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val
@@ -16,8 +18,9 @@
 ; Test with sle.
 define <16 x i8> @f2(<16 x i8> %val) {
 ; CHECK-LABEL: f2:
-; CHECK: vlpb %v24, %v24
-; CHECK: br %r14
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vlpb %v24, %v24
+; CHECK-NEXT:    br %r14
   %cmp = icmp sle <16 x i8> %val, zeroinitializer
   %neg = sub <16 x i8> zeroinitializer, %val
   %ret = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val
@@ -27,8 +30,9 @@
 ; Test with sgt.
 define <16 x i8> @f3(<16 x i8> %val) {
 ; CHECK-LABEL: f3:
-; CHECK: vlpb %v24, %v24
-; CHECK: br %r14
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vlpb %v24, %v24
+; CHECK-NEXT:    br %r14
   %cmp = icmp sgt <16 x i8> %val, zeroinitializer
   %neg = sub <16 x i8> zeroinitializer, %val
   %ret = select <16 x i1> %cmp, <16 x i8> %val, <16 x i8> %neg
@@ -38,8 +42,9 @@
 ; Test with sge.
 define <16 x i8> @f4(<16 x i8> %val) {
 ; CHECK-LABEL: f4:
-; CHECK: vlpb %v24, %v24
-; CHECK: br %r14
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vlpb %v24, %v24
+; CHECK-NEXT:    br %r14
   %cmp = icmp sge <16 x i8> %val, zeroinitializer
   %neg = sub <16 x i8> zeroinitializer, %val
   %ret = select <16 x i1> %cmp, <16 x i8> %val, <16 x i8> %neg
@@ -50,9 +55,10 @@
 ; of LOAD NEGATIVE.
 define <16 x i8> @f5(<16 x i8> %val) {
 ; CHECK-LABEL: f5:
-; CHECK: vlpb [[REG:%v[0-9]+]], %v24
-; CHECK: vlcb %v24, [[REG]]
-; CHECK: br %r14
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vlpb %v0, %v24
+; CHECK-NEXT:    vlcb %v24, %v0
+; CHECK-NEXT:    br %r14
   %cmp = icmp slt <16 x i8> %val, zeroinitializer
   %neg = sub <16 x i8> zeroinitializer, %val
   %abs = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val
@@ -63,9 +69,10 @@
 ; Try another form of negative absolute (slt version).
 define <16 x i8> @f6(<16 x i8> %val) {
 ; CHECK-LABEL: f6:
-; CHECK: vlpb [[REG:%v[0-9]+]], %v24
-; CHECK: vlcb %v24, [[REG]]
-; CHECK: br %r14
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vlpb %v0, %v24
+; CHECK-NEXT:    vlcb %v24, %v0
+; CHECK-NEXT:    br %r14
   %cmp = icmp slt <16 x i8> %val, zeroinitializer
   %neg = sub <16 x i8> zeroinitializer, %val
   %ret = select <16 x i1> %cmp, <16 x i8> %val, <16 x i8> %neg
@@ -75,9 +82,10 @@
 ; Test with sle.
 define <16 x i8> @f7(<16 x i8> %val) {
 ; CHECK-LABEL: f7:
-; CHECK: vlpb [[REG:%v[0-9]+]], %v24
-; CHECK: vlcb %v24, [[REG]]
-; CHECK: br %r14
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vlpb %v0, %v24
+; CHECK-NEXT:    vlcb %v24, %v0
+; CHECK-NEXT:    br %r14
   %cmp = icmp sle <16 x i8> %val, zeroinitializer
   %neg = sub <16 x i8> zeroinitializer, %val
   %ret = select <16 x i1> %cmp, <16 x i8> %val, <16 x i8> %neg
@@ -87,9 +95,10 @@
 ; Test with sgt.
 define <16 x i8> @f8(<16 x i8> %val) {
 ; CHECK-LABEL: f8:
-; CHECK: vlpb [[REG:%v[0-9]+]], %v24
-; CHECK: vlcb %v24, [[REG]]
-; CHECK: br %r14
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vlpb %v0, %v24
+; CHECK-NEXT:    vlcb %v24, %v0
+; CHECK-NEXT:    br %r14
   %cmp = icmp sgt <16 x i8> %val, zeroinitializer
   %neg = sub <16 x i8> zeroinitializer, %val
   %ret = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val
@@ -99,9 +108,10 @@
 ; Test with sge.
 define <16 x i8> @f9(<16 x i8> %val) {
 ; CHECK-LABEL: f9:
-; CHECK: vlpb [[REG:%v[0-9]+]], %v24
-; CHECK: vlcb %v24, [[REG]]
-; CHECK: br %r14
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vlpb %v0, %v24
+; CHECK-NEXT:    vlcb %v24, %v0
+; CHECK-NEXT:    br %r14
   %cmp = icmp sge <16 x i8> %val, zeroinitializer
   %neg = sub <16 x i8> zeroinitializer, %val
   %ret = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val
@@ -111,8 +121,9 @@
 ; Test with an SRA-based boolean vector.
 define <16 x i8> @f10(<16 x i8> %val) {
 ; CHECK-LABEL: f10:
-; CHECK: vlpb %v24, %v24
-; CHECK: br %r14
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vlpb %v24, %v24
+; CHECK-NEXT:    br %r14
   %shr = ashr <16 x i8> %val,
               <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,
                i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
@@ -129,9 +140,10 @@
 ; ...and again in reverse
 define <16 x i8> @f11(<16 x i8> %val) {
 ; CHECK-LABEL: f11:
-; CHECK: vlpb [[REG:%v[0-9]+]], %v24
-; CHECK: vlcb %v24, [[REG]]
-; CHECK: br %r14
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vlpb %v0, %v24
+; CHECK-NEXT:    vlcb %v24, %v0
+; CHECK-NEXT:    br %r14
   %shr = ashr <16 x i8> %val,
               <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,
                i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
@@ -144,3 +156,36 @@
   %ret = or <16 x i8> %and1, %and2
   ret <16 x i8> %ret
 }
+
+; Test using xor masked-merge pattern.
+define <16 x i8> @f12(<16 x i8> %val) {
+; CHECK-LABEL: f12:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vlpb %v24, %v24
+; CHECK-NEXT:    br %r14
+  %shr = ashr <16 x i8> %val,
+              <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,
+               i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  %neg = sub <16 x i8> zeroinitializer, %val
+  %xor0 = xor <16 x i8> %neg, %val
+  %and = and <16 x i8> %xor0, %shr
+  %xor1 = xor <16 x i8> %and, %val
+  ret <16 x i8> %xor1
+}
+
+; Test using xor masked-merge pattern in reverse.
+define <16 x i8> @f13(<16 x i8> %val) {
+; CHECK-LABEL: f13:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vlpb %v0, %v24
+; CHECK-NEXT:    vlcb %v24, %v0
+; CHECK-NEXT:    br %r14
+  %shr = ashr <16 x i8> %val,
+              <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,
+               i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  %neg = sub <16 x i8> zeroinitializer, %val
+  %xor0 = xor <16 x i8> %neg, %val
+  %and = and <16 x i8> %xor0, %shr
+  %xor1 = xor <16 x i8> %and, %neg
+  ret <16 x i8> %xor1
+}
diff --git a/llvm/test/CodeGen/SystemZ/vec-masked-merge.ll b/llvm/test/CodeGen/SystemZ/vec-masked-merge.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vec-masked-merge.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+define <2 x i64> @masked_merge0(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
+; CHECK-LABEL: masked_merge0
+; CHECK: vsel %v24, %v26, %v28, %v24
+; CHECK: br %r14
+  %and0 = and <2 x i64> %a0, %a1
+  %not = xor <2 x i64> %a0, <i64 -1, i64 -1>
+  %and1 = and <2 x i64> %not, %a2
+  %or = or <2 x i64> %and0, %and1
+  ret <2 x i64> %or
+}
+
+define <2 x i64> @masked_merge1(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
+; CHECK-LABEL: masked_merge1
+; CHECK: vsel %v24, %v26, %v28, %v24
+; CHECK: br %r14
+  %xor0 = xor <2 x i64> %a1, %a2
+  %and = and <2 x i64> %xor0, %a0
+  %xor1 = xor <2 x i64> %and, %a2
+  ret <2 x i64> %xor1
+}
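Reviewer note (not part of the patch): the rewrite that both the new TableGen patterns and the masked_merge tests rely on is the standard masked-merge identity ((a ^ b) & m) ^ b == (a & m) | (b & ~m), i.e. both forms select the bits of a where the mask m is set and the bits of b where it is clear. The sketch below is a minimal standalone C++ check of that identity over a handful of 64-bit values; all names in it are illustrative only.

#include <cassert>
#include <cstdint>

// Masked merge in the xor form matched by the new patterns:
// picks bits of a where m is 1 and bits of b where m is 0.
static uint64_t mergeXor(uint64_t a, uint64_t b, uint64_t m) {
  return ((a ^ b) & m) ^ b;
}

// The equivalent and/or/andnot form already lowered to VSEL.
static uint64_t mergeOr(uint64_t a, uint64_t b, uint64_t m) {
  return (a & m) | (b & ~m);
}

int main() {
  const uint64_t vals[] = {0, 1, 0x00FF00FF00FF00FFULL,
                           0x8000000000000000ULL, ~0ULL};
  for (uint64_t a : vals)
    for (uint64_t b : vals)
      for (uint64_t m : vals)
        assert(mergeXor(a, b, m) == mergeOr(a, b, m));
  return 0;
}

Under this identity, f12 is select(sign(x), -x, x), i.e. abs(x), and f13 is select(sign(x), x, -x), i.e. -abs(x), with the ashr-by-7 result as the per-lane sign mask; that is why the xor forms still fold to VLPB (and VLPB plus VLCB for the negated case).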