Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h
+++ lib/Target/PowerPC/PPCISelLowering.h
@@ -382,6 +382,11 @@
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned ShuffleKind, SelectionDAG &DAG);
+ /// isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for
+ /// a VMRGEW (CheckEven is true) or VMRGOW (CheckEven is false) instruction.
+ bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
+ unsigned ShuffleKind, SelectionDAG &DAG);
+
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
/// shift amount, otherwise return -1.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1279,6 +1279,99 @@
}
}
+/**
+ * \brief Common function used to match vmrgew and vmrgow shuffles
+ *
+ * The IndexOffset determines whether to look for even or odd words in
+ * the shuffle mask. The value to use depends on the endianness of the target
+ * machine.
+ * - Little Endian:
+ * - Use offset of 0 to check for odd elements
+ * - Use offset of 4 to check for even elements
+ * - Big Endian:
+ * - Use offset of 0 to check for even elements
+ * - Use offset of 4 to check for odd elements
+ * A detailed description of the vector element ordering for little endian and
+ * big endian can be found in the IBM article
+ * "Targeting your applications - what little endian and big endian IBM XL C/C++
+ * compiler differences mean to you".
+ *
+ * The mask to the shuffle vector instruction specifies the indices of the
+ * elements from the two input vectors to place in the result. The elements are
+ * numbered in array-access order, starting with the first vector. These vectors
+ * are always of type v16i8, thus each vector will contain 16 elements of 8
+ * bits each. More info on the shufflevector instruction can be found in the
+ * LLVM Language Reference.
+ *
+ * The RHSStartValue indicates whether the same input vectors are used (unary)
+ * or two different input vectors are used, based on the following:
+ * - If the instruction uses the same vector for both inputs, the range of the
+ * indices will be 0 to 15. In this case, the RHSStartValue passed should
+ * be 0.
+ * - If the instruction has two different vectors then the range of the
+ * indices will be 0 to 31. In this case, the RHSStartValue passed should
+ * be 16 (indices 0-15 specify elements in the first vector while indices 16
+ * to 31 specify elements in the second vector).
+ *
+ * \param[in] N The shuffle vector SD Node to analyze
+ * \param[in] IndexOffset Specifies whether to look for even or odd elements
+ * \param[in] RHSStartValue Specifies the starting index for the righthand input
+ * vector to the shuffle_vector instruction
+ * \return true iff this shuffle vector represents an even or odd word merge
+ */
+static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
+                     unsigned RHSStartValue) {
+  if (N->getValueType(0) != MVT::v16i8)
+    return false;
+  // Result words 0 and 2 come from operand 0; words 1 and 3 from operand 1.
+  for (unsigned Operand = 0; Operand != 2; ++Operand)
+    for (unsigned Byte = 0; Byte != 4; ++Byte) {
+      const unsigned Want = Operand * RHSStartValue + IndexOffset + Byte;
+      if (!isConstantOrUndef(N->getMaskElt(Operand * 4 + Byte), Want) ||
+          !isConstantOrUndef(N->getMaskElt(Operand * 4 + Byte + 8), Want + 8))
+        return false;
+    }
+  return true;
+}
+
+/**
+ * \brief Determine if the specified shuffle mask is suitable for the vmrgew or
+ * vmrgow instructions.
+ *
+ * \param[in] N The shuffle vector SD Node to analyze
+ * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
+ * \param[in] ShuffleKind Identify the type of merge:
+ * - 0 = big-endian merge with two different inputs;
+ * - 1 = either-endian merge with two identical inputs;
+ * - 2 = little-endian merge with two different inputs (inputs are swapped for
+ * little-endian merges).
+ * \param[in] DAG The current SelectionDAG
+ * \return true iff this shuffle mask matches the requested vmrgew/vmrgow merge
+ */
+bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
+                              unsigned ShuffleKind, SelectionDAG &DAG) {
+  const bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian();
+
+  // Shuffle mask indices are in array-access order, so the byte offset of the
+  // first word to match flips with endianness: an even merge starts at byte 0
+  // on big endian and at byte 4 on little endian (and vice versa for odd).
+  const unsigned IndexOffset = (CheckEven == IsLE) ? 4 : 0;
+
+  // The last isVMerge argument is the mask index at which the second input
+  // starts: 16 when two vectors are addressed, 0 when both are the same one.
+  switch (ShuffleKind) {
+  case 0: // Two different inputs in big-endian operand order.
+    return !IsLE && isVMerge(N, IndexOffset, 16);
+  case 1: // Unary: both inputs are the same vector, either endianness.
+    return isVMerge(N, IndexOffset, 0);
+  case 2: // Two different inputs, swapped for little endian.
+    return IsLE && isVMerge(N, IndexOffset, 16);
+  default: // Unknown shuffle kind never matches.
+    return false;
+  }
+}
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
@@ -7046,7 +7139,9 @@
PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
- PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG)) {
+ PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
+ PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
+ PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)) {
return Op;
}
}
@@ -7064,7 +7159,9 @@
PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
- PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG))
+ PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
+ PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
+ PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))
return Op;
// Check to see if this is a shuffle of 4-byte values. If so, we can use our
Index: lib/Target/PowerPC/PPCInstrAltivec.td
===================================================================
--- lib/Target/PowerPC/PPCInstrAltivec.td
+++ lib/Target/PowerPC/PPCInstrAltivec.td
@@ -155,6 +155,33 @@
}]>;
+// Shuffle fragments for vmrgew/vmrgow. The integer passed to
+// isVMRGEOShuffleMask is the ShuffleKind: 0 = normal, 1 = unary, 2 = swapped.
+def vmrgew_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                             (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), true, 0, *CurDAG);
+}]>;
+def vmrgow_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                             (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), false, 0, *CurDAG);
+}]>;
+def vmrgew_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                                   (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), true, 1, *CurDAG);
+}]>;
+def vmrgow_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                                   (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), false, 1, *CurDAG);
+}]>;
+def vmrgew_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                                     (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), true, 2, *CurDAG);
+}]>;
+def vmrgow_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                                     (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), false, 2, *CurDAG);
+}]>;
+
def VSLDOI_get_imm : SDNodeXForm;
@@ -1008,6 +1035,29 @@
def VMINUD : VX1_Int_Ty<706, "vminud", int_ppc_altivec_vminud, v2i64>;
} // isCommutable
+// Vector merge even word (vmrgew) and vector merge odd word (vmrgow).
+def VMRGEW : VXForm_1<1932, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmrgew $vD, $vA, $vB", IIC_VecFP,
+ [(set v16i8:$vD, (vmrgew_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VMRGOW : VXForm_1<1676, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmrgow $vD, $vA, $vB", IIC_VecFP,
+ [(set v16i8:$vD, (vmrgow_shuffle v16i8:$vA, v16i8:$vB))]>;
+
+// Match vmrgew(x,x) and vmrgow(x,x), i.e., the same vector for both inputs.
+def:Pat<(vmrgew_unary_shuffle v16i8:$vA, undef),
+ (VMRGEW $vA, $vA)>;
+def:Pat<(vmrgow_unary_shuffle v16i8:$vA, undef),
+ (VMRGOW $vA, $vA)>;
+
+// Match vmrgew(y,x) and vmrgow(y,x), i.e., swapped operands. These fragments
+// are matched for little-endian, where the inputs must be swapped for correct
+// semantics.
+def:Pat<(vmrgew_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGEW $vB, $vA)>;
+def:Pat<(vmrgow_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGOW $vB, $vA)>;
+
+
// Vector shifts
def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>;
def VSLD : VXForm_1<1476, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
Index: test/CodeGen/PowerPC/vec_mergeow.ll
===================================================================
--- test/CodeGen/PowerPC/vec_mergeow.ll
+++ test/CodeGen/PowerPC/vec_mergeow.ll
@@ -0,0 +1,103 @@
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | \
+; RUN: FileCheck %s -check-prefix=CHECK-LE
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | \
+; RUN: FileCheck %s -check-prefix=CHECK-BE
+
+; Check for a vector merge instruction using two inputs.
+; The shufflevector mask selects words 0 and 2 of each input (the even words in
+; big endian element ordering). On a big endian machine this should produce
+; the vmrgew instruction. On a little endian machine it should produce the
+; vmrgow instruction, with the input registers swapped (the operand order in
+; the CHECK-LE line is reversed relative to CHECK-BE).
+define void @check_merge_even_xy(<16 x i8>* %A, <16 x i8>* %B) {
+entry:
+; CHECK-LE-LABEL: @check_merge_even_xy
+; CHECK-BE-LABEL: @check_merge_even_xy
+  %tmp = load <16 x i8>, <16 x i8>* %A
+  %tmp2 = load <16 x i8>, <16 x i8>* %B
+  %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2,
+        <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27>
+; CHECK-LE: vmrgow 2, 3, 2
+; CHECK-BE: vmrgew 2, 2, 3
+  store <16 x i8> %tmp3, <16 x i8>* %A
+  ret void
+; CHECK-LE: blr
+; CHECK-BE: blr
+}
+
+; Check for a vector merge instruction using a single input.
+; The shufflevector specifies the even elements, using big endian element
+; ordering. If run on a big endian machine, this should produce the vmrgew
+; instruction. If run on a little endian machine, this should produce the
+; vmrgow instruction. Both operands are the same register, so the little
+; endian operand swap is not visible here.
+define void @check_merge_even_xx(<16 x i8>* %A) {
+entry:
+; CHECK-LE-LABEL: @check_merge_even_xx
+; CHECK-BE-LABEL: @check_merge_even_xx
+  %tmp = load <16 x i8>, <16 x i8>* %A
+  %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp,
+        <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11>
+; CHECK-LE: vmrgow 2, 2, 2
+; CHECK-BE: vmrgew 2, 2, 2
+  store <16 x i8> %tmp2, <16 x i8>* %A
+  ret void
+; CHECK-LE: blr
+; CHECK-BE: blr
+}
+
+; Check for a vector merge instruction using two inputs.
+; The shufflevector mask selects words 1 and 3 of each input (the odd words in
+; big endian element ordering). On a big endian machine this should produce
+; the vmrgow instruction. On a little endian machine it should produce the
+; vmrgew instruction, with the input registers swapped (the operand order in
+; the CHECK-LE line is reversed relative to CHECK-BE).
+define void @check_merge_odd_xy(<16 x i8>* %A, <16 x i8>* %B) {
+entry:
+; CHECK-LE-LABEL: @check_merge_odd_xy
+; CHECK-BE-LABEL: @check_merge_odd_xy
+  %tmp = load <16 x i8>, <16 x i8>* %A
+  %tmp2 = load <16 x i8>, <16 x i8>* %B
+  %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2,
+        <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 12, i32 13, i32 14, i32 15, i32 28, i32 29, i32 30, i32 31>
+; CHECK-LE: vmrgew 2, 3, 2
+; CHECK-BE: vmrgow 2, 2, 3
+  store <16 x i8> %tmp3, <16 x i8>* %A
+  ret void
+; CHECK-LE: blr
+; CHECK-BE: blr
+}
+
+; Check for a vector merge instruction using a single input.
+; The shufflevector specifies the odd elements, using big endian element
+; ordering. If run on a big endian machine, this should produce the vmrgow
+; instruction. If run on a little endian machine, this should produce the
+; vmrgew instruction. Both operands are the same register, so the little
+; endian operand swap is not visible here.
+define void @check_merge_odd_xx(<16 x i8>* %A) {
+entry:
+; CHECK-LE-LABEL: @check_merge_odd_xx
+; CHECK-BE-LABEL: @check_merge_odd_xx
+  %tmp = load <16 x i8>, <16 x i8>* %A
+  %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp,
+        <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15>
+; CHECK-LE: vmrgew 2, 2, 2
+; CHECK-BE: vmrgow 2, 2, 2
+  store <16 x i8> %tmp2, <16 x i8>* %A
+  ret void
+; CHECK-LE: blr
+; CHECK-BE: blr
+}
+
Index: test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt
===================================================================
--- test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt
+++ test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt
@@ -99,6 +99,12 @@
# CHECK: vmrglw 2, 3, 4
0x10 0x43 0x21 0x8c
+# CHECK: vmrgew 2, 3, 4
+0x10 0x43 0x27 0x8c
+
+# CHECK: vmrgow 2, 3, 4
+0x10 0x43 0x26 0x8c
+
# CHECK: vspltb 2, 3, 1
0x10 0x41 0x1a 0x0c
Index: test/MC/PowerPC/ppc64-encoding-vmx.s
===================================================================
--- test/MC/PowerPC/ppc64-encoding-vmx.s
+++ test/MC/PowerPC/ppc64-encoding-vmx.s
@@ -1,5 +1,5 @@
-# RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-BE %s
+# RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-BE %s
# RUN: llvm-mc -triple powerpc64le-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-LE %s
# Vector facility
@@ -110,7 +110,13 @@
# CHECK-BE: vmrglw 2, 3, 4 # encoding: [0x10,0x43,0x21,0x8c]
# CHECK-LE: vmrglw 2, 3, 4 # encoding: [0x8c,0x21,0x43,0x10]
vmrglw 2, 3, 4
-
+# CHECK-BE: vmrgew 2, 3, 4 # encoding: [0x10,0x43,0x27,0x8c]
+# CHECK-LE: vmrgew 2, 3, 4 # encoding: [0x8c,0x27,0x43,0x10]
+ vmrgew 2, 3, 4
+# CHECK-BE: vmrgow 2, 3, 4 # encoding: [0x10,0x43,0x26,0x8c]
+# CHECK-LE: vmrgow 2, 3, 4 # encoding: [0x8c,0x26,0x43,0x10]
+ vmrgow 2, 3, 4
+
# CHECK-BE: vspltb 2, 3, 1 # encoding: [0x10,0x41,0x1a,0x0c]
# CHECK-LE: vspltb 2, 3, 1 # encoding: [0x0c,0x1a,0x41,0x10]
vspltb 2, 3, 1