Index: llvm/trunk/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetLowering.h
+++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h
@@ -2117,6 +2117,9 @@
     return false;
   }
 
+  /// Return true if the target has a vector blend instruction.
+  virtual bool hasVectorBlend() const { return false; }
+
   /// \brief Get the maximum supported factor for interleaved memory accesses.
   /// Default to be the minimum interleave factor: 2.
   virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; }
Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1557,33 +1557,35 @@
   if (N1.isUndef())
     commuteShuffle(N1, N2, MaskVec);
 
-  // If shuffling a splat, try to blend the splat instead. We do this here so
-  // that even when this arises during lowering we don't have to re-handle it.
-  auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) {
-    BitVector UndefElements;
-    SDValue Splat = BV->getSplatValue(&UndefElements);
-    if (!Splat)
-      return;
+  if (TLI->hasVectorBlend()) {
+    // If shuffling a splat, try to blend the splat instead. We do this here so
+    // that even when this arises during lowering we don't have to re-handle it.
+    auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) {
+      BitVector UndefElements;
+      SDValue Splat = BV->getSplatValue(&UndefElements);
+      if (!Splat)
+        return;
 
-    for (int i = 0; i < NElts; ++i) {
-      if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts))
-        continue;
+      for (int i = 0; i < NElts; ++i) {
+        if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts))
+          continue;
 
-      // If this input comes from undef, mark it as such.
-      if (UndefElements[MaskVec[i] - Offset]) {
-        MaskVec[i] = -1;
-        continue;
-      }
+        // If this input comes from undef, mark it as such.
+        if (UndefElements[MaskVec[i] - Offset]) {
+          MaskVec[i] = -1;
+          continue;
+        }
 
-      // If we can blend a non-undef lane, use that instead.
-      if (!UndefElements[i])
-        MaskVec[i] = i + Offset;
-    }
-  };
-  if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
-    BlendSplat(N1BV, 0);
-  if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2))
-    BlendSplat(N2BV, NElts);
+        // If we can blend a non-undef lane, use that instead.
+        if (!UndefElements[i])
+          MaskVec[i] = i + Offset;
+      }
+    };
+    if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
+      BlendSplat(N1BV, 0);
+    if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2))
+      BlendSplat(N2BV, NElts);
+  }
 
   // Canonicalize all index into lhs, -> shuffle lhs, undef
   // Canonicalize all index into rhs, -> shuffle rhs, undef
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h
@@ -1098,6 +1098,8 @@
 
   StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
 
+  bool hasVectorBlend() const override { return true; }
+
   unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
 
   /// \brief Lower interleaved load(s) into target specific
Index: llvm/trunk/test/CodeGen/AArch64/aarch64-vuzp.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/aarch64-vuzp.ll
+++ llvm/trunk/test/CodeGen/AArch64/aarch64-vuzp.ll
@@ -58,3 +58,13 @@
   store <4 x i32> %y, <4 x i32>* %z, align 4
   ret void
 }
+
+; Check that this pattern is recognized as a VZIP and
+; that the vector blend transform does not scramble the pattern.
+; CHECK-LABEL: vzipNoBlend:
+; CHECK: zip1
+define <8 x i8> @vzipNoBlend(<8 x i8>* %A, <8 x i16>* %B) nounwind {
+  %t = load <8 x i8>, <8 x i8>* %A
+  %vzip = shufflevector <8 x i8> %t, <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  ret <8 x i8> %vzip
+}
Index: llvm/trunk/test/CodeGen/AArch64/arm64-collect-loh.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-collect-loh.ll
+++ llvm/trunk/test/CodeGen/AArch64/arm64-collect-loh.ll
@@ -638,13 +638,13 @@
 ; CHECK: [[LOH_LABEL1:Lloh[0-9]+]]:
 ; CHECK: ldr q[[IDX:[0-9]+]], {{\[}}[[ADRP_REG]], [[CONSTPOOL]]@PAGEOFF]
 ; The tuple comes from the next instruction.
-; CHECK-NEXT: tbl.16b v{{[0-9]+}}, { v{{[0-9]+}}, v{{[0-9]+}} }, v[[IDX]]
+; CHECK: ext.16b v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, #1
 ; CHECK: ret
 ; CHECK: .loh AdrpLdr [[LOH_LABEL0]], [[LOH_LABEL1]]
 define void @uninterestingSub(i8* nocapture %row) #0 {
   %tmp = bitcast i8* %row to <16 x i8>*
   %tmp1 = load <16 x i8>, <16 x i8>* %tmp, align 16
-  %vext43 = shufflevector <16 x i8> , <16 x i8> %tmp1, <16 x i32> 
+  %vext43 = shufflevector <16 x i8> , <16 x i8> %tmp1, <16 x i32> 
   %add.i.414 = add <16 x i8> zeroinitializer, %vext43
   store <16 x i8> %add.i.414, <16 x i8>* %tmp, align 16
   %add.ptr51 = getelementptr inbounds i8, i8* %row, i64 16
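
Note (illustration, not part of the patch): the rewrite that hasVectorBlend() now gates can be seen in isolation with a standalone C++ sketch. It mirrors the core of the BlendSplat lambda on a plain mask array, with the undef handling omitted; blendSplat and Mask are illustrative names, not LLVM API. Fed the zip1-style mask from the vzipNoBlend test above, it shows how blending against a splat operand scrambles the interleave pattern, which is why AArch64 previously had to fall back to tbl:

  // Standalone sketch of the splat-blend mask rewrite; builds with any
  // C++11 compiler.
  #include <cstdio>
  #include <vector>

  // Rewrite mask lanes that read from a splat operand occupying shuffle
  // indices [Offset, Offset + NElts) so they read the corresponding lane
  // in place instead (every lane of a splat holds the same value).
  static void blendSplat(std::vector<int> &Mask, int Offset, int NElts) {
    for (int i = 0; i < NElts; ++i) {
      if (Mask[i] < Offset || Mask[i] >= Offset + NElts)
        continue;
      Mask[i] = i + Offset;
    }
  }

  int main() {
    // zip1-style mask for shufflevector(%t, splat) on <8 x i8>.
    std::vector<int> Mask = {0, 8, 1, 9, 2, 10, 3, 11};
    blendSplat(Mask, /*Offset=*/8, /*NElts=*/8);
    // Prints: 0 9 1 11 2 13 3 15 -- no longer a zip1 pattern.
    for (int M : Mask)
      std::printf("%d ", M);
    std::printf("\n");
    return 0;
  }

On X86 the rewritten mask is profitable because it maps onto a single blend instruction; on targets without one it only obscures patterns such as zip1 and ext, so the hook defaults to false and this patch overrides it only for X86.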