Index: llvm/docs/CodeGenerator.rst =================================================================== --- llvm/docs/CodeGenerator.rst +++ llvm/docs/CodeGenerator.rst @@ -912,6 +912,31 @@ (and which of the above three actions to take) by calling the ``setOperationAction`` method in its ``TargetLowering`` constructor. +If a target has legal vector types, it is expected to produce efficient machine +code for common forms of the shufflevector IR instruction using those types. +This may require custom legalization for SelectionDAG vector operations that +are created from the shufflevector IR. The shufflevector forms that should be +handled include: + +* Vector select --- Each element of the vector is chosen from either of the +corresponding elements of the 2 input vectors. This operation may also be +known as a "blend" or "bitwise select" in target assembly. This type of shuffle +maps directly to the ``shuffle_vector`` SelectionDAG node. + +* Insert subvector --- A vector is placed into a longer vector type starting +at index 0. This type of shuffle maps directly to the ``insert_subvector`` +SelectionDAG node with the ``index`` operand set to 0. + +* Extract subvector --- A vector is pulled from a longer vector type starting +at index 0. This type of shuffle maps directly to the ``extract_subvector`` +SelectionDAG node with the ``index`` operand set to 0. + +* Splat --- All elements of the vector have identical scalar elements. This +operation may also be known as a "broadcast" or "duplicate" in target assembly. +The shufflevector IR instruction may change the vector length, so this operation +may map to multiple SelectionDAG nodes including ``shuffle_vector``, +``concat_vectors``, ``insert_subvector``, and ``extract_subvector``. + Prior to the existence of the Legalize passes, we required that every target `selector`_ supported and handled every operator and type even if they are not natively supported. The introduction of the Legalize phases allows all of the Index: llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -1622,6 +1622,54 @@ return nullptr; } +static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) { + // Match the operands as identity with padding (also known as concatenation + // with undef) shuffles of the same source type. The backend is expected to + // recreate these concatenations from a shuffle of narrow operands. + auto *Shuffle0 = dyn_cast(Shuf.getOperand(0)); + auto *Shuffle1 = dyn_cast(Shuf.getOperand(1)); + if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() || + !Shuffle1 || !Shuffle1->isIdentityWithPadding()) + return nullptr; + + // We limit this transform to power-of-2 types because the backend should + // convert the simplified IR patterns to identical nodes as the original IR. + // If we can verify that behavior for arbitrary types, the power-of-2 checks + // can be removed. + Value *X = Shuffle0->getOperand(0); + Value *Y = Shuffle1->getOperand(0); + if (X->getType() != Y->getType() || + !isPowerOf2_32(Shuf.getType()->getVectorNumElements()) || + !isPowerOf2_32(Shuffle0->getType()->getVectorNumElements()) || + !isPowerOf2_32(X->getType()->getVectorNumElements()) || + isa(X) || isa(Y)) + return nullptr; + assert(isa(Shuffle0->getOperand(1)) && + isa(Shuffle1->getOperand(1)) && + "Unexpected operand for identity shuffle"); + + // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source + // operands directly by adjusting the shuffle mask to account for the narrower + // types: + // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask' + int NarrowElts = X->getType()->getVectorNumElements(); + int WideElts = Shuffle0->getType()->getVectorNumElements(); + assert(WideElts > NarrowElts && "Unexpected types for identity with padding"); + + Type *I32Ty = IntegerType::getInt32Ty(Shuf.getContext()); + SmallVector Mask = Shuf.getShuffleMask(); + SmallVector NewMask(Mask.size(), UndefValue::get(I32Ty)); + for (int i = 0, e = Mask.size(); i != e; ++i) { + if (Mask[i] == -1) + continue; + if (Mask[i] < WideElts) + NewMask[i] = ConstantInt::get(I32Ty, Mask[i]); + else + NewMask[i] = ConstantInt::get(I32Ty, Mask[i] - (WideElts - NarrowElts)); + } + return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask)); +} + Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Value *LHS = SVI.getOperand(0); Value *RHS = SVI.getOperand(1); @@ -1676,10 +1724,12 @@ if (Instruction *I = foldIdentityExtractShuffle(SVI)) return I; - // This transform has the potential to lose undef knowledge, so it is + // These transforms have the potential to lose undef knowledge, so they are // intentionally placed after SimplifyDemandedVectorElts(). if (Instruction *I = foldShuffleWithInsert(SVI)) return I; + if (Instruction *I = foldIdentityPaddedShuffles(SVI)) + return I; if (VWidth == LHSWidth) { // Analyze the shuffle, are the LHS or RHS and identity shuffles? Index: llvm/test/Transforms/InstCombine/vec_shuffle.ll =================================================================== --- llvm/test/Transforms/InstCombine/vec_shuffle.ll +++ llvm/test/Transforms/InstCombine/vec_shuffle.ll @@ -1140,6 +1140,8 @@ ret <2 x i1> %r } +; Negative test - do not transform non-power-of-2 unless we know the backend handles these sequences identically. + define <7 x i8> @insert_subvector_shuffles(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: @insert_subvector_shuffles( ; CHECK-NEXT: [[S1:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> undef, <7 x i32> @@ -1155,9 +1157,7 @@ define <8 x i8> @insert_subvector_shuffles_pow2elts(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @insert_subvector_shuffles_pow2elts( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <8 x i32> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <2 x i8> [[Y:%.*]], <2 x i8> undef, <8 x i32> -; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x i8> [[S1]], <8 x i8> [[S2]], <8 x i32> +; CHECK-NEXT: [[S3:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <8 x i32> ; CHECK-NEXT: ret <8 x i8> [[S3]] ; %s1 = shufflevector <2 x i8> %x, <2 x i8> undef, <8 x i32> @@ -1167,6 +1167,7 @@ } ; The last shuffle may change the vector type. +; Negative test - do not transform non-power-of-2 unless we know the backend handles these sequences identically. define <2 x i8> @insert_subvector_shuffles_narrowing(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: @insert_subvector_shuffles_narrowing( @@ -1183,9 +1184,7 @@ define <2 x i8> @insert_subvector_shuffles_narrowing_pow2elts(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @insert_subvector_shuffles_narrowing_pow2elts( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> undef, <8 x i32> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> undef, <8 x i32> -; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x i8> [[S1]], <8 x i8> [[S2]], <2 x i32> +; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <2 x i32> ; CHECK-NEXT: ret <2 x i8> [[S3]] ; %s1 = shufflevector <4 x i8> %x, <4 x i8> undef, <8 x i32> @@ -1198,9 +1197,7 @@ define <4 x double> @insert_subvector_shuffles_identity(<2 x double> %x) { ; CHECK-LABEL: @insert_subvector_shuffles_identity( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> undef, <4 x i32> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> undef, <4 x i32> -; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x double> [[S2]], <4 x double> [[S1]], <4 x i32> +; CHECK-NEXT: [[S3:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> undef, <4 x i32> ; CHECK-NEXT: ret <4 x double> [[S3]] ; %s1 = shufflevector <2 x double> %x, <2 x double> undef, <4 x i32>