Index: llvm/docs/CodeGenerator.rst =================================================================== --- llvm/docs/CodeGenerator.rst +++ llvm/docs/CodeGenerator.rst @@ -912,6 +912,25 @@ (and which of the above three actions to take) by calling the ``setOperationAction`` method in its ``TargetLowering`` constructor. +If a target supports vector types, then the IR optimizer assumes that it can +create some forms of the shufflevector IR instruction as canonical IR. Therefore, +a target is expected to produce efficient machine code for those forms of the +shufflevector IR instruction using custom legalization if needed. The shuffle +forms that should be handled include: + +* Splat --- All elements of the vector have identical scalar elements. This +operation may also be known as a "broadcast" or "duplicate" in target assembly. + +* Select --- Each element of the vector is chosen from the corresponding lane +of 1 of 2 input vectors (no lane crossings). This operation may also be known as +a "blend" or "bitwise select" in target assembly. + +* Insert subvector --- The vector is placed into a longer vector type starting +at index 0. + +* Extract subvector --- The vector is pulled from a longer vector type starting +at index 0. + Prior to the existence of the Legalize passes, we required that every target `selector`_ supported and handled every operator and type even if they are not natively supported. The introduction of the Legalize phases allows all of the Index: llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -1608,6 +1608,62 @@ return nullptr; }
+/// Fold a shuffle of two identity-with-padding ("widening") shuffles of
+/// same-typed sources into a single shuffle of the narrow sources:
+///   shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
+/// Returns the new (uninserted) shuffle, or nullptr if the pattern does not
+/// match.
+static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
+  // Match the operands as identity with padding (also known as
+  // insert_subvector into undef) shuffles of the same source type.
+  auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
+  auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
+  if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
+      !Shuffle1 || !Shuffle1->isIdentityWithPadding())
+    return nullptr;
+
+  // The narrow sources must have the same type and must not be undef.
+  Value *X = Shuffle0->getOperand(0);
+  Value *Y = Shuffle1->getOperand(0);
+  if (X->getType() != Y->getType() || isa<UndefValue>(X) || isa<UndefValue>(Y))
+    return nullptr;
+  assert(isa<UndefValue>(Shuffle0->getOperand(1)) &&
+         isa<UndefValue>(Shuffle1->getOperand(1)) &&
+         "Unexpected operand for identity shuffle");
+
+  // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
+  // operands directly by adjusting the shuffle mask to account for the narrower
+  // types:
+  // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
+  int NarrowElts = X->getType()->getVectorNumElements();
+  int WideElts = Shuffle0->getType()->getVectorNumElements();
+  assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
+
+  Type *I32Ty = IntegerType::getInt32Ty(Shuf.getContext());
+  SmallVector<int, 16> Mask = Shuf.getShuffleMask();
+  SmallVector<Constant *, 16> NewMask(Mask.size(), UndefValue::get(I32Ty));
+  for (int i = 0, e = Mask.size(); i != e; ++i) {
+    if (Mask[i] == -1)
+      continue;
+
+    // Look through the widening shuffle to find the narrow source lane. If the
+    // outer mask picks a padding (or otherwise undef) lane, the result element
+    // is undef. Remapping such an index arithmetically would instead select a
+    // lane of Y or produce an index past the end of the narrow operands.
+    bool FromOp0 = Mask[i] < WideElts;
+    int SrcElt = FromOp0 ? Shuffle0->getMaskValue(Mask[i])
+                         : Shuffle1->getMaskValue(Mask[i] - WideElts);
+    if (SrcElt == -1)
+      continue;
+    assert(SrcElt < NarrowElts && "Unexpected shuffle mask");
+
+    // Lanes of X keep their index; lanes of Y follow the NarrowElts lanes of X.
+    int NewElt = FromOp0 ? SrcElt : SrcElt + NarrowElts;
+    NewMask[i] = ConstantInt::get(I32Ty, NewElt);
+  }
+  return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
+}
+
 Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Value *LHS = SVI.getOperand(0); Value *RHS = SVI.getOperand(1); @@ -1662,10 +1718,12 @@ if (Instruction *I = foldIdentityExtractShuffle(SVI)) return I; - // This transform has the potential to lose undef knowledge, so it is + // These transforms have the potential to lose undef knowledge, so they are // intentionally placed after SimplifyDemandedVectorElts(). 
if (Instruction *I = foldShuffleWithInsert(SVI)) return I; + if (Instruction *I = foldIdentityPaddedShuffles(SVI)) + return I; if (VWidth == LHSWidth) { // Analyze the shuffle, are the LHS or RHS and identity shuffles? Index: llvm/test/Transforms/InstCombine/vec_shuffle.ll =================================================================== --- llvm/test/Transforms/InstCombine/vec_shuffle.ll +++ llvm/test/Transforms/InstCombine/vec_shuffle.ll @@ -1142,9 +1142,7 @@ define <7 x i8> @insert_subvector_shuffles(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: @insert_subvector_shuffles( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> undef, <7 x i32> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <3 x i8> [[Y:%.*]], <3 x i8> undef, <7 x i32> -; CHECK-NEXT: [[S3:%.*]] = shufflevector <7 x i8> [[S1]], <7 x i8> [[S2]], <7 x i32> +; CHECK-NEXT: [[S3:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> [[Y:%.*]], <7 x i32> ; CHECK-NEXT: ret <7 x i8> [[S3]] ; %s1 = shufflevector <3 x i8> %x, <3 x i8> undef, <7 x i32> @@ -1157,9 +1155,7 @@ define <2 x i8> @insert_subvector_shuffles_narrowing(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: @insert_subvector_shuffles_narrowing( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> undef, <7 x i32> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <3 x i8> [[Y:%.*]], <3 x i8> undef, <7 x i32> -; CHECK-NEXT: [[S3:%.*]] = shufflevector <7 x i8> [[S1]], <7 x i8> [[S2]], <2 x i32> +; CHECK-NEXT: [[S3:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> [[Y:%.*]], <2 x i32> ; CHECK-NEXT: ret <2 x i8> [[S3]] ; %s1 = shufflevector <3 x i8> %x, <3 x i8> undef, <7 x i32> @@ -1172,9 +1168,7 @@ define <4 x double> @insert_subvector_shuffles_identity(<2 x double> %x) { ; CHECK-LABEL: @insert_subvector_shuffles_identity( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> undef, <4 x i32> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> undef, <4 x i32> -; CHECK-NEXT: [[S3:%.*]] 
= shufflevector <4 x double> [[S2]], <4 x double> [[S1]], <4 x i32> +; CHECK-NEXT: [[S3:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> undef, <4 x i32> ; CHECK-NEXT: ret <4 x double> [[S3]] ; %s1 = shufflevector <2 x double> %x, <2 x double> undef, <4 x i32>