Index: lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- lib/Transforms/InstCombine/InstructionCombining.cpp
+++ lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1451,6 +1451,58 @@
     return Builder.CreateShuffleVector(
         NewBO, UndefValue::get(NewBO->getType()), LShuf->getMask());
   }
+  // Both operands of the binary operation are bitcasts of shuffles, so the
+  // shuffle vector type differs from the binary operation vector type, e.g.
+  // the shuffles operate on <4 x i32> while the binary operation operates on
+  // <16 x i8>:
+  //   Op(bitcast(shuffle(v1, m)), bitcast(shuffle(v2, m)))
+  //     -> bitcast(shuffle(bitcast(Op(bitcast(v1), bitcast(v2))), m))
+  // Moving the shuffle behind the binary operation needs four bitcasts: one
+  // for each shuffle source operand, one for the binary operation result, and
+  // one for the shuffle result. That looks like a lot of bitcast instructions,
+  // but they are expected to be eliminated by subsequent instruction
+  // combining.
+  // The alternative - changing the shuffle vector type and recomputing the
+  // shuffle mask - has very limited use: the mask can only be recomputed when
+  // it is a constant, and only when the shuffle type is changed from wider to
+  // narrower elements (<4 x i32> to <16 x i8>), not vice versa.
+  // NOTE(review): the new binop is evaluated on source lanes the mask may not
+  // select; confirm this is acceptable for integer division/remainder.
+  auto *LBitCast = dyn_cast<BitCastInst>(LHS);
+  auto *RBitCast = dyn_cast<BitCastInst>(RHS);
+  if (LBitCast && RBitCast) {
+    auto *LBcShuf = dyn_cast<ShuffleVectorInst>(LBitCast->getOperand(0));
+    auto *RBcShuf = dyn_cast<ShuffleVectorInst>(RBitCast->getOperand(0));
+
+    if (LBcShuf && RBcShuf && LBcShuf->getMask() == RBcShuf->getMask() &&
+        isa<UndefValue>(LBcShuf->getOperand(1)) &&
+        isa<UndefValue>(RBcShuf->getOperand(1)) &&
+        LBcShuf->getOperand(0)->getType() ==
+            RBcShuf->getOperand(0)->getType()) {
+      Type *ShufTy = LBcShuf->getType();
+      unsigned ShufEltBits = ShufTy->getScalarSizeInBits();
+      unsigned BinOpEltBits = Inst.getType()->getScalarSizeInBits();
+
+      // The shuffle sources are bitcast straight to the binop type, which is
+      // only a valid bitcast when the shuffle does not change the vector
+      // length. Each shuffle lane must also cover a whole number of binop
+      // lanes; otherwise the shuffle would split binop lanes apart and
+      // reordering the two operations would change the result.
+      if (LBcShuf->getOperand(0)->getType() == ShufTy && BinOpEltBits != 0 &&
+          ShufEltBits % BinOpEltBits == 0) {
+        Value *NewLHS =
+            Builder.CreateBitCast(LBcShuf->getOperand(0), Inst.getType());
+        Value *NewRHS =
+            Builder.CreateBitCast(RBcShuf->getOperand(0), Inst.getType());
+        Value *NewBinOp = CreateBinOpAsGiven(Inst, NewLHS, NewRHS, Builder);
+
+        Value *ShufBitCast = Builder.CreateBitCast(NewBinOp, ShufTy);
+        Value *NewShuf = Builder.CreateShuffleVector(
+            ShufBitCast, UndefValue::get(ShufTy), LBcShuf->getMask());
+        return Builder.CreateBitCast(NewShuf, Inst.getType());
+      }
+    }
+  }
 
   // If one argument is a shuffle within one vector, the other is a constant,
   // try moving the shuffle after the binary operation.
Index: test/Transforms/InstCombine/vec_shuffle.ll
===================================================================
--- test/Transforms/InstCombine/vec_shuffle.ll
+++ test/Transforms/InstCombine/vec_shuffle.ll
@@ -463,3 +463,49 @@
   %1 = shufflevector <4 x i32*> %A, <4 x i32*> undef, <2 x i32>
   ret <2 x i32*> %1
 }
+
+; NOTE(review): the mask operand of every shufflevector below is missing from
+; this copy of the patch; restore the masks before running the test.
+
+; Expect both <4 x i32> shuffles around the <8 x i16> add to be eliminated.
+define <2 x i64> @shuffle_add2_32_16(<2 x i64> %v) {
+; CHECK-LABEL: @shuffle_add2_32_16(
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> %v to <8 x i16>
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %v to <8 x i16>
+; CHECK-NEXT:    [[TMP2:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
+;
+  %bc0 = bitcast <2 x i64> %v to <4 x i32>
+  %shuffle = shufflevector <4 x i32> %bc0, <4 x i32> zeroinitializer, <4 x i32>
+  %bc1 = bitcast <4 x i32> %shuffle to <2 x i64>
+  %bc2 = bitcast <2 x i64> %bc1 to <8 x i16>
+  %add.i = add <8 x i16> %bc2, %bc2
+  %bc3 = bitcast <8 x i16> %add.i to <2 x i64>
+  %bc4 = bitcast <2 x i64> %bc3 to <4 x i32>
+  %shuffle4 = shufflevector <4 x i32> %bc4, <4 x i32> zeroinitializer, <4 x i32>
+  %bc5 = bitcast <4 x i32> %shuffle4 to <2 x i64>
+  ret <2 x i64> %bc5
+}
+
+; Expect both <4 x i32> shuffles around the <16 x i8> add to be eliminated.
+define <2 x i64> @shuffle_add2_32_8(<2 x i64> %v) {
+; CHECK-LABEL: @shuffle_add2_32_8(
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> %v to <16 x i8>
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %v to <16 x i8>
+; CHECK-NEXT:    [[TMP2:%.*]] = add <16 x i8> [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
+;
+  %bc0 = bitcast <2 x i64> %v to <4 x i32>
+  %shuffle = shufflevector <4 x i32> %bc0, <4 x i32> zeroinitializer, <4 x i32>
+  %bc1 = bitcast <4 x i32> %shuffle to <2 x i64>
+  %bc2 = bitcast <2 x i64> %bc1 to <16 x i8>
+  %add.i = add <16 x i8> %bc2, %bc2
+  %bc3 = bitcast <16 x i8> %add.i to <2 x i64>
+  %bc4 = bitcast <2 x i64> %bc3 to <4 x i32>
+  %shuffle4 = shufflevector <4 x i32> %bc4, <4 x i32> zeroinitializer, <4 x i32>
+  %bc5 = bitcast <4 x i32> %shuffle4 to <2 x i64>
+  ret <2 x i64> %bc5
+}
+