Index: lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- lib/Transforms/Vectorize/SLPVectorizer.cpp +++ lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -299,23 +299,6 @@ : TargetTransformInfo::SK_PermuteSingleSrc; } -///\returns Opcode that can be clubbed with \p Op to create an alternate -/// sequence which can later be merged as a ShuffleVector instruction. -static unsigned getAltOpcode(unsigned Op) { - switch (Op) { - case Instruction::FAdd: - return Instruction::FSub; - case Instruction::FSub: - return Instruction::FAdd; - case Instruction::Add: - return Instruction::Sub; - case Instruction::Sub: - return Instruction::Add; - default: - return 0; - } -} - static bool sameOpcodeOrAlt(unsigned Opcode, unsigned AltOpcode, unsigned CheckedOpcode) { return Opcode == CheckedOpcode || AltOpcode == CheckedOpcode; @@ -361,19 +344,20 @@ if (llvm::any_of(VL, [](Value *V) { return !isa(V); })) return InstructionsState(VL[BaseIndex], 0, 0); + bool IsBinOp = isa(VL[BaseIndex]); unsigned Opcode = cast(VL[BaseIndex])->getOpcode(); unsigned AltOpcode = Opcode; - bool HasAltOpcodes = llvm::any_of(VL, [Opcode](Value *V) { - return Opcode != cast(V)->getOpcode(); - }); - // Check for an alternate opcode pattern. - if (HasAltOpcodes) { - AltOpcode = getAltOpcode(Opcode); - for (int Cnt = 0, E = VL.size(); Cnt < E; Cnt++) { - unsigned InstOpcode = cast(VL[Cnt])->getOpcode(); - if (!sameOpcodeOrAlt(Opcode, AltOpcode, InstOpcode)) - return InstructionsState(VL[BaseIndex], 0, 0); + // Check for one alternate opcode from another BinaryOperator. + // TODO - can we support other operators (casts etc.)? + for (int Cnt = 0, E = VL.size(); Cnt < E; Cnt++) { + unsigned InstOpcode = cast(VL[Cnt])->getOpcode(); + if (!sameOpcodeOrAlt(Opcode, AltOpcode, InstOpcode)) { + if (Opcode == AltOpcode && IsBinOp && isa(VL[Cnt])) { + AltOpcode = InstOpcode; + continue; + } + return InstructionsState(VL[BaseIndex], 0, 0); } } Index: test/Transforms/SLPVectorizer/AArch64/transpose.ll =================================================================== --- test/Transforms/SLPVectorizer/AArch64/transpose.ll +++ test/Transforms/SLPVectorizer/AArch64/transpose.ll @@ -197,25 +197,21 @@ define <4 x i32> @build_vec_v4i32_3_binops(<2 x i32> %v0, <2 x i32> %v1) { ; CHECK-LABEL: @build_vec_v4i32_3_binops( -; CHECK-NEXT: [[V0_0:%.*]] = extractelement <2 x i32> %v0, i32 0 -; CHECK-NEXT: [[V0_1:%.*]] = extractelement <2 x i32> %v0, i32 1 -; CHECK-NEXT: [[V1_0:%.*]] = extractelement <2 x i32> %v1, i32 0 -; CHECK-NEXT: [[V1_1:%.*]] = extractelement <2 x i32> %v1, i32 1 -; CHECK-NEXT: [[TMP0_0:%.*]] = add i32 [[V0_0]], [[V1_0]] -; CHECK-NEXT: [[TMP0_1:%.*]] = add i32 [[V0_1]], [[V1_1]] -; CHECK-NEXT: [[TMP1_0:%.*]] = mul i32 [[V0_0]], [[V1_0]] -; CHECK-NEXT: [[TMP1_1:%.*]] = mul i32 [[V0_1]], [[V1_1]] -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> %v0, %v1 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> %v0, %v1 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> undef, <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> undef, i32 [[TMP0_0]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP1_0]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP0_1]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP1_1]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP6]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP3_3:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> %v0, <2 x i32> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> %v0, <2 x i32> undef, <2 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = mul <2 x i32> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], <2 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = xor <2 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP12:%.*]] = xor <2 x i32> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP13:%.*]] = add <2 x i32> [[TMP5]], [[TMP10]] +; CHECK-NEXT: [[TMP14:%.*]] = add <2 x i32> [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP3_3:%.*]] = shufflevector <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[TMP3_3]] ; %v0.0 = extractelement <2 x i32> %v0, i32 0