Index: llvm/lib/Transforms/Vectorize/VectorCombine.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -376,28 +376,35 @@ return false; bool MadeChange = false; - for (BasicBlock &BB : F) { - // Ignore unreachable basic blocks. - if (!DT.isReachableFromEntry(&BB)) - continue; - // Do not delete instructions under here and invalidate the iterator. - // Walk the block backwards for efficiency. We're matching a chain of - // use->defs, so we're more likely to succeed by starting from the bottom. - // TODO: It could be more efficient to remove dead instructions - // iteratively in this loop rather than waiting until the end. - for (Instruction &I : make_range(BB.rbegin(), BB.rend())) { - if (isa(I)) + + // Iterate until there are no more changes. Transforms can build on each + // other's improvements. + bool IterationChange; + do { + IterationChange = false; + for (BasicBlock &BB : F) { + // Ignore unreachable basic blocks. + if (!DT.isReachableFromEntry(&BB)) continue; - MadeChange |= foldExtractExtract(I, TTI); - MadeChange |= foldBitcastShuf(I, TTI); - MadeChange |= scalarizeBinop(I, TTI); - } - } - // We're done with transforms, so remove dead instructions. - if (MadeChange) - for (BasicBlock &BB : F) - SimplifyInstructionsInBlock(&BB); + // Walk the block backwards for efficiency. We are matching a chain of + // use->defs, so we're more likely to succeed by starting from the bottom. + for (Instruction &I : make_range(BB.rbegin(), BB.rend())) { + if (isa(I)) + continue; + IterationChange |= foldExtractExtract(I, TTI); + IterationChange |= foldBitcastShuf(I, TTI); + IterationChange |= scalarizeBinop(I, TTI); + } + } + // Remove dead instructions before iterating. + if (IterationChange) + for (BasicBlock &BB : F) + SimplifyInstructionsInBlock(&BB); + + // Set overall changed flag. + MadeChange |= IterationChange; + } while (IterationChange); return MadeChange; } Index: llvm/test/Transforms/VectorCombine/X86/insert-binop.ll =================================================================== --- llvm/test/Transforms/VectorCombine/X86/insert-binop.ll +++ llvm/test/Transforms/VectorCombine/X86/insert-binop.ll @@ -51,11 +51,9 @@ define <2 x i64> @ins1_ins1_iterate(i64 %w, i64 %x, i64 %y, i64 %z) { ; CHECK-LABEL: @ins1_ins1_iterate( ; CHECK-NEXT: [[S0_SCALAR:%.*]] = sub i64 [[W:%.*]], [[X:%.*]] -; CHECK-NEXT: [[S0:%.*]] = insertelement <2 x i64> undef, i64 [[S0_SCALAR]], i64 1 -; CHECK-NEXT: [[I2:%.*]] = insertelement <2 x i64> undef, i64 [[Y:%.*]], i32 1 -; CHECK-NEXT: [[S1:%.*]] = or <2 x i64> [[S0]], [[I2]] -; CHECK-NEXT: [[I3:%.*]] = insertelement <2 x i64> undef, i64 [[Z:%.*]], i32 1 -; CHECK-NEXT: [[S2:%.*]] = shl <2 x i64> [[I3]], [[S1]] +; CHECK-NEXT: [[S1_SCALAR:%.*]] = or i64 [[S0_SCALAR]], [[Y:%.*]] +; CHECK-NEXT: [[S2_SCALAR:%.*]] = shl i64 [[Z:%.*]], [[S1_SCALAR]] +; CHECK-NEXT: [[S2:%.*]] = insertelement <2 x i64> undef, i64 [[S2_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[S2]] ; %i0 = insertelement <2 x i64> undef, i64 %w, i64 1