NOTE(review): the line breaks, indentation and CHECK-line spacing below were
reconstructed from a whitespace-collapsed copy of this patch, and the template
argument of cast<> (missing in that copy) was restored. Confirm against the
upstream commit before applying.

VectorCombine: once the original store has been replaced by the new scalar
store and erased, also erase the insertelement that fed it when it has no
remaining uses, then call scalarizeLoadExtract on *Load. The AArch64 test
expectations change accordingly: the <225 x double> load and its two
extractelements become getelementptr + scalar load pairs.

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -811,8 +811,14 @@
     StoreInst *NSI = Builder.CreateStore(NewElement, GEP);
     NSI->copyMetadata(*SI);
     replaceValue(I, *NSI);
+    auto *IEI = cast<InsertElementInst>(SI->getOperand(0));
     // Need erasing the store manually.
     I.eraseFromParent();
+    // Removing the insertelement instruction may unlock further load/extract
+    // scalarization opportunities.
+    if (IEI->use_empty())
+      IEI->eraseFromParent();
+    scalarizeLoadExtract(*Load);
     return true;
   }
 
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll b/llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll
--- a/llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll
@@ -6,14 +6,15 @@
 define void @test(i64 %i, <225 x double>* %A) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[LV:%.*]] = load <225 x double>, <225 x double>* [[A:%.*]], align 8
 ; CHECK-NEXT:    [[IDX_0:%.*]] = add nuw nsw i64 2, [[I:%.*]]
-; CHECK-NEXT:    [[EXT_0:%.*]] = extractelement <225 x double> [[LV]], i64 [[IDX_0]]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A:%.*]], i32 0, i64 [[IDX_0]]
+; CHECK-NEXT:    [[EXT_0:%.*]] = load double, double* [[TMP0]], align 1
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul double 2.000000e+01, [[EXT_0]]
-; CHECK-NEXT:    [[EXT_1:%.*]] = extractelement <225 x double> [[LV]], i64 [[I]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i32 0, i64 [[I]]
+; CHECK-NEXT:    [[EXT_1:%.*]] = load double, double* [[TMP1]], align 1
 ; CHECK-NEXT:    [[SUB:%.*]] = fsub double [[EXT_1]], [[MUL]]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i64 0, i64 [[I]]
-; CHECK-NEXT:    store double [[SUB]], double* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[A]], i64 0, i64 [[I]]
+; CHECK-NEXT:    store double [[SUB]], double* [[TMP2]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry: