diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9642,13 +9642,15 @@ // If the recipe is uniform across all parts (instead of just per VF), only // generate a single instance. Instruction *UI = getUnderlyingInstr(); - if (isa<LoadInst>(UI) && + if ((isa<LoadInst>(UI) || isa<StoreInst>(UI)) && all_of(operands(), [](VPValue *Op) { return !Op->getDef(); })) { State.ILV->scalarizeInstruction(UI, this, VPIteration(0, 0), IsPredicated, State); - for (unsigned Part = 1; Part < State.UF; ++Part) - State.set(this, State.get(this, VPIteration(0, 0)), - VPIteration(Part, 0)); + if (!UI->getType()->isVoidTy()) { + for (unsigned Part = 1; Part < State.UF; ++Part) + State.set(this, State.get(this, VPIteration(0, 0)), + VPIteration(Part, 0)); + } return; } diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -805,7 +805,6 @@ ; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 ; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8 -; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B]], align 8 ; FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 @@ -1430,7 +1429,6 @@ ; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 ; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B:%.*]], align 1 -; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B]], align 1 ; FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; 
FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll --- a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll @@ -179,9 +179,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: store i32 0, i32* [[ADDR:%.*]], align 4 -; CHECK-NEXT: store i32 0, i32* [[ADDR]], align 4 -; CHECK-NEXT: store i32 0, i32* [[ADDR]], align 4 -; CHECK-NEXT: store i32 0, i32* [[ADDR]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 ; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]