Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9595,13 +9595,15 @@
   // If the recipe is uniform across all parts (instead of just per VF), only
   // generate a single instance.
   Instruction *UI = getUnderlyingInstr();
-  if (isa<LoadInst>(UI) &&
+  if ((isa<LoadInst>(UI) || isa<StoreInst>(UI)) &&
       all_of(operands(), [](VPValue *Op) { return !Op->getDef(); })) {
     State.ILV->scalarizeInstruction(UI, this, VPIteration(0, 0), IsPredicated,
                                     State);
-    for (unsigned Part = 1; Part < State.UF; ++Part)
-      State.set(this, State.get(this, VPIteration(0, 0)),
-                VPIteration(Part, 0));
+    if (!UI->getType()->isVoidTy()) {
+      for (unsigned Part = 1; Part < State.UF; ++Part)
+        State.set(this, State.get(this, VPIteration(0, 0)),
+                  VPIteration(Part, 0));
+    }
     return;
   }
Index: llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
+++ llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
@@ -805,7 +805,6 @@
 ; FIXEDLEN-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; FIXEDLEN-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 2
 ; FIXEDLEN-NEXT:    store i64 [[V]], ptr [[B:%.*]], align 8
-; FIXEDLEN-NEXT:    store i64 [[V]], ptr [[B]], align 8
 ; FIXEDLEN-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
 ; FIXEDLEN-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
 ; FIXEDLEN-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
@@ -1430,7 +1429,6 @@
 ; FIXEDLEN-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; FIXEDLEN-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 2
 ; FIXEDLEN-NEXT:    store i64 [[V]], ptr [[B:%.*]], align 1
-; FIXEDLEN-NEXT:    store i64 [[V]], ptr [[B]], align 1
 ; FIXEDLEN-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
 ; FIXEDLEN-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
 ; FIXEDLEN-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
Index: llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll
+++ llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll
@@ -179,9 +179,6 @@
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    store i32 0, i32* [[ADDR:%.*]], align 4
-; CHECK-NEXT:    store i32 0, i32* [[ADDR]], align 4
-; CHECK-NEXT:    store i32 0, i32* [[ADDR]], align 4
-; CHECK-NEXT:    store i32 0, i32* [[ADDR]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
 ; CHECK-NEXT:    br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]