diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1037,6 +1037,14 @@
     return VectorizableTree.front()->getMainOp();
   }
 
+  /// Returns true if the tree is non-empty and its root node (the first
+  /// entry of VectorizableTree) has in-tree uses, i.e. its UserTreeIndices
+  /// list is not empty.
+  bool doesRootHaveInTreeUses() const {
+    return !VectorizableTree.empty() &&
+           !VectorizableTree.front()->UserTreeIndices.empty();
+  }
+
   /// Builds external uses of the vectorized scalars, i.e. the list of
   /// vectorized scalars to be extracted, their lanes and their scalar users. \p
   /// ExternallyUsedValues contains additional list of external uses to handle
@@ -11487,7 +11495,13 @@
       if (R.isTreeTinyAndNotFullyVectorizable())
         continue;
       R.reorderTopToBottom();
-      R.reorderBottomToTop(!isa<InsertElementInst>(Ops.front()));
+      // Only ignore the root's order when the root is not an insertelement
+      // and has no in-tree uses. When the root is used from inside the tree
+      // (e.g. PHIs that use each other), reordering one node requires
+      // reordering the other, so the root's order must be taken into account.
+      R.reorderBottomToTop(
+          /*IgnoreReorder=*/!isa<InsertElementInst>(Ops.front()) &&
+          !R.doesRootHaveInTreeUses());
       R.buildExternalUses();
 
       R.computeMinimumValueSizes();
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-phi-operand.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-phi-operand.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-phi-operand.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-phi-operand.ll
@@ -82,3 +82,99 @@
   store float %add2.i.sink, ptr %d.i.i.i, align 4
   ret void
 }
+
+; Here PHIs have mutual uses of each other. Reordering one requires reordering the other.
+define void @test2(ptr %p1, ptr %p2) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A1:%.*]] = getelementptr inbounds ptr, ptr [[P1:%.*]], i32 0
+; CHECK-NEXT:    [[B1:%.*]] = getelementptr inbounds ptr, ptr [[P1]], i32 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x double>, ptr [[A1]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[B1]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <2 x double> <double 1.000000e+01, double 1.000000e-01>, [[TMP0]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <2 x double> <double 1.100000e+01, double 1.100000e+00>, [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fsub fast <2 x double> <double 1.000000e+01, double 1.000000e-01>, [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast <2 x double> [[TMP4]], <double 2.000000e+00, double 2.100000e+00>
+; CHECK-NEXT:    [[TMP6:%.*]] = fadd fast <2 x double> [[TMP2]], <double 1.000000e+01, double 1.000000e-01>
+; CHECK-NEXT:    [[TMP7:%.*]] = fadd fast <2 x double> [[TMP6]], [[TMP5]]
+; CHECK-NEXT:    [[TMP8:%.*]] = fmul fast <2 x double> [[TMP7]], <double 3.000000e+00, double 3.100000e+00>
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[TMP9:%.*]] = fadd fast <2 x double> <double 4.000000e+00, double 4.100000e+00>, [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = fadd fast <2 x double> [[TMP9]], <double 2.000000e+00, double 2.100000e+00>
+; CHECK-NEXT:    [[TMP11:%.*]] = fadd fast <2 x double> [[TMP10]], <double 3.000000e+00, double 3.100000e+00>
+; CHECK-NEXT:    br label [[BB2:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[TMP12:%.*]] = phi <2 x double> [ [[TMP11]], [[BB1]] ], [ [[TMP16:%.*]], [[BB6:%.*]] ]
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x double> [[TMP12]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[X0:%.*]] = getelementptr inbounds double, ptr [[P2:%.*]], i32 0
+; CHECK-NEXT:    [[TMP14:%.*]] = load <2 x double>, ptr [[X0]], align 8
+; CHECK-NEXT:    br i1 poison, label [[BB3:%.*]], label [[BB6]]
+; CHECK:       bb3:
+; CHECK-NEXT:    br i1 poison, label [[BB5:%.*]], label [[BB4:%.*]]
+; CHECK:       bb4:
+; CHECK-NEXT:    br label [[BB6]]
+; CHECK:       bb5:
+; CHECK-NEXT:    br label [[BB6]]
+; CHECK:       bb6:
+; CHECK-NEXT:    [[TMP15:%.*]] = phi <2 x double> [ [[TMP13]], [[BB2]] ], [ [[TMP14]], [[BB4]] ], [ [[TMP14]], [[BB5]] ]
+; CHECK-NEXT:    [[TMP16]] = shufflevector <2 x double> [[TMP15]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    br label [[BB2]]
+;
+entry:
+  %a1 = getelementptr inbounds ptr, ptr %p1, i32 0
+  %a2 = getelementptr inbounds ptr, ptr %p1, i32 1
+  %b2 = getelementptr inbounds ptr, ptr %p1, i32 5
+  %lda1 = load double, ptr %a1, align 8
+  %lda2 = load double, ptr %a2, align 8
+  %b1 = getelementptr inbounds ptr, ptr %p1, i32 4
+  %ldb1 = load double, ptr %b1, align 8
+  %ldb2 = load double, ptr %b2, align 8
+  %mul0.1 = fmul fast double 0.1, %lda2
+  %mul1.1 = fmul fast double 1.1, %ldb2
+  %sub0.1 = fsub fast double 0.1, %mul1.1
+  %mul2.1 = fmul fast double %sub0.1, 2.1
+  %add0.1 = fadd fast double %mul0.1, 0.1
+  %add1.1 = fadd fast double %add0.1, %mul2.1
+  %mul3.1 = fmul fast double %add1.1, 3.1
+  %mul0.0 = fmul fast double 10.0, %lda1
+  %mul1.0 = fmul fast double 11.0, %ldb1
+  %sub0.0 = fsub fast double 10.0, %mul1.0
+  %mul2.0 = fmul fast double %sub0.0, 2.0
+  %add0.0 = fadd fast double %mul0.0, 10.0
+  %add1.0 = fadd fast double %add0.0, %mul2.0
+  %mul3.0 = fmul fast double 3.0, %add1.0
+  br label %bb1
+
+bb1:
+  %add4.1 = fadd fast double 4.1, %mul3.1
+  %add2.1 = fadd fast double %add4.1, 2.1
+  %add3.1 = fadd fast double %add2.1, 3.1
+  %add4.0 = fadd fast double 4.0, %mul3.0
+  %add2.0 = fadd fast double %add4.0, 2.0
+  %add3.0 = fadd fast double %add2.0, 3.0
+  br label %bb2
+
+bb2:                                              ; preds = %bb6, %bb1
+  %phi0.0 = phi double [ %add3.1, %bb1 ], [ %phi1.0, %bb6 ] ; crossed: takes the ".1" value from %bb1
+  %phi0.1 = phi double [ %add3.0, %bb1 ], [ %phi1.1, %bb6 ] ; crossed: takes the ".0" value from %bb1
+  %x0 = getelementptr inbounds double, ptr %p2, i32 0
+  %i0 = load double, ptr %x0, align 8
+  %x1 = getelementptr inbounds double, ptr %p2, i32 1
+  %i1 = load double, ptr %x1, align 8
+  br i1 poison, label %bb3, label %bb6
+
+bb3:                                              ; preds = %bb2
+  br i1 poison, label %bb5, label %bb4
+
+bb4:                                              ; preds = %bb3
+  br label %bb6
+
+bb5:                                              ; preds = %bb3
+  br label %bb6
+
+bb6:                                              ; preds = %bb5, %bb4, %bb2
+  %phi1.0 = phi double [ %phi0.0, %bb2 ], [ %i0, %bb4 ], [ %i0, %bb5 ] ; uses %phi0.0, which in turn uses %phi1.0
+  %phi1.1 = phi double [ %phi0.1, %bb2 ], [ %i1, %bb4 ], [ %i1, %bb5 ] ; uses %phi0.1, which in turn uses %phi1.1
+  br label %bb2
+}