diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -855,8 +855,11 @@ F(IE->getOperand(0)); F(IE->getOperand(1)); } else if (auto *SV = dyn_cast(BDV)) { + // For a canonical broadcast, ignore the undef argument + // (without this, we insert a parallel base shuffle for every broadcast) F(SV->getOperand(0)); - F(SV->getOperand(1)); + if (!SV->isZeroEltSplat()) + F(SV->getOperand(1)); } else { llvm_unreachable("unexpected BDV type"); } @@ -1214,7 +1217,8 @@ BaseSV->setOperand(OperandIdx, Base); }; UpdateOperand(0); // vector operand - UpdateOperand(1); // vector operand + if (!BdvSV->isZeroEltSplat()) + UpdateOperand(1); // vector operand } } diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/base-inference.ll b/llvm/test/Transforms/RewriteStatepointsForGC/base-inference.ll --- a/llvm/test/Transforms/RewriteStatepointsForGC/base-inference.ll +++ b/llvm/test/Transforms/RewriteStatepointsForGC/base-inference.ll @@ -227,11 +227,9 @@ ; CHECK-LABEL: @test_shuffle_broadcast( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IE:%.*]] = insertelement <2 x i8 addrspace(1)*> zeroinitializer, i8 addrspace(1)* [[A:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_BASE:%.*]] = shufflevector <2 x i8 addrspace(1)*> [[IE]], <2 x i8 addrspace(1)*> zeroinitializer, <2 x i32> zeroinitializer, !is_base_value !0 ; CHECK-NEXT: [[BROADCAST:%.*]] = shufflevector <2 x i8 addrspace(1)*> [[IE]], <2 x i8 addrspace(1)*> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(<2 x i8 addrspace(1)*> [[BROADCAST]], <2 x i8 addrspace(1)*> [[BROADCAST_BASE]]) ] -; CHECK-NEXT: [[BROADCAST_RELOCATED:%.*]] = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token [[STATEPOINT_TOKEN]], i32 1, i32 0) -; CHECK-NEXT: [[BROADCAST_BASE_RELOCATED:%.*]] = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token [[STATEPOINT_TOKEN]], i32 1, i32 1) +; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(<2 x i8 addrspace(1)*> [[BROADCAST]]) ] +; CHECK-NEXT: [[BROADCAST_RELOCATED:%.*]] = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token [[STATEPOINT_TOKEN]], i32 0, i32 0) ; CHECK-NEXT: ret <2 x i8 addrspace(1)*> [[BROADCAST_RELOCATED]] ; entry: diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/scalar-base-vector.ll b/llvm/test/Transforms/RewriteStatepointsForGC/scalar-base-vector.ll --- a/llvm/test/Transforms/RewriteStatepointsForGC/scalar-base-vector.ll +++ b/llvm/test/Transforms/RewriteStatepointsForGC/scalar-base-vector.ll @@ -19,7 +19,7 @@ ; CHECK-NEXT: [[CAST:%.*]] = bitcast i8 addrspace(1)* [[PHI]] to i32 addrspace(1)* ; CHECK-NEXT: [[DOTSPLATINSERT_BASE:%.*]] = insertelement <2 x i32 addrspace(1)*> zeroinitializer, i32 addrspace(1)* [[CAST]], i32 0, !is_base_value !0 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32 addrspace(1)*> poison, i32 addrspace(1)* [[BASE_I32]], i32 0 -; CHECK-NEXT: [[DOTSPLAT_BASE:%.*]] = shufflevector <2 x i32 addrspace(1)*> [[DOTSPLATINSERT_BASE]], <2 x i32 addrspace(1)*> zeroinitializer, <2 x i32> zeroinitializer, !is_base_value !0 +; CHECK-NEXT: [[DOTSPLAT_BASE:%.*]] = shufflevector <2 x i32 addrspace(1)*> [[DOTSPLATINSERT_BASE]], <2 x i32 addrspace(1)*> undef, <2 x i32> zeroinitializer, !is_base_value !0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32 addrspace(1)*> [[DOTSPLATINSERT]], <2 x i32 addrspace(1)*> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[VEC:%.*]] = getelementptr i32, <2 x i32 addrspace(1)*> [[DOTSPLAT]], <2 x i64> [[OFFSETS:%.*]] ; CHECK-NEXT: [[PTR_BASE:%.*]] = extractelement <2 x i32 addrspace(1)*> [[DOTSPLAT_BASE]], i32 1, !is_base_value !0 @@ -54,7 +54,7 @@ ; CHECK-NEXT: [[CAST:%.*]] = bitcast i8 addrspace(1)* [[BASE]] to i32 addrspace(1)* ; CHECK-NEXT: [[DOTSPLATINSERT_BASE:%.*]] = insertelement <2 x i32 addrspace(1)*> zeroinitializer, i32 addrspace(1)* [[CAST]], i32 0, !is_base_value !0 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32 addrspace(1)*> poison, i32 addrspace(1)* [[BASE_I32]], i32 0 -; CHECK-NEXT: [[DOTSPLAT_BASE:%.*]] = shufflevector <2 x i32 addrspace(1)*> [[DOTSPLATINSERT_BASE]], <2 x i32 addrspace(1)*> zeroinitializer, <2 x i32> zeroinitializer, !is_base_value !0 +; CHECK-NEXT: [[DOTSPLAT_BASE:%.*]] = shufflevector <2 x i32 addrspace(1)*> [[DOTSPLATINSERT_BASE]], <2 x i32 addrspace(1)*> undef, <2 x i32> zeroinitializer, !is_base_value !0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32 addrspace(1)*> [[DOTSPLATINSERT]], <2 x i32 addrspace(1)*> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[VEC:%.*]] = getelementptr i32, <2 x i32 addrspace(1)*> [[DOTSPLAT]], <2 x i64> [[OFFSETS:%.*]] ; CHECK-NEXT: [[PTR_BASE:%.*]] = extractelement <2 x i32 addrspace(1)*> [[DOTSPLAT_BASE]], i32 1, !is_base_value !0 @@ -102,7 +102,7 @@ ; CHECK-NEXT: [[CAST:%.*]] = bitcast i8 addrspace(1)* [[BASE]] to i32 addrspace(1)* ; CHECK-NEXT: [[DOTSPLATINSERT_BASE:%.*]] = insertelement <2 x i32 addrspace(1)*> zeroinitializer, i32 addrspace(1)* [[CAST]], i32 0, !is_base_value !0 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32 addrspace(1)*> poison, i32 addrspace(1)* [[BASE_I32]], i32 0 -; CHECK-NEXT: [[DOTSPLAT_BASE:%.*]] = shufflevector <2 x i32 addrspace(1)*> [[DOTSPLATINSERT_BASE]], <2 x i32 addrspace(1)*> zeroinitializer, <2 x i32> zeroinitializer, !is_base_value !0 +; CHECK-NEXT: [[DOTSPLAT_BASE:%.*]] = shufflevector <2 x i32 addrspace(1)*> [[DOTSPLATINSERT_BASE]], <2 x i32 addrspace(1)*> undef, <2 x i32> zeroinitializer, !is_base_value !0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32 addrspace(1)*> [[DOTSPLATINSERT]], <2 x i32 addrspace(1)*> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[VEC:%.*]] = getelementptr i32, <2 x i32 addrspace(1)*> [[DOTSPLAT]], <2 x i64> [[OFFSETS:%.*]] ; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(<2 x i32 addrspace(1)*> [[VEC]], <2 x i32 addrspace(1)*> [[DOTSPLAT_BASE]]) ]