diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -36715,8 +36715,10 @@ return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0)); // Share broadcast with the longest vector and extract low subvector (free). + // Only match users that broadcast this exact result of the Src node. for (SDNode *User : Src->uses()) if (User != N.getNode() && User->getOpcode() == X86ISD::VBROADCAST && + Src == User->getOperand(0) && User->getValueSizeInBits(0).getFixedSize() > VT.getFixedSizeInBits()) { return extractSubVector(SDValue(User, 0), 0, DAG, DL, diff --git a/llvm/test/CodeGen/X86/pr48215.ll b/llvm/test/CodeGen/X86/pr48215.ll --- a/llvm/test/CodeGen/X86/pr48215.ll +++ b/llvm/test/CodeGen/X86/pr48215.ll @@ -33,12 +33,14 @@ ; AVX2-NEXT: idivl %esi ; AVX2-NEXT: vmovd %eax, %xmm0 ; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7] +; AVX2-NEXT: vmovd %edx, %xmm1 +; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1 +; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7] +; AVX2-NEXT: vpcmpgtd %ymm0, %ymm2, %ymm0 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,6,7] -; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm1 -; AVX2-NEXT: vmovmskps %ymm1, %ecx -; AVX2-NEXT: vpcmpgtd %xmm0, %xmm2, %xmm0 -; AVX2-NEXT: vmovmskps %xmm0, %eax +; AVX2-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 +; AVX2-NEXT: vmovmskps %ymm0, %ecx +; AVX2-NEXT: vmovmskps %xmm1, %eax ; AVX2-NEXT: addl %ecx, %eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -49,8 +51,9 @@ ; AVX512-NEXT: cltd ; AVX512-NEXT: idivl %esi ; AVX512-NEXT: vpbroadcastd %eax, %ymm0 +; AVX512-NEXT: vpbroadcastd %edx, %xmm1 ; AVX512-NEXT: vpcmpltd {{.*}}(%rip), %ymm0, %k0 -; AVX512-NEXT: vpcmpltd {{.*}}(%rip), %xmm0, %k1 +; AVX512-NEXT: vpcmpltd {{.*}}(%rip), %xmm1, %k1 ; AVX512-NEXT: kmovw %k0, %eax ; AVX512-NEXT: movzbl %al, %ecx ; AVX512-NEXT: kmovw %k1, %eax