diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -43520,7 +43520,7 @@
   auto *LoadVec = dyn_cast<LoadSDNode>(InputVector);
   if (LoadVec && CIdx && ISD::isNormalLoad(LoadVec) && VT.isInteger() &&
       SrcVT.getVectorElementType() == VT && DCI.isAfterLegalizeDAG() &&
-      !LikelyUsedAsVector) {
+      !LikelyUsedAsVector && LoadVec->isSimple()) {
     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
     SDValue NewPtr = TLI.getVectorElementPointer(DAG, LoadVec->getBasePtr(),
                                                  SrcVT, EltIdx);
diff --git a/llvm/test/CodeGen/X86/extractelement-load.ll b/llvm/test/CodeGen/X86/extractelement-load.ll
--- a/llvm/test/CodeGen/X86/extractelement-load.ll
+++ b/llvm/test/CodeGen/X86/extractelement-load.ll
@@ -342,29 +342,29 @@
 ; X32-SSE2-LABEL: multi_use_volatile_load_scalarization:
 ; X32-SSE2:       # %bb.0:
 ; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT:    movl (%ecx), %eax
 ; X32-SSE2-NEXT:    movdqu (%ecx), %xmm0
 ; X32-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
+; X32-SSE2-NEXT:    movd %xmm0, %eax
 ; X32-SSE2-NEXT:    psubd %xmm1, %xmm0
 ; X32-SSE2-NEXT:    movdqa %xmm0, (%ecx)
 ; X32-SSE2-NEXT:    retl
 ;
 ; X64-SSSE3-LABEL: multi_use_volatile_load_scalarization:
 ; X64-SSSE3:       # %bb.0:
-; X64-SSSE3-NEXT:    movl (%rdi), %eax
 ; X64-SSSE3-NEXT:    movdqu (%rdi), %xmm0
 ; X64-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm1
+; X64-SSSE3-NEXT:    movd %xmm0, %eax
 ; X64-SSSE3-NEXT:    psubd %xmm1, %xmm0
 ; X64-SSSE3-NEXT:    movdqa %xmm0, (%rdi)
 ; X64-SSSE3-NEXT:    retq
 ;
 ; X64-AVX-LABEL: multi_use_volatile_load_scalarization:
 ; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    movl (%rdi), %eax
 ; X64-AVX-NEXT:    vmovdqu (%rdi), %xmm0
 ; X64-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; X64-AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vmovdqa %xmm0, (%rdi)
+; X64-AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm1
+; X64-AVX-NEXT:    vmovdqa %xmm1, (%rdi)
+; X64-AVX-NEXT:    vmovd %xmm0, %eax
 ; X64-AVX-NEXT:    retq
   %v = load volatile <4 x i32>, <4 x i32>* %p, align 1
   %v1 = add <4 x i32> %v,