Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -5718,6 +5718,9 @@ // requested vector load. if ((NumElems * Elt.getValueSizeInBits()) != VT.getSizeInBits()) return SDValue(); + // Do not merge volatile loads. + if (cast<LoadSDNode>(Elt.getNode())->isVolatile()) + return SDValue(); } else return SDValue(); } Index: test/CodeGen/X86/merge-consecutive-loads-128.ll =================================================================== --- test/CodeGen/X86/merge-consecutive-loads-128.ll +++ test/CodeGen/X86/merge-consecutive-loads-128.ll @@ -695,14 +695,14 @@ define <4 x float> @merge_4f32_f32_2345_volatile(float* %ptr) nounwind uwtable noinline ssp { ; SSE2-LABEL: merge_4f32_f32_2345_volatile: ; SSE2: # BB#0: -; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE2-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero -; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE2-NEXT: retq +; SSE2-DAG: movss {{.*#+}} xmm{{[0-9]+}} = mem[0],zero,zero,zero +; SSE2-DAG: movss {{.*#+}} xmm{{[0-9]+}} = mem[0],zero,zero,zero +; SSE2-DAG: movss {{.*#+}} xmm{{[0-9]+}} = mem[0],zero,zero,zero +; SSE2-DAG: movss {{.*#+}} xmm{{[0-9]+}} = mem[0],zero,zero,zero +; SSE2-DAG: unpcklps {{.*#+}} +; SSE2-DAG: unpcklps {{.*#+}} +; SSE2-DAG: unpcklps {{.*#+}} +; SSE2: retq ; ; SSE41-LABEL: merge_4f32_f32_2345_volatile: ; SSE41: # BB#0: Index: test/CodeGen/X86/merge-consecutive-loads-256.ll =================================================================== --- test/CodeGen/X86/merge-consecutive-loads-256.ll +++ test/CodeGen/X86/merge-consecutive-loads-256.ll @@ -694,10 +694,10 @@ ; AVX1: 
# BB#0: ; AVX1-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm1 -; AVX1-NEXT: vpinsrw $3, 6(%rdi), %xmm1, %xmm1 -; AVX1-NEXT: vpinsrw $4, 24(%rdi), %xmm0, %xmm0 -; AVX1-NEXT: vpinsrw $6, 28(%rdi), %xmm0, %xmm0 -; AVX1-NEXT: vpinsrw $7, 30(%rdi), %xmm0, %xmm0 +; AVX1-DAG: vpinsrw $3, 6(%rdi), %xmm1, %xmm1 +; AVX1-DAG: vpinsrw $4, 24(%rdi), %xmm0, %xmm0 +; AVX1-DAG: vpinsrw $6, 28(%rdi), %xmm0, %xmm0 +; AVX1-DAG: vpinsrw $7, 30(%rdi), %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; @@ -705,10 +705,10 @@ ; AVX2: # BB#0: ; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; AVX2-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm1 -; AVX2-NEXT: vpinsrw $3, 6(%rdi), %xmm1, %xmm1 -; AVX2-NEXT: vpinsrw $4, 24(%rdi), %xmm0, %xmm0 -; AVX2-NEXT: vpinsrw $6, 28(%rdi), %xmm0, %xmm0 -; AVX2-NEXT: vpinsrw $7, 30(%rdi), %xmm0, %xmm0 +; AVX2-DAG: vpinsrw $3, 6(%rdi), %xmm1, %xmm1 +; AVX2-DAG: vpinsrw $4, 24(%rdi), %xmm0, %xmm0 +; AVX2-DAG: vpinsrw $6, 28(%rdi), %xmm0, %xmm0 +; AVX2-DAG: vpinsrw $7, 30(%rdi), %xmm0, %xmm0 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; @@ -716,10 +716,10 @@ ; AVX512F: # BB#0: ; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm1 -; AVX512F-NEXT: vpinsrw $3, 6(%rdi), %xmm1, %xmm1 -; AVX512F-NEXT: vpinsrw $4, 24(%rdi), %xmm0, %xmm0 -; AVX512F-NEXT: vpinsrw $6, 28(%rdi), %xmm0, %xmm0 -; AVX512F-NEXT: vpinsrw $7, 30(%rdi), %xmm0, %xmm0 +; AVX512F-DAG: vpinsrw $3, 6(%rdi), %xmm1, %xmm1 +; AVX512F-DAG: vpinsrw $4, 24(%rdi), %xmm0, %xmm0 +; AVX512F-DAG: vpinsrw $6, 28(%rdi), %xmm0, %xmm0 +; AVX512F-DAG: vpinsrw $7, 30(%rdi), %xmm0, %xmm0 ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ; AVX512F-NEXT: retq ; @@ -728,10 +728,10 @@ ; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; X32-AVX-NEXT: vpinsrw $0, (%eax), %xmm0, %xmm1 -; X32-AVX-NEXT: vpinsrw $3, 6(%eax), %xmm1, %xmm1 -; X32-AVX-NEXT: vpinsrw $4, 24(%eax), %xmm0, 
%xmm0 -; X32-AVX-NEXT: vpinsrw $6, 28(%eax), %xmm0, %xmm0 -; X32-AVX-NEXT: vpinsrw $7, 30(%eax), %xmm0, %xmm0 +; X32-AVX-DAG: vpinsrw $3, 6(%eax), %xmm1, %xmm1 +; X32-AVX-DAG: vpinsrw $4, 24(%eax), %xmm0, %xmm0 +; X32-AVX-DAG: vpinsrw $6, 28(%eax), %xmm0, %xmm0 +; X32-AVX-DAG: vpinsrw $7, 30(%eax), %xmm0, %xmm0 ; X32-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; X32-AVX-NEXT: retl %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 0