diff --git a/llvm/lib/Target/X86/X86InterleavedAccess.cpp b/llvm/lib/Target/X86/X86InterleavedAccess.cpp
--- a/llvm/lib/Target/X86/X86InterleavedAccess.cpp
+++ b/llvm/lib/Target/X86/X86InterleavedAccess.cpp
@@ -215,8 +215,8 @@
     // TODO: Support inbounds GEP.
     Value *NewBasePtr =
         Builder.CreateGEP(VecBaseTy, VecBasePtr, Builder.getInt32(i));
-    Instruction *NewLoad =
-        Builder.CreateAlignedLoad(VecBaseTy, NewBasePtr, LI->getAlign());
+    Instruction *NewLoad = Builder.CreateAlignedLoad(
+        VecBaseTy, NewBasePtr, NewBasePtr->getPointerAlignment(DL));
     DecomposedVectors.push_back(NewLoad);
   }
 }
diff --git a/llvm/test/CodeGen/X86/x86-interleaved-access.ll b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
--- a/llvm/test/CodeGen/X86/x86-interleaved-access.ll
+++ b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
@@ -6,10 +6,10 @@
 define <4 x double> @load_factorf64_4(<16 x double>* %ptr) {
 ; AVX-LABEL: load_factorf64_4:
 ; AVX: # %bb.0:
-; AVX-NEXT: vmovupd (%rdi), %ymm0
-; AVX-NEXT: vmovupd 32(%rdi), %ymm1
-; AVX-NEXT: vmovupd 64(%rdi), %ymm2
-; AVX-NEXT: vmovupd 96(%rdi), %ymm3
+; AVX-NEXT: vmovapd (%rdi), %ymm0
+; AVX-NEXT: vmovapd 32(%rdi), %ymm1
+; AVX-NEXT: vmovapd 64(%rdi), %ymm2
+; AVX-NEXT: vmovapd 96(%rdi), %ymm3
 ; AVX-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm0[0,1],ymm2[0,1]
 ; AVX-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm1[0,1],ymm3[0,1]
 ; AVX-NEXT: vhaddpd %ymm5, %ymm4, %ymm4
@@ -34,10 +34,10 @@
 define <4 x double> @load_factorf64_2(<16 x double>* %ptr) {
 ; AVX-LABEL: load_factorf64_2:
 ; AVX: # %bb.0:
-; AVX-NEXT: vmovupd (%rdi), %ymm0
-; AVX-NEXT: vmovupd 32(%rdi), %ymm1
-; AVX-NEXT: vmovupd 64(%rdi), %ymm2
-; AVX-NEXT: vmovupd 96(%rdi), %ymm3
+; AVX-NEXT: vmovapd (%rdi), %ymm0
+; AVX-NEXT: vmovapd 32(%rdi), %ymm1
+; AVX-NEXT: vmovapd 64(%rdi), %ymm2
+; AVX-NEXT: vmovapd 96(%rdi), %ymm3
 ; AVX-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm0[0,1],ymm2[0,1]
 ; AVX-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm1[0,1],ymm3[0,1]
 ; AVX-NEXT: vunpcklpd {{.*#+}} ymm4 = ymm4[0],ymm5[0],ymm4[2],ymm5[2]
@@ -56,8 +56,8 @@
 define <4 x double> @load_factorf64_1(<16 x double>* %ptr) {
 ; AVX-LABEL: load_factorf64_1:
 ; AVX: # %bb.0:
-; AVX-NEXT: vmovupd (%rdi), %ymm0
-; AVX-NEXT: vmovupd 32(%rdi), %ymm1
+; AVX-NEXT: vmovapd (%rdi), %ymm0
+; AVX-NEXT: vmovapd 32(%rdi), %ymm1
 ; AVX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[0,1],mem[0,1]
 ; AVX-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[0,1],mem[0,1]
 ; AVX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
@@ -73,10 +73,10 @@
 define <4 x i64> @load_factori64_4(<16 x i64>* %ptr) {
 ; AVX1-LABEL: load_factori64_4:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vmovups (%rdi), %ymm0
-; AVX1-NEXT: vmovups 32(%rdi), %ymm1
-; AVX1-NEXT: vmovups 64(%rdi), %ymm2
-; AVX1-NEXT: vmovups 96(%rdi), %ymm3
+; AVX1-NEXT: vmovaps (%rdi), %ymm0
+; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT: vmovaps 64(%rdi), %ymm2
+; AVX1-NEXT: vmovaps 96(%rdi), %ymm3
 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm0[0,1],ymm2[0,1]
 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm1[0,1],ymm3[0,1]
 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
@@ -100,10 +100,10 @@
 ;
 ; AVX2OR512-LABEL: load_factori64_4:
 ; AVX2OR512: # %bb.0:
-; AVX2OR512-NEXT: vmovdqu (%rdi), %ymm0
-; AVX2OR512-NEXT: vmovdqu 32(%rdi), %ymm1
-; AVX2OR512-NEXT: vmovdqu 64(%rdi), %ymm2
-; AVX2OR512-NEXT: vmovdqu 96(%rdi), %ymm3
+; AVX2OR512-NEXT: vmovdqa (%rdi), %ymm0
+; AVX2OR512-NEXT: vmovdqa 32(%rdi), %ymm1
+; AVX2OR512-NEXT: vmovdqa 64(%rdi), %ymm2
+; AVX2OR512-NEXT: vmovdqa 96(%rdi), %ymm3
 ; AVX2OR512-NEXT: vperm2i128 {{.*#+}} ymm4 = ymm0[0,1],ymm2[0,1]
 ; AVX2OR512-NEXT: vperm2i128 {{.*#+}} ymm5 = ymm1[0,1],ymm3[0,1]
 ; AVX2OR512-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
@@ -1316,18 +1316,18 @@
 define <64 x i8> @interleaved_load_vf64_i8_stride3(<192 x i8>* %ptr){
 ; AVX1-LABEL: interleaved_load_vf64_i8_stride3:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vmovdqu (%rdi), %xmm11
-; AVX1-NEXT: vmovdqu 16(%rdi), %xmm10
-; AVX1-NEXT: vmovdqu 32(%rdi), %xmm8
-; AVX1-NEXT: vmovdqu 48(%rdi), %xmm3
-; AVX1-NEXT: vmovdqu 64(%rdi), %xmm12
-; AVX1-NEXT: vmovdqu 80(%rdi), %xmm9
-; AVX1-NEXT: vmovdqu 96(%rdi), %xmm6
-; AVX1-NEXT: vmovdqu 112(%rdi), %xmm14
-; AVX1-NEXT: vmovdqu 128(%rdi), %xmm13
-; AVX1-NEXT: vmovdqu 144(%rdi), %xmm5
-; AVX1-NEXT: vmovdqu 160(%rdi), %xmm1
-; AVX1-NEXT: vmovdqu 176(%rdi), %xmm15
+; AVX1-NEXT: vmovdqa (%rdi), %xmm11
+; AVX1-NEXT: vmovdqa 16(%rdi), %xmm10
+; AVX1-NEXT: vmovdqa 32(%rdi), %xmm8
+; AVX1-NEXT: vmovdqa 48(%rdi), %xmm3
+; AVX1-NEXT: vmovdqa 64(%rdi), %xmm12
+; AVX1-NEXT: vmovdqa 80(%rdi), %xmm9
+; AVX1-NEXT: vmovdqa 96(%rdi), %xmm6
+; AVX1-NEXT: vmovdqa 112(%rdi), %xmm14
+; AVX1-NEXT: vmovdqa 128(%rdi), %xmm13
+; AVX1-NEXT: vmovdqa 144(%rdi), %xmm5
+; AVX1-NEXT: vmovdqa 160(%rdi), %xmm1
+; AVX1-NEXT: vmovdqa 176(%rdi), %xmm15
 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,3,6,9,12,15,2,5,8,11,14,1,4,7,10,13]
 ; AVX1-NEXT: vpshufb %xmm4, %xmm6, %xmm6
 ; AVX1-NEXT: vpshufb %xmm4, %xmm5, %xmm5
@@ -1393,12 +1393,12 @@
 ;
 ; AVX2-LABEL: interleaved_load_vf64_i8_stride3:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: vmovdqu (%rdi), %xmm0
-; AVX2-NEXT: vmovdqu 16(%rdi), %xmm1
-; AVX2-NEXT: vmovdqu 32(%rdi), %xmm2
-; AVX2-NEXT: vmovdqu 96(%rdi), %xmm3
-; AVX2-NEXT: vmovdqu 112(%rdi), %xmm4
-; AVX2-NEXT: vmovdqu 128(%rdi), %xmm5
+; AVX2-NEXT: vmovdqa (%rdi), %xmm0
+; AVX2-NEXT: vmovdqa 16(%rdi), %xmm1
+; AVX2-NEXT: vmovdqa 32(%rdi), %xmm2
+; AVX2-NEXT: vmovdqa 96(%rdi), %xmm3
+; AVX2-NEXT: vmovdqa 112(%rdi), %xmm4
+; AVX2-NEXT: vmovdqa 128(%rdi), %xmm5
 ; AVX2-NEXT: vinserti128 $1, 48(%rdi), %ymm0, %ymm0
 ; AVX2-NEXT: vinserti128 $1, 64(%rdi), %ymm1, %ymm1
 ; AVX2-NEXT: vinserti128 $1, 80(%rdi), %ymm2, %ymm2
@@ -1436,12 +1436,12 @@
 ;
 ; AVX512-LABEL: interleaved_load_vf64_i8_stride3:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vmovdqu (%rdi), %xmm0
-; AVX512-NEXT: vmovdqu 16(%rdi), %xmm1
-; AVX512-NEXT: vmovdqu 32(%rdi), %xmm2
-; AVX512-NEXT: vmovdqu 96(%rdi), %xmm3
-; AVX512-NEXT: vmovdqu 112(%rdi), %xmm4
-; AVX512-NEXT: vmovdqu 128(%rdi), %xmm5
+; AVX512-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512-NEXT: vmovdqa 16(%rdi), %xmm1
+; AVX512-NEXT: vmovdqa 32(%rdi), %xmm2
+; AVX512-NEXT: vmovdqa 96(%rdi), %xmm3
+; AVX512-NEXT: vmovdqa 112(%rdi), %xmm4
+; AVX512-NEXT: vmovdqa 128(%rdi), %xmm5
 ; AVX512-NEXT: vinserti128 $1, 48(%rdi), %ymm0, %ymm0
 ; AVX512-NEXT: vinserti128 $1, 64(%rdi), %ymm1, %ymm1
 ; AVX512-NEXT: vinserti128 $1, 80(%rdi), %ymm2, %ymm2
diff --git a/llvm/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll b/llvm/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll
--- a/llvm/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll
+++ b/llvm/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll
@@ -5,15 +5,15 @@
 
 define <4 x double> @load_factorf64_4(<16 x double>* %ptr) {
 ; CHECK-LABEL: @load_factorf64_4(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x double>* %ptr to <4 x double>*
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x double>* [[PTR:%.*]] to <4 x double>*
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x double>, <4 x double>* [[TMP2]], align 16
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x double>, <4 x double>* [[TMP2]], align 32
 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP1]], i32 1
-; CHECK-NEXT: [[TMP5:%.*]] = load <4 x double>, <4 x double>* [[TMP4]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = load <4 x double>, <4 x double>* [[TMP4]], align 32
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP1]], i32 2
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x double>, <4 x double>* [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = load <4 x double>, <4 x double>* [[TMP6]], align 32
 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP1]], i32 3
-; CHECK-NEXT: [[TMP9:%.*]] = load <4 x double>, <4 x double>* [[TMP8]], align 16
+; CHECK-NEXT: [[TMP9:%.*]] = load <4 x double>, <4 x double>* [[TMP8]], align 32
 ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> [[TMP7]], <4 x i32>
 ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP9]], <4 x i32>
 ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> [[TMP7]], <4 x i32>
@@ -40,15 +40,15 @@
 
 define <4 x i64> @load_factori64_4(<16 x i64>* %ptr) {
 ; CHECK-LABEL: @load_factori64_4(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i64>* %ptr to <4 x i64>*
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i64>* [[PTR:%.*]] to <4 x i64>*
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr <4 x i64>, <4 x i64>* [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 16
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32
 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr <4 x i64>, <4 x i64>* [[TMP1]], i32 1
-; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* [[TMP4]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* [[TMP4]], align 32
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr <4 x i64>, <4 x i64>* [[TMP1]], i32 2
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* [[TMP6]], align 32
 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr <4 x i64>, <4 x i64>* [[TMP1]], i32 3
-; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i64>, <4 x i64>* [[TMP8]], align 16
+; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i64>, <4 x i64>* [[TMP8]], align 32
 ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> [[TMP7]], <4 x i32>
 ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> [[TMP9]], <4 x i32>
 ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> [[TMP7]], <4 x i32>
@@ -75,15 +75,15 @@
 
 define <4 x double> @load_factorf64_1(<16 x double>* %ptr) {
 ; CHECK-LABEL: @load_factorf64_1(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x double>* %ptr to <4 x double>*
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x double>* [[PTR:%.*]] to <4 x double>*
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x double>, <4 x double>* [[TMP2]], align 16
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x double>, <4 x double>* [[TMP2]], align 32
 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP1]], i32 1
-; CHECK-NEXT: [[TMP5:%.*]] = load <4 x double>, <4 x double>* [[TMP4]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = load <4 x double>, <4 x double>* [[TMP4]], align 32
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP1]], i32 2
-; CHECK-NEXT: [[TMP7:%.*]] = load <4 x double>, <4 x double>* [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = load <4 x double>, <4 x double>* [[TMP6]], align 32
 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP1]], i32 3
-; CHECK-NEXT: [[TMP9:%.*]] = load <4 x double>, <4 x double>* [[TMP8]], align 16
+; CHECK-NEXT: [[TMP9:%.*]] = load <4 x double>, <4 x double>* [[TMP8]], align 32
 ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> [[TMP7]], <4 x i32>
 ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP9]], <4 x i32>
 ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> [[TMP7]], <4 x i32>
@@ -228,6 +228,13 @@
 @a = local_unnamed_addr global <4 x double> zeroinitializer, align 32
 ; Function Attrs: norecurse nounwind readonly uwtable
 define <4 x double> @test_unhandled(<4 x double> %b) {
+; CHECK-LABEL: @test_unhandled(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x double>, <4 x double>* @a, align 32
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> undef, <4 x i32>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[B:%.*]], <4 x i32>
+; CHECK-NEXT: ret <4 x double> [[SHUFFLE]]
+;
 entry:
   %0 = load <4 x double>, <4 x double>* @a, align 32
   %1 = shufflevector <4 x double> %0, <4 x double> undef, <4 x i32>
diff --git a/llvm/test/Transforms/InterleavedAccess/X86/interleavedLoad.ll b/llvm/test/Transforms/InterleavedAccess/X86/interleavedLoad.ll
--- a/llvm/test/Transforms/InterleavedAccess/X86/interleavedLoad.ll
+++ b/llvm/test/Transforms/InterleavedAccess/X86/interleavedLoad.ll
@@ -6,17 +6,17 @@
 ; AVX2-LABEL: @interleaved_load_vf32_i8_stride3(
 ; AVX2-NEXT: [[TMP1:%.*]] = bitcast <96 x i8>* [[PTR:%.*]] to <16 x i8>*
 ; AVX2-NEXT: [[TMP2:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 0
-; AVX2-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[TMP2]]
+; AVX2-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[TMP2]], align 16
 ; AVX2-NEXT: [[TMP4:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 1
-; AVX2-NEXT: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[TMP4]]
+; AVX2-NEXT: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[TMP4]], align 16
 ; AVX2-NEXT: [[TMP6:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 2
-; AVX2-NEXT: [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* [[TMP6]]
+; AVX2-NEXT: [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* [[TMP6]], align 16
 ; AVX2-NEXT: [[TMP8:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 3
-; AVX2-NEXT: [[TMP9:%.*]] = load <16 x i8>, <16 x i8>* [[TMP8]]
+; AVX2-NEXT: [[TMP9:%.*]] = load <16 x i8>, <16 x i8>* [[TMP8]], align 16
 ; AVX2-NEXT: [[TMP10:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 4
-; AVX2-NEXT: [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* [[TMP10]]
+; AVX2-NEXT: [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* [[TMP10]], align 16
 ; AVX2-NEXT: [[TMP12:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 5
-; AVX2-NEXT: [[TMP13:%.*]] = load <16 x i8>, <16 x i8>* [[TMP12]]
+; AVX2-NEXT: [[TMP13:%.*]] = load <16 x i8>, <16 x i8>* [[TMP12]], align 16
 ; AVX2-NEXT: [[TMP14:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> [[TMP9]], <32 x i32>
 ; AVX2-NEXT: [[TMP15:%.*]] = shufflevector <16 x i8> [[TMP5]], <16 x i8> [[TMP11]], <32 x i32>
 ; AVX2-NEXT: [[TMP16:%.*]] = shufflevector <16 x i8> [[TMP7]], <16 x i8> [[TMP13]], <32 x i32>
@@ -48,11 +48,11 @@
 ; AVX2-LABEL: @interleaved_load_vf16_i8_stride3(
 ; AVX2-NEXT: [[TMP1:%.*]] = bitcast <48 x i8>* [[PTR:%.*]] to <16 x i8>*
 ; AVX2-NEXT: [[TMP2:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 0
-; AVX2-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[TMP2]]
+; AVX2-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[TMP2]], align 16
 ; AVX2-NEXT: [[TMP4:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 1
-; AVX2-NEXT: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[TMP4]]
+; AVX2-NEXT: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[TMP4]], align 16
 ; AVX2-NEXT: [[TMP6:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 2
-; AVX2-NEXT: [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* [[TMP6]]
+; AVX2-NEXT: [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* [[TMP6]], align 16
 ; AVX2-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> undef, <16 x i32>
 ; AVX2-NEXT: [[TMP9:%.*]] = shufflevector <16 x i8> [[TMP5]], <16 x i8> undef, <16 x i32>
 ; AVX2-NEXT: [[TMP10:%.*]] = shufflevector <16 x i8> [[TMP7]], <16 x i8> undef, <16 x i32>
@@ -79,7 +79,7 @@
 
 define <8 x i8> @interleaved_load_vf8_i8_stride3(<24 x i8>* %ptr){
 ; AVX2-LABEL: @interleaved_load_vf8_i8_stride3(
-; AVX2-NEXT: [[WIDE_VEC:%.*]] = load <24 x i8>, <24 x i8>* [[PTR:%.*]]
+; AVX2-NEXT: [[WIDE_VEC:%.*]] = load <24 x i8>, <24 x i8>* [[PTR:%.*]], align 32
 ; AVX2-NEXT: [[V1:%.*]] = shufflevector <24 x i8> [[WIDE_VEC]], <24 x i8> undef, <8 x i32>
 ; AVX2-NEXT: [[V2:%.*]] = shufflevector <24 x i8> [[WIDE_VEC]], <24 x i8> undef, <8 x i32>
 ; AVX2-NEXT: [[V3:%.*]] = shufflevector <24 x i8> [[WIDE_VEC]], <24 x i8> undef, <8 x i32>
@@ -101,29 +101,29 @@
 ; AVX2-LABEL: @interleaved_load_vf64_i8_stride3(
 ; AVX2-NEXT: [[TMP1:%.*]] = bitcast <192 x i8>* [[PTR:%.*]] to <16 x i8>*
 ; AVX2-NEXT: [[TMP2:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 0
-; AVX2-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[TMP2]], align 1
+; AVX2-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[TMP2]], align 16
 ; AVX2-NEXT: [[TMP4:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 1
-; AVX2-NEXT: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[TMP4]], align 1
+; AVX2-NEXT: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[TMP4]], align 16
 ; AVX2-NEXT: [[TMP6:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 2
-; AVX2-NEXT: [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* [[TMP6]], align 1
+; AVX2-NEXT: [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* [[TMP6]], align 16
 ; AVX2-NEXT: [[TMP8:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 3
-; AVX2-NEXT: [[TMP9:%.*]] = load <16 x i8>, <16 x i8>* [[TMP8]], align 1
+; AVX2-NEXT: [[TMP9:%.*]] = load <16 x i8>, <16 x i8>* [[TMP8]], align 16
 ; AVX2-NEXT: [[TMP10:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 4
-; AVX2-NEXT: [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* [[TMP10]], align 1
+; AVX2-NEXT: [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* [[TMP10]], align 16
 ; AVX2-NEXT: [[TMP12:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 5
-; AVX2-NEXT: [[TMP13:%.*]] = load <16 x i8>, <16 x i8>* [[TMP12]], align 1
+; AVX2-NEXT: [[TMP13:%.*]] = load <16 x i8>, <16 x i8>* [[TMP12]], align 16
 ; AVX2-NEXT: [[TMP14:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 6
-; AVX2-NEXT: [[TMP15:%.*]] = load <16 x i8>, <16 x i8>* [[TMP14]], align 1
+; AVX2-NEXT: [[TMP15:%.*]] = load <16 x i8>, <16 x i8>* [[TMP14]], align 16
 ; AVX2-NEXT: [[TMP16:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 7
-; AVX2-NEXT: [[TMP17:%.*]] = load <16 x i8>, <16 x i8>* [[TMP16]], align 1
+; AVX2-NEXT: [[TMP17:%.*]] = load <16 x i8>, <16 x i8>* [[TMP16]], align 16
 ; AVX2-NEXT: [[TMP18:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 8
-; AVX2-NEXT: [[TMP19:%.*]] = load <16 x i8>, <16 x i8>* [[TMP18]], align 1
+; AVX2-NEXT: [[TMP19:%.*]] = load <16 x i8>, <16 x i8>* [[TMP18]], align 16
 ; AVX2-NEXT: [[TMP20:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 9
-; AVX2-NEXT: [[TMP21:%.*]] = load <16 x i8>, <16 x i8>* [[TMP20]], align 1
+; AVX2-NEXT: [[TMP21:%.*]] = load <16 x i8>, <16 x i8>* [[TMP20]], align 16
 ; AVX2-NEXT: [[TMP22:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 10
-; AVX2-NEXT: [[TMP23:%.*]] = load <16 x i8>, <16 x i8>* [[TMP22]], align 1
+; AVX2-NEXT: [[TMP23:%.*]] = load <16 x i8>, <16 x i8>* [[TMP22]], align 16
 ; AVX2-NEXT: [[TMP24:%.*]] = getelementptr <16 x i8>, <16 x i8>* [[TMP1]], i32 11
-; AVX2-NEXT: [[TMP25:%.*]] = load <16 x i8>, <16 x i8>* [[TMP24]], align 1
+; AVX2-NEXT: [[TMP25:%.*]] = load <16 x i8>, <16 x i8>* [[TMP24]], align 16
 ; AVX2-NEXT: [[TMP26:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> [[TMP9]], <32 x i32>
 ; AVX2-NEXT: [[TMP27:%.*]] = shufflevector <16 x i8> [[TMP5]], <16 x i8> [[TMP11]], <32 x i32>
 ; AVX2-NEXT: [[TMP28:%.*]] = shufflevector <16 x i8> [[TMP7]], <16 x i8> [[TMP13]], <32 x i32>
diff --git a/llvm/test/Transforms/InterleavedAccess/X86/interleavedStore.ll b/llvm/test/Transforms/InterleavedAccess/X86/interleavedStore.ll
--- a/llvm/test/Transforms/InterleavedAccess/X86/interleavedStore.ll
+++ b/llvm/test/Transforms/InterleavedAccess/X86/interleavedStore.ll
@@ -25,7 +25,7 @@
 ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <32 x i8> [[TMP13]], <32 x i8> [[TMP14]], <64 x i32>
 ; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <32 x i8> [[TMP15]], <32 x i8> [[TMP16]], <64 x i32>
 ; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <64 x i8> [[TMP17]], <64 x i8> [[TMP18]], <128 x i32>
-; CHECK-NEXT: store <128 x i8> [[TMP19]], <128 x i8>* [[P:%.*]]
+; CHECK-NEXT: store <128 x i8> [[TMP19]], <128 x i8>* [[P:%.*]], align 128
 ; CHECK-NEXT: ret void
 ;
   %v1 = shufflevector <32 x i8> %x1, <32 x i8> %x2, <64 x i32>
@@ -54,7 +54,7 @@
 ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i8> [[TMP9]], <16 x i8> [[TMP10]], <32 x i32>
 ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i8> [[TMP11]], <16 x i8> [[TMP12]], <32 x i32>
 ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <32 x i8> [[TMP13]], <32 x i8> [[TMP14]], <64 x i32>
-; CHECK-NEXT: store <64 x i8> [[TMP15]], <64 x i8>* [[P:%.*]]
+; CHECK-NEXT: store <64 x i8> [[TMP15]], <64 x i8>* [[P:%.*]], align 64
 ; CHECK-NEXT: ret void
 ;
   %v1 = shufflevector <16 x i8> %x1, <16 x i8> %x2, <32 x i32>
@@ -77,7 +77,7 @@
 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], <16 x i32>
 ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], <16 x i32>
 ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i8> [[TMP7]], <16 x i8> [[TMP8]], <32 x i32>
-; CHECK-NEXT: store <32 x i8> [[TMP9]], <32 x i8>* [[P:%.*]]
+; CHECK-NEXT: store <32 x i8> [[TMP9]], <32 x i8>* [[P:%.*]], align 32
 ; CHECK-NEXT: ret void
 ;
   %v1 = shufflevector <8 x i8> %x1, <8 x i8> %x2, <16 x i32>
@@ -232,7 +232,7 @@
 ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <64 x i8> [[TMP23]], <64 x i8> [[TMP24]], <128 x i32>
 ; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <64 x i8> [[TMP25]], <64 x i8> [[TMP26]], <128 x i32>
 ; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <128 x i8> [[TMP27]], <128 x i8> [[TMP28]], <256 x i32>
-; CHECK-NEXT: store <256 x i8> [[TMP29]], <256 x i8>* [[P:%.*]]
+; CHECK-NEXT: store <256 x i8> [[TMP29]], <256 x i8>* [[P:%.*]], align 256
 ; CHECK-NEXT: ret void
 ;
   %1 = shufflevector <64 x i8> %a, <64 x i8> %b, <128 x i32>
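The net effect of the X86InterleavedAccess.cpp hunk is easier to see with the changed loop written out in one piece. The sketch below is assembled only from the hunk and its context; the enclosing function and the loop header (including the bound name NumLoads) are not shown in the hunk and are assumed here for illustration.

    // Sketch, not the verbatim source: per-chunk load emission after the patch,
    // assuming the usual decomposition context in X86InterleavedAccess.cpp with
    // Builder, DL, VecBaseTy, VecBasePtr, LI and DecomposedVectors in scope.
    for (unsigned i = 0; i < NumLoads; i++) { // loop bound name assumed
      // TODO: Support inbounds GEP.
      Value *NewBasePtr =
          Builder.CreateGEP(VecBaseTy, VecBasePtr, Builder.getInt32(i));
      // Before the patch every chunk reused LI->getAlign(), i.e. the alignment
      // of the original wide load; now the alignment is derived from the chunk's
      // own pointer via getPointerAlignment(DL).
      Instruction *NewLoad = Builder.CreateAlignedLoad(
          VecBaseTy, NewBasePtr, NewBasePtr->getPointerAlignment(DL));
      DecomposedVectors.push_back(NewLoad);
    }

The regenerated tests reflect the stronger per-chunk alignment: the IR checks go from align 16 (or align 1) to align 16/32, and the corresponding codegen checks switch from unaligned moves (vmovupd/vmovups/vmovdqu) to aligned ones (vmovapd/vmovaps/vmovdqa).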