Index: interleaved-accesses-64bits-avx.ll
===================================================================
--- interleaved-accesses-64bits-avx.ll
+++ interleaved-accesses-64bits-avx.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -mtriple=x86_64-pc-linux -mattr=+avx -interleaved-access -S | FileCheck %s
 
-; This file tests the function `llvm::lowerInterleavedLoad`.
+; This file tests the function `llvm::lowerInterleavedLoad/Store`.
 
 define <4 x double> @load_factorf64_4(<16 x double>* %ptr) {
 ; CHECK-LABEL: @load_factorf64_4(
@@ -102,4 +102,63 @@
   %mul = fmul <4 x double> %strided.v0, %strided.v3
   ret <4 x double> %mul
 }
 
+define void @store_factorf64_4(<16 x double>* %ptr, <4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x double> %v3) {
+; CHECK-LABEL: @store_factorf64_4(
+; CHECK-NEXT: [[S0:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x double> [[V2:%.*]], <4 x double> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+; CHECK-NEXT: store <16 x double> [[INTERLEAVED_VEC]], <16 x double>* [[PTR:%.*]], align 16
+; CHECK-NEXT: ret void
+;
+  %s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %s1 = shufflevector <4 x double> %v2, <4 x double> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %interleaved.vec = shufflevector <8 x double> %s0, <8 x double> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+  store <16 x double> %interleaved.vec, <16 x double>* %ptr, align 16
+  ret void
+}
+
+define void @store_factori64_4(<16 x i64>* %ptr, <4 x i64> %v0, <4 x i64> %v1, <4 x i64> %v2, <4 x i64> %v3) {
+; CHECK-LABEL: @store_factori64_4(
+; CHECK-NEXT: [[S0:%.*]] = shufflevector <4 x i64> [[V0:%.*]], <4 x i64> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x i64> [[V2:%.*]], <4 x i64> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+; CHECK-NEXT: store <16 x i64> [[INTERLEAVED_VEC]], <16 x i64>* [[PTR:%.*]], align 16
+; CHECK-NEXT: ret void
+;
+  %s0 = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %s1 = shufflevector <4 x i64> %v2, <4 x i64> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %interleaved.vec = shufflevector <8 x i64> %s0, <8 x i64> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+  store <16 x i64> %interleaved.vec, <16 x i64>* %ptr, align 16
+  ret void
+}
+
+define void @store_factorf64_4_revMask(<16 x double>* %ptr, <4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x double> %v3) {
+; CHECK-LABEL: @store_factorf64_4_revMask(
+; CHECK-NEXT: [[S0:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x double> [[V2:%.*]], <4 x double> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <16 x i32> <i32 12, i32 8, i32 4, i32 0, i32 13, i32 9, i32 5, i32 1, i32 14, i32 10, i32 6, i32 2, i32 15, i32 11, i32 7, i32 3>
+; CHECK-NEXT: store <16 x double> [[INTERLEAVED_VEC]], <16 x double>* [[PTR:%.*]], align 16
+; CHECK-NEXT: ret void
+;
+  %s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %s1 = shufflevector <4 x double> %v2, <4 x double> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %interleaved.vec = shufflevector <8 x double> %s0, <8 x double> %s1, <16 x i32> <i32 12, i32 8, i32 4, i32 0, i32 13, i32 9, i32 5, i32 1, i32 14, i32 10, i32 6, i32 2, i32 15, i32 11, i32 7, i32 3>
+  store <16 x double> %interleaved.vec, <16 x double>* %ptr, align 16
+  ret void
+}
+
+define void @store_factorf64_4_arbitraryMask(<16 x double>* %ptr, <16 x double> %v0, <16 x double> %v1, <16 x double> %v2, <16 x double> %v3) {
+; CHECK-LABEL: @store_factorf64_4_arbitraryMask(
+; CHECK-NEXT: [[S0:%.*]] = shufflevector <16 x double> [[V0:%.*]], <16 x double> [[V1:%.*]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT: [[S1:%.*]] = shufflevector <16 x double> [[V2:%.*]], <16 x double> [[V3:%.*]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x double> [[S0]], <32 x double> [[S1]], <16 x i32> <i32 4, i32 32, i32 16, i32 8, i32 5, i32 33, i32 17, i32 9, i32 6, i32 34, i32 18, i32 10, i32 7, i32 35, i32 19, i32 11>
+; CHECK-NEXT: store <16 x double> [[INTERLEAVED_VEC]], <16 x double>* [[PTR:%.*]], align 16
+; CHECK-NEXT: ret void
+;
+  %s0 = shufflevector <16 x double> %v0, <16 x double> %v1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  %s1 = shufflevector <16 x double> %v2, <16 x double> %v3, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  %interleaved.vec = shufflevector <32 x double> %s0, <32 x double> %s1, <16 x i32> <i32 4, i32 32, i32 16, i32 8, i32 5, i32 33, i32 17, i32 9, i32 6, i32 34, i32 18, i32 10, i32 7, i32 35, i32 19, i32 11>
+  store <16 x double> %interleaved.vec, <16 x double>* %ptr, align 16
+  ret void
+}