Index: lib/Target/X86/X86InterleavedAccess.cpp =================================================================== --- lib/Target/X86/X86InterleavedAccess.cpp +++ lib/Target/X86/X86InterleavedAccess.cpp @@ -98,17 +98,19 @@ bool X86InterleavedAccessGroup::isSupported() const { VectorType *ShuffleVecTy = Shuffles[0]->getType(); - uint64_t ShuffleVecSize = DL.getTypeSizeInBits(ShuffleVecTy); Type *ShuffleEltTy = ShuffleVecTy->getVectorElementType(); + unsigned WideInstSize; // Currently, lowering is supported for 4-element vectors of 64 bits on AVX. - uint64_t ExpectedShuffleVecSize; - if (isa(Inst)) - ExpectedShuffleVecSize = 256; - else - ExpectedShuffleVecSize = 1024; + if (isa(Inst)) { + if (DL.getTypeSizeInBits(ShuffleVecTy) != 256) + return false; + + WideInstSize = DL.getTypeSizeInBits(Inst->getType()); + } else + WideInstSize = DL.getTypeSizeInBits(Shuffles[0]->getType()); - if (!Subtarget.hasAVX() || ShuffleVecSize != ExpectedShuffleVecSize || + if (!Subtarget.hasAVX() || WideInstSize != 1024 || DL.getTypeSizeInBits(ShuffleEltTy) != 64 || Factor != 4) return false; Index: test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll =================================================================== --- test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll +++ test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll @@ -217,3 +217,20 @@ store <16 x double> %interleaved.vec, <16 x double>* %ptr, align 16 ret void } + +; This verifies that the test passes and does not hit any assertions. +; Today, X86InterleavedAccess could have handled this case and +; generated a transposed sequence by extending the current implementation, +; which would create dummy vectors of undef. But it deliberately does not +; optimize these cases where the load-size is less than Factor * +; NumberOfElements, because a better sequence can easily be generated by CG. 
+ +@a = local_unnamed_addr global <4 x double> zeroinitializer, align 32 +; Function Attrs: norecurse nounwind readonly uwtable +define <4 x double> @test_unhandled(<4 x double> %b) { +entry: + %0 = load <4 x double>, <4 x double>* @a, align 32 + %1 = shufflevector <4 x double> %0, <4 x double> undef, <4 x i32> + %shuffle = shufflevector <4 x double> %1, <4 x double> %b, <4 x i32> + ret <4 x double> %shuffle +}