Index: llvm/trunk/lib/CodeGen/InterleavedAccessPass.cpp =================================================================== --- llvm/trunk/lib/CodeGen/InterleavedAccessPass.cpp +++ llvm/trunk/lib/CodeGen/InterleavedAccessPass.cpp @@ -174,7 +174,7 @@ /// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...> /// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7> static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor, - unsigned MaxFactor) { + unsigned MaxFactor, unsigned OpNumElts) { unsigned NumElts = Mask.size(); if (NumElts < 4) return false; @@ -246,6 +246,9 @@ if (StartMask < 0) break; + // We must stay within the vectors; This case can happen with undefs. + if (StartMask + LaneLen > OpNumElts*2) + break; } // Found an interleaved mask of current factor. @@ -406,7 +409,8 @@ // Check if the shufflevector is RE-interleave shuffle. unsigned Factor; - if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor)) + unsigned OpNumElts = SVI->getOperand(0)->getType()->getVectorNumElements(); + if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor, OpNumElts)) return false; DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n"); Index: llvm/trunk/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll =================================================================== --- llvm/trunk/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll +++ llvm/trunk/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll @@ -547,3 +547,21 @@ store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4 ret void } + +@g = external global <4 x float> + +; The following does not give a valid interleaved store +; NEON-LABEL: define void @no_interleave +; NEON-NOT: call void @llvm.aarch64.neon.st2 +; NEON: shufflevector +; NEON: store +; NEON: ret void +; NO_NEON-LABEL: define void @no_interleave +; NO_NEON: shufflevector +; NO_NEON: store +; NO_NEON: ret void +define void @no_interleave(<4 x float> 
%a0) { + %v0 = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 0, i32 7, i32 1, i32 undef> + store <4 x float> %v0, <4 x float>* @g, align 16 + ret void +} Index: llvm/trunk/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll =================================================================== --- llvm/trunk/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll +++ llvm/trunk/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll @@ -626,3 +626,21 @@ store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4 ret void } + +@g = external global <4 x float> + +; The following does not give a valid interleaved store +; NEON-LABEL: define void @no_interleave +; NEON-NOT: call void @llvm.arm.neon.vst2 +; NEON: shufflevector +; NEON: store +; NEON: ret void +; NO_NEON-LABEL: define void @no_interleave +; NO_NEON: shufflevector +; NO_NEON: store +; NO_NEON: ret void +define void @no_interleave(<4 x float> %a0) { + %v0 = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 0, i32 7, i32 1, i32 undef> + store <4 x float> %v0, <4 x float>* @g, align 16 + ret void +}