diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -13363,6 +13363,13 @@
 
   auto Mask = SVI->getShuffleMask();
 
+  // Sanity check if all the indices are NOT in range.
+  // If mask is `undef` or `poison`, `Mask` may be a vector of -1s.
+  // If all of them are `undef`, OOB read will happen later.
+  if (llvm::all_of(Mask, [](int Idx) { return Idx == UndefMaskElem; })) {
+    return false;
+  }
+
   Type *PtrTy =
       UseScalable
           ? STVTy->getElementType()->getPointerTo(SI->getPointerAddressSpace())
@@ -13414,9 +13421,9 @@
       } else {
         unsigned StartMask = 0;
         for (unsigned j = 1; j < LaneLen; j++) {
-          unsigned IdxJ = StoreCount * LaneLen * Factor + j;
-          if (Mask[IdxJ * Factor + IdxI] >= 0) {
-            StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
+          unsigned IdxJ = StoreCount * LaneLen * Factor + j * Factor + i;
+          if (Mask[IdxJ] >= 0) {
+            StartMask = Mask[IdxJ] - j;
             break;
           }
         }
diff --git a/llvm/test/CodeGen/AArch64/aarch64-shufflevector.ll b/llvm/test/CodeGen/AArch64/aarch64-shufflevector.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-shufflevector.ll
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -opaque-pointers < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @f_undef(<8 x i64> %a, ptr %dst) {
+; CHECK-LABEL: f_undef:
+; CHECK:       // %bb.0: // %BB
+; CHECK-NEXT:    ret
+BB:
+  %S = shufflevector <8 x i64> %a, <8 x i64> %a, <16 x i32> undef
+  store <16 x i64> %S, ptr %dst, align 64
+  ret void
+}
+
+define void @f_poison(<8 x i64> %a, ptr %dst) {
+; CHECK-LABEL: f_poison:
+; CHECK:       // %bb.0: // %BB
+; CHECK-NEXT:    ret
+BB:
+  %S = shufflevector <8 x i64> %a, <8 x i64> %a, <16 x i32> poison
+  store <16 x i64> %S, ptr %dst, align 64
+  ret void
+}
+
+define void @f_undef_15(<8 x i64> %a, ptr %dst) {
+; CHECK-LABEL: f_undef_15:
+; CHECK:       // %bb.0: // %BB
+; CHECK-NEXT:    mov x9, x0
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $q0_q1
+; CHECK-NEXT:    add x8, x0, #64
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    st2 { v0.2d, v1.2d }, [x9], #32
+; CHECK-NEXT:    st2 { v0.2d, v1.2d }, [x8]
+; CHECK-NEXT:    add x8, x0, #96
+; CHECK-NEXT:    st2 { v0.2d, v1.2d }, [x9]
+; CHECK-NEXT:    st2 { v0.2d, v1.2d }, [x8]
+; CHECK-NEXT:    ret
+BB:
+  %S = shufflevector <8 x i64> %a, <8 x i64> %a, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  store <16 x i64> %S, ptr %dst, align 64
+  ret void
+}
+
+define void @f_undef_1(<8 x i64> %a, ptr %dst) {
+; CHECK-LABEL: f_undef_1:
+; CHECK:       // %bb.0: // %BB
+; CHECK-NEXT:    mov x9, x0
+; CHECK-NEXT:    add x8, x0, #64
+; CHECK-NEXT:    mov v16.16b, v0.16b
+; CHECK-NEXT:    // kill: def $q3 killed $q3 def $q3_q4
+; CHECK-NEXT:    mov v17.16b, v16.16b
+; CHECK-NEXT:    mov v5.16b, v2.16b
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $q1_q2
+; CHECK-NEXT:    st2 { v16.2d, v17.2d }, [x9], #32
+; CHECK-NEXT:    mov v6.16b, v5.16b
+; CHECK-NEXT:    mov v2.16b, v1.16b
+; CHECK-NEXT:    mov v4.16b, v3.16b
+; CHECK-NEXT:    st2 { v5.2d, v6.2d }, [x8]
+; CHECK-NEXT:    add x8, x0, #96
+; CHECK-NEXT:    st2 { v1.2d, v2.2d }, [x9]
+; CHECK-NEXT:    st2 { v3.2d, v4.2d }, [x8]
+; CHECK-NEXT:    ret
+BB:
+  %S = shufflevector <8 x i64> %a, <8 x i64> %a, <16 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  store <16 x i64> %S, ptr %dst, align 64
+  ret void
+}