diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11829,6 +11829,11 @@
       (PreExtendType == MVT::Other))
     return SDValue();
 
+  // Restrict valid pre-extend data type
+  if (PreExtendType != MVT::i8 && PreExtendType != MVT::i16 &&
+      PreExtendType != MVT::i32)
+    return SDValue();
+
   EVT PreExtendVT = TargetType.changeVectorElementType(PreExtendType);
   if (PreExtendVT.getVectorElementCount() !=
       TargetType.getVectorElementCount())
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext-vectortype-crash.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext-vectortype-crash.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext-vectortype-crash.ll
@@ -0,0 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple aarch64-none-linux-gnu | FileCheck %s
+
+@a = global i32 0, align 4
+@c = global i32 0, align 4
+@b = global i32 0, align 4
+
+; This test covers a case where extended value types can't be converted to
+; vector types, resulting in a crash. We don't care about the specific output
+; here, only that this case no longer causes said crash.
+; See https://reviews.llvm.org/D91255#2484399 for context
+define dso_local i32 @d() local_unnamed_addr #0 {
+; CHECK-LABEL: d:
+entry:
+  %0 = load i32, i32* @a, align 4
+  %and = and i32 %0, 2
+  store i32 %and, i32* @c, align 4
+  %.pr = load i32, i32* @b, align 4
+  %tobool.not11 = icmp eq i32 %.pr, 0
+  br i1 %tobool.not11, label %for.end, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  %1 = xor i32 %.pr, -1
+  %2 = zext i32 %1 to i64
+  %3 = add nuw nsw i64 %2, 1
+  %min.iters.check = icmp ugt i32 %.pr, -4
+  br i1 %min.iters.check, label %for.body.preheader22, label %vector.scevcheck
+
+vector.scevcheck:                                 ; preds = %for.body.preheader
+  %4 = trunc i32 %0 to i16
+  %5 = and i16 %4, 2
+  %6 = trunc i32 %.pr to i16
+  %7 = xor i16 %6, -1
+  %mul14 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 %5, i16 %7)
+  %mul.result = extractvalue { i16, i1 } %mul14, 0
+  %mul.overflow = extractvalue { i16, i1 } %mul14, 1
+  %8 = icmp slt i16 %mul.result, 0
+  %9 = icmp ult i32 %.pr, -65536
+  %10 = icmp ne i16 %5, 0
+  %11 = and i1 %9, %10
+  %12 = or i1 %8, %11
+  %13 = or i1 %12, %mul.overflow
+  br i1 %13, label %for.body.preheader22, label %vector.ph
+
+vector.ph:                                        ; preds = %vector.scevcheck
+  %n.vec = and i64 %3, 8589934588
+  %cast.crd = trunc i64 %n.vec to i32
+  %ind.end = mul i32 %and, %cast.crd
+  %cast.crd18 = trunc i64 %n.vec to i32
+  %ind.end19 = add i32 %.pr, %cast.crd18
+  %.splatinsert = insertelement <4 x i32> poison, i32 %and, i32 0
+  %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
+  %14 = mul nuw nsw <4 x i32> %.splat, <i32 0, i32 1, i32 2, i32 3>
+  %15 = shl nuw nsw i32 %and, 2
+  %.splatinsert20 = insertelement <4 x i32> poison, i32 %15, i32 0
+  %.splat21 = shufflevector <4 x i32> %.splatinsert20, <4 x i32> poison, <4 x i32> zeroinitializer
+  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %and, i32 0
+  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %pointer.phi = phi i16* [ undef, %vector.ph ], [ %ptr.ind, %vector.body ]
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %vec.ind = phi <4 x i32> [ %14, %vector.ph ], [ %vec.ind.next, %vector.body ]
+  %16 = getelementptr i16, i16* %pointer.phi, <4 x i64> <i64 0, i64 -1, i64 -2, i64 -3>
+  %17 = add nsw <4 x i32> %vec.ind, %broadcast.splat
+  %18 = shl <4 x i32> %17, <i32 16, i32 16, i32 16, i32 16>
+  %19 = ashr exact <4 x i32> %18, <i32 16, i32 16, i32 16, i32 16>
+  %20 = getelementptr inbounds i16, <4 x i16*> %16, i64 -1
+  %21 = ptrtoint <4 x i16*> %16 to <4 x i64>
+  %22 = trunc <4 x i64> %21 to <4 x i32>
+  %23 = shl <4 x i32> %22, <i32 16, i32 16, i32 16, i32 16>
+  %24 = ashr exact <4 x i32> %23, <i32 16, i32 16, i32 16, i32 16>
+  %25 = mul nsw <4 x i32> %19, %24
+  %26 = trunc <4 x i32> %25 to <4 x i16>
+  %reverse = shufflevector <4 x i16> %26, <4 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %27 = extractelement <4 x i16*> %20, i32 0
+  %28 = getelementptr i16, i16* %27, i64 -3
+  %29 = bitcast i16* %28 to <4 x i16>*
+  store <4 x i16> %reverse, <4 x i16>* %29, align 2
+  %index.next = add i64 %index, 4
+  %vec.ind.next = add <4 x i32> %vec.ind, %.splat21
+  %30 = icmp eq i64 %index.next, %n.vec
+  %ptr.ind = getelementptr i16, i16* %pointer.phi, i64 -4
+  br i1 %30, label %middle.block, label %vector.body
+
+middle.block:                                     ; preds = %vector.body
+  %cmp.n = icmp eq i64 %3, %n.vec
+  br i1 %cmp.n, label %for.cond.for.end_crit_edge, label %for.body.preheader22
+
+for.body.preheader22:                             ; preds = %vector.scevcheck, %for.body.preheader, %middle.block
+  %f.013.ph = phi i32 [ 0, %vector.scevcheck ], [ 0, %for.body.preheader ], [ %ind.end, %middle.block ]
+  %.ph = phi i32 [ %.pr, %vector.scevcheck ], [ %.pr, %for.body.preheader ], [ %ind.end19, %middle.block ]
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader22, %for.body
+  %f.013 = phi i32 [ %add, %for.body ], [ %f.013.ph, %for.body.preheader22 ]
+  %e.012 = phi i16* [ %incdec.ptr, %for.body ], [ undef, %for.body.preheader22 ]
+  %31 = phi i32 [ %inc, %for.body ], [ %.ph, %for.body.preheader22 ]
+  %sext = shl i32 %f.013, 16
+  %conv = ashr exact i32 %sext, 16
+  %add = add nsw i32 %conv, %and
+  %sext9 = shl i32 %add, 16
+  %conv2 = ashr exact i32 %sext9, 16
+  %incdec.ptr = getelementptr inbounds i16, i16* %e.012, i64 -1
+  %32 = ptrtoint i16* %e.012 to i64
+  %33 = trunc i64 %32 to i32
+  %sext10 = shl i32 %33, 16
+  %conv3 = ashr exact i32 %sext10, 16
+  %mul = mul nsw i32 %conv2, %conv3
+  %conv4 = trunc i32 %mul to i16
+  store i16 %conv4, i16* %incdec.ptr, align 2
+  %inc = add nsw i32 %31, 1
+  %tobool.not = icmp eq i32 %inc, 0
+  br i1 %tobool.not, label %for.cond.for.end_crit_edge, label %for.body
+
+for.cond.for.end_crit_edge:                       ; preds = %for.body, %middle.block
+  store i32 0, i32* @b, align 4
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  ret i32 undef
+}
+
+declare { i16, i1 } @llvm.umul.with.overflow.i16(i16, i16)
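
For readers outside the backend, the added early-out is a plain whitelist: the dup/ext combine now proceeds only when the pre-extend element type is i8, i16 or i32, and otherwise gives up before changeVectorElementType is asked for a vector type it cannot represent (the crash the new test guards against). Below is a minimal, standalone C++ sketch of that guard shape; ElemKind and isValidPreExtendType are hypothetical stand-ins for illustration, not llvm::MVT or code from this patch.

// Standalone sketch mirroring the patch's whitelist-style guard:
//   if (PreExtendType != MVT::i8 && PreExtendType != MVT::i16 &&
//       PreExtendType != MVT::i32)
//     return SDValue();
// ElemKind is a hypothetical stand-in for llvm::MVT.
#include <cstdio>

enum class ElemKind { i1, i8, i16, i32, i64, Other };

// True when the dup/ext combine may proceed for this element type;
// only 8-, 16- and 32-bit integer elements are allowed through.
static bool isValidPreExtendType(ElemKind K) {
  return K == ElemKind::i8 || K == ElemKind::i16 || K == ElemKind::i32;
}

int main() {
  std::printf("i16: %d\n", isValidPreExtendType(ElemKind::i16) ? 1 : 0); // 1
  std::printf("i1:  %d\n", isValidPreExtendType(ElemKind::i1) ? 1 : 0);  // 0
  std::printf("i64: %d\n", isValidPreExtendType(ElemKind::i64) ? 1 : 0); // 0
  return 0;
}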