Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19698,6 +19698,11 @@
     return SDValue();
 
   unsigned NumElems = N->getNumOperands();
+  // Offset must be a constant multiple of the
+  // known-minimum vector length of the result type.
+  if (Offset % NumElems != 0)
+    return SDValue();
+
   SDValue In = Op0.getOperand(0).getOperand(0);
   EVT InSVT = In.getValueType().getScalarType();
   EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
Index: llvm/test/CodeGen/AArch64/aarch64-avoid-illegal-extract-subvector.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/aarch64-avoid-illegal-extract-subvector.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -o - | FileCheck %s
+
+@a = external dso_local local_unnamed_addr global i16, align 4
+@f = external dso_local local_unnamed_addr global i32, align 4
+
+define dso_local void @myFunc() local_unnamed_addr {
+; CHECK-LABEL: myFunc:
+; CHECK:       // %bb.0: // %entry
+; CHECK:         addv
+; CHECK-NEXT:    str
+; CHECK-NEXT:    ret
+;
+entry:
+  %f = load i32, i32* @f, align 4
+  %0 = insertelement <8 x i32> zeroinitializer, i32 %f, i32 0
+  %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <8 x i32> zeroinitializer
+  %2 = add <8 x i32> %1,
+  %3 = extractelement <8 x i32> %2, i32 1
+  %conv18.2 = zext i32 %3 to i64
+  %4 = extractelement <8 x i32> %2, i32 2
+  %conv18.3 = zext i32 %4 to i64
+  %5 = insertelement <8 x i64> zeroinitializer, i64 1, i32 0
+  %6 = insertelement <8 x i64> undef, i64 %conv18.2, i32 2
+  %7 = insertelement <8 x i64> %6, i64 %conv18.3, i32 3
+  %8 = icmp ne <8 x i64> %5, %7
+  %9 = sext <8 x i1> %8 to <8 x i16>
+  %10 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %9)
+  store i16 %10, i16* @a, align 4
+  ret void
+
+}
+
+; Function Attrs: nofree nosync nounwind readnone willreturn
+declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)