diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -416,6 +416,11 @@ // Ignore unreachable basic blocks. if (!DT.isReachableFromEntry(&BB)) continue; + + // Fold what can be folded, to avoid Constants showing up in unexpected + // places. + MadeChange |= SimplifyInstructionsInBlock(&BB); + // Do not delete instructions under here and invalidate the iterator. // Walk the block forwards to enable simple iterative chains of transforms. // TODO: It could be more efficient to remove dead instructions diff --git a/llvm/test/Transforms/VectorCombine/X86/fold-extract.ll b/llvm/test/Transforms/VectorCombine/X86/fold-extract.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/X86/fold-extract.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -vector-combine | FileCheck %s +; +; foldExtractExtract was crashing with ConstantVector operands. +; Fold all extracts before applying VectorCombine patterns. 
+ +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: norecurse nounwind uwtable +define dso_local i32 @constant_fold_crash(<4 x i32> %x) local_unnamed_addr #0 { +; CHECK-LABEL: @constant_fold_crash( +; CHECK-NEXT: [[B:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0 +; CHECK-NEXT: [[C:%.*]] = add i32 17, [[B]] +; CHECK-NEXT: ret i32 [[C]] +; + + %a = extractelement <4 x i32> <i32 16, i32 17, i32 18, i32 19>, i32 1 + %b = extractelement <4 x i32> %x, i32 0 + %c = add i32 %a, %b + ret i32 %c +} +attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core-avx2" "target-features"="+avx,+avx2,+bmi,+bmi2,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll --- a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll +++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll @@ -333,9 +333,7 @@ define <2 x i64> @urem_constant_op1(i64 %x) { ; CHECK-LABEL: @urem_constant_op1( -; CHECK-NEXT: [[BO_SCALAR:%.*]] = urem i64 [[X:%.*]], 2 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 -; CHECK-NEXT: ret <2 x i64> [[BO]] +; CHECK-NEXT: ret <2 x i64> undef ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 %bo = urem <2 x i64> %ins, @@ -377,9 +375,7 @@ define <2 x i64> @srem_constant_op1(i64 %x) { ; CHECK-LABEL: 
@srem_constant_op1( -; CHECK-NEXT: [[BO_SCALAR:%.*]] = srem i64 [[X:%.*]], 2 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 -; CHECK-NEXT: ret <2 x i64> [[BO]] +; CHECK-NEXT: ret <2 x i64> undef ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 %bo = srem <2 x i64> %ins, @@ -421,9 +417,7 @@ define <2 x i64> @udiv_constant_op1(i64 %x) { ; CHECK-LABEL: @udiv_constant_op1( -; CHECK-NEXT: [[BO_SCALAR:%.*]] = udiv i64 [[X:%.*]], 2 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 -; CHECK-NEXT: ret <2 x i64> [[BO]] +; CHECK-NEXT: ret <2 x i64> undef ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 %bo = udiv <2 x i64> %ins, @@ -465,9 +459,7 @@ define <2 x i64> @sdiv_constant_op1(i64 %x) { ; CHECK-LABEL: @sdiv_constant_op1( -; CHECK-NEXT: [[BO_SCALAR:%.*]] = sdiv exact i64 [[X:%.*]], 2 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 -; CHECK-NEXT: ret <2 x i64> [[BO]] +; CHECK-NEXT: ret <2 x i64> undef ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 %bo = sdiv exact <2 x i64> %ins,