diff --git a/llvm/test/Transforms/GVN/pr63059.ll b/llvm/test/Transforms/GVN/pr63059.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/GVN/pr63059.ll
@@ -0,0 +1,344 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -passes=gvn -S < %s | FileCheck %s
+
+
+; Scalar float loads of elements 1 and 2 follow a <4 x float> load of the same pointer; the assertions expect extractelement from that vector instead.
+define dso_local noundef <4 x float> @ConvertVectors_ByRef(ptr noundef nonnull align 16 dereferenceable(16) %0) local_unnamed_addr {
+; CHECK-LABEL: define dso_local noundef <4 x float> @ConvertVectors_ByRef
+; CHECK-SAME: (ptr noundef nonnull align 16 dereferenceable(16) [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[TMP0]], align 16
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP0]], i64 0, i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i64 1
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP5]], i64 1
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP0]], i64 0, i64 2
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[TMP2]], i64 2
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP8]], i64 2
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP8]], i64 3
+; CHECK-NEXT:    ret <4 x float> [[TMP10]]
+;
+  %2 = load <4 x float>, ptr %0, align 16
+  %3 = shufflevector <4 x float> %2, <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison> ; NOTE(review): mask operand was missing and is reconstructed (lanes 1-3 are overwritten by the inserts below) - confirm against the upstream test
+  %4 = getelementptr inbounds [4 x float], ptr %0, i64 0, i64 1
+  %5 = load float, ptr %4, align 4
+  %6 = insertelement <4 x float> %3, float %5, i64 1
+  %7 = getelementptr inbounds [4 x float], ptr %0, i64 0, i64 2
+  %8 = load float, ptr %7, align 8
+  %9 = insertelement <4 x float> %6, float %8, i64 2
+  %10 = insertelement <4 x float> %9, float %8, i64 3
+  ret <4 x float> %10
+}
+
+; i64 load at i32 index 2 of a stored <4 x i32>: expected to become a bitcast to <2 x i64> plus extractelement 1.
+define i64 @store_element_smaller_than_load(ptr %loc, <4 x i32> %v) {
+; CHECK-LABEL: define i64 @store_element_smaller_than_load
+; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [4 x i32], ptr [[LOC]], i64 0, i64 2
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to <2 x i64>
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[TMP0]], i64 1
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
+entry:
+  store <4 x i32> %v, ptr %loc
+  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0, i64 2
+  %ref = load i64, ptr %gep
+  ret i64 %ref
+}
+
+; A call to @f sits between the store and the load: the assertions expect the load to remain.
+define i64 @call_before_load(ptr %loc, <4 x i32> %v) {
+; CHECK-LABEL: define i64 @call_before_load
+; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
+; CHECK-NEXT:    call void @f(<4 x i32> [[V]])
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [4 x i32], ptr [[LOC]], i64 0, i64 2
+; CHECK-NEXT:    [[REF:%.*]] = load i64, ptr [[GEP]], align 4
+; CHECK-NEXT:    ret i64 [[REF]]
+;
+entry:
+  store <4 x i32> %v, ptr %loc
+  call void @f(<4 x i32> %v)
+  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0, i64 2
+  %ref = load i64, ptr %gep
+  ret i64 %ref
+}
+
+; The intervening callee @f_no_mem is declared memory(none): the load is expected to be replaced by the stored value.
+define i64 @call_before_load_memory_none(ptr %loc, <4 x i32> %v) {
+; CHECK-LABEL: define i64 @call_before_load_memory_none
+; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
+; CHECK-NEXT:    call void @f_no_mem(<4 x i32> [[V]])
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [4 x i32], ptr [[LOC]], i64 0, i64 2
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to <2 x i64>
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[TMP0]], i64 1
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
+entry:
+  store <4 x i32> %v, ptr %loc
+  call void @f_no_mem(<4 x i32> %v)
+  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0, i64 2
+  %ref = load i64, ptr %gep
+  ret i64 %ref
+}
+
+; The call to @f comes after the load: the load is still expected to be replaced by the stored value.
+define i64 @call_after_load(ptr %loc, <4 x i32> %v) {
+; CHECK-LABEL: define i64 @call_after_load
+; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [4 x i32], ptr [[LOC]], i64 0, i64 2
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to <2 x i64>
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[TMP0]], i64 1
+; CHECK-NEXT:    call void @f(<4 x i32> [[V]])
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
+entry:
+  store <4 x i32> %v, ptr %loc
+  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0, i64 2
+  %ref = load i64, ptr %gep
+  call void @f(<4 x i32> %v)
+  ret i64 %ref
+}
+
+; Floating-point variant: double load at float index 2 of a stored <4 x float>.
+define double @store_element_smaller_than_load_float(ptr %loc, <4 x float> %v) {
+; CHECK-LABEL: define double @store_element_smaller_than_load_float
+; CHECK-SAME: (ptr [[LOC:%.*]], <4 x float> [[V:%.*]]) {
+; CHECK-NEXT:    store <4 x float> [[V]], ptr [[LOC]], align 16
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [4 x float], ptr [[LOC]], i64 0, i64 2
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x float> [[V]] to <2 x double>
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i64 1
+; CHECK-NEXT:    ret double [[TMP2]]
+;
+  store <4 x float> %v, ptr %loc
+  %gep = getelementptr inbounds [4 x float], ptr %loc, i64 0, i64 2
+  %ref = load double, ptr %gep
+  ret double %ref
+}
+
+; i64 load from the address of a stored <2 x i32>: expected to become a single bitcast of the stored vector.
+define i64 @load_as_scalar(ptr %loc, <2 x i32> %v) {
+; CHECK-LABEL: define i64 @load_as_scalar
+; CHECK-SAME: (ptr [[LOC:%.*]], <2 x i32> [[V:%.*]]) {
+; CHECK-NEXT:    store <2 x i32> [[V]], ptr [[LOC]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[V]] to i64
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
+  store <2 x i32> %v, ptr %loc
+  %gep = getelementptr inbounds [4 x float], ptr %loc, i64 0
+  %ref = load i64, ptr %gep
+  ret i64 %ref
+}
+
+
+; i9 load from the address of a stored <4 x i6> (load type wider than the stored element type).
+define i9 @load_as_scalar_larger(ptr %loc, <4 x i6> %v) {
+; CHECK-LABEL: define i9 @load_as_scalar_larger
+; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i6> [[V:%.*]]) {
+; CHECK-NEXT:    store <4 x i6> [[V]], ptr [[LOC]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i6> [[V]], <4 x i6> poison, <3 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <3 x i6> [[TMP1]] to <2 x i9>
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i9> [[TMP2]], i64 0
+; CHECK-NEXT:    ret i9 [[TMP3]]
+;
+  store <4 x i6> %v, ptr %loc
+  %gep = getelementptr i9, ptr %loc, i64 0
+  %ref = load i9, ptr %gep
+  ret i9 %ref
+}
+
+
+; i4 load from the address of a stored <4 x i6> (load type narrower than the stored element type).
+define i4 @load_as_scalar_smaller(ptr %loc, <4 x i6> %v) {
+; CHECK-LABEL: define i4 @load_as_scalar_smaller
+; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i6> [[V:%.*]]) {
+; CHECK-NEXT:    store <4 x i6> [[V]], ptr [[LOC]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i6> [[V]], <4 x i6> poison, <2 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i6> [[TMP1]] to <3 x i4>
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <3 x i4> [[TMP2]], i64 0
+; CHECK-NEXT:    ret i4 [[TMP3]]
+;
+  store <4 x i6> %v, ptr %loc
+  %gep = getelementptr i4, ptr %loc, i64 0
+  %ref = load i4, ptr %gep
+  ret i4 %ref
+}
+
+
+; Vector load with the same type as the stored vector: the stored value is expected to be used directly.
+define i32 @load_vec_same_type(ptr %loc, <4 x i32> %v) {
+; CHECK-LABEL: define i32 @load_vec_same_type
+; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
+; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i32> [[V]], i32 1
+; CHECK-NEXT:    ret i32 [[R]]
+;
+entry:
+  store <4 x i32> %v, ptr %loc
+  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0
+  %ref = load <4 x i32>, ptr %gep
+  %r = extractelement <4 x i32> %ref, i32 1
+  ret i32 %r
+}
+
+; <2 x i64> load from the address of a stored <4 x i32>: expected to become a bitcast of the stored vector.
+define i64 @load_vec_same_size_different_type1(ptr %loc, <4 x i32> %v) {
+; CHECK-LABEL: define i64 @load_vec_same_size_different_type1
+; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to <2 x i64>
+; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
+; CHECK-NEXT:    ret i64 [[R]]
+;
+entry:
+  store <4 x i32> %v, ptr %loc
+  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0
+  %ref = load <2 x i64>, ptr %gep
+  %r = extractelement <2 x i64> %ref, i32 1
+  ret i64 %r
+}
+
+; <2 x double> load from the address of a stored <4 x i32>: expected to become a bitcast of the stored vector.
+define double @load_vec_same_size_different_type2(ptr %loc, <4 x i32> %v) {
+; CHECK-LABEL: define double @load_vec_same_size_different_type2
+; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to <2 x double>
+; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x double> [[TMP0]], i32 1
+; CHECK-NEXT:    ret double [[R]]
+;
+entry:
+  store <4 x i32> %v, ptr %loc
+  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0
+  %ref = load <2 x double>, ptr %gep
+  %r = extractelement <2 x double> %ref, i32 1
+  ret double %r
+}
+
+; <2 x i32> load from the address of a stored <4 x i32>: expected to become a shufflevector of the stored vector.
+define i32 @load_subvector_same_type(ptr %loc, <4 x i32> %v) {
+; CHECK-LABEL: define i32 @load_subvector_same_type
+; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> poison, <2 x i32>
+; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
+; CHECK-NEXT:    ret i32 [[R]]
+;
+entry:
+  store <4 x i32> %v, ptr %loc
+  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0
+  %ref = load <2 x i32>, ptr %gep
+  %r = extractelement <2 x i32> %ref, i32 1
+  ret i32 %r
+}
+
+; <2 x i64> load from the address of a stored <8 x i32>: expected to become a shufflevector followed by a bitcast.
+define i64 @load_subvector_different_type(ptr %loc, <8 x i32> %v) {
+; CHECK-LABEL: define i64 @load_subvector_different_type
+; CHECK-SAME: (ptr [[LOC:%.*]], <8 x i32> [[V:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store <8 x i32> [[V]], ptr [[LOC]], align 32
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i32> [[V]], <8 x i32> poison, <4 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <2 x i64>
+; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+; CHECK-NEXT:    ret i64 [[R]]
+;
+entry:
+  store <8 x i32> %v, ptr %loc
+  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0
+  %ref = load <2 x i64>, ptr %gep
+  %r = extractelement <2 x i64> %ref, i32 1
+  ret i64 %r
+}
+
+; <2 x i16> load from the address of a stored <8 x i32>: expected to become a one-element shufflevector followed by a bitcast.
+define i16 @load_subvector_different_type2(ptr %loc, <8 x i32> %v) {
+; CHECK-LABEL: define i16 @load_subvector_different_type2
+; CHECK-SAME: (ptr [[LOC:%.*]], <8 x i32> [[V:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store <8 x i32> [[V]], ptr [[LOC]], align 32
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i32> [[V]], <8 x i32> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i32> [[TMP0]] to <2 x i16>
+; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x i16> [[TMP1]], i32 1
+; CHECK-NEXT:    ret i16 [[R]]
+;
+entry:
+  store <8 x i32> %v, ptr %loc
+  %gep = getelementptr [2 x i16], ptr %loc, i64 0
+  %ref = load <2 x i16>, ptr %gep
+  %r = extractelement <2 x i16> %ref, i32 1
+  ret i16 %r
+}
+
+; <3 x i4> load from the address of a stored <8 x i8>: expected shufflevector, bitcast to <6 x i4>, then a second shufflevector.
+define i4 @load_subvector_different_type3(ptr %loc, <8 x i8> %v) {
+; CHECK-LABEL: define i4 @load_subvector_different_type3
+; CHECK-SAME: (ptr [[LOC:%.*]], <8 x i8> [[V:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store <8 x i8> [[V]], ptr [[LOC]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> poison, <3 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <3 x i8> [[TMP0]] to <6 x i4>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <6 x i4> [[TMP1]], <6 x i4> poison, <3 x i32>
+; CHECK-NEXT:    [[R:%.*]] = extractelement <3 x i4> [[TMP2]], i32 1
+; CHECK-NEXT:    ret i4 [[R]]
+;
+entry:
+  store <8 x i8> %v, ptr %loc
+  %gep = getelementptr [3 x i4], ptr %loc, i64 0
+  %ref = load <3 x i4>, ptr %gep
+  %r = extractelement <3 x i4> %ref, i32 1
+  ret i4 %r
+}
+
+; <2 x i12> load from the address of a stored <8 x i8>: expected shufflevector to <3 x i8>, then bitcast to <2 x i12>.
+define i12 @load_subvector_different_type4(ptr %loc, <8 x i8> %v) {
+; CHECK-LABEL: define i12 @load_subvector_different_type4
+; CHECK-SAME: (ptr [[LOC:%.*]], <8 x i8> [[V:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store <8 x i8> [[V]], ptr [[LOC]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> poison, <3 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <3 x i8> [[TMP0]] to <2 x i12>
+; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x i12> [[TMP1]], i32 1
+; CHECK-NEXT:    ret i12 [[R]]
+;
+entry:
+  store <8 x i8> %v, ptr %loc
+  %gep = getelementptr [2 x i12], ptr %loc, i64 0
+  %ref = load <2 x i12>, ptr %gep
+  %r = extractelement <2 x i12> %ref, i32 1
+  ret i12 %r
+}
+
+; <2 x i6> load from the address of a stored <8 x i8>: expected shufflevector, bitcast to <4 x i6>, then a second shufflevector.
+define i6 @load_subvector_different_type5(ptr %loc, <8 x i8> %v) {
+; CHECK-LABEL: define i6 @load_subvector_different_type5
+; CHECK-SAME: (ptr [[LOC:%.*]], <8 x i8> [[V:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store <8 x i8> [[V]], ptr [[LOC]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> poison, <3 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <3 x i8> [[TMP0]] to <4 x i6>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i6> [[TMP1]], <4 x i6> poison, <2 x i32>
+; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x i6> [[TMP2]], i32 1
+; CHECK-NEXT:    ret i6 [[R]]
+;
+entry:
+  store <8 x i8> %v, ptr %loc
+  %gep = getelementptr [2 x i6], ptr %loc, i64 0
+  %ref = load <2 x i6>, ptr %gep
+  %r = extractelement <2 x i6> %ref, i32 1
+  ret i6 %r
+}
+
+; External callees used by the call_* tests; @f_no_mem is declared memory(none).
+declare void @f(<4 x i32>)
+declare void @f_no_mem(<4 x i32>) memory(none)