diff --git a/llvm/test/Transforms/Scalarizer/min-bits.ll b/llvm/test/Transforms/Scalarizer/min-bits.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Scalarizer/min-bits.ll @@ -0,0 +1,793 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt %s -passes='function(scalarizer,dce)' -scalarize-load-store -S | FileCheck %s --check-prefixes=CHECK +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +define void @load_add_store_v2i16(ptr %pa, ptr %pb) { +; CHECK-LABEL: @load_add_store_v2i16( +; CHECK-NEXT: [[PB_I1:%.*]] = getelementptr i16, ptr [[PB:%.*]], i32 1 +; CHECK-NEXT: [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8 +; CHECK-NEXT: [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1 +; CHECK-NEXT: [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2 +; CHECK-NEXT: [[B_I0:%.*]] = load i16, ptr [[PB]], align 8 +; CHECK-NEXT: [[B_I1:%.*]] = load i16, ptr [[PB_I1]], align 2 +; CHECK-NEXT: [[C_I0:%.*]] = add i16 [[A_I0]], [[B_I0]] +; CHECK-NEXT: [[C_I1:%.*]] = add i16 [[A_I1]], [[B_I1]] +; CHECK-NEXT: store i16 [[C_I0]], ptr [[PA]], align 8 +; CHECK-NEXT: store i16 [[C_I1]], ptr [[PA_I1]], align 2 +; CHECK-NEXT: ret void +; + %a = load <2 x i16>, ptr %pa, align 8 + %b = load <2 x i16>, ptr %pb, align 8 + %c = add <2 x i16> %a, %b + store <2 x i16> %c, ptr %pa, align 8 + ret void +} + +define void @load_add_store_v3i16(ptr %pa, ptr %pb) { +; CHECK-LABEL: @load_add_store_v3i16( +; CHECK-NEXT: [[PB_I1:%.*]] = getelementptr i16, ptr [[PB:%.*]], i32 1 +; CHECK-NEXT: [[PB_I2:%.*]] = getelementptr i16, ptr [[PB]], i32 2 +; CHECK-NEXT: [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8 +; CHECK-NEXT: [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1 +; CHECK-NEXT: [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2 +; CHECK-NEXT: [[PA_I2:%.*]] = getelementptr i16, ptr [[PA]], i32 2 +; CHECK-NEXT: [[A_I2:%.*]] = load 
i16, ptr [[PA_I2]], align 4 +; CHECK-NEXT: [[B_I0:%.*]] = load i16, ptr [[PB]], align 8 +; CHECK-NEXT: [[B_I1:%.*]] = load i16, ptr [[PB_I1]], align 2 +; CHECK-NEXT: [[B_I2:%.*]] = load i16, ptr [[PB_I2]], align 4 +; CHECK-NEXT: [[C_I0:%.*]] = add i16 [[A_I0]], [[B_I0]] +; CHECK-NEXT: [[C_I1:%.*]] = add i16 [[A_I1]], [[B_I1]] +; CHECK-NEXT: [[C_I2:%.*]] = add i16 [[A_I2]], [[B_I2]] +; CHECK-NEXT: store i16 [[C_I0]], ptr [[PA]], align 8 +; CHECK-NEXT: store i16 [[C_I1]], ptr [[PA_I1]], align 2 +; CHECK-NEXT: store i16 [[C_I2]], ptr [[PA_I2]], align 4 +; CHECK-NEXT: ret void +; + %a = load <3 x i16>, ptr %pa, align 8 + %b = load <3 x i16>, ptr %pb, align 8 + %c = add <3 x i16> %a, %b + store <3 x i16> %c, ptr %pa, align 8 + ret void +} + +define void @load_add_store_v4i16(ptr %pa, ptr %pb) { +; CHECK-LABEL: @load_add_store_v4i16( +; CHECK-NEXT: [[PB_I1:%.*]] = getelementptr i16, ptr [[PB:%.*]], i32 1 +; CHECK-NEXT: [[PB_I2:%.*]] = getelementptr i16, ptr [[PB]], i32 2 +; CHECK-NEXT: [[PB_I3:%.*]] = getelementptr i16, ptr [[PB]], i32 3 +; CHECK-NEXT: [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8 +; CHECK-NEXT: [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1 +; CHECK-NEXT: [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2 +; CHECK-NEXT: [[PA_I2:%.*]] = getelementptr i16, ptr [[PA]], i32 2 +; CHECK-NEXT: [[A_I2:%.*]] = load i16, ptr [[PA_I2]], align 4 +; CHECK-NEXT: [[PA_I3:%.*]] = getelementptr i16, ptr [[PA]], i32 3 +; CHECK-NEXT: [[A_I3:%.*]] = load i16, ptr [[PA_I3]], align 2 +; CHECK-NEXT: [[B_I0:%.*]] = load i16, ptr [[PB]], align 8 +; CHECK-NEXT: [[B_I1:%.*]] = load i16, ptr [[PB_I1]], align 2 +; CHECK-NEXT: [[B_I2:%.*]] = load i16, ptr [[PB_I2]], align 4 +; CHECK-NEXT: [[B_I3:%.*]] = load i16, ptr [[PB_I3]], align 2 +; CHECK-NEXT: [[C_I0:%.*]] = add i16 [[A_I0]], [[B_I0]] +; CHECK-NEXT: [[C_I1:%.*]] = add i16 [[A_I1]], [[B_I1]] +; CHECK-NEXT: [[C_I2:%.*]] = add i16 [[A_I2]], [[B_I2]] +; CHECK-NEXT: [[C_I3:%.*]] = add i16 [[A_I3]], [[B_I3]] +; CHECK-NEXT: 
store i16 [[C_I0]], ptr [[PA]], align 8 +; CHECK-NEXT: store i16 [[C_I1]], ptr [[PA_I1]], align 2 +; CHECK-NEXT: store i16 [[C_I2]], ptr [[PA_I2]], align 4 +; CHECK-NEXT: store i16 [[C_I3]], ptr [[PA_I3]], align 2 +; CHECK-NEXT: ret void +; + %a = load <4 x i16>, ptr %pa, align 8 + %b = load <4 x i16>, ptr %pb, align 8 + %c = add <4 x i16> %a, %b + store <4 x i16> %c, ptr %pa, align 8 + ret void +} + +define <2 x half> @select_uniform_condition_v2f16(<2 x half> %a, <2 x half> %b, i1 %cc) { +; CHECK-LABEL: @select_uniform_condition_v2f16( +; CHECK-NEXT: [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0 +; CHECK-NEXT: [[B_I0:%.*]] = extractelement <2 x half> [[B:%.*]], i64 0 +; CHECK-NEXT: [[R_I0:%.*]] = select i1 [[CC:%.*]], half [[A_I0]], half [[B_I0]] +; CHECK-NEXT: [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1 +; CHECK-NEXT: [[B_I1:%.*]] = extractelement <2 x half> [[B]], i64 1 +; CHECK-NEXT: [[R_I1:%.*]] = select i1 [[CC]], half [[A_I1]], half [[B_I1]] +; CHECK-NEXT: [[R_UPTO0:%.*]] = insertelement <2 x half> poison, half [[R_I0]], i64 0 +; CHECK-NEXT: [[R:%.*]] = insertelement <2 x half> [[R_UPTO0]], half [[R_I1]], i64 1 +; CHECK-NEXT: ret <2 x half> [[R]] +; + %r = select i1 %cc, <2 x half> %a, <2 x half> %b + ret <2 x half> %r +} + +define <3 x half> @select_uniform_condition_v3f16(<3 x half> %a, <3 x half> %b, i1 %cc) { +; CHECK-LABEL: @select_uniform_condition_v3f16( +; CHECK-NEXT: [[A_I0:%.*]] = extractelement <3 x half> [[A:%.*]], i64 0 +; CHECK-NEXT: [[B_I0:%.*]] = extractelement <3 x half> [[B:%.*]], i64 0 +; CHECK-NEXT: [[R_I0:%.*]] = select i1 [[CC:%.*]], half [[A_I0]], half [[B_I0]] +; CHECK-NEXT: [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1 +; CHECK-NEXT: [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 1 +; CHECK-NEXT: [[R_I1:%.*]] = select i1 [[CC]], half [[A_I1]], half [[B_I1]] +; CHECK-NEXT: [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2 +; CHECK-NEXT: [[B_I2:%.*]] = extractelement <3 x half> [[B]], i64 2 +; 
CHECK-NEXT: [[R_I2:%.*]] = select i1 [[CC]], half [[A_I2]], half [[B_I2]] +; CHECK-NEXT: [[R_UPTO0:%.*]] = insertelement <3 x half> poison, half [[R_I0]], i64 0 +; CHECK-NEXT: [[R_UPTO1:%.*]] = insertelement <3 x half> [[R_UPTO0]], half [[R_I1]], i64 1 +; CHECK-NEXT: [[R:%.*]] = insertelement <3 x half> [[R_UPTO1]], half [[R_I2]], i64 2 +; CHECK-NEXT: ret <3 x half> [[R]] +; + %r = select i1 %cc, <3 x half> %a, <3 x half> %b + ret <3 x half> %r +} + +define <4 x half> @select_uniform_condition_v4f16(<4 x half> %a, <4 x half> %b, i1 %cc) { +; CHECK-LABEL: @select_uniform_condition_v4f16( +; CHECK-NEXT: [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0 +; CHECK-NEXT: [[B_I0:%.*]] = extractelement <4 x half> [[B:%.*]], i64 0 +; CHECK-NEXT: [[R_I0:%.*]] = select i1 [[CC:%.*]], half [[A_I0]], half [[B_I0]] +; CHECK-NEXT: [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1 +; CHECK-NEXT: [[B_I1:%.*]] = extractelement <4 x half> [[B]], i64 1 +; CHECK-NEXT: [[R_I1:%.*]] = select i1 [[CC]], half [[A_I1]], half [[B_I1]] +; CHECK-NEXT: [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2 +; CHECK-NEXT: [[B_I2:%.*]] = extractelement <4 x half> [[B]], i64 2 +; CHECK-NEXT: [[R_I2:%.*]] = select i1 [[CC]], half [[A_I2]], half [[B_I2]] +; CHECK-NEXT: [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3 +; CHECK-NEXT: [[B_I3:%.*]] = extractelement <4 x half> [[B]], i64 3 +; CHECK-NEXT: [[R_I3:%.*]] = select i1 [[CC]], half [[A_I3]], half [[B_I3]] +; CHECK-NEXT: [[R_UPTO0:%.*]] = insertelement <4 x half> poison, half [[R_I0]], i64 0 +; CHECK-NEXT: [[R_UPTO1:%.*]] = insertelement <4 x half> [[R_UPTO0]], half [[R_I1]], i64 1 +; CHECK-NEXT: [[R_UPTO2:%.*]] = insertelement <4 x half> [[R_UPTO1]], half [[R_I2]], i64 2 +; CHECK-NEXT: [[R:%.*]] = insertelement <4 x half> [[R_UPTO2]], half [[R_I3]], i64 3 +; CHECK-NEXT: ret <4 x half> [[R]] +; + %r = select i1 %cc, <4 x half> %a, <4 x half> %b + ret <4 x half> %r +} + +define <4 x half> @select_vector_condition_v4f16(<4 x half> 
%a, <4 x half> %b, <4 x i1> %cc) { +; CHECK-LABEL: @select_vector_condition_v4f16( +; CHECK-NEXT: [[CC_I0:%.*]] = extractelement <4 x i1> [[CC:%.*]], i64 0 +; CHECK-NEXT: [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0 +; CHECK-NEXT: [[B_I0:%.*]] = extractelement <4 x half> [[B:%.*]], i64 0 +; CHECK-NEXT: [[R_I0:%.*]] = select i1 [[CC_I0]], half [[A_I0]], half [[B_I0]] +; CHECK-NEXT: [[CC_I1:%.*]] = extractelement <4 x i1> [[CC]], i64 1 +; CHECK-NEXT: [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1 +; CHECK-NEXT: [[B_I1:%.*]] = extractelement <4 x half> [[B]], i64 1 +; CHECK-NEXT: [[R_I1:%.*]] = select i1 [[CC_I1]], half [[A_I1]], half [[B_I1]] +; CHECK-NEXT: [[CC_I2:%.*]] = extractelement <4 x i1> [[CC]], i64 2 +; CHECK-NEXT: [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2 +; CHECK-NEXT: [[B_I2:%.*]] = extractelement <4 x half> [[B]], i64 2 +; CHECK-NEXT: [[R_I2:%.*]] = select i1 [[CC_I2]], half [[A_I2]], half [[B_I2]] +; CHECK-NEXT: [[CC_I3:%.*]] = extractelement <4 x i1> [[CC]], i64 3 +; CHECK-NEXT: [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3 +; CHECK-NEXT: [[B_I3:%.*]] = extractelement <4 x half> [[B]], i64 3 +; CHECK-NEXT: [[R_I3:%.*]] = select i1 [[CC_I3]], half [[A_I3]], half [[B_I3]] +; CHECK-NEXT: [[R_UPTO0:%.*]] = insertelement <4 x half> poison, half [[R_I0]], i64 0 +; CHECK-NEXT: [[R_UPTO1:%.*]] = insertelement <4 x half> [[R_UPTO0]], half [[R_I1]], i64 1 +; CHECK-NEXT: [[R_UPTO2:%.*]] = insertelement <4 x half> [[R_UPTO1]], half [[R_I2]], i64 2 +; CHECK-NEXT: [[R:%.*]] = insertelement <4 x half> [[R_UPTO2]], half [[R_I3]], i64 3 +; CHECK-NEXT: ret <4 x half> [[R]] +; + %r = select <4 x i1> %cc, <4 x half> %a, <4 x half> %b + ret <4 x half> %r +} + +define <2 x half> @unary_v2f16(<2 x half> %a) { +; CHECK-LABEL: @unary_v2f16( +; CHECK-NEXT: [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0 +; CHECK-NEXT: [[R_I0:%.*]] = fneg half [[A_I0]] +; CHECK-NEXT: [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1 +; 
CHECK-NEXT: [[R_I1:%.*]] = fneg half [[A_I1]] +; CHECK-NEXT: [[R_UPTO0:%.*]] = insertelement <2 x half> poison, half [[R_I0]], i64 0 +; CHECK-NEXT: [[R:%.*]] = insertelement <2 x half> [[R_UPTO0]], half [[R_I1]], i64 1 +; CHECK-NEXT: ret <2 x half> [[R]] +; + %r = fneg <2 x half> %a + ret <2 x half> %r +} + +define <3 x half> @unary_v3f16(<3 x half> %a) { +; CHECK-LABEL: @unary_v3f16( +; CHECK-NEXT: [[A_I0:%.*]] = extractelement <3 x half> [[A:%.*]], i64 0 +; CHECK-NEXT: [[R_I0:%.*]] = fneg half [[A_I0]] +; CHECK-NEXT: [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1 +; CHECK-NEXT: [[R_I1:%.*]] = fneg half [[A_I1]] +; CHECK-NEXT: [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2 +; CHECK-NEXT: [[R_I2:%.*]] = fneg half [[A_I2]] +; CHECK-NEXT: [[R_UPTO0:%.*]] = insertelement <3 x half> poison, half [[R_I0]], i64 0 +; CHECK-NEXT: [[R_UPTO1:%.*]] = insertelement <3 x half> [[R_UPTO0]], half [[R_I1]], i64 1 +; CHECK-NEXT: [[R:%.*]] = insertelement <3 x half> [[R_UPTO1]], half [[R_I2]], i64 2 +; CHECK-NEXT: ret <3 x half> [[R]] +; + %r = fneg <3 x half> %a + ret <3 x half> %r +} + +define <4 x half> @unary_v4f16(<4 x half> %a) { +; CHECK-LABEL: @unary_v4f16( +; CHECK-NEXT: [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0 +; CHECK-NEXT: [[R_I0:%.*]] = fneg half [[A_I0]] +; CHECK-NEXT: [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1 +; CHECK-NEXT: [[R_I1:%.*]] = fneg half [[A_I1]] +; CHECK-NEXT: [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2 +; CHECK-NEXT: [[R_I2:%.*]] = fneg half [[A_I2]] +; CHECK-NEXT: [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3 +; CHECK-NEXT: [[R_I3:%.*]] = fneg half [[A_I3]] +; CHECK-NEXT: [[R_UPTO0:%.*]] = insertelement <4 x half> poison, half [[R_I0]], i64 0 +; CHECK-NEXT: [[R_UPTO1:%.*]] = insertelement <4 x half> [[R_UPTO0]], half [[R_I1]], i64 1 +; CHECK-NEXT: [[R_UPTO2:%.*]] = insertelement <4 x half> [[R_UPTO1]], half [[R_I2]], i64 2 +; CHECK-NEXT: [[R:%.*]] = insertelement <4 x half> [[R_UPTO2]], half 
[[R_I3]], i64 3 +; CHECK-NEXT: ret <4 x half> [[R]] +; + %r = fneg <4 x half> %a + ret <4 x half> %r +} + +define <2 x half> @binary_v2f16(<2 x half> %a, <2 x half> %b) { +; CHECK-LABEL: @binary_v2f16( +; CHECK-NEXT: [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0 +; CHECK-NEXT: [[B_I0:%.*]] = extractelement <2 x half> [[B:%.*]], i64 0 +; CHECK-NEXT: [[R_I0:%.*]] = fadd half [[A_I0]], [[B_I0]] +; CHECK-NEXT: [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1 +; CHECK-NEXT: [[B_I1:%.*]] = extractelement <2 x half> [[B]], i64 1 +; CHECK-NEXT: [[R_I1:%.*]] = fadd half [[A_I1]], [[B_I1]] +; CHECK-NEXT: [[R_UPTO0:%.*]] = insertelement <2 x half> poison, half [[R_I0]], i64 0 +; CHECK-NEXT: [[R:%.*]] = insertelement <2 x half> [[R_UPTO0]], half [[R_I1]], i64 1 +; CHECK-NEXT: ret <2 x half> [[R]] +; + %r = fadd <2 x half> %a, %b + ret <2 x half> %r +} + +define <3 x half> @binary_v3f16(<3 x half> %a, <3 x half> %b) { +; CHECK-LABEL: @binary_v3f16( +; CHECK-NEXT: [[A_I0:%.*]] = extractelement <3 x half> [[A:%.*]], i64 0 +; CHECK-NEXT: [[B_I0:%.*]] = extractelement <3 x half> [[B:%.*]], i64 0 +; CHECK-NEXT: [[R_I0:%.*]] = fadd half [[A_I0]], [[B_I0]] +; CHECK-NEXT: [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1 +; CHECK-NEXT: [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 1 +; CHECK-NEXT: [[R_I1:%.*]] = fadd half [[A_I1]], [[B_I1]] +; CHECK-NEXT: [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2 +; CHECK-NEXT: [[B_I2:%.*]] = extractelement <3 x half> [[B]], i64 2 +; CHECK-NEXT: [[R_I2:%.*]] = fadd half [[A_I2]], [[B_I2]] +; CHECK-NEXT: [[R_UPTO0:%.*]] = insertelement <3 x half> poison, half [[R_I0]], i64 0 +; CHECK-NEXT: [[R_UPTO1:%.*]] = insertelement <3 x half> [[R_UPTO0]], half [[R_I1]], i64 1 +; CHECK-NEXT: [[R:%.*]] = insertelement <3 x half> [[R_UPTO1]], half [[R_I2]], i64 2 +; CHECK-NEXT: ret <3 x half> [[R]] +; + %r = fadd <3 x half> %a, %b + ret <3 x half> %r +} + +define <4 x half> @binary_v4f16(<4 x half> %a, <4 x half> %b) { +; 
CHECK-LABEL: @binary_v4f16( +; CHECK-NEXT: [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0 +; CHECK-NEXT: [[B_I0:%.*]] = extractelement <4 x half> [[B:%.*]], i64 0 +; CHECK-NEXT: [[R_I0:%.*]] = fadd half [[A_I0]], [[B_I0]] +; CHECK-NEXT: [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1 +; CHECK-NEXT: [[B_I1:%.*]] = extractelement <4 x half> [[B]], i64 1 +; CHECK-NEXT: [[R_I1:%.*]] = fadd half [[A_I1]], [[B_I1]] +; CHECK-NEXT: [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2 +; CHECK-NEXT: [[B_I2:%.*]] = extractelement <4 x half> [[B]], i64 2 +; CHECK-NEXT: [[R_I2:%.*]] = fadd half [[A_I2]], [[B_I2]] +; CHECK-NEXT: [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3 +; CHECK-NEXT: [[B_I3:%.*]] = extractelement <4 x half> [[B]], i64 3 +; CHECK-NEXT: [[R_I3:%.*]] = fadd half [[A_I3]], [[B_I3]] +; CHECK-NEXT: [[R_UPTO0:%.*]] = insertelement <4 x half> poison, half [[R_I0]], i64 0 +; CHECK-NEXT: [[R_UPTO1:%.*]] = insertelement <4 x half> [[R_UPTO0]], half [[R_I1]], i64 1 +; CHECK-NEXT: [[R_UPTO2:%.*]] = insertelement <4 x half> [[R_UPTO1]], half [[R_I2]], i64 2 +; CHECK-NEXT: [[R:%.*]] = insertelement <4 x half> [[R_UPTO2]], half [[R_I3]], i64 3 +; CHECK-NEXT: ret <4 x half> [[R]] +; + %r = fadd <4 x half> %a, %b + ret <4 x half> %r +} + +define <2 x i16> @fptosi_v2f16(<2 x half> %a) { +; CHECK-LABEL: @fptosi_v2f16( +; CHECK-NEXT: [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0 +; CHECK-NEXT: [[R_I0:%.*]] = fptosi half [[A_I0]] to i16 +; CHECK-NEXT: [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1 +; CHECK-NEXT: [[R_I1:%.*]] = fptosi half [[A_I1]] to i16 +; CHECK-NEXT: [[R_UPTO0:%.*]] = insertelement <2 x i16> poison, i16 [[R_I0]], i64 0 +; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i16> [[R_UPTO0]], i16 [[R_I1]], i64 1 +; CHECK-NEXT: ret <2 x i16> [[R]] +; + %r = fptosi <2 x half> %a to <2 x i16> + ret <2 x i16> %r +} + +define <3 x i16> @fptosi_v3f16(<3 x half> %a) { +; CHECK-LABEL: @fptosi_v3f16( +; CHECK-NEXT: [[A_I0:%.*]] = 
extractelement <3 x half> [[A:%.*]], i64 0 +; CHECK-NEXT: [[R_I0:%.*]] = fptosi half [[A_I0]] to i16 +; CHECK-NEXT: [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1 +; CHECK-NEXT: [[R_I1:%.*]] = fptosi half [[A_I1]] to i16 +; CHECK-NEXT: [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2 +; CHECK-NEXT: [[R_I2:%.*]] = fptosi half [[A_I2]] to i16 +; CHECK-NEXT: [[R_UPTO0:%.*]] = insertelement <3 x i16> poison, i16 [[R_I0]], i64 0 +; CHECK-NEXT: [[R_UPTO1:%.*]] = insertelement <3 x i16> [[R_UPTO0]], i16 [[R_I1]], i64 1 +; CHECK-NEXT: [[R:%.*]] = insertelement <3 x i16> [[R_UPTO1]], i16 [[R_I2]], i64 2 +; CHECK-NEXT: ret <3 x i16> [[R]] +; + %r = fptosi <3 x half> %a to <3 x i16> + ret <3 x i16> %r +} + +define <4 x i16> @fptosi_v4f16(<4 x half> %a) { +; CHECK-LABEL: @fptosi_v4f16( +; CHECK-NEXT: [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0 +; CHECK-NEXT: [[R_I0:%.*]] = fptosi half [[A_I0]] to i16 +; CHECK-NEXT: [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1 +; CHECK-NEXT: [[R_I1:%.*]] = fptosi half [[A_I1]] to i16 +; CHECK-NEXT: [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2 +; CHECK-NEXT: [[R_I2:%.*]] = fptosi half [[A_I2]] to i16 +; CHECK-NEXT: [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3 +; CHECK-NEXT: [[R_I3:%.*]] = fptosi half [[A_I3]] to i16 +; CHECK-NEXT: [[R_UPTO0:%.*]] = insertelement <4 x i16> poison, i16 [[R_I0]], i64 0 +; CHECK-NEXT: [[R_UPTO1:%.*]] = insertelement <4 x i16> [[R_UPTO0]], i16 [[R_I1]], i64 1 +; CHECK-NEXT: [[R_UPTO2:%.*]] = insertelement <4 x i16> [[R_UPTO1]], i16 [[R_I2]], i64 2 +; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i16> [[R_UPTO2]], i16 [[R_I3]], i64 3 +; CHECK-NEXT: ret <4 x i16> [[R]] +; + %r = fptosi <4 x half> %a to <4 x i16> + ret <4 x i16> %r +} + +define <4 x float> @fpext_v4f16(<4 x half> %a) { +; CHECK-LABEL: @fpext_v4f16( +; CHECK-NEXT: [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0 +; CHECK-NEXT: [[R_I0:%.*]] = fpext half [[A_I0]] to float +; CHECK-NEXT: 
[[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1 +; CHECK-NEXT: [[R_I1:%.*]] = fpext half [[A_I1]] to float +; CHECK-NEXT: [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2 +; CHECK-NEXT: [[R_I2:%.*]] = fpext half [[A_I2]] to float +; CHECK-NEXT: [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3 +; CHECK-NEXT: [[R_I3:%.*]] = fpext half [[A_I3]] to float +; CHECK-NEXT: [[R_UPTO0:%.*]] = insertelement <4 x float> poison, float [[R_I0]], i64 0 +; CHECK-NEXT: [[R_UPTO1:%.*]] = insertelement <4 x float> [[R_UPTO0]], float [[R_I1]], i64 1 +; CHECK-NEXT: [[R_UPTO2:%.*]] = insertelement <4 x float> [[R_UPTO1]], float [[R_I2]], i64 2 +; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[R_UPTO2]], float [[R_I3]], i64 3 +; CHECK-NEXT: ret <4 x float> [[R]] +; + %r = fpext <4 x half> %a to <4 x float> + ret <4 x float> %r +} + +define <4 x i1> @icmp_v4f16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: @icmp_v4f16( +; CHECK-NEXT: [[A_I0:%.*]] = extractelement <4 x i16> [[A:%.*]], i64 0 +; CHECK-NEXT: [[B_I0:%.*]] = extractelement <4 x i16> [[B:%.*]], i64 0 +; CHECK-NEXT: [[R_I0:%.*]] = icmp ugt i16 [[A_I0]], [[B_I0]] +; CHECK-NEXT: [[A_I1:%.*]] = extractelement <4 x i16> [[A]], i64 1 +; CHECK-NEXT: [[B_I1:%.*]] = extractelement <4 x i16> [[B]], i64 1 +; CHECK-NEXT: [[R_I1:%.*]] = icmp ugt i16 [[A_I1]], [[B_I1]] +; CHECK-NEXT: [[A_I2:%.*]] = extractelement <4 x i16> [[A]], i64 2 +; CHECK-NEXT: [[B_I2:%.*]] = extractelement <4 x i16> [[B]], i64 2 +; CHECK-NEXT: [[R_I2:%.*]] = icmp ugt i16 [[A_I2]], [[B_I2]] +; CHECK-NEXT: [[A_I3:%.*]] = extractelement <4 x i16> [[A]], i64 3 +; CHECK-NEXT: [[B_I3:%.*]] = extractelement <4 x i16> [[B]], i64 3 +; CHECK-NEXT: [[R_I3:%.*]] = icmp ugt i16 [[A_I3]], [[B_I3]] +; CHECK-NEXT: [[R_UPTO0:%.*]] = insertelement <4 x i1> poison, i1 [[R_I0]], i64 0 +; CHECK-NEXT: [[R_UPTO1:%.*]] = insertelement <4 x i1> [[R_UPTO0]], i1 [[R_I1]], i64 1 +; CHECK-NEXT: [[R_UPTO2:%.*]] = insertelement <4 x i1> [[R_UPTO1]], i1 [[R_I2]], i64 2 +; 
CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> [[R_UPTO2]], i1 [[R_I3]], i64 3 +; CHECK-NEXT: ret <4 x i1> [[R]] +; + %r = icmp ugt <4 x i16> %a, %b + ret <4 x i1> %r +} + +define <4 x ptr> @gep1_v4(ptr %base, <4 x i16> %a) { +; CHECK-LABEL: @gep1_v4( +; CHECK-NEXT: [[A_I0:%.*]] = extractelement <4 x i16> [[A:%.*]], i64 0 +; CHECK-NEXT: [[A_I1:%.*]] = extractelement <4 x i16> [[A]], i64 1 +; CHECK-NEXT: [[A_I2:%.*]] = extractelement <4 x i16> [[A]], i64 2 +; CHECK-NEXT: [[A_I3:%.*]] = extractelement <4 x i16> [[A]], i64 3 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[BASE:%.*]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x ptr> [[DOTSPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT_I0:%.*]] = extractelement <4 x ptr> [[DOTSPLAT]], i64 0 +; CHECK-NEXT: [[P_I0:%.*]] = getelementptr i32, ptr [[DOTSPLAT_I0]], i16 [[A_I0]] +; CHECK-NEXT: [[DOTSPLAT_I1:%.*]] = extractelement <4 x ptr> [[DOTSPLAT]], i64 1 +; CHECK-NEXT: [[P_I1:%.*]] = getelementptr i32, ptr [[DOTSPLAT_I1]], i16 [[A_I1]] +; CHECK-NEXT: [[DOTSPLAT_I2:%.*]] = extractelement <4 x ptr> [[DOTSPLAT]], i64 2 +; CHECK-NEXT: [[P_I2:%.*]] = getelementptr i32, ptr [[DOTSPLAT_I2]], i16 [[A_I2]] +; CHECK-NEXT: [[DOTSPLAT_I3:%.*]] = extractelement <4 x ptr> [[DOTSPLAT]], i64 3 +; CHECK-NEXT: [[P_I3:%.*]] = getelementptr i32, ptr [[DOTSPLAT_I3]], i16 [[A_I3]] +; CHECK-NEXT: [[P_UPTO0:%.*]] = insertelement <4 x ptr> poison, ptr [[P_I0]], i64 0 +; CHECK-NEXT: [[P_UPTO1:%.*]] = insertelement <4 x ptr> [[P_UPTO0]], ptr [[P_I1]], i64 1 +; CHECK-NEXT: [[P_UPTO2:%.*]] = insertelement <4 x ptr> [[P_UPTO1]], ptr [[P_I2]], i64 2 +; CHECK-NEXT: [[P:%.*]] = insertelement <4 x ptr> [[P_UPTO2]], ptr [[P_I3]], i64 3 +; CHECK-NEXT: ret <4 x ptr> [[P]] +; + %p = getelementptr i32, ptr %base, <4 x i16> %a + ret <4 x ptr> %p +} + +define <4 x ptr> @gep2_v4(<4 x ptr> %base, i16 %a) { +; CHECK-LABEL: @gep2_v4( +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = 
insertelement <4 x i16> poison, i16 [[A:%.*]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i16> [[DOTSPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT_I0:%.*]] = extractelement <4 x i16> [[DOTSPLAT]], i64 0 +; CHECK-NEXT: [[BASE_I0:%.*]] = extractelement <4 x ptr> [[BASE:%.*]], i64 0 +; CHECK-NEXT: [[P_I0:%.*]] = getelementptr i32, ptr [[BASE_I0]], i16 [[DOTSPLAT_I0]] +; CHECK-NEXT: [[DOTSPLAT_I1:%.*]] = extractelement <4 x i16> [[DOTSPLAT]], i64 1 +; CHECK-NEXT: [[BASE_I1:%.*]] = extractelement <4 x ptr> [[BASE]], i64 1 +; CHECK-NEXT: [[P_I1:%.*]] = getelementptr i32, ptr [[BASE_I1]], i16 [[DOTSPLAT_I1]] +; CHECK-NEXT: [[DOTSPLAT_I2:%.*]] = extractelement <4 x i16> [[DOTSPLAT]], i64 2 +; CHECK-NEXT: [[BASE_I2:%.*]] = extractelement <4 x ptr> [[BASE]], i64 2 +; CHECK-NEXT: [[P_I2:%.*]] = getelementptr i32, ptr [[BASE_I2]], i16 [[DOTSPLAT_I2]] +; CHECK-NEXT: [[DOTSPLAT_I3:%.*]] = extractelement <4 x i16> [[DOTSPLAT]], i64 3 +; CHECK-NEXT: [[BASE_I3:%.*]] = extractelement <4 x ptr> [[BASE]], i64 3 +; CHECK-NEXT: [[P_I3:%.*]] = getelementptr i32, ptr [[BASE_I3]], i16 [[DOTSPLAT_I3]] +; CHECK-NEXT: [[P_UPTO0:%.*]] = insertelement <4 x ptr> poison, ptr [[P_I0]], i64 0 +; CHECK-NEXT: [[P_UPTO1:%.*]] = insertelement <4 x ptr> [[P_UPTO0]], ptr [[P_I1]], i64 1 +; CHECK-NEXT: [[P_UPTO2:%.*]] = insertelement <4 x ptr> [[P_UPTO1]], ptr [[P_I2]], i64 2 +; CHECK-NEXT: [[P:%.*]] = insertelement <4 x ptr> [[P_UPTO2]], ptr [[P_I3]], i64 3 +; CHECK-NEXT: ret <4 x ptr> [[P]] +; + %p = getelementptr i32, <4 x ptr> %base, i16 %a + ret <4 x ptr> %p +} + +define <4 x ptr> @gep3_v4(<4 x ptr> %base, <4 x i16> %a) { +; CHECK-LABEL: @gep3_v4( +; CHECK-NEXT: [[A_I0:%.*]] = extractelement <4 x i16> [[A:%.*]], i64 0 +; CHECK-NEXT: [[BASE_I0:%.*]] = extractelement <4 x ptr> [[BASE:%.*]], i64 0 +; CHECK-NEXT: [[P_I0:%.*]] = getelementptr i32, ptr [[BASE_I0]], i16 [[A_I0]] +; CHECK-NEXT: [[A_I1:%.*]] = extractelement <4 x i16> [[A]], i64 1 +; 
CHECK-NEXT: [[BASE_I1:%.*]] = extractelement <4 x ptr> [[BASE]], i64 1 +; CHECK-NEXT: [[P_I1:%.*]] = getelementptr i32, ptr [[BASE_I1]], i16 [[A_I1]] +; CHECK-NEXT: [[A_I2:%.*]] = extractelement <4 x i16> [[A]], i64 2 +; CHECK-NEXT: [[BASE_I2:%.*]] = extractelement <4 x ptr> [[BASE]], i64 2 +; CHECK-NEXT: [[P_I2:%.*]] = getelementptr i32, ptr [[BASE_I2]], i16 [[A_I2]] +; CHECK-NEXT: [[A_I3:%.*]] = extractelement <4 x i16> [[A]], i64 3 +; CHECK-NEXT: [[BASE_I3:%.*]] = extractelement <4 x ptr> [[BASE]], i64 3 +; CHECK-NEXT: [[P_I3:%.*]] = getelementptr i32, ptr [[BASE_I3]], i16 [[A_I3]] +; CHECK-NEXT: [[P_UPTO0:%.*]] = insertelement <4 x ptr> poison, ptr [[P_I0]], i64 0 +; CHECK-NEXT: [[P_UPTO1:%.*]] = insertelement <4 x ptr> [[P_UPTO0]], ptr [[P_I1]], i64 1 +; CHECK-NEXT: [[P_UPTO2:%.*]] = insertelement <4 x ptr> [[P_UPTO1]], ptr [[P_I2]], i64 2 +; CHECK-NEXT: [[P:%.*]] = insertelement <4 x ptr> [[P_UPTO2]], ptr [[P_I3]], i64 3 +; CHECK-NEXT: ret <4 x ptr> [[P]] +; + %p = getelementptr i32, <4 x ptr> %base, <4 x i16> %a + ret <4 x ptr> %p +} + +define void @insertelement_v2i16(ptr %p, <2 x i16> %a, i16 %b) { +; CHECK-LABEL: @insertelement_v2i16( +; CHECK-NEXT: [[P_I1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1 +; CHECK-NEXT: [[A_I0:%.*]] = extractelement <2 x i16> [[A:%.*]], i64 0 +; CHECK-NEXT: store i16 [[A_I0]], ptr [[P]], align 4 +; CHECK-NEXT: store i16 [[B:%.*]], ptr [[P_I1]], align 2 +; CHECK-NEXT: ret void +; + %r = insertelement <2 x i16> %a, i16 %b, i64 1 + store <2 x i16> %r, ptr %p + ret void +} + +define void @insertelement_v3i16(ptr %p, <3 x i16> %a, i16 %b) { +; CHECK-LABEL: @insertelement_v3i16( +; CHECK-NEXT: [[P_I1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1 +; CHECK-NEXT: [[P_I2:%.*]] = getelementptr i16, ptr [[P]], i32 2 +; CHECK-NEXT: [[A_I0:%.*]] = extractelement <3 x i16> [[A:%.*]], i64 0 +; CHECK-NEXT: [[A_I1:%.*]] = extractelement <3 x i16> [[A]], i64 1 +; CHECK-NEXT: store i16 [[A_I0]], ptr [[P]], align 8 +; CHECK-NEXT: store i16 
[[A_I1]], ptr [[P_I1]], align 2 +; CHECK-NEXT: store i16 [[B:%.*]], ptr [[P_I2]], align 4 +; CHECK-NEXT: ret void +; + %r = insertelement <3 x i16> %a, i16 %b, i64 2 + store <3 x i16> %r, ptr %p + ret void +} + +define void @insertelement_v4i16(ptr %p, <4 x i16> %a, i16 %b) { +; CHECK-LABEL: @insertelement_v4i16( +; CHECK-NEXT: [[P_I1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1 +; CHECK-NEXT: [[P_I2:%.*]] = getelementptr i16, ptr [[P]], i32 2 +; CHECK-NEXT: [[P_I3:%.*]] = getelementptr i16, ptr [[P]], i32 3 +; CHECK-NEXT: [[A_I0:%.*]] = extractelement <4 x i16> [[A:%.*]], i64 0 +; CHECK-NEXT: [[A_I1:%.*]] = extractelement <4 x i16> [[A]], i64 1 +; CHECK-NEXT: [[A_I2:%.*]] = extractelement <4 x i16> [[A]], i64 2 +; CHECK-NEXT: store i16 [[A_I0]], ptr [[P]], align 8 +; CHECK-NEXT: store i16 [[A_I1]], ptr [[P_I1]], align 2 +; CHECK-NEXT: store i16 [[A_I2]], ptr [[P_I2]], align 4 +; CHECK-NEXT: store i16 [[B:%.*]], ptr [[P_I3]], align 2 +; CHECK-NEXT: ret void +; + %r = insertelement <4 x i16> %a, i16 %b, i64 3 + store <4 x i16> %r, ptr %p + ret void +} + +define <2 x i16> @load_insertelement_v2i16(ptr %pa, i16 %b) { +; CHECK-LABEL: @load_insertelement_v2i16( +; CHECK-NEXT: [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 4 +; CHECK-NEXT: [[R_UPTO0:%.*]] = insertelement <2 x i16> poison, i16 [[A_I0]], i64 0 +; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i16> [[R_UPTO0]], i16 [[B:%.*]], i64 1 +; CHECK-NEXT: ret <2 x i16> [[R]] +; + %a = load <2 x i16>, ptr %pa + %r = insertelement <2 x i16> %a, i16 %b, i64 1 + ret <2 x i16> %r +} + +define <3 x i16> @load_insertelement_v3i16(ptr %pa, i16 %b) { +; CHECK-LABEL: @load_insertelement_v3i16( +; CHECK-NEXT: [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8 +; CHECK-NEXT: [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1 +; CHECK-NEXT: [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2 +; CHECK-NEXT: [[R_UPTO0:%.*]] = insertelement <3 x i16> poison, i16 [[A_I0]], i64 0 +; CHECK-NEXT: [[R_UPTO1:%.*]] = insertelement <3 x i16> 
[[R_UPTO0]], i16 [[A_I1]], i64 1 +; CHECK-NEXT: [[R:%.*]] = insertelement <3 x i16> [[R_UPTO1]], i16 [[B:%.*]], i64 2 +; CHECK-NEXT: ret <3 x i16> [[R]] +; + %a = load <3 x i16>, ptr %pa + %r = insertelement <3 x i16> %a, i16 %b, i64 2 + ret <3 x i16> %r +} + +define <4 x i16> @load_insertelement_v4i16(ptr %pa, i16 %b) { +; CHECK-LABEL: @load_insertelement_v4i16( +; CHECK-NEXT: [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8 +; CHECK-NEXT: [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1 +; CHECK-NEXT: [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2 +; CHECK-NEXT: [[PA_I2:%.*]] = getelementptr i16, ptr [[PA]], i32 2 +; CHECK-NEXT: [[A_I2:%.*]] = load i16, ptr [[PA_I2]], align 4 +; CHECK-NEXT: [[R_UPTO0:%.*]] = insertelement <4 x i16> poison, i16 [[A_I0]], i64 0 +; CHECK-NEXT: [[R_UPTO1:%.*]] = insertelement <4 x i16> [[R_UPTO0]], i16 [[A_I1]], i64 1 +; CHECK-NEXT: [[R_UPTO2:%.*]] = insertelement <4 x i16> [[R_UPTO1]], i16 [[A_I2]], i64 2 +; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i16> [[R_UPTO2]], i16 [[B:%.*]], i64 3 +; CHECK-NEXT: ret <4 x i16> [[R]] +; + %a = load <4 x i16>, ptr %pa + %r = insertelement <4 x i16> %a, i16 %b, i64 3 + ret <4 x i16> %r +} + +define void @shufflevector_grow(ptr %pa, ptr %pb) { +; CHECK-LABEL: @shufflevector_grow( +; CHECK-NEXT: [[PA_I11:%.*]] = getelementptr i16, ptr [[PA:%.*]], i32 1 +; CHECK-NEXT: [[PA_I2:%.*]] = getelementptr i16, ptr [[PA]], i32 2 +; CHECK-NEXT: [[PA_I3:%.*]] = getelementptr i16, ptr [[PA]], i32 3 +; CHECK-NEXT: [[PB_I1:%.*]] = getelementptr i16, ptr [[PB:%.*]], i32 1 +; CHECK-NEXT: [[A_I0:%.*]] = load i16, ptr [[PA]], align 4 +; CHECK-NEXT: [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1 +; CHECK-NEXT: [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2 +; CHECK-NEXT: [[B_I0:%.*]] = load i16, ptr [[PB]], align 4 +; CHECK-NEXT: [[B_I1:%.*]] = load i16, ptr [[PB_I1]], align 2 +; CHECK-NEXT: store i16 [[A_I0]], ptr [[PA]], align 8 +; CHECK-NEXT: store i16 [[A_I1]], ptr [[PA_I11]], align 2 +; CHECK-NEXT: 
store i16 [[B_I0]], ptr [[PA_I2]], align 4 +; CHECK-NEXT: store i16 [[B_I1]], ptr [[PA_I3]], align 2 +; CHECK-NEXT: ret void +; + %a = load <2 x i16>, ptr %pa + %b = load <2 x i16>, ptr %pb + %r = shufflevector <2 x i16> %a, <2 x i16> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + store <4 x i16> %r, ptr %pa + ret void +} + +define void @shufflevector_shrink(ptr %pa) { +; CHECK-LABEL: @shufflevector_shrink( +; CHECK-NEXT: [[PA_I11:%.*]] = getelementptr i16, ptr [[PA:%.*]], i32 1 +; CHECK-NEXT: [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1 +; CHECK-NEXT: [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2 +; CHECK-NEXT: [[PA_I2:%.*]] = getelementptr i16, ptr [[PA]], i32 2 +; CHECK-NEXT: [[A_I2:%.*]] = load i16, ptr [[PA_I2]], align 4 +; CHECK-NEXT: store i16 [[A_I1]], ptr [[PA]], align 4 +; CHECK-NEXT: store i16 [[A_I2]], ptr [[PA_I11]], align 2 +; CHECK-NEXT: ret void +; + %a = load <4 x i16>, ptr %pa + %r = shufflevector <4 x i16> %a, <4 x i16> poison, <2 x i32> <i32 1, i32 2> + store <2 x i16> %r, ptr %pa + ret void +} + +define void @phi_v2f16(ptr %base, i64 %bound) { +; CHECK-LABEL: @phi_v2f16( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BASE_I1:%.*]] = getelementptr half, ptr [[BASE:%.*]], i32 1 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[X_I0:%.*]] = phi half [ 0xH0000, [[ENTRY:%.*]] ], [ [[X_NEXT_I0:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[X_I1:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I1:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[P:%.*]] = getelementptr <2 x half>, ptr [[BASE]], i64 [[IDX]] +; CHECK-NEXT: [[A_I0:%.*]] = load half, ptr [[P]], align 2 +; CHECK-NEXT: [[P_I1:%.*]] = getelementptr half, ptr [[P]], i32 1 +; CHECK-NEXT: [[A_I1:%.*]] = load half, ptr [[P_I1]], align 2 +; CHECK-NEXT: [[X_NEXT_I0]] = fadd half [[X_I0]], [[A_I0]] +; CHECK-NEXT: [[X_NEXT_I1]] = fadd half [[X_I1]], [[A_I1]] +; CHECK-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 1 +; CHECK-NEXT: [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], 
[[BOUND:%.*]] +; CHECK-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]] +; CHECK: end: +; CHECK-NEXT: store half [[X_NEXT_I0]], ptr [[BASE]], align 4 +; CHECK-NEXT: store half [[X_NEXT_I1]], ptr [[BASE_I1]], align 2 +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %x = phi <2 x half> [ zeroinitializer, %entry ], [ %x.next, %loop ] + %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ] + %p = getelementptr <2 x half>, ptr %base, i64 %idx + %a = load <2 x half>, ptr %p, align 2 + %x.next = fadd <2 x half> %x, %a + %idx.next = add i64 %idx, 1 + %cc = icmp ult i64 %idx.next, %bound + br i1 %cc, label %loop, label %end + +end: + store <2 x half> %x.next, ptr %base + ret void +} + +define void @phi_v3f16(ptr %base, i64 %bound) { +; CHECK-LABEL: @phi_v3f16( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BASE_I1:%.*]] = getelementptr half, ptr [[BASE:%.*]], i32 1 +; CHECK-NEXT: [[BASE_I2:%.*]] = getelementptr half, ptr [[BASE]], i32 2 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[X_I0:%.*]] = phi half [ 0xH0000, [[ENTRY:%.*]] ], [ [[X_NEXT_I0:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[X_I1:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I1:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[X_I2:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I2:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[P:%.*]] = getelementptr <3 x half>, ptr [[BASE]], i64 [[IDX]] +; CHECK-NEXT: [[A_I0:%.*]] = load half, ptr [[P]], align 2 +; CHECK-NEXT: [[P_I1:%.*]] = getelementptr half, ptr [[P]], i32 1 +; CHECK-NEXT: [[A_I1:%.*]] = load half, ptr [[P_I1]], align 2 +; CHECK-NEXT: [[P_I2:%.*]] = getelementptr half, ptr [[P]], i32 2 +; CHECK-NEXT: [[A_I2:%.*]] = load half, ptr [[P_I2]], align 2 +; CHECK-NEXT: [[X_NEXT_I0]] = fadd half [[X_I0]], [[A_I0]] +; CHECK-NEXT: [[X_NEXT_I1]] = fadd half [[X_I1]], [[A_I1]] +; CHECK-NEXT: [[X_NEXT_I2]] = fadd half [[X_I2]], [[A_I2]] +; CHECK-NEXT: [[IDX_NEXT]] = add i64 
[[IDX]], 1 +; CHECK-NEXT: [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]] +; CHECK-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]] +; CHECK: end: +; CHECK-NEXT: store half [[X_NEXT_I0]], ptr [[BASE]], align 8 +; CHECK-NEXT: store half [[X_NEXT_I1]], ptr [[BASE_I1]], align 2 +; CHECK-NEXT: store half [[X_NEXT_I2]], ptr [[BASE_I2]], align 4 +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %x = phi <3 x half> [ zeroinitializer, %entry ], [ %x.next, %loop ] + %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ] + %p = getelementptr <3 x half>, ptr %base, i64 %idx + %a = load <3 x half>, ptr %p, align 2 + %x.next = fadd <3 x half> %x, %a + %idx.next = add i64 %idx, 1 + %cc = icmp ult i64 %idx.next, %bound + br i1 %cc, label %loop, label %end + +end: + store <3 x half> %x.next, ptr %base + ret void +} + +define void @phi_v4f16(ptr %base, i64 %bound) { +; CHECK-LABEL: @phi_v4f16( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BASE_I1:%.*]] = getelementptr half, ptr [[BASE:%.*]], i32 1 +; CHECK-NEXT: [[BASE_I2:%.*]] = getelementptr half, ptr [[BASE]], i32 2 +; CHECK-NEXT: [[BASE_I3:%.*]] = getelementptr half, ptr [[BASE]], i32 3 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[X_I0:%.*]] = phi half [ 0xH0000, [[ENTRY:%.*]] ], [ [[X_NEXT_I0:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[X_I1:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I1:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[X_I2:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I2:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[X_I3:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I3:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[P:%.*]] = getelementptr <4 x half>, ptr [[BASE]], i64 [[IDX]] +; CHECK-NEXT: [[A_I0:%.*]] = load half, ptr [[P]], align 2 +; CHECK-NEXT: [[P_I1:%.*]] = getelementptr half, ptr [[P]], i32 1 +; CHECK-NEXT: [[A_I1:%.*]] = load half, ptr [[P_I1]], align 2 +; CHECK-NEXT: [[P_I2:%.*]] = getelementptr half, 
ptr [[P]], i32 2 +; CHECK-NEXT: [[A_I2:%.*]] = load half, ptr [[P_I2]], align 2 +; CHECK-NEXT: [[P_I3:%.*]] = getelementptr half, ptr [[P]], i32 3 +; CHECK-NEXT: [[A_I3:%.*]] = load half, ptr [[P_I3]], align 2 +; CHECK-NEXT: [[X_NEXT_I0]] = fadd half [[X_I0]], [[A_I0]] +; CHECK-NEXT: [[X_NEXT_I1]] = fadd half [[X_I1]], [[A_I1]] +; CHECK-NEXT: [[X_NEXT_I2]] = fadd half [[X_I2]], [[A_I2]] +; CHECK-NEXT: [[X_NEXT_I3]] = fadd half [[X_I3]], [[A_I3]] +; CHECK-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 1 +; CHECK-NEXT: [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]] +; CHECK-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]] +; CHECK: end: +; CHECK-NEXT: store half [[X_NEXT_I0]], ptr [[BASE]], align 8 +; CHECK-NEXT: store half [[X_NEXT_I1]], ptr [[BASE_I1]], align 2 +; CHECK-NEXT: store half [[X_NEXT_I2]], ptr [[BASE_I2]], align 4 +; CHECK-NEXT: store half [[X_NEXT_I3]], ptr [[BASE_I3]], align 2 +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %x = phi <4 x half> [ zeroinitializer, %entry ], [ %x.next, %loop ] + %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ] + %p = getelementptr <4 x half>, ptr %base, i64 %idx + %a = load <4 x half>, ptr %p, align 2 + %x.next = fadd <4 x half> %x, %a + %idx.next = add i64 %idx, 1 + %cc = icmp ult i64 %idx.next, %bound + br i1 %cc, label %loop, label %end + +end: + store <4 x half> %x.next, ptr %base + ret void +} + +define <2 x half> @call_v2f16(<2 x half> %a, <2 x half> %b) { +; CHECK-LABEL: @call_v2f16( +; CHECK-NEXT: [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0 +; CHECK-NEXT: [[B_I0:%.*]] = extractelement <2 x half> [[B:%.*]], i64 0 +; CHECK-NEXT: [[R_I0:%.*]] = call half @llvm.minnum.f16(half [[A_I0]], half [[B_I0]]) +; CHECK-NEXT: [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1 +; CHECK-NEXT: [[B_I1:%.*]] = extractelement <2 x half> [[B]], i64 1 +; CHECK-NEXT: [[R_I1:%.*]] = call half @llvm.minnum.f16(half [[A_I1]], half [[B_I1]]) +; CHECK-NEXT: [[R_UPTO0:%.*]] = insertelement <2 x half> 
poison, half [[R_I0]], i64 0 +; CHECK-NEXT: [[R:%.*]] = insertelement <2 x half> [[R_UPTO0]], half [[R_I1]], i64 1 +; CHECK-NEXT: ret <2 x half> [[R]] +; + %r = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b) + ret <2 x half> %r +} + +define <3 x half> @call_v3f16(<3 x half> %a, <3 x half> %b) { +; CHECK-LABEL: @call_v3f16( +; CHECK-NEXT: [[A_I0:%.*]] = extractelement <3 x half> [[A:%.*]], i64 0 +; CHECK-NEXT: [[B_I0:%.*]] = extractelement <3 x half> [[B:%.*]], i64 0 +; CHECK-NEXT: [[R_I0:%.*]] = call half @llvm.minnum.f16(half [[A_I0]], half [[B_I0]]) +; CHECK-NEXT: [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1 +; CHECK-NEXT: [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 1 +; CHECK-NEXT: [[R_I1:%.*]] = call half @llvm.minnum.f16(half [[A_I1]], half [[B_I1]]) +; CHECK-NEXT: [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2 +; CHECK-NEXT: [[B_I2:%.*]] = extractelement <3 x half> [[B]], i64 2 +; CHECK-NEXT: [[R_I2:%.*]] = call half @llvm.minnum.f16(half [[A_I2]], half [[B_I2]]) +; CHECK-NEXT: [[R_UPTO0:%.*]] = insertelement <3 x half> poison, half [[R_I0]], i64 0 +; CHECK-NEXT: [[R_UPTO1:%.*]] = insertelement <3 x half> [[R_UPTO0]], half [[R_I1]], i64 1 +; CHECK-NEXT: [[R:%.*]] = insertelement <3 x half> [[R_UPTO1]], half [[R_I2]], i64 2 +; CHECK-NEXT: ret <3 x half> [[R]] +; + %r = call <3 x half> @llvm.minnum.v3f16(<3 x half> %a, <3 x half> %b) + ret <3 x half> %r +} + +define <4 x half> @call_v4f16(<4 x half> %a, <4 x half> %b) { +; CHECK-LABEL: @call_v4f16( +; CHECK-NEXT: [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0 +; CHECK-NEXT: [[B_I0:%.*]] = extractelement <4 x half> [[B:%.*]], i64 0 +; CHECK-NEXT: [[R_I0:%.*]] = call half @llvm.minnum.f16(half [[A_I0]], half [[B_I0]]) +; CHECK-NEXT: [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1 +; CHECK-NEXT: [[B_I1:%.*]] = extractelement <4 x half> [[B]], i64 1 +; CHECK-NEXT: [[R_I1:%.*]] = call half @llvm.minnum.f16(half [[A_I1]], half [[B_I1]]) +; CHECK-NEXT: 
[[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2 +; CHECK-NEXT: [[B_I2:%.*]] = extractelement <4 x half> [[B]], i64 2 +; CHECK-NEXT: [[R_I2:%.*]] = call half @llvm.minnum.f16(half [[A_I2]], half [[B_I2]]) +; CHECK-NEXT: [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3 +; CHECK-NEXT: [[B_I3:%.*]] = extractelement <4 x half> [[B]], i64 3 +; CHECK-NEXT: [[R_I3:%.*]] = call half @llvm.minnum.f16(half [[A_I3]], half [[B_I3]]) +; CHECK-NEXT: [[R_UPTO0:%.*]] = insertelement <4 x half> poison, half [[R_I0]], i64 0 +; CHECK-NEXT: [[R_UPTO1:%.*]] = insertelement <4 x half> [[R_UPTO0]], half [[R_I1]], i64 1 +; CHECK-NEXT: [[R_UPTO2:%.*]] = insertelement <4 x half> [[R_UPTO1]], half [[R_I2]], i64 2 +; CHECK-NEXT: [[R:%.*]] = insertelement <4 x half> [[R_UPTO2]], half [[R_I3]], i64 3 +; CHECK-NEXT: ret <4 x half> [[R]] +; + %r = call <4 x half> @llvm.minnum.v4f16(<4 x half> %a, <4 x half> %b) + ret <4 x half> %r +} + +declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) +declare <3 x half> @llvm.minnum.v3f16(<3 x half>, <3 x half>) +declare <4 x half> @llvm.minnum.v4f16(<4 x half>, <4 x half>)