Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -2638,6 +2638,9 @@ static const TypeConversionCostTblEntry VectorSelectTbl[] = { + { ISD::SELECT, MVT::v2i1, MVT::v2f32, 2 }, + { ISD::SELECT, MVT::v2i1, MVT::v2f64, 2 }, + { ISD::SELECT, MVT::v4i1, MVT::v4f32, 2 }, { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 }, { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 }, { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 }, Index: llvm/test/Analysis/CostModel/AArch64/select.ll =================================================================== --- llvm/test/Analysis/CostModel/AArch64/select.ll +++ llvm/test/Analysis/CostModel/AArch64/select.ll @@ -18,6 +18,9 @@ ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2float = select <2 x i1> undef, <2 x float> undef, <2 x float> undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4float = select <4 x i1> undef, <4 x float> undef, <4 x float> undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2double = select <2 x i1> undef, <2 x double> undef, <2 x double> undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SIZE-LABEL: 'select' @@ -33,6 +36,9 @@ ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2float = select <2 x i1> undef, <2 x float> undef, <2 x float> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4float = select <4 x i1> undef, <4 x float> undef, <4 x float> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2double = select <2 x i1> undef, <2 x double> undef, <2 x double> undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %v1 = select i1 undef, i8 undef, i8 undef @@ -53,5 +59,9 @@ %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef + ; simd vector float + %v2float = select <2 x i1> undef, <2 x float> undef, <2 x float> undef + %v4float = select <4 x i1> undef, <4 x float> undef, <4 x float> undef + %v2double = select <2 x i1> undef, <2 x double> undef, <2 x double> undef ret void } Index: llvm/test/Analysis/CostModel/AArch64/vector-select.ll =================================================================== --- llvm/test/Analysis/CostModel/AArch64/vector-select.ll +++ llvm/test/Analysis/CostModel/AArch64/vector-select.ll @@ -618,7 +618,7 @@ define <2 x float> @v2f32_select_one(<2 x float> %a, <2 x float> %b, <2 x float> %c) { ; COST-LABEL: v2f32_select_one ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp one <2 x float> %a, %b -; COST-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c +; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c ; CODE-LABEL: v2f32_select_one ; CODE: bb.0 @@ -636,7 +636,7 @@ define <4 x float> @v4f32_select_one(<4 x float> %a, <4 x float> %b, <4 x float> %c) { ; COST-LABEL: v4f32_select_one ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp one <4 x float> %a, %b -; COST-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c +; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c ; CODE-LABEL: v4f32_select_one ; CODE: bb.0 @@ -654,7 +654,7 @@ define <2 x double> @v2f64_select_one(<2 x double> %a, <2 x double> %b, <2 x double> %c) { ; COST-LABEL: v2f64_select_one ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp one <2 x double> %a, %b -; COST-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c +; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c ; ; CODE-LABEL: v2f64_select_one ; CODE: bb.0 @@ -756,7 +756,7 @@ define <2 x float> @v2f32_select_ord(<2 x float> %a, <2 x float> %b, <2 x float> %c) { ; COST-LABEL: v2f32_select_ord ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp ord <2 x float> %a, %b -; COST-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c +; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c ; ; CODE-LABEL: v2f32_select_ord ; CODE: bb.0 @@ -774,7 +774,7 @@ define <4 x float> @v4f32_select_ord(<4 x float> %a, <4 x float> %b, <4 x float> %c) { ; COST-LABEL: v4f32_select_ord ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp ord <4 x float> %a, %b -; COST-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c +; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c ; CODE-LABEL: v4f32_select_ord ; CODE: bb.0 @@ -792,7 +792,7 @@ define <2 x double> @v2f64_select_ord(<2 x double> %a, <2 x double> %b, <2 x double> %c) { ; COST-LABEL: v2f64_select_ord ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = fcmp ord <2 x double> %a, %b -; COST-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c +; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c ; ; CODE-LABEL: v2f64_select_ord ; CODE: bb.0 Index: llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll +++ llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll @@ -133,37 +133,50 @@ define void @loop2(ptr %A, ptr %B, ptr %C, float %x) { ; CHECK-LABEL: @loop2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 40000 -; CHECK-NEXT: [[UGLYGEP2:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 40000 -; CHECK-NEXT: [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 40000 -; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt ptr [[UGLYGEP2]], [[B]] -; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt ptr [[UGLYGEP]], [[C]] +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 40000 +; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 40000 +; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 40000 +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt ptr [[SCEVGEP2]], [[B]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt ptr [[SCEVGEP]], [[C]] ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; CHECK-NEXT: [[BOUND04:%.*]] = icmp ugt ptr [[UGLYGEP3]], [[B]] -; CHECK-NEXT: [[BOUND15:%.*]] = icmp ugt ptr [[UGLYGEP]], [[A]] +; CHECK-NEXT: [[BOUND04:%.*]] = icmp ugt ptr [[SCEVGEP3]], [[B]] +; CHECK-NEXT: [[BOUND15:%.*]] = icmp ugt ptr [[SCEVGEP]], [[A]] ; CHECK-NEXT: [[FOUND_CONFLICT6:%.*]] = and i1 [[BOUND04]], [[BOUND15]] ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT6]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label [[LOOP_BODY:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <4 x float> poison, float [[X]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT10]], <4 x float> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4, !alias.scope !4 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP2]], align 4, !alias.scope !7 -; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[WIDE_LOAD7]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !alias.scope !9, !noalias !11 -; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP1]], <4 x float> , <4 x float> [[WIDE_LOAD8]] -; CHECK-NEXT: [[PREDPHI:%.*]] = fadd <4 x float> [[TMP3]], [[TMP5]] -; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP4]], align 4, !alias.scope !9, !noalias !11 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 -; CHECK-NEXT: br i1 [[TMP6]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 +; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !alias.scope !4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD7]], +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !alias.scope !7 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 4 +; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP5]], align 4, !alias.scope !7 +; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[WIDE_LOAD8]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x float> [[WIDE_LOAD9]], [[BROADCAST_SPLAT11]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, ptr [[TMP8]], align 4, !alias.scope !9, !noalias !11 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[TMP8]], i64 4 +; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x float>, ptr [[TMP9]], align 4, !alias.scope !9, !noalias !11 +; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP2]], <4 x float> , <4 x float> [[WIDE_LOAD12]] +; CHECK-NEXT: [[PREDPHI:%.*]] = fadd <4 x float> [[TMP6]], [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP3]], <4 x float> , <4 x float> [[WIDE_LOAD13]] +; CHECK-NEXT: [[PREDPHI14:%.*]] = fadd <4 x float> [[TMP7]], [[TMP11]] +; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4, !alias.scope !9, !noalias !11 +; CHECK-NEXT: store <4 x float> [[PREDPHI14]], ptr [[TMP9]], align 4, !alias.scope !9, !noalias !11 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 +; CHECK-NEXT: br i1 [[TMP12]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: loop.body: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[C_GEP:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV1]]