diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -523,6 +523,55 @@
     }
     break;
   }
+  case Intrinsic::fshl:
+  case Intrinsic::fshr: {
+    // Cost funnel shifts whose shift amount (third argument) is a constant;
+    // anything else breaks out of the switch with no cost from this case.
+    // The argument values are needed to inspect the shift amount.
+    if (ICA.getArgs().empty())
+      break;
+
+    // TODO: Add handling for fshl where third argument is not a constant.
+    const TTI::OperandValueInfo OpInfoZ = TTI::getOperandInfo(ICA.getArgs()[2]);
+    if (!OpInfoZ.isConstant())
+      break;
+
+    // Queried once and shared by the table lookup and the manual estimate.
+    const auto LegalisationCost = getTypeLegalizationCost(RetTy);
+    if (OpInfoZ.isUniform()) {
+      // FIXME: The costs could be lower if the codegen is better.
+      static const CostTblEntry FshlTbl[] = {
+          {Intrinsic::fshl, MVT::v4i32, 3}, // ushr + shl + orr
+          {Intrinsic::fshl, MVT::v2i64, 3}, {Intrinsic::fshl, MVT::v16i8, 4},
+          {Intrinsic::fshl, MVT::v8i16, 4}, {Intrinsic::fshl, MVT::v2i32, 3},
+          {Intrinsic::fshl, MVT::v8i8, 4},  {Intrinsic::fshl, MVT::v4i16, 4}};
+      // Costs for both fshl & fshr are the same, so just pass Intrinsic::fshl
+      // to avoid having to duplicate the costs.
+      const auto *Entry =
+          CostTableLookup(FshlTbl, Intrinsic::fshl, LegalisationCost.second);
+      if (Entry)
+        return LegalisationCost.first * Entry->Cost;
+    }
+
+    // The manual estimate below applies to integer types only.
+    if (!RetTy->isIntegerTy())
+      break;
+
+    // Estimate cost manually, as types like i8 and i16 will get promoted to
+    // i32 and CostTableLookup will ignore the extra conversion cost.
+    const unsigned BitWidth = RetTy->getScalarSizeInBits();
+
+    // fshl/fshr for i32 and i64 can be lowered to a single extr instruction.
+    if (BitWidth == 32 || BitWidth == 64)
+      return LegalisationCost.first;
+
+    // Remaining multiples of 64 bits (e.g. i128) get no estimate here.
+    if (BitWidth % 64 == 0)
+      break;
+
+    // Every other width costs one unit on top of the legalization count.
+    return LegalisationCost.first + 1;
+  }
   default:
     break;
   }
diff --git a/llvm/test/Analysis/CostModel/AArch64/fshl.ll b/llvm/test/Analysis/CostModel/AArch64/fshl.ll
--- a/llvm/test/Analysis/CostModel/AArch64/fshl.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/fshl.ll
@@ -5,7 +5,7 @@
 
 define i8 @fshl_i8_3rd_arg_const(i8 %a, i8 %b) {
 ; CHECK-LABEL: 'fshl_i8_3rd_arg_const'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 9)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 9)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %fshl
 ;
 entry:
@@ -27,7 +27,7 @@
 
 define i16 @fshl_i16(i16 %a, i16 %b) {
 ; CHECK-LABEL: 'fshl_i16'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 9)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 9)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %fshl
 ;
 entry:
@@ -39,7 +39,7 @@
 
 define i32 @fshl_i32_3rd_arg_const(i32 %a, i32 %b) {
 ; CHECK-LABEL: 'fshl_i32_3rd_arg_const'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 9)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 9)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %fshl
 ;
 entry:
@@ -61,7 +61,7 @@
 
 define i64 @fshl_i64_3rd_arg_const(i64 %a, i64 %b) {
 ; CHECK-LABEL: 'fshl_i64_3rd_arg_const'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 9)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for
instruction: %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 9) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %fshl ; entry: @@ -83,7 +83,7 @@ define i19 @fshl_i19(i19 %a, i19 %b) { ; CHECK-LABEL: 'fshl_i19' -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call i19 @llvm.fshl.i19(i19 %a, i19 %b, i19 9) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call i19 @llvm.fshl.i19(i19 %a, i19 %b, i19 9) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i19 %fshl ; entry: @@ -96,7 +96,7 @@ define <16 x i8> @fshl_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: 'fshl_v16i8_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshl ; entry: @@ -128,7 +128,7 @@ define <8 x i16> @fshl_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: 'fshl_v8i16_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> ) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> ) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshl ; entry: @@ -160,7 +160,7 @@ define <4 x i32> @fshl_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: 'fshl_v4i32_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found an 
estimated cost of 6 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> ) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> ) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshl ; entry: @@ -192,7 +192,7 @@ define <2 x i64> @fshl_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: 'fshl_v2i64_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> ) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> ) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshl ; entry: @@ -247,7 +247,7 @@ define i66 @fshl_i66(i66 %a, i66 %b) { ; CHECK-LABEL: 'fshl_i66' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %fshl = tail call i66 @llvm.fshl.i66(i66 %a, i66 %b, i66 9) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call i66 @llvm.fshl.i66(i66 %a, i66 %b, i66 9) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i66 %fshl ; entry: diff --git a/llvm/test/Analysis/CostModel/AArch64/fshr.ll b/llvm/test/Analysis/CostModel/AArch64/fshr.ll --- a/llvm/test/Analysis/CostModel/AArch64/fshr.ll +++ b/llvm/test/Analysis/CostModel/AArch64/fshr.ll @@ -5,7 +5,7 @@ define i8 @fshr_i8_3rd_arg_const(i8 %a, i8 %b) { ; CHECK-LABEL: 'fshr_i8_3rd_arg_const' -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 9) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 9) ; CHECK-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: ret i8 %fshr ; entry: @@ -27,7 +27,7 @@ define i16 @fshr_i16(i16 %a, i16 %b) { ; CHECK-LABEL: 'fshr_i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 9) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 9) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %fshr ; entry: @@ -39,7 +39,7 @@ define i32 @fshr_i32_3rd_arg_const(i32 %a, i32 %b) { ; CHECK-LABEL: 'fshr_i32_3rd_arg_const' -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 9) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 9) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %fshr ; entry: @@ -61,7 +61,7 @@ define i64 @fshr_i64_3rd_arg_const(i64 %a, i64 %b) { ; CHECK-LABEL: 'fshr_i64_3rd_arg_const' -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 9) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 9) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %fshr ; entry: @@ -83,7 +83,7 @@ define i19 @fshr_i19(i19 %a, i19 %b) { ; CHECK-LABEL: 'fshr_i19' -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call i19 @llvm.fshr.i19(i19 %a, i19 %b, i19 9) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call i19 @llvm.fshr.i19(i19 %a, i19 %b, i19 9) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i19 %fshr ; entry: @@ -96,7 +96,7 @@ define <16 x i8> @fshr_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x 
i8> %a, <16 x i8> %b) { ; CHECK-LABEL: 'fshr_v16i8_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshr ; entry: @@ -128,7 +128,7 @@ define <8 x i16> @fshr_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: 'fshr_v8i16_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> ) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> ) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshr ; entry: @@ -160,7 +160,7 @@ define <4 x i32> @fshr_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: 'fshr_v4i32_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> ) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> ) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshr ; entry: @@ -192,7 +192,7 @@ define <2 x i64> @fshr_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: 'fshr_v2i64_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> 
%b, <2 x i64> ) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> ) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshr ; entry: @@ -247,7 +247,7 @@ define i66 @fshr_i66(i66 %a, i66 %b) { ; CHECK-LABEL: 'fshr_i66' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %fshr = tail call i66 @llvm.fshr.i66(i66 %a, i66 %b, i66 9) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call i66 @llvm.fshr.i66(i66 %a, i66 %b, i66 9) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i66 %fshr ; entry: