Index: llvm/trunk/lib/Analysis/ConstantFolding.cpp
===================================================================
--- llvm/trunk/lib/Analysis/ConstantFolding.cpp
+++ llvm/trunk/lib/Analysis/ConstantFolding.cpp
@@ -1422,6 +1422,8 @@
   case Intrinsic::uadd_sat:
   case Intrinsic::ssub_sat:
   case Intrinsic::usub_sat:
+  case Intrinsic::smul_fix:
+  case Intrinsic::smul_fix_sat:
   case Intrinsic::convert_from_fp16:
   case Intrinsic::convert_to_fp16:
   case Intrinsic::bitreverse:
@@ -2198,6 +2200,43 @@
     }
   }
 
+  if (const auto *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
+    if (const auto *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
+      if (const auto *Op3 = dyn_cast<ConstantInt>(Operands[2])) {
+        switch (IntrinsicID) {
+        default: break;
+        case Intrinsic::smul_fix:
+        case Intrinsic::smul_fix_sat: {
+          // This code performs rounding towards negative infinity in case the
+          // result cannot be represented exactly for the given scale. Targets
+          // that do care about rounding should use a target hook for specifying
+          // how rounding should be done, and provide their own folding to be
+          // consistent with rounding. This is the same approach as used by
+          // DAGTypeLegalizer::ExpandIntRes_MULFIX.
+          APInt Lhs = Op1->getValue();
+          APInt Rhs = Op2->getValue();
+          unsigned Scale = Op3->getValue().getZExtValue();
+          unsigned Width = Lhs.getBitWidth();
+          assert(Scale < Width && "Illegal scale.");
+          unsigned ExtendedWidth = Width * 2;
+          APInt Product = (Lhs.sextOrSelf(ExtendedWidth) *
+                           Rhs.sextOrSelf(ExtendedWidth)).ashr(Scale);
+          if (IntrinsicID == Intrinsic::smul_fix_sat) {
+            APInt MaxValue =
+                APInt::getSignedMaxValue(Width).sextOrSelf(ExtendedWidth);
+            APInt MinValue =
+                APInt::getSignedMinValue(Width).sextOrSelf(ExtendedWidth);
+            Product = APIntOps::smin(Product, MaxValue);
+            Product = APIntOps::smax(Product, MinValue);
+          }
+          return ConstantInt::get(Ty->getContext(),
+                                  Product.sextOrTrunc(Width));
+        }
+        }
+      }
+    }
+  }
+
   if (IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) {
     const APInt *C0, *C1, *C2;
     if (!getConstIntOrUndef(Operands[0], C0) ||
@@ -2307,6 +2346,13 @@
         Lane[J] = Operands[J];
         continue;
       }
+      // These intrinsics use a scalar type for their third argument.
+      if (J == 2 &&
+          (IntrinsicID == Intrinsic::smul_fix ||
+           IntrinsicID == Intrinsic::smul_fix_sat)) {
+        Lane[J] = Operands[J];
+        continue;
+      }
 
       Constant *Agg = Operands[J]->getAggregateElement(I);
       if (!Agg)
Index: llvm/trunk/test/Analysis/ConstantFolding/smul-fix-sat.ll
===================================================================
--- llvm/trunk/test/Analysis/ConstantFolding/smul-fix-sat.ll
+++ llvm/trunk/test/Analysis/ConstantFolding/smul-fix-sat.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -constprop -S | FileCheck %s
+
+;-----------------------------------------------------------------------------
+; Simple test using scalar layout.
+;-----------------------------------------------------------------------------
+
+declare i32 @llvm.smul.fix.sat.i32(i32, i32, i32)
+
+define i32 @test_smul_fix_sat_i32_0() {
+; CHECK-LABEL: @test_smul_fix_sat_i32_0(
+; CHECK-NEXT:    ret i32 536870912
+;
+  %r = call i32 @llvm.smul.fix.sat.i32(i32 1073741824, i32 1073741824, i32 31) ; 0.5 * 0.5
+  ret i32 %r
+}
+
+;-----------------------------------------------------------------------------
+; More extensive tests based on vectors (basically using the scalar fold
+; for each index).
+;-----------------------------------------------------------------------------
+
+declare <8 x i3> @llvm.smul.fix.sat.v8i3(<8 x i3>, <8 x i3>, i32)
+
+define <8 x i3> @test_smul_fix_sat_v8i3_0() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_0(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+      <8 x i3> ,
+      <8 x i3> ,
+      i32 0)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_1() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_1(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+      <8 x i3> ,
+      <8 x i3> ,
+      i32 1)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_2() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_2(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+      <8 x i3> ,
+      <8 x i3> ,
+      i32 2)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_3() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_3(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+      <8 x i3> ,
+      <8 x i3> ,
+      i32 0)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_4() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_4(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+      <8 x i3> ,
+      <8 x i3> ,
+      i32 1)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_5() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_5(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+      <8 x i3> ,
+      <8 x i3> ,
+      i32 2)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_6() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_6(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+      <8 x i3> ,
+      <8 x i3> ,
+      i32 0)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_7() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_7(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+      <8 x i3> ,
+      <8 x i3> ,
+      i32 1)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_8() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_8(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+      <8 x i3> ,
+      <8 x i3> ,
+      i32 2)
+  ret <8 x i3> %r
+}
Index: llvm/trunk/test/Analysis/ConstantFolding/smul-fix.ll
===================================================================
--- llvm/trunk/test/Analysis/ConstantFolding/smul-fix.ll
+++ llvm/trunk/test/Analysis/ConstantFolding/smul-fix.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -constprop -S | FileCheck %s
+
+;-----------------------------------------------------------------------------
+; Simple test using scalar layout.
+;-----------------------------------------------------------------------------
+
+declare i32 @llvm.smul.fix.i32(i32, i32, i32)
+
+define i32 @test_smul_fix_i32_0() {
+; CHECK-LABEL: @test_smul_fix_i32_0(
+; CHECK-NEXT:    ret i32 536870912
+;
+  %r = call i32 @llvm.smul.fix.i32(i32 1073741824, i32 1073741824, i32 31) ; 0.5 * 0.5
+  ret i32 %r
+}
+
+;-----------------------------------------------------------------------------
+; More extensive tests based on vectors (basically using the scalar fold
+; for each index).
+;-----------------------------------------------------------------------------
+
+declare <8 x i3> @llvm.smul.fix.v8i3(<8 x i3>, <8 x i3>, i32)
+
+define <8 x i3> @test_smul_fix_v8i3_0() {
+; CHECK-LABEL: @test_smul_fix_v8i3_0(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+      <8 x i3> ,
+      <8 x i3> ,
+      i32 0)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_1() {
+; CHECK-LABEL: @test_smul_fix_v8i3_1(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+      <8 x i3> ,
+      <8 x i3> ,
+      i32 1)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_2() {
+; CHECK-LABEL: @test_smul_fix_v8i3_2(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+      <8 x i3> ,
+      <8 x i3> ,
+      i32 2)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_3() {
+; CHECK-LABEL: @test_smul_fix_v8i3_3(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+      <8 x i3> ,
+      <8 x i3> ,
+      i32 0)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_4() {
+; CHECK-LABEL: @test_smul_fix_v8i3_4(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+      <8 x i3> ,
+      <8 x i3> ,
+      i32 1)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_5() {
+; CHECK-LABEL: @test_smul_fix_v8i3_5(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+      <8 x i3> ,
+      <8 x i3> ,
+      i32 2)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_6() {
+; CHECK-LABEL: @test_smul_fix_v8i3_6(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+      <8 x i3> ,
+      <8 x i3> ,
+      i32 0)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_7() {
+; CHECK-LABEL: @test_smul_fix_v8i3_7(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+      <8 x i3> ,
+      <8 x i3> ,
+      i32 1)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_8() {
+; CHECK-LABEL: @test_smul_fix_v8i3_8(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+      <8 x i3> ,
+      <8 x i3> ,
+      i32 2)
+  ret <8 x i3> %r
+}
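
For reference, a minimal standalone sketch (not part of the patch) of the arithmetic the new fold performs, specialized to i32 and written with plain integers instead of APInt. The function name SMulFix32, the bool Sat parameter, and the use of int64_t as the double-width type are illustrative assumptions; the sketch only mirrors the widen / multiply / arithmetic-shift / clamp steps of the fold above.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdio>

// Illustrative only: fold a constant smul.fix / smul.fix.sat call on i32.
// Widen to twice the bit width so the full product is representable,
// multiply, shift right by the scale (arithmetic shift rounds towards
// negative infinity), then either clamp (sat) or wrap (plain).
// Assumes the right shift of a negative value is arithmetic, which holds
// on common compilers and is guaranteed from C++20 onwards.
static int32_t SMulFix32(int32_t Lhs, int32_t Rhs, unsigned Scale, bool Sat) {
  assert(Scale < 32 && "Illegal scale.");
  int64_t Product = (static_cast<int64_t>(Lhs) * Rhs) >> Scale;
  if (Sat) {
    Product = std::min<int64_t>(Product, INT32_MAX);
    Product = std::max<int64_t>(Product, INT32_MIN);
  }
  // Truncating back to i32 gives the wrapping behaviour of the non-sat form.
  return static_cast<int32_t>(Product);
}

int main() {
  // Matches test_smul_fix_i32_0: 0.5 * 0.5 at scale 31 is 0.25,
  // i.e. the raw value 536870912.
  printf("%d\n", SMulFix32(1073741824, 1073741824, 31, /*Sat=*/false));
  return 0;
}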