Index: llvm/include/llvm/IR/PatternMatch.h =================================================================== --- llvm/include/llvm/IR/PatternMatch.h +++ llvm/include/llvm/IR/PatternMatch.h @@ -659,9 +659,27 @@ } }; +/// Match a specified integer value or vector of all elements of that +// value. +struct specific_apintval { + APInt Val; + + specific_apintval(APInt V) : Val(V) {} + + template bool match(ITy *V) { + const auto *CI = dyn_cast(V); + if (!CI && V->getType()->isVectorTy()) + if (const auto *C = dyn_cast(V)) + CI = dyn_cast_or_null(C->getSplatValue()); + + return CI && CI->getValue() == Val; + } +}; + /// Match a specific integer value or vector with all elements equal to /// the value. inline specific_intval m_SpecificInt(uint64_t V) { return specific_intval(V); } +inline specific_apintval m_SpecificAPInt(APInt V) { return specific_apintval(V); } /// Match a ConstantInt and bind to its value. This does not match /// ConstantInts wider than 64-bits. Index: llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -681,6 +681,57 @@ return nullptr; } +/// Match a sadd_sat or ssub_sat which is using a higher bitwidth arithmetic to +/// perform the saturate. +static Instruction *matchSAddSubSat(CastInst &Trunc, + InstCombiner::BuilderTy &Builder) { + // We are looking for a tree of: + // trunc(max(INT_MIN, min(INT_MAX, add(sext A, sext B))) + // Where the min and max could be reversed + Type *Ty = Trunc.getType(); + Type *LargeTy = Trunc.getOperand(0)->getType(); + APInt MinValue = APInt::getSignedMinValue(Ty->getScalarSizeInBits()) + .sext(LargeTy->getScalarSizeInBits()); + APInt MaxValue = APInt::getSignedMaxValue(Ty->getScalarSizeInBits()) + .sext(LargeTy->getScalarSizeInBits()); + + Value *MinMax1 = Trunc.getOperand(0); + Instruction *MinMax2; + BinaryOperator *AddSub; + if (match(MinMax1, m_SMin(m_Instruction(MinMax2), + m_SpecificAPInt(MaxValue)))) { + if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_SpecificAPInt(MinValue)))) + return nullptr; + } else if (match(MinMax1, m_SMax(m_Instruction(MinMax2), + m_SpecificAPInt(MinValue)))) { + if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_SpecificAPInt(MaxValue)))) + return nullptr; + } else + return nullptr; + + // Also make sure that the number of uses is as expected. The "3"s are for the + // the two items of min/max (the compare and the select). + if (!MinMax1->hasOneUse() || MinMax2->hasNUsesOrMore(3) || + AddSub->hasNUsesOrMore(3)) + return nullptr; + + // Match the two extends from the add/sub + Value *A, *B; + Function *F; + if (match(AddSub, m_Add(m_SExt(m_Value(A)), m_SExt(m_Value(B))))) + F = Intrinsic::getDeclaration(Trunc.getModule(), Intrinsic::sadd_sat, Ty); + else if (match(AddSub, m_Sub(m_SExt(m_Value(A)), m_SExt(m_Value(B))))) + F = Intrinsic::getDeclaration(Trunc.getModule(), Intrinsic::ssub_sat, Ty); + else + return nullptr; + // And check the incoming are the correct type + if (A->getType() != Ty || B->getType() != Ty) + return nullptr; + + // Finally create and return the sat intrinsic + return Builder.CreateCall(F, {A, B}); +} + Instruction *InstCombiner::visitTrunc(TruncInst &CI) { if (Instruction *Result = commonCastTransforms(CI)) return Result; @@ -692,18 +743,21 @@ // type. Only do this if the dest type is a simple type, don't convert the // expression tree to something weird like i93 unless the source is also // strange. - if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) && - canEvaluateTruncated(Src, DestTy, *this, &CI)) { + if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy))) { + if (canEvaluateTruncated(Src, DestTy, *this, &CI)) { + // If this cast is a truncate, evaluting in a different type always + // eliminates the cast, so it is always a win. + LLVM_DEBUG( + dbgs() << "ICE: EvaluateInDifferentType converting expression type" + " to avoid cast: " + << CI << '\n'); + Value *Res = EvaluateInDifferentType(Src, DestTy, false); + assert(Res->getType() == DestTy); + return replaceInstUsesWith(CI, Res); + } - // If this cast is a truncate, evaluting in a different type always - // eliminates the cast, so it is always a win. - LLVM_DEBUG( - dbgs() << "ICE: EvaluateInDifferentType converting expression type" - " to avoid cast: " - << CI << '\n'); - Value *Res = EvaluateInDifferentType(Src, DestTy, false); - assert(Res->getType() == DestTy); - return replaceInstUsesWith(CI, Res); + if (Instruction *I = matchSAddSubSat(CI, Builder)) + return replaceInstUsesWith(CI, I); } // Test if the trunc is the user of a select which is part of a Index: llvm/test/Transforms/InstCombine/sadd_sat.ll =================================================================== --- llvm/test/Transforms/InstCombine/sadd_sat.ll +++ llvm/test/Transforms/InstCombine/sadd_sat.ll @@ -4,15 +4,8 @@ define i32 @sadd_sat32(i32 %a, i32 %b) { ; CHECK-LABEL: @sadd_sat32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64 -; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[CONV1]], [[CONV]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 [[ADD]], 2147483647 -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[TMP0]], i64 [[ADD]], i64 2147483647 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[SPEC_STORE_SELECT]], -2147483648 -; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = select i1 [[TMP1]], i64 [[SPEC_STORE_SELECT]], i64 -2147483648 -; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[SPEC_STORE_SELECT8]] to i32 -; CHECK-NEXT: ret i32 [[CONV7]] +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[B:%.*]], i32 [[A:%.*]]) +; CHECK-NEXT: ret i32 [[TMP0]] ; entry: %conv = sext i32 %a to i64 @@ -29,15 +22,8 @@ define i32 @ssub_sat32(i32 %a, i32 %b) { ; CHECK-LABEL: @ssub_sat32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64 -; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64 -; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[CONV]], [[CONV1]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 [[SUB]], 2147483647 -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[TMP0]], i64 [[SUB]], i64 2147483647 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[SPEC_STORE_SELECT]], -2147483648 -; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = select i1 [[TMP1]], i64 [[SPEC_STORE_SELECT]], i64 -2147483648 -; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[SPEC_STORE_SELECT8]] to i32 -; CHECK-NEXT: ret i32 [[CONV7]] +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +; CHECK-NEXT: ret i32 [[TMP0]] ; entry: %conv = sext i32 %a to i64 @@ -79,15 +65,8 @@ define signext i16 @sadd_sat16(i16 signext %a, i16 signext %b) { ; CHECK-LABEL: @sadd_sat16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[A:%.*]] to i32 -; CHECK-NEXT: [[CONV1:%.*]] = sext i16 [[B:%.*]] to i32 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i32 [[ADD]], 32767 -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[TMP0]], i32 [[ADD]], i32 32767 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[SPEC_STORE_SELECT]], -32768 -; CHECK-NEXT: [[SPEC_STORE_SELECT10:%.*]] = select i1 [[TMP1]], i32 [[SPEC_STORE_SELECT]], i32 -32768 -; CHECK-NEXT: [[CONV9:%.*]] = trunc i32 [[SPEC_STORE_SELECT10]] to i16 -; CHECK-NEXT: ret i16 [[CONV9]] +; CHECK-NEXT: [[TMP0:%.*]] = call i16 @llvm.sadd.sat.i16(i16 [[B:%.*]], i16 [[A:%.*]]) +; CHECK-NEXT: ret i16 [[TMP0]] ; entry: %conv = sext i16 %a to i32 @@ -104,15 +83,8 @@ define signext i16 @ssub_sat16(i16 signext %a, i16 signext %b) { ; CHECK-LABEL: @ssub_sat16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[A:%.*]] to i32 -; CHECK-NEXT: [[CONV1:%.*]] = sext i16 [[B:%.*]] to i32 -; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[CONV]], [[CONV1]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i32 [[SUB]], 32767 -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[TMP0]], i32 [[SUB]], i32 32767 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[SPEC_STORE_SELECT]], -32768 -; CHECK-NEXT: [[SPEC_STORE_SELECT10:%.*]] = select i1 [[TMP1]], i32 [[SPEC_STORE_SELECT]], i32 -32768 -; CHECK-NEXT: [[CONV9:%.*]] = trunc i32 [[SPEC_STORE_SELECT10]] to i16 -; CHECK-NEXT: ret i16 [[CONV9]] +; CHECK-NEXT: [[TMP0:%.*]] = call i16 @llvm.ssub.sat.i16(i16 [[A:%.*]], i16 [[B:%.*]]) +; CHECK-NEXT: ret i16 [[TMP0]] ; entry: %conv = sext i16 %a to i32 @@ -129,15 +101,8 @@ define signext i8 @sadd_sat8(i8 signext %a, i8 signext %b) { ; CHECK-LABEL: @sadd_sat8( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[A:%.*]] to i32 -; CHECK-NEXT: [[CONV1:%.*]] = sext i8 [[B:%.*]] to i32 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i32 [[ADD]], 127 -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[TMP0]], i32 [[ADD]], i32 127 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[SPEC_STORE_SELECT]], -128 -; CHECK-NEXT: [[SPEC_STORE_SELECT10:%.*]] = select i1 [[TMP1]], i32 [[SPEC_STORE_SELECT]], i32 -128 -; CHECK-NEXT: [[CONV9:%.*]] = trunc i32 [[SPEC_STORE_SELECT10]] to i8 -; CHECK-NEXT: ret i8 [[CONV9]] +; CHECK-NEXT: [[TMP0:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[B:%.*]], i8 [[A:%.*]]) +; CHECK-NEXT: ret i8 [[TMP0]] ; entry: %conv = sext i8 %a to i32 @@ -154,15 +119,8 @@ define signext i8 @ssub_sat8(i8 signext %a, i8 signext %b) { ; CHECK-LABEL: @ssub_sat8( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[A:%.*]] to i32 -; CHECK-NEXT: [[CONV1:%.*]] = sext i8 [[B:%.*]] to i32 -; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[CONV]], [[CONV1]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i32 [[SUB]], 127 -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[TMP0]], i32 [[SUB]], i32 127 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[SPEC_STORE_SELECT]], -128 -; CHECK-NEXT: [[SPEC_STORE_SELECT10:%.*]] = select i1 [[TMP1]], i32 [[SPEC_STORE_SELECT]], i32 -128 -; CHECK-NEXT: [[CONV9:%.*]] = trunc i32 [[SPEC_STORE_SELECT10]] to i8 -; CHECK-NEXT: ret i8 [[CONV9]] +; CHECK-NEXT: [[TMP0:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[A:%.*]], i8 [[B:%.*]]) +; CHECK-NEXT: ret i8 [[TMP0]] ; entry: %conv = sext i8 %a to i32 @@ -179,15 +137,8 @@ define signext i64 @sadd_sat64(i64 signext %a, i64 signext %b) { ; CHECK-LABEL: @sadd_sat64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CONV:%.*]] = sext i64 [[A:%.*]] to i65 -; CHECK-NEXT: [[CONV1:%.*]] = sext i64 [[B:%.*]] to i65 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i65 [[CONV1]], [[CONV]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i65 [[ADD]], 9223372036854775807 -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[TMP0]], i65 [[ADD]], i65 9223372036854775807 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i65 [[SPEC_STORE_SELECT]], -9223372036854775808 -; CHECK-NEXT: [[SPEC_STORE_SELECT10:%.*]] = select i1 [[TMP1]], i65 [[SPEC_STORE_SELECT]], i65 -9223372036854775808 -; CHECK-NEXT: [[CONV9:%.*]] = trunc i65 [[SPEC_STORE_SELECT10]] to i64 -; CHECK-NEXT: ret i64 [[CONV9]] +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[B:%.*]], i64 [[A:%.*]]) +; CHECK-NEXT: ret i64 [[TMP0]] ; entry: %conv = sext i64 %a to i65 @@ -204,15 +155,8 @@ define signext i64 @ssub_sat64(i64 signext %a, i64 signext %b) { ; CHECK-LABEL: @ssub_sat64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CONV:%.*]] = sext i64 [[A:%.*]] to i65 -; CHECK-NEXT: [[CONV1:%.*]] = sext i64 [[B:%.*]] to i65 -; CHECK-NEXT: [[SUB:%.*]] = sub nsw i65 [[CONV]], [[CONV1]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i65 [[SUB]], 9223372036854775807 -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[TMP0]], i65 [[SUB]], i65 9223372036854775807 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i65 [[SPEC_STORE_SELECT]], -9223372036854775808 -; CHECK-NEXT: [[SPEC_STORE_SELECT10:%.*]] = select i1 [[TMP1]], i65 [[SPEC_STORE_SELECT]], i65 -9223372036854775808 -; CHECK-NEXT: [[CONV9:%.*]] = trunc i65 [[SPEC_STORE_SELECT10]] to i64 -; CHECK-NEXT: ret i64 [[CONV9]] +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.ssub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +; CHECK-NEXT: ret i64 [[TMP0]] ; entry: %conv = sext i64 %a to i65 @@ -229,15 +173,8 @@ define <4 x i32> @sadd_satv4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @sadd_satv4i32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CONV:%.*]] = sext <4 x i32> [[A:%.*]] to <4 x i64> -; CHECK-NEXT: [[CONV1:%.*]] = sext <4 x i32> [[B:%.*]] to <4 x i64> -; CHECK-NEXT: [[ADD:%.*]] = add nsw <4 x i64> [[CONV1]], [[CONV]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i64> [[ADD]], -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[ADD]], <4 x i64> -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i64> [[SPEC_STORE_SELECT]], -; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[SPEC_STORE_SELECT]], <4 x i64> -; CHECK-NEXT: [[CONV7:%.*]] = trunc <4 x i64> [[SPEC_STORE_SELECT8]] to <4 x i32> -; CHECK-NEXT: ret <4 x i32> [[CONV7]] +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]]) +; CHECK-NEXT: ret <4 x i32> [[TMP0]] ; entry: %conv = sext <4 x i32> %a to <4 x i64> @@ -254,15 +191,8 @@ define <4 x i32> @ssub_satv4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @ssub_satv4i32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CONV:%.*]] = sext <4 x i32> [[A:%.*]] to <4 x i64> -; CHECK-NEXT: [[CONV1:%.*]] = sext <4 x i32> [[B:%.*]] to <4 x i64> -; CHECK-NEXT: [[ADD:%.*]] = sub nsw <4 x i64> [[CONV1]], [[CONV]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i64> [[ADD]], -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[ADD]], <4 x i64> -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i64> [[SPEC_STORE_SELECT]], -; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[SPEC_STORE_SELECT]], <4 x i64> -; CHECK-NEXT: [[CONV7:%.*]] = trunc <4 x i64> [[SPEC_STORE_SELECT8]] to <4 x i32> -; CHECK-NEXT: ret <4 x i32> [[CONV7]] +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]]) +; CHECK-NEXT: ret <4 x i32> [[TMP0]] ; entry: %conv = sext <4 x i32> %a to <4 x i64>