Index: llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1109,6 +1109,9 @@ if (Instruction *Sel = foldClampRangeOfTwo(II, Builder)) return Sel; + if (Instruction *SAdd = matchSignedSaturate(*II)) + return SAdd; + if (match(I1, m_ImmConstant())) if (auto *Sel = dyn_cast(I0)) if (Instruction *R = FoldOpIntoSelect(*II, Sel)) Index: llvm/lib/Transforms/InstCombine/InstCombineInternal.h =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -326,7 +326,7 @@ Instruction *narrowMathIfNoOverflow(BinaryOperator &I); Instruction *narrowFunnelShift(TruncInst &Trunc); Instruction *optimizeBitCastFromPhi(CastInst &CI, PHINode *PN); - Instruction *matchSignedSaturate(SelectInst &MinMax1); + Instruction *matchSignedSaturate(Instruction &MinMax1); void freelyInvertAllUsersOf(Value *V); Index: llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -2245,7 +2245,7 @@ /// Match a signed saturating min/max clamp pattern, that we can turn into /// either a sadd_sat or ssub_sat, or a fptosi_sat. -Instruction *InstCombinerImpl::matchSignedSaturate(SelectInst &MinMax1) { +Instruction *InstCombinerImpl::matchSignedSaturate(Instruction &MinMax1) { Type *Ty = MinMax1.getType(); // We are looking for a tree of: @@ -2276,9 +2276,10 @@ if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth)) return nullptr; - // Also make sure that the number of uses is as expected. The "3"s are for the - // the two items of min/max (the compare and the select). 
- if (MinMax2->hasNUsesOrMore(3) || Base->hasNUsesOrMore(3)) + // Also make sure that the number of uses is as expected. The 3 is for the + // two items of the compare and the select, or 2 from a min/max. + unsigned ExpUses = isa(MinMax1) ? 2 : 3; + if (MinMax2->hasNUsesOrMore(ExpUses) || Base->hasNUsesOrMore(ExpUses)) return nullptr; // Create the new type (which can be a vector type) Index: llvm/test/Transforms/InstCombine/sadd_sat.ll =================================================================== --- llvm/test/Transforms/InstCombine/sadd_sat.ll +++ llvm/test/Transforms/InstCombine/sadd_sat.ll @@ -24,13 +24,8 @@ define i32 @sadd_sat32_mm(i32 %a, i32 %b) { ; CHECK-LABEL: @sadd_sat32_mm( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64 -; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[CONV1]], [[CONV]] -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call i64 @llvm.smin.i64(i64 [[ADD]], i64 2147483647) -; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call i64 @llvm.smax.i64(i64 [[SPEC_STORE_SELECT]], i64 -2147483648) -; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[SPEC_STORE_SELECT8]] to i32 -; CHECK-NEXT: ret i32 [[CONV7]] +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[B:%.*]], i32 [[A:%.*]]) +; CHECK-NEXT: ret i32 [[TMP0]] ; entry: %conv = sext i32 %a to i64 @@ -63,13 +58,8 @@ define i32 @ssub_sat32_mm(i32 %a, i32 %b) { ; CHECK-LABEL: @ssub_sat32_mm( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64 -; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64 -; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[CONV]], [[CONV1]] -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call i64 @llvm.smin.i64(i64 [[SUB]], i64 2147483647) -; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call i64 @llvm.smax.i64(i64 [[SPEC_STORE_SELECT]], i64 -2147483648) -; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[SPEC_STORE_SELECT8]] to i32 -; CHECK-NEXT: ret i32 [[CONV7]] +; CHECK-NEXT: [[TMP0:%.*]] = 
call i32 @llvm.ssub.sat.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +; CHECK-NEXT: ret i32 [[TMP0]] ; entry: %conv = sext i32 %a to i64 @@ -148,13 +138,8 @@ define signext i16 @sadd_sat16_mm(i16 signext %a, i16 signext %b) { ; CHECK-LABEL: @sadd_sat16_mm( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[A:%.*]] to i32 -; CHECK-NEXT: [[CONV1:%.*]] = sext i16 [[B:%.*]] to i32 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call i32 @llvm.smin.i32(i32 [[ADD]], i32 32767) -; CHECK-NEXT: [[SPEC_STORE_SELECT10:%.*]] = call i32 @llvm.smax.i32(i32 [[SPEC_STORE_SELECT]], i32 -32768) -; CHECK-NEXT: [[CONV9:%.*]] = trunc i32 [[SPEC_STORE_SELECT10]] to i16 -; CHECK-NEXT: ret i16 [[CONV9]] +; CHECK-NEXT: [[TMP0:%.*]] = call i16 @llvm.sadd.sat.i16(i16 [[B:%.*]], i16 [[A:%.*]]) +; CHECK-NEXT: ret i16 [[TMP0]] ; entry: %conv = sext i16 %a to i32 @@ -187,13 +172,8 @@ define signext i16 @ssub_sat16_mm(i16 signext %a, i16 signext %b) { ; CHECK-LABEL: @ssub_sat16_mm( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[A:%.*]] to i32 -; CHECK-NEXT: [[CONV1:%.*]] = sext i16 [[B:%.*]] to i32 -; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[CONV]], [[CONV1]] -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call i32 @llvm.smin.i32(i32 [[SUB]], i32 32767) -; CHECK-NEXT: [[SPEC_STORE_SELECT10:%.*]] = call i32 @llvm.smax.i32(i32 [[SPEC_STORE_SELECT]], i32 -32768) -; CHECK-NEXT: [[CONV9:%.*]] = trunc i32 [[SPEC_STORE_SELECT10]] to i16 -; CHECK-NEXT: ret i16 [[CONV9]] +; CHECK-NEXT: [[TMP0:%.*]] = call i16 @llvm.ssub.sat.i16(i16 [[A:%.*]], i16 [[B:%.*]]) +; CHECK-NEXT: ret i16 [[TMP0]] ; entry: %conv = sext i16 %a to i32 @@ -226,13 +206,8 @@ define signext i8 @sadd_sat8_mm(i8 signext %a, i8 signext %b) { ; CHECK-LABEL: @sadd_sat8_mm( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[A:%.*]] to i32 -; CHECK-NEXT: [[CONV1:%.*]] = sext i8 [[B:%.*]] to i32 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] -; CHECK-NEXT: 
[[SPEC_STORE_SELECT:%.*]] = call i32 @llvm.smin.i32(i32 [[ADD]], i32 127) -; CHECK-NEXT: [[SPEC_STORE_SELECT10:%.*]] = call i32 @llvm.smax.i32(i32 [[SPEC_STORE_SELECT]], i32 -128) -; CHECK-NEXT: [[CONV9:%.*]] = trunc i32 [[SPEC_STORE_SELECT10]] to i8 -; CHECK-NEXT: ret i8 [[CONV9]] +; CHECK-NEXT: [[TMP0:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[B:%.*]], i8 [[A:%.*]]) +; CHECK-NEXT: ret i8 [[TMP0]] ; entry: %conv = sext i8 %a to i32 @@ -265,13 +240,8 @@ define signext i8 @ssub_sat8_mm(i8 signext %a, i8 signext %b) { ; CHECK-LABEL: @ssub_sat8_mm( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[A:%.*]] to i32 -; CHECK-NEXT: [[CONV1:%.*]] = sext i8 [[B:%.*]] to i32 -; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[CONV]], [[CONV1]] -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call i32 @llvm.smin.i32(i32 [[SUB]], i32 127) -; CHECK-NEXT: [[SPEC_STORE_SELECT10:%.*]] = call i32 @llvm.smax.i32(i32 [[SPEC_STORE_SELECT]], i32 -128) -; CHECK-NEXT: [[CONV9:%.*]] = trunc i32 [[SPEC_STORE_SELECT10]] to i8 -; CHECK-NEXT: ret i8 [[CONV9]] +; CHECK-NEXT: [[TMP0:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[A:%.*]], i8 [[B:%.*]]) +; CHECK-NEXT: ret i8 [[TMP0]] ; entry: %conv = sext i8 %a to i32 @@ -390,13 +360,8 @@ define <4 x i32> @sadd_satv4i32_mm(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @sadd_satv4i32_mm( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CONV:%.*]] = sext <4 x i32> [[A:%.*]] to <4 x i64> -; CHECK-NEXT: [[CONV1:%.*]] = sext <4 x i32> [[B:%.*]] to <4 x i64> -; CHECK-NEXT: [[ADD:%.*]] = add nsw <4 x i64> [[CONV1]], [[CONV]] -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[ADD]], <4 x i64> ) -; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[SPEC_STORE_SELECT]], <4 x i64> ) -; CHECK-NEXT: [[CONV7:%.*]] = trunc <4 x i64> [[SPEC_STORE_SELECT8]] to <4 x i32> -; CHECK-NEXT: ret <4 x i32> [[CONV7]] +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]]) +; 
CHECK-NEXT: ret <4 x i32> [[TMP0]] ; entry: %conv = sext <4 x i32> %a to <4 x i64> @@ -429,13 +394,8 @@ define <4 x i32> @ssub_satv4i32_mm(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @ssub_satv4i32_mm( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CONV:%.*]] = sext <4 x i32> [[A:%.*]] to <4 x i64> -; CHECK-NEXT: [[CONV1:%.*]] = sext <4 x i32> [[B:%.*]] to <4 x i64> -; CHECK-NEXT: [[ADD:%.*]] = sub nsw <4 x i64> [[CONV1]], [[CONV]] -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[ADD]], <4 x i64> ) -; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[SPEC_STORE_SELECT]], <4 x i64> ) -; CHECK-NEXT: [[CONV7:%.*]] = trunc <4 x i64> [[SPEC_STORE_SELECT8]] to <4 x i32> -; CHECK-NEXT: ret <4 x i32> [[CONV7]] +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]]) +; CHECK-NEXT: ret <4 x i32> [[TMP0]] ; entry: %conv = sext <4 x i32> %a to <4 x i64> @@ -534,6 +494,29 @@ ret i32 %conv7 } +define i32 @sadd_sat32_extrause_2_mm(i32 %a, i32 %b) { +; CHECK-LABEL: @sadd_sat32_extrause_2_mm( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64 +; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[CONV1]], [[CONV]] +; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call i64 @llvm.smin.i64(i64 [[ADD]], i64 2147483647) +; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call i64 @llvm.smax.i64(i64 [[SPEC_STORE_SELECT]], i64 -2147483648) +; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[SPEC_STORE_SELECT8]] to i32 +; CHECK-NEXT: call void @use64(i64 [[SPEC_STORE_SELECT]]) +; CHECK-NEXT: ret i32 [[CONV7]] +; +entry: + %conv = sext i32 %a to i64 + %conv1 = sext i32 %b to i64 + %add = add i64 %conv1, %conv + %spec.store.select = call i64 @llvm.smin.i64(i64 %add, i64 2147483647) + %spec.store.select8 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 -2147483648) + %conv7 = trunc i64 %spec.store.select8 to i32 + call void 
@use64(i64 %spec.store.select) + ret i32 %conv7 +} + define i32 @sadd_sat32_extrause_3(i32 %a, i32 %b) { ; CHECK-LABEL: @sadd_sat32_extrause_3( ; CHECK-NEXT: entry: @@ -561,6 +544,29 @@ ret i32 %conv7 } +define i32 @sadd_sat32_extrause_3_mm(i32 %a, i32 %b) { +; CHECK-LABEL: @sadd_sat32_extrause_3_mm( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64 +; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[CONV1]], [[CONV]] +; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call i64 @llvm.smin.i64(i64 [[ADD]], i64 2147483647) +; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call i64 @llvm.smax.i64(i64 [[SPEC_STORE_SELECT]], i64 -2147483648) +; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[SPEC_STORE_SELECT8]] to i32 +; CHECK-NEXT: call void @use64(i64 [[ADD]]) +; CHECK-NEXT: ret i32 [[CONV7]] +; +entry: + %conv = sext i32 %a to i64 + %conv1 = sext i32 %b to i64 + %add = add i64 %conv1, %conv + %spec.store.select = call i64 @llvm.smin.i64(i64 %add, i64 2147483647) + %spec.store.select8 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 -2147483648) + %conv7 = trunc i64 %spec.store.select8 to i32 + call void @use64(i64 %add) + ret i32 %conv7 +} + define i32 @sadd_sat32_trunc(i32 %a, i32 %b) { ; CHECK-LABEL: @sadd_sat32_trunc( ; CHECK-NEXT: entry: