Index: llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1277,6 +1277,12 @@ if (!Sel1->hasOneUse()) return nullptr; + // If the types do not match, look through any truncs to the underlying + // instruction. + if (Cmp00->getType() != X->getType()) + if (auto *T = dyn_cast<TruncInst>(X)) + X = T->getOperand(0); + // We now can finish matching the condition of the outermost select: // it should either be the X itself, or an addition of some constant to X. Constant *C1; @@ -1347,15 +1353,29 @@ if (!match(Precond2, m_One())) return nullptr; + // If we are matching from a truncated input, we need to sext the + // ReplacementLow and ReplacementHigh values. Only do the transform if they + // are free to extend due to being constants. + if (X->getType() != Sel0.getType() && + (!isa<Constant>(ReplacementLow) || !isa<Constant>(ReplacementHigh))) + return nullptr; + // All good, finally emit the new pattern. Value *ShouldReplaceLow = Builder.CreateICmpSLT(X, ThresholdLowIncl); Value *ShouldReplaceHigh = Builder.CreateICmpSGE(X, ThresholdHighExcl); - Value *MaybeReplacedLow = - Builder.CreateSelect(ShouldReplaceLow, ReplacementLow, X); - Instruction *MaybeReplacedHigh = - SelectInst::Create(ShouldReplaceHigh, ReplacementHigh, MaybeReplacedLow); - - return MaybeReplacedHigh; + Value *MaybeReplacedLow = Builder.CreateSelect( + ShouldReplaceLow, Builder.CreateSExt(ReplacementLow, X->getType()), X); + + // Create the final select. If we looked through a truncate above, we will + // need to retruncate the result. 
+ if (X->getType() == Sel0.getType()) + return SelectInst::Create(ShouldReplaceHigh, ReplacementHigh, + MaybeReplacedLow); + + Value *MaybeReplacedHigh = Builder.CreateSelect( + ShouldReplaceHigh, Builder.CreateSExt(ReplacementHigh, X->getType()), + MaybeReplacedLow); + return new TruncInst(MaybeReplacedHigh, Sel0.getType()); } // If we have Index: llvm/test/Transforms/InstCombine/truncating-saturate.ll =================================================================== --- llvm/test/Transforms/InstCombine/truncating-saturate.ll +++ llvm/test/Transforms/InstCombine/truncating-saturate.ll @@ -7,12 +7,11 @@ define i8 @testi16i8(i16 %add) { ; CHECK-LABEL: @testi16i8( -; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i16 [[ADD:%.*]] to i8 -; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[ADD]], 128 -; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp ult i16 [[TMP1]], 256 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i16 [[ADD]], -1 -; CHECK-NEXT: [[XOR_I:%.*]] = select i1 [[TMP2]], i8 127, i8 -128 -; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i8 [[CONV1_I]], i8 [[XOR_I]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i16 [[ADD:%.*]], -128 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 [[ADD]], i16 -128 +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i16 [[TMP2]], 127 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i16 [[TMP2]], i16 127 +; CHECK-NEXT: [[COND_I:%.*]] = trunc i16 [[TMP4]] to i8 ; CHECK-NEXT: ret i8 [[COND_I]] ; %sh = lshr i16 %add, 8 @@ -29,12 +28,11 @@ define i32 @testi64i32(i64 %add) { ; CHECK-LABEL: @testi64i32( -; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i64 [[ADD:%.*]] to i32 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[ADD]], 2147483648 -; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp ult i64 [[TMP1]], 4294967296 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i64 [[ADD]], -1 -; CHECK-NEXT: [[XOR_I:%.*]] = select i1 [[TMP2]], i32 2147483647, i32 -2147483648 -; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i32 [[CONV1_I]], i32 [[XOR_I]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[ADD:%.*]], -2147483648 +; 
CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[ADD]], i64 -2147483648 +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i64 [[TMP2]], 2147483647 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 [[TMP2]], i64 2147483647 +; CHECK-NEXT: [[COND_I:%.*]] = trunc i64 [[TMP4]] to i32 ; CHECK-NEXT: ret i32 [[COND_I]] ; %sh = lshr i64 %add, 32 @@ -51,12 +49,11 @@ define i16 @testi32i16i8(i32 %add) { ; CHECK-LABEL: @testi32i16i8( -; CHECK-NEXT: [[A:%.*]] = add i32 [[ADD:%.*]], 128 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[A]], 256 -; CHECK-NEXT: [[T:%.*]] = trunc i32 [[ADD]] to i16 -; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[ADD]], -1 -; CHECK-NEXT: [[F:%.*]] = select i1 [[C]], i16 127, i16 -128 -; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i16 [[T]], i16 [[F]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[ADD:%.*]], -128 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[ADD]], i32 -128 +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 127 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 127 +; CHECK-NEXT: [[R:%.*]] = trunc i32 [[TMP4]] to i16 ; CHECK-NEXT: ret i16 [[R]] ; %a = add i32 %add, 128 @@ -123,16 +120,8 @@ define i32 @testi64i32addsat(i32 %a, i32 %b) { ; CHECK-LABEL: @testi64i32addsat( -; CHECK-NEXT: [[SA:%.*]] = sext i32 [[A:%.*]] to i64 -; CHECK-NEXT: [[SB:%.*]] = sext i32 [[B:%.*]] to i64 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[SA]], [[SB]] -; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i64 [[ADD]] to i32 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[ADD]], 2147483648 -; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp ult i64 [[TMP1]], 4294967296 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i64 [[ADD]], -1 -; CHECK-NEXT: [[XOR_I:%.*]] = select i1 [[TMP2]], i32 2147483647, i32 -2147483648 -; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i32 [[CONV1_I]], i32 [[XOR_I]] -; CHECK-NEXT: ret i32 [[COND_I]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %sa = sext i32 %a to i64 %sb = 
sext i32 %b to i64 @@ -151,12 +140,11 @@ define <4 x i8> @testv4i16i8(<4 x i16> %add) { ; CHECK-LABEL: @testv4i16i8( -; CHECK-NEXT: [[CONV1_I:%.*]] = trunc <4 x i16> [[ADD:%.*]] to <4 x i8> -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i16> [[ADD]], <i16 128, i16 128, i16 128, i16 128> -; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp ult <4 x i16> [[TMP1]], <i16 256, i16 256, i16 256, i16 256> -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i16> [[ADD]], <i16 -1, i16 -1, i16 -1, i16 -1> -; CHECK-NEXT: [[XOR_I:%.*]] = select <4 x i1> [[TMP2]], <4 x i8> <i8 127, i8 127, i8 127, i8 127>, <4 x i8> <i8 -128, i8 -128, i8 -128, i8 -128> -; CHECK-NEXT: [[COND_I:%.*]] = select <4 x i1> [[CMP_NOT_I]], <4 x i8> [[CONV1_I]], <4 x i8> [[XOR_I]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i16> [[ADD:%.*]], <i16 -128, i16 -128, i16 -128, i16 -128> +; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[ADD]], <4 x i16> <i16 -128, i16 -128, i16 -128, i16 -128> +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i16> [[TMP2]], <i16 127, i16 127, i16 127, i16 127> +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP2]], <4 x i16> <i16 127, i16 127, i16 127, i16 127> +; CHECK-NEXT: [[COND_I:%.*]] = trunc <4 x i16> [[TMP4]] to <4 x i8> ; CHECK-NEXT: ret <4 x i8> [[COND_I]] ; %sh = lshr <4 x i16> %add, <i16 8, i16 8, i16 8, i16 8> @@ -173,16 +161,8 @@ define <4 x i8> @testv4i16i8add(<4 x i8> %a, <4 x i8> %b) { ; CHECK-LABEL: @testv4i16i8add( -; CHECK-NEXT: [[SA:%.*]] = sext <4 x i8> [[A:%.*]] to <4 x i16> -; CHECK-NEXT: [[SB:%.*]] = sext <4 x i8> [[B:%.*]] to <4 x i16> -; CHECK-NEXT: [[ADD:%.*]] = add nsw <4 x i16> [[SA]], [[SB]] -; CHECK-NEXT: [[CONV1_I:%.*]] = trunc <4 x i16> [[ADD]] to <4 x i8> -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i16> [[ADD]], <i16 128, i16 128, i16 128, i16 128> -; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp ult <4 x i16> [[TMP1]], <i16 256, i16 256, i16 256, i16 256> -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i16> [[ADD]], <i16 -1, i16 -1, i16 -1, i16 -1> -; CHECK-NEXT: [[XOR_I:%.*]] = select <4 x i1> [[TMP2]], <4 x i8> <i8 127, i8 127, i8 127, i8 127>, <4 x i8> <i8 -128, i8 -128, i8 -128, i8 -128> -; CHECK-NEXT: [[COND_I:%.*]] = select <4 x i1> [[CMP_NOT_I]], <4 x i8> [[CONV1_I]], <4 x i8> [[XOR_I]] -; CHECK-NEXT: ret <4 x i8> [[COND_I]] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> [[A:%.*]], <4 x i8> [[B:%.*]]) +; CHECK-NEXT: ret <4 x i8> [[TMP1]] ; %sa = sext <4 x i8> %a to <4 x i16> %sb = sext <4 x i8> %b to <4 x i16> @@ -201,12 +181,11 @@ define i8 
@testi16i8_revcmp(i16 %add) { ; CHECK-LABEL: @testi16i8_revcmp( -; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i16 [[ADD:%.*]] to i8 -; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[ADD]], 128 -; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp ult i16 [[TMP1]], 256 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i16 [[ADD]], -1 -; CHECK-NEXT: [[XOR_I:%.*]] = select i1 [[TMP2]], i8 127, i8 -128 -; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i8 [[CONV1_I]], i8 [[XOR_I]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i16 [[ADD:%.*]], -128 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 [[ADD]], i16 -128 +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i16 [[TMP2]], 127 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i16 [[TMP2]], i16 127 +; CHECK-NEXT: [[COND_I:%.*]] = trunc i16 [[TMP4]] to i8 ; CHECK-NEXT: ret i8 [[COND_I]] ; %sh = lshr i16 %add, 8 @@ -223,12 +202,11 @@ define i8 @testi16i8_revselect(i16 %add) { ; CHECK-LABEL: @testi16i8_revselect( -; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i16 [[ADD:%.*]] to i8 -; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[ADD]], 128 -; CHECK-NEXT: [[CMP_NOT_I_NOT:%.*]] = icmp ult i16 [[TMP1]], 256 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i16 [[ADD]], -1 -; CHECK-NEXT: [[XOR_I:%.*]] = select i1 [[TMP2]], i8 127, i8 -128 -; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[CMP_NOT_I_NOT]], i8 [[CONV1_I]], i8 [[XOR_I]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i16 [[ADD:%.*]], -128 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 [[ADD]], i16 -128 +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i16 [[TMP2]], 127 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i16 [[TMP2]], i16 127 +; CHECK-NEXT: [[COND_I:%.*]] = trunc i16 [[TMP4]] to i8 ; CHECK-NEXT: ret i8 [[COND_I]] ; %sh = lshr i16 %add, 8 @@ -341,12 +319,11 @@ define i8 @badimm4(i16 %add) { ; CHECK-LABEL: @badimm4( -; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i16 [[ADD:%.*]] to i8 -; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[ADD]], 128 -; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp ult i16 [[TMP1]], 256 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i16 [[ADD]], -1 -; 
CHECK-NEXT: [[XOR_I:%.*]] = select i1 [[TMP2]], i8 126, i8 -127 -; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i8 [[CONV1_I]], i8 [[XOR_I]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i16 [[ADD:%.*]], -128 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i16 [[ADD]], 127 +; CHECK-NEXT: [[TMP3:%.*]] = trunc i16 [[ADD]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], i8 -127, i8 [[TMP3]] +; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[TMP2]], i8 126, i8 [[TMP4]] ; CHECK-NEXT: ret i8 [[COND_I]] ; %sh = lshr i16 %add, 8 @@ -390,11 +367,11 @@ define i32 @oneuseconv(i64 %add) { ; CHECK-LABEL: @oneuseconv( ; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i64 [[ADD:%.*]] to i32 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[ADD]], 2147483648 -; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp ult i64 [[TMP1]], 4294967296 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i64 [[ADD]], -1 -; CHECK-NEXT: [[XOR_I:%.*]] = select i1 [[TMP2]], i32 2147483647, i32 -2147483648 -; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i32 [[CONV1_I]], i32 [[XOR_I]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[ADD]], -2147483648 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[ADD]], i64 -2147483648 +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i64 [[TMP2]], 2147483647 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 [[TMP2]], i64 2147483647 +; CHECK-NEXT: [[COND_I:%.*]] = trunc i64 [[TMP4]] to i32 ; CHECK-NEXT: call void @use(i32 [[CONV1_I]]) ; CHECK-NEXT: ret i32 [[COND_I]] ;