diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -454,6 +454,26 @@ break; } case Instruction::Add: + if ((DemandedMask & 1) == 0) { + // If we do not need the low bit, try to convert bool math to logic: + // add iN (zext i1 X), (sext i1 Y) --> sext (~X & Y) to iN + // Truth table for inputs and output signbits: + // X:0 | X:1 + // ---------- + // Y:0 | 0 | 0 | + // Y:1 | -1 | 0 | + // ---------- + Value *X, *Y; + if (match(I, m_c_Add(m_OneUse(m_ZExt(m_Value(X))), + m_OneUse(m_SExt(m_Value(Y))))) && + X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType()) { + IRBuilderBase::InsertPointGuard Guard(Builder); + Builder.SetInsertPoint(I); + Value *AndNot = Builder.CreateAnd(Builder.CreateNot(X), Y); + return Builder.CreateSExt(AndNot, VTy); + } + } + LLVM_FALLTHROUGH; case Instruction::Sub: { /// If the high-bits of an ADD/SUB are not demanded, then we do not care /// about the high bits of the operands. 
diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll --- a/llvm/test/Transforms/InstCombine/add.ll +++ b/llvm/test/Transforms/InstCombine/add.ll @@ -1055,10 +1055,9 @@ define i32 @lshr_add(i1 %x, i1 %y) { ; CHECK-LABEL: @lshr_add( -; CHECK-NEXT: [[XZ:%.*]] = zext i1 [[X:%.*]] to i32 -; CHECK-NEXT: [[YS:%.*]] = sext i1 [[Y:%.*]] to i32 -; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[XZ]], [[YS]] -; CHECK-NEXT: [[R:%.*]] = lshr i32 [[SUB]], 31 +; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[X:%.*]], true +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = zext i1 [[TMP2]] to i32 ; CHECK-NEXT: ret i32 [[R]] ; %xz = zext i1 %x to i32 @@ -1070,10 +1069,9 @@ define i5 @and_add(i1 %x, i1 %y) { ; CHECK-LABEL: @and_add( -; CHECK-NEXT: [[XZ:%.*]] = zext i1 [[X:%.*]] to i5 -; CHECK-NEXT: [[YS:%.*]] = sext i1 [[Y:%.*]] to i5 -; CHECK-NEXT: [[SUB:%.*]] = add nsw i5 [[XZ]], [[YS]] -; CHECK-NEXT: [[R:%.*]] = and i5 [[SUB]], -2 +; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[X:%.*]], true +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP2]], i5 -2, i5 0 ; CHECK-NEXT: ret i5 [[R]] ; %xz = zext i1 %x to i5 @@ -1085,11 +1083,10 @@ define <2 x i8> @ashr_add_commute(<2 x i1> %x, <2 x i1> %y) { ; CHECK-LABEL: @ashr_add_commute( -; CHECK-NEXT: [[XZ:%.*]] = zext <2 x i1> [[X:%.*]] to <2 x i8> -; CHECK-NEXT: [[YS:%.*]] = sext <2 x i1> [[Y:%.*]] to <2 x i8> -; CHECK-NEXT: [[SUB:%.*]] = add nsw <2 x i8> [[YS]], [[XZ]] -; CHECK-NEXT: [[R:%.*]] = ashr <2 x i8> [[SUB]], <i8 1, i8 1> -; CHECK-NEXT: ret <2 x i8> [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[X:%.*]], <i1 true, i1 true> +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i1> [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i8> +; CHECK-NEXT: ret <2 x i8> [[TMP3]] ; %xz = zext <2 x i1> %x to <2 x i8> %ys = sext <2 x i1> %y to <2 x i8> @@ -1100,12 +1097,8 @@ define i32 @cmp_math(i32 %x, i32 %y) { ; CHECK-LABEL: @cmp_math( -; CHECK-NEXT: 
[[GT:%.*]] = icmp ugt i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[LT:%.*]] = icmp ult i32 [[X]], [[Y]] -; CHECK-NEXT: [[XZ:%.*]] = zext i1 [[GT]] to i32 -; CHECK-NEXT: [[TMP1:%.*]] = sext i1 [[LT]] to i32 -; CHECK-NEXT: [[S:%.*]] = add nsw i32 [[XZ]], [[TMP1]] -; CHECK-NEXT: [[R:%.*]] = lshr i32 [[S]], 31 +; CHECK-NEXT: [[LT:%.*]] = icmp ult i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = zext i1 [[LT]] to i32 ; CHECK-NEXT: ret i32 [[R]] ; %gt = icmp ugt i32 %x, %y @@ -1117,6 +1110,8 @@ ret i32 %r } +; Negative test - wrong type + define i32 @lshr_add_nonbool(i2 %x, i1 %y) { ; CHECK-LABEL: @lshr_add_nonbool( ; CHECK-NEXT: [[XZ:%.*]] = zext i2 [[X:%.*]] to i32 @@ -1132,6 +1127,8 @@ ret i32 %r } +; Negative test - wrong demand + define i32 @and31_add(i1 %x, i1 %y) { ; CHECK-LABEL: @and31_add( ; CHECK-NEXT: [[XZ:%.*]] = zext i1 [[X:%.*]] to i32 @@ -1147,6 +1144,8 @@ ret i32 %r } +; Negative test - extra use + define i32 @lshr_add_use(i1 %x, i1 %y, i32* %p) { ; CHECK-LABEL: @lshr_add_use( ; CHECK-NEXT: [[XZ:%.*]] = zext i1 [[X:%.*]] to i32 @@ -1164,6 +1163,8 @@ ret i32 %r } +; Negative test - extra use + define i32 @lshr_add_use2(i1 %x, i1 %y, i32* %p) { ; CHECK-LABEL: @lshr_add_use2( ; CHECK-NEXT: [[XZ:%.*]] = zext i1 [[X:%.*]] to i32