Index: llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1966,13 +1966,6 @@
     return BinaryOperator::CreateAdd(X, ConstantExpr::getSub(C, C2));
   }

-  // If there's no chance any bit will need to borrow from an adjacent bit:
-  // sub C, X --> xor X, C
-  const APInt *Op0C;
-  if (match(Op0, m_APInt(Op0C)) &&
-      (~computeKnownBits(Op1, 0, &I).Zero).isSubsetOf(*Op0C))
-    return BinaryOperator::CreateXor(Op1, Op0);
-
   {
     Value *Y;
     // X-(X+Y) == -Y    X-(Y+X) == -Y
@@ -2231,7 +2224,23 @@
         I, Builder.CreateIntrinsic(Intrinsic::ctpop, {I.getType()},
                                    {Builder.CreateNot(X)}));

-  return TryToNarrowDeduceFlags();
+  if (Instruction *R = TryToNarrowDeduceFlags())
+    return R;
+
+  // If the subtract is equivalent to xor (no bits other than MSB could require
+  // a borrow), convert to xor for better bit-tracking.
+  // sub C, X --> xor X, C
+  // Avoid this fold if the sub has no-wrap flags because that could be an
+  // information-losing transform that we cannot recover from.
+  const APInt *Op0C;
+  if (!I.hasNoSignedWrap() && !I.hasNoUnsignedWrap() &&
+      match(Op0, m_APInt(Op0C))) {
+    const APInt &MaybeOnes = ~computeKnownBits(Op1, 0, &I).Zero;
+    if ((*Op0C - MaybeOnes) == (*Op0C ^ MaybeOnes))
+      return BinaryOperator::CreateXor(Op1, Op0);
+  }
+
+  return nullptr;
 }

 /// This eliminates floating-point negation in either 'fneg(X)' or
Index: llvm/test/Transforms/InstCombine/sub-xor.ll
===================================================================
--- llvm/test/Transforms/InstCombine/sub-xor.ll
+++ llvm/test/Transforms/InstCombine/sub-xor.ll
@@ -3,10 +3,12 @@

 declare void @use(i32)

+; TODO: This could be converted to xor if the backend recognizes the sub equivalency.
+
 define i32 @low_mask_nsw_nuw(i32 %x) {
 ; CHECK-LABEL: @low_mask_nsw_nuw(
 ; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X:%.*]], 31
-; CHECK-NEXT:    [[SUB:%.*]] = xor i32 [[AND]], 63
+; CHECK-NEXT:    [[SUB:%.*]] = sub nuw nsw i32 63, [[AND]]
 ; CHECK-NEXT:    ret i32 [[SUB]]
 ;
   %and = and i32 %x, 31
@@ -14,10 +16,12 @@
   ret i32 %sub
 }

+; TODO: This could be converted to xor if the backend recognizes the sub equivalency.
+
 define <2 x i32> @low_mask_nsw_nuw_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @low_mask_nsw_nuw_vec(
 ; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]],
-; CHECK-NEXT:    [[SUB:%.*]] = xor <2 x i32> [[AND]],
+; CHECK-NEXT:    [[SUB:%.*]] = sub nuw nsw <2 x i32> , [[AND]]
 ; CHECK-NEXT:    ret <2 x i32> [[SUB]]
 ;
   %and = and <2 x i32> %x,
@@ -25,10 +29,12 @@
   ret <2 x i32> %sub
 }

+; TODO: This could be converted to xor if the backend recognizes the sub equivalency.
+
 define i8 @arbitrary_mask_sub_i8(i8 %x) {
 ; CHECK-LABEL: @arbitrary_mask_sub_i8(
 ; CHECK-NEXT:    [[A:%.*]] = and i8 [[X:%.*]], 10
-; CHECK-NEXT:    [[M:%.*]] = xor i8 [[A]], 11
+; CHECK-NEXT:    [[M:%.*]] = sub nuw nsw i8 11, [[A]]
 ; CHECK-NEXT:    ret i8 [[M]]
 ;
   %a = and i8 %x, 10 ; 0b00001010
@@ -36,12 +42,12 @@
   ret i8 %m
 }

-; TODO: Borrow from the MSB is ok.
+; Borrow from the MSB is ok.

 define i8 @arbitrary_mask_sub_high_bit_dont_care_i8(i8 %x) {
 ; CHECK-LABEL: @arbitrary_mask_sub_high_bit_dont_care_i8(
 ; CHECK-NEXT:    [[MASKX:%.*]] = and i8 [[X:%.*]], -93
-; CHECK-NEXT:    [[S:%.*]] = sub i8 39, [[MASKX]]
+; CHECK-NEXT:    [[S:%.*]] = xor i8 [[MASKX]], 39
 ; CHECK-NEXT:    ret i8 [[S]]
 ;
   %maskx = and i8 %x, 163 ; 0b10100011
@@ -49,6 +55,8 @@
   ret i8 %s
 }

+; Converting to xor loses information - nsw cannot be recovered by later analysis.
+
 define i8 @arbitrary_mask_sub_nsw_high_bit_dont_care_i8(i8 %x) {
 ; CHECK-LABEL: @arbitrary_mask_sub_nsw_high_bit_dont_care_i8(
 ; CHECK-NEXT:    [[MASKX:%.*]] = and i8 [[X:%.*]], -93
@@ -60,6 +68,8 @@
   ret i8 %s
 }

+; Converting to xor loses information - nuw cannot be recovered by later analysis.
+
 define i8 @arbitrary_mask_sub_nuw_high_bit_dont_care_i8(i8 %x) {
 ; CHECK-LABEL: @arbitrary_mask_sub_nuw_high_bit_dont_care_i8(
 ; CHECK-NEXT:    [[MASKX:%.*]] = and i8 [[X:%.*]], -93
@@ -71,10 +81,12 @@
   ret i8 %s
 }

+; TODO: This could be converted to xor if the backend recognizes the sub equivalency.
+
 define <2 x i5> @arbitrary_mask_sub_v2i5(<2 x i5> %x) {
 ; CHECK-LABEL: @arbitrary_mask_sub_v2i5(
 ; CHECK-NEXT:    [[A:%.*]] = and <2 x i5> [[X:%.*]],
-; CHECK-NEXT:    [[M:%.*]] = xor <2 x i5> [[A]],
+; CHECK-NEXT:    [[M:%.*]] = sub nuw nsw <2 x i5> , [[A]]
 ; CHECK-NEXT:    ret <2 x i5> [[M]]
 ;
   %a = and <2 x i5> %x, ; 0b11000
@@ -97,10 +109,12 @@

 declare i32 @llvm.ctlz.i32(i32, i1)

+; TODO: This could be converted to xor if the backend recognizes the sub equivalency.
+
 define i32 @range_masked_sub(i32 %x) {
 ; CHECK-LABEL: @range_masked_sub(
 ; CHECK-NEXT:    [[COUNT:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 true) #[[ATTR1:[0-9]+]], !range [[RNG0:![0-9]+]]
-; CHECK-NEXT:    [[SUB:%.*]] = xor i32 [[COUNT]], 31
+; CHECK-NEXT:    [[SUB:%.*]] = sub nuw nsw i32 31, [[COUNT]]
 ; CHECK-NEXT:    ret i32 [[SUB]]
 ;
   %count = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) nounwind readnone
Index: llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll
===================================================================
--- llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll
+++ llvm/test/Transforms/PhaseOrdering/SystemZ/sub-xor.ll
@@ -6,6 +6,11 @@

 @ARR = internal global [100 x i32] zeroinitializer, align 4

+; This test uses 'sub' instructions for gep offsets to allow
+; codegen (LSR) to create optimal asm. If 'sub' is canonicalized
+; to 'xor', then the backend needs to be able to see through
+; that transform to produce optimal asm.
+
 define dso_local zeroext i32 @foo(ptr noundef %a) #0 {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
@@ -22,27 +27,27 @@
 ; CHECK-NEXT:    [[ADD_PTR_110:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_19]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ADD_PTR_110]], align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[ADD_111:%.*]] = add i32 [[TMP1]], [[ADD]]
-; CHECK-NEXT:    [[IDX_NEG_216:%.*]] = xor i64 [[INDVARS_IV]], -2
+; CHECK-NEXT:    [[IDX_NEG_216:%.*]] = sub nuw nsw i64 -2, [[INDVARS_IV]]
 ; CHECK-NEXT:    [[ADD_PTR_217:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_216]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ADD_PTR_217]], align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[ADD_218:%.*]] = add i32 [[TMP2]], [[ADD_111]]
-; CHECK-NEXT:    [[IDX_NEG_3:%.*]] = xor i64 [[INDVARS_IV]], -3
+; CHECK-NEXT:    [[IDX_NEG_3:%.*]] = sub nuw nsw i64 -3, [[INDVARS_IV]]
 ; CHECK-NEXT:    [[ADD_PTR_3:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_3]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ADD_PTR_3]], align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[ADD_3:%.*]] = add i32 [[TMP3]], [[ADD_218]]
-; CHECK-NEXT:    [[IDX_NEG_4:%.*]] = xor i64 [[INDVARS_IV]], -4
+; CHECK-NEXT:    [[IDX_NEG_4:%.*]] = sub nuw nsw i64 -4, [[INDVARS_IV]]
 ; CHECK-NEXT:    [[ADD_PTR_4:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_4]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ADD_PTR_4]], align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[ADD_4:%.*]] = add i32 [[TMP4]], [[ADD_3]]
-; CHECK-NEXT:    [[IDX_NEG_5:%.*]] = xor i64 [[INDVARS_IV]], -5
+; CHECK-NEXT:    [[IDX_NEG_5:%.*]] = sub nuw nsw i64 -5, [[INDVARS_IV]]
 ; CHECK-NEXT:    [[ADD_PTR_5:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_5]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ADD_PTR_5]], align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[ADD_5:%.*]] = add i32 [[TMP5]], [[ADD_4]]
-; CHECK-NEXT:    [[IDX_NEG_6:%.*]] = xor i64 [[INDVARS_IV]], -6
+; CHECK-NEXT:    [[IDX_NEG_6:%.*]] = sub nuw nsw i64 -6, [[INDVARS_IV]]
 ; CHECK-NEXT:    [[ADD_PTR_6:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_6]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ADD_PTR_6]], align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[ADD_6:%.*]] = add i32 [[TMP6]], [[ADD_5]]
-; CHECK-NEXT:    [[IDX_NEG_7:%.*]] = xor i64 [[INDVARS_IV]], -7
+; CHECK-NEXT:    [[IDX_NEG_7:%.*]] = sub nuw nsw i64 -7, [[INDVARS_IV]]
 ; CHECK-NEXT:    [[ADD_PTR_7:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_7]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ADD_PTR_7]], align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[ADD_7]] = add i32 [[TMP7]], [[ADD_6]]
@@ -62,32 +67,32 @@
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[ADD_PTR_1_1]], align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[MUL_1_1:%.*]] = shl i32 [[TMP9]], 1
 ; CHECK-NEXT:    [[ADD_1_1:%.*]] = add i32 [[MUL_1_1]], [[ADD_1]]
-; CHECK-NEXT:    [[IDX_NEG_1_2:%.*]] = xor i64 [[INDVARS_IV_1]], -2
+; CHECK-NEXT:    [[IDX_NEG_1_2:%.*]] = sub nuw nsw i64 -2, [[INDVARS_IV_1]]
 ; CHECK-NEXT:    [[ADD_PTR_1_2:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_2]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ADD_PTR_1_2]], align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[MUL_1_2:%.*]] = shl i32 [[TMP10]], 1
 ; CHECK-NEXT:    [[ADD_1_2:%.*]] = add i32 [[MUL_1_2]], [[ADD_1_1]]
-; CHECK-NEXT:    [[IDX_NEG_1_3:%.*]] = xor i64 [[INDVARS_IV_1]], -3
+; CHECK-NEXT:    [[IDX_NEG_1_3:%.*]] = sub nuw nsw i64 -3, [[INDVARS_IV_1]]
 ; CHECK-NEXT:    [[ADD_PTR_1_3:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_3]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ADD_PTR_1_3]], align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[MUL_1_3:%.*]] = shl i32 [[TMP11]], 1
 ; CHECK-NEXT:    [[ADD_1_3:%.*]] = add i32 [[MUL_1_3]], [[ADD_1_2]]
-; CHECK-NEXT:    [[IDX_NEG_1_4:%.*]] = xor i64 [[INDVARS_IV_1]], -4
+; CHECK-NEXT:    [[IDX_NEG_1_4:%.*]] = sub nuw nsw i64 -4, [[INDVARS_IV_1]]
 ; CHECK-NEXT:    [[ADD_PTR_1_4:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_4]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ADD_PTR_1_4]], align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[MUL_1_4:%.*]] = shl i32 [[TMP12]], 1
 ; CHECK-NEXT:    [[ADD_1_4:%.*]] = add i32 [[MUL_1_4]], [[ADD_1_3]]
-; CHECK-NEXT:    [[IDX_NEG_1_5:%.*]] = xor i64 [[INDVARS_IV_1]], -5
+; CHECK-NEXT:    [[IDX_NEG_1_5:%.*]] = sub nuw nsw i64 -5, [[INDVARS_IV_1]]
 ; CHECK-NEXT:    [[ADD_PTR_1_5:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_5]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ADD_PTR_1_5]], align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[MUL_1_5:%.*]] = shl i32 [[TMP13]], 1
 ; CHECK-NEXT:    [[ADD_1_5:%.*]] = add i32 [[MUL_1_5]], [[ADD_1_4]]
-; CHECK-NEXT:    [[IDX_NEG_1_6:%.*]] = xor i64 [[INDVARS_IV_1]], -6
+; CHECK-NEXT:    [[IDX_NEG_1_6:%.*]] = sub nuw nsw i64 -6, [[INDVARS_IV_1]]
 ; CHECK-NEXT:    [[ADD_PTR_1_6:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_6]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[ADD_PTR_1_6]], align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[MUL_1_6:%.*]] = shl i32 [[TMP14]], 1
 ; CHECK-NEXT:    [[ADD_1_6:%.*]] = add i32 [[MUL_1_6]], [[ADD_1_5]]
-; CHECK-NEXT:    [[IDX_NEG_1_7:%.*]] = xor i64 [[INDVARS_IV_1]], -7
+; CHECK-NEXT:    [[IDX_NEG_1_7:%.*]] = sub nuw nsw i64 -7, [[INDVARS_IV_1]]
 ; CHECK-NEXT:    [[ADD_PTR_1_7:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_7]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ADD_PTR_1_7]], align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[MUL_1_7:%.*]] = shl i32 [[TMP15]], 1
@@ -108,32 +113,32 @@
 ; CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[ADD_PTR_2_1]], align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[MUL_2_1:%.*]] = mul i32 [[TMP17]], 3
 ; CHECK-NEXT:    [[ADD_2_1:%.*]] = add i32 [[MUL_2_1]], [[ADD_2]]
-; CHECK-NEXT:    [[IDX_NEG_2_2:%.*]] = xor i64 [[INDVARS_IV_2]], -2
+; CHECK-NEXT:    [[IDX_NEG_2_2:%.*]] = sub nuw nsw i64 -2, [[INDVARS_IV_2]]
 ; CHECK-NEXT:    [[ADD_PTR_2_2:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2_2]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr [[ADD_PTR_2_2]], align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[MUL_2_2:%.*]] = mul i32 [[TMP18]], 3
 ; CHECK-NEXT:    [[ADD_2_2:%.*]] = add i32 [[MUL_2_2]], [[ADD_2_1]]
-; CHECK-NEXT:    [[IDX_NEG_2_3:%.*]] = xor i64 [[INDVARS_IV_2]], -3
+; CHECK-NEXT:    [[IDX_NEG_2_3:%.*]] = sub nuw nsw i64 -3, [[INDVARS_IV_2]]
 ; CHECK-NEXT:    [[ADD_PTR_2_3:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2_3]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr [[ADD_PTR_2_3]], align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[MUL_2_3:%.*]] = mul i32 [[TMP19]], 3
 ; CHECK-NEXT:    [[ADD_2_3:%.*]] = add i32 [[MUL_2_3]], [[ADD_2_2]]
-; CHECK-NEXT:    [[IDX_NEG_2_4:%.*]] = xor i64 [[INDVARS_IV_2]], -4
+; CHECK-NEXT:    [[IDX_NEG_2_4:%.*]] = sub nuw nsw i64 -4, [[INDVARS_IV_2]]
 ; CHECK-NEXT:    [[ADD_PTR_2_4:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2_4]]
 ; CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr [[ADD_PTR_2_4]], align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[MUL_2_4:%.*]] = mul i32 [[TMP20]], 3
 ; CHECK-NEXT:    [[ADD_2_4:%.*]] = add i32 [[MUL_2_4]], [[ADD_2_3]]
-; CHECK-NEXT:    [[IDX_NEG_2_5:%.*]] = xor i64 [[INDVARS_IV_2]], -5
+; CHECK-NEXT:    [[IDX_NEG_2_5:%.*]] = sub nuw nsw i64 -5, [[INDVARS_IV_2]]
 ; CHECK-NEXT:    [[ADD_PTR_2_5:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2_5]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[ADD_PTR_2_5]], align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[MUL_2_5:%.*]] = mul i32 [[TMP21]], 3
 ; CHECK-NEXT:    [[ADD_2_5:%.*]] = add i32 [[MUL_2_5]], [[ADD_2_4]]
-; CHECK-NEXT:    [[IDX_NEG_2_6:%.*]] = xor i64 [[INDVARS_IV_2]], -6
+; CHECK-NEXT:    [[IDX_NEG_2_6:%.*]] = sub nuw nsw i64 -6, [[INDVARS_IV_2]]
 ; CHECK-NEXT:    [[ADD_PTR_2_6:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2_6]]
 ; CHECK-NEXT:    [[TMP22:%.*]] = load i32, ptr [[ADD_PTR_2_6]], align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[MUL_2_6:%.*]] = mul i32 [[TMP22]], 3
 ; CHECK-NEXT:    [[ADD_2_6:%.*]] = add i32 [[MUL_2_6]], [[ADD_2_5]]
-; CHECK-NEXT:    [[IDX_NEG_2_7:%.*]] = xor i64 [[INDVARS_IV_2]], -7
+; CHECK-NEXT:    [[IDX_NEG_2_7:%.*]] = sub nuw nsw i64 -7, [[INDVARS_IV_2]]
 ; CHECK-NEXT:    [[ADD_PTR_2_7:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2_7]]
 ; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[ADD_PTR_2_7]], align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[MUL_2_7:%.*]] = mul i32 [[TMP23]], 3
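
A standalone sketch (not part of the patch) of the borrow check that the new InstCombine code performs with APInt, mirrored here in plain 8-bit arithmetic. The first mask value comes from the @arbitrary_mask_sub_high_bit_dont_care_i8 test above; the 0x40 mask and the file name are made up for illustration.

// sub_is_xor_sketch.cpp - standalone illustration, not part of the patch.
// Mirrors the check "(*Op0C - MaybeOnes) == (*Op0C ^ MaybeOnes)" using plain
// 8-bit arithmetic instead of APInt.
#include <cstdint>
#include <cstdio>

// True if subtracting the worst-case RHS (all maybe-one bits set) from C gives
// the same value as xor'ing it; the patch uses this to decide when sub C, X
// may be rewritten as xor X, C.
static bool subLooksLikeXor(uint8_t C, uint8_t MaybeOnes) {
  return static_cast<uint8_t>(C - MaybeOnes) ==
         static_cast<uint8_t>(C ^ MaybeOnes);
}

int main() {
  // From @arbitrary_mask_sub_high_bit_dont_care_i8: C = 39 (0b00100111) and the
  // RHS is (and i8 %x, -93), so its maybe-one bits are 0xA3 (0b10100011).
  // 39 - 0xA3 and 39 ^ 0xA3 both wrap to 0x84, so the fold is allowed.
  printf("sub 39, (x & 0xA3) == xor: %d\n", subLooksLikeXor(39, 0xA3));

  // Made-up counterexample: the maybe-one bit 0x40 sits above a clear bit of C,
  // so the subtraction needs a borrow that flips the sign bit and the results
  // differ (0xE7 vs 0x67); the fold must not fire.
  printf("sub 39, (x & 0x40) == xor: %d\n", subLooksLikeXor(39, 0x40));
  return 0;
}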