Skip to content

Commit 2da7381

Browse files
committed Sep 15, 2018
[InstCombine] Support (sub (sext x), (sext y)) --> (sext (sub x, y)) and (sub (zext x), (zext y)) --> (zext (sub x, y))
Summary: If the sub doesn't overflow in the original type, we can move it above the sext/zext. This is similar to what we do for add. The overflow checking for sub is currently weaker than for add, so the test cases are constructed around what is supported.

Reviewers: spatel
Reviewed By: spatel
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D52075
llvm-svn: 342335
1 parent 0bd2d30 commit 2da7381

File tree

3 files changed

+156
-7
lines changed

3 files changed

+156
-7
lines changed
 

‎llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp

+3
Original file line number | Diff line number | Diff line change
@@ -1697,6 +1697,9 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
16971697
return SelectInst::Create(Cmp, Neg, A);
16981698
}
16991699

1700+
if (Instruction *Ext = narrowMathIfNoOverflow(I))
1701+
return Ext;
1702+
17001703
bool Changed = false;
17011704
if (!I.hasNoSignedWrap() && willNotOverflowSignedSub(Op0, Op1, I)) {
17021705
Changed = true;

‎llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

+18-7
Original file line number | Diff line number | Diff line change
@@ -1451,29 +1451,40 @@ Instruction *InstCombiner::foldShuffledBinop(BinaryOperator &Inst) {
14511451
/// sure the narrow op does not overflow.
14521452
Instruction *InstCombiner::narrowMathIfNoOverflow(BinaryOperator &BO) {
14531453
// We need at least one extended operand.
1454-
Value *LHS = BO.getOperand(0), *RHS = BO.getOperand(1);
1454+
Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);
1455+
1456+
// If this is a sub, we swap the operands since we always want an extension
1457+
// on the RHS. The LHS can be an extension or a constant.
1458+
if (BO.getOpcode() == Instruction::Sub)
1459+
std::swap(Op0, Op1);
1460+
14551461
Value *X;
1456-
bool IsSext = match(LHS, m_SExt(m_Value(X)));
1457-
if (!IsSext && !match(LHS, m_ZExt(m_Value(X))))
1462+
bool IsSext = match(Op0, m_SExt(m_Value(X)));
1463+
if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
14581464
return nullptr;
14591465

14601466
// If both operands are the same extension from the same source type and we
14611467
// can eliminate at least one (hasOneUse), this might work.
14621468
CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
14631469
Value *Y;
1464-
if (!(match(RHS, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
1465-
cast<Operator>(RHS)->getOpcode() == CastOpc &&
1466-
(LHS->hasOneUse() || RHS->hasOneUse()))) {
1470+
if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
1471+
cast<Operator>(Op1)->getOpcode() == CastOpc &&
1472+
(Op0->hasOneUse() || Op1->hasOneUse()))) {
14671473
// If that did not match, see if we have a suitable constant operand.
14681474
// Truncating and extending must produce the same constant.
14691475
Constant *WideC;
1470-
if (!LHS->hasOneUse() || !match(RHS, m_Constant(WideC)))
1476+
if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
14711477
return nullptr;
14721478
Constant *NarrowC = ConstantExpr::getTrunc(WideC, X->getType());
14731479
if (ConstantExpr::getCast(CastOpc, NarrowC, BO.getType()) != WideC)
14741480
return nullptr;
14751481
Y = NarrowC;
14761482
}
1483+
1484+
// Swap back now that we found our operands.
1485+
if (BO.getOpcode() == Instruction::Sub)
1486+
std::swap(X, Y);
1487+
14771488
// Both operands have narrow versions. Last step: the math must not overflow
14781489
// in the narrow width.
14791490
if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))

‎llvm/test/Transforms/InstCombine/narrow-math.ll

+135
Original file line number | Diff line number | Diff line change
@@ -491,5 +491,140 @@ define i64 @test12(i32 %V) {
491491
ret i64 %add
492492
}
493493

494+
define i64 @test13(i32 %V) {
495+
; CHECK-LABEL: @test13(
496+
; CHECK-NEXT: [[CALL1:%.*]] = call i32 @callee(), !range !2
497+
; CHECK-NEXT: [[CALL2:%.*]] = call i32 @callee(), !range !3
498+
; CHECK-NEXT: [[SUBCONV:%.*]] = sub nsw i32 [[CALL1]], [[CALL2]]
499+
; CHECK-NEXT: [[SUB:%.*]] = sext i32 [[SUBCONV]] to i64
500+
; CHECK-NEXT: ret i64 [[SUB]]
501+
;
502+
%call1 = call i32 @callee(), !range !2
503+
%call2 = call i32 @callee(), !range !3
504+
%sext1 = sext i32 %call1 to i64
505+
%sext2 = sext i32 %call2 to i64
506+
%sub = sub i64 %sext1, %sext2
507+
ret i64 %sub
508+
}
509+
510+
define i64 @test14(i32 %V) {
511+
; CHECK-LABEL: @test14(
512+
; CHECK-NEXT: [[CALL1:%.*]] = call i32 @callee(), !range !2
513+
; CHECK-NEXT: [[CALL2:%.*]] = call i32 @callee(), !range !0
514+
; CHECK-NEXT: [[SUBCONV:%.*]] = sub nuw nsw i32 [[CALL1]], [[CALL2]]
515+
; CHECK-NEXT: [[SUB:%.*]] = zext i32 [[SUBCONV]] to i64
516+
; CHECK-NEXT: ret i64 [[SUB]]
517+
;
518+
%call1 = call i32 @callee(), !range !2
519+
%call2 = call i32 @callee(), !range !0
520+
%zext1 = zext i32 %call1 to i64
521+
%zext2 = zext i32 %call2 to i64
522+
%sub = sub i64 %zext1, %zext2
523+
ret i64 %sub
524+
}
525+
526+
define i64 @test15(i32 %V) {
527+
; CHECK-LABEL: @test15(
528+
; CHECK-NEXT: [[ASHR:%.*]] = ashr i32 [[V:%.*]], 1
529+
; CHECK-NEXT: [[SUBCONV:%.*]] = sub nsw i32 8, [[ASHR]]
530+
; CHECK-NEXT: [[SUB:%.*]] = sext i32 [[SUBCONV]] to i64
531+
; CHECK-NEXT: ret i64 [[SUB]]
532+
;
533+
%ashr = ashr i32 %V, 1
534+
%sext = sext i32 %ashr to i64
535+
%sub = sub i64 8, %sext
536+
ret i64 %sub
537+
}
538+
539+
define <2 x i64> @test15vec(<2 x i32> %V) {
540+
; CHECK-LABEL: @test15vec(
541+
; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 1, i32 1>
542+
; CHECK-NEXT: [[SUBCONV:%.*]] = sub nsw <2 x i32> <i32 8, i32 8>, [[ASHR]]
543+
; CHECK-NEXT: [[SUB:%.*]] = sext <2 x i32> [[SUBCONV]] to <2 x i64>
544+
; CHECK-NEXT: ret <2 x i64> [[SUB]]
545+
;
546+
%ashr = ashr <2 x i32> %V, <i32 1, i32 1>
547+
%sext = sext <2 x i32> %ashr to <2 x i64>
548+
%sub = sub <2 x i64> <i64 8, i64 8>, %sext
549+
ret <2 x i64> %sub
550+
}
551+
552+
define i64 @test16(i32 %V) {
553+
; CHECK-LABEL: @test16(
554+
; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[V:%.*]], 1
555+
; CHECK-NEXT: [[SUBCONV:%.*]] = sub nuw i32 -2, [[LSHR]]
556+
; CHECK-NEXT: [[SUB:%.*]] = zext i32 [[SUBCONV]] to i64
557+
; CHECK-NEXT: ret i64 [[SUB]]
558+
;
559+
%lshr = lshr i32 %V, 1
560+
%zext = zext i32 %lshr to i64
561+
%sub = sub i64 4294967294, %zext
562+
ret i64 %sub
563+
}
564+
565+
define <2 x i64> @test16vec(<2 x i32> %V) {
566+
; CHECK-LABEL: @test16vec(
567+
; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], <i32 1, i32 1>
568+
; CHECK-NEXT: [[SUBCONV:%.*]] = sub nuw <2 x i32> <i32 -2, i32 -2>, [[LSHR]]
569+
; CHECK-NEXT: [[SUB:%.*]] = zext <2 x i32> [[SUBCONV]] to <2 x i64>
570+
; CHECK-NEXT: ret <2 x i64> [[SUB]]
571+
;
572+
%lshr = lshr <2 x i32> %V, <i32 1, i32 1>
573+
%zext = zext <2 x i32> %lshr to <2 x i64>
574+
%sub = sub <2 x i64> <i64 4294967294, i64 4294967294>, %zext
575+
ret <2 x i64> %sub
576+
}
577+
578+
; Negative test. Both have the same range so we can't guarantee the subtract
579+
; won't wrap.
580+
define i64 @test17(i32 %V) {
581+
; CHECK-LABEL: @test17(
582+
; CHECK-NEXT: [[CALL1:%.*]] = call i32 @callee(), !range !0
583+
; CHECK-NEXT: [[CALL2:%.*]] = call i32 @callee(), !range !0
584+
; CHECK-NEXT: [[SEXT1:%.*]] = zext i32 [[CALL1]] to i64
585+
; CHECK-NEXT: [[SEXT2:%.*]] = zext i32 [[CALL2]] to i64
586+
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[SEXT1]], [[SEXT2]]
587+
; CHECK-NEXT: ret i64 [[SUB]]
588+
;
589+
%call1 = call i32 @callee(), !range !0
590+
%call2 = call i32 @callee(), !range !0
591+
%sext1 = zext i32 %call1 to i64
592+
%sext2 = zext i32 %call2 to i64
593+
%sub = sub i64 %sext1, %sext2
594+
ret i64 %sub
595+
}
596+
597+
; Negative test. LHS is large positive 32-bit number. Range of callee can
598+
; cause overflow.
599+
define i64 @test18(i32 %V) {
600+
; CHECK-LABEL: @test18(
601+
; CHECK-NEXT: [[CALL1:%.*]] = call i32 @callee(), !range !1
602+
; CHECK-NEXT: [[SEXT1:%.*]] = sext i32 [[CALL1]] to i64
603+
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 2147481648, [[SEXT1]]
604+
; CHECK-NEXT: ret i64 [[SUB]]
605+
;
606+
%call1 = call i32 @callee(), !range !1
607+
%sext1 = sext i32 %call1 to i64
608+
%sub = sub i64 2147481648, %sext1
609+
ret i64 %sub
610+
}
611+
612+
; Negative test. LHS is large negative 32-bit number. Range of callee can
613+
; cause overflow.
614+
define i64 @test19(i32 %V) {
615+
; CHECK-LABEL: @test19(
616+
; CHECK-NEXT: [[CALL1:%.*]] = call i32 @callee(), !range !0
617+
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[CALL1]] to i64
618+
; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i64 -2147481648, [[TMP1]]
619+
; CHECK-NEXT: ret i64 [[SUB]]
620+
;
621+
%call1 = call i32 @callee(), !range !0
622+
%sext1 = sext i32 %call1 to i64
623+
%sub = sub i64 -2147481648, %sext1
624+
ret i64 %sub
625+
}
626+
494627
!0 = !{ i32 0, i32 2000 }
495628
!1 = !{ i32 -2000, i32 0 }
629+
!2 = !{ i32 -512, i32 -255 }
630+
!3 = !{ i32 -128, i32 0 }

0 commit comments

Comments
 (0)
Please sign in to comment.