diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp --- a/llvm/lib/CodeGen/TypePromotion.cpp +++ b/llvm/lib/CodeGen/TypePromotion.cpp @@ -108,7 +108,7 @@ SetVector &Visited; SetVector &Sources; SetVector &Sinks; - SmallVectorImpl &SafeWrap; + SmallPtrSetImpl &SafeWrap; IntegerType *ExtTy = nullptr; SmallPtrSet NewInsts; SmallPtrSet InstsToRemove; @@ -116,7 +116,6 @@ SmallPtrSet Promoted; void ReplaceAllUsersOfWith(Value *From, Value *To); - void PrepareWrappingAdds(void); void ExtendSources(void); void ConvertTruncs(void); void PromoteTree(void); @@ -125,11 +124,11 @@ public: IRPromoter(LLVMContext &C, IntegerType *Ty, unsigned Width, - SetVector &visited, SetVector &sources, - SetVector &sinks, - SmallVectorImpl &wrap) : - Ctx(C), OrigTy(Ty), PromotedWidth(Width), Visited(visited), - Sources(sources), Sinks(sinks), SafeWrap(wrap) { + SetVector &visited, SetVector &sources, + SetVector &sinks, + SmallPtrSetImpl &wrap) + : Ctx(C), OrigTy(Ty), PromotedWidth(Width), Visited(visited), + Sources(sources), Sinks(sinks), SafeWrap(wrap) { ExtTy = IntegerType::get(Ctx, PromotedWidth); assert(OrigTy->getPrimitiveSizeInBits().getFixedSize() < ExtTy->getPrimitiveSizeInBits().getFixedSize() && @@ -145,7 +144,7 @@ unsigned RegisterBitWidth = 0; SmallPtrSet AllVisited; SmallPtrSet SafeToPromote; - SmallVector SafeWrap; + SmallPtrSet SafeWrap; // Does V have the same size result type as TypeSize. bool EqualTypeSize(Value *V); @@ -333,44 +332,46 @@ if (Opc != Instruction::Add && Opc != Instruction::Sub) return false; - if (!I->hasOneUse() || - !isa(*I->user_begin()) || + if (!I->hasOneUse() || !isa(*I->user_begin()) || !isa(I->getOperand(1))) return false; - ConstantInt *OverflowConst = cast(I->getOperand(1)); - bool NegImm = OverflowConst->isNegative(); - bool IsDecreasing = ((Opc == Instruction::Sub) && !NegImm) || - ((Opc == Instruction::Add) && NegImm); - if (!IsDecreasing) - return false; - // Don't support an icmp that deals with sign bits. 
auto *CI = cast(*I->user_begin()); if (CI->isSigned() || CI->isEquality()) return false; - ConstantInt *ICmpConst = nullptr; + ConstantInt *ICmpConstant = nullptr; if (auto *Const = dyn_cast(CI->getOperand(0))) - ICmpConst = Const; + ICmpConstant = Const; else if (auto *Const = dyn_cast(CI->getOperand(1))) - ICmpConst = Const; + ICmpConstant = Const; else return false; - // Now check that the result can't wrap on itself. - APInt Total = ICmpConst->getValue().zextOrSelf(RegisterBitWidth); - Total += OverflowConst->getValue().abs().zextOrSelf(RegisterBitWidth); - - APInt Max = APInt::getAllOnes(TypeSize).zextOrSelf(RegisterBitWidth); - - if (Total.ugt(Max)) + const APInt &ICmpConst = ICmpConstant->getValue(); + APInt OverflowConst = cast(I->getOperand(1))->getValue(); + if (Opc == Instruction::Sub) + OverflowConst = -OverflowConst; + if (!OverflowConst.isNonPositive()) return false; - LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for " << *I - << "\n"); - SafeWrap.push_back(I); - return true; + // Using C1 = OverflowConst and C2 = ICmpConst, we can either prove that: + // zext(x) + sext(C1) <u zext(C2) if C1 <= 0 and C1 >s C2 + // zext(x) + sext(C1) <u sext(C2) if C1 <= 0 and C1 <=s C2 uses()) { auto *User = cast(U.getUser()); @@ -427,39 +428,6 @@ InstsToRemove.insert(I); } -void IRPromoter::PrepareWrappingAdds() { - LLVM_DEBUG(dbgs() << "IR Promotion: Prepare wrapping adds.\n"); - IRBuilder<> Builder{Ctx}; - - // For adds that safely wrap and use a negative immediate as operand 1, we - // create an equivalent instruction using a positive immediate. - // That positive immediate can then be zext along with all the other - // immediates later. 
- for (auto *I : SafeWrap) { - if (I->getOpcode() != Instruction::Add) - continue; - - LLVM_DEBUG(dbgs() << "IR Promotion: Adjusting " << *I << "\n"); - assert((isa(I->getOperand(1)) && - cast(I->getOperand(1))->isNegative()) && - "Wrapping should have a negative immediate as the second operand"); - - auto Const = cast(I->getOperand(1)); - auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs()); - Builder.SetInsertPoint(I); - Value *NewVal = Builder.CreateSub(I->getOperand(0), NewConst); - if (auto *NewInst = dyn_cast(NewVal)) { - NewInst->copyIRFlags(I); - NewInsts.insert(NewInst); - } - InstsToRemove.insert(I); - I->replaceAllUsesWith(NewVal); - LLVM_DEBUG(dbgs() << "IR Promotion: New equivalent: " << *NewVal << "\n"); - } - for (auto *I : NewInsts) - Visited.insert(I); -} - void IRPromoter::ExtendSources() { IRBuilder<> Builder{Ctx}; @@ -517,7 +485,9 @@ continue; if (auto *Const = dyn_cast(Op)) { - Constant *NewConst = ConstantExpr::getZExt(Const, ExtTy); + Constant *NewConst = SafeWrap.contains(I) + ? ConstantExpr::getSExt(Const, ExtTy) + : ConstantExpr::getZExt(Const, ExtTy); I->setOperand(i, NewConst); } else if (isa(Op)) I->setOperand(i, UndefValue::get(ExtTy)); @@ -678,10 +648,6 @@ TruncTysMap[Trunc].push_back(Trunc->getDestTy()); } - // Convert adds using negative immediates to equivalent instructions that use - // positive constants. - PrepareWrappingAdds(); - // Insert zext instructions between sources and their users. 
ExtendSources(); @@ -797,7 +763,7 @@ return false; LLVM_DEBUG(dbgs() << "IR Promotion: TryToPromote: " << *V << ", from " - << TypeSize << " bits to " << PromotedWidth << "\n"); + << TypeSize << " bits to " << PromotedWidth << "\n"); SetVector WorkList; SetVector Sources; diff --git a/llvm/test/CodeGen/AArch64/and-mask-removal.ll b/llvm/test/CodeGen/AArch64/and-mask-removal.ll --- a/llvm/test/CodeGen/AArch64/and-mask-removal.ll +++ b/llvm/test/CodeGen/AArch64/and-mask-removal.ll @@ -131,8 +131,7 @@ ; CHECK-LABEL: test8_5: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: sub w8, w0, #123 -; CHECK-NEXT: and w8, w8, #0xff -; CHECK-NEXT: cmp w8, #150 +; CHECK-NEXT: cmn w8, #106 ; CHECK-NEXT: cset w0, hi ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/typepromotion-overflow.ll b/llvm/test/CodeGen/AArch64/typepromotion-overflow.ll --- a/llvm/test/CodeGen/AArch64/typepromotion-overflow.ll +++ b/llvm/test/CodeGen/AArch64/typepromotion-overflow.ll @@ -173,13 +173,13 @@ ret i32 %res } -define i32 @unsafe_sub_underflow(i8 zeroext %a) { -; CHECK-LABEL: unsafe_sub_underflow: +; This is valid so long as the icmp immediate is sext. +define i32 @sext_sub_underflow(i8 zeroext %a) { +; CHECK-LABEL: sext_sub_underflow: ; CHECK: // %bb.0: ; CHECK-NEXT: sub w9, w0, #6 ; CHECK-NEXT: mov w8, #16 -; CHECK-NEXT: and w9, w9, #0xff -; CHECK-NEXT: cmp w9, #250 +; CHECK-NEXT: cmn w9, #6 ; CHECK-NEXT: mov w9, #8 ; CHECK-NEXT: csel w0, w9, w8, hi ; CHECK-NEXT: ret @@ -217,13 +217,13 @@ ret i32 %res } -define i32 @unsafe_sub_underflow_neg(i8 zeroext %a) { -; CHECK-LABEL: unsafe_sub_underflow_neg: +; This is valid so long as the icmp immediate is sext. 
+define i32 @sext_sub_underflow_neg(i8 zeroext %a) { +; CHECK-LABEL: sext_sub_underflow_neg: ; CHECK: // %bb.0: ; CHECK-NEXT: sub w9, w0, #4 ; CHECK-NEXT: mov w8, #16 -; CHECK-NEXT: and w9, w9, #0xff -; CHECK-NEXT: cmp w9, #253 +; CHECK-NEXT: cmn w9, #3 ; CHECK-NEXT: mov w9, #8 ; CHECK-NEXT: csel w0, w9, w8, lo ; CHECK-NEXT: ret diff --git a/llvm/test/Transforms/TypePromotion/ARM/casts.ll b/llvm/test/Transforms/TypePromotion/ARM/casts.ll --- a/llvm/test/Transforms/TypePromotion/ARM/casts.ll +++ b/llvm/test/Transforms/TypePromotion/ARM/casts.ll @@ -138,8 +138,8 @@ ; CHECK-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[TMP2]], 1 ; CHECK-NEXT: [[ADD0:%.*]] = add nuw nsw i32 [[MUL]], 6 ; CHECK-NEXT: [[CMP0:%.*]] = icmp ne i32 [[ARG:%.*]], [[ADD0]] -; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP1]], 1 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[TMP3]], 3 +; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[TMP1]], -1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[ADD1]], 3 ; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP0]], [[CMP1]] ; CHECK-NEXT: ret i1 [[OR]] ; diff --git a/llvm/test/Transforms/TypePromotion/ARM/clear-structures.ll b/llvm/test/Transforms/TypePromotion/ARM/clear-structures.ll --- a/llvm/test/Transforms/TypePromotion/ARM/clear-structures.ll +++ b/llvm/test/Transforms/TypePromotion/ARM/clear-structures.ll @@ -39,9 +39,9 @@ ; CHECK-NEXT: [[INCDEC_PTR23:%.*]] = getelementptr inbounds i8, i8* [[FMT_ADDR_0_PN]], i32 2 ; CHECK-NEXT: [[DOTPR74:%.*]] = load i8, i8* [[INCDEC_PTR23]], align 1 ; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[DOTPR74]] to i32 -; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 48 -; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 10 -; CHECK-NEXT: br i1 [[TMP5]], label [[WHILE_COND24:%.*]], label [[COND_END]] +; CHECK-NEXT: [[DOTPR74_OFF:%.*]] = add i32 [[TMP3]], -48 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[DOTPR74_OFF]], 10 +; CHECK-NEXT: br i1 [[TMP4]], label [[WHILE_COND24:%.*]], label [[COND_END]] ; CHECK: while.cond24: ; CHECK-NEXT: br label [[WHILE_COND24]] ; CHECK: 
cond.end: diff --git a/llvm/test/Transforms/TypePromotion/ARM/icmps.ll b/llvm/test/Transforms/TypePromotion/ARM/icmps.ll --- a/llvm/test/Transforms/TypePromotion/ARM/icmps.ll +++ b/llvm/test/Transforms/TypePromotion/ARM/icmps.ll @@ -65,8 +65,8 @@ ; CHECK-LABEL: @test_ugt_1_dec_imm( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[X:%.*]] to i32 -; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP1]], 1 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[ADD]], 1 ; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 35, i32 47 ; CHECK-NEXT: ret i32 [[RES]] ; @@ -193,8 +193,8 @@ ; CHECK-LABEL: @ugt_1_dec_imm( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[X:%.*]] to i32 -; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP1]], 1 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[ADD]], 1 ; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 35, i32 47 ; CHECK-NEXT: ret i32 [[RES]] ; @@ -305,8 +305,9 @@ ; CHECK-LABEL: @icmp_minus_imm( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[A:%.*]], align 1 -; CHECK-NEXT: [[ADD_I:%.*]] = add i8 [[TMP0]], -7 -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[ADD_I]], -5 +; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i32 +; CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[TMP1]], -7 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[ADD_I]], -5 ; CHECK-NEXT: [[CONV1:%.*]] = zext i1 [[CMP]] to i32 ; CHECK-NEXT: ret i32 [[CONV1]] ; diff --git a/llvm/test/Transforms/TypePromotion/ARM/wrapping.ll b/llvm/test/Transforms/TypePromotion/ARM/wrapping.ll --- a/llvm/test/Transforms/TypePromotion/ARM/wrapping.ll +++ b/llvm/test/Transforms/TypePromotion/ARM/wrapping.ll @@ -102,8 +102,9 @@ define i32 @unsafe_add_underflow(i8 zeroext %a) { ; CHECK-LABEL: @unsafe_add_underflow( -; CHECK-NEXT: [[ADD:%.*]] = add i8 [[A:%.*]], -2 -; CHECK-NEXT: [[CMP:%.*]] = icmp 
ugt i8 [[ADD]], -2 +; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], -2 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[ADD]], -2 ; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 8, i32 16 ; CHECK-NEXT: ret i32 [[RES]] ; @@ -116,8 +117,8 @@ define i32 @safe_add_underflow(i8 zeroext %a) { ; CHECK-LABEL: @safe_add_underflow( ; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP2]], 254 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[ADD]], 254 ; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 8, i32 16 ; CHECK-NEXT: ret i32 [[RES]] ; @@ -130,8 +131,8 @@ define i32 @safe_add_underflow_neg(i8 zeroext %a) { ; CHECK-LABEL: @safe_add_underflow_neg( ; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], 2 -; CHECK-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP2]], 250 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], -2 +; CHECK-NEXT: [[CMP:%.*]] = icmp ule i32 [[ADD]], 250 ; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 8, i32 16 ; CHECK-NEXT: ret i32 [[RES]] ; @@ -154,10 +155,12 @@ ret i32 %res } -define i32 @unsafe_sub_underflow(i8 zeroext %a) { -; CHECK-LABEL: @unsafe_sub_underflow( -; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[A:%.*]], 6 -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[SUB]], -6 +; This is valid so long as the icmp immediate is sext. 
+define i32 @sext_sub_underflow(i8 zeroext %a) { +; CHECK-LABEL: @sext_sub_underflow( +; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32 +; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[TMP1]], 6 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[SUB]], -6 ; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 8, i32 16 ; CHECK-NEXT: ret i32 [[RES]] ; @@ -195,10 +198,12 @@ ret i32 %res } -define i32 @unsafe_sub_underflow_neg(i8 zeroext %a) { -; CHECK-LABEL: @unsafe_sub_underflow_neg( -; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[A:%.*]], 4 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[SUB]], -3 +; This is valid so long as the icmp immediate is sext. +define i32 @sext_sub_underflow_neg(i8 zeroext %a) { +; CHECK-LABEL: @sext_sub_underflow_neg( +; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32 +; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[TMP1]], 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[SUB]], -3 ; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 8, i32 16 ; CHECK-NEXT: ret i32 [[RES]] ; @@ -288,12 +293,12 @@ ; CHECK-NEXT: [[SHL:%.*]] = or i32 [[TMP1]], 1 ; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[SHL]], 10 ; CHECK-NEXT: [[CMP_0:%.*]] = icmp ult i32 [[ADD]], 60 -; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i32 [[SHL]], 40 -; CHECK-NEXT: [[CMP_1:%.*]] = icmp ult i32 [[TMP2]], 20 +; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[SHL]], -40 +; CHECK-NEXT: [[CMP_1:%.*]] = icmp ult i32 [[SUB]], 20 ; CHECK-NEXT: [[MASK_SEL:%.*]] = select i1 [[CMP_1]], i32 [[MASK_0]], i32 [[MASK_1]] ; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP_0]], i32 [[MASK_SEL]], i32 [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[RES]] to i8 -; CHECK-NEXT: ret i8 [[TMP3]] +; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[RES]] to i8 +; CHECK-NEXT: ret i8 [[TMP2]] ; %mask.0 = and i8 %arg, 1 %mask.1 = and i8 %arg, 2