Index: llvm/trunk/lib/Target/ARM/ARMCodeGenPrepare.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMCodeGenPrepare.cpp +++ llvm/trunk/lib/Target/ARM/ARMCodeGenPrepare.cpp @@ -123,9 +123,10 @@ SmallPtrSetImpl *Sources; SmallPtrSetImpl *Sinks; SmallPtrSetImpl *SafeToPromote; + SmallPtrSetImpl *SafeWrap; void ReplaceAllUsersOfWith(Value *From, Value *To); - void PrepareConstants(void); + void PrepareWrappingAdds(void); void ExtendSources(void); void ConvertTruncs(void); void PromoteTree(void); @@ -141,7 +142,8 @@ SetVector &Visited, SmallPtrSetImpl &Sources, SmallPtrSetImpl &Sinks, - SmallPtrSetImpl &SafeToPromote); + SmallPtrSetImpl &SafeToPromote, + SmallPtrSetImpl &SafeWrap); }; class ARMCodeGenPrepare : public FunctionPass { @@ -149,8 +151,9 @@ IRPromoter *Promoter = nullptr; std::set AllVisited; SmallPtrSet SafeToPromote; + SmallPtrSet SafeWrap; - bool isSafeOverflow(Instruction *I); + bool isSafeWrap(Instruction *I); bool isSupportedValue(Value *V); bool isLegalToPromote(Value *V); bool TryToPromote(Value *V); @@ -278,19 +281,14 @@ return isa(V); } -/// Return whether the instruction can be promoted within any modifications to -/// its operands or result. -bool ARMCodeGenPrepare::isSafeOverflow(Instruction *I) { - // FIXME Do we need NSW too? - if (isa(I) && I->hasNoUnsignedWrap()) - return true; - - // We can support a, potentially, overflowing instruction (I) if: +/// Return whether this instruction can safely wrap. +bool ARMCodeGenPrepare::isSafeWrap(Instruction *I) { + // We can support a, potentially, wrapping instruction (I) if: // - It is only used by an unsigned icmp. // - The icmp uses a constant. - // - The overflowing value (I) is decreasing, i.e would underflow - wrapping + // - The wrapping value (I) is decreasing, i.e would underflow - wrapping // around zero to become a larger number than before. - // - The underflowing instruction (I) also uses a constant. + // - The wrapping instruction (I) also uses a constant. // // We can then use the two constants to calculate whether the result would // wrap in respect to itself in the original bitwidth. If it doesn't wrap, @@ -392,6 +390,7 @@ return false; LLVM_DEBUG(dbgs() << "ARM CGP: Allowing safe overflow for " << *I << "\n"); + SafeWrap.insert(I); return true; } @@ -415,13 +414,16 @@ /// Return whether we can safely mutate V's type to ExtTy without having to be /// concerned with zero extending or truncation. static bool isPromotedResultSafe(Value *V) { + if (GenerateSignBits(V)) + return false; + if (!isa(V)) return true; - if (GenerateSignBits(V)) - return false; + if (!isa(V)) + return true; - return !isa(V); + return cast(V)->hasNoUnsignedWrap(); } /// Return the intrinsic for the instruction that can perform the same @@ -469,61 +471,34 @@ InstsToRemove.insert(I); } -void IRPromoter::PrepareConstants() { +void IRPromoter::PrepareWrappingAdds() { + LLVM_DEBUG(dbgs() << "ARM CGP: Prepare underflowing adds.\n"); IRBuilder<> Builder{Ctx}; - // First step is to prepare the instructions for mutation. Most constants - // just need to be zero extended into their new type, but complications arise - // because: - // - For nuw binary operators, negative immediates would need sign extending; - // however, instead we'll change them to positive and zext them. We can do - // this because: - // > The operators that can wrap are: add, sub, mul and shl. - // > shl interprets its second operand as unsigned and if the first operand - // is an immediate, it will need zext to be nuw. - // > I'm assuming mul has to interpret immediates as unsigned for nuw. - // > Which leaves the nuw add and sub to be handled; as with shl, if an - // immediate is used as operand 0, it will need zext to be nuw. - // - We also allow add and sub to safely overflow in certain circumstances - // and only when the value (operand 0) is being decreased. - // - // For adds and subs, that are either nuw or safely wrap and use a negative - // immediate as operand 1, we create an equivalent instruction using a - // positive immediate. That positive immediate can then be zext along with - // all the other immediates later. - for (auto *V : *Visited) { - if (!isa(V)) - continue; - auto *I = cast(V); - if (SafeToPromote->count(I)) { - - if (!isa(I)) - continue; - - if (auto *Const = dyn_cast(I->getOperand(1))) { - if (!Const->isNegative()) - continue; + // For adds that safely wrap and use a negative immediate as operand 1, we + // create an equivalent instruction using a positive immediate. + // That positive immediate can then be zext along with all the other + // immediates later. + for (auto *I : *SafeWrap) { + if (I->getOpcode() != Instruction::Add) + continue; - unsigned Opc = I->getOpcode(); - if (Opc != Instruction::Add && Opc != Instruction::Sub) - continue; + LLVM_DEBUG(dbgs() << "ARM CGP: Adjusting " << *I << "\n"); + assert((isa(I->getOperand(1)) && + cast(I->getOperand(1))->isNegative()) && + "Wrapping should have a negative immediate as the second operand"); - LLVM_DEBUG(dbgs() << "ARM CGP: Adjusting " << *I << "\n"); - auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs()); - Builder.SetInsertPoint(I); - Value *NewVal = Opc == Instruction::Sub ? - Builder.CreateAdd(I->getOperand(0), NewConst) : - Builder.CreateSub(I->getOperand(0), NewConst); - LLVM_DEBUG(dbgs() << "ARM CGP: New equivalent: " << *NewVal << "\n"); - - if (auto *NewInst = dyn_cast(NewVal)) { - NewInst->copyIRFlags(I); - NewInsts.insert(NewInst); - } - InstsToRemove.insert(I); - I->replaceAllUsesWith(NewVal); - } + auto Const = cast(I->getOperand(1)); + auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs()); + Builder.SetInsertPoint(I); + Value *NewVal = Builder.CreateSub(I->getOperand(0), NewConst); + if (auto *NewInst = dyn_cast(NewVal)) { + NewInst->copyIRFlags(I); + NewInsts.insert(NewInst); } + InstsToRemove.insert(I); + I->replaceAllUsesWith(NewVal); + LLVM_DEBUG(dbgs() << "ARM CGP: New equivalent: " << *NewVal << "\n"); } for (auto *I : NewInsts) Visited->insert(I); @@ -731,6 +706,8 @@ NewInsts.clear(); TruncTysMap.clear(); Promoted.clear(); + SafeToPromote->clear(); + SafeWrap->clear(); } void IRPromoter::ConvertTruncs() { @@ -762,7 +739,8 @@ SetVector &Visited, SmallPtrSetImpl &Sources, SmallPtrSetImpl &Sinks, - SmallPtrSetImpl &SafeToPromote) { + SmallPtrSetImpl &SafeToPromote, + SmallPtrSetImpl &SafeWrap) { LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains to from " << ARMCodeGenPrepare::TypeSize << " to 32-bits\n"); @@ -775,6 +753,7 @@ this->Sources = &Sources; this->Sinks = &Sinks; this->SafeToPromote = &SafeToPromote; + this->SafeWrap = &SafeWrap; // Cache original types of the values that will likely need truncating for (auto *I : Sinks) { @@ -797,9 +776,9 @@ TruncTysMap[Trunc].push_back(Trunc->getDestTy()); } - // Convert adds and subs using negative immediates to equivalent instructions - // that use positive constants. - PrepareConstants(); + // Convert adds using negative immediates to equivalent instructions that use + // positive constants. + PrepareWrappingAdds(); // Insert zext instructions between sources and their users. ExtendSources(); @@ -889,7 +868,7 @@ if (SafeToPromote.count(I)) return true; - if (isPromotedResultSafe(V) || isSafeOverflow(I)) { + if (isPromotedResultSafe(V) || isSafeWrap(I)) { SafeToPromote.insert(I); return true; } @@ -1025,7 +1004,8 @@ if (ToPromote < 2) return false; - Promoter->Mutate(OrigTy, CurrentVisited, Sources, Sinks, SafeToPromote); + Promoter->Mutate(OrigTy, CurrentVisited, Sources, Sinks, SafeToPromote, + SafeWrap); return true; } Index: llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-overflow.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-overflow.ll +++ llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-overflow.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 %s -arm-disable-cgp=false -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 -mattr=-use-misched %s -arm-disable-cgp=false -o - | FileCheck %s ; CHECK: overflow_add ; CHECK: add @@ -194,7 +194,7 @@ } ; CHECK-LABEL: safe_sub_var_imm -; CHECK: add.w [[ADD:r[0-9]+]], r0, #8 +; CHECK: sub.w [[ADD:r[0-9]+]], r0, #248 ; CHECK-NOT: uxt ; CHECK: cmp [[ADD]], #252 define i32 @safe_sub_var_imm(i8* %b) { @@ -220,7 +220,7 @@ } ; CHECK-LABEL: safe_add_var_imm -; CHECK: sub.w [[SUB:r[0-9]+]], r0, #127 +; CHECK: add.w [[SUB:r[0-9]+]], r0, #129 ; CHECK-NOT: uxt ; CHECK: cmp [[SUB]], #127 define i32 @safe_add_var_imm(i8* %b) { @@ -248,3 +248,33 @@ %res = select i1 %cmp.0, i8 %mask.sel, i8 %arg ret i8 %res } + +; CHECK-LABEL: underflow_if_sub +; CHECK: add{{.}} [[ADD:r[0-9]+]], #245 +; CHECK: cmp [[ADD]], r1 +define i8 @underflow_if_sub(i32 %arg, i8 zeroext %arg1) { + %cmp = icmp sgt i32 %arg, 0 + %conv = zext i1 %cmp to i32 + %and = and i32 %arg, %conv + %trunc = trunc i32 %and to i8 + %conv1 = add nuw nsw i8 %trunc, -11 + %cmp.1 = icmp ult i8 %conv1, %arg1 + %res = select i1 %cmp.1, i8 %conv1, i8 100 + ret i8 %res +} + +; CHECK-LABEL: underflow_if_sub_signext +; CHECK: uxtb [[UXT1:r[0-9]+]], r1 +; CHECK: sub{{.*}} [[SUB:r[0-9]+]], #11 +; CHECK: uxtb [[UXT_SUB:r[0-9]+]], [[SUB]] +; CHECK: cmp{{.*}}[[UXT_SUB]] +define i8 @underflow_if_sub_signext(i32 %arg, i8 signext %arg1) { + %cmp = icmp sgt i32 %arg, 0 + %conv = zext i1 %cmp to i32 + %and = and i32 %arg, %conv + %trunc = trunc i32 %and to i8 + %conv1 = add nuw nsw i8 %trunc, -11 + %cmp.1 = icmp ugt i8 %arg1, %conv1 + %res = select i1 %cmp.1, i8 %conv1, i8 100 + ret i8 %res +}