Skip to content

Commit 3141bbd

Browse files
committedMay 21, 2019
[ARM][CGP] Skip nuw in PrepareConstants
PrepareConstants step converts add/sub with 'negative' immediates to sub/add with a 'positive' imm to make promotion more simple. nuw already states that the add shouldn't cause an unsigned wrap, so it shouldn't need any tweaking. Plus, we also don't allow a sub with a 'negative' immediate to be safe wrap, so this functionality has been removed. The PrepareConstants step now just handles the add instructions that we've determined would be safe if they wrap around zero. Differential Revision: https://reviews.llvm.org/D62057 llvm-svn: 361227
1 parent 295c19e commit 3141bbd

File tree

2 files changed

+85
-75
lines changed

2 files changed

+85
-75
lines changed
 

‎llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp

Lines changed: 52 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -123,9 +123,10 @@ class IRPromoter {
123123
SmallPtrSetImpl<Value*> *Sources;
124124
SmallPtrSetImpl<Instruction*> *Sinks;
125125
SmallPtrSetImpl<Instruction*> *SafeToPromote;
126+
SmallPtrSetImpl<Instruction*> *SafeWrap;
126127

127128
void ReplaceAllUsersOfWith(Value *From, Value *To);
128-
void PrepareConstants(void);
129+
void PrepareWrappingAdds(void);
129130
void ExtendSources(void);
130131
void ConvertTruncs(void);
131132
void PromoteTree(void);
@@ -141,16 +142,18 @@ class IRPromoter {
141142
SetVector<Value*> &Visited,
142143
SmallPtrSetImpl<Value*> &Sources,
143144
SmallPtrSetImpl<Instruction*> &Sinks,
144-
SmallPtrSetImpl<Instruction*> &SafeToPromote);
145+
SmallPtrSetImpl<Instruction*> &SafeToPromote,
146+
SmallPtrSetImpl<Instruction*> &SafeWrap);
145147
};
146148

147149
class ARMCodeGenPrepare : public FunctionPass {
148150
const ARMSubtarget *ST = nullptr;
149151
IRPromoter *Promoter = nullptr;
150152
std::set<Value*> AllVisited;
151153
SmallPtrSet<Instruction*, 8> SafeToPromote;
154+
SmallPtrSet<Instruction*, 4> SafeWrap;
152155

153-
bool isSafeOverflow(Instruction *I);
156+
bool isSafeWrap(Instruction *I);
154157
bool isSupportedValue(Value *V);
155158
bool isLegalToPromote(Value *V);
156159
bool TryToPromote(Value *V);
@@ -278,19 +281,14 @@ static bool isSink(Value *V) {
278281
return isa<CallInst>(V);
279282
}
280283

281-
/// Return whether the instruction can be promoted within any modifications to
282-
/// its operands or result.
283-
bool ARMCodeGenPrepare::isSafeOverflow(Instruction *I) {
284-
// FIXME Do we need NSW too?
285-
if (isa<OverflowingBinaryOperator>(I) && I->hasNoUnsignedWrap())
286-
return true;
287-
288-
// We can support a, potentially, overflowing instruction (I) if:
284+
/// Return whether this instruction can safely wrap.
285+
bool ARMCodeGenPrepare::isSafeWrap(Instruction *I) {
286+
// We can support a, potentially, wrapping instruction (I) if:
289287
// - It is only used by an unsigned icmp.
290288
// - The icmp uses a constant.
291-
// - The overflowing value (I) is decreasing, i.e would underflow - wrapping
289+
// - The wrapping value (I) is decreasing, i.e would underflow - wrapping
292290
// around zero to become a larger number than before.
293-
// - The underflowing instruction (I) also uses a constant.
291+
// - The wrapping instruction (I) also uses a constant.
294292
//
295293
// We can then use the two constants to calculate whether the result would
296294
// wrap in respect to itself in the original bitwidth. If it doesn't wrap,
@@ -392,6 +390,7 @@ bool ARMCodeGenPrepare::isSafeOverflow(Instruction *I) {
392390
return false;
393391

394392
LLVM_DEBUG(dbgs() << "ARM CGP: Allowing safe overflow for " << *I << "\n");
393+
SafeWrap.insert(I);
395394
return true;
396395
}
397396

@@ -415,13 +414,16 @@ static bool shouldPromote(Value *V) {
415414
/// Return whether we can safely mutate V's type to ExtTy without having to be
416415
/// concerned with zero extending or truncation.
417416
static bool isPromotedResultSafe(Value *V) {
417+
if (GenerateSignBits(V))
418+
return false;
419+
418420
if (!isa<Instruction>(V))
419421
return true;
420422

421-
if (GenerateSignBits(V))
422-
return false;
423+
if (!isa<OverflowingBinaryOperator>(V))
424+
return true;
423425

424-
return !isa<OverflowingBinaryOperator>(V);
426+
return cast<Instruction>(V)->hasNoUnsignedWrap();
425427
}
426428

427429
/// Return the intrinsic for the instruction that can perform the same
@@ -469,61 +471,34 @@ void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
469471
InstsToRemove.insert(I);
470472
}
471473

472-
void IRPromoter::PrepareConstants() {
474+
void IRPromoter::PrepareWrappingAdds() {
475+
LLVM_DEBUG(dbgs() << "ARM CGP: Prepare underflowing adds.\n");
473476
IRBuilder<> Builder{Ctx};
474-
// First step is to prepare the instructions for mutation. Most constants
475-
// just need to be zero extended into their new type, but complications arise
476-
// because:
477-
// - For nuw binary operators, negative immediates would need sign extending;
478-
// however, instead we'll change them to positive and zext them. We can do
479-
// this because:
480-
// > The operators that can wrap are: add, sub, mul and shl.
481-
// > shl interprets its second operand as unsigned and if the first operand
482-
// is an immediate, it will need zext to be nuw.
483-
// > I'm assuming mul has to interpret immediates as unsigned for nuw.
484-
// > Which leaves the nuw add and sub to be handled; as with shl, if an
485-
// immediate is used as operand 0, it will need zext to be nuw.
486-
// - We also allow add and sub to safely overflow in certain circumstances
487-
// and only when the value (operand 0) is being decreased.
488-
//
489-
// For adds and subs, that are either nuw or safely wrap and use a negative
490-
// immediate as operand 1, we create an equivalent instruction using a
491-
// positive immediate. That positive immediate can then be zext along with
492-
// all the other immediates later.
493-
for (auto *V : *Visited) {
494-
if (!isa<Instruction>(V))
495-
continue;
496477

497-
auto *I = cast<Instruction>(V);
498-
if (SafeToPromote->count(I)) {
499-
500-
if (!isa<OverflowingBinaryOperator>(I))
501-
continue;
502-
503-
if (auto *Const = dyn_cast<ConstantInt>(I->getOperand(1))) {
504-
if (!Const->isNegative())
505-
continue;
478+
// For adds that safely wrap and use a negative immediate as operand 1, we
479+
// create an equivalent instruction using a positive immediate.
480+
// That positive immediate can then be zext along with all the other
481+
// immediates later.
482+
for (auto *I : *SafeWrap) {
483+
if (I->getOpcode() != Instruction::Add)
484+
continue;
506485

507-
unsigned Opc = I->getOpcode();
508-
if (Opc != Instruction::Add && Opc != Instruction::Sub)
509-
continue;
486+
LLVM_DEBUG(dbgs() << "ARM CGP: Adjusting " << *I << "\n");
487+
assert((isa<ConstantInt>(I->getOperand(1)) &&
488+
cast<ConstantInt>(I->getOperand(1))->isNegative()) &&
489+
"Wrapping should have a negative immediate as the second operand");
510490

511-
LLVM_DEBUG(dbgs() << "ARM CGP: Adjusting " << *I << "\n");
512-
auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs());
513-
Builder.SetInsertPoint(I);
514-
Value *NewVal = Opc == Instruction::Sub ?
515-
Builder.CreateAdd(I->getOperand(0), NewConst) :
516-
Builder.CreateSub(I->getOperand(0), NewConst);
517-
LLVM_DEBUG(dbgs() << "ARM CGP: New equivalent: " << *NewVal << "\n");
518-
519-
if (auto *NewInst = dyn_cast<Instruction>(NewVal)) {
520-
NewInst->copyIRFlags(I);
521-
NewInsts.insert(NewInst);
522-
}
523-
InstsToRemove.insert(I);
524-
I->replaceAllUsesWith(NewVal);
525-
}
491+
auto Const = cast<ConstantInt>(I->getOperand(1));
492+
auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs());
493+
Builder.SetInsertPoint(I);
494+
Value *NewVal = Builder.CreateSub(I->getOperand(0), NewConst);
495+
if (auto *NewInst = dyn_cast<Instruction>(NewVal)) {
496+
NewInst->copyIRFlags(I);
497+
NewInsts.insert(NewInst);
526498
}
499+
InstsToRemove.insert(I);
500+
I->replaceAllUsesWith(NewVal);
501+
LLVM_DEBUG(dbgs() << "ARM CGP: New equivalent: " << *NewVal << "\n");
527502
}
528503
for (auto *I : NewInsts)
529504
Visited->insert(I);
@@ -731,6 +706,8 @@ void IRPromoter::Cleanup() {
731706
NewInsts.clear();
732707
TruncTysMap.clear();
733708
Promoted.clear();
709+
SafeToPromote->clear();
710+
SafeWrap->clear();
734711
}
735712

736713
void IRPromoter::ConvertTruncs() {
@@ -762,7 +739,8 @@ void IRPromoter::Mutate(Type *OrigTy,
762739
SetVector<Value*> &Visited,
763740
SmallPtrSetImpl<Value*> &Sources,
764741
SmallPtrSetImpl<Instruction*> &Sinks,
765-
SmallPtrSetImpl<Instruction*> &SafeToPromote) {
742+
SmallPtrSetImpl<Instruction*> &SafeToPromote,
743+
SmallPtrSetImpl<Instruction*> &SafeWrap) {
766744
LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains to from "
767745
<< ARMCodeGenPrepare::TypeSize << " to 32-bits\n");
768746

@@ -775,6 +753,7 @@ void IRPromoter::Mutate(Type *OrigTy,
775753
this->Sources = &Sources;
776754
this->Sinks = &Sinks;
777755
this->SafeToPromote = &SafeToPromote;
756+
this->SafeWrap = &SafeWrap;
778757

779758
// Cache original types of the values that will likely need truncating
780759
for (auto *I : Sinks) {
@@ -797,9 +776,9 @@ void IRPromoter::Mutate(Type *OrigTy,
797776
TruncTysMap[Trunc].push_back(Trunc->getDestTy());
798777
}
799778

800-
// Convert adds and subs using negative immediates to equivalent instructions
801-
// that use positive constants.
802-
PrepareConstants();
779+
// Convert adds using negative immediates to equivalent instructions that use
780+
// positive constants.
781+
PrepareWrappingAdds();
803782

804783
// Insert zext instructions between sources and their users.
805784
ExtendSources();
@@ -889,7 +868,7 @@ bool ARMCodeGenPrepare::isLegalToPromote(Value *V) {
889868
if (SafeToPromote.count(I))
890869
return true;
891870

892-
if (isPromotedResultSafe(V) || isSafeOverflow(I)) {
871+
if (isPromotedResultSafe(V) || isSafeWrap(I)) {
893872
SafeToPromote.insert(I);
894873
return true;
895874
}
@@ -1025,7 +1004,8 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
10251004
if (ToPromote < 2)
10261005
return false;
10271006

1028-
Promoter->Mutate(OrigTy, CurrentVisited, Sources, Sinks, SafeToPromote);
1007+
Promoter->Mutate(OrigTy, CurrentVisited, Sources, Sinks, SafeToPromote,
1008+
SafeWrap);
10291009
return true;
10301010
}
10311011

‎llvm/test/CodeGen/ARM/CGP/arm-cgp-overflow.ll

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 %s -arm-disable-cgp=false -o - | FileCheck %s
1+
; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 -mattr=-use-misched %s -arm-disable-cgp=false -o - | FileCheck %s
22

33
; CHECK: overflow_add
44
; CHECK: add
@@ -194,7 +194,7 @@ entry:
194194
}
195195

196196
; CHECK-LABEL: safe_sub_var_imm
197-
; CHECK: add.w [[ADD:r[0-9]+]], r0, #8
197+
; CHECK: sub.w [[ADD:r[0-9]+]], r0, #248
198198
; CHECK-NOT: uxt
199199
; CHECK: cmp [[ADD]], #252
200200
define i32 @safe_sub_var_imm(i8* %b) {
@@ -220,7 +220,7 @@ entry:
220220
}
221221

222222
; CHECK-LABEL: safe_add_var_imm
223-
; CHECK: sub.w [[SUB:r[0-9]+]], r0, #127
223+
; CHECK: add.w [[SUB:r[0-9]+]], r0, #129
224224
; CHECK-NOT: uxt
225225
; CHECK: cmp [[SUB]], #127
226226
define i32 @safe_add_var_imm(i8* %b) {
@@ -248,3 +248,33 @@ define i8 @convert_add_order(i8 zeroext %arg) {
248248
%res = select i1 %cmp.0, i8 %mask.sel, i8 %arg
249249
ret i8 %res
250250
}
251+
252+
; CHECK-LABEL: underflow_if_sub
253+
; CHECK: add{{.}} [[ADD:r[0-9]+]], #245
254+
; CHECK: cmp [[ADD]], r1
255+
define i8 @underflow_if_sub(i32 %arg, i8 zeroext %arg1) {
256+
%cmp = icmp sgt i32 %arg, 0
257+
%conv = zext i1 %cmp to i32
258+
%and = and i32 %arg, %conv
259+
%trunc = trunc i32 %and to i8
260+
%conv1 = add nuw nsw i8 %trunc, -11
261+
%cmp.1 = icmp ult i8 %conv1, %arg1
262+
%res = select i1 %cmp.1, i8 %conv1, i8 100
263+
ret i8 %res
264+
}
265+
266+
; CHECK-LABEL: underflow_if_sub_signext
267+
; CHECK: uxtb [[UXT1:r[0-9]+]], r1
268+
; CHECK: sub{{.*}} [[SUB:r[0-9]+]], #11
269+
; CHECK: uxtb [[UXT_SUB:r[0-9]+]], [[SUB]]
270+
; CHECK: cmp{{.*}}[[UXT_SUB]]
271+
define i8 @underflow_if_sub_signext(i32 %arg, i8 signext %arg1) {
272+
%cmp = icmp sgt i32 %arg, 0
273+
%conv = zext i1 %cmp to i32
274+
%and = and i32 %arg, %conv
275+
%trunc = trunc i32 %and to i8
276+
%conv1 = add nuw nsw i8 %trunc, -11
277+
%cmp.1 = icmp ugt i8 %arg1, %conv1
278+
%res = select i1 %cmp.1, i8 %conv1, i8 100
279+
ret i8 %res
280+
}

0 commit comments

Comments
 (0)
Please sign in to comment.