Index: llvm/trunk/lib/Target/ARM/ARMCodeGenPrepare.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMCodeGenPrepare.cpp +++ llvm/trunk/lib/Target/ARM/ARMCodeGenPrepare.cpp @@ -126,7 +126,7 @@ SmallPtrSetImpl &SafeToPromote); void TruncateSinks(SmallPtrSetImpl &Sources, SmallPtrSetImpl &Sinks); - void Cleanup(SmallPtrSetImpl &Sinks); + void Cleanup(SmallPtrSetImpl &Visited); public: IRPromoter(Module *M) : M(M), Ctx(M->getContext()), @@ -180,6 +180,18 @@ Opc == Instruction::SRem; } +static bool EqualTypeSize(Value *V) { + return V->getType()->getScalarSizeInBits() == ARMCodeGenPrepare::TypeSize; +} + +static bool LessOrEqualTypeSize(Value *V) { + return V->getType()->getScalarSizeInBits() <= ARMCodeGenPrepare::TypeSize; +} + +static bool GreaterThanTypeSize(Value *V) { + return V->getType()->getScalarSizeInBits() > ARMCodeGenPrepare::TypeSize; +} + /// Some instructions can use 8- and 16-bit operands, and we don't need to /// promote anything larger. We disallow booleans to make life easier when /// dealing with icmps but allow any other integer that is <= 16 bits. Void @@ -194,11 +206,10 @@ if (auto *Ld = dyn_cast(V)) Ty = cast(Ld->getPointerOperandType())->getElementType(); - const IntegerType *IntTy = dyn_cast(Ty); - if (!IntTy) + if (!isa(Ty)) return false; - return IntTy->getBitWidth() == ARMCodeGenPrepare::TypeSize; + return LessOrEqualTypeSize(V); } /// Return true if the given value is a source in the use-def chain, producing @@ -221,7 +232,7 @@ else if (auto *Call = dyn_cast(V)) return Call->hasRetAttr(Attribute::AttrKind::ZExt); else if (auto *Trunc = dyn_cast(V)) - return isSupportedType(Trunc); + return EqualTypeSize(Trunc); return false; } @@ -232,18 +243,15 @@ // TODO The truncate also isn't actually necessary because we would already // proved that the data value is kept within the range of the original data // type. - auto UsesNarrowValue = [](Value *V) { - return V->getType()->getScalarSizeInBits() == ARMCodeGenPrepare::TypeSize; - }; if (auto *Store = dyn_cast(V)) - return UsesNarrowValue(Store->getValueOperand()); + return LessOrEqualTypeSize(Store->getValueOperand()); if (auto *Return = dyn_cast(V)) - return UsesNarrowValue(Return->getReturnValue()); + return LessOrEqualTypeSize(Return->getReturnValue()); if (auto *Trunc = dyn_cast(V)) - return UsesNarrowValue(Trunc->getOperand(0)); + return EqualTypeSize(Trunc->getOperand(0)); if (auto *ZExt = dyn_cast(V)) - return UsesNarrowValue(ZExt->getOperand(0)); + return GreaterThanTypeSize(ZExt); if (auto *ICmp = dyn_cast(V)) return ICmp->isSigned(); @@ -649,36 +657,37 @@ } } } - } -void IRPromoter::Cleanup(SmallPtrSetImpl &Sinks) { - // Some zext sinks will now have become redundant, along with their trunc - // operands, so remove them. - for (auto I : Sinks) { - if (auto *ZExt = dyn_cast(I)) { - if (ZExt->getDestTy() != ExtTy) - continue; - - Value *Src = ZExt->getOperand(0); - if (ZExt->getSrcTy() == ZExt->getDestTy()) { - LLVM_DEBUG(dbgs() << "ARM CGP: Removing unnecessary zext\n"); - ReplaceAllUsersOfWith(ZExt, Src); - InstsToRemove.push_back(ZExt); - continue; - } +void IRPromoter::Cleanup(SmallPtrSetImpl &Visited) { + // Some zexts will now have become redundant, along with their trunc + // operands, so remove them + for (auto V : Visited) { + if (!isa(V)) + continue; + + auto ZExt = cast(V); + if (ZExt->getDestTy() != ExtTy) + continue; + + Value *Src = ZExt->getOperand(0); + if (ZExt->getSrcTy() == ZExt->getDestTy()) { + LLVM_DEBUG(dbgs() << "ARM CGP: Removing unnecessary cast.\n"); + ReplaceAllUsersOfWith(ZExt, Src); + InstsToRemove.push_back(ZExt); + continue; + } - // For any truncs that we insert to handle zexts, we can replace the - // result of the zext with the input to the trunc. - if (NewInsts.count(Src) && isa(Src)) { - auto *Trunc = cast(Src); - assert(Trunc->getOperand(0)->getType() == ExtTy && - "expected inserted trunc to be operating on i32"); - LLVM_DEBUG(dbgs() << "ARM CGP: Replacing zext with trunc operand: " - << *Trunc->getOperand(0)); - ReplaceAllUsersOfWith(ZExt, Trunc->getOperand(0)); - InstsToRemove.push_back(ZExt); - } + // For any truncs that we insert to handle zexts, we can replace the + // result of the zext with the input to the trunc. + if (NewInsts.count(Src) && isa(Src)) { + auto *Trunc = cast(Src); + assert(Trunc->getOperand(0)->getType() == ExtTy && + "expected inserted trunc to be operating on i32"); + LLVM_DEBUG(dbgs() << "ARM CGP: Replacing zext with trunc operand: " + << *Trunc->getOperand(0)); + ReplaceAllUsersOfWith(ZExt, Trunc->getOperand(0)); + InstsToRemove.push_back(ZExt); } } @@ -728,18 +737,9 @@ // Finally, remove unecessary zexts and truncs, delete old instructions and // clear the data structures. - Cleanup(Sinks); + Cleanup(Visited); - LLVM_DEBUG(dbgs() << "ARM CGP: Mutation complete:\n"); - LLVM_DEBUG(dbgs(); - for (auto *V : Sources) - V->dump(); - for (auto *I : NewInsts) - I->dump(); - for (auto *V : Visited) { - if (!Sources.count(V)) - V->dump(); - }); + LLVM_DEBUG(dbgs() << "ARM CGP: Mutation complete\n"); } /// We accept most instructions, as well as Arguments and ConstantInsts. We @@ -747,8 +747,15 @@ /// return value is zeroext. We don't allow opcodes that can introduce sign /// bits. bool ARMCodeGenPrepare::isSupportedValue(Value *V) { - if (isa(V)) - return true; + if (auto *I = dyn_cast(V)) { + // Now that we allow small types than TypeSize, only allow icmp of + // TypeSize because they will require a trunc to be legalised. + // TODO: Allow icmp of smaller types, and calculate at the end + // whether the transform would be beneficial. + if (isa(I->getOperand(0)->getType())) + return true; + return EqualTypeSize(I->getOperand(0)); + } // Memory instructions if (isa(V) || isa(V)) @@ -766,12 +773,11 @@ isa(V)) return isSupportedType(V); - // Truncs can be either sources or sinks. - if (auto *Trunc = dyn_cast(V)) - return isSupportedType(Trunc) || isSupportedType(Trunc->getOperand(0)); + if (isa(V)) + return false; - if (isa(V) && !isa(V)) - return isSupportedType(cast(V)->getOperand(0)); + if (auto *Cast = dyn_cast(V)) + return isSupportedType(Cast) || isSupportedType(Cast->getOperand(0)); // Special cases for calls as we need to check for zeroext // TODO We should accept calls even if they don't have zeroext, as they can @@ -901,13 +907,17 @@ // Calls can be both sources and sinks. if (isSink(V)) Sinks.insert(cast(V)); + if (isSource(V)) Sources.insert(V); - else if (auto *I = dyn_cast(V)) { - // Visit operands of any instruction visited. - for (auto &U : I->operands()) { - if (!AddLegalInst(U)) - return false; + + if (!isSink(V) && !isSource(V)) { + if (auto *I = dyn_cast(V)) { + // Visit operands of any instruction visited. + for (auto &U : I->operands()) { + if (!AddLegalInst(U)) + return false; + } } } @@ -973,6 +983,8 @@ if (CI.isSigned() || !isa(CI.getOperand(0)->getType())) continue; + LLVM_DEBUG(dbgs() << "ARM CGP: Searching from: " << CI << "\n"); + for (auto &Op : CI.operands()) { if (auto *I = dyn_cast(Op)) MadeChange |= TryToPromote(I); Index: llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-calls.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-calls.ll +++ llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-calls.ll @@ -22,12 +22,10 @@ ret i16 %conv } -; Test that the transformation bails when it finds that i16 is larger than i8. -; TODO: We should be able to remove the uxtb in these cases. ; CHECK-LABEL: promote_i8_sink_i16_1 ; CHECK: bl dummy_i8 ; CHECK: add{{.*}} r0, #1 -; CHECK: uxtb r0, r0 +; CHECK-NOT: uxt ; CHECK: cmp r0 define i16 @promote_i8_sink_i16_1(i8 zeroext %arg0, i16 zeroext %arg1, i16 zeroext %arg2) { %call = tail call zeroext i8 @dummy_i8(i8 %arg0) Index: llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-casts.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-casts.ll +++ llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-casts.ll @@ -340,8 +340,8 @@ @sh1 = hidden local_unnamed_addr global i16 0, align 2 @d_sh = hidden local_unnamed_addr global [16 x i16] zeroinitializer, align 2 -; CHECK-LABEL: two_stage_zext_trunc_mix -; CHECK-NOT: uxt +; CHECK-COMMON-LABEL: two_stage_zext_trunc_mix +; CHECK-COMMON-NOT: uxt define i8* @two_stage_zext_trunc_mix(i32* %this, i32 %__pos1, i32 %__n1, i32** %__str, i32 %__pos2, i32 %__n2) { entry: %__size_.i.i.i.i = bitcast i32** %__str to i8* @@ -363,3 +363,222 @@ %res = select i1 %tobool.i.i.i.i.i, i8* %7, i8* %8 ret i8* %res } + +; CHECK-COMMON-LABEL: search_through_zext_1 +; CHECK-COMMON-NOT: uxt +define i8 @search_through_zext_1(i8 zeroext %a, i8 zeroext %b, i16 zeroext %c) { +entry: + %add = add nuw i8 %a, %b + %conv = zext i8 %add to i16 + %cmp = icmp ult i16 %conv, %c + br i1 %cmp, label %if.then, label %if.end + +if.then: + %sub = sub nuw i8 %b, %a + %conv2 = zext i8 %sub to i16 + %cmp2 = icmp ugt i16 %conv2, %c + %res = select i1 %cmp2, i8 %a, i8 %b + br label %if.end + +if.end: + %retval = phi i8 [ 0, %entry ], [ %res, %if.then ] + ret i8 %retval +} + +; TODO: We should be able to remove the uxtb here. The transform fails because +; the icmp ugt uses an i32, which is too large... but this doesn't matter +; because it won't be writing a large value to a register as a result. +; CHECK-COMMON-LABEL: search_through_zext_2 +; CHECK-COMMON: uxtb +; CHECK-COMMON: uxtb +define i8 @search_through_zext_2(i8 zeroext %a, i8 zeroext %b, i16 zeroext %c, i32 %d) { +entry: + %add = add nuw i8 %a, %b + %conv = zext i8 %add to i16 + %cmp = icmp ult i16 %conv, %c + br i1 %cmp, label %if.then, label %if.end + +if.then: + %sub = sub nuw i8 %b, %a + %conv2 = zext i8 %sub to i32 + %cmp2 = icmp ugt i32 %conv2, %d + %res = select i1 %cmp2, i8 %a, i8 %b + br label %if.end + +if.end: + %retval = phi i8 [ 0, %entry ], [ %res, %if.then ] + ret i8 %retval +} + +; TODO: We should be able to remove the uxtb here as all the calculations are +; performed on i8s. The promotion of i8 to i16 and then the later truncation +; results in the uxtb. +; CHECK-COMMON-LABEL: search_through_zext_3 +; CHECK-COMMON: uxtb +; CHECK-COMMON: uxtb +define i8 @search_through_zext_3(i8 zeroext %a, i8 zeroext %b, i16 zeroext %c, i32 %d) { +entry: + %add = add nuw i8 %a, %b + %conv = zext i8 %add to i16 + %cmp = icmp ult i16 %conv, %c + br i1 %cmp, label %if.then, label %if.end + +if.then: + %trunc = trunc i16 %conv to i8 + %sub = sub nuw i8 %b, %trunc + %conv2 = zext i8 %sub to i32 + %cmp2 = icmp ugt i32 %conv2, %d + %res = select i1 %cmp2, i8 %a, i8 %b + br label %if.end + +if.end: + %retval = phi i8 [ 0, %entry ], [ %res, %if.then ] + ret i8 %retval +} + +; TODO: We should be able to remove the uxt that gets introduced for %conv2 +; CHECK-COMMON-LABEL: search_through_zext_cmp +; CHECK-COMMON: uxt +define i8 @search_through_zext_cmp(i8 zeroext %a, i8 zeroext %b, i16 zeroext %c) { +entry: + %cmp = icmp ne i8 %a, %b + %conv = zext i1 %cmp to i16 + %cmp1 = icmp ult i16 %conv, %c + br i1 %cmp1, label %if.then, label %if.end + +if.then: + %sub = sub nuw i8 %b, %a + %conv2 = zext i8 %sub to i16 + %cmp3 = icmp ugt i16 %conv2, %c + %res = select i1 %cmp3, i8 %a, i8 %b + br label %if.end + +if.end: + %retval = phi i8 [ 0, %entry ], [ %res, %if.then ] + ret i8 %retval +} + +; CHECK-COMMON-LABEL: search_through_zext_load +; CHECK-COMMON-NOT: uxt +define i8 @search_through_zext_load(i8* %a, i8 zeroext %b, i16 zeroext %c) { +entry: + %load = load i8, i8* %a + %conv = zext i8 %load to i16 + %cmp1 = icmp ult i16 %conv, %c + br i1 %cmp1, label %if.then, label %if.end + +if.then: + %sub = sub nuw i8 %b, %load + %conv2 = zext i8 %sub to i16 + %cmp3 = icmp ugt i16 %conv2, %c + %res = select i1 %cmp3, i8 %load, i8 %b + br label %if.end + +if.end: + %retval = phi i8 [ 0, %entry ], [ %res, %if.then ] + ret i8 %retval +} + +; CHECK-COMMON-LABEL: trunc_sink_less_than +; CHECK-COMMON-NOT: uxth +; CHECK-COMMON: cmp +; CHECK-COMMON: uxtb +define i16 @trunc_sink_less_than_cmp(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i8 zeroext %d) { +entry: + %sub = sub nuw i16 %b, %a + %cmp = icmp ult i16 %sub, %c + br i1 %cmp, label %if.then, label %if.end + +if.then: + %trunc = trunc i16 %sub to i8 + %add = add nuw i8 %d, 1 + %cmp2 = icmp ugt i8 %trunc, %add + %res = select i1 %cmp2, i16 %a, i16 %b + br label %if.end + +if.end: + %retval = phi i16 [ 0, %entry ], [ %res, %if.then ] + ret i16 %retval +} + +; TODO: We should be able to remove the uxth introduced to handle %sub +; CHECK-COMMON-LABEL: trunc_sink_less_than_arith +; CHECK-COMMON: uxth +; CHECK-COMMON: cmp +; CHECK-COMMON: uxtb +define i16 @trunc_sink_less_than_arith(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i8 zeroext %d, i8 zeroext %e) { +entry: + %sub = sub nuw i16 %b, %a + %cmp = icmp ult i16 %sub, %c + br i1 %cmp, label %if.then, label %if.end + +if.then: + %trunc = trunc i16 %sub to i8 + %add = add nuw i8 %d, %trunc + %cmp2 = icmp ugt i8 %e, %add + %res = select i1 %cmp2, i16 %a, i16 %b + br label %if.end + +if.end: + %retval = phi i16 [ 0, %entry ], [ %res, %if.then ] + ret i16 %retval +} + +; CHECK-COMMON-LABEL: trunc_sink_less_than_store +; CHECK-COMMON-NOT: uxt +; CHECK-COMMON: cmp +; CHECK-COMMON-NOT: uxt +define i16 @trunc_sink_less_than_store(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i8 zeroext %d, i8* %e) { +entry: + %sub = sub nuw i16 %b, %a + %cmp = icmp ult i16 %sub, %c + br i1 %cmp, label %if.then, label %if.end + +if.then: + %trunc = trunc i16 %sub to i8 + %add = add nuw i8 %d, %trunc + store i8 %add, i8* %e + br label %if.end + +if.end: + %retval = phi i16 [ 0, %entry ], [ %sub, %if.then ] + ret i16 %retval +} + +; CHECK-COMMON-LABEL: trunc_sink_less_than_ret +; CHECK-COMMON-NOT: uxt +define i8 @trunc_sink_less_than_ret(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i8 zeroext %d, i8 zeroext %e) { +entry: + %sub = sub nuw i16 %b, %a + %cmp = icmp ult i16 %sub, %c + br i1 %cmp, label %if.then, label %if.end + +if.then: + %trunc = trunc i16 %sub to i8 + %add = add nuw i8 %d, %trunc + br label %if.end + +if.end: + %retval = phi i8 [ 0, %entry ], [ %add, %if.then ] + ret i8 %retval +} + +; CHECK-COMMON-LABEL: trunc_sink_less_than_zext_ret +; CHECK-COMMON-NOT: uxt +; CHECK-COMMON: cmp +; CHECK-COMMON: uxtb +define zeroext i8 @trunc_sink_less_than_zext_ret(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i8 zeroext %d, i8 zeroext %e) { +entry: + %sub = sub nuw i16 %b, %a + %cmp = icmp ult i16 %sub, %c + br i1 %cmp, label %if.then, label %if.end + +if.then: + %trunc = trunc i16 %sub to i8 + %add = add nuw i8 %d, %trunc + br label %if.end + +if.end: + %retval = phi i8 [ 0, %entry ], [ %add, %if.then ] + ret i8 %retval +} Index: llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-signed-icmps.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-signed-icmps.ll +++ llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-signed-icmps.ll @@ -11,18 +11,17 @@ ; CHECK-NODSP: sxtb ; CHECK-NODSP: cmp -; CHECK-DSP: add -; CHECK-DSP: uxtb +; CHECK-DSP: uadd8 +; CHECK-DSP: sub ; CHECK-DSP: cmp ; CHECK-DSP: sxtb -; CHECK-DSP: sub ; CHECK-DSP: sxtb ; CHECK-DSP: cmp ; CHECK-DSP-IMM: uadd8 [[ADD:r[0-9]+]], ; CHECK-DSP-IMM: cmp [[ADD]], +; CHECK-DSP-IMM: subs [[SUB:r[0-9]+]], ; CHECK-DSP-IMM: sxtb [[SEXT0:r[0-9]+]], [[ADD]] -; CHECK-DSP-IMM: usub8 [[SUB:r[0-9]+]], ; CHECK-DSP-IMM: sxtb [[SEXT1:r[0-9]+]], [[SUB]] ; CHECK-DSP-IMM: cmp [[SEXT1]], [[SEXT0]] define i8 @eq_sgt(i8* %x, i8 *%y, i8 zeroext %z) {