Index: llvm/include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfo.h +++ llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -810,8 +810,9 @@ /// Return the expected cost of materialization for the given integer /// immediate of the specified type for a given instruction. The cost can be /// zero if the immediate can be folded into the specified instruction. - int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, - Type *Ty, TargetCostKind CostKind) const; + int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, + TargetCostKind CostKind, + Instruction *Inst = nullptr) const; int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind) const; @@ -1461,7 +1462,8 @@ virtual int getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) = 0; virtual int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, - Type *Ty, TargetCostKind CostKind) = 0; + Type *Ty, TargetCostKind CostKind, + Instruction *Inst = nullptr) = 0; virtual int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind) = 0; @@ -1850,9 +1852,10 @@ TargetCostKind CostKind) override { return Impl.getIntImmCost(Imm, Ty, CostKind); } - int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, - Type *Ty, TargetCostKind CostKind) override { - return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind); + int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, + TargetCostKind CostKind, + Instruction *Inst = nullptr) override { + return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst); } int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind) override { Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h 
=================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -314,7 +314,8 @@ } unsigned getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty, TTI::TargetCostKind CostKind) { + Type *Ty, TTI::TargetCostKind CostKind, + Instruction *Inst = nullptr) { return TTI::TCC_Free; } Index: llvm/lib/Analysis/TargetTransformInfo.cpp =================================================================== --- llvm/lib/Analysis/TargetTransformInfo.cpp +++ llvm/lib/Analysis/TargetTransformInfo.cpp @@ -570,11 +570,11 @@ return Cost; } -int -TargetTransformInfo::getIntImmCostInst(unsigned Opcode, unsigned Idx, - const APInt &Imm, Type *Ty, - TTI::TargetCostKind CostKind) const { - int Cost = TTIImpl->getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind); +int TargetTransformInfo::getIntImmCostInst(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind, + Instruction *Inst) const { + int Cost = TTIImpl->getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind, Inst); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -74,7 +74,8 @@ int getIntImmCost(int64_t Val); int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty, TTI::TargetCostKind CostKind); + Type *Ty, TTI::TargetCostKind CostKind, + Instruction *Inst = nullptr); int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); Index: 
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -84,7 +84,8 @@ int AArch64TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, - TTI::TargetCostKind CostKind) { + TTI::TargetCostKind CostKind, + Instruction *Inst) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); Index: llvm/lib/Target/ARM/ARMTargetTransformInfo.h =================================================================== --- llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -126,7 +126,8 @@ int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty, TTI::TargetCostKind CostKind); + Type *Ty, TTI::TargetCostKind CostKind, + Instruction *Inst = nullptr); /// @} Index: llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -283,8 +283,10 @@ return 1; } -int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty, TTI::TargetCostKind CostKind) { +int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind, + Instruction *Inst) { // Division by a constant can be turned into multiplication, but only if we // know it's constant. So it's not so much that the immediate is cheap (it's // not), but that the alternative is worse. 
@@ -323,6 +325,40 @@ if (Opcode == Instruction::Xor && Imm.isAllOnesValue()) return 0; + // Ensures constants of min(max()) or max(min()) patterns don't get hoisted + if (Inst && ((ST->hasV6Ops() && !ST->isThumb()) || ST->isThumb2()) && + Ty->getIntegerBitWidth() <= 32) { + Value *LHS, *RHS; + ConstantInt *C; + SelectPatternFlavor InstSPF = matchSelectPattern(Inst, LHS, RHS).Flavor; + + if (InstSPF == SPF_SMAX && + PatternMatch::match(RHS, PatternMatch::m_ConstantInt(C)) && + C->getUniqueInteger() == Imm && Imm.isNegative() && + (-Imm).isPowerOf2()) { + + auto isSSatMin = [&](Value *MinInst) { + if (isa<SelectInst>(MinInst)) { + Value *MinLHS, *MinRHS; + ConstantInt *MinC; + SelectPatternFlavor MinSPF = + matchSelectPattern(MinInst, MinLHS, MinRHS).Flavor; + // Check the flavor first; MinRHS is not guaranteed to be set otherwise. + if (MinSPF == SPF_SMIN && + PatternMatch::match(MinRHS, PatternMatch::m_ConstantInt(MinC)) && + MinC->getUniqueInteger() == ((-Imm) - 1)) + return true; + } + return false; + }; + + if (isSSatMin(Inst->getOperand(1)) || + (Inst->hasNUses(2) && (isSSatMin(*Inst->user_begin()) || + isSSatMin(*(++Inst->user_begin()))))) + return 0; + } + } + return getIntImmCost(Imm, Ty, CostKind); } Index: llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h =================================================================== --- llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h +++ llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h @@ -67,7 +67,8 @@ } int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, - TTI::TargetCostKind CostKind) { + TTI::TargetCostKind CostKind, + Instruction *Inst = nullptr) { return getIntImmCost(Imm, Ty, CostKind); } Index: llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h =================================================================== --- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -52,7 +52,8 @@ TTI::TargetCostKind CostKind); int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty, TTI::TargetCostKind CostKind); + Type *Ty, 
TTI::TargetCostKind CostKind, + Instruction *Inst = nullptr); int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); Index: llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -233,9 +233,10 @@ int PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, - TTI::TargetCostKind CostKind) { + TTI::TargetCostKind CostKind, + Instruction *Inst) { if (DisablePPCConstHoist) - return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind); + return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind, Inst); assert(Ty->isIntegerTy()); Index: llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h =================================================================== --- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -42,8 +42,9 @@ TLI(ST->getTargetLowering()) {} int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); - int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, - TTI::TargetCostKind CostKind); + int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty, TTI::TargetCostKind CostKind, + Instruction *Inst = nullptr); int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); }; Index: llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -30,8 +30,10 @@ getST()->is64Bit()); } -int RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty, TTI::TargetCostKind CostKind) { +int 
RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind, + Instruction *Inst) { assert(Ty->isIntegerTy() && "getIntImmCost can only estimate cost of materialising integers"); Index: llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h =================================================================== --- llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -41,7 +41,8 @@ int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty, TTI::TargetCostKind CostKind); + Type *Ty, TTI::TargetCostKind CostKind, + Instruction *Inst = nullptr); int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); Index: llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -64,8 +64,9 @@ } int SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, - const APInt &Imm, Type *Ty, - TTI::TargetCostKind CostKind) { + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind, + Instruction *Inst) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); Index: llvm/lib/Target/X86/X86TargetTransformInfo.h =================================================================== --- llvm/lib/Target/X86/X86TargetTransformInfo.h +++ llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -203,8 +203,9 @@ unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind); - int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, - TTI::TargetCostKind CostKind); + int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty, TTI::TargetCostKind CostKind, + 
Instruction *Inst = nullptr); int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, Index: llvm/lib/Target/X86/X86TargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -3841,8 +3841,10 @@ return std::max(1, Cost); } -int X86TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty, TTI::TargetCostKind CostKind) { +int X86TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind, + Instruction *Inst) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); Index: llvm/lib/Transforms/Scalar/ConstantHoisting.cpp =================================================================== --- llvm/lib/Transforms/Scalar/ConstantHoisting.cpp +++ llvm/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -366,9 +366,9 @@ ConstInt->getValue(), ConstInt->getType(), TargetTransformInfo::TCK_SizeAndLatency); else - Cost = TTI->getIntImmCostInst(Inst->getOpcode(), Idx, ConstInt->getValue(), - ConstInt->getType(), - TargetTransformInfo::TCK_SizeAndLatency); + Cost = TTI->getIntImmCostInst( + Inst->getOpcode(), Idx, ConstInt->getValue(), ConstInt->getType(), + TargetTransformInfo::TCK_SizeAndLatency, Inst); // Ignore cheap integer constants. if (Cost > TargetTransformInfo::TCC_Basic) { @@ -418,8 +418,9 @@ // usually lowered to a load from constant pool. Such operation is unlikely // to be cheaper than compute it by <Base + Offset>, which can be lowered to // an ADD instruction or folded into Load/Store instruction. 
- int Cost = TTI->getIntImmCostInst(Instruction::Add, 1, Offset, PtrIntTy, - TargetTransformInfo::TCK_SizeAndLatency); + int Cost = + TTI->getIntImmCostInst(Instruction::Add, 1, Offset, PtrIntTy, + TargetTransformInfo::TCK_SizeAndLatency, Inst); ConstCandVecType &ExprCandVec = ConstGEPCandMap[BaseGV]; ConstCandMapType::iterator Itr; bool Inserted; Index: llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll +++ llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll @@ -1106,8 +1106,7 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, lr} ; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: beq .LBB5_8 +; CHECK-NEXT: cbz r3, .LBB5_8 ; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: cmp r3, #3 ; CHECK-NEXT: bhi .LBB5_3 @@ -1139,23 +1138,16 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, r5, r6, pc} ; CHECK-NEXT: .LBB5_6: @ %for.body.preheader21 -; CHECK-NEXT: movw r0, #32768 ; CHECK-NEXT: sub.w lr, r3, r5 -; CHECK-NEXT: movt r0, #65535 -; CHECK-NEXT: movw r1, #32767 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB5_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrsh r2, [r12], #2 -; CHECK-NEXT: ldrsh r3, [r6], #2 -; CHECK-NEXT: muls r2, r3, r2 -; CHECK-NEXT: mov r3, r0 -; CHECK-NEXT: cmp.w r0, r2, asr #15 -; CHECK-NEXT: it lt -; CHECK-NEXT: asrlt r3, r2, #15 -; CHECK-NEXT: cmp r3, r1 -; CHECK-NEXT: csel r2, r3, r1, lt -; CHECK-NEXT: strh r2, [r4], #2 +; CHECK-NEXT: ldrsh r0, [r12], #2 +; CHECK-NEXT: ldrsh r1, [r6], #2 +; CHECK-NEXT: muls r0, r1, r0 +; CHECK-NEXT: asrs r0, r0, #15 +; CHECK-NEXT: ssat r0, #16, r0 +; CHECK-NEXT: strh r0, [r4], #2 ; CHECK-NEXT: le lr, .LBB5_7 ; CHECK-NEXT: .LBB5_8: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, r5, r6, pc} @@ -1280,23 +1272,16 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, r5, r6, pc} ; CHECK-NEXT: .LBB6_6: @ %for.body.preheader21 -; CHECK-NEXT: movw r0, 
#32768 ; CHECK-NEXT: sub.w lr, r3, r5 -; CHECK-NEXT: movt r0, #65535 -; CHECK-NEXT: movw r1, #32767 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB6_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrsh r2, [r12], #2 -; CHECK-NEXT: ldrsh r3, [r6], #2 -; CHECK-NEXT: muls r2, r3, r2 -; CHECK-NEXT: mov r3, r0 -; CHECK-NEXT: cmp.w r0, r2, asr #15 -; CHECK-NEXT: it lt -; CHECK-NEXT: asrlt r3, r2, #15 -; CHECK-NEXT: cmp r3, r1 -; CHECK-NEXT: csel r2, r3, r1, lt -; CHECK-NEXT: strh r2, [r4], #2 +; CHECK-NEXT: ldrsh r0, [r12], #2 +; CHECK-NEXT: ldrsh r1, [r6], #2 +; CHECK-NEXT: muls r0, r1, r0 +; CHECK-NEXT: asrs r0, r0, #15 +; CHECK-NEXT: ssat r0, #16, r0 +; CHECK-NEXT: strh r0, [r4], #2 ; CHECK-NEXT: le lr, .LBB6_7 ; CHECK-NEXT: .LBB6_8: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, r5, r6, pc} @@ -1418,23 +1403,16 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, r5, r6, pc} ; CHECK-NEXT: .LBB7_6: @ %for.body.preheader21 -; CHECK-NEXT: movw r0, #32768 ; CHECK-NEXT: sub.w lr, r3, r5 -; CHECK-NEXT: movt r0, #65535 -; CHECK-NEXT: movw r1, #32767 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB7_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrsh r2, [r12], #2 -; CHECK-NEXT: ldrsh r3, [r6], #2 -; CHECK-NEXT: muls r2, r3, r2 -; CHECK-NEXT: mov r3, r0 -; CHECK-NEXT: cmp.w r0, r2, asr #15 -; CHECK-NEXT: it lt -; CHECK-NEXT: asrlt r3, r2, #15 -; CHECK-NEXT: cmp r3, r1 -; CHECK-NEXT: csel r2, r3, r1, lt -; CHECK-NEXT: strh r2, [r4], #2 +; CHECK-NEXT: ldrsh r0, [r12], #2 +; CHECK-NEXT: ldrsh r1, [r6], #2 +; CHECK-NEXT: muls r0, r1, r0 +; CHECK-NEXT: asrs r0, r0, #15 +; CHECK-NEXT: ssat r0, #16, r0 +; CHECK-NEXT: strh r0, [r4], #2 ; CHECK-NEXT: le lr, .LBB7_7 ; CHECK-NEXT: .LBB7_8: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, r5, r6, pc} @@ -2329,8 +2307,7 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, lr} ; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: beq .LBB14_8 +; CHECK-NEXT: cbz 
r3, .LBB14_8 ; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: cmp r3, #7 ; CHECK-NEXT: bhi .LBB14_3 @@ -2363,21 +2340,15 @@ ; CHECK-NEXT: popeq {r4, r5, r6, pc} ; CHECK-NEXT: .LBB14_6: @ %for.body.preheader23 ; CHECK-NEXT: sub.w lr, r3, r5 -; CHECK-NEXT: mvn r0, #127 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB14_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrsb r1, [r12], #1 -; CHECK-NEXT: ldrsb r2, [r6], #1 -; CHECK-NEXT: muls r1, r2, r1 -; CHECK-NEXT: mvn r2, #127 -; CHECK-NEXT: cmp.w r0, r1, asr #7 -; CHECK-NEXT: it lt -; CHECK-NEXT: asrlt r2, r1, #7 -; CHECK-NEXT: cmp r2, #127 -; CHECK-NEXT: it ge -; CHECK-NEXT: movge r2, #127 -; CHECK-NEXT: strb r2, [r4], #1 +; CHECK-NEXT: ldrsb r0, [r12], #1 +; CHECK-NEXT: ldrsb r1, [r6], #1 +; CHECK-NEXT: muls r0, r1, r0 +; CHECK-NEXT: asrs r0, r0, #7 +; CHECK-NEXT: ssat r0, #8, r0 +; CHECK-NEXT: strb r0, [r4], #1 ; CHECK-NEXT: le lr, .LBB14_7 ; CHECK-NEXT: .LBB14_8: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, r5, r6, pc} @@ -2503,21 +2474,15 @@ ; CHECK-NEXT: popeq {r4, r5, r6, pc} ; CHECK-NEXT: .LBB15_6: @ %for.body.preheader23 ; CHECK-NEXT: sub.w lr, r3, r5 -; CHECK-NEXT: mvn r0, #127 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB15_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrsb r1, [r12], #1 -; CHECK-NEXT: ldrsb r2, [r6], #1 -; CHECK-NEXT: muls r1, r2, r1 -; CHECK-NEXT: mvn r2, #127 -; CHECK-NEXT: cmp.w r0, r1, asr #7 -; CHECK-NEXT: it lt -; CHECK-NEXT: asrlt r2, r1, #7 -; CHECK-NEXT: cmp r2, #127 -; CHECK-NEXT: it ge -; CHECK-NEXT: movge r2, #127 -; CHECK-NEXT: strb r2, [r4], #1 +; CHECK-NEXT: ldrsb r0, [r12], #1 +; CHECK-NEXT: ldrsb r1, [r6], #1 +; CHECK-NEXT: muls r0, r1, r0 +; CHECK-NEXT: asrs r0, r0, #7 +; CHECK-NEXT: ssat r0, #8, r0 +; CHECK-NEXT: strb r0, [r4], #1 ; CHECK-NEXT: le lr, .LBB15_7 ; CHECK-NEXT: .LBB15_8: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, r5, r6, pc} @@ -2640,21 +2605,15 @@ ; CHECK-NEXT: popeq {r4, r5, r6, pc} ; 
CHECK-NEXT: .LBB16_6: @ %for.body.preheader23 ; CHECK-NEXT: sub.w lr, r3, r5 -; CHECK-NEXT: mvn r0, #127 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB16_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrsb r1, [r12], #1 -; CHECK-NEXT: ldrsb r2, [r6], #1 -; CHECK-NEXT: muls r1, r2, r1 -; CHECK-NEXT: mvn r2, #127 -; CHECK-NEXT: cmp.w r0, r1, asr #7 -; CHECK-NEXT: it lt -; CHECK-NEXT: asrlt r2, r1, #7 -; CHECK-NEXT: cmp r2, #127 -; CHECK-NEXT: it ge -; CHECK-NEXT: movge r2, #127 -; CHECK-NEXT: strb r2, [r4], #1 +; CHECK-NEXT: ldrsb r0, [r12], #1 +; CHECK-NEXT: ldrsb r1, [r6], #1 +; CHECK-NEXT: muls r0, r1, r0 +; CHECK-NEXT: asrs r0, r0, #7 +; CHECK-NEXT: ssat r0, #8, r0 +; CHECK-NEXT: strb r0, [r4], #1 ; CHECK-NEXT: le lr, .LBB16_7 ; CHECK-NEXT: .LBB16_8: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, r5, r6, pc}