diff --git a/llvm/include/llvm/CodeGen/ExpandLargeDivRem.h b/llvm/include/llvm/CodeGen/ExpandLargeDivRem.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/CodeGen/ExpandLargeDivRem.h @@ -0,0 +1,29 @@ +//===----- ExpandLargeDivRem.h - Expand large div/rem ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_EXPANDLARGEDIVREM_H +#define LLVM_CODEGEN_EXPANDLARGEDIVREM_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +/// Expands div/rem instructions with a bitwidth above a threshold +/// into a call to auto-generated functions. +/// This is useful for backends like x86 that cannot lower divisions +/// with more than 128 bits. +class ExpandLargeDivRemPass : public PassInfoMixin<ExpandLargeDivRemPass> { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + + // The backend asserts when seeing large div/rem instructions. + static bool isRequired() { return true; } +}; +} // end namespace llvm + +#endif // LLVM_CODEGEN_EXPANDLARGEDIVREM_H diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def --- a/llvm/include/llvm/CodeGen/MachinePassRegistry.def +++ b/llvm/include/llvm/CodeGen/MachinePassRegistry.def @@ -43,6 +43,7 @@ FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass, ()) FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass, (false)) FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass, (true)) +FUNCTION_PASS("expandlargedivrem", ExpandLargeDivRemPass, ()) FUNCTION_PASS("expand-reductions", ExpandReductionsPass, ()) FUNCTION_PASS("expandvp", ExpandVectorPredicationPass, ()) FUNCTION_PASS("lowerinvoke", LowerInvokePass, ()) diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -485,6 +485,9 @@ /// predicate mask. FunctionPass *createExpandVectorPredicationPass(); + // Expands large div/rem instructions. + ModulePass *createExpandLargeDivRemPass(); + + // This pass expands memcmp() to load/stores. 
FunctionPass *createExpandMemCmpPass(); diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -150,6 +150,7 @@ void initializeEHContGuardCatchretPass(PassRegistry &); void initializeEliminateAvailableExternallyLegacyPassPass(PassRegistry&); void initializeEntryExitInstrumenterPass(PassRegistry&); +void initializeExpandLargeDivRemLegacyPassPass(PassRegistry&); void initializeExpandMemCmpPassPass(PassRegistry&); void initializeExpandPostRAPass(PassRegistry&); void initializeExpandReductionsPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -196,6 +196,7 @@ (void) llvm::createReversePostOrderFunctionAttrsPass(); (void) llvm::createMergeFunctionsPass(); (void) llvm::createMergeICmpsLegacyPass(); + (void) llvm::createExpandLargeDivRemPass(); (void) llvm::createExpandMemCmpPass(); (void) llvm::createExpandVectorPredicationPass(); std::string buf; diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -55,6 +55,7 @@ EdgeBundles.cpp EHContGuardCatchret.cpp ExecutionDomainFix.cpp + ExpandLargeDivRem.cpp ExpandMemCmp.cpp ExpandPostRAPseudos.cpp ExpandReductions.cpp diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -36,6 +36,7 @@ initializeEarlyIfPredicatorPass(Registry); initializeEarlyMachineLICMPass(Registry); initializeEarlyTailDuplicatePass(Registry); + initializeExpandLargeDivRemLegacyPassPass(Registry); initializeExpandMemCmpPassPass(Registry); initializeExpandPostRAPass(Registry); initializeFEntryInserterPass(Registry); diff --git a/llvm/lib/CodeGen/ExpandLargeDivRem.cpp b/llvm/lib/CodeGen/ExpandLargeDivRem.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/CodeGen/ExpandLargeDivRem.cpp @@ -0,0 +1,296 @@ +//===--- ExpandLargeDivRem.cpp - Expand large div/rem ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass expands div/rem instructions with a bitwidth above a threshold +// into a call to auto-generated functions. +// This is useful for backends like x86 that cannot lower divisions +// with more than 128 bits. 
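+// +// For example (an illustrative sketch; the tests under +// llvm/test/Transforms/ExpandLargeDivRem show the exact output), an +// instruction such as +//   %res = udiv i129 %a, %b +// is rewritten into +//   %res = call i129 @__llvm_udiv129(i129 %a, i129 %b) +// where @__llvm_udiv129 is emitted into the module with internal linkage.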
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ExpandLargeDivRem.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +static cl::opt<unsigned> + ExpandDivRemBits("expand-div-rem-bits", cl::Hidden, cl::init(128), + cl::desc("div and rem instructions on integers with " + "more than <N> bits are expanded.")); + +static Value *getNthBit(IRBuilder<> &Builder, Value *V, Value *N) { + auto *LShr = Builder.CreateLShr(V, N); + return Builder.CreateTrunc(LShr, Type::getInt1Ty(V->getContext())); +} + +static Function *getOrCreateUDivRem(bool IsDiv, Module &M, IntegerType &Ty) { + std::string Name = + (IsDiv ? "__llvm_udiv" : "__llvm_urem") + utostr(Ty.getBitWidth()); + + Function *F = M.getFunction(Name); + if (F) + return F; + + F = Function::Create(FunctionType::get(&Ty, {&Ty, &Ty}, /*isVarArg=*/false), + GlobalVariable::InternalLinkage, + M.getDataLayout().getProgramAddressSpace(), Name); + M.getFunctionList().push_back(F); + + F->addFnAttr(Attribute::NoUnwind); + F->addFnAttr(Attribute::WillReturn); + F->addFnAttr(Attribute::NoRecurse); + F->addFnAttr(Attribute::ReadNone); + // In general, div cannot be 'speculatable' due to UB when dividing by + // zero, but the algorithm used here doesn't produce UB. + F->addFnAttr(Attribute::Speculatable); + + BasicBlock *EntryBB = BasicBlock::Create(M.getContext(), "entry", F); + BasicBlock *LoopBB = BasicBlock::Create(M.getContext(), "loop", F); + BasicBlock *ThenBB = BasicBlock::Create(M.getContext(), "then", F); + BasicBlock *IfEndBB = BasicBlock::Create(M.getContext(), "if.end", F); + BasicBlock *ExitBB = BasicBlock::Create(M.getContext(), "exit", F); + + auto *I32Ty = Type::getInt32Ty(M.getContext()); + + Argument *B = F->getArg(1); + + auto *One = ConstantInt::get(&Ty, 1); + + PHINode *Q = nullptr; + Value *NewQ = nullptr; + PHINode *QNext = nullptr; + + // entry: + IRBuilder<> Builder(EntryBB); + Builder.CreateBr(LoopBB); + + // loop: + Builder.SetInsertPoint(LoopBB); + + auto *I = Builder.CreatePHI(I32Ty, 3, "i"); + I->addIncoming(ConstantInt::get(I32Ty, Ty.getBitWidth() - 1), EntryBB); + + if (IsDiv) { + Q = Builder.CreatePHI(&Ty, 2, "q"); + Q->addIncoming(ConstantInt::get(&Ty, 0), EntryBB); + } + + auto *R = Builder.CreatePHI(&Ty, 2, "r"); + R->addIncoming(ConstantInt::get(&Ty, 0), EntryBB); + + auto *IExt = Builder.CreateZExtOrTrunc(I, &Ty, "iext"); + + // A_nth = (A >> i) & 1 + Value *An = getNthBit(Builder, F->getArg(0), IExt); + + // R = R << 1 + auto *NewR = Builder.CreateShl(R, One, "new_r"); + + // R = R | A_nth + NewR = Builder.CreateOr(NewR, Builder.CreateZExt(An, &Ty), "new_r"); + + auto *ExitLoopCond = + Builder.CreateICmpEQ(I, ConstantInt::get(I32Ty, 0), "loop_exit_cond"); + + // I-- + auto *NewI = Builder.CreateAdd(I, ConstantInt::getSigned(I32Ty, -1), "new_i"); + I->addIncoming(NewI, IfEndBB); + + auto *RGreater = Builder.CreateICmpUGE(NewR, B); + // if (R >= B) + Builder.CreateCondBr(RGreater, ThenBB, IfEndBB); + + // then: + Builder.SetInsertPoint(ThenBB); + + // R = R - B + auto *NewR2 = Builder.CreateSub(NewR, B, "new_r"); + + if (IsDiv) { + // Q = Q | (1 << I) + NewQ = Builder.CreateOr(Q, Builder.CreateShl(One, IExt), "new_q"); + } + 
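// Taken together, the loop/then/if.end blocks implement restoring + // ("shift-subtract") long division; the generated body corresponds to + // this pseudocode sketch (comment only): + //   for (i = BitWidth - 1; ; --i) { + //     r = (r << 1) | ((a >> i) & 1); + //     if (r >= b) { r -= b; if (IsDiv) q |= (1 << i); } + //     if (i == 0) break; + //   } + 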
Builder.CreateBr(IfEndBB); + + // if.end: + Builder.SetInsertPoint(IfEndBB); + + // Merge the values coming from the then block and from the loop header. + auto *RNext = Builder.CreatePHI(&Ty, 2, "r"); + RNext->addIncoming(NewR2, ThenBB); + RNext->addIncoming(NewR, LoopBB); + R->addIncoming(RNext, IfEndBB); + + if (IsDiv) { + QNext = Builder.CreatePHI(&Ty, 2, "q"); + QNext->addIncoming(NewQ, ThenBB); + QNext->addIncoming(Q, LoopBB); + Q->addIncoming(QNext, IfEndBB); + } + + // if (i == 0) break; + Builder.CreateCondBr(ExitLoopCond, ExitBB, LoopBB); + + // exit: + Builder.SetInsertPoint(ExitBB); + if (IsDiv) { + // return Q + Builder.CreateRet(QNext); + } else { + // return R + Builder.CreateRet(RNext); + } + return F; +} + +static Function *getOrCreateSDivRem(bool IsDiv, Module &M, IntegerType &Ty) { + std::string Name = + (IsDiv ? "__llvm_sdiv" : "__llvm_srem") + utostr(Ty.getBitWidth()); + + Function *F = M.getFunction(Name); + if (F) + return F; + + F = Function::Create(FunctionType::get(&Ty, {&Ty, &Ty}, /*isVarArg=*/false), + GlobalVariable::InternalLinkage, + M.getDataLayout().getProgramAddressSpace(), Name); + M.getFunctionList().push_back(F); + + Function *UDivRemFn = getOrCreateUDivRem(IsDiv, M, Ty); + F->setAttributes(UDivRemFn->getAttributes()); + + Value *A = F->getArg(0); + Value *B = F->getArg(1); + auto *Zero = ConstantInt::get(&Ty, 0); + + // entry: + IRBuilder<> Builder(BasicBlock::Create(M.getContext(), "entry", F)); + + // A = A < 0 ? -A : A + auto *ANeg = Builder.CreateICmpSLT(A, Zero); + A = Builder.CreateSelect(ANeg, Builder.CreateNeg(A), A); + + // B = B < 0 ? -B : B + auto *BNeg = Builder.CreateICmpSLT(B, Zero); + B = Builder.CreateSelect(BNeg, Builder.CreateNeg(B), B); + + auto *Call = Builder.CreateCall(UDivRemFn, {A, B}); + + // Quo = ANeg != BNeg ? -Quo : Quo + // Rem = ANeg ? -Rem : Rem + auto *NegateResultCond = IsDiv ? 
Builder.CreateICmpNE(ANeg, BNeg) : ANeg; + + auto *Ret = + Builder.CreateSelect(NegateResultCond, Builder.CreateNeg(Call), Call); + + Builder.CreateRet(Ret); + + return F; +} + +static Function *getOrCreateDivRem(unsigned Opcode, Module &M, + IntegerType &Ty) { + + bool IsDiv = Opcode == Instruction::UDiv || Opcode == Instruction::SDiv; + + if (Opcode == Instruction::SDiv || Opcode == Instruction::SRem) + return getOrCreateSDivRem(IsDiv, M, Ty); + + return getOrCreateUDivRem(IsDiv, M, Ty); +} + +static bool runImpl(Function &F) { + SmallVector<Instruction *, 4> Replace; + + for (auto &I : instructions(F)) { + switch (I.getOpcode()) { + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + if (!isa<IntegerType>(I.getType()) || + I.getType()->getIntegerBitWidth() <= ExpandDivRemBits) + continue; + Replace.push_back(&I); + break; + default: + break; + } + } + + if (Replace.empty()) + return false; + + while (!Replace.empty()) { + Instruction *I = Replace.pop_back_val(); + + IRBuilder<> Builder(I); + auto *Ty = cast<IntegerType>(I->getType()); + + I->replaceAllUsesWith(Builder.CreateCall( + getOrCreateDivRem(I->getOpcode(), *F.getParent(), *Ty), + {I->getOperand(0), I->getOperand(1)})); + I->eraseFromParent(); + } + + return true; +} + +PreservedAnalyses ExpandLargeDivRemPass::run(Module &M, + ModuleAnalysisManager &AM) { + + bool Changed = false; + for (auto &F : M) + Changed |= runImpl(F); + + if (Changed) + return PreservedAnalyses::none(); + + return PreservedAnalyses::all(); +} + +class ExpandLargeDivRemLegacyPass : public ModulePass { +public: + static char ID; + + ExpandLargeDivRemLegacyPass() : ModulePass(ID) { + initializeExpandLargeDivRemLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override { + bool Changed = false; + for (auto &F : M) + Changed |= runImpl(F); + return Changed; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<AAResultsWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + } +}; + +char ExpandLargeDivRemLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(ExpandLargeDivRemLegacyPass, "expandlargedivrem", + "Expand large div/rem", false, false) +INITIALIZE_PASS_END(ExpandLargeDivRemLegacyPass, "expandlargedivrem", + "Expand large div/rem", false, false) + +ModulePass *llvm::createExpandLargeDivRemPass() { + return new ExpandLargeDivRemLegacyPass(); +} diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -1113,6 +1113,7 @@ if (TM->useEmulatedTLS()) addPass(createLowerEmuTLSPass()); + addPass(createExpandLargeDivRemPass()); addPass(createPreISelIntrinsicLoweringPass()); PM->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); addIRPasses(); diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll --- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll @@ -13,6 +13,7 @@ ; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Machine Branch Probability Analysis ; CHECK-NEXT: ModulePass Manager +; CHECK-NEXT: Expand large div/rem ; CHECK-NEXT: Pre-ISel Intrinsic Lowering ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Expand Atomic instructions diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -16,6 +16,7 @@ ; CHECK-NEXT: Machine Branch Probability Analysis ; CHECK-NEXT: Default 
Regalloc Eviction Advisor ; CHECK-NEXT: ModulePass Manager +; CHECK-NEXT: Expand large div/rem ; CHECK-NEXT: Pre-ISel Intrinsic Lowering ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Expand Atomic instructions diff --git a/llvm/test/CodeGen/AArch64/udivmodei5.ll b/llvm/test/CodeGen/AArch64/udivmodei5.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/udivmodei5.ll @@ -0,0 +1,133 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnuabi < %s | FileCheck %s +; RUN: llc -mtriple=aarch64_be-linux-gnuabi < %s | FileCheck %s --check-prefix=CHECK-BE + +define void @udiv129(i129* %ptr, i129* %out) nounwind { +; CHECK-LABEL: udiv129: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: mov x19, x1 +; CHECK-NEXT: ldrb w2, [x0, #16] +; CHECK-NEXT: ldp x8, x1, [x0] +; CHECK-NEXT: mov w4, #3 +; CHECK-NEXT: mov x5, xzr +; CHECK-NEXT: mov x6, xzr +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: bl __llvm_udiv129 +; CHECK-NEXT: and w8, w2, #0x1 +; CHECK-NEXT: stp x0, x1, [x19] +; CHECK-NEXT: strb w8, [x19, #16] +; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: udiv129: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill +; CHECK-BE-NEXT: ldp x9, x8, [x0] +; CHECK-BE-NEXT: mov x19, x1 +; CHECK-BE-NEXT: mov x4, xzr +; CHECK-BE-NEXT: ldrb w2, [x0, #16] +; CHECK-BE-NEXT: mov x5, xzr +; CHECK-BE-NEXT: mov w6, #3 +; CHECK-BE-NEXT: lsr x0, x9, #56 +; CHECK-BE-NEXT: extr x1, x9, x8, #56 +; CHECK-BE-NEXT: bfi x2, x8, #8, #56 +; CHECK-BE-NEXT: bl __llvm_udiv129 +; CHECK-BE-NEXT: extr x8, x0, x1, #8 +; CHECK-BE-NEXT: extr x9, x1, x2, #8 +; CHECK-BE-NEXT: strb w2, [x19, #16] +; CHECK-BE-NEXT: and x8, x8, #0x1ffffffffffffff +; CHECK-BE-NEXT: stp x8, x9, [x19] +; CHECK-BE-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-BE-NEXT: ret + %a = load i129, i129* %ptr + %res = udiv i129 %a, 3 + store i129 %res, i129* %out + ret void +} + +define i129 @urem129(i129 %a, i129 %b) nounwind { +; CHECK-LABEL: urem129: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: bl __llvm_urem129 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: urem129: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-BE-NEXT: bl __llvm_urem129 +; CHECK-BE-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-BE-NEXT: ret + %res = urem i129 %a, %b + ret i129 %res +} + +define i129 @sdiv129(i129 %a, i129 %b) nounwind { +; CHECK-LABEL: sdiv129: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: bl __llvm_sdiv129 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: sdiv129: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-BE-NEXT: bl __llvm_sdiv129 +; CHECK-BE-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-BE-NEXT: ret + %res = sdiv i129 %a, %b + ret i129 %res +} + +define i129 @srem129(i129 %a, i129 %b) nounwind { +; CHECK-LABEL: srem129: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: bl __llvm_srem129 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: srem129: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-BE-NEXT: bl __llvm_srem129 +; CHECK-BE-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-BE-NEXT: ret + %res = srem i129 %a, %b + ret i129 %res +} + +; Some higher sizes +define i257 @sdiv257(i257 %a, i257 %b) nounwind { +; CHECK-LABEL: sdiv257: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: ldr q0, [sp, #48] +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: ldr x8, [sp, #64] +; CHECK-NEXT: str q0, [sp] +; CHECK-NEXT: str x8, [sp, #16] +; CHECK-NEXT: bl __llvm_sdiv257 +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: sdiv257: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: sub sp, sp, #48 +; CHECK-BE-NEXT: add x8, sp, #48 +; CHECK-BE-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-BE-NEXT: ld1 { v0.2d }, [x8] +; CHECK-BE-NEXT: ldr x8, [sp, #64] +; CHECK-BE-NEXT: str x8, [sp, #16] +; CHECK-BE-NEXT: st1 { v0.2d }, [sp] +; CHECK-BE-NEXT: bl __llvm_sdiv257 +; CHECK-BE-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-BE-NEXT: add sp, sp, #48 +; CHECK-BE-NEXT: ret + %res = sdiv i257 %a, %b + ret i257 %res +} diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -3,6 +3,7 @@ ; REQUIRES: asserts ; CHECK: ModulePass Manager +; CHECK-NEXT: Expand large div/rem ; CHECK-NEXT: Pre-ISel Intrinsic Lowering ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Expand Atomic instructions diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll --- a/llvm/test/CodeGen/X86/O0-pipeline.ll +++ b/llvm/test/CodeGen/X86/O0-pipeline.ll @@ -15,6 +15,7 @@ ; CHECK-NEXT: Profile summary info ; CHECK-NEXT: Machine Branch Probability Analysis ; CHECK-NEXT: ModulePass Manager +; CHECK-NEXT: Expand large div/rem ; CHECK-NEXT: Pre-ISel Intrinsic Lowering ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Expand Atomic instructions diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -24,6 +24,7 @@ ; CHECK-NEXT: Machine Branch Probability Analysis ; CHECK-NEXT: Default Regalloc Eviction Advisor ; CHECK-NEXT: ModulePass Manager +; CHECK-NEXT: Expand large div/rem ; CHECK-NEXT: Pre-ISel Intrinsic Lowering ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Expand Atomic instructions diff --git a/llvm/test/CodeGen/X86/udivmodei5.ll b/llvm/test/CodeGen/X86/udivmodei5.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/udivmodei5.ll @@ -0,0 +1,693 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 + +define i129 @udiv129(i129 %a, i129 %b) nounwind { +; X86-LABEL: udiv129: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: movl %esp, %eax +; X86-NEXT: pushl 48(%ebp) +; X86-NEXT: pushl 44(%ebp) +; X86-NEXT: pushl 40(%ebp) +; X86-NEXT: pushl 36(%ebp) +; X86-NEXT: pushl 32(%ebp) +; X86-NEXT: pushl 28(%ebp) +; X86-NEXT: pushl 24(%ebp) +; X86-NEXT: pushl 20(%ebp) +; X86-NEXT: pushl 16(%ebp) +; X86-NEXT: 
pushl 12(%ebp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll __llvm_udiv129 +; X86-NEXT: addl $40, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-NEXT: movb %bl, 16(%esi) +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 +; +; X64-LABEL: udiv129: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: callq __llvm_udiv129 +; X64-NEXT: popq %rsi +; X64-NEXT: retq + %res = udiv i129 %a, %b + ret i129 %res +} + +define i129 @urem129(i129 %a, i129 %b) nounwind { +; X86-LABEL: urem129: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: movl %esp, %eax +; X86-NEXT: pushl 48(%ebp) +; X86-NEXT: pushl 44(%ebp) +; X86-NEXT: pushl 40(%ebp) +; X86-NEXT: pushl 36(%ebp) +; X86-NEXT: pushl 32(%ebp) +; X86-NEXT: pushl 28(%ebp) +; X86-NEXT: pushl 24(%ebp) +; X86-NEXT: pushl 20(%ebp) +; X86-NEXT: pushl 16(%ebp) +; X86-NEXT: pushl 12(%ebp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll __llvm_urem129 +; X86-NEXT: addl $40, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-NEXT: movb %bl, 16(%esi) +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 +; +; X64-LABEL: urem129: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: callq __llvm_urem129 +; X64-NEXT: popq %rsi +; X64-NEXT: retq + %res = urem i129 %a, %b + ret i129 %res +} + +define i129 @sdiv129(i129 %a, i129 %b) nounwind { +; X86-LABEL: sdiv129: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: movl %esp, %eax +; X86-NEXT: pushl 48(%ebp) +; X86-NEXT: pushl 44(%ebp) +; X86-NEXT: pushl 40(%ebp) +; X86-NEXT: pushl 36(%ebp) +; X86-NEXT: pushl 32(%ebp) +; X86-NEXT: pushl 28(%ebp) +; X86-NEXT: pushl 24(%ebp) +; X86-NEXT: pushl 20(%ebp) +; X86-NEXT: pushl 16(%ebp) +; X86-NEXT: pushl 12(%ebp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll __llvm_sdiv129 +; X86-NEXT: addl $40, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-NEXT: movb %bl, 16(%esi) +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 +; +; X64-LABEL: sdiv129: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: callq 
__llvm_sdiv129 +; X64-NEXT: popq %rsi +; X64-NEXT: retq + %res = sdiv i129 %a, %b + ret i129 %res +} + +define i129 @srem129(i129 %a, i129 %b) nounwind { +; X86-LABEL: srem129: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: movl %esp, %eax +; X86-NEXT: pushl 48(%ebp) +; X86-NEXT: pushl 44(%ebp) +; X86-NEXT: pushl 40(%ebp) +; X86-NEXT: pushl 36(%ebp) +; X86-NEXT: pushl 32(%ebp) +; X86-NEXT: pushl 28(%ebp) +; X86-NEXT: pushl 24(%ebp) +; X86-NEXT: pushl 20(%ebp) +; X86-NEXT: pushl 16(%ebp) +; X86-NEXT: pushl 12(%ebp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll __llvm_srem129 +; X86-NEXT: addl $40, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-NEXT: movb %bl, 16(%esi) +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 +; +; X64-LABEL: srem129: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: callq __llvm_srem129 +; X64-NEXT: popq %rsi +; X64-NEXT: retq + %res = srem i129 %a, %b + ret i129 %res +} + +; Some higher sizes +define i257 @sdiv257(i257 %a, i257 %b) nounwind { +; X86-LABEL: sdiv257: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $56, %esp +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl 80(%ebp) +; X86-NEXT: pushl 76(%ebp) +; X86-NEXT: pushl 72(%ebp) +; X86-NEXT: pushl 68(%ebp) +; X86-NEXT: pushl 64(%ebp) +; X86-NEXT: pushl 60(%ebp) +; X86-NEXT: pushl 56(%ebp) +; X86-NEXT: pushl 52(%ebp) +; X86-NEXT: pushl 48(%ebp) +; X86-NEXT: pushl 44(%ebp) +; X86-NEXT: pushl 40(%ebp) +; X86-NEXT: pushl 36(%ebp) +; X86-NEXT: pushl 32(%ebp) +; X86-NEXT: pushl 28(%ebp) +; X86-NEXT: pushl 24(%ebp) +; X86-NEXT: pushl 20(%ebp) +; X86-NEXT: pushl 16(%ebp) +; X86-NEXT: pushl 12(%ebp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll __llvm_sdiv257 +; X86-NEXT: addl $72, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb %bl, 32(%esi) +; X86-NEXT: movl %eax, 24(%esi) +; X86-NEXT: movl %ecx, 28(%esi) +; X86-NEXT: movl %edx, 16(%esi) +; X86-NEXT: movl %edi, 20(%esi) +; X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 8(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 12(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 +; +; X64-LABEL: sdiv257: +; X64: # %bb.0: +; X64-NEXT: pushq %rbx +; X64-NEXT: subq $48, %rsp +; X64-NEXT: movq %rdi, %rbx +; X64-NEXT: subq $8, %rsp +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: callq __llvm_sdiv257 +; X64-NEXT: addq $48, %rsp +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; X64-NEXT: movb {{[0-9]+}}(%rsp), %al +; X64-NEXT: movb %al, 32(%rbx) +; X64-NEXT: movq %rsi, 16(%rbx) +; X64-NEXT: movq %rdx, 24(%rbx) +; X64-NEXT: movq %rdi, (%rbx) +; X64-NEXT: movq %rcx, 8(%rbx) +; X64-NEXT: movq %rbx, %rax +; X64-NEXT: addq $48, %rsp +; X64-NEXT: popq %rbx +; X64-NEXT: retq + %res = sdiv i257 %a, %b + ret i257 %res +} + +define i1001 @srem1001(i1001 %a, i1001 %b) nounwind { +; X86-LABEL: srem1001: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $248, %esp +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl 264(%ebp) +; X86-NEXT: pushl 260(%ebp) +; X86-NEXT: pushl 256(%ebp) +; X86-NEXT: pushl 252(%ebp) +; X86-NEXT: pushl 248(%ebp) +; X86-NEXT: pushl 244(%ebp) +; X86-NEXT: pushl 240(%ebp) +; X86-NEXT: pushl 236(%ebp) +; X86-NEXT: pushl 232(%ebp) +; X86-NEXT: pushl 228(%ebp) +; X86-NEXT: pushl 224(%ebp) +; X86-NEXT: pushl 220(%ebp) +; X86-NEXT: pushl 216(%ebp) +; X86-NEXT: pushl 212(%ebp) +; X86-NEXT: pushl 208(%ebp) +; X86-NEXT: pushl 204(%ebp) +; X86-NEXT: pushl 200(%ebp) +; X86-NEXT: pushl 196(%ebp) +; X86-NEXT: pushl 192(%ebp) +; X86-NEXT: pushl 188(%ebp) +; X86-NEXT: pushl 184(%ebp) +; X86-NEXT: pushl 180(%ebp) +; X86-NEXT: pushl 176(%ebp) +; X86-NEXT: pushl 172(%ebp) +; X86-NEXT: pushl 168(%ebp) +; X86-NEXT: pushl 164(%ebp) +; X86-NEXT: pushl 160(%ebp) +; X86-NEXT: pushl 156(%ebp) +; X86-NEXT: pushl 152(%ebp) +; X86-NEXT: pushl 148(%ebp) +; X86-NEXT: pushl 144(%ebp) +; X86-NEXT: pushl 140(%ebp) +; X86-NEXT: pushl 136(%ebp) +; X86-NEXT: pushl 132(%ebp) +; X86-NEXT: pushl 128(%ebp) +; X86-NEXT: pushl 124(%ebp) +; X86-NEXT: pushl 120(%ebp) +; X86-NEXT: pushl 116(%ebp) +; X86-NEXT: pushl 112(%ebp) +; X86-NEXT: pushl 108(%ebp) +; X86-NEXT: pushl 104(%ebp) +; X86-NEXT: pushl 100(%ebp) +; X86-NEXT: pushl 96(%ebp) +; X86-NEXT: pushl 92(%ebp) +; X86-NEXT: pushl 88(%ebp) +; X86-NEXT: pushl 84(%ebp) +; X86-NEXT: pushl 80(%ebp) +; X86-NEXT: pushl 76(%ebp) +; X86-NEXT: pushl 72(%ebp) +; X86-NEXT: pushl 68(%ebp) +; X86-NEXT: pushl 64(%ebp) +; X86-NEXT: pushl 60(%ebp) +; X86-NEXT: pushl 56(%ebp) +; X86-NEXT: pushl 52(%ebp) +; X86-NEXT: pushl 48(%ebp) +; X86-NEXT: pushl 44(%ebp) +; X86-NEXT: pushl 40(%ebp) +; X86-NEXT: pushl 36(%ebp) +; X86-NEXT: pushl 32(%ebp) +; X86-NEXT: pushl 28(%ebp) +; X86-NEXT: pushl 24(%ebp) +; X86-NEXT: pushl 20(%ebp) +; X86-NEXT: pushl 16(%ebp) +; X86-NEXT: pushl 12(%ebp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll __llvm_srem1001 +; X86-NEXT: addl $256, %esp # imm = 0x100 +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, 120(%esi) +; X86-NEXT: movl %eax, 116(%esi) +; X86-NEXT: movl %ecx, 112(%esi) +; X86-NEXT: movl %edx, 108(%esi) +; X86-NEXT: movl %edi, 104(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 100(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 96(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 92(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 88(%esi) +; X86-NEXT: 
movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 84(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 80(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 76(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 72(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 68(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 64(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 60(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 56(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 52(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 48(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 44(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 40(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 36(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 32(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 28(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 24(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 20(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 16(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 12(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 8(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 4(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl $511, %eax # imm = 0x1FF +; X86-NEXT: movw %ax, 124(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 +; +; X64-LABEL: srem1001: +; X64: # %bb.0: +; X64-NEXT: pushq %rbp +; X64-NEXT: pushq %r15 +; X64-NEXT: pushq %r14 +; X64-NEXT: pushq %r13 +; X64-NEXT: pushq %r12 +; X64-NEXT: pushq %rbx +; X64-NEXT: subq $152, %rsp +; X64-NEXT: movq %rdi, %rbx +; X64-NEXT: subq $8, %rsp +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq 
{{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: pushq {{[0-9]+}}(%rsp) +; X64-NEXT: callq __llvm_srem1001 +; X64-NEXT: addq $224, %rsp +; X64-NEXT: movl {{[0-9]+}}(%rsp), %eax +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %edx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, (%rsp) # 8-byte Spill +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r13 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 +; X64-NEXT: movq %rax, (%rbx) +; X64-NEXT: movq %rcx, 96(%rbx) +; X64-NEXT: movq %rbp, 112(%rbx) +; X64-NEXT: movq %rdi, 104(%rbx) +; X64-NEXT: movq %rsi, 80(%rbx) +; X64-NEXT: movq %r8, 88(%rbx) +; X64-NEXT: movq %r10, 72(%rbx) +; X64-NEXT: movq %r13, 48(%rbx) +; X64-NEXT: movq %r12, 56(%rbx) +; X64-NEXT: movq %r15, 40(%rbx) +; X64-NEXT: movq %r14, 24(%rbx) +; X64-NEXT: movq %r9, 8(%rbx) +; X64-NEXT: movq %r11, 16(%rbx) +; X64-NEXT: movq (%rsp), %rax # 8-byte Reload +; X64-NEXT: movq %rax, 32(%rbx) +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; X64-NEXT: movq %rax, 64(%rbx) +; X64-NEXT: andl $511, %edx # imm = 0x1FF +; X64-NEXT: movw %dx, 124(%rbx) +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; X64-NEXT: movl %eax, 120(%rbx) +; X64-NEXT: movq %rbx, %rax +; X64-NEXT: addq $152, %rsp +; X64-NEXT: popq %rbx +; X64-NEXT: popq %r12 +; X64-NEXT: popq %r13 +; X64-NEXT: popq %r14 +; X64-NEXT: popq %r15 +; X64-NEXT: popq %rbp +; X64-NEXT: retq + %res = srem i1001 %a, %b + ret i1001 %res +} + +define i129 @chain129(i129 %a, i129 %b) nounwind { +; X86-LABEL: chain129: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $48, %esp +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl 48(%ebp) +; X86-NEXT: pushl 44(%ebp) +; X86-NEXT: pushl 40(%ebp) +; X86-NEXT: pushl 36(%ebp) +; X86-NEXT: pushl 32(%ebp) +; X86-NEXT: pushl 28(%ebp) +; X86-NEXT: pushl 24(%ebp) +; X86-NEXT: pushl 20(%ebp) +; X86-NEXT: pushl 16(%ebp) +; X86-NEXT: pushl 12(%ebp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll __llvm_udiv129 +; X86-NEXT: addl $40, %esp +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %esp, %ecx +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $17 +; X86-NEXT: pushl %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %ecx +; X86-NEXT: calll __llvm_sdiv129 +; X86-NEXT: addl $40, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-NEXT: movb %bl, 16(%esi) +; 
X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 +; +; X64-LABEL: chain129: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: callq __llvm_udiv129 +; X64-NEXT: movq %rcx, %r8 +; X64-NEXT: movl $17, %ecx +; X64-NEXT: movq %rax, %rdi +; X64-NEXT: movq %rdx, %rsi +; X64-NEXT: movq %r8, %rdx +; X64-NEXT: xorl %r8d, %r8d +; X64-NEXT: xorl %r9d, %r9d +; X64-NEXT: callq __llvm_sdiv129 +; X64-NEXT: popq %rsi +; X64-NEXT: retq + %res = udiv i129 %a, %b + %res2 = sdiv i129 %res, 17 + ret i129 %res2 +} diff --git a/llvm/test/CodeGen/X86/urem-seteq.ll b/llvm/test/CodeGen/X86/urem-seteq.ll --- a/llvm/test/CodeGen/X86/urem-seteq.ll +++ b/llvm/test/CodeGen/X86/urem-seteq.ll @@ -363,20 +363,159 @@ define void @ossfuzz34366() { ; X86-LABEL: ossfuzz34366: ; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: pushl %edi +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: subl $64, %esp +; X86-NEXT: .cfi_def_cfa_offset 80 +; X86-NEXT: .cfi_offset %esi, -16 +; X86-NEXT: .cfi_offset %edi, -12 +; X86-NEXT: .cfi_offset %ebx, -8 ; X86-NEXT: movl (%eax), %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF -; X86-NEXT: orl %eax, %ecx +; X86-NEXT: subl $12, %esp +; X86-NEXT: .cfi_adjust_cfa_offset 12 +; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X86-NEXT: pushl $-2147483648 # imm = 0x80000000 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $0 +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl %ecx +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll __llvm_urem448 +; X86-NEXT: .cfi_adjust_cfa_offset -4 +; X86-NEXT: addl $124, %esp +; X86-NEXT: .cfi_adjust_cfa_offset 
-124 +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: orl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: orl {{[0-9]+}}(%esp), %edi +; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: orl %ebx, %ecx +; X86-NEXT: orl %edi, %ecx +; X86-NEXT: orl {{[0-9]+}}(%esp), %edx +; X86-NEXT: orl {{[0-9]+}}(%esp), %esi +; X86-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl %edx, %eax +; X86-NEXT: orl %esi, %eax +; X86-NEXT: orl %ecx, %eax ; X86-NEXT: sete (%eax) +; X86-NEXT: addl $64, %esp +; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: popl %esi +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: popl %edi +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: popl %ebx +; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; ; X64-LABEL: ossfuzz34366: ; X64: # %bb.0: -; X64-NEXT: movq (%rax), %rax -; X64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF -; X64-NEXT: andq %rax, %rcx +; X64-NEXT: subq $56, %rsp +; X64-NEXT: .cfi_def_cfa_offset 64 +; X64-NEXT: movq (%rax), %rcx +; X64-NEXT: subq $8, %rsp +; X64-NEXT: .cfi_adjust_cfa_offset 8 +; X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: movq %rcx, %rsi +; X64-NEXT: movq %rcx, %rdx +; X64-NEXT: movq %rcx, %r8 +; X64-NEXT: movq %rcx, %r9 +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_adjust_cfa_offset 8 +; X64-NEXT: pushq $0 +; X64-NEXT: .cfi_adjust_cfa_offset 8 +; X64-NEXT: pushq $0 +; X64-NEXT: .cfi_adjust_cfa_offset 8 +; X64-NEXT: pushq $0 +; X64-NEXT: .cfi_adjust_cfa_offset 8 +; X64-NEXT: pushq $0 +; X64-NEXT: .cfi_adjust_cfa_offset 8 +; X64-NEXT: pushq $0 +; X64-NEXT: .cfi_adjust_cfa_offset 8 +; X64-NEXT: pushq $0 +; X64-NEXT: .cfi_adjust_cfa_offset 8 +; X64-NEXT: pushq %rcx +; X64-NEXT: .cfi_adjust_cfa_offset 8 +; X64-NEXT: pushq %rcx +; X64-NEXT: .cfi_adjust_cfa_offset 8 +; X64-NEXT: callq __llvm_urem448 +; X64-NEXT: addq $80, %rsp +; X64-NEXT: .cfi_adjust_cfa_offset -80 +; X64-NEXT: movq (%rsp), %rax +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; X64-NEXT: orq {{[0-9]+}}(%rsp), %rdx +; X64-NEXT: orq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: orq %rdx, %rax +; X64-NEXT: orq {{[0-9]+}}(%rsp), %rcx +; X64-NEXT: orq {{[0-9]+}}(%rsp), %rcx ; X64-NEXT: orq %rax, %rcx ; X64-NEXT: sete (%rax) +; X64-NEXT: addq $56, %rsp +; X64-NEXT: .cfi_def_cfa_offset 8 ; X64-NEXT: retq %L10 = load i448, i448* undef, align 4 %B18 = urem i448 %L10, -363419362147803445274661903944002267176820680343659030140745099590319644056698961663095525356881782780381260803133088966767300814307328 diff --git a/llvm/test/Transforms/ExpandLargeDivRem/sdiv129.ll b/llvm/test/Transforms/ExpandLargeDivRem/sdiv129.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ExpandLargeDivRem/sdiv129.ll @@ -0,0 +1,61 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -expandlargedivrem < %s | FileCheck %s + +define void @sdiv129(i129* %ptr, i129* %out) nounwind { + %a = load i129, i129* %ptr + %res = sdiv i129 %a, 3 + store i129 %res, i129* %out + ret void +} + +; CHECK-LABEL: @sdiv129( +; CHECK-NEXT: [[A:%.*]] = load i129, i129* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i129 @__llvm_sdiv129(i129 [[A]], i129 3) 
+; CHECK-NEXT: store i129 [[TMP1]], i129* [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define internal i129 @__llvm_sdiv129(i129 %0, i129 %1) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i129 [[TMP0:%.*]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = sub i129 0, [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i129 [[TMP3]], i129 [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i129 [[TMP1:%.*]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sub i129 0, [[TMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], i129 [[TMP6]], i129 [[TMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = call i129 @__llvm_udiv129(i129 [[TMP4]], i129 [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i1 [[TMP2]], [[TMP5]] +; CHECK-NEXT: [[TMP10:%.*]] = sub i129 0, [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP9]], i129 [[TMP10]], i129 [[TMP8]] +; CHECK-NEXT: ret i129 [[TMP11]] +; +; +; CHECK-LABEL: define internal i129 @__llvm_udiv129(i129 %0, i129 %1) +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 128, [[ENTRY:%.*]] ], [ [[NEW_I:%.*]], [[ELSE:%.*]] ] +; CHECK-NEXT: [[Q:%.*]] = phi i129 [ 0, [[ENTRY]] ], [ [[R4:%.*]], [[ELSE]] ] +; CHECK-NEXT: [[R:%.*]] = phi i129 [ 0, [[ENTRY]] ], [ [[R3:%.*]], [[ELSE]] ] +; CHECK-NEXT: [[IEXT:%.*]] = zext i32 [[I]] to i129 +; CHECK-NEXT: [[TMP2:%.*]] = lshr i129 [[TMP0:%.*]], [[IEXT]] +; CHECK-NEXT: [[TMP3:%.*]] = trunc i129 [[TMP2]] to i1 +; CHECK-NEXT: [[NEW_R:%.*]] = shl i129 [[R]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i129 +; CHECK-NEXT: [[NEW_R1:%.*]] = or i129 [[NEW_R]], [[TMP4]] +; CHECK-NEXT: [[LOOP_EXIT_COND:%.*]] = icmp eq i32 [[I]], 0 +; CHECK-NEXT: [[NEW_I]] = add i32 [[I]], -1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i129 [[NEW_R1]], [[TMP1:%.*]] +; CHECK-NEXT: br i1 [[TMP5]], label [[THEN:%.*]], label [[ELSE]] +; CHECK: then: +; CHECK-NEXT: [[NEW_R2:%.*]] = sub i129 [[NEW_R1]], [[TMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = shl i129 1, [[IEXT]] +; CHECK-NEXT: [[NEW_Q:%.*]] = or i129 [[Q]], [[TMP6]] +; CHECK-NEXT: br label [[ELSE]] +; CHECK: if.end: +; CHECK-NEXT: [[R3]] = phi i129 [ [[NEW_R2]], [[THEN]] ], [ [[NEW_R1]], [[LOOP]] ] +; CHECK-NEXT: [[R4]] = phi i129 [ [[NEW_Q]], [[THEN]] ], [ [[Q]], [[LOOP]] ] +; CHECK-NEXT: br i1 [[LOOP_EXIT_COND]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret i129 [[R4]] +; diff --git a/llvm/test/Transforms/ExpandLargeDivRem/srem129.ll b/llvm/test/Transforms/ExpandLargeDivRem/srem129.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ExpandLargeDivRem/srem129.ll @@ -0,0 +1,55 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -expandlargedivrem < %s | FileCheck %s + +define void @test(i129* %ptr, i129* %out) nounwind { + %a = load i129, i129* %ptr + %res = srem i129 %a, 3 + store i129 %res, i129* %out + ret void +} +; CHECK-LABEL: @test( +; CHECK-NEXT: [[A:%.*]] = load i129, i129* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i129 @__llvm_srem129(i129 [[A]], i129 3) +; CHECK-NEXT: store i129 [[TMP1]], i129* [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define internal i129 @__llvm_srem129(i129 %0, i129 %1) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i129 [[TMP0:%.*]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = sub i129 0, [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i129 [[TMP3]], i129 [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i129 [[TMP1:%.*]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = 
sub i129 0, [[TMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], i129 [[TMP6]], i129 [[TMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = call i129 @__llvm_urem129(i129 [[TMP4]], i129 [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = sub i129 0, [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP2]], i129 [[TMP9]], i129 [[TMP8]] +; CHECK-NEXT: ret i129 [[TMP10]] +; +; +; CHECK-LABEL: define internal i129 @__llvm_urem129(i129 %0, i129 %1) +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 128, [[ENTRY:%.*]] ], [ [[NEW_I:%.*]], [[ELSE:%.*]] ] +; CHECK-NEXT: [[R:%.*]] = phi i129 [ 0, [[ENTRY]] ], [ [[R3:%.*]], [[ELSE]] ] +; CHECK-NEXT: [[IEXT:%.*]] = zext i32 [[I]] to i129 +; CHECK-NEXT: [[TMP2:%.*]] = lshr i129 [[TMP0:%.*]], [[IEXT]] +; CHECK-NEXT: [[TMP3:%.*]] = trunc i129 [[TMP2]] to i1 +; CHECK-NEXT: [[NEW_R:%.*]] = shl i129 [[R]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i129 +; CHECK-NEXT: [[NEW_R1:%.*]] = or i129 [[NEW_R]], [[TMP4]] +; CHECK-NEXT: [[LOOP_EXIT_COND:%.*]] = icmp eq i32 [[I]], 0 +; CHECK-NEXT: [[NEW_I]] = add i32 [[I]], -1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i129 [[NEW_R1]], [[TMP1:%.*]] +; CHECK-NEXT: br i1 [[TMP5]], label [[THEN:%.*]], label [[ELSE]] +; CHECK: then: +; CHECK-NEXT: [[NEW_R2:%.*]] = sub i129 [[NEW_R1]], [[TMP1]] +; CHECK-NEXT: br label [[ELSE]] +; CHECK: if.end: +; CHECK-NEXT: [[R3]] = phi i129 [ [[NEW_R2]], [[THEN]] ], [ [[NEW_R1]], [[LOOP]] ] +; CHECK-NEXT: br i1 [[LOOP_EXIT_COND]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret i129 [[R3]] +; diff --git a/llvm/test/Transforms/ExpandLargeDivRem/udiv129.ll b/llvm/test/Transforms/ExpandLargeDivRem/udiv129.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ExpandLargeDivRem/udiv129.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -expandlargedivrem < %s | FileCheck %s + +define void @test(i129* %ptr, i129* %out) nounwind { + %a = load i129, i129* %ptr + %res = udiv i129 %a, 3 + store i129 %res, i129* %out + ret void +} + +; CHECK-LABEL: @test( +; CHECK-NEXT: [[A:%.*]] = load i129, i129* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i129 @__llvm_udiv129(i129 [[A]], i129 3) +; CHECK-NEXT: store i129 [[TMP1]], i129* [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define internal i129 @__llvm_udiv129(i129 %0, i129 %1) +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 128, [[ENTRY:%.*]] ], [ [[NEW_I:%.*]], [[ELSE:%.*]] ] +; CHECK-NEXT: [[Q:%.*]] = phi i129 [ 0, [[ENTRY]] ], [ [[R4:%.*]], [[ELSE]] ] +; CHECK-NEXT: [[R:%.*]] = phi i129 [ 0, [[ENTRY]] ], [ [[R3:%.*]], [[ELSE]] ] +; CHECK-NEXT: [[IEXT:%.*]] = zext i32 [[I]] to i129 +; CHECK-NEXT: [[TMP2:%.*]] = lshr i129 [[TMP0:%.*]], [[IEXT]] +; CHECK-NEXT: [[TMP3:%.*]] = trunc i129 [[TMP2]] to i1 +; CHECK-NEXT: [[NEW_R:%.*]] = shl i129 [[R]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i129 +; CHECK-NEXT: [[NEW_R1:%.*]] = or i129 [[NEW_R]], [[TMP4]] +; CHECK-NEXT: [[LOOP_EXIT_COND:%.*]] = icmp eq i32 [[I]], 0 +; CHECK-NEXT: [[NEW_I]] = add i32 [[I]], -1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i129 [[NEW_R1]], [[TMP1:%.*]] +; CHECK-NEXT: br i1 [[TMP5]], label [[THEN:%.*]], label [[ELSE]] +; CHECK: then: +; CHECK-NEXT: [[NEW_R2:%.*]] = sub i129 [[NEW_R1]], [[TMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = shl i129 1, [[IEXT]] +; CHECK-NEXT: [[NEW_Q:%.*]] = or i129 [[Q]], [[TMP6]] +; 
CHECK-NEXT: br label [[ELSE]] +; CHECK: if.end: +; CHECK-NEXT: [[R3]] = phi i129 [ [[NEW_R2]], [[THEN]] ], [ [[NEW_R1]], [[LOOP]] ] +; CHECK-NEXT: [[R4]] = phi i129 [ [[NEW_Q]], [[THEN]] ], [ [[Q]], [[LOOP]] ] +; CHECK-NEXT: br i1 [[LOOP_EXIT_COND]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret i129 [[R4]] +; diff --git a/llvm/test/Transforms/ExpandLargeDivRem/urem129.ll b/llvm/test/Transforms/ExpandLargeDivRem/urem129.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ExpandLargeDivRem/urem129.ll @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -expandlargedivrem < %s | FileCheck %s + +define void @test(i129* %ptr, i129* %out) nounwind { + %a = load i129, i129* %ptr + %res = urem i129 %a, 3 + store i129 %res, i129* %out + ret void +} + +; CHECK-LABEL: @test( +; CHECK-NEXT: [[A:%.*]] = load i129, i129* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i129 @__llvm_urem129(i129 [[A]], i129 3) +; CHECK-NEXT: store i129 [[TMP1]], i129* [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define internal i129 @__llvm_urem129(i129 %0, i129 %1) +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 128, [[ENTRY:%.*]] ], [ [[NEW_I:%.*]], [[ELSE:%.*]] ] +; CHECK-NEXT: [[R:%.*]] = phi i129 [ 0, [[ENTRY]] ], [ [[R3:%.*]], [[ELSE]] ] +; CHECK-NEXT: [[IEXT:%.*]] = zext i32 [[I]] to i129 +; CHECK-NEXT: [[TMP2:%.*]] = lshr i129 [[TMP0:%.*]], [[IEXT]] +; CHECK-NEXT: [[TMP3:%.*]] = trunc i129 [[TMP2]] to i1 +; CHECK-NEXT: [[NEW_R:%.*]] = shl i129 [[R]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i129 +; CHECK-NEXT: [[NEW_R1:%.*]] = or i129 [[NEW_R]], [[TMP4]] +; CHECK-NEXT: [[LOOP_EXIT_COND:%.*]] = icmp eq i32 [[I]], 0 +; CHECK-NEXT: [[NEW_I]] = add i32 [[I]], -1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i129 [[NEW_R1]], [[TMP1:%.*]] +; CHECK-NEXT: br i1 [[TMP5]], label [[THEN:%.*]], label [[ELSE]] +; CHECK: then: +; CHECK-NEXT: [[NEW_R2:%.*]] = sub i129 [[NEW_R1]], [[TMP1]] +; CHECK-NEXT: br label [[ELSE]] +; CHECK: if.end: +; CHECK-NEXT: [[R3]] = phi i129 [ [[NEW_R2]], [[THEN]] ], [ [[NEW_R1]], [[LOOP]] ] +; CHECK-NEXT: br i1 [[LOOP_EXIT_COND]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret i129 [[R3]] +; diff --git a/llvm/test/Transforms/ExpandLargeDivRem/values129.ll b/llvm/test/Transforms/ExpandLargeDivRem/values129.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ExpandLargeDivRem/values129.ll @@ -0,0 +1,189 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py + +; This test checks that constant propagation done on the div/rem instruction +; (via -O2) gives the same result as running the expandlargedivrem pass and +; then constant-folding the expanded code. 
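+; In other words, both RUN lines below must fold every call to the same +; constants; the second pipeline relies on -unroll-count=129 and a large +; inline threshold so that the expanded 129-iteration division loop can be +; fully evaluated at compile time.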
+; RUN: opt -S -O2 < %s | FileCheck %s
+; RUN: opt -S -expandlargedivrem < %s | opt -unroll-count=129 -inline-threshold=100000 -O2 -S | FileCheck %s
+
+define {i129, i129} @test1() {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret { i129, i129 } zeroinitializer
+;
+  %ret = call {i129, i129} @udivrem(i129 0, i129 1)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @test2() {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: ret { i129, i129 } { i129 1, i129 0 }
+;
+  %ret = call {i129, i129} @udivrem(i129 1, i129 1)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @test3() {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: ret { i129, i129 } { i129 2, i129 0 }
+;
+  %ret = call {i129, i129} @udivrem(i129 2, i129 1)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @test4() {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: ret { i129, i129 } { i129 1, i129 2 }
+;
+  %ret = call {i129, i129} @udivrem(i129 7, i129 5)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @test5() {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: ret { i129, i129 } { i129 3, i129 4 }
+;
+  %ret = call {i129, i129} @udivrem(i129 19, i129 5)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @test6() {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: ret { i129, i129 } { i129 340282366920938463463374607431768211455, i129 1 }
+;
+  %all_bits_set = sub i129 0, 1
+  %ret = call {i129, i129} @udivrem(i129 %all_bits_set, i129 2)
+  ret {i129, i129} %ret
+}
+
+; Signed tests start here
+
+define {i129, i129} @stest1() {
+; CHECK-LABEL: @stest1(
+; CHECK-NEXT: ret { i129, i129 } zeroinitializer
+;
+  %ret = call {i129, i129} @sdivrem(i129 0, i129 1)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @stest2() {
+; CHECK-LABEL: @stest2(
+; CHECK-NEXT: ret { i129, i129 } zeroinitializer
+;
+  %ret = call {i129, i129} @sdivrem(i129 0, i129 -1)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @stest3() {
+; CHECK-LABEL: @stest3(
+; CHECK-NEXT: ret { i129, i129 } { i129 1, i129 0 }
+;
+  %ret = call {i129, i129} @sdivrem(i129 1, i129 1)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @stest4() {
+; CHECK-LABEL: @stest4(
+; CHECK-NEXT: ret { i129, i129 } { i129 -1, i129 0 }
+;
+  %ret = call {i129, i129} @sdivrem(i129 1, i129 -1)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @stest5() {
+; CHECK-LABEL: @stest5(
+; CHECK-NEXT: ret { i129, i129 } { i129 2, i129 0 }
+;
+  %ret = call {i129, i129} @sdivrem(i129 2, i129 1)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @stest6() {
+; CHECK-LABEL: @stest6(
+; CHECK-NEXT: ret { i129, i129 } { i129 -2, i129 0 }
+;
+  %ret = call {i129, i129} @sdivrem(i129 2, i129 -1)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @stest7() {
+; CHECK-LABEL: @stest7(
+; CHECK-NEXT: ret { i129, i129 } { i129 -2, i129 0 }
+;
+  %ret = call {i129, i129} @sdivrem(i129 -2, i129 1)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @stest8() {
+; CHECK-LABEL: @stest8(
+; CHECK-NEXT: ret { i129, i129 } { i129 2, i129 0 }
+;
+  %ret = call {i129, i129} @sdivrem(i129 -2, i129 -1)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @stest9() {
+; CHECK-LABEL: @stest9(
+; CHECK-NEXT: ret { i129, i129 } { i129 1, i129 2 }
+;
+  %ret = call {i129, i129} @sdivrem(i129 7, i129 5)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @stest10() {
+; CHECK-LABEL: @stest10(
+; CHECK-NEXT: ret { i129, i129 } { i129 -1, i129 -2 }
+;
+  %ret = call {i129, i129} @sdivrem(i129 -7, i129 5)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @stest11() {
+; CHECK-LABEL: @stest11(
+; CHECK-NEXT: ret { i129, i129 } { i129 3, i129 4 }
+;
+  %ret = call {i129, i129} @sdivrem(i129 19, i129 5)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @stest12() {
+; CHECK-LABEL: @stest12(
+; CHECK-NEXT: ret { i129, i129 } { i129 -3, i129 4 }
+;
+  %ret = call {i129, i129} @sdivrem(i129 19, i129 -5)
+  ret {i129, i129} %ret
+}
+
+define {i129, i129} @stest13() {
+; CHECK-LABEL: @stest13(
+; CHECK-NEXT: ret { i129, i129 } { i129 -340282366920938463463374607431768211455, i129 0 }
+;
+  %min_int = shl i129 1, 128
+  %max_int = sub i129 %min_int, 1
+  %ret = call {i129, i129} @sdivrem(i129 %max_int, i129 -1)
+  ret {i129, i129} %ret
+}
+
+define internal {i129, i129} @udivrem(i129 %a, i129 %b) {
+  %q = udiv i129 %a, %b
+  %r = urem i129 %a, %b
+
+  %agg1 = insertvalue {i129, i129} undef, i129 %q, 0
+  %agg2 = insertvalue {i129, i129} %agg1, i129 %r, 1
+
+  ret {i129, i129} %agg2
+}
+
+define internal {i129, i129} @sdivrem(i129 %a, i129 %b) {
+  %q = sdiv i129 %a, %b
+  %r = srem i129 %a, %b
+
+  %agg1 = insertvalue {i129, i129} undef, i129 %q, 0
+  %agg2 = insertvalue {i129, i129} %agg1, i129 %r, 1
+
+  ret {i129, i129} %agg2
+}
+
diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp
--- a/llvm/tools/opt/opt.cpp
+++ b/llvm/tools/opt/opt.cpp
@@ -494,7 +494,7 @@
       "replace-with-veclib", "jmc-instrument",
       "dot-regions", "dot-regions-only",
       "view-regions", "view-regions-only",
-      "select-optimize"};
+      "select-optimize", "expandlargedivrem"};
   for (const auto &P : PassNamePrefix)
     if (Pass.startswith(P))
       return true;
@@ -543,6 +543,7 @@
   initializeTarget(Registry);
   // For codegen passes, only passes that do IR to IR transformation are
   // supported.
+  initializeExpandLargeDivRemLegacyPassPass(Registry);
   initializeExpandMemCmpPassPass(Registry);
   initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry);
   initializeSelectOptimizePass(Registry);
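
Note on the generated helpers: the @__llvm_udiv129 body checked in the tests
above is a classic restoring (shift-subtract) division loop, one iteration per
bit from 128 down to 0, and the signed variants reduce to it by taking
absolute values first and patching the result's sign with selects (see the
sdiv/srem checks earlier in this diff). As a reading aid only, here is a
minimal C++ sketch of the same loop; the 32-bit width and the name udivrem32
are illustrative choices, not part of the patch, which emits the loop at
arbitrary widths such as i129.

  #include <cstdint>

  // Restoring shift-subtract division, mirroring the expanded
  // @__llvm_udiv129/@__llvm_urem129 bodies, shown at 32 bits.
  // Assumes B != 0, matching the IR, where udiv/urem by zero is undefined.
  static void udivrem32(uint32_t A, uint32_t B, uint32_t &Q, uint32_t &R) {
    Q = 0;
    R = 0;
    for (int I = 31; I >= 0; --I) {   // [[I]] counts 128..0 in the i129 case
      R = (R << 1) | ((A >> I) & 1);  // NEW_R1: shift in dividend bit I
      if (R >= B) {                   // icmp uge NEW_R1, %1
        R -= B;                       // NEW_R2: remainder update
        Q |= uint32_t(1) << I;        // NEW_Q: set quotient bit I
      }
    }
    // The udiv helper returns Q (ret [[R4]]); the urem helper returns R
    // (ret [[R3]]).
  }

For example, udivrem32(7, 5, Q, R) leaves Q == 1 and R == 2, matching the
constant-folded result checked in @test4 of values129.ll.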