diff --git a/llvm/include/llvm/CodeGen/ExpandLargeDivRem.h b/llvm/include/llvm/CodeGen/ExpandLargeDivRem.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/CodeGen/ExpandLargeDivRem.h @@ -0,0 +1,31 @@ +//===----- ExpandLargeDivRem.h - Expand large div/rem ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_EXPANDLARGEDIVREM_H +#define LLVM_CODEGEN_EXPANDLARGEDIVREM_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +/// Expands div/rem instructions with a bitwidth above a threshold +/// into a loop. +/// This is useful for backends like x86 that cannot lower divisions +/// with more than 128 bits. +class ExpandLargeDivRemPass : public PassInfoMixin<ExpandLargeDivRemPass> { +public: + /// Expands the large div/rem instructions in \p F. All analyses are reported + /// as preserved when nothing was expanded, and none otherwise. + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + // The backend asserts when seeing large div/rem instructions. 
+ static bool isRequired() { return true; } +}; +} // end namespace llvm + +#endif // LLVM_CODEGEN_EXPANDLARGEDIVREM_H diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def --- a/llvm/include/llvm/CodeGen/MachinePassRegistry.def +++ b/llvm/include/llvm/CodeGen/MachinePassRegistry.def @@ -43,6 +43,7 @@ FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass, ()) FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass, (false)) FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass, (true)) +FUNCTION_PASS("expandlargedivrem", ExpandLargeDivRemPass, ()) FUNCTION_PASS("expand-reductions", ExpandReductionsPass, ()) FUNCTION_PASS("expandvp", ExpandVectorPredicationPass, ()) FUNCTION_PASS("lowerinvoke", LowerInvokePass, ()) diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -485,6 +485,9 @@ /// predicate mask. FunctionPass *createExpandVectorPredicationPass(); + // Expands large div/rem instructions. + FunctionPass *createExpandLargeDivRemPass(); + // This pass expands memcmp() to load/stores. 
FunctionPass *createExpandMemCmpPass(); diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -150,6 +150,7 @@ void initializeEHContGuardCatchretPass(PassRegistry &); void initializeEliminateAvailableExternallyLegacyPassPass(PassRegistry&); void initializeEntryExitInstrumenterPass(PassRegistry&); +void initializeExpandLargeDivRemLegacyPassPass(PassRegistry&); void initializeExpandMemCmpPassPass(PassRegistry&); void initializeExpandPostRAPass(PassRegistry&); void initializeExpandReductionsPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -196,6 +196,7 @@ (void) llvm::createReversePostOrderFunctionAttrsPass(); (void) llvm::createMergeFunctionsPass(); (void) llvm::createMergeICmpsLegacyPass(); + (void) llvm::createExpandLargeDivRemPass(); (void) llvm::createExpandMemCmpPass(); (void) llvm::createExpandVectorPredicationPass(); std::string buf; diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -55,6 +55,7 @@ EdgeBundles.cpp EHContGuardCatchret.cpp ExecutionDomainFix.cpp + ExpandLargeDivRem.cpp ExpandMemCmp.cpp ExpandPostRAPseudos.cpp ExpandReductions.cpp diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -36,6 +36,7 @@ initializeEarlyIfPredicatorPass(Registry); initializeEarlyMachineLICMPass(Registry); initializeEarlyTailDuplicatePass(Registry); + initializeExpandLargeDivRemLegacyPassPass(Registry); initializeExpandMemCmpPassPass(Registry); initializeExpandPostRAPass(Registry); initializeFEntryInserterPass(Registry); diff --git a/llvm/lib/CodeGen/ExpandLargeDivRem.cpp b/llvm/lib/CodeGen/ExpandLargeDivRem.cpp new 
file mode 100644 --- /dev/null +++ b/llvm/lib/CodeGen/ExpandLargeDivRem.cpp @@ -0,0 +1,120 @@ +//===--- ExpandLargeDivRem.cpp - Expand large div/rem ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass expands div/rem instructions with a bitwidth above a threshold +// into a loop emitted inline by expandDivision() and expandRemainder(). +// This is useful for targets like x86_64 that cannot lower divisions +// with more than 128 bits or targets like x86_32 that cannot lower divisions +// with more than 64 bits. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ExpandLargeDivRem.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/IntegerDivision.h" + +using namespace llvm; + +static cl::opt<unsigned> + ExpandDivRemBits("expand-div-rem-bits", cl::Hidden, cl::init(128), + cl::desc("div and rem instructions on integers with " + "more than <N> bits are expanded.")); + +/// Finds every scalar integer udiv/sdiv/urem/srem in \p F whose bit width is +/// greater than -expand-div-rem-bits and expands it with expandDivision() or +/// expandRemainder(). Returns true if any instruction was expanded. +static bool runImpl(Function &F) { + SmallVector<BinaryOperator *, 4> Replace; + bool Modified = false; + + for (auto &I : instructions(F)) { + switch (I.getOpcode()) { + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: { + // TODO: This doesn't handle vectors. 
+ auto *IntTy = dyn_cast<IntegerType>(I.getType()); + if (!IntTy || IntTy->getIntegerBitWidth() <= ExpandDivRemBits) + continue; + + Replace.push_back(&cast<BinaryOperator>(I)); + Modified = true; + break; + } + default: + break; + } + } + + if (Replace.empty()) + return false; + + // The expansion is deferred until the scan is done, presumably so that F is + // not modified while instructions(F) is still being iterated. + while (!Replace.empty()) { + BinaryOperator *I = Replace.pop_back_val(); + + if (I->getOpcode() == Instruction::UDiv || + I->getOpcode() == Instruction::SDiv) { + expandDivision(I); + } else { + expandRemainder(I); + } + } + + return Modified; +} + +PreservedAnalyses ExpandLargeDivRemPass::run(Function &F, + FunctionAnalysisManager &AM) { + bool Changed = runImpl(F); + + if (Changed) + return PreservedAnalyses::none(); + + return PreservedAnalyses::all(); +} + +namespace { +/// Legacy pass manager wrapper around runImpl(). +class ExpandLargeDivRemLegacyPass : public FunctionPass { +public: + static char ID; + + ExpandLargeDivRemLegacyPass() : FunctionPass(ID) { + initializeExpandLargeDivRemLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { return runImpl(F); } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<AAResultsWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + } +}; +} // namespace + +char ExpandLargeDivRemLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(ExpandLargeDivRemLegacyPass, "expandlargedivrem", + "Expand large div/rem", false, false) +INITIALIZE_PASS_END(ExpandLargeDivRemLegacyPass, "expandlargedivrem", + "Expand large div/rem", false, false) + +FunctionPass *llvm::createExpandLargeDivRemPass() { + return new ExpandLargeDivRemLegacyPass(); +} diff --git a/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/llvm/lib/Transforms/Utils/IntegerDivision.cpp --- a/llvm/lib/Transforms/Utils/IntegerDivision.cpp +++ b/llvm/lib/Transforms/Utils/IntegerDivision.cpp @@ -32,14 +32,7 @@ static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor, IRBuilder<> &Builder) { unsigned BitWidth = Dividend->getType()->getIntegerBitWidth(); - ConstantInt *Shift; - - if (BitWidth == 64) { - Shift = Builder.getInt64(63); - } else { - assert(BitWidth == 
32 && "Unexpected bit width"); - Shift = Builder.getInt32(31); - } + ConstantInt *Shift = Builder.getIntN(BitWidth, BitWidth - 1); // Following instructions are generated for both i32 (shift 31) and // i64 (shift 63). @@ -104,14 +97,7 @@ // Implementation taken from compiler-rt's __divsi3 and __divdi3 unsigned BitWidth = Dividend->getType()->getIntegerBitWidth(); - ConstantInt *Shift; - - if (BitWidth == 64) { - Shift = Builder.getInt64(63); - } else { - assert(BitWidth == 32 && "Unexpected bit width"); - Shift = Builder.getInt32(31); - } + ConstantInt *Shift = Builder.getIntN(BitWidth, BitWidth - 1); // Following instructions are generated for both i32 (shift 31) and // i64 (shift 63). @@ -156,23 +142,10 @@ IntegerType *DivTy = cast(Dividend->getType()); unsigned BitWidth = DivTy->getBitWidth(); - ConstantInt *Zero; - ConstantInt *One; - ConstantInt *NegOne; - ConstantInt *MSB; - - if (BitWidth == 64) { - Zero = Builder.getInt64(0); - One = Builder.getInt64(1); - NegOne = ConstantInt::getSigned(DivTy, -1); - MSB = Builder.getInt64(63); - } else { - assert(BitWidth == 32 && "Unexpected bit width"); - Zero = Builder.getInt32(0); - One = Builder.getInt32(1); - NegOne = ConstantInt::getSigned(DivTy, -1); - MSB = Builder.getInt32(31); - } + ConstantInt *Zero = ConstantInt::get(DivTy, 0); + ConstantInt *One = ConstantInt::get(DivTy, 1); + ConstantInt *NegOne = ConstantInt::getSigned(DivTy, -1); + ConstantInt *MSB = ConstantInt::get(DivTy, BitWidth - 1); ConstantInt *True = Builder.getTrue(); @@ -367,8 +340,7 @@ /// Generate code to calculate the remainder of two integers, replacing Rem with /// the generated code. This currently generates code using the udiv expansion, /// but future work includes generating more specialized code, e.g. when more -/// information about the operands are known. Implements both 32bit and 64bit -/// scalar division. +/// information about the operands are known. /// /// Replace Rem with generated code. 
bool llvm::expandRemainder(BinaryOperator *Rem) { @@ -379,9 +351,6 @@ IRBuilder<> Builder(Rem); assert(!Rem->getType()->isVectorTy() && "Div over vectors not supported"); - assert((Rem->getType()->getIntegerBitWidth() == 32 || - Rem->getType()->getIntegerBitWidth() == 64) && - "Div of bitwidth other than 32 or 64 not supported"); // First prepare the sign if it's a signed remainder if (Rem->getOpcode() == Instruction::SRem) { @@ -421,12 +390,10 @@ return true; } - /// Generate code to divide two integers, replacing Div with the generated /// code. This currently generates code similarly to compiler-rt's /// implementations, but future work includes generating more specialized code -/// when more information about the operands are known. Implements both -/// 32bit and 64bit scalar division. +/// when more information about the operands are known. /// /// Replace Div with generated code. bool llvm::expandDivision(BinaryOperator *Div) { @@ -437,9 +404,6 @@ IRBuilder<> Builder(Div); assert(!Div->getType()->isVectorTy() && "Div over vectors not supported"); - assert((Div->getType()->getIntegerBitWidth() == 32 || - Div->getType()->getIntegerBitWidth() == 64) && - "Div of bitwidth other than 32 or 64 not supported"); // First prepare the sign if it's a signed division if (Div->getOpcode() == Instruction::SDiv) { @@ -540,9 +504,7 @@ unsigned RemTyBitWidth = RemTy->getIntegerBitWidth(); - assert(RemTyBitWidth <= 64 && "Div of bitwidth greater than 64 not supported"); - - if (RemTyBitWidth == 64) + if (RemTyBitWidth >= 64) return expandRemainder(Rem); // If bitwidth smaller than 64 extend inputs, extend output and proceed @@ -637,10 +599,7 @@ unsigned DivTyBitWidth = DivTy->getIntegerBitWidth(); - assert(DivTyBitWidth <= 64 && - "Div of bitwidth greater than 64 not supported"); - - if (DivTyBitWidth == 64) + if (DivTyBitWidth >= 64) return expandDivision(Div); // If bitwidth smaller than 64 extend inputs, extend output and proceed diff --git 
a/llvm/test/CodeGen/X86/urem-seteq.ll b/llvm/test/CodeGen/X86/urem-seteq.ll --- a/llvm/test/CodeGen/X86/urem-seteq.ll +++ b/llvm/test/CodeGen/X86/urem-seteq.ll @@ -362,22 +362,7 @@ ; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=34366 define void @ossfuzz34366() { ; X86-LABEL: ossfuzz34366: -; X86: # %bb.0: -; X86-NEXT: movl (%eax), %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF -; X86-NEXT: orl %eax, %ecx -; X86-NEXT: sete (%eax) -; X86-NEXT: retl -; ; X64-LABEL: ossfuzz34366: -; X64: # %bb.0: -; X64-NEXT: movq (%rax), %rax -; X64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF -; X64-NEXT: andq %rax, %rcx -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: sete (%rax) -; X64-NEXT: retq %L10 = load i448, i448* undef, align 4 %B18 = urem i448 %L10, -363419362147803445274661903944002267176820680343659030140745099590319644056698961663095525356881782780381260803133088966767300814307328 %C13 = icmp ule i448 %B18, 0 diff --git a/llvm/test/Transforms/ExpandLargeDivRem/sdiv129.ll b/llvm/test/Transforms/ExpandLargeDivRem/sdiv129.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ExpandLargeDivRem/sdiv129.ll @@ -0,0 +1,67 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -expandlargedivrem < %s | FileCheck %s + +define void @sdiv129(i129* %ptr, i129* %out) nounwind { +; CHECK-LABEL: @sdiv129( +; CHECK-NEXT: _udiv-special-cases: +; CHECK-NEXT: [[A:%.*]] = load i129, i129* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = ashr i129 [[A]], 128 +; CHECK-NEXT: [[TMP1:%.*]] = xor i129 [[TMP0]], [[A]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i129 [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = xor i129 0, [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i129 [[TMP2]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = or i1 false, [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = call i129 @llvm.ctlz.i129(i129 3, i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP2]], i1 true) +; 
CHECK-NEXT: [[TMP8:%.*]] = sub i129 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt i129 [[TMP8]], 128 +; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP5]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i129 [[TMP8]], 128 +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP10]], i129 0, i129 [[TMP2]] +; CHECK-NEXT: [[TMP13:%.*]] = or i1 [[TMP10]], [[TMP11]] +; CHECK-NEXT: br i1 [[TMP13]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]] +; CHECK: udiv-loop-exit: +; CHECK-NEXT: [[TMP14:%.*]] = phi i129 [ 0, [[UDIV_BB1]] ], [ [[TMP29:%.*]], [[UDIV_DO_WHILE:%.*]] ] +; CHECK-NEXT: [[TMP15:%.*]] = phi i129 [ [[TMP37:%.*]], [[UDIV_BB1]] ], [ [[TMP26:%.*]], [[UDIV_DO_WHILE]] ] +; CHECK-NEXT: [[TMP16:%.*]] = shl i129 [[TMP15]], 1 +; CHECK-NEXT: [[TMP17:%.*]] = or i129 [[TMP14]], [[TMP16]] +; CHECK-NEXT: br label [[UDIV_END]] +; CHECK: udiv-do-while: +; CHECK-NEXT: [[TMP18:%.*]] = phi i129 [ 0, [[UDIV_PREHEADER:%.*]] ], [ [[TMP29]], [[UDIV_DO_WHILE]] ] +; CHECK-NEXT: [[TMP19:%.*]] = phi i129 [ [[TMP35:%.*]], [[UDIV_PREHEADER]] ], [ [[TMP32:%.*]], [[UDIV_DO_WHILE]] ] +; CHECK-NEXT: [[TMP20:%.*]] = phi i129 [ [[TMP34:%.*]], [[UDIV_PREHEADER]] ], [ [[TMP31:%.*]], [[UDIV_DO_WHILE]] ] +; CHECK-NEXT: [[TMP21:%.*]] = phi i129 [ [[TMP37]], [[UDIV_PREHEADER]] ], [ [[TMP26]], [[UDIV_DO_WHILE]] ] +; CHECK-NEXT: [[TMP22:%.*]] = shl i129 [[TMP20]], 1 +; CHECK-NEXT: [[TMP23:%.*]] = lshr i129 [[TMP21]], 128 +; CHECK-NEXT: [[TMP24:%.*]] = or i129 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = shl i129 [[TMP21]], 1 +; CHECK-NEXT: [[TMP26]] = or i129 [[TMP18]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = sub i129 2, [[TMP24]] +; CHECK-NEXT: [[TMP28:%.*]] = ashr i129 [[TMP27]], 128 +; CHECK-NEXT: [[TMP29]] = and i129 [[TMP28]], 1 +; CHECK-NEXT: [[TMP30:%.*]] = and i129 [[TMP28]], 3 +; CHECK-NEXT: [[TMP31]] = sub i129 [[TMP24]], [[TMP30]] +; CHECK-NEXT: [[TMP32]] = add i129 [[TMP19]], -1 +; CHECK-NEXT: [[TMP33:%.*]] = icmp eq i129 [[TMP32]], 0 +; CHECK-NEXT: br i1 [[TMP33]], label 
[[UDIV_LOOP_EXIT:%.*]], label [[UDIV_DO_WHILE]] +; CHECK: udiv-preheader: +; CHECK-NEXT: [[TMP34]] = lshr i129 [[TMP2]], [[TMP35]] +; CHECK-NEXT: br label [[UDIV_DO_WHILE]] +; CHECK: udiv-bb1: +; CHECK-NEXT: [[TMP35]] = add i129 [[TMP8]], 1 +; CHECK-NEXT: [[TMP36:%.*]] = sub i129 128, [[TMP8]] +; CHECK-NEXT: [[TMP37]] = shl i129 [[TMP2]], [[TMP36]] +; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i129 [[TMP35]], 0 +; CHECK-NEXT: br i1 [[TMP38]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]] +; CHECK: udiv-end: +; CHECK-NEXT: [[TMP39:%.*]] = phi i129 [ [[TMP17]], [[UDIV_LOOP_EXIT]] ], [ [[TMP12]], [[_UDIV_SPECIAL_CASES:%.*]] ] +; CHECK-NEXT: [[TMP40:%.*]] = xor i129 [[TMP39]], [[TMP3]] +; CHECK-NEXT: [[TMP41:%.*]] = sub i129 [[TMP40]], [[TMP3]] +; CHECK-NEXT: store i129 [[TMP41]], i129* [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; + %a = load i129, i129* %ptr + %res = sdiv i129 %a, 3 + store i129 %res, i129* %out + ret void +} diff --git a/llvm/test/Transforms/ExpandLargeDivRem/srem129.ll b/llvm/test/Transforms/ExpandLargeDivRem/srem129.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ExpandLargeDivRem/srem129.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -expandlargedivrem < %s | FileCheck %s + +define void @test(i129* %ptr, i129* %out) nounwind { +; CHECK-LABEL: @test( +; CHECK-NEXT: _udiv-special-cases: +; CHECK-NEXT: [[A:%.*]] = load i129, i129* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = ashr i129 [[A]], 128 +; CHECK-NEXT: [[TMP1:%.*]] = xor i129 [[A]], [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i129 [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i129 [[TMP2]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = or i1 false, [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = call i129 @llvm.ctlz.i129(i129 3, i1 true) +; CHECK-NEXT: [[TMP6:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP2]], i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = sub i129 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i129 
[[TMP7]], 128 +; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP4]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i129 [[TMP7]], 128 +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP9]], i129 0, i129 [[TMP2]] +; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP9]], [[TMP10]] +; CHECK-NEXT: br i1 [[TMP12]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]] +; CHECK: udiv-loop-exit: +; CHECK-NEXT: [[TMP13:%.*]] = phi i129 [ 0, [[UDIV_BB1]] ], [ [[TMP28:%.*]], [[UDIV_DO_WHILE:%.*]] ] +; CHECK-NEXT: [[TMP14:%.*]] = phi i129 [ [[TMP36:%.*]], [[UDIV_BB1]] ], [ [[TMP25:%.*]], [[UDIV_DO_WHILE]] ] +; CHECK-NEXT: [[TMP15:%.*]] = shl i129 [[TMP14]], 1 +; CHECK-NEXT: [[TMP16:%.*]] = or i129 [[TMP13]], [[TMP15]] +; CHECK-NEXT: br label [[UDIV_END]] +; CHECK: udiv-do-while: +; CHECK-NEXT: [[TMP17:%.*]] = phi i129 [ 0, [[UDIV_PREHEADER:%.*]] ], [ [[TMP28]], [[UDIV_DO_WHILE]] ] +; CHECK-NEXT: [[TMP18:%.*]] = phi i129 [ [[TMP34:%.*]], [[UDIV_PREHEADER]] ], [ [[TMP31:%.*]], [[UDIV_DO_WHILE]] ] +; CHECK-NEXT: [[TMP19:%.*]] = phi i129 [ [[TMP33:%.*]], [[UDIV_PREHEADER]] ], [ [[TMP30:%.*]], [[UDIV_DO_WHILE]] ] +; CHECK-NEXT: [[TMP20:%.*]] = phi i129 [ [[TMP36]], [[UDIV_PREHEADER]] ], [ [[TMP25]], [[UDIV_DO_WHILE]] ] +; CHECK-NEXT: [[TMP21:%.*]] = shl i129 [[TMP19]], 1 +; CHECK-NEXT: [[TMP22:%.*]] = lshr i129 [[TMP20]], 128 +; CHECK-NEXT: [[TMP23:%.*]] = or i129 [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = shl i129 [[TMP20]], 1 +; CHECK-NEXT: [[TMP25]] = or i129 [[TMP17]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = sub i129 2, [[TMP23]] +; CHECK-NEXT: [[TMP27:%.*]] = ashr i129 [[TMP26]], 128 +; CHECK-NEXT: [[TMP28]] = and i129 [[TMP27]], 1 +; CHECK-NEXT: [[TMP29:%.*]] = and i129 [[TMP27]], 3 +; CHECK-NEXT: [[TMP30]] = sub i129 [[TMP23]], [[TMP29]] +; CHECK-NEXT: [[TMP31]] = add i129 [[TMP18]], -1 +; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i129 [[TMP31]], 0 +; CHECK-NEXT: br i1 [[TMP32]], label [[UDIV_LOOP_EXIT:%.*]], label [[UDIV_DO_WHILE]] +; CHECK: udiv-preheader: +; CHECK-NEXT: [[TMP33]] = lshr i129 
[[TMP2]], [[TMP34]] +; CHECK-NEXT: br label [[UDIV_DO_WHILE]] +; CHECK: udiv-bb1: +; CHECK-NEXT: [[TMP34]] = add i129 [[TMP7]], 1 +; CHECK-NEXT: [[TMP35:%.*]] = sub i129 128, [[TMP7]] +; CHECK-NEXT: [[TMP36]] = shl i129 [[TMP2]], [[TMP35]] +; CHECK-NEXT: [[TMP37:%.*]] = icmp eq i129 [[TMP34]], 0 +; CHECK-NEXT: br i1 [[TMP37]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]] +; CHECK: udiv-end: +; CHECK-NEXT: [[TMP38:%.*]] = phi i129 [ [[TMP16]], [[UDIV_LOOP_EXIT]] ], [ [[TMP11]], [[_UDIV_SPECIAL_CASES:%.*]] ] +; CHECK-NEXT: [[TMP39:%.*]] = mul i129 3, [[TMP38]] +; CHECK-NEXT: [[TMP40:%.*]] = sub i129 [[TMP2]], [[TMP39]] +; CHECK-NEXT: [[TMP41:%.*]] = xor i129 [[TMP40]], [[TMP0]] +; CHECK-NEXT: [[TMP42:%.*]] = sub i129 [[TMP41]], [[TMP0]] +; CHECK-NEXT: store i129 [[TMP42]], i129* [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; + %a = load i129, i129* %ptr + %res = srem i129 %a, 3 + store i129 %res, i129* %out + ret void +} diff --git a/llvm/test/Transforms/ExpandLargeDivRem/udiv129.ll b/llvm/test/Transforms/ExpandLargeDivRem/udiv129.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ExpandLargeDivRem/udiv129.ll @@ -0,0 +1,61 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -expandlargedivrem < %s | FileCheck %s + +define void @test(i129* %ptr, i129* %out) nounwind { +; CHECK-LABEL: @test( +; CHECK-NEXT: _udiv-special-cases: +; CHECK-NEXT: [[A:%.*]] = load i129, i129* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i129 [[A]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = or i1 false, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = call i129 @llvm.ctlz.i129(i129 3, i1 true) +; CHECK-NEXT: [[TMP3:%.*]] = call i129 @llvm.ctlz.i129(i129 [[A]], i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = sub i129 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i129 [[TMP4]], 128 +; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP1]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i129 [[TMP4]], 128 +; CHECK-NEXT: [[TMP8:%.*]] = select i1 
[[TMP6]], i129 0, i129 [[A]] +; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP9]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]] +; CHECK: udiv-loop-exit: +; CHECK-NEXT: [[TMP10:%.*]] = phi i129 [ 0, [[UDIV_BB1]] ], [ [[TMP25:%.*]], [[UDIV_DO_WHILE:%.*]] ] +; CHECK-NEXT: [[TMP11:%.*]] = phi i129 [ [[TMP33:%.*]], [[UDIV_BB1]] ], [ [[TMP22:%.*]], [[UDIV_DO_WHILE]] ] +; CHECK-NEXT: [[TMP12:%.*]] = shl i129 [[TMP11]], 1 +; CHECK-NEXT: [[TMP13:%.*]] = or i129 [[TMP10]], [[TMP12]] +; CHECK-NEXT: br label [[UDIV_END]] +; CHECK: udiv-do-while: +; CHECK-NEXT: [[TMP14:%.*]] = phi i129 [ 0, [[UDIV_PREHEADER:%.*]] ], [ [[TMP25]], [[UDIV_DO_WHILE]] ] +; CHECK-NEXT: [[TMP15:%.*]] = phi i129 [ [[TMP31:%.*]], [[UDIV_PREHEADER]] ], [ [[TMP28:%.*]], [[UDIV_DO_WHILE]] ] +; CHECK-NEXT: [[TMP16:%.*]] = phi i129 [ [[TMP30:%.*]], [[UDIV_PREHEADER]] ], [ [[TMP27:%.*]], [[UDIV_DO_WHILE]] ] +; CHECK-NEXT: [[TMP17:%.*]] = phi i129 [ [[TMP33]], [[UDIV_PREHEADER]] ], [ [[TMP22]], [[UDIV_DO_WHILE]] ] +; CHECK-NEXT: [[TMP18:%.*]] = shl i129 [[TMP16]], 1 +; CHECK-NEXT: [[TMP19:%.*]] = lshr i129 [[TMP17]], 128 +; CHECK-NEXT: [[TMP20:%.*]] = or i129 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = shl i129 [[TMP17]], 1 +; CHECK-NEXT: [[TMP22]] = or i129 [[TMP14]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = sub i129 2, [[TMP20]] +; CHECK-NEXT: [[TMP24:%.*]] = ashr i129 [[TMP23]], 128 +; CHECK-NEXT: [[TMP25]] = and i129 [[TMP24]], 1 +; CHECK-NEXT: [[TMP26:%.*]] = and i129 [[TMP24]], 3 +; CHECK-NEXT: [[TMP27]] = sub i129 [[TMP20]], [[TMP26]] +; CHECK-NEXT: [[TMP28]] = add i129 [[TMP15]], -1 +; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i129 [[TMP28]], 0 +; CHECK-NEXT: br i1 [[TMP29]], label [[UDIV_LOOP_EXIT:%.*]], label [[UDIV_DO_WHILE]] +; CHECK: udiv-preheader: +; CHECK-NEXT: [[TMP30]] = lshr i129 [[A]], [[TMP31]] +; CHECK-NEXT: br label [[UDIV_DO_WHILE]] +; CHECK: udiv-bb1: +; CHECK-NEXT: [[TMP31]] = add i129 [[TMP4]], 1 +; CHECK-NEXT: [[TMP32:%.*]] = sub i129 128, 
[[TMP4]] +; CHECK-NEXT: [[TMP33]] = shl i129 [[A]], [[TMP32]] +; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i129 [[TMP31]], 0 +; CHECK-NEXT: br i1 [[TMP34]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]] +; CHECK: udiv-end: +; CHECK-NEXT: [[TMP35:%.*]] = phi i129 [ [[TMP13]], [[UDIV_LOOP_EXIT]] ], [ [[TMP8]], [[_UDIV_SPECIAL_CASES:%.*]] ] +; CHECK-NEXT: store i129 [[TMP35]], i129* [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; + %a = load i129, i129* %ptr + %res = udiv i129 %a, 3 + store i129 %res, i129* %out + ret void +} diff --git a/llvm/test/Transforms/ExpandLargeDivRem/urem129.ll b/llvm/test/Transforms/ExpandLargeDivRem/urem129.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ExpandLargeDivRem/urem129.ll @@ -0,0 +1,63 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -expandlargedivrem < %s | FileCheck %s + +define void @test(i129* %ptr, i129* %out) nounwind { +; CHECK-LABEL: @test( +; CHECK-NEXT: _udiv-special-cases: +; CHECK-NEXT: [[A:%.*]] = load i129, i129* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i129 [[A]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = or i1 false, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = call i129 @llvm.ctlz.i129(i129 3, i1 true) +; CHECK-NEXT: [[TMP3:%.*]] = call i129 @llvm.ctlz.i129(i129 [[A]], i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = sub i129 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i129 [[TMP4]], 128 +; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP1]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i129 [[TMP4]], 128 +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP6]], i129 0, i129 [[A]] +; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP9]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]] +; CHECK: udiv-loop-exit: +; CHECK-NEXT: [[TMP10:%.*]] = phi i129 [ 0, [[UDIV_BB1]] ], [ [[TMP25:%.*]], [[UDIV_DO_WHILE:%.*]] ] +; CHECK-NEXT: [[TMP11:%.*]] = phi i129 [ [[TMP33:%.*]], [[UDIV_BB1]] ], [ [[TMP22:%.*]], [[UDIV_DO_WHILE]] ] +; CHECK-NEXT: 
[[TMP12:%.*]] = shl i129 [[TMP11]], 1 +; CHECK-NEXT: [[TMP13:%.*]] = or i129 [[TMP10]], [[TMP12]] +; CHECK-NEXT: br label [[UDIV_END]] +; CHECK: udiv-do-while: +; CHECK-NEXT: [[TMP14:%.*]] = phi i129 [ 0, [[UDIV_PREHEADER:%.*]] ], [ [[TMP25]], [[UDIV_DO_WHILE]] ] +; CHECK-NEXT: [[TMP15:%.*]] = phi i129 [ [[TMP31:%.*]], [[UDIV_PREHEADER]] ], [ [[TMP28:%.*]], [[UDIV_DO_WHILE]] ] +; CHECK-NEXT: [[TMP16:%.*]] = phi i129 [ [[TMP30:%.*]], [[UDIV_PREHEADER]] ], [ [[TMP27:%.*]], [[UDIV_DO_WHILE]] ] +; CHECK-NEXT: [[TMP17:%.*]] = phi i129 [ [[TMP33]], [[UDIV_PREHEADER]] ], [ [[TMP22]], [[UDIV_DO_WHILE]] ] +; CHECK-NEXT: [[TMP18:%.*]] = shl i129 [[TMP16]], 1 +; CHECK-NEXT: [[TMP19:%.*]] = lshr i129 [[TMP17]], 128 +; CHECK-NEXT: [[TMP20:%.*]] = or i129 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = shl i129 [[TMP17]], 1 +; CHECK-NEXT: [[TMP22]] = or i129 [[TMP14]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = sub i129 2, [[TMP20]] +; CHECK-NEXT: [[TMP24:%.*]] = ashr i129 [[TMP23]], 128 +; CHECK-NEXT: [[TMP25]] = and i129 [[TMP24]], 1 +; CHECK-NEXT: [[TMP26:%.*]] = and i129 [[TMP24]], 3 +; CHECK-NEXT: [[TMP27]] = sub i129 [[TMP20]], [[TMP26]] +; CHECK-NEXT: [[TMP28]] = add i129 [[TMP15]], -1 +; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i129 [[TMP28]], 0 +; CHECK-NEXT: br i1 [[TMP29]], label [[UDIV_LOOP_EXIT:%.*]], label [[UDIV_DO_WHILE]] +; CHECK: udiv-preheader: +; CHECK-NEXT: [[TMP30]] = lshr i129 [[A]], [[TMP31]] +; CHECK-NEXT: br label [[UDIV_DO_WHILE]] +; CHECK: udiv-bb1: +; CHECK-NEXT: [[TMP31]] = add i129 [[TMP4]], 1 +; CHECK-NEXT: [[TMP32:%.*]] = sub i129 128, [[TMP4]] +; CHECK-NEXT: [[TMP33]] = shl i129 [[A]], [[TMP32]] +; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i129 [[TMP31]], 0 +; CHECK-NEXT: br i1 [[TMP34]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]] +; CHECK: udiv-end: +; CHECK-NEXT: [[TMP35:%.*]] = phi i129 [ [[TMP13]], [[UDIV_LOOP_EXIT]] ], [ [[TMP8]], [[_UDIV_SPECIAL_CASES:%.*]] ] +; CHECK-NEXT: [[TMP36:%.*]] = mul i129 3, [[TMP35]] +; CHECK-NEXT: [[TMP37:%.*]] 
= sub i129 [[A]], [[TMP36]] +; CHECK-NEXT: store i129 [[TMP37]], i129* [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; + %a = load i129, i129* %ptr + %res = urem i129 %a, 3 + store i129 %res, i129* %out + ret void +} diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -493,7 +493,7 @@ "replace-with-veclib", "jmc-instrument", "dot-regions", "dot-regions-only", "view-regions", "view-regions-only", - "select-optimize"}; + "select-optimize", "expandlargedivrem"}; for (const auto &P : PassNamePrefix) if (Pass.startswith(P)) return true; @@ -542,6 +542,7 @@ initializeTarget(Registry); // For codegen passes, only passes that do IR to IR transformation are // supported. + initializeExpandLargeDivRemLegacyPassPass(Registry); initializeExpandMemCmpPassPass(Registry); initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry); initializeSelectOptimizePass(Registry);