Index: llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
===================================================================
--- llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -90,6 +90,11 @@
     return false;
   }
 
+  // If the numerator is a constant, bail if it doesn't fit into BypassType.
+  if (ConstantInt *ConstDividend = dyn_cast<ConstantInt>(Dividend))
+    if (ConstDividend->getValue().getActiveBits() > BypassType->getBitWidth())
+      return false;
+
   // Basic Block is split before divide
   BasicBlock *MainBB = &*I->getParent();
   BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I);
@@ -151,7 +156,17 @@
   // Combine operands into a single value with OR for value testing below
   MainBB->getInstList().back().eraseFromParent();
   IRBuilder<> MainBuilder(MainBB, MainBB->end());
-  Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
+
+  // We bailed out above if the divisor is a constant, but the dividend may
+  // still be a constant. Set OrV to our non-constant operands OR'ed
+  // together.
+  assert(!isa<ConstantInt>(Divisor));
+
+  Value *OrV;
+  if (!isa<ConstantInt>(Dividend))
+    OrV = MainBuilder.CreateOr(Dividend, Divisor);
+  else
+    OrV = Divisor;
 
   // BitMask is inverted to check if the operands are
   // larger than the bypass type
Index: llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-constant-numerator.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-constant-numerator.ll
@@ -0,0 +1,35 @@
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; When we bypass slow div with a constant numerator which fits into the bypass
+; width, we still emit the bypass code, but we don't 'or' the numerator with
+; the denominator.
+; CHECK-LABEL: @small_constant_numer
+define i64 @small_constant_numer(i64 %a) {
+  ; CHECK: [[AND:%[0-9]+]] = and i64 %a, -4294967296
+  ; CHECK: icmp eq i64 [[AND]], 0
+
+  ; CHECK: [[TRUNC:%[0-9]+]] = trunc i64 %a to i32
+  ; CHECK: udiv i32 -1, [[TRUNC]]
+  %d = sdiv i64 4294967295, %a ; 0xffff'ffff, i.e. 2^32 - 1
+  ret i64 %d
+}
+
+; When we try to bypass slow div with a constant numerator which *doesn't* fit
+; into the bypass width, leave it as a plain 64-bit div with no bypass.
+; CHECK-LABEL: @large_constant_numer
+define i64 @large_constant_numer(i64 %a) {
+  ; CHECK-NOT: udiv i32
+  %d = sdiv i64 4294967296, %a ; 0x1'0000'0000, i.e. exactly 2^32
+  ret i64 %d
+}
+
+; For good measure, also try a numerator strictly greater than 2^32.
+; CHECK-LABEL: @larger_constant_numer
+define i64 @larger_constant_numer(i64 %a) {
+  ; CHECK-NOT: udiv i32
+  %d = sdiv i64 5000000000, %a
+  ret i64 %d
+}