Index: llvm/trunk/lib/Transforms/Utils/BypassSlowDivision.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/BypassSlowDivision.cpp +++ llvm/trunk/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -17,6 +17,7 @@ #include "llvm/Transforms/Utils/BypassSlowDivision.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -81,17 +82,18 @@ typedef DenseMap DivCacheTy; typedef DenseMap BypassWidthsTy; + typedef SmallPtrSet VisitedSetTy; } namespace { enum ValueRange { /// Operand definitely fits into BypassType. No runtime checks are needed. - VALRNG_SHORT, + VALRNG_KNOWN_SHORT, /// A runtime check is required, as value range is unknown. VALRNG_UNKNOWN, /// Operand is unlikely to fit into BypassType. The bypassing should be /// disabled. - VALRNG_LONG + VALRNG_LIKELY_LONG }; class FastDivInsertionTask { @@ -100,7 +102,8 @@ IntegerType *BypassType = nullptr; BasicBlock *MainBB = nullptr; - ValueRange getValueRange(Value *Op); + bool isHashLikeValue(Value *V, VisitedSetTy &Visited); + ValueRange getValueRange(Value *Op, VisitedSetTy &Visited); QuotRemWithBB createSlowBB(BasicBlock *Successor); QuotRemWithBB createFastBB(BasicBlock *Successor); QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS, @@ -187,8 +190,65 @@ return isDivisionOp() ? Value.Quotient : Value.Remainder; } +/// \brief Check if a value looks like a hash. +/// +/// The routine is expected to detect values computed using the most common hash +/// algorithms. Typically, hash computations end with one of the following +/// instructions: +/// +/// 1) MUL with a constant wider than BypassType +/// 2) XOR instruction +/// +/// And even if we are wrong and the value is not a hash, it is still quite +/// unlikely that such values will fit into BypassType. +/// +/// To detect string hash algorithms like FNV we have to look through PHI-nodes. +/// It is implemented as a depth-first search for values that look neither long +/// nor hash-like. +bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) { + Instruction *I = dyn_cast(V); + if (!I) + return false; + + switch (I->getOpcode()) { + case Instruction::Xor: + return true; + case Instruction::Mul: { + // After Constant Hoisting pass, long constants may be represented as + // bitcast instructions. As a result, some constants may look like an + // instruction at first, and an additional check is necessary to find out if + // an operand is actually a constant. + Value *Op1 = I->getOperand(1); + ConstantInt *C = dyn_cast(Op1); + if (!C && isa(Op1)) + C = dyn_cast(cast(Op1)->getOperand(0)); + return C && C->getValue().getMinSignedBits() > BypassType->getBitWidth(); + } + case Instruction::PHI: { + // Stop IR traversal in case of a crazy input code. This limits recursion + // depth. + if (Visited.size() >= 16) + return false; + // Do not visit nodes that have been visited already. We return true because + // it means that we couldn't find any value that doesn't look hash-like. + if (Visited.find(I) != Visited.end()) + return true; + Visited.insert(I); + return llvm::all_of(cast(I)->incoming_values(), [&](Value *V) { + // Ignore undef values as they probably don't affect the division + // operands. + return getValueRange(V, Visited) == VALRNG_LIKELY_LONG || + isa(V); + }); + } + default: + return false; + } +} + /// Check if an integer value fits into our bypass type. -ValueRange FastDivInsertionTask::getValueRange(Value *V) { +ValueRange FastDivInsertionTask::getValueRange(Value *V, + VisitedSetTy &Visited) { unsigned ShortLen = BypassType->getBitWidth(); unsigned LongLen = V->getType()->getIntegerBitWidth(); @@ -201,10 +261,17 @@ computeKnownBits(V, Zeros, Ones, DL); if (Zeros.countLeadingOnes() >= HiBits) - return VALRNG_SHORT; + return VALRNG_KNOWN_SHORT; if (Ones.countLeadingZeros() < HiBits) - return VALRNG_LONG; + return VALRNG_LIKELY_LONG; + + // Long integer divisions are often used in hashtable implementations. It's + // not worth bypassing such divisions because hash values are extremely + // unlikely to have enough leading zeros. The call below tries to detect + // values that are unlikely to fit BypassType (including hashes). + if (isHashLikeValue(V, Visited)) + return VALRNG_LIKELY_LONG; return VALRNG_UNKNOWN; } @@ -308,16 +375,18 @@ return None; } - ValueRange DividendRange = getValueRange(Dividend); - if (DividendRange == VALRNG_LONG) + VisitedSetTy SetL; + ValueRange DividendRange = getValueRange(Dividend, SetL); + if (DividendRange == VALRNG_LIKELY_LONG) return None; - ValueRange DivisorRange = getValueRange(Divisor); - if (DivisorRange == VALRNG_LONG) + VisitedSetTy SetR; + ValueRange DivisorRange = getValueRange(Divisor, SetR); + if (DivisorRange == VALRNG_LIKELY_LONG) return None; - bool DividendShort = (DividendRange == VALRNG_SHORT); - bool DivisorShort = (DivisorRange == VALRNG_SHORT); + bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT); + bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT); if (DividendShort && DivisorShort) { // If both operands are known to be short then just replace the long Index: llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-special-cases.ll =================================================================== --- llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-special-cases.ll +++ llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-special-cases.ll @@ -93,3 +93,124 @@ store i64 %res, i64* %retptr ret void } + + +; Do not bypass a division if one of the operands looks like a hash value. +define void @Test_dont_bypass_xor(i64 %a, i64 %b, i64 %l, i64* %retptr) { +; CHECK-LABEL: @Test_dont_bypass_xor( +; CHECK-NEXT: [[C:%.*]] = xor i64 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[RES:%.*]] = udiv i64 [[C]], [[L:%.*]] +; CHECK-NEXT: store i64 [[RES]], i64* [[RETPTR:%.*]] +; CHECK-NEXT: ret void +; + %c = xor i64 %a, %b + %res = udiv i64 %c, %l + store i64 %res, i64* %retptr + ret void +} + +define void @Test_dont_bypass_phi_xor(i64 %a, i64 %b, i64 %l, i64* %retptr) { +; CHECK-LABEL: @Test_dont_bypass_phi_xor( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[B:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[MERGE:%.*]], label [[XORPATH:%.*]] +; CHECK: xorpath: +; CHECK-NEXT: [[C:%.*]] = xor i64 [[A:%.*]], [[B]] +; CHECK-NEXT: br label [[MERGE]] +; CHECK: merge: +; CHECK-NEXT: [[E:%.*]] = phi i64 [ undef, [[ENTRY:%.*]] ], [ [[C]], [[XORPATH]] ] +; CHECK-NEXT: [[RES:%.*]] = sdiv i64 [[E]], [[L:%.*]] +; CHECK-NEXT: store i64 [[RES]], i64* [[RETPTR:%.*]] +; CHECK-NEXT: ret void +; +entry: + %cmp = icmp eq i64 %b, 0 + br i1 %cmp, label %merge, label %xorpath + +xorpath: + %c = xor i64 %a, %b + br label %merge + +merge: + %e = phi i64 [ undef, %entry ], [ %c, %xorpath ] + %res = sdiv i64 %e, %l + store i64 %res, i64* %retptr + ret void +} + +define void @Test_dont_bypass_mul_long_const(i64 %a, i64 %l, i64* %retptr) { +; CHECK-LABEL: @Test_dont_bypass_mul_long_const( +; CHECK-NEXT: [[C:%.*]] = mul i64 [[A:%.*]], 5229553307 +; CHECK-NEXT: [[RES:%.*]] = urem i64 [[C]], [[L:%.*]] +; CHECK-NEXT: store i64 [[RES]], i64* [[RETPTR:%.*]] +; CHECK-NEXT: ret void +; + %c = mul i64 %a, 5229553307 ; the constant doesn't fit 32 bits + %res = urem i64 %c, %l + store i64 %res, i64* %retptr + ret void +} + +define void @Test_bypass_phi_mul_const(i64 %a, i64 %b, i64* %retptr) { +; CHECK-LABEL: @Test_bypass_phi_mul_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_MUL:%.*]] = mul nsw i64 [[A:%.*]], 34806414968801 +; CHECK-NEXT: [[P:%.*]] = icmp sgt i64 [[A]], [[B:%.*]] +; CHECK-NEXT: br i1 [[P]], label [[BRANCH:%.*]], label [[MERGE:%.*]] +; CHECK: branch: +; CHECK-NEXT: br label [[MERGE]] +; CHECK: merge: +; CHECK-NEXT: [[LHS:%.*]] = phi i64 [ 42, [[BRANCH]] ], [ [[A_MUL]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = or i64 [[LHS]], [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], -4294967296 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP8:%.*]] +; CHECK: [[TMP4:%.*]] = trunc i64 [[B]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[LHS]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = udiv i32 [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +; CHECK-NEXT: br label [[TMP10:%.*]] +; CHECK: [[TMP9:%.*]] = sdiv i64 [[LHS]], [[B]] +; CHECK-NEXT: br label [[TMP10]] +; CHECK: [[TMP11:%.*]] = phi i64 [ [[TMP7]], [[TMP3]] ], [ [[TMP9]], [[TMP8]] ] +; CHECK-NEXT: store i64 [[TMP11]], i64* [[RETPTR:%.*]] +; CHECK-NEXT: ret void +; +entry: + %a.mul = mul nsw i64 %a, 34806414968801 + %p = icmp sgt i64 %a, %b + br i1 %p, label %branch, label %merge + +branch: + br label %merge + +merge: + %lhs = phi i64 [ 42, %branch ], [ %a.mul, %entry ] + %res = sdiv i64 %lhs, %b + store i64 %res, i64* %retptr + ret void +} + +define void @Test_bypass_mul_short_const(i64 %a, i64 %l, i64* %retptr) { +; CHECK-LABEL: @Test_bypass_mul_short_const( +; CHECK-NEXT: [[C:%.*]] = mul i64 [[A:%.*]], -42 +; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[C]], [[L:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4294967296 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP9:%.*]] +; CHECK: [[TMP5:%.*]] = trunc i64 [[L]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[C]] to i32 +; CHECK-NEXT: [[TMP7:%.*]] = urem i32 [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +; CHECK-NEXT: br label [[TMP11:%.*]] +; CHECK: [[TMP10:%.*]] = urem i64 [[C]], [[L]] +; CHECK-NEXT: br label [[TMP11]] +; CHECK: [[TMP12:%.*]] = phi i64 [ [[TMP8]], [[TMP4]] ], [ [[TMP10]], [[TMP9]] ] +; CHECK-NEXT: store i64 [[TMP12]], i64* [[RETPTR:%.*]] +; CHECK-NEXT: ret void +; + %c = mul i64 %a, -42 + %res = urem i64 %c, %l + store i64 %res, i64* %retptr + ret void +}