Index: lib/Transforms/Utils/BypassSlowDivision.cpp =================================================================== --- lib/Transforms/Utils/BypassSlowDivision.cpp +++ lib/Transforms/Utils/BypassSlowDivision.cpp @@ -100,6 +100,7 @@ IntegerType *BypassType = nullptr; BasicBlock *MainBB = nullptr; + bool isHashLikeValue(Value *V, int Depth = 0); ValueRange getValueRange(Value *Op); QuotRemWithBB createSlowBB(BasicBlock *Successor); QuotRemWithBB createFastBB(BasicBlock *Successor); @@ -187,6 +188,58 @@ return isDivisionOp() ? Value.Quotient : Value.Remainder; } +/// \brief Check if a value looks like a hash. +/// +/// The routine is expected to detect values computed using the most common hash +/// algorithms. Typically, hash computations end with one of the following +/// instructions: +/// +/// 1) MUL with a constant wider than BypassType +/// 2) XOR instruction +/// +/// And even if we are wrong and the value is not a hash, it is still quite +/// unlikely that such values will fit into BypassType. +/// +/// To detect string hash algorithms like FNV we have to look through up to 2 +/// levels of PHI-nodes (if the loop is unrolled) for suspicious MUL and XOR +/// instructions. +bool FastDivInsertionTask::isHashLikeValue(Value *V, int Depth) { + if (Depth++ > 2) + return false; + + Instruction *I = dyn_cast(V); + if (!I) + return false; + + switch (I->getOpcode()) { + case Instruction::Xor: + return true; + case Instruction::Mul: { + // After Constant Hoisting pass, long constants may be represented as + // bitcast instructions. As a result, some constants may look like an + // instruction at first, and an additional check is necessary to find out if + // an operand is actually a constant. + Value *Op1 = I->getOperand(1); + ConstantInt *C = dyn_cast(Op1); + if (!C && isa(Op1)) + C = dyn_cast(cast(Op1)->getOperand(0)); + if (C && C->getValue().getMinSignedBits() > BypassType->getBitWidth()) + return true; + break; + } + case Instruction::PHI: { + const PHINode *P = cast(I); + int NumVal = P->getNumIncomingValues(); + for (int i = 0; i < NumVal; ++i) + if (isHashLikeValue(P->getIncomingValue(i), Depth)) + return true; + break; + } + } + + return false; +} + /// Check if an integer value fits into our bypass type. ValueRange FastDivInsertionTask::getValueRange(Value *V) { unsigned ShortLen = BypassType->getBitWidth(); @@ -206,6 +259,13 @@ if (Ones.countLeadingZeros() < HiBits) return VALRNG_LONG; + // Long integer divisions are often used in hashtable implementations. It's + // not worth bypassing such divisions because hash values are extremely + // unlikely to have enough leading zeros. The call below is supposed to detect + // values that are unlikely to fit BypassType (including hashes). + if (isHashLikeValue(V)) + return VALRNG_LONG; + return VALRNG_UNKNOWN; } Index: test/CodeGen/X86/bypass-slow-division-64.ll =================================================================== --- test/CodeGen/X86/bypass-slow-division-64.ll +++ test/CodeGen/X86/bypass-slow-division-64.ll @@ -161,3 +161,46 @@ %res = add i64 %div, %rem ret i64 %res } + +; Do not bypass a division if one of the operands looks like a hash value. +define i64 @Test_dont_bypass_xor(i64 %a, i64 %b, i64 %l) nounwind { +; CHECK-LABEL: Test_dont_bypass_xor +; CHECK-NOT: divl + %c = xor i64 %a, %b + %res = udiv i64 %c, %l + ret i64 %res +} + +define i64 @Test_dont_bypass_phi_xor(i64 %a, i64 %b, i64 %l) nounwind { +; CHECK-LABEL: Test_dont_bypass_phi_xor +; CHECK-NOT: divl +entry: + %cmp = icmp eq i64 %b, 0 + br i1 %cmp, label %merge, label %xorpath + +xorpath: + %c = xor i64 %a, %b + br label %merge + +merge: + %e = phi i64 [ 0, %entry ], [ %c, %xorpath ] + %res = sdiv i64 %e, %l + ret i64 %res +} + +define i64 @Test_dont_bypass_mul_long_const(i64 %a, i64 %l) nounwind { +; CHECK-LABEL: Test_dont_bypass_mul_long_const +; CHECK-NOT: divl + %c = mul i64 %a, 5229553307 ; the constant doesn't fit 32 bits + %res = urem i64 %c, %l + ret i64 %res +} + +define i64 @Test_bypass_mul_short_const(i64 %a, i64 %l) nounwind { +; CHECK-LABEL: Test_bypass_mul_short_const +; CHECK-DAG: divl +; CHECK-DAG: divq + %c = mul i64 %a, -42 + %res = urem i64 %c, %l + ret i64 %res +} Index: test/CodeGen/X86/bypass-slow-division-fnv.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/bypass-slow-division-fnv.ll @@ -0,0 +1,67 @@ +; Check that division is not bypassed for hash-like values in optimized code. +; RUN: opt < %s -O3 -unroll-count=4 | llc -mattr=+idivq-to-divl | FileCheck %s +; CHECK-NOT: divl + +target triple = "x86_64-unknown-linux-gnu" + +; Fowler–Noll–Vo hash functions. +; +; A simple but widely used hash algorithm with hash operations hidden inside a +; (probably) unrolled loop. The test is supposed to verify that such code is +; recognized even after running loop unrolling on it and any other applicable +; optimizations. + +define i64 @fnv_1(i8* %str, i64 %len, i64 %tsz) nounwind { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ] + %hash.0 = phi i64 [ -3750763034362895579, %entry ], [ %xor, %for.inc ] + %cmp = icmp ult i64 %i.0, %len + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %mul = mul i64 %hash.0, 1099511628211 + %arrayidx = getelementptr inbounds i8, i8* %str, i64 %i.0 + %0 = load i8, i8* %arrayidx, align 1 + %conv = sext i8 %0 to i64 + %xor = xor i64 %mul, %conv + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add i64 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + %rem = urem i64 %hash.0, %tsz + ret i64 %rem +} + + +define i64 @fnv_1a(i8* %str, i64 %len, i64 %tsz) nounwind { + entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ] + %hash.0 = phi i64 [ -3750763034362895579, %entry ], [ %mul, %for.inc ] + %cmp = icmp ult i64 %i.0, %len + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i8, i8* %str, i64 %i.0 + %0 = load i8, i8* %arrayidx, align 1 + %conv = sext i8 %0 to i64 + %xor = xor i64 %hash.0, %conv + %mul = mul i64 %xor, 1099511628211 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add i64 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + %rem = urem i64 %hash.0, %tsz + ret i64 %rem +}