Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1527,20 +1527,25 @@ /// ccmp B, inv(CB), CA /// check for CB flags /// -/// In general we can create code for arbitrary "... (and (and A B) C)" -/// sequences. We can also implement some "or" expressions, because "(or A B)" -/// is equivalent to "not (and (not A) (not B))" and we can implement some -/// negation operations: -/// We can negate the results of a single comparison by inverting the flags -/// used when the predicate fails and inverting the flags tested in the next -/// instruction; We can also negate the results of the whole previous -/// conditional compare sequence by inverting the flags tested in the next -/// instruction. However there is no way to negate the result of a partial -/// sequence. +/// This allows us to implement chains of ADD operations with SETCC operands. +/// We can in some cases deal with NEG operations: +/// - We can implement (NEG SETCC) i.e. negating a single comparison by +/// negating the flags used in a CCMP/FCCMP operations. +/// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations +/// by negating the flags we test for afterwards. i.e. +/// NEG (CMP CCMP CCCMP ...) can be implemented. +/// - Note that we can only ever negate all previously processed results. +/// What we can not implement by flipping the flags to test is a negation +/// of two sub-trees (because the negation affects all sub-trees emitted so +/// far, so the 2nd sub-tree we emit would also affect the first). +/// With those tools we can implement some OR operations: +/// - (OR (SETCC A) (SETCC B)) can be implemented via: +/// NEG (AND (NEG (SETCC A)) (NEG (SETCC B))) +/// - After transforming OR to NEG/AND combinations we may be able to use NEG +/// elimination rules from earlier to implement the whole thing as a +/// CCMP/FCCMP chain. /// -/// Therefore on encountering an "or" expression we can negate the subtree on -/// one side and have to be able to push the negate to the leafs of the subtree -/// on the other side (see also the comments in code). As complete example: +/// As complete example: /// "or (or (setCA (cmp A)) (setCB (cmp B))) /// (and (setCC (cmp C)) (setCD (cmp D)))" /// is transformed to @@ -1552,8 +1557,11 @@ /// ccmp A, CA, inv(CD) /// ccmp B, CB, inv(CA) /// check for CB flags -/// A counterexample is "or (and A B) (and C D)" which cannot be implemented -/// by conditional compare sequences. +/// +/// A counterexample is "or (and A B) (and C D)" which translates to +/// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))), we +/// can only implement 1 of the inner (not) operations, by implementing them +/// but not both! /// @{ /// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate. @@ -1593,9 +1601,12 @@ /// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be /// expressed as a conjunction. See \ref AArch64CCMP. -/// \param CanNegate Set to true if we can also emit the negation of the -/// tree as a conjunction. +/// \param CanNegate Set to true if we can also emit the negation of the tree +/// as a conjunction. +/// \param MustBeFirst Set to true if we must emit this subtree first because +/// we negate the condition to test. static bool canEmitConjunction(const SDValue Val, bool &CanNegate, + bool &MustBeFirst, bool WillNegate, unsigned Depth = 0) { if (!Val.hasOneUse()) return false; @@ -1604,19 +1615,26 @@ if (Val->getOperand(0).getValueType() == MVT::f128) return false; CanNegate = true; + MustBeFirst = false; return true; } // Protect against exponential runtime and stack overflow. if (Depth > 6) return false; if (Opcode == ISD::AND || Opcode == ISD::OR) { + bool IsOR = Opcode == ISD::OR; SDValue O0 = Val->getOperand(0); SDValue O1 = Val->getOperand(1); bool CanNegateL; - if (!canEmitConjunction(O0, CanNegateL, Depth+1)) + bool MustBeFirstL; + if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1)) return false; bool CanNegateR; - if (!canEmitConjunction(O1, CanNegateR, Depth+1)) + bool MustBeFirstR; + if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1)) + return false; + + if (MustBeFirstL && MustBeFirstR) return false; if (Opcode == ISD::OR) { @@ -1630,16 +1648,15 @@ // (and (not x) (not y)) // to eliminate the outer negation. CanNegate = CanNegateL && CanNegateR; + // Usually we need to negate the result of an OR so it must come first, + // however if we can negate the operands for free and the OR itself will + // get negated (because it is the operand of another or) then we don't + // need to be first. + MustBeFirst = !WillNegate || !CanNegateL || !CanNegateR; } else { - // If the operands are OR expressions then we finally need to negate their - // outputs, we can only do that for the operand with emitted last by - // negating OutCC, not for both operands. - bool NeedsNegOutL = O0->getOpcode() == ISD::OR; - bool NeedsNegOutR = O1->getOpcode() == ISD::OR; - if (NeedsNegOutL && NeedsNegOutR) - return false; // We cannot negate an AND operation. CanNegate = false; + MustBeFirst = MustBeFirstL || MustBeFirstR; } return true; } @@ -1697,61 +1714,69 @@ return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL, DAG); } - assert((Opcode == ISD::AND || (Opcode == ISD::OR && Val->hasOneUse())) && - "Valid conjunction/disjunction tree"); + assert(Val->hasOneUse() && "Valid conjunction/disjunction tree"); - // Check if both sides can be transformed. - SDValue LHS = Val->getOperand(0); - SDValue RHS = Val->getOperand(1); - - // In case of an OR we need to negate our operands and the result. - // (A v B) <=> not(not(A) ^ not(B)) - bool NegateOpsAndResult = Opcode == ISD::OR; - // We can negate the results of all previous operations by inverting the - // predicate flags giving us a free negation for one side. The other side - // must be negatable by itself. - if (NegateOpsAndResult) { - // See which side we can negate. - bool CanNegateL; - bool isValidL = canEmitConjunction(LHS, CanNegateL); - assert(isValidL && "Valid conjunction/disjunction tree"); - (void)isValidL; + bool IsOR = Opcode == ISD::OR; -#ifndef NDEBUG - bool CanNegateR; - bool isValidR = canEmitConjunction(RHS, CanNegateR); - assert(isValidR && "Valid conjunction/disjunction tree"); - assert((CanNegateL || CanNegateR) && "Valid conjunction/disjunction tree"); -#endif + SDValue LHS = Val->getOperand(0); + bool CanNegateL; + bool MustBeFirstL; + bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR); + assert(ValidL && "Valid conjunction/disjunction tree"); + (void)ValidL; - // Order the side which we cannot negate to RHS so we can emit it first. - if (!CanNegateL) + SDValue RHS = Val->getOperand(1); + bool CanNegateR; + bool MustBeFirstR; + bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR); + assert(ValidR && "Valid conjunction/disjunction tree"); + (void)ValidR; + + // Swap subtree that needs the result negated to the right if necessary. + if (MustBeFirstL) { + assert(!MustBeFirstR && "Valid conjunction/disjunction tree"); + std::swap(LHS, RHS); + std::swap(CanNegateL, CanNegateR); + std::swap(MustBeFirstL, MustBeFirstR); + } + + bool NegateR; + bool NegateAfterR; + bool NegateL; + bool NegateAfterAll; + if (Opcode == ISD::OR) { + // Swap negatable subtree to the left side if necessary. + if (!CanNegateL) { + assert(CanNegateR && "at least one side must be negatable"); + assert(!MustBeFirstR && "invalid conjunction/disjunction tree"); + assert(!Negate); std::swap(LHS, RHS); + NegateR = false; + NegateAfterR = true; + } else { + // Negate the left subtree if possible, otherwise negate the result. + NegateR = CanNegateR; + NegateAfterR = !CanNegateR; + } + NegateL = true; + NegateAfterAll = !Negate; } else { - bool NeedsNegOutL = LHS->getOpcode() == ISD::OR; - assert((!NeedsNegOutL || RHS->getOpcode() != ISD::OR) && - "Valid conjunction/disjunction tree"); - // Order the side where we need to negate the output flags to RHS so it - // gets emitted first. - if (NeedsNegOutL) - std::swap(LHS, RHS); + assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree"); + assert(!Negate && "Valid conjunction/disjunction tree"); + + NegateL = false; + NegateR = false; + NegateAfterR = false; + NegateAfterAll = false; } - // Emit RHS. If we want to negate the tree we only need to push a negate - // through if we are already in a PushNegate case, otherwise we can negate - // the "flags to test" afterwards. + // Emit subtrees. AArch64CC::CondCode RHSCC; - SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, Negate, - CCOp, Predicate); - if (NegateOpsAndResult && !Negate) + SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate); + if (NegateAfterR) RHSCC = AArch64CC::getInvertedCondCode(RHSCC); - // Emit LHS. We may need to negate it. - SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, - NegateOpsAndResult, CmpR, - RHSCC); - // If we transformed an OR to and AND then we have to negate the result - // (or absorb the Negate parameter). - if (NegateOpsAndResult && !Negate) + SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC); + if (NegateAfterAll) OutCC = AArch64CC::getInvertedCondCode(OutCC); return CmpL; } @@ -1763,7 +1788,8 @@ static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC) { bool DummyCanNegate; - if (!canEmitConjunction(Val, DummyCanNegate)) + bool DummyResultNeedsNeg; + if (!canEmitConjunction(Val, DummyCanNegate, DummyResultNeedsNeg, false)) return SDValue(); return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL); Index: test/CodeGen/AArch64/arm64-ccmp.ll =================================================================== --- test/CodeGen/AArch64/arm64-ccmp.ll +++ test/CodeGen/AArch64/arm64-ccmp.ll @@ -526,8 +526,8 @@ ; CHECK-LABEL: select_or_one_olt: ; CHECK-LABEL: ; %bb.0: ; CHECK-NEXT: fcmp d0, d1 -; CHECK-NEXT: fccmp d0, d1, #1, ne -; CHECK-NEXT: fccmp d2, d3, #8, vs +; CHECK-NEXT: fccmp d0, d1, #8, le +; CHECK-NEXT: fccmp d2, d3, #8, pl ; CHECK-NEXT: csel w0, w0, w1, mi ; CHECK-NEXT: ret define i32 @select_or_one_olt(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 { @@ -556,8 +556,8 @@ ; CHECK-LABEL: select_or_ueq_olt: ; CHECK-LABEL: ; %bb.0: ; CHECK-NEXT: fcmp d0, d1 -; CHECK-NEXT: fccmp d0, d1, #8, le -; CHECK-NEXT: fccmp d2, d3, #8, mi +; CHECK-NEXT: fccmp d0, d1, #1, ne +; CHECK-NEXT: fccmp d2, d3, #8, vc ; CHECK-NEXT: csel w0, w0, w1, mi ; CHECK-NEXT: ret define i32 @select_or_ueq_olt(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 { @@ -656,4 +656,68 @@ ret i32 %sel } +; This testcase resembles the core problem of http://llvm.org/PR39550 +; (an OR operation is 2 levels deep but needs to be implemented first) +; CHECK-LABEL: deep_or +; CHECK: cmp w2, #20 +; CHECK-NEXT: ccmp w2, #15, #4, ne +; CHECK-NEXT: ccmp w1, #0, #4, eq +; CHECK-NEXT: ccmp w0, #0, #4, ne +; CHECK-NEXT: csel w0, w4, w5, ne +; CHECK-NEXT: ret +define i32 @deep_or(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %x, i32 %y) { + %c0 = icmp ne i32 %a0, 0 + %c1 = icmp ne i32 %a1, 0 + %c2 = icmp eq i32 %a2, 15 + %c3 = icmp eq i32 %a2, 20 + + %or = or i1 %c2, %c3 + %and0 = and i1 %or, %c1 + %and1 = and i1 %and0, %c0 + %sel = select i1 %and1, i32 %x, i32 %y + ret i32 %sel +} + +; Variation of deep_or, we still need to implement the OR first though. +; CHECK-LABEL: deep_or1 +; CHECK: cmp w2, #20 +; CHECK-NEXT: ccmp w2, #15, #4, ne +; CHECK-NEXT: ccmp w0, #0, #4, eq +; CHECK-NEXT: ccmp w1, #0, #4, ne +; CHECK-NEXT: csel w0, w4, w5, ne +; CHECK-NEXT: ret +define i32 @deep_or1(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %x, i32 %y) { + %c0 = icmp ne i32 %a0, 0 + %c1 = icmp ne i32 %a1, 0 + %c2 = icmp eq i32 %a2, 15 + %c3 = icmp eq i32 %a2, 20 + + %or = or i1 %c2, %c3 + %and0 = and i1 %c0, %or + %and1 = and i1 %and0, %c1 + %sel = select i1 %and1, i32 %x, i32 %y + ret i32 %sel +} + +; Variation of deep_or, we still need to implement the OR first though. +; CHECK-LABEL: deep_or2 +; CHECK: cmp w2, #20 +; CHECK-NEXT: ccmp w2, #15, #4, ne +; CHECK-NEXT: ccmp w1, #0, #4, eq +; CHECK-NEXT: ccmp w0, #0, #4, ne +; CHECK-NEXT: csel w0, w4, w5, ne +; CHECK-NEXT: ret +define i32 @deep_or2(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %x, i32 %y) { + %c0 = icmp ne i32 %a0, 0 + %c1 = icmp ne i32 %a1, 0 + %c2 = icmp eq i32 %a2, 15 + %c3 = icmp eq i32 %a2, 20 + + %or = or i1 %c2, %c3 + %and0 = and i1 %c0, %c1 + %and1 = and i1 %and0, %or + %sel = select i1 %and1, i32 %x, i32 %y + ret i32 %sel +} + attributes #0 = { nounwind }