Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -569,37 +569,6 @@ } return false; // Don't fall through, will infinitely loop. case ISD::AND: - // If the RHS is a constant, check to see if the LHS would be zero without - // using the bits from the RHS. Below, we use knowledge about the RHS to - // simplify the LHS, here we're using information from the LHS to simplify - // the RHS. - if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { - SDValue Op0 = Op.getOperand(0); - APInt LHSZero, LHSOne; - // Do not increment Depth here; that can cause an infinite loop. - TLO.DAG.computeKnownBits(Op0, LHSZero, LHSOne, Depth); - // If the LHS already has zeros where RHSC does, this and is dead. - if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) - return TLO.CombineTo(Op, Op0); - - // If any of the set bits in the RHS are known zero on the LHS, shrink - // the constant. - if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask)) - return true; - - // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its - // constant, but if this 'and' is only clearing bits that were just set by - // the xor, then this 'and' can be eliminated by shrinking the mask of - // the xor. 
For example, for a 32-bit X: - // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1 - if (isBitwiseNot(Op0) && Op0.hasOneUse() && - LHSOne == ~RHSC->getAPIntValue()) { - SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, Op.getValueType(), - Op0.getOperand(0), Op.getOperand(1)); - return TLO.CombineTo(Op, Xor); - } - } - if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, KnownOne, TLO, Depth+1)) return true; @@ -625,6 +594,21 @@ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + SDValue Op0 = Op.getOperand(0); + // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its + // constant, but if this 'and' is only clearing bits that were just set by + // the xor, then this 'and' can be eliminated by shrinking the mask of + // the xor. For example, for a 32-bit X: + // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1 + if (isBitwiseNot(Op0) && Op0.hasOneUse() && + KnownOne2 == ~RHSC->getAPIntValue()) { + SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, Op.getValueType(), + Op0.getOperand(0), Op.getOperand(1)); + return TLO.CombineTo(Op, Xor); + } + } + // Output known-1 bits are only known if set in both the LHS & RHS. KnownOne &= KnownOne2; // Output known-0 are known to be clear if zero in either the LHS | RHS. 
Index: test/CodeGen/AArch64/fast-isel-select.ll =================================================================== --- test/CodeGen/AArch64/fast-isel-select.ll +++ test/CodeGen/AArch64/fast-isel-select.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SLOWISEL +; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK -check-prefix=FASTISEL ; First test the different supported value types for select. define zeroext i1 @select_i1(i1 zeroext %c, i1 zeroext %a, i1 zeroext %b) { @@ -294,9 +294,14 @@ } define zeroext i1 @select_opt2(i1 zeroext %c, i1 zeroext %a) { -; CHECK-LABEL: select_opt2 -; CHECK: eor [[REG:w[0-9]+]], w0, #0x1 -; CHECK: orr {{w[0-9]+}}, [[REG]], w1 +; SLOWISEL-LABEL: select_opt2 +; SLOWISEL: orn [[REG:w[0-9]+]], w1, w0 +; SLOWISEL: and {{w[0-9]+}}, [[REG]], #0x1 +; +; FASTISEL-LABEL: select_opt2 +; FASTISEL: eor [[REG:w[0-9]+]], w0, #0x1 +; FASTISEL: orr [[REG2:w[0-9]+]], [[REG]], w1 +; FASTISEL: and {{w[0-9]+}}, [[REG2]], #0x1 %1 = select i1 %c, i1 %a, i1 true ret i1 %1 } Index: test/CodeGen/AMDGPU/fneg.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fneg.f16.ll +++ test/CodeGen/AMDGPU/fneg.f16.ll @@ -64,8 +64,9 @@ ; GCN-LABEL: {{^}}s_fneg_v2f16: ; CI: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}} ; CI: v_xor_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}} -; CI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}} +; CI: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}} ; CI: v_xor_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}} +; CI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}} ; CI: v_or_b32_e32 ; VI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x8000{{$}} Index: 
test/CodeGen/PowerPC/rlwimi-and.ll =================================================================== --- test/CodeGen/PowerPC/rlwimi-and.ll +++ test/CodeGen/PowerPC/rlwimi-and.ll @@ -29,8 +29,8 @@ unreachable ; CHECK: @test -; CHECK: clrlwi [[R1:[0-9]+]], {{[0-9]+}}, 31 -; CHECK: rlwimi [[R1]], {{[0-9]+}}, 8, 23, 23 +; CHECK: rlwimi [[R1:[0-9]+]], {{[0-9]+}}, 8, 16, 23 +; CHECK: andi. {{[0-9]+}}, [[R1]], 257 codeRepl29: ; preds = %codeRepl1 unreachable