Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -571,37 +571,6 @@
     }
     return false; // Don't fall through, will infinitely loop.
   case ISD::AND:
-    // If the RHS is a constant, check to see if the LHS would be zero without
-    // using the bits from the RHS. Below, we use knowledge about the RHS to
-    // simplify the LHS, here we're using information from the LHS to simplify
-    // the RHS.
-    if (ConstantSDNode *RHSC = isConstOrConstSplat(Op.getOperand(1))) {
-      SDValue Op0 = Op.getOperand(0);
-      KnownBits LHSKnown;
-      // Do not increment Depth here; that can cause an infinite loop.
-      TLO.DAG.computeKnownBits(Op0, LHSKnown, Depth);
-      // If the LHS already has zeros where RHSC does, this and is dead.
-      if ((LHSKnown.Zero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
-        return TLO.CombineTo(Op, Op0);
-
-      // If any of the set bits in the RHS are known zero on the LHS, shrink
-      // the constant.
-      if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & NewMask, TLO))
-        return true;
-
-      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
-      // constant, but if this 'and' is only clearing bits that were just set by
-      // the xor, then this 'and' can be eliminated by shrinking the mask of
-      // the xor. For example, for a 32-bit X:
-      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
-      if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
-          LHSKnown.One == ~RHSC->getAPIntValue()) {
-        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, Op.getValueType(),
-                                      Op0.getOperand(0), Op.getOperand(1));
-        return TLO.CombineTo(Op, Xor);
-      }
-    }
-
     if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1))
       return true;
     assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
@@ -626,6 +595,21 @@
     if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO))
       return true;
 
+    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      SDValue Op0 = Op.getOperand(0);
+      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
+      // constant, but if this 'and' is only clearing bits that were just set by
+      // the xor, then this 'and' can be eliminated by shrinking the mask of
+      // the xor. For example, for a 32-bit X:
+      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
+      if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
+          Known2.One == ~RHSC->getAPIntValue()) {
+        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, Op.getValueType(),
+                                      Op0.getOperand(0), Op.getOperand(1));
+        return TLO.CombineTo(Op, Xor);
+      }
+    }
+
     // Output known-1 bits are only known if set in both the LHS & RHS.
     Known.One &= Known2.One;
     // Output known-0 are known to be clear if zero in either the LHS | RHS.
Index: test/CodeGen/AArch64/fast-isel-select.ll
===================================================================
--- test/CodeGen/AArch64/fast-isel-select.ll
+++ test/CodeGen/AArch64/fast-isel-select.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SLOWISEL
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FASTISEL
 
 ; First test the different supported value types for select.
 define zeroext i1 @select_i1(i1 zeroext %c, i1 zeroext %a, i1 zeroext %b) {
@@ -294,9 +294,14 @@
 }
 
 define zeroext i1 @select_opt2(i1 zeroext %c, i1 zeroext %a) {
-; CHECK-LABEL: select_opt2
-; CHECK: eor [[REG:w[0-9]+]], w0, #0x1
-; CHECK: orr {{w[0-9]+}}, [[REG]], w1
+; SLOWISEL-LABEL: select_opt2
+; SLOWISEL: orn [[REG:w[0-9]+]], w1, w0
+; SLOWISEL: and {{w[0-9]+}}, [[REG]], #0x1
+;
+; FASTISEL-LABEL: select_opt2
+; FASTISEL: eor [[REG:w[0-9]+]], w0, #0x1
+; FASTISEL: orr [[REG2:w[0-9]+]], [[REG]], w1
+; FASTISEL: and {{w[0-9]+}}, [[REG2]], #0x1
   %1 = select i1 %c, i1 %a, i1 true
   ret i1 %1
 }
Index: test/CodeGen/AMDGPU/fneg.f16.ll
===================================================================
--- test/CodeGen/AMDGPU/fneg.f16.ll
+++ test/CodeGen/AMDGPU/fneg.f16.ll
@@ -64,8 +64,9 @@
 ; GCN-LABEL: {{^}}s_fneg_v2f16:
 ; CI: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}}
 ; CI: v_xor_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
-; CI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
+; CI: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
 ; CI: v_xor_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
+; CI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
 ; CI: v_or_b32_e32
 
 ; VI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x8000{{$}}
Index: test/CodeGen/PowerPC/rlwimi-and.ll
===================================================================
--- test/CodeGen/PowerPC/rlwimi-and.ll
+++ test/CodeGen/PowerPC/rlwimi-and.ll
@@ -29,8 +29,8 @@
   unreachable
 
 ; CHECK: @test
-; CHECK: clrlwi [[R1:[0-9]+]], {{[0-9]+}}, 31
-; CHECK: rlwimi [[R1]], {{[0-9]+}}, 8, 23, 23
+; CHECK: rlwimi [[R1:[0-9]+]], {{[0-9]+}}, 8, 16, 23
+; CHECK: andi. {{[0-9]+}}, [[R1]], 257
 
 codeRepl29: ; preds = %codeRepl1
   unreachable
Index: test/CodeGen/X86/combine-and.ll
===================================================================
--- test/CodeGen/X86/combine-and.ll
+++ test/CodeGen/X86/combine-and.ll
@@ -254,6 +254,7 @@
 ; CHECK-LABEL: ashr_mask1_v8i16:
 ; CHECK: # BB#0:
 ; CHECK-NEXT: psrlw $15, %xmm0
+; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
   %1 = ashr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
   %2 = and <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>