Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5430,6 +5430,16 @@
     }
   }
 
+  // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
+  if (N0.getOpcode() == ISD::AND && isBitwiseNot(N0.getOperand(1)) &&
+      N0.getOperand(1).getOperand(0) == N1)
+    return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
+
+  // fold (or Y, (and X, (xor Y, -1))) -> (or Y, X)
+  if (N1.getOpcode() == ISD::AND && isBitwiseNot(N1.getOperand(1)) &&
+      N1.getOperand(1).getOperand(0) == N0)
+    return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1.getOperand(0));
+
   // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
   if (N0.getOpcode() == N1.getOpcode())
    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6922,6 +6922,10 @@
   if (!BVN)
     return Op;
 
+  // Might be a freshly created "and" that hasn't been combined yet.
+  if (ISD::isBuildVectorAllOnes(BVN))
+    return LHS;
+
   APInt DefBits(VT.getSizeInBits(), 0);
   APInt UndefBits(VT.getSizeInBits(), 0);
   if (resolveBuildVector(BVN, DefBits, UndefBits)) {
Index: llvm/test/CodeGen/AArch64/sat-add.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sat-add.ll
+++ llvm/test/CodeGen/AArch64/sat-add.ll
@@ -365,7 +365,6 @@
 ; CHECK-NEXT:    add v1.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    cmhi v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT:    bic v0.4s, #0
 ; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
@@ -382,7 +381,6 @@
 ; CHECK-NEXT:    add v1.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    cmhi v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT:    bic v0.4s, #0
 ; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
@@ -412,7 +410,6 @@
 ; CHECK-NEXT:    add v1.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    cmhi v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT:    bic v0.4s, #0
 ; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
@@ -429,7 +426,6 @@
 ; CHECK-NEXT:    add v1.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    cmhi v0.8h, v0.8h, v2.8h
 ; CHECK-NEXT:    bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT:    bic v0.4s, #0
 ; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
@@ -458,8 +454,6 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.4s, #42
 ; CHECK-NEXT:    add v1.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    cmhi v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT:    bic v0.4s, #0
 ; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
@@ -475,8 +469,6 @@
 ; CHECK-NEXT:    movi v1.4s, #42
 ; CHECK-NEXT:    mvni v2.4s, #42
 ; CHECK-NEXT:    add v1.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    cmhi v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT:    bic v0.4s, #0
 ; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
@@ -509,8 +501,6 @@
 ; CHECK-NEXT:    dup v1.2d, x8
 ; CHECK-NEXT:    add v1.2d, v0.2d, v1.2d
 ; CHECK-NEXT:    cmhi v0.2d, v0.2d, v1.2d
-; CHECK-NEXT:    bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT:    bic v0.4s, #0
 ; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %a = add <2 x i64> %x, <i64 42, i64 42>
@@ -528,8 +518,6 @@
 ; CHECK-NEXT:    dup v2.2d, x9
 ; CHECK-NEXT:    add v1.2d, v0.2d, v1.2d
 ; CHECK-NEXT:    cmhi v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT:    bic v0.4s, #0
 ; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %a = add <2 x i64> %x, <i64 42, i64 42>
@@ -558,7 +546,6 @@
 ; CHECK-NEXT:    add v1.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    cmhi v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT:    bic v0.4s, #0
 ; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %a = add <16 x i8> %x, %y
@@ -574,7 +561,6 @@
 ; CHECK-NEXT:    add v1.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    cmhi v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT:    bic v0.4s, #0
 ; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %noty = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
@@ -604,7 +590,6 @@
 ; CHECK-NEXT:    add v1.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    cmhi v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT:    bic v0.4s, #0
 ; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %a = add <8 x i16> %x, %y
@@ -620,7 +605,6 @@
 ; CHECK-NEXT:    add v1.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    cmhi v0.8h, v0.8h, v2.8h
 ; CHECK-NEXT:    bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT:    bic v0.4s, #0
 ; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %noty = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
@@ -649,8 +633,6 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    add v1.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    cmhi v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT:    bic v0.4s, #0
 ; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %a = add <4 x i32> %x, %y
@@ -665,8 +647,6 @@
 ; CHECK-NEXT:    mvn v2.16b, v1.16b
 ; CHECK-NEXT:    add v1.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    cmhi v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT:    bic v0.4s, #0
 ; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -696,8 +676,6 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    add v1.2d, v0.2d, v1.2d
 ; CHECK-NEXT:    cmhi v0.2d, v0.2d, v1.2d
-; CHECK-NEXT:    bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT:    bic v0.4s, #0
 ; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %a = add <2 x i64> %x, %y
@@ -712,8 +690,6 @@
 ; CHECK-NEXT:    mvn v2.16b, v1.16b
 ; CHECK-NEXT:    add v1.2d, v0.2d, v1.2d
 ; CHECK-NEXT:    cmhi v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT:    bic v0.4s, #0
 ; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %noty = xor <2 x i64> %y, <i64 -1, i64 -1>
Index: llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
===================================================================
--- llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
+++ llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
@@ -404,8 +404,6 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    add v1.2d, v0.2d, v1.2d
 ; CHECK-NEXT:    cmhi v0.2d, v0.2d, v1.2d
-; CHECK-NEXT:    bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT:    bic v0.4s, #0
 ; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %z = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
@@ -419,10 +417,6 @@
 ; CHECK-NEXT:    add v3.2d, v1.2d, v3.2d
 ; CHECK-NEXT:    cmhi v0.2d, v0.2d, v2.2d
 ; CHECK-NEXT:    cmhi v1.2d, v1.2d, v3.2d
-; CHECK-NEXT:    bic v2.16b, v2.16b, v0.16b
-; CHECK-NEXT:    bic v0.4s, #0
-; CHECK-NEXT:    bic v3.16b, v3.16b, v1.16b
-; CHECK-NEXT:    bic v1.4s, #0
 ; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    orr v1.16b, v1.16b, v3.16b
 ; CHECK-NEXT:    ret
@@ -441,14 +435,6 @@
 ; CHECK-NEXT:    cmhi v1.2d, v1.2d, v5.2d
 ; CHECK-NEXT:    cmhi v2.2d, v2.2d, v6.2d
 ; CHECK-NEXT:    cmhi v3.2d, v3.2d, v7.2d
-; CHECK-NEXT:    bic v4.16b, v4.16b, v0.16b
-; CHECK-NEXT:    bic v0.4s, #0
-; CHECK-NEXT:    bic v5.16b, v5.16b, v1.16b
-; CHECK-NEXT:    bic v1.4s, #0
-; CHECK-NEXT:    bic v6.16b, v6.16b, v2.16b
-; CHECK-NEXT:    bic v2.4s, #0
-; CHECK-NEXT:    bic v7.16b, v7.16b, v3.16b
-; CHECK-NEXT:    bic v3.4s, #0
 ; CHECK-NEXT:    orr v0.16b, v0.16b, v4.16b
 ; CHECK-NEXT:    orr v1.16b, v1.16b, v5.16b
 ; CHECK-NEXT:    orr v2.16b, v2.16b, v6.16b
Index: llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask-const.ll
===================================================================
--- llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask-const.ll
+++ llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask-const.ll
@@ -132,8 +132,7 @@
 define <4 x i32> @in_constant_mone_vary(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
 ; CHECK-LABEL: in_constant_mone_vary:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    bic v0.16b, v1.16b, v2.16b
-; CHECK-NEXT:    orr v0.16b, v2.16b, v0.16b
+; CHECK-NEXT:    orr v0.16b, v2.16b, v1.16b
 ; CHECK-NEXT:    ret
   %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x
   %n1 = and <4 x i32> %n0, %mask
Index: llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
===================================================================
--- llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
+++ llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
@@ -355,17 +355,14 @@
 ;
 ; CHECK-SSE2-LABEL: in_constant_mone_vary:
 ; CHECK-SSE2:       # %bb.0:
-; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm1
-; CHECK-SSE2-NEXT:    movaps %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
-; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
+; CHECK-SSE2-NEXT:    orps (%rsi), %xmm0
 ; CHECK-SSE2-NEXT:    retq
 ;
 ; CHECK-XOP-LABEL: in_constant_mone_vary:
 ; CHECK-XOP:       # %bb.0:
 ; CHECK-XOP-NEXT:    vmovaps (%rdx), %xmm0
-; CHECK-XOP-NEXT:    vandnps (%rsi), %xmm0, %xmm1
-; CHECK-XOP-NEXT:    vorps %xmm1, %xmm0, %xmm0
+; CHECK-XOP-NEXT:    vorps (%rsi), %xmm0, %xmm0
 ; CHECK-XOP-NEXT:    retq
   %x = load <4 x i32>, <4 x i32> *%px, align 16
   %y = load <4 x i32>, <4 x i32> *%py, align 16
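
Note: the DAGCombiner change implements the bitwise identity (X & ~Y) | Y == X | Y (plus the commuted form), which is why the bic/orr and andnps/orps sequences in the tests above collapse into a single orr/orps: any bit that ~Y clears from X is supplied by Y itself. A minimal IR sketch that should hit the new fold is below; the function name, file name, and llc invocation are illustrative, not part of the patch:

  ; Hypothetical repro: llc -mtriple=aarch64 -o - reduced.ll
  define <4 x i32> @or_and_not(<4 x i32> %x, <4 x i32> %y) {
    %not.y = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> ; ~Y
    %masked = and <4 x i32> %x, %not.y                          ; X & ~Y
    %r = or <4 x i32> %masked, %y                               ; folds to X | Y
    ret <4 x i32> %r
  }

The AArch64ISelLowering guard covers a related detail: when lowering sees a vector AND whose mask operand is an all-ones build vector (per the new comment, possibly an "and" the combiner created but has not yet simplified), returning LHS avoids emitting the no-op "bic vN.4s, #0" that the updated sat-add.ll and uadd_sat_vec.ll checks no longer expect.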