Index: test/Transforms/InstCombine/invert-const-mask-in-masked-merge-scalar.ll
===================================================================
--- /dev/null
+++ test/Transforms/InstCombine/invert-const-mask-in-masked-merge-scalar.ll
@@ -0,0 +1,436 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; If we have a masked merge of the form (where M is a constant):
+;   ((x ^ y) & M) ^ y
+; then invert the mask, if the inverted mask looks better, giving:
+;   ((x ^ y) & ~M) ^ x
+
+; This file, unlike the vector test file, is fairly large, which lets that file stay small.
+; It provides good coverage of all the mask combinations that could result in
+; cyclic canonicalization if the canonical form is not well-defined.
+
+; Naming scheme: @t_<bitwidth>_<mask>, where <mask> is the unsigned value of the constant.
+
+define i3 @t_3_1 (i3 %x, i3 %y) {
+; CHECK-LABEL: @t_3_1(
+; CHECK-NEXT:    [[N0:%.*]] = xor i3 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and i3 [[N0]], 1
+; CHECK-NEXT:    [[R:%.*]] = xor i3 [[N1]], [[Y]]
+; CHECK-NEXT:    ret i3 [[R]]
+;
+  %n0 = xor i3 %x, %y
+  %n1 = and i3 %n0, 1
+  %r = xor i3 %n1, %y
+  ret i3 %r
+}
+
+define i3 @t_3_2 (i3 %x, i3 %y) {
+; CHECK-LABEL: @t_3_2(
+; CHECK-NEXT:    [[N0:%.*]] = xor i3 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and i3 [[N0]], 2
+; CHECK-NEXT:    [[R:%.*]] = xor i3 [[N1]], [[Y]]
+; CHECK-NEXT:    ret i3 [[R]]
+;
+  %n0 = xor i3 %x, %y
+  %n1 = and i3 %n0, 2
+  %r = xor i3 %n1, %y
+  ret i3 %r
+}
+
+define i3 @t_3_3 (i3 %x, i3 %y) {
+; CHECK-LABEL: @t_3_3(
+; CHECK-NEXT:    [[N0:%.*]] = xor i3 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and i3 [[N0]], 3
+; CHECK-NEXT:    [[R:%.*]] = xor i3 [[N1]], [[Y]]
+; CHECK-NEXT:    ret i3 [[R]]
+;
+  %n0 = xor i3 %x, %y
+  %n1 = and i3 %n0, 3
+  %r = xor i3 %n1, %y
+  ret i3 %r
+}
+
+define i3 @t_3_4 (i3 %x, i3 %y) {
+; CHECK-LABEL: @t_3_4(
+; CHECK-NEXT:    [[N0:%.*]] = xor i3 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and i3 [[N0]], -4
+; CHECK-NEXT:    [[R:%.*]] = xor i3 [[N1]], [[Y]]
+; CHECK-NEXT:    ret i3 [[R]]
+;
+  %n0 = xor i3 %x, %y
+  %n1 = and i3 %n0, -4
+  %r = xor i3 %n1, %y
+  ret i3 %r
+}
+
+define i3 @t_3_5 (i3 %x, i3 %y) {
+; CHECK-LABEL: @t_3_5(
+; CHECK-NEXT:    [[N0:%.*]] = xor i3 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and i3 [[N0]], -3
+; CHECK-NEXT:    [[R:%.*]] = xor i3 [[N1]], [[Y]]
+; CHECK-NEXT:    ret i3 [[R]]
+;
+  %n0 = xor i3 %x, %y
+  %n1 = and i3 %n0, -3
+  %r = xor i3 %n1, %y
+  ret i3 %r
+}
+
+define i3 @t_3_6 (i3 %x, i3 %y) {
+; CHECK-LABEL: @t_3_6(
+; CHECK-NEXT:    [[N0:%.*]] = xor i3 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and i3 [[N0]], -2
+; CHECK-NEXT:    [[R:%.*]] = xor i3 [[N1]], [[Y]]
+; CHECK-NEXT:    ret i3 [[R]]
+;
+  %n0 = xor i3 %x, %y
+  %n1 = and i3 %n0, -2
+  %r = xor i3 %n1, %y
+  ret i3 %r
+}
+
+define i4 @t_4_1 (i4 %x, i4 %y) {
+; CHECK-LABEL: @t_4_1(
+; CHECK-NEXT:    [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and i4 [[N0]], 1
+; CHECK-NEXT:    [[R:%.*]] = xor i4 [[N1]], [[Y]]
+; CHECK-NEXT:    ret i4 [[R]]
+;
+  %n0 = xor i4 %x, %y
+  %n1 = and i4 %n0, 1
+  %r = xor i4 %n1, %y
+  ret i4 %r
+}
+
+define i4 @t_4_2 (i4 %x, i4 %y) {
+; CHECK-LABEL: @t_4_2(
+; CHECK-NEXT:    [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and i4 [[N0]], 2
+; CHECK-NEXT:    [[R:%.*]] = xor i4 [[N1]], [[Y]]
+; CHECK-NEXT:    ret i4 [[R]]
+;
+  %n0 = xor i4 %x, %y
+  %n1 = and i4 %n0, 2
+  %r = xor i4 %n1, %y
+  ret i4 %r
+}
+
+define i4 @t_4_3 (i4 %x, i4 %y) {
+; CHECK-LABEL: @t_4_3(
+; CHECK-NEXT:    [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and i4 [[N0]], 3
+; CHECK-NEXT:    [[R:%.*]] = xor i4 [[N1]], [[Y]]
+; CHECK-NEXT:    ret i4 [[R]]
+;
+  %n0 = xor i4 %x, %y
+  %n1 = and i4 %n0, 3
+  %r = xor i4 %n1, %y
+  ret i4 %r
+}
+
+define i4 @t_4_4 (i4 %x, i4 %y) {
+; CHECK-LABEL: @t_4_4(
+; CHECK-NEXT:    [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and i4 [[N0]], 4
+; CHECK-NEXT:    [[R:%.*]] = xor i4 [[N1]], [[Y]]
+; CHECK-NEXT:    ret i4 [[R]]
+;
+  %n0 = xor i4 %x, %y
+  %n1 = and i4 %n0, 4
+  %r = xor i4 %n1, %y
+  ret i4 %r
+}
+
+define i4 @t_4_5 (i4 %x, i4 %y) {
+; CHECK-LABEL: @t_4_5(
+; CHECK-NEXT:    [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and i4 [[N0]], 5
+; CHECK-NEXT:    [[R:%.*]] = xor i4 [[N1]], [[Y]]
+; CHECK-NEXT:    ret i4 [[R]]
+;
+  %n0 = xor i4 %x, %y
+  %n1 = and i4 %n0, 5
+  %r = xor i4 %n1, %y
+  ret i4 %r
+}
+
+define i4 @t_4_6 (i4 %x, i4 %y) {
+; CHECK-LABEL: @t_4_6(
+; CHECK-NEXT:    [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and i4 [[N0]], 6
+; CHECK-NEXT:    [[R:%.*]] = xor i4 [[N1]], [[Y]]
+; CHECK-NEXT:    ret i4 [[R]]
+;
+  %n0 = xor i4 %x, %y
+  %n1 = and i4 %n0, 6
+  %r = xor i4 %n1, %y
+  ret i4 %r
+}
+
+define i4 @t_4_7 (i4 %x, i4 %y) {
+; CHECK-LABEL: @t_4_7(
+; CHECK-NEXT:    [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and i4 [[N0]], 7
+; CHECK-NEXT:    [[R:%.*]] = xor i4 [[N1]], [[Y]]
+; CHECK-NEXT:    ret i4 [[R]]
+;
+  %n0 = xor i4 %x, %y
+  %n1 = and i4 %n0, 7
+  %r = xor i4 %n1, %y
+  ret i4 %r
+}
+
+define i4 @t_4_8 (i4 %x, i4 %y) {
+; CHECK-LABEL: @t_4_8(
+; CHECK-NEXT:    [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and i4 [[N0]], -8
+; CHECK-NEXT:    [[R:%.*]] = xor i4 [[N1]], [[Y]]
+; CHECK-NEXT:    ret i4 [[R]]
+;
+  %n0 = xor i4 %x, %y
+  %n1 = and i4 %n0, -8
+  %r = xor i4 %n1, %y
+  ret i4 %r
+}
+
+define i4 @t_4_9 (i4 %x, i4 %y) {
+; CHECK-LABEL: @t_4_9(
+; CHECK-NEXT:    [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and i4 [[N0]], -7
+; CHECK-NEXT:    [[R:%.*]] = xor i4 [[N1]], [[Y]]
+; CHECK-NEXT:    ret i4 [[R]]
+;
+  %n0 = xor i4 %x, %y
+  %n1 = and i4 %n0, -7
+  %r = xor i4 %n1, %y
+  ret i4 %r
+}
+
+define i4 @t_4_10 (i4 %x, i4 %y) {
+; CHECK-LABEL: @t_4_10(
+; CHECK-NEXT:    [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and i4 [[N0]], -6
+; CHECK-NEXT:    [[R:%.*]] = xor i4 [[N1]], [[Y]]
+; CHECK-NEXT:    ret i4 [[R]]
+;
+  %n0 = xor i4 %x, %y
+  %n1 = and i4 %n0, -6
+  %r = xor i4 %n1, %y
+  ret i4 %r
+}
+
+define i4 @t_4_11 (i4 %x, i4 %y) {
+; CHECK-LABEL: @t_4_11(
+; CHECK-NEXT:    [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and i4 [[N0]], -5
+; CHECK-NEXT:    [[R:%.*]] = xor i4 [[N1]], [[Y]]
+; CHECK-NEXT:    ret i4 [[R]]
+;
+  %n0 = xor i4 %x, %y
+  %n1 = and i4 %n0, -5
+  %r = xor i4 %n1, %y
+  ret i4 %r
+}
+
+define i4 @t_4_12 (i4 %x, i4 %y) {
+; CHECK-LABEL: @t_4_12(
+; CHECK-NEXT:    [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and i4 [[N0]], -4
+; CHECK-NEXT:    [[R:%.*]] = xor i4 [[N1]], [[Y]]
+; CHECK-NEXT:    ret i4 [[R]]
+;
+  %n0 = xor i4 %x, %y
+  %n1 = and i4 %n0, -4
+  %r = xor i4 %n1, %y
+  ret i4 %r
+}
+
+define i4 @t_4_13 (i4 %x, i4 %y) {
+; CHECK-LABEL: @t_4_13(
+; CHECK-NEXT:    [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and i4 [[N0]], -3
+; CHECK-NEXT:    [[R:%.*]] = xor i4 [[N1]], [[Y]]
+; CHECK-NEXT:    ret i4 [[R]]
+;
+  %n0 = xor i4 %x, %y
+  %n1 = and i4 %n0, -3
+  %r = xor i4 %n1, %y
+  ret i4 %r
+}
+
+define i4 @t_4_14 (i4 %x, i4 %y) {
+; CHECK-LABEL: @t_4_14(
+; CHECK-NEXT:    [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and i4 [[N0]], -2
+; CHECK-NEXT:    [[R:%.*]] = xor i4 [[N1]], [[Y]]
+; CHECK-NEXT:    ret i4 [[R]]
+;
+  %n0 = xor i4 %x, %y
+  %n1 = and i4 %n0, -2
+  %r = xor i4 %n1, %y
+  ret i4 %r
+}
+
+; ============================================================================ ;
+; Commutativity. Naming: @c_<inner xor swapped>_<%x instead of %y>_<final xor swapped>
+; ============================================================================ ;
+
+; Operands produced by calling @gen4 are used so that the IR complexity sorting does not interfere.
+declare i4 @gen4()
+
+define i4 @c_1_0_0 (i4 %x, i4 %y) {
+; CHECK-LABEL: @c_1_0_0(
+; CHECK-NEXT:    [[N0:%.*]] = xor i4 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and i4 [[N0]], -2
+; CHECK-NEXT:    [[R:%.*]] = xor i4 [[N1]], [[Y]]
+; CHECK-NEXT:    ret i4 [[R]]
+;
+  %n0 = xor i4 %y, %x ; operands in swapped order
+  %n1 = and i4 %n0, -2
+  %r = xor i4 %n1, %y
+  ret i4 %r
+}
+
+define i4 @c_0_1_0 (i4 %x, i4 %y) {
+; CHECK-LABEL: @c_0_1_0(
+; CHECK-NEXT:    [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and i4 [[N0]], -2
+; CHECK-NEXT:    [[R:%.*]] = xor i4 [[N1]], [[X]]
+; CHECK-NEXT:    ret i4 [[R]]
+;
+  %n0 = xor i4 %x, %y
+  %n1 = and i4 %n0, -2
+  %r = xor i4 %n1, %x ; uses %x instead of %y
+  ret i4 %r
+}
+
+define i4 @c_0_0_1 () {
+; CHECK-LABEL: @c_0_0_1(
+; CHECK-NEXT:    [[X:%.*]] = call i4 @gen4()
+; CHECK-NEXT:    [[Y:%.*]] = call i4 @gen4()
+; CHECK-NEXT:    [[N0:%.*]] = xor i4 [[X]], [[Y]]
+; CHECK-NEXT:    [[N1:%.*]] = and i4 [[N0]], -2
+; CHECK-NEXT:    [[R:%.*]] = xor i4 [[Y]], [[N1]]
+; CHECK-NEXT:    ret i4 [[R]]
+;
+  %x = call i4 @gen4()
+  %y = call i4 @gen4()
+  %n0 = xor i4 %x, %y
+  %n1 = and i4 %n0, -2
+  %r = xor i4 %y, %n1 ; operands in swapped order
+  ret i4 %r
+}
+
+define i4 @c_1_1_0 (i4 %x, i4 %y) {
+; CHECK-LABEL: @c_1_1_0(
+; CHECK-NEXT:    [[N0:%.*]] = xor i4 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and i4 [[N0]], -2
+; CHECK-NEXT:    [[R:%.*]] = xor i4 [[N1]], [[X]]
+; CHECK-NEXT:    ret i4 [[R]]
+;
+  %n0 = xor i4 %y, %x ; operands in swapped order
+  %n1 = and i4 %n0, -2
+  %r = xor i4 %n1, %x ; uses %x instead of %y
+  ret i4 %r
+}
+
+define i4 @c_1_0_1 (i4 %x) {
+; CHECK-LABEL: @c_1_0_1(
+; CHECK-NEXT:    [[Y:%.*]] = call i4 @gen4()
+; CHECK-NEXT:    [[N0:%.*]] = xor i4 [[Y]], [[X:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and i4 [[N0]], -2
+; CHECK-NEXT:    [[R:%.*]] = xor i4 [[Y]], [[N1]]
+; CHECK-NEXT:    ret i4 [[R]]
+;
+  %y = call i4 @gen4()
+  %n0 = xor i4 %y, %x ; operands in swapped order
+  %n1 = and i4 %n0, -2
+  %r = xor i4 %y, %n1 ; operands in swapped order
+  ret i4 %r
+}
+
+define i4 @c_0_1_1 (i4 %y) {
+; CHECK-LABEL: @c_0_1_1(
+; CHECK-NEXT:    [[X:%.*]] = call i4 @gen4()
+; CHECK-NEXT:    [[N0:%.*]] = xor i4 [[X]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and i4 [[N0]], -2
+; CHECK-NEXT:    [[R:%.*]] = xor i4 [[X]], [[N1]]
+; CHECK-NEXT:    ret i4 [[R]]
+;
+  %x = call i4 @gen4()
+  %n0 = xor i4 %x, %y
+  %n1 = and i4 %n0, -2
+  %r = xor i4 %x, %n1 ; operands in swapped order, and %x instead of %y
+  ret i4 %r
+}
+
+define i4 @c_1_1_1 () {
+; CHECK-LABEL: @c_1_1_1(
+; CHECK-NEXT:    [[X:%.*]] = call i4 @gen4()
+; CHECK-NEXT:    [[Y:%.*]] = call i4 @gen4()
+; CHECK-NEXT:    [[N0:%.*]] = xor i4 [[Y]], [[X]]
+; CHECK-NEXT:    [[N1:%.*]] = and i4 [[N0]], -2
+; CHECK-NEXT:    [[R:%.*]] = xor i4 [[X]], [[N1]]
+; CHECK-NEXT:    ret i4 [[R]]
+;
+  %x = call i4 @gen4()
+  %y = call i4 @gen4()
+  %n0 = xor i4 %y, %x ; operands in swapped order
+  %n1 = and i4 %n0, -2
+  %r = xor i4 %x, %n1 ; operands in swapped order, and %x instead of %y
+  ret i4 %r
+}
+
+; ============================================================================ ;
+; Negative tests: these should not be folded.
+; ============================================================================ ;
+
+; The masked value (the 'and') must have only one use.
+
+declare void @use4(i4)
+
+define i4 @n_oneuse (i4 %x, i4 %y) {
+; CHECK-LABEL: @n_oneuse(
+; CHECK-NEXT:    [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and i4 [[N0]], -2
+; CHECK-NEXT:    [[R:%.*]] = xor i4 [[N1]], [[Y]]
+; CHECK-NEXT:    call void @use4(i4 [[N1]])
+; CHECK-NEXT:    ret i4 [[R]]
+;
+  %n0 = xor i4 %x, %y
+  %n1 = and i4 %n0, -2 ; %n1 has two uses, and it is the value that would be replaced
+  %r = xor i4 %n1, %y
+  call void @use4(i4 %n1)
+  ret i4 %r
+}
+
+; The mask is not a constant.
+
+define i4 @n_var_mask (i4 %x, i4 %y, i4 %m) {
+; CHECK-LABEL: @n_var_mask(
+; CHECK-NEXT:    [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and i4 [[N0]], [[M:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = xor i4 [[N1]], [[Y]]
+; CHECK-NEXT:    ret i4 [[R]]
+;
+  %n0 = xor i4 %x, %y
+  %n1 = and i4 %n0, %m
+  %r = xor i4 %n1, %y
+  ret i4 %r
+}
+
+; A third, unrelated variable is used in the final xor.
+
+define i4 @n_third_var (i4 %x, i4 %y, i4 %z) {
+; CHECK-LABEL: @n_third_var(
+; CHECK-NEXT:    [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and i4 [[N0]], -2
+; CHECK-NEXT:    [[R:%.*]] = xor i4 [[N1]], [[Z:%.*]]
+; CHECK-NEXT:    ret i4 [[R]]
+;
+  %n0 = xor i4 %x, %y
+  %n1 = and i4 %n0, -2
+  %r = xor i4 %n1, %z ; neither %x nor %y
+  ret i4 %r
+}
Index: test/Transforms/InstCombine/invert-const-mask-in-masked-merge-vector.ll
===================================================================
--- /dev/null
+++ test/Transforms/InstCombine/invert-const-mask-in-masked-merge-vector.ll
@@ -0,0 +1,258 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; If we have a masked merge of the form (where M is a constant):
+;   ((x ^ y) & M) ^ y
+; then invert the mask, if the inverted mask looks better, giving:
+;   ((x ^ y) & ~M) ^ x
+
+; In the vector case, do the inversion if more mask elements are non-canonical
+; than canonical. An undef element is considered canonical.
+
+; All the heavy lifting has already been done in the scalar test file,
+; so here we only need to test a few simple cases.
+
+define <2 x i4> @splat (<2 x i4> %x, <2 x i4> %y) {
+; CHECK-LABEL: @splat(
+; CHECK-NEXT:    [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and <2 x i4> [[N0]],
+; CHECK-NEXT:    [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]]
+; CHECK-NEXT:    ret <2 x i4> [[R]]
+;
+  %n0 = xor <2 x i4> %x, %y
+  %n1 = and <2 x i4> %n0, ; NOTE(review): the constant mask operand is missing from every vector 'and' in these tests (CHECK lines too) - looks lost in extraction; restore from the original patch
+  %r = xor <2 x i4> %n1, %y
+  ret <2 x i4> %r
+}
+
+define <3 x i4> @splat_undef (<3 x i4> %x, <3 x i4> %y) {
+; CHECK-LABEL: @splat_undef(
+; CHECK-NEXT:    [[N0:%.*]] = xor <3 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and <3 x i4> [[N0]],
+; CHECK-NEXT:    [[R:%.*]] = xor <3 x i4> [[N1]], [[Y]]
+; CHECK-NEXT:    ret <3 x i4> [[R]]
+;
+  %n0 = xor <3 x i4> %x, %y
+  %n1 = and <3 x i4> %n0,
+  %r = xor <3 x i4> %n1, %y
+  ret <3 x i4> %r
+}
+
+define <3 x i4> @nonsplat (<3 x i4> %x, <3 x i4> %y) {
+; CHECK-LABEL: @nonsplat(
+; CHECK-NEXT:    [[N0:%.*]] = xor <3 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and <3 x i4> [[N0]],
+; CHECK-NEXT:    [[R:%.*]] = xor <3 x i4> [[N1]], [[Y]]
+; CHECK-NEXT:    ret <3 x i4> [[R]]
+;
+  %n0 = xor <3 x i4> %x, %y
+  %n1 = and <3 x i4> %n0, ; 2 of 3 mask elements non-canonical (per the rule in the file header) -> do invert
+  %r = xor <3 x i4> %n1, %y
+  ret <3 x i4> %r
+}
+
+; ============================================================================ ;
+; Commutativity. Naming: @c_<inner xor swapped>_<%x instead of %y>_<final xor swapped>
+; ============================================================================ ;
+
+; Operands produced by calling @gen4 are used so that the IR complexity sorting does not interfere.
+declare <2 x i4> @gen4()
+
+define <2 x i4> @c_1_0_0 (<2 x i4> %x, <2 x i4> %y) {
+; CHECK-LABEL: @c_1_0_0(
+; CHECK-NEXT:    [[N0:%.*]] = xor <2 x i4> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and <2 x i4> [[N0]],
+; CHECK-NEXT:    [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]]
+; CHECK-NEXT:    ret <2 x i4> [[R]]
+;
+  %n0 = xor <2 x i4> %y, %x ; operands in swapped order
+  %n1 = and <2 x i4> %n0, ; NOTE(review): the constant mask operand is missing from every vector 'and' in these tests (CHECK lines too) - looks lost in extraction; restore from the original patch
+  %r = xor <2 x i4> %n1, %y
+  ret <2 x i4> %r
+}
+
+define <2 x i4> @c_0_1_0 (<2 x i4> %x, <2 x i4> %y) {
+; CHECK-LABEL: @c_0_1_0(
+; CHECK-NEXT:    [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and <2 x i4> [[N0]],
+; CHECK-NEXT:    [[R:%.*]] = xor <2 x i4> [[N1]], [[X]]
+; CHECK-NEXT:    ret <2 x i4> [[R]]
+;
+  %n0 = xor <2 x i4> %x, %y
+  %n1 = and <2 x i4> %n0,
+  %r = xor <2 x i4> %n1, %x ; uses %x instead of %y
+  ret <2 x i4> %r
+}
+
+define <2 x i4> @c_0_0_1 () {
+; CHECK-LABEL: @c_0_0_1(
+; CHECK-NEXT:    [[X:%.*]] = call <2 x i4> @gen4()
+; CHECK-NEXT:    [[Y:%.*]] = call <2 x i4> @gen4()
+; CHECK-NEXT:    [[N0:%.*]] = xor <2 x i4> [[X]], [[Y]]
+; CHECK-NEXT:    [[N1:%.*]] = and <2 x i4> [[N0]],
+; CHECK-NEXT:    [[R:%.*]] = xor <2 x i4> [[Y]], [[N1]]
+; CHECK-NEXT:    ret <2 x i4> [[R]]
+;
+  %x = call <2 x i4> @gen4()
+  %y = call <2 x i4> @gen4()
+  %n0 = xor <2 x i4> %x, %y
+  %n1 = and <2 x i4> %n0,
+  %r = xor <2 x i4> %y, %n1 ; operands in swapped order
+  ret <2 x i4> %r
+}
+
+define <2 x i4> @c_1_1_0 (<2 x i4> %x, <2 x i4> %y) {
+; CHECK-LABEL: @c_1_1_0(
+; CHECK-NEXT:    [[N0:%.*]] = xor <2 x i4> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and <2 x i4> [[N0]],
+; CHECK-NEXT:    [[R:%.*]] = xor <2 x i4> [[N1]], [[X]]
+; CHECK-NEXT:    ret <2 x i4> [[R]]
+;
+  %n0 = xor <2 x i4> %y, %x ; operands in swapped order
+  %n1 = and <2 x i4> %n0,
+  %r = xor <2 x i4> %n1, %x ; uses %x instead of %y
+  ret <2 x i4> %r
+}
+
+define <2 x i4> @c_1_0_1 (<2 x i4> %x) {
+; CHECK-LABEL: @c_1_0_1(
+; CHECK-NEXT:    [[Y:%.*]] = call <2 x i4> @gen4()
+; CHECK-NEXT:    [[N0:%.*]] = xor <2 x i4> [[Y]], [[X:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and <2 x i4> [[N0]],
+; CHECK-NEXT:    [[R:%.*]] = xor <2 x i4> [[Y]], [[N1]]
+; CHECK-NEXT:    ret <2 x i4> [[R]]
+;
+  %y = call <2 x i4> @gen4()
+  %n0 = xor <2 x i4> %y, %x ; operands in swapped order
+  %n1 = and <2 x i4> %n0,
+  %r = xor <2 x i4> %y, %n1 ; operands in swapped order
+  ret <2 x i4> %r
+}
+
+define <2 x i4> @c_0_1_1 (<2 x i4> %y) {
+; CHECK-LABEL: @c_0_1_1(
+; CHECK-NEXT:    [[X:%.*]] = call <2 x i4> @gen4()
+; CHECK-NEXT:    [[N0:%.*]] = xor <2 x i4> [[X]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and <2 x i4> [[N0]],
+; CHECK-NEXT:    [[R:%.*]] = xor <2 x i4> [[X]], [[N1]]
+; CHECK-NEXT:    ret <2 x i4> [[R]]
+;
+  %x = call <2 x i4> @gen4()
+  %n0 = xor <2 x i4> %x, %y
+  %n1 = and <2 x i4> %n0,
+  %r = xor <2 x i4> %x, %n1 ; operands in swapped order, and %x instead of %y
+  ret <2 x i4> %r
+}
+
+define <2 x i4> @c_1_1_1 () {
+; CHECK-LABEL: @c_1_1_1(
+; CHECK-NEXT:    [[X:%.*]] = call <2 x i4> @gen4()
+; CHECK-NEXT:    [[Y:%.*]] = call <2 x i4> @gen4()
+; CHECK-NEXT:    [[N0:%.*]] = xor <2 x i4> [[Y]], [[X]]
+; CHECK-NEXT:    [[N1:%.*]] = and <2 x i4> [[N0]],
+; CHECK-NEXT:    [[R:%.*]] = xor <2 x i4> [[X]], [[N1]]
+; CHECK-NEXT:    ret <2 x i4> [[R]]
+;
+  %x = call <2 x i4> @gen4()
+  %y = call <2 x i4> @gen4()
+  %n0 = xor <2 x i4> %y, %x ; operands in swapped order
+  %n1 = and <2 x i4> %n0,
+  %r = xor <2 x i4> %x, %n1 ; operands in swapped order, and %x instead of %y
+  ret <2 x i4> %r
+}
+
+; ============================================================================ ;
+; Negative tests: these should not be folded.
+; ============================================================================ ;
+
+; The masked value (the 'and') must have only one use.
+
+declare void @use4(<2 x i4>)
+
+define <2 x i4> @n_oneuse (<2 x i4> %x, <2 x i4> %y) {
+; CHECK-LABEL: @n_oneuse(
+; CHECK-NEXT:    [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and <2 x i4> [[N0]],
+; CHECK-NEXT:    [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]]
+; CHECK-NEXT:    call void @use4(<2 x i4> [[N1]])
+; CHECK-NEXT:    ret <2 x i4> [[R]]
+;
+  %n0 = xor <2 x i4> %x, %y
+  %n1 = and <2 x i4> %n0, ; %n1 has two uses, and it is the value that would be replaced. NOTE(review): the constant mask operand is missing from the constant-mask vector 'and's in these tests (CHECK lines too) - looks lost in extraction; restore from the original patch
+  %r = xor <2 x i4> %n1, %y
+  call void @use4(<2 x i4> %n1)
+  ret <2 x i4> %r
+}
+
+; The mask is not a constant.
+
+define <2 x i4> @n_var_mask (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) {
+; CHECK-LABEL: @n_var_mask(
+; CHECK-NEXT:    [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and <2 x i4> [[N0]], [[M:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]]
+; CHECK-NEXT:    ret <2 x i4> [[R]]
+;
+  %n0 = xor <2 x i4> %x, %y
+  %n1 = and <2 x i4> %n0, %m
+  %r = xor <2 x i4> %n1, %y
+  ret <2 x i4> %r
+}
+
+; A third, unrelated variable is used in the final xor.
+
+define <2 x i4> @n_third_var (<2 x i4> %x, <2 x i4> %y, <2 x i4> %z) {
+; CHECK-LABEL: @n_third_var(
+; CHECK-NEXT:    [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and <2 x i4> [[N0]],
+; CHECK-NEXT:    [[R:%.*]] = xor <2 x i4> [[N1]], [[Z:%.*]]
+; CHECK-NEXT:    ret <2 x i4> [[R]]
+;
+  %n0 = xor <2 x i4> %x, %y
+  %n1 = and <2 x i4> %n0,
+  %r = xor <2 x i4> %n1, %z ; neither %x nor %y
+  ret <2 x i4> %r
+}
+
+; ============================================================================ ;
+; Negative tests with non-splat vector masks: these should not be folded.
+; ============================================================================ ;
+
+define <2 x i4> @n_vec (<2 x i4> %x, <2 x i4> %y) {
+; CHECK-LABEL: @n_vec(
+; CHECK-NEXT:    [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and <2 x i4> [[N0]],
+; CHECK-NEXT:    [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]]
+; CHECK-NEXT:    ret <2 x i4> [[R]]
+;
+  %n0 = xor <2 x i4> %x, %y
+  %n1 = and <2 x i4> %n0, ; 1 of 2 -> don't invert. NOTE(review): the constant mask operand is missing from every vector 'and' in these tests (CHECK lines too) - looks lost in extraction; restore from the original patch
+  %r = xor <2 x i4> %n1, %y
+  ret <2 x i4> %r
+}
+
+define <3 x i4> @n_vec_undef (<3 x i4> %x, <3 x i4> %y) {
+; CHECK-LABEL: @n_vec_undef(
+; CHECK-NEXT:    [[N0:%.*]] = xor <3 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and <3 x i4> [[N0]],
+; CHECK-NEXT:    [[R:%.*]] = xor <3 x i4> [[N1]], [[Y]]
+; CHECK-NEXT:    ret <3 x i4> [[R]]
+;
+  %n0 = xor <3 x i4> %x, %y
+  %n1 = and <3 x i4> %n0, ; 1 of 3 -> don't invert
+  %r = xor <3 x i4> %n1, %y
+  ret <3 x i4> %r
+}
+
+define <3 x i4> @n_vec_good (<3 x i4> %x, <3 x i4> %y) {
+; CHECK-LABEL: @n_vec_good(
+; CHECK-NEXT:    [[N0:%.*]] = xor <3 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[N1:%.*]] = and <3 x i4> [[N0]],
+; CHECK-NEXT:    [[R:%.*]] = xor <3 x i4> [[N1]], [[Y]]
+; CHECK-NEXT:    ret <3 x i4> [[R]]
+;
+  %n0 = xor <3 x i4> %x, %y
+  %n1 = and <3 x i4> %n0, ; 1 of 3 -> don't invert
+  %r = xor <3 x i4> %n1, %y
+  ret <3 x i4> %r
+}