Index: test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-scalar.ll =================================================================== --- /dev/null +++ test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-scalar.ll @@ -0,0 +1,205 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +; If we have a masked merge, in the form of: (M is not constant) +; ((x ^ y) & ~M) ^ y +; We can drop de-invert the M: +; ((x ^ y) & M) ^ x + +define i4 @scalar (i4 %x, i4 %y, i4 %m) { +; CHECK-LABEL: @scalar( +; CHECK-NEXT: [[IM:%.*]] = xor i4 [[M:%.*]], -1 +; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], [[IM]] +; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Y]] +; CHECK-NEXT: ret i4 [[R]] +; + %im = xor i4 %m, -1 + %n0 = xor i4 %x, %y + %n1 = and i4 %n0, %im + %r = xor i4 %n1, %y + ret i4 %r +} + +; ============================================================================ ; +; Commutativity +; ============================================================================ ; + +; Used to make sure that the IR complexity sorting does not interfere. +declare i4 @gen4() + +; FIXME: should the %n1 = and i4 %im, %n0 swapped order pattern be tested? + +define i4 @c_1_0_0 (i4 %x, i4 %y, i4 %m) { +; CHECK-LABEL: @c_1_0_0( +; CHECK-NEXT: [[IM:%.*]] = xor i4 [[M:%.*]], -1 +; CHECK-NEXT: [[N0:%.*]] = xor i4 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], [[IM]] +; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Y]] +; CHECK-NEXT: ret i4 [[R]] +; + %im = xor i4 %m, -1 + %n0 = xor i4 %y, %x ; swapped order + %n1 = and i4 %n0, %im + %r = xor i4 %n1, %y + ret i4 %r +} + +define i4 @c_0_1_0 (i4 %x, i4 %y, i4 %m) { +; CHECK-LABEL: @c_0_1_0( +; CHECK-NEXT: [[IM:%.*]] = xor i4 [[M:%.*]], -1 +; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], [[IM]] +; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[X]] +; CHECK-NEXT: ret i4 [[R]] +; + %im = xor i4 %m, -1 + %n0 = xor i4 %x, %y + %n1 = and i4 %n0, %im + %r = xor i4 %n1, %x ; %x instead of %y + ret i4 %r +} + +define i4 @c_0_0_1 (i4 %m) { +; CHECK-LABEL: @c_0_0_1( +; CHECK-NEXT: [[IM:%.*]] = xor i4 [[M:%.*]], -1 +; CHECK-NEXT: [[X:%.*]] = call i4 @gen4() +; CHECK-NEXT: [[Y:%.*]] = call i4 @gen4() +; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X]], [[Y]] +; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], [[IM]] +; CHECK-NEXT: [[R:%.*]] = xor i4 [[Y]], [[N1]] +; CHECK-NEXT: ret i4 [[R]] +; + %im = xor i4 %m, -1 + %x = call i4 @gen4() + %y = call i4 @gen4() + %n0 = xor i4 %x, %y + %n1 = and i4 %n0, %im + %r = xor i4 %y, %n1 ; swapped order + ret i4 %r +} + +define i4 @c_1_1_0 (i4 %x, i4 %y, i4 %m) { +; CHECK-LABEL: @c_1_1_0( +; CHECK-NEXT: [[IM:%.*]] = xor i4 [[M:%.*]], -1 +; CHECK-NEXT: [[N0:%.*]] = xor i4 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], [[IM]] +; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[X]] +; CHECK-NEXT: ret i4 [[R]] +; + %im = xor i4 %m, -1 + %n0 = xor i4 %y, %x ; swapped order + %n1 = and i4 %n0, %im + %r = xor i4 %n1, %x ; %x instead of %y + ret i4 %r +} + +define i4 @c_1_0_1 (i4 %x, i4 %m) { +; CHECK-LABEL: @c_1_0_1( +; CHECK-NEXT: [[IM:%.*]] = xor i4 [[M:%.*]], -1 +; CHECK-NEXT: [[Y:%.*]] = call i4 @gen4() +; CHECK-NEXT: [[N0:%.*]] = xor i4 [[Y]], [[X:%.*]] +; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], [[IM]] +; CHECK-NEXT: [[R:%.*]] = xor i4 [[Y]], [[N1]] +; CHECK-NEXT: ret i4 [[R]] +; + %im = xor i4 %m, -1 + %y = call i4 @gen4() + %n0 = xor i4 %y, %x ; swapped order + %n1 = and i4 %n0, %im + %r = xor i4 %y, %n1 ; swapped order + ret i4 %r +} + +define i4 @c_0_1_1 (i4 %y, i4 %m) { +; CHECK-LABEL: @c_0_1_1( +; CHECK-NEXT: [[IM:%.*]] = xor i4 [[M:%.*]], -1 +; CHECK-NEXT: [[X:%.*]] = call i4 @gen4() +; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X]], [[Y:%.*]] +; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], [[IM]] +; CHECK-NEXT: [[R:%.*]] = xor i4 [[X]], [[N1]] +; CHECK-NEXT: ret i4 [[R]] +; + %im = xor i4 %m, -1 + %x = call i4 @gen4() + %n0 = xor i4 %x, %y + %n1 = and i4 %n0, %im + %r = xor i4 %x, %n1 ; swapped order, %x instead of %y + ret i4 %r +} + +define i4 @c_1_1_1 (i4 %m) { +; CHECK-LABEL: @c_1_1_1( +; CHECK-NEXT: [[IM:%.*]] = xor i4 [[M:%.*]], -1 +; CHECK-NEXT: [[X:%.*]] = call i4 @gen4() +; CHECK-NEXT: [[Y:%.*]] = call i4 @gen4() +; CHECK-NEXT: [[N0:%.*]] = xor i4 [[Y]], [[X]] +; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], [[IM]] +; CHECK-NEXT: [[R:%.*]] = xor i4 [[X]], [[N1]] +; CHECK-NEXT: ret i4 [[R]] +; + %im = xor i4 %m, -1 + %x = call i4 @gen4() + %y = call i4 @gen4() + %n0 = xor i4 %y, %x ; swapped order + %n1 = and i4 %n0, %im + %r = xor i4 %x, %n1 ; swapped order, %x instead of %y + ret i4 %r +} + +; ============================================================================ ; +; Negative tests. Should not be folded. +; ============================================================================ ; + +; One use only. + +declare void @use4(i4) + +define i4 @n_oneuse (i4 %x, i4 %y, i4 %m) { +; CHECK-LABEL: @n_oneuse( +; CHECK-NEXT: [[IM:%.*]] = xor i4 [[M:%.*]], -1 +; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], [[IM]] +; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Y]] +; CHECK-NEXT: call void @use4(i4 [[N1]]) +; CHECK-NEXT: ret i4 [[R]] +; + %im = xor i4 %m, -1 + %n0 = xor i4 %x, %y + %n1 = and i4 %n0, %im ; two uses of %n1, which is going to be replaced + %r = xor i4 %n1, %y + call void @use4(i4 %n1) + ret i4 %r +} + +; Some third variable is used + +define i4 @n_third_var (i4 %x, i4 %y, i4 %z, i4 %m) { +; CHECK-LABEL: @n_third_var( +; CHECK-NEXT: [[IM:%.*]] = xor i4 [[M:%.*]], -1 +; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], [[IM]] +; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Z:%.*]] +; CHECK-NEXT: ret i4 [[R]] +; + %im = xor i4 %m, -1 + %n0 = xor i4 %x, %y + %n1 = and i4 %n0, %im + %r = xor i4 %n1, %z ; not %x or %y + ret i4 %r +} + +define i4 @n_badxor (i4 %x, i4 %y, i4 %m) { +; CHECK-LABEL: @n_badxor( +; CHECK-NEXT: [[IM:%.*]] = xor i4 [[M:%.*]], 1 +; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], [[IM]] +; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Y]] +; CHECK-NEXT: ret i4 [[R]] +; + %im = xor i4 %m, 1 ; not -1 + %n0 = xor i4 %x, %y + %n1 = and i4 %n0, %im + %r = xor i4 %n1, %y + ret i4 %r +} Index: test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-vector.ll =================================================================== --- /dev/null +++ test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-vector.ll @@ -0,0 +1,237 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +; If we have a masked merge, in the form of: (M is not constant) +; ((x ^ y) & ~M) ^ y +; We can drop de-invert the M: +; ((x ^ y) & M) ^ x + +define <2 x i4> @vector (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) { +; CHECK-LABEL: @vector( +; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], +; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]] +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]] +; CHECK-NEXT: ret <2 x i4> [[R]] +; + %im = xor <2 x i4> %m, + %n0 = xor <2 x i4> %x, %y + %n1 = and <2 x i4> %n0, %im + %r = xor <2 x i4> %n1, %y + ret <2 x i4> %r +} + +define <3 x i4> @vector_undef (<3 x i4> %x, <3 x i4> %y, <3 x i4> %m) { +; CHECK-LABEL: @vector_undef( +; CHECK-NEXT: [[IM:%.*]] = xor <3 x i4> [[M:%.*]], +; CHECK-NEXT: [[N0:%.*]] = xor <3 x i4> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[N1:%.*]] = and <3 x i4> [[N0]], [[IM]] +; CHECK-NEXT: [[R:%.*]] = xor <3 x i4> [[N1]], [[Y]] +; CHECK-NEXT: ret <3 x i4> [[R]] +; + %im = xor <3 x i4> %m, + %n0 = xor <3 x i4> %x, %y + %n1 = and <3 x i4> %n0, %im + %r = xor <3 x i4> %n1, %y + ret <3 x i4> %r +} + +; ============================================================================ ; +; Commutativity +; ============================================================================ ; + +; Used to make sure that the IR complexity sorting does not interfere. +declare <2 x i4> @gen4() + +; FIXME: should %n1 = and <2 x i4> %im, %n0 swapped order pattern be tested? + +define <2 x i4> @c_1_0_0 (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) { +; CHECK-LABEL: @c_1_0_0( +; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], +; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]] +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]] +; CHECK-NEXT: ret <2 x i4> [[R]] +; + %im = xor <2 x i4> %m, + %n0 = xor <2 x i4> %y, %x ; swapped order + %n1 = and <2 x i4> %n0, %im + %r = xor <2 x i4> %n1, %y + ret <2 x i4> %r +} + +define <2 x i4> @c_0_1_0 (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) { +; CHECK-LABEL: @c_0_1_0( +; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], +; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]] +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[X]] +; CHECK-NEXT: ret <2 x i4> [[R]] +; + %im = xor <2 x i4> %m, + %n0 = xor <2 x i4> %x, %y + %n1 = and <2 x i4> %n0, %im + %r = xor <2 x i4> %n1, %x ; %x instead of %y + ret <2 x i4> %r +} + +define <2 x i4> @c_0_0_1 (<2 x i4> %m) { +; CHECK-LABEL: @c_0_0_1( +; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], +; CHECK-NEXT: [[X:%.*]] = call <2 x i4> @gen4() +; CHECK-NEXT: [[Y:%.*]] = call <2 x i4> @gen4() +; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X]], [[Y]] +; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]] +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[Y]], [[N1]] +; CHECK-NEXT: ret <2 x i4> [[R]] +; + %im = xor <2 x i4> %m, + %x = call <2 x i4> @gen4() + %y = call <2 x i4> @gen4() + %n0 = xor <2 x i4> %x, %y + %n1 = and <2 x i4> %n0, %im + %r = xor <2 x i4> %y, %n1 ; swapped order + ret <2 x i4> %r +} + +define <2 x i4> @c_1_1_0 (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) { +; CHECK-LABEL: @c_1_1_0( +; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], +; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]] +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[X]] +; CHECK-NEXT: ret <2 x i4> [[R]] +; + %im = xor <2 x i4> %m, + %n0 = xor <2 x i4> %y, %x ; swapped order + %n1 = and <2 x i4> %n0, %im + %r = xor <2 x i4> %n1, %x ; %x instead of %y + ret <2 x i4> %r +} + +define <2 x i4> @c_1_0_1 (<2 x i4> %x, <2 x i4> %m) { +; CHECK-LABEL: @c_1_0_1( +; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], +; CHECK-NEXT: [[Y:%.*]] = call <2 x i4> @gen4() +; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y]], [[X:%.*]] +; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]] +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[Y]], [[N1]] +; CHECK-NEXT: ret <2 x i4> [[R]] +; + %im = xor <2 x i4> %m, + %y = call <2 x i4> @gen4() + %n0 = xor <2 x i4> %y, %x ; swapped order + %n1 = and <2 x i4> %n0, %im + %r = xor <2 x i4> %y, %n1 ; swapped order + ret <2 x i4> %r +} + +define <2 x i4> @c_0_1_1 (<2 x i4> %y, <2 x i4> %m) { +; CHECK-LABEL: @c_0_1_1( +; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], +; CHECK-NEXT: [[X:%.*]] = call <2 x i4> @gen4() +; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X]], [[Y:%.*]] +; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]] +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[X]], [[N1]] +; CHECK-NEXT: ret <2 x i4> [[R]] +; + %im = xor <2 x i4> %m, + %x = call <2 x i4> @gen4() + %n0 = xor <2 x i4> %x, %y + %n1 = and <2 x i4> %n0, %im + %r = xor <2 x i4> %x, %n1 ; swapped order, %x instead of %y + ret <2 x i4> %r +} + +define <2 x i4> @c_1_1_1 (<2 x i4> %m) { +; CHECK-LABEL: @c_1_1_1( +; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], +; CHECK-NEXT: [[X:%.*]] = call <2 x i4> @gen4() +; CHECK-NEXT: [[Y:%.*]] = call <2 x i4> @gen4() +; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y]], [[X]] +; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]] +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[X]], [[N1]] +; CHECK-NEXT: ret <2 x i4> [[R]] +; + %im = xor <2 x i4> %m, + %x = call <2 x i4> @gen4() + %y = call <2 x i4> @gen4() + %n0 = xor <2 x i4> %y, %x ; swapped order + %n1 = and <2 x i4> %n0, %im + %r = xor <2 x i4> %x, %n1 ; swapped order, %x instead of %y + ret <2 x i4> %r +} + +; ============================================================================ ; +; Negative tests. Should not be folded. +; ============================================================================ ; + +; One use only. + +declare void @use4(<2 x i4>) + +define <2 x i4> @n_oneuse (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) { +; CHECK-LABEL: @n_oneuse( +; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], +; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]] +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]] +; CHECK-NEXT: call void @use4(<2 x i4> [[N1]]) +; CHECK-NEXT: ret <2 x i4> [[R]] +; + %im = xor <2 x i4> %m, + %n0 = xor <2 x i4> %x, %y + %n1 = and <2 x i4> %n0, %im ; two uses of %n1, which is going to be replaced + %r = xor <2 x i4> %n1, %y + call void @use4(<2 x i4> %n1) + ret <2 x i4> %r +} + +; Some third variable is used + +define <2 x i4> @n_third_var (<2 x i4> %x, <2 x i4> %y, <2 x i4> %z, <2 x i4> %m) { +; CHECK-LABEL: @n_third_var( +; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], +; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]] +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Z:%.*]] +; CHECK-NEXT: ret <2 x i4> [[R]] +; + %im = xor <2 x i4> %m, + %n0 = xor <2 x i4> %x, %y + %n1 = and <2 x i4> %n0, %im + %r = xor <2 x i4> %n1, %z ; not %x or %y + ret <2 x i4> %r +} + +; Bad xor + +define <2 x i4> @n_badxor_splat (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) { +; CHECK-LABEL: @n_badxor_splat( +; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], +; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]] +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]] +; CHECK-NEXT: ret <2 x i4> [[R]] +; + %im = xor <2 x i4> %m, ; not -1 + %n0 = xor <2 x i4> %x, %y + %n1 = and <2 x i4> %n0, %im ; two uses of %n1, which is going to be replaced + %r = xor <2 x i4> %n1, %y + ret <2 x i4> %r +} + +define <2 x i4> @n_badxor (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) { +; CHECK-LABEL: @n_badxor( +; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], +; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]] +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]] +; CHECK-NEXT: ret <2 x i4> [[R]] +; + %im = xor <2 x i4> %m, ; not -1 + %n0 = xor <2 x i4> %x, %y + %n1 = and <2 x i4> %n0, %im ; two uses of %n1, which is going to be replaced + %r = xor <2 x i4> %n1, %y + ret <2 x i4> %r +}