Index: llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
+++ llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
@@ -53,6 +53,129 @@
   ret <vscale x 4 x i1> %not
 }
 
+define <vscale x 16 x i8> @icmp_cnot_nxv16i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: icmp_cnot_nxv16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
+; CHECK-NEXT: ret
+  %mask = icmp eq <vscale x 16 x i8> %a, zeroinitializer ; per-lane test: element == 0
+  %zext = zext <vscale x 16 x i1> %mask to <vscale x 16 x i8> ; widen each i1 result to an i8 holding 0 or 1
+  ret <vscale x 16 x i8> %zext
+}
+
+define <vscale x 8 x i16> @icmp_cnot_nxv8i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: icmp_cnot_nxv8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1
+; CHECK-NEXT: ret
+  %mask = icmp eq <vscale x 8 x i16> %a, zeroinitializer ; per-lane test: element == 0
+  %zext = zext <vscale x 8 x i1> %mask to <vscale x 8 x i16> ; widen each i1 result to an i16 holding 0 or 1
+  ret <vscale x 8 x i16> %zext
+}
+
+define <vscale x 4 x i32> @icmp_cnot_nxv4i32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: icmp_cnot_nxv4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1
+; CHECK-NEXT: ret
+  %mask = icmp eq <vscale x 4 x i32> %a, zeroinitializer ; per-lane test: element == 0
+  %zext = zext <vscale x 4 x i1> %mask to <vscale x 4 x i32> ; widen each i1 result to an i32 holding 0 or 1
+  ret <vscale x 4 x i32> %zext
+}
+
+define <vscale x 2 x i64> @icmp_cnot_nxv2i64(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: icmp_cnot_nxv2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1
+; CHECK-NEXT: ret
+  %mask = icmp eq <vscale x 2 x i64> %a, zeroinitializer ; per-lane test: element == 0
+  %zext = zext <vscale x 2 x i1> %mask to <vscale x 2 x i64> ; widen each i1 result to an i64 holding 0 or 1
+  ret <vscale x 2 x i64> %zext
+}
+
+define <vscale x 2 x half> @fcmp_cnot_nxv2f16(<vscale x 2 x half> %a) {
+; CHECK-LABEL: fcmp_cnot_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0
+; CHECK-NEXT: mov z0.d, p1/z, #1 // =0x1
+; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d
+; CHECK-NEXT: ret
+  %mask = fcmp oeq <vscale x 2 x half> %a, zeroinitializer ; per-lane ordered-equal compare against zero
+  %conv = uitofp <vscale x 2 x i1> %mask to <vscale x 2 x half> ; convert each i1 result, read as unsigned, to half
+  ret <vscale x 2 x half> %conv
+}
+
+define <vscale x 4 x half> @fcmp_cnot_nxv4f16(<vscale x 4 x half> %a) {
+; CHECK-LABEL: fcmp_cnot_nxv4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0
+; CHECK-NEXT: mov z0.s, p1/z, #1 // =0x1
+; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s
+; CHECK-NEXT: ret
+  %mask = fcmp oeq <vscale x 4 x half> %a, zeroinitializer ; per-lane ordered-equal compare against zero
+  %conv = uitofp <vscale x 4 x i1> %mask to <vscale x 4 x half> ; convert each i1 result, read as unsigned, to half
+  ret <vscale x 4 x half> %conv
+}
+
+define <vscale x 8 x half> @fcmp_cnot_nxv8f16(<vscale x 8 x half> %a) {
+; CHECK-LABEL: fcmp_cnot_nxv8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0
+; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1
+; CHECK-NEXT: ret
+  %mask = fcmp oeq <vscale x 8 x half> %a, zeroinitializer ; per-lane ordered-equal compare against zero
+  %conv = uitofp <vscale x 8 x i1> %mask to <vscale x 8 x half> ; NOTE(review): expected asm has no ucvtf after the integer #1 mov, unlike fcmp_cnot_nxv2f16 - confirm this is intended
+  ret <vscale x 8 x half> %conv
+}
+
+define <vscale x 2 x float> @fcmp_cnot_nxv2f32(<vscale x 2 x float> %a) {
+; CHECK-LABEL: fcmp_cnot_nxv2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: mov z0.d, p1/z, #1 // =0x1
+; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d
+; CHECK-NEXT: ret
+  %mask = fcmp oeq <vscale x 2 x float> %a, zeroinitializer ; per-lane ordered-equal compare against zero
+  %conv = uitofp <vscale x 2 x i1> %mask to <vscale x 2 x float> ; convert each i1 result, read as unsigned, to float
+  ret <vscale x 2 x float> %conv
+}
+
+define <vscale x 4 x float> @fcmp_cnot_nxv4f32(<vscale x 4 x float> %a) {
+; CHECK-LABEL: fcmp_cnot_nxv4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1
+; CHECK-NEXT: ret
+  %mask = fcmp oeq <vscale x 4 x float> %a, zeroinitializer ; per-lane ordered-equal compare against zero
+  %conv = uitofp <vscale x 4 x i1> %mask to <vscale x 4 x float> ; NOTE(review): expected asm has no ucvtf after the integer #1 mov, unlike fcmp_cnot_nxv2f32 - confirm this is intended
+  ret <vscale x 4 x float> %conv
+}
+
+define <vscale x 2 x double> @fcmp_cnot_nxv2f64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: fcmp_cnot_nxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, #0.0
+; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1
+; CHECK-NEXT: ret
+  %mask = fcmp oeq <vscale x 2 x double> %a, zeroinitializer ; per-lane ordered-equal compare against zero
+  %conv = uitofp <vscale x 2 x i1> %mask to <vscale x 2 x double> ; NOTE(review): expected asm has no ucvtf after the integer #1 mov, unlike fcmp_cnot_nxv2f32 - confirm this is intended
+  ret <vscale x 2 x double> %conv
+}
+
 define i1 @foo_first(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: foo_first:
 ; CHECK: // %bb.0: