diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4174,6 +4174,21 @@
 defm CNT : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>;
 defm FABS : SIMDTwoVectorFP<0, 1, 0b01111, "fabs", fabs>;
 
+def : Pat<(v8i8 (AArch64vashr (v8i8 V64:$Rn), (i32 7))),
+          (CMLTv8i8rz V64:$Rn)>;
+def : Pat<(v4i16 (AArch64vashr (v4i16 V64:$Rn), (i32 15))),
+          (CMLTv4i16rz V64:$Rn)>;
+def : Pat<(v2i32 (AArch64vashr (v2i32 V64:$Rn), (i32 31))),
+          (CMLTv2i32rz V64:$Rn)>;
+def : Pat<(v16i8 (AArch64vashr (v16i8 V128:$Rn), (i32 7))),
+          (CMLTv16i8rz V128:$Rn)>;
+def : Pat<(v8i16 (AArch64vashr (v8i16 V128:$Rn), (i32 15))),
+          (CMLTv8i16rz V128:$Rn)>;
+def : Pat<(v4i32 (AArch64vashr (v4i32 V128:$Rn), (i32 31))),
+          (CMLTv4i32rz V128:$Rn)>;
+def : Pat<(v2i64 (AArch64vashr (v2i64 V128:$Rn), (i32 63))),
+          (CMLTv2i64rz V128:$Rn)>;
+
 defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
 defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
 defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
@@ -4825,6 +4840,9 @@
 defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
                                      int_aarch64_neon_usqadd>;
 
+def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))),
+          (CMLTv1i64rz V64:$Rn)>;
+
 def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))),
           (FCVTASv1i64 FPR64:$Rn)>;
 def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))),
diff --git a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
--- a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
@@ -143,7 +143,7 @@
 ; CODE: bb.0
 ; CODE-NEXT: ushll v{{.+}}.2d, v{{.+}}.2s, #0
 ; CODE-NEXT: shl v{{.+}}.2d, v{{.+}}.2d, #63
-; CODE-NEXT: sshr v{{.+}}.2d, v{{.+}}.2d, #63
+; CODE-NEXT: cmlt v{{.+}}.2d, v{{.+}}.2d, #0
 ; CODE-NEXT: bif v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
 ; CODE-NEXT: ret
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
--- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
@@ -348,8 +348,8 @@
 ; CHECK-NEXT: mov.b v1[15], w8
 ; CHECK-NEXT: shl.16b v0, v0, #7
 ; CHECK-NEXT: shl.16b v1, v1, #7
-; CHECK-NEXT: sshr.16b v0, v0, #7
-; CHECK-NEXT: sshr.16b v1, v1, #7
+; CHECK-NEXT: cmlt.16b v0, v0, #0
+; CHECK-NEXT: cmlt.16b v1, v1, #0
 ; CHECK-NEXT: ret
   %res = sext <32 x i1> %arg to <32 x i8>
   ret <32 x i8> %res
@@ -615,10 +615,10 @@
 ; CHECK-NEXT: shl.16b v2, v2, #7
 ; CHECK-NEXT: shl.16b v4, v1, #7
 ; CHECK-NEXT: shl.16b v5, v0, #7
-; CHECK-NEXT: sshr.16b v0, v3, #7
-; CHECK-NEXT: sshr.16b v1, v2, #7
-; CHECK-NEXT: sshr.16b v2, v4, #7
-; CHECK-NEXT: sshr.16b v3, v5, #7
+; CHECK-NEXT: cmlt.16b v0, v3, #0
+; CHECK-NEXT: cmlt.16b v1, v2, #0
+; CHECK-NEXT: cmlt.16b v2, v4, #0
+; CHECK-NEXT: cmlt.16b v3, v5, #0
 ; CHECK-NEXT: ret
   %res = sext <64 x i1> %arg to <64 x i8>
   ret <64 x i8> %res
diff --git a/llvm/test/CodeGen/AArch64/arm64-vshr.ll b/llvm/test/CodeGen/AArch64/arm64-vshr.ll
--- a/llvm/test/CodeGen/AArch64/arm64-vshr.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshr.ll
@@ -48,7 +48,14 @@
 
 define <1 x i64> @sshr_v1i64(<1 x i64> %A) nounwind {
 ; CHECK-LABEL: sshr_v1i64:
-; CHECK: sshr d0, d0, #63
+; CHECK: sshr d0, d0, #42
+  %tmp3 = ashr <1 x i64> %A, < i64 42 >
+  ret <1 x i64> %tmp3
+}
+
+define <1 x i64> @cmlt_v1i64(<1 x i64> %A) nounwind {
+; CHECK-LABEL: cmlt_v1i64:
+; CHECK: cmlt d0, d0, #0
   %tmp3 = ashr <1 x i64> %A, < i64 63 >
   ret <1 x i64> %tmp3
 }
diff --git a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
--- a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
@@ -115,7 +115,7 @@
 ; CHECK-LABEL: sign_7xi8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: movi v1.8b, #1
-; CHECK-NEXT: sshr v0.8b, v0.8b, #7
+; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
 ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
 ; CHECK-NEXT: ret
   %c = icmp sgt <7 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
@@ -127,7 +127,7 @@
 ; CHECK-LABEL: sign_8xi8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: movi v1.8b, #1
-; CHECK-NEXT: sshr v0.8b, v0.8b, #7
+; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
 ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
 ; CHECK-NEXT: ret
   %c = icmp sgt <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
@@ -139,7 +139,7 @@
 ; CHECK-LABEL: sign_16xi8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: movi v1.16b, #1
-; CHECK-NEXT: sshr v0.16b, v0.16b, #7
+; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
   %c = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
@@ -150,7 +150,7 @@
 define <3 x i32> @sign_3xi32(<3 x i32> %a) {
 ; CHECK-LABEL: sign_3xi32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
 ; CHECK-NEXT: orr v0.4s, #1
 ; CHECK-NEXT: ret
   %c = icmp sgt <3 x i32> %a, <i32 -1, i32 -1, i32 -1>
@@ -161,7 +161,7 @@
 define <4 x i32> @sign_4xi32(<4 x i32> %a) {
 ; CHECK-LABEL: sign_4xi32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
 ; CHECK-NEXT: orr v0.4s, #1
 ; CHECK-NEXT: ret
   %c = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -177,7 +177,7 @@
 ; CHECK-NEXT: .cfi_def_cfa_offset 32
 ; CHECK-NEXT: .cfi_offset w30, -16
 ; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff
-; CHECK-NEXT: sshr v2.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v2.4s, v0.4s, #0
 ; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: orr v2.4s, #1
 ; CHECK-NEXT: xtn v0.4h, v0.4s
@@ -214,7 +214,7 @@
 ; CHECK-LABEL: not_sign_4xi32_2:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: adrp x8, .LCPI17_0
-; CHECK-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0]
 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/dag-numsignbits.ll b/llvm/test/CodeGen/AArch64/dag-numsignbits.ll
--- a/llvm/test/CodeGen/AArch64/dag-numsignbits.ll
+++ b/llvm/test/CodeGen/AArch64/dag-numsignbits.ll
@@ -19,7 +19,7 @@
 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_1]
 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
 ; CHECK-NEXT: shl v0.4h, v0.4h, #15
-; CHECK-NEXT: sshr v0.4h, v0.4h, #15
+; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
 ; CHECK-NEXT: umov w0, v0.h[0]
 ; CHECK-NEXT: umov w3, v0.h[3]
 ; CHECK-NEXT: b foo
diff --git a/llvm/test/CodeGen/AArch64/div_minsize.ll b/llvm/test/CodeGen/AArch64/div_minsize.ll
--- a/llvm/test/CodeGen/AArch64/div_minsize.ll
+++ b/llvm/test/CodeGen/AArch64/div_minsize.ll
@@ -35,7 +35,7 @@
 define <8 x i16> @sdiv_vec8x16_minsize(<8 x i16> %var) minsize {
 entry:
 ; CHECK: sdiv_vec8x16_minsize
-; CHECK: sshr v1.8h, v0.8h, #15
+; CHECK: cmlt v1.8h, v0.8h, #0
 ; CHECK: usra v0.8h, v1.8h, #11
 ; CHECK: sshr v0.8h, v0.8h, #5
 ; CHECK: ret
diff --git a/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll b/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll
--- a/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll
+++ b/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll
@@ -167,7 +167,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: shl v0.16b, v0.16b, #7
 ; CHECK-NEXT: movi v1.16b, #128
-; CHECK-NEXT: sshr v0.16b, v0.16b, #7
+; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
   %shl = select <16 x i1> %t, <16 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>, <16 x i8> zeroinitializer
@@ -180,7 +180,7 @@
 ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
 ; CHECK-NEXT: movi v1.8h, #128
 ; CHECK-NEXT: shl v0.8h, v0.8h, #15
-; CHECK-NEXT: sshr v0.8h, v0.8h, #15
+; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
   %shl = select <8 x i1> %t, <8 x i16> <i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128>, <8 x i16> zeroinitializer
@@ -193,7 +193,7 @@
 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
 ; CHECK-NEXT: movi v1.4s, #64
 ; CHECK-NEXT: shl v0.4s, v0.4s, #31
-; CHECK-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
   %shl = select <4 x i1> %t, <4 x i32> <i32 64, i32 64, i32 64, i32 64>, <4 x i32> zeroinitializer
@@ -207,7 +207,7 @@
 ; CHECK-NEXT: mov w8, #65536
 ; CHECK-NEXT: dup v1.2d, x8
 ; CHECK-NEXT: shl v0.2d, v0.2d, #63
-; CHECK-NEXT: sshr v0.2d, v0.2d, #63
+; CHECK-NEXT: cmlt v0.2d, v0.2d, #0
 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
   %shl = select <2 x i1> %t, <2 x i64> <i64 65536, i64 65536>, <2 x i64> zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll b/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll
--- a/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll
+++ b/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll
@@ -184,7 +184,7 @@
 define <4 x i32> @test_srem_pow2(<4 x i32> %X) nounwind {
 ; CHECK-LABEL: test_srem_pow2:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v2.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v2.4s, v0.4s, #0
 ; CHECK-NEXT: mov v3.16b, v0.16b
 ; CHECK-NEXT: movi v1.4s, #1
 ; CHECK-NEXT: usra v3.4s, v2.4s, #28
@@ -203,7 +203,7 @@
 define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind {
 ; CHECK-LABEL: test_srem_int_min:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v2.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v2.4s, v0.4s, #0
 ; CHECK-NEXT: mov v3.16b, v0.16b
 ; CHECK-NEXT: movi v1.4s, #128, lsl #24
 ; CHECK-NEXT: usra v3.4s, v2.4s, #1
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll
@@ -25,7 +25,7 @@
 ; CHECK-LABEL: select_v4f16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: shl v2.4h, v2.4h, #15
-; CHECK-NEXT: sshr v2.4h, v2.4h, #15
+; CHECK-NEXT: cmlt v2.4h, v2.4h, #0
 ; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
 ; CHECK-NEXT: ret
   %sel = select <4 x i1> %mask, <4 x half> %op1, <4 x half> %op2
@@ -38,7 +38,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ushll v2.8h, v2.8b, #0
 ; CHECK-NEXT: shl v2.8h, v2.8h, #15
-; CHECK-NEXT: sshr v2.8h, v2.8h, #15
+; CHECK-NEXT: cmlt v2.8h, v2.8h, #0
 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
 ; CHECK-NEXT: ret
   %sel = select <8 x i1> %mask, <8 x half> %op1, <8 x half> %op2
@@ -122,7 +122,7 @@
 ; CHECK-LABEL: select_v2f32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: shl v2.2s, v2.2s, #31
-; CHECK-NEXT: sshr v2.2s, v2.2s, #31
+; CHECK-NEXT: cmlt v2.2s, v2.2s, #0
 ; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
 ; CHECK-NEXT: ret
   %sel = select <2 x i1> %mask, <2 x float> %op1, <2 x float> %op2
@@ -135,7 +135,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ushll v2.4s, v2.4h, #0
 ; CHECK-NEXT: shl v2.4s, v2.4s, #31
-; CHECK-NEXT: sshr v2.4s, v2.4s, #31
+; CHECK-NEXT: cmlt v2.4s, v2.4s, #0
 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
 ; CHECK-NEXT: ret
   %sel = select <4 x i1> %mask, <4 x float> %op1, <4 x float> %op2
@@ -233,7 +233,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ushll v2.2d, v2.2s, #0
 ; CHECK-NEXT: shl v2.2d, v2.2d, #63
-; CHECK-NEXT: sshr v2.2d, v2.2d, #63
+; CHECK-NEXT: cmlt v2.2d, v2.2d, #0
 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
 ; CHECK-NEXT: ret
   %sel = select <2 x i1> %mask, <2 x double> %op1, <2 x double> %op2
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll
@@ -25,7 +25,7 @@
 ; CHECK-LABEL: select_v8i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: shl v2.8b, v2.8b, #7
-; CHECK-NEXT: sshr v2.8b, v2.8b, #7
+; CHECK-NEXT: cmlt v2.8b, v2.8b, #0
 ; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
 ; CHECK-NEXT: ret
   %sel = select <8 x i1> %mask, <8 x i8> %op1, <8 x i8> %op2
@@ -37,7 +37,7 @@
 ; CHECK-LABEL: select_v16i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: shl v2.16b, v2.16b, #7
-; CHECK-NEXT: sshr v2.16b, v2.16b, #7
+; CHECK-NEXT: cmlt v2.16b, v2.16b, #0
 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
 ; CHECK-NEXT: ret
   %sel = select <16 x i1> %mask, <16 x i8> %op1, <16 x i8> %op2
@@ -1137,7 +1137,7 @@
 ; CHECK-LABEL: select_v4i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: shl v2.4h, v2.4h, #15
-; CHECK-NEXT: sshr v2.4h, v2.4h, #15
+; CHECK-NEXT: cmlt v2.4h, v2.4h, #0
 ; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
 ; CHECK-NEXT: ret
   %sel = select <4 x i1> %mask, <4 x i16> %op1, <4 x i16> %op2
@@ -1150,7 +1150,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ushll v2.8h, v2.8b, #0
 ; CHECK-NEXT: shl v2.8h, v2.8h, #15
-; CHECK-NEXT: sshr v2.8h, v2.8h, #15
+; CHECK-NEXT: cmlt v2.8h, v2.8h, #0
 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
 ; CHECK-NEXT: ret
   %sel = select <8 x i1> %mask, <8 x i16> %op1, <8 x i16> %op2
@@ -1767,7 +1767,7 @@
 ; CHECK-LABEL: select_v2i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: shl v2.2s, v2.2s, #31
-; CHECK-NEXT: sshr v2.2s, v2.2s, #31
+; CHECK-NEXT: cmlt v2.2s, v2.2s, #0
 ; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
 ; CHECK-NEXT: ret
   %sel = select <2 x i1> %mask, <2 x i32> %op1, <2 x i32> %op2
@@ -1780,7 +1780,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ushll v2.4s, v2.4h, #0
 ; CHECK-NEXT: shl v2.4s, v2.4s, #31
-; CHECK-NEXT: sshr v2.4s, v2.4s, #31
+; CHECK-NEXT: cmlt v2.4s, v2.4s, #0
 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
 ; CHECK-NEXT: ret
   %sel = select <4 x i1> %mask, <4 x i32> %op1, <4 x i32> %op2
@@ -2110,7 +2110,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ushll v2.2d, v2.2s, #0
 ; CHECK-NEXT: shl v2.2d, v2.2d, #63
-; CHECK-NEXT: sshr v2.2d, v2.2d, #63
+; CHECK-NEXT: cmlt v2.2d, v2.2d, #0
 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
 ; CHECK-NEXT: ret
   %sel = select <2 x i1> %mask, <2 x i64> %op1, <2 x i64> %op2
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
@@ -632,7 +632,7 @@
 ; CHECK-NEXT: mov v0.h[0], w8
 ; CHECK-NEXT: mov v0.h[1], w9
 ; CHECK-NEXT: shl v0.4h, v0.4h, #15
-; CHECK-NEXT: sshr v0.4h, v0.4h, #15
+; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
 ; CHECK-NEXT: sunpklo z0.s, z0.h
 ; CHECK-NEXT: sunpklo z0.d, z0.s
 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll
@@ -42,7 +42,7 @@
 ; CHECK-NEXT: mov v0.h[0], w8
 ; CHECK-NEXT: mov v0.h[1], w9
 ; CHECK-NEXT: shl v0.4h, v0.4h, #15
-; CHECK-NEXT: sshr v0.4h, v0.4h, #15
+; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
@@ -581,7 +581,7 @@
 ; CHECK-NEXT: mov v0.h[0], w8
 ; CHECK-NEXT: mov v0.h[1], w9
 ; CHECK-NEXT: shl v0.4h, v0.4h, #15
-; CHECK-NEXT: sshr v0.4h, v0.4h, #15
+; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
 ; CHECK-NEXT: sunpklo z0.s, z0.h
 ; CHECK-NEXT: sunpklo z0.d, z0.s
 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll
@@ -42,7 +42,7 @@
 ; CHECK-NEXT: mov v0.h[0], w8
 ; CHECK-NEXT: mov v0.h[1], w9
 ; CHECK-NEXT: shl v0.4h, v0.4h, #15
-; CHECK-NEXT: sshr v0.4h, v0.4h, #15
+; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
 ; CHECK-NEXT: st1h { z1.h }, p0, [x1]
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/vec_uaddo.ll b/llvm/test/CodeGen/AArch64/vec_uaddo.ll
--- a/llvm/test/CodeGen/AArch64/vec_uaddo.ll
+++ b/llvm/test/CodeGen/AArch64/vec_uaddo.ll
@@ -152,12 +152,12 @@
 ; CHECK-NEXT: ushll v3.4s, v3.4h, #0
 ; CHECK-NEXT: ushll v1.4s, v1.4h, #0
 ; CHECK-NEXT: shl v5.4s, v0.4s, #31
-; CHECK-NEXT: sshr v0.4s, v2.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v2.4s, #0
 ; CHECK-NEXT: shl v3.4s, v3.4s, #31
 ; CHECK-NEXT: shl v6.4s, v1.4s, #31
-; CHECK-NEXT: sshr v1.4s, v5.4s, #31
-; CHECK-NEXT: sshr v2.4s, v3.4s, #31
-; CHECK-NEXT: sshr v3.4s, v6.4s, #31
+; CHECK-NEXT: cmlt v1.4s, v5.4s, #0
+; CHECK-NEXT: cmlt v2.4s, v3.4s, #0
+; CHECK-NEXT: cmlt v3.4s, v6.4s, #0
 ; CHECK-NEXT: ret
   %t = call {<16 x i8>, <16 x i1>} @llvm.uadd.with.overflow.v16i8(<16 x i8> %a0, <16 x i8> %a1)
   %val = extractvalue {<16 x i8>, <16 x i1>} %t, 0
@@ -180,8 +180,8 @@
 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
 ; CHECK-NEXT: shl v1.4s, v1.4s, #31
 ; CHECK-NEXT: shl v3.4s, v0.4s, #31
-; CHECK-NEXT: sshr v0.4s, v1.4s, #31
-; CHECK-NEXT: sshr v1.4s, v3.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v1.4s, #0
+; CHECK-NEXT: cmlt v1.4s, v3.4s, #0
 ; CHECK-NEXT: ret
   %t = call {<8 x i16>, <8 x i1>} @llvm.uadd.with.overflow.v8i16(<8 x i16> %a0, <8 x i16> %a1)
   %val = extractvalue {<8 x i16>, <8 x i1>} %t, 0
@@ -296,7 +296,7 @@
 ; CHECK-NEXT: stp x8, x9, [x10, #16]
 ; CHECK-NEXT: shl v0.2s, v0.2s, #31
 ; CHECK-NEXT: stp x11, x12, [x10]
-; CHECK-NEXT: sshr v0.2s, v0.2s, #31
+; CHECK-NEXT: cmlt v0.2s, v0.2s, #0
 ; CHECK-NEXT: ret
   %t = call {<2 x i128>, <2 x i1>} @llvm.uadd.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)
   %val = extractvalue {<2 x i128>, <2 x i1>} %t, 0
diff --git a/llvm/test/CodeGen/AArch64/vec_umulo.ll b/llvm/test/CodeGen/AArch64/vec_umulo.ll
--- a/llvm/test/CodeGen/AArch64/vec_umulo.ll
+++ b/llvm/test/CodeGen/AArch64/vec_umulo.ll
@@ -181,10 +181,10 @@
 ; CHECK-NEXT: shl v2.4s, v2.4s, #31
 ; CHECK-NEXT: shl v6.4s, v5.4s, #31
 ; CHECK-NEXT: shl v3.4s, v3.4s, #31
-; CHECK-NEXT: sshr v4.4s, v4.4s, #31
-; CHECK-NEXT: sshr v5.4s, v2.4s, #31
-; CHECK-NEXT: sshr v2.4s, v6.4s, #31
-; CHECK-NEXT: sshr v3.4s, v3.4s, #31
+; CHECK-NEXT: cmlt v4.4s, v4.4s, #0
+; CHECK-NEXT: cmlt v5.4s, v2.4s, #0
+; CHECK-NEXT: cmlt v2.4s, v6.4s, #0
+; CHECK-NEXT: cmlt v3.4s, v3.4s, #0
 ; CHECK-NEXT: mul v6.16b, v0.16b, v1.16b
 ; CHECK-NEXT: mov v0.16b, v4.16b
 ; CHECK-NEXT: mov v1.16b, v5.16b
@@ -212,8 +212,8 @@
 ; CHECK-NEXT: ushll v2.4s, v2.4h, #0
 ; CHECK-NEXT: shl v3.4s, v3.4s, #31
 ; CHECK-NEXT: shl v4.4s, v2.4s, #31
-; CHECK-NEXT: sshr v2.4s, v3.4s, #31
-; CHECK-NEXT: sshr v3.4s, v4.4s, #31
+; CHECK-NEXT: cmlt v2.4s, v3.4s, #0
+; CHECK-NEXT: cmlt v3.4s, v4.4s, #0
 ; CHECK-NEXT: mul v4.8h, v0.8h, v1.8h
 ; CHECK-NEXT: mov v0.16b, v2.16b
 ; CHECK-NEXT: mov v1.16b, v3.16b
@@ -370,7 +370,7 @@
 ; CHECK-NEXT: mul x9, x2, x6
 ; CHECK-NEXT: shl v0.2s, v0.2s, #31
 ; CHECK-NEXT: stp x9, x8, [x10, #16]
-; CHECK-NEXT: sshr v0.2s, v0.2s, #31
+; CHECK-NEXT: cmlt v0.2s, v0.2s, #0
 ; CHECK-NEXT: ret
   %t = call {<2 x i128>, <2 x i1>} @llvm.umul.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)
   %val = extractvalue {<2 x i128>, <2 x i1>} %t, 0
diff --git a/llvm/test/CodeGen/AArch64/vselect-constants.ll b/llvm/test/CodeGen/AArch64/vselect-constants.ll
--- a/llvm/test/CodeGen/AArch64/vselect-constants.ll
+++ b/llvm/test/CodeGen/AArch64/vselect-constants.ll
@@ -16,7 +16,7 @@
 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0]
 ; CHECK-NEXT: shl v0.4s, v0.4s, #31
 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_1]
-; CHECK-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
 ; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b
 ; CHECK-NEXT: ret
   %add = select <4 x i1> %cond, <4 x i32> , <4 x i32>
@@ -47,7 +47,7 @@
 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
 ; CHECK-NEXT: shl v0.4s, v0.4s, #31
 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI2_1]
-; CHECK-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
 ; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b
 ; CHECK-NEXT: ret
   %add = select <4 x i1> %cond, <4 x i32> , <4 x i32>
@@ -78,7 +78,7 @@
 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0]
 ; CHECK-NEXT: shl v0.4s, v0.4s, #31
 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI4_1]
-; CHECK-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
 ; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b
 ; CHECK-NEXT: ret
   %add = select <4 x i1> %cond, <4 x i32> , <4 x i32>
@@ -105,7 +105,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
 ; CHECK-NEXT: shl v0.4s, v0.4s, #31
-; CHECK-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
 ; CHECK-NEXT: ret
   %add = select <4 x i1> %cond, <4 x i32> , <4 x i32>
   ret <4 x i32> %add
@@ -149,7 +149,7 @@
 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
 ; CHECK-NEXT: movi v1.4s, #1
 ; CHECK-NEXT: shl v0.4s, v0.4s, #31
-; CHECK-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
   %add = select <4 x i1> %cond, <4 x i32> , <4 x i32>
@@ -196,7 +196,7 @@
 define <16 x i8> @signbit_mask_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: signbit_mask_v16i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.16b, v0.16b, #7
+; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
   %cond = icmp slt <16 x i8> %a, zeroinitializer
@@ -209,7 +209,7 @@
 define <16 x i8> @signbit_mask_swap_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: signbit_mask_swap_v16i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.16b, v0.16b, #7
+; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
   %cond = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
@@ -220,7 +220,7 @@
 define <8 x i16> @signbit_mask_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: signbit_mask_v8i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.8h, v0.8h, #15
+; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
   %cond = icmp slt <8 x i16> %a, zeroinitializer
@@ -231,7 +231,7 @@
 define <4 x i32> @signbit_mask_v4i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: signbit_mask_v4i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
   %cond = icmp slt <4 x i32> %a, zeroinitializer
@@ -242,7 +242,7 @@
 define <2 x i64> @signbit_mask_v2i64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: signbit_mask_v2i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.2d, v0.2d, #63
+; CHECK-NEXT: cmlt v0.2d, v0.2d, #0
 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
   %cond = icmp slt <2 x i64> %a, zeroinitializer
@@ -253,7 +253,7 @@
 define <16 x i8> @signbit_setmask_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: signbit_setmask_v16i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.16b, v0.16b, #7
+; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
   %cond = icmp slt <16 x i8> %a, zeroinitializer
@@ -264,7 +264,7 @@
 define <8 x i16> @signbit_setmask_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: signbit_setmask_v8i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.8h, v0.8h, #15
+; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
   %cond = icmp slt <8 x i16> %a, zeroinitializer
@@ -277,7 +277,7 @@
 define <8 x i16> @signbit_setmask_swap_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: signbit_setmask_swap_v8i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.8h, v0.8h, #15
+; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
   %cond = icmp sgt <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
@@ -288,7 +288,7 @@
 define <4 x i32> @signbit_setmask_v4i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: signbit_setmask_v4i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
   %cond = icmp slt <4 x i32> %a, zeroinitializer
@@ -299,7 +299,7 @@
 define <2 x i64> @signbit_setmask_v2i64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: signbit_setmask_v2i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.2d, v0.2d, #63
+; CHECK-NEXT: cmlt v0.2d, v0.2d, #0
 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
   %cond = icmp slt <2 x i64> %a, zeroinitializer
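
Note (added for context, not part of the patch itself): an arithmetic shift right by element size minus one copies each lane's sign bit into every bit of the lane, so the result is all-ones for negative lanes and all-zeros otherwise. That is exactly what cmlt against #0 (signed compare less-than zero) computes, which is why every sshr-by-bitwidth-minus-one in the tests above can become a cmlt without changing semantics. A minimal sketch of IR that exercises the new v4i32 pattern; the function name is hypothetical, not taken from the patch:

; With this patch, llc -mtriple=aarch64 selects: cmlt v0.4s, v0.4s, #0
; Previously it selected:                        sshr v0.4s, v0.4s, #31
define <4 x i32> @sign_mask_v4i32(<4 x i32> %v) {
  %m = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %m
}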