Review notes (free text ahead of the first "Index:" header; not part of any hunk):
  * What the hunk adds: Neon_addl_extract_patterns matches an add/sub (opnode)
    whose operands are the low half (extract_subvector ..., 0) of an extended
    64-bit vector. When both operands are extended it emits the long
    instruction Inst#"L..."; when only the second operand is extended it emits
    the wide instruction Inst#"W...", first placing $Rn in the low half (dsub)
    of an otherwise IMPLICIT_DEF wide register. In both cases only the dsub
    half of the wide result is used.
  * This text arrived with its line breaks collapsed and with every
    "<identifier ...>" template list missing. The hunk is re-split on its '+'
    markers and the lists are reconstructed:
      - the multiclass parameters and !cast<Instruction> follow from how
        opnode, ext and Inst are used inside the patterns;
      - the four defm argument lists follow from the updated tests
        ([su]addl / [su]subl and [su]addw / [su]subw).
    NOTE(review): zanyext vs. plain zext for the unsigned forms, and the order
    of the defm lines, cannot be decided from this text -- confirm against
    upstream.
  * The context line "defm : Neon_mul_acc_widen_patterns;" lost its template
    list too, and blank context lines were dropped in the collapse. Both are
    left as found, so the hunk will not apply until they are restored.
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5617,6 +5617,34 @@
 defm : Neon_mul_acc_widen_patterns;
+
+multiclass Neon_addl_extract_patterns<SDPatternOperator opnode, SDPatternOperator ext, string Inst> {
+  def : Pat<(v4i16 (opnode (extract_subvector (ext (v8i8 V64:$Rn)), (i64 0)),
+                           (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))),
+            (EXTRACT_SUBREG (v8i16 (!cast<Instruction>(Inst#"Lv8i8_v8i16") V64:$Rn, V64:$Rm)), dsub)>;
+  def : Pat<(v2i32 (opnode (extract_subvector (ext (v4i16 V64:$Rn)), (i64 0)),
+                           (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))),
+            (EXTRACT_SUBREG (v4i32 (!cast<Instruction>(Inst#"Lv4i16_v4i32") V64:$Rn, V64:$Rm)), dsub)>;
+  def : Pat<(v1i64 (opnode (extract_subvector (ext (v2i32 V64:$Rn)), (i64 0)),
+                           (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))),
+            (EXTRACT_SUBREG (v2i64 (!cast<Instruction>(Inst#"Lv2i32_v2i64") V64:$Rn, V64:$Rm)), dsub)>;
+
+  def : Pat<(v4i16 (opnode (v4i16 V64:$Rn),
+                           (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))),
+            (EXTRACT_SUBREG (v8i16 (!cast<Instruction>(Inst#"Wv8i8_v8i16") (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
+  def : Pat<(v2i32 (opnode (v2i32 V64:$Rn),
+                           (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))),
+            (EXTRACT_SUBREG (v4i32 (!cast<Instruction>(Inst#"Wv4i16_v4i32") (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
+  def : Pat<(v1i64 (opnode (v1i64 V64:$Rn),
+                           (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))),
+            (EXTRACT_SUBREG (v2i64 (!cast<Instruction>(Inst#"Wv2i32_v2i64") (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
+}
+
+defm : Neon_addl_extract_patterns<add, zanyext, "UADD">;
+defm : Neon_addl_extract_patterns<add, sext, "SADD">;
+defm : Neon_addl_extract_patterns<sub, zanyext, "USUB">;
+defm : Neon_addl_extract_patterns<sub, sext, "SSUB">;
+
 // CodeGen patterns for addhn and subhn instructions, which can actually be
 // written in LLVM IR without too much difficulty.
Index: llvm/test/CodeGen/AArch64/aarch64-load-ext.ll =================================================================== --- llvm/test/CodeGen/AArch64/aarch64-load-ext.ll +++ llvm/test/CodeGen/AArch64/aarch64-load-ext.ll @@ -381,9 +381,7 @@ ; CHECK-LE: // %bb.0: ; CHECK-LE-NEXT: ldr s0, [x0] ; CHECK-LE-NEXT: ldr s1, [x1] -; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-LE-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-LE-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-LE-NEXT: uaddl v0.8h, v0.8b, v1.8b ; CHECK-LE-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-LE-NEXT: sshr v0.4h, v0.4h, #8 ; CHECK-LE-NEXT: ret @@ -394,9 +392,7 @@ ; CHECK-BE-NEXT: ldr s1, [x1] ; CHECK-BE-NEXT: rev32 v0.8b, v0.8b ; CHECK-BE-NEXT: rev32 v1.8b, v1.8b -; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-BE-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-BE-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-BE-NEXT: uaddl v0.8h, v0.8b, v1.8b ; CHECK-BE-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-BE-NEXT: sshr v0.4h, v0.4h, #8 ; CHECK-BE-NEXT: rev64 v0.4h, v0.4h @@ -413,9 +409,7 @@ ; CHECK-LE: // %bb.0: ; CHECK-LE-NEXT: ldr s0, [x0] ; CHECK-LE-NEXT: ldr s1, [x1] -; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-LE-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-LE-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-LE-NEXT: uaddl v0.8h, v0.8b, v1.8b ; CHECK-LE-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-LE-NEXT: shl v0.4s, v0.4s, #24 ; CHECK-LE-NEXT: sshr v0.4s, v0.4s, #24 @@ -427,9 +421,7 @@ ; CHECK-BE-NEXT: ldr s1, [x1] ; CHECK-BE-NEXT: rev32 v0.8b, v0.8b ; CHECK-BE-NEXT: rev32 v1.8b, v1.8b -; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-BE-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-BE-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-BE-NEXT: uaddl v0.8h, v0.8b, v1.8b ; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-BE-NEXT: shl v0.4s, v0.4s, #24 ; CHECK-BE-NEXT: sshr v0.4s, v0.4s, #24 Index: llvm/test/CodeGen/AArch64/arm64-ld1.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-ld1.ll +++ llvm/test/CodeGen/AArch64/arm64-ld1.ll @@ 
-913,11 +913,9 @@ define void @ld1r_2s_from_dup(ptr nocapture %a, ptr nocapture %b, ptr nocapture %diff) nounwind ssp { entry: ; CHECK: ld1r_2s_from_dup -; CHECK: ld1r.2s { [[ARG1:v[0-9]+]] }, [x0] -; CHECK-NEXT: ld1r.2s { [[ARG2:v[0-9]+]] }, [x1] -; CHECK-NEXT: ushll.8h [[ARG1]], [[ARG1]], #0 -; CHECK-NEXT: ushll.8h [[ARG2]], [[ARG2]], #0 -; CHECK-NEXT: sub.4h v[[RESREGNUM:[0-9]+]], [[ARG1]], [[ARG2]] +; CHECK: ld1r.2s { [[ARG2:v[0-9]+]] }, [x1] +; CHECK-NEXT: ld1r.2s { [[ARG1:v[0-9]+]] }, [x0] +; CHECK-NEXT: usubl.8h v[[RESREGNUM:[0-9]+]], [[ARG1]], [[ARG2]] ; CHECK-NEXT: str d[[RESREGNUM]], [x2] ; CHECK-NEXT: ret %tmp1 = load i32, ptr %a, align 4 Index: llvm/test/CodeGen/AArch64/neon-extadd-extract.ll =================================================================== --- llvm/test/CodeGen/AArch64/neon-extadd-extract.ll +++ llvm/test/CodeGen/AArch64/neon-extadd-extract.ll @@ -4,9 +4,8 @@ define <4 x i16> @addls_v8i8_0(<8 x i8> %s0, <8 x i8> %s1) { ; CHECK-LABEL: addls_v8i8_0: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-NEXT: saddl v0.8h, v0.8b, v1.8b +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %s0s = sext <8 x i8> %s0 to <8 x i16> @@ -20,8 +19,9 @@ define <4 x i16> @addws_v8i8_0(<4 x i16> %s0, <8 x i8> %s1) { ; CHECK-LABEL: addws_v8i8_0: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: saddw v0.8h, v0.8h, v1.8b +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %s1s = sext <8 x i8> %s1 to <8 x i16> @@ -33,9 +33,8 @@ define <4 x i16> @addlu_v8i8_0(<8 x i8> %s0, <8 x i8> %s1) { ; CHECK-LABEL: addlu_v8i8_0: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-NEXT: uaddl v0.8h, 
v0.8b, v1.8b +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %s0s = zext <8 x i8> %s0 to <8 x i16> @@ -49,8 +48,9 @@ define <4 x i16> @addwu_v8i8_0(<4 x i16> %s0, <8 x i8> %s1) { ; CHECK-LABEL: addwu_v8i8_0: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: uaddw v0.8h, v0.8h, v1.8b +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %s1s = zext <8 x i8> %s1 to <8 x i16> @@ -62,9 +62,8 @@ define <4 x i16> @subls_v8i8_0(<8 x i8> %s0, <8 x i8> %s1) { ; CHECK-LABEL: subls_v8i8_0: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ssubl v0.8h, v0.8b, v1.8b +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %s0s = sext <8 x i8> %s0 to <8 x i16> @@ -78,8 +77,9 @@ define <4 x i16> @subws_v8i8_0(<4 x i16> %s0, <8 x i8> %s1) { ; CHECK-LABEL: subws_v8i8_0: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: ssubw v0.8h, v0.8h, v1.8b +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %s1s = sext <8 x i8> %s1 to <8 x i16> @@ -91,9 +91,8 @@ define <4 x i16> @sublu_v8i8_0(<8 x i8> %s0, <8 x i8> %s1) { ; CHECK-LABEL: sublu_v8i8_0: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h +; CHECK-NEXT: usubl v0.8h, v0.8b, v1.8b +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %s0s = zext <8 x i8> %s0 to <8 x i16> @@ -107,8 +106,9 @@ define <4 x i16> @subwu_v8i8_0(<4 x i16> %s0, <8 x i8> %s1) { ; CHECK-LABEL: subwu_v8i8_0: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: sub 
v0.4h, v0.4h, v1.4h +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: usubw v0.8h, v0.8h, v1.8b +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %s1s = zext <8 x i8> %s1 to <8 x i16> @@ -178,9 +178,8 @@ define <2 x i32> @addls_v4i16_0(<4 x i16> %s0, <4 x i16> %s1) { ; CHECK-LABEL: addls_v4i16_0: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-NEXT: sshll v1.4s, v1.4h, #0 -; CHECK-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-NEXT: saddl v0.4s, v0.4h, v1.4h +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %s0s = sext <4 x i16> %s0 to <4 x i32> @@ -194,8 +193,9 @@ define <2 x i32> @addws_v4i16_0(<2 x i32> %s0, <4 x i16> %s1) { ; CHECK-LABEL: addws_v4i16_0: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sshll v1.4s, v1.4h, #0 -; CHECK-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: saddw v0.4s, v0.4s, v1.4h +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %s1s = sext <4 x i16> %s1 to <4 x i32> @@ -207,9 +207,8 @@ define <2 x i32> @addlu_v4i16_0(<4 x i16> %s0, <4 x i16> %s1) { ; CHECK-LABEL: addlu_v4i16_0: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-NEXT: uaddl v0.4s, v0.4h, v1.4h +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %s0s = zext <4 x i16> %s0 to <4 x i32> @@ -223,8 +222,9 @@ define <2 x i32> @addwu_v4i16_0(<2 x i32> %s0, <4 x i16> %s1) { ; CHECK-LABEL: addwu_v4i16_0: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %s1s = zext <4 x i16> %s1 to <4 x i32> @@ -236,9 +236,8 @@ define <2 x i32> @subls_v4i16_0(<4 x i16> %s0, <4 x i16> 
%s1) { ; CHECK-LABEL: subls_v4i16_0: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-NEXT: sshll v1.4s, v1.4h, #0 -; CHECK-NEXT: sub v0.2s, v0.2s, v1.2s +; CHECK-NEXT: ssubl v0.4s, v0.4h, v1.4h +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %s0s = sext <4 x i16> %s0 to <4 x i32> @@ -252,8 +251,9 @@ define <2 x i32> @subws_v4i16_0(<2 x i32> %s0, <4 x i16> %s1) { ; CHECK-LABEL: subws_v4i16_0: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sshll v1.4s, v1.4h, #0 -; CHECK-NEXT: sub v0.2s, v0.2s, v1.2s +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: ssubw v0.4s, v0.4s, v1.4h +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %s1s = sext <4 x i16> %s1 to <4 x i32> @@ -265,9 +265,8 @@ define <2 x i32> @sublu_v4i16_0(<4 x i16> %s0, <4 x i16> %s1) { ; CHECK-LABEL: sublu_v4i16_0: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: sub v0.2s, v0.2s, v1.2s +; CHECK-NEXT: usubl v0.4s, v0.4h, v1.4h +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %s0s = zext <4 x i16> %s0 to <4 x i32> @@ -281,8 +280,9 @@ define <2 x i32> @subwu_v4i16_0(<2 x i32> %s0, <4 x i16> %s1) { ; CHECK-LABEL: subwu_v4i16_0: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: sub v0.2s, v0.2s, v1.2s +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: usubw v0.4s, v0.4s, v1.4h +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %s1s = zext <4 x i16> %s1 to <4 x i32> @@ -352,9 +352,8 @@ define <1 x i64> @addls_v2i32_0(<2 x i32> %s0, <2 x i32> %s1) { ; CHECK-LABEL: addls_v2i32_0: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sshll v0.2d, v0.2s, #0 -; CHECK-NEXT: sshll v1.2d, v1.2s, #0 -; CHECK-NEXT: add d0, d0, d1 +; CHECK-NEXT: saddl v0.2d, v0.2s, v1.2s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %s0s = sext 
<2 x i32> %s0 to <2 x i64> @@ -368,8 +367,9 @@ define <1 x i64> @addws_v2i32_0(<1 x i64> %s0, <2 x i32> %s1) { ; CHECK-LABEL: addws_v2i32_0: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sshll v1.2d, v1.2s, #0 -; CHECK-NEXT: add d0, d0, d1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: saddw v0.2d, v0.2d, v1.2s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %s1s = sext <2 x i32> %s1 to <2 x i64> @@ -381,9 +381,8 @@ define <1 x i64> @addlu_v2i32_0(<2 x i32> %s0, <2 x i32> %s1) { ; CHECK-LABEL: addlu_v2i32_0: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-NEXT: ushll v1.2d, v1.2s, #0 -; CHECK-NEXT: add d0, d0, d1 +; CHECK-NEXT: uaddl v0.2d, v0.2s, v1.2s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %s0s = zext <2 x i32> %s0 to <2 x i64> @@ -397,8 +396,9 @@ define <1 x i64> @addwu_v2i32_0(<1 x i64> %s0, <2 x i32> %s1) { ; CHECK-LABEL: addwu_v2i32_0: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v1.2d, v1.2s, #0 -; CHECK-NEXT: add d0, d0, d1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: uaddw v0.2d, v0.2d, v1.2s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %s1s = zext <2 x i32> %s1 to <2 x i64> @@ -410,9 +410,8 @@ define <1 x i64> @subls_v2i32_0(<2 x i32> %s0, <2 x i32> %s1) { ; CHECK-LABEL: subls_v2i32_0: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sshll v0.2d, v0.2s, #0 -; CHECK-NEXT: sshll v1.2d, v1.2s, #0 -; CHECK-NEXT: sub d0, d0, d1 +; CHECK-NEXT: ssubl v0.2d, v0.2s, v1.2s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %s0s = sext <2 x i32> %s0 to <2 x i64> @@ -426,8 +425,9 @@ define <1 x i64> @subws_v2i32_0(<1 x i64> %s0, <2 x i32> %s1) { ; CHECK-LABEL: subws_v2i32_0: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sshll v1.2d, v1.2s, #0 -; CHECK-NEXT: sub d0, d0, d1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: ssubw v0.2d, v0.2d, v1.2s +; 
CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %s1s = sext <2 x i32> %s1 to <2 x i64> @@ -439,9 +439,8 @@ define <1 x i64> @sublu_v2i32_0(<2 x i32> %s0, <2 x i32> %s1) { ; CHECK-LABEL: sublu_v2i32_0: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-NEXT: ushll v1.2d, v1.2s, #0 -; CHECK-NEXT: sub d0, d0, d1 +; CHECK-NEXT: usubl v0.2d, v0.2s, v1.2s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %s0s = zext <2 x i32> %s0 to <2 x i64> @@ -455,8 +454,9 @@ define <1 x i64> @subwu_v2i32_0(<1 x i64> %s0, <2 x i32> %s1) { ; CHECK-LABEL: subwu_v2i32_0: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v1.2d, v1.2s, #0 -; CHECK-NEXT: sub d0, d0, d1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: usubw v0.2d, v0.2d, v1.2s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %s1s = zext <2 x i32> %s1 to <2 x i64> Index: llvm/test/CodeGen/AArch64/neon-extadd.ll =================================================================== --- llvm/test/CodeGen/AArch64/neon-extadd.ll +++ llvm/test/CodeGen/AArch64/neon-extadd.ll @@ -503,17 +503,15 @@ ; CHECK-NEXT: ld1 { v3.b }[6], [x9] ; CHECK-NEXT: add x9, sp, #88 ; CHECK-NEXT: mov v1.b[6], w6 -; CHECK-NEXT: ld1 { v4.b }[3], [x12] ; CHECK-NEXT: ld1 { v2.b }[7], [x11] +; CHECK-NEXT: ld1 { v4.b }[3], [x12] ; CHECK-NEXT: ld1 { v5.b }[3], [x9] ; CHECK-NEXT: ld1 { v3.b }[7], [x10] ; CHECK-NEXT: mov v1.b[7], w7 -; CHECK-NEXT: ushll v4.8h, v4.8b, #0 -; CHECK-NEXT: ushll v5.8h, v5.8b, #0 +; CHECK-NEXT: uaddl v4.8h, v5.8b, v4.8b ; CHECK-NEXT: uaddl v2.8h, v3.8b, v2.8b -; CHECK-NEXT: add v3.4h, v5.4h, v4.4h ; CHECK-NEXT: uaddl v0.8h, v1.8b, v0.8b -; CHECK-NEXT: ushll v1.4s, v3.4h, #0 +; CHECK-NEXT: ushll v1.4s, v4.4h, #0 ; CHECK-NEXT: ushll2 v3.4s, v2.8h, #0 ; CHECK-NEXT: ushll v2.4s, v2.4h, #0 ; CHECK-NEXT: stp q3, q1, [x8, #48] Index: llvm/test/CodeGen/AArch64/uadd_sat_vec.ll 
=================================================================== --- llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -112,13 +112,11 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-LABEL: v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr s1, [x0] -; CHECK-NEXT: movi d0, #0xff00ff00ff00ff -; CHECK-NEXT: ldr s2, [x1] -; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: ushll v2.8h, v2.8b, #0 -; CHECK-NEXT: add v1.4h, v1.4h, v2.4h -; CHECK-NEXT: umin v0.4h, v1.4h, v0.4h +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: movi d2, #0xff00ff00ff00ff +; CHECK-NEXT: ldr s1, [x1] +; CHECK-NEXT: uaddl v0.8h, v0.8b, v1.8b +; CHECK-NEXT: umin v0.4h, v0.4h, v2.4h ; CHECK-NEXT: xtn v0.8b, v0.8h ; CHECK-NEXT: str s0, [x2] ; CHECK-NEXT: ret