Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -13545,21 +13545,27 @@
   SDValue N0 = N->getOperand(0);
   unsigned Lg2 = Divisor.countTrailingZeros();
+  // The bias below shifts by Lg2 - 1 and by BitWidth - Lg2. Lg2 is unsigned,
+  // so a divisor of +/-1 (Lg2 == 0) would wrap to out-of-range shift amounts.
+  assert(Lg2 != 0 && "Unexpected sdiv by +/-1");
   SDValue Zero = DAG.getConstant(0, DL, VT);
-  SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
 
-  // Add (N0 < 0) ? Pow2 - 1 : 0;
-  SDValue CCVal;
-  SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL);
-  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
-  SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);
+  // Add (N0 < 0) ? Pow2 - 1 : 0, without a compare and select:
+  //   Sign = N0 >>s (Lg2 - 1)           top Lg2 bits are copies of the sign
+  //   Srl  = Sign >>u (BitWidth - Lg2)  Pow2 - 1 if N0 < 0, otherwise 0
+  SDValue Sign =
+      DAG.getNode(ISD::SRA, DL, VT, N0, DAG.getConstant(Lg2 - 1, DL, MVT::i64));
+  SDValue Srl = DAG.getNode(
+      ISD::SRL, DL, VT, Sign,
+      DAG.getConstant(VT.getScalarSizeInBits() - Lg2, DL, MVT::i64));
+  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
 
-  Created.push_back(Cmp.getNode());
+  Created.push_back(Sign.getNode());
+  Created.push_back(Srl.getNode());
   Created.push_back(Add.getNode());
-  Created.push_back(CSel.getNode());
 
   // Divide by pow2.
   SDValue SRA =
-      DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64));
+      DAG.getNode(ISD::SRA, DL, VT, Add, DAG.getConstant(Lg2, DL, MVT::i64));
 
   // If we're dividing by a positive value, we're done. Otherwise, we must
   // negate the result.
Index: llvm/test/CodeGen/AArch64/fast-isel-sdiv.ll
===================================================================
--- llvm/test/CodeGen/AArch64/fast-isel-sdiv.ll
+++ llvm/test/CodeGen/AArch64/fast-isel-sdiv.ll
@@ -1,36 +1,55 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK
-; RUN: llc -mtriple=aarch64-linux-gnu -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=ISEL
+; RUN: llc -mtriple=aarch64-linux-gnu -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s --check-prefix=FAST
 
 define i32 @sdiv_i32_exact(i32 %a) {
-; CHECK-LABEL: sdiv_i32_exact:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    asr w0, w0, #3
-; CHECK-NEXT:    ret
+; ISEL-LABEL: sdiv_i32_exact:
+; ISEL:       // %bb.0:
+; ISEL-NEXT:    asr w0, w0, #3
+; ISEL-NEXT:    ret
+;
+; FAST-LABEL: sdiv_i32_exact:
+; FAST:       // %bb.0:
+; FAST-NEXT:    asr w0, w0, #3
+; FAST-NEXT:    ret
   %1 = sdiv exact i32 %a, 8
   ret i32 %1
 }
 
 define i32 @sdiv_i32_pos(i32 %a) {
-; CHECK-LABEL: sdiv_i32_pos:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, #7
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    csel w8, w8, w0, lt
-; CHECK-NEXT:    asr w0, w8, #3
-; CHECK-NEXT:    ret
+; ISEL-LABEL: sdiv_i32_pos:
+; ISEL:       // %bb.0:
+; ISEL-NEXT:    asr w8, w0, #2
+; ISEL-NEXT:    add w8, w0, w8, lsr #29
+; ISEL-NEXT:    asr w0, w8, #3
+; ISEL-NEXT:    ret
+;
+; FAST-LABEL: sdiv_i32_pos:
+; FAST:       // %bb.0:
+; FAST-NEXT:    add w8, w0, #7
+; FAST-NEXT:    cmp w0, #0
+; FAST-NEXT:    csel w8, w8, w0, lt
+; FAST-NEXT:    asr w0, w8, #3
+; FAST-NEXT:    ret
   %1 = sdiv i32 %a, 8
   ret i32 %1
 }
 
 define i32 @sdiv_i32_neg(i32 %a) {
-; CHECK-LABEL: sdiv_i32_neg:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, #7
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    csel w8, w8, w0, lt
-; CHECK-NEXT:    neg w0, w8, asr #3
-; CHECK-NEXT:    ret
+; ISEL-LABEL: sdiv_i32_neg:
+; ISEL:       // %bb.0:
+; ISEL-NEXT:    asr w8, w0, #2
+; ISEL-NEXT:    add w8, w0, w8, lsr #29
+; ISEL-NEXT:    neg w0, w8, asr #3
+; ISEL-NEXT:    ret
+;
+; FAST-LABEL: sdiv_i32_neg:
+; FAST:       // %bb.0:
+; FAST-NEXT:    add w8, w0, #7
+; FAST-NEXT:    cmp w0, #0
+; FAST-NEXT:    csel w8, w8, w0, lt
+; FAST-NEXT:    neg w0, w8, asr #3
+; FAST-NEXT:    ret
   %1 = sdiv i32 %a, -8
   ret i32 %1
 }
@@ -39,31 +58,50 @@
-; CHECK-LABEL: sdiv_i64_exact:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    asr x0, x0, #4
-; CHECK-NEXT:    ret
+; ISEL-LABEL: sdiv_i64_exact:
+; ISEL:       // %bb.0:
+; ISEL-NEXT:    asr x0, x0, #4
+; ISEL-NEXT:    ret
+;
+; FAST-LABEL: sdiv_i64_exact:
+; FAST:       // %bb.0:
+; FAST-NEXT:    asr x0, x0, #4
+; FAST-NEXT:    ret
   %1 = sdiv exact i64 %a, 16
   ret i64 %1
 }
 
 define i64 @sdiv_i64_pos(i64 %a) {
-; CHECK-LABEL: sdiv_i64_pos:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x0, #15
-; CHECK-NEXT:    cmp x0, #0
-; CHECK-NEXT:    csel x8, x8, x0, lt
-; CHECK-NEXT:    asr x0, x8, #4
-; CHECK-NEXT:    ret
+; ISEL-LABEL: sdiv_i64_pos:
+; ISEL:       // %bb.0:
+; ISEL-NEXT:    asr x8, x0, #3
+; ISEL-NEXT:    add x8, x0, x8, lsr #60
+; ISEL-NEXT:    asr x0, x8, #4
+; ISEL-NEXT:    ret
+;
+; FAST-LABEL: sdiv_i64_pos:
+; FAST:       // %bb.0:
+; FAST-NEXT:    add x8, x0, #15
+; FAST-NEXT:    cmp x0, #0
+; FAST-NEXT:    csel x8, x8, x0, lt
+; FAST-NEXT:    asr x0, x8, #4
+; FAST-NEXT:    ret
   %1 = sdiv i64 %a, 16
   ret i64 %1
 }
 
 define i64 @sdiv_i64_neg(i64 %a) {
-; CHECK-LABEL: sdiv_i64_neg:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x0, #15
-; CHECK-NEXT:    cmp x0, #0
-; CHECK-NEXT:    csel x8, x8, x0, lt
-; CHECK-NEXT:    neg x0, x8, asr #4
-; CHECK-NEXT:    ret
+; ISEL-LABEL: sdiv_i64_neg:
+; ISEL:       // %bb.0:
+; ISEL-NEXT:    asr x8, x0, #3
+; ISEL-NEXT:    add x8, x0, x8, lsr #60
+; ISEL-NEXT:    neg x0, x8, asr #4
+; ISEL-NEXT:    ret
+;
+; FAST-LABEL: sdiv_i64_neg:
+; FAST:       // %bb.0:
+; FAST-NEXT:    add x8, x0, #15
+; FAST-NEXT:    cmp x0, #0
+; FAST-NEXT:    csel x8, x8, x0, lt
+; FAST-NEXT:    neg x0, x8, asr #4
+; FAST-NEXT:    ret
   %1 = sdiv i64 %a, -16
   ret i64 %1
 }
Index: llvm/test/CodeGen/AArch64/sdivpow2.ll
=================================================================== --- llvm/test/CodeGen/AArch64/sdivpow2.ll +++ llvm/test/CodeGen/AArch64/sdivpow2.ll @@ -1,88 +1,137 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -fast-isel=0 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ISEL -; RUN: llc -mtriple=aarch64-linux-gnu -fast-isel=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,FAST +; RUN: llc -mtriple=aarch64-linux-gnu -fast-isel=0 -verify-machineinstrs < %s | FileCheck %s --check-prefix=ISEL +; RUN: llc -mtriple=aarch64-linux-gnu -fast-isel=1 -verify-machineinstrs < %s | FileCheck %s --check-prefix=FAST define i32 @test1(i32 %x) { -; CHECK-LABEL: test1: -; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, #7 -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: csel w8, w8, w0, lt -; CHECK-NEXT: asr w0, w8, #3 -; CHECK-NEXT: ret +; ISEL-LABEL: test1: +; ISEL: // %bb.0: +; ISEL-NEXT: asr w8, w0, #2 +; ISEL-NEXT: add w8, w0, w8, lsr #29 +; ISEL-NEXT: asr w0, w8, #3 +; ISEL-NEXT: ret +; +; FAST-LABEL: test1: +; FAST: // %bb.0: +; FAST-NEXT: add w8, w0, #7 +; FAST-NEXT: cmp w0, #0 +; FAST-NEXT: csel w8, w8, w0, lt +; FAST-NEXT: asr w0, w8, #3 +; FAST-NEXT: ret %div = sdiv i32 %x, 8 ret i32 %div } define i32 @test2(i32 %x) { -; CHECK-LABEL: test2: -; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, #7 -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: csel w8, w8, w0, lt -; CHECK-NEXT: neg w0, w8, asr #3 -; CHECK-NEXT: ret +; ISEL-LABEL: test2: +; ISEL: // %bb.0: +; ISEL-NEXT: asr w8, w0, #2 +; ISEL-NEXT: add w8, w0, w8, lsr #29 +; ISEL-NEXT: neg w0, w8, asr #3 +; ISEL-NEXT: ret +; +; FAST-LABEL: test2: +; FAST: // %bb.0: +; FAST-NEXT: add w8, w0, #7 +; FAST-NEXT: cmp w0, #0 +; FAST-NEXT: csel w8, w8, w0, lt +; FAST-NEXT: neg w0, w8, asr #3 +; FAST-NEXT: ret %div = sdiv i32 %x, -8 ret i32 %div } define i32 @test3(i32 %x) { -; CHECK-LABEL: test3: -; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, #31 
-; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: csel w8, w8, w0, lt -; CHECK-NEXT: asr w0, w8, #5 -; CHECK-NEXT: ret +; ISEL-LABEL: test3: +; ISEL: // %bb.0: +; ISEL-NEXT: asr w8, w0, #4 +; ISEL-NEXT: add w8, w0, w8, lsr #27 +; ISEL-NEXT: asr w0, w8, #5 +; ISEL-NEXT: ret +; +; FAST-LABEL: test3: +; FAST: // %bb.0: +; FAST-NEXT: add w8, w0, #31 +; FAST-NEXT: cmp w0, #0 +; FAST-NEXT: csel w8, w8, w0, lt +; FAST-NEXT: asr w0, w8, #5 +; FAST-NEXT: ret %div = sdiv i32 %x, 32 ret i32 %div } define i64 @test4(i64 %x) { -; CHECK-LABEL: test4: -; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #7 -; CHECK-NEXT: cmp x0, #0 -; CHECK-NEXT: csel x8, x8, x0, lt -; CHECK-NEXT: asr x0, x8, #3 -; CHECK-NEXT: ret +; ISEL-LABEL: test4: +; ISEL: // %bb.0: +; ISEL-NEXT: asr x8, x0, #2 +; ISEL-NEXT: add x8, x0, x8, lsr #61 +; ISEL-NEXT: asr x0, x8, #3 +; ISEL-NEXT: ret +; +; FAST-LABEL: test4: +; FAST: // %bb.0: +; FAST-NEXT: add x8, x0, #7 +; FAST-NEXT: cmp x0, #0 +; FAST-NEXT: csel x8, x8, x0, lt +; FAST-NEXT: asr x0, x8, #3 +; FAST-NEXT: ret %div = sdiv i64 %x, 8 ret i64 %div } define i64 @test5(i64 %x) { -; CHECK-LABEL: test5: -; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #7 -; CHECK-NEXT: cmp x0, #0 -; CHECK-NEXT: csel x8, x8, x0, lt -; CHECK-NEXT: neg x0, x8, asr #3 -; CHECK-NEXT: ret +; ISEL-LABEL: test5: +; ISEL: // %bb.0: +; ISEL-NEXT: asr x8, x0, #2 +; ISEL-NEXT: add x8, x0, x8, lsr #61 +; ISEL-NEXT: neg x0, x8, asr #3 +; ISEL-NEXT: ret +; +; FAST-LABEL: test5: +; FAST: // %bb.0: +; FAST-NEXT: add x8, x0, #7 +; FAST-NEXT: cmp x0, #0 +; FAST-NEXT: csel x8, x8, x0, lt +; FAST-NEXT: neg x0, x8, asr #3 +; FAST-NEXT: ret %div = sdiv i64 %x, -8 ret i64 %div } define i64 @test6(i64 %x) { -; CHECK-LABEL: test6: -; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #63 -; CHECK-NEXT: cmp x0, #0 -; CHECK-NEXT: csel x8, x8, x0, lt -; CHECK-NEXT: asr x0, x8, #6 -; CHECK-NEXT: ret +; ISEL-LABEL: test6: +; ISEL: // %bb.0: +; ISEL-NEXT: asr x8, x0, #5 +; ISEL-NEXT: add x8, x0, x8, lsr #58 +; ISEL-NEXT: asr 
x0, x8, #6 +; ISEL-NEXT: ret +; +; FAST-LABEL: test6: +; FAST: // %bb.0: +; FAST-NEXT: add x8, x0, #63 +; FAST-NEXT: cmp x0, #0 +; FAST-NEXT: csel x8, x8, x0, lt +; FAST-NEXT: asr x0, x8, #6 +; FAST-NEXT: ret %div = sdiv i64 %x, 64 ret i64 %div } define i64 @test7(i64 %x) { -; CHECK-LABEL: test7: -; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #281474976710655 -; CHECK-NEXT: cmp x0, #0 -; CHECK-NEXT: add x8, x0, x8 -; CHECK-NEXT: csel x8, x8, x0, lt -; CHECK-NEXT: asr x0, x8, #48 -; CHECK-NEXT: ret +; ISEL-LABEL: test7: +; ISEL: // %bb.0: +; ISEL-NEXT: asr x8, x0, #47 +; ISEL-NEXT: add x8, x0, x8, lsr #16 +; ISEL-NEXT: asr x0, x8, #48 +; ISEL-NEXT: ret +; +; FAST-LABEL: test7: +; FAST: // %bb.0: +; FAST-NEXT: mov x8, #281474976710655 +; FAST-NEXT: cmp x0, #0 +; FAST-NEXT: add x8, x0, x8 +; FAST-NEXT: csel x8, x8, x0, lt +; FAST-NEXT: asr x0, x8, #48 +; FAST-NEXT: ret %div = sdiv i64 %x, 281474976710656 ret i64 %div } @@ -90,8 +139,7 @@ define i64 @test8(i64 %x) { ; ISEL-LABEL: test8: ; ISEL: // %bb.0: -; ISEL-NEXT: cmp x0, #0 -; ISEL-NEXT: cinc x8, x0, lt +; ISEL-NEXT: add x8, x0, x0, lsr #63 ; ISEL-NEXT: asr x0, x8, #1 ; ISEL-NEXT: ret ; Index: llvm/test/CodeGen/AArch64/srem-lkk.ll =================================================================== --- llvm/test/CodeGen/AArch64/srem-lkk.ll +++ llvm/test/CodeGen/AArch64/srem-lkk.ll @@ -95,9 +95,8 @@ define i32 @dont_fold_srem_power_of_two(i32 %x) { ; CHECK-LABEL: dont_fold_srem_power_of_two: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, #63 -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: csel w8, w8, w0, lt +; CHECK-NEXT: asr w8, w0, #5 +; CHECK-NEXT: add w8, w0, w8, lsr #26 ; CHECK-NEXT: and w8, w8, #0xffffffc0 ; CHECK-NEXT: sub w0, w0, w8 ; CHECK-NEXT: ret @@ -119,10 +118,8 @@ define i32 @dont_fold_srem_i32_smax(i32 %x) { ; CHECK-LABEL: dont_fold_srem_i32_smax: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #2147483647 -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: add w8, w0, w8 -; CHECK-NEXT: csel w8, w8, w0, lt +; CHECK-NEXT: asr 
w8, w0, #30 +; CHECK-NEXT: add w8, w0, w8, lsr #1 ; CHECK-NEXT: and w8, w8, #0x80000000 ; CHECK-NEXT: add w0, w0, w8 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/srem-seteq.ll =================================================================== --- llvm/test/CodeGen/AArch64/srem-seteq.ll +++ llvm/test/CodeGen/AArch64/srem-seteq.ll @@ -234,9 +234,8 @@ define i32 @test_srem_pow2(i32 %X) nounwind { ; CHECK-LABEL: test_srem_pow2: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, #15 -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: csel w8, w8, w0, lt +; CHECK-NEXT: asr w8, w0, #3 +; CHECK-NEXT: add w8, w0, w8, lsr #28 ; CHECK-NEXT: and w8, w8, #0xfffffff0 ; CHECK-NEXT: cmp w0, w8 ; CHECK-NEXT: cset w0, eq @@ -251,10 +250,8 @@ define i32 @test_srem_int_min(i32 %X) nounwind { ; CHECK-LABEL: test_srem_int_min: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #2147483647 -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: add w8, w0, w8 -; CHECK-NEXT: csel w8, w8, w0, lt +; CHECK-NEXT: asr w8, w0, #30 +; CHECK-NEXT: add w8, w0, w8, lsr #1 ; CHECK-NEXT: and w8, w8, #0x80000000 ; CHECK-NEXT: cmn w0, w8 ; CHECK-NEXT: cset w0, eq Index: llvm/test/CodeGen/AArch64/srem-vector-lkk.ll =================================================================== --- llvm/test/CodeGen/AArch64/srem-vector-lkk.ll +++ llvm/test/CodeGen/AArch64/srem-vector-lkk.ll @@ -156,39 +156,33 @@ ; CHECK-LABEL: dont_fold_srem_power_of_two: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: smov w9, v0.h[1] -; CHECK-NEXT: smov w10, v0.h[0] +; CHECK-NEXT: smov w9, v0.h[0] +; CHECK-NEXT: smov w10, v0.h[3] +; CHECK-NEXT: smov w11, v0.h[1] ; CHECK-NEXT: mov w8, #37253 ; CHECK-NEXT: movk w8, #44150, lsl #16 -; CHECK-NEXT: add w11, w9, #31 -; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: add w12, w10, #63 -; CHECK-NEXT: csel w11, w11, w9, lt -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: and w11, w11, #0xffffffe0 -; CHECK-NEXT: csel w12, w12, w10, lt -; CHECK-NEXT: sub w9, w9, w11 +; CHECK-NEXT: add w12, w9, w9, lsr #26 
+; CHECK-NEXT: smull x8, w10, w8 ; CHECK-NEXT: and w12, w12, #0xffffffc0 -; CHECK-NEXT: sub w10, w10, w12 -; CHECK-NEXT: smov w12, v0.h[3] -; CHECK-NEXT: fmov s1, w10 -; CHECK-NEXT: smov w10, v0.h[2] -; CHECK-NEXT: smull x8, w12, w8 -; CHECK-NEXT: mov v1.h[1], w9 +; CHECK-NEXT: add w13, w11, w11, lsr #27 +; CHECK-NEXT: sub w9, w9, w12 +; CHECK-NEXT: and w12, w13, #0xffffffe0 +; CHECK-NEXT: smov w13, v0.h[2] ; CHECK-NEXT: lsr x8, x8, #32 -; CHECK-NEXT: add w9, w10, #7 -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csel w9, w9, w10, lt -; CHECK-NEXT: add w8, w8, w12 +; CHECK-NEXT: sub w11, w11, w12 +; CHECK-NEXT: add w8, w8, w10 +; CHECK-NEXT: fmov s0, w9 +; CHECK-NEXT: asr w9, w8, #6 +; CHECK-NEXT: add w8, w9, w8, lsr #31 +; CHECK-NEXT: add w9, w13, w13, lsr #29 +; CHECK-NEXT: mov v0.h[1], w11 ; CHECK-NEXT: and w9, w9, #0xfffffff8 -; CHECK-NEXT: sub w9, w10, w9 -; CHECK-NEXT: asr w10, w8, #6 -; CHECK-NEXT: add w8, w10, w8, lsr #31 -; CHECK-NEXT: mov w10, #95 -; CHECK-NEXT: mov v1.h[2], w9 -; CHECK-NEXT: msub w8, w8, w10, w12 -; CHECK-NEXT: mov v1.h[3], w8 -; CHECK-NEXT: fmov d0, d1 +; CHECK-NEXT: sub w9, w13, w9 +; CHECK-NEXT: mov w11, #95 +; CHECK-NEXT: msub w8, w8, w11, w10 +; CHECK-NEXT: mov v0.h[2], w9 +; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -245,35 +239,32 @@ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: smov w8, v0.h[2] ; CHECK-NEXT: mov w9, #17097 -; CHECK-NEXT: smov w10, v0.h[1] ; CHECK-NEXT: movk w9, #45590, lsl #16 -; CHECK-NEXT: mov w11, #32767 +; CHECK-NEXT: smov w10, v0.h[1] ; CHECK-NEXT: smov w12, v0.h[3] -; CHECK-NEXT: movi d1, #0000000000000000 +; CHECK-NEXT: movi d0, #0000000000000000 +; CHECK-NEXT: mov w11, #23 ; CHECK-NEXT: smull x9, w8, w9 -; CHECK-NEXT: add w11, w10, w11 -; CHECK-NEXT: cmp w10, #0 ; CHECK-NEXT: lsr x9, x9, #32 -; CHECK-NEXT: csel w11, w11, w10, lt ; CHECK-NEXT: add w9, w9, w8 -; CHECK-NEXT: and w11, w11, 
#0xffff8000 ; CHECK-NEXT: asr w13, w9, #4 -; CHECK-NEXT: sub w10, w10, w11 -; CHECK-NEXT: mov w11, #47143 ; CHECK-NEXT: add w9, w13, w9, lsr #31 -; CHECK-NEXT: mov w13, #23 -; CHECK-NEXT: movk w11, #24749, lsl #16 -; CHECK-NEXT: mov v1.h[1], w10 -; CHECK-NEXT: msub w8, w9, w13, w8 -; CHECK-NEXT: smull x9, w12, w11 +; CHECK-NEXT: add w13, w10, w10, lsr #17 +; CHECK-NEXT: and w13, w13, #0xffff8000 +; CHECK-NEXT: sub w10, w10, w13 +; CHECK-NEXT: mov w13, #47143 +; CHECK-NEXT: movk w13, #24749, lsl #16 +; CHECK-NEXT: msub w8, w9, w11, w8 +; CHECK-NEXT: smull x9, w12, w13 +; CHECK-NEXT: mov v0.h[1], w10 ; CHECK-NEXT: lsr x10, x9, #63 ; CHECK-NEXT: asr x9, x9, #43 ; CHECK-NEXT: add w9, w9, w10 ; CHECK-NEXT: mov w10, #5423 -; CHECK-NEXT: mov v1.h[2], w8 +; CHECK-NEXT: mov v0.h[2], w8 ; CHECK-NEXT: msub w8, w9, w10, w12 -; CHECK-NEXT: mov v1.h[3], w8 -; CHECK-NEXT: fmov d0, d1 +; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1