Index: llvm/test/CodeGen/AArch64/and-mask-removal.ll =================================================================== --- llvm/test/CodeGen/AArch64/and-mask-removal.ll +++ llvm/test/CodeGen/AArch64/and-mask-removal.ll @@ -270,15 +270,15 @@ define zeroext i1 @test16_0(i16 zeroext %x) align 2 { ; CHECK-SD-LABEL: test16_0: ; CHECK-SD: ; %bb.0: ; %entry -; CHECK-SD-NEXT: mov w8, #5086 +; CHECK-SD-NEXT: mov w8, #5086 ; =0x13de ; CHECK-SD-NEXT: cmp w0, w8 ; CHECK-SD-NEXT: cset w0, ne ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test16_0: ; CHECK-GI: ; %bb.0: ; %entry -; CHECK-GI-NEXT: mov w8, #18547 -; CHECK-GI-NEXT: mov w9, #23633 +; CHECK-GI-NEXT: mov w8, #18547 ; =0x4873 +; CHECK-GI-NEXT: mov w9, #23633 ; =0x5c51 ; CHECK-GI-NEXT: add w8, w0, w8 ; CHECK-GI-NEXT: cmp w9, w8, uxth ; CHECK-GI-NEXT: cset w0, ne @@ -296,8 +296,8 @@ define zeroext i1 @test16_2(i16 zeroext %x) align 2 { ; CHECK-SD-LABEL: test16_2: ; CHECK-SD: ; %bb.0: ; %entry -; CHECK-SD-NEXT: mov w8, #16882 -; CHECK-SD-NEXT: mov w9, #40700 +; CHECK-SD-NEXT: mov w8, #16882 ; =0x41f2 +; CHECK-SD-NEXT: mov w9, #40700 ; =0x9efc ; CHECK-SD-NEXT: add w8, w0, w8 ; CHECK-SD-NEXT: cmp w9, w8, uxth ; CHECK-SD-NEXT: cset w0, hi @@ -305,8 +305,8 @@ ; ; CHECK-GI-LABEL: test16_2: ; CHECK-GI: ; %bb.0: ; %entry -; CHECK-GI-NEXT: mov w8, #16882 -; CHECK-GI-NEXT: mov w9, #40699 +; CHECK-GI-NEXT: mov w8, #16882 ; =0x41f2 +; CHECK-GI-NEXT: mov w9, #40699 ; =0x9efb ; CHECK-GI-NEXT: add w8, w0, w8 ; CHECK-GI-NEXT: cmp w9, w8, uxth ; CHECK-GI-NEXT: cset w0, hs @@ -324,15 +324,15 @@ define zeroext i1 @test16_3(i16 zeroext %x) align 2 { ; CHECK-SD-LABEL: test16_3: ; CHECK-SD: ; %bb.0: ; %entry -; CHECK-SD-NEXT: mov w8, #53200 +; CHECK-SD-NEXT: mov w8, #53200 ; =0xcfd0 ; CHECK-SD-NEXT: cmp w0, w8 ; CHECK-SD-NEXT: cset w0, ne ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test16_3: ; CHECK-GI: ; %bb.0: ; %entry -; CHECK-GI-NEXT: mov w8, #29283 -; CHECK-GI-NEXT: mov w9, #16947 +; CHECK-GI-NEXT: mov w8, #29283 ; =0x7263 +; CHECK-GI-NEXT: mov 
w9, #16947 ; =0x4233 ; CHECK-GI-NEXT: add w8, w0, w8 ; CHECK-GI-NEXT: cmp w9, w8, uxth ; CHECK-GI-NEXT: cset w0, ne @@ -350,8 +350,8 @@ define zeroext i1 @test16_4(i16 zeroext %x) align 2 { ; CHECK-SD-LABEL: test16_4: ; CHECK-SD: ; %bb.0: ; %entry -; CHECK-SD-NEXT: mov w8, #29985 -; CHECK-SD-NEXT: mov w9, #15676 +; CHECK-SD-NEXT: mov w8, #29985 ; =0x7521 +; CHECK-SD-NEXT: mov w9, #15676 ; =0x3d3c ; CHECK-SD-NEXT: add w8, w0, w8 ; CHECK-SD-NEXT: cmp w9, w8, uxth ; CHECK-SD-NEXT: cset w0, lo @@ -359,8 +359,8 @@ ; ; CHECK-GI-LABEL: test16_4: ; CHECK-GI: ; %bb.0: ; %entry -; CHECK-GI-NEXT: mov w8, #29985 -; CHECK-GI-NEXT: mov w9, #15677 +; CHECK-GI-NEXT: mov w8, #29985 ; =0x7521 +; CHECK-GI-NEXT: mov w9, #15677 ; =0x3d3d ; CHECK-GI-NEXT: add w8, w0, w8 ; CHECK-GI-NEXT: cmp w9, w8, uxth ; CHECK-GI-NEXT: cset w0, ls @@ -378,15 +378,15 @@ define zeroext i1 @test16_5(i16 zeroext %x) align 2 { ; CHECK-SD-LABEL: test16_5: ; CHECK-SD: ; %bb.0: ; %entry -; CHECK-SD-NEXT: mov w8, #23282 +; CHECK-SD-NEXT: mov w8, #23282 ; =0x5af2 ; CHECK-SD-NEXT: cmp w0, w8 ; CHECK-SD-NEXT: cset w0, ne ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test16_5: ; CHECK-GI: ; %bb.0: ; %entry -; CHECK-GI-NEXT: mov w8, #-25214 -; CHECK-GI-NEXT: mov w9, #63604 +; CHECK-GI-NEXT: mov w8, #-25214 ; =0xffff9d82 +; CHECK-GI-NEXT: mov w9, #63604 ; =0xf874 ; CHECK-GI-NEXT: add w8, w0, w8 ; CHECK-GI-NEXT: cmp w9, w8, uxth ; CHECK-GI-NEXT: cset w0, ne @@ -404,8 +404,8 @@ define zeroext i1 @test16_6(i16 zeroext %x) align 2 { ; CHECK-SD-LABEL: test16_6: ; CHECK-SD: ; %bb.0: ; %entry -; CHECK-SD-NEXT: mov w8, #-32194 -; CHECK-SD-NEXT: mov w9, #24320 +; CHECK-SD-NEXT: mov w8, #-32194 ; =0xffff823e +; CHECK-SD-NEXT: mov w9, #24320 ; =0x5f00 ; CHECK-SD-NEXT: add w8, w0, w8 ; CHECK-SD-NEXT: cmp w8, w9 ; CHECK-SD-NEXT: cset w0, hi @@ -413,8 +413,8 @@ ; ; CHECK-GI-LABEL: test16_6: ; CHECK-GI: ; %bb.0: ; %entry -; CHECK-GI-NEXT: mov w8, #-32194 -; CHECK-GI-NEXT: mov w9, #24321 +; CHECK-GI-NEXT: mov w8, #-32194 ; =0xffff823e +; 
CHECK-GI-NEXT: mov w9, #24321 ; =0x5f01 ; CHECK-GI-NEXT: add w8, w0, w8 ; CHECK-GI-NEXT: cmp w8, w9 ; CHECK-GI-NEXT: cset w0, hs @@ -432,8 +432,8 @@ define zeroext i1 @test16_7(i16 zeroext %x) align 2 { ; CHECK-SD-LABEL: test16_7: ; CHECK-SD: ; %bb.0: ; %entry -; CHECK-SD-NEXT: mov w8, #9272 -; CHECK-SD-NEXT: mov w9, #22619 +; CHECK-SD-NEXT: mov w8, #9272 ; =0x2438 +; CHECK-SD-NEXT: mov w9, #22619 ; =0x585b ; CHECK-SD-NEXT: add w8, w0, w8 ; CHECK-SD-NEXT: cmp w9, w8, uxth ; CHECK-SD-NEXT: cset w0, lo @@ -441,8 +441,8 @@ ; ; CHECK-GI-LABEL: test16_7: ; CHECK-GI: ; %bb.0: ; %entry -; CHECK-GI-NEXT: mov w8, #9272 -; CHECK-GI-NEXT: mov w9, #22620 +; CHECK-GI-NEXT: mov w8, #9272 ; =0x2438 +; CHECK-GI-NEXT: mov w9, #22620 ; =0x585c ; CHECK-GI-NEXT: add w8, w0, w8 ; CHECK-GI-NEXT: cmp w9, w8, uxth ; CHECK-GI-NEXT: cset w0, ls @@ -460,7 +460,7 @@ define zeroext i1 @test16_8(i16 zeroext %x) align 2 { ; CHECK-SD-LABEL: test16_8: ; CHECK-SD: ; %bb.0: ; %entry -; CHECK-SD-NEXT: mov w8, #4919 +; CHECK-SD-NEXT: mov w8, #4919 ; =0x1337 ; CHECK-SD-NEXT: cmp w0, w8 ; CHECK-SD-NEXT: cset w0, ne ; CHECK-SD-NEXT: ret @@ -468,7 +468,7 @@ ; CHECK-GI-LABEL: test16_8: ; CHECK-GI: ; %bb.0: ; %entry ; CHECK-GI-NEXT: add w8, w0, #1787 -; CHECK-GI-NEXT: mov w9, #6706 +; CHECK-GI-NEXT: mov w9, #6706 ; =0x1a32 ; CHECK-GI-NEXT: cmp w9, w8, uxth ; CHECK-GI-NEXT: cset w0, ne ; CHECK-GI-NEXT: ret Index: llvm/test/CodeGen/AArch64/arm64-stp.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-stp.ll +++ llvm/test/CodeGen/AArch64/arm64-stp.ll @@ -1,44 +1,55 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-enable-stp-suppress=false -verify-machineinstrs -mcpu=cyclone | FileCheck %s -; CHECK-LABEL: stp_int -; CHECK: stp w0, w1, [x2] define void @stp_int(i32 %a, i32 %b, ptr nocapture %p) nounwind { +; CHECK-LABEL: stp_int: +; CHECK: // %bb.0: +; 
CHECK-NEXT: stp w0, w1, [x2] +; CHECK-NEXT: ret store i32 %a, ptr %p, align 4 %add.ptr = getelementptr inbounds i32, ptr %p, i64 1 store i32 %b, ptr %add.ptr, align 4 ret void } -; CHECK-LABEL: stp_long -; CHECK: stp x0, x1, [x2] define void @stp_long(i64 %a, i64 %b, ptr nocapture %p) nounwind { +; CHECK-LABEL: stp_long: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x0, x1, [x2] +; CHECK-NEXT: ret store i64 %a, ptr %p, align 8 %add.ptr = getelementptr inbounds i64, ptr %p, i64 1 store i64 %b, ptr %add.ptr, align 8 ret void } -; CHECK-LABEL: stp_float -; CHECK: stp s0, s1, [x0] define void @stp_float(float %a, float %b, ptr nocapture %p) nounwind { +; CHECK-LABEL: stp_float: +; CHECK: // %bb.0: +; CHECK-NEXT: stp s0, s1, [x0] +; CHECK-NEXT: ret store float %a, ptr %p, align 4 %add.ptr = getelementptr inbounds float, ptr %p, i64 1 store float %b, ptr %add.ptr, align 4 ret void } -; CHECK-LABEL: stp_double -; CHECK: stp d0, d1, [x0] define void @stp_double(double %a, double %b, ptr nocapture %p) nounwind { +; CHECK-LABEL: stp_double: +; CHECK: // %bb.0: +; CHECK-NEXT: stp d0, d1, [x0] +; CHECK-NEXT: ret store double %a, ptr %p, align 8 %add.ptr = getelementptr inbounds double, ptr %p, i64 1 store double %b, ptr %add.ptr, align 8 ret void } -; CHECK-LABEL: stp_doublex2 -; CHECK: stp q0, q1, [x0] define void @stp_doublex2(<2 x double> %a, <2 x double> %b, ptr nocapture %p) nounwind { +; CHECK-LABEL: stp_doublex2: +; CHECK: // %bb.0: +; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: ret store <2 x double> %a, ptr %p, align 16 %add.ptr = getelementptr inbounds <2 x double>, ptr %p, i64 1 store <2 x double> %b, ptr %add.ptr, align 16 @@ -47,9 +58,10 @@ ; Test the load/store optimizer---combine ldurs into a ldp, if appropriate define void @stur_int(i32 %a, i32 %b, ptr nocapture %p) nounwind { -; CHECK-LABEL: stur_int -; CHECK: stp w{{[0-9]+}}, {{w[0-9]+}}, [x{{[0-9]+}}, #-8] -; CHECK-NEXT: ret +; CHECK-LABEL: stur_int: +; CHECK: // %bb.0: +; CHECK-NEXT: stp w1, w0, [x2, #-8] +; 
CHECK-NEXT: ret %p1 = getelementptr inbounds i32, ptr %p, i32 -1 store i32 %a, ptr %p1, align 2 %p2 = getelementptr inbounds i32, ptr %p, i32 -2 @@ -58,9 +70,10 @@ } define void @stur_long(i64 %a, i64 %b, ptr nocapture %p) nounwind { -; CHECK-LABEL: stur_long -; CHECK: stp x{{[0-9]+}}, {{x[0-9]+}}, [x{{[0-9]+}}, #-16] -; CHECK-NEXT: ret +; CHECK-LABEL: stur_long: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x1, x0, [x2, #-16] +; CHECK-NEXT: ret %p1 = getelementptr inbounds i64, ptr %p, i32 -1 store i64 %a, ptr %p1, align 2 %p2 = getelementptr inbounds i64, ptr %p, i32 -2 @@ -69,9 +82,10 @@ } define void @stur_float(float %a, float %b, ptr nocapture %p) nounwind { -; CHECK-LABEL: stur_float -; CHECK: stp s{{[0-9]+}}, {{s[0-9]+}}, [x{{[0-9]+}}, #-8] -; CHECK-NEXT: ret +; CHECK-LABEL: stur_float: +; CHECK: // %bb.0: +; CHECK-NEXT: stp s1, s0, [x0, #-8] +; CHECK-NEXT: ret %p1 = getelementptr inbounds float, ptr %p, i32 -1 store float %a, ptr %p1, align 2 %p2 = getelementptr inbounds float, ptr %p, i32 -2 @@ -80,9 +94,10 @@ } define void @stur_double(double %a, double %b, ptr nocapture %p) nounwind { -; CHECK-LABEL: stur_double -; CHECK: stp d{{[0-9]+}}, {{d[0-9]+}}, [x{{[0-9]+}}, #-16] -; CHECK-NEXT: ret +; CHECK-LABEL: stur_double: +; CHECK: // %bb.0: +; CHECK-NEXT: stp d1, d0, [x0, #-16] +; CHECK-NEXT: ret %p1 = getelementptr inbounds double, ptr %p, i32 -1 store double %a, ptr %p1, align 2 %p2 = getelementptr inbounds double, ptr %p, i32 -2 @@ -91,9 +106,10 @@ } define void @stur_doublex2(<2 x double> %a, <2 x double> %b, ptr nocapture %p) nounwind { -; CHECK-LABEL: stur_doublex2 -; CHECK: stp q{{[0-9]+}}, q{{[0-9]+}}, [x{{[0-9]+}}, #-32] -; CHECK-NEXT: ret +; CHECK-LABEL: stur_doublex2: +; CHECK: // %bb.0: +; CHECK-NEXT: stp q1, q0, [x0, #-32] +; CHECK-NEXT: ret %p1 = getelementptr inbounds <2 x double>, ptr %p, i32 -1 store <2 x double> %a, ptr %p1, align 2 %p2 = getelementptr inbounds <2 x double>, ptr %p, i32 -2 @@ -102,13 +118,12 @@ } define void @splat_v4i32(i32 %v, 
ptr %p) { +; CHECK-LABEL: splat_v4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: dup v0.4s, w0 +; CHECK-NEXT: str q0, [x1] +; CHECK-NEXT: ret entry: - -; CHECK-LABEL: splat_v4i32 -; CHECK-DAG: dup v0.4s, w0 -; CHECK-DAG: str q0, [x1] -; CHECK: ret - %p17 = insertelement <4 x i32> undef, i32 %v, i32 0 %p18 = insertelement <4 x i32> %p17, i32 %v, i32 1 %p19 = insertelement <4 x i32> %p18, i32 %v, i32 2 @@ -120,17 +135,22 @@ ; Check that a non-splat store that is storing a vector created by 4 ; insertelements that is not a splat vector does not get split. define void @nosplat_v4i32(i32 %v, ptr %p) { -entry: - ; CHECK-LABEL: nosplat_v4i32: -; CHECK: str w0, -; CHECK: ldr q[[REG1:[0-9]+]], -; CHECK-DAG: mov v[[REG1]].s[1], w0 -; CHECK-DAG: mov v[[REG1]].s[2], w0 -; CHECK-DAG: mov v[[REG1]].s[3], w0 -; CHECK: str q[[REG1]], [x1] -; CHECK: ret - +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: bfi x8, x0, #2, #2 +; CHECK-NEXT: str w0, [x8] +; CHECK-NEXT: ldr q0, [sp] +; CHECK-NEXT: mov v0.s[1], w0 +; CHECK-NEXT: mov v0.s[2], w0 +; CHECK-NEXT: mov v0.s[3], w0 +; CHECK-NEXT: str q0, [x1] +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret +entry: %p17 = insertelement <4 x i32> undef, i32 %v, i32 %v %p18 = insertelement <4 x i32> %p17, i32 %v, i32 1 %p19 = insertelement <4 x i32> %p18, i32 %v, i32 2 @@ -142,15 +162,14 @@ ; Check that a non-splat store that is storing a vector created by 4 ; insertelements that is not a splat vector does not get split. 
define void @nosplat2_v4i32(i32 %v, ptr %p, <4 x i32> %vin) { -entry: - ; CHECK-LABEL: nosplat2_v4i32: -; CHECK: mov v[[REG1]].s[1], w0 -; CHECK-DAG: mov v[[REG1]].s[2], w0 -; CHECK-DAG: mov v[[REG1]].s[3], w0 -; CHECK: str q[[REG1]], [x1] -; CHECK: ret - +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov v0.s[1], w0 +; CHECK-NEXT: mov v0.s[2], w0 +; CHECK-NEXT: mov v0.s[3], w0 +; CHECK-NEXT: str q0, [x1] +; CHECK-NEXT: ret +entry: %p18 = insertelement <4 x i32> %vin, i32 %v, i32 1 %p19 = insertelement <4 x i32> %p18, i32 %v, i32 2 %p20 = insertelement <4 x i32> %p19, i32 %v, i32 3 @@ -159,12 +178,14 @@ } ; Read of %b to compute %tmp2 shouldn't prevent formation of stp -; CHECK-LABEL: stp_int_rar_hazard -; CHECK: ldr [[REG:w[0-9]+]], [x2, #8] -; CHECK: add w8, [[REG]], w1 -; CHECK: stp w0, w1, [x2] -; CHECK: ret define i32 @stp_int_rar_hazard(i32 %a, i32 %b, ptr nocapture %p) nounwind { +; CHECK-LABEL: stp_int_rar_hazard: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x2, #8] +; CHECK-NEXT: add w8, w8, w1 +; CHECK-NEXT: stp w0, w1, [x2] +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ret store i32 %a, ptr %p, align 4 %ld.ptr = getelementptr inbounds i32, ptr %p, i64 2 %tmp = load i32, ptr %ld.ptr, align 4 @@ -175,12 +196,13 @@ } ; Read of %b to compute %tmp2 shouldn't prevent formation of stp -; CHECK-LABEL: stp_int_rar_hazard_after -; CHECK: ldr [[REG:w[0-9]+]], [x3, #4] -; CHECK: add w0, [[REG]], w2 -; CHECK: stp w1, w2, [x3] -; CHECK: ret define i32 @stp_int_rar_hazard_after(i32 %w0, i32 %a, i32 %b, ptr nocapture %p) nounwind { +; CHECK-LABEL: stp_int_rar_hazard_after: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x3, #4] +; CHECK-NEXT: add w0, w8, w2 +; CHECK-NEXT: stp w1, w2, [x3] +; CHECK-NEXT: ret store i32 %a, ptr %p, align 4 %ld.ptr = getelementptr inbounds i32, ptr %p, i64 1 %tmp = load i32, ptr %ld.ptr, align 4 Index: llvm/test/CodeGen/AArch64/arm64-xaluo.ll =================================================================== --- 
llvm/test/CodeGen/AArch64/arm64-xaluo.ll +++ llvm/test/CodeGen/AArch64/arm64-xaluo.ll @@ -102,7 +102,7 @@ define zeroext i1 @saddo4.i32(i32 %v1, ptr %res) { ; SDAG-LABEL: saddo4.i32: ; SDAG: // %bb.0: // %entry -; SDAG-NEXT: mov w8, #16777215 +; SDAG-NEXT: mov w8, #16777215 // =0xffffff ; SDAG-NEXT: adds w8, w0, w8 ; SDAG-NEXT: cset w0, vs ; SDAG-NEXT: str w8, [x1] @@ -110,7 +110,7 @@ ; ; FAST-LABEL: saddo4.i32: ; FAST: // %bb.0: // %entry -; FAST-NEXT: mov w8, #16777215 +; FAST-NEXT: mov w8, #16777215 // =0xffffff ; FAST-NEXT: adds w8, w0, w8 ; FAST-NEXT: cset w9, vs ; FAST-NEXT: and w0, w9, #0x1 @@ -119,7 +119,7 @@ ; ; GISEL-LABEL: saddo4.i32: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: mov w8, #16777215 +; GISEL-NEXT: mov w8, #16777215 // =0xffffff ; GISEL-NEXT: adds w8, w0, w8 ; GISEL-NEXT: cset w0, vs ; GISEL-NEXT: str w8, [x1] @@ -1327,7 +1327,7 @@ ; SDAG-LABEL: uaddo.selectboth.i8: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: and w8, w0, #0xff -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: add w8, w8, w1, uxtb ; SDAG-NEXT: tst w8, #0x100 ; SDAG-NEXT: csel w0, w8, w9, ne @@ -1336,7 +1336,7 @@ ; FAST-LABEL: uaddo.selectboth.i8: ; FAST: // %bb.0: // %entry ; FAST-NEXT: and w8, w0, #0xff -; FAST-NEXT: mov w9, #10 +; FAST-NEXT: mov w9, #10 // =0xa ; FAST-NEXT: add w8, w8, w1, uxtb ; FAST-NEXT: tst w8, #0x100 ; FAST-NEXT: csel w0, w8, w9, ne @@ -1345,7 +1345,7 @@ ; GISEL-LABEL: uaddo.selectboth.i8: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: and w8, w1, #0xff -; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: mov w9, #10 // =0xa ; GISEL-NEXT: add w8, w8, w0, uxtb ; GISEL-NEXT: cmp w8, w8, uxtb ; GISEL-NEXT: csel w0, w8, w9, ne @@ -1362,7 +1362,7 @@ ; SDAG-LABEL: saddo.selectboth.i8: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: sxtb w8, w0 -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: add w8, w8, w1, sxtb ; SDAG-NEXT: cmp w8, w8, sxtb ; SDAG-NEXT: csel w0, w8, w9, ne @@ -1371,7 +1371,7 @@ ; FAST-LABEL: saddo.selectboth.i8: ; FAST: 
// %bb.0: // %entry ; FAST-NEXT: sxtb w8, w0 -; FAST-NEXT: mov w9, #10 +; FAST-NEXT: mov w9, #10 // =0xa ; FAST-NEXT: add w8, w8, w1, sxtb ; FAST-NEXT: cmp w8, w8, sxtb ; FAST-NEXT: csel w0, w8, w9, ne @@ -1380,7 +1380,7 @@ ; GISEL-LABEL: saddo.selectboth.i8: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: sxtb w8, w1 -; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: mov w9, #10 // =0xa ; GISEL-NEXT: add w8, w8, w0, sxtb ; GISEL-NEXT: cmp w8, w8, sxtb ; GISEL-NEXT: csel w0, w8, w9, ne @@ -1397,7 +1397,7 @@ ; SDAG-LABEL: uaddo.selectboth.i16: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: and w8, w0, #0xffff -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: add w8, w8, w1, uxth ; SDAG-NEXT: tst w8, #0x10000 ; SDAG-NEXT: csel w0, w8, w9, ne @@ -1406,7 +1406,7 @@ ; FAST-LABEL: uaddo.selectboth.i16: ; FAST: // %bb.0: // %entry ; FAST-NEXT: and w8, w0, #0xffff -; FAST-NEXT: mov w9, #10 +; FAST-NEXT: mov w9, #10 // =0xa ; FAST-NEXT: add w8, w8, w1, uxth ; FAST-NEXT: tst w8, #0x10000 ; FAST-NEXT: csel w0, w8, w9, ne @@ -1415,7 +1415,7 @@ ; GISEL-LABEL: uaddo.selectboth.i16: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: and w8, w1, #0xffff -; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: mov w9, #10 // =0xa ; GISEL-NEXT: add w8, w8, w0, uxth ; GISEL-NEXT: cmp w8, w8, uxth ; GISEL-NEXT: csel w0, w8, w9, ne @@ -1432,7 +1432,7 @@ ; SDAG-LABEL: saddo.selectboth.i16: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: sxth w8, w0 -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: add w8, w8, w1, sxth ; SDAG-NEXT: cmp w8, w8, sxth ; SDAG-NEXT: csel w0, w8, w9, ne @@ -1441,7 +1441,7 @@ ; FAST-LABEL: saddo.selectboth.i16: ; FAST: // %bb.0: // %entry ; FAST-NEXT: sxth w8, w0 -; FAST-NEXT: mov w9, #10 +; FAST-NEXT: mov w9, #10 // =0xa ; FAST-NEXT: add w8, w8, w1, sxth ; FAST-NEXT: cmp w8, w8, sxth ; FAST-NEXT: csel w0, w8, w9, ne @@ -1450,7 +1450,7 @@ ; GISEL-LABEL: saddo.selectboth.i16: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: sxth w8, w1 -; GISEL-NEXT: mov w9, #10 +; 
GISEL-NEXT: mov w9, #10 // =0xa ; GISEL-NEXT: add w8, w8, w0, sxth ; GISEL-NEXT: cmp w8, w8, sxth ; GISEL-NEXT: csel w0, w8, w9, ne @@ -1467,21 +1467,21 @@ ; SDAG-LABEL: uaddo.selectboth.i32: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: adds w8, w0, w1 -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: csel w0, w8, w9, hs ; SDAG-NEXT: ret ; ; FAST-LABEL: uaddo.selectboth.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: adds w8, w0, w1 -; FAST-NEXT: mov w9, #10 +; FAST-NEXT: mov w9, #10 // =0xa ; FAST-NEXT: csel w0, w8, w9, hs ; FAST-NEXT: ret ; ; GISEL-LABEL: uaddo.selectboth.i32: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: adds w8, w0, w1 -; GISEL-NEXT: mov w10, #10 +; GISEL-NEXT: mov w10, #10 // =0xa ; GISEL-NEXT: cset w9, hs ; GISEL-NEXT: tst w9, #0x1 ; GISEL-NEXT: csel w0, w8, w10, ne @@ -1498,21 +1498,21 @@ ; SDAG-LABEL: saddo.selectboth.i32: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: adds w8, w0, w1 -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: csel w0, w8, w9, vs ; SDAG-NEXT: ret ; ; FAST-LABEL: saddo.selectboth.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: adds w8, w0, w1 -; FAST-NEXT: mov w9, #10 +; FAST-NEXT: mov w9, #10 // =0xa ; FAST-NEXT: csel w0, w8, w9, vs ; FAST-NEXT: ret ; ; GISEL-LABEL: saddo.selectboth.i32: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: adds w8, w0, w1 -; GISEL-NEXT: mov w10, #10 +; GISEL-NEXT: mov w10, #10 // =0xa ; GISEL-NEXT: cset w9, vs ; GISEL-NEXT: tst w9, #0x1 ; GISEL-NEXT: csel w0, w8, w10, ne @@ -1529,21 +1529,21 @@ ; SDAG-LABEL: uaddo.selectboth.i64: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: adds x8, x0, x1 -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: csel x0, x8, x9, hs ; SDAG-NEXT: ret ; ; FAST-LABEL: uaddo.selectboth.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: adds x8, x0, x1 -; FAST-NEXT: mov x9, #10 +; FAST-NEXT: mov x9, #10 // =0xa ; FAST-NEXT: csel x0, x8, x9, hs ; FAST-NEXT: ret ; ; GISEL-LABEL: uaddo.selectboth.i64: ; GISEL: // %bb.0: // %entry ; 
GISEL-NEXT: adds x8, x0, x1 -; GISEL-NEXT: mov w10, #10 +; GISEL-NEXT: mov w10, #10 // =0xa ; GISEL-NEXT: cset w9, hs ; GISEL-NEXT: tst w9, #0x1 ; GISEL-NEXT: csel x0, x8, x10, ne @@ -1560,21 +1560,21 @@ ; SDAG-LABEL: saddo.selectboth.i64: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: adds x8, x0, x1 -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: csel x0, x8, x9, vs ; SDAG-NEXT: ret ; ; FAST-LABEL: saddo.selectboth.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: adds x8, x0, x1 -; FAST-NEXT: mov x9, #10 +; FAST-NEXT: mov x9, #10 // =0xa ; FAST-NEXT: csel x0, x8, x9, vs ; FAST-NEXT: ret ; ; GISEL-LABEL: saddo.selectboth.i64: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: adds x8, x0, x1 -; GISEL-NEXT: mov w10, #10 +; GISEL-NEXT: mov w10, #10 // =0xa ; GISEL-NEXT: cset w9, vs ; GISEL-NEXT: tst w9, #0x1 ; GISEL-NEXT: csel x0, x8, x10, ne @@ -1591,7 +1591,7 @@ ; SDAG-LABEL: usubo.selectboth.i8: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: and w8, w0, #0xff -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: sub w8, w8, w1, uxtb ; SDAG-NEXT: tst w8, #0xffffff00 ; SDAG-NEXT: csel w0, w8, w9, ne @@ -1600,7 +1600,7 @@ ; FAST-LABEL: usubo.selectboth.i8: ; FAST: // %bb.0: // %entry ; FAST-NEXT: and w8, w0, #0xff -; FAST-NEXT: mov w9, #10 +; FAST-NEXT: mov w9, #10 // =0xa ; FAST-NEXT: sub w8, w8, w1, uxtb ; FAST-NEXT: tst w8, #0xffffff00 ; FAST-NEXT: csel w0, w8, w9, ne @@ -1609,7 +1609,7 @@ ; GISEL-LABEL: usubo.selectboth.i8: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: and w8, w0, #0xff -; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: mov w9, #10 // =0xa ; GISEL-NEXT: sub w8, w8, w1, uxtb ; GISEL-NEXT: cmp w8, w8, uxtb ; GISEL-NEXT: csel w0, w8, w9, ne @@ -1626,7 +1626,7 @@ ; CHECK-LABEL: ssubo.selectboth.i8: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: mov w9, #10 +; CHECK-NEXT: mov w9, #10 // =0xa ; CHECK-NEXT: sub w8, w8, w1, sxtb ; CHECK-NEXT: cmp w8, w8, sxtb ; CHECK-NEXT: csel w0, w8, w9, ne @@ -1643,7 +1643,7 @@ ; 
SDAG-LABEL: usubo.selectboth.i16: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: and w8, w0, #0xffff -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: sub w8, w8, w1, uxth ; SDAG-NEXT: tst w8, #0xffff0000 ; SDAG-NEXT: csel w0, w8, w9, ne @@ -1652,7 +1652,7 @@ ; FAST-LABEL: usubo.selectboth.i16: ; FAST: // %bb.0: // %entry ; FAST-NEXT: and w8, w0, #0xffff -; FAST-NEXT: mov w9, #10 +; FAST-NEXT: mov w9, #10 // =0xa ; FAST-NEXT: sub w8, w8, w1, uxth ; FAST-NEXT: tst w8, #0xffff0000 ; FAST-NEXT: csel w0, w8, w9, ne @@ -1661,7 +1661,7 @@ ; GISEL-LABEL: usubo.selectboth.i16: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: and w8, w0, #0xffff -; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: mov w9, #10 // =0xa ; GISEL-NEXT: sub w8, w8, w1, uxth ; GISEL-NEXT: cmp w8, w8, uxth ; GISEL-NEXT: csel w0, w8, w9, ne @@ -1678,7 +1678,7 @@ ; CHECK-LABEL: ssubo.selectboth.i16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: mov w9, #10 +; CHECK-NEXT: mov w9, #10 // =0xa ; CHECK-NEXT: sub w8, w8, w1, sxth ; CHECK-NEXT: cmp w8, w8, sxth ; CHECK-NEXT: csel w0, w8, w9, ne @@ -1695,21 +1695,21 @@ ; SDAG-LABEL: usubo.selectboth.i32: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: subs w8, w0, w1 -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: csel w0, w8, w9, lo ; SDAG-NEXT: ret ; ; FAST-LABEL: usubo.selectboth.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: subs w8, w0, w1 -; FAST-NEXT: mov w9, #10 +; FAST-NEXT: mov w9, #10 // =0xa ; FAST-NEXT: csel w0, w8, w9, lo ; FAST-NEXT: ret ; ; GISEL-LABEL: usubo.selectboth.i32: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: subs w8, w0, w1 -; GISEL-NEXT: mov w10, #10 +; GISEL-NEXT: mov w10, #10 // =0xa ; GISEL-NEXT: cset w9, lo ; GISEL-NEXT: tst w9, #0x1 ; GISEL-NEXT: csel w0, w8, w10, ne @@ -1726,21 +1726,21 @@ ; SDAG-LABEL: ssubo.selectboth.i32: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: subs w8, w0, w1 -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: csel w0, w8, w9, vs ; SDAG-NEXT: 
ret ; ; FAST-LABEL: ssubo.selectboth.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: subs w8, w0, w1 -; FAST-NEXT: mov w9, #10 +; FAST-NEXT: mov w9, #10 // =0xa ; FAST-NEXT: csel w0, w8, w9, vs ; FAST-NEXT: ret ; ; GISEL-LABEL: ssubo.selectboth.i32: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: subs w8, w0, w1 -; GISEL-NEXT: mov w10, #10 +; GISEL-NEXT: mov w10, #10 // =0xa ; GISEL-NEXT: cset w9, vs ; GISEL-NEXT: tst w9, #0x1 ; GISEL-NEXT: csel w0, w8, w10, ne @@ -1757,21 +1757,21 @@ ; SDAG-LABEL: usubo.selectboth.i64: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: subs x8, x0, x1 -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: csel x0, x8, x9, lo ; SDAG-NEXT: ret ; ; FAST-LABEL: usubo.selectboth.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: subs x8, x0, x1 -; FAST-NEXT: mov x9, #10 +; FAST-NEXT: mov x9, #10 // =0xa ; FAST-NEXT: csel x0, x8, x9, lo ; FAST-NEXT: ret ; ; GISEL-LABEL: usubo.selectboth.i64: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: subs x8, x0, x1 -; GISEL-NEXT: mov w10, #10 +; GISEL-NEXT: mov w10, #10 // =0xa ; GISEL-NEXT: cset w9, lo ; GISEL-NEXT: tst w9, #0x1 ; GISEL-NEXT: csel x0, x8, x10, ne @@ -1788,21 +1788,21 @@ ; SDAG-LABEL: ssubo.selectboth.i64: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: subs x8, x0, x1 -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: csel x0, x8, x9, vs ; SDAG-NEXT: ret ; ; FAST-LABEL: ssubo.selectboth.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: subs x8, x0, x1 -; FAST-NEXT: mov x9, #10 +; FAST-NEXT: mov x9, #10 // =0xa ; FAST-NEXT: csel x0, x8, x9, vs ; FAST-NEXT: ret ; ; GISEL-LABEL: ssubo.selectboth.i64: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: subs x8, x0, x1 -; GISEL-NEXT: mov w10, #10 +; GISEL-NEXT: mov w10, #10 // =0xa ; GISEL-NEXT: cset w9, vs ; GISEL-NEXT: tst w9, #0x1 ; GISEL-NEXT: csel x0, x8, x10, ne @@ -1822,7 +1822,7 @@ ; SDAG-NEXT: and w8, w1, #0xff ; SDAG-NEXT: and w9, w0, #0xff ; SDAG-NEXT: mul w8, w9, w8 -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; 
SDAG-NEXT: tst w8, #0xff00 ; SDAG-NEXT: csel w0, w8, w9, ne ; SDAG-NEXT: ret @@ -1832,7 +1832,7 @@ ; FAST-NEXT: and w8, w1, #0xff ; FAST-NEXT: and w9, w0, #0xff ; FAST-NEXT: mul w8, w9, w8 -; FAST-NEXT: mov w9, #10 +; FAST-NEXT: mov w9, #10 // =0xa ; FAST-NEXT: tst w8, #0xff00 ; FAST-NEXT: csel w0, w8, w9, ne ; FAST-NEXT: ret @@ -1842,7 +1842,7 @@ ; GISEL-NEXT: and w8, w0, #0xff ; GISEL-NEXT: and w9, w1, #0xff ; GISEL-NEXT: mul w8, w8, w9 -; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: mov w9, #10 // =0xa ; GISEL-NEXT: cmp w8, w8, uxtb ; GISEL-NEXT: csel w0, w8, w9, ne ; GISEL-NEXT: ret @@ -1860,7 +1860,7 @@ ; SDAG-NEXT: sxtb w8, w1 ; SDAG-NEXT: sxtb w9, w0 ; SDAG-NEXT: mul w8, w9, w8 -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: cmp w8, w8, sxtb ; SDAG-NEXT: csel w0, w8, w9, ne ; SDAG-NEXT: ret @@ -1870,7 +1870,7 @@ ; FAST-NEXT: sxtb w8, w1 ; FAST-NEXT: sxtb w9, w0 ; FAST-NEXT: mul w8, w9, w8 -; FAST-NEXT: mov w9, #10 +; FAST-NEXT: mov w9, #10 // =0xa ; FAST-NEXT: cmp w8, w8, sxtb ; FAST-NEXT: csel w0, w8, w9, ne ; FAST-NEXT: ret @@ -1880,7 +1880,7 @@ ; GISEL-NEXT: sxtb w8, w0 ; GISEL-NEXT: sxtb w9, w1 ; GISEL-NEXT: mul w8, w8, w9 -; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: mov w9, #10 // =0xa ; GISEL-NEXT: cmp w8, w8, sxtb ; GISEL-NEXT: csel w0, w8, w9, ne ; GISEL-NEXT: ret @@ -1898,7 +1898,7 @@ ; SDAG-NEXT: and w8, w1, #0xffff ; SDAG-NEXT: and w9, w0, #0xffff ; SDAG-NEXT: mul w8, w9, w8 -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: tst w8, #0xffff0000 ; SDAG-NEXT: csel w0, w8, w9, ne ; SDAG-NEXT: ret @@ -1908,7 +1908,7 @@ ; FAST-NEXT: and w8, w1, #0xffff ; FAST-NEXT: and w9, w0, #0xffff ; FAST-NEXT: mul w8, w9, w8 -; FAST-NEXT: mov w9, #10 +; FAST-NEXT: mov w9, #10 // =0xa ; FAST-NEXT: tst w8, #0xffff0000 ; FAST-NEXT: csel w0, w8, w9, ne ; FAST-NEXT: ret @@ -1918,7 +1918,7 @@ ; GISEL-NEXT: and w8, w0, #0xffff ; GISEL-NEXT: and w9, w1, #0xffff ; GISEL-NEXT: mul w8, w8, w9 -; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: mov 
w9, #10 // =0xa ; GISEL-NEXT: cmp w8, w8, uxth ; GISEL-NEXT: csel w0, w8, w9, ne ; GISEL-NEXT: ret @@ -1936,7 +1936,7 @@ ; SDAG-NEXT: sxth w8, w1 ; SDAG-NEXT: sxth w9, w0 ; SDAG-NEXT: mul w8, w9, w8 -; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: mov w9, #10 // =0xa ; SDAG-NEXT: cmp w8, w8, sxth ; SDAG-NEXT: csel w0, w8, w9, ne ; SDAG-NEXT: ret @@ -1946,7 +1946,7 @@ ; FAST-NEXT: sxth w8, w1 ; FAST-NEXT: sxth w9, w0 ; FAST-NEXT: mul w8, w9, w8 -; FAST-NEXT: mov w9, #10 +; FAST-NEXT: mov w9, #10 // =0xa ; FAST-NEXT: cmp w8, w8, sxth ; FAST-NEXT: csel w0, w8, w9, ne ; FAST-NEXT: ret @@ -1956,7 +1956,7 @@ ; GISEL-NEXT: sxth w8, w0 ; GISEL-NEXT: sxth w9, w1 ; GISEL-NEXT: mul w8, w8, w9 -; GISEL-NEXT: mov w9, #10 +; GISEL-NEXT: mov w9, #10 // =0xa ; GISEL-NEXT: cmp w8, w8, sxth ; GISEL-NEXT: csel w0, w8, w9, ne ; GISEL-NEXT: ret @@ -1972,7 +1972,7 @@ ; SDAG-LABEL: umulo.selectboth.i32: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: umull x9, w0, w1 -; SDAG-NEXT: mov w8, #10 +; SDAG-NEXT: mov w8, #10 // =0xa ; SDAG-NEXT: tst x9, #0xffffffff00000000 ; SDAG-NEXT: csel w0, w9, w8, ne ; SDAG-NEXT: ret @@ -1980,7 +1980,7 @@ ; FAST-LABEL: umulo.selectboth.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: umull x9, w0, w1 -; FAST-NEXT: mov w8, #10 +; FAST-NEXT: mov w8, #10 // =0xa ; FAST-NEXT: tst x9, #0xffffffff00000000 ; FAST-NEXT: csel w0, w9, w8, ne ; FAST-NEXT: ret @@ -1988,7 +1988,7 @@ ; GISEL-LABEL: umulo.selectboth.i32: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: umull x9, w0, w1 -; GISEL-NEXT: mov w8, #10 +; GISEL-NEXT: mov w8, #10 // =0xa ; GISEL-NEXT: mul w10, w0, w1 ; GISEL-NEXT: lsr x9, x9, #32 ; GISEL-NEXT: cmp w9, #0 @@ -2006,7 +2006,7 @@ ; SDAG-LABEL: smulo.selectboth.i32: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: smull x9, w0, w1 -; SDAG-NEXT: mov w8, #10 +; SDAG-NEXT: mov w8, #10 // =0xa ; SDAG-NEXT: cmp x9, w9, sxtw ; SDAG-NEXT: csel w0, w9, w8, ne ; SDAG-NEXT: ret @@ -2014,7 +2014,7 @@ ; FAST-LABEL: smulo.selectboth.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: smull x9, w0, w1 
-; FAST-NEXT: mov w8, #10 +; FAST-NEXT: mov w8, #10 // =0xa ; FAST-NEXT: cmp x9, w9, sxtw ; FAST-NEXT: csel w0, w9, w8, ne ; FAST-NEXT: ret @@ -2022,7 +2022,7 @@ ; GISEL-LABEL: smulo.selectboth.i32: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: smull x9, w0, w1 -; GISEL-NEXT: mov w8, #10 +; GISEL-NEXT: mov w8, #10 // =0xa ; GISEL-NEXT: mul w10, w0, w1 ; GISEL-NEXT: asr x9, x9, #32 ; GISEL-NEXT: cmp w9, w10, asr #31 @@ -2040,7 +2040,7 @@ ; SDAG-LABEL: umulo.selectboth.i64: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: umulh x9, x0, x1 -; SDAG-NEXT: mov w8, #10 +; SDAG-NEXT: mov w8, #10 // =0xa ; SDAG-NEXT: mul x10, x0, x1 ; SDAG-NEXT: cmp xzr, x9 ; SDAG-NEXT: csel x0, x10, x8, ne @@ -2049,7 +2049,7 @@ ; FAST-LABEL: umulo.selectboth.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: umulh x9, x0, x1 -; FAST-NEXT: mov x8, #10 +; FAST-NEXT: mov x8, #10 // =0xa ; FAST-NEXT: mul x10, x0, x1 ; FAST-NEXT: cmp xzr, x9 ; FAST-NEXT: csel x0, x10, x8, ne @@ -2058,7 +2058,7 @@ ; GISEL-LABEL: umulo.selectboth.i64: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: umulh x9, x0, x1 -; GISEL-NEXT: mov w8, #10 +; GISEL-NEXT: mov w8, #10 // =0xa ; GISEL-NEXT: mul x10, x0, x1 ; GISEL-NEXT: cmp x9, #0 ; GISEL-NEXT: csel x0, x10, x8, ne @@ -2075,7 +2075,7 @@ ; SDAG-LABEL: smulo.selectboth.i64: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: mul x9, x0, x1 -; SDAG-NEXT: mov w8, #10 +; SDAG-NEXT: mov w8, #10 // =0xa ; SDAG-NEXT: smulh x10, x0, x1 ; SDAG-NEXT: cmp x10, x9, asr #63 ; SDAG-NEXT: csel x0, x9, x8, ne @@ -2084,7 +2084,7 @@ ; FAST-LABEL: smulo.selectboth.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: mul x9, x0, x1 -; FAST-NEXT: mov x8, #10 +; FAST-NEXT: mov x8, #10 // =0xa ; FAST-NEXT: smulh x10, x0, x1 ; FAST-NEXT: cmp x10, x9, asr #63 ; FAST-NEXT: csel x0, x9, x8, ne @@ -2093,7 +2093,7 @@ ; GISEL-LABEL: smulo.selectboth.i64: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: mul x9, x0, x1 -; GISEL-NEXT: mov w8, #10 +; GISEL-NEXT: mov w8, #10 // =0xa ; GISEL-NEXT: smulh x10, x0, x1 ; GISEL-NEXT: cmp x10, x9, 
asr #63 ; GISEL-NEXT: csel x0, x9, x8, ne @@ -2120,7 +2120,7 @@ ; FAST-LABEL: saddo.br.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmn w0, w1 -; FAST-NEXT: mov w9, #1 +; FAST-NEXT: mov w9, #1 // =0x1 ; FAST-NEXT: cset w8, vs ; FAST-NEXT: bic w8, w9, w8 ; FAST-NEXT: and w0, w8, #0x1 @@ -2155,7 +2155,7 @@ ; FAST-LABEL: saddo.br.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmn x0, x1 -; FAST-NEXT: mov w9, #1 +; FAST-NEXT: mov w9, #1 // =0x1 ; FAST-NEXT: cset w8, vs ; FAST-NEXT: bic w8, w9, w8 ; FAST-NEXT: and w0, w8, #0x1 @@ -2190,7 +2190,7 @@ ; FAST-LABEL: uaddo.br.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmn w0, w1 -; FAST-NEXT: mov w9, #1 +; FAST-NEXT: mov w9, #1 // =0x1 ; FAST-NEXT: cset w8, hs ; FAST-NEXT: bic w8, w9, w8 ; FAST-NEXT: and w0, w8, #0x1 @@ -2225,7 +2225,7 @@ ; FAST-LABEL: uaddo.br.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmn x0, x1 -; FAST-NEXT: mov w9, #1 +; FAST-NEXT: mov w9, #1 // =0x1 ; FAST-NEXT: cset w8, hs ; FAST-NEXT: bic w8, w9, w8 ; FAST-NEXT: and w0, w8, #0x1 @@ -2260,7 +2260,7 @@ ; FAST-LABEL: ssubo.br.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmp w0, w1 -; FAST-NEXT: mov w9, #1 +; FAST-NEXT: mov w9, #1 // =0x1 ; FAST-NEXT: cset w8, vs ; FAST-NEXT: bic w8, w9, w8 ; FAST-NEXT: and w0, w8, #0x1 @@ -2295,7 +2295,7 @@ ; FAST-LABEL: ssubo.br.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmp x0, x1 -; FAST-NEXT: mov w9, #1 +; FAST-NEXT: mov w9, #1 // =0x1 ; FAST-NEXT: cset w8, vs ; FAST-NEXT: bic w8, w9, w8 ; FAST-NEXT: and w0, w8, #0x1 @@ -2330,7 +2330,7 @@ ; FAST-LABEL: usubo.br.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmp w0, w1 -; FAST-NEXT: mov w9, #1 +; FAST-NEXT: mov w9, #1 // =0x1 ; FAST-NEXT: cset w8, lo ; FAST-NEXT: bic w8, w9, w8 ; FAST-NEXT: and w0, w8, #0x1 @@ -2365,7 +2365,7 @@ ; FAST-LABEL: usubo.br.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmp x0, x1 -; FAST-NEXT: mov w9, #1 +; FAST-NEXT: mov w9, #1 // =0x1 ; FAST-NEXT: cset w8, lo ; FAST-NEXT: bic w8, w9, w8 ; FAST-NEXT: and w0, w8, #0x1 @@ -2401,7 
+2401,7 @@ ; FAST-LABEL: smulo.br.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: smull x9, w0, w1 -; FAST-NEXT: mov w8, #1 +; FAST-NEXT: mov w8, #1 // =0x1 ; FAST-NEXT: cmp x9, w9, sxtw ; FAST-NEXT: cset w9, ne ; FAST-NEXT: bic w8, w8, w9 @@ -2442,7 +2442,7 @@ ; FAST-LABEL: smulo.br.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: mul x9, x0, x1 -; FAST-NEXT: mov w8, #1 +; FAST-NEXT: mov w8, #1 // =0x1 ; FAST-NEXT: smulh x10, x0, x1 ; FAST-NEXT: cmp x10, x9, asr #63 ; FAST-NEXT: cset w9, ne @@ -2481,7 +2481,7 @@ ; FAST-LABEL: smulo2.br.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmn x0, x0 -; FAST-NEXT: mov w8, #1 +; FAST-NEXT: mov w8, #1 // =0x1 ; FAST-NEXT: cset w9, vs ; FAST-NEXT: bic w8, w8, w9 ; FAST-NEXT: and w0, w8, #0x1 @@ -2517,7 +2517,7 @@ ; FAST-LABEL: umulo.br.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: umull x9, w0, w1 -; FAST-NEXT: mov w8, #1 +; FAST-NEXT: mov w8, #1 // =0x1 ; FAST-NEXT: tst x9, #0xffffffff00000000 ; FAST-NEXT: cset w9, ne ; FAST-NEXT: bic w8, w8, w9 @@ -2556,7 +2556,7 @@ ; FAST-LABEL: umulo.br.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: umulh x9, x0, x1 -; FAST-NEXT: mov w8, #1 +; FAST-NEXT: mov w8, #1 // =0x1 ; FAST-NEXT: cmp xzr, x9 ; FAST-NEXT: cset w9, ne ; FAST-NEXT: bic w8, w8, w9 @@ -2593,7 +2593,7 @@ ; FAST-LABEL: umulo2.br.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmn x0, x0 -; FAST-NEXT: mov w8, #1 +; FAST-NEXT: mov w8, #1 // =0x1 ; FAST-NEXT: cset w9, hs ; FAST-NEXT: bic w8, w8, w9 ; FAST-NEXT: and w0, w8, #0x1 @@ -2621,17 +2621,17 @@ define i8 @pr60530() { ; SDAG-LABEL: pr60530: ; SDAG: // %bb.0: -; SDAG-NEXT: mov w0, #-1 +; SDAG-NEXT: mov w0, #-1 // =0xffffffff ; SDAG-NEXT: ret ; ; FAST-LABEL: pr60530: ; FAST: // %bb.0: -; FAST-NEXT: mov w0, #-1 +; FAST-NEXT: mov w0, #-1 // =0xffffffff ; FAST-NEXT: ret ; ; GISEL-LABEL: pr60530: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #1 +; GISEL-NEXT: mov w8, #1 // =0x1 ; GISEL-NEXT: sbfx w0, w8, #0, #1 ; GISEL-NEXT: ret %1 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 0, i8 
1) Index: llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll =================================================================== --- llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll +++ llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll @@ -15,7 +15,7 @@ ; CHECK-NEXT: stlxr w8, w2, [x0] ; CHECK-NEXT: cbnz w8, LBB0_1 ; CHECK-NEXT: ; %bb.3: -; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: mov w0, #1 ; =0x1 ; CHECK-NEXT: ret ; CHECK-NEXT: LBB0_4: ; %cmpxchg.nostore ; CHECK-NEXT: mov w0, wzr @@ -64,7 +64,7 @@ ; CHECK-NEXT: stlxrb w9, w2, [x0] ; CHECK-NEXT: cbnz w9, LBB1_1 ; CHECK-NEXT: ; %bb.3: -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w8, #1 ; =0x1 ; CHECK-NEXT: eor w0, w8, #0x1 ; CHECK-NEXT: ret ; CHECK-NEXT: LBB1_4: ; %cmpxchg.nostore @@ -188,13 +188,13 @@ ; CHECK-NEXT: stlxr w8, w20, [x19] ; CHECK-NEXT: cbnz w8, LBB3_1 ; CHECK-NEXT: ; %bb.3: -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w8, #1 ; =0x1 ; CHECK-NEXT: b LBB3_5 ; CHECK-NEXT: LBB3_4: ; %cmpxchg.nostore ; CHECK-NEXT: mov w8, wzr ; CHECK-NEXT: clrex ; CHECK-NEXT: LBB3_5: ; %for.cond.preheader -; CHECK-NEXT: mov w22, #2 +; CHECK-NEXT: mov w22, #2 ; =0x2 ; CHECK-NEXT: LBB3_6: ; %for.cond ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: cbz w22, LBB3_9 @@ -236,7 +236,7 @@ ; OUTLINE-ATOMICS-NEXT: mov w21, w0 ; OUTLINE-ATOMICS-NEXT: bl ___aarch64_cas4_acq_rel ; OUTLINE-ATOMICS-NEXT: cmp w0, w21 -; OUTLINE-ATOMICS-NEXT: mov w22, #2 +; OUTLINE-ATOMICS-NEXT: mov w22, #2 ; =0x2 ; OUTLINE-ATOMICS-NEXT: cset w8, eq ; OUTLINE-ATOMICS-NEXT: LBB3_1: ; %for.cond ; OUTLINE-ATOMICS-NEXT: ; =>This Inner Loop Header: Depth=1 Index: llvm/test/CodeGen/AArch64/optimize-imm.ll =================================================================== --- llvm/test/CodeGen/AArch64/optimize-imm.ll +++ llvm/test/CodeGen/AArch64/optimize-imm.ll @@ -44,7 +44,7 @@ define i32 @and4(i32 %a) { ; CHECK-LABEL: and4: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #61951 +; CHECK-NEXT: mov w8, #61951 // =0xf1ff ; CHECK-NEXT: and w9, w0, #0xfffc07ff ; 
CHECK-NEXT: movk w8, #65521, lsl #16 ; CHECK-NEXT: orr w0, w9, w8 @@ -61,7 +61,7 @@ define i32 @xor1(i32 %a) { ; CHECK-LABEL: xor1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #56 +; CHECK-NEXT: mov w8, #56 // =0x38 ; CHECK-NEXT: bic w0, w8, w0, lsl #3 ; CHECK-NEXT: ret entry: @@ -78,9 +78,9 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: mov w8, #129 +; CHECK-NEXT: mov w8, #129 // =0x81 ; CHECK-NEXT: eor x0, x0, x8 -; CHECK-NEXT: mov w8, #8 +; CHECK-NEXT: mov w8, #8 // =0x8 ; CHECK-NEXT: str x8, [sp, #8] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/sink-and-fold.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/sink-and-fold.ll @@ -0,0 +1,425 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s +target triple = "aarch64-linux" + +declare i32 @use(...) + +define i32 @f0(i1 %c1, ptr %p) nounwind { +; CHECK-LABEL: f0: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: add x0, x1, #8 +; CHECK-NEXT: tbz w8, #0, .LBB0_2 +; CHECK-NEXT: // %bb.1: // %if.then +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: bl use +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_2: // %if.else +; CHECK-NEXT: ldr w0, [x0] +; CHECK-NEXT: ret +entry: + %a = getelementptr i32, ptr %p, i32 2 + br i1 %c1, label %if.then, label %if.else + +if.then: + %v0 = call i32 @use(ptr %a) + br label %exit + +if.else: + %v1 = load i32, ptr %a + br label %exit + +exit: + %v = phi i32 [%v0, %if.then], [%v1, %if.else] + ret i32 %v +} + +define i32 @f1(i1 %c1, ptr %p, i64 %i) nounwind { +; CHECK-LABEL: f1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: add x0, x1, x2 +; CHECK-NEXT: tbz w8, #0, .LBB1_2 +; CHECK-NEXT: // %bb.1: // %if.then +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: bl use +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB1_2: // %if.else +; CHECK-NEXT: ldr w0, [x0] +; CHECK-NEXT: ret +entry: + %a = getelementptr i8, ptr %p, i64 %i + br i1 %c1, label %if.then, label %if.else + +if.then: + %v0 = call i32 @use(ptr %a) + br label %exit + +if.else: + %v1 = load i32, ptr %a + br label %exit + +exit: + %v = phi i32 [%v0, %if.then], [%v1, %if.else] + ret i32 %v +} + +; Address calculation too slow. +%S = type {i32, [7 x i32] } +define i32 @f2(i1 %c1, ptr %p, i64 %i) nounwind "target-features"="+lsl-fast" { +; CHECK-LABEL: f2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: add x1, x1, x2, lsl #5 +; CHECK-NEXT: tbz w0, #0, .LBB2_2 +; CHECK-NEXT: // %bb.1: // %if.then +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: bl use +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB2_2: // %if.else +; CHECK-NEXT: mov w0, #1 // =0x1 +; CHECK-NEXT: bl use +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %a = getelementptr %S, ptr %p, i64 %i + br i1 %c1, label %if.then, label %if.else + +if.then: + %v0 = call i32 @use(ptr %a) + br label %exit + +if.else: + %v1 = call i32 @use(i32 1, ptr %a) + br label %exit + +exit: + %v = phi i32 [%v0, %if.then], [%v1, %if.else] + ret i32 %v +} + +; Address calculation cheap enough on some cores. +define i32 @f3(i1 %c1, ptr %p, i64 %i) nounwind "target-features"="+lsl-fast" { +; CHECK-LABEL: f3: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: add x0, x1, x2, lsl #2 +; CHECK-NEXT: tbz w8, #0, .LBB3_2 +; CHECK-NEXT: // %bb.1: // %if.then +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: bl use +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB3_2: // %if.else +; CHECK-NEXT: ldr w0, [x0] +; CHECK-NEXT: ret +entry: + %a = getelementptr i32, ptr %p, i64 %i + br i1 %c1, label %if.then, label %if.else + +if.then: + %v0 = call i32 @use(ptr %a) + br label %exit + +if.else: + %v1 = load i32, ptr %a + br label %exit + +exit: + %v = phi i32 [%v0, %if.then], [%v1, %if.else] + ret i32 %v +} + +define void @f4(ptr %a, i64 %n) nounwind "target-features"="+lsl-fast" { +; CHECK-LABEL: f4: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmp x1, #1 +; CHECK-NEXT: b.lt .LBB4_9 +; CHECK-NEXT: // %bb.1: // %LI.preheader +; CHECK-NEXT: str x30, [sp, #-64]! 
// 8-byte Folded Spill +; CHECK-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: mov x23, xzr +; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: mov x19, x1 +; CHECK-NEXT: mov x20, x0 +; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: b .LBB4_3 +; CHECK-NEXT: .LBB4_2: // %LI.latch +; CHECK-NEXT: // in Loop: Header=BB4_3 Depth=1 +; CHECK-NEXT: cmp x23, x19 +; CHECK-NEXT: mov x23, x24 +; CHECK-NEXT: b.ge .LBB4_8 +; CHECK-NEXT: .LBB4_3: // %LI +; CHECK-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NEXT: // Child Loop BB4_6 Depth 2 +; CHECK-NEXT: mov x21, xzr +; CHECK-NEXT: add x24, x23, #1 +; CHECK-NEXT: add x22, x20, x23, lsl #2 +; CHECK-NEXT: b .LBB4_6 +; CHECK-NEXT: .LBB4_4: // %if.else +; CHECK-NEXT: // in Loop: Header=BB4_6 Depth=2 +; CHECK-NEXT: ldr w0, [x22] +; CHECK-NEXT: .LBB4_5: // %LJ.latch +; CHECK-NEXT: // in Loop: Header=BB4_6 Depth=2 +; CHECK-NEXT: add x8, x21, #1 +; CHECK-NEXT: str w0, [x20, x21, lsl #2] +; CHECK-NEXT: mov x21, x8 +; CHECK-NEXT: sub x9, x8, #1 +; CHECK-NEXT: cmp x9, x19 +; CHECK-NEXT: b.ge .LBB4_2 +; CHECK-NEXT: .LBB4_6: // %LJ +; CHECK-NEXT: // Parent Loop BB4_3 Depth=1 +; CHECK-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NEXT: ldr w8, [x20, x21, lsl #2] +; CHECK-NEXT: tbz w8, #31, .LBB4_4 +; CHECK-NEXT: // %bb.7: // %if.then +; CHECK-NEXT: // in Loop: Header=BB4_6 Depth=2 +; CHECK-NEXT: mov x0, x22 +; CHECK-NEXT: mov x1, x21 +; CHECK-NEXT: bl use +; CHECK-NEXT: b .LBB4_5 +; CHECK-NEXT: .LBB4_8: +; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #64 // 8-byte Folded Reload +; CHECK-NEXT: .LBB4_9: // %exit +; CHECK-NEXT: ret +entry: + %c0 = icmp slt i64 %n, 1 + br i1 %c0, label %exit, label %LI + +LI: + %i = phi i64 [0, %entry], [%i.next, %LI.latch] + %i.next = add i64 %i, 1 + %ai.ptr = 
getelementptr i32, ptr %a, i64 %i + br label %LJ + +LJ: + %j = phi i64 [0, %LI], [%j.next, %LJ.latch] + %j.next = add i64 %j, 1 + %aj.ptr = getelementptr i32, ptr %a, i64 %j + %aj = load i32, ptr %aj.ptr + %c1 = icmp slt i32 %aj, 0 + br i1 %c1, label %if.then, label %if.else + +if.then: + %v = call i32 @use(ptr %ai.ptr, i64 %j) + store i32 %v, ptr %aj.ptr + br label %LJ.latch + +if.else: + %ai = load i32, ptr %ai.ptr + store i32 %ai, ptr %aj.ptr + br label %LJ.latch + +LJ.latch: + %c2 = icmp slt i64 %j, %n + br i1 %c2, label %LJ, label %LI.latch + +LI.latch: + %c3 = icmp slt i64 %i, %n + br i1 %c3, label %LI, label %exit + +exit: + ret void +} + +%T = type { i32, i32, i32 } + +define void @f5(ptr %a, i32 %n, i32 %k) nounwind { +; CHECK-LABEL: f5: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmp w1, #1 +; CHECK-NEXT: b.lt .LBB5_7 +; CHECK-NEXT: // %bb.1: // %L.preheader +; CHECK-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill +; CHECK-NEXT: mov w8, #12 // =0xc +; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: smaddl x8, w2, w8, x0 +; CHECK-NEXT: add x21, x0, #8 +; CHECK-NEXT: mov w22, #-1 // =0xffffffff +; CHECK-NEXT: add x20, x8, #4 +; CHECK-NEXT: b .LBB5_4 +; CHECK-NEXT: .LBB5_2: // %if.else +; CHECK-NEXT: // in Loop: Header=BB5_4 Depth=1 +; CHECK-NEXT: ldr w0, [x20] +; CHECK-NEXT: .LBB5_3: // %L.latch +; CHECK-NEXT: // in Loop: Header=BB5_4 Depth=1 +; CHECK-NEXT: add w22, w22, #1 +; CHECK-NEXT: cmp w22, w19 +; CHECK-NEXT: str w0, [x21], #12 +; CHECK-NEXT: b.ge .LBB5_6 +; CHECK-NEXT: .LBB5_4: // %L +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldr w8, [x21] +; CHECK-NEXT: tbz w8, #31, .LBB5_2 +; CHECK-NEXT: // %bb.5: // %if.then +; CHECK-NEXT: // in Loop: Header=BB5_4 Depth=1 +; CHECK-NEXT: add w1, w22, #1 +; CHECK-NEXT: mov x0, x20 +; CHECK-NEXT: bl use +; CHECK-NEXT: b .LBB5_3 +; CHECK-NEXT: .LBB5_6: +; CHECK-NEXT: ldp x20, 
x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload +; CHECK-NEXT: .LBB5_7: // %exit +; CHECK-NEXT: ret +entry: + %p = getelementptr %T, ptr %a, i32 %k, i32 1 + %c0 = icmp slt i32 %n, 1 + br i1 %c0, label %exit, label %L + +L: + %i = phi i32 [0, %entry], [%i.next, %L.latch] + %i.next = add i32 %i, 1 + %ai.ptr = getelementptr %T, ptr %a, i32 %i, i32 2 + %ai = load i32, ptr %ai.ptr + %c1 = icmp slt i32 %ai, 0 + br i1 %c1, label %if.then, label %if.else + +if.then: + %u.0 = call i32 @use(ptr %p, i32 %i) + br label %L.latch + +if.else: + %u.1 = load i32, ptr %p + br label %L.latch + +L.latch: + %u = phi i32 [%u.0, %if.then], [%u.1, %if.else] + store i32 %u, ptr %ai.ptr + %c2 = icmp slt i32 %i, %n + br i1 %c2, label %L, label %exit + +exit: + ret void +} + +define i32 @f6(i1 %c, ptr %a, i32 %i) { +; CHECK-LABEL: f6: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 +; CHECK-NEXT: sxtw x8, w2 +; CHECK-NEXT: tbz w0, #0, .LBB6_2 +; CHECK-NEXT: // %bb.1: // %if.then +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: str wzr, [x1, x8, lsl #2] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB6_2: // %if.else +; CHECK-NEXT: ldr w0, [x1, x8, lsl #2] +; CHECK-NEXT: ret +entry: + %j = sext i32 %i to i64 + br i1 %c, label %if.then, label %if.else + +if.then: + %p0 = getelementptr i32, ptr %a, i64 %j + store i32 0, ptr %p0 + br label %exit + +if.else: + %p1 = getelementptr i32, ptr %a, i64 %j + %v0 = load i32, ptr %p1 + br label %exit + +exit: + %v = phi i32 [0, %if.then], [%v0, %if.else] + ret i32 %v +} + +define i8 @f7(i1 %c, ptr %a, i32 %i) { +; CHECK-LABEL: f7: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w2 +; CHECK-NEXT: tbz w0, #0, .LBB7_2 +; CHECK-NEXT: // %bb.1: // %if.then +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: strb wzr, [x1, x8] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB7_2: // %if.else +; CHECK-NEXT: ldrb w0, [x1, x8] +; CHECK-NEXT: ret +entry: 
+ %j = zext i32 %i to i64 + br i1 %c, label %if.then, label %if.else + +if.then: + %p0 = getelementptr i8, ptr %a, i64 %j + store i8 0, ptr %p0 + br label %exit + +if.else: + %p1 = getelementptr i8, ptr %a, i64 %j + %v0 = load i8, ptr %p1 + br label %exit + +exit: + %v = phi i8 [0, %if.then], [%v0, %if.else] + ret i8 %v +} + +define i32 @f8(i1 %c, ptr %a, i32 %i) { +; CHECK-LABEL: f8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: add x8, x1, w2, sxtw #2 +; CHECK-NEXT: tbz w0, #0, .LBB8_2 +; CHECK-NEXT: // %bb.1: // %if.then +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: str wzr, [x8] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB8_2: // %if.else +; CHECK-NEXT: ldr w0, [x8] +; CHECK-NEXT: ret +entry: + %p = getelementptr i32, ptr %a, i32 %i + br i1 %c, label %if.then, label %if.else + +if.then: + store i32 0, ptr %p + br label %exit + +if.else: + %v0 = load i32, ptr %p + br label %exit + +exit: + %v = phi i32 [0, %if.then], [%v0, %if.else] + ret i32 %v +} + +define i64 @f9(i1 %c, ptr %a, i32 %i) { +; CHECK-LABEL: f9: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, w2 +; CHECK-NEXT: tbz w0, #0, .LBB9_2 +; CHECK-NEXT: // %bb.1: // %if.then +; CHECK-NEXT: mov x0, xzr +; CHECK-NEXT: str xzr, [x1, x8, lsl #3] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB9_2: // %if.else +; CHECK-NEXT: ldr x0, [x1, x8, lsl #3] +; CHECK-NEXT: ret +entry: + %j = zext i32 %i to i64 + %p = getelementptr i64, ptr %a, i64 %j + br i1 %c, label %if.then, label %if.else + +if.then: + store i64 0, ptr %p + br label %exit + +if.else: + %v0 = load i64, ptr %p + br label %exit + +exit: + %v = phi i64 [0, %if.then], [%v0, %if.else] + ret i64 %v +} Index: llvm/test/CodeGen/AArch64/swift-async-win.ll =================================================================== --- llvm/test/CodeGen/AArch64/swift-async-win.ll +++ llvm/test/CodeGen/AArch64/swift-async-win.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -mtriple 
aarch64-unknown-windows -swift-async-fp=never -filetype asm -o - %s | FileCheck %s ; ModuleID = '_Concurrency.ll' @@ -10,8 +11,35 @@ ; Function Attrs: argmemonly nofree nosync nounwind willreturn declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #0 +; NOTE: we do not see the canonical windows frame setup due to the `nounwind` +; attribute on the function. + ; Function Attrs: nounwind define hidden swifttailcc void @"$ss23withCheckedContinuation8function_xSS_yScCyxs5NeverOGXEtYalFTQ0_"(ptr nocapture readonly %0) #1 { +; CHECK-LABEL: $ss23withCheckedContinuation8function_xSS_yScCyxs5NeverOGXEtYalFTQ0_: +; CHECK: // %bb.0: // %entryresume.0 +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: stp x30, x29, [sp, #24] // 16-byte Folded Spill +; CHECK-NEXT: add x29, sp, #24 +; CHECK-NEXT: str x19, [sp, #40] // 8-byte Folded Spill +; CHECK-NEXT: sub x8, x29, #8 +; CHECK-NEXT: str xzr, [sp, #16] +; CHECK-NEXT: adrp x19, __imp_swift_task_dealloc +; CHECK-NEXT: ldr x9, [x0] +; CHECK-NEXT: str x9, [x8] +; CHECK-NEXT: ldr x20, [x0] +; CHECK-NEXT: ldp x22, x0, [x9, #16] +; CHECK-NEXT: str x20, [x8] +; CHECK-NEXT: ldr x19, [x19, :lo12:__imp_swift_task_dealloc] +; CHECK-NEXT: blr x19 +; CHECK-NEXT: mov x0, x22 +; CHECK-NEXT: blr x19 +; CHECK-NEXT: ldp x30, x29, [sp, #24] // 16-byte Folded Reload +; CHECK-NEXT: mov x0, x20 +; CHECK-NEXT: ldr x1, [x20, #8] +; CHECK-NEXT: ldr x19, [sp, #40] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: br x1 entryresume.0: %1 = load ptr, ptr %0, align 8 %2 = tail call ptr @llvm.swift.async.context.addr() #4 @@ -31,16 +59,6 @@ ret void } -; NOTE: we do not see the canonical windows frame setup due to the `nounwind` -; attribtue on the function. - -; CHECK: sub sp, sp, #48 -; CHECK: stp x30, x29, [sp, #24] -; CHECK: add x29, sp, #24 -; CHECK: str x19, [sp, #40] -; CHECK: sub x8, x29, #8 -; CHECK: ldr x9, [x0] -; CHECK: str x9, [x8] ; Function Attrs: nounwind readnone declare ptr @llvm.swift.async.context.addr() #2