diff --git a/llvm/test/CodeGen/ARM/sadd_sat.ll b/llvm/test/CodeGen/ARM/sadd_sat.ll --- a/llvm/test/CodeGen/ARM/sadd_sat.ll +++ b/llvm/test/CodeGen/ARM/sadd_sat.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -mtriple=thumbv6m-none-eabi | FileCheck %s --check-prefix=CHECK-T1 ; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2NODSP ; RUN: llc < %s -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2DSP -; RUN: llc < %s -mtriple=armv5t-none-eabi | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK-ARMNODPS +; RUN: llc < %s -mtriple=armv5t-none-eabi | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK-ARMNODSP ; RUN: llc < %s -mtriple=armv5te-none-eabi | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK-ARMBASEDSP ; RUN: llc < %s -mtriple=armv5te-none-eabi -mattr=+dsp | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK-ARMBASEDSP ; RUN: llc < %s -mtriple=armv6-none-eabi | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK-ARMDSP @@ -12,6 +12,335 @@ declare i16 @llvm.sadd.sat.i16(i16, i16) declare i32 @llvm.sadd.sat.i32(i32, i32) declare i64 @llvm.sadd.sat.i64(i64, i64) +declare <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16>, <2 x i16>) +declare <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8>, <4 x i8>) + +define <2 x i16> @funcv2i16(<2 x i16> %x, <2 x i16> %y) nounwind { +; CHECK-T1-LABEL: funcv2i16: +; CHECK-T1: @ %bb.0: +; CHECK-T1-NEXT: .save {r4, lr} +; CHECK-T1-NEXT: push {r4, lr} +; CHECK-T1-NEXT: sxth r2, r2 +; CHECK-T1-NEXT: sxth r0, r0 +; CHECK-T1-NEXT: adds r0, r0, r2 +; CHECK-T1-NEXT: ldr r2, .LCPI0_0 +; CHECK-T1-NEXT: cmp r0, r2 +; CHECK-T1-NEXT: blt .LBB0_2 +; CHECK-T1-NEXT: @ %bb.1: +; CHECK-T1-NEXT: mov r0, r2 +; CHECK-T1-NEXT: .LBB0_2: +; CHECK-T1-NEXT: ldr r4, .LCPI0_1 +; CHECK-T1-NEXT: cmp r0, r4 +; CHECK-T1-NEXT: bgt .LBB0_4 +; CHECK-T1-NEXT: @ %bb.3: +; CHECK-T1-NEXT: mov r0, r4 +; CHECK-T1-NEXT: .LBB0_4: +; CHECK-T1-NEXT: sxth r3, r3 +; CHECK-T1-NEXT: sxth r1, r1 +; CHECK-T1-NEXT: adds r1, r1, r3 +; CHECK-T1-NEXT: cmp r1, r2 +; CHECK-T1-NEXT: bge .LBB0_7 +; CHECK-T1-NEXT: @ %bb.5: +; CHECK-T1-NEXT: cmp r1, r4 +; CHECK-T1-NEXT: ble .LBB0_8 +; CHECK-T1-NEXT: .LBB0_6: +; CHECK-T1-NEXT: pop {r4, pc} +; CHECK-T1-NEXT: .LBB0_7: +; CHECK-T1-NEXT: mov r1, r2 +; CHECK-T1-NEXT: cmp r1, r4 +; CHECK-T1-NEXT: bgt .LBB0_6 +; CHECK-T1-NEXT: .LBB0_8: +; CHECK-T1-NEXT: mov r1, r4 +; CHECK-T1-NEXT: pop {r4, pc} +; CHECK-T1-NEXT: .p2align 2 +; CHECK-T1-NEXT: @ %bb.9: +; CHECK-T1-NEXT: .LCPI0_0: +; CHECK-T1-NEXT: .long 32767 @ 0x7fff +; CHECK-T1-NEXT: .LCPI0_1: +; CHECK-T1-NEXT: .long 4294934528 @ 0xffff8000 +; +; CHECK-T2NODSP-LABEL: funcv2i16: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: sxth r0, r0 +; CHECK-T2NODSP-NEXT: sxth r2, r2 +; CHECK-T2NODSP-NEXT: add r0, r2 +; CHECK-T2NODSP-NEXT: movw r2, #32767 +; CHECK-T2NODSP-NEXT: cmp r0, r2 +; CHECK-T2NODSP-NEXT: sxth r1, r1 +; CHECK-T2NODSP-NEXT: it ge +; CHECK-T2NODSP-NEXT: movge r0, r2 +; CHECK-T2NODSP-NEXT: movw r12, #32768 +; CHECK-T2NODSP-NEXT: sxth r3, r3 +; CHECK-T2NODSP-NEXT: cmn.w r0, #32768 +; CHECK-T2NODSP-NEXT: add r1, r3 +; CHECK-T2NODSP-NEXT: movt r12, #65535 +; CHECK-T2NODSP-NEXT: it le +; CHECK-T2NODSP-NEXT: movle r0, r12 +; CHECK-T2NODSP-NEXT: cmp r1, r2 +; CHECK-T2NODSP-NEXT: it lt +; CHECK-T2NODSP-NEXT: movlt r2, r1 +; CHECK-T2NODSP-NEXT: cmn.w r2, #32768 +; CHECK-T2NODSP-NEXT: it le +; CHECK-T2NODSP-NEXT: movle r2, r12 +; CHECK-T2NODSP-NEXT: mov r1, r2 +; CHECK-T2NODSP-NEXT: bx lr +; +; CHECK-T2DSP-LABEL: funcv2i16: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: qadd16 r0, r0, r2 +; CHECK-T2DSP-NEXT: qadd16 r1, r1, r3 +; CHECK-T2DSP-NEXT: bx lr +; +; CHECK-ARMNODSP-LABEL: funcv2i16: +; CHECK-ARMNODSP: @ %bb.0: +; CHECK-ARMNODSP-NEXT: lsl r0, r0, #16 +; CHECK-ARMNODSP-NEXT: lsl r2, r2, #16 +; CHECK-ARMNODSP-NEXT: asr r0, r0, #16 +; CHECK-ARMNODSP-NEXT: lsl r1, r1, #16 +; CHECK-ARMNODSP-NEXT: add r0, r0, r2, asr #16 +; CHECK-ARMNODSP-NEXT: mov r2, #255 +; CHECK-ARMNODSP-NEXT: orr r2, r2, #32512 +; CHECK-ARMNODSP-NEXT: ldr r12, .LCPI0_0 +; CHECK-ARMNODSP-NEXT: cmp r0, r2 +; CHECK-ARMNODSP-NEXT: lsl r3, r3, #16 +; CHECK-ARMNODSP-NEXT: asr r1, r1, #16 +; CHECK-ARMNODSP-NEXT: movge r0, r2 +; CHECK-ARMNODSP-NEXT: cmn r0, #32768 +; CHECK-ARMNODSP-NEXT: add r1, r1, r3, asr #16 +; CHECK-ARMNODSP-NEXT: movle r0, r12 +; CHECK-ARMNODSP-NEXT: cmp r1, r2 +; CHECK-ARMNODSP-NEXT: movlt r2, r1 +; CHECK-ARMNODSP-NEXT: cmn r2, #32768 +; CHECK-ARMNODSP-NEXT: movle r2, r12 +; CHECK-ARMNODSP-NEXT: mov r1, r2 +; CHECK-ARMNODSP-NEXT: bx lr +; CHECK-ARMNODSP-NEXT: .p2align 2 +; CHECK-ARMNODSP-NEXT: @ %bb.1: +; CHECK-ARMNODSP-NEXT: .LCPI0_0: +; CHECK-ARMNODSP-NEXT: .long 4294934528 @ 0xffff8000 +; +; CHECK-ARMBASEDSP-LABEL: funcv2i16: +; CHECK-ARMBASEDSP: @ %bb.0: +; CHECK-ARMBASEDSP-NEXT: lsl r2, r2, #16 +; CHECK-ARMBASEDSP-NEXT: lsl r0, r0, #16 +; CHECK-ARMBASEDSP-NEXT: lsl r1, r1, #16 +; CHECK-ARMBASEDSP-NEXT: qadd r0, r0, r2 +; CHECK-ARMBASEDSP-NEXT: lsl r2, r3, #16 +; CHECK-ARMBASEDSP-NEXT: qadd r1, r1, r2 +; CHECK-ARMBASEDSP-NEXT: asr r0, r0, #16 +; CHECK-ARMBASEDSP-NEXT: asr r1, r1, #16 +; CHECK-ARMBASEDSP-NEXT: bx lr +; +; CHECK-ARMDSP-LABEL: funcv2i16: +; CHECK-ARMDSP: @ %bb.0: +; CHECK-ARMDSP-NEXT: qadd16 r0, r0, r2 +; CHECK-ARMDSP-NEXT: qadd16 r1, r1, r3 +; CHECK-ARMDSP-NEXT: bx lr + %tmp = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> %x, <2 x i16> %y) + ret <2 x i16> %tmp +} + +define <4 x i8> @funcv4i8(<4 x i8> %x, <4 x i8> %y) nounwind { +; CHECK-T1-LABEL: funcv4i8: +; CHECK-T1: @ %bb.0: +; CHECK-T1-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-T1-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-T1-NEXT: movs r6, #0 +; CHECK-T1-NEXT: add r4, sp, #20 +; CHECK-T1-NEXT: ldrsb r4, [r4, r6] +; CHECK-T1-NEXT: sxtb r0, r0 +; CHECK-T1-NEXT: adds r0, r0, r4 +; CHECK-T1-NEXT: movs r4, #127 +; CHECK-T1-NEXT: cmp r0, #127 +; CHECK-T1-NEXT: blt .LBB1_2 +; CHECK-T1-NEXT: @ %bb.1: +; CHECK-T1-NEXT: mov r0, r4 +; CHECK-T1-NEXT: .LBB1_2: +; CHECK-T1-NEXT: mvns r5, r4 +; CHECK-T1-NEXT: cmp r0, r5 +; CHECK-T1-NEXT: bgt .LBB1_4 +; CHECK-T1-NEXT: @ %bb.3: +; CHECK-T1-NEXT: mov r0, r5 +; CHECK-T1-NEXT: .LBB1_4: +; CHECK-T1-NEXT: add r7, sp, #24 +; CHECK-T1-NEXT: ldrsb r7, [r7, r6] +; CHECK-T1-NEXT: sxtb r1, r1 +; CHECK-T1-NEXT: adds r1, r1, r7 +; CHECK-T1-NEXT: cmp r1, #127 +; CHECK-T1-NEXT: blt .LBB1_6 +; CHECK-T1-NEXT: @ %bb.5: +; CHECK-T1-NEXT: mov r1, r4 +; CHECK-T1-NEXT: .LBB1_6: +; CHECK-T1-NEXT: cmp r1, r5 +; CHECK-T1-NEXT: bgt .LBB1_8 +; CHECK-T1-NEXT: @ %bb.7: +; CHECK-T1-NEXT: mov r1, r5 +; CHECK-T1-NEXT: .LBB1_8: +; CHECK-T1-NEXT: add r7, sp, #28 +; CHECK-T1-NEXT: ldrsb r7, [r7, r6] +; CHECK-T1-NEXT: sxtb r2, r2 +; CHECK-T1-NEXT: adds r2, r2, r7 +; CHECK-T1-NEXT: cmp r2, #127 +; CHECK-T1-NEXT: blt .LBB1_10 +; CHECK-T1-NEXT: @ %bb.9: +; CHECK-T1-NEXT: mov r2, r4 +; CHECK-T1-NEXT: .LBB1_10: +; CHECK-T1-NEXT: cmp r2, r5 +; CHECK-T1-NEXT: bgt .LBB1_12 +; CHECK-T1-NEXT: @ %bb.11: +; CHECK-T1-NEXT: mov r2, r5 +; CHECK-T1-NEXT: .LBB1_12: +; CHECK-T1-NEXT: add r7, sp, #32 +; CHECK-T1-NEXT: ldrsb r6, [r7, r6] +; CHECK-T1-NEXT: sxtb r3, r3 +; CHECK-T1-NEXT: adds r3, r3, r6 +; CHECK-T1-NEXT: cmp r3, #127 +; CHECK-T1-NEXT: bge .LBB1_15 +; CHECK-T1-NEXT: @ %bb.13: +; CHECK-T1-NEXT: cmp r3, r5 +; CHECK-T1-NEXT: ble .LBB1_16 +; CHECK-T1-NEXT: .LBB1_14: +; CHECK-T1-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-T1-NEXT: .LBB1_15: +; CHECK-T1-NEXT: mov r3, r4 +; CHECK-T1-NEXT: cmp r3, r5 +; CHECK-T1-NEXT: bgt .LBB1_14 +; CHECK-T1-NEXT: .LBB1_16: +; CHECK-T1-NEXT: mov r3, r5 +; CHECK-T1-NEXT: pop {r4, r5, r6, r7, pc} +; +; CHECK-T2NODSP-LABEL: funcv4i8: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: .save {r4, lr} +; CHECK-T2NODSP-NEXT: push {r4, lr} +; CHECK-T2NODSP-NEXT: mov r12, r3 +; CHECK-T2NODSP-NEXT: ldrsb.w r3, [sp, #8] +; CHECK-T2NODSP-NEXT: sxtb r0, r0 +; CHECK-T2NODSP-NEXT: ldrsb.w r4, [sp, #12] +; CHECK-T2NODSP-NEXT: add r0, r3 +; CHECK-T2NODSP-NEXT: sxtb r1, r1 +; CHECK-T2NODSP-NEXT: cmp r0, #127 +; CHECK-T2NODSP-NEXT: mov.w r3, #127 +; CHECK-T2NODSP-NEXT: it ge +; CHECK-T2NODSP-NEXT: movge r0, r3 +; CHECK-T2NODSP-NEXT: add r1, r4 +; CHECK-T2NODSP-NEXT: cmn.w r0, #128 +; CHECK-T2NODSP-NEXT: ldrsb.w r4, [sp, #16] +; CHECK-T2NODSP-NEXT: sxtb r2, r2 +; CHECK-T2NODSP-NEXT: mvn lr, #127 +; CHECK-T2NODSP-NEXT: it le +; CHECK-T2NODSP-NEXT: movle r0, lr +; CHECK-T2NODSP-NEXT: cmp r1, #127 +; CHECK-T2NODSP-NEXT: it ge +; CHECK-T2NODSP-NEXT: movge r1, r3 +; CHECK-T2NODSP-NEXT: cmn.w r1, #128 +; CHECK-T2NODSP-NEXT: add r2, r4 +; CHECK-T2NODSP-NEXT: ldrsb.w r4, [sp, #20] +; CHECK-T2NODSP-NEXT: it le +; CHECK-T2NODSP-NEXT: movle r1, lr +; CHECK-T2NODSP-NEXT: cmp r2, #127 +; CHECK-T2NODSP-NEXT: it ge +; CHECK-T2NODSP-NEXT: movge r2, r3 +; CHECK-T2NODSP-NEXT: cmn.w r2, #128 +; CHECK-T2NODSP-NEXT: sxtb.w r12, r12 +; CHECK-T2NODSP-NEXT: add r4, r12 +; CHECK-T2NODSP-NEXT: it le +; CHECK-T2NODSP-NEXT: movle r2, lr +; CHECK-T2NODSP-NEXT: cmp r4, #127 +; CHECK-T2NODSP-NEXT: it lt +; CHECK-T2NODSP-NEXT: movlt r3, r4 +; CHECK-T2NODSP-NEXT: cmn.w r3, #128 +; CHECK-T2NODSP-NEXT: it le +; CHECK-T2NODSP-NEXT: movle r3, lr +; CHECK-T2NODSP-NEXT: pop {r4, pc} +; +; CHECK-T2DSP-LABEL: funcv4i8: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: ldrsb.w r12, [sp] +; CHECK-T2DSP-NEXT: qadd8 r0, r0, r12 +; CHECK-T2DSP-NEXT: ldrsb.w r12, [sp, #4] +; CHECK-T2DSP-NEXT: qadd8 r1, r1, r12 +; CHECK-T2DSP-NEXT: ldrsb.w r12, [sp, #8] +; CHECK-T2DSP-NEXT: qadd8 r2, r2, r12 +; CHECK-T2DSP-NEXT: ldrsb.w r12, [sp, #12] +; CHECK-T2DSP-NEXT: qadd8 r3, r3, r12 +; CHECK-T2DSP-NEXT: bx lr +; +; CHECK-ARMNODSP-LABEL: funcv4i8: +; CHECK-ARMNODSP: @ %bb.0: +; CHECK-ARMNODSP-NEXT: .save {r4, lr} +; CHECK-ARMNODSP-NEXT: push {r4, lr} +; CHECK-ARMNODSP-NEXT: mov r12, r3 +; CHECK-ARMNODSP-NEXT: ldrsb r3, [sp, #8] +; CHECK-ARMNODSP-NEXT: lsl r0, r0, #24 +; CHECK-ARMNODSP-NEXT: ldrsb r4, [sp, #12] +; CHECK-ARMNODSP-NEXT: add r0, r3, r0, asr #24 +; CHECK-ARMNODSP-NEXT: lsl r1, r1, #24 +; CHECK-ARMNODSP-NEXT: mov r3, #127 +; CHECK-ARMNODSP-NEXT: cmp r0, #127 +; CHECK-ARMNODSP-NEXT: movge r0, r3 +; CHECK-ARMNODSP-NEXT: add r1, r4, r1, asr #24 +; CHECK-ARMNODSP-NEXT: mvn lr, #127 +; CHECK-ARMNODSP-NEXT: cmn r0, #128 +; CHECK-ARMNODSP-NEXT: ldrsb r4, [sp, #16] +; CHECK-ARMNODSP-NEXT: lsl r2, r2, #24 +; CHECK-ARMNODSP-NEXT: movle r0, lr +; CHECK-ARMNODSP-NEXT: cmp r1, #127 +; CHECK-ARMNODSP-NEXT: movge r1, r3 +; CHECK-ARMNODSP-NEXT: cmn r1, #128 +; CHECK-ARMNODSP-NEXT: add r2, r4, r2, asr #24 +; CHECK-ARMNODSP-NEXT: ldrsb r4, [sp, #20] +; CHECK-ARMNODSP-NEXT: movle r1, lr +; CHECK-ARMNODSP-NEXT: cmp r2, #127 +; CHECK-ARMNODSP-NEXT: lsl r12, r12, #24 +; CHECK-ARMNODSP-NEXT: movge r2, r3 +; CHECK-ARMNODSP-NEXT: cmn r2, #128 +; CHECK-ARMNODSP-NEXT: add r4, r4, r12, asr #24 +; CHECK-ARMNODSP-NEXT: movle r2, lr +; CHECK-ARMNODSP-NEXT: cmp r4, #127 +; CHECK-ARMNODSP-NEXT: movlt r3, r4 +; CHECK-ARMNODSP-NEXT: cmn r3, #128 +; CHECK-ARMNODSP-NEXT: movle r3, lr +; CHECK-ARMNODSP-NEXT: pop {r4, pc} +; +; CHECK-ARMBASEDSP-LABEL: funcv4i8: +; CHECK-ARMBASEDSP: @ %bb.0: +; CHECK-ARMBASEDSP-NEXT: ldrb r12, [sp] +; CHECK-ARMBASEDSP-NEXT: lsl r0, r0, #24 +; CHECK-ARMBASEDSP-NEXT: lsl r1, r1, #24 +; CHECK-ARMBASEDSP-NEXT: lsl r2, r2, #24 +; CHECK-ARMBASEDSP-NEXT: lsl r3, r3, #24 +; CHECK-ARMBASEDSP-NEXT: lsl r12, r12, #24 +; CHECK-ARMBASEDSP-NEXT: qadd r0, r0, r12 +; CHECK-ARMBASEDSP-NEXT: ldrb r12, [sp, #4] +; CHECK-ARMBASEDSP-NEXT: asr r0, r0, #24 +; CHECK-ARMBASEDSP-NEXT: lsl r12, r12, #24 +; CHECK-ARMBASEDSP-NEXT: qadd r1, r1, r12 +; CHECK-ARMBASEDSP-NEXT: ldrb r12, [sp, #8] +; CHECK-ARMBASEDSP-NEXT: asr r1, r1, #24 +; CHECK-ARMBASEDSP-NEXT: lsl r12, r12, #24 +; CHECK-ARMBASEDSP-NEXT: qadd r2, r2, r12 +; CHECK-ARMBASEDSP-NEXT: ldrb r12, [sp, #12] +; CHECK-ARMBASEDSP-NEXT: asr r2, r2, #24 +; CHECK-ARMBASEDSP-NEXT: lsl r12, r12, #24 +; CHECK-ARMBASEDSP-NEXT: qadd r3, r3, r12 +; CHECK-ARMBASEDSP-NEXT: asr r3, r3, #24 +; CHECK-ARMBASEDSP-NEXT: bx lr +; +; CHECK-ARMDSP-LABEL: funcv4i8: +; CHECK-ARMDSP: @ %bb.0: +; CHECK-ARMDSP-NEXT: ldrsb r12, [sp] +; CHECK-ARMDSP-NEXT: qadd8 r0, r0, r12 +; CHECK-ARMDSP-NEXT: ldrsb r12, [sp, #4] +; CHECK-ARMDSP-NEXT: qadd8 r1, r1, r12 +; CHECK-ARMDSP-NEXT: ldrsb r12, [sp, #8] +; CHECK-ARMDSP-NEXT: qadd8 r2, r2, r12 +; CHECK-ARMDSP-NEXT: ldrsb r12, [sp, #12] +; CHECK-ARMDSP-NEXT: qadd8 r3, r3, r12 +; CHECK-ARMDSP-NEXT: bx lr + %tmp = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> %x, <4 x i8> %y) + ret <4 x i8> %tmp +} define i32 @func(i32 %x, i32 %y) nounwind { ; CHECK-T1-LABEL: func: @@ -20,28 +349,28 @@ ; CHECK-T1-NEXT: movs r3, #1 ; CHECK-T1-NEXT: adds r0, r0, r1 ; CHECK-T1-NEXT: mov r1, r3 -; CHECK-T1-NEXT: bmi .LBB0_2 +; CHECK-T1-NEXT: bmi .LBB2_2 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: movs r1, #0 -; CHECK-T1-NEXT: .LBB0_2: +; CHECK-T1-NEXT: .LBB2_2: ; CHECK-T1-NEXT: cmp r1, #0 -; CHECK-T1-NEXT: bne .LBB0_4 +; CHECK-T1-NEXT: bne .LBB2_4 ; CHECK-T1-NEXT: @ %bb.3: ; CHECK-T1-NEXT: lsls r1, r3, #31 ; CHECK-T1-NEXT: cmp r0, r2 -; CHECK-T1-NEXT: bvs .LBB0_5 -; CHECK-T1-NEXT: b .LBB0_6 -; CHECK-T1-NEXT: .LBB0_4: -; CHECK-T1-NEXT: ldr r1, .LCPI0_0 +; CHECK-T1-NEXT: bvs .LBB2_5 +; CHECK-T1-NEXT: b .LBB2_6 +; CHECK-T1-NEXT: .LBB2_4: +; CHECK-T1-NEXT: ldr r1, .LCPI2_0 ; CHECK-T1-NEXT: cmp r0, r2 -; CHECK-T1-NEXT: bvc .LBB0_6 -; CHECK-T1-NEXT: .LBB0_5: +; CHECK-T1-NEXT: bvc .LBB2_6 +; CHECK-T1-NEXT: .LBB2_5: ; CHECK-T1-NEXT: mov r0, r1 -; CHECK-T1-NEXT: .LBB0_6: +; CHECK-T1-NEXT: .LBB2_6: ; CHECK-T1-NEXT: bx lr ; CHECK-T1-NEXT: .p2align 2 ; CHECK-T1-NEXT: @ %bb.7: -; CHECK-T1-NEXT: .LCPI0_0: +; CHECK-T1-NEXT: .LCPI2_0: ; CHECK-T1-NEXT: .long 2147483647 @ 0x7fffffff ; ; CHECK-T2NODSP-LABEL: func: @@ -65,18 +394,18 @@ ; CHECK-T2DSP-NEXT: qadd r0, r0, r1 ; CHECK-T2DSP-NEXT: bx lr ; -; CHECK-ARMNODPS-LABEL: func: -; CHECK-ARMNODPS: @ %bb.0: -; CHECK-ARMNODPS-NEXT: adds r2, r0, r1 -; CHECK-ARMNODPS-NEXT: mov r3, #0 -; CHECK-ARMNODPS-NEXT: movmi r3, #1 -; CHECK-ARMNODPS-NEXT: mov r1, #-2147483648 -; CHECK-ARMNODPS-NEXT: cmp r3, #0 -; CHECK-ARMNODPS-NEXT: mvnne r1, #-2147483648 -; CHECK-ARMNODPS-NEXT: cmp r2, r0 -; CHECK-ARMNODPS-NEXT: movvc r1, r2 -; CHECK-ARMNODPS-NEXT: mov r0, r1 -; CHECK-ARMNODPS-NEXT: bx lr +; CHECK-ARMNODSP-LABEL: func: +; CHECK-ARMNODSP: @ %bb.0: +; CHECK-ARMNODSP-NEXT: adds r2, r0, r1 +; CHECK-ARMNODSP-NEXT: mov r3, #0 +; CHECK-ARMNODSP-NEXT: movmi r3, #1 +; CHECK-ARMNODSP-NEXT: mov r1, #-2147483648 +; CHECK-ARMNODSP-NEXT: cmp r3, #0 +; CHECK-ARMNODSP-NEXT: mvnne r1, #-2147483648 +; CHECK-ARMNODSP-NEXT: cmp r2, r0 +; CHECK-ARMNODSP-NEXT: movvc r1, r2 +; CHECK-ARMNODSP-NEXT: mov r0, r1 +; CHECK-ARMNODSP-NEXT: bx lr ; ; CHECK-ARMBASEDSP-LABEL: func: ; CHECK-ARMBASEDSP: @ %bb.0: @@ -102,30 +431,30 @@ ; CHECK-T1-NEXT: adcs r3, r1 ; CHECK-T1-NEXT: eors r1, r3 ; CHECK-T1-NEXT: bics r1, r4 -; CHECK-T1-NEXT: bpl .LBB1_2 +; CHECK-T1-NEXT: bpl .LBB3_2 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: asrs r0, r3, #31 -; CHECK-T1-NEXT: .LBB1_2: +; CHECK-T1-NEXT: .LBB3_2: ; CHECK-T1-NEXT: cmp r3, #0 -; CHECK-T1-NEXT: bmi .LBB1_4 +; CHECK-T1-NEXT: bmi .LBB3_4 ; CHECK-T1-NEXT: @ %bb.3: ; CHECK-T1-NEXT: movs r2, #1 ; CHECK-T1-NEXT: lsls r2, r2, #31 ; CHECK-T1-NEXT: cmp r1, #0 -; CHECK-T1-NEXT: bpl .LBB1_5 -; CHECK-T1-NEXT: b .LBB1_6 -; CHECK-T1-NEXT: .LBB1_4: -; CHECK-T1-NEXT: ldr r2, .LCPI1_0 +; CHECK-T1-NEXT: bpl .LBB3_5 +; CHECK-T1-NEXT: b .LBB3_6 +; CHECK-T1-NEXT: .LBB3_4: +; CHECK-T1-NEXT: ldr r2, .LCPI3_0 ; CHECK-T1-NEXT: cmp r1, #0 -; CHECK-T1-NEXT: bmi .LBB1_6 -; CHECK-T1-NEXT: .LBB1_5: +; CHECK-T1-NEXT: bmi .LBB3_6 +; CHECK-T1-NEXT: .LBB3_5: ; CHECK-T1-NEXT: mov r2, r3 -; CHECK-T1-NEXT: .LBB1_6: +; CHECK-T1-NEXT: .LBB3_6: ; CHECK-T1-NEXT: mov r1, r2 ; CHECK-T1-NEXT: pop {r4, pc} ; CHECK-T1-NEXT: .p2align 2 ; CHECK-T1-NEXT: @ %bb.7: -; CHECK-T1-NEXT: .LCPI1_0: +; CHECK-T1-NEXT: .LCPI3_0: ; CHECK-T1-NEXT: .long 2147483647 @ 0x7fffffff ; ; CHECK-T2-LABEL: func2: @@ -170,24 +499,24 @@ ; CHECK-T1-LABEL: func16: ; CHECK-T1: @ %bb.0: ; CHECK-T1-NEXT: adds r0, r0, r1 -; CHECK-T1-NEXT: ldr r1, .LCPI2_0 +; CHECK-T1-NEXT: ldr r1, .LCPI4_0 ; CHECK-T1-NEXT: cmp r0, r1 -; CHECK-T1-NEXT: blt .LBB2_2 +; CHECK-T1-NEXT: blt .LBB4_2 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: mov r0, r1 -; CHECK-T1-NEXT: .LBB2_2: -; CHECK-T1-NEXT: ldr r1, .LCPI2_1 +; CHECK-T1-NEXT: .LBB4_2: +; CHECK-T1-NEXT: ldr r1, .LCPI4_1 ; CHECK-T1-NEXT: cmp r0, r1 -; CHECK-T1-NEXT: bgt .LBB2_4 +; CHECK-T1-NEXT: bgt .LBB4_4 ; CHECK-T1-NEXT: @ %bb.3: ; CHECK-T1-NEXT: mov r0, r1 -; CHECK-T1-NEXT: .LBB2_4: +; CHECK-T1-NEXT: .LBB4_4: ; CHECK-T1-NEXT: bx lr ; CHECK-T1-NEXT: .p2align 2 ; CHECK-T1-NEXT: @ %bb.5: -; CHECK-T1-NEXT: .LCPI2_0: +; CHECK-T1-NEXT: .LCPI4_0: ; CHECK-T1-NEXT: .long 32767 @ 0x7fff -; CHECK-T1-NEXT: .LCPI2_1: +; CHECK-T1-NEXT: .LCPI4_1: ; CHECK-T1-NEXT: .long 4294934528 @ 0xffff8000 ; ; CHECK-T2NODSP-LABEL: func16: @@ -210,21 +539,21 @@ ; CHECK-T2DSP-NEXT: sxth r0, r0 ; CHECK-T2DSP-NEXT: bx lr ; -; CHECK-ARMNODPS-LABEL: func16: -; CHECK-ARMNODPS: @ %bb.0: -; CHECK-ARMNODPS-NEXT: add r0, r0, r1 -; CHECK-ARMNODPS-NEXT: mov r1, #255 -; CHECK-ARMNODPS-NEXT: orr r1, r1, #32512 -; CHECK-ARMNODPS-NEXT: cmp r0, r1 -; CHECK-ARMNODPS-NEXT: movlt r1, r0 -; CHECK-ARMNODPS-NEXT: ldr r0, .LCPI2_0 -; CHECK-ARMNODPS-NEXT: cmn r1, #32768 -; CHECK-ARMNODPS-NEXT: movgt r0, r1 -; CHECK-ARMNODPS-NEXT: bx lr -; CHECK-ARMNODPS-NEXT: .p2align 2 -; CHECK-ARMNODPS-NEXT: @ %bb.1: -; CHECK-ARMNODPS-NEXT: .LCPI2_0: -; CHECK-ARMNODPS-NEXT: .long 4294934528 @ 0xffff8000 +; CHECK-ARMNODSP-LABEL: func16: +; CHECK-ARMNODSP: @ %bb.0: +; CHECK-ARMNODSP-NEXT: add r0, r0, r1 +; CHECK-ARMNODSP-NEXT: mov r1, #255 +; CHECK-ARMNODSP-NEXT: orr r1, r1, #32512 +; CHECK-ARMNODSP-NEXT: cmp r0, r1 +; CHECK-ARMNODSP-NEXT: movlt r1, r0 +; CHECK-ARMNODSP-NEXT: ldr r0, .LCPI4_0 +; CHECK-ARMNODSP-NEXT: cmn r1, #32768 +; CHECK-ARMNODSP-NEXT: movgt r0, r1 +; CHECK-ARMNODSP-NEXT: bx lr +; CHECK-ARMNODSP-NEXT: .p2align 2 +; CHECK-ARMNODSP-NEXT: @ %bb.1: +; CHECK-ARMNODSP-NEXT: .LCPI4_0: +; CHECK-ARMNODSP-NEXT: .long 4294934528 @ 0xffff8000 ; ; CHECK-ARMBASEDSP-LABEL: func16: ; CHECK-ARMBASEDSP: @ %bb.0: @@ -249,16 +578,16 @@ ; CHECK-T1-NEXT: adds r0, r0, r1 ; CHECK-T1-NEXT: movs r1, #127 ; CHECK-T1-NEXT: cmp r0, #127 -; CHECK-T1-NEXT: blt .LBB3_2 +; CHECK-T1-NEXT: blt .LBB5_2 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: mov r0, r1 -; CHECK-T1-NEXT: .LBB3_2: +; CHECK-T1-NEXT: .LBB5_2: ; CHECK-T1-NEXT: mvns r1, r1 ; CHECK-T1-NEXT: cmp r0, r1 -; CHECK-T1-NEXT: bgt .LBB3_4 +; CHECK-T1-NEXT: bgt .LBB5_4 ; CHECK-T1-NEXT: @ %bb.3: ; CHECK-T1-NEXT: mov r0, r1 -; CHECK-T1-NEXT: .LBB3_4: +; CHECK-T1-NEXT: .LBB5_4: ; CHECK-T1-NEXT: bx lr ; ; CHECK-T2NODSP-LABEL: func8: @@ -278,14 +607,14 @@ ; CHECK-T2DSP-NEXT: sxtb r0, r0 ; CHECK-T2DSP-NEXT: bx lr ; -; CHECK-ARMNODPS-LABEL: func8: -; CHECK-ARMNODPS: @ %bb.0: -; CHECK-ARMNODPS-NEXT: add r0, r0, r1 -; CHECK-ARMNODPS-NEXT: cmp r0, #127 -; CHECK-ARMNODPS-NEXT: movge r0, #127 -; CHECK-ARMNODPS-NEXT: cmn r0, #128 -; CHECK-ARMNODPS-NEXT: mvnle r0, #127 -; CHECK-ARMNODPS-NEXT: bx lr +; CHECK-ARMNODSP-LABEL: func8: +; CHECK-ARMNODSP: @ %bb.0: +; CHECK-ARMNODSP-NEXT: add r0, r0, r1 +; CHECK-ARMNODSP-NEXT: cmp r0, #127 +; CHECK-ARMNODSP-NEXT: movge r0, #127 +; CHECK-ARMNODSP-NEXT: cmn r0, #128 +; CHECK-ARMNODSP-NEXT: mvnle r0, #127 +; CHECK-ARMNODSP-NEXT: bx lr ; ; CHECK-ARMBASEDSP-LABEL: func8: ; CHECK-ARMBASEDSP: @ %bb.0: @@ -310,16 +639,16 @@ ; CHECK-T1-NEXT: adds r0, r0, r1 ; CHECK-T1-NEXT: movs r1, #7 ; CHECK-T1-NEXT: cmp r0, #7 -; CHECK-T1-NEXT: blt .LBB4_2 +; CHECK-T1-NEXT: blt .LBB6_2 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: mov r0, r1 -; CHECK-T1-NEXT: .LBB4_2: +; CHECK-T1-NEXT: .LBB6_2: ; CHECK-T1-NEXT: mvns r1, r1 ; CHECK-T1-NEXT: cmp r0, r1 -; CHECK-T1-NEXT: bgt .LBB4_4 +; CHECK-T1-NEXT: bgt .LBB6_4 ; CHECK-T1-NEXT: @ %bb.3: ; CHECK-T1-NEXT: mov r0, r1 -; CHECK-T1-NEXT: .LBB4_4: +; CHECK-T1-NEXT: .LBB6_4: ; CHECK-T1-NEXT: bx lr ; ; CHECK-T2NODSP-LABEL: func3: @@ -341,14 +670,14 @@ ; CHECK-T2DSP-NEXT: asrs r0, r0, #28 ; CHECK-T2DSP-NEXT: bx lr ; -; CHECK-ARMNODPS-LABEL: func3: -; CHECK-ARMNODPS: @ %bb.0: -; CHECK-ARMNODPS-NEXT: add r0, r0, r1 -; CHECK-ARMNODPS-NEXT: cmp r0, #7 -; CHECK-ARMNODPS-NEXT: movge r0, #7 -; CHECK-ARMNODPS-NEXT: cmn r0, #8 -; CHECK-ARMNODPS-NEXT: mvnle r0, #7 -; CHECK-ARMNODPS-NEXT: bx lr +; CHECK-ARMNODSP-LABEL: func3: +; CHECK-ARMNODSP: @ %bb.0: +; CHECK-ARMNODSP-NEXT: add r0, r0, r1 +; CHECK-ARMNODSP-NEXT: cmp r0, #7 +; CHECK-ARMNODSP-NEXT: movge r0, #7 +; CHECK-ARMNODSP-NEXT: cmn r0, #8 +; CHECK-ARMNODSP-NEXT: mvnle r0, #7 +; CHECK-ARMNODSP-NEXT: bx lr ; ; CHECK-ARMBASEDSP-LABEL: func3: ; CHECK-ARMBASEDSP: @ %bb.0: diff --git a/llvm/test/CodeGen/ARM/sadd_sat_plus.ll b/llvm/test/CodeGen/ARM/sadd_sat_plus.ll --- a/llvm/test/CodeGen/ARM/sadd_sat_plus.ll +++ b/llvm/test/CodeGen/ARM/sadd_sat_plus.ll @@ -9,6 +9,291 @@ declare i16 @llvm.sadd.sat.i16(i16, i16) declare i32 @llvm.sadd.sat.i32(i32, i32) declare i64 @llvm.sadd.sat.i64(i64, i64) +declare <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16>, <2 x i16>) +declare <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8>, <4 x i8>) + +define <2 x i16> @funcv2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %z) nounwind { +; CHECK-T1-LABEL: funcv2i16: +; CHECK-T1: @ %bb.0: +; CHECK-T1-NEXT: .save {r4, r5, r7, lr} +; CHECK-T1-NEXT: push {r4, r5, r7, lr} +; CHECK-T1-NEXT: ldr r4, [sp, #16] +; CHECK-T1-NEXT: muls r4, r2, r4 +; CHECK-T1-NEXT: sxth r2, r4 +; CHECK-T1-NEXT: sxth r0, r0 +; CHECK-T1-NEXT: adds r0, r0, r2 +; CHECK-T1-NEXT: ldr r2, .LCPI0_0 +; CHECK-T1-NEXT: cmp r0, r2 +; CHECK-T1-NEXT: blt .LBB0_2 +; CHECK-T1-NEXT: @ %bb.1: +; CHECK-T1-NEXT: mov r0, r2 +; CHECK-T1-NEXT: .LBB0_2: +; CHECK-T1-NEXT: ldr r4, .LCPI0_1 +; CHECK-T1-NEXT: cmp r0, r4 +; CHECK-T1-NEXT: bgt .LBB0_4 +; CHECK-T1-NEXT: @ %bb.3: +; CHECK-T1-NEXT: mov r0, r4 +; CHECK-T1-NEXT: .LBB0_4: +; CHECK-T1-NEXT: ldr r5, [sp, #20] +; CHECK-T1-NEXT: muls r5, r3, r5 +; CHECK-T1-NEXT: sxth r3, r5 +; CHECK-T1-NEXT: sxth r1, r1 +; CHECK-T1-NEXT: adds r1, r1, r3 +; CHECK-T1-NEXT: cmp r1, r2 +; CHECK-T1-NEXT: bge .LBB0_7 +; CHECK-T1-NEXT: @ %bb.5: +; CHECK-T1-NEXT: cmp r1, r4 +; CHECK-T1-NEXT: ble .LBB0_8 +; CHECK-T1-NEXT: .LBB0_6: +; CHECK-T1-NEXT: pop {r4, r5, r7, pc} +; CHECK-T1-NEXT: .LBB0_7: +; CHECK-T1-NEXT: mov r1, r2 +; CHECK-T1-NEXT: cmp r1, r4 +; CHECK-T1-NEXT: bgt .LBB0_6 +; CHECK-T1-NEXT: .LBB0_8: +; CHECK-T1-NEXT: mov r1, r4 +; CHECK-T1-NEXT: pop {r4, r5, r7, pc} +; CHECK-T1-NEXT: .p2align 2 +; CHECK-T1-NEXT: @ %bb.9: +; CHECK-T1-NEXT: .LCPI0_0: +; CHECK-T1-NEXT: .long 32767 @ 0x7fff +; CHECK-T1-NEXT: .LCPI0_1: +; CHECK-T1-NEXT: .long 4294934528 @ 0xffff8000 +; +; CHECK-T2NODSP-LABEL: funcv2i16: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: .save {r7, lr} +; CHECK-T2NODSP-NEXT: push {r7, lr} +; CHECK-T2NODSP-NEXT: ldrh.w r12, [sp, #8] +; CHECK-T2NODSP-NEXT: sxth r0, r0 +; CHECK-T2NODSP-NEXT: movw lr, #32767 +; CHECK-T2NODSP-NEXT: sxth r1, r1 +; CHECK-T2NODSP-NEXT: mul r2, r2, r12 +; CHECK-T2NODSP-NEXT: movw r12, #32768 +; CHECK-T2NODSP-NEXT: movt r12, #65535 +; CHECK-T2NODSP-NEXT: sxth r2, r2 +; CHECK-T2NODSP-NEXT: add r0, r2 +; CHECK-T2NODSP-NEXT: ldrh.w r2, [sp, #12] +; CHECK-T2NODSP-NEXT: cmp r0, lr +; CHECK-T2NODSP-NEXT: it ge +; CHECK-T2NODSP-NEXT: movge r0, lr +; CHECK-T2NODSP-NEXT: cmn.w r0, #32768 +; CHECK-T2NODSP-NEXT: it le +; CHECK-T2NODSP-NEXT: movle r0, r12 +; CHECK-T2NODSP-NEXT: muls r2, r3, r2 +; CHECK-T2NODSP-NEXT: sxth r2, r2 +; CHECK-T2NODSP-NEXT: add r1, r2 +; CHECK-T2NODSP-NEXT: cmp r1, lr +; CHECK-T2NODSP-NEXT: it lt +; CHECK-T2NODSP-NEXT: movlt lr, r1 +; CHECK-T2NODSP-NEXT: cmn.w lr, #32768 +; CHECK-T2NODSP-NEXT: it le +; CHECK-T2NODSP-NEXT: movle lr, r12 +; CHECK-T2NODSP-NEXT: mov r1, lr +; CHECK-T2NODSP-NEXT: pop {r7, pc} +; +; CHECK-T2DSP-LABEL: funcv2i16: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: ldrh.w r12, [sp] +; CHECK-T2DSP-NEXT: mul r2, r2, r12 +; CHECK-T2DSP-NEXT: qadd16 r0, r0, r2 +; CHECK-T2DSP-NEXT: ldrh.w r2, [sp, #4] +; CHECK-T2DSP-NEXT: muls r2, r3, r2 +; CHECK-T2DSP-NEXT: qadd16 r1, r1, r2 +; CHECK-T2DSP-NEXT: bx lr +; +; CHECK-ARM-LABEL: funcv2i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: vldr d16, [sp] +; CHECK-ARM-NEXT: vmov d17, r2, r3 +; CHECK-ARM-NEXT: vmul.i32 d16, d17, d16 +; CHECK-ARM-NEXT: vmov d17, r0, r1 +; CHECK-ARM-NEXT: vshl.i32 d17, d17, #16 +; CHECK-ARM-NEXT: vshl.i32 d16, d16, #16 +; CHECK-ARM-NEXT: vqadd.s32 d16, d17, d16 +; CHECK-ARM-NEXT: vshr.s32 d16, d16, #16 +; CHECK-ARM-NEXT: vmov r0, r1, d16 +; CHECK-ARM-NEXT: bx lr + %a = mul <2 x i16> %y, %z + %tmp = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> %x, <2 x i16> %a) + ret <2 x i16> %tmp +} + +define <4 x i8> @funcv4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %z) nounwind { +; CHECK-T1-LABEL: funcv4i8: +; CHECK-T1: @ %bb.0: +; CHECK-T1-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-T1-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-T1-NEXT: ldr r4, [sp, #36] +; CHECK-T1-NEXT: ldr r5, [sp, #20] +; CHECK-T1-NEXT: muls r5, r4, r5 +; CHECK-T1-NEXT: sxtb r4, r5 +; CHECK-T1-NEXT: sxtb r0, r0 +; CHECK-T1-NEXT: adds r0, r0, r4 +; CHECK-T1-NEXT: movs r4, #127 +; CHECK-T1-NEXT: cmp r0, #127 +; CHECK-T1-NEXT: blt .LBB1_2 +; CHECK-T1-NEXT: @ %bb.1: +; CHECK-T1-NEXT: mov r0, r4 +; CHECK-T1-NEXT: .LBB1_2: +; CHECK-T1-NEXT: mvns r5, r4 +; CHECK-T1-NEXT: cmp r0, r5 +; CHECK-T1-NEXT: bgt .LBB1_4 +; CHECK-T1-NEXT: @ %bb.3: +; CHECK-T1-NEXT: mov r0, r5 +; CHECK-T1-NEXT: .LBB1_4: +; CHECK-T1-NEXT: ldr r6, [sp, #40] +; CHECK-T1-NEXT: ldr r7, [sp, #24] +; CHECK-T1-NEXT: muls r7, r6, r7 +; CHECK-T1-NEXT: sxtb r6, r7 +; CHECK-T1-NEXT: sxtb r1, r1 +; CHECK-T1-NEXT: adds r1, r1, r6 +; CHECK-T1-NEXT: cmp r1, #127 +; CHECK-T1-NEXT: blt .LBB1_6 +; CHECK-T1-NEXT: @ %bb.5: +; CHECK-T1-NEXT: mov r1, r4 +; CHECK-T1-NEXT: .LBB1_6: +; CHECK-T1-NEXT: cmp r1, r5 +; CHECK-T1-NEXT: bgt .LBB1_8 +; CHECK-T1-NEXT: @ %bb.7: +; CHECK-T1-NEXT: mov r1, r5 +; CHECK-T1-NEXT: .LBB1_8: +; CHECK-T1-NEXT: ldr r6, [sp, #44] +; CHECK-T1-NEXT: ldr r7, [sp, #28] +; CHECK-T1-NEXT: muls r7, r6, r7 +; CHECK-T1-NEXT: sxtb r6, r7 +; CHECK-T1-NEXT: sxtb r2, r2 +; CHECK-T1-NEXT: adds r2, r2, r6 +; CHECK-T1-NEXT: cmp r2, #127 +; CHECK-T1-NEXT: blt .LBB1_10 +; CHECK-T1-NEXT: @ %bb.9: +; CHECK-T1-NEXT: mov r2, r4 +; CHECK-T1-NEXT: .LBB1_10: +; CHECK-T1-NEXT: cmp r2, r5 +; CHECK-T1-NEXT: bgt .LBB1_12 +; CHECK-T1-NEXT: @ %bb.11: +; CHECK-T1-NEXT: mov r2, r5 +; CHECK-T1-NEXT: .LBB1_12: +; CHECK-T1-NEXT: ldr r6, [sp, #48] +; CHECK-T1-NEXT: ldr r7, [sp, #32] +; CHECK-T1-NEXT: muls r7, r6, r7 +; CHECK-T1-NEXT: sxtb r6, r7 +; CHECK-T1-NEXT: sxtb r3, r3 +; CHECK-T1-NEXT: adds r3, r3, r6 +; CHECK-T1-NEXT: cmp r3, #127 +; CHECK-T1-NEXT: bge .LBB1_15 +; CHECK-T1-NEXT: @ %bb.13: +; CHECK-T1-NEXT: cmp r3, r5 +; CHECK-T1-NEXT: ble .LBB1_16 +; CHECK-T1-NEXT: .LBB1_14: +; CHECK-T1-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-T1-NEXT: .LBB1_15: +; CHECK-T1-NEXT: mov r3, r4 +; CHECK-T1-NEXT: cmp r3, r5 +; CHECK-T1-NEXT: bgt .LBB1_14 +; CHECK-T1-NEXT: .LBB1_16: +; CHECK-T1-NEXT: mov r3, r5 +; CHECK-T1-NEXT: pop {r4, r5, r6, r7, pc} +; +; CHECK-T2NODSP-LABEL: funcv4i8: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: .save {r4, r5, r7, lr} +; CHECK-T2NODSP-NEXT: push {r4, r5, r7, lr} +; CHECK-T2NODSP-NEXT: ldrb.w r4, [sp, #36] +; CHECK-T2NODSP-NEXT: sxtb r1, r1 +; CHECK-T2NODSP-NEXT: ldrb.w r5, [sp, #20] +; CHECK-T2NODSP-NEXT: mov r12, r3 +; CHECK-T2NODSP-NEXT: ldrb.w lr, [sp, #32] +; CHECK-T2NODSP-NEXT: sxtb r2, r2 +; CHECK-T2NODSP-NEXT: ldrb.w r3, [sp, #16] +; CHECK-T2NODSP-NEXT: sxtb r0, r0 +; CHECK-T2NODSP-NEXT: muls r4, r5, r4 +; CHECK-T2NODSP-NEXT: ldrb.w r5, [sp, #24] +; CHECK-T2NODSP-NEXT: mul r3, r3, lr +; CHECK-T2NODSP-NEXT: mvn lr, #127 +; CHECK-T2NODSP-NEXT: sxtb r4, r4 +; CHECK-T2NODSP-NEXT: add r1, r4 +; CHECK-T2NODSP-NEXT: ldrb.w r4, [sp, #40] +; CHECK-T2NODSP-NEXT: sxtb r3, r3 +; CHECK-T2NODSP-NEXT: add r0, r3 +; CHECK-T2NODSP-NEXT: cmp r0, #127 +; CHECK-T2NODSP-NEXT: mov.w r3, #127 +; CHECK-T2NODSP-NEXT: mul r4, r5, r4 +; CHECK-T2NODSP-NEXT: ldrb.w r5, [sp, #28] +; CHECK-T2NODSP-NEXT: it ge +; CHECK-T2NODSP-NEXT: movge r0, r3 +; CHECK-T2NODSP-NEXT: cmn.w r0, #128 +; CHECK-T2NODSP-NEXT: it le +; CHECK-T2NODSP-NEXT: movle r0, lr +; CHECK-T2NODSP-NEXT: cmp r1, #127 +; CHECK-T2NODSP-NEXT: it ge +; CHECK-T2NODSP-NEXT: movge r1, r3 +; CHECK-T2NODSP-NEXT: cmn.w r1, #128 +; CHECK-T2NODSP-NEXT: it le +; CHECK-T2NODSP-NEXT: movle r1, lr +; CHECK-T2NODSP-NEXT: sxtb r4, r4 +; CHECK-T2NODSP-NEXT: add r2, r4 +; CHECK-T2NODSP-NEXT: ldrb.w r4, [sp, #44] +; CHECK-T2NODSP-NEXT: cmp r2, #127 +; CHECK-T2NODSP-NEXT: it ge +; CHECK-T2NODSP-NEXT: movge r2, r3 +; CHECK-T2NODSP-NEXT: cmn.w r2, #128 +; CHECK-T2NODSP-NEXT: it le +; CHECK-T2NODSP-NEXT: movle r2, lr +; CHECK-T2NODSP-NEXT: muls r4, r5, r4 +; CHECK-T2NODSP-NEXT: sxtb.w r5, r12 +; CHECK-T2NODSP-NEXT: sxtb r4, r4 +; CHECK-T2NODSP-NEXT: add r5, r4 +; CHECK-T2NODSP-NEXT: cmp r5, #127 +; CHECK-T2NODSP-NEXT: it lt +; CHECK-T2NODSP-NEXT: movlt r3, r5 +; CHECK-T2NODSP-NEXT: cmn.w r3, #128 +; CHECK-T2NODSP-NEXT: it le +; CHECK-T2NODSP-NEXT: movle r3, lr +; CHECK-T2NODSP-NEXT: pop {r4, r5, r7, pc} +; +; CHECK-T2DSP-LABEL: funcv4i8: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: .save {r7, lr} +; CHECK-T2DSP-NEXT: push {r7, lr} +; CHECK-T2DSP-NEXT: ldrb.w r12, [sp, #24] +; CHECK-T2DSP-NEXT: ldrb.w lr, [sp, #8] +; CHECK-T2DSP-NEXT: mul r12, lr, r12 +; CHECK-T2DSP-NEXT: ldrb.w lr, [sp, #12] +; CHECK-T2DSP-NEXT: qadd8 r0, r0, r12 +; CHECK-T2DSP-NEXT: ldrb.w r12, [sp, #28] +; CHECK-T2DSP-NEXT: mul r12, lr, r12 +; CHECK-T2DSP-NEXT: ldrb.w lr, [sp, #16] +; CHECK-T2DSP-NEXT: qadd8 r1, r1, r12 +; CHECK-T2DSP-NEXT: ldrb.w r12, [sp, #32] +; CHECK-T2DSP-NEXT: mul r12, lr, r12 +; CHECK-T2DSP-NEXT: ldrb.w lr, [sp, #20] +; CHECK-T2DSP-NEXT: qadd8 r2, r2, r12 +; CHECK-T2DSP-NEXT: ldrb.w r12, [sp, #36] +; CHECK-T2DSP-NEXT: mul r12, lr, r12 +; CHECK-T2DSP-NEXT: qadd8 r3, r3, r12 +; CHECK-T2DSP-NEXT: pop {r7, pc} +; +; CHECK-ARM-LABEL: funcv4i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: vldr d16, [sp] +; CHECK-ARM-NEXT: vmov d17, r2, r3 +; CHECK-ARM-NEXT: vmul.i16 d16, d17, d16 +; CHECK-ARM-NEXT: vmov d17, r0, r1 +; CHECK-ARM-NEXT: vshl.i16 d17, d17, #8 +; CHECK-ARM-NEXT: vshl.i16 d16, d16, #8 +; CHECK-ARM-NEXT: vshr.s16 d17, d17, #8 +; CHECK-ARM-NEXT: vshr.s16 d16, d16, #8 +; CHECK-ARM-NEXT: vshl.i16 d17, d17, #8 +; CHECK-ARM-NEXT: vshl.i16 d16, d16, #8 +; CHECK-ARM-NEXT: vqadd.s16 d16, d17, d16 +; CHECK-ARM-NEXT: vshr.s16 d16, d16, #8 +; CHECK-ARM-NEXT: vmov r0, r1, d16 +; CHECK-ARM-NEXT: bx lr + %a = mul <4 x i8> %y, %z + %tmp = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> %x, <4 x i8> %a) + ret <4 x i8> %tmp +} define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind { ; CHECK-T1-LABEL: func32: @@ -18,28 +303,28 @@ ; CHECK-T1-NEXT: movs r2, #1 ; CHECK-T1-NEXT: adds r0, r0, r1 ; CHECK-T1-NEXT: mov r1, r2 -; CHECK-T1-NEXT: bmi .LBB0_2 +; CHECK-T1-NEXT: bmi .LBB2_2 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: movs r1, #0 -; CHECK-T1-NEXT: .LBB0_2: +; CHECK-T1-NEXT: .LBB2_2: ; CHECK-T1-NEXT: cmp r1, #0 -; CHECK-T1-NEXT: bne .LBB0_4 +; CHECK-T1-NEXT: bne .LBB2_4 ; CHECK-T1-NEXT: @ %bb.3: ; CHECK-T1-NEXT: lsls r1, r2, #31 ; CHECK-T1-NEXT: cmp r0, r3 -; CHECK-T1-NEXT: bvs .LBB0_5 -; CHECK-T1-NEXT: b .LBB0_6 -; CHECK-T1-NEXT: .LBB0_4: -; CHECK-T1-NEXT: ldr r1, .LCPI0_0 +; CHECK-T1-NEXT: bvs .LBB2_5 +; CHECK-T1-NEXT: b .LBB2_6 +; CHECK-T1-NEXT: .LBB2_4: +; CHECK-T1-NEXT: ldr r1, .LCPI2_0 ; CHECK-T1-NEXT: cmp r0, r3 -; CHECK-T1-NEXT: bvc .LBB0_6 -; CHECK-T1-NEXT: .LBB0_5: +; CHECK-T1-NEXT: bvc .LBB2_6 +; CHECK-T1-NEXT: .LBB2_5: ; CHECK-T1-NEXT: mov r0, r1 -; CHECK-T1-NEXT: .LBB0_6: +; CHECK-T1-NEXT: .LBB2_6: ; CHECK-T1-NEXT: bx lr ; CHECK-T1-NEXT: .p2align 2 ; CHECK-T1-NEXT: @ %bb.7: -; CHECK-T1-NEXT: .LCPI0_0: +; CHECK-T1-NEXT: .LCPI2_0: ; CHECK-T1-NEXT: .long 2147483647 @ 0x7fffffff ; ; CHECK-T2NODSP-LABEL: func32: @@ -88,30 +373,30 @@ ; CHECK-T1-NEXT: adcs r3, r1 ; CHECK-T1-NEXT: eors r1, r3 ; CHECK-T1-NEXT: bics r1, r2 -; CHECK-T1-NEXT: bpl .LBB1_2 +; CHECK-T1-NEXT: bpl .LBB3_2 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: asrs r0, r3, #31 -; CHECK-T1-NEXT: .LBB1_2: +; CHECK-T1-NEXT: .LBB3_2: ; CHECK-T1-NEXT: cmp r3, #0 -; CHECK-T1-NEXT: bmi .LBB1_4 +; CHECK-T1-NEXT: bmi .LBB3_4 ; CHECK-T1-NEXT: @ %bb.3: ; CHECK-T1-NEXT: movs r2, #1 ; CHECK-T1-NEXT: lsls r2, r2, #31 ; CHECK-T1-NEXT: cmp r1, #0 -; CHECK-T1-NEXT: bpl .LBB1_5 -; CHECK-T1-NEXT: b .LBB1_6 -; CHECK-T1-NEXT: .LBB1_4: -; CHECK-T1-NEXT: ldr r2, .LCPI1_0 +; CHECK-T1-NEXT: bpl .LBB3_5 +; CHECK-T1-NEXT: b .LBB3_6 +; CHECK-T1-NEXT: .LBB3_4: +; CHECK-T1-NEXT: ldr r2, .LCPI3_0 ; CHECK-T1-NEXT: cmp r1, #0 -; CHECK-T1-NEXT: bmi .LBB1_6 -; CHECK-T1-NEXT: .LBB1_5: +; CHECK-T1-NEXT: bmi .LBB3_6 +; CHECK-T1-NEXT: .LBB3_5: ; CHECK-T1-NEXT: mov r2, r3 -; CHECK-T1-NEXT: .LBB1_6: +; CHECK-T1-NEXT: .LBB3_6: ; CHECK-T1-NEXT: mov r1, r2 ; CHECK-T1-NEXT: pop {r4, pc} ; CHECK-T1-NEXT: .p2align 2 ; CHECK-T1-NEXT: @ %bb.7: -; CHECK-T1-NEXT: .LCPI1_0: +; CHECK-T1-NEXT: .LCPI3_0: ; CHECK-T1-NEXT: .long 2147483647 @ 0x7fffffff ; ; CHECK-T2-LABEL: func64: @@ -163,24 +448,24 @@ ; CHECK-T1-NEXT: muls r1, r2, r1 ; CHECK-T1-NEXT: sxth r1, r1 ; CHECK-T1-NEXT: adds r0, r0, r1 -; CHECK-T1-NEXT: ldr r1, .LCPI2_0 +; CHECK-T1-NEXT: ldr r1, .LCPI4_0 ; CHECK-T1-NEXT: cmp r0, r1 -; CHECK-T1-NEXT: blt .LBB2_2 +; CHECK-T1-NEXT: blt .LBB4_2 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: mov r0, r1 -; CHECK-T1-NEXT: .LBB2_2: -; CHECK-T1-NEXT: ldr r1, .LCPI2_1 +; CHECK-T1-NEXT: .LBB4_2: +; CHECK-T1-NEXT: ldr r1, .LCPI4_1 ; CHECK-T1-NEXT: cmp r0, r1 -; CHECK-T1-NEXT: bgt .LBB2_4 +; CHECK-T1-NEXT: bgt .LBB4_4 ; CHECK-T1-NEXT: @ %bb.3: ; CHECK-T1-NEXT: mov r0, r1 -; CHECK-T1-NEXT: .LBB2_4: +; CHECK-T1-NEXT: .LBB4_4: ; CHECK-T1-NEXT: bx lr ; CHECK-T1-NEXT: .p2align 2 ; CHECK-T1-NEXT: @ %bb.5: -; CHECK-T1-NEXT: .LCPI2_0: +; CHECK-T1-NEXT: .LCPI4_0: ; CHECK-T1-NEXT: .long 32767 @ 0x7fff -; CHECK-T1-NEXT: .LCPI2_1: +; CHECK-T1-NEXT: .LCPI4_1: ; CHECK-T1-NEXT: .long 4294934528 @ 0xffff8000 ; ; CHECK-T2NODSP-LABEL: func16: @@ -225,16 +510,16 @@ ; CHECK-T1-NEXT: adds r0, r0, r1 ; CHECK-T1-NEXT: movs r1, #127 ; CHECK-T1-NEXT: cmp r0, #127 -; CHECK-T1-NEXT: blt .LBB3_2 +; CHECK-T1-NEXT: blt .LBB5_2 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: mov r0, r1 -; CHECK-T1-NEXT: .LBB3_2: +; CHECK-T1-NEXT: .LBB5_2: ; CHECK-T1-NEXT: mvns r1, r1 ; CHECK-T1-NEXT: cmp r0, r1 -; CHECK-T1-NEXT: bgt .LBB3_4 +; CHECK-T1-NEXT: bgt .LBB5_4 ; CHECK-T1-NEXT: @ %bb.3: ; CHECK-T1-NEXT: mov r0, r1 -; CHECK-T1-NEXT: .LBB3_4: +; CHECK-T1-NEXT: .LBB5_4: ; CHECK-T1-NEXT: bx lr ; ; CHECK-T2NODSP-LABEL: func8: @@ -277,16 +562,16 @@ ; CHECK-T1-NEXT: adds r0, r0, r1 ; CHECK-T1-NEXT: movs r1, #7 ; CHECK-T1-NEXT: cmp r0, #7 -; CHECK-T1-NEXT: blt .LBB4_2 +; CHECK-T1-NEXT: blt .LBB6_2 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: mov r0, r1 -; CHECK-T1-NEXT: .LBB4_2: +; CHECK-T1-NEXT: .LBB6_2: ; CHECK-T1-NEXT: mvns r1, r1 ; CHECK-T1-NEXT: cmp r0, r1 -; CHECK-T1-NEXT: bgt .LBB4_4 +; CHECK-T1-NEXT: bgt .LBB6_4 ; CHECK-T1-NEXT: @ %bb.3: ; CHECK-T1-NEXT: mov r0, r1 -; CHECK-T1-NEXT: .LBB4_4: +; CHECK-T1-NEXT: .LBB6_4: ; CHECK-T1-NEXT: bx lr ; ; CHECK-T2NODSP-LABEL: func4: diff --git a/llvm/test/CodeGen/ARM/ssub_sat.ll b/llvm/test/CodeGen/ARM/ssub_sat.ll --- a/llvm/test/CodeGen/ARM/ssub_sat.ll +++ b/llvm/test/CodeGen/ARM/ssub_sat.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -mtriple=thumbv6m-none-eabi | FileCheck %s --check-prefix=CHECK-T1 ; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2NODSP ; RUN: llc < %s -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2DSP -; RUN: llc < %s -mtriple=armv5t-none-eabi | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK-ARMNODPS +; RUN: llc < %s -mtriple=armv5t-none-eabi | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK-ARMNODSP ; RUN: llc < %s -mtriple=armv5te-none-eabi | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK-ARMBASEDSP ; RUN: llc < %s -mtriple=armv6-none-eabi | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK-ARMDSP @@ -12,6 +12,8 @@ declare i32 @llvm.ssub.sat.i32(i32, i32) declare i64 @llvm.ssub.sat.i64(i64, i64) declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) +declare <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16>, <2 x i16>) +declare <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8>, <4 x i8>) define i32 @func(i32 %x, i32 %y) nounwind { ; CHECK-T1-LABEL: func: @@ -67,18 +69,18 @@ ; CHECK-T2DSP-NEXT: qsub r0, r0, r1 ; CHECK-T2DSP-NEXT: bx lr ; -; CHECK-ARMNODPS-LABEL: func: -; CHECK-ARMNODPS: @ %bb.0: -; CHECK-ARMNODPS-NEXT: subs r12, r0, r1 -; CHECK-ARMNODPS-NEXT: mov r3, #0 -; CHECK-ARMNODPS-NEXT: movmi r3, #1 -; CHECK-ARMNODPS-NEXT: mov r2, #-2147483648 -; CHECK-ARMNODPS-NEXT: cmp r3, #0 -; CHECK-ARMNODPS-NEXT: mvnne r2, #-2147483648 -; CHECK-ARMNODPS-NEXT: cmp r0, r1 -; CHECK-ARMNODPS-NEXT: movvc r2, r12 -; CHECK-ARMNODPS-NEXT: mov r0, r2 -; CHECK-ARMNODPS-NEXT: bx lr +; CHECK-ARMNODSP-LABEL: func: +; CHECK-ARMNODSP: @ %bb.0: +; CHECK-ARMNODSP-NEXT: subs r12, r0, r1 +; CHECK-ARMNODSP-NEXT: mov r3, #0 +; CHECK-ARMNODSP-NEXT: movmi r3, #1 +; CHECK-ARMNODSP-NEXT: mov r2, #-2147483648 +; CHECK-ARMNODSP-NEXT: cmp r3, #0 +; CHECK-ARMNODSP-NEXT: mvnne r2, #-2147483648 +; CHECK-ARMNODSP-NEXT: cmp r0, r1 +; CHECK-ARMNODSP-NEXT: movvc r2, r12 +; CHECK-ARMNODSP-NEXT: mov r0, r2 +; CHECK-ARMNODSP-NEXT: bx lr ; ; CHECK-ARMBASEDSP-LABEL: func: ; CHECK-ARMBASEDSP: @ %bb.0: @@ -211,21 +213,21 @@ ; CHECK-T2DSP-NEXT: sxth r0, r0 ; CHECK-T2DSP-NEXT: bx lr ; -; CHECK-ARMNODPS-LABEL: func16: -; CHECK-ARMNODPS: @ %bb.0: -; CHECK-ARMNODPS-NEXT: sub r0, r0, r1 -; CHECK-ARMNODPS-NEXT: mov r1, #255 -; CHECK-ARMNODPS-NEXT: orr r1, r1, #32512 -; CHECK-ARMNODPS-NEXT: cmp r0, r1 -; CHECK-ARMNODPS-NEXT: movlt r1, r0 -; CHECK-ARMNODPS-NEXT: ldr r0, .LCPI2_0 -; CHECK-ARMNODPS-NEXT: cmn r1, #32768 -; CHECK-ARMNODPS-NEXT: movgt r0, r1 -; CHECK-ARMNODPS-NEXT: bx lr -; CHECK-ARMNODPS-NEXT: .p2align 2 -; CHECK-ARMNODPS-NEXT: @ %bb.1: -; CHECK-ARMNODPS-NEXT: .LCPI2_0: -; CHECK-ARMNODPS-NEXT: .long 4294934528 @ 0xffff8000 +; CHECK-ARMNODSP-LABEL: func16: +; CHECK-ARMNODSP: @ %bb.0: +; CHECK-ARMNODSP-NEXT: sub r0, r0, r1 +; CHECK-ARMNODSP-NEXT: mov r1, #255 +; CHECK-ARMNODSP-NEXT: orr r1, r1, #32512 +; CHECK-ARMNODSP-NEXT: cmp r0, r1 +; CHECK-ARMNODSP-NEXT: movlt r1, r0 +; CHECK-ARMNODSP-NEXT: ldr r0, .LCPI2_0 +; CHECK-ARMNODSP-NEXT: cmn r1, #32768 +; CHECK-ARMNODSP-NEXT: movgt r0, r1 +; CHECK-ARMNODSP-NEXT: bx lr +; CHECK-ARMNODSP-NEXT: .p2align 2 +; CHECK-ARMNODSP-NEXT: @ %bb.1: +; CHECK-ARMNODSP-NEXT: .LCPI2_0: +; CHECK-ARMNODSP-NEXT: .long 4294934528 @ 0xffff8000 ; ; CHECK-ARMBASEDSP-LABEL: func16: ; CHECK-ARMBASEDSP: @ %bb.0: @@ -279,14 +281,14 @@ ; CHECK-T2DSP-NEXT: sxtb r0, r0 ; CHECK-T2DSP-NEXT: bx lr ; -; CHECK-ARMNODPS-LABEL: func8: -; CHECK-ARMNODPS: @ %bb.0: -; CHECK-ARMNODPS-NEXT: sub r0, r0, r1 -; CHECK-ARMNODPS-NEXT: cmp r0, #127 -; CHECK-ARMNODPS-NEXT: movge r0, #127 -; CHECK-ARMNODPS-NEXT: cmn r0, #128 -; CHECK-ARMNODPS-NEXT: mvnle r0, #127 -; CHECK-ARMNODPS-NEXT: bx lr +; CHECK-ARMNODSP-LABEL: func8: +; CHECK-ARMNODSP: @ %bb.0: +; CHECK-ARMNODSP-NEXT: sub r0, r0, r1 +; CHECK-ARMNODSP-NEXT: cmp r0, #127 +; CHECK-ARMNODSP-NEXT: movge r0, #127 +; CHECK-ARMNODSP-NEXT: cmn r0, #128 +; CHECK-ARMNODSP-NEXT: mvnle r0, #127 +; CHECK-ARMNODSP-NEXT: bx lr ; ; CHECK-ARMBASEDSP-LABEL: func8: ; CHECK-ARMBASEDSP: @ %bb.0: @@ -342,14 +344,14 @@ ; CHECK-T2DSP-NEXT: asrs r0, r0, #28 ; CHECK-T2DSP-NEXT: bx lr ; -; CHECK-ARMNODPS-LABEL: func3: -; CHECK-ARMNODPS: @ %bb.0: -; CHECK-ARMNODPS-NEXT: sub r0, r0, r1 -; CHECK-ARMNODPS-NEXT: cmp r0, #7 -; CHECK-ARMNODPS-NEXT: movge r0, #7 -; CHECK-ARMNODPS-NEXT: cmn r0, #8 -; CHECK-ARMNODPS-NEXT: mvnle r0, #7 -; CHECK-ARMNODPS-NEXT: bx lr +; CHECK-ARMNODSP-LABEL: func3: +; CHECK-ARMNODSP: @ %bb.0: +; CHECK-ARMNODSP-NEXT: sub r0, r0, r1 +; CHECK-ARMNODSP-NEXT: cmp r0, #7 +; CHECK-ARMNODSP-NEXT: movge r0, #7 +; CHECK-ARMNODSP-NEXT: cmn r0, #8 +; CHECK-ARMNODSP-NEXT: mvnle r0, #7 +; CHECK-ARMNODSP-NEXT: bx lr ; ; CHECK-ARMBASEDSP-LABEL: func3: ; CHECK-ARMBASEDSP: @ %bb.0: @@ -545,51 +547,51 @@ ; CHECK-T2DSP-NEXT: qsub r3, r3, r12 ; CHECK-T2DSP-NEXT: bx lr ; -; CHECK-ARMNODPS-LABEL: vec: -; CHECK-ARMNODPS: @ %bb.0: -; CHECK-ARMNODPS-NEXT: .save {r4, r5, r6, r7, r11, lr} -; CHECK-ARMNODPS-NEXT: push {r4, r5, r6, r7, r11, lr} -; CHECK-ARMNODPS-NEXT: ldr r4, [sp, #24] -; CHECK-ARMNODPS-NEXT: mov lr, r0 -; CHECK-ARMNODPS-NEXT: ldr r7, [sp, #28] -; CHECK-ARMNODPS-NEXT: mov r5, #0 -; CHECK-ARMNODPS-NEXT: subs r6, r0, r4 -; CHECK-ARMNODPS-NEXT: mov r0, #0 -; CHECK-ARMNODPS-NEXT: movmi r0, #1 -; CHECK-ARMNODPS-NEXT: cmp r0, #0 -; CHECK-ARMNODPS-NEXT: mov r0, #-2147483648 -; CHECK-ARMNODPS-NEXT: mov r12, #-2147483648 -; CHECK-ARMNODPS-NEXT: mvnne r0, #-2147483648 -; CHECK-ARMNODPS-NEXT: cmp lr, r4 -; CHECK-ARMNODPS-NEXT: movvc r0, r6 -; CHECK-ARMNODPS-NEXT: subs r6, r1, r7 -; CHECK-ARMNODPS-NEXT: mov r4, #0 -; CHECK-ARMNODPS-NEXT: mov lr, #-2147483648 -; CHECK-ARMNODPS-NEXT: movmi r4, #1 -; CHECK-ARMNODPS-NEXT: cmp r4, #0 -; CHECK-ARMNODPS-NEXT: mvnne lr, #-2147483648 -; CHECK-ARMNODPS-NEXT: cmp r1, r7 -; CHECK-ARMNODPS-NEXT: ldr r1, [sp, #32] -; CHECK-ARMNODPS-NEXT: movvc lr, r6 -; CHECK-ARMNODPS-NEXT: mov r4, #0 -; CHECK-ARMNODPS-NEXT: subs r6, r2, r1 -; CHECK-ARMNODPS-NEXT: movmi r4, #1 -; CHECK-ARMNODPS-NEXT: cmp r4, #0 -; CHECK-ARMNODPS-NEXT: mov r4, #-2147483648 -; CHECK-ARMNODPS-NEXT: mvnne r4, #-2147483648 -; CHECK-ARMNODPS-NEXT: cmp r2, r1 -; CHECK-ARMNODPS-NEXT: ldr r1, [sp, #36] -; CHECK-ARMNODPS-NEXT: movvc r4, r6 -; CHECK-ARMNODPS-NEXT: subs r2, r3, r1 -; CHECK-ARMNODPS-NEXT: movmi r5, #1 -; CHECK-ARMNODPS-NEXT: cmp r5, #0 -; CHECK-ARMNODPS-NEXT: mvnne r12, #-2147483648 -; CHECK-ARMNODPS-NEXT: cmp r3, r1 -; CHECK-ARMNODPS-NEXT: movvc r12, r2 -; CHECK-ARMNODPS-NEXT: mov r1, lr -; CHECK-ARMNODPS-NEXT: mov r2, r4 -; CHECK-ARMNODPS-NEXT: mov r3, r12 -; CHECK-ARMNODPS-NEXT: pop {r4, r5, r6, r7, r11, pc} +; CHECK-ARMNODSP-LABEL: vec: +; CHECK-ARMNODSP: @ %bb.0: +; CHECK-ARMNODSP-NEXT: .save {r4, r5, r6, r7, r11, lr} +; CHECK-ARMNODSP-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-ARMNODSP-NEXT: ldr r4, [sp, #24] +; CHECK-ARMNODSP-NEXT: mov lr, r0 +; CHECK-ARMNODSP-NEXT: ldr r7, [sp, #28] +; CHECK-ARMNODSP-NEXT: mov r5, #0 +; CHECK-ARMNODSP-NEXT: subs r6, r0, r4 +; CHECK-ARMNODSP-NEXT: mov r0, #0 +; CHECK-ARMNODSP-NEXT: movmi r0, #1 +; CHECK-ARMNODSP-NEXT: cmp r0, #0 +; CHECK-ARMNODSP-NEXT: mov r0, #-2147483648 +; CHECK-ARMNODSP-NEXT: mov r12, #-2147483648 +; CHECK-ARMNODSP-NEXT: mvnne r0, #-2147483648 +; CHECK-ARMNODSP-NEXT: cmp lr, r4 +; CHECK-ARMNODSP-NEXT: movvc r0, r6 +; CHECK-ARMNODSP-NEXT: subs r6, r1, r7 +; CHECK-ARMNODSP-NEXT: mov r4, #0 +; CHECK-ARMNODSP-NEXT: mov lr, #-2147483648 +; CHECK-ARMNODSP-NEXT: movmi r4, #1 +; CHECK-ARMNODSP-NEXT: cmp r4, #0 +; CHECK-ARMNODSP-NEXT: mvnne lr, #-2147483648 +; CHECK-ARMNODSP-NEXT: cmp r1, r7 +; CHECK-ARMNODSP-NEXT: ldr r1, [sp, #32] +; CHECK-ARMNODSP-NEXT: movvc lr, r6 +; CHECK-ARMNODSP-NEXT: mov r4, #0 +; CHECK-ARMNODSP-NEXT: subs r6, r2, r1 +; CHECK-ARMNODSP-NEXT: movmi r4, #1 +; CHECK-ARMNODSP-NEXT: cmp r4, #0 +; CHECK-ARMNODSP-NEXT: mov r4, #-2147483648 +; CHECK-ARMNODSP-NEXT: mvnne r4, #-2147483648 +; CHECK-ARMNODSP-NEXT: cmp r2, r1 +; CHECK-ARMNODSP-NEXT: ldr r1, [sp, #36] +; CHECK-ARMNODSP-NEXT: movvc r4, r6 +; CHECK-ARMNODSP-NEXT: subs r2, r3, r1 +; CHECK-ARMNODSP-NEXT: movmi r5, #1 +; CHECK-ARMNODSP-NEXT: cmp r5, #0 +; CHECK-ARMNODSP-NEXT: mvnne r12, #-2147483648 +; CHECK-ARMNODSP-NEXT: cmp r3, r1 +; CHECK-ARMNODSP-NEXT: movvc r12, r2 +; CHECK-ARMNODSP-NEXT: mov r1, lr +; CHECK-ARMNODSP-NEXT: mov r2, r4 +; CHECK-ARMNODSP-NEXT: mov r3, r12 +; CHECK-ARMNODSP-NEXT: pop {r4, r5, r6, r7, r11, pc} ; ; CHECK-ARMBASEDSP-LABEL: vec: ; CHECK-ARMBASEDSP: @ %bb.0: @@ -617,3 +619,13 @@ %tmp = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y) ret <4 x i32> %tmp } + +define <2 x i16> @funcv2i16(<2 x i16> %x, <2 x i16> %y) nounwind { + %tmp = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %x, <2 x i16> %y) + ret <2 x i16> %tmp +} + +define <4 x i8> @funcv4i8(<4 x i8> %x, <4 x i8> %y) nounwind { + %tmp = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> %x, <4 x i8> %y) + ret <4 x i8> %tmp +} diff --git a/llvm/test/CodeGen/ARM/ssub_sat_plus.ll b/llvm/test/CodeGen/ARM/ssub_sat_plus.ll --- a/llvm/test/CodeGen/ARM/ssub_sat_plus.ll +++ b/llvm/test/CodeGen/ARM/ssub_sat_plus.ll @@ -9,6 +9,20 @@ declare i16 @llvm.ssub.sat.i16(i16, i16) declare i32 @llvm.ssub.sat.i32(i32, i32) declare i64 @llvm.ssub.sat.i64(i64, i64) +declare <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16>, <2 x i16>) +declare <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8>, <4 x i8>) + +define <2 x i16> @funcv2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %z) nounwind { + %a = mul <2 x i16> %y, %z + %tmp = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %x, <2 x i16> %a) + ret <2 x i16> %tmp +} + +define <4 x i8> @funcv4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %z) nounwind { + %a = mul <4 x i8> %y, %z + %tmp = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> %x, <4 x i8> %a) + ret <4 x i8> %tmp +} define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind { ; CHECK-T1-LABEL: func32: diff --git a/llvm/test/CodeGen/ARM/uadd_sat.ll b/llvm/test/CodeGen/ARM/uadd_sat.ll --- a/llvm/test/CodeGen/ARM/uadd_sat.ll +++ b/llvm/test/CodeGen/ARM/uadd_sat.ll @@ -9,16 +9,299 @@ declare i16 @llvm.uadd.sat.i16(i16, i16) declare i32 @llvm.uadd.sat.i32(i32, i32) declare i64 @llvm.uadd.sat.i64(i64, i64) +declare <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16>, <2 x i16>) +declare <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8>, <4 x i8>) + +define <2 x i16> @funcv2i16(<2 x i16> %x, <2 x i16> %y) nounwind { +; CHECK-T1-LABEL: funcv2i16: +; CHECK-T1: @ %bb.0: +; CHECK-T1-NEXT: uxth r2, r2 +; CHECK-T1-NEXT: uxth r0, r0 +; CHECK-T1-NEXT: adds r0, r0, r2 +; CHECK-T1-NEXT: ldr r2, .LCPI0_0 +; CHECK-T1-NEXT: cmp r0, r2 +; CHECK-T1-NEXT: blo .LBB0_2 +; CHECK-T1-NEXT: @ %bb.1: +; CHECK-T1-NEXT: mov r0, r2 +; CHECK-T1-NEXT: .LBB0_2: +; CHECK-T1-NEXT: uxth r3, r3 +; CHECK-T1-NEXT: uxth r1, r1 +; CHECK-T1-NEXT: adds r1, r1, r3 +; CHECK-T1-NEXT: cmp r1, r2 +; CHECK-T1-NEXT: blo .LBB0_4 +; CHECK-T1-NEXT: @ %bb.3: +; CHECK-T1-NEXT: mov r1, r2 +; CHECK-T1-NEXT: .LBB0_4: +; CHECK-T1-NEXT: bx lr +; CHECK-T1-NEXT: .p2align 2 +; CHECK-T1-NEXT: @ %bb.5: +; CHECK-T1-NEXT: .LCPI0_0: +; CHECK-T1-NEXT: .long 65535 @ 0xffff +; +; CHECK-T2NODSP-LABEL: funcv2i16: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: uxth r0, r0 +; CHECK-T2NODSP-NEXT: uxth r2, r2 +; CHECK-T2NODSP-NEXT: add r0, r2 +; CHECK-T2NODSP-NEXT: uxth r1, r1 +; CHECK-T2NODSP-NEXT: movw r2, #65535 +; CHECK-T2NODSP-NEXT: cmp r0, r2 +; CHECK-T2NODSP-NEXT: uxth r3, r3 +; CHECK-T2NODSP-NEXT: add r1, r3 +; CHECK-T2NODSP-NEXT: it hs +; CHECK-T2NODSP-NEXT: movhs r0, r2 +; CHECK-T2NODSP-NEXT: cmp r1, r2 +; CHECK-T2NODSP-NEXT: it lo +; CHECK-T2NODSP-NEXT: movlo r2, r1 +; CHECK-T2NODSP-NEXT: mov r1, r2 +; CHECK-T2NODSP-NEXT: bx lr +; +; CHECK-T2DSP-LABEL: funcv2i16: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: uqadd16 r0, r0, r2 +; CHECK-T2DSP-NEXT: uqadd16 r1, r1, r3 +; CHECK-T2DSP-NEXT: bx lr +; +; CHECK-ARM-LABEL: funcv2i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: vmov.i32 d16, #0xffff +; CHECK-ARM-NEXT: vmov d17, r2, r3 +; CHECK-ARM-NEXT: vmov d18, r0, r1 +; CHECK-ARM-NEXT: vand d17, d17, d16 +; CHECK-ARM-NEXT: vand d18, d18, d16 +; CHECK-ARM-NEXT: vadd.i32 d17, d18, d17 +; CHECK-ARM-NEXT: vmin.u32 d16, d17, d16 +; CHECK-ARM-NEXT: vmov r0, r1, d16 +; CHECK-ARM-NEXT: bx lr +; CHECK-ARMNODPS-LABEL: funcv2i16: +; CHECK-ARMNODPS: @ %bb.0: +; CHECK-ARMNODPS-NEXT: lsl r0, r0, #16 +; CHECK-ARMNODPS-NEXT: lsl r2, r2, #16 +; CHECK-ARMNODPS-NEXT: asr r0, r0, #16 +; CHECK-ARMNODPS-NEXT: lsl r1, r1, #16 +; CHECK-ARMNODPS-NEXT: add r0, r0, r2, asr #16 +; CHECK-ARMNODPS-NEXT: mov r2, #255 +; CHECK-ARMNODPS-NEXT: orr r2, r2, #32512 +; CHECK-ARMNODPS-NEXT: ldr r12, .LCPI0_0 +; CHECK-ARMNODPS-NEXT: cmp r0, r2 +; CHECK-ARMNODPS-NEXT: lsl r3, r3, #16 +; CHECK-ARMNODPS-NEXT: asr r1, r1, #16 +; CHECK-ARMNODPS-NEXT: movge r0, r2 +; CHECK-ARMNODPS-NEXT: cmn r0, #32768 +; CHECK-ARMNODPS-NEXT: add r1, r1, r3, asr #16 +; CHECK-ARMNODPS-NEXT: movle r0, r12 +; CHECK-ARMNODPS-NEXT: cmp r1, r2 +; CHECK-ARMNODPS-NEXT: movlt r2, r1 +; CHECK-ARMNODPS-NEXT: cmn r2, #32768 +; CHECK-ARMNODPS-NEXT: movle r2, r12 +; CHECK-ARMNODPS-NEXT: mov r1, r2 +; CHECK-ARMNODPS-NEXT: bx lr +; CHECK-ARMNODPS-NEXT: .p2align 2 +; CHECK-ARMNODPS-NEXT: @ %bb.1: +; CHECK-ARMNODPS-NEXT: .LCPI0_0: +; CHECK-ARMNODPS-NEXT: .long 4294934528 @ 0xffff8000 +; CHECK-ARMBASEDSP-LABEL: funcv2i16: +; CHECK-ARMBASEDSP: @ %bb.0: +; CHECK-ARMBASEDSP-NEXT: lsl r2, r2, #16 +; CHECK-ARMBASEDSP-NEXT: lsl r0, r0, #16 +; CHECK-ARMBASEDSP-NEXT: lsl r1, r1, #16 +; CHECK-ARMBASEDSP-NEXT: qadd r0, r0, r2 +; CHECK-ARMBASEDSP-NEXT: lsl r2, r3, #16 +; CHECK-ARMBASEDSP-NEXT: qadd r1, r1, r2 +; CHECK-ARMBASEDSP-NEXT: asr r0, r0, #16 +; CHECK-ARMBASEDSP-NEXT: asr r1, r1, #16 +; CHECK-ARMBASEDSP-NEXT: bx lr +; CHECK-ARMDSP-LABEL: funcv2i16: +; CHECK-ARMDSP: @ %bb.0: +; CHECK-ARMDSP-NEXT: qadd16 r0, r0, r2 +; CHECK-ARMDSP-NEXT: qadd16 r1, r1, r3 +; CHECK-ARMDSP-NEXT: bx lr + %tmp = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> %x, <2 x i16> %y) + ret <2 x i16> %tmp +} + +define <4 x i8> @funcv4i8(<4 x i8> %x, <4 x i8> %y) nounwind { +; CHECK-T1-LABEL: funcv4i8: +; CHECK-T1: @ %bb.0: +; CHECK-T1-NEXT: .save {r4, r5, r7, lr} +; CHECK-T1-NEXT: push {r4, r5, r7, lr} +; CHECK-T1-NEXT: add r4, sp, #16 +; CHECK-T1-NEXT: ldrb r4, [r4] +; CHECK-T1-NEXT: uxtb r0, r0 +; CHECK-T1-NEXT: adds r0, r0, r4 +; CHECK-T1-NEXT: movs r4, #255 +; CHECK-T1-NEXT: cmp r0, #255 +; CHECK-T1-NEXT: blo .LBB1_2 +; CHECK-T1-NEXT: @ %bb.1: +; CHECK-T1-NEXT: mov r0, r4 +; CHECK-T1-NEXT: .LBB1_2: +; CHECK-T1-NEXT: add r5, sp, #20 +; CHECK-T1-NEXT: ldrb r5, [r5] +; CHECK-T1-NEXT: uxtb r1, r1 +; CHECK-T1-NEXT: adds r1, r1, r5 +; CHECK-T1-NEXT: cmp r1, #255 +; CHECK-T1-NEXT: blo .LBB1_4 +; CHECK-T1-NEXT: @ %bb.3: +; CHECK-T1-NEXT: mov r1, r4 +; CHECK-T1-NEXT: .LBB1_4: +; CHECK-T1-NEXT: add r5, sp, #24 +; CHECK-T1-NEXT: ldrb r5, [r5] +; CHECK-T1-NEXT: uxtb r2, r2 +; CHECK-T1-NEXT: adds r2, r2, r5 +; CHECK-T1-NEXT: cmp r2, #255 +; CHECK-T1-NEXT: blo .LBB1_6 +; CHECK-T1-NEXT: @ %bb.5: +; CHECK-T1-NEXT: mov r2, r4 +; CHECK-T1-NEXT: .LBB1_6: +; CHECK-T1-NEXT: add r5, sp, #28 +; CHECK-T1-NEXT: ldrb r5, [r5] +; CHECK-T1-NEXT: uxtb r3, r3 +; CHECK-T1-NEXT: adds r3, r3, r5 +; CHECK-T1-NEXT: cmp r3, #255 +; CHECK-T1-NEXT: blo .LBB1_8 +; CHECK-T1-NEXT: @ %bb.7: +; CHECK-T1-NEXT: mov r3, r4 +; CHECK-T1-NEXT: .LBB1_8: +; CHECK-T1-NEXT: pop {r4, r5, r7, pc} +; +; CHECK-T2NODSP-LABEL: funcv4i8: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: .save {r7, lr} +; CHECK-T2NODSP-NEXT: push {r7, lr} +; CHECK-T2NODSP-NEXT: uxtb.w lr, r1 +; CHECK-T2NODSP-NEXT: ldrb.w r1, [sp, #12] +; CHECK-T2NODSP-NEXT: uxtb.w r12, r0 +; CHECK-T2NODSP-NEXT: ldrb.w r0, [sp, #8] +; CHECK-T2NODSP-NEXT: add r1, lr +; CHECK-T2NODSP-NEXT: uxtb.w lr, r2 +; CHECK-T2NODSP-NEXT: ldrb.w r2, [sp, #16] +; CHECK-T2NODSP-NEXT: add r0, r12 +; CHECK-T2NODSP-NEXT: cmp r0, #255 +; CHECK-T2NODSP-NEXT: mov.w r12, #255 +; CHECK-T2NODSP-NEXT: add r2, lr +; CHECK-T2NODSP-NEXT: uxtb.w lr, r3 +; CHECK-T2NODSP-NEXT: ldrb.w r3, [sp, #20] +; CHECK-T2NODSP-NEXT: it hs +; CHECK-T2NODSP-NEXT: movhs r0, r12 +; CHECK-T2NODSP-NEXT: cmp r1, #255 +; CHECK-T2NODSP-NEXT: it hs +; CHECK-T2NODSP-NEXT: movhs r1, r12 +; CHECK-T2NODSP-NEXT: cmp r2, #255 +; CHECK-T2NODSP-NEXT: add r3, lr +; CHECK-T2NODSP-NEXT: it hs +; CHECK-T2NODSP-NEXT: movhs r2, r12 +; CHECK-T2NODSP-NEXT: cmp r3, #255 +; CHECK-T2NODSP-NEXT: it lo +; CHECK-T2NODSP-NEXT: movlo r12, r3 +; CHECK-T2NODSP-NEXT: mov r3, r12 +; CHECK-T2NODSP-NEXT: pop {r7, pc} +; +; CHECK-T2DSP-LABEL: funcv4i8: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: ldrsb.w r12, [sp] +; CHECK-T2DSP-NEXT: uqadd8 r0, r0, r12 +; CHECK-T2DSP-NEXT: ldrsb.w r12, [sp, #4] +; CHECK-T2DSP-NEXT: uqadd8 r1, r1, r12 +; CHECK-T2DSP-NEXT: ldrsb.w r12, [sp, #8] +; CHECK-T2DSP-NEXT: uqadd8 r2, r2, r12 +; CHECK-T2DSP-NEXT: ldrsb.w r12, [sp, #12] +; CHECK-T2DSP-NEXT: uqadd8 r3, r3, r12 +; CHECK-T2DSP-NEXT: bx lr +; +; CHECK-ARM-LABEL: funcv4i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: vmov d16, r2, r3 +; CHECK-ARM-NEXT: vmov d17, r0, r1 +; CHECK-ARM-NEXT: vbic.i16 d16, #0xff00 +; CHECK-ARM-NEXT: vbic.i16 d17, #0xff00 +; CHECK-ARM-NEXT: vmov.i16 d18, #0xff +; CHECK-ARM-NEXT: vadd.i16 d16, d17, d16 +; CHECK-ARM-NEXT: vmin.u16 d16, d16, d18 +; CHECK-ARM-NEXT: vmov r0, r1, d16 +; CHECK-ARM-NEXT: bx lr +; CHECK-ARMNODPS-LABEL: funcv4i8: +; CHECK-ARMNODPS: @ %bb.0: +; CHECK-ARMNODPS-NEXT: .save {r4, lr} +; CHECK-ARMNODPS-NEXT: push {r4, lr} +; CHECK-ARMNODPS-NEXT: mov r12, r3 +; CHECK-ARMNODPS-NEXT: ldrsb r3, [sp, #8] +; CHECK-ARMNODPS-NEXT: lsl r0, r0, #24 +; CHECK-ARMNODPS-NEXT: ldrsb r4, [sp, #12] +; CHECK-ARMNODPS-NEXT: add r0, r3, r0, asr #24 +; CHECK-ARMNODPS-NEXT: lsl r1, r1, #24 +; CHECK-ARMNODPS-NEXT: mov r3, #127 +; CHECK-ARMNODPS-NEXT: cmp r0, #127 +; CHECK-ARMNODPS-NEXT: movge r0, r3 +; CHECK-ARMNODPS-NEXT: add r1, r4, r1, asr #24 +; CHECK-ARMNODPS-NEXT: mvn lr, #127 +; CHECK-ARMNODPS-NEXT: cmn r0, #128 +; CHECK-ARMNODPS-NEXT: ldrsb r4, [sp, #16] +; CHECK-ARMNODPS-NEXT: lsl r2, r2, #24 +; CHECK-ARMNODPS-NEXT: movle r0, lr +; CHECK-ARMNODPS-NEXT: cmp r1, #127 +; CHECK-ARMNODPS-NEXT: movge r1, r3 +; CHECK-ARMNODPS-NEXT: cmn r1, #128 +; CHECK-ARMNODPS-NEXT: add r2, r4, r2, asr #24 +; CHECK-ARMNODPS-NEXT: ldrsb r4, [sp, #20] +; CHECK-ARMNODPS-NEXT: movle r1, lr +; CHECK-ARMNODPS-NEXT: cmp r2, #127 +; CHECK-ARMNODPS-NEXT: lsl r12, r12, #24 +; CHECK-ARMNODPS-NEXT: movge r2, r3 +; CHECK-ARMNODPS-NEXT: cmn r2, #128 +; CHECK-ARMNODPS-NEXT: add r4, r4, r12, asr #24 +; CHECK-ARMNODPS-NEXT: movle r2, lr +; CHECK-ARMNODPS-NEXT: cmp r4, #127 +; CHECK-ARMNODPS-NEXT: movlt r3, r4 +; CHECK-ARMNODPS-NEXT: cmn r3, #128 +; CHECK-ARMNODPS-NEXT: movle r3, lr +; CHECK-ARMNODPS-NEXT: pop {r4, pc} +; CHECK-ARMBASEDSP-LABEL: funcv4i8: +; CHECK-ARMBASEDSP: @ %bb.0: +; CHECK-ARMBASEDSP-NEXT: ldrb r12, [sp] +; CHECK-ARMBASEDSP-NEXT: lsl r0, r0, #24 +; CHECK-ARMBASEDSP-NEXT: lsl r1, r1, #24 +; CHECK-ARMBASEDSP-NEXT: lsl r2, r2, #24 +; CHECK-ARMBASEDSP-NEXT: lsl r3, r3, #24 +; CHECK-ARMBASEDSP-NEXT: lsl r12, r12, #24 +; CHECK-ARMBASEDSP-NEXT: qadd r0, r0, r12 +; CHECK-ARMBASEDSP-NEXT: ldrb r12, [sp, #4] +; CHECK-ARMBASEDSP-NEXT: asr r0, r0, #24 +; CHECK-ARMBASEDSP-NEXT: lsl r12, r12, #24 +; CHECK-ARMBASEDSP-NEXT: qadd r1, r1, r12 +; CHECK-ARMBASEDSP-NEXT: ldrb r12, [sp, #8] +; CHECK-ARMBASEDSP-NEXT: asr r1, r1, #24 +; CHECK-ARMBASEDSP-NEXT: lsl r12, r12, #24 +; CHECK-ARMBASEDSP-NEXT: qadd r2, r2, r12 +; CHECK-ARMBASEDSP-NEXT: ldrb r12, [sp, #12] +; CHECK-ARMBASEDSP-NEXT: asr r2, r2, #24 +; CHECK-ARMBASEDSP-NEXT: lsl r12, r12, #24 +; CHECK-ARMBASEDSP-NEXT: qadd r3, r3, r12 +; CHECK-ARMBASEDSP-NEXT: asr r3, r3, #24 +; CHECK-ARMBASEDSP-NEXT: bx lr +; CHECK-ARMDSP-LABEL: funcv4i8: +; CHECK-ARMDSP: @ %bb.0: +; CHECK-ARMDSP-NEXT: ldrsb r12, [sp] +; CHECK-ARMDSP-NEXT: qadd8 r0, r0, r12 +; CHECK-ARMDSP-NEXT: ldrsb r12, [sp, #4] +; CHECK-ARMDSP-NEXT: qadd8 r1, r1, r12 +; CHECK-ARMDSP-NEXT: ldrsb r12, [sp, #8] +; CHECK-ARMDSP-NEXT: qadd8 r2, r2, r12 +; CHECK-ARMDSP-NEXT: ldrsb r12, [sp, #12] +; CHECK-ARMDSP-NEXT: qadd8 r3, r3, r12 +; CHECK-ARMDSP-NEXT: bx lr + %tmp = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> %x, <4 x i8> %y) + ret <4 x i8> %tmp +} define i32 @func(i32 %x, i32 %y) nounwind { ; CHECK-T1-LABEL: func: ; CHECK-T1: @ %bb.0: ; CHECK-T1-NEXT: adds r0, r0, r1 -; CHECK-T1-NEXT: blo .LBB0_2 +; CHECK-T1-NEXT: blo .LBB2_2 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: movs r0, #0 ; CHECK-T1-NEXT: mvns r0, r0 -; CHECK-T1-NEXT: .LBB0_2: +; CHECK-T1-NEXT: .LBB2_2: ; CHECK-T1-NEXT: bx lr ; ; CHECK-T2-LABEL: func: @@ -50,18 +333,18 @@ ; CHECK-T1-NEXT: mvns r2, r5 ; CHECK-T1-NEXT: cmp r3, #0 ; CHECK-T1-NEXT: mov r0, r2 -; CHECK-T1-NEXT: beq .LBB1_3 +; CHECK-T1-NEXT: beq .LBB3_3 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: cmp r3, #0 -; CHECK-T1-NEXT: beq .LBB1_4 -; CHECK-T1-NEXT: .LBB1_2: +; CHECK-T1-NEXT: beq .LBB3_4 +; CHECK-T1-NEXT: .LBB3_2: ; CHECK-T1-NEXT: mov r1, r2 ; CHECK-T1-NEXT: pop {r4, r5, r7, pc} -; CHECK-T1-NEXT: .LBB1_3: +; CHECK-T1-NEXT: .LBB3_3: ; CHECK-T1-NEXT: mov r0, r4 ; CHECK-T1-NEXT: cmp r3, #0 -; CHECK-T1-NEXT: bne .LBB1_2 -; CHECK-T1-NEXT: .LBB1_4: +; CHECK-T1-NEXT: bne .LBB3_2 +; CHECK-T1-NEXT: .LBB3_4: ; CHECK-T1-NEXT: mov r2, r1 ; CHECK-T1-NEXT: mov r1, r2 ; CHECK-T1-NEXT: pop {r4, r5, r7, pc} @@ -94,16 +377,16 @@ ; CHECK-T1-LABEL: func16: ; CHECK-T1: @ %bb.0: ; CHECK-T1-NEXT: adds r0, r0, r1 -; CHECK-T1-NEXT: ldr r1, .LCPI2_0 +; CHECK-T1-NEXT: ldr r1, .LCPI4_0 ; CHECK-T1-NEXT: cmp r0, r1 -; CHECK-T1-NEXT: blo .LBB2_2 +; CHECK-T1-NEXT: blo .LBB4_2 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: mov r0, r1 -; CHECK-T1-NEXT: .LBB2_2: +; CHECK-T1-NEXT: .LBB4_2: ; CHECK-T1-NEXT: bx lr ; CHECK-T1-NEXT: .p2align 2 ; CHECK-T1-NEXT: @ %bb.3: -; CHECK-T1-NEXT: .LCPI2_0: +; CHECK-T1-NEXT: .LCPI4_0: ; CHECK-T1-NEXT: .long 65535 @ 0xffff ; ; CHECK-T2NODSP-LABEL: func16: @@ -135,10 +418,10 @@ ; CHECK-T1: @ %bb.0: ; CHECK-T1-NEXT: adds r0, r0, r1 ; CHECK-T1-NEXT: cmp r0, #255 -; CHECK-T1-NEXT: blo .LBB3_2 +; CHECK-T1-NEXT: blo .LBB5_2 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: movs r0, #255 -; CHECK-T1-NEXT: .LBB3_2: +; CHECK-T1-NEXT: .LBB5_2: ; CHECK-T1-NEXT: bx lr ; ; CHECK-T2NODSP-LABEL: func8: @@ -169,10 +452,10 @@ ; CHECK-T1: @ %bb.0: ; CHECK-T1-NEXT: adds r0, r0, r1 ; CHECK-T1-NEXT: cmp r0, #15 -; CHECK-T1-NEXT: blo .LBB4_2 +; CHECK-T1-NEXT: blo .LBB6_2 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: movs r0, #15 -; CHECK-T1-NEXT: .LBB4_2: +; CHECK-T1-NEXT: .LBB6_2: ; CHECK-T1-NEXT: bx lr ; ; CHECK-T2-LABEL: func3: diff --git a/llvm/test/CodeGen/ARM/uadd_sat_plus.ll b/llvm/test/CodeGen/ARM/uadd_sat_plus.ll --- a/llvm/test/CodeGen/ARM/uadd_sat_plus.ll +++ b/llvm/test/CodeGen/ARM/uadd_sat_plus.ll @@ -9,17 +9,237 @@ declare i16 @llvm.uadd.sat.i16(i16, i16) declare i32 @llvm.uadd.sat.i32(i32, i32) declare i64 @llvm.uadd.sat.i64(i64, i64) +declare <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16>, <2 x i16>) +declare <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8>, <4 x i8>) + +define <2 x i16> @funcv2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %z) nounwind { +; CHECK-T1-LABEL: funcv2i16: +; CHECK-T1: @ %bb.0: +; CHECK-T1-NEXT: .save {r4, lr} +; CHECK-T1-NEXT: push {r4, lr} +; CHECK-T1-NEXT: ldr r4, [sp, #8] +; CHECK-T1-NEXT: muls r4, r2, r4 +; CHECK-T1-NEXT: uxth r2, r4 +; CHECK-T1-NEXT: uxth r0, r0 +; CHECK-T1-NEXT: adds r0, r0, r2 +; CHECK-T1-NEXT: ldr r2, .LCPI0_0 +; CHECK-T1-NEXT: cmp r0, r2 +; CHECK-T1-NEXT: blo .LBB0_2 +; CHECK-T1-NEXT: @ %bb.1: +; CHECK-T1-NEXT: mov r0, r2 +; CHECK-T1-NEXT: .LBB0_2: +; CHECK-T1-NEXT: ldr r4, [sp, #12] +; CHECK-T1-NEXT: muls r4, r3, r4 +; CHECK-T1-NEXT: uxth r3, r4 +; CHECK-T1-NEXT: uxth r1, r1 +; CHECK-T1-NEXT: adds r1, r1, r3 +; CHECK-T1-NEXT: cmp r1, r2 +; CHECK-T1-NEXT: blo .LBB0_4 +; CHECK-T1-NEXT: @ %bb.3: +; CHECK-T1-NEXT: mov r1, r2 +; CHECK-T1-NEXT: .LBB0_4: +; CHECK-T1-NEXT: pop {r4, pc} +; CHECK-T1-NEXT: .p2align 2 +; CHECK-T1-NEXT: @ %bb.5: +; CHECK-T1-NEXT: .LCPI0_0: +; CHECK-T1-NEXT: .long 65535 @ 0xffff +; +; CHECK-T2NODSP-LABEL: funcv2i16: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: ldrh.w r12, [sp] +; CHECK-T2NODSP-NEXT: uxth r0, r0 +; CHECK-T2NODSP-NEXT: uxth r1, r1 +; CHECK-T2NODSP-NEXT: mul r2, r2, r12 +; CHECK-T2NODSP-NEXT: movw r12, #65535 +; CHECK-T2NODSP-NEXT: uxth r2, r2 +; CHECK-T2NODSP-NEXT: add r0, r2 +; CHECK-T2NODSP-NEXT: ldrh.w r2, [sp, #4] +; CHECK-T2NODSP-NEXT: cmp r0, r12 +; CHECK-T2NODSP-NEXT: it hs +; CHECK-T2NODSP-NEXT: movhs r0, r12 +; CHECK-T2NODSP-NEXT: muls r2, r3, r2 +; CHECK-T2NODSP-NEXT: uxth r2, r2 +; CHECK-T2NODSP-NEXT: add r1, r2 +; CHECK-T2NODSP-NEXT: cmp r1, r12 +; CHECK-T2NODSP-NEXT: it lo +; CHECK-T2NODSP-NEXT: movlo r12, r1 +; CHECK-T2NODSP-NEXT: mov r1, r12 +; CHECK-T2NODSP-NEXT: bx lr +; +; CHECK-T2DSP-LABEL: funcv2i16: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: ldrh.w r12, [sp] +; CHECK-T2DSP-NEXT: mul r2, r2, r12 +; CHECK-T2DSP-NEXT: uqadd16 r0, r0, r2 +; CHECK-T2DSP-NEXT: ldrh.w r2, [sp, #4] +; CHECK-T2DSP-NEXT: muls r2, r3, r2 +; CHECK-T2DSP-NEXT: uqadd16 r1, r1, r2 +; CHECK-T2DSP-NEXT: bx lr +; +; CHECK-ARM-LABEL: funcv2i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: vldr d16, [sp] +; CHECK-ARM-NEXT: vmov d17, r2, r3 +; CHECK-ARM-NEXT: vmov d18, r0, r1 +; CHECK-ARM-NEXT: vmul.i32 d16, d17, d16 +; CHECK-ARM-NEXT: vmov.i32 d17, #0xffff +; CHECK-ARM-NEXT: vand d18, d18, d17 +; CHECK-ARM-NEXT: vand d16, d16, d17 +; CHECK-ARM-NEXT: vadd.i32 d16, d18, d16 +; CHECK-ARM-NEXT: vmin.u32 d16, d16, d17 +; CHECK-ARM-NEXT: vmov r0, r1, d16 +; CHECK-ARM-NEXT: bx lr + %a = mul <2 x i16> %y, %z + %tmp = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> %x, <2 x i16> %a) + ret <2 x i16> %tmp +} + +define <4 x i8> @funcv4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %z) nounwind { +; CHECK-T1-LABEL: funcv4i8: +; CHECK-T1: @ %bb.0: +; CHECK-T1-NEXT: .save {r4, r5, r6, lr} +; CHECK-T1-NEXT: push {r4, r5, r6, lr} +; CHECK-T1-NEXT: ldr r4, [sp, #32] +; CHECK-T1-NEXT: ldr r5, [sp, #16] +; CHECK-T1-NEXT: muls r5, r4, r5 +; CHECK-T1-NEXT: uxtb r4, r5 +; CHECK-T1-NEXT: uxtb r0, r0 +; CHECK-T1-NEXT: adds r0, r0, r4 +; CHECK-T1-NEXT: movs r4, #255 +; CHECK-T1-NEXT: cmp r0, #255 +; CHECK-T1-NEXT: blo .LBB1_2 +; CHECK-T1-NEXT: @ %bb.1: +; CHECK-T1-NEXT: mov r0, r4 +; CHECK-T1-NEXT: .LBB1_2: +; CHECK-T1-NEXT: ldr r5, [sp, #36] +; CHECK-T1-NEXT: ldr r6, [sp, #20] +; CHECK-T1-NEXT: muls r6, r5, r6 +; CHECK-T1-NEXT: uxtb r5, r6 +; CHECK-T1-NEXT: uxtb r1, r1 +; CHECK-T1-NEXT: adds r1, r1, r5 +; CHECK-T1-NEXT: cmp r1, #255 +; CHECK-T1-NEXT: blo .LBB1_4 +; CHECK-T1-NEXT: @ %bb.3: +; CHECK-T1-NEXT: mov r1, r4 +; CHECK-T1-NEXT: .LBB1_4: +; CHECK-T1-NEXT: ldr r5, [sp, #40] +; CHECK-T1-NEXT: ldr r6, [sp, #24] +; CHECK-T1-NEXT: muls r6, r5, r6 +; CHECK-T1-NEXT: uxtb r5, r6 +; CHECK-T1-NEXT: uxtb r2, r2 +; CHECK-T1-NEXT: adds r2, r2, r5 +; CHECK-T1-NEXT: cmp r2, #255 +; CHECK-T1-NEXT: blo .LBB1_6 +; CHECK-T1-NEXT: @ %bb.5: +; CHECK-T1-NEXT: mov r2, r4 +; CHECK-T1-NEXT: .LBB1_6: +; CHECK-T1-NEXT: ldr r5, [sp, #44] +; CHECK-T1-NEXT: ldr r6, [sp, #28] +; CHECK-T1-NEXT: muls r6, r5, r6 +; CHECK-T1-NEXT: uxtb r5, r6 +; CHECK-T1-NEXT: uxtb r3, r3 +; CHECK-T1-NEXT: adds r3, r3, r5 +; CHECK-T1-NEXT: cmp r3, #255 +; CHECK-T1-NEXT: blo .LBB1_8 +; CHECK-T1-NEXT: @ %bb.7: +; CHECK-T1-NEXT: mov r3, r4 +; CHECK-T1-NEXT: .LBB1_8: +; CHECK-T1-NEXT: pop {r4, r5, r6, pc} +; +; CHECK-T2NODSP-LABEL: funcv4i8: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: .save {r4, lr} +; CHECK-T2NODSP-NEXT: push {r4, lr} +; CHECK-T2NODSP-NEXT: ldrb.w r12, [sp, #24] +; CHECK-T2NODSP-NEXT: uxtb r1, r1 +; CHECK-T2NODSP-NEXT: ldrb.w lr, [sp, #8] +; CHECK-T2NODSP-NEXT: uxtb r2, r2 +; CHECK-T2NODSP-NEXT: ldrb.w r4, [sp, #12] +; CHECK-T2NODSP-NEXT: uxtb r0, r0 +; CHECK-T2NODSP-NEXT: uxtb r3, r3 +; CHECK-T2NODSP-NEXT: mul r12, lr, r12 +; CHECK-T2NODSP-NEXT: ldrb.w lr, [sp, #28] +; CHECK-T2NODSP-NEXT: mul r4, r4, lr +; CHECK-T2NODSP-NEXT: ldrb.w lr, [sp, #32] +; CHECK-T2NODSP-NEXT: uxtb.w r12, r12 +; CHECK-T2NODSP-NEXT: add r0, r12 +; CHECK-T2NODSP-NEXT: cmp r0, #255 +; CHECK-T2NODSP-NEXT: mov.w r12, #255 +; CHECK-T2NODSP-NEXT: it hs +; CHECK-T2NODSP-NEXT: movhs r0, r12 +; CHECK-T2NODSP-NEXT: uxtb r4, r4 +; CHECK-T2NODSP-NEXT: add r1, r4 +; CHECK-T2NODSP-NEXT: ldrb.w r4, [sp, #16] +; CHECK-T2NODSP-NEXT: cmp r1, #255 +; CHECK-T2NODSP-NEXT: it hs +; CHECK-T2NODSP-NEXT: movhs r1, r12 +; CHECK-T2NODSP-NEXT: mul r4, r4, lr +; CHECK-T2NODSP-NEXT: ldrb.w lr, [sp, #36] +; CHECK-T2NODSP-NEXT: uxtb r4, r4 +; CHECK-T2NODSP-NEXT: add r2, r4 +; CHECK-T2NODSP-NEXT: ldrb.w r4, [sp, #20] +; CHECK-T2NODSP-NEXT: cmp r2, #255 +; CHECK-T2NODSP-NEXT: it hs +; CHECK-T2NODSP-NEXT: movhs r2, r12 +; CHECK-T2NODSP-NEXT: mul r4, r4, lr +; CHECK-T2NODSP-NEXT: uxtb r4, r4 +; CHECK-T2NODSP-NEXT: add r3, r4 +; CHECK-T2NODSP-NEXT: cmp r3, #255 +; CHECK-T2NODSP-NEXT: it lo +; CHECK-T2NODSP-NEXT: movlo r12, r3 +; CHECK-T2NODSP-NEXT: mov r3, r12 +; CHECK-T2NODSP-NEXT: pop {r4, pc} +; +; CHECK-T2DSP-LABEL: funcv4i8: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: .save {r7, lr} +; CHECK-T2DSP-NEXT: push {r7, lr} +; CHECK-T2DSP-NEXT: ldrb.w r12, [sp, #24] +; CHECK-T2DSP-NEXT: ldrb.w lr, [sp, #8] +; CHECK-T2DSP-NEXT: mul r12, lr, r12 +; CHECK-T2DSP-NEXT: ldrb.w lr, [sp, #12] +; CHECK-T2DSP-NEXT: uqadd8 r0, r0, r12 +; CHECK-T2DSP-NEXT: ldrb.w r12, [sp, #28] +; CHECK-T2DSP-NEXT: mul r12, lr, r12 +; CHECK-T2DSP-NEXT: ldrb.w lr, [sp, #16] +; CHECK-T2DSP-NEXT: uqadd8 r1, r1, r12 +; CHECK-T2DSP-NEXT: ldrb.w r12, [sp, #32] +; CHECK-T2DSP-NEXT: mul r12, lr, r12 +; CHECK-T2DSP-NEXT: ldrb.w lr, [sp, #20] +; CHECK-T2DSP-NEXT: uqadd8 r2, r2, r12 +; CHECK-T2DSP-NEXT: ldrb.w r12, [sp, #36] +; CHECK-T2DSP-NEXT: mul r12, lr, r12 +; CHECK-T2DSP-NEXT: uqadd8 r3, r3, r12 +; CHECK-T2DSP-NEXT: pop {r7, pc} +; +; CHECK-ARM-LABEL: funcv4i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: vldr d16, [sp] +; CHECK-ARM-NEXT: vmov d17, r2, r3 +; CHECK-ARM-NEXT: vmul.i16 d16, d17, d16 +; CHECK-ARM-NEXT: vmov d18, r0, r1 +; CHECK-ARM-NEXT: vbic.i16 d18, #0xff00 +; CHECK-ARM-NEXT: vbic.i16 d16, #0xff00 +; CHECK-ARM-NEXT: vmov.i16 d17, #0xff +; CHECK-ARM-NEXT: vadd.i16 d16, d18, d16 +; CHECK-ARM-NEXT: vmin.u16 d16, d16, d17 +; CHECK-ARM-NEXT: vmov r0, r1, d16 +; CHECK-ARM-NEXT: bx lr + %a = mul <4 x i8> %y, %z + %tmp = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> %x, <4 x i8> %a) + ret <4 x i8> %tmp +} define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind { ; CHECK-T1-LABEL: func32: ; CHECK-T1: @ %bb.0: ; CHECK-T1-NEXT: muls r1, r2, r1 ; CHECK-T1-NEXT: adds r0, r0, r1 -; CHECK-T1-NEXT: blo .LBB0_2 +; CHECK-T1-NEXT: blo .LBB2_2 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: movs r0, #0 ; CHECK-T1-NEXT: mvns r0, r0 -; CHECK-T1-NEXT: .LBB0_2: +; CHECK-T1-NEXT: .LBB2_2: ; CHECK-T1-NEXT: bx lr ; ; CHECK-T2-LABEL: func32: @@ -56,17 +276,17 @@ ; CHECK-T1-NEXT: mvns r1, r5 ; CHECK-T1-NEXT: cmp r4, #0 ; CHECK-T1-NEXT: mov r0, r1 -; CHECK-T1-NEXT: beq .LBB1_3 +; CHECK-T1-NEXT: beq .LBB3_3 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: cmp r4, #0 -; CHECK-T1-NEXT: beq .LBB1_4 -; CHECK-T1-NEXT: .LBB1_2: +; CHECK-T1-NEXT: beq .LBB3_4 +; CHECK-T1-NEXT: .LBB3_2: ; CHECK-T1-NEXT: pop {r4, r5, r7, pc} -; CHECK-T1-NEXT: .LBB1_3: +; CHECK-T1-NEXT: .LBB3_3: ; CHECK-T1-NEXT: mov r0, r3 ; CHECK-T1-NEXT: cmp r4, #0 -; CHECK-T1-NEXT: bne .LBB1_2 -; CHECK-T1-NEXT: .LBB1_4: +; CHECK-T1-NEXT: bne .LBB3_2 +; CHECK-T1-NEXT: .LBB3_4: ; CHECK-T1-NEXT: mov r1, r2 ; CHECK-T1-NEXT: pop {r4, r5, r7, pc} ; @@ -104,16 +324,16 @@ ; CHECK-T1-NEXT: muls r1, r2, r1 ; CHECK-T1-NEXT: uxth r1, r1 ; CHECK-T1-NEXT: adds r0, r0, r1 -; CHECK-T1-NEXT: ldr r1, .LCPI2_0 +; CHECK-T1-NEXT: ldr r1, .LCPI4_0 ; CHECK-T1-NEXT: cmp r0, r1 -; CHECK-T1-NEXT: blo .LBB2_2 +; CHECK-T1-NEXT: blo .LBB4_2 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: mov r0, r1 -; CHECK-T1-NEXT: .LBB2_2: +; CHECK-T1-NEXT: .LBB4_2: ; CHECK-T1-NEXT: bx lr ; CHECK-T1-NEXT: .p2align 2 ; CHECK-T1-NEXT: @ %bb.3: -; CHECK-T1-NEXT: .LCPI2_0: +; CHECK-T1-NEXT: .LCPI4_0: ; CHECK-T1-NEXT: .long 65535 @ 0xffff ; ; CHECK-T2NODSP-LABEL: func16: @@ -152,10 +372,10 @@ ; CHECK-T1-NEXT: uxtb r1, r1 ; CHECK-T1-NEXT: adds r0, r0, r1 ; CHECK-T1-NEXT: cmp r0, #255 -; CHECK-T1-NEXT: blo .LBB3_2 +; CHECK-T1-NEXT: blo .LBB5_2 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: movs r0, #255 -; CHECK-T1-NEXT: .LBB3_2: +; CHECK-T1-NEXT: .LBB5_2: ; CHECK-T1-NEXT: bx lr ; ; CHECK-T2NODSP-LABEL: func8: @@ -194,10 +414,10 @@ ; CHECK-T1-NEXT: ands r1, r2 ; CHECK-T1-NEXT: adds r0, r0, r1 ; CHECK-T1-NEXT: cmp r0, #15 -; CHECK-T1-NEXT: blo .LBB4_2 +; CHECK-T1-NEXT: blo .LBB6_2 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: mov r0, r2 -; CHECK-T1-NEXT: .LBB4_2: +; CHECK-T1-NEXT: .LBB6_2: ; CHECK-T1-NEXT: bx lr ; ; CHECK-T2-LABEL: func4: diff --git a/llvm/test/CodeGen/ARM/usub_sat.ll b/llvm/test/CodeGen/ARM/usub_sat.ll --- a/llvm/test/CodeGen/ARM/usub_sat.ll +++ b/llvm/test/CodeGen/ARM/usub_sat.ll @@ -9,15 +9,168 @@ declare i16 @llvm.usub.sat.i16(i16, i16) declare i32 @llvm.usub.sat.i32(i32, i32) declare i64 @llvm.usub.sat.i64(i64, i64) +declare <2 x i16> @llvm.usub.sat.v2i16(<2 x i16>, <2 x i16>) +declare <4 x i8> @llvm.usub.sat.v4i8(<4 x i8>, <4 x i8>) + +define <2 x i16> @funcv2i16(<2 x i16> %x, <2 x i16> %y) nounwind { +; CHECK-T1-LABEL: funcv2i16: +; CHECK-T1: @ %bb.0: +; CHECK-T1-NEXT: .save {r4, r5, r7, lr} +; CHECK-T1-NEXT: push {r4, r5, r7, lr} +; CHECK-T1-NEXT: uxth r4, r2 +; CHECK-T1-NEXT: uxth r5, r0 +; CHECK-T1-NEXT: subs r0, r5, r4 +; CHECK-T1-NEXT: movs r2, #0 +; CHECK-T1-NEXT: cmp r5, r4 +; CHECK-T1-NEXT: bhs .LBB0_2 +; CHECK-T1-NEXT: @ %bb.1: +; CHECK-T1-NEXT: mov r0, r2 +; CHECK-T1-NEXT: .LBB0_2: +; CHECK-T1-NEXT: uxth r3, r3 +; CHECK-T1-NEXT: uxth r1, r1 +; CHECK-T1-NEXT: subs r1, r1, r3 +; CHECK-T1-NEXT: bhs .LBB0_4 +; CHECK-T1-NEXT: @ %bb.3: +; CHECK-T1-NEXT: mov r1, r2 +; CHECK-T1-NEXT: .LBB0_4: +; CHECK-T1-NEXT: pop {r4, r5, r7, pc} +; +; CHECK-T2NODSP-LABEL: funcv2i16: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: uxth r0, r0 +; CHECK-T2NODSP-NEXT: uxth r2, r2 +; CHECK-T2NODSP-NEXT: subs r0, r0, r2 +; CHECK-T2NODSP-NEXT: uxth r1, r1 +; CHECK-T2NODSP-NEXT: mov.w r2, #0 +; CHECK-T2NODSP-NEXT: it lo +; CHECK-T2NODSP-NEXT: movlo r0, r2 +; CHECK-T2NODSP-NEXT: uxth r3, r3 +; CHECK-T2NODSP-NEXT: subs r1, r1, r3 +; CHECK-T2NODSP-NEXT: it lo +; CHECK-T2NODSP-NEXT: movlo r1, r2 +; CHECK-T2NODSP-NEXT: bx lr +; +; CHECK-T2DSP-LABEL: funcv2i16: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: uqsub16 r0, r0, r2 +; CHECK-T2DSP-NEXT: uqsub16 r1, r1, r3 +; CHECK-T2DSP-NEXT: bx lr +; +; CHECK-ARM-LABEL: funcv2i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: vmov.i32 d16, #0xffff +; CHECK-ARM-NEXT: vmov d17, r2, r3 +; CHECK-ARM-NEXT: vmov d18, r0, r1 +; CHECK-ARM-NEXT: vand d17, d17, d16 +; CHECK-ARM-NEXT: vand d16, d18, d16 +; CHECK-ARM-NEXT: vqsub.u32 d16, d16, d17 +; CHECK-ARM-NEXT: vmov r0, r1, d16 +; CHECK-ARM-NEXT: bx lr + %tmp = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %x, <2 x i16> %y) + ret <2 x i16> %tmp +} + +define <4 x i8> @funcv4i8(<4 x i8> %x, <4 x i8> %y) nounwind { +; CHECK-T1-LABEL: funcv4i8: +; CHECK-T1: @ %bb.0: +; CHECK-T1-NEXT: .save {r4, r5, r6, lr} +; CHECK-T1-NEXT: push {r4, r5, r6, lr} +; CHECK-T1-NEXT: add r4, sp, #16 +; CHECK-T1-NEXT: ldrb r5, [r4] +; CHECK-T1-NEXT: uxtb r6, r0 +; CHECK-T1-NEXT: subs r0, r6, r5 +; CHECK-T1-NEXT: movs r4, #0 +; CHECK-T1-NEXT: cmp r6, r5 +; CHECK-T1-NEXT: bhs .LBB1_2 +; CHECK-T1-NEXT: @ %bb.1: +; CHECK-T1-NEXT: mov r0, r4 +; CHECK-T1-NEXT: .LBB1_2: +; CHECK-T1-NEXT: add r5, sp, #20 +; CHECK-T1-NEXT: ldrb r5, [r5] +; CHECK-T1-NEXT: uxtb r1, r1 +; CHECK-T1-NEXT: subs r1, r1, r5 +; CHECK-T1-NEXT: bhs .LBB1_4 +; CHECK-T1-NEXT: @ %bb.3: +; CHECK-T1-NEXT: mov r1, r4 +; CHECK-T1-NEXT: .LBB1_4: +; CHECK-T1-NEXT: add r5, sp, #24 +; CHECK-T1-NEXT: ldrb r5, [r5] +; CHECK-T1-NEXT: uxtb r2, r2 +; CHECK-T1-NEXT: subs r2, r2, r5 +; CHECK-T1-NEXT: bhs .LBB1_6 +; CHECK-T1-NEXT: @ %bb.5: +; CHECK-T1-NEXT: mov r2, r4 +; CHECK-T1-NEXT: .LBB1_6: +; CHECK-T1-NEXT: add r5, sp, #28 +; CHECK-T1-NEXT: ldrb r5, [r5] +; CHECK-T1-NEXT: uxtb r3, r3 +; CHECK-T1-NEXT: subs r3, r3, r5 +; CHECK-T1-NEXT: bhs .LBB1_8 +; CHECK-T1-NEXT: @ %bb.7: +; CHECK-T1-NEXT: mov r3, r4 +; CHECK-T1-NEXT: .LBB1_8: +; CHECK-T1-NEXT: pop {r4, r5, r6, pc} +; +; CHECK-T2NODSP-LABEL: funcv4i8: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: .save {r7, lr} +; CHECK-T2NODSP-NEXT: push {r7, lr} +; CHECK-T2NODSP-NEXT: uxtb.w r12, r0 +; CHECK-T2NODSP-NEXT: ldrb.w r0, [sp, #8] +; CHECK-T2NODSP-NEXT: uxtb.w lr, r1 +; CHECK-T2NODSP-NEXT: ldrb.w r1, [sp, #12] +; CHECK-T2NODSP-NEXT: subs.w r0, r12, r0 +; CHECK-T2NODSP-NEXT: mov.w r12, #0 +; CHECK-T2NODSP-NEXT: it lo +; CHECK-T2NODSP-NEXT: movlo r0, r12 +; CHECK-T2NODSP-NEXT: subs.w r1, lr, r1 +; CHECK-T2NODSP-NEXT: uxtb.w lr, r2 +; CHECK-T2NODSP-NEXT: ldrb.w r2, [sp, #16] +; CHECK-T2NODSP-NEXT: it lo +; CHECK-T2NODSP-NEXT: movlo r1, r12 +; CHECK-T2NODSP-NEXT: subs.w r2, lr, r2 +; CHECK-T2NODSP-NEXT: uxtb.w lr, r3 +; CHECK-T2NODSP-NEXT: ldrb.w r3, [sp, #20] +; CHECK-T2NODSP-NEXT: it lo +; CHECK-T2NODSP-NEXT: movlo r2, r12 +; CHECK-T2NODSP-NEXT: subs.w r3, lr, r3 +; CHECK-T2NODSP-NEXT: it lo +; CHECK-T2NODSP-NEXT: movlo r3, r12 +; CHECK-T2NODSP-NEXT: pop {r7, pc} +; +; CHECK-T2DSP-LABEL: funcv4i8: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: ldrsb.w r12, [sp] +; CHECK-T2DSP-NEXT: uqsub8 r0, r0, r12 +; CHECK-T2DSP-NEXT: ldrsb.w r12, [sp, #4] +; CHECK-T2DSP-NEXT: uqsub8 r1, r1, r12 +; CHECK-T2DSP-NEXT: ldrsb.w r12, [sp, #8] +; CHECK-T2DSP-NEXT: uqsub8 r2, r2, r12 +; CHECK-T2DSP-NEXT: ldrsb.w r12, [sp, #12] +; CHECK-T2DSP-NEXT: uqsub8 r3, r3, r12 +; CHECK-T2DSP-NEXT: bx lr +; +; CHECK-ARM-LABEL: funcv4i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: vmov d16, r2, r3 +; CHECK-ARM-NEXT: vmov d17, r0, r1 +; CHECK-ARM-NEXT: vbic.i16 d16, #0xff00 +; CHECK-ARM-NEXT: vbic.i16 d17, #0xff00 +; CHECK-ARM-NEXT: vqsub.u16 d16, d17, d16 +; CHECK-ARM-NEXT: vmov r0, r1, d16 +; CHECK-ARM-NEXT: bx lr + %tmp = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> %x, <4 x i8> %y) + ret <4 x i8> %tmp +} define i32 @func(i32 %x, i32 %y) nounwind { ; CHECK-T1-LABEL: func: ; CHECK-T1: @ %bb.0: ; CHECK-T1-NEXT: subs r0, r0, r1 -; CHECK-T1-NEXT: bhs .LBB0_2 +; CHECK-T1-NEXT: bhs .LBB2_2 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: movs r0, #0 -; CHECK-T1-NEXT: .LBB0_2: +; CHECK-T1-NEXT: .LBB2_2: ; CHECK-T1-NEXT: bx lr ; ; CHECK-T2-LABEL: func: @@ -50,17 +203,17 @@ ; CHECK-T1-NEXT: movs r3, #1 ; CHECK-T1-NEXT: subs r3, r3, r0 ; CHECK-T1-NEXT: mov r0, r1 -; CHECK-T1-NEXT: beq .LBB1_3 +; CHECK-T1-NEXT: beq .LBB3_3 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: cmp r3, #0 -; CHECK-T1-NEXT: beq .LBB1_4 -; CHECK-T1-NEXT: .LBB1_2: +; CHECK-T1-NEXT: beq .LBB3_4 +; CHECK-T1-NEXT: .LBB3_2: ; CHECK-T1-NEXT: pop {r4, pc} -; CHECK-T1-NEXT: .LBB1_3: +; CHECK-T1-NEXT: .LBB3_3: ; CHECK-T1-NEXT: mov r0, r2 ; CHECK-T1-NEXT: cmp r3, #0 -; CHECK-T1-NEXT: bne .LBB1_2 -; CHECK-T1-NEXT: .LBB1_4: +; CHECK-T1-NEXT: bne .LBB3_2 +; CHECK-T1-NEXT: .LBB3_4: ; CHECK-T1-NEXT: mov r1, r4 ; CHECK-T1-NEXT: pop {r4, pc} ; @@ -94,10 +247,10 @@ ; CHECK-T1-LABEL: func16: ; CHECK-T1: @ %bb.0: ; CHECK-T1-NEXT: subs r0, r0, r1 -; CHECK-T1-NEXT: bhs .LBB2_2 +; CHECK-T1-NEXT: bhs .LBB4_2 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: movs r0, #0 -; CHECK-T1-NEXT: .LBB2_2: +; CHECK-T1-NEXT: .LBB4_2: ; CHECK-T1-NEXT: bx lr ; ; CHECK-T2NODSP-LABEL: func16: @@ -126,10 +279,10 @@ ; CHECK-T1-LABEL: func8: ; CHECK-T1: @ %bb.0: ; CHECK-T1-NEXT: subs r0, r0, r1 -; CHECK-T1-NEXT: bhs .LBB3_2 +; CHECK-T1-NEXT: bhs .LBB5_2 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: movs r0, #0 -; CHECK-T1-NEXT: .LBB3_2: +; CHECK-T1-NEXT: .LBB5_2: ; CHECK-T1-NEXT: bx lr ; ; CHECK-T2NODSP-LABEL: func8: @@ -158,10 +311,10 @@ ; CHECK-T1-LABEL: func3: ; CHECK-T1: @ %bb.0: ; CHECK-T1-NEXT: subs r0, r0, r1 -; CHECK-T1-NEXT: bhs .LBB4_2 +; CHECK-T1-NEXT: bhs .LBB6_2 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: movs r0, #0 -; CHECK-T1-NEXT: .LBB4_2: +; CHECK-T1-NEXT: .LBB6_2: ; CHECK-T1-NEXT: bx lr ; ; CHECK-T2-LABEL: func3: diff --git a/llvm/test/CodeGen/ARM/usub_sat_plus.ll b/llvm/test/CodeGen/ARM/usub_sat_plus.ll --- a/llvm/test/CodeGen/ARM/usub_sat_plus.ll +++ b/llvm/test/CodeGen/ARM/usub_sat_plus.ll @@ -9,6 +9,20 @@ declare i16 @llvm.usub.sat.i16(i16, i16) declare i32 @llvm.usub.sat.i32(i32, i32) declare i64 @llvm.usub.sat.i64(i64, i64) +declare <2 x i16> @llvm.usub.sat.v2i16(<2 x i16>, <2 x i16>) +declare <4 x i8> @llvm.usub.sat.v4i8(<4 x i8>, <4 x i8>) + +define <2 x i16> @funcv2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %z) nounwind { + %a = mul <2 x i16> %y, %z + %tmp = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %x, <2 x i16> %a) + ret <2 x i16> %tmp +} + +define <4 x i8> @funcv4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %z) nounwind { + %a = mul <4 x i8> %y, %z + %tmp = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> %x, <4 x i8> %a) + ret <4 x i8> %tmp +} define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind { ; CHECK-T1-LABEL: func32: