diff --git a/llvm/test/CodeGen/ARM/and-cmp0-sink.ll b/llvm/test/CodeGen/ARM/and-cmp0-sink.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/and-cmp0-sink.ll
@@ -0,0 +1,388 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s --check-prefix V7M
+; RUN: llc -mtriple=armv7a-eabi %s -o - | FileCheck %s --check-prefix V7A
+; RUN: llc -mtriple=thumbv7a-eabi %s -o - | FileCheck %s --check-prefix V7A-T
+; RUN: llc -mtriple=armv6m-eabi %s -o - | FileCheck %s --check-prefix V6M
+
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+
+; Test sinking of `and` instructions so that they can fold into `tst` or `lsls`, enable the cmov-bfi combine, etc.
+define void @f(i32 %v, ptr noalias %outp) {
+; V7M-LABEL: f:
+; V7M:       @ %bb.0: @ %entry
+; V7M-NEXT:    movs r2, #0
+; V7M-NEXT:    and r12, r0, #14
+; V7M-NEXT:    str r2, [r1]
+; V7M-NEXT:    and r3, r0, #4
+; V7M-NEXT:    and r2, r0, #2
+; V7M-NEXT:    lsls r0, r0, #31
+; V7M-NEXT:    bne .LBB0_2
+; V7M-NEXT:    @ %bb.1: @ %if.then
+; V7M-NEXT:    cmp r2, #0
+; V7M-NEXT:    it ne
+; V7M-NEXT:    movne.w r2, #33024
+; V7M-NEXT:    cmp r3, #0
+; V7M-NEXT:    it ne
+; V7M-NEXT:    addne.w r2, r2, #16512
+; V7M-NEXT:    b .LBB0_3
+; V7M-NEXT:    .LBB0_2: @ %if.else
+; V7M-NEXT:    cmp r2, #0
+; V7M-NEXT:    it ne
+; V7M-NEXT:    movne.w r2, #8256
+; V7M-NEXT:    cmp r3, #0
+; V7M-NEXT:    it ne
+; V7M-NEXT:    addne.w r2, r2, #4128
+; V7M-NEXT:    .LBB0_3: @ %if.else
+; V7M-NEXT:    cmp.w r12, #0
+; V7M-NEXT:    it ne
+; V7M-NEXT:    strne r2, [r1]
+; V7M-NEXT:    bx lr
+;
+; V7A-LABEL: f:
+; V7A:       @ %bb.0: @ %entry
+; V7A-NEXT:    mov r2, #0
+; V7A-NEXT:    and r12, r0, #14
+; V7A-NEXT:    str r2, [r1]
+; V7A-NEXT:    and r3, r0, #4
+; V7A-NEXT:    and r2, r0, #2
+; V7A-NEXT:    tst r0, #1
+; V7A-NEXT:    bne .LBB0_2
+; V7A-NEXT:    @ %bb.1: @ %if.then
+; V7A-NEXT:    cmp r2, #0
+; V7A-NEXT:    movw r0, #16512
+; V7A-NEXT:    movwne r2, #33024
+; V7A-NEXT:    b .LBB0_3
+; V7A-NEXT:    .LBB0_2: @ %if.else
+; V7A-NEXT:    cmp r2, #0
+; V7A-NEXT:    movw r0, #4128
+; V7A-NEXT:    movwne r2, #8256
+; V7A-NEXT:    .LBB0_3: @ %if.else
+; V7A-NEXT:    cmp r3, #0
+; V7A-NEXT:    orrne r2, r2, r0
+; V7A-NEXT:    cmp r12, #0
+; V7A-NEXT:    strne r2, [r1]
+; V7A-NEXT:    bx lr
+;
+; V7A-T-LABEL: f:
+; V7A-T:       @ %bb.0: @ %entry
+; V7A-T-NEXT:    movs r2, #0
+; V7A-T-NEXT:    and r12, r0, #14
+; V7A-T-NEXT:    str r2, [r1]
+; V7A-T-NEXT:    and r3, r0, #4
+; V7A-T-NEXT:    and r2, r0, #2
+; V7A-T-NEXT:    lsls r0, r0, #31
+; V7A-T-NEXT:    bne .LBB0_2
+; V7A-T-NEXT:    @ %bb.1: @ %if.then
+; V7A-T-NEXT:    cmp r2, #0
+; V7A-T-NEXT:    it ne
+; V7A-T-NEXT:    movne.w r2, #33024
+; V7A-T-NEXT:    cmp r3, #0
+; V7A-T-NEXT:    it ne
+; V7A-T-NEXT:    addne.w r2, r2, #16512
+; V7A-T-NEXT:    b .LBB0_3
+; V7A-T-NEXT:    .LBB0_2: @ %if.else
+; V7A-T-NEXT:    cmp r2, #0
+; V7A-T-NEXT:    it ne
+; V7A-T-NEXT:    movne.w r2, #8256
+; V7A-T-NEXT:    cmp r3, #0
+; V7A-T-NEXT:    it ne
+; V7A-T-NEXT:    addne.w r2, r2, #4128
+; V7A-T-NEXT:    .LBB0_3: @ %if.else
+; V7A-T-NEXT:    cmp.w r12, #0
+; V7A-T-NEXT:    it ne
+; V7A-T-NEXT:    strne r2, [r1]
+; V7A-T-NEXT:    bx lr
+;
+; V6M-LABEL: f:
+; V6M:       @ %bb.0: @ %entry
+; V6M-NEXT:    .save {r4, lr}
+; V6M-NEXT:    push {r4, lr}
+; V6M-NEXT:    movs r2, #0
+; V6M-NEXT:    str r2, [r1]
+; V6M-NEXT:    movs r3, #14
+; V6M-NEXT:    ands r3, r0
+; V6M-NEXT:    movs r4, #4
+; V6M-NEXT:    ands r4, r0
+; V6M-NEXT:    movs r2, #2
+; V6M-NEXT:    ands r2, r0
+; V6M-NEXT:    lsls r0, r0, #31
+; V6M-NEXT:    bne .LBB0_5
+; V6M-NEXT:    @ %bb.1: @ %if.then
+; V6M-NEXT:    movs r0, #129
+; V6M-NEXT:    cmp r2, #0
+; V6M-NEXT:    beq .LBB0_3
+; V6M-NEXT:    @ %bb.2:
+; V6M-NEXT:    lsls r2, r0, #8
+; V6M-NEXT:    .LBB0_3: @ %if.then
+; V6M-NEXT:    cmp r4, #0
+; V6M-NEXT:    beq .LBB0_10
+; V6M-NEXT:    @ %bb.4: @ %if.then
+; V6M-NEXT:    lsls r0, r0, #7
+; V6M-NEXT:    b .LBB0_9
+; V6M-NEXT:    .LBB0_5: @ %if.else
+; V6M-NEXT:    movs r0, #129
+; V6M-NEXT:    cmp r2, #0
+; V6M-NEXT:    beq .LBB0_7
+; V6M-NEXT:    @ %bb.6:
+; V6M-NEXT:    lsls r2, r0, #6
+; V6M-NEXT:    .LBB0_7: @ %if.else
+; V6M-NEXT:    cmp r4, #0
+; V6M-NEXT:    beq .LBB0_10
+; V6M-NEXT:    @ %bb.8: @ %if.else
+; V6M-NEXT:    lsls r0, r0, #5
+; V6M-NEXT:    .LBB0_9: @ %if.else
+; V6M-NEXT:    adds r2, r2, r0
+; V6M-NEXT:    .LBB0_10: @ %if.else
+; V6M-NEXT:    cmp r3, #0
+; V6M-NEXT:    beq .LBB0_12
+; V6M-NEXT:    @ %bb.11: @ %if.end
+; V6M-NEXT:    str r2, [r1]
+; V6M-NEXT:    .LBB0_12: @ %exit
+; V6M-NEXT:    pop {r4, pc}
+entry:
+  store i32 0, ptr %outp, align 4  ; default value; only overwritten on the %if.end path
+  %and = and i32 %v, 1
+  %cmp = icmp eq i32 %and, 0  ; bit 0 of %v clear -> %if.then, set -> %if.else
+  %and1 = and i32 %v, 2
+  %tobool.not = icmp eq i32 %and1, 0  ; consumed by the first select in %if.then and %if.else
+  %and2 = and i32 %v, 4
+  %tobool1.not = icmp eq i32 %and2, 0  ; consumed by the second select in %if.then and %if.else
+  %and3 = and i32 %v, 14  ; mask 14 = 0b1110
+  %tobool2.not = icmp eq i32 %and3, 0  ; consumed by the branches that end %if.then and %if.else
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %select = select i1 %tobool.not, i32 0, i32 33024
+  %or = or i32 %select, 16512
+  %spec.select = select i1 %tobool1.not, i32 %select, i32 %or
+  br i1 %tobool2.not, label %exit, label %if.end  ; skip the second store when (%v & 14) == 0
+
+if.else:
+  %select1 = select i1 %tobool.not, i32 0, i32 8256  ; same shape as %if.then, different constants
+  %or1 = or i32 %select1, 4128
+  %spec.select1 = select i1 %tobool1.not, i32 %select1, i32 %or1
+  br i1 %tobool2.not, label %exit, label %if.end  ; skip the second store when (%v & 14) == 0
+
+if.end:
+  %spec.select.sink = phi i32 [ %spec.select, %if.then ], [ %spec.select1, %if.else ]
+  store i32 %spec.select.sink, ptr %outp, align 4
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Test with a mask that can be encoded in the T32 instruction set, but not in A32.
+define i32 @f0(i1 %c0, i32 %v) {
+; V7M-LABEL: f0:
+; V7M:       @ %bb.0: @ %E
+; V7M-NEXT:    bic r1, r1, #-16843010
+; V7M-NEXT:    lsls r0, r0, #31
+; V7M-NEXT:    beq .LBB1_2
+; V7M-NEXT:    @ %bb.1: @ %A
+; V7M-NEXT:    cmp r1, #0
+; V7M-NEXT:    itt eq
+; V7M-NEXT:    moveq r0, #0
+; V7M-NEXT:    bxeq lr
+; V7M-NEXT:    b .LBB1_3
+; V7M-NEXT:    .LBB1_2: @ %B
+; V7M-NEXT:    cmp r1, #0
+; V7M-NEXT:    itt ne
+; V7M-NEXT:    movne r0, #0
+; V7M-NEXT:    bxne lr
+; V7M-NEXT:    .LBB1_3: @ %D
+; V7M-NEXT:    movs r0, #1
+; V7M-NEXT:    bx lr
+;
+; V7A-LABEL: f0:
+; V7A:       @ %bb.0: @ %E
+; V7A-NEXT:    movw r2, #257
+; V7A-NEXT:    tst r0, #1
+; V7A-NEXT:    movt r2, #257
+; V7A-NEXT:    and r1, r1, r2
+; V7A-NEXT:    beq .LBB1_3
+; V7A-NEXT:    @ %bb.1: @ %A
+; V7A-NEXT:    cmp r1, #0
+; V7A-NEXT:    moveq r0, #0
+; V7A-NEXT:    bxeq lr
+; V7A-NEXT:    .LBB1_2: @ %D
+; V7A-NEXT:    mov r0, #1
+; V7A-NEXT:    bx lr
+; V7A-NEXT:    .LBB1_3: @ %B
+; V7A-NEXT:    mov r0, #0
+; V7A-NEXT:    cmp r1, #0
+; V7A-NEXT:    moveq r0, #1
+; V7A-NEXT:    bx lr
+;
+; V7A-T-LABEL: f0:
+; V7A-T:       @ %bb.0: @ %E
+; V7A-T-NEXT:    bic r1, r1, #-16843010
+; V7A-T-NEXT:    lsls r0, r0, #31
+; V7A-T-NEXT:    beq .LBB1_2
+; V7A-T-NEXT:    @ %bb.1: @ %A
+; V7A-T-NEXT:    cmp r1, #0
+; V7A-T-NEXT:    itt eq
+; V7A-T-NEXT:    moveq r0, #0
+; V7A-T-NEXT:    bxeq lr
+; V7A-T-NEXT:    b .LBB1_3
+; V7A-T-NEXT:    .LBB1_2: @ %B
+; V7A-T-NEXT:    cmp r1, #0
+; V7A-T-NEXT:    itt ne
+; V7A-T-NEXT:    movne r0, #0
+; V7A-T-NEXT:    bxne lr
+; V7A-T-NEXT:    .LBB1_3: @ %D
+; V7A-T-NEXT:    movs r0, #1
+; V7A-T-NEXT:    bx lr
+;
+; V6M-LABEL: f0:
+; V6M:       @ %bb.0: @ %E
+; V6M-NEXT:    ldr r2, .LCPI1_0
+; V6M-NEXT:    ands r2, r1
+; V6M-NEXT:    lsls r0, r0, #31
+; V6M-NEXT:    beq .LBB1_3
+; V6M-NEXT:    @ %bb.1: @ %A
+; V6M-NEXT:    cmp r2, #0
+; V6M-NEXT:    bne .LBB1_5
+; V6M-NEXT:    @ %bb.2:
+; V6M-NEXT:    movs r0, #0
+; V6M-NEXT:    bx lr
+; V6M-NEXT:    .LBB1_3: @ %B
+; V6M-NEXT:    cmp r2, #0
+; V6M-NEXT:    beq .LBB1_5
+; V6M-NEXT:    @ %bb.4:
+; V6M-NEXT:    movs r0, #0
+; V6M-NEXT:    bx lr
+; V6M-NEXT:    .LBB1_5: @ %D
+; V6M-NEXT:    movs r0, #1
+; V6M-NEXT:    bx lr
+; V6M-NEXT:    .p2align 2
+; V6M-NEXT:    @ %bb.6:
+; V6M-NEXT:    .LCPI1_0:
+; V6M-NEXT:    .long 16843009 @ 0x1010101
+E:
+  %a = and i32 %v, 16843009  ; 16843009 = 0x01010101; defined here, compared in %A and %B
+  br i1 %c0, label %A, label %B
+
+A:
+  %c1 = icmp eq i32 %a, 0
+  br i1 %c1, label %C, label %D  ; %a == 0 -> %C (result 0), otherwise %D (result 1)
+
+B:
+  %c2 = icmp eq i32 %a, 0
+  br i1 %c2, label %D, label %C  ; same compare as %A with the successors swapped
+
+C:
+  br label %X
+
+D:
+  br label %X
+
+X:
+  %x = phi i32 [0, %C], [1, %D]
+  ret i32 %x
+}
+
+; Test with a mask that can be encoded in both the T32 and A32 instruction sets.
+define i32 @f1(i1 %c0, i32 %v) {
+; V7M-LABEL: f1:
+; V7M:       @ %bb.0: @ %E
+; V7M-NEXT:    and r1, r1, #100663296
+; V7M-NEXT:    lsls r0, r0, #31
+; V7M-NEXT:    beq .LBB2_2
+; V7M-NEXT:    @ %bb.1: @ %A
+; V7M-NEXT:    cmp r1, #0
+; V7M-NEXT:    itt eq
+; V7M-NEXT:    moveq r0, #0
+; V7M-NEXT:    bxeq lr
+; V7M-NEXT:    b .LBB2_3
+; V7M-NEXT:    .LBB2_2: @ %B
+; V7M-NEXT:    cmp r1, #0
+; V7M-NEXT:    itt ne
+; V7M-NEXT:    movne r0, #0
+; V7M-NEXT:    bxne lr
+; V7M-NEXT:    .LBB2_3: @ %D
+; V7M-NEXT:    movs r0, #1
+; V7M-NEXT:    bx lr
+;
+; V7A-LABEL: f1:
+; V7A:       @ %bb.0: @ %E
+; V7A-NEXT:    and r1, r1, #100663296
+; V7A-NEXT:    tst r0, #1
+; V7A-NEXT:    beq .LBB2_3
+; V7A-NEXT:    @ %bb.1: @ %A
+; V7A-NEXT:    cmp r1, #0
+; V7A-NEXT:    moveq r0, #0
+; V7A-NEXT:    bxeq lr
+; V7A-NEXT:    .LBB2_2: @ %D
+; V7A-NEXT:    mov r0, #1
+; V7A-NEXT:    bx lr
+; V7A-NEXT:    .LBB2_3: @ %B
+; V7A-NEXT:    mov r0, #0
+; V7A-NEXT:    cmp r1, #0
+; V7A-NEXT:    moveq r0, #1
+; V7A-NEXT:    bx lr
+;
+; V7A-T-LABEL: f1:
+; V7A-T:       @ %bb.0: @ %E
+; V7A-T-NEXT:    and r1, r1, #100663296
+; V7A-T-NEXT:    lsls r0, r0, #31
+; V7A-T-NEXT:    beq .LBB2_2
+; V7A-T-NEXT:    @ %bb.1: @ %A
+; V7A-T-NEXT:    cmp r1, #0
+; V7A-T-NEXT:    itt eq
+; V7A-T-NEXT:    moveq r0, #0
+; V7A-T-NEXT:    bxeq lr
+; V7A-T-NEXT:    b .LBB2_3
+; V7A-T-NEXT:    .LBB2_2: @ %B
+; V7A-T-NEXT:    cmp r1, #0
+; V7A-T-NEXT:    itt ne
+; V7A-T-NEXT:    movne r0, #0
+; V7A-T-NEXT:    bxne lr
+; V7A-T-NEXT:    .LBB2_3: @ %D
+; V7A-T-NEXT:    movs r0, #1
+; V7A-T-NEXT:    bx lr
+;
+; V6M-LABEL: f1:
+; V6M:       @ %bb.0: @ %E
+; V6M-NEXT:    movs r2, #3
+; V6M-NEXT:    lsls r2, r2, #25
+; V6M-NEXT:    ands r2, r1
+; V6M-NEXT:    lsls r0, r0, #31
+; V6M-NEXT:    beq .LBB2_3
+; V6M-NEXT:    @ %bb.1: @ %A
+; V6M-NEXT:    cmp r2, #0
+; V6M-NEXT:    bne .LBB2_5
+; V6M-NEXT:    @ %bb.2:
+; V6M-NEXT:    movs r0, #0
+; V6M-NEXT:    bx lr
+; V6M-NEXT:    .LBB2_3: @ %B
+; V6M-NEXT:    cmp r2, #0
+; V6M-NEXT:    beq .LBB2_5
+; V6M-NEXT:    @ %bb.4:
+; V6M-NEXT:    movs r0, #0
+; V6M-NEXT:    bx lr
+; V6M-NEXT:    .LBB2_5: @ %D
+; V6M-NEXT:    movs r0, #1
+; V6M-NEXT:    bx lr
+E:
+  %a = and i32 %v, 100663296  ; 100663296 = 0x06000000 (3 << 25); defined here, compared in %A and %B
+  br i1 %c0, label %A, label %B
+
+A:
+  %c1 = icmp eq i32 %a, 0
+  br i1 %c1, label %C, label %D  ; %a == 0 -> %C (result 0), otherwise %D (result 1)
+
+B:
+  %c2 = icmp eq i32 %a, 0
+  br i1 %c2, label %D, label %C  ; same compare as %A with the successors swapped
+
+C:
+  br label %X
+
+D:
+  br label %X
+
+X:
+  %x = phi i32 [0, %C], [1, %D]
+  ret i32 %x
+}