Index: test/CodeGen/AArch64/arm64-shrink-wrapping.ll =================================================================== --- test/CodeGen/AArch64/arm64-shrink-wrapping.ll +++ test/CodeGen/AArch64/arm64-shrink-wrapping.ll @@ -1,45 +1,51 @@ -; RUN: llc %s -o - -enable-shrink-wrap=true -disable-post-ra -frame-pointer=all | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE -; RUN: llc %s -o - -enable-shrink-wrap=false -disable-post-ra -frame-pointer=all | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -o - -enable-shrink-wrap=true -disable-post-ra -frame-pointer=all | FileCheck %s --check-prefix=ENABLE +; RUN: llc %s -o - -enable-shrink-wrap=false -disable-post-ra -frame-pointer=all | FileCheck %s --check-prefix=DISABLE target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-ios" ; Initial motivating example: Simple diamond with a call just on one side. -; CHECK-LABEL: foo: -; -; Compare the arguments and jump to exit. -; No prologue needed. -; ENABLE: cmp w0, w1 -; ENABLE-NEXT: b.ge [[EXIT_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; CHECK: sub sp, sp, #32 -; CHECK-NEXT: stp [[SAVE_SP:x[0-9]+]], [[CSR:x[0-9]+]], [sp, #16] -; CHECK-NEXT: add [[SAVE_SP]], sp, #16 -; -; Compare the arguments and jump to exit. -; After the prologue is set. -; DISABLE: cmp w0, w1 -; DISABLE-NEXT: b.ge [[EXIT_LABEL:LBB[0-9_]+]] -; -; Store %a in the alloca. -; CHECK: stur w0, {{\[}}[[SAVE_SP]], #-4] -; Set the alloca address in the second argument. -; CHECK-NEXT: sub x1, [[SAVE_SP]], #4 -; Set the first argument to zero. -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: bl _doSomething -; -; Without shrink-wrapping, epilogue is in the exit block. -; DISABLE: [[EXIT_LABEL]]: -; Epilogue code. -; CHECK-NEXT: ldp x{{[0-9]+}}, [[CSR]], [sp, #16] -; CHECK-NEXT: add sp, sp, #32 -; -; With shrink-wrapping, exit block is a simple return. 
-; ENABLE: [[EXIT_LABEL]]: -; CHECK-NEXT: ret define i32 @foo(i32 %a, i32 %b) { +; ENABLE-LABEL: foo: +; ENABLE: ; %bb.0: +; ENABLE-NEXT: cmp w0, w1 +; ENABLE-NEXT: b.ge LBB0_2 +; ENABLE-NEXT: ; %bb.1: ; %true +; ENABLE-NEXT: sub sp, sp, #32 ; =32 +; ENABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; ENABLE-NEXT: add x29, sp, #16 ; =16 +; ENABLE-NEXT: .cfi_def_cfa w29, 16 +; ENABLE-NEXT: .cfi_offset w30, -8 +; ENABLE-NEXT: .cfi_offset w29, -16 +; ENABLE-NEXT: stur w0, [x29, #-4] +; ENABLE-NEXT: sub x1, x29, #4 ; =4 +; ENABLE-NEXT: mov w0, wzr +; ENABLE-NEXT: bl _doSomething +; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; ENABLE-NEXT: add sp, sp, #32 ; =32 +; ENABLE-NEXT: LBB0_2: ; %false +; ENABLE-NEXT: ret +; +; DISABLE-LABEL: foo: +; DISABLE: ; %bb.0: +; DISABLE-NEXT: sub sp, sp, #32 ; =32 +; DISABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; DISABLE-NEXT: add x29, sp, #16 ; =16 +; DISABLE-NEXT: .cfi_def_cfa w29, 16 +; DISABLE-NEXT: .cfi_offset w30, -8 +; DISABLE-NEXT: .cfi_offset w29, -16 +; DISABLE-NEXT: cmp w0, w1 +; DISABLE-NEXT: b.ge LBB0_2 +; DISABLE-NEXT: ; %bb.1: ; %true +; DISABLE-NEXT: stur w0, [x29, #-4] +; DISABLE-NEXT: sub x1, x29, #4 ; =4 +; DISABLE-NEXT: mov w0, wzr +; DISABLE-NEXT: bl _doSomething +; DISABLE-NEXT: LBB0_2: ; %false +; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; DISABLE-NEXT: add sp, sp, #32 ; =32 +; DISABLE-NEXT: ret %tmp = alloca i32, align 4 %tmp2 = icmp slt i32 %a, %b br i1 %tmp2, label %true, label %false @@ -60,50 +66,65 @@ ; Check that we do not perform the restore inside the loop whereas the save ; is outside. -; CHECK-LABEL: freqSaveAndRestoreOutsideLoop: -; -; Shrink-wrapping allows to skip the prologue in the else case. -; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-32]! 
-; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16] -; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16 -; -; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] -; -; CHECK: mov [[SUM:w[0-9]+]], wzr -; CHECK-NEXT: mov [[IV:w[0-9]+]], #10 -; -; Next BB. -; CHECK: [[LOOP:LBB[0-9_]+]]: ; %for.body -; CHECK: bl _something -; CHECK-NEXT: subs [[IV]], [[IV]], #1 -; CHECK-NEXT: add [[SUM]], w0, [[SUM]] -; CHECK-NEXT: b.ne [[LOOP]] -; -; Next BB. -; Copy SUM into the returned register + << 3. -; CHECK: lsl w0, [[SUM]], #3 -; -; Jump to epilogue. -; DISABLE: b [[EPILOG_BB:LBB[0-9_]+]] -; -; DISABLE: [[ELSE_LABEL]]: ; %if.else -; Shift second argument by one and store into returned register. -; DISABLE: lsl w0, w1, #1 -; DISABLE: [[EPILOG_BB]]: ; %if.end -; -; Epilogue code. -; CHECK: ldp [[CSR3]], [[CSR4]], [sp, #16] -; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #32 -; CHECK-NEXT: ret -; -; ENABLE: [[ELSE_LABEL]]: ; %if.else -; Shift second argument by one and store into returned register. -; ENABLE: lsl w0, w1, #1 -; ENABLE: ret define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) { +; ENABLE-LABEL: freqSaveAndRestoreOutsideLoop: +; ENABLE: ; %bb.0: ; %entry +; ENABLE-NEXT: cbz w0, LBB1_4 +; ENABLE-NEXT: ; %bb.1: ; %for.body.preheader +; ENABLE-NEXT: stp x20, x19, [sp, #-32]! 
; 16-byte Folded Spill +; ENABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; ENABLE-NEXT: add x29, sp, #16 ; =16 +; ENABLE-NEXT: .cfi_def_cfa w29, 16 +; ENABLE-NEXT: .cfi_offset w30, -8 +; ENABLE-NEXT: .cfi_offset w29, -16 +; ENABLE-NEXT: .cfi_offset w19, -24 +; ENABLE-NEXT: .cfi_offset w20, -32 +; ENABLE-NEXT: mov w19, wzr +; ENABLE-NEXT: mov w20, #10 +; ENABLE-NEXT: LBB1_2: ; %for.body +; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: bl _something +; ENABLE-NEXT: subs w20, w20, #1 ; =1 +; ENABLE-NEXT: add w19, w0, w19 +; ENABLE-NEXT: b.ne LBB1_2 +; ENABLE-NEXT: ; %bb.3: ; %for.end +; ENABLE-NEXT: lsl w0, w19, #3 +; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; ENABLE-NEXT: ret +; ENABLE-NEXT: LBB1_4: ; %if.else +; ENABLE-NEXT: lsl w0, w1, #1 +; ENABLE-NEXT: ret +; +; DISABLE-LABEL: freqSaveAndRestoreOutsideLoop: +; DISABLE: ; %bb.0: ; %entry +; DISABLE-NEXT: stp x20, x19, [sp, #-32]! 
; 16-byte Folded Spill +; DISABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; DISABLE-NEXT: add x29, sp, #16 ; =16 +; DISABLE-NEXT: .cfi_def_cfa w29, 16 +; DISABLE-NEXT: .cfi_offset w30, -8 +; DISABLE-NEXT: .cfi_offset w29, -16 +; DISABLE-NEXT: .cfi_offset w19, -24 +; DISABLE-NEXT: .cfi_offset w20, -32 +; DISABLE-NEXT: cbz w0, LBB1_4 +; DISABLE-NEXT: ; %bb.1: ; %for.body.preheader +; DISABLE-NEXT: mov w19, wzr +; DISABLE-NEXT: mov w20, #10 +; DISABLE-NEXT: LBB1_2: ; %for.body +; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: bl _something +; DISABLE-NEXT: subs w20, w20, #1 ; =1 +; DISABLE-NEXT: add w19, w0, w19 +; DISABLE-NEXT: b.ne LBB1_2 +; DISABLE-NEXT: ; %bb.3: ; %for.end +; DISABLE-NEXT: lsl w0, w19, #3 +; DISABLE-NEXT: b LBB1_5 +; DISABLE-NEXT: LBB1_4: ; %if.else +; DISABLE-NEXT: lsl w0, w1, #1 +; DISABLE-NEXT: LBB1_5: ; %if.end +; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; DISABLE-NEXT: ret entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.body @@ -134,26 +155,54 @@ ; Check that we do not perform the shrink-wrapping inside the loop even ; though that would be legal. The cost model must prevent that. -; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2: -; Prologue code. -; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-32]! -; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16] -; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16 -; CHECK: mov [[SUM:w[0-9]+]], wzr -; CHECK-NEXT: mov [[IV:w[0-9]+]], #10 -; Next BB. -; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body -; CHECK: bl _something -; CHECK-NEXT: subs [[IV]], [[IV]], #1 -; CHECK-NEXT: add [[SUM]], w0, [[SUM]] -; CHECK-NEXT: b.ne [[LOOP_LABEL]] -; Next BB. 
-; CHECK: ; %for.end -; CHECK: mov w0, [[SUM]] -; CHECK-NEXT: ldp [[CSR3]], [[CSR4]], [sp, #16] -; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #32 -; CHECK-NEXT: ret define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) { +; ENABLE-LABEL: freqSaveAndRestoreOutsideLoop2: +; ENABLE: ; %bb.0: ; %entry +; ENABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill +; ENABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; ENABLE-NEXT: add x29, sp, #16 ; =16 +; ENABLE-NEXT: .cfi_def_cfa w29, 16 +; ENABLE-NEXT: .cfi_offset w30, -8 +; ENABLE-NEXT: .cfi_offset w29, -16 +; ENABLE-NEXT: .cfi_offset w19, -24 +; ENABLE-NEXT: .cfi_offset w20, -32 +; ENABLE-NEXT: mov w19, wzr +; ENABLE-NEXT: mov w20, #10 +; ENABLE-NEXT: LBB2_1: ; %for.body +; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: bl _something +; ENABLE-NEXT: subs w20, w20, #1 ; =1 +; ENABLE-NEXT: add w19, w0, w19 +; ENABLE-NEXT: b.ne LBB2_1 +; ENABLE-NEXT: ; %bb.2: ; %for.end +; ENABLE-NEXT: mov w0, w19 +; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; ENABLE-NEXT: ret +; +; DISABLE-LABEL: freqSaveAndRestoreOutsideLoop2: +; DISABLE: ; %bb.0: ; %entry +; DISABLE-NEXT: stp x20, x19, [sp, #-32]! 
; 16-byte Folded Spill +; DISABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; DISABLE-NEXT: add x29, sp, #16 ; =16 +; DISABLE-NEXT: .cfi_def_cfa w29, 16 +; DISABLE-NEXT: .cfi_offset w30, -8 +; DISABLE-NEXT: .cfi_offset w29, -16 +; DISABLE-NEXT: .cfi_offset w19, -24 +; DISABLE-NEXT: .cfi_offset w20, -32 +; DISABLE-NEXT: mov w19, wzr +; DISABLE-NEXT: mov w20, #10 +; DISABLE-NEXT: LBB2_1: ; %for.body +; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: bl _something +; DISABLE-NEXT: subs w20, w20, #1 ; =1 +; DISABLE-NEXT: add w19, w0, w19 +; DISABLE-NEXT: b.ne LBB2_1 +; DISABLE-NEXT: ; %bb.2: ; %for.end +; DISABLE-NEXT: mov w0, w19 +; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; DISABLE-NEXT: ret entry: br label %for.body @@ -172,46 +221,67 @@ ; Check with a more complex case that we do not have save within the loop and ; restore outside. -; CHECK-LABEL: loopInfoSaveOutsideLoop: -; -; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-32]! -; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16] -; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16 -; -; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] -; -; CHECK: mov [[SUM:w[0-9]+]], wzr -; CHECK-NEXT: mov [[IV:w[0-9]+]], #10 -; -; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body -; CHECK: bl _something -; CHECK-NEXT: subs [[IV]], [[IV]], #1 -; CHECK-NEXT: add [[SUM]], w0, [[SUM]] -; CHECK-NEXT: b.ne [[LOOP_LABEL]] -; Next BB. -; CHECK: bl _somethingElse -; CHECK-NEXT: lsl w0, [[SUM]], #3 -; -; Jump to epilogue. -; DISABLE: b [[EPILOG_BB:LBB[0-9_]+]] -; -; DISABLE: [[ELSE_LABEL]]: ; %if.else -; Shift second argument by one and store into returned register. -; DISABLE: lsl w0, w1, #1 -; DISABLE: [[EPILOG_BB]]: ; %if.end -; Epilogue code. 
-; CHECK-NEXT: ldp [[CSR3]], [[CSR4]], [sp, #16] -; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #32 -; CHECK-NEXT: ret -; -; ENABLE: [[ELSE_LABEL]]: ; %if.else -; Shift second argument by one and store into returned register. -; ENABLE: lsl w0, w1, #1 -; ENABLE: ret define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) { +; ENABLE-LABEL: loopInfoSaveOutsideLoop: +; ENABLE: ; %bb.0: ; %entry +; ENABLE-NEXT: cbz w0, LBB3_4 +; ENABLE-NEXT: ; %bb.1: ; %for.body.preheader +; ENABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill +; ENABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; ENABLE-NEXT: add x29, sp, #16 ; =16 +; ENABLE-NEXT: .cfi_def_cfa w29, 16 +; ENABLE-NEXT: .cfi_offset w30, -8 +; ENABLE-NEXT: .cfi_offset w29, -16 +; ENABLE-NEXT: .cfi_offset w19, -24 +; ENABLE-NEXT: .cfi_offset w20, -32 +; ENABLE-NEXT: mov w19, wzr +; ENABLE-NEXT: mov w20, #10 +; ENABLE-NEXT: LBB3_2: ; %for.body +; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: bl _something +; ENABLE-NEXT: subs w20, w20, #1 ; =1 +; ENABLE-NEXT: add w19, w0, w19 +; ENABLE-NEXT: b.ne LBB3_2 +; ENABLE-NEXT: ; %bb.3: ; %for.end +; ENABLE-NEXT: bl _somethingElse +; ENABLE-NEXT: lsl w0, w19, #3 +; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; ENABLE-NEXT: ret +; ENABLE-NEXT: LBB3_4: ; %if.else +; ENABLE-NEXT: lsl w0, w1, #1 +; ENABLE-NEXT: ret +; +; DISABLE-LABEL: loopInfoSaveOutsideLoop: +; DISABLE: ; %bb.0: ; %entry +; DISABLE-NEXT: stp x20, x19, [sp, #-32]! 
; 16-byte Folded Spill +; DISABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; DISABLE-NEXT: add x29, sp, #16 ; =16 +; DISABLE-NEXT: .cfi_def_cfa w29, 16 +; DISABLE-NEXT: .cfi_offset w30, -8 +; DISABLE-NEXT: .cfi_offset w29, -16 +; DISABLE-NEXT: .cfi_offset w19, -24 +; DISABLE-NEXT: .cfi_offset w20, -32 +; DISABLE-NEXT: cbz w0, LBB3_4 +; DISABLE-NEXT: ; %bb.1: ; %for.body.preheader +; DISABLE-NEXT: mov w19, wzr +; DISABLE-NEXT: mov w20, #10 +; DISABLE-NEXT: LBB3_2: ; %for.body +; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: bl _something +; DISABLE-NEXT: subs w20, w20, #1 ; =1 +; DISABLE-NEXT: add w19, w0, w19 +; DISABLE-NEXT: b.ne LBB3_2 +; DISABLE-NEXT: ; %bb.3: ; %for.end +; DISABLE-NEXT: bl _somethingElse +; DISABLE-NEXT: lsl w0, w19, #3 +; DISABLE-NEXT: b LBB3_5 +; DISABLE-NEXT: LBB3_4: ; %if.else +; DISABLE-NEXT: lsl w0, w1, #1 +; DISABLE-NEXT: LBB3_5: ; %if.end +; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; DISABLE-NEXT: ret entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.body @@ -243,45 +313,57 @@ ; Check with a more complex case that we do not have restore within the loop and ; save outside. -; CHECK-LABEL: loopInfoRestoreOutsideLoop: -; -; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] -; -; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-32]! -; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16] -; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16 -; -; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] -; -; CHECK: bl _somethingElse -; CHECK-NEXT: mov [[SUM:w[0-9]+]], wzr -; CHECK-NEXT: mov [[IV:w[0-9]+]], #10 -; -; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body -; CHECK: bl _something -; CHECK-NEXT: subs [[IV]], [[IV]], #1 -; CHECK-NEXT: add [[SUM]], w0, [[SUM]] -; CHECK-NEXT: b.ne [[LOOP_LABEL]] -; Next BB. -; CHECK: lsl w0, [[SUM]], #3 -; -; Jump to epilogue. 
-; DISABLE: b [[EPILOG_BB:LBB[0-9_]+]] -; -; DISABLE: [[ELSE_LABEL]]: ; %if.else -; Shift second argument by one and store into returned register. -; DISABLE: lsl w0, w1, #1 -; DISABLE: [[EPILOG_BB]]: ; %if.end -; Epilogue code. -; CHECK: ldp [[CSR3]], [[CSR4]], [sp, #16] -; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #32 -; CHECK-NEXT: ret -; -; ENABLE: [[ELSE_LABEL]]: ; %if.else -; Shift second argument by one and store into returned register. -; ENABLE: lsl w0, w1, #1 -; ENABLE: ret define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) nounwind { +; ENABLE-LABEL: loopInfoRestoreOutsideLoop: +; ENABLE: ; %bb.0: ; %entry +; ENABLE-NEXT: cbz w0, LBB4_4 +; ENABLE-NEXT: ; %bb.1: ; %if.then +; ENABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill +; ENABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; ENABLE-NEXT: add x29, sp, #16 ; =16 +; ENABLE-NEXT: bl _somethingElse +; ENABLE-NEXT: mov w19, wzr +; ENABLE-NEXT: mov w20, #10 +; ENABLE-NEXT: LBB4_2: ; %for.body +; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: bl _something +; ENABLE-NEXT: subs w20, w20, #1 ; =1 +; ENABLE-NEXT: add w19, w0, w19 +; ENABLE-NEXT: b.ne LBB4_2 +; ENABLE-NEXT: ; %bb.3: ; %for.end +; ENABLE-NEXT: lsl w0, w19, #3 +; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; ENABLE-NEXT: ret +; ENABLE-NEXT: LBB4_4: ; %if.else +; ENABLE-NEXT: lsl w0, w1, #1 +; ENABLE-NEXT: ret +; +; DISABLE-LABEL: loopInfoRestoreOutsideLoop: +; DISABLE: ; %bb.0: ; %entry +; DISABLE-NEXT: stp x20, x19, [sp, #-32]! 
; 16-byte Folded Spill +; DISABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; DISABLE-NEXT: add x29, sp, #16 ; =16 +; DISABLE-NEXT: cbz w0, LBB4_4 +; DISABLE-NEXT: ; %bb.1: ; %if.then +; DISABLE-NEXT: bl _somethingElse +; DISABLE-NEXT: mov w19, wzr +; DISABLE-NEXT: mov w20, #10 +; DISABLE-NEXT: LBB4_2: ; %for.body +; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: bl _something +; DISABLE-NEXT: subs w20, w20, #1 ; =1 +; DISABLE-NEXT: add w19, w0, w19 +; DISABLE-NEXT: b.ne LBB4_2 +; DISABLE-NEXT: ; %bb.3: ; %for.end +; DISABLE-NEXT: lsl w0, w19, #3 +; DISABLE-NEXT: b LBB4_5 +; DISABLE-NEXT: LBB4_4: ; %if.else +; DISABLE-NEXT: lsl w0, w1, #1 +; DISABLE-NEXT: LBB4_5: ; %if.end +; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; DISABLE-NEXT: ret entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %if.then @@ -313,49 +395,74 @@ } ; Check that we handle function with no frame information correctly. -; CHECK-LABEL: emptyFrame: -; CHECK: ; %entry -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret define i32 @emptyFrame() { +; ENABLE-LABEL: emptyFrame: +; ENABLE: ; %bb.0: ; %entry +; ENABLE-NEXT: mov w0, wzr +; ENABLE-NEXT: ret +; +; DISABLE-LABEL: emptyFrame: +; DISABLE: ; %bb.0: ; %entry +; DISABLE-NEXT: mov w0, wzr +; DISABLE-NEXT: ret entry: ret i32 0 } ; Check that we handle variadic function correctly. -; CHECK-LABEL: variadicFunc: -; -; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; CHECK: sub sp, sp, #16 -; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] -; -; Sum is merged with the returned register. 
-; CHECK: add [[VA_BASE:x[0-9]+]], sp, #16 -; CHECK-NEXT: cmp w1, #1 -; CHECK-NEXT: str [[VA_BASE]], [sp, #8] -; CHECK-NEXT: mov [[SUM:w0]], wzr -; CHECK-NEXT: b.lt [[IFEND_LABEL:LBB[0-9_]+]] -; -; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body -; CHECK: ldr [[VA_ADDR:x[0-9]+]], [sp, #8] -; CHECK-NEXT: add [[NEXT_VA_ADDR:x[0-9]+]], [[VA_ADDR]], #8 -; CHECK-NEXT: str [[NEXT_VA_ADDR]], [sp, #8] -; CHECK-NEXT: ldr [[VA_VAL:w[0-9]+]], {{\[}}[[VA_ADDR]]] -; CHECK-NEXT: subs w1, w1, #1 -; CHECK-NEXT: add [[SUM]], [[SUM]], [[VA_VAL]] -; CHECK-NEXT: b.ne [[LOOP_LABEL]] -; CHECK-NEXT: [[IFEND_LABEL]]: -; Epilogue code. -; CHECK: add sp, sp, #16 -; CHECK-NEXT: ret -; -; CHECK: [[ELSE_LABEL]]: ; %if.else -; CHECK-NEXT: lsl w0, w1, #1 -; DISABLE-NEXT: add sp, sp, #16 -; CHECK-NEXT: ret define i32 @variadicFunc(i32 %cond, i32 %count, ...) nounwind { +; ENABLE-LABEL: variadicFunc: +; ENABLE: ; %bb.0: ; %entry +; ENABLE-NEXT: cbz w0, LBB6_4 +; ENABLE-NEXT: ; %bb.1: ; %if.then +; ENABLE-NEXT: sub sp, sp, #16 ; =16 +; ENABLE-NEXT: add x8, sp, #16 ; =16 +; ENABLE-NEXT: cmp w1, #1 ; =1 +; ENABLE-NEXT: str x8, [sp, #8] +; ENABLE-NEXT: mov w0, wzr +; ENABLE-NEXT: b.lt LBB6_3 +; ENABLE-NEXT: LBB6_2: ; %for.body +; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: ldr x8, [sp, #8] +; ENABLE-NEXT: add x9, x8, #8 ; =8 +; ENABLE-NEXT: str x9, [sp, #8] +; ENABLE-NEXT: ldr w8, [x8] +; ENABLE-NEXT: subs w1, w1, #1 ; =1 +; ENABLE-NEXT: add w0, w0, w8 +; ENABLE-NEXT: b.ne LBB6_2 +; ENABLE-NEXT: LBB6_3: ; %for.end +; ENABLE-NEXT: add sp, sp, #16 ; =16 +; ENABLE-NEXT: ret +; ENABLE-NEXT: LBB6_4: ; %if.else +; ENABLE-NEXT: lsl w0, w1, #1 +; ENABLE-NEXT: ret +; +; DISABLE-LABEL: variadicFunc: +; DISABLE: ; %bb.0: ; %entry +; DISABLE-NEXT: sub sp, sp, #16 ; =16 +; DISABLE-NEXT: cbz w0, LBB6_4 +; DISABLE-NEXT: ; %bb.1: ; %if.then +; DISABLE-NEXT: add x8, sp, #16 ; =16 +; DISABLE-NEXT: cmp w1, #1 ; =1 +; DISABLE-NEXT: str x8, [sp, #8] +; DISABLE-NEXT: mov w0, wzr +; DISABLE-NEXT: b.lt 
LBB6_3 +; DISABLE-NEXT: LBB6_2: ; %for.body +; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: ldr x8, [sp, #8] +; DISABLE-NEXT: add x9, x8, #8 ; =8 +; DISABLE-NEXT: str x9, [sp, #8] +; DISABLE-NEXT: ldr w8, [x8] +; DISABLE-NEXT: subs w1, w1, #1 ; =1 +; DISABLE-NEXT: add w0, w0, w8 +; DISABLE-NEXT: b.ne LBB6_2 +; DISABLE-NEXT: LBB6_3: ; %if.end +; DISABLE-NEXT: add sp, sp, #16 ; =16 +; DISABLE-NEXT: ret +; DISABLE-NEXT: LBB6_4: ; %if.else +; DISABLE-NEXT: lsl w0, w1, #1 +; DISABLE-NEXT: add sp, sp, #16 ; =16 +; DISABLE-NEXT: ret entry: %ap = alloca i8*, align 8 %tobool = icmp eq i32 %cond, 0 @@ -395,35 +502,55 @@ declare void @llvm.va_end(i8*) ; Check that we handle inline asm correctly. -; CHECK-LABEL: inlineAsm: -; -; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; Make sure we save the CSR used in the inline asm: x19. -; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x19]], [sp, #-16]! -; -; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] -; -; CHECK: mov [[IV:w[0-9]+]], #10 -; -; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body -; Inline asm statement. -; CHECK: subs [[IV]], [[IV]], #1 -; CHECK: add x19, x19, #1 -; CHECK: b.ne [[LOOP_LABEL]] -; Next BB. -; CHECK: mov w0, wzr -; Epilogue code. -; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #16 -; CHECK-NEXT: ret -; Next BB. -; CHECK: [[ELSE_LABEL]]: ; %if.else -; CHECK-NEXT: lsl w0, w1, #1 -; Epilogue code. -; DISABLE-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #16 -; CHECK-NEXT: ret define i32 @inlineAsm(i32 %cond, i32 %N) { +; ENABLE-LABEL: inlineAsm: +; ENABLE: ; %bb.0: ; %entry +; ENABLE-NEXT: cbz w0, LBB7_4 +; ENABLE-NEXT: ; %bb.1: ; %for.body.preheader +; ENABLE-NEXT: stp x20, x19, [sp, #-16]! 
; 16-byte Folded Spill +; ENABLE-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-NEXT: .cfi_offset w19, -8 +; ENABLE-NEXT: .cfi_offset w20, -16 +; ENABLE-NEXT: mov w8, #10 +; ENABLE-NEXT: LBB7_2: ; %for.body +; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: subs w8, w8, #1 ; =1 +; ENABLE-NEXT: ; InlineAsm Start +; ENABLE-NEXT: add x19, x19, #1 ; =1 +; ENABLE-NEXT: ; InlineAsm End +; ENABLE-NEXT: b.ne LBB7_2 +; ENABLE-NEXT: ; %bb.3: +; ENABLE-NEXT: mov w0, wzr +; ENABLE-NEXT: ldp x20, x19, [sp], #16 ; 16-byte Folded Reload +; ENABLE-NEXT: ret +; ENABLE-NEXT: LBB7_4: ; %if.else +; ENABLE-NEXT: lsl w0, w1, #1 +; ENABLE-NEXT: ret +; +; DISABLE-LABEL: inlineAsm: +; DISABLE: ; %bb.0: ; %entry +; DISABLE-NEXT: stp x20, x19, [sp, #-16]! ; 16-byte Folded Spill +; DISABLE-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-NEXT: .cfi_offset w19, -8 +; DISABLE-NEXT: .cfi_offset w20, -16 +; DISABLE-NEXT: cbz w0, LBB7_4 +; DISABLE-NEXT: ; %bb.1: ; %for.body.preheader +; DISABLE-NEXT: mov w8, #10 +; DISABLE-NEXT: LBB7_2: ; %for.body +; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: subs w8, w8, #1 ; =1 +; DISABLE-NEXT: ; InlineAsm Start +; DISABLE-NEXT: add x19, x19, #1 ; =1 +; DISABLE-NEXT: ; InlineAsm End +; DISABLE-NEXT: b.ne LBB7_2 +; DISABLE-NEXT: ; %bb.3: +; DISABLE-NEXT: mov w0, wzr +; DISABLE-NEXT: ldp x20, x19, [sp], #16 ; 16-byte Folded Reload +; DISABLE-NEXT: ret +; DISABLE-NEXT: LBB7_4: ; %if.else +; DISABLE-NEXT: lsl w0, w1, #1 +; DISABLE-NEXT: ldp x20, x19, [sp], #16 ; 16-byte Folded Reload +; DISABLE-NEXT: ret entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.body @@ -445,38 +572,55 @@ } ; Check that we handle calls to variadic functions correctly. -; CHECK-LABEL: callVariadicFunc: -; -; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. 
-; CHECK: sub sp, sp, #64 -; CHECK-NEXT: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #48] -; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #48 -; -; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] -; Setup of the varags. -; CHECK: stp x1, x1, [sp, #32] -; CHECK-NEXT: stp x1, x1, [sp, #16] -; CHECK-NEXT: stp x1, x1, [sp] -; CHECK-NEXT: mov w0, w1 -; CHECK-NEXT: bl _someVariadicFunc -; CHECK-NEXT: lsl w0, w0, #3 -; -; DISABLE: b [[IFEND_LABEL:LBB[0-9_]+]] -; DISABLE: [[ELSE_LABEL]]: ; %if.else -; DISABLE-NEXT: lsl w0, w1, #1 -; DISABLE: [[IFEND_LABEL]]: ; %if.end -; -; Epilogue code. -; CHECK: ldp [[CSR1]], [[CSR2]], [sp, #48] -; CHECK-NEXT: add sp, sp, #64 -; CHECK-NEXT: ret -; -; ENABLE: [[ELSE_LABEL]]: ; %if.else -; ENABLE-NEXT: lsl w0, w1, #1 -; ENABLE-NEXT: ret define i32 @callVariadicFunc(i32 %cond, i32 %N) { +; ENABLE-LABEL: callVariadicFunc: +; ENABLE: ; %bb.0: ; %entry +; ENABLE-NEXT: ; kill: def $w1 killed $w1 def $x1 +; ENABLE-NEXT: cbz w0, LBB8_2 +; ENABLE-NEXT: ; %bb.1: ; %if.then +; ENABLE-NEXT: sub sp, sp, #64 ; =64 +; ENABLE-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill +; ENABLE-NEXT: add x29, sp, #48 ; =48 +; ENABLE-NEXT: .cfi_def_cfa w29, 16 +; ENABLE-NEXT: .cfi_offset w30, -8 +; ENABLE-NEXT: .cfi_offset w29, -16 +; ENABLE-NEXT: stp x1, x1, [sp, #32] +; ENABLE-NEXT: stp x1, x1, [sp, #16] +; ENABLE-NEXT: stp x1, x1, [sp] +; ENABLE-NEXT: mov w0, w1 +; ENABLE-NEXT: bl _someVariadicFunc +; ENABLE-NEXT: lsl w0, w0, #3 +; ENABLE-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload +; ENABLE-NEXT: add sp, sp, #64 ; =64 +; ENABLE-NEXT: ret +; ENABLE-NEXT: LBB8_2: ; %if.else +; ENABLE-NEXT: lsl w0, w1, #1 +; ENABLE-NEXT: ret +; +; DISABLE-LABEL: callVariadicFunc: +; DISABLE: ; %bb.0: ; %entry +; DISABLE-NEXT: sub sp, sp, #64 ; =64 +; DISABLE-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill +; DISABLE-NEXT: add x29, sp, #48 ; =48 +; DISABLE-NEXT: .cfi_def_cfa w29, 16 +; DISABLE-NEXT: .cfi_offset w30, -8 +; DISABLE-NEXT: .cfi_offset w29, -16 +; 
DISABLE-NEXT: ; kill: def $w1 killed $w1 def $x1 +; DISABLE-NEXT: cbz w0, LBB8_2 +; DISABLE-NEXT: ; %bb.1: ; %if.then +; DISABLE-NEXT: stp x1, x1, [sp, #32] +; DISABLE-NEXT: stp x1, x1, [sp, #16] +; DISABLE-NEXT: stp x1, x1, [sp] +; DISABLE-NEXT: mov w0, w1 +; DISABLE-NEXT: bl _someVariadicFunc +; DISABLE-NEXT: lsl w0, w0, #3 +; DISABLE-NEXT: b LBB8_3 +; DISABLE-NEXT: LBB8_2: ; %if.else +; DISABLE-NEXT: lsl w0, w1, #1 +; DISABLE-NEXT: LBB8_3: ; %if.end +; DISABLE-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload +; DISABLE-NEXT: add sp, sp, #64 ; =64 +; DISABLE-NEXT: ret entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %if.then @@ -501,24 +645,35 @@ ; Although this is not incorrect to insert such code, it is useless ; and it hurts the binary size. ; -; CHECK-LABEL: noreturn: -; DISABLE: stp -; -; CHECK: cbnz w0, [[ABORT:LBB[0-9_]+]] -; -; CHECK: mov w0, #42 -; -; DISABLE-NEXT: ldp -; -; CHECK-NEXT: ret -; -; CHECK: [[ABORT]]: ; %if.abort -; -; ENABLE: stp -; -; CHECK: bl _abort -; ENABLE-NOT: ldp define i32 @noreturn(i8 signext %bad_thing) { +; ENABLE-LABEL: noreturn: +; ENABLE: ; %bb.0: ; %entry +; ENABLE-NEXT: cbnz w0, LBB9_2 +; ENABLE-NEXT: ; %bb.1: ; %if.end +; ENABLE-NEXT: mov w0, #42 +; ENABLE-NEXT: ret +; ENABLE-NEXT: LBB9_2: ; %if.abort +; ENABLE-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; ENABLE-NEXT: mov x29, sp +; ENABLE-NEXT: .cfi_def_cfa w29, 16 +; ENABLE-NEXT: .cfi_offset w30, -8 +; ENABLE-NEXT: .cfi_offset w29, -16 +; ENABLE-NEXT: bl _abort +; +; DISABLE-LABEL: noreturn: +; DISABLE: ; %bb.0: ; %entry +; DISABLE-NEXT: stp x29, x30, [sp, #-16]! 
; 16-byte Folded Spill +; DISABLE-NEXT: mov x29, sp +; DISABLE-NEXT: .cfi_def_cfa w29, 16 +; DISABLE-NEXT: .cfi_offset w30, -8 +; DISABLE-NEXT: .cfi_offset w29, -16 +; DISABLE-NEXT: cbnz w0, LBB9_2 +; DISABLE-NEXT: ; %bb.1: ; %if.end +; DISABLE-NEXT: mov w0, #42 +; DISABLE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; DISABLE-NEXT: ret +; DISABLE-NEXT: LBB9_2: ; %if.abort +; DISABLE-NEXT: bl _abort entry: %tobool = icmp eq i8 %bad_thing, 0 br i1 %tobool, label %if.end, label %if.abort @@ -543,9 +698,60 @@ ; should return gracefully and continue compilation. ; The only condition for this test is the compilation finishes correctly. ; -; CHECK-LABEL: infiniteloop -; CHECK: ret define void @infiniteloop() { +; ENABLE-LABEL: infiniteloop: +; ENABLE: ; %bb.0: ; %entry +; ENABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill +; ENABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; ENABLE-NEXT: add x29, sp, #16 ; =16 +; ENABLE-NEXT: .cfi_def_cfa w29, 16 +; ENABLE-NEXT: .cfi_offset w30, -8 +; ENABLE-NEXT: .cfi_offset w29, -16 +; ENABLE-NEXT: .cfi_offset w19, -24 +; ENABLE-NEXT: .cfi_offset w20, -32 +; ENABLE-NEXT: cbnz wzr, LBB10_3 +; ENABLE-NEXT: ; %bb.1: ; %if.then +; ENABLE-NEXT: sub x19, sp, #16 ; =16 +; ENABLE-NEXT: mov sp, x19 +; ENABLE-NEXT: mov w20, wzr +; ENABLE-NEXT: LBB10_2: ; %for.body +; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: bl _something +; ENABLE-NEXT: add w20, w0, w20 +; ENABLE-NEXT: str w20, [x19] +; ENABLE-NEXT: b LBB10_2 +; ENABLE-NEXT: LBB10_3: ; %if.end +; ENABLE-NEXT: sub sp, x29, #16 ; =16 +; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; ENABLE-NEXT: ret +; +; DISABLE-LABEL: infiniteloop: +; DISABLE: ; %bb.0: ; %entry +; DISABLE-NEXT: stp x20, x19, [sp, #-32]! 
; 16-byte Folded Spill +; DISABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; DISABLE-NEXT: add x29, sp, #16 ; =16 +; DISABLE-NEXT: .cfi_def_cfa w29, 16 +; DISABLE-NEXT: .cfi_offset w30, -8 +; DISABLE-NEXT: .cfi_offset w29, -16 +; DISABLE-NEXT: .cfi_offset w19, -24 +; DISABLE-NEXT: .cfi_offset w20, -32 +; DISABLE-NEXT: cbnz wzr, LBB10_3 +; DISABLE-NEXT: ; %bb.1: ; %if.then +; DISABLE-NEXT: sub x19, sp, #16 ; =16 +; DISABLE-NEXT: mov sp, x19 +; DISABLE-NEXT: mov w20, wzr +; DISABLE-NEXT: LBB10_2: ; %for.body +; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: bl _something +; DISABLE-NEXT: add w20, w0, w20 +; DISABLE-NEXT: str w20, [x19] +; DISABLE-NEXT: b LBB10_2 +; DISABLE-NEXT: LBB10_3: ; %if.end +; DISABLE-NEXT: sub sp, x29, #16 ; =16 +; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; DISABLE-NEXT: ret entry: br i1 undef, label %if.then, label %if.end @@ -565,9 +771,72 @@ } ; Another infinite loop test this time with a body bigger than just one block. -; CHECK-LABEL: infiniteloop2 -; CHECK: ret define void @infiniteloop2() { +; ENABLE-LABEL: infiniteloop2: +; ENABLE: ; %bb.0: ; %entry +; ENABLE-NEXT: stp x20, x19, [sp, #-32]! 
; 16-byte Folded Spill +; ENABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; ENABLE-NEXT: add x29, sp, #16 ; =16 +; ENABLE-NEXT: .cfi_def_cfa w29, 16 +; ENABLE-NEXT: .cfi_offset w30, -8 +; ENABLE-NEXT: .cfi_offset w29, -16 +; ENABLE-NEXT: .cfi_offset w19, -24 +; ENABLE-NEXT: .cfi_offset w20, -32 +; ENABLE-NEXT: cbnz wzr, LBB11_3 +; ENABLE-NEXT: ; %bb.1: ; %if.then +; ENABLE-NEXT: sub x8, sp, #16 ; =16 +; ENABLE-NEXT: mov sp, x8 +; ENABLE-NEXT: mov w9, wzr +; ENABLE-NEXT: ; InlineAsm Start +; ENABLE-NEXT: mov x10, #0 +; ENABLE-NEXT: ; InlineAsm End +; ENABLE-NEXT: LBB11_2: ; %for.body +; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: add w9, w10, w9 +; ENABLE-NEXT: str w9, [x8] +; ENABLE-NEXT: ; InlineAsm Start +; ENABLE-NEXT: nop +; ENABLE-NEXT: ; InlineAsm End +; ENABLE-NEXT: mov w9, #1 +; ENABLE-NEXT: b LBB11_2 +; ENABLE-NEXT: LBB11_3: ; %if.end +; ENABLE-NEXT: sub sp, x29, #16 ; =16 +; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; ENABLE-NEXT: ret +; +; DISABLE-LABEL: infiniteloop2: +; DISABLE: ; %bb.0: ; %entry +; DISABLE-NEXT: stp x20, x19, [sp, #-32]! 
; 16-byte Folded Spill +; DISABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; DISABLE-NEXT: add x29, sp, #16 ; =16 +; DISABLE-NEXT: .cfi_def_cfa w29, 16 +; DISABLE-NEXT: .cfi_offset w30, -8 +; DISABLE-NEXT: .cfi_offset w29, -16 +; DISABLE-NEXT: .cfi_offset w19, -24 +; DISABLE-NEXT: .cfi_offset w20, -32 +; DISABLE-NEXT: cbnz wzr, LBB11_3 +; DISABLE-NEXT: ; %bb.1: ; %if.then +; DISABLE-NEXT: sub x8, sp, #16 ; =16 +; DISABLE-NEXT: mov sp, x8 +; DISABLE-NEXT: mov w9, wzr +; DISABLE-NEXT: ; InlineAsm Start +; DISABLE-NEXT: mov x10, #0 +; DISABLE-NEXT: ; InlineAsm End +; DISABLE-NEXT: LBB11_2: ; %for.body +; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: add w9, w10, w9 +; DISABLE-NEXT: str w9, [x8] +; DISABLE-NEXT: ; InlineAsm Start +; DISABLE-NEXT: nop +; DISABLE-NEXT: ; InlineAsm End +; DISABLE-NEXT: mov w9, #1 +; DISABLE-NEXT: b LBB11_2 +; DISABLE-NEXT: LBB11_3: ; %if.end +; DISABLE-NEXT: sub sp, x29, #16 ; =16 +; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; DISABLE-NEXT: ret entry: br i1 undef, label %if.then, label %if.end @@ -595,9 +864,54 @@ } ; Another infinite loop test this time with two nested infinite loop. 
-; CHECK-LABEL: infiniteloop3 -; CHECK: ret define void @infiniteloop3() { +; ENABLE-LABEL: infiniteloop3: +; ENABLE: ; %bb.0: ; %entry +; ENABLE-NEXT: cbnz wzr, LBB12_5 +; ENABLE-NEXT: ; %bb.1: ; %loop2a.preheader +; ENABLE-NEXT: mov x8, xzr +; ENABLE-NEXT: mov x9, xzr +; ENABLE-NEXT: mov x11, xzr +; ENABLE-NEXT: b LBB12_3 +; ENABLE-NEXT: LBB12_2: ; %loop2b +; ENABLE-NEXT: ; in Loop: Header=BB12_3 Depth=1 +; ENABLE-NEXT: str x10, [x11] +; ENABLE-NEXT: mov x11, x10 +; ENABLE-NEXT: LBB12_3: ; %loop1 +; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: mov x10, x9 +; ENABLE-NEXT: ldr x9, [x8] +; ENABLE-NEXT: cbnz x8, LBB12_2 +; ENABLE-NEXT: ; %bb.4: ; in Loop: Header=BB12_3 Depth=1 +; ENABLE-NEXT: mov x8, x10 +; ENABLE-NEXT: mov x11, x10 +; ENABLE-NEXT: b LBB12_3 +; ENABLE-NEXT: LBB12_5: ; %end +; ENABLE-NEXT: ret +; +; DISABLE-LABEL: infiniteloop3: +; DISABLE: ; %bb.0: ; %entry +; DISABLE-NEXT: cbnz wzr, LBB12_5 +; DISABLE-NEXT: ; %bb.1: ; %loop2a.preheader +; DISABLE-NEXT: mov x8, xzr +; DISABLE-NEXT: mov x9, xzr +; DISABLE-NEXT: mov x11, xzr +; DISABLE-NEXT: b LBB12_3 +; DISABLE-NEXT: LBB12_2: ; %loop2b +; DISABLE-NEXT: ; in Loop: Header=BB12_3 Depth=1 +; DISABLE-NEXT: str x10, [x11] +; DISABLE-NEXT: mov x11, x10 +; DISABLE-NEXT: LBB12_3: ; %loop1 +; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: mov x10, x9 +; DISABLE-NEXT: ldr x9, [x8] +; DISABLE-NEXT: cbnz x8, LBB12_2 +; DISABLE-NEXT: ; %bb.4: ; in Loop: Header=BB12_3 Depth=1 +; DISABLE-NEXT: mov x8, x10 +; DISABLE-NEXT: mov x11, x10 +; DISABLE-NEXT: b LBB12_3 +; DISABLE-NEXT: LBB12_5: ; %end +; DISABLE-NEXT: ret entry: br i1 undef, label %loop2a, label %body @@ -628,22 +942,50 @@ ; Re-aligned stack pointer. See bug 26642. Avoid clobbering live ; values in the prologue when re-aligning the stack pointer. 
-; CHECK-LABEL: stack_realign: -; ENABLE-DAG: lsl w[[LSL1:[0-9]+]], w0, w1 -; ENABLE-DAG: lsl w[[LSL2:[0-9]+]], w1, w0 -; DISABLE-NOT: lsl w[[LSL1:[0-9]+]], w0, w1 -; DISABLE-NOT: lsl w[[LSL2:[0-9]+]], w1, w0 -; CHECK: stp x29, x30, [sp, #-16]! -; CHECK: mov x29, sp -; ENABLE-NOT: sub x[[LSL1]], sp, #16 -; ENABLE-NOT: sub x[[LSL2]], sp, #16 -; DISABLE: sub x{{[0-9]+}}, sp, #16 -; DISABLE-DAG: lsl w[[LSL1:[0-9]+]], w0, w1 -; DISABLE-DAG: lsl w[[LSL2:[0-9]+]], w1, w0 -; CHECK-DAG: str w[[LSL1]], -; CHECK-DAG: str w[[LSL2]], - define i32 @stack_realign(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2) { +; ENABLE-LABEL: stack_realign: +; ENABLE: ; %bb.0: +; ENABLE-NEXT: lsl w8, w0, w1 +; ENABLE-NEXT: cmp w0, w1 +; ENABLE-NEXT: lsl w9, w1, w0 +; ENABLE-NEXT: b.ge LBB13_2 +; ENABLE-NEXT: ; %bb.1: ; %true +; ENABLE-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; ENABLE-NEXT: mov x29, sp +; ENABLE-NEXT: sub x1, sp, #16 ; =16 +; ENABLE-NEXT: and sp, x1, #0xffffffffffffffe0 +; ENABLE-NEXT: .cfi_def_cfa w29, 16 +; ENABLE-NEXT: .cfi_offset w30, -8 +; ENABLE-NEXT: .cfi_offset w29, -16 +; ENABLE-NEXT: str w0, [sp] +; ENABLE-NEXT: mov sp, x29 +; ENABLE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; ENABLE-NEXT: LBB13_2: ; %false +; ENABLE-NEXT: str w8, [x2] +; ENABLE-NEXT: str w9, [x3] +; ENABLE-NEXT: ret +; +; DISABLE-LABEL: stack_realign: +; DISABLE: ; %bb.0: +; DISABLE-NEXT: stp x29, x30, [sp, #-16]! 
; 16-byte Folded Spill +; DISABLE-NEXT: mov x29, sp +; DISABLE-NEXT: sub x9, sp, #16 ; =16 +; DISABLE-NEXT: and sp, x9, #0xffffffffffffffe0 +; DISABLE-NEXT: .cfi_def_cfa w29, 16 +; DISABLE-NEXT: .cfi_offset w30, -8 +; DISABLE-NEXT: .cfi_offset w29, -16 +; DISABLE-NEXT: lsl w8, w0, w1 +; DISABLE-NEXT: cmp w0, w1 +; DISABLE-NEXT: lsl w9, w1, w0 +; DISABLE-NEXT: b.ge LBB13_2 +; DISABLE-NEXT: ; %bb.1: ; %true +; DISABLE-NEXT: str w0, [sp] +; DISABLE-NEXT: LBB13_2: ; %false +; DISABLE-NEXT: str w8, [x2] +; DISABLE-NEXT: str w9, [x3] +; DISABLE-NEXT: mov sp, x29 +; DISABLE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; DISABLE-NEXT: ret %tmp = alloca i32, align 32 %shl1 = shl i32 %a, %b %shl2 = shl i32 %b, %a @@ -667,12 +1009,126 @@ ; ensuring we have a scratch register to re-align the stack pointer is ; too complicated. Output should be the same for both enabled and ; disabled shrink wrapping. -; CHECK-LABEL: stack_realign2: -; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #-{{[0-9]+}}]! -; CHECK: add x29, sp, #{{[0-9]+}} -; CHECK: lsl {{w[0-9]+}}, w0, w1 - define void @stack_realign2(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2, i32* %ptr3, i32* %ptr4, i32* %ptr5, i32* %ptr6) { +; ENABLE-LABEL: stack_realign2: +; ENABLE: ; %bb.0: +; ENABLE-NEXT: stp x28, x27, [sp, #-96]! 
; 16-byte Folded Spill +; ENABLE-NEXT: stp x26, x25, [sp, #16] ; 16-byte Folded Spill +; ENABLE-NEXT: stp x24, x23, [sp, #32] ; 16-byte Folded Spill +; ENABLE-NEXT: stp x22, x21, [sp, #48] ; 16-byte Folded Spill +; ENABLE-NEXT: stp x20, x19, [sp, #64] ; 16-byte Folded Spill +; ENABLE-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill +; ENABLE-NEXT: add x29, sp, #80 ; =80 +; ENABLE-NEXT: sub x9, sp, #32 ; =32 +; ENABLE-NEXT: and sp, x9, #0xffffffffffffffe0 +; ENABLE-NEXT: .cfi_def_cfa w29, 16 +; ENABLE-NEXT: .cfi_offset w30, -8 +; ENABLE-NEXT: .cfi_offset w29, -16 +; ENABLE-NEXT: .cfi_offset w19, -24 +; ENABLE-NEXT: .cfi_offset w20, -32 +; ENABLE-NEXT: .cfi_offset w21, -40 +; ENABLE-NEXT: .cfi_offset w22, -48 +; ENABLE-NEXT: .cfi_offset w23, -56 +; ENABLE-NEXT: .cfi_offset w24, -64 +; ENABLE-NEXT: .cfi_offset w25, -72 +; ENABLE-NEXT: .cfi_offset w26, -80 +; ENABLE-NEXT: .cfi_offset w27, -88 +; ENABLE-NEXT: .cfi_offset w28, -96 +; ENABLE-NEXT: lsl w8, w0, w1 +; ENABLE-NEXT: lsl w9, w1, w0 +; ENABLE-NEXT: lsr w10, w0, w1 +; ENABLE-NEXT: lsr w11, w1, w0 +; ENABLE-NEXT: add w12, w1, w0 +; ENABLE-NEXT: sub w13, w1, w0 +; ENABLE-NEXT: cmp w0, w1 +; ENABLE-NEXT: add w17, w8, w9 +; ENABLE-NEXT: sub w16, w9, w10 +; ENABLE-NEXT: add w15, w10, w11 +; ENABLE-NEXT: add w14, w11, w12 +; ENABLE-NEXT: b.ge LBB14_2 +; ENABLE-NEXT: ; %bb.1: ; %true +; ENABLE-NEXT: str w0, [sp] +; ENABLE-NEXT: ; InlineAsm Start +; ENABLE-NEXT: nop +; ENABLE-NEXT: ; InlineAsm End +; ENABLE-NEXT: LBB14_2: ; %false +; ENABLE-NEXT: str w8, [x2] +; ENABLE-NEXT: str w9, [x3] +; ENABLE-NEXT: str w10, [x4] +; ENABLE-NEXT: str w11, [x5] +; ENABLE-NEXT: str w12, [x6] +; ENABLE-NEXT: str w13, [x7] +; ENABLE-NEXT: stp w0, w1, [x2, #4] +; ENABLE-NEXT: stp w17, w16, [x2, #12] +; ENABLE-NEXT: stp w15, w14, [x2, #20] +; ENABLE-NEXT: sub sp, x29, #80 ; =80 +; ENABLE-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x20, x19, [sp, #64] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x22, x21, 
[sp, #48] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x24, x23, [sp, #32] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x26, x25, [sp, #16] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x28, x27, [sp], #96 ; 16-byte Folded Reload +; ENABLE-NEXT: ret +; +; DISABLE-LABEL: stack_realign2: +; DISABLE: ; %bb.0: +; DISABLE-NEXT: stp x28, x27, [sp, #-96]! ; 16-byte Folded Spill +; DISABLE-NEXT: stp x26, x25, [sp, #16] ; 16-byte Folded Spill +; DISABLE-NEXT: stp x24, x23, [sp, #32] ; 16-byte Folded Spill +; DISABLE-NEXT: stp x22, x21, [sp, #48] ; 16-byte Folded Spill +; DISABLE-NEXT: stp x20, x19, [sp, #64] ; 16-byte Folded Spill +; DISABLE-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill +; DISABLE-NEXT: add x29, sp, #80 ; =80 +; DISABLE-NEXT: sub x9, sp, #32 ; =32 +; DISABLE-NEXT: and sp, x9, #0xffffffffffffffe0 +; DISABLE-NEXT: .cfi_def_cfa w29, 16 +; DISABLE-NEXT: .cfi_offset w30, -8 +; DISABLE-NEXT: .cfi_offset w29, -16 +; DISABLE-NEXT: .cfi_offset w19, -24 +; DISABLE-NEXT: .cfi_offset w20, -32 +; DISABLE-NEXT: .cfi_offset w21, -40 +; DISABLE-NEXT: .cfi_offset w22, -48 +; DISABLE-NEXT: .cfi_offset w23, -56 +; DISABLE-NEXT: .cfi_offset w24, -64 +; DISABLE-NEXT: .cfi_offset w25, -72 +; DISABLE-NEXT: .cfi_offset w26, -80 +; DISABLE-NEXT: .cfi_offset w27, -88 +; DISABLE-NEXT: .cfi_offset w28, -96 +; DISABLE-NEXT: lsl w8, w0, w1 +; DISABLE-NEXT: lsl w9, w1, w0 +; DISABLE-NEXT: lsr w10, w0, w1 +; DISABLE-NEXT: lsr w11, w1, w0 +; DISABLE-NEXT: add w12, w1, w0 +; DISABLE-NEXT: sub w13, w1, w0 +; DISABLE-NEXT: cmp w0, w1 +; DISABLE-NEXT: add w17, w8, w9 +; DISABLE-NEXT: sub w16, w9, w10 +; DISABLE-NEXT: add w15, w10, w11 +; DISABLE-NEXT: add w14, w11, w12 +; DISABLE-NEXT: b.ge LBB14_2 +; DISABLE-NEXT: ; %bb.1: ; %true +; DISABLE-NEXT: str w0, [sp] +; DISABLE-NEXT: ; InlineAsm Start +; DISABLE-NEXT: nop +; DISABLE-NEXT: ; InlineAsm End +; DISABLE-NEXT: LBB14_2: ; %false +; DISABLE-NEXT: str w8, [x2] +; DISABLE-NEXT: str w9, [x3] +; DISABLE-NEXT: str w10, [x4] +; DISABLE-NEXT: 
str w11, [x5] +; DISABLE-NEXT: str w12, [x6] +; DISABLE-NEXT: str w13, [x7] +; DISABLE-NEXT: stp w0, w1, [x2, #4] +; DISABLE-NEXT: stp w17, w16, [x2, #12] +; DISABLE-NEXT: stp w15, w14, [x2, #20] +; DISABLE-NEXT: sub sp, x29, #80 ; =80 +; DISABLE-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x20, x19, [sp, #64] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x22, x21, [sp, #48] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x24, x23, [sp, #32] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x26, x25, [sp, #16] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x28, x27, [sp], #96 ; 16-byte Folded Reload +; DISABLE-NEXT: ret %tmp = alloca i32, align 32 %tmp1 = shl i32 %a, %b %tmp2 = shl i32 %b, %a Index: test/CodeGen/AArch64/branch-relax-cbz.ll =================================================================== --- test/CodeGen/AArch64/branch-relax-cbz.ll +++ test/CodeGen/AArch64/branch-relax-cbz.ll @@ -1,28 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-apple-darwin -aarch64-cbz-offset-bits=3 < %s | FileCheck %s -; CHECK-LABEL: _split_block_no_fallthrough: -; CHECK: cmn x{{[0-9]+}}, #5 -; CHECK-NEXT: b.le [[B2:LBB[0-9]+_[0-9]+]] - -; CHECK-NEXT: ; %bb.1: ; %b3 -; CHECK: ldr [[LOAD:w[0-9]+]] -; CHECK: cbnz [[LOAD]], [[B8:LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: b [[B7:LBB[0-9]+_[0-9]+]] - -; CHECK-NEXT: [[B8]]: ; %b8 -; CHECK-NEXT: ret - -; CHECK-NEXT: [[B2]]: ; %b2 -; CHECK: mov w{{[0-9]+}}, #93 -; CHECK: bl _extfunc -; CHECK: cbz w{{[0-9]+}}, [[B7]] -; CHECK-NEXT: b [[B8]] - -; CHECK-NEXT: [[B7]]: ; %b7 -; CHECK: mov w{{[0-9]+}}, #13 -; CHECK: b _extfunc define void @split_block_no_fallthrough(i64 %val) #0 { +; CHECK-LABEL: split_block_no_fallthrough: +; CHECK: ; %bb.0: ; %bb +; CHECK-NEXT: cmn x0, #5 ; =5 +; CHECK-NEXT: b.le LBB0_3 +; CHECK-NEXT: ; %bb.1: ; %b3 +; CHECK-NEXT: ldr w8, [x8] +; CHECK-NEXT: cbnz w8, LBB0_2 +; CHECK-NEXT: b LBB0_4 +; CHECK-NEXT: LBB0_2: ; %b8 +; CHECK-NEXT: 
ret +; CHECK-NEXT: LBB0_3: ; %b2 +; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-NEXT: mov w0, #93 +; CHECK-NEXT: bl _extfunc +; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-NEXT: cbz w0, LBB0_4 +; CHECK-NEXT: b LBB0_2 +; CHECK-NEXT: LBB0_4: ; %b7 +; CHECK-NEXT: mov w0, #13 +; CHECK-NEXT: b _extfunc bb: %c0 = icmp sgt i64 %val, -5 br i1 %c0, label %b3, label %b2 Index: test/CodeGen/AArch64/taildup-cfi.ll =================================================================== --- test/CodeGen/AArch64/taildup-cfi.ll +++ test/CodeGen/AArch64/taildup-cfi.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; REQUIRES: asserts ; RUN: llc -mtriple=arm64-unknown-linux-gnu -debug-only=tailduplication %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=LINUX ; RUN: llc -mtriple=arm64-apple-darwin -debug-only=tailduplication %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=DARWIN Index: test/CodeGen/ARM/arm-shrink-wrapping-linux.ll =================================================================== --- test/CodeGen/ARM/arm-shrink-wrapping-linux.ll +++ test/CodeGen/ARM/arm-shrink-wrapping-linux.ll @@ -1,5 +1,6 @@ -; RUN: llc %s -o - -enable-shrink-wrap=true | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE -; RUN: llc %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -o - -enable-shrink-wrap=true | FileCheck %s --check-prefix=ENABLE +; RUN: llc %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=DISABLE ; We cannot merge this test with the main test for shrink-wrapping, because ; the code path we want to exerce is not taken with ios lowering. target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n8:16:32-S64" @@ -12,53 +13,185 @@ ; The exit block of the loop happens to also lead to defs/uses of CSRs. 
; It also post-dominates the loop body and we use to generate invalid ; restore sequence. I.e., we restored too early. -; -; CHECK-LABEL: wrongUseOfPostDominate: -; -; The prologue is the first thing happening in the function -; without shrink-wrapping. -; DISABLE: push -; -; CHECK: cmn r1, #1 -; -; With shrink-wrapping, we branch to a pre-header, where the prologue -; is located. -; ENABLE-NEXT: ble [[LOOP_PREHEADER:[.a-zA-Z0-9_]+]] -; Without shrink-wrapping, we go straight into the loop. -; DISABLE-NEXT: ble [[LOOP_HEADER:[.a-zA-Z0-9_]+]] -; -; CHECK: @ %if.end29 -; DISABLE-NEXT: pop -; ENABLE-NEXT: bx lr -; -; ENABLE: [[LOOP_PREHEADER]] -; ENABLE: push -; We must not find a pop here, otherwise that means we are in the loop -; and are restoring before using the saved CSRs. -; ENABLE-NOT: pop -; ENALBE-NEXT: [[LOOP_HEADER:[.a-zA-Z0-9_]+]]: @ %while.cond2.outer -; -; DISABLE: [[LOOP_HEADER]]: @ %while.cond2.outer -; -; ENABLE-NOT: pop -; -; CHECK: @ %while.cond2 -; CHECK: add -; CHECK-NEXT: cmp r{{[0-1]+}}, #1 -; Jump to the return block -; CHECK-NEXT: beq [[RETURN_BLOCK:[.a-zA-Z0-9_]+]] -; -; Use the back edge to check we get the label of the loop right. -; This is to make sure we check the right loop pattern. -; CHECK: @ %while.body24.land.rhs14_crit_edge -; CHECK: cmp r{{[0-9]+}}, #192 -; CHECK-NEXT bhs [[LOOP_HEADER]] -; -; CHECK: [[RETURN_BLOCK]]: -; Set the return value. 
-; CHECK-NEXT: mov r0, -; CHECK-NEXT: pop + define fastcc i8* @wrongUseOfPostDominate(i8* readonly %s, i32 %off, i8* readnone %lim) { +; ENABLE-LABEL: wrongUseOfPostDominate: +; ENABLE: @ %bb.0: @ %entry +; ENABLE-NEXT: cmn r1, #1 +; ENABLE-NEXT: ble .LBB0_6 +; ENABLE-NEXT: @ %bb.1: @ %while.cond.preheader +; ENABLE-NEXT: cmp r1, #0 +; ENABLE-NEXT: beq .LBB0_5 +; ENABLE-NEXT: @ %bb.2: @ %while.cond.preheader +; ENABLE-NEXT: cmp r0, r2 +; ENABLE-NEXT: bhs .LBB0_5 +; ENABLE-NEXT: @ %bb.3: @ %while.body.preheader +; ENABLE-NEXT: movw r12, :lower16:skip +; ENABLE-NEXT: sub r1, r1, #1 +; ENABLE-NEXT: movt r12, :upper16:skip +; ENABLE-NEXT: .LBB0_4: @ %while.body +; ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: ldrb r3, [r0] +; ENABLE-NEXT: ldrb r3, [r12, r3] +; ENABLE-NEXT: add r0, r0, r3 +; ENABLE-NEXT: sub r3, r1, #1 +; ENABLE-NEXT: cmp r3, r1 +; ENABLE-NEXT: bxhs lr +; ENABLE-NEXT: cmp r0, r2 +; ENABLE-NEXT: mov r1, r3 +; ENABLE-NEXT: blo .LBB0_4 +; ENABLE-NEXT: .LBB0_5: @ %if.end29 +; ENABLE-NEXT: bx lr +; ENABLE-NEXT: .LBB0_6: +; ENABLE-NEXT: .save {r11, lr} +; ENABLE-NEXT: push {r11, lr} +; ENABLE-NEXT: .LBB0_7: @ %while.cond2.outer +; ENABLE-NEXT: @ =>This Loop Header: Depth=1 +; ENABLE-NEXT: @ Child Loop BB0_8 Depth 2 +; ENABLE-NEXT: @ Child Loop BB0_15 Depth 2 +; ENABLE-NEXT: mov r3, r0 +; ENABLE-NEXT: .LBB0_8: @ %while.cond2 +; ENABLE-NEXT: @ Parent Loop BB0_7 Depth=1 +; ENABLE-NEXT: @ => This Inner Loop Header: Depth=2 +; ENABLE-NEXT: add r1, r1, #1 +; ENABLE-NEXT: cmp r1, #1 +; ENABLE-NEXT: beq .LBB0_18 +; ENABLE-NEXT: @ %bb.9: @ %while.body4 +; ENABLE-NEXT: @ in Loop: Header=BB0_8 Depth=2 +; ENABLE-NEXT: cmp r3, r2 +; ENABLE-NEXT: bls .LBB0_8 +; ENABLE-NEXT: @ %bb.10: @ %if.then7 +; ENABLE-NEXT: @ in Loop: Header=BB0_7 Depth=1 +; ENABLE-NEXT: mov r0, r3 +; ENABLE-NEXT: ldrb r12, [r0, #-1]! 
+; ENABLE-NEXT: sxtb lr, r12 +; ENABLE-NEXT: cmn lr, #1 +; ENABLE-NEXT: bgt .LBB0_7 +; ENABLE-NEXT: @ %bb.11: @ %if.then7 +; ENABLE-NEXT: @ in Loop: Header=BB0_7 Depth=1 +; ENABLE-NEXT: cmp r0, r2 +; ENABLE-NEXT: bls .LBB0_7 +; ENABLE-NEXT: @ %bb.12: @ %land.rhs14.preheader +; ENABLE-NEXT: @ in Loop: Header=BB0_7 Depth=1 +; ENABLE-NEXT: cmn lr, #1 +; ENABLE-NEXT: bgt .LBB0_7 +; ENABLE-NEXT: @ %bb.13: @ %land.rhs14.preheader +; ENABLE-NEXT: @ in Loop: Header=BB0_7 Depth=1 +; ENABLE-NEXT: cmp r12, #191 +; ENABLE-NEXT: bhi .LBB0_7 +; ENABLE-NEXT: @ %bb.14: @ %while.body24.preheader +; ENABLE-NEXT: @ in Loop: Header=BB0_7 Depth=1 +; ENABLE-NEXT: sub r3, r3, #2 +; ENABLE-NEXT: .LBB0_15: @ %while.body24 +; ENABLE-NEXT: @ Parent Loop BB0_7 Depth=1 +; ENABLE-NEXT: @ => This Inner Loop Header: Depth=2 +; ENABLE-NEXT: mov r0, r3 +; ENABLE-NEXT: cmp r3, r2 +; ENABLE-NEXT: bls .LBB0_7 +; ENABLE-NEXT: @ %bb.16: @ %while.body24.land.rhs14_crit_edge +; ENABLE-NEXT: @ in Loop: Header=BB0_15 Depth=2 +; ENABLE-NEXT: mov r3, r0 +; ENABLE-NEXT: ldrsb lr, [r3], #-1 +; ENABLE-NEXT: cmn lr, #1 +; ENABLE-NEXT: uxtb r12, lr +; ENABLE-NEXT: bgt .LBB0_7 +; ENABLE-NEXT: @ %bb.17: @ %while.body24.land.rhs14_crit_edge +; ENABLE-NEXT: @ in Loop: Header=BB0_15 Depth=2 +; ENABLE-NEXT: cmp r12, #192 +; ENABLE-NEXT: blo .LBB0_15 +; ENABLE-NEXT: b .LBB0_7 +; ENABLE-NEXT: .LBB0_18: +; ENABLE-NEXT: mov r0, r3 +; ENABLE-NEXT: pop {r11, pc} +; +; DISABLE-LABEL: wrongUseOfPostDominate: +; DISABLE: @ %bb.0: @ %entry +; DISABLE-NEXT: .save {r11, lr} +; DISABLE-NEXT: push {r11, lr} +; DISABLE-NEXT: cmn r1, #1 +; DISABLE-NEXT: ble .LBB0_6 +; DISABLE-NEXT: @ %bb.1: @ %while.cond.preheader +; DISABLE-NEXT: cmp r1, #0 +; DISABLE-NEXT: beq .LBB0_5 +; DISABLE-NEXT: @ %bb.2: @ %while.cond.preheader +; DISABLE-NEXT: cmp r0, r2 +; DISABLE-NEXT: pophs {r11, pc} +; DISABLE-NEXT: movw r12, :lower16:skip +; DISABLE-NEXT: sub r1, r1, #1 +; DISABLE-NEXT: movt r12, :upper16:skip +; DISABLE-NEXT: .LBB0_3: @ %while.body +; 
DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: ldrb r3, [r0] +; DISABLE-NEXT: ldrb r3, [r12, r3] +; DISABLE-NEXT: add r0, r0, r3 +; DISABLE-NEXT: sub r3, r1, #1 +; DISABLE-NEXT: cmp r3, r1 +; DISABLE-NEXT: bhs .LBB0_5 +; DISABLE-NEXT: @ %bb.4: @ %while.body +; DISABLE-NEXT: @ in Loop: Header=BB0_3 Depth=1 +; DISABLE-NEXT: cmp r0, r2 +; DISABLE-NEXT: mov r1, r3 +; DISABLE-NEXT: blo .LBB0_3 +; DISABLE-NEXT: .LBB0_5: @ %if.end29 +; DISABLE-NEXT: pop {r11, pc} +; DISABLE-NEXT: .LBB0_6: @ %while.cond2.outer +; DISABLE-NEXT: @ =>This Loop Header: Depth=1 +; DISABLE-NEXT: @ Child Loop BB0_7 Depth 2 +; DISABLE-NEXT: @ Child Loop BB0_14 Depth 2 +; DISABLE-NEXT: mov r3, r0 +; DISABLE-NEXT: .LBB0_7: @ %while.cond2 +; DISABLE-NEXT: @ Parent Loop BB0_6 Depth=1 +; DISABLE-NEXT: @ => This Inner Loop Header: Depth=2 +; DISABLE-NEXT: add r1, r1, #1 +; DISABLE-NEXT: cmp r1, #1 +; DISABLE-NEXT: beq .LBB0_17 +; DISABLE-NEXT: @ %bb.8: @ %while.body4 +; DISABLE-NEXT: @ in Loop: Header=BB0_7 Depth=2 +; DISABLE-NEXT: cmp r3, r2 +; DISABLE-NEXT: bls .LBB0_7 +; DISABLE-NEXT: @ %bb.9: @ %if.then7 +; DISABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 +; DISABLE-NEXT: mov r0, r3 +; DISABLE-NEXT: ldrb r12, [r0, #-1]! 
+; DISABLE-NEXT: sxtb lr, r12 +; DISABLE-NEXT: cmn lr, #1 +; DISABLE-NEXT: bgt .LBB0_6 +; DISABLE-NEXT: @ %bb.10: @ %if.then7 +; DISABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 +; DISABLE-NEXT: cmp r0, r2 +; DISABLE-NEXT: bls .LBB0_6 +; DISABLE-NEXT: @ %bb.11: @ %land.rhs14.preheader +; DISABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 +; DISABLE-NEXT: cmn lr, #1 +; DISABLE-NEXT: bgt .LBB0_6 +; DISABLE-NEXT: @ %bb.12: @ %land.rhs14.preheader +; DISABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 +; DISABLE-NEXT: cmp r12, #191 +; DISABLE-NEXT: bhi .LBB0_6 +; DISABLE-NEXT: @ %bb.13: @ %while.body24.preheader +; DISABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 +; DISABLE-NEXT: sub r3, r3, #2 +; DISABLE-NEXT: .LBB0_14: @ %while.body24 +; DISABLE-NEXT: @ Parent Loop BB0_6 Depth=1 +; DISABLE-NEXT: @ => This Inner Loop Header: Depth=2 +; DISABLE-NEXT: mov r0, r3 +; DISABLE-NEXT: cmp r3, r2 +; DISABLE-NEXT: bls .LBB0_6 +; DISABLE-NEXT: @ %bb.15: @ %while.body24.land.rhs14_crit_edge +; DISABLE-NEXT: @ in Loop: Header=BB0_14 Depth=2 +; DISABLE-NEXT: mov r3, r0 +; DISABLE-NEXT: ldrsb lr, [r3], #-1 +; DISABLE-NEXT: cmn lr, #1 +; DISABLE-NEXT: uxtb r12, lr +; DISABLE-NEXT: bgt .LBB0_6 +; DISABLE-NEXT: @ %bb.16: @ %while.body24.land.rhs14_crit_edge +; DISABLE-NEXT: @ in Loop: Header=BB0_14 Depth=2 +; DISABLE-NEXT: cmp r12, #192 +; DISABLE-NEXT: blo .LBB0_14 +; DISABLE-NEXT: b .LBB0_6 +; DISABLE-NEXT: .LBB0_17: +; DISABLE-NEXT: mov r0, r3 +; DISABLE-NEXT: pop {r11, pc} entry: %cmp = icmp sgt i32 %off, -1 br i1 %cmp, label %while.cond.preheader, label %while.cond2.outer Index: test/CodeGen/ARM/arm-shrink-wrapping.ll =================================================================== --- test/CodeGen/ARM/arm-shrink-wrapping.ll +++ test/CodeGen/ARM/arm-shrink-wrapping.ll @@ -1,11 +1,12 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=armv7-apple-ios \ -; RUN: | FileCheck %s 
--check-prefix=CHECK --check-prefix=ARM --check-prefix=ENABLE --check-prefix=ARM-ENABLE +; RUN: | FileCheck %s --check-prefix=ARM-ENABLE ; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=armv7-apple-ios \ -; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=ARM --check-prefix=DISABLE --check-prefix=ARM-DISABLE +; RUN: | FileCheck %s --check-prefix=ARM-DISABLE ; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumbv7-apple-ios \ -; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB --check-prefix=ENABLE --check-prefix=THUMB-ENABLE +; RUN: | FileCheck %s --check-prefix=THUMB-ENABLE ; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumbv7-apple-ios \ -; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB --check-prefix=DISABLE --check-prefix=THUMB-DISABLE +; RUN: | FileCheck %s --check-prefix=THUMB-DISABLE ; ; Note: Lots of tests use inline asm instead of regular calls. @@ -18,48 +19,111 @@ ; the diffs. ; Initial motivating example: Simple diamond with a call just on one side. -; CHECK-LABEL: foo: +; foo: ; ; Compare the arguments and jump to exit. ; No prologue needed. -; ENABLE: cmp r0, r1 -; ENABLE-NEXT: bge [[EXIT_LABEL:LBB[0-9_]+]] +; cmp r0, r1 +; bge [[EXIT_LABEL:LBB[0-9_]+]] ; ; Prologue code. -; CHECK: push {r7, lr} -; CHECK-NEXT: mov r7, sp +; push {r7, lr} +; mov r7, sp ;; ; Compare the arguments and jump to exit. ; After the prologue is set. -; DISABLE: sub sp -; DISABLE: cmp r0, r1 -; DISABLE-NEXT: bge [[EXIT_LABEL:LBB[0-9_]+]] +; sub sp +; cmp r0, r1 +; bge [[EXIT_LABEL:LBB[0-9_]+]] ; ; Store %a in the alloca. -; ARM-ENABLE: push {r0} -; THUMB-ENABLE: str r0, [sp, #-4] -; DISABLE: str r0, [sp] +; push {r0} +; str r0, [sp, #-4] +; str r0, [sp] ; Set the alloca address in the second argument. -; CHECK-NEXT: mov r1, sp +; mov r1, sp ; Set the first argument to zero. 
-; CHECK-NEXT: mov{{s?}} r0, #0 -; CHECK-NEXT: bl{{x?}} _doSomething +; mov{{s?}} r0, #0 +; bl{{x?}} _doSomething ; ; With shrink-wrapping, epilogue is just after the call. -; ARM-ENABLE-NEXT: mov sp, r7 -; THUMB-ENABLE-NEXT: add sp, #4 -; ENABLE-NEXT: pop{{(\.w)?}} {r7, lr} +; mov sp, r7 +; add sp, #4 +; pop{{(\.w)?}} {r7, lr} ; -; CHECK: [[EXIT_LABEL]]: +; [[EXIT_LABEL]]: ; ; Without shrink-wrapping, epilogue is in the exit block. ; Epilogue code. (What we pop does not matter.) -; ARM-DISABLE: mov sp, r7 -; THUMB-DISABLE: add sp, -; DISABLE-NEXT: pop {r7, pc} +; mov sp, r7 +; add sp, +; pop {r7, pc} ; -; ENABLE-NEXT: bx lr +; bx lr define i32 @foo(i32 %a, i32 %b) "no-frame-pointer-elim"="true" { +; ARM-ENABLE-LABEL: foo: +; ARM-ENABLE: @ %bb.0: +; ARM-ENABLE-NEXT: cmp r0, r1 +; ARM-ENABLE-NEXT: bge LBB0_2 +; ARM-ENABLE-NEXT: @ %bb.1: @ %true +; ARM-ENABLE-NEXT: push {r7, lr} +; ARM-ENABLE-NEXT: mov r7, sp +; ARM-ENABLE-NEXT: push {r0} +; ARM-ENABLE-NEXT: mov r1, sp +; ARM-ENABLE-NEXT: mov r0, #0 +; ARM-ENABLE-NEXT: bl _doSomething +; ARM-ENABLE-NEXT: mov sp, r7 +; ARM-ENABLE-NEXT: pop {r7, lr} +; ARM-ENABLE-NEXT: LBB0_2: @ %false +; ARM-ENABLE-NEXT: bx lr +; +; ARM-DISABLE-LABEL: foo: +; ARM-DISABLE: @ %bb.0: +; ARM-DISABLE-NEXT: push {r7, lr} +; ARM-DISABLE-NEXT: mov r7, sp +; ARM-DISABLE-NEXT: sub sp, sp, #4 +; ARM-DISABLE-NEXT: cmp r0, r1 +; ARM-DISABLE-NEXT: bge LBB0_2 +; ARM-DISABLE-NEXT: @ %bb.1: @ %true +; ARM-DISABLE-NEXT: str r0, [sp] +; ARM-DISABLE-NEXT: mov r1, sp +; ARM-DISABLE-NEXT: mov r0, #0 +; ARM-DISABLE-NEXT: bl _doSomething +; ARM-DISABLE-NEXT: LBB0_2: @ %false +; ARM-DISABLE-NEXT: mov sp, r7 +; ARM-DISABLE-NEXT: pop {r7, pc} +; +; THUMB-ENABLE-LABEL: foo: +; THUMB-ENABLE: @ %bb.0: +; THUMB-ENABLE-NEXT: cmp r0, r1 +; THUMB-ENABLE-NEXT: bge LBB0_2 +; THUMB-ENABLE-NEXT: @ %bb.1: @ %true +; THUMB-ENABLE-NEXT: push {r7, lr} +; THUMB-ENABLE-NEXT: mov r7, sp +; THUMB-ENABLE-NEXT: str r0, [sp, #-4]! 
+; THUMB-ENABLE-NEXT: mov r1, sp +; THUMB-ENABLE-NEXT: movs r0, #0 +; THUMB-ENABLE-NEXT: bl _doSomething +; THUMB-ENABLE-NEXT: add sp, #4 +; THUMB-ENABLE-NEXT: pop.w {r7, lr} +; THUMB-ENABLE-NEXT: LBB0_2: @ %false +; THUMB-ENABLE-NEXT: bx lr +; +; THUMB-DISABLE-LABEL: foo: +; THUMB-DISABLE: @ %bb.0: +; THUMB-DISABLE-NEXT: push {r7, lr} +; THUMB-DISABLE-NEXT: mov r7, sp +; THUMB-DISABLE-NEXT: sub sp, #4 +; THUMB-DISABLE-NEXT: cmp r0, r1 +; THUMB-DISABLE-NEXT: bge LBB0_2 +; THUMB-DISABLE-NEXT: @ %bb.1: @ %true +; THUMB-DISABLE-NEXT: str r0, [sp] +; THUMB-DISABLE-NEXT: mov r1, sp +; THUMB-DISABLE-NEXT: movs r0, #0 +; THUMB-DISABLE-NEXT: bl _doSomething +; THUMB-DISABLE-NEXT: LBB0_2: @ %false +; THUMB-DISABLE-NEXT: add sp, #4 +; THUMB-DISABLE-NEXT: pop {r7, pc} %tmp = alloca i32, align 4 %tmp2 = icmp slt i32 %a, %b br i1 %tmp2, label %true, label %false @@ -80,51 +144,156 @@ ; Check that we do not perform the restore inside the loop whereas the save ; is outside. -; CHECK-LABEL: freqSaveAndRestoreOutsideLoop: +; freqSaveAndRestoreOutsideLoop: ; ; Shrink-wrapping allows to skip the prologue in the else case. -; ARM-ENABLE: cmp r0, #0 -; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; cmp r0, #0 +; beq [[ELSE_LABEL:LBB[0-9_]+]] +; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] ; ; Prologue code. ; Make sure we save the CSR used in the inline asm: r4. -; CHECK: push {r4, r7, lr} -; CHECK-NEXT: add r7, sp, #4 +; push {r4, r7, lr} +; add r7, sp, #4 ; -; ARM-DISABLE: cmp r0, #0 -; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; THUMB-DISABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; cmp r0, #0 +; beq [[ELSE_LABEL:LBB[0-9_]+]] +; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] ; ; SUM is in r0 because it is coalesced with the second ; argument on the else path. -; CHECK: mov{{s?}} [[SUM:r0]], #0 -; CHECK-NEXT: mov{{s?}} [[IV:r[0-9]+]], #10 +; mov{{s?}} [[SUM:r0]], #0 +; mov{{s?}} [[IV:r[0-9]+]], #10 ; ; Next BB. 
-; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body -; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1 -; ARM: add [[SUM]], [[TMP]], [[SUM]] -; THUMB: add [[SUM]], [[TMP]] -; ARM-NEXT: subs [[IV]], [[IV]], #1 -; THUMB-NEXT: subs [[IV]], #1 -; CHECK-NEXT: bne [[LOOP]] +; [[LOOP:LBB[0-9_]+]]: @ %for.body +; mov{{(\.w)?}} [[TMP:r[0-9]+]], #1 +; add [[SUM]], [[TMP]], [[SUM]] +; add [[SUM]], [[TMP]] +; subs [[IV]], [[IV]], #1 +; subs [[IV]], #1 +; bne [[LOOP]] ; ; Next BB. ; SUM << 3. -; CHECK: lsl{{s?}} [[SUM]], [[SUM]], #3 -; ENABLE-NEXT: pop {r4, r7, pc} +; lsl{{s?}} [[SUM]], [[SUM]], #3 +; pop {r4, r7, pc} ; ; Duplicated epilogue. -; DISABLE: pop {r4, r7, pc} +; pop {r4, r7, pc} ; -; CHECK: [[ELSE_LABEL]]: @ %if.else +; [[ELSE_LABEL]]: @ %if.else ; Shift second argument by one and store into returned register. -; CHECK: lsl{{s?}} r0, r1, #1 -; DISABLE-NEXT: pop {r4, r7, pc} +; lsl{{s?}} r0, r1, #1 +; pop {r4, r7, pc} ; -; ENABLE-NEXT: bx lr +; bx lr define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) "no-frame-pointer-elim"="true" { +; ARM-ENABLE-LABEL: freqSaveAndRestoreOutsideLoop: +; ARM-ENABLE: @ %bb.0: @ %entry +; ARM-ENABLE-NEXT: cmp r0, #0 +; ARM-ENABLE-NEXT: beq LBB1_4 +; ARM-ENABLE-NEXT: @ %bb.1: @ %for.preheader +; ARM-ENABLE-NEXT: push {r4, r7, lr} +; ARM-ENABLE-NEXT: add r7, sp, #4 +; ARM-ENABLE-NEXT: mov r0, #0 +; ARM-ENABLE-NEXT: mov r1, #10 +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: nop +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: LBB1_2: @ %for.body +; ARM-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: mov r2, #1 +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: add r0, r2, r0 +; ARM-ENABLE-NEXT: subs r1, r1, #1 +; ARM-ENABLE-NEXT: bne LBB1_2 +; ARM-ENABLE-NEXT: @ %bb.3: @ %for.end +; ARM-ENABLE-NEXT: lsl r0, r0, #3 +; ARM-ENABLE-NEXT: pop {r4, r7, pc} +; ARM-ENABLE-NEXT: LBB1_4: @ %if.else +; ARM-ENABLE-NEXT: lsl r0, r1, #1 +; ARM-ENABLE-NEXT: bx lr +; +; 
ARM-DISABLE-LABEL: freqSaveAndRestoreOutsideLoop: +; ARM-DISABLE: @ %bb.0: @ %entry +; ARM-DISABLE-NEXT: push {r4, r7, lr} +; ARM-DISABLE-NEXT: add r7, sp, #4 +; ARM-DISABLE-NEXT: cmp r0, #0 +; ARM-DISABLE-NEXT: beq LBB1_4 +; ARM-DISABLE-NEXT: @ %bb.1: @ %for.preheader +; ARM-DISABLE-NEXT: mov r0, #0 +; ARM-DISABLE-NEXT: mov r1, #10 +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: nop +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: LBB1_2: @ %for.body +; ARM-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: mov r2, #1 +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: add r0, r2, r0 +; ARM-DISABLE-NEXT: subs r1, r1, #1 +; ARM-DISABLE-NEXT: bne LBB1_2 +; ARM-DISABLE-NEXT: @ %bb.3: @ %for.end +; ARM-DISABLE-NEXT: lsl r0, r0, #3 +; ARM-DISABLE-NEXT: pop {r4, r7, pc} +; ARM-DISABLE-NEXT: LBB1_4: @ %if.else +; ARM-DISABLE-NEXT: lsl r0, r1, #1 +; ARM-DISABLE-NEXT: pop {r4, r7, pc} +; +; THUMB-ENABLE-LABEL: freqSaveAndRestoreOutsideLoop: +; THUMB-ENABLE: @ %bb.0: @ %entry +; THUMB-ENABLE-NEXT: cbz r0, LBB1_4 +; THUMB-ENABLE-NEXT: @ %bb.1: @ %for.preheader +; THUMB-ENABLE-NEXT: push {r4, r7, lr} +; THUMB-ENABLE-NEXT: add r7, sp, #4 +; THUMB-ENABLE-NEXT: movs r0, #0 +; THUMB-ENABLE-NEXT: movs r1, #10 +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: nop +; THUMB-ENABLE-NEXT: @ InlineAsm End +; THUMB-ENABLE-NEXT: LBB1_2: @ %for.body +; THUMB-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: mov.w r2, #1 +; THUMB-ENABLE-NEXT: @ InlineAsm End +; THUMB-ENABLE-NEXT: add r0, r2 +; THUMB-ENABLE-NEXT: subs r1, #1 +; THUMB-ENABLE-NEXT: bne LBB1_2 +; THUMB-ENABLE-NEXT: @ %bb.3: @ %for.end +; THUMB-ENABLE-NEXT: lsls r0, r0, #3 +; THUMB-ENABLE-NEXT: pop {r4, r7, pc} +; THUMB-ENABLE-NEXT: LBB1_4: @ %if.else +; THUMB-ENABLE-NEXT: lsls r0, r1, #1 +; THUMB-ENABLE-NEXT: bx lr +; +; THUMB-DISABLE-LABEL: freqSaveAndRestoreOutsideLoop: 
+; THUMB-DISABLE: @ %bb.0: @ %entry +; THUMB-DISABLE-NEXT: push {r4, r7, lr} +; THUMB-DISABLE-NEXT: add r7, sp, #4 +; THUMB-DISABLE-NEXT: cbz r0, LBB1_4 +; THUMB-DISABLE-NEXT: @ %bb.1: @ %for.preheader +; THUMB-DISABLE-NEXT: movs r0, #0 +; THUMB-DISABLE-NEXT: movs r1, #10 +; THUMB-DISABLE-NEXT: @ InlineAsm Start +; THUMB-DISABLE-NEXT: nop +; THUMB-DISABLE-NEXT: @ InlineAsm End +; THUMB-DISABLE-NEXT: LBB1_2: @ %for.body +; THUMB-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-DISABLE-NEXT: @ InlineAsm Start +; THUMB-DISABLE-NEXT: mov.w r2, #1 +; THUMB-DISABLE-NEXT: @ InlineAsm End +; THUMB-DISABLE-NEXT: add r0, r2 +; THUMB-DISABLE-NEXT: subs r1, #1 +; THUMB-DISABLE-NEXT: bne LBB1_2 +; THUMB-DISABLE-NEXT: @ %bb.3: @ %for.end +; THUMB-DISABLE-NEXT: lsls r0, r0, #3 +; THUMB-DISABLE-NEXT: pop {r4, r7, pc} +; THUMB-DISABLE-NEXT: LBB1_4: @ %if.else +; THUMB-DISABLE-NEXT: lsls r0, r1, #1 +; THUMB-DISABLE-NEXT: pop {r4, r7, pc} entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.preheader @@ -159,26 +328,162 @@ ; Check that we do not perform the shrink-wrapping inside the loop even ; though that would be legal. The cost model must prevent that. -; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2: +; freqSaveAndRestoreOutsideLoop2: ; Prologue code. ; Make sure we save the CSR used in the inline asm: r4. -; CHECK: push {r4 -; CHECK: mov{{s?}} [[SUM:r0]], #0 -; CHECK-NEXT: mov{{s?}} [[IV:r[0-9]+]], #10 -; CHECK: nop +; push {r4 +; mov{{s?}} [[SUM:r0]], #0 +; mov{{s?}} [[IV:r[0-9]+]], #10 +; nop ; Next BB. 
-; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: @ %for.body -; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1 -; ARM: add [[SUM]], [[TMP]], [[SUM]] -; THUMB: add [[SUM]], [[TMP]] -; ARM: subs [[IV]], [[IV]], #1 -; THUMB: subs [[IV]], #1 -; CHECK-NEXT: bne [[LOOP_LABEL]] +; [[LOOP_LABEL:LBB[0-9_]+]]: @ %for.body +; mov{{(\.w)?}} [[TMP:r[0-9]+]], #1 +; add [[SUM]], [[TMP]], [[SUM]] +; add [[SUM]], [[TMP]] +; subs [[IV]], [[IV]], #1 +; subs [[IV]], #1 +; bne [[LOOP_LABEL]] ; Next BB. -; CHECK: @ %for.exit -; CHECK: nop -; CHECK: pop {r4 +; @ %for.exit +; nop +; pop {r4 define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) "no-frame-pointer-elim"="true" { +; ARM-LABEL: freqSaveAndRestoreOutsideLoop2: +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: push {r4, r7, lr} +; ARM-NEXT: add r7, sp, #4 +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: mov r1, #10 +; ARM-NEXT: @ InlineAsm Start +; ARM-NEXT: nop +; ARM-NEXT: @ InlineAsm End +; ARM-NEXT: LBB2_1: @ %for.body +; ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-NEXT: @ InlineAsm Start +; ARM-NEXT: mov r2, #1 +; ARM-NEXT: @ InlineAsm End +; ARM-NEXT: add r0, r2, r0 +; ARM-NEXT: subs r1, r1, #1 +; ARM-NEXT: bne LBB2_1 +; ARM-NEXT: @ %bb.2: @ %for.exit +; ARM-NEXT: @ InlineAsm Start +; ARM-NEXT: nop +; ARM-NEXT: @ InlineAsm End +; ARM-NEXT: pop {r4, r7, pc} +; +; THUMB-LABEL: freqSaveAndRestoreOutsideLoop2: +; THUMB: @ %bb.0: @ %entry +; THUMB-NEXT: push {r4, r7, lr} +; THUMB-NEXT: add r7, sp, #4 +; THUMB-NEXT: movs r0, #0 +; THUMB-NEXT: movs r1, #10 +; THUMB-NEXT: @ InlineAsm Start +; THUMB-NEXT: nop +; THUMB-NEXT: @ InlineAsm End +; THUMB-NEXT: LBB2_1: @ %for.body +; THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-NEXT: @ InlineAsm Start +; THUMB-NEXT: mov.w r2, #1 +; THUMB-NEXT: @ InlineAsm End +; THUMB-NEXT: add r0, r2 +; THUMB-NEXT: subs r1, #1 +; THUMB-NEXT: bne LBB2_1 +; THUMB-NEXT: @ %bb.2: @ %for.exit +; THUMB-NEXT: @ InlineAsm Start +; THUMB-NEXT: nop +; THUMB-NEXT: @ InlineAsm End +; THUMB-NEXT: pop {r4, r7, pc} +; ARM-ENABLE-LABEL: 
freqSaveAndRestoreOutsideLoop2: +; ARM-ENABLE: @ %bb.0: @ %entry +; ARM-ENABLE-NEXT: push {r4, r7, lr} +; ARM-ENABLE-NEXT: add r7, sp, #4 +; ARM-ENABLE-NEXT: mov r0, #0 +; ARM-ENABLE-NEXT: mov r1, #10 +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: nop +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: LBB2_1: @ %for.body +; ARM-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: mov r2, #1 +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: add r0, r2, r0 +; ARM-ENABLE-NEXT: subs r1, r1, #1 +; ARM-ENABLE-NEXT: bne LBB2_1 +; ARM-ENABLE-NEXT: @ %bb.2: @ %for.exit +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: nop +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: pop {r4, r7, pc} +; +; ARM-DISABLE-LABEL: freqSaveAndRestoreOutsideLoop2: +; ARM-DISABLE: @ %bb.0: @ %entry +; ARM-DISABLE-NEXT: push {r4, r7, lr} +; ARM-DISABLE-NEXT: add r7, sp, #4 +; ARM-DISABLE-NEXT: mov r0, #0 +; ARM-DISABLE-NEXT: mov r1, #10 +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: nop +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: LBB2_1: @ %for.body +; ARM-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: mov r2, #1 +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: add r0, r2, r0 +; ARM-DISABLE-NEXT: subs r1, r1, #1 +; ARM-DISABLE-NEXT: bne LBB2_1 +; ARM-DISABLE-NEXT: @ %bb.2: @ %for.exit +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: nop +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: pop {r4, r7, pc} +; +; THUMB-ENABLE-LABEL: freqSaveAndRestoreOutsideLoop2: +; THUMB-ENABLE: @ %bb.0: @ %entry +; THUMB-ENABLE-NEXT: push {r4, r7, lr} +; THUMB-ENABLE-NEXT: add r7, sp, #4 +; THUMB-ENABLE-NEXT: movs r0, #0 +; THUMB-ENABLE-NEXT: movs r1, #10 +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: nop +; THUMB-ENABLE-NEXT: @ InlineAsm End +; THUMB-ENABLE-NEXT: LBB2_1: @ %for.body +; 
THUMB-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: mov.w r2, #1 +; THUMB-ENABLE-NEXT: @ InlineAsm End +; THUMB-ENABLE-NEXT: add r0, r2 +; THUMB-ENABLE-NEXT: subs r1, #1 +; THUMB-ENABLE-NEXT: bne LBB2_1 +; THUMB-ENABLE-NEXT: @ %bb.2: @ %for.exit +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: nop +; THUMB-ENABLE-NEXT: @ InlineAsm End +; THUMB-ENABLE-NEXT: pop {r4, r7, pc} +; +; THUMB-DISABLE-LABEL: freqSaveAndRestoreOutsideLoop2: +; THUMB-DISABLE: @ %bb.0: @ %entry +; THUMB-DISABLE-NEXT: push {r4, r7, lr} +; THUMB-DISABLE-NEXT: add r7, sp, #4 +; THUMB-DISABLE-NEXT: movs r0, #0 +; THUMB-DISABLE-NEXT: movs r1, #10 +; THUMB-DISABLE-NEXT: @ InlineAsm Start +; THUMB-DISABLE-NEXT: nop +; THUMB-DISABLE-NEXT: @ InlineAsm End +; THUMB-DISABLE-NEXT: LBB2_1: @ %for.body +; THUMB-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-DISABLE-NEXT: @ InlineAsm Start +; THUMB-DISABLE-NEXT: mov.w r2, #1 +; THUMB-DISABLE-NEXT: @ InlineAsm End +; THUMB-DISABLE-NEXT: add r0, r2 +; THUMB-DISABLE-NEXT: subs r1, #1 +; THUMB-DISABLE-NEXT: bne LBB2_1 +; THUMB-DISABLE-NEXT: @ %bb.2: @ %for.exit +; THUMB-DISABLE-NEXT: @ InlineAsm Start +; THUMB-DISABLE-NEXT: nop +; THUMB-DISABLE-NEXT: @ InlineAsm End +; THUMB-DISABLE-NEXT: pop {r4, r7, pc} entry: br label %for.preheader @@ -205,50 +510,167 @@ ; Check with a more complex case that we do not have save within the loop and ; restore outside. -; CHECK-LABEL: loopInfoSaveOutsideLoop: +; loopInfoSaveOutsideLoop: ; -; ARM-ENABLE: cmp r0, #0 -; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; cmp r0, #0 +; beq [[ELSE_LABEL:LBB[0-9_]+]] +; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] ; ; Prologue code. ; Make sure we save the CSR used in the inline asm: r4. 
-; CHECK: push {r4, r7, lr} -; CHECK-NEXT: add r7, sp, #4 +; push {r4, r7, lr} +; add r7, sp, #4 ; -; ARM-DISABLE: cmp r0, #0 -; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; THUMB-DISABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; cmp r0, #0 +; beq [[ELSE_LABEL:LBB[0-9_]+]] +; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] ; ; SUM is in r0 because it is coalesced with the second ; argument on the else path. -; CHECK: mov{{s?}} [[SUM:r0]], #0 -; CHECK-NEXT: mov{{s?}} [[IV:r[0-9]+]], #10 +; mov{{s?}} [[SUM:r0]], #0 +; mov{{s?}} [[IV:r[0-9]+]], #10 ; ; Next BB. -; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body -; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1 -; ARM: add [[SUM]], [[TMP]], [[SUM]] -; THUMB: add [[SUM]], [[TMP]] -; ARM-NEXT: subs [[IV]], [[IV]], #1 -; THUMB-NEXT: subs [[IV]], #1 -; CHECK-NEXT: bne [[LOOP]] +; [[LOOP:LBB[0-9_]+]]: @ %for.body +; mov{{(\.w)?}} [[TMP:r[0-9]+]], #1 +; add [[SUM]], [[TMP]], [[SUM]] +; add [[SUM]], [[TMP]] +; subs [[IV]], [[IV]], #1 +; subs [[IV]], #1 +; bne [[LOOP]] ; ; Next BB. ; SUM << 3. -; CHECK: lsl{{s?}} [[SUM]], [[SUM]], #3 -; ENABLE: pop {r4, r7, pc} +; lsl{{s?}} [[SUM]], [[SUM]], #3 +; pop {r4, r7, pc} ; ; Duplicated epilogue. -; DISABLE: pop {r4, r7, pc} +; pop {r4, r7, pc} ; -; CHECK: [[ELSE_LABEL]]: @ %if.else +; [[ELSE_LABEL]]: @ %if.else ; Shift second argument by one and store into returned register. 
-; CHECK: lsl{{s?}} r0, r1, #1 -; DISABLE-NEXT: pop {r4, r7, pc} +; lsl{{s?}} r0, r1, #1 +; pop {r4, r7, pc} ; -; ENABLE-NEXT: bx lr +; bx lr define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) "no-frame-pointer-elim"="true" { +; ARM-ENABLE-LABEL: loopInfoSaveOutsideLoop: +; ARM-ENABLE: @ %bb.0: @ %entry +; ARM-ENABLE-NEXT: cmp r0, #0 +; ARM-ENABLE-NEXT: beq LBB3_4 +; ARM-ENABLE-NEXT: @ %bb.1: @ %for.preheader +; ARM-ENABLE-NEXT: push {r4, r7, lr} +; ARM-ENABLE-NEXT: add r7, sp, #4 +; ARM-ENABLE-NEXT: mov r0, #0 +; ARM-ENABLE-NEXT: mov r1, #10 +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: nop +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: LBB3_2: @ %for.body +; ARM-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: mov r2, #1 +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: add r0, r2, r0 +; ARM-ENABLE-NEXT: subs r1, r1, #1 +; ARM-ENABLE-NEXT: bne LBB3_2 +; ARM-ENABLE-NEXT: @ %bb.3: @ %for.end +; ARM-ENABLE-NEXT: lsl r0, r0, #3 +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: nop +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: pop {r4, r7, pc} +; ARM-ENABLE-NEXT: LBB3_4: @ %if.else +; ARM-ENABLE-NEXT: lsl r0, r1, #1 +; ARM-ENABLE-NEXT: bx lr +; +; ARM-DISABLE-LABEL: loopInfoSaveOutsideLoop: +; ARM-DISABLE: @ %bb.0: @ %entry +; ARM-DISABLE-NEXT: push {r4, r7, lr} +; ARM-DISABLE-NEXT: add r7, sp, #4 +; ARM-DISABLE-NEXT: cmp r0, #0 +; ARM-DISABLE-NEXT: beq LBB3_4 +; ARM-DISABLE-NEXT: @ %bb.1: @ %for.preheader +; ARM-DISABLE-NEXT: mov r0, #0 +; ARM-DISABLE-NEXT: mov r1, #10 +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: nop +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: LBB3_2: @ %for.body +; ARM-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: mov r2, #1 +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: add r0, r2, r0 +; ARM-DISABLE-NEXT: subs r1, r1, #1 +; ARM-DISABLE-NEXT: 
bne LBB3_2 +; ARM-DISABLE-NEXT: @ %bb.3: @ %for.end +; ARM-DISABLE-NEXT: lsl r0, r0, #3 +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: nop +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: pop {r4, r7, pc} +; ARM-DISABLE-NEXT: LBB3_4: @ %if.else +; ARM-DISABLE-NEXT: lsl r0, r1, #1 +; ARM-DISABLE-NEXT: pop {r4, r7, pc} +; +; THUMB-ENABLE-LABEL: loopInfoSaveOutsideLoop: +; THUMB-ENABLE: @ %bb.0: @ %entry +; THUMB-ENABLE-NEXT: cbz r0, LBB3_4 +; THUMB-ENABLE-NEXT: @ %bb.1: @ %for.preheader +; THUMB-ENABLE-NEXT: push {r4, r7, lr} +; THUMB-ENABLE-NEXT: add r7, sp, #4 +; THUMB-ENABLE-NEXT: movs r0, #0 +; THUMB-ENABLE-NEXT: movs r1, #10 +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: nop +; THUMB-ENABLE-NEXT: @ InlineAsm End +; THUMB-ENABLE-NEXT: LBB3_2: @ %for.body +; THUMB-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: mov.w r2, #1 +; THUMB-ENABLE-NEXT: @ InlineAsm End +; THUMB-ENABLE-NEXT: add r0, r2 +; THUMB-ENABLE-NEXT: subs r1, #1 +; THUMB-ENABLE-NEXT: bne LBB3_2 +; THUMB-ENABLE-NEXT: @ %bb.3: @ %for.end +; THUMB-ENABLE-NEXT: lsls r0, r0, #3 +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: nop +; THUMB-ENABLE-NEXT: @ InlineAsm End +; THUMB-ENABLE-NEXT: pop {r4, r7, pc} +; THUMB-ENABLE-NEXT: LBB3_4: @ %if.else +; THUMB-ENABLE-NEXT: lsls r0, r1, #1 +; THUMB-ENABLE-NEXT: bx lr +; +; THUMB-DISABLE-LABEL: loopInfoSaveOutsideLoop: +; THUMB-DISABLE: @ %bb.0: @ %entry +; THUMB-DISABLE-NEXT: push {r4, r7, lr} +; THUMB-DISABLE-NEXT: add r7, sp, #4 +; THUMB-DISABLE-NEXT: cbz r0, LBB3_4 +; THUMB-DISABLE-NEXT: @ %bb.1: @ %for.preheader +; THUMB-DISABLE-NEXT: movs r0, #0 +; THUMB-DISABLE-NEXT: movs r1, #10 +; THUMB-DISABLE-NEXT: @ InlineAsm Start +; THUMB-DISABLE-NEXT: nop +; THUMB-DISABLE-NEXT: @ InlineAsm End +; THUMB-DISABLE-NEXT: LBB3_2: @ %for.body +; THUMB-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-DISABLE-NEXT: @ InlineAsm Start +; 
THUMB-DISABLE-NEXT: mov.w r2, #1 +; THUMB-DISABLE-NEXT: @ InlineAsm End +; THUMB-DISABLE-NEXT: add r0, r2 +; THUMB-DISABLE-NEXT: subs r1, #1 +; THUMB-DISABLE-NEXT: bne LBB3_2 +; THUMB-DISABLE-NEXT: @ %bb.3: @ %for.end +; THUMB-DISABLE-NEXT: lsls r0, r0, #3 +; THUMB-DISABLE-NEXT: @ InlineAsm Start +; THUMB-DISABLE-NEXT: nop +; THUMB-DISABLE-NEXT: @ InlineAsm End +; THUMB-DISABLE-NEXT: pop {r4, r7, pc} +; THUMB-DISABLE-NEXT: LBB3_4: @ %if.else +; THUMB-DISABLE-NEXT: lsls r0, r1, #1 +; THUMB-DISABLE-NEXT: pop {r4, r7, pc} entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.preheader @@ -284,50 +706,155 @@ ; Check with a more complex case that we do not have restore within the loop and ; save outside. -; CHECK-LABEL: loopInfoRestoreOutsideLoop: +; loopInfoRestoreOutsideLoop: ; -; ARM-ENABLE: cmp r0, #0 -; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; cmp r0, #0 +; beq [[ELSE_LABEL:LBB[0-9_]+]] +; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] ; ; Prologue code. ; Make sure we save the CSR used in the inline asm: r4. -; CHECK: push {r4, r7, lr} -; CHECK-NEXT: add r7, sp, #4 +; push {r4, r7, lr} +; add r7, sp, #4 ; -; ARM-DISABLE: cmp r0, #0 -; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; THUMB-DISABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; cmp r0, #0 +; beq [[ELSE_LABEL:LBB[0-9_]+]] +; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] ; ; SUM is in r0 because it is coalesced with the second ; argument on the else path. -; CHECK: mov{{s?}} [[SUM:r0]], #0 -; CHECK-NEXT: mov{{s?}} [[IV:r[0-9]+]], #10 +; mov{{s?}} [[SUM:r0]], #0 +; mov{{s?}} [[IV:r[0-9]+]], #10 ; ; Next BB. 
-; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body -; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1 -; ARM: add [[SUM]], [[TMP]], [[SUM]] -; THUMB: add [[SUM]], [[TMP]] -; ARM-NEXT: subs [[IV]], [[IV]], #1 -; THUMB-NEXT: subs [[IV]], #1 -; CHECK-NEXT: bne [[LOOP]] +; [[LOOP:LBB[0-9_]+]]: @ %for.body +; mov{{(\.w)?}} [[TMP:r[0-9]+]], #1 +; add [[SUM]], [[TMP]], [[SUM]] +; add [[SUM]], [[TMP]] +; subs [[IV]], [[IV]], #1 +; subs [[IV]], #1 +; bne [[LOOP]] ; ; Next BB. ; SUM << 3. -; CHECK: lsl{{s?}} [[SUM]], [[SUM]], #3 -; ENABLE-NEXT: pop {r4, r7, pc} +; lsl{{s?}} [[SUM]], [[SUM]], #3 +; pop {r4, r7, pc} ; ; Duplicated epilogue. -; DISABLE: pop {r4, r7, pc} +; pop {r4, r7, pc} ; -; CHECK: [[ELSE_LABEL]]: @ %if.else +; [[ELSE_LABEL]]: @ %if.else ; Shift second argument by one and store into returned register. -; CHECK: lsl{{s?}} r0, r1, #1 -; DISABLE-NEXT: pop {r4, r7, pc} +; lsl{{s?}} r0, r1, #1 +; pop {r4, r7, pc} ; -; ENABLE-NEXT: bx lr +; bx lr define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) "no-frame-pointer-elim"="true" nounwind { +; ARM-ENABLE-LABEL: loopInfoRestoreOutsideLoop: +; ARM-ENABLE: @ %bb.0: @ %entry +; ARM-ENABLE-NEXT: cmp r0, #0 +; ARM-ENABLE-NEXT: beq LBB4_4 +; ARM-ENABLE-NEXT: @ %bb.1: @ %if.then +; ARM-ENABLE-NEXT: push {r4, r7, lr} +; ARM-ENABLE-NEXT: add r7, sp, #4 +; ARM-ENABLE-NEXT: mov r0, #0 +; ARM-ENABLE-NEXT: mov r1, #10 +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: nop +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: LBB4_2: @ %for.body +; ARM-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: mov r2, #1 +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: add r0, r2, r0 +; ARM-ENABLE-NEXT: subs r1, r1, #1 +; ARM-ENABLE-NEXT: bne LBB4_2 +; ARM-ENABLE-NEXT: @ %bb.3: @ %for.end +; ARM-ENABLE-NEXT: lsl r0, r0, #3 +; ARM-ENABLE-NEXT: pop {r4, r7, pc} +; ARM-ENABLE-NEXT: LBB4_4: @ %if.else +; ARM-ENABLE-NEXT: lsl r0, r1, #1 +; ARM-ENABLE-NEXT: bx lr +; +; 
ARM-DISABLE-LABEL: loopInfoRestoreOutsideLoop: +; ARM-DISABLE: @ %bb.0: @ %entry +; ARM-DISABLE-NEXT: push {r4, r7, lr} +; ARM-DISABLE-NEXT: add r7, sp, #4 +; ARM-DISABLE-NEXT: cmp r0, #0 +; ARM-DISABLE-NEXT: beq LBB4_4 +; ARM-DISABLE-NEXT: @ %bb.1: @ %if.then +; ARM-DISABLE-NEXT: mov r0, #0 +; ARM-DISABLE-NEXT: mov r1, #10 +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: nop +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: LBB4_2: @ %for.body +; ARM-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: mov r2, #1 +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: add r0, r2, r0 +; ARM-DISABLE-NEXT: subs r1, r1, #1 +; ARM-DISABLE-NEXT: bne LBB4_2 +; ARM-DISABLE-NEXT: @ %bb.3: @ %for.end +; ARM-DISABLE-NEXT: lsl r0, r0, #3 +; ARM-DISABLE-NEXT: pop {r4, r7, pc} +; ARM-DISABLE-NEXT: LBB4_4: @ %if.else +; ARM-DISABLE-NEXT: lsl r0, r1, #1 +; ARM-DISABLE-NEXT: pop {r4, r7, pc} +; +; THUMB-ENABLE-LABEL: loopInfoRestoreOutsideLoop: +; THUMB-ENABLE: @ %bb.0: @ %entry +; THUMB-ENABLE-NEXT: cbz r0, LBB4_4 +; THUMB-ENABLE-NEXT: @ %bb.1: @ %if.then +; THUMB-ENABLE-NEXT: push {r4, r7, lr} +; THUMB-ENABLE-NEXT: add r7, sp, #4 +; THUMB-ENABLE-NEXT: movs r0, #0 +; THUMB-ENABLE-NEXT: movs r1, #10 +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: nop +; THUMB-ENABLE-NEXT: @ InlineAsm End +; THUMB-ENABLE-NEXT: LBB4_2: @ %for.body +; THUMB-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: mov.w r2, #1 +; THUMB-ENABLE-NEXT: @ InlineAsm End +; THUMB-ENABLE-NEXT: add r0, r2 +; THUMB-ENABLE-NEXT: subs r1, #1 +; THUMB-ENABLE-NEXT: bne LBB4_2 +; THUMB-ENABLE-NEXT: @ %bb.3: @ %for.end +; THUMB-ENABLE-NEXT: lsls r0, r0, #3 +; THUMB-ENABLE-NEXT: pop {r4, r7, pc} +; THUMB-ENABLE-NEXT: LBB4_4: @ %if.else +; THUMB-ENABLE-NEXT: lsls r0, r1, #1 +; THUMB-ENABLE-NEXT: bx lr +; +; THUMB-DISABLE-LABEL: loopInfoRestoreOutsideLoop: +; THUMB-DISABLE: @ 
%bb.0: @ %entry +; THUMB-DISABLE-NEXT: push {r4, r7, lr} +; THUMB-DISABLE-NEXT: add r7, sp, #4 +; THUMB-DISABLE-NEXT: cbz r0, LBB4_4 +; THUMB-DISABLE-NEXT: @ %bb.1: @ %if.then +; THUMB-DISABLE-NEXT: movs r0, #0 +; THUMB-DISABLE-NEXT: movs r1, #10 +; THUMB-DISABLE-NEXT: @ InlineAsm Start +; THUMB-DISABLE-NEXT: nop +; THUMB-DISABLE-NEXT: @ InlineAsm End +; THUMB-DISABLE-NEXT: LBB4_2: @ %for.body +; THUMB-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-DISABLE-NEXT: @ InlineAsm Start +; THUMB-DISABLE-NEXT: mov.w r2, #1 +; THUMB-DISABLE-NEXT: @ InlineAsm End +; THUMB-DISABLE-NEXT: add r0, r2 +; THUMB-DISABLE-NEXT: subs r1, #1 +; THUMB-DISABLE-NEXT: bne LBB4_2 +; THUMB-DISABLE-NEXT: @ %bb.3: @ %for.end +; THUMB-DISABLE-NEXT: lsls r0, r0, #3 +; THUMB-DISABLE-NEXT: pop {r4, r7, pc} +; THUMB-DISABLE-NEXT: LBB4_4: @ %if.else +; THUMB-DISABLE-NEXT: lsls r0, r1, #1 +; THUMB-DISABLE-NEXT: pop {r4, r7, pc} entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %if.then @@ -359,53 +886,190 @@ } ; Check that we handle function with no frame information correctly. 
-; CHECK-LABEL: emptyFrame: -; CHECK: @ %entry -; CHECK-NEXT: mov{{s?}} r0, #0 -; CHECK-NEXT: bx lr +; emptyFrame: +; @ %entry +; mov{{s?}} r0, #0 +; bx lr define i32 @emptyFrame() { +; ARM-LABEL: emptyFrame: +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: bx lr +; +; THUMB-LABEL: emptyFrame: +; THUMB: @ %bb.0: @ %entry +; THUMB-NEXT: movs r0, #0 +; THUMB-NEXT: bx lr +; ARM-ENABLE-LABEL: emptyFrame: +; ARM-ENABLE: @ %bb.0: @ %entry +; ARM-ENABLE-NEXT: mov r0, #0 +; ARM-ENABLE-NEXT: bx lr +; +; ARM-DISABLE-LABEL: emptyFrame: +; ARM-DISABLE: @ %bb.0: @ %entry +; ARM-DISABLE-NEXT: mov r0, #0 +; ARM-DISABLE-NEXT: bx lr +; +; THUMB-ENABLE-LABEL: emptyFrame: +; THUMB-ENABLE: @ %bb.0: @ %entry +; THUMB-ENABLE-NEXT: movs r0, #0 +; THUMB-ENABLE-NEXT: bx lr +; +; THUMB-DISABLE-LABEL: emptyFrame: +; THUMB-DISABLE: @ %bb.0: @ %entry +; THUMB-DISABLE-NEXT: movs r0, #0 +; THUMB-DISABLE-NEXT: bx lr entry: ret i32 0 } ; Check that we handle inline asm correctly. -; CHECK-LABEL: inlineAsm: +; inlineAsm: ; -; ARM-ENABLE: cmp r0, #0 -; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; cmp r0, #0 +; beq [[ELSE_LABEL:LBB[0-9_]+]] +; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] ; ; Prologue code. ; Make sure we save the CSR used in the inline asm: r4. -; CHECK: push {r4, r7, lr} -; CHECK-NEXT: add r7, sp, #4 +; push {r4, r7, lr} +; add r7, sp, #4 ; -; ARM-DISABLE: cmp r0, #0 -; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; THUMB-DISABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; cmp r0, #0 +; beq [[ELSE_LABEL:LBB[0-9_]+]] +; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] ; -; CHECK: mov{{s?}} [[IV:r[0-9]+]], #10 +; mov{{s?}} [[IV:r[0-9]+]], #10 ; ; Next BB. 
-; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body -; ARM: subs [[IV]], [[IV]], #1 -; THUMB: subs [[IV]], #1 -; CHECK: add{{(\.w)?}} r4, r4, #1 -; CHECK: bne [[LOOP]] +; [[LOOP:LBB[0-9_]+]]: @ %for.body +; subs [[IV]], [[IV]], #1 +; subs [[IV]], #1 +; add{{(\.w)?}} r4, r4, #1 +; bne [[LOOP]] ; ; Next BB. -; CHECK: mov{{s?}} r0, #0 +; mov{{s?}} r0, #0 ; ; Duplicated epilogue. -; DISABLE: pop {r4, r7, pc} +; pop {r4, r7, pc} ; -; CHECK: [[ELSE_LABEL]]: @ %if.else +; [[ELSE_LABEL]]: @ %if.else ; Shift second argument by one and store into returned register. -; CHECK: lsl{{s?}} r0, r1, #1 -; DISABLE-NEXT: pop {r4, r7, pc} +; lsl{{s?}} r0, r1, #1 +; pop {r4, r7, pc} ; -; ENABLE-NEXT: bx lr +; bx lr define i32 @inlineAsm(i32 %cond, i32 %N) "no-frame-pointer-elim"="true" { +; ARM-ENABLE-LABEL: inlineAsm: +; ARM-ENABLE: @ %bb.0: @ %entry +; ARM-ENABLE-NEXT: cmp r0, #0 +; ARM-ENABLE-NEXT: beq LBB6_4 +; ARM-ENABLE-NEXT: @ %bb.1: @ %for.preheader +; ARM-ENABLE-NEXT: push {r4, r7, lr} +; ARM-ENABLE-NEXT: add r7, sp, #4 +; ARM-ENABLE-NEXT: mov r0, #10 +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: nop +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: LBB6_2: @ %for.body +; ARM-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-ENABLE-NEXT: subs r0, r0, #1 +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: add r4, r4, #1 +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: bne LBB6_2 +; ARM-ENABLE-NEXT: @ %bb.3: @ %for.exit +; ARM-ENABLE-NEXT: mov r0, #0 +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: nop +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: pop {r4, r7, pc} +; ARM-ENABLE-NEXT: LBB6_4: @ %if.else +; ARM-ENABLE-NEXT: lsl r0, r1, #1 +; ARM-ENABLE-NEXT: bx lr +; +; ARM-DISABLE-LABEL: inlineAsm: +; ARM-DISABLE: @ %bb.0: @ %entry +; ARM-DISABLE-NEXT: push {r4, r7, lr} +; ARM-DISABLE-NEXT: add r7, sp, #4 +; ARM-DISABLE-NEXT: cmp r0, #0 +; ARM-DISABLE-NEXT: beq LBB6_4 +; ARM-DISABLE-NEXT: @ %bb.1: @ %for.preheader +; 
ARM-DISABLE-NEXT: mov r0, #10 +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: nop +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: LBB6_2: @ %for.body +; ARM-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-DISABLE-NEXT: subs r0, r0, #1 +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: add r4, r4, #1 +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: bne LBB6_2 +; ARM-DISABLE-NEXT: @ %bb.3: @ %for.exit +; ARM-DISABLE-NEXT: mov r0, #0 +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: nop +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: pop {r4, r7, pc} +; ARM-DISABLE-NEXT: LBB6_4: @ %if.else +; ARM-DISABLE-NEXT: lsl r0, r1, #1 +; ARM-DISABLE-NEXT: pop {r4, r7, pc} +; +; THUMB-ENABLE-LABEL: inlineAsm: +; THUMB-ENABLE: @ %bb.0: @ %entry +; THUMB-ENABLE-NEXT: cbz r0, LBB6_4 +; THUMB-ENABLE-NEXT: @ %bb.1: @ %for.preheader +; THUMB-ENABLE-NEXT: push {r4, r7, lr} +; THUMB-ENABLE-NEXT: add r7, sp, #4 +; THUMB-ENABLE-NEXT: movs r0, #10 +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: nop +; THUMB-ENABLE-NEXT: @ InlineAsm End +; THUMB-ENABLE-NEXT: LBB6_2: @ %for.body +; THUMB-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-ENABLE-NEXT: subs r0, #1 +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: add.w r4, r4, #1 +; THUMB-ENABLE-NEXT: @ InlineAsm End +; THUMB-ENABLE-NEXT: bne LBB6_2 +; THUMB-ENABLE-NEXT: @ %bb.3: @ %for.exit +; THUMB-ENABLE-NEXT: movs r0, #0 +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: nop +; THUMB-ENABLE-NEXT: @ InlineAsm End +; THUMB-ENABLE-NEXT: pop {r4, r7, pc} +; THUMB-ENABLE-NEXT: LBB6_4: @ %if.else +; THUMB-ENABLE-NEXT: lsls r0, r1, #1 +; THUMB-ENABLE-NEXT: bx lr +; +; THUMB-DISABLE-LABEL: inlineAsm: +; THUMB-DISABLE: @ %bb.0: @ %entry +; THUMB-DISABLE-NEXT: push {r4, r7, lr} +; THUMB-DISABLE-NEXT: add r7, sp, #4 +; THUMB-DISABLE-NEXT: cbz r0, LBB6_4 +; THUMB-DISABLE-NEXT: @ %bb.1: @ %for.preheader +; THUMB-DISABLE-NEXT: movs r0, #10 +; 
THUMB-DISABLE-NEXT: @ InlineAsm Start +; THUMB-DISABLE-NEXT: nop +; THUMB-DISABLE-NEXT: @ InlineAsm End +; THUMB-DISABLE-NEXT: LBB6_2: @ %for.body +; THUMB-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-DISABLE-NEXT: subs r0, #1 +; THUMB-DISABLE-NEXT: @ InlineAsm Start +; THUMB-DISABLE-NEXT: add.w r4, r4, #1 +; THUMB-DISABLE-NEXT: @ InlineAsm End +; THUMB-DISABLE-NEXT: bne LBB6_2 +; THUMB-DISABLE-NEXT: @ %bb.3: @ %for.exit +; THUMB-DISABLE-NEXT: movs r0, #0 +; THUMB-DISABLE-NEXT: @ InlineAsm Start +; THUMB-DISABLE-NEXT: nop +; THUMB-DISABLE-NEXT: @ InlineAsm End +; THUMB-DISABLE-NEXT: pop {r4, r7, pc} +; THUMB-DISABLE-NEXT: LBB6_4: @ %if.else +; THUMB-DISABLE-NEXT: lsls r0, r1, #1 +; THUMB-DISABLE-NEXT: pop {r4, r7, pc} entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.preheader @@ -435,46 +1099,131 @@ } ; Check that we handle calls to variadic functions correctly. -; CHECK-LABEL: callVariadicFunc: +; callVariadicFunc: ; -; ARM-ENABLE: cmp r0, #0 -; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; cmp r0, #0 +; beq [[ELSE_LABEL:LBB[0-9_]+]] +; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] ; ; Prologue code. -; CHECK: push {r7, lr} -; CHECK-NEXT: mov r7, sp -; CHECK-NEXT: sub sp, {{(sp, )?}}#12 +; push {r7, lr} +; mov r7, sp +; sub sp, {{(sp, )?}}#12 ; -; ARM-DISABLE: cmp r0, #0 -; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; THUMB-DISABLE-NEXT: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; cmp r0, #0 +; beq [[ELSE_LABEL:LBB[0-9_]+]] +; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] ; ; Setup of the varags. 
-; CHECK: mov r0, r1 -; CHECK-NEXT: mov r2, r1 -; CHECK-NEXT: mov r3, r1 -; ARM-NEXT: str r1, [sp] -; ARM-NEXT: str r1, [sp, #4] -; THUMB-NEXT: strd r1, r1, [sp] -; CHECK-NEXT: str r1, [sp, #8] -; CHECK-NEXT: bl{{x?}} _someVariadicFunc -; CHECK-NEXT: lsl{{s?}} r0, r0, #3 -; ARM-NEXT: mov sp, r7 -; THUMB-NEXT: add sp, #12 -; CHECK-NEXT: pop {r7, pc} -; -; CHECK: [[ELSE_LABEL]]: @ %if.else +; mov r0, r1 +; mov r2, r1 +; mov r3, r1 +; str r1, [sp] +; str r1, [sp, #4] +; strd r1, r1, [sp] +; str r1, [sp, #8] +; bl{{x?}} _someVariadicFunc +; lsl{{s?}} r0, r0, #3 +; mov sp, r7 +; add sp, #12 +; pop {r7, pc} +; +; [[ELSE_LABEL]]: @ %if.else ; Shift second argument by one and store into returned register. -; CHECK: lsl{{s?}} r0, r1, #1 +; lsl{{s?}} r0, r1, #1 ; ; Epilogue code. -; ENABLE-NEXT: bx lr +; bx lr ; -; ARM-DISABLE-NEXT: mov sp, r7 -; THUMB-DISABLE-NEXT: add sp, #12 -; DISABLE-NEXT: pop {r7, pc} +; mov sp, r7 +; add sp, #12 +; pop {r7, pc} define i32 @callVariadicFunc(i32 %cond, i32 %N) "no-frame-pointer-elim"="true" { +; ARM-ENABLE-LABEL: callVariadicFunc: +; ARM-ENABLE: @ %bb.0: @ %entry +; ARM-ENABLE-NEXT: cmp r0, #0 +; ARM-ENABLE-NEXT: beq LBB7_2 +; ARM-ENABLE-NEXT: @ %bb.1: @ %if.then +; ARM-ENABLE-NEXT: push {r7, lr} +; ARM-ENABLE-NEXT: mov r7, sp +; ARM-ENABLE-NEXT: sub sp, sp, #12 +; ARM-ENABLE-NEXT: mov r0, r1 +; ARM-ENABLE-NEXT: mov r2, r1 +; ARM-ENABLE-NEXT: mov r3, r1 +; ARM-ENABLE-NEXT: str r1, [sp] +; ARM-ENABLE-NEXT: str r1, [sp, #4] +; ARM-ENABLE-NEXT: str r1, [sp, #8] +; ARM-ENABLE-NEXT: bl _someVariadicFunc +; ARM-ENABLE-NEXT: lsl r0, r0, #3 +; ARM-ENABLE-NEXT: mov sp, r7 +; ARM-ENABLE-NEXT: pop {r7, pc} +; ARM-ENABLE-NEXT: LBB7_2: @ %if.else +; ARM-ENABLE-NEXT: lsl r0, r1, #1 +; ARM-ENABLE-NEXT: bx lr +; +; ARM-DISABLE-LABEL: callVariadicFunc: +; ARM-DISABLE: @ %bb.0: @ %entry +; ARM-DISABLE-NEXT: push {r7, lr} +; ARM-DISABLE-NEXT: mov r7, sp +; ARM-DISABLE-NEXT: sub sp, sp, #12 +; ARM-DISABLE-NEXT: cmp r0, #0 +; ARM-DISABLE-NEXT: beq LBB7_2 +; 
ARM-DISABLE-NEXT: @ %bb.1: @ %if.then +; ARM-DISABLE-NEXT: mov r0, r1 +; ARM-DISABLE-NEXT: mov r2, r1 +; ARM-DISABLE-NEXT: mov r3, r1 +; ARM-DISABLE-NEXT: str r1, [sp] +; ARM-DISABLE-NEXT: str r1, [sp, #4] +; ARM-DISABLE-NEXT: str r1, [sp, #8] +; ARM-DISABLE-NEXT: bl _someVariadicFunc +; ARM-DISABLE-NEXT: lsl r0, r0, #3 +; ARM-DISABLE-NEXT: mov sp, r7 +; ARM-DISABLE-NEXT: pop {r7, pc} +; ARM-DISABLE-NEXT: LBB7_2: @ %if.else +; ARM-DISABLE-NEXT: lsl r0, r1, #1 +; ARM-DISABLE-NEXT: mov sp, r7 +; ARM-DISABLE-NEXT: pop {r7, pc} +; +; THUMB-ENABLE-LABEL: callVariadicFunc: +; THUMB-ENABLE: @ %bb.0: @ %entry +; THUMB-ENABLE-NEXT: cbz r0, LBB7_2 +; THUMB-ENABLE-NEXT: @ %bb.1: @ %if.then +; THUMB-ENABLE-NEXT: push {r7, lr} +; THUMB-ENABLE-NEXT: mov r7, sp +; THUMB-ENABLE-NEXT: sub sp, #12 +; THUMB-ENABLE-NEXT: mov r0, r1 +; THUMB-ENABLE-NEXT: mov r2, r1 +; THUMB-ENABLE-NEXT: mov r3, r1 +; THUMB-ENABLE-NEXT: strd r1, r1, [sp] +; THUMB-ENABLE-NEXT: str r1, [sp, #8] +; THUMB-ENABLE-NEXT: bl _someVariadicFunc +; THUMB-ENABLE-NEXT: lsls r0, r0, #3 +; THUMB-ENABLE-NEXT: add sp, #12 +; THUMB-ENABLE-NEXT: pop {r7, pc} +; THUMB-ENABLE-NEXT: LBB7_2: @ %if.else +; THUMB-ENABLE-NEXT: lsls r0, r1, #1 +; THUMB-ENABLE-NEXT: bx lr +; +; THUMB-DISABLE-LABEL: callVariadicFunc: +; THUMB-DISABLE: @ %bb.0: @ %entry +; THUMB-DISABLE-NEXT: push {r7, lr} +; THUMB-DISABLE-NEXT: mov r7, sp +; THUMB-DISABLE-NEXT: sub sp, #12 +; THUMB-DISABLE-NEXT: cbz r0, LBB7_2 +; THUMB-DISABLE-NEXT: @ %bb.1: @ %if.then +; THUMB-DISABLE-NEXT: mov r0, r1 +; THUMB-DISABLE-NEXT: mov r2, r1 +; THUMB-DISABLE-NEXT: mov r3, r1 +; THUMB-DISABLE-NEXT: strd r1, r1, [sp] +; THUMB-DISABLE-NEXT: str r1, [sp, #8] +; THUMB-DISABLE-NEXT: bl _someVariadicFunc +; THUMB-DISABLE-NEXT: lsls r0, r0, #3 +; THUMB-DISABLE-NEXT: add sp, #12 +; THUMB-DISABLE-NEXT: pop {r7, pc} +; THUMB-DISABLE-NEXT: LBB7_2: @ %if.else +; THUMB-DISABLE-NEXT: lsls r0, r1, #1 +; THUMB-DISABLE-NEXT: add sp, #12 +; THUMB-DISABLE-NEXT: pop {r7, pc} entry: %tobool = 
icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %if.then @@ -499,29 +1248,86 @@ ; Although this is not incorrect to insert such code, it is useless ; and it hurts the binary size. ; -; CHECK-LABEL: noreturn: -; DISABLE: push -; ARM-ENABLE: cmp r0, #0 -; ARM-DISABLE: cmp r0, #0 -; ARM-ENABLE: bne [[ABORT:LBB[0-9_]+]] -; ARM-DISABLE: bne [[ABORT:LBB[0-9_]+]] -; THUMB-ENABLE: cbnz r0, [[ABORT:LBB[0-9_]+]] -; THUMB-DISABLE: cbnz r0, [[ABORT:LBB[0-9_]+]] +; noreturn: +; push +; cmp r0, #0 +; cmp r0, #0 +; bne [[ABORT:LBB[0-9_]+]] +; bne [[ABORT:LBB[0-9_]+]] +; cbnz r0, [[ABORT:LBB[0-9_]+]] +; cbnz r0, [[ABORT:LBB[0-9_]+]] ; -; CHECK: mov{{s?}} r0, #42 +; mov{{s?}} r0, #42 ; -; ENABLE-NEXT: bx lr +; bx lr ; -; DISABLE-NEXT: pop +; pop ;; -; CHECK: [[ABORT]]: @ %if.abort +; [[ABORT]]: @ %if.abort ; -; ENABLE: push +; push ; -; CHECK: bl{{x?}} _abort -; ENABLE-NOT: pop +; bl{{x?}} _abort +; pop define i32 @noreturn(i8 signext %bad_thing) "no-frame-pointer-elim"="true" { +; ARM-ENABLE-LABEL: noreturn: +; ARM-ENABLE: @ %bb.0: @ %entry +; ARM-ENABLE-NEXT: cmp r0, #0 +; ARM-ENABLE-NEXT: bne LBB8_2 +; ARM-ENABLE-NEXT: @ %bb.1: @ %if.end +; ARM-ENABLE-NEXT: mov r0, #42 +; ARM-ENABLE-NEXT: bx lr +; ARM-ENABLE-NEXT: LBB8_2: @ %if.abort +; ARM-ENABLE-NEXT: push {r4, r7, lr} +; ARM-ENABLE-NEXT: add r7, sp, #4 +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: mov r0, #1 +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: bl _abort +; +; ARM-DISABLE-LABEL: noreturn: +; ARM-DISABLE: @ %bb.0: @ %entry +; ARM-DISABLE-NEXT: push {r4, r7, lr} +; ARM-DISABLE-NEXT: add r7, sp, #4 +; ARM-DISABLE-NEXT: cmp r0, #0 +; ARM-DISABLE-NEXT: bne LBB8_2 +; ARM-DISABLE-NEXT: @ %bb.1: @ %if.end +; ARM-DISABLE-NEXT: mov r0, #42 +; ARM-DISABLE-NEXT: pop {r4, r7, pc} +; ARM-DISABLE-NEXT: LBB8_2: @ %if.abort +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: mov r0, #1 +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: bl _abort +; +; THUMB-ENABLE-LABEL: noreturn: +; 
THUMB-ENABLE: @ %bb.0: @ %entry +; THUMB-ENABLE-NEXT: cbnz r0, LBB8_2 +; THUMB-ENABLE-NEXT: @ %bb.1: @ %if.end +; THUMB-ENABLE-NEXT: movs r0, #42 +; THUMB-ENABLE-NEXT: bx lr +; THUMB-ENABLE-NEXT: LBB8_2: @ %if.abort +; THUMB-ENABLE-NEXT: push {r4, r7, lr} +; THUMB-ENABLE-NEXT: add r7, sp, #4 +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: mov.w r0, #1 +; THUMB-ENABLE-NEXT: @ InlineAsm End +; THUMB-ENABLE-NEXT: bl _abort +; +; THUMB-DISABLE-LABEL: noreturn: +; THUMB-DISABLE: @ %bb.0: @ %entry +; THUMB-DISABLE-NEXT: push {r4, r7, lr} +; THUMB-DISABLE-NEXT: add r7, sp, #4 +; THUMB-DISABLE-NEXT: cbnz r0, LBB8_2 +; THUMB-DISABLE-NEXT: @ %bb.1: @ %if.end +; THUMB-DISABLE-NEXT: movs r0, #42 +; THUMB-DISABLE-NEXT: pop {r4, r7, pc} +; THUMB-DISABLE-NEXT: LBB8_2: @ %if.abort +; THUMB-DISABLE-NEXT: @ InlineAsm Start +; THUMB-DISABLE-NEXT: mov.w r0, #1 +; THUMB-DISABLE-NEXT: @ InlineAsm End +; THUMB-DISABLE-NEXT: bl _abort entry: %tobool = icmp eq i8 %bad_thing, 0 br i1 %tobool, label %if.end, label %if.abort @@ -546,9 +1352,142 @@ ; dominator is itself. In this case, we cannot perform shrink wrapping, but we ; should return gracefully and continue compilation. ; The only condition for this test is the compilation finishes correctly. 
-; CHECK-LABEL: infiniteloop -; CHECK: pop +; infiniteloop +; pop define void @infiniteloop() "no-frame-pointer-elim"="true" { +; ARM-LABEL: infiniteloop: +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: push {r4, r5, r7, lr} +; ARM-NEXT: add r7, sp, #8 +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: cmp r0, #0 +; ARM-NEXT: bne LBB9_3 +; ARM-NEXT: @ %bb.1: @ %if.then +; ARM-NEXT: sub r1, sp, #16 +; ARM-NEXT: mov sp, r1 +; ARM-NEXT: LBB9_2: @ %for.body +; ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-NEXT: @ InlineAsm Start +; ARM-NEXT: mov r2, #1 +; ARM-NEXT: @ InlineAsm End +; ARM-NEXT: add r0, r2, r0 +; ARM-NEXT: str r0, [r1] +; ARM-NEXT: b LBB9_2 +; ARM-NEXT: LBB9_3: @ %if.end +; ARM-NEXT: sub sp, r7, #8 +; ARM-NEXT: pop {r4, r5, r7, pc} +; +; THUMB-LABEL: infiniteloop: +; THUMB: @ %bb.0: @ %entry +; THUMB-NEXT: push {r4, r5, r7, lr} +; THUMB-NEXT: add r7, sp, #8 +; THUMB-NEXT: movs r0, #0 +; THUMB-NEXT: cbnz r0, LBB9_3 +; THUMB-NEXT: @ %bb.1: @ %if.then +; THUMB-NEXT: sub.w r0, sp, #16 +; THUMB-NEXT: mov sp, r0 +; THUMB-NEXT: movs r1, #0 +; THUMB-NEXT: LBB9_2: @ %for.body +; THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-NEXT: @ InlineAsm Start +; THUMB-NEXT: mov.w r2, #1 +; THUMB-NEXT: @ InlineAsm End +; THUMB-NEXT: add r1, r2 +; THUMB-NEXT: str r1, [r0] +; THUMB-NEXT: b LBB9_2 +; THUMB-NEXT: LBB9_3: @ %if.end +; THUMB-NEXT: sub.w r4, r7, #8 +; THUMB-NEXT: mov sp, r4 +; THUMB-NEXT: pop {r4, r5, r7, pc} +; ARM-ENABLE-LABEL: infiniteloop: +; ARM-ENABLE: @ %bb.0: @ %entry +; ARM-ENABLE-NEXT: push {r4, r5, r7, lr} +; ARM-ENABLE-NEXT: add r7, sp, #8 +; ARM-ENABLE-NEXT: mov r0, #0 +; ARM-ENABLE-NEXT: cmp r0, #0 +; ARM-ENABLE-NEXT: bne LBB9_3 +; ARM-ENABLE-NEXT: @ %bb.1: @ %if.then +; ARM-ENABLE-NEXT: sub r1, sp, #16 +; ARM-ENABLE-NEXT: mov sp, r1 +; ARM-ENABLE-NEXT: LBB9_2: @ %for.body +; ARM-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: mov r2, #1 +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: add 
r0, r2, r0 +; ARM-ENABLE-NEXT: str r0, [r1] +; ARM-ENABLE-NEXT: b LBB9_2 +; ARM-ENABLE-NEXT: LBB9_3: @ %if.end +; ARM-ENABLE-NEXT: sub sp, r7, #8 +; ARM-ENABLE-NEXT: pop {r4, r5, r7, pc} +; +; ARM-DISABLE-LABEL: infiniteloop: +; ARM-DISABLE: @ %bb.0: @ %entry +; ARM-DISABLE-NEXT: push {r4, r5, r7, lr} +; ARM-DISABLE-NEXT: add r7, sp, #8 +; ARM-DISABLE-NEXT: mov r0, #0 +; ARM-DISABLE-NEXT: cmp r0, #0 +; ARM-DISABLE-NEXT: bne LBB9_3 +; ARM-DISABLE-NEXT: @ %bb.1: @ %if.then +; ARM-DISABLE-NEXT: sub r1, sp, #16 +; ARM-DISABLE-NEXT: mov sp, r1 +; ARM-DISABLE-NEXT: LBB9_2: @ %for.body +; ARM-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: mov r2, #1 +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: add r0, r2, r0 +; ARM-DISABLE-NEXT: str r0, [r1] +; ARM-DISABLE-NEXT: b LBB9_2 +; ARM-DISABLE-NEXT: LBB9_3: @ %if.end +; ARM-DISABLE-NEXT: sub sp, r7, #8 +; ARM-DISABLE-NEXT: pop {r4, r5, r7, pc} +; +; THUMB-ENABLE-LABEL: infiniteloop: +; THUMB-ENABLE: @ %bb.0: @ %entry +; THUMB-ENABLE-NEXT: push {r4, r5, r7, lr} +; THUMB-ENABLE-NEXT: add r7, sp, #8 +; THUMB-ENABLE-NEXT: movs r0, #0 +; THUMB-ENABLE-NEXT: cbnz r0, LBB9_3 +; THUMB-ENABLE-NEXT: @ %bb.1: @ %if.then +; THUMB-ENABLE-NEXT: sub.w r0, sp, #16 +; THUMB-ENABLE-NEXT: mov sp, r0 +; THUMB-ENABLE-NEXT: movs r1, #0 +; THUMB-ENABLE-NEXT: LBB9_2: @ %for.body +; THUMB-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: mov.w r2, #1 +; THUMB-ENABLE-NEXT: @ InlineAsm End +; THUMB-ENABLE-NEXT: add r1, r2 +; THUMB-ENABLE-NEXT: str r1, [r0] +; THUMB-ENABLE-NEXT: b LBB9_2 +; THUMB-ENABLE-NEXT: LBB9_3: @ %if.end +; THUMB-ENABLE-NEXT: sub.w r4, r7, #8 +; THUMB-ENABLE-NEXT: mov sp, r4 +; THUMB-ENABLE-NEXT: pop {r4, r5, r7, pc} +; +; THUMB-DISABLE-LABEL: infiniteloop: +; THUMB-DISABLE: @ %bb.0: @ %entry +; THUMB-DISABLE-NEXT: push {r4, r5, r7, lr} +; THUMB-DISABLE-NEXT: add r7, sp, #8 +; THUMB-DISABLE-NEXT: movs 
r0, #0 +; THUMB-DISABLE-NEXT: cbnz r0, LBB9_3 +; THUMB-DISABLE-NEXT: @ %bb.1: @ %if.then +; THUMB-DISABLE-NEXT: sub.w r0, sp, #16 +; THUMB-DISABLE-NEXT: mov sp, r0 +; THUMB-DISABLE-NEXT: movs r1, #0 +; THUMB-DISABLE-NEXT: LBB9_2: @ %for.body +; THUMB-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-DISABLE-NEXT: @ InlineAsm Start +; THUMB-DISABLE-NEXT: mov.w r2, #1 +; THUMB-DISABLE-NEXT: @ InlineAsm End +; THUMB-DISABLE-NEXT: add r1, r2 +; THUMB-DISABLE-NEXT: str r1, [r0] +; THUMB-DISABLE-NEXT: b LBB9_2 +; THUMB-DISABLE-NEXT: LBB9_3: @ %if.end +; THUMB-DISABLE-NEXT: sub.w r4, r7, #8 +; THUMB-DISABLE-NEXT: mov sp, r4 +; THUMB-DISABLE-NEXT: pop {r4, r5, r7, pc} entry: br i1 undef, label %if.then, label %if.end @@ -568,8 +1507,8 @@ } ; Another infinite loop test this time with a body bigger than just one block. -; CHECK-LABEL: infiniteloop2 -; CHECK: pop +; infiniteloop2 +; pop define void @infiniteloop2() "no-frame-pointer-elim"="true" { entry: br i1 undef, label %if.then, label %if.end @@ -598,9 +1537,169 @@ } ; Another infinite loop test this time with two nested infinite loop. 
-; CHECK-LABEL: infiniteloop3 -; CHECK: bx lr +; infiniteloop3 +; bx lr define void @infiniteloop3() "no-frame-pointer-elim"="true" { +; ARM-LABEL: infiniteloop3: +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: cmp r0, #0 +; ARM-NEXT: bne LBB11_5 +; ARM-NEXT: @ %bb.1: @ %loop2a.preheader +; ARM-NEXT: mov r1, #0 +; ARM-NEXT: mov r2, r0 +; ARM-NEXT: b LBB11_3 +; ARM-NEXT: LBB11_2: @ %loop2b +; ARM-NEXT: @ in Loop: Header=BB11_3 Depth=1 +; ARM-NEXT: str r1, [r2] +; ARM-NEXT: mov r2, r1 +; ARM-NEXT: mov r1, r3 +; ARM-NEXT: LBB11_3: @ %loop1 +; ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-NEXT: ldr r3, [r0] +; ARM-NEXT: cmp r0, #0 +; ARM-NEXT: bne LBB11_2 +; ARM-NEXT: @ %bb.4: @ in Loop: Header=BB11_3 Depth=1 +; ARM-NEXT: mov r0, r1 +; ARM-NEXT: mov r1, r3 +; ARM-NEXT: mov r2, r0 +; ARM-NEXT: b LBB11_3 +; ARM-NEXT: LBB11_5: @ %end +; ARM-NEXT: bx lr +; +; THUMB-LABEL: infiniteloop3: +; THUMB: @ %bb.0: @ %entry +; THUMB-NEXT: movs r0, #0 +; THUMB-NEXT: cbnz r0, LBB11_5 +; THUMB-NEXT: @ %bb.1: @ %loop2a.preheader +; THUMB-NEXT: movs r0, #0 +; THUMB-NEXT: movs r1, #0 +; THUMB-NEXT: mov r2, r0 +; THUMB-NEXT: b LBB11_3 +; THUMB-NEXT: LBB11_2: @ %loop2b +; THUMB-NEXT: @ in Loop: Header=BB11_3 Depth=1 +; THUMB-NEXT: str r1, [r2] +; THUMB-NEXT: mov r2, r1 +; THUMB-NEXT: mov r1, r3 +; THUMB-NEXT: LBB11_3: @ %loop1 +; THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-NEXT: ldr r3, [r0] +; THUMB-NEXT: cmp r0, #0 +; THUMB-NEXT: bne LBB11_2 +; THUMB-NEXT: @ %bb.4: @ in Loop: Header=BB11_3 Depth=1 +; THUMB-NEXT: mov r0, r1 +; THUMB-NEXT: mov r1, r3 +; THUMB-NEXT: mov r2, r0 +; THUMB-NEXT: b LBB11_3 +; THUMB-NEXT: LBB11_5: @ %end +; THUMB-NEXT: bx lr +; ARM-ENABLE-LABEL: infiniteloop3: +; ARM-ENABLE: @ %bb.0: @ %entry +; ARM-ENABLE-NEXT: mov r0, #0 +; ARM-ENABLE-NEXT: cmp r0, #0 +; ARM-ENABLE-NEXT: bne LBB11_5 +; ARM-ENABLE-NEXT: @ %bb.1: @ %loop2a.preheader +; ARM-ENABLE-NEXT: mov r1, #0 +; ARM-ENABLE-NEXT: mov r2, r0 +; ARM-ENABLE-NEXT: b LBB11_3 +; 
ARM-ENABLE-NEXT: LBB11_2: @ %loop2b +; ARM-ENABLE-NEXT: @ in Loop: Header=BB11_3 Depth=1 +; ARM-ENABLE-NEXT: str r1, [r2] +; ARM-ENABLE-NEXT: mov r2, r1 +; ARM-ENABLE-NEXT: mov r1, r3 +; ARM-ENABLE-NEXT: LBB11_3: @ %loop1 +; ARM-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-ENABLE-NEXT: ldr r3, [r0] +; ARM-ENABLE-NEXT: cmp r0, #0 +; ARM-ENABLE-NEXT: bne LBB11_2 +; ARM-ENABLE-NEXT: @ %bb.4: @ in Loop: Header=BB11_3 Depth=1 +; ARM-ENABLE-NEXT: mov r0, r1 +; ARM-ENABLE-NEXT: mov r1, r3 +; ARM-ENABLE-NEXT: mov r2, r0 +; ARM-ENABLE-NEXT: b LBB11_3 +; ARM-ENABLE-NEXT: LBB11_5: @ %end +; ARM-ENABLE-NEXT: bx lr +; +; ARM-DISABLE-LABEL: infiniteloop3: +; ARM-DISABLE: @ %bb.0: @ %entry +; ARM-DISABLE-NEXT: mov r0, #0 +; ARM-DISABLE-NEXT: cmp r0, #0 +; ARM-DISABLE-NEXT: bne LBB11_5 +; ARM-DISABLE-NEXT: @ %bb.1: @ %loop2a.preheader +; ARM-DISABLE-NEXT: mov r1, #0 +; ARM-DISABLE-NEXT: mov r2, r0 +; ARM-DISABLE-NEXT: b LBB11_3 +; ARM-DISABLE-NEXT: LBB11_2: @ %loop2b +; ARM-DISABLE-NEXT: @ in Loop: Header=BB11_3 Depth=1 +; ARM-DISABLE-NEXT: str r1, [r2] +; ARM-DISABLE-NEXT: mov r2, r1 +; ARM-DISABLE-NEXT: mov r1, r3 +; ARM-DISABLE-NEXT: LBB11_3: @ %loop1 +; ARM-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-DISABLE-NEXT: ldr r3, [r0] +; ARM-DISABLE-NEXT: cmp r0, #0 +; ARM-DISABLE-NEXT: bne LBB11_2 +; ARM-DISABLE-NEXT: @ %bb.4: @ in Loop: Header=BB11_3 Depth=1 +; ARM-DISABLE-NEXT: mov r0, r1 +; ARM-DISABLE-NEXT: mov r1, r3 +; ARM-DISABLE-NEXT: mov r2, r0 +; ARM-DISABLE-NEXT: b LBB11_3 +; ARM-DISABLE-NEXT: LBB11_5: @ %end +; ARM-DISABLE-NEXT: bx lr +; +; THUMB-ENABLE-LABEL: infiniteloop3: +; THUMB-ENABLE: @ %bb.0: @ %entry +; THUMB-ENABLE-NEXT: movs r0, #0 +; THUMB-ENABLE-NEXT: cbnz r0, LBB11_5 +; THUMB-ENABLE-NEXT: @ %bb.1: @ %loop2a.preheader +; THUMB-ENABLE-NEXT: movs r0, #0 +; THUMB-ENABLE-NEXT: movs r1, #0 +; THUMB-ENABLE-NEXT: mov r2, r0 +; THUMB-ENABLE-NEXT: b LBB11_3 +; THUMB-ENABLE-NEXT: LBB11_2: @ %loop2b +; THUMB-ENABLE-NEXT: @ in Loop: Header=BB11_3 
Depth=1 +; THUMB-ENABLE-NEXT: str r1, [r2] +; THUMB-ENABLE-NEXT: mov r2, r1 +; THUMB-ENABLE-NEXT: mov r1, r3 +; THUMB-ENABLE-NEXT: LBB11_3: @ %loop1 +; THUMB-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-ENABLE-NEXT: ldr r3, [r0] +; THUMB-ENABLE-NEXT: cmp r0, #0 +; THUMB-ENABLE-NEXT: bne LBB11_2 +; THUMB-ENABLE-NEXT: @ %bb.4: @ in Loop: Header=BB11_3 Depth=1 +; THUMB-ENABLE-NEXT: mov r0, r1 +; THUMB-ENABLE-NEXT: mov r1, r3 +; THUMB-ENABLE-NEXT: mov r2, r0 +; THUMB-ENABLE-NEXT: b LBB11_3 +; THUMB-ENABLE-NEXT: LBB11_5: @ %end +; THUMB-ENABLE-NEXT: bx lr +; +; THUMB-DISABLE-LABEL: infiniteloop3: +; THUMB-DISABLE: @ %bb.0: @ %entry +; THUMB-DISABLE-NEXT: movs r0, #0 +; THUMB-DISABLE-NEXT: cbnz r0, LBB11_5 +; THUMB-DISABLE-NEXT: @ %bb.1: @ %loop2a.preheader +; THUMB-DISABLE-NEXT: movs r0, #0 +; THUMB-DISABLE-NEXT: movs r1, #0 +; THUMB-DISABLE-NEXT: mov r2, r0 +; THUMB-DISABLE-NEXT: b LBB11_3 +; THUMB-DISABLE-NEXT: LBB11_2: @ %loop2b +; THUMB-DISABLE-NEXT: @ in Loop: Header=BB11_3 Depth=1 +; THUMB-DISABLE-NEXT: str r1, [r2] +; THUMB-DISABLE-NEXT: mov r2, r1 +; THUMB-DISABLE-NEXT: mov r1, r3 +; THUMB-DISABLE-NEXT: LBB11_3: @ %loop1 +; THUMB-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-DISABLE-NEXT: ldr r3, [r0] +; THUMB-DISABLE-NEXT: cmp r0, #0 +; THUMB-DISABLE-NEXT: bne LBB11_2 +; THUMB-DISABLE-NEXT: @ %bb.4: @ in Loop: Header=BB11_3 Depth=1 +; THUMB-DISABLE-NEXT: mov r0, r1 +; THUMB-DISABLE-NEXT: mov r1, r3 +; THUMB-DISABLE-NEXT: mov r2, r0 +; THUMB-DISABLE-NEXT: b LBB11_3 +; THUMB-DISABLE-NEXT: LBB11_5: @ %end +; THUMB-DISABLE-NEXT: bx lr entry: br i1 undef, label %loop2a, label %body @@ -636,31 +1735,226 @@ ; exercise the path where we were dereferencing the end iterator ; to access debug info location while inserting the spill code ; during PEI with shrink-wrapping enable. 
-; CHECK-LABEL: debug_info: +; debug_info: ; -; ENABLE: {{tst r2, #1|lsls r1, r2, #31}} -; ENABLE-NEXT: beq [[BB13:LBB[0-9_]+]] +; {{tst r2, #1|lsls r1, r2, #31}} +; beq [[BB13:LBB[0-9_]+]] ; -; CHECK: push +; push ; -; DISABLE: {{tst r2, #1|lsls r1, r2, #31}} -; DISABLE: beq [[BB13:LBB[0-9_]+]] +; {{tst r2, #1|lsls r1, r2, #31}} +; beq [[BB13:LBB[0-9_]+]] ; -; CHECK: bl{{x?}} _pow +; bl{{x?}} _pow ; ; -; ENABLE: pop +; pop ; -; CHECK: [[BB13]]: -; CHECK: vldr +; [[BB13]]: +; vldr ; -; DISABLE: pop +; pop ; ; FIXME: This is flakey passing by finding 'bl' somewhere amongst the debug ; info (like labels named 'line_table) not because it's found a bl instruction. ; -; CHECK: bl +; bl define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %tmp) "no-frame-pointer-elim"="true" { +; ARM-ENABLE-LABEL: debug_info: +; ARM-ENABLE: @ %bb.0: @ %bb +; ARM-ENABLE-NEXT: tst r2, #1 +; ARM-ENABLE-NEXT: beq LBB12_2 +; ARM-ENABLE-NEXT: @ %bb.1: @ %bb3 +; ARM-ENABLE-NEXT: push {r4, r7, lr} +; ARM-ENABLE-NEXT: add r7, sp, #4 +; ARM-ENABLE-NEXT: sub r4, sp, #16 +; ARM-ENABLE-NEXT: bfc r4, #0, #4 +; ARM-ENABLE-NEXT: mov sp, r4 +; ARM-ENABLE-NEXT: ldr r1, [r7, #8] +; ARM-ENABLE-NEXT: mov r2, r3 +; ARM-ENABLE-NEXT: vst1.64 {d8, d9}, [r4:128] +; ARM-ENABLE-NEXT: vmov s16, r0 +; ARM-ENABLE-NEXT: mov r0, r3 +; ARM-ENABLE-NEXT: vmov d9, r3, r1 +; ARM-ENABLE-NEXT: mov r3, r1 +; ARM-ENABLE-NEXT: bl _pow +; ARM-ENABLE-NEXT: vmov.f32 s0, #1.000000e+00 +; ARM-ENABLE-NEXT: mov r4, sp +; ARM-ENABLE-NEXT: vmov.f64 d16, #1.000000e+00 +; ARM-ENABLE-NEXT: vadd.f64 d16, d9, d16 +; ARM-ENABLE-NEXT: vcmpe.f32 s16, s0 +; ARM-ENABLE-NEXT: vmrs APSR_nzcv, fpscr +; ARM-ENABLE-NEXT: vmov d17, r0, r1 +; ARM-ENABLE-NEXT: vmov.f64 d18, d9 +; ARM-ENABLE-NEXT: vadd.f64 d17, d17, d17 +; ARM-ENABLE-NEXT: vmovgt.f64 d18, d16 +; ARM-ENABLE-NEXT: vcmp.f64 d18, d9 +; ARM-ENABLE-NEXT: vmrs APSR_nzcv, fpscr +; ARM-ENABLE-NEXT: vmovne.f64 d9, d17 +; ARM-ENABLE-NEXT: vcvt.f32.f64 s0, d9 +; 
ARM-ENABLE-NEXT: vld1.64 {d8, d9}, [r4:128] +; ARM-ENABLE-NEXT: sub sp, r7, #4 +; ARM-ENABLE-NEXT: pop {r4, r7, lr} +; ARM-ENABLE-NEXT: vmov r0, s0 +; ARM-ENABLE-NEXT: bx lr +; ARM-ENABLE-NEXT: LBB12_2: +; ARM-ENABLE-NEXT: vldr s0, LCPI12_0 +; ARM-ENABLE-NEXT: vmov r0, s0 +; ARM-ENABLE-NEXT: bx lr +; ARM-ENABLE-NEXT: .p2align 2 +; ARM-ENABLE-NEXT: @ %bb.3: +; ARM-ENABLE-NEXT: .data_region +; ARM-ENABLE-NEXT: LCPI12_0: +; ARM-ENABLE-NEXT: .long 0 @ float 0 +; ARM-ENABLE-NEXT: .end_data_region +; +; ARM-DISABLE-LABEL: debug_info: +; ARM-DISABLE: @ %bb.0: @ %bb +; ARM-DISABLE-NEXT: push {r4, r7, lr} +; ARM-DISABLE-NEXT: add r7, sp, #4 +; ARM-DISABLE-NEXT: sub r4, sp, #16 +; ARM-DISABLE-NEXT: bfc r4, #0, #4 +; ARM-DISABLE-NEXT: mov sp, r4 +; ARM-DISABLE-NEXT: tst r2, #1 +; ARM-DISABLE-NEXT: vst1.64 {d8, d9}, [r4:128] +; ARM-DISABLE-NEXT: beq LBB12_2 +; ARM-DISABLE-NEXT: @ %bb.1: @ %bb3 +; ARM-DISABLE-NEXT: ldr r1, [r7, #8] +; ARM-DISABLE-NEXT: vmov s16, r0 +; ARM-DISABLE-NEXT: mov r0, r3 +; ARM-DISABLE-NEXT: mov r2, r3 +; ARM-DISABLE-NEXT: vmov d9, r3, r1 +; ARM-DISABLE-NEXT: mov r3, r1 +; ARM-DISABLE-NEXT: bl _pow +; ARM-DISABLE-NEXT: vmov.f32 s0, #1.000000e+00 +; ARM-DISABLE-NEXT: vmov.f64 d16, #1.000000e+00 +; ARM-DISABLE-NEXT: vadd.f64 d16, d9, d16 +; ARM-DISABLE-NEXT: vcmpe.f32 s16, s0 +; ARM-DISABLE-NEXT: vmrs APSR_nzcv, fpscr +; ARM-DISABLE-NEXT: vmov d17, r0, r1 +; ARM-DISABLE-NEXT: vmov.f64 d18, d9 +; ARM-DISABLE-NEXT: vadd.f64 d17, d17, d17 +; ARM-DISABLE-NEXT: vmovgt.f64 d18, d16 +; ARM-DISABLE-NEXT: vcmp.f64 d18, d9 +; ARM-DISABLE-NEXT: vmrs APSR_nzcv, fpscr +; ARM-DISABLE-NEXT: vmovne.f64 d9, d17 +; ARM-DISABLE-NEXT: vcvt.f32.f64 s0, d9 +; ARM-DISABLE-NEXT: b LBB12_3 +; ARM-DISABLE-NEXT: LBB12_2: +; ARM-DISABLE-NEXT: vldr s0, LCPI12_0 +; ARM-DISABLE-NEXT: LBB12_3: @ %bb13 +; ARM-DISABLE-NEXT: mov r4, sp +; ARM-DISABLE-NEXT: vld1.64 {d8, d9}, [r4:128] +; ARM-DISABLE-NEXT: vmov r0, s0 +; ARM-DISABLE-NEXT: sub sp, r7, #4 +; ARM-DISABLE-NEXT: pop {r4, r7, pc} 
+; ARM-DISABLE-NEXT: .p2align 2 +; ARM-DISABLE-NEXT: @ %bb.4: +; ARM-DISABLE-NEXT: .data_region +; ARM-DISABLE-NEXT: LCPI12_0: +; ARM-DISABLE-NEXT: .long 0 @ float 0 +; ARM-DISABLE-NEXT: .end_data_region +; +; THUMB-ENABLE-LABEL: debug_info: +; THUMB-ENABLE: @ %bb.0: @ %bb +; THUMB-ENABLE-NEXT: lsls r1, r2, #31 +; THUMB-ENABLE-NEXT: beq LBB12_2 +; THUMB-ENABLE-NEXT: @ %bb.1: @ %bb3 +; THUMB-ENABLE-NEXT: push {r4, r7, lr} +; THUMB-ENABLE-NEXT: add r7, sp, #4 +; THUMB-ENABLE-NEXT: sub.w r4, sp, #16 +; THUMB-ENABLE-NEXT: bfc r4, #0, #4 +; THUMB-ENABLE-NEXT: mov sp, r4 +; THUMB-ENABLE-NEXT: ldr r1, [r7, #8] +; THUMB-ENABLE-NEXT: mov r2, r3 +; THUMB-ENABLE-NEXT: vst1.64 {d8, d9}, [r4:128] +; THUMB-ENABLE-NEXT: vmov s16, r0 +; THUMB-ENABLE-NEXT: mov r0, r3 +; THUMB-ENABLE-NEXT: vmov d9, r3, r1 +; THUMB-ENABLE-NEXT: mov r3, r1 +; THUMB-ENABLE-NEXT: bl _pow +; THUMB-ENABLE-NEXT: vmov.f32 s0, #1.000000e+00 +; THUMB-ENABLE-NEXT: mov r4, sp +; THUMB-ENABLE-NEXT: vmov.f64 d16, #1.000000e+00 +; THUMB-ENABLE-NEXT: vmov.f64 d18, d9 +; THUMB-ENABLE-NEXT: vcmpe.f32 s16, s0 +; THUMB-ENABLE-NEXT: vadd.f64 d16, d9, d16 +; THUMB-ENABLE-NEXT: vmrs APSR_nzcv, fpscr +; THUMB-ENABLE-NEXT: it gt +; THUMB-ENABLE-NEXT: vmovgt.f64 d18, d16 +; THUMB-ENABLE-NEXT: vcmp.f64 d18, d9 +; THUMB-ENABLE-NEXT: vmov d17, r0, r1 +; THUMB-ENABLE-NEXT: vmrs APSR_nzcv, fpscr +; THUMB-ENABLE-NEXT: vadd.f64 d17, d17, d17 +; THUMB-ENABLE-NEXT: it ne +; THUMB-ENABLE-NEXT: vmovne.f64 d9, d17 +; THUMB-ENABLE-NEXT: vcvt.f32.f64 s0, d9 +; THUMB-ENABLE-NEXT: vld1.64 {d8, d9}, [r4:128] +; THUMB-ENABLE-NEXT: subs r4, r7, #4 +; THUMB-ENABLE-NEXT: mov sp, r4 +; THUMB-ENABLE-NEXT: pop.w {r4, r7, lr} +; THUMB-ENABLE-NEXT: vmov r0, s0 +; THUMB-ENABLE-NEXT: bx lr +; THUMB-ENABLE-NEXT: LBB12_2: +; THUMB-ENABLE-NEXT: vldr s0, LCPI12_0 +; THUMB-ENABLE-NEXT: vmov r0, s0 +; THUMB-ENABLE-NEXT: bx lr +; THUMB-ENABLE-NEXT: .p2align 2 +; THUMB-ENABLE-NEXT: @ %bb.3: +; THUMB-ENABLE-NEXT: .data_region +; THUMB-ENABLE-NEXT: LCPI12_0: +; 
THUMB-ENABLE-NEXT: .long 0 @ float 0 +; THUMB-ENABLE-NEXT: .end_data_region +; +; THUMB-DISABLE-LABEL: debug_info: +; THUMB-DISABLE: @ %bb.0: @ %bb +; THUMB-DISABLE-NEXT: push {r4, r7, lr} +; THUMB-DISABLE-NEXT: add r7, sp, #4 +; THUMB-DISABLE-NEXT: sub.w r4, sp, #16 +; THUMB-DISABLE-NEXT: bfc r4, #0, #4 +; THUMB-DISABLE-NEXT: mov sp, r4 +; THUMB-DISABLE-NEXT: lsls r1, r2, #31 +; THUMB-DISABLE-NEXT: vst1.64 {d8, d9}, [r4:128] +; THUMB-DISABLE-NEXT: beq LBB12_2 +; THUMB-DISABLE-NEXT: @ %bb.1: @ %bb3 +; THUMB-DISABLE-NEXT: ldr r1, [r7, #8] +; THUMB-DISABLE-NEXT: vmov s16, r0 +; THUMB-DISABLE-NEXT: mov r0, r3 +; THUMB-DISABLE-NEXT: mov r2, r3 +; THUMB-DISABLE-NEXT: vmov d9, r3, r1 +; THUMB-DISABLE-NEXT: mov r3, r1 +; THUMB-DISABLE-NEXT: bl _pow +; THUMB-DISABLE-NEXT: vmov.f32 s0, #1.000000e+00 +; THUMB-DISABLE-NEXT: vmov.f64 d16, #1.000000e+00 +; THUMB-DISABLE-NEXT: vmov.f64 d18, d9 +; THUMB-DISABLE-NEXT: vcmpe.f32 s16, s0 +; THUMB-DISABLE-NEXT: vadd.f64 d16, d9, d16 +; THUMB-DISABLE-NEXT: vmrs APSR_nzcv, fpscr +; THUMB-DISABLE-NEXT: it gt +; THUMB-DISABLE-NEXT: vmovgt.f64 d18, d16 +; THUMB-DISABLE-NEXT: vcmp.f64 d18, d9 +; THUMB-DISABLE-NEXT: vmov d17, r0, r1 +; THUMB-DISABLE-NEXT: vmrs APSR_nzcv, fpscr +; THUMB-DISABLE-NEXT: vadd.f64 d17, d17, d17 +; THUMB-DISABLE-NEXT: it ne +; THUMB-DISABLE-NEXT: vmovne.f64 d9, d17 +; THUMB-DISABLE-NEXT: vcvt.f32.f64 s0, d9 +; THUMB-DISABLE-NEXT: b LBB12_3 +; THUMB-DISABLE-NEXT: LBB12_2: +; THUMB-DISABLE-NEXT: vldr s0, LCPI12_0 +; THUMB-DISABLE-NEXT: LBB12_3: @ %bb13 +; THUMB-DISABLE-NEXT: mov r4, sp +; THUMB-DISABLE-NEXT: vld1.64 {d8, d9}, [r4:128] +; THUMB-DISABLE-NEXT: subs r4, r7, #4 +; THUMB-DISABLE-NEXT: vmov r0, s0 +; THUMB-DISABLE-NEXT: mov sp, r4 +; THUMB-DISABLE-NEXT: pop {r4, r7, pc} +; THUMB-DISABLE-NEXT: .p2align 2 +; THUMB-DISABLE-NEXT: @ %bb.4: +; THUMB-DISABLE-NEXT: .data_region +; THUMB-DISABLE-NEXT: LCPI12_0: +; THUMB-DISABLE-NEXT: .long 0 @ float 0 +; THUMB-DISABLE-NEXT: .end_data_region bb: br i1 %or.cond, 
label %bb3, label %bb13 Index: test/CodeGen/Thumb/thumb-shrink-wrapping.ll =================================================================== --- test/CodeGen/Thumb/thumb-shrink-wrapping.ll +++ test/CodeGen/Thumb/thumb-shrink-wrapping.ll @@ -1,11 +1,8 @@ -; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumb-macho \ -; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE --check-prefix=ENABLE-V4T -; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumbv5-macho \ -; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE --check-prefix=ENABLE-V5T -; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumb-macho \ -; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE --check-prefix=DISABLE-V4T -; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumbv5-macho \ -; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE --check-prefix=DISABLE-V5T +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumb-macho | FileCheck %s --check-prefix=ENABLE-V4T +; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumbv5-macho | FileCheck %s --check-prefix=ENABLE-V5T +; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumb-macho | FileCheck %s --check-prefix=DISABLE-V4T +; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumbv5-macho | FileCheck %s --check-prefix=DISABLE-V5T ; ; Note: Lots of tests use inline asm instead of regular calls. 
@@ -20,49 +17,108 @@ ; results due to branches not being analyzable under v5 ; Initial motivating example: Simple diamond with a call just on one side. -; CHECK-LABEL: foo: -; -; Compare the arguments and jump to exit. -; No prologue needed. -; ENABLE: cmp r0, r1 -; ENABLE-NEXT: bge [[EXIT_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; CHECK: push {r7, lr} -; CHECK: sub sp, #8 -; -; Compare the arguments and jump to exit. -; After the prologue is set. -; DISABLE: cmp r0, r1 -; DISABLE-NEXT: bge [[EXIT_LABEL:LBB[0-9_]+]] -; -; Store %a in the alloca. -; CHECK: str r0, [sp, #4] -; Set the alloca address in the second argument. -; Set the first argument to zero. -; CHECK: movs r0, #0 -; CHECK-NEXT: add r1, sp, #4 -; CHECK-NEXT: bl -; -; With shrink-wrapping, epilogue is just after the call. -; ENABLE-NEXT: add sp, #8 -; ENABLE-V5T-NEXT: pop {r7, pc} -; ENABLE-V4T-NEXT: pop {r7} -; ENABLE-V4T-NEXT: pop {r1} -; ENABLE-V4T-NEXT: mov lr, r1 -; -; CHECK: [[EXIT_LABEL]]: -; -; Without shrink-wrapping, epilogue is in the exit block. -; Epilogue code. (What we pop does not matter.) 
-; DISABLE: add sp, #8 -; DISABLE-V5T-NEXT: pop {r7, pc} -; DISABLE-V4T-NEXT: pop {r7} -; DISABLE-V4T-NEXT: pop {r1} -; DISABLE-V4T-NEXT: bx r1 -; -; ENABLE-NEXT: bx lr define i32 @foo(i32 %a, i32 %b) { +; ENABLE-V4T-LABEL: foo: +; ENABLE-V4T: @ %bb.0: +; ENABLE-V4T-NEXT: cmp r0, r1 +; ENABLE-V4T-NEXT: bge LBB0_2 +; ENABLE-V4T-NEXT: @ %bb.1: @ %true +; ENABLE-V4T-NEXT: push {r7, lr} +; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V4T-NEXT: .cfi_offset lr, -4 +; ENABLE-V4T-NEXT: .cfi_offset r7, -8 +; ENABLE-V4T-NEXT: sub sp, #8 +; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-V4T-NEXT: str r0, [sp, #4] +; ENABLE-V4T-NEXT: ldr r0, LCPI0_0 +; ENABLE-V4T-NEXT: LPC0_0: +; ENABLE-V4T-NEXT: add r0, pc +; ENABLE-V4T-NEXT: ldr r2, [r0] +; ENABLE-V4T-NEXT: movs r0, #0 +; ENABLE-V4T-NEXT: add r1, sp, #4 +; ENABLE-V4T-NEXT: bl Ltmp0 +; ENABLE-V4T-NEXT: add sp, #8 +; ENABLE-V4T-NEXT: pop {r7} +; ENABLE-V4T-NEXT: pop {r1} +; ENABLE-V4T-NEXT: mov lr, r1 +; ENABLE-V4T-NEXT: LBB0_2: @ %false +; ENABLE-V4T-NEXT: bx lr +; ENABLE-V4T-NEXT: .p2align 2 +; ENABLE-V4T-NEXT: @ %bb.3: +; ENABLE-V4T-NEXT: .data_region +; ENABLE-V4T-NEXT: LCPI0_0: +; ENABLE-V4T-NEXT: .long L_doSomething$non_lazy_ptr-(LPC0_0+4) +; ENABLE-V4T-NEXT: .end_data_region +; +; ENABLE-V5T-LABEL: foo: +; ENABLE-V5T: @ %bb.0: +; ENABLE-V5T-NEXT: cmp r0, r1 +; ENABLE-V5T-NEXT: bge LBB0_2 +; ENABLE-V5T-NEXT: @ %bb.1: @ %true +; ENABLE-V5T-NEXT: push {r7, lr} +; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V5T-NEXT: .cfi_offset lr, -4 +; ENABLE-V5T-NEXT: .cfi_offset r7, -8 +; ENABLE-V5T-NEXT: sub sp, #8 +; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-V5T-NEXT: str r0, [sp, #4] +; ENABLE-V5T-NEXT: movs r0, #0 +; ENABLE-V5T-NEXT: add r1, sp, #4 +; ENABLE-V5T-NEXT: bl _doSomething +; ENABLE-V5T-NEXT: add sp, #8 +; ENABLE-V5T-NEXT: pop {r7, pc} +; ENABLE-V5T-NEXT: LBB0_2: @ %false +; ENABLE-V5T-NEXT: bx lr +; +; DISABLE-V4T-LABEL: foo: +; DISABLE-V4T: @ %bb.0: +; DISABLE-V4T-NEXT: push {r7, lr} +; 
DISABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V4T-NEXT: .cfi_offset lr, -4 +; DISABLE-V4T-NEXT: .cfi_offset r7, -8 +; DISABLE-V4T-NEXT: sub sp, #8 +; DISABLE-V4T-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-V4T-NEXT: cmp r0, r1 +; DISABLE-V4T-NEXT: bge LBB0_2 +; DISABLE-V4T-NEXT: @ %bb.1: @ %true +; DISABLE-V4T-NEXT: str r0, [sp, #4] +; DISABLE-V4T-NEXT: ldr r0, LCPI0_0 +; DISABLE-V4T-NEXT: LPC0_0: +; DISABLE-V4T-NEXT: add r0, pc +; DISABLE-V4T-NEXT: ldr r2, [r0] +; DISABLE-V4T-NEXT: movs r0, #0 +; DISABLE-V4T-NEXT: add r1, sp, #4 +; DISABLE-V4T-NEXT: bl Ltmp0 +; DISABLE-V4T-NEXT: LBB0_2: @ %false +; DISABLE-V4T-NEXT: add sp, #8 +; DISABLE-V4T-NEXT: pop {r7} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 +; DISABLE-V4T-NEXT: .p2align 2 +; DISABLE-V4T-NEXT: @ %bb.3: +; DISABLE-V4T-NEXT: .data_region +; DISABLE-V4T-NEXT: LCPI0_0: +; DISABLE-V4T-NEXT: .long L_doSomething$non_lazy_ptr-(LPC0_0+4) +; DISABLE-V4T-NEXT: .end_data_region +; +; DISABLE-V5T-LABEL: foo: +; DISABLE-V5T: @ %bb.0: +; DISABLE-V5T-NEXT: push {r7, lr} +; DISABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V5T-NEXT: .cfi_offset lr, -4 +; DISABLE-V5T-NEXT: .cfi_offset r7, -8 +; DISABLE-V5T-NEXT: sub sp, #8 +; DISABLE-V5T-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-V5T-NEXT: cmp r0, r1 +; DISABLE-V5T-NEXT: bge LBB0_2 +; DISABLE-V5T-NEXT: @ %bb.1: @ %true +; DISABLE-V5T-NEXT: str r0, [sp, #4] +; DISABLE-V5T-NEXT: movs r0, #0 +; DISABLE-V5T-NEXT: add r1, sp, #4 +; DISABLE-V5T-NEXT: bl _doSomething +; DISABLE-V5T-NEXT: LBB0_2: @ %false +; DISABLE-V5T-NEXT: add sp, #8 +; DISABLE-V5T-NEXT: pop {r7, pc} %tmp = alloca i32, align 4 %tmp2 = icmp slt i32 %a, %b br i1 %tmp2, label %true, label %false @@ -79,27 +135,114 @@ ; Same, but the final BB is non-trivial, so we don't duplicate the return inst. -; CHECK-LABEL: bar: -; -; With shrink-wrapping, epilogue is just after the call. 
-; CHECK: bl -; ENABLE-NEXT: add sp, #8 -; ENABLE-NEXT: pop {r7} -; ENABLE-NEXT: pop {r0} -; ENABLE-NEXT: mov lr, r0 -; -; CHECK: movs r0, #42 -; -; Without shrink-wrapping, epilogue is in the exit block. -; Epilogue code. (What we pop does not matter.) -; DISABLE: add sp, #8 -; DISABLE-V5T-NEXT: pop {r7, pc} -; DISABLE-V4T-NEXT: pop {r7} -; DISABLE-V4T-NEXT: pop {r1} -; DISABLE-V4T-NEXT: bx r1 -; -; ENABLE-NEXT: bx lr define i32 @bar(i32 %a, i32 %b) { +; ENABLE-V4T-LABEL: bar: +; ENABLE-V4T: @ %bb.0: +; ENABLE-V4T-NEXT: cmp r0, r1 +; ENABLE-V4T-NEXT: bge LBB1_2 +; ENABLE-V4T-NEXT: @ %bb.1: @ %true +; ENABLE-V4T-NEXT: push {r7, lr} +; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V4T-NEXT: .cfi_offset lr, -4 +; ENABLE-V4T-NEXT: .cfi_offset r7, -8 +; ENABLE-V4T-NEXT: sub sp, #8 +; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-V4T-NEXT: str r0, [sp, #4] +; ENABLE-V4T-NEXT: ldr r0, LCPI1_0 +; ENABLE-V4T-NEXT: LPC1_0: +; ENABLE-V4T-NEXT: add r0, pc +; ENABLE-V4T-NEXT: ldr r2, [r0] +; ENABLE-V4T-NEXT: movs r0, #0 +; ENABLE-V4T-NEXT: add r1, sp, #4 +; ENABLE-V4T-NEXT: bl Ltmp1 +; ENABLE-V4T-NEXT: add sp, #8 +; ENABLE-V4T-NEXT: pop {r7} +; ENABLE-V4T-NEXT: pop {r0} +; ENABLE-V4T-NEXT: mov lr, r0 +; ENABLE-V4T-NEXT: LBB1_2: @ %false +; ENABLE-V4T-NEXT: movs r0, #42 +; ENABLE-V4T-NEXT: bx lr +; ENABLE-V4T-NEXT: .p2align 2 +; ENABLE-V4T-NEXT: @ %bb.3: +; ENABLE-V4T-NEXT: .data_region +; ENABLE-V4T-NEXT: LCPI1_0: +; ENABLE-V4T-NEXT: .long L_doSomething$non_lazy_ptr-(LPC1_0+4) +; ENABLE-V4T-NEXT: .end_data_region +; +; ENABLE-V5T-LABEL: bar: +; ENABLE-V5T: @ %bb.0: +; ENABLE-V5T-NEXT: cmp r0, r1 +; ENABLE-V5T-NEXT: bge LBB1_2 +; ENABLE-V5T-NEXT: @ %bb.1: @ %true +; ENABLE-V5T-NEXT: push {r7, lr} +; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V5T-NEXT: .cfi_offset lr, -4 +; ENABLE-V5T-NEXT: .cfi_offset r7, -8 +; ENABLE-V5T-NEXT: sub sp, #8 +; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-V5T-NEXT: str r0, [sp, #4] +; ENABLE-V5T-NEXT: movs r0, #0 +; 
ENABLE-V5T-NEXT: add r1, sp, #4 +; ENABLE-V5T-NEXT: bl _doSomething +; ENABLE-V5T-NEXT: add sp, #8 +; ENABLE-V5T-NEXT: pop {r7} +; ENABLE-V5T-NEXT: pop {r0} +; ENABLE-V5T-NEXT: mov lr, r0 +; ENABLE-V5T-NEXT: LBB1_2: @ %false +; ENABLE-V5T-NEXT: movs r0, #42 +; ENABLE-V5T-NEXT: bx lr +; +; DISABLE-V4T-LABEL: bar: +; DISABLE-V4T: @ %bb.0: +; DISABLE-V4T-NEXT: push {r7, lr} +; DISABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V4T-NEXT: .cfi_offset lr, -4 +; DISABLE-V4T-NEXT: .cfi_offset r7, -8 +; DISABLE-V4T-NEXT: sub sp, #8 +; DISABLE-V4T-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-V4T-NEXT: cmp r0, r1 +; DISABLE-V4T-NEXT: bge LBB1_2 +; DISABLE-V4T-NEXT: @ %bb.1: @ %true +; DISABLE-V4T-NEXT: str r0, [sp, #4] +; DISABLE-V4T-NEXT: ldr r0, LCPI1_0 +; DISABLE-V4T-NEXT: LPC1_0: +; DISABLE-V4T-NEXT: add r0, pc +; DISABLE-V4T-NEXT: ldr r2, [r0] +; DISABLE-V4T-NEXT: movs r0, #0 +; DISABLE-V4T-NEXT: add r1, sp, #4 +; DISABLE-V4T-NEXT: bl Ltmp1 +; DISABLE-V4T-NEXT: LBB1_2: @ %false +; DISABLE-V4T-NEXT: movs r0, #42 +; DISABLE-V4T-NEXT: add sp, #8 +; DISABLE-V4T-NEXT: pop {r7} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 +; DISABLE-V4T-NEXT: .p2align 2 +; DISABLE-V4T-NEXT: @ %bb.3: +; DISABLE-V4T-NEXT: .data_region +; DISABLE-V4T-NEXT: LCPI1_0: +; DISABLE-V4T-NEXT: .long L_doSomething$non_lazy_ptr-(LPC1_0+4) +; DISABLE-V4T-NEXT: .end_data_region +; +; DISABLE-V5T-LABEL: bar: +; DISABLE-V5T: @ %bb.0: +; DISABLE-V5T-NEXT: push {r7, lr} +; DISABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V5T-NEXT: .cfi_offset lr, -4 +; DISABLE-V5T-NEXT: .cfi_offset r7, -8 +; DISABLE-V5T-NEXT: sub sp, #8 +; DISABLE-V5T-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-V5T-NEXT: cmp r0, r1 +; DISABLE-V5T-NEXT: bge LBB1_2 +; DISABLE-V5T-NEXT: @ %bb.1: @ %true +; DISABLE-V5T-NEXT: str r0, [sp, #4] +; DISABLE-V5T-NEXT: movs r0, #0 +; DISABLE-V5T-NEXT: add r1, sp, #4 +; DISABLE-V5T-NEXT: bl _doSomething +; DISABLE-V5T-NEXT: LBB1_2: @ %false +; DISABLE-V5T-NEXT: movs r0, #42 +; DISABLE-V5T-NEXT: add sp, 
#8 +; DISABLE-V5T-NEXT: pop {r7, pc} %tmp = alloca i32, align 4 %tmp2 = icmp slt i32 %a, %b br i1 %tmp2, label %true, label %false @@ -119,51 +262,128 @@ ; Check that we do not perform the restore inside the loop whereas the save ; is outside. -; CHECK-LABEL: freqSaveAndRestoreOutsideLoop: -; -; Shrink-wrapping allows to skip the prologue in the else case. -; ENABLE: cmp r0, #0 -; ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; Make sure we save the CSR used in the inline asm: r4. -; CHECK: push {r4, lr} -; -; DISABLE: cmp r0, #0 -; DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; -; SUM is in r0 because it is coalesced with the second -; argument on the else path. -; CHECK: movs [[SUM:r0]], #0 -; CHECK-NEXT: movs [[IV:r[0-9]+]], #10 -; -; Next BB. -; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body -; CHECK: movs [[TMP:r[0-9]+]], #1 -; CHECK: adds [[SUM]], [[TMP]], [[SUM]] -; CHECK-NEXT: subs [[IV]], [[IV]], #1 -; CHECK-NEXT: bne [[LOOP]] -; -; Next BB. -; SUM << 3. -; CHECK: lsls [[SUM]], [[SUM]], #3 -; -; Duplicated epilogue. -; DISABLE-V5T: pop {r4, pc} -; DISABLE-V4T: b [[END_LABEL:LBB[0-9_]+]] -; -; CHECK: [[ELSE_LABEL]]: @ %if.else -; Shift second argument by one and store into returned register. 
-; CHECK: lsls r0, r1, #1 -; DISABLE-V5T-NEXT: pop {r4, pc} -; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end -; DISABLE-V4T-NEXT: pop {r4} -; DISABLE-V4T-NEXT: pop {r1} -; DISABLE-V4T-NEXT: bx r1 -; -; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end -; ENABLE-NEXT: bx lr define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) { +; ENABLE-V4T-LABEL: freqSaveAndRestoreOutsideLoop: +; ENABLE-V4T: @ %bb.0: @ %entry +; ENABLE-V4T-NEXT: cmp r0, #0 +; ENABLE-V4T-NEXT: beq LBB2_4 +; ENABLE-V4T-NEXT: @ %bb.1: @ %for.preheader +; ENABLE-V4T-NEXT: push {r4, lr} +; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V4T-NEXT: .cfi_offset lr, -4 +; ENABLE-V4T-NEXT: .cfi_offset r4, -8 +; ENABLE-V4T-NEXT: @ InlineAsm Start +; ENABLE-V4T-NEXT: mov r8, r8 +; ENABLE-V4T-NEXT: @ InlineAsm End +; ENABLE-V4T-NEXT: movs r0, #0 +; ENABLE-V4T-NEXT: movs r1, #10 +; ENABLE-V4T-NEXT: LBB2_2: @ %for.body +; ENABLE-V4T-NEXT: @ =>This Inner Loop Header: Depth=1 +; ENABLE-V4T-NEXT: @ InlineAsm Start +; ENABLE-V4T-NEXT: movs r2, #1 +; ENABLE-V4T-NEXT: @ InlineAsm End +; ENABLE-V4T-NEXT: adds r0, r2, r0 +; ENABLE-V4T-NEXT: subs r1, r1, #1 +; ENABLE-V4T-NEXT: bne LBB2_2 +; ENABLE-V4T-NEXT: @ %bb.3: @ %for.end +; ENABLE-V4T-NEXT: lsls r0, r0, #3 +; ENABLE-V4T-NEXT: pop {r4} +; ENABLE-V4T-NEXT: pop {r1} +; ENABLE-V4T-NEXT: bx r1 +; ENABLE-V4T-NEXT: LBB2_4: @ %if.else +; ENABLE-V4T-NEXT: lsls r0, r1, #1 +; ENABLE-V4T-NEXT: bx lr +; +; ENABLE-V5T-LABEL: freqSaveAndRestoreOutsideLoop: +; ENABLE-V5T: @ %bb.0: @ %entry +; ENABLE-V5T-NEXT: cmp r0, #0 +; ENABLE-V5T-NEXT: beq LBB2_4 +; ENABLE-V5T-NEXT: @ %bb.1: @ %for.preheader +; ENABLE-V5T-NEXT: push {r4, lr} +; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V5T-NEXT: .cfi_offset lr, -4 +; ENABLE-V5T-NEXT: .cfi_offset r4, -8 +; ENABLE-V5T-NEXT: @ InlineAsm Start +; ENABLE-V5T-NEXT: mov r8, r8 +; ENABLE-V5T-NEXT: @ InlineAsm End +; ENABLE-V5T-NEXT: movs r0, #0 +; ENABLE-V5T-NEXT: movs r1, #10 +; ENABLE-V5T-NEXT: LBB2_2: @ %for.body +; ENABLE-V5T-NEXT: @ =>This 
Inner Loop Header: Depth=1 +; ENABLE-V5T-NEXT: @ InlineAsm Start +; ENABLE-V5T-NEXT: movs r2, #1 +; ENABLE-V5T-NEXT: @ InlineAsm End +; ENABLE-V5T-NEXT: adds r0, r2, r0 +; ENABLE-V5T-NEXT: subs r1, r1, #1 +; ENABLE-V5T-NEXT: bne LBB2_2 +; ENABLE-V5T-NEXT: @ %bb.3: @ %for.end +; ENABLE-V5T-NEXT: lsls r0, r0, #3 +; ENABLE-V5T-NEXT: pop {r4, pc} +; ENABLE-V5T-NEXT: LBB2_4: @ %if.else +; ENABLE-V5T-NEXT: lsls r0, r1, #1 +; ENABLE-V5T-NEXT: LBB2_5: @ %if.end +; ENABLE-V5T-NEXT: bx lr +; +; DISABLE-V4T-LABEL: freqSaveAndRestoreOutsideLoop: +; DISABLE-V4T: @ %bb.0: @ %entry +; DISABLE-V4T-NEXT: push {r4, lr} +; DISABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V4T-NEXT: .cfi_offset lr, -4 +; DISABLE-V4T-NEXT: .cfi_offset r4, -8 +; DISABLE-V4T-NEXT: cmp r0, #0 +; DISABLE-V4T-NEXT: beq LBB2_4 +; DISABLE-V4T-NEXT: @ %bb.1: @ %for.preheader +; DISABLE-V4T-NEXT: @ InlineAsm Start +; DISABLE-V4T-NEXT: mov r8, r8 +; DISABLE-V4T-NEXT: @ InlineAsm End +; DISABLE-V4T-NEXT: movs r0, #0 +; DISABLE-V4T-NEXT: movs r1, #10 +; DISABLE-V4T-NEXT: LBB2_2: @ %for.body +; DISABLE-V4T-NEXT: @ =>This Inner Loop Header: Depth=1 +; DISABLE-V4T-NEXT: @ InlineAsm Start +; DISABLE-V4T-NEXT: movs r2, #1 +; DISABLE-V4T-NEXT: @ InlineAsm End +; DISABLE-V4T-NEXT: adds r0, r2, r0 +; DISABLE-V4T-NEXT: subs r1, r1, #1 +; DISABLE-V4T-NEXT: bne LBB2_2 +; DISABLE-V4T-NEXT: @ %bb.3: @ %for.end +; DISABLE-V4T-NEXT: lsls r0, r0, #3 +; DISABLE-V4T-NEXT: b LBB2_5 +; DISABLE-V4T-NEXT: LBB2_4: @ %if.else +; DISABLE-V4T-NEXT: lsls r0, r1, #1 +; DISABLE-V4T-NEXT: LBB2_5: @ %if.end +; DISABLE-V4T-NEXT: pop {r4} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 +; +; DISABLE-V5T-LABEL: freqSaveAndRestoreOutsideLoop: +; DISABLE-V5T: @ %bb.0: @ %entry +; DISABLE-V5T-NEXT: push {r4, lr} +; DISABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V5T-NEXT: .cfi_offset lr, -4 +; DISABLE-V5T-NEXT: .cfi_offset r4, -8 +; DISABLE-V5T-NEXT: cmp r0, #0 +; DISABLE-V5T-NEXT: beq LBB2_4 +; DISABLE-V5T-NEXT: @ %bb.1: @ 
%for.preheader +; DISABLE-V5T-NEXT: @ InlineAsm Start +; DISABLE-V5T-NEXT: mov r8, r8 +; DISABLE-V5T-NEXT: @ InlineAsm End +; DISABLE-V5T-NEXT: movs r0, #0 +; DISABLE-V5T-NEXT: movs r1, #10 +; DISABLE-V5T-NEXT: LBB2_2: @ %for.body +; DISABLE-V5T-NEXT: @ =>This Inner Loop Header: Depth=1 +; DISABLE-V5T-NEXT: @ InlineAsm Start +; DISABLE-V5T-NEXT: movs r2, #1 +; DISABLE-V5T-NEXT: @ InlineAsm End +; DISABLE-V5T-NEXT: adds r0, r2, r0 +; DISABLE-V5T-NEXT: subs r1, r1, #1 +; DISABLE-V5T-NEXT: bne LBB2_2 +; DISABLE-V5T-NEXT: @ %bb.3: @ %for.end +; DISABLE-V5T-NEXT: lsls r0, r0, #3 +; DISABLE-V5T-NEXT: pop {r4, pc} +; DISABLE-V5T-NEXT: LBB2_4: @ %if.else +; DISABLE-V5T-NEXT: lsls r0, r1, #1 +; DISABLE-V5T-NEXT: pop {r4, pc} entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.preheader @@ -198,26 +418,112 @@ ; Check that we do not perform the shrink-wrapping inside the loop even ; though that would be legal. The cost model must prevent that. -; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2: -; Prologue code. -; Make sure we save the CSR used in the inline asm: r4. -; CHECK: push {r4 -; This is the nop. -; CHECK: mov r8, r8 -; CHECK: movs [[SUM:r0]], #0 -; CHECK-NEXT: movs [[IV:r[0-9]+]], #10 -; Next BB. -; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: @ %for.body -; CHECK: movs [[TMP:r[0-9]+]], #1 -; CHECK: adds [[SUM]], [[TMP]], [[SUM]] -; CHECK-NEXT: subs [[IV]], [[IV]], #1 -; CHECK-NEXT: bne [[LOOP_LABEL]] -; Next BB. -; CHECK: @ %for.exit -; This is the nop. 
-; CHECK: mov r8, r8 -; CHECK: pop {r4 define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) { +; ENABLE-V4T-LABEL: freqSaveAndRestoreOutsideLoop2: +; ENABLE-V4T: @ %bb.0: @ %entry +; ENABLE-V4T-NEXT: push {r4, lr} +; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V4T-NEXT: .cfi_offset lr, -4 +; ENABLE-V4T-NEXT: .cfi_offset r4, -8 +; ENABLE-V4T-NEXT: @ InlineAsm Start +; ENABLE-V4T-NEXT: mov r8, r8 +; ENABLE-V4T-NEXT: @ InlineAsm End +; ENABLE-V4T-NEXT: movs r0, #0 +; ENABLE-V4T-NEXT: movs r1, #10 +; ENABLE-V4T-NEXT: LBB3_1: @ %for.body +; ENABLE-V4T-NEXT: @ =>This Inner Loop Header: Depth=1 +; ENABLE-V4T-NEXT: @ InlineAsm Start +; ENABLE-V4T-NEXT: movs r2, #1 +; ENABLE-V4T-NEXT: @ InlineAsm End +; ENABLE-V4T-NEXT: adds r0, r2, r0 +; ENABLE-V4T-NEXT: subs r1, r1, #1 +; ENABLE-V4T-NEXT: bne LBB3_1 +; ENABLE-V4T-NEXT: @ %bb.2: @ %for.exit +; ENABLE-V4T-NEXT: @ InlineAsm Start +; ENABLE-V4T-NEXT: mov r8, r8 +; ENABLE-V4T-NEXT: @ InlineAsm End +; ENABLE-V4T-NEXT: pop {r4} +; ENABLE-V4T-NEXT: pop {r1} +; ENABLE-V4T-NEXT: bx r1 +; +; ENABLE-V5T-LABEL: freqSaveAndRestoreOutsideLoop2: +; ENABLE-V5T: @ %bb.0: @ %entry +; ENABLE-V5T-NEXT: push {r4, lr} +; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V5T-NEXT: .cfi_offset lr, -4 +; ENABLE-V5T-NEXT: .cfi_offset r4, -8 +; ENABLE-V5T-NEXT: @ InlineAsm Start +; ENABLE-V5T-NEXT: mov r8, r8 +; ENABLE-V5T-NEXT: @ InlineAsm End +; ENABLE-V5T-NEXT: movs r0, #0 +; ENABLE-V5T-NEXT: movs r1, #10 +; ENABLE-V5T-NEXT: LBB3_1: @ %for.body +; ENABLE-V5T-NEXT: @ =>This Inner Loop Header: Depth=1 +; ENABLE-V5T-NEXT: @ InlineAsm Start +; ENABLE-V5T-NEXT: movs r2, #1 +; ENABLE-V5T-NEXT: @ InlineAsm End +; ENABLE-V5T-NEXT: adds r0, r2, r0 +; ENABLE-V5T-NEXT: subs r1, r1, #1 +; ENABLE-V5T-NEXT: bne LBB3_1 +; ENABLE-V5T-NEXT: @ %bb.2: @ %for.exit +; ENABLE-V5T-NEXT: @ InlineAsm Start +; ENABLE-V5T-NEXT: mov r8, r8 +; ENABLE-V5T-NEXT: @ InlineAsm End +; ENABLE-V5T-NEXT: pop {r4, pc} +; ENABLE-V5T-NEXT: LBB3_3: @ %for.end +; ENABLE-V5T-NEXT: bx 
lr +; +; DISABLE-V4T-LABEL: freqSaveAndRestoreOutsideLoop2: +; DISABLE-V4T: @ %bb.0: @ %entry +; DISABLE-V4T-NEXT: push {r4, lr} +; DISABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V4T-NEXT: .cfi_offset lr, -4 +; DISABLE-V4T-NEXT: .cfi_offset r4, -8 +; DISABLE-V4T-NEXT: @ InlineAsm Start +; DISABLE-V4T-NEXT: mov r8, r8 +; DISABLE-V4T-NEXT: @ InlineAsm End +; DISABLE-V4T-NEXT: movs r0, #0 +; DISABLE-V4T-NEXT: movs r1, #10 +; DISABLE-V4T-NEXT: LBB3_1: @ %for.body +; DISABLE-V4T-NEXT: @ =>This Inner Loop Header: Depth=1 +; DISABLE-V4T-NEXT: @ InlineAsm Start +; DISABLE-V4T-NEXT: movs r2, #1 +; DISABLE-V4T-NEXT: @ InlineAsm End +; DISABLE-V4T-NEXT: adds r0, r2, r0 +; DISABLE-V4T-NEXT: subs r1, r1, #1 +; DISABLE-V4T-NEXT: bne LBB3_1 +; DISABLE-V4T-NEXT: @ %bb.2: @ %for.exit +; DISABLE-V4T-NEXT: @ InlineAsm Start +; DISABLE-V4T-NEXT: mov r8, r8 +; DISABLE-V4T-NEXT: @ InlineAsm End +; DISABLE-V4T-NEXT: pop {r4} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 +; +; DISABLE-V5T-LABEL: freqSaveAndRestoreOutsideLoop2: +; DISABLE-V5T: @ %bb.0: @ %entry +; DISABLE-V5T-NEXT: push {r4, lr} +; DISABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V5T-NEXT: .cfi_offset lr, -4 +; DISABLE-V5T-NEXT: .cfi_offset r4, -8 +; DISABLE-V5T-NEXT: @ InlineAsm Start +; DISABLE-V5T-NEXT: mov r8, r8 +; DISABLE-V5T-NEXT: @ InlineAsm End +; DISABLE-V5T-NEXT: movs r0, #0 +; DISABLE-V5T-NEXT: movs r1, #10 +; DISABLE-V5T-NEXT: LBB3_1: @ %for.body +; DISABLE-V5T-NEXT: @ =>This Inner Loop Header: Depth=1 +; DISABLE-V5T-NEXT: @ InlineAsm Start +; DISABLE-V5T-NEXT: movs r2, #1 +; DISABLE-V5T-NEXT: @ InlineAsm End +; DISABLE-V5T-NEXT: adds r0, r2, r0 +; DISABLE-V5T-NEXT: subs r1, r1, #1 +; DISABLE-V5T-NEXT: bne LBB3_1 +; DISABLE-V5T-NEXT: @ %bb.2: @ %for.exit +; DISABLE-V5T-NEXT: @ InlineAsm Start +; DISABLE-V5T-NEXT: mov r8, r8 +; DISABLE-V5T-NEXT: @ InlineAsm End +; DISABLE-V5T-NEXT: pop {r4, pc} entry: br label %for.preheader @@ -244,54 +550,140 @@ ; Check with a more complex case that we do not 
have save within the loop and ; restore outside. -; CHECK-LABEL: loopInfoSaveOutsideLoop: -; -; ENABLE: cmp r0, #0 -; ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; Make sure we save the CSR used in the inline asm: r4. -; CHECK: push {r4, lr} -; -; DISABLE: cmp r0, #0 -; DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; -; SUM is in r0 because it is coalesced with the second -; argument on the else path. -; CHECK: movs [[SUM:r0]], #0 -; CHECK-NEXT: movs [[IV:r[0-9]+]], #10 -; -; Next BB. -; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body -; CHECK: movs [[TMP:r[0-9]+]], #1 -; CHECK: adds [[SUM]], [[TMP]], [[SUM]] -; CHECK-NEXT: subs [[IV]], [[IV]], #1 -; CHECK-NEXT: bne [[LOOP]] -; -; Next BB. -; SUM << 3. -; CHECK: lsls [[SUM]], [[SUM]], #3 -; ENABLE-V5T-NEXT: pop {r4, pc} -; ENABLE-V4T-NEXT: pop {r4} -; ENABLE-V4T-NEXT: pop {r1} -; ENABLE-V4T-NEXT: bx r1 -; -; Duplicated epilogue. -; DISABLE-V5T: pop {r4, pc} -; DISABLE-V4T: b [[END_LABEL:LBB[0-9_]+]] -; -; CHECK: [[ELSE_LABEL]]: @ %if.else -; Shift second argument by one and store into returned register. 
-; CHECK: lsls r0, r1, #1 -; DISABLE-V5T-NEXT: pop {r4, pc} -; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end -; DISABLE-V4T-NEXT: pop {r4} -; DISABLE-V4T-NEXT: pop {r1} -; DISABLE-V4T-NEXT: bx r1 -; -; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end -; ENABLE-NEXT: bx lr define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) { +; ENABLE-V4T-LABEL: loopInfoSaveOutsideLoop: +; ENABLE-V4T: @ %bb.0: @ %entry +; ENABLE-V4T-NEXT: cmp r0, #0 +; ENABLE-V4T-NEXT: beq LBB4_4 +; ENABLE-V4T-NEXT: @ %bb.1: @ %for.preheader +; ENABLE-V4T-NEXT: push {r4, lr} +; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V4T-NEXT: .cfi_offset lr, -4 +; ENABLE-V4T-NEXT: .cfi_offset r4, -8 +; ENABLE-V4T-NEXT: @ InlineAsm Start +; ENABLE-V4T-NEXT: mov r8, r8 +; ENABLE-V4T-NEXT: @ InlineAsm End +; ENABLE-V4T-NEXT: movs r0, #0 +; ENABLE-V4T-NEXT: movs r1, #10 +; ENABLE-V4T-NEXT: LBB4_2: @ %for.body +; ENABLE-V4T-NEXT: @ =>This Inner Loop Header: Depth=1 +; ENABLE-V4T-NEXT: @ InlineAsm Start +; ENABLE-V4T-NEXT: movs r2, #1 +; ENABLE-V4T-NEXT: @ InlineAsm End +; ENABLE-V4T-NEXT: adds r0, r2, r0 +; ENABLE-V4T-NEXT: subs r1, r1, #1 +; ENABLE-V4T-NEXT: bne LBB4_2 +; ENABLE-V4T-NEXT: @ %bb.3: @ %for.end +; ENABLE-V4T-NEXT: @ InlineAsm Start +; ENABLE-V4T-NEXT: mov r8, r8 +; ENABLE-V4T-NEXT: @ InlineAsm End +; ENABLE-V4T-NEXT: lsls r0, r0, #3 +; ENABLE-V4T-NEXT: pop {r4} +; ENABLE-V4T-NEXT: pop {r1} +; ENABLE-V4T-NEXT: bx r1 +; ENABLE-V4T-NEXT: LBB4_4: @ %if.else +; ENABLE-V4T-NEXT: lsls r0, r1, #1 +; ENABLE-V4T-NEXT: bx lr +; +; ENABLE-V5T-LABEL: loopInfoSaveOutsideLoop: +; ENABLE-V5T: @ %bb.0: @ %entry +; ENABLE-V5T-NEXT: cmp r0, #0 +; ENABLE-V5T-NEXT: beq LBB4_4 +; ENABLE-V5T-NEXT: @ %bb.1: @ %for.preheader +; ENABLE-V5T-NEXT: push {r4, lr} +; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V5T-NEXT: .cfi_offset lr, -4 +; ENABLE-V5T-NEXT: .cfi_offset r4, -8 +; ENABLE-V5T-NEXT: @ InlineAsm Start +; ENABLE-V5T-NEXT: mov r8, r8 +; ENABLE-V5T-NEXT: @ InlineAsm End +; ENABLE-V5T-NEXT: movs r0, #0 +; 
ENABLE-V5T-NEXT: movs r1, #10 +; ENABLE-V5T-NEXT: LBB4_2: @ %for.body +; ENABLE-V5T-NEXT: @ =>This Inner Loop Header: Depth=1 +; ENABLE-V5T-NEXT: @ InlineAsm Start +; ENABLE-V5T-NEXT: movs r2, #1 +; ENABLE-V5T-NEXT: @ InlineAsm End +; ENABLE-V5T-NEXT: adds r0, r2, r0 +; ENABLE-V5T-NEXT: subs r1, r1, #1 +; ENABLE-V5T-NEXT: bne LBB4_2 +; ENABLE-V5T-NEXT: @ %bb.3: @ %for.end +; ENABLE-V5T-NEXT: @ InlineAsm Start +; ENABLE-V5T-NEXT: mov r8, r8 +; ENABLE-V5T-NEXT: @ InlineAsm End +; ENABLE-V5T-NEXT: lsls r0, r0, #3 +; ENABLE-V5T-NEXT: pop {r4, pc} +; ENABLE-V5T-NEXT: LBB4_4: @ %if.else +; ENABLE-V5T-NEXT: lsls r0, r1, #1 +; ENABLE-V5T-NEXT: LBB4_5: @ %if.end +; ENABLE-V5T-NEXT: bx lr +; +; DISABLE-V4T-LABEL: loopInfoSaveOutsideLoop: +; DISABLE-V4T: @ %bb.0: @ %entry +; DISABLE-V4T-NEXT: push {r4, lr} +; DISABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V4T-NEXT: .cfi_offset lr, -4 +; DISABLE-V4T-NEXT: .cfi_offset r4, -8 +; DISABLE-V4T-NEXT: cmp r0, #0 +; DISABLE-V4T-NEXT: beq LBB4_4 +; DISABLE-V4T-NEXT: @ %bb.1: @ %for.preheader +; DISABLE-V4T-NEXT: @ InlineAsm Start +; DISABLE-V4T-NEXT: mov r8, r8 +; DISABLE-V4T-NEXT: @ InlineAsm End +; DISABLE-V4T-NEXT: movs r0, #0 +; DISABLE-V4T-NEXT: movs r1, #10 +; DISABLE-V4T-NEXT: LBB4_2: @ %for.body +; DISABLE-V4T-NEXT: @ =>This Inner Loop Header: Depth=1 +; DISABLE-V4T-NEXT: @ InlineAsm Start +; DISABLE-V4T-NEXT: movs r2, #1 +; DISABLE-V4T-NEXT: @ InlineAsm End +; DISABLE-V4T-NEXT: adds r0, r2, r0 +; DISABLE-V4T-NEXT: subs r1, r1, #1 +; DISABLE-V4T-NEXT: bne LBB4_2 +; DISABLE-V4T-NEXT: @ %bb.3: @ %for.end +; DISABLE-V4T-NEXT: @ InlineAsm Start +; DISABLE-V4T-NEXT: mov r8, r8 +; DISABLE-V4T-NEXT: @ InlineAsm End +; DISABLE-V4T-NEXT: lsls r0, r0, #3 +; DISABLE-V4T-NEXT: b LBB4_5 +; DISABLE-V4T-NEXT: LBB4_4: @ %if.else +; DISABLE-V4T-NEXT: lsls r0, r1, #1 +; DISABLE-V4T-NEXT: LBB4_5: @ %if.end +; DISABLE-V4T-NEXT: pop {r4} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 +; +; DISABLE-V5T-LABEL: loopInfoSaveOutsideLoop: 
+; DISABLE-V5T: @ %bb.0: @ %entry +; DISABLE-V5T-NEXT: push {r4, lr} +; DISABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V5T-NEXT: .cfi_offset lr, -4 +; DISABLE-V5T-NEXT: .cfi_offset r4, -8 +; DISABLE-V5T-NEXT: cmp r0, #0 +; DISABLE-V5T-NEXT: beq LBB4_4 +; DISABLE-V5T-NEXT: @ %bb.1: @ %for.preheader +; DISABLE-V5T-NEXT: @ InlineAsm Start +; DISABLE-V5T-NEXT: mov r8, r8 +; DISABLE-V5T-NEXT: @ InlineAsm End +; DISABLE-V5T-NEXT: movs r0, #0 +; DISABLE-V5T-NEXT: movs r1, #10 +; DISABLE-V5T-NEXT: LBB4_2: @ %for.body +; DISABLE-V5T-NEXT: @ =>This Inner Loop Header: Depth=1 +; DISABLE-V5T-NEXT: @ InlineAsm Start +; DISABLE-V5T-NEXT: movs r2, #1 +; DISABLE-V5T-NEXT: @ InlineAsm End +; DISABLE-V5T-NEXT: adds r0, r2, r0 +; DISABLE-V5T-NEXT: subs r1, r1, #1 +; DISABLE-V5T-NEXT: bne LBB4_2 +; DISABLE-V5T-NEXT: @ %bb.3: @ %for.end +; DISABLE-V5T-NEXT: @ InlineAsm Start +; DISABLE-V5T-NEXT: mov r8, r8 +; DISABLE-V5T-NEXT: @ InlineAsm End +; DISABLE-V5T-NEXT: lsls r0, r0, #3 +; DISABLE-V5T-NEXT: pop {r4, pc} +; DISABLE-V5T-NEXT: LBB4_4: @ %if.else +; DISABLE-V5T-NEXT: lsls r0, r1, #1 +; DISABLE-V5T-NEXT: pop {r4, pc} entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.preheader @@ -327,54 +719,156 @@ ; Check with a more complex case that we do not have restore within the loop and ; save outside. -; CHECK-LABEL: loopInfoRestoreOutsideLoop: -; -; ENABLE: cmp r0, #0 -; ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; Make sure we save the CSR used in the inline asm: r4. -; CHECK: push {r4, lr} -; -; DISABLE-NEXT: cmp r0, #0 -; DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; -; SUM is in r0 because it is coalesced with the second -; argument on the else path. -; CHECK: movs [[SUM:r0]], #0 -; CHECK-NEXT: movs [[IV:r[0-9]+]], #10 -; -; Next BB. 
-; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body -; CHECK: movs [[TMP:r[0-9]+]], #1 -; CHECK: adds [[SUM]], [[TMP]], [[SUM]] -; CHECK-NEXT: subs [[IV]], [[IV]], #1 -; CHECK-NEXT: bne [[LOOP]] -; -; Next BB. -; SUM << 3. -; CHECK: lsls [[SUM]], [[SUM]], #3 -; ENABLE-V5T-NEXT: pop {r4, pc} -; ENABLE-V4T-NEXT: pop {r4} -; ENABLE-V4T-NEXT: pop {r1} -; ENABLE-V4T-NEXT: bx r1 -; -; Duplicated epilogue. -; DISABLE-V5T: pop {r4, pc} -; DISABLE-V4T: b [[END_LABEL:LBB[0-9_]+]] -; -; CHECK: [[ELSE_LABEL]]: @ %if.else -; Shift second argument by one and store into returned register. -; CHECK: lsls r0, r1, #1 -; DISABLE-V5T-NEXT: pop {r4, pc} -; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end -; DISABLE-V4T-NEXT: pop {r4} -; DISABLE-V4T-NEXT: pop {r1} -; DISABLE-V4T-NEXT: bx r1 -; -; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end -; ENABLE-NEXT: bx lr define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) nounwind { +; ENABLE-V4T-LABEL: loopInfoRestoreOutsideLoop: +; ENABLE-V4T: @ %bb.0: @ %entry +; ENABLE-V4T-NEXT: cmp r0, #0 +; ENABLE-V4T-NEXT: beq LBB5_4 +; ENABLE-V4T-NEXT: @ %bb.1: @ %if.then +; ENABLE-V4T-NEXT: push {r4, lr} +; ENABLE-V4T-NEXT: @ InlineAsm Start +; ENABLE-V4T-NEXT: mov r8, r8 +; ENABLE-V4T-NEXT: @ InlineAsm End +; ENABLE-V4T-NEXT: movs r0, #0 +; ENABLE-V4T-NEXT: movs r1, #10 +; ENABLE-V4T-NEXT: LBB5_2: @ %for.body +; ENABLE-V4T-NEXT: @ =>This Inner Loop Header: Depth=1 +; ENABLE-V4T-NEXT: @ InlineAsm Start +; ENABLE-V4T-NEXT: movs r2, #1 +; ENABLE-V4T-NEXT: @ InlineAsm End +; ENABLE-V4T-NEXT: adds r0, r2, r0 +; ENABLE-V4T-NEXT: subs r1, r1, #1 +; ENABLE-V4T-NEXT: bne LBB5_2 +; ENABLE-V4T-NEXT: @ %bb.3: @ %for.end +; ENABLE-V4T-NEXT: lsls r0, r0, #3 +; ENABLE-V4T-NEXT: pop {r4} +; ENABLE-V4T-NEXT: pop {r1} +; ENABLE-V4T-NEXT: bx r1 +; ENABLE-V4T-NEXT: LBB5_4: @ %if.else +; ENABLE-V4T-NEXT: lsls r0, r1, #1 +; ENABLE-V4T-NEXT: bx lr +; ENABLE-V4T-NEXT: @ -- End function +; ENABLE-V4T-NEXT: .globl _emptyFrame @ -- Begin function emptyFrame +; ENABLE-V4T-NEXT: .p2align 1 
+; ENABLE-V4T-NEXT: .code 16 @ @emptyFrame +; ENABLE-V4T-NEXT: .thumb_func _emptyFrame +; ENABLE-V4T-NEXT: _emptyFrame: +; ENABLE-V4T-NEXT: .cfi_startproc +; ENABLE-V4T-NEXT: @ %bb.0: @ %entry +; ENABLE-V4T-NEXT: movs r0, #0 +; ENABLE-V4T-NEXT: bx lr +; +; ENABLE-V5T-LABEL: loopInfoRestoreOutsideLoop: +; ENABLE-V5T: @ %bb.0: @ %entry +; ENABLE-V5T-NEXT: cmp r0, #0 +; ENABLE-V5T-NEXT: beq LBB5_4 +; ENABLE-V5T-NEXT: @ %bb.1: @ %if.then +; ENABLE-V5T-NEXT: push {r4, lr} +; ENABLE-V5T-NEXT: @ InlineAsm Start +; ENABLE-V5T-NEXT: mov r8, r8 +; ENABLE-V5T-NEXT: @ InlineAsm End +; ENABLE-V5T-NEXT: movs r0, #0 +; ENABLE-V5T-NEXT: movs r1, #10 +; ENABLE-V5T-NEXT: LBB5_2: @ %for.body +; ENABLE-V5T-NEXT: @ =>This Inner Loop Header: Depth=1 +; ENABLE-V5T-NEXT: @ InlineAsm Start +; ENABLE-V5T-NEXT: movs r2, #1 +; ENABLE-V5T-NEXT: @ InlineAsm End +; ENABLE-V5T-NEXT: adds r0, r2, r0 +; ENABLE-V5T-NEXT: subs r1, r1, #1 +; ENABLE-V5T-NEXT: bne LBB5_2 +; ENABLE-V5T-NEXT: @ %bb.3: @ %for.end +; ENABLE-V5T-NEXT: lsls r0, r0, #3 +; ENABLE-V5T-NEXT: pop {r4, pc} +; ENABLE-V5T-NEXT: LBB5_4: @ %if.else +; ENABLE-V5T-NEXT: lsls r0, r1, #1 +; ENABLE-V5T-NEXT: LBB5_5: @ %if.end +; ENABLE-V5T-NEXT: bx lr +; ENABLE-V5T-NEXT: @ -- End function +; ENABLE-V5T-NEXT: .globl _emptyFrame @ -- Begin function emptyFrame +; ENABLE-V5T-NEXT: .p2align 1 +; ENABLE-V5T-NEXT: .code 16 @ @emptyFrame +; ENABLE-V5T-NEXT: .thumb_func _emptyFrame +; ENABLE-V5T-NEXT: _emptyFrame: +; ENABLE-V5T-NEXT: .cfi_startproc +; ENABLE-V5T-NEXT: @ %bb.0: @ %entry +; ENABLE-V5T-NEXT: movs r0, #0 +; ENABLE-V5T-NEXT: bx lr +; +; DISABLE-V4T-LABEL: loopInfoRestoreOutsideLoop: +; DISABLE-V4T: @ %bb.0: @ %entry +; DISABLE-V4T-NEXT: push {r4, lr} +; DISABLE-V4T-NEXT: cmp r0, #0 +; DISABLE-V4T-NEXT: beq LBB5_4 +; DISABLE-V4T-NEXT: @ %bb.1: @ %if.then +; DISABLE-V4T-NEXT: @ InlineAsm Start +; DISABLE-V4T-NEXT: mov r8, r8 +; DISABLE-V4T-NEXT: @ InlineAsm End +; DISABLE-V4T-NEXT: movs r0, #0 +; DISABLE-V4T-NEXT: movs r1, #10 +; 
DISABLE-V4T-NEXT: LBB5_2: @ %for.body +; DISABLE-V4T-NEXT: @ =>This Inner Loop Header: Depth=1 +; DISABLE-V4T-NEXT: @ InlineAsm Start +; DISABLE-V4T-NEXT: movs r2, #1 +; DISABLE-V4T-NEXT: @ InlineAsm End +; DISABLE-V4T-NEXT: adds r0, r2, r0 +; DISABLE-V4T-NEXT: subs r1, r1, #1 +; DISABLE-V4T-NEXT: bne LBB5_2 +; DISABLE-V4T-NEXT: @ %bb.3: @ %for.end +; DISABLE-V4T-NEXT: lsls r0, r0, #3 +; DISABLE-V4T-NEXT: b LBB5_5 +; DISABLE-V4T-NEXT: LBB5_4: @ %if.else +; DISABLE-V4T-NEXT: lsls r0, r1, #1 +; DISABLE-V4T-NEXT: LBB5_5: @ %if.end +; DISABLE-V4T-NEXT: pop {r4} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 +; DISABLE-V4T-NEXT: @ -- End function +; DISABLE-V4T-NEXT: .globl _emptyFrame @ -- Begin function emptyFrame +; DISABLE-V4T-NEXT: .p2align 1 +; DISABLE-V4T-NEXT: .code 16 @ @emptyFrame +; DISABLE-V4T-NEXT: .thumb_func _emptyFrame +; DISABLE-V4T-NEXT: _emptyFrame: +; DISABLE-V4T-NEXT: .cfi_startproc +; DISABLE-V4T-NEXT: @ %bb.0: @ %entry +; DISABLE-V4T-NEXT: movs r0, #0 +; DISABLE-V4T-NEXT: bx lr +; +; DISABLE-V5T-LABEL: loopInfoRestoreOutsideLoop: +; DISABLE-V5T: @ %bb.0: @ %entry +; DISABLE-V5T-NEXT: push {r4, lr} +; DISABLE-V5T-NEXT: cmp r0, #0 +; DISABLE-V5T-NEXT: beq LBB5_4 +; DISABLE-V5T-NEXT: @ %bb.1: @ %if.then +; DISABLE-V5T-NEXT: @ InlineAsm Start +; DISABLE-V5T-NEXT: mov r8, r8 +; DISABLE-V5T-NEXT: @ InlineAsm End +; DISABLE-V5T-NEXT: movs r0, #0 +; DISABLE-V5T-NEXT: movs r1, #10 +; DISABLE-V5T-NEXT: LBB5_2: @ %for.body +; DISABLE-V5T-NEXT: @ =>This Inner Loop Header: Depth=1 +; DISABLE-V5T-NEXT: @ InlineAsm Start +; DISABLE-V5T-NEXT: movs r2, #1 +; DISABLE-V5T-NEXT: @ InlineAsm End +; DISABLE-V5T-NEXT: adds r0, r2, r0 +; DISABLE-V5T-NEXT: subs r1, r1, #1 +; DISABLE-V5T-NEXT: bne LBB5_2 +; DISABLE-V5T-NEXT: @ %bb.3: @ %for.end +; DISABLE-V5T-NEXT: lsls r0, r0, #3 +; DISABLE-V5T-NEXT: pop {r4, pc} +; DISABLE-V5T-NEXT: LBB5_4: @ %if.else +; DISABLE-V5T-NEXT: lsls r0, r1, #1 +; DISABLE-V5T-NEXT: pop {r4, pc} +; DISABLE-V5T-NEXT: @ -- End function 
+; DISABLE-V5T-NEXT: .globl _emptyFrame @ -- Begin function emptyFrame +; DISABLE-V5T-NEXT: .p2align 1 +; DISABLE-V5T-NEXT: .code 16 @ @emptyFrame +; DISABLE-V5T-NEXT: .thumb_func _emptyFrame +; DISABLE-V5T-NEXT: _emptyFrame: +; DISABLE-V5T-NEXT: .cfi_startproc +; DISABLE-V5T-NEXT: @ %bb.0: @ %entry +; DISABLE-V5T-NEXT: movs r0, #0 +; DISABLE-V5T-NEXT: bx lr entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %if.then @@ -406,59 +900,138 @@ } ; Check that we handle function with no frame information correctly. -; CHECK-LABEL: emptyFrame: -; CHECK: @ %entry -; CHECK-NEXT: movs r0, #0 -; CHECK-NEXT: bx lr define i32 @emptyFrame() { entry: ret i32 0 } ; Check that we handle inline asm correctly. -; CHECK-LABEL: inlineAsm: -; -; ENABLE: cmp r0, #0 -; ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; Make sure we save the CSR used in the inline asm: r4. -; CHECK: push {r4, lr} -; -; DISABLE: cmp r0, #0 -; DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; -; CHECK: movs [[IV:r[0-9]+]], #10 -; -; Next BB. -; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body -; CHECK: movs r4, #1 -; CHECK: subs [[IV]], [[IV]], #1 -; CHECK-NEXT: bne [[LOOP]] -; -; Next BB. -; CHECK: movs r0, #0 -; ENABLE-V5T-NEXT: pop {r4, pc} -; ENABLE-V4T-NEXT: pop {r4} -; ENABLE-V4T-NEXT: pop {r1} -; ENABLE-V4T-NEXT: bx r1 -; -; Duplicated epilogue. -; DISABLE-V5T-NEXT: pop {r4, pc} -; DISABLE-V4T-NEXT: b [[END_LABEL:LBB[0-9_]+]] -; -; CHECK: [[ELSE_LABEL]]: @ %if.else -; Shift second argument by one and store into returned register. 
-; CHECK: lsls r0, r1, #1 -; DISABLE-V5T-NEXT: pop {r4, pc} -; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end -; DISABLE-V4T-NEXT: pop {r4} -; DISABLE-V4T-NEXT: pop {r1} -; DISABLE-V4T-NEXT: bx r1 -; -; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end -; ENABLE-NEXT: bx lr define i32 @inlineAsm(i32 %cond, i32 %N) { +; ENABLE-V4T-LABEL: inlineAsm: +; ENABLE-V4T: @ %bb.0: @ %entry +; ENABLE-V4T-NEXT: cmp r0, #0 +; ENABLE-V4T-NEXT: beq LBB7_4 +; ENABLE-V4T-NEXT: @ %bb.1: @ %for.preheader +; ENABLE-V4T-NEXT: push {r4, lr} +; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V4T-NEXT: .cfi_offset lr, -4 +; ENABLE-V4T-NEXT: .cfi_offset r4, -8 +; ENABLE-V4T-NEXT: @ InlineAsm Start +; ENABLE-V4T-NEXT: mov r8, r8 +; ENABLE-V4T-NEXT: @ InlineAsm End +; ENABLE-V4T-NEXT: movs r0, #10 +; ENABLE-V4T-NEXT: LBB7_2: @ %for.body +; ENABLE-V4T-NEXT: @ =>This Inner Loop Header: Depth=1 +; ENABLE-V4T-NEXT: @ InlineAsm Start +; ENABLE-V4T-NEXT: movs r4, #1 +; ENABLE-V4T-NEXT: @ InlineAsm End +; ENABLE-V4T-NEXT: subs r0, r0, #1 +; ENABLE-V4T-NEXT: bne LBB7_2 +; ENABLE-V4T-NEXT: @ %bb.3: @ %for.exit +; ENABLE-V4T-NEXT: @ InlineAsm Start +; ENABLE-V4T-NEXT: mov r8, r8 +; ENABLE-V4T-NEXT: @ InlineAsm End +; ENABLE-V4T-NEXT: movs r0, #0 +; ENABLE-V4T-NEXT: pop {r4} +; ENABLE-V4T-NEXT: pop {r1} +; ENABLE-V4T-NEXT: bx r1 +; ENABLE-V4T-NEXT: LBB7_4: @ %if.else +; ENABLE-V4T-NEXT: lsls r0, r1, #1 +; ENABLE-V4T-NEXT: bx lr +; +; ENABLE-V5T-LABEL: inlineAsm: +; ENABLE-V5T: @ %bb.0: @ %entry +; ENABLE-V5T-NEXT: cmp r0, #0 +; ENABLE-V5T-NEXT: beq LBB7_4 +; ENABLE-V5T-NEXT: @ %bb.1: @ %for.preheader +; ENABLE-V5T-NEXT: push {r4, lr} +; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V5T-NEXT: .cfi_offset lr, -4 +; ENABLE-V5T-NEXT: .cfi_offset r4, -8 +; ENABLE-V5T-NEXT: @ InlineAsm Start +; ENABLE-V5T-NEXT: mov r8, r8 +; ENABLE-V5T-NEXT: @ InlineAsm End +; ENABLE-V5T-NEXT: movs r0, #10 +; ENABLE-V5T-NEXT: LBB7_2: @ %for.body +; ENABLE-V5T-NEXT: @ =>This Inner Loop Header: Depth=1 +; ENABLE-V5T-NEXT: @ InlineAsm 
Start +; ENABLE-V5T-NEXT: movs r4, #1 +; ENABLE-V5T-NEXT: @ InlineAsm End +; ENABLE-V5T-NEXT: subs r0, r0, #1 +; ENABLE-V5T-NEXT: bne LBB7_2 +; ENABLE-V5T-NEXT: @ %bb.3: @ %for.exit +; ENABLE-V5T-NEXT: @ InlineAsm Start +; ENABLE-V5T-NEXT: mov r8, r8 +; ENABLE-V5T-NEXT: @ InlineAsm End +; ENABLE-V5T-NEXT: movs r0, #0 +; ENABLE-V5T-NEXT: pop {r4, pc} +; ENABLE-V5T-NEXT: LBB7_4: @ %if.else +; ENABLE-V5T-NEXT: lsls r0, r1, #1 +; ENABLE-V5T-NEXT: LBB7_5: @ %if.end +; ENABLE-V5T-NEXT: bx lr +; +; DISABLE-V4T-LABEL: inlineAsm: +; DISABLE-V4T: @ %bb.0: @ %entry +; DISABLE-V4T-NEXT: push {r4, lr} +; DISABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V4T-NEXT: .cfi_offset lr, -4 +; DISABLE-V4T-NEXT: .cfi_offset r4, -8 +; DISABLE-V4T-NEXT: cmp r0, #0 +; DISABLE-V4T-NEXT: beq LBB7_4 +; DISABLE-V4T-NEXT: @ %bb.1: @ %for.preheader +; DISABLE-V4T-NEXT: @ InlineAsm Start +; DISABLE-V4T-NEXT: mov r8, r8 +; DISABLE-V4T-NEXT: @ InlineAsm End +; DISABLE-V4T-NEXT: movs r0, #10 +; DISABLE-V4T-NEXT: LBB7_2: @ %for.body +; DISABLE-V4T-NEXT: @ =>This Inner Loop Header: Depth=1 +; DISABLE-V4T-NEXT: @ InlineAsm Start +; DISABLE-V4T-NEXT: movs r4, #1 +; DISABLE-V4T-NEXT: @ InlineAsm End +; DISABLE-V4T-NEXT: subs r0, r0, #1 +; DISABLE-V4T-NEXT: bne LBB7_2 +; DISABLE-V4T-NEXT: @ %bb.3: @ %for.exit +; DISABLE-V4T-NEXT: @ InlineAsm Start +; DISABLE-V4T-NEXT: mov r8, r8 +; DISABLE-V4T-NEXT: @ InlineAsm End +; DISABLE-V4T-NEXT: movs r0, #0 +; DISABLE-V4T-NEXT: b LBB7_5 +; DISABLE-V4T-NEXT: LBB7_4: @ %if.else +; DISABLE-V4T-NEXT: lsls r0, r1, #1 +; DISABLE-V4T-NEXT: LBB7_5: @ %if.end +; DISABLE-V4T-NEXT: pop {r4} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 +; +; DISABLE-V5T-LABEL: inlineAsm: +; DISABLE-V5T: @ %bb.0: @ %entry +; DISABLE-V5T-NEXT: push {r4, lr} +; DISABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V5T-NEXT: .cfi_offset lr, -4 +; DISABLE-V5T-NEXT: .cfi_offset r4, -8 +; DISABLE-V5T-NEXT: cmp r0, #0 +; DISABLE-V5T-NEXT: beq LBB7_4 +; DISABLE-V5T-NEXT: @ %bb.1: @ 
%for.preheader +; DISABLE-V5T-NEXT: @ InlineAsm Start +; DISABLE-V5T-NEXT: mov r8, r8 +; DISABLE-V5T-NEXT: @ InlineAsm End +; DISABLE-V5T-NEXT: movs r0, #10 +; DISABLE-V5T-NEXT: LBB7_2: @ %for.body +; DISABLE-V5T-NEXT: @ =>This Inner Loop Header: Depth=1 +; DISABLE-V5T-NEXT: @ InlineAsm Start +; DISABLE-V5T-NEXT: movs r4, #1 +; DISABLE-V5T-NEXT: @ InlineAsm End +; DISABLE-V5T-NEXT: subs r0, r0, #1 +; DISABLE-V5T-NEXT: bne LBB7_2 +; DISABLE-V5T-NEXT: @ %bb.3: @ %for.exit +; DISABLE-V5T-NEXT: @ InlineAsm Start +; DISABLE-V5T-NEXT: mov r8, r8 +; DISABLE-V5T-NEXT: @ InlineAsm End +; DISABLE-V5T-NEXT: movs r0, #0 +; DISABLE-V5T-NEXT: pop {r4, pc} +; DISABLE-V5T-NEXT: LBB7_4: @ %if.else +; DISABLE-V5T-NEXT: lsls r0, r1, #1 +; DISABLE-V5T-NEXT: pop {r4, pc} entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.preheader @@ -488,54 +1061,133 @@ } ; Check that we handle calls to variadic functions correctly. -; CHECK-LABEL: callVariadicFunc: -; -; ENABLE: cmp r0, #0 -; ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; CHECK: push {[[TMP:r[0-9]+]], lr} -; CHECK: sub sp, #16 -; -; DISABLE: cmp r0, #0 -; DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; -; Setup of the varags. -; CHECK: str r1, [sp] -; CHECK-NEXT: str r1, [sp, #4] -; CHECK-NEXT: str r1, [sp, #8] -; CHECK: movs r0, r1 -; CHECK-NEXT: movs r2, r1 -; CHECK-NEXT: movs r3, r1 -; CHECK-NEXT: bl -; CHECK-NEXT: lsls r0, r0, #3 -; -; ENABLE-NEXT: add sp, #16 -; ENABLE-V5T-NEXT: pop {[[TMP]], pc} -; ENABLE-V4T-NEXT: pop {[[TMP]]} -; ENABLE-V4T-NEXT: pop {r1} -; ENABLE-V4T-NEXT: bx r1 -; -; Duplicated epilogue. -; DISABLE-V5T-NEXT: add sp, #16 -; DISABLE-V5T-NEXT: pop {[[TMP]], pc} -; DISABLE-V4T-NEXT: b [[END_LABEL:LBB[0-9_]+]] -; -; CHECK: [[ELSE_LABEL]]: @ %if.else -; Shift second argument by one and store into returned register. -; CHECK: lsls r0, r1, #1 -; -; Epilogue code. 
-; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end -; ENABLE-NEXT: bx lr -; -; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end -; DISABLE-NEXT: add sp, #16 -; DISABLE-V5T-NEXT: pop {[[TMP]], pc} -; DISABLE-V4T-NEXT: pop {[[TMP]]} -; DISABLE-V4T-NEXT: pop {r1} -; DISABLE-V4T-NEXT: bx r1 define i32 @callVariadicFunc(i32 %cond, i32 %N) { +; ENABLE-V4T-LABEL: callVariadicFunc: +; ENABLE-V4T: @ %bb.0: @ %entry +; ENABLE-V4T-NEXT: cmp r0, #0 +; ENABLE-V4T-NEXT: beq LBB8_2 +; ENABLE-V4T-NEXT: @ %bb.1: @ %if.then +; ENABLE-V4T-NEXT: push {r4, lr} +; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V4T-NEXT: .cfi_offset lr, -4 +; ENABLE-V4T-NEXT: .cfi_offset r4, -8 +; ENABLE-V4T-NEXT: sub sp, #16 +; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 24 +; ENABLE-V4T-NEXT: str r1, [sp] +; ENABLE-V4T-NEXT: str r1, [sp, #4] +; ENABLE-V4T-NEXT: str r1, [sp, #8] +; ENABLE-V4T-NEXT: ldr r0, LCPI8_0 +; ENABLE-V4T-NEXT: LPC8_0: +; ENABLE-V4T-NEXT: add r0, pc +; ENABLE-V4T-NEXT: ldr r4, [r0] +; ENABLE-V4T-NEXT: movs r0, r1 +; ENABLE-V4T-NEXT: movs r2, r1 +; ENABLE-V4T-NEXT: movs r3, r1 +; ENABLE-V4T-NEXT: bl Ltmp2 +; ENABLE-V4T-NEXT: lsls r0, r0, #3 +; ENABLE-V4T-NEXT: add sp, #16 +; ENABLE-V4T-NEXT: pop {r4} +; ENABLE-V4T-NEXT: pop {r1} +; ENABLE-V4T-NEXT: bx r1 +; ENABLE-V4T-NEXT: LBB8_2: @ %if.else +; ENABLE-V4T-NEXT: lsls r0, r1, #1 +; ENABLE-V4T-NEXT: bx lr +; ENABLE-V4T-NEXT: .p2align 2 +; ENABLE-V4T-NEXT: @ %bb.3: +; ENABLE-V4T-NEXT: .data_region +; ENABLE-V4T-NEXT: LCPI8_0: +; ENABLE-V4T-NEXT: .long L_someVariadicFunc$non_lazy_ptr-(LPC8_0+4) +; ENABLE-V4T-NEXT: .end_data_region +; +; ENABLE-V5T-LABEL: callVariadicFunc: +; ENABLE-V5T: @ %bb.0: @ %entry +; ENABLE-V5T-NEXT: cmp r0, #0 +; ENABLE-V5T-NEXT: beq LBB8_2 +; ENABLE-V5T-NEXT: @ %bb.1: @ %if.then +; ENABLE-V5T-NEXT: push {r7, lr} +; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V5T-NEXT: .cfi_offset lr, -4 +; ENABLE-V5T-NEXT: .cfi_offset r7, -8 +; ENABLE-V5T-NEXT: sub sp, #16 +; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 24 +; ENABLE-V5T-NEXT: 
str r1, [sp] +; ENABLE-V5T-NEXT: str r1, [sp, #4] +; ENABLE-V5T-NEXT: str r1, [sp, #8] +; ENABLE-V5T-NEXT: movs r0, r1 +; ENABLE-V5T-NEXT: movs r2, r1 +; ENABLE-V5T-NEXT: movs r3, r1 +; ENABLE-V5T-NEXT: bl _someVariadicFunc +; ENABLE-V5T-NEXT: lsls r0, r0, #3 +; ENABLE-V5T-NEXT: add sp, #16 +; ENABLE-V5T-NEXT: pop {r7, pc} +; ENABLE-V5T-NEXT: LBB8_2: @ %if.else +; ENABLE-V5T-NEXT: lsls r0, r1, #1 +; ENABLE-V5T-NEXT: LBB8_3: @ %if.end +; ENABLE-V5T-NEXT: bx lr +; +; DISABLE-V4T-LABEL: callVariadicFunc: +; DISABLE-V4T: @ %bb.0: @ %entry +; DISABLE-V4T-NEXT: push {r4, lr} +; DISABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V4T-NEXT: .cfi_offset lr, -4 +; DISABLE-V4T-NEXT: .cfi_offset r4, -8 +; DISABLE-V4T-NEXT: sub sp, #16 +; DISABLE-V4T-NEXT: .cfi_def_cfa_offset 24 +; DISABLE-V4T-NEXT: cmp r0, #0 +; DISABLE-V4T-NEXT: beq LBB8_2 +; DISABLE-V4T-NEXT: @ %bb.1: @ %if.then +; DISABLE-V4T-NEXT: str r1, [sp] +; DISABLE-V4T-NEXT: str r1, [sp, #4] +; DISABLE-V4T-NEXT: str r1, [sp, #8] +; DISABLE-V4T-NEXT: ldr r0, LCPI8_0 +; DISABLE-V4T-NEXT: LPC8_0: +; DISABLE-V4T-NEXT: add r0, pc +; DISABLE-V4T-NEXT: ldr r4, [r0] +; DISABLE-V4T-NEXT: movs r0, r1 +; DISABLE-V4T-NEXT: movs r2, r1 +; DISABLE-V4T-NEXT: movs r3, r1 +; DISABLE-V4T-NEXT: bl Ltmp2 +; DISABLE-V4T-NEXT: lsls r0, r0, #3 +; DISABLE-V4T-NEXT: b LBB8_3 +; DISABLE-V4T-NEXT: LBB8_2: @ %if.else +; DISABLE-V4T-NEXT: lsls r0, r1, #1 +; DISABLE-V4T-NEXT: LBB8_3: @ %if.end +; DISABLE-V4T-NEXT: add sp, #16 +; DISABLE-V4T-NEXT: pop {r4} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 +; DISABLE-V4T-NEXT: .p2align 2 +; DISABLE-V4T-NEXT: @ %bb.4: +; DISABLE-V4T-NEXT: .data_region +; DISABLE-V4T-NEXT: LCPI8_0: +; DISABLE-V4T-NEXT: .long L_someVariadicFunc$non_lazy_ptr-(LPC8_0+4) +; DISABLE-V4T-NEXT: .end_data_region +; +; DISABLE-V5T-LABEL: callVariadicFunc: +; DISABLE-V5T: @ %bb.0: @ %entry +; DISABLE-V5T-NEXT: push {r7, lr} +; DISABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V5T-NEXT: .cfi_offset lr, -4 +; 
DISABLE-V5T-NEXT: .cfi_offset r7, -8 +; DISABLE-V5T-NEXT: sub sp, #16 +; DISABLE-V5T-NEXT: .cfi_def_cfa_offset 24 +; DISABLE-V5T-NEXT: cmp r0, #0 +; DISABLE-V5T-NEXT: beq LBB8_2 +; DISABLE-V5T-NEXT: @ %bb.1: @ %if.then +; DISABLE-V5T-NEXT: str r1, [sp] +; DISABLE-V5T-NEXT: str r1, [sp, #4] +; DISABLE-V5T-NEXT: str r1, [sp, #8] +; DISABLE-V5T-NEXT: movs r0, r1 +; DISABLE-V5T-NEXT: movs r2, r1 +; DISABLE-V5T-NEXT: movs r3, r1 +; DISABLE-V5T-NEXT: bl _someVariadicFunc +; DISABLE-V5T-NEXT: lsls r0, r0, #3 +; DISABLE-V5T-NEXT: add sp, #16 +; DISABLE-V5T-NEXT: pop {r7, pc} +; DISABLE-V5T-NEXT: LBB8_2: @ %if.else +; DISABLE-V5T-NEXT: lsls r0, r1, #1 +; DISABLE-V5T-NEXT: add sp, #16 +; DISABLE-V5T-NEXT: pop {r7, pc} entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %if.then @@ -560,25 +1212,96 @@ ; Although this is not incorrect to insert such code, it is useless ; and it hurts the binary size. ; -; CHECK-LABEL: noreturn: -; DISABLE: push -; -; CHECK: cmp r0, #0 -; CHECK-NEXT: bne [[ABORT:LBB[0-9_]+]] -; -; CHECK: movs r0, #42 -; -; ENABLE-NEXT: bx lr -; -; DISABLE-NEXT: pop -;; -; CHECK: [[ABORT]]: @ %if.abort -; -; ENABLE: push -; -; CHECK: bl -; ENABLE-NOT: pop define i32 @noreturn(i8 signext %bad_thing) { +; ENABLE-V4T-LABEL: noreturn: +; ENABLE-V4T: @ %bb.0: @ %entry +; ENABLE-V4T-NEXT: cmp r0, #0 +; ENABLE-V4T-NEXT: bne LBB9_2 +; ENABLE-V4T-NEXT: @ %bb.1: @ %if.end +; ENABLE-V4T-NEXT: movs r0, #42 +; ENABLE-V4T-NEXT: bx lr +; ENABLE-V4T-NEXT: LBB9_2: @ %if.abort +; ENABLE-V4T-NEXT: push {r4, lr} +; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V4T-NEXT: .cfi_offset lr, -4 +; ENABLE-V4T-NEXT: .cfi_offset r4, -8 +; ENABLE-V4T-NEXT: ldr r0, LCPI9_0 +; ENABLE-V4T-NEXT: LPC9_0: +; ENABLE-V4T-NEXT: add r0, pc +; ENABLE-V4T-NEXT: ldr r0, [r0] +; ENABLE-V4T-NEXT: @ InlineAsm Start +; ENABLE-V4T-NEXT: movs r1, #1 +; ENABLE-V4T-NEXT: @ InlineAsm End +; ENABLE-V4T-NEXT: bl Ltmp3 +; ENABLE-V4T-NEXT: .p2align 2 +; ENABLE-V4T-NEXT: @ %bb.3: +; 
ENABLE-V4T-NEXT: .data_region +; ENABLE-V4T-NEXT: LCPI9_0: +; ENABLE-V4T-NEXT: .long L_abort$non_lazy_ptr-(LPC9_0+4) +; ENABLE-V4T-NEXT: .end_data_region +; +; ENABLE-V5T-LABEL: noreturn: +; ENABLE-V5T: @ %bb.0: @ %entry +; ENABLE-V5T-NEXT: cmp r0, #0 +; ENABLE-V5T-NEXT: bne LBB9_2 +; ENABLE-V5T-NEXT: @ %bb.1: @ %if.end +; ENABLE-V5T-NEXT: movs r0, #42 +; ENABLE-V5T-NEXT: bx lr +; ENABLE-V5T-NEXT: LBB9_2: @ %if.abort +; ENABLE-V5T-NEXT: push {r4, lr} +; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V5T-NEXT: .cfi_offset lr, -4 +; ENABLE-V5T-NEXT: .cfi_offset r4, -8 +; ENABLE-V5T-NEXT: @ InlineAsm Start +; ENABLE-V5T-NEXT: movs r0, #1 +; ENABLE-V5T-NEXT: @ InlineAsm End +; ENABLE-V5T-NEXT: bl _abort +; +; DISABLE-V4T-LABEL: noreturn: +; DISABLE-V4T: @ %bb.0: @ %entry +; DISABLE-V4T-NEXT: push {r4, lr} +; DISABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V4T-NEXT: .cfi_offset lr, -4 +; DISABLE-V4T-NEXT: .cfi_offset r4, -8 +; DISABLE-V4T-NEXT: cmp r0, #0 +; DISABLE-V4T-NEXT: bne LBB9_2 +; DISABLE-V4T-NEXT: @ %bb.1: @ %if.end +; DISABLE-V4T-NEXT: movs r0, #42 +; DISABLE-V4T-NEXT: pop {r4} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 +; DISABLE-V4T-NEXT: LBB9_2: @ %if.abort +; DISABLE-V4T-NEXT: ldr r0, LCPI9_0 +; DISABLE-V4T-NEXT: LPC9_0: +; DISABLE-V4T-NEXT: add r0, pc +; DISABLE-V4T-NEXT: ldr r0, [r0] +; DISABLE-V4T-NEXT: @ InlineAsm Start +; DISABLE-V4T-NEXT: movs r1, #1 +; DISABLE-V4T-NEXT: @ InlineAsm End +; DISABLE-V4T-NEXT: bl Ltmp3 +; DISABLE-V4T-NEXT: .p2align 2 +; DISABLE-V4T-NEXT: @ %bb.3: +; DISABLE-V4T-NEXT: .data_region +; DISABLE-V4T-NEXT: LCPI9_0: +; DISABLE-V4T-NEXT: .long L_abort$non_lazy_ptr-(LPC9_0+4) +; DISABLE-V4T-NEXT: .end_data_region +; +; DISABLE-V5T-LABEL: noreturn: +; DISABLE-V5T: @ %bb.0: @ %entry +; DISABLE-V5T-NEXT: push {r4, lr} +; DISABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V5T-NEXT: .cfi_offset lr, -4 +; DISABLE-V5T-NEXT: .cfi_offset r4, -8 +; DISABLE-V5T-NEXT: cmp r0, #0 +; DISABLE-V5T-NEXT: bne LBB9_2 +; 
DISABLE-V5T-NEXT: @ %bb.1: @ %if.end +; DISABLE-V5T-NEXT: movs r0, #42 +; DISABLE-V5T-NEXT: pop {r4, pc} +; DISABLE-V5T-NEXT: LBB9_2: @ %if.abort +; DISABLE-V5T-NEXT: @ InlineAsm Start +; DISABLE-V5T-NEXT: movs r0, #1 +; DISABLE-V5T-NEXT: @ InlineAsm End +; DISABLE-V5T-NEXT: bl _abort entry: %tobool = icmp eq i8 %bad_thing, 0 br i1 %tobool, label %if.end, label %if.abort @@ -595,32 +1318,113 @@ declare void @abort() #0 define i32 @b_to_bx(i32 %value) { -; CHECK-LABEL: b_to_bx: -; DISABLE: push {r7, lr} -; CHECK: cmp r0, #49 -; CHECK-NEXT: bgt [[ELSE_LABEL:LBB[0-9_]+]] -; ENABLE: push {r7, lr} - -; CHECK: bl -; DISABLE-V5-NEXT: pop {r7, pc} -; DISABLE-V4T-NEXT: b [[END_LABEL:LBB[0-9_]+]] - -; ENABLE-V5-NEXT: pop {r7, pc} -; ENABLE-V4-NEXT: pop {r7} -; ENABLE-V4-NEXT: pop {r1} -; ENABLE-V4-NEXT: bx r1 - -; CHECK: [[ELSE_LABEL]]: @ %if.else -; CHECK-NEXT: lsls r0, r1, #1 -; DISABLE-V5-NEXT: pop {r7, pc} -; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end -; DISABLE-V4T-NEXT: pop {r7} -; DISABLE-V4T-NEXT: pop {r1} -; DISABLE-V4T-NEXT: bx r1 - -; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end -; ENABLE-NEXT: bx lr - +; ENABLE-V4T-LABEL: b_to_bx: +; ENABLE-V4T: @ %bb.0: @ %entry +; ENABLE-V4T-NEXT: movs r1, r0 +; ENABLE-V4T-NEXT: cmp r0, #49 +; ENABLE-V4T-NEXT: bgt LBB10_2 +; ENABLE-V4T-NEXT: @ %bb.1: @ %if.then +; ENABLE-V4T-NEXT: push {r7, lr} +; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V4T-NEXT: .cfi_offset lr, -4 +; ENABLE-V4T-NEXT: .cfi_offset r7, -8 +; ENABLE-V4T-NEXT: ldr r0, LCPI10_0 +; ENABLE-V4T-NEXT: ldr r2, LCPI10_1 +; ENABLE-V4T-NEXT: LPC10_0: +; ENABLE-V4T-NEXT: add r2, pc +; ENABLE-V4T-NEXT: bl Ltmp4 +; ENABLE-V4T-NEXT: pop {r7} +; ENABLE-V4T-NEXT: pop {r1} +; ENABLE-V4T-NEXT: bx r1 +; ENABLE-V4T-NEXT: LBB10_2: @ %if.else +; ENABLE-V4T-NEXT: lsls r0, r1, #1 +; ENABLE-V4T-NEXT: bx lr +; ENABLE-V4T-NEXT: .p2align 2 +; ENABLE-V4T-NEXT: @ %bb.3: +; ENABLE-V4T-NEXT: .data_region +; ENABLE-V4T-NEXT: LCPI10_0: +; ENABLE-V4T-NEXT: .long 5000 @ 0x1388 +; 
ENABLE-V4T-NEXT: LCPI10_1: +; ENABLE-V4T-NEXT: .long ___divsi3-(LPC10_0+4) +; ENABLE-V4T-NEXT: .end_data_region +; +; ENABLE-V5T-LABEL: b_to_bx: +; ENABLE-V5T: @ %bb.0: @ %entry +; ENABLE-V5T-NEXT: movs r1, r0 +; ENABLE-V5T-NEXT: cmp r0, #49 +; ENABLE-V5T-NEXT: bgt LBB10_2 +; ENABLE-V5T-NEXT: @ %bb.1: @ %if.then +; ENABLE-V5T-NEXT: push {r7, lr} +; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V5T-NEXT: .cfi_offset lr, -4 +; ENABLE-V5T-NEXT: .cfi_offset r7, -8 +; ENABLE-V5T-NEXT: ldr r0, LCPI10_0 +; ENABLE-V5T-NEXT: bl ___divsi3 +; ENABLE-V5T-NEXT: pop {r7, pc} +; ENABLE-V5T-NEXT: LBB10_2: @ %if.else +; ENABLE-V5T-NEXT: lsls r0, r1, #1 +; ENABLE-V5T-NEXT: LBB10_3: @ %if.end +; ENABLE-V5T-NEXT: bx lr +; ENABLE-V5T-NEXT: .p2align 2 +; ENABLE-V5T-NEXT: @ %bb.4: +; ENABLE-V5T-NEXT: .data_region +; ENABLE-V5T-NEXT: LCPI10_0: +; ENABLE-V5T-NEXT: .long 5000 @ 0x1388 +; ENABLE-V5T-NEXT: .end_data_region +; +; DISABLE-V4T-LABEL: b_to_bx: +; DISABLE-V4T: @ %bb.0: @ %entry +; DISABLE-V4T-NEXT: push {r7, lr} +; DISABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V4T-NEXT: .cfi_offset lr, -4 +; DISABLE-V4T-NEXT: .cfi_offset r7, -8 +; DISABLE-V4T-NEXT: movs r1, r0 +; DISABLE-V4T-NEXT: cmp r0, #49 +; DISABLE-V4T-NEXT: bgt LBB10_2 +; DISABLE-V4T-NEXT: @ %bb.1: @ %if.then +; DISABLE-V4T-NEXT: ldr r0, LCPI10_0 +; DISABLE-V4T-NEXT: ldr r2, LCPI10_1 +; DISABLE-V4T-NEXT: LPC10_0: +; DISABLE-V4T-NEXT: add r2, pc +; DISABLE-V4T-NEXT: bl Ltmp4 +; DISABLE-V4T-NEXT: b LBB10_3 +; DISABLE-V4T-NEXT: LBB10_2: @ %if.else +; DISABLE-V4T-NEXT: lsls r0, r1, #1 +; DISABLE-V4T-NEXT: LBB10_3: @ %if.end +; DISABLE-V4T-NEXT: pop {r7} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 +; DISABLE-V4T-NEXT: .p2align 2 +; DISABLE-V4T-NEXT: @ %bb.4: +; DISABLE-V4T-NEXT: .data_region +; DISABLE-V4T-NEXT: LCPI10_0: +; DISABLE-V4T-NEXT: .long 5000 @ 0x1388 +; DISABLE-V4T-NEXT: LCPI10_1: +; DISABLE-V4T-NEXT: .long ___divsi3-(LPC10_0+4) +; DISABLE-V4T-NEXT: .end_data_region +; +; DISABLE-V5T-LABEL: 
b_to_bx: +; DISABLE-V5T: @ %bb.0: @ %entry +; DISABLE-V5T-NEXT: push {r7, lr} +; DISABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V5T-NEXT: .cfi_offset lr, -4 +; DISABLE-V5T-NEXT: .cfi_offset r7, -8 +; DISABLE-V5T-NEXT: movs r1, r0 +; DISABLE-V5T-NEXT: cmp r0, #49 +; DISABLE-V5T-NEXT: bgt LBB10_2 +; DISABLE-V5T-NEXT: @ %bb.1: @ %if.then +; DISABLE-V5T-NEXT: ldr r0, LCPI10_0 +; DISABLE-V5T-NEXT: bl ___divsi3 +; DISABLE-V5T-NEXT: pop {r7, pc} +; DISABLE-V5T-NEXT: LBB10_2: @ %if.else +; DISABLE-V5T-NEXT: lsls r0, r1, #1 +; DISABLE-V5T-NEXT: pop {r7, pc} +; DISABLE-V5T-NEXT: .p2align 2 +; DISABLE-V5T-NEXT: @ %bb.3: +; DISABLE-V5T-NEXT: .data_region +; DISABLE-V5T-NEXT: LCPI10_0: +; DISABLE-V5T-NEXT: .long 5000 @ 0x1388 +; DISABLE-V5T-NEXT: .end_data_region entry: %cmp = icmp slt i32 %value, 50 br i1 %cmp, label %if.then, label %if.else @@ -639,29 +1443,99 @@ } define i1 @beq_to_bx(i32* %y, i32 %head) { -; CHECK-LABEL: beq_to_bx: -; DISABLE: push {r4, lr} -; CHECK: cmp r2, #0 -; CHECK-NEXT: beq [[EXIT_LABEL:LBB[0-9_]+]] -; ENABLE: push {r4, lr} - -; CHECK: lsls r4, r3, #30 -; ENABLE-NEXT: ldr [[POP:r[4567]]], [sp, #4] -; ENABLE-NEXT: mov lr, [[POP]] -; ENABLE-NEXT: pop {[[POP]]} -; ENABLE-NEXT: add sp, #4 -; CHECK-NEXT: bpl [[EXIT_LABEL]] - -; CHECK: str r1, [r2] -; CHECK: str r3, [r2] -; CHECK-NEXT: movs r0, #0 -; CHECK-NEXT: [[EXIT_LABEL]]: @ %cleanup -; ENABLE-NEXT: bx lr -; DISABLE-V5-NEXT: pop {r4, pc} -; DISABLE-V4T-NEXT: pop {r4} -; DISABLE-V4T-NEXT: pop {r1} -; DISABLE-V4T-NEXT: bx r1 - +; ENABLE-V4T-LABEL: beq_to_bx: +; ENABLE-V4T: @ %bb.0: @ %entry +; ENABLE-V4T-NEXT: movs r2, r0 +; ENABLE-V4T-NEXT: movs r0, #1 +; ENABLE-V4T-NEXT: cmp r2, #0 +; ENABLE-V4T-NEXT: beq LBB11_3 +; ENABLE-V4T-NEXT: @ %bb.1: @ %if.end +; ENABLE-V4T-NEXT: push {r4, lr} +; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V4T-NEXT: .cfi_offset lr, -4 +; ENABLE-V4T-NEXT: .cfi_offset r4, -8 +; ENABLE-V4T-NEXT: ldr r3, [r2] +; ENABLE-V4T-NEXT: lsls r4, r3, #30 +; ENABLE-V4T-NEXT: ldr r4, 
[sp, #4] +; ENABLE-V4T-NEXT: mov lr, r4 +; ENABLE-V4T-NEXT: pop {r4} +; ENABLE-V4T-NEXT: add sp, #4 +; ENABLE-V4T-NEXT: bpl LBB11_3 +; ENABLE-V4T-NEXT: @ %bb.2: @ %if.end4 +; ENABLE-V4T-NEXT: str r1, [r2] +; ENABLE-V4T-NEXT: str r3, [r2] +; ENABLE-V4T-NEXT: movs r0, #0 +; ENABLE-V4T-NEXT: LBB11_3: @ %cleanup +; ENABLE-V4T-NEXT: bx lr +; +; ENABLE-V5T-LABEL: beq_to_bx: +; ENABLE-V5T: @ %bb.0: @ %entry +; ENABLE-V5T-NEXT: movs r2, r0 +; ENABLE-V5T-NEXT: movs r0, #1 +; ENABLE-V5T-NEXT: cmp r2, #0 +; ENABLE-V5T-NEXT: beq LBB11_3 +; ENABLE-V5T-NEXT: @ %bb.1: @ %if.end +; ENABLE-V5T-NEXT: push {r4, lr} +; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V5T-NEXT: .cfi_offset lr, -4 +; ENABLE-V5T-NEXT: .cfi_offset r4, -8 +; ENABLE-V5T-NEXT: ldr r3, [r2] +; ENABLE-V5T-NEXT: lsls r4, r3, #30 +; ENABLE-V5T-NEXT: ldr r4, [sp, #4] +; ENABLE-V5T-NEXT: mov lr, r4 +; ENABLE-V5T-NEXT: pop {r4} +; ENABLE-V5T-NEXT: add sp, #4 +; ENABLE-V5T-NEXT: bpl LBB11_3 +; ENABLE-V5T-NEXT: @ %bb.2: @ %if.end4 +; ENABLE-V5T-NEXT: str r1, [r2] +; ENABLE-V5T-NEXT: str r3, [r2] +; ENABLE-V5T-NEXT: movs r0, #0 +; ENABLE-V5T-NEXT: LBB11_3: @ %cleanup +; ENABLE-V5T-NEXT: bx lr +; +; DISABLE-V4T-LABEL: beq_to_bx: +; DISABLE-V4T: @ %bb.0: @ %entry +; DISABLE-V4T-NEXT: push {r4, lr} +; DISABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V4T-NEXT: .cfi_offset lr, -4 +; DISABLE-V4T-NEXT: .cfi_offset r4, -8 +; DISABLE-V4T-NEXT: movs r2, r0 +; DISABLE-V4T-NEXT: movs r0, #1 +; DISABLE-V4T-NEXT: cmp r2, #0 +; DISABLE-V4T-NEXT: beq LBB11_3 +; DISABLE-V4T-NEXT: @ %bb.1: @ %if.end +; DISABLE-V4T-NEXT: ldr r3, [r2] +; DISABLE-V4T-NEXT: lsls r4, r3, #30 +; DISABLE-V4T-NEXT: bpl LBB11_3 +; DISABLE-V4T-NEXT: @ %bb.2: @ %if.end4 +; DISABLE-V4T-NEXT: str r1, [r2] +; DISABLE-V4T-NEXT: str r3, [r2] +; DISABLE-V4T-NEXT: movs r0, #0 +; DISABLE-V4T-NEXT: LBB11_3: @ %cleanup +; DISABLE-V4T-NEXT: pop {r4} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 +; +; DISABLE-V5T-LABEL: beq_to_bx: +; DISABLE-V5T: @ %bb.0: @ 
%entry +; DISABLE-V5T-NEXT: push {r4, lr} +; DISABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V5T-NEXT: .cfi_offset lr, -4 +; DISABLE-V5T-NEXT: .cfi_offset r4, -8 +; DISABLE-V5T-NEXT: movs r2, r0 +; DISABLE-V5T-NEXT: movs r0, #1 +; DISABLE-V5T-NEXT: cmp r2, #0 +; DISABLE-V5T-NEXT: beq LBB11_3 +; DISABLE-V5T-NEXT: @ %bb.1: @ %if.end +; DISABLE-V5T-NEXT: ldr r3, [r2] +; DISABLE-V5T-NEXT: lsls r4, r3, #30 +; DISABLE-V5T-NEXT: bpl LBB11_3 +; DISABLE-V5T-NEXT: @ %bb.2: @ %if.end4 +; DISABLE-V5T-NEXT: str r1, [r2] +; DISABLE-V5T-NEXT: str r3, [r2] +; DISABLE-V5T-NEXT: movs r0, #0 +; DISABLE-V5T-NEXT: LBB11_3: @ %cleanup +; DISABLE-V5T-NEXT: pop {r4, pc} entry: %cmp = icmp eq i32* %y, null br i1 %cmp, label %cleanup, label %if.end Index: test/CodeGen/X86/i386-shrink-wrapping.ll =================================================================== --- test/CodeGen/X86/i386-shrink-wrapping.ll +++ test/CodeGen/X86/i386-shrink-wrapping.ll @@ -1,5 +1,6 @@ -; RUN: llc %s -o - -enable-shrink-wrap=true -no-x86-call-frame-opt | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE -; RUN: llc %s -o - -enable-shrink-wrap=false -no-x86-call-frame-opt | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -o - -enable-shrink-wrap=true -no-x86-call-frame-opt | FileCheck %s --check-prefix=ENABLE +; RUN: llc %s -o - -enable-shrink-wrap=false -no-x86-call-frame-opt | FileCheck %s --check-prefix=DISABLE target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" target triple = "i386-apple-macosx10.5" @@ -15,53 +16,83 @@ ; Check that we are clobbering the flags when they are live-in of the ; prologue block and the prologue needs to adjust the stack. ; PR25607. 
-; -; CHECK-LABEL: eflagsLiveInPrologue: -; -; DISABLE: pushl -; DISABLE-NEXT: subl $8, %esp -; -; CHECK: movl L_a$non_lazy_ptr, [[A:%[a-z]+]] -; CHECK-NEXT: cmpl $0, ([[A]]) -; CHECK-NEXT: je [[PREHEADER_LABEL:LBB[0-9_]+]] -; -; CHECK: movb $1, _d -; -; CHECK: [[PREHEADER_LABEL]]: -; CHECK-NEXT: movl L_b$non_lazy_ptr, [[B:%[a-z]+]] -; CHECK-NEXT: movl ([[B]]), [[TMP1:%[a-z]+]] -; CHECK-NEXT: testl [[TMP1]], [[TMP1]] -; CHECK-NEXT: je [[FOREND_LABEL:LBB[0-9_]+]] -; -; Skip the loop. -; [...] -; -; The for.end block is split to accomadate the different selects. -; We are interested in the one with the call, so skip until the branch. -; CHECK: [[FOREND_LABEL]]: - -; ENABLE: pushl -; ENABLE-NEXT: subl $8, %esp - -; CHECK: xorl [[CMOVE_VAL:%edx]], [[CMOVE_VAL]] -; CHECK-NEXT: cmpb $0, _d -; CHECK-NEXT: movl $6, [[IMM_VAL:%ecx]] -; The eflags is used in the next instruction. -; If that instruction disappear, we are not exercising the bug -; anymore. -; CHECK-NEXT: cmovnel [[CMOVE_VAL]], [[IMM_VAL]] -; CHECK-NEXT: L_e$non_lazy_ptr, [[E:%[a-z]+]] -; CHECK-NEXT: movb %cl, ([[E]]) -; CHECK-NEXT: leal 1(%ecx), %esi - -; CHECK: calll _varfunc -; Set the return value to 0. 
-; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: addl $8, %esp -; CHECK-NEXT: popl -; CHECK-NEXT: retl define i32 @eflagsLiveInPrologue() #0 { +; ENABLE-LABEL: eflagsLiveInPrologue: +; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: movl L_a$non_lazy_ptr, %eax +; ENABLE-NEXT: cmpl $0, (%eax) +; ENABLE-NEXT: je LBB0_2 +; ENABLE-NEXT: ## %bb.1: ## %if.then +; ENABLE-NEXT: movb $1, _d +; ENABLE-NEXT: LBB0_2: ## %for.cond.preheader +; ENABLE-NEXT: movl L_b$non_lazy_ptr, %eax +; ENABLE-NEXT: movl (%eax), %eax +; ENABLE-NEXT: testl %eax, %eax +; ENABLE-NEXT: je LBB0_4 +; ENABLE-NEXT: .p2align 4, 0x90 +; ENABLE-NEXT: LBB0_3: ## %for.body +; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: jmp LBB0_3 +; ENABLE-NEXT: LBB0_4: ## %for.end +; ENABLE-NEXT: pushl %esi +; ENABLE-NEXT: subl $8, %esp +; ENABLE-NEXT: xorl %edx, %edx +; ENABLE-NEXT: cmpb $0, _d +; ENABLE-NEXT: movl $6, %ecx +; ENABLE-NEXT: cmovnel %edx, %ecx +; ENABLE-NEXT: movl L_e$non_lazy_ptr, %edx +; ENABLE-NEXT: movb %cl, (%edx) +; ENABLE-NEXT: leal 1(%ecx), %esi +; ENABLE-NEXT: cltd +; ENABLE-NEXT: idivl %esi +; ENABLE-NEXT: movl L_c$non_lazy_ptr, %eax +; ENABLE-NEXT: movl %edx, (%eax) +; ENABLE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; ENABLE-NEXT: movl $L_.str, (%esp) +; ENABLE-NEXT: calll _varfunc +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: addl $8, %esp +; ENABLE-NEXT: popl %esi +; ENABLE-NEXT: retl +; +; DISABLE-LABEL: eflagsLiveInPrologue: +; DISABLE: ## %bb.0: ## %entry +; DISABLE-NEXT: pushl %esi +; DISABLE-NEXT: subl $8, %esp +; DISABLE-NEXT: movl L_a$non_lazy_ptr, %eax +; DISABLE-NEXT: cmpl $0, (%eax) +; DISABLE-NEXT: je LBB0_2 +; DISABLE-NEXT: ## %bb.1: ## %if.then +; DISABLE-NEXT: movb $1, _d +; DISABLE-NEXT: LBB0_2: ## %for.cond.preheader +; DISABLE-NEXT: movl L_b$non_lazy_ptr, %eax +; DISABLE-NEXT: movl (%eax), %eax +; DISABLE-NEXT: testl %eax, %eax +; DISABLE-NEXT: je LBB0_4 +; DISABLE-NEXT: .p2align 4, 0x90 +; DISABLE-NEXT: LBB0_3: ## %for.body +; DISABLE-NEXT: ## =>This Inner Loop 
Header: Depth=1 +; DISABLE-NEXT: jmp LBB0_3 +; DISABLE-NEXT: LBB0_4: ## %for.end +; DISABLE-NEXT: xorl %edx, %edx +; DISABLE-NEXT: cmpb $0, _d +; DISABLE-NEXT: movl $6, %ecx +; DISABLE-NEXT: cmovnel %edx, %ecx +; DISABLE-NEXT: movl L_e$non_lazy_ptr, %edx +; DISABLE-NEXT: movb %cl, (%edx) +; DISABLE-NEXT: leal 1(%ecx), %esi +; DISABLE-NEXT: cltd +; DISABLE-NEXT: idivl %esi +; DISABLE-NEXT: movl L_c$non_lazy_ptr, %eax +; DISABLE-NEXT: movl %edx, (%eax) +; DISABLE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; DISABLE-NEXT: movl $L_.str, (%esp) +; DISABLE-NEXT: calll _varfunc +; DISABLE-NEXT: xorl %eax, %eax +; DISABLE-NEXT: addl $8, %esp +; DISABLE-NEXT: popl %esi +; DISABLE-NEXT: retl entry: %tmp = load i32, i32* @a, align 4 %tobool = icmp eq i32 %tmp, 0 Index: test/CodeGen/X86/shrink-wrap-chkstk-x86_64.ll =================================================================== --- test/CodeGen/X86/shrink-wrap-chkstk-x86_64.ll +++ test/CodeGen/X86/shrink-wrap-chkstk-x86_64.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=x86_64-windows-gnu -exception-model=dwarf < %s | FileCheck %s %struct.A = type { [4096 x i8] } @@ -6,6 +7,31 @@ @b = common global i32 0, align 4 define void @fn1() nounwind uwtable { +; CHECK-LABEL: fn1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl {{.*}}(%rip), %eax +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: jne .LBB0_2 +; CHECK-NEXT: # %bb.1: # %select.true.sink +; CHECK-NEXT: cltq +; CHECK-NEXT: imulq $715827883, %rax, %rax # imm = 0x2AAAAAAB +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: shrq $63, %rcx +; CHECK-NEXT: shrq $32, %rax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: .LBB0_2: # %select.end +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movl $4128, %eax # imm = 0x1020 +; CHECK-NEXT: callq ___chkstk_ms +; CHECK-NEXT: subq %rax, %rsp +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: .cfi_def_cfa_offset 4144 +; CHECK-NEXT: movl %eax, {{.*}}(%rip) +; CHECK-NEXT: leaq 
{{[0-9]+}}(%rsp), %rcx +; CHECK-NEXT: # kill: def $ecx killed $ecx killed $rcx +; CHECK-NEXT: callq fn2 +; CHECK-NEXT: addq $4136, %rsp # imm = 0x1028 +; CHECK-NEXT: retq entry: %ctx = alloca %struct.A, align 1 %0 = load i32, i32* @a, align 4 @@ -26,11 +52,3 @@ declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) -; CHECK-LABEL: fn1: -; CHECK: pushq %rax -; CHECK: movl $4128, %eax -; CHECK: callq ___chkstk_ms -; CHECK: subq %rax, %rsp -; CHECK: movq 4128(%rsp), %rax - -; CHECK: addq $4136, %rsp Index: test/CodeGen/X86/x86-shrink-wrapping.ll =================================================================== --- test/CodeGen/X86/x86-shrink-wrapping.ll +++ test/CodeGen/X86/x86-shrink-wrapping.ll @@ -1,6 +1,7 @@ -; RUN: llc %s -o - -enable-shrink-wrap=true -pass-remarks-output=%t | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -o - -enable-shrink-wrap=true -pass-remarks-output=%t | FileCheck %s --check-prefix=ENABLE ; RUN: cat %t | FileCheck %s --check-prefix=REMARKS -; RUN: llc %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE +; RUN: llc %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=DISABLE ; ; Note: Lots of tests use inline asm instead of regular calls. ; This allows to have a better control on what the allocation will do. @@ -13,43 +14,38 @@ ; Initial motivating example: Simple diamond with a call just on one side. -; CHECK-LABEL: foo: -; -; Compare the arguments and jump to exit. -; No prologue needed. -; ENABLE: movl %edi, [[ARG0CPY:%e[a-z]+]] -; ENABLE-NEXT: cmpl %esi, %edi -; ENABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; (What we push does not matter. It should be some random sratch register.) -; CHECK: pushq -; -; Compare the arguments and jump to exit. -; After the prologue is set. 
-; DISABLE: movl %edi, [[ARG0CPY:%e[a-z]+]] -; DISABLE-NEXT: cmpl %esi, %edi -; DISABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]] -; -; Store %a in the alloca. -; CHECK: movl [[ARG0CPY]], 4(%rsp) -; Set the alloca address in the second argument. -; CHECK-NEXT: leaq 4(%rsp), %rsi -; Set the first argument to zero. -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: callq _doSomething -; -; With shrink-wrapping, epilogue is just after the call. -; ENABLE-NEXT: addq $8, %rsp -; -; CHECK: [[EXIT_LABEL]]: -; -; Without shrink-wrapping, epilogue is in the exit block. -; Epilogue code. (What we pop does not matter.) -; DISABLE-NEXT: popq -; -; CHECK-NEXT: retq define i32 @foo(i32 %a, i32 %b) { +; ENABLE-LABEL: foo: +; ENABLE: ## %bb.0: +; ENABLE-NEXT: movl %edi, %eax +; ENABLE-NEXT: cmpl %esi, %edi +; ENABLE-NEXT: jge LBB0_2 +; ENABLE-NEXT: ## %bb.1: ## %true +; ENABLE-NEXT: pushq %rax +; ENABLE-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-NEXT: movl %eax, {{[0-9]+}}(%rsp) +; ENABLE-NEXT: leaq {{[0-9]+}}(%rsp), %rsi +; ENABLE-NEXT: xorl %edi, %edi +; ENABLE-NEXT: callq _doSomething +; ENABLE-NEXT: addq $8, %rsp +; ENABLE-NEXT: LBB0_2: ## %false +; ENABLE-NEXT: retq +; +; DISABLE-LABEL: foo: +; DISABLE: ## %bb.0: +; DISABLE-NEXT: pushq %rax +; DISABLE-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-NEXT: movl %edi, %eax +; DISABLE-NEXT: cmpl %esi, %edi +; DISABLE-NEXT: jge LBB0_2 +; DISABLE-NEXT: ## %bb.1: ## %true +; DISABLE-NEXT: movl %eax, {{[0-9]+}}(%rsp) +; DISABLE-NEXT: leaq {{[0-9]+}}(%rsp), %rsi +; DISABLE-NEXT: xorl %edi, %edi +; DISABLE-NEXT: callq _doSomething +; DISABLE-NEXT: LBB0_2: ## %false +; DISABLE-NEXT: popq %rcx +; DISABLE-NEXT: retq %tmp = alloca i32, align 4 %tmp2 = icmp slt i32 %a, %b br i1 %tmp2, label %true, label %false @@ -70,51 +66,69 @@ ; Check that we do not perform the restore inside the loop whereas the save ; is outside. -; CHECK-LABEL: freqSaveAndRestoreOutsideLoop: -; -; Shrink-wrapping allows to skip the prologue in the else case. 
-; ENABLE: testl %edi, %edi -; ENABLE: je [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; Make sure we save the CSR used in the inline asm: rbx. -; CHECK: pushq %rbx -; -; DISABLE: testl %edi, %edi -; DISABLE: je [[ELSE_LABEL:LBB[0-9_]+]] -; -; CHECK: xorl [[SUM:%eax]], [[SUM]] -; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]] -; -; Next BB. -; CHECK: [[LOOP:LBB[0-9_]+]]: ## %for.body -; CHECK: movl $1, [[TMP:%e[a-z]+]] -; CHECK: addl [[TMP]], [[SUM]] -; CHECK-NEXT: decl [[IV]] -; CHECK-NEXT: jne [[LOOP]] -; -; Next BB. -; SUM << 3. -; CHECK: shll $3, [[SUM]] -; -; DISABLE: popq -; DISABLE: retq -; -; DISABLE: [[ELSE_LABEL]]: ## %if.else -; Shift second argument by one in returned register. -; DISABLE: movl %esi, %eax -; DISABLE: addl %esi, %eax -; -; Epilogue code. -; CHECK-DAG: popq %rbx -; CHECK: retq -; -; ENABLE: [[ELSE_LABEL]]: ## %if.else -; Shift second argument by one and store into returned register. -; ENABLE: movl %esi, %eax -; ENABLE: addl %esi, %eax -; ENABLE-NEXT: retq define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) { +; ENABLE-LABEL: freqSaveAndRestoreOutsideLoop: +; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: testl %edi, %edi +; ENABLE-NEXT: je LBB1_4 +; ENABLE-NEXT: ## %bb.1: ## %for.preheader +; ENABLE-NEXT: pushq %rbx +; ENABLE-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-NEXT: .cfi_offset %rbx, -16 +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: nop +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: movl $10, %ecx +; ENABLE-NEXT: .p2align 4, 0x90 +; ENABLE-NEXT: LBB1_2: ## %for.body +; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: movl $1, %edx +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: addl %edx, %eax +; ENABLE-NEXT: decl %ecx +; ENABLE-NEXT: jne LBB1_2 +; ENABLE-NEXT: ## %bb.3: ## %for.end +; ENABLE-NEXT: shll $3, %eax +; ENABLE-NEXT: popq %rbx +; ENABLE-NEXT: retq +; ENABLE-NEXT: LBB1_4: ## %if.else +; ENABLE-NEXT: movl %esi, %eax +; ENABLE-NEXT: 
addl %esi, %eax +; ENABLE-NEXT: retq +; +; DISABLE-LABEL: freqSaveAndRestoreOutsideLoop: +; DISABLE: ## %bb.0: ## %entry +; DISABLE-NEXT: pushq %rbx +; DISABLE-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-NEXT: .cfi_offset %rbx, -16 +; DISABLE-NEXT: testl %edi, %edi +; DISABLE-NEXT: je LBB1_4 +; DISABLE-NEXT: ## %bb.1: ## %for.preheader +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: nop +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: xorl %eax, %eax +; DISABLE-NEXT: movl $10, %ecx +; DISABLE-NEXT: .p2align 4, 0x90 +; DISABLE-NEXT: LBB1_2: ## %for.body +; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: movl $1, %edx +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: addl %edx, %eax +; DISABLE-NEXT: decl %ecx +; DISABLE-NEXT: jne LBB1_2 +; DISABLE-NEXT: ## %bb.3: ## %for.end +; DISABLE-NEXT: shll $3, %eax +; DISABLE-NEXT: popq %rbx +; DISABLE-NEXT: retq +; DISABLE-NEXT: LBB1_4: ## %if.else +; DISABLE-NEXT: movl %esi, %eax +; DISABLE-NEXT: addl %esi, %eax +; DISABLE-NEXT: popq %rbx +; DISABLE-NEXT: retq entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.preheader @@ -149,25 +163,58 @@ ; Check that we do not perform the shrink-wrapping inside the loop even ; though that would be legal. The cost model must prevent that. -; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2: -; Prologue code. -; Make sure we save the CSR used in the inline asm: rbx. -; CHECK: pushq %rbx -; CHECK: nop -; CHECK: xorl [[SUM:%e[a-z]+]], [[SUM]] -; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]] -; Next BB. -; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body -; CHECK: movl $1, [[TMP:%e[a-z]+]] -; CHECK: addl [[TMP]], [[SUM]] -; CHECK-NEXT: decl [[IV]] -; CHECK-NEXT: jne [[LOOP_LABEL]] -; Next BB. 
-; CHECK: ## %for.exit -; CHECK: nop -; CHECK: popq %rbx -; CHECK-NEXT: retq define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) { +; ENABLE-LABEL: freqSaveAndRestoreOutsideLoop2: +; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: pushq %rbx +; ENABLE-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-NEXT: .cfi_offset %rbx, -16 +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: nop +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: movl $10, %ecx +; ENABLE-NEXT: .p2align 4, 0x90 +; ENABLE-NEXT: LBB2_1: ## %for.body +; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: movl $1, %edx +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: addl %edx, %eax +; ENABLE-NEXT: decl %ecx +; ENABLE-NEXT: jne LBB2_1 +; ENABLE-NEXT: ## %bb.2: ## %for.exit +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: nop +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: popq %rbx +; ENABLE-NEXT: retq +; +; DISABLE-LABEL: freqSaveAndRestoreOutsideLoop2: +; DISABLE: ## %bb.0: ## %entry +; DISABLE-NEXT: pushq %rbx +; DISABLE-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-NEXT: .cfi_offset %rbx, -16 +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: nop +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: xorl %eax, %eax +; DISABLE-NEXT: movl $10, %ecx +; DISABLE-NEXT: .p2align 4, 0x90 +; DISABLE-NEXT: LBB2_1: ## %for.body +; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: movl $1, %edx +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: addl %edx, %eax +; DISABLE-NEXT: decl %ecx +; DISABLE-NEXT: jne LBB2_1 +; DISABLE-NEXT: ## %bb.2: ## %for.exit +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: nop +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: popq %rbx +; DISABLE-NEXT: retq entry: br label %for.preheader @@ -194,49 +241,75 @@ ; Check with a more complex case that we do not have save within the loop and ; restore outside. 
-; CHECK-LABEL: loopInfoSaveOutsideLoop: -; -; ENABLE: testl %edi, %edi -; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; Make sure we save the CSR used in the inline asm: rbx. -; CHECK: pushq %rbx -; -; DISABLE: testl %edi, %edi -; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] -; -; CHECK: nop -; CHECK: xorl [[SUM:%eax]], [[SUM]] -; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]] -; -; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body -; CHECK: movl $1, [[TMP:%e[a-z]+]] -; CHECK: addl [[TMP]], [[SUM]] -; CHECK-NEXT: decl [[IV]] -; CHECK-NEXT: jne [[LOOP_LABEL]] -; Next BB. -; CHECK: nop -; CHECK: shll $3, [[SUM]] -; -; DISABLE: popq -; DISABLE: retq -; -; DISABLE: [[ELSE_LABEL]]: ## %if.else -; Shift second argument by one in returned register. -; DISABLE: movl %esi, %eax -; DISABLE: addl %esi, %eax -; -; Epilogue code. -; CHECK-DAG: popq %rbx -; CHECK: retq -; -; ENABLE: [[ELSE_LABEL]]: ## %if.else -; Shift second argument by one and store into returned register. -; ENABLE: movl %esi, %eax -; ENABLE: addl %esi, %eax -; ENABLE-NEXT: retq define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) { +; ENABLE-LABEL: loopInfoSaveOutsideLoop: +; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: testl %edi, %edi +; ENABLE-NEXT: je LBB3_4 +; ENABLE-NEXT: ## %bb.1: ## %for.preheader +; ENABLE-NEXT: pushq %rbx +; ENABLE-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-NEXT: .cfi_offset %rbx, -16 +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: nop +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: movl $10, %ecx +; ENABLE-NEXT: .p2align 4, 0x90 +; ENABLE-NEXT: LBB3_2: ## %for.body +; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: movl $1, %edx +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: addl %edx, %eax +; ENABLE-NEXT: decl %ecx +; ENABLE-NEXT: jne LBB3_2 +; ENABLE-NEXT: ## %bb.3: ## %for.end +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: nop +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: shll 
$3, %eax +; ENABLE-NEXT: popq %rbx +; ENABLE-NEXT: retq +; ENABLE-NEXT: LBB3_4: ## %if.else +; ENABLE-NEXT: movl %esi, %eax +; ENABLE-NEXT: addl %esi, %eax +; ENABLE-NEXT: retq +; +; DISABLE-LABEL: loopInfoSaveOutsideLoop: +; DISABLE: ## %bb.0: ## %entry +; DISABLE-NEXT: pushq %rbx +; DISABLE-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-NEXT: .cfi_offset %rbx, -16 +; DISABLE-NEXT: testl %edi, %edi +; DISABLE-NEXT: je LBB3_4 +; DISABLE-NEXT: ## %bb.1: ## %for.preheader +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: nop +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: xorl %eax, %eax +; DISABLE-NEXT: movl $10, %ecx +; DISABLE-NEXT: .p2align 4, 0x90 +; DISABLE-NEXT: LBB3_2: ## %for.body +; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: movl $1, %edx +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: addl %edx, %eax +; DISABLE-NEXT: decl %ecx +; DISABLE-NEXT: jne LBB3_2 +; DISABLE-NEXT: ## %bb.3: ## %for.end +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: nop +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: shll $3, %eax +; DISABLE-NEXT: popq %rbx +; DISABLE-NEXT: retq +; DISABLE-NEXT: LBB3_4: ## %if.else +; DISABLE-NEXT: movl %esi, %eax +; DISABLE-NEXT: addl %esi, %eax +; DISABLE-NEXT: popq %rbx +; DISABLE-NEXT: retq entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.preheader @@ -270,49 +343,65 @@ ; Check with a more complex case that we do not have restore within the loop and ; save outside. -; CHECK-LABEL: loopInfoRestoreOutsideLoop: -; -; ENABLE: testl %edi, %edi -; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; Make sure we save the CSR used in the inline asm: rbx. 
-; CHECK: pushq %rbx -; -; DISABLE: testl %edi, %edi -; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] -; -; CHECK: nop -; CHECK: xorl [[SUM:%eax]], [[SUM]] -; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]] -; -; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body -; CHECK: movl $1, [[TMP:%e[a-z]+]] -; CHECK: addl [[TMP]], [[SUM]] -; CHECK-NEXT: decl [[IV]] -; CHECK-NEXT: jne [[LOOP_LABEL]] -; Next BB. -; CHECK: shll $3, [[SUM]] -; -; DISABLE: popq -; DISABLE: retq -; -; DISABLE: [[ELSE_LABEL]]: ## %if.else - -; Shift second argument by one in returned register. -; DISABLE: movl %esi, %eax -; DISABLE: addl %esi, %eax -; -; Epilogue code. -; CHECK-DAG: popq %rbx -; CHECK: retq -; -; ENABLE: [[ELSE_LABEL]]: ## %if.else -; Shift second argument by one and store into returned register. -; ENABLE: movl %esi, %eax -; ENABLE: addl %esi, %eax -; ENABLE-NEXT: retq define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) nounwind { +; ENABLE-LABEL: loopInfoRestoreOutsideLoop: +; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: testl %edi, %edi +; ENABLE-NEXT: je LBB4_4 +; ENABLE-NEXT: ## %bb.1: ## %if.then +; ENABLE-NEXT: pushq %rbx +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: nop +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: movl $10, %ecx +; ENABLE-NEXT: .p2align 4, 0x90 +; ENABLE-NEXT: LBB4_2: ## %for.body +; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: movl $1, %edx +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: addl %edx, %eax +; ENABLE-NEXT: decl %ecx +; ENABLE-NEXT: jne LBB4_2 +; ENABLE-NEXT: ## %bb.3: ## %for.end +; ENABLE-NEXT: shll $3, %eax +; ENABLE-NEXT: popq %rbx +; ENABLE-NEXT: retq +; ENABLE-NEXT: LBB4_4: ## %if.else +; ENABLE-NEXT: movl %esi, %eax +; ENABLE-NEXT: addl %esi, %eax +; ENABLE-NEXT: retq +; +; DISABLE-LABEL: loopInfoRestoreOutsideLoop: +; DISABLE: ## %bb.0: ## %entry +; DISABLE-NEXT: pushq %rbx +; DISABLE-NEXT: testl %edi, %edi +; DISABLE-NEXT: je LBB4_4 +; DISABLE-NEXT: 
## %bb.1: ## %if.then +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: nop +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: xorl %eax, %eax +; DISABLE-NEXT: movl $10, %ecx +; DISABLE-NEXT: .p2align 4, 0x90 +; DISABLE-NEXT: LBB4_2: ## %for.body +; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: movl $1, %edx +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: addl %edx, %eax +; DISABLE-NEXT: decl %ecx +; DISABLE-NEXT: jne LBB4_2 +; DISABLE-NEXT: ## %bb.3: ## %for.end +; DISABLE-NEXT: shll $3, %eax +; DISABLE-NEXT: popq %rbx +; DISABLE-NEXT: retq +; DISABLE-NEXT: LBB4_4: ## %if.else +; DISABLE-NEXT: movl %esi, %eax +; DISABLE-NEXT: addl %esi, %eax +; DISABLE-NEXT: popq %rbx +; DISABLE-NEXT: retq entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %if.then @@ -344,58 +433,86 @@ } ; Check that we handle function with no frame information correctly. -; CHECK-LABEL: emptyFrame: -; CHECK: ## %entry -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: retq define i32 @emptyFrame() { +; ENABLE-LABEL: emptyFrame: +; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: retq +; +; DISABLE-LABEL: emptyFrame: +; DISABLE: ## %bb.0: ## %entry +; DISABLE-NEXT: xorl %eax, %eax +; DISABLE-NEXT: retq entry: ret i32 0 } ; Check that we handle inline asm correctly. -; CHECK-LABEL: inlineAsm: -; -; ENABLE: testl %edi, %edi -; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; Make sure we save the CSR used in the inline asm: rbx. -; CHECK: pushq %rbx -; -; DISABLE: testl %edi, %edi -; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] -; -; CHECK: nop -; CHECK: movl $10, [[IV:%e[a-z]+]] -; -; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body -; Inline asm statement. -; CHECK: addl $1, %ebx -; CHECK: decl [[IV]] -; CHECK-NEXT: jne [[LOOP_LABEL]] -; Next BB. 
-; CHECK: nop -; CHECK: xorl %eax, %eax -; -; DISABLE: popq -; DISABLE: retq -; -; DISABLE: [[ELSE_LABEL]]: ## %if.else -; Shift second argument by one in returned register. -; DISABLE: movl %esi, %eax -; DISABLE: addl %esi, %eax -; -; Epilogue code. -; CHECK-DAG: popq %rbx -; CHECK: retq -; -; ENABLE: [[ELSE_LABEL]]: ## %if.else -; Shift second argument by one and store into returned register. -; ENABLE: movl %esi, %eax -; ENABLE: addl %esi, %eax -; ENABLE-NEXT: retq define i32 @inlineAsm(i32 %cond, i32 %N) { +; ENABLE-LABEL: inlineAsm: +; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: testl %edi, %edi +; ENABLE-NEXT: je LBB6_4 +; ENABLE-NEXT: ## %bb.1: ## %for.preheader +; ENABLE-NEXT: pushq %rbx +; ENABLE-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-NEXT: .cfi_offset %rbx, -16 +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: nop +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: movl $10, %eax +; ENABLE-NEXT: .p2align 4, 0x90 +; ENABLE-NEXT: LBB6_2: ## %for.body +; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: addl $1, %ebx +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: decl %eax +; ENABLE-NEXT: jne LBB6_2 +; ENABLE-NEXT: ## %bb.3: ## %for.exit +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: nop +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: popq %rbx +; ENABLE-NEXT: retq +; ENABLE-NEXT: LBB6_4: ## %if.else +; ENABLE-NEXT: movl %esi, %eax +; ENABLE-NEXT: addl %esi, %eax +; ENABLE-NEXT: retq +; +; DISABLE-LABEL: inlineAsm: +; DISABLE: ## %bb.0: ## %entry +; DISABLE-NEXT: pushq %rbx +; DISABLE-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-NEXT: .cfi_offset %rbx, -16 +; DISABLE-NEXT: testl %edi, %edi +; DISABLE-NEXT: je LBB6_4 +; DISABLE-NEXT: ## %bb.1: ## %for.preheader +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: nop +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: movl $10, %eax +; DISABLE-NEXT: .p2align 4, 0x90 +; DISABLE-NEXT: LBB6_2: ## %for.body +; DISABLE-NEXT: ## 
=>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: addl $1, %ebx +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: decl %eax +; DISABLE-NEXT: jne LBB6_2 +; DISABLE-NEXT: ## %bb.3: ## %for.exit +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: nop +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: xorl %eax, %eax +; DISABLE-NEXT: popq %rbx +; DISABLE-NEXT: retq +; DISABLE-NEXT: LBB6_4: ## %if.else +; DISABLE-NEXT: movl %esi, %eax +; DISABLE-NEXT: addl %esi, %eax +; DISABLE-NEXT: popq %rbx +; DISABLE-NEXT: retq entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.preheader @@ -425,43 +542,55 @@ } ; Check that we handle calls to variadic functions correctly. -; CHECK-LABEL: callVariadicFunc: -; -; ENABLE: movl %esi, %eax -; ENABLE: testl %edi, %edi -; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; CHECK: pushq -; -; DISABLE: movl %esi, %eax -; DISABLE: testl %edi, %edi -; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] -; -; Setup of the varags. -; CHECK: movl %eax, (%rsp) -; CHECK-NEXT: movl %eax, %edi -; CHECK-NEXT: movl %eax, %esi -; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: movl %eax, %r8d -; CHECK-NEXT: movl %eax, %r9d -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: callq _someVariadicFunc -; CHECK-NEXT: shll $3, %eax -; -; ENABLE-NEXT: addq $8, %rsp -; ENABLE-NEXT: retq -; - -; CHECK: [[ELSE_LABEL]]: ## %if.else -; Shift second argument by one and store into returned register. -; CHECK: addl %eax, %eax -; -; Epilogue code. 
-; DISABLE-NEXT: popq -; CHECK-NEXT: retq define i32 @callVariadicFunc(i32 %cond, i32 %N) { +; ENABLE-LABEL: callVariadicFunc: +; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: movl %esi, %eax +; ENABLE-NEXT: testl %edi, %edi +; ENABLE-NEXT: je LBB7_2 +; ENABLE-NEXT: ## %bb.1: ## %if.then +; ENABLE-NEXT: pushq %rax +; ENABLE-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-NEXT: movl %eax, (%rsp) +; ENABLE-NEXT: movl %eax, %edi +; ENABLE-NEXT: movl %eax, %esi +; ENABLE-NEXT: movl %eax, %edx +; ENABLE-NEXT: movl %eax, %ecx +; ENABLE-NEXT: movl %eax, %r8d +; ENABLE-NEXT: movl %eax, %r9d +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: callq _someVariadicFunc +; ENABLE-NEXT: shll $3, %eax +; ENABLE-NEXT: addq $8, %rsp +; ENABLE-NEXT: retq +; ENABLE-NEXT: LBB7_2: ## %if.else +; ENABLE-NEXT: addl %eax, %eax +; ENABLE-NEXT: retq +; +; DISABLE-LABEL: callVariadicFunc: +; DISABLE: ## %bb.0: ## %entry +; DISABLE-NEXT: pushq %rax +; DISABLE-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-NEXT: movl %esi, %eax +; DISABLE-NEXT: testl %edi, %edi +; DISABLE-NEXT: je LBB7_2 +; DISABLE-NEXT: ## %bb.1: ## %if.then +; DISABLE-NEXT: movl %eax, (%rsp) +; DISABLE-NEXT: movl %eax, %edi +; DISABLE-NEXT: movl %eax, %esi +; DISABLE-NEXT: movl %eax, %edx +; DISABLE-NEXT: movl %eax, %ecx +; DISABLE-NEXT: movl %eax, %r8d +; DISABLE-NEXT: movl %eax, %r9d +; DISABLE-NEXT: xorl %eax, %eax +; DISABLE-NEXT: callq _someVariadicFunc +; DISABLE-NEXT: shll $3, %eax +; DISABLE-NEXT: popq %rcx +; DISABLE-NEXT: retq +; DISABLE-NEXT: LBB7_2: ## %if.else +; DISABLE-NEXT: addl %eax, %eax +; DISABLE-NEXT: popq %rcx +; DISABLE-NEXT: retq entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %if.then @@ -507,47 +636,79 @@ declare hidden fastcc %struct.temp_slot* @find_temp_slot_from_address(%struct.rtx_def* readonly) -; CHECK-LABEL: useLEA: -; DISABLE: pushq -; -; CHECK: testq %rdi, %rdi -; CHECK-NEXT: je [[CLEANUP:LBB[0-9_]+]] -; -; CHECK: cmpw $66, (%rdi) -; CHECK-NEXT: jne [[CLEANUP]] -; -; CHECK: movq 
8(%rdi), %rdi -; CHECK-NEXT: movzwl (%rdi), %e[[BF_LOAD2:[a-z]+]] -; CHECK-NEXT: leal -54(%r[[BF_LOAD2]]), [[TMP:%e[a-z]+]] -; CHECK-NEXT: cmpl $14, [[TMP]] -; CHECK-NEXT: ja [[LOR_LHS_FALSE:LBB[0-9_]+]] -; -; CHECK: movl $24599, [[TMP2:%e[a-z]+]] -; CHECK-NEXT: btl [[TMP]], [[TMP2]] -; CHECK-NEXT: jae [[LOR_LHS_FALSE:LBB[0-9_]+]] -; -; CHECK: [[CLEANUP]]: ## %cleanup -; DISABLE: popq -; CHECK-NEXT: retq -; -; CHECK: [[LOR_LHS_FALSE]]: ## %lor.lhs.false -; CHECK: cmpl $134, %e[[BF_LOAD2]] -; CHECK-NEXT: je [[CLEANUP]] -; -; CHECK: cmpl $140, %e[[BF_LOAD2]] -; CHECK-NEXT: je [[CLEANUP]] -; -; ENABLE: pushq -; CHECK: callq _find_temp_slot_from_address -; CHECK-NEXT: testq %rax, %rax -; -; The adjustment must use LEA here (or be moved above the test). -; ENABLE-NEXT: leaq 8(%rsp), %rsp -; -; CHECK-NEXT: je [[CLEANUP]] -; -; CHECK: movb $1, 57(%rax) define void @useLEA(%struct.rtx_def* readonly %x) { +; ENABLE-LABEL: useLEA: +; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: testq %rdi, %rdi +; ENABLE-NEXT: je LBB8_7 +; ENABLE-NEXT: ## %bb.1: ## %if.end +; ENABLE-NEXT: cmpw $66, (%rdi) +; ENABLE-NEXT: jne LBB8_7 +; ENABLE-NEXT: ## %bb.2: ## %lor.lhs.false +; ENABLE-NEXT: movq 8(%rdi), %rdi +; ENABLE-NEXT: movzwl (%rdi), %eax +; ENABLE-NEXT: leal -54(%rax), %ecx +; ENABLE-NEXT: cmpl $14, %ecx +; ENABLE-NEXT: ja LBB8_3 +; ENABLE-NEXT: ## %bb.8: ## %lor.lhs.false +; ENABLE-NEXT: movl $24599, %edx ## imm = 0x6017 +; ENABLE-NEXT: btl %ecx, %edx +; ENABLE-NEXT: jae LBB8_3 +; ENABLE-NEXT: LBB8_7: ## %cleanup +; ENABLE-NEXT: retq +; ENABLE-NEXT: LBB8_3: ## %lor.lhs.false +; ENABLE-NEXT: cmpl $134, %eax +; ENABLE-NEXT: je LBB8_7 +; ENABLE-NEXT: ## %bb.4: ## %lor.lhs.false +; ENABLE-NEXT: cmpl $140, %eax +; ENABLE-NEXT: je LBB8_7 +; ENABLE-NEXT: ## %bb.5: ## %if.end.55 +; ENABLE-NEXT: pushq %rax +; ENABLE-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-NEXT: callq _find_temp_slot_from_address +; ENABLE-NEXT: testq %rax, %rax +; ENABLE-NEXT: leaq {{[0-9]+}}(%rsp), %rsp +; ENABLE-NEXT: je 
LBB8_7 +; ENABLE-NEXT: ## %bb.6: ## %if.then.60 +; ENABLE-NEXT: movb $1, 57(%rax) +; ENABLE-NEXT: retq +; +; DISABLE-LABEL: useLEA: +; DISABLE: ## %bb.0: ## %entry +; DISABLE-NEXT: pushq %rax +; DISABLE-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-NEXT: testq %rdi, %rdi +; DISABLE-NEXT: je LBB8_7 +; DISABLE-NEXT: ## %bb.1: ## %if.end +; DISABLE-NEXT: cmpw $66, (%rdi) +; DISABLE-NEXT: jne LBB8_7 +; DISABLE-NEXT: ## %bb.2: ## %lor.lhs.false +; DISABLE-NEXT: movq 8(%rdi), %rdi +; DISABLE-NEXT: movzwl (%rdi), %eax +; DISABLE-NEXT: leal -54(%rax), %ecx +; DISABLE-NEXT: cmpl $14, %ecx +; DISABLE-NEXT: ja LBB8_3 +; DISABLE-NEXT: ## %bb.8: ## %lor.lhs.false +; DISABLE-NEXT: movl $24599, %edx ## imm = 0x6017 +; DISABLE-NEXT: btl %ecx, %edx +; DISABLE-NEXT: jae LBB8_3 +; DISABLE-NEXT: LBB8_7: ## %cleanup +; DISABLE-NEXT: popq %rax +; DISABLE-NEXT: retq +; DISABLE-NEXT: LBB8_3: ## %lor.lhs.false +; DISABLE-NEXT: cmpl $134, %eax +; DISABLE-NEXT: je LBB8_7 +; DISABLE-NEXT: ## %bb.4: ## %lor.lhs.false +; DISABLE-NEXT: cmpl $140, %eax +; DISABLE-NEXT: je LBB8_7 +; DISABLE-NEXT: ## %bb.5: ## %if.end.55 +; DISABLE-NEXT: callq _find_temp_slot_from_address +; DISABLE-NEXT: testq %rax, %rax +; DISABLE-NEXT: je LBB8_7 +; DISABLE-NEXT: ## %bb.6: ## %if.then.60 +; DISABLE-NEXT: movb $1, 57(%rax) +; DISABLE-NEXT: popq %rax +; DISABLE-NEXT: retq entry: %cmp = icmp eq %struct.rtx_def* %x, null br i1 %cmp, label %cleanup, label %if.end @@ -594,26 +755,31 @@ ; Make sure we do not insert unreachable code after noreturn function. ; Although this is not incorrect to insert such code, it is useless ; and it hurts the binary size. 
-; -; CHECK-LABEL: noreturn: -; DISABLE: pushq -; -; CHECK: testb %dil, %dil -; CHECK-NEXT: jne [[ABORT:LBB[0-9_]+]] -; -; CHECK: movl $42, %eax -; -; DISABLE-NEXT: popq -; -; CHECK-NEXT: retq -; -; CHECK: [[ABORT]]: ## %if.abort -; -; ENABLE: pushq -; -; CHECK: callq _abort -; ENABLE-NOT: popq define i32 @noreturn(i8 signext %bad_thing) { +; ENABLE-LABEL: noreturn: +; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: testb %dil, %dil +; ENABLE-NEXT: jne LBB9_2 +; ENABLE-NEXT: ## %bb.1: ## %if.end +; ENABLE-NEXT: movl $42, %eax +; ENABLE-NEXT: retq +; ENABLE-NEXT: LBB9_2: ## %if.abort +; ENABLE-NEXT: pushq %rax +; ENABLE-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-NEXT: callq _abort +; +; DISABLE-LABEL: noreturn: +; DISABLE: ## %bb.0: ## %entry +; DISABLE-NEXT: pushq %rax +; DISABLE-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-NEXT: testb %dil, %dil +; DISABLE-NEXT: jne LBB9_2 +; DISABLE-NEXT: ## %bb.1: ## %if.end +; DISABLE-NEXT: movl $42, %eax +; DISABLE-NEXT: popq %rcx +; DISABLE-NEXT: retq +; DISABLE-NEXT: LBB9_2: ## %if.abort +; DISABLE-NEXT: callq _abort entry: %tobool = icmp eq i8 %bad_thing, 0 br i1 %tobool, label %if.end, label %if.abort @@ -639,9 +805,70 @@ ; should return gracefully and continue compilation. ; The only condition for this test is the compilation finishes correctly. 
; -; CHECK-LABEL: infiniteloop -; CHECK: retq define void @infiniteloop() { +; ENABLE-LABEL: infiniteloop: +; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: testb %al, %al +; ENABLE-NEXT: jne LBB10_3 +; ENABLE-NEXT: ## %bb.1: ## %if.then +; ENABLE-NEXT: pushq %rbp +; ENABLE-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-NEXT: .cfi_offset %rbp, -16 +; ENABLE-NEXT: movq %rsp, %rbp +; ENABLE-NEXT: .cfi_def_cfa_register %rbp +; ENABLE-NEXT: pushq %rbx +; ENABLE-NEXT: pushq %rax +; ENABLE-NEXT: .cfi_offset %rbx, -24 +; ENABLE-NEXT: movq %rsp, %rcx +; ENABLE-NEXT: addq $-16, %rcx +; ENABLE-NEXT: movq %rcx, %rsp +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: movl $1, %edx +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: leaq -8(%rbp), %rsp +; ENABLE-NEXT: popq %rbx +; ENABLE-NEXT: popq %rbp +; ENABLE-NEXT: .p2align 4, 0x90 +; ENABLE-NEXT: LBB10_2: ## %for.body +; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: addl %edx, %eax +; ENABLE-NEXT: movl %eax, (%rcx) +; ENABLE-NEXT: jmp LBB10_2 +; ENABLE-NEXT: LBB10_3: ## %if.end +; ENABLE-NEXT: retq +; +; DISABLE-LABEL: infiniteloop: +; DISABLE: ## %bb.0: ## %entry +; DISABLE-NEXT: pushq %rbp +; DISABLE-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-NEXT: .cfi_offset %rbp, -16 +; DISABLE-NEXT: movq %rsp, %rbp +; DISABLE-NEXT: .cfi_def_cfa_register %rbp +; DISABLE-NEXT: pushq %rbx +; DISABLE-NEXT: pushq %rax +; DISABLE-NEXT: .cfi_offset %rbx, -24 +; DISABLE-NEXT: xorl %eax, %eax +; DISABLE-NEXT: testb %al, %al +; DISABLE-NEXT: jne LBB10_3 +; DISABLE-NEXT: ## %bb.1: ## %if.then +; DISABLE-NEXT: movq %rsp, %rcx +; DISABLE-NEXT: addq $-16, %rcx +; DISABLE-NEXT: movq %rcx, %rsp +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: movl $1, %edx +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: .p2align 4, 0x90 +; DISABLE-NEXT: LBB10_2: ## %for.body +; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: addl %edx, %eax +; DISABLE-NEXT: movl %eax, (%rcx) +; DISABLE-NEXT: jmp LBB10_2 
+; DISABLE-NEXT: LBB10_3: ## %if.end +; DISABLE-NEXT: leaq -8(%rbp), %rsp +; DISABLE-NEXT: popq %rbx +; DISABLE-NEXT: popq %rbp +; DISABLE-NEXT: retq entry: br i1 undef, label %if.then, label %if.end @@ -661,9 +888,102 @@ } ; Another infinite loop test this time with a body bigger than just one block. -; CHECK-LABEL: infiniteloop2 -; CHECK: retq define void @infiniteloop2() { +; ENABLE-LABEL: infiniteloop2: +; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: pushq %rbp +; ENABLE-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-NEXT: .cfi_offset %rbp, -16 +; ENABLE-NEXT: movq %rsp, %rbp +; ENABLE-NEXT: .cfi_def_cfa_register %rbp +; ENABLE-NEXT: pushq %rbx +; ENABLE-NEXT: pushq %rax +; ENABLE-NEXT: .cfi_offset %rbx, -24 +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: testb %al, %al +; ENABLE-NEXT: jne LBB11_5 +; ENABLE-NEXT: ## %bb.1: ## %if.then +; ENABLE-NEXT: movq %rsp, %rcx +; ENABLE-NEXT: addq $-16, %rcx +; ENABLE-NEXT: movq %rcx, %rsp +; ENABLE-NEXT: xorl %edx, %edx +; ENABLE-NEXT: jmp LBB11_2 +; ENABLE-NEXT: .p2align 4, 0x90 +; ENABLE-NEXT: LBB11_4: ## %body2 +; ENABLE-NEXT: ## in Loop: Header=BB11_2 Depth=1 +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: nop +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: movl $1, %edx +; ENABLE-NEXT: LBB11_2: ## %for.body +; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: movl %edx, %esi +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: movl $1, %edx +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: addl %esi, %edx +; ENABLE-NEXT: movl %edx, (%rcx) +; ENABLE-NEXT: testb %al, %al +; ENABLE-NEXT: jne LBB11_4 +; ENABLE-NEXT: ## %bb.3: ## %body1 +; ENABLE-NEXT: ## in Loop: Header=BB11_2 Depth=1 +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: nop +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: jmp LBB11_2 +; ENABLE-NEXT: LBB11_5: ## %if.end +; ENABLE-NEXT: leaq -8(%rbp), %rsp +; ENABLE-NEXT: popq %rbx +; ENABLE-NEXT: popq %rbp +; ENABLE-NEXT: retq +; +; DISABLE-LABEL: infiniteloop2: +; DISABLE: ## %bb.0: ## %entry +; 
DISABLE-NEXT: pushq %rbp +; DISABLE-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-NEXT: .cfi_offset %rbp, -16 +; DISABLE-NEXT: movq %rsp, %rbp +; DISABLE-NEXT: .cfi_def_cfa_register %rbp +; DISABLE-NEXT: pushq %rbx +; DISABLE-NEXT: pushq %rax +; DISABLE-NEXT: .cfi_offset %rbx, -24 +; DISABLE-NEXT: xorl %eax, %eax +; DISABLE-NEXT: testb %al, %al +; DISABLE-NEXT: jne LBB11_5 +; DISABLE-NEXT: ## %bb.1: ## %if.then +; DISABLE-NEXT: movq %rsp, %rcx +; DISABLE-NEXT: addq $-16, %rcx +; DISABLE-NEXT: movq %rcx, %rsp +; DISABLE-NEXT: xorl %edx, %edx +; DISABLE-NEXT: jmp LBB11_2 +; DISABLE-NEXT: .p2align 4, 0x90 +; DISABLE-NEXT: LBB11_4: ## %body2 +; DISABLE-NEXT: ## in Loop: Header=BB11_2 Depth=1 +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: nop +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: movl $1, %edx +; DISABLE-NEXT: LBB11_2: ## %for.body +; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: movl %edx, %esi +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: movl $1, %edx +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: addl %esi, %edx +; DISABLE-NEXT: movl %edx, (%rcx) +; DISABLE-NEXT: testb %al, %al +; DISABLE-NEXT: jne LBB11_4 +; DISABLE-NEXT: ## %bb.3: ## %body1 +; DISABLE-NEXT: ## in Loop: Header=BB11_2 Depth=1 +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: nop +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: jmp LBB11_2 +; DISABLE-NEXT: LBB11_5: ## %if.end +; DISABLE-NEXT: leaq -8(%rbp), %rsp +; DISABLE-NEXT: popq %rbx +; DISABLE-NEXT: popq %rbp +; DISABLE-NEXT: retq entry: br i1 undef, label %if.then, label %if.end @@ -691,9 +1011,70 @@ } ; Another infinite loop test this time with two nested infinite loop. 
-; CHECK-LABEL: infiniteloop3 -; CHECK: retq define void @infiniteloop3() { +; ENABLE-LABEL: infiniteloop3: +; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: testb %al, %al +; ENABLE-NEXT: jne LBB12_2 +; ENABLE-NEXT: ## %bb.1: ## %body +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: testb %al, %al +; ENABLE-NEXT: jne LBB12_7 +; ENABLE-NEXT: LBB12_2: ## %loop2a.preheader +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: xorl %ecx, %ecx +; ENABLE-NEXT: movq %rax, %rsi +; ENABLE-NEXT: jmp LBB12_4 +; ENABLE-NEXT: .p2align 4, 0x90 +; ENABLE-NEXT: LBB12_3: ## %loop2b +; ENABLE-NEXT: ## in Loop: Header=BB12_4 Depth=1 +; ENABLE-NEXT: movq %rdx, (%rsi) +; ENABLE-NEXT: movq %rdx, %rsi +; ENABLE-NEXT: LBB12_4: ## %loop1 +; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: movq %rcx, %rdx +; ENABLE-NEXT: testq %rax, %rax +; ENABLE-NEXT: movq (%rax), %rcx +; ENABLE-NEXT: jne LBB12_3 +; ENABLE-NEXT: ## %bb.5: ## in Loop: Header=BB12_4 Depth=1 +; ENABLE-NEXT: movq %rdx, %rax +; ENABLE-NEXT: movq %rdx, %rsi +; ENABLE-NEXT: jmp LBB12_4 +; ENABLE-NEXT: LBB12_7: ## %end +; ENABLE-NEXT: retq +; +; DISABLE-LABEL: infiniteloop3: +; DISABLE: ## %bb.0: ## %entry +; DISABLE-NEXT: xorl %eax, %eax +; DISABLE-NEXT: testb %al, %al +; DISABLE-NEXT: jne LBB12_2 +; DISABLE-NEXT: ## %bb.1: ## %body +; DISABLE-NEXT: xorl %eax, %eax +; DISABLE-NEXT: testb %al, %al +; DISABLE-NEXT: jne LBB12_7 +; DISABLE-NEXT: LBB12_2: ## %loop2a.preheader +; DISABLE-NEXT: xorl %eax, %eax +; DISABLE-NEXT: xorl %ecx, %ecx +; DISABLE-NEXT: movq %rax, %rsi +; DISABLE-NEXT: jmp LBB12_4 +; DISABLE-NEXT: .p2align 4, 0x90 +; DISABLE-NEXT: LBB12_3: ## %loop2b +; DISABLE-NEXT: ## in Loop: Header=BB12_4 Depth=1 +; DISABLE-NEXT: movq %rdx, (%rsi) +; DISABLE-NEXT: movq %rdx, %rsi +; DISABLE-NEXT: LBB12_4: ## %loop1 +; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: movq %rcx, %rdx +; DISABLE-NEXT: testq %rax, %rax +; DISABLE-NEXT: movq (%rax), %rcx +; DISABLE-NEXT: 
jne LBB12_3 +; DISABLE-NEXT: ## %bb.5: ## in Loop: Header=BB12_4 Depth=1 +; DISABLE-NEXT: movq %rdx, %rax +; DISABLE-NEXT: movq %rdx, %rsi +; DISABLE-NEXT: jmp LBB12_4 +; DISABLE-NEXT: LBB12_7: ## %end +; DISABLE-NEXT: retq entry: br i1 undef, label %loop2a, label %body @@ -724,44 +1105,49 @@ ; Check that we just don't bail out on RegMask. ; In this case, the RegMask does not touch a CSR so we are good to go! -; CHECK-LABEL: regmask: -; -; Compare the arguments and jump to exit. -; No prologue needed. -; ENABLE: cmpl %esi, %edi -; ENABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; (What we push does not matter. It should be some random sratch register.) -; CHECK: pushq -; -; Compare the arguments and jump to exit. -; After the prologue is set. -; DISABLE: cmpl %esi, %edi -; DISABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]] -; -; CHECK: nop -; Set the first argument to zero. -; CHECK: xorl %edi, %edi -; Set the second argument to addr. -; CHECK-NEXT: movq %rdx, %rsi -; CHECK-NEXT: callq _doSomething -; CHECK-NEXT: popq -; CHECK-NEXT: retq -; -; CHECK: [[EXIT_LABEL]]: -; Set the first argument to 6. -; CHECK-NEXT: movl $6, %edi -; Set the second argument to addr. -; CHECK-NEXT: movq %rdx, %rsi -; -; Without shrink-wrapping, we need to restore the stack before -; making the tail call. -; Epilogue code. 
-; DISABLE-NEXT: popq -; -; CHECK-NEXT: jmp _doSomething define i32 @regmask(i32 %a, i32 %b, i32* %addr) { +; ENABLE-LABEL: regmask: +; ENABLE: ## %bb.0: +; ENABLE-NEXT: cmpl %esi, %edi +; ENABLE-NEXT: jge LBB13_2 +; ENABLE-NEXT: ## %bb.1: ## %true +; ENABLE-NEXT: pushq %rbx +; ENABLE-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-NEXT: .cfi_offset %rbx, -16 +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: nop +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: xorl %edi, %edi +; ENABLE-NEXT: movq %rdx, %rsi +; ENABLE-NEXT: callq _doSomething +; ENABLE-NEXT: popq %rbx +; ENABLE-NEXT: retq +; ENABLE-NEXT: LBB13_2: ## %false +; ENABLE-NEXT: movl $6, %edi +; ENABLE-NEXT: movq %rdx, %rsi +; ENABLE-NEXT: jmp _doSomething ## TAILCALL +; +; DISABLE-LABEL: regmask: +; DISABLE: ## %bb.0: +; DISABLE-NEXT: pushq %rbx +; DISABLE-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-NEXT: .cfi_offset %rbx, -16 +; DISABLE-NEXT: cmpl %esi, %edi +; DISABLE-NEXT: jge LBB13_2 +; DISABLE-NEXT: ## %bb.1: ## %true +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: nop +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: xorl %edi, %edi +; DISABLE-NEXT: movq %rdx, %rsi +; DISABLE-NEXT: callq _doSomething +; DISABLE-NEXT: popq %rbx +; DISABLE-NEXT: retq +; DISABLE-NEXT: LBB13_2: ## %false +; DISABLE-NEXT: movl $6, %edi +; DISABLE-NEXT: movq %rdx, %rsi +; DISABLE-NEXT: popq %rbx +; DISABLE-NEXT: jmp _doSomething ## TAILCALL %tmp2 = icmp slt i32 %a, %b br i1 %tmp2, label %true, label %false @@ -791,41 +1177,76 @@ ; Note: The registers may change in the following patterns, but ; because they imply register hierarchy (e.g., eax, al) this is ; tricky to write robust patterns. -; -; CHECK-LABEL: useLEAForPrologue: -; -; Prologue is at the beginning of the function when shrink-wrapping -; is disabled. -; DISABLE: pushq -; The stack adjustment can use SUB instr because we do not need to -; preserve the EFLAGS at this point. -; DISABLE-NEXT: subq $16, %rsp -; -; Load the value of b. 
-; Create the zero value for the select assignment. -; CHECK: xorl [[CMOVE_VAL:%eax]], [[CMOVE_VAL]] -; CHECK-NEXT: cmpb $0, _b(%rip) -; CHECK-NEXT: movl $48, [[IMM_VAL:%ecx]] -; CHECK-NEXT: cmovnel [[CMOVE_VAL]], [[IMM_VAL]] -; CHECK-NEXT: movb %cl, _c(%rip) -; CHECK-NEXT: je [[VARFUNC_CALL:LBB[0-9_]+]] -; -; The code of the loop is not interesting. -; [...] -; -; CHECK: [[VARFUNC_CALL]]: -; Set the null parameter. -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: callq _varfunc -; -; Set the return value. -; CHECK-NEXT: xorl %eax, %eax -; -; Epilogue code. -; CHECK-NEXT: addq $16, %rsp -; CHECK-NEXT: popq -; CHECK-NEXT: retq define i32 @useLEAForPrologue(i32 %d, i32 %a, i8 %c) #3 { +; ENABLE-LABEL: useLEAForPrologue: +; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: pushq %rbx +; ENABLE-NEXT: subq $16, %rsp +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: cmpb $0, {{.*}}(%rip) +; ENABLE-NEXT: movl $48, %ecx +; ENABLE-NEXT: cmovnel %eax, %ecx +; ENABLE-NEXT: movb %cl, {{.*}}(%rip) +; ENABLE-NEXT: je LBB14_4 +; ENABLE-NEXT: ## %bb.1: ## %for.body.lr.ph +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: nop +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: .p2align 4, 0x90 +; ENABLE-NEXT: LBB14_2: ## %for.body +; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: cmpl %esi, %edi +; ENABLE-NEXT: setl %al +; ENABLE-NEXT: xorl %esi, %esi +; ENABLE-NEXT: movb %al, %sil +; ENABLE-NEXT: incb %dl +; ENABLE-NEXT: cmpb $45, %dl +; ENABLE-NEXT: jl LBB14_2 +; ENABLE-NEXT: ## %bb.3: ## %for.cond.for.end_crit_edge +; ENABLE-NEXT: movq _a@{{.*}}(%rip), %rax +; ENABLE-NEXT: movl %esi, (%rax) +; ENABLE-NEXT: LBB14_4: ## %for.end +; ENABLE-NEXT: xorl %edi, %edi +; ENABLE-NEXT: callq _varfunc +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: addq $16, %rsp +; ENABLE-NEXT: popq %rbx +; ENABLE-NEXT: retq +; +; DISABLE-LABEL: useLEAForPrologue: +; DISABLE: ## %bb.0: ## %entry +; DISABLE-NEXT: pushq %rbx +; DISABLE-NEXT: subq $16, %rsp +; DISABLE-NEXT: xorl %eax, %eax +; 
DISABLE-NEXT: cmpb $0, {{.*}}(%rip) +; DISABLE-NEXT: movl $48, %ecx +; DISABLE-NEXT: cmovnel %eax, %ecx +; DISABLE-NEXT: movb %cl, {{.*}}(%rip) +; DISABLE-NEXT: je LBB14_4 +; DISABLE-NEXT: ## %bb.1: ## %for.body.lr.ph +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: nop +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: .p2align 4, 0x90 +; DISABLE-NEXT: LBB14_2: ## %for.body +; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: cmpl %esi, %edi +; DISABLE-NEXT: setl %al +; DISABLE-NEXT: xorl %esi, %esi +; DISABLE-NEXT: movb %al, %sil +; DISABLE-NEXT: incb %dl +; DISABLE-NEXT: cmpb $45, %dl +; DISABLE-NEXT: jl LBB14_2 +; DISABLE-NEXT: ## %bb.3: ## %for.cond.for.end_crit_edge +; DISABLE-NEXT: movq _a@{{.*}}(%rip), %rax +; DISABLE-NEXT: movl %esi, (%rax) +; DISABLE-NEXT: LBB14_4: ## %for.end +; DISABLE-NEXT: xorl %edi, %edi +; DISABLE-NEXT: callq _varfunc +; DISABLE-NEXT: xorl %eax, %eax +; DISABLE-NEXT: addq $16, %rsp +; DISABLE-NEXT: popq %rbx +; DISABLE-NEXT: retq entry: %tmp = alloca i3 %.b = load i1, i1* @b, align 1 @@ -866,24 +1287,48 @@ ; TLS calls used to be wrongly model and shrink-wrapping would have inserted ; the prologue and epilogue just around the call to doSomething. ; PR25820. 
-; -; CHECK-LABEL: tlsCall: -; CHECK: pushq -; CHECK: testb $1, %dil -; CHECK: je [[ELSE_LABEL:LBB[0-9_]+]] -; -; master bb -; CHECK: movq _sum1@TLVP(%rip), %rdi -; CHECK-NEXT: callq *(%rdi) -; CHECK: jmp [[EXIT_LABEL:LBB[0-9_]+]] -; -; [[ELSE_LABEL]]: -; CHECK: callq _doSomething -; -; [[EXIT_LABEL]]: -; CHECK: popq -; CHECK-NEXT: retq define i32 @tlsCall(i1 %bool1, i32 %arg, i32* readonly dereferenceable(4) %sum1) #3 { +; ENABLE-LABEL: tlsCall: +; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: pushq %rax +; ENABLE-NEXT: testb $1, %dil +; ENABLE-NEXT: je LBB15_2 +; ENABLE-NEXT: ## %bb.1: ## %master +; ENABLE-NEXT: movl (%rdx), %ecx +; ENABLE-NEXT: movq _sum1@{{.*}}(%rip), %rdi +; ENABLE-NEXT: callq *(%rdi) +; ENABLE-NEXT: movl %ecx, (%rax) +; ENABLE-NEXT: jmp LBB15_3 +; ENABLE-NEXT: LBB15_2: ## %else +; ENABLE-NEXT: xorl %edi, %edi +; ENABLE-NEXT: xorl %esi, %esi +; ENABLE-NEXT: callq _doSomething +; ENABLE-NEXT: movl %eax, %esi +; ENABLE-NEXT: LBB15_3: ## %exit +; ENABLE-NEXT: movl %esi, %eax +; ENABLE-NEXT: popq %rcx +; ENABLE-NEXT: retq +; +; DISABLE-LABEL: tlsCall: +; DISABLE: ## %bb.0: ## %entry +; DISABLE-NEXT: pushq %rax +; DISABLE-NEXT: testb $1, %dil +; DISABLE-NEXT: je LBB15_2 +; DISABLE-NEXT: ## %bb.1: ## %master +; DISABLE-NEXT: movl (%rdx), %ecx +; DISABLE-NEXT: movq _sum1@{{.*}}(%rip), %rdi +; DISABLE-NEXT: callq *(%rdi) +; DISABLE-NEXT: movl %ecx, (%rax) +; DISABLE-NEXT: jmp LBB15_3 +; DISABLE-NEXT: LBB15_2: ## %else +; DISABLE-NEXT: xorl %edi, %edi +; DISABLE-NEXT: xorl %esi, %esi +; DISABLE-NEXT: callq _doSomething +; DISABLE-NEXT: movl %eax, %esi +; DISABLE-NEXT: LBB15_3: ## %exit +; DISABLE-NEXT: movl %esi, %eax +; DISABLE-NEXT: popq %rcx +; DISABLE-NEXT: retq entry: br i1 %bool1, label %master, label %else @@ -913,17 +1358,6 @@ ; and since we use that information to do the placement, we may end up ; inserting the prologue/epilogue at incorrect places. ; PR25988. 
-; -; CHECK-LABEL: irreducibleCFG: -; CHECK: %entry -; Make sure the prologue happens in the entry block. -; CHECK-NEXT: pushq -; ... -; Make sure the epilogue happens in the exit block. -; CHECK-NOT: popq -; CHECK: popq -; CHECK-NEXT: popq -; CHECK-NEXT: retq ; Make sure we emit missed optimization remarks for this. ; REMARKS: Pass: shrink-wrap ; REMARKS-NEXT: Name: UnsupportedIrreducibleCFG @@ -932,6 +1366,93 @@ ; REMARKS-NEXT: - String: Irreducible CFGs are not supported yet define i32 @irreducibleCFG() #4 { +; ENABLE-LABEL: irreducibleCFG: +; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: pushq %rbp +; ENABLE-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-NEXT: .cfi_offset %rbp, -16 +; ENABLE-NEXT: movq %rsp, %rbp +; ENABLE-NEXT: .cfi_def_cfa_register %rbp +; ENABLE-NEXT: pushq %rbx +; ENABLE-NEXT: pushq %rax +; ENABLE-NEXT: .cfi_offset %rbx, -24 +; ENABLE-NEXT: movq _irreducibleCFGf@{{.*}}(%rip), %rax +; ENABLE-NEXT: cmpb $0, (%rax) +; ENABLE-NEXT: je LBB16_2 +; ENABLE-NEXT: .p2align 4, 0x90 +; ENABLE-NEXT: LBB16_1: ## %preheader +; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: jmp LBB16_1 +; ENABLE-NEXT: LBB16_2: ## %split +; ENABLE-NEXT: movq _irreducibleCFGb@{{.*}}(%rip), %rax +; ENABLE-NEXT: movl (%rax), %eax +; ENABLE-NEXT: testl %eax, %eax +; ENABLE-NEXT: je LBB16_3 +; ENABLE-NEXT: ## %bb.4: ## %for.body4.i +; ENABLE-NEXT: movq _irreducibleCFGa@{{.*}}(%rip), %rax +; ENABLE-NEXT: movl (%rax), %edi +; ENABLE-NEXT: xorl %ebx, %ebx +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: callq _something +; ENABLE-NEXT: jmp LBB16_5 +; ENABLE-NEXT: LBB16_3: +; ENABLE-NEXT: xorl %ebx, %ebx +; ENABLE-NEXT: .p2align 4, 0x90 +; ENABLE-NEXT: LBB16_5: ## %for.inc +; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: incl %ebx +; ENABLE-NEXT: cmpl $7, %ebx +; ENABLE-NEXT: jl LBB16_5 +; ENABLE-NEXT: ## %bb.6: ## %fn1.exit +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: addq $8, %rsp +; ENABLE-NEXT: popq %rbx +; ENABLE-NEXT: popq %rbp +; ENABLE-NEXT: 
retq +; +; DISABLE-LABEL: irreducibleCFG: +; DISABLE: ## %bb.0: ## %entry +; DISABLE-NEXT: pushq %rbp +; DISABLE-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-NEXT: .cfi_offset %rbp, -16 +; DISABLE-NEXT: movq %rsp, %rbp +; DISABLE-NEXT: .cfi_def_cfa_register %rbp +; DISABLE-NEXT: pushq %rbx +; DISABLE-NEXT: pushq %rax +; DISABLE-NEXT: .cfi_offset %rbx, -24 +; DISABLE-NEXT: movq _irreducibleCFGf@{{.*}}(%rip), %rax +; DISABLE-NEXT: cmpb $0, (%rax) +; DISABLE-NEXT: je LBB16_2 +; DISABLE-NEXT: .p2align 4, 0x90 +; DISABLE-NEXT: LBB16_1: ## %preheader +; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: jmp LBB16_1 +; DISABLE-NEXT: LBB16_2: ## %split +; DISABLE-NEXT: movq _irreducibleCFGb@{{.*}}(%rip), %rax +; DISABLE-NEXT: movl (%rax), %eax +; DISABLE-NEXT: testl %eax, %eax +; DISABLE-NEXT: je LBB16_3 +; DISABLE-NEXT: ## %bb.4: ## %for.body4.i +; DISABLE-NEXT: movq _irreducibleCFGa@{{.*}}(%rip), %rax +; DISABLE-NEXT: movl (%rax), %edi +; DISABLE-NEXT: xorl %ebx, %ebx +; DISABLE-NEXT: xorl %eax, %eax +; DISABLE-NEXT: callq _something +; DISABLE-NEXT: jmp LBB16_5 +; DISABLE-NEXT: LBB16_3: +; DISABLE-NEXT: xorl %ebx, %ebx +; DISABLE-NEXT: .p2align 4, 0x90 +; DISABLE-NEXT: LBB16_5: ## %for.inc +; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: incl %ebx +; DISABLE-NEXT: cmpl $7, %ebx +; DISABLE-NEXT: jl LBB16_5 +; DISABLE-NEXT: ## %bb.6: ## %fn1.exit +; DISABLE-NEXT: xorl %eax, %eax +; DISABLE-NEXT: addq $8, %rsp +; DISABLE-NEXT: popq %rbx +; DISABLE-NEXT: popq %rbp +; DISABLE-NEXT: retq entry: %i0 = load i32, i32* @irreducibleCFGa, align 4 %.pr = load i8, i8* @irreducibleCFGf, align 1 @@ -978,20 +1499,60 @@ ; loop, which can occur into a misplacement of the restore block, if we're ; looking for the nearest common post-dominator of an "unreachable" block. -; CHECK-LABEL: infiniteLoopNoSuccessor: -; CHECK: ## %bb.0: -; Make sure the prologue happens in the entry block. -; CHECK-NEXT: pushq %rbp -; ... -; Make sure we don't shrink-wrap. 
-; CHECK: ## %bb.1 -; CHECK-NOT: pushq %rbp -; ... -; Make sure the epilogue happens in the exit block. -; CHECK: ## %bb.5 -; CHECK: popq %rbp -; CHECK-NEXT: retq define void @infiniteLoopNoSuccessor() #5 { +; ENABLE-LABEL: infiniteLoopNoSuccessor: +; ENABLE: ## %bb.0: +; ENABLE-NEXT: pushq %rbp +; ENABLE-NEXT: movq %rsp, %rbp +; ENABLE-NEXT: movq _x@{{.*}}(%rip), %rax +; ENABLE-NEXT: cmpl $0, (%rax) +; ENABLE-NEXT: je LBB17_2 +; ENABLE-NEXT: ## %bb.1: +; ENABLE-NEXT: movl $0, (%rax) +; ENABLE-NEXT: LBB17_2: +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: callq _somethingElse +; ENABLE-NEXT: movq _y@{{.*}}(%rip), %rax +; ENABLE-NEXT: cmpl $0, (%rax) +; ENABLE-NEXT: je LBB17_3 +; ENABLE-NEXT: ## %bb.5: +; ENABLE-NEXT: popq %rbp +; ENABLE-NEXT: retq +; ENABLE-NEXT: LBB17_3: +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: callq _something +; ENABLE-NEXT: .p2align 4, 0x90 +; ENABLE-NEXT: LBB17_4: ## =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: callq _somethingElse +; ENABLE-NEXT: jmp LBB17_4 +; +; DISABLE-LABEL: infiniteLoopNoSuccessor: +; DISABLE: ## %bb.0: +; DISABLE-NEXT: pushq %rbp +; DISABLE-NEXT: movq %rsp, %rbp +; DISABLE-NEXT: movq _x@{{.*}}(%rip), %rax +; DISABLE-NEXT: cmpl $0, (%rax) +; DISABLE-NEXT: je LBB17_2 +; DISABLE-NEXT: ## %bb.1: +; DISABLE-NEXT: movl $0, (%rax) +; DISABLE-NEXT: LBB17_2: +; DISABLE-NEXT: xorl %eax, %eax +; DISABLE-NEXT: callq _somethingElse +; DISABLE-NEXT: movq _y@{{.*}}(%rip), %rax +; DISABLE-NEXT: cmpl $0, (%rax) +; DISABLE-NEXT: je LBB17_3 +; DISABLE-NEXT: ## %bb.5: +; DISABLE-NEXT: popq %rbp +; DISABLE-NEXT: retq +; DISABLE-NEXT: LBB17_3: +; DISABLE-NEXT: xorl %eax, %eax +; DISABLE-NEXT: callq _something +; DISABLE-NEXT: .p2align 4, 0x90 +; DISABLE-NEXT: LBB17_4: ## =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: xorl %eax, %eax +; DISABLE-NEXT: callq _somethingElse +; DISABLE-NEXT: jmp LBB17_4 %1 = load i32, i32* @x, align 4 %2 = icmp ne i32 %1, 0 br i1 %2, label %3, label %4