Index: lib/CodeGen/ShrinkWrap.cpp =================================================================== --- lib/CodeGen/ShrinkWrap.cpp +++ lib/CodeGen/ShrinkWrap.cpp @@ -258,6 +258,16 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS) const { + /* This prevents premature stack popping when an indirect stack + * access occurs. It is overly aggressive for the moment. + * TODO: - Obvious non-stack loads and stores, such as global values, + * are known to not access the stack. + * - Further, data dependency and alias analysis can validate + * that loads and stores never derive from the stack pointer. + */ + if (MI.mayLoadOrStore()) + return true; + if (MI.getOpcode() == FrameSetupOpcode || MI.getOpcode() == FrameDestroyOpcode) { LLVM_DEBUG(dbgs() << "Frame instruction: " << MI << '\n'); Index: test/CodeGen/AArch64/arm64-shrink-wrapping.ll =================================================================== --- test/CodeGen/AArch64/arm64-shrink-wrapping.ll +++ test/CodeGen/AArch64/arm64-shrink-wrapping.ll @@ -945,24 +945,24 @@ define i32 @stack_realign(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2) { ; ENABLE-LABEL: stack_realign: ; ENABLE: ; %bb.0: -; ENABLE-NEXT: lsl w8, w0, w1 -; ENABLE-NEXT: cmp w0, w1 -; ENABLE-NEXT: lsl w9, w1, w0 -; ENABLE-NEXT: b.ge LBB13_2 -; ENABLE-NEXT: ; %bb.1: ; %true ; ENABLE-NEXT: stp x29, x30, [sp, #-16]! 
; 16-byte Folded Spill ; ENABLE-NEXT: mov x29, sp -; ENABLE-NEXT: sub x1, sp, #16 ; =16 -; ENABLE-NEXT: and sp, x1, #0xffffffffffffffe0 +; ENABLE-NEXT: sub x9, sp, #16 ; =16 +; ENABLE-NEXT: and sp, x9, #0xffffffffffffffe0 ; ENABLE-NEXT: .cfi_def_cfa w29, 16 ; ENABLE-NEXT: .cfi_offset w30, -8 ; ENABLE-NEXT: .cfi_offset w29, -16 +; ENABLE-NEXT: lsl w8, w0, w1 +; ENABLE-NEXT: cmp w0, w1 +; ENABLE-NEXT: lsl w9, w1, w0 +; ENABLE-NEXT: b.ge LBB13_2 +; ENABLE-NEXT: ; %bb.1: ; %true ; ENABLE-NEXT: str w0, [sp] -; ENABLE-NEXT: mov sp, x29 -; ENABLE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload ; ENABLE-NEXT: LBB13_2: ; %false ; ENABLE-NEXT: str w8, [x2] ; ENABLE-NEXT: str w9, [x3] +; ENABLE-NEXT: mov sp, x29 +; ENABLE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload ; ENABLE-NEXT: ret ; ; DISABLE-LABEL: stack_realign: Index: test/CodeGen/AArch64/branch-relax-cbz.ll =================================================================== --- test/CodeGen/AArch64/branch-relax-cbz.ll +++ test/CodeGen/AArch64/branch-relax-cbz.ll @@ -5,6 +5,7 @@ define void @split_block_no_fallthrough(i64 %val) #0 { ; CHECK-LABEL: split_block_no_fallthrough: ; CHECK: ; %bb.0: ; %bb +; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill ; CHECK-NEXT: cmn x0, #5 ; =5 ; CHECK-NEXT: b.le LBB0_3 ; CHECK-NEXT: ; %bb.1: ; %b3 @@ -12,16 +13,15 @@ ; CHECK-NEXT: cbnz w8, LBB0_2 ; CHECK-NEXT: b LBB0_4 ; CHECK-NEXT: LBB0_2: ; %b8 +; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload ; CHECK-NEXT: ret ; CHECK-NEXT: LBB0_3: ; %b2 -; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
; 16-byte Folded Spill ; CHECK-NEXT: mov w0, #93 ; CHECK-NEXT: bl _extfunc -; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload -; CHECK-NEXT: cbz w0, LBB0_4 -; CHECK-NEXT: b LBB0_2 +; CHECK-NEXT: cbnz w0, LBB0_2 ; CHECK-NEXT: LBB0_4: ; %b7 ; CHECK-NEXT: mov w0, #13 +; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload ; CHECK-NEXT: b _extfunc bb: %c0 = icmp sgt i64 %val, -5 Index: test/CodeGen/AArch64/pr37472.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/pr37472.ll @@ -0,0 +1,137 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -o - %s | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-arm-none-eabi" + +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) + +define void @compiler_pop_stack(i32 %num) { +; CHECK-LABEL: compiler_pop_stack: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #64 // =64 +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: cmp w0, #2 // =2 +; CHECK-NEXT: b.lo .LBB0_5 +; CHECK-NEXT: // %bb.1: // %if.end +; CHECK-NEXT: mov w9, #1 +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: str w0, [sp] +; CHECK-NEXT: .LBB0_2: // %while.body +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: sub w10, w9, #1 // =1 +; CHECK-NEXT: ldr w11, [x8, w10, uxtw #2] +; CHECK-NEXT: cbz w11, .LBB0_4 +; CHECK-NEXT: // %bb.3: // %if.then4 +; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: str w11, [x8, x10, lsl #2] +; CHECK-NEXT: cbnz w9, .LBB0_2 +; CHECK-NEXT: b .LBB0_5 +; CHECK-NEXT: .LBB0_4: // in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: mov w9, w10 +; CHECK-NEXT: cbnz w9, .LBB0_2 +; CHECK-NEXT: .LBB0_5: // %cleanup +; CHECK-NEXT: add sp, sp, #64 // =64 +; CHECK-NEXT: ret +entry: + %rstack = alloca [16 x i32], align 4 + %0 = bitcast [16 x i32]* %rstack to i8* + call 
void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %0) + %cmp = icmp ult i32 %num, 2 + br i1 %cmp, label %cleanup, label %if.end + +if.end: + %arrayidx = getelementptr inbounds [16 x i32], [16 x i32]* %rstack, i64 0, i64 0 + store volatile i32 %num, i32* %arrayidx, align 4 + br label %while.body + +while.body: + %ptr.017 = phi i32 [ 1, %if.end ], [ %ptr.1, %if.end7 ] + %dec = add i32 %ptr.017, -1 + %idxprom = zext i32 %dec to i64 + %arrayidx2 = getelementptr inbounds [16 x i32], [16 x i32]* %rstack, i64 0, i64 %idxprom + %1 = load volatile i32, i32* %arrayidx2, align 4 + %cmp3 = icmp eq i32 %1, 0 + br i1 %cmp3, label %if.end7, label %if.then4 + +if.then4: + store volatile i32 %1, i32* %arrayidx2, align 4 + br label %if.end7 + +if.end7: + %ptr.1 = phi i32 [ %ptr.017, %if.then4 ], [ %dec, %while.body ] + %cmp1 = icmp eq i32 %ptr.1, 0 + br i1 %cmp1, label %cleanup, label %while.body + +cleanup: + call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %0) + ret void +} + +%struct.S = type { i32, i32 } +@__const.f.arr = private unnamed_addr constant [4 x i8] c"\01\02\03\04", align 1 + + +define i32 @f(%struct.S* nocapture, i32) { +; CHECK-LABEL: f: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp w1, #4 // =4 +; CHECK-NEXT: b.ls .LBB1_2 +; CHECK-NEXT: // %bb.1: +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: sub sp, sp, #16 // =16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: mov w9, #513 +; CHECK-NEXT: movk w9, #1027, lsl #16 +; CHECK-NEXT: mov w8, w1 +; CHECK-NEXT: str w9, [sp, #12] +; CHECK-NEXT: add x9, sp, #12 // =12 +; CHECK-NEXT: ldrb w10, [x9, x8] +; CHECK-NEXT: cmp w1, #2 // =2 +; CHECK-NEXT: str w10, [x0] +; CHECK-NEXT: b.hi .LBB1_4 +; CHECK-NEXT: // %bb.3: +; CHECK-NEXT: ldrb w8, [x9, x8] +; CHECK-NEXT: str w8, [x0, #4] +; CHECK-NEXT: .LBB1_4: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: add sp, sp, #16 // =16 +; CHECK-NEXT: ret + %3 = alloca [4 x i8], align 1 + %4 = icmp ugt i32 %1, 4 + br i1 %4, label %18, label %5 + +5: + %6 = 
getelementptr inbounds [4 x i8], [4 x i8]* %3, i64 0, i64 0 + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %6) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 %6, i8* align 1 getelementptr inbounds ([4 x i8], [4 x i8]* @__const.f.arr, i64 0, i64 0), i64 4, i1 true) + %7 = zext i32 %1 to i64 + %8 = getelementptr inbounds [4 x i8], [4 x i8]* %3, i64 0, i64 %7 + %9 = load volatile i8, i8* %8, align 1 + %10 = zext i8 %9 to i32 + %11 = getelementptr inbounds %struct.S, %struct.S* %0, i64 0, i32 0 + store i32 %10, i32* %11, align 4 + %12 = icmp ult i32 %1, 3 + br i1 %12, label %13, label %17 + +13: + %14 = load volatile i8, i8* %8, align 1 + %15 = zext i8 %14 to i32 + %16 = getelementptr inbounds %struct.S, %struct.S* %0, i64 0, i32 1 + store i32 %15, i32* %16, align 4 + br label %17 + +17: + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %6) + br label %18 + +18: + %19 = phi i32 [ 0, %17 ], [ 1, %2 ] + ret i32 %19 +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) + Index: test/CodeGen/AArch64/taildup-cfi.ll =================================================================== --- test/CodeGen/AArch64/taildup-cfi.ll +++ test/CodeGen/AArch64/taildup-cfi.ll @@ -33,8 +33,8 @@ store i32 0, i32* @f, align 4, !tbaa !2 br label %if.end -; DARWIN-NOT: Merging into block -; LINUX: Merging into block +; DARWIN: Merging into block +; LINUX: Merging into block if.end: ; preds = %entry.if.end_crit_edge, %if.then %1 = phi i32 [ %.pre, %entry.if.end_crit_edge ], [ 0, %if.then ] Index: test/CodeGen/ARM/arm-shrink-wrapping-linux.ll =================================================================== --- test/CodeGen/ARM/arm-shrink-wrapping-linux.ll +++ test/CodeGen/ARM/arm-shrink-wrapping-linux.ll @@ -17,6 +17,8 @@ define fastcc i8* @wrongUseOfPostDominate(i8* readonly %s, i32 %off, i8* readnone %lim) { ; ENABLE-LABEL: wrongUseOfPostDominate: ; ENABLE: @ %bb.0: @ %entry +; ENABLE-NEXT: .save {r11, lr} +; 
ENABLE-NEXT: push {r11, lr} ; ENABLE-NEXT: cmn r1, #1 ; ENABLE-NEXT: ble .LBB0_6 ; ENABLE-NEXT: @ %bb.1: @ %while.cond.preheader @@ -24,83 +26,81 @@ ; ENABLE-NEXT: beq .LBB0_5 ; ENABLE-NEXT: @ %bb.2: @ %while.cond.preheader ; ENABLE-NEXT: cmp r0, r2 -; ENABLE-NEXT: bhs .LBB0_5 -; ENABLE-NEXT: @ %bb.3: @ %while.body.preheader +; ENABLE-NEXT: pophs {r11, pc} ; ENABLE-NEXT: movw r12, :lower16:skip ; ENABLE-NEXT: sub r1, r1, #1 ; ENABLE-NEXT: movt r12, :upper16:skip -; ENABLE-NEXT: .LBB0_4: @ %while.body +; ENABLE-NEXT: .LBB0_3: @ %while.body ; ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 ; ENABLE-NEXT: ldrb r3, [r0] ; ENABLE-NEXT: ldrb r3, [r12, r3] ; ENABLE-NEXT: add r0, r0, r3 ; ENABLE-NEXT: sub r3, r1, #1 ; ENABLE-NEXT: cmp r3, r1 -; ENABLE-NEXT: bxhs lr +; ENABLE-NEXT: bhs .LBB0_5 +; ENABLE-NEXT: @ %bb.4: @ %while.body +; ENABLE-NEXT: @ in Loop: Header=BB0_3 Depth=1 ; ENABLE-NEXT: cmp r0, r2 ; ENABLE-NEXT: mov r1, r3 -; ENABLE-NEXT: blo .LBB0_4 +; ENABLE-NEXT: blo .LBB0_3 ; ENABLE-NEXT: .LBB0_5: @ %if.end29 -; ENABLE-NEXT: bx lr -; ENABLE-NEXT: .LBB0_6: -; ENABLE-NEXT: .save {r11, lr} -; ENABLE-NEXT: push {r11, lr} -; ENABLE-NEXT: .LBB0_7: @ %while.cond2.outer +; ENABLE-NEXT: pop {r11, pc} +; ENABLE-NEXT: .LBB0_6: @ %while.cond2.outer ; ENABLE-NEXT: @ =>This Loop Header: Depth=1 -; ENABLE-NEXT: @ Child Loop BB0_8 Depth 2 -; ENABLE-NEXT: @ Child Loop BB0_15 Depth 2 +; ENABLE-NEXT: @ Child Loop BB0_7 Depth 2 +; ENABLE-NEXT: @ Child Loop BB0_14 Depth 2 ; ENABLE-NEXT: mov r3, r0 -; ENABLE-NEXT: .LBB0_8: @ %while.cond2 -; ENABLE-NEXT: @ Parent Loop BB0_7 Depth=1 +; ENABLE-NEXT: .LBB0_7: @ %while.cond2 +; ENABLE-NEXT: @ Parent Loop BB0_6 Depth=1 ; ENABLE-NEXT: @ => This Inner Loop Header: Depth=2 ; ENABLE-NEXT: add r1, r1, #1 ; ENABLE-NEXT: cmp r1, #1 -; ENABLE-NEXT: beq .LBB0_18 -; ENABLE-NEXT: @ %bb.9: @ %while.body4 -; ENABLE-NEXT: @ in Loop: Header=BB0_8 Depth=2 +; ENABLE-NEXT: beq .LBB0_17 +; ENABLE-NEXT: @ %bb.8: @ %while.body4 +; ENABLE-NEXT: @ in Loop: 
Header=BB0_7 Depth=2 ; ENABLE-NEXT: cmp r3, r2 -; ENABLE-NEXT: bls .LBB0_8 -; ENABLE-NEXT: @ %bb.10: @ %if.then7 -; ENABLE-NEXT: @ in Loop: Header=BB0_7 Depth=1 +; ENABLE-NEXT: bls .LBB0_7 +; ENABLE-NEXT: @ %bb.9: @ %if.then7 +; ENABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 ; ENABLE-NEXT: mov r0, r3 ; ENABLE-NEXT: ldrb r12, [r0, #-1]! ; ENABLE-NEXT: sxtb lr, r12 ; ENABLE-NEXT: cmn lr, #1 -; ENABLE-NEXT: bgt .LBB0_7 -; ENABLE-NEXT: @ %bb.11: @ %if.then7 -; ENABLE-NEXT: @ in Loop: Header=BB0_7 Depth=1 +; ENABLE-NEXT: bgt .LBB0_6 +; ENABLE-NEXT: @ %bb.10: @ %if.then7 +; ENABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 ; ENABLE-NEXT: cmp r0, r2 -; ENABLE-NEXT: bls .LBB0_7 -; ENABLE-NEXT: @ %bb.12: @ %land.rhs14.preheader -; ENABLE-NEXT: @ in Loop: Header=BB0_7 Depth=1 +; ENABLE-NEXT: bls .LBB0_6 +; ENABLE-NEXT: @ %bb.11: @ %land.rhs14.preheader +; ENABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 ; ENABLE-NEXT: cmn lr, #1 -; ENABLE-NEXT: bgt .LBB0_7 -; ENABLE-NEXT: @ %bb.13: @ %land.rhs14.preheader -; ENABLE-NEXT: @ in Loop: Header=BB0_7 Depth=1 +; ENABLE-NEXT: bgt .LBB0_6 +; ENABLE-NEXT: @ %bb.12: @ %land.rhs14.preheader +; ENABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 ; ENABLE-NEXT: cmp r12, #191 -; ENABLE-NEXT: bhi .LBB0_7 -; ENABLE-NEXT: @ %bb.14: @ %while.body24.preheader -; ENABLE-NEXT: @ in Loop: Header=BB0_7 Depth=1 +; ENABLE-NEXT: bhi .LBB0_6 +; ENABLE-NEXT: @ %bb.13: @ %while.body24.preheader +; ENABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 ; ENABLE-NEXT: sub r3, r3, #2 -; ENABLE-NEXT: .LBB0_15: @ %while.body24 -; ENABLE-NEXT: @ Parent Loop BB0_7 Depth=1 +; ENABLE-NEXT: .LBB0_14: @ %while.body24 +; ENABLE-NEXT: @ Parent Loop BB0_6 Depth=1 ; ENABLE-NEXT: @ => This Inner Loop Header: Depth=2 ; ENABLE-NEXT: mov r0, r3 ; ENABLE-NEXT: cmp r3, r2 -; ENABLE-NEXT: bls .LBB0_7 -; ENABLE-NEXT: @ %bb.16: @ %while.body24.land.rhs14_crit_edge -; ENABLE-NEXT: @ in Loop: Header=BB0_15 Depth=2 +; ENABLE-NEXT: bls .LBB0_6 +; ENABLE-NEXT: @ %bb.15: @ %while.body24.land.rhs14_crit_edge +; 
ENABLE-NEXT: @ in Loop: Header=BB0_14 Depth=2 ; ENABLE-NEXT: mov r3, r0 ; ENABLE-NEXT: ldrsb lr, [r3], #-1 ; ENABLE-NEXT: cmn lr, #1 ; ENABLE-NEXT: uxtb r12, lr -; ENABLE-NEXT: bgt .LBB0_7 -; ENABLE-NEXT: @ %bb.17: @ %while.body24.land.rhs14_crit_edge -; ENABLE-NEXT: @ in Loop: Header=BB0_15 Depth=2 +; ENABLE-NEXT: bgt .LBB0_6 +; ENABLE-NEXT: @ %bb.16: @ %while.body24.land.rhs14_crit_edge +; ENABLE-NEXT: @ in Loop: Header=BB0_14 Depth=2 ; ENABLE-NEXT: cmp r12, #192 -; ENABLE-NEXT: blo .LBB0_15 -; ENABLE-NEXT: b .LBB0_7 -; ENABLE-NEXT: .LBB0_18: +; ENABLE-NEXT: blo .LBB0_14 +; ENABLE-NEXT: b .LBB0_6 +; ENABLE-NEXT: .LBB0_17: ; ENABLE-NEXT: mov r0, r3 ; ENABLE-NEXT: pop {r11, pc} ; Index: test/CodeGen/ARM/arm-shrink-wrapping.ll =================================================================== --- test/CodeGen/ARM/arm-shrink-wrapping.ll +++ test/CodeGen/ARM/arm-shrink-wrapping.ll @@ -1760,201 +1760,102 @@ ; ; bl define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %tmp) "no-frame-pointer-elim"="true" { -; ARM-ENABLE-LABEL: debug_info: -; ARM-ENABLE: @ %bb.0: @ %bb -; ARM-ENABLE-NEXT: tst r2, #1 -; ARM-ENABLE-NEXT: beq LBB12_2 -; ARM-ENABLE-NEXT: @ %bb.1: @ %bb3 -; ARM-ENABLE-NEXT: push {r4, r7, lr} -; ARM-ENABLE-NEXT: add r7, sp, #4 -; ARM-ENABLE-NEXT: sub r4, sp, #16 -; ARM-ENABLE-NEXT: bfc r4, #0, #4 -; ARM-ENABLE-NEXT: mov sp, r4 -; ARM-ENABLE-NEXT: ldr r1, [r7, #8] -; ARM-ENABLE-NEXT: mov r2, r3 -; ARM-ENABLE-NEXT: vst1.64 {d8, d9}, [r4:128] -; ARM-ENABLE-NEXT: vmov s16, r0 -; ARM-ENABLE-NEXT: mov r0, r3 -; ARM-ENABLE-NEXT: vmov d9, r3, r1 -; ARM-ENABLE-NEXT: mov r3, r1 -; ARM-ENABLE-NEXT: bl _pow -; ARM-ENABLE-NEXT: vmov.f32 s0, #1.000000e+00 -; ARM-ENABLE-NEXT: mov r4, sp -; ARM-ENABLE-NEXT: vmov.f64 d16, #1.000000e+00 -; ARM-ENABLE-NEXT: vadd.f64 d16, d9, d16 -; ARM-ENABLE-NEXT: vcmpe.f32 s16, s0 -; ARM-ENABLE-NEXT: vmrs APSR_nzcv, fpscr -; ARM-ENABLE-NEXT: vmov d17, r0, r1 -; ARM-ENABLE-NEXT: vmov.f64 d18, d9 -; ARM-ENABLE-NEXT: 
vadd.f64 d17, d17, d17 -; ARM-ENABLE-NEXT: vmovgt.f64 d18, d16 -; ARM-ENABLE-NEXT: vcmp.f64 d18, d9 -; ARM-ENABLE-NEXT: vmrs APSR_nzcv, fpscr -; ARM-ENABLE-NEXT: vmovne.f64 d9, d17 -; ARM-ENABLE-NEXT: vcvt.f32.f64 s0, d9 -; ARM-ENABLE-NEXT: vld1.64 {d8, d9}, [r4:128] -; ARM-ENABLE-NEXT: sub sp, r7, #4 -; ARM-ENABLE-NEXT: pop {r4, r7, lr} -; ARM-ENABLE-NEXT: vmov r0, s0 -; ARM-ENABLE-NEXT: bx lr -; ARM-ENABLE-NEXT: LBB12_2: -; ARM-ENABLE-NEXT: vldr s0, LCPI12_0 -; ARM-ENABLE-NEXT: vmov r0, s0 -; ARM-ENABLE-NEXT: bx lr -; ARM-ENABLE-NEXT: .p2align 2 -; ARM-ENABLE-NEXT: @ %bb.3: -; ARM-ENABLE-NEXT: .data_region -; ARM-ENABLE-NEXT: LCPI12_0: -; ARM-ENABLE-NEXT: .long 0 @ float 0 -; ARM-ENABLE-NEXT: .end_data_region -; -; ARM-DISABLE-LABEL: debug_info: -; ARM-DISABLE: @ %bb.0: @ %bb -; ARM-DISABLE-NEXT: push {r4, r7, lr} -; ARM-DISABLE-NEXT: add r7, sp, #4 -; ARM-DISABLE-NEXT: sub r4, sp, #16 -; ARM-DISABLE-NEXT: bfc r4, #0, #4 -; ARM-DISABLE-NEXT: mov sp, r4 -; ARM-DISABLE-NEXT: tst r2, #1 -; ARM-DISABLE-NEXT: vst1.64 {d8, d9}, [r4:128] -; ARM-DISABLE-NEXT: beq LBB12_2 -; ARM-DISABLE-NEXT: @ %bb.1: @ %bb3 -; ARM-DISABLE-NEXT: ldr r1, [r7, #8] -; ARM-DISABLE-NEXT: vmov s16, r0 -; ARM-DISABLE-NEXT: mov r0, r3 -; ARM-DISABLE-NEXT: mov r2, r3 -; ARM-DISABLE-NEXT: vmov d9, r3, r1 -; ARM-DISABLE-NEXT: mov r3, r1 -; ARM-DISABLE-NEXT: bl _pow -; ARM-DISABLE-NEXT: vmov.f32 s0, #1.000000e+00 -; ARM-DISABLE-NEXT: vmov.f64 d16, #1.000000e+00 -; ARM-DISABLE-NEXT: vadd.f64 d16, d9, d16 -; ARM-DISABLE-NEXT: vcmpe.f32 s16, s0 -; ARM-DISABLE-NEXT: vmrs APSR_nzcv, fpscr -; ARM-DISABLE-NEXT: vmov d17, r0, r1 -; ARM-DISABLE-NEXT: vmov.f64 d18, d9 -; ARM-DISABLE-NEXT: vadd.f64 d17, d17, d17 -; ARM-DISABLE-NEXT: vmovgt.f64 d18, d16 -; ARM-DISABLE-NEXT: vcmp.f64 d18, d9 -; ARM-DISABLE-NEXT: vmrs APSR_nzcv, fpscr -; ARM-DISABLE-NEXT: vmovne.f64 d9, d17 -; ARM-DISABLE-NEXT: vcvt.f32.f64 s0, d9 -; ARM-DISABLE-NEXT: b LBB12_3 -; ARM-DISABLE-NEXT: LBB12_2: -; ARM-DISABLE-NEXT: vldr s0, LCPI12_0 
-; ARM-DISABLE-NEXT: LBB12_3: @ %bb13 -; ARM-DISABLE-NEXT: mov r4, sp -; ARM-DISABLE-NEXT: vld1.64 {d8, d9}, [r4:128] -; ARM-DISABLE-NEXT: vmov r0, s0 -; ARM-DISABLE-NEXT: sub sp, r7, #4 -; ARM-DISABLE-NEXT: pop {r4, r7, pc} -; ARM-DISABLE-NEXT: .p2align 2 -; ARM-DISABLE-NEXT: @ %bb.4: -; ARM-DISABLE-NEXT: .data_region -; ARM-DISABLE-NEXT: LCPI12_0: -; ARM-DISABLE-NEXT: .long 0 @ float 0 -; ARM-DISABLE-NEXT: .end_data_region -; -; THUMB-ENABLE-LABEL: debug_info: -; THUMB-ENABLE: @ %bb.0: @ %bb -; THUMB-ENABLE-NEXT: lsls r1, r2, #31 -; THUMB-ENABLE-NEXT: beq LBB12_2 -; THUMB-ENABLE-NEXT: @ %bb.1: @ %bb3 -; THUMB-ENABLE-NEXT: push {r4, r7, lr} -; THUMB-ENABLE-NEXT: add r7, sp, #4 -; THUMB-ENABLE-NEXT: sub.w r4, sp, #16 -; THUMB-ENABLE-NEXT: bfc r4, #0, #4 -; THUMB-ENABLE-NEXT: mov sp, r4 -; THUMB-ENABLE-NEXT: ldr r1, [r7, #8] -; THUMB-ENABLE-NEXT: mov r2, r3 -; THUMB-ENABLE-NEXT: vst1.64 {d8, d9}, [r4:128] -; THUMB-ENABLE-NEXT: vmov s16, r0 -; THUMB-ENABLE-NEXT: mov r0, r3 -; THUMB-ENABLE-NEXT: vmov d9, r3, r1 -; THUMB-ENABLE-NEXT: mov r3, r1 -; THUMB-ENABLE-NEXT: bl _pow -; THUMB-ENABLE-NEXT: vmov.f32 s0, #1.000000e+00 -; THUMB-ENABLE-NEXT: mov r4, sp -; THUMB-ENABLE-NEXT: vmov.f64 d16, #1.000000e+00 -; THUMB-ENABLE-NEXT: vmov.f64 d18, d9 -; THUMB-ENABLE-NEXT: vcmpe.f32 s16, s0 -; THUMB-ENABLE-NEXT: vadd.f64 d16, d9, d16 -; THUMB-ENABLE-NEXT: vmrs APSR_nzcv, fpscr -; THUMB-ENABLE-NEXT: it gt -; THUMB-ENABLE-NEXT: vmovgt.f64 d18, d16 -; THUMB-ENABLE-NEXT: vcmp.f64 d18, d9 -; THUMB-ENABLE-NEXT: vmov d17, r0, r1 -; THUMB-ENABLE-NEXT: vmrs APSR_nzcv, fpscr -; THUMB-ENABLE-NEXT: vadd.f64 d17, d17, d17 -; THUMB-ENABLE-NEXT: it ne -; THUMB-ENABLE-NEXT: vmovne.f64 d9, d17 -; THUMB-ENABLE-NEXT: vcvt.f32.f64 s0, d9 -; THUMB-ENABLE-NEXT: vld1.64 {d8, d9}, [r4:128] -; THUMB-ENABLE-NEXT: subs r4, r7, #4 -; THUMB-ENABLE-NEXT: mov sp, r4 -; THUMB-ENABLE-NEXT: pop.w {r4, r7, lr} -; THUMB-ENABLE-NEXT: vmov r0, s0 -; THUMB-ENABLE-NEXT: bx lr -; THUMB-ENABLE-NEXT: LBB12_2: -; 
THUMB-ENABLE-NEXT: vldr s0, LCPI12_0 -; THUMB-ENABLE-NEXT: vmov r0, s0 -; THUMB-ENABLE-NEXT: bx lr -; THUMB-ENABLE-NEXT: .p2align 2 -; THUMB-ENABLE-NEXT: @ %bb.3: -; THUMB-ENABLE-NEXT: .data_region -; THUMB-ENABLE-NEXT: LCPI12_0: -; THUMB-ENABLE-NEXT: .long 0 @ float 0 -; THUMB-ENABLE-NEXT: .end_data_region -; -; THUMB-DISABLE-LABEL: debug_info: -; THUMB-DISABLE: @ %bb.0: @ %bb -; THUMB-DISABLE-NEXT: push {r4, r7, lr} -; THUMB-DISABLE-NEXT: add r7, sp, #4 -; THUMB-DISABLE-NEXT: sub.w r4, sp, #16 -; THUMB-DISABLE-NEXT: bfc r4, #0, #4 -; THUMB-DISABLE-NEXT: mov sp, r4 -; THUMB-DISABLE-NEXT: lsls r1, r2, #31 -; THUMB-DISABLE-NEXT: vst1.64 {d8, d9}, [r4:128] -; THUMB-DISABLE-NEXT: beq LBB12_2 -; THUMB-DISABLE-NEXT: @ %bb.1: @ %bb3 -; THUMB-DISABLE-NEXT: ldr r1, [r7, #8] -; THUMB-DISABLE-NEXT: vmov s16, r0 -; THUMB-DISABLE-NEXT: mov r0, r3 -; THUMB-DISABLE-NEXT: mov r2, r3 -; THUMB-DISABLE-NEXT: vmov d9, r3, r1 -; THUMB-DISABLE-NEXT: mov r3, r1 -; THUMB-DISABLE-NEXT: bl _pow -; THUMB-DISABLE-NEXT: vmov.f32 s0, #1.000000e+00 -; THUMB-DISABLE-NEXT: vmov.f64 d16, #1.000000e+00 -; THUMB-DISABLE-NEXT: vmov.f64 d18, d9 -; THUMB-DISABLE-NEXT: vcmpe.f32 s16, s0 -; THUMB-DISABLE-NEXT: vadd.f64 d16, d9, d16 -; THUMB-DISABLE-NEXT: vmrs APSR_nzcv, fpscr -; THUMB-DISABLE-NEXT: it gt -; THUMB-DISABLE-NEXT: vmovgt.f64 d18, d16 -; THUMB-DISABLE-NEXT: vcmp.f64 d18, d9 -; THUMB-DISABLE-NEXT: vmov d17, r0, r1 -; THUMB-DISABLE-NEXT: vmrs APSR_nzcv, fpscr -; THUMB-DISABLE-NEXT: vadd.f64 d17, d17, d17 -; THUMB-DISABLE-NEXT: it ne -; THUMB-DISABLE-NEXT: vmovne.f64 d9, d17 -; THUMB-DISABLE-NEXT: vcvt.f32.f64 s0, d9 -; THUMB-DISABLE-NEXT: b LBB12_3 -; THUMB-DISABLE-NEXT: LBB12_2: -; THUMB-DISABLE-NEXT: vldr s0, LCPI12_0 -; THUMB-DISABLE-NEXT: LBB12_3: @ %bb13 -; THUMB-DISABLE-NEXT: mov r4, sp -; THUMB-DISABLE-NEXT: vld1.64 {d8, d9}, [r4:128] -; THUMB-DISABLE-NEXT: subs r4, r7, #4 -; THUMB-DISABLE-NEXT: vmov r0, s0 -; THUMB-DISABLE-NEXT: mov sp, r4 -; THUMB-DISABLE-NEXT: pop {r4, r7, pc} -; 
THUMB-DISABLE-NEXT: .p2align 2 -; THUMB-DISABLE-NEXT: @ %bb.4: -; THUMB-DISABLE-NEXT: .data_region -; THUMB-DISABLE-NEXT: LCPI12_0: -; THUMB-DISABLE-NEXT: .long 0 @ float 0 -; THUMB-DISABLE-NEXT: .end_data_region +; ARM-LABEL: debug_info: +; ARM: @ %bb.0: @ %bb +; ARM-NEXT: push {r4, r7, lr} +; ARM-NEXT: add r7, sp, #4 +; ARM-NEXT: sub r4, sp, #16 +; ARM-NEXT: bfc r4, #0, #4 +; ARM-NEXT: mov sp, r4 +; ARM-NEXT: tst r2, #1 +; ARM-NEXT: vst1.64 {d8, d9}, [r4:128] +; ARM-NEXT: beq LBB12_2 +; ARM-NEXT: @ %bb.1: @ %bb3 +; ARM-NEXT: ldr r1, [r7, #8] +; ARM-NEXT: vmov s16, r0 +; ARM-NEXT: mov r0, r3 +; ARM-NEXT: mov r2, r3 +; ARM-NEXT: vmov d9, r3, r1 +; ARM-NEXT: mov r3, r1 +; ARM-NEXT: bl _pow +; ARM-NEXT: vmov.f32 s0, #1.000000e+00 +; ARM-NEXT: vmov.f64 d16, #1.000000e+00 +; ARM-NEXT: vadd.f64 d16, d9, d16 +; ARM-NEXT: vcmpe.f32 s16, s0 +; ARM-NEXT: vmrs APSR_nzcv, fpscr +; ARM-NEXT: vmov d17, r0, r1 +; ARM-NEXT: vmov.f64 d18, d9 +; ARM-NEXT: vadd.f64 d17, d17, d17 +; ARM-NEXT: vmovgt.f64 d18, d16 +; ARM-NEXT: vcmp.f64 d18, d9 +; ARM-NEXT: vmrs APSR_nzcv, fpscr +; ARM-NEXT: vmovne.f64 d9, d17 +; ARM-NEXT: vcvt.f32.f64 s0, d9 +; ARM-NEXT: b LBB12_3 +; ARM-NEXT: LBB12_2: +; ARM-NEXT: vldr s0, LCPI12_0 +; ARM-NEXT: LBB12_3: @ %bb13 +; ARM-NEXT: mov r4, sp +; ARM-NEXT: vld1.64 {d8, d9}, [r4:128] +; ARM-NEXT: vmov r0, s0 +; ARM-NEXT: sub sp, r7, #4 +; ARM-NEXT: pop {r4, r7, pc} +; ARM-NEXT: .p2align 2 +; ARM-NEXT: @ %bb.4: +; ARM-NEXT: .data_region +; ARM-NEXT: LCPI12_0: +; ARM-NEXT: .long 0 @ float 0 +; ARM-NEXT: .end_data_region +; +; THUMB-LABEL: debug_info: +; THUMB: @ %bb.0: @ %bb +; THUMB-NEXT: push {r4, r7, lr} +; THUMB-NEXT: add r7, sp, #4 +; THUMB-NEXT: sub.w r4, sp, #16 +; THUMB-NEXT: bfc r4, #0, #4 +; THUMB-NEXT: mov sp, r4 +; THUMB-NEXT: lsls r1, r2, #31 +; THUMB-NEXT: vst1.64 {d8, d9}, [r4:128] +; THUMB-NEXT: beq LBB12_2 +; THUMB-NEXT: @ %bb.1: @ %bb3 +; THUMB-NEXT: ldr r1, [r7, #8] +; THUMB-NEXT: vmov s16, r0 +; THUMB-NEXT: mov r0, r3 +; THUMB-NEXT: mov r2, r3 
+; THUMB-NEXT: vmov d9, r3, r1 +; THUMB-NEXT: mov r3, r1 +; THUMB-NEXT: bl _pow +; THUMB-NEXT: vmov.f32 s0, #1.000000e+00 +; THUMB-NEXT: vmov.f64 d16, #1.000000e+00 +; THUMB-NEXT: vmov.f64 d18, d9 +; THUMB-NEXT: vcmpe.f32 s16, s0 +; THUMB-NEXT: vadd.f64 d16, d9, d16 +; THUMB-NEXT: vmrs APSR_nzcv, fpscr +; THUMB-NEXT: it gt +; THUMB-NEXT: vmovgt.f64 d18, d16 +; THUMB-NEXT: vcmp.f64 d18, d9 +; THUMB-NEXT: vmov d17, r0, r1 +; THUMB-NEXT: vmrs APSR_nzcv, fpscr +; THUMB-NEXT: vadd.f64 d17, d17, d17 +; THUMB-NEXT: it ne +; THUMB-NEXT: vmovne.f64 d9, d17 +; THUMB-NEXT: vcvt.f32.f64 s0, d9 +; THUMB-NEXT: b LBB12_3 +; THUMB-NEXT: LBB12_2: +; THUMB-NEXT: vldr s0, LCPI12_0 +; THUMB-NEXT: LBB12_3: @ %bb13 +; THUMB-NEXT: mov r4, sp +; THUMB-NEXT: vld1.64 {d8, d9}, [r4:128] +; THUMB-NEXT: subs r4, r7, #4 +; THUMB-NEXT: vmov r0, s0 +; THUMB-NEXT: mov sp, r4 +; THUMB-NEXT: pop {r4, r7, pc} +; THUMB-NEXT: .p2align 2 +; THUMB-NEXT: @ %bb.4: +; THUMB-NEXT: .data_region +; THUMB-NEXT: LCPI12_0: +; THUMB-NEXT: .long 0 @ float 0 +; THUMB-NEXT: .end_data_region bb: br i1 %or.cond, label %bb3, label %bb13 Index: test/CodeGen/Thumb/thumb-shrink-wrapping.ll =================================================================== --- test/CodeGen/Thumb/thumb-shrink-wrapping.ll +++ test/CodeGen/Thumb/thumb-shrink-wrapping.ll @@ -1445,53 +1445,47 @@ define i1 @beq_to_bx(i32* %y, i32 %head) { ; ENABLE-V4T-LABEL: beq_to_bx: ; ENABLE-V4T: @ %bb.0: @ %entry +; ENABLE-V4T-NEXT: push {r4, lr} +; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V4T-NEXT: .cfi_offset lr, -4 +; ENABLE-V4T-NEXT: .cfi_offset r4, -8 ; ENABLE-V4T-NEXT: movs r2, r0 ; ENABLE-V4T-NEXT: movs r0, #1 ; ENABLE-V4T-NEXT: cmp r2, #0 ; ENABLE-V4T-NEXT: beq LBB11_3 ; ENABLE-V4T-NEXT: @ %bb.1: @ %if.end -; ENABLE-V4T-NEXT: push {r4, lr} -; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 8 -; ENABLE-V4T-NEXT: .cfi_offset lr, -4 -; ENABLE-V4T-NEXT: .cfi_offset r4, -8 ; ENABLE-V4T-NEXT: ldr r3, [r2] ; ENABLE-V4T-NEXT: lsls r4, r3, #30 -; 
ENABLE-V4T-NEXT: ldr r4, [sp, #4] -; ENABLE-V4T-NEXT: mov lr, r4 -; ENABLE-V4T-NEXT: pop {r4} -; ENABLE-V4T-NEXT: add sp, #4 ; ENABLE-V4T-NEXT: bpl LBB11_3 ; ENABLE-V4T-NEXT: @ %bb.2: @ %if.end4 ; ENABLE-V4T-NEXT: str r1, [r2] ; ENABLE-V4T-NEXT: str r3, [r2] ; ENABLE-V4T-NEXT: movs r0, #0 ; ENABLE-V4T-NEXT: LBB11_3: @ %cleanup -; ENABLE-V4T-NEXT: bx lr +; ENABLE-V4T-NEXT: pop {r4} +; ENABLE-V4T-NEXT: pop {r1} +; ENABLE-V4T-NEXT: bx r1 ; ; ENABLE-V5T-LABEL: beq_to_bx: ; ENABLE-V5T: @ %bb.0: @ %entry +; ENABLE-V5T-NEXT: push {r4, lr} +; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V5T-NEXT: .cfi_offset lr, -4 +; ENABLE-V5T-NEXT: .cfi_offset r4, -8 ; ENABLE-V5T-NEXT: movs r2, r0 ; ENABLE-V5T-NEXT: movs r0, #1 ; ENABLE-V5T-NEXT: cmp r2, #0 ; ENABLE-V5T-NEXT: beq LBB11_3 ; ENABLE-V5T-NEXT: @ %bb.1: @ %if.end -; ENABLE-V5T-NEXT: push {r4, lr} -; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 8 -; ENABLE-V5T-NEXT: .cfi_offset lr, -4 -; ENABLE-V5T-NEXT: .cfi_offset r4, -8 ; ENABLE-V5T-NEXT: ldr r3, [r2] ; ENABLE-V5T-NEXT: lsls r4, r3, #30 -; ENABLE-V5T-NEXT: ldr r4, [sp, #4] -; ENABLE-V5T-NEXT: mov lr, r4 -; ENABLE-V5T-NEXT: pop {r4} -; ENABLE-V5T-NEXT: add sp, #4 ; ENABLE-V5T-NEXT: bpl LBB11_3 ; ENABLE-V5T-NEXT: @ %bb.2: @ %if.end4 ; ENABLE-V5T-NEXT: str r1, [r2] ; ENABLE-V5T-NEXT: str r3, [r2] ; ENABLE-V5T-NEXT: movs r0, #0 ; ENABLE-V5T-NEXT: LBB11_3: @ %cleanup -; ENABLE-V5T-NEXT: bx lr +; ENABLE-V5T-NEXT: pop {r4, pc} ; ; DISABLE-V4T-LABEL: beq_to_bx: ; DISABLE-V4T: @ %bb.0: @ %entry Index: test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll =================================================================== --- test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll +++ test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll @@ -9,6 +9,7 @@ define i32 @main() nounwind { ; CHECK-LABEL: main: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpq {{.*}}(%rip), %rax ; CHECK-NEXT: sbbb %al, %al @@ -21,7 +22,6 @@ ; CHECK-NEXT: .LBB0_1: # 
%entry.if.end_crit_edge ; CHECK-NEXT: movl {{.*}}(%rip), %esi ; CHECK-NEXT: .LBB0_3: # %if.end -; CHECK-NEXT: pushq %rax ; CHECK-NEXT: movl $.L.str, %edi ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: callq printf Index: test/CodeGen/X86/MachineSink-eflags.ll =================================================================== --- test/CodeGen/X86/MachineSink-eflags.ll +++ test/CodeGen/X86/MachineSink-eflags.ll @@ -14,6 +14,7 @@ define void @foo(i8* nocapture %_stubArgs) nounwind { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $152, %rsp ; CHECK-NEXT: movq 48(%rdi), %rax ; CHECK-NEXT: movl 64(%rdi), %edx ; CHECK-NEXT: movl $200, %esi @@ -29,14 +30,14 @@ ; CHECK-NEXT: jne .LBB0_1 ; CHECK-NEXT: # %bb.2: # %entry ; CHECK-NEXT: xorps %xmm1, %xmm1 -; CHECK-NEXT: jmp .LBB0_3 +; CHECK-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: je .LBB0_4 +; CHECK-NEXT: jmp .LBB0_5 ; CHECK-NEXT: .LBB0_1: ; CHECK-NEXT: movaps (%rax,%rcx), %xmm1 -; CHECK-NEXT: .LBB0_3: # %entry -; CHECK-NEXT: leaq -{{[0-9]+}}(%rsp), %rsp ; CHECK-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: jne .LBB0_5 -; CHECK-NEXT: # %bb.4: # %entry +; CHECK-NEXT: .LBB0_4: # %entry ; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: .LBB0_5: # %entry ; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) Index: test/CodeGen/X86/cmov.ll =================================================================== --- test/CodeGen/X86/cmov.ll +++ test/CodeGen/X86/cmov.ll @@ -78,6 +78,7 @@ define i1 @test4() nounwind { ; CHECK-LABEL: test4: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: movsbl {{.*}}(%rip), %edx ; CHECK-NEXT: movzbl %dl, %ecx ; CHECK-NEXT: shrl $7, %ecx @@ -90,7 +91,6 @@ ; CHECK-NEXT: # %bb.1: # %bb.i.i.i ; CHECK-NEXT: movb {{.*}}(%rip), %cl ; CHECK-NEXT: .LBB3_2: # %func_4.exit.i -; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: testb %dl, %dl ; CHECK-NEXT: setne %bl Index: test/CodeGen/X86/copy-eflags.ll 
=================================================================== --- test/CodeGen/X86/copy-eflags.ll +++ test/CodeGen/X86/copy-eflags.ll @@ -43,6 +43,7 @@ ; ; X64-LABEL: test1: ; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax ; X64-NEXT: movb {{.*}}(%rip), %cl ; X64-NEXT: leal 1(%rcx), %eax ; X64-NEXT: movb %al, {{.*}}(%rip) @@ -56,12 +57,11 @@ ; X64-NEXT: testb %dl, %dl ; X64-NEXT: jne .LBB0_2 ; X64-NEXT: # %bb.1: # %if.then -; X64-NEXT: pushq %rax ; X64-NEXT: movsbl %al, %edi ; X64-NEXT: callq external -; X64-NEXT: addq $8, %rsp ; X64-NEXT: .LBB0_2: # %if.end ; X64-NEXT: xorl %eax, %eax +; X64-NEXT: popq %rcx ; X64-NEXT: retq entry: %bval = load i8, i8* @b Index: test/CodeGen/X86/fold-pcmpeqd-2.ll =================================================================== --- test/CodeGen/X86/fold-pcmpeqd-2.ll +++ test/CodeGen/X86/fold-pcmpeqd-2.ll @@ -17,16 +17,18 @@ define void @program_1(%struct._image2d_t* %dest, %struct._image2d_t* %t0, <4 x float> %p0, <4 x float> %p1, <4 x float> %p4, <4 x float> %p5, <4 x float> %p6) nounwind { ; X32-LABEL: program_1: ; X32: ## %bb.0: ## %entry +; X32-NEXT: pushl %esi +; X32-NEXT: subl $88, %esp ; X32-NEXT: cmpl $0, 0 ; X32-NEXT: jle LBB0_2 ; X32-NEXT: ## %bb.1: ## %forcond ; X32-NEXT: cmpl $0, 0 ; X32-NEXT: jg LBB0_3 ; X32-NEXT: LBB0_2: ## %ifthen +; X32-NEXT: addl $88, %esp +; X32-NEXT: popl %esi ; X32-NEXT: retl ; X32-NEXT: LBB0_3: ## %forbody -; X32-NEXT: pushl %esi -; X32-NEXT: subl $88, %esp ; X32-NEXT: movaps {{.*#+}} xmm1 = [1.28E+2,1.28E+2,1.28E+2,1.28E+2] ; X32-NEXT: minps LCPI0_3, %xmm1 ; X32-NEXT: cvttps2dq %xmm1, %xmm0 @@ -99,16 +101,18 @@ ; ; X64-LABEL: program_1: ; X64: ## %bb.0: ## %entry +; X64-NEXT: pushq %rbx +; X64-NEXT: subq $64, %rsp ; X64-NEXT: cmpl $0, 0 ; X64-NEXT: jle LBB0_2 ; X64-NEXT: ## %bb.1: ## %forcond ; X64-NEXT: cmpl $0, 0 ; X64-NEXT: jg LBB0_3 ; X64-NEXT: LBB0_2: ## %ifthen +; X64-NEXT: addq $64, %rsp +; X64-NEXT: popq %rbx ; X64-NEXT: retq ; X64-NEXT: LBB0_3: ## %forbody -; X64-NEXT: pushq 
%rbx -; X64-NEXT: subq $64, %rsp ; X64-NEXT: xorps %xmm0, %xmm0 ; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill ; X64-NEXT: movaps {{.*#+}} xmm1 = [1.28E+2,1.28E+2,1.28E+2,1.28E+2] Index: test/CodeGen/X86/i386-shrink-wrapping.ll =================================================================== --- test/CodeGen/X86/i386-shrink-wrapping.ll +++ test/CodeGen/X86/i386-shrink-wrapping.ll @@ -20,6 +20,8 @@ define i32 @eflagsLiveInPrologue() #0 { ; ENABLE-LABEL: eflagsLiveInPrologue: ; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: pushl %esi +; ENABLE-NEXT: subl $8, %esp ; ENABLE-NEXT: movl L_a$non_lazy_ptr, %eax ; ENABLE-NEXT: cmpl $0, (%eax) ; ENABLE-NEXT: je LBB0_2 @@ -35,8 +37,6 @@ ; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 ; ENABLE-NEXT: jmp LBB0_3 ; ENABLE-NEXT: LBB0_4: ## %for.end -; ENABLE-NEXT: pushl %esi -; ENABLE-NEXT: subl $8, %esp ; ENABLE-NEXT: xorl %edx, %edx ; ENABLE-NEXT: cmpb $0, _d ; ENABLE-NEXT: movl $6, %ecx Index: test/CodeGen/X86/shrink-wrap-chkstk-x86_64.ll =================================================================== --- test/CodeGen/X86/shrink-wrap-chkstk-x86_64.ll +++ test/CodeGen/X86/shrink-wrap-chkstk-x86_64.ll @@ -9,6 +9,10 @@ define void @fn1() nounwind uwtable { ; CHECK-LABEL: fn1: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl $4136, %eax # imm = 0x1028 +; CHECK-NEXT: callq ___chkstk_ms +; CHECK-NEXT: subq %rax, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 4144 ; CHECK-NEXT: movl {{.*}}(%rip), %eax ; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: jne .LBB0_2 @@ -20,12 +24,6 @@ ; CHECK-NEXT: shrq $32, %rax ; CHECK-NEXT: addl %ecx, %eax ; CHECK-NEXT: .LBB0_2: # %select.end -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: movl $4128, %eax # imm = 0x1020 -; CHECK-NEXT: callq ___chkstk_ms -; CHECK-NEXT: subq %rax, %rsp -; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax -; CHECK-NEXT: .cfi_def_cfa_offset 4144 ; CHECK-NEXT: movl %eax, {{.*}}(%rip) ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; CHECK-NEXT: # kill: def $ecx killed $ecx killed 
$rcx Index: test/CodeGen/X86/x86-shrink-wrapping.ll =================================================================== --- test/CodeGen/X86/x86-shrink-wrapping.ll +++ test/CodeGen/X86/x86-shrink-wrapping.ll @@ -639,6 +639,8 @@ define void @useLEA(%struct.rtx_def* readonly %x) { ; ENABLE-LABEL: useLEA: ; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: pushq %rax +; ENABLE-NEXT: .cfi_def_cfa_offset 16 ; ENABLE-NEXT: testq %rdi, %rdi ; ENABLE-NEXT: je LBB8_7 ; ENABLE-NEXT: ## %bb.1: ## %if.end @@ -655,6 +657,7 @@ ; ENABLE-NEXT: btl %ecx, %edx ; ENABLE-NEXT: jae LBB8_3 ; ENABLE-NEXT: LBB8_7: ## %cleanup +; ENABLE-NEXT: popq %rax ; ENABLE-NEXT: retq ; ENABLE-NEXT: LBB8_3: ## %lor.lhs.false ; ENABLE-NEXT: cmpl $134, %eax @@ -663,14 +666,12 @@ ; ENABLE-NEXT: cmpl $140, %eax ; ENABLE-NEXT: je LBB8_7 ; ENABLE-NEXT: ## %bb.5: ## %if.end.55 -; ENABLE-NEXT: pushq %rax -; ENABLE-NEXT: .cfi_def_cfa_offset 16 ; ENABLE-NEXT: callq _find_temp_slot_from_address ; ENABLE-NEXT: testq %rax, %rax -; ENABLE-NEXT: leaq {{[0-9]+}}(%rsp), %rsp ; ENABLE-NEXT: je LBB8_7 ; ENABLE-NEXT: ## %bb.6: ## %if.then.60 ; ENABLE-NEXT: movb $1, 57(%rax) +; ENABLE-NEXT: popq %rax ; ENABLE-NEXT: retq ; ; DISABLE-LABEL: useLEA: @@ -808,10 +809,6 @@ define void @infiniteloop() { ; ENABLE-LABEL: infiniteloop: ; ENABLE: ## %bb.0: ## %entry -; ENABLE-NEXT: xorl %eax, %eax -; ENABLE-NEXT: testb %al, %al -; ENABLE-NEXT: jne LBB10_3 -; ENABLE-NEXT: ## %bb.1: ## %if.then ; ENABLE-NEXT: pushq %rbp ; ENABLE-NEXT: .cfi_def_cfa_offset 16 ; ENABLE-NEXT: .cfi_offset %rbp, -16 @@ -820,15 +817,16 @@ ; ENABLE-NEXT: pushq %rbx ; ENABLE-NEXT: pushq %rax ; ENABLE-NEXT: .cfi_offset %rbx, -24 +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: testb %al, %al +; ENABLE-NEXT: jne LBB10_3 +; ENABLE-NEXT: ## %bb.1: ## %if.then ; ENABLE-NEXT: movq %rsp, %rcx ; ENABLE-NEXT: addq $-16, %rcx ; ENABLE-NEXT: movq %rcx, %rsp ; ENABLE-NEXT: ## InlineAsm Start ; ENABLE-NEXT: movl $1, %edx ; ENABLE-NEXT: ## InlineAsm End -; ENABLE-NEXT: leaq 
-8(%rbp), %rsp -; ENABLE-NEXT: popq %rbx -; ENABLE-NEXT: popq %rbp ; ENABLE-NEXT: .p2align 4, 0x90 ; ENABLE-NEXT: LBB10_2: ## %for.body ; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 @@ -836,6 +834,9 @@ ; ENABLE-NEXT: movl %eax, (%rcx) ; ENABLE-NEXT: jmp LBB10_2 ; ENABLE-NEXT: LBB10_3: ## %if.end +; ENABLE-NEXT: leaq -8(%rbp), %rsp +; ENABLE-NEXT: popq %rbx +; ENABLE-NEXT: popq %rbp ; ENABLE-NEXT: retq ; ; DISABLE-LABEL: infiniteloop: Index: test/CodeGen/X86/xchg-nofold.ll =================================================================== --- test/CodeGen/X86/xchg-nofold.ll +++ test/CodeGen/X86/xchg-nofold.ll @@ -9,6 +9,7 @@ define zeroext i1 @_Z3fooRSt6atomicIbEb(%"struct.std::atomic"* nocapture dereferenceable(1) %a, i1 returned zeroext %b) nounwind { ; CHECK-LABEL: _Z3fooRSt6atomicIbEb: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: movq %rdi, %rcx ; CHECK-NEXT: shrq $3, %rcx @@ -24,9 +25,9 @@ ; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: xchgb %cl, (%rdi) ; CHECK-NEXT: # kill: def $al killed $al killed $eax +; CHECK-NEXT: popq %rcx ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB0_2: -; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq __asan_report_store1 ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP