Index: llvm/lib/Target/PowerPC/PPCFrameLowering.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -2545,6 +2545,5 @@ bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled()) return false; - return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() && - MF.getSubtarget<PPCSubtarget>().isPPC64()); + return !MF.getSubtarget<PPCSubtarget>().is32BitELFABI(); } Index: llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll =================================================================== --- llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll +++ llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll @@ -1,5 +1,10 @@ -; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE -; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu %s -o - -enable-shrink-wrap=false -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-COM --check-prefix=ENABLE-COM --check-prefix=CHECK-LINUX --check-prefix=ENABLE-LINUX +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu %s -o - -enable-shrink-wrap=false -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-COM --check-prefix=DISABLE-COM --check-prefix=CHECK-LINUX --check-prefix=DISABLE-LINUX +; RUN: llc -mtriple=powerpc-ibm-aix-xcoff -mattr=-altivec -mcpu=pwr4 %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-COM --check-prefix=ENABLE-COM --check-prefix=CHECK-AIX --check-prefix=ENABLE-AIX --check-prefix=CHECK-32AIX --check-prefix=ENABLE-32AIX +; RUN: llc -mtriple=powerpc-ibm-aix-xcoff %s -o - -enable-shrink-wrap=false -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-COM --check-prefix=DISABLE-COM --check-prefix=CHECK-AIX --check-prefix=DISABLE-AIX 
--check-prefix=CHECK-32AIX --check-prefix=DISABLE-32AIX +; RUN: llc -mtriple=powerpc64-ibm-aix-xcoff -mattr=-altivec -mcpu=pwr4 %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-COM --check-prefix=ENABLE-COM --check-prefix=CHECK-AIX --check-prefix=ENABLE-AIX --check-prefix=CHECK-64AIX --check-prefix=ENABLE-64AIX +; RUN: llc -mtriple=powerpc64-ibm-aix-xcoff %s -o - -enable-shrink-wrap=false -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-COM --check-prefix=DISABLE-COM --check-prefix=CHECK-AIX --check-prefix=DISABLE-AIX --check-prefix=CHECK-64AIX --check-prefix=DISABLE-64AIX +; ; ; Note: Lots of tests use inline asm instead of regular calls. ; This allows to have a better control on what the allocation will do. @@ -10,44 +15,48 @@ ; Initial motivating example: Simple diamond with a call just on one side. -; CHECK-LABEL: foo: +; CHECK-LINUX-LABEL: foo: +; CHECK-AIX-LABEL: .foo: ; ; Compare the arguments and return ; No prologue needed. -; ENABLE: cmpw 3, 4 -; ENABLE-NEXT: bgelr 0 +; ENABLE-COM: cmpw 3, 4 +; ENABLE-COM-NEXT: bgelr 0 +; XENABLE-AIX-NEXT: bge 0, L..BB0_2 ; ; Prologue code. ; At a minimum, we save/restore the link register. Other registers may be saved -; as well. -; CHECK: mflr +; as well. +; CHECK-LINUX: mflr ; ; Compare the arguments and jump to exit. ; After the prologue is set. -; DISABLE: cmpw 3, 4 -; DISABLE-NEXT: bge 0, .[[EXIT_LABEL:LBB[0-9_]+]] +; DISABLE-COM: cmpw 3, 4 +; DISABLE-LINUX-NEXT: bge 0, .[[EXIT_LABEL:LBB[0-9_]+]] +; DISABLE-AIX-NEXT: bge 0, L..[[EXIT_LABEL:BB[0-9_]+]] ; ; Store %a on the stack -; CHECK: stw 3, {{[0-9]+([0-9]+)}} +; CHECK-COM: stw 3, {{[0-9]+([0-9]+)}} ; Set the alloca address in the second argument. -; CHECK-NEXT: addi 4, 1, {{[0-9]+}} +; CHECK-COM-NEXT: addi 4, 1, {{[0-9]+}} ; Set the first argument to zero. 
-; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: bl doSomething +; CHECK-COM-NEXT: li 3, 0 +; CHECK-LINUX-NEXT: bl doSomething +; CHECK-AIX-NEXT: bl .doSomething[PR] ; ; With shrink-wrapping, epilogue is just after the call. ; Restore the link register and return. -; Note that there could be other epilog code before the link register is +; Note that there could be other epilog code before the link register is ; restored but we will not check for it here. -; ENABLE: mtlr -; ENABLE-NEXT: blr +; ENABLE-COM: mtlr +; ENABLE-COM-NEXT: blr ; -; DISABLE: [[EXIT_LABEL]]: +; DISABLE-COM: [[EXIT_LABEL]]: ; ; Without shrink-wrapping, epilogue is in the exit block. ; Epilogue code. (What we pop does not matter.) -; DISABLE: mtlr {{[0-9]+}} -; DISABLE-NEXT: blr +; DISABLE-COM: mtlr {{[0-9]+}} +; DISABLE-COM-NEXT: blr ; define i32 @foo(i32 %a, i32 %b) { @@ -72,50 +81,62 @@ ; Check that we do not perform the restore inside the loop whereas the save ; is outside. -; CHECK-LABEL: freqSaveAndRestoreOutsideLoop: +; CHECK-LINUX-LABEL: freqSaveAndRestoreOutsideLoop: +; CHECK-AIX-LABEL: .freqSaveAndRestoreOutsideLoop: ; ; Shrink-wrapping allows to skip the prologue in the else case. -; ENABLE: cmplwi 3, 0 -; ENABLE: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] +; ENABLE-COM: cmplwi 3, 0 +; ENABLE-LINUX: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] +; ENABLE-AIX: beq 0, L..[[ELSE_LABEL:BB[0-9_]+]] ; ; Prologue code. 
; Make sure we save the link register -; CHECK: mflr {{[0-9]+}} +; CHECK-LINUX: mflr {{[0-9]+}} ; -; DISABLE: cmplwi 3, 0 -; DISABLE: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] +; DISABLE-COM: cmplwi 3, 0 +; DISABLE-LINUX: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] +; DISABLE-AIX: beq 0, L..[[ELSE_LABEL:BB[0-9_]+]] ; ; Loop preheader -; CHECK-DAG: li [[SUM:[0-9]+]], 0 -; CHECK-DAG: li [[IV:[0-9]+]], 10 -; +; CHECK-COM-DAG: li [[SUM:[0-9]+]], 0 +; CHECK-COM-DAG: li [[IV:[0-9]+]], 10 +; ; Loop body -; CHECK: .[[LOOP:LBB[0-9_]+]]: # %for.body -; CHECK: bl something -; CHECK-DAG: addi [[IV]], [[IV]], -1 -; CHECK-DAG: add [[SUM]], 3, [[SUM]] -; CHECK-NEXT: cmplwi [[IV]], 0 -; CHECK-NEXT: bne 0, .[[LOOP]] +; CHECK-LINUX: .[[LOOP:LBB[0-9_]+]]: # %for.body +; CHECK-LINUX: bl something +; +; CHECK-AIX: L..[[LOOP:BB[0-9_]+]]: # %for.body +; CHECK-AIX: bl .something[PR] +; +; CHECK-COM-DAG: addi [[IV]], [[IV]], -1 +; CHECK-COM-DAG: add [[SUM]], 3, [[SUM]] +; CHECK-COM-DAG: cmplwi [[IV]], 0 +; CHECK-LINUX-NEXT: bne 0, .[[LOOP]] +; CHECK-AIX-NEXT: bne 0, L..[[LOOP]] ; ; Next BB. -; CHECK: slwi 3, [[SUM]], 3 +; CHECK-COM: slwi 3, [[SUM]], 3 ; ; Jump to epilogue. -; DISABLE: b .[[EPILOG_BB:LBB[0-9_]+]] +; DISABLE-LINUX: b .[[EPILOG_BB:LBB[0-9_]+]] +; DISABLE-AIX: b L..[[EPILOG_BB:BB[0-9_]+]] ; -; DISABLE: .[[ELSE_LABEL]]: # %if.else +; DISABLE-LINUX: .[[ELSE_LABEL]]: # %if.else +; DISABLE-AIX: L..[[ELSE_LABEL]]: # %if.else ; Shift second argument by one and store into returned register. -; DISABLE: slwi 3, 4, 1 -; DISABLE: .[[EPILOG_BB]]: # %if.end +; DISABLE-COM: slwi 3, 4, 1 +; DISABLE-LINUX: .[[EPILOG_BB]]: # %if.end +; DISABLE-AIX: L..[[EPILOG_BB]]: # %if.end ; ; Epilogue code. -; CHECK: mtlr {{[0-9]+}} -; CHECK: blr +; CHECK-COM: mtlr {{[0-9]+}} +; CHECK-COM: blr ; -; ENABLE: .[[ELSE_LABEL]]: # %if.else +; ENABLE-LINUX: .[[ELSE_LABEL]]: # %if.else +; ENABLE-AIX: L..[[ELSE_LABEL]]: # %if.else ; Shift second argument by one and store into returned register. 
-; ENABLE: slwi 3, 4, 1 -; ENABLE-NEXT: blr +; ENABLE-COM: slwi 3, 4, 1 +; ENABLE-COM-NEXT: blr define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) { entry: %tobool = icmp eq i32 %cond, 0 @@ -151,27 +172,34 @@ ; Check that we do not perform the shrink-wrapping inside the loop even ; though that would be legal. The cost model must prevent that. -; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2: +; CHECK-LINUX-LABEL: freqSaveAndRestoreOutsideLoop2: +; CHECK-AIX-LABEL: .freqSaveAndRestoreOutsideLoop2: ; Prologue code. ; Make sure we save the link register before the call -; CHECK: mflr {{[0-9]+}} +; CHECK-COM: mflr {{[0-9]+}} ; ; Loop preheader -; CHECK-DAG: li [[SUM:[0-9]+]], 0 -; CHECK-DAG: li [[IV:[0-9]+]], 10 -; +; CHECK-COM-DAG: li [[SUM:[0-9]+]], 0 +; CHECK-COM-DAG: li [[IV:[0-9]+]], 10 +; ; Loop body -; CHECK: .[[LOOP:LBB[0-9_]+]]: # %for.body -; CHECK: bl something -; CHECK-DAG: addi [[IV]], [[IV]], -1 -; CHECK-DAG: add [[SUM]], 3, [[SUM]] -; CHECK-NEXT: cmplwi [[IV]], 0 -; CHECK-NEXT: bne 0, .[[LOOP]] +; CHECK-LINUX: .[[LOOP:LBB[0-9_]+]]: # %for.body +; CHECK-LINUX: bl something +; +; CHECK-AIX: L..[[LOOP:BB[0-9_]+]]: # %for.body +; CHECK-AIX: bl .something[PR] +; +; CHECK-COM-DAG: addi [[IV]], [[IV]], -1 +; CHECK-COM-DAG: add [[SUM]], 3, [[SUM]] +; CHECK-COM-DAG: cmplwi [[IV]], 0 +; +; CHECK-LINUX-NEXT: bne 0, .[[LOOP]] +; CHECK-AIX-NEXT: bne 0, L..[[LOOP]] ; ; Next BB -; CHECK: %for.exit -; CHECK: mtlr {{[0-9]+}} -; CHECK: blr +; CHECK-COM: %for.exit +; CHECK-COM: mtlr {{[0-9]+}} +; CHECK-COM: blr define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) { entry: br label %for.preheader @@ -200,52 +228,72 @@ ; Check with a more complex case that we do not have save within the loop and ; restore outside. 
-; CHECK-LABEL: loopInfoSaveOutsideLoop: +; CHECK-LINUX-LABEL: loopInfoSaveOutsideLoop: +; CHECK-AIX-LABEL: .loopInfoSaveOutsideLoop: ; -; ENABLE: cmplwi 3, 0 -; ENABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] +; ENABLE-COM: cmplwi 3, 0 +; ENABLE-LINUX-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] +; ENABLE-AIX: beq 0, L..[[ELSE_LABEL:BB[0-9_]+]] ; ; Prologue code. -; Make sure we save the link register -; CHECK: mflr {{[0-9]+}} +; Make sure we save the link register +; CHECK-LINUX: mflr {{[0-9]+}} ; -; DISABLE: std -; DISABLE-NEXT: std -; DISABLE: cmplwi 3, 0 -; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] +; DISABLE-LINUX: std +; DISABLE-LINUX-NEXT: std +; DISABLE-LINUX: cmplwi 3, 0 +; DISABLE-LINUX-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] +; +; DISABLE-AIX: cmplwi 3, 0 +; DISABLE-32AIX: stw +; DISABLE-32AIX-NEXT: stw +; DISABLE-64AIX: std +; DISABLE-64AIX-NEXT: std +; DISABLE-AIX-NEXT: beq 0, L..[[ELSE_LABEL:BB[0-9_]+]] ; ; Loop preheader -; CHECK-DAG: li [[SUM:[0-9]+]], 0 -; CHECK-DAG: li [[IV:[0-9]+]], 10 -; +; CHECK-COM-DAG: li [[SUM:[0-9]+]], 0 +; CHECK-COM-DAG: li [[IV:[0-9]+]], 10 +; ; Loop body -; CHECK: .[[LOOP:LBB[0-9_]+]]: # %for.body -; CHECK: bl something -; CHECK-DAG: addi [[IV]], [[IV]], -1 -; CHECK-DAG: add [[SUM]], 3, [[SUM]] -; CHECK-NEXT: cmplwi [[IV]], 0 -; CHECK-NEXT: bne 0, .[[LOOP]] -; +; CHECK-LINUX: .[[LOOP:LBB[0-9_]+]]: # %for.body +; CHECK-LINUX: bl something +; CHECK-AIX: L..[[LOOP:BB[0-9_]+]]: # %for.body +; CHECK-AIX: bl .something[PR] +; +; CHECK-COM-DAG: addi [[IV]], [[IV]], -1 +; CHECK-COM-DAG: add [[SUM]], 3, [[SUM]] +; CHECK-COM-DAG: cmplwi [[IV]], 0 +; +; CHECK-LINUX-NEXT: bne 0, .[[LOOP]] +; CHECK-AIX-NEXT: bne 0, L..[[LOOP]] +; ; Next BB -; CHECK: bl somethingElse -; CHECK: slwi 3, [[SUM]], 3 +; CHECK-LINUX: bl somethingElse +; CHECK-AIX: bl .somethingElse[PR] +; CHECK-COM: slwi 3, [[SUM]], 3 ; ; Jump to epilogue -; DISABLE: b .[[EPILOG_BB:LBB[0-9_]+]] +; DISABLE-LINUX: b .[[EPILOG_BB:LBB[0-9_]+]] +; DISABLE-AIX: b 
L..[[EPILOG_BB:BB[0-9_]+]] ; -; DISABLE: .[[ELSE_LABEL]]: # %if.else +; DISABLE-LINUX: .[[ELSE_LABEL]]: # %if.else +; DISABLE-AIX: L..[[ELSE_LABEL]]: # %if.else ; Shift second argument by one and store into returned register. -; DISABLE: slwi 3, 4, 1 +; DISABLE-COM: slwi 3, 4, 1 +; +; DISABLE-LINUX: .[[EPILOG_BB]]: # %if.end +; DISABLE-AIX: L..[[EPILOG_BB]]: # %if.end ; -; DISABLE: .[[EPILOG_BB]]: # %if.end ; Epilog code -; CHECK: mtlr {{[0-9]+}} -; CHECK: blr -; -; ENABLE: .[[ELSE_LABEL]]: # %if.else +; CHECK-COM: mtlr {{[0-9]+}} +; CHECK-COM: blr +; +; ENABLE-LINUX: .[[ELSE_LABEL]]: # %if.else +; ENABLE-AIX: L..[[ELSE_LABEL]]: # %if.else ; Shift second argument by one and store into returned register. -; ENABLE: slwi 3, 4, 1 -; ENABLE-NEXT: blr +; ENABLE-COM: slwi 3, 4, 1 +; ENABLE-COM-NEXT: blr define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) { entry: %tobool = icmp eq i32 %cond, 0 @@ -282,52 +330,71 @@ ; Check with a more complex case that we do not have restore within the loop and ; save outside. -; CHECK-LABEL: loopInfoRestoreOutsideLoop: +; CHECK-LINUX-LABEL: loopInfoRestoreOutsideLoop: +; CHECK-AIX-LABEL: .loopInfoRestoreOutsideLoop: ; -; ENABLE: cmplwi 3, 0 -; ENABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] +; ENABLE-COM: cmplwi 3, 0 +; ENABLE-LINUX-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] +; ENABLE-AIX: beq 0, L..[[ELSE_LABEL:BB[0-9_]+]] ; ; Prologue code. 
; Make sure we save the link register -; CHECK: mflr {{[0-9]+}} +; CHECK-LINUX: mflr {{[0-9]+}} ; -; DISABLE: std -; DISABLE-NEXT: std -; DISABLE: cmplwi 3, 0 -; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] +; DISABLE-LINUX: std +; DISABLE-LINUX-NEXT: std +; DISABLE-LINUX: cmplwi 3, 0 +; DISABLE-LINUX-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] ; -; CHECK: bl somethingElse +; DISABLE-AIX: cmplwi 3, 0 +; DISABLE-32AIX: stw +; DISABLE-32AIX-NEXT: stw +; DISABLE-64AIX: std +; DISABLE-64AIX-NEXT: std +; DISABLE-AIX-NEXT: beq 0, L..[[ELSE_LABEL:BB[0-9_]+]] +; +; CHECK-LINUX: bl somethingElse +; CHECK-AIX: bl .somethingElse[PR] ; ; Loop preheader -; CHECK-DAG: li [[SUM:[0-9]+]], 0 -; CHECK-DAG: li [[IV:[0-9]+]], 10 -; +; CHECK-COM-DAG: li [[SUM:[0-9]+]], 0 +; CHECK-COM-DAG: li [[IV:[0-9]+]], 10 +; ; Loop body -; CHECK: .[[LOOP:LBB[0-9_]+]]: # %for.body -; CHECK: bl something -; CHECK-DAG: addi [[IV]], [[IV]], -1 -; CHECK-DAG: add [[SUM]], 3, [[SUM]] -; CHECK-NEXT: cmplwi [[IV]], 0 -; CHECK-NEXT: bne 0, .[[LOOP]] +; CHECK-LINUX: .[[LOOP:LBB[0-9_]+]]: # %for.body +; CHECK-LINUX: bl something +; CHECK-AIX: L..[[LOOP:BB[0-9_]+]]: # %for.body +; CHECK-AIX: bl .something[PR] ; -; Next BB. -; slwi 3, [[SUM]], 3 +; CHECK-COM-DAG: addi [[IV]], [[IV]], -1 +; CHECK-COM-DAG: add [[SUM]], 3, [[SUM]] +; CHECK-COM-DAG: cmplwi [[IV]], 0 +; +; CHECK-LINUX-NEXT: bne 0, .[[LOOP]] +; CHECK-AIX-NEXT: bne 0, L..[[LOOP]] +; +; Next BB. +; CHECK-COM: slwi 3, [[SUM]], 3 ; -; DISABLE: b .[[EPILOG_BB:LBB[0-9_]+]] +; DISABLE-LINUX: b .[[EPILOG_BB:LBB[0-9_]+]] +; DISABLE-AIX: b L..[[EPILOG_BB:BB[0-9_]+]] ; -; DISABLE: .[[ELSE_LABEL]]: # %if.else +; DISABLE-LINUX: .[[ELSE_LABEL]]: # %if.else +; DISABLE-AIX: L..[[ELSE_LABEL]]: # %if.else ; Shift second argument by one and store into returned register. -; DISABLE: slwi 3, 4, 1 -; DISABLE: .[[EPILOG_BB]]: # %if.end +; DISABLE-COM: slwi 3, 4, 1 +; DISABLE-LINUX: .[[EPILOG_BB]]: # %if.end +; DISABLE-AIX: L..[[EPILOG_BB]]: # %if.end ; ; Epilogue code. 
-; CHECK: mtlr {{[0-9]+}} -; CHECK: blr +; CHECK-COM: mtlr {{[0-9]+}} +; CHECK-COM: blr ; -; ENABLE: .[[ELSE_LABEL]]: # %if.else +; ENABLE-LINUX: .[[ELSE_LABEL]]: # %if.else +; ENABLE-AIX: L..[[ELSE_LABEL]]: # %if.else ; Shift second argument by one and store into returned register. -; ENABLE: slwi 3, 4, 1 -; ENABLE-NEXT: blr +; ENABLE-COM: slwi 3, 4, 1 +; ENABLE-COM-NEXT: blr define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) nounwind { entry: %tobool = icmp eq i32 %cond, 0 @@ -360,10 +427,12 @@ } ; Check that we handle function with no frame information correctly. -; CHECK-LABEL: emptyFrame: -; CHECK: # %entry -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: blr +; CHECK-LINUX-LABEL: emptyFrame: +; CHECK-AIX-LABEL: .emptyFrame: +; CHECK-COM: # %entry +; CHECK-COM-NEXT: li 3, 0 +; CHECK-COM-NEXT: blr +; define i32 @emptyFrame() { entry: ret i32 0 @@ -371,40 +440,53 @@ ; Check that we handle inline asm correctly. -; CHECK-LABEL: inlineAsm: +; CHECK-LINUX-LABEL: inlineAsm: +; CHECK-AIX-LABEL: .inlineAsm: ; -; ENABLE: cmplwi 3, 0 -; ENABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] +; ENABLE-COM: cmplwi 3, 0 +; ENABLE-LINUX-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] +; ENABLE-AIX-NEXT: beq 0, L..[[ELSE_LABEL:BB[0-9_]+]] ; ; Prologue code. 
; Make sure we save the CSR used in the inline asm: r14 -; ENABLE-DAG: li [[IV:[0-9]+]], 10 -; ENABLE-DAG: std 14, -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill -; -; DISABLE: cmplwi 3, 0 -; DISABLE-NEXT: std 14, -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill -; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] -; DISABLE: li [[IV:[0-9]+]], 10 -; -; CHECK: nop -; CHECK: mtctr [[IV]] -; -; CHECK: .[[LOOP_LABEL:LBB[0-9_]+]]: # %for.body +; ENABLE-COM-DAG: li [[IV:[0-9]+]], 10 +; ENABLE-LINUX-DAG: std 14, -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill +; ENABLE-64AIX-DAG: std 14, -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill +; ENABLE-32AIX-DAG: stw 14, -[[STACK_OFFSET:[0-9]+]](1) # 4-byte Folded Spill +; +; DISABLE-COM: cmplwi 3, 0 +; DISABLE-LINUX-NEXT: std 14, -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill +; DISABLE-64AIX-NEXT: std 14, -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill +; DISABLE-32AIX-NEXT: stw 14, -[[STACK_OFFSET:[0-9]+]](1) # 4-byte Folded Spill +; DISABLE-LINUX-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] +; DISABLE-AIX-NEXT: beq 0, L..[[ELSE_LABEL:BB[0-9_]+]] +; DISABLE-COM: li [[IV:[0-9]+]], 10 +; +; CHECK-COM: nop +; CHECK-COM: mtctr [[IV]] +; +; CHECK-LINUX: .[[LOOP_LABEL:LBB[0-9_]+]]: # %for.body +; CHECK-AIX: L..[[LOOP_LABEL:BB[0-9_]+]]: # %for.body ; Inline asm statement. -; CHECK: addi 14, 14, 1 -; CHECK: bdnz .[[LOOP_LABEL]] +; CHECK-COM: addi 14, 14, 1 +; CHECK-LINUX: bdnz .[[LOOP_LABEL]] +; CHECK-AIX: bdnz L..[[LOOP_LABEL]] ; ; Epilogue code. 
-; CHECK: li 3, 0 -; CHECK-DAG: ld 14, -[[STACK_OFFSET]](1) # 8-byte Folded Reload -; CHECK: nop -; CHECK: blr -; -; CHECK: [[ELSE_LABEL]] -; CHECK-NEXT: slwi 3, 4, 1 -; DISABLE: ld 14, -[[STACK_OFFSET]](1) # 8-byte Folded Reload -; CHECK-NEXT: blr -; +; CHECK-COM: li 3, 0 +; CHECK-LINUX-DAG: ld 14, -[[STACK_OFFSET]](1) # 8-byte Folded Reload +; CHECK-64AIX-DAG: ld 14, -[[STACK_OFFSET]](1) # 8-byte Folded Reload +; CHECK-32AIX-DAG: lwz 14, -[[STACK_OFFSET]](1) # 4-byte Folded Reload +; CHECK-LINUX: nop +; CHECK-COM: blr +; +; CHECK-COM: [[ELSE_LABEL]] +; CHECK-COM-NEXT: slwi 3, 4, 1 +; DISABLE-LINUX-NEXT: ld 14, -[[STACK_OFFSET]](1) # 8-byte Folded Reload +; DISABLE-64AIX-NEXT: ld 14, -[[STACK_OFFSET]](1) # 8-byte Folded Reload +; DISABLE-32AIX-NEXT: lwz 14, -[[STACK_OFFSET]](1) # 4-byte Folded Reload +; CHECK-COM-NEXT: blr +; define i32 @inlineAsm(i32 %cond, i32 %N) { entry: %tobool = icmp eq i32 %cond, 0 @@ -437,36 +519,59 @@ ; Check that we handle calls to variadic functions correctly. -; CHECK-LABEL: callVariadicFunc: +; CHECK-LINUX-LABEL: callVariadicFunc: +; CHECK-AIX-LABEL: .callVariadicFunc: ; -; ENABLE: cmplwi 3, 0 -; ENABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] +; ENABLE-COM: cmplwi 3, 0 +; ENABLE-LINUX-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] +; ENABLE-AIX-NEXT: beq 0, L..[[ELSE_LABEL:BB[0-9_]+]] ; ; Prologue code. -; CHECK: mflr {{[0-9]+}} -; -; DISABLE: cmplwi 3, 0 -; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] +; CHECK-COM: mflr {{[0-9]+}} +; +; DISABLE-COM: cmplwi 3, 0 +; DISABLE-LINUX-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] +; DISABLE-AIX-NEXT: beq 0, L..[[ELSE_LABEL:BB[0-9_]+]] ; ; Setup of the varags. 
-; CHECK: mr 4, 3 -; CHECK-NEXT: mr 5, 3 -; CHECK-NEXT: mr 6, 3 -; CHECK-NEXT: mr 7, 3 -; CHECK-NEXT: mr 8, 3 -; CHECK-NEXT: mr 9, 3 -; CHECK-NEXT: bl someVariadicFunc -; CHECK: slwi 3, 3, 3 -; DISABLE: b .[[EPILOGUE_BB:LBB[0-9_]+]] -; -; ENABLE: mtlr {{[0-9]+}} -; ENABLE-NEXT: blr -; -; CHECK: .[[ELSE_LABEL]]: # %if.else -; CHECK-NEXT: slwi 3, 4, 1 -; -; DISABLE: .[[EPILOGUE_BB]]: # %if.end -; DISABLE: mtlr -; CHECK: blr +; CHECK-LINUX: mr 4, 3 +; CHECK-LINUX-NEXT: mr 5, 3 +; CHECK-LINUX-NEXT: mr 6, 3 +; CHECK-LINUX-NEXT: mr 7, 3 +; CHECK-LINUX-NEXT: mr 8, 3 +; CHECK-LINUX-NEXT: mr 9, 3 +; +; CHECK-64AIX: mr 4, 3 +; CHECK-64AIX-NEXT: mr 5, 3 +; CHECK-64AIX-NEXT: mr 6, 3 +; CHECK-64AIX-NEXT: mr 7, 3 +; CHECK-64AIX-NEXT: mr 8, 3 +; CHECK-64AIX-NEXT: mr 9, 3 +; +; CHECK-32AIX: mr 3, 4 +; CHECK-32AIX-NEXT: mr 5, 4 +; CHECK-32AIX-NEXT: mr 6, 4 +; CHECK-32AIX-NEXT: mr 7, 4 +; CHECK-32AIX-NEXT: mr 8, 4 +; CHECK-32AIX-NEXT: mr 9, 4 +; +; CHECK-LINUX-NEXT: bl someVariadicFunc +; CHECK-AIX-NEXT: bl .someVariadicFunc[PR] +; CHECK-COM: slwi 3, 3, 3 +; DISABLE-LINUX: b .[[EPILOGUE_BB:LBB[0-9_]+]] +; DISABLE-AIX: b L..[[EPILOGUE_BB:BB[0-9_]+]] +; +; +; ENABLE-COM: mtlr {{[0-9]+}} +; ENABLE-COM-NEXT: blr +; +; CHECK-LINUX: .[[ELSE_LABEL]]: # %if.else +; CHECK-AIX: L..[[ELSE_LABEL]]: # %if.else +; CHECK-COM-NEXT: slwi 3, 4, 1 +; +; DISABLE-LINUX: .[[EPILOGUE_BB]]: # %if.end +; DISABLE-AIX: L..[[EPILOGUE_BB]]: # %if.end +; DISABLE-COM: mtlr +; CHECK-COM: blr define i32 @callVariadicFunc(i32 %cond, i32 %N) { entry: %tobool = icmp eq i32 %cond, 0 @@ -494,24 +599,28 @@ ; Although this is not incorrect to insert such code, it is useless ; and it hurts the binary size. 
; -; CHECK-LABEL: noreturn: -; DISABLE: mflr {{[0-9]+}} +; CHECK-LINUX-LABEL: noreturn: +; CHECK-AIX-LABEL: .noreturn: +; DISABLE-COM: mflr {{[0-9]+}} ; -; CHECK: cmplwi 3, 0 -; CHECK-NEXT: bne{{[-]?}} 0, .[[ABORT:LBB[0-9_]+]] +; CHECK-COM: cmplwi 3, 0 +; CHECK-LINUX-NEXT: bne{{[-]?}} 0, .[[ABORT:LBB[0-9_]+]] +; CHECK-AIX-NEXT: bne{{[-]?}} 0, L..[[ABORT:BB[0-9_]+]] ; -; CHECK: li 3, 42 +; CHECK-COM: li 3, 42 ; -; DISABLE: mtlr {{[0-9]+}} +; DISABLE-COM: mtlr {{[0-9]+}} ; -; CHECK-NEXT: blr +; CHECK-LINUX-NEXT: blr ; -; CHECK: .[[ABORT]]: # %if.abort +; CHECK-LINUX: .[[ABORT]]: # %if.abort +; CHECK-AIX: L..[[ABORT]]: # %if.abort ; -; ENABLE: mflr {{[0-9]+}} +; ENABLE-LINUX: mflr {{[0-9]+}} ; -; CHECK: bl abort -; ENABLE-NOT: mtlr {{[0-9]+}} +; CHECK-LINUX: bl abort +; CHECK-AIX: bl .abort[PR] +; ENABLE-COM-NOT: mtlr {{[0-9]+}} define i32 @noreturn(i8 signext %bad_thing) { entry: %tobool = icmp eq i8 %bad_thing, 0 @@ -537,9 +646,10 @@ ; dominator is itself. In this case, we cannot perform shrink wrapping, but we ; should return gracefully and continue compilation. ; The only condition for this test is the compilation finishes correctly. -; -; CHECK-LABEL: infiniteloop -; CHECK: blr +; +; CHECK-LINUX-LABEL: infiniteloop +; CHECK-AIX-LABEL: .infiniteloop +; CHECK-COM: blr define void @infiniteloop() { entry: br i1 undef, label %if.then, label %if.end @@ -560,8 +670,9 @@ } ; Another infinite loop test this time with a body bigger than just one block. -; CHECK-LABEL: infiniteloop2 -; CHECK: blr +; CHECK-LINUX-LABEL: infiniteloop2 +; CHECK-AIX-LABEL: .infiniteloop2 +; CHECK-COM: blr define void @infiniteloop2() { entry: br i1 undef, label %if.then, label %if.end @@ -590,10 +701,15 @@ } ; Another infinite loop test this time with two nested infinite loop. 
-; CHECK-LABEL: infiniteloop3 -; CHECK: Lfunc_begin[[FUNCNUM:[0-9]+]] -; CHECK: bclr -; CHECK: Lfunc_end[[FUNCNUM]] +; CHECK-LINUX-LABEL: infiniteloop3 +; CHECK-LINUX: Lfunc_begin[[FUNCNUM:[0-9]+]] +; CHECK-LINUX: bclr +; CHECK-LINUX: Lfunc_end[[FUNCNUM]] +; +; CHECK-AIX-LABEL: .infiniteloop3 +; CHECK-AIX: bclr +; CHECK-AIX: L..BB11_2 +; CHECK-AIX: L..infiniteloop30 define void @infiniteloop3() { entry: br i1 undef, label %loop2a, label %body @@ -632,7 +748,8 @@ ; Test for a bug that was caused when save point was equal to restore point. ; Function Attrs: nounwind -; CHECK-LABEL: transpose +; CHECK-LINUX-LABEL: transpose +; CHECK-AIX-LABEL: .transpose ; ; Store of callee-save register saved by shrink wrapping ; FIXME: Test disabled: Improved scheduling needs no spills/reloads any longer! @@ -642,8 +759,8 @@ ; CHECKXX: ld [[CSR]], -[[STACK_OFFSET]](1) # 8-byte Folded Reload ; ; Ensure no subsequent uses of callee-save register before end of function -; CHECK-NOT: {{[a-z]+}} [[CSR]] -; CHECK: blr +; CHECK-COM-NOT: {{[a-z]+}} [[CSR]] +; CHECK-COM: blr define signext i32 @transpose() { entry: %0 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 1), align 4 Index: llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll =================================================================== --- llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll +++ llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll @@ -2,6 +2,8 @@ ; RUN: llc -relocation-model=static -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -disable-ppc-sco=false --enable-shrink-wrap=true | FileCheck %s -check-prefix=CHECK-SCO-ONLY ; RUN: llc -relocation-model=static -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -disable-ppc-sco=false --enable-shrink-wrap=false | FileCheck %s -check-prefix=CHECK-SCO-ONLY ; RUN: llc -relocation-model=static -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -disable-ppc-sco=false 
--enable-shrink-wrap=true | FileCheck %s -check-prefix=CHECK-SCO-ONLY +; RUN: llc -relocation-model=pic -verify-machineinstrs < %s -mtriple=powerpc64-ibm-aix-xcoff -disable-ppc-sco=false --enable-shrink-wrap=false | FileCheck %s -check-prefix=CHECK-AIX +; RUN: llc -relocation-model=pic -verify-machineinstrs < %s -mtriple=powerpc64-ibm-aix-xcoff -disable-ppc-sco=false --enable-shrink-wrap=true | FileCheck %s -check-prefix=CHECK-AIX ; Edit: D63152 prevents stack popping before loads and stores, so shrink-wrap does nothing here %"class.clang::NamedDecl" = type { i32 } declare void @__assert_fail(); @@ -37,6 +39,11 @@ ; CHECK-SCO-ONLY: b LVComputationKind ; CHECK-SCO-ONLY: #TC_RETURNd8 ; CHECK-SCO-ONLY: bl __assert_fail +; +; CHECK-AIX-LABEL: ._ZNK5clang9NamedDecl23getLinkageAndVisibilityEv: +; CHECK-AIX-ONLY: stdu 1, -{{[0-9]+}}(1) +; CHECK-AIX-ONLY: bl .LVComputationKind +; CHECK-AIX-ONLY: bl .__assert_fail[PR] } define dso_local fastcc i8 @LVComputationKind( Index: llvm/test/CodeGen/PowerPC/shrink-wrap.ll =================================================================== --- llvm/test/CodeGen/PowerPC/shrink-wrap.ll +++ llvm/test/CodeGen/PowerPC/shrink-wrap.ll @@ -1,4 +1,7 @@ ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr9 | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr4 -mattr=-altivec | FileCheck %s -check-prefix=AIX -check-prefix=32AIX +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-ibm-aix-xcoff -mcpu=pwr4 -mattr=-altivec | FileCheck %s -check-prefix=AIX -check-prefix=64AIX + define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) { entry: %cmp5 = icmp sgt i32 %lim, 0 @@ -26,27 +29,73 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: cmpwi ; Prolog code -; CHECK: std -; CHECK: std -; CHECK: std -; CHECK: std +; CHECK: std +; CHECK: std +; CHECK: std +; CHECK: std ; CHECK: blt 0, .LBB0_3 ; CHECK: # %bb.1: ; CHECK-NEXT: clrldi ; CHECK-NEXT: mtctr -; CHECK-NEXT: li -; CHECK: 
.LBB0_2: +; CHECK-NEXT: li +; CHECK: .LBB0_2: ; CHECK: add -; CHECK: bdnz .LBB0_2 +; CHECK: bdnz .LBB0_2 ; CHECK-NEXT: b .LBB0_4 -; CHECK: .LBB0_3: -; CHECK-NEXT: li -; CHECK: .LBB0_4: +; CHECK: .LBB0_3: +; CHECK-NEXT: li +; CHECK: .LBB0_4: ; Epilog code -; CHECK: ld -; CHECK: ld +; CHECK: ld +; CHECK: ld ; CHECK: extsw -; CHECK: ld -; CHECK: ld +; CHECK: ld +; CHECK: ld ; CHECK: blr + +; AIX Specific Checks +; AIX-LABEL: .shrinkwrapme +; AIX: # %bb.0: +; AIX-NEXT: cmpwi +; +; Prolog code +; 32AIX: stw +; 32AIX: stw +; 32AIX: stw +; 32AIX: stw +; +; 64AIX: std +; 64AIX: std +; 64AIX: std +; 64AIX: std +; AIX: blt 0, L..BB0_3 +; +; AIX: # %bb.1: +; 64AIX-NEXT: clrldi +; 32AIX-NEXT: mr +; AIX-DAG: mtctr +; AIX-DAG: li +; +; AIX: L..BB0_2: +; AIX: add +; AIX: bdnz L..BB0_2 +; AIX-NEXT: b L..BB0_4 +; +; AIX: L..BB0_3: +; AIX-NEXT: li +; +; AIX: L..BB0_4: +; Epilog code +; 64AIX: extsw +; 64AIX: ld +; 64AIX: ld +; 64AIX: ld +; 64AIX: ld +; +; 32AIX: lwz +; 32AIX: lwz +; 32AIX: lwz +; 32AIX: lwz +; +; AIX: blr } Index: llvm/test/CodeGen/PowerPC/shrink-wrap.mir =================================================================== --- llvm/test/CodeGen/PowerPC/shrink-wrap.mir +++ llvm/test/CodeGen/PowerPC/shrink-wrap.mir @@ -1,42 +1,46 @@ # RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple powerpc64le-unknown-linux-gnu \ # RUN: -run-pass=shrink-wrap -o - %s | FileCheck %s +# RUN: llc -verify-machineinstrs -mcpu=pwr4 -mtriple powerpc-ibm-aix-xcoff \ +# RUN: -run-pass=shrink-wrap -mattr=-altivec -o - %s | FileCheck %s +# RUN: llc -verify-machineinstrs -mcpu=pwr4 -mtriple powerpc64-ibm-aix-xcoff \ +# RUN: -run-pass=shrink-wrap -mattr=-altivec -o - %s | FileCheck %s --- | ; ModuleID = 'test.ll' source_filename = "test.ll" target datalayout = "e-m:e-i64:64-n32:64" - + define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) { entry: %cmp5 = icmp sgt i32 %lim, 0 br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup - + for.body.preheader: ; preds = %entry %0 = 
add i32 %lim, -1 %1 = zext i32 %0 to i64 %2 = add nuw nsw i64 %1, 1 call void @llvm.set.loop.iterations.i64(i64 %2) br label %for.body - + for.cond.cleanup: ; preds = %for.body, %entry %Ret.0.lcssa = phi i32 [ 0, %entry ], [ %3, %for.body ] ret i32 %Ret.0.lcssa - + for.body: ; preds = %for.body, %for.body.preheader %Ret.06 = phi i32 [ %3, %for.body ], [ 0, %for.body.preheader ] %3 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"(i32 %a, i32 %Ret.06) %4 = call i1 @llvm.loop.decrement.i64(i64 1) br i1 %4, label %for.body, label %for.cond.cleanup } - + ; Function Attrs: noduplicate nounwind declare void @llvm.set.loop.iterations.i64(i64) #0 - + ; Function Attrs: noduplicate nounwind declare i1 @llvm.loop.decrement.i64(i64) #0 - + ; Function Attrs: nounwind declare void @llvm.stackprotector(i8*, i8**) #1 - + attributes #0 = { noduplicate nounwind } attributes #1 = { nounwind } @@ -83,37 +87,37 @@ bb.0.entry: successors: %bb.2(0x50000000), %bb.1(0x30000000) liveins: $x3, $x4 - + renamable $cr0 = CMPWI renamable $r4, 1 BCC 4, killed renamable $cr0, %bb.2 - + bb.1: successors: %bb.3(0x80000000) - + renamable $r4 = LI 0 B %bb.3 - + bb.2.for.body.preheader: successors: %bb.4(0x80000000) liveins: $x3, $x4 - + renamable $r4 = ADDI renamable $r4, -1, implicit killed $x4, implicit-def $x4 renamable $x4 = RLDICL killed renamable $x4, 0, 32 renamable $x4 = nuw nsw ADDI8 killed renamable $x4, 1 MTCTR8loop killed renamable $x4, implicit-def dead $ctr8 renamable $r4 = LI 0 B %bb.4 - + bb.3.for.cond.cleanup: liveins: $r4 - + renamable $x3 = EXTSW_32_64 killed renamable $r4 BLR8 implicit $lr8, implicit $rm, implicit $x3 - + bb.4.for.body: successors: %bb.4(0x7c000000), %bb.3(0x04000000) liveins: $r4, $x3 - + INLINEASM &"add $0, $1, $2", 0, 131082, def renamable $r4, 131081, renamable $r3, 131081, killed renamable $r4, 12, implicit-def dead early-clobber $r14, 12, 
implicit-def dead early-clobber $r15, 12, implicit-def dead early-clobber $r16, 12, implicit-def dead early-clobber $r17, 12, implicit-def dead early-clobber $r18, 12, implicit-def dead early-clobber $r19, 12, implicit-def dead early-clobber $r20, 12, implicit-def dead early-clobber $r21, 12, implicit-def dead early-clobber $r22, 12, implicit-def dead early-clobber $r23, 12, implicit-def dead early-clobber $r24, 12, implicit-def dead early-clobber $r25, 12, implicit-def dead early-clobber $r26, 12, implicit-def dead early-clobber $r27, 12, implicit-def dead early-clobber $r28, 12, implicit-def dead early-clobber $r29, 12, implicit-def dead early-clobber $r30, 12, implicit-def dead early-clobber $r31 BDNZ8 %bb.4, implicit-def dead $ctr8, implicit $ctr8 B %bb.3