diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -303,9 +303,14 @@ R31, R0, R1, FP, BP)> { // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so // put it at the end of the list. - let AltOrders = [(add (sub GPRC, R2), R2)]; + // On AIX, CSRs are allocated starting from R31 according to: + // https://www.ibm.com/docs/en/ssw_aix_72/assembler/assembler_pdf.pdf. + // This also helps set the correct `NumOfGPRsSaved` in the traceback table. + let AltOrders = [(add (sub GPRC, R2), R2), + (add (sequence "R%u", 2, 12), + (sequence "R%u", 31, 13), R0, R1, FP, BP)]; let AltOrderSelect = [{ - return MF.getSubtarget<PPCSubtarget>().is64BitELFABI(); + return MF.getSubtarget<PPCSubtarget>().getGPRAllocationOrderIdx(); }]; } @@ -314,9 +319,11 @@ X31, X13, X0, X1, FP8, BP8)> { // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so // put it at the end of the list. - let AltOrders = [(add (sub G8RC, X2), X2)]; + let AltOrders = [(add (sub G8RC, X2), X2), + (add (sequence "X%u", 2, 12), + (sequence "X%u", 31, 13), X0, X1, FP8, BP8)]; let AltOrderSelect = [{ - return MF.getSubtarget<PPCSubtarget>().is64BitELFABI(); + return MF.getSubtarget<PPCSubtarget>().getGPRAllocationOrderIdx(); }]; } @@ -326,18 +333,22 @@ def GPRC_NOR0 : RegisterClass<"PPC", [i32,f32], 32, (add (sub GPRC, R0), ZERO)> { // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so // put it at the end of the list. 
- let AltOrders = [(add (sub GPRC_NOR0, R2), R2)]; + let AltOrders = [(add (sub GPRC_NOR0, R2), R2), + (add (sequence "R%u", 2, 12), + (sequence "R%u", 31, 13), R1, FP, BP, ZERO)]; let AltOrderSelect = [{ - return MF.getSubtarget<PPCSubtarget>().is64BitELFABI(); + return MF.getSubtarget<PPCSubtarget>().getGPRAllocationOrderIdx(); }]; } def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)> { // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so // put it at the end of the list. - let AltOrders = [(add (sub G8RC_NOX0, X2), X2)]; + let AltOrders = [(add (sub G8RC_NOX0, X2), X2), + (add (sequence "X%u", 2, 12), + (sequence "X%u", 31, 13), X1, FP8, BP8, ZERO8)]; let AltOrderSelect = [{ - return MF.getSubtarget<PPCSubtarget>().is64BitELFABI(); + return MF.getSubtarget<PPCSubtarget>().getGPRAllocationOrderIdx(); }]; } diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -411,6 +411,16 @@ return PredictableSelectIsExpensive; } + // Selects the allocation order of GPRC, G8RC, GPRC_NOR0 and G8RC_NOX0; it + // must stay strictly consistent with the AltOrders in PPCRegisterInfo.td. 
+ unsigned getGPRAllocationOrderIdx() const { + // 0 selects the default order; N > 0 selects the N-th entry of AltOrders + // (1: R2 last for 64-bit ELF; 2: AIX, CSRs allocated from R31 down). + if (is64BitELFABI()) + return 1; + return isAIXABI() ? 2 : 0; + } + // GlobalISEL const CallLowering *getCallLowering() const override; const RegisterBankInfo *getRegBankInfo() const override; diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll --- a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll @@ -2337,7 +2337,7 @@ ; ASM64PWR4: mflr 0 ; ASM64PWR4-DAG: std 0, 16(1) -; ASM64PWR4-DAG: stdu 1, -256(1) +; ASM64PWR4-DAG: stdu 1, -240(1) ; ASM64PWR4-DAG: std [[REG:[0-9]+]], 112(1) ; ASM64PWR4-DAG: std [[REG:[0-9]+]], 120(1) ; ASM64PWR4-DAG: std [[REG:[0-9]+]], 128(1) diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll --- a/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll @@ -203,7 +203,7 @@ ; CHECKASM-LABEL: .call_test_byval_mem3: -; ASM32BIT: stwu 1, -112(1) +; ASM32BIT: stwu 1, -96(1) ; ASM32BIT-DAG: lwz [[REG:[0-9]+]], L..C{{[0-9]+}}(2) ; ASM32BIT-DAG: addi 3, 1, 56 ; ASM32BIT-DAG: addi 4, [[REG]], 24 @@ -216,7 +216,7 @@ ; ASM32BIT-DAG: lwz 9, 16([[REG]]) ; ASM32BIT-DAG: lwz 10, 20([[REG]]) ; ASM32BIT: bl .test_byval_mem3 -; ASM32BIT: addi 1, 1, 112 +; ASM32BIT: addi 1, 1, 96 ; The memcpy call was inlined in 64-bit so MIR test is redundant and omitted. 
; ASM64BIT: stdu 1, -128(1) @@ -319,7 +319,7 @@ ; 32BIT-NEXT: BL_NOP <mcsymbol .test_byval_mem4>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1 ; 32BIT-NEXT: ADJCALLSTACKUP 316, 0, implicit-def dead $r1, implicit $r1 -; ASM32BIT: stwu 1, -336(1) +; ASM32BIT: stwu 1, -320(1) ; ASM32BIT-NEXT: stw [[REG1:[0-9]+]], {{[0-9]+}}(1) ; ASM32BIT: lwz [[REG1]], L..C{{[0-9]+}}(2) ; ASM32BIT-DAG: lhz [[REG2:[0-9]+]], 28([[REG1]]) @@ -338,7 +338,7 @@ ; ASM32BIT-DAG: lwz 9, 20([[REG1]]) ; ASM32BIT-DAG: lwz 10, 24([[REG1]]) ; ASM32BIT: bl .test_byval_mem4 -; ASM32BIT: addi 1, 1, 336 +; ASM32BIT: addi 1, 1, 320 ; Confirm the expected memcpy call is independent of the call to test_byval_mem4. ; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 @@ -363,7 +363,7 @@ ; 64BIT-NEXT: BL8_NOP <mcsymbol .test_byval_mem4>, csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x2, implicit-def $r1 ; 64BIT-NEXT: ADJCALLSTACKUP 344, 0, implicit-def dead $r1, implicit $r1 -; ASM64BIT: stdu 1, -368(1) +; ASM64BIT: stdu 1, -352(1) ; ASM64BIT-DAG: ld [[REG1:[0-9]+]], L..C{{[0-9]+}}(2) ; ASM64BIT-DAG: addi 3, 1, 112 ; ASM64BIT-DAG: addi 4, [[REG1]], 24 @@ -383,7 +383,7 @@ ; ASM64BIT-DAG: ld 9, 8([[REG1]]) ; ASM64BIT-DAG: ld 10, 16([[REG1]]) ; ASM64BIT: bl .test_byval_mem4 -; ASM64BIT: addi 1, 1, 368 +; ASM64BIT: addi 1, 1, 352 define void @test_byval_mem4(i32, %struct_S31* byval(%struct_S31) align 1, %struct_S256* byval(%struct_S256) align 1 %s) { entry: diff --git a/llvm/test/CodeGen/PowerPC/aix-csr-alloc.ll b/llvm/test/CodeGen/PowerPC/aix-csr-alloc.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-csr-alloc.ll @@ -0,0 +1,30 @@ +; REQUIRES: asserts +; RUN: llc -mtriple=powerpc64-aix-xcoff -debug-only=regalloc < %s 2>&1 | \ +; RUN: FileCheck 
--check-prefix=AIX-64 %s +; RUN: llc -mtriple=powerpc-aix-xcoff -debug-only=regalloc < %s 2>&1 | \ +; RUN: FileCheck --check-prefix=AIX-32 %s + +define i32 @g(i32 %a, i32 %b) { +; AIX-64: AllocationOrder(G8RC_and_G8RC_NOX0) = [ $x3 $x4 $x5 $x6 $x7 $x8 $x9 $x10 $x11 $x12 $x31 $x30 $x29 $x28 $x27 $x26 $x25 $x24 $x23 $x22 $x21 $x20 $x19 $x18 $x17 $x16 $x15 $x14 ] +; AIX-64: AllocationOrder(G8RC) = [ $x3 $x4 $x5 $x6 $x7 $x8 $x9 $x10 $x11 $x12 $x0 $x31 $x30 $x29 $x28 $x27 $x26 $x25 $x24 $x23 $x22 $x21 $x20 $x19 $x18 $x17 $x16 $x15 $x14 ] +; AIX-32: AllocationOrder(GPRC) = [ $r3 $r4 $r5 $r6 $r7 $r8 $r9 $r10 $r11 $r12 $r0 $r31 $r30 $r29 $r28 $r27 $r26 $r25 $r24 $r23 $r22 $r21 $r20 $r19 $r18 $r17 $r16 $r15 $r14 $r13 ] +; AIX-32: AllocationOrder(GPRC_and_GPRC_NOR0) = [ $r3 $r4 $r5 $r6 $r7 $r8 $r9 $r10 $r11 $r12 $r31 $r30 $r29 $r28 $r27 $r26 $r25 $r24 $r23 $r22 $r21 $r20 $r19 $r18 $r17 $r16 $r15 $r14 $r13 ] + %c = add i32 %a, %b + %d = shl i32 %a, 4 + %cmp = icmp slt i32 %c, %d + %e = select i1 %cmp, i32 %a, i32 %b + ret i32 %e +} + +define float @f(float %a, float %b) { +; AIX-32: AllocationOrder(F4RC) = [ $f0 $f1 $f2 $f3 $f4 $f5 $f6 $f7 $f8 $f9 $f10 $f11 $f12 $f13 $f31 $f30 $f29 $f28 $f27 $f26 $f25 $f24 $f23 $f22 $f21 $f20 $f19 $f18 $f17 $f16 $f15 $f14 ] + %c = fadd float %a, %b + ret float %c +} + +define double @d(double %a, double %b) { +; AIX-64: AllocationOrder(VFRC) = [ $vf2 $vf3 $vf4 $vf5 $vf0 $vf1 $vf6 $vf7 $vf8 $vf9 $vf10 $vf11 $vf12 $vf13 $vf14 $vf15 $vf16 $vf17 $vf18 $vf19 $vf31 $vf30 $vf29 $vf28 $vf27 $vf26 $vf25 $vf24 $vf23 $vf22 $vf21 $vf20 ] +; AIX-64: AllocationOrder(F8RC) = [ $f0 $f1 $f2 $f3 $f4 $f5 $f6 $f7 $f8 $f9 $f10 $f11 $f12 $f13 $f31 $f30 $f29 $f28 $f27 $f26 $f25 $f24 $f23 $f22 $f21 $f20 $f19 $f18 $f17 $f16 $f15 $f14 ] + %c = fadd double %a, %b + ret double %c +} diff --git a/llvm/test/CodeGen/PowerPC/aix-tracetable-csr.ll b/llvm/test/CodeGen/PowerPC/aix-tracetable-csr.ll --- a/llvm/test/CodeGen/PowerPC/aix-tracetable-csr.ll +++ 
b/llvm/test/CodeGen/PowerPC/aix-tracetable-csr.ll @@ -14,10 +14,10 @@ ; Function Attrs: nounwind define void @baz(%3* %0) local_unnamed_addr #2 { -; AIX-64: std 30 -; AIX-64: .byte 0x02 # -HasVectorInfo, -HasExtensionTable, NumOfGPRsSaved = 2 -; AIX-32: stw 30 -; AIX-32: .byte 0x02 # -HasVectorInfo, -HasExtensionTable, NumOfGPRsSaved = 2 +; AIX-64: std 31 +; AIX-64: .byte 0x01 # -HasVectorInfo, -HasExtensionTable, NumOfGPRsSaved = 1 +; AIX-32: stw 31 +; AIX-32: .byte 0x01 # -HasVectorInfo, -HasExtensionTable, NumOfGPRsSaved = 1 %2 = call signext i32 @wibble(%1* nonnull undef) #2 %3 = call fastcc zeroext i32 @spam(%1* nonnull undef, %2* nonnull undef, %3* nonnull %0) unreachable diff --git a/llvm/test/CodeGen/PowerPC/inc-of-add.ll b/llvm/test/CodeGen/PowerPC/inc-of-add.ll --- a/llvm/test/CodeGen/PowerPC/inc-of-add.ll +++ b/llvm/test/CodeGen/PowerPC/inc-of-add.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=ppc32-unknown-unknown | FileCheck %s --check-prefixes=ALL,PPC32 ; RUN: llc < %s -mtriple=powerpc64-unknown-unknown | FileCheck %s --check-prefixes=ALL,PPC64,PPC64BE -; RUN: llc < %s -mtriple=powerpc64-ibm-aix-xcoff | FileCheck %s --check-prefixes=ALL,PPC64,PPC64BE +; RUN: llc < %s -mtriple=powerpc64-ibm-aix-xcoff | FileCheck %s --check-prefixes=ALL,PPC64,PPC64BE,AIX-PPC64 ; RUN: llc < %s -mtriple=powerpc64le-unknown-unknown | FileCheck %s --check-prefixes=ALL,PPC64,PPC64LE ; These two forms are equivalent: @@ -162,102 +162,102 @@ ; PPC32-NEXT: addi 1, 1, 64 ; PPC32-NEXT: blr ; -; PPC64BE-LABEL: vector_i128_i8: -; PPC64BE: # %bb.0: -; PPC64BE-NEXT: std 21, -88(1) # 8-byte Folded Spill -; PPC64BE-NEXT: lbz 21, 207(1) -; PPC64BE-NEXT: std 22, -80(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 23, -72(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 25, -56(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 24, -64(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill -; 
PPC64BE-NEXT: std 27, -40(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 26, -48(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill -; PPC64BE-NEXT: lbz 22, 199(1) -; PPC64BE-NEXT: lbz 23, 191(1) -; PPC64BE-NEXT: add 6, 21, 6 -; PPC64BE-NEXT: lbz 21, 231(1) -; PPC64BE-NEXT: add 5, 22, 5 -; PPC64BE-NEXT: lbz 22, 223(1) -; PPC64BE-NEXT: add 4, 23, 4 -; PPC64BE-NEXT: lbz 23, 215(1) -; PPC64BE-NEXT: add 9, 21, 9 -; PPC64BE-NEXT: lbz 25, 127(1) -; PPC64BE-NEXT: add 8, 22, 8 -; PPC64BE-NEXT: lbz 21, 255(1) -; PPC64BE-NEXT: add 7, 23, 7 -; PPC64BE-NEXT: lbz 24, 119(1) -; PPC64BE-NEXT: addi 9, 9, 1 -; PPC64BE-NEXT: lbz 22, 247(1) -; PPC64BE-NEXT: add 25, 21, 25 -; PPC64BE-NEXT: lbz 23, 239(1) -; PPC64BE-NEXT: addi 8, 8, 1 -; PPC64BE-NEXT: lbz 28, 151(1) -; PPC64BE-NEXT: add 24, 22, 24 -; PPC64BE-NEXT: lbz 21, 279(1) -; PPC64BE-NEXT: add 10, 23, 10 -; PPC64BE-NEXT: lbz 27, 143(1) -; PPC64BE-NEXT: addi 10, 10, 1 -; PPC64BE-NEXT: lbz 22, 271(1) -; PPC64BE-NEXT: add 28, 21, 28 -; PPC64BE-NEXT: lbz 26, 135(1) -; PPC64BE-NEXT: addi 7, 7, 1 -; PPC64BE-NEXT: lbz 23, 263(1) -; PPC64BE-NEXT: add 27, 22, 27 -; PPC64BE-NEXT: lbz 11, 183(1) -; PPC64BE-NEXT: addi 6, 6, 1 -; PPC64BE-NEXT: lbz 21, 311(1) -; PPC64BE-NEXT: add 26, 23, 26 -; PPC64BE-NEXT: lbz 12, 175(1) -; PPC64BE-NEXT: addi 5, 5, 1 -; PPC64BE-NEXT: lbz 0, 303(1) -; PPC64BE-NEXT: add 11, 21, 11 -; PPC64BE-NEXT: lbz 30, 167(1) -; PPC64BE-NEXT: addi 11, 11, 1 -; PPC64BE-NEXT: lbz 22, 295(1) -; PPC64BE-NEXT: add 12, 0, 12 -; PPC64BE-NEXT: lbz 29, 159(1) -; PPC64BE-NEXT: addi 4, 4, 1 -; PPC64BE-NEXT: lbz 23, 287(1) -; PPC64BE-NEXT: add 30, 22, 30 -; PPC64BE-NEXT: stb 11, 15(3) -; PPC64BE-NEXT: addi 11, 12, 1 -; PPC64BE-NEXT: add 29, 23, 29 -; PPC64BE-NEXT: stb 11, 14(3) -; PPC64BE-NEXT: addi 11, 30, 1 -; PPC64BE-NEXT: stb 11, 13(3) -; PPC64BE-NEXT: addi 11, 29, 1 -; PPC64BE-NEXT: stb 11, 12(3) -; PPC64BE-NEXT: addi 11, 28, 1 -; PPC64BE-NEXT: stb 11, 
11(3) -; PPC64BE-NEXT: addi 11, 27, 1 -; PPC64BE-NEXT: stb 11, 10(3) -; PPC64BE-NEXT: addi 11, 26, 1 -; PPC64BE-NEXT: stb 11, 9(3) -; PPC64BE-NEXT: addi 11, 25, 1 -; PPC64BE-NEXT: stb 11, 8(3) -; PPC64BE-NEXT: addi 11, 24, 1 -; PPC64BE-NEXT: stb 11, 7(3) -; PPC64BE-NEXT: stb 10, 6(3) -; PPC64BE-NEXT: stb 9, 5(3) -; PPC64BE-NEXT: stb 8, 4(3) -; PPC64BE-NEXT: stb 7, 3(3) -; PPC64BE-NEXT: stb 6, 2(3) -; PPC64BE-NEXT: stb 5, 1(3) -; PPC64BE-NEXT: stb 4, 0(3) -; PPC64BE-NEXT: ld 30, -16(1) # 8-byte Folded Reload -; PPC64BE-NEXT: ld 29, -24(1) # 8-byte Folded Reload -; PPC64BE-NEXT: ld 28, -32(1) # 8-byte Folded Reload -; PPC64BE-NEXT: ld 27, -40(1) # 8-byte Folded Reload -; PPC64BE-NEXT: ld 26, -48(1) # 8-byte Folded Reload -; PPC64BE-NEXT: ld 25, -56(1) # 8-byte Folded Reload -; PPC64BE-NEXT: ld 24, -64(1) # 8-byte Folded Reload -; PPC64BE-NEXT: ld 23, -72(1) # 8-byte Folded Reload -; PPC64BE-NEXT: ld 22, -80(1) # 8-byte Folded Reload -; PPC64BE-NEXT: ld 21, -88(1) # 8-byte Folded Reload -; PPC64BE-NEXT: blr +; AIX-PPC64-LABEL: vector_i128_i8: +; AIX-PPC64: # %bb.0: +; AIX-PPC64-NEXT: std 22, -80(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: lbz 22, 207(1) +; AIX-PPC64-NEXT: std 23, -72(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: std 24, -64(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: std 26, -48(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: std 25, -56(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: std 29, -24(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: std 28, -32(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: std 27, -40(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: std 31, -8(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: std 30, -16(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: lbz 23, 199(1) +; AIX-PPC64-NEXT: lbz 24, 191(1) +; AIX-PPC64-NEXT: add 6, 22, 6 +; AIX-PPC64-NEXT: lbz 22, 231(1) +; AIX-PPC64-NEXT: add 5, 23, 5 +; AIX-PPC64-NEXT: lbz 23, 223(1) +; AIX-PPC64-NEXT: add 4, 24, 4 +; AIX-PPC64-NEXT: lbz 24, 215(1) +; AIX-PPC64-NEXT: add 9, 22, 9 +; AIX-PPC64-NEXT: lbz 26, 
127(1) +; AIX-PPC64-NEXT: add 8, 23, 8 +; AIX-PPC64-NEXT: lbz 22, 255(1) +; AIX-PPC64-NEXT: add 7, 24, 7 +; AIX-PPC64-NEXT: lbz 25, 119(1) +; AIX-PPC64-NEXT: addi 9, 9, 1 +; AIX-PPC64-NEXT: lbz 23, 247(1) +; AIX-PPC64-NEXT: add 26, 22, 26 +; AIX-PPC64-NEXT: lbz 24, 239(1) +; AIX-PPC64-NEXT: addi 8, 8, 1 +; AIX-PPC64-NEXT: lbz 29, 151(1) +; AIX-PPC64-NEXT: add 25, 23, 25 +; AIX-PPC64-NEXT: lbz 22, 279(1) +; AIX-PPC64-NEXT: add 10, 24, 10 +; AIX-PPC64-NEXT: lbz 28, 143(1) +; AIX-PPC64-NEXT: addi 10, 10, 1 +; AIX-PPC64-NEXT: lbz 23, 271(1) +; AIX-PPC64-NEXT: add 29, 22, 29 +; AIX-PPC64-NEXT: lbz 27, 135(1) +; AIX-PPC64-NEXT: addi 7, 7, 1 +; AIX-PPC64-NEXT: lbz 24, 263(1) +; AIX-PPC64-NEXT: add 28, 23, 28 +; AIX-PPC64-NEXT: lbz 11, 183(1) +; AIX-PPC64-NEXT: addi 6, 6, 1 +; AIX-PPC64-NEXT: lbz 22, 311(1) +; AIX-PPC64-NEXT: add 27, 24, 27 +; AIX-PPC64-NEXT: lbz 12, 175(1) +; AIX-PPC64-NEXT: addi 5, 5, 1 +; AIX-PPC64-NEXT: lbz 0, 303(1) +; AIX-PPC64-NEXT: add 11, 22, 11 +; AIX-PPC64-NEXT: lbz 31, 167(1) +; AIX-PPC64-NEXT: addi 11, 11, 1 +; AIX-PPC64-NEXT: lbz 23, 295(1) +; AIX-PPC64-NEXT: add 12, 0, 12 +; AIX-PPC64-NEXT: lbz 30, 159(1) +; AIX-PPC64-NEXT: addi 4, 4, 1 +; AIX-PPC64-NEXT: lbz 24, 287(1) +; AIX-PPC64-NEXT: add 31, 23, 31 +; AIX-PPC64-NEXT: stb 11, 15(3) +; AIX-PPC64-NEXT: addi 11, 12, 1 +; AIX-PPC64-NEXT: add 30, 24, 30 +; AIX-PPC64-NEXT: stb 11, 14(3) +; AIX-PPC64-NEXT: addi 11, 31, 1 +; AIX-PPC64-NEXT: stb 11, 13(3) +; AIX-PPC64-NEXT: addi 11, 30, 1 +; AIX-PPC64-NEXT: stb 11, 12(3) +; AIX-PPC64-NEXT: addi 11, 29, 1 +; AIX-PPC64-NEXT: stb 11, 11(3) +; AIX-PPC64-NEXT: addi 11, 28, 1 +; AIX-PPC64-NEXT: stb 11, 10(3) +; AIX-PPC64-NEXT: addi 11, 27, 1 +; AIX-PPC64-NEXT: stb 11, 9(3) +; AIX-PPC64-NEXT: addi 11, 26, 1 +; AIX-PPC64-NEXT: stb 11, 8(3) +; AIX-PPC64-NEXT: addi 11, 25, 1 +; AIX-PPC64-NEXT: stb 11, 7(3) +; AIX-PPC64-NEXT: stb 10, 6(3) +; AIX-PPC64-NEXT: stb 9, 5(3) +; AIX-PPC64-NEXT: stb 8, 4(3) +; AIX-PPC64-NEXT: stb 7, 3(3) +; AIX-PPC64-NEXT: stb 6, 
2(3) +; AIX-PPC64-NEXT: stb 5, 1(3) +; AIX-PPC64-NEXT: stb 4, 0(3) +; AIX-PPC64-NEXT: ld 31, -8(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: ld 29, -24(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: ld 28, -32(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: ld 27, -40(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: ld 26, -48(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: ld 25, -56(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: ld 24, -64(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: ld 23, -72(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: ld 22, -80(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: blr ; ; PPC64LE-LABEL: vector_i128_i8: ; PPC64LE: # %bb.0: @@ -310,54 +310,54 @@ ; PPC32-NEXT: addi 1, 1, 32 ; PPC32-NEXT: blr ; -; PPC64BE-LABEL: vector_i128_i16: -; PPC64BE: # %bb.0: -; PPC64BE-NEXT: std 25, -56(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 26, -48(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 27, -40(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; PPC64BE-NEXT: lhz 11, 118(1) -; PPC64BE-NEXT: lhz 12, 182(1) -; PPC64BE-NEXT: lhz 0, 174(1) -; PPC64BE-NEXT: lhz 30, 166(1) -; PPC64BE-NEXT: add 11, 12, 11 -; PPC64BE-NEXT: lhz 29, 158(1) -; PPC64BE-NEXT: add 10, 0, 10 -; PPC64BE-NEXT: lhz 28, 142(1) -; PPC64BE-NEXT: add 9, 30, 9 -; PPC64BE-NEXT: lhz 27, 126(1) -; PPC64BE-NEXT: add 8, 29, 8 -; PPC64BE-NEXT: lhz 26, 134(1) -; PPC64BE-NEXT: add 6, 28, 6 -; PPC64BE-NEXT: lhz 25, 150(1) -; PPC64BE-NEXT: add 4, 27, 4 -; PPC64BE-NEXT: add 5, 26, 5 -; PPC64BE-NEXT: addi 11, 11, 1 -; PPC64BE-NEXT: add 7, 25, 7 -; PPC64BE-NEXT: addi 10, 10, 1 -; PPC64BE-NEXT: addi 9, 9, 1 -; PPC64BE-NEXT: addi 8, 8, 1 -; PPC64BE-NEXT: addi 7, 7, 1 -; PPC64BE-NEXT: addi 6, 6, 1 -; PPC64BE-NEXT: addi 5, 5, 1 -; PPC64BE-NEXT: addi 4, 4, 1 -; PPC64BE-NEXT: sth 11, 14(3) -; PPC64BE-NEXT: sth 10, 12(3) -; 
PPC64BE-NEXT: sth 9, 10(3) -; PPC64BE-NEXT: sth 8, 8(3) -; PPC64BE-NEXT: sth 7, 6(3) -; PPC64BE-NEXT: sth 6, 4(3) -; PPC64BE-NEXT: sth 5, 2(3) -; PPC64BE-NEXT: sth 4, 0(3) -; PPC64BE-NEXT: ld 30, -16(1) # 8-byte Folded Reload -; PPC64BE-NEXT: ld 29, -24(1) # 8-byte Folded Reload -; PPC64BE-NEXT: ld 28, -32(1) # 8-byte Folded Reload -; PPC64BE-NEXT: ld 27, -40(1) # 8-byte Folded Reload -; PPC64BE-NEXT: ld 26, -48(1) # 8-byte Folded Reload -; PPC64BE-NEXT: ld 25, -56(1) # 8-byte Folded Reload -; PPC64BE-NEXT: blr +; AIX-PPC64-LABEL: vector_i128_i16: +; AIX-PPC64: # %bb.0: +; AIX-PPC64-NEXT: std 26, -48(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: std 27, -40(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: std 28, -32(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: std 29, -24(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: std 30, -16(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: std 31, -8(1) # 8-byte Folded Spill +; AIX-PPC64-NEXT: lhz 11, 118(1) +; AIX-PPC64-NEXT: lhz 12, 182(1) +; AIX-PPC64-NEXT: lhz 0, 174(1) +; AIX-PPC64-NEXT: lhz 31, 166(1) +; AIX-PPC64-NEXT: add 11, 12, 11 +; AIX-PPC64-NEXT: lhz 30, 158(1) +; AIX-PPC64-NEXT: add 10, 0, 10 +; AIX-PPC64-NEXT: lhz 29, 142(1) +; AIX-PPC64-NEXT: add 9, 31, 9 +; AIX-PPC64-NEXT: lhz 28, 126(1) +; AIX-PPC64-NEXT: add 8, 30, 8 +; AIX-PPC64-NEXT: lhz 27, 134(1) +; AIX-PPC64-NEXT: add 6, 29, 6 +; AIX-PPC64-NEXT: lhz 26, 150(1) +; AIX-PPC64-NEXT: add 4, 28, 4 +; AIX-PPC64-NEXT: add 5, 27, 5 +; AIX-PPC64-NEXT: addi 11, 11, 1 +; AIX-PPC64-NEXT: add 7, 26, 7 +; AIX-PPC64-NEXT: addi 10, 10, 1 +; AIX-PPC64-NEXT: addi 9, 9, 1 +; AIX-PPC64-NEXT: addi 8, 8, 1 +; AIX-PPC64-NEXT: addi 7, 7, 1 +; AIX-PPC64-NEXT: addi 6, 6, 1 +; AIX-PPC64-NEXT: addi 5, 5, 1 +; AIX-PPC64-NEXT: addi 4, 4, 1 +; AIX-PPC64-NEXT: sth 11, 14(3) +; AIX-PPC64-NEXT: sth 10, 12(3) +; AIX-PPC64-NEXT: sth 9, 10(3) +; AIX-PPC64-NEXT: sth 8, 8(3) +; AIX-PPC64-NEXT: sth 7, 6(3) +; AIX-PPC64-NEXT: sth 6, 4(3) +; AIX-PPC64-NEXT: sth 5, 2(3) +; AIX-PPC64-NEXT: sth 4, 0(3) +; 
AIX-PPC64-NEXT: ld 31, -8(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: ld 29, -24(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: ld 28, -32(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: ld 27, -40(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: ld 26, -48(1) # 8-byte Folded Reload +; AIX-PPC64-NEXT: blr ; ; PPC64LE-LABEL: vector_i128_i16: ; PPC64LE: # %bb.0: