diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -4217,6 +4217,35 @@ return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal); } +// Generate a store with an offset as defined by the Offset parameter. +// For example if we have the following structure: +// |-------- 2 bytes --------|---- 1 byte ----| +// and we want to store the 1 byte element we would call this function +// with the Offset = 2 (as we want to jump over the first 2 bytes). +// Parameters are as follows: +// DAG - Selection DAG +// Val - The structure we are storing a part of (also supplies the chain). +// Arg - The address of the start of the structure. +// FuncArg - Function argument information. +// Offset - Byte offset into the structure where we want to start the store. +// MemVT - The type for the element being stored. +// PtrVT - The pointer type. +// dl - Debug Location. +static SDValue StoreWithOffset(SelectionDAG &DAG, SDValue Val, SDValue Arg, + const Argument *FuncArg, uint64_t Offset, + EVT MemVT, EVT PtrVT, const SDLoc &dl) { + // Generate the new address by adding the offset. + SDValue ArgOff = DAG.getConstant(Offset, dl, PtrVT); + Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff); + // Generate a shift to move the element of interest to the low-order bits. + SDValue SHRAmt = DAG.getConstant(Offset * 8, dl, MVT::i64); + SDValue Shifted = DAG.getNode(ISD::SRL, dl, MVT::i64, Val, SHRAmt); + // Generate the store to save the element. 
+ SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Shifted, Arg, + MachinePointerInfo(FuncArg, Offset), MemVT); + return Store; +} + SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, @@ -4371,19 +4400,55 @@ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store; - if (ObjSize==1 || ObjSize==2 || ObjSize==4) { - EVT ObjType = (ObjSize == 1 ? MVT::i8 : - (ObjSize == 2 ? MVT::i16 : MVT::i32)); + switch (ObjSize) { + case 1: + Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg, + MachinePointerInfo(&*FuncArg), MVT::i8); + break; + case 2: + Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg, + MachinePointerInfo(&*FuncArg), MVT::i16); + break; + case 3: + // i24 = i16 + i8 + Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg, + MachinePointerInfo(&*FuncArg), MVT::i16); + MemOps.push_back(Store); + Store = StoreWithOffset(DAG, Val, Arg, (&*FuncArg), 2, MVT::i8, + PtrVT, dl); + break; + case 4: Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg, - MachinePointerInfo(&*FuncArg), ObjType); - } else { - // For sizes that don't fit a truncating store (3, 5, 6, 7), - // store the whole register as-is to the parameter save area - // slot. 
- Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(&*FuncArg)); + MachinePointerInfo(&*FuncArg), MVT::i32); + break; + case 5: + // i40 = i32 + i8 + Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg, + MachinePointerInfo(&*FuncArg), MVT::i32); + MemOps.push_back(Store); + Store = StoreWithOffset(DAG, Val, Arg, (&*FuncArg), 4, MVT::i8, + PtrVT, dl); + break; + case 6: + // i48 = i32 + i16 + Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg, + MachinePointerInfo(&*FuncArg), MVT::i32); + MemOps.push_back(Store); + Store = StoreWithOffset(DAG, Val, Arg, (&*FuncArg), 4, MVT::i16, + PtrVT, dl); + break; + case 7: + // i56 = i32+ i16 +i8 + Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg, + MachinePointerInfo(&*FuncArg), MVT::i32); + MemOps.push_back(Store); + Store = StoreWithOffset(DAG, Val, Arg, (&*FuncArg), 4, MVT::i16, + PtrVT, dl); + MemOps.push_back(Store); + Store = StoreWithOffset(DAG, Val, Arg, (&*FuncArg), 6, MVT::i8, + PtrVT, dl); + break; } - MemOps.push_back(Store); } // Whether we copied from a register or not, advance the offset diff --git a/llvm/test/CodeGen/PowerPC/jaggedstructs.ll b/llvm/test/CodeGen/PowerPC/jaggedstructs.ll --- a/llvm/test/CodeGen/PowerPC/jaggedstructs.ll +++ b/llvm/test/CodeGen/PowerPC/jaggedstructs.ll @@ -18,10 +18,21 @@ ret void } -; CHECK-DAG: std 3, 160(1) -; CHECK-DAG: std 6, 184(1) -; CHECK-DAG: std 5, 176(1) -; CHECK-DAG: std 4, 168(1) +; CHECK-LABEL: test +; CHECK-DAG: stw 6, 185(1) +; CHECK-DAG: stw 5, 178(1) +; CHECK-DAG: stw 4, 171(1) +; CHECK-DAG: sth 3, 165(1) +; CHECK: rldicl 7, 6, 16, 48 +; CHECK: stb 7, 191(1) +; CHECK: rldicl 6, 6, 32, 32 +; CHECK: sth 6, 189(1) +; CHECK: rldicl 5, 5, 32, 32 +; CHECK: sth 5, 182(1) +; CHECK: rldicl 4, 4, 32, 32 +; CHECK: stb 4, 175(1) +; CHECK: rldicl 3, 3, 48, 16 +; CHECK: stb 3, 167(1) ; CHECK-DAG: lbz {{[0-9]+}}, 167(1) ; CHECK-DAG: lhz {{[0-9]+}}, 165(1) ; CHECK-DAG: stb {{[0-9]+}}, 55(1) diff --git 
a/llvm/test/CodeGen/PowerPC/ppc64-byval-multi-store.ll b/llvm/test/CodeGen/PowerPC/ppc64-byval-multi-store.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/ppc64-byval-multi-store.ll @@ -0,0 +1,998 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs --mtriple powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names < %s | FileCheck %s --check-prefix=P8LE +; RUN: llc -verify-machineinstrs --mtriple powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names < %s | FileCheck %s --check-prefix=P9LE +; RUN: llc -verify-machineinstrs --mtriple powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names < %s | FileCheck %s --check-prefix=P10LE +; RUN: llc -verify-machineinstrs --mtriple powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names < %s | FileCheck %s --check-prefix=P8BE +; RUN: llc -verify-machineinstrs --mtriple powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names < %s | FileCheck %s --check-prefix=P9BE +; RUN: llc -verify-machineinstrs --mtriple powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names < %s | FileCheck %s --check-prefix=P10BE + +define signext i8 @caller_1([1 x i8]* nocapture readonly byval([1 x i8]) %data) #0 { +; P8LE-LABEL: caller_1: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: mflr r0 +; P8LE-NEXT: std r0, 16(r1) +; P8LE-NEXT: stdu r1, -64(r1) +; P8LE-NEXT: stb r3, 48(r1) +; P8LE-NEXT: lbz r3, 48(r1) +; P8LE-NEXT: stb r3, 63(r1) +; P8LE-NEXT: addi r3, r1, 63 +; P8LE-NEXT: bl callee +; P8LE-NEXT: nop +; P8LE-NEXT: li r3, 0 +; P8LE-NEXT: addi r1, r1, 64 +; P8LE-NEXT: ld r0, 16(r1) +; P8LE-NEXT: mtlr r0 +; P8LE-NEXT: blr +; +; P9LE-LABEL: caller_1: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: mflr r0 +; P9LE-NEXT: std r0, 16(r1) +; P9LE-NEXT: stdu r1, -64(r1) +; P9LE-NEXT: stb r3, 48(r1) +; P9LE-NEXT: lbz r3, 48(r1) +; P9LE-NEXT: stb r3, 63(r1) +; P9LE-NEXT: addi 
r3, r1, 63 +; P9LE-NEXT: bl callee +; P9LE-NEXT: nop +; P9LE-NEXT: li r3, 0 +; P9LE-NEXT: addi r1, r1, 64 +; P9LE-NEXT: ld r0, 16(r1) +; P9LE-NEXT: mtlr r0 +; P9LE-NEXT: blr +; +; P10LE-LABEL: caller_1: +; P10LE: # %bb.0: # %entry +; P10LE-NEXT: mflr r0 +; P10LE-NEXT: std r0, 16(r1) +; P10LE-NEXT: stdu r1, -64(r1) +; P10LE-NEXT: stb r3, 48(r1) +; P10LE-NEXT: lbz r3, 48(r1) +; P10LE-NEXT: stb r3, 63(r1) +; P10LE-NEXT: addi r3, r1, 63 +; P10LE-NEXT: bl callee@notoc +; P10LE-NEXT: li r3, 0 +; P10LE-NEXT: addi r1, r1, 64 +; P10LE-NEXT: ld r0, 16(r1) +; P10LE-NEXT: mtlr r0 +; P10LE-NEXT: blr +; +; P8BE-LABEL: caller_1: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: mflr r0 +; P8BE-NEXT: std r0, 16(r1) +; P8BE-NEXT: stdu r1, -128(r1) +; P8BE-NEXT: stb r3, 183(r1) +; P8BE-NEXT: lbz r3, 183(r1) +; P8BE-NEXT: stb r3, 127(r1) +; P8BE-NEXT: addi r3, r1, 127 +; P8BE-NEXT: bl callee +; P8BE-NEXT: nop +; P8BE-NEXT: li r3, 0 +; P8BE-NEXT: addi r1, r1, 128 +; P8BE-NEXT: ld r0, 16(r1) +; P8BE-NEXT: mtlr r0 +; P8BE-NEXT: blr +; +; P9BE-LABEL: caller_1: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: mflr r0 +; P9BE-NEXT: std r0, 16(r1) +; P9BE-NEXT: stdu r1, -128(r1) +; P9BE-NEXT: stb r3, 183(r1) +; P9BE-NEXT: lbz r3, 183(r1) +; P9BE-NEXT: stb r3, 127(r1) +; P9BE-NEXT: addi r3, r1, 127 +; P9BE-NEXT: bl callee +; P9BE-NEXT: nop +; P9BE-NEXT: li r3, 0 +; P9BE-NEXT: addi r1, r1, 128 +; P9BE-NEXT: ld r0, 16(r1) +; P9BE-NEXT: mtlr r0 +; P9BE-NEXT: blr +; +; P10BE-LABEL: caller_1: +; P10BE: # %bb.0: # %entry +; P10BE-NEXT: mflr r0 +; P10BE-NEXT: std r0, 16(r1) +; P10BE-NEXT: stdu r1, -128(r1) +; P10BE-NEXT: stb r3, 183(r1) +; P10BE-NEXT: lbz r3, 183(r1) +; P10BE-NEXT: stb r3, 127(r1) +; P10BE-NEXT: addi r3, r1, 127 +; P10BE-NEXT: bl callee +; P10BE-NEXT: nop +; P10BE-NEXT: li r3, 0 +; P10BE-NEXT: addi r1, r1, 128 +; P10BE-NEXT: ld r0, 16(r1) +; P10BE-NEXT: mtlr r0 +; P10BE-NEXT: blr +entry: + %_param_data = alloca [1 x i8], align 1 + %.elt = getelementptr inbounds [1 x i8], [1 x i8]* %data, i64 0, i64 
0 + %.unpack = load i8, i8* %.elt, align 1 + %.temp.0.gep = getelementptr inbounds [1 x i8], [1 x i8]* %_param_data, i64 0, i64 0 + store i8 %.unpack, i8* %.temp.0.gep, align 1 + call void @callee(i8* nonnull %.temp.0.gep) + ret i8 0 +} + +define signext i8 @caller_2([2 x i8]* nocapture readonly byval([2 x i8]) %data) #0 { +; P8LE-LABEL: caller_2: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: mflr r0 +; P8LE-NEXT: std r0, 16(r1) +; P8LE-NEXT: stdu r1, -64(r1) +; P8LE-NEXT: sth r3, 48(r1) +; P8LE-NEXT: lhz r3, 48(r1) +; P8LE-NEXT: sth r3, 62(r1) +; P8LE-NEXT: addi r3, r1, 62 +; P8LE-NEXT: bl callee +; P8LE-NEXT: nop +; P8LE-NEXT: li r3, 0 +; P8LE-NEXT: addi r1, r1, 64 +; P8LE-NEXT: ld r0, 16(r1) +; P8LE-NEXT: mtlr r0 +; P8LE-NEXT: blr +; +; P9LE-LABEL: caller_2: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: mflr r0 +; P9LE-NEXT: std r0, 16(r1) +; P9LE-NEXT: stdu r1, -64(r1) +; P9LE-NEXT: sth r3, 48(r1) +; P9LE-NEXT: lhz r3, 48(r1) +; P9LE-NEXT: sth r3, 62(r1) +; P9LE-NEXT: addi r3, r1, 62 +; P9LE-NEXT: bl callee +; P9LE-NEXT: nop +; P9LE-NEXT: li r3, 0 +; P9LE-NEXT: addi r1, r1, 64 +; P9LE-NEXT: ld r0, 16(r1) +; P9LE-NEXT: mtlr r0 +; P9LE-NEXT: blr +; +; P10LE-LABEL: caller_2: +; P10LE: # %bb.0: # %entry +; P10LE-NEXT: mflr r0 +; P10LE-NEXT: std r0, 16(r1) +; P10LE-NEXT: stdu r1, -64(r1) +; P10LE-NEXT: sth r3, 48(r1) +; P10LE-NEXT: lhz r3, 48(r1) +; P10LE-NEXT: sth r3, 62(r1) +; P10LE-NEXT: addi r3, r1, 62 +; P10LE-NEXT: bl callee@notoc +; P10LE-NEXT: li r3, 0 +; P10LE-NEXT: addi r1, r1, 64 +; P10LE-NEXT: ld r0, 16(r1) +; P10LE-NEXT: mtlr r0 +; P10LE-NEXT: blr +; +; P8BE-LABEL: caller_2: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: mflr r0 +; P8BE-NEXT: std r0, 16(r1) +; P8BE-NEXT: stdu r1, -128(r1) +; P8BE-NEXT: sth r3, 182(r1) +; P8BE-NEXT: lhz r3, 182(r1) +; P8BE-NEXT: sth r3, 126(r1) +; P8BE-NEXT: addi r3, r1, 126 +; P8BE-NEXT: bl callee +; P8BE-NEXT: nop +; P8BE-NEXT: li r3, 0 +; P8BE-NEXT: addi r1, r1, 128 +; P8BE-NEXT: ld r0, 16(r1) +; P8BE-NEXT: mtlr r0 +; P8BE-NEXT: blr 
+; +; P9BE-LABEL: caller_2: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: mflr r0 +; P9BE-NEXT: std r0, 16(r1) +; P9BE-NEXT: stdu r1, -128(r1) +; P9BE-NEXT: sth r3, 182(r1) +; P9BE-NEXT: lhz r3, 182(r1) +; P9BE-NEXT: sth r3, 126(r1) +; P9BE-NEXT: addi r3, r1, 126 +; P9BE-NEXT: bl callee +; P9BE-NEXT: nop +; P9BE-NEXT: li r3, 0 +; P9BE-NEXT: addi r1, r1, 128 +; P9BE-NEXT: ld r0, 16(r1) +; P9BE-NEXT: mtlr r0 +; P9BE-NEXT: blr +; +; P10BE-LABEL: caller_2: +; P10BE: # %bb.0: # %entry +; P10BE-NEXT: mflr r0 +; P10BE-NEXT: std r0, 16(r1) +; P10BE-NEXT: stdu r1, -128(r1) +; P10BE-NEXT: sth r3, 182(r1) +; P10BE-NEXT: lhz r3, 182(r1) +; P10BE-NEXT: sth r3, 126(r1) +; P10BE-NEXT: addi r3, r1, 126 +; P10BE-NEXT: bl callee +; P10BE-NEXT: nop +; P10BE-NEXT: li r3, 0 +; P10BE-NEXT: addi r1, r1, 128 +; P10BE-NEXT: ld r0, 16(r1) +; P10BE-NEXT: mtlr r0 +; P10BE-NEXT: blr +entry: + %_param_data = alloca [2 x i8], align 1 + %.elt = getelementptr inbounds [2 x i8], [2 x i8]* %data, i64 0, i64 0 + %.unpack = load i8, i8* %.elt, align 1 + %.elt1 = getelementptr inbounds [2 x i8], [2 x i8]* %data, i64 0, i64 1 + %.unpack2 = load i8, i8* %.elt1, align 1 + %.temp.0.gep = getelementptr inbounds [2 x i8], [2 x i8]* %_param_data, i64 0, i64 0 + store i8 %.unpack, i8* %.temp.0.gep, align 1 + %.temp.1.gep = getelementptr inbounds [2 x i8], [2 x i8]* %_param_data, i64 0, i64 1 + store i8 %.unpack2, i8* %.temp.1.gep, align 1 + call void @callee(i8* nonnull %.temp.0.gep) + ret i8 0 +} + +define signext i8 @caller_3([3 x i8]* nocapture readonly byval([3 x i8]) %data) #0 { +; P8LE-LABEL: caller_3: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: mflr r0 +; P8LE-NEXT: std r0, 16(r1) +; P8LE-NEXT: stdu r1, -64(r1) +; P8LE-NEXT: rldicl r4, r3, 48, 16 +; P8LE-NEXT: sth r3, 48(r1) +; P8LE-NEXT: stb r4, 50(r1) +; P8LE-NEXT: lhz r3, 48(r1) +; P8LE-NEXT: lbz r4, 50(r1) +; P8LE-NEXT: sth r3, 61(r1) +; P8LE-NEXT: addi r3, r1, 61 +; P8LE-NEXT: stb r4, 63(r1) +; P8LE-NEXT: bl callee +; P8LE-NEXT: nop +; P8LE-NEXT: li r3, 0 
+; P8LE-NEXT: addi r1, r1, 64 +; P8LE-NEXT: ld r0, 16(r1) +; P8LE-NEXT: mtlr r0 +; P8LE-NEXT: blr +; +; P9LE-LABEL: caller_3: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: mflr r0 +; P9LE-NEXT: std r0, 16(r1) +; P9LE-NEXT: stdu r1, -64(r1) +; P9LE-NEXT: sth r3, 48(r1) +; P9LE-NEXT: rldicl r3, r3, 48, 16 +; P9LE-NEXT: stb r3, 50(r1) +; P9LE-NEXT: lhz r3, 48(r1) +; P9LE-NEXT: lbz r4, 50(r1) +; P9LE-NEXT: sth r3, 61(r1) +; P9LE-NEXT: addi r3, r1, 61 +; P9LE-NEXT: stb r4, 63(r1) +; P9LE-NEXT: bl callee +; P9LE-NEXT: nop +; P9LE-NEXT: li r3, 0 +; P9LE-NEXT: addi r1, r1, 64 +; P9LE-NEXT: ld r0, 16(r1) +; P9LE-NEXT: mtlr r0 +; P9LE-NEXT: blr +; +; P10LE-LABEL: caller_3: +; P10LE: # %bb.0: # %entry +; P10LE-NEXT: mflr r0 +; P10LE-NEXT: std r0, 16(r1) +; P10LE-NEXT: stdu r1, -64(r1) +; P10LE-NEXT: sth r3, 48(r1) +; P10LE-NEXT: rldicl r3, r3, 48, 16 +; P10LE-NEXT: stb r3, 50(r1) +; P10LE-NEXT: lhz r3, 48(r1) +; P10LE-NEXT: lbz r4, 50(r1) +; P10LE-NEXT: sth r3, 61(r1) +; P10LE-NEXT: addi r3, r1, 61 +; P10LE-NEXT: stb r4, 63(r1) +; P10LE-NEXT: bl callee@notoc +; P10LE-NEXT: li r3, 0 +; P10LE-NEXT: addi r1, r1, 64 +; P10LE-NEXT: ld r0, 16(r1) +; P10LE-NEXT: mtlr r0 +; P10LE-NEXT: blr +; +; P8BE-LABEL: caller_3: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: mflr r0 +; P8BE-NEXT: std r0, 16(r1) +; P8BE-NEXT: stdu r1, -128(r1) +; P8BE-NEXT: rldicl r4, r3, 48, 16 +; P8BE-NEXT: sth r3, 181(r1) +; P8BE-NEXT: stb r4, 183(r1) +; P8BE-NEXT: lhz r3, 181(r1) +; P8BE-NEXT: lbz r4, 183(r1) +; P8BE-NEXT: sth r3, 125(r1) +; P8BE-NEXT: addi r3, r1, 125 +; P8BE-NEXT: stb r4, 127(r1) +; P8BE-NEXT: bl callee +; P8BE-NEXT: nop +; P8BE-NEXT: li r3, 0 +; P8BE-NEXT: addi r1, r1, 128 +; P8BE-NEXT: ld r0, 16(r1) +; P8BE-NEXT: mtlr r0 +; P8BE-NEXT: blr +; +; P9BE-LABEL: caller_3: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: mflr r0 +; P9BE-NEXT: std r0, 16(r1) +; P9BE-NEXT: stdu r1, -128(r1) +; P9BE-NEXT: sth r3, 181(r1) +; P9BE-NEXT: rldicl r3, r3, 48, 16 +; P9BE-NEXT: stb r3, 183(r1) +; P9BE-NEXT: lhz r3, 181(r1) 
+; P9BE-NEXT: lbz r4, 183(r1) +; P9BE-NEXT: sth r3, 125(r1) +; P9BE-NEXT: addi r3, r1, 125 +; P9BE-NEXT: stb r4, 127(r1) +; P9BE-NEXT: bl callee +; P9BE-NEXT: nop +; P9BE-NEXT: li r3, 0 +; P9BE-NEXT: addi r1, r1, 128 +; P9BE-NEXT: ld r0, 16(r1) +; P9BE-NEXT: mtlr r0 +; P9BE-NEXT: blr +; +; P10BE-LABEL: caller_3: +; P10BE: # %bb.0: # %entry +; P10BE-NEXT: mflr r0 +; P10BE-NEXT: std r0, 16(r1) +; P10BE-NEXT: stdu r1, -128(r1) +; P10BE-NEXT: sth r3, 181(r1) +; P10BE-NEXT: rldicl r3, r3, 48, 16 +; P10BE-NEXT: stb r3, 183(r1) +; P10BE-NEXT: lhz r3, 181(r1) +; P10BE-NEXT: lbz r4, 183(r1) +; P10BE-NEXT: sth r3, 125(r1) +; P10BE-NEXT: addi r3, r1, 125 +; P10BE-NEXT: stb r4, 127(r1) +; P10BE-NEXT: bl callee +; P10BE-NEXT: nop +; P10BE-NEXT: li r3, 0 +; P10BE-NEXT: addi r1, r1, 128 +; P10BE-NEXT: ld r0, 16(r1) +; P10BE-NEXT: mtlr r0 +; P10BE-NEXT: blr +entry: + %_param_data = alloca [3 x i8], align 1 + %.elt = getelementptr inbounds [3 x i8], [3 x i8]* %data, i64 0, i64 0 + %.unpack = load i8, i8* %.elt, align 1 + %.elt1 = getelementptr inbounds [3 x i8], [3 x i8]* %data, i64 0, i64 1 + %.unpack2 = load i8, i8* %.elt1, align 1 + %.elt3 = getelementptr inbounds [3 x i8], [3 x i8]* %data, i64 0, i64 2 + %.unpack4 = load i8, i8* %.elt3, align 1 + %.temp.0.gep = getelementptr inbounds [3 x i8], [3 x i8]* %_param_data, i64 0, i64 0 + store i8 %.unpack, i8* %.temp.0.gep, align 1 + %.temp.1.gep = getelementptr inbounds [3 x i8], [3 x i8]* %_param_data, i64 0, i64 1 + store i8 %.unpack2, i8* %.temp.1.gep, align 1 + %.temp.2.gep = getelementptr inbounds [3 x i8], [3 x i8]* %_param_data, i64 0, i64 2 + store i8 %.unpack4, i8* %.temp.2.gep, align 1 + call void @callee(i8* nonnull %.temp.0.gep) + ret i8 0 +} + +define signext i8 @caller_4([4 x i8]* nocapture readonly byval([4 x i8]) %data) #0 { +; P8LE-LABEL: caller_4: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: mflr r0 +; P8LE-NEXT: std r0, 16(r1) +; P8LE-NEXT: stdu r1, -64(r1) +; P8LE-NEXT: stw r3, 48(r1) +; P8LE-NEXT: lwz r3, 48(r1) +; 
P8LE-NEXT: stw r3, 60(r1) +; P8LE-NEXT: addi r3, r1, 60 +; P8LE-NEXT: bl callee +; P8LE-NEXT: nop +; P8LE-NEXT: li r3, 0 +; P8LE-NEXT: addi r1, r1, 64 +; P8LE-NEXT: ld r0, 16(r1) +; P8LE-NEXT: mtlr r0 +; P8LE-NEXT: blr +; +; P9LE-LABEL: caller_4: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: mflr r0 +; P9LE-NEXT: std r0, 16(r1) +; P9LE-NEXT: stdu r1, -64(r1) +; P9LE-NEXT: stw r3, 48(r1) +; P9LE-NEXT: lwz r3, 48(r1) +; P9LE-NEXT: stw r3, 60(r1) +; P9LE-NEXT: addi r3, r1, 60 +; P9LE-NEXT: bl callee +; P9LE-NEXT: nop +; P9LE-NEXT: li r3, 0 +; P9LE-NEXT: addi r1, r1, 64 +; P9LE-NEXT: ld r0, 16(r1) +; P9LE-NEXT: mtlr r0 +; P9LE-NEXT: blr +; +; P10LE-LABEL: caller_4: +; P10LE: # %bb.0: # %entry +; P10LE-NEXT: mflr r0 +; P10LE-NEXT: std r0, 16(r1) +; P10LE-NEXT: stdu r1, -64(r1) +; P10LE-NEXT: stw r3, 48(r1) +; P10LE-NEXT: lwz r3, 48(r1) +; P10LE-NEXT: stw r3, 60(r1) +; P10LE-NEXT: addi r3, r1, 60 +; P10LE-NEXT: bl callee@notoc +; P10LE-NEXT: li r3, 0 +; P10LE-NEXT: addi r1, r1, 64 +; P10LE-NEXT: ld r0, 16(r1) +; P10LE-NEXT: mtlr r0 +; P10LE-NEXT: blr +; +; P8BE-LABEL: caller_4: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: mflr r0 +; P8BE-NEXT: std r0, 16(r1) +; P8BE-NEXT: stdu r1, -128(r1) +; P8BE-NEXT: stw r3, 180(r1) +; P8BE-NEXT: lwz r3, 180(r1) +; P8BE-NEXT: stw r3, 124(r1) +; P8BE-NEXT: addi r3, r1, 124 +; P8BE-NEXT: bl callee +; P8BE-NEXT: nop +; P8BE-NEXT: li r3, 0 +; P8BE-NEXT: addi r1, r1, 128 +; P8BE-NEXT: ld r0, 16(r1) +; P8BE-NEXT: mtlr r0 +; P8BE-NEXT: blr +; +; P9BE-LABEL: caller_4: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: mflr r0 +; P9BE-NEXT: std r0, 16(r1) +; P9BE-NEXT: stdu r1, -128(r1) +; P9BE-NEXT: stw r3, 180(r1) +; P9BE-NEXT: lwz r3, 180(r1) +; P9BE-NEXT: stw r3, 124(r1) +; P9BE-NEXT: addi r3, r1, 124 +; P9BE-NEXT: bl callee +; P9BE-NEXT: nop +; P9BE-NEXT: li r3, 0 +; P9BE-NEXT: addi r1, r1, 128 +; P9BE-NEXT: ld r0, 16(r1) +; P9BE-NEXT: mtlr r0 +; P9BE-NEXT: blr +; +; P10BE-LABEL: caller_4: +; P10BE: # %bb.0: # %entry +; P10BE-NEXT: mflr r0 +; P10BE-NEXT: 
std r0, 16(r1) +; P10BE-NEXT: stdu r1, -128(r1) +; P10BE-NEXT: stw r3, 180(r1) +; P10BE-NEXT: lwz r3, 180(r1) +; P10BE-NEXT: stw r3, 124(r1) +; P10BE-NEXT: addi r3, r1, 124 +; P10BE-NEXT: bl callee +; P10BE-NEXT: nop +; P10BE-NEXT: li r3, 0 +; P10BE-NEXT: addi r1, r1, 128 +; P10BE-NEXT: ld r0, 16(r1) +; P10BE-NEXT: mtlr r0 +; P10BE-NEXT: blr +entry: + %_param_data = alloca [4 x i8], align 1 + %.elt = getelementptr inbounds [4 x i8], [4 x i8]* %data, i64 0, i64 0 + %.unpack = load i8, i8* %.elt, align 1 + %.elt1 = getelementptr inbounds [4 x i8], [4 x i8]* %data, i64 0, i64 1 + %.unpack2 = load i8, i8* %.elt1, align 1 + %.elt3 = getelementptr inbounds [4 x i8], [4 x i8]* %data, i64 0, i64 2 + %.unpack4 = load i8, i8* %.elt3, align 1 + %.elt5 = getelementptr inbounds [4 x i8], [4 x i8]* %data, i64 0, i64 3 + %.unpack6 = load i8, i8* %.elt5, align 1 + %.temp.0.gep = getelementptr inbounds [4 x i8], [4 x i8]* %_param_data, i64 0, i64 0 + store i8 %.unpack, i8* %.temp.0.gep, align 1 + %.temp.1.gep = getelementptr inbounds [4 x i8], [4 x i8]* %_param_data, i64 0, i64 1 + store i8 %.unpack2, i8* %.temp.1.gep, align 1 + %.temp.2.gep = getelementptr inbounds [4 x i8], [4 x i8]* %_param_data, i64 0, i64 2 + store i8 %.unpack4, i8* %.temp.2.gep, align 1 + %.temp.3.gep = getelementptr inbounds [4 x i8], [4 x i8]* %_param_data, i64 0, i64 3 + store i8 %.unpack6, i8* %.temp.3.gep, align 1 + call void @callee(i8* nonnull %.temp.0.gep) + ret i8 0 +} + +define signext i8 @caller_5([5 x i8]* nocapture readonly byval([5 x i8]) %data) #0 { +; P8LE-LABEL: caller_5: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: mflr r0 +; P8LE-NEXT: std r0, 16(r1) +; P8LE-NEXT: stdu r1, -64(r1) +; P8LE-NEXT: rldicl r4, r3, 32, 32 +; P8LE-NEXT: stw r3, 48(r1) +; P8LE-NEXT: stb r4, 52(r1) +; P8LE-NEXT: lwz r3, 48(r1) +; P8LE-NEXT: lbz r4, 52(r1) +; P8LE-NEXT: stw r3, 59(r1) +; P8LE-NEXT: addi r3, r1, 59 +; P8LE-NEXT: stb r4, 63(r1) +; P8LE-NEXT: bl callee +; P8LE-NEXT: nop +; P8LE-NEXT: li r3, 0 +; P8LE-NEXT: 
addi r1, r1, 64 +; P8LE-NEXT: ld r0, 16(r1) +; P8LE-NEXT: mtlr r0 +; P8LE-NEXT: blr +; +; P9LE-LABEL: caller_5: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: mflr r0 +; P9LE-NEXT: std r0, 16(r1) +; P9LE-NEXT: stdu r1, -64(r1) +; P9LE-NEXT: stw r3, 48(r1) +; P9LE-NEXT: rldicl r3, r3, 32, 32 +; P9LE-NEXT: stb r3, 52(r1) +; P9LE-NEXT: lwz r3, 48(r1) +; P9LE-NEXT: lbz r4, 52(r1) +; P9LE-NEXT: stw r3, 59(r1) +; P9LE-NEXT: addi r3, r1, 59 +; P9LE-NEXT: stb r4, 63(r1) +; P9LE-NEXT: bl callee +; P9LE-NEXT: nop +; P9LE-NEXT: li r3, 0 +; P9LE-NEXT: addi r1, r1, 64 +; P9LE-NEXT: ld r0, 16(r1) +; P9LE-NEXT: mtlr r0 +; P9LE-NEXT: blr +; +; P10LE-LABEL: caller_5: +; P10LE: # %bb.0: # %entry +; P10LE-NEXT: mflr r0 +; P10LE-NEXT: std r0, 16(r1) +; P10LE-NEXT: stdu r1, -64(r1) +; P10LE-NEXT: stw r3, 48(r1) +; P10LE-NEXT: rldicl r3, r3, 32, 32 +; P10LE-NEXT: stb r3, 52(r1) +; P10LE-NEXT: lwz r3, 48(r1) +; P10LE-NEXT: lbz r4, 52(r1) +; P10LE-NEXT: stw r3, 59(r1) +; P10LE-NEXT: addi r3, r1, 59 +; P10LE-NEXT: stb r4, 63(r1) +; P10LE-NEXT: bl callee@notoc +; P10LE-NEXT: li r3, 0 +; P10LE-NEXT: addi r1, r1, 64 +; P10LE-NEXT: ld r0, 16(r1) +; P10LE-NEXT: mtlr r0 +; P10LE-NEXT: blr +; +; P8BE-LABEL: caller_5: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: mflr r0 +; P8BE-NEXT: std r0, 16(r1) +; P8BE-NEXT: stdu r1, -128(r1) +; P8BE-NEXT: rldicl r4, r3, 32, 32 +; P8BE-NEXT: stw r3, 179(r1) +; P8BE-NEXT: stb r4, 183(r1) +; P8BE-NEXT: lwz r3, 179(r1) +; P8BE-NEXT: lbz r4, 183(r1) +; P8BE-NEXT: stw r3, 123(r1) +; P8BE-NEXT: addi r3, r1, 123 +; P8BE-NEXT: stb r4, 127(r1) +; P8BE-NEXT: bl callee +; P8BE-NEXT: nop +; P8BE-NEXT: li r3, 0 +; P8BE-NEXT: addi r1, r1, 128 +; P8BE-NEXT: ld r0, 16(r1) +; P8BE-NEXT: mtlr r0 +; P8BE-NEXT: blr +; +; P9BE-LABEL: caller_5: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: mflr r0 +; P9BE-NEXT: std r0, 16(r1) +; P9BE-NEXT: stdu r1, -128(r1) +; P9BE-NEXT: stw r3, 179(r1) +; P9BE-NEXT: rldicl r3, r3, 32, 32 +; P9BE-NEXT: stb r3, 183(r1) +; P9BE-NEXT: lwz r3, 179(r1) +; P9BE-NEXT: 
lbz r4, 183(r1) +; P9BE-NEXT: stw r3, 123(r1) +; P9BE-NEXT: addi r3, r1, 123 +; P9BE-NEXT: stb r4, 127(r1) +; P9BE-NEXT: bl callee +; P9BE-NEXT: nop +; P9BE-NEXT: li r3, 0 +; P9BE-NEXT: addi r1, r1, 128 +; P9BE-NEXT: ld r0, 16(r1) +; P9BE-NEXT: mtlr r0 +; P9BE-NEXT: blr +; +; P10BE-LABEL: caller_5: +; P10BE: # %bb.0: # %entry +; P10BE-NEXT: mflr r0 +; P10BE-NEXT: std r0, 16(r1) +; P10BE-NEXT: stdu r1, -128(r1) +; P10BE-NEXT: stw r3, 179(r1) +; P10BE-NEXT: rldicl r3, r3, 32, 32 +; P10BE-NEXT: stb r3, 183(r1) +; P10BE-NEXT: lwz r3, 179(r1) +; P10BE-NEXT: lbz r4, 183(r1) +; P10BE-NEXT: stw r3, 123(r1) +; P10BE-NEXT: addi r3, r1, 123 +; P10BE-NEXT: stb r4, 127(r1) +; P10BE-NEXT: bl callee +; P10BE-NEXT: nop +; P10BE-NEXT: li r3, 0 +; P10BE-NEXT: addi r1, r1, 128 +; P10BE-NEXT: ld r0, 16(r1) +; P10BE-NEXT: mtlr r0 +; P10BE-NEXT: blr +entry: + %_param_data = alloca [5 x i8], align 1 + %.elt = getelementptr inbounds [5 x i8], [5 x i8]* %data, i64 0, i64 0 + %.unpack = load i8, i8* %.elt, align 1 + %.elt1 = getelementptr inbounds [5 x i8], [5 x i8]* %data, i64 0, i64 1 + %.unpack2 = load i8, i8* %.elt1, align 1 + %.elt3 = getelementptr inbounds [5 x i8], [5 x i8]* %data, i64 0, i64 2 + %.unpack4 = load i8, i8* %.elt3, align 1 + %.elt5 = getelementptr inbounds [5 x i8], [5 x i8]* %data, i64 0, i64 3 + %.unpack6 = load i8, i8* %.elt5, align 1 + %.elt7 = getelementptr inbounds [5 x i8], [5 x i8]* %data, i64 0, i64 4 + %.unpack8 = load i8, i8* %.elt7, align 1 + %.temp.0.gep = getelementptr inbounds [5 x i8], [5 x i8]* %_param_data, i64 0, i64 0 + store i8 %.unpack, i8* %.temp.0.gep, align 1 + %.temp.1.gep = getelementptr inbounds [5 x i8], [5 x i8]* %_param_data, i64 0, i64 1 + store i8 %.unpack2, i8* %.temp.1.gep, align 1 + %.temp.2.gep = getelementptr inbounds [5 x i8], [5 x i8]* %_param_data, i64 0, i64 2 + store i8 %.unpack4, i8* %.temp.2.gep, align 1 + %.temp.3.gep = getelementptr inbounds [5 x i8], [5 x i8]* %_param_data, i64 0, i64 3 + store i8 %.unpack6, i8* 
%.temp.3.gep, align 1 + %.temp.4.gep = getelementptr inbounds [5 x i8], [5 x i8]* %_param_data, i64 0, i64 4 + store i8 %.unpack8, i8* %.temp.4.gep, align 1 + call void @callee(i8* nonnull %.temp.0.gep) + ret i8 0 +} + +define signext i8 @caller_6([6 x i8]* nocapture readonly byval([6 x i8]) %data) #0 { +; P8LE-LABEL: caller_6: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: mflr r0 +; P8LE-NEXT: std r0, 16(r1) +; P8LE-NEXT: stdu r1, -64(r1) +; P8LE-NEXT: rldicl r4, r3, 32, 32 +; P8LE-NEXT: stw r3, 48(r1) +; P8LE-NEXT: sth r4, 52(r1) +; P8LE-NEXT: lwz r3, 48(r1) +; P8LE-NEXT: lhz r4, 52(r1) +; P8LE-NEXT: stw r3, 58(r1) +; P8LE-NEXT: addi r3, r1, 58 +; P8LE-NEXT: sth r4, 62(r1) +; P8LE-NEXT: bl callee +; P8LE-NEXT: nop +; P8LE-NEXT: li r3, 0 +; P8LE-NEXT: addi r1, r1, 64 +; P8LE-NEXT: ld r0, 16(r1) +; P8LE-NEXT: mtlr r0 +; P8LE-NEXT: blr +; +; P9LE-LABEL: caller_6: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: mflr r0 +; P9LE-NEXT: std r0, 16(r1) +; P9LE-NEXT: stdu r1, -64(r1) +; P9LE-NEXT: stw r3, 48(r1) +; P9LE-NEXT: rldicl r3, r3, 32, 32 +; P9LE-NEXT: sth r3, 52(r1) +; P9LE-NEXT: lwz r3, 48(r1) +; P9LE-NEXT: lhz r4, 52(r1) +; P9LE-NEXT: stw r3, 58(r1) +; P9LE-NEXT: addi r3, r1, 58 +; P9LE-NEXT: sth r4, 62(r1) +; P9LE-NEXT: bl callee +; P9LE-NEXT: nop +; P9LE-NEXT: li r3, 0 +; P9LE-NEXT: addi r1, r1, 64 +; P9LE-NEXT: ld r0, 16(r1) +; P9LE-NEXT: mtlr r0 +; P9LE-NEXT: blr +; +; P10LE-LABEL: caller_6: +; P10LE: # %bb.0: # %entry +; P10LE-NEXT: mflr r0 +; P10LE-NEXT: std r0, 16(r1) +; P10LE-NEXT: stdu r1, -64(r1) +; P10LE-NEXT: stw r3, 48(r1) +; P10LE-NEXT: rldicl r3, r3, 32, 32 +; P10LE-NEXT: sth r3, 52(r1) +; P10LE-NEXT: lwz r3, 48(r1) +; P10LE-NEXT: lhz r4, 52(r1) +; P10LE-NEXT: stw r3, 58(r1) +; P10LE-NEXT: addi r3, r1, 58 +; P10LE-NEXT: sth r4, 62(r1) +; P10LE-NEXT: bl callee@notoc +; P10LE-NEXT: li r3, 0 +; P10LE-NEXT: addi r1, r1, 64 +; P10LE-NEXT: ld r0, 16(r1) +; P10LE-NEXT: mtlr r0 +; P10LE-NEXT: blr +; +; P8BE-LABEL: caller_6: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: 
mflr r0 +; P8BE-NEXT: std r0, 16(r1) +; P8BE-NEXT: stdu r1, -128(r1) +; P8BE-NEXT: rldicl r4, r3, 32, 32 +; P8BE-NEXT: stw r3, 178(r1) +; P8BE-NEXT: sth r4, 182(r1) +; P8BE-NEXT: lwz r3, 178(r1) +; P8BE-NEXT: lhz r4, 182(r1) +; P8BE-NEXT: stw r3, 122(r1) +; P8BE-NEXT: addi r3, r1, 122 +; P8BE-NEXT: sth r4, 126(r1) +; P8BE-NEXT: bl callee +; P8BE-NEXT: nop +; P8BE-NEXT: li r3, 0 +; P8BE-NEXT: addi r1, r1, 128 +; P8BE-NEXT: ld r0, 16(r1) +; P8BE-NEXT: mtlr r0 +; P8BE-NEXT: blr +; +; P9BE-LABEL: caller_6: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: mflr r0 +; P9BE-NEXT: std r0, 16(r1) +; P9BE-NEXT: stdu r1, -128(r1) +; P9BE-NEXT: stw r3, 178(r1) +; P9BE-NEXT: rldicl r3, r3, 32, 32 +; P9BE-NEXT: sth r3, 182(r1) +; P9BE-NEXT: lwz r3, 178(r1) +; P9BE-NEXT: lhz r4, 182(r1) +; P9BE-NEXT: stw r3, 122(r1) +; P9BE-NEXT: addi r3, r1, 122 +; P9BE-NEXT: sth r4, 126(r1) +; P9BE-NEXT: bl callee +; P9BE-NEXT: nop +; P9BE-NEXT: li r3, 0 +; P9BE-NEXT: addi r1, r1, 128 +; P9BE-NEXT: ld r0, 16(r1) +; P9BE-NEXT: mtlr r0 +; P9BE-NEXT: blr +; +; P10BE-LABEL: caller_6: +; P10BE: # %bb.0: # %entry +; P10BE-NEXT: mflr r0 +; P10BE-NEXT: std r0, 16(r1) +; P10BE-NEXT: stdu r1, -128(r1) +; P10BE-NEXT: stw r3, 178(r1) +; P10BE-NEXT: rldicl r3, r3, 32, 32 +; P10BE-NEXT: sth r3, 182(r1) +; P10BE-NEXT: lwz r3, 178(r1) +; P10BE-NEXT: lhz r4, 182(r1) +; P10BE-NEXT: stw r3, 122(r1) +; P10BE-NEXT: addi r3, r1, 122 +; P10BE-NEXT: sth r4, 126(r1) +; P10BE-NEXT: bl callee +; P10BE-NEXT: nop +; P10BE-NEXT: li r3, 0 +; P10BE-NEXT: addi r1, r1, 128 +; P10BE-NEXT: ld r0, 16(r1) +; P10BE-NEXT: mtlr r0 +; P10BE-NEXT: blr +entry: + %_param_data = alloca [6 x i8], align 1 + %.elt = getelementptr inbounds [6 x i8], [6 x i8]* %data, i64 0, i64 0 + %.unpack = load i8, i8* %.elt, align 1 + %.elt1 = getelementptr inbounds [6 x i8], [6 x i8]* %data, i64 0, i64 1 + %.unpack2 = load i8, i8* %.elt1, align 1 + %.elt3 = getelementptr inbounds [6 x i8], [6 x i8]* %data, i64 0, i64 2 + %.unpack4 = load i8, i8* %.elt3, align 1 + 
%.elt5 = getelementptr inbounds [6 x i8], [6 x i8]* %data, i64 0, i64 3 + %.unpack6 = load i8, i8* %.elt5, align 1 + %.elt7 = getelementptr inbounds [6 x i8], [6 x i8]* %data, i64 0, i64 4 + %.unpack8 = load i8, i8* %.elt7, align 1 + %.elt9 = getelementptr inbounds [6 x i8], [6 x i8]* %data, i64 0, i64 5 + %.unpack10 = load i8, i8* %.elt9, align 1 + %.temp.0.gep = getelementptr inbounds [6 x i8], [6 x i8]* %_param_data, i64 0, i64 0 + store i8 %.unpack, i8* %.temp.0.gep, align 1 + %.temp.1.gep = getelementptr inbounds [6 x i8], [6 x i8]* %_param_data, i64 0, i64 1 + store i8 %.unpack2, i8* %.temp.1.gep, align 1 + %.temp.2.gep = getelementptr inbounds [6 x i8], [6 x i8]* %_param_data, i64 0, i64 2 + store i8 %.unpack4, i8* %.temp.2.gep, align 1 + %.temp.3.gep = getelementptr inbounds [6 x i8], [6 x i8]* %_param_data, i64 0, i64 3 + store i8 %.unpack6, i8* %.temp.3.gep, align 1 + %.temp.4.gep = getelementptr inbounds [6 x i8], [6 x i8]* %_param_data, i64 0, i64 4 + store i8 %.unpack8, i8* %.temp.4.gep, align 1 + %.temp.5.gep = getelementptr inbounds [6 x i8], [6 x i8]* %_param_data, i64 0, i64 5 + store i8 %.unpack10, i8* %.temp.5.gep, align 1 + call void @callee(i8* nonnull %.temp.0.gep) + ret i8 0 +} + +define signext i8 @caller_7([7 x i8]* nocapture readonly byval([7 x i8]) %data) #0 { +; P8LE-LABEL: caller_7: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: mflr r0 +; P8LE-NEXT: std r0, 16(r1) +; P8LE-NEXT: stdu r1, -64(r1) +; P8LE-NEXT: rldicl r4, r3, 32, 32 +; P8LE-NEXT: stw r3, 48(r1) +; P8LE-NEXT: rldicl r3, r3, 16, 48 +; P8LE-NEXT: sth r4, 52(r1) +; P8LE-NEXT: stb r3, 54(r1) +; P8LE-NEXT: lwz r3, 48(r1) +; P8LE-NEXT: lhz r4, 52(r1) +; P8LE-NEXT: lbz r5, 54(r1) +; P8LE-NEXT: stw r3, 57(r1) +; P8LE-NEXT: addi r3, r1, 57 +; P8LE-NEXT: sth r4, 61(r1) +; P8LE-NEXT: stb r5, 63(r1) +; P8LE-NEXT: bl callee +; P8LE-NEXT: nop +; P8LE-NEXT: li r3, 0 +; P8LE-NEXT: addi r1, r1, 64 +; P8LE-NEXT: ld r0, 16(r1) +; P8LE-NEXT: mtlr r0 +; P8LE-NEXT: blr +; +; P9LE-LABEL: caller_7: +; 
P9LE: # %bb.0: # %entry +; P9LE-NEXT: mflr r0 +; P9LE-NEXT: std r0, 16(r1) +; P9LE-NEXT: stdu r1, -64(r1) +; P9LE-NEXT: stw r3, 48(r1) +; P9LE-NEXT: rldicl r4, r3, 32, 32 +; P9LE-NEXT: rldicl r3, r3, 16, 48 +; P9LE-NEXT: stb r3, 54(r1) +; P9LE-NEXT: lwz r3, 48(r1) +; P9LE-NEXT: sth r4, 52(r1) +; P9LE-NEXT: lhz r4, 52(r1) +; P9LE-NEXT: lbz r5, 54(r1) +; P9LE-NEXT: stw r3, 57(r1) +; P9LE-NEXT: addi r3, r1, 57 +; P9LE-NEXT: sth r4, 61(r1) +; P9LE-NEXT: stb r5, 63(r1) +; P9LE-NEXT: bl callee +; P9LE-NEXT: nop +; P9LE-NEXT: li r3, 0 +; P9LE-NEXT: addi r1, r1, 64 +; P9LE-NEXT: ld r0, 16(r1) +; P9LE-NEXT: mtlr r0 +; P9LE-NEXT: blr +; +; P10LE-LABEL: caller_7: +; P10LE: # %bb.0: # %entry +; P10LE-NEXT: mflr r0 +; P10LE-NEXT: std r0, 16(r1) +; P10LE-NEXT: stdu r1, -64(r1) +; P10LE-NEXT: stw r3, 48(r1) +; P10LE-NEXT: rldicl r4, r3, 32, 32 +; P10LE-NEXT: rldicl r3, r3, 16, 48 +; P10LE-NEXT: stb r3, 54(r1) +; P10LE-NEXT: lwz r3, 48(r1) +; P10LE-NEXT: sth r4, 52(r1) +; P10LE-NEXT: lhz r4, 52(r1) +; P10LE-NEXT: lbz r5, 54(r1) +; P10LE-NEXT: stw r3, 57(r1) +; P10LE-NEXT: addi r3, r1, 57 +; P10LE-NEXT: sth r4, 61(r1) +; P10LE-NEXT: stb r5, 63(r1) +; P10LE-NEXT: bl callee@notoc +; P10LE-NEXT: li r3, 0 +; P10LE-NEXT: addi r1, r1, 64 +; P10LE-NEXT: ld r0, 16(r1) +; P10LE-NEXT: mtlr r0 +; P10LE-NEXT: blr +; +; P8BE-LABEL: caller_7: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: mflr r0 +; P8BE-NEXT: std r0, 16(r1) +; P8BE-NEXT: stdu r1, -128(r1) +; P8BE-NEXT: rldicl r4, r3, 32, 32 +; P8BE-NEXT: stw r3, 177(r1) +; P8BE-NEXT: rldicl r3, r3, 16, 48 +; P8BE-NEXT: sth r4, 181(r1) +; P8BE-NEXT: stb r3, 183(r1) +; P8BE-NEXT: lwz r3, 177(r1) +; P8BE-NEXT: lhz r4, 181(r1) +; P8BE-NEXT: lbz r5, 183(r1) +; P8BE-NEXT: stw r3, 121(r1) +; P8BE-NEXT: addi r3, r1, 121 +; P8BE-NEXT: sth r4, 125(r1) +; P8BE-NEXT: stb r5, 127(r1) +; P8BE-NEXT: bl callee +; P8BE-NEXT: nop +; P8BE-NEXT: li r3, 0 +; P8BE-NEXT: addi r1, r1, 128 +; P8BE-NEXT: ld r0, 16(r1) +; P8BE-NEXT: mtlr r0 +; P8BE-NEXT: blr +; +; P9BE-LABEL: 
caller_7: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: mflr r0 +; P9BE-NEXT: std r0, 16(r1) +; P9BE-NEXT: stdu r1, -128(r1) +; P9BE-NEXT: stw r3, 177(r1) +; P9BE-NEXT: rldicl r4, r3, 32, 32 +; P9BE-NEXT: rldicl r3, r3, 16, 48 +; P9BE-NEXT: stb r3, 183(r1) +; P9BE-NEXT: lwz r3, 177(r1) +; P9BE-NEXT: sth r4, 181(r1) +; P9BE-NEXT: lhz r4, 181(r1) +; P9BE-NEXT: lbz r5, 183(r1) +; P9BE-NEXT: stw r3, 121(r1) +; P9BE-NEXT: addi r3, r1, 121 +; P9BE-NEXT: sth r4, 125(r1) +; P9BE-NEXT: stb r5, 127(r1) +; P9BE-NEXT: bl callee +; P9BE-NEXT: nop +; P9BE-NEXT: li r3, 0 +; P9BE-NEXT: addi r1, r1, 128 +; P9BE-NEXT: ld r0, 16(r1) +; P9BE-NEXT: mtlr r0 +; P9BE-NEXT: blr +; +; P10BE-LABEL: caller_7: +; P10BE: # %bb.0: # %entry +; P10BE-NEXT: mflr r0 +; P10BE-NEXT: std r0, 16(r1) +; P10BE-NEXT: stdu r1, -128(r1) +; P10BE-NEXT: stw r3, 177(r1) +; P10BE-NEXT: rldicl r4, r3, 32, 32 +; P10BE-NEXT: rldicl r3, r3, 16, 48 +; P10BE-NEXT: stb r3, 183(r1) +; P10BE-NEXT: lwz r3, 177(r1) +; P10BE-NEXT: sth r4, 181(r1) +; P10BE-NEXT: lhz r4, 181(r1) +; P10BE-NEXT: lbz r5, 183(r1) +; P10BE-NEXT: stw r3, 121(r1) +; P10BE-NEXT: addi r3, r1, 121 +; P10BE-NEXT: sth r4, 125(r1) +; P10BE-NEXT: stb r5, 127(r1) +; P10BE-NEXT: bl callee +; P10BE-NEXT: nop +; P10BE-NEXT: li r3, 0 +; P10BE-NEXT: addi r1, r1, 128 +; P10BE-NEXT: ld r0, 16(r1) +; P10BE-NEXT: mtlr r0 +; P10BE-NEXT: blr +entry: + %_param_data = alloca [7 x i8], align 1 + %.elt = getelementptr inbounds [7 x i8], [7 x i8]* %data, i64 0, i64 0 + %.unpack = load i8, i8* %.elt, align 1 + %.elt1 = getelementptr inbounds [7 x i8], [7 x i8]* %data, i64 0, i64 1 + %.unpack2 = load i8, i8* %.elt1, align 1 + %.elt3 = getelementptr inbounds [7 x i8], [7 x i8]* %data, i64 0, i64 2 + %.unpack4 = load i8, i8* %.elt3, align 1 + %.elt5 = getelementptr inbounds [7 x i8], [7 x i8]* %data, i64 0, i64 3 + %.unpack6 = load i8, i8* %.elt5, align 1 + %.elt7 = getelementptr inbounds [7 x i8], [7 x i8]* %data, i64 0, i64 4 + %.unpack8 = load i8, i8* %.elt7, align 1 + %.elt9 = 
getelementptr inbounds [7 x i8], [7 x i8]* %data, i64 0, i64 5 + %.unpack10 = load i8, i8* %.elt9, align 1 + %.elt11 = getelementptr inbounds [7 x i8], [7 x i8]* %data, i64 0, i64 6 + %.unpack12 = load i8, i8* %.elt11, align 1 + %.temp.0.gep = getelementptr inbounds [7 x i8], [7 x i8]* %_param_data, i64 0, i64 0 + store i8 %.unpack, i8* %.temp.0.gep, align 1 + %.temp.1.gep = getelementptr inbounds [7 x i8], [7 x i8]* %_param_data, i64 0, i64 1 + store i8 %.unpack2, i8* %.temp.1.gep, align 1 + %.temp.2.gep = getelementptr inbounds [7 x i8], [7 x i8]* %_param_data, i64 0, i64 2 + store i8 %.unpack4, i8* %.temp.2.gep, align 1 + %.temp.3.gep = getelementptr inbounds [7 x i8], [7 x i8]* %_param_data, i64 0, i64 3 + store i8 %.unpack6, i8* %.temp.3.gep, align 1 + %.temp.4.gep = getelementptr inbounds [7 x i8], [7 x i8]* %_param_data, i64 0, i64 4 + store i8 %.unpack8, i8* %.temp.4.gep, align 1 + %.temp.5.gep = getelementptr inbounds [7 x i8], [7 x i8]* %_param_data, i64 0, i64 5 + store i8 %.unpack10, i8* %.temp.5.gep, align 1 + %.temp.6.gep = getelementptr inbounds [7 x i8], [7 x i8]* %_param_data, i64 0, i64 6 + store i8 %.unpack12, i8* %.temp.6.gep, align 1 + call void @callee(i8* nonnull %.temp.0.gep) + ret i8 0 +} + +declare void @callee(i8*) local_unnamed_addr #0 + +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/PowerPC/structsinregs.ll b/llvm/test/CodeGen/PowerPC/structsinregs.ll --- a/llvm/test/CodeGen/PowerPC/structsinregs.ll +++ b/llvm/test/CodeGen/PowerPC/structsinregs.ll @@ -187,13 +187,23 @@ ret i32 %add13 ; CHECK-LABEL: callee2 -; CHECK-DAG: std 9, 96(1) -; CHECK-DAG: std 8, 88(1) -; CHECK-DAG: std 7, 80(1) +; CHECK-DAG: stw 9, 97(1) +; CHECK-DAG: stw 8, 90(1) +; CHECK-DAG: stw 7, 83(1) ; CHECK-DAG: stw 6, 76(1) -; CHECK-DAG: std 5, 64(1) +; CHECK-DAG: sth 5, 69(1) ; CHECK-DAG: sth 4, 62(1) ; CHECK-DAG: stb 3, 55(1) +; CHECK: rldicl 3, 9, 16, 48 +; CHECK: stb 3, 103(1) +; CHECK: rldicl 3, 9, 32, 32 +; CHECK: sth 3, 101(1) +; CHECK: rldicl 3, 8, 
32, 32 +; CHECK: sth 3, 94(1) +; CHECK: rldicl 3, 7, 32, 32 +; CHECK: stb 3, 87(1) +; CHECK: rldicl 3, 5, 48, 16 +; CHECK: stb 3, 71(1) ; CHECK-DAG: lha {{[0-9]+}}, 62(1) ; CHECK-DAG: lha {{[0-9]+}}, 69(1) ; CHECK-DAG: lbz {{[0-9]+}}, 55(1)