Index: llvm/trunk/lib/Target/ARM/ARMCallLowering.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMCallLowering.cpp +++ llvm/trunk/lib/Target/ARM/ARMCallLowering.cpp @@ -122,7 +122,7 @@ unsigned getStackAddress(uint64_t Size, int64_t Offset, MachinePointerInfo &MPO) override { - assert(Size == 4 && "Unsupported size"); + assert((Size == 1 || Size == 2 || Size == 4) && "Unsupported size"); auto &MFI = MIRBuilder.getMF().getFrameInfo(); @@ -138,7 +138,16 @@ void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size, MachinePointerInfo &MPO, CCValAssign &VA) override { - assert(Size == 4 && "Unsupported size"); + assert((Size == 1 || Size == 2 || Size == 4) && "Unsupported size"); + + if (VA.getLocInfo() == CCValAssign::SExt || + VA.getLocInfo() == CCValAssign::ZExt) { + // If the argument is zero- or sign-extended by the caller, its size + // becomes 4 bytes, so that's what we should load. + Size = 4; + assert(MRI.getType(ValVReg).isScalar() && "Only scalars supported atm"); + MRI.setType(ValVReg, LLT::scalar(32)); + } auto MMO = MIRBuilder.getMF().getMachineMemOperand( MPO, MachineMemOperand::MOLoad, Size, /* Alignment */ 0); @@ -177,18 +186,10 @@ return false; auto &Args = F.getArgumentList(); - unsigned ArgIdx = 0; - for (auto &Arg : Args) { - ArgIdx++; + for (auto &Arg : Args) if (!isSupportedType(DL, TLI, Arg.getType())) return false; - // FIXME: This check as well as ArgIdx are going away as soon as we support - // loading values < 32 bits. 
- if (ArgIdx > 4 && Arg.getType()->getIntegerBitWidth() != 32) - return false; - } - CCAssignFn *AssignFn = TLI.CCAssignFnForCall(F.getCallingConv(), F.isVarArg()); Index: llvm/trunk/lib/Target/ARM/ARMInstructionSelector.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstructionSelector.cpp +++ llvm/trunk/lib/Target/ARM/ARMInstructionSelector.cpp @@ -85,6 +85,22 @@ llvm_unreachable("Unsupported opcode"); } +/// Select the load opcode for a value of \p Size bits. For types smaller than +/// 32 bits, the loaded value will be zero extended. +static unsigned selectLoadOpCode(unsigned Size) { + switch (Size) { + case 1: + case 8: + return ARM::LDRBi12; + case 16: + return ARM::LDRH; + case 32: + return ARM::LDRi12; + } + + llvm_unreachable("Unsupported size"); +} + bool ARMInstructionSelector::select(MachineInstr &I) const { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); @@ -167,10 +183,22 @@ I.setDesc(TII.get(ARM::ADDri)); MIB.addImm(0).add(predOps(ARMCC::AL)).add(condCodeOp()); break; - case G_LOAD: - I.setDesc(TII.get(ARM::LDRi12)); + case G_LOAD: { + LLT ValTy = MRI.getType(I.getOperand(0).getReg()); + const auto ValSize = ValTy.getSizeInBits(); + + if (ValSize != 32 && ValSize != 16 && ValSize != 8 && ValSize != 1) + return false; + + const auto NewOpc = selectLoadOpCode(ValSize); + I.setDesc(TII.get(NewOpc)); + + if (NewOpc == ARM::LDRH) + // LDRH needs an extra register operand (there's already a FIXME for it).
+ MIB.addReg(0); MIB.addImm(0).add(predOps(ARMCC::AL)); break; + } default: return false; } Index: llvm/trunk/lib/Target/ARM/ARMLegalizerInfo.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMLegalizerInfo.cpp +++ llvm/trunk/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -35,7 +35,8 @@ setAction({G_FRAME_INDEX, p0}, Legal); - setAction({G_LOAD, s32}, Legal); + for (auto Ty : {s1, s8, s16, s32}) + setAction({G_LOAD, Ty}, Legal); setAction({G_LOAD, 1, p0}, Legal); for (auto Ty : {s1, s8, s16, s32}) Index: llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir =================================================================== --- llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir +++ llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir @@ -233,19 +233,26 @@ # CHECK-DAG: id: 2, class: gpr # CHECK-DAG: id: 3, class: gpr fixedStack: - - { id: 0, offset: 0, size: 4, alignment: 4, isImmutable: true, isAliased: false } + - { id: 0, offset: 0, size: 1, alignment: 4, isImmutable: true, isAliased: false } - { id: 1, offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false } - { id: 2, offset: 8, size: 4, alignment: 4, isImmutable: true, isAliased: false } -# CHECK: id: [[FRAME_INDEX:[0-9]+]], offset: 8 +# CHECK-DAG: id: [[FI1:[0-9]+]], offset: 0 +# CHECK-DAG: id: [[FI32:[0-9]+]], offset: 8 body: | bb.0: liveins: %r0, %r1, %r2, %r3 %0(p0) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[FIVREG:%[0-9]+]] = ADDri %fixed-stack.[[FRAME_INDEX]], 0, 14, _, _ + ; CHECK: [[FI32VREG:%[0-9]+]] = ADDri %fixed-stack.[[FI32]], 0, 14, _, _ %1(s32) = G_LOAD %0(p0) - ; CHECK: {{%[0-9]+}} = LDRi12 [[FIVREG]], 0, 14, _ + ; CHECK: {{%[0-9]+}} = LDRi12 [[FI32VREG]], 0, 14, _ + + %2(p0) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK: [[FI1VREG:%[0-9]+]] = ADDri %fixed-stack.[[FI1]], 0, 14, _, _ + + %3(s1) = G_LOAD %2(p0) + ; CHECK: {{%[0-9]+}} = LDRBi12 [[FI1VREG]], 0, 14, _ BX_RET 14, _ ; CHECK: BX_RET 14, _ 
Index: llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll +++ llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll @@ -82,8 +82,8 @@ ret i32 %sum } -define i32 @test_many_args(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) { -; CHECK-LABEL: name: test_many_args +define i32 @test_stack_args(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) { +; CHECK-LABEL: name: test_stack_args ; CHECK: fixedStack: ; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 4 ; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 4 @@ -98,3 +98,39 @@ %sum = add i32 %p2, %p5 ret i32 %sum } + +define i16 @test_stack_args_signext(i32 %p0, i16 %p1, i8 %p2, i1 %p3, + i8 signext %p4, i16 signext %p5) { +; CHECK-LABEL: name: test_stack_args_signext +; CHECK: fixedStack: +; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 1 +; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 2 +; CHECK: liveins: %r0, %r1, %r2, %r3 +; CHECK: [[VREGP1:%[0-9]+]]{{.*}} = COPY %r1 +; CHECK: [[FIP5:%[0-9]+]]{{.*}} = G_FRAME_INDEX %fixed-stack.[[P5]] +; CHECK: [[VREGP5:%[0-9]+]]{{.*}} = G_LOAD [[FIP5]](p0) +; CHECK: [[SUM:%[0-9]+]]{{.*}} = G_ADD [[VREGP1]], [[VREGP5]] +; CHECK: %r0 = COPY [[SUM]] +; CHECK: BX_RET 14, _, implicit %r0 +entry: + %sum = add i16 %p1, %p5 + ret i16 %sum +} + +define i8 @test_stack_args_zeroext(i32 %p0, i16 %p1, i8 %p2, i1 %p3, + i8 zeroext %p4, i16 zeroext %p5) { +; CHECK-LABEL: name: test_stack_args_zeroext +; CHECK: fixedStack: +; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 1 +; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 2 +; CHECK: liveins: %r0, %r1, %r2, %r3 +; CHECK: [[VREGP2:%[0-9]+]]{{.*}} = COPY %r2 +; CHECK: [[FIP4:%[0-9]+]]{{.*}} = G_FRAME_INDEX %fixed-stack.[[P4]] +; CHECK: [[VREGP4:%[0-9]+]]{{.*}} = G_LOAD [[FIP4]](p0) +; CHECK: [[SUM:%[0-9]+]]{{.*}} = G_ADD [[VREGP2]], [[VREGP4]] +; CHECK: 
%r0 = COPY [[SUM]] +; CHECK: BX_RET 14, _, implicit %r0 +entry: + %sum = add i8 %p2, %p4 + ret i8 %sum +} Index: llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-isel.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-isel.ll +++ llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-isel.ll @@ -67,8 +67,8 @@ ret i32 %sum } -define i32 @test_many_args(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) { -; CHECK-LABEL: test_many_args: +define i32 @test_stack_args_i32(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) { +; CHECK-LABEL: test_stack_args_i32: ; CHECK: add [[P5ADDR:r[0-9]+]], sp, #4 ; CHECK: ldr [[P5:r[0-9]+]], {{.*}}[[P5ADDR]] ; CHECK: add r0, r2, [[P5]] @@ -77,3 +77,36 @@ %sum = add i32 %p2, %p5 ret i32 %sum } + +define i16 @test_stack_args_mixed(i32 %p0, i16 %p1, i8 %p2, i1 %p3, i8 %p4, i16 %p5) { +; CHECK-LABEL: test_stack_args_mixed: +; CHECK: add [[P5ADDR:r[0-9]+]], sp, #4 +; CHECK: ldrh [[P5:r[0-9]+]], {{.*}}[[P5ADDR]] +; CHECK: add r0, r1, [[P5]] +; CHECK: bx lr +entry: + %sum = add i16 %p1, %p5 + ret i16 %sum +} + +define i16 @test_stack_args_zeroext(i32 %p0, i16 %p1, i8 %p2, i1 %p3, i16 zeroext %p4) { +; CHECK-LABEL: test_stack_args_zeroext: +; CHECK: mov [[P4ADDR:r[0-9]+]], sp +; CHECK: ldr [[P4:r[0-9]+]], {{.*}}[[P4ADDR]] +; CHECK: add r0, r1, [[P4]] +; CHECK: bx lr +entry: + %sum = add i16 %p1, %p4 + ret i16 %sum +} + +define i8 @test_stack_args_signext(i32 %p0, i16 %p1, i8 %p2, i1 %p3, i8 signext %p4) { +; CHECK-LABEL: test_stack_args_signext: +; CHECK: mov [[P4ADDR:r[0-9]+]], sp +; CHECK: ldr [[P4:r[0-9]+]], {{.*}}[[P4ADDR]] +; CHECK: add r0, r2, [[P4]] +; CHECK: bx lr +entry: + %sum = add i8 %p2, %p4 + ret i8 %sum +}