[ARM] GlobalISel: load formal arguments smaller than 32 bits from the stack

Accept 1-, 2- and 4-byte stack arguments in ARMCallLowering, select G_LOAD of
s1/s8/s16/s32 to LDRBi12/LDRH/LDRi12, and mark those loads legal.

Review note: ARMInstructionSelector::select() keeps its 'default: return false;'
arm and its cases leave the switch with 'break', so opcodes without a case are
still rejected instead of reaching constrainSelectedInstRegOperands().
Line breaks and context indentation were restored from a whitespace-collapsed
copy of this patch; use 'patch -p0 -l' if a context line does not match.

Index: lib/Target/ARM/ARMCallLowering.cpp
===================================================================
--- lib/Target/ARM/ARMCallLowering.cpp
+++ lib/Target/ARM/ARMCallLowering.cpp
@@ -111,7 +111,7 @@
 
   unsigned getStackAddress(uint64_t Size, int64_t Offset,
                            MachinePointerInfo &MPO) override {
-    assert(Size == 4 && "Unsupported size");
+    assert((Size == 1 || Size == 2 || Size == 4) && "Unsupported size");
 
     auto &MFI = MIRBuilder.getMF().getFrameInfo();
 
@@ -127,11 +127,27 @@
 
   void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
                             MachinePointerInfo &MPO, CCValAssign &VA) override {
-    assert(Size == 4 && "Unsupported size");
+    assert((Size == 1 || Size == 2 || Size == 4) && "Unsupported size");
 
     auto MMO = MIRBuilder.getMF().getMachineMemOperand(
         MPO, MachineMemOperand::MOLoad, Size, /* Alignment */ 0);
-    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
+    if (VA.getLocInfo() == CCValAssign::SExt ||
+        VA.getLocInfo() == CCValAssign::ZExt)
+      // If the argument is zero- or sign-extended by the caller, its size
+      // becomes 4 bytes, so that's what we should load.
+      AddDefaultPred(MIRBuilder.buildInstr(ARM::LDRi12)
+                         .addDef(ValVReg)
+                         .addUse(Addr)
+                         .addMemOperand(MMO)
+                         .addImm(0));
+    else
+      // Otherwise, just build a generic load and let it go through the whole
+      // instruction selection pipeline.
+      // FIXME: This is just so we can obtain (more or less) the same behaviour
+      // as DAGISel. It's not clear in which circumstances it's possible to have
+      // a formal arg < 32 bits without a signext/zeroext flag, we can brush
+      // this up as we run into those cases.
+      MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
   }
 
   void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
@@ -163,18 +179,10 @@
   auto &TLI = *getTLI();
 
   auto &Args = F.getArgumentList();
-  unsigned ArgIdx = 0;
-  for (auto &Arg : Args) {
-    ArgIdx++;
+  for (auto &Arg : Args)
     if (!isSupportedType(DL, TLI, Arg.getType()))
       return false;
 
-    // FIXME: This check as well as ArgIdx are going away as soon as we support
-    // loading values < 32 bits.
-    if (ArgIdx > 4 && Arg.getType()->getIntegerBitWidth() != 32)
-      return false;
-  }
-
   CCAssignFn *AssignFn =
       TLI.CCAssignFnForCall(F.getCallingConv(), F.isVarArg());
 
Index: lib/Target/ARM/ARMInstructionSelector.cpp
===================================================================
--- lib/Target/ARM/ARMInstructionSelector.cpp
+++ lib/Target/ARM/ARMInstructionSelector.cpp
@@ -83,6 +83,22 @@
   llvm_unreachable("Unsupported opcode");
 }
 
+/// Select the opcode for simple loads. For types smaller than 32 bits, the
+/// value will be zero extended.
+static unsigned selectLoadOpCode(unsigned Size) {
+  switch (Size) {
+  case 1:
+  case 8:
+    return ARM::LDRBi12;
+  case 16:
+    return ARM::LDRH;
+  case 32:
+    return ARM::LDRi12;
+  }
+
+  llvm_unreachable("Unsupported size");
+}
+
 bool ARMInstructionSelector::select(MachineInstr &I) const {
   assert(I.getParent() && "Instruction should be in a basic block!");
   assert(I.getParent()->getParent() && "Instruction should be in a function!");
@@ -162,13 +178,27 @@
     // the user(s).
     I.setDesc(TII.get(ARM::ADDri));
     AddDefaultCC(AddDefaultPred(MIB.addImm(0)));
     break;
-  case G_LOAD:
-    I.setDesc(TII.get(ARM::LDRi12));
-    AddDefaultPred(MIB.addImm(0));
-    break;
+  case G_LOAD: {
+    LLT ValTy = MRI.getType(I.getOperand(0).getReg());
+    const auto ValSize = ValTy.getSizeInBits();
+
+    if (ValSize != 32 && ValSize != 16 && ValSize != 8 && ValSize != 1)
+      return false;
+
+    const auto NewOpc = selectLoadOpCode(ValSize);
+    I.setDesc(TII.get(NewOpc));
+
+    if (NewOpc == ARM::LDRH)
+      // LDRH has a funny addressing mode (there's already a FIXME for it).
+      MIB.addReg(0);
+    MIB.addImm(0);
+
+    AddDefaultPred(MIB);
+    break;
+  }
   default:
     return false;
   }
 
   return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
Index: lib/Target/ARM/ARMLegalizerInfo.cpp
===================================================================
--- lib/Target/ARM/ARMLegalizerInfo.cpp
+++ lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -35,8 +35,9 @@
 
   setAction({G_FRAME_INDEX, p0}, Legal);
 
-  // TODO: smaller types
-  setAction({G_LOAD, s32}, Legal);
+  for (auto Ty : {s1, s8, s16, s32})
+    setAction({G_LOAD, Ty}, Legal);
+
   setAction({G_LOAD, 1, p0}, Legal);
 
   for (auto Ty : {s1, s8, s16, s32})
Index: test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir
===================================================================
--- test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir
+++ test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir
@@ -220,19 +220,26 @@
   - { id: 2, class: gprb }
   - { id: 3, class: gprb }
 fixedStack:
-  - { id: 0, offset: 0, size: 4, alignment: 4, isImmutable: true, isAliased: false }
+  - { id: 0, offset: 0, size: 1, alignment: 4, isImmutable: true, isAliased: false }
   - { id: 1, offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false }
   - { id: 2, offset: 8, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-# CHECK: id: [[FRAME_INDEX:[0-9]+]], offset: 8
+# CHECK-DAG: id: [[FI1:[0-9]+]], offset: 0
+# CHECK-DAG: id: [[FI32:[0-9]+]], offset: 8
 body:             |
   bb.0:
     liveins: %r0, %r1, %r2, %r3
 
     %0(p0) = G_FRAME_INDEX %fixed-stack.2
-    ; CHECK: [[FIVREG:%[0-9]+]] = ADDri %fixed-stack.[[FRAME_INDEX]], 0, 14, _, _
+    ; CHECK: [[FI32VREG:%[0-9]+]] = ADDri %fixed-stack.[[FI32]], 0, 14, _, _
 
     %1(s32) = G_LOAD %0(p0)
-    ; CHECK: {{%[0-9]+}} = LDRi12 [[FIVREG]], 0, 14, _
+    ; CHECK: {{%[0-9]+}} = LDRi12 [[FI32VREG]], 0, 14, _
+
+    %2(p0) = G_FRAME_INDEX %fixed-stack.0
+    ; CHECK: [[FI1VREG:%[0-9]+]] = ADDri %fixed-stack.[[FI1]], 0, 14, _, _
+
+    %3(s1) = G_LOAD %2(p0)
+    ; CHECK: {{%[0-9]+}} = LDRBi12 [[FI1VREG]], 0, 14, _
 
     BX_RET 14, _
     ; CHECK: BX_RET 14, _
Index: test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
===================================================================
--- test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
+++ test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
@@ -82,8 +82,8 @@
   ret i32 %sum
 }
 
-define i32 @test_many_args(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) {
-; CHECK-LABEL: name: test_many_args
+define i32 @test_stack_args(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) {
+; CHECK-LABEL: name: test_stack_args
 ; CHECK: fixedStack:
 ; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 4
 ; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 4
@@ -98,3 +98,39 @@
   %sum = add i32 %p2, %p5
   ret i32 %sum
 }
+
+define i16 @test_stack_args_signext(i32 %p0, i16 %p1, i8 %p2, i1 %p3,
+                                    i8 signext %p4, i16 signext %p5) {
+; CHECK-LABEL: name: test_stack_args_signext
+; CHECK: fixedStack:
+; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 1
+; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 2
+; CHECK: liveins: %r0, %r1, %r2, %r3
+; CHECK: [[VREGP1:%[0-9]+]]{{.*}} = COPY %r1
+; CHECK: [[FIP5:%[0-9]+]]{{.*}} = G_FRAME_INDEX %fixed-stack.[[P5]]
+; CHECK: [[VREGP5:%[0-9]+]]{{.*}} = LDRi12 [[FIP5]](p0), 0, 14, _
+; CHECK: [[SUM:%[0-9]+]]{{.*}} = G_ADD [[VREGP1]], [[VREGP5]]
+; CHECK: %r0 = COPY [[SUM]]
+; CHECK: BX_RET 14, _, implicit %r0
+entry:
+  %sum = add i16 %p1, %p5
+  ret i16 %sum
+}
+
+define i8 @test_stack_args_zeroext(i32 %p0, i16 %p1, i8 %p2, i1 %p3,
+                                   i8 zeroext %p4, i16 zeroext %p5) {
+; CHECK-LABEL: name: test_stack_args_zeroext
+; CHECK: fixedStack:
+; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 1
+; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 2
+; CHECK: liveins: %r0, %r1, %r2, %r3
+; CHECK: [[VREGP2:%[0-9]+]]{{.*}} = COPY %r2
+; CHECK: [[FIP4:%[0-9]+]]{{.*}} = G_FRAME_INDEX %fixed-stack.[[P4]]
+; CHECK: [[VREGP4:%[0-9]+]]{{.*}} = LDRi12 [[FIP4]](p0), 0, 14, _
+; CHECK: [[SUM:%[0-9]+]]{{.*}} = G_ADD [[VREGP2]], [[VREGP4]]
+; CHECK: %r0 = COPY [[SUM]]
+; CHECK: BX_RET 14, _, implicit %r0
+entry:
+  %sum = add i8 %p2, %p4
+  ret i8 %sum
+}
Index: test/CodeGen/ARM/GlobalISel/arm-isel.ll
===================================================================
--- test/CodeGen/ARM/GlobalISel/arm-isel.ll
+++ test/CodeGen/ARM/GlobalISel/arm-isel.ll
@@ -67,8 +67,8 @@
   ret i32 %sum
 }
 
-define i32 @test_many_args(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) {
-; CHECK-LABEL: test_many_args:
+define i32 @test_stack_args_i32(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) {
+; CHECK-LABEL: test_stack_args_i32:
 ; CHECK: add [[P5ADDR:r[0-9]+]], sp, #4
 ; CHECK: ldr [[P5:r[0-9]+]], {{.*}}[[P5ADDR]]
 ; CHECK: add r0, r2, [[P5]]
@@ -77,3 +77,36 @@
   %sum = add i32 %p2, %p5
   ret i32 %sum
 }
+
+define i16 @test_stack_args_mixed(i32 %p0, i16 %p1, i8 %p2, i1 %p3, i8 %p4, i16 %p5) {
+; CHECK-LABEL: test_stack_args_mixed:
+; CHECK: add [[P5ADDR:r[0-9]+]], sp, #4
+; CHECK: ldrh [[P5:r[0-9]+]], {{.*}}[[P5ADDR]]
+; CHECK: add r0, r1, [[P5]]
+; CHECK: bx lr
+entry:
+  %sum = add i16 %p1, %p5
+  ret i16 %sum
+}
+
+define i16 @test_stack_args_zeroext(i32 %p0, i16 %p1, i8 %p2, i1 %p3, i16 zeroext %p4) {
+; CHECK-LABEL: test_stack_args_zeroext:
+; CHECK: mov [[P4ADDR:r[0-9]+]], sp
+; CHECK: ldr [[P4:r[0-9]+]], {{.*}}[[P4ADDR]]
+; CHECK: add r0, r1, [[P4]]
+; CHECK: bx lr
+entry:
+  %sum = add i16 %p1, %p4
+  ret i16 %sum
+}
+
+define i8 @test_stack_args_signext(i32 %p0, i16 %p1, i8 %p2, i1 %p3, i8 signext %p4) {
+; CHECK-LABEL: test_stack_args_signext:
+; CHECK: mov [[P4ADDR:r[0-9]+]], sp
+; CHECK: ldr [[P4:r[0-9]+]], {{.*}}[[P4ADDR]]
+; CHECK: add r0, r2, [[P4]]
+; CHECK: bx lr
+entry:
+  %sum = add i8 %p2, %p4
+  ret i8 %sum
+}