Index: lib/Target/NDS32/NDS32RegisterInfo.cpp
===================================================================
--- lib/Target/NDS32/NDS32RegisterInfo.cpp
+++ lib/Target/NDS32/NDS32RegisterInfo.cpp
@@ -78,6 +78,9 @@
   MachineInstr &MI = *II;
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  const NDS32FrameLowering *TFI = getFrameLowering(MF);
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   DebugLoc DL = MI.getDebugLoc();
   int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
   uint64_t stackSize = MF.getFrameInfo().getStackSize();
@@ -89,15 +92,94 @@
                << "spOffset : " << Offset << "\n"
                << "stackSize : " << stackSize << "\n");
 
-  unsigned BasePtr = NDS32::SP;
+  unsigned BasePtr = (TFI->hasFP(MF) ? NDS32::FP : NDS32::SP);
+
+  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+  int MinCSFI = 0;
+  int MaxCSFI = -1;
+
+  if (!CSI.empty()) {
+    MinCSFI = CSI.front().getFrameIdx();
+    MaxCSFI = CSI.back().getFrameIdx();
+  }
+  // If FrameIndex refers to a callee-saved register slot, use SP as the base:
+  // with -fno-omit-frame-pointer those slots are stored before FP is set up.
+  // E.g.
+  //   addi $sp, $sp, -56
+  //   swi $fp, [$sp + (44)] <= use SP as base
+  //   addi $fp, $sp, 0
+  if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)
+    BasePtr = NDS32::SP;
 
   Offset += stackSize;
 
   // Fold imm into offset
   Offset += MI.getOperand(FIOperandNum + 1).getImm();
 
+  // Use R15 as temp register
+  unsigned Reg = NDS32::R15;
+
+  if (isInt<15>(Offset)) {
+    MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
+    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+    return;
+  } else if (isInt<20>(Offset)) {
+    // movi r15, Offset
+    BuildMI(MBB, II, DL, TII.get(NDS32::MOVI), Reg)
+        .addImm(Offset);
+  } else {
+    // sethi r15, hi20 (Offset)
+    // ori r15, lo12 (Offset)
+    BuildMI(MBB, II, DL, TII.get(NDS32::SETHI), Reg)
+        .addImm((Offset >> 12) & 0xfffff);
+    BuildMI(MBB, II, DL, TII.get(NDS32::ORI), Reg).addReg(Reg)
+        .addImm(Offset & 0xfff);
+  }
+  // Replace FI[index] with BasePtr + r15
   MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
-  MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+  MI.getOperand(FIOperandNum + 1).ChangeToRegister(Reg, false);
+
+  // Save the opcode first: setDesc() below changes what getOpcode() returns.
+  const unsigned Opc = MI.getOpcode();
+  unsigned NewOpc;
+
+  switch (Opc) {
+  case NDS32::SWI:
+    NewOpc = NDS32::SW;
+    break;
+  case NDS32::SHI:
+    NewOpc = NDS32::SH;
+    break;
+  case NDS32::SBI:
+    NewOpc = NDS32::SB;
+    break;
+  case NDS32::LWI:
+    NewOpc = NDS32::LW;
+    break;
+  case NDS32::LHI:
+    NewOpc = NDS32::LH;
+    break;
+  case NDS32::LBI:
+    NewOpc = NDS32::LB;
+    break;
+  case NDS32::ADDI:
+    // addi ra, rb, Offset => add ra, rb, r15
+    NewOpc = NDS32::ADD;
+    break;
+  default:
+    // No register-offset form is known for this opcode; do not fall through
+    // with NewOpc uninitialized.
+    llvm_unreachable("Unexpected opcode for out-of-range frame index offset");
+  }
+
+  MI.setDesc(TII.get(NewOpc));
+  if (Opc != NDS32::ADDI) {
+    // Change load/store offset to r15
+    // E.g. lwi ra, [rb + Offset] => lw ra, [rb + r15 << 0]
+    // The register-offset load/store forms (e.g. LW) define the shift amount
+    // as one of their input operands, so add a shift amount of 0 to fit the
+    // operand definition.
+    MI.addOperand(MF, MachineOperand::CreateImm(0));
+  }
 
   return;
 }
Index: test/CodeGen/NDS32/load-store-insns.ll
===================================================================
--- test/CodeGen/NDS32/load-store-insns.ll
+++ test/CodeGen/NDS32/load-store-insns.ll
@@ -57,7 +57,7 @@
 }
 
 ; Function Attrs: noinline nounwind
-define i32 @sw_lwi_lw(i32 %a) #0 {
+define i32 @sw_lw(i32 %a) #0 {
 entry:
   %a.addr = alloca i32, align 4
   %b = alloca [100000 x i32], align 4
@@ -68,7 +68,8 @@
   %arrayidx1 = getelementptr inbounds [100000 x i32], [100000 x i32]* %b, i32 0, i32 %0
   %1 = load i32, i32* %arrayidx1, align 4
 ; CHECK: sw $r2, [$r1 + $r0]
-; CHECK: lwi $r1, [$sp + (400004)]
+; CHECK: movi $r15, 400004
+; CHECK: lw $r1, [$sp + $r15]
 ; CHECK: lw $r0, [$r0 + $r1 << 2]
   ret i32 %1
 }
Index: test/CodeGen/NDS32/split-offset.ll
===================================================================
--- /dev/null
+++ test/CodeGen/NDS32/split-offset.ll
@@ -0,0 +1,190 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i64:64-a:0:32-n32-S64"
+target triple = "nds32le---elf"
+
+; Function Attrs: nounwind
+define i8* @mem_copy(i8* returned %o, i8* nocapture readonly %i, i32 %l) local_unnamed_addr #0 {
+entry:
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %o, i8* %i, i32 %l, i32 1, i1 false)
+  ret i8* %o
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32, i1) #1
+
+; Function Attrs: noreturn nounwind
+define i32 @main() local_unnamed_addr #2 {
+entry:
+; CHECK: movi $r15, -262160
+; CHECK: add $sp, $sp, $r15
+; CHECK: movi $r15, 262156
+; CHECK: sw $lp, [$sp + $r15]
+; CHECK: movi $r15, 262152
+; CHECK: sw $r8, [$sp + $r15]
+; CHECK: movi $r15, 262148
+; CHECK: sw $r7, [$sp + $r15]
+; CHECK: movi $r15, 262144
+; CHECK: sw $r6, [$sp + $r15]
+  %src = alloca [131072 x i8], align 1
+  %dst = alloca [131072 x i8], align 1
+  %dst130 = getelementptr inbounds [131072 x i8], [131072 x i8]* %dst, i32 0, i32 0
+  %0 = getelementptr inbounds [131072 x i8], [131072 x i8]* %src, i32 0, i32 0
+  call void @llvm.lifetime.start(i64 131072, i8* nonnull %0) #4
+  %1 = getelementptr inbounds [131072 x i8], [131072 x i8]* %dst, i32 0, i32 0
+  call void @llvm.lifetime.start(i64 131072, i8* nonnull %1) #4
+  call void @llvm.memset.p0i8.i32(i8* nonnull %dst130, i8 0, i32 131072, i32 1, i1 false)
+; CHECK: movi $r15, 131072
+; CHECK: add $r0, $sp, $r15
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.0129 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %conv = trunc i32 %i.0129 to i8
+  %arrayidx = getelementptr inbounds [131072 x i8], [131072 x i8]* %src, i32 0, i32 %i.0129
+  store i8 %conv, i8* %arrayidx, align 1
+  %inc = add nuw nsw i32 %i.0129, 1
+  %exitcond = icmp eq i32 %inc, 131072
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+; CHECK: movi $r15, 131072
+; CHECK: add $r1, $sp, $r15
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* nonnull %1, i8* nonnull %0, i32 1024, i32 1, i1 false)
+  br label %for.body6
+
+for.cond3:                                        ; preds = %for.body6
+  %cmp4 = icmp ult i32 %inc14, 1024
+  br i1 %cmp4, label %for.body6, label %for.end15
+
+for.body6:                                        ; preds = %for.end, %for.cond3
+  %i.1128 = phi i32 [ 0, %for.end ], [ %inc14, %for.cond3 ]
+  %arrayidx7 = getelementptr inbounds [131072 x i8], [131072 x i8]* %dst, i32 0, i32 %i.1128
+  %2 = load i8, i8* %arrayidx7, align 1
+  %3 = trunc i32 %i.1128 to i8
+  %cmp11 = icmp eq i8 %2, %3
+  %inc14 = add nuw nsw i32 %i.1128, 1
+  br i1 %cmp11, label %for.cond3, label %if.then
+
+if.then:                                          ; preds = %for.body6
+  tail call void @abort() #5
+  unreachable
+
+for.end15:                                        ; preds = %for.cond3
+  call void @llvm.memset.p0i8.i32(i8* nonnull %1, i8 1, i32 1024, i32 1, i1 false)
+  br label %for.cond17
+
+for.cond17:                                       ; preds = %for.end15, %for.cond17.for.body20_crit_edge
+  %inc28133 = phi i32 [ 1, %for.end15 ], [ %inc28, %for.cond17.for.body20_crit_edge ]
+  %cmp18 = icmp ult i32 %inc28133, 1024
+  br i1 %cmp18, label %for.cond17.for.body20_crit_edge, label %for.end29
+
+for.cond17.for.body20_crit_edge:                  ; preds = %for.cond17
+  %arrayidx21.phi.trans.insert = getelementptr inbounds [131072 x i8], [131072 x i8]* %dst, i32 0, i32 %inc28133
+  %.pre = load i8, i8* %arrayidx21.phi.trans.insert, align 1
+  %cmp23 = icmp eq i8 %.pre, 1
+  %inc28 = add nuw nsw i32 %inc28133, 1
+  br i1 %cmp23, label %for.cond17, label %if.then25
+
+if.then25:                                        ; preds = %for.cond17.for.body20_crit_edge
+  tail call void @abort() #5
+  unreachable
+
+for.end29:                                        ; preds = %for.cond17
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* nonnull %1, i8* nonnull %0, i32 131072, i32 1, i1 false)
+  br label %for.body35
+
+for.cond32:                                       ; preds = %for.body35
+  %cmp33 = icmp ult i32 %inc45, 131072
+  br i1 %cmp33, label %for.body35, label %for.end46
+
+for.body35:                                       ; preds = %for.end29, %for.cond32
+  %i.3126 = phi i32 [ 0, %for.end29 ], [ %inc45, %for.cond32 ]
+  %arrayidx36 = getelementptr inbounds [131072 x i8], [131072 x i8]* %dst, i32 0, i32 %i.3126
+  %4 = load i8, i8* %arrayidx36, align 1
+  %5 = trunc i32 %i.3126 to i8
+  %cmp40 = icmp eq i8 %4, %5
+  %inc45 = add nuw nsw i32 %i.3126, 1
+  br i1 %cmp40, label %for.cond32, label %if.then42
+
+if.then42:                                        ; preds = %for.body35
+  tail call void @abort() #5
+  unreachable
+
+for.end46:                                        ; preds = %for.cond32
+  call void @llvm.memset.p0i8.i32(i8* nonnull %1, i8 0, i32 131072, i32 1, i1 false)
+  br label %for.cond48
+
+for.cond48:                                       ; preds = %for.end46, %for.cond48.for.body51_crit_edge
+  %inc59132 = phi i32 [ 1, %for.end46 ], [ %inc59, %for.cond48.for.body51_crit_edge ]
+  %cmp49 = icmp ult i32 %inc59132, 131072
+  br i1 %cmp49, label %for.cond48.for.body51_crit_edge, label %for.end60
+
+for.cond48.for.body51_crit_edge:                  ; preds = %for.cond48
+  %arrayidx52.phi.trans.insert = getelementptr inbounds [131072 x i8], [131072 x i8]* %dst, i32 0, i32 %inc59132
+  %.pre131 = load i8, i8* %arrayidx52.phi.trans.insert, align 1
+  %cmp54 = icmp eq i8 %.pre131, 0
+  %inc59 = add nuw nsw i32 %inc59132, 1
+  br i1 %cmp54, label %for.cond48, label %if.then56
+
+if.then56:                                        ; preds = %for.cond48.for.body51_crit_edge
+  tail call void @abort() #5
+  unreachable
+
+for.end60:                                        ; preds = %for.cond48
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* nonnull %1, i8* nonnull %0, i32 1024, i32 1, i1 false) #4
+  br label %for.body66
+
+for.cond63:                                       ; preds = %for.body66
+  %cmp64 = icmp ult i32 %inc76, 1024
+  br i1 %cmp64, label %for.body66, label %for.end77
+
+for.body66:                                       ; preds = %for.end60, %for.cond63
+  %i.5124 = phi i32 [ 0, %for.end60 ], [ %inc76, %for.cond63 ]
+  %arrayidx67 = getelementptr inbounds [131072 x i8], [131072 x i8]* %dst, i32 0, i32 %i.5124
+  %6 = load i8, i8* %arrayidx67, align 1
+  %7 = trunc i32 %i.5124 to i8
+  %cmp71 = icmp eq i8 %6, %7
+  %inc76 = add nuw nsw i32 %i.5124, 1
+  br i1 %cmp71, label %for.cond63, label %if.then73
+
+if.then73:                                        ; preds = %for.body66
+  tail call void @abort() #5
+  unreachable
+
+for.end77:                                        ; preds = %for.cond63
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* nonnull %1, i8* nonnull %0, i32 131072, i32 1, i1 false) #4
+  br label %for.body85
+
+for.cond82:                                       ; preds = %for.body85
+  %cmp83 = icmp ult i32 %inc95, 131072
+  br i1 %cmp83, label %for.body85, label %for.end96
+
+for.body85:                                       ; preds = %for.end77, %for.cond82
+  %i.6123 = phi i32 [ 0, %for.end77 ], [ %inc95, %for.cond82 ]
+  %arrayidx86 = getelementptr inbounds [131072 x i8], [131072 x i8]* %dst, i32 0, i32 %i.6123
+  %8 = load i8, i8* %arrayidx86, align 1
+  %9 = trunc i32 %i.6123 to i8
+  %cmp90 = icmp eq i8 %8, %9
+  %inc95 = add nuw nsw i32 %i.6123, 1
+  br i1 %cmp90, label %for.cond82, label %if.then92
+
+if.then92:                                        ; preds = %for.body85
+  tail call void @abort() #5
+  unreachable
+
+for.end96:                                        ; preds = %for.cond82
+  tail call void @exit(i32 0) #5
+  unreachable
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+
+; Function Attrs: noreturn
+declare void @abort() local_unnamed_addr #3
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i32, i1) #1
+
+; Function Attrs: noreturn
+declare void @exit(i32) local_unnamed_addr #3