This is an archive of the discontinued LLVM Phabricator instance.

[clang][driver] Do not emit default '-Tdata' for AVR devices
ClosedPublic

Authored by benshi001 on Feb 21 2023, 7:46 PM.

Details

Summary

Different AVR devices have different data regions. Current clang
driver emits a default '-Tdata' option to the linker. This way
works fine if there is no user specified linker script, but it
will make conflicts if there is one.

A better solution for deciding the default data region in the
default linker script of gnu avr-ld is emiting
--defsym=__DATA_REGION_ORIGIN__=... instead.

Fixes https://github.com/llvm/llvm-project/issues/60362

Diff Detail

Event Timeline

benshi001 created this revision.Feb 21 2023, 7:46 PM
Herald added a project: Restricted Project. · View Herald TranscriptFeb 21 2023, 7:46 PM
benshi001 requested review of this revision.Feb 21 2023, 7:46 PM
Herald added a project: Restricted Project. · View Herald TranscriptFeb 21 2023, 7:46 PM
benshi001 edited the summary of this revision. (Show Details)Feb 21 2023, 7:46 PM

Different AVR device families have different default linker scripts in avr-ld, but they have 95% similarity and look like as following, we can use --defsym=__DATA_REGION_ORIGIN__ to decide the default data region range instead of current -Tdata.

/* Script for ld -r: link without relocation */
/* Copyright (C) 2014-2015 Free Software Foundation, Inc.
   Copying and distribution of this script, with or without modification,
   are permitted in any medium without royalty provided the copyright
   notice and this notice are preserved.  */
OUTPUT_FORMAT("elf32-avr","elf32-avr","elf32-avr")
OUTPUT_ARCH(avr:6)
__TEXT_REGION_ORIGIN__ = DEFINED(__TEXT_REGION_ORIGIN__) ? __TEXT_REGION_ORIGIN__ : 0;
__DATA_REGION_ORIGIN__ = DEFINED(__DATA_REGION_ORIGIN__) ? __DATA_REGION_ORIGIN__ : 0x800200;
__TEXT_REGION_LENGTH__ = DEFINED(__TEXT_REGION_LENGTH__) ? __TEXT_REGION_LENGTH__ : 1024K;
__DATA_REGION_LENGTH__ = DEFINED(__DATA_REGION_LENGTH__) ? __DATA_REGION_LENGTH__ : 0xfe00;
__EEPROM_REGION_LENGTH__ = DEFINED(__EEPROM_REGION_LENGTH__) ? __EEPROM_REGION_LENGTH__ : 64K;
__FUSE_REGION_LENGTH__ = DEFINED(__FUSE_REGION_LENGTH__) ? __FUSE_REGION_LENGTH__ : 1K;
__LOCK_REGION_LENGTH__ = DEFINED(__LOCK_REGION_LENGTH__) ? __LOCK_REGION_LENGTH__ : 1K;
__SIGNATURE_REGION_LENGTH__ = DEFINED(__SIGNATURE_REGION_LENGTH__) ? __SIGNATURE_REGION_LENGTH__ : 1K;
__USER_SIGNATURE_REGION_LENGTH__ = DEFINED(__USER_SIGNATURE_REGION_LENGTH__) ? __USER_SIGNATURE_REGION_LENGTH__ : 1K;
MEMORY
{
  text   (rx)   : ORIGIN = __TEXT_REGION_ORIGIN__, LENGTH = __TEXT_REGION_LENGTH__
  data   (rw!x) : ORIGIN = __DATA_REGION_ORIGIN__, LENGTH = __DATA_REGION_LENGTH__
  eeprom (rw!x) : ORIGIN = 0x810000, LENGTH = __EEPROM_REGION_LENGTH__
  fuse      (rw!x) : ORIGIN = 0x820000, LENGTH = __FUSE_REGION_LENGTH__
  lock      (rw!x) : ORIGIN = 0x830000, LENGTH = __LOCK_REGION_LENGTH__
  signature (rw!x) : ORIGIN = 0x840000, LENGTH = __SIGNATURE_REGION_LENGTH__
  user_signatures (rw!x) : ORIGIN = 0x850000, LENGTH = __USER_SIGNATURE_REGION_LENGTH__
}
SECTIONS
{
  /* Read-only sections, merged into text segment: */
  .hash        0 : { *(.hash)           }
  .dynsym      0 : { *(.dynsym)         }
  .dynstr      0 : { *(.dynstr)         }
  .gnu.version 0 : { *(.gnu.version)    }
  .gnu.version_d 0 : { *(.gnu.version_d)        }
  .gnu.version_r 0 : { *(.gnu.version_r)        }
  .rel.init    0 : { *(.rel.init)               }
  .rela.init   0 : { *(.rela.init)      }
  .rel.text    0 :
    {
      *(.rel.text)
    }
  .rela.text   0 :
    {
      *(.rela.text)
    }
  .rel.fini    0 : { *(.rel.fini)               }
  .rela.fini   0 : { *(.rela.fini)      }
  .rel.rodata  0 :
    {
      *(.rel.rodata)
    }
  .rela.rodata 0 :
    {
      *(.rela.rodata)
    }
  .rel.data    0 :
    {
      *(.rel.data)
    }
  .rela.data   0 :
    {
      *(.rela.data)
    }
  .rel.ctors   0 : { *(.rel.ctors)      }
  .rela.ctors  0 : { *(.rela.ctors)     }
  .rel.dtors   0 : { *(.rel.dtors)      }
  .rela.dtors  0 : { *(.rela.dtors)     }
  .rel.got     0 : { *(.rel.got)                }
  .rela.got    0 : { *(.rela.got)               }
  .rel.bss     0 : { *(.rel.bss)                }
  .rela.bss    0 : { *(.rela.bss)               }
  .rel.plt     0 : { *(.rel.plt)                }
  .rela.plt    0 : { *(.rela.plt)               }
  /* Internal text space or external memory.  */
  .text 0 :
  {
    *(.vectors)
    KEEP(*(.vectors))
    /* For data that needs to reside in the lower 64k of progmem.  */
    /* PR 13812: Placing the trampolines here gives a better chance
       that they will be in range of the code that uses them.  */
    /* The jump trampolines for the 16-bit limited relocs will reside here.  */
    *(.trampolines)
    /* avr-libc expects these data to reside in lower 64K. */
    /* For future tablejump instruction arrays for 3 byte pc devices.
       We don't relax jump/call instructions within these sections.  */
    *(.jumptables)
    /* For code that needs to reside in the lower 128k progmem.  */
    *(.lowtext)
    KEEP(SORT(*)(.ctors))
    KEEP(SORT(*)(.dtors))
    /* From this point on, we don't bother about wether the insns are
       below or above the 16 bits boundary.  */
    *(.init0)  /* Start here after reset.  */
    KEEP (*(.init0))
    *(.init1)
    KEEP (*(.init1))
    *(.init2)  /* Clear __zero_reg__, set up stack pointer.  */
    KEEP (*(.init2))
    *(.init3)
    KEEP (*(.init3))
    *(.init4)  /* Initialize data and BSS.  */
    KEEP (*(.init4))
    *(.init5)
    KEEP (*(.init5))
    *(.init6)  /* C++ constructors.  */
    KEEP (*(.init6))
    *(.init7)
    KEEP (*(.init7))
    *(.init8)
    KEEP (*(.init8))
    *(.init9)  /* Call main().  */
    KEEP (*(.init9))
    *(.text)
    *(.fini9)  /* _exit() starts here.  */
    KEEP (*(.fini9))
    *(.fini8)
    KEEP (*(.fini8))
    *(.fini7)
    KEEP (*(.fini7))
    *(.fini6)  /* C++ destructors.  */
    KEEP (*(.fini6))
    *(.fini5)
    KEEP (*(.fini5))
    *(.fini4)
    KEEP (*(.fini4))
    *(.fini3)
    KEEP (*(.fini3))
    *(.fini2)
    KEEP (*(.fini2))
    *(.fini1)
    KEEP (*(.fini1))
    *(.fini0)  /* Infinite loop after program termination.  */
    KEEP (*(.fini0))
  }
  .data        0 :
  {
    *(.data)
    *(.gnu.linkonce.d*)
    *(.rodata)  /* We need to include .rodata here if gcc is used */
     /* with -fdata-sections.  */
    *(.gnu.linkonce.r*)
  }
  .bss  0 :
  {
    *(.bss)
    *(COMMON)
  }
  /* Global data not cleared after reset.  */
  .noinit  0:
  {
    *(.noinit*)
  }
  .eeprom 0:
  {
    /* See .data above...  */
    KEEP(*(.eeprom*))
  }
  .fuse 0:
  {
    KEEP(*(.fuse))
    KEEP(*(.lfuse))
    KEEP(*(.hfuse))
    KEEP(*(.efuse))
  }
  .lock 0:
  {
    KEEP(*(.lock*))
  }
  .signature 0:
  {
    KEEP(*(.signature*))
  }
  .user_signatures 0:
  {
    KEEP(*(.user_signatures*))
  }
  /* Stabs debugging sections.  */
  .stab 0 : { *(.stab) }
  .stabstr 0 : { *(.stabstr) }
  .stab.excl 0 : { *(.stab.excl) }
  .stab.exclstr 0 : { *(.stab.exclstr) }
  .stab.index 0 : { *(.stab.index) }
  .stab.indexstr 0 : { *(.stab.indexstr) }
  .comment 0 : { *(.comment) }
  .note.gnu.build-id : { *(.note.gnu.build-id) }
  /* DWARF debug sections.
     Symbols in the DWARF debugging sections are relative to the beginning
     of the section so we begin them at 0.  */
  /* DWARF 1 */
  .debug          0 : { *(.debug) }
  .line           0 : { *(.line) }
  /* GNU DWARF 1 extensions */
  .debug_srcinfo  0 : { *(.debug_srcinfo) }
  .debug_sfnames  0 : { *(.debug_sfnames) }
  /* DWARF 1.1 and DWARF 2 */
  .debug_aranges  0 : { *(.debug_aranges) }
  .debug_pubnames 0 : { *(.debug_pubnames) }
  /* DWARF 2 */
  .debug_info     0 : { *(.debug_info) }
  .debug_abbrev   0 : { *(.debug_abbrev) }
  .debug_line     0 : { *(.debug_line .debug_line.* .debug_line_end ) }
  .debug_frame    0 : { *(.debug_frame) }
  .debug_str      0 : { *(.debug_str) }
  .debug_loc      0 : { *(.debug_loc) }
  .debug_macinfo  0 : { *(.debug_macinfo) }
  /* SGI/MIPS DWARF 2 extensions */
  .debug_weaknames 0 : { *(.debug_weaknames) }
  .debug_funcnames 0 : { *(.debug_funcnames) }
  .debug_typenames 0 : { *(.debug_typenames) }
  .debug_varnames  0 : { *(.debug_varnames) }
  /* DWARF 3 */
  .debug_pubtypes 0 : { *(.debug_pubtypes) }
  .debug_ranges   0 : { *(.debug_ranges) }
  /* DWARF Extension.  */
  .debug_macro    0 : { *(.debug_macro) }
}

This change is an improvement but why is __DATA_REGION_ORIGIN__ defined?

benshi001 added a comment.EditedFeb 28 2023, 9:01 PM

This change is an improvement but why is __DATA_REGION_ORIGIN__ defined?

Because,

  1. Different AVR devices have different SRAM address range (start & length), and __DATA_REGION_ORIGIN__ specifies the start address.
  2. __DATA_REGION_ORIGIN__ is an expected symbol by GNU-ld's default AVR linker script for deciding SRAM region. Please refer to https://sourceware.org/git/?p=binutils-gdb.git;a=blob;f=ld/scripttempl/avr.sc;h=9abfc536121cdfad67dfed295d4d879fa3722b1b;hb=d80081ef39c729b0f5f548c9567be2d80dcc2fd0

The benefits

  1. User can fully use the SRAM region without concerning about linking, by using default values/choices.
  2. User can specify his own device's SRAM region by using his own linker script, (of course, he should not use __DATA_REGION_ORIGIN__ in his linker script, but this is much better than current -Tdata).
MaskRay accepted this revision.Feb 28 2023, 11:15 PM
This revision is now accepted and ready to land.Feb 28 2023, 11:15 PM