Mirror of the gdb-patches mailing list
 help / color / mirror / Atom feed
* [ppc64-linux]: correctly find a BFD's code entry point address
@ 2003-06-12 23:12 Jim Blandy
  2003-06-13  5:45 ` Kevin Buettner
  0 siblings, 1 reply; 4+ messages in thread
From: Jim Blandy @ 2003-06-12 23:12 UTC (permalink / raw)
  To: gdb-patches


2003-06-12  Jim Blandy  <jimb@redhat.com>

	* ppc-linux-tdep.c (ppc64_linux_bfd_entry_point): New function.
	(ppc_linux_init_abi): Register it as our bfd_entry_point method.

Index: gdb/ppc-linux-tdep.c
===================================================================
RCS file: /cvs/src/src/gdb/ppc-linux-tdep.c,v
retrieving revision 1.28.8.18
diff -c -r1.28.8.18 ppc-linux-tdep.c
*** gdb/ppc-linux-tdep.c	12 Jun 2003 21:25:50 -0000	1.28.8.18
--- gdb/ppc-linux-tdep.c	12 Jun 2003 23:11:22 -0000
***************
*** 941,946 ****
--- 941,1046 ----
  }
  
  
+ /* Return the unrelocated code address at which execution begins for
+    ABFD, under the 64-bit PowerPC Linux ABI.  On that system, the ELF
+    header e_entry field (which is what bfd_get_start_address gives
+    you) is the address of the function descriptor for the startup
+    function, not the address of the actual machine instruction you
+    jump to.
+ 
+    This function doesn't just go and read the entry point from the
+    function descriptor.  We need it to work when ABFD is the dynamic
+    linker, immediately after an exec.  But ld.so is a dynamic
+    executable itself on PPC64 Linux, so it appears in memory wherever
+    the kernel drops it; this means that bfd_get_start_address's result
+    needs to be adjusted --- by some offset we don't know.  So we can't
+    find the descriptor's address in memory to read the entry point
+    from it.
+ 
+    Instead, we do it all based on ABFD's symbol table.  We take the
+    address from bfd_get_start_address, find each symbol at that
+    address, stick a '.' on the front of its name to get the entry
+    point symbol name, try to look that up, and return the value of
+    what we find, if anything.  We never touch memory, or talk with the
+    kernel about the inferior at all.
+ 
+    Now, this address we return is straight from the symbol table, so
+    it hasn't been adjusted to take into account where ABFD was loaded.
+    But that's okay --- our job is just to return the unrelocated code
+    address.  */
+ static CORE_ADDR
+ ppc64_linux_bfd_entry_point (struct gdbarch *gdbarch, bfd *abfd)
+ {
+   long storage_needed;
+ 
+   storage_needed = bfd_get_symtab_upper_bound (abfd);
+ 
+   if (storage_needed > 0)
+     {
+       asymbol **symbol_table;
+       unsigned int symbol_table_len;
+       struct cleanup *back_to;
+       unsigned int i;
+       CORE_ADDR start_address;
+ 
+       symbol_table = (asymbol **) xmalloc (storage_needed);
+       back_to = make_cleanup (xfree, symbol_table);
+       symbol_table_len = bfd_canonicalize_symtab (abfd, symbol_table);
+ 
+       /* Find the symbol naming the start function's descriptor.  Its
+          value must be the BFD's start address, and it must be in a
+          data section.
+ 
+          Also, the symbol's name must be non-empty.  A lot of symtabs
+          seem to contain a bunch of symbols with no name whose value
+          is zero relative to the start of the data section; if the
+          start function descriptor is the first thing in the data
+          section, we'll get more false positives than we'd like.  */
+       start_address = bfd_get_start_address (abfd);
+       for (i = 0; i < symbol_table_len; i++)
+         if (bfd_asymbol_value (symbol_table[i]) == start_address
+             && symbol_table[i]->section->flags & SEC_DATA
+             && bfd_asymbol_name (symbol_table[i])[0] != '\0')
+           {
+             /* Okay, we've found a symbol whose value and section are
+                right.  Construct the name of the corresponding entry
+                point symbol and see if we can find a symbol with that
+                name in a code section.  */
+             const char *desc_name = bfd_asymbol_name (symbol_table[i]);
+             char *entry_pt_name = alloca (strlen (desc_name) + 2);
+             int j;
+ 
+             entry_pt_name[0] = '.';
+             strcpy (entry_pt_name + 1, desc_name);
+ 
+             for (j = 0; j < symbol_table_len; j++)
+               if ((strcmp (bfd_asymbol_name (symbol_table[j]), entry_pt_name)
+                    == 0)
+                   && symbol_table[j]->section->flags & SEC_CODE)
+                 /* Yay!  What a coincidence.  Let's assume this is the
+                    entry point symbol.  */
+                 {
+                   CORE_ADDR entry_point = bfd_asymbol_value (symbol_table[j]);
+                   do_cleanups (back_to);
+                   return entry_point;
+                 }
+             
+             /* No good --- there's no symbol by that name.  Perhaps
+                symbol_table[i] is just coincidentally equal to the
+                start address; after all, there could be many symbols
+                with the same value.  Continue the search.  */
+           }
+ 
+       /* No good --- no data symbol at the start address has a
+          '.'-prefixed counterpart in a code section.  */
+       do_cleanups (back_to);
+     }
+   
+   /* No good --- no symbols at all, or no usable match.  We give up!  */
+   return 0;
+ }
+ 
+ 
  enum {
    ELF_NGREG = 48,
    ELF_NFPREG = 33,
***************
*** 1068,1073 ****
--- 1168,1175 ----
          (gdbarch, ppc64_linux_convert_from_func_ptr_addr);
  
        set_gdbarch_call_dummy_address (gdbarch, ppc64_call_dummy_address);
+       
+       set_gdbarch_bfd_entry_point (gdbarch, ppc64_linux_bfd_entry_point);
  
        set_gdbarch_in_solib_call_trampoline
          (gdbarch, ppc64_in_solib_call_trampoline);


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [ppc64-linux]: correctly find a BFD's code entry point address
  2003-06-12 23:12 [ppc64-linux]: correctly find a BFD's code entry point address Jim Blandy
@ 2003-06-13  5:45 ` Kevin Buettner
  2003-06-14  0:03   ` Jim Blandy
  0 siblings, 1 reply; 4+ messages in thread
From: Kevin Buettner @ 2003-06-13  5:45 UTC (permalink / raw)
  To: Jim Blandy, gdb-patches

On Jun 12,  6:12pm, Jim Blandy wrote:

> + /* Return the unrelocated code address at which execution begins for
> +    ABFD, under the 64-bit PowerPC Linux ABI.  On that system, the ELF
> +    header e_entry field (which is what bfd_get_start_address gives
> +    you) is the address of the function descriptor for the startup
> +    function, not the address of the actual machine instruction you
> +    jump to.
> + 
> +    This function doesn't just go and read the entry point from the
> +    function descriptor.  We need it to work when ABFD is the dynamic
> +    linker, immediately after an exec.  But ld.so is a dynamic
> +    executable itself on PPC64 Linux, so it appears in memory wherever
> +    the kernel drops it; this means that bfd_get_start_address's result
> +    needs to be adjusted --- by some offset we don't know.  So we can't
> +    find the descriptor's address in memory to read the entry point
> +    from it.
> + 
> +    Instead, we do it all based on ABFD's symbol table.  We take the
> +    address from bfd_get_start_address, find each symbol at that
> +    address, stick a '.' on the front of its name to get the entry
> +    point symbol name, try to look that up, and return the value of
> +    what we find, if anything.  We never touch memory, or talk with the
> +    kernel about the inferior at all.
> + 
> +    Now, this address we return is straight from the symbol table, so
> +    it hasn't been adjusted to take into account where ABFD was loaded.
> +    But that's okay --- our job is just to return the unrelocated code
> +    address.  */

This approach strikes me as somewhat more complicated (and fragile)
than need be.  I think it would be preferable to simply fetch the
necessary bytes from the address given by bfd_get_start_address in the
executable (or object) file.

Nice description though; I really appreciate comments like this.

Kevin


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [ppc64-linux]: correctly find a BFD's code entry point address
  2003-06-13  5:45 ` Kevin Buettner
@ 2003-06-14  0:03   ` Jim Blandy
  2003-06-18  0:22     ` Kevin Buettner
  0 siblings, 1 reply; 4+ messages in thread
From: Jim Blandy @ 2003-06-14  0:03 UTC (permalink / raw)
  To: Kevin Buettner; +Cc: gdb-patches

Kevin Buettner <kevinb@redhat.com> writes:

> On Jun 12,  6:12pm, Jim Blandy wrote:
> 
> > + /* Return the unrelocated code address at which execution begins for
> > +    ABFD, under the 64-bit PowerPC Linux ABI.  On that system, the ELF
> > +    header e_entry field (which is what bfd_get_start_address gives
> > +    you) is the address of the function descriptor for the startup
> > +    function, not the address of the actual machine instruction you
> > +    jump to.
> > + 
> > +    This function doesn't just go and read the entry point from the
> > +    function descriptor.  We need it to work when ABFD is the dynamic
> > +    linker, immediately after an exec.  But ld.so is a dynamic
> > +    executable itself on PPC64 Linux, so it appears in memory wherever
> > +    the kernel drops it; this means that bfd_get_start_address's result
> > +    needs to be adjusted --- by some offset we don't know.  So we can't
> > +    find the descriptor's address in memory to read the entry point
> > +    from it.
> > + 
> > +    Instead, we do it all based on ABFD's symbol table.  We take the
> > +    address from bfd_get_start_address, find each symbol at that
> > +    address, stick a '.' on the front of its name to get the entry
> > +    point symbol name, try to look that up, and return the value of
> > +    what we find, if anything.  We never touch memory, or talk with the
> > +    kernel about the inferior at all.
> > + 
> > +    Now, this address we return is straight from the symbol table, so
> > +    it hasn't been adjusted to take into account where ABFD was loaded.
> > +    But that's okay --- our job is just to return the unrelocated code
> > +    address.  */
> 
> This approach strikes me as somewhat more complicated (and fragile)
> than need be.  I think it would be preferable to simply fetch the
> necessary bytes from the address given by bfd_get_start_address in the
> executable (or object) file.
> 
> Nice description though; I really appreciate comments like this.

Thanks!  I redid the patch as you suggest, and it's much smaller and
simpler.  How's this:

2003-06-12  Jim Blandy  <jimb@redhat.com>

	* ppc-linux-tdep.c (ppc64_linux_bfd_entry_point): New function.
	(ppc_linux_init_abi): Register it as our bfd_entry_point method.

Index: gdb/ppc-linux-tdep.c
===================================================================
RCS file: /cvs/src/src/gdb/ppc-linux-tdep.c,v
retrieving revision 1.32
diff -c -r1.32 ppc-linux-tdep.c
*** gdb/ppc-linux-tdep.c	13 Jun 2003 00:06:11 -0000	1.32
--- gdb/ppc-linux-tdep.c	13 Jun 2003 22:46:59 -0000
***************
*** 884,889 ****
--- 884,935 ----
  }
  
  
+ /* Return the unrelocated code address at which execution begins for
+    ABFD, under the 64-bit PowerPC Linux ABI.
+ 
+    On that system, the ELF header's e_entry field (which is what
+    bfd_get_start_address gives you) is not the address of the actual
+    machine instruction you need to jump to, as it is on almost every
+    other target.  Instead, it's the address of a function descriptor
+    for the start function.  A function descriptor is a structure
+    containing three addresses: the entry point, the TOC pointer for
+    the function, and an environment pointer for the function.  The
+    first field is what we want to return.
+ 
+    So we find the section containing the start address, read the
+    address-sized word there from the BFD, and return it (0 on failure).  */
+ static CORE_ADDR
+ ppc64_linux_bfd_entry_point (struct gdbarch *gdbarch, bfd *abfd)
+ {
+   CORE_ADDR start_address = bfd_get_start_address (abfd);
+   CORE_ADDR addr_size = (bfd_arch_bits_per_address (abfd)
+                          / bfd_arch_bits_per_byte (abfd));
+   unsigned char *entry_pt_buf = alloca (addr_size);
+   asection *sec;
+ 
+   /* Find the first section whose address range covers the whole
+      address word at the start address.  */
+   for (sec = abfd->sections; sec; sec = sec->next)
+     if (bfd_get_section_vma (sec) <= start_address
+         && ((start_address + addr_size)
+             <= (bfd_get_section_vma (sec) + bfd_section_size (sec))))
+       break;
+   if (! sec)
+     return 0;
+ 
+   /* Seek to the start address's offset in the file; read the word there.  */
+   if (bfd_seek (abfd, 
+                 sec->filepos + (start_address - bfd_get_section_vma (sec)),
+                 SEEK_SET)
+       || bfd_bread (entry_pt_buf, addr_size, abfd) != addr_size)
+     return 0;
+       
+   /* That's the actual code entry point.  */
+   return (CORE_ADDR) bfd_get (bfd_arch_bits_per_address (abfd),
+                               abfd, entry_pt_buf);
+ }
+ 
+ 
  enum {
    ELF_NGREG = 48,
    ELF_NFPREG = 33,
***************
*** 1008,1013 ****
--- 1054,1061 ----
        set_gdbarch_in_solib_call_trampoline
          (gdbarch, ppc64_in_solib_call_trampoline);
        set_gdbarch_skip_trampoline_code (gdbarch, ppc64_skip_trampoline_code);
+       
+       set_gdbarch_bfd_entry_point (gdbarch, ppc64_linux_bfd_entry_point);
      }
  }
  


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [ppc64-linux]: correctly find a BFD's code entry point address
  2003-06-14  0:03   ` Jim Blandy
@ 2003-06-18  0:22     ` Kevin Buettner
  0 siblings, 0 replies; 4+ messages in thread
From: Kevin Buettner @ 2003-06-18  0:22 UTC (permalink / raw)
  To: Jim Blandy; +Cc: gdb-patches

On Jun 13,  7:04pm, Jim Blandy wrote:

> Thanks!  I redid the patch as you suggest, and it's much smaller and
> simpler.  How's this:
> 
> 2003-06-12  Jim Blandy  <jimb@redhat.com>
> 
> 	* ppc-linux-tdep.c (ppc64_linux_bfd_entry_point): New function.
> 	(ppc_linux_init_abi): Register it as our bfd_entry_point method.

Yeah, this version is much nicer.  Thanks for redoing it!

If I'm not mistaken, there's a dependency on some gdbarch stuff, right?
Once that's resolved, this can go in.

Kevin


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2003-06-18  0:22 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-06-12 23:12 [ppc64-linux]: correctly find a BFD's code entry point address Jim Blandy
2003-06-13  5:45 ` Kevin Buettner
2003-06-14  0:03   ` Jim Blandy
2003-06-18  0:22     ` Kevin Buettner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox