Mirror of the gdb-patches mailing list
 help / color / mirror / Atom feed
* [rfa] Add SYMBOL_SET_LINKAGE_NAME
@ 2004-02-16 21:24 Daniel Jacobowitz
  2004-02-16 21:53 ` Elena Zannoni
  2004-02-18  0:20 ` David Carlton
  0 siblings, 2 replies; 24+ messages in thread
From: Daniel Jacobowitz @ 2004-02-16 21:24 UTC (permalink / raw)
  To: gdb-patches

This patch adds a macro, SYMBOL_SET_LINKAGE_NAME, which is used to set
a symbol's name when the name should not be demangled.  Used for things
like typedefs whose name comes from debug info.  I did not change
anything related to memory allocation when I did this - though in
hpread I found a few inconsistencies.

After this patch and my others from today there are no direct
assignments to the symbol name. In addition to the cleanup value, I'm
testing an approach which would change the storage of symbol names,
which prompted me to do this.  This patch has a trivial dependency on
the previous hpread patch (introduction of a local variable in
hpread_process_one_debug_symbol) but otherwise they can be applied in
any order.

OK?

-- 
Daniel Jacobowitz
MontaVista Software                         Debian GNU/Linux Developer

2004-02-16  Daniel Jacobowitz  <drow@mvista.com>

	* symtab.h (SYMBOL_SET_LINKAGE_NAME): Define.

	* ada-lang.c (add_symbols_from_enclosing_procs): Use
	SYMBOL_SET_LINKAGE_NAME.
	* dwarfread.c (enum_type, synthesize_typedef): Likewise.
	* jv-lang.c (add_class_symbol): Likewise.
	* stabsread.c (patch_block_stabs, define_symbol, read_enum_type)
	(common_block_end): Likewise.
	* xcoffread.c (process_xcoff_symbol): Likewise.
	* hpread.c (hpread_read_enum_type, hpread_read_function_type)
	(hpread_read_doc_function_type): Use SYMBOL_SET_LINKAGE_NAME.
	(hpread_process_one_debug_symbol): Likewise.

--- hpread.c	7 Feb 2004 23:13:47 -0000	1.45
+++ hpread.c	15 Feb 2004 23:56:30 -0000
@@ -5101,7 +5098,8 @@ hpread_process_one_debug_symbol (union d
   sym = (struct symbol *) obstack_alloc (&objfile->objfile_obstack,
 					 sizeof (struct symbol));
   memset (sym, 0, sizeof (struct symbol));
-  DEPRECATED_SYMBOL_NAME (sym) = obsavestring (name, strlen (name), &objfile->objfile_obstack);
+  set_name = obsavestring (name, strlen (name), &objfile->objfile_obstack);
+  SYMBOL_SET_LINKAGE_NAME (sym, set_name);
   SYMBOL_LANGUAGE (sym) = language_auto;
   SYMBOL_DOMAIN (sym) = VAR_DOMAIN;
   SYMBOL_LINE (sym) = 0;
@@ -5889,7 +5867,7 @@ hpread_process_one_debug_symbol (union d
 	    newsym = (struct symbol *) obstack_alloc (&objfile->objfile_obstack,
 						    sizeof (struct symbol));
 	    memset (newsym, 0, sizeof (struct symbol));
-	    DEPRECATED_SYMBOL_NAME (newsym) = name;
+	    SYMBOL_SET_LINKAGE_NAME (newsym, name);
 	    SYMBOL_LANGUAGE (newsym) = language_auto;
 	    SYMBOL_DOMAIN (newsym) = VAR_DOMAIN;
 	    SYMBOL_LINE (newsym) = 0;
--- ada-lang.c	23 Jan 2004 23:03:28 -0000	1.35
+++ ada-lang.c	15 Feb 2004 23:56:24 -0000
@@ -3533,7 +3533,7 @@ add_symbols_from_enclosing_procs (const 
       /* Initialize the local variable symbol that stands for the
        * static link (when it exists). */
       static_link = &static_link_sym;
-      DEPRECATED_SYMBOL_NAME (static_link) = "";
+      SYMBOL_SET_LINKAGE_NAME (static_link, "");
       SYMBOL_LANGUAGE (static_link) = language_unknown;
       SYMBOL_CLASS (static_link) = LOC_LOCAL;
       SYMBOL_DOMAIN (static_link) = VAR_DOMAIN;
--- jv-lang.c	7 Feb 2004 23:13:47 -0000	1.29
+++ jv-lang.c	15 Feb 2004 23:56:30 -0000
@@ -142,7 +142,7 @@ add_class_symbol (struct type *type, COR
     obstack_alloc (&dynamics_objfile->objfile_obstack, sizeof (struct symbol));
   memset (sym, 0, sizeof (struct symbol));
   SYMBOL_LANGUAGE (sym) = language_java;
-  DEPRECATED_SYMBOL_NAME (sym) = TYPE_TAG_NAME (type);
+  SYMBOL_SET_LINKAGE_NAME (sym, TYPE_TAG_NAME (type));
   SYMBOL_CLASS (sym) = LOC_TYPEDEF;
   /*  SYMBOL_VALUE (sym) = valu; */
   SYMBOL_TYPE (sym) = type;
--- stabsread.c	7 Feb 2004 23:13:47 -0000	1.74
+++ stabsread.c	15 Feb 2004 23:56:34 -0000
@@ -367,6 +367,8 @@ patch_block_stabs (struct pending *symbo
 	  sym = find_symbol_in_list (symbols, name, pp - name);
 	  if (!sym)
 	    {
+	      char *sym_name;
+
 	      /* FIXME-maybe: it would be nice if we noticed whether
 	         the variable was defined *anywhere*, not just whether
 	         it is defined in this compilation unit.  But neither
@@ -385,8 +387,9 @@ patch_block_stabs (struct pending *symbo
 	      memset (sym, 0, sizeof (struct symbol));
 	      SYMBOL_DOMAIN (sym) = VAR_DOMAIN;
 	      SYMBOL_CLASS (sym) = LOC_OPTIMIZED_OUT;
-	      DEPRECATED_SYMBOL_NAME (sym) =
-		obsavestring (name, pp - name, &objfile->objfile_obstack);
+	      sym_name = obsavestring (name, pp - name,
+				       &objfile->objfile_obstack);
+	      SYMBOL_SET_LINKAGE_NAME (sym, sym_name);
 	      pp += 2;
 	      if (*(pp - 1) == 'F' || *(pp - 1) == 'f')
 		{
@@ -641,12 +644,15 @@ define_symbol (CORE_ADDR valu, char *str
 
   if (is_cplus_marker (string[0]))
     {
+      char *sym_name;
+
       /* Special GNU C++ names.  */
       switch (string[1])
 	{
 	case 't':
-	  DEPRECATED_SYMBOL_NAME (sym) = obsavestring ("this", strlen ("this"),
-					    &objfile->objfile_obstack);
+	  sym_name = obsavestring ("this", strlen ("this"),
+				   &objfile->objfile_obstack);
+	  SYMBOL_SET_LINKAGE_NAME (sym, sym_name);
 	  break;
 
 	case 'v':		/* $vtbl_ptr_type */
@@ -654,8 +660,9 @@ define_symbol (CORE_ADDR valu, char *str
 	  goto normal;
 
 	case 'e':
-	  DEPRECATED_SYMBOL_NAME (sym) = obsavestring ("eh_throw", strlen ("eh_throw"),
-					    &objfile->objfile_obstack);
+	  sym_name = obsavestring ("eh_throw", strlen ("eh_throw"),
+				   &objfile->objfile_obstack);
+	  SYMBOL_SET_LINKAGE_NAME (sym, sym_name);
 	  break;
 
 	case '_':
@@ -1130,13 +1137,15 @@ define_symbol (CORE_ADDR valu, char *str
       SYMBOL_CLASS (sym) = LOC_STATIC;
       SYMBOL_VALUE_ADDRESS (sym) = valu;
 #ifdef STATIC_TRANSFORM_NAME
-      if (IS_STATIC_TRANSFORM_NAME (DEPRECATED_SYMBOL_NAME (sym)))
+      if (IS_STATIC_TRANSFORM_NAME (SYMBOL_LINKAGE_NAME (sym)))
 	{
 	  struct minimal_symbol *msym;
-	  msym = lookup_minimal_symbol (DEPRECATED_SYMBOL_NAME (sym), NULL, objfile);
+	  msym = lookup_minimal_symbol (SYMBOL_LINKAGE_NAME (sym), NULL, objfile);
 	  if (msym != NULL)
 	    {
-	      DEPRECATED_SYMBOL_NAME (sym) = STATIC_TRANSFORM_NAME (DEPRECATED_SYMBOL_NAME (sym));
+	      char *sym_name
+		= STATIC_TRANSFORM_NAME (SYMBOL_LINKAGE_NAME (sym));
+	      SYMBOL_SET_LINKAGE_NAME (sym, sym_name);
 	      SYMBOL_VALUE_ADDRESS (sym) = SYMBOL_VALUE_ADDRESS (msym);
 	    }
 	}
@@ -1276,13 +1285,16 @@ define_symbol (CORE_ADDR valu, char *str
       SYMBOL_CLASS (sym) = LOC_STATIC;
       SYMBOL_VALUE_ADDRESS (sym) = valu;
 #ifdef STATIC_TRANSFORM_NAME
-      if (IS_STATIC_TRANSFORM_NAME (DEPRECATED_SYMBOL_NAME (sym)))
+	}
+      if (IS_STATIC_TRANSFORM_NAME (SYMBOL_LINKAGE_NAME (sym)))
 	{
 	  struct minimal_symbol *msym;
-	  msym = lookup_minimal_symbol (DEPRECATED_SYMBOL_NAME (sym), NULL, objfile);
+	  msym = lookup_minimal_symbol (SYMBOL_LINKAGE_NAME (sym), NULL, objfile);
 	  if (msym != NULL)
 	    {
-	      DEPRECATED_SYMBOL_NAME (sym) = STATIC_TRANSFORM_NAME (DEPRECATED_SYMBOL_NAME (sym));
+	      char *sym_name
+		= STATIC_TRANSFORM_NAME (SYMBOL_LINKAGE_NAME (sym));
+	      SYMBOL_SET_LINKAGE_NAME (sym, sym_name);
 	      SYMBOL_VALUE_ADDRESS (sym) = SYMBOL_VALUE_ADDRESS (msym);
 	    }
 	}
@@ -3493,7 +3505,7 @@ read_enum_type (char **pp, struct type *
       sym = (struct symbol *)
 	obstack_alloc (&objfile->objfile_obstack, sizeof (struct symbol));
       memset (sym, 0, sizeof (struct symbol));
-      DEPRECATED_SYMBOL_NAME (sym) = name;
+      SYMBOL_SET_LINKAGE_NAME (sym, name);
       SYMBOL_LANGUAGE (sym) = current_subfile->language;
       SYMBOL_CLASS (sym) = LOC_CONST;
       SYMBOL_DOMAIN (sym) = VAR_DOMAIN;
@@ -4068,7 +4080,7 @@ common_block_end (struct objfile *objfil
     obstack_alloc (&objfile->objfile_obstack, sizeof (struct symbol));
   memset (sym, 0, sizeof (struct symbol));
   /* Note: common_block_name already saved on objfile_obstack */
-  DEPRECATED_SYMBOL_NAME (sym) = common_block_name;
+  SYMBOL_SET_LINKAGE_NAME (sym, common_block_name);
   SYMBOL_CLASS (sym) = LOC_BLOCK;
 
   /* Now we copy all the symbols which have been defined since the BCOMM.  */
--- dwarfread.c	14 Feb 2004 15:46:32 -0000	1.37
+++ dwarfread.c	15 Feb 2004 23:56:27 -0000
@@ -1710,8 +1710,9 @@ enum_type (struct dieinfo *dip, struct o
 	  sym = (struct symbol *) obstack_alloc (&objfile->objfile_obstack,
 						 sizeof (struct symbol));
 	  memset (sym, 0, sizeof (struct symbol));
-	  DEPRECATED_SYMBOL_NAME (sym) = create_name (list->field.name,
-					   &objfile->objfile_obstack);
+	  SYMBOL_SET_LINKAGE_NAME (sym,
+				   create_name (list->field.name,
+						&objfile->objfile_obstack));
 	  SYMBOL_INIT_LANGUAGE_SPECIFIC (sym, cu_language);
 	  SYMBOL_DOMAIN (sym) = VAR_DOMAIN;
 	  SYMBOL_CLASS (sym) = LOC_CONST;
@@ -3013,8 +3014,8 @@ synthesize_typedef (struct dieinfo *dip,
 	obstack_alloc (&objfile->objfile_obstack, sizeof (struct symbol));
       OBJSTAT (objfile, n_syms++);
       memset (sym, 0, sizeof (struct symbol));
-      DEPRECATED_SYMBOL_NAME (sym) = create_name (dip->at_name,
-				       &objfile->objfile_obstack);
+      SYMBOL_SET_LINKAGE_NAME (sym, create_name (dip->at_name,
+						 &objfile->objfile_obstack));
       SYMBOL_INIT_LANGUAGE_SPECIFIC (sym, cu_language);
       SYMBOL_TYPE (sym) = type;
       SYMBOL_CLASS (sym) = LOC_TYPEDEF;
--- symtab.h	7 Feb 2004 23:13:47 -0000	1.87
+++ symtab.h	15 Feb 2004 23:56:37 -0000
@@ -195,6 +195,11 @@ extern void symbol_set_names (struct gen
 			      const char *linkage_name, int len,
 			      struct objfile *objfile);
 
+#define SYMBOL_SET_LINKAGE_NAME(symbol,linkage_name)		\
+  do {								\
+    (symbol)->ginfo.name = (linkage_name);			\
+  } while (0)
+
 /* Now come lots of name accessor macros.  Short version as to when to
    use which: Use SYMBOL_NATURAL_NAME to refer to the name of the
    symbol in the original source code.  Use SYMBOL_LINKAGE_NAME if you
--- xcoffread.c	14 Feb 2004 15:46:33 -0000	1.40
+++ xcoffread.c	15 Feb 2004 23:56:38 -0000
@@ -1477,7 +1477,8 @@ process_xcoff_symbol (struct coff_symbol
          will be patched with the type from its stab entry later on in
          patch_block_stabs (), unless the file was compiled without -g.  */
 
-      DEPRECATED_SYMBOL_NAME (sym) = SYMNAME_ALLOC (name, symname_alloced);
+      SYMBOL_SET_LINKAGE_NAME (sym,
+			       SYMNAME_ALLOC (name, symname_alloced));
       SYMBOL_TYPE (sym) = func_symbol_type;
 
       SYMBOL_CLASS (sym) = LOC_BLOCK;


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [rfa] Add SYMBOL_SET_LINKAGE_NAME
  2004-02-16 21:24 [rfa] Add SYMBOL_SET_LINKAGE_NAME Daniel Jacobowitz
@ 2004-02-16 21:53 ` Elena Zannoni
  2004-02-16 22:57   ` Daniel Jacobowitz
  2004-02-18  0:20 ` David Carlton
  1 sibling, 1 reply; 24+ messages in thread
From: Elena Zannoni @ 2004-02-16 21:53 UTC (permalink / raw)
  To: Daniel Jacobowitz; +Cc: gdb-patches

Daniel Jacobowitz writes:
 > After this patch and my others from today there are no direct
 > assignments to the symbol name. In addition to the cleanup value, I'm
 > testing an approach which would change the storage of symbol names,
 > which prompted me to do this.  

can you elaborate on where you are going?


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [rfa] Add SYMBOL_SET_LINKAGE_NAME
  2004-02-16 21:53 ` Elena Zannoni
@ 2004-02-16 22:57   ` Daniel Jacobowitz
  2004-02-16 23:35     ` Paul Hilfinger
  2004-02-17 15:57     ` Andrew Cagney
  0 siblings, 2 replies; 24+ messages in thread
From: Daniel Jacobowitz @ 2004-02-16 22:57 UTC (permalink / raw)
  To: Elena Zannoni; +Cc: gdb-patches

On Mon, Feb 16, 2004 at 04:48:57PM -0500, Elena Zannoni wrote:
> Daniel Jacobowitz writes:
>  > After this patch and my others from today there are no direct
>  > assignments to the symbol name. In addition to the cleanup value, I'm
>  > testing an approach which would change the storage of symbol names,
>  > which prompted me to do this.  
> 
> can you elaborate on where you are going?

Sure.  I'm not sure if it's actually going to end up this way, since
I'm thinking it wasn't a great idea and it has some truly gross bits I
haven't figured out what to do with yet - it was just a hack job last
weekend.  But here's what my current tree does.

The C++ demangled name pointer in lang_specific is removed.  The name
pointer becomes a union, and a flag bit (there's about a byte's worth
of empty space in general_symbol_info) is added.  They look like this:

  /* The name(s) of this symbol.  Storage for the names will be generally
     be allocated on the objfile_obstack for the associated objfile, through
     a hash table or bcache.  */

  union {
    /* If the flag HAS_DEMANGLED_NAMES (below) is clear, this points to
       the name of the symbol.  */
    char *name;

    /* If HAS_DEMANGLED_NAMES is set, this points to a structure describing
       this symbol's names.  The structure may be shared.  */
    struct symbol_name_info *names;
  } name_union;

  /* A flag indicating which member of NAME_UNION is in use.  */
  unsigned int has_demangled_names : 1;

struct symbol_name_info
{
  /* ADD MORE COMMENTS */
  char *linkage_name;
  char *demangled_name;
  unsigned int has_full_demangled_name : 1;
  unsigned int linkage_len : 31;
};

I store several different pieces of data here.  ->linkage_name +
->linkage_len points to the result of demangling linkage_name without
DMGL_PARAMS: that is, without any function arguments or return type
information. If HAS_FULL_DEMANGLED_NAME, then demangled_name points to
the normal (full) demangled name.  Otherwise it points to the
appropriate obstack to store the demangled name on.

Then I convert the symbol readers to only use the short demangled name,
which I call SYMBOL_DEMANGLED_SEARCH_NAME.  SYMBOL_DEMANGLED_NAME fills
in lazily when needed.  Some memory is saved directly because the
symbol_name_info structure is shared between partial, full, and minimal
symbols; removing the demangled_name pointer leaves net savings of a
(very roughly) a word per C++ symbol.  Symbols which do not have a
demangled name no longer have space allocated for one, so there is also
a savings for straight C code.

I wanted the without-arguments names available because, in 99% of
cases, they are all we need.  Less memory, less time in the demangler,
et cetera.

This whole project grew out of profiling results for a large dwarf2 C
application, which shows a similar profile to C++ : the biggest hot
spot in startup time today is compare_psymbols.  My hope is to do that
using strcmp, or something even more efficient, instead of ~6,000,000
calls to strcmp_iw.  I started working on the symbol name cleanups
first, but I think saving the hash code in the symbol_name_info might
be even more effective.

-- 
Daniel Jacobowitz
MontaVista Software                         Debian GNU/Linux Developer


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [rfa] Add SYMBOL_SET_LINKAGE_NAME
  2004-02-16 22:57   ` Daniel Jacobowitz
@ 2004-02-16 23:35     ` Paul Hilfinger
  2004-02-17  0:05       ` Daniel Jacobowitz
  2004-02-17 15:57     ` Andrew Cagney
  1 sibling, 1 reply; 24+ messages in thread
From: Paul Hilfinger @ 2004-02-16 23:35 UTC (permalink / raw)
  To: Daniel Jacobowitz; +Cc: gdb-patches


 > Sure.  I'm not sure if it's actually going to end up this way, since
 > I'm thinking it wasn't a great idea and it has some truly gross bits I
 > haven't figured out what to do with yet - it was just a hack job last
 > weekend.  But here's what my current tree does.

For reasons we discussed a while ago, a change along these lines would 
certainly be helpful to us.  In fact, in response to a request that I
make demangled names more permanent, I found myself forced to
introduce essentially your has_demangled_names flag into the Ada
version.  So, it's in our interest to ask what these reservations and 
"gross bits" are and whether we might offer any suggestions.

Paul Hilfinger
ACT


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [rfa] Add SYMBOL_SET_LINKAGE_NAME
  2004-02-16 23:35     ` Paul Hilfinger
@ 2004-02-17  0:05       ` Daniel Jacobowitz
  2004-02-17  9:59         ` Paul N. Hilfinger
  0 siblings, 1 reply; 24+ messages in thread
From: Daniel Jacobowitz @ 2004-02-17  0:05 UTC (permalink / raw)
  To: Hilfinger; +Cc: gdb-patches

On Mon, Feb 16, 2004 at 03:35:49PM -0800, Paul Hilfinger wrote:
> 
>  > Sure.  I'm not sure if it's actually going to end up this way, since
>  > I'm thinking it wasn't a great idea and it has some truly gross bits I
>  > haven't figured out what to do with yet - it was just a hack job last
>  > weekend.  But here's what my current tree does.
> 
> For reasons we discussed a while ago, a change along these lines would 
> certainly be helpful to us.  In fact, in response to a request that I
> make demangled names more permanent, I found myself forced to
> introduce essentially your has_demangled_names flag into the Ada
> version.  So, it's in our interest to ask what these reservations and 
> "gross bits" are and whether we might offer any suggestions.

Did you end up with basenames or with the equivalent of "short"
demangled names?

-- 
Daniel Jacobowitz
MontaVista Software                         Debian GNU/Linux Developer


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [rfa] Add SYMBOL_SET_LINKAGE_NAME
  2004-02-17  0:05       ` Daniel Jacobowitz
@ 2004-02-17  9:59         ` Paul N. Hilfinger
  0 siblings, 0 replies; 24+ messages in thread
From: Paul N. Hilfinger @ 2004-02-17  9:59 UTC (permalink / raw)
  To: drow; +Cc: gdb-patches


> Did you end up with basenames or with the equivalent of "short"
> demangled names?

Neither for now.  At the moment, we still do the same old thing of simply 
keeping around the whole linkage name (well, WE don't; that's just standard
GDB) and only demangling when needed.   So I am just using the 
language-specific demangled name to cache the demangled name with a 
sufficiently long lifetime.  This required keeping track of an appropriate
obstack---hence my need for the same type-tag field you used.  

As I said when you first mentioned the scheme that you appear to be working on 
here, it sounds as if we'd be able to dispense with our current 
space-and-time-saving Ada-specific code and adapt your implementation instead.
Hence, my interest.

Paul Hilfinger


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [rfa] Add SYMBOL_SET_LINKAGE_NAME
  2004-02-16 22:57   ` Daniel Jacobowitz
  2004-02-16 23:35     ` Paul Hilfinger
@ 2004-02-17 15:57     ` Andrew Cagney
  2004-02-17 16:01       ` Daniel Jacobowitz
  1 sibling, 1 reply; 24+ messages in thread
From: Andrew Cagney @ 2004-02-17 15:57 UTC (permalink / raw)
  To: Daniel Jacobowitz; +Cc: Elena Zannoni, gdb-patches

> On Mon, Feb 16, 2004 at 04:48:57PM -0500, Elena Zannoni wrote:
> 
>> Daniel Jacobowitz writes:
>>  > After this patch and my others from today there are no direct
>>  > assignments to the symbol name. In addition to the cleanup value, I'm
>>  > testing an approach which would change the storage of symbol names,
>>  > which prompted me to do this.  
>> 
>> can you elaborate on where you are going?
> 
> 
> Sure.  I'm not sure if it's actually going to end up this way, since
> I'm thinking it wasn't a great idea and it has some truly gross bits I
> haven't figured out what to do with yet - it was just a hack job last
> weekend.  But here's what my current tree does.
> 
> The C++ demangled name pointer in lang_specific is removed.  The name
> pointer becomes a union, and a flag bit (there's about a byte's worth
> of empty space in general_symbol_info) is added.  They look like this:

Er, why not start by defining a relevant interface and then work 
inwards?  That way potential clients, such as paulh, can determine if it 
is sufficient for their needs.  The first implementation doesn't even 
need to be fast, just correct.  Once we've hard data on the interfaces 
run-time behavioral characteristics we can consider symtab internal changes.

Andrew



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [rfa] Add SYMBOL_SET_LINKAGE_NAME
  2004-02-17 15:57     ` Andrew Cagney
@ 2004-02-17 16:01       ` Daniel Jacobowitz
  2004-02-17 19:14         ` Elena Zannoni
  0 siblings, 1 reply; 24+ messages in thread
From: Daniel Jacobowitz @ 2004-02-17 16:01 UTC (permalink / raw)
  To: Andrew Cagney; +Cc: Elena Zannoni, gdb-patches

On Tue, Feb 17, 2004 at 10:57:39AM -0500, Andrew Cagney wrote:
> >On Mon, Feb 16, 2004 at 04:48:57PM -0500, Elena Zannoni wrote:
> >
> >>Daniel Jacobowitz writes:
> >> > After this patch and my others from today there are no direct
> >> > assignments to the symbol name. In addition to the cleanup value, I'm
> >> > testing an approach which would change the storage of symbol names,
> >> > which prompted me to do this.  
> >>
> >>can you elaborate on where you are going?
> >
> >
> >Sure.  I'm not sure if it's actually going to end up this way, since
> >I'm thinking it wasn't a great idea and it has some truly gross bits I
> >haven't figured out what to do with yet - it was just a hack job last
> >weekend.  But here's what my current tree does.
> >
> >The C++ demangled name pointer in lang_specific is removed.  The name
> >pointer becomes a union, and a flag bit (there's about a byte's worth
> >of empty space in general_symbol_info) is added.  They look like this:
> 
> Er, why not start by defining a relevant interface and then work 
> inwards?  That way potential clients, such as paulh, can determine if it 
> is sufficient for their needs.  The first implementation doesn't even 
> need to be fast, just correct.  Once we've hard data on the interfaces 
> run-time behavioral characteristics we can consider symtab internal changes.

Because the cleaner interface is not my goal - it's a side goal to my
actual aims, which are improved GDB startup time and memory usage.  An
implementation which is not fast is a step backwards.  I don't
understand how you can propose to measure "hard data" on "run-time
behavioral characteristics" without implementing the symtab internal
changes - what am I missing?

-- 
Daniel Jacobowitz
MontaVista Software                         Debian GNU/Linux Developer


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [rfa] Add SYMBOL_SET_LINKAGE_NAME
  2004-02-17 16:01       ` Daniel Jacobowitz
@ 2004-02-17 19:14         ` Elena Zannoni
  2004-02-17 19:29           ` Daniel Jacobowitz
  0 siblings, 1 reply; 24+ messages in thread
From: Elena Zannoni @ 2004-02-17 19:14 UTC (permalink / raw)
  To: Daniel Jacobowitz; +Cc: Andrew Cagney, Elena Zannoni, gdb-patches

Daniel Jacobowitz writes:
 > Because the cleaner interface is not my goal - it's a side goal to my
 > actual aims, which are improved GDB startup time and memory usage. 

From your previous postings I understand is that your cplusplus stuff
has a noticeable impact on performance and memory usage and you are
trying to shave gdb's time and size wherever there is a chance. From
Paul's postings instead I get the impression that he needs to revise
the current interface.

You are saying that you are not willing to take a step back and look
at things from a broader perspective.  I sincerely hope I
misunderstood your statement.

The symbol table is already a mess, with multiple redundant
interfaces.  At the moment there isn't a clear picture of what various
bits of gdb need from the symbol table.  How do we know the direction
we are going is improving things generally across the board?  How can
we know that, if this is not the case, the impact is not too heavy on
other parts and we can live with the tradeoff?

I am not saying that we have to know everything in minutious details
before we can touch the code (that's very likely totally unfeasible),
but at least, have an understanding of the big picture and the
consequences.



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [rfa] Add SYMBOL_SET_LINKAGE_NAME
  2004-02-17 19:14         ` Elena Zannoni
@ 2004-02-17 19:29           ` Daniel Jacobowitz
  2004-02-17 23:10             ` Andrew Cagney
  2004-02-18  0:43             ` Elena Zannoni
  0 siblings, 2 replies; 24+ messages in thread
From: Daniel Jacobowitz @ 2004-02-17 19:29 UTC (permalink / raw)
  To: Elena Zannoni; +Cc: Andrew Cagney, gdb-patches

On Tue, Feb 17, 2004 at 02:09:49PM -0500, Elena Zannoni wrote:
> Daniel Jacobowitz writes:
>  > Because the cleaner interface is not my goal - it's a side goal to my
>  > actual aims, which are improved GDB startup time and memory usage. 
> 
> >From your previous postings I understand is that your cplusplus stuff
> has a noticeable impact on performance and memory usage and you are
> trying to shave gdb's time and size wherever there is a chance. From
> Paul's postings instead I get the impression that he needs to revise
> the current interface.

This has, in fact, nothing to do with the C++ stuff.  This has to do
with the fact that when I start GDB on a 200MHz board with debug info
in glibc, it takes more than thirty seconds to load partial symbol
tables.  That's so slow as to be unusable.  It makes the entire
testsuite timeout, for one thing.

> You are saying that you are not willing to take a step back and look
> at things from a broader perspective.  I sincerely hope I
> misunderstood your statement.

You did.  Let me paste that quote with a little more context, OK?

  > Er, why not start by defining a relevant interface and then work  
  > inwards?  That way potential clients, such as paulh, can determine if it 
  > is sufficient for their needs.  The first implementation doesn't even 
  > need to be fast, just correct.  Once we've hard data on the interfaces 
  > run-time behavioral characteristics we can consider symtab internal
  > changes.

  Because the cleaner interface is not my goal - it's a side goal to my
  actual aims, which are improved GDB startup time and memory usage.  An
  implementation which is not fast is a step backwards.  I don't
  understand how you can propose to measure "hard data" on "run-time
  behavioral characteristics" without implementing the symtab internal
  changes - what am I missing?

Do you have an answer to my question?  Without one I don't understand
what Andrew is asking of me.

As for the interface, I don't think that the cleanup patches I've
posted so far have any substantial effect on the interface.  I was not
planning to post that existing grossness, my weekend hack, as a
proposal - it was an answer to "can you elaborate".  Before submitting
patches to implement it I would, I would hope, have asked for comments
on the interface.  But if I can't make the interface go faster, then
there's no point in proposing the interface.  That is a work in
progress.

You want a high level big picture?  I would like to separate the
concepts of full demangled name for language-specific use and
minimalist demangled data (basename, non-DMGL_PARAMS, whatever else)
needed for lookups in the symbol table.  This lets us reduce the
storage used by the symbol table, the data we have to generate at
startup, and the data we have to wade through when lookup things up in
the tables.

-- 
Daniel Jacobowitz
MontaVista Software                         Debian GNU/Linux Developer


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [rfa] Add SYMBOL_SET_LINKAGE_NAME
  2004-02-17 19:29           ` Daniel Jacobowitz
@ 2004-02-17 23:10             ` Andrew Cagney
  2004-02-18  0:43             ` Elena Zannoni
  1 sibling, 0 replies; 24+ messages in thread
From: Andrew Cagney @ 2004-02-17 23:10 UTC (permalink / raw)
  To: Daniel Jacobowitz; +Cc: Elena Zannoni, gdb-patches

>   > Er, why not start by defining a relevant interface and then work  
>   > inwards?  That way potential clients, such as paulh, can determine if it 
>   > is sufficient for their needs.  The first implementation doesn't even 
>   > need to be fast, just correct.  Once we've hard data on the interfaces 
>   > run-time behavioral characteristics we can consider symtab internal
>   > changes.
> 
>   Because the cleaner interface is not my goal - it's a side goal to my
>   actual aims, which are improved GDB startup time and memory usage.  An
>   implementation which is not fast is a step backwards.  I don't
>   understand how you can propose to measure "hard data" on "run-time
>   behavioral characteristics" without implementing the symtab internal
>   changes - what am I missing?

I was refering to the interface, not the implementation.  For instance:

- how often it is called?
- when it is called?
- with what parameters it is called?
- how time critical are the calls?

And of course, how many existing interfaces can, as a consequence, be 
deprecated, obsoleted or deleted.  The implementation is secondary.  As 
PaulH noted, the initial implementation could even be based on the 
existing structure (just stripping off trailing parameter list).

Elena wrote:

> The symbol table is already a mess, with multiple redundant
> interfaces.  At the moment there isn't a clear picture of what various
> bits of gdb need from the symbol table.  How do we know the direction
> we are going is improving things generally across the board?  How can
> we know that, if this is not the case, the impact is not too heavy on
> other parts and we can live with the tradeoff?

In the -file-list-exec-source-files discussion, notice that I'm trying 
to emphasize that things need to be a step removed from the symbol table 
- define the requirements according to the users needs, and not 
according to the existing symtab implementation.

Andrew



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [rfa] Add SYMBOL_SET_LINKAGE_NAME
  2004-02-16 21:24 [rfa] Add SYMBOL_SET_LINKAGE_NAME Daniel Jacobowitz
  2004-02-16 21:53 ` Elena Zannoni
@ 2004-02-18  0:20 ` David Carlton
  2004-02-18  0:23   ` Daniel Jacobowitz
  2004-02-18  0:49   ` Paul Hilfinger
  1 sibling, 2 replies; 24+ messages in thread
From: David Carlton @ 2004-02-18  0:20 UTC (permalink / raw)
  To: gdb-patches

On Mon, 16 Feb 2004 16:24:06 -0500, Daniel Jacobowitz <drow@false.org> said:

> This patch adds a macro, SYMBOL_SET_LINKAGE_NAME, which is used to
> set a symbol's name when the name should not be demangled.  Used for
> things like typedefs whose name comes from debug info.

The idea is okay, but I don't like the name all that much.  I once had
a goal, which I've admittedly been lax about pursuing recently, that
we would have a very clear distinction between linkage names (which
really did mean names used by the linker) and natural names (i.e. the
names in the source code), to the extent that, if we were to represent
these by different types, then our code would almost compile.

When we're talking about types, however, linkage names don't make much
sense, only natural names.  So, while it's true that your macro does
set the field that, in the case of a symbol with both linkage and
natural names, corresponds to the linkage name, that's really an
implementation detail that should be shielded behind this macro.

Having said that, I don't have any great suggestions for a better
name.  SYMBOL_SET_NATURAL_NAME?  SYMBOL_SET_NATURAL_ONLY_NAME?  Hmm.

David Carlton
carlton@kealia.com


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [rfa] Add SYMBOL_SET_LINKAGE_NAME
  2004-02-18  0:20 ` David Carlton
@ 2004-02-18  0:23   ` Daniel Jacobowitz
  2004-02-18  0:27     ` Elena Zannoni
  2004-02-18  0:49   ` Paul Hilfinger
  1 sibling, 1 reply; 24+ messages in thread
From: Daniel Jacobowitz @ 2004-02-18  0:23 UTC (permalink / raw)
  To: gdb-patches

On Tue, Feb 17, 2004 at 09:23:47AM -0800, David Carlton wrote:
> On Mon, 16 Feb 2004 16:24:06 -0500, Daniel Jacobowitz <drow@false.org> said:
> 
> > This patch adds a macro, SYMBOL_SET_LINKAGE_NAME, which is used to
> > set a symbol's name when the name should not be demangled.  Used for
> > things like typedefs whose name comes from debug info.
> 
> The idea is okay, but I don't like the name all that much.  I once had
> a goal, which I've admittedly been lax about pursuing recently, that
> we would have a very clear distinction between linkage names (which
> really did mean names used by the linker) and natural names (i.e. the
> names in the source code), to the extent that, if we were to represent
> these by different types, then our code would almost compile.
> 
> When we're talking about types, however, linkage names don't make much
> sense, only natural names.  So, while it's true that your macro does
> set the field that, in the case of a symbol with both linkage and
> natural names, corresponds to the linkage name, that's really an
> implementation detail that should be shielded behind this macro.
> 
> Having said that, I don't have any great suggestions for a better
> name.  SYMBOL_SET_NATURAL_NAME?  SYMBOL_SET_NATURAL_ONLY_NAME?  Hmm.

I don't want to call it SYMBOL_SET_NATURAL_NAME.  It's not necessarily
the natural name.  Other than that, I don't know.

I'm just going to sit on this.  The HP patches need to be revised
anyway, and people want me to draft a complete interface before doing
any cleanups.

-- 
Daniel Jacobowitz
MontaVista Software                         Debian GNU/Linux Developer


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [rfa] Add SYMBOL_SET_LINKAGE_NAME
  2004-02-18  0:23   ` Daniel Jacobowitz
@ 2004-02-18  0:27     ` Elena Zannoni
  2004-02-18  0:32       ` Daniel Jacobowitz
  0 siblings, 1 reply; 24+ messages in thread
From: Elena Zannoni @ 2004-02-18  0:27 UTC (permalink / raw)
  To: Daniel Jacobowitz; +Cc: gdb-patches

Daniel Jacobowitz writes:
 > On Tue, Feb 17, 2004 at 09:23:47AM -0800, David Carlton wrote:
 > > On Mon, 16 Feb 2004 16:24:06 -0500, Daniel Jacobowitz <drow@false.org> said:
 > > 
 > > > This patch adds a macro, SYMBOL_SET_LINKAGE_NAME, which is used to
 > > > set a symbol's name when the name should not be demangled.  Used for
 > > > things like typedefs whose name comes from debug info.
 > > 
 > > The idea is okay, but I don't like the name all that much.  I once had
 > > a goal, which I've admittedly been lax about pursuing recently, that
 > > we would have a very clear distinction between linkage names (which
 > > really did mean names used by the linker) and natural names (i.e. the
 > > names in the source code), to the extent that, if we were to represent
 > > these by different types, then our code would almost compile.
 > > 
 > > When we're talking about types, however, linkage names don't make much
 > > sense, only natural names.  So, while it's true that your macro does
 > > set the field that, in the case of a symbol with both linkage and
 > > natural names, corresponds to the linkage name, that's really an
 > > implementation detail that should be shielded behind this macro.
 > > 
 > > Having said that, I don't have any great suggestions for a better
 > > name.  SYMBOL_SET_NATURAL_NAME?  SYMBOL_SET_NATURAL_ONLY_NAME?  Hmm.
 > 
 > I don't want to call it SYMBOL_SET_NATURAL_NAME.  It's not necessarily
 > the natural name.  Other than that, I don't know.
 > 
 > I'm just going to sit on this.  The HP patches need to be revised
 > anyway, and people want me to draft a complete interface before doing
 > any cleanups.

come on, that's not what I asked. 

 > 
 > -- 
 > Daniel Jacobowitz
 > MontaVista Software                         Debian GNU/Linux Developer


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [rfa] Add SYMBOL_SET_LINKAGE_NAME
  2004-02-18  0:27     ` Elena Zannoni
@ 2004-02-18  0:32       ` Daniel Jacobowitz
  2004-02-18  0:54         ` Elena Zannoni
  0 siblings, 1 reply; 24+ messages in thread
From: Daniel Jacobowitz @ 2004-02-18  0:32 UTC (permalink / raw)
  To: gdb-patches

On Tue, Feb 17, 2004 at 07:23:29PM -0500, Elena Zannoni wrote:
> Daniel Jacobowitz writes:
>  > On Tue, Feb 17, 2004 at 09:23:47AM -0800, David Carlton wrote:
>  > > On Mon, 16 Feb 2004 16:24:06 -0500, Daniel Jacobowitz <drow@false.org> said:
>  > > 
>  > > > This patch adds a macro, SYMBOL_SET_LINKAGE_NAME, which is used to
>  > > > set a symbol's name when the name should not be demangled.  Used for
>  > > > things like typedefs whose name comes from debug info.
>  > > 
>  > > The idea is okay, but I don't like the name all that much.  I once had
>  > > a goal, which I've admittedly been lax about pursuing recently, that
>  > > we would have a very clear distinction between linkage names (which
>  > > really did mean names used by the linker) and natural names (i.e. the
>  > > names in the source code), to the extent that, if we were to represent
>  > > these by different types, then our code would almost compile.
>  > > 
>  > > When we're talking about types, however, linkage names don't make much
>  > > sense, only natural names.  So, while it's true that your macro does
>  > > set the field that, in the case of a symbol with both linkage and
>  > > natural names, corresponds to the linkage name, that's really an
>  > > implementation detail that should be shielded behind this macro.
>  > > 
>  > > Having said that, I don't have any great suggestions for a better
>  > > name.  SYMBOL_SET_NATURAL_NAME?  SYMBOL_SET_NATURAL_ONLY_NAME?  Hmm.
>  > 
>  > I don't want to call it SYMBOL_SET_NATURAL_NAME.  It's not necessarily
>  > the natural name.  Other than that, I don't know.
>  > 
>  > I'm just going to sit on this.  The HP patches need to be revised
>  > anyway, and people want me to draft a complete interface before doing
>  > any cleanups.
> 
> come on, that's not what I asked. 

I know.  You haven't asked anything about the patch, which you haven't
reviewed yet, which is fine.  It's only a day old.  The discussion
about interfaces is on a tangent thread about my future intentions.

But there's no point in just continuing the cleanups now that I've
stirred up this much annoyed discussion asking for interface
descriptions.  So I'll write the interface description and come back
later.

-- 
Daniel Jacobowitz
MontaVista Software                         Debian GNU/Linux Developer


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [rfa] Add SYMBOL_SET_LINKAGE_NAME
  2004-02-17 19:29           ` Daniel Jacobowitz
  2004-02-17 23:10             ` Andrew Cagney
@ 2004-02-18  0:43             ` Elena Zannoni
  2004-02-18  1:04               ` Daniel Jacobowitz
  1 sibling, 1 reply; 24+ messages in thread
From: Elena Zannoni @ 2004-02-18  0:43 UTC (permalink / raw)
  To: Daniel Jacobowitz; +Cc: Elena Zannoni, Andrew Cagney, gdb-patches

Daniel Jacobowitz writes:
 > On Tue, Feb 17, 2004 at 02:09:49PM -0500, Elena Zannoni wrote:
 > > Daniel Jacobowitz writes:
 > >  > Because the cleaner interface is not my goal - it's a side goal to my
 > >  > actual aims, which are improved GDB startup time and memory usage. 
 > > 
 > > >From your previous postings I understand is that your cplusplus stuff
 > > has a noticeable impact on performance and memory usage and you are
 > > trying to shave gdb's time and size wherever there is a chance. From
 > > Paul's postings instead I get the impression that he needs to revise
 > > the current interface.
 > 
 > This has, in fact, nothing to do with the C++ stuff.  This has to do
 > with the fact that when I start GDB on a 200MHz board with debug info
 > in glibc, it takes more than thirty seconds to load partial symbol
 > tables.  That's so slow as to be unusable.  It makes the entire
 > testsuite timeout, for one thing.
 > 

Did you identify specific bottlenecks?

 > Do you have an answer to my question?  Without one I don't understand
 > what Andrew is asking of me.
 > 

I don't speak for Andrew. I think he replied anyway.

 > As for the interface, I don't think that the cleanup patches I've
 > posted so far have any substantial effect on the interface.  I was not
 > planning to post that existing grossness, my weekend hack, as a
 > proposal - it was an answer to "can you elaborate".  Before submitting
 > patches to implement it I would, I would hope, have asked for comments
 > on the interface.  But if I can't make the interface go faster, then
 > there's no point in proposing the interface.  That is a work in
 > progress.
 > 

I had to pry this info out of you over a few e-mail exchanges. Your
first posting didn't explain what you were doing, just that you were
testing some new approach. Since the patch seemed to be put together
in a hurry (and that's why I asked you to split it) I honestly wanted
to know what you were planning to do, especially since you are adding
a new macro. So it seemed to me that you were doing exactly that: going
ahead with the first stages of a big change but that the change itself
hadn't been discussed.

 > You want a high level big picture?  I would like to separate the
 > concepts of full demangled name for language-specific use and
 > minimalist demangled data (basename, non-DMGL_PARAMS, whatever else)
 > needed for lookups in the symbol table.  This lets us reduce the
 > storage used by the symbol table, the data we have to generate at
 > startup, and the data we have to wade through when lookup things up in
 > the tables.
 > 

thank you.  What do you mean by separate? Where would you store the
demangled name? Have it demangled on demand only?


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [rfa] Add SYMBOL_SET_LINKAGE_NAME
  2004-02-18  0:20 ` David Carlton
  2004-02-18  0:23   ` Daniel Jacobowitz
@ 2004-02-18  0:49   ` Paul Hilfinger
  2004-02-18  1:27     ` David Carlton
  1 sibling, 1 reply; 24+ messages in thread
From: Paul Hilfinger @ 2004-02-18  0:49 UTC (permalink / raw)
  To: David Carlton; +Cc: gdb-patches


 > When we're talking about types, however, linkage names don't make much
 > sense, only natural names.  

Oh, I DO so hate to rock the boat, but this statement is not true in
the case of Ada, in which the natural name of a type is not how it's
known to the linker.  

I don't see why "linkage names don't make much sense" anyway: the name
of a (C/C++) type is, indeed, what the linker sees.  The fact that it
happens to coincide with the natural name in the case of C types is an
interesting fact that, perhaps "should be shielded behind this macro".

Paul


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [rfa] Add SYMBOL_SET_LINKAGE_NAME
  2004-02-18  0:32       ` Daniel Jacobowitz
@ 2004-02-18  0:54         ` Elena Zannoni
  2004-02-18  1:06           ` Daniel Jacobowitz
  0 siblings, 1 reply; 24+ messages in thread
From: Elena Zannoni @ 2004-02-18  0:54 UTC (permalink / raw)
  To: Daniel Jacobowitz; +Cc: gdb-patches

Daniel Jacobowitz writes:

 > I know.  You haven't asked anything about the patch, which you haven't
 > reviewed yet, which is fine.  It's only a day old.  The discussion
 > about interfaces is on a tangent thread about my future intentions.
 > 

What patch? One doesn't work. Another also is in HPread and we need to
test that as well (I'll ask Michael about it). The third one you
checked in. Are there others.

 > But there's no point in just continuing the cleanups now that I've
 > stirred up this much annoyed discussion asking for interface
 > descriptions.  So I'll write the interface description and come back
 > later.
 > 

Please, now it's the perfect time to discuss what you think would be a
good approach, before you get too deep into a design/solution.  Nobody
is saying that your idea is bogus, it's just hard to grasp it at this
stage.


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [rfa] Add SYMBOL_SET_LINKAGE_NAME
  2004-02-18  0:43             ` Elena Zannoni
@ 2004-02-18  1:04               ` Daniel Jacobowitz
  0 siblings, 0 replies; 24+ messages in thread
From: Daniel Jacobowitz @ 2004-02-18  1:04 UTC (permalink / raw)
  To: Elena Zannoni; +Cc: Andrew Cagney, gdb-patches

On Tue, Feb 17, 2004 at 07:38:55PM -0500, Elena Zannoni wrote:
> Daniel Jacobowitz writes:
>  > On Tue, Feb 17, 2004 at 02:09:49PM -0500, Elena Zannoni wrote:
>  > > Daniel Jacobowitz writes:
>  > >  > Because the cleaner interface is not my goal - it's a side goal to my
>  > >  > actual aims, which are improved GDB startup time and memory usage. 
>  > > 
>  > > >From your previous postings I understand is that your cplusplus stuff
>  > > has a noticeable impact on performance and memory usage and you are
>  > > trying to shave gdb's time and size wherever there is a chance. From
>  > > Paul's postings instead I get the impression that he needs to revise
>  > > the current interface.
>  > 
>  > This has, in fact, nothing to do with the C++ stuff.  This has to do
>  > with the fact that when I start GDB on a 200MHz board with debug info
>  > in glibc, it takes more than thirty seconds to load partial symbol
>  > tables.  That's so slow as to be unusable.  It makes the entire
>  > testsuite timeout, for one thing.
>  > 
> 
> Did you identify specific bottlenecks?

Yes, and this is aimed at the first of them.  From my mail yesterday:

  This whole project grew out of profiling results for a large dwarf2 C
  application, which shows a similar profile to C++ : the biggest hot
  spot in startup time today is compare_psymbols.  My hope is to do that
  using strcmp, or something even more efficient, instead of ~6,000,000
  calls to strcmp_iw.  I started working on the symbol name cleanups
  first, but I think saving the hash code in the symbol_name_info might
  be even more effective.

I don't have all my numbers handy any more, but here's a profile
(gprof-based) for loading gdb with debug info in gdb.  I verified with
oprofile that the effect is real - strcmp_iw_ordered fell to about 27%
which leaves it still dominating.  First column is percentage of total
time for the "file" command.

 36.36     10.52    10.52  3279588     0.00     0.00  strcmp_iw_ordered
  9.44     13.25     2.73   510855     0.00     0.00  bcache_data
  7.45     15.40     2.15   526225     0.00     0.00  hash
  6.48     17.28     1.88                             htab_find_slot_with_hash

I realize this explanation was short on details.  My hope was that by
shrinking the amount of data (full demangled names are more complex
than non-params names, although we still have to contend with template
arguments) I could make it practical to canonicalize these before
storing them.  Then we could canonicalize user input that we want to
look up, and use strcmp instead of strcmp_iw_ordered to sort; I'm
estimating that strcmp is in the 10x-20x faster range although
benchmarking it is still on my TODO.  But I have a pretty good idea of
how long strcmp takes on this amount of data, and we're off the scale.

I'm no longer convinced that canonicalizing was a good idea.  It may be
better to:
 - adapt msymbol_hash_iw, and sort the psymbols by a stored hash value.
   The scheme I described, with a shared struct containing name
   pointers, recovers most of the space and time cost of this.
 - or hash the psymbols, which will require substantial changes to
   the very few places that want to search all psymbols, and I don't
   know how to make this work properly with the (very important)
   psymbol bcache.
 - or something else entirely.

>  > As for the interface, I don't think that the cleanup patches I've
>  > posted so far have any substantial effect on the interface.  I was not
>  > planning to post that existing grossness, my weekend hack, as a
>  > proposal - it was an answer to "can you elaborate".  Before submitting
>  > patches to implement it I would, I would hope, have asked for comments
>  > on the interface.  But if I can't make the interface go faster, then
>  > there's no point in proposing the interface.  That is a work in
>  > progress.
>  > 
> 
> I had to pry this info out of you over a few e-mail exchanges. Your
> first posting didn't explain what you were doing, just that you were
> testing some new approach. Since the patch seemed to be put together
> in a hurry (and that's why I asked you to split it) I honestly wanted
> to know what you were planning to do, especially since you are adding
> a new macro. So it seemed to me that you were doing exactly that: going
> ahead with the first stages of a big change but that the change itself
> hadn't been discussed.

The patch was not in one piece because I was in a hurry.  It was in one
piece because I honestly thought it made _more_ sense that way, as a
group of related changes.  I get it hammered into me every couple of
months that other GDB developers have a different opinion of reasonable
patch units than I do.  I would not have submitted it if I didn't think
it was worthwhile completely independent of whatever I was hacking on
in another tree.

As soon as you asked where I was going I answered, in detail.

>  > You want a high level big picture?  I would like to separate the
>  > concepts of full demangled name for language-specific use and
>  > minimalist demangled data (basename, non-DMGL_PARAMS, whatever else)
>  > needed for lookups in the symbol table.  This lets us reduce the
>  > storage used by the symbol table, the data we have to generate at
>  > startup, and the data we have to wade through when lookup things up in
>  > the tables.
>  > 
> 
> thank you.  What do you mean by separate? Where would you store the
> demangled name? Have it demangled on demand only?

See my previous response:
  http://sources.redhat.com/ml/gdb-patches/2004-02/msg00447.html

Yes, SYMBOL_DEMANGLED_NAME would fill in the demangled name on demand. 
This is something we did, as far as I know, "once upon a time" - but
because we didn't have the concept of separate demangled names and
search names that I'm experimenting with, and because we didn't cache
the demangled names, and because the demangler was much slower, the
runtime cost was prohibitive.  I think that now it won't be.

Hmm, the idea of using the hash as a sorting key has potential.  I need
to experiment with that.  It will take back most of the memory savings
I currently have, but it should cut most of that 30% off my profile.

-- 
Daniel Jacobowitz
MontaVista Software                         Debian GNU/Linux Developer


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [rfa] Add SYMBOL_SET_LINKAGE_NAME
  2004-02-18  0:54         ` Elena Zannoni
@ 2004-02-18  1:06           ` Daniel Jacobowitz
  0 siblings, 0 replies; 24+ messages in thread
From: Daniel Jacobowitz @ 2004-02-18  1:06 UTC (permalink / raw)
  To: Elena Zannoni; +Cc: gdb-patches

On Tue, Feb 17, 2004 at 07:50:00PM -0500, Elena Zannoni wrote:
> Daniel Jacobowitz writes:
> 
>  > I know.  You haven't asked anything about the patch, which you haven't
>  > reviewed yet, which is fine.  It's only a day old.  The discussion
>  > about interfaces is on a tangent thread about my future intentions.
>  > 
> 
> What patch? One doesn't work. Another also is in HPread and we need to
> test that as well (I'll ask Michael about it). The third one you
> checked in. Are there others.

Yes, the one at the beginning of this thread :)

> Please, now it's the perfect time to discuss what you think would be a
> good approach, before you get too deep into a design/solution.  Nobody
> is saying that your idea is bogus, it's just hard to grasp it at this
> stage.

Done in my last message.  I need to update that working tree...

-- 
Daniel Jacobowitz
MontaVista Software                         Debian GNU/Linux Developer


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [rfa] Add SYMBOL_SET_LINKAGE_NAME
  2004-02-18  0:49   ` Paul Hilfinger
@ 2004-02-18  1:27     ` David Carlton
  2004-02-18  8:12       ` Paul N. Hilfinger
  0 siblings, 1 reply; 24+ messages in thread
From: David Carlton @ 2004-02-18  1:27 UTC (permalink / raw)
  To: Paul Hilfinger; +Cc: gdb-patches

On Tue, 17 Feb 2004 16:49:04 -0800, Paul Hilfinger <hilfingr@EECS.Berkeley.EDU> said:

>> When we're talking about types, however, linkage names don't make much
>> sense, only natural names.  

> Oh, I DO so hate to rock the boat, but this statement is not true in
> the case of Ada, in which the natural name of a type is not how it's
> known to the linker.

Can you clarify this?  Because:

> I don't see why "linkage names don't make much sense" anyway: the name
> of a (C/C++) type is, indeed, what the linker sees.

I don't see this as being true for C/C++.  The linker sees the names
of functions and variables, but it doesn't see the names of types.
There aren't any minimal symbols associated to types.  There are
minimal symbols associated to methods of classes, static variables of
classes, and other stuff (virtual function tables, at least), but no
minimal symbols associated to the the types themselves.

I can see how you could take the name of a type and mangle it, and I
can imagine that doing so might be useful for Ada (and perhaps even
for C++?), given the picture that you've been painting.  But I
wouldn't call that a linkage name, because there's nothing in the
object file which has that name.

So is the picture different for Ada, or have I not been being clear
with the distinction of linkage name vs. mangled name?  (Or am I
missing something even in the C/C++ case, for that matter?)

David Carlton
carlton@kealia.com


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [rfa] Add SYMBOL_SET_LINKAGE_NAME
  2004-02-18  1:27     ` David Carlton
@ 2004-02-18  8:12       ` Paul N. Hilfinger
  2004-02-18 16:45         ` David Carlton
  0 siblings, 1 reply; 24+ messages in thread
From: Paul N. Hilfinger @ 2004-02-18  8:12 UTC (permalink / raw)
  To: carlton; +Cc: gdb-patches


> From: David Carlton <carlton@kealia.com>
> On Tue, 17 Feb 2004 16:49:04 -0800, Paul Hilfinger <hilfingr@EECS.Berkeley.EDU> said:
> 
> Can you clarify this?  Because:
> 
> > I don't see why "linkage names don't make much sense" anyway: the name
> > of a (C/C++) type is, indeed, what the linker sees.
> 
> I don't see this as being true for C/C++.  The linker sees the names
> of functions and variables, but it doesn't see the names of types.
> There aren't any minimal symbols associated to types.  There are
> minimal symbols associated to methods of classes, static variables of
> classes, and other stuff (virtual function tables, at least), but no
> minimal symbols associated to the the types themselves.
> 
> I can see how you could take the name of a type and mangle it, and I
> can imagine that doing so might be useful for Ada (and perhaps even
> for C++?), given the picture that you've been painting.  But I
> wouldn't call that a linkage name, because there's nothing in the
> object file which has that name.
> 
> So is the picture different for Ada, or have I not been being clear
> with the distinction of linkage name vs. mangled name?  (Or am I
> missing something even in the C/C++ case, for that matter?)

I admit that I have never been clear on the precise limits of
"linkage name".  The linker DOES see these things (else how do they
find their way into the executable file?) although it's true that it does
not "link" them as it does for regular symbols.

After watching you struggle manfully through a number of terminology changes,
I was a little reluctant to suggest the introduction of still another concept,
so I stuck with "linkage name".  The point is that WHATEVER you call Ada's
mangled type names, they are NOT what is written in the source code
(so can't be "natural names") and they ARE the raw names extracted from the
executable's debugging information.  Hmm; they also happen to be what
I proposed calling "search names"---i.e., the names used internally to search
by.  We could use the opportunity to introduce search names and make this
"SET_SEARCH_NAME".  Just a thought.

Paul Hilfinger


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [rfa] Add SYMBOL_SET_LINKAGE_NAME
  2004-02-18  8:12       ` Paul N. Hilfinger
@ 2004-02-18 16:45         ` David Carlton
  2004-02-20  9:32           ` Paul N. Hilfinger
  0 siblings, 1 reply; 24+ messages in thread
From: David Carlton @ 2004-02-18 16:45 UTC (permalink / raw)
  To: Hilfinger; +Cc: gdb-patches

On Wed, 18 Feb 2004 00:11:49 -0800, "Paul N. Hilfinger" <hilfingr@otisco.mckusick.com> said:

> I admit that I have never been clear on the precise limits of
> "linkage name".  The linker DOES see these things (else how do they
> find their way into the executable file?) although it's true that it
> does not "link" them as it does for regular symbols.

> After watching you struggle manfully through a number of terminology
> changes, I was a little reluctant to suggest the introduction of
> still another concept, so I stuck with "linkage name".  The point is
> that WHATEVER you call Ada's mangled type names, they are NOT what
> is written in the source code (so can't be "natural names") and they
> ARE the raw names extracted from the executable's debugging
> information.  Hmm; they also happen to be what I proposed calling
> "search names"---i.e., the names used internally to search by.  We
> could use the opportunity to introduce search names and make this
> "SET_SEARCH_NAME".  Just a thought.

I was actually having similar thoughts on the way into work this
morning.  Maybe what I should focus on first is trying to establish
the notion that, when calling lookup_symbol, the name you pass in
always has to be in the appropriate form: in particular, lookup_symbol
would never try to demangle a name on the fly.  In the non-Ada world,
that would mean that we always search by natural name; but it would
pave the way for you to introduce your SYMBOL_SEARCH_NAME concept.
That way, SYMBOL_SEARCH_NAME would have a clear meaning (even if
nothing else would :-) ): it would be the name that you pass to
lookup_symbol when trying to search for that symbol.

Does that make sense to you?  If so, I'll start generating appropriate
patches.

David Carlton
carlton@kealia.com


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [rfa] Add SYMBOL_SET_LINKAGE_NAME
  2004-02-18 16:45         ` David Carlton
@ 2004-02-20  9:32           ` Paul N. Hilfinger
  0 siblings, 0 replies; 24+ messages in thread
From: Paul N. Hilfinger @ 2004-02-20  9:32 UTC (permalink / raw)
  To: carlton; +Cc: gdb-patches


David,

> I was actually having similar thoughts on the way into work this
> morning.  Maybe what I should focus on first is trying to establish
> the notion that, when calling lookup_symbol, the name you pass in
> always has to be in the appropriate form: in particular, lookup_symbol
> would never try to demangle a name on the fly.  In the non-Ada world,
> that would mean that we always search by natural name; but it would
> pave the way for you to introduce your SYMBOL_SEARCH_NAME concept.
> That way, SYMBOL_SEARCH_NAME would have a clear meaning (even if
> nothing else would :-) ): it would be the name that you pass to
> lookup_symbol when trying to search for that symbol.
> 
> Does that make sense to you?  If so, I'll start generating appropriate
> patches.

It does; that certainly is the notion we currently adhere to with our
version of lookup_symbol, and also applies to C/C++.   

Paul Hilfinger

P.S. Sorry for the delay: site visit approaching.


^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2004-02-20  9:32 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-02-16 21:24 [rfa] Add SYMBOL_SET_LINKAGE_NAME Daniel Jacobowitz
2004-02-16 21:53 ` Elena Zannoni
2004-02-16 22:57   ` Daniel Jacobowitz
2004-02-16 23:35     ` Paul Hilfinger
2004-02-17  0:05       ` Daniel Jacobowitz
2004-02-17  9:59         ` Paul N. Hilfinger
2004-02-17 15:57     ` Andrew Cagney
2004-02-17 16:01       ` Daniel Jacobowitz
2004-02-17 19:14         ` Elena Zannoni
2004-02-17 19:29           ` Daniel Jacobowitz
2004-02-17 23:10             ` Andrew Cagney
2004-02-18  0:43             ` Elena Zannoni
2004-02-18  1:04               ` Daniel Jacobowitz
2004-02-18  0:20 ` David Carlton
2004-02-18  0:23   ` Daniel Jacobowitz
2004-02-18  0:27     ` Elena Zannoni
2004-02-18  0:32       ` Daniel Jacobowitz
2004-02-18  0:54         ` Elena Zannoni
2004-02-18  1:06           ` Daniel Jacobowitz
2004-02-18  0:49   ` Paul Hilfinger
2004-02-18  1:27     ` David Carlton
2004-02-18  8:12       ` Paul N. Hilfinger
2004-02-18 16:45         ` David Carlton
2004-02-20  9:32           ` Paul N. Hilfinger

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox