Mirror of the gdb-patches mailing list
 help / color / mirror / Atom feed
* [rfa] symbol hashing, part 1/n - updates to hash functions
@ 2001-10-09  7:57 Daniel Jacobowitz
  2001-10-11 16:51 ` Elena Zannoni
  0 siblings, 1 reply; 8+ messages in thread
From: Daniel Jacobowitz @ 2001-10-09  7:57 UTC (permalink / raw)
  To: gdb-patches

This patch still has two logical parts; if you strongly prefer I can break
it up further, but they are somewhat intertwined and I think neither should
be objectionable.  They are:
  - Fix a looping bug in msymbol_hash_iw.  It would not stop on '(' if there
was whitespace before it.
  - Update to use the identifier hash function that libiberty uses, and
more buckets.

Is this OK?

-- 
Daniel Jacobowitz                           Carnegie Mellon University
MontaVista Software                         Debian GNU/Linux Developer

2001-10-01  Daniel Jacobowitz  <drow@mvista.com>

	* minsyms.c (msymbol_hash): Use better hash function.
	Return hash value without taking modulus.
	(msymbol_hash_iw): Likewise.  Terminate loop at '(' properly.
	(add_minsym_to_hash_table): Take modulus of msymbol_hash's return
	value.
	(add_minsym_to_demangled_hash_table): Likewise for msymbol_hash_iw.
	(lookup_minimal_symbol): Likewise for both.

	* objfiles.h: Increase MINIMAL_SYMBOL_HASH_SIZE to match modern
	binaries.

Index: gdb/minsyms.c
===================================================================
RCS file: /cvs/src/src/gdb/minsyms.c,v
retrieving revision 1.17
diff -u -p -r1.17 minsyms.c
--- minsyms.c	2001/05/29 10:45:10	1.17
+++ minsyms.c	2001/10/01 22:20:47
@@ -96,10 +96,12 @@ msymbol_hash_iw (const char *string)
       while (isspace (*string))
 	++string;
       if (*string && *string != '(')
-	hash = (31 * hash) + *string;
-      ++string;
+        {
+	  hash = hash * 67 + *string - 113;
+	  ++string;
+	}
     }
-  return hash % MINIMAL_SYMBOL_HASH_SIZE;
+  return hash;
 }
 
 /* Compute a hash code for a string.  */
@@ -109,8 +111,8 @@ msymbol_hash (const char *string)
 {
   unsigned int hash = 0;
   for (; *string; ++string)
-    hash = (31 * hash) + *string;
-  return hash % MINIMAL_SYMBOL_HASH_SIZE;
+    hash = hash * 67 + *string - 113;
+  return hash;
 }
 
 /* Add the minimal symbol SYM to an objfile's minsym hash table, TABLE.  */
@@ -120,7 +122,7 @@ add_minsym_to_hash_table (struct minimal
 {
   if (sym->hash_next == NULL)
     {
-      unsigned int hash = msymbol_hash (SYMBOL_NAME (sym));
+      unsigned int hash = msymbol_hash (SYMBOL_NAME (sym)) % MINIMAL_SYMBOL_HASH_SIZE;
       sym->hash_next = table[hash];
       table[hash] = sym;
     }
@@ -134,7 +136,7 @@ add_minsym_to_demangled_hash_table (stru
 {
   if (sym->demangled_hash_next == NULL)
     {
-      unsigned int hash = msymbol_hash_iw (SYMBOL_DEMANGLED_NAME (sym));
+      unsigned int hash = msymbol_hash_iw (SYMBOL_DEMANGLED_NAME (sym)) % MINIMAL_SYMBOL_HASH_SIZE;
       sym->demangled_hash_next = table[hash];
       table[hash] = sym;
     }
@@ -162,8 +164,8 @@ lookup_minimal_symbol (register const ch
   struct minimal_symbol *found_file_symbol = NULL;
   struct minimal_symbol *trampoline_symbol = NULL;
 
-  unsigned int hash = msymbol_hash (name);
-  unsigned int dem_hash = msymbol_hash_iw (name);
+  unsigned int hash = msymbol_hash (name) % MINIMAL_SYMBOL_HASH_SIZE;
+  unsigned int dem_hash = msymbol_hash_iw (name) % MINIMAL_SYMBOL_HASH_SIZE;
 
 #ifdef SOFUN_ADDRESS_MAYBE_MISSING
   if (sfile != NULL)

Index: gdb/objfiles.h
===================================================================
RCS file: /cvs/src/src/gdb/objfiles.h,v
retrieving revision 1.8
diff -u -p -r1.8 objfiles.h
--- objfiles.h	2001/03/06 08:21:11	1.8
+++ objfiles.h	2001/10/01 22:20:53
@@ -202,7 +202,7 @@ extern void print_objfile_statistics (vo
 extern void print_symbol_bcache_statistics (void);
 
 /* Number of entries in the minimal symbol hash table.  */
-#define MINIMAL_SYMBOL_HASH_SIZE 349
+#define MINIMAL_SYMBOL_HASH_SIZE 2039
 
 /* Master structure for keeping track of each file from which
    gdb reads symbols.  There are several ways these get allocated: 1.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [rfa] symbol hashing, part 1/n - updates to hash functions
  2001-10-09  7:57 [rfa] symbol hashing, part 1/n - updates to hash functions Daniel Jacobowitz
@ 2001-10-11 16:51 ` Elena Zannoni
  2001-10-11 16:58   ` Daniel Berlin
  2001-10-11 17:01   ` Daniel Jacobowitz
  0 siblings, 2 replies; 8+ messages in thread
From: Elena Zannoni @ 2001-10-11 16:51 UTC (permalink / raw)
  To: Daniel Jacobowitz; +Cc: gdb-patches

Daniel Jacobowitz writes:
 > This patch still has two logical parts; if you strongly prefer I can break
 > it up further, but they are somewhat intertwined and I think neither should
 > be objectionable.  They are:
 >   - Fix a looping bug in msymbol_hash_iw.  It would not stop on '(' if there
 > was whitespace before it.
 >   - Update to use the identifier hash function that libiberty uses, and
 > more buckets.
 > 
 > Is this OK?

Looks ok to me in theory. Except that, why was the

 '% MINIMAL_SYMBOL_HASH_SIZE;'

bit moved outside of the msymbol_hash and msymbol_hash_iw functions?
You still do the same operation with the results returned by the two
functions anyway. 

Also, where are these 2 functions used besides mynsyms.c?  I think we
should make them static and remove the extern from symtab.h.

Can you give me an example where the '(' error comes up? (Just so I
understand it better).  How did you come up with the number of
buckets? Is this also used in libiberty?

Can you fix it and resubmit?

Thanks
Elena

 > 
 > -- 
 > Daniel Jacobowitz                           Carnegie Mellon University
 > MontaVista Software                         Debian GNU/Linux Developer
 > 
 > 2001-10-01  Daniel Jacobowitz  <drow@mvista.com>
 > 
 > 	* minsyms.c (msymbol_hash): Use better hash function.
 > 	Return hash value without taking modulus.
 > 	(msymbol_hash_iw): Likewise.  Terminate loop at '(' properly.
 > 	(add_minsym_to_hash_table): Take modulus of msymbol_hash's return
 > 	value.
 > 	(add_minsym_to_demangled_hash_table): Likewise for msymbol_hash_iw.
 > 	(lookup_minimal_symbol): Likewise for both.
 > 
 > 	* objfiles.h: Increase MINIMAL_SYMBOL_HASH_SIZE to match modern
 > 	binaries.
 > 
 > Index: gdb/minsyms.c
 > ===================================================================
 > RCS file: /cvs/src/src/gdb/minsyms.c,v
 > retrieving revision 1.17
 > diff -u -p -r1.17 minsyms.c
 > --- minsyms.c	2001/05/29 10:45:10	1.17
 > +++ minsyms.c	2001/10/01 22:20:47
 > @@ -96,10 +96,12 @@ msymbol_hash_iw (const char *string)
 >        while (isspace (*string))
 >  	++string;
 >        if (*string && *string != '(')
 > -	hash = (31 * hash) + *string;
 > -      ++string;
 > +        {
 > +	  hash = hash * 67 + *string - 113;
 > +	  ++string;
 > +	}
 >      }
 > -  return hash % MINIMAL_SYMBOL_HASH_SIZE;
 > +  return hash;
 >  }
 >  
 >  /* Compute a hash code for a string.  */
 > @@ -109,8 +111,8 @@ msymbol_hash (const char *string)
 >  {
 >    unsigned int hash = 0;
 >    for (; *string; ++string)
 > -    hash = (31 * hash) + *string;
 > -  return hash % MINIMAL_SYMBOL_HASH_SIZE;
 > +    hash = hash * 67 + *string - 113;
 > +  return hash;
 >  }
 >  
 >  /* Add the minimal symbol SYM to an objfile's minsym hash table, TABLE.  */
 > @@ -120,7 +122,7 @@ add_minsym_to_hash_table (struct minimal
 >  {
 >    if (sym->hash_next == NULL)
 >      {
 > -      unsigned int hash = msymbol_hash (SYMBOL_NAME (sym));
 > +      unsigned int hash = msymbol_hash (SYMBOL_NAME (sym)) % MINIMAL_SYMBOL_HASH_SIZE;
 >        sym->hash_next = table[hash];
 >        table[hash] = sym;
 >      }
 > @@ -134,7 +136,7 @@ add_minsym_to_demangled_hash_table (stru
 >  {
 >    if (sym->demangled_hash_next == NULL)
 >      {
 > -      unsigned int hash = msymbol_hash_iw (SYMBOL_DEMANGLED_NAME (sym));
 > +      unsigned int hash = msymbol_hash_iw (SYMBOL_DEMANGLED_NAME (sym)) % MINIMAL_SYMBOL_HASH_SIZE;
 >        sym->demangled_hash_next = table[hash];
 >        table[hash] = sym;
 >      }
 > @@ -162,8 +164,8 @@ lookup_minimal_symbol (register const ch
 >    struct minimal_symbol *found_file_symbol = NULL;
 >    struct minimal_symbol *trampoline_symbol = NULL;
 >  
 > -  unsigned int hash = msymbol_hash (name);
 > -  unsigned int dem_hash = msymbol_hash_iw (name);
 > +  unsigned int hash = msymbol_hash (name) % MINIMAL_SYMBOL_HASH_SIZE;
 > +  unsigned int dem_hash = msymbol_hash_iw (name) % MINIMAL_SYMBOL_HASH_SIZE;
 >  
 >  #ifdef SOFUN_ADDRESS_MAYBE_MISSING
 >    if (sfile != NULL)
 > 
 > Index: gdb/objfiles.h
 > ===================================================================
 > RCS file: /cvs/src/src/gdb/objfiles.h,v
 > retrieving revision 1.8
 > diff -u -p -r1.8 objfiles.h
 > --- objfiles.h	2001/03/06 08:21:11	1.8
 > +++ objfiles.h	2001/10/01 22:20:53
 > @@ -202,7 +202,7 @@ extern void print_objfile_statistics (vo
 >  extern void print_symbol_bcache_statistics (void);
 >  
 >  /* Number of entries in the minimal symbol hash table.  */
 > -#define MINIMAL_SYMBOL_HASH_SIZE 349
 > +#define MINIMAL_SYMBOL_HASH_SIZE 2039
 >  
 >  /* Master structure for keeping track of each file from which
 >     gdb reads symbols.  There are several ways these get allocated: 1.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [rfa] symbol hashing, part 1/n - updates to hash functions
  2001-10-11 16:51 ` Elena Zannoni
@ 2001-10-11 16:58   ` Daniel Berlin
  2001-10-12  8:54     ` Elena Zannoni
  2001-10-11 17:01   ` Daniel Jacobowitz
  1 sibling, 1 reply; 8+ messages in thread
From: Daniel Berlin @ 2001-10-11 16:58 UTC (permalink / raw)
  To: Elena Zannoni; +Cc: Daniel Jacobowitz, gdb-patches

On Thursday, October 11, 2001, at 07:58  PM, Elena Zannoni wrote:

> Daniel Jacobowitz writes:
>> This patch still has two logical parts; if you strongly prefer I can 
>> break
>> it up further, but they are somewhat intertwined and I think neither 
>> should
>> be objectionable.  They are:
>>   - Fix a looping bug in msymbol_hash_iw.  It would not stop on '(' if 
>> there
>> was whitespace before it.
>>   - Update to use the identifier hash function that libiberty uses, and
>> more buckets.
>>
>> Is this OK?
>
> Looks ok to me in theory. Except that, why was the
>
>  '% MINIMAL_SYMBOL_HASH_SIZE;'
>
> bit moved outside of the msymbol_hash and msymbol_hash_iw functions?
So msymbol_hash and hash_iw could be used elsewhere.

> You still do the same operation with the results returned by the two
> functions anyway.
>
Except, now they are just hash functions, not hash functions that only 
work for the minsym hash tables.

> Also, where are these 2 functions used besides mynsyms.c?
In a further symbol hashing patch, unless he changed it.

> I think we
> should make them static and remove the extern from symtab.h.
>

> Can you give me an example where the '(' error comes up? (Just so I
> understand it better).  How did you come up with the number of
> buckets?

Averaging based on a large number of gnome, kde, and other real 
applications (ie emacs), compiled with debug info.
349 is way too small, we ended up with chains > length 100, all the time.

>  Is this also used in libiberty?

Which, the hash function?
> Can you fix it and resubmit?
>
> Thanks
> Elena
>
>>
>> --
>> Daniel Jacobowitz                           Carnegie Mellon University
>> MontaVista Software                         Debian GNU/Linux Developer
>>
>> 2001-10-01  Daniel Jacobowitz  <drow@mvista.com>
>>
>> 	* minsyms.c (msymbol_hash): Use better hash function.
>> 	Return hash value without taking modulus.
>> 	(msymbol_hash_iw): Likewise.  Terminate loop at '(' properly.
>> 	(add_minsym_to_hash_table): Take modulus of msymbol_hash's return
>> 	value.
>> 	(add_minsym_to_demangled_hash_table): Likewise for msymbol_hash_iw.
>> 	(lookup_minimal_symbol): Likewise for both.
>>
>> 	* objfiles.h: Increase MINIMAL_SYMBOL_HASH_SIZE to match modern
>> 	binaries.
>>
>> Index: gdb/minsyms.c
>> ===================================================================
>> RCS file: /cvs/src/src/gdb/minsyms.c,v
>> retrieving revision 1.17
>> diff -u -p -r1.17 minsyms.c
>> --- minsyms.c	2001/05/29 10:45:10	1.17
>> +++ minsyms.c	2001/10/01 22:20:47
>> @@ -96,10 +96,12 @@ msymbol_hash_iw (const char *string)
>>        while (isspace (*string))
>>  	++string;
>>        if (*string && *string != '(')
>> -	hash = (31 * hash) + *string;
>> -      ++string;
>> +        {
>> +	  hash = hash * 67 + *string - 113;
>> +	  ++string;
>> +	}
>>      }
>> -  return hash % MINIMAL_SYMBOL_HASH_SIZE;
>> +  return hash;
>>  }
>>
>>  /* Compute a hash code for a string.  */
>> @@ -109,8 +111,8 @@ msymbol_hash (const char *string)
>>  {
>>    unsigned int hash = 0;
>>    for (; *string; ++string)
>> -    hash = (31 * hash) + *string;
>> -  return hash % MINIMAL_SYMBOL_HASH_SIZE;
>> +    hash = hash * 67 + *string - 113;
>> +  return hash;
>>  }
>>
>>  /* Add the minimal symbol SYM to an objfile's minsym hash table, 
>> TABLE.  */
>> @@ -120,7 +122,7 @@ add_minsym_to_hash_table (struct minimal
>>  {
>>    if (sym->hash_next == NULL)
>>      {
>> -      unsigned int hash = msymbol_hash (SYMBOL_NAME (sym));
>> +      unsigned int hash = msymbol_hash (SYMBOL_NAME (sym)) % 
>> MINIMAL_SYMBOL_HASH_SIZE;
>>        sym->hash_next = table[hash];
>>        table[hash] = sym;
>>      }
>> @@ -134,7 +136,7 @@ add_minsym_to_demangled_hash_table (stru
>>  {
>>    if (sym->demangled_hash_next == NULL)
>>      {
>> -      unsigned int hash = msymbol_hash_iw (SYMBOL_DEMANGLED_NAME 
>> (sym));
>> +      unsigned int hash = msymbol_hash_iw (SYMBOL_DEMANGLED_NAME 
>> (sym)) % MINIMAL_SYMBOL_HASH_SIZE;
>>        sym->demangled_hash_next = table[hash];
>>        table[hash] = sym;
>>      }
>> @@ -162,8 +164,8 @@ lookup_minimal_symbol (register const ch
>>    struct minimal_symbol *found_file_symbol = NULL;
>>    struct minimal_symbol *trampoline_symbol = NULL;
>>
>> -  unsigned int hash = msymbol_hash (name);
>> -  unsigned int dem_hash = msymbol_hash_iw (name);
>> +  unsigned int hash = msymbol_hash (name) % MINIMAL_SYMBOL_HASH_SIZE;
>> +  unsigned int dem_hash = msymbol_hash_iw (name) % 
>> MINIMAL_SYMBOL_HASH_SIZE;
>>
>>  #ifdef SOFUN_ADDRESS_MAYBE_MISSING
>>    if (sfile != NULL)
>>
>> Index: gdb/objfiles.h
>> ===================================================================
>> RCS file: /cvs/src/src/gdb/objfiles.h,v
>> retrieving revision 1.8
>> diff -u -p -r1.8 objfiles.h
>> --- objfiles.h	2001/03/06 08:21:11	1.8
>> +++ objfiles.h	2001/10/01 22:20:53
>> @@ -202,7 +202,7 @@ extern void print_objfile_statistics (vo
>>  extern void print_symbol_bcache_statistics (void);
>>
>>  /* Number of entries in the minimal symbol hash table.  */
>> -#define MINIMAL_SYMBOL_HASH_SIZE 349
>> +#define MINIMAL_SYMBOL_HASH_SIZE 2039
>>
>>  /* Master structure for keeping track of each file from which
>>     gdb reads symbols.  There are several ways these get allocated: 1.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [rfa] symbol hashing, part 1/n - updates to hash functions
  2001-10-11 16:51 ` Elena Zannoni
  2001-10-11 16:58   ` Daniel Berlin
@ 2001-10-11 17:01   ` Daniel Jacobowitz
  2001-10-12  8:59     ` Elena Zannoni
  1 sibling, 1 reply; 8+ messages in thread
From: Daniel Jacobowitz @ 2001-10-11 17:01 UTC (permalink / raw)
  To: Elena Zannoni; +Cc: gdb-patches

On Thu, Oct 11, 2001 at 07:58:20PM -0400, Elena Zannoni wrote:
> Daniel Jacobowitz writes:
>  > This patch still has two logical parts; if you strongly prefer I can break
>  > it up further, but they are somewhat intertwined and I think neither should
>  > be objectionable.  They are:
>  >   - Fix a looping bug in msymbol_hash_iw.  It would not stop on '(' if there
>  > was whitespace before it.
>  >   - Update to use the identifier hash function that libiberty uses, and
>  > more buckets.
>  > 
>  > Is this OK?
> 
> Looks ok to me in theory. Except that, why was the
> 
>  '% MINIMAL_SYMBOL_HASH_SIZE;'
> 
> bit moved outside of the msymbol_hash and msymbol_hash_iw functions?
> You still do the same operation with the results returned by the two
> functions anyway. 
> 
> Also, where are these 2 functions used besides mynsyms.c?  I think we
> should make them static and remove the extern from symtab.h.

Both the moving of modulus and the no-other-uses are addressed by the
hashing patches.  These are the hash functions I will use on the
symtabs; they work for symbols as well as for minsyms.  A symtab has a
dynamic number of buckets.

> Can you give me an example where the '(' error comes up? (Just so I
> understand it better).  How did you come up with the number of
> buckets? Is this also used in libiberty?

The '(' error looks like this:

Hash the string "operator* ()".
At one point, string = " ()".  The initial whitespace loop changes this
to "()".  Then the character is not hashed (because of the if test
already present), but ++string is triggered.  The while loop now
continues, because *string == ')' instead of '('.

The number of blocks I just came up with by experimentation (well, Dan
did, and then I experimented with it and was satisfied).  Libiberty
uses expandable hash tables; I could simply use them instead, but I'd
rather postpone that change until we've got the rest of hashing in
place.

> Can you fix it and resubmit?

After my explanations, does anything else need fixing?

Thanks for looking at these patches!

-- 
Daniel Jacobowitz                           Carnegie Mellon University
MontaVista Software                         Debian GNU/Linux Developer


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [rfa] symbol hashing, part 1/n - updates to hash functions
  2001-10-11 16:58   ` Daniel Berlin
@ 2001-10-12  8:54     ` Elena Zannoni
  0 siblings, 0 replies; 8+ messages in thread
From: Elena Zannoni @ 2001-10-12  8:54 UTC (permalink / raw)
  To: Daniel Berlin; +Cc: Elena Zannoni, Daniel Jacobowitz, gdb-patches

Daniel Berlin writes:
 > 
 > On Thursday, October 11, 2001, at 07:58  PM, Elena Zannoni wrote:
 > 
 > > Daniel Jacobowitz writes:
 > >> This patch still has two logical parts; if you strongly prefer I can 
 > >> break
 > >> it up further, but they are somewhat intertwined and I think neither 
 > >> should
 > >> be objectionable.  They are:
 > >>   - Fix a looping bug in msymbol_hash_iw.  It would not stop on '(' if 
 > >> there
 > >> was whitespace before it.
 > >>   - Update to use the identifier hash function that libiberty uses, and
 > >> more buckets.
 > >>
 > >> Is this OK?
 > >
 > > Looks ok to me in theory. Except that, why was the
 > >
 > >  '% MINIMAL_SYMBOL_HASH_SIZE;'
 > >
 > > bit moved outside of the msymbol_hash and msymbol_hash_iw functions?
 > So msymbol_hash and hash_iw could be used elsewhere.

I see, can we leave the "% MINIMAL_SYMBOL_HASH_SIZE;" where it was
then, until we get to the next patch?

 > 
 > > You still do the same operation with the results returned by the two
 > > functions anyway.
 > >
 > Except, now they are just hash functions, not hash functions that only 
 > work for the minsym hash tables.
 > 

Right, OK.

 > > Also, where are these 2 functions used besides mynsyms.c?
 > In a further symbol hashing patch, unless he changed it.
 > 

All right. Then the changes to the '%' thing should be put in the
future patch.

 > > I think we
 > > should make them static and remove the extern from symtab.h.
 > >
 > 
 > > Can you give me an example where the '(' error comes up? (Just so I
 > > understand it better).  How did you come up with the number of
 > > buckets?
 > 
 > Averaging based on a large number of gnome, kde, and other real 
 > applications (ie emacs), compiled with debug info.
 > 349 is way too small, we ended up with chains > length 100, all the time.
 > 

OK.

 > >  Is this also used in libiberty?
 > 
 > Which, the hash function?
 > > Can you fix it and resubmit?
 > >

Never mind, I  meant the number of buckets. But you answered that.

Elena


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [rfa] symbol hashing, part 1/n - updates to hash functions
  2001-10-11 17:01   ` Daniel Jacobowitz
@ 2001-10-12  8:59     ` Elena Zannoni
  2001-10-12 12:09       ` Daniel Jacobowitz
  0 siblings, 1 reply; 8+ messages in thread
From: Elena Zannoni @ 2001-10-12  8:59 UTC (permalink / raw)
  To: Daniel Jacobowitz; +Cc: Elena Zannoni, gdb-patches

Daniel Jacobowitz writes:
 > On Thu, Oct 11, 2001 at 07:58:20PM -0400, Elena Zannoni wrote:
 > > Daniel Jacobowitz writes:
 > >  > This patch still has two logical parts; if you strongly prefer I can break
 > >  > it up further, but they are somewhat intertwined and I think neither should
 > >  > be objectionable.  They are:
 > >  >   - Fix a looping bug in msymbol_hash_iw.  It would not stop on '(' if there
 > >  > was whitespace before it.
 > >  >   - Update to use the identifier hash function that libiberty uses, and
 > >  > more buckets.
 > >  > 
 > >  > Is this OK?
 > > 
 > > Looks ok to me in theory. Except that, why was the
 > > 
 > >  '% MINIMAL_SYMBOL_HASH_SIZE;'
 > > 
 > > bit moved outside of the msymbol_hash and msymbol_hash_iw functions?
 > > You still do the same operation with the results returned by the two
 > > functions anyway. 
 > > 
 > > Also, where are these 2 functions used besides mynsyms.c?  I think we
 > > should make them static and remove the extern from symtab.h.
 > 
 > Both the moving of modulus and the no-other-uses are addressed by the
 > hashing patches.  These are the hash functions I will use on the
 > symtabs; they work for symbols as well as for minsyms.  A symtab has a
 > dynamic number of buckets.
 > 

Ok, I can see what's coming. But then, I would definitely prefer to
move the '%' out of the functions only when the rest of the patch is
submitted. It doesn't really fit with the changes you are making in
this patch.

 > > Can you give me an example where the '(' error comes up? (Just so I
 > > understand it better).  How did you come up with the number of
 > > buckets? Is this also used in libiberty?
 > 
 > The '(' error looks like this:
 > 
 > Hash the string "operator* ()".
 > At one point, string = " ()".  The initial whitespace loop changes this
 > to "()".  Then the character is not hashed (because of the if test
 > already present), but ++string is triggered.  The while loop now
 > continues, because *string == ')' instead of '('.
 > 

OK, thanks.

 > The number of blocks I just came up with by experimentation (well, Dan
 > did, and then I experimented with it and was satisfied).  Libiberty
 > uses expandable hash tables; I could simply use them instead, but I'd
 > rather postpone that change until we've got the rest of hashing in
 > place.
 > 

Yes, ok.

 > > Can you fix it and resubmit?
 > 
 > After my explanations, does anything else need fixing?
 > 

Just hold on the '%' move until the next patch.
Everything else is fine.

 > Thanks for looking at these patches!
 > 

Thanks for bearing with me!

Elena


 > -- 
 > Daniel Jacobowitz                           Carnegie Mellon University
 > MontaVista Software                         Debian GNU/Linux Developer


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [rfa] symbol hashing, part 1/n - updates to hash functions
  2001-10-12  8:59     ` Elena Zannoni
@ 2001-10-12 12:09       ` Daniel Jacobowitz
  2001-10-12 15:34         ` Elena Zannoni
  0 siblings, 1 reply; 8+ messages in thread
From: Daniel Jacobowitz @ 2001-10-12 12:09 UTC (permalink / raw)
  To: Elena Zannoni; +Cc: gdb-patches

On Fri, Oct 12, 2001 at 12:06:32PM -0400, Elena Zannoni wrote:
> Just hold on the '%' move until the next patch.
> Everything else is fine.

OK, I've committed the attached.

-- 
Daniel Jacobowitz                           Carnegie Mellon University
MontaVista Software                         Debian GNU/Linux Developer

2001-10-12  Daniel Jacobowitz  <drow@mvista.com>

	* minsyms.c (msymbol_hash): Use better hash function.
	(msymbol_hash_iw): Likewise.  Terminate loop at '(' properly.

	* objfiles.h: Increase MINIMAL_SYMBOL_HASH_SIZE to match modern
	binaries.


Index: gdb/minsyms.c
===================================================================
RCS file: /cvs/src/src/gdb/minsyms.c,v
retrieving revision 1.17
diff -u -r1.17 minsyms.c
--- minsyms.c	2001/05/29 10:45:10	1.17
+++ minsyms.c	2001/10/12 19:02:55
@@ -96,8 +96,10 @@
       while (isspace (*string))
 	++string;
       if (*string && *string != '(')
-	hash = (31 * hash) + *string;
-      ++string;
+        {
+	  hash = hash * 67 + *string - 113;
+	  ++string;
+	}
     }
   return hash % MINIMAL_SYMBOL_HASH_SIZE;
 }
@@ -109,7 +111,7 @@
 {
   unsigned int hash = 0;
   for (; *string; ++string)
-    hash = (31 * hash) + *string;
+    hash = hash * 67 + *string - 113;
   return hash % MINIMAL_SYMBOL_HASH_SIZE;
 }
 
Index: gdb/objfiles.h
===================================================================
RCS file: /cvs/src/src/gdb/objfiles.h,v
retrieving revision 1.8
diff -u -r1.8 objfiles.h
--- objfiles.h	2001/03/06 08:21:11	1.8
+++ objfiles.h	2001/10/12 19:02:55
@@ -202,7 +202,7 @@
 extern void print_symbol_bcache_statistics (void);
 
 /* Number of entries in the minimal symbol hash table.  */
-#define MINIMAL_SYMBOL_HASH_SIZE 349
+#define MINIMAL_SYMBOL_HASH_SIZE 2039
 
 /* Master structure for keeping track of each file from which
    gdb reads symbols.  There are several ways these get allocated: 1.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [rfa] symbol hashing, part 1/n - updates to hash functions
  2001-10-12 12:09       ` Daniel Jacobowitz
@ 2001-10-12 15:34         ` Elena Zannoni
  0 siblings, 0 replies; 8+ messages in thread
From: Elena Zannoni @ 2001-10-12 15:34 UTC (permalink / raw)
  To: Daniel Jacobowitz; +Cc: Elena Zannoni, gdb-patches

Daniel Jacobowitz writes:
 > On Fri, Oct 12, 2001 at 12:06:32PM -0400, Elena Zannoni wrote:
 > > Just hold on the '%' move until the next patch.
 > > Everything else is fine.
 > 
 > OK, I've committed the attached.

Great, thanks!

Elena


 > 
 > -- 
 > Daniel Jacobowitz                           Carnegie Mellon University
 > MontaVista Software                         Debian GNU/Linux Developer
 > 
 > 2001-10-12  Daniel Jacobowitz  <drow@mvista.com>
 > 
 > 	* minsyms.c (msymbol_hash): Use better hash function.
 > 	(msymbol_hash_iw): Likewise.  Terminate loop at '(' properly.
 > 
 > 	* objfiles.h: Increase MINIMAL_SYMBOL_HASH_SIZE to match modern
 > 	binaries.
 > 
 > 
 > Index: gdb/minsyms.c
 > ===================================================================
 > RCS file: /cvs/src/src/gdb/minsyms.c,v
 > retrieving revision 1.17
 > diff -u -r1.17 minsyms.c
 > --- minsyms.c	2001/05/29 10:45:10	1.17
 > +++ minsyms.c	2001/10/12 19:02:55
 > @@ -96,8 +96,10 @@
 >        while (isspace (*string))
 >  	++string;
 >        if (*string && *string != '(')
 > -	hash = (31 * hash) + *string;
 > -      ++string;
 > +        {
 > +	  hash = hash * 67 + *string - 113;
 > +	  ++string;
 > +	}
 >      }
 >    return hash % MINIMAL_SYMBOL_HASH_SIZE;
 >  }
 > @@ -109,7 +111,7 @@
 >  {
 >    unsigned int hash = 0;
 >    for (; *string; ++string)
 > -    hash = (31 * hash) + *string;
 > +    hash = hash * 67 + *string - 113;
 >    return hash % MINIMAL_SYMBOL_HASH_SIZE;
 >  }
 >  
 > Index: gdb/objfiles.h
 > ===================================================================
 > RCS file: /cvs/src/src/gdb/objfiles.h,v
 > retrieving revision 1.8
 > diff -u -r1.8 objfiles.h
 > --- objfiles.h	2001/03/06 08:21:11	1.8
 > +++ objfiles.h	2001/10/12 19:02:55
 > @@ -202,7 +202,7 @@
 >  extern void print_symbol_bcache_statistics (void);
 >  
 >  /* Number of entries in the minimal symbol hash table.  */
 > -#define MINIMAL_SYMBOL_HASH_SIZE 349
 > +#define MINIMAL_SYMBOL_HASH_SIZE 2039
 >  
 >  /* Master structure for keeping track of each file from which
 >     gdb reads symbols.  There are several ways these get allocated: 1.


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2001-10-12 15:34 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2001-10-09  7:57 [rfa] symbol hashing, part 1/n - updates to hash functions Daniel Jacobowitz
2001-10-11 16:51 ` Elena Zannoni
2001-10-11 16:58   ` Daniel Berlin
2001-10-12  8:54     ` Elena Zannoni
2001-10-11 17:01   ` Daniel Jacobowitz
2001-10-12  8:59     ` Elena Zannoni
2001-10-12 12:09       ` Daniel Jacobowitz
2001-10-12 15:34         ` Elena Zannoni

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox