[patch/rfc] Tweak bcache performance

Mirror of the gdb-patches mailing list
 help / color / mirror / Atom feed

From: Andrew Cagney <ac131313@redhat.com>
To: gdb-patches@sources.redhat.com
Subject: [patch/rfc] Tweak bcache performance
Date: Mon, 03 Nov 2003 19:07:00 -0000	[thread overview]
Message-ID: <3FA6A758.1060104@redhat.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 1609 bytes --]

Hello,

This patch:

- adds a comparison of bcache and hashtab.[hc]
Summary: bcache has significantly less memory overhead

- tweaks the bcache algorithm to eliminate the one performance -ve
Summary: all memcmp's eliminated

- exploits an assumption that nothing is bigger than 64k
Summary: I figure stuff bigger than 64k shouldn't be in memory :-)

As for the tweak making a measurable difference, for "file gdb" it is 
slight (approx 0.780s vs 0.773s) but still there.

Daniel, does this answer your hashtab VS bcache question? :-)

Comments?  Baring them, I'll commit in a few days.

Andrew

PS: The stats.

   Cached 'partial symbol cache' statistics:
     Total object count:  36924
     Unique object count: 15935
     Percentage of duplicates, by count:  56%

     Total object size:   886176
     Unique object size:  382440
     Percentage of duplicates, by size:   56%

     Max entry size:     24
     Average entry size: 24
     Median entry size:  24

     Total memory used by bcache, including overhead: 526316
     Percentage memory overhead:  37%
     Net memory savings:          40%

     Hash table size:           4099
     Hash table expands:        3
     Hash table hashes:         52294
     Half hash misses:          0
     Hash table population:      97%
     Median hash chain length:    4
     Average hash chain length:   3
     Maximum hash chain length:  12

Notice how the half hash miss is zero.  This indicates that, on real 
data, all unnecessary memcmp's were eliminated!

Also notice how everything is 24 bytes in size (psymbol) because GDB 
isn't written in C++ :-(

.

[-- Attachment #2: diffs --]
[-- Type: text/plain, Size: 5607 bytes --]


	* bcache.c: Include "gdb_assert.h".
	(struct bcache): Add fields "expand_count" and
	"expand_hash_count".
	(expand_hash_table): Update the expand counts.
	(print_bcache_statistics): Use XCALLOC, not alloca.  Print stats
	on object sizes and hashes.
	* Makefile.in (bcache.o): Update dependencies.

Index: Makefile.in
===================================================================
RCS file: /cvs/src/src/gdb/Makefile.in,v
retrieving revision 1.467
diff -u -r1.467 Makefile.in
--- Makefile.in	1 Nov 2003 01:42:48 -0000	1.467
+++ Makefile.in	3 Nov 2003 17:01:26 -0000
@@ -1606,7 +1606,8 @@
 	$(target_h) $(ax_h) $(ax_gdb_h) $(gdb_string_h) $(block_h) \
 	$(regcache_h)
 ax-general.o: ax-general.c $(defs_h) $(ax_h) $(value_h) $(gdb_string_h)
-bcache.o: bcache.c $(defs_h) $(gdb_obstack_h) $(bcache_h) $(gdb_string_h)
+bcache.o: bcache.c $(defs_h) $(gdb_obstack_h) $(bcache_h) \
+	$(gdb_string_h) $(gdb_assert_h)
 bfd-target.o: bfd-target.c $(defs_h) $(target_h) $(bfd_target_h) \
 	$(gdb_assert_h) $(gdb_string_h)
 block.o: block.c $(defs_h) $(block_h) $(symtab_h) $(symfile_h) \
Index: bcache.c
===================================================================
RCS file: /cvs/src/src/gdb/bcache.c,v
retrieving revision 1.10
diff -u -r1.10 bcache.c
--- bcache.c	29 Jul 2002 22:55:26 -0000	1.10
+++ bcache.c	3 Nov 2003 17:01:26 -0000
@@ -2,7 +2,7 @@
    Written by Fred Fish <fnf@cygnus.com>
    Rewritten by Jim Blandy <jimb@cygnus.com>
 
-   Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
+   Copyright 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
 
    This file is part of GDB.
 
@@ -25,6 +25,7 @@
 #include "gdb_obstack.h"
 #include "bcache.h"
 #include "gdb_string.h"		/* For memcpy declaration */
+#include "gdb_assert.h"
 
 #include <stddef.h>
 #include <stdlib.h>
@@ -71,6 +72,13 @@
   long unique_size;	/* size of unique strings, in bytes */
   long total_size;      /* total number of bytes cached, including dups */
   long structure_size;	/* total size of bcache, including infrastructure */
+  /* Number of times that the hash table is expanded and hence
+     re-built, and the corresponding number of times that a string is
+     [re]hashed as part of entering it into the expanded table.  The
+     total number of hashes can be computed by adding TOTAL_COUNT to
+     expand_hash_count.  */
+  unsigned long expand_count;
+  unsigned long expand_hash_count;
 };
 
 /* The old hash function was stolen from SDBM. This is what DB 3.0 uses now,
@@ -117,6 +125,11 @@
   struct bstring **new_buckets;
   unsigned int i;
 
+  /* Count the stats.  Every unique item needs to be re-hashed and
+     re-entered.  */
+  bcache->expand_count++;
+  bcache->expand_hash_count += bcache->unique_count;
+
   /* Find the next size.  */
   new_num_buckets = bcache->num_buckets * 2;
   for (i = 0; i < (sizeof (sizes) / sizeof (sizes[0])); i++)
@@ -265,12 +278,16 @@
   int occupied_buckets;
   int max_chain_length;
   int median_chain_length;
+  int max_entry_size;
+  int median_entry_size;
 
-  /* Count the number of occupied buckets, and measure chain lengths.  */
+  /* Count the number of occupied buckets, tally the various string
+     lengths, and measure chain lengths.  */
   {
     unsigned int b;
-    int *chain_length
-      = (int *) alloca (c->num_buckets * sizeof (*chain_length));
+    int *chain_length = XCALLOC (c->num_buckets + 1, int);
+    int *entry_size = XCALLOC (c->unique_count + 1, int);
+    int stringi = 0;
 
     occupied_buckets = 0;
 
@@ -286,7 +303,10 @@
 	    
 	    while (s)
 	      {
+		gdb_assert (b < c->num_buckets);
 		chain_length[b]++;
+		gdb_assert (stringi < c->unique_count);
+		entry_size[stringi++] = s->length;
 		s = s->next;
 	      }
 	  }
@@ -295,6 +315,8 @@
     /* To compute the median, we need the set of chain lengths sorted.  */
     qsort (chain_length, c->num_buckets, sizeof (chain_length[0]),
 	   compare_ints);
+    qsort (entry_size, c->unique_count, sizeof (entry_size[0]),
+	   compare_ints);
 
     if (c->num_buckets > 0)
       {
@@ -306,6 +328,19 @@
 	max_chain_length = 0;
 	median_chain_length = 0;
       }
+    if (c->unique_count > 0)
+      {
+	max_entry_size = entry_size[c->unique_count - 1];
+	median_entry_size = entry_size[c->unique_count / 2];
+      }
+    else
+      {
+	max_entry_size = 0;
+	median_entry_size = 0;
+      }
+
+    xfree (chain_length);
+    xfree (entry_size);
   }
 
   printf_filtered ("  Cached '%s' statistics:\n", type);
@@ -321,6 +356,15 @@
   print_percentage (c->total_size - c->unique_size, c->total_size);
   printf_filtered ("\n");
 
+  printf_filtered ("    Max entry size:     %d\n", max_entry_size);
+  printf_filtered ("    Average entry size: ");
+  if (c->unique_count > 0)
+    printf_filtered ("%ld\n", c->unique_size / c->unique_count);
+  else
+    printf_filtered ("(not applicable)\n");    
+  printf_filtered ("    Median entry size:  %d\n", median_entry_size);
+  printf_filtered ("\n");
+
   printf_filtered ("    Total memory used by bcache, including overhead: %ld\n",
 		   c->structure_size);
   printf_filtered ("    Percentage memory overhead: ");
@@ -330,6 +374,10 @@
   printf_filtered ("\n");
 
   printf_filtered ("    Hash table size:           %3d\n", c->num_buckets);
+  printf_filtered ("    Hash table expands:        %lu\n",
+		   c->expand_count);
+  printf_filtered ("    Hash table hashes:         %lu\n",
+		   c->total_count + c->expand_hash_count);
   printf_filtered ("    Hash table population:     ");
   print_percentage (occupied_buckets, c->num_buckets);
   printf_filtered ("    Median hash chain length:  %3d\n",

next             reply	other threads:[~2003-11-03 19:07 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2003-11-03 19:07 Andrew Cagney [this message]
2003-11-03 19:12 ` Daniel Jacobowitz
2003-11-03 19:54   ` Andrew Cagney
2003-11-07 22:08     ` Andrew Cagney

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3FA6A758.1060104@redhat.com \
    --to=ac131313@redhat.com \
    --cc=gdb-patches@sources.redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox