From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 21933 invoked by alias); 3 Nov 2003 19:07:10 -0000 Mailing-List: contact gdb-patches-help@sources.redhat.com; run by ezmlm Precedence: bulk List-Subscribe: List-Archive: List-Post: List-Help: , Sender: gdb-patches-owner@sources.redhat.com Received: (qmail 21910 invoked from network); 3 Nov 2003 19:07:09 -0000 Received: from unknown (HELO localhost.redhat.com) (207.219.125.105) by sources.redhat.com with SMTP; 3 Nov 2003 19:07:09 -0000 Received: from redhat.com (localhost [127.0.0.1]) by localhost.redhat.com (Postfix) with ESMTP id ED7DE2B8F for ; Mon, 3 Nov 2003 14:07:04 -0500 (EST) Message-ID: <3FA6A758.1060104@redhat.com> Date: Mon, 03 Nov 2003 19:07:00 -0000 From: Andrew Cagney User-Agent: Mozilla/5.0 (X11; U; NetBSD macppc; en-US; rv:1.0.2) Gecko/20030820 X-Accept-Language: en-us, en MIME-Version: 1.0 To: gdb-patches@sources.redhat.com Subject: [patch/rfc] Tweak bcache performance Content-Type: multipart/mixed; boundary="------------030605080708070207080506" X-SW-Source: 2003-11/txt/msg00035.txt.bz2 This is a multi-part message in MIME format. --------------030605080708070207080506 Content-Type: text/plain; charset=us-ascii; format=flowed Content-Transfer-Encoding: 7bit Content-length: 1609 Hello, This patch: - adds a comparison of bcache and hashtab.[hc] Summary: bcache has significantly less memory overhead - tweaks the bcache algorithm to eliminate the one performance -ve Summary: all memcmp's eliminated - exploits an assumption that nothing is bigger than 64k Summary: I figure stuff bigger than 64k shouldn't be in memory :-) As for the tweak making a measurable difference, for "file gdb" it is slight (approx 0.780s vs 0.773s) but still there. Daniel, does this answer your hashtab VS bcache question? :-) Comments? Baring them, I'll commit in a few days. Andrew PS: The stats. Cached 'partial symbol cache' statistics: Total object count: 36924 Unique object count: 15935 Percentage of duplicates, by count: 56% Total object size: 886176 Unique object size: 382440 Percentage of duplicates, by size: 56% Max entry size: 24 Average entry size: 24 Median entry size: 24 Total memory used by bcache, including overhead: 526316 Percentage memory overhead: 37% Net memory savings: 40% Hash table size: 4099 Hash table expands: 3 Hash table hashes: 52294 Half hash misses: 0 Hash table population: 97% Median hash chain length: 4 Average hash chain length: 3 Maximum hash chain length: 12 Notice how the half hash miss is zero. This indicates that, on real data, all unnecessary memcmp's were eliminated! Also notice how everything is 24 bytes in size (psymbol) because GDB isn't written in C++ :-( . --------------030605080708070207080506 Content-Type: text/plain; name="diffs" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="diffs" Content-length: 5607 * bcache.c: Include "gdb_assert.h". (struct bcache): Add fields "expand_count" and "expand_hash_count". (expand_hash_table): Update the expand counts. (print_bcache_statistics): Use XCALLOC, not alloca. Print stats on object sizes and hashes. * Makefile.in (bcache.o): Update dependencies. Index: Makefile.in =================================================================== RCS file: /cvs/src/src/gdb/Makefile.in,v retrieving revision 1.467 diff -u -r1.467 Makefile.in --- Makefile.in 1 Nov 2003 01:42:48 -0000 1.467 +++ Makefile.in 3 Nov 2003 17:01:26 -0000 @@ -1606,7 +1606,8 @@ $(target_h) $(ax_h) $(ax_gdb_h) $(gdb_string_h) $(block_h) \ $(regcache_h) ax-general.o: ax-general.c $(defs_h) $(ax_h) $(value_h) $(gdb_string_h) -bcache.o: bcache.c $(defs_h) $(gdb_obstack_h) $(bcache_h) $(gdb_string_h) +bcache.o: bcache.c $(defs_h) $(gdb_obstack_h) $(bcache_h) \ + $(gdb_string_h) $(gdb_assert_h) bfd-target.o: bfd-target.c $(defs_h) $(target_h) $(bfd_target_h) \ $(gdb_assert_h) $(gdb_string_h) block.o: block.c $(defs_h) $(block_h) $(symtab_h) $(symfile_h) \ Index: bcache.c =================================================================== RCS file: /cvs/src/src/gdb/bcache.c,v retrieving revision 1.10 diff -u -r1.10 bcache.c --- bcache.c 29 Jul 2002 22:55:26 -0000 1.10 +++ bcache.c 3 Nov 2003 17:01:26 -0000 @@ -2,7 +2,7 @@ Written by Fred Fish Rewritten by Jim Blandy - Copyright 1999, 2000, 2002 Free Software Foundation, Inc. + Copyright 1999, 2000, 2002, 2003 Free Software Foundation, Inc. This file is part of GDB. @@ -25,6 +25,7 @@ #include "gdb_obstack.h" #include "bcache.h" #include "gdb_string.h" /* For memcpy declaration */ +#include "gdb_assert.h" #include #include @@ -71,6 +72,13 @@ long unique_size; /* size of unique strings, in bytes */ long total_size; /* total number of bytes cached, including dups */ long structure_size; /* total size of bcache, including infrastructure */ + /* Number of times that the hash table is expanded and hence + re-built, and the corresponding number of times that a string is + [re]hashed as part of entering it into the expanded table. The + total number of hashes can be computed by adding TOTAL_COUNT to + expand_hash_count. */ + unsigned long expand_count; + unsigned long expand_hash_count; }; /* The old hash function was stolen from SDBM. This is what DB 3.0 uses now, @@ -117,6 +125,11 @@ struct bstring **new_buckets; unsigned int i; + /* Count the stats. Every unique item needs to be re-hashed and + re-entered. */ + bcache->expand_count++; + bcache->expand_hash_count += bcache->unique_count; + /* Find the next size. */ new_num_buckets = bcache->num_buckets * 2; for (i = 0; i < (sizeof (sizes) / sizeof (sizes[0])); i++) @@ -265,12 +278,16 @@ int occupied_buckets; int max_chain_length; int median_chain_length; + int max_entry_size; + int median_entry_size; - /* Count the number of occupied buckets, and measure chain lengths. */ + /* Count the number of occupied buckets, tally the various string + lengths, and measure chain lengths. */ { unsigned int b; - int *chain_length - = (int *) alloca (c->num_buckets * sizeof (*chain_length)); + int *chain_length = XCALLOC (c->num_buckets + 1, int); + int *entry_size = XCALLOC (c->unique_count + 1, int); + int stringi = 0; occupied_buckets = 0; @@ -286,7 +303,10 @@ while (s) { + gdb_assert (b < c->num_buckets); chain_length[b]++; + gdb_assert (stringi < c->unique_count); + entry_size[stringi++] = s->length; s = s->next; } } @@ -295,6 +315,8 @@ /* To compute the median, we need the set of chain lengths sorted. */ qsort (chain_length, c->num_buckets, sizeof (chain_length[0]), compare_ints); + qsort (entry_size, c->unique_count, sizeof (entry_size[0]), + compare_ints); if (c->num_buckets > 0) { @@ -306,6 +328,19 @@ max_chain_length = 0; median_chain_length = 0; } + if (c->unique_count > 0) + { + max_entry_size = entry_size[c->unique_count - 1]; + median_entry_size = entry_size[c->unique_count / 2]; + } + else + { + max_entry_size = 0; + median_entry_size = 0; + } + + xfree (chain_length); + xfree (entry_size); } printf_filtered (" Cached '%s' statistics:\n", type); @@ -321,6 +356,15 @@ print_percentage (c->total_size - c->unique_size, c->total_size); printf_filtered ("\n"); + printf_filtered (" Max entry size: %d\n", max_entry_size); + printf_filtered (" Average entry size: "); + if (c->unique_count > 0) + printf_filtered ("%ld\n", c->unique_size / c->unique_count); + else + printf_filtered ("(not applicable)\n"); + printf_filtered (" Median entry size: %d\n", median_entry_size); + printf_filtered ("\n"); + printf_filtered (" Total memory used by bcache, including overhead: %ld\n", c->structure_size); printf_filtered (" Percentage memory overhead: "); @@ -330,6 +374,10 @@ printf_filtered ("\n"); printf_filtered (" Hash table size: %3d\n", c->num_buckets); + printf_filtered (" Hash table expands: %lu\n", + c->expand_count); + printf_filtered (" Hash table hashes: %lu\n", + c->total_count + c->expand_hash_count); printf_filtered (" Hash table population: "); print_percentage (occupied_buckets, c->num_buckets); printf_filtered (" Median hash chain length: %3d\n", --------------030605080708070207080506--