* gdb script performance
@ 2006-10-05 12:16 Jean-Marc Saffroy
2006-10-05 12:59 ` Frederic RISS
0 siblings, 1 reply; 7+ messages in thread
From: Jean-Marc Saffroy @ 2006-10-05 12:16 UTC (permalink / raw)
To: gdb
[-- Attachment #1: Type: TEXT/PLAIN, Size: 3480 bytes --]
Hello,
I'm using gdb (6.5) to analyse a Linux kernel kdump (really an ELF core
file), and I find that a simple script is awwwfully slow. Here is an
example:
define cntmaps
shell date
set $map = vmlist
set $gcount = 0
while $map != 0
set $gcount = $gcount + 1
set $map = $map->next
end
printf "vmlist count: %d\n", $gcount
shell date
end
(gdb) cntmaps
Thu Oct 5 13:44:04 CEST 2006
vmlist count: 99
Thu Oct 5 13:44:14 CEST 2006
So it takes about 10 seconds to walk a list with ~100 elements, without
doing anything really useful (the host is a 2GHz dual core!). I have
another script that does more complex processing with the same list, and
it takes about 12 minutes to complete! It's the lsmaps macro below, which
ends up calling the vmap macro 2381 times.
So I rebuilt gdb with profiling (-pg), attached is the complete output
from gprof after running lsmaps in batch mode. A quick look suggests that
some symbol lookups are problematic, and I suspect that gdb easily ends up
doing linear searches in some big table (I'm working on a Linux kernel
with full debug infos).
Any suggestion for performance improvement? Maybe gdb could use hash
tables to speed up things a bit? :-)
Cheers,
--
saffroy@gmail.com
# a few useful(?) macros for x86-64 VMM hacks
# useful constants
set $PAGE_SIZE = (1<<12)
set $__PHYSICAL_MASK = (1 << 46)-1
set $PTE_MASK = ~($PAGE_SIZE-1) & $__PHYSICAL_MASK
set $__PAGE_OFFSET = 0xffff810000000000
set $_PAGE_PSE = 0x80
define vmap
set $addr = (long)$arg0
# index in each of the 4 levels of page directories
set $pgd = $addr >> 39 & (1<<9)-1
set $pud = $addr >> 30 & (1<<9)-1
set $pmd = $addr >> 21 & (1<<9)-1
set $pte = $addr >> 12 & (1<<9)-1
# offset in page
set $off = $addr & (1<<12)-1
#printf "%03x %03x %03x %03x %03x\n", $pgd, $pud, $pmd, $pte, $off
set $pgd_off = (pgd_t *) &init_level4_pgt + $pgd
#printf "pgd_off: %lx pgd: %lx\n", $pgd_off, (long)$pgd_off->pgd
set $pgd_page = ((long)$pgd_off->pgd & $PTE_MASK) + $__PAGE_OFFSET
#printf "pgd_page: %lx\n", $pgd_page
set $pud_off = ((pud_t *) $pgd_page) + $pud
#printf "pud_off: %lx pud: %lx\n", $pud_off, (long)$pud_off->pud
set $pud_page = ((long)$pud_off->pud & $PTE_MASK) + $__PAGE_OFFSET
#printf "pud_page: %lx\n", $pud_page
set $pmd_off = ((pmd_t *) $pud_page) + $pmd
#printf "pmd_off: %lx pmd: %lx\n", $pmd_off, (long)$pmd_off->pmd
set $pmd_page = ((long)$pmd_off->pmd & $PTE_MASK) + $__PAGE_OFFSET
#printf "pmd_page: %lx\n", $pmd_page
if ((long)$pmd_off->pmd & $_PAGE_PSE) != 0
#printf "PSE page! "
set $paddr = $pmd_page + ($addr & (1<<21)-1)
else
set $pte_off = ((pte_t *) $pmd_page) + $pte
#printf "pte_off: %lx pte: %lx\n", $pte_off, (long)$pte_off->pte
set $pte_page = ((long)$pte_off->pte & $PTE_MASK) + $__PAGE_OFFSET
#printf "pte_page: %lx\n", $pte_page
set $paddr = $pte_page + $off
end
#printf "remapped physical addr: %lx\n", $paddr
printf "%lx -> %lx\n", $addr, $paddr
end
define lsmaps
set $map = vmlist
set $gcount = 0
while $map != 0
if $map->pages != 0
set $vaddr = (long)$map->addr
set $count = (long)$map->size / $PAGE_SIZE
set $gcount = $gcount + $count -1
while $count > 1
vmap $vaddr
set $vaddr = $vaddr + $PAGE_SIZE
set $count = $count - 1
end
end
set $map = $map->next
end
printf "page count: %d\n", $gcount
end
[-- Attachment #2: Type: APPLICATION/octet-stream, Size: 63566 bytes --]
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: gdb script performance
2006-10-05 12:16 gdb script performance Jean-Marc Saffroy
@ 2006-10-05 12:59 ` Frederic RISS
2006-10-05 13:28 ` Jean-Marc Saffroy
0 siblings, 1 reply; 7+ messages in thread
From: Frederic RISS @ 2006-10-05 12:59 UTC (permalink / raw)
To: Jean-Marc Saffroy; +Cc: gdb
Hi,
On Thu, 2006-10-05 at 14:15 +0200, Jean-Marc Saffroy wrote:
> Hello,
>
> I'm using gdb (6.5) to analyse a Linux kernel kdump (really an ELF core
> file), and I find that a simple script is awwwfully slow. Here is an
> example:
I haven't looked at your gprof results, but your problem is certainly
the same as described here:
http://sources.redhat.com/ml/gdb/2006-08/msg00099.html
This discussion lead to the patch submitted here:
http://sources.redhat.com/ml/gdb-patches/2006-09/msg00088.html
It hasn't been checked in yet because of missing copyright assignement
paperwork.
Fred.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: gdb script performance
2006-10-05 12:59 ` Frederic RISS
@ 2006-10-05 13:28 ` Jean-Marc Saffroy
2006-10-05 13:32 ` Daniel Jacobowitz
0 siblings, 1 reply; 7+ messages in thread
From: Jean-Marc Saffroy @ 2006-10-05 13:28 UTC (permalink / raw)
To: Frederic RISS; +Cc: gdb
On Thu, 5 Oct 2006, Frederic RISS wrote:
> I haven't looked at your gprof results, but your problem is certainly
> the same as described here:
> http://sources.redhat.com/ml/gdb/2006-08/msg00099.html
Thanks for pointing this out! That's exactly the same problem indeed.
> This discussion lead to the patch submitted here:
> http://sources.redhat.com/ml/gdb-patches/2006-09/msg00088.html
Great! The 12' script now runs in only 2'35". :-) I can't help thinking
tt's still not blazingly fast, considering the underlying hardware, but
it's a big step forward.
> It hasn't been checked in yet because of missing copyright assignement
> paperwork.
Well I hope it will make it in the next gdb release.
Thanks again!
Cheers,
--
saffroy@gmail.com
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: gdb script performance
2006-10-05 13:28 ` Jean-Marc Saffroy
@ 2006-10-05 13:32 ` Daniel Jacobowitz
2006-10-05 15:31 ` Jean-Marc Saffroy
0 siblings, 1 reply; 7+ messages in thread
From: Daniel Jacobowitz @ 2006-10-05 13:32 UTC (permalink / raw)
To: Jean-Marc Saffroy; +Cc: Frederic RISS, gdb
On Thu, Oct 05, 2006 at 03:26:59PM +0200, Jean-Marc Saffroy wrote:
> >This discussion lead to the patch submitted here:
> >http://sources.redhat.com/ml/gdb-patches/2006-09/msg00088.html
>
> Great! The 12' script now runs in only 2'35". :-) I can't help thinking
> tt's still not blazingly fast, considering the underlying hardware, but
> it's a big step forward.
If you would like to see if further improvement is possible, you should
build a GDB with profiling enabled (--enable-profiling at configure
time), and use the maint set profile command to get a profile; then
post exact data from it.
> Well I hope it will make it in the next gdb release.
We do too; we're waiting on the paperwork.
--
Daniel Jacobowitz
CodeSourcery
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: gdb script performance
2006-10-05 13:32 ` Daniel Jacobowitz
@ 2006-10-05 15:31 ` Jean-Marc Saffroy
2006-10-05 15:37 ` Daniel Jacobowitz
0 siblings, 1 reply; 7+ messages in thread
From: Jean-Marc Saffroy @ 2006-10-05 15:31 UTC (permalink / raw)
To: Daniel Jacobowitz; +Cc: Frederic RISS, gdb
[-- Attachment #1: Type: TEXT/PLAIN, Size: 480 bytes --]
On Thu, 5 Oct 2006, Daniel Jacobowitz wrote:
> If you would like to see if further improvement is possible, you should
> build a GDB with profiling enabled (--enable-profiling at configure
> time), and use the maint set profile command to get a profile; then
> post exact data from it.
Please find attached gprof output (this is with gdb 6.5 +
gdb-lookup-internal-first-3.patch running the lsmaps script I posted
earlier).
Thanks for helping!
Cheers,
--
saffroy@gmail.com
[-- Attachment #2: Type: APPLICATION/OCTET-STREAM, Size: 23749 bytes --]
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: gdb script performance
2006-10-05 15:31 ` Jean-Marc Saffroy
@ 2006-10-05 15:37 ` Daniel Jacobowitz
2006-11-29 2:47 ` Jean-Marc Saffroy
0 siblings, 1 reply; 7+ messages in thread
From: Daniel Jacobowitz @ 2006-10-05 15:37 UTC (permalink / raw)
To: Jean-Marc Saffroy; +Cc: Frederic RISS, gdb
On Thu, Oct 05, 2006 at 05:29:30PM +0200, Jean-Marc Saffroy wrote:
> On Thu, 5 Oct 2006, Daniel Jacobowitz wrote:
>
> >If you would like to see if further improvement is possible, you should
> >build a GDB with profiling enabled (--enable-profiling at configure
> >time), and use the maint set profile command to get a profile; then
> >post exact data from it.
>
> Please find attached gprof output (this is with gdb 6.5 +
> gdb-lookup-internal-first-3.patch running the lsmaps script I posted
> earlier).
>
> Thanks for helping!
26.14 381.31 381.31 141867 2.69 3.50 find_pc_sect_psymtab
18.54 651.82 270.51 331876 0.82 0.82 find_pc_sect_section
17.88 912.69 260.87 144847060 0.00 0.00 lookup_partial_symbol
11.01 1073.33 160.64 1254595136 0.00 0.00 strcmp_iw
4.98 1145.97 72.64 34010 2.14 16.50 lookup_symbol_aux_psymtabs
4.78 1215.73 69.75 9584 7.28 8.12 fixup_section
4.46 1280.79 65.06 lbasename
3.42 1330.64 49.85 12183 4.09 4.09 lookup_partial_symtab
1.83 1357.34 26.70 33844664 0.00 0.00 strcmp_iw_ordered
1.68 1381.82 24.48 1288426966 0.00 0.00 symbol_natural_name
1.09 1397.71 15.89 1288426966 0.00 0.00 symbol_search_name
Well then. Observations:
- We're looking up partial symbols way too often.
- It's taking way too long.
- And oh my lord is that a lot of calls to the (fairly inefficient)
strcmp_iw.
It looks like only about three calls to find_pc_sect_psymtab per
expression evaluation, which is bad but not too bad - sounds like
a last-searched-item cache may be useful.
(To be clear: I don't have time to work on this just now.)
--
Daniel Jacobowitz
CodeSourcery
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: gdb script performance
2006-10-05 15:37 ` Daniel Jacobowitz
@ 2006-11-29 2:47 ` Jean-Marc Saffroy
0 siblings, 0 replies; 7+ messages in thread
From: Jean-Marc Saffroy @ 2006-11-29 2:47 UTC (permalink / raw)
To: gdb
[-- Attachment #1: Type: TEXT/PLAIN, Size: 2810 bytes --]
Hello,
I'm resurrecting this old thread on gdb performance since I took some time
to look into the problem recently.
On Thu, 5 Oct 2006, Daniel Jacobowitz wrote:
> On Thu, Oct 05, 2006 at 05:29:30PM +0200, Jean-Marc Saffroy wrote:
>> On Thu, 5 Oct 2006, Daniel Jacobowitz wrote:
>>
>> Please find attached gprof output (this is with gdb 6.5 +
>> gdb-lookup-internal-first-3.patch running the lsmaps script I posted
>> earlier).
>>
>> Thanks for helping!
>
> 26.14 381.31 381.31 141867 2.69 3.50 find_pc_sect_psymtab
> 18.54 651.82 270.51 331876 0.82 0.82 find_pc_sect_section
> 17.88 912.69 260.87 144847060 0.00 0.00 lookup_partial_symbol
> 11.01 1073.33 160.64 1254595136 0.00 0.00 strcmp_iw
> 4.98 1145.97 72.64 34010 2.14 16.50 lookup_symbol_aux_psymtabs
> 4.78 1215.73 69.75 9584 7.28 8.12 fixup_section
> 4.46 1280.79 65.06 lbasename
> 3.42 1330.64 49.85 12183 4.09 4.09 lookup_partial_symtab
> 1.83 1357.34 26.70 33844664 0.00 0.00 strcmp_iw_ordered
> 1.68 1381.82 24.48 1288426966 0.00 0.00 symbol_natural_name
> 1.09 1397.71 15.89 1288426966 0.00 0.00 symbol_search_name
>
> Well then. Observations:
> - We're looking up partial symbols way too often.
> - It's taking way too long.
> - And oh my lord is that a lot of calls to the (fairly inefficient)
> strcmp_iw.
>
> It looks like only about three calls to find_pc_sect_psymtab per
> expression evaluation, which is bad but not too bad - sounds like
> a last-searched-item cache may be useful.
Some interesting facts:
- readelf reports no less than 7871 section headers in the vmlinux I'm
analyzing (this is 2.6.18): any attempt to traverse the list of sections
costs a *lot*
- even with the patch lookup-internal-first-3, it seems lookup_symbol_aux
is sometimes called for gdb script vars
Anyway, I followed your advice and added caching to find_pc_sect_symtab
and lookup_symbol_aux, which together divided execution time by 6 on my
benchmark. :-) See gprof output here (baseline is gdb 6.5 with
gdb-lookup-internal-first-3.patch):
http://siegfried.dnsalias.net/~saffroy/gdb-baseline.html
http://siegfried.dnsalias.net/~saffroy/gdb-symcache.html
http://siegfried.dnsalias.net/~saffroy/gdb-symcache-pc_cache.html
Obviously there is still room for improvement with lbasename.
For completeness I have to mention that gprof reports a completely wrong
count of seconds on my system (x86-64 box), but it seems there is only a
bad multiplier somewhere.
Incomplete patches are attached: they lack proper cache invalidation (eg.
on loading of a new object), but let's discuss the approach first.
Cheers,
--
saffroy@gmail.com
[-- Attachment #2: Type: TEXT/PLAIN, Size: 1306 bytes --]
Index: gdb-6.5-prof/gdb/symtab.c
===================================================================
--- gdb-6.5-prof.orig/gdb/symtab.c 2006-11-29 03:27:18.000000000 +0100
+++ gdb-6.5-prof/gdb/symtab.c 2006-11-29 03:27:20.000000000 +0100
@@ -2022,8 +2022,8 @@
/* Find the symtab associated with PC and SECTION. Look through the
psymtabs and read in another symtab if necessary. */
-struct symtab *
-find_pc_sect_symtab (CORE_ADDR pc, asection *section)
+static struct symtab *
+find_pc_sect_symtab_uncached (CORE_ADDR pc, asection *section)
{
struct block *b;
struct blockvector *bv;
@@ -2123,6 +2123,24 @@
return (s);
}
+struct symtab *
+find_pc_sect_symtab (CORE_ADDR pc, asection *section)
+{
+ static CORE_ADDR cached_pc;
+ static asection *cached_section;
+ static struct symtab *cached_symtab;
+ static int try_cache = 0;
+
+ if (try_cache && (cached_pc == pc) && (cached_section == section))
+ return cached_symtab;
+
+ try_cache = 1;
+ cached_pc = pc;
+ cached_section = section;
+ cached_symtab = find_pc_sect_symtab_uncached (pc, section);
+ return cached_symtab;
+}
+
/* Find the symtab associated with PC. Look through the psymtabs and
read in another symtab if necessary. Backward compatibility, no section */
[-- Attachment #3: Type: TEXT/PLAIN, Size: 6375 bytes --]
Index: gdb-6.5-prof/gdb/symtab.c
===================================================================
--- gdb-6.5-prof.orig/gdb/symtab.c 2006-11-27 03:01:22.000000000 +0100
+++ gdb-6.5-prof/gdb/symtab.c 2006-11-28 01:21:28.000000000 +0100
@@ -94,6 +94,20 @@
struct symtab **symtab);
static
+int symcache_lookup (const char *name,
+ const char *linkage_name,
+ const domain_enum domain,
+ struct symbol **sym,
+ struct symtab **symtab);
+
+static
+void symcache_add (struct symbol *sym,
+ const char *name,
+ const char *linkage_name,
+ const domain_enum domain,
+ struct symtab **symtab);
+
+static
struct symbol *lookup_symbol_aux_symtabs (int block_index,
const char *name,
const char *linkage_name,
@@ -1113,6 +1127,9 @@
}
}
+ if (symcache_lookup (name, linkage_name, domain, &sym, symtab))
+ return sym;
+
/* Now do whatever is appropriate for the current language to look
up static and global variables. */
@@ -1120,7 +1137,7 @@
block, domain,
symtab);
if (sym != NULL)
- return sym;
+ goto out;
/* Now search all static file-level symbols. Not strictly correct,
but more useful than an error. Do the symtabs first, then check
@@ -1131,16 +1148,19 @@
sym = lookup_symbol_aux_symtabs (STATIC_BLOCK, name, linkage_name,
domain, symtab);
if (sym != NULL)
- return sym;
-
+ goto out;
+
sym = lookup_symbol_aux_psymtabs (STATIC_BLOCK, name, linkage_name,
domain, symtab);
if (sym != NULL)
- return sym;
+ goto out;
if (symtab != NULL)
*symtab = NULL;
- return NULL;
+
+ out:
+ symcache_add (sym, name, linkage_name, domain, symtab);
+ return sym;
}
/* Check to see if the symbol is defined in BLOCK or its superiors.
@@ -1215,6 +1235,177 @@
return NULL;
}
+/* Check for a cached symbol lookup.
+ */
+
+struct symcache_entry {
+ const char *name;
+ const char *linkage_name;
+ domain_enum domain;
+ struct symbol *sym;
+ struct symtab *symtab;
+ struct symcache_entry *next, *prev; /* mru list */
+};
+
+/* max number of entries in symcache */
+#define SYMCACHE_SIZE 256
+
+/* fixed-size hash table of symcache entries */
+static htab_t symcache_hashtab;
+
+/* most recently accessed symcache entry */
+static struct symcache_entry *symcache_newest;
+
+static hashval_t
+symcache_hash(const void *e)
+{
+ struct symcache_entry *entry = (struct symcache_entry *)e;
+
+ return htab_hash_string (entry->name) + entry->domain;
+}
+
+static int
+symcache_eq(const void *e, const void *f)
+{
+ struct symcache_entry *a = (struct symcache_entry *)e;
+ struct symcache_entry *b = (struct symcache_entry *)f;
+
+ return ! strcmp (a->name, b->name)
+ // FIXME && !strcmp(a->linkage_name, b->linkage_name)
+ && (a->domain == b->domain);
+}
+
+static void
+symcache_list_add(struct symcache_entry *entry)
+{
+ if (symcache_newest == NULL)
+ {
+ /* init mru */
+ entry->prev = entry;
+ entry->next = entry;
+ symcache_newest = entry;
+ }
+ else
+ {
+ /* add entry at head of mru */
+ entry->next = symcache_newest;
+ entry->prev = symcache_newest->prev;
+ entry->prev->next = entry;
+ entry->next->prev = entry;
+ symcache_newest = entry;
+ }
+}
+
+static void
+symcache_list_del(struct symcache_entry *entry)
+{
+ /* remove entry from mru */
+ entry->prev->next = entry->next;
+ entry->next->prev = entry->prev;
+}
+
+static void
+symcache_del(void *e)
+{
+ struct symcache_entry *entry = (struct symcache_entry *)e;
+
+ symcache_list_del (entry);
+ free (entry->name);
+ free (entry->linkage_name);
+ free (entry);
+}
+
+static void
+symcache_add (struct symbol *sym,
+ const char *name,
+ const char *linkage_name,
+ const domain_enum domain,
+ struct symtab **symtab)
+{
+ struct symcache_entry request, *entry, **slot;
+
+ /* initialize on first use */
+ if (!symcache_hashtab)
+ symcache_hashtab = htab_create_alloc (SYMCACHE_SIZE,
+ symcache_hash,
+ symcache_eq,
+ symcache_del,
+ xcalloc,
+ free);
+
+ /* sym can be known, but with unknown symtab: add it if possible */
+ request.name = name;
+ request.linkage_name = linkage_name;
+ request.domain = domain;
+ entry = htab_find (symcache_hashtab, &request);
+ if (entry != HTAB_EMPTY_ENTRY)
+ {
+ if (symtab != NULL)
+ entry->symtab = *symtab;
+ return;
+ }
+
+ /* when cache is full, remove oldest element */
+ if (htab_elements (symcache_hashtab) == SYMCACHE_SIZE)
+ {
+ struct symcache_entry *oldest;
+
+ oldest = symcache_newest->prev;
+ htab_remove_elt (symcache_hashtab, oldest);
+ }
+
+ /* this is really a new entry */
+ entry = xcalloc (1, sizeof(*entry));
+ entry->name = strdup (name);
+ entry->linkage_name = linkage_name ? strdup (linkage_name) : NULL;
+ entry->domain = domain;
+ entry->sym = sym;
+ if (symtab != NULL)
+ entry->symtab = *symtab;
+ symcache_list_add (entry);
+
+ slot = htab_find_slot (symcache_hashtab, entry, INSERT);
+ *slot = entry;
+}
+
+static int
+symcache_lookup (const char *name,
+ const char *linkage_name,
+ const domain_enum domain,
+ struct symbol **sym,
+ struct symtab **symtab)
+{
+ struct symcache_entry request, *entry;
+
+ if (!symcache_hashtab)
+ return 0;
+
+ request.name = name;
+ request.linkage_name = linkage_name;
+ request.domain = domain;
+ entry = htab_find (symcache_hashtab, &request);
+ if (entry == HTAB_EMPTY_ENTRY)
+ return 0;
+
+ if (symtab != NULL)
+ {
+ if (entry->symtab == NULL)
+ /* symtab is requested, but cached entry does not have it */
+ return 0;
+ *symtab = entry->symtab;
+ }
+ *sym = entry->sym;
+
+ /* keep mru list sorted */
+ if (entry != symcache_newest)
+ {
+ symcache_list_del (entry);
+ symcache_list_add (entry);
+ }
+
+ return 1;
+}
+
/* Check to see if the symbol is defined in one of the symtabs.
BLOCK_INDEX should be either GLOBAL_BLOCK or STATIC_BLOCK,
depending on whether or not we want to search global symbols or
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2006-11-29 2:47 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-10-05 12:16 gdb script performance Jean-Marc Saffroy
2006-10-05 12:59 ` Frederic RISS
2006-10-05 13:28 ` Jean-Marc Saffroy
2006-10-05 13:32 ` Daniel Jacobowitz
2006-10-05 15:31 ` Jean-Marc Saffroy
2006-10-05 15:37 ` Daniel Jacobowitz
2006-11-29 2:47 ` Jean-Marc Saffroy
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox