Mirror of the gdb-patches mailing list
 help / color / mirror / Atom feed
From: John Demme <me@teqdruid.com>
To: gdb-patches@sources.redhat.com
Subject: D Symbol Demangling
Date: Mon, 04 Apr 2005 02:37:00 -0000	[thread overview]
Message-ID: <1112582221.14153.32.camel@localhost.localdomain> (raw)

[-- Attachment #1: Type: text/plain, Size: 1130 bytes --]

Greetings GDB hackers!

I'm new to GDB programming, so please excuse any stupid questions.

There is a language called D which, like C++, uses symbol mangling since
it supports things such as method overloading.  I've been attempting to
add support to GDB to demangle the names.

Thus far, I've had partial success.  In fairly simple D programs, my
demangling works, but in more complex programs with mixed C and D code
(D is link-compatible with C) GDB only calls the D demangler for some of
the functions.

Unfortunately, I'm not very familiar with GDB's architecture, so I've
been basically wandering around in the dark on this one. Any help you
could provide would be appreciated.

Attached is my patch against GDB 6.3.  It is not the cleanest code right
now, and certainly not anywhere near ready for a release... I'm just
trying to get it to work, and feeling out the GDB code.  Once it's
working, I'll refactor it into something decent.

Thanks
John Demme

BTW, more information about D can be found at
http://www.digitalmars.com/d and there is a forum for D GDB patches at
http://www.dsource.org/forums/viewforum.php?f=58


[-- Attachment #2: d.patch --]
[-- Type: text/x-patch, Size: 12642 bytes --]

Index: gdb/symtab.c
===================================================================
--- gdb/symtab.c	(revision 7)
+++ gdb/symtab.c	(working copy)
@@ -42,6 +42,7 @@
 #include "filenames.h"		/* for FILENAME_CMP */
 #include "objc-lang.h"
 #include "ada-lang.h"
+#include "c-lang.h"
 
 #include "hashtab.h"
 
@@ -395,7 +396,7 @@
   return (mangled_name);
 }
 
-\f
+
 /* Initialize the language dependent portion of a symbol
    depending upon the language for the symbol. */
 void
@@ -404,6 +405,7 @@
 {
   gsymbol->language = language;
   if (gsymbol->language == language_cplus
+      || gsymbol->language == language_d
       || gsymbol->language == language_java
       || gsymbol->language == language_objc)
     {
@@ -450,6 +452,15 @@
   if (gsymbol->language == language_unknown)
     gsymbol->language = language_auto;
 
+  if (gsymbol->language == language_d
+      || gsymbol->language == language_auto) {
+    demangled = d_demangle(mangled, 0);
+    if (demangled != NULL) {
+      gsymbol->language = language_d;
+      return demangled;
+    }
+  }
+
   if (gsymbol->language == language_objc
       || gsymbol->language == language_auto)
     {
@@ -609,6 +620,7 @@
 
   demangled = symbol_find_demangled_name (gsymbol, mangled);
   if (gsymbol->language == language_cplus
+      || gsymbol->language == language_d
       || gsymbol->language == language_java
       || gsymbol->language == language_objc)
     {
@@ -638,6 +650,7 @@
   switch (gsymbol->language) 
     {
     case language_cplus:
+    case language_d:
     case language_java:
     case language_objc:
       if (gsymbol->language_specific.cplus_specific.demangled_name != NULL)
@@ -663,6 +676,7 @@
   switch (gsymbol->language) 
     {
     case language_cplus:
+    case language_d:
     case language_java:
     case language_objc:
       if (gsymbol->language_specific.cplus_specific.demangled_name != NULL)
@@ -1020,7 +1034,7 @@
 
   modified_name = name;
 
-  /* If we are using C++ or Java, demangle the name before doing a lookup, so
+  /* If we are using C++, D, or Java, demangle the name before doing a lookup, so
      we can always binary search. */
   if (current_language->la_language == language_cplus)
     {
@@ -1032,6 +1046,16 @@
 	  needtofreename = 1;
 	}
     }
+  else if (current_language->la_language == language_d)
+    {
+      demangled_name = d_demangle (name, 0);
+      if (demangled_name)
+	{
+	  mangled_name = name;
+	  modified_name = demangled_name;
+	  needtofreename = 1;
+	}
+    }
   else if (current_language->la_language == language_java)
     {
       demangled_name = cplus_demangle (name, 
Index: gdb/c-lang.c
===================================================================
--- gdb/c-lang.c	(revision 7)
+++ gdb/c-lang.c	(working copy)
@@ -696,9 +696,214 @@
    a language currently not supported by GDB.  */
 
 const struct language_defn minimal_language_defn =
+ {
+   "minimal",			/* Language name */
+   language_minimal,
+   NULL,
+   range_check_off,
+   type_check_off,
+   case_sensitive_on,
+   array_row_major,
+   &exp_descriptor_standard,
+   c_preprocess_and_parse,
+   c_error,
+   null_post_parser,
+   c_printchar,			/* Print a character constant */
+   c_printstr,			/* Function to print string constant */
+   c_emit_char,			/* Print a single char */
+   c_create_fundamental_type,	/* Create fundamental type in this language */
+   c_print_type,			/* Print a type using appropriate syntax */
+   c_val_print,			/* Print a value using appropriate syntax */
+   c_value_print,		/* Print a top-level value */
+   NULL,				/* Language specific skip_trampoline */
+   NULL,				/* value_of_this */
+  basic_lookup_symbol_nonlocal,	/* lookup_symbol_nonlocal */
+  basic_lookup_transparent_type,/* lookup_transparent_type */
+  NULL,				/* Language specific symbol demangler */
+  NULL,				/* Language specific class_name_from_physname */
+  c_op_print_tab,		/* expression operators for printing */
+  1,				/* c-style arrays */
+  0,				/* String lower bound */
+  NULL,
+  default_word_break_characters,
+  c_language_arch_info,
+  LANG_MAGIC
+};
+
+
+/*****************************
+ D Language stuff
+******************************/
+#include <string.h>
+#include <ctype.h>
+
+static int extractidentifiers(char** output, char** mangled) {
+  int i = -1;
+  while (isdigit(**mangled)) {
+    i = strtol(*mangled, mangled, 10);
+    if (strlen(*mangled) < i)
+      return -1;
+    memcpy(*output, *mangled, i);
+    *mangled += i;
+    *output += i + 1;
+    (*output)[-1] = '.';
+  }
+  if (**mangled == '\0' || i == -1)
+    return -1;
+  (*output)--;
+  return 1;
+}
+
+static void append(char** dest, char* src) {
+  int i = strlen(src);
+  for(;i>0; i--) {
+    *(*dest)++ = *src++;
+  }
+}
+
+static int extracttypeinfo(char** dest, char** id) {
+  if (**id == '\0')
+    return -1;
+  // Extract the type info:
+  switch (*(*id)++) {
+    // array, static array, dynamic array:
+  case 'A': case 'G': case 'H':
+    if (extracttypeinfo(dest, id) == -1)
+      return -1;
+    append(dest, "[]");
+    return 1;
+    // pointer:
+  case 'P':
+    if (extracttypeinfo(dest, id) == -1)
+      return -1;
+    append(dest, "*");
+    return 1;
+    // reference:
+  case 'R':
+    if (extracttypeinfo(dest, id) == -1)
+      return -1;
+    append(dest, "&");
+    return 1;
+    // return value:
+  case 'Z':
+    return extracttypeinfo(dest, id);
+    // out:
+  case 'J':
+    append(dest, "out ");
+    return extracttypeinfo(dest, id);
+    // inout:
+  case 'K':
+    append(dest, "inout ");
+    return extracttypeinfo(dest, id);
+    
+    // enum:
+  case 'E': case 'T': case 'D': case 'C': case 'S': case 'I':
+    return extractidentifiers(dest, id);
+    
+    // basic types:
+  case 'n': append(dest, "none"); return 1;  // ever used?
+  case 'v': append(dest, "void"); return 1;
+  case 'g': append(dest, "byte"); return 1;
+  case 'h': append(dest, "ubyte"); return 1;
+  case 's': append(dest, "short"); return 1;
+  case 't': append(dest, "ushort"); return 1;
+  case 'i': append(dest, "int"); return 1;
+  case 'k': append(dest, "uint"); return 1;
+  case 'l': append(dest, "long"); return 1;
+  case 'm': append(dest, "ulong"); return 1;
+  case 'f': append(dest, "float"); return 1;
+  case 'd': append(dest, "double"); return 1;
+  case 'e': append(dest, "real"); return 1;
+
+  // imaginary and complex:
+  case 'o': append(dest, "ifloat"); return 1;
+  case 'p': append(dest, "idouble"); return 1;
+  case 'j': append(dest, "ireal"); return 1;
+  case 'q': append(dest, "cfloat"); return 1;
+  case 'r': append(dest, "cdouble"); return 1;
+  case 'c': append(dest, "creal"); return 1;
+
+  // other types:
+  case 'b': append(dest, "bit"); return 1;
+  case 'a': append(dest, "char"); return 1;
+  case 'u': append(dest, "wchar"); return 1;
+  case 'w': append(dest, "dchar"); return 1;
+
+  // typeinfo, error, instance:
+  case '@': return extractidentifiers(dest, id); // BUG: is this right?
+
+  default: append(dest, "unknown"); return 1;
+  }
+}
+
+char* d_demangle(const char* mangled, int options) {
+  char *symbol = mangled;
+  char *output = malloc(strlen(mangled)+20), *orig = output;
+  unsigned char isFunc = 0;
+  if (mangled == NULL) {
+    free(output);
+    return NULL;
+  } else if (strcmp(mangled, "_Dmain") == 0) {
+    free(output);
+    return strdup("D main");
+  }
+  if (symbol == strstr(symbol, "_D")) {
+    symbol += 2;
+    isFunc = 1;
+  } else if (symbol == strstr(symbol, "__Class_")) {
+    symbol += 8;
+  } else if (symbol == strstr(symbol, "__init_")) {
+    symbol += 7;
+  } else if (symbol == strstr(symbol, "__vtbl_")) {
+    symbol += 7;
+  } else if (symbol == strstr(symbol, "__modctor_")) {
+    symbol += 10;
+  } else if (symbol == strstr(symbol, "__moddtor_")) {
+    symbol += 10;
+  } else if (symbol == strstr(symbol, "__ModuleInfo_")) {
+    symbol += 13;
+  } else {
+    free(orig);
+    return NULL;
+  }
+
+  if (extractidentifiers(&output, &symbol) < 0) {
+    free(orig);
+    return NULL;
+  }
+  append(&output, "(");
+  if (isFunc == 1 && *symbol == 'F') {
+    symbol++;
+    while (*symbol != '\0' && *symbol != 'Z') {
+      if (isFunc == 1) {
+	isFunc++;
+      } else {
+	append(&output, ", ");
+      }
+      if (extracttypeinfo(&output, &symbol) < 0) {
+	free(orig);
+	return NULL;
+      }
+    }
+  }
+  append(&output, ")");
+
+  //Doesn't display the return type, but wouldn't be too hard to do.
+  
+  *output = '\0';
+  output = strdup(orig);
+  free(orig);
+  return output;
+}
+
+char* d_sym_demangle(const struct general_symbol_info *gsymbol) {
+  return d_demangle(gsymbol->name, 0);
+}
+
+const struct language_defn d_language_defn =
 {
-  "minimal",			/* Language name */
-  language_minimal,
+  "d",				/* Language name */
+  language_d,
   NULL,
   range_check_off,
   type_check_off,
@@ -719,7 +924,7 @@
   NULL,				/* value_of_this */
   basic_lookup_symbol_nonlocal,	/* lookup_symbol_nonlocal */
   basic_lookup_transparent_type,/* lookup_transparent_type */
-  NULL,				/* Language specific symbol demangler */
+  d_demangle,			/* Language specific symbol demangler */
   NULL,				/* Language specific class_name_from_physname */
   c_op_print_tab,		/* expression operators for printing */
   1,				/* c-style arrays */
@@ -733,7 +938,8 @@
 void
 _initialize_c_language (void)
 {
-  add_language (&c_language_defn);
+  add_language (&c_language_defn); 
+  add_language (&d_language_defn);
   add_language (&cplus_language_defn);
   add_language (&asm_language_defn);
   add_language (&minimal_language_defn);
Index: gdb/language.c
===================================================================
--- gdb/language.c	(revision 7)
+++ gdb/language.c	(working copy)
@@ -553,6 +553,7 @@
     {
     case language_c:
     case language_cplus:
+    case language_d:
     case language_objc:
       if (TYPE_CODE (t1) == TYPE_CODE_FLT)
 	return TYPE_CODE (t2) == TYPE_CODE_FLT && l2 > l1 ?
@@ -664,6 +665,7 @@
     {
     case language_c:
     case language_cplus:
+    case language_d:
     case language_objc:
       return (TYPE_CODE (type) != TYPE_CODE_INT) &&
 	(TYPE_CODE (type) != TYPE_CODE_ENUM) ? 0 : 1;
@@ -704,6 +706,7 @@
 
     case language_c:
     case language_cplus:
+    case language_d:
     case language_objc:
       return (TYPE_CODE (type) == TYPE_CODE_INT) &&
 	TYPE_LENGTH (type) == sizeof (char)
@@ -726,6 +729,7 @@
 
     case language_c:
     case language_cplus:
+    case language_d:
     case language_objc:
       /* C does not have distinct string type. */
       return (0);
@@ -745,6 +749,7 @@
     {
     case language_c:
     case language_cplus:
+    case language_d:
     case language_objc:
       /* Might be more cleanly handled by having a
          TYPE_CODE_INT_NOT_BOOL for (the deleted) CHILL and such
@@ -818,6 +823,7 @@
 	}
       return builtin_type_f_logical_s2;
     case language_cplus:
+    case language_d:
     case language_pascal:
       if (current_language->la_language==language_cplus)
         {sym = lookup_symbol ("bool", NULL, VAR_DOMAIN, NULL, NULL);}
Index: gdb/c-lang.h
===================================================================
--- gdb/c-lang.h	(revision 7)
+++ gdb/c-lang.h	(working copy)
@@ -28,6 +28,7 @@
 
 #include "value.h"
 #include "macroexp.h"
+#include "symtab.h"
 
 
 extern int c_parse (void);	/* Defined in c-exp.y */
@@ -90,4 +91,13 @@
 extern int cp_is_vtbl_member (struct type *);
 
 
+/*****************************
+ D Language stuff
+******************************/
+
+char* d_demangle(const char* mangled, int options);
+
+char* d_sym_demangle(const struct general_symbol_info *gsymbol);
+
+
 #endif /* !defined (C_LANG_H) */
Index: gdb/defs.h
===================================================================
--- gdb/defs.h	(revision 7)
+++ gdb/defs.h	(working copy)
@@ -190,6 +190,7 @@
     language_auto,		/* Placeholder for automatic setting */
     language_c,			/* C */
     language_cplus,		/* C++ */
+    language_d,                 /* D */
     language_objc,		/* Objective-C */
     language_java,		/* Java */
     language_fortran,		/* Fortran */
Index: gdb/symfile.c
===================================================================
--- gdb/symfile.c	(revision 7)
+++ gdb/symfile.c	(working copy)
@@ -2169,6 +2169,7 @@
       filename_language_table =
 	xmalloc (fl_table_size * sizeof (*filename_language_table));
       add_filename_language (".c", language_c);
+      add_filename_language (".d", language_d);
       add_filename_language (".C", language_cplus);
       add_filename_language (".cc", language_cplus);
       add_filename_language (".cp", language_cplus);

             reply	other threads:[~2005-04-04  2:37 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-04-04  2:37 John Demme [this message]
2005-04-04 18:21 ` Michael Snyder
2005-04-04 20:44   ` John Demme
2005-04-04 20:47     ` Daniel Jacobowitz
2005-04-04 21:49     ` Michael Snyder
2005-04-04 22:39       ` John Demme
     [not found]       ` <1112654359.14153.50.camel@localhost.localdomain>
     [not found]         ` <4251CF00.5080002@redhat.com>
2005-04-08 16:47           ` John Demme
2005-04-08 16:52             ` Daniel Jacobowitz
2005-04-08 20:50               ` John Demme
2005-04-08 21:11                 ` Daniel Jacobowitz
2006-04-19 11:18 Thomas Kuehne
2006-04-20 13:20 ` Daniel Jacobowitz
2006-04-21 21:25   ` Thomas Kühne
2006-04-22 22:52     ` Thomas Kühne
2006-04-24 17:21       ` Jim Blandy
2006-04-24 20:53         ` Daniel Jacobowitz
2006-04-25  3:35           ` Eli Zaretskii
2006-04-25 14:13             ` DJ Delorie
2006-04-29  7:23               ` Thomas Kühne
2006-04-29 16:47                 ` DJ Delorie

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1112582221.14153.32.camel@localhost.localdomain \
    --to=me@teqdruid.com \
    --cc=gdb-patches@sources.redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox