From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 10007 invoked by alias); 4 Apr 2005 02:37:06 -0000 Mailing-List: contact gdb-patches-help@sources.redhat.com; run by ezmlm Precedence: bulk List-Subscribe: List-Archive: List-Post: List-Help: , Sender: gdb-patches-owner@sources.redhat.com Received: (qmail 9825 invoked from network); 4 Apr 2005 02:36:55 -0000 Received: from unknown (HELO md0.mail.umd.edu) (128.8.31.162) by sourceware.org with SMTP; 4 Apr 2005 02:36:55 -0000 Received: from teqdruid.student.umd.edu (teqdruid.student.umd.edu [129.2.222.235]) by md0.mail.umd.edu (MOS 3.5.6-GR) with ESMTP id ASR62159 (AUTH teqdruid); Sun, 3 Apr 2005 22:36:54 -0400 (EDT) Subject: D Symbol Demangling From: John Demme To: gdb-patches@sources.redhat.com Content-Type: multipart/mixed; boundary="=-1HgnzB0JUNUef0erjUfM" Date: Mon, 04 Apr 2005 02:37:00 -0000 Message-Id: <1112582221.14153.32.camel@localhost.localdomain> Mime-Version: 1.0 X-SW-Source: 2005-04/txt/msg00039.txt.bz2 --=-1HgnzB0JUNUef0erjUfM Content-Type: text/plain Content-Transfer-Encoding: 7bit Content-length: 1130 Greetings GDB hackers! I'm new to GDB programming, so please excuse any stupid questions. There is a language called D which, like C++, uses symbol mangling since it supports things such as method overloading. I've been attempting to add support to GDB to demangle the names. Thus far, I've had partial success. In fairly simple D programs, my demangling works, but in more complex programs with mixed C and D code (D is link-compatible with C) it only calls the D demangler for some of the functions. Unfortunately, I'm not very familiar with GDB's architecture, so I've been basically wandering around in the dark on this one. Any help you could provide would be appreciated. Attached is my patch against GDB 6.3. It is not the cleanest code right now, and certainly not anywhere near ready for a release... I'm just trying to get it to work, and feeling out the GDB code. Once it's working, I'll refactor it into something decent. Thanks John Demme BTW- more information about D can be found at http://www.digitalmars.com/d and there is a forum for D GDB patches at http://www.dsource.org/forums/viewforum.php?f=58 --=-1HgnzB0JUNUef0erjUfM Content-Disposition: attachment; filename=d.patch Content-Type: text/x-patch; name=d.patch; charset=UTF-8 Content-Transfer-Encoding: 7bit Content-length: 12642 Index: gdb/symtab.c =================================================================== --- gdb/symtab.c (revision 7) +++ gdb/symtab.c (working copy) @@ -42,6 +42,7 @@ #include "filenames.h" /* for FILENAME_CMP */ #include "objc-lang.h" #include "ada-lang.h" +#include "c-lang.h" #include "hashtab.h" @@ -395,7 +396,7 @@ return (mangled_name); } - + /* Initialize the language dependent portion of a symbol depending upon the language for the symbol. */ void @@ -404,6 +405,7 @@ { gsymbol->language = language; if (gsymbol->language == language_cplus + || gsymbol->language == language_d || gsymbol->language == language_java || gsymbol->language == language_objc) { @@ -450,6 +452,15 @@ if (gsymbol->language == language_unknown) gsymbol->language = language_auto; + if (gsymbol->language == language_d + || gsymbol->language == language_auto) { + demangled = d_demangle(mangled, 0); + if (demangled != NULL) { + gsymbol->language = language_d; + return demangled; + } + } + if (gsymbol->language == language_objc || gsymbol->language == language_auto) { @@ -609,6 +620,7 @@ demangled = symbol_find_demangled_name (gsymbol, mangled); if (gsymbol->language == language_cplus + || gsymbol->language == language_d || gsymbol->language == language_java || gsymbol->language == language_objc) { @@ -638,6 +650,7 @@ switch (gsymbol->language) { case language_cplus: + case language_d: case language_java: case language_objc: if (gsymbol->language_specific.cplus_specific.demangled_name != NULL) @@ -663,6 +676,7 @@ switch (gsymbol->language) { case language_cplus: + case language_d: case language_java: case language_objc: if (gsymbol->language_specific.cplus_specific.demangled_name != NULL) @@ -1020,7 +1034,7 @@ modified_name = name; - /* If we are using C++ or Java, demangle the name before doing a lookup, so + /* If we are using C++, D, or Java, demangle the name before doing a lookup, so we can always binary search. */ if (current_language->la_language == language_cplus) { @@ -1032,6 +1046,16 @@ needtofreename = 1; } } + else if (current_language->la_language == language_d) + { + demangled_name = d_demangle (name, 0); + if (demangled_name) + { + mangled_name = name; + modified_name = demangled_name; + needtofreename = 1; + } + } else if (current_language->la_language == language_java) { demangled_name = cplus_demangle (name, Index: gdb/c-lang.c =================================================================== --- gdb/c-lang.c (revision 7) +++ gdb/c-lang.c (working copy) @@ -696,9 +696,214 @@ a language currently not supported by GDB. */ const struct language_defn minimal_language_defn = + { + "minimal", /* Language name */ + language_minimal, + NULL, + range_check_off, + type_check_off, + case_sensitive_on, + array_row_major, + &exp_descriptor_standard, + c_preprocess_and_parse, + c_error, + null_post_parser, + c_printchar, /* Print a character constant */ + c_printstr, /* Function to print string constant */ + c_emit_char, /* Print a single char */ + c_create_fundamental_type, /* Create fundamental type in this language */ + c_print_type, /* Print a type using appropriate syntax */ + c_val_print, /* Print a value using appropriate syntax */ + c_value_print, /* Print a top-level value */ + NULL, /* Language specific skip_trampoline */ + NULL, /* value_of_this */ + basic_lookup_symbol_nonlocal, /* lookup_symbol_nonlocal */ + basic_lookup_transparent_type,/* lookup_transparent_type */ + NULL, /* Language specific symbol demangler */ + NULL, /* Language specific class_name_from_physname */ + c_op_print_tab, /* expression operators for printing */ + 1, /* c-style arrays */ + 0, /* String lower bound */ + NULL, + default_word_break_characters, + c_language_arch_info, + LANG_MAGIC +}; + + +/***************************** + D Language stuff +******************************/ +#include +#include + +static int extractidentifiers(char** output, char** mangled) { + int i = -1; + while (isdigit(**mangled)) { + i = strtol(*mangled, mangled, 10); + if (strlen(*mangled) < i) + return -1; + memcpy(*output, *mangled, i); + *mangled += i; + *output += i + 1; + (*output)[-1] = '.'; + } + if (**mangled == '\0' || i == -1) + return -1; + (*output)--; + return 1; +} + +static void append(char** dest, char* src) { + int i = strlen(src); + for(;i>0; i--) { + *(*dest)++ = *src++; + } +} + +static int extracttypeinfo(char** dest, char** id) { + if (**id == '\0') + return -1; + // Extract the type info: + switch (*(*id)++) { + // array, static array, dynamic array: + case 'A': case 'G': case 'H': + if (extracttypeinfo(dest, id) == -1) + return -1; + append(dest, "[]"); + return 1; + // pointer: + case 'P': + if (extracttypeinfo(dest, id) == -1) + return -1; + append(dest, "*"); + return 1; + // reference: + case 'R': + if (extracttypeinfo(dest, id) == -1) + return -1; + append(dest, "&"); + return 1; + // return value: + case 'Z': + return extracttypeinfo(dest, id); + // out: + case 'J': + append(dest, "out "); + return extracttypeinfo(dest, id); + // inout: + case 'K': + append(dest, "inout "); + return extracttypeinfo(dest, id); + + // enum: + case 'E': case 'T': case 'D': case 'C': case 'S': case 'I': + return extractidentifiers(dest, id); + + // basic types: + case 'n': append(dest, "none"); return 1; // ever used? + case 'v': append(dest, "void"); return 1; + case 'g': append(dest, "byte"); return 1; + case 'h': append(dest, "ubyte"); return 1; + case 's': append(dest, "short"); return 1; + case 't': append(dest, "ushort"); return 1; + case 'i': append(dest, "int"); return 1; + case 'k': append(dest, "uint"); return 1; + case 'l': append(dest, "long"); return 1; + case 'm': append(dest, "ulong"); return 1; + case 'f': append(dest, "float"); return 1; + case 'd': append(dest, "double"); return 1; + case 'e': append(dest, "real"); return 1; + + // imaginary and complex: + case 'o': append(dest, "ifloat"); return 1; + case 'p': append(dest, "idouble"); return 1; + case 'j': append(dest, "ireal"); return 1; + case 'q': append(dest, "cfloat"); return 1; + case 'r': append(dest, "cdouble"); return 1; + case 'c': append(dest, "creal"); return 1; + + // other types: + case 'b': append(dest, "bit"); return 1; + case 'a': append(dest, "char"); return 1; + case 'u': append(dest, "wchar"); return 1; + case 'w': append(dest, "dchar"); return 1; + + // typeinfo, error, instance: + case '@': return extractidentifiers(dest, id); // BUG: is this right? + + default: append(dest, "unknown"); return 1; + } +} + +char* d_demangle(const char* mangled, int options) { + char *symbol = mangled; + char *output = malloc(strlen(mangled)+20), *orig = output; + unsigned char isFunc = 0; + if (mangled == NULL) { + free(output); + return NULL; + } else if (strcmp(mangled, "_Dmain") == 0) { + free(output); + return strdup("D main"); + } + if (symbol == strstr(symbol, "_D")) { + symbol += 2; + isFunc = 1; + } else if (symbol == strstr(symbol, "__Class_")) { + symbol += 8; + } else if (symbol == strstr(symbol, "__init_")) { + symbol += 7; + } else if (symbol == strstr(symbol, "__vtbl_")) { + symbol += 7; + } else if (symbol == strstr(symbol, "__modctor_")) { + symbol += 10; + } else if (symbol == strstr(symbol, "__moddtor_")) { + symbol += 10; + } else if (symbol == strstr(symbol, "__ModuleInfo_")) { + symbol += 13; + } else { + free(orig); + return NULL; + } + + if (extractidentifiers(&output, &symbol) < 0) { + free(orig); + return NULL; + } + append(&output, "("); + if (isFunc == 1 && *symbol == 'F') { + symbol++; + while (*symbol != '\0' && *symbol != 'Z') { + if (isFunc == 1) { + isFunc++; + } else { + append(&output, ", "); + } + if (extracttypeinfo(&output, &symbol) < 0) { + free(orig); + return NULL; + } + } + } + append(&output, ")"); + + //Doesn't display the return type, but wouldn't be too hard to do. + + *output = '\0'; + output = strdup(orig); + free(orig); + return output; +} + +char* d_sym_demangle(const struct general_symbol_info *gsymbol) { + return d_demangle(gsymbol->name, 0); +} + +const struct language_defn d_language_defn = { - "minimal", /* Language name */ - language_minimal, + "d", /* Language name */ + language_d, NULL, range_check_off, type_check_off, @@ -719,7 +924,7 @@ NULL, /* value_of_this */ basic_lookup_symbol_nonlocal, /* lookup_symbol_nonlocal */ basic_lookup_transparent_type,/* lookup_transparent_type */ - NULL, /* Language specific symbol demangler */ + d_demangle, /* Language specific symbol demangler */ NULL, /* Language specific class_name_from_physname */ c_op_print_tab, /* expression operators for printing */ 1, /* c-style arrays */ @@ -733,7 +938,8 @@ void _initialize_c_language (void) { - add_language (&c_language_defn); + add_language (&c_language_defn); + add_language (&d_language_defn); add_language (&cplus_language_defn); add_language (&asm_language_defn); add_language (&minimal_language_defn); Index: gdb/language.c =================================================================== --- gdb/language.c (revision 7) +++ gdb/language.c (working copy) @@ -553,6 +553,7 @@ { case language_c: case language_cplus: + case language_d: case language_objc: if (TYPE_CODE (t1) == TYPE_CODE_FLT) return TYPE_CODE (t2) == TYPE_CODE_FLT && l2 > l1 ? @@ -664,6 +665,7 @@ { case language_c: case language_cplus: + case language_d: case language_objc: return (TYPE_CODE (type) != TYPE_CODE_INT) && (TYPE_CODE (type) != TYPE_CODE_ENUM) ? 0 : 1; @@ -704,6 +706,7 @@ case language_c: case language_cplus: + case language_d: case language_objc: return (TYPE_CODE (type) == TYPE_CODE_INT) && TYPE_LENGTH (type) == sizeof (char) @@ -726,6 +729,7 @@ case language_c: case language_cplus: + case language_d: case language_objc: /* C does not have distinct string type. */ return (0); @@ -745,6 +749,7 @@ { case language_c: case language_cplus: + case language_d: case language_objc: /* Might be more cleanly handled by having a TYPE_CODE_INT_NOT_BOOL for (the deleted) CHILL and such @@ -818,6 +823,7 @@ } return builtin_type_f_logical_s2; case language_cplus: + case language_d: case language_pascal: if (current_language->la_language==language_cplus) {sym = lookup_symbol ("bool", NULL, VAR_DOMAIN, NULL, NULL);} Index: gdb/c-lang.h =================================================================== --- gdb/c-lang.h (revision 7) +++ gdb/c-lang.h (working copy) @@ -28,6 +28,7 @@ #include "value.h" #include "macroexp.h" +#include "symtab.h" extern int c_parse (void); /* Defined in c-exp.y */ @@ -90,4 +91,13 @@ extern int cp_is_vtbl_member (struct type *); +/***************************** + D Language stuff +******************************/ + +char* d_demangle(const char* mangled, int options); + +char* d_sym_demangle(const struct general_symbol_info *gsymbol); + + #endif /* !defined (C_LANG_H) */ Index: gdb/defs.h =================================================================== --- gdb/defs.h (revision 7) +++ gdb/defs.h (working copy) @@ -190,6 +190,7 @@ language_auto, /* Placeholder for automatic setting */ language_c, /* C */ language_cplus, /* C++ */ + language_d, /* D */ language_objc, /* Objective-C */ language_java, /* Java */ language_fortran, /* Fortran */ Index: gdb/symfile.c =================================================================== --- gdb/symfile.c (revision 7) +++ gdb/symfile.c (working copy) @@ -2169,6 +2169,7 @@ filename_language_table = xmalloc (fl_table_size * sizeof (*filename_language_table)); add_filename_language (".c", language_c); + add_filename_language (".d", language_d); add_filename_language (".C", language_cplus); add_filename_language (".cc", language_cplus); add_filename_language (".cp", language_cplus); --=-1HgnzB0JUNUef0erjUfM--