From: Tom Tromey <tom@tromey.com>
To: gdb-patches@sourceware.org
Cc: Tom Tromey <tom@tromey.com>
Subject: [PATCH v2 02/28] Change ada_decode to preserve upper-case in some situations
Date: Wed, 02 Apr 2025 17:45:01 -0600 [thread overview]
Message-ID: <20250402-search-in-psyms-v2-2-ea91704487cb@tromey.com> (raw)
In-Reply-To: <20250402-search-in-psyms-v2-0-ea91704487cb@tromey.com>
This patch is needed to avoid regressions later in the series.
The issue here is that ada_decode, when called with wide=false, would
act as though the input needed verbatim quoting. That would happen
because the 'W' character would be passed through, and then a later
loop would reject the result due to that character.
Similarly, with operators=false the upper-case-checking loop would be
skipped, but then some names that did need verbatim quoting would pass
through.
Furthermore I noticed that there isn't a need to distinguish between
the "wide" and "operators" cases -- all callers pass identical values
to both.
This patch cleans up the above, consolidating the parameters and
changing how upper-case detection is handled, so that both the
operator and wide cases pass through without issue. I've added new
unit tests for this.
---
gdb/ada-lang.c | 83 +++++++++++++++++++++++++++++------------
gdb/ada-lang.h | 15 +++-----
gdb/dwarf2/cooked-index-shard.c | 2 +-
gdb/symtab.h | 2 +-
4 files changed, 68 insertions(+), 34 deletions(-)
diff --git a/gdb/ada-lang.c b/gdb/ada-lang.c
index a55ee12ce70d02082e64d85634b87dd27f5a0670..4bb6a808fd8c1a7f8e4b2344fdf935f94c602ed1 100644
--- a/gdb/ada-lang.c
+++ b/gdb/ada-lang.c
@@ -1308,7 +1308,7 @@ convert_from_hex_encoded (std::string &out, const char *str, int n)
/* See ada-lang.h. */
std::string
-ada_decode (const char *encoded, bool wrap, bool operators, bool wide)
+ada_decode (const char *encoded, bool wrap, bool translate)
{
int i;
int len0;
@@ -1403,7 +1403,7 @@ ada_decode (const char *encoded, bool wrap, bool operators, bool wide)
while (i < len0)
{
/* Is this a symbol function? */
- if (operators && at_start_name && encoded[i] == 'O')
+ if (at_start_name && encoded[i] == 'O')
{
int k;
@@ -1414,7 +1414,10 @@ ada_decode (const char *encoded, bool wrap, bool operators, bool wide)
op_len - 1) == 0)
&& !isalnum (encoded[i + op_len]))
{
- decoded.append (ada_opname_table[k].decoded);
+ if (translate)
+ decoded.append (ada_opname_table[k].decoded);
+ else
+ decoded.append (ada_opname_table[k].encoded);
at_start_name = 0;
i += op_len;
break;
@@ -1502,28 +1505,59 @@ ada_decode (const char *encoded, bool wrap, bool operators, bool wide)
i++;
}
- if (wide && i < len0 + 3 && encoded[i] == 'U' && isxdigit (encoded[i + 1]))
+ /* Handle wide characters while respecting the arguments to the
+ function: we may want to copy them verbatim, but in this case
+ we do not want to register that we've copied an upper-case
+ character. */
+ if (i < len0 + 3 && encoded[i] == 'U' && isxdigit (encoded[i + 1]))
{
- if (convert_from_hex_encoded (decoded, &encoded[i + 1], 2))
+ if (translate)
{
- i += 3;
+ if (convert_from_hex_encoded (decoded, &encoded[i + 1], 2))
+ {
+ i += 3;
+ continue;
+ }
+ }
+ else
+ {
+ decoded.push_back (encoded[i]);
+ ++i;
continue;
}
}
- else if (wide && i < len0 + 5 && encoded[i] == 'W' && isxdigit (encoded[i + 1]))
+ else if (i < len0 + 5 && encoded[i] == 'W' && isxdigit (encoded[i + 1]))
{
- if (convert_from_hex_encoded (decoded, &encoded[i + 1], 4))
+ if (translate)
+ {
+ if (convert_from_hex_encoded (decoded, &encoded[i + 1], 4))
+ {
+ i += 5;
+ continue;
+ }
+ }
+ else
{
- i += 5;
+ decoded.push_back (encoded[i]);
+ ++i;
continue;
}
}
- else if (wide && i < len0 + 10 && encoded[i] == 'W' && encoded[i + 1] == 'W'
+ else if (i < len0 + 10 && encoded[i] == 'W' && encoded[i + 1] == 'W'
&& isxdigit (encoded[i + 2]))
{
- if (convert_from_hex_encoded (decoded, &encoded[i + 2], 8))
+ if (translate)
{
- i += 10;
+ if (convert_from_hex_encoded (decoded, &encoded[i + 2], 8))
+ {
+ i += 10;
+ continue;
+ }
+ }
+ else
+ {
+ decoded.push_back (encoded[i]);
+ ++i;
continue;
}
}
@@ -1550,6 +1584,12 @@ ada_decode (const char *encoded, bool wrap, bool operators, bool wide)
at_start_name = 1;
i += 2;
}
+ else if (isupper (encoded[i]) || encoded[i] == ' ')
+ {
+ /* Decoded names should never contain any uppercase
+ character. */
+ goto Suppress;
+ }
else
{
/* It's a character part of the decoded name, so just copy it
@@ -1559,16 +1599,6 @@ ada_decode (const char *encoded, bool wrap, bool operators, bool wide)
}
}
- /* Decoded names should never contain any uppercase character.
- Double-check this, and abort the decoding if we find one. */
-
- if (operators)
- {
- for (i = 0; i < decoded.length(); ++i)
- if (isupper (decoded[i]) || decoded[i] == ' ')
- goto Suppress;
- }
-
/* If the compiler added a suffix, append it now. */
if (suffix >= 0)
decoded = decoded + "[" + &encoded[suffix] + "]";
@@ -1594,6 +1624,13 @@ ada_decode_tests ()
/* This isn't valid, but used to cause a crash. PR gdb/30639. The
result does not really matter very much. */
SELF_CHECK (ada_decode ("44") == "44");
+
+ /* Check that the settings used by the DWARF reader have the desired
+ effect. */
+ SELF_CHECK (ada_decode ("symada__cS", false, false) == "");
+ SELF_CHECK (ada_decode ("pkg__Oxor", false, false) == "pkg.Oxor");
+ SELF_CHECK (ada_decode ("pack__func_W017b", false, false)
+ == "pack.func_W017b");
}
#endif
@@ -13311,7 +13348,7 @@ ada_lookup_name_info::ada_lookup_name_info (const lookup_name_info &lookup_name)
else
m_standard_p = false;
- m_decoded_name = ada_decode (m_encoded_name.c_str (), true, false, false);
+ m_decoded_name = ada_decode (m_encoded_name.c_str (), true, false);
/* If the name contains a ".", then the user is entering a fully
qualified entity name, and the match must not be done in wild
diff --git a/gdb/ada-lang.h b/gdb/ada-lang.h
index 3582082a1a1b702595b803072ff9c345b7f3e0f7..a96a1f6e01737b03c6e6dea5024fbdd253647201 100644
--- a/gdb/ada-lang.h
+++ b/gdb/ada-lang.h
@@ -218,16 +218,13 @@ extern const char *ada_decode_symbol (const struct general_symbol_info *);
simply wrapped in <...>. If WRAP is false, then the empty string
will be returned.
- When OPERATORS is false, operator names will not be decoded. By
- default, they are decoded, e.g., 'Oadd' will be transformed to
- '"+"'.
-
- When WIDE is false, wide characters will be left as-is. By
- default, they converted from their hex encoding to the host
- charset. */
+ TRANSLATE has two effects. When true (the default), operator names
+ and wide characters will be decoded. E.g., 'Oadd' will be
+ transformed to '"+"', and wide characters converted from their hex
+ encoding to the host charset. When false, these will be left
+ alone. */
extern std::string ada_decode (const char *name, bool wrap = true,
- bool operators = true,
- bool wide = true);
+ bool translate = true);
extern std::vector<struct block_symbol> ada_lookup_symbol_list
(const char *, const struct block *, domain_search_flags);
diff --git a/gdb/dwarf2/cooked-index-shard.c b/gdb/dwarf2/cooked-index-shard.c
index 683feb2ce9615be23a39f3934e922b53574fa5ab..29a8aea513786e4c1c1ed77dee8610fc329d1c8a 100644
--- a/gdb/dwarf2/cooked-index-shard.c
+++ b/gdb/dwarf2/cooked-index-shard.c
@@ -108,7 +108,7 @@ cooked_index_shard::handle_gnat_encoded_entry
characters are left as-is. This is done to make name matching a
bit simpler; and for wide characters, it means the choice of Ada
source charset does not affect the indexer directly. */
- std::string canonical = ada_decode (entry->name, false, false, false);
+ std::string canonical = ada_decode (entry->name, false, false);
if (canonical.empty ())
{
entry->canonical = entry->name;
diff --git a/gdb/symtab.h b/gdb/symtab.h
index 7927380fca3f115fd43ecdaf683ecc07a0ff22e0..83913b1806f4a5fe39987978bb7059efc606a594 100644
--- a/gdb/symtab.h
+++ b/gdb/symtab.h
@@ -145,7 +145,7 @@ class ada_lookup_name_info final
std::string m_encoded_name;
/* The decoded lookup name. This is formed by calling ada_decode
- with both 'operators' and 'wide' set to false. */
+ with 'translate' set to false. */
std::string m_decoded_name;
/* Whether the user-provided lookup name was Ada encoded. If so,
--
2.46.1
next prev parent reply other threads:[~2025-04-02 23:47 UTC|newest]
Thread overview: 50+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-02 23:44 [PATCH v2 00/28] Search symbols via quick API Tom Tromey
2025-04-02 23:45 ` [PATCH v2 01/28] Add another minor hack to cooked_index_entry::full_name Tom Tromey
2025-04-02 23:45 ` Tom Tromey [this message]
2025-04-02 23:45 ` [PATCH v2 03/28] Emit some type declarations in .gdb_index Tom Tromey
2025-04-21 2:50 ` Simon Marchi
2025-04-21 14:50 ` Tom Tromey
2025-04-23 4:11 ` Simon Marchi
2025-04-23 20:54 ` Tom Tromey
2025-04-02 23:45 ` [PATCH v2 04/28] Ada import functions not in index Tom Tromey
2025-04-02 23:45 ` [PATCH v2 05/28] Fix index's handling of DW_TAG_imported_declaration Tom Tromey
2025-04-02 23:45 ` [PATCH v2 06/28] Put all CTF symbols in global scope Tom Tromey
2025-04-02 23:45 ` [PATCH v2 07/28] Restore "ingestion" of .debug_str when writing .debug_names Tom Tromey
2025-04-02 23:45 ` [PATCH v2 08/28] Entries from anon-struct.exp not in cooked index Tom Tromey
2025-04-02 23:45 ` [PATCH v2 09/28] Remove dwarf2_per_cu_data::mark Tom Tromey
2025-04-21 3:09 ` Simon Marchi
2025-04-21 15:38 ` Tom Tromey
2025-04-23 4:12 ` Simon Marchi
2025-04-02 23:45 ` [PATCH v2 10/28] Have expand_symtabs_matching work for already-expanded CUs Tom Tromey
2025-04-23 15:53 ` Simon Marchi
2025-04-23 20:39 ` Tom Tromey
2025-04-23 20:57 ` Tom Tromey
2025-04-02 23:45 ` [PATCH v2 11/28] Rewrite the .gdb_index reader Tom Tromey
2025-04-23 17:22 ` Simon Marchi
2025-04-23 20:50 ` Tom Tromey
2025-04-24 14:37 ` Pedro Alves
2025-04-02 23:45 ` [PATCH v2 12/28] Convert default_collect_symbol_completion_matches_break_on Tom Tromey
2025-04-02 23:45 ` [PATCH v2 13/28] Convert gdbpy_lookup_static_symbols Tom Tromey
2025-04-02 23:45 ` [PATCH v2 14/28] Convert ada_add_global_exceptions Tom Tromey
2025-04-02 23:45 ` [PATCH v2 15/28] Convert ada_language_defn::collect_symbol_completion_matches Tom Tromey
2025-04-02 23:45 ` [PATCH v2 16/28] Convert ada-lang.c:map_matching_symbols Tom Tromey
2025-04-02 23:45 ` [PATCH v2 17/28] Remove expand_symtabs_matching Tom Tromey
2025-04-02 23:45 ` [PATCH v2 18/28] Simplify basic_lookup_transparent_type Tom Tromey
2025-04-02 23:45 ` [PATCH v2 19/28] Remove objfile::expand_symtabs_for_function Tom Tromey
2025-04-02 23:45 ` [PATCH v2 20/28] Convert linespec.c:iterate_over_all_matching_symtabs Tom Tromey
2025-04-02 23:45 ` [PATCH v2 21/28] Simplify block_lookup_symbol_primary Tom Tromey
2025-04-02 23:45 ` [PATCH v2 22/28] Pass lookup_name_info to block_lookup_symbol_primary Tom Tromey
2025-04-02 23:45 ` [PATCH v2 23/28] Simplify block_lookup_symbol Tom Tromey
2025-04-02 23:45 ` [PATCH v2 24/28] Add best_symbol_tracker Tom Tromey
2025-04-02 23:45 ` [PATCH v2 25/28] Convert lookup_symbol_via_quick_fns Tom Tromey
2025-04-02 23:45 ` [PATCH v2 26/28] Convert lookup_symbol_in_objfile Tom Tromey
2025-04-02 23:45 ` [PATCH v2 27/28] Make dw_expand_symtabs_matching_file_matcher static Tom Tromey
2025-04-23 20:00 ` Simon Marchi
2025-04-23 20:09 ` Tom Tromey
2025-04-23 20:44 ` Tom Tromey
2025-04-02 23:45 ` [PATCH v2 28/28] Remove enter_symbol_lookup Tom Tromey
2025-04-23 20:09 ` [PATCH v2 00/28] Search symbols via quick API Simon Marchi
2025-04-24 21:09 ` Tom Tromey
2025-04-28 14:07 ` Guinevere Larsen
2025-04-28 22:06 ` Tom Tromey
2025-04-29 19:31 ` Guinevere Larsen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250402-search-in-psyms-v2-2-ea91704487cb@tromey.com \
--to=tom@tromey.com \
--cc=gdb-patches@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox