Mirror of the gdb-patches mailing list
 help / color / mirror / Atom feed
From: Sandra Loosemore <sandra@codesourcery.com>
To: Pedro Alves <palves@redhat.com>,
	gdb-patches <gdb-patches@sourceware.org>
Subject: Re: [patch] fix phony_iconv wide character support
Date: Fri, 15 Jan 2016 20:45:00 -0000	[thread overview]
Message-ID: <56995A79.5040203@codesourcery.com> (raw)
In-Reply-To: <56951484.70208@redhat.com>

[-- Attachment #1: Type: text/plain, Size: 1607 bytes --]

On 01/12/2016 07:58 AM, Pedro Alves wrote:
> [snip]
>
> Could you indent this, like:
>
> #ifdef USE_WIN32API
> # define GDB_DEFAULT_HOST_CHARSET "CP1252"
> #else
> # define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
> #endif

Done.

>> +/* We allow conversions from UTF-32, wchar_t, and the host charset.
>> +   We allow conversions to wchar_t and the host charset
>
> Missing period.
>
>> +   Return 1 if we are converting from UTF-32BE, 2 if from UTF32-LE,
>> +   0 otherwise.  This is  used as a flag in calls to iconv.  */
>
> Spurious double space after "This is".

Both fixed now.

> Isn't this the same as:
>
>     enum bfd_endian endian = utf_flag == 1 ? BFD_ENDIAN_BIG : BFD_ENDIAN_LITTLE;
>     extract_unsigned_integer (*inbuf, 4, endian);
>
> ?

That looks much nicer.  :-)  Fixed.

>> @@ -290,6 +315,10 @@ set_be_le_names (struct gdbarch *gdbarch)
>>       return;
>>     be_le_arch = gdbarch;
>>
>> +#ifdef PHONY_ICONV
>> +  target_wide_charset_le_name = "UTF-32LE";
>> +  target_wide_charset_be_name = "UTF-32BE";
>> +#else
>>     target_wide_charset_le_name = NULL;
>>     target_wide_charset_be_name = NULL;
>>
>> @@ -313,6 +342,7 @@ set_be_le_names (struct gdbarch *gdbarch)
>>   	    target_wide_charset_le_name = charset_enum[i];
>>   	}
>>       }
>> +# endif  /* PHONY_ICONV */
>
> This change isn't obvious to me.  You wrote in the ChangeLog:
>
>> 	(set_be_le_names) [PHONY_ICONV]: Use hard-wired names to match
>> 	phony_iconv_open.
>
> But I think this "to match" comment should be here in the sources.

Done.

> Otherwise LGTM.

New patch attached.  Is this one OK to commit?

-Sandra


[-- Attachment #2: iconv.log --]
[-- Type: text/x-log, Size: 533 bytes --]

2016-01-15  Sandra Loosemore  <sandra@codesourcery.com>

	gdb/
	* charset.c [PHONY_ICONV] (GDB_DEFAULT_HOST_CHARSET):
	Conditionalize for Windows host.
	(GDB_DEFAULT_TARGET_CHARSET): Match GDB_DEFAULT_HOST_CHARSET.
	(GDB_DEFAULT_TARGET_WIDE_CHARSET): Use UTF-32.
	(phony_iconv_open): Handle both UTF-32 endiannesses.
	(phony_iconv): Likewise.  Check for output overflow and clean up
	out-of-input cases.  Correct adjustment to input buffer pointer.
	(set_be_le_names) [PHONY_ICONV]: Use hard-wired names to match
	phony_iconv_open.


[-- Attachment #3: iconv.patch --]
[-- Type: text/x-patch, Size: 3764 bytes --]

diff --git a/gdb/charset.c b/gdb/charset.c
index ee1ae20..4e1c168 100644
--- a/gdb/charset.c
+++ b/gdb/charset.c
@@ -77,9 +77,13 @@
    arrange for there to be a single available character set.  */
 
 #undef GDB_DEFAULT_HOST_CHARSET
-#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
-#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
-#define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
+#ifdef USE_WIN32API
+# define GDB_DEFAULT_HOST_CHARSET "CP1252"
+#else
+# define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
+#endif
+#define GDB_DEFAULT_TARGET_CHARSET GDB_DEFAULT_HOST_CHARSET 
+#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
 #undef DEFAULT_CHARSET_NAMES
 #define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,
 
@@ -95,20 +99,27 @@
 #undef ICONV_CONST
 #define ICONV_CONST const
 
+/* We allow conversions from UTF-32, wchar_t, and the host charset.
+   We allow conversions to wchar_t and the host charset.
+   Return 1 if we are converting from UTF-32BE, 2 if from UTF32-LE,
+   0 otherwise.  This is used as a flag in calls to iconv.  */
+
 static iconv_t
 phony_iconv_open (const char *to, const char *from)
 {
-  /* We allow conversions from UTF-32BE, wchar_t, and the host charset.
-     We allow conversions to wchar_t and the host charset.  */
-  if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t")
-      && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
-    return -1;
   if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
     return -1;
 
-  /* Return 1 if we are converting from UTF-32BE, 0 otherwise.  This is
-     used as a flag in calls to iconv.  */
-  return !strcmp (from, "UTF-32BE");
+  if (!strcmp (from, "UTF-32BE") || !strcmp (from, "UTF-32"))
+    return 1;
+
+  if (!strcmp (from, "UTF-32LE"))
+    return 2;
+
+  if (strcmp (from, "wchar_t") && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
+    return -1;
+
+  return 0;
 }
 
 static int
@@ -123,31 +134,33 @@ phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
 {
   if (utf_flag)
     {
+      enum bfd_endian endian
+	= utf_flag == 1 ? BFD_ENDIAN_BIG : BFD_ENDIAN_LITTLE;
       while (*inbytesleft >= 4)
 	{
-	  size_t j;
-	  unsigned long c = 0;
-
-	  for (j = 0; j < 4; ++j)
-	    {
-	      c <<= 8;
-	      c += (*inbuf)[j] & 0xff;
-	    }
+	  unsigned long c
+	    = extract_unsigned_integer ((const gdb_byte *)*inbuf, 4, endian);
 
 	  if (c >= 256)
 	    {
 	      errno = EILSEQ;
 	      return -1;
 	    }
+	  if (*outbytesleft < 1)
+	    {
+	      errno = E2BIG;
+	      return -1;
+	    }
 	  **outbuf = c & 0xff;
 	  ++*outbuf;
 	  --*outbytesleft;
 
-	  ++*inbuf;
+	  *inbuf += 4;
 	  *inbytesleft -= 4;
 	}
-      if (*inbytesleft < 4)
+      if (*inbytesleft)
 	{
+	  /* Partial sequence on input.  */
 	  errno = EINVAL;
 	  return -1;
 	}
@@ -165,12 +178,11 @@ phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
       *outbuf += amt;
       *inbytesleft -= amt;
       *outbytesleft -= amt;
-    }
-
-  if (*inbytesleft)
-    {
-      errno = E2BIG;
-      return -1;
+      if (*inbytesleft)
+	{
+	  errno = E2BIG;
+	  return -1;
+	}
     }
 
   /* The number of non-reversible conversions -- but they were all
@@ -290,6 +302,11 @@ set_be_le_names (struct gdbarch *gdbarch)
     return;
   be_le_arch = gdbarch;
 
+#ifdef PHONY_ICONV
+  /* Match the wide charset names recognized by phony_iconv_open.  */
+  target_wide_charset_le_name = "UTF-32LE";
+  target_wide_charset_be_name = "UTF-32BE";
+#else
   target_wide_charset_le_name = NULL;
   target_wide_charset_be_name = NULL;
 
@@ -313,6 +330,7 @@ set_be_le_names (struct gdbarch *gdbarch)
 	    target_wide_charset_le_name = charset_enum[i];
 	}
     }
+# endif  /* PHONY_ICONV */
 }
 
 /* 'Set charset', 'set host-charset', 'set target-charset', 'set

  reply	other threads:[~2016-01-15 20:45 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-12-31 16:48 Sandra Loosemore
2016-01-12 14:58 ` Pedro Alves
2016-01-15 20:45   ` Sandra Loosemore [this message]
2016-01-15 21:35     ` Pedro Alves

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=56995A79.5040203@codesourcery.com \
    --to=sandra@codesourcery.com \
    --cc=gdb-patches@sourceware.org \
    --cc=palves@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox