Mirror of the gdb-patches mailing list
 help / color / mirror / Atom feed
* [4/7] Adjust the ttrace target (HP-UX) to always register the main thread
@ 2008-08-08  1:36 Pedro Alves
  2008-08-08 16:41 ` John David Anglin
  0 siblings, 1 reply; 25+ messages in thread
From: Pedro Alves @ 2008-08-08  1:36 UTC (permalink / raw)
  To: gdb-patches

[-- Attachment #1: Type: text/plain, Size: 643 bytes --]

This patch adjusts the inf-ttrace target to always register the main
thread in GDB's thread table.  Again, I'm using the new thread_change_ptid
function to update inferior_ptid.

( I haven't touched the hpux-thread.c target, as it is still
waiting for someone to rewrite it as a thread_stratum target, and
remove its dependency on deprecated_child_ops.  That is, it's dead broken
code currently.  While we're at it, does anyone still care about
HP-UX / ptrace (not ttrace)? )

I have no means to test this.  Can anybody with HP-UX access check
if I didn't break anything?  This patch only depends on patch 1 of
the series.

-- 
Pedro Alves

[-- Attachment #2: 004-ttrace_always_a_thread.diff --]
[-- Type: text/x-diff, Size: 5413 bytes --]

2008-08-08  Pedro Alves  <pedro@codesourcery.com>

	* inf-ttrace.c (inf_ttrace_follow_fork): Register the main thread
	of the child fork.
	(inf_ttrace_attach): Add the main thread.
	(inf_ttrace_resume_callback): Check for exited threads.  Adjust
	for always a thread.
	(inf_ttrace_wait): Decorate the main thread's ptid with lwp info
	using thread_change_ptid.  Don't add the main thread here.
	(inf_ttrace_pid_to_str): Adjust.

---
 gdb/inf-ttrace.c |   78 +++++++++++++++++++++++++++++++++----------------------
 1 file changed, 47 insertions(+), 31 deletions(-)

Index: src/gdb/inf-ttrace.c
===================================================================
--- src.orig/gdb/inf-ttrace.c	2008-08-05 19:40:06.000000000 +0100
+++ src/gdb/inf-ttrace.c	2008-08-05 23:04:02.000000000 +0100
@@ -513,10 +513,22 @@ Detaching after fork from child process 
 
   if (follow_child)
     {
+      struct thread_info *ti;
+
       /* The child will start out single-threaded.  */
-      inf_ttrace_num_lwps = 0;
+      inf_ttrace_num_lwps = 1;
       inf_ttrace_num_lwps_in_syscall = 0;
 
+      /* Delete parent.  */
+      delete_thread_silent (ptid_build (pid, lwpid, 0);
+
+      /* Add child.  inferior_ptid was already set above.  */
+      ti = add_thread_silent (inferior_ptid);
+      ti->private =
+	xmalloc (sizeof (struct inf_ttrace_private_thread_info));
+      memset (ti->private, 0,
+	      sizeof (struct inf_ttrace_private_thread_info));
+
       /* Reset breakpoints in the child as appropriate.  */
       follow_inferior_reset_breakpoints ();
     }
@@ -675,6 +687,7 @@ inf_ttrace_attach (char *args, int from_
   pid_t pid;
   char *dummy;
   ttevent_t tte;
+  struct thread_info *ti;
 
   if (!args)
     error_no_arg (_("process-id to attach"));
@@ -721,8 +734,18 @@ inf_ttrace_attach (char *args, int from_
 	      (uintptr_t)&tte, sizeof tte, 0) == -1)
     perror_with_name (("ttrace"));
 
-  inferior_ptid = pid_to_ptid (pid);
   push_target (ttrace_ops_hack);
+
+  /* We'll bump inf_ttrace_num_lwps up as soon as we get to
+     inf_ttrace_wait.  At this point, we don't have lwpid info
+     yet.  */
+
+  inferior_ptid = pid_to_ptid (pid);
+  ti = add_thread_silent (inferior_ptid);
+  ti->private =
+    xmalloc (sizeof (struct inf_ttrace_private_thread_info));
+  memset (ti->private, 0,
+	  sizeof (struct inf_ttrace_private_thread_info));
 }
 
 static void
@@ -787,7 +810,7 @@ inf_ttrace_kill (void)
 static int
 inf_ttrace_resume_callback (struct thread_info *info, void *arg)
 {
-  if (!ptid_equal (info->ptid, inferior_ptid))
+  if (!ptid_equal (info->ptid, inferior_ptid) && !is_exited (info->ptid))
     {
       pid_t pid = ptid_get_pid (info->ptid);
       lwpid_t lwpid = ptid_get_lwp (info->ptid);
@@ -824,7 +847,7 @@ inf_ttrace_resume (ptid_t ptid, int step
   if (ttrace (request, pid, lwpid, TT_NOPC, sig, 0) == -1)
     perror_with_name (("ttrace"));
 
-  if (ptid_equal (ptid, minus_one_ptid) && inf_ttrace_num_lwps > 0)
+  if (ptid_equal (ptid, minus_one_ptid))
     {
       /* Let all the other threads run too.  */
       iterate_over_threads (inf_ttrace_resume_callback, NULL);
@@ -886,6 +909,16 @@ inf_ttrace_wait (ptid_t ptid, struct tar
 
   ptid = ptid_build (tts.tts_pid, tts.tts_lwpid, 0);
 
+  if (inf_ttrace_num_lwps == 0)
+    {
+      inf_ttrace_num_lwps = 1;
+
+      /* This is the earliest we hear about the lwp member of
+	 INFERIOR_PTID, after an attach or fork-child.  */
+      if (ptid_get_lwp (inferior_ptid) == 0)
+	thread_change_ptid (inferior_ptid, ptid);
+    }
+
   switch (tts.tts_event)
     {
 #ifdef TTEVT_BPT_SSTEP
@@ -958,17 +991,6 @@ inf_ttrace_wait (ptid_t ptid, struct tar
     case TTEVT_LWP_CREATE:
       lwpid = tts.tts_u.tts_thread.tts_target_lwpid;
       ptid = ptid_build (tts.tts_pid, lwpid, 0);
-      if (inf_ttrace_num_lwps == 0)
-	{
-	  /* Now that we're going to be multi-threaded, add the
-	     original thread to the list first.  */
-	  ti = add_thread (ptid_build (tts.tts_pid, tts.tts_lwpid, 0));
-	  ti->private =
-	    xmalloc (sizeof (struct inf_ttrace_private_thread_info));
-	  memset (ti->private, 0,
-		  sizeof (struct inf_ttrace_private_thread_info));
-	  inf_ttrace_num_lwps++;
-	}
       ti = add_thread (ptid);
       ti->private =
 	xmalloc (sizeof (struct inf_ttrace_private_thread_info));
@@ -1045,11 +1067,6 @@ inf_ttrace_wait (ptid_t ptid, struct tar
   if (ttrace (TT_PROC_STOP, tts.tts_pid, 0, 0, 0, 0) == -1)
     perror_with_name (("ttrace"));
 
-  /* HACK: Twiddle INFERIOR_PTID such that the initial thread of a
-     process isn't recognized as a new thread.  */
-  if (ptid_get_lwp (inferior_ptid) == 0)
-    inferior_ptid = ptid;
-
   return ptid;
 }
 
@@ -1128,18 +1145,17 @@ inf_ttrace_thread_alive (ptid_t ptid)
 static char *
 inf_ttrace_pid_to_str (ptid_t ptid)
 {
-  if (inf_ttrace_num_lwps > 0)
-    {
-      pid_t pid = ptid_get_pid (ptid);
-      lwpid_t lwpid = ptid_get_lwp (ptid);
-      static char buf[128];
-
-      xsnprintf (buf, sizeof buf, "process %ld, lwp %ld",
-		 (long)pid, (long)lwpid);
-      return buf;
-    }
+  pid_t pid = ptid_get_pid (ptid);
+  lwpid_t lwpid = ptid_get_lwp (ptid);
+  static char buf[128];
 
-  return normal_pid_to_str (ptid);
+  if (lwpid == 0)
+    xsnprintf (buf, sizeof buf, "process %ld",
+	       (long) pid);
+  else
+    xsnprintf (buf, sizeof buf, "process %ld, lwp %ld",
+	       (long) pid, (long) lwpid);
+  return buf;
 }
 \f
 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [4/7] Adjust the ttrace target (HP-UX) to always register the main thread
  2008-08-08  1:36 [4/7] Adjust the ttrace target (HP-UX) to always register the main thread Pedro Alves
@ 2008-08-08 16:41 ` John David Anglin
  2008-08-08 17:24   ` Pedro Alves
  0 siblings, 1 reply; 25+ messages in thread
From: John David Anglin @ 2008-08-08 16:41 UTC (permalink / raw)
  To: Pedro Alves; +Cc: gdb-patches

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=US-ASCII, Size: 7200 bytes --]

> ( I haven't touched the hpux-thread.c target, as it is still
> waiting for someone to rewrite it as a thread_stratum target, and
> remove its dependency on deprecated_child_ops.  That is, it's dead broken
> code currently.  While we're on to it, does anyone still care for 
> HP-UX / ptrace (not ttrace) ? )

Yes, but I may be the only one.  I tried a quick hack to get this
working a while ago but didn't have the time to do it properly.

> I have no means to test this.  Can anybody with HP-UX access check
> if I didn't break anything?  This patch only depends on patch 1 of
> the series.

> +      delete_thread_silent (ptid_build (pid, lwpid, 0);

There's a typo in this line.  With this fixed, things build.  However,

# gdb/gdb main
GNU gdb (GDB) 6.8.50.20080807-cvs
Copyright (C) 2008 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "hppa2.0w-hp-hpux11.11".
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>...
(gdb) break main
Breakpoint 1 at 0x2b20: file main.c, line 4.
(gdb) r
Starting program: /xxx/gnu/gdb/objdir/main 
Segmentation fault (core dumped)
# gdb -c core gdb/gdb
GNU gdb (GDB) 6.8.50.20080807-cvs
Copyright (C) 2008 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "hppa2.0w-hp-hpux11.11".
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>...
Reading symbols from /usr/lib/dld.sl...done.
Loaded symbols for /usr/lib/dld.sl
Reading symbols from /xxx/gnu/gdb/objdir/gdb/gdb...done.
Loaded symbols for gdb/gdb
Reading symbols from /usr/lib/libxpdl.1...done.
Loaded symbols for /usr/lib/libxpdl.1
Reading symbols from /usr/lib/libxcurses.1...done.
Loaded symbols for /usr/lib/libxcurses.1
Reading symbols from /usr/lib/libm.2...done.
Loaded symbols for /usr/lib/libm.2
Reading symbols from /usr/lib/libc.2...done.
Loaded symbols for /usr/lib/libc.2
Reading symbols from /usr/lib/libdld.2...done.
Loaded symbols for /usr/lib/libdld.2
Reading symbols from /opt/graphics/OpenGL/lib/libogltls.sl...done.
Loaded symbols for /opt/graphics/OpenGL/lib/libogltls.sl

warning: Private mapping of shared library text was not specified
by the executable; setting a breakpoint in a shared library which
is not privately mapped will not work.  See the HP-UX 11i v3 chatr
manpage for methods to privately map shared library text.
Unable to write __dld_flags.
(gdb) bt
#0  0x000c9960 in inf_ttrace_delete_dying_threads_callback (info=0x4007bdb0, 
    arg=0x0) at ../../src/gdb/inf-ttrace.c:828
#1  0x0008b640 in iterate_over_threads (
    callback=@0x4001a712: 0xc9940 <inf_ttrace_delete_dying_threads_callback>, 
    data=0x0) at ../../src/gdb/thread.c:338
#2  0x000c98e0 in inf_ttrace_resume (ptid=
      {pid = 0, lwp = 16961, tid = 7024758}, step=1073949720, 
    signal=TARGET_SIGNAL_0) at ../../src/gdb/inf-ttrace.c:854
#3  0x000a3390 in target_resume (ptid={pid = 0, lwp = 16961, tid = 7024758}, 
    step=0, signal=TARGET_SIGNAL_0) at ../../src/gdb/target.c:1789
#4  0x00087f88 in resume (step=0, sig=TARGET_SIGNAL_0)
    at ../../src/gdb/infrun.c:1123
#5  0x000c6db8 in startup_inferior (ntraps=1074249136)
    at ../../src/gdb/fork-child.c:470
#6  0x000c8e8c in inf_ttrace_him (pid=16961) at ../../src/gdb/inf-ttrace.c:634
#7  0x000c7338 in fork_inferior (
    exec_file_arg=0x4241 "g\220??\033.X??.X\017\206;\234\017\232;\234~?-ھ?-?6\225\001?6\225q??Dg??G?.?/?.?/߱?U\235??T]?\213?5׳?5*?\033P??\033Qp\027??p\020????\205???\204)??wq??wq4r??4r??M?h2M?h2\217?s\b\217?s\bV??HV??p0???0?U???Xb8?Xc?u6??r6?\002Sq?\002Sq\216??\225\217??\225\217MʱMMʿMaX??b???]?*?-?*¦?Y??"..., allargs=0x40077520 "", env=0x4004f998, 
    traceme_fun=@0x4001a6ea: 0xc8c30 <inf_ttrace_me>, 
    init_trace_fun=@0x4001a6f2: 0xc8d40 <inf_ttrace_him>, 
    pre_trace_fun=@0x4001a6fa: 0xc8b24 <inf_ttrace_prepare>, 
    shell_file_arg=0x0) at ../../src/gdb/fork-child.c:409
#8  0x000c8a34 in inf_ttrace_create_inferior (exec_file=0x4007bdb0 "", 
    allargs=0x0, env=0x82ded040, from_tty=134217759)
    at ../../src/gdb/inf-ttrace.c:651
#9  0x000a1684 in find_default_create_inferior (
    exec_file=0x400673d0 "/xxx/gnu/gdb/objdir/main", allargs=0x40077520 "", 
    env=0x4004f998, from_tty=1) at ../../src/gdb/target.c:2079
#10 0x0003a934 in run_command_1 (args=0x7eff0e03 "?@", from_tty=1, 
    tbreak_at_main=1074230560) at ../../src/gdb/infcmd.c:565
#11 0x0004ada4 in do_cfunc (c=0x4007bdb0, args=0x0, from_tty=-2099326912)
    at ../../src/gdb/cli/cli-decode.c:60
#12 0x0004afc0 in cmd_func (cmd=0x4007bdb0, args=0x0, from_tty=-2099326912)
    at ../../src/gdb/cli/cli-decode.c:1672
#13 0x00043474 in execute_command (p=0x40036b11 "", from_tty=1)
    at ../../src/gdb/top.c:457
#14 0x0008f2fc in command_handler (command=0x1 "")
    at ../../src/gdb/event-top.c:516
#15 0x00090490 in command_line_handler (rl=0x400672f0 "r")
    at ../../src/gdb/event-top.c:747
#16 0x001a5354 in rl_callback_read_char ()
    at ../../src/readline/callback.c:205
#17 0x0008f4bc in rl_callback_read_char_wrapper (client_data=0x4007bdb0)
    at ../../src/gdb/event-top.c:178
#18 0x0008fcb8 in stdin_event_handler (error=1074249136, client_data=0x0)
    at ../../src/gdb/event-top.c:433
#19 0x0008edac in handle_file_event (event_file_desc=0)
    at ../../src/gdb/event-loop.c:732
#20 0x0008de88 in process_event () at ../../src/gdb/event-loop.c:341
#21 0x0008ea64 in gdb_do_one_event (data=0x4007bdb0)
    at ../../src/gdb/event-loop.c:378
#22 0x0003ea10 in catch_errors (
    func=@0x40018f72: 0x2b340 <captured_command_loop>, func_args=0x0, 
    errstring=0x7eff0008 "main", mask=134217759)
    at ../../src/gdb/exceptions.c:509
#23 0x000e6624 in tui_command_loop (data=0x4007bdb0)
    at ../../src/gdb/tui/tui-interp.c:153
#24 0x0003f09c in current_interp_command_loop ()
    at ../../src/gdb/interps.c:289
#25 0x0002b364 in captured_command_loop (data=0x4007bdb0)
    at ../../src/gdb/main.c:99
#26 0x0003ea10 in catch_errors (func=0x40019028 <_crt_errno+2168>, 
    func_args=0x1, 
    errstring=0x79000 "\b\034\002C?^\020\005\b\034\002Z?\037\036?\f?\020\223", mask=134217759) at ../../src/gdb/exceptions.c:509
#27 0x0002ad38 in captured_main (data=0x40032a04) at ../../src/gdb/main.c:831
#28 0x0003ea10 in catch_errors (func=0, func_args=0x0, 
    errstring=0x206050 "console", mask=134217759)
    at ../../src/gdb/exceptions.c:509
#29 0x0002a1d0 in gdb_main (args=0x4007bdb0) at ../../src/gdb/main.c:840
#30 0x0002a190 in main (argc=1074249136, argv=0x0) at ../../src/gdb/gdb.c:33

(gdb) p info->private
$2 = (struct private_thread_info *) 0x0

Dave
-- 
J. David Anglin                                  dave.anglin@nrc-cnrc.gc.ca
National Research Council of Canada              (613) 990-0752 (FAX: 952-6602)


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [4/7] Adjust the ttrace target (HP-UX) to always register the main thread
  2008-08-08 16:41 ` John David Anglin
@ 2008-08-08 17:24   ` Pedro Alves
  2008-08-08 17:49     ` John David Anglin
                       ` (2 more replies)
  0 siblings, 3 replies; 25+ messages in thread
From: Pedro Alves @ 2008-08-08 17:24 UTC (permalink / raw)
  To: gdb-patches; +Cc: John David Anglin

[-- Attachment #1: Type: text/plain, Size: 1239 bytes --]

Thanks a lot John,

On Friday 08 August 2008 17:40:13, John David Anglin wrote:

> (gdb) bt
> #0  0x000c9960 in inf_ttrace_delete_dying_threads_callback
> (info=0x4007bdb0, arg=0x0) at ../../src/gdb/inf-ttrace.c:828
> #1  0x0008b640 in iterate_over_threads (
>     callback=@0x4001a712: 0xc9940
> <inf_ttrace_delete_dying_threads_callback>, data=0x0) at
> ../../src/gdb/thread.c:338
> #2  0x000c98e0 in inf_ttrace_resume (ptid=
>       {pid = 0, lwp = 16961, tid = 7024758}, step=1073949720,
>     signal=TARGET_SIGNAL_0) at ../../src/gdb/inf-ttrace.c:854
> #3  0x000a3390 in target_resume (ptid={pid = 0, lwp = 16961, tid =
> 7024758}, step=0, signal=TARGET_SIGNAL_0) at ../../src/gdb/target.c:1789 #4
>  0x00087f88 in resume (step=0, sig=TARGET_SIGNAL_0)
>     at ../../src/gdb/infrun.c:1123
> #5  0x000c6db8 in startup_inferior (ntraps=1074249136)

I wasn't setting the private thread info in the main thread
in inf_ttrace_wait, when getting there from a fork_inferior; but, I
was setting it on inf_ttrace_attach.  Since this is a
!target_attach_no_wait target (there's always a target_wait
after an attach), I just moved the setting of the private
info always to inf_ttrace_wait.

Could you check this version please?

-- 
Pedro Alves

[-- Attachment #2: 004-ttrace_always_a_thread.diff --]
[-- Type: text/x-diff, Size: 5618 bytes --]

2008-08-08  Pedro Alves  <pedro@codesourcery.com>

	* inf-ttrace.c (inf_ttrace_follow_fork): Register the main thread
	of the child fork.
	(inf_ttrace_attach): Add the main thread.
	(inf_ttrace_resume_callback): Check for exited threads.  Adjust
	for always a thread.
	(inf_ttrace_wait): Decorate the main thread's ptid with lwp info
	using thread_change_ptid, and set its private data.  Don't add the
	main thread here.
	(inf_ttrace_pid_to_str): Adjust.

---
 gdb/inf-ttrace.c |   86 +++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 55 insertions(+), 31 deletions(-)

Index: src/gdb/inf-ttrace.c
===================================================================
--- src.orig/gdb/inf-ttrace.c	2008-08-08 13:43:19.000000000 +0100
+++ src/gdb/inf-ttrace.c	2008-08-08 18:17:50.000000000 +0100
@@ -513,10 +513,22 @@ Detaching after fork from child process 
 
   if (follow_child)
     {
+      struct thread_info *ti;
+
       /* The child will start out single-threaded.  */
-      inf_ttrace_num_lwps = 0;
+      inf_ttrace_num_lwps = 1;
       inf_ttrace_num_lwps_in_syscall = 0;
 
+      /* Delete parent.  */
+      delete_thread_silent (ptid_build (pid, lwpid, 0));
+
+      /* Add child.  inferior_ptid was already set above.  */
+      ti = add_thread_silent (inferior_ptid);
+      ti->private =
+	xmalloc (sizeof (struct inf_ttrace_private_thread_info));
+      memset (ti->private, 0,
+	      sizeof (struct inf_ttrace_private_thread_info));
+
       /* Reset breakpoints in the child as appropriate.  */
       follow_inferior_reset_breakpoints ();
     }
@@ -721,8 +733,13 @@ inf_ttrace_attach (char *args, int from_
 	      (uintptr_t)&tte, sizeof tte, 0) == -1)
     perror_with_name (("ttrace"));
 
-  inferior_ptid = pid_to_ptid (pid);
   push_target (ttrace_ops_hack);
+
+  /* We'll bump inf_ttrace_num_lwps up and add the private data to the
+     thread as soon as we get to inf_ttrace_wait.  At this point, we
+     don't have lwpid info yet.  */
+  inferior_ptid = pid_to_ptid (pid);
+  add_thread_silent (inferior_ptid);
 }
 
 static void
@@ -787,7 +804,7 @@ inf_ttrace_kill (void)
 static int
 inf_ttrace_resume_callback (struct thread_info *info, void *arg)
 {
-  if (!ptid_equal (info->ptid, inferior_ptid))
+  if (!ptid_equal (info->ptid, inferior_ptid) && !is_exited (info->ptid))
     {
       pid_t pid = ptid_get_pid (info->ptid);
       lwpid_t lwpid = ptid_get_lwp (info->ptid);
@@ -824,7 +841,7 @@ inf_ttrace_resume (ptid_t ptid, int step
   if (ttrace (request, pid, lwpid, TT_NOPC, sig, 0) == -1)
     perror_with_name (("ttrace"));
 
-  if (ptid_equal (ptid, minus_one_ptid) && inf_ttrace_num_lwps > 0)
+  if (ptid_equal (ptid, minus_one_ptid))
     {
       /* Let all the other threads run too.  */
       iterate_over_threads (inf_ttrace_resume_callback, NULL);
@@ -886,6 +903,30 @@ inf_ttrace_wait (ptid_t ptid, struct tar
 
   ptid = ptid_build (tts.tts_pid, tts.tts_lwpid, 0);
 
+  if (inf_ttrace_num_lwps == 0)
+    {
+      struct thread_info *ti;
+
+      inf_ttrace_num_lwps = 1;
+
+      /* This is the earliest we hear about the lwp member of
+	 INFERIOR_PTID, after an attach or fork_inferior.  */
+      gdb_assert (ptid_get_lwp (inferior_ptid) == 0);
+
+      /* We haven't set the private member on the main thread yet.  Do
+	 it now.  */
+      ti = find_thread_pid (inferior_ptid);
+      gdb_assert (ti != NULL && ti->private == NULL);
+      ti->private =
+	xmalloc (sizeof (struct inf_ttrace_private_thread_info));
+      memset (ti->private, 0,
+	      sizeof (struct inf_ttrace_private_thread_info));
+
+      /* Notify the core that this ptid changed.  This changes
+	 inferior_ptid as well.  */
+      thread_change_ptid (inferior_ptid, ptid);
+    }
+
   switch (tts.tts_event)
     {
 #ifdef TTEVT_BPT_SSTEP
@@ -958,17 +999,6 @@ inf_ttrace_wait (ptid_t ptid, struct tar
     case TTEVT_LWP_CREATE:
       lwpid = tts.tts_u.tts_thread.tts_target_lwpid;
       ptid = ptid_build (tts.tts_pid, lwpid, 0);
-      if (inf_ttrace_num_lwps == 0)
-	{
-	  /* Now that we're going to be multi-threaded, add the
-	     original thread to the list first.  */
-	  ti = add_thread (ptid_build (tts.tts_pid, tts.tts_lwpid, 0));
-	  ti->private =
-	    xmalloc (sizeof (struct inf_ttrace_private_thread_info));
-	  memset (ti->private, 0,
-		  sizeof (struct inf_ttrace_private_thread_info));
-	  inf_ttrace_num_lwps++;
-	}
       ti = add_thread (ptid);
       ti->private =
 	xmalloc (sizeof (struct inf_ttrace_private_thread_info));
@@ -1045,11 +1075,6 @@ inf_ttrace_wait (ptid_t ptid, struct tar
   if (ttrace (TT_PROC_STOP, tts.tts_pid, 0, 0, 0, 0) == -1)
     perror_with_name (("ttrace"));
 
-  /* HACK: Twiddle INFERIOR_PTID such that the initial thread of a
-     process isn't recognized as a new thread.  */
-  if (ptid_get_lwp (inferior_ptid) == 0)
-    inferior_ptid = ptid;
-
   return ptid;
 }
 
@@ -1128,18 +1153,17 @@ inf_ttrace_thread_alive (ptid_t ptid)
 static char *
 inf_ttrace_pid_to_str (ptid_t ptid)
 {
-  if (inf_ttrace_num_lwps > 0)
-    {
-      pid_t pid = ptid_get_pid (ptid);
-      lwpid_t lwpid = ptid_get_lwp (ptid);
-      static char buf[128];
-
-      xsnprintf (buf, sizeof buf, "process %ld, lwp %ld",
-		 (long)pid, (long)lwpid);
-      return buf;
-    }
+  pid_t pid = ptid_get_pid (ptid);
+  lwpid_t lwpid = ptid_get_lwp (ptid);
+  static char buf[128];
 
-  return normal_pid_to_str (ptid);
+  if (lwpid == 0)
+    xsnprintf (buf, sizeof buf, "process %ld",
+	       (long) pid);
+  else
+    xsnprintf (buf, sizeof buf, "process %ld, lwp %ld",
+	       (long) pid, (long) lwpid);
+  return buf;
 }
 \f
 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [4/7] Adjust the ttrace target (HP-UX) to always register the main thread
  2008-08-08 17:24   ` Pedro Alves
@ 2008-08-08 17:49     ` John David Anglin
  2008-08-08 18:34     ` ttrace: Protocal error John David Anglin
  2008-08-10  0:15     ` [4/7] Adjust the ttrace target (HP-UX) to always register the main thread Daniel Jacobowitz
  2 siblings, 0 replies; 25+ messages in thread
From: John David Anglin @ 2008-08-08 17:49 UTC (permalink / raw)
  To: Pedro Alves; +Cc: gdb-patches

> Could you check this version please?

This version compiles fine and doesn't segfault on a trivial program.
Possibly, I'll get a chance to try it on a threaded application later
today.

Dave
-- 
J. David Anglin                                  dave.anglin@nrc-cnrc.gc.ca
National Research Council of Canada              (613) 990-0752 (FAX: 952-6602)


^ permalink raw reply	[flat|nested] 25+ messages in thread

* ttrace: Protocal error
  2008-08-08 17:24   ` Pedro Alves
  2008-08-08 17:49     ` John David Anglin
@ 2008-08-08 18:34     ` John David Anglin
  2008-08-08 20:02       ` Pedro Alves
  2008-08-10  0:15     ` [4/7] Adjust the ttrace target (HP-UX) to always register the main thread Daniel Jacobowitz
  2 siblings, 1 reply; 25+ messages in thread
From: John David Anglin @ 2008-08-08 18:34 UTC (permalink / raw)
  To: Pedro Alves; +Cc: gdb-patches

While we're on the subject of threads, it seems we are still not in
a position to debug the vla6.f90 failure:

-bash-3.2$ gdb vla6.x3g
GNU gdb (GDB) 6.8.50.20080807-cvs
Copyright (C) 2008 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "hppa2.0w-hp-hpux11.11".
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>...
(gdb) r
Starting program: /mnt/gnu/gcc/objdir/hppa2.0w-hp-hpux11.11/libgomp/testsuite/vla6.x3g
warning: Private mapping of shared library text was not specified
by the executable; setting a breakpoint in a shared library which
is not privately mapped will not work.  See the HP-UX 11i v3 chatr
manpage for methods to privately map shared library text.
[New process 20069]
[New process 20069, lwp 7087826]
[process 20069, lwp 7087826 exited]
0xc0065508 in ?? ()
ttrace: Protocol error.

This doesn't happen with 6.4.50.20051230-cvs.  Unfortunately, this
version forces shared libraries private.

This is the gdb backtrace:

Breakpoint 1, perror_with_name (string=0x0) at ../../src/gdb/utils.c:847
847       err = safe_strerror (errno);
(gdb) bt
#0  perror_with_name (string=0x0) at ../../src/gdb/utils.c:847
#1  0x000c9b08 in inf_ttrace_resume_callback (info=0x2319b0, arg=0x7b019048)
    at ../../src/gdb/inf-ttrace.c:813
#2  0x0008b640 in iterate_over_threads (
    callback=@0x4001a70a: 0xc9a28 <inf_ttrace_resume_callback>, data=0x0)
    at ../../src/gdb/thread.c:338
#3  0x000c9960 in inf_ttrace_resume (ptid=
    {pid = 1953788513, lwp = 1667563520, tid = 774778670}, step=1073949720,
    signal=TARGET_SIGNAL_0) at ../../src/gdb/inf-ttrace.c:847
#4  0x000a3390 in target_resume (ptid=
    {pid = 1953788513, lwp = 1667563520, tid = 774778670}, step=0,
    signal=TARGET_SIGNAL_0) at ../../src/gdb/target.c:1789
#5  0x00087f88 in resume (step=0, sig=TARGET_SIGNAL_0)
    at ../../src/gdb/infrun.c:1123
#6  0x00088c00 in handle_inferior_event (ecs=0x7eff10d8)
    at ../../src/gdb/infrun.c:2115
#7  0x0008ab94 in wait_for_inferior (treat_exec_as_sigtrap=0)
    at ../../src/gdb/infrun.c:1538
#8  0x0008ae74 in proceed (addr=0, siggnal=TARGET_SIGNAL_0, step=0)
    at ../../src/gdb/infrun.c:1350
#9  0x0003a950 in run_command_1 (args=0x7eff0e23 "", from_tty=1,
    tbreak_at_main=1074219312) at ../../src/gdb/infcmd.c:573
...

Any thoughts on how to fix?  Since this is a TT_LWP_CONTINUE, I presume
the code is attempting to resume a thread not stopped by the debugger.

Dave
-- 
J. David Anglin                                  dave.anglin@nrc-cnrc.gc.ca
National Research Council of Canada              (613) 990-0752 (FAX: 952-6602)


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: ttrace: Protocal error
  2008-08-08 18:34     ` ttrace: Protocal error John David Anglin
@ 2008-08-08 20:02       ` Pedro Alves
  2008-08-08 20:49         ` John David Anglin
  0 siblings, 1 reply; 25+ messages in thread
From: Pedro Alves @ 2008-08-08 20:02 UTC (permalink / raw)
  To: John David Anglin; +Cc: gdb-patches

You didn't mention, but I assume this also happens without my patch.

Note, I know nothing about ttrace and HP-UX.

On Friday 08 August 2008 19:33:06, John David Anglin wrote:
> While were on the subject of threads, it seems we are still not in
> a position to debug the vla6.f90 failure:

What's this test doing different?

> Breakpoint 1, perror_with_name (string=0x0) at ../../src/gdb/utils.c:847
> 847       err = safe_strerror (errno);
> (gdb) bt
> #0  perror_with_name (string=0x0) at ../../src/gdb/utils.c:847
> #1  0x000c9b08 in inf_ttrace_resume_callback (info=0x2319b0,
> arg=0x7b019048) at ../../src/gdb/inf-ttrace.c:813
> #2  0x0008b640 in iterate_over_threads (
>     callback=@0x4001a70a: 0xc9a28 <inf_ttrace_resume_callback>, data=0x0)
>     at ../../src/gdb/thread.c:338
> #3  0x000c9960 in inf_ttrace_resume (ptid=
>     {pid = 1953788513, lwp = 1667563520, tid = 774778670}, step=1073949720,
>     signal=TARGET_SIGNAL_0) at ../../src/gdb/inf-ttrace.c:847
> #4  0x000a3390 in target_resume (ptid=
>     {pid = 1953788513, lwp = 1667563520, tid = 774778670}, step=0,
>     signal=TARGET_SIGNAL_0) at ../../src/gdb/target.c:1789

             ^^^^^^^^^^        ^^^^^^^^^^        ^^^^^^^^^

I assume this ptid is GDB getting bogus info, right?
To get to inf_ttrace_resume_callback, this has
to be (-1,0,0).

From your log:
> [New process 20069]
> [New process 20069, lwp 7087826]

> [process 20069, lwp 7087826 exited]

This should be setting the dying flag on the thread, but
it is still listed in gdb's thread table.

   case TTEVT_LWP_EXIT:
      if (print_thread_events)
	printf_unfiltered (_("[%s exited]\n"), target_pid_to_str (ptid));
      ti = find_thread_pid (ptid);
      gdb_assert (ti != NULL);
      ((struct inf_ttrace_private_thread_info *)ti->private)->dying = 1;
      inf_ttrace_num_lwps--;
      ttrace (TT_LWP_CONTINUE, ptid_get_pid (ptid),
              ptid_get_lwp (ptid), TT_NOPC, 0, 0);
      /* If we don't return -1 here, core GDB will re-add the thread.  */
      ptid = minus_one_ptid;
      break;


inf_ttrace_resume:

  if (ptid_equal (ptid, minus_one_ptid))
    {
      /* Let all the other threads run too.  */
      iterate_over_threads (inf_ttrace_resume_callback, NULL);
      iterate_over_threads (inf_ttrace_delete_dying_threads_callback, NULL);
    }

Is this the first resume after that "exit" notification?
Any chance we're trying to resume a dead thread here then?

What happens when you delete the dying threads before resuming?

      iterate_over_threads (inf_ttrace_delete_dying_threads_callback, NULL);
      iterate_over_threads (inf_ttrace_resume_callback, NULL);
      iterate_over_threads (inf_ttrace_delete_dying_threads_callback, NULL);

Hmmm, I assume not; if my sources match yours, your program is stopped
at a syscall event:

      /* Be careful not to try to gather much state about a thread
         that's in a syscall.  It's frequently a losing proposition.  */
    case TARGET_WAITKIND_SYSCALL_ENTRY:
      if (debug_infrun)
        fprintf_unfiltered (gdb_stdlog, "infrun: 
TARGET_WAITKIND_SYSCALL_ENTRY\n");
      resume (0, TARGET_SIGNAL_0);
      prepare_to_wait (ecs);
      return;

So, there should have already been a resume in between.

Could you check which thread got the syscall event?  Is it the same
thread we fail to resume?  Is it possible to disable syscall events,
just for checking if it is related?

-- 
Pedro Alves


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: ttrace: Protocal error
  2008-08-08 20:02       ` Pedro Alves
@ 2008-08-08 20:49         ` John David Anglin
  0 siblings, 0 replies; 25+ messages in thread
From: John David Anglin @ 2008-08-08 20:49 UTC (permalink / raw)
  To: Pedro Alves; +Cc: gdb-patches

> Note, I know nothing about ttrace and HP-UX.

That makes us equal.

> On Friday 08 August 2008 19:33:06, John David Anglin wrote:
> > While were on the subject of threads, it seems we are still not in
> > a position to debug the vla6.f90 failure:
> 
> What's this test doing different?

It's not entirely clear.  However, it is using emulated TLS support
and multiple lwp threads.  This support may be initialized by a constructor
run directly by the dynamic loader.  There's a timing or some other
random effect associated with the failure (could be some variable is
being randomly initialized).

> > #4  0x000a3390 in target_resume (ptid=
> >     {pid = 1953788513, lwp = 1667563520, tid = 774778670}, step=0,
> >     signal=TARGET_SIGNAL_0) at ../../src/gdb/target.c:1789
> 
>              ^^^^^^^^^^        ^^^^^^^^^^        ^^^^^^^^^
> 
> I assume this ptid is GDB getting bogus info, right?

That's pretty common for optimized code. 

> This should be setting the dying flag on the thread, but
> it is still listed in gdb's thread table.

Yes.

>    case TTEVT_LWP_EXIT:
>       if (print_thread_events)
> 	printf_unfiltered (_("[%s exited]\n"), target_pid_to_str (ptid));
>       ti = find_thread_pid (ptid);
>       gdb_assert (ti != NULL);
>       ((struct inf_ttrace_private_thread_info *)ti->private)->dying = 1;
>       inf_ttrace_num_lwps--;
>       ttrace (TT_LWP_CONTINUE, ptid_get_pid (ptid),
>               ptid_get_lwp (ptid), TT_NOPC, 0, 0);
>       /* If we don't return -1 here, core GDB will re-add the thread.  */
>       ptid = minus_one_ptid;
>       break;

The dying flag is set when the resume is attempted.

> inf_ttrace_resume:
> 
>   if (ptid_equal (ptid, minus_one_ptid))
>     {
>       /* Let all the other threads run too.  */
>       iterate_over_threads (inf_ttrace_resume_callback, NULL);
>       iterate_over_threads (inf_ttrace_delete_dying_threads_callback, NULL);
>     }
> 
> Is this the first resume after that "exit" notification?
> Any chance we're trying to resume a dead thread here then?

Yes.  That's what I think is happening.

> What happens when you delete the dying threads before resuming?
> 
>       iterate_over_threads (inf_ttrace_delete_dying_threads_callback, NULL);
>       iterate_over_threads (inf_ttrace_resume_callback, NULL);
>       iterate_over_threads (inf_ttrace_delete_dying_threads_callback, NULL);
> 
> Hmmm, I assume not, if my sources match yours, your the program is stopped
> at a syscall event:
> 
>       /* Be careful not to try to gather much state about a thread
>          that's in a syscall.  It's frequently a losing proposition.  */
>     case TARGET_WAITKIND_SYSCALL_ENTRY:
>       if (debug_infrun)
>         fprintf_unfiltered (gdb_stdlog, "infrun: TARGET_WAITKIND_SYSCALL_ENTRY\n");
>       resume (0, TARGET_SIGNAL_0);
>       prepare_to_wait (ecs);
>       return;
> 
> So, there should have already been a resume in between.
> 
> Could you check which thread got the syscall event?  Is it the same
> thread we fail to resume?  Is it possibly to disable syscall events,
> just for checking if it is related?

I don't know how to disable syscall events.

Dave
-- 
J. David Anglin                                  dave.anglin@nrc-cnrc.gc.ca
National Research Council of Canada              (613) 990-0752 (FAX: 952-6602)


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [4/7] Adjust the ttrace target (HP-UX) to always register the  main thread
  2008-08-08 17:24   ` Pedro Alves
  2008-08-08 17:49     ` John David Anglin
  2008-08-08 18:34     ` ttrace: Protocal error John David Anglin
@ 2008-08-10  0:15     ` Daniel Jacobowitz
  2008-08-10  0:36       ` [4/7] Adjust the ttrace target (HP-UX) to always register the John David Anglin
  2008-08-10 21:04       ` John David Anglin
  2 siblings, 2 replies; 25+ messages in thread
From: Daniel Jacobowitz @ 2008-08-10  0:15 UTC (permalink / raw)
  To: Pedro Alves; +Cc: gdb-patches, John David Anglin

On Fri, Aug 08, 2008 at 06:22:25PM +0100, Pedro Alves wrote:
> Could you check this version please?

Just FYI, I've completely lost track of this thread - so when there's
anything you need approval for, please let me know.

My understanding is that 1/7, 2/7, 7/7 are approved; 3/7 has a small
discussion; 4/7 has a big discussion; and no one's looked at 5/7 or
6/7 yet.  I'll look at those as soon as I get a chance, so poke me if
3/7 and 4/7 are resolved and I haven't done it yet.

-- 
Daniel Jacobowitz
CodeSourcery


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [4/7] Adjust the ttrace target (HP-UX) to always register the
  2008-08-10  0:15     ` [4/7] Adjust the ttrace target (HP-UX) to always register the main thread Daniel Jacobowitz
@ 2008-08-10  0:36       ` John David Anglin
  2008-08-10 21:05         ` Pedro Alves
  2008-08-10 21:04       ` John David Anglin
  1 sibling, 1 reply; 25+ messages in thread
From: John David Anglin @ 2008-08-10  0:36 UTC (permalink / raw)
  To: Daniel Jacobowitz; +Cc: pedro, gdb-patches

> My understanding is that 1/7, 2/7, 7/7 are approved; 3/7 has a small
> discussion; 4/7 has a big discussion; and no one's looked at 5/7 or
> 6/7 yet.  I'll look at those as soon as I get a chance, so poke me if
> 3/7 and 4/7 are resolved and I haven't done it yet.

I've just tried Pedro's second and third patches for 4/7.  These
apply on top of the corrected first patch.  There are a couple of
typos.  Once these are fixed, the set seems to work (i.e., they
fix the problem of ttrace errors when resuming threads).

There is a typo in the second patch here:

+  info = thread_find_pid (ptid);

This should be find_thread_pid.

The second typo is in the third patch:

+       printf_unfiltered(_("[%s has been terminated]\n")
+                         target_pid_to_str (ptid));

There's a missing ",".

Dave
-- 
J. David Anglin                                  dave.anglin@nrc-cnrc.gc.ca
National Research Council of Canada              (613) 990-0752 (FAX: 952-6602)


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [4/7] Adjust the ttrace target (HP-UX) to always register the
  2008-08-10  0:15     ` [4/7] Adjust the ttrace target (HP-UX) to always register the main thread Daniel Jacobowitz
  2008-08-10  0:36       ` [4/7] Adjust the ttrace target (HP-UX) to always register the John David Anglin
@ 2008-08-10 21:04       ` John David Anglin
  2008-08-14 17:55         ` Daniel Jacobowitz
  1 sibling, 1 reply; 25+ messages in thread
From: John David Anglin @ 2008-08-10 21:04 UTC (permalink / raw)
  To: Daniel Jacobowitz; +Cc: pedro, gdb-patches

> Just FYI, I've completely lost track of this thread - so when there's
> anything you need approval for, please let me know.

For the record, I have consolidated Pedro's three changes below.  This
might make it easier to review the changes.

Dave
-- 
J. David Anglin                                  dave.anglin@nrc-cnrc.gc.ca
National Research Council of Canada              (613) 990-0752 (FAX: 952-6602)

Index: inf-ttrace.c
===================================================================
RCS file: /cvs/src/src/gdb/inf-ttrace.c,v
retrieving revision 1.30
diff -u -3 -p -r1.30 inf-ttrace.c
--- inf-ttrace.c	9 Jul 2008 22:23:05 -0000	1.30
+++ inf-ttrace.c	10 Aug 2008 20:41:29 -0000
@@ -33,6 +33,7 @@
 #include "gdb_string.h"
 #include <sys/mman.h>
 #include <sys/ttrace.h>
+#include <signal.h>
 
 #include "inf-child.h"
 #include "inf-ttrace.h"
@@ -513,10 +514,22 @@ Detaching after fork from child process 
 
   if (follow_child)
     {
+      struct thread_info *ti;
+
       /* The child will start out single-threaded.  */
-      inf_ttrace_num_lwps = 0;
+      inf_ttrace_num_lwps = 1;
       inf_ttrace_num_lwps_in_syscall = 0;
 
+      /* Delete parent.  */
+      delete_thread_silent (ptid_build (pid, lwpid, 0));
+
+      /* Add child.  inferior_ptid was already set above.  */
+      ti = add_thread_silent (inferior_ptid);
+      ti->private =
+	xmalloc (sizeof (struct inf_ttrace_private_thread_info));
+      memset (ti->private, 0,
+	      sizeof (struct inf_ttrace_private_thread_info));
+
       /* Reset breakpoints in the child as appropriate.  */
       follow_inferior_reset_breakpoints ();
     }
@@ -721,8 +734,13 @@ inf_ttrace_attach (char *args, int from_
 	      (uintptr_t)&tte, sizeof tte, 0) == -1)
     perror_with_name (("ttrace"));
 
-  inferior_ptid = pid_to_ptid (pid);
   push_target (ttrace_ops_hack);
+
+  /* We'll bump inf_ttrace_num_lwps up and add the private data to the
+     thread as soon as we get to inf_ttrace_wait.  At this point, we
+     don't have lwpid info yet.  */
+  inferior_ptid = pid_to_ptid (pid);
+  add_thread_silent (inferior_ptid);
 }
 
 static void
@@ -784,52 +802,85 @@ inf_ttrace_kill (void)
   target_mourn_inferior ();
 }
 
+/* Check is a dying thread is dead by now, and delete it from GDBs
+   thread list if so.  */
 static int
-inf_ttrace_resume_callback (struct thread_info *info, void *arg)
+inf_ttrace_delete_dead_threads_callback (struct thread_info *info, void *arg)
 {
-  if (!ptid_equal (info->ptid, inferior_ptid))
-    {
-      pid_t pid = ptid_get_pid (info->ptid);
-      lwpid_t lwpid = ptid_get_lwp (info->ptid);
+  lwpid_t lwpid;
+  struct inf_ttrace_private_thread_info *p;
 
-      if (ttrace (TT_LWP_CONTINUE, pid, lwpid, TT_NOPC, 0, 0) == -1)
-	perror_with_name (("ttrace"));
-    }
+  if (is_exited (info->ptid))
+    return 0;
+
+  lwpid = ptid_get_lwp (info->ptid);
+  p = (struct inf_ttrace_private_thread_info *) info->private;
+
+  /* Check if an lwp that was dying is still there or not.  */
+  if (p->dying && (kill (lwpid, 0) == -1))
+    /* It's gone now.  */
+    delete_thread (info->ptid);
 
   return 0;
 }
 
+/* Resume the lwp pointed to by INFO, with REQUEST, and pass it signal
+   SIG.  */
+
+static void
+inf_ttrace_resume_lwp (struct thread_info *info, ttreq_t request, int sig)
+{
+  pid_t pid = ptid_get_pid (info->ptid);
+  lwpid_t lwpid = ptid_get_lwp (info->ptid);
+
+  if (ttrace (request, pid, lwpid, TT_NOPC, sig, 0) == -1)
+    {
+      struct inf_ttrace_private_thread_info *p
+	= (struct inf_ttrace_private_thread_info *) info->private;
+      if (p->dying && errno == EPROTO)
+	/* This is expected, it means the dying lwp is really gone
+	   by now.  If ttrace had an event to inform the debugger
+	   the lwp is really gone, this wouldn't be needed.  */
+	delete_thread (info->ptid);
+      else
+	/* This was really unexpected.  */
+	perror_with_name (("ttrace"));
+    }
+}
+
+/* Callback for iterate_over_threads.  */
+
 static int
-inf_ttrace_delete_dying_threads_callback (struct thread_info *info, void *arg)
+inf_ttrace_resume_callback (struct thread_info *info, void *arg)
 {
-  if (((struct inf_ttrace_private_thread_info *)info->private)->dying == 1)
-    delete_thread (info->ptid);
+  if (!ptid_equal (info->ptid, inferior_ptid) && !is_exited (info->ptid))
+    inf_ttrace_resume_lwp (info, TT_LWP_CONTINUE, 0);
+
   return 0;
 }
 
 static void
 inf_ttrace_resume (ptid_t ptid, int step, enum target_signal signal)
 {
-  pid_t pid = ptid_get_pid (ptid);
-  lwpid_t lwpid = ptid_get_lwp (ptid);
+  int resume_all;
   ttreq_t request = step ? TT_LWP_SINGLE : TT_LWP_CONTINUE;
   int sig = target_signal_to_host (signal);
+  struct thread_info *info;
 
-  if (pid == -1)
-    {
-      pid = ptid_get_pid (inferior_ptid);
-      lwpid = ptid_get_lwp (inferior_ptid);
-    }
+  /* A specific PTID means `step only this process id'.  */
+  resume_all = (ptid_equal (ptid, minus_one_ptid));
 
-  if (ttrace (request, pid, lwpid, TT_NOPC, sig, 0) == -1)
-    perror_with_name (("ttrace"));
-
-  if (ptid_equal (ptid, minus_one_ptid) && inf_ttrace_num_lwps > 0)
-    {
-      /* Let all the other threads run too.  */
-      iterate_over_threads (inf_ttrace_resume_callback, NULL);
-      iterate_over_threads (inf_ttrace_delete_dying_threads_callback, NULL);
-    }
+  /* If resuming all threads, it's the current thread that should be
+     handled specially.  */
+  if (resume_all)
+    ptid = inferior_ptid;
+
+  info = find_thread_pid (ptid);
+  inf_ttrace_resume_lwp (info, request, sig);
+
+  if (resume_all)
+    /* Let all the other threads run too.  */
+    iterate_over_threads (inf_ttrace_resume_callback, NULL);
 }
 
 static ptid_t
@@ -886,6 +937,30 @@ inf_ttrace_wait (ptid_t ptid, struct tar
 
   ptid = ptid_build (tts.tts_pid, tts.tts_lwpid, 0);
 
+  if (inf_ttrace_num_lwps == 0)
+    {
+      struct thread_info *ti;
+
+      inf_ttrace_num_lwps = 1;
+
+      /* This is the earliest we hear about the lwp member of
+	 INFERIOR_PTID, after an attach or fork_inferior.  */
+      gdb_assert (ptid_get_lwp (inferior_ptid) == 0);
+
+      /* We haven't set the private member on the main thread yet.  Do
+	 it now.  */
+      ti = find_thread_pid (inferior_ptid);
+      gdb_assert (ti != NULL && ti->private == NULL);
+      ti->private =
+	xmalloc (sizeof (struct inf_ttrace_private_thread_info));
+      memset (ti->private, 0,
+	      sizeof (struct inf_ttrace_private_thread_info));
+
+      /* Notify the core that this ptid changed.  This changes
+	 inferior_ptid as well.  */
+      thread_change_ptid (inferior_ptid, ptid);
+    }
+
   switch (tts.tts_event)
     {
 #ifdef TTEVT_BPT_SSTEP
@@ -958,17 +1033,6 @@ inf_ttrace_wait (ptid_t ptid, struct tar
     case TTEVT_LWP_CREATE:
       lwpid = tts.tts_u.tts_thread.tts_target_lwpid;
       ptid = ptid_build (tts.tts_pid, lwpid, 0);
-      if (inf_ttrace_num_lwps == 0)
-	{
-	  /* Now that we're going to be multi-threaded, add the
-	     original thread to the list first.  */
-	  ti = add_thread (ptid_build (tts.tts_pid, tts.tts_lwpid, 0));
-	  ti->private =
-	    xmalloc (sizeof (struct inf_ttrace_private_thread_info));
-	  memset (ti->private, 0,
-		  sizeof (struct inf_ttrace_private_thread_info));
-	  inf_ttrace_num_lwps++;
-	}
       ti = add_thread (ptid);
       ti->private =
 	xmalloc (sizeof (struct inf_ttrace_private_thread_info));
@@ -976,7 +1040,12 @@ inf_ttrace_wait (ptid_t ptid, struct tar
 	      sizeof (struct inf_ttrace_private_thread_info));
       inf_ttrace_num_lwps++;
       ptid = ptid_build (tts.tts_pid, tts.tts_lwpid, 0);
-      break;
+      /* Let the lwp_create-caller thread continue.  */
+      ttrace (TT_LWP_CONTINUE, ptid_get_pid (ptid),
+              ptid_get_lwp (ptid), TT_NOPC, 0, 0);
+      /* Return without stopping the whole process.  */
+      ourstatus->kind = TARGET_WAITKIND_IGNORE;
+      return ptid;
 
     case TTEVT_LWP_EXIT:
       if (print_thread_events)
@@ -985,22 +1054,31 @@ inf_ttrace_wait (ptid_t ptid, struct tar
       gdb_assert (ti != NULL);
       ((struct inf_ttrace_private_thread_info *)ti->private)->dying = 1;
       inf_ttrace_num_lwps--;
+      /* Let the thread really exit.  */
       ttrace (TT_LWP_CONTINUE, ptid_get_pid (ptid),
               ptid_get_lwp (ptid), TT_NOPC, 0, 0);
-      /* If we don't return -1 here, core GDB will re-add the thread.  */
-      ptid = minus_one_ptid;
-      break;
+      /* Return without stopping the whole process.  */
+      ourstatus->kind = TARGET_WAITKIND_IGNORE;
+      return ptid;
 
     case TTEVT_LWP_TERMINATE:
       lwpid = tts.tts_u.tts_thread.tts_target_lwpid;
       ptid = ptid_build (tts.tts_pid, lwpid, 0);
-      printf_filtered(_("[%s has been terminated]\n"), target_pid_to_str (ptid));
+      if (print_thread_events)
+	printf_unfiltered(_("[%s has been terminated]\n"),
+			  target_pid_to_str (ptid));
       ti = find_thread_pid (ptid);
       gdb_assert (ti != NULL);
       ((struct inf_ttrace_private_thread_info *)ti->private)->dying = 1;
       inf_ttrace_num_lwps--;
+
+      /* Resume the lwp_terminate-caller thread.  */
       ptid = ptid_build (tts.tts_pid, tts.tts_lwpid, 0);
-      break;
+      ttrace (TT_LWP_CONTINUE, ptid_get_pid (ptid),
+              ptid_get_lwp (ptid), TT_NOPC, 0, 0);
+      /* Return without stopping the whole process.  */
+      ourstatus->kind = TARGET_WAITKIND_IGNORE;
+      return ptid;
 
     case TTEVT_SIGNAL:
       ourstatus->kind = TARGET_WAITKIND_STOPPED;
@@ -1045,10 +1123,15 @@ inf_ttrace_wait (ptid_t ptid, struct tar
   if (ttrace (TT_PROC_STOP, tts.tts_pid, 0, 0, 0, 0) == -1)
     perror_with_name (("ttrace"));
 
-  /* HACK: Twiddle INFERIOR_PTID such that the initial thread of a
-     process isn't recognized as a new thread.  */
-  if (ptid_get_lwp (inferior_ptid) == 0)
-    inferior_ptid = ptid;
+  /* Now that the whole process is stopped, check if any dying thread
+     is really dead by now.  If a dying thread is still alive, it will
+     be stopped too, and will still show up in `info threads', tagged
+     with "(Exiting)".  We could make `info threads' prune dead
+     threads instead via inf_ttrace_thread_alive, but doing this here
+     has the advantage that a frontend is notificed sooner of thread
+     exits.  Note that a dying lwp is still alive, it still has to be
+     resumed, like any other lwp.  */
+  iterate_over_threads (inf_ttrace_delete_dead_threads_callback, NULL);
 
   return ptid;
 }
@@ -1120,26 +1203,38 @@ inf_ttrace_files_info (struct target_ops
 static int
 inf_ttrace_thread_alive (ptid_t ptid)
 {
-  struct thread_info *ti;
-  ti = find_thread_pid (ptid);
-  return !(((struct inf_ttrace_private_thread_info *)ti->private)->dying);
+  return 1;
 }
 
+/* Return a string describing the state of the thread specified by
+   INFO.  */
+
 static char *
-inf_ttrace_pid_to_str (ptid_t ptid)
+inf_ttrace_extra_thread_info (struct thread_info *info)
 {
-  if (inf_ttrace_num_lwps > 0)
-    {
-      pid_t pid = ptid_get_pid (ptid);
-      lwpid_t lwpid = ptid_get_lwp (ptid);
-      static char buf[128];
+  struct inf_ttrace_private_thread_info* private =
+    (struct inf_ttrace_private_thread_info *) info->private;
 
-      xsnprintf (buf, sizeof buf, "process %ld, lwp %ld",
-		 (long)pid, (long)lwpid);
-      return buf;
-    }
+  if (private != NULL && private->dying)
+    return "Exiting";
+
+  return NULL;
+}
+
+static char *
+inf_ttrace_pid_to_str (ptid_t ptid)
+{
+  pid_t pid = ptid_get_pid (ptid);
+  lwpid_t lwpid = ptid_get_lwp (ptid);
+  static char buf[128];
 
-  return normal_pid_to_str (ptid);
+  if (lwpid == 0)
+    xsnprintf (buf, sizeof buf, "process %ld",
+	       (long) pid);
+  else
+    xsnprintf (buf, sizeof buf, "process %ld, lwp %ld",
+	       (long) pid, (long) lwpid);
+  return buf;
 }
 \f
 
@@ -1164,6 +1259,7 @@ inf_ttrace_target (void)
   t->to_follow_fork = inf_ttrace_follow_fork;
   t->to_mourn_inferior = inf_ttrace_mourn_inferior;
   t->to_thread_alive = inf_ttrace_thread_alive;
+  t->to_extra_thread_info = inf_ttrace_extra_thread_info;
   t->to_pid_to_str = inf_ttrace_pid_to_str;
   t->to_xfer_partial = inf_ttrace_xfer_partial;
 


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [4/7] Adjust the ttrace target (HP-UX) to always register the
  2008-08-10  0:36       ` [4/7] Adjust the ttrace target (HP-UX) to always register the John David Anglin
@ 2008-08-10 21:05         ` Pedro Alves
  2008-08-10 21:16           ` John David Anglin
  0 siblings, 1 reply; 25+ messages in thread
From: Pedro Alves @ 2008-08-10 21:05 UTC (permalink / raw)
  To: John David Anglin; +Cc: Daniel Jacobowitz, gdb-patches

On Sunday 10 August 2008 01:35:50, John David Anglin wrote:
> > My understanding is that 1/7, 2/7, 7/7 are approved; 3/7 has a small
> > discussion; 4/7 has a big discussion; and no one's looked at 5/7 or
> > 6/7 yet.  I'll look at those as soon as I get a chance, so poke me if
> > 3/7 and 4/7 are resolved and I haven't done it yet.
>

> I've just tried Pedro's second and third patches for 4/7.

Now *I've* lost track here.  I thought that the "ttrace: Protocal
error" / vla6.f90 bug we were resolving was independent of my 4/7 patch
already registering a main thread (from this thread).

It so happens that I layered the bug fix on top of the 4/7 patch, so
I wouldn't have to readjust it if both patches were accepted.

-- 
Pedro Alves


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [4/7] Adjust the ttrace target (HP-UX) to always register the
  2008-08-10 21:05         ` Pedro Alves
@ 2008-08-10 21:16           ` John David Anglin
  0 siblings, 0 replies; 25+ messages in thread
From: John David Anglin @ 2008-08-10 21:16 UTC (permalink / raw)
  To: Pedro Alves; +Cc: drow, gdb-patches

> > I've just tried Pedro's second and third patches for 4/7.
> 
> Now *I've* lost track here.  I thought that the "ttrace: Protocal
> error" / vla6.f90 bug we were resolving was independent of my 4/7 patch
> already registering a main thread (from this thread).

Yes, I believe it is.  The protocol error appeared in testing your 4/7.
As such, the majority of my testing is with all three patches applied.

Dave
-- 
J. David Anglin                                  dave.anglin@nrc-cnrc.gc.ca
National Research Council of Canada              (613) 990-0752 (FAX: 952-6602)


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [4/7] Adjust the ttrace target (HP-UX) to always register the
  2008-08-10 21:04       ` John David Anglin
@ 2008-08-14 17:55         ` Daniel Jacobowitz
  0 siblings, 0 replies; 25+ messages in thread
From: Daniel Jacobowitz @ 2008-08-14 17:55 UTC (permalink / raw)
  To: John David Anglin; +Cc: pedro, gdb-patches

On Sun, Aug 10, 2008 at 05:03:54PM -0400, John David Anglin wrote:
> > Just FYI, I've completely lost track of this thread - so when there's
> > anything you need approval for, please let me know.
> 
> For the record, I have consolidated Pedro's three changes below.  This
> might make it easier to review the changes.

This combined patch (+changelog) is OK, assuming Pedro's happy with it
(I assume he is :-).

-- 
Daniel Jacobowitz
CodeSourcery


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: ttrace: Protocal error
       [not found] <no.id>
  2008-08-08 19:30 ` ttrace: Protocal error John David Anglin
@ 2008-08-09 23:40 ` John David Anglin
  1 sibling, 0 replies; 25+ messages in thread
From: John David Anglin @ 2008-08-09 23:40 UTC (permalink / raw)
  To: John David Anglin; +Cc: pedro, gdb-patches

> (gdb) r
> Starting program: /mnt/gnu/gcc/objdir/hppa2.0w-hp-hpux11.11/libgomp/testsuite/vla6.x3g 

Trying some really old versions of gdb, I see this problem has been
around for a long time.

Dave
-- 
J. David Anglin                                  dave.anglin@nrc-cnrc.gc.ca
National Research Council of Canada              (613) 990-0752 (FAX: 952-6602)


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: ttrace: Protocal error
  2008-08-09 22:51         ` Pedro Alves
@ 2008-08-09 23:19           ` John David Anglin
  0 siblings, 0 replies; 25+ messages in thread
From: John David Anglin @ 2008-08-09 23:19 UTC (permalink / raw)
  To: Pedro Alves; +Cc: gdb-patches

> On Saturday 09 August 2008 23:45:23, Pedro Alves wrote:
> 
> > Hope I haven't broken anything badly.  I've never in my life logged in
> > to an HP-UX system, so wear sunglasses.
> 
> And of course, I meant to say that this applies on top of my other
> patch to register the main thread; and that this is an alternative
> patch for consideration.  It's fine with me to go the other simpler
> route, as per your recent patch.

I always work at a distance in case things blow up...

My patch isn't a perfect solution.  While EPROTO is the most
frequent error when resuming, I have also seen EINVAL and ESRCH.

Then, if I pound real hard trying to make vla6.x3g fail:

(gdb) r
Starting program: /mnt/gnu/gcc/objdir/hppa2.0w-hp-hpux11.11/libgomp/testsuite/vla6.x3g 
vfork: Resource temporarily unavailable.
(gdb) r
Starting program: /mnt/gnu/gcc/objdir/hppa2.0w-hp-hpux11.11/libgomp/testsuite/vla6.x3g 
vfork: Resource temporarily unavailable.
...

-bash-3.2$ ps -ef|grep vla|wc
73 585 8121

For some reason, the vla6.x3g processes aren't exiting when run under gdb
in spite of the fact that gdb says they exited normally.

I'll look at your patches.

Dave
-- 
J. David Anglin                                  dave.anglin@nrc-cnrc.gc.ca
National Research Council of Canada              (613) 990-0752 (FAX: 952-6602)


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: ttrace: Protocal error
  2008-08-09 22:46       ` Pedro Alves
@ 2008-08-09 22:51         ` Pedro Alves
  2008-08-09 23:19           ` John David Anglin
  0 siblings, 1 reply; 25+ messages in thread
From: Pedro Alves @ 2008-08-09 22:51 UTC (permalink / raw)
  To: gdb-patches; +Cc: John David Anglin

On Saturday 09 August 2008 23:45:23, Pedro Alves wrote:

> Hope I haven't broken anything badly.  I've never in my life logged in
> to an HP-UX system, so wear sunglasses.

And of course, I meant to say that this applies on top of my other
patch to register the main thread; and that this is an alternative
patch for consideration.  It's fine with me to go the other simpler
route, as per your recent patch.

-- 
Pedro Alves


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: ttrace: Protocal error
  2008-08-09 14:52     ` Pedro Alves
                         ` (2 preceding siblings ...)
  2008-08-09 22:46       ` Pedro Alves
@ 2008-08-09 22:48       ` Pedro Alves
  3 siblings, 0 replies; 25+ messages in thread
From: Pedro Alves @ 2008-08-09 22:48 UTC (permalink / raw)
  To: gdb-patches; +Cc: John David Anglin

[-- Attachment #1: Type: text/plain, Size: 660 bytes --]

On Saturday 09 August 2008 15:51:16, Pedro Alves wrote:
> Also, when we detect a TTEVT_LWP_CREATE, TTEVT_LWP_EXIT or
> TTEVT_LWP_TERMINATE, the ttrace docs indicate that only one lwp is
> stopped.  There's no reason to stop all lwps, and return
> TARGET_WAITKIND_SPURIOUS, only to resume all lwps again.
> This just adds overhead and messes more with the
> scheduling of the inferior than needed.  We could just resume
> the stopped lwp, and return TARGET_WAITKIND_IGNORE.

Here's a patch to do this.  It applies on top of the other I just sent,
which itself applied on top of my other patch to always register
the main thread.

-- 
Pedro Alves

[-- Attachment #2: fix_daves_bug_part_2.diff --]
[-- Type: text/x-diff, Size: 2720 bytes --]

2008-08-09  Pedro Alves  <pedro@codesourcery.com>

	* inf-ttrace.c (inf_ttrace_wait): On TTEVT_LWP_CREATE and
	LWP_TERMINATE, resume the caller thread.  On TTEVT_LWP_CREATE,
	TTEVT_LWP_EXIT and TTEVT_LWP_TERMINATE, don't stop the whole
	process, and return TARGET_WAITKIND_IGNORE.

---
 gdb/inf-ttrace.c |   26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

Index: src/gdb/inf-ttrace.c
===================================================================
--- src.orig/gdb/inf-ttrace.c	2008-08-09 23:15:40.000000000 +0100
+++ src/gdb/inf-ttrace.c	2008-08-09 23:15:53.000000000 +0100
@@ -1041,7 +1041,12 @@ inf_ttrace_wait (ptid_t ptid, struct tar
 	      sizeof (struct inf_ttrace_private_thread_info));
       inf_ttrace_num_lwps++;
       ptid = ptid_build (tts.tts_pid, tts.tts_lwpid, 0);
-      break;
+      /* Let the lwp_create-caller thread continue.  */
+      ttrace (TT_LWP_CONTINUE, ptid_get_pid (ptid),
+              ptid_get_lwp (ptid), TT_NOPC, 0, 0);
+      /* Return without stopping the whole process.  */
+      ourstatus->kind = TARGET_WAITKIND_IGNORE;
+      return ptid;
 
     case TTEVT_LWP_EXIT:
       if (print_thread_events)
@@ -1050,22 +1055,31 @@ inf_ttrace_wait (ptid_t ptid, struct tar
       gdb_assert (ti != NULL);
       ((struct inf_ttrace_private_thread_info *)ti->private)->dying = 1;
       inf_ttrace_num_lwps--;
+      /* Let the thread really exit.  */
       ttrace (TT_LWP_CONTINUE, ptid_get_pid (ptid),
               ptid_get_lwp (ptid), TT_NOPC, 0, 0);
-      /* If we don't return -1 here, core GDB will re-add the thread.  */
-      ptid = minus_one_ptid;
-      break;
+      /* Return without stopping the whole process.  */
+      ourstatus->kind = TARGET_WAITKIND_IGNORE;
+      return ptid;
 
     case TTEVT_LWP_TERMINATE:
       lwpid = tts.tts_u.tts_thread.tts_target_lwpid;
       ptid = ptid_build (tts.tts_pid, lwpid, 0);
-      printf_filtered(_("[%s has been terminated]\n"), target_pid_to_str (ptid));
+      if (print_thread_events)
+	printf_unfiltered(_("[%s has been terminated]\n")
+			  target_pid_to_str (ptid));
       ti = find_thread_pid (ptid);
       gdb_assert (ti != NULL);
       ((struct inf_ttrace_private_thread_info *)ti->private)->dying = 1;
       inf_ttrace_num_lwps--;
+
+      /* Resume the lwp_terminate-caller thread.  */
       ptid = ptid_build (tts.tts_pid, tts.tts_lwpid, 0);
-      break;
+      ttrace (TT_LWP_CONTINUE, ptid_get_pid (ptid),
+              ptid_get_lwp (ptid), TT_NOPC, 0, 0);
+      /* Return without stopping the whole process.  */
+      ourstatus->kind = TARGET_WAITKIND_IGNORE;
+      return ptid;
 
     case TTEVT_SIGNAL:
       ourstatus->kind = TARGET_WAITKIND_STOPPED;

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: ttrace: Protocal error
  2008-08-09 14:52     ` Pedro Alves
  2008-08-09 15:34       ` John David Anglin
  2008-08-09 18:49       ` John David Anglin
@ 2008-08-09 22:46       ` Pedro Alves
  2008-08-09 22:51         ` Pedro Alves
  2008-08-09 22:48       ` Pedro Alves
  3 siblings, 1 reply; 25+ messages in thread
From: Pedro Alves @ 2008-08-09 22:46 UTC (permalink / raw)
  To: gdb-patches; +Cc: John David Anglin

[-- Attachment #1: Type: text/plain, Size: 2804 bytes --]

On Saturday 09 August 2008 15:51:16, Pedro Alves wrote:


> inf_ttrace_wait ()
> ...
>       case TTEVT_LWP_EXIT:
>         if (print_thread_events)
>           printf_unfiltered (_("[%s exited]\n"), target_pid_to_str (ptid));
>         ti = find_thread_pid (ptid);
>         gdb_assert (ti != NULL);
>         ((struct inf_ttrace_private_thread_info *)ti->private)->dying = 1;
>         inf_ttrace_num_lwps--;
> (1)     ttrace (TT_LWP_CONTINUE, ptid_get_pid (ptid),
>               ptid_get_lwp (ptid), TT_NOPC, 0, 0);
>         /* If we don't return -1 here, core GDB will re-add the thread.  */
>         ptid = minus_one_ptid;
>         break;
> ...
>
>     /* Make sure all threads within the process are stopped.  */
> (2)  if (ttrace (TT_PROC_STOP, tts.tts_pid, 0, 0, 0, 0) == -1)
>        perror_with_name (("ttrace"));
>
>     return ptid;
>   }
>
>
> It seems to me, that for some reason, in most cases, the inferior was slow
> enough that when you reach (2), the dying thread hadn't exited
> yet.  The TT_PROC_STOP call stops all lwps of the process, the
> dying one included, I would think.  In that case, you still need the
> resume on the dying thread in inf_ttrace_wait.  Otherwise, you *may*
> get this bug back, depending on how the OS is waking waiting processes:


> So, to minimise the possible race, how about:
>
> - still try to resume a dying lwp.  Ignore the errno you
>   were originally seeing in that case (only).
> - on resume failure, delete it from GDBs thread table.
> - if by any chance, the lwp exits, and the inferior spawns a
>   new lwp, and the OS reuses the same lwpid of the lwp we knew
>   was dying, we delete the dying lwp, and add the new one.
>   If the OS is reusing the id, the original lwp has to be gone.
>   This is just an add_thread call, as that is already handled by it
>   internally (*).
> - If the thread is still alive, but is dying, let that show
>   in "info threads".  The linux pthread support implementation
>   also does this.

This is what the attached patch does.  In addition to what is
described above, I'm checking if any dying thread is now gone
after stopping the whole process.  I'm checking for lwp "aliveness"
with sending signal 0.  I hope it works as expected against
ttrace stopped threads, otherwise, I'd need another way to detect
if the lwp is still alive.

With this change, we no longer unconditionally delete the dying
lwps after the first resume.  This guards against another event that
was already queued being handled, and GDB stopping the whole process,
before the dying thread has had a chance to die.  In this case, we'll
still need another resume in the dying lwp -- until it really exits.

Hope I haven't broken anything badly.  I've never in my life logged in
to an HP-UX system, so wear sunglasses.

-- 
Pedro Alves

[-- Attachment #2: fix_daves_bug.diff --]
[-- Type: text/x-diff, Size: 6398 bytes --]

2008-08-09  Pedro Alves  <pedro@codesourcery.com>

	* inf-ttrace.c: Include <signal.h>
	(inf_ttrace_delete_dead_threads_callback): New.
	(inf_ttrace_resume_lwp): New.
	(inf_ttrace_resume_callback, inf_ttrace_resume): Rewrite.  Don't
	delete dying threads until they are really dead.
	(inf_ttrace_wait): After stopping the whole process, delete any
	dying thread that is really dead by now.
	(inf_ttrace_thread_alive): Return 1.
	(inf_ttrace_extra_thread_info): New.
	(inf_ttrace_target): Register inf_ttrace_extra_thread_info.

---
 gdb/inf-ttrace.c |  118 +++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 88 insertions(+), 30 deletions(-)

Index: src/gdb/inf-ttrace.c
===================================================================
--- src.orig/gdb/inf-ttrace.c	2008-08-09 15:10:27.000000000 +0100
+++ src/gdb/inf-ttrace.c	2008-08-09 23:24:52.000000000 +0100
@@ -33,6 +33,7 @@
 #include "gdb_string.h"
 #include <sys/mman.h>
 #include <sys/ttrace.h>
+#include <signal.h>
 
 #include "inf-child.h"
 #include "inf-ttrace.h"
@@ -801,52 +802,85 @@ inf_ttrace_kill (void)
   target_mourn_inferior ();
 }
 
+/* Check is a dying thread is dead by now, and delete it from GDBs
+   thread list if so.  */
 static int
-inf_ttrace_resume_callback (struct thread_info *info, void *arg)
+inf_ttrace_delete_dead_threads_callback (struct thread_info *info, void *arg)
 {
-  if (!ptid_equal (info->ptid, inferior_ptid) && !is_exited (info->ptid))
-    {
-      pid_t pid = ptid_get_pid (info->ptid);
-      lwpid_t lwpid = ptid_get_lwp (info->ptid);
+  lwpid_t lwpid;
+  struct inf_ttrace_private_thread_info *p;
 
-      if (ttrace (TT_LWP_CONTINUE, pid, lwpid, TT_NOPC, 0, 0) == -1)
-	perror_with_name (("ttrace"));
-    }
+  if (is_exited (info->ptid))
+    return 0;
+
+  lwpid = ptid_get_lwp (info->ptid);
+  p = (struct inf_ttrace_private_thread_info *) info->private;
+
+  /* Check if an lwp that was dying is still there or not.  */
+  if (p->dying && (kill (lwpid, 0) == -1))
+    /* It's gone now.  */
+    delete_thread (info->ptid);
 
   return 0;
 }
 
+/* Resume the lwp pointed to by INFO, with REQUEST, and pass it signal
+   SIG.  */
+
+static void
+inf_ttrace_resume_lwp (struct thread_info *info, ttreq_t request, int sig)
+{
+  pid_t pid = ptid_get_pid (info->ptid);
+  lwpid_t lwpid = ptid_get_lwp (info->ptid);
+
+  if (ttrace (request, pid, lwpid, TT_NOPC, sig, 0) == -1)
+    {
+      struct inf_ttrace_private_thread_info *p
+	= (struct inf_ttrace_private_thread_info *) info->private;
+      if (p->dying && errno == EPROTO)
+	/* This is expected, it means the dying lwp is really gone
+	   by now.  If ttrace had an event to inform the debugger
+	   the lwp is really gone, this wouldn't be needed.  */
+	delete_thread (info->ptid);
+      else
+	/* This was really unexpected.  */
+	perror_with_name (("ttrace"));
+    }
+}
+
+/* Callback for iterate_over_threads.  */
+
 static int
-inf_ttrace_delete_dying_threads_callback (struct thread_info *info, void *arg)
+inf_ttrace_resume_callback (struct thread_info *info, void *arg)
 {
-  if (((struct inf_ttrace_private_thread_info *)info->private)->dying == 1)
-    delete_thread (info->ptid);
+  if (!ptid_equal (info->ptid, inferior_ptid) && !is_exited (info->ptid))
+    inf_ttrace_resume_lwp (info, TT_LWP_CONTINUE, 0);
+
   return 0;
 }
 
 static void
 inf_ttrace_resume (ptid_t ptid, int step, enum target_signal signal)
 {
-  pid_t pid = ptid_get_pid (ptid);
-  lwpid_t lwpid = ptid_get_lwp (ptid);
+  int resume_all;
   ttreq_t request = step ? TT_LWP_SINGLE : TT_LWP_CONTINUE;
   int sig = target_signal_to_host (signal);
+  struct thread_info *info;
 
-  if (pid == -1)
-    {
-      pid = ptid_get_pid (inferior_ptid);
-      lwpid = ptid_get_lwp (inferior_ptid);
-    }
+  /* A specific PTID means `step only this process id'.  */
+  resume_all = (ptid_equal (ptid, minus_one_ptid));
 
-  if (ttrace (request, pid, lwpid, TT_NOPC, sig, 0) == -1)
-    perror_with_name (("ttrace"));
-
-  if (ptid_equal (ptid, minus_one_ptid))
-    {
-      /* Let all the other threads run too.  */
-      iterate_over_threads (inf_ttrace_resume_callback, NULL);
-      iterate_over_threads (inf_ttrace_delete_dying_threads_callback, NULL);
-    }
+  /* If resuming all threads, it's the current thread that should be
+     handled specially.  */
+  if (resume_all)
+    ptid = inferior_ptid;
+
+  info = thread_find_pid (ptid);
+  inf_ttrace_resume_lwp (info, request, sig);
+
+  if (resume_all)
+    /* Let all the other threads run too.  */
+    iterate_over_threads (inf_ttrace_resume_callback, NULL);
 }
 
 static ptid_t
@@ -1075,6 +1109,16 @@ inf_ttrace_wait (ptid_t ptid, struct tar
   if (ttrace (TT_PROC_STOP, tts.tts_pid, 0, 0, 0, 0) == -1)
     perror_with_name (("ttrace"));
 
+  /* Now that the whole process is stopped, check if any dying thread
+     is really dead by now.  If a dying thread is still alive, it will
+     be stopped too, and will still show up in `info threads', tagged
+     with "(Exiting)".  We could make `info threads' prune dead
+     threads instead via inf_ttrace_thread_alive, but doing this here
+     has the advantage that a frontend is notified sooner of thread
+     exits.  Note that a dying lwp is still alive, it still has to be
+     resumed, like any other lwp.  */
+  iterate_over_threads (inf_ttrace_delete_dead_threads_callback, NULL);
+
   return ptid;
 }
 
@@ -1145,9 +1189,22 @@ inf_ttrace_files_info (struct target_ops
 static int
 inf_ttrace_thread_alive (ptid_t ptid)
 {
-  struct thread_info *ti;
-  ti = find_thread_pid (ptid);
-  return !(((struct inf_ttrace_private_thread_info *)ti->private)->dying);
+  return 1;
+}
+
+/* Return a string describing the state of the thread specified by
+   INFO.  */
+
+static char *
+inf_ttrace_extra_thread_info (struct thread_info *info)
+{
+  struct inf_ttrace_private_thread_info* private =
+    (struct inf_ttrace_private_thread_info *) info->private;
+
+  if (private != NULL && private->dying)
+    return "Exiting";
+
+  return NULL;
 }
 
 static char *
@@ -1188,6 +1245,7 @@ inf_ttrace_target (void)
   t->to_follow_fork = inf_ttrace_follow_fork;
   t->to_mourn_inferior = inf_ttrace_mourn_inferior;
   t->to_thread_alive = inf_ttrace_thread_alive;
+  t->to_extra_thread_info = inf_ttrace_extra_thread_info;
   t->to_pid_to_str = inf_ttrace_pid_to_str;
   t->to_xfer_partial = inf_ttrace_xfer_partial;
 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: ttrace: Protocal error
  2008-08-09 18:49       ` John David Anglin
@ 2008-08-09 22:45         ` Pedro Alves
  0 siblings, 0 replies; 25+ messages in thread
From: Pedro Alves @ 2008-08-09 22:45 UTC (permalink / raw)
  To: John David Anglin; +Cc: gdb-patches

On Saturday 09 August 2008 19:48:23, John David Anglin wrote:
> [process 5170, lwp 7674424 exited]
> infrun: infwait_normal_state
> infrun: TARGET_WAITKIND_SPURIOUS
> infrun: resume (step=0, signal=0), stepping_over_breakpoint=0
> 0xc00268dc in ?? ()
> ttrace: Protocol error.

Ok, that clearly shows that's the first resume after a TTEVT_LWP_EXIT.
No TARGET_WAITKIND_SYSCALL_ENTRY involved after all.

Good, I can sleep better now.  ;-)

Thanks,

-- 
Pedro Alves


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: ttrace: Protocal error
  2008-08-09 14:52     ` Pedro Alves
  2008-08-09 15:34       ` John David Anglin
@ 2008-08-09 18:49       ` John David Anglin
  2008-08-09 22:45         ` Pedro Alves
  2008-08-09 22:46       ` Pedro Alves
  2008-08-09 22:48       ` Pedro Alves
  3 siblings, 1 reply; 25+ messages in thread
From: John David Anglin @ 2008-08-09 18:49 UTC (permalink / raw)
  To: Pedro Alves; +Cc: gdb-patches

[-- Attachment #1: Type: text/plain, Size: 1661 bytes --]

On Sat, 09 Aug 2008, Pedro Alves wrote:

> If you were really in a TARGET_WAITKIND_SYSCALL_ENTRY, this would be
> at least the second resume after the lwp exit.
> Maybe I am reading the backtrace wrong though.  If you have the patience,
> showing what GDB outputs when you do a "run" after setting
> "set debug infrun 1" would help.

Attached.

Ignoring protocol errors seems to work.  Used the following.  Note
there is a conflict with your change.

Index: inf-ttrace.c
===================================================================
RCS file: /cvs/src/src/gdb/inf-ttrace.c,v
retrieving revision 1.30
diff -u -3 -p -r1.30 inf-ttrace.c
--- inf-ttrace.c	9 Jul 2008 22:23:05 -0000	1.30
+++ inf-ttrace.c	9 Aug 2008 18:35:51 -0000
@@ -787,12 +804,16 @@ inf_ttrace_kill (void)
 static int
 inf_ttrace_resume_callback (struct thread_info *info, void *arg)
 {
-  if (!ptid_equal (info->ptid, inferior_ptid))
+  /* There is a race condition in detecting when a thread is stopped.
+     So, we need to resume "dying" threads and ignore protocol errors
+     that occur from resuming threads that aren't stopped.  */
+  if (!ptid_equal (info->ptid, inferior_ptid) && !is_exited (info->ptid))
     {
       pid_t pid = ptid_get_pid (info->ptid);
       lwpid_t lwpid = ptid_get_lwp (info->ptid);
 
-      if (ttrace (TT_LWP_CONTINUE, pid, lwpid, TT_NOPC, 0, 0) == -1)
+      if (ttrace (TT_LWP_CONTINUE, pid, lwpid, TT_NOPC, 0, 0) == -1
+	  && errno != EPROTO)
 	perror_with_name (("ttrace"));
     }
 
Dave
-- 
J. David Anglin                                  dave.anglin@nrc-cnrc.gc.ca
National Research Council of Canada              (613) 990-0752 (FAX: 952-6602)

[-- Attachment #2: vla6.dbg.2 --]
[-- Type: text/plain, Size: 1427 bytes --]

(gdb) r
The program being debugged has been started already.
Start it from the beginning? (y or n) y

Starting program: /mnt/gnu/gcc/objdir/hppa2.0w-hp-hpux11.11/libgomp/testsuite/vla6.x3g 
infrun: wait_for_inferior (treat_exec_as_sigtrap=1)
infrun: infwait_normal_state
infrun: TARGET_WAITKIND_STOPPED
infrun: stop_pc = 0x91d0
infrun: quietly stopped
infrun: stop_stepping
infrun: resume (step=0, signal=0), stepping_over_breakpoint=0
infrun: wait_for_inferior (treat_exec_as_sigtrap=1)
infrun: infwait_normal_state
infrun: TARGET_WAITKIND_STOPPED
infrun: stop_pc = 0x19f8
infrun: quietly stopped
infrun: stop_stepping
warning: Private mapping of shared library text was not specified
by the executable; setting a breakpoint in a shared library which
is not privately mapped will not work.  See the HP-UX 11i v3 chatr
manpage for methods to privately map shared library text.
infrun: proceed (addr=0xffffffff, signal=0, step=0)
infrun: resume (step=0, signal=0), stepping_over_breakpoint=0
infrun: wait_for_inferior (treat_exec_as_sigtrap=0)
[New process 5170, lwp 7674424]
infrun: infwait_normal_state
infrun: TARGET_WAITKIND_SPURIOUS
infrun: resume (step=0, signal=0), stepping_over_breakpoint=0
infrun: prepare_to_wait
[process 5170, lwp 7674424 exited]
infrun: infwait_normal_state
infrun: TARGET_WAITKIND_SPURIOUS
infrun: resume (step=0, signal=0), stepping_over_breakpoint=0
0xc00268dc in ?? ()
ttrace: Protocol error.


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: ttrace: Protocal error
  2008-08-09 14:52     ` Pedro Alves
@ 2008-08-09 15:34       ` John David Anglin
  2008-08-09 18:49       ` John David Anglin
                         ` (2 subsequent siblings)
  3 siblings, 0 replies; 25+ messages in thread
From: John David Anglin @ 2008-08-09 15:34 UTC (permalink / raw)
  To: Pedro Alves; +Cc: gdb-patches

[-- Attachment #1: Type: text/plain, Size: 2010 bytes --]

On Sat, 09 Aug 2008, Pedro Alves wrote:

> If I'm analysing the race correctly, an alternative simpler solution
> to the above, would be to just ignore a failed resume on a
> dying thread, but still try to resume it.  Not resuming dying threads
> may be a problem.

It occurred to me that my suggested change wasn't going to handle the
race associated with threads dying, although it improves the situation.

The ttrace documentation suggested that only stopped threads should
be resumed.  The thread info indicated the thread in question was
running.  I changed the code to only check for thread stopped.  However,
this didn't work.  My testcase hangs when run under gdb:

(gdb) r
The program being debugged has been started already.
Start it from the beginning? (y or n) y

Starting program: /mnt/gnu/gcc/objdir/hppa2.0w-hp-hpux11.11/libgomp/testsuite/vla6.x3g 
warning: Private mapping of shared library text was not specified
by the executable; setting a breakpoint in a shared library which
is not privately mapped will not work.  See the HP-UX 11i v3 chatr
manpage for methods to privately map shared library text.
[New process 2316, lwp 7552188]
[process 2316, lwp 7552188 exited]
[New process 2316, lwp 7552189]
[process 2316, lwp 7552189 exited]
[New process 2316, lwp 7552190]
[process 2316, lwp 7552190 exited]
[New process 2316, lwp 7552191]
[process 2316, lwp 7552191 exited]
[New process 2316, lwp 7552192]
[New process 2316, lwp 7552193]
[New process 2316, lwp 7552194]
[New process 2316, lwp 7552195]
[New process 2316, lwp 7552196]

Program received signal SIGINT, Interrupt.
[Switching to process 2316, lwp 7552196]
0xc03bea58 in __lwp_sema_wait () from /usr/lib/librt.2

So, there's apparently a race in detection when a program is stopped.
It looks like we need to try your suggestion(s).

Attached same with debug infrun 1.

Dave
-- 
J. David Anglin                                  dave.anglin@nrc-cnrc.gc.ca
National Research Council of Canada              (613) 990-0752 (FAX: 952-6602)

[-- Attachment #2: vla6.dbg --]
[-- Type: text/plain, Size: 4141 bytes --]

(gdb) set debug infrun 1
(gdb) r
Starting program: /mnt/gnu/gcc/objdir/hppa2.0w-hp-hpux11.11/libgomp/testsuite/vla6.x3g 
infrun: wait_for_inferior (treat_exec_as_sigtrap=1)
infrun: infwait_normal_state
infrun: TARGET_WAITKIND_STOPPED
infrun: stop_pc = 0x91d0
infrun: quietly stopped
infrun: stop_stepping
infrun: resume (step=0, signal=0), stepping_over_breakpoint=0
infrun: wait_for_inferior (treat_exec_as_sigtrap=1)
infrun: infwait_normal_state
infrun: TARGET_WAITKIND_STOPPED
infrun: stop_pc = 0x19f8
infrun: quietly stopped
infrun: stop_stepping
warning: Private mapping of shared library text was not specified
by the executable; setting a breakpoint in a shared library which
is not privately mapped will not work.  See the HP-UX 11i v3 chatr
manpage for methods to privately map shared library text.
infrun: proceed (addr=0xffffffff, signal=0, step=0)
infrun: resume (step=0, signal=0), stepping_over_breakpoint=0
infrun: wait_for_inferior (treat_exec_as_sigtrap=0)
[New process 2557, lwp 7552464]
infrun: infwait_normal_state
infrun: TARGET_WAITKIND_SPURIOUS
infrun: resume (step=0, signal=0), stepping_over_breakpoint=0
infrun: prepare_to_wait
[process 2557, lwp 7552464 exited]
infrun: infwait_normal_state
infrun: TARGET_WAITKIND_SPURIOUS
infrun: resume (step=0, signal=0), stepping_over_breakpoint=0
infrun: prepare_to_wait
[New process 2557, lwp 7552465]
infrun: infwait_normal_state
infrun: TARGET_WAITKIND_SPURIOUS
infrun: resume (step=0, signal=0), stepping_over_breakpoint=0
infrun: prepare_to_wait
[process 2557, lwp 7552465 exited]
infrun: infwait_normal_state
infrun: TARGET_WAITKIND_SPURIOUS
infrun: resume (step=0, signal=0), stepping_over_breakpoint=0
infrun: prepare_to_wait
[New process 2557, lwp 7552466]
infrun: infwait_normal_state
infrun: TARGET_WAITKIND_SPURIOUS
infrun: resume (step=0, signal=0), stepping_over_breakpoint=0
infrun: prepare_to_wait
[process 2557, lwp 7552466 exited]
infrun: infwait_normal_state
infrun: TARGET_WAITKIND_SPURIOUS
infrun: resume (step=0, signal=0), stepping_over_breakpoint=0
infrun: prepare_to_wait
[New process 2557, lwp 7552467]
infrun: infwait_normal_state
infrun: TARGET_WAITKIND_SPURIOUS
infrun: resume (step=0, signal=0), stepping_over_breakpoint=0
infrun: prepare_to_wait
[process 2557, lwp 7552467 exited]
infrun: infwait_normal_state
infrun: TARGET_WAITKIND_SPURIOUS
infrun: resume (step=0, signal=0), stepping_over_breakpoint=0
infrun: prepare_to_wait
infrun: infwait_normal_state
infrun: TARGET_WAITKIND_STOPPED
infrun: stop_pc = 0x19e0
infrun: BPSTAT_WHAT_CHECK_SHLIBS
infrun: no stepping, continue
infrun: resume (step=1, signal=0), stepping_over_breakpoint=1
infrun: prepare_to_wait
infrun: infwait_normal_state
infrun: TARGET_WAITKIND_STOPPED
infrun: stop_pc = 0x19e4
infrun: no stepping, continue
infrun: resume (step=0, signal=0), stepping_over_breakpoint=0
infrun: prepare_to_wait
[New process 2557, lwp 7552468]
infrun: infwait_normal_state
infrun: TARGET_WAITKIND_SPURIOUS
infrun: resume (step=0, signal=0), stepping_over_breakpoint=0
infrun: prepare_to_wait
[New process 2557, lwp 7552469]
infrun: infwait_normal_state
infrun: TARGET_WAITKIND_SPURIOUS
infrun: resume (step=0, signal=0), stepping_over_breakpoint=0
infrun: prepare_to_wait
[New process 2557, lwp 7552470]
infrun: infwait_normal_state
infrun: TARGET_WAITKIND_SPURIOUS
infrun: resume (step=0, signal=0), stepping_over_breakpoint=0
infrun: prepare_to_wait
[New process 2557, lwp 7552471]
infrun: infwait_normal_state
infrun: TARGET_WAITKIND_SPURIOUS
infrun: resume (step=0, signal=0), stepping_over_breakpoint=0
infrun: prepare_to_wait
[New process 2557, lwp 7552472]
infrun: infwait_normal_state
infrun: TARGET_WAITKIND_SPURIOUS
infrun: resume (step=0, signal=0), stepping_over_breakpoint=0
infrun: prepare_to_wait
infrun: infwait_normal_state
infrun: TARGET_WAITKIND_STOPPED
infrun: stop_pc = 0xc020c878
infrun: context switch
infrun: Switching context from process 2557, lwp 7552460 to process 2557, lwp 7552472
infrun: random signal 2

Program received signal SIGINT, Interrupt.
infrun: stop_stepping
[Switching to process 2557, lwp 7552472]
0xc020c878 in __ksleep () from /usr/lib/libc.2


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: ttrace: Protocal error
  2008-08-08 20:16   ` John David Anglin
  2008-08-09 14:52     ` Pedro Alves
@ 2008-08-09 14:53     ` Joel Brobecker
  1 sibling, 0 replies; 25+ messages in thread
From: Joel Brobecker @ 2008-08-09 14:53 UTC (permalink / raw)
  To: John David Anglin; +Cc: pedro, gdb-patches

> 2008-08-08  John David Anglin  <dave.anglin@nrc-cnrc.gc.ca>
> 
> 	* inf-ttrace.c (inf_ttrace_resume_callback): Don't resume dying thread.

That looks OK, but did you check the effect on the testsuite results?
I want to make sure that we're not causing a program to hang at the end
just because a dying thread wasn't allowed to die.

Also, I wonder why we're deleting the "dying" threads from the thread list
at resume time:

   static void
   inf_ttrace_resume (ptid_t ptid, int step, enum target_signal signal)
   {
     [...]
     if (ptid_equal (ptid, minus_one_ptid) && inf_ttrace_num_lwps > 0)
       {
         /* Let all the other threads run too.  */
         iterate_over_threads (inf_ttrace_resume_callback, NULL);
         iterate_over_threads (inf_ttrace_delete_dying_threads_callback, NULL);
       }
   }

-- 
Joel


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: ttrace: Protocal error
  2008-08-08 20:16   ` John David Anglin
@ 2008-08-09 14:52     ` Pedro Alves
  2008-08-09 15:34       ` John David Anglin
                         ` (3 more replies)
  2008-08-09 14:53     ` Joel Brobecker
  1 sibling, 4 replies; 25+ messages in thread
From: Pedro Alves @ 2008-08-09 14:52 UTC (permalink / raw)
  To: John David Anglin; +Cc: gdb-patches

On Friday 08 August 2008 21:14:56, John David Anglin wrote:
> > Hmmm, the thread seems to have exited but state_ is still 1.
>
> The patch below seems to fix the problem.  I was finally able
> to catch an abort in vla6.f90.
>
> Ok?
>
>
>         * inf-ttrace.c (inf_ttrace_resume_callback): Don't resume dying
> thread.
>
> Index: inf-ttrace.c
> ===================================================================
> RCS file: /cvs/src/src/gdb/inf-ttrace.c,v
> retrieving revision 1.30
> diff -u -3 -p -r1.30 inf-ttrace.c
> --- inf-ttrace.c        9 Jul 2008 22:23:05 -0000       1.30
> +++ inf-ttrace.c        8 Aug 2008 19:57:13 -0000
> @@ -787,7 +804,9 @@ inf_ttrace_kill (void)
>  static int
>  inf_ttrace_resume_callback (struct thread_info *info, void *arg)
>  {
> -  if (!ptid_equal (info->ptid, inferior_ptid))
> +  if (!ptid_equal (info->ptid, inferior_ptid)
> +      && !((struct inf_ttrace_private_thread_info *)info->private)->dying
> +      && !is_exited (info->ptid))
>      {
>        pid_t pid = ptid_get_pid (info->ptid);
>        lwpid_t lwpid = ptid_get_lwp (info->ptid);

From your original backtrace, I'm still puzzled on how you got here.  When
we tag the thread as dying, we return TARGET_WAITKIND_SPURIOUS, which
triggers a resume.  This first resume should have removed the thread
from the thread list.  

If you were really in a TARGET_WAITKIND_SYSCALL_ENTRY, this would be
at least the second resume after the lwp exit.
Maybe I am reading the backtrace wrong though.  If you have the patience,
showing what GDB outputs when you do a "run" after setting
"set debug infrun 1" would help.

Looking again at the TTEVT_LWP_EXIT event, which tags the thread as dying.
When we get it, the lwp is not dead yet:

"TTEVT_LWP_EXIT

    This event flag indicates that the debugger wants to be notified when a 
thread is exiting via the lwp_exit() system call. The thread stops upon entry 
to the system call."

That's why we resume it immediately here,

inf_ttrace_wait ()
...
      case TTEVT_LWP_EXIT:
        if (print_thread_events)
          printf_unfiltered (_("[%s exited]\n"), target_pid_to_str (ptid));
        ti = find_thread_pid (ptid);
        gdb_assert (ti != NULL);
        ((struct inf_ttrace_private_thread_info *)ti->private)->dying = 1;
        inf_ttrace_num_lwps--;
(1)     ttrace (TT_LWP_CONTINUE, ptid_get_pid (ptid),
              ptid_get_lwp (ptid), TT_NOPC, 0, 0);
        /* If we don't return -1 here, core GDB will re-add the thread.  */
        ptid = minus_one_ptid;
        break;
...

    /* Make sure all threads within the process are stopped.  */
(2)  if (ttrace (TT_PROC_STOP, tts.tts_pid, 0, 0, 0, 0) == -1)
       perror_with_name (("ttrace"));

    return ptid;
  }


It seems to me, that for some reason, in most cases, the inferior was slow
enough that when you reach (2), the dying thread hadn't exited
yet.  The TT_PROC_STOP call stops all lwps of the process, the
dying one included, I would think.  In that case, you still need the
resume on the dying thread in inf_ttrace_wait.  Otherwise, you *may*
get this bug back, depending on how the OS is waking waiting processes:

 http://www.cygwin.com/ml/gdb-patches/2007-09/msg00238.html

 "So, if the dying thread is stopped, it should be resumed one last time.
 Otherwise, any other thread waiting for its death on pthread_join
 would be blocked forever (e.g. in attachment, a simple program which
 freezes when it is run under GDB)."

That would explain why you trip on this bug, but, Jerome didn't,
I guess.  It may be your testcase triggers the race better.

The way this is solved currently, is also racy.  We
resume all threads, dying threads included, and assume they
will die just shortly, so, we immediately delete dying
threads after resuming:

  if (ptid_equal (ptid, minus_one_ptid))
    {
      /* Let all the other threads run too.  */
      iterate_over_threads (inf_ttrace_resume_callback, NULL);
****  iterate_over_threads (inf_ttrace_delete_dying_threads_callback, NULL);
    }

It could happen, that under stress, GDB handles another event,
and stops all lwps, *before* the dying lwp having a chance
of exiting.  The symptom would again be the one Jerome was
fixing.  Having no idea on how much the thread still executes
after a TTEVT_LWP_EXIT event, I can't tell how likely this
is to happen, say causing spurious testsuite failures.

The ttrace API doesn't have an "lwp is really gone from
the OS tables" event (sigh), but, it does have
a TTEVT_LWP_CREATE event.  This means that we can detect
if the OS is reusing an lwpid at that point.

So, to minimise the possible race, how about:

- still try to resume a dying lwp.  Ignore the errno you
  were originally seeing in that case (only).
- on resume failure, delete it from GDBs thread table.
- if by any chance, the lwp exits, and the inferior spawns a
  new lwp, and the OS reuses the same lwpid of the lwp we knew
  was dying, we delete the dying lwp, and add the new one.
  If the OS is reusing the id, the original lwp has to be gone.
  This is just an add_thread call, as that is already handled by it
  internally (*).
- If the thread is still alive, but is dying, let that show
  in "info threads".  The linux pthread support implementation
  also does this.

I think the race below still exists, but it happens to
be innocuous:

- we know an lwp was dying, but, we didn't detect its exit,
  because it really exited after we resumed it.
- another thread hits breakpoint
- yet another thread spawns a new thread, which reuses the
  dying thread's lwpid.
- GDB is informed of the breakpoint hit.
- Since events are handled sequentially, we will only notice the
  new thread event after handling the breakpoint hit.
- the user resumes the inferior, after inspecting what happened
  at the breakpoint.
- GDB tries to resume the dying lwpid --- actually, this id belongs
  to a new thread by now.  Since we were handling a breakpoint, we
  had stopped the whole process, so this resume succeeds.  We consider
  that the lwp is still dying.
- GDB now handles the TTEVT_LWP_CREATE event, and adds it to GDB's
  thread list, which automatically gets rid of the dying lwp, due
  to PTID collision.

Also, when we detect a TTEVT_LWP_CREATE, TTEVT_LWP_EXIT or 
TTEVT_LWP_TERMINATE, the ttrace docs indicate that only one lwp is
stopped.  There's no reason to stop all lwps, and return
TARGET_WAITKIND_SPURIOUS, only to resume all lwps again.
This just adds overhead and messes more with the
scheduling of the inferior than needed.  We could just resume
the stopped lwp, and return TARGET_WAITKIND_IGNORE.  But, we'd not
detect that the thread really is gone until the next whole inferior
resume.  I guess we could also detect if dying threads have already
exited after stopping the whole process and sending signal 0.
We can also do that in inf_ttrace_thread_alive (like inf_ptrace_thread_alive
does), so "info threads" also gets rid of dying threads that have already
really died.

Sorry, that came out longer than I was expecting.  Am I making any sense?
I can work up a patch for this.

If I'm analysing the race correctly, an alternative simpler solution
to the above, would be to just ignore a failed resume on a
dying thread, but still try to resume it.  Not resuming dying threads
may be a problem.

-- 
Pedro Alves


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: ttrace: Protocal error
  2008-08-08 19:30 ` ttrace: Protocal error John David Anglin
@ 2008-08-08 20:16   ` John David Anglin
  2008-08-09 14:52     ` Pedro Alves
  2008-08-09 14:53     ` Joel Brobecker
  0 siblings, 2 replies; 25+ messages in thread
From: John David Anglin @ 2008-08-08 20:16 UTC (permalink / raw)
  To: John David Anglin; +Cc: pedro, gdb-patches

> Hmmm, the thread seems to have exited but state_ is still 1.

The patch below seems to fix the problem.  I was finally able
to catch an abort in vla6.f90.

Ok?

(gdb) r
Starting program: /mnt/gnu/gcc/objdir/hppa2.0w-hp-hpux11.11/libgomp/testsuite/vla6.x3g
warning: Private mapping of shared library text was not specified
by the executable; setting a breakpoint in a shared library which
is not privately mapped will not work.  See the HP-UX 11i v3 chatr
manpage for methods to privately map shared library text.
[New process 6450, lwp 7134092]
[process 6450, lwp 7134092 exited]
[New process 6450, lwp 7134093]
[process 6450, lwp 7134093 exited]
[New process 6450, lwp 7134094]
[process 6450, lwp 7134094 exited]
[New process 6450, lwp 7134095]
[process 6450, lwp 7134095 exited]
[New process 6450, lwp 7134096]
[New process 6450, lwp 7134097]
[New process 6450, lwp 7134098]
[New process 6450, lwp 7134099]
[New process 6450, lwp 7134100]

Program received signal SIGABRT, Aborted.
0xc020da50 in kill () from /usr/lib/libc.2
(gdb) bt
#0  0xc020da50 in kill () from /usr/lib/libc.2
#1  0xc01a7f74 in raise () from /usr/lib/libc.2
#2  0xc01e92c8 in abort_C () from /usr/lib/libc.2
#3  0xc01e9324 in abort () from /usr/lib/libc.2
#4  0xc1c7ea20 in _gfortran_abort ()
    at ../../../gcc/libgfortran/intrinsics/abort.c:39
#5  0x00007d04 in foo.572 (c=<error reading variable>,
    d=<error reading variable>, e=<error reading variable>,
    f=<error reading variable>, g=<error reading variable>,
    h=<error reading variable>, i=<error reading variable>,
    j=<error reading variable>, k=<error reading variable>,
    n=<error reading variable>, _c=<error reading variable>,
    _d=<error reading variable>, _g=<error reading variable>,
    _h=<error reading variable>)
    at /mnt/gnu/gcc/gcc/libgomp/testsuite/libgomp.fortran/vla6.f90:175
#6  0x00007e58 in test.560 ()
    at /mnt/gnu/gcc/gcc/libgomp/testsuite/libgomp.fortran/vla6.f90:189
#7  0x00007ea8 in MAIN__ ()
    at /mnt/gnu/gcc/gcc/libgomp/testsuite/libgomp.fortran/vla6.f90:3
#8  0x00007f30 in main (argc=1, argv=0x7eff050c)
    at ../../../gcc/libgfortran/fmain.c:21
(gdb) info shared
From        To          Syms Read   Shared Object Library
0xc0010000  0xc0044084  Yes         /usr/lib/dld.sl
0xc0e1c000  0xc0e28000  Yes         /mnt/gnu/gcc/objdir/hppa2.0w-hp-hpux11.11/./libgomp/.libs/libgomp.sl.1
0xc1bc0000  0xc1c9c000  Yes         /mnt/gnu/gcc/objdir/hppa2.0w-hp-hpux11.11/./libgomp/../libgfortran/.libs/libgfortran.sl.3
0xc0e00000  0xc0e19000  Yes         /mnt/gnu/gcc/objdir/./gcc/libgcc_s.sl
0xc00c0000  0xc00ec000  Yes         /usr/lib/libm.2
0xc03bc000  0xc03c0000  Yes         /usr/lib/librt.2
0xc0050000  0xc0069000  Yes         /opt/langtools/lib/libpthread.1
0xc0100000  0xc024b000  Yes         /usr/lib/libc.2
0xc0005000  0xc0008000  Yes         /usr/lib/libdld.2
0xc0004000  0xc0005000  Yes         /opt/graphics/OpenGL/lib/libogltls.sl

This is with Pedro's change.

Dave
-- 
J. David Anglin                                  dave.anglin@nrc-cnrc.gc.ca
National Research Council of Canada              (613) 990-0752 (FAX: 952-6602)

2008-08-08  John David Anglin  <dave.anglin@nrc-cnrc.gc.ca>

	* inf-ttrace.c (inf_ttrace_resume_callback): Don't resume dying thread.

Index: inf-ttrace.c
===================================================================
RCS file: /cvs/src/src/gdb/inf-ttrace.c,v
retrieving revision 1.30
diff -u -3 -p -r1.30 inf-ttrace.c
--- inf-ttrace.c	9 Jul 2008 22:23:05 -0000	1.30
+++ inf-ttrace.c	8 Aug 2008 19:57:13 -0000
@@ -787,7 +804,9 @@ inf_ttrace_kill (void)
 static int
 inf_ttrace_resume_callback (struct thread_info *info, void *arg)
 {
-  if (!ptid_equal (info->ptid, inferior_ptid))
+  if (!ptid_equal (info->ptid, inferior_ptid)
+      && !((struct inf_ttrace_private_thread_info *)info->private)->dying
+      && !is_exited (info->ptid))
     {
       pid_t pid = ptid_get_pid (info->ptid);
       lwpid_t lwpid = ptid_get_lwp (info->ptid);
 \f
 


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: ttrace: Protocal error
       [not found] <no.id>
@ 2008-08-08 19:30 ` John David Anglin
  2008-08-08 20:16   ` John David Anglin
  2008-08-09 23:40 ` John David Anglin
  1 sibling, 1 reply; 25+ messages in thread
From: John David Anglin @ 2008-08-08 19:30 UTC (permalink / raw)
  To: John David Anglin; +Cc: pedro, gdb-patches

> [New process 20069]
> [New process 20069, lwp 7087826]

[New process 2692, lwp 7130128]
[process 2692, lwp 7130128 exited]

Breakpoint 1, perror_with_name (string=0x0) at ../../src/gdb/utils.c:847
847       err = safe_strerror (errno);
(gdb) frame 1
#1  0x000c9b08 in inf_ttrace_resume_callback (info=0x2319b0, arg=0x7b019048)
    at ../../src/gdb/inf-ttrace.c:813
813             perror_with_name (("ttrace"));
(gdb) p *(struct thread_info *)($r3 - 4)
$21 = {next = 0x4007bc90, ptid = {pid = 2692, lwp = 7130128, tid = 0},
  num = 2, executing_ = 0, state_ = 1, refcount = 0, prev_pc = 0,
  step_resume_breakpoint = 0x0, step_range_start = 0, step_range_end = 0,
  step_frame_id = {stack_addr = 0, code_addr = 0, special_addr = 0,
  stack_addr_p = 0, code_addr_p = 0, special_addr_p = 0}, current_line = 0,
  current_symtab = 0x0, trap_expected = 0, stepping_over_breakpoint = 0,
  stepping_through_solib_after_catch = 0,
  stepping_through_solib_catchpoints = 0x0, continuations = 0x0,
  intermediate_continuations = 0x0, proceed_to_finish = 0,
  step_over_calls = STEP_OVER_NONE, stop_step = 0, step_multi = 0,
  stop_signal = TARGET_SIGNAL_0, stop_bpstat = 0x0, private = 0x40076ac8}
(gdb) p (int)THREAD_EXITED
$22 = 2

Hmmm, the thread seems to have exited but state_ is still 1.

Dave
-- 
J. David Anglin                                  dave.anglin@nrc-cnrc.gc.ca
National Research Council of Canada              (613) 990-0752 (FAX: 952-6602)


^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2008-08-14 17:55 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-08-08  1:36 [4/7] Adjust the ttrace target (HP-UX) to always register the main thread Pedro Alves
2008-08-08 16:41 ` John David Anglin
2008-08-08 17:24   ` Pedro Alves
2008-08-08 17:49     ` John David Anglin
2008-08-08 18:34     ` ttrace: Protocal error John David Anglin
2008-08-08 20:02       ` Pedro Alves
2008-08-08 20:49         ` John David Anglin
2008-08-10  0:15     ` [4/7] Adjust the ttrace target (HP-UX) to always register the main thread Daniel Jacobowitz
2008-08-10  0:36       ` [4/7] Adjust the ttrace target (HP-UX) to always register the John David Anglin
2008-08-10 21:05         ` Pedro Alves
2008-08-10 21:16           ` John David Anglin
2008-08-10 21:04       ` John David Anglin
2008-08-14 17:55         ` Daniel Jacobowitz
     [not found] <no.id>
2008-08-08 19:30 ` ttrace: Protocal error John David Anglin
2008-08-08 20:16   ` John David Anglin
2008-08-09 14:52     ` Pedro Alves
2008-08-09 15:34       ` John David Anglin
2008-08-09 18:49       ` John David Anglin
2008-08-09 22:45         ` Pedro Alves
2008-08-09 22:46       ` Pedro Alves
2008-08-09 22:51         ` Pedro Alves
2008-08-09 23:19           ` John David Anglin
2008-08-09 22:48       ` Pedro Alves
2008-08-09 14:53     ` Joel Brobecker
2008-08-09 23:40 ` John David Anglin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox