[PATCH] Fix failure to detach if threads exit while detaching on linux

Mirror of the gdb-patches mailing list
 help / color / mirror / Atom feed

* [PATCH] Fix failure to detach if threads exit while detaching on linux
@ 2016-06-02 14:16 Antoine Tremblay
  2016-06-03 11:43 ` Pedro Alves
  0 siblings, 1 reply; 13+ messages in thread
From: Antoine Tremblay @ 2016-06-02 14:16 UTC (permalink / raw)
  To: gdb-patches; +Cc: Antoine Tremblay

This patches fixes detaching on linux when some threads exit while we're
detaching with GDB and GDBServer.

What happened before is that as GDB/GDBserver would be detaching threads
one thread at a time and allowing them to continue, if one of these
detached threads called exit for example and the other threads were
destroyed GDB/GDBserver would still try and detach these exited threads
and fail with a message like: "Can't detach process." as ptrace could not
execute the operation.

This patch uses check_ptrace_stopped_lwp_gone or
linux_proc_pid_is_trace_stopped_nowarn like is used in the resume case to
avoid an error if this function detects that the ptrace failure is normal
since the thread has exited.

This patch adds the gdb.threads/detach-gone-thread.exp test for this case.

Tested on x86-linux with {unix, native-extended-gdbserver}

gdb/gdbserver/ChangeLog:

	* linux-low.c (check_ptrace_stopped_lwp_gone) Move up to be used
	by linux_detach_one_lwp.
	(linux_detach_one_lwp): Report the error only if
	check_ptrace_stopped_lwp_gone is false.

gdb/ChangeLog:

	* inf-ptrace.c: include nat/linux-procfs.h.
	(inf_ptrace_detach): Report an error only if the thread is not gone.
	* linux-nat.c (check_ptrace_stopped_lwp_gone): Move up to be used
	by detach_callback.
	(detach_callback): Report the error only if
	check_ptrace_stopped_lwp_gone is false.

gdb/testsuite/ChangeLog:

	* gdb.threads/detach-gone-thread.c: New file.
	* gdb.threads/detach-gone-thread.ex: New test.
---
 gdb/gdbserver/linux-low.c                        | 73 ++++++++++++------------
 gdb/inf-ptrace.c                                 |  6 +-
 gdb/linux-nat.c                                  | 69 +++++++++++-----------
 gdb/testsuite/gdb.threads/detach-gone-thread.c   | 45 +++++++++++++++
 gdb/testsuite/gdb.threads/detach-gone-thread.exp | 37 ++++++++++++
 5 files changed, 159 insertions(+), 71 deletions(-)
 create mode 100644 gdb/testsuite/gdb.threads/detach-gone-thread.c
 create mode 100644 gdb/testsuite/gdb.threads/detach-gone-thread.exp

diff --git a/gdb/gdbserver/linux-low.c b/gdb/gdbserver/linux-low.c
index 81134b0..5f02dab 100644
--- a/gdb/gdbserver/linux-low.c
+++ b/gdb/gdbserver/linux-low.c
@@ -1447,6 +1447,39 @@ get_detach_signal (struct thread_info *thread)
     }
 }
 
+/* Called when we try to resume or detach a stopped LWP and that errors
+   out.  If the LWP is no longer in ptrace-stopped state (meaning it's
+   zombie, or about to become), discard the error, clear any pending
+   status the LWP may have, and return true (we'll collect the exit status
+   soon enough).  Otherwise, return false.  */
+
+static int
+check_ptrace_stopped_lwp_gone (struct lwp_info *lp)
+{
+  struct thread_info *thread = get_lwp_thread (lp);
+
+  /* If we get an error after resuming the LWP successfully, we'd
+     confuse !T state for the LWP being gone.  */
+  gdb_assert (lp->stopped);
+
+  /* We can't just check whether the LWP is in 'Z (Zombie)' state,
+     because even if ptrace failed with ESRCH, the tracee may be "not
+     yet fully dead", but already refusing ptrace requests.  In that
+     case the tracee has 'R (Running)' state for a little bit
+     (observed in Linux 3.18).  See also the note on ESRCH in the
+     ptrace(2) man page.  Instead, check whether the LWP has any state
+     other than ptrace-stopped.  */
+
+  /* Don't assume anything if /proc/PID/status can't be read.  */
+  if (linux_proc_pid_is_trace_stopped_nowarn (lwpid_of (thread)) == 0)
+    {
+      lp->stop_reason = TARGET_STOPPED_BY_NO_REASON;
+      lp->status_pending_p = 0;
+      return 1;
+    }
+  return 0;
+}
+
 static int
 linux_detach_one_lwp (struct inferior_list_entry *entry, void *args)
 {
@@ -1480,9 +1513,10 @@ linux_detach_one_lwp (struct inferior_list_entry *entry, void *args)
     the_low_target.prepare_to_resume (lwp);
   if (ptrace (PTRACE_DETACH, lwpid_of (thread), (PTRACE_TYPE_ARG3) 0,
 	      (PTRACE_TYPE_ARG4) (long) sig) < 0)
-    error (_("Can't detach %s: %s"),
-	   target_pid_to_str (ptid_of (thread)),
-	   strerror (errno));
+    if (!check_ptrace_stopped_lwp_gone (lwp))
+      error (_("Can't detach %s: %s"),
+	     target_pid_to_str (ptid_of (thread)),
+	     strerror (errno));
 
   delete_lwp (lwp);
   return 0;
@@ -4331,39 +4365,6 @@ linux_resume_one_lwp_throw (struct lwp_info *lwp,
   lwp->stop_reason = TARGET_STOPPED_BY_NO_REASON;
 }
 
-/* Called when we try to resume a stopped LWP and that errors out.  If
-   the LWP is no longer in ptrace-stopped state (meaning it's zombie,
-   or about to become), discard the error, clear any pending status
-   the LWP may have, and return true (we'll collect the exit status
-   soon enough).  Otherwise, return false.  */
-
-static int
-check_ptrace_stopped_lwp_gone (struct lwp_info *lp)
-{
-  struct thread_info *thread = get_lwp_thread (lp);
-
-  /* If we get an error after resuming the LWP successfully, we'd
-     confuse !T state for the LWP being gone.  */
-  gdb_assert (lp->stopped);
-
-  /* We can't just check whether the LWP is in 'Z (Zombie)' state,
-     because even if ptrace failed with ESRCH, the tracee may be "not
-     yet fully dead", but already refusing ptrace requests.  In that
-     case the tracee has 'R (Running)' state for a little bit
-     (observed in Linux 3.18).  See also the note on ESRCH in the
-     ptrace(2) man page.  Instead, check whether the LWP has any state
-     other than ptrace-stopped.  */
-
-  /* Don't assume anything if /proc/PID/status can't be read.  */
-  if (linux_proc_pid_is_trace_stopped_nowarn (lwpid_of (thread)) == 0)
-    {
-      lp->stop_reason = TARGET_STOPPED_BY_NO_REASON;
-      lp->status_pending_p = 0;
-      return 1;
-    }
-  return 0;
-}
-
 /* Like linux_resume_one_lwp_throw, but no error is thrown if the LWP
    disappears while we try to resume it.  */
 
diff --git a/gdb/inf-ptrace.c b/gdb/inf-ptrace.c
index 329d8fb..a668b7e 100644
--- a/gdb/inf-ptrace.c
+++ b/gdb/inf-ptrace.c
@@ -31,6 +31,7 @@
 #include "inf-ptrace.h"
 #include "inf-child.h"
 #include "gdbthread.h"
+#include "nat/linux-procfs.h"
 
 \f
 
@@ -259,7 +260,10 @@ inf_ptrace_detach (struct target_ops *ops, const char *args, int from_tty)
      started the process ourselves.  */
   errno = 0;
   ptrace (PT_DETACH, pid, (PTRACE_TYPE_ARG3)1, sig);
-  if (errno != 0)
+
+  /* Don't consider the error if /proc/PID/status can't be read.
+     See check_ptrace_stopped_lwp_gone.  */
+  if (errno != 0 && linux_proc_pid_is_trace_stopped_nowarn (pid) != 0)
     perror_with_name (("ptrace"));
 #else
   error (_("This system does not support detaching from a process"));
diff --git a/gdb/linux-nat.c b/gdb/linux-nat.c
index e6d525f..8a517c8 100644
--- a/gdb/linux-nat.c
+++ b/gdb/linux-nat.c
@@ -1382,6 +1382,38 @@ get_pending_status (struct lwp_info *lp, int *status)
   return 0;
 }
 
+/* Called when we try to resume or detach a stopped LWP and that errors
+   out.  If the LWP is no longer in ptrace-stopped state (meaning it's
+   zombie, or about to become), discard the error, clear any pending
+   status the LWP may have, and return true (we'll collect the exit status
+   soon enough).  Otherwise, return false.  */
+
+static int
+check_ptrace_stopped_lwp_gone (struct lwp_info *lp)
+{
+  /* If we get an error after resuming the LWP successfully, we'd
+     confuse !T state for the LWP being gone.  */
+  gdb_assert (lp->stopped);
+
+  /* We can't just check whether the LWP is in 'Z (Zombie)' state,
+     because even if ptrace failed with ESRCH, the tracee may be "not
+     yet fully dead", but already refusing ptrace requests.  In that
+     case the tracee has 'R (Running)' state for a little bit
+     (observed in Linux 3.18).  See also the note on ESRCH in the
+     ptrace(2) man page.  Instead, check whether the LWP has any state
+     other than ptrace-stopped.  */
+
+  /* Don't assume anything if /proc/PID/status can't be read.  */
+  if (linux_proc_pid_is_trace_stopped_nowarn (ptid_get_lwp (lp->ptid)) == 0)
+    {
+      lp->stop_reason = TARGET_STOPPED_BY_NO_REASON;
+      lp->status = 0;
+      lp->waitstatus.kind = TARGET_WAITKIND_IGNORE;
+      return 1;
+    }
+  return 0;
+}
+
 static int
 detach_callback (struct lwp_info *lp, void *data)
 {
@@ -1418,8 +1450,9 @@ detach_callback (struct lwp_info *lp, void *data)
       errno = 0;
       if (ptrace (PTRACE_DETACH, ptid_get_lwp (lp->ptid), 0,
 		  WSTOPSIG (status)) < 0)
-	error (_("Can't detach %s: %s"), target_pid_to_str (lp->ptid),
-	       safe_strerror (errno));
+	if (!check_ptrace_stopped_lwp_gone (lp))
+	  error (_("Can't detach %s: %s"), target_pid_to_str (lp->ptid),
+		 safe_strerror (errno));
 
       if (debug_linux_nat)
 	fprintf_unfiltered (gdb_stdlog,
@@ -1531,38 +1564,6 @@ linux_resume_one_lwp_throw (struct lwp_info *lp, int step,
   registers_changed_ptid (lp->ptid);
 }
 
-/* Called when we try to resume a stopped LWP and that errors out.  If
-   the LWP is no longer in ptrace-stopped state (meaning it's zombie,
-   or about to become), discard the error, clear any pending status
-   the LWP may have, and return true (we'll collect the exit status
-   soon enough).  Otherwise, return false.  */
-
-static int
-check_ptrace_stopped_lwp_gone (struct lwp_info *lp)
-{
-  /* If we get an error after resuming the LWP successfully, we'd
-     confuse !T state for the LWP being gone.  */
-  gdb_assert (lp->stopped);
-
-  /* We can't just check whether the LWP is in 'Z (Zombie)' state,
-     because even if ptrace failed with ESRCH, the tracee may be "not
-     yet fully dead", but already refusing ptrace requests.  In that
-     case the tracee has 'R (Running)' state for a little bit
-     (observed in Linux 3.18).  See also the note on ESRCH in the
-     ptrace(2) man page.  Instead, check whether the LWP has any state
-     other than ptrace-stopped.  */
-
-  /* Don't assume anything if /proc/PID/status can't be read.  */
-  if (linux_proc_pid_is_trace_stopped_nowarn (ptid_get_lwp (lp->ptid)) == 0)
-    {
-      lp->stop_reason = TARGET_STOPPED_BY_NO_REASON;
-      lp->status = 0;
-      lp->waitstatus.kind = TARGET_WAITKIND_IGNORE;
-      return 1;
-    }
-  return 0;
-}
-
 /* Like linux_resume_one_lwp_throw, but no error is thrown if the LWP
    disappears while we try to resume it.  */
 
diff --git a/gdb/testsuite/gdb.threads/detach-gone-thread.c b/gdb/testsuite/gdb.threads/detach-gone-thread.c
new file mode 100644
index 0000000..08917a0
--- /dev/null
+++ b/gdb/testsuite/gdb.threads/detach-gone-thread.c
@@ -0,0 +1,45 @@
+/* This testcase is part of GDB, the GNU debugger.
+
+   Copyright 2016 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <pthread.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+pthread_barrier_t barrier;
+
+void *
+child_function (void *arg)
+{
+  pthread_barrier_wait (&barrier);
+  _exit(0);
+}
+
+int
+main ()
+{
+  pthread_t threads[256];
+  int res;
+  int i;
+
+  pthread_barrier_init (&barrier, NULL, 257);
+
+  for (i = 0; i < 256; i++)
+    res = pthread_create (&threads[i], NULL, child_function, NULL);
+
+  pthread_barrier_wait (&barrier);
+  exit(0);
+}
diff --git a/gdb/testsuite/gdb.threads/detach-gone-thread.exp b/gdb/testsuite/gdb.threads/detach-gone-thread.exp
new file mode 100644
index 0000000..41472ec
--- /dev/null
+++ b/gdb/testsuite/gdb.threads/detach-gone-thread.exp
@@ -0,0 +1,37 @@
+# Copyright 2015-2016 Free Software Foundation, Inc.
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+standard_testfile
+
+if {[gdb_compile_pthreads "${srcdir}/${subdir}/${srcfile}" "${binfile}" executable {debug}] != "" } {
+    return -1
+}
+
+clean_restart ${testfile}
+
+if ![runto_main] {
+    fail "Can't run to main to check for trace support"
+    return -1
+}
+
+if $use_gdb_stub {
+    # This test is about testing detaching from a process,
+    # so it doesn't make sense to run it if the target is stub-like.
+    unsupported "This test is not supported for GDB stub targets."
+    return -1
+}
+
+gdb_breakpoint "_exit"
+gdb_continue_to_breakpoint "_exit" ".*_exit.*"
+gdb_test "detach" "Detaching from .*, process $decimal"
-- 
2.8.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Fix failure to detach if threads exit while detaching on linux
  2016-06-02 14:16 [PATCH] Fix failure to detach if threads exit while detaching on linux Antoine Tremblay
@ 2016-06-03 11:43 ` Pedro Alves
  2016-06-03 11:55   ` Antoine Tremblay
  0 siblings, 1 reply; 13+ messages in thread
From: Pedro Alves @ 2016-06-03 11:43 UTC (permalink / raw)
  To: Antoine Tremblay, gdb-patches

On 06/02/2016 03:16 PM, Antoine Tremblay wrote:

> gdb/ChangeLog:
> 
> 	* inf-ptrace.c: include nat/linux-procfs.h.
> 	(inf_ptrace_detach): Report an error only if the thread is not gone.

Can't do this.  inf-ptrace.c is a shared file used by non-Linux ports too.

> 	* gdb.threads/detach-gone-thread.c: New file.
> 	* gdb.threads/detach-gone-thread.ex: New test.

Typo: ".ex".  Say "New file" for that one too.

> +  _exit(0);

Space before parenthesis.

> +{
> +  pthread_t threads[256];
> +  int res;
> +  int i;
> +
> +  pthread_barrier_init (&barrier, NULL, 257);
> +
> +  for (i = 0; i < 256; i++)

Please add:

#define NTHREADS 256

and thus write:

  pthread_t threads[NTHREADS];
  int res;
  int i;

  pthread_barrier_init (&barrier, NULL, NTHREADS + 1);

  for (i = 0; i < NTHREADS; i++)


> +
> +if $use_gdb_stub {

Do this at the top, and call the new use_gdb_stub procedure instead:

if [use_gdb_stub] {

> +    # This test is about testing detaching from a process,
> +    # so it doesn't make sense to run it if the target is stub-like.
> +    unsupported "This test is not supported for GDB stub targets."

But in any case, I'm not so sure about this.  Stub targets can
detach too.  What problem did you find that needed this?

Thanks,
Pedro Alves


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Fix failure to detach if threads exit while detaching on linux
  2016-06-03 11:43 ` Pedro Alves
@ 2016-06-03 11:55   ` Antoine Tremblay
  2016-06-03 12:05     ` Pedro Alves
  0 siblings, 1 reply; 13+ messages in thread
From: Antoine Tremblay @ 2016-06-03 11:55 UTC (permalink / raw)
  To: Pedro Alves; +Cc: Antoine Tremblay, gdb-patches


Pedro Alves writes:

> On 06/02/2016 03:16 PM, Antoine Tremblay wrote:
>
>> gdb/ChangeLog:
>> 
>> 	* inf-ptrace.c: include nat/linux-procfs.h.
>> 	(inf_ptrace_detach): Report an error only if the thread is not gone.
>
> Can't do this.  inf-ptrace.c is a shared file used by non-Linux ports too.
>

Oops I tought ptrace was linux specific, seems like bsd uses it too..

I think I can replace this with a try catch over  linux_ops->to_detach
(ops, args, from_tty); can call the rest of inf_ptrace_detach from there
I'll try it out...

>> 	* gdb.threads/detach-gone-thread.c: New file.
>> 	* gdb.threads/detach-gone-thread.ex: New test.
>
> Typo: ".ex".  Say "New file" for that one too.
>
>> +  _exit(0);
>

Done.

> Space before parenthesis.
>
>> +{
>> +  pthread_t threads[256];
>> +  int res;
>> +  int i;
>> +
>> +  pthread_barrier_init (&barrier, NULL, 257);
>> +
>> +  for (i = 0; i < 256; i++)
>
> Please add:
>
> #define NTHREADS 256
>
> and thus write:
>
>   pthread_t threads[NTHREADS];
>   int res;
>   int i;
>
>   pthread_barrier_init (&barrier, NULL, NTHREADS + 1);
>
>   for (i = 0; i < NTHREADS; i++)
>
>

Done.

>> +
>> +if $use_gdb_stub {
>
> Do this at the top, and call the new use_gdb_stub procedure instead:
>
> if [use_gdb_stub] {
>
>> +    # This test is about testing detaching from a process,
>> +    # so it doesn't make sense to run it if the target is stub-like.
>> +    unsupported "This test is not supported for GDB stub targets."
>
> But in any case, I'm not so sure about this.  Stub targets can
> detach too.  What problem did you find that needed this?
>

I did that because I though they could not detach...

Any suggestion on how to limit this to targets that can detach ?.. call
detach see if it suceeds or there's a better way ?

Thanks,
Antoine


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Fix failure to detach if threads exit while detaching on linux
  2016-06-03 11:55   ` Antoine Tremblay
@ 2016-06-03 12:05     ` Pedro Alves
  2016-06-03 14:49       ` [PATCH v2] " Antoine Tremblay
  0 siblings, 1 reply; 13+ messages in thread
From: Pedro Alves @ 2016-06-03 12:05 UTC (permalink / raw)
  To: Antoine Tremblay; +Cc: gdb-patches

On 06/03/2016 12:55 PM, Antoine Tremblay wrote:

> I did that because I though they could not detach...
> 
> Any suggestion on how to limit this to targets that can detach ?.. 

I have trouble thinking of a target that wouldn't be able to detach.
And then it'd have to be a target that supports pthreads, otherwise
the test won't even run.

> call detach see if it suceeds or there's a better way ?

Right.  But I'd just start by assuming it works.  Just make
sure that --target_board=native-gdbserver passes.

Thanks,
Pedro Alves

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v2] Fix failure to detach if threads exit while detaching on linux
  2016-06-03 12:05     ` Pedro Alves
@ 2016-06-03 14:49       ` Antoine Tremblay
  2016-06-03 15:16         ` Pedro Alves
  0 siblings, 1 reply; 13+ messages in thread
From: Antoine Tremblay @ 2016-06-03 14:49 UTC (permalink / raw)
  To: palves, gdb-patches; +Cc: Antoine Tremblay

In this v2:

Fixed some style issues,
Remove stub target requirement
Fixed include of nat/linux-procfs in inf-ptrace

--
This patches fixes detaching on linux when some threads exit while we're
detaching with GDB and GDBServer.

What happened before is that as GDB/GDBserver would be detaching threads
one thread at a time and allowing them to continue, if one of these
detached threads called exit for example and the other threads were
destroyed GDB/GDBserver would still try and detach these exited threads
and fail with a message like: "Can't detach process." as ptrace could not
execute the operation.

This patch uses check_ptrace_stopped_lwp_gone or
linux_proc_pid_is_trace_stopped_nowarn like is used in the resume case to
avoid an error if this function detects that the ptrace failure is normal
since the thread has exited.

This patch adds the gdb.threads/detach-gone-thread.exp test for this case.

Tested on x86-linux with {unix, native-gdbserver, native-extended-gdbserver}

gdb/gdbserver/ChangeLog:

	* linux-low.c (check_ptrace_stopped_lwp_gone) Move up to be used
	by linux_detach_one_lwp.
	(linux_detach_one_lwp): Report the error only if
	check_ptrace_stopped_lwp_gone is false.

gdb/ChangeLog:

	* linux-nat.c (check_ptrace_stopped_lwp_gone): Move up to be used
	by detach_callback.
	(detach_callback): Report the error only if
	check_ptrace_stopped_lwp_gone is false.
	(linux_nat_detach): Likewise.

gdb/testsuite/ChangeLog:

	* gdb.threads/detach-gone-thread.c: New file.
	* gdb.threads/detach-gone-thread.exp: New file.
---
 gdb/gdbserver/linux-low.c                        | 73 ++++++++++---------
 gdb/linux-nat.c                                  | 93 +++++++++++++++---------
 gdb/testsuite/gdb.threads/detach-gone-thread.c   | 47 ++++++++++++
 gdb/testsuite/gdb.threads/detach-gone-thread.exp | 52 +++++++++++++
 4 files changed, 193 insertions(+), 72 deletions(-)
 create mode 100644 gdb/testsuite/gdb.threads/detach-gone-thread.c
 create mode 100644 gdb/testsuite/gdb.threads/detach-gone-thread.exp

diff --git a/gdb/gdbserver/linux-low.c b/gdb/gdbserver/linux-low.c
index 81134b0..5f02dab 100644
--- a/gdb/gdbserver/linux-low.c
+++ b/gdb/gdbserver/linux-low.c
@@ -1447,6 +1447,39 @@ get_detach_signal (struct thread_info *thread)
     }
 }
 
+/* Called when we try to resume or detach a stopped LWP and that errors
+   out.  If the LWP is no longer in ptrace-stopped state (meaning it's
+   zombie, or about to become), discard the error, clear any pending
+   status the LWP may have, and return true (we'll collect the exit status
+   soon enough).  Otherwise, return false.  */
+
+static int
+check_ptrace_stopped_lwp_gone (struct lwp_info *lp)
+{
+  struct thread_info *thread = get_lwp_thread (lp);
+
+  /* If we get an error after resuming the LWP successfully, we'd
+     confuse !T state for the LWP being gone.  */
+  gdb_assert (lp->stopped);
+
+  /* We can't just check whether the LWP is in 'Z (Zombie)' state,
+     because even if ptrace failed with ESRCH, the tracee may be "not
+     yet fully dead", but already refusing ptrace requests.  In that
+     case the tracee has 'R (Running)' state for a little bit
+     (observed in Linux 3.18).  See also the note on ESRCH in the
+     ptrace(2) man page.  Instead, check whether the LWP has any state
+     other than ptrace-stopped.  */
+
+  /* Don't assume anything if /proc/PID/status can't be read.  */
+  if (linux_proc_pid_is_trace_stopped_nowarn (lwpid_of (thread)) == 0)
+    {
+      lp->stop_reason = TARGET_STOPPED_BY_NO_REASON;
+      lp->status_pending_p = 0;
+      return 1;
+    }
+  return 0;
+}
+
 static int
 linux_detach_one_lwp (struct inferior_list_entry *entry, void *args)
 {
@@ -1480,9 +1513,10 @@ linux_detach_one_lwp (struct inferior_list_entry *entry, void *args)
     the_low_target.prepare_to_resume (lwp);
   if (ptrace (PTRACE_DETACH, lwpid_of (thread), (PTRACE_TYPE_ARG3) 0,
 	      (PTRACE_TYPE_ARG4) (long) sig) < 0)
-    error (_("Can't detach %s: %s"),
-	   target_pid_to_str (ptid_of (thread)),
-	   strerror (errno));
+    if (!check_ptrace_stopped_lwp_gone (lwp))
+      error (_("Can't detach %s: %s"),
+	     target_pid_to_str (ptid_of (thread)),
+	     strerror (errno));
 
   delete_lwp (lwp);
   return 0;
@@ -4331,39 +4365,6 @@ linux_resume_one_lwp_throw (struct lwp_info *lwp,
   lwp->stop_reason = TARGET_STOPPED_BY_NO_REASON;
 }
 
-/* Called when we try to resume a stopped LWP and that errors out.  If
-   the LWP is no longer in ptrace-stopped state (meaning it's zombie,
-   or about to become), discard the error, clear any pending status
-   the LWP may have, and return true (we'll collect the exit status
-   soon enough).  Otherwise, return false.  */
-
-static int
-check_ptrace_stopped_lwp_gone (struct lwp_info *lp)
-{
-  struct thread_info *thread = get_lwp_thread (lp);
-
-  /* If we get an error after resuming the LWP successfully, we'd
-     confuse !T state for the LWP being gone.  */
-  gdb_assert (lp->stopped);
-
-  /* We can't just check whether the LWP is in 'Z (Zombie)' state,
-     because even if ptrace failed with ESRCH, the tracee may be "not
-     yet fully dead", but already refusing ptrace requests.  In that
-     case the tracee has 'R (Running)' state for a little bit
-     (observed in Linux 3.18).  See also the note on ESRCH in the
-     ptrace(2) man page.  Instead, check whether the LWP has any state
-     other than ptrace-stopped.  */
-
-  /* Don't assume anything if /proc/PID/status can't be read.  */
-  if (linux_proc_pid_is_trace_stopped_nowarn (lwpid_of (thread)) == 0)
-    {
-      lp->stop_reason = TARGET_STOPPED_BY_NO_REASON;
-      lp->status_pending_p = 0;
-      return 1;
-    }
-  return 0;
-}
-
 /* Like linux_resume_one_lwp_throw, but no error is thrown if the LWP
    disappears while we try to resume it.  */
 
diff --git a/gdb/linux-nat.c b/gdb/linux-nat.c
index e6d525f..1c8c82c 100644
--- a/gdb/linux-nat.c
+++ b/gdb/linux-nat.c
@@ -1382,6 +1382,38 @@ get_pending_status (struct lwp_info *lp, int *status)
   return 0;
 }
 
+/* Called when we try to resume or detach a stopped LWP and that errors
+   out.  If the LWP is no longer in ptrace-stopped state (meaning it's
+   zombie, or about to become), discard the error, clear any pending
+   status the LWP may have, and return true (we'll collect the exit status
+   soon enough).  Otherwise, return false.  */
+
+static int
+check_ptrace_stopped_lwp_gone (struct lwp_info *lp)
+{
+  /* If we get an error after resuming the LWP successfully, we'd
+     confuse !T state for the LWP being gone.  */
+  gdb_assert (lp->stopped);
+
+  /* We can't just check whether the LWP is in 'Z (Zombie)' state,
+     because even if ptrace failed with ESRCH, the tracee may be "not
+     yet fully dead", but already refusing ptrace requests.  In that
+     case the tracee has 'R (Running)' state for a little bit
+     (observed in Linux 3.18).  See also the note on ESRCH in the
+     ptrace(2) man page.  Instead, check whether the LWP has any state
+     other than ptrace-stopped.  */
+
+  /* Don't assume anything if /proc/PID/status can't be read.  */
+  if (linux_proc_pid_is_trace_stopped_nowarn (ptid_get_lwp (lp->ptid)) == 0)
+    {
+      lp->stop_reason = TARGET_STOPPED_BY_NO_REASON;
+      lp->status = 0;
+      lp->waitstatus.kind = TARGET_WAITKIND_IGNORE;
+      return 1;
+    }
+  return 0;
+}
+
 static int
 detach_callback (struct lwp_info *lp, void *data)
 {
@@ -1418,8 +1450,9 @@ detach_callback (struct lwp_info *lp, void *data)
       errno = 0;
       if (ptrace (PTRACE_DETACH, ptid_get_lwp (lp->ptid), 0,
 		  WSTOPSIG (status)) < 0)
-	error (_("Can't detach %s: %s"), target_pid_to_str (lp->ptid),
-	       safe_strerror (errno));
+	if (!check_ptrace_stopped_lwp_gone (lp))
+	  error (_("Can't detach %s: %s"), target_pid_to_str (lp->ptid),
+		 safe_strerror (errno));
 
       if (debug_linux_nat)
 	fprintf_unfiltered (gdb_stdlog,
@@ -1480,7 +1513,6 @@ linux_nat_detach (struct target_ops *ops, const char *args, int from_tty)
 
   if (linux_nat_prepare_to_resume != NULL)
     linux_nat_prepare_to_resume (main_lwp);
-  delete_lwp (main_lwp->ptid);
 
   if (forks_exist_p ())
     {
@@ -1491,7 +1523,28 @@ linux_nat_detach (struct target_ops *ops, const char *args, int from_tty)
       linux_fork_detach (args, from_tty);
     }
   else
-    linux_ops->to_detach (ops, args, from_tty);
+    {
+      TRY
+	{
+	  linux_ops->to_detach (ops, args, from_tty);
+	}
+      CATCH (ex, RETURN_MASK_ERROR)
+	{
+	  if (!check_ptrace_stopped_lwp_gone (main_lwp))
+	    {
+	      delete_lwp (main_lwp->ptid);
+	      throw_exception (ex);
+	    }
+	  /* Ignore the error since the thread is gone already.  */
+	  else
+	    {
+	      inferior_ptid = null_ptid;
+	      detach_inferior (pid);
+	      inf_child_maybe_unpush_target (ops);
+	    }
+	}
+    }
+  delete_lwp (main_lwp->ptid);
 }
 
 /* Resume execution of the inferior process.  If STEP is nonzero,
@@ -1531,38 +1584,6 @@ linux_resume_one_lwp_throw (struct lwp_info *lp, int step,
   registers_changed_ptid (lp->ptid);
 }
 
-/* Called when we try to resume a stopped LWP and that errors out.  If
-   the LWP is no longer in ptrace-stopped state (meaning it's zombie,
-   or about to become), discard the error, clear any pending status
-   the LWP may have, and return true (we'll collect the exit status
-   soon enough).  Otherwise, return false.  */
-
-static int
-check_ptrace_stopped_lwp_gone (struct lwp_info *lp)
-{
-  /* If we get an error after resuming the LWP successfully, we'd
-     confuse !T state for the LWP being gone.  */
-  gdb_assert (lp->stopped);
-
-  /* We can't just check whether the LWP is in 'Z (Zombie)' state,
-     because even if ptrace failed with ESRCH, the tracee may be "not
-     yet fully dead", but already refusing ptrace requests.  In that
-     case the tracee has 'R (Running)' state for a little bit
-     (observed in Linux 3.18).  See also the note on ESRCH in the
-     ptrace(2) man page.  Instead, check whether the LWP has any state
-     other than ptrace-stopped.  */
-
-  /* Don't assume anything if /proc/PID/status can't be read.  */
-  if (linux_proc_pid_is_trace_stopped_nowarn (ptid_get_lwp (lp->ptid)) == 0)
-    {
-      lp->stop_reason = TARGET_STOPPED_BY_NO_REASON;
-      lp->status = 0;
-      lp->waitstatus.kind = TARGET_WAITKIND_IGNORE;
-      return 1;
-    }
-  return 0;
-}
-
 /* Like linux_resume_one_lwp_throw, but no error is thrown if the LWP
    disappears while we try to resume it.  */
 
diff --git a/gdb/testsuite/gdb.threads/detach-gone-thread.c b/gdb/testsuite/gdb.threads/detach-gone-thread.c
new file mode 100644
index 0000000..19f6fc2
--- /dev/null
+++ b/gdb/testsuite/gdb.threads/detach-gone-thread.c
@@ -0,0 +1,47 @@
+/* This testcase is part of GDB, the GNU debugger.
+
+   Copyright 2016 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <pthread.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+pthread_barrier_t barrier;
+
+#define NTHREADS 256
+
+void *
+child_function (void *arg)
+{
+  pthread_barrier_wait (&barrier);
+  _exit (0);
+}
+
+int
+main ()
+{
+  pthread_t threads[NTHREADS];
+  int res;
+  int i;
+
+  pthread_barrier_init (&barrier, NULL, NTHREADS + 1);
+
+  for (i = 0; i < NTHREADS; i++)
+    res = pthread_create (&threads[i], NULL, child_function, NULL);
+
+  pthread_barrier_wait (&barrier);
+  exit (0);
+}
diff --git a/gdb/testsuite/gdb.threads/detach-gone-thread.exp b/gdb/testsuite/gdb.threads/detach-gone-thread.exp
new file mode 100644
index 0000000..812246c
--- /dev/null
+++ b/gdb/testsuite/gdb.threads/detach-gone-thread.exp
@@ -0,0 +1,52 @@
+# Copyright 2015-2016 Free Software Foundation, Inc.
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+standard_testfile
+
+if {[gdb_compile_pthreads "${srcdir}/${subdir}/${srcfile}" "${binfile}" executable {debug}] != "" } {
+    return -1
+}
+
+clean_restart ${testfile}
+
+if ![runto_main] {
+    fail "Can't run to main to check for trace support"
+    return -1
+}
+
+gdb_breakpoint "_exit"
+gdb_continue_to_breakpoint "_exit" ".*_exit.*"
+
+# Since the gdbserver-native board ends remote debugging on detach,
+# we can't use gdb_test as this function intercepts the Ending remote
+# debugging message and tries to restart gdb. However, it can't do that
+# as gdb_exit on this board will call monitor exit which will fail as
+# we're not remote debugging anymore after a detach in this case.
+send_gdb "detach\n"
+gdb_expect {
+    -re "Detaching from .*, process $decimal\r\nEnding remote debugging\." {
+	#Don't try to exit gdbserver see above comment.
+	set gdbserver_reconnect_p 1
+	pass "detach non-extended"
+    }
+    -re "Detaching from .*, process $decimal\r\n$gdb_prompt $" {
+	pass "detach"
+    }
+    -re ".*$gdb_prompt $" {
+        fail "detach"
+    }
+    timeout {
+	fail "detach timeout"
+    }
+}
-- 
2.8.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] Fix failure to detach if threads exit while detaching on linux
  2016-06-03 14:49       ` [PATCH v2] " Antoine Tremblay
@ 2016-06-03 15:16         ` Pedro Alves
  2016-06-03 15:32           ` Antoine Tremblay
  0 siblings, 1 reply; 13+ messages in thread
From: Pedro Alves @ 2016-06-03 15:16 UTC (permalink / raw)
  To: Antoine Tremblay, gdb-patches

On 06/03/2016 03:49 PM, Antoine Tremblay wrote:

> @@ -1491,7 +1523,28 @@ linux_nat_detach (struct target_ops *ops, const char *args, int from_tty)
>        linux_fork_detach (args, from_tty);
>      }
>    else
> -    linux_ops->to_detach (ops, args, from_tty);
> +    {
> +      TRY
> +	{
> +	  linux_ops->to_detach (ops, args, from_tty);
> +	}
> +      CATCH (ex, RETURN_MASK_ERROR)
> +	{
> +	  if (!check_ptrace_stopped_lwp_gone (main_lwp))
> +	    {
> +	      delete_lwp (main_lwp->ptid);

Is deleting the right thing to do?  I think we'll likely
crash, the next time we try to do something (e.g., try
to detach again).  Seems like on the gdbserver side,
the lwp isn't deleted in this case.

> +	      throw_exception (ex);
> +	    }
> +	  /* Ignore the error since the thread is gone already.  */
> +	  else
> +	    {
> +	      inferior_ptid = null_ptid;
> +	      detach_inferior (pid);
> +	      inf_child_maybe_unpush_target (ops);

Please factor these three lines out of inf_ptrace_attach into
a separate function, exported from inf-ptrace.h and called
from both places.  E.g.:

  extern void inf_ptrace_attach_success (struct target_ops *ops);

> +	    }
> +	}
> +    }
> +  delete_lwp (main_lwp->ptid);
>  }
>  

> +clean_restart ${testfile}
> +
> +if ![runto_main] {
> +    fail "Can't run to main to check for trace support"

"to check for trace support" is stale.

> +    return -1
> +}
> +
> +gdb_breakpoint "_exit"
> +gdb_continue_to_breakpoint "_exit" ".*_exit.*"
> +
> +# Since the gdbserver-native board ends remote debugging on detach,
> +# we can't use gdb_test as this function intercepts the Ending remote
> +# debugging message and tries to restart gdb.

Eh, that looks like a bogus think for gdb_test_multiple to be doing.

Seems like it's been there since "forever":

commit 19fa4a0af34ff07f260fc75e903ab92d8ca03d33
Author:     Mike Werner <mtw@cygnus>
AuthorDate: Sun Feb 21 20:03:55 1993 +0000

        * gdb/testsuite: Initial creation of gdb/testsuite.
        Migrated dejagnu testcases and support files for testing nm to
        gdb/testsuite from deja-gnu.  These files were moved "as is"
        with no modifications.  This migration is part of a major overhaul
        of dejagnu.  The modifications to these testcases, etc., which
        will allow them to work with the new version of dejagnu will be
        made in a future update.

I'd support just removing that bit.

In any case, there's no need to avoid gdb_test/gdb_test_multiple.
Any user-specified pattern overrides gdb_test_multiple's internal
patterns.  So you just need to intercept the  "Ending remote debugging"
message yourself.

> However, it can't do that
> +# as gdb_exit on this board will call monitor exit which will fail as
> +# we're not remote debugging anymore after a detach in this case.
> +send_gdb "detach\n"
> +gdb_expect {

set test "detach"
gdb_test_multiple $test $test {
    -re "Detaching from .*, process $decimal\r\nEnding remote debugging\.\r\n$gdb_prompt $" {
        # This is what you get with "target remote".
        pass $test
    }
    -re "Detaching from .*, process $decimal\r\n$gdb_prompt $" {
        pass $test
    }
}

> +    -re "Detaching from .*, process $decimal\r\nEnding remote debugging\." {

Missing $gdb_prompt match.

> +	#Don't try to exit gdbserver see above comment.
> +	set gdbserver_reconnect_p 1

I don't understand this one.  It shouldn't be needed.

> +	pass "detach non-extended"

Please use the same test message for all branches:

  pass $test

> +    }
> +    -re "Detaching from .*, process $decimal\r\n$gdb_prompt $" {
> +	pass "detach"
> +    }
> +    -re ".*$gdb_prompt $" {
> +        fail "detach"
> +    }
> +    timeout {
> +	fail "detach timeout"

FYI, this would have been the standard:

> +	fail "$test (timeout)"

> +    }
> +}
> 

Thanks,
Pedro Alves


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] Fix failure to detach if threads exit while detaching on linux
  2016-06-03 15:16         ` Pedro Alves
@ 2016-06-03 15:32           ` Antoine Tremblay
  2016-06-03 15:45             ` Pedro Alves
  0 siblings, 1 reply; 13+ messages in thread
From: Antoine Tremblay @ 2016-06-03 15:32 UTC (permalink / raw)
  To: Pedro Alves; +Cc: Antoine Tremblay, gdb-patches


Pedro Alves writes:

> On 06/03/2016 03:49 PM, Antoine Tremblay wrote:
>
>> @@ -1491,7 +1523,28 @@ linux_nat_detach (struct target_ops *ops, const char *args, int from_tty)
>>        linux_fork_detach (args, from_tty);
>>      }
>>    else
>> -    linux_ops->to_detach (ops, args, from_tty);
>> +    {
>> +      TRY
>> +	{
>> +	  linux_ops->to_detach (ops, args, from_tty);
>> +	}
>> +      CATCH (ex, RETURN_MASK_ERROR)
>> +	{
>> +	  if (!check_ptrace_stopped_lwp_gone (main_lwp))
>> +	    {
>> +	      delete_lwp (main_lwp->ptid);
>
> Is deleting the right thing to do?  I think we'll likely
> crash, the next time we try to do something (e.g., try
> to detach again).  Seems like on the gdbserver side,
> the lwp isn't deleted in this case.
>

Indeed, OK.

>> +	      throw_exception (ex);
>> +	    }
>> +	  /* Ignore the error since the thread is gone already.  */
>> +	  else
>> +	    {
>> +	      inferior_ptid = null_ptid;
>> +	      detach_inferior (pid);
>> +	      inf_child_maybe_unpush_target (ops);
>
> Please factor these three lines out of inf_ptrace_attach into
> a separate function, exported from inf-ptrace.h and called
> from both places.  E.g.:
>

OK.

>   extern void inf_ptrace_attach_success (struct target_ops *ops);
>

OK. (detach_success)

>> +	    }
>> +	}
>> +    }
>> +  delete_lwp (main_lwp->ptid);
>>  }
>>  
>
>> +clean_restart ${testfile}
>> +
>> +if ![runto_main] {
>> +    fail "Can't run to main to check for trace support"
>
> "to check for trace support" is stale.
>

Oops thanks.

>> +    return -1
>> +}
>> +
>> +gdb_breakpoint "_exit"
>> +gdb_continue_to_breakpoint "_exit" ".*_exit.*"
>> +
>> +# Since the gdbserver-native board ends remote debugging on detach,
>> +# we can't use gdb_test as this function intercepts the Ending remote
>> +# debugging message and tries to restart gdb.
>
> Eh, that looks like a bogus think for gdb_test_multiple to be doing.
>
> Seems like it's been there since "forever":
>
> commit 19fa4a0af34ff07f260fc75e903ab92d8ca03d33
> Author:     Mike Werner <mtw@cygnus>
> AuthorDate: Sun Feb 21 20:03:55 1993 +0000
>
>         * gdb/testsuite: Initial creation of gdb/testsuite.
>         Migrated dejagnu testcases and support files for testing nm to
>         gdb/testsuite from deja-gnu.  These files were moved "as is"
>         with no modifications.  This migration is part of a major overhaul
>         of dejagnu.  The modifications to these testcases, etc., which
>         will allow them to work with the new version of dejagnu will be
>         made in a future update.
>
> I'd support just removing that bit.
>
> In any case, there's no need to avoid gdb_test/gdb_test_multiple.
> Any user-specified pattern overrides gdb_test_multiple's internal
> patterns.  So you just need to intercept the  "Ending remote debugging"
> message yourself.
>

Right, I tested that before without sucess but had not the perfect regex
yet...

I'll use this now. thanks.

>> However, it can't do that
>> +# as gdb_exit on this board will call monitor exit which will fail as
>> +# we're not remote debugging anymore after a detach in this case.
>> +send_gdb "detach\n"
>> +gdb_expect {
>
> set test "detach"
> gdb_test_multiple $test $test {
>     -re "Detaching from .*, process $decimal\r\nEnding remote debugging\.\r\n$gdb_prompt $" {
>         # This is what you get with "target remote".
>         pass $test
>     }
>     -re "Detaching from .*, process $decimal\r\n$gdb_prompt $" {
>         pass $test
>     }
> }
>
>> +    -re "Detaching from .*, process $decimal\r\nEnding remote debugging\." {
>
> Missing $gdb_prompt match.
>

OK.

>> +	#Don't try to exit gdbserver see above comment.
>> +	set gdbserver_reconnect_p 1
>
> I don't understand this one.  It shouldn't be needed.
>

This one is needed since on the teardown of the .exp test gdb_exit is
called and calls monitor_exit which fails with a timeout this avoids
this and the original gdb_exit is called.

>> +	pass "detach non-extended"
>
> Please use the same test message for all branches:
>
>   pass $test
>
>> +    }
>> +    -re "Detaching from .*, process $decimal\r\n$gdb_prompt $" {
>> +	pass "detach"
>> +    }
>> +    -re ".*$gdb_prompt $" {
>> +        fail "detach"
>> +    }
>> +    timeout {
>> +	fail "detach timeout"
>
> FYI, this would have been the standard:
>
>> +	fail "$test (timeout)"
>
>> +    }
>> +}
>>
Noted, thx.


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] Fix failure to detach if threads exit while detaching on linux
  2016-06-03 15:32           ` Antoine Tremblay
@ 2016-06-03 15:45             ` Pedro Alves
  2016-06-03 15:59               ` Antoine Tremblay
  0 siblings, 1 reply; 13+ messages in thread
From: Pedro Alves @ 2016-06-03 15:45 UTC (permalink / raw)
  To: Antoine Tremblay; +Cc: gdb-patches

On 06/03/2016 04:32 PM, Antoine Tremblay wrote:
>>> >> +	#Don't try to exit gdbserver see above comment.
>>> >> +	set gdbserver_reconnect_p 1
>> >
>> > I don't understand this one.  It shouldn't be needed.
>> >
> This one is needed since on the teardown of the .exp test gdb_exit is
> called and calls monitor_exit which fails with a timeout this avoids
> this and the original gdb_exit is called.

Then that strikes me as the wrong thing to do.  We should instead
expect that gdbserver exits cleanly, and then clear server_spawn_id.

E.g.:

proc test_server_exit {} {
  global server_spawn_id
  if ![info exists server_spawn_id] {
    return
  }
  
  set test "server exits"
  gdb_expect {
     -i $server_spawn_id
     eof {
        pass $test
	wait -i $server_spawn_id
        unset server_spawn_id
     }
     timeout {
       fail "$test (timeout)"
     }
   }
  }
}

and then:

    -re "Detaching from .*, process $decimal\r\nEnding remote debugging\.\r\n$gdb_prompt $" {
        # This is what you get with "target remote".
        pass $test
        # If testing with gdbserver, make sure it manages to exit.
        test_server_exit
    }

Thanks,
Pedro Alves


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] Fix failure to detach if threads exit while detaching on linux
  2016-06-03 15:45             ` Pedro Alves
@ 2016-06-03 15:59               ` Antoine Tremblay
  2016-06-03 16:21                 ` Pedro Alves
  0 siblings, 1 reply; 13+ messages in thread
From: Antoine Tremblay @ 2016-06-03 15:59 UTC (permalink / raw)
  To: Pedro Alves; +Cc: Antoine Tremblay, gdb-patches


Pedro Alves writes:

> On 06/03/2016 04:32 PM, Antoine Tremblay wrote:
>>>> >> +	#Don't try to exit gdbserver see above comment.
>>>> >> +	set gdbserver_reconnect_p 1
>>> >
>>> > I don't understand this one.  It shouldn't be needed.
>>> >
>> This one is needed since on the teardown of the .exp test gdb_exit is
>> called and calls monitor_exit which fails with a timeout this avoids
>> this and the original gdb_exit is called.
>
> Then that strikes me as the wrong thing to do.  We should instead
> expect that gdbserver exits cleanly, and then clear server_spawn_id.
>
> E.g.:
>
> proc test_server_exit {} {
>   global server_spawn_id
>   if ![info exists server_spawn_id] {
>     return
>   }
>   
>   set test "server exits"
>   gdb_expect {
>      -i $server_spawn_id
>      eof {
>         pass $test
> 	wait -i $server_spawn_id
>         unset server_spawn_id
>      }
>      timeout {
>        fail "$test (timeout)"
>      }
>    }
>   }
> }
>
> and then:
>
>     -re "Detaching from .*, process $decimal\r\nEnding remote debugging\.\r\n$gdb_prompt $" {
>         # This is what you get with "target remote".
>         pass $test
>         # If testing with gdbserver, make sure it manages to exit.
>         test_server_exit
>     }
>

Thanks does seems much better !

However, testing it it triggers the test_server_exit timeout case, I'm
guessing because eof already happened and we're waiting for nothing ?

I'm tempted just to unset the var... but ideas are welcome..

Regards,
Antoine


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] Fix failure to detach if threads exit while detaching on linux
  2016-06-03 15:59               ` Antoine Tremblay
@ 2016-06-03 16:21                 ` Pedro Alves
  2016-06-03 16:43                   ` [PATCH v3] " Antoine Tremblay
  0 siblings, 1 reply; 13+ messages in thread
From: Pedro Alves @ 2016-06-03 16:21 UTC (permalink / raw)
  To: Antoine Tremblay; +Cc: gdb-patches

On 06/03/2016 04:58 PM, Antoine Tremblay wrote:

> However, testing it it triggers the test_server_exit timeout case, I'm
> guessing because eof already happened and we're waiting for nothing ?
> 
> I'm tempted just to unset the var... but ideas are welcome..

Would be easier to try if I had access to an updated patch.. :-)

Thanks,
Pedro Alves


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v3] Fix failure to detach if threads exit while detaching on linux
  2016-06-03 16:21                 ` Pedro Alves
@ 2016-06-03 16:43                   ` Antoine Tremblay
  2016-06-03 19:15                     ` Pedro Alves
  0 siblings, 1 reply; 13+ messages in thread
From: Antoine Tremblay @ 2016-06-03 16:43 UTC (permalink / raw)
  To: palves, gdb-patches; +Cc: Antoine Tremblay

Updated patch...

Thanks for looking at this!

Antoine
--

This patches fixes detaching on linux when some threads exit while we're
detaching with GDB and GDBServer.

What happened before is that as GDB/GDBserver would be detaching threads
one thread at a time and allowing them to continue, if one of these
detached threads called exit for example and the other threads were
destroyed GDB/GDBserver would still try and detach these exited threads
and fail with a message like: "Can't detach process." as ptrace could not
execute the operation.

This patch uses check_ptrace_stopped_lwp_gone or
linux_proc_pid_is_trace_stopped_nowarn like is used in the resume case to
avoid an error if this function detects that the ptrace failure is normal
since the thread has exited.

This patch adds the gdb.threads/detach-gone-thread.exp test for this case.

Tested on x86-linux with {unix, native-gdbserver, native-extended-gdbserver}

gdb/gdbserver/ChangeLog:

	* linux-low.c (check_ptrace_stopped_lwp_gone) Move up to be used
	by linux_detach_one_lwp.
	(linux_detach_one_lwp): Report the error only if
	check_ptrace_stopped_lwp_gone is false.

gdb/ChangeLog:

	* inf-ptrace.c (inf_ptrace_detach): Call inf_ptrace_detach_success
	on success.
	(inf_ptrace_detach_success): New function.
	* inf-ptrace.h (inf_ptrace_detach_success): New function declaration.
	* linux-nat.c (check_ptrace_stopped_lwp_gone): Move up to be used
	by detach_callback.
	(detach_callback): Report the error only if
	check_ptrace_stopped_lwp_gone is false.
	(linux_nat_detach): Likewise.

gdb/testsuite/ChangeLog:

	* gdb.threads/detach-gone-thread.c: New file.
	* gdb.threads/detach-gone-thread.exp: New file.
---
 gdb/gdbserver/linux-low.c                        | 73 +++++++++----------
 gdb/inf-ptrace.c                                 | 10 ++-
 gdb/inf-ptrace.h                                 |  4 ++
 gdb/linux-nat.c                                  | 90 ++++++++++++++----------
 gdb/testsuite/gdb.threads/detach-gone-thread.c   | 47 +++++++++++++
 gdb/testsuite/gdb.threads/detach-gone-thread.exp | 59 ++++++++++++++++
 6 files changed, 210 insertions(+), 73 deletions(-)
 create mode 100644 gdb/testsuite/gdb.threads/detach-gone-thread.c
 create mode 100644 gdb/testsuite/gdb.threads/detach-gone-thread.exp

diff --git a/gdb/gdbserver/linux-low.c b/gdb/gdbserver/linux-low.c
index 81134b0..5f02dab 100644
--- a/gdb/gdbserver/linux-low.c
+++ b/gdb/gdbserver/linux-low.c
@@ -1447,6 +1447,39 @@ get_detach_signal (struct thread_info *thread)
     }
 }
 
+/* Called when we try to resume or detach a stopped LWP and that errors
+   out.  If the LWP is no longer in ptrace-stopped state (meaning it's
+   zombie, or about to become), discard the error, clear any pending
+   status the LWP may have, and return true (we'll collect the exit status
+   soon enough).  Otherwise, return false.  */
+
+static int
+check_ptrace_stopped_lwp_gone (struct lwp_info *lp)
+{
+  struct thread_info *thread = get_lwp_thread (lp);
+
+  /* If we get an error after resuming the LWP successfully, we'd
+     confuse !T state for the LWP being gone.  */
+  gdb_assert (lp->stopped);
+
+  /* We can't just check whether the LWP is in 'Z (Zombie)' state,
+     because even if ptrace failed with ESRCH, the tracee may be "not
+     yet fully dead", but already refusing ptrace requests.  In that
+     case the tracee has 'R (Running)' state for a little bit
+     (observed in Linux 3.18).  See also the note on ESRCH in the
+     ptrace(2) man page.  Instead, check whether the LWP has any state
+     other than ptrace-stopped.  */
+
+  /* Don't assume anything if /proc/PID/status can't be read.  */
+  if (linux_proc_pid_is_trace_stopped_nowarn (lwpid_of (thread)) == 0)
+    {
+      lp->stop_reason = TARGET_STOPPED_BY_NO_REASON;
+      lp->status_pending_p = 0;
+      return 1;
+    }
+  return 0;
+}
+
 static int
 linux_detach_one_lwp (struct inferior_list_entry *entry, void *args)
 {
@@ -1480,9 +1513,10 @@ linux_detach_one_lwp (struct inferior_list_entry *entry, void *args)
     the_low_target.prepare_to_resume (lwp);
   if (ptrace (PTRACE_DETACH, lwpid_of (thread), (PTRACE_TYPE_ARG3) 0,
 	      (PTRACE_TYPE_ARG4) (long) sig) < 0)
-    error (_("Can't detach %s: %s"),
-	   target_pid_to_str (ptid_of (thread)),
-	   strerror (errno));
+    if (!check_ptrace_stopped_lwp_gone (lwp))
+      error (_("Can't detach %s: %s"),
+	     target_pid_to_str (ptid_of (thread)),
+	     strerror (errno));
 
   delete_lwp (lwp);
   return 0;
@@ -4331,39 +4365,6 @@ linux_resume_one_lwp_throw (struct lwp_info *lwp,
   lwp->stop_reason = TARGET_STOPPED_BY_NO_REASON;
 }
 
-/* Called when we try to resume a stopped LWP and that errors out.  If
-   the LWP is no longer in ptrace-stopped state (meaning it's zombie,
-   or about to become), discard the error, clear any pending status
-   the LWP may have, and return true (we'll collect the exit status
-   soon enough).  Otherwise, return false.  */
-
-static int
-check_ptrace_stopped_lwp_gone (struct lwp_info *lp)
-{
-  struct thread_info *thread = get_lwp_thread (lp);
-
-  /* If we get an error after resuming the LWP successfully, we'd
-     confuse !T state for the LWP being gone.  */
-  gdb_assert (lp->stopped);
-
-  /* We can't just check whether the LWP is in 'Z (Zombie)' state,
-     because even if ptrace failed with ESRCH, the tracee may be "not
-     yet fully dead", but already refusing ptrace requests.  In that
-     case the tracee has 'R (Running)' state for a little bit
-     (observed in Linux 3.18).  See also the note on ESRCH in the
-     ptrace(2) man page.  Instead, check whether the LWP has any state
-     other than ptrace-stopped.  */
-
-  /* Don't assume anything if /proc/PID/status can't be read.  */
-  if (linux_proc_pid_is_trace_stopped_nowarn (lwpid_of (thread)) == 0)
-    {
-      lp->stop_reason = TARGET_STOPPED_BY_NO_REASON;
-      lp->status_pending_p = 0;
-      return 1;
-    }
-  return 0;
-}
-
 /* Like linux_resume_one_lwp_throw, but no error is thrown if the LWP
    disappears while we try to resume it.  */
 
diff --git a/gdb/inf-ptrace.c b/gdb/inf-ptrace.c
index 329d8fb..27f531b 100644
--- a/gdb/inf-ptrace.c
+++ b/gdb/inf-ptrace.c
@@ -265,9 +265,17 @@ inf_ptrace_detach (struct target_ops *ops, const char *args, int from_tty)
   error (_("This system does not support detaching from a process"));
 #endif
 
+  inf_ptrace_detach_success(ops);
+}
+
+/* See inf-ptrace.h.  */
+
+void
+inf_ptrace_detach_success (struct target_ops *ops)
+{
+  pid_t pid = ptid_get_pid (inferior_ptid);
   inferior_ptid = null_ptid;
   detach_inferior (pid);
-
   inf_child_maybe_unpush_target (ops);
 }
 
diff --git a/gdb/inf-ptrace.h b/gdb/inf-ptrace.h
index 0a26720..f1fc111 100644
--- a/gdb/inf-ptrace.h
+++ b/gdb/inf-ptrace.h
@@ -38,4 +38,8 @@ extern struct target_ops *
 
 extern pid_t get_ptrace_pid (ptid_t);
 
+
+/* Cleanup the inferior after a successful ptrace detach.  */
+extern void inf_ptrace_detach_success (struct target_ops *ops);
+
 #endif
diff --git a/gdb/linux-nat.c b/gdb/linux-nat.c
index e6d525f..56a222c 100644
--- a/gdb/linux-nat.c
+++ b/gdb/linux-nat.c
@@ -1382,6 +1382,38 @@ get_pending_status (struct lwp_info *lp, int *status)
   return 0;
 }
 
+/* Called when we try to resume or detach a stopped LWP and that errors
+   out.  If the LWP is no longer in ptrace-stopped state (meaning it's
+   zombie, or about to become), discard the error, clear any pending
+   status the LWP may have, and return true (we'll collect the exit status
+   soon enough).  Otherwise, return false.  */
+
+static int
+check_ptrace_stopped_lwp_gone (struct lwp_info *lp)
+{
+  /* If we get an error after resuming the LWP successfully, we'd
+     confuse !T state for the LWP being gone.  */
+  gdb_assert (lp->stopped);
+
+  /* We can't just check whether the LWP is in 'Z (Zombie)' state,
+     because even if ptrace failed with ESRCH, the tracee may be "not
+     yet fully dead", but already refusing ptrace requests.  In that
+     case the tracee has 'R (Running)' state for a little bit
+     (observed in Linux 3.18).  See also the note on ESRCH in the
+     ptrace(2) man page.  Instead, check whether the LWP has any state
+     other than ptrace-stopped.  */
+
+  /* Don't assume anything if /proc/PID/status can't be read.  */
+  if (linux_proc_pid_is_trace_stopped_nowarn (ptid_get_lwp (lp->ptid)) == 0)
+    {
+      lp->stop_reason = TARGET_STOPPED_BY_NO_REASON;
+      lp->status = 0;
+      lp->waitstatus.kind = TARGET_WAITKIND_IGNORE;
+      return 1;
+    }
+  return 0;
+}
+
 static int
 detach_callback (struct lwp_info *lp, void *data)
 {
@@ -1418,8 +1450,9 @@ detach_callback (struct lwp_info *lp, void *data)
       errno = 0;
       if (ptrace (PTRACE_DETACH, ptid_get_lwp (lp->ptid), 0,
 		  WSTOPSIG (status)) < 0)
-	error (_("Can't detach %s: %s"), target_pid_to_str (lp->ptid),
-	       safe_strerror (errno));
+	if (!check_ptrace_stopped_lwp_gone (lp))
+	  error (_("Can't detach %s: %s"), target_pid_to_str (lp->ptid),
+		 safe_strerror (errno));
 
       if (debug_linux_nat)
 	fprintf_unfiltered (gdb_stdlog,
@@ -1480,7 +1513,6 @@ linux_nat_detach (struct target_ops *ops, const char *args, int from_tty)
 
   if (linux_nat_prepare_to_resume != NULL)
     linux_nat_prepare_to_resume (main_lwp);
-  delete_lwp (main_lwp->ptid);
 
   if (forks_exist_p ())
     {
@@ -1491,7 +1523,25 @@ linux_nat_detach (struct target_ops *ops, const char *args, int from_tty)
       linux_fork_detach (args, from_tty);
     }
   else
-    linux_ops->to_detach (ops, args, from_tty);
+    {
+      TRY
+	{
+	  linux_ops->to_detach (ops, args, from_tty);
+	}
+      CATCH (ex, RETURN_MASK_ERROR)
+	{
+	  if (!check_ptrace_stopped_lwp_gone (main_lwp))
+	    {
+	      throw_exception (ex);
+	    }
+	  /* Ignore the error since the thread is gone already.  */
+	  else
+	    {
+	      inf_ptrace_detach_success(ops);
+	    }
+	}
+    }
+  delete_lwp (main_lwp->ptid);
 }
 
 /* Resume execution of the inferior process.  If STEP is nonzero,
@@ -1531,38 +1581,6 @@ linux_resume_one_lwp_throw (struct lwp_info *lp, int step,
   registers_changed_ptid (lp->ptid);
 }
 
-/* Called when we try to resume a stopped LWP and that errors out.  If
-   the LWP is no longer in ptrace-stopped state (meaning it's zombie,
-   or about to become), discard the error, clear any pending status
-   the LWP may have, and return true (we'll collect the exit status
-   soon enough).  Otherwise, return false.  */
-
-static int
-check_ptrace_stopped_lwp_gone (struct lwp_info *lp)
-{
-  /* If we get an error after resuming the LWP successfully, we'd
-     confuse !T state for the LWP being gone.  */
-  gdb_assert (lp->stopped);
-
-  /* We can't just check whether the LWP is in 'Z (Zombie)' state,
-     because even if ptrace failed with ESRCH, the tracee may be "not
-     yet fully dead", but already refusing ptrace requests.  In that
-     case the tracee has 'R (Running)' state for a little bit
-     (observed in Linux 3.18).  See also the note on ESRCH in the
-     ptrace(2) man page.  Instead, check whether the LWP has any state
-     other than ptrace-stopped.  */
-
-  /* Don't assume anything if /proc/PID/status can't be read.  */
-  if (linux_proc_pid_is_trace_stopped_nowarn (ptid_get_lwp (lp->ptid)) == 0)
-    {
-      lp->stop_reason = TARGET_STOPPED_BY_NO_REASON;
-      lp->status = 0;
-      lp->waitstatus.kind = TARGET_WAITKIND_IGNORE;
-      return 1;
-    }
-  return 0;
-}
-
 /* Like linux_resume_one_lwp_throw, but no error is thrown if the LWP
    disappears while we try to resume it.  */
 
diff --git a/gdb/testsuite/gdb.threads/detach-gone-thread.c b/gdb/testsuite/gdb.threads/detach-gone-thread.c
new file mode 100644
index 0000000..19f6fc2
--- /dev/null
+++ b/gdb/testsuite/gdb.threads/detach-gone-thread.c
@@ -0,0 +1,47 @@
+/* This testcase is part of GDB, the GNU debugger.
+
+   Copyright 2016 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <pthread.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+pthread_barrier_t barrier;
+
+#define NTHREADS 256
+
+void *
+child_function (void *arg)
+{
+  pthread_barrier_wait (&barrier);
+  _exit (0);
+}
+
+int
+main ()
+{
+  pthread_t threads[NTHREADS];
+  int res;
+  int i;
+
+  pthread_barrier_init (&barrier, NULL, NTHREADS + 1);
+
+  for (i = 0; i < NTHREADS; i++)
+    res = pthread_create (&threads[i], NULL, child_function, NULL);
+
+  pthread_barrier_wait (&barrier);
+  exit (0);
+}
diff --git a/gdb/testsuite/gdb.threads/detach-gone-thread.exp b/gdb/testsuite/gdb.threads/detach-gone-thread.exp
new file mode 100644
index 0000000..b8caf18
--- /dev/null
+++ b/gdb/testsuite/gdb.threads/detach-gone-thread.exp
@@ -0,0 +1,59 @@
+# Copyright 2015-2016 Free Software Foundation, Inc.
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+standard_testfile
+
+if {[gdb_compile_pthreads "${srcdir}/${subdir}/${srcfile}" "${binfile}" executable {debug}] != "" } {
+    return -1
+}
+
+clean_restart ${testfile}
+
+if ![runto_main] {
+    fail "Can't run to main"
+    return -1
+}
+
+proc test_server_exit {} {
+    global server_spawn_id
+    if ![info exists server_spawn_id] {
+	return
+    }
+
+    gdb_expect {
+	-i $server_spawn_id
+	eof {
+	    pass $test
+	    wait -i $server_spawn_id
+	    unset server_spawn_id
+	}
+	timeout {
+	    fail "$test (timeout)"
+	}
+    }
+}
+
+gdb_breakpoint "_exit"
+gdb_continue_to_breakpoint "_exit" ".*_exit.*"
+set test "detach"
+gdb_test_multiple $test $test {
+    -re "Detaching from .*, process $decimal\r\nEnding remote debugging\.\r\n$gdb_prompt $" {
+        # This is what you get with "target remote".
+        pass $test
+	test_server_exit
+    }
+    -re "Detaching from .*, process $decimal\r\n$gdb_prompt $" {
+        pass $test
+    }
+}
-- 
2.8.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] Fix failure to detach if threads exit while detaching on linux
  2016-06-03 16:43                   ` [PATCH v3] " Antoine Tremblay
@ 2016-06-03 19:15                     ` Pedro Alves
  2016-06-03 19:32                       ` Antoine Tremblay
  0 siblings, 1 reply; 13+ messages in thread
From: Pedro Alves @ 2016-06-03 19:15 UTC (permalink / raw)
  To: Antoine Tremblay, gdb-patches

On 06/03/2016 05:43 PM, Antoine Tremblay wrote:
> Updated patch...
> 
> Thanks for looking at this!

The test hangs waiting for gdbserver to exit, because gdbserver
doesn't exit.  :-)

So the extra testing actually found a bug.  Good, see?  :-)

gdbserver is stuck in linux_join:

(gdb) bt
#0  0x00007f31e08af0da in __GI___waitpid (pid=13862, stat_loc=0x7fffcdf15f28, options=0) at ../sysdeps/unix/sysv/linux/waitpid.c:29
#1  0x000000000043bc53 in my_waitpid (pid=13862, status=0x7fffcdf15f28, flags=0) at /home/pedro/gdb/mygit/src/gdb/gdbserver/../nat/linux-waitpid.c:88
#2  0x000000000042e0d0 in linux_join (pid=13862) at /home/pedro/gdb/mygit/src/gdb/gdbserver/linux-low.c:1603
#3  0x0000000000414145 in process_serial_event () at /home/pedro/gdb/mygit/src/gdb/gdbserver/server.c:4000
#4  0x0000000000414fb7 in handle_serial_event (err=0, client_data=0x0) at /home/pedro/gdb/mygit/src/gdb/gdbserver/server.c:4347
#5  0x000000000041bf42 in handle_file_event (event_file_desc=4) at /home/pedro/gdb/mygit/src/gdb/gdbserver/event-loop.c:428
#6  0x000000000041b66e in process_event () at /home/pedro/gdb/mygit/src/gdb/gdbserver/event-loop.c:184
#7  0x000000000041c4b6 in start_event_loop () at /home/pedro/gdb/mygit/src/gdb/gdbserver/event-loop.c:547
#8  0x0000000000413896 in captured_main (argc=4, argv=0x7fffcdf16358) at /home/pedro/gdb/mygit/src/gdb/gdbserver/server.c:3719
#9  0x0000000000413abf in main (argc=4, argv=0x7fffcdf16358) at /home/pedro/gdb/mygit/src/gdb/gdbserver/server.c:3804
(gdb)

Running the test manually, I can reproduce it.  In this one run, 
gdbserver is waiting for pid 32650 to exit.  And that process'es
status is:

 $ cat  /proc/32650/status 
 Name:   detach-gone-thr
 State:  Z (zombie)
 Tgid:   32650
 Ngid:   0
 Pid:    32650
 PPid:   32642
 TracerPid:      0
 ...
 Threads:        256

So why isn't gdbserver's inferior process exiting and reporting
a status to gdbserver, which is the inferior process'es parent?

Notice TracerPid == 0.  That means gdbserver successfully detached
from that lwp.

However, seems like gdbserver didn't manage to detach from
any of the other threads:

$ grep -h State /proc/32650/task/*/status | sort | uniq -c
    256 State:  Z (zombie)

$ grep -h Tracer /proc/32650/task/*/status | sort | uniq -c
      1 TracerPid:      0
    255 TracerPid:      32642

32642 is gdbserver, which again, is also 32650's parent.

gdbserver is detaching from the leader thread first,
while gdb isn't.  So I thought I'd try to make gdbserver
detach from the non-leader lwps first.  Doesn't make a
difference.

Under ptrace, the leader thread doesn't report an exit to the
ptracer until all the children are reaped.  So I thought I'd try
to make gdbserver collect the exit status of each zombie lwp,
since it's still supposedly attached to them.
And there you go, that works.  See hacky patch on top of yours below.
The important bit is this:

-    if (!check_ptrace_stopped_lwp_gone (lwp))
-      error (_("Can't detach %s: %s"),
-	     target_pid_to_str (ptid_of (thread)),
-	     strerror (errno));
+    {
+      int ret, status;
+
+      if (!check_ptrace_stopped_lwp_gone (lwp))
+	error (_("Can't detach %s: %s"),
+	       target_pid_to_str (ptid_of (thread)),
+	       strerror (errno));
+
+      ret = waitpid (lwpid, &status, __WALL);
+    }
 
I have to go right now, but I'll clean this up once I have a chance.

From 2895f87d0f59de37dcbd3f0f3cd281e26303bb7d Mon Sep 17 00:00:00 2001
From: Pedro Alves <palves@redhat.com>
Date: Fri, 3 Jun 2016 20:09:14 +0100
Subject: [PATCH] fix hang

---
 gdb/gdbserver/linux-low.c                        | 52 +++++++++++++++++-------
 gdb/testsuite/gdb.threads/detach-gone-thread.exp |  1 +
 2 files changed, 39 insertions(+), 14 deletions(-)

diff --git a/gdb/gdbserver/linux-low.c b/gdb/gdbserver/linux-low.c
index 5f02dab..aad38a4 100644
--- a/gdb/gdbserver/linux-low.c
+++ b/gdb/gdbserver/linux-low.c
@@ -1480,16 +1480,12 @@ check_ptrace_stopped_lwp_gone (struct lwp_info *lp)
   return 0;
 }
 
-static int
-linux_detach_one_lwp (struct inferior_list_entry *entry, void *args)
+static void
+linux_detach_one_lwp (struct lwp_info *lwp)
 {
-  struct thread_info *thread = (struct thread_info *) entry;
-  struct lwp_info *lwp = get_thread_lwp (thread);
-  int pid = * (int *) args;
+  struct thread_info *thread = get_lwp_thread (lwp);
   int sig;
-
-  if (ptid_get_pid (entry->id) != pid)
-    return 0;
+  int lwpid;
 
   /* If there is a pending SIGSTOP, get rid of it.  */
   if (lwp->stop_expected)
@@ -1511,14 +1507,38 @@ linux_detach_one_lwp (struct inferior_list_entry *entry, void *args)
   /* Finally, let it resume.  */
   if (the_low_target.prepare_to_resume != NULL)
     the_low_target.prepare_to_resume (lwp);
-  if (ptrace (PTRACE_DETACH, lwpid_of (thread), (PTRACE_TYPE_ARG3) 0,
+  lwpid = lwpid_of (thread);
+  if (ptrace (PTRACE_DETACH, lwpid, (PTRACE_TYPE_ARG3) 0,
 	      (PTRACE_TYPE_ARG4) (long) sig) < 0)
-    if (!check_ptrace_stopped_lwp_gone (lwp))
-      error (_("Can't detach %s: %s"),
-	     target_pid_to_str (ptid_of (thread)),
-	     strerror (errno));
+    {
+      int ret, status;
+
+      if (!check_ptrace_stopped_lwp_gone (lwp))
+	error (_("Can't detach %s: %s"),
+	       target_pid_to_str (ptid_of (thread)),
+	       strerror (errno));
+
+      ret = waitpid (lwpid, &status, __WALL);
+    }
 
   delete_lwp (lwp);
+}
+
+static int
+linux_detach_lwp_callback (struct inferior_list_entry *entry, void *args)
+{
+  struct thread_info *thread = (struct thread_info *) entry;
+  struct lwp_info *lwp = get_thread_lwp (thread);
+  int pid = * (int *) args;
+  int lwpid = lwpid_of (thread);
+
+  if (ptid_get_pid (entry->id) != pid)
+    return 0;
+
+  // if (ptid_get_pid (entry->id) == lwpid)
+  //  return 0;
+
+  linux_detach_one_lwp (lwp);
   return 0;
 }
 
@@ -1549,7 +1569,11 @@ linux_detach (int pid)
   /* Stabilize threads (move out of jump pads).  */
   stabilize_threads ();
 
-  find_inferior (&all_threads, linux_detach_one_lwp, &pid);
+  /* Detach from the children first.  */
+  find_inferior (&all_threads, linux_detach_lwp_callback, &pid);
+
+  //  struct lwp_info *lwp = find_lwp_pid (pid_to_ptid (pid));
+  // linux_detach_one_lwp (lwp);
 
   the_target->mourn (process);
 
diff --git a/gdb/testsuite/gdb.threads/detach-gone-thread.exp b/gdb/testsuite/gdb.threads/detach-gone-thread.exp
index b8caf18..1780aeb 100644
--- a/gdb/testsuite/gdb.threads/detach-gone-thread.exp
+++ b/gdb/testsuite/gdb.threads/detach-gone-thread.exp
@@ -31,6 +31,7 @@ proc test_server_exit {} {
 	return
     }
 
+    set test "server exits"
     gdb_expect {
 	-i $server_spawn_id
 	eof {
-- 
2.5.5



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v3] Fix failure to detach if threads exit while detaching on linux
  2016-06-03 19:15                     ` Pedro Alves
@ 2016-06-03 19:32                       ` Antoine Tremblay
  0 siblings, 0 replies; 13+ messages in thread
From: Antoine Tremblay @ 2016-06-03 19:32 UTC (permalink / raw)
  To: Pedro Alves; +Cc: Antoine Tremblay, gdb-patches


Pedro Alves writes:

> On 06/03/2016 05:43 PM, Antoine Tremblay wrote:
>> Updated patch...
>> 
>> Thanks for looking at this!
>
> The test hangs waiting for gdbserver to exit, because gdbserver
> doesn't exit.  :-)
>
> So the extra testing actually found a bug.  Good, see?  :-)
>

Haha wow indeed :)

> gdbserver is stuck in linux_join:
>
> (gdb) bt
> #0  0x00007f31e08af0da in __GI___waitpid (pid=13862, stat_loc=0x7fffcdf15f28, options=0) at ../sysdeps/unix/sysv/linux/waitpid.c:29
> #1  0x000000000043bc53 in my_waitpid (pid=13862, status=0x7fffcdf15f28, flags=0) at /home/pedro/gdb/mygit/src/gdb/gdbserver/../nat/linux-waitpid.c:88
> #2  0x000000000042e0d0 in linux_join (pid=13862) at /home/pedro/gdb/mygit/src/gdb/gdbserver/linux-low.c:1603
> #3  0x0000000000414145 in process_serial_event () at /home/pedro/gdb/mygit/src/gdb/gdbserver/server.c:4000
> #4  0x0000000000414fb7 in handle_serial_event (err=0, client_data=0x0) at /home/pedro/gdb/mygit/src/gdb/gdbserver/server.c:4347
> #5  0x000000000041bf42 in handle_file_event (event_file_desc=4) at /home/pedro/gdb/mygit/src/gdb/gdbserver/event-loop.c:428
> #6  0x000000000041b66e in process_event () at /home/pedro/gdb/mygit/src/gdb/gdbserver/event-loop.c:184
> #7  0x000000000041c4b6 in start_event_loop () at /home/pedro/gdb/mygit/src/gdb/gdbserver/event-loop.c:547
> #8  0x0000000000413896 in captured_main (argc=4, argv=0x7fffcdf16358) at /home/pedro/gdb/mygit/src/gdb/gdbserver/server.c:3719
> #9  0x0000000000413abf in main (argc=4, argv=0x7fffcdf16358) at /home/pedro/gdb/mygit/src/gdb/gdbserver/server.c:3804
> (gdb)
>
> Running the test manually, I can reproduce it.  In this one run, 
> gdbserver is waiting for pid 32650 to exit.  And that process'es
> status is:
>
>  $ cat  /proc/32650/status 
>  Name:   detach-gone-thr
>  State:  Z (zombie)
>  Tgid:   32650
>  Ngid:   0
>  Pid:    32650
>  PPid:   32642
>  TracerPid:      0
>  ...
>  Threads:        256
>
> So why isn't gdbserver's inferior process exiting and reporting
> a status to gdbserver, which is the inferior process'es parent?
>
> Notice TracerPid == 0.  That means gdbserver successfully detached
> from that lwp.
>
> However, seems like gdbserver didn't manage to detach from
> any of the other threads:
>
> $ grep -h State /proc/32650/task/*/status | sort | uniq -c
>     256 State:  Z (zombie)
>
> $ grep -h Tracer /proc/32650/task/*/status | sort | uniq -c
>       1 TracerPid:      0
>     255 TracerPid:      32642
>
> 32642 is gdbserver, which again, is also 32650's parent.
>
> gdbserver is detaching from the leader thread first,
> while gdb isn't.  So I thought I'd try to make gdbserver
> detach from the non-leader lwps first.  Doesn't make a
> difference.
>
> Under ptrace, the leader thread doesn't report an exit to the
> ptracer until all the children are reaped.  So I thought I'd try
> to make gdbserver collect the exit status of each zombie lwp,
> since it's still supposedly attached to them.
> And there you go, that works.  See hacky patch on top of yours below.
> The important bit is this:
>
> -    if (!check_ptrace_stopped_lwp_gone (lwp))
> -      error (_("Can't detach %s: %s"),
> -	     target_pid_to_str (ptid_of (thread)),
> -	     strerror (errno));
> +    {
> +      int ret, status;
> +
> +      if (!check_ptrace_stopped_lwp_gone (lwp))
> +	error (_("Can't detach %s: %s"),
> +	       target_pid_to_str (ptid_of (thread)),
> +	       strerror (errno));
> +
> +      ret = waitpid (lwpid, &status, __WALL);
> +    }
>  
> I have to go right now, but I'll clean this up once I have a chance.
>

I have to go too, I'll pick this up Monday, I need some time to figure
this out :)

Thanks again!
Antoine

> From 2895f87d0f59de37dcbd3f0f3cd281e26303bb7d Mon Sep 17 00:00:00 2001
> From: Pedro Alves <palves@redhat.com>
> Date: Fri, 3 Jun 2016 20:09:14 +0100
> Subject: [PATCH] fix hang
>
> ---
>  gdb/gdbserver/linux-low.c                        | 52 +++++++++++++++++-------
>  gdb/testsuite/gdb.threads/detach-gone-thread.exp |  1 +
>  2 files changed, 39 insertions(+), 14 deletions(-)
>
> diff --git a/gdb/gdbserver/linux-low.c b/gdb/gdbserver/linux-low.c
> index 5f02dab..aad38a4 100644
> --- a/gdb/gdbserver/linux-low.c
> +++ b/gdb/gdbserver/linux-low.c
> @@ -1480,16 +1480,12 @@ check_ptrace_stopped_lwp_gone (struct lwp_info *lp)
>    return 0;
>  }
>  
> -static int
> -linux_detach_one_lwp (struct inferior_list_entry *entry, void *args)
> +static void
> +linux_detach_one_lwp (struct lwp_info *lwp)
>  {
> -  struct thread_info *thread = (struct thread_info *) entry;
> -  struct lwp_info *lwp = get_thread_lwp (thread);
> -  int pid = * (int *) args;
> +  struct thread_info *thread = get_lwp_thread (lwp);
>    int sig;
> -
> -  if (ptid_get_pid (entry->id) != pid)
> -    return 0;
> +  int lwpid;
>  
>    /* If there is a pending SIGSTOP, get rid of it.  */
>    if (lwp->stop_expected)
> @@ -1511,14 +1507,38 @@ linux_detach_one_lwp (struct inferior_list_entry *entry, void *args)
>    /* Finally, let it resume.  */
>    if (the_low_target.prepare_to_resume != NULL)
>      the_low_target.prepare_to_resume (lwp);
> -  if (ptrace (PTRACE_DETACH, lwpid_of (thread), (PTRACE_TYPE_ARG3) 0,
> +  lwpid = lwpid_of (thread);
> +  if (ptrace (PTRACE_DETACH, lwpid, (PTRACE_TYPE_ARG3) 0,
>  	      (PTRACE_TYPE_ARG4) (long) sig) < 0)
> -    if (!check_ptrace_stopped_lwp_gone (lwp))
> -      error (_("Can't detach %s: %s"),
> -	     target_pid_to_str (ptid_of (thread)),
> -	     strerror (errno));
> +    {
> +      int ret, status;
> +
> +      if (!check_ptrace_stopped_lwp_gone (lwp))
> +	error (_("Can't detach %s: %s"),
> +	       target_pid_to_str (ptid_of (thread)),
> +	       strerror (errno));
> +
> +      ret = waitpid (lwpid, &status, __WALL);
> +    }
>  
>    delete_lwp (lwp);
> +}
> +
> +static int
> +linux_detach_lwp_callback (struct inferior_list_entry *entry, void *args)
> +{
> +  struct thread_info *thread = (struct thread_info *) entry;
> +  struct lwp_info *lwp = get_thread_lwp (thread);
> +  int pid = * (int *) args;
> +  int lwpid = lwpid_of (thread);
> +
> +  if (ptid_get_pid (entry->id) != pid)
> +    return 0;
> +
> +  // if (ptid_get_pid (entry->id) == lwpid)
> +  //  return 0;
> +
> +  linux_detach_one_lwp (lwp);
>    return 0;
>  }
>  
> @@ -1549,7 +1569,11 @@ linux_detach (int pid)
>    /* Stabilize threads (move out of jump pads).  */
>    stabilize_threads ();
>  
> -  find_inferior (&all_threads, linux_detach_one_lwp, &pid);
> +  /* Detach from the children first.  */
> +  find_inferior (&all_threads, linux_detach_lwp_callback, &pid);
> +
> +  //  struct lwp_info *lwp = find_lwp_pid (pid_to_ptid (pid));
> +  // linux_detach_one_lwp (lwp);
>  
>    the_target->mourn (process);
>  
> diff --git a/gdb/testsuite/gdb.threads/detach-gone-thread.exp b/gdb/testsuite/gdb.threads/detach-gone-thread.exp
> index b8caf18..1780aeb 100644
> --- a/gdb/testsuite/gdb.threads/detach-gone-thread.exp
> +++ b/gdb/testsuite/gdb.threads/detach-gone-thread.exp
> @@ -31,6 +31,7 @@ proc test_server_exit {} {
>  	return
>      }
>  
> +    set test "server exits"
>      gdb_expect {
>  	-i $server_spawn_id
>  	eof {


^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2016-06-03 19:32 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-02 14:16 [PATCH] Fix failure to detach if threads exit while detaching on linux Antoine Tremblay
2016-06-03 11:43 ` Pedro Alves
2016-06-03 11:55   ` Antoine Tremblay
2016-06-03 12:05     ` Pedro Alves
2016-06-03 14:49       ` [PATCH v2] " Antoine Tremblay
2016-06-03 15:16         ` Pedro Alves
2016-06-03 15:32           ` Antoine Tremblay
2016-06-03 15:45             ` Pedro Alves
2016-06-03 15:59               ` Antoine Tremblay
2016-06-03 16:21                 ` Pedro Alves
2016-06-03 16:43                   ` [PATCH v3] " Antoine Tremblay
2016-06-03 19:15                     ` Pedro Alves
2016-06-03 19:32                       ` Antoine Tremblay

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox