[patch] Fix for PR gdb/10757

Mirror of the gdb-patches mailing list
 help / color / mirror / Atom feed

* [patch] Fix for PR gdb/10757
@ 2009-10-15 19:10 Paul Pluzhnikov
  2009-10-17 23:26 ` Paul Pluzhnikov
  0 siblings, 1 reply; 13+ messages in thread
From: Paul Pluzhnikov @ 2009-10-15 19:10 UTC (permalink / raw)
  To: gdb-patches; +Cc: ppluzhnikov, Pedro Alves

Greetings,

Here is a proposed fix for gdb/10757:
http://sourceware.org/bugzilla/show_bug.cgi?id=10757

It appears to me that the Linux thread_db interface is horribly broken,
especially this part:

+          This could mean that the thread list inside glibc itself is in
+          inconsistent state, and libthread_db could go on looping forever
+          (observed with glibc-2.3.6).  To prevent that, terminate
+          iteration: thread_db_find_new_threads_2 will retry.  */
+       return 1;

Tested on Linux/x86_64.

[gdbserver will need similar fixes.]

Thanks,
--
Paul Pluzhnikov

2009-10-15  Paul Pluzhnikov  <ppluzhnikov@google.com>

	PR gdb/10757
	* linux-thread-db.c (attach_thread): Return success/failure
	indicator.
	(thread_db_find_new_threads_silently): Retry until no new threads.
	(struct callback_data): New.
	(find_new_threads_callback): Count new threads, stop iteration
	on error.
	(find_new_threads_once): New function.
	(thread_db_find_new_threads_2): Rename from
	thread_db_find_new_threads_1 and adjust.
	(thread_db_find_new_threads_1): New function.


Index: linux-thread-db.c
===================================================================
RCS file: /cvs/src/src/gdb/linux-thread-db.c,v
retrieving revision 1.64
diff -u -p -u -r1.64 linux-thread-db.c
--- linux-thread-db.c	16 Jul 2009 20:45:17 -0000	1.64
+++ linux-thread-db.c	15 Oct 2009 18:55:28 -0000
@@ -160,6 +160,7 @@ struct thread_db_info
 struct thread_db_info *thread_db_list;
 
 static void thread_db_find_new_threads_1 (ptid_t ptid);
+static void thread_db_find_new_threads_2 (ptid_t ptid, int num_loop);
 
 /* Add the current inferior to the list of processes using libpthread.
    Return a pointer to the newly allocated object that was added to
@@ -229,8 +230,8 @@ delete_thread_db_info (int pid)
 }
 
 /* Prototypes for local functions.  */
-static void attach_thread (ptid_t ptid, const td_thrhandle_t *th_p,
-			   const td_thrinfo_t *ti_p);
+static int attach_thread (ptid_t ptid, const td_thrhandle_t *th_p,
+			  const td_thrinfo_t *ti_p);
 static void detach_thread (ptid_t ptid);
 \f
 
@@ -597,7 +598,7 @@ thread_db_find_new_threads_silently (pti
 
   TRY_CATCH (except, RETURN_MASK_ERROR)
     {
-      thread_db_find_new_threads_1 (ptid);
+      thread_db_find_new_threads_2 (ptid, 4);
     }
 
   if (except.reason < 0 && info_verbose)
@@ -977,9 +978,9 @@ thread_db_new_objfile (struct objfile *o
 
 /* Attach to a new thread.  This function is called when we receive a
    TD_CREATE event or when we iterate over all threads and find one
-   that wasn't already in our list.  */
+   that wasn't already in our list.  Returns true on success.  */
 
-static void
+static int
 attach_thread (ptid_t ptid, const td_thrhandle_t *th_p,
 	       const td_thrinfo_t *ti_p)
 {
@@ -1013,7 +1014,7 @@ attach_thread (ptid_t ptid, const td_thr
       if (tp->private != NULL)
 	{
 	  if (!tp->private->dying)
-	    return;
+	    return 0;
 
 	  delete_thread (ptid);
 	  tp = NULL;
@@ -1023,12 +1024,12 @@ attach_thread (ptid_t ptid, const td_thr
   check_thread_signals ();
 
   if (ti_p->ti_state == TD_THR_UNKNOWN || ti_p->ti_state == TD_THR_ZOMBIE)
-    return;			/* A zombie thread -- do not attach.  */
+    return 0;			/* A zombie thread -- do not attach.  */
 
   /* Under GNU/Linux, we have to attach to each and every thread.  */
   if (tp == NULL
       && lin_lwp_attach_lwp (BUILD_LWP (ti_p->ti_lid, GET_PID (ptid))) < 0)
-    return;
+    return 0;
 
   /* Construct the thread's private data.  */
   private = xmalloc (sizeof (struct private_thread_info));
@@ -1056,6 +1057,7 @@ attach_thread (ptid_t ptid, const td_thr
   if (err != TD_OK)
     error (_("Cannot enable thread event reporting for %s: %s"),
 	   target_pid_to_str (ptid), thread_db_err_str (err));
+  return 1;
 }
 
 static void
@@ -1286,6 +1288,12 @@ thread_db_mourn_inferior (struct target_
     unpush_target (ops);
 }
 
+struct callback_data
+{
+  struct thread_db_info *info;
+  int new_threads;
+};
+
 static int
 find_new_threads_callback (const td_thrhandle_t *th_p, void *data)
 {
@@ -1293,7 +1301,8 @@ find_new_threads_callback (const td_thrh
   td_err_e err;
   ptid_t ptid;
   struct thread_info *tp;
-  struct thread_db_info *info = data;
+  struct callback_data *cb_data = data;
+  struct thread_db_info *info = cb_data->info;
 
   err = info->td_thr_get_info_p (th_p, &ti);
   if (err != TD_OK)
@@ -1336,21 +1345,76 @@ find_new_threads_callback (const td_thrh
   ptid = ptid_build (info->pid, ti.ti_lid, 0);
   tp = find_thread_ptid (ptid);
   if (tp == NULL || tp->private == NULL)
-    attach_thread (ptid, th_p, &ti);
+    {
+      if (attach_thread (ptid, th_p, &ti))
+	cb_data->new_threads += 1;
+      else
+	/* Problem attaching this thread; perhaps it exited before we
+	   could attach it?
+	   This could mean that the thread list inside glibc itself is in
+	   inconsistent state, and libthread_db could go on looping forever
+	   (observed with glibc-2.3.6).  To prevent that, terminate
+	   iteration: thread_db_find_new_threads_2 will retry.  */
+	return 1;
+    }
 
   return 0;
 }
 
+/* Helper for thread_db_find_new_threads_2.
+   Returns number of new threads found.  */
+
+static int
+find_new_threads_once (struct thread_db_info *info, int iteration,
+		       int *errp)
+{
+  volatile struct gdb_exception except;
+  struct callback_data data;
+  int err;
+
+  data.info = info;
+  data.new_threads = 0;
+
+  TRY_CATCH (except, RETURN_MASK_ERROR)
+    {
+      /* Iterate over all user-space threads to discover new threads.  */
+      err = info->td_ta_thr_iter_p (info->thread_agent,
+				    find_new_threads_callback,
+				    &data,
+				    TD_THR_ANY_STATE,
+				    TD_THR_LOWEST_PRIORITY,
+				    TD_SIGNO_MASK,
+				    TD_THR_ANY_USER_FLAGS);
+    }
+
+  if (info_verbose)
+    {
+      if (except.reason < 0)
+	exception_fprintf (gdb_stderr, except,
+			   "Warning: find_new_threads_once: ");
+
+      printf_filtered (_("Found %d new threads in iteration %d.\n"),
+		       data.new_threads, iteration);
+    }
+
+  if (errp != NULL)
+    *errp = err;
+
+  return data.new_threads;
+}
+
 /* Search for new threads, accessing memory through stopped thread
-   PTID.  */
+   PTID.  If NUM_LOOPS is non-zero, repeat searching until NUM_LOOP
+   searches in a row do not discover any new threads.  */
 
 static void
-thread_db_find_new_threads_1 (ptid_t ptid)
+thread_db_find_new_threads_2 (ptid_t ptid, int num_loop)
 {
   td_err_e err;
   struct lwp_info *lp;
   struct thread_db_info *info;
   int pid = ptid_get_pid (ptid);
+  int i, loop;
 
   /* In linux, we can only read memory through a stopped lwp.  */
   ALL_LWPS (lp, ptid)
@@ -1365,15 +1429,33 @@ thread_db_find_new_threads_1 (ptid_t pti
 
   /* Access an lwp we know is stopped.  */
   info->proc_handle.ptid = ptid;
-  /* Iterate over all user-space threads to discover new threads.  */
-  err = info->td_ta_thr_iter_p (info->thread_agent, find_new_threads_callback,
-				info, TD_THR_ANY_STATE, TD_THR_LOWEST_PRIORITY,
-				TD_SIGNO_MASK, TD_THR_ANY_USER_FLAGS);
-  if (err != TD_OK)
-    error (_("Cannot find new threads: %s"), thread_db_err_str (err));
+
+  if (num_loop != 0)
+    {
+      /* Require NUM_LOOP successive iterations which do not find any new threads.  */
+      for (i = 0, loop = 0; loop < 4; ++i, ++loop)
+	if (find_new_threads_once (info, i, NULL) != 0)
+	  /* Found some new threads.  Restart the loop from beginning.  */
+	  loop = -1;
+    }
+  else
+    {
+      int err;
+
+      find_new_threads_once (info, 0, &err);
+      if (err != TD_OK)
+	error (_("Cannot find new threads: %s"), thread_db_err_str (err));
+    }
 }
 
 static void
+thread_db_find_new_threads_1 (ptid_t ptid)
+{
+  thread_db_find_new_threads_2 (ptid, 0);
+}
+
+
+static void
 thread_db_find_new_threads (struct target_ops *ops)
 {
   struct thread_db_info *info;


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch] Fix for PR gdb/10757
  2009-10-15 19:10 [patch] Fix for PR gdb/10757 Paul Pluzhnikov
@ 2009-10-17 23:26 ` Paul Pluzhnikov
  2009-10-25 17:15   ` Paul Pluzhnikov
  2009-10-25 23:54   ` Pedro Alves
  0 siblings, 2 replies; 13+ messages in thread
From: Paul Pluzhnikov @ 2009-10-17 23:26 UTC (permalink / raw)
  To: gdb-patches; +Cc: ppluzhnikov, Pedro Alves

[-- Attachment #1: Type: text/plain, Size: 862 bytes --]

On Thu, Oct 15, 2009 at 12:10 PM, Paul Pluzhnikov
<ppluzhnikov@google.com> wrote:

> Here is a proposed fix for gdb/10757:
> http://sourceware.org/bugzilla/show_bug.cgi?id=10757

This failed to compile with GCC 4.4 ('err' may be used uninitialized).
Here is an updated patch.

Thanks,
-- 
Paul Pluzhnikov


2009-10-17  Paul Pluzhnikov  <ppluzhnikov@google.com>

       PR gdb/10757
       * linux-thread-db.c (attach_thread): Return success/failure
       indicator.
       (thread_db_find_new_threads_silently): Retry until no new threads.
       (struct callback_data): New.
       (find_new_threads_callback): Count new threads, stop iteration
       on error.
       (find_new_threads_once): New function.
       (thread_db_find_new_threads_2): Rename from
       thread_db_find_new_threads_1 and adjust.
       (thread_db_find_new_threads_1): New function.

[-- Attachment #2: gdb-pr10757-20091017.txt --]
[-- Type: text/plain, Size: 6783 bytes --]

Index: linux-thread-db.c
===================================================================
RCS file: /cvs/src/src/gdb/linux-thread-db.c,v
retrieving revision 1.64
diff -u -p -u -r1.64 linux-thread-db.c
--- linux-thread-db.c	16 Jul 2009 20:45:17 -0000	1.64
+++ linux-thread-db.c	15 Oct 2009 18:55:28 -0000
@@ -160,6 +160,7 @@ struct thread_db_info
 struct thread_db_info *thread_db_list;
 
 static void thread_db_find_new_threads_1 (ptid_t ptid);
+static void thread_db_find_new_threads_2 (ptid_t ptid, int num_loop);
 
 /* Add the current inferior to the list of processes using libpthread.
    Return a pointer to the newly allocated object that was added to
@@ -229,8 +230,8 @@ delete_thread_db_info (int pid)
 }
 
 /* Prototypes for local functions.  */
-static void attach_thread (ptid_t ptid, const td_thrhandle_t *th_p,
-			   const td_thrinfo_t *ti_p);
+static int attach_thread (ptid_t ptid, const td_thrhandle_t *th_p,
+			  const td_thrinfo_t *ti_p);
 static void detach_thread (ptid_t ptid);
 \f
 
@@ -597,7 +598,7 @@ thread_db_find_new_threads_silently (pti
 
   TRY_CATCH (except, RETURN_MASK_ERROR)
     {
-      thread_db_find_new_threads_1 (ptid);
+      thread_db_find_new_threads_2 (ptid, 4);
     }
 
   if (except.reason < 0 && info_verbose)
@@ -977,9 +978,9 @@ thread_db_new_objfile (struct objfile *o
 
 /* Attach to a new thread.  This function is called when we receive a
    TD_CREATE event or when we iterate over all threads and find one
-   that wasn't already in our list.  */
+   that wasn't already in our list.  Returns true on success.  */
 
-static void
+static int
 attach_thread (ptid_t ptid, const td_thrhandle_t *th_p,
 	       const td_thrinfo_t *ti_p)
 {
@@ -1013,7 +1014,7 @@ attach_thread (ptid_t ptid, const td_thr
       if (tp->private != NULL)
 	{
 	  if (!tp->private->dying)
-	    return;
+	    return 0;
 
 	  delete_thread (ptid);
 	  tp = NULL;
@@ -1023,12 +1024,12 @@ attach_thread (ptid_t ptid, const td_thr
   check_thread_signals ();
 
   if (ti_p->ti_state == TD_THR_UNKNOWN || ti_p->ti_state == TD_THR_ZOMBIE)
-    return;			/* A zombie thread -- do not attach.  */
+    return 0;			/* A zombie thread -- do not attach.  */
 
   /* Under GNU/Linux, we have to attach to each and every thread.  */
   if (tp == NULL
       && lin_lwp_attach_lwp (BUILD_LWP (ti_p->ti_lid, GET_PID (ptid))) < 0)
-    return;
+    return 0;
 
   /* Construct the thread's private data.  */
   private = xmalloc (sizeof (struct private_thread_info));
@@ -1056,6 +1057,7 @@ attach_thread (ptid_t ptid, const td_thr
   if (err != TD_OK)
     error (_("Cannot enable thread event reporting for %s: %s"),
 	   target_pid_to_str (ptid), thread_db_err_str (err));
+  return 1;
 }
 
 static void
@@ -1286,6 +1288,12 @@ thread_db_mourn_inferior (struct target_
     unpush_target (ops);
 }
 
+struct callback_data
+{
+  struct thread_db_info *info;
+  int new_threads;
+};
+
 static int
 find_new_threads_callback (const td_thrhandle_t *th_p, void *data)
 {
@@ -1293,7 +1301,8 @@ find_new_threads_callback (const td_thrh
   td_err_e err;
   ptid_t ptid;
   struct thread_info *tp;
-  struct thread_db_info *info = data;
+  struct callback_data *cb_data = data;
+  struct thread_db_info *info = cb_data->info;
 
   err = info->td_thr_get_info_p (th_p, &ti);
   if (err != TD_OK)
@@ -1336,21 +1345,76 @@ find_new_threads_callback (const td_thrh
   ptid = ptid_build (info->pid, ti.ti_lid, 0);
   tp = find_thread_ptid (ptid);
   if (tp == NULL || tp->private == NULL)
-    attach_thread (ptid, th_p, &ti);
+    {
+      if (attach_thread (ptid, th_p, &ti))
+	cb_data->new_threads += 1;
+      else
+	/* Problem attaching this thread; perhaps it exited before we
+	   could attach it?
+	   This could mean that the thread list inside glibc itself is in
+	   inconsistent state, and libthread_db could go on looping forever
+	   (observed with glibc-2.3.6).  To prevent that, terminate
+	   iteration: thread_db_find_new_threads_2 will retry.  */
+	return 1;
+    }
 
   return 0;
 }
 
+/* Helper for thread_db_find_new_threads_2.
+   Returns number of new threads found.  */
+
+static int
+find_new_threads_once (struct thread_db_info *info, int iteration,
+		       int *errp)
+{
+  volatile struct gdb_exception except;
+  struct callback_data data;
+  int err = TD_ERR;
+
+  data.info = info;
+  data.new_threads = 0;
+
+  TRY_CATCH (except, RETURN_MASK_ERROR)
+    {
+      /* Iterate over all user-space threads to discover new threads.  */
+      err = info->td_ta_thr_iter_p (info->thread_agent,
+				    find_new_threads_callback,
+				    &data,
+				    TD_THR_ANY_STATE,
+				    TD_THR_LOWEST_PRIORITY,
+				    TD_SIGNO_MASK,
+				    TD_THR_ANY_USER_FLAGS);
+    }
+
+  if (info_verbose)
+    {
+      if (except.reason < 0)
+	exception_fprintf (gdb_stderr, except,
+			   "Warning: find_new_threads_once: ");
+
+      printf_filtered (_("Found %d new threads in iteration %d.\n"),
+		       data.new_threads, iteration);
+    }
+
+  if (errp != NULL)
+    *errp = err;
+
+  return data.new_threads;
+}
+
 /* Search for new threads, accessing memory through stopped thread
-   PTID.  */
+   PTID.  If NUM_LOOPS is non-zero, repeat searching until NUM_LOOP
+   searches in a row do not discover any new threads.  */
 
 static void
-thread_db_find_new_threads_1 (ptid_t ptid)
+thread_db_find_new_threads_2 (ptid_t ptid, int num_loop)
 {
   td_err_e err;
   struct lwp_info *lp;
   struct thread_db_info *info;
   int pid = ptid_get_pid (ptid);
+  int i, loop;
 
   /* In linux, we can only read memory through a stopped lwp.  */
   ALL_LWPS (lp, ptid)
@@ -1365,15 +1429,33 @@ thread_db_find_new_threads_1 (ptid_t pti
 
   /* Access an lwp we know is stopped.  */
   info->proc_handle.ptid = ptid;
-  /* Iterate over all user-space threads to discover new threads.  */
-  err = info->td_ta_thr_iter_p (info->thread_agent, find_new_threads_callback,
-				info, TD_THR_ANY_STATE, TD_THR_LOWEST_PRIORITY,
-				TD_SIGNO_MASK, TD_THR_ANY_USER_FLAGS);
-  if (err != TD_OK)
-    error (_("Cannot find new threads: %s"), thread_db_err_str (err));
+
+  if (num_loop != 0)
+    {
+      /* Require NUM_LOOP successive iterations which do not find any new threads.  */
+      for (i = 0, loop = 0; loop < 4; ++i, ++loop)
+	if (find_new_threads_once (info, i, NULL) != 0)
+	  /* Found some new threads.  Restart the loop from beginning.  */
+	  loop = -1;
+    }
+  else
+    {
+      int err;
+
+      find_new_threads_once (info, 0, &err);
+      if (err != TD_OK)
+	error (_("Cannot find new threads: %s"), thread_db_err_str (err));
+    }
 }
 
 static void
+thread_db_find_new_threads_1 (ptid_t ptid)
+{
+  thread_db_find_new_threads_2 (ptid, 0);
+}
+
+
+static void
 thread_db_find_new_threads (struct target_ops *ops)
 {
   struct thread_db_info *info;

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch] Fix for PR gdb/10757
  2009-10-17 23:26 ` Paul Pluzhnikov
@ 2009-10-25 17:15   ` Paul Pluzhnikov
  2009-10-25 23:54   ` Pedro Alves
  1 sibling, 0 replies; 13+ messages in thread
From: Paul Pluzhnikov @ 2009-10-25 17:15 UTC (permalink / raw)
  To: gdb-patches; +Cc: ppluzhnikov, Pedro Alves

On Sat, Oct 17, 2009 at 4:25 PM, Paul Pluzhnikov <ppluzhnikov@google.com> wrote:

>> Here is a proposed fix for gdb/10757:
>> http://sourceware.org/bugzilla/show_bug.cgi?id=10757

Ping?
http://sourceware.org/ml/gdb-patches/2009-10/msg00401.html

Thanks,
-- 
Paul Pluzhnikov


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch] Fix for PR gdb/10757
  2009-10-17 23:26 ` Paul Pluzhnikov
  2009-10-25 17:15   ` Paul Pluzhnikov
@ 2009-10-25 23:54   ` Pedro Alves
  2009-10-26 21:25     ` Paul Pluzhnikov
  1 sibling, 1 reply; 13+ messages in thread
From: Pedro Alves @ 2009-10-25 23:54 UTC (permalink / raw)
  To: Paul Pluzhnikov; +Cc: gdb-patches

On Sunday 18 October 2009 00:25:56, Paul Pluzhnikov wrote:
> 2009-10-17  Paul Pluzhnikov  <ppluzhnikov@google.com>
> 
>        PR gdb/10757
>        * linux-thread-db.c (attach_thread): Return success/failure
>        indicator.
>        (thread_db_find_new_threads_silently): Retry until no new threads.
>        (struct callback_data): New.
>        (find_new_threads_callback): Count new threads, stop iteration
>        on error.
>        (find_new_threads_once): New function.
>        (thread_db_find_new_threads_2): Rename from
>        thread_db_find_new_threads_1 and adjust.
>        (thread_db_find_new_threads_1): New function.

Thanks.  This is quite unfortunate, but we get to live with it...

FTR, I've took a peek at nptl/nptl_db, to try to understand
a bit better why this happens (please do correct me if I'm
wrong):

 - First, it is worth remind us while nptl takes locks and barriers
   to protect its internal structures, thread_db/ntpl_db doesn't,
   since it runs in gdb's context.

 - nptl adds new threads to one of the __stack_user or stack_used
   lists.  New nodes are added to the _head_ of the lists.

 - nptl_db/td_ta_thr_iter.c:td_ta_thr_iter iterates over
   these thread lists from head to tail.

From these last two points alone, one can infer that an iteration
over the thread list can miss new threads if they're added just while
the list is being iterated.

Another way to tackle this, would be iterate over 
/proc/pid/task/ and attach to all lwps that way, instead of
relying on thread_db to do the initial iteration over all
threads.  We'd still iterate using the thread_db interface
once afterwards, so to learn the matching pthread
ids of the lwps.

With /proc/pid/task, you'd still need to keep iterating until
no new thread comes out, but, I think we could make the
number of loops unbounded much safely, since we wouldn't be
at risk of reading stale inferior data.  Assuming no kernel
bugs, that is.  Note that this is something that would be
a useful addition to gdb anyway, so that we could be able to
attach to multi-threaded apps that use raw clone instead of
pthreads.

> +      else
> +       /* Problem attaching this thread; perhaps it exited before we
> +          could attach it?
> +          This could mean that the thread list inside glibc itself is in
> +          inconsistent state, and libthread_db could go on looping forever
> +          (observed with glibc-2.3.6).  To prevent that, terminate
> +          iteration: thread_db_find_new_threads_2 will retry.  */
> +       return 1;

Probably related to the fact that gdb can be iterating over
the threads lists, exactly while libc is concurrently removing
a thread from the same lists.  nptl_db could be iterating over
stale inferior pointers.

>  /* Search for new threads, accessing memory through stopped thread
> -   PTID.  */
> +   PTID.  If NUM_LOOPS is non-zero, repeat searching until NUM_LOOP
> +   searches in a row do not discover any new threads.  */
>  
>  static void
> -thread_db_find_new_threads_1 (ptid_t ptid)
> +thread_db_find_new_threads_2 (ptid_t ptid, int num_loop)
>  {
(...)
> +
> +  if (num_loop != 0)
> +    {
> +      /* Require NUM_LOOP successive iterations which do not find any new threads.  */

This line too long.

> +      for (i = 0, loop = 0; loop < 4; ++i, ++loop)

Err, s/4/num_loop/ ?  A comment explain _why_ we need this is missing
as well.  The patch looks OK otherwise.

Actually, I'd rename the `num_loop' parameter into
say, `until_no_new', and do this instead:

  if (until_no_new)
    {
      /* Require a few successive iterations which do not find any new threads.  */
      for (i = 0, loop = 0; loop < 4; ++i, ++loop)

That would be one way of localizing a bit the detail (and any comments)
of how many iterations you've happened to decide were sufficient in
practice, in one place, as opposed to spread around, like in:

> @@ -597,7 +598,7 @@ thread_db_find_new_threads_silently (pti
>  
>    TRY_CATCH (except, RETURN_MASK_ERROR)
>      {
> -      thread_db_find_new_threads_1 (ptid);
> +      thread_db_find_new_threads_2 (ptid, 4);
>      }

... or any other call sites we may add in the future.

-- 
Pedro Alves

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch] Fix for PR gdb/10757
  2009-10-25 23:54   ` Pedro Alves
@ 2009-10-26 21:25     ` Paul Pluzhnikov
  2009-10-26 21:34       ` Pedro Alves
  2009-10-27 21:38       ` Paul Pluzhnikov
  0 siblings, 2 replies; 13+ messages in thread
From: Paul Pluzhnikov @ 2009-10-26 21:25 UTC (permalink / raw)
  To: Pedro Alves; +Cc: gdb-patches

[-- Attachment #1: Type: text/plain, Size: 3034 bytes --]

On Sun, Oct 25, 2009 at 4:54 PM, Pedro Alves <pedro@codesourcery.com> wrote:

> FTR, I've took a peek at nptl/nptl_db, to try to understand
> a bit better why this happens (please do correct me if I'm
> wrong):
>
>  - First, it is worth remind us while nptl takes locks and barriers
>   to protect its internal structures, thread_db/ntpl_db doesn't,
>   since it runs in gdb's context.

I wonder if it *could* take some lock that would make all other threads
wait in pthread_create -- after all, thread_db does have access to inferior
via ps_pdwrite. That would certainly make the interface cleaner, at a cost
of an extra global lock on pthread_create path.

Regardless of whether that's a good idea or not, I am stuck on glibc-2.3.6
for the time being, and even if this was to go into glibc-2.11, the "broken"
glibc versions will be with us for a long time, and GDB should be able
to deal with them.

>  - nptl adds new threads to one of the __stack_user or stack_used
>   lists.  New nodes are added to the _head_ of the lists.
>
>  - nptl_db/td_ta_thr_iter.c:td_ta_thr_iter iterates over
>   these thread lists from head to tail.
>
> From these last two points alone, one can infer that an iteration
> over the thread list can miss new threads if they're added just while
> the list is being iterated.

Right.

[While nptl_db is fresh in your mind, would you mind looking at gdb/10838
to double-check my diagnosis there?
http://sourceware.org/bugzilla/show_bug.cgi?id=10838

This is related to current patch only in that linux/libthread_db is
also involved.]

> Another way to tackle this, would be iterate over
> /proc/pid/task/ and attach to all lwps that way, instead of
> relying on thread_db to do the initial iteration over all
> threads.  We'd still iterate using the thread_db interface
> once afterwards, so to learn the matching pthread
> ids of the lwps.

I should note that /proc may not be mounted. I am not sure whether GDB
currently requires /proc in other places.

> With /proc/pid/task, you'd still need to keep iterating until
> no new thread comes out, but, I think we could make the
> number of loops unbounded much safely, since we wouldn't be
> at risk of reading stale inferior data.  Assuming no kernel
> bugs, that is.  Note that this is something that would be
> a useful addition to gdb anyway, so that we could be able to
> attach to multi-threaded apps that use raw clone instead of
> pthreads.

I'll put that on my TODO list :-)

>> +      /* Require NUM_LOOP successive iterations which do not find any new threads.  */
>
> This line too long.

Fixed.

>> +      for (i = 0, loop = 0; loop < 4; ++i, ++loop)
>
> Err, s/4/num_loop/ ?  A comment explain _why_ we need this is missing
> as well.  The patch looks OK otherwise.

Fixed.

> Actually, I'd rename the `num_loop' parameter into
> say, `until_no_new', and do this instead:

Good idea, done.

I'll commit attached patch tomorrow if there are no further comments.
A similar patch is required for gdbserver as well.


Thanks,
-- 
Paul Pluzhnikov

[-- Attachment #2: gdb-pr10757-20091026.txt --]
[-- Type: text/plain, Size: 6938 bytes --]

Index: linux-thread-db.c
===================================================================
RCS file: /cvs/src/src/gdb/linux-thread-db.c,v
retrieving revision 1.65
diff -u -p -u -r1.65 linux-thread-db.c
--- linux-thread-db.c	19 Oct 2009 09:51:41 -0000	1.65
+++ linux-thread-db.c	26 Oct 2009 21:20:31 -0000
@@ -160,6 +160,7 @@ struct thread_db_info
 struct thread_db_info *thread_db_list;
 
 static void thread_db_find_new_threads_1 (ptid_t ptid);
+static void thread_db_find_new_threads_2 (ptid_t ptid, int until_no_new);
 
 /* Add the current inferior to the list of processes using libpthread.
    Return a pointer to the newly allocated object that was added to
@@ -229,8 +230,8 @@ delete_thread_db_info (int pid)
 }
 
 /* Prototypes for local functions.  */
-static void attach_thread (ptid_t ptid, const td_thrhandle_t *th_p,
-			   const td_thrinfo_t *ti_p);
+static int attach_thread (ptid_t ptid, const td_thrhandle_t *th_p,
+			  const td_thrinfo_t *ti_p);
 static void detach_thread (ptid_t ptid);
 \f
 
@@ -597,7 +598,7 @@ thread_db_find_new_threads_silently (pti
 
   TRY_CATCH (except, RETURN_MASK_ERROR)
     {
-      thread_db_find_new_threads_1 (ptid);
+      thread_db_find_new_threads_2 (ptid, 1);
     }
 
   if (except.reason < 0 && info_verbose)
@@ -977,9 +978,9 @@ thread_db_new_objfile (struct objfile *o
 
 /* Attach to a new thread.  This function is called when we receive a
    TD_CREATE event or when we iterate over all threads and find one
-   that wasn't already in our list.  */
+   that wasn't already in our list.  Returns true on success.  */
 
-static void
+static int
 attach_thread (ptid_t ptid, const td_thrhandle_t *th_p,
 	       const td_thrinfo_t *ti_p)
 {
@@ -1013,7 +1014,7 @@ attach_thread (ptid_t ptid, const td_thr
       if (tp->private != NULL)
 	{
 	  if (!tp->private->dying)
-	    return;
+	    return 0;
 
 	  delete_thread (ptid);
 	  tp = NULL;
@@ -1023,12 +1024,12 @@ attach_thread (ptid_t ptid, const td_thr
   check_thread_signals ();
 
   if (ti_p->ti_state == TD_THR_UNKNOWN || ti_p->ti_state == TD_THR_ZOMBIE)
-    return;			/* A zombie thread -- do not attach.  */
+    return 0;			/* A zombie thread -- do not attach.  */
 
   /* Under GNU/Linux, we have to attach to each and every thread.  */
   if (tp == NULL
       && lin_lwp_attach_lwp (BUILD_LWP (ti_p->ti_lid, GET_PID (ptid))) < 0)
-    return;
+    return 0;
 
   /* Construct the thread's private data.  */
   private = xmalloc (sizeof (struct private_thread_info));
@@ -1056,6 +1057,7 @@ attach_thread (ptid_t ptid, const td_thr
   if (err != TD_OK)
     error (_("Cannot enable thread event reporting for %s: %s"),
 	   target_pid_to_str (ptid), thread_db_err_str (err));
+  return 1;
 }
 
 static void
@@ -1282,6 +1284,12 @@ thread_db_mourn_inferior (struct target_
     unpush_target (ops);
 }
 
+struct callback_data
+{
+  struct thread_db_info *info;
+  int new_threads;
+};
+
 static int
 find_new_threads_callback (const td_thrhandle_t *th_p, void *data)
 {
@@ -1289,7 +1297,8 @@ find_new_threads_callback (const td_thrh
   td_err_e err;
   ptid_t ptid;
   struct thread_info *tp;
-  struct thread_db_info *info = data;
+  struct callback_data *cb_data = data;
+  struct thread_db_info *info = cb_data->info;
 
   err = info->td_thr_get_info_p (th_p, &ti);
   if (err != TD_OK)
@@ -1332,21 +1341,76 @@ find_new_threads_callback (const td_thrh
   ptid = ptid_build (info->pid, ti.ti_lid, 0);
   tp = find_thread_ptid (ptid);
   if (tp == NULL || tp->private == NULL)
-    attach_thread (ptid, th_p, &ti);
+    {
+      if (attach_thread (ptid, th_p, &ti))
+	cb_data->new_threads += 1;
+      else
+	/* Problem attaching this thread; perhaps it exited before we
+	   could attach it?
+	   This could mean that the thread list inside glibc itself is in
+	   inconsistent state, and libthread_db could go on looping forever
+	   (observed with glibc-2.3.6).  To prevent that, terminate
+	   iteration: thread_db_find_new_threads_2 will retry.  */
+	return 1;
+    }
 
   return 0;
 }
 
+/* Helper for thread_db_find_new_threads_2.
+   Returns number of new threads found.  */
+
+static int
+find_new_threads_once (struct thread_db_info *info, int iteration,
+		       int *errp)
+{
+  volatile struct gdb_exception except;
+  struct callback_data data;
+  int err = TD_ERR;
+
+  data.info = info;
+  data.new_threads = 0;
+
+  TRY_CATCH (except, RETURN_MASK_ERROR)
+    {
+      /* Iterate over all user-space threads to discover new threads.  */
+      err = info->td_ta_thr_iter_p (info->thread_agent,
+				    find_new_threads_callback,
+				    &data,
+				    TD_THR_ANY_STATE,
+				    TD_THR_LOWEST_PRIORITY,
+				    TD_SIGNO_MASK,
+				    TD_THR_ANY_USER_FLAGS);
+    }
+
+  if (info_verbose)
+    {
+      if (except.reason < 0)
+	exception_fprintf (gdb_stderr, except,
+			   "Warning: find_new_threads_once: ");
+
+      printf_filtered (_("Found %d new threads in iteration %d.\n"),
+		       data.new_threads, iteration);
+    }
+
+  if (errp != NULL)
+    *errp = err;
+
+  return data.new_threads;
+}
+
 /* Search for new threads, accessing memory through stopped thread
-   PTID.  */
+   PTID.  If UNTIL_NO_NEW is true, repeat searching until several
+   searches in a row do not discover any new threads.  */
 
 static void
-thread_db_find_new_threads_1 (ptid_t ptid)
+thread_db_find_new_threads_2 (ptid_t ptid, int until_no_new)
 {
   td_err_e err;
   struct lwp_info *lp;
   struct thread_db_info *info;
   int pid = ptid_get_pid (ptid);
+  int i, loop;
 
   /* In linux, we can only read memory through a stopped lwp.  */
   ALL_LWPS (lp, ptid)
@@ -1361,15 +1425,36 @@ thread_db_find_new_threads_1 (ptid_t pti
 
   /* Access an lwp we know is stopped.  */
   info->proc_handle.ptid = ptid;
-  /* Iterate over all user-space threads to discover new threads.  */
-  err = info->td_ta_thr_iter_p (info->thread_agent, find_new_threads_callback,
-				info, TD_THR_ANY_STATE, TD_THR_LOWEST_PRIORITY,
-				TD_SIGNO_MASK, TD_THR_ANY_USER_FLAGS);
-  if (err != TD_OK)
-    error (_("Cannot find new threads: %s"), thread_db_err_str (err));
+
+  if (until_no_new)
+    {
+      /* Require 4 successive iterations which do not find any new threads.
+	 The 4 is a heuristic: there is an inherent race here, and I have
+	 seen that 2 iterations in a row are not always sufficient to
+	 "capture" all threads.  */
+      for (i = 0, loop = 0; loop < 4; ++i, ++loop)
+	if (find_new_threads_once (info, i, NULL) != 0)
+	  /* Found some new threads.  Restart the loop from beginning.	*/
+	  loop = -1;
+    }
+  else
+    {
+      int err;
+
+      find_new_threads_once (info, 0, &err);
+      if (err != TD_OK)
+	error (_("Cannot find new threads: %s"), thread_db_err_str (err));
+    }
 }
 
 static void
+thread_db_find_new_threads_1 (ptid_t ptid)
+{
+  thread_db_find_new_threads_2 (ptid, 0);
+}
+
+
+static void
 thread_db_find_new_threads (struct target_ops *ops)
 {
   struct thread_db_info *info;

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch] Fix for PR gdb/10757
  2009-10-26 21:25     ` Paul Pluzhnikov
@ 2009-10-26 21:34       ` Pedro Alves
  2009-10-27  0:41         ` Paul Pluzhnikov
  2009-10-27 21:38       ` Paul Pluzhnikov
  1 sibling, 1 reply; 13+ messages in thread
From: Pedro Alves @ 2009-10-26 21:34 UTC (permalink / raw)
  To: Paul Pluzhnikov; +Cc: gdb-patches

On Monday 26 October 2009 21:25:39, Paul Pluzhnikov wrote:
> I wonder if it *could* take some lock that would make all other threads
> wait in pthread_create -- after all, thread_db does have access to inferior
> via ps_pdwrite. That would certainly make the interface cleaner, at a cost
> of an extra global lock on pthread_create path.

the problem with that is that gdb can stop all threads while one
of them is holding the lock --- you'd have to consider
the possibility of gdb deadlocking.

-- 
Pedro Alves


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch] Fix for PR gdb/10757
  2009-10-26 21:34       ` Pedro Alves
@ 2009-10-27  0:41         ` Paul Pluzhnikov
  2009-10-27  9:43           ` Pedro Alves
  0 siblings, 1 reply; 13+ messages in thread
From: Paul Pluzhnikov @ 2009-10-27  0:41 UTC (permalink / raw)
  To: Pedro Alves; +Cc: gdb-patches

On Mon, Oct 26, 2009 at 2:35 PM, Pedro Alves <pedro@codesourcery.com> wrote:

> On Monday 26 October 2009 21:25:39, Paul Pluzhnikov wrote:
>> I wonder if it *could* take some lock that would make all other threads

> the problem with that is that gdb can stop all threads while one
> of them is holding the lock --- you'd have to consider
> the possibility of gdb deadlocking.

I was thinking along the lines of

  static __volatile__ int thread_iteration_in_progress;
  static __volatile__ int thread_creation_in_progress;

  __pthread_create_2_1 (...)
  {
    while (thread_iteration_in_progress) nanoslep ...;
    atomic_increment(&thread_creation_in_progress);
    ... create new thread, put it on thread list ...
    atomic_decrement(&thread_creation_in_progress);
    return ...
  }

and in

   td_ta_thr_iter ()
   {
     set thread_iteration_in_progress in the inferior to 1
     while (thread_creation_in_progress in inferior > 0)
        nanosleep ...

     iterate over the thread lists ...

     set thread_iteration_in_progress in inferior to 0
   }

I think some variant of this could work, but I have no hopes this will
actually get into glibc.


-- 
Paul Pluzhnikov


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch] Fix for PR gdb/10757
  2009-10-27  0:41         ` Paul Pluzhnikov
@ 2009-10-27  9:43           ` Pedro Alves
  2009-10-27 16:58             ` Paul Pluzhnikov
  0 siblings, 1 reply; 13+ messages in thread
From: Pedro Alves @ 2009-10-27  9:43 UTC (permalink / raw)
  To: Paul Pluzhnikov; +Cc: gdb-patches

On Tuesday 27 October 2009 00:41:24, Paul Pluzhnikov wrote:
> 
>   static __volatile__ int thread_iteration_in_progress;
>   static __volatile__ int thread_creation_in_progress;
> 
>   __pthread_create_2_1 (...)
>   {
>     while (thread_iteration_in_progress) nanoslep ...;
>     atomic_increment(&thread_creation_in_progress);
>     ... create new thread, put it on thread list ...
>     atomic_decrement(&thread_creation_in_progress);
>     return ...
>   }
> 
> and in
> 
>    td_ta_thr_iter ()
>    {
>      set thread_iteration_in_progress in the inferior to 1
>      while (thread_creation_in_progress in inferior > 0)
>         nanosleep ...
> 
>      iterate over the thread lists ...
> 
>      set thread_iteration_in_progress in inferior to 0
>    }
> 

This does make the race window narrower, but it doesn't
completely kill it, as new threads could have passed the
thread_iteration_in_progress check already as gdb enters the
thread iteration loop.

-- 
Pedro Alves


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch] Fix for PR gdb/10757
  2009-10-27  9:43           ` Pedro Alves
@ 2009-10-27 16:58             ` Paul Pluzhnikov
  0 siblings, 0 replies; 13+ messages in thread
From: Paul Pluzhnikov @ 2009-10-27 16:58 UTC (permalink / raw)
  To: Pedro Alves; +Cc: gdb-patches

On Tue, Oct 27, 2009 at 2:43 AM, Pedro Alves <pedro@codesourcery.com> wrote:

> This does make the race window narrower, but it doesn't
> completely kill it

Yes, I think there is still a "single instruction" race window :-(

I am pretty sure there is a way to make this race-free, but that's irrelevant:
it's not race-free now, so we'll have to leave with that.

Thanks,
-- 
Paul Pluzhnikov


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch] Fix for PR gdb/10757
  2009-10-26 21:25     ` Paul Pluzhnikov
  2009-10-26 21:34       ` Pedro Alves
@ 2009-10-27 21:38       ` Paul Pluzhnikov
  2009-10-27 21:52         ` Pedro Alves
  1 sibling, 1 reply; 13+ messages in thread
From: Paul Pluzhnikov @ 2009-10-27 21:38 UTC (permalink / raw)
  To: Pedro Alves; +Cc: gdb-patches

[-- Attachment #1: Type: text/plain, Size: 523 bytes --]

On Mon, Oct 26, 2009 at 2:25 PM, Paul Pluzhnikov <ppluzhnikov@google.com> wrote:

> I'll commit attached patch tomorrow if there are no further comments.

So committed.

> A similar patch is required for gdbserver as well.

Attached.

Thanks,
-- 
Paul Pluzhnikov

2009-10-27  Paul Pluzhnikov  <ppluzhnikov@google.com>

	PR gdb/10757
	* thread-db.c (attach_thread): New function.
	(maybe_attach_thread): Return success/failure.
	(find_new_threads_callback): Adjust.
	(thread_db_find_new_threads): Loop until no new threads.

[-- Attachment #2: gdbserver-pr10757-20091027.txt --]
[-- Type: text/plain, Size: 4169 bytes --]

Index: gdbserver/thread-db.c
===================================================================
RCS file: /cvs/src/src/gdb/gdbserver/thread-db.c,v
retrieving revision 1.24
diff -u -p -u -r1.24 thread-db.c
--- gdbserver/thread-db.c	9 Oct 2009 00:31:01 -0000	1.24
+++ gdbserver/thread-db.c	27 Oct 2009 21:12:12 -0000
@@ -297,16 +297,13 @@ find_one_thread (ptid_t ptid)
   return 1;
 }
 
-static void
-maybe_attach_thread (const td_thrhandle_t *th_p, td_thrinfo_t *ti_p)
+/* Attach a thread.  Return true on success.  */
+
+static int
+attach_thread (const td_thrhandle_t *th_p, td_thrinfo_t *ti_p)
 {
-  td_err_e err;
   struct lwp_info *lwp;
 
-  lwp = find_lwp_pid (pid_to_ptid (ti_p->ti_lid));
-  if (lwp != NULL)
-    return;
-
   if (debug_threads)
     fprintf (stderr, "Attaching to thread %ld (LWP %d)\n",
 	     ti_p->ti_tid, ti_p->ti_lid);
@@ -316,7 +313,7 @@ maybe_attach_thread (const td_thrhandle_
     {
       warning ("Could not attach to thread %ld (LWP %d)\n",
 	       ti_p->ti_tid, ti_p->ti_lid);
-      return;
+      return 0;
     }
 
   lwp->thread_known = 1;
@@ -324,12 +321,39 @@ maybe_attach_thread (const td_thrhandle_
 
   if (thread_db_use_events)
     {
+      td_err_e err;
       struct thread_db *thread_db = current_process ()->private->thread_db;
+
       err = thread_db->td_thr_event_enable_p (th_p, 1);
       if (err != TD_OK)
 	error ("Cannot enable thread event reporting for %d: %s",
 	       ti_p->ti_lid, thread_db_err_str (err));
     }
+
+  return 1;
+}
+
+/* Attach thread if we haven't seen it yet.
+   Increment *COUNTER if we have attached a new thread.
+   Return false on failure.  */
+
+static int
+maybe_attach_thread (const td_thrhandle_t *th_p, td_thrinfo_t *ti_p,
+		     int *counter)
+{
+  struct lwp_info *lwp;
+
+  lwp = find_lwp_pid (pid_to_ptid (ti_p->ti_lid));
+  if (lwp != NULL)
+    return 1;
+
+  if (!attach_thread (th_p, ti_p))
+    return 0;
+
+  if (counter != NULL)
+    *counter += 1;
+
+  return 1;
 }
 
 static int
@@ -347,7 +371,12 @@ find_new_threads_callback (const td_thrh
   if (ti.ti_state == TD_THR_UNKNOWN || ti.ti_state == TD_THR_ZOMBIE)
     return 0;
 
-  maybe_attach_thread (th_p, &ti);
+  if (!maybe_attach_thread (th_p, &ti, (int *) data))
+    {
+      /* Terminate iteration early: we might be looking at stale data in
+	 the inferior.  The thread_db_find_new_threads will retry.  */
+      return 1;
+    }
 
   return 0;
 }
@@ -358,6 +387,7 @@ thread_db_find_new_threads (void)
   td_err_e err;
   ptid_t ptid = ((struct inferior_list_entry *) current_inferior)->id;
   struct thread_db *thread_db = current_process ()->private->thread_db;
+  int loop, iteration;
 
   /* This function is only called when we first initialize thread_db.
      First locate the initial thread.  If it is not ready for
@@ -365,11 +395,30 @@ thread_db_find_new_threads (void)
   if (find_one_thread (ptid) == 0)
     return;
 
-  /* Iterate over all user-space threads to discover new threads.  */
-  err = thread_db->td_ta_thr_iter_p (thread_db->thread_agent,
-				     find_new_threads_callback, NULL,
-				     TD_THR_ANY_STATE, TD_THR_LOWEST_PRIORITY,
-				     TD_SIGNO_MASK, TD_THR_ANY_USER_FLAGS);
+  /* Require 4 successive iterations which do not find any new threads.
+     The 4 is a heuristic: there is an inherent race here, and I have
+     seen that 2 iterations in a row are not always sufficient to
+     "capture" all threads.  */
+  for (loop = 0, iteration = 0; loop < 4; ++loop, ++iteration)
+    {
+      int new_thread_count = 0;
+
+      /* Iterate over all user-space threads to discover new threads.  */
+      err = thread_db->td_ta_thr_iter_p (thread_db->thread_agent,
+					 find_new_threads_callback,
+					 &new_thread_count,
+					 TD_THR_ANY_STATE, TD_THR_LOWEST_PRIORITY,
+					 TD_SIGNO_MASK, TD_THR_ANY_USER_FLAGS);
+      if (debug_threads)
+	fprintf (stderr, "Found %d threads in iteration %d.\n",
+		 new_thread_count, iteration);
+
+      if (new_thread_count != 0)
+	{
+	  /* Found new threads.  Restart iteration from beginning.  */
+	  loop = -1;
+	}
+    }
   if (err != TD_OK)
     error ("Cannot find new threads: %s", thread_db_err_str (err));
 }

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch] Fix for PR gdb/10757
  2009-10-27 21:38       ` Paul Pluzhnikov
@ 2009-10-27 21:52         ` Pedro Alves
  2009-10-28 17:04           ` Paul Pluzhnikov
  0 siblings, 1 reply; 13+ messages in thread
From: Pedro Alves @ 2009-10-27 21:52 UTC (permalink / raw)
  To: Paul Pluzhnikov; +Cc: gdb-patches

On Tuesday 27 October 2009 21:38:32, Paul Pluzhnikov wrote:
> 2009-10-27  Paul Pluzhnikov  <ppluzhnikov@google.com>
> 
>         PR gdb/10757
>         * thread-db.c (attach_thread): New function.
>         (maybe_attach_thread): Return success/failure.
>         (find_new_threads_callback): Adjust.
>         (thread_db_find_new_threads): Loop until no new threads.

Looks good to me.  Thanks!

-- 
Pedro Alves


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch] Fix for PR gdb/10757
  2009-10-27 21:52         ` Pedro Alves
@ 2009-10-28 17:04           ` Paul Pluzhnikov
  0 siblings, 0 replies; 13+ messages in thread
From: Paul Pluzhnikov @ 2009-10-28 17:04 UTC (permalink / raw)
  To: Pedro Alves; +Cc: gdb-patches

On Tue, Oct 27, 2009 at 2:53 PM, Pedro Alves <pedro@codesourcery.com> wrote:

> Looks good to me.  Thanks!

So committed.

Thanks.
-- 
Paul Pluzhnikov


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch] Fix for PR gdb/10757
@ 2009-10-25 23:59 Pedro Alves
  0 siblings, 0 replies; 13+ messages in thread
From: Pedro Alves @ 2009-10-25 23:59 UTC (permalink / raw)
  To: Paul Pluzhnikov; +Cc: gdb-patches

On Sunday 18 October 2009 00:25:56, Paul Pluzhnikov wrote:
> 2009-10-17  Paul Pluzhnikov  <ppluzhnikov@google.com>
> 
>        PR gdb/10757
>        * linux-thread-db.c (attach_thread): Return success/failure
>        indicator.
>        (thread_db_find_new_threads_silently): Retry until no new threads.
>        (struct callback_data): New.
>        (find_new_threads_callback): Count new threads, stop iteration
>        on error.
>        (find_new_threads_once): New function.
>        (thread_db_find_new_threads_2): Rename from
>        thread_db_find_new_threads_1 and adjust.
>        (thread_db_find_new_threads_1): New function.

Thanks.  This is quite unfortunate, but we get to live with it...

FTR, I've took a peek at nptl/nptl_db, to try to understand
a bit better why this happens (please do correct me if I'm
wrong):

 - First, it is worth remind us while nptl takes locks and barriers
   to protect its internal structures, thread_db/ntpl_db doesn't,
   since it runs in gdb's context.

 - nptl adds new threads to one of the __stack_user or stack_used
   lists.  New nodes are added to the _head_ of the lists.

 - nptl_db/td_ta_thr_iter.c:td_ta_thr_iter iterates over
   these thread lists from head to tail.

From these last two points alone, one can infer that an iteration
over the thread list can miss new threads if they're added just while
the list is being iterated.

Another way to tackle this, would be iterate over 
/proc/pid/task/ and attach to all lwps that way, instead of
relying on thread_db to do the initial iteration over all
threads.  We'd still iterate using the thread_db interface
once afterwards, so to learn the matching pthread
ids of the lwps.

With /proc/pid/task, you'd still need to keep iterating until
no new thread comes out, but, I think we could make the
number of loops unbounded much safely, since we wouldn't be
at risk of reading stale inferior data.  Assuming no kernel
bugs, that is.  Note that this is something that would be
a useful addition to gdb anyway, so that we could be able to
attach to multi-threaded apps that use raw clone instead of
pthreads.

> +      else
> +       /* Problem attaching this thread; perhaps it exited before we
> +          could attach it?
> +          This could mean that the thread list inside glibc itself is in
> +          inconsistent state, and libthread_db could go on looping forever
> +          (observed with glibc-2.3.6).  To prevent that, terminate
> +          iteration: thread_db_find_new_threads_2 will retry.  */
> +       return 1;

Probably related to the fact that gdb can be iterating over
the threads lists, exactly while libc is concurrently removing
a thread from the same lists.  nptl_db could be iterating over
stale inferior pointers.

>  /* Search for new threads, accessing memory through stopped thread
> -   PTID.  */
> +   PTID.  If NUM_LOOPS is non-zero, repeat searching until NUM_LOOP
> +   searches in a row do not discover any new threads.  */
>  
>  static void
> -thread_db_find_new_threads_1 (ptid_t ptid)
> +thread_db_find_new_threads_2 (ptid_t ptid, int num_loop)
>  {
(...)
> +
> +  if (num_loop != 0)
> +    {
> +      /* Require NUM_LOOP successive iterations which do not find any new threads.  */

This line too long.

> +      for (i = 0, loop = 0; loop < 4; ++i, ++loop)

Err, s/4/num_loop/ ?  A comment explain _why_ we need this is missing
as well.  The patch looks OK otherwise.

Actually, I'd rename the `num_loop' parameter into
say, `until_no_new', and do this instead:

  if (until_no_new)
    {
      /* Require a few successive iterations which do not find any new threads.  */
      for (i = 0, loop = 0; loop < 4; ++i, ++loop)

That would be one way of localizing a bit the detail (and any comments)
of how many iterations you've happened to decide were sufficient in
practice, in one place, as opposed to spread around, like in:

> @@ -597,7 +598,7 @@ thread_db_find_new_threads_silently (pti
>  
>    TRY_CATCH (except, RETURN_MASK_ERROR)
>      {
> -      thread_db_find_new_threads_1 (ptid);
> +      thread_db_find_new_threads_2 (ptid, 4);
>      }

... or any other call sites we may add in the future.

-- 
Pedro Alves

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2009-10-28 17:04 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-10-15 19:10 [patch] Fix for PR gdb/10757 Paul Pluzhnikov
2009-10-17 23:26 ` Paul Pluzhnikov
2009-10-25 17:15   ` Paul Pluzhnikov
2009-10-25 23:54   ` Pedro Alves
2009-10-26 21:25     ` Paul Pluzhnikov
2009-10-26 21:34       ` Pedro Alves
2009-10-27  0:41         ` Paul Pluzhnikov
2009-10-27  9:43           ` Pedro Alves
2009-10-27 16:58             ` Paul Pluzhnikov
2009-10-27 21:38       ` Paul Pluzhnikov
2009-10-27 21:52         ` Pedro Alves
2009-10-28 17:04           ` Paul Pluzhnikov
2009-10-25 23:59 Pedro Alves

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox