From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 32412 invoked by alias); 16 Jun 2017 15:45:55 -0000 Mailing-List: contact gdb-patches-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: gdb-patches-owner@sourceware.org Received: (qmail 31714 invoked by uid 89); 16 Jun 2017 15:45:47 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-26.9 required=5.0 tests=BAYES_00,GIT_PATCH_0,GIT_PATCH_1,GIT_PATCH_2,GIT_PATCH_3,SPF_HELO_PASS,T_RP_MATCHES_RCVD autolearn=ham version=3.3.2 spammy=lucky X-HELO: mx1.redhat.com Received: from mx1.redhat.com (HELO mx1.redhat.com) (209.132.183.28) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Fri, 16 Jun 2017 15:45:45 +0000 Received: from smtp.corp.redhat.com (int-mx04.intmail.prod.int.phx2.redhat.com [10.5.11.14]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 95E8D23E6C5; Fri, 16 Jun 2017 15:45:48 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 95E8D23E6C5 Authentication-Results: ext-mx05.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx05.extmail.prod.ext.phx2.redhat.com; spf=pass smtp.mailfrom=palves@redhat.com DKIM-Filter: OpenDKIM Filter v2.11.0 mx1.redhat.com 95E8D23E6C5 Received: from [127.0.0.1] (ovpn04.gateway.prod.ext.ams2.redhat.com [10.39.146.4]) by smtp.corp.redhat.com (Postfix) with ESMTP id B211053; Fri, 16 Jun 2017 15:45:45 +0000 (UTC) Subject: Re: [PATCH v4] C++ify gdb/common/environ.c To: Sergio Durigan Junior , GDB Patches References: <20170413040455.23996-1-sergiodj@redhat.com> <20170614192219.12364-1-sergiodj@redhat.com> Cc: Simon Marchi From: Pedro Alves Message-ID: Date: Fri, 16 Jun 2017 15:45:00 -0000 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Thunderbird/45.4.0 MIME-Version: 1.0 In-Reply-To: <20170614192219.12364-1-sergiodj@redhat.com> Content-Type: text/plain; charset=windows-1252 Content-Transfer-Encoding: 7bit X-SW-Source: 2017-06/txt/msg00461.txt.bz2 On 06/14/2017 08:22 PM, Sergio Durigan Junior wrote: > diff --git a/gdb/Makefile.in b/gdb/Makefile.in > index 5e5fcaa..133db1a 100644 > --- a/gdb/Makefile.in > +++ b/gdb/Makefile.in > @@ -530,14 +530,16 @@ SUBDIR_UNITTESTS_SRCS = \ > unittests/offset-type-selftests.c \ > unittests/optional-selftests.c \ > unittests/ptid-selftests.c \ > - unittests/scoped_restore-selftests.c > + unittests/scoped_restore-selftests.c \ > + unittests/environ-selftests.c Please keep the list sorted. (I'm guilty of missing that before too.) > > SUBDIR_UNITTESTS_OBS = \ > function-view-selftests.o \ > offset-type-selftests.o \ > optional-selftests.o \ > ptid-selftests.o \ > - scoped_restore-selftests.o > + scoped_restore-selftests.o \ > + environ-selftests.o Ditto. > diff --git a/gdb/common/environ.c b/gdb/common/environ.c > index 3145d01..657f2e0 100644 > --- a/gdb/common/environ.c > +++ b/gdb/common/environ.c > @@ -18,165 +18,102 @@ > #include "common-defs.h" > #include "environ.h" > #include > - > +#include > > -/* Return a new environment object. */ > +/* See common/environ.h. */ > > -struct gdb_environ * > -make_environ (void) > +gdb_environ::gdb_environ () > { > - struct gdb_environ *e; > +} > > - e = XNEW (struct gdb_environ); > +/* See common/environ.h. */ > > - e->allocated = 10; > - e->vector = (char **) xmalloc ((e->allocated + 1) * sizeof (char *)); > - e->vector[0] = 0; > - return e; > +gdb_environ::~gdb_environ () > +{ > + clear (); > } > > -/* Free an environment and all the strings in it. */ > +/* See common/environ.h. */ > > -void > -free_environ (struct gdb_environ *e) > +gdb_environ::gdb_environ (gdb_environ &&e) > + : m_environ_vector (std::move (e.m_environ_vector)) > { > - char **vector = e->vector; > +} > > - while (*vector) > - xfree (*vector++); > +/* See common/environ.h. */ > > - xfree (e->vector); > - xfree (e); > +gdb_environ & > +gdb_environ::operator= (gdb_environ &&e) > +{ > + clear (); > + m_environ_vector = std::move (e.m_environ_vector); > + return *this; > } > > -/* Copy the environment given to this process into E. > - Also copies all the strings in it, so we can be sure > - that all strings in these environments are safe to free. */ > +/* See common/environ.h. */ > > void > -init_environ (struct gdb_environ *e) > +gdb_environ::clear () > { > - extern char **environ; > - int i; > - > - if (environ == NULL) > - return; > - > - for (i = 0; environ[i]; i++) /*EMPTY */ ; > + for (char *v : m_environ_vector) > + xfree (v); > + m_environ_vector.clear (); > +} > > - if (e->allocated < i) > - { > - e->allocated = std::max (i, e->allocated + 10); > - e->vector = (char **) xrealloc ((char *) e->vector, > - (e->allocated + 1) * sizeof (char *)); > - } > +/* See common/environ.h. */ > > - memcpy (e->vector, environ, (i + 1) * sizeof (char *)); > +const char * > +gdb_environ::get (const std::string &var) const > +{ Does this need to be a std::string instead of "const char *"? Callers always pass a string literal down, so this is going to force a deep string dup for no good reason, AFAICS. > + size_t len = var.size (); > + const char *var_str = var.c_str (); > > - while (--i >= 0) > - { > - int len = strlen (e->vector[i]); > - char *newobj = (char *) xmalloc (len + 1); > + for (char *el : m_environ_vector) > + if (el != NULL && strncmp (el, var_str, len) == 0 && el[len] == '=') > + return &el[len + 1]; > > - memcpy (newobj, e->vector[i], len + 1); > - e->vector[i] = newobj; > - } > + return NULL; > } > -char * > -get_in_environ (const struct gdb_environ *e, const char *var) > +void > +gdb_environ::set (const std::string &var, const std::string &value) Ditto. > { > - int len = strlen (var); > - char **vector = e->vector; > - char *s; > - > - for (; (s = *vector) != NULL; vector++) > - if (strncmp (s, var, len) == 0 && s[len] == '=') > - return &s[len + 1]; > + /* We have to unset the variable in the vector if it exists. */ > + unset (var); > > - return 0; > + /* Insert the element before the last one, which is always NULL. */ > + m_environ_vector.insert (m_environ_vector.end () - 1, > + concat (var.c_str (), "=", > + value.c_str (), NULL)); > } > > -/* Store the value in E of VAR as VALUE. */ > +/* See common/environ.h. */ > > void > -set_in_environ (struct gdb_environ *e, const char *var, const char *value) > +gdb_environ::unset (const std::string &var) Ditto. > - > - return; > + std::string match = var + '='; > + const char *match_str = match.c_str (); > + > + /* We iterate until '.cend () - 1' because the last element is > + always NULL. */ > + for (std::vector::const_iterator el = m_environ_vector.cbegin (); > + el != m_environ_vector.cend () - 1; > + ++el) > + if (startswith (*el, match_str)) In gdb_environ::set you used: strncmp (el, var_str, len) == 0 && el[len] == '=' It'd be better if places used the same matching code. Maybe even put this in a separate helper function. The gdb_environ::set version looks better to me for avoiding temporary heap-allocated strings. > -/* Remove the setting for variable VAR from environment E. */ > +/* See common/environ.h. */ > > -void > -unset_in_environ (struct gdb_environ *e, const char *var) > +char ** > +gdb_environ::get_char_vector () const So far, getters in gdb's classes don't have a "get_" prefix. (except "get()" or course, but that's really a getter in the same sense.) Can we drop it here? Like: char **gdb_environ::char_vector () const Though I'd rename it like this instead: char ** gdb_environ::envp () const Because that's what the env vector is traditionally called, e.g., from "man 2 execve": int execve(const char *filename, char *const argv[], char *const envp[]); int main(int argc, char *argv[], char *envp[]) Likewise I'd use that name for local variables where we call gdb_environ::get_char_vector, just to follow traditional terminology throughout. > +/* Class that represents the environment variables as seen by the > + inferior. */ > + > +class gdb_environ > +{ > +public: > + /* Regular constructor and destructor. */ > + gdb_environ (); > + ~gdb_environ (); > + > + /* Move constructor. */ > + gdb_environ (gdb_environ &&e); > + > + /* Move assignment. */ > + gdb_environ &operator= (gdb_environ &&e); > + > + /* Create a gdb_environ object using the host's environment > + variables. */ > + static gdb_environ from_host_environ () > { Nit: I find it a bit odd that the ctors/dtors are short but defined out of line, while this function is defined inline. If I was looking at controlling what the compiler could inline, then I'd do it the other way around -- small ctor/dtor in the header, and this larger function out of line in the .c file. > - /* Number of usable slots allocated in VECTOR. > - VECTOR always has one slot not counted here, > - to hold the terminating zero. */ > - int allocated; > - /* A vector of slots, ALLOCATED + 1 of them. > - The first few slots contain strings "VAR=VALUE" > - and the next one contains zero. > - Then come some unused slots. */ > - char **vector; > - }; > + extern char **environ; > + gdb_environ e; > + > + if (environ == NULL) > + return e; > + > + for (int i = 0; environ[i] != NULL; ++i) > + e.m_environ_vector.push_back (xstrdup (environ[i])); > + > + /* The last element of the vector is always going to be NULL. */ > + e.m_environ_vector.push_back (NULL); > > -extern struct gdb_environ *make_environ (void); > + return e; > + } > run_target->to_create_inferior (run_target, exec_file, > std::string (get_inferior_args ()), > - environ_vector (current_inferior ()->environment), > + current_inferior () > + ->environment.get_char_vector (), > from_tty); > @@ -270,7 +270,6 @@ mi_cmd_inferior_tty_show (const char *command, char **argv, int argc) > void > _initialize_mi_cmd_env (void) > { > - struct gdb_environ *environment; > const char *env; > > /* We want original execution path to reset to, if desired later. > @@ -278,13 +277,10 @@ _initialize_mi_cmd_env (void) > current_inferior ()->environment. Also, there's no obvious > place where this code can be moved such that it surely run > before any code possibly mangles original PATH. */ > - environment = make_environ (); > - init_environ (environment); > - env = get_in_environ (environment, path_var_name); > + env = getenv (path_var_name); > > /* Can be null if path is not set. */ > if (!env) > env = ""; > orig_path = xstrdup (env); > - free_environ (environment); > } Please split this change to a separate patch. Don't we need to update the comment just above? > diff --git a/gdb/unittests/environ-selftests.c b/gdb/unittests/environ-selftests.c > new file mode 100644 > index 0000000..948eacf > --- /dev/null > +++ b/gdb/unittests/environ-selftests.c > @@ -0,0 +1,79 @@ > +/* Self tests for gdb_environ for GDB, the GNU debugger. > + > + Copyright (C) 2017 Free Software Foundation, Inc. > + > + This file is part of GDB. > + > + This program is free software; you can redistribute it and/or modify > + it under the terms of the GNU General Public License as published by > + the Free Software Foundation; either version 3 of the License, or > + (at your option) any later version. > + > + This program is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + GNU General Public License for more details. > + > + You should have received a copy of the GNU General Public License > + along with this program. If not, see . */ > + > +#include "defs.h" > +#include "selftest.h" > +#include "common/environ.h" > + > +namespace selftests { > +namespace environ { "environ" as an identifier will be problematic. Please pick some other name here. On some hosts "environ" is a define. E.g., on my Fedora's mingw-w64 install I see: /usr/x86_64-w64-mingw32/sys-root/mingw/include/stdlib.h:624:#define environ _environ and gnulib has too: $ srcgrep -rn environ gnulib/import/ | grep define gnulib/import/extra/snippet/warn-on-use.h:61: # define environ (*rpl_environ ()) gnulib/import/unistd.in.h:411:# define environ (*_NSGetEnviron ()) gnulib/import/unistd.in.h:432:# define environ (*rpl_environ ()) I don't think "namespace (*_NSGetEnviron ())" would compile. :-) > + > +static void > +run_tests () > +{ > + if (setenv ("GDB_SELFTEST_ENVIRON", "1", 1) != 0) > + error ("Could not set environment variable for testing."); > + > + gdb_environ env; > + Please add a test that that checks that get_char_vector() on an empty gdb_environ works as expected. I.e., something like: gdb_environ env; SELF_CHECK (env.get_char_vector()[0] == NULL); AFAICS from: gdb_environ::gdb_environ () { } char ** gdb_environ::get_char_vector () const { return const_cast (&m_environ_vector[0]); } we end up with a bogus envp, because it points at m_environ_vector.end(), which is not a valid pointer to dereference. Even ignoring that, it does not point to NULL. So if we pass such a pointer to execve or some syscall that accepts an envp, then it'll try iterating the vector until it finds a NULL entry, and of course do the wrong thing, maybe crash if you're lucky. Note we can get this situation from here too: static gdb_environ from_host_environ () { extern char **environ; gdb_environ e; if (environ == NULL) return e; The old code did not suffer from this because it always allocated gdb_environ::vector: struct gdb_environ * make_environ (void) { struct gdb_environ *e; e = XNEW (struct gdb_environ); e->allocated = 10; e->vector = (char **) xmalloc ((e->allocated + 1) * sizeof (char *)); e->vector[0] = 0; return e; } So we either always add a NULL to the vector, or we change gdb_environ::get_char_vector instead, like: char ** gdb_environ::get_char_vector () const { if (m_environ_vector.empty ()) { static const char *const empty_envp[1] = { NULL }; return const_cast (empty_envp); } return const_cast (&m_environ_vector[0]); } This is OK because execve etc. are garanteed to never change the envp they're passed. > + SELF_CHECK (env.get ("PWD") == NULL); > + > + env = gdb_environ::from_host_environ (); > + > + SELF_CHECK (strcmp (env.get ("GDB_SELFTEST_ENVIRON"), "1") == 0); > + > + env.set ("GDB_SELFTEST_ENVIRON", "test"); > + SELF_CHECK (strcmp (env.get ("GDB_SELFTEST_ENVIRON"), "test") == 0); > + > + env.unset ("GDB_SELFTEST_ENVIRON"); > + SELF_CHECK (env.get ("GDB_SELFTEST_ENVIRON") == NULL); > + > + env.set ("GDB_SELFTEST_ENVIRON", "1"); > + SELF_CHECK (strcmp (env.get ("GDB_SELFTEST_ENVIRON"), "1") == 0); > + > + env.clear (); > + SELF_CHECK (env.get ("GDB_SELFTEST_ENVIRON") == NULL); Like above, after env.clear() check that this works: SELF_CHECK (env.get_char_vector()[0] == NULL); > + > + if (setenv ("GDB_SELFTEST_ENVIRON", "1", 1) != 0) > + error ("Could not set environment variable for testing."); This setenv looks like a stale copy from the one at the top? I'm not seeing why it's necessary here. > + > + env = gdb_environ::from_host_environ (); > + char **penv = env.get_char_vector (); > + bool found_var = false, found_twice = false; > + > + for (size_t i = 0; penv[i] != NULL; ++i) > + if (strcmp (penv[i], "GDB_SELFTEST_ENVIRON=1") == 0) > + { > + if (found_var) > + found_twice = true; > + found_var = true; > + } > + SELF_CHECK (found_var == true); > + SELF_CHECK (found_twice == false); Why not simply a count: int found_count = 0; for (size_t i = 0; penv[i] != NULL; ++i) if (strcmp (penv[i], "GDB_SELFTEST_ENVIRON=1") == 0) found_count++; SELF_CHECK (found_count == 1); I think that no test actually explicitly sets more than one var in the vector. I think you should exercise that. Also check that removing some other var doesn't remove the first. Etc. E.g., set var 1, set var 2, unset var 1, check that var 2 is still there. Thanks, Pedro Alves