From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <gdb-patches-return-25847-listarch-gdb-patches=sources.redhat.com@sources.redhat.com>
Received: (qmail 22137 invoked by alias); 10 Jul 2003 01:03:32 -0000
Mailing-List: contact gdb-patches-help@sources.redhat.com; run by ezmlm
Precedence: bulk
List-Subscribe: <mailto:gdb-patches-subscribe@sources.redhat.com>
List-Archive: <http://sources.redhat.com/ml/gdb-patches/>
List-Post: <mailto:gdb-patches@sources.redhat.com>
List-Help: <mailto:gdb-patches-help@sources.redhat.com>, <http://sources.redhat.com/ml/#faqs>
Sender: gdb-patches-owner@sources.redhat.com
Received: (qmail 22108 invoked from network); 10 Jul 2003 01:03:20 -0000
Received: from unknown (HELO are.twiddle.net) (64.81.246.98)
  by sources.redhat.com with SMTP; 10 Jul 2003 01:03:20 -0000
Received: from kanga.twiddle.net (kanga.twiddle.home [172.31.0.3])
	by are.twiddle.net (8.12.8/8.12.8) with ESMTP id h6A13JLH016107;
	Wed, 9 Jul 2003 18:03:19 -0700
Received: from rth by kanga.twiddle.net with local (Exim 3.36 #1 (Debian))
	id 19aPqB-0000wr-00; Wed, 09 Jul 2003 18:03:19 -0700
Date: Thu, 10 Jul 2003 01:03:00 -0000
To: ezannoni@redhat.com
Cc: gdb-patches@gcc.gnu.org
Subject: [RFA] Don't SEGV on invalid dwarf2 frame info
Message-ID: <20030710010318.GA3645@twiddle.net>
Mime-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
User-Agent: Mutt/1.5.4i
From: Richard Henderson <rth@twiddle.net>
X-SW-Source: 2003-07/txt/msg00204.txt.bz2

Elena, this is the patch I was thinking about.

For the audience, there is at least one bug in current cvs ld's .eh_frame
optimization code that results in padding being added between sections.
But we saw similar problems when we added support for .eh_frame generation
within the assembler (and didn't .align sections), so the discussion in 
the patch is a bit more broad than that.

Does this seem reasonable?


r~


	* dwarf2-frame.c (decode_frame_entry_1): Rename from
	decode_frame_entry; return NULL for invalid data; tidy some
	variable initialization.
	(decode_frame_entry): New.

Index: dwarf2-frame.c
===================================================================
RCS file: /cvs/src/src/gdb/dwarf2-frame.c,v
retrieving revision 1.7
diff -c -p -d -r1.7 dwarf2-frame.c
*** dwarf2-frame.c	8 Jun 2003 18:27:13 -0000	1.7
--- dwarf2-frame.c	10 Jul 2003 00:31:49 -0000
*************** add_fde (struct comp_unit *unit, struct 
*** 1058,1076 ****
  #define DW64_CIE_ID ~0
  #endif
  
! /* Read a CIE or FDE in BUF and decode it.  */
  
  static char *
! decode_frame_entry (struct comp_unit *unit, char *buf, int eh_frame_p)
  {
    LONGEST length;
    unsigned int bytes_read;
!   int dwarf64_p = 0;
!   ULONGEST cie_id = DW_CIE_ID;
    ULONGEST cie_pointer;
-   char *start = buf;
    char *end;
  
    length = read_initial_length (unit->abfd, buf, &bytes_read);
    buf += bytes_read;
    end = buf + length;
--- 1058,1078 ----
  #define DW64_CIE_ID ~0
  #endif
  
! /* Read a CIE or FDE in BUF and decode it.  Return NULL in invalid input,
!    otherwise the next byte to be processed.  */
  
  static char *
! decode_frame_entry_1 (struct comp_unit *unit, char *start, int eh_frame_p)
  {
+   char *buf;
    LONGEST length;
    unsigned int bytes_read;
!   int dwarf64_p;
!   ULONGEST cie_id;
    ULONGEST cie_pointer;
    char *end;
  
+   buf = start;
    length = read_initial_length (unit->abfd, buf, &bytes_read);
    buf += bytes_read;
    end = buf + length;
*************** decode_frame_entry (struct comp_unit *un
*** 1078,1092 ****
    if (length == 0)
      return end;
  
!   if (bytes_read == 12)
!     dwarf64_p = 1;
  
!   /* In a .eh_frame section, zero is used to distinguish CIEs from
!      FDEs.  */
    if (eh_frame_p)
      cie_id = 0;
    else if (dwarf64_p)
      cie_id = DW64_CIE_ID;
  
    if (dwarf64_p)
      {
--- 1080,1095 ----
    if (length == 0)
      return end;
  
!   /* Distinguish between 32 and 64-bit encoded frame info.  */
!   dwarf64_p = (bytes_read == 12);
  
!   /* In a .eh_frame section, zero is used to distinguish CIEs from FDEs.  */
    if (eh_frame_p)
      cie_id = 0;
    else if (dwarf64_p)
      cie_id = DW64_CIE_ID;
+   else
+     cie_id = DW_CIE_ID;
  
    if (dwarf64_p)
      {
*************** decode_frame_entry (struct comp_unit *un
*** 1124,1130 ****
        cie->encoding = encoding_for_size (unit->addr_size);
  
        /* Check version number.  */
!       gdb_assert (read_1_byte (unit->abfd, buf) == DW_CIE_VERSION);
        buf += 1;
  
        /* Interpret the interesting bits of the augmentation.  */
--- 1127,1134 ----
        cie->encoding = encoding_for_size (unit->addr_size);
  
        /* Check version number.  */
!       if (read_1_byte (unit->abfd, buf) != DW_CIE_VERSION)
! 	return NULL;
        buf += 1;
  
        /* Interpret the interesting bits of the augmentation.  */
*************** decode_frame_entry (struct comp_unit *un
*** 1159,1164 ****
--- 1163,1170 ----
  
  	  length = read_unsigned_leb128 (unit->abfd, buf, &bytes_read);
  	  buf += bytes_read;
+ 	  if (buf > end)
+ 	    return NULL;
  	  cie->initial_instructions = buf + length;
  	  augmentation++;
  	}
*************** decode_frame_entry (struct comp_unit *un
*** 1211,1226 ****
        /* This is a FDE.  */
        struct dwarf2_fde *fde;
  
        if (eh_frame_p)
  	{
- 	  /* In an .eh_frame section, the CIE pointer is the delta
-              between the address within the FDE where the CIE pointer
-              is stored and the address of the CIE.  Convert it to an
-              offset into the .eh_frame section.  */
  	  cie_pointer = buf - unit->dwarf_frame_buffer - cie_pointer;
  	  cie_pointer -= (dwarf64_p ? 8 : 4);
  	}
  
        fde = (struct dwarf2_fde *)
  	obstack_alloc (&unit->objfile->psymbol_obstack,
  		       sizeof (struct dwarf2_fde));
--- 1217,1236 ----
        /* This is a FDE.  */
        struct dwarf2_fde *fde;
  
+       /* In an .eh_frame section, the CIE pointer is the delta between the
+ 	 address within the FDE where the CIE pointer is stored and the
+ 	 address of the CIE.  Convert it to an offset into the .eh_frame
+ 	 section.  */
        if (eh_frame_p)
  	{
  	  cie_pointer = buf - unit->dwarf_frame_buffer - cie_pointer;
  	  cie_pointer -= (dwarf64_p ? 8 : 4);
  	}
  
+       /* In either case, validate the result is still within the section.  */
+       if (cie_pointer >= unit->dwarf_frame_size)
+ 	return NULL;
+ 
        fde = (struct dwarf2_fde *)
  	obstack_alloc (&unit->objfile->psymbol_obstack,
  		       sizeof (struct dwarf2_fde));
*************** decode_frame_entry (struct comp_unit *un
*** 1252,1257 ****
--- 1262,1269 ----
  
  	  length = read_unsigned_leb128 (unit->abfd, buf, &bytes_read);
  	  buf += bytes_read + length;
+ 	  if (buf > end)
+ 	    return NULL;
  	}
  
        fde->instructions = buf;
*************** decode_frame_entry (struct comp_unit *un
*** 1261,1266 ****
--- 1273,1361 ----
      }
  
    return end;
+ }
+ 
+ static char *
+ decode_frame_entry (struct comp_unit *unit, char *start, int eh_frame_p)
+ {
+   enum { NONE, ALIGN4, ALIGN8, FAIL } workaround = NONE;
+   char *ret;
+   const char *msg;
+   ptrdiff_t start_offset;
+ 
+   while (1)
+     {
+       ret = decode_frame_entry_1 (unit, start, eh_frame_p);
+ 
+       if (ret != NULL)
+ 	break;
+ 
+       /* We have corrupt input data of some form.  */
+ 
+       /* ??? Try, weakly, to work around compiler/assembler/linker bugs
+ 	 and mismatches wrt padding and alignment of debug sections.  */
+       /* Note that there is no requirement in the standard for any
+ 	 alignment at all in the frame unwind sections.  Testing for
+ 	 alignment before trying to interpret data would be incorrect.
+ 
+ 	 However, GCC traditionally arranged for frame sections to be
+ 	 sized such that the FDE length and CIE fields happen to be
+ 	 aligned (in theory, for performance).  This, unfortunately,
+ 	 was done with .align directives, which had the side effect of
+ 	 forcing the section to be aligned by the linker.
+ 
+ 	 This becomes a problem when you have some other producer that
+ 	 creates frame sections that are not as strictly aligned.  That
+ 	 produces a hole in the frame info that gets filled by the 
+ 	 linker with zeros.
+ 
+ 	 The GCC behaviour is arguably a bug, but it's effectively now
+ 	 part of the ABI, so we're now stuck with it, at least at the
+ 	 object file level.  A smart linker may decide, in the process
+ 	 of compressing duplicate CIE information, that it can rewrite
+ 	 the entire output section without this extra padding.  */
+ 
+       start_offset = start - unit->dwarf_frame_buffer;
+       if (workaround <= ALIGN4 && (start_offset & 3) != 0)
+ 	{
+ 	  start += 4 - (start_offset & 3);
+ 	  workaround = ALIGN4;
+ 	  continue;
+ 	}
+       if (workaround <= ALIGN8 && (start_offset & 7) != 0)
+ 	{
+ 	  start += 8 - (start_offset & 7);
+ 	  workaround = ALIGN8;
+ 	  continue;
+ 	}
+ 
+       /* Nothing left to try.  Arrange to return as if we've consumed
+ 	 the entire input section.  Hopefully we'll get valid info from
+ 	 the other of .debug_frame/.eh_frame.  */
+       workaround = FAIL;
+       ret = unit->dwarf_frame_buffer + unit->dwarf_frame_size;
+       break;
+     }
+ 
+   switch (workaround)
+     {
+     case NONE:
+       return ret;
+     case ALIGN4:
+       msg = "Corrupt data in %s:%s; align 4 workaround apparently succeeded";
+       break;
+     case ALIGN8:
+       msg = "Corrupt data in %s:%s; align 8 workaround apparently succeeded";
+       break;
+     case FAIL:
+       msg = "Corrupt data in %s:%s";
+       break;
+     }
+ 
+   complaint (&symfile_complaints, msg,
+ 	     unit->dwarf_frame_section->owner->filename,
+ 	     unit->dwarf_frame_section->name);
+   return ret;
  }