#!/mingw/bin/python
# -*- coding: utf-8 -*-
#  split-debug.py - splits debug symbols from executables into separate files
#  Copyright © 2012 LRN
#
#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
#  Usage:
#    split-debug.py DIRECTORY [--target=TRIPLET] [--nostrip=SUFFIX]...
#                   [--nostrip-unneeded=SUFFIX]... [--ignore=SUFFIX]...
#
#  DIRECTORY is walked recursively; every PE binary whose optional-header
#  magic matches the selected target gets a "<name>.dbg" companion file.

from __future__ import print_function

import os
import sys
import subprocess
import hashlib
import stat
import re
import struct
import platform


def which(program):
    """Return the path of an executable called *program*, or None.

    If *program* contains a directory component it is checked as given;
    otherwise every entry of the PATH environment variable is searched
    (os.defpath is used when PATH is not set).  When *program* does not
    already end with '.exe', the same location is also tried with '.exe'
    appended.
    """

    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    def resolve(candidate):
        # Try the candidate as-is, then with the Windows executable suffix.
        if is_exe(candidate):
            return candidate
        if not program.endswith('.exe') and is_exe(candidate + '.exe'):
            return candidate + '.exe'
        return None

    fpath, _ = os.path.split(program)
    if fpath:
        return resolve(program)

    for path in os.environ.get("PATH", os.defpath).split(os.pathsep):
        # PATH entries are sometimes wrapped in double quotes.
        found = resolve(os.path.join(path.strip('"'), program))
        if found is not None:
            return found
    return None


# Per-run caches, keyed by file path (or by content hash for the last one).
__known_binary_files = {}
__known_non_binary_files = {}
# md5 digest of a processed binary -> (binary path, dbg path).
__linked_binary_files = {}
# Optional-header magic that a file must carry to be processed
# (0x10b or 0x20b); set by main().
__target_pe_magic = None

# Byte counts read by the PE probe below.
_DOS_HEADER_SIZE = 30 * 2 + 4
_COFF_HEADER_SIZE = 20
_OPTIONAL_HEADER_SIZE = 96

if os.name == 'nt':
    import win32file

    def get_read_handle(filename):
        """Open *filename* (file or directory) for reading via win32file."""
        if os.path.isdir(filename):
            dwFlagsAndAttributes = win32file.FILE_FLAG_BACKUP_SEMANTICS
        else:
            dwFlagsAndAttributes = 0
        return win32file.CreateFile(
            filename,
            win32file.GENERIC_READ,
            win32file.FILE_SHARE_READ,
            None,
            win32file.OPEN_EXISTING,
            dwFlagsAndAttributes,
            None,
        )

    def get_unique_id(hFile):
        """Return (volume, index_hi, index_lo) for an open file handle."""
        (
            attributes,
            created_at, accessed_at, written_at,
            volume,
            file_hi, file_lo,
            n_links,
            index_hi, index_lo,
        ) = win32file.GetFileInformationByHandle(hFile)
        return volume, index_hi, index_lo

    def is_same_file(filename1, filename2):
        """Return True if both names have the same volume and file index."""
        hFile1 = get_read_handle(filename1)
        try:
            hFile2 = get_read_handle(filename2)
            try:
                return get_unique_id(hFile1) == get_unique_id(hFile2)
            finally:
                hFile2.Close()
        finally:
            # Close handles even when opening/querying the second file fails.
            hFile1.Close()

    def nt_is_link(filename):
        """Guess whether *filename* is a link by parsing `dir` output.

        Returns True when the listing of the containing directory has an
        entry of the form " <name> [" (presumably how `dir` prints a link
        and its target - confirm), and also when `dir` itself fails.
        """
        bs_filename = filename.replace('/', '\\')
        dirname = os.path.dirname(bs_filename)
        p = subprocess.Popen([os.environ['ComSpec'], '/C', 'dir', dirname],
                             stdout=subprocess.PIPE)
        o, e = p.communicate()
        if p.returncode != 0:
            return True
        if not isinstance(o, str):
            # Python 3 hands back bytes; make the substring tests below valid.
            o = o.decode(errors='replace')
        bn = os.path.basename(filename)
        # NOTE(review): the pipe is binary, so if `dir` terminates lines with
        # CRLF the second test can never match - confirm whether that guard
        # was meant to be effective.
        return ' ' + bn + ' [' in o and ' ' + bn + '\n' not in o

else:
    def is_same_file(filename1, filename2):
        """Return True if both names refer to the same device and inode."""
        s1 = os.stat(filename1)
        s2 = os.stat(filename2)
        # Inode numbers are only unique per device, so compare both.
        return (s1.st_dev, s1.st_ino) == (s2.st_dev, s2.st_ino)


def is_binary_file(f):
    """Return True if *f* is a PE file for the target, using the caches."""
    if f in __known_non_binary_files:
        return False
    if f in __known_binary_files:
        return True
    return is_pe_file(f)


def _matches_target_pe(r):
    """Check the headers of the open binary stream *r*.

    Returns True only when the stream starts with a full 64-byte DOS header
    carrying the 'MZ' signature, the offset stored in its last four bytes
    leads to a 'PE\\0\\0' signature followed by 20 + 96 readable header
    bytes, and the first 16-bit field of those 96 bytes equals
    __target_pe_magic.
    """
    dos_header = r.read(_DOS_HEADER_SIZE)
    if len(dos_header) != _DOS_HEADER_SIZE or dos_header[:2] != b'MZ':
        return False
    # Header fields are little-endian regardless of the host byte order.
    (pe_offset,) = struct.unpack_from('<i', dos_header, _DOS_HEADER_SIZE - 4)
    r.seek(pe_offset)
    if r.read(4) != b'PE\0\0':
        return False
    if len(r.read(_COFF_HEADER_SIZE)) != _COFF_HEADER_SIZE:
        return False
    optional_header = r.read(_OPTIONAL_HEADER_SIZE)
    if len(optional_header) != _OPTIONAL_HEADER_SIZE:
        return False
    (magic,) = struct.unpack_from('<H', optional_header, 0)
    return magic == __target_pe_magic


def is_pe_file(f):
    """Return True if *f* is a PE image whose magic matches the target.

    The verdict is remembered in the module-level caches.  Any error while
    opening or reading the file yields False without caching.
    """
    try:
        with open(f, 'rb') as r:
            matches = _matches_target_pe(r)
    except Exception:
        # Fails for files with weird names (they are usually not PE binaries
        # anyway, so return False)
        return False
    if matches:
        __known_binary_files[f] = True
    else:
        __known_non_binary_files[f] = False
    return matches


#def is_archive_file (f):
#  head = []
#  with open (f, 'rb') as r:
#    head = r.read (8)
#  if len (head) == 8 and head == '!<arch>\n':
#    __known_binary_files[f] = True
#    return True
#  __known_non_binary_files[f] = False
#  return False


def main(argv=None):
    """Split debug info out of every matching PE binary under a directory.

    argv defaults to sys.argv.  argv[1] is the directory to scan; the
    remaining recognised arguments are --target=, --nostrip=,
    --nostrip-unneeded= and --ignore= (each list option may be repeated).

    Returns -1 when the directory argument is missing or objcopy cannot be
    found, 0 otherwise.  Per-file failures are printed and do not change
    the return value.
    """
    global __target_pe_magic
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        print("Usage: {} DIRECTORY [--target=TRIPLET] [--nostrip=SUFFIX] "
              "[--nostrip-unneeded=SUFFIX] [--ignore=SUFFIX]".format(
                  argv[0] if argv else 'split-debug.py'))
        return -1

    dir_to_scan = argv[1]
    nostrip = []
    nostrip_unneeded = []
    ignore = []
    target = None
    for a in argv[1:]:
        if a.startswith('--nostrip='):
            nostrip.append(a[10:])
        elif a.startswith('--nostrip-unneeded='):
            nostrip_unneeded.append(a[19:])
        elif a.startswith('--ignore='):
            ignore.append(a[9:])
        elif a.startswith('--target='):
            target = a[9:]

    if target is None:
        # No explicit target: follow the running interpreter.
        cpy = "objcopy"
        if platform.architecture()[0] == '32bit':
            __target_pe_magic = 0x10b
        else:
            __target_pe_magic = 0x20b
    else:
        cpy = target + '-objcopy'
        if 'x86_64' in target:
            __target_pe_magic = 0x20b
        else:
            __target_pe_magic = 0x10b

    objcopy = which(cpy)
    if objcopy is None:
        print("Failed to find {}".format(cpy))
        return -1

    for root, dirs, files in os.walk(dir_to_scan):
        for fn in files:
            # Never split our own output files; test the name first so they
            # are not opened at all.
            if fn[-4:] == '.dbg':
                continue
            f = os.path.join(root, fn)
            if not is_binary_file(f):
                continue
            if os.name == 'nt':
                is_link = nt_is_link(f)
            else:
                is_link = stat.S_ISLNK(os.lstat(f).st_mode)
            if is_link:
                print("Skipping, since this file is a symlink - {}".format(f))
                continue
            print("Processing file {} in directory {}".format(fn, root))
            rc = process_binary_file(root, fn, f, nostrip, nostrip_unneeded,
                                     ignore, objcopy)
            if rc != 0:
                print("ERROR: {}".format(rc))
    return 0


def get_file_hash(f):
    """Return the raw MD5 digest of the contents of file *f*."""
    h = hashlib.md5()
    with open(f, 'rb') as src:
        for chunk in iter(lambda: src.read(1024 * 64), b''):
            h.update(chunk)
    return h.digest()


def process_binary_file(root, fn, f, nostrip_list, nostrip_unneeded_list,
                        ignore_list, objcopy):
    """Move the debug info of binary *f* into "<fn>.dbg" next to it.

    root, fn -- directory and base name of the binary; f is their join.
    nostrip_list -- path suffixes of files that must not be stripped.
    nostrip_unneeded_list -- path suffixes of files for which
        --strip-unneeded is added to the strip step (see NOTE below).
    ignore_list -- path suffixes of files to leave untouched.
    objcopy -- path of the objcopy executable to run.

    Returns 0 on success or when the file is skipped, otherwise the exit
    status of the objcopy invocation that failed.
    """
    dbg = "{}.dbg".format(fn)
    dbg_abs = os.path.join(root, dbg)
    if os.path.isfile(dbg_abs) and is_binary_file(dbg_abs):
        print("Skipping: dbg file already exists: {}".format(dbg_abs))
        return 0

    # A file with the same content that is physically the same file as one
    # already processed (e.g. another hard link) must not be processed twice.
    h = get_file_hash(f)
    linked = __linked_binary_files.get(h, None)
    if linked is not None and is_same_file(f, linked[0]):
        print("Skipping: file {} is already stripped as {} and linked to {}".format(f, linked[0], linked[1]))
        return 0

    # This can be done in a much shorter way, but gdb will warn about missing
    # .gnu_debuglink section in _.dbg_ file (because it does not have one;
    # but it isn't required to have one!)
    # Here's what it does:
    #   create a dbg file with proper debug info:
    #     objcopy --only-keep-debug orig dbg
    #   add a link to the original file, pointing at the dbg file (adds the
    #   .gnu_debuglink section)
    #     objcopy --add-gnu-debuglink="dbg" orig
    #   re-create the dbg file; this time it will ALSO have the .gnu_debuglink
    #   section
    #     objcopy --only-keep-debug orig dbg
    #   remove old .gnu_debuglink section from the original file
    #     objcopy --remove-section=.gnu_debuglink orig
    #   strip debug-info from the original file
    #     objcopy --strip-debug orig
    #   add a new .gnu_debuglink section to the original file
    #     objcopy --add-gnu-debuglink="dbg" orig
    # This way dbg file gets a .gnu_debuglink section (doesn't matter where
    # it's pointing), and its contents pass the CRC32 check
    # Shorter way:
    #   objcopy --only-keep-debug orig dbg
    #   objcopy --strip-debug orig
    #   objcopy --add-gnu-debuglink="dbg" orig

    def run_objcopy(args):
        # Echo the quoted command line, then run it and hand back its status.
        l = [objcopy] + args
        print('"' + '" "'.join(l) + '"')
        sys.stdout.flush()
        return subprocess.call(l)

    for ign in ignore_list:
        if f.endswith(ign):
            print("Ignore {}".format(f))
            return 0

    print("Separating debug info from {} into {}".format(f, dbg_abs))
    rc = run_objcopy(['--only-keep-debug', f, dbg_abs])
    if rc != 0:
        return rc

    print("Creating a debuginfo link to {} in {}".format(dbg_abs, f))
    rc = run_objcopy(['--add-gnu-debuglink={}'.format(dbg_abs), f])
    if rc != 0:
        return rc

    print("Separating (again) debug info from {} into {}".format(f, dbg_abs))
    rc = run_objcopy(['--only-keep-debug', f, dbg_abs])
    if rc != 0:
        return rc

    print("Removing old .gnu_debuglink section from {}".format(f))
    rc = run_objcopy(['--remove-section=.gnu_debuglink', f])
    if rc != 0:
        # Report this step's own status (the previous step's status is 0).
        return rc

    do_strip = not any(f.endswith(suffix) for suffix in nostrip_list)
    do_strip_unneeded = not any(f.endswith(suffix)
                                for suffix in nostrip_unneeded_list)

    if do_strip:
        strip_args = ['--strip-debug']
        # NOTE(review): --strip-unneeded is added only for files that match
        # --nostrip-unneeded=, which looks inverted relative to the option
        # name.  Kept as-is because the procedure described above documents
        # plain --strip-debug as the default - confirm the intent.
        if not do_strip_unneeded:
            strip_args.append('--strip-unneeded')
        print("Stripping debug info from {}".format(f))
        rc = run_objcopy(strip_args + [f])
        if rc != 0:
            return rc
    else:
        print("Not stripping {}".format(f))

    print("Creating (again) a debuginfo link to {} in {}".format(dbg_abs, f))
    rc = run_objcopy(['--add-gnu-debuglink={}'.format(dbg_abs), f])
    if rc != 0:
        return rc

    # Remember the final content so other links to this file are skipped.
    h = get_file_hash(f)
    __linked_binary_files[h] = (f, dbg_abs)

    return 0


if __name__ == "__main__":
    sys.exit(main())