#!/mingw/bin/python
# -*- coding: utf-8 -*-
# split-debug.py - splits debug symbols from executables into separate files
# Copyright © 2012 LRN
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import print_function
import os
import sys
import subprocess
import hashlib
import stat
import re
import struct
import platform
def which(program):
    """Locate an executable, similar to the shell's ``which`` command.

    If *program* contains a directory component it is checked as given;
    otherwise every directory listed in the ``PATH`` environment variable
    is searched in order.  In both cases, when *program* does not already
    end in ``.exe``, the name with ``.exe`` appended is tried as a fallback.

    Returns the path of the first match (including the ``.exe`` suffix if
    that is the variant that was found), or None when nothing executable
    is found.
    """
    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    def resolve(candidate):
        # Try the name as given first, then the ".exe" variant.
        if is_exe(candidate):
            return candidate
        if not program.endswith('.exe') and is_exe(candidate + '.exe'):
            return candidate + '.exe'
        return None

    dirname = os.path.split(program)[0]
    if dirname:
        # An explicit path is never looked up in PATH.
        return resolve(program)

    # A missing PATH variable must not raise KeyError; fall back to the
    # platform's default search path instead.
    for path in os.environ.get("PATH", os.defpath).split(os.pathsep):
        found = resolve(os.path.join(path.strip('"'), program))
        if found is not None:
            return found
    return None
# Paths that is_pe_file () accepted as PE binaries (path -> True).
__known_binary_files = {}
# Paths that is_pe_file () rejected (path -> False).
__known_non_binary_files = {}
# Maps the MD5 digest of an already-processed binary to a
# (binary path, dbg path) tuple; filled in by process_binary_file ().
__linked_binary_files = {}
# Optional-header magic a PE file must have to be processed; main () sets
# it to 0x10b or 0x20b depending on the target.
__target_pe_magic = None
if os.name == 'nt':
    import win32file

    def get_read_handle(filename):
        """Open *filename* for reading and return the pywin32 handle.

        Directories are opened with FILE_FLAG_BACKUP_SEMANTICS, regular
        files with no extra flags.  The caller must close the handle.
        """
        if os.path.isdir(filename):
            flags_and_attributes = win32file.FILE_FLAG_BACKUP_SEMANTICS
        else:
            flags_and_attributes = 0
        return win32file.CreateFile(
            filename,
            win32file.GENERIC_READ,
            win32file.FILE_SHARE_READ,
            None,
            win32file.OPEN_EXISTING,
            flags_and_attributes,
            None
        )

    def get_unique_id(hFile):
        """Return the (volume, index_hi, index_lo) triple for an open handle."""
        (
            attributes,
            created_at, accessed_at, written_at,
            volume,
            file_hi, file_lo,
            n_links,
            index_hi, index_lo
        ) = win32file.GetFileInformationByHandle(hFile)
        return volume, index_hi, index_lo

    def is_same_file(filename1, filename2):
        """Return True when both names refer to the same file.

        Two names match when get_unique_id() yields the same triple for
        both.  Handles are closed even if opening the second file or
        querying either handle raises.
        """
        hFile1 = get_read_handle(filename1)
        try:
            hFile2 = get_read_handle(filename2)
            try:
                return get_unique_id(hFile1) == get_unique_id(hFile2)
            finally:
                hFile2.Close()
        finally:
            hFile1.Close()

    def nt_is_link(filename):
        """Guess whether *filename* is a link by parsing ``dir`` output.

        Runs ``dir`` (through %ComSpec%) on the containing directory and
        reports a link when the listing contains " <basename> [" but not
        " <basename>" followed by a newline.  Also returns True when
        ``dir`` itself exits with a non-zero status.
        """
        bs_filename = filename.replace('/', '\\')
        dirname = os.path.dirname(bs_filename)
        p = subprocess.Popen([os.environ['ComSpec'], '/C', 'dir', dirname], stdout=subprocess.PIPE)
        out, _ = p.communicate()
        if p.returncode != 0:
            return True
        if not isinstance(out, str):
            # Python 3 returns bytes from the pipe; searching them for a
            # str would raise TypeError.  Undecodable bytes are replaced
            # rather than aborting the scan.
            out = out.decode(errors='replace')
        bn = os.path.basename(filename)
        return ' ' + bn + ' [' in out and ' ' + bn + '\n' not in out
else:
    def is_same_file(filename1, filename2):
        """Return True when both names refer to the same file.

        Inode numbers are only unique within one device, so the device
        number is compared together with the inode number.
        """
        s1 = os.stat(filename1)
        s2 = os.stat(filename2)
        return (s1.st_dev, s1.st_ino) == (s2.st_dev, s2.st_ino)
def is_binary_file(f):
    """Tell whether *f* is a PE binary for the configured target.

    The module-level result caches are consulted first; a cached rejection
    takes precedence over a cached acceptance.  Only a path found in
    neither cache is actually parsed, through is_pe_file().
    """
    cached_rejection = f in __known_non_binary_files
    cached_acceptance = f in __known_binary_files
    if cached_rejection or cached_acceptance:
        return not cached_rejection
    return is_pe_file(f)
def is_pe_file(f):
    """Check whether *f* is a PE image built for the configured target.

    Reads the 64-byte DOS header, follows its last field (the file offset
    of the PE header) to the "PE\\0\\0" signature, then reads the 20-byte
    COFF header and the first 96 bytes of the optional header.  The file
    is accepted only when the optional-header magic equals the
    module-level ``__target_pe_magic``.

    Accepted paths are recorded in ``__known_binary_files`` and rejected
    ones in ``__known_non_binary_files``.  A file that cannot be opened or
    parsed is reported as non-PE without being cached.

    Returns True for a matching PE file, False otherwise.
    """
    dos_header_size = 30 * 2 + 4
    coff_header_size = 20
    optional_header_size = 96

    def reject():
        # Remember the verdict so is_binary_file() can answer from cache.
        __known_non_binary_files[f] = False
        return False

    try:
        with open(f, 'rb') as r:
            dos_header = r.read(dos_header_size)
            if len(dos_header) != dos_header_size:
                return reject()
            # Bytes literals: the file is read in binary mode, so on
            # Python 3 a str literal would never compare equal.
            if dos_header[:2] != b'MZ':
                return reject()
            # Last DOS-header field: signed 32-bit offset of the PE header.
            # '<' pins little-endian layout independent of the host.
            exe_offset, = struct.unpack_from('<i', dos_header, dos_header_size - 4)
            r.seek(exe_offset)
            if r.read(4) != b'PE\0\0':
                return reject()
            # The COFF header fields are not inspected; it only has to be
            # present in full.
            if len(r.read(coff_header_size)) != coff_header_size:
                return reject()
            optional_header = r.read(optional_header_size)
            if len(optional_header) != optional_header_size:
                return reject()
            # The optional header starts with the 16-bit magic.
            magic, = struct.unpack_from('<H', optional_header, 0)
            if magic != __target_pe_magic:
                return reject()
    except Exception:
        # Best effort: files with weird names or bogus offsets are usually
        # not PE binaries anyway.  Unlike a bare "except:", this lets
        # KeyboardInterrupt and SystemExit propagate.
        return False
    __known_binary_files[f] = True
    return True
#def is_archive_file (f):
# head = []
# with open (f, 'rb') as r:
# head = r.read (8)
# if len (head) == 8 and head == '!<arch>\n':
# __known_binary_files[f] = True
# return True
# __known_non_binary_files[f] = False
# return False
def main():
    """Scan a directory tree and split debug info out of each PE binary.

    Command line: the first argument is the directory to scan.  These
    options may appear anywhere among the arguments:

      --nostrip=SUFFIX           do not strip files whose path ends in SUFFIX
      --nostrip-unneeded=SUFFIX  passed on to process_binary_file()
      --ignore=SUFFIX            skip files whose path ends in SUFFIX
      --target=TARGET            use TARGET-objcopy instead of objcopy

    Without --target the PE magic to look for follows the bitness of the
    running Python (0x10b for 32-bit, 0x20b otherwise); with --target it
    is 0x20b when TARGET contains "x86_64" and 0x10b otherwise.

    Returns 0 once the scan completes (per-file errors are printed but do
    not change the result) and -1 when the directory argument is missing
    or objcopy cannot be found.
    """
    global __target_pe_magic
    if len(sys.argv) < 2:
        # Previously this surfaced as an IndexError traceback.
        print("Usage: split-debug.py DIRECTORY [--nostrip=SUFFIX] "
              "[--nostrip-unneeded=SUFFIX] [--ignore=SUFFIX] [--target=TARGET]")
        return -1
    dir_to_scan = sys.argv[1]
    nostrip = []
    nostrip_unneeded = []
    ignore = []
    target = None
    # Option prefix -> list collecting the values given for that option.
    list_options = (
        ('--nostrip=', nostrip),
        ('--nostrip-unneeded=', nostrip_unneeded),
        ('--ignore=', ignore),
    )
    for a in sys.argv[1:]:
        if a.startswith('--target='):
            target = a[len('--target='):]
            continue
        for prefix, values in list_options:
            if a.startswith(prefix):
                values.append(a[len(prefix):])
                break

    if target is None:
        cpy = "objcopy"
        if platform.architecture()[0] == '32bit':
            __target_pe_magic = 0x10b
        else:
            __target_pe_magic = 0x20b
    else:
        cpy = target + '-objcopy'
        if 'x86_64' in target:
            __target_pe_magic = 0x20b
        else:
            __target_pe_magic = 0x10b

    objcopy = which(cpy)
    if objcopy is None:
        print("Failed to find {}".format(cpy))
        return -1

    for root, dirs, files in os.walk(dir_to_scan):
        for fn in files:
            f = os.path.join(root, fn)
            if not is_binary_file(f):
                continue
            # Never split a debug-info file itself.
            if fn[-4:] == '.dbg':
                continue
            if os.name == 'nt':
                is_link = nt_is_link(f)
            else:
                st = os.lstat(f)
                is_link = stat.S_ISLNK(st.st_mode)
            if is_link:
                print("Skipping, since this file is a symlink - {}".format(f))
                continue
            print("Processing file {} in directory {}".format(fn, root))
            rc = process_binary_file(root, fn, f, nostrip, nostrip_unneeded, ignore, objcopy)
            if rc != 0:
                # Report and carry on with the remaining files.
                print("ERROR: {}".format(rc))
    return 0
def get_file_hash(f):
    """Return the raw MD5 digest of the contents of file *f*.

    The file is opened in binary mode and fed to the hash in 64 KiB
    pieces, so it is never held in memory as a whole.
    """
    chunk_size = 1024 * 64
    digest = hashlib.md5()
    with open(f, 'rb') as stream:
        chunk = stream.read(chunk_size)
        while chunk:
            digest.update(chunk)
            chunk = stream.read(chunk_size)
    return digest.digest()
def _run_and_report(cmd):
    """Echo the quoted command line *cmd*, run it and return its exit code."""
    print('"' + '" "'.join(cmd) + '"')
    # Flush so the echoed command is written out before the child starts.
    sys.stdout.flush()
    proc = subprocess.Popen(cmd)
    proc.communicate()
    return proc.returncode


def process_binary_file(root, fn, f, nostrip_list, nostrip_unneeded_list, ignore_list, objcopy):
    """Split the debug info of one binary into a sibling ``<fn>.dbg`` file.

    Parameters:
      root                  -- directory containing the binary
      fn                    -- file name of the binary inside *root*
      f                     -- path of the binary (*root* joined with *fn*)
      nostrip_list          -- path suffixes that must not be stripped at all
      nostrip_unneeded_list -- path suffixes that must not get
                               ``--strip-unneeded``
      ignore_list           -- path suffixes to skip entirely
      objcopy               -- path of the objcopy executable to run

    The file is skipped (return 0) when it matches *ignore_list*, when a
    PE file named ``<fn>.dbg`` already exists next to it, or when an
    identical file processed earlier turns out to be the very same file.

    Returns 0 on success or skip, otherwise the non-zero exit code of the
    first objcopy invocation that failed.

    This could be done in a much shorter way, but then gdb warns about a
    missing .gnu_debuglink section in the .dbg file (it does not have one,
    although it is not required to).  So the sequence is:
      objcopy --only-keep-debug orig dbg          create the dbg file
      objcopy --add-gnu-debuglink="dbg" orig      add .gnu_debuglink to orig
      objcopy --only-keep-debug orig dbg          re-create dbg, now WITH a
                                                  .gnu_debuglink section
      objcopy --remove-section=.gnu_debuglink orig
      objcopy --strip-debug orig                  unless excluded by nostrip
      objcopy --add-gnu-debuglink="dbg" orig      link to the final dbg file
    This way the dbg file gets a .gnu_debuglink section (it does not matter
    where it points) and its contents pass the CRC32 check.
    Shorter way:
      objcopy --only-keep-debug orig dbg
      objcopy --strip-debug orig
      objcopy --add-gnu-debuglink="dbg" orig
    """
    # Ignored files are rejected first, before any hashing or probing.
    if f.endswith(tuple(ignore_list)):
        print("Ignore {}".format(f))
        return 0

    dbg = "{}.dbg".format(fn)
    dbg_abs = os.path.join(root, dbg)
    if os.path.exists(dbg_abs) and os.path.isfile(dbg_abs) and is_binary_file(dbg_abs):
        print("Skipping: dbg file already exists: {}".format(dbg_abs))
        return 0

    h = get_file_hash(f)
    linked = __linked_binary_files.get(h, None)
    if linked is not None and is_same_file(f, linked[0]):
        print("Skipping: file {} is already stripped as {} and linked to {}".format(f, linked[0], linked[1]))
        return 0

    keep_debug_cmd = [objcopy, '--only-keep-debug', f, dbg_abs]
    add_link_cmd = [objcopy, '--add-gnu-debuglink={}'.format(dbg_abs), f]

    # (progress message, command) pairs, run in order; the first failure
    # aborts and its exit code is returned.  The old copy-pasted version
    # returned the previous step's code (always 0) when --remove-section
    # failed.
    steps = [
        ("Separating debug info from {} into {}".format(f, dbg_abs), keep_debug_cmd),
        ("Creating a debuginfo link to {} in {}".format(dbg_abs, f), add_link_cmd),
        ("Separating (again) debug info from {} into {}".format(f, dbg_abs), keep_debug_cmd),
        ("Removing old .gnu_debuglink section from {}".format(f),
         [objcopy, '--remove-section=.gnu_debuglink', f]),
    ]
    for message, cmd in steps:
        print(message)
        rc = _run_and_report(cmd)
        if rc != 0:
            return rc

    do_strip = not f.endswith(tuple(nostrip_list))
    do_strip_unneeded = not f.endswith(tuple(nostrip_unneeded_list))
    if do_strip:
        strip_cmd = [objcopy, '--strip-debug']
        # --strip-unneeded is the default; files listed in
        # nostrip_unneeded_list opt out of it.  (The test used to be
        # inverted, applying the flag only to the listed files.)
        if do_strip_unneeded:
            strip_cmd.append('--strip-unneeded')
        strip_cmd.append(f)
        print("Stripping debug info from {}".format(f))
        rc = _run_and_report(strip_cmd)
        if rc != 0:
            return rc
    else:
        print("Not stripping {}".format(f))

    print("Creating (again) a debuginfo link to {} in {}".format(dbg_abs, f))
    rc = _run_and_report(add_link_cmd)
    if rc != 0:
        return rc

    # Remember the processed file by the hash of its final contents so
    # that further names for the same file can be recognised and skipped.
    h = get_file_hash(f)
    __linked_binary_files[h] = (f, dbg_abs)
    return 0
if __name__ == "__main__":
    # Only run when executed as a script; the value returned by main()
    # becomes the process exit status.
    exit_status = main()
    sys.exit(exit_status)