* [RFC] Improve amd64 prologue analysis
@ 2010-11-15 17:28 Pierre Muller
2010-11-18 17:22 ` Joel Brobecker
0 siblings, 1 reply; 16+ messages in thread
From: Pierre Muller @ 2010-11-15 17:28 UTC (permalink / raw)
To: gdb-patches
Free Pascal for x86_64 (amd64) CPU generates losts of functions
that do not use RBP as a frame pointer.
I tried to improve amd64-tdep.c to better cope
with this case.
The patch does mainly two things:
1) Fix the subl val,%rsp
(the REX prefix was missing)
2) Add a new function called amd64_analyze_register_saves
that tries to support:
pushq %reg
movq %reg,ofs(%rsp)
and
movq %reg,ofs(%rbp)
It took me a while to understand the generated code,
but I hope that I don't get any false positives
concerning register saves.
The output for a Free Pascal compiled program
greatly improved with that patch.
On the other hand, testing on complier farm
x86_64-unknown-linux-gnu showed no changes in the
testsuite results, which was a disappointment for me...
Comments most welcome.
Pierre Muller
GDB pascal language maintainer
2010-11-15 Pierre Muller <muller@ics.u-strasbg.fr>
* amd64-tdep.c (struct amd64_frame_cache): Add LOCALS field.
(amd64_init_frame_cache): Set LOCALS field to -1.
(amd64_analyze_register_saves): New function.
(amd64_analyze_prologue): Correct analyzis of 'subl val,%rsp'
and call amd64_analyze_register_saves.
Index: src/gdb/amd64-tdep.c
===================================================================
RCS file: /cvs/src/src/gdb/amd64-tdep.c,v
retrieving revision 1.82
diff -u -p -r1.82 amd64-tdep.c
--- src/gdb/amd64-tdep.c 11 Sep 2010 19:09:34 -0000 1.82
+++ src/gdb/amd64-tdep.c 15 Nov 2010 16:55:55 -0000
@@ -1629,6 +1629,7 @@ struct amd64_frame_cache
/* Do we have a frame? */
int frameless_p;
+ long locals;
};
/* Initialize a frame cache. */
@@ -1642,6 +1643,7 @@ amd64_init_frame_cache (struct amd64_fra
cache->base = 0;
cache->sp_offset = -8;
cache->pc = 0;
+ cache->locals = -1;
/* Saved registers. We initialize these to -1 since zero is a valid
offset (that's where %rbp is supposed to be stored).
@@ -1824,6 +1826,123 @@ amd64_analyze_stack_align (CORE_ADDR pc,
return min (pc + offset + 2, current_pc);
}
+/* Check whether PC points at code that saves registers on the stack.
+ If so, it updates CACHE and returns the address of the first
+ instruction after the register saves or CURRENT_PC, whichever is
+ smaller. Otherwise, return PC. */
+
+static CORE_ADDR
+amd64_analyze_register_saves (CORE_ADDR pc, CORE_ADDR current_pc,
+ struct amd64_frame_cache *cache,
+ enum bfd_endian byte_order)
+{
+ CORE_ADDR offset = 0;
+ gdb_byte op, opa[5];
+ int i, loc, regnum, has_rex_prefix;
+ gdb_byte modrm, mod, reg1, reg2;
+
+ if (cache->locals > 0)
+ offset -= cache->locals;
+ /* pushq $reg */
+ for (i = 0; i < 16 && pc < current_pc; i++)
+ {
+ target_read_memory (pc, &op, 1);
+ regnum = 0;
+ has_rex_prefix = 0;
+ /* REX prefix might be used for r8-r15 registers. */
+ if (rex_prefix_p (op))
+ {
+ /* Typically 0x41 will be used.
+ Prefix for r8-r15 registers. */
+ regnum = (op & 1) ? 8 : 0;
+ target_read_memory (pc + 1, &op, 1);
+ has_rex_prefix = 1;
+ }
+ if (op < 0x50 || op > 0x57)
+ break;
+
+ offset -= 8;
+ regnum += op -0x50;
+ if (regnum < AMD64_NUM_SAVED_REGS)
+ cache->saved_regs[amd64_arch_regmap[regnum]] = offset;
+ cache->sp_offset += 8;
+ /* Skip 2 bytes for r8-r15, one otherwise. */
+ if (has_rex_prefix)
+ pc++;
+ pc++;
+ }
+
+ /* movq $reg,offset (%rsp/%rbp)
+ exists in two version, with 1byte or 4byte offset.
+ rbp versions are shorter. */
+ for (i = 0; i < 16 * 4 && pc + 4 < current_pc; i++)
+ {
+ target_read_memory (pc, opa, 5);
+ if ((opa[0] != 0x48 && opa[0] !=0x4c)
+ || opa[1] != 0x89)
+ break;
+ modrm = opa[2];
+ reg1 = modrm & 7;
+ reg2 = (modrm >> 3) & 7;
+ mod = modrm >> 6;
+ if (mod == 3) /* eliminate movq %reg1,%reg2 */
+ break;
+
+ /* %rsp base needs use of SIB byte. */
+ if ((reg1 == 4) /* SIB indicator */
+ && (opa[3] != 0x24))
+ break;
+ if ((reg1 != 5) && (reg1 != 4))
+ break;
+
+ if (opa[0] == 0x4c)
+ regnum = 8;
+ else
+ regnum = 0;
+
+ loc = 0;
+ if (reg1 == 5) /* Indicator of rbp base */
+ {
+ if (mod == 0) /* This is not rbp base. */
+ break;
+ if (mod == 2)
+ loc = read_memory_integer (pc + 3, 4, byte_order);
+ else
+ loc = opa[3];
+ }
+ else if ((reg1 == 4) && (opa[3] == 0x24)) /* rsp base */
+ {
+ /* 4byte flag is MSB of third byte */
+ if (mod == 2)
+ loc = read_memory_integer (pc + 4, 4, byte_order);
+ else if (mod == 1)
+ loc = opa[4];
+ else
+ loc = 0;
+ /* Add rsp to base offset. */
+ loc += offset;
+ pc++;
+ }
+
+ regnum += reg2;
+
+ if (regnum < AMD64_NUM_SAVED_REGS)
+ cache->saved_regs[amd64_arch_regmap[regnum]] = loc;
+
+ /* Advance to next instruction. */
+ if (mod == 2)
+ pc += 7;
+ else if (mod == 1)
+ pc += 4;
+ else
+ pc += 3;
+ }
+
+
+ return pc;
+}
+
+
/* Do a limited analysis of the prologue at PC and update CACHE
accordingly. Bail out early if CURRENT_PC is reached. Return the
address where the analysis stopped.
@@ -1867,13 +1986,45 @@ amd64_analyze_prologue (struct gdbarch *
/* Check for `movq %rsp, %rbp'. */
read_memory (pc + 1, buf, 3);
if (memcmp (buf, proto, 3) != 0)
- return pc + 1;
+ pc++;
+ else
+ {
+ /* OK, we actually have a frame. */
+ cache->frameless_p = 0;
+ pc += 4;
+ }
+ }
- /* OK, we actually have a frame. */
- cache->frameless_p = 0;
- return pc + 4;
+ op = read_memory_unsigned_integer (pc, 1, byte_order);
+
+ /* Check for stack adjustment
+
+ subl $XXX, %rsp
+
+ NOTE: You can't subtract a 16-bit immediate from a 64-bit
+ reg, so we don't have to worry about a data16 prefix.
+ A REX prefix is required for 64bit sub instruction. */
+ if (op == 0x48 && current_pc >= pc + 4) /* subl value,%rsp */
+ {
+ read_memory (pc + 1, buf, 2);
+ /* Pattern for a 4 byte signed value. */
+ if (buf[0] == 0x81 && buf[1] == 0xec)
+ {
+ cache->locals = read_memory_integer (pc + 3, 4, byte_order);
+ cache->sp_offset += cache->locals;
+ pc = pc + 7;
+ }
+ /* Pattern for a 2 byte signed value. */
+ else if (buf[0] == 0x83 && buf[1] == 0xec)
+ {
+ cache->locals = read_memory_integer (pc + 3, 1, byte_order);
+ cache->sp_offset += cache->locals;
+ pc = pc + 4;
+ }
}
+ pc = amd64_analyze_register_saves (pc, current_pc, cache, byte_order);
+
return pc;
}
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [RFC] Improve amd64 prologue analysis 2010-11-15 17:28 [RFC] Improve amd64 prologue analysis Pierre Muller @ 2010-11-18 17:22 ` Joel Brobecker 2010-11-19 8:15 ` Pierre Muller 0 siblings, 1 reply; 16+ messages in thread From: Joel Brobecker @ 2010-11-18 17:22 UTC (permalink / raw) To: Pierre Muller; +Cc: gdb-patches > Free Pascal for x86_64 (amd64) CPU generates losts of functions > that do not use RBP as a frame pointer. > I tried to improve amd64-tdep.c to better cope > with this case. I thought that on amd64, we weren't going to have prologue parsers and rely on frame unwinding info instead? At AdaCore, we have the same sort of things because of Windows where the system code (mostly system DLLs) does not have the unwinding info in DWARF format. But for code generated by us, we have all the debugging information needed to unwind without parsing the prologue, even on Windows64. -- Joel ^ permalink raw reply [flat|nested] 16+ messages in thread
* RE: [RFC] Improve amd64 prologue analysis 2010-11-18 17:22 ` Joel Brobecker @ 2010-11-19 8:15 ` Pierre Muller 2010-11-19 17:20 ` Joel Brobecker 2010-11-24 21:19 ` Mark Kettenis 0 siblings, 2 replies; 16+ messages in thread From: Pierre Muller @ 2010-11-19 8:15 UTC (permalink / raw) To: 'Joel Brobecker'; +Cc: gdb-patches > -----Message d'origine----- > De : gdb-patches-owner@sourceware.org [mailto:gdb-patches- > owner@sourceware.org] De la part de Joel Brobecker > Envoyé : jeudi 18 novembre 2010 18:22 > À : Pierre Muller > Cc : gdb-patches@sourceware.org > Objet : Re: [RFC] Improve amd64 prologue analysis > > > Free Pascal for x86_64 (amd64) CPU generates losts of functions > > that do not use RBP as a frame pointer. > > I tried to improve amd64-tdep.c to better cope > > with this case. > > I thought that on amd64, we weren't going to have prologue parsers > and rely on frame unwinding info instead? At AdaCore, we have the > same sort of things because of Windows where the system code (mostly > system DLLs) does not have the unwinding info in DWARF format. But > for code generated by us, we have all the debugging information needed > to unwind without parsing the prologue, even on Windows64. Does this means that we should only use that code if no dwarf debug info is available? The problem currently on Windows-64bit generated code is that dwarf debug information is more deeply broken than stabs, so that I am still mainly using stabs (especially to debug dwarf problems...). In any case it, this code would still be useful for frames that have no debug information at all, as it allows for frame that do not uses RBP to figure out the correct offset for RSP, and thus the correct caller frame position. Pierre ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC] Improve amd64 prologue analysis 2010-11-19 8:15 ` Pierre Muller @ 2010-11-19 17:20 ` Joel Brobecker 2010-11-19 22:50 ` Pierre Muller 2010-11-24 21:19 ` Mark Kettenis 1 sibling, 1 reply; 16+ messages in thread From: Joel Brobecker @ 2010-11-19 17:20 UTC (permalink / raw) To: Pierre Muller, kettenis; +Cc: gdb-patches [-- Attachment #1: Type: text/plain, Size: 1955 bytes --] > Does this means that we should only use that code if no dwarf debug > info is available? If the DWARF *frame* information is available, then it will automaticallly be used over prologue-parsing. > The problem currently on Windows-64bit generated code is that dwarf > debug information is more deeply broken than stabs, so that I am > still mainly using stabs (especially to debug dwarf problems...). I completely understand the predicament you are in. I would do the same. I don't know the compiler switches all that well, but isn't there a way for us to force it to generate the DWARF frame info alone? This info is used for unwinding during exceptions (unless one uses an exception mechanism based on setjmp/longjmp). If you had that, then you would still be using stabs for symbolic debugging, and the DWARF unwinder for backtracing. > In any case it, this code would still be useful for frames that have > no debug information at all, as it allows for frame that do not uses > RBP to figure out the correct offset for RSP, and thus the correct > caller frame position. I'd have no problem with that either. In fact, I can contribute some code as well that seems to deal with most of the prologues found in system code (patch attached). There is one bit where I was lazy and couldn't figure out how to disassemble some intructions, but that can be fixed as well. However, with that being said, the reason why I never contributed this code is that eventually we will probably have to use the pdata/xdata stuff. This is the equivalent of the DWARF frame info. In retrospect, I think that was a little silly of me to hold that code and not make things better while waiting for pdata/xdata support (Tristan expressed interest in that (so did I), but got stuck on VMS). This part of the code is maintained by MarkK, so he has the final word. But I'm OK with improving the prologue analyzer in the way that you are suggesting. -- Joel [-- Attachment #2: amd64-windows-prologue.diff --] [-- Type: text/x-diff, Size: 9816 bytes --] diff --git a/gdb/amd64-tdep.c b/gdb/amd64-tdep.c index 5472db1..397d101 100644 --- a/gdb/amd64-tdep.c +++ b/gdb/amd64-tdep.c @@ -41,6 +41,8 @@ #include "amd64-tdep.h" #include "i387-tdep.h" +#include "ui-file.h" +#include "disasm.h" #include "features/i386/amd64.c" #include "features/i386/amd64-avx.c" @@ -1824,36 +1826,152 @@ amd64_analyze_stack_align (CORE_ADDR pc, CORE_ADDR current_pc, return min (pc + offset + 2, current_pc); } -/* Do a limited analysis of the prologue at PC and update CACHE - accordingly. Bail out early if CURRENT_PC is reached. Return the - address where the analysis stopped. +/* Return non-zero if the instruction at PC saves a register at + a location relative to the RSP register. If it is, then set: + - INSN_LEN: The length of the instruction (in bytes); + - REGNUM: The register being saved; + - OFFSET: The offset from the stack pointer. */ - We will handle only functions beginning with: +static int +amd64_register_save (struct gdbarch *gdbarch, CORE_ADDR pc, + int *insn_len, int *regnum, int *offset) +{ + gdb_byte buf[5]; + int is_save = 0; + enum bfd_endian byte_order = gdbarch_byte_order (gdbarch); - pushq %rbp 0x55 - movq %rsp, %rbp 0x48 0x89 0xe5 + read_memory (pc, buf, sizeof (buf)); + if (buf[0] == 0x48 + && buf[1] == 0x89 + && (buf[2] & 0xc0) == 0x40 /* ModRM mod == 01 in binary */ + && (buf[2] & 0x07) == 0x04 /* Base register is %rsp */ + && buf[3] == 0x24) + { + int mov_regnum = (buf[2] & 0x38) >> 3; - Any function that doesn't start with this sequence will be assumed - to have no prologue and thus no valid frame pointer in %rbp. */ + *insn_len = 5; + switch (mov_regnum) + { + case 0: *regnum = AMD64_RAX_REGNUM; break; + case 1: *regnum = AMD64_RCX_REGNUM; break; + case 2: *regnum = AMD64_RDX_REGNUM; break; + case 3: *regnum = AMD64_RBX_REGNUM; break; + case 4: *regnum = AMD64_RSP_REGNUM; break; + case 5: *regnum = AMD64_RBP_REGNUM; break; + case 6: *regnum = AMD64_RSI_REGNUM; break; + case 7: *regnum = AMD64_RDI_REGNUM; break; + } + *offset = extract_signed_integer (buf + 4, 1, byte_order); + is_save = 1; + } + else if (buf[0] == 0x4c + && buf[1] == 0x89 + && (buf[2] & 0xc0) == 0x40 /* ModRM mod == 01 in binary */ + && (buf[2] & 0x07) == 0x04 /* Base register is %rsp */ + && buf[3] == 0x24) + { + int mov_regnum = (buf[2] & 0x38) >> 3; + + *insn_len = 5; + *regnum = AMD64_R8_REGNUM + mov_regnum; + *offset = extract_signed_integer (buf + 4, 1, byte_order); + is_save = 1; + } + + return is_save; +} + +/* Return a string representation of the instruction at PC. + Set LEN to the number of bytes used by this instruction. + + Disassembling code on x86_64 is so complicated compared to most + archictectures, that it can be easier to just parse a string + rather than disassembling by hand. It's definitely overkill, + but definitly simpler. */ + +static char * +amd64_disass_insn (struct gdbarch *gdbarch, CORE_ADDR pc, int *len) +{ + struct ui_file *tmp_stream; + struct cleanup *cleanups; + char *insn; + long dummy; + + tmp_stream = mem_fileopen (); + cleanups = make_cleanup_ui_file_delete (tmp_stream); + *len = gdb_print_insn (gdbarch, pc, tmp_stream, NULL); + insn = ui_file_xstrdup (tmp_stream, &dummy); + do_cleanups (cleanups); + + return insn; +} + +/* Some prologues sometimes start with a few instructions that + are not part of a typical prologue. Observed in the Windows + kernel32.dll, we had a couple of "mov" instructions before + the real prologue stared. We have also seen GCC emit some + "mov %rN,OFFSET(%rsp)" instructions instead of "push" instructions + in order to save a register on the stack. For lack of a better + word, let's call this section of the function the "pre-prologue". + + This function analyzes the contents of the function pre-prologue, + updates the cache accordingly, and returns the address of the first + instruction past this pre-prologue. */ static CORE_ADDR -amd64_analyze_prologue (struct gdbarch *gdbarch, - CORE_ADDR pc, CORE_ADDR current_pc, - struct amd64_frame_cache *cache) +amd64_analyze_pre_prologue (struct gdbarch *gdbarch, + CORE_ADDR pc, CORE_ADDR limit_pc, + struct amd64_frame_cache *cache) +{ + while (pc < limit_pc) + { + int len = 0; + char *insn; + int regnum = 0; + int offset = 0; + int dummy; + int skip = 0; + + if (amd64_register_save (gdbarch, pc, &len, ®num, &offset)) + { + cache->saved_regs[regnum] = offset + 8; + pc += len; + continue; + } + + insn = amd64_disass_insn (gdbarch, pc, &len); + if (pc + len < limit_pc && strncmp (insn, "mov", 3) == 0) + skip = 1; + xfree (insn); + if (skip) + pc += len; + else + break; + } + + return pc; +} + +/* If the instruction at PC establishes a frame pointer, then update + CACHE accordingly and return the address of the instruction + immediately following it. Otherwise just return the same PC. */ + +static CORE_ADDR +amd64_analyze_frame_pointer_setup (struct gdbarch *gdbarch, + CORE_ADDR pc, CORE_ADDR limit_pc, + struct amd64_frame_cache *cache) { enum bfd_endian byte_order = gdbarch_byte_order (gdbarch); static gdb_byte proto[3] = { 0x48, 0x89, 0xe5 }; /* movq %rsp, %rbp */ gdb_byte buf[3]; gdb_byte op; - if (current_pc <= pc) - return current_pc; - - pc = amd64_analyze_stack_align (pc, current_pc, cache); + if (limit_pc <= pc) + return limit_pc; op = read_memory_unsigned_integer (pc, 1, byte_order); - if (op == 0x55) /* pushq %rbp */ + if (op == 0x55) /* pushq %rbp */ { /* Take into account that we've executed the `pushq %rbp' that starts this instruction sequence. */ @@ -1861,8 +1979,8 @@ amd64_analyze_prologue (struct gdbarch *gdbarch, cache->sp_offset += 8; /* If that's all, return now. */ - if (current_pc <= pc + 1) - return current_pc; + if (limit_pc <= pc + 1) + return limit_pc; /* Check for `movq %rsp, %rbp'. */ read_memory (pc + 1, buf, 3); @@ -1877,6 +1995,113 @@ amd64_analyze_prologue (struct gdbarch *gdbarch, return pc; } +/* Analyzes the instructions in the part of the function prologue + where registers are being saved on stack. Update CACHE accordingly. + Return the address of the first instruction past the last register + save. */ + +static CORE_ADDR +amd64_analyze_register_saves (struct gdbarch *gdbarch, + CORE_ADDR pc, CORE_ADDR limit_pc, + struct amd64_frame_cache *cache) +{ + gdb_byte op; + enum bfd_endian byte_order = gdbarch_byte_order (gdbarch); + + while (pc < limit_pc) + { + int insn_len; + int regnum = -1; + int offset; + + op = read_memory_unsigned_integer (pc, 1, byte_order); + if (op >= 0x50 && op <= 0x57) + { + /* This is a "push REG" insn where REG is %rax .. %rdi. */ + regnum = amd64_arch_reg_to_regnum (op - 0x50); + pc += 1; + } + else if (op == 0x41) + { + op = read_memory_unsigned_integer (pc + 1, 1, byte_order); + if (op >= 0x50 && op <= 0x57) + { + /* This is a "push REG" insn where REG is %r8 .. %r15. */ + regnum = amd64_arch_reg_to_regnum (op - 0x50 + AMD64_R8_REGNUM); + pc += 2; + } + } + + if (regnum >= 0) + { + cache->sp_offset += 8; + cache->saved_regs[regnum] = cache->sp_offset; + } + else + return pc; /* This instruction is not a register save. Return. */ + } + + return pc; +} + +/* If PC points to an instruction that decrements the rsp register, + (thus finishing the frame setup), analyze it, adjust the CACHE + accordingly, and return the address of the instruction immediately + following it. */ + +static CORE_ADDR +amd64_analyze_stack_pointer_adjustment (struct gdbarch *gdbarch, + CORE_ADDR pc, CORE_ADDR limit_pc, + struct amd64_frame_cache *cache) +{ + gdb_byte buf[3]; + enum bfd_endian byte_order = gdbarch_byte_order (gdbarch); + + /* There are several ways to encode the sub instruction. Either way, + it starts with 3 bytes, so read these bytes first. */ + if (pc + sizeof (buf) >= limit_pc) + return pc; + read_memory (pc, buf, sizeof (buf)); + + if (pc + 4 < limit_pc && buf[0] == 0x48 && buf[1] == 0x83 && buf[2] == 0xec) + { + /* 4-byte version of the sub instruction. The offset is encoded + in the 4th byte. */ + cache->sp_offset += + read_memory_unsigned_integer (pc + sizeof (buf), 1, byte_order); + pc += sizeof (buf) + 1; + } + else if (pc + 7 < limit_pc + && buf [0] == 0x48 && buf[1] == 0x81 && buf[2] == 0xec) + { + /* 7-byte version of the sub instruction. The offset is encoded + as a word at the end of the instruction. */ + cache->sp_offset += + read_memory_unsigned_integer (pc + sizeof (buf), 4, byte_order); + pc += sizeof (buf) + 4; + } + + return pc; +} + +/* Do an analysis of the prologue at PC and update CACHE + accordingly. Bail out early if CURRENT_PC is reached. Return the + address where the analysis stopped. */ + +static CORE_ADDR +amd64_analyze_prologue (struct gdbarch *gdbarch, + CORE_ADDR pc, CORE_ADDR current_pc, + struct amd64_frame_cache *cache) +{ + pc = amd64_analyze_pre_prologue (gdbarch, pc, current_pc, cache); + pc = amd64_analyze_stack_align (pc, current_pc, cache); + pc = amd64_analyze_frame_pointer_setup (gdbarch, pc, current_pc, cache); + pc = amd64_analyze_register_saves (gdbarch, pc, current_pc, cache); + pc = amd64_analyze_stack_pointer_adjustment (gdbarch, pc, current_pc, cache); + + return pc; +} + /* Return PC of first real instruction. */ static CORE_ADDR ^ permalink raw reply [flat|nested] 16+ messages in thread
* RE: [RFC] Improve amd64 prologue analysis 2010-11-19 17:20 ` Joel Brobecker @ 2010-11-19 22:50 ` Pierre Muller 2010-12-14 7:05 ` Joel Brobecker 0 siblings, 1 reply; 16+ messages in thread From: Pierre Muller @ 2010-11-19 22:50 UTC (permalink / raw) To: 'Joel Brobecker', kettenis; +Cc: gdb-patches I think that your code does indeed catch some instructions that are not covered by my patch, especially in Windows DLL. Concerning your pre_prologue function, I think that I found the explanation in "amd64 Prolog and Epilog" description page from MSDN. This page states that before storing non-volatile registers and allocating the local stack, the register parameters should be placed into the stack space that must be allocated anyhow when calling a function according to the ABI. http://msdn.microsoft.com/en-us/library/tawsa7cb.aspx This means that basically only 8 patterns should be possible: movq %rcx, 8(%rsp) or movq %xmm0, 8(%rsp) movq %rdx, 16(%rsp) or movq %xmm1, 16(%rsp) movq %r8, 24(%rsp) or movq %xmm2, 24(%rsp) movq %r9, 32(%rsp) or movq %xmm3, 32(%rsp) But apparently KernelBase DLL doesn't itself follow this rule, and other registers are saved into this stack space allocated for register parameters... So that keeping it general makes sense. Mark, what should we do about those patches? Pierre ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC] Improve amd64 prologue analysis 2010-11-19 22:50 ` Pierre Muller @ 2010-12-14 7:05 ` Joel Brobecker 2010-12-14 9:58 ` Pedro Alves 2010-12-15 23:07 ` Mark Kettenis 0 siblings, 2 replies; 16+ messages in thread From: Joel Brobecker @ 2010-12-14 7:05 UTC (permalink / raw) To: kettenis; +Cc: Pierre Muller, gdb-patches Hi Mark, > I think that your code does indeed catch some > instructions that are not covered by my patch, > especially in Windows DLL. If Pierre were to submit a patch that merges both our changes, do you think it would have a chance of being included? The alternative is to start working on a pdata/xdata unwinder, but I don't see myself having the time to look into that anytime soon (not within the next 12 months). You mentioned that, if we have prologue instruction parsers, they should be Windows-specific. But I would think that this extra level of precaution is unnecessary, since the prologue analyzer should almost never be used on the other platforms where we have the DWARF frame info. Is that wishful thinking? -- Joel ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC] Improve amd64 prologue analysis 2010-12-14 7:05 ` Joel Brobecker @ 2010-12-14 9:58 ` Pedro Alves 2010-12-15 23:07 ` Mark Kettenis 1 sibling, 0 replies; 16+ messages in thread From: Pedro Alves @ 2010-12-14 9:58 UTC (permalink / raw) To: gdb-patches; +Cc: Joel Brobecker, kettenis, Pierre Muller On Tuesday 14 December 2010 07:05:35, Joel Brobecker wrote: > Hi Mark, > > > I think that your code does indeed catch some > > instructions that are not covered by my patch, > > especially in Windows DLL. > > If Pierre were to submit a patch that merges both our changes, > do you think it would have a chance of being included? > > The alternative is to start working on a pdata/xdata unwinder, > but I don't see myself having the time to look into that anytime > soon (not within the next 12 months). > > You mentioned that, if we have prologue instruction parsers, they should > be Windows-specific. But I would think that this extra level of > precaution is unnecessary, since the prologue analyzer should almost > never be used on the other platforms where we have the DWARF frame info. > Is that wishful thinking? I imagine that for Wine debugging, table based SEH unwinding in linux code will be useful. -- Pedro Alves ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC] Improve amd64 prologue analysis 2010-12-14 7:05 ` Joel Brobecker 2010-12-14 9:58 ` Pedro Alves @ 2010-12-15 23:07 ` Mark Kettenis 2010-12-16 4:15 ` Joel Brobecker 1 sibling, 1 reply; 16+ messages in thread From: Mark Kettenis @ 2010-12-15 23:07 UTC (permalink / raw) To: brobecker; +Cc: pierre.muller, gdb-patches > Date: Tue, 14 Dec 2010 11:05:35 +0400 > From: Joel Brobecker <brobecker@adacore.com> > > Hi Mark, > > > I think that your code does indeed catch some > > instructions that are not covered by my patch, > > especially in Windows DLL. > > If Pierre were to submit a patch that merges both our changes, > do you think it would have a chance of being included? > > The alternative is to start working on a pdata/xdata unwinder, > but I don't see myself having the time to look into that anytime > soon (not within the next 12 months). > > You mentioned that, if we have prologue instruction parsers, they should > be Windows-specific. But I would think that this extra level of > precaution is unnecessary, since the prologue analyzer should almost > never be used on the other platforms where we have the DWARF frame info. > Is that wishful thinking? Many years ago we realized that prologue analyzers are a dead end. Compilers will continue to invent silly new ways to set up a stack frame and save registers, and we'll forever play catch up. Then amd64 showed up and the folks who designed the ABI actually had the insight to make unwind information pretty much mandatory. As a result we survived very well without an amd64 prologue analyzer. I hope you'll understand my reluctance to add one after managing 7 years or so without. So if support for pdata/xdata gets added I would expect that the prologue analyzer won't be needed on 64-bit Windows either. At that point the prologue analyzer will become dead code and we should remove it again. By not adding the prologue analyzer to other platforms we'll increase the chance that will indeed be possible. The fact that this is for a an OS that in my view should never have been supported by GDB, that has a 64-bit ABI that I consider horribly broken doesn't help. If you stick it somewhere where I don't need to look at it, I won't care as much. ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC] Improve amd64 prologue analysis 2010-12-15 23:07 ` Mark Kettenis @ 2010-12-16 4:15 ` Joel Brobecker 0 siblings, 0 replies; 16+ messages in thread From: Joel Brobecker @ 2010-12-16 4:15 UTC (permalink / raw) To: Mark Kettenis; +Cc: pierre.muller, gdb-patches > I hope you'll understand my reluctance to add one after managing 7 > years or so without. OK - that's very reasonable. Prologue analyzers are very ugly, and the amd64 one is worse, because the insns are harder to parse (at least to me). That's why, until Pierre started sending patches, I never really considered submitting the changes we made, thinking that we were the only one for whom it actually made a difference. > So if support for pdata/xdata gets added I would expect that > the prologue analyzer won't be needed on 64-bit Windows either. That's correct. > If you stick it somewhere where I don't need to look at it, I won't > care as much. Sounds like an acceptable compromise. -- Joel ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC] Improve amd64 prologue analysis 2010-11-19 8:15 ` Pierre Muller 2010-11-19 17:20 ` Joel Brobecker @ 2010-11-24 21:19 ` Mark Kettenis 2010-11-24 22:15 ` Joel Brobecker 2010-11-24 22:17 ` Joel Brobecker 1 sibling, 2 replies; 16+ messages in thread From: Mark Kettenis @ 2010-11-24 21:19 UTC (permalink / raw) To: pierre.muller; +Cc: brobecker, gdb-patches [-- Warning: decoded text below may be mangled, UTF-8 assumed --] [-- Attachment #1: Type: text/plain, Size: 1779 bytes --] > From: "Pierre Muller" <pierre.muller@ics-cnrs.unistra.fr> > Date: Fri, 19 Nov 2010 09:14:56 +0100 > > > -----Message d'origine----- > > De : gdb-patches-owner@sourceware.org [mailto:gdb-patches- > > owner@sourceware.org] De la part de Joel Brobecker > > Envoyé : jeudi 18 novembre 2010 18:22 > > À : Pierre Muller > > Cc : gdb-patches@sourceware.org > > Objet : Re: [RFC] Improve amd64 prologue analysis > > > > > Free Pascal for x86_64 (amd64) CPU generates losts of functions > > > that do not use RBP as a frame pointer. > > > I tried to improve amd64-tdep.c to better cope > > > with this case. > > > > I thought that on amd64, we weren't going to have prologue parsers > > and rely on frame unwinding info instead? At AdaCore, we have the > > same sort of things because of Windows where the system code (mostly > > system DLLs) does not have the unwinding info in DWARF format. But > > for code generated by us, we have all the debugging information needed > > to unwind without parsing the prologue, even on Windows64. > > Does this means that we should only use that code > if no dwarf debug info is available? The official amd64 ABI that's used by most, if not all, Unix-like operating systems, pretty much mandates DWARF-like debug info. It really should always be available on platforms that follow that ABI. Now I suspect that 64-bit Windows, given its utterly retarded IL32P64 model, doesn't follow that ABI. Therefore my suggestion would be to only use this code on Windows. > The problem currently on Windows-64bit generated code > is that dwarf debug information is more > deeply broken than stabs, so that I am still mainly using stabs > (especially to debug dwarf problems...). Hmm, stabs on 64-bit platforms has never really worked. ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC] Improve amd64 prologue analysis 2010-11-24 21:19 ` Mark Kettenis @ 2010-11-24 22:15 ` Joel Brobecker 2010-11-24 22:26 ` Joel Brobecker 2010-11-24 22:17 ` Joel Brobecker 1 sibling, 1 reply; 16+ messages in thread From: Joel Brobecker @ 2010-11-24 22:15 UTC (permalink / raw) To: Mark Kettenis; +Cc: pierre.muller, gdb-patches > Now I suspect that 64-bit Windows, given its utterly retarded IL32P64 > model, doesn't follow that ABI. Therefore my suggestion would be to > only use this code on Windows. On windows, there is some unwinding info that's generated as well. We just don't read it, yet. It's something we'd love to do at AdaCore eventually, but we're lacking the time at the moment. -- Joel ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC] Improve amd64 prologue analysis 2010-11-24 22:15 ` Joel Brobecker @ 2010-11-24 22:26 ` Joel Brobecker 2010-11-25 13:39 ` Mark Kettenis 0 siblings, 1 reply; 16+ messages in thread From: Joel Brobecker @ 2010-11-24 22:26 UTC (permalink / raw) To: Mark Kettenis; +Cc: pierre.muller, gdb-patches > > Now I suspect that 64-bit Windows, given its utterly retarded IL32P64 > > model, doesn't follow that ABI. Therefore my suggestion would be to > > only use this code on Windows. > > On windows, there is some unwinding info that's generated as well. > We just don't read it, yet. It's something we'd love to do at AdaCore > eventually, but we're lacking the time at the moment. One last bit of information - which I got second hand, but hopefully accurate. I think that the GCC team is on its way to generating the unwinding info in that format, rather than the usual DWARF-based eh_frame/debug_frame. So the Microsoft format should become the default even for code generated by GCC... So eventually, I think we are going to need to add an unwinder for that, even for GCC code. -- Joel ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC] Improve amd64 prologue analysis 2010-11-24 22:26 ` Joel Brobecker @ 2010-11-25 13:39 ` Mark Kettenis 2010-11-25 16:30 ` Joel Brobecker 0 siblings, 1 reply; 16+ messages in thread From: Mark Kettenis @ 2010-11-25 13:39 UTC (permalink / raw) To: brobecker; +Cc: pierre.muller, gdb-patches > Date: Wed, 24 Nov 2010 14:25:55 -0800 > From: Joel Brobecker <brobecker@adacore.com> > > > > Now I suspect that 64-bit Windows, given its utterly retarded IL32P64 > > > model, doesn't follow that ABI. Therefore my suggestion would be to > > > only use this code on Windows. > > > > On windows, there is some unwinding info that's generated as well. > > We just don't read it, yet. It's something we'd love to do at AdaCore > > eventually, but we're lacking the time at the moment. > > One last bit of information - which I got second hand, but hopefully > accurate. I think that the GCC team is on its way to generating the > unwinding info in that format, rather than the usual DWARF-based > eh_frame/debug_frame. So the Microsoft format should become the default > even for code generated by GCC... So eventually, I think we are going > to need to add an unwinder for that, even for GCC code. I certainly hope you're wrong here, since it would mean a massive ABI break on platforms currently using .eh_frame, and presumably would be x86-specific. ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC] Improve amd64 prologue analysis 2010-11-25 13:39 ` Mark Kettenis @ 2010-11-25 16:30 ` Joel Brobecker 2010-11-25 19:19 ` Kai Tietz 0 siblings, 1 reply; 16+ messages in thread From: Joel Brobecker @ 2010-11-25 16:30 UTC (permalink / raw) To: Mark Kettenis; +Cc: pierre.muller, gdb-patches > > One last bit of information - which I got second hand, but hopefully > > accurate. I think that the GCC team is on its way to generating the > > unwinding info in that format, rather than the usual DWARF-based > > eh_frame/debug_frame. So the Microsoft format should become the default > > even for code generated by GCC... So eventually, I think we are going > > to need to add an unwinder for that, even for GCC code. > > I certainly hope you're wrong here, since it would mean a massive ABI > break on platforms currently using .eh_frame, and presumably would be > x86-specific. I should have said that this is only relevant to x86_64-windows. All other platforms will continue to use the usual eh_frame, of course. -- Joel ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC] Improve amd64 prologue analysis 2010-11-25 16:30 ` Joel Brobecker @ 2010-11-25 19:19 ` Kai Tietz 0 siblings, 0 replies; 16+ messages in thread From: Kai Tietz @ 2010-11-25 19:19 UTC (permalink / raw) To: Joel Brobecker; +Cc: Mark Kettenis, pierre.muller, gdb-patches 2010/11/25 Joel Brobecker <brobecker@adacore.com>: >> > One last bit of information - which I got second hand, but hopefully >> > accurate. I think that the GCC team is on its way to generating the >> > unwinding info in that format, rather than the usual DWARF-based >> > eh_frame/debug_frame. So the Microsoft format should become the default >> > even for code generated by GCC... So eventually, I think we are going >> > to need to add an unwinder for that, even for GCC code. >> >> I certainly hope you're wrong here, since it would mean a massive ABI >> break on platforms currently using .eh_frame, and presumably would be >> x86-specific. > > I should have said that this is only relevant to x86_64-windows. > All other platforms will continue to use the usual eh_frame, of > course. > > -- > Joel > Well, Joel is right. Beginning with 4.6 gcc there is a base support of SEH prologue unwind information for x64 windows target. By this it is possible to use API provided by imagehlp.dll for generating stack backtraces. At the moment SEH isn't used for exception unwinding, but this is planned for 4.7 gcc version. A test-implementation of it is already existing, but not sure if it will make it into 4.6 as optional unwind implementation for tests. Kai ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC] Improve amd64 prologue analysis 2010-11-24 21:19 ` Mark Kettenis 2010-11-24 22:15 ` Joel Brobecker @ 2010-11-24 22:17 ` Joel Brobecker 1 sibling, 0 replies; 16+ messages in thread From: Joel Brobecker @ 2010-11-24 22:17 UTC (permalink / raw) To: Mark Kettenis; +Cc: pierre.muller, gdb-patches > Now I suspect that 64-bit Windows, given its utterly retarded IL32P64 > model, doesn't follow that ABI. Therefore my suggestion would be to > only use this code on Windows. BTW: We could do that (only use the code on Windows), but I would think that this is already the case de facto: Since we're supposed to always have DWARF unwinding info on platforms such as GNU/Linux, should this code ever trigger? -- Joel ^ permalink raw reply [flat|nested] 16+ messages in thread
end of thread, other threads:[~2010-12-16 4:15 UTC | newest] Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2010-11-15 17:28 [RFC] Improve amd64 prologue analysis Pierre Muller 2010-11-18 17:22 ` Joel Brobecker 2010-11-19 8:15 ` Pierre Muller 2010-11-19 17:20 ` Joel Brobecker 2010-11-19 22:50 ` Pierre Muller 2010-12-14 7:05 ` Joel Brobecker 2010-12-14 9:58 ` Pedro Alves 2010-12-15 23:07 ` Mark Kettenis 2010-12-16 4:15 ` Joel Brobecker 2010-11-24 21:19 ` Mark Kettenis 2010-11-24 22:15 ` Joel Brobecker 2010-11-24 22:26 ` Joel Brobecker 2010-11-25 13:39 ` Mark Kettenis 2010-11-25 16:30 ` Joel Brobecker 2010-11-25 19:19 ` Kai Tietz 2010-11-24 22:17 ` Joel Brobecker
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox