From: Andrew Burgess <aburgess@redhat.com>
To: Yang Liu <liuyang22@iscas.ac.cn>, gdb-patches@sourceware.org
Cc: palmer@dabbelt.com, simon.marchi@polymtl.ca,
Yang Liu <liuyang22@iscas.ac.cn>
Subject: Re: [PATCH v3] gdb: RISC-V: Refine lr/sc sequence support
Date: Wed, 08 Nov 2023 11:15:27 +0000 [thread overview]
Message-ID: <871qd0ts1c.fsf@redhat.com> (raw)
In-Reply-To: <20231107065816.85723-1-liuyang22@iscas.ac.cn>
Yang Liu <liuyang22@iscas.ac.cn> writes:
> Per RISC-V spec, the lr/sc sequence can consist of up to 16 instructions, and we
> cannot insert breakpoints in the middle of this sequence. Before this, we only
> detected a specific pattern (the most common one). This patch improves this part
> and now supports more complex pattern detection.
Thanks for the update. This looks great.
Approved-By: Andrew Burgess <aburgess@redhat.com>
Thanks,
Andrew
>
> Signed-off-by: Yang Liu <liuyang22@iscas.ac.cn>
> ---
> gdb/riscv-tdep.c | 290 ++++++++++++++++++++++++++++++++++++++++-------
> 1 file changed, 251 insertions(+), 39 deletions(-)
>
> diff --git a/gdb/riscv-tdep.c b/gdb/riscv-tdep.c
> index 3a2891c2c92..ffad7fe2a60 100644
> --- a/gdb/riscv-tdep.c
> +++ b/gdb/riscv-tdep.c
> @@ -1578,8 +1578,34 @@ class riscv_insn
> BLTU,
> BGEU,
> /* These are needed for stepping over atomic sequences. */
> - LR,
> - SC,
> + SLTI,
> + SLTIU,
> + XORI,
> + ORI,
> + ANDI,
> + SLLI,
> + SLLIW,
> + SRLI,
> + SRLIW,
> + SRAI,
> + SRAIW,
> + SUB,
> + SUBW,
> + SLL,
> + SLLW,
> + SLT,
> + SLTU,
> + XOR,
> + SRL,
> + SRLW,
> + SRA,
> + SRAW,
> + OR,
> + AND,
> + LR_W,
> + LR_D,
> + SC_W,
> + SC_D,
> /* This instruction is used to do a syscall. */
> ECALL,
>
> @@ -1768,6 +1794,13 @@ class riscv_insn
> m_imm.s = EXTRACT_CBTYPE_IMM (ival);
> }
>
> + void decode_ca_type_insn (enum opcode opcode, ULONGEST ival)
> + {
> + m_opcode = opcode;
> + m_rs1 = decode_register_index_short (ival, OP_SH_CRS1S);
> + m_rs2 = decode_register_index_short (ival, OP_SH_CRS2S);
> + }
> +
> /* Fetch instruction from target memory at ADDR, return the content of
> the instruction, and update LEN with the instruction length. */
> static ULONGEST fetch_instruction (struct gdbarch *gdbarch,
> @@ -1882,14 +1915,62 @@ riscv_insn::decode (struct gdbarch *gdbarch, CORE_ADDR pc)
> decode_b_type_insn (BLTU, ival);
> else if (is_bgeu_insn (ival))
> decode_b_type_insn (BGEU, ival);
> + else if (is_slti_insn(ival))
> + decode_i_type_insn (SLTI, ival);
> + else if (is_sltiu_insn(ival))
> + decode_i_type_insn (SLTIU, ival);
> + else if (is_xori_insn(ival))
> + decode_i_type_insn (XORI, ival);
> + else if (is_ori_insn(ival))
> + decode_i_type_insn (ORI, ival);
> + else if (is_andi_insn(ival))
> + decode_i_type_insn (ANDI, ival);
> + else if (is_slli_insn(ival))
> + decode_i_type_insn (SLLI, ival);
> + else if (is_slliw_insn(ival))
> + decode_i_type_insn (SLLIW, ival);
> + else if (is_srli_insn(ival))
> + decode_i_type_insn (SRLI, ival);
> + else if (is_srliw_insn(ival))
> + decode_i_type_insn (SRLIW, ival);
> + else if (is_srai_insn(ival))
> + decode_i_type_insn (SRAI, ival);
> + else if (is_sraiw_insn(ival))
> + decode_i_type_insn (SRAIW, ival);
> + else if (is_sub_insn(ival))
> + decode_r_type_insn (SUB, ival);
> + else if (is_subw_insn(ival))
> + decode_r_type_insn (SUBW, ival);
> + else if (is_sll_insn(ival))
> + decode_r_type_insn (SLL, ival);
> + else if (is_sllw_insn(ival))
> + decode_r_type_insn (SLLW, ival);
> + else if (is_slt_insn(ival))
> + decode_r_type_insn (SLT, ival);
> + else if (is_sltu_insn(ival))
> + decode_r_type_insn (SLTU, ival);
> + else if (is_xor_insn(ival))
> + decode_r_type_insn (XOR, ival);
> + else if (is_srl_insn(ival))
> + decode_r_type_insn (SRL, ival);
> + else if (is_srlw_insn(ival))
> + decode_r_type_insn (SRLW, ival);
> + else if (is_sra_insn(ival))
> + decode_r_type_insn (SRA, ival);
> + else if (is_sraw_insn(ival))
> + decode_r_type_insn (SRAW, ival);
> + else if (is_or_insn(ival))
> + decode_r_type_insn (OR, ival);
> + else if (is_and_insn(ival))
> + decode_r_type_insn (AND, ival);
> else if (is_lr_w_insn (ival))
> - decode_r_type_insn (LR, ival);
> + decode_r_type_insn (LR_W, ival);
> else if (is_lr_d_insn (ival))
> - decode_r_type_insn (LR, ival);
> + decode_r_type_insn (LR_D, ival);
> else if (is_sc_w_insn (ival))
> - decode_r_type_insn (SC, ival);
> + decode_r_type_insn (SC_W, ival);
> else if (is_sc_d_insn (ival))
> - decode_r_type_insn (SC, ival);
> + decode_r_type_insn (SC_D, ival);
> else if (is_ecall_insn (ival))
> decode_i_type_insn (ECALL, ival);
> else if (is_ld_insn (ival))
> @@ -1944,6 +2025,24 @@ riscv_insn::decode (struct gdbarch *gdbarch, CORE_ADDR pc)
> m_rd = decode_register_index (ival, OP_SH_CRS1S);
> m_imm.s = EXTRACT_CITYPE_LUI_IMM (ival);
> }
> + else if (is_c_srli_insn (ival))
> + decode_cb_type_insn (SRLI, ival);
> + else if (is_c_srai_insn (ival))
> + decode_cb_type_insn (SRAI, ival);
> + else if (is_c_andi_insn (ival))
> + decode_cb_type_insn (ANDI, ival);
> + else if (is_c_sub_insn (ival))
> + decode_ca_type_insn (SUB, ival);
> + else if (is_c_xor_insn (ival))
> + decode_ca_type_insn (XOR, ival);
> + else if (is_c_or_insn (ival))
> + decode_ca_type_insn (OR, ival);
> + else if (is_c_and_insn (ival))
> + decode_ca_type_insn (AND, ival);
> + else if (is_c_subw_insn (ival))
> + decode_ca_type_insn (SUBW, ival);
> + else if (is_c_addw_insn (ival))
> + decode_ca_type_insn (ADDW, ival);
> else if (is_c_li_insn (ival))
> decode_ci_type_insn (LI, ival);
> /* C_SD and C_FSW have the same opcode. C_SD is RV64 and RV128 only,
> @@ -4404,51 +4503,164 @@ riscv_next_pc (struct regcache *regcache, CORE_ADDR pc)
> return next_pc;
> }
>
> +/* Return true if INSN is not a control transfer instruction and is allowed to
> + appear in the middle of the lr/sc sequence. */
> +
> +static bool
> +riscv_insn_is_non_cti_and_allowed_in_atomic_sequence
> + (const struct riscv_insn &insn)
> +{
> + switch (insn.opcode ())
> + {
> + case riscv_insn::LUI:
> + case riscv_insn::AUIPC:
> + case riscv_insn::ADDI:
> + case riscv_insn::ADDIW:
> + case riscv_insn::SLTI:
> + case riscv_insn::SLTIU:
> + case riscv_insn::XORI:
> + case riscv_insn::ORI:
> + case riscv_insn::ANDI:
> + case riscv_insn::SLLI:
> + case riscv_insn::SLLIW:
> + case riscv_insn::SRLI:
> + case riscv_insn::SRLIW:
> + case riscv_insn::SRAI:
> + case riscv_insn::ADD:
> + case riscv_insn::ADDW:
> + case riscv_insn::SRAIW:
> + case riscv_insn::SUB:
> + case riscv_insn::SUBW:
> + case riscv_insn::SLL:
> + case riscv_insn::SLLW:
> + case riscv_insn::SLT:
> + case riscv_insn::SLTU:
> + case riscv_insn::XOR:
> + case riscv_insn::SRL:
> + case riscv_insn::SRLW:
> + case riscv_insn::SRA:
> + case riscv_insn::SRAW:
> + case riscv_insn::OR:
> + case riscv_insn::AND:
> + return true;
> + }
> +
> + return false;
> +}
> +
> +/* Return true if INSN is a direct branch instruction. */
> +
> +static bool
> +riscv_insn_is_direct_branch (const struct riscv_insn &insn)
> +{
> + switch (insn.opcode ())
> + {
> + case riscv_insn::BEQ:
> + case riscv_insn::BNE:
> + case riscv_insn::BLT:
> + case riscv_insn::BGE:
> + case riscv_insn::BLTU:
> + case riscv_insn::BGEU:
> + case riscv_insn::JAL:
> + return true;
> + }
> +
> + return false;
> +}
> +
> /* We can't put a breakpoint in the middle of a lr/sc atomic sequence, so look
> for the end of the sequence and put the breakpoint there. */
>
> -static bool
> -riscv_next_pc_atomic_sequence (struct regcache *regcache, CORE_ADDR pc,
> - CORE_ADDR *next_pc)
> +static std::vector<CORE_ADDR>
> +riscv_deal_with_atomic_sequence (struct regcache *regcache, CORE_ADDR pc)
> {
> struct gdbarch *gdbarch = regcache->arch ();
> struct riscv_insn insn;
> - CORE_ADDR cur_step_pc = pc;
> - CORE_ADDR last_addr = 0;
> + CORE_ADDR cur_step_pc = pc, next_pc;
> + std::vector<CORE_ADDR> next_pcs;
> + bool found_valid_atomic_sequence = false;
> + enum riscv_insn::opcode lr_opcode;
>
> /* First instruction has to be a load reserved. */
> insn.decode (gdbarch, cur_step_pc);
> - if (insn.opcode () != riscv_insn::LR)
> - return false;
> - cur_step_pc = cur_step_pc + insn.length ();
> + lr_opcode = insn.opcode ();
> + if (lr_opcode != riscv_insn::LR_D && lr_opcode != riscv_insn::LR_W)
> + return {};
> +
> + /* The loop comprises only an LR/SC sequence and code to retry the sequence in
> + the case of failure, and must comprise at most 16 instructions placed
> + sequentially in memory. While our code tries to follow these restrictions,
> + it has the following limitations:
> +
> + (a) We expect the loop to start with an LR and end with a BNE.
> + Apparently this does not cover all cases for a valid sequence.
> + (b) The atomic limitations only apply to the code that is actually
> + executed, so here again it's overly restrictive.
> + (c) The lr/sc are required to be for the same target address, but this
> + information is only known at runtime. Same as (b), in order to check
> + this we will end up needing to simulate the sequence, which is more
> + complicated than what we're doing right now.
> +
> + Also note that we only expect a maximum of (16-2) instructions in the for
> + loop as we have assumed the presence of LR and BNE at the beginning and end
> + respectively. */
> + for (int insn_count = 0; insn_count < 16 - 2; ++insn_count)
> + {
> + cur_step_pc += insn.length ();
> + insn.decode (gdbarch, cur_step_pc);
>
> - /* Next instruction should be branch to exit. */
> - insn.decode (gdbarch, cur_step_pc);
> - if (insn.opcode () != riscv_insn::BNE)
> - return false;
> - last_addr = cur_step_pc + insn.imm_signed ();
> - cur_step_pc = cur_step_pc + insn.length ();
> + /* The dynamic code executed between lr/sc can only contain instructions
> + from the base I instruction set, excluding loads, stores, backward
> + jumps, taken backward branches, JALR, FENCE, FENCE.I, and SYSTEM
> + instructions. If the C extension is supported, then compressed forms
> + of the aforementioned I instructions are also permitted. */
>
> - /* Next instruction should be store conditional. */
> - insn.decode (gdbarch, cur_step_pc);
> - if (insn.opcode () != riscv_insn::SC)
> - return false;
> - cur_step_pc = cur_step_pc + insn.length ();
> + if (riscv_insn_is_non_cti_and_allowed_in_atomic_sequence (insn))
> + continue;
> + /* Look for a conditional branch instruction, check if it's taken forward
> + or not. */
> + else if (riscv_insn_is_direct_branch (insn))
> + {
> + if (insn.imm_signed () > 0)
> + {
> + next_pc = cur_step_pc + insn.imm_signed ();
> + next_pcs.push_back (next_pc);
> + }
> + else
> + break;
> + }
> + /* Look for a paired SC instruction which closes the atomic sequence. */
> + else if ((insn.opcode () == riscv_insn::SC_D
> + && lr_opcode == riscv_insn::LR_D)
> + || (insn.opcode () == riscv_insn::SC_W
> + && lr_opcode == riscv_insn::LR_W))
> + found_valid_atomic_sequence = true;
> + else
> + break;
> + }
> +
> + if (!found_valid_atomic_sequence)
> + return {};
>
> /* Next instruction should be branch to start. */
> insn.decode (gdbarch, cur_step_pc);
> if (insn.opcode () != riscv_insn::BNE)
> - return false;
> + return {};
> if (pc != (cur_step_pc + insn.imm_signed ()))
> - return false;
> - cur_step_pc = cur_step_pc + insn.length ();
> + return {};
> + cur_step_pc += insn.length ();
>
> - /* We should now be at the end of the sequence. */
> - if (cur_step_pc != last_addr)
> - return false;
> + /* Remove all PCs that jump within the sequence. */
> + auto matcher = [cur_step_pc] (const CORE_ADDR addr) -> bool
> + {
> + return addr < cur_step_pc;
> + };
> + auto it = std::remove_if (next_pcs.begin (), next_pcs.end (), matcher);
> + next_pcs.erase (it, next_pcs.end ());
>
> - *next_pc = cur_step_pc;
> - return true;
> + next_pc = cur_step_pc;
> + next_pcs.push_back (next_pc);
> + return next_pcs;
> }
>
> /* This is called just before we want to resume the inferior, if we want to
> @@ -4458,14 +4670,14 @@ riscv_next_pc_atomic_sequence (struct regcache *regcache, CORE_ADDR pc,
> std::vector<CORE_ADDR>
> riscv_software_single_step (struct regcache *regcache)
> {
> - CORE_ADDR pc, next_pc;
> -
> - pc = regcache_read_pc (regcache);
> + CORE_ADDR cur_pc = regcache_read_pc (regcache), next_pc;
> + std::vector<CORE_ADDR> next_pcs
> + = riscv_deal_with_atomic_sequence (regcache, cur_pc);
>
> - if (riscv_next_pc_atomic_sequence (regcache, pc, &next_pc))
> - return {next_pc};
> + if (!next_pcs.empty ())
> + return next_pcs;
>
> - next_pc = riscv_next_pc (regcache, pc);
> + next_pc = riscv_next_pc (regcache, cur_pc);
>
> return {next_pc};
> }
> --
> 2.42.0
next prev parent reply other threads:[~2023-11-08 11:15 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-11-07 6:58 Yang Liu
2023-11-08 11:15 ` Andrew Burgess [this message]
2024-01-08 11:37 ` Yang Liu
2024-01-11 18:09 ` Tom Tromey
2024-01-12 6:27 ` Yang Liu
2024-01-12 6:39 ` Palmer Dabbelt
2024-01-12 8:43 ` Guinevere Larsen
2024-01-12 9:07 ` Yang Liu
2024-01-12 8:44 ` Eli Zaretskii
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=871qd0ts1c.fsf@redhat.com \
--to=aburgess@redhat.com \
--cc=gdb-patches@sourceware.org \
--cc=liuyang22@iscas.ac.cn \
--cc=palmer@dabbelt.com \
--cc=simon.marchi@polymtl.ca \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox