Mirror of the gdb mailing list
 help / color / mirror / Atom feed
* [PATCH v2 09/28] arm64/sve: Signal frame and context structure definition
       [not found] <1504198860-12951-1-git-send-email-Dave.Martin@arm.com>
@ 2017-08-31 17:01 ` Dave Martin
  2017-09-13 13:36   ` Catalin Marinas
  2017-08-31 17:02 ` [PATCH v2 19/28] arm64/sve: ptrace and ELF coredump support Dave Martin
  2017-08-31 17:09 ` [PATCH v2 14/28] arm64/sve: Backend logic for setting the vector length Dave Martin
  2 siblings, 1 reply; 21+ messages in thread
From: Dave Martin @ 2017-08-31 17:01 UTC (permalink / raw)
  To: linux-arm-kernel
  Cc: Catalin Marinas, Will Deacon, Ard Biesheuvel, Alex Bennée,
	Szabolcs Nagy, Richard Sandiford, kvmarm, libc-alpha, linux-arch,
	gdb, Alan Hayward, Yao Qi

This patch defines the representation that will be used for the SVE
register state in the signal frame, and implements support for
saving and restoring the SVE registers around signals.

The same layout will also be used for the in-kernel task state.

Due to the variability of the SVE vector length, it is not possible
to define a fixed C struct to describe all the registers.  Instead,
macros are defined in sigcontext.h to facilitate access to the
parts of the structure.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Cc: Alex Bennée <alex.bennee@linaro.org>

---

Changes since v1
----------------

Requested by Alex Bennée:

* Add SVE_VQ_BYTES #define to make it clear when the code is
intentionally referring to the size in bytes of a quadword, and migrate
away from magic 16/0x10 where used with this meaning.
---
 arch/arm64/include/uapi/asm/sigcontext.h | 117 ++++++++++++++++++++++++++++++-
 1 file changed, 116 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h
index f0a76b9..c78cf8e 100644
--- a/arch/arm64/include/uapi/asm/sigcontext.h
+++ b/arch/arm64/include/uapi/asm/sigcontext.h
@@ -16,6 +16,8 @@
 #ifndef _UAPI__ASM_SIGCONTEXT_H
 #define _UAPI__ASM_SIGCONTEXT_H
 
+#ifndef __ASSEMBLY__
+
 #include <linux/types.h>
 
 /*
@@ -41,10 +43,11 @@ struct sigcontext {
  *
  *	0x210		fpsimd_context
  *	 0x10		esr_context
+ *	0x8a0		sve_context (vl <= 64) (optional)
  *	 0x20		extra_context (optional)
  *	 0x10		terminator (null _aarch64_ctx)
  *
- *	0xdb0		(reserved for future allocation)
+ *	0x510		(reserved for future allocation)
  *
  * New records that can exceed this space need to be opt-in for userspace, so
  * that an expanded signal frame is not generated unexpectedly.  The mechanism
@@ -116,4 +119,116 @@ struct extra_context {
 	__u32 __reserved[3];
 };
 
+#define SVE_MAGIC	0x53564501
+
+struct sve_context {
+	struct _aarch64_ctx head;
+	__u16 vl;
+	__u16 __reserved[3];
+};
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * The SVE architecture leaves space for future expansion of the
+ * vector length beyond its initial architectural limit of 2048 bits
+ * (16 quadwords).
+ */
+#define SVE_VQ_BYTES		0x10	/* number of bytes per quadword */
+
+#define SVE_VQ_MIN		1
+#define SVE_VQ_MAX		0x200
+
+#define SVE_VL_MIN		(SVE_VQ_MIN * SVE_VQ_BYTES)
+#define SVE_VL_MAX		(SVE_VQ_MAX * SVE_VQ_BYTES)
+
+#define SVE_NUM_ZREGS		32
+#define SVE_NUM_PREGS		16
+
+#define sve_vl_valid(vl) \
+	((vl) % SVE_VQ_BYTES == 0 && (vl) >= SVE_VL_MIN && (vl) <= SVE_VL_MAX)
+#define sve_vq_from_vl(vl)	((vl) / SVE_VQ_BYTES)
+#define sve_vl_from_vq(vq)	((vq) * SVE_VQ_BYTES)
+
+/*
+ * If the SVE registers are currently live for the thread at signal delivery,
+ * sve_context.head.size >=
+ *	SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve_context.vl))
+ * and the register data may be accessed using the SVE_SIG_*() macros.
+ *
+ * If sve_context.head.size <
+ *	SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve_context.vl)),
+ * the SVE registers were not live for the thread and no register data
+ * is included: in this case, the SVE_SIG_*() macros should not be
+ * used except for this check.
+ *
+ * The same convention applies when returning from a signal: a caller
+ * will need to remove or resize the sve_context block if it wants to
+ * make the SVE registers live when they were previously non-live or
+ * vice-versa.  This may require the the caller to allocate fresh
+ * memory and/or move other context blocks in the signal frame.
+ *
+ * Changing the vector length during signal return is not permitted:
+ * sve_context.vl must equal the thread's current vector length when
+ * doing a sigreturn.
+ *
+ *
+ * Note: for all these macros, the "vq" argument denotes the SVE
+ * vector length in quadwords (i.e., units of 128 bits).
+ *
+ * The correct way to obtain vq is to use sve_vq_from_vl(vl).  The
+ * result is valid if and only if sve_vl_valid(vl) is true.  This is
+ * guaranteed for a struct sve_context written by the kernel.
+ *
+ *
+ * Additional macros describe the contents and layout of the payload.
+ * For each, SVE_SIG_x_OFFSET(args) is the start offset relative to
+ * the start of struct sve_context, and SVE_SIG_x_SIZE(args) is the
+ * size in bytes:
+ *
+ *	x	type				description
+ *	-	----				-----------
+ *	REGS					the entire SVE context
+ *
+ *	ZREGS	__uint128_t[SVE_NUM_ZREGS][vq]	all Z-registers
+ *	ZREG	__uint128_t[vq]			individual Z-register Zn
+ *
+ *	PREGS	uint16_t[SVE_NUM_PREGS][vq]	all P-registers
+ *	PREG	uint16_t[vq]			individual P-register Pn
+ *
+ *	FFR	uint16_t[vq]			first-fault status register
+ *
+ * Additional data might be appended in the future.
+ */
+
+#define SVE_SIG_ZREG_SIZE(vq)	((__u32)(vq) * SVE_VQ_BYTES)
+#define SVE_SIG_PREG_SIZE(vq)	((__u32)(vq) * (SVE_VQ_BYTES / 8))
+#define SVE_SIG_FFR_SIZE(vq)	SVE_SIG_PREG_SIZE(vq)
+
+#define SVE_SIG_REGS_OFFSET					\
+	((sizeof(struct sve_context) + (SVE_VQ_BYTES - 1))	\
+		/ SVE_VQ_BYTES * SVE_VQ_BYTES)
+
+#define SVE_SIG_ZREGS_OFFSET	SVE_SIG_REGS_OFFSET
+#define SVE_SIG_ZREG_OFFSET(vq, n) \
+	(SVE_SIG_ZREGS_OFFSET + SVE_SIG_ZREG_SIZE(vq) * (n))
+#define SVE_SIG_ZREGS_SIZE(vq) \
+	(SVE_SIG_ZREG_OFFSET(vq, SVE_NUM_ZREGS) - SVE_SIG_ZREGS_OFFSET)
+
+#define SVE_SIG_PREGS_OFFSET(vq) \
+	(SVE_SIG_ZREGS_OFFSET + SVE_SIG_ZREGS_SIZE(vq))
+#define SVE_SIG_PREG_OFFSET(vq, n) \
+	(SVE_SIG_PREGS_OFFSET(vq) + SVE_SIG_PREG_SIZE(vq) * (n))
+#define SVE_SIG_PREGS_SIZE(vq) \
+	(SVE_SIG_PREG_OFFSET(vq, SVE_NUM_PREGS) - SVE_SIG_PREGS_OFFSET(vq))
+
+#define SVE_SIG_FFR_OFFSET(vq) \
+	(SVE_SIG_PREGS_OFFSET(vq) + SVE_SIG_PREGS_SIZE(vq))
+
+#define SVE_SIG_REGS_SIZE(vq) \
+	(SVE_SIG_FFR_OFFSET(vq) + SVE_SIG_FFR_SIZE(vq) - SVE_SIG_REGS_OFFSET)
+
+#define SVE_SIG_CONTEXT_SIZE(vq) (SVE_SIG_REGS_OFFSET + SVE_SIG_REGS_SIZE(vq))
+
+
 #endif /* _UAPI__ASM_SIGCONTEXT_H */
-- 
2.1.4


^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v2 19/28] arm64/sve: ptrace and ELF coredump support
       [not found] <1504198860-12951-1-git-send-email-Dave.Martin@arm.com>
  2017-08-31 17:01 ` [PATCH v2 09/28] arm64/sve: Signal frame and context structure definition Dave Martin
@ 2017-08-31 17:02 ` Dave Martin
  2017-09-06 16:22   ` Okamoto, Takayuki
  2017-09-14 12:57   ` Alex Bennée
  2017-08-31 17:09 ` [PATCH v2 14/28] arm64/sve: Backend logic for setting the vector length Dave Martin
  2 siblings, 2 replies; 21+ messages in thread
From: Dave Martin @ 2017-08-31 17:02 UTC (permalink / raw)
  To: linux-arm-kernel
  Cc: Catalin Marinas, Will Deacon, Ard Biesheuvel, Alex Bennée,
	Szabolcs Nagy, Richard Sandiford, kvmarm, libc-alpha, linux-arch,
	gdb, Alan Hayward, Yao Qi, Oleg Nesterov, Alexander Viro

This patch defines and implements a new regset NT_ARM_SVE, which
describes a thread's SVE register state.  This allows a debugger to
manipulate the SVE state, as well as being included in ELF
coredumps for post-mortem debugging.

Because the regset size and layout are dependent on the thread's
current vector length, it is not possible to define a C struct to
describe the regset contents as is done for existing regsets.
Instead, and for the same reasons, NT_ARM_SVE is based on the
freeform variable-layout approach used for the SVE signal frame.

Additionally, to reduce debug overhead when debugging threads that
might or might not have live SVE register state, NT_ARM_SVE may be
presented in one of two different formats: the old struct
user_fpsimd_state format is embedded for describing the state of a
thread with no live SVE state, whereas a new variable-layout
structure is embedded for describing live SVE state.  This avoids a
debugger needing to poll NT_PRFPREG in addition to NT_ARM_SVE, and
allows existing userspace code to handle the non-SVE case without
too much modification.

For this to work, NT_ARM_SVE is defined with a fixed-format header
of type struct user_sve_header, which the recipient can use to
figure out the content, size and layout of the rest of the regset.
Accessor macros are defined to allow the vector-length-dependent
parts of the regset to be manipulated.

Signed-off-by: Alan Hayward <alan.hayward@arm.com>
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Cc: Alex Bennée <alex.bennee@linaro.org>

---

Changes since v1
----------------

Other changes related to Alex Bennée's comments:

* Migrate to SVE_VQ_BYTES instead of magic numbers.

Requested by Alex Bennée:

* Thin out BUG_ON()s:
Redundant BUG_ON()s and ones that just check invariants are removed.
Important sanity-checks are migrated to WARN_ON()s, with some
minimal best-effort patch-up code.

Other:

* [ABI fix] Bail out with -EIO if attempting to set the
SVE regs for an unsupported VL, instead of misparsing the regset data.

* Replace some in-kernel open-coded arithmetic with ALIGN()/
DIV_ROUND_UP().
---
 arch/arm64/include/asm/fpsimd.h      |  13 +-
 arch/arm64/include/uapi/asm/ptrace.h | 135 ++++++++++++++++++
 arch/arm64/kernel/fpsimd.c           |  40 +++++-
 arch/arm64/kernel/ptrace.c           | 270 +++++++++++++++++++++++++++++++++--
 include/uapi/linux/elf.h             |   1 +
 5 files changed, 449 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 6c22624..2723cca 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -38,13 +38,16 @@ struct fpsimd_state {
 			__uint128_t vregs[32];
 			u32 fpsr;
 			u32 fpcr;
+			/*
+			 * For ptrace compatibility, pad to next 128-bit
+			 * boundary here if extending this struct.
+			 */
 		};
 	};
 	/* the id of the last cpu to have restored this state */
 	unsigned int cpu;
 };
 
-
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /* Masks for extracting the FPSR and FPCR from the FPSCR */
 #define VFP_FPSCR_STAT_MASK	0xf800009f
@@ -89,6 +92,10 @@ extern void sve_alloc(struct task_struct *task);
 extern void fpsimd_release_thread(struct task_struct *task);
 extern void fpsimd_dup_sve(struct task_struct *dst,
 			   struct task_struct const *src);
+extern void fpsimd_sync_to_sve(struct task_struct *task);
+extern void sve_sync_to_fpsimd(struct task_struct *task);
+extern void sve_sync_from_fpsimd_zeropad(struct task_struct *task);
+
 extern int sve_set_vector_length(struct task_struct *task,
 				 unsigned long vl, unsigned long flags);
 
@@ -103,6 +110,10 @@ static void __maybe_unused sve_alloc(struct task_struct *task) { }
 static void __maybe_unused fpsimd_release_thread(struct task_struct *task) { }
 static void __maybe_unused fpsimd_dup_sve(struct task_struct *dst,
 					  struct task_struct const *src) { }
+static void __maybe_unused sve_sync_to_fpsimd(struct task_struct *task) { }
+static void __maybe_unused sve_sync_from_fpsimd_zeropad(
+	struct task_struct *task) { }
+
 static void __maybe_unused sve_init_vq_map(void) { }
 static void __maybe_unused sve_update_vq_map(void) { }
 static int __maybe_unused sve_verify_vq_map(void) { return 0; }
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index d1ff83d..1915ab0 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -22,6 +22,7 @@
 #include <linux/types.h>
 
 #include <asm/hwcap.h>
+#include <asm/sigcontext.h>
 
 
 /*
@@ -63,6 +64,8 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/prctl.h>
+
 /*
  * User structures for general purpose, floating point and debug registers.
  */
@@ -90,6 +93,138 @@ struct user_hwdebug_state {
 	}		dbg_regs[16];
 };
 
+/* SVE/FP/SIMD state (NT_ARM_SVE) */
+
+struct user_sve_header {
+	__u32 size; /* total meaningful regset content in bytes */
+	__u32 max_size; /* maxmium possible size for this thread */
+	__u16 vl; /* current vector length */
+	__u16 max_vl; /* maximum possible vector length */
+	__u16 flags;
+	__u16 __reserved;
+};
+
+/* Definitions for user_sve_header.flags: */
+#define SVE_PT_REGS_MASK		(1 << 0)
+
+/* Flags: must be kept in sync with prctl interface in <linux/ptrace.h> */
+#define SVE_PT_REGS_FPSIMD		0
+#define SVE_PT_REGS_SVE			SVE_PT_REGS_MASK
+
+#define SVE_PT_VL_INHERIT		(PR_SVE_VL_INHERIT >> 16)
+#define SVE_PT_VL_ONEXEC		(PR_SVE_SET_VL_ONEXEC >> 16)
+
+
+/*
+ * The remainder of the SVE state follows struct user_sve_header.  The
+ * total size of the SVE state (including header) depends on the
+ * metadata in the header:  SVE_PT_SIZE(vq, flags) gives the total size
+ * of the state in bytes, including the header.
+ *
+ * Refer to <asm/sigcontext.h> for details of how to pass the correct
+ * "vq" argument to these macros.
+ */
+
+/* Offset from the start of struct user_sve_header to the register data */
+#define SVE_PT_REGS_OFFSET					\
+	((sizeof(struct sve_context) + (SVE_VQ_BYTES - 1))	\
+		/ SVE_VQ_BYTES * SVE_VQ_BYTES)
+
+/*
+ * The register data content and layout depends on the value of the
+ * flags field.
+ */
+
+/*
+ * (flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD case:
+ *
+ * The payload starts at offset SVE_PT_FPSIMD_OFFSET, and is of type
+ * struct user_fpsimd_state.  Additional data might be appended in the
+ * future: use SVE_PT_FPSIMD_SIZE(vq, flags) to compute the total size.
+ * SVE_PT_FPSIMD_SIZE(vq, flags) will never be less than
+ * sizeof(struct user_fpsimd_state).
+ */
+
+#define SVE_PT_FPSIMD_OFFSET		SVE_PT_REGS_OFFSET
+
+#define SVE_PT_FPSIMD_SIZE(vq, flags)	(sizeof(struct user_fpsimd_state))
+
+/*
+ * (flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE case:
+ *
+ * The payload starts at offset SVE_PT_SVE_OFFSET, and is of size
+ * SVE_PT_SVE_SIZE(vq, flags).
+ *
+ * Additional macros describe the contents and layout of the payload.
+ * For each, SVE_PT_SVE_x_OFFSET(args) is the start offset relative to
+ * the start of struct user_sve_header, and SVE_PT_SVE_x_SIZE(args) is
+ * the size in bytes:
+ *
+ *	x	type				description
+ *	-	----				-----------
+ *	ZREGS		\
+ *	ZREG		|
+ *	PREGS		| refer to <asm/sigcontext.h>
+ *	PREG		|
+ *	FFR		/
+ *
+ *	FPSR	uint32_t			FPSR
+ *	FPCR	uint32_t			FPCR
+ *
+ * Additional data might be appended in the future.
+ */
+
+#define SVE_PT_SVE_ZREG_SIZE(vq)	SVE_SIG_ZREG_SIZE(vq)
+#define SVE_PT_SVE_PREG_SIZE(vq)	SVE_SIG_PREG_SIZE(vq)
+#define SVE_PT_SVE_FFR_SIZE(vq)		SVE_SIG_FFR_SIZE(vq)
+#define SVE_PT_SVE_FPSR_SIZE		sizeof(__u32)
+#define SVE_PT_SVE_FPCR_SIZE		sizeof(__u32)
+
+#define __SVE_SIG_TO_PT(offset) \
+	((offset) - SVE_SIG_REGS_OFFSET + SVE_PT_REGS_OFFSET)
+
+#define SVE_PT_SVE_OFFSET		SVE_PT_REGS_OFFSET
+
+#define SVE_PT_SVE_ZREGS_OFFSET \
+	__SVE_SIG_TO_PT(SVE_SIG_ZREGS_OFFSET)
+#define SVE_PT_SVE_ZREG_OFFSET(vq, n) \
+	__SVE_SIG_TO_PT(SVE_SIG_ZREG_OFFSET(vq, n))
+#define SVE_PT_SVE_ZREGS_SIZE(vq) \
+	(SVE_PT_SVE_ZREG_OFFSET(vq, SVE_NUM_ZREGS) - SVE_PT_SVE_ZREGS_OFFSET)
+
+#define SVE_PT_SVE_PREGS_OFFSET(vq) \
+	__SVE_SIG_TO_PT(SVE_SIG_PREGS_OFFSET(vq))
+#define SVE_PT_SVE_PREG_OFFSET(vq, n) \
+	__SVE_SIG_TO_PT(SVE_SIG_PREG_OFFSET(vq, n))
+#define SVE_PT_SVE_PREGS_SIZE(vq) \
+	(SVE_PT_SVE_PREG_OFFSET(vq, SVE_NUM_PREGS) - \
+		SVE_PT_SVE_PREGS_OFFSET(vq))
+
+#define SVE_PT_SVE_FFR_OFFSET(vq) \
+	__SVE_SIG_TO_PT(SVE_SIG_FFR_OFFSET(vq))
+
+#define SVE_PT_SVE_FPSR_OFFSET(vq)				\
+	((SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq) +	\
+			(SVE_VQ_BYTES - 1))			\
+		/ SVE_VQ_BYTES * SVE_VQ_BYTES)
+#define SVE_PT_SVE_FPCR_OFFSET(vq) \
+	(SVE_PT_SVE_FPSR_OFFSET(vq) + SVE_PT_SVE_FPSR_SIZE)
+
+/*
+ * Any future extension appended after FPCR must be aligned to the next
+ * 128-bit boundary.
+ */
+
+#define SVE_PT_SVE_SIZE(vq, flags)					\
+	((SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE		\
+			- SVE_PT_SVE_OFFSET + (SVE_VQ_BYTES - 1))	\
+		/ SVE_VQ_BYTES * SVE_VQ_BYTES)
+
+#define SVE_PT_SIZE(vq, flags)						\
+	 (((flags) & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE ?		\
+		  SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, flags)	\
+		: SVE_PT_FPSIMD_OFFSET + SVE_PT_FPSIMD_SIZE(vq, flags))
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _UAPI__ASM_PTRACE_H */
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index fff9fcf..361c019 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -303,6 +303,37 @@ void sve_alloc(struct task_struct *task)
 	BUG_ON(!task->thread.sve_state);
 }
 
+void fpsimd_sync_to_sve(struct task_struct *task)
+{
+	if (!test_tsk_thread_flag(task, TIF_SVE))
+		fpsimd_to_sve(task);
+}
+
+void sve_sync_to_fpsimd(struct task_struct *task)
+{
+	if (test_tsk_thread_flag(task, TIF_SVE))
+		sve_to_fpsimd(task);
+}
+
+void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
+{
+	unsigned int vq;
+	void *sst = task->thread.sve_state;
+	struct fpsimd_state const *fst = &task->thread.fpsimd_state;
+	unsigned int i;
+
+	if (!test_tsk_thread_flag(task, TIF_SVE))
+		return;
+
+	vq = sve_vq_from_vl(task->thread.sve_vl);
+
+	memset(sst, 0, SVE_SIG_REGS_SIZE(vq));
+
+	for (i = 0; i < 32; ++i)
+		memcpy(ZREG(sst, vq, i), &fst->vregs[i],
+		       sizeof(fst->vregs[i]));
+}
+
 /*
  * Handle SVE state across fork():
  *
@@ -459,10 +490,17 @@ static void __init sve_efi_setup(void)
 	 * This is evidence of a crippled system and we are returning void,
 	 * so no attempt is made to handle this situation here.
 	 */
-	BUG_ON(!sve_vl_valid(sve_max_vl));
+	if (!sve_vl_valid(sve_max_vl))
+		goto fail;
+
 	efi_sve_state = __alloc_percpu(
 		SVE_SIG_REGS_SIZE(sve_vq_from_vl(sve_max_vl)), SVE_VQ_BYTES);
 	if (!efi_sve_state)
+		goto fail;
+
+	return;
+
+fail:
 		panic("Cannot allocate percpu memory for EFI SVE save/restore");
 }
 
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 9cbb612..5ef4735b 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -32,6 +32,7 @@
 #include <linux/security.h>
 #include <linux/init.h>
 #include <linux/signal.h>
+#include <linux/string.h>
 #include <linux/uaccess.h>
 #include <linux/perf_event.h>
 #include <linux/hw_breakpoint.h>
@@ -40,6 +41,7 @@
 #include <linux/elf.h>
 
 #include <asm/compat.h>
+#include <asm/cpufeature.h>
 #include <asm/debug-monitors.h>
 #include <asm/pgtable.h>
 #include <asm/stacktrace.h>
@@ -618,33 +620,66 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset,
 /*
  * TODO: update fp accessors for lazy context switching (sync/flush hwstate)
  */
-static int fpr_get(struct task_struct *target, const struct user_regset *regset,
-		   unsigned int pos, unsigned int count,
-		   void *kbuf, void __user *ubuf)
+static int __fpr_get(struct task_struct *target,
+		     const struct user_regset *regset,
+		     unsigned int pos, unsigned int count,
+		     void *kbuf, void __user *ubuf, unsigned int start_pos)
 {
 	struct user_fpsimd_state *uregs;
+
+	sve_sync_to_fpsimd(target);
+
 	uregs = &target->thread.fpsimd_state.user_fpsimd;
 
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs,
+				   start_pos, start_pos + sizeof(*uregs));
+}
+
+static int fpr_get(struct task_struct *target, const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   void *kbuf, void __user *ubuf)
+{
 	if (target == current)
 		fpsimd_preserve_current_state();
 
-	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1);
+	return __fpr_get(target, regset, pos, count, kbuf, ubuf, 0);
 }
 
-static int fpr_set(struct task_struct *target, const struct user_regset *regset,
-		   unsigned int pos, unsigned int count,
-		   const void *kbuf, const void __user *ubuf)
+static int __fpr_set(struct task_struct *target,
+		     const struct user_regset *regset,
+		     unsigned int pos, unsigned int count,
+		     const void *kbuf, const void __user *ubuf,
+		     unsigned int start_pos)
 {
 	int ret;
 	struct user_fpsimd_state newstate =
 		target->thread.fpsimd_state.user_fpsimd;
 
-	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, 0, -1);
+	sve_sync_to_fpsimd(target);
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate,
+				 start_pos, start_pos + sizeof(newstate));
 	if (ret)
 		return ret;
 
 	target->thread.fpsimd_state.user_fpsimd = newstate;
+
+	return ret;
+}
+
+static int fpr_set(struct task_struct *target, const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+
+	ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, 0);
+	if (ret)
+		return ret;
+
+	sve_sync_from_fpsimd_zeropad(target);
 	fpsimd_flush_task_state(target);
+
 	return ret;
 }
 
@@ -702,6 +737,210 @@ static int system_call_set(struct task_struct *target,
 	return ret;
 }
 
+#ifdef CONFIG_ARM64_SVE
+
+static void sve_init_header_from_task(struct user_sve_header *header,
+				      struct task_struct *target)
+{
+	unsigned int vq;
+
+	memset(header, 0, sizeof(*header));
+
+	header->flags = test_tsk_thread_flag(target, TIF_SVE) ?
+		SVE_PT_REGS_SVE : SVE_PT_REGS_FPSIMD;
+	if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT))
+		header->flags |= SVE_PT_VL_INHERIT;
+
+	header->vl = target->thread.sve_vl;
+	vq = sve_vq_from_vl(header->vl);
+
+	if (WARN_ON(!sve_vl_valid(sve_max_vl)))
+		header->max_vl = header->vl;
+
+	header->size = SVE_PT_SIZE(vq, header->flags);
+	header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl),
+				      SVE_PT_REGS_SVE);
+}
+
+static unsigned int sve_size_from_header(struct user_sve_header const *header)
+{
+	return ALIGN(header->size, SVE_VQ_BYTES);
+}
+
+static unsigned int sve_get_size(struct task_struct *target,
+				 const struct user_regset *regset)
+{
+	struct user_sve_header header;
+
+	if (!system_supports_sve())
+		return 0;
+
+	sve_init_header_from_task(&header, target);
+	return sve_size_from_header(&header);
+}
+
+static int sve_get(struct task_struct *target,
+		   const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   void *kbuf, void __user *ubuf)
+{
+	int ret;
+	struct user_sve_header header;
+	unsigned int vq;
+	unsigned long start, end;
+
+	if (!system_supports_sve())
+		return -EINVAL;
+
+	/* Header */
+	sve_init_header_from_task(&header, target);
+	vq = sve_vq_from_vl(header.vl);
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &header,
+				  0, sizeof(header));
+	if (ret)
+		return ret;
+
+	if (target == current)
+		fpsimd_preserve_current_state();
+
+	/* Registers: FPSIMD-only case */
+
+	BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header));
+	if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD)
+		return __fpr_get(target, regset, pos, count, kbuf, ubuf,
+				 SVE_PT_FPSIMD_OFFSET);
+
+	/* Otherwise: full SVE case */
+
+	BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
+	start = SVE_PT_SVE_OFFSET;
+	end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq);
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  target->thread.sve_state,
+				  start, end);
+	if (ret)
+		return ret;
+
+	start = end;
+	end = SVE_PT_SVE_FPSR_OFFSET(vq);
+	ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+				       start, end);
+	if (ret)
+		return ret;
+
+	/*
+	 * Copy fpsr, and fpcr which must follow contiguously in
+	 * struct fpsimd_state:
+	 */
+	start = end;
+	end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE;
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.fpsimd_state.fpsr,
+				  start, end);
+	if (ret)
+		return ret;
+
+	start = end;
+	end = sve_size_from_header(&header);
+	return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					start, end);
+}
+
+static int sve_set(struct task_struct *target,
+		   const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+	struct user_sve_header header;
+	unsigned int vq;
+	unsigned long start, end;
+
+	if (!system_supports_sve())
+		return -EINVAL;
+
+	/* Header */
+	if (count < sizeof(header))
+		return -EINVAL;
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &header,
+				 0, sizeof(header));
+	if (ret)
+		goto out;
+
+	/*
+	 * Apart from PT_SVE_REGS_MASK, all PT_SVE_* flags are consumed by
+	 * sve_set_vector_length(), which will also validate them for us:
+	 */
+	ret = sve_set_vector_length(target, header.vl,
+				    header.flags & ~SVE_PT_REGS_MASK);
+	if (ret)
+		goto out;
+
+	/* Actual VL set may be less than the user asked for: */
+	vq = sve_vq_from_vl(target->thread.sve_vl);
+
+	/* Registers: FPSIMD-only case */
+
+	BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header));
+	if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD) {
+		sve_sync_to_fpsimd(target);
+
+		ret = __fpr_set(target, regset, pos, count, kbuf, ubuf,
+				SVE_PT_FPSIMD_OFFSET);
+		clear_tsk_thread_flag(target, TIF_SVE);
+		goto out;
+	}
+
+	/* Otherwise: full SVE case */
+
+	/*
+	 * If setting a different VL from the requested VL and there is
+	 * register data, the data layout will be wrong: don't even
+	 * try to set the registers in this case.
+	 */
+	if (count && vq != sve_vq_from_vl(header.vl)) {
+		ret = -EIO;
+		goto out;
+	}
+
+	sve_alloc(target);
+	fpsimd_sync_to_sve(target);
+	set_tsk_thread_flag(target, TIF_SVE);
+
+	BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
+	start = SVE_PT_SVE_OFFSET;
+	end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq);
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 target->thread.sve_state,
+				 start, end);
+	if (ret)
+		goto out;
+
+	start = end;
+	end = SVE_PT_SVE_FPSR_OFFSET(vq);
+	ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					start, end);
+	if (ret)
+		goto out;
+
+	/*
+	 * Copy fpsr, and fpcr which must follow contiguously in
+	 * struct fpsimd_state:
+	 */
+	start = end;
+	end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE;
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &target->thread.fpsimd_state.fpsr,
+				 start, end);
+
+out:
+	fpsimd_flush_task_state(target);
+	return ret;
+}
+
+#endif /* CONFIG_ARM64_SVE */
+
 enum aarch64_regset {
 	REGSET_GPR,
 	REGSET_FPR,
@@ -711,6 +950,9 @@ enum aarch64_regset {
 	REGSET_HW_WATCH,
 #endif
 	REGSET_SYSTEM_CALL,
+#ifdef CONFIG_ARM64_SVE
+	REGSET_SVE,
+#endif
 };
 
 static const struct user_regset aarch64_regsets[] = {
@@ -768,6 +1010,18 @@ static const struct user_regset aarch64_regsets[] = {
 		.get = system_call_get,
 		.set = system_call_set,
 	},
+#ifdef CONFIG_ARM64_SVE
+	[REGSET_SVE] = { /* Scalable Vector Extension */
+		.core_note_type = NT_ARM_SVE,
+		.n = DIV_ROUND_UP(SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE),
+				  SVE_VQ_BYTES),
+		.size = SVE_VQ_BYTES,
+		.align = SVE_VQ_BYTES,
+		.get = sve_get,
+		.set = sve_set,
+		.get_size = sve_get_size,
+	},
+#endif
 };
 
 static const struct user_regset_view user_aarch64_view = {
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index b5280db..735b8f4 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -416,6 +416,7 @@ typedef struct elf64_shdr {
 #define NT_ARM_HW_BREAK	0x402		/* ARM hardware breakpoint registers */
 #define NT_ARM_HW_WATCH	0x403		/* ARM hardware watchpoint registers */
 #define NT_ARM_SYSTEM_CALL	0x404	/* ARM system call number */
+#define NT_ARM_SVE	0x405		/* ARM Scalable Vector Extension registers */
 #define NT_METAG_CBUF	0x500		/* Metag catch buffer registers */
 #define NT_METAG_RPIPE	0x501		/* Metag read pipeline state */
 #define NT_METAG_TLS	0x502		/* Metag TLS pointer */
-- 
2.1.4


^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v2 14/28] arm64/sve: Backend logic for setting the vector length
       [not found] <1504198860-12951-1-git-send-email-Dave.Martin@arm.com>
  2017-08-31 17:01 ` [PATCH v2 09/28] arm64/sve: Signal frame and context structure definition Dave Martin
  2017-08-31 17:02 ` [PATCH v2 19/28] arm64/sve: ptrace and ELF coredump support Dave Martin
@ 2017-08-31 17:09 ` Dave Martin
  2017-09-13 17:29   ` Catalin Marinas
  2017-09-20 11:00   ` Alan Hayward
  2 siblings, 2 replies; 21+ messages in thread
From: Dave Martin @ 2017-08-31 17:09 UTC (permalink / raw)
  To: linux-arm-kernel
  Cc: Catalin Marinas, Will Deacon, Ard Biesheuvel, Alex Bennée,
	Szabolcs Nagy, Richard Sandiford, kvmarm, libc-alpha, linux-arch,
	gdb, Alan Hayward, Yao Qi

This patch implements the core logic for changing a task's vector
length on request from userspace.  This will be used by the ptrace
and prctl frontends that are implemented in later patches.

The SVE architecture permits, but does not require, implementations
to support vector lengths that are not a power of two.  To handle
this, logic is added to check a requested vector length against a
possibly sparse bitmap of available vector lengths at runtime, so
that the best supported value can be chosen.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Cc: Alex Bennée <alex.bennee@linaro.org>

---

Changes since v1
----------------

Requested by Alex Bennée:

* Comment the definition of SVE_VL_ARCH_MAX.

* Thin out BUG_ON()s:
Redundant BUG_ON()s and ones that just check invariants are removed.
Important sanity-checks are migrated to WARN_ON()s, with some
minimal best-effort patch-up code.

Other changes related to Alex Bennée's comments:

* sve_max_vl is definitely not supposed to be changed after boot.
Make it official by marking it __ro_after_init.

* Migrate away from magic number for SVE_VQ_BYTES.
---
 arch/arm64/include/asm/fpsimd.h |   8 +++
 arch/arm64/kernel/fpsimd.c      | 128 +++++++++++++++++++++++++++++++++++++++-
 include/uapi/linux/prctl.h      |   5 ++
 3 files changed, 140 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 7efd04e..32c8e19 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -20,6 +20,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/cache.h>
 #include <linux/stddef.h>
 
 /*
@@ -70,11 +71,16 @@ extern void fpsimd_update_current_state(struct fpsimd_state *state);
 
 extern void fpsimd_flush_task_state(struct task_struct *target);
 
+/* Maximum VL that SVE VL-agnostic software can transparently support */
+#define SVE_VL_ARCH_MAX 0x100
+
 extern void sve_save_state(void *state, u32 *pfpsr);
 extern void sve_load_state(void const *state, u32 const *pfpsr,
 			   unsigned long vq_minus_1);
 extern unsigned int sve_get_vl(void);
 
+extern int __ro_after_init sve_max_vl;
+
 #ifdef CONFIG_ARM64_SVE
 
 extern size_t sve_state_size(struct task_struct const *task);
@@ -83,6 +89,8 @@ extern void sve_alloc(struct task_struct *task);
 extern void fpsimd_release_thread(struct task_struct *task);
 extern void fpsimd_dup_sve(struct task_struct *dst,
 			   struct task_struct const *src);
+extern int sve_set_vector_length(struct task_struct *task,
+				 unsigned long vl, unsigned long flags);
 
 #else /* ! CONFIG_ARM64_SVE */
 
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index f82cde8..713476e 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -17,8 +17,10 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/bitmap.h>
 #include <linux/bottom_half.h>
 #include <linux/bug.h>
+#include <linux/cache.h>
 #include <linux/compat.h>
 #include <linux/cpu.h>
 #include <linux/cpu_pm.h>
@@ -26,6 +28,7 @@
 #include <linux/init.h>
 #include <linux/percpu.h>
 #include <linux/preempt.h>
+#include <linux/prctl.h>
 #include <linux/ptrace.h>
 #include <linux/sched/signal.h>
 #include <linux/signal.h>
@@ -109,6 +112,20 @@ static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state);
 /* Default VL for tasks that don't set it explicitly: */
 static int sve_default_vl = SVE_VL_MIN;
 
+#ifdef CONFIG_ARM64_SVE
+
+/* Maximum supported vector length across all CPUs (initially poisoned) */
+int __ro_after_init sve_max_vl = -1;
+/* Set of available vector lengths, as vq_to_bit(vq): */
+static DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
+
+#else /* ! CONFIG_ARM64_SVE */
+
+/* Dummy declaration for code that will be optimised out: */
+extern DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
+
+#endif /* ! CONFIG_ARM64_SVE */
+
 static void sve_free(struct task_struct *task)
 {
 	kfree(task->thread.sve_state);
@@ -186,6 +203,44 @@ static void task_fpsimd_save(void)
 	}
 }
 
+static unsigned int vq_to_bit(unsigned int vq)
+{
+	return SVE_VQ_MAX - vq;
+}
+
+static unsigned int bit_to_vq(unsigned int bit)
+{
+	if (WARN_ON(bit >= SVE_VQ_MAX))
+		bit = SVE_VQ_MAX - 1;
+
+	return SVE_VQ_MAX - bit;
+}
+
+/*
+ * All vector length selection from userspace comes through here.
+ * We're on a slow path, so some sanity-checks are included.
+ * If things go wrong there's a bug somewhere, but try to fall back to a
+ * safe choice.
+ */
+static unsigned int find_supported_vector_length(unsigned int vl)
+{
+	int bit;
+	int max_vl = sve_max_vl;
+
+	if (WARN_ON(!sve_vl_valid(vl)))
+		vl = SVE_VL_MIN;
+
+	if (WARN_ON(!sve_vl_valid(max_vl)))
+		max_vl = SVE_VL_MIN;
+
+	if (vl > max_vl)
+		vl = max_vl;
+
+	bit = find_next_bit(sve_vq_map, SVE_VQ_MAX,
+			    vq_to_bit(sve_vq_from_vl(vl)));
+	return sve_vl_from_vq(bit_to_vq(bit));
+}
+
 #define ZREG(sve_state, vq, n) ((char *)(sve_state) +		\
 	(SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))
 
@@ -265,6 +320,73 @@ void fpsimd_dup_sve(struct task_struct *dst, struct task_struct const *src)
 	dst->thread.sve_state = NULL;
 }
 
+int sve_set_vector_length(struct task_struct *task,
+			  unsigned long vl, unsigned long flags)
+{
+	WARN_ON(task == current && preemptible());
+
+	if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
+				     PR_SVE_SET_VL_ONEXEC))
+		return -EINVAL;
+
+	if (!sve_vl_valid(vl))
+		return -EINVAL;
+
+	/*
+	 * Clamp to the maximum vector length that VL-agnostic SVE code can
+	 * work with.  A flag may be assigned in the future to allow setting
+	 * of larger vector lengths without confusing older software.
+	 */
+	if (vl > SVE_VL_ARCH_MAX)
+		vl = SVE_VL_ARCH_MAX;
+
+	vl = find_supported_vector_length(vl);
+
+	if (flags & (PR_SVE_VL_INHERIT |
+		     PR_SVE_SET_VL_ONEXEC))
+		task->thread.sve_vl_onexec = vl;
+	else
+		/* Reset VL to system default on next exec: */
+		task->thread.sve_vl_onexec = 0;
+
+	/* Only actually set the VL if not deferred: */
+	if (flags & PR_SVE_SET_VL_ONEXEC)
+		goto out;
+
+	/*
+	 * To ensure the FPSIMD bits of the SVE vector registers are preserved,
+	 * write any live register state back to task_struct, and convert to a
+	 * non-SVE thread.
+	 */
+	if (vl != task->thread.sve_vl) {
+		if (task == current) {
+			task_fpsimd_save();
+			set_thread_flag(TIF_FOREIGN_FPSTATE);
+		}
+
+		if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
+			sve_to_fpsimd(task);
+
+		/*
+		 * Force reallocation of task SVE state to the correct size
+		 * on next use:
+		 */
+		sve_free(task);
+	}
+
+	task->thread.sve_vl = vl;
+
+	fpsimd_flush_task_state(task);
+
+out:
+	if (flags & PR_SVE_VL_INHERIT)
+		set_thread_flag(TIF_SVE_VL_INHERIT);
+	else
+		clear_thread_flag(TIF_SVE_VL_INHERIT);
+
+	return 0;
+}
+
 void fpsimd_release_thread(struct task_struct *dead_task)
 {
 	sve_free(dead_task);
@@ -361,7 +483,7 @@ void fpsimd_thread_switch(struct task_struct *next)
 
 void fpsimd_flush_thread(void)
 {
-	int vl;
+	int vl, supported_vl;
 
 	if (!system_supports_fpsimd())
 		return;
@@ -389,6 +511,10 @@ void fpsimd_flush_thread(void)
 		if (WARN_ON(!sve_vl_valid(vl)))
 			vl = SVE_VL_MIN;
 
+		supported_vl = find_supported_vector_length(vl);
+		if (WARN_ON(supported_vl != vl))
+			vl = supported_vl;
+
 		current->thread.sve_vl = vl;
 
 		/*
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index a8d0759..1b64901 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -197,4 +197,9 @@ struct prctl_mm_map {
 # define PR_CAP_AMBIENT_LOWER		3
 # define PR_CAP_AMBIENT_CLEAR_ALL	4
 
+/* arm64 Scalable Vector Extension controls */
+# define PR_SVE_SET_VL_ONEXEC		(1 << 18) /* defer effect until exec */
+# define PR_SVE_VL_LEN_MASK		0xffff
+# define PR_SVE_VL_INHERIT		(1 << 17) /* inherit across exec */
+
 #endif /* _LINUX_PRCTL_H */
-- 
2.1.4


^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH v2 19/28] arm64/sve: ptrace and ELF coredump support
  2017-08-31 17:02 ` [PATCH v2 19/28] arm64/sve: ptrace and ELF coredump support Dave Martin
@ 2017-09-06 16:22   ` Okamoto, Takayuki
       [not found]     ` <20170906181634.GF6321@e103592.cambridge.arm.com>
  2017-09-14 12:57   ` Alex Bennée
  1 sibling, 1 reply; 21+ messages in thread
From: Okamoto, Takayuki @ 2017-09-06 16:22 UTC (permalink / raw)
  To: 'Dave Martin', linux-arm-kernel
  Cc: Catalin Marinas, Will Deacon, Ard Biesheuvel, Alex Bennée,
	Szabolcs Nagy, Richard Sandiford, kvmarm, libc-alpha, linux-arch,
	gdb, Alan Hayward, Yao Qi, Oleg Nesterov, Alexander Viro

Hi Dave,

I am an engineer of the postK computer from Fujitsu.

When I tried to read "max_vl" by ptrace with this patch on our local SVE 
simulator, it was read as zero.
I think the cause of this incident is that "max_vl" is set as "header->vl" 
only on warning case in sve_init_header_from_task().
"max_vl" should be set up also on normal case, like the following patch.


--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -755,6 +755,8 @@ static void sve_init_header_from_task(struct user_sve_header *header,

        if (WARN_ON(!sve_vl_valid(sve_max_vl)))
                header->max_vl = header->vl;
+       else
+               header->max_vl = sve_max_vl;

        header->size = SVE_PT_SIZE(vq, header->flags);
        header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl),


Best regards,
Takayuki Okamoto

-----Original Message-----
From: gdb-owner@sourceware.org [mailto:gdb-owner@sourceware.org] On Behalf Of Dave Martin
Sent: Friday, September 1, 2017 2:01 AM
To: linux-arm-kernel@lists.infradead.org
Cc: Catalin Marinas <catalin.marinas@arm.com>; Will Deacon <will.deacon@arm.com>; Ard Biesheuvel <ard.biesheuvel@linaro.org>; Alex Bennée <alex.bennee@linaro.org>; Szabolcs Nagy <szabolcs.nagy@arm.com>; Richard Sandiford <richard.sandiford@arm.com>; kvmarm@lists.cs.columbia.edu; libc-alpha@sourceware.org; linux-arch@vger.kernel.org; gdb@sourceware.org; Alan Hayward <alan.hayward@arm.com>; Yao Qi <Yao.Qi@arm.com>; Oleg Nesterov <oleg@redhat.com>; Alexander Viro <viro@zeniv.linux.org.uk>
Subject: [PATCH v2 19/28] arm64/sve: ptrace and ELF coredump support

This patch defines and implements a new regset NT_ARM_SVE, which
describes a thread's SVE register state.  This allows a debugger to
manipulate the SVE state, as well as being included in ELF
coredumps for post-mortem debugging.

Because the regset size and layout are dependent on the thread's
current vector length, it is not possible to define a C struct to
describe the regset contents as is done for existing regsets.
Instead, and for the same reasons, NT_ARM_SVE is based on the
freeform variable-layout approach used for the SVE signal frame.

Additionally, to reduce debug overhead when debugging threads that
might or might not have live SVE register state, NT_ARM_SVE may be
presented in one of two different formats: the old struct
user_fpsimd_state format is embedded for describing the state of a
thread with no live SVE state, whereas a new variable-layout
structure is embedded for describing live SVE state.  This avoids a
debugger needing to poll NT_PRFPREG in addition to NT_ARM_SVE, and
allows existing userspace code to handle the non-SVE case without
too much modification.

For this to work, NT_ARM_SVE is defined with a fixed-format header
of type struct user_sve_header, which the recipient can use to
figure out the content, size and layout of the rest of the regset.
Accessor macros are defined to allow the vector-length-dependent
parts of the regset to be manipulated.

Signed-off-by: Alan Hayward <alan.hayward@arm.com>
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Cc: Alex Bennée <alex.bennee@linaro.org>

---

Changes since v1
----------------

Other changes related to Alex Bennée's comments:

* Migrate to SVE_VQ_BYTES instead of magic numbers.

Requested by Alex Bennée:

* Thin out BUG_ON()s:
Redundant BUG_ON()s and ones that just check invariants are removed.
Important sanity-checks are migrated to WARN_ON()s, with some
minimal best-effort patch-up code.

Other:

* [ABI fix] Bail out with -EIO if attempting to set the
SVE regs for an unsupported VL, instead of misparsing the regset data.

* Replace some in-kernel open-coded arithmetic with ALIGN()/
DIV_ROUND_UP().
---
 arch/arm64/include/asm/fpsimd.h      |  13 +-
 arch/arm64/include/uapi/asm/ptrace.h | 135 ++++++++++++++++++
 arch/arm64/kernel/fpsimd.c           |  40 +++++-
 arch/arm64/kernel/ptrace.c           | 270 +++++++++++++++++++++++++++++++++--
 include/uapi/linux/elf.h             |   1 +
 5 files changed, 449 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 6c22624..2723cca 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -38,13 +38,16 @@ struct fpsimd_state {
 			__uint128_t vregs[32];
 			u32 fpsr;
 			u32 fpcr;
+			/*
+			 * For ptrace compatibility, pad to next 128-bit
+			 * boundary here if extending this struct.
+			 */
 		};
 	};
 	/* the id of the last cpu to have restored this state */
 	unsigned int cpu;
 };
 
-
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /* Masks for extracting the FPSR and FPCR from the FPSCR */
 #define VFP_FPSCR_STAT_MASK	0xf800009f
@@ -89,6 +92,10 @@ extern void sve_alloc(struct task_struct *task);
 extern void fpsimd_release_thread(struct task_struct *task);
 extern void fpsimd_dup_sve(struct task_struct *dst,
 			   struct task_struct const *src);
+extern void fpsimd_sync_to_sve(struct task_struct *task);
+extern void sve_sync_to_fpsimd(struct task_struct *task);
+extern void sve_sync_from_fpsimd_zeropad(struct task_struct *task);
+
 extern int sve_set_vector_length(struct task_struct *task,
 				 unsigned long vl, unsigned long flags);
 
@@ -103,6 +110,10 @@ static void __maybe_unused sve_alloc(struct task_struct *task) { }
 static void __maybe_unused fpsimd_release_thread(struct task_struct *task) { }
 static void __maybe_unused fpsimd_dup_sve(struct task_struct *dst,
 					  struct task_struct const *src) { }
+static void __maybe_unused sve_sync_to_fpsimd(struct task_struct *task) { }
+static void __maybe_unused sve_sync_from_fpsimd_zeropad(
+	struct task_struct *task) { }
+
 static void __maybe_unused sve_init_vq_map(void) { }
 static void __maybe_unused sve_update_vq_map(void) { }
 static int __maybe_unused sve_verify_vq_map(void) { return 0; }
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index d1ff83d..1915ab0 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -22,6 +22,7 @@
 #include <linux/types.h>
 
 #include <asm/hwcap.h>
+#include <asm/sigcontext.h>
 
 
 /*
@@ -63,6 +64,8 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/prctl.h>
+
 /*
  * User structures for general purpose, floating point and debug registers.
  */
@@ -90,6 +93,138 @@ struct user_hwdebug_state {
 	}		dbg_regs[16];
 };
 
+/* SVE/FP/SIMD state (NT_ARM_SVE) */
+
+struct user_sve_header {
+	__u32 size; /* total meaningful regset content in bytes */
+	__u32 max_size; /* maximum possible size for this thread */
+	__u16 vl; /* current vector length */
+	__u16 max_vl; /* maximum possible vector length */
+	__u16 flags;
+	__u16 __reserved;
+};
+
+/* Definitions for user_sve_header.flags: */
+#define SVE_PT_REGS_MASK		(1 << 0)
+
+/* Flags: must be kept in sync with prctl interface in <linux/prctl.h> */
+#define SVE_PT_REGS_FPSIMD		0
+#define SVE_PT_REGS_SVE			SVE_PT_REGS_MASK
+
+#define SVE_PT_VL_INHERIT		(PR_SVE_VL_INHERIT >> 16)
+#define SVE_PT_VL_ONEXEC		(PR_SVE_SET_VL_ONEXEC >> 16)
+
+
+/*
+ * The remainder of the SVE state follows struct user_sve_header.  The
+ * total size of the SVE state (including header) depends on the
+ * metadata in the header:  SVE_PT_SIZE(vq, flags) gives the total size
+ * of the state in bytes, including the header.
+ *
+ * Refer to <asm/sigcontext.h> for details of how to pass the correct
+ * "vq" argument to these macros.
+ */
+
+/* Offset from the start of struct user_sve_header to the register data */
+#define SVE_PT_REGS_OFFSET					\
+	((sizeof(struct sve_context) + (SVE_VQ_BYTES - 1))	\
+		/ SVE_VQ_BYTES * SVE_VQ_BYTES)
+
+/*
+ * The register data content and layout depends on the value of the
+ * flags field.
+ */
+
+/*
+ * (flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD case:
+ *
+ * The payload starts at offset SVE_PT_FPSIMD_OFFSET, and is of type
+ * struct user_fpsimd_state.  Additional data might be appended in the
+ * future: use SVE_PT_FPSIMD_SIZE(vq, flags) to compute the total size.
+ * SVE_PT_FPSIMD_SIZE(vq, flags) will never be less than
+ * sizeof(struct user_fpsimd_state).
+ */
+
+#define SVE_PT_FPSIMD_OFFSET		SVE_PT_REGS_OFFSET
+
+#define SVE_PT_FPSIMD_SIZE(vq, flags)	(sizeof(struct user_fpsimd_state))
+
+/*
+ * (flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE case:
+ *
+ * The payload starts at offset SVE_PT_SVE_OFFSET, and is of size
+ * SVE_PT_SVE_SIZE(vq, flags).
+ *
+ * Additional macros describe the contents and layout of the payload.
+ * For each, SVE_PT_SVE_x_OFFSET(args) is the start offset relative to
+ * the start of struct user_sve_header, and SVE_PT_SVE_x_SIZE(args) is
+ * the size in bytes:
+ *
+ *	x	type				description
+ *	-	----				-----------
+ *	ZREGS		\
+ *	ZREG		|
+ *	PREGS		| refer to <asm/sigcontext.h>
+ *	PREG		|
+ *	FFR		/
+ *
+ *	FPSR	uint32_t			FPSR
+ *	FPCR	uint32_t			FPCR
+ *
+ * Additional data might be appended in the future.
+ */
+
+#define SVE_PT_SVE_ZREG_SIZE(vq)	SVE_SIG_ZREG_SIZE(vq)
+#define SVE_PT_SVE_PREG_SIZE(vq)	SVE_SIG_PREG_SIZE(vq)
+#define SVE_PT_SVE_FFR_SIZE(vq)		SVE_SIG_FFR_SIZE(vq)
+#define SVE_PT_SVE_FPSR_SIZE		sizeof(__u32)
+#define SVE_PT_SVE_FPCR_SIZE		sizeof(__u32)
+
+#define __SVE_SIG_TO_PT(offset) \
+	((offset) - SVE_SIG_REGS_OFFSET + SVE_PT_REGS_OFFSET)
+
+#define SVE_PT_SVE_OFFSET		SVE_PT_REGS_OFFSET
+
+#define SVE_PT_SVE_ZREGS_OFFSET \
+	__SVE_SIG_TO_PT(SVE_SIG_ZREGS_OFFSET)
+#define SVE_PT_SVE_ZREG_OFFSET(vq, n) \
+	__SVE_SIG_TO_PT(SVE_SIG_ZREG_OFFSET(vq, n))
+#define SVE_PT_SVE_ZREGS_SIZE(vq) \
+	(SVE_PT_SVE_ZREG_OFFSET(vq, SVE_NUM_ZREGS) - SVE_PT_SVE_ZREGS_OFFSET)
+
+#define SVE_PT_SVE_PREGS_OFFSET(vq) \
+	__SVE_SIG_TO_PT(SVE_SIG_PREGS_OFFSET(vq))
+#define SVE_PT_SVE_PREG_OFFSET(vq, n) \
+	__SVE_SIG_TO_PT(SVE_SIG_PREG_OFFSET(vq, n))
+#define SVE_PT_SVE_PREGS_SIZE(vq) \
+	(SVE_PT_SVE_PREG_OFFSET(vq, SVE_NUM_PREGS) - \
+		SVE_PT_SVE_PREGS_OFFSET(vq))
+
+#define SVE_PT_SVE_FFR_OFFSET(vq) \
+	__SVE_SIG_TO_PT(SVE_SIG_FFR_OFFSET(vq))
+
+#define SVE_PT_SVE_FPSR_OFFSET(vq)				\
+	((SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq) +	\
+			(SVE_VQ_BYTES - 1))			\
+		/ SVE_VQ_BYTES * SVE_VQ_BYTES)
+#define SVE_PT_SVE_FPCR_OFFSET(vq) \
+	(SVE_PT_SVE_FPSR_OFFSET(vq) + SVE_PT_SVE_FPSR_SIZE)
+
+/*
+ * Any future extension appended after FPCR must be aligned to the next
+ * 128-bit boundary.
+ */
+
+#define SVE_PT_SVE_SIZE(vq, flags)					\
+	((SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE		\
+			- SVE_PT_SVE_OFFSET + (SVE_VQ_BYTES - 1))	\
+		/ SVE_VQ_BYTES * SVE_VQ_BYTES)
+
+#define SVE_PT_SIZE(vq, flags)						\
+	 (((flags) & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE ?		\
+		  SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, flags)	\
+		: SVE_PT_FPSIMD_OFFSET + SVE_PT_FPSIMD_SIZE(vq, flags))
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _UAPI__ASM_PTRACE_H */
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index fff9fcf..361c019 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -303,6 +303,37 @@ void sve_alloc(struct task_struct *task)
 	BUG_ON(!task->thread.sve_state);
 }
 
+void fpsimd_sync_to_sve(struct task_struct *task)
+{
+	if (!test_tsk_thread_flag(task, TIF_SVE))
+		fpsimd_to_sve(task);
+}
+
+void sve_sync_to_fpsimd(struct task_struct *task)
+{
+	if (test_tsk_thread_flag(task, TIF_SVE))
+		sve_to_fpsimd(task);
+}
+
+void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
+{
+	unsigned int vq;
+	void *sst = task->thread.sve_state;
+	struct fpsimd_state const *fst = &task->thread.fpsimd_state;
+	unsigned int i;
+
+	if (!test_tsk_thread_flag(task, TIF_SVE))
+		return;
+
+	vq = sve_vq_from_vl(task->thread.sve_vl);
+
+	memset(sst, 0, SVE_SIG_REGS_SIZE(vq));
+
+	for (i = 0; i < 32; ++i)
+		memcpy(ZREG(sst, vq, i), &fst->vregs[i],
+		       sizeof(fst->vregs[i]));
+}
+
 /*
  * Handle SVE state across fork():
  *
@@ -459,10 +490,17 @@ static void __init sve_efi_setup(void)
 	 * This is evidence of a crippled system and we are returning void,
 	 * so no attempt is made to handle this situation here.
 	 */
-	BUG_ON(!sve_vl_valid(sve_max_vl));
+	if (!sve_vl_valid(sve_max_vl))
+		goto fail;
+
 	efi_sve_state = __alloc_percpu(
 		SVE_SIG_REGS_SIZE(sve_vq_from_vl(sve_max_vl)), SVE_VQ_BYTES);
 	if (!efi_sve_state)
+		goto fail;
+
+	return;
+
+fail:
 		panic("Cannot allocate percpu memory for EFI SVE save/restore");
 }
 
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 9cbb612..5ef4735b 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -32,6 +32,7 @@
 #include <linux/security.h>
 #include <linux/init.h>
 #include <linux/signal.h>
+#include <linux/string.h>
 #include <linux/uaccess.h>
 #include <linux/perf_event.h>
 #include <linux/hw_breakpoint.h>
@@ -40,6 +41,7 @@
 #include <linux/elf.h>
 
 #include <asm/compat.h>
+#include <asm/cpufeature.h>
 #include <asm/debug-monitors.h>
 #include <asm/pgtable.h>
 #include <asm/stacktrace.h>
@@ -618,33 +620,66 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset,
 /*
  * TODO: update fp accessors for lazy context switching (sync/flush hwstate)
  */
-static int fpr_get(struct task_struct *target, const struct user_regset *regset,
-		   unsigned int pos, unsigned int count,
-		   void *kbuf, void __user *ubuf)
+static int __fpr_get(struct task_struct *target,
+		     const struct user_regset *regset,
+		     unsigned int pos, unsigned int count,
+		     void *kbuf, void __user *ubuf, unsigned int start_pos)
 {
 	struct user_fpsimd_state *uregs;
+
+	sve_sync_to_fpsimd(target);
+
 	uregs = &target->thread.fpsimd_state.user_fpsimd;
 
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs,
+				   start_pos, start_pos + sizeof(*uregs));
+}
+
+static int fpr_get(struct task_struct *target, const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   void *kbuf, void __user *ubuf)
+{
 	if (target == current)
 		fpsimd_preserve_current_state();
 
-	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1);
+	return __fpr_get(target, regset, pos, count, kbuf, ubuf, 0);
 }
 
-static int fpr_set(struct task_struct *target, const struct user_regset *regset,
-		   unsigned int pos, unsigned int count,
-		   const void *kbuf, const void __user *ubuf)
+static int __fpr_set(struct task_struct *target,
+		     const struct user_regset *regset,
+		     unsigned int pos, unsigned int count,
+		     const void *kbuf, const void __user *ubuf,
+		     unsigned int start_pos)
 {
 	int ret;
 	struct user_fpsimd_state newstate =
 		target->thread.fpsimd_state.user_fpsimd;
 
-	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, 0, -1);
+	sve_sync_to_fpsimd(target);
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate,
+				 start_pos, start_pos + sizeof(newstate));
 	if (ret)
 		return ret;
 
 	target->thread.fpsimd_state.user_fpsimd = newstate;
+
+	return ret;
+}
+
+static int fpr_set(struct task_struct *target, const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+
+	ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, 0);
+	if (ret)
+		return ret;
+
+	sve_sync_from_fpsimd_zeropad(target);
 	fpsimd_flush_task_state(target);
+
 	return ret;
 }
 
@@ -702,6 +737,210 @@ static int system_call_set(struct task_struct *target,
 	return ret;
 }
 
+#ifdef CONFIG_ARM64_SVE
+
+static void sve_init_header_from_task(struct user_sve_header *header,
+				      struct task_struct *target)
+{
+	unsigned int vq;
+
+	memset(header, 0, sizeof(*header));
+
+	header->flags = test_tsk_thread_flag(target, TIF_SVE) ?
+		SVE_PT_REGS_SVE : SVE_PT_REGS_FPSIMD;
+	if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT))
+		header->flags |= SVE_PT_VL_INHERIT;
+
+	header->vl = target->thread.sve_vl;
+	vq = sve_vq_from_vl(header->vl);
+
+	if (WARN_ON(!sve_vl_valid(sve_max_vl)))
+		header->max_vl = header->vl;
+
+	header->size = SVE_PT_SIZE(vq, header->flags);
+	header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl),
+				      SVE_PT_REGS_SVE);
+}
+
+static unsigned int sve_size_from_header(struct user_sve_header const *header)
+{
+	return ALIGN(header->size, SVE_VQ_BYTES);
+}
+
+static unsigned int sve_get_size(struct task_struct *target,
+				 const struct user_regset *regset)
+{
+	struct user_sve_header header;
+
+	if (!system_supports_sve())
+		return 0;
+
+	sve_init_header_from_task(&header, target);
+	return sve_size_from_header(&header);
+}
+
+static int sve_get(struct task_struct *target,
+		   const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   void *kbuf, void __user *ubuf)
+{
+	int ret;
+	struct user_sve_header header;
+	unsigned int vq;
+	unsigned long start, end;
+
+	if (!system_supports_sve())
+		return -EINVAL;
+
+	/* Header */
+	sve_init_header_from_task(&header, target);
+	vq = sve_vq_from_vl(header.vl);
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &header,
+				  0, sizeof(header));
+	if (ret)
+		return ret;
+
+	if (target == current)
+		fpsimd_preserve_current_state();
+
+	/* Registers: FPSIMD-only case */
+
+	BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header));
+	if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD)
+		return __fpr_get(target, regset, pos, count, kbuf, ubuf,
+				 SVE_PT_FPSIMD_OFFSET);
+
+	/* Otherwise: full SVE case */
+
+	BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
+	start = SVE_PT_SVE_OFFSET;
+	end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq);
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  target->thread.sve_state,
+				  start, end);
+	if (ret)
+		return ret;
+
+	start = end;
+	end = SVE_PT_SVE_FPSR_OFFSET(vq);
+	ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+				       start, end);
+	if (ret)
+		return ret;
+
+	/*
+	 * Copy fpsr, and fpcr which must follow contiguously in
+	 * struct fpsimd_state:
+	 */
+	start = end;
+	end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE;
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.fpsimd_state.fpsr,
+				  start, end);
+	if (ret)
+		return ret;
+
+	start = end;
+	end = sve_size_from_header(&header);
+	return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					start, end);
+}
+
+static int sve_set(struct task_struct *target,
+		   const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+	struct user_sve_header header;
+	unsigned int vq;
+	unsigned long start, end;
+
+	if (!system_supports_sve())
+		return -EINVAL;
+
+	/* Header */
+	if (count < sizeof(header))
+		return -EINVAL;
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &header,
+				 0, sizeof(header));
+	if (ret)
+		goto out;
+
+	/*
+	 * Apart from SVE_PT_REGS_MASK, all SVE_PT_* flags are consumed by
+	 * sve_set_vector_length(), which will also validate them for us:
+	 */
+	ret = sve_set_vector_length(target, header.vl,
+				    header.flags & ~SVE_PT_REGS_MASK);
+	if (ret)
+		goto out;
+
+	/* Actual VL set may be less than the user asked for: */
+	vq = sve_vq_from_vl(target->thread.sve_vl);
+
+	/* Registers: FPSIMD-only case */
+
+	BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header));
+	if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD) {
+		sve_sync_to_fpsimd(target);
+
+		ret = __fpr_set(target, regset, pos, count, kbuf, ubuf,
+				SVE_PT_FPSIMD_OFFSET);
+		clear_tsk_thread_flag(target, TIF_SVE);
+		goto out;
+	}
+
+	/* Otherwise: full SVE case */
+
+	/*
+	 * If setting a different VL from the requested VL and there is
+	 * register data, the data layout will be wrong: don't even
+	 * try to set the registers in this case.
+	 */
+	if (count && vq != sve_vq_from_vl(header.vl)) {
+		ret = -EIO;
+		goto out;
+	}
+
+	sve_alloc(target);
+	fpsimd_sync_to_sve(target);
+	set_tsk_thread_flag(target, TIF_SVE);
+
+	BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
+	start = SVE_PT_SVE_OFFSET;
+	end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq);
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 target->thread.sve_state,
+				 start, end);
+	if (ret)
+		goto out;
+
+	start = end;
+	end = SVE_PT_SVE_FPSR_OFFSET(vq);
+	ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					start, end);
+	if (ret)
+		goto out;
+
+	/*
+	 * Copy fpsr, and fpcr which must follow contiguously in
+	 * struct fpsimd_state:
+	 */
+	start = end;
+	end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE;
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &target->thread.fpsimd_state.fpsr,
+				 start, end);
+
+out:
+	fpsimd_flush_task_state(target);
+	return ret;
+}
+
+#endif /* CONFIG_ARM64_SVE */
+
 enum aarch64_regset {
 	REGSET_GPR,
 	REGSET_FPR,
@@ -711,6 +950,9 @@ enum aarch64_regset {
 	REGSET_HW_WATCH,
 #endif
 	REGSET_SYSTEM_CALL,
+#ifdef CONFIG_ARM64_SVE
+	REGSET_SVE,
+#endif
 };
 
 static const struct user_regset aarch64_regsets[] = {
@@ -768,6 +1010,18 @@ static const struct user_regset aarch64_regsets[] = {
 		.get = system_call_get,
 		.set = system_call_set,
 	},
+#ifdef CONFIG_ARM64_SVE
+	[REGSET_SVE] = { /* Scalable Vector Extension */
+		.core_note_type = NT_ARM_SVE,
+		.n = DIV_ROUND_UP(SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE),
+				  SVE_VQ_BYTES),
+		.size = SVE_VQ_BYTES,
+		.align = SVE_VQ_BYTES,
+		.get = sve_get,
+		.set = sve_set,
+		.get_size = sve_get_size,
+	},
+#endif
 };
 
 static const struct user_regset_view user_aarch64_view = {
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index b5280db..735b8f4 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -416,6 +416,7 @@ typedef struct elf64_shdr {
 #define NT_ARM_HW_BREAK	0x402		/* ARM hardware breakpoint registers */
 #define NT_ARM_HW_WATCH	0x403		/* ARM hardware watchpoint registers */
 #define NT_ARM_SYSTEM_CALL	0x404	/* ARM system call number */
+#define NT_ARM_SVE	0x405		/* ARM Scalable Vector Extension registers */
 #define NT_METAG_CBUF	0x500		/* Metag catch buffer registers */
 #define NT_METAG_RPIPE	0x501		/* Metag read pipeline state */
 #define NT_METAG_TLS	0x502		/* Metag TLS pointer */
-- 
2.1.4



^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH v2 19/28] arm64/sve: ptrace and ELF coredump support
       [not found]     ` <20170906181634.GF6321@e103592.cambridge.arm.com>
@ 2017-09-07  5:11       ` Okamoto, Takayuki
  2017-09-08 13:11         ` Dave Martin
  0 siblings, 1 reply; 21+ messages in thread
From: Okamoto, Takayuki @ 2017-09-07  5:11 UTC (permalink / raw)
  To: 'Dave Martin'
  Cc: linux-arch, libc-alpha, Ard Biesheuvel, Szabolcs Nagy, gdb,
	Yao Qi, Alan Hayward, Will Deacon, Oleg Nesterov,
	Richard Sandiford, Alexander Viro, Catalin Marinas,
	Alex Bennée, kvmarm, linux-arm-kernel

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="gb2312", Size: 4743 bytes --]

Hi Dave,

Thank you for your reply.

> Your fix looks correct and seems to work.  For stylistic reasons, I may
> write it like this instead, but the effect should be the same:
> 
> 	header->max_vl = sve_max_vl;
> 	if (WARN_ON(!sve_vl_valid(sve_max_vl)))
> 		header->max_vl = header->vl;

It is better than my fix.
Please, apply it at next version.

Best regards,
Takayuki Okamoto

> -----Original Message-----
> From: linux-arm-kernel
> [mailto:linux-arm-kernel-bounces@lists.infradead.org] On Behalf Of Dave
> Martin
> Sent: Thursday, September 7, 2017 3:17 AM
> To: Okamoto, Takayuki <tokamoto@jp.fujitsu.com>
> Cc: linux-arch@vger.kernel.org; libc-alpha@sourceware.org; Ard
> Biesheuvel <ard.biesheuvel@linaro.org>; Szabolcs Nagy
> <szabolcs.nagy@arm.com>; gdb@sourceware.org; Yao Qi <Yao.Qi@arm.com>;
> Alan Hayward <alan.hayward@arm.com>; Will Deacon <will.deacon@arm.com>;
> Oleg Nesterov <oleg@redhat.com>; Richard Sandiford
> <richard.sandiford@arm.com>; Alexander Viro <viro@zeniv.linux.org.uk>;
> Catalin Marinas <catalin.marinas@arm.com>; Alex Bennée
> <alex.bennee@linaro.org>; kvmarm@lists.cs.columbia.edu;
> linux-arm-kernel@lists.infradead.org
> Subject: Re: [PATCH v2 19/28] arm64/sve: ptrace and ELF coredump support
> 
> On Wed, Sep 06, 2017 at 04:21:50PM +0000, Okamoto, Takayuki wrote:
> > Hi Dave,
> >
> > I am an engineer of the postK computer from Fujitsu.
> >
> > When I tried to read "max_vl" by ptrace with this patch on our local SVE
> > simulator, it was read as zero.
> > I think the cause of this incident is that "max_vl" is set as "header->vl"
> > only on warning case in sve_init_header_from_task().
> > "max_vl" should be set up also on normal case, like the following patch.
> >
> >
> > --- a/arch/arm64/kernel/ptrace.c
> > +++ b/arch/arm64/kernel/ptrace.c
> > @@ -755,6 +755,8 @@ static void sve_init_header_from_task(struct
> user_sve_header *header,
> >
> >         if (WARN_ON(!sve_vl_valid(sve_max_vl)))
> >                 header->max_vl = header->vl;
> > +       else
> > +               header->max_vl = sve_max_vl;
> >
> >         header->size = SVE_PT_SIZE(vq, header->flags);
> >         header->max_size =
> SVE_PT_SIZE(sve_vq_from_vl(header->max_vl),
> 
> Hi, thanks for reporting this.
> 
> It looks like a refactoring mistake I made while removing BUG_ON()s,
> which I missed in my testing.
> 
> Your fix looks correct and seems to work.  For stylistic reasons, I may
> write it like this instead, but the effect should be the same:
> 
> 	header->max_vl = sve_max_vl;
> 	if (WARN_ON(!sve_vl_valid(sve_max_vl)))
> 		header->max_vl = header->vl;
> 
> Cheers
> ---Dave
> 
> >
> >
> > Best regards,
> > Takayuki Okamoto
> >
> > -----Original Message-----
> > From: gdb-owner@sourceware.org [mailto:gdb-owner@sourceware.org] On
> Behalf Of Dave Martin
> > Sent: Friday, September 1, 2017 2:01 AM
> > To: linux-arm-kernel@lists.infradead.org
> > Cc: Catalin Marinas <catalin.marinas@arm.com>; Will Deacon
> <will.deacon@arm.com>; Ard Biesheuvel <ard.biesheuvel@linaro.org>; Alex
> Bennée <alex.bennee@linaro.org>; Szabolcs Nagy <szabolcs.nagy@arm.com>;
> Richard Sandiford <richard.sandiford@arm.com>;
> kvmarm@lists.cs.columbia.edu; libc-alpha@sourceware.org;
> linux-arch@vger.kernel.org; gdb@sourceware.org; Alan Hayward
> <alan.hayward@arm.com>; Yao Qi <Yao.Qi@arm.com>; Oleg Nesterov
> <oleg@redhat.com>; Alexander Viro <viro@zeniv.linux.org.uk>
> > Subject: [PATCH v2 19/28] arm64/sve: ptrace and ELF coredump support
> >
> 
> [...]
> 
> > @@ -702,6 +737,210 @@ static int system_call_set(struct task_struct
> *target,
> >  	return ret;
> >  }
> >
> > +#ifdef CONFIG_ARM64_SVE
> > +
> > +static void sve_init_header_from_task(struct user_sve_header *header,
> > +				      struct task_struct *target)
> > +{
> > +	unsigned int vq;
> > +
> > +	memset(header, 0, sizeof(*header));
> > +
> > +	header->flags = test_tsk_thread_flag(target, TIF_SVE) ?
> > +		SVE_PT_REGS_SVE : SVE_PT_REGS_FPSIMD;
> > +	if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT))
> > +		header->flags |= SVE_PT_VL_INHERIT;
> > +
> > +	header->vl = target->thread.sve_vl;
> > +	vq = sve_vq_from_vl(header->vl);
> > +
> > +	if (WARN_ON(!sve_vl_valid(sve_max_vl)))
> > +		header->max_vl = header->vl;
> > +
> > +	header->size = SVE_PT_SIZE(vq, header->flags);
> > +	header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl),
> > +				      SVE_PT_REGS_SVE);
> > +}
> 
> [...]
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

\x16º&ÖëzÛ«ŸŽ}ãYb²Ö«r\x18\x1d

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 19/28] arm64/sve: ptrace and ELF coredump support
  2017-09-07  5:11       ` Okamoto, Takayuki
@ 2017-09-08 13:11         ` Dave Martin
  0 siblings, 0 replies; 21+ messages in thread
From: Dave Martin @ 2017-09-08 13:11 UTC (permalink / raw)
  To: Okamoto, Takayuki
  Cc: linux-arch, libc-alpha, Ard Biesheuvel, Szabolcs Nagy, gdb,
	Yao Qi, Will Deacon, Oleg Nesterov, Alex Bennée,
	Richard Sandiford, Alexander Viro, Alan Hayward, Catalin Marinas,
	kvmarm, linux-arm-kernel

On Thu, Sep 07, 2017 at 05:11:45AM +0000, Okamoto, Takayuki wrote:
> Hi Dave,
> 
> Thank you for your reply.
> 
> > Your fix looks correct and seems to work.  For stylistic reasons, I may
> > write it like this instead, but the effect should be the same:
> > 
> > 	header->max_vl = sve_max_vl;
> > 	if (WARN_ON(!sve_vl_valid(sve_max_vl))
> > 		header->max_vl = header->vl;
> 
> It is better than my fix.
> Please, apply it at next version.

I've rebased to v4.13 and pushed a branch to track fixes against v2,
here:

 * http://linux-arm.org/git?p=linux-dm.git;a=shortlog;h=refs/heads/sve/v2%2Bfixes

 * git://linux-arm.org/linux-dm.git sve/v2+fixes

Cheers
---Dave


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 09/28] arm64/sve: Signal frame and context structure definition
  2017-08-31 17:01 ` [PATCH v2 09/28] arm64/sve: Signal frame and context structure definition Dave Martin
@ 2017-09-13 13:36   ` Catalin Marinas
  2017-09-13 21:33     ` Dave Martin
  0 siblings, 1 reply; 21+ messages in thread
From: Catalin Marinas @ 2017-09-13 13:36 UTC (permalink / raw)
  To: Dave Martin
  Cc: linux-arm-kernel, linux-arch, libc-alpha, gdb, Ard Biesheuvel,
	Szabolcs Nagy, Yao Qi, Alan Hayward, Will Deacon,
	Richard Sandiford, Alex Bennée, kvmarm

On Thu, Aug 31, 2017 at 06:00:41PM +0100, Dave P Martin wrote:
> +/*
> + * The SVE architecture leaves space for future expansion of the
> + * vector length beyond its initial architectural limit of 2048 bits
> + * (16 quadwords).
> + */
> +#define SVE_VQ_BYTES		0x10	/* number of bytes per quadword */
> +
> +#define SVE_VQ_MIN		1
> +#define SVE_VQ_MAX		0x200

Just a nitpick (up to you): could you use 16 and 512 here instead of
hex? I usually associate hex numbers with some bit fields.

-- 
Catalin


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 14/28] arm64/sve: Backend logic for setting the vector length
  2017-08-31 17:09 ` [PATCH v2 14/28] arm64/sve: Backend logic for setting the vector length Dave Martin
@ 2017-09-13 17:29   ` Catalin Marinas
  2017-09-13 19:06     ` Dave Martin
  2017-09-20 11:00   ` Alan Hayward
  1 sibling, 1 reply; 21+ messages in thread
From: Catalin Marinas @ 2017-09-13 17:29 UTC (permalink / raw)
  To: Dave Martin
  Cc: linux-arm-kernel, Will Deacon, Ard Biesheuvel, Alex Bennée,
	Szabolcs Nagy, Richard Sandiford, kvmarm, libc-alpha, linux-arch,
	gdb, Alan Hayward, Yao Qi

On Thu, Aug 31, 2017 at 06:00:46PM +0100, Dave P Martin wrote:
> This patch implements the core logic for changing a task's vector
> length on request from userspace.  This will be used by the ptrace
> and prctl frontends that are implemented in later patches.
> 
> The SVE architecture permits, but does not require, implementations
> to support vector lengths that are not a power of two.  To handle
> this, logic is added to check a requested vector length against a
> possibly sparse bitmap of available vector lengths at runtime, so
> that the best supported value can be chosen.
> 
> Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> Cc: Alex Bennée <alex.bennee@linaro.org>

Can this be merged with patch 20? It seems to add the PR_ definitions
which get actually used later when the prctl interface is added.

-- 
Catalin


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 14/28] arm64/sve: Backend logic for setting the vector length
  2017-09-13 17:29   ` Catalin Marinas
@ 2017-09-13 19:06     ` Dave Martin
  2017-09-13 22:11       ` Catalin Marinas
  0 siblings, 1 reply; 21+ messages in thread
From: Dave Martin @ 2017-09-13 19:06 UTC (permalink / raw)
  To: Catalin Marinas
  Cc: linux-arch, libc-alpha, Ard Biesheuvel, Szabolcs Nagy,
	Richard Sandiford, Yao Qi, Alan Hayward, Will Deacon, gdb,
	Alex Bennée, kvmarm, linux-arm-kernel

On Wed, Sep 13, 2017 at 10:29:11AM -0700, Catalin Marinas wrote:
> On Thu, Aug 31, 2017 at 06:00:46PM +0100, Dave P Martin wrote:
> > This patch implements the core logic for changing a task's vector
> > length on request from userspace.  This will be used by the ptrace
> > and prctl frontends that are implemented in later patches.
> > 
> > The SVE architecture permits, but does not require, implementations
> > to support vector lengths that are not a power of two.  To handle
> > this, logic is added to check a requested vector length against a
> > possibly sparse bitmap of available vector lengths at runtime, so
> > that the best supported value can be chosen.
> > 
> > Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> > Cc: Alex Bennée <alex.bennee@linaro.org>
> 
> Can this be merged with patch 20? It seems to add the PR_ definitions
> which get actually used later when the prctl interface is added.

This patch is used both by patch 19 and by patch 20, which I preferred
not to merge with each other: ptrace and prctl are significantly
different things.

The prctl bit definitions are added here because they are the canonical
definitions used by both interfaces.  The ptrace #defines are based on
them.

Does it make sense if I merge patch 20 into this one and apply patch 19
on top?  This avoids the appearance of prctl #defines with no prctl
implementation.

Cheers
---Dave


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 09/28] arm64/sve: Signal frame and context structure definition
  2017-09-13 13:36   ` Catalin Marinas
@ 2017-09-13 21:33     ` Dave Martin
  0 siblings, 0 replies; 21+ messages in thread
From: Dave Martin @ 2017-09-13 21:33 UTC (permalink / raw)
  To: Catalin Marinas
  Cc: linux-arch, libc-alpha, Ard Biesheuvel, Szabolcs Nagy, gdb,
	Yao Qi, Will Deacon, Richard Sandiford, Alan Hayward,
	Alex Bennée, kvmarm, linux-arm-kernel

On Wed, Sep 13, 2017 at 06:36:18AM -0700, Catalin Marinas wrote:
> On Thu, Aug 31, 2017 at 06:00:41PM +0100, Dave P Martin wrote:
> > +/*
> > + * The SVE architecture leaves space for future expansion of the
> > + * vector length beyond its initial architectural limit of 2048 bits
> > + * (16 quadwords).
> > + */
> > +#define SVE_VQ_BYTES		0x10	/* number of bytes per quadword */
> > +
> > +#define SVE_VQ_MIN		1
> > +#define SVE_VQ_MAX		0x200
> 
> Just a nitpick (up to you): could you use 16 and 512 here instead of
> hex? I usually associate hex numbers with some bit fields.

I have no strong opinion other than a desire to make these constants
typo-proof.

There's no particular reason why these shouldn't be in decimal, so I can
change them if you like, provided you promise to notice if I misspell
512 as 521...

(git grep 131027)

Cheers
---Dave


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 14/28] arm64/sve: Backend logic for setting the vector length
  2017-09-13 19:06     ` Dave Martin
@ 2017-09-13 22:11       ` Catalin Marinas
  2017-10-05 16:42         ` Dave Martin
  0 siblings, 1 reply; 21+ messages in thread
From: Catalin Marinas @ 2017-09-13 22:11 UTC (permalink / raw)
  To: Dave Martin
  Cc: linux-arch, libc-alpha, gdb, Ard Biesheuvel, Szabolcs Nagy,
	Richard Sandiford, Yao Qi, Will Deacon, Alan Hayward,
	Alex Bennée, kvmarm, linux-arm-kernel

On Wed, Sep 13, 2017 at 08:06:12PM +0100, Dave P Martin wrote:
> On Wed, Sep 13, 2017 at 10:29:11AM -0700, Catalin Marinas wrote:
> > On Thu, Aug 31, 2017 at 06:00:46PM +0100, Dave P Martin wrote:
> > > This patch implements the core logic for changing a task's vector
> > > length on request from userspace.  This will be used by the ptrace
> > > and prctl frontends that are implemented in later patches.
> > > 
> > > The SVE architecture permits, but does not require, implementations
> > > to support vector lengths that are not a power of two.  To handle
> > > this, logic is added to check a requested vector length against a
> > > possibly sparse bitmap of available vector lengths at runtime, so
> > > that the best supported value can be chosen.
> > > 
> > > Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> > > Cc: Alex Bennée <alex.bennee@linaro.org>
> > 
> > Can this be merged with patch 20? It seems to add the PR_ definitions
> > which get actually used later when the prctl interface is added.
> 
> This patch is used both by patch 19 and by patch 20, which I preferred
> not to merge with each other: ptrace and prctl are significantly
> different things.
> 
> The prctl bit definitions are added here because they are the canonical
> definitions used by both interfaces.  The ptrace #defines are based on
> them.
> 
> Does it make sense if I merge patch 20 into this one and apply patch 19
> on top?  This avoids the appearance of prctl #defines with no prctl
> implementation.

That's fine, you can bring patch 20 forward. If there are other
non-trivial issues, feel free to ignore my comment.

-- 
Catalin


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 19/28] arm64/sve: ptrace and ELF coredump support
  2017-08-31 17:02 ` [PATCH v2 19/28] arm64/sve: ptrace and ELF coredump support Dave Martin
  2017-09-06 16:22   ` Okamoto, Takayuki
@ 2017-09-14 12:57   ` Alex Bennée
  2017-09-28 14:57     ` Dave Martin
  2017-09-29 12:46     ` Dave Martin
  1 sibling, 2 replies; 21+ messages in thread
From: Alex Bennée @ 2017-09-14 12:57 UTC (permalink / raw)
  To: Dave Martin
  Cc: linux-arm-kernel, Catalin Marinas, Will Deacon, Ard Biesheuvel,
	Szabolcs Nagy, Richard Sandiford, kvmarm, libc-alpha, linux-arch,
	gdb, Alan Hayward, Yao Qi, Oleg Nesterov, Alexander Viro


Dave Martin <Dave.Martin@arm.com> writes:

> This patch defines and implements a new regset NT_ARM_SVE, which
> describes a thread's SVE register state.  This allows a debugger to
> manipulate the SVE state, as well as being included in ELF
> coredumps for post-mortem debugging.
>
> Because the regset size and layout are dependent on the thread's
> current vector length, it is not possible to define a C struct to
> describe the regset contents as is done for existing regsets.
> Instead, and for the same reasons, NT_ARM_SVE is based on the
> freeform variable-layout approach used for the SVE signal frame.
>
> Additionally, to reduce debug overhead when debugging threads that
> might or might not have live SVE register state, NT_ARM_SVE may be
> presented in one of two different formats: the old struct
> user_fpsimd_state format is embedded for describing the state of a
> thread with no live SVE state, whereas a new variable-layout
> structure is embedded for describing live SVE state.  This avoids a
> debugger needing to poll NT_PRFPREG in addition to NT_ARM_SVE, and
> allows existing userspace code to handle the non-SVE case without
> too much modification.
>
> For this to work, NT_ARM_SVE is defined with a fixed-format header
> of type struct user_sve_header, which the recipient can use to
> figure out the content, size and layout of the rest of the regset.
> Accessor macros are defined to allow the vector-length-dependent
> parts of the regset to be manipulated.
>
> Signed-off-by: Alan Hayward <alan.hayward@arm.com>
> Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> Cc: Alex Bennée <alex.bennee@linaro.org>
>
> ---
>
> Changes since v1
> ----------------
>
> Other changes related to Alex Bennée's comments:
>
> * Migrate to SVE_VQ_BYTES instead of magic numbers.
>
> Requested by Alex Bennée:
>
> * Thin out BUG_ON()s:
> Redundant BUG_ON()s and ones that just check invariants are removed.
> Important sanity-checks are migrated to WARN_ON()s, with some
> minimal best-effort patch-up code.
>
> Other:
>
> * [ABI fix] Bail out with -EIO if attempting to set the
> SVE regs for an unsupported VL, instead of misparsing the regset data.
>
> * Replace some in-kernel open-coded arithmetic with ALIGN()/
> DIV_ROUND_UP().
> ---
>  arch/arm64/include/asm/fpsimd.h      |  13 +-
>  arch/arm64/include/uapi/asm/ptrace.h | 135 ++++++++++++++++++
>  arch/arm64/kernel/fpsimd.c           |  40 +++++-
>  arch/arm64/kernel/ptrace.c           | 270 +++++++++++++++++++++++++++++++++--
>  include/uapi/linux/elf.h             |   1 +
>  5 files changed, 449 insertions(+), 10 deletions(-)
>
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index 6c22624..2723cca 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -38,13 +38,16 @@ struct fpsimd_state {
>  			__uint128_t vregs[32];
>  			u32 fpsr;
>  			u32 fpcr;
> +			/*
> +			 * For ptrace compatibility, pad to next 128-bit
> +			 * boundary here if extending this struct.
> +			 */
>  		};
>  	};
>  	/* the id of the last cpu to have restored this state */
>  	unsigned int cpu;
>  };
>
> -
>  #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
>  /* Masks for extracting the FPSR and FPCR from the FPSCR */
>  #define VFP_FPSCR_STAT_MASK	0xf800009f
> @@ -89,6 +92,10 @@ extern void sve_alloc(struct task_struct *task);
>  extern void fpsimd_release_thread(struct task_struct *task);
>  extern void fpsimd_dup_sve(struct task_struct *dst,
>  			   struct task_struct const *src);
> +extern void fpsimd_sync_to_sve(struct task_struct *task);
> +extern void sve_sync_to_fpsimd(struct task_struct *task);
> +extern void sve_sync_from_fpsimd_zeropad(struct task_struct *task);
> +
>  extern int sve_set_vector_length(struct task_struct *task,
>  				 unsigned long vl, unsigned long flags);
>
> @@ -103,6 +110,10 @@ static void __maybe_unused sve_alloc(struct task_struct *task) { }
>  static void __maybe_unused fpsimd_release_thread(struct task_struct *task) { }
>  static void __maybe_unused fpsimd_dup_sve(struct task_struct *dst,
>  					  struct task_struct const *src) { }
> +static void __maybe_unused sve_sync_to_fpsimd(struct task_struct *task) { }
> +static void __maybe_unused sve_sync_from_fpsimd_zeropad(
> +	struct task_struct *task) { }
> +
>  static void __maybe_unused sve_init_vq_map(void) { }
>  static void __maybe_unused sve_update_vq_map(void) { }
>  static int __maybe_unused sve_verify_vq_map(void) { return 0; }
> diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
> index d1ff83d..1915ab0 100644
> --- a/arch/arm64/include/uapi/asm/ptrace.h
> +++ b/arch/arm64/include/uapi/asm/ptrace.h
> @@ -22,6 +22,7 @@
>  #include <linux/types.h>
>
>  #include <asm/hwcap.h>
> +#include <asm/sigcontext.h>
>
>
>  /*
> @@ -63,6 +64,8 @@
>
>  #ifndef __ASSEMBLY__
>
> +#include <linux/prctl.h>
> +
>  /*
>   * User structures for general purpose, floating point and debug registers.
>   */
> @@ -90,6 +93,138 @@ struct user_hwdebug_state {
>  	}		dbg_regs[16];
>  };
>
> +/* SVE/FP/SIMD state (NT_ARM_SVE) */
> +
> +struct user_sve_header {
> +	__u32 size; /* total meaningful regset content in bytes */
> > +	__u32 max_size; /* maximum possible size for this thread */
> +	__u16 vl; /* current vector length */
> +	__u16 max_vl; /* maximum possible vector length */
> +	__u16 flags;
> +	__u16 __reserved;
> +};
> +
> +/* Definitions for user_sve_header.flags: */
> +#define SVE_PT_REGS_MASK		(1 << 0)
> +
> +/* Flags: must be kept in sync with prctl interface in
> <linux/ptrace.h> */

Which flags? We base some on PR_foo flags but we seem to shift them
anyway so where is the requirement for them to match from?

> +#define SVE_PT_REGS_FPSIMD		0
> +#define SVE_PT_REGS_SVE			SVE_PT_REGS_MASK
> +
> +#define SVE_PT_VL_INHERIT		(PR_SVE_VL_INHERIT >> 16)
> +#define SVE_PT_VL_ONEXEC		(PR_SVE_SET_VL_ONEXEC >> 16)
> +
> +
> +/*
> + * The remainder of the SVE state follows struct user_sve_header.  The
> + * total size of the SVE state (including header) depends on the
> + * metadata in the header:  SVE_PT_SIZE(vq, flags) gives the total size
> + * of the state in bytes, including the header.
> + *
> + * Refer to <asm/sigcontext.h> for details of how to pass the correct
> + * "vq" argument to these macros.
> + */
> +
> +/* Offset from the start of struct user_sve_header to the register data */
> +#define SVE_PT_REGS_OFFSET					\
> +	((sizeof(struct sve_context) + (SVE_VQ_BYTES - 1))	\
> +		/ SVE_VQ_BYTES * SVE_VQ_BYTES)
> +
> +/*
> + * The register data content and layout depends on the value of the
> + * flags field.
> + */
> +
> +/*
> + * (flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD case:
> + *
> + * The payload starts at offset SVE_PT_FPSIMD_OFFSET, and is of type
> + * struct user_fpsimd_state.  Additional data might be appended in the
> + * future: use SVE_PT_FPSIMD_SIZE(vq, flags) to compute the total size.
> + * SVE_PT_FPSIMD_SIZE(vq, flags) will never be less than
> + * sizeof(struct user_fpsimd_state).
> + */
> +
> +#define SVE_PT_FPSIMD_OFFSET		SVE_PT_REGS_OFFSET
> +
> +#define SVE_PT_FPSIMD_SIZE(vq, flags)	(sizeof(struct user_fpsimd_state))
> +
> +/*
> + * (flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE case:
> + *
> + * The payload starts at offset SVE_PT_SVE_OFFSET, and is of size
> + * SVE_PT_SVE_SIZE(vq, flags).
> + *
> + * Additional macros describe the contents and layout of the payload.
> + * For each, SVE_PT_SVE_x_OFFSET(args) is the start offset relative to
> + * the start of struct user_sve_header, and SVE_PT_SVE_x_SIZE(args) is
> + * the size in bytes:
> + *
> + *	x	type				description
> + *	-	----				-----------
> + *	ZREGS		\
> + *	ZREG		|
> + *	PREGS		| refer to <asm/sigcontext.h>
> + *	PREG		|
> + *	FFR		/
> + *
> + *	FPSR	uint32_t			FPSR
> + *	FPCR	uint32_t			FPCR
> + *
> + * Additional data might be appended in the future.
> + */
> +
> +#define SVE_PT_SVE_ZREG_SIZE(vq)	SVE_SIG_ZREG_SIZE(vq)
> +#define SVE_PT_SVE_PREG_SIZE(vq)	SVE_SIG_PREG_SIZE(vq)
> +#define SVE_PT_SVE_FFR_SIZE(vq)		SVE_SIG_FFR_SIZE(vq)
> +#define SVE_PT_SVE_FPSR_SIZE		sizeof(__u32)
> +#define SVE_PT_SVE_FPCR_SIZE		sizeof(__u32)
> +
> +#define __SVE_SIG_TO_PT(offset) \
> +	((offset) - SVE_SIG_REGS_OFFSET + SVE_PT_REGS_OFFSET)
> +
> +#define SVE_PT_SVE_OFFSET		SVE_PT_REGS_OFFSET
> +
> +#define SVE_PT_SVE_ZREGS_OFFSET \
> +	__SVE_SIG_TO_PT(SVE_SIG_ZREGS_OFFSET)
> +#define SVE_PT_SVE_ZREG_OFFSET(vq, n) \
> +	__SVE_SIG_TO_PT(SVE_SIG_ZREG_OFFSET(vq, n))
> +#define SVE_PT_SVE_ZREGS_SIZE(vq) \
> +	(SVE_PT_SVE_ZREG_OFFSET(vq, SVE_NUM_ZREGS) - SVE_PT_SVE_ZREGS_OFFSET)
> +
> +#define SVE_PT_SVE_PREGS_OFFSET(vq) \
> +	__SVE_SIG_TO_PT(SVE_SIG_PREGS_OFFSET(vq))
> +#define SVE_PT_SVE_PREG_OFFSET(vq, n) \
> +	__SVE_SIG_TO_PT(SVE_SIG_PREG_OFFSET(vq, n))
> +#define SVE_PT_SVE_PREGS_SIZE(vq) \
> +	(SVE_PT_SVE_PREG_OFFSET(vq, SVE_NUM_PREGS) - \
> +		SVE_PT_SVE_PREGS_OFFSET(vq))
> +
> +#define SVE_PT_SVE_FFR_OFFSET(vq) \
> +	__SVE_SIG_TO_PT(SVE_SIG_FFR_OFFSET(vq))
> +
> +#define SVE_PT_SVE_FPSR_OFFSET(vq)				\
> +	((SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq) +	\
> +			(SVE_VQ_BYTES - 1))			\
> +		/ SVE_VQ_BYTES * SVE_VQ_BYTES)
> +#define SVE_PT_SVE_FPCR_OFFSET(vq) \
> +	(SVE_PT_SVE_FPSR_OFFSET(vq) + SVE_PT_SVE_FPSR_SIZE)
> +
> +/*
> + * Any future extension appended after FPCR must be aligned to the next
> + * 128-bit boundary.
> + */
> +
> +#define SVE_PT_SVE_SIZE(vq, flags)					\
> +	((SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE		\
> +			- SVE_PT_SVE_OFFSET + (SVE_VQ_BYTES - 1))	\
> +		/ SVE_VQ_BYTES * SVE_VQ_BYTES)
> +
> +#define SVE_PT_SIZE(vq, flags)						\
> +	 (((flags) & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE ?		\
> +		  SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, flags)	\
> +		: SVE_PT_FPSIMD_OFFSET + SVE_PT_FPSIMD_SIZE(vq, flags))
> +
>  #endif /* __ASSEMBLY__ */
>
>  #endif /* _UAPI__ASM_PTRACE_H */
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index fff9fcf..361c019 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -303,6 +303,37 @@ void sve_alloc(struct task_struct *task)
>  	BUG_ON(!task->thread.sve_state);
>  }
>
> +void fpsimd_sync_to_sve(struct task_struct *task)
> +{
> +	if (!test_tsk_thread_flag(task, TIF_SVE))
> +		fpsimd_to_sve(task);
> +}
> +
> +void sve_sync_to_fpsimd(struct task_struct *task)
> +{
> +	if (test_tsk_thread_flag(task, TIF_SVE))
> +		sve_to_fpsimd(task);
> +}
> +
> +void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
> +{
> +	unsigned int vq;
> +	void *sst = task->thread.sve_state;
> +	struct fpsimd_state const *fst = &task->thread.fpsimd_state;
> +	unsigned int i;
> +
> +	if (!test_tsk_thread_flag(task, TIF_SVE))
> +		return;
> +
> +	vq = sve_vq_from_vl(task->thread.sve_vl);
> +
> +	memset(sst, 0, SVE_SIG_REGS_SIZE(vq));
> +
> +	for (i = 0; i < 32; ++i)
> +		memcpy(ZREG(sst, vq, i), &fst->vregs[i],
> +		       sizeof(fst->vregs[i]));
> +}
> +
>  /*
>   * Handle SVE state across fork():
>   *
> @@ -459,10 +490,17 @@ static void __init sve_efi_setup(void)
>  	 * This is evidence of a crippled system and we are returning void,
>  	 * so no attempt is made to handle this situation here.
>  	 */
> -	BUG_ON(!sve_vl_valid(sve_max_vl));
> +	if (!sve_vl_valid(sve_max_vl))
> +		goto fail;
> +
>  	efi_sve_state = __alloc_percpu(
>  		SVE_SIG_REGS_SIZE(sve_vq_from_vl(sve_max_vl)), SVE_VQ_BYTES);
>  	if (!efi_sve_state)
> +		goto fail;
> +
> +	return;
> +
> +fail:
>  		panic("Cannot allocate percpu memory for EFI SVE save/restore");
>  }
>
> diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
> index 9cbb612..5ef4735b 100644
> --- a/arch/arm64/kernel/ptrace.c
> +++ b/arch/arm64/kernel/ptrace.c
> @@ -32,6 +32,7 @@
>  #include <linux/security.h>
>  #include <linux/init.h>
>  #include <linux/signal.h>
> +#include <linux/string.h>
>  #include <linux/uaccess.h>
>  #include <linux/perf_event.h>
>  #include <linux/hw_breakpoint.h>
> @@ -40,6 +41,7 @@
>  #include <linux/elf.h>
>
>  #include <asm/compat.h>
> +#include <asm/cpufeature.h>
>  #include <asm/debug-monitors.h>
>  #include <asm/pgtable.h>
>  #include <asm/stacktrace.h>
> @@ -618,33 +620,66 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset,
>  /*
>   * TODO: update fp accessors for lazy context switching (sync/flush hwstate)
>   */
> -static int fpr_get(struct task_struct *target, const struct user_regset *regset,
> -		   unsigned int pos, unsigned int count,
> -		   void *kbuf, void __user *ubuf)
> +static int __fpr_get(struct task_struct *target,
> +		     const struct user_regset *regset,
> +		     unsigned int pos, unsigned int count,
> +		     void *kbuf, void __user *ubuf, unsigned int start_pos)
>  {
>  	struct user_fpsimd_state *uregs;
> +
> +	sve_sync_to_fpsimd(target);
> +
>  	uregs = &target->thread.fpsimd_state.user_fpsimd;
>
> +	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs,
> +				   start_pos, start_pos + sizeof(*uregs));
> +}
> +
> +static int fpr_get(struct task_struct *target, const struct user_regset *regset,
> +		   unsigned int pos, unsigned int count,
> +		   void *kbuf, void __user *ubuf)
> +{
>  	if (target == current)
>  		fpsimd_preserve_current_state();
>
> -	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1);
> +	return __fpr_get(target, regset, pos, count, kbuf, ubuf, 0);
>  }
>
> -static int fpr_set(struct task_struct *target, const struct user_regset *regset,
> -		   unsigned int pos, unsigned int count,
> -		   const void *kbuf, const void __user *ubuf)
> +static int __fpr_set(struct task_struct *target,
> +		     const struct user_regset *regset,
> +		     unsigned int pos, unsigned int count,
> +		     const void *kbuf, const void __user *ubuf,
> +		     unsigned int start_pos)
>  {
>  	int ret;
>  	struct user_fpsimd_state newstate =
>  		target->thread.fpsimd_state.user_fpsimd;
>
> -	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, 0, -1);
> +	sve_sync_to_fpsimd(target);
> +
> +	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate,
> +				 start_pos, start_pos + sizeof(newstate));
>  	if (ret)
>  		return ret;
>
>  	target->thread.fpsimd_state.user_fpsimd = newstate;
> +
> +	return ret;
> +}
> +
> +static int fpr_set(struct task_struct *target, const struct user_regset *regset,
> +		   unsigned int pos, unsigned int count,
> +		   const void *kbuf, const void __user *ubuf)
> +{
> +	int ret;
> +
> +	ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, 0);
> +	if (ret)
> +		return ret;
> +
> +	sve_sync_from_fpsimd_zeropad(target);
>  	fpsimd_flush_task_state(target);
> +
>  	return ret;
>  }
>
> @@ -702,6 +737,210 @@ static int system_call_set(struct task_struct *target,
>  	return ret;
>  }
>
> +#ifdef CONFIG_ARM64_SVE
> +
> +static void sve_init_header_from_task(struct user_sve_header *header,
> +				      struct task_struct *target)
> +{
> +	unsigned int vq;
> +
> +	memset(header, 0, sizeof(*header));
> +
> +	header->flags = test_tsk_thread_flag(target, TIF_SVE) ?
> +		SVE_PT_REGS_SVE : SVE_PT_REGS_FPSIMD;
> +	if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT))
> +		header->flags |= SVE_PT_VL_INHERIT;
> +
> +	header->vl = target->thread.sve_vl;
> +	vq = sve_vq_from_vl(header->vl);
> +
> +	if (WARN_ON(!sve_vl_valid(sve_max_vl)))
> +		header->max_vl = header->vl;
> +
> +	header->size = SVE_PT_SIZE(vq, header->flags);
> +	header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl),
> +				      SVE_PT_REGS_SVE);
> +}
> +
> +static unsigned int sve_size_from_header(struct user_sve_header const *header)
> +{
> +	return ALIGN(header->size, SVE_VQ_BYTES);
> +}
> +
> +static unsigned int sve_get_size(struct task_struct *target,
> +				 const struct user_regset *regset)
> +{
> +	struct user_sve_header header;
> +
> +	if (!system_supports_sve())
> +		return 0;
> +
> +	sve_init_header_from_task(&header, target);
> +	return sve_size_from_header(&header);
> +}
> +
> +static int sve_get(struct task_struct *target,
> +		   const struct user_regset *regset,
> +		   unsigned int pos, unsigned int count,
> +		   void *kbuf, void __user *ubuf)
> +{
> +	int ret;
> +	struct user_sve_header header;
> +	unsigned int vq;
> +	unsigned long start, end;
> +
> +	if (!system_supports_sve())
> +		return -EINVAL;
> +
> +	/* Header */
> +	sve_init_header_from_task(&header, target);
> +	vq = sve_vq_from_vl(header.vl);
> +
> +	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &header,
> +				  0, sizeof(header));
> +	if (ret)
> +		return ret;
> +
> +	if (target == current)
> +		fpsimd_preserve_current_state();
> +
> +	/* Registers: FPSIMD-only case */
> +
> +	BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header));
> +	if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD)
> +		return __fpr_get(target, regset, pos, count, kbuf, ubuf,
> +				 SVE_PT_FPSIMD_OFFSET);
> +
> +	/* Otherwise: full SVE case */
> +
> +	BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
> +	start = SVE_PT_SVE_OFFSET;
> +	end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq);
> +	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
> +				  target->thread.sve_state,
> +				  start, end);
> +	if (ret)
> +		return ret;
> +
> +	start = end;
> +	end = SVE_PT_SVE_FPSR_OFFSET(vq);
> +	ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
> +				       start, end);
> +	if (ret)
> +		return ret;
> +
> +	/*
> +	 * Copy fpsr, and fpcr which must follow contiguously in
> +	 * struct fpsimd_state:
> +	 */
> +	start = end;
> +	end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE;
> +	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
> +				  &target->thread.fpsimd_state.fpsr,
> +				  start, end);
> +	if (ret)
> +		return ret;
> +
> +	start = end;
> +	end = sve_size_from_header(&header);
> +	return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
> +					start, end);
> +}
> +
> +static int sve_set(struct task_struct *target,
> +		   const struct user_regset *regset,
> +		   unsigned int pos, unsigned int count,
> +		   const void *kbuf, const void __user *ubuf)
> +{
> +	int ret;
> +	struct user_sve_header header;
> +	unsigned int vq;
> +	unsigned long start, end;
> +
> +	if (!system_supports_sve())
> +		return -EINVAL;
> +
> +	/* Header */
> +	if (count < sizeof(header))
> +		return -EINVAL;
> +	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &header,
> +				 0, sizeof(header));
> +	if (ret)
> +		goto out;
> +
> +	/*
> +	 * Apart from PT_SVE_REGS_MASK, all PT_SVE_* flags are consumed by
> +	 * sve_set_vector_length(), which will also validate them for us:
> +	 */
> +	ret = sve_set_vector_length(target, header.vl,
> +				    header.flags & ~SVE_PT_REGS_MASK);
> +	if (ret)
> +		goto out;
> +
> +	/* Actual VL set may be less than the user asked for: */
> +	vq = sve_vq_from_vl(target->thread.sve_vl);
> +
> +	/* Registers: FPSIMD-only case */
> +
> +	BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header));
> +	if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD) {
> +		sve_sync_to_fpsimd(target);
> +
> +		ret = __fpr_set(target, regset, pos, count, kbuf, ubuf,
> +				SVE_PT_FPSIMD_OFFSET);
> +		clear_tsk_thread_flag(target, TIF_SVE);
> +		goto out;
> +	}
> +
> +	/* Otherwise: full SVE case */
> +
> +	/*
> +	 * If setting a different VL from the requested VL and there is
> +	 * register data, the data layout will be wrong: don't even
> +	 * try to set the registers in this case.
> +	 */
> +	if (count && vq != sve_vq_from_vl(header.vl)) {
> +		ret = -EIO;
> +		goto out;
> +	}
> +
> +	sve_alloc(target);
> +	fpsimd_sync_to_sve(target);
> +	set_tsk_thread_flag(target, TIF_SVE);
> +
> +	BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
> +	start = SVE_PT_SVE_OFFSET;
> +	end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq);
> +	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
> +				 target->thread.sve_state,
> +				 start, end);
> +	if (ret)
> +		goto out;
> +
> +	start = end;
> +	end = SVE_PT_SVE_FPSR_OFFSET(vq);
> +	ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
> +					start, end);
> +	if (ret)
> +		goto out;
> +
> +	/*
> +	 * Copy fpsr, and fpcr which must follow contiguously in
> +	 * struct fpsimd_state:
> +	 */
> +	start = end;
> +	end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE;
> +	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
> +				 &target->thread.fpsimd_state.fpsr,
> +				 start, end);
> +
> +out:
> +	fpsimd_flush_task_state(target);
> +	return ret;
> +}
> +
> +#endif /* CONFIG_ARM64_SVE */
> +
>  enum aarch64_regset {
>  	REGSET_GPR,
>  	REGSET_FPR,
> @@ -711,6 +950,9 @@ enum aarch64_regset {
>  	REGSET_HW_WATCH,
>  #endif
>  	REGSET_SYSTEM_CALL,
> +#ifdef CONFIG_ARM64_SVE
> +	REGSET_SVE,
> +#endif
>  };
>
>  static const struct user_regset aarch64_regsets[] = {
> @@ -768,6 +1010,18 @@ static const struct user_regset aarch64_regsets[] = {
>  		.get = system_call_get,
>  		.set = system_call_set,
>  	},
> +#ifdef CONFIG_ARM64_SVE
> +	[REGSET_SVE] = { /* Scalable Vector Extension */
> +		.core_note_type = NT_ARM_SVE,
> +		.n = DIV_ROUND_UP(SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE),
> +				  SVE_VQ_BYTES),
> +		.size = SVE_VQ_BYTES,
> +		.align = SVE_VQ_BYTES,
> +		.get = sve_get,
> +		.set = sve_set,
> +		.get_size = sve_get_size,
> +	},
> +#endif
>  };
>
>  static const struct user_regset_view user_aarch64_view = {
> diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
> index b5280db..735b8f4 100644
> --- a/include/uapi/linux/elf.h
> +++ b/include/uapi/linux/elf.h
> @@ -416,6 +416,7 @@ typedef struct elf64_shdr {
>  #define NT_ARM_HW_BREAK	0x402		/* ARM hardware breakpoint registers */
>  #define NT_ARM_HW_WATCH	0x403		/* ARM hardware watchpoint registers */
>  #define NT_ARM_SYSTEM_CALL	0x404	/* ARM system call number */
> +#define NT_ARM_SVE	0x405		/* ARM Scalable Vector Extension registers */
>  #define NT_METAG_CBUF	0x500		/* Metag catch buffer registers */
>  #define NT_METAG_RPIPE	0x501		/* Metag read pipeline state */
>  #define NT_METAG_TLS	0x502		/* Metag TLS pointer */

Otherwise:

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

--
Alex Bennée


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 14/28] arm64/sve: Backend logic for setting the vector length
  2017-08-31 17:09 ` [PATCH v2 14/28] arm64/sve: Backend logic for setting the vector length Dave Martin
  2017-09-13 17:29   ` Catalin Marinas
@ 2017-09-20 11:00   ` Alan Hayward
       [not found]     ` <20170920110902.GG24231@e103592.cambridge.arm.com>
  1 sibling, 1 reply; 21+ messages in thread
From: Alan Hayward @ 2017-09-20 11:00 UTC (permalink / raw)
  To: Dave P Martin
  Cc: linux-arm-kernel, Catalin Marinas, Will Deacon, Ard Biesheuvel,
	Alex Bennée, Szabolcs Nagy, Richard Sandiford, kvmarm,
	libc-alpha, linux-arch, gdb, Yao Qi, nd

(Resending without disclaimer)

> On 31 Aug 2017, at 18:00, Dave Martin <Dave.Martin@arm.com> wrote:

> 
> +int sve_set_vector_length(struct task_struct *task,
> +			  unsigned long vl, unsigned long flags)
> +{
> +	WARN_ON(task == current && preemptible());
> +
> +	if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
> +				     PR_SVE_SET_VL_ONEXEC))
> +		return -EINVAL;
> +
> +	if (!sve_vl_valid(vl))
> +		return -EINVAL;
> +
> +	/*
> +	 * Clamp to the maximum vector length that VL-agnostic SVE code can
> +	 * work with.  A flag may be assigned in the future to allow setting
> +	 * of larger vector lengths without confusing older software.
> +	 */
> +	if (vl > SVE_VL_ARCH_MAX)
> +		vl = SVE_VL_ARCH_MAX;
> +
> +	vl = find_supported_vector_length(vl);
> +


Given, sve_set_vector_length is called when setting the vector length in
PTRACE_SETREGSET, it looks to me like if you set VL to a value that’s not
supported by the hardware, then it’s going to round down to the previous value.
Is that correct? I’m not sure if that’s explained in the docs?

What happens if you give a vl value lower than the min supported value in the
hardware?


> +/*
> + * All vector length selection from userspace comes through here.
> + * We're on a slow path, so some sanity-checks are included.
> + * If things go wrong there's a bug somewhere, but try to fall back to a
> + * safe choice.
> + */
> +static unsigned int find_supported_vector_length(unsigned int vl)
> +{
> +	int bit;
> +	int max_vl = sve_max_vl;
> +
> +	if (WARN_ON(!sve_vl_valid(vl)))
> +		vl = SVE_VL_MIN;
> +
> +	if (WARN_ON(!sve_vl_valid(max_vl)))
> +		max_vl = SVE_VL_MIN;
> +
> +	if (vl > max_vl)
> +		vl = max_vl;
> +
> +	bit = find_next_bit(sve_vq_map, SVE_VQ_MAX,
> +			    vq_to_bit(sve_vq_from_vl(vl)));
> +	return sve_vl_from_vq(bit_to_vq(bit));
> +}
> +


Thanks,
Alan.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 14/28] arm64/sve: Backend logic for setting the vector length
       [not found]     ` <20170920110902.GG24231@e103592.cambridge.arm.com>
@ 2017-09-20 18:08       ` Alan Hayward
  2017-09-21 11:19         ` Dave Martin
  0 siblings, 1 reply; 21+ messages in thread
From: Alan Hayward @ 2017-09-20 18:08 UTC (permalink / raw)
  To: Dave Martin
  Cc: Dave P Martin, linux-arch, libc-alpha, gdb, Ard Biesheuvel,
	Szabolcs Nagy, Catalin Marinas, Yao Qi, Will Deacon,
	Richard Sandiford, nd, Alex Bennée, kvmarm,
	linux-arm-kernel


> On 20 Sep 2017, at 12:09, Dave Martin <dave.martin@foss.arm.com> wrote:
> 
> On Wed, Sep 20, 2017 at 10:59:55AM +0000, Alan Hayward wrote:
>> (Resending without disclaimer)
>> 
>>> On 31 Aug 2017, at 18:00, Dave Martin <Dave.Martin@arm.com> wrote:
>> 
>>> 
>>> +int sve_set_vector_length(struct task_struct *task,
>>> +			  unsigned long vl, unsigned long flags)
>>> +{
>>> +	WARN_ON(task == current && preemptible());
>>> +
>>> +	if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
>>> +				     PR_SVE_SET_VL_ONEXEC))
>>> +		return -EINVAL;
>>> +
>>> +	if (!sve_vl_valid(vl))
>>> +		return -EINVAL;
>>> +
>>> +	/*
>>> +	 * Clamp to the maximum vector length that VL-agnostic SVE code can
>>> +	 * work with.  A flag may be assigned in the future to allow setting
>>> +	 * of larger vector lengths without confusing older software.
>>> +	 */
>>> +	if (vl > SVE_VL_ARCH_MAX)
>>> +		vl = SVE_VL_ARCH_MAX;
>>> +
>>> +	vl = find_supported_vector_length(vl);
>>> +
>> 
>> 
>> Given, sve_set_vector_length is called when setting the vector length in
>> PTRACE_SETREGSET, it looks to me like if you set VL to a value that’s not
>> supported by the hardware, then it’s going to round down to the previous value.
>> Is that correct? I’m not sure if that’s explained in the docs?
> 
> Does this cover it?
> 
> "On success, the calling thread's vector length is changed to the
> largest value supported by the system that is less than or equal to vl."
> 
> (For ptrace, I just cross-reference the PR_SVE_SET_VL behaviour, above.)

For ptrace is it worth mentioning user should do a GET after a SET to confirm
what VL value was actually set?

> 
>> What happens if you give a vl value lower than the min supported value in the
>> hardware?
> 
> This is impossible, unless vl < SVE_VL_MIN (which is rejected explicitly
> by the !sve_vl_valid() check in sve_set_vector_length()).
> 
> The architecture requires support for all power-of-two vector lengths
> less than the maximum supported vector length, so by construction
> SVE_VL_MIN is supported by all hardware.

Ok, I’m happy with that.

> 
> To be defensive, if we fail to detect support for SVE_VL_MIN, I set the
> corresponding bit in sve_vq_map and WARN.  This is just to help ensure
> find_supported_vector_length doesn't fall off the end of sve_vq_map.
> 
> 
> Does that sounds correct?  There may be a clearer way of achieving this.
> 
> Cheers
> ---Dave
> 
>> 
>> 
>>> +/*
>>> + * All vector length selection from userspace comes through here.
>>> + * We're on a slow path, so some sanity-checks are included.
>>> + * If things go wrong there's a bug somewhere, but try to fall back to a
>>> + * safe choice.
>>> + */
>>> +static unsigned int find_supported_vector_length(unsigned int vl)
>>> +{
>>> +	int bit;
>>> +	int max_vl = sve_max_vl;
>>> +
>>> +	if (WARN_ON(!sve_vl_valid(vl)))
>>> +		vl = SVE_VL_MIN;
>>> +
>>> +	if (WARN_ON(!sve_vl_valid(max_vl)))
>>> +		max_vl = SVE_VL_MIN;
>>> +
>>> +	if (vl > max_vl)
>>> +		vl = max_vl;
>>> +
>>> +	bit = find_next_bit(sve_vq_map, SVE_VQ_MAX,
>>> +			    vq_to_bit(sve_vq_from_vl(vl)));
>>> +	return sve_vl_from_vq(bit_to_vq(bit));
>>> +}
>>> +
>> 
>> 
>> Thanks,
>> Alan.
>> _______________________________________________
>> linux-arm-kernel mailing list
>> linux-arm-kernel@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 14/28] arm64/sve: Backend logic for setting the vector length
  2017-09-20 18:08       ` Alan Hayward
@ 2017-09-21 11:19         ` Dave Martin
  2017-09-21 11:57           ` Alan Hayward
  0 siblings, 1 reply; 21+ messages in thread
From: Dave Martin @ 2017-09-21 11:19 UTC (permalink / raw)
  To: Alan Hayward
  Cc: Dave Martin, linux-arch, libc-alpha, gdb, Ard Biesheuvel,
	Szabolcs Nagy, Catalin Marinas, Yao Qi, Will Deacon,
	Richard Sandiford, nd, Alex Bennée, kvmarm,
	linux-arm-kernel

On Wed, Sep 20, 2017 at 06:08:21PM +0000, Alan Hayward wrote:
> 
> > On 20 Sep 2017, at 12:09, Dave Martin <dave.martin@foss.arm.com> wrote:

[...]

> >> Given, sve_set_vector_length is called when setting the vector length in
> >> PTRACE_SETREGSET, it looks to me like if you set VL to a value that’s not
> >> supported by the hardware, then it’s going to round down to the previous value.
> >> Is that correct? I’m not sure if that’s explained in the docs?
> > 
> > Does this cover it?
> > 
> > "On success, the calling thread's vector length is changed to the
> > largest value supported by the system that is less than or equal to vl."
> > 
> > (For ptrace, I just cross-reference the PR_SVE_SET_VL behaviour, above.)
> 
> For ptrace is it worth mentioning user should do a GET after a SET to confirm
> what VL value was actually set?

This seems worth a clarification -- I'd thought this was already
mentioned, but it isn't.

How about:

  The caller must make a further GETREGSET call if it needs to know what VL is
  actually set by SETREGSET, unless it is known in advance that the requested
  VL is supported.


[...]

Cheers
---Dave


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 14/28] arm64/sve: Backend logic for setting the vector length
  2017-09-21 11:19         ` Dave Martin
@ 2017-09-21 11:57           ` Alan Hayward
  0 siblings, 0 replies; 21+ messages in thread
From: Alan Hayward @ 2017-09-21 11:57 UTC (permalink / raw)
  To: Dave P Martin
  Cc: Dave Martin, linux-arch, libc-alpha, gdb, Ard Biesheuvel,
	Szabolcs Nagy, Catalin Marinas, Yao Qi, Will Deacon,
	Richard Sandiford, nd, Alex Bennée, kvmarm,
	linux-arm-kernel

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 1314 bytes --]


> On 21 Sep 2017, at 12:19, Dave Martin <Dave.Martin@arm.com> wrote:
> 
> On Wed, Sep 20, 2017 at 06:08:21PM +0000, Alan Hayward wrote:
>> 
>>> On 20 Sep 2017, at 12:09, Dave Martin <dave.martin@foss.arm.com> wrote:
> 
> [...]
> 
>>>> Given, sve_set_vector_length is called when setting the vector length in
>>>> PTRACE_SETREGSET, it looks to me like if you set VL to a value that’s not
>>>> supported by the hardware, then it’s going to round down to the previous value.
>>>> Is that correct? I’m not sure if that’s explained in the docs?
>>> 
>>> Does this cover it?
>>> 
>>> "On success, the calling thread's vector length is changed to the
>>> largest value supported by the system that is less than or equal to vl."
>>> 
>>> (For ptrace, I just cross-reference the PR_SVE_SET_VL behaviour, above.)
>> 
>> For ptrace is it worth mentioning user should do a GET after a SET to confirm
>> what VL value was actually set?
> 
> This seems worth a clarification -- I'd thought this was already
> mentioned, but it isn't.
> 
> How about:
> 
>  The caller must make a further GETREGSET call if it needs to know what VL is
>  actually set by SETREGSET, unless it is known in advance that the requested
>  VL is supported.
> 

Looks good to me.


Alan.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 19/28] arm64/sve: ptrace and ELF coredump support
  2017-09-14 12:57   ` Alex Bennée
@ 2017-09-28 14:57     ` Dave Martin
  2017-09-29 12:46     ` Dave Martin
  1 sibling, 0 replies; 21+ messages in thread
From: Dave Martin @ 2017-09-28 14:57 UTC (permalink / raw)
  To: Alex Bennée
  Cc: linux-arch, libc-alpha, gdb, Ard Biesheuvel, Szabolcs Nagy,
	Catalin Marinas, Yao Qi, Alan Hayward, Will Deacon,
	Oleg Nesterov, Alexander Viro, Richard Sandiford, kvmarm,
	linux-arm-kernel

On Thu, Sep 14, 2017 at 01:57:08PM +0100, Alex Bennée wrote:
> 
> Dave Martin <Dave.Martin@arm.com> writes:
> 
> > This patch defines and implements a new regset NT_ARM_SVE, which
> > describes a thread's SVE register state.  This allows a debugger to
> > manipulate the SVE state, as well as being included in ELF
> > coredumps for post-mortem debugging.
> >
> > Because the regset size and layout are dependent on the thread's
> > current vector length, it is not possible to define a C struct to
> > describe the regset contents as is done for existing regsets.
> > Instead, and for the same reasons, NT_ARM_SVE is based on the
> > freeform variable-layout approach used for the SVE signal frame.
> >
> > Additionally, to reduce debug overhead when debugging threads that
> > might or might not have live SVE register state, NT_ARM_SVE may be
> > presented in one of two different formats: the old struct
> > user_fpsimd_state format is embedded for describing the state of a
> > thread with no live SVE state, whereas a new variable-layout
> > structure is embedded for describing live SVE state.  This avoids a
> > debugger needing to poll NT_PRFPREG in addition to NT_ARM_SVE, and
> > allows existing userspace code to handle the non-SVE case without
> > too much modification.
> >
> > For this to work, NT_ARM_SVE is defined with a fixed-format header
> > of type struct user_sve_header, which the recipient can use to
> > figure out the content, size and layout of the rest of the regset.
> > Accessor macros are defined to allow the vector-length-dependent
> > parts of the regset to be manipulated.
> >
> > Signed-off-by: Alan Hayward <alan.hayward@arm.com>
> > Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> > Cc: Alex Bennée <alex.bennee@linaro.org>
> >
> > ---
> >
> > Changes since v1
> > ----------------
> >
> > Other changes related to Alex Bennée's comments:
> >
> > * Migrate to SVE_VQ_BYTES instead of magic numbers.
> >
> > Requested by Alex Bennée:
> >
> > * Thin out BUG_ON()s:
> > Redundant BUG_ON()s and ones that just check invariants are removed.
> > Important sanity-checks are migrated to WARN_ON()s, with some
> > minimal best-effort patch-up code.
> >
> > Other:
> >
> > * [ABI fix] Bail out with -EIO if attempting to set the
> > SVE regs for an unsupported VL, instead of misparsing the regset data.
> >
> > * Replace some in-kernel open-coded arithmetic with ALIGN()/
> > DIV_ROUND_UP().
> > ---

[...]

> > diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
> > index d1ff83d..1915ab0 100644
> > --- a/arch/arm64/include/uapi/asm/ptrace.h
> > +++ b/arch/arm64/include/uapi/asm/ptrace.h
> > @@ -22,6 +22,7 @@
> >  #include <linux/types.h>
> >
> >  #include <asm/hwcap.h>
> > +#include <asm/sigcontext.h>
> >
> >
> >  /*
> > @@ -63,6 +64,8 @@
> >
> >  #ifndef __ASSEMBLY__
> >
> > +#include <linux/prctl.h>
> > +
> >  /*
> >   * User structures for general purpose, floating point and debug registers.
> >   */
> > @@ -90,6 +93,138 @@ struct user_hwdebug_state {
> >  	}		dbg_regs[16];
> >  };
> >
> > +/* SVE/FP/SIMD state (NT_ARM_SVE) */
> > +
> > +struct user_sve_header {
> > +	__u32 size; /* total meaningful regset content in bytes */
> > +	__u32 max_size; /* maximum possible size for this thread */
> > +	__u16 vl; /* current vector length */
> > +	__u16 max_vl; /* maximum possible vector length */
> > +	__u16 flags;
> > +	__u16 __reserved;
> > +};
> > +
> > +/* Definitions for user_sve_header.flags: */
> > +#define SVE_PT_REGS_MASK		(1 << 0)
> > +
> > +/* Flags: must be kept in sync with prctl interface in
> > <linux/ptrace.h> */
> 
> Which flags? We base some on PR_foo flags but we seem to shift them

All the prctl flags that have equivalents here, because they're part of
the internal API to sve_set_vector_length().  It didn't quite seem
appropriate to document that in a userspace header, but it's probably
better to say something here than not.  I'll improve the comment.

> anyway so where is the requirement for them to match from?

There is a bug here though: sve_set() in ptrace.c is supposed to shift
the flags from header.flags (which is a u16) back into the
PR_SVE_SET_VL position (<< 16) for the flags argument of
sve_set_vector_length().  But this isn't done, so attempting to set (or
restore) those flags through ptrace may result in EINVALs from
sve_set_vector_length().

I'll write a test for this case and implement a fix, something like...

-8<-

 static int sve_set(struct task_struct *target,
[...]
 	ret = sve_set_vector_length(target, header.vl,
-				    header.flags & ~SVE_PT_REGS_MASK);
+				    (header.flags & ~SVE_PT_REGS_MASK) << 16UL);


->8-

What do you think?

[...]

> Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

Again, I'll wait for your feedback first.

Cheers
---Dave


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 19/28] arm64/sve: ptrace and ELF coredump support
  2017-09-14 12:57   ` Alex Bennée
  2017-09-28 14:57     ` Dave Martin
@ 2017-09-29 12:46     ` Dave Martin
  1 sibling, 0 replies; 21+ messages in thread
From: Dave Martin @ 2017-09-29 12:46 UTC (permalink / raw)
  To: Alex Bennée
  Cc: linux-arch, libc-alpha, gdb, Ard Biesheuvel, Szabolcs Nagy,
	Catalin Marinas, Yao Qi, Alan Hayward, Will Deacon,
	Oleg Nesterov, Alexander Viro, Richard Sandiford, kvmarm,
	linux-arm-kernel

On Thu, Sep 14, 2017 at 01:57:08PM +0100, Alex Bennée wrote:
> 
> Dave Martin <Dave.Martin@arm.com> writes:
> 
> > This patch defines and implements a new regset NT_ARM_SVE, which
> > describes a thread's SVE register state.  This allows a debugger to
> > manipulate the SVE state, as well as being included in ELF
> > coredumps for post-mortem debugging.
> >
> > Because the regset size and layout are dependent on the thread's
> > current vector length, it is not possible to define a C struct to
> > describe the regset contents as is done for existing regsets.
> > Instead, and for the same reasons, NT_ARM_SVE is based on the
> > freeform variable-layout approach used for the SVE signal frame.
> >
> > Additionally, to reduce debug overhead when debugging threads that
> > might or might not have live SVE register state, NT_ARM_SVE may be
> > presented in one of two different formats: the old struct
> > user_fpsimd_state format is embedded for describing the state of a
> > thread with no live SVE state, whereas a new variable-layout
> > structure is embedded for describing live SVE state.  This avoids a
> > debugger needing to poll NT_PRFPREG in addition to NT_ARM_SVE, and
> > allows existing userspace code to handle the non-SVE case without
> > too much modification.
> >
> > For this to work, NT_ARM_SVE is defined with a fixed-format header
> > of type struct user_sve_header, which the recipient can use to
> > figure out the content, size and layout of the rest of the regset.
> > Accessor macros are defined to allow the vector-length-dependent
> > parts of the regset to be manipulated.
> >
> > Signed-off-by: Alan Hayward <alan.hayward@arm.com>
> > Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> > Cc: Alex Bennée <alex.bennee@linaro.org>
> >
> > ---
> >
> > Changes since v1
> > ----------------
> >
> > Other changes related to Alex Bennée's comments:
> >
> > * Migrate to SVE_VQ_BYTES instead of magic numbers.
> >
> > Requested by Alex Bennée:
> >
> > * Thin out BUG_ON()s:
> > Redundant BUG_ON()s and ones that just check invariants are removed.
> > Important sanity-checks are migrated to WARN_ON()s, with some
> > minimal best-effort patch-up code.
> >
> > Other:
> >
> > * [ABI fix] Bail out with -EIO if attempting to set the
> > SVE regs for an unsupported VL, instead of misparsing the regset data.
> >
> > * Replace some in-kernel open-coded arithmetic with ALIGN()/
> > DIV_ROUND_UP().
> > ---

[...]

> > diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h

[...]

> > +/* Definitions for user_sve_header.flags: */
> > +#define SVE_PT_REGS_MASK		(1 << 0)
> > +
> > +/* Flags: must be kept in sync with prctl interface in
> > <linux/ptrace.h> */
> 
> Which flags? We base some on PR_foo flags but we seem to shift them
> anyway so where is the requirement for them to match from?

I've rearranged this as:

-8<-

/* Definitions for user_sve_header.flags: */
#define SVE_PT_REGS_MASK		(1 << 0)

#define SVE_PT_REGS_FPSIMD		0
#define SVE_PT_REGS_SVE			SVE_PT_REGS_MASK

/*
 * Common SVE_PT_* flags:
 * These must be kept in sync with prctl interface in <linux/ptrace.h>
 */
#define SVE_PT_VL_INHERIT		(PR_SVE_VL_INHERIT >> 16)
#define SVE_PT_VL_ONEXEC		(PR_SVE_SET_VL_ONEXEC >> 16)

->8-

This avoids the suggestion that SVE_PT_REGS_{MASK,FPSIMD,SVE} are
supposed to have prctl counterparts.

I don't really want to write more, in case it is misinterpreted as
specification of behaviour.

This comment is really only meant as a reminder to maintainers that
they should go look at prctl.h too, before blindly making changes
here.


Any good?  If you have a different suggestion, I'm all ears...

[...]

Cheers
---Dave


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 14/28] arm64/sve: Backend logic for setting the vector length
  2017-09-13 22:11       ` Catalin Marinas
@ 2017-10-05 16:42         ` Dave Martin
  2017-10-05 16:53           ` Catalin Marinas
  0 siblings, 1 reply; 21+ messages in thread
From: Dave Martin @ 2017-10-05 16:42 UTC (permalink / raw)
  To: Catalin Marinas
  Cc: linux-arch, libc-alpha, Ard Biesheuvel, Szabolcs Nagy, gdb,
	Yao Qi, Alan Hayward, Will Deacon, Richard Sandiford,
	Alex Bennée, kvmarm, linux-arm-kernel

On Wed, Sep 13, 2017 at 03:11:23PM -0700, Catalin Marinas wrote:
> On Wed, Sep 13, 2017 at 08:06:12PM +0100, Dave P Martin wrote:
> > On Wed, Sep 13, 2017 at 10:29:11AM -0700, Catalin Marinas wrote:
> > > On Thu, Aug 31, 2017 at 06:00:46PM +0100, Dave P Martin wrote:
> > > > This patch implements the core logic for changing a task's vector
> > > > length on request from userspace.  This will be used by the ptrace
> > > > and prctl frontends that are implemented in later patches.
> > > > 
> > > > The SVE architecture permits, but does not require, implementations
> > > > to support vector lengths that are not a power of two.  To handle
> > > > this, logic is added to check a requested vector length against a
> > > > possibly sparse bitmap of available vector lengths at runtime, so
> > > > that the best supported value can be chosen.
> > > > 
> > > > Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> > > > Cc: Alex Bennée <alex.bennee@linaro.org>
> > > 
> > > Can this be merged with patch 20? It seems to add the PR_ definitions
> > > which get actually used later when the prctl interface is added.
> > 
> > This patch is used both by patch 19 and by patch 20, which I preferred
> > not to merge with each other: ptrace and prctl are significantly
> > different things.
> > 
> > The prctl bit definitions are added here because they are the canonical
> > definitions used by both interfaces.  The ptrace #defines are based on
> > them.
> > 
> > Does it make sense if I merge patch 20 into this one and apply patch 19
> on top?  This avoids the appearance of prctl #defines with no prctl
> > implementation.
> 
> That's fine, you can bring patch 20 forward. If there are other
> non-trivial issues, feel free to ignore my comment.

I've had a go at this, but I think it's going to be more trouble than
it's worth -- there are other interdependencies between the patches
which make them tricky to reorder.

I could add a note in the commit message for this patch explaining why
the prctl flag #defines are being added here.  What do you think?

Cheers
---Dave


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 14/28] arm64/sve: Backend logic for setting the vector length
  2017-10-05 16:42         ` Dave Martin
@ 2017-10-05 16:53           ` Catalin Marinas
  2017-10-05 17:04             ` Dave Martin
  0 siblings, 1 reply; 21+ messages in thread
From: Catalin Marinas @ 2017-10-05 16:53 UTC (permalink / raw)
  To: Dave Martin
  Cc: linux-arch, libc-alpha, Ard Biesheuvel, Szabolcs Nagy, gdb,
	Yao Qi, Will Deacon, Richard Sandiford, Alan Hayward,
	Alex Bennée, kvmarm, linux-arm-kernel

On Thu, Oct 05, 2017 at 05:42:29PM +0100, Dave P Martin wrote:
> On Wed, Sep 13, 2017 at 03:11:23PM -0700, Catalin Marinas wrote:
> > On Wed, Sep 13, 2017 at 08:06:12PM +0100, Dave P Martin wrote:
> > > On Wed, Sep 13, 2017 at 10:29:11AM -0700, Catalin Marinas wrote:
> > > > On Thu, Aug 31, 2017 at 06:00:46PM +0100, Dave P Martin wrote:
> > > > > This patch implements the core logic for changing a task's vector
> > > > > length on request from userspace.  This will be used by the ptrace
> > > > > and prctl frontends that are implemented in later patches.
> > > > > 
> > > > > The SVE architecture permits, but does not require, implementations
> > > > > to support vector lengths that are not a power of two.  To handle
> > > > > this, logic is added to check a requested vector length against a
> > > > > possibly sparse bitmap of available vector lengths at runtime, so
> > > > > that the best supported value can be chosen.
> > > > > 
> > > > > Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> > > > > Cc: Alex Bennée <alex.bennee@linaro.org>
> > > > 
> > > > Can this be merged with patch 20? It seems to add the PR_ definitions
> > > > which get actually used later when the prctl interface is added.
> > > 
> > > This patch is used both by patch 19 and by patch 20, which I preferred
> > > not to merge with each other: ptrace and prctl are significantly
> > > different things.
> > > 
> > > The prctl bit definitions are added here because they are the canonical
> > > definitions used by both interfaces.  The ptrace #defines are based on
> > > them.
> > > 
> > > Does it make sense if I merge patch 20 into this one and apply patch 19
> > > on top?  This avoide the appearance of prctl #defines with no prctl
> > > implementation.
> > 
> > That's fine, you can bring patch 20 forward. If there are other
> > non-trivial issues, feel free to ignore my comment.
> 
> I've had a go at this, but I think it's going to be more trouble than
> it's worth -- there are other interdependencies between the patches
> which make them tricky to reorder.
> 
> I could add a note in the commit message for this patch explaining why
> the prctl flag #defines are being added here.  What do you think?

As I said, it's up to you. A line in the commit message would do.

-- 
Catalin


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 14/28] arm64/sve: Backend logic for setting the vector length
  2017-10-05 16:53           ` Catalin Marinas
@ 2017-10-05 17:04             ` Dave Martin
  0 siblings, 0 replies; 21+ messages in thread
From: Dave Martin @ 2017-10-05 17:04 UTC (permalink / raw)
  To: Catalin Marinas
  Cc: linux-arch, libc-alpha, Ard Biesheuvel, Szabolcs Nagy, gdb,
	Yao Qi, Alan Hayward, Will Deacon, Richard Sandiford,
	Alex Bennée, kvmarm, linux-arm-kernel

On Thu, Oct 05, 2017 at 05:53:34PM +0100, Catalin Marinas wrote:
> On Thu, Oct 05, 2017 at 05:42:29PM +0100, Dave P Martin wrote:
> > On Wed, Sep 13, 2017 at 03:11:23PM -0700, Catalin Marinas wrote:
> > > On Wed, Sep 13, 2017 at 08:06:12PM +0100, Dave P Martin wrote:
> > > > On Wed, Sep 13, 2017 at 10:29:11AM -0700, Catalin Marinas wrote:

[...]

> > > > > Can this be merged with patch 20? It seems to add the PR_ definitions
> > > > > which get actually used later when the prctl interface is added.
> > > > 
> > > > This patch is used both by patch 19 and by patch 20, which I preferred
> > > > not to merge with each other: ptrace and prctl are significantly
> > > > different things.
> > > > 
> > > > The prctl bit definitions are added here because they are the canonical
> > > > definitions used by both interfaces.  The ptrace #defines are based on
> > > > them.
> > > > 
> > > > Does it make sense if I merge patch 20 into this one and apply patch 19
> > > > on top?  This avoids the appearance of prctl #defines with no prctl
> > > > implementation.
> > > 
> > > That's fine, you can bring patch 20 forward. If there are other
> > > non-trivial issues, feel free to ignore my comment.
> > 
> > I've had a go at this, but I think it's going to be more trouble than
> > it's worth -- there are other interdependencies between the patches
> > which make them tricky to reorder.
> > 
> > I could add a note in the commit message for this patch explaining why
> > the prctl flag #defines are being added here.  What do you think?
> 
> As I said, it's up to you. A line in the commit message would do.

OK, I think I'll stick with this then.

Cheers
---Dave


^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2017-10-05 17:04 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1504198860-12951-1-git-send-email-Dave.Martin@arm.com>
2017-08-31 17:01 ` [PATCH v2 09/28] arm64/sve: Signal frame and context structure definition Dave Martin
2017-09-13 13:36   ` Catalin Marinas
2017-09-13 21:33     ` Dave Martin
2017-08-31 17:02 ` [PATCH v2 19/28] arm64/sve: ptrace and ELF coredump support Dave Martin
2017-09-06 16:22   ` Okamoto, Takayuki
     [not found]     ` <20170906181634.GF6321@e103592.cambridge.arm.com>
2017-09-07  5:11       ` Okamoto, Takayuki
2017-09-08 13:11         ` Dave Martin
2017-09-14 12:57   ` Alex Bennée
2017-09-28 14:57     ` Dave Martin
2017-09-29 12:46     ` Dave Martin
2017-08-31 17:09 ` [PATCH v2 14/28] arm64/sve: Backend logic for setting the vector length Dave Martin
2017-09-13 17:29   ` Catalin Marinas
2017-09-13 19:06     ` Dave Martin
2017-09-13 22:11       ` Catalin Marinas
2017-10-05 16:42         ` Dave Martin
2017-10-05 16:53           ` Catalin Marinas
2017-10-05 17:04             ` Dave Martin
2017-09-20 11:00   ` Alan Hayward
     [not found]     ` <20170920110902.GG24231@e103592.cambridge.arm.com>
2017-09-20 18:08       ` Alan Hayward
2017-09-21 11:19         ` Dave Martin
2017-09-21 11:57           ` Alan Hayward

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox