Merge tag 'JH7110_515_SDK_v4.8.2' into jh7110-mm-devel

version JH7110_515_SDK_v4.8.2 for JH7110 EVB board
2023-04-26 23:23:22 +08:00
parent d672e1510f c4695ba8d8
commit ed636e23a0
3 changed files with 316 additions and 173 deletions
@@ -1,27 +1,49 @@
-From 15850d406a9807d70e752aacfbb456946a01f6ac Mon Sep 17 00:00:00 2001
+From 666e593642136fad34be8eef1fcf1e872830013c Mon Sep 17 00:00:00 2001
 From: Mason Huo <mason.huo@starfivetech.com>
-Date: Tue, 21 Mar 2023 14:36:21 +0800
+Date: Mon, 17 Apr 2023 13:41:06 +0800
 Subject: [PATCH] Optimize memory operations for RISCV

-Port the memcpy & memset implementation from
-Linux kerenl to glibc to improve performance of
-memory operations in user space.
---
- sysdeps/riscv/memcpy.S | 120 +++++++++++++++++++++++++++++++++++++++++
- sysdeps/riscv/memset.S | 120 +++++++++++++++++++++++++++++++++++++++++
- 2 files changed, 240 insertions(+)
- create mode 100644 sysdeps/riscv/memcpy.S
- create mode 100644 sysdeps/riscv/memset.S
+Port the Linux kernel memcpy function to optimize
+the word-aligned case of 128 bytes or more; this
+improves the performance of large block memcpy.

-diff --git a/sysdeps/riscv/memcpy.S b/sysdeps/riscv/memcpy.S
+Here we combine the memcpy implementations of glibc and the Linux kernel.
+
+Signed-off-by: Mason Huo <mason.huo@starfivetech.com>
+---
+ sysdeps/riscv/Makefile         |   4 +
+ sysdeps/riscv/memcpy.c         | 265 +++++++++++++++++++++++++++++++++
+ sysdeps/riscv/memcpy_aligned.S |  82 ++++++++++
+ 3 files changed, 351 insertions(+)
+ create mode 100644 sysdeps/riscv/memcpy.c
+ create mode 100644 sysdeps/riscv/memcpy_aligned.S
+
+diff --git a/sysdeps/riscv/Makefile b/sysdeps/riscv/Makefile
+index 20a99681..5c3c3244 100644
+--- a/sysdeps/riscv/Makefile
+++ b/sysdeps/riscv/Makefile
+@@ -2,6 +2,10 @@ ifeq ($(subdir),misc)
+ sysdep_headers += sys/asm.h
+ endif
+ 
+ifeq ($(subdir),string)
+sysdep_routines += memcpy_aligned
+endif
+
+ # RISC-V's assembler also needs to know about PIC as it changes the definition
+ # of some assembler macros.
+ ASFLAGS-.os += $(pic-ccflag)
+diff --git a/sysdeps/riscv/memcpy.c b/sysdeps/riscv/memcpy.c
 new file mode 100644
-index 00000000..aa0eaee9
+index 00000000..1de6141e
 --- /dev/null
-+++ b/sysdeps/riscv/memcpy.S
-@@ -0,0 +1,120 @@
-+/* memcpy for RISC-V.
-+   Copyright (C) 1996-2020 Free Software Foundation, Inc.
+++ b/sysdeps/riscv/memcpy.c
+@@ -0,0 +1,265 @@
+/* Copy memory to memory until the specified number of bytes
+   has been copied.  Overlap is NOT handled correctly.
+   Copyright (C) 1991-2020 Free Software Foundation, Inc.
 +   This file is part of the GNU C Library.
+   Contributed by Torbjorn Granlund (tege@sics.se).
 +
 +   The GNU C Library is free software; you can redistribute it and/or
 +   modify it under the terms of the GNU Lesser General Public
@@ -34,40 +56,272 @@ index 00000000..aa0eaee9
 +   Lesser General Public License for more details.
 +
 +   You should have received a copy of the GNU Lesser General Public
-+   License along with the GNU C Library.  If not, see
+   License along with the GNU C Library; if not, see
 +   <https://www.gnu.org/licenses/>.  */
 +
+#include <stddef.h>
+#include <string.h>
+
+#define MERGE(w0, sh_1, w1, sh_2) (((w0) >> (sh_1)) | ((w1) << (sh_2)))
+#define OP_T_THRES      16
+#define op_t    unsigned long
+#define OPSIZ   (sizeof(op_t))
+#define OPSIZ_MASK   (sizeof(op_t) - 1)
+#define FAST_COPY_THRES  (128)
+#define byte    unsigned char
+
+static void _wordcopy_fwd_aligned(long dstp, long srcp, size_t len)
+{
+	op_t a0, a1;
+
+	switch (len % 8) {
+	case 2:
+		a0 = ((op_t *) srcp)[0];
+		srcp -= 6 * OPSIZ;
+		dstp -= 7 * OPSIZ;
+		len += 6;
+		goto do1;
+	case 3:
+		a1 = ((op_t *) srcp)[0];
+		srcp -= 5 * OPSIZ;
+		dstp -= 6 * OPSIZ;
+		len += 5;
+		goto do2;
+	case 4:
+		a0 = ((op_t *) srcp)[0];
+		srcp -= 4 * OPSIZ;
+		dstp -= 5 * OPSIZ;
+		len += 4;
+		goto do3;
+	case 5:
+		a1 = ((op_t *) srcp)[0];
+		srcp -= 3 * OPSIZ;
+		dstp -= 4 * OPSIZ;
+		len += 3;
+		goto do4;
+	case 6:
+		a0 = ((op_t *) srcp)[0];
+		srcp -= 2 * OPSIZ;
+		dstp -= 3 * OPSIZ;
+		len += 2;
+		goto do5;
+	case 7:
+		a1 = ((op_t *) srcp)[0];
+		srcp -= 1 * OPSIZ;
+		dstp -= 2 * OPSIZ;
+		len += 1;
+		goto do6;
+
+	case 0:
+		if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+			return;
+		a0 = ((op_t *) srcp)[0];
+		srcp -= 0 * OPSIZ;
+		dstp -= 1 * OPSIZ;
+		goto do7;
+	case 1:
+		a1 = ((op_t *) srcp)[0];
+		srcp -= -1 * OPSIZ;
+		dstp -= 0 * OPSIZ;
+		len -= 1;
+		if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+			goto do0;
+		goto do8;                 /* No-op.  */
+	}
+
+	do {
+do8:
+		a0 = ((op_t *) srcp)[0];
+		((op_t *) dstp)[0] = a1;
+do7:
+		a1 = ((op_t *) srcp)[1];
+		((op_t *) dstp)[1] = a0;
+do6:
+		a0 = ((op_t *) srcp)[2];
+		((op_t *) dstp)[2] = a1;
+do5:
+		a1 = ((op_t *) srcp)[3];
+		((op_t *) dstp)[3] = a0;
+do4:
+		a0 = ((op_t *) srcp)[4];
+		((op_t *) dstp)[4] = a1;
+do3:
+		a1 = ((op_t *) srcp)[5];
+		((op_t *) dstp)[5] = a0;
+do2:
+		a0 = ((op_t *) srcp)[6];
+		((op_t *) dstp)[6] = a1;
+do1:
+		a1 = ((op_t *) srcp)[7];
+		((op_t *) dstp)[7] = a0;
+
+		srcp += 8 * OPSIZ;
+		dstp += 8 * OPSIZ;
+		len -= 8;
+	} while (len != 0);
+
+	/* This is the right position for do0.  Please don't move
+	 * it into the loop.
+	 */
+do0:
+	((op_t *) dstp)[0] = a1;
+}
+
+static void _wordcopy_fwd_dest_aligned(long dstp, long srcp, size_t len)
+{
+	op_t a0, a1, a2, a3;
+	int sh_1, sh_2;
+
+	/* Calculate how to shift a word read at the memory operation
+	 * aligned srcp to make it aligned for copy.
+	 */
+
+	sh_1 = 8 * (srcp % OPSIZ);
+	sh_2 = 8 * OPSIZ - sh_1;
+
+	/* Make SRCP aligned by rounding it down to the beginning of the `op_t'
+	 * it points in the middle of.
+	 */
+	srcp &= -OPSIZ;
+
+	switch (len % 4) {
+	case 2:
+		a1 = ((op_t *) srcp)[0];
+		a2 = ((op_t *) srcp)[1];
+		srcp -= 1 * OPSIZ;
+		dstp -= 3 * OPSIZ;
+		len += 2;
+		goto do1;
+	case 3:
+		a0 = ((op_t *) srcp)[0];
+		a1 = ((op_t *) srcp)[1];
+		srcp -= 0 * OPSIZ;
+		dstp -= 2 * OPSIZ;
+		len += 1;
+		goto do2;
+	case 0:
+		if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+			return;
+		a3 = ((op_t *) srcp)[0];
+		a0 = ((op_t *) srcp)[1];
+		srcp -= -1 * OPSIZ;
+		dstp -= 1 * OPSIZ;
+		len += 0;
+		goto do3;
+	case 1:
+		a2 = ((op_t *) srcp)[0];
+		a3 = ((op_t *) srcp)[1];
+		srcp -= -2 * OPSIZ;
+		dstp -= 0 * OPSIZ;
+		len -= 1;
+		if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+			goto do0;
+		goto do4;                 /* No-op.  */
+	}
+
+	do {
+do4:
+		a0 = ((op_t *) srcp)[0];
+		((op_t *) dstp)[0] = MERGE(a2, sh_1, a3, sh_2);
+do3:
+		a1 = ((op_t *) srcp)[1];
+		((op_t *) dstp)[1] = MERGE(a3, sh_1, a0, sh_2);
+do2:
+		a2 = ((op_t *) srcp)[2];
+		((op_t *) dstp)[2] = MERGE(a0, sh_1, a1, sh_2);
+do1:
+		a3 = ((op_t *) srcp)[3];
+		((op_t *) dstp)[3] = MERGE(a1, sh_1, a2, sh_2);
+
+		srcp += 4 * OPSIZ;
+		dstp += 4 * OPSIZ;
+		len -= 4;
+	} while (len != 0);
+
+	/* This is the right position for do0.  Please don't move
+	 * it into the loop.
+	 */
+do0:
+	((op_t *) dstp)[0] = MERGE(a2, sh_1, a3, sh_2);
+}
+
+#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes)		\
+do {							\
+	size_t __nbytes = (nbytes);			\
+	while (__nbytes > 0) {						\
+		byte __x = ((byte *) src_bp)[0];		\
+		src_bp += 1;				\
+		__nbytes -= 1;				\
+		((byte *) dst_bp)[0] = __x;		\
+		dst_bp += 1;				\
+	}						\
+} while (0)
+
+#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes)			\
+do {										\
+	if (src_bp % OPSIZ == 0)						\
+		_wordcopy_fwd_aligned(dst_bp, src_bp, (nbytes) / OPSIZ);	\
+	else									\
+		_wordcopy_fwd_dest_aligned(dst_bp, src_bp, (nbytes) / OPSIZ);	\
+	src_bp += (nbytes) & -OPSIZ;						\
+	dst_bp += (nbytes) & -OPSIZ;						\
+	(nbytes_left) = (nbytes) % OPSIZ;					\
+} while (0)
+
+extern void *__memcpy_aligned(void *dest, const void *src, size_t len);
+void *__memcpy(void *dest, const void *src, size_t len)
+{
+	unsigned long dstp = (long) dest;
+	unsigned long srcp = (long) src;
+
+	/* If there are not too few bytes to copy, use word copy.  */
+	if (len >= OP_T_THRES) {
+		if ((len >= FAST_COPY_THRES) && ((dstp & OPSIZ_MASK) == 0) &&
+			((srcp & OPSIZ_MASK) == 0)) {
+			__memcpy_aligned(dest, src, len);
+			return dest;
+		}
+		/* Copy just a few bytes to make DSTP aligned.  */
+		len -= (-dstp) % OPSIZ;
+		BYTE_COPY_FWD(dstp, srcp, (-dstp) % OPSIZ);
+
+		/* Copy from SRCP to DSTP taking advantage of the known alignment of
+		 * DSTP.  Number of bytes remaining is put in the third argument,
+		 * i.e. in LEN.  This number may vary from machine to machine.
+		 */
+		WORD_COPY_FWD(dstp, srcp, len, len);
+	/* Fall out and copy the tail.  */
+	}
+
+	/* There are just a few bytes to copy.  Use byte memory operations.  */
+	BYTE_COPY_FWD(dstp, srcp, len);
+
+	return dest;
+}
+
+#ifdef weak_alias
+weak_alias (__memcpy, memcpy)
+#endif
+
+libc_hidden_builtin_def (memcpy)
+diff --git a/sysdeps/riscv/memcpy_aligned.S b/sysdeps/riscv/memcpy_aligned.S
+new file mode 100644
+index 00000000..b9b01e35
+--- /dev/null
+++ b/sysdeps/riscv/memcpy_aligned.S
+@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2013 Regents of the University of California
+ */
+
 +#include <sysdep.h>
 +#include <sys/asm.h>
 +
-+ENTRY(memcpy)
+/* void *__memcpy_aligned(void *, const void *, size_t) */
+ENTRY(__memcpy_aligned)
 +	move t6, a0  /* Preserve return value */
 +
-+	/* Defer to byte-oriented copy for small sizes */
-+	sltiu a3, a2, 128
-+	bnez a3, 4f
-+	/* Use word-oriented copy only if low-order bits match */
-+	andi a3, t6, SZREG-1
-+	andi a4, a1, SZREG-1
-+	bne a3, a4, 4f
-+
-+	beqz a3, 2f  /* Skip if already aligned */
-+	/*
-+	 * Round to nearest double word-aligned address
-+	 * greater than or equal to start address
-+	 */
-+	andi a3, a1, ~(SZREG-1)
-+	addi a3, a3, SZREG
-+	/* Handle initial misalignment */
-+	sub a4, a3, a1
-+1:
-+	lb a5, 0(a1)
-+	addi a1, a1, 1
-+	sb a5, 0(t6)
-+	addi t6, t6, 1
-+	bltu a1, a3, 1b
-+	sub a2, a2, a4  /* Update count */
-+
 +2:
 +	andi a4, a2, ~((16*SZREG)-1)
 +	beqz a4, 4f
@@ -137,134 +391,7 @@ index 00000000..aa0eaee9
 +	bltu a1, a3, 5b
 +6:
 +	ret
-+END(memcpy)
-+libc_hidden_builtin_def (memcpy)
-diff --git a/sysdeps/riscv/memset.S b/sysdeps/riscv/memset.S
-new file mode 100644
-index 00000000..431a19f5
--- /dev/null
-+++ b/sysdeps/riscv/memset.S
-@@ -0,0 +1,120 @@
-+/* memset for RISC-V.
-+   Copyright (C) 1996-2020 Free Software Foundation, Inc.
-+   This file is part of the GNU C Library.
-+
-+   The GNU C Library is free software; you can redistribute it and/or
-+   modify it under the terms of the GNU Lesser General Public
-+   License as published by the Free Software Foundation; either
-+   version 2.1 of the License, or (at your option) any later version.
-+
-+   The GNU C Library is distributed in the hope that it will be useful,
-+   but WITHOUT ANY WARRANTY; without even the implied warranty of
-+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-+   Lesser General Public License for more details.
-+
-+   You should have received a copy of the GNU Lesser General Public
-+   License along with the GNU C Library.  If not, see
-+   <https://www.gnu.org/licenses/>.  */
-+
-+#include <sysdep.h>
-+#include <sys/asm.h>
-+
-+ENTRY(memset)
-+	move t0, a0  /* Preserve return value */
-+
-+	/* Defer to byte-oriented fill for small sizes */
-+	sltiu a3, a2, 16
-+	bnez a3, 4f
-+
-+	/*
-+	 * Round to nearest XLEN-aligned address
-+	 * greater than or equal to start address
-+	 */
-+	addi a3, t0, SZREG-1
-+	andi a3, a3, ~(SZREG-1)
-+	beq a3, t0, 2f  /* Skip if already aligned */
-+	/* Handle initial misalignment */
-+	sub a4, a3, t0
-+1:
-+	sb a1, 0(t0)
-+	addi t0, t0, 1
-+	bltu t0, a3, 1b
-+	sub a2, a2, a4  /* Update count */
-+
-+2: /* Duff's device with 32 XLEN stores per iteration */
-+	/* Broadcast value into all bytes */
-+	andi a1, a1, 0xff
-+	slli a3, a1, 8
-+	or a1, a3, a1
-+	slli a3, a1, 16
-+	or a1, a3, a1
-+	slli a3, a1, 32
-+	or a1, a3, a1
-+
-+	/* Calculate end address */
-+	andi a4, a2, ~(SZREG-1)
-+	add a3, t0, a4
-+
-+	andi a4, a4, 31*SZREG  /* Calculate remainder */
-+	beqz a4, 3f            /* Shortcut if no remainder */
-+	neg a4, a4
-+	addi a4, a4, 32*SZREG  /* Calculate initial offset */
-+
-+	/* Adjust start address with offset */
-+	sub t0, t0, a4
-+
-+	/* Jump into loop body */
-+	/* Assumes 32-bit instruction lengths */
-+	la a5, 3f
-+	srli a4, a4, 1
-+	add a5, a5, a4
-+	jr a5
-+3:
-+	REG_S a1,        0(t0)
-+	REG_S a1,    SZREG(t0)
-+	REG_S a1,  2*SZREG(t0)
-+	REG_S a1,  3*SZREG(t0)
-+	REG_S a1,  4*SZREG(t0)
-+	REG_S a1,  5*SZREG(t0)
-+	REG_S a1,  6*SZREG(t0)
-+	REG_S a1,  7*SZREG(t0)
-+	REG_S a1,  8*SZREG(t0)
-+	REG_S a1,  9*SZREG(t0)
-+	REG_S a1, 10*SZREG(t0)
-+	REG_S a1, 11*SZREG(t0)
-+	REG_S a1, 12*SZREG(t0)
-+	REG_S a1, 13*SZREG(t0)
-+	REG_S a1, 14*SZREG(t0)
-+	REG_S a1, 15*SZREG(t0)
-+	REG_S a1, 16*SZREG(t0)
-+	REG_S a1, 17*SZREG(t0)
-+	REG_S a1, 18*SZREG(t0)
-+	REG_S a1, 19*SZREG(t0)
-+	REG_S a1, 20*SZREG(t0)
-+	REG_S a1, 21*SZREG(t0)
-+	REG_S a1, 22*SZREG(t0)
-+	REG_S a1, 23*SZREG(t0)
-+	REG_S a1, 24*SZREG(t0)
-+	REG_S a1, 25*SZREG(t0)
-+	REG_S a1, 26*SZREG(t0)
-+	REG_S a1, 27*SZREG(t0)
-+	REG_S a1, 28*SZREG(t0)
-+	REG_S a1, 29*SZREG(t0)
-+	REG_S a1, 30*SZREG(t0)
-+	REG_S a1, 31*SZREG(t0)
-+	addi t0, t0, 32*SZREG
-+	bltu t0, a3, 3b
-+	andi a2, a2, SZREG-1  /* Update count */
-+
-+4:
-+	/* Handle trailing misalignment */
-+	beqz a2, 6f
-+	add a3, t0, a2
-+5:
-+	sb a1, 0(t0)
-+	addi t0, t0, 1
-+	bltu t0, a3, 5b
-+6:
-+	ret
-+END(memset)
-+libc_hidden_builtin_def (memset)
+END(__memcpy_aligned)
 -- 
 2.39.2

@@ -0,0 +1,11 @@
+--- ifplugd-0.28.orig/conf/ifplugd.conf	2003-12-12 01:58:43.000000000 +0800
+++ ifplugd-0.28/conf/ifplugd.conf	2023-04-24 17:43:48.342070388 +0800
+@@ -34,7 +34,7 @@ INTERFACES="eth0 eth1"
+
+ # Additional parameters for ifplugd.
+ # (Run "ifplugd -h" for further information.)
+-ARGS="-fwI -u0 -d10"
+ARGS="-fwI -u0 -d1"
+
+ # Additional parameters for ifplugd for the specified interface.  Note
+ # that $ARGS is ignored, when a variable like this is set for an
@@ -16,7 +16,12 @@ case "$1" in
 		fi
 		;;
 	stop)
-		swapoff PARTLABEL="hibernation"
+		FILE=/dev/disk/by-partlabel/hibernation
+                if [ -b "$FILE" ];then
+			swapoff PARTLABEL="hibernation"
+		else
+			echo "No PARTLABEL=hibernation!"
+		fi
 		;;
 	*)
 		echo "Usage: $0 {start|stop}"