From 013b3a25e8f6edfe68cde6720e581a1ca77b4be7 Mon Sep 17 00:00:00 2001 From: Mason Huo Date: Mon, 17 Apr 2023 14:53:22 +0800 Subject: [PATCH 1/3] package: glibc: Add optimization memory copy with aligned version Port the linux kernel memcpy function for optimizing the 128 byte align case, this will improve the performance of large block memcpy. Here we combine the memcpy of glibc and kernel. Signed-off-by: Mason Huo --- ...Optimize-memory-operations-for-RISCV.patch | 471 +++++++++++------- 1 file changed, 299 insertions(+), 172 deletions(-) diff --git a/package/glibc/2.32-50-g737efa27fca5c97f566a2005687fda7d6659cd2e/0001-Optimize-memory-operations-for-RISCV.patch b/package/glibc/2.32-50-g737efa27fca5c97f566a2005687fda7d6659cd2e/0001-Optimize-memory-operations-for-RISCV.patch index c9053382..d3ab989f 100644 --- a/package/glibc/2.32-50-g737efa27fca5c97f566a2005687fda7d6659cd2e/0001-Optimize-memory-operations-for-RISCV.patch +++ b/package/glibc/2.32-50-g737efa27fca5c97f566a2005687fda7d6659cd2e/0001-Optimize-memory-operations-for-RISCV.patch @@ -1,27 +1,49 @@ -From 15850d406a9807d70e752aacfbb456946a01f6ac Mon Sep 17 00:00:00 2001 +From 666e593642136fad34be8eef1fcf1e872830013c Mon Sep 17 00:00:00 2001 From: Mason Huo -Date: Tue, 21 Mar 2023 14:36:21 +0800 +Date: Mon, 17 Apr 2023 13:41:06 +0800 Subject: [PATCH] Optimize memory operations for RISCV -Port the memcpy & memset implementation from -Linux kerenl to glibc to improve performance of -memory operations in user space. ---- - sysdeps/riscv/memcpy.S | 120 +++++++++++++++++++++++++++++++++++++++++ - sysdeps/riscv/memset.S | 120 +++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 240 insertions(+) - create mode 100644 sysdeps/riscv/memcpy.S - create mode 100644 sysdeps/riscv/memset.S +Port the linux kernel memcpy function for optimizing +the 128 byte align case, this will improve the +performance of large block memcpy. 
-diff --git a/sysdeps/riscv/memcpy.S b/sysdeps/riscv/memcpy.S +Here we combine the memcpy of glibc and kernel. + +Signed-off-by: Mason Huo +--- + sysdeps/riscv/Makefile | 4 + + sysdeps/riscv/memcpy.c | 265 +++++++++++++++++++++++++++++++++ + sysdeps/riscv/memcpy_aligned.S | 82 ++++++++++ + 3 files changed, 351 insertions(+) + create mode 100644 sysdeps/riscv/memcpy.c + create mode 100644 sysdeps/riscv/memcpy_aligned.S + +diff --git a/sysdeps/riscv/Makefile b/sysdeps/riscv/Makefile +index 20a99681..5c3c3244 100644 +--- a/sysdeps/riscv/Makefile ++++ b/sysdeps/riscv/Makefile +@@ -2,6 +2,10 @@ ifeq ($(subdir),misc) + sysdep_headers += sys/asm.h + endif + ++ifeq ($(subdir),string) ++sysdep_routines += memcpy_aligned ++endif ++ + # RISC-V's assembler also needs to know about PIC as it changes the definition + # of some assembler macros. + ASFLAGS-.os += $(pic-ccflag) +diff --git a/sysdeps/riscv/memcpy.c b/sysdeps/riscv/memcpy.c new file mode 100644 -index 00000000..aa0eaee9 +index 00000000..1de6141e --- /dev/null -+++ b/sysdeps/riscv/memcpy.S -@@ -0,0 +1,120 @@ -+/* memcpy for RISC-V. -+ Copyright (C) 1996-2020 Free Software Foundation, Inc. ++++ b/sysdeps/riscv/memcpy.c +@@ -0,0 +1,265 @@ ++/* Copy memory to memory until the specified number of bytes ++ has been copied. Overlap is NOT handled correctly. ++ Copyright (C) 1991-2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. ++ Contributed by Torbjorn Granlund (tege@sics.se). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public @@ -34,40 +56,272 @@ index 00000000..aa0eaee9 + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see ++ License along with the GNU C Library; if not, see + . 
*/ + ++#include ++#include ++ ++#define MERGE(w0, sh_1, w1, sh_2) (((w0) >> (sh_1)) | ((w1) << (sh_2))) ++#define OP_T_THRES 16 ++#define op_t unsigned long ++#define OPSIZ (sizeof(op_t)) ++#define OPSIZ_MASK (sizeof(op_t) - 1) ++#define FAST_COPY_THRES (128) ++#define byte unsigned char ++ ++static void _wordcopy_fwd_aligned(long dstp, long srcp, size_t len) ++{ ++ op_t a0, a1; ++ ++ switch (len % 8) { ++ case 2: ++ a0 = ((op_t *) srcp)[0]; ++ srcp -= 6 * OPSIZ; ++ dstp -= 7 * OPSIZ; ++ len += 6; ++ goto do1; ++ case 3: ++ a1 = ((op_t *) srcp)[0]; ++ srcp -= 5 * OPSIZ; ++ dstp -= 6 * OPSIZ; ++ len += 5; ++ goto do2; ++ case 4: ++ a0 = ((op_t *) srcp)[0]; ++ srcp -= 4 * OPSIZ; ++ dstp -= 5 * OPSIZ; ++ len += 4; ++ goto do3; ++ case 5: ++ a1 = ((op_t *) srcp)[0]; ++ srcp -= 3 * OPSIZ; ++ dstp -= 4 * OPSIZ; ++ len += 3; ++ goto do4; ++ case 6: ++ a0 = ((op_t *) srcp)[0]; ++ srcp -= 2 * OPSIZ; ++ dstp -= 3 * OPSIZ; ++ len += 2; ++ goto do5; ++ case 7: ++ a1 = ((op_t *) srcp)[0]; ++ srcp -= 1 * OPSIZ; ++ dstp -= 2 * OPSIZ; ++ len += 1; ++ goto do6; ++ ++ case 0: ++ if (OP_T_THRES <= 3 * OPSIZ && len == 0) ++ return; ++ a0 = ((op_t *) srcp)[0]; ++ srcp -= 0 * OPSIZ; ++ dstp -= 1 * OPSIZ; ++ goto do7; ++ case 1: ++ a1 = ((op_t *) srcp)[0]; ++ srcp -= -1 * OPSIZ; ++ dstp -= 0 * OPSIZ; ++ len -= 1; ++ if (OP_T_THRES <= 3 * OPSIZ && len == 0) ++ goto do0; ++ goto do8; /* No-op. 
*/ ++ } ++ ++ do { ++do8: ++ a0 = ((op_t *) srcp)[0]; ++ ((op_t *) dstp)[0] = a1; ++do7: ++ a1 = ((op_t *) srcp)[1]; ++ ((op_t *) dstp)[1] = a0; ++do6: ++ a0 = ((op_t *) srcp)[2]; ++ ((op_t *) dstp)[2] = a1; ++do5: ++ a1 = ((op_t *) srcp)[3]; ++ ((op_t *) dstp)[3] = a0; ++do4: ++ a0 = ((op_t *) srcp)[4]; ++ ((op_t *) dstp)[4] = a1; ++do3: ++ a1 = ((op_t *) srcp)[5]; ++ ((op_t *) dstp)[5] = a0; ++do2: ++ a0 = ((op_t *) srcp)[6]; ++ ((op_t *) dstp)[6] = a1; ++do1: ++ a1 = ((op_t *) srcp)[7]; ++ ((op_t *) dstp)[7] = a0; ++ ++ srcp += 8 * OPSIZ; ++ dstp += 8 * OPSIZ; ++ len -= 8; ++ } while (len != 0); ++ ++ /* This is the right position for do0. Please don't move ++ * it into the loop. ++ */ ++do0: ++ ((op_t *) dstp)[0] = a1; ++} ++ ++static void _wordcopy_fwd_dest_aligned(long dstp, long srcp, size_t len) ++{ ++ op_t a0, a1, a2, a3; ++ int sh_1, sh_2; ++ ++ /* Calculate how to shift a word read at the memory operation ++ * aligned srcp to make it aligned for copy. ++ */ ++ ++ sh_1 = 8 * (srcp % OPSIZ); ++ sh_2 = 8 * OPSIZ - sh_1; ++ ++ /* Make SRCP aligned by rounding it down to the beginning of the `op_t' ++ * it points in the middle of. ++ */ ++ srcp &= -OPSIZ; ++ ++ switch (len % 4) { ++ case 2: ++ a1 = ((op_t *) srcp)[0]; ++ a2 = ((op_t *) srcp)[1]; ++ srcp -= 1 * OPSIZ; ++ dstp -= 3 * OPSIZ; ++ len += 2; ++ goto do1; ++ case 3: ++ a0 = ((op_t *) srcp)[0]; ++ a1 = ((op_t *) srcp)[1]; ++ srcp -= 0 * OPSIZ; ++ dstp -= 2 * OPSIZ; ++ len += 1; ++ goto do2; ++ case 0: ++ if (OP_T_THRES <= 3 * OPSIZ && len == 0) ++ return; ++ a3 = ((op_t *) srcp)[0]; ++ a0 = ((op_t *) srcp)[1]; ++ srcp -= -1 * OPSIZ; ++ dstp -= 1 * OPSIZ; ++ len += 0; ++ goto do3; ++ case 1: ++ a2 = ((op_t *) srcp)[0]; ++ a3 = ((op_t *) srcp)[1]; ++ srcp -= -2 * OPSIZ; ++ dstp -= 0 * OPSIZ; ++ len -= 1; ++ if (OP_T_THRES <= 3 * OPSIZ && len == 0) ++ goto do0; ++ goto do4; /* No-op. 
*/ ++ } ++ ++ do { ++do4: ++ a0 = ((op_t *) srcp)[0]; ++ ((op_t *) dstp)[0] = MERGE(a2, sh_1, a3, sh_2); ++do3: ++ a1 = ((op_t *) srcp)[1]; ++ ((op_t *) dstp)[1] = MERGE(a3, sh_1, a0, sh_2); ++do2: ++ a2 = ((op_t *) srcp)[2]; ++ ((op_t *) dstp)[2] = MERGE(a0, sh_1, a1, sh_2); ++do1: ++ a3 = ((op_t *) srcp)[3]; ++ ((op_t *) dstp)[3] = MERGE(a1, sh_1, a2, sh_2); ++ ++ srcp += 4 * OPSIZ; ++ dstp += 4 * OPSIZ; ++ len -= 4; ++ } while (len != 0); ++ ++ /* This is the right position for do0. Please don't move ++ * it into the loop. ++ */ ++do0: ++ ((op_t *) dstp)[0] = MERGE(a2, sh_1, a3, sh_2); ++} ++ ++#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes) \ ++do { \ ++ size_t __nbytes = (nbytes); \ ++ while (__nbytes > 0) { \ ++ byte __x = ((byte *) src_bp)[0]; \ ++ src_bp += 1; \ ++ __nbytes -= 1; \ ++ ((byte *) dst_bp)[0] = __x; \ ++ dst_bp += 1; \ ++ } \ ++} while (0) ++ ++#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes) \ ++do { \ ++ if (src_bp % OPSIZ == 0) \ ++ _wordcopy_fwd_aligned(dst_bp, src_bp, (nbytes) / OPSIZ); \ ++ else \ ++ _wordcopy_fwd_dest_aligned(dst_bp, src_bp, (nbytes) / OPSIZ); \ ++ src_bp += (nbytes) & -OPSIZ; \ ++ dst_bp += (nbytes) & -OPSIZ; \ ++ (nbytes_left) = (nbytes) % OPSIZ; \ ++} while (0) ++ ++extern void *__memcpy_aligned(void *dest, const void *src, size_t len); ++void *__memcpy(void *dest, const void *src, size_t len) ++{ ++ unsigned long dstp = (long) dest; ++ unsigned long srcp = (long) src; ++ ++ /* If there are not too few bytes to copy, use word copy. */ ++ if (len >= OP_T_THRES) { ++ if ((len >= FAST_COPY_THRES) && ((dstp & OPSIZ_MASK) == 0) && ++ ((srcp & OPSIZ_MASK) == 0)) { ++ __memcpy_aligned(dest, src, len); ++ return dest; ++ } ++ /* Copy just a few bytes to make DSTP aligned. */ ++ len -= (-dstp) % OPSIZ; ++ BYTE_COPY_FWD(dstp, srcp, (-dstp) % OPSIZ); ++ ++ /* Copy from SRCP to DSTP taking advantage of the known alignment of ++ * DSTP. Number of bytes remaining is put in the third argument, ++ * i.e. in LEN. 
This number may vary from machine to machine. ++ */ ++ WORD_COPY_FWD(dstp, srcp, len, len); ++ /* Fall out and copy the tail. */ ++ } ++ ++ /* There are just a few bytes to copy. Use byte memory operations. */ ++ BYTE_COPY_FWD(dstp, srcp, len); ++ ++ return dest; ++} ++ ++#ifdef weak_alias ++weak_alias (__memcpy, memcpy) ++#endif ++ ++libc_hidden_builtin_def (memcpy) +diff --git a/sysdeps/riscv/memcpy_aligned.S b/sysdeps/riscv/memcpy_aligned.S +new file mode 100644 +index 00000000..b9b01e35 +--- /dev/null ++++ b/sysdeps/riscv/memcpy_aligned.S +@@ -0,0 +1,82 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++/* ++ * Copyright (C) 2013 Regents of the University of California ++ */ ++ +#include +#include + -+ENTRY(memcpy) ++/* void *__memcpy_aligned(void *, const void *, size_t) */ ++ENTRY(__memcpy_aligned) + move t6, a0 /* Preserve return value */ + -+ /* Defer to byte-oriented copy for small sizes */ -+ sltiu a3, a2, 128 -+ bnez a3, 4f -+ /* Use word-oriented copy only if low-order bits match */ -+ andi a3, t6, SZREG-1 -+ andi a4, a1, SZREG-1 -+ bne a3, a4, 4f -+ -+ beqz a3, 2f /* Skip if already aligned */ -+ /* -+ * Round to nearest double word-aligned address -+ * greater than or equal to start address -+ */ -+ andi a3, a1, ~(SZREG-1) -+ addi a3, a3, SZREG -+ /* Handle initial misalignment */ -+ sub a4, a3, a1 -+1: -+ lb a5, 0(a1) -+ addi a1, a1, 1 -+ sb a5, 0(t6) -+ addi t6, t6, 1 -+ bltu a1, a3, 1b -+ sub a2, a2, a4 /* Update count */ -+ +2: + andi a4, a2, ~((16*SZREG)-1) + beqz a4, 4f @@ -137,134 +391,7 @@ index 00000000..aa0eaee9 + bltu a1, a3, 5b +6: + ret -+END(memcpy) -+libc_hidden_builtin_def (memcpy) -diff --git a/sysdeps/riscv/memset.S b/sysdeps/riscv/memset.S -new file mode 100644 -index 00000000..431a19f5 ---- /dev/null -+++ b/sysdeps/riscv/memset.S -@@ -0,0 +1,120 @@ -+/* memset for RISC-V. -+ Copyright (C) 1996-2020 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. 
-+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#include -+#include -+ -+ENTRY(memset) -+ move t0, a0 /* Preserve return value */ -+ -+ /* Defer to byte-oriented fill for small sizes */ -+ sltiu a3, a2, 16 -+ bnez a3, 4f -+ -+ /* -+ * Round to nearest XLEN-aligned address -+ * greater than or equal to start address -+ */ -+ addi a3, t0, SZREG-1 -+ andi a3, a3, ~(SZREG-1) -+ beq a3, t0, 2f /* Skip if already aligned */ -+ /* Handle initial misalignment */ -+ sub a4, a3, t0 -+1: -+ sb a1, 0(t0) -+ addi t0, t0, 1 -+ bltu t0, a3, 1b -+ sub a2, a2, a4 /* Update count */ -+ -+2: /* Duff's device with 32 XLEN stores per iteration */ -+ /* Broadcast value into all bytes */ -+ andi a1, a1, 0xff -+ slli a3, a1, 8 -+ or a1, a3, a1 -+ slli a3, a1, 16 -+ or a1, a3, a1 -+ slli a3, a1, 32 -+ or a1, a3, a1 -+ -+ /* Calculate end address */ -+ andi a4, a2, ~(SZREG-1) -+ add a3, t0, a4 -+ -+ andi a4, a4, 31*SZREG /* Calculate remainder */ -+ beqz a4, 3f /* Shortcut if no remainder */ -+ neg a4, a4 -+ addi a4, a4, 32*SZREG /* Calculate initial offset */ -+ -+ /* Adjust start address with offset */ -+ sub t0, t0, a4 -+ -+ /* Jump into loop body */ -+ /* Assumes 32-bit instruction lengths */ -+ la a5, 3f -+ srli a4, a4, 1 -+ add a5, a5, a4 -+ jr a5 -+3: -+ REG_S a1, 0(t0) -+ REG_S a1, SZREG(t0) -+ REG_S a1, 2*SZREG(t0) -+ REG_S a1, 3*SZREG(t0) -+ REG_S a1, 4*SZREG(t0) -+ REG_S 
a1, 5*SZREG(t0) -+ REG_S a1, 6*SZREG(t0) -+ REG_S a1, 7*SZREG(t0) -+ REG_S a1, 8*SZREG(t0) -+ REG_S a1, 9*SZREG(t0) -+ REG_S a1, 10*SZREG(t0) -+ REG_S a1, 11*SZREG(t0) -+ REG_S a1, 12*SZREG(t0) -+ REG_S a1, 13*SZREG(t0) -+ REG_S a1, 14*SZREG(t0) -+ REG_S a1, 15*SZREG(t0) -+ REG_S a1, 16*SZREG(t0) -+ REG_S a1, 17*SZREG(t0) -+ REG_S a1, 18*SZREG(t0) -+ REG_S a1, 19*SZREG(t0) -+ REG_S a1, 20*SZREG(t0) -+ REG_S a1, 21*SZREG(t0) -+ REG_S a1, 22*SZREG(t0) -+ REG_S a1, 23*SZREG(t0) -+ REG_S a1, 24*SZREG(t0) -+ REG_S a1, 25*SZREG(t0) -+ REG_S a1, 26*SZREG(t0) -+ REG_S a1, 27*SZREG(t0) -+ REG_S a1, 28*SZREG(t0) -+ REG_S a1, 29*SZREG(t0) -+ REG_S a1, 30*SZREG(t0) -+ REG_S a1, 31*SZREG(t0) -+ addi t0, t0, 32*SZREG -+ bltu t0, a3, 3b -+ andi a2, a2, SZREG-1 /* Update count */ -+ -+4: -+ /* Handle trailing misalignment */ -+ beqz a2, 6f -+ add a3, t0, a2 -+5: -+ sb a1, 0(t0) -+ addi t0, t0, 1 -+ bltu t0, a3, 5b -+6: -+ ret -+END(memset) -+libc_hidden_builtin_def (memset) ++END(__memcpy_aligned) -- 2.39.2 From 8b0d868279fdce12fe75ccd7049f293d22367e12 Mon Sep 17 00:00:00 2001 From: Mason Huo Date: Mon, 24 Apr 2023 11:38:28 +0800 Subject: [PATCH 2/3] package: starfive: Optimize the swapoff partition Check if the swap partition ("hibernation") exist or not before swapoff command. Signed-off-by: Mason Huo --- package/starfive/pm/S99hibernation | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/package/starfive/pm/S99hibernation b/package/starfive/pm/S99hibernation index ac5a09fc..90b61fce 100644 --- a/package/starfive/pm/S99hibernation +++ b/package/starfive/pm/S99hibernation @@ -16,7 +16,12 @@ case "$1" in fi ;; stop) - swapoff PARTLABEL="hibernation" + FILE=/dev/disk/by-partlabel/hibernation + if [ -b "$FILE" ];then + swapoff PARTLABEL="hibernation" + else + echo "No PARTLABEL=hibernation!" 
+ fi ;; *) echo "Usage: $0 {start|stop}" From 910ecaeb5ecdefcf9f5bdd49ea49bda95aab6cc4 Mon Sep 17 00:00:00 2001 From: Samin Guo Date: Wed, 26 Apr 2023 15:21:37 +0800 Subject: [PATCH 3/3] ifplugd: Reduce the IP release time IP release time reduced from 10 seconds to 1 second Signed-off-by: Samin Guo --- package/ifplugd/0007-set-delay-down-to-1.patch | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 package/ifplugd/0007-set-delay-down-to-1.patch diff --git a/package/ifplugd/0007-set-delay-down-to-1.patch b/package/ifplugd/0007-set-delay-down-to-1.patch new file mode 100644 index 00000000..4afe2ddd --- /dev/null +++ b/package/ifplugd/0007-set-delay-down-to-1.patch @@ -0,0 +1,11 @@ +--- ifplugd-0.28.orig/conf/ifplugd.conf 2003-12-12 01:58:43.000000000 +0800 ++++ ifplugd-0.28/conf/ifplugd.conf 2023-04-24 17:43:48.342070388 +0800 +@@ -34,7 +34,7 @@ INTERFACES="eth0 eth1" + + # Additional parameters for ifplugd. + # (Run "ifplugd -h" for further information.) +-ARGS="-fwI -u0 -d10" ++ARGS="-fwI -u0 -d1" + + # Additional parameters for ifplugd for the specified interface. Note + # that $ARGS is ignored, when a variable like this is set for an