[44089f7] | 1 | Submitted By: William Harrington <wwh04660 at ucmo dot edu>
|
---|
| 2 | Date: 2008-08-24
|
---|
| 3 | Initial Package Version: 1.5
|
---|
| 4 | Upstream Status: Rejected
|
---|
| 5 | Description: Use a local memcpy instead of the glibc implementation for ping
|
---|
| 6 | This resolves the bus errors
|
---|
| 7 |
|
---|
| 8 | diff -Naur inetutils-1.5.orig/ping/Makefile.in inetutils-1.5/ping/Makefile.in
|
---|
| 9 | --- inetutils-1.5.orig/ping/Makefile.in 2006-10-21 11:59:48.000000000 +0000
|
---|
| 10 | +++ inetutils-1.5/ping/Makefile.in 2007-06-28 23:10:43.000000000 +0000
|
---|
| 11 | @@ -109,7 +109,8 @@
|
---|
| 12 | PROGRAMS = $(bin_PROGRAMS)
|
---|
| 13 | am_ping_OBJECTS = ping.$(OBJEXT) ping_common.$(OBJEXT) \
|
---|
| 14 | ping_echo.$(OBJEXT) ping_address.$(OBJEXT) \
|
---|
| 15 | - ping_router.$(OBJEXT) ping_timestamp.$(OBJEXT)
|
---|
| 16 | + ping_router.$(OBJEXT) ping_timestamp.$(OBJEXT) \
|
---|
| 17 | + wordcopy.$(OBJEXT)
|
---|
| 18 | ping_OBJECTS = $(am_ping_OBJECTS)
|
---|
| 19 | ping_DEPENDENCIES =
|
---|
| 20 | am_ping6_OBJECTS = ping6.$(OBJEXT) ping_common.$(OBJEXT)
|
---|
| 21 | @@ -333,8 +334,9 @@
|
---|
| 22 | ping_LDADD = -L../libinetutils -linetutils -L../libicmp -licmp -L../lib -lgnu
|
---|
| 23 | ping6_LDADD = -L../lib -lgnu -L../libinetutils -linetutils
|
---|
| 24 | INCLUDES = -I$(top_srcdir)/lib -I../lib -I$(top_srcdir)/libicmp
|
---|
| 25 | -ping_SOURCES = ping.c ping_common.c ping_echo.c ping_address.c \
|
---|
| 26 | - ping_router.c ping_timestamp.c ping_common.h ping_impl.h
|
---|
| 27 | +ping_SOURCES = memcopy.h pagecopy.h ping.c ping_common.c \
|
---|
| 28 | + ping_echo.c ping_address.c ping_router.c ping_timestamp.c \
|
---|
| 29 | + ping_common.h ping_impl.h wordcopy.c
|
---|
| 30 |
|
---|
| 31 | ping6_SOURCES = ping6.c ping_common.c ping_common.h ping6.h
|
---|
| 32 | SUIDMODE = -o root -m 4775
|
---|
| 33 | diff -Naur inetutils-1.5.orig/ping/memcopy.h inetutils-1.5/ping/memcopy.h
|
---|
| 34 | --- inetutils-1.5.orig/ping/memcopy.h 1970-01-01 00:00:00.000000000 +0000
|
---|
| 35 | +++ inetutils-1.5/ping/memcopy.h 2007-06-28 23:08:34.000000000 +0000
|
---|
| 36 | @@ -0,0 +1,150 @@
|
---|
| 37 | +/* memcopy.h -- definitions for memory copy functions. Generic C version.
|
---|
| 38 | + Copyright (C) 1991, 1992, 1993, 1997, 2004 Free Software Foundation, Inc.
|
---|
| 39 | + This file is part of the GNU C Library.
|
---|
| 40 | + Contributed by Torbjorn Granlund (tege@sics.se).
|
---|
| 41 | +
|
---|
| 42 | + The GNU C Library is free software; you can redistribute it and/or
|
---|
| 43 | + modify it under the terms of the GNU Lesser General Public
|
---|
| 44 | + License as published by the Free Software Foundation; either
|
---|
| 45 | + version 2.1 of the License, or (at your option) any later version.
|
---|
| 46 | +
|
---|
| 47 | + The GNU C Library is distributed in the hope that it will be useful,
|
---|
| 48 | + but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 49 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
---|
| 50 | + Lesser General Public License for more details.
|
---|
| 51 | +
|
---|
| 52 | + You should have received a copy of the GNU Lesser General Public
|
---|
| 53 | + License along with the GNU C Library; if not, write to the Free
|
---|
| 54 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
---|
| 55 | + 02111-1307 USA. */
|
---|
| 56 | +
|
---|
| 57 | +/* The strategy of the memory functions is:
|
---|
| 58 | +
|
---|
| 59 | + 1. Copy bytes until the destination pointer is aligned.
|
---|
| 60 | +
|
---|
| 61 | + 2. Copy words in unrolled loops. If the source and destination
|
---|
| 62 | + are not aligned in the same way, use word memory operations,
|
---|
| 63 | + but shift and merge two read words before writing.
|
---|
| 64 | +
|
---|
| 65 | + 3. Copy the few remaining bytes.
|
---|
| 66 | +
|
---|
| 67 | + This is fast on processors that have at least 10 registers for
|
---|
| 68 | + allocation by GCC, and that can access memory at reg+const in one
|
---|
| 69 | + instruction.
|
---|
| 70 | +
|
---|
| 71 | + I made an "exhaustive" test of this memmove when I wrote it,
|
---|
| 72 | + exhaustive in the sense that I tried all alignment and length
|
---|
| 73 | + combinations, with and without overlap. */
|
---|
| 74 | +
|
---|
| 75 | +#include <sys/cdefs.h>
|
---|
| 76 | +#include <endian.h>
|
---|
| 77 | +
|
---|
| 78 | +/* The macros defined in this file are:
|
---|
| 79 | +
|
---|
| 80 | + BYTE_COPY_FWD(dst_beg_ptr, src_beg_ptr, nbytes_to_copy)
|
---|
| 81 | +
|
---|
| 82 | + BYTE_COPY_BWD(dst_end_ptr, src_end_ptr, nbytes_to_copy)
|
---|
| 83 | +
|
---|
| 84 | + WORD_COPY_FWD(dst_beg_ptr, src_beg_ptr, nbytes_remaining, nbytes_to_copy)
|
---|
| 85 | +
|
---|
| 86 | + WORD_COPY_BWD(dst_end_ptr, src_end_ptr, nbytes_remaining, nbytes_to_copy)
|
---|
| 87 | +
|
---|
| 88 | + MERGE(old_word, sh_1, new_word, sh_2)
|
---|
| 89 | + [I fail to understand. I feel stupid. --roland]
|
---|
| 90 | +*/
|
---|
| 91 | +
|
---|
| 92 | +/* Type to use for aligned memory operations.
|
---|
| 93 | + This should normally be the biggest type supported by a single load
|
---|
| 94 | + and store. */
|
---|
| 95 | +#define op_t unsigned long int
|
---|
| 96 | +#define OPSIZ (sizeof(op_t))
|
---|
| 97 | +
|
---|
| 98 | +/* Type to use for unaligned operations. */
|
---|
| 99 | +typedef unsigned char byte;
|
---|
| 100 | +
|
---|
| 101 | +/* Optimal type for storing bytes in registers. */
|
---|
| 102 | +#define reg_char char
|
---|
| 103 | +
|
---|
| 104 | +#if __BYTE_ORDER == __LITTLE_ENDIAN
|
---|
| 105 | +#define MERGE(w0, sh_1, w1, sh_2) (((w0) >> (sh_1)) | ((w1) << (sh_2)))
|
---|
| 106 | +#endif
|
---|
| 107 | +#if __BYTE_ORDER == __BIG_ENDIAN
|
---|
| 108 | +#define MERGE(w0, sh_1, w1, sh_2) (((w0) << (sh_1)) | ((w1) >> (sh_2)))
|
---|
| 109 | +#endif
|
---|
| 110 | +
|
---|
| 111 | +/* Copy exactly NBYTES bytes from SRC_BP to DST_BP,
|
---|
| 112 | + without any assumptions about alignment of the pointers. */
|
---|
| 113 | +#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes) \
|
---|
| 114 | + do \
|
---|
| 115 | + { \
|
---|
| 116 | + size_t __nbytes = (nbytes); \
|
---|
| 117 | + while (__nbytes > 0) \
|
---|
| 118 | + { \
|
---|
| 119 | + byte __x = ((byte *) src_bp)[0]; \
|
---|
| 120 | + src_bp += 1; \
|
---|
| 121 | + __nbytes -= 1; \
|
---|
| 122 | + ((byte *) dst_bp)[0] = __x; \
|
---|
| 123 | + dst_bp += 1; \
|
---|
| 124 | + } \
|
---|
| 125 | + } while (0)
|
---|
| 126 | +
|
---|
| 127 | +/* Copy exactly NBYTES_TO_COPY bytes from SRC_END_PTR to DST_END_PTR,
|
---|
| 128 | + beginning at the bytes right before the pointers and continuing towards
|
---|
| 129 | + smaller addresses. Don't assume anything about alignment of the
|
---|
| 130 | + pointers. */
|
---|
| 131 | +#define BYTE_COPY_BWD(dst_ep, src_ep, nbytes) \
|
---|
| 132 | + do \
|
---|
| 133 | + { \
|
---|
| 134 | + size_t __nbytes = (nbytes); \
|
---|
| 135 | + while (__nbytes > 0) \
|
---|
| 136 | + { \
|
---|
| 137 | + byte __x; \
|
---|
| 138 | + src_ep -= 1; \
|
---|
| 139 | + __x = ((byte *) src_ep)[0]; \
|
---|
| 140 | + dst_ep -= 1; \
|
---|
| 141 | + __nbytes -= 1; \
|
---|
| 142 | + ((byte *) dst_ep)[0] = __x; \
|
---|
| 143 | + } \
|
---|
| 144 | + } while (0)
|
---|
| 145 | +
|
---|
| 146 | +/* Copy *up to* NBYTES bytes from SRC_BP to DST_BP, with
|
---|
| 147 | + the assumption that DST_BP is aligned on an OPSIZ multiple. If
|
---|
| 148 | + not all bytes could be easily copied, store remaining number of bytes
|
---|
| 149 | + in NBYTES_LEFT, otherwise store 0. */
|
---|
| 150 | +extern void _wordcopy_fwd_aligned (long int, long int, size_t) __THROW;
|
---|
| 151 | +extern void _wordcopy_fwd_dest_aligned (long int, long int, size_t) __THROW;
|
---|
| 152 | +#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes) \
|
---|
| 153 | + do \
|
---|
| 154 | + { \
|
---|
| 155 | + if (src_bp % OPSIZ == 0) \
|
---|
| 156 | + _wordcopy_fwd_aligned (dst_bp, src_bp, (nbytes) / OPSIZ); \
|
---|
| 157 | + else \
|
---|
| 158 | + _wordcopy_fwd_dest_aligned (dst_bp, src_bp, (nbytes) / OPSIZ); \
|
---|
| 159 | + src_bp += (nbytes) & -OPSIZ; \
|
---|
| 160 | + dst_bp += (nbytes) & -OPSIZ; \
|
---|
| 161 | + (nbytes_left) = (nbytes) % OPSIZ; \
|
---|
| 162 | + } while (0)
|
---|
| 163 | +
|
---|
| 164 | +/* Copy *up to* NBYTES_TO_COPY bytes from SRC_END_PTR to DST_END_PTR,
|
---|
| 165 | + beginning at the words (of type op_t) right before the pointers and
|
---|
| 166 | + continuing towards smaller addresses. May take advantage of the fact that
|
---|
| 167 | + DST_END_PTR is aligned on an OPSIZ multiple. If not all bytes could be
|
---|
| 168 | + easily copied, store remaining number of bytes in NBYTES_REMAINING,
|
---|
| 169 | + otherwise store 0. */
|
---|
| 170 | +extern void _wordcopy_bwd_aligned (long int, long int, size_t) __THROW;
|
---|
| 171 | +extern void _wordcopy_bwd_dest_aligned (long int, long int, size_t) __THROW;
|
---|
| 172 | +#define WORD_COPY_BWD(dst_ep, src_ep, nbytes_left, nbytes) \
|
---|
| 173 | + do \
|
---|
| 174 | + { \
|
---|
| 175 | + if (src_ep % OPSIZ == 0) \
|
---|
| 176 | + _wordcopy_bwd_aligned (dst_ep, src_ep, (nbytes) / OPSIZ); \
|
---|
| 177 | + else \
|
---|
| 178 | + _wordcopy_bwd_dest_aligned (dst_ep, src_ep, (nbytes) / OPSIZ); \
|
---|
| 179 | + src_ep -= (nbytes) & -OPSIZ; \
|
---|
| 180 | + dst_ep -= (nbytes) & -OPSIZ; \
|
---|
| 181 | + (nbytes_left) = (nbytes) % OPSIZ; \
|
---|
| 182 | + } while (0)
|
---|
| 183 | +
|
---|
| 184 | +
|
---|
| 185 | +/* Threshold value for when to enter the unrolled loops. */
|
---|
| 186 | +#define OP_T_THRES 16
|
---|
| 187 | diff -Naur inetutils-1.5.orig/ping/pagecopy.h inetutils-1.5/ping/pagecopy.h
|
---|
| 188 | --- inetutils-1.5.orig/ping/pagecopy.h 1970-01-01 00:00:00.000000000 +0000
|
---|
| 189 | +++ inetutils-1.5/ping/pagecopy.h 2007-06-28 23:08:34.000000000 +0000
|
---|
| 190 | @@ -0,0 +1,75 @@
|
---|
| 191 | +/* Macros for copying by pages; used in memcpy, memmove. Generic macros.
|
---|
| 192 | + Copyright (C) 1995, 1997 Free Software Foundation, Inc.
|
---|
| 193 | + This file is part of the GNU C Library.
|
---|
| 194 | +
|
---|
| 195 | + The GNU C Library is free software; you can redistribute it and/or
|
---|
| 196 | + modify it under the terms of the GNU Lesser General Public
|
---|
| 197 | + License as published by the Free Software Foundation; either
|
---|
| 198 | + version 2.1 of the License, or (at your option) any later version.
|
---|
| 199 | +
|
---|
| 200 | + The GNU C Library is distributed in the hope that it will be useful,
|
---|
| 201 | + but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 202 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
---|
| 203 | + Lesser General Public License for more details.
|
---|
| 204 | +
|
---|
| 205 | + You should have received a copy of the GNU Lesser General Public
|
---|
| 206 | + License along with the GNU C Library; if not, write to the Free
|
---|
| 207 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
---|
| 208 | + 02111-1307 USA. */
|
---|
| 209 | +
|
---|
| 210 | +/* This file defines the macro:
|
---|
| 211 | +
|
---|
| 212 | + PAGE_COPY_FWD_MAYBE (dstp, srcp, nbytes_left, nbytes)
|
---|
| 213 | +
|
---|
| 214 | + which is invoked like WORD_COPY_FWD et al. The pointers should be at
|
---|
| 215 | + least word aligned. This will check if virtual copying by pages can and
|
---|
| 216 | + should be done and do it if so.
|
---|
| 217 | +
|
---|
| 218 | + System-specific pagecopy.h files should define these macros and then
|
---|
| 219 | + #include this file:
|
---|
| 220 | +
|
---|
| 221 | + PAGE_COPY_THRESHOLD
|
---|
| 222 | + -- Minimum size for which virtual copying by pages is worthwhile.
|
---|
| 223 | +
|
---|
| 224 | + PAGE_SIZE
|
---|
| 225 | + -- Size of a page.
|
---|
| 226 | +
|
---|
| 227 | + PAGE_COPY_FWD (dstp, srcp, nbytes_left, nbytes)
|
---|
| 228 | + -- Macro to perform the virtual copy operation.
|
---|
| 229 | + The pointers will be aligned to PAGE_SIZE bytes.
|
---|
| 230 | +*/
|
---|
| 231 | +
|
---|
| 232 | +
|
---|
| 233 | +#if PAGE_COPY_THRESHOLD
|
---|
| 234 | +
|
---|
| 235 | +#include <assert.h>
|
---|
| 236 | +
|
---|
| 237 | +#define PAGE_COPY_FWD_MAYBE(dstp, srcp, nbytes_left, nbytes) \
|
---|
| 238 | + do \
|
---|
| 239 | + { \
|
---|
| 240 | + if ((nbytes) >= PAGE_COPY_THRESHOLD && \
|
---|
| 241 | + PAGE_OFFSET ((dstp) - (srcp)) == 0) \
|
---|
| 242 | + { \
|
---|
| 243 | + /* The amount to copy is past the threshold for copying \
|
---|
| 244 | + pages virtually with kernel VM operations, and the \
|
---|
| 245 | + source and destination addresses have the same alignment. */ \
|
---|
| 246 | + size_t nbytes_before = PAGE_OFFSET (-(dstp)); \
|
---|
| 247 | + if (nbytes_before != 0) \
|
---|
| 248 | + { \
|
---|
| 249 | + /* First copy the words before the first page boundary. */ \
|
---|
| 250 | + WORD_COPY_FWD (dstp, srcp, nbytes_left, nbytes_before); \
|
---|
| 251 | + assert (nbytes_left == 0); \
|
---|
| 252 | + nbytes -= nbytes_before; \
|
---|
| 253 | + } \
|
---|
| 254 | + PAGE_COPY_FWD (dstp, srcp, nbytes_left, nbytes); \
|
---|
| 255 | + } \
|
---|
| 256 | + } while (0)
|
---|
| 257 | +
|
---|
| 258 | +/* The page size is always a power of two, so we can avoid modulo division. */
|
---|
| 259 | +#define PAGE_OFFSET(n) ((n) & (PAGE_SIZE - 1))
|
---|
| 260 | +
|
---|
| 261 | +#else
|
---|
| 262 | +
|
---|
| 263 | +#define PAGE_COPY_FWD_MAYBE(dstp, srcp, nbytes_left, nbytes) /* nada */
|
---|
| 264 | +
|
---|
| 265 | +#endif
|
---|
| 266 | diff -Naur inetutils-1.5.orig/ping/ping_echo.c inetutils-1.5/ping/ping_echo.c
|
---|
| 267 | --- inetutils-1.5.orig/ping/ping_echo.c 2006-10-11 21:46:25.000000000 +0000
|
---|
| 268 | +++ inetutils-1.5/ping/ping_echo.c 2007-06-28 23:08:34.000000000 +0000
|
---|
| 269 | @@ -57,6 +57,11 @@
|
---|
| 270 |
|
---|
| 271 | #include <xalloc.h>
|
---|
| 272 |
|
---|
| 273 | +#ifndef memcpy
|
---|
| 274 | +#include "memcopy.h"
|
---|
| 275 | +#include "pagecopy.h"
|
---|
| 276 | +#endif
|
---|
| 277 | +
|
---|
| 278 | static int handler (int code, void *closure,
|
---|
| 279 | struct sockaddr_in *dest, struct sockaddr_in *from,
|
---|
| 280 | struct ip *ip, icmphdr_t *icmp, int datalen);
|
---|
| 281 | @@ -68,7 +73,6 @@
|
---|
| 282 | void print_icmp_header (struct sockaddr_in *from,
|
---|
| 283 | struct ip *ip, icmphdr_t *icmp, int len);
|
---|
| 284 | static void print_ip_opt (struct ip *ip, int hlen);
|
---|
| 285 | -
|
---|
| 286 | int
|
---|
| 287 | ping_echo (int argc, char **argv)
|
---|
| 288 | {
|
---|
| 289 | @@ -169,6 +173,46 @@
|
---|
| 290 | timing++;
|
---|
| 291 | tp = (struct timeval *) icmp->icmp_data;
|
---|
| 292 |
|
---|
| 293 | +#define OP_T_THRES 16
|
---|
| 294 | +#undef memcpy
|
---|
| 295 | +void *
|
---|
| 296 | + memcpy (dstpp, srcpp, len)
|
---|
| 297 | + void *dstpp;
|
---|
| 298 | +const void *srcpp;
|
---|
| 299 | +size_t len;
|
---|
| 300 | +{
|
---|
| 301 | + unsigned long int dstp = (long int) dstpp;
|
---|
| 302 | + unsigned long int srcp = (long int) srcpp;
|
---|
| 303 | +
|
---|
| 304 | + /* Copy from the beginning to the end. */
|
---|
| 305 | +
|
---|
| 306 | + /* If there are not too few bytes to copy, use word copy. */
|
---|
| 307 | + if (len >= OP_T_THRES)
|
---|
| 308 | + {
|
---|
| 309 | + /* Copy just a few bytes to make DSTP aligned. */
|
---|
| 310 | + len -= (-dstp) % OPSIZ;
|
---|
| 311 | + BYTE_COPY_FWD (dstp, srcp, (-dstp) % OPSIZ);
|
---|
| 312 | +
|
---|
| 313 | + /* Copy whole pages from SRCP to DSTP by virtual address
|
---|
| 314 | + manipulation, as much as possible. */
|
---|
| 315 | +
|
---|
| 316 | + PAGE_COPY_FWD_MAYBE (dstp, srcp, len, len);
|
---|
| 317 | +
|
---|
| 318 | + /* Copy from SRCP to DSTP taking advantage of the known alignment
|
---|
| 319 | + of DSTP. Number of bytes remaining is put in the third argument,
|
---|
| 320 | + i.e. in LEN. This number may vary from machine to machine. */
|
---|
| 321 | +
|
---|
| 322 | + WORD_COPY_FWD (dstp, srcp, len, len);
|
---|
| 323 | +
|
---|
| 324 | + /* Fall out and copy the tail. */
|
---|
| 325 | + }
|
---|
| 326 | +
|
---|
| 327 | + /* There are just a few bytes to copy. Use byte memory operations. */
|
---|
| 328 | + BYTE_COPY_FWD (dstp, srcp, len);
|
---|
| 329 | +
|
---|
| 330 | + return dstpp;
|
---|
| 331 | +}
|
---|
| 332 | +
|
---|
| 333 | /* Avoid unaligned data: */
|
---|
| 334 | memcpy (&tv1, tp, sizeof (tv1));
|
---|
| 335 | tvsub (&tv, &tv1);
|
---|
| 336 | diff -Naur inetutils-1.5.orig/ping/wordcopy.c inetutils-1.5/ping/wordcopy.c
|
---|
| 337 | --- inetutils-1.5.orig/ping/wordcopy.c 1970-01-01 00:00:00.000000000 +0000
|
---|
| 338 | +++ inetutils-1.5/ping/wordcopy.c 2007-06-28 23:00:04.000000000 +0000
|
---|
| 339 | @@ -0,0 +1,413 @@
|
---|
| 340 | +/* wordcopy.c -- subroutines for memory copy functions.
|
---|
| 341 | + Copyright (C) 1991, 1996 Free Software Foundation, Inc.
|
---|
| 342 | + This file is part of the GNU C Library.
|
---|
| 343 | + Contributed by Torbjorn Granlund (tege@sics.se).
|
---|
| 344 | +
|
---|
| 345 | + The GNU C Library is free software; you can redistribute it and/or
|
---|
| 346 | + modify it under the terms of the GNU Lesser General Public
|
---|
| 347 | + License as published by the Free Software Foundation; either
|
---|
| 348 | + version 2.1 of the License, or (at your option) any later version.
|
---|
| 349 | +
|
---|
| 350 | + The GNU C Library is distributed in the hope that it will be useful,
|
---|
| 351 | + but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 352 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
---|
| 353 | + Lesser General Public License for more details.
|
---|
| 354 | +
|
---|
| 355 | + You should have received a copy of the GNU Lesser General Public
|
---|
| 356 | + License along with the GNU C Library; if not, write to the Free
|
---|
| 357 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
---|
| 358 | + 02111-1307 USA. */
|
---|
| 359 | +
|
---|
| 360 | +/* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */
|
---|
| 361 | +
|
---|
| 362 | +#include <stddef.h>
|
---|
| 363 | +#include <memcopy.h>
|
---|
| 364 | +
|
---|
| 365 | +/* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
|
---|
| 366 | + block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
|
---|
| 367 | + Both SRCP and DSTP should be aligned for memory operations on `op_t's. */
|
---|
| 368 | +
|
---|
| 369 | +void
|
---|
| 370 | +_wordcopy_fwd_aligned (dstp, srcp, len)
|
---|
| 371 | + long int dstp;
|
---|
| 372 | + long int srcp;
|
---|
| 373 | + size_t len;
|
---|
| 374 | +{
|
---|
| 375 | + op_t a0, a1;
|
---|
| 376 | +
|
---|
| 377 | + switch (len % 8)
|
---|
| 378 | + {
|
---|
| 379 | + case 2:
|
---|
| 380 | + a0 = ((op_t *) srcp)[0];
|
---|
| 381 | + srcp -= 6 * OPSIZ;
|
---|
| 382 | + dstp -= 7 * OPSIZ;
|
---|
| 383 | + len += 6;
|
---|
| 384 | + goto do1;
|
---|
| 385 | + case 3:
|
---|
| 386 | + a1 = ((op_t *) srcp)[0];
|
---|
| 387 | + srcp -= 5 * OPSIZ;
|
---|
| 388 | + dstp -= 6 * OPSIZ;
|
---|
| 389 | + len += 5;
|
---|
| 390 | + goto do2;
|
---|
| 391 | + case 4:
|
---|
| 392 | + a0 = ((op_t *) srcp)[0];
|
---|
| 393 | + srcp -= 4 * OPSIZ;
|
---|
| 394 | + dstp -= 5 * OPSIZ;
|
---|
| 395 | + len += 4;
|
---|
| 396 | + goto do3;
|
---|
| 397 | + case 5:
|
---|
| 398 | + a1 = ((op_t *) srcp)[0];
|
---|
| 399 | + srcp -= 3 * OPSIZ;
|
---|
| 400 | + dstp -= 4 * OPSIZ;
|
---|
| 401 | + len += 3;
|
---|
| 402 | + goto do4;
|
---|
| 403 | + case 6:
|
---|
| 404 | + a0 = ((op_t *) srcp)[0];
|
---|
| 405 | + srcp -= 2 * OPSIZ;
|
---|
| 406 | + dstp -= 3 * OPSIZ;
|
---|
| 407 | + len += 2;
|
---|
| 408 | + goto do5;
|
---|
| 409 | + case 7:
|
---|
| 410 | + a1 = ((op_t *) srcp)[0];
|
---|
| 411 | + srcp -= 1 * OPSIZ;
|
---|
| 412 | + dstp -= 2 * OPSIZ;
|
---|
| 413 | + len += 1;
|
---|
| 414 | + goto do6;
|
---|
| 415 | +
|
---|
| 416 | + case 0:
|
---|
| 417 | + if (OP_T_THRES <= 3 * OPSIZ && len == 0)
|
---|
| 418 | + return;
|
---|
| 419 | + a0 = ((op_t *) srcp)[0];
|
---|
| 420 | + srcp -= 0 * OPSIZ;
|
---|
| 421 | + dstp -= 1 * OPSIZ;
|
---|
| 422 | + goto do7;
|
---|
| 423 | + case 1:
|
---|
| 424 | + a1 = ((op_t *) srcp)[0];
|
---|
| 425 | + srcp -=-1 * OPSIZ;
|
---|
| 426 | + dstp -= 0 * OPSIZ;
|
---|
| 427 | + len -= 1;
|
---|
| 428 | + if (OP_T_THRES <= 3 * OPSIZ && len == 0)
|
---|
| 429 | + goto do0;
|
---|
| 430 | + goto do8; /* No-op. */
|
---|
| 431 | + }
|
---|
| 432 | +
|
---|
| 433 | + do
|
---|
| 434 | + {
|
---|
| 435 | + do8:
|
---|
| 436 | + a0 = ((op_t *) srcp)[0];
|
---|
| 437 | + ((op_t *) dstp)[0] = a1;
|
---|
| 438 | + do7:
|
---|
| 439 | + a1 = ((op_t *) srcp)[1];
|
---|
| 440 | + ((op_t *) dstp)[1] = a0;
|
---|
| 441 | + do6:
|
---|
| 442 | + a0 = ((op_t *) srcp)[2];
|
---|
| 443 | + ((op_t *) dstp)[2] = a1;
|
---|
| 444 | + do5:
|
---|
| 445 | + a1 = ((op_t *) srcp)[3];
|
---|
| 446 | + ((op_t *) dstp)[3] = a0;
|
---|
| 447 | + do4:
|
---|
| 448 | + a0 = ((op_t *) srcp)[4];
|
---|
| 449 | + ((op_t *) dstp)[4] = a1;
|
---|
| 450 | + do3:
|
---|
| 451 | + a1 = ((op_t *) srcp)[5];
|
---|
| 452 | + ((op_t *) dstp)[5] = a0;
|
---|
| 453 | + do2:
|
---|
| 454 | + a0 = ((op_t *) srcp)[6];
|
---|
| 455 | + ((op_t *) dstp)[6] = a1;
|
---|
| 456 | + do1:
|
---|
| 457 | + a1 = ((op_t *) srcp)[7];
|
---|
| 458 | + ((op_t *) dstp)[7] = a0;
|
---|
| 459 | +
|
---|
| 460 | + srcp += 8 * OPSIZ;
|
---|
| 461 | + dstp += 8 * OPSIZ;
|
---|
| 462 | + len -= 8;
|
---|
| 463 | + }
|
---|
| 464 | + while (len != 0);
|
---|
| 465 | +
|
---|
| 466 | + /* This is the right position for do0. Please don't move
|
---|
| 467 | + it into the loop. */
|
---|
| 468 | + do0:
|
---|
| 469 | + ((op_t *) dstp)[0] = a1;
|
---|
| 470 | +}
|
---|
| 471 | +
|
---|
| 472 | +/* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
|
---|
| 473 | + block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
|
---|
| 474 | + DSTP should be aligned for memory operations on `op_t's, but SRCP must
|
---|
| 475 | + *not* be aligned. */
|
---|
| 476 | +
|
---|
| 477 | +void
|
---|
| 478 | +_wordcopy_fwd_dest_aligned (dstp, srcp, len)
|
---|
| 479 | + long int dstp;
|
---|
| 480 | + long int srcp;
|
---|
| 481 | + size_t len;
|
---|
| 482 | +{
|
---|
| 483 | + op_t a0, a1, a2, a3;
|
---|
| 484 | + int sh_1, sh_2;
|
---|
| 485 | +
|
---|
| 486 | + /* Calculate how to shift a word read at the memory operation
|
---|
| 487 | + aligned srcp to make it aligned for copy. */
|
---|
| 488 | +
|
---|
| 489 | + sh_1 = 8 * (srcp % OPSIZ);
|
---|
| 490 | + sh_2 = 8 * OPSIZ - sh_1;
|
---|
| 491 | +
|
---|
| 492 | + /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
|
---|
| 493 | + it points in the middle of. */
|
---|
| 494 | + srcp &= -OPSIZ;
|
---|
| 495 | +
|
---|
| 496 | + switch (len % 4)
|
---|
| 497 | + {
|
---|
| 498 | + case 2:
|
---|
| 499 | + a1 = ((op_t *) srcp)[0];
|
---|
| 500 | + a2 = ((op_t *) srcp)[1];
|
---|
| 501 | + srcp -= 1 * OPSIZ;
|
---|
| 502 | + dstp -= 3 * OPSIZ;
|
---|
| 503 | + len += 2;
|
---|
| 504 | + goto do1;
|
---|
| 505 | + case 3:
|
---|
| 506 | + a0 = ((op_t *) srcp)[0];
|
---|
| 507 | + a1 = ((op_t *) srcp)[1];
|
---|
| 508 | + srcp -= 0 * OPSIZ;
|
---|
| 509 | + dstp -= 2 * OPSIZ;
|
---|
| 510 | + len += 1;
|
---|
| 511 | + goto do2;
|
---|
| 512 | + case 0:
|
---|
| 513 | + if (OP_T_THRES <= 3 * OPSIZ && len == 0)
|
---|
| 514 | + return;
|
---|
| 515 | + a3 = ((op_t *) srcp)[0];
|
---|
| 516 | + a0 = ((op_t *) srcp)[1];
|
---|
| 517 | + srcp -=-1 * OPSIZ;
|
---|
| 518 | + dstp -= 1 * OPSIZ;
|
---|
| 519 | + len += 0;
|
---|
| 520 | + goto do3;
|
---|
| 521 | + case 1:
|
---|
| 522 | + a2 = ((op_t *) srcp)[0];
|
---|
| 523 | + a3 = ((op_t *) srcp)[1];
|
---|
| 524 | + srcp -=-2 * OPSIZ;
|
---|
| 525 | + dstp -= 0 * OPSIZ;
|
---|
| 526 | + len -= 1;
|
---|
| 527 | + if (OP_T_THRES <= 3 * OPSIZ && len == 0)
|
---|
| 528 | + goto do0;
|
---|
| 529 | + goto do4; /* No-op. */
|
---|
| 530 | + }
|
---|
| 531 | +
|
---|
| 532 | + do
|
---|
| 533 | + {
|
---|
| 534 | + do4:
|
---|
| 535 | + a0 = ((op_t *) srcp)[0];
|
---|
| 536 | + ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
|
---|
| 537 | + do3:
|
---|
| 538 | + a1 = ((op_t *) srcp)[1];
|
---|
| 539 | + ((op_t *) dstp)[1] = MERGE (a3, sh_1, a0, sh_2);
|
---|
| 540 | + do2:
|
---|
| 541 | + a2 = ((op_t *) srcp)[2];
|
---|
| 542 | + ((op_t *) dstp)[2] = MERGE (a0, sh_1, a1, sh_2);
|
---|
| 543 | + do1:
|
---|
| 544 | + a3 = ((op_t *) srcp)[3];
|
---|
| 545 | + ((op_t *) dstp)[3] = MERGE (a1, sh_1, a2, sh_2);
|
---|
| 546 | +
|
---|
| 547 | + srcp += 4 * OPSIZ;
|
---|
| 548 | + dstp += 4 * OPSIZ;
|
---|
| 549 | + len -= 4;
|
---|
| 550 | + }
|
---|
| 551 | + while (len != 0);
|
---|
| 552 | +
|
---|
| 553 | + /* This is the right position for do0. Please don't move
|
---|
| 554 | + it into the loop. */
|
---|
| 555 | + do0:
|
---|
| 556 | + ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
|
---|
| 557 | +}
|
---|
| 558 | +
|
---|
| 559 | +/* _wordcopy_bwd_aligned -- Copy block finishing right before
|
---|
| 560 | + SRCP to block finishing right before DSTP with LEN `op_t' words
|
---|
| 561 | + (not LEN bytes!). Both SRCP and DSTP should be aligned for memory
|
---|
| 562 | + operations on `op_t's. */
|
---|
| 563 | +
|
---|
| 564 | +void
|
---|
| 565 | +_wordcopy_bwd_aligned (dstp, srcp, len)
|
---|
| 566 | + long int dstp;
|
---|
| 567 | + long int srcp;
|
---|
| 568 | + size_t len;
|
---|
| 569 | +{
|
---|
| 570 | + op_t a0, a1;
|
---|
| 571 | +
|
---|
| 572 | + switch (len % 8)
|
---|
| 573 | + {
|
---|
| 574 | + case 2:
|
---|
| 575 | + srcp -= 2 * OPSIZ;
|
---|
| 576 | + dstp -= 1 * OPSIZ;
|
---|
| 577 | + a0 = ((op_t *) srcp)[1];
|
---|
| 578 | + len += 6;
|
---|
| 579 | + goto do1;
|
---|
| 580 | + case 3:
|
---|
| 581 | + srcp -= 3 * OPSIZ;
|
---|
| 582 | + dstp -= 2 * OPSIZ;
|
---|
| 583 | + a1 = ((op_t *) srcp)[2];
|
---|
| 584 | + len += 5;
|
---|
| 585 | + goto do2;
|
---|
| 586 | + case 4:
|
---|
| 587 | + srcp -= 4 * OPSIZ;
|
---|
| 588 | + dstp -= 3 * OPSIZ;
|
---|
| 589 | + a0 = ((op_t *) srcp)[3];
|
---|
| 590 | + len += 4;
|
---|
| 591 | + goto do3;
|
---|
| 592 | + case 5:
|
---|
| 593 | + srcp -= 5 * OPSIZ;
|
---|
| 594 | + dstp -= 4 * OPSIZ;
|
---|
| 595 | + a1 = ((op_t *) srcp)[4];
|
---|
| 596 | + len += 3;
|
---|
| 597 | + goto do4;
|
---|
| 598 | + case 6:
|
---|
| 599 | + srcp -= 6 * OPSIZ;
|
---|
| 600 | + dstp -= 5 * OPSIZ;
|
---|
| 601 | + a0 = ((op_t *) srcp)[5];
|
---|
| 602 | + len += 2;
|
---|
| 603 | + goto do5;
|
---|
| 604 | + case 7:
|
---|
| 605 | + srcp -= 7 * OPSIZ;
|
---|
| 606 | + dstp -= 6 * OPSIZ;
|
---|
| 607 | + a1 = ((op_t *) srcp)[6];
|
---|
| 608 | + len += 1;
|
---|
| 609 | + goto do6;
|
---|
| 610 | +
|
---|
| 611 | + case 0:
|
---|
| 612 | + if (OP_T_THRES <= 3 * OPSIZ && len == 0)
|
---|
| 613 | + return;
|
---|
| 614 | + srcp -= 8 * OPSIZ;
|
---|
| 615 | + dstp -= 7 * OPSIZ;
|
---|
| 616 | + a0 = ((op_t *) srcp)[7];
|
---|
| 617 | + goto do7;
|
---|
| 618 | + case 1:
|
---|
| 619 | + srcp -= 9 * OPSIZ;
|
---|
| 620 | + dstp -= 8 * OPSIZ;
|
---|
| 621 | + a1 = ((op_t *) srcp)[8];
|
---|
| 622 | + len -= 1;
|
---|
| 623 | + if (OP_T_THRES <= 3 * OPSIZ && len == 0)
|
---|
| 624 | + goto do0;
|
---|
| 625 | + goto do8; /* No-op. */
|
---|
| 626 | + }
|
---|
| 627 | +
|
---|
| 628 | + do
|
---|
| 629 | + {
|
---|
| 630 | + do8:
|
---|
| 631 | + a0 = ((op_t *) srcp)[7];
|
---|
| 632 | + ((op_t *) dstp)[7] = a1;
|
---|
| 633 | + do7:
|
---|
| 634 | + a1 = ((op_t *) srcp)[6];
|
---|
| 635 | + ((op_t *) dstp)[6] = a0;
|
---|
| 636 | + do6:
|
---|
| 637 | + a0 = ((op_t *) srcp)[5];
|
---|
| 638 | + ((op_t *) dstp)[5] = a1;
|
---|
| 639 | + do5:
|
---|
| 640 | + a1 = ((op_t *) srcp)[4];
|
---|
| 641 | + ((op_t *) dstp)[4] = a0;
|
---|
| 642 | + do4:
|
---|
| 643 | + a0 = ((op_t *) srcp)[3];
|
---|
| 644 | + ((op_t *) dstp)[3] = a1;
|
---|
| 645 | + do3:
|
---|
| 646 | + a1 = ((op_t *) srcp)[2];
|
---|
| 647 | + ((op_t *) dstp)[2] = a0;
|
---|
| 648 | + do2:
|
---|
| 649 | + a0 = ((op_t *) srcp)[1];
|
---|
| 650 | + ((op_t *) dstp)[1] = a1;
|
---|
| 651 | + do1:
|
---|
| 652 | + a1 = ((op_t *) srcp)[0];
|
---|
| 653 | + ((op_t *) dstp)[0] = a0;
|
---|
| 654 | +
|
---|
| 655 | + srcp -= 8 * OPSIZ;
|
---|
| 656 | + dstp -= 8 * OPSIZ;
|
---|
| 657 | + len -= 8;
|
---|
| 658 | + }
|
---|
| 659 | + while (len != 0);
|
---|
| 660 | +
|
---|
| 661 | + /* This is the right position for do0. Please don't move
|
---|
| 662 | + it into the loop. */
|
---|
| 663 | + do0:
|
---|
| 664 | + ((op_t *) dstp)[7] = a1;
|
---|
| 665 | +}
|
---|
| 666 | +
|
---|
| 667 | +/* _wordcopy_bwd_dest_aligned -- Copy block finishing right
|
---|
| 668 | + before SRCP to block finishing right before DSTP with LEN `op_t'
|
---|
| 669 | + words (not LEN bytes!). DSTP should be aligned for memory
|
---|
| 670 | + operations on `op_t', but SRCP must *not* be aligned. */
|
---|
| 671 | +
|
---|
| 672 | +void
|
---|
| 673 | +_wordcopy_bwd_dest_aligned (dstp, srcp, len)
|
---|
| 674 | + long int dstp;
|
---|
| 675 | + long int srcp;
|
---|
| 676 | + size_t len;
|
---|
| 677 | +{
|
---|
| 678 | + op_t a0, a1, a2, a3;
|
---|
| 679 | + int sh_1, sh_2;
|
---|
| 680 | +
|
---|
| 681 | + /* Calculate how to shift a word read at the memory operation
|
---|
| 682 | + aligned srcp to make it aligned for copy. */
|
---|
| 683 | +
|
---|
| 684 | + sh_1 = 8 * (srcp % OPSIZ);
|
---|
| 685 | + sh_2 = 8 * OPSIZ - sh_1;
|
---|
| 686 | +
|
---|
| 687 | + /* Make srcp aligned by rounding it down to the beginning of the op_t
|
---|
| 688 | + it points in the middle of. */
|
---|
| 689 | + srcp &= -OPSIZ;
|
---|
| 690 | + srcp += OPSIZ;
|
---|
| 691 | +
|
---|
| 692 | + switch (len % 4)
|
---|
| 693 | + {
|
---|
| 694 | + case 2:
|
---|
| 695 | + srcp -= 3 * OPSIZ;
|
---|
| 696 | + dstp -= 1 * OPSIZ;
|
---|
| 697 | + a2 = ((op_t *) srcp)[2];
|
---|
| 698 | + a1 = ((op_t *) srcp)[1];
|
---|
| 699 | + len += 2;
|
---|
| 700 | + goto do1;
|
---|
| 701 | + case 3:
|
---|
| 702 | + srcp -= 4 * OPSIZ;
|
---|
| 703 | + dstp -= 2 * OPSIZ;
|
---|
| 704 | + a3 = ((op_t *) srcp)[3];
|
---|
| 705 | + a2 = ((op_t *) srcp)[2];
|
---|
| 706 | + len += 1;
|
---|
| 707 | + goto do2;
|
---|
| 708 | + case 0:
|
---|
| 709 | + if (OP_T_THRES <= 3 * OPSIZ && len == 0)
|
---|
| 710 | + return;
|
---|
| 711 | + srcp -= 5 * OPSIZ;
|
---|
| 712 | + dstp -= 3 * OPSIZ;
|
---|
| 713 | + a0 = ((op_t *) srcp)[4];
|
---|
| 714 | + a3 = ((op_t *) srcp)[3];
|
---|
| 715 | + goto do3;
|
---|
| 716 | + case 1:
|
---|
| 717 | + srcp -= 6 * OPSIZ;
|
---|
| 718 | + dstp -= 4 * OPSIZ;
|
---|
| 719 | + a1 = ((op_t *) srcp)[5];
|
---|
| 720 | + a0 = ((op_t *) srcp)[4];
|
---|
| 721 | + len -= 1;
|
---|
| 722 | + if (OP_T_THRES <= 3 * OPSIZ && len == 0)
|
---|
| 723 | + goto do0;
|
---|
| 724 | + goto do4; /* No-op. */
|
---|
| 725 | + }
|
---|
| 726 | +
|
---|
| 727 | + do
|
---|
| 728 | + {
|
---|
| 729 | + do4:
|
---|
| 730 | + a3 = ((op_t *) srcp)[3];
|
---|
| 731 | + ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2);
|
---|
| 732 | + do3:
|
---|
| 733 | + a2 = ((op_t *) srcp)[2];
|
---|
| 734 | + ((op_t *) dstp)[2] = MERGE (a3, sh_1, a0, sh_2);
|
---|
| 735 | + do2:
|
---|
| 736 | + a1 = ((op_t *) srcp)[1];
|
---|
| 737 | + ((op_t *) dstp)[1] = MERGE (a2, sh_1, a3, sh_2);
|
---|
| 738 | + do1:
|
---|
| 739 | + a0 = ((op_t *) srcp)[0];
|
---|
| 740 | + ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2);
|
---|
| 741 | +
|
---|
| 742 | + srcp -= 4 * OPSIZ;
|
---|
| 743 | + dstp -= 4 * OPSIZ;
|
---|
| 744 | + len -= 4;
|
---|
| 745 | + }
|
---|
| 746 | + while (len != 0);
|
---|
| 747 | +
|
---|
| 748 | + /* This is the right position for do0. Please don't move
|
---|
| 749 | + it into the loop. */
|
---|
| 750 | + do0:
|
---|
| 751 | + ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2);
|
---|
| 752 | +}
|
---|