source: patches/inetutils-1.5-memcpy_sparc64-1.patch@ 124823a

Last change on this file since 124823a was c90a839, checked in by Joe Ciccone <jciccone@…>, 16 years ago

Applied r3989 from trunk to the 1.1 branch.

  • Property mode set to 100644
File size: 22.7 KB
RevLine 
[c90a839]1Submitted By: William Harrington <wwh04660 at ucmo dot edu>
2Date: 2008-08-24
3Initial Package Version: 1.5
4Upstream Status: Rejected
5Description: Use a local memcpy instead of the glibc implementation for ping
6 This resolves the bus errors
7
8diff -Naur inetutils-1.5.orig/ping/Makefile.in inetutils-1.5/ping/Makefile.in
9--- inetutils-1.5.orig/ping/Makefile.in 2006-10-21 11:59:48.000000000 +0000
10+++ inetutils-1.5/ping/Makefile.in 2007-06-28 23:10:43.000000000 +0000
11@@ -109,7 +109,8 @@
12 PROGRAMS = $(bin_PROGRAMS)
13 am_ping_OBJECTS = ping.$(OBJEXT) ping_common.$(OBJEXT) \
14 ping_echo.$(OBJEXT) ping_address.$(OBJEXT) \
15- ping_router.$(OBJEXT) ping_timestamp.$(OBJEXT)
16+ ping_router.$(OBJEXT) ping_timestamp.$(OBJEXT) \
17+ wordcopy.$(OBJEXT)
18 ping_OBJECTS = $(am_ping_OBJECTS)
19 ping_DEPENDENCIES =
20 am_ping6_OBJECTS = ping6.$(OBJEXT) ping_common.$(OBJEXT)
21@@ -333,8 +334,9 @@
22 ping_LDADD = -L../libinetutils -linetutils -L../libicmp -licmp -L../lib -lgnu
23 ping6_LDADD = -L../lib -lgnu -L../libinetutils -linetutils
24 INCLUDES = -I$(top_srcdir)/lib -I../lib -I$(top_srcdir)/libicmp
25-ping_SOURCES = ping.c ping_common.c ping_echo.c ping_address.c \
26- ping_router.c ping_timestamp.c ping_common.h ping_impl.h
27+ping_SOURCES = memcopy.h pagecopy.h ping.c ping_common.c \
28+ ping_echo.c ping_address.c ping_router.c ping_timestamp.c \
29+ ping_common.h ping_impl.h wordcopy.c
30
31 ping6_SOURCES = ping6.c ping_common.c ping_common.h ping6.h
32 SUIDMODE = -o root -m 4775
33diff -Naur inetutils-1.5.orig/ping/memcopy.h inetutils-1.5/ping/memcopy.h
34--- inetutils-1.5.orig/ping/memcopy.h 1970-01-01 00:00:00.000000000 +0000
35+++ inetutils-1.5/ping/memcopy.h 2007-06-28 23:08:34.000000000 +0000
36@@ -0,0 +1,150 @@
37+/* memcopy.h -- definitions for memory copy functions. Generic C version.
38+ Copyright (C) 1991, 1992, 1993, 1997, 2004 Free Software Foundation, Inc.
39+ This file is part of the GNU C Library.
40+ Contributed by Torbjorn Granlund (tege@sics.se).
41+
42+ The GNU C Library is free software; you can redistribute it and/or
43+ modify it under the terms of the GNU Lesser General Public
44+ License as published by the Free Software Foundation; either
45+ version 2.1 of the License, or (at your option) any later version.
46+
47+ The GNU C Library is distributed in the hope that it will be useful,
48+ but WITHOUT ANY WARRANTY; without even the implied warranty of
49+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
50+ Lesser General Public License for more details.
51+
52+ You should have received a copy of the GNU Lesser General Public
53+ License along with the GNU C Library; if not, write to the Free
54+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
55+ 02111-1307 USA. */
56+
57+/* The strategy of the memory functions is:
58+
59+ 1. Copy bytes until the destination pointer is aligned.
60+
61+ 2. Copy words in unrolled loops. If the source and destination
62+ are not aligned in the same way, use word memory operations,
63+ but shift and merge two read words before writing.
64+
65+ 3. Copy the few remaining bytes.
66+
67+ This is fast on processors that have at least 10 registers for
68+ allocation by GCC, and that can access memory at reg+const in one
69+ instruction.
70+
71+ I made an "exhaustive" test of this memmove when I wrote it,
72+ exhaustive in the sense that I tried all alignment and length
73+ combinations, with and without overlap. */
74+
75+#include <sys/cdefs.h>
76+#include <endian.h>
77+
78+/* The macros defined in this file are:
79+
80+ BYTE_COPY_FWD(dst_beg_ptr, src_beg_ptr, nbytes_to_copy)
81+
82+ BYTE_COPY_BWD(dst_end_ptr, src_end_ptr, nbytes_to_copy)
83+
84+ WORD_COPY_FWD(dst_beg_ptr, src_beg_ptr, nbytes_remaining, nbytes_to_copy)
85+
86+ WORD_COPY_BWD(dst_end_ptr, src_end_ptr, nbytes_remaining, nbytes_to_copy)
87+
88+ MERGE(old_word, sh_1, new_word, sh_2)
89+ [I fail to understand. I feel stupid. --roland]
90+*/
91+
92+/* Type to use for aligned memory operations.
93+ This should normally be the biggest type supported by a single load
94+ and store. */
95+#define op_t unsigned long int
96+#define OPSIZ (sizeof(op_t))
97+
98+/* Type to use for unaligned operations. */
99+typedef unsigned char byte;
100+
101+/* Optimal type for storing bytes in registers. */
102+#define reg_char char
103+
104+#if __BYTE_ORDER == __LITTLE_ENDIAN
105+#define MERGE(w0, sh_1, w1, sh_2) (((w0) >> (sh_1)) | ((w1) << (sh_2)))
106+#endif
107+#if __BYTE_ORDER == __BIG_ENDIAN
108+#define MERGE(w0, sh_1, w1, sh_2) (((w0) << (sh_1)) | ((w1) >> (sh_2)))
109+#endif
110+
111+/* Copy exactly NBYTES bytes from SRC_BP to DST_BP,
112+ without any assumptions about alignment of the pointers. */
113+#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes) \
114+ do \
115+ { \
116+ size_t __nbytes = (nbytes); \
117+ while (__nbytes > 0) \
118+ { \
119+ byte __x = ((byte *) src_bp)[0]; \
120+ src_bp += 1; \
121+ __nbytes -= 1; \
122+ ((byte *) dst_bp)[0] = __x; \
123+ dst_bp += 1; \
124+ } \
125+ } while (0)
126+
127+/* Copy exactly NBYTES_TO_COPY bytes from SRC_END_PTR to DST_END_PTR,
128+ beginning at the bytes right before the pointers and continuing towards
129+ smaller addresses. Don't assume anything about alignment of the
130+ pointers. */
131+#define BYTE_COPY_BWD(dst_ep, src_ep, nbytes) \
132+ do \
133+ { \
134+ size_t __nbytes = (nbytes); \
135+ while (__nbytes > 0) \
136+ { \
137+ byte __x; \
138+ src_ep -= 1; \
139+ __x = ((byte *) src_ep)[0]; \
140+ dst_ep -= 1; \
141+ __nbytes -= 1; \
142+ ((byte *) dst_ep)[0] = __x; \
143+ } \
144+ } while (0)
145+
146+/* Copy *up to* NBYTES bytes from SRC_BP to DST_BP, with
147+ the assumption that DST_BP is aligned on an OPSIZ multiple. If
148+ not all bytes could be easily copied, store remaining number of bytes
149+ in NBYTES_LEFT, otherwise store 0. */
150+extern void _wordcopy_fwd_aligned (long int, long int, size_t) __THROW;
151+extern void _wordcopy_fwd_dest_aligned (long int, long int, size_t) __THROW;
152+#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes) \
153+ do \
154+ { \
155+ if (src_bp % OPSIZ == 0) \
156+ _wordcopy_fwd_aligned (dst_bp, src_bp, (nbytes) / OPSIZ); \
157+ else \
158+ _wordcopy_fwd_dest_aligned (dst_bp, src_bp, (nbytes) / OPSIZ); \
159+ src_bp += (nbytes) & -OPSIZ; \
160+ dst_bp += (nbytes) & -OPSIZ; \
161+ (nbytes_left) = (nbytes) % OPSIZ; \
162+ } while (0)
163+
164+/* Copy *up to* NBYTES_TO_COPY bytes from SRC_END_PTR to DST_END_PTR,
165+ beginning at the words (of type op_t) right before the pointers and
166+ continuing towards smaller addresses. May take advantage of the fact that
167+ DST_END_PTR is aligned on an OPSIZ multiple. If not all bytes could be
168+ easily copied, store remaining number of bytes in NBYTES_REMAINING,
169+ otherwise store 0. */
170+extern void _wordcopy_bwd_aligned (long int, long int, size_t) __THROW;
171+extern void _wordcopy_bwd_dest_aligned (long int, long int, size_t) __THROW;
172+#define WORD_COPY_BWD(dst_ep, src_ep, nbytes_left, nbytes) \
173+ do \
174+ { \
175+ if (src_ep % OPSIZ == 0) \
176+ _wordcopy_bwd_aligned (dst_ep, src_ep, (nbytes) / OPSIZ); \
177+ else \
178+ _wordcopy_bwd_dest_aligned (dst_ep, src_ep, (nbytes) / OPSIZ); \
179+ src_ep -= (nbytes) & -OPSIZ; \
180+ dst_ep -= (nbytes) & -OPSIZ; \
181+ (nbytes_left) = (nbytes) % OPSIZ; \
182+ } while (0)
183+
184+
185+/* Threshold value for when to enter the unrolled loops. */
186+#define OP_T_THRES 16
187diff -Naur inetutils-1.5.orig/ping/pagecopy.h inetutils-1.5/ping/pagecopy.h
188--- inetutils-1.5.orig/ping/pagecopy.h 1970-01-01 00:00:00.000000000 +0000
189+++ inetutils-1.5/ping/pagecopy.h 2007-06-28 23:08:34.000000000 +0000
190@@ -0,0 +1,75 @@
191+/* Macros for copying by pages; used in memcpy, memmove. Generic macros.
192+ Copyright (C) 1995, 1997 Free Software Foundation, Inc.
193+ This file is part of the GNU C Library.
194+
195+ The GNU C Library is free software; you can redistribute it and/or
196+ modify it under the terms of the GNU Lesser General Public
197+ License as published by the Free Software Foundation; either
198+ version 2.1 of the License, or (at your option) any later version.
199+
200+ The GNU C Library is distributed in the hope that it will be useful,
201+ but WITHOUT ANY WARRANTY; without even the implied warranty of
202+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
203+ Lesser General Public License for more details.
204+
205+ You should have received a copy of the GNU Lesser General Public
206+ License along with the GNU C Library; if not, write to the Free
207+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
208+ 02111-1307 USA. */
209+
210+/* This file defines the macro:
211+
212+ PAGE_COPY_FWD_MAYBE (dstp, srcp, nbytes_left, nbytes)
213+
214+ which is invoked like WORD_COPY_FWD et al. The pointers should be at
215+ least word aligned. This will check if virtual copying by pages can and
216+ should be done and do it if so.
217+
218+ System-specific pagecopy.h files should define these macros and then
219+ #include this file:
220+
221+ PAGE_COPY_THRESHOLD
222+ -- Minimum size for which virtual copying by pages is worthwhile.
223+
224+ PAGE_SIZE
225+ -- Size of a page.
226+
227+ PAGE_COPY_FWD (dstp, srcp, nbytes_left, nbytes)
228+ -- Macro to perform the virtual copy operation.
229+ The pointers will be aligned to PAGE_SIZE bytes.
230+*/
231+
232+
233+#if PAGE_COPY_THRESHOLD
234+
235+#include <assert.h>
236+
237+#define PAGE_COPY_FWD_MAYBE(dstp, srcp, nbytes_left, nbytes) \
238+ do \
239+ { \
240+ if ((nbytes) >= PAGE_COPY_THRESHOLD && \
241+ PAGE_OFFSET ((dstp) - (srcp)) == 0) \
242+ { \
243+ /* The amount to copy is past the threshold for copying \
244+ pages virtually with kernel VM operations, and the \
245+ source and destination addresses have the same alignment. */ \
246+ size_t nbytes_before = PAGE_OFFSET (-(dstp)); \
247+ if (nbytes_before != 0) \
248+ { \
249+ /* First copy the words before the first page boundary. */ \
250+ WORD_COPY_FWD (dstp, srcp, nbytes_left, nbytes_before); \
251+ assert (nbytes_left == 0); \
252+ nbytes -= nbytes_before; \
253+ } \
254+ PAGE_COPY_FWD (dstp, srcp, nbytes_left, nbytes); \
255+ } \
256+ } while (0)
257+
258+/* The page size is always a power of two, so we can avoid modulo division. */
259+#define PAGE_OFFSET(n) ((n) & (PAGE_SIZE - 1))
260+
261+#else
262+
263+#define PAGE_COPY_FWD_MAYBE(dstp, srcp, nbytes_left, nbytes) /* nada */
264+
265+#endif
266diff -Naur inetutils-1.5.orig/ping/ping_echo.c inetutils-1.5/ping/ping_echo.c
267--- inetutils-1.5.orig/ping/ping_echo.c 2006-10-11 21:46:25.000000000 +0000
268+++ inetutils-1.5/ping/ping_echo.c 2007-06-28 23:08:34.000000000 +0000
269@@ -57,6 +57,11 @@
270
271 #include <xalloc.h>
272
273+#ifndef memcpy
274+#include "memcopy.h"
275+#include "pagecopy.h"
276+#endif
277+
278 static int handler (int code, void *closure,
279 struct sockaddr_in *dest, struct sockaddr_in *from,
280 struct ip *ip, icmphdr_t *icmp, int datalen);
281@@ -68,7 +73,6 @@
282 void print_icmp_header (struct sockaddr_in *from,
283 struct ip *ip, icmphdr_t *icmp, int len);
284 static void print_ip_opt (struct ip *ip, int hlen);
285-
286 int
287 ping_echo (int argc, char **argv)
288 {
289@@ -169,6 +173,46 @@
290 timing++;
291 tp = (struct timeval *) icmp->icmp_data;
292
293+#define OP_T_THRES 16
294+#undef memcpy
295+void *
296+ memcpy (dstpp, srcpp, len)
297+ void *dstpp;
298+const void *srcpp;
299+size_t len;
300+{
301+ unsigned long int dstp = (long int) dstpp;
302+ unsigned long int srcp = (long int) srcpp;
303+
304+ /* Copy from the beginning to the end. */
305+
309+ /* If there are not too few bytes to copy, use word copy. */
307+ if (len >= OP_T_THRES)
308+ {
309+ /* Copy just a few bytes to make DSTP aligned. */
310+ len -= (-dstp) % OPSIZ;
311+ BYTE_COPY_FWD (dstp, srcp, (-dstp) % OPSIZ);
312+
313+ /* Copy whole pages from SRCP to DSTP by virtual address
314+ manipulation, as much as possible. */
315+
316+ PAGE_COPY_FWD_MAYBE (dstp, srcp, len, len);
317+
318+ /* Copy from SRCP to DSTP taking advantage of the known alignment
319+ of DSTP. Number of bytes remaining is put in the third argument,
320+ i.e. in LEN. This number may vary from machine to machine. */
321+
322+ WORD_COPY_FWD (dstp, srcp, len, len);
323+
324+ /* Fall out and copy the tail. */
325+ }
326+
327+ /* There are just a few bytes to copy. Use byte memory operations. */
328+ BYTE_COPY_FWD (dstp, srcp, len);
329+
330+ return dstpp;
331+}
332+
333 /* Avoid unaligned data: */
334 memcpy (&tv1, tp, sizeof (tv1));
335 tvsub (&tv, &tv1);
336diff -Naur inetutils-1.5.orig/ping/wordcopy.c inetutils-1.5/ping/wordcopy.c
337--- inetutils-1.5.orig/ping/wordcopy.c 1970-01-01 00:00:00.000000000 +0000
338+++ inetutils-1.5/ping/wordcopy.c 2007-06-28 23:00:04.000000000 +0000
339@@ -0,0 +1,413 @@
340+/* wordcopy.c -- subroutines for memory copy functions.
341+ Copyright (C) 1991, 1996 Free Software Foundation, Inc.
342+ This file is part of the GNU C Library.
343+ Contributed by Torbjorn Granlund (tege@sics.se).
344+
345+ The GNU C Library is free software; you can redistribute it and/or
346+ modify it under the terms of the GNU Lesser General Public
347+ License as published by the Free Software Foundation; either
348+ version 2.1 of the License, or (at your option) any later version.
349+
350+ The GNU C Library is distributed in the hope that it will be useful,
351+ but WITHOUT ANY WARRANTY; without even the implied warranty of
352+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
353+ Lesser General Public License for more details.
354+
355+ You should have received a copy of the GNU Lesser General Public
356+ License along with the GNU C Library; if not, write to the Free
357+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
358+ 02111-1307 USA. */
359+
360+/* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */
361+
362+#include <stddef.h>
363+#include <memcopy.h>
364+
365+/* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
366+ block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
367+ Both SRCP and DSTP should be aligned for memory operations on `op_t's. */
368+
369+void
370+_wordcopy_fwd_aligned (dstp, srcp, len)
371+ long int dstp;
372+ long int srcp;
373+ size_t len;
374+{
375+ op_t a0, a1;
376+
377+ switch (len % 8)
378+ {
379+ case 2:
380+ a0 = ((op_t *) srcp)[0];
381+ srcp -= 6 * OPSIZ;
382+ dstp -= 7 * OPSIZ;
383+ len += 6;
384+ goto do1;
385+ case 3:
386+ a1 = ((op_t *) srcp)[0];
387+ srcp -= 5 * OPSIZ;
388+ dstp -= 6 * OPSIZ;
389+ len += 5;
390+ goto do2;
391+ case 4:
392+ a0 = ((op_t *) srcp)[0];
393+ srcp -= 4 * OPSIZ;
394+ dstp -= 5 * OPSIZ;
395+ len += 4;
396+ goto do3;
397+ case 5:
398+ a1 = ((op_t *) srcp)[0];
399+ srcp -= 3 * OPSIZ;
400+ dstp -= 4 * OPSIZ;
401+ len += 3;
402+ goto do4;
403+ case 6:
404+ a0 = ((op_t *) srcp)[0];
405+ srcp -= 2 * OPSIZ;
406+ dstp -= 3 * OPSIZ;
407+ len += 2;
408+ goto do5;
409+ case 7:
410+ a1 = ((op_t *) srcp)[0];
411+ srcp -= 1 * OPSIZ;
412+ dstp -= 2 * OPSIZ;
413+ len += 1;
414+ goto do6;
415+
416+ case 0:
417+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
418+ return;
419+ a0 = ((op_t *) srcp)[0];
420+ srcp -= 0 * OPSIZ;
421+ dstp -= 1 * OPSIZ;
422+ goto do7;
423+ case 1:
424+ a1 = ((op_t *) srcp)[0];
425+ srcp -=-1 * OPSIZ;
426+ dstp -= 0 * OPSIZ;
427+ len -= 1;
428+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
429+ goto do0;
430+ goto do8; /* No-op. */
431+ }
432+
433+ do
434+ {
435+ do8:
436+ a0 = ((op_t *) srcp)[0];
437+ ((op_t *) dstp)[0] = a1;
438+ do7:
439+ a1 = ((op_t *) srcp)[1];
440+ ((op_t *) dstp)[1] = a0;
441+ do6:
442+ a0 = ((op_t *) srcp)[2];
443+ ((op_t *) dstp)[2] = a1;
444+ do5:
445+ a1 = ((op_t *) srcp)[3];
446+ ((op_t *) dstp)[3] = a0;
447+ do4:
448+ a0 = ((op_t *) srcp)[4];
449+ ((op_t *) dstp)[4] = a1;
450+ do3:
451+ a1 = ((op_t *) srcp)[5];
452+ ((op_t *) dstp)[5] = a0;
453+ do2:
454+ a0 = ((op_t *) srcp)[6];
455+ ((op_t *) dstp)[6] = a1;
456+ do1:
457+ a1 = ((op_t *) srcp)[7];
458+ ((op_t *) dstp)[7] = a0;
459+
460+ srcp += 8 * OPSIZ;
461+ dstp += 8 * OPSIZ;
462+ len -= 8;
463+ }
464+ while (len != 0);
465+
466+ /* This is the right position for do0. Please don't move
467+ it into the loop. */
468+ do0:
469+ ((op_t *) dstp)[0] = a1;
470+}
471+
472+/* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
473+ block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
474+ DSTP should be aligned for memory operations on `op_t's, but SRCP must
475+ *not* be aligned. */
476+
477+void
478+_wordcopy_fwd_dest_aligned (dstp, srcp, len)
479+ long int dstp;
480+ long int srcp;
481+ size_t len;
482+{
483+ op_t a0, a1, a2, a3;
484+ int sh_1, sh_2;
485+
486+ /* Calculate how to shift a word read at the memory operation
487+ aligned srcp to make it aligned for copy. */
488+
489+ sh_1 = 8 * (srcp % OPSIZ);
490+ sh_2 = 8 * OPSIZ - sh_1;
491+
492+ /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
493+ it points in the middle of. */
494+ srcp &= -OPSIZ;
495+
496+ switch (len % 4)
497+ {
498+ case 2:
499+ a1 = ((op_t *) srcp)[0];
500+ a2 = ((op_t *) srcp)[1];
501+ srcp -= 1 * OPSIZ;
502+ dstp -= 3 * OPSIZ;
503+ len += 2;
504+ goto do1;
505+ case 3:
506+ a0 = ((op_t *) srcp)[0];
507+ a1 = ((op_t *) srcp)[1];
508+ srcp -= 0 * OPSIZ;
509+ dstp -= 2 * OPSIZ;
510+ len += 1;
511+ goto do2;
512+ case 0:
513+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
514+ return;
515+ a3 = ((op_t *) srcp)[0];
516+ a0 = ((op_t *) srcp)[1];
517+ srcp -=-1 * OPSIZ;
518+ dstp -= 1 * OPSIZ;
519+ len += 0;
520+ goto do3;
521+ case 1:
522+ a2 = ((op_t *) srcp)[0];
523+ a3 = ((op_t *) srcp)[1];
524+ srcp -=-2 * OPSIZ;
525+ dstp -= 0 * OPSIZ;
526+ len -= 1;
527+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
528+ goto do0;
529+ goto do4; /* No-op. */
530+ }
531+
532+ do
533+ {
534+ do4:
535+ a0 = ((op_t *) srcp)[0];
536+ ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
537+ do3:
538+ a1 = ((op_t *) srcp)[1];
539+ ((op_t *) dstp)[1] = MERGE (a3, sh_1, a0, sh_2);
540+ do2:
541+ a2 = ((op_t *) srcp)[2];
542+ ((op_t *) dstp)[2] = MERGE (a0, sh_1, a1, sh_2);
543+ do1:
544+ a3 = ((op_t *) srcp)[3];
545+ ((op_t *) dstp)[3] = MERGE (a1, sh_1, a2, sh_2);
546+
547+ srcp += 4 * OPSIZ;
548+ dstp += 4 * OPSIZ;
549+ len -= 4;
550+ }
551+ while (len != 0);
552+
553+ /* This is the right position for do0. Please don't move
554+ it into the loop. */
555+ do0:
556+ ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
557+}
558+
559+/* _wordcopy_bwd_aligned -- Copy block finishing right before
560+ SRCP to block finishing right before DSTP with LEN `op_t' words
561+ (not LEN bytes!). Both SRCP and DSTP should be aligned for memory
562+ operations on `op_t's. */
563+
564+void
565+_wordcopy_bwd_aligned (dstp, srcp, len)
566+ long int dstp;
567+ long int srcp;
568+ size_t len;
569+{
570+ op_t a0, a1;
571+
572+ switch (len % 8)
573+ {
574+ case 2:
575+ srcp -= 2 * OPSIZ;
576+ dstp -= 1 * OPSIZ;
577+ a0 = ((op_t *) srcp)[1];
578+ len += 6;
579+ goto do1;
580+ case 3:
581+ srcp -= 3 * OPSIZ;
582+ dstp -= 2 * OPSIZ;
583+ a1 = ((op_t *) srcp)[2];
584+ len += 5;
585+ goto do2;
586+ case 4:
587+ srcp -= 4 * OPSIZ;
588+ dstp -= 3 * OPSIZ;
589+ a0 = ((op_t *) srcp)[3];
590+ len += 4;
591+ goto do3;
592+ case 5:
593+ srcp -= 5 * OPSIZ;
594+ dstp -= 4 * OPSIZ;
595+ a1 = ((op_t *) srcp)[4];
596+ len += 3;
597+ goto do4;
598+ case 6:
599+ srcp -= 6 * OPSIZ;
600+ dstp -= 5 * OPSIZ;
601+ a0 = ((op_t *) srcp)[5];
602+ len += 2;
603+ goto do5;
604+ case 7:
605+ srcp -= 7 * OPSIZ;
606+ dstp -= 6 * OPSIZ;
607+ a1 = ((op_t *) srcp)[6];
608+ len += 1;
609+ goto do6;
610+
611+ case 0:
612+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
613+ return;
614+ srcp -= 8 * OPSIZ;
615+ dstp -= 7 * OPSIZ;
616+ a0 = ((op_t *) srcp)[7];
617+ goto do7;
618+ case 1:
619+ srcp -= 9 * OPSIZ;
620+ dstp -= 8 * OPSIZ;
621+ a1 = ((op_t *) srcp)[8];
622+ len -= 1;
623+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
624+ goto do0;
625+ goto do8; /* No-op. */
626+ }
627+
628+ do
629+ {
630+ do8:
631+ a0 = ((op_t *) srcp)[7];
632+ ((op_t *) dstp)[7] = a1;
633+ do7:
634+ a1 = ((op_t *) srcp)[6];
635+ ((op_t *) dstp)[6] = a0;
636+ do6:
637+ a0 = ((op_t *) srcp)[5];
638+ ((op_t *) dstp)[5] = a1;
639+ do5:
640+ a1 = ((op_t *) srcp)[4];
641+ ((op_t *) dstp)[4] = a0;
642+ do4:
643+ a0 = ((op_t *) srcp)[3];
644+ ((op_t *) dstp)[3] = a1;
645+ do3:
646+ a1 = ((op_t *) srcp)[2];
647+ ((op_t *) dstp)[2] = a0;
648+ do2:
649+ a0 = ((op_t *) srcp)[1];
650+ ((op_t *) dstp)[1] = a1;
651+ do1:
652+ a1 = ((op_t *) srcp)[0];
653+ ((op_t *) dstp)[0] = a0;
654+
655+ srcp -= 8 * OPSIZ;
656+ dstp -= 8 * OPSIZ;
657+ len -= 8;
658+ }
659+ while (len != 0);
660+
661+ /* This is the right position for do0. Please don't move
662+ it into the loop. */
663+ do0:
664+ ((op_t *) dstp)[7] = a1;
665+}
666+
667+/* _wordcopy_bwd_dest_aligned -- Copy block finishing right
668+ before SRCP to block finishing right before DSTP with LEN `op_t'
669+ words (not LEN bytes!). DSTP should be aligned for memory
670+ operations on `op_t', but SRCP must *not* be aligned. */
671+
672+void
673+_wordcopy_bwd_dest_aligned (dstp, srcp, len)
674+ long int dstp;
675+ long int srcp;
676+ size_t len;
677+{
678+ op_t a0, a1, a2, a3;
679+ int sh_1, sh_2;
680+
681+ /* Calculate how to shift a word read at the memory operation
682+ aligned srcp to make it aligned for copy. */
683+
684+ sh_1 = 8 * (srcp % OPSIZ);
685+ sh_2 = 8 * OPSIZ - sh_1;
686+
687+ /* Make srcp aligned: round it down to the beginning of the op_t it points
688+ into the middle of, then step forward to the start of the next op_t. */
689+ srcp &= -OPSIZ;
690+ srcp += OPSIZ;
691+
692+ switch (len % 4)
693+ {
694+ case 2:
695+ srcp -= 3 * OPSIZ;
696+ dstp -= 1 * OPSIZ;
697+ a2 = ((op_t *) srcp)[2];
698+ a1 = ((op_t *) srcp)[1];
699+ len += 2;
700+ goto do1;
701+ case 3:
702+ srcp -= 4 * OPSIZ;
703+ dstp -= 2 * OPSIZ;
704+ a3 = ((op_t *) srcp)[3];
705+ a2 = ((op_t *) srcp)[2];
706+ len += 1;
707+ goto do2;
708+ case 0:
709+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
710+ return;
711+ srcp -= 5 * OPSIZ;
712+ dstp -= 3 * OPSIZ;
713+ a0 = ((op_t *) srcp)[4];
714+ a3 = ((op_t *) srcp)[3];
715+ goto do3;
716+ case 1:
717+ srcp -= 6 * OPSIZ;
718+ dstp -= 4 * OPSIZ;
719+ a1 = ((op_t *) srcp)[5];
720+ a0 = ((op_t *) srcp)[4];
721+ len -= 1;
722+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
723+ goto do0;
724+ goto do4; /* No-op. */
725+ }
726+
727+ do
728+ {
729+ do4:
730+ a3 = ((op_t *) srcp)[3];
731+ ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2);
732+ do3:
733+ a2 = ((op_t *) srcp)[2];
734+ ((op_t *) dstp)[2] = MERGE (a3, sh_1, a0, sh_2);
735+ do2:
736+ a1 = ((op_t *) srcp)[1];
737+ ((op_t *) dstp)[1] = MERGE (a2, sh_1, a3, sh_2);
738+ do1:
739+ a0 = ((op_t *) srcp)[0];
740+ ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2);
741+
742+ srcp -= 4 * OPSIZ;
743+ dstp -= 4 * OPSIZ;
744+ len -= 4;
745+ }
746+ while (len != 0);
747+
748+ /* This is the right position for do0. Please don't move
749+ it into the loop. */
750+ do0:
751+ ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2);
752+}
Note: See TracBrowser for help on using the repository browser.