Skip to content

Commit 5fb515e

Browse files
Phil Elwell authored and popcornmix committed
Improve __copy_to_user and __copy_from_user performance
Provide a __copy_from_user that uses memcpy. On BCM2708, use optimised memcpy/memmove/memcmp/memset implementations.
arch/arm: Add mmiocpy/set aliases for memcpy/set. See: #1082
copy_from_user: CPU_SW_DOMAIN_PAN compatibility. The downstream copy_from_user acceleration must also play nice with CONFIG_CPU_SW_DOMAIN_PAN. See: #1381
Signed-off-by: Phil Elwell <[email protected]>
1 parent 5beee46 commit 5fb515e

12 files changed

+1373
-6
lines changed

arch/arm/include/asm/string.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@ extern void * memchr(const void *, int, __kernel_size_t);
2424
#define __HAVE_ARCH_MEMSET
2525
extern void * memset(void *, int, __kernel_size_t);
2626

27+
#ifdef CONFIG_MACH_BCM2708
28+
#define __HAVE_ARCH_MEMCMP
29+
extern int memcmp(const void *, const void *, size_t);
30+
#endif
31+
2732
extern void __memzero(void *ptr, __kernel_size_t n);
2833

2934
#define memset(p,v,n) \

arch/arm/include/asm/uaccess.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,9 @@ do { \
477477
extern unsigned long __must_check
478478
arm_copy_from_user(void *to, const void __user *from, unsigned long n);
479479

480+
extern unsigned long __must_check
481+
__copy_from_user_std(void *to, const void __user *from, unsigned long n);
482+
480483
static inline unsigned long __must_check
481484
__copy_from_user(void *to, const void __user *from, unsigned long n)
482485
{

arch/arm/lib/Makefile

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,8 @@
66

77
lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \
88
csumpartialcopy.o csumpartialcopyuser.o clearbit.o \
9-
delay.o delay-loop.o findbit.o memchr.o memcpy.o \
10-
memmove.o memset.o memzero.o setbit.o \
11-
strchr.o strrchr.o \
9+
delay.o delay-loop.o findbit.o memchr.o memzero.o \
10+
setbit.o strchr.o strrchr.o \
1211
testchangebit.o testclearbit.o testsetbit.o \
1312
ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \
1413
ucmpdi2.o lib1funcs.o div64.o \
@@ -18,6 +17,16 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \
1817
mmu-y := clear_user.o copy_page.o getuser.o putuser.o \
1918
copy_from_user.o copy_to_user.o
2019

20+
# Choose optimised implementations for Raspberry Pi
21+
ifeq ($(CONFIG_MACH_BCM2708),y)
22+
CFLAGS_uaccess_with_memcpy.o += -DCOPY_FROM_USER_THRESHOLD=1600
23+
CFLAGS_uaccess_with_memcpy.o += -DCOPY_TO_USER_THRESHOLD=672
24+
obj-$(CONFIG_MODULES) += exports_rpi.o
25+
lib-y += memcpy_rpi.o memmove_rpi.o memset_rpi.o memcmp_rpi.o
26+
else
27+
lib-y += memcpy.o memmove.o memset.o
28+
endif
29+
2130
# using lib_ here won't override already available weak symbols
2231
obj-$(CONFIG_UACCESS_WITH_MEMCPY) += uaccess_with_memcpy.o
2332

arch/arm/lib/arm-mem.h

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
/*
2+
Copyright (c) 2013, Raspberry Pi Foundation
3+
Copyright (c) 2013, RISC OS Open Ltd
4+
All rights reserved.
5+
6+
Redistribution and use in source and binary forms, with or without
7+
modification, are permitted provided that the following conditions are met:
8+
* Redistributions of source code must retain the above copyright
9+
notice, this list of conditions and the following disclaimer.
10+
* Redistributions in binary form must reproduce the above copyright
11+
notice, this list of conditions and the following disclaimer in the
12+
documentation and/or other materials provided with the distribution.
13+
* Neither the name of the copyright holder nor the
14+
names of its contributors may be used to endorse or promote products
15+
derived from this software without specific prior written permission.
16+
17+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
21+
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24+
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27+
*/
28+
29+
.macro myfunc fname
/* Open an assembly routine named by the fname argument: emit a .func
 * directive for it, declare the symbol global, and place its label here.
 *
 * NOTE(review): the argument is written here without the backslash prefix
 * that GNU as uses for macro-argument substitution. Presumably the
 * backslashes were lost when this listing was extracted; confirm against
 * the original arm-mem.h before assembling.
 */
30+
.func fname
31+
.global fname
32+
fname:
33+
.endm
34+
35+
.macro preload_leading_step1 backwards, ptr, base
/* Issue the initial run of preloads for a copy.
 *
 *   backwards - assemble-time flag; non-zero selects the descending variant
 *   ptr       - register that receives base rounded to a 32-byte boundary
 *   base      - register holding the address to preload from
 *
 * Emits prefetch_distance+1 pld instructions at successive 32-byte steps
 * from ptr (descending when backwards is set). prefetch_distance is an
 * assembler symbol that is not defined in this file; OFFSET is (re)defined
 * here with .set as the unrolling counter.
 */
36+
/* If the destination is already 16-byte aligned, then we need to preload
37+
* between 0 and prefetch_distance (inclusive) cache lines ahead so there
38+
* are no gaps when the inner loop starts.
39+
*/
40+
.if backwards
41+
/* Descending: round down the address of the byte just below base. */
sub ptr, base, #1
42+
bic ptr, ptr, #31
43+
.else
44+
/* Ascending: round base itself down to a 32-byte boundary. */
bic ptr, base, #31
45+
.endif
46+
.set OFFSET, 0
47+
/* Assemble-time unrolled loop: one pld per 32-byte step. */
.rept prefetch_distance+1
48+
pld [ptr, #OFFSET]
49+
.if backwards
50+
.set OFFSET, OFFSET-32
51+
.else
52+
.set OFFSET, OFFSET+32
53+
.endif
54+
.endr
55+
.endm
56+
57+
.macro preload_leading_step2 backwards, ptr, base, leading_bytes, tmp
/* Conditionally issue one additional pld, prefetch_distance+1 steps of
 * 32 bytes away from ptr in the copy direction.
 *
 *   backwards     - assemble-time flag; non-zero selects the descending test
 *   ptr           - register used only as the base of the extra pld
 *   base          - register holding the source address fed into the test
 *   leading_bytes - register holding the leading byte count
 *   tmp           - scratch register, overwritten
 *
 * All operands are shifted left by 32-5 so that only their low five bits
 * take part and the arithmetic is effectively modulo 32; the resulting
 * condition flags decide whether the pld is skipped. Local label 61 is the
 * skip target. Condition flags are modified.
 */
58+
/* However, if the destination is not 16-byte aligned, we may need to
59+
* preload one more cache line than that. The question we need to ask is:
60+
* are the leading bytes more than the amount by which the source
61+
* pointer will be rounded down for preloading, and if so, by how many
62+
* cache lines?
63+
*/
64+
.if backwards
65+
/* Here we compare against how many bytes we are into the
66+
* cache line, counting down from the highest such address.
67+
* Effectively, we want to calculate
68+
* leading_bytes = dst&15
69+
* cacheline_offset = 31-((src-leading_bytes-1)&31)
70+
* extra_needed = leading_bytes - cacheline_offset
71+
* and test if extra_needed is <= 0, or rearranging:
72+
* leading_bytes + (src-leading_bytes-1)&31 <= 31
73+
*/
74+
mov tmp, base, lsl #32-5
75+
/* NOTE(review): sbc also subtracts the inverse of the incoming carry flag;
 * presumably the call sites leave C clear here so this supplies the "-1"
 * of the formula above - confirm against the callers.
 */
sbc tmp, tmp, leading_bytes, lsl #32-5
76+
adds tmp, tmp, leading_bytes, lsl #32-5
77+
/* No carry out of the add: skip the extra preload. */
bcc 61f
78+
pld [ptr, #-32*(prefetch_distance+1)]
79+
.else
80+
/* Effectively, we want to calculate
81+
* leading_bytes = (-dst)&15
82+
* cacheline_offset = (src+leading_bytes)&31
83+
* extra_needed = leading_bytes - cacheline_offset
84+
* and test if extra_needed is <= 0.
85+
*/
86+
mov tmp, base, lsl #32-5
87+
add tmp, tmp, leading_bytes, lsl #32-5
88+
rsbs tmp, tmp, leading_bytes, lsl #32-5
89+
/* Unsigned lower-or-same result: skip the extra preload. */
bls 61f
90+
pld [ptr, #32*(prefetch_distance+1)]
91+
.endif
92+
61:
93+
.endm
94+
95+
.macro preload_trailing backwards, base, remain, tmp
/* Issue zero, one or two further pld instructions.
 *
 *   backwards - assemble-time flag; non-zero selects the descending variant
 *   base      - register holding the current address (negated first when
 *               backwards is set)
 *   remain    - register holding the remaining count; only its low five
 *               bits take part because of the lsl #32-5
 *   tmp       - scratch register, overwritten
 *
 * Outcome of the flag test below:
 *   Z set            -> branch to 82, no preload
 *   Z clear, C clear -> branch to 81, one preload
 *   Z clear, C set   -> fall through, two preloads
 * Condition flags are modified; local labels 81 and 82 are used.
 */
96+
/* We need either 0, 1 or 2 extra preloads */
97+
.if backwards
98+
rsb tmp, base, #0
99+
mov tmp, tmp, lsl #32-5
100+
.else
101+
mov tmp, base, lsl #32-5
102+
.endif
103+
adds tmp, tmp, remain, lsl #32-5
104+
adceqs tmp, tmp, #0
105+
/* The instruction above has two effects: ensures Z is only
106+
* set if C was clear (so Z indicates that both shifted quantities
107+
* were 0), and clears C if Z was set (so C indicates that the sum
108+
* of the shifted quantities was greater and not equal to 32) */
109+
beq 82f
110+
/* Recompute the 32-byte-aligned base for the pld addressing below; sub and
 * bic are used without the S suffix, so the flags tested by bcc are those
 * left by the adds/adceqs pair above.
 */
.if backwards
111+
sub tmp, base, #1
112+
bic tmp, tmp, #31
113+
.else
114+
bic tmp, base, #31
115+
.endif
116+
bcc 81f
117+
.if backwards
118+
pld [tmp, #-32*(prefetch_distance+1)]
119+
81:
120+
pld [tmp, #-32*prefetch_distance]
121+
.else
122+
pld [tmp, #32*(prefetch_distance+2)]
123+
81:
124+
pld [tmp, #32*(prefetch_distance+1)]
125+
.endif
126+
82:
127+
.endm
128+
129+
.macro preload_all backwards, narrow_case, shift, base, remain, tmp0, tmp1
/* Preload every 32-byte line spanned by a region in one go.
 *
 *   backwards   - assemble-time flag; non-zero means the region extends
 *                 downwards from base
 *   narrow_case - assemble-time flag; non-zero emits a single extra pld
 *                 instead of a loop
 *   shift       - immediate left-shift applied to remain to form the
 *                 region length
 *   base        - register holding the start address
 *   remain      - register holding the count, scaled by shift
 *   tmp0, tmp1  - scratch registers, overwritten
 *
 * tmp0 is set to the 32-byte-aligned line at the near end of the region
 * and preloaded; tmp1 is set to the aligned line at the far end. If the
 * two are equal nothing more is done (label 92). Condition flags are
 * modified; local labels 91 and 92 are used.
 */
130+
.if backwards
131+
sub tmp0, base, #1
132+
bic tmp0, tmp0, #31
133+
pld [tmp0]
134+
sub tmp1, base, remain, lsl #shift
135+
.else
136+
bic tmp0, base, #31
137+
pld [tmp0]
138+
add tmp1, base, remain, lsl #shift
139+
sub tmp1, tmp1, #1
140+
.endif
141+
bic tmp1, tmp1, #31
142+
cmp tmp1, tmp0
143+
beq 92f
144+
.if narrow_case
145+
/* In this case, all the data fits in either 1 or 2 cache lines */
146+
pld [tmp1]
147+
.else
148+
91:
149+
/* Step tmp0 one 32-byte line at a time towards tmp1, preloading each. */
.if backwards
150+
sub tmp0, tmp0, #32
151+
.else
152+
add tmp0, tmp0, #32
153+
.endif
154+
cmp tmp0, tmp1
155+
/* The pld sits between the compare and the branch; bne below tests the
 * flags set by the cmp above, so the final line (tmp0 == tmp1) is still
 * preloaded before the loop exits.
 */
pld [tmp0]
156+
bne 91b
157+
.endif
158+
92:
159+
.endm

arch/arm/lib/copy_from_user.S

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,11 +89,13 @@
8989

9090
.text
9191

92-
ENTRY(arm_copy_from_user)
92+
ENTRY(__copy_from_user_std)
93+
WEAK(arm_copy_from_user)
9394

9495
#include "copy_template.S"
9596

9697
ENDPROC(arm_copy_from_user)
98+
ENDPROC(__copy_from_user_std)
9799

98100
.pushsection .fixup,"ax"
99101
.align 0

arch/arm/lib/exports_rpi.c

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/**
2+
* Copyright (c) 2014, Raspberry Pi (Trading) Ltd.
3+
*
4+
* Redistribution and use in source and binary forms, with or without
5+
* modification, are permitted provided that the following conditions
6+
* are met:
7+
* 1. Redistributions of source code must retain the above copyright
8+
* notice, this list of conditions, and the following disclaimer,
9+
* without modification.
10+
* 2. Redistributions in binary form must reproduce the above copyright
11+
* notice, this list of conditions and the following disclaimer in the
12+
* documentation and/or other materials provided with the distribution.
13+
* 3. The names of the above-listed copyright holders may not be used
14+
* to endorse or promote products derived from this software without
15+
* specific prior written permission.
16+
*
17+
* ALTERNATIVELY, this software may be distributed under the terms of the
18+
* GNU General Public License ("GPL") version 2, as published by the Free
19+
* Software Foundation.
20+
*
21+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22+
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23+
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24+
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
25+
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26+
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27+
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28+
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29+
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30+
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31+
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32+
*/
33+
34+
#include <linux/kernel.h>
35+
#include <linux/module.h>
36+
37+
/*
 * Export the memcmp symbol for use by loadable modules. The Makefile hunk
 * in this commit builds this file only when CONFIG_MODULES is set.
 * NOTE(review): presumably this exports the optimised implementation from
 * memcmp_rpi.o - confirm that no other memcmp export is linked in.
 */
EXPORT_SYMBOL(memcmp);

0 commit comments

Comments
 (0)