Skip to content

Commit 9d3b846

Browse files
rhpvorderman authored and estyxx committed
pythongh-120196: Reuse find_max_char() for bytes objects (python#120497)
1 parent dc246d9 commit 9d3b846

File tree

2 files changed

+25
-55
lines changed

2 files changed

+25
-55
lines changed

Objects/bytes_methods.c

Lines changed: 19 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -92,57 +92,6 @@ _Py_bytes_isalnum(const char *cptr, Py_ssize_t len)
9292
}
9393

9494

95-
PyDoc_STRVAR_shared(_Py_isascii__doc__,
96-
"B.isascii() -> bool\n\
97-
\n\
98-
Return True if B is empty or all characters in B are ASCII,\n\
99-
False otherwise.");
100-
101-
// Optimization is copied from ascii_decode in unicodeobject.c
102-
/* Mask to quickly check whether a C 'size_t' contains a
103-
non-ASCII, UTF8-encoded char. */
104-
#if (SIZEOF_SIZE_T == 8)
105-
# define ASCII_CHAR_MASK 0x8080808080808080ULL
106-
#elif (SIZEOF_SIZE_T == 4)
107-
# define ASCII_CHAR_MASK 0x80808080U
108-
#else
109-
# error C 'size_t' size should be either 4 or 8!
110-
#endif
111-
112-
/* Return True if none of the `len` bytes starting at `cptr` has its high
   bit (0x80) set, False otherwise.  An empty buffer skips the loop and
   yields True.  Whenever `p` is size_t-aligned, whole size_t words are
   tested against ASCII_CHAR_MASK; all other bytes are tested one by one. */
PyObject*
113-
_Py_bytes_isascii(const char *cptr, Py_ssize_t len)
114-
{
115-
const char *p = cptr;
116-
const char *end = p + len;
117-
118-
while (p < end) {
119-
/* Fast path, see STRINGLIB(utf8_decode) in stringlib/codecs.h
120-
for an explanation. */
121-
if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
122-
/* Help register allocation */
123-
const char *_p = p;
124-
while (_p + SIZEOF_SIZE_T <= end) {
125-
size_t value = *(const size_t *) _p;
126-
if (value & ASCII_CHAR_MASK) {
127-
Py_RETURN_FALSE;
128-
}
129-
_p += SIZEOF_SIZE_T;
130-
}
131-
p = _p;
132-
/* The word-wise scan consumed the buffer exactly; no tail bytes remain. */
if (_p == end)
133-
break;
134-
}
135-
/* Byte-wise check: reached for an unaligned position or for a tail
   shorter than one size_t word. */
if ((unsigned char)*p & 0x80) {
136-
Py_RETURN_FALSE;
137-
}
138-
p++;
139-
}
140-
Py_RETURN_TRUE;
141-
}
142-
143-
#undef ASCII_CHAR_MASK
144-
145-
14695
PyDoc_STRVAR_shared(_Py_isdigit__doc__,
14796
"B.isdigit() -> bool\n\
14897
\n\
@@ -438,6 +387,7 @@ _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to)
438387
#include "stringlib/fastsearch.h"
439388
#include "stringlib/count.h"
440389
#include "stringlib/find.h"
390+
#include "stringlib/find_max_char.h"
441391

442392
/*
443393
Wraps stringlib_parse_args_finds() and additionally checks the first
@@ -765,3 +715,21 @@ _Py_bytes_endswith(const char *str, Py_ssize_t len, PyObject *subobj,
765715
{
766716
return _Py_bytes_tailmatch(str, len, "endswith", subobj, start, end, +1);
767717
}
718+
719+
PyDoc_STRVAR_shared(_Py_isascii__doc__,
720+
"B.isascii() -> bool\n\
721+
\n\
722+
Return True if B is empty or all characters in B are ASCII,\n\
723+
False otherwise.");
724+
725+
/* Return False if stringlib_find_max_char() reports a value above 127 for
   the byte range [cptr, cptr + len), True otherwise.  The scanning itself
   is delegated to stringlib_find_max_char(), made available by the
   "stringlib/find_max_char.h" include. */
PyObject*
726+
_Py_bytes_isascii(const char *cptr, Py_ssize_t len)
727+
{
728+
/* NOTE(review): `p` is only used to derive `end` on the next line. */
const char *p = cptr;
729+
const char *end = p + len;
730+
/* NOTE(review): STRINGLIB(find_max_char) is declared as returning Py_UCS4
   in find_max_char.h; presumably stringlib_find_max_char is that function,
   so the result is widened to Py_ssize_t here -- confirm. */
Py_ssize_t max_char = stringlib_find_max_char(cptr, end);
731+
/* 127 is the largest ASCII code point; anything above it is non-ASCII. */
if (max_char > 127) {
732+
Py_RETURN_FALSE;
733+
}
734+
Py_RETURN_TRUE;
735+
}

Objects/stringlib/find_max_char.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
/* Finding the optimal width of unicode characters in a buffer */
22

3-
#if !STRINGLIB_IS_UNICODE
3+
/* find_max_char for one-byte will work for bytes objects as well. */
4+
#if !STRINGLIB_IS_UNICODE && STRINGLIB_SIZEOF_CHAR > 1
45
# error "find_max_char.h is specific to Unicode"
56
#endif
67

@@ -20,19 +21,20 @@ Py_LOCAL_INLINE(Py_UCS4)
2021
STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
2122
{
2223
const unsigned char *p = (const unsigned char *) begin;
24+
const unsigned char *_end = (const unsigned char *)end;
2325

24-
while (p < end) {
26+
while (p < _end) {
2527
if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
2628
/* Help register allocation */
2729
const unsigned char *_p = p;
28-
while (_p + SIZEOF_SIZE_T <= end) {
30+
while (_p + SIZEOF_SIZE_T <= _end) {
2931
size_t value = *(const size_t *) _p;
3032
if (value & UCS1_ASCII_CHAR_MASK)
3133
return 255;
3234
_p += SIZEOF_SIZE_T;
3335
}
3436
p = _p;
35-
if (p == end)
37+
if (p == _end)
3638
break;
3739
}
3840
if (*p++ & 0x80)

0 commit comments

Comments
 (0)