gh-108767: Replace ctype.h functions with pyctype.h functions (#108772)

vstinner · web-flow · commit 578ebc5d5fab · 2023-09-01T18:36:53.000+02:00
Replace <ctype.h> locale dependent functions with Python "pyctype.h"
locale independent functions:

* Replace isalpha() with Py_ISALPHA().
* Replace isdigit() with Py_ISDIGIT().
* Replace isxdigit() with Py_ISXDIGIT().
* Replace tolower() with Py_TOLOWER().

Leave Modules/_sre/sre.c unchanged, it uses locale dependent
functions on purpose.

Include explicitly <ctype.h> in _decimal.c to get isascii().
diff --git a/Modules/_decimal/_decimal.c b/Modules/_decimal/_decimal.c
@@ -35,6 +35,7 @@
 #include "complexobject.h"
 #include "mpdecimal.h"
 
+#include <ctype.h>                // isascii()
 #include <stdlib.h>
 
 #include "docstrings.h"
diff --git a/Modules/_zoneinfo.c b/Modules/_zoneinfo.c
@@ -1701,7 +1701,7 @@ parse_tz_str(zoneinfo_state *state, PyObject *tz_str_obj, _tzrule *out)
 static int
 parse_uint(const char *const p, uint8_t *value)
 {
-    if (!isdigit(*p)) {
+    if (!Py_ISDIGIT(*p)) {
         return -1;
     }
 
@@ -1732,7 +1732,7 @@ parse_abbr(const char *const p, PyObject **abbr)
             //   '+' ) character, or the minus-sign ( '-' ) character. The std
             //   and dst fields in this case shall not include the quoting
             //   characters.
-            if (!isalpha(buff) && !isdigit(buff) && buff != '+' &&
+            if (!Py_ISALPHA(buff) && !Py_ISDIGIT(buff) && buff != '+' &&
                 buff != '-') {
                 return -1;
             }
@@ -1748,7 +1748,7 @@ parse_abbr(const char *const p, PyObject **abbr)
         //   In the unquoted form, all characters in these fields shall be
         //   alphabetic characters from the portable character set in the
         //   current locale.
-        while (isalpha(*ptr)) {
+        while (Py_ISALPHA(*ptr)) {
             ptr++;
         }
         str_end = ptr;
@@ -1802,7 +1802,7 @@ parse_tz_delta(const char *const p, long *total_seconds)
     // The hour can be 1 or 2 numeric characters
     for (size_t i = 0; i < 2; ++i) {
         buff = *ptr;
-        if (!isdigit(buff)) {
+        if (!Py_ISDIGIT(buff)) {
             if (i == 0) {
                 return -1;
             }
@@ -1830,7 +1830,7 @@ parse_tz_delta(const char *const p, long *total_seconds)
 
         for (size_t j = 0; j < 2; ++j) {
             buff = *ptr;
-            if (!isdigit(buff)) {
+            if (!Py_ISDIGIT(buff)) {
                 return -1;
             }
             *(outputs[i]) *= 10;
@@ -1932,7 +1932,7 @@ parse_transition_rule(const char *const p, TransitionRuleType **out)
         }
 
         for (size_t i = 0; i < 3; ++i) {
-            if (!isdigit(*ptr)) {
+            if (!Py_ISDIGIT(*ptr)) {
                 if (i == 0) {
                     return -1;
                 }
@@ -2007,7 +2007,7 @@ parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
 
         uint8_t buff = 0;
         for (size_t j = 0; j < 2; j++) {
-            if (!isdigit(*ptr)) {
+            if (!Py_ISDIGIT(*ptr)) {
                 if (i == 0 && j > 0) {
                     break;
                 }
diff --git a/Modules/getaddrinfo.c b/Modules/getaddrinfo.c
@@ -51,7 +51,6 @@
 #include <string.h>
 #include <stdlib.h>
 #include <stddef.h>
-#include <ctype.h>
 #include <unistd.h>
 
 #include "addrinfo.h"
@@ -228,8 +227,9 @@ str_isnumber(const char *p)
 {
     unsigned char *q = (unsigned char *)p;
     while (*q) {
-        if (! isdigit(*q))
+        if (!Py_ISDIGIT(*q)) {
             return NO;
+        }
         q++;
     }
     return YES;
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
@@ -722,11 +722,11 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
                 if (--fmtcnt >= 0)
                     c = *fmt++;
             }
-            else if (c >= 0 && isdigit(c)) {
+            else if (c >= 0 && Py_ISDIGIT(c)) {
                 width = c - '0';
                 while (--fmtcnt >= 0) {
                     c = Py_CHARMASK(*fmt++);
-                    if (!isdigit(c))
+                    if (!Py_ISDIGIT(c))
                         break;
                     if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
                         PyErr_SetString(
@@ -761,11 +761,11 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
                     if (--fmtcnt >= 0)
                         c = *fmt++;
                 }
-                else if (c >= 0 && isdigit(c)) {
+                else if (c >= 0 && Py_ISDIGIT(c)) {
                     prec = c - '0';
                     while (--fmtcnt >= 0) {
                         c = Py_CHARMASK(*fmt++);
-                        if (!isdigit(c))
+                        if (!Py_ISDIGIT(c))
                             break;
                         if (prec > (INT_MAX - ((int)c - '0')) / 10) {
                             PyErr_SetString(
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
@@ -4,7 +4,6 @@
 #include "Python.h"
 #include "pycore_call.h"          // _PyObject_CallNoArgs()
 
-#include <ctype.h>
 #include <assert.h>
 
 #include "tokenizer.h"
@@ -158,7 +157,7 @@ get_normal_name(const char *s)  /* for utf-8 and latin-1 */
         else if (c == '_')
             buf[i] = '-';
         else
-            buf[i] = tolower(c);
+            buf[i] = Py_TOLOWER(c);
     }
     buf[i] = '\0';
     if (strcmp(buf, "utf-8") == 0 ||
@@ -1715,12 +1714,12 @@ tok_decimal_tail(struct tok_state *tok)
     while (1) {
         do {
             c = tok_nextc(tok);
-        } while (isdigit(c));
+        } while (Py_ISDIGIT(c));
         if (c != '_') {
             break;
         }
         c = tok_nextc(tok);
-        if (!isdigit(c)) {
+        if (!Py_ISDIGIT(c)) {
             tok_backup(tok, c);
             syntaxerror(tok, "invalid decimal literal");
             return 0;
@@ -2108,7 +2107,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
     /* Period or number starting with period? */
     if (c == '.') {
         c = tok_nextc(tok);
-        if (isdigit(c)) {
+        if (Py_ISDIGIT(c)) {
             goto fraction;
         } else if (c == '.') {
             c = tok_nextc(tok);
@@ -2131,7 +2130,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
     }
 
     /* Number */
-    if (isdigit(c)) {
+    if (Py_ISDIGIT(c)) {
         if (c == '0') {
             /* Hex, octal or binary -- maybe. */
             c = tok_nextc(tok);
@@ -2142,13 +2141,13 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                     if (c == '_') {
                         c = tok_nextc(tok);
                     }
-                    if (!isxdigit(c)) {
+                    if (!Py_ISXDIGIT(c)) {
                         tok_backup(tok, c);
                         return MAKE_TOKEN(syntaxerror(tok, "invalid hexadecimal literal"));
                     }
                     do {
                         c = tok_nextc(tok);
-                    } while (isxdigit(c));
+                    } while (Py_ISXDIGIT(c));
                 } while (c == '_');
                 if (!verify_end_of_number(tok, c, "hexadecimal")) {
                     return MAKE_TOKEN(ERRORTOKEN);
@@ -2162,7 +2161,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                         c = tok_nextc(tok);
                     }
                     if (c < '0' || c >= '8') {
-                        if (isdigit(c)) {
+                        if (Py_ISDIGIT(c)) {
                             return MAKE_TOKEN(syntaxerror(tok,
                                     "invalid digit '%c' in octal literal", c));
                         }
@@ -2175,7 +2174,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                         c = tok_nextc(tok);
                     } while ('0' <= c && c < '8');
                 } while (c == '_');
-                if (isdigit(c)) {
+                if (Py_ISDIGIT(c)) {
                     return MAKE_TOKEN(syntaxerror(tok,
                             "invalid digit '%c' in octal literal", c));
                 }
@@ -2191,7 +2190,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                         c = tok_nextc(tok);
                     }
                     if (c != '0' && c != '1') {
-                        if (isdigit(c)) {
+                        if (Py_ISDIGIT(c)) {
                             return MAKE_TOKEN(syntaxerror(tok, "invalid digit '%c' in binary literal", c));
                         }
                         else {
@@ -2203,7 +2202,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                         c = tok_nextc(tok);
                     } while (c == '0' || c == '1');
                 } while (c == '_');
-                if (isdigit(c)) {
+                if (Py_ISDIGIT(c)) {
                     return MAKE_TOKEN(syntaxerror(tok, "invalid digit '%c' in binary literal", c));
                 }
                 if (!verify_end_of_number(tok, c, "binary")) {
@@ -2217,7 +2216,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                 while (1) {
                     if (c == '_') {
                         c = tok_nextc(tok);
-                        if (!isdigit(c)) {
+                        if (!Py_ISDIGIT(c)) {
                             tok_backup(tok, c);
                             return MAKE_TOKEN(syntaxerror(tok, "invalid decimal literal"));
                         }
@@ -2228,7 +2227,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                     c = tok_nextc(tok);
                 }
                 char* zeros_end = tok->cur;
-                if (isdigit(c)) {
+                if (Py_ISDIGIT(c)) {
                     nonzero = 1;
                     c = tok_decimal_tail(tok);
                     if (c == 0) {
@@ -2272,7 +2271,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                     c = tok_nextc(tok);
         fraction:
                     /* Fraction */
-                    if (isdigit(c)) {
+                    if (Py_ISDIGIT(c)) {
                         c = tok_decimal_tail(tok);
                         if (c == 0) {
                             return MAKE_TOKEN(ERRORTOKEN);
@@ -2287,11 +2286,11 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                     c = tok_nextc(tok);
                     if (c == '+' || c == '-') {
                         c = tok_nextc(tok);
-                        if (!isdigit(c)) {
+                        if (!Py_ISDIGIT(c)) {
                             tok_backup(tok, c);
                             return MAKE_TOKEN(syntaxerror(tok, "invalid decimal literal"));
                         }
-                    } else if (!isdigit(c)) {
+                    } else if (!Py_ISDIGIT(c)) {
                         tok_backup(tok, c);
                         if (!verify_end_of_number(tok, e, "decimal")) {
                             return MAKE_TOKEN(ERRORTOKEN);
@@ -2326,7 +2325,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
     }
 
   f_string_quote:
-    if (((tolower(*tok->start) == 'f' || tolower(*tok->start) == 'r') && (c == '\'' || c == '"'))) {
+    if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r') && (c == '\'' || c == '"'))) {
         int quote = c;
         int quote_size = 1;             /* 1 or 3 */
 
@@ -2377,7 +2376,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
         switch (*tok->start) {
             case 'F':
             case 'f':
-                the_current_tok->f_string_raw = tolower(*(tok->start + 1)) == 'r';
+                the_current_tok->f_string_raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
                 break;
             case 'R':
             case 'r':
diff --git a/Python/pystrcmp.c b/Python/pystrcmp.c
@@ -11,20 +11,20 @@ PyOS_mystrnicmp(const char *s1, const char *s2, Py_ssize_t size)
         return 0;
     p1 = (const unsigned char *)s1;
     p2 = (const unsigned char *)s2;
-    for (; (--size > 0) && *p1 && *p2 && (tolower(*p1) == tolower(*p2));
+    for (; (--size > 0) && *p1 && *p2 && (Py_TOLOWER(*p1) == Py_TOLOWER(*p2));
          p1++, p2++) {
         ;
     }
-    return tolower(*p1) - tolower(*p2);
+    return Py_TOLOWER(*p1) - Py_TOLOWER(*p2);
 }
 
 int
 PyOS_mystricmp(const char *s1, const char *s2)
 {
     const unsigned char *p1 = (const unsigned char *)s1;
     const unsigned char *p2 = (const unsigned char *)s2;
-    for (; *p1 && *p2 && (tolower(*p1) == tolower(*p2)); p1++, p2++) {
+    for (; *p1 && *p2 && (Py_TOLOWER(*p1) == Py_TOLOWER(*p2)); p1++, p2++) {
         ;
     }
-    return (tolower(*p1) - tolower(*p2));
+    return (Py_TOLOWER(*p1) - Py_TOLOWER(*p2));
 }

Original file line number	Diff line number	Diff line change
`@@ -11,20 +11,20 @@ PyOS_mystrnicmp(const char *s1, const char *s2, Py_ssize_t size)`
`11`	`11`	`return 0;`
`12`	`12`	`p1 = (const unsigned char *)s1;`
`13`	`13`	`p2 = (const unsigned char *)s2;`
`14`		`- for (; (--size > 0) && *p1 && *p2 && (tolower(*p1) == tolower(*p2));`
	`14`	`+ for (; (--size > 0) && *p1 && *p2 && (Py_TOLOWER(*p1) == Py_TOLOWER(*p2));`
`15`	`15`	`p1++, p2++) {`
`16`	`16`	`;`
`17`	`17`	`}`
`18`		`- return tolower(*p1) - tolower(*p2);`
	`18`	`+ return Py_TOLOWER(*p1) - Py_TOLOWER(*p2);`
`19`	`19`	`}`
`20`	`20`
`21`	`21`	`int`
`22`	`22`	`PyOS_mystricmp(const char *s1, const char *s2)`
`23`	`23`	`{`
`24`	`24`	`const unsigned char *p1 = (const unsigned char *)s1;`
`25`	`25`	`const unsigned char *p2 = (const unsigned char *)s2;`
`26`		`- for (; *p1 && *p2 && (tolower(*p1) == tolower(*p2)); p1++, p2++) {`
	`26`	`+ for (; *p1 && *p2 && (Py_TOLOWER(*p1) == Py_TOLOWER(*p2)); p1++, p2++) {`
`27`	`27`	`;`
`28`	`28`	`}`
`29`		`- return (tolower(*p1) - tolower(*p2));`
	`29`	`+ return (Py_TOLOWER(*p1) - Py_TOLOWER(*p2));`
`30`	`30`	`}`