@@ -49,14 +49,24 @@ gettyperecord(Py_UNICODE code)
49
49
return & _PyUnicode_TypeRecords [index ];
50
50
}
51
51
52
- /* Returns 1 for Unicode characters having the category 'Zl' or type
53
- 'B', 0 otherwise. */
52
+ /* Returns 1 for Unicode characters having the category 'Zl', 'Zp' or
53
+ type 'B', 0 otherwise. */
54
54
55
- int _PyUnicode_IsLinebreak (Py_UNICODE ch )
55
+ int _PyUnicode_IsLinebreak (register const Py_UNICODE ch ) /* added side: same name and return type; ch gains register/const qualifiers only */
56
56
{
57
- const _PyUnicode_TypeRecord * ctype = gettyperecord (ch ); /* removed side: fetched the type record for ch */
58
-
59
- return (ctype -> flags & LINEBREAK_MASK ) != 0 ; /* removed side: 1 if the record's flags had any LINEBREAK_MASK bit set */
57
+ switch (ch ) { /* added side: answers from a fixed list of code points instead of calling gettyperecord() */
58
+ case 0x000A : /* LINE FEED */
59
+ case 0x000D : /* CARRIAGE RETURN */
60
+ case 0x001C : /* FILE SEPARATOR */
61
+ case 0x001D : /* GROUP SEPARATOR */
62
+ case 0x001E : /* RECORD SEPARATOR */
63
+ case 0x0085 : /* NEXT LINE */
64
+ case 0x2028 : /* LINE SEPARATOR */
65
+ case 0x2029 : /* PARAGRAPH SEPARATOR */
66
+ return 1 ; /* reached by fall-through from every case label above */
67
+ default :
68
+ return 0 ; /* any code point not listed above is not a line break */
69
+ }
60
70
}
61
71
62
72
/* Returns the titlecase Unicode characters corresponding to ch or just
@@ -327,11 +337,43 @@ int _PyUnicode_IsNumeric(Py_UNICODE ch)
327
337
/* Returns 1 for Unicode characters having the bidirectional type
328
338
'WS', 'B' or 'S' or the category 'Zs', 0 otherwise. */
329
339
330
- int _PyUnicode_IsWhitespace (Py_UNICODE ch )
340
+ int _PyUnicode_IsWhitespace (register const Py_UNICODE ch ) /* added side: same name and return type; ch gains register/const qualifiers only */
331
341
{
332
- const _PyUnicode_TypeRecord * ctype = gettyperecord (ch ); /* removed side: fetched the type record for ch */
333
-
334
- return (ctype -> flags & SPACE_MASK ) != 0 ; /* removed side: 1 if the record's flags had any SPACE_MASK bit set */
342
+ switch (ch ) { /* added side: answers from a fixed list of code points instead of calling gettyperecord() */
343
+ case 0x0009 : /* HORIZONTAL TABULATION */
344
+ case 0x000A : /* LINE FEED */
345
+ case 0x000B : /* VERTICAL TABULATION */
346
+ case 0x000C : /* FORM FEED */
347
+ case 0x000D : /* CARRIAGE RETURN */
348
+ case 0x001C : /* FILE SEPARATOR */
349
+ case 0x001D : /* GROUP SEPARATOR */
350
+ case 0x001E : /* RECORD SEPARATOR */
351
+ case 0x001F : /* UNIT SEPARATOR */
352
+ case 0x0020 : /* SPACE */
353
+ case 0x0085 : /* NEXT LINE */
354
+ case 0x00A0 : /* NO-BREAK SPACE */
355
+ case 0x1680 : /* OGHAM SPACE MARK */ /* NOTE(review): U+180E MONGOLIAN VOWEL SEPARATOR is not listed; it was category Zs in Unicode 4.0-6.2 -- confirm against the targeted UCD version */
356
+ case 0x2000 : /* EN QUAD */
357
+ case 0x2001 : /* EM QUAD */
358
+ case 0x2002 : /* EN SPACE */
359
+ case 0x2003 : /* EM SPACE */
360
+ case 0x2004 : /* THREE-PER-EM SPACE */
361
+ case 0x2005 : /* FOUR-PER-EM SPACE */
362
+ case 0x2006 : /* SIX-PER-EM SPACE */
363
+ case 0x2007 : /* FIGURE SPACE */
364
+ case 0x2008 : /* PUNCTUATION SPACE */
365
+ case 0x2009 : /* THIN SPACE */
366
+ case 0x200A : /* HAIR SPACE */
367
+ case 0x200B : /* ZERO WIDTH SPACE -- NOTE(review): general category is Cf (not Zs) from Unicode 4.0.1 on; confirm this entry matches the targeted UCD version */
368
+ case 0x2028 : /* LINE SEPARATOR */
369
+ case 0x2029 : /* PARAGRAPH SEPARATOR */
370
+ case 0x202F : /* NARROW NO-BREAK SPACE */
371
+ case 0x205F : /* MEDIUM MATHEMATICAL SPACE */
372
+ case 0x3000 : /* IDEOGRAPHIC SPACE */
373
+ return 1 ; /* reached by fall-through from every case label above */
374
+ default :
375
+ return 0 ; /* any code point not listed above is not whitespace */
376
+ }
335
377
}
336
378
337
379
/* Returns 1 for Unicode characters having the category 'Ll', 0
0 commit comments