Skip to content

Commit 175154d

Browse files
committed
Optimize conversion of CP932 text to Unicode
Conversion of CP932 text to UTF-8 using `mb_convert_encoding` is now about 20% faster than before.
1 parent 73633bf commit 175154d

File tree

1 file changed

+41
-41
lines changed

1 file changed

+41
-41
lines changed

ext/mbstring/libmbfl/filters/mbfilter_cjk.c

+41 -41
Original file line number | Diff line number | Diff line change
@@ -7486,54 +7486,54 @@ static size_t mb_cp932_to_wchar(unsigned char **in, size_t *in_len, uint32_t *bu
74867486
} else if (c > 0xA0 && c < 0xE0) {
74877487
/* Kana */
74887488
*out++ = 0xFEC0 + c;
7489-
} else if (c > 0x80 && c < 0xFD && c != 0xA0 && p < e) {
7489+
} else {
7490+
if (p == e) {
7491+
*out++ = MBFL_BAD_INPUT;
7492+
break;
7493+
}
74907494
unsigned char c2 = *p++;
7495+
unsigned int w = 0;
7496+
unsigned int s = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2];
74917497

7492-
if (c2 >= 0x40 && c2 <= 0xFC && c2 != 0x7F) {
7493-
unsigned int s1, s2, w = 0;
7494-
SJIS_DECODE(c, c2, s1, s2);
7495-
unsigned int s = (s1 - 0x21)*94 + s2 - 0x21;
7496-
7497-
if (s <= 137) {
7498-
if (s == 31) {
7499-
w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
7500-
} else if (s == 32) {
7501-
w = 0xFF5E; /* FULLWIDTH TILDE */
7502-
} else if (s == 33) {
7503-
w = 0x2225; /* PARALLEL TO */
7504-
} else if (s == 60) {
7505-
w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
7506-
} else if (s == 80) {
7507-
w = 0xFFE0; /* FULLWIDTH CENT SIGN */
7508-
} else if (s == 81) {
7509-
w = 0xFFE1; /* FULLWIDTH POUND SIGN */
7510-
} else if (s == 137) {
7511-
w = 0xFFE2; /* FULLWIDTH NOT SIGN */
7512-
}
7498+
if (s <= 137) {
7499+
if (s == 31) {
7500+
w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
7501+
} else if (s == 32) {
7502+
w = 0xFF5E; /* FULLWIDTH TILDE */
7503+
} else if (s == 33) {
7504+
w = 0x2225; /* PARALLEL TO */
7505+
} else if (s == 60) {
7506+
w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
7507+
} else if (s == 80) {
7508+
w = 0xFFE0; /* FULLWIDTH CENT SIGN */
7509+
} else if (s == 81) {
7510+
w = 0xFFE1; /* FULLWIDTH POUND SIGN */
7511+
} else if (s == 137) {
7512+
w = 0xFFE2; /* FULLWIDTH NOT SIGN */
75137513
}
7514+
}
75147515

7515-
if (w == 0) {
7516-
if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {
7517-
w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
7518-
} else if (s < jisx0208_ucs_table_size) {
7519-
w = jisx0208_ucs_table[s];
7520-
} else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) {
7521-
w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
7522-
} else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) {
7523-
w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
7524-
} else if (s >= (94*94) && s < (114*94)) {
7525-
w = s - (94*94) + 0xE000;
7526-
}
7516+
if (w == 0) {
7517+
if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {
7518+
w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
7519+
} else if (s < jisx0208_ucs_table_size) {
7520+
w = jisx0208_ucs_table[s];
7521+
} else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) {
7522+
w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
7523+
} else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) {
7524+
w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
7525+
} else if (s >= (94*94) && s < (114*94)) {
7526+
w = s - (94*94) + 0xE000;
75277527
}
7528+
}
75287529

7529-
if (!w)
7530-
w = MBFL_BAD_INPUT;
7531-
*out++ = w;
7532-
} else {
7533-
*out++ = MBFL_BAD_INPUT;
7530+
if (!w) {
7531+
if (c == 0x80 || c == 0xA0 || c >= 0xFD) {
7532+
p--;
7533+
}
7534+
w = MBFL_BAD_INPUT;
75347535
}
7535-
} else {
7536-
*out++ = MBFL_BAD_INPUT;
7536+
*out++ = w;
75377537
}
75387538
}
75397539

0 commit comments

Comments
 (0)