@@ -6118,13 +6118,66 @@ S_langinfo_sv_i(pTHX_
6118
6118
6119
6119
const char * retval = nl_langinfo (item );
6120
6120
Size_t total_len = strlen (retval );
6121
+ char separator ;
6122
+
6123
+ if (UNLIKELY (item == ALT_DIGITS ) && total_len > 0 ) {
6124
+
6125
+ char * sep_pos =
6126
+ (char * ) strpbrk (retval , "!\"#$%&'()*+,-./.@[\\]^_`{|}~" );
6127
+ if (sep_pos ) {
6128
+ separator = retval [sep_pos - retval ];
6129
+ }
6130
+ else {
6131
+ separator = '\0' ;
6132
+
6133
+ /* Must be using NUL to separate the digits. There are up to
6134
+ * 100 of them, ending in two NULs if fewer. Find the end */
6135
+ const char * s = retval + total_len + 1 ;
6136
+
6137
+ for (unsigned int i = 1 ; i <= 99 ; i ++ ) {
6138
+ Size_t len = strlen (s ) + 1 ;
6139
+ total_len += len ;
6140
+
6141
+ if (len == 1 ) { /* Only a NUL */
6142
+ break ;
6143
+ }
6144
+
6145
+ s += len ;
6146
+ }
6147
+ }
6148
+ }
6149
+
6121
6150
sv_setpvn (sv , retval , total_len );
6122
6151
6123
6152
gwLOCALE_UNLOCK ;
6124
6153
6154
+ /* Convert the ALT_DIGITS separator to a semicolon if not already */
6155
+ if (UNLIKELY (item == ALT_DIGITS ) && total_len > 0 && separator != ';' ) {
6156
+ char * digit_string = SvPVX (sv );
6157
+ char * s = digit_string ;
6158
+ char * e = s + total_len ;
6159
+
6160
+ while (s < e ) {
6161
+ char * this_end = (char * ) memchr (s , separator , total_len );
6162
+ if (! this_end ) {
6163
+ break ;
6164
+ }
6165
+
6166
+ * this_end = ';' ;
6167
+ s = this_end ;
6168
+ }
6169
+ }
6170
+
6125
6171
SvUTF8_off (sv );
6126
6172
retval = SvPVX_const (sv );
6127
6173
6174
+ /* Note that get_locale_string_utf8ness_i() is passed a char*, so stops
6175
+ * looking at the first NUL, meaning it only looks at string [0] in the
6176
+ * ALT_DIGITS case: alternate zero. One might think that you'd need to
6177
+ * look at all the strings to determine utf8ness. But that is not true
6178
+ * for this case; string [0] is sufficient. This is because there are
6179
+ * no ASCII alternate digits, so [0] is enough to decide the utf8ness
6180
+ * */
6128
6181
if (utf8ness ) {
6129
6182
* utf8ness = get_locale_string_utf8ness_i (retval ,
6130
6183
LOCALE_UTF8NESS_UNKNOWN ,
@@ -6865,34 +6918,7 @@ S_emulate_langinfo(pTHX_ const int item,
6865
6918
6866
6919
restore_toggled_locale_c (LC_TIME , orig_TIME_locale );
6867
6920
6868
- /* If the item is 'ALT_DIGITS', '*retbuf' contains the alternate
6869
- * format for wday 0. If the value is the same as the normal 0,
6870
- * there isn't an alternate, so clear the buffer.
6871
- *
6872
- * (wday was chosen because its range is all a single digit.
6873
- * Things like tm_sec have two digits as the minimum: '00'.) */
6874
- if (item == ALT_DIGITS && strEQ (temp , "0" )) {
6875
- retval = "" ;
6876
- Safefree (temp );
6877
- break ;
6878
- }
6879
-
6880
- /* ALT_DIGITS is problematic. Experiments on it showed that
6881
- * strftime() did not always work properly when going from alt-9 to
6882
- * alt-10. Only a few locales have this item defined, and in all
6883
- * of them on Linux that khw was able to find, nl_langinfo() merely
6884
- * returned the alt-0 character, possibly doubled. Most Unicode
6885
- * digits are in blocks of 10 consecutive code points, so that is
6886
- * sufficient information for such scripts, as we can infer alt-1,
6887
- * alt-2, .... But for a Japanese locale, a CJK ideographic 0 is
6888
- * returned, and the CJK digits are not in code point order, so you
6889
- * can't really infer anything. The localedef for this locale did
6890
- * specify the succeeding digits, so that strftime() works properly
6891
- * on them, without needing to infer anything. But the
6892
- * nl_langinfo() return did not give sufficient information for the
6893
- * caller to understand what's going on. So until there is
6894
- * evidence that it should work differently, this returns the alt-0
6895
- * string for ALT_DIGITS. */
6921
+ if (LIKELY (item != ALT_DIGITS )) {
6896
6922
6897
6923
/* If to return what strftime() returns, are done */
6898
6924
if (! return_format ) {
@@ -6926,6 +6952,130 @@ S_emulate_langinfo(pTHX_ const int item,
6926
6952
6927
6953
Safefree (temp );
6928
6954
break ;
6955
+ }
6956
+
6957
+ /* Here, the item is 'ALT_DIGITS' and temp contains the zeroth
6958
+ * alternate digit. If empty or doesn't differ from regular digits,
6959
+ * return that there aren't alternate digits */
6960
+ if (temp [0 ] == '\0' || strchr (temp , '0' )) {
6961
+ Safefree (temp );
6962
+ retval = "" ;
6963
+ break ;
6964
+ }
6965
+
6966
+ /* ALT_DIGITS requires special handling because it requires up to 100
6967
+ * values. Below we generate those by using the %O modifier to
6968
+ * strftime() formats.
6969
+ *
6970
+ * We already have the alternate digit for zero in 'temp', generated
6971
+ * using the %Ow format. That was used because it seems least likely
6972
+ * to have a leading zero. But some locales return that anyway. If
6973
+ * the first half of temp is identical to the second half, assume that
6974
+ * is the case, and use just the first half */
6975
+ const char * alt0 = temp ; /* Clearer synonym */
6976
+ Size_t alt0_len = strlen (alt0 );
6977
+ if ((alt0_len & 1 ) == 0 ) {
6978
+ Size_t half_alt0_len = alt0_len / 2 ;
6979
+ if (strnEQ (temp , temp + half_alt0_len , half_alt0_len )) {
6980
+ alt0_len = half_alt0_len ;
6981
+ }
6982
+ }
6983
+
6984
+ /* Save the 0 digit string */
6985
+ sv_setpvn (sv , alt0 , alt0_len );
6986
+ sv_catpvn_nomg (sv , ";" , 1 );
6987
+
6988
+ /* Various %O formats can be used to derive the alternate digits. Only
6989
+ * %Oy can go up to the full 100 values. If it doesn't work, we try
6990
+ * various fallbacks in decreasing order of how many values they can
6991
+ * deliver. maxes[] tells the highest value that the format applies
6992
+ * to; offsets[] compensates for 0-based vs 1-based indices; and vars[]
6993
+ * holds which field in the 'struct tm' applies to the corresponding
6994
+ * format */
6995
+ int year , min , sec ;
6996
+ const char * fmts [] = {"%Oy" , "%OM" , "%OS" , "%Od" , "%OH" , "%Om" , "%Ow" };
6997
+ const Size_t maxes [] = { 99 , 59 , 59 , 31 , 23 , 11 , 6 };
6998
+ const int offsets [] = { 0 , 0 , 0 , 1 , 0 , 1 , 0 };
6999
+ int * vars [] = {& year , & min , & sec , & mday , & hour , & mon , & mday };
7000
+ Size_t j = 0 ; /* Current index into the above tables */
7001
+
7002
+ orig_TIME_locale = toggle_locale_c (LC_TIME , locale );
7003
+
7004
+ for (unsigned int i = 1 ; i <= 99 ; i ++ ) {
7005
+ struct tm mytm ;
7006
+
7007
+ redo :
7008
+ if (j >= C_ARRAY_LENGTH (fmts )) {
7009
+ break ; /* Exhausted formats early; can't continue */
7010
+ }
7011
+
7012
+ if (i > maxes [j ]) {
7013
+ j ++ ; /* Exhausted this format; try next one */
7014
+ goto redo ;
7015
+ }
7016
+
7017
+ year = (strchr (fmts [j ], 'y' )) ? 1900 : 2011 ;
7018
+ hour = 0 ;
7019
+ min = 0 ;
7020
+ sec = 0 ;
7021
+ mday = 1 ;
7022
+ mon = 0 ;
7023
+
7024
+ /* Change the variable corresponding to this format to the
7025
+ * current time being run in 'i' */
7026
+ * (vars [j ]) += i - offsets [j ];
7027
+
7028
+ /* Do the strftime. Once we have determined the UTF8ness (if
7029
+ * we want it), assume the rest will be the same, and use
7030
+ * strftime_tm(), which doesn't recalculate UTF8ness */
7031
+ ints_to_tm (& mytm , sec , min , hour , mday , mon , year , 0 , 0 , 0 );
7032
+ char * temp ;
7033
+ if (utf8ness && is_utf8 != UTF8NESS_NO && is_utf8 != UTF8NESS_YES ) {
7034
+ temp = strftime8 (fmts [j ],
7035
+ & mytm ,
7036
+ UTF8NESS_IMMATERIAL ,
7037
+ & is_utf8 ,
7038
+ false /* not calling from sv_strftime */
7039
+ );
7040
+ }
7041
+ else {
7042
+ temp = strftime_tm (fmts [j ], & mytm );
7043
+ }
7044
+
7045
+ DEBUG_Lv (PerlIO_printf (Perl_debug_log ,
7046
+ "i=%d, format=%s, alt='%s'\n" ,
7047
+ i , fmts [j ], temp ));
7048
+
7049
+ /* If no result (meaning this platform didn't recognize this
7050
+ * format), or it returned regular digits, give up on this
7051
+ * format, to try the next candidate one */
7052
+ if (temp == NULL || strpbrk (temp , "0123456789" )) {
7053
+ Safefree (temp );
7054
+ j ++ ;
7055
+ goto redo ;
7056
+ }
7057
+
7058
+ /* If there is a leading zero, skip past it, to get the second
7059
+ * one in the string */
7060
+ const char * current = temp ;
7061
+ if (strnEQ (temp , alt0 , alt0_len )) {
7062
+ current += alt0_len ;
7063
+ }
7064
+
7065
+ /* Append this number to the ongoing list, including the separator.
7066
+ * */
7067
+ sv_catpv_nomg (sv , current );
7068
+ sv_catpvn_nomg (sv , ";" , 1 );
7069
+ Safefree (temp );
7070
+ } /* End of loop generating ALT_DIGIT strings */
7071
+
7072
+ Safefree (alt0 );
7073
+
7074
+ restore_toggled_locale_c (LC_TIME , orig_TIME_locale );
7075
+
7076
+ retval_type = RETVAL_IN_sv ;
7077
+ break ;
7078
+
6929
7079
# endif
6930
7080
6931
7081
} /* End of braced group for outer switch 'default:' case */
0 commit comments