@@ -6118,13 +6118,46 @@ S_langinfo_sv_i(pTHX_
6118
6118
6119
6119
const char * retval = nl_langinfo (item );
6120
6120
Size_t total_len = strlen (retval );
6121
+
6122
+ if (UNLIKELY (item == ALT_DIGITS ) && total_len > 0 ) {
6123
+
6124
+ /* The return of this item must end in 2 NULs if there are fewer
6125
+ * than 100 strings */
6126
+ const char * s = retval + total_len + 1 ;
6127
+
6128
+ for (unsigned int i = 1 ; i <= 99 ; i ++ ) {
6129
+ Size_t len = strlen (s ) + 1 ;
6130
+ total_len += len ;
6131
+
6132
+ if (len == 1 ) { /* Only a NUL */
6133
+ break ;
6134
+ }
6135
+
6136
+ s += len ;
6137
+ }
6138
+ }
6139
+
6121
6140
sv_setpvn (sv , retval , total_len );
6122
6141
6123
6142
gwLOCALE_UNLOCK ;
6124
6143
6144
+ /* Make sure the sequence ends in a double NUL to make it easier on
6145
+ * downstream handlers; this is not guaranteed by nl_langinfo() itself
6146
+ * */
6147
+ if (UNLIKELY (item == ALT_DIGITS ) && total_len > 0 ) {
6148
+ sv_catpvn_nomg (sv , "\0" , 1 );
6149
+ }
6150
+
6125
6151
SvUTF8_off (sv );
6126
6152
retval = SvPVX_const (sv );
6127
6153
6154
+ /* Note that get_locale_string_utf8ness_i() is passed a char*, so stops
6155
+ * looking at the first NUL, meaning it only looks at string [0] in the
6156
+ * ALT_DIGITS case: alternate zero. One might think that you'd need to
6157
+ * look at all the strings to determine utf8ness. But that is not true
6158
+ * for this case; string [0] is sufficient. This is because there are
6159
+ * no ASCII alternate digits, so [0] is enough to decide the utf8ness
6160
+ * */
6128
6161
if (utf8ness ) {
6129
6162
* utf8ness = get_locale_string_utf8ness_i (retval ,
6130
6163
LOCALE_UTF8NESS_UNKNOWN ,
@@ -6865,34 +6898,7 @@ S_emulate_langinfo(pTHX_ const int item,
6865
6898
6866
6899
restore_toggled_locale_c (LC_TIME , orig_TIME_locale );
6867
6900
6868
- /* If the item is 'ALT_DIGITS', '*retbuf' contains the alternate
6869
- * format for wday 0. If the value is the same as the normal 0,
6870
- * there isn't an alternate, so clear the buffer.
6871
- *
6872
- * (wday was chosen because its range is all a single digit.
6873
- * Things like tm_sec have two digits as the minimum: '00'.) */
6874
- if (item == ALT_DIGITS && strEQ (temp , "0" )) {
6875
- retval = "" ;
6876
- Safefree (temp );
6877
- break ;
6878
- }
6879
-
6880
- /* ALT_DIGITS is problematic. Experiments on it showed that
6881
- * strftime() did not always work properly when going from alt-9 to
6882
- * alt-10. Only a few locales have this item defined, and in all
6883
- * of them on Linux that khw was able to find, nl_langinfo() merely
6884
- * returned the alt-0 character, possibly doubled. Most Unicode
6885
- * digits are in blocks of 10 consecutive code points, so that is
6886
- * sufficient information for such scripts, as we can infer alt-1,
6887
- * alt-2, .... But for a Japanese locale, a CJK ideographic 0 is
6888
- * returned, and the CJK digits are not in code point order, so you
6889
- * can't really infer anything. The localedef for this locale did
6890
- * specify the succeeding digits, so that strftime() works properly
6891
- * on them, without needing to infer anything. But the
6892
- * nl_langinfo() return did not give sufficient information for the
6893
- * caller to understand what's going on. So until there is
6894
- * evidence that it should work differently, this returns the alt-0
6895
- * string for ALT_DIGITS. */
6901
+ if (LIKELY (item != ALT_DIGITS )) {
6896
6902
6897
6903
/* If to return what strftime() returns, are done */
6898
6904
if (! return_format ) {
@@ -6926,6 +6932,134 @@ S_emulate_langinfo(pTHX_ const int item,
6926
6932
6927
6933
Safefree (temp );
6928
6934
break ;
6935
+ }
6936
+
6937
+ /* Here, the item is 'ALT_DIGITS' and temp contains the zeroth
6938
+ * alternate digit. If empty or doesn't differ from regular digits,
6939
+ * return that there aren't alternate digits */
6940
+ if (temp [0 ] == '\0' || strchr (temp , '0' )) {
6941
+ Safefree (temp );
6942
+ retval = "" ;
6943
+ break ;
6944
+ }
6945
+
6946
+ /* ALT_DIGITS requires special handling because it is not a simple
6947
+ * string, but a sequence of up to 100 NUL-terminated strings. Below
6948
+ * we generate those by using the %O modifier to strftime() formats.
6949
+ *
6950
+ * We already have the alternate digit for zero in 'temp', generated
6951
+ * using the %Ow format. That was used because it seems least likely
6952
+ * to have a leading zero. But some locales return that anyway. If
6953
+ * the first half of temp is identical to the second half, assume that
6954
+ * is the case, and use just the second half */
6955
+ const char * alt0 = temp ; /* Clearer synonym */
6956
+ Size_t alt0_len = strlen (alt0 );
6957
+ if ((alt0_len & 1 ) == 0 ) {
6958
+ Size_t half_alt0_len = alt0_len / 2 ;
6959
+ if (strnEQ (temp , temp + half_alt0_len , half_alt0_len ))
6960
+ {
6961
+ alt0_len = half_alt0_len ;
6962
+ }
6963
+ }
6964
+
6965
+ /* Save the 0 digit string */
6966
+ sv_setpvn (sv , alt0 , alt0_len );
6967
+ sv_catpvn_nomg (sv , "\0" , 1 );
6968
+
6969
+ /* Various %O formats can be used to derive the alternate digits. Only
6970
+ * %Oy can go up to the full 100 values. If it doesn't work, we try
6971
+ * various fallbacks in decreasing order of how many values they can
6972
+ * deliver. maxes[] tells the highest value that the format applies
6973
+ * to; offsets[] compensates for 0-based vs 1-based indices; and vars[]
6974
+ * holds which field in the 'struct tm' applies to the corresponding
6975
+ * format */
6976
+ int year , min , sec ;
6977
+ const char * fmts [] = {"%Oy" , "%OM" , "%OS" , "%Od" , "%OH" , "%Om" , "%Ow" };
6978
+ const Size_t maxes [] = { 99 , 59 , 59 , 31 , 23 , 11 , 6 };
6979
+ const int offsets [] = { 0 , 0 , 0 , 1 , 0 , 1 , 0 };
6980
+ int * vars [] = {& year , & min , & sec , & mday , & hour , & mon , & mday };
6981
+ Size_t j = 0 ; /* Current index into the above tables */
6982
+
6983
+ orig_TIME_locale = toggle_locale_c (LC_TIME , locale );
6984
+
6985
+ for (unsigned int i = 1 ; i <= 99 ; i ++ ) {
6986
+ struct tm mytm ;
6987
+
6988
+ redo :
6989
+ if (j >= C_ARRAY_LENGTH (fmts )) {
6990
+ break ; /* Exhausted formats early; can't continue */
6991
+ }
6992
+
6993
+ if (i > maxes [j ]) {
6994
+ j ++ ; /* Exhausted this format; try next one */
6995
+ goto redo ;
6996
+ }
6997
+
6998
+ year = (strchr (fmts [j ], 'y' )) ? 1900 : 2011 ;
6999
+ hour = 0 ;
7000
+ min = 0 ;
7001
+ sec = 0 ;
7002
+ mday = 1 ;
7003
+ mon = 0 ;
7004
+
7005
+ /* Change the variable corresponding to this format to the
7006
+ * current time being run in 'i' */
7007
+ * (vars [j ]) += i - offsets [j ];
7008
+
7009
+ /* Do the strftime. Once we have determined the UTF8ness (if
7010
+ * we want it), assume the rest will be the same, and use
7011
+ * strftime_tm(), which doesn't recalculate UTF8ness */
7012
+ ints_to_tm (& mytm , sec , min , hour , mday , mon , year , 0 , 0 , 0 );
7013
+ char * temp ;
7014
+ if (utf8ness && is_utf8 != UTF8NESS_NO && is_utf8 != UTF8NESS_YES ) {
7015
+ temp = strftime8 (fmts [j ],
7016
+ & mytm ,
7017
+ UTF8NESS_IMMATERIAL ,
7018
+ & is_utf8 ,
7019
+ false /* not calling from sv_strftime */
7020
+ );
7021
+ }
7022
+ else {
7023
+ temp = strftime_tm (fmts [j ], & mytm );
7024
+ }
7025
+
7026
+ DEBUG_Lv (PerlIO_printf (Perl_debug_log ,
7027
+ "i=%d, format=%s, alt='%s'\n" ,
7028
+ i , fmts [j ], temp ));
7029
+
7030
+ /* If no result (meaning this platform didn't recognize this
7031
+ * format), or it returned regular digits, give up on this
7032
+ * format, to try the next candidate one */
7033
+ if (temp == NULL || strpbrk (temp , "0123456789" )) {
7034
+ Safefree (temp );
7035
+ j ++ ;
7036
+ goto redo ;
7037
+ }
7038
+
7039
+ /* If there is a leading zero, skip past it, to get the second
7040
+ * one in the string */
7041
+ const char * current = temp ;
7042
+ if (strnEQ (temp , alt0 , alt0_len )) {
7043
+ current += alt0_len ;
7044
+ }
7045
+
7046
+ /* Append this number to the ongoing list, including a NUL
7047
+ * separator */
7048
+ sv_catpv_nomg (sv , current );
7049
+ sv_catpvn_nomg (sv , "\0" , 1 );
7050
+ Safefree (temp );
7051
+ } /* End of loop generating ALT_DIGIT strings */
7052
+
7053
+ Safefree (alt0 );
7054
+
7055
+ restore_toggled_locale_c (LC_TIME , orig_TIME_locale );
7056
+
7057
+ /* Make sure there is an empty string trailing everything, so
7058
+ * it all ends with two consecutive NULs */
7059
+ sv_catpvn_nomg (sv , "\0" , 1 );
7060
+ retval_type = RETVAL_IN_sv ;
7061
+ break ;
7062
+
6929
7063
# endif
6930
7064
6931
7065
} /* End of braced group for outer switch 'default:' case */
0 commit comments