wok-6.x diff coreutils/stuff/coreutils-8.25-i18n-2.patch @ rev 19392
Up vmtouch (1.1.0)
author | Paul Issott <paul@slitaz.org> |
---|---|
date | Sat Aug 27 20:53:04 2016 +0100 (2016-08-27) |
parents | |
children |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/coreutils/stuff/coreutils-8.25-i18n-2.patch Sat Aug 27 20:53:04 2016 +0100 1.3 @@ -0,0 +1,4790 @@ 1.4 +Submitted by: DJ Lucas (dj_AT_linuxfromscratch_DOT_org) 1.5 +Date: 2016-02-09 1.6 +Initial Package Version: 8.25 1.7 +Upstream Status: Rejected 1.8 +Origin: Based on Suse's i18n patches at https://build.opensuse.org/package/view_file/Base:System/coreutils/coreutils-i18n.patch 1.9 +Description: Fixes several i18n issues with various Coreutils programs 1.10 + 1.11 +diff -Naurp coreutils-8.25-orig/lib/linebuffer.h coreutils-8.25/lib/linebuffer.h 1.12 +--- coreutils-8.25-orig/lib/linebuffer.h 2016-01-01 07:45:55.000000000 -0600 1.13 ++++ coreutils-8.25/lib/linebuffer.h 2016-02-08 19:07:10.298944609 -0600 1.14 +@@ -21,6 +21,11 @@ 1.15 + 1.16 + # include <stdio.h> 1.17 + 1.18 ++/* Get mbstate_t. */ 1.19 ++# if HAVE_WCHAR_H 1.20 ++# include <wchar.h> 1.21 ++# endif 1.22 ++ 1.23 + /* A 'struct linebuffer' holds a line of text. */ 1.24 + 1.25 + struct linebuffer 1.26 +@@ -28,6 +33,9 @@ struct linebuffer 1.27 + size_t size; /* Allocated. */ 1.28 + size_t length; /* Used. */ 1.29 + char *buffer; 1.30 ++# if HAVE_WCHAR_H 1.31 ++ mbstate_t state; 1.32 ++# endif 1.33 + }; 1.34 + 1.35 + /* Initialize linebuffer LINEBUFFER for use. */ 1.36 +diff -Naurp coreutils-8.25-orig/src/cut.c coreutils-8.25/src/cut.c 1.37 +--- coreutils-8.25-orig/src/cut.c 2016-01-13 05:08:59.000000000 -0600 1.38 ++++ coreutils-8.25/src/cut.c 2016-02-08 19:07:10.300944616 -0600 1.39 +@@ -28,6 +28,11 @@ 1.40 + #include <assert.h> 1.41 + #include <getopt.h> 1.42 + #include <sys/types.h> 1.43 ++ 1.44 ++/* Get mbstate_t, mbrtowc(). 
*/ 1.45 ++#if HAVE_WCHAR_H 1.46 ++# include <wchar.h> 1.47 ++#endif 1.48 + #include "system.h" 1.49 + 1.50 + #include "error.h" 1.51 +@@ -38,6 +43,18 @@ 1.52 + 1.53 + #include "set-fields.h" 1.54 + 1.55 ++/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC 1.56 ++ installation; work around this configuration error. */ 1.57 ++#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 1.58 ++# undef MB_LEN_MAX 1.59 ++# define MB_LEN_MAX 16 1.60 ++#endif 1.61 ++ 1.62 ++/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 1.63 ++#if HAVE_MBRTOWC && defined mbstate_t 1.64 ++# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 1.65 ++#endif 1.66 ++ 1.67 + /* The official name of this program (e.g., no 'g' prefix). */ 1.68 + #define PROGRAM_NAME "cut" 1.69 + 1.70 +@@ -54,6 +71,52 @@ 1.71 + } \ 1.72 + while (0) 1.73 + 1.74 ++/* Refill the buffer BUF to get a multibyte character. */ 1.75 ++#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \ 1.76 ++ do \ 1.77 ++ { \ 1.78 ++ if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \ 1.79 ++ { \ 1.80 ++ memmove (BUF, BUFPOS, BUFLEN); \ 1.81 ++ BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \ 1.82 ++ BUFPOS = BUF; \ 1.83 ++ } \ 1.84 ++ } \ 1.85 ++ while (0) 1.86 ++ 1.87 ++/* Get wide character on BUFPOS. BUFPOS is not included after that. 1.88 ++ If byte sequence is not valid as a character, CONVFAIL is true. Otherwise false. */ 1.89 ++#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \ 1.90 ++ do \ 1.91 ++ { \ 1.92 ++ mbstate_t state_bak; \ 1.93 ++ \ 1.94 ++ if (BUFLEN < 1) \ 1.95 ++ { \ 1.96 ++ WC = WEOF; \ 1.97 ++ break; \ 1.98 ++ } \ 1.99 ++ \ 1.100 ++ /* Get a wide character. 
*/ \ 1.101 ++ CONVFAIL = false; \ 1.102 ++ state_bak = STATE; \ 1.103 ++ MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \ 1.104 ++ \ 1.105 ++ switch (MBLENGTH) \ 1.106 ++ { \ 1.107 ++ case (size_t)-1: \ 1.108 ++ case (size_t)-2: \ 1.109 ++ CONVFAIL = true; \ 1.110 ++ STATE = state_bak; \ 1.111 ++ /* Fall througn. */ \ 1.112 ++ \ 1.113 ++ case 0: \ 1.114 ++ MBLENGTH = 1; \ 1.115 ++ break; \ 1.116 ++ } \ 1.117 ++ } \ 1.118 ++ while (0) 1.119 ++ 1.120 + 1.121 + /* Pointer inside RP. When checking if a byte or field is selected 1.122 + by a finite range, we check if it is between CURRENT_RP.LO 1.123 +@@ -61,6 +124,9 @@ 1.124 + CURRENT_RP.HI then we make CURRENT_RP to point to the next range pair. */ 1.125 + static struct field_range_pair *current_rp; 1.126 + 1.127 ++/* Length of the delimiter given as argument to -d. */ 1.128 ++size_t delimlen; 1.129 ++ 1.130 + /* This buffer is used to support the semantics of the -s option 1.131 + (or lack of same) when the specified field list includes (does 1.132 + not include) the first field. In both of those cases, the entire 1.133 +@@ -77,15 +143,25 @@ enum operating_mode 1.134 + { 1.135 + undefined_mode, 1.136 + 1.137 +- /* Output characters that are in the given bytes. */ 1.138 ++ /* Output bytes that are at the given positions. */ 1.139 + byte_mode, 1.140 + 1.141 ++ /* Output characters that are at the given positions. */ 1.142 ++ character_mode, 1.143 ++ 1.144 + /* Output the given delimiter-separated fields. */ 1.145 + field_mode 1.146 + }; 1.147 + 1.148 + static enum operating_mode operating_mode; 1.149 + 1.150 ++/* If nonzero, when in byte mode, don't split multibyte characters. */ 1.151 ++static int byte_mode_character_aware; 1.152 ++ 1.153 ++/* If nonzero, the function for single byte locale is work 1.154 ++ if this program runs on multibyte locale. */ 1.155 ++static int force_singlebyte_mode; 1.156 ++ 1.157 + /* If true do not output lines containing no delimiter characters. 
1.158 + Otherwise, all such lines are printed. This option is valid only 1.159 + with field mode. */ 1.160 +@@ -97,6 +173,9 @@ static bool complement; 1.161 + 1.162 + /* The delimiter character for field mode. */ 1.163 + static unsigned char delim; 1.164 ++#if HAVE_WCHAR_H 1.165 ++static wchar_t wcdelim; 1.166 ++#endif 1.167 + 1.168 + /* The delimiter for each line/record. */ 1.169 + static unsigned char line_delim = '\n'; 1.170 +@@ -164,7 +243,7 @@ Print selected parts of lines from each 1.171 + -f, --fields=LIST select only these fields; also print any line\n\ 1.172 + that contains no delimiter character, unless\n\ 1.173 + the -s option is specified\n\ 1.174 +- -n (ignored)\n\ 1.175 ++ -n with -b: don't split multibyte characters\n\ 1.176 + "), stdout); 1.177 + fputs (_("\ 1.178 + --complement complement the set of selected bytes, characters\n\ 1.179 +@@ -280,6 +359,82 @@ cut_bytes (FILE *stream) 1.180 + } 1.181 + } 1.182 + 1.183 ++#if HAVE_MBRTOWC 1.184 ++/* This function is in use for the following case. 1.185 ++ 1.186 ++ 1. Read from the stream STREAM, printing to standard output any selected 1.187 ++ characters. 1.188 ++ 1.189 ++ 2. Read from stream STREAM, printing to standard output any selected bytes, 1.190 ++ without splitting multibyte characters. */ 1.191 ++ 1.192 ++static void 1.193 ++cut_characters_or_cut_bytes_no_split (FILE *stream) 1.194 ++{ 1.195 ++ size_t idx; /* number of bytes or characters in the line so far. */ 1.196 ++ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ 1.197 ++ char *bufpos; /* Next read position of BUF. */ 1.198 ++ size_t buflen; /* The length of the byte sequence in buf. */ 1.199 ++ wint_t wc; /* A gotten wide character. */ 1.200 ++ size_t mblength; /* The byte size of a multibyte character which shows 1.201 ++ as same character as WC. */ 1.202 ++ mbstate_t state; /* State of the stream. */ 1.203 ++ bool convfail = false; /* true, when conversion failed. Otherwise false. 
*/ 1.204 ++ /* Whether to begin printing delimiters between ranges for the current line. 1.205 ++ Set after we've begun printing data corresponding to the first range. */ 1.206 ++ bool print_delimiter = false; 1.207 ++ 1.208 ++ idx = 0; 1.209 ++ buflen = 0; 1.210 ++ bufpos = buf; 1.211 ++ memset (&state, '\0', sizeof(mbstate_t)); 1.212 ++ 1.213 ++ current_rp = frp; 1.214 ++ 1.215 ++ while (1) 1.216 ++ { 1.217 ++ REFILL_BUFFER (buf, bufpos, buflen, stream); 1.218 ++ 1.219 ++ GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail); 1.220 ++ (void) convfail; /* ignore unused */ 1.221 ++ 1.222 ++ if (wc == WEOF) 1.223 ++ { 1.224 ++ if (idx > 0) 1.225 ++ putchar (line_delim); 1.226 ++ break; 1.227 ++ } 1.228 ++ else if (wc == line_delim) 1.229 ++ { 1.230 ++ putchar (line_delim); 1.231 ++ idx = 0; 1.232 ++ print_delimiter = false; 1.233 ++ current_rp = frp; 1.234 ++ } 1.235 ++ else 1.236 ++ { 1.237 ++ next_item (&idx); 1.238 ++ if (print_kth (idx)) 1.239 ++ { 1.240 ++ if (output_delimiter_specified) 1.241 ++ { 1.242 ++ if (print_delimiter && is_range_start_index (idx)) 1.243 ++ { 1.244 ++ fwrite (output_delimiter_string, sizeof (char), 1.245 ++ output_delimiter_length, stdout); 1.246 ++ } 1.247 ++ print_delimiter = true; 1.248 ++ } 1.249 ++ fwrite (bufpos, mblength, sizeof(char), stdout); 1.250 ++ } 1.251 ++ } 1.252 ++ 1.253 ++ buflen -= mblength; 1.254 ++ bufpos += mblength; 1.255 ++ } 1.256 ++} 1.257 ++#endif 1.258 ++ 1.259 + /* Read from stream STREAM, printing to standard output any selected fields. */ 1.260 + 1.261 + static void 1.262 +@@ -425,13 +580,211 @@ cut_fields (FILE *stream) 1.263 + } 1.264 + } 1.265 + 1.266 ++#if HAVE_MBRTOWC 1.267 ++static void 1.268 ++cut_fields_mb (FILE *stream) 1.269 ++{ 1.270 ++ int c; 1.271 ++ size_t field_idx; 1.272 ++ int found_any_selected_field; 1.273 ++ int buffer_first_field; 1.274 ++ int empty_input; 1.275 ++ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. 
*/ 1.276 ++ char *bufpos; /* Next read position of BUF. */ 1.277 ++ size_t buflen; /* The length of the byte sequence in buf. */ 1.278 ++ wint_t wc = 0; /* A gotten wide character. */ 1.279 ++ size_t mblength; /* The byte size of a multibyte character which shows 1.280 ++ as same character as WC. */ 1.281 ++ mbstate_t state; /* State of the stream. */ 1.282 ++ bool convfail = false; /* true, when conversion failed. Otherwise false. */ 1.283 ++ 1.284 ++ current_rp = frp; 1.285 ++ 1.286 ++ found_any_selected_field = 0; 1.287 ++ field_idx = 1; 1.288 ++ bufpos = buf; 1.289 ++ buflen = 0; 1.290 ++ memset (&state, '\0', sizeof(mbstate_t)); 1.291 ++ 1.292 ++ c = getc (stream); 1.293 ++ empty_input = (c == EOF); 1.294 ++ if (c != EOF) 1.295 ++ { 1.296 ++ ungetc (c, stream); 1.297 ++ wc = 0; 1.298 ++ } 1.299 ++ else 1.300 ++ wc = WEOF; 1.301 ++ 1.302 ++ /* To support the semantics of the -s flag, we may have to buffer 1.303 ++ all of the first field to determine whether it is `delimited.' 1.304 ++ But that is unnecessary if all non-delimited lines must be printed 1.305 ++ and the first field has been selected, or if non-delimited lines 1.306 ++ must be suppressed and the first field has *not* been selected. 1.307 ++ That is because a non-delimited line has exactly one field. 
*/ 1.308 ++ buffer_first_field = (suppress_non_delimited ^ !print_kth (1)); 1.309 ++ 1.310 ++ while (1) 1.311 ++ { 1.312 ++ if (field_idx == 1 && buffer_first_field) 1.313 ++ { 1.314 ++ int len = 0; 1.315 ++ 1.316 ++ while (1) 1.317 ++ { 1.318 ++ REFILL_BUFFER (buf, bufpos, buflen, stream); 1.319 ++ 1.320 ++ GET_NEXT_WC_FROM_BUFFER 1.321 ++ (wc, bufpos, buflen, mblength, state, convfail); 1.322 ++ 1.323 ++ if (wc == WEOF) 1.324 ++ break; 1.325 ++ 1.326 ++ field_1_buffer = xrealloc (field_1_buffer, len + mblength); 1.327 ++ memcpy (field_1_buffer + len, bufpos, mblength); 1.328 ++ len += mblength; 1.329 ++ buflen -= mblength; 1.330 ++ bufpos += mblength; 1.331 ++ 1.332 ++ if (!convfail && (wc == line_delim || wc == wcdelim)) 1.333 ++ break; 1.334 ++ } 1.335 ++ 1.336 ++ if (len <= 0 && wc == WEOF) 1.337 ++ break; 1.338 ++ 1.339 ++ /* If the first field extends to the end of line (it is not 1.340 ++ delimited) and we are printing all non-delimited lines, 1.341 ++ print this one. */ 1.342 ++ if (convfail || (!convfail && wc != wcdelim)) 1.343 ++ { 1.344 ++ if (suppress_non_delimited) 1.345 ++ { 1.346 ++ /* Empty. */ 1.347 ++ } 1.348 ++ else 1.349 ++ { 1.350 ++ fwrite (field_1_buffer, sizeof (char), len, stdout); 1.351 ++ /* Make sure the output line is newline terminated. */ 1.352 ++ if (convfail || (!convfail && wc != line_delim)) 1.353 ++ putchar (line_delim); 1.354 ++ } 1.355 ++ continue; 1.356 ++ } 1.357 ++ 1.358 ++ if (print_kth (1)) 1.359 ++ { 1.360 ++ /* Print the field, but not the trailing delimiter. 
*/ 1.361 ++ fwrite (field_1_buffer, sizeof (char), len - 1, stdout); 1.362 ++ found_any_selected_field = 1; 1.363 ++ } 1.364 ++ next_item (&field_idx); 1.365 ++ } 1.366 ++ 1.367 ++ if (wc != WEOF) 1.368 ++ { 1.369 ++ if (print_kth (field_idx)) 1.370 ++ { 1.371 ++ if (found_any_selected_field) 1.372 ++ { 1.373 ++ fwrite (output_delimiter_string, sizeof (char), 1.374 ++ output_delimiter_length, stdout); 1.375 ++ } 1.376 ++ found_any_selected_field = 1; 1.377 ++ } 1.378 ++ 1.379 ++ while (1) 1.380 ++ { 1.381 ++ REFILL_BUFFER (buf, bufpos, buflen, stream); 1.382 ++ 1.383 ++ GET_NEXT_WC_FROM_BUFFER 1.384 ++ (wc, bufpos, buflen, mblength, state, convfail); 1.385 ++ 1.386 ++ if (wc == WEOF) 1.387 ++ break; 1.388 ++ else if (!convfail && (wc == wcdelim || wc == line_delim)) 1.389 ++ { 1.390 ++ buflen -= mblength; 1.391 ++ bufpos += mblength; 1.392 ++ break; 1.393 ++ } 1.394 ++ 1.395 ++ if (print_kth (field_idx)) 1.396 ++ fwrite (bufpos, mblength, sizeof(char), stdout); 1.397 ++ 1.398 ++ buflen -= mblength; 1.399 ++ bufpos += mblength; 1.400 ++ } 1.401 ++ } 1.402 ++ 1.403 ++ if ((!convfail || wc == line_delim) && buflen < 1) 1.404 ++ wc = WEOF; 1.405 ++ 1.406 ++ if (!convfail && wc == wcdelim) 1.407 ++ next_item (&field_idx); 1.408 ++ else if (wc == WEOF || (!convfail && wc == line_delim)) 1.409 ++ { 1.410 ++ if (found_any_selected_field 1.411 ++ || (!empty_input && !(suppress_non_delimited && field_idx == 1))) 1.412 ++ putchar (line_delim); 1.413 ++ if (wc == WEOF) 1.414 ++ break; 1.415 ++ field_idx = 1; 1.416 ++ current_rp = frp; 1.417 ++ found_any_selected_field = 0; 1.418 ++ } 1.419 ++ } 1.420 ++} 1.421 ++#endif 1.422 ++ 1.423 + static void 1.424 + cut_stream (FILE *stream) 1.425 + { 1.426 +- if (operating_mode == byte_mode) 1.427 +- cut_bytes (stream); 1.428 ++#if HAVE_MBRTOWC 1.429 ++ if (MB_CUR_MAX > 1 && !force_singlebyte_mode) 1.430 ++ { 1.431 ++ switch (operating_mode) 1.432 ++ { 1.433 ++ case byte_mode: 1.434 ++ if (byte_mode_character_aware) 1.435 ++ 
cut_characters_or_cut_bytes_no_split (stream); 1.436 ++ else 1.437 ++ cut_bytes (stream); 1.438 ++ break; 1.439 ++ 1.440 ++ case character_mode: 1.441 ++ cut_characters_or_cut_bytes_no_split (stream); 1.442 ++ break; 1.443 ++ 1.444 ++ case field_mode: 1.445 ++ if (delimlen == 1) 1.446 ++ { 1.447 ++ /* Check if we have utf8 multibyte locale, so we can use this 1.448 ++ optimization because of uniqueness of characters, which is 1.449 ++ not true for e.g. SJIS */ 1.450 ++ char * loc = setlocale(LC_CTYPE, NULL); 1.451 ++ if (loc && (strstr (loc, "UTF-8") || strstr (loc, "utf-8") || 1.452 ++ strstr (loc, "UTF8") || strstr (loc, "utf8"))) 1.453 ++ { 1.454 ++ cut_fields (stream); 1.455 ++ break; 1.456 ++ } 1.457 ++ } 1.458 ++ cut_fields_mb (stream); 1.459 ++ break; 1.460 ++ 1.461 ++ default: 1.462 ++ abort (); 1.463 ++ } 1.464 ++ } 1.465 + else 1.466 +- cut_fields (stream); 1.467 ++#endif 1.468 ++ { 1.469 ++ if (operating_mode == field_mode) 1.470 ++ cut_fields (stream); 1.471 ++ else 1.472 ++ cut_bytes (stream); 1.473 ++ } 1.474 + } 1.475 + 1.476 + /* Process file FILE to standard output. 1.477 +@@ -483,6 +836,7 @@ main (int argc, char **argv) 1.478 + bool ok; 1.479 + bool delim_specified = false; 1.480 + char *spec_list_string IF_LINT ( = NULL); 1.481 ++ char mbdelim[MB_LEN_MAX + 1]; 1.482 + 1.483 + initialize_main (&argc, &argv); 1.484 + set_program_name (argv[0]); 1.485 +@@ -505,7 +859,6 @@ main (int argc, char **argv) 1.486 + switch (optc) 1.487 + { 1.488 + case 'b': 1.489 +- case 'c': 1.490 + /* Build the byte list. */ 1.491 + if (operating_mode != undefined_mode) 1.492 + FATAL_ERROR (_("only one type of list may be specified")); 1.493 +@@ -513,6 +866,14 @@ main (int argc, char **argv) 1.494 + spec_list_string = optarg; 1.495 + break; 1.496 + 1.497 ++ case 'c': 1.498 ++ /* Build the character list. 
*/ 1.499 ++ if (operating_mode != undefined_mode) 1.500 ++ FATAL_ERROR (_("only one type of list may be specified")); 1.501 ++ operating_mode = character_mode; 1.502 ++ spec_list_string = optarg; 1.503 ++ break; 1.504 ++ 1.505 + case 'f': 1.506 + /* Build the field list. */ 1.507 + if (operating_mode != undefined_mode) 1.508 +@@ -524,10 +885,38 @@ main (int argc, char **argv) 1.509 + case 'd': 1.510 + /* New delimiter. */ 1.511 + /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */ 1.512 +- if (optarg[0] != '\0' && optarg[1] != '\0') 1.513 +- FATAL_ERROR (_("the delimiter must be a single character")); 1.514 +- delim = optarg[0]; 1.515 +- delim_specified = true; 1.516 ++ { 1.517 ++#if HAVE_MBRTOWC 1.518 ++ if(MB_CUR_MAX > 1) 1.519 ++ { 1.520 ++ mbstate_t state; 1.521 ++ 1.522 ++ memset (&state, '\0', sizeof(mbstate_t)); 1.523 ++ delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state); 1.524 ++ 1.525 ++ if (delimlen == (size_t)-1 || delimlen == (size_t)-2) 1.526 ++ ++force_singlebyte_mode; 1.527 ++ else 1.528 ++ { 1.529 ++ delimlen = (delimlen < 1) ? 
1 : delimlen; 1.530 ++ if (wcdelim != L'\0' && *(optarg + delimlen) != '\0') 1.531 ++ FATAL_ERROR (_("the delimiter must be a single character")); 1.532 ++ memcpy (mbdelim, optarg, delimlen); 1.533 ++ mbdelim[delimlen] = '\0'; 1.534 ++ if (delimlen == 1) 1.535 ++ delim = *optarg; 1.536 ++ } 1.537 ++ } 1.538 ++ 1.539 ++ if (MB_CUR_MAX <= 1 || force_singlebyte_mode) 1.540 ++#endif 1.541 ++ { 1.542 ++ if (optarg[0] != '\0' && optarg[1] != '\0') 1.543 ++ FATAL_ERROR (_("the delimiter must be a single character")); 1.544 ++ delim = (unsigned char) optarg[0]; 1.545 ++ } 1.546 ++ delim_specified = true; 1.547 ++ } 1.548 + break; 1.549 + 1.550 + case OUTPUT_DELIMITER_OPTION: 1.551 +@@ -540,6 +929,7 @@ main (int argc, char **argv) 1.552 + break; 1.553 + 1.554 + case 'n': 1.555 ++ byte_mode_character_aware = 1; 1.556 + break; 1.557 + 1.558 + case 's': 1.559 +@@ -579,15 +969,34 @@ main (int argc, char **argv) 1.560 + | (complement ? SETFLD_COMPLEMENT : 0) ); 1.561 + 1.562 + if (!delim_specified) 1.563 +- delim = '\t'; 1.564 ++ { 1.565 ++ delim = '\t'; 1.566 ++#ifdef HAVE_MBRTOWC 1.567 ++ wcdelim = L'\t'; 1.568 ++ mbdelim[0] = '\t'; 1.569 ++ mbdelim[1] = '\0'; 1.570 ++ delimlen = 1; 1.571 ++#endif 1.572 ++ } 1.573 + 1.574 + if (output_delimiter_string == NULL) 1.575 + { 1.576 +- static char dummy[2]; 1.577 +- dummy[0] = delim; 1.578 +- dummy[1] = '\0'; 1.579 +- output_delimiter_string = dummy; 1.580 +- output_delimiter_length = 1; 1.581 ++#ifdef HAVE_MBRTOWC 1.582 ++ if (MB_CUR_MAX > 1 && !force_singlebyte_mode) 1.583 ++ { 1.584 ++ output_delimiter_string = xstrdup(mbdelim); 1.585 ++ output_delimiter_length = delimlen; 1.586 ++ } 1.587 ++ 1.588 ++ if (MB_CUR_MAX <= 1 || force_singlebyte_mode) 1.589 ++#endif 1.590 ++ { 1.591 ++ static char dummy[2]; 1.592 ++ dummy[0] = delim; 1.593 ++ dummy[1] = '\0'; 1.594 ++ output_delimiter_string = dummy; 1.595 ++ output_delimiter_length = 1; 1.596 ++ } 1.597 + } 1.598 + 1.599 + if (optind == argc) 1.600 +diff -Naurp 
coreutils-8.25-orig/src/expand.c coreutils-8.25/src/expand.c 1.601 +--- coreutils-8.25-orig/src/expand.c 2016-01-01 07:48:50.000000000 -0600 1.602 ++++ coreutils-8.25/src/expand.c 2016-02-08 19:07:10.301944619 -0600 1.603 +@@ -37,12 +37,34 @@ 1.604 + #include <stdio.h> 1.605 + #include <getopt.h> 1.606 + #include <sys/types.h> 1.607 ++ 1.608 ++/* Get mbstate_t, mbrtowc(), wcwidth(). */ 1.609 ++#if HAVE_WCHAR_H 1.610 ++# include <wchar.h> 1.611 ++#endif 1.612 ++ 1.613 ++/* Get iswblank(). */ 1.614 ++#if HAVE_WCTYPE_H 1.615 ++# include <wctype.h> 1.616 ++#endif 1.617 ++ 1.618 + #include "system.h" 1.619 + #include "error.h" 1.620 + #include "fadvise.h" 1.621 + #include "quote.h" 1.622 + #include "xstrndup.h" 1.623 + 1.624 ++/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC 1.625 ++ installation; work around this configuration error. */ 1.626 ++#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 1.627 ++# define MB_LEN_MAX 16 1.628 ++#endif 1.629 ++ 1.630 ++/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 1.631 ++#if HAVE_MBRTOWC && defined mbstate_t 1.632 ++# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 1.633 ++#endif 1.634 ++ 1.635 + /* The official name of this program (e.g., no 'g' prefix). */ 1.636 + #define PROGRAM_NAME "expand" 1.637 + 1.638 +@@ -357,6 +379,142 @@ expand (void) 1.639 + } 1.640 + } 1.641 + 1.642 ++#if HAVE_MBRTOWC 1.643 ++static void 1.644 ++expand_multibyte (void) 1.645 ++{ 1.646 ++ FILE *fp; /* Input strem. */ 1.647 ++ mbstate_t i_state; /* Current shift state of the input stream. */ 1.648 ++ mbstate_t i_state_bak; /* Back up the I_STATE. */ 1.649 ++ mbstate_t o_state; /* Current shift state of the output stream. */ 1.650 ++ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ 1.651 ++ char *bufpos = buf; /* Next read position of BUF. */ 1.652 ++ size_t buflen = 0; /* The length of the byte sequence in buf. */ 1.653 ++ wchar_t wc; /* A gotten wide character. 
*/ 1.654 ++ size_t mblength; /* The byte size of a multibyte character 1.655 ++ which shows as same character as WC. */ 1.656 ++ int tab_index = 0; /* Index in `tab_list' of next tabstop. */ 1.657 ++ int column = 0; /* Column on screen of the next char. */ 1.658 ++ int next_tab_column; /* Column the next tab stop is on. */ 1.659 ++ int convert = 1; /* If nonzero, perform translations. */ 1.660 ++ 1.661 ++ fp = next_file ((FILE *) NULL); 1.662 ++ if (fp == NULL) 1.663 ++ return; 1.664 ++ 1.665 ++ memset (&o_state, '\0', sizeof(mbstate_t)); 1.666 ++ memset (&i_state, '\0', sizeof(mbstate_t)); 1.667 ++ 1.668 ++ for (;;) 1.669 ++ { 1.670 ++ /* Refill the buffer BUF. */ 1.671 ++ if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp)) 1.672 ++ { 1.673 ++ memmove (buf, bufpos, buflen); 1.674 ++ buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp); 1.675 ++ bufpos = buf; 1.676 ++ } 1.677 ++ 1.678 ++ /* No character is left in BUF. */ 1.679 ++ if (buflen < 1) 1.680 ++ { 1.681 ++ fp = next_file (fp); 1.682 ++ 1.683 ++ if (fp == NULL) 1.684 ++ break; /* No more files. */ 1.685 ++ else 1.686 ++ { 1.687 ++ memset (&i_state, '\0', sizeof(mbstate_t)); 1.688 ++ continue; 1.689 ++ } 1.690 ++ } 1.691 ++ 1.692 ++ /* Get a wide character. */ 1.693 ++ i_state_bak = i_state; 1.694 ++ mblength = mbrtowc (&wc, bufpos, buflen, &i_state); 1.695 ++ 1.696 ++ switch (mblength) 1.697 ++ { 1.698 ++ case (size_t)-1: /* illegal byte sequence. */ 1.699 ++ case (size_t)-2: 1.700 ++ mblength = 1; 1.701 ++ i_state = i_state_bak; 1.702 ++ if (convert) 1.703 ++ { 1.704 ++ ++column; 1.705 ++ if (convert_entire_line == 0 && !isblank(*bufpos)) 1.706 ++ convert = 0; 1.707 ++ } 1.708 ++ putchar (*bufpos); 1.709 ++ break; 1.710 ++ 1.711 ++ case 0: /* null. */ 1.712 ++ mblength = 1; 1.713 ++ if (convert && convert_entire_line == 0) 1.714 ++ convert = 0; 1.715 ++ putchar ('\0'); 1.716 ++ break; 1.717 ++ 1.718 ++ default: 1.719 ++ if (wc == L'\n') /* LF. 
*/ 1.720 ++ { 1.721 ++ tab_index = 0; 1.722 ++ column = 0; 1.723 ++ convert = 1; 1.724 ++ putchar ('\n'); 1.725 ++ } 1.726 ++ else if (wc == L'\t' && convert) /* Tab. */ 1.727 ++ { 1.728 ++ if (tab_size == 0) 1.729 ++ { 1.730 ++ /* Do not let tab_index == first_free_tab; 1.731 ++ stop when it is 1 less. */ 1.732 ++ while (tab_index < first_free_tab - 1 1.733 ++ && column >= tab_list[tab_index]) 1.734 ++ tab_index++; 1.735 ++ next_tab_column = tab_list[tab_index]; 1.736 ++ if (tab_index < first_free_tab - 1) 1.737 ++ tab_index++; 1.738 ++ if (column >= next_tab_column) 1.739 ++ next_tab_column = column + 1; 1.740 ++ } 1.741 ++ else 1.742 ++ next_tab_column = column + tab_size - column % tab_size; 1.743 ++ 1.744 ++ while (column < next_tab_column) 1.745 ++ { 1.746 ++ putchar (' '); 1.747 ++ ++column; 1.748 ++ } 1.749 ++ } 1.750 ++ else /* Others. */ 1.751 ++ { 1.752 ++ if (convert) 1.753 ++ { 1.754 ++ if (wc == L'\b') 1.755 ++ { 1.756 ++ if (column > 0) 1.757 ++ --column; 1.758 ++ } 1.759 ++ else 1.760 ++ { 1.761 ++ int width; /* The width of WC. */ 1.762 ++ 1.763 ++ width = wcwidth (wc); 1.764 ++ column += (width > 0) ? width : 0; 1.765 ++ if (convert_entire_line == 0 && !iswblank(wc)) 1.766 ++ convert = 0; 1.767 ++ } 1.768 ++ } 1.769 ++ fwrite (bufpos, sizeof(char), mblength, stdout); 1.770 ++ } 1.771 ++ } 1.772 ++ buflen -= mblength; 1.773 ++ bufpos += mblength; 1.774 ++ } 1.775 ++} 1.776 ++#endif 1.777 ++ 1.778 + int 1.779 + main (int argc, char **argv) 1.780 + { 1.781 +@@ -421,7 +579,12 @@ main (int argc, char **argv) 1.782 + 1.783 + file_list = (optind < argc ? 
&argv[optind] : stdin_argv); 1.784 + 1.785 +- expand (); 1.786 ++#if HAVE_MBRTOWC 1.787 ++ if (MB_CUR_MAX > 1) 1.788 ++ expand_multibyte (); 1.789 ++ else 1.790 ++#endif 1.791 ++ expand (); 1.792 + 1.793 + if (have_read_stdin && fclose (stdin) != 0) 1.794 + error (EXIT_FAILURE, errno, "-"); 1.795 +diff -Naurp coreutils-8.25-orig/src/fold.c coreutils-8.25/src/fold.c 1.796 +--- coreutils-8.25-orig/src/fold.c 2016-01-01 07:48:50.000000000 -0600 1.797 ++++ coreutils-8.25/src/fold.c 2016-02-08 19:07:10.302944622 -0600 1.798 +@@ -22,11 +22,33 @@ 1.799 + #include <getopt.h> 1.800 + #include <sys/types.h> 1.801 + 1.802 ++/* Get mbstate_t, mbrtowc(), wcwidth(). */ 1.803 ++#if HAVE_WCHAR_H 1.804 ++# include <wchar.h> 1.805 ++#endif 1.806 ++ 1.807 ++/* Get iswprint(), iswblank(), wcwidth(). */ 1.808 ++#if HAVE_WCTYPE_H 1.809 ++# include <wctype.h> 1.810 ++#endif 1.811 ++ 1.812 + #include "system.h" 1.813 + #include "error.h" 1.814 + #include "fadvise.h" 1.815 + #include "xdectoint.h" 1.816 + 1.817 ++/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC 1.818 ++ installation; work around this configuration error. */ 1.819 ++#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 1.820 ++# undef MB_LEN_MAX 1.821 ++# define MB_LEN_MAX 16 1.822 ++#endif 1.823 ++ 1.824 ++/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 1.825 ++#if HAVE_MBRTOWC && defined mbstate_t 1.826 ++# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 1.827 ++#endif 1.828 ++ 1.829 + #define TAB_WIDTH 8 1.830 + 1.831 + /* The official name of this program (e.g., no 'g' prefix). */ 1.832 +@@ -34,20 +56,41 @@ 1.833 + 1.834 + #define AUTHORS proper_name ("David MacKenzie") 1.835 + 1.836 ++#define FATAL_ERROR(Message) \ 1.837 ++ do \ 1.838 ++ { \ 1.839 ++ error (0, 0, (Message)); \ 1.840 ++ usage (2); \ 1.841 ++ } \ 1.842 ++ while (0) 1.843 ++ 1.844 ++enum operating_mode 1.845 ++{ 1.846 ++ /* Fold texts by columns that are at the given positions. 
*/ 1.847 ++ column_mode, 1.848 ++ 1.849 ++ /* Fold texts by bytes that are at the given positions. */ 1.850 ++ byte_mode, 1.851 ++ 1.852 ++ /* Fold texts by characters that are at the given positions. */ 1.853 ++ character_mode, 1.854 ++}; 1.855 ++ 1.856 ++/* The argument shows current mode. (Default: column_mode) */ 1.857 ++static enum operating_mode operating_mode; 1.858 ++ 1.859 + /* If nonzero, try to break on whitespace. */ 1.860 + static bool break_spaces; 1.861 + 1.862 +-/* If nonzero, count bytes, not column positions. */ 1.863 +-static bool count_bytes; 1.864 +- 1.865 + /* If nonzero, at least one of the files we read was standard input. */ 1.866 + static bool have_read_stdin; 1.867 + 1.868 +-static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::"; 1.869 ++static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::"; 1.870 + 1.871 + static struct option const longopts[] = 1.872 + { 1.873 + {"bytes", no_argument, NULL, 'b'}, 1.874 ++ {"characters", no_argument, NULL, 'c'}, 1.875 + {"spaces", no_argument, NULL, 's'}, 1.876 + {"width", required_argument, NULL, 'w'}, 1.877 + {GETOPT_HELP_OPTION_DECL}, 1.878 +@@ -75,6 +118,7 @@ Wrap input lines in each FILE, writing t 1.879 + 1.880 + fputs (_("\ 1.881 + -b, --bytes count bytes rather than columns\n\ 1.882 ++ -c, --characters count characters rather than columns\n\ 1.883 + -s, --spaces break at spaces\n\ 1.884 + -w, --width=WIDTH use WIDTH columns instead of 80\n\ 1.885 + "), stdout); 1.886 +@@ -92,7 +136,7 @@ Wrap input lines in each FILE, writing t 1.887 + static size_t 1.888 + adjust_column (size_t column, char c) 1.889 + { 1.890 +- if (!count_bytes) 1.891 ++ if (operating_mode != byte_mode) 1.892 + { 1.893 + if (c == '\b') 1.894 + { 1.895 +@@ -115,30 +159,14 @@ adjust_column (size_t column, char c) 1.896 + to stdout, with maximum line length WIDTH. 1.897 + Return true if successful. 
*/ 1.898 + 1.899 +-static bool 1.900 +-fold_file (char const *filename, size_t width) 1.901 ++static void 1.902 ++fold_text (FILE *istream, size_t width, int *saved_errno) 1.903 + { 1.904 +- FILE *istream; 1.905 + int c; 1.906 + size_t column = 0; /* Screen column where next char will go. */ 1.907 + size_t offset_out = 0; /* Index in 'line_out' for next char. */ 1.908 + static char *line_out = NULL; 1.909 + static size_t allocated_out = 0; 1.910 +- int saved_errno; 1.911 +- 1.912 +- if (STREQ (filename, "-")) 1.913 +- { 1.914 +- istream = stdin; 1.915 +- have_read_stdin = true; 1.916 +- } 1.917 +- else 1.918 +- istream = fopen (filename, "r"); 1.919 +- 1.920 +- if (istream == NULL) 1.921 +- { 1.922 +- error (0, errno, "%s", quotef (filename)); 1.923 +- return false; 1.924 +- } 1.925 + 1.926 + fadvise (istream, FADVISE_SEQUENTIAL); 1.927 + 1.928 +@@ -168,6 +196,15 @@ fold_file (char const *filename, size_t 1.929 + bool found_blank = false; 1.930 + size_t logical_end = offset_out; 1.931 + 1.932 ++ /* If LINE_OUT has no wide character, 1.933 ++ put a new wide character in LINE_OUT 1.934 ++ if column is bigger than width. */ 1.935 ++ if (offset_out == 0) 1.936 ++ { 1.937 ++ line_out[offset_out++] = c; 1.938 ++ continue; 1.939 ++ } 1.940 ++ 1.941 + /* Look for the last blank. */ 1.942 + while (logical_end) 1.943 + { 1.944 +@@ -214,11 +251,221 @@ fold_file (char const *filename, size_t 1.945 + line_out[offset_out++] = c; 1.946 + } 1.947 + 1.948 +- saved_errno = errno; 1.949 ++ *saved_errno = errno; 1.950 ++ 1.951 ++ if (offset_out) 1.952 ++ fwrite (line_out, sizeof (char), (size_t) offset_out, stdout); 1.953 ++ 1.954 ++} 1.955 ++ 1.956 ++#if HAVE_MBRTOWC 1.957 ++static void 1.958 ++fold_multibyte_text (FILE *istream, size_t width, int *saved_errno) 1.959 ++{ 1.960 ++ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ 1.961 ++ size_t buflen = 0; /* The length of the byte sequence in buf. */ 1.962 ++ char *bufpos = buf; /* Next read position of BUF. 
*/ 1.963 ++ wint_t wc; /* A gotten wide character. */ 1.964 ++ size_t mblength; /* The byte size of a multibyte character which shows 1.965 ++ as same character as WC. */ 1.966 ++ mbstate_t state, state_bak; /* State of the stream. */ 1.967 ++ int convfail = 0; /* 1, when conversion is failed. Otherwise 0. */ 1.968 ++ 1.969 ++ static char *line_out = NULL; 1.970 ++ size_t offset_out = 0; /* Index in `line_out' for next char. */ 1.971 ++ static size_t allocated_out = 0; 1.972 ++ 1.973 ++ int increment; 1.974 ++ size_t column = 0; 1.975 ++ 1.976 ++ size_t last_blank_pos; 1.977 ++ size_t last_blank_column; 1.978 ++ int is_blank_seen; 1.979 ++ int last_blank_increment = 0; 1.980 ++ int is_bs_following_last_blank; 1.981 ++ size_t bs_following_last_blank_num; 1.982 ++ int is_cr_after_last_blank; 1.983 ++ 1.984 ++#define CLEAR_FLAGS \ 1.985 ++ do \ 1.986 ++ { \ 1.987 ++ last_blank_pos = 0; \ 1.988 ++ last_blank_column = 0; \ 1.989 ++ is_blank_seen = 0; \ 1.990 ++ is_bs_following_last_blank = 0; \ 1.991 ++ bs_following_last_blank_num = 0; \ 1.992 ++ is_cr_after_last_blank = 0; \ 1.993 ++ } \ 1.994 ++ while (0) 1.995 ++ 1.996 ++#define START_NEW_LINE \ 1.997 ++ do \ 1.998 ++ { \ 1.999 ++ putchar ('\n'); \ 1.1000 ++ column = 0; \ 1.1001 ++ offset_out = 0; \ 1.1002 ++ CLEAR_FLAGS; \ 1.1003 ++ } \ 1.1004 ++ while (0) 1.1005 ++ 1.1006 ++ CLEAR_FLAGS; 1.1007 ++ memset (&state, '\0', sizeof(mbstate_t)); 1.1008 ++ 1.1009 ++ for (;; bufpos += mblength, buflen -= mblength) 1.1010 ++ { 1.1011 ++ if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream)) 1.1012 ++ { 1.1013 ++ memmove (buf, bufpos, buflen); 1.1014 ++ buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream); 1.1015 ++ bufpos = buf; 1.1016 ++ } 1.1017 ++ 1.1018 ++ if (buflen < 1) 1.1019 ++ break; 1.1020 ++ 1.1021 ++ /* Get a wide character. 
*/ 1.1022 ++ state_bak = state; 1.1023 ++ mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state); 1.1024 ++ 1.1025 ++ switch (mblength) 1.1026 ++ { 1.1027 ++ case (size_t)-1: 1.1028 ++ case (size_t)-2: 1.1029 ++ convfail++; 1.1030 ++ state = state_bak; 1.1031 ++ /* Fall through. */ 1.1032 ++ 1.1033 ++ case 0: 1.1034 ++ mblength = 1; 1.1035 ++ break; 1.1036 ++ } 1.1037 ++ 1.1038 ++rescan: 1.1039 ++ if (operating_mode == byte_mode) /* byte mode */ 1.1040 ++ increment = mblength; 1.1041 ++ else if (operating_mode == character_mode) /* character mode */ 1.1042 ++ increment = 1; 1.1043 ++ else /* column mode */ 1.1044 ++ { 1.1045 ++ if (convfail) 1.1046 ++ increment = 1; 1.1047 ++ else 1.1048 ++ { 1.1049 ++ switch (wc) 1.1050 ++ { 1.1051 ++ case L'\n': 1.1052 ++ fwrite (line_out, sizeof(char), offset_out, stdout); 1.1053 ++ START_NEW_LINE; 1.1054 ++ continue; 1.1055 ++ 1.1056 ++ case L'\b': 1.1057 ++ increment = (column > 0) ? -1 : 0; 1.1058 ++ break; 1.1059 ++ 1.1060 ++ case L'\r': 1.1061 ++ increment = -1 * column; 1.1062 ++ break; 1.1063 ++ 1.1064 ++ case L'\t': 1.1065 ++ increment = 8 - column % 8; 1.1066 ++ break; 1.1067 ++ 1.1068 ++ default: 1.1069 ++ increment = wcwidth (wc); 1.1070 ++ increment = (increment < 0) ? 0 : increment; 1.1071 ++ } 1.1072 ++ } 1.1073 ++ } 1.1074 ++ 1.1075 ++ if (column + increment > width && break_spaces && last_blank_pos) 1.1076 ++ { 1.1077 ++ fwrite (line_out, sizeof(char), last_blank_pos, stdout); 1.1078 ++ putchar ('\n'); 1.1079 ++ 1.1080 ++ offset_out = offset_out - last_blank_pos; 1.1081 ++ column = column - last_blank_column + ((is_cr_after_last_blank) 1.1082 ++ ? 
last_blank_increment : bs_following_last_blank_num); 1.1083 ++ memmove (line_out, line_out + last_blank_pos, offset_out); 1.1084 ++ CLEAR_FLAGS; 1.1085 ++ goto rescan; 1.1086 ++ } 1.1087 ++ 1.1088 ++ if (column + increment > width && column != 0) 1.1089 ++ { 1.1090 ++ fwrite (line_out, sizeof(char), offset_out, stdout); 1.1091 ++ START_NEW_LINE; 1.1092 ++ goto rescan; 1.1093 ++ } 1.1094 ++ 1.1095 ++ if (allocated_out < offset_out + mblength) 1.1096 ++ { 1.1097 ++ line_out = X2REALLOC (line_out, &allocated_out); 1.1098 ++ } 1.1099 ++ 1.1100 ++ memcpy (line_out + offset_out, bufpos, mblength); 1.1101 ++ offset_out += mblength; 1.1102 ++ column += increment; 1.1103 ++ 1.1104 ++ if (is_blank_seen && !convfail && wc == L'\r') 1.1105 ++ is_cr_after_last_blank = 1; 1.1106 ++ 1.1107 ++ if (is_bs_following_last_blank && !convfail && wc == L'\b') 1.1108 ++ ++bs_following_last_blank_num; 1.1109 ++ else 1.1110 ++ is_bs_following_last_blank = 0; 1.1111 ++ 1.1112 ++ if (break_spaces && !convfail && iswblank (wc)) 1.1113 ++ { 1.1114 ++ last_blank_pos = offset_out; 1.1115 ++ last_blank_column = column; 1.1116 ++ is_blank_seen = 1; 1.1117 ++ last_blank_increment = increment; 1.1118 ++ is_bs_following_last_blank = 1; 1.1119 ++ bs_following_last_blank_num = 0; 1.1120 ++ is_cr_after_last_blank = 0; 1.1121 ++ } 1.1122 ++ } 1.1123 ++ 1.1124 ++ *saved_errno = errno; 1.1125 + 1.1126 + if (offset_out) 1.1127 + fwrite (line_out, sizeof (char), (size_t) offset_out, stdout); 1.1128 + 1.1129 ++} 1.1130 ++#endif 1.1131 ++ 1.1132 ++/* Fold file FILENAME, or standard input if FILENAME is "-", 1.1133 ++ to stdout, with maximum line length WIDTH. 1.1134 ++ Return 0 if successful, 1 if an error occurs. 
*/ 1.1135 ++ 1.1136 ++static bool 1.1137 ++fold_file (char const *filename, size_t width) 1.1138 ++{ 1.1139 ++ FILE *istream; 1.1140 ++ int saved_errno; 1.1141 ++ 1.1142 ++ if (STREQ (filename, "-")) 1.1143 ++ { 1.1144 ++ istream = stdin; 1.1145 ++ have_read_stdin = 1; 1.1146 ++ } 1.1147 ++ else 1.1148 ++ istream = fopen (filename, "r"); 1.1149 ++ 1.1150 ++ if (istream == NULL) 1.1151 ++ { 1.1152 ++ error (0, errno, "%s", quotef (filename)); 1.1153 ++ return 1; 1.1154 ++ } 1.1155 ++ 1.1156 ++ /* Define how ISTREAM is being folded. */ 1.1157 ++#if HAVE_MBRTOWC 1.1158 ++ if (MB_CUR_MAX > 1) 1.1159 ++ fold_multibyte_text (istream, width, &saved_errno); 1.1160 ++ else 1.1161 ++#endif 1.1162 ++ fold_text (istream, width, &saved_errno); 1.1163 ++ 1.1164 + if (ferror (istream)) 1.1165 + { 1.1166 + error (0, saved_errno, "%s", quotef (filename)); 1.1167 +@@ -251,7 +498,8 @@ main (int argc, char **argv) 1.1168 + 1.1169 + atexit (close_stdout); 1.1170 + 1.1171 +- break_spaces = count_bytes = have_read_stdin = false; 1.1172 ++ operating_mode = column_mode; 1.1173 ++ break_spaces = have_read_stdin = false; 1.1174 + 1.1175 + while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1) 1.1176 + { 1.1177 +@@ -260,7 +508,15 @@ main (int argc, char **argv) 1.1178 + switch (optc) 1.1179 + { 1.1180 + case 'b': /* Count bytes rather than columns. */ 1.1181 +- count_bytes = true; 1.1182 ++ if (operating_mode != column_mode) 1.1183 ++ FATAL_ERROR (_("only one way of folding may be specified")); 1.1184 ++ operating_mode = byte_mode; 1.1185 ++ break; 1.1186 ++ 1.1187 ++ case 'c': 1.1188 ++ if (operating_mode != column_mode) 1.1189 ++ FATAL_ERROR (_("only one way of folding may be specified")); 1.1190 ++ operating_mode = character_mode; 1.1191 + break; 1.1192 + 1.1193 + case 's': /* Break at word boundaries. 
*/ 1.1194 +diff -Naurp coreutils-8.25-orig/src/join.c coreutils-8.25/src/join.c 1.1195 +--- coreutils-8.25-orig/src/join.c 2016-01-13 05:08:59.000000000 -0600 1.1196 ++++ coreutils-8.25/src/join.c 2016-02-08 19:07:10.303944625 -0600 1.1197 +@@ -22,18 +22,32 @@ 1.1198 + #include <sys/types.h> 1.1199 + #include <getopt.h> 1.1200 + 1.1201 ++/* Get mbstate_t, mbrtowc(), mbrtowc(), wcwidth(). */ 1.1202 ++#if HAVE_WCHAR_H 1.1203 ++# include <wchar.h> 1.1204 ++#endif 1.1205 ++ 1.1206 ++/* Get iswblank(), towupper. */ 1.1207 ++#if HAVE_WCTYPE_H 1.1208 ++# include <wctype.h> 1.1209 ++#endif 1.1210 ++ 1.1211 + #include "system.h" 1.1212 + #include "error.h" 1.1213 + #include "fadvise.h" 1.1214 + #include "hard-locale.h" 1.1215 + #include "linebuffer.h" 1.1216 +-#include "memcasecmp.h" 1.1217 + #include "quote.h" 1.1218 + #include "stdio--.h" 1.1219 + #include "xmemcoll.h" 1.1220 + #include "xstrtol.h" 1.1221 + #include "argmatch.h" 1.1222 + 1.1223 ++/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 1.1224 ++#if HAVE_MBRTOWC && defined mbstate_t 1.1225 ++# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 1.1226 ++#endif 1.1227 ++ 1.1228 + /* The official name of this program (e.g., no 'g' prefix). */ 1.1229 + #define PROGRAM_NAME "join" 1.1230 + 1.1231 +@@ -135,10 +149,12 @@ static struct outlist outlist_head; 1.1232 + /* Last element in 'outlist', where a new element can be added. */ 1.1233 + static struct outlist *outlist_end = &outlist_head; 1.1234 + 1.1235 +-/* Tab character separating fields. If negative, fields are separated 1.1236 +- by any nonempty string of blanks, otherwise by exactly one 1.1237 +- tab character whose value (when cast to unsigned char) equals TAB. */ 1.1238 +-static int tab = -1; 1.1239 ++/* Tab character separating fields. If NULL, fields are separated 1.1240 ++ by any nonempty string of blanks. */ 1.1241 ++static char *tab = NULL; 1.1242 ++ 1.1243 ++/* The number of bytes used for tab. 
*/ 1.1244 ++static size_t tablen = 0; 1.1245 + 1.1246 + /* If nonzero, check that the input is correctly ordered. */ 1.1247 + static enum 1.1248 +@@ -275,13 +291,14 @@ xfields (struct line *line) 1.1249 + if (ptr == lim) 1.1250 + return; 1.1251 + 1.1252 +- if (0 <= tab && tab != '\n') 1.1253 ++ if (tab != NULL) 1.1254 + { 1.1255 ++ unsigned char t = tab[0]; 1.1256 + char *sep; 1.1257 +- for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1) 1.1258 ++ for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1) 1.1259 + extract_field (line, ptr, sep - ptr); 1.1260 + } 1.1261 +- else if (tab < 0) 1.1262 ++ else 1.1263 + { 1.1264 + /* Skip leading blanks before the first field. */ 1.1265 + while (field_sep (*ptr)) 1.1266 +@@ -305,6 +322,147 @@ xfields (struct line *line) 1.1267 + extract_field (line, ptr, lim - ptr); 1.1268 + } 1.1269 + 1.1270 ++#if HAVE_MBRTOWC 1.1271 ++static void 1.1272 ++xfields_multibyte (struct line *line) 1.1273 ++{ 1.1274 ++ char *ptr = line->buf.buffer; 1.1275 ++ char const *lim = ptr + line->buf.length - 1; 1.1276 ++ wchar_t wc = 0; 1.1277 ++ size_t mblength = 1; 1.1278 ++ mbstate_t state, state_bak; 1.1279 ++ 1.1280 ++ memset (&state, 0, sizeof (mbstate_t)); 1.1281 ++ 1.1282 ++ if (ptr >= lim) 1.1283 ++ return; 1.1284 ++ 1.1285 ++ if (tab != NULL) 1.1286 ++ { 1.1287 ++ char *sep = ptr; 1.1288 ++ for (; ptr < lim; ptr = sep + mblength) 1.1289 ++ { 1.1290 ++ sep = ptr; 1.1291 ++ while (sep < lim) 1.1292 ++ { 1.1293 ++ state_bak = state; 1.1294 ++ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); 1.1295 ++ 1.1296 ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) 1.1297 ++ { 1.1298 ++ mblength = 1; 1.1299 ++ state = state_bak; 1.1300 ++ } 1.1301 ++ mblength = (mblength < 1) ? 
1 : mblength; 1.1302 ++ 1.1303 ++ if (mblength == tablen && !memcmp (sep, tab, mblength)) 1.1304 ++ break; 1.1305 ++ else 1.1306 ++ { 1.1307 ++ sep += mblength; 1.1308 ++ continue; 1.1309 ++ } 1.1310 ++ } 1.1311 ++ 1.1312 ++ if (sep >= lim) 1.1313 ++ break; 1.1314 ++ 1.1315 ++ extract_field (line, ptr, sep - ptr); 1.1316 ++ } 1.1317 ++ } 1.1318 ++ else 1.1319 ++ { 1.1320 ++ /* Skip leading blanks before the first field. */ 1.1321 ++ while(ptr < lim) 1.1322 ++ { 1.1323 ++ state_bak = state; 1.1324 ++ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); 1.1325 ++ 1.1326 ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) 1.1327 ++ { 1.1328 ++ mblength = 1; 1.1329 ++ state = state_bak; 1.1330 ++ break; 1.1331 ++ } 1.1332 ++ mblength = (mblength < 1) ? 1 : mblength; 1.1333 ++ 1.1334 ++ if (!iswblank(wc) && wc != '\n') 1.1335 ++ break; 1.1336 ++ ptr += mblength; 1.1337 ++ } 1.1338 ++ 1.1339 ++ do 1.1340 ++ { 1.1341 ++ char *sep; 1.1342 ++ state_bak = state; 1.1343 ++ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); 1.1344 ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) 1.1345 ++ { 1.1346 ++ mblength = 1; 1.1347 ++ state = state_bak; 1.1348 ++ break; 1.1349 ++ } 1.1350 ++ mblength = (mblength < 1) ? 1 : mblength; 1.1351 ++ 1.1352 ++ sep = ptr + mblength; 1.1353 ++ while (sep < lim) 1.1354 ++ { 1.1355 ++ state_bak = state; 1.1356 ++ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); 1.1357 ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) 1.1358 ++ { 1.1359 ++ mblength = 1; 1.1360 ++ state = state_bak; 1.1361 ++ break; 1.1362 ++ } 1.1363 ++ mblength = (mblength < 1) ? 
1 : mblength; 1.1364 ++ 1.1365 ++ if (iswblank (wc) || wc == '\n') 1.1366 ++ break; 1.1367 ++ 1.1368 ++ sep += mblength; 1.1369 ++ } 1.1370 ++ 1.1371 ++ extract_field (line, ptr, sep - ptr); 1.1372 ++ if (sep >= lim) 1.1373 ++ return; 1.1374 ++ 1.1375 ++ state_bak = state; 1.1376 ++ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); 1.1377 ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) 1.1378 ++ { 1.1379 ++ mblength = 1; 1.1380 ++ state = state_bak; 1.1381 ++ break; 1.1382 ++ } 1.1383 ++ mblength = (mblength < 1) ? 1 : mblength; 1.1384 ++ 1.1385 ++ ptr = sep + mblength; 1.1386 ++ while (ptr < lim) 1.1387 ++ { 1.1388 ++ state_bak = state; 1.1389 ++ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); 1.1390 ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) 1.1391 ++ { 1.1392 ++ mblength = 1; 1.1393 ++ state = state_bak; 1.1394 ++ break; 1.1395 ++ } 1.1396 ++ mblength = (mblength < 1) ? 1 : mblength; 1.1397 ++ 1.1398 ++ if (!iswblank (wc) && wc != '\n') 1.1399 ++ break; 1.1400 ++ 1.1401 ++ ptr += mblength; 1.1402 ++ } 1.1403 ++ } 1.1404 ++ while (ptr < lim); 1.1405 ++ } 1.1406 ++ 1.1407 ++ extract_field (line, ptr, lim - ptr); 1.1408 ++} 1.1409 ++#endif 1.1410 ++ 1.1411 + static void 1.1412 + freeline (struct line *line) 1.1413 + { 1.1414 +@@ -326,56 +484,133 @@ keycmp (struct line const *line1, struct 1.1415 + size_t jf_1, size_t jf_2) 1.1416 + { 1.1417 + /* Start of field to compare in each file. */ 1.1418 +- char *beg1; 1.1419 +- char *beg2; 1.1420 +- 1.1421 +- size_t len1; 1.1422 +- size_t len2; /* Length of fields to compare. */ 1.1423 ++ char *beg[2]; 1.1424 ++ char *copy[2]; 1.1425 ++ size_t len[2]; /* Length of fields to compare. 
*/ 1.1426 + int diff; 1.1427 ++ int i, j; 1.1428 ++ int mallocd = 0; 1.1429 + 1.1430 + if (jf_1 < line1->nfields) 1.1431 + { 1.1432 +- beg1 = line1->fields[jf_1].beg; 1.1433 +- len1 = line1->fields[jf_1].len; 1.1434 ++ beg[0] = line1->fields[jf_1].beg; 1.1435 ++ len[0] = line1->fields[jf_1].len; 1.1436 + } 1.1437 + else 1.1438 + { 1.1439 +- beg1 = NULL; 1.1440 +- len1 = 0; 1.1441 ++ beg[0] = NULL; 1.1442 ++ len[0] = 0; 1.1443 + } 1.1444 + 1.1445 + if (jf_2 < line2->nfields) 1.1446 + { 1.1447 +- beg2 = line2->fields[jf_2].beg; 1.1448 +- len2 = line2->fields[jf_2].len; 1.1449 ++ beg[1] = line2->fields[jf_2].beg; 1.1450 ++ len[1] = line2->fields[jf_2].len; 1.1451 + } 1.1452 + else 1.1453 + { 1.1454 +- beg2 = NULL; 1.1455 +- len2 = 0; 1.1456 ++ beg[1] = NULL; 1.1457 ++ len[1] = 0; 1.1458 + } 1.1459 + 1.1460 +- if (len1 == 0) 1.1461 +- return len2 == 0 ? 0 : -1; 1.1462 +- if (len2 == 0) 1.1463 ++ if (len[0] == 0) 1.1464 ++ return len[1] == 0 ? 0 : -1; 1.1465 ++ if (len[1] == 0) 1.1466 + return 1; 1.1467 + 1.1468 + if (ignore_case) 1.1469 + { 1.1470 +- /* FIXME: ignore_case does not work with NLS (in particular, 1.1471 +- with multibyte chars). 
*/ 1.1472 +- diff = memcasecmp (beg1, beg2, MIN (len1, len2)); 1.1473 ++#ifdef HAVE_MBRTOWC 1.1474 ++ if (MB_CUR_MAX > 1) 1.1475 ++ { 1.1476 ++ size_t mblength; 1.1477 ++ wchar_t wc, uwc; 1.1478 ++ mbstate_t state, state_bak; 1.1479 ++ 1.1480 ++ memset (&state, '\0', sizeof (mbstate_t)); 1.1481 ++ 1.1482 ++ for (i = 0; i < 2; i++) 1.1483 ++ { 1.1484 ++ mallocd = 1; 1.1485 ++ copy[i] = xmalloc (len[i] + 1); 1.1486 ++ memset (copy[i], '\0',len[i] + 1); 1.1487 ++ 1.1488 ++ for (j = 0; j < MIN (len[0], len[1]);) 1.1489 ++ { 1.1490 ++ state_bak = state; 1.1491 ++ mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state); 1.1492 ++ 1.1493 ++ switch (mblength) 1.1494 ++ { 1.1495 ++ case (size_t) -1: 1.1496 ++ case (size_t) -2: 1.1497 ++ state = state_bak; 1.1498 ++ /* Fall through */ 1.1499 ++ case 0: 1.1500 ++ mblength = 1; 1.1501 ++ break; 1.1502 ++ 1.1503 ++ default: 1.1504 ++ uwc = towupper (wc); 1.1505 ++ 1.1506 ++ if (uwc != wc) 1.1507 ++ { 1.1508 ++ mbstate_t state_wc; 1.1509 ++ size_t mblen; 1.1510 ++ 1.1511 ++ memset (&state_wc, '\0', sizeof (mbstate_t)); 1.1512 ++ mblen = wcrtomb (copy[i] + j, uwc, &state_wc); 1.1513 ++ assert (mblen != (size_t)-1); 1.1514 ++ } 1.1515 ++ else 1.1516 ++ memcpy (copy[i] + j, beg[i] + j, mblength); 1.1517 ++ } 1.1518 ++ j += mblength; 1.1519 ++ } 1.1520 ++ copy[i][j] = '\0'; 1.1521 ++ } 1.1522 ++ } 1.1523 ++ else 1.1524 ++#endif 1.1525 ++ { 1.1526 ++ for (i = 0; i < 2; i++) 1.1527 ++ { 1.1528 ++ mallocd = 1; 1.1529 ++ copy[i] = xmalloc (len[i] + 1); 1.1530 ++ 1.1531 ++ for (j = 0; j < MIN (len[0], len[1]); j++) 1.1532 ++ copy[i][j] = toupper (beg[i][j]); 1.1533 ++ 1.1534 ++ copy[i][j] = '\0'; 1.1535 ++ } 1.1536 ++ } 1.1537 + } 1.1538 + else 1.1539 + { 1.1540 +- if (hard_LC_COLLATE) 1.1541 +- return xmemcoll (beg1, len1, beg2, len2); 1.1542 +- diff = memcmp (beg1, beg2, MIN (len1, len2)); 1.1543 ++ copy[0] = beg[0]; 1.1544 ++ copy[1] = beg[1]; 1.1545 ++ } 1.1546 ++ 1.1547 ++ if (hard_LC_COLLATE) 1.1548 ++ { 1.1549 ++ diff = xmemcoll 
((char *) copy[0], len[0], (char *) copy[1], len[1]); 1.1550 ++ 1.1551 ++ if (mallocd) 1.1552 ++ for (i = 0; i < 2; i++) 1.1553 ++ free (copy[i]); 1.1554 ++ 1.1555 ++ return diff; 1.1556 + } 1.1557 ++ diff = memcmp (copy[0], copy[1], MIN (len[0], len[1])); 1.1558 ++ 1.1559 ++ if (mallocd) 1.1560 ++ for (i = 0; i < 2; i++) 1.1561 ++ free (copy[i]); 1.1562 ++ 1.1563 + 1.1564 + if (diff) 1.1565 + return diff; 1.1566 +- return len1 < len2 ? -1 : len1 != len2; 1.1567 ++ return len[0] - len[1]; 1.1568 + } 1.1569 + 1.1570 + /* Check that successive input lines PREV and CURRENT from input file 1.1571 +@@ -467,6 +702,11 @@ get_line (FILE *fp, struct line **linep, 1.1572 + } 1.1573 + ++line_no[which - 1]; 1.1574 + 1.1575 ++#if HAVE_MBRTOWC 1.1576 ++ if (MB_CUR_MAX > 1) 1.1577 ++ xfields_multibyte (line); 1.1578 ++ else 1.1579 ++#endif 1.1580 + xfields (line); 1.1581 + 1.1582 + if (prevline[which - 1]) 1.1583 +@@ -566,21 +806,28 @@ prfield (size_t n, struct line const *li 1.1584 + 1.1585 + /* Output all the fields in line, other than the join field. */ 1.1586 + 1.1587 ++#define PUT_TAB_CHAR \ 1.1588 ++ do \ 1.1589 ++ { \ 1.1590 ++ (tab != NULL) ? \ 1.1591 ++ fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \ 1.1592 ++ } \ 1.1593 ++ while (0) 1.1594 ++ 1.1595 + static void 1.1596 + prfields (struct line const *line, size_t join_field, size_t autocount) 1.1597 + { 1.1598 + size_t i; 1.1599 + size_t nfields = autoformat ? autocount : line->nfields; 1.1600 +- char output_separator = tab < 0 ? 
' ' : tab; 1.1601 + 1.1602 + for (i = 0; i < join_field && i < nfields; ++i) 1.1603 + { 1.1604 +- putchar (output_separator); 1.1605 ++ PUT_TAB_CHAR; 1.1606 + prfield (i, line); 1.1607 + } 1.1608 + for (i = join_field + 1; i < nfields; ++i) 1.1609 + { 1.1610 +- putchar (output_separator); 1.1611 ++ PUT_TAB_CHAR; 1.1612 + prfield (i, line); 1.1613 + } 1.1614 + } 1.1615 +@@ -591,7 +838,6 @@ static void 1.1616 + prjoin (struct line const *line1, struct line const *line2) 1.1617 + { 1.1618 + const struct outlist *outlist; 1.1619 +- char output_separator = tab < 0 ? ' ' : tab; 1.1620 + size_t field; 1.1621 + struct line const *line; 1.1622 + 1.1623 +@@ -625,7 +871,7 @@ prjoin (struct line const *line1, struct 1.1624 + o = o->next; 1.1625 + if (o == NULL) 1.1626 + break; 1.1627 +- putchar (output_separator); 1.1628 ++ PUT_TAB_CHAR; 1.1629 + } 1.1630 + putchar (eolchar); 1.1631 + } 1.1632 +@@ -1103,21 +1349,46 @@ main (int argc, char **argv) 1.1633 + 1.1634 + case 't': 1.1635 + { 1.1636 +- unsigned char newtab = optarg[0]; 1.1637 ++ char *newtab = NULL; 1.1638 ++ size_t newtablen; 1.1639 ++ newtab = xstrdup (optarg); 1.1640 ++#if HAVE_MBRTOWC 1.1641 ++ if (MB_CUR_MAX > 1) 1.1642 ++ { 1.1643 ++ mbstate_t state; 1.1644 ++ 1.1645 ++ memset (&state, 0, sizeof (mbstate_t)); 1.1646 ++ newtablen = mbrtowc (NULL, newtab, 1.1647 ++ strnlen (newtab, MB_LEN_MAX), 1.1648 ++ &state); 1.1649 ++ if (newtablen == (size_t) 0 1.1650 ++ || newtablen == (size_t) -1 1.1651 ++ || newtablen == (size_t) -2) 1.1652 ++ newtablen = 1; 1.1653 ++ } 1.1654 ++ else 1.1655 ++#endif 1.1656 ++ newtablen = 1; 1.1657 + if (! newtab) 1.1658 +- newtab = '\n'; /* '' => process the whole line. */ 1.1659 ++ { 1.1660 ++ newtab = (char*)"\n"; /* '' => process the whole line. 
*/ 1.1661 ++ } 1.1662 + else if (optarg[1]) 1.1663 + { 1.1664 +- if (STREQ (optarg, "\\0")) 1.1665 +- newtab = '\0'; 1.1666 +- else 1.1667 +- error (EXIT_FAILURE, 0, _("multi-character tab %s"), 1.1668 +- quote (optarg)); 1.1669 ++ if (newtablen == 1 && newtab[1]) 1.1670 ++ { 1.1671 ++ if (STREQ (newtab, "\\0")) 1.1672 ++ newtab[0] = '\0'; 1.1673 ++ } 1.1674 ++ } 1.1675 ++ if (tab != NULL && strcmp (tab, newtab)) 1.1676 ++ { 1.1677 ++ free (newtab); 1.1678 ++ error (EXIT_FAILURE, 0, _("incompatible tabs")); 1.1679 + } 1.1680 +- if (0 <= tab && tab != newtab) 1.1681 +- error (EXIT_FAILURE, 0, _("incompatible tabs")); 1.1682 + tab = newtab; 1.1683 +- } 1.1684 ++ tablen = newtablen; 1.1685 ++ } 1.1686 + break; 1.1687 + 1.1688 + case 'z': 1.1689 +diff -Naurp coreutils-8.25-orig/src/pr.c coreutils-8.25/src/pr.c 1.1690 +--- coreutils-8.25-orig/src/pr.c 2016-01-01 07:48:50.000000000 -0600 1.1691 ++++ coreutils-8.25/src/pr.c 2016-02-08 19:07:10.306944635 -0600 1.1692 +@@ -311,6 +311,24 @@ 1.1693 + 1.1694 + #include <getopt.h> 1.1695 + #include <sys/types.h> 1.1696 ++ 1.1697 ++/* Get MB_LEN_MAX. */ 1.1698 ++#include <limits.h> 1.1699 ++/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC 1.1700 ++ installation; work around this configuration error. */ 1.1701 ++#if !defined MB_LEN_MAX || MB_LEN_MAX == 1 1.1702 ++# define MB_LEN_MAX 16 1.1703 ++#endif 1.1704 ++ 1.1705 ++/* Get MB_CUR_MAX. */ 1.1706 ++#include <stdlib.h> 1.1707 ++ 1.1708 ++/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ 1.1709 ++/* Get mbstate_t, mbrtowc(), wcwidth(). */ 1.1710 ++#if HAVE_WCHAR_H 1.1711 ++# include <wchar.h> 1.1712 ++#endif 1.1713 ++ 1.1714 + #include "system.h" 1.1715 + #include "error.h" 1.1716 + #include "fadvise.h" 1.1717 +@@ -323,6 +341,18 @@ 1.1718 + #include "xstrtol.h" 1.1719 + #include "xdectoint.h" 1.1720 + 1.1721 ++/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. 
*/ 1.1722 ++#if HAVE_MBRTOWC && defined mbstate_t 1.1723 ++# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 1.1724 ++#endif 1.1725 ++ 1.1726 ++#ifndef HAVE_DECL_WCWIDTH 1.1727 ++"this configure-time declaration test was not run" 1.1728 ++#endif 1.1729 ++#if !HAVE_DECL_WCWIDTH 1.1730 ++extern int wcwidth (); 1.1731 ++#endif 1.1732 ++ 1.1733 + /* The official name of this program (e.g., no 'g' prefix). */ 1.1734 + #define PROGRAM_NAME "pr" 1.1735 + 1.1736 +@@ -415,7 +445,20 @@ struct COLUMN 1.1737 + 1.1738 + typedef struct COLUMN COLUMN; 1.1739 + 1.1740 +-static int char_to_clump (char c); 1.1741 ++/* Funtion pointers to switch functions for single byte locale or for 1.1742 ++ multibyte locale. If multibyte functions do not exist in your sysytem, 1.1743 ++ these pointers always point the function for single byte locale. */ 1.1744 ++static void (*print_char) (char c); 1.1745 ++static int (*char_to_clump) (char c); 1.1746 ++ 1.1747 ++/* Functions for single byte locale. */ 1.1748 ++static void print_char_single (char c); 1.1749 ++static int char_to_clump_single (char c); 1.1750 ++ 1.1751 ++/* Functions for multibyte locale. 
*/ 1.1752 ++static void print_char_multi (char c); 1.1753 ++static int char_to_clump_multi (char c); 1.1754 ++ 1.1755 + static bool read_line (COLUMN *p); 1.1756 + static bool print_page (void); 1.1757 + static bool print_stored (COLUMN *p); 1.1758 +@@ -427,6 +470,7 @@ static void add_line_number (COLUMN *p); 1.1759 + static void getoptnum (const char *n_str, int min, int *num, 1.1760 + const char *errfmt); 1.1761 + static void getoptarg (char *arg, char switch_char, char *character, 1.1762 ++ int *character_length, int *character_width, 1.1763 + int *number); 1.1764 + static void print_files (int number_of_files, char **av); 1.1765 + static void init_parameters (int number_of_files); 1.1766 +@@ -440,7 +484,6 @@ static void store_char (char c); 1.1767 + static void pad_down (unsigned int lines); 1.1768 + static void read_rest_of_line (COLUMN *p); 1.1769 + static void skip_read (COLUMN *p, int column_number); 1.1770 +-static void print_char (char c); 1.1771 + static void cleanup (void); 1.1772 + static void print_sep_string (void); 1.1773 + static void separator_string (const char *optarg_S); 1.1774 +@@ -452,7 +495,7 @@ static COLUMN *column_vector; 1.1775 + we store the leftmost columns contiguously in buff. 1.1776 + To print a line from buff, get the index of the first character 1.1777 + from line_vector[i], and print up to line_vector[i + 1]. */ 1.1778 +-static char *buff; 1.1779 ++static unsigned char *buff; 1.1780 + 1.1781 + /* Index of the position in buff where the next character 1.1782 + will be stored. */ 1.1783 +@@ -556,7 +599,7 @@ static int chars_per_column; 1.1784 + static bool untabify_input = false; 1.1785 + 1.1786 + /* (-e) The input tab character. */ 1.1787 +-static char input_tab_char = '\t'; 1.1788 ++static char input_tab_char[MB_LEN_MAX] = "\t"; 1.1789 + 1.1790 + /* (-e) Tabstops are at chars_per_tab, 2*chars_per_tab, 3*chars_per_tab, ... 1.1791 + where the leftmost column is 1. 
*/ 1.1792 +@@ -566,7 +609,10 @@ static int chars_per_input_tab = 8; 1.1793 + static bool tabify_output = false; 1.1794 + 1.1795 + /* (-i) The output tab character. */ 1.1796 +-static char output_tab_char = '\t'; 1.1797 ++static char output_tab_char[MB_LEN_MAX] = "\t"; 1.1798 ++ 1.1799 ++/* (-i) The byte length of output tab character. */ 1.1800 ++static int output_tab_char_length = 1; 1.1801 + 1.1802 + /* (-i) The width of the output tab. */ 1.1803 + static int chars_per_output_tab = 8; 1.1804 +@@ -636,7 +682,13 @@ static int line_number; 1.1805 + static bool numbered_lines = false; 1.1806 + 1.1807 + /* (-n) Character which follows each line number. */ 1.1808 +-static char number_separator = '\t'; 1.1809 ++static char number_separator[MB_LEN_MAX] = "\t"; 1.1810 ++ 1.1811 ++/* (-n) The byte length of the character which follows each line number. */ 1.1812 ++static int number_separator_length = 1; 1.1813 ++ 1.1814 ++/* (-n) The character width of the character which follows each line number. */ 1.1815 ++static int number_separator_width = 0; 1.1816 + 1.1817 + /* (-n) line counting starts with 1st line of input file (not with 1st 1.1818 + line of 1st page printed). */ 1.1819 +@@ -689,6 +741,7 @@ static bool use_col_separator = false; 1.1820 + -a|COLUMN|-m is a 'space' and with the -J option a 'tab'. 
*/ 1.1821 + static char *col_sep_string = (char *) ""; 1.1822 + static int col_sep_length = 0; 1.1823 ++static int col_sep_width = 0; 1.1824 + static char *column_separator = (char *) " "; 1.1825 + static char *line_separator = (char *) "\t"; 1.1826 + 1.1827 +@@ -839,6 +892,13 @@ separator_string (const char *optarg_S) 1.1828 + col_sep_length = (int) strlen (optarg_S); 1.1829 + col_sep_string = xmalloc (col_sep_length + 1); 1.1830 + strcpy (col_sep_string, optarg_S); 1.1831 ++ 1.1832 ++#if HAVE_MBRTOWC 1.1833 ++ if (MB_CUR_MAX > 1) 1.1834 ++ col_sep_width = mbswidth (col_sep_string, 0); 1.1835 ++ else 1.1836 ++#endif 1.1837 ++ col_sep_width = col_sep_length; 1.1838 + } 1.1839 + 1.1840 + int 1.1841 +@@ -863,6 +923,21 @@ main (int argc, char **argv) 1.1842 + 1.1843 + atexit (close_stdout); 1.1844 + 1.1845 ++/* Define which functions are used, the ones for single byte locale or the ones 1.1846 ++ for multibyte locale. */ 1.1847 ++#if HAVE_MBRTOWC 1.1848 ++ if (MB_CUR_MAX > 1) 1.1849 ++ { 1.1850 ++ print_char = print_char_multi; 1.1851 ++ char_to_clump = char_to_clump_multi; 1.1852 ++ } 1.1853 ++ else 1.1854 ++#endif 1.1855 ++ { 1.1856 ++ print_char = print_char_single; 1.1857 ++ char_to_clump = char_to_clump_single; 1.1858 ++ } 1.1859 ++ 1.1860 + n_files = 0; 1.1861 + file_names = (argc > 1 1.1862 + ? xmalloc ((argc - 1) * sizeof (char *)) 1.1863 +@@ -939,8 +1014,12 @@ main (int argc, char **argv) 1.1864 + break; 1.1865 + case 'e': 1.1866 + if (optarg) 1.1867 +- getoptarg (optarg, 'e', &input_tab_char, 1.1868 +- &chars_per_input_tab); 1.1869 ++ { 1.1870 ++ int dummy_length, dummy_width; 1.1871 ++ 1.1872 ++ getoptarg (optarg, 'e', input_tab_char, &dummy_length, 1.1873 ++ &dummy_width, &chars_per_input_tab); 1.1874 ++ } 1.1875 + /* Could check tab width > 0. 
*/ 1.1876 + untabify_input = true; 1.1877 + break; 1.1878 +@@ -953,8 +1032,12 @@ main (int argc, char **argv) 1.1879 + break; 1.1880 + case 'i': 1.1881 + if (optarg) 1.1882 +- getoptarg (optarg, 'i', &output_tab_char, 1.1883 +- &chars_per_output_tab); 1.1884 ++ { 1.1885 ++ int dummy_width; 1.1886 ++ 1.1887 ++ getoptarg (optarg, 'i', output_tab_char, &output_tab_char_length, 1.1888 ++ &dummy_width, &chars_per_output_tab); 1.1889 ++ } 1.1890 + /* Could check tab width > 0. */ 1.1891 + tabify_output = true; 1.1892 + break; 1.1893 +@@ -972,8 +1055,8 @@ main (int argc, char **argv) 1.1894 + case 'n': 1.1895 + numbered_lines = true; 1.1896 + if (optarg) 1.1897 +- getoptarg (optarg, 'n', &number_separator, 1.1898 +- &chars_per_number); 1.1899 ++ getoptarg (optarg, 'n', number_separator, &number_separator_length, 1.1900 ++ &number_separator_width, &chars_per_number); 1.1901 + break; 1.1902 + case 'N': 1.1903 + skip_count = false; 1.1904 +@@ -997,7 +1080,7 @@ main (int argc, char **argv) 1.1905 + old_s = false; 1.1906 + /* Reset an additional input of -s, -S dominates -s */ 1.1907 + col_sep_string = bad_cast (""); 1.1908 +- col_sep_length = 0; 1.1909 ++ col_sep_length = col_sep_width = 0; 1.1910 + use_col_separator = true; 1.1911 + if (optarg) 1.1912 + separator_string (optarg); 1.1913 +@@ -1152,10 +1235,45 @@ getoptnum (const char *n_str, int min, i 1.1914 + a number. */ 1.1915 + 1.1916 + static void 1.1917 +-getoptarg (char *arg, char switch_char, char *character, int *number) 1.1918 ++getoptarg (char *arg, char switch_char, char *character, int *character_length, 1.1919 ++ int *character_width, int *number) 1.1920 + { 1.1921 + if (!ISDIGIT (*arg)) 1.1922 +- *character = *arg++; 1.1923 ++ { 1.1924 ++#ifdef HAVE_MBRTOWC 1.1925 ++ if (MB_CUR_MAX > 1) /* for multibyte locale. 
*/ 1.1926 ++ { 1.1927 ++ wchar_t wc; 1.1928 ++ size_t mblength; 1.1929 ++ int width; 1.1930 ++ mbstate_t state = {'\0'}; 1.1931 ++ 1.1932 ++ mblength = mbrtowc (&wc, arg, strnlen(arg, MB_LEN_MAX), &state); 1.1933 ++ 1.1934 ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) 1.1935 ++ { 1.1936 ++ *character_length = 1; 1.1937 ++ *character_width = 1; 1.1938 ++ } 1.1939 ++ else 1.1940 ++ { 1.1941 ++ *character_length = (mblength < 1) ? 1 : mblength; 1.1942 ++ width = wcwidth (wc); 1.1943 ++ *character_width = (width < 0) ? 0 : width; 1.1944 ++ } 1.1945 ++ 1.1946 ++ strncpy (character, arg, *character_length); 1.1947 ++ arg += *character_length; 1.1948 ++ } 1.1949 ++ else /* for single byte locale. */ 1.1950 ++#endif 1.1951 ++ { 1.1952 ++ *character = *arg++; 1.1953 ++ *character_length = 1; 1.1954 ++ *character_width = 1; 1.1955 ++ } 1.1956 ++ } 1.1957 ++ 1.1958 + if (*arg) 1.1959 + { 1.1960 + long int tmp_long; 1.1961 +@@ -1177,6 +1295,11 @@ static void 1.1962 + init_parameters (int number_of_files) 1.1963 + { 1.1964 + int chars_used_by_number = 0; 1.1965 ++ int mb_len = 1; 1.1966 ++#if HAVE_MBRTOWC 1.1967 ++ if (MB_CUR_MAX > 1) 1.1968 ++ mb_len = MB_LEN_MAX; 1.1969 ++#endif 1.1970 + 1.1971 + lines_per_body = lines_per_page - lines_per_header - lines_per_footer; 1.1972 + if (lines_per_body <= 0) 1.1973 +@@ -1214,7 +1337,7 @@ init_parameters (int number_of_files) 1.1974 + else 1.1975 + col_sep_string = column_separator; 1.1976 + 1.1977 +- col_sep_length = 1; 1.1978 ++ col_sep_length = col_sep_width = 1; 1.1979 + use_col_separator = true; 1.1980 + } 1.1981 + /* It's rather pointless to define a TAB separator with column 1.1982 +@@ -1244,11 +1367,11 @@ init_parameters (int number_of_files) 1.1983 + + TAB_WIDTH (chars_per_input_tab, chars_per_number); */ 1.1984 + 1.1985 + /* Estimate chars_per_text without any margin and keep it constant. 
*/ 1.1986 +- if (number_separator == '\t') 1.1987 ++ if (number_separator[0] == '\t') 1.1988 + number_width = (chars_per_number 1.1989 + + TAB_WIDTH (chars_per_default_tab, chars_per_number)); 1.1990 + else 1.1991 +- number_width = chars_per_number + 1; 1.1992 ++ number_width = chars_per_number + number_separator_width; 1.1993 + 1.1994 + /* The number is part of the column width unless we are 1.1995 + printing files in parallel. */ 1.1996 +@@ -1257,7 +1380,7 @@ init_parameters (int number_of_files) 1.1997 + } 1.1998 + 1.1999 + chars_per_column = (chars_per_line - chars_used_by_number 1.2000 +- - (columns - 1) * col_sep_length) / columns; 1.2001 ++ - (columns - 1) * col_sep_width) / columns; 1.2002 + 1.2003 + if (chars_per_column < 1) 1.2004 + error (EXIT_FAILURE, 0, _("page width too narrow")); 1.2005 +@@ -1275,7 +1398,7 @@ init_parameters (int number_of_files) 1.2006 + We've to use 8 as the lower limit, if we use chars_per_default_tab = 8 1.2007 + to expand a tab which is not an input_tab-char. */ 1.2008 + free (clump_buff); 1.2009 +- clump_buff = xmalloc (MAX (8, chars_per_input_tab)); 1.2010 ++ clump_buff = xmalloc (mb_len * MAX (8, chars_per_input_tab)); 1.2011 + } 1.2012 + 1.2013 + /* Open the necessary files, 1.2014 +@@ -1383,7 +1506,7 @@ init_funcs (void) 1.2015 + 1.2016 + /* Enlarge p->start_position of first column to use the same form of 1.2017 + padding_not_printed with all columns. */ 1.2018 +- h = h + col_sep_length; 1.2019 ++ h = h + col_sep_width; 1.2020 + 1.2021 + /* This loop takes care of all but the rightmost column. 
*/ 1.2022 + 1.2023 +@@ -1417,7 +1540,7 @@ init_funcs (void) 1.2024 + } 1.2025 + else 1.2026 + { 1.2027 +- h = h_next + col_sep_length; 1.2028 ++ h = h_next + col_sep_width; 1.2029 + h_next = h + chars_per_column; 1.2030 + } 1.2031 + } 1.2032 +@@ -1708,9 +1831,9 @@ static void 1.2033 + align_column (COLUMN *p) 1.2034 + { 1.2035 + padding_not_printed = p->start_position; 1.2036 +- if (padding_not_printed - col_sep_length > 0) 1.2037 ++ if (padding_not_printed - col_sep_width > 0) 1.2038 + { 1.2039 +- pad_across_to (padding_not_printed - col_sep_length); 1.2040 ++ pad_across_to (padding_not_printed - col_sep_width); 1.2041 + padding_not_printed = ANYWHERE; 1.2042 + } 1.2043 + 1.2044 +@@ -1981,13 +2104,13 @@ store_char (char c) 1.2045 + /* May be too generous. */ 1.2046 + buff = X2REALLOC (buff, &buff_allocated); 1.2047 + } 1.2048 +- buff[buff_current++] = c; 1.2049 ++ buff[buff_current++] = (unsigned char) c; 1.2050 + } 1.2051 + 1.2052 + static void 1.2053 + add_line_number (COLUMN *p) 1.2054 + { 1.2055 +- int i; 1.2056 ++ int i, j; 1.2057 + char *s; 1.2058 + int num_width; 1.2059 + 1.2060 +@@ -2004,22 +2127,24 @@ add_line_number (COLUMN *p) 1.2061 + /* Tabification is assumed for multiple columns, also for n-separators, 1.2062 + but 'default n-separator = TAB' hasn't been given priority over 1.2063 + equal column_width also specified by POSIX. */ 1.2064 +- if (number_separator == '\t') 1.2065 ++ if (number_separator[0] == '\t') 1.2066 + { 1.2067 + i = number_width - chars_per_number; 1.2068 + while (i-- > 0) 1.2069 + (p->char_func) (' '); 1.2070 + } 1.2071 + else 1.2072 +- (p->char_func) (number_separator); 1.2073 ++ for (j = 0; j < number_separator_length; j++) 1.2074 ++ (p->char_func) (number_separator[j]); 1.2075 + } 1.2076 + else 1.2077 + /* To comply with POSIX, we avoid any expansion of default TAB 1.2078 + separator with a single column output. No column_width requirement 1.2079 + has to be considered. 
*/ 1.2080 + { 1.2081 +- (p->char_func) (number_separator); 1.2082 +- if (number_separator == '\t') 1.2083 ++ for (j = 0; j < number_separator_length; j++) 1.2084 ++ (p->char_func) (number_separator[j]); 1.2085 ++ if (number_separator[0] == '\t') 1.2086 + output_position = POS_AFTER_TAB (chars_per_output_tab, 1.2087 + output_position); 1.2088 + } 1.2089 +@@ -2180,7 +2305,7 @@ print_white_space (void) 1.2090 + while (goal - h_old > 1 1.2091 + && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal) 1.2092 + { 1.2093 +- putchar (output_tab_char); 1.2094 ++ fwrite (output_tab_char, sizeof(char), output_tab_char_length, stdout); 1.2095 + h_old = h_new; 1.2096 + } 1.2097 + while (++h_old <= goal) 1.2098 +@@ -2200,6 +2325,7 @@ print_sep_string (void) 1.2099 + { 1.2100 + char *s; 1.2101 + int l = col_sep_length; 1.2102 ++ int not_space_flag; 1.2103 + 1.2104 + s = col_sep_string; 1.2105 + 1.2106 +@@ -2213,6 +2339,7 @@ print_sep_string (void) 1.2107 + { 1.2108 + for (; separators_not_printed > 0; --separators_not_printed) 1.2109 + { 1.2110 ++ not_space_flag = 0; 1.2111 + while (l-- > 0) 1.2112 + { 1.2113 + /* 3 types of sep_strings: spaces only, spaces and chars, 1.2114 +@@ -2226,12 +2353,15 @@ print_sep_string (void) 1.2115 + } 1.2116 + else 1.2117 + { 1.2118 ++ not_space_flag = 1; 1.2119 + if (spaces_not_printed > 0) 1.2120 + print_white_space (); 1.2121 + putchar (*s++); 1.2122 +- ++output_position; 1.2123 + } 1.2124 + } 1.2125 ++ if (not_space_flag) 1.2126 ++ output_position += col_sep_width; 1.2127 ++ 1.2128 + /* sep_string ends with some spaces */ 1.2129 + if (spaces_not_printed > 0) 1.2130 + print_white_space (); 1.2131 +@@ -2259,7 +2389,7 @@ print_clump (COLUMN *p, int n, char *clu 1.2132 + required number of tabs and spaces. 
*/ 1.2133 + 1.2134 + static void 1.2135 +-print_char (char c) 1.2136 ++print_char_single (char c) 1.2137 + { 1.2138 + if (tabify_output) 1.2139 + { 1.2140 +@@ -2283,6 +2413,74 @@ print_char (char c) 1.2141 + putchar (c); 1.2142 + } 1.2143 + 1.2144 ++#ifdef HAVE_MBRTOWC 1.2145 ++static void 1.2146 ++print_char_multi (char c) 1.2147 ++{ 1.2148 ++ static size_t mbc_pos = 0; 1.2149 ++ static char mbc[MB_LEN_MAX] = {'\0'}; 1.2150 ++ static mbstate_t state = {'\0'}; 1.2151 ++ mbstate_t state_bak; 1.2152 ++ wchar_t wc; 1.2153 ++ size_t mblength; 1.2154 ++ int width; 1.2155 ++ 1.2156 ++ if (tabify_output) 1.2157 ++ { 1.2158 ++ state_bak = state; 1.2159 ++ mbc[mbc_pos++] = c; 1.2160 ++ mblength = mbrtowc (&wc, mbc, mbc_pos, &state); 1.2161 ++ 1.2162 ++ while (mbc_pos > 0) 1.2163 ++ { 1.2164 ++ switch (mblength) 1.2165 ++ { 1.2166 ++ case (size_t)-2: 1.2167 ++ state = state_bak; 1.2168 ++ return; 1.2169 ++ 1.2170 ++ case (size_t)-1: 1.2171 ++ state = state_bak; 1.2172 ++ ++output_position; 1.2173 ++ putchar (mbc[0]); 1.2174 ++ memmove (mbc, mbc + 1, MB_CUR_MAX - 1); 1.2175 ++ --mbc_pos; 1.2176 ++ break; 1.2177 ++ 1.2178 ++ case 0: 1.2179 ++ mblength = 1; 1.2180 ++ 1.2181 ++ default: 1.2182 ++ if (wc == L' ') 1.2183 ++ { 1.2184 ++ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); 1.2185 ++ --mbc_pos; 1.2186 ++ ++spaces_not_printed; 1.2187 ++ return; 1.2188 ++ } 1.2189 ++ else if (spaces_not_printed > 0) 1.2190 ++ print_white_space (); 1.2191 ++ 1.2192 ++ /* Nonprintables are assumed to have width 0, except L'\b'. 
*/ 1.2193 ++ if ((width = wcwidth (wc)) < 1) 1.2194 ++ { 1.2195 ++ if (wc == L'\b') 1.2196 ++ --output_position; 1.2197 ++ } 1.2198 ++ else 1.2199 ++ output_position += width; 1.2200 ++ 1.2201 ++ fwrite (mbc, sizeof(char), mblength, stdout); 1.2202 ++ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); 1.2203 ++ mbc_pos -= mblength; 1.2204 ++ } 1.2205 ++ } 1.2206 ++ return; 1.2207 ++ } 1.2208 ++ putchar (c); 1.2209 ++} 1.2210 ++#endif 1.2211 ++ 1.2212 + /* Skip to page PAGE before printing. 1.2213 + PAGE may be larger than total number of pages. */ 1.2214 + 1.2215 +@@ -2462,9 +2660,9 @@ read_line (COLUMN *p) 1.2216 + align_empty_cols = false; 1.2217 + } 1.2218 + 1.2219 +- if (padding_not_printed - col_sep_length > 0) 1.2220 ++ if (padding_not_printed - col_sep_width > 0) 1.2221 + { 1.2222 +- pad_across_to (padding_not_printed - col_sep_length); 1.2223 ++ pad_across_to (padding_not_printed - col_sep_width); 1.2224 + padding_not_printed = ANYWHERE; 1.2225 + } 1.2226 + 1.2227 +@@ -2534,7 +2732,7 @@ print_stored (COLUMN *p) 1.2228 + int i; 1.2229 + 1.2230 + int line = p->current_line++; 1.2231 +- char *first = &buff[line_vector[line]]; 1.2232 ++ unsigned char *first = &buff[line_vector[line]]; 1.2233 + /* FIXME 1.2234 + UMR: Uninitialized memory read: 1.2235 + * This is occurring while in: 1.2236 +@@ -2546,7 +2744,7 @@ print_stored (COLUMN *p) 1.2237 + xmalloc [xmalloc.c:94] 1.2238 + init_store_cols [pr.c:1648] 1.2239 + */ 1.2240 +- char *last = &buff[line_vector[line + 1]]; 1.2241 ++ unsigned char *last = &buff[line_vector[line + 1]]; 1.2242 + 1.2243 + pad_vertically = true; 1.2244 + 1.2245 +@@ -2565,9 +2763,9 @@ print_stored (COLUMN *p) 1.2246 + } 1.2247 + } 1.2248 + 1.2249 +- if (padding_not_printed - col_sep_length > 0) 1.2250 ++ if (padding_not_printed - col_sep_width > 0) 1.2251 + { 1.2252 +- pad_across_to (padding_not_printed - col_sep_length); 1.2253 ++ pad_across_to (padding_not_printed - col_sep_width); 1.2254 + padding_not_printed = ANYWHERE; 1.2255 + } 
1.2256 + 1.2257 +@@ -2580,8 +2778,8 @@ print_stored (COLUMN *p) 1.2258 + if (spaces_not_printed == 0) 1.2259 + { 1.2260 + output_position = p->start_position + end_vector[line]; 1.2261 +- if (p->start_position - col_sep_length == chars_per_margin) 1.2262 +- output_position -= col_sep_length; 1.2263 ++ if (p->start_position - col_sep_width == chars_per_margin) 1.2264 ++ output_position -= col_sep_width; 1.2265 + } 1.2266 + 1.2267 + return true; 1.2268 +@@ -2600,7 +2798,7 @@ print_stored (COLUMN *p) 1.2269 + number of characters is 1.) */ 1.2270 + 1.2271 + static int 1.2272 +-char_to_clump (char c) 1.2273 ++char_to_clump_single (char c) 1.2274 + { 1.2275 + unsigned char uc = c; 1.2276 + char *s = clump_buff; 1.2277 +@@ -2610,10 +2808,10 @@ char_to_clump (char c) 1.2278 + int chars; 1.2279 + int chars_per_c = 8; 1.2280 + 1.2281 +- if (c == input_tab_char) 1.2282 ++ if (c == input_tab_char[0]) 1.2283 + chars_per_c = chars_per_input_tab; 1.2284 + 1.2285 +- if (c == input_tab_char || c == '\t') 1.2286 ++ if (c == input_tab_char[0] || c == '\t') 1.2287 + { 1.2288 + width = TAB_WIDTH (chars_per_c, input_position); 1.2289 + 1.2290 +@@ -2694,6 +2892,164 @@ char_to_clump (char c) 1.2291 + return chars; 1.2292 + } 1.2293 + 1.2294 ++#ifdef HAVE_MBRTOWC 1.2295 ++static int 1.2296 ++char_to_clump_multi (char c) 1.2297 ++{ 1.2298 ++ static size_t mbc_pos = 0; 1.2299 ++ static char mbc[MB_LEN_MAX] = {'\0'}; 1.2300 ++ static mbstate_t state = {'\0'}; 1.2301 ++ mbstate_t state_bak; 1.2302 ++ wchar_t wc; 1.2303 ++ size_t mblength; 1.2304 ++ int wc_width; 1.2305 ++ register char *s = clump_buff; 1.2306 ++ register int i, j; 1.2307 ++ char esc_buff[4]; 1.2308 ++ int width; 1.2309 ++ int chars; 1.2310 ++ int chars_per_c = 8; 1.2311 ++ 1.2312 ++ state_bak = state; 1.2313 ++ mbc[mbc_pos++] = c; 1.2314 ++ mblength = mbrtowc (&wc, mbc, mbc_pos, &state); 1.2315 ++ 1.2316 ++ width = 0; 1.2317 ++ chars = 0; 1.2318 ++ while (mbc_pos > 0) 1.2319 ++ { 1.2320 ++ switch (mblength) 1.2321 ++ { 1.2322 
++ case (size_t)-2: 1.2323 ++ state = state_bak; 1.2324 ++ return 0; 1.2325 ++ 1.2326 ++ case (size_t)-1: 1.2327 ++ state = state_bak; 1.2328 ++ mblength = 1; 1.2329 ++ 1.2330 ++ if (use_esc_sequence || use_cntrl_prefix) 1.2331 ++ { 1.2332 ++ width = +4; 1.2333 ++ chars = +4; 1.2334 ++ *s++ = '\\'; 1.2335 ++ sprintf (esc_buff, "%03o", (unsigned char) mbc[0]); 1.2336 ++ for (i = 0; i <= 2; ++i) 1.2337 ++ *s++ = (int) esc_buff[i]; 1.2338 ++ } 1.2339 ++ else 1.2340 ++ { 1.2341 ++ width += 1; 1.2342 ++ chars += 1; 1.2343 ++ *s++ = mbc[0]; 1.2344 ++ } 1.2345 ++ break; 1.2346 ++ 1.2347 ++ case 0: 1.2348 ++ mblength = 1; 1.2349 ++ /* Fall through */ 1.2350 ++ 1.2351 ++ default: 1.2352 ++ if (memcmp (mbc, input_tab_char, mblength) == 0) 1.2353 ++ chars_per_c = chars_per_input_tab; 1.2354 ++ 1.2355 ++ if (memcmp (mbc, input_tab_char, mblength) == 0 || c == '\t') 1.2356 ++ { 1.2357 ++ int width_inc; 1.2358 ++ 1.2359 ++ width_inc = TAB_WIDTH (chars_per_c, input_position); 1.2360 ++ width += width_inc; 1.2361 ++ 1.2362 ++ if (untabify_input) 1.2363 ++ { 1.2364 ++ for (i = width_inc; i; --i) 1.2365 ++ *s++ = ' '; 1.2366 ++ chars += width_inc; 1.2367 ++ } 1.2368 ++ else 1.2369 ++ { 1.2370 ++ for (i = 0; i < mblength; i++) 1.2371 ++ *s++ = mbc[i]; 1.2372 ++ chars += mblength; 1.2373 ++ } 1.2374 ++ } 1.2375 ++ else if ((wc_width = wcwidth (wc)) < 1) 1.2376 ++ { 1.2377 ++ if (use_esc_sequence) 1.2378 ++ { 1.2379 ++ for (i = 0; i < mblength; i++) 1.2380 ++ { 1.2381 ++ width += 4; 1.2382 ++ chars += 4; 1.2383 ++ *s++ = '\\'; 1.2384 ++ sprintf (esc_buff, "%03o", (unsigned char) mbc[i]); 1.2385 ++ for (j = 0; j <= 2; ++j) 1.2386 ++ *s++ = (int) esc_buff[j]; 1.2387 ++ } 1.2388 ++ } 1.2389 ++ else if (use_cntrl_prefix) 1.2390 ++ { 1.2391 ++ if (wc < 0200) 1.2392 ++ { 1.2393 ++ width += 2; 1.2394 ++ chars += 2; 1.2395 ++ *s++ = '^'; 1.2396 ++ *s++ = wc ^ 0100; 1.2397 ++ } 1.2398 ++ else 1.2399 ++ { 1.2400 ++ for (i = 0; i < mblength; i++) 1.2401 ++ { 1.2402 ++ width += 4; 1.2403 ++ chars 
+= 4; 1.2404 ++ *s++ = '\\'; 1.2405 ++ sprintf (esc_buff, "%03o", (unsigned char) mbc[i]); 1.2406 ++ for (j = 0; j <= 2; ++j) 1.2407 ++ *s++ = (int) esc_buff[j]; 1.2408 ++ } 1.2409 ++ } 1.2410 ++ } 1.2411 ++ else if (wc == L'\b') 1.2412 ++ { 1.2413 ++ width += -1; 1.2414 ++ chars += 1; 1.2415 ++ *s++ = c; 1.2416 ++ } 1.2417 ++ else 1.2418 ++ { 1.2419 ++ width += 0; 1.2420 ++ chars += mblength; 1.2421 ++ for (i = 0; i < mblength; i++) 1.2422 ++ *s++ = mbc[i]; 1.2423 ++ } 1.2424 ++ } 1.2425 ++ else 1.2426 ++ { 1.2427 ++ width += wc_width; 1.2428 ++ chars += mblength; 1.2429 ++ for (i = 0; i < mblength; i++) 1.2430 ++ *s++ = mbc[i]; 1.2431 ++ } 1.2432 ++ } 1.2433 ++ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); 1.2434 ++ mbc_pos -= mblength; 1.2435 ++ } 1.2436 ++ 1.2437 ++ /* Too many backspaces must put us in position 0 -- never negative. */ 1.2438 ++ if (width < 0 && input_position == 0) 1.2439 ++ { 1.2440 ++ chars = 0; 1.2441 ++ input_position = 0; 1.2442 ++ } 1.2443 ++ else if (width < 0 && input_position <= -width) 1.2444 ++ input_position = 0; 1.2445 ++ else 1.2446 ++ input_position += width; 1.2447 ++ 1.2448 ++ return chars; 1.2449 ++} 1.2450 ++#endif 1.2451 ++ 1.2452 + /* We've just printed some files and need to clean up things before 1.2453 + looking for more options and printing the next batch of files. 1.2454 + 1.2455 +diff -Naurp coreutils-8.25-orig/src/sort.c coreutils-8.25/src/sort.c 1.2456 +--- coreutils-8.25-orig/src/sort.c 2016-01-16 13:09:33.000000000 -0600 1.2457 ++++ coreutils-8.25/src/sort.c 2016-02-08 19:07:10.310944648 -0600 1.2458 +@@ -29,6 +29,14 @@ 1.2459 + #include <sys/wait.h> 1.2460 + #include <signal.h> 1.2461 + #include <assert.h> 1.2462 ++#if HAVE_WCHAR_H 1.2463 ++# include <wchar.h> 1.2464 ++#endif 1.2465 ++/* Get isw* functions. 
*/ 1.2466 ++#if HAVE_WCTYPE_H 1.2467 ++# include <wctype.h> 1.2468 ++#endif 1.2469 ++ 1.2470 + #include "system.h" 1.2471 + #include "argmatch.h" 1.2472 + #include "error.h" 1.2473 +@@ -163,14 +171,39 @@ static int decimal_point; 1.2474 + /* Thousands separator; if -1, then there isn't one. */ 1.2475 + static int thousands_sep; 1.2476 + 1.2477 ++/* True if -f is specified. */ 1.2478 ++static bool folding; 1.2479 ++ 1.2480 + /* Nonzero if the corresponding locales are hard. */ 1.2481 + static bool hard_LC_COLLATE; 1.2482 +-#if HAVE_NL_LANGINFO 1.2483 ++#if HAVE_LANGINFO_CODESET 1.2484 + static bool hard_LC_TIME; 1.2485 + #endif 1.2486 + 1.2487 + #define NONZERO(x) ((x) != 0) 1.2488 + 1.2489 ++/* get a multibyte character's byte length. */ 1.2490 ++#define GET_BYTELEN_OF_CHAR(LIM, PTR, MBLENGTH, STATE) \ 1.2491 ++ do \ 1.2492 ++ { \ 1.2493 ++ wchar_t wc; \ 1.2494 ++ mbstate_t state_bak; \ 1.2495 ++ \ 1.2496 ++ state_bak = STATE; \ 1.2497 ++ mblength = mbrtowc (&wc, PTR, LIM - PTR, &STATE); \ 1.2498 ++ \ 1.2499 ++ switch (MBLENGTH) \ 1.2500 ++ { \ 1.2501 ++ case (size_t)-1: \ 1.2502 ++ case (size_t)-2: \ 1.2503 ++ STATE = state_bak; \ 1.2504 ++ /* Fall through. */ \ 1.2505 ++ case 0: \ 1.2506 ++ MBLENGTH = 1; \ 1.2507 ++ } \ 1.2508 ++ } \ 1.2509 ++ while (0) 1.2510 ++ 1.2511 + /* The kind of blanks for '-b' to skip in various options. */ 1.2512 + enum blanktype { bl_start, bl_end, bl_both }; 1.2513 + 1.2514 +@@ -344,13 +377,11 @@ static bool reverse; 1.2515 + they were read if all keys compare equal. */ 1.2516 + static bool stable; 1.2517 + 1.2518 +-/* If TAB has this value, blanks separate fields. */ 1.2519 +-enum { TAB_DEFAULT = CHAR_MAX + 1 }; 1.2520 +- 1.2521 +-/* Tab character separating fields. If TAB_DEFAULT, then fields are 1.2522 ++/* Tab character separating fields. If tab_length is 0, then fields are 1.2523 + separated by the empty string between a non-blank character and a blank 1.2524 + character. 
*/ 1.2525 +-static int tab = TAB_DEFAULT; 1.2526 ++static char tab[MB_LEN_MAX + 1]; 1.2527 ++static size_t tab_length = 0; 1.2528 + 1.2529 + /* Flag to remove consecutive duplicate lines from the output. 1.2530 + Only the last of a sequence of equal lines will be output. */ 1.2531 +@@ -810,6 +841,46 @@ reap_all (void) 1.2532 + reap (-1); 1.2533 + } 1.2534 + 1.2535 ++/* Function pointers. */ 1.2536 ++static void 1.2537 ++(*inittables) (void); 1.2538 ++static char * 1.2539 ++(*begfield) (const struct line*, const struct keyfield *); 1.2540 ++static char * 1.2541 ++(*limfield) (const struct line*, const struct keyfield *); 1.2542 ++static void 1.2543 ++(*skipblanks) (char **ptr, char *lim); 1.2544 ++static int 1.2545 ++(*getmonth) (char const *, size_t, char **); 1.2546 ++static int 1.2547 ++(*keycompare) (const struct line *, const struct line *); 1.2548 ++static int 1.2549 ++(*numcompare) (const char *, const char *); 1.2550 ++ 1.2551 ++/* Test for white space multibyte character. 1.2552 ++ Set LENGTH the byte length of investigated multibyte character. */ 1.2553 ++#if HAVE_MBRTOWC 1.2554 ++static int 1.2555 ++ismbblank (const char *str, size_t len, size_t *length) 1.2556 ++{ 1.2557 ++ size_t mblength; 1.2558 ++ wchar_t wc; 1.2559 ++ mbstate_t state; 1.2560 ++ 1.2561 ++ memset (&state, '\0', sizeof(mbstate_t)); 1.2562 ++ mblength = mbrtowc (&wc, str, len, &state); 1.2563 ++ 1.2564 ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) 1.2565 ++ { 1.2566 ++ *length = 1; 1.2567 ++ return 0; 1.2568 ++ } 1.2569 ++ 1.2570 ++ *length = (mblength < 1) ? 1 : mblength; 1.2571 ++ return iswblank (wc) || wc == '\n'; 1.2572 ++} 1.2573 ++#endif 1.2574 ++ 1.2575 + /* Clean up any remaining temporary files. 
*/ 1.2576 + 1.2577 + static void 1.2578 +@@ -1254,7 +1325,7 @@ zaptemp (char const *name) 1.2579 + free (node); 1.2580 + } 1.2581 + 1.2582 +-#if HAVE_NL_LANGINFO 1.2583 ++#if HAVE_LANGINFO_CODESET 1.2584 + 1.2585 + static int 1.2586 + struct_month_cmp (void const *m1, void const *m2) 1.2587 +@@ -1269,7 +1340,7 @@ struct_month_cmp (void const *m1, void c 1.2588 + /* Initialize the character class tables. */ 1.2589 + 1.2590 + static void 1.2591 +-inittables (void) 1.2592 ++inittables_uni (void) 1.2593 + { 1.2594 + size_t i; 1.2595 + 1.2596 +@@ -1281,7 +1352,7 @@ inittables (void) 1.2597 + fold_toupper[i] = toupper (i); 1.2598 + } 1.2599 + 1.2600 +-#if HAVE_NL_LANGINFO 1.2601 ++#if HAVE_LANGINFO_CODESET 1.2602 + /* If we're not in the "C" locale, read different names for months. */ 1.2603 + if (hard_LC_TIME) 1.2604 + { 1.2605 +@@ -1363,6 +1434,84 @@ specify_nmerge (int oi, char c, char con 1.2606 + xstrtol_fatal (e, oi, c, long_options, s); 1.2607 + } 1.2608 + 1.2609 ++#if HAVE_MBRTOWC 1.2610 ++static void 1.2611 ++inittables_mb (void) 1.2612 ++{ 1.2613 ++ int i, j, k, l; 1.2614 ++ char *name, *s, *lc_time, *lc_ctype; 1.2615 ++ size_t s_len, mblength; 1.2616 ++ char mbc[MB_LEN_MAX]; 1.2617 ++ wchar_t wc, pwc; 1.2618 ++ mbstate_t state_mb, state_wc; 1.2619 ++ 1.2620 ++ lc_time = setlocale (LC_TIME, ""); 1.2621 ++ if (lc_time) 1.2622 ++ lc_time = xstrdup (lc_time); 1.2623 ++ 1.2624 ++ lc_ctype = setlocale (LC_CTYPE, ""); 1.2625 ++ if (lc_ctype) 1.2626 ++ lc_ctype = xstrdup (lc_ctype); 1.2627 ++ 1.2628 ++ if (lc_time && lc_ctype) 1.2629 ++ /* temporarily set LC_CTYPE to match LC_TIME, so that we can convert 1.2630 ++ * the names of months to upper case */ 1.2631 ++ setlocale (LC_CTYPE, lc_time); 1.2632 ++ 1.2633 ++ for (i = 0; i < MONTHS_PER_YEAR; i++) 1.2634 ++ { 1.2635 ++ s = (char *) nl_langinfo (ABMON_1 + i); 1.2636 ++ s_len = strlen (s); 1.2637 ++ monthtab[i].name = name = (char *) xmalloc (s_len + 1); 1.2638 ++ monthtab[i].val = i + 1; 1.2639 ++ 1.2640 ++ memset 
(&state_mb, '\0', sizeof (mbstate_t)); 1.2641 ++ memset (&state_wc, '\0', sizeof (mbstate_t)); 1.2642 ++ 1.2643 ++ for (j = 0; j < s_len;) 1.2644 ++ { 1.2645 ++ if (!ismbblank (s + j, s_len - j, &mblength)) 1.2646 ++ break; 1.2647 ++ j += mblength; 1.2648 ++ } 1.2649 ++ 1.2650 ++ for (k = 0; j < s_len;) 1.2651 ++ { 1.2652 ++ mblength = mbrtowc (&wc, (s + j), (s_len - j), &state_mb); 1.2653 ++ assert (mblength != (size_t)-1 && mblength != (size_t)-2); 1.2654 ++ if (mblength == 0) 1.2655 ++ break; 1.2656 ++ 1.2657 ++ pwc = towupper (wc); 1.2658 ++ if (pwc == wc) 1.2659 ++ { 1.2660 ++ memcpy (mbc, s + j, mblength); 1.2661 ++ j += mblength; 1.2662 ++ } 1.2663 ++ else 1.2664 ++ { 1.2665 ++ j += mblength; 1.2666 ++ mblength = wcrtomb (mbc, pwc, &state_wc); 1.2667 ++ assert (mblength != (size_t)0 && mblength != (size_t)-1); 1.2668 ++ } 1.2669 ++ 1.2670 ++ for (l = 0; l < mblength; l++) 1.2671 ++ name[k++] = mbc[l]; 1.2672 ++ } 1.2673 ++ name[k] = '\0'; 1.2674 ++ } 1.2675 ++ qsort ((void *) monthtab, MONTHS_PER_YEAR, 1.2676 ++ sizeof (struct month), struct_month_cmp); 1.2677 ++ 1.2678 ++ if (lc_time && lc_ctype) 1.2679 ++ /* restore the original locales */ 1.2680 ++ setlocale (LC_CTYPE, lc_ctype); 1.2681 ++ 1.2682 ++ free (lc_ctype); 1.2683 ++ free (lc_time); 1.2684 ++} 1.2685 ++#endif 1.2686 ++ 1.2687 + /* Specify the amount of main memory to use when sorting. */ 1.2688 + static void 1.2689 + specify_sort_size (int oi, char c, char const *s) 1.2690 +@@ -1596,7 +1745,7 @@ buffer_linelim (struct buffer const *buf 1.2691 + by KEY in LINE. 
*/ 1.2692 + 1.2693 + static char * 1.2694 +-begfield (struct line const *line, struct keyfield const *key) 1.2695 ++begfield_uni (const struct line *line, const struct keyfield *key) 1.2696 + { 1.2697 + char *ptr = line->text, *lim = ptr + line->length - 1; 1.2698 + size_t sword = key->sword; 1.2699 +@@ -1605,10 +1754,10 @@ begfield (struct line const *line, struc 1.2700 + /* The leading field separator itself is included in a field when -t 1.2701 + is absent. */ 1.2702 + 1.2703 +- if (tab != TAB_DEFAULT) 1.2704 ++ if (tab_length) 1.2705 + while (ptr < lim && sword--) 1.2706 + { 1.2707 +- while (ptr < lim && *ptr != tab) 1.2708 ++ while (ptr < lim && *ptr != tab[0]) 1.2709 + ++ptr; 1.2710 + if (ptr < lim) 1.2711 + ++ptr; 1.2712 +@@ -1634,11 +1783,70 @@ begfield (struct line const *line, struc 1.2713 + return ptr; 1.2714 + } 1.2715 + 1.2716 ++#if HAVE_MBRTOWC 1.2717 ++static char * 1.2718 ++begfield_mb (const struct line *line, const struct keyfield *key) 1.2719 ++{ 1.2720 ++ int i; 1.2721 ++ char *ptr = line->text, *lim = ptr + line->length - 1; 1.2722 ++ size_t sword = key->sword; 1.2723 ++ size_t schar = key->schar; 1.2724 ++ size_t mblength; 1.2725 ++ mbstate_t state; 1.2726 ++ 1.2727 ++ memset (&state, '\0', sizeof(mbstate_t)); 1.2728 ++ 1.2729 ++ if (tab_length) 1.2730 ++ while (ptr < lim && sword--) 1.2731 ++ { 1.2732 ++ while (ptr < lim && memcmp (ptr, tab, tab_length) != 0) 1.2733 ++ { 1.2734 ++ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1.2735 ++ ptr += mblength; 1.2736 ++ } 1.2737 ++ if (ptr < lim) 1.2738 ++ { 1.2739 ++ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1.2740 ++ ptr += mblength; 1.2741 ++ } 1.2742 ++ } 1.2743 ++ else 1.2744 ++ while (ptr < lim && sword--) 1.2745 ++ { 1.2746 ++ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) 1.2747 ++ ptr += mblength; 1.2748 ++ if (ptr < lim) 1.2749 ++ { 1.2750 ++ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1.2751 ++ ptr += mblength; 1.2752 ++ } 1.2753 ++ while (ptr < lim && !ismbblank 
(ptr, lim - ptr, &mblength)) 1.2754 ++ ptr += mblength; 1.2755 ++ } 1.2756 ++ 1.2757 ++ if (key->skipsblanks) 1.2758 ++ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) 1.2759 ++ ptr += mblength; 1.2760 ++ 1.2761 ++ for (i = 0; i < schar; i++) 1.2762 ++ { 1.2763 ++ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1.2764 ++ 1.2765 ++ if (ptr + mblength > lim) 1.2766 ++ break; 1.2767 ++ else 1.2768 ++ ptr += mblength; 1.2769 ++ } 1.2770 ++ 1.2771 ++ return ptr; 1.2772 ++} 1.2773 ++#endif 1.2774 ++ 1.2775 + /* Return the limit of (a pointer to the first character after) the field 1.2776 + in LINE specified by KEY. */ 1.2777 + 1.2778 + static char * 1.2779 +-limfield (struct line const *line, struct keyfield const *key) 1.2780 ++limfield_uni (const struct line *line, const struct keyfield *key) 1.2781 + { 1.2782 + char *ptr = line->text, *lim = ptr + line->length - 1; 1.2783 + size_t eword = key->eword, echar = key->echar; 1.2784 +@@ -1653,10 +1861,10 @@ limfield (struct line const *line, struc 1.2785 + 'beginning' is the first character following the delimiting TAB. 1.2786 + Otherwise, leave PTR pointing at the first 'blank' character after 1.2787 + the preceding field. */ 1.2788 +- if (tab != TAB_DEFAULT) 1.2789 ++ if (tab_length) 1.2790 + while (ptr < lim && eword--) 1.2791 + { 1.2792 +- while (ptr < lim && *ptr != tab) 1.2793 ++ while (ptr < lim && *ptr != tab[0]) 1.2794 + ++ptr; 1.2795 + if (ptr < lim && (eword || echar)) 1.2796 + ++ptr; 1.2797 +@@ -1702,10 +1910,10 @@ limfield (struct line const *line, struc 1.2798 + */ 1.2799 + 1.2800 + /* Make LIM point to the end of (one byte past) the current field. 
*/ 1.2801 +- if (tab != TAB_DEFAULT) 1.2802 ++ if (tab_length) 1.2803 + { 1.2804 + char *newlim; 1.2805 +- newlim = memchr (ptr, tab, lim - ptr); 1.2806 ++ newlim = memchr (ptr, tab[0], lim - ptr); 1.2807 + if (newlim) 1.2808 + lim = newlim; 1.2809 + } 1.2810 +@@ -1736,6 +1944,130 @@ limfield (struct line const *line, struc 1.2811 + return ptr; 1.2812 + } 1.2813 + 1.2814 ++#if HAVE_MBRTOWC 1.2815 ++static char * 1.2816 ++limfield_mb (const struct line *line, const struct keyfield *key) 1.2817 ++{ 1.2818 ++ char *ptr = line->text, *lim = ptr + line->length - 1; 1.2819 ++ size_t eword = key->eword, echar = key->echar; 1.2820 ++ int i; 1.2821 ++ size_t mblength; 1.2822 ++ mbstate_t state; 1.2823 ++ 1.2824 ++ if (echar == 0) 1.2825 ++ eword++; /* skip all of end field. */ 1.2826 ++ 1.2827 ++ memset (&state, '\0', sizeof(mbstate_t)); 1.2828 ++ 1.2829 ++ if (tab_length) 1.2830 ++ while (ptr < lim && eword--) 1.2831 ++ { 1.2832 ++ while (ptr < lim && memcmp (ptr, tab, tab_length) != 0) 1.2833 ++ { 1.2834 ++ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1.2835 ++ ptr += mblength; 1.2836 ++ } 1.2837 ++ if (ptr < lim && (eword | echar)) 1.2838 ++ { 1.2839 ++ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1.2840 ++ ptr += mblength; 1.2841 ++ } 1.2842 ++ } 1.2843 ++ else 1.2844 ++ while (ptr < lim && eword--) 1.2845 ++ { 1.2846 ++ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) 1.2847 ++ ptr += mblength; 1.2848 ++ if (ptr < lim) 1.2849 ++ { 1.2850 ++ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1.2851 ++ ptr += mblength; 1.2852 ++ } 1.2853 ++ while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength)) 1.2854 ++ ptr += mblength; 1.2855 ++ } 1.2856 ++ 1.2857 ++ 1.2858 ++# ifdef POSIX_UNSPECIFIED 1.2859 ++ /* Make LIM point to the end of (one byte past) the current field. 
*/ 1.2860 ++ if (tab_length) 1.2861 ++ { 1.2862 ++ char *newlim, *p; 1.2863 ++ 1.2864 ++ newlim = NULL; 1.2865 ++ for (p = ptr; p < lim;) 1.2866 ++ { 1.2867 ++ if (memcmp (p, tab, tab_length) == 0) 1.2868 ++ { 1.2869 ++ newlim = p; 1.2870 ++ break; 1.2871 ++ } 1.2872 ++ 1.2873 ++ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1.2874 ++ p += mblength; 1.2875 ++ } 1.2876 ++ } 1.2877 ++ else 1.2878 ++ { 1.2879 ++ char *newlim; 1.2880 ++ newlim = ptr; 1.2881 ++ 1.2882 ++ while (newlim < lim && ismbblank (newlim, lim - newlim, &mblength)) 1.2883 ++ newlim += mblength; 1.2884 ++ if (ptr < lim) 1.2885 ++ { 1.2886 ++ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1.2887 ++ ptr += mblength; 1.2888 ++ } 1.2889 ++ while (newlim < lim && !ismbblank (newlim, lim - newlim, &mblength)) 1.2890 ++ newlim += mblength; 1.2891 ++ lim = newlim; 1.2892 ++ } 1.2893 ++# endif 1.2894 ++ 1.2895 ++ if (echar != 0) 1.2896 ++ { 1.2897 ++ /* If we're skipping leading blanks, don't start counting characters 1.2898 ++ * until after skipping past any leading blanks. */ 1.2899 ++ if (key->skipeblanks) 1.2900 ++ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength)) 1.2901 ++ ptr += mblength; 1.2902 ++ 1.2903 ++ memset (&state, '\0', sizeof(mbstate_t)); 1.2904 ++ 1.2905 ++ /* Advance PTR by ECHAR (if possible), but no further than LIM. 
*/ 1.2906 ++ for (i = 0; i < echar; i++) 1.2907 ++ { 1.2908 ++ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); 1.2909 ++ 1.2910 ++ if (ptr + mblength > lim) 1.2911 ++ break; 1.2912 ++ else 1.2913 ++ ptr += mblength; 1.2914 ++ } 1.2915 ++ } 1.2916 ++ 1.2917 ++ return ptr; 1.2918 ++} 1.2919 ++#endif 1.2920 ++ 1.2921 ++static void 1.2922 ++skipblanks_uni (char **ptr, char *lim) 1.2923 ++{ 1.2924 ++ while (*ptr < lim && blanks[to_uchar (**ptr)]) 1.2925 ++ ++(*ptr); 1.2926 ++} 1.2927 ++ 1.2928 ++#if HAVE_MBRTOWC 1.2929 ++static void 1.2930 ++skipblanks_mb (char **ptr, char *lim) 1.2931 ++{ 1.2932 ++ size_t mblength; 1.2933 ++ while (*ptr < lim && ismbblank (*ptr, lim - *ptr, &mblength)) 1.2934 ++ (*ptr) += mblength; 1.2935 ++} 1.2936 ++#endif 1.2937 ++ 1.2938 + /* Fill BUF reading from FP, moving buf->left bytes from the end 1.2939 + of buf->buf to the beginning first. If EOF is reached and the 1.2940 + file wasn't terminated by a newline, supply one. Set up BUF's line 1.2941 +@@ -1822,8 +2154,22 @@ fillbuf (struct buffer *buf, FILE *fp, c 1.2942 + else 1.2943 + { 1.2944 + if (key->skipsblanks) 1.2945 +- while (blanks[to_uchar (*line_start)]) 1.2946 +- line_start++; 1.2947 ++ { 1.2948 ++#if HAVE_MBRTOWC 1.2949 ++ if (MB_CUR_MAX > 1) 1.2950 ++ { 1.2951 ++ size_t mblength; 1.2952 ++ while (line_start < line->keylim && 1.2953 ++ ismbblank (line_start, 1.2954 ++ line->keylim - line_start, 1.2955 ++ &mblength)) 1.2956 ++ line_start += mblength; 1.2957 ++ } 1.2958 ++ else 1.2959 ++#endif 1.2960 ++ while (blanks[to_uchar (*line_start)]) 1.2961 ++ line_start++; 1.2962 ++ } 1.2963 + line->keybeg = line_start; 1.2964 + } 1.2965 + } 1.2966 +@@ -1944,7 +2290,7 @@ human_numcompare (char const *a, char co 1.2967 + hideously fast. 
*/ 1.2968 + 1.2969 + static int 1.2970 +-numcompare (char const *a, char const *b) 1.2971 ++numcompare_uni (const char *a, const char *b) 1.2972 + { 1.2973 + while (blanks[to_uchar (*a)]) 1.2974 + a++; 1.2975 +@@ -1954,6 +2300,25 @@ numcompare (char const *a, char const *b 1.2976 + return strnumcmp (a, b, decimal_point, thousands_sep); 1.2977 + } 1.2978 + 1.2979 ++#if HAVE_MBRTOWC 1.2980 ++static int 1.2981 ++numcompare_mb (const char *a, const char *b) 1.2982 ++{ 1.2983 ++ size_t mblength, len; 1.2984 ++ len = strlen (a); /* okay for UTF-8 */ 1.2985 ++ while (*a && ismbblank (a, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength)) 1.2986 ++ { 1.2987 ++ a += mblength; 1.2988 ++ len -= mblength; 1.2989 ++ } 1.2990 ++ len = strlen (b); /* okay for UTF-8 */ 1.2991 ++ while (*b && ismbblank (b, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength)) 1.2992 ++ b += mblength; 1.2993 ++ 1.2994 ++ return strnumcmp (a, b, decimal_point, thousands_sep); 1.2995 ++} 1.2996 ++#endif /* HAV_EMBRTOWC */ 1.2997 ++ 1.2998 + /* Work around a problem whereby the long double value returned by glibc's 1.2999 + strtold ("NaN", ...) contains uninitialized bits: clear all bytes of 1.3000 + A and B before calling strtold. FIXME: remove this function once 1.3001 +@@ -2004,7 +2369,7 @@ general_numcompare (char const *sa, char 1.3002 + Return 0 if the name in S is not recognized. 
*/ 1.3003 + 1.3004 + static int 1.3005 +-getmonth (char const *month, char **ea) 1.3006 ++getmonth_uni (char const *month, size_t len, char **ea) 1.3007 + { 1.3008 + size_t lo = 0; 1.3009 + size_t hi = MONTHS_PER_YEAR; 1.3010 +@@ -2280,15 +2645,14 @@ debug_key (struct line const *line, stru 1.3011 + char saved = *lim; 1.3012 + *lim = '\0'; 1.3013 + 1.3014 +- while (blanks[to_uchar (*beg)]) 1.3015 +- beg++; 1.3016 ++ skipblanks (&beg, lim); 1.3017 + 1.3018 + char *tighter_lim = beg; 1.3019 + 1.3020 + if (lim < beg) 1.3021 + tighter_lim = lim; 1.3022 + else if (key->month) 1.3023 +- getmonth (beg, &tighter_lim); 1.3024 ++ getmonth (beg, lim-beg, &tighter_lim); 1.3025 + else if (key->general_numeric) 1.3026 + ignore_value (strtold (beg, &tighter_lim)); 1.3027 + else if (key->numeric || key->human_numeric) 1.3028 +@@ -2432,7 +2796,7 @@ key_warnings (struct keyfield const *gke 1.3029 + bool maybe_space_aligned = !hard_LC_COLLATE && default_key_compare (key) 1.3030 + && !(key->schar || key->echar); 1.3031 + bool line_offset = key->eword == 0 && key->echar != 0; /* -k1.x,1.y */ 1.3032 +- if (!gkey_only && tab == TAB_DEFAULT && !line_offset 1.3033 ++ if (!gkey_only && !tab_length && !line_offset 1.3034 + && ((!key->skipsblanks && !(implicit_skip || maybe_space_aligned)) 1.3035 + || (!key->skipsblanks && key->schar) 1.3036 + || (!key->skipeblanks && key->echar))) 1.3037 +@@ -2490,11 +2854,87 @@ key_warnings (struct keyfield const *gke 1.3038 + error (0, 0, _("option '-r' only applies to last-resort comparison")); 1.3039 + } 1.3040 + 1.3041 ++#if HAVE_MBRTOWC 1.3042 ++static int 1.3043 ++getmonth_mb (const char *s, size_t len, char **ea) 1.3044 ++{ 1.3045 ++ char *month; 1.3046 ++ register size_t i; 1.3047 ++ register int lo = 0, hi = MONTHS_PER_YEAR, result; 1.3048 ++ char *tmp; 1.3049 ++ size_t wclength, mblength; 1.3050 ++ const char *pp; 1.3051 ++ const wchar_t *wpp; 1.3052 ++ wchar_t *month_wcs; 1.3053 ++ mbstate_t state; 1.3054 ++ 1.3055 ++ while (len > 0 && ismbblank 
(s, len, &mblength)) 1.3056 ++ { 1.3057 ++ s += mblength; 1.3058 ++ len -= mblength; 1.3059 ++ } 1.3060 ++ 1.3061 ++ if (len == 0) 1.3062 ++ return 0; 1.3063 ++ 1.3064 ++ if (SIZE_MAX - len < 1) 1.3065 ++ xalloc_die (); 1.3066 ++ 1.3067 ++ month = (char *) xnmalloc (len + 1, MB_CUR_MAX); 1.3068 ++ 1.3069 ++ pp = tmp = (char *) xnmalloc (len + 1, MB_CUR_MAX); 1.3070 ++ memcpy (tmp, s, len); 1.3071 ++ tmp[len] = '\0'; 1.3072 ++ wpp = month_wcs = (wchar_t *) xnmalloc (len + 1, sizeof (wchar_t)); 1.3073 ++ memset (&state, '\0', sizeof (mbstate_t)); 1.3074 ++ 1.3075 ++ wclength = mbsrtowcs (month_wcs, &pp, len + 1, &state); 1.3076 ++ if (wclength == (size_t)-1 || pp != NULL) 1.3077 ++ error (SORT_FAILURE, 0, _("Invalid multibyte input %s."), quote(s)); 1.3078 ++ 1.3079 ++ for (i = 0; i < wclength; i++) 1.3080 ++ { 1.3081 ++ month_wcs[i] = towupper(month_wcs[i]); 1.3082 ++ if (iswblank (month_wcs[i])) 1.3083 ++ { 1.3084 ++ month_wcs[i] = L'\0'; 1.3085 ++ break; 1.3086 ++ } 1.3087 ++ } 1.3088 ++ 1.3089 ++ mblength = wcsrtombs (month, &wpp, (len + 1) * MB_CUR_MAX, &state); 1.3090 ++ assert (mblength != (-1) && wpp == NULL); 1.3091 ++ 1.3092 ++ do 1.3093 ++ { 1.3094 ++ int ix = (lo + hi) / 2; 1.3095 ++ 1.3096 ++ if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0) 1.3097 ++ hi = ix; 1.3098 ++ else 1.3099 ++ lo = ix; 1.3100 ++ } 1.3101 ++ while (hi - lo > 1); 1.3102 ++ 1.3103 ++ result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name)) 1.3104 ++ ? monthtab[lo].val : 0); 1.3105 ++ 1.3106 ++ if (ea && result) 1.3107 ++ *ea = (char*) s + strlen (monthtab[lo].name); 1.3108 ++ 1.3109 ++ free (month); 1.3110 ++ free (tmp); 1.3111 ++ free (month_wcs); 1.3112 ++ 1.3113 ++ return result; 1.3114 ++} 1.3115 ++#endif 1.3116 ++ 1.3117 + /* Compare two lines A and B trying every key in sequence until there 1.3118 + are no more keys or a difference is found. 
*/ 1.3119 + 1.3120 + static int 1.3121 +-keycompare (struct line const *a, struct line const *b) 1.3122 ++keycompare_uni (const struct line *a, const struct line *b) 1.3123 + { 1.3124 + struct keyfield *key = keylist; 1.3125 + 1.3126 +@@ -2579,7 +3019,7 @@ keycompare (struct line const *a, struct 1.3127 + else if (key->human_numeric) 1.3128 + diff = human_numcompare (ta, tb); 1.3129 + else if (key->month) 1.3130 +- diff = getmonth (ta, NULL) - getmonth (tb, NULL); 1.3131 ++ diff = getmonth (ta, tlena, NULL) - getmonth (tb, tlenb, NULL); 1.3132 + else if (key->random) 1.3133 + diff = compare_random (ta, tlena, tb, tlenb); 1.3134 + else if (key->version) 1.3135 +@@ -2695,6 +3135,211 @@ keycompare (struct line const *a, struct 1.3136 + return key->reverse ? -diff : diff; 1.3137 + } 1.3138 + 1.3139 ++#if HAVE_MBRTOWC 1.3140 ++static int 1.3141 ++keycompare_mb (const struct line *a, const struct line *b) 1.3142 ++{ 1.3143 ++ struct keyfield *key = keylist; 1.3144 ++ 1.3145 ++ /* For the first iteration only, the key positions have been 1.3146 ++ precomputed for us. */ 1.3147 ++ char *texta = a->keybeg; 1.3148 ++ char *textb = b->keybeg; 1.3149 ++ char *lima = a->keylim; 1.3150 ++ char *limb = b->keylim; 1.3151 ++ 1.3152 ++ size_t mblength_a, mblength_b; 1.3153 ++ wchar_t wc_a, wc_b; 1.3154 ++ mbstate_t state_a, state_b; 1.3155 ++ 1.3156 ++ int diff = 0; 1.3157 ++ 1.3158 ++ memset (&state_a, '\0', sizeof(mbstate_t)); 1.3159 ++ memset (&state_b, '\0', sizeof(mbstate_t)); 1.3160 ++ /* Ignore keys with start after end. */ 1.3161 ++ if (a->keybeg - a->keylim > 0) 1.3162 ++ return 0; 1.3163 ++ 1.3164 ++ 1.3165 ++ /* Ignore and/or translate chars before comparing. 
*/ 1.3166 ++# define IGNORE_CHARS(NEW_LEN, LEN, TEXT, COPY, WC, MBLENGTH, STATE) \ 1.3167 ++ do \ 1.3168 ++ { \ 1.3169 ++ wchar_t uwc; \ 1.3170 ++ char mbc[MB_LEN_MAX]; \ 1.3171 ++ mbstate_t state_wc; \ 1.3172 ++ \ 1.3173 ++ for (NEW_LEN = i = 0; i < LEN;) \ 1.3174 ++ { \ 1.3175 ++ mbstate_t state_bak; \ 1.3176 ++ \ 1.3177 ++ state_bak = STATE; \ 1.3178 ++ MBLENGTH = mbrtowc (&WC, TEXT + i, LEN - i, &STATE); \ 1.3179 ++ \ 1.3180 ++ if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1 \ 1.3181 ++ || MBLENGTH == 0) \ 1.3182 ++ { \ 1.3183 ++ if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1) \ 1.3184 ++ STATE = state_bak; \ 1.3185 ++ if (!ignore) \ 1.3186 ++ COPY[NEW_LEN++] = TEXT[i]; \ 1.3187 ++ i++; \ 1.3188 ++ continue; \ 1.3189 ++ } \ 1.3190 ++ \ 1.3191 ++ if (ignore) \ 1.3192 ++ { \ 1.3193 ++ if ((ignore == nonprinting && !iswprint (WC)) \ 1.3194 ++ || (ignore == nondictionary \ 1.3195 ++ && !iswalnum (WC) && !iswblank (WC))) \ 1.3196 ++ { \ 1.3197 ++ i += MBLENGTH; \ 1.3198 ++ continue; \ 1.3199 ++ } \ 1.3200 ++ } \ 1.3201 ++ \ 1.3202 ++ if (translate) \ 1.3203 ++ { \ 1.3204 ++ \ 1.3205 ++ uwc = towupper(WC); \ 1.3206 ++ if (WC == uwc) \ 1.3207 ++ { \ 1.3208 ++ memcpy (mbc, TEXT + i, MBLENGTH); \ 1.3209 ++ i += MBLENGTH; \ 1.3210 ++ } \ 1.3211 ++ else \ 1.3212 ++ { \ 1.3213 ++ i += MBLENGTH; \ 1.3214 ++ WC = uwc; \ 1.3215 ++ memset (&state_wc, '\0', sizeof (mbstate_t)); \ 1.3216 ++ \ 1.3217 ++ MBLENGTH = wcrtomb (mbc, WC, &state_wc); \ 1.3218 ++ assert (MBLENGTH != (size_t)-1 && MBLENGTH != 0); \ 1.3219 ++ } \ 1.3220 ++ \ 1.3221 ++ for (j = 0; j < MBLENGTH; j++) \ 1.3222 ++ COPY[NEW_LEN++] = mbc[j]; \ 1.3223 ++ } \ 1.3224 ++ else \ 1.3225 ++ for (j = 0; j < MBLENGTH; j++) \ 1.3226 ++ COPY[NEW_LEN++] = TEXT[i++]; \ 1.3227 ++ } \ 1.3228 ++ COPY[NEW_LEN] = '\0'; \ 1.3229 ++ } \ 1.3230 ++ while (0) 1.3231 ++ 1.3232 ++ /* Actually compare the fields. */ 1.3233 ++ 1.3234 ++ for (;;) 1.3235 ++ { 1.3236 ++ /* Find the lengths. 
*/ 1.3237 ++ size_t lena = lima <= texta ? 0 : lima - texta; 1.3238 ++ size_t lenb = limb <= textb ? 0 : limb - textb; 1.3239 ++ 1.3240 ++ char enda IF_LINT (= 0); 1.3241 ++ char endb IF_LINT (= 0); 1.3242 ++ 1.3243 ++ char const *translate = key->translate; 1.3244 ++ bool const *ignore = key->ignore; 1.3245 ++ 1.3246 ++ if (ignore || translate) 1.3247 ++ { 1.3248 ++ if (SIZE_MAX - lenb - 2 < lena) 1.3249 ++ xalloc_die (); 1.3250 ++ char *copy_a = (char *) xnmalloc (lena + lenb + 2, MB_CUR_MAX); 1.3251 ++ char *copy_b = copy_a + lena * MB_CUR_MAX + 1; 1.3252 ++ size_t new_len_a, new_len_b; 1.3253 ++ size_t i, j; 1.3254 ++ 1.3255 ++ IGNORE_CHARS (new_len_a, lena, texta, copy_a, 1.3256 ++ wc_a, mblength_a, state_a); 1.3257 ++ IGNORE_CHARS (new_len_b, lenb, textb, copy_b, 1.3258 ++ wc_b, mblength_b, state_b); 1.3259 ++ texta = copy_a; textb = copy_b; 1.3260 ++ lena = new_len_a; lenb = new_len_b; 1.3261 ++ } 1.3262 ++ else 1.3263 ++ { 1.3264 ++ /* Use the keys in-place, temporarily null-terminated. */ 1.3265 ++ enda = texta[lena]; texta[lena] = '\0'; 1.3266 ++ endb = textb[lenb]; textb[lenb] = '\0'; 1.3267 ++ } 1.3268 ++ 1.3269 ++ if (key->random) 1.3270 ++ diff = compare_random (texta, lena, textb, lenb); 1.3271 ++ else if (key->numeric | key->general_numeric | key->human_numeric) 1.3272 ++ { 1.3273 ++ char savea = *lima, saveb = *limb; 1.3274 ++ 1.3275 ++ *lima = *limb = '\0'; 1.3276 ++ diff = (key->numeric ? numcompare (texta, textb) 1.3277 ++ : key->general_numeric ? 
general_numcompare (texta, textb) 1.3278 ++ : human_numcompare (texta, textb)); 1.3279 ++ *lima = savea, *limb = saveb; 1.3280 ++ } 1.3281 ++ else if (key->version) 1.3282 ++ diff = filevercmp (texta, textb); 1.3283 ++ else if (key->month) 1.3284 ++ diff = getmonth (texta, lena, NULL) - getmonth (textb, lenb, NULL); 1.3285 ++ else if (lena == 0) 1.3286 ++ diff = - NONZERO (lenb); 1.3287 ++ else if (lenb == 0) 1.3288 ++ diff = 1; 1.3289 ++ else if (hard_LC_COLLATE && !folding) 1.3290 ++ { 1.3291 ++ diff = xmemcoll0 (texta, lena + 1, textb, lenb + 1); 1.3292 ++ } 1.3293 ++ else 1.3294 ++ { 1.3295 ++ diff = memcmp (texta, textb, MIN (lena, lenb)); 1.3296 ++ if (diff == 0) 1.3297 ++ diff = lena < lenb ? -1 : lena != lenb; 1.3298 ++ } 1.3299 ++ 1.3300 ++ if (ignore || translate) 1.3301 ++ free (texta); 1.3302 ++ else 1.3303 ++ { 1.3304 ++ texta[lena] = enda; 1.3305 ++ textb[lenb] = endb; 1.3306 ++ } 1.3307 ++ 1.3308 ++ if (diff) 1.3309 ++ goto not_equal; 1.3310 ++ 1.3311 ++ key = key->next; 1.3312 ++ if (! key) 1.3313 ++ break; 1.3314 ++ 1.3315 ++ /* Find the beginning and limit of the next field. 
*/ 1.3316 ++ if (key->eword != -1) 1.3317 ++ lima = limfield (a, key), limb = limfield (b, key); 1.3318 ++ else 1.3319 ++ lima = a->text + a->length - 1, limb = b->text + b->length - 1; 1.3320 ++ 1.3321 ++ if (key->sword != -1) 1.3322 ++ texta = begfield (a, key), textb = begfield (b, key); 1.3323 ++ else 1.3324 ++ { 1.3325 ++ texta = a->text, textb = b->text; 1.3326 ++ if (key->skipsblanks) 1.3327 ++ { 1.3328 ++ while (texta < lima && ismbblank (texta, lima - texta, &mblength_a)) 1.3329 ++ texta += mblength_a; 1.3330 ++ while (textb < limb && ismbblank (textb, limb - textb, &mblength_b)) 1.3331 ++ textb += mblength_b; 1.3332 ++ } 1.3333 ++ } 1.3334 ++ } 1.3335 ++ 1.3336 ++not_equal: 1.3337 ++ if (key && key->reverse) 1.3338 ++ return -diff; 1.3339 ++ else 1.3340 ++ return diff; 1.3341 ++} 1.3342 ++#endif 1.3343 ++ 1.3344 + /* Compare two lines A and B, returning negative, zero, or positive 1.3345 + depending on whether A compares less than, equal to, or greater than B. */ 1.3346 + 1.3347 +@@ -2722,7 +3367,7 @@ compare (struct line const *a, struct li 1.3348 + diff = - NONZERO (blen); 1.3349 + else if (blen == 0) 1.3350 + diff = 1; 1.3351 +- else if (hard_LC_COLLATE) 1.3352 ++ else if (hard_LC_COLLATE && !folding) 1.3353 + { 1.3354 + /* Note xmemcoll0 is a performance enhancement as 1.3355 + it will not unconditionally write '\0' after the 1.3356 +@@ -4121,6 +4766,7 @@ set_ordering (char const *s, struct keyf 1.3357 + break; 1.3358 + case 'f': 1.3359 + key->translate = fold_toupper; 1.3360 ++ folding = true; 1.3361 + break; 1.3362 + case 'g': 1.3363 + key->general_numeric = true; 1.3364 +@@ -4199,7 +4845,7 @@ main (int argc, char **argv) 1.3365 + initialize_exit_failure (SORT_FAILURE); 1.3366 + 1.3367 + hard_LC_COLLATE = hard_locale (LC_COLLATE); 1.3368 +-#if HAVE_NL_LANGINFO 1.3369 ++#if HAVE_LANGINFO_CODESET 1.3370 + hard_LC_TIME = hard_locale (LC_TIME); 1.3371 + #endif 1.3372 + 1.3373 +@@ -4220,6 +4866,29 @@ main (int argc, char **argv) 1.3374 + thousands_sep = 
-1; 1.3375 + } 1.3376 + 1.3377 ++#if HAVE_MBRTOWC 1.3378 ++ if (MB_CUR_MAX > 1) 1.3379 ++ { 1.3380 ++ inittables = inittables_mb; 1.3381 ++ begfield = begfield_mb; 1.3382 ++ limfield = limfield_mb; 1.3383 ++ skipblanks = skipblanks_mb; 1.3384 ++ getmonth = getmonth_mb; 1.3385 ++ keycompare = keycompare_mb; 1.3386 ++ numcompare = numcompare_mb; 1.3387 ++ } 1.3388 ++ else 1.3389 ++#endif 1.3390 ++ { 1.3391 ++ inittables = inittables_uni; 1.3392 ++ begfield = begfield_uni; 1.3393 ++ limfield = limfield_uni; 1.3394 ++ skipblanks = skipblanks_uni; 1.3395 ++ getmonth = getmonth_uni; 1.3396 ++ keycompare = keycompare_uni; 1.3397 ++ numcompare = numcompare_uni; 1.3398 ++ } 1.3399 ++ 1.3400 + have_read_stdin = false; 1.3401 + inittables (); 1.3402 + 1.3403 +@@ -4494,13 +5163,34 @@ main (int argc, char **argv) 1.3404 + 1.3405 + case 't': 1.3406 + { 1.3407 +- char newtab = optarg[0]; 1.3408 +- if (! newtab) 1.3409 ++ char newtab[MB_LEN_MAX + 1]; 1.3410 ++ size_t newtab_length = 1; 1.3411 ++ strncpy (newtab, optarg, MB_LEN_MAX); 1.3412 ++ if (! newtab[0]) 1.3413 + error (SORT_FAILURE, 0, _("empty tab")); 1.3414 +- if (optarg[1]) 1.3415 ++#if HAVE_MBRTOWC 1.3416 ++ if (MB_CUR_MAX > 1) 1.3417 ++ { 1.3418 ++ wchar_t wc; 1.3419 ++ mbstate_t state; 1.3420 ++ 1.3421 ++ memset (&state, '\0', sizeof (mbstate_t)); 1.3422 ++ newtab_length = mbrtowc (&wc, newtab, strnlen (newtab, 1.3423 ++ MB_LEN_MAX), 1.3424 ++ &state); 1.3425 ++ switch (newtab_length) 1.3426 ++ { 1.3427 ++ case (size_t) -1: 1.3428 ++ case (size_t) -2: 1.3429 ++ case 0: 1.3430 ++ newtab_length = 1; 1.3431 ++ } 1.3432 ++ } 1.3433 ++#endif 1.3434 ++ if (newtab_length == 1 && optarg[1]) 1.3435 + { 1.3436 + if (STREQ (optarg, "\\0")) 1.3437 +- newtab = '\0'; 1.3438 ++ newtab[0] = '\0'; 1.3439 + else 1.3440 + { 1.3441 + /* Provoke with 'sort -txx'. 
Complain about 1.3442 +@@ -4511,9 +5201,12 @@ main (int argc, char **argv) 1.3443 + quote (optarg)); 1.3444 + } 1.3445 + } 1.3446 +- if (tab != TAB_DEFAULT && tab != newtab) 1.3447 ++ if (tab_length 1.3448 ++ && (tab_length != newtab_length 1.3449 ++ || memcmp (tab, newtab, tab_length) != 0)) 1.3450 + error (SORT_FAILURE, 0, _("incompatible tabs")); 1.3451 +- tab = newtab; 1.3452 ++ memcpy (tab, newtab, newtab_length); 1.3453 ++ tab_length = newtab_length; 1.3454 + } 1.3455 + break; 1.3456 + 1.3457 +@@ -4751,12 +5444,10 @@ main (int argc, char **argv) 1.3458 + sort (files, nfiles, outfile, nthreads); 1.3459 + } 1.3460 + 1.3461 +-#ifdef lint 1.3462 + if (files_from) 1.3463 + readtokens0_free (&tok); 1.3464 + else 1.3465 + free (files); 1.3466 +-#endif 1.3467 + 1.3468 + if (have_read_stdin && fclose (stdin) == EOF) 1.3469 + die (_("close failed"), "-"); 1.3470 +diff -Naurp coreutils-8.25-orig/src/unexpand.c coreutils-8.25/src/unexpand.c 1.3471 +--- coreutils-8.25-orig/src/unexpand.c 2016-01-01 07:48:50.000000000 -0600 1.3472 ++++ coreutils-8.25/src/unexpand.c 2016-02-08 19:07:10.311944651 -0600 1.3473 +@@ -38,12 +38,29 @@ 1.3474 + #include <stdio.h> 1.3475 + #include <getopt.h> 1.3476 + #include <sys/types.h> 1.3477 ++ 1.3478 ++/* Get mbstate_t, mbrtowc(), wcwidth(). */ 1.3479 ++#if HAVE_WCHAR_H 1.3480 ++# include <wchar.h> 1.3481 ++#endif 1.3482 ++ 1.3483 + #include "system.h" 1.3484 + #include "error.h" 1.3485 + #include "fadvise.h" 1.3486 + #include "quote.h" 1.3487 + #include "xstrndup.h" 1.3488 + 1.3489 ++/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC 1.3490 ++ installation; work around this configuration error. */ 1.3491 ++#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 1.3492 ++# define MB_LEN_MAX 16 1.3493 ++#endif 1.3494 ++ 1.3495 ++/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. 
*/ 1.3496 ++#if HAVE_MBRTOWC && defined mbstate_t 1.3497 ++# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 1.3498 ++#endif 1.3499 ++ 1.3500 + /* The official name of this program (e.g., no 'g' prefix). */ 1.3501 + #define PROGRAM_NAME "unexpand" 1.3502 + 1.3503 +@@ -103,6 +120,210 @@ static struct option const longopts[] = 1.3504 + {NULL, 0, NULL, 0} 1.3505 + }; 1.3506 + 1.3507 ++static FILE *next_file (FILE *fp); 1.3508 ++ 1.3509 ++#if HAVE_MBRTOWC 1.3510 ++static void 1.3511 ++unexpand_multibyte (void) 1.3512 ++{ 1.3513 ++ FILE *fp; /* Input stream. */ 1.3514 ++ mbstate_t i_state; /* Current shift state of the input stream. */ 1.3515 ++ mbstate_t i_state_bak; /* Back up the I_STATE. */ 1.3516 ++ mbstate_t o_state; /* Current shift state of the output stream. */ 1.3517 ++ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ 1.3518 ++ char *bufpos = buf; /* Next read position of BUF. */ 1.3519 ++ size_t buflen = 0; /* The length of the byte sequence in buf. */ 1.3520 ++ wint_t wc; /* A gotten wide character. */ 1.3521 ++ size_t mblength; /* The byte size of a multibyte character 1.3522 ++ which shows as same character as WC. */ 1.3523 ++ bool prev_tab = false; 1.3524 ++ 1.3525 ++ /* Index in `tab_list' of next tabstop: */ 1.3526 ++ int tab_index = 0; /* For calculating width of pending tabs. */ 1.3527 ++ int print_tab_index = 0; /* For printing as many tabs as possible. */ 1.3528 ++ unsigned int column = 0; /* Column on screen of next char. */ 1.3529 ++ int next_tab_column; /* Column the next tab stop is on. */ 1.3530 ++ int convert = 1; /* If nonzero, perform translations. */ 1.3531 ++ unsigned int pending = 0; /* Pending columns of blanks. 
*/ 1.3532 ++ 1.3533 ++ fp = next_file ((FILE *) NULL); 1.3534 ++ if (fp == NULL) 1.3535 ++ return; 1.3536 ++ 1.3537 ++ memset (&o_state, '\0', sizeof(mbstate_t)); 1.3538 ++ memset (&i_state, '\0', sizeof(mbstate_t)); 1.3539 ++ 1.3540 ++ for (;;) 1.3541 ++ { 1.3542 ++ if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp)) 1.3543 ++ { 1.3544 ++ memmove (buf, bufpos, buflen); 1.3545 ++ buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp); 1.3546 ++ bufpos = buf; 1.3547 ++ } 1.3548 ++ 1.3549 ++ /* Get a wide character. */ 1.3550 ++ if (buflen < 1) 1.3551 ++ { 1.3552 ++ mblength = 1; 1.3553 ++ wc = WEOF; 1.3554 ++ } 1.3555 ++ else 1.3556 ++ { 1.3557 ++ i_state_bak = i_state; 1.3558 ++ mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &i_state); 1.3559 ++ } 1.3560 ++ 1.3561 ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) 1.3562 ++ { 1.3563 ++ i_state = i_state_bak; 1.3564 ++ wc = L'\0'; 1.3565 ++ } 1.3566 ++ 1.3567 ++ if (wc == L' ' && convert && column < INT_MAX) 1.3568 ++ { 1.3569 ++ ++pending; 1.3570 ++ ++column; 1.3571 ++ } 1.3572 ++ else if (wc == L'\t' && convert) 1.3573 ++ { 1.3574 ++ if (tab_size == 0) 1.3575 ++ { 1.3576 ++ /* Do not let tab_index == first_free_tab; 1.3577 ++ stop when it is 1 less. */ 1.3578 ++ while (tab_index < first_free_tab - 1 1.3579 ++ && column >= tab_list[tab_index]) 1.3580 ++ tab_index++; 1.3581 ++ next_tab_column = tab_list[tab_index]; 1.3582 ++ if (tab_index < first_free_tab - 1) 1.3583 ++ tab_index++; 1.3584 ++ if (column >= next_tab_column) 1.3585 ++ { 1.3586 ++ convert = 0; /* Ran out of tab stops. */ 1.3587 ++ goto flush_pend_mb; 1.3588 ++ } 1.3589 ++ } 1.3590 ++ else 1.3591 ++ { 1.3592 ++ next_tab_column = column + tab_size - column % tab_size; 1.3593 ++ } 1.3594 ++ pending += next_tab_column - column; 1.3595 ++ column = next_tab_column; 1.3596 ++ } 1.3597 ++ else 1.3598 ++ { 1.3599 ++flush_pend_mb: 1.3600 ++ /* Flush pending spaces. Print as many tabs as possible, 1.3601 ++ then print the rest as spaces. 
*/ 1.3602 ++ if (pending == 1 && column != 1 && !prev_tab) 1.3603 ++ { 1.3604 ++ putchar (' '); 1.3605 ++ pending = 0; 1.3606 ++ } 1.3607 ++ column -= pending; 1.3608 ++ while (pending > 0) 1.3609 ++ { 1.3610 ++ if (tab_size == 0) 1.3611 ++ { 1.3612 ++ /* Do not let print_tab_index == first_free_tab; 1.3613 ++ stop when it is 1 less. */ 1.3614 ++ while (print_tab_index < first_free_tab - 1 1.3615 ++ && column >= tab_list[print_tab_index]) 1.3616 ++ print_tab_index++; 1.3617 ++ next_tab_column = tab_list[print_tab_index]; 1.3618 ++ if (print_tab_index < first_free_tab - 1) 1.3619 ++ print_tab_index++; 1.3620 ++ } 1.3621 ++ else 1.3622 ++ { 1.3623 ++ next_tab_column = 1.3624 ++ column + tab_size - column % tab_size; 1.3625 ++ } 1.3626 ++ if (next_tab_column - column <= pending) 1.3627 ++ { 1.3628 ++ putchar ('\t'); 1.3629 ++ pending -= next_tab_column - column; 1.3630 ++ column = next_tab_column; 1.3631 ++ } 1.3632 ++ else 1.3633 ++ { 1.3634 ++ --print_tab_index; 1.3635 ++ column += pending; 1.3636 ++ while (pending != 0) 1.3637 ++ { 1.3638 ++ putchar (' '); 1.3639 ++ pending--; 1.3640 ++ } 1.3641 ++ } 1.3642 ++ } 1.3643 ++ 1.3644 ++ if (wc == WEOF) 1.3645 ++ { 1.3646 ++ fp = next_file (fp); 1.3647 ++ if (fp == NULL) 1.3648 ++ break; /* No more files. 
*/ 1.3649 ++ else 1.3650 ++ { 1.3651 ++ memset (&i_state, '\0', sizeof(mbstate_t)); 1.3652 ++ continue; 1.3653 ++ } 1.3654 ++ } 1.3655 ++ 1.3656 ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) 1.3657 ++ { 1.3658 ++ if (convert) 1.3659 ++ { 1.3660 ++ ++column; 1.3661 ++ if (convert_entire_line == 0) 1.3662 ++ convert = 0; 1.3663 ++ } 1.3664 ++ mblength = 1; 1.3665 ++ putchar (buf[0]); 1.3666 ++ } 1.3667 ++ else if (mblength == 0) 1.3668 ++ { 1.3669 ++ if (convert && convert_entire_line == 0) 1.3670 ++ convert = 0; 1.3671 ++ mblength = 1; 1.3672 ++ putchar ('\0'); 1.3673 ++ } 1.3674 ++ else 1.3675 ++ { 1.3676 ++ if (convert) 1.3677 ++ { 1.3678 ++ if (wc == L'\b') 1.3679 ++ { 1.3680 ++ if (column > 0) 1.3681 ++ --column; 1.3682 ++ } 1.3683 ++ else 1.3684 ++ { 1.3685 ++ int width; /* The width of WC. */ 1.3686 ++ 1.3687 ++ width = wcwidth (wc); 1.3688 ++ column += (width > 0) ? width : 0; 1.3689 ++ if (convert_entire_line == 0) 1.3690 ++ convert = 0; 1.3691 ++ } 1.3692 ++ } 1.3693 ++ 1.3694 ++ if (wc == L'\n') 1.3695 ++ { 1.3696 ++ tab_index = print_tab_index = 0; 1.3697 ++ column = pending = 0; 1.3698 ++ convert = 1; 1.3699 ++ } 1.3700 ++ fwrite (bufpos, sizeof(char), mblength, stdout); 1.3701 ++ } 1.3702 ++ } 1.3703 ++ prev_tab = wc == L'\t'; 1.3704 ++ buflen -= mblength; 1.3705 ++ bufpos += mblength; 1.3706 ++ } 1.3707 ++} 1.3708 ++#endif 1.3709 ++ 1.3710 ++ 1.3711 + void 1.3712 + usage (int status) 1.3713 + { 1.3714 +@@ -523,7 +744,12 @@ main (int argc, char **argv) 1.3715 + 1.3716 + file_list = (optind < argc ? 
&argv[optind] : stdin_argv); 1.3717 + 1.3718 +- unexpand (); 1.3719 ++#if HAVE_MBRTOWC 1.3720 ++ if (MB_CUR_MAX > 1) 1.3721 ++ unexpand_multibyte (); 1.3722 ++ else 1.3723 ++#endif 1.3724 ++ unexpand (); 1.3725 + 1.3726 + if (have_read_stdin && fclose (stdin) != 0) 1.3727 + error (EXIT_FAILURE, errno, "-"); 1.3728 +diff -Naurp coreutils-8.25-orig/src/uniq.c coreutils-8.25/src/uniq.c 1.3729 +--- coreutils-8.25-orig/src/uniq.c 2016-01-13 05:08:59.000000000 -0600 1.3730 ++++ coreutils-8.25/src/uniq.c 2016-02-08 19:07:10.312944654 -0600 1.3731 +@@ -21,6 +21,17 @@ 1.3732 + #include <getopt.h> 1.3733 + #include <sys/types.h> 1.3734 + 1.3735 ++/* Get mbstate_t, mbrtowc(). */ 1.3736 ++#if HAVE_WCHAR_H 1.3737 ++# include <wchar.h> 1.3738 ++#endif 1.3739 ++ 1.3740 ++/* Get isw* functions. */ 1.3741 ++#if HAVE_WCTYPE_H 1.3742 ++# include <wctype.h> 1.3743 ++#endif 1.3744 ++#include <assert.h> 1.3745 ++ 1.3746 + #include "system.h" 1.3747 + #include "argmatch.h" 1.3748 + #include "linebuffer.h" 1.3749 +@@ -33,6 +44,18 @@ 1.3750 + #include "xstrtol.h" 1.3751 + #include "memcasecmp.h" 1.3752 + #include "quote.h" 1.3753 ++#include "xmemcoll.h" 1.3754 ++ 1.3755 ++/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC 1.3756 ++ installation; work around this configuration error. */ 1.3757 ++#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 1.3758 ++# define MB_LEN_MAX 16 1.3759 ++#endif 1.3760 ++ 1.3761 ++/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 1.3762 ++#if HAVE_MBRTOWC && defined mbstate_t 1.3763 ++# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 1.3764 ++#endif 1.3765 + 1.3766 + /* The official name of this program (e.g., no 'g' prefix). */ 1.3767 + #define PROGRAM_NAME "uniq" 1.3768 +@@ -143,6 +166,10 @@ enum 1.3769 + GROUP_OPTION = CHAR_MAX + 1 1.3770 + }; 1.3771 + 1.3772 ++/* Function pointers. 
*/ 1.3773 ++static char * 1.3774 ++(*find_field) (struct linebuffer *line); 1.3775 ++ 1.3776 + static struct option const longopts[] = 1.3777 + { 1.3778 + {"count", no_argument, NULL, 'c'}, 1.3779 +@@ -252,7 +279,7 @@ size_opt (char const *opt, char const *m 1.3780 + return a pointer to the beginning of the line's field to be compared. */ 1.3781 + 1.3782 + static char * _GL_ATTRIBUTE_PURE 1.3783 +-find_field (struct linebuffer const *line) 1.3784 ++find_field_uni (struct linebuffer *line) 1.3785 + { 1.3786 + size_t count; 1.3787 + char const *lp = line->buffer; 1.3788 +@@ -272,6 +299,83 @@ find_field (struct linebuffer const *lin 1.3789 + return line->buffer + i; 1.3790 + } 1.3791 + 1.3792 ++#if HAVE_MBRTOWC 1.3793 ++ 1.3794 ++# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \ 1.3795 ++ do \ 1.3796 ++ { \ 1.3797 ++ mbstate_t state_bak; \ 1.3798 ++ \ 1.3799 ++ CONVFAIL = 0; \ 1.3800 ++ state_bak = *STATEP; \ 1.3801 ++ \ 1.3802 ++ MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \ 1.3803 ++ \ 1.3804 ++ switch (MBLENGTH) \ 1.3805 ++ { \ 1.3806 ++ case (size_t)-2: \ 1.3807 ++ case (size_t)-1: \ 1.3808 ++ *STATEP = state_bak; \ 1.3809 ++ CONVFAIL++; \ 1.3810 ++ /* Fall through */ \ 1.3811 ++ case 0: \ 1.3812 ++ MBLENGTH = 1; \ 1.3813 ++ } \ 1.3814 ++ } \ 1.3815 ++ while (0) 1.3816 ++ 1.3817 ++static char * 1.3818 ++find_field_multi (struct linebuffer *line) 1.3819 ++{ 1.3820 ++ size_t count; 1.3821 ++ char *lp = line->buffer; 1.3822 ++ size_t size = line->length - 1; 1.3823 ++ size_t pos; 1.3824 ++ size_t mblength; 1.3825 ++ wchar_t wc; 1.3826 ++ mbstate_t *statep; 1.3827 ++ int convfail = 0; 1.3828 ++ 1.3829 ++ pos = 0; 1.3830 ++ statep = &(line->state); 1.3831 ++ 1.3832 ++ /* skip fields. 
*/ 1.3833 ++ for (count = 0; count < skip_fields && pos < size; count++) 1.3834 ++ { 1.3835 ++ while (pos < size) 1.3836 ++ { 1.3837 ++ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); 1.3838 ++ 1.3839 ++ if (convfail || !(iswblank (wc) || wc == '\n')) 1.3840 ++ { 1.3841 ++ pos += mblength; 1.3842 ++ break; 1.3843 ++ } 1.3844 ++ pos += mblength; 1.3845 ++ } 1.3846 ++ 1.3847 ++ while (pos < size) 1.3848 ++ { 1.3849 ++ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); 1.3850 ++ 1.3851 ++ if (!convfail && (iswblank (wc) || wc == '\n')) 1.3852 ++ break; 1.3853 ++ 1.3854 ++ pos += mblength; 1.3855 ++ } 1.3856 ++ } 1.3857 ++ 1.3858 ++ /* skip fields. */ 1.3859 ++ for (count = 0; count < skip_chars && pos < size; count++) 1.3860 ++ { 1.3861 ++ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); 1.3862 ++ pos += mblength; 1.3863 ++ } 1.3864 ++ 1.3865 ++ return lp + pos; 1.3866 ++} 1.3867 ++#endif 1.3868 ++ 1.3869 + /* Return false if two strings OLD and NEW match, true if not. 1.3870 + OLD and NEW point not to the beginnings of the lines 1.3871 + but rather to the beginnings of the fields to compare. 1.3872 +@@ -280,6 +384,8 @@ find_field (struct linebuffer const *lin 1.3873 + static bool 1.3874 + different (char *old, char *new, size_t oldlen, size_t newlen) 1.3875 + { 1.3876 ++ char *copy_old, *copy_new; 1.3877 ++ 1.3878 + if (check_chars < oldlen) 1.3879 + oldlen = check_chars; 1.3880 + if (check_chars < newlen) 1.3881 +@@ -287,15 +393,104 @@ different (char *old, char *new, size_t 1.3882 + 1.3883 + if (ignore_case) 1.3884 + { 1.3885 +- /* FIXME: This should invoke strcoll somehow. 
*/ 1.3886 +- return oldlen != newlen || memcasecmp (old, new, oldlen); 1.3887 ++ size_t i; 1.3888 ++ 1.3889 ++ copy_old = xmalloc (oldlen + 1); 1.3890 ++ copy_new = xmalloc (oldlen + 1); 1.3891 ++ 1.3892 ++ for (i = 0; i < oldlen; i++) 1.3893 ++ { 1.3894 ++ copy_old[i] = toupper (old[i]); 1.3895 ++ copy_new[i] = toupper (new[i]); 1.3896 ++ } 1.3897 ++ bool rc = xmemcoll (copy_old, oldlen, copy_new, newlen); 1.3898 ++ free (copy_old); 1.3899 ++ free (copy_new); 1.3900 ++ return rc; 1.3901 + } 1.3902 +- else if (hard_LC_COLLATE) 1.3903 +- return xmemcoll (old, oldlen, new, newlen) != 0; 1.3904 + else 1.3905 +- return oldlen != newlen || memcmp (old, new, oldlen); 1.3906 ++ { 1.3907 ++ copy_old = (char *)old; 1.3908 ++ copy_new = (char *)new; 1.3909 ++ } 1.3910 ++ 1.3911 ++ return xmemcoll (copy_old, oldlen, copy_new, newlen); 1.3912 ++ 1.3913 + } 1.3914 + 1.3915 ++#if HAVE_MBRTOWC 1.3916 ++static int 1.3917 ++different_multi (const char *old, const char *new, size_t oldlen, size_t newlen, mbstate_t oldstate, mbstate_t newstate) 1.3918 ++{ 1.3919 ++ size_t i, j, chars; 1.3920 ++ const char *str[2]; 1.3921 ++ char *copy[2]; 1.3922 ++ size_t len[2]; 1.3923 ++ mbstate_t state[2]; 1.3924 ++ size_t mblength; 1.3925 ++ wchar_t wc, uwc; 1.3926 ++ mbstate_t state_bak; 1.3927 ++ 1.3928 ++ str[0] = old; 1.3929 ++ str[1] = new; 1.3930 ++ len[0] = oldlen; 1.3931 ++ len[1] = newlen; 1.3932 ++ state[0] = oldstate; 1.3933 ++ state[1] = newstate; 1.3934 ++ 1.3935 ++ for (i = 0; i < 2; i++) 1.3936 ++ { 1.3937 ++ copy[i] = xmalloc (len[i] + 1); 1.3938 ++ memset (copy[i], '\0', len[i] + 1); 1.3939 ++ 1.3940 ++ for (j = 0, chars = 0; j < len[i] && chars < check_chars; chars++) 1.3941 ++ { 1.3942 ++ state_bak = state[i]; 1.3943 ++ mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i])); 1.3944 ++ 1.3945 ++ switch (mblength) 1.3946 ++ { 1.3947 ++ case (size_t)-1: 1.3948 ++ case (size_t)-2: 1.3949 ++ state[i] = state_bak; 1.3950 ++ /* Fall through */ 1.3951 ++ case 0: 1.3952 ++ 
mblength = 1; 1.3953 ++ break; 1.3954 ++ 1.3955 ++ default: 1.3956 ++ if (ignore_case) 1.3957 ++ { 1.3958 ++ uwc = towupper (wc); 1.3959 ++ 1.3960 ++ if (uwc != wc) 1.3961 ++ { 1.3962 ++ mbstate_t state_wc; 1.3963 ++ size_t mblen; 1.3964 ++ 1.3965 ++ memset (&state_wc, '\0', sizeof(mbstate_t)); 1.3966 ++ mblen = wcrtomb (copy[i] + j, uwc, &state_wc); 1.3967 ++ assert (mblen != (size_t)-1); 1.3968 ++ } 1.3969 ++ else 1.3970 ++ memcpy (copy[i] + j, str[i] + j, mblength); 1.3971 ++ } 1.3972 ++ else 1.3973 ++ memcpy (copy[i] + j, str[i] + j, mblength); 1.3974 ++ } 1.3975 ++ j += mblength; 1.3976 ++ } 1.3977 ++ copy[i][j] = '\0'; 1.3978 ++ len[i] = j; 1.3979 ++ } 1.3980 ++ int rc = xmemcoll (copy[0], len[0], copy[1], len[1]); 1.3981 ++ free (copy[0]); 1.3982 ++ free (copy[1]); 1.3983 ++ return rc; 1.3984 ++ 1.3985 ++} 1.3986 ++#endif 1.3987 ++ 1.3988 + /* Output the line in linebuffer LINE to standard output 1.3989 + provided that the switches say it should be output. 1.3990 + MATCH is true if the line matches the previous line. 
1.3991 +@@ -359,19 +554,38 @@ check_file (const char *infile, const ch 1.3992 + char *prevfield IF_LINT ( = NULL); 1.3993 + size_t prevlen IF_LINT ( = 0); 1.3994 + bool first_group_printed = false; 1.3995 ++#if HAVE_MBRTOWC 1.3996 ++ mbstate_t prevstate; 1.3997 ++ 1.3998 ++ memset (&prevstate, '\0', sizeof (mbstate_t)); 1.3999 ++#endif 1.4000 + 1.4001 + while (!feof (stdin)) 1.4002 + { 1.4003 + char *thisfield; 1.4004 + size_t thislen; 1.4005 + bool new_group; 1.4006 ++#if HAVE_MBRTOWC 1.4007 ++ mbstate_t thisstate; 1.4008 ++#endif 1.4009 + 1.4010 + if (readlinebuffer_delim (thisline, stdin, delimiter) == 0) 1.4011 + break; 1.4012 + 1.4013 + thisfield = find_field (thisline); 1.4014 + thislen = thisline->length - 1 - (thisfield - thisline->buffer); 1.4015 ++#if HAVE_MBRTOWC 1.4016 ++ if (MB_CUR_MAX > 1) 1.4017 ++ { 1.4018 ++ thisstate = thisline->state; 1.4019 + 1.4020 ++ new_group = (prevline->length == 0 1.4021 ++ || different_multi (thisfield, prevfield, 1.4022 ++ thislen, prevlen, 1.4023 ++ thisstate, prevstate)); 1.4024 ++ } 1.4025 ++ else 1.4026 ++#endif 1.4027 + new_group = (prevline->length == 0 1.4028 + || different (thisfield, prevfield, thislen, prevlen)); 1.4029 + 1.4030 +@@ -389,6 +603,10 @@ check_file (const char *infile, const ch 1.4031 + SWAP_LINES (prevline, thisline); 1.4032 + prevfield = thisfield; 1.4033 + prevlen = thislen; 1.4034 ++#if HAVE_MBRTOWC 1.4035 ++ if (MB_CUR_MAX > 1) 1.4036 ++ prevstate = thisstate; 1.4037 ++#endif 1.4038 + first_group_printed = true; 1.4039 + } 1.4040 + } 1.4041 +@@ -401,17 +619,26 @@ check_file (const char *infile, const ch 1.4042 + size_t prevlen; 1.4043 + uintmax_t match_count = 0; 1.4044 + bool first_delimiter = true; 1.4045 ++#if HAVE_MBRTOWC 1.4046 ++ mbstate_t prevstate; 1.4047 ++#endif 1.4048 + 1.4049 + if (readlinebuffer_delim (prevline, stdin, delimiter) == 0) 1.4050 + goto closefiles; 1.4051 + prevfield = find_field (prevline); 1.4052 + prevlen = prevline->length - 1 - (prevfield - prevline->buffer); 
1.4053 ++#if HAVE_MBRTOWC 1.4054 ++ prevstate = prevline->state; 1.4055 ++#endif 1.4056 + 1.4057 + while (!feof (stdin)) 1.4058 + { 1.4059 + bool match; 1.4060 + char *thisfield; 1.4061 + size_t thislen; 1.4062 ++#if HAVE_MBRTOWC 1.4063 ++ mbstate_t thisstate = thisline->state; 1.4064 ++#endif 1.4065 + if (readlinebuffer_delim (thisline, stdin, delimiter) == 0) 1.4066 + { 1.4067 + if (ferror (stdin)) 1.4068 +@@ -420,6 +647,14 @@ check_file (const char *infile, const ch 1.4069 + } 1.4070 + thisfield = find_field (thisline); 1.4071 + thislen = thisline->length - 1 - (thisfield - thisline->buffer); 1.4072 ++#if HAVE_MBRTOWC 1.4073 ++ if (MB_CUR_MAX > 1) 1.4074 ++ { 1.4075 ++ match = !different_multi (thisfield, prevfield, 1.4076 ++ thislen, prevlen, thisstate, prevstate); 1.4077 ++ } 1.4078 ++ else 1.4079 ++#endif 1.4080 + match = !different (thisfield, prevfield, thislen, prevlen); 1.4081 + match_count += match; 1.4082 + 1.4083 +@@ -452,6 +687,9 @@ check_file (const char *infile, const ch 1.4084 + SWAP_LINES (prevline, thisline); 1.4085 + prevfield = thisfield; 1.4086 + prevlen = thislen; 1.4087 ++#if HAVE_MBRTOWC 1.4088 ++ prevstate = thisstate; 1.4089 ++#endif 1.4090 + if (!match) 1.4091 + match_count = 0; 1.4092 + } 1.4093 +@@ -498,6 +736,19 @@ main (int argc, char **argv) 1.4094 + 1.4095 + atexit (close_stdout); 1.4096 + 1.4097 ++#if HAVE_MBRTOWC 1.4098 ++ if (MB_CUR_MAX > 1) 1.4099 ++ { 1.4100 ++ find_field = find_field_multi; 1.4101 ++ } 1.4102 ++ else 1.4103 ++#endif 1.4104 ++ { 1.4105 ++ find_field = find_field_uni; 1.4106 ++ } 1.4107 ++ 1.4108 ++ 1.4109 ++ 1.4110 + skip_chars = 0; 1.4111 + skip_fields = 0; 1.4112 + check_chars = SIZE_MAX; 1.4113 +diff -Naurp coreutils-8.25-orig/tests/i18n/sort-month.sh coreutils-8.25/tests/i18n/sort-month.sh 1.4114 +--- coreutils-8.25-orig/tests/i18n/sort-month.sh 1969-12-31 18:00:00.000000000 -0600 1.4115 ++++ coreutils-8.25/tests/i18n/sort-month.sh 2016-02-08 19:07:10.312944654 -0600 1.4116 +@@ -0,0 +1,34 @@ 1.4117 
++#!/bin/sh 1.4118 ++# Verify sort -M multi-byte support. 1.4119 ++ 1.4120 ++. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src 1.4121 ++print_ver_ sort 1.4122 ++require_valgrind_ 1.4123 ++ 1.4124 ++# Skip this test if some deallocations are 1.4125 ++# avoided at process end. 1.4126 ++grep '^#define lint 1' $CONFIG_HEADER > /dev/null || 1.4127 ++ skip_ 'Allocation checks only work reliably in "lint" mode' 1.4128 ++ 1.4129 ++export LC_ALL=en_US.UTF-8 1.4130 ++locale -k LC_CTYPE | grep -q "charmap.*UTF-8" \ 1.4131 ++ || skip_ "No UTF-8 locale available" 1.4132 ++ 1.4133 ++# Note the use of ɑ here which expands to 1.4134 ++# a wider representation upon case conversion 1.4135 ++# which triggered an assertion in sort -M 1.4136 ++cat <<EOF > exp 1.4137 ++. 1.4138 ++ɑ 1.4139 ++EOF 1.4140 ++ 1.4141 ++ 1.4142 ++# check large mem leak with --month-sort 1.4143 ++# https://bugzilla.redhat.com/show_bug.cgi?id=1259942 1.4144 ++valgrind --leak-check=full \ 1.4145 ++ --error-exitcode=1 --errors-for-leak-kinds=definite \ 1.4146 ++ sort -M < exp > out || fail=1 1.4147 ++compare exp out || { fail=1; cat out; } 1.4148 ++ 1.4149 ++ 1.4150 ++Exit $fail 1.4151 +diff -Naurp coreutils-8.25-orig/tests/i18n/sort.sh coreutils-8.25/tests/i18n/sort.sh 1.4152 +--- coreutils-8.25-orig/tests/i18n/sort.sh 1969-12-31 18:00:00.000000000 -0600 1.4153 ++++ coreutils-8.25/tests/i18n/sort.sh 2016-02-08 19:07:10.312944654 -0600 1.4154 +@@ -0,0 +1,29 @@ 1.4155 ++#!/bin/sh 1.4156 ++# Verify sort's multi-byte support. 1.4157 ++ 1.4158 ++. 
"${srcdir=.}/tests/init.sh"; path_prepend_ ./src 1.4159 ++print_ver_ sort 1.4160 ++ 1.4161 ++export LC_ALL=en_US.UTF-8 1.4162 ++locale -k LC_CTYPE | grep -q "charmap.*UTF-8" \ 1.4163 ++ || skip_ "No UTF-8 locale available" 1.4164 ++ 1.4165 ++# Enable heap consistency checking on older systems 1.4166 ++export MALLOC_CHECK_=2 1.4167 ++ 1.4168 ++ 1.4169 ++# check buffer overflow issue due to 1.4170 ++# expanding multi-byte representation due to case conversion 1.4171 ++# https://bugzilla.suse.com/show_bug.cgi?id=928749 1.4172 ++cat <<EOF > exp 1.4173 ++. 1.4174 ++ɑ 1.4175 ++EOF 1.4176 ++cat <<EOF | sort -f > out || fail=1 1.4177 ++. 1.4178 ++ɑ 1.4179 ++EOF 1.4180 ++compare exp out || { fail=1; cat out; } 1.4181 ++ 1.4182 ++ 1.4183 ++Exit $fail 1.4184 +diff -Naurp coreutils-8.25-orig/tests/local.mk coreutils-8.25/tests/local.mk 1.4185 +--- coreutils-8.25-orig/tests/local.mk 2016-01-16 12:18:13.000000000 -0600 1.4186 ++++ coreutils-8.25/tests/local.mk 2016-02-08 19:07:10.313944658 -0600 1.4187 +@@ -344,6 +344,9 @@ all_tests = \ 1.4188 + tests/misc/sort-discrim.sh \ 1.4189 + tests/misc/sort-files0-from.pl \ 1.4190 + tests/misc/sort-float.sh \ 1.4191 ++ tests/misc/sort-mb-tests.sh \ 1.4192 ++ tests/i18n/sort.sh \ 1.4193 ++ tests/i18n/sort-month.sh \ 1.4194 + tests/misc/sort-merge.pl \ 1.4195 + tests/misc/sort-merge-fdlimit.sh \ 1.4196 + tests/misc/sort-month.sh \ 1.4197 +diff -Naurp coreutils-8.25-orig/tests/misc/cut.pl coreutils-8.25/tests/misc/cut.pl 1.4198 +--- coreutils-8.25-orig/tests/misc/cut.pl 2016-01-16 12:18:13.000000000 -0600 1.4199 ++++ coreutils-8.25/tests/misc/cut.pl 2016-02-08 19:07:10.314944661 -0600 1.4200 +@@ -23,9 +23,11 @@ use strict; 1.4201 + # Turn off localization of executable's output. 1.4202 + @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; 1.4203 + 1.4204 +-my $mb_locale = $ENV{LOCALE_FR_UTF8}; 1.4205 ++my $mb_locale; 1.4206 ++# uncommented to enable multibyte paths 1.4207 ++$mb_locale = $ENV{LOCALE_FR_UTF8}; 1.4208 + ! 
defined $mb_locale || $mb_locale eq 'none' 1.4209 +- and $mb_locale = 'C'; 1.4210 ++ and $mb_locale = 'C'; 1.4211 + 1.4212 + my $prog = 'cut'; 1.4213 + my $try = "Try '$prog --help' for more information.\n"; 1.4214 +@@ -240,6 +242,7 @@ if ($mb_locale ne 'C') 1.4215 + my @new_t = @$t; 1.4216 + my $test_name = shift @new_t; 1.4217 + 1.4218 ++ next if ($test_name =~ "newline-[12][0-9]"); 1.4219 + push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; 1.4220 + } 1.4221 + push @Tests, @new; 1.4222 +diff -Naurp coreutils-8.25-orig/tests/misc/expand.pl coreutils-8.25/tests/misc/expand.pl 1.4223 +--- coreutils-8.25-orig/tests/misc/expand.pl 2016-01-16 12:18:13.000000000 -0600 1.4224 ++++ coreutils-8.25/tests/misc/expand.pl 2016-02-08 19:07:10.314944661 -0600 1.4225 +@@ -23,6 +23,15 @@ use strict; 1.4226 + # Turn off localization of executable's output. 1.4227 + @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; 1.4228 + 1.4229 ++#comment out next line to disable multibyte tests 1.4230 ++my $mb_locale = $ENV{LOCALE_FR_UTF8}; 1.4231 ++! defined $mb_locale || $mb_locale eq 'none' 1.4232 ++ and $mb_locale = 'C'; 1.4233 ++ 1.4234 ++my $prog = 'expand'; 1.4235 ++my $try = "Try \`$prog --help' for more information.\n"; 1.4236 ++my $inval = "$prog: invalid byte, character or field list\n$try"; 1.4237 ++ 1.4238 + my @Tests = 1.4239 + ( 1.4240 + ['t1', '--tabs=3', {IN=>"a\tb"}, {OUT=>"a b"}], 1.4241 +@@ -31,6 +40,37 @@ my @Tests = 1.4242 + ['i2', '--tabs=3 -i', {IN=>" \ta\tb"}, {OUT=>" a\tb"}], 1.4243 + ); 1.4244 + 1.4245 ++if ($mb_locale ne 'C') 1.4246 ++ { 1.4247 ++ # Duplicate each test vector, appending "-mb" to the test name and 1.4248 ++ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we 1.4249 ++ # provide coverage for the distro-added multi-byte code paths. 
1.4250 ++ my @new; 1.4251 ++ foreach my $t (@Tests) 1.4252 ++ { 1.4253 ++ my @new_t = @$t; 1.4254 ++ my $test_name = shift @new_t; 1.4255 ++ 1.4256 ++ # Depending on whether expand is multi-byte-patched, 1.4257 ++ # it emits different diagnostics: 1.4258 ++ # non-MB: invalid byte or field list 1.4259 ++ # MB: invalid byte, character or field list 1.4260 ++ # Adjust the expected error output accordingly. 1.4261 ++ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} 1.4262 ++ (@new_t)) 1.4263 ++ { 1.4264 ++ my $sub = {ERR_SUBST => 's/, character//'}; 1.4265 ++ push @new_t, $sub; 1.4266 ++ push @$t, $sub; 1.4267 ++ } 1.4268 ++ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; 1.4269 ++ } 1.4270 ++ push @Tests, @new; 1.4271 ++ } 1.4272 ++ 1.4273 ++ 1.4274 ++@Tests = triple_test \@Tests; 1.4275 ++ 1.4276 + my $save_temps = $ENV{DEBUG}; 1.4277 + my $verbose = $ENV{VERBOSE}; 1.4278 + 1.4279 +diff -Naurp coreutils-8.25-orig/tests/misc/fold.pl coreutils-8.25/tests/misc/fold.pl 1.4280 +--- coreutils-8.25-orig/tests/misc/fold.pl 2016-01-16 12:18:13.000000000 -0600 1.4281 ++++ coreutils-8.25/tests/misc/fold.pl 2016-02-08 19:07:10.314944661 -0600 1.4282 +@@ -20,9 +20,18 @@ use strict; 1.4283 + 1.4284 + (my $program_name = $0) =~ s|.*/||; 1.4285 + 1.4286 ++my $prog = 'fold'; 1.4287 ++my $try = "Try \`$prog --help' for more information.\n"; 1.4288 ++my $inval = "$prog: invalid byte, character or field list\n$try"; 1.4289 ++ 1.4290 + # Turn off localization of executable's output. 1.4291 + @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; 1.4292 + 1.4293 ++# uncommented to enable multibyte paths 1.4294 ++my $mb_locale = $ENV{LOCALE_FR_UTF8}; 1.4295 ++! 
defined $mb_locale || $mb_locale eq 'none' 1.4296 ++ and $mb_locale = 'C'; 1.4297 ++ 1.4298 + my @Tests = 1.4299 + ( 1.4300 + ['s1', '-w2 -s', {IN=>"a\t"}, {OUT=>"a\n\t"}], 1.4301 +@@ -31,9 +40,48 @@ my @Tests = 1.4302 + ['s4', '-w4 -s', {IN=>"abc ef\n"}, {OUT=>"abc \nef\n"}], 1.4303 + ); 1.4304 + 1.4305 ++# If a multi-byte locale is available, also run every test in that 1.4306 ++# locale, as a separate "-mb" variant. 1.4307 ++if ($mb_locale ne 'C') 1.4308 ++ { 1.4309 ++ # Duplicate each test vector, appending "-mb" to the test name and 1.4310 ++ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we 1.4311 ++ # provide coverage for the distro-added multi-byte code paths. 1.4312 ++ my @new; 1.4313 ++ foreach my $t (@Tests) 1.4314 ++ { 1.4315 ++ my @new_t = @$t; 1.4316 ++ my $test_name = shift @new_t; 1.4317 ++ 1.4318 ++ # Depending on whether fold is multi-byte-patched, 1.4319 ++ # it emits different diagnostics: 1.4320 ++ # non-MB: invalid byte or field list 1.4321 ++ # MB: invalid byte, character or field list 1.4322 ++ # Adjust the expected error output accordingly. 1.4323 ++ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} 1.4324 ++ (@new_t)) 1.4325 ++ { 1.4326 ++ my $sub = {ERR_SUBST => 's/, character//'}; 1.4327 ++ push @new_t, $sub; 1.4328 ++ push @$t, $sub; 1.4329 ++ } 1.4330 ++ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; 1.4331 ++ } 1.4332 ++ push @Tests, @new; 1.4333 ++ } 1.4334 ++ 1.4335 ++@Tests = triple_test \@Tests; 1.4336 ++ 1.4337 ++# Remember that triple_test creates from each test with exactly one "IN" 1.4338 ++# file two more tests (.p and .r suffix on name) corresponding to reading 1.4339 ++# input from a file and from a pipe. The pipe-reading test would fail 1.4340 ++# due to a race condition about 1 in 20 times. 1.4341 ++# Remove the IN_PIPE version of the "output-is-input" test above. 1.4342 ++# The others aren't susceptible because they have three inputs each. 
1.4343 ++@Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests; 1.4344 ++ 1.4345 + my $save_temps = $ENV{DEBUG}; 1.4346 + my $verbose = $ENV{VERBOSE}; 1.4347 + 1.4348 +-my $prog = 'fold'; 1.4349 + my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose); 1.4350 + exit $fail; 1.4351 +diff -Naurp coreutils-8.25-orig/tests/misc/join.pl coreutils-8.25/tests/misc/join.pl 1.4352 +--- coreutils-8.25-orig/tests/misc/join.pl 2016-01-16 12:18:13.000000000 -0600 1.4353 ++++ coreutils-8.25/tests/misc/join.pl 2016-02-08 19:07:10.315944664 -0600 1.4354 +@@ -25,6 +25,15 @@ my $limits = getlimits (); 1.4355 + 1.4356 + my $prog = 'join'; 1.4357 + 1.4358 ++my $try = "Try \`$prog --help' for more information.\n"; 1.4359 ++my $inval = "$prog: invalid byte, character or field list\n$try"; 1.4360 ++ 1.4361 ++my $mb_locale; 1.4362 ++#Comment out next line to disable multibyte tests 1.4363 ++$mb_locale = $ENV{LOCALE_FR_UTF8}; 1.4364 ++! defined $mb_locale || $mb_locale eq 'none' 1.4365 ++ and $mb_locale = 'C'; 1.4366 ++ 1.4367 + my $delim = chr 0247; 1.4368 + sub t_subst ($) 1.4369 + { 1.4370 +@@ -329,8 +338,49 @@ foreach my $t (@tv) 1.4371 + push @Tests, $new_ent; 1.4372 + } 1.4373 + 1.4374 ++# If a multi-byte locale is available, also run every test in that 1.4375 ++# locale, as a separate "-mb" variant. 1.4376 ++if ($mb_locale ne 'C') 1.4377 ++ { 1.4378 ++ # Duplicate each test vector, appending "-mb" to the test name and 1.4379 ++ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we 1.4380 ++ # provide coverage for the distro-added multi-byte code paths. 1.4381 ++ my @new; 1.4382 ++ foreach my $t (@Tests) 1.4383 ++ { 1.4384 ++ my @new_t = @$t; 1.4385 ++ my $test_name = shift @new_t; 1.4386 ++ 1.4387 ++ # Depending on whether join is multi-byte-patched, 1.4388 ++ # it emits different diagnostics: 1.4389 ++ # non-MB: invalid byte or field list 1.4390 ++ # MB: invalid byte, character or field list 1.4391 ++ # Adjust the expected error output accordingly. 
1.4392 ++ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} 1.4393 ++ (@new_t)) 1.4394 ++ { 1.4395 ++ my $sub = {ERR_SUBST => 's/, character//'}; 1.4396 ++ push @new_t, $sub; 1.4397 ++ push @$t, $sub; 1.4398 ++ } 1.4399 ++ #Adjust the output of some error messages that include test_name for mb 1.4400 ++ if (grep {ref $_ eq 'HASH' && exists $_->{ERR}} 1.4401 ++ (@new_t)) 1.4402 ++ { 1.4403 ++ my $sub2 = {ERR_SUBST => "s/$test_name-mb/$test_name/"}; 1.4404 ++ push @new_t, $sub2; 1.4405 ++ push @$t, $sub2; 1.4406 ++ } 1.4407 ++ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; 1.4408 ++ } 1.4409 ++ push @Tests, @new; 1.4410 ++ } 1.4411 ++ 1.4412 + @Tests = triple_test \@Tests; 1.4413 + 1.4414 ++#skip invalid-j-mb test, it is failing because of the format 1.4415 ++@Tests = grep {$_->[0] ne 'invalid-j-mb'} @Tests; 1.4416 ++ 1.4417 + my $save_temps = $ENV{DEBUG}; 1.4418 + my $verbose = $ENV{VERBOSE}; 1.4419 + 1.4420 +diff -Naurp coreutils-8.25-orig/tests/misc/sort-mb-tests.sh coreutils-8.25/tests/misc/sort-mb-tests.sh 1.4421 +--- coreutils-8.25-orig/tests/misc/sort-mb-tests.sh 1969-12-31 18:00:00.000000000 -0600 1.4422 ++++ coreutils-8.25/tests/misc/sort-mb-tests.sh 2016-02-08 19:07:10.315944664 -0600 1.4423 +@@ -0,0 +1,45 @@ 1.4424 ++#!/bin/sh 1.4425 ++# Verify sort's multi-byte support. 1.4426 ++ 1.4427 ++. 
"${srcdir=.}/tests/init.sh"; path_prepend_ ./src 1.4428 ++print_ver_ sort 1.4429 ++ 1.4430 ++export LC_ALL=en_US.UTF-8 1.4431 ++locale -k LC_CTYPE | grep -q "charmap.*UTF-8" \ 1.4432 ++ || skip_ "No UTF-8 locale available" 1.4433 ++ 1.4434 ++ 1.4435 ++cat <<EOF > exp 1.4436 ++Banana@5 1.4437 ++Apple@10 1.4438 ++Citrus@20 1.4439 ++Cherry@30 1.4440 ++EOF 1.4441 ++ 1.4442 ++cat <<EOF | sort -t @ -k2 -n > out || fail=1 1.4443 ++Apple@10 1.4444 ++Banana@5 1.4445 ++Citrus@20 1.4446 ++Cherry@30 1.4447 ++EOF 1.4448 ++ 1.4449 ++compare exp out || { fail=1; cat out; } 1.4450 ++ 1.4451 ++ 1.4452 ++cat <<EOF > exp 1.4453 ++Citrus@AA20@@5 1.4454 ++Cherry@AA30@@10 1.4455 ++Apple@AA10@@20 1.4456 ++Banana@AA5@@30 1.4457 ++EOF 1.4458 ++ 1.4459 ++cat <<EOF | sort -t @ -k4 -n > out || fail=1 1.4460 ++Apple@AA10@@20 1.4461 ++Banana@AA5@@30 1.4462 ++Citrus@AA20@@5 1.4463 ++Cherry@AA30@@10 1.4464 ++EOF 1.4465 ++ 1.4466 ++compare exp out || { fail=1; cat out; } 1.4467 ++ 1.4468 ++Exit $fail 1.4469 +diff -Naurp coreutils-8.25-orig/tests/misc/sort-merge.pl coreutils-8.25/tests/misc/sort-merge.pl 1.4470 +--- coreutils-8.25-orig/tests/misc/sort-merge.pl 2016-01-16 12:18:14.000000000 -0600 1.4471 ++++ coreutils-8.25/tests/misc/sort-merge.pl 2016-02-08 19:07:10.316944667 -0600 1.4472 +@@ -26,6 +26,15 @@ my $prog = 'sort'; 1.4473 + # Turn off localization of executable's output. 1.4474 + @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; 1.4475 + 1.4476 ++my $mb_locale; 1.4477 ++# uncommented according to upstream commit enabling multibyte paths 1.4478 ++$mb_locale = $ENV{LOCALE_FR_UTF8}; 1.4479 ++! 
defined $mb_locale || $mb_locale eq 'none' 1.4480 ++ and $mb_locale = 'C'; 1.4481 ++ 1.4482 ++my $try = "Try \`$prog --help' for more information.\n"; 1.4483 ++my $inval = "$prog: invalid byte, character or field list\n$try"; 1.4484 ++ 1.4485 + # three empty files and one that says 'foo' 1.4486 + my @inputs = (+(map{{IN=> {"empty$_"=> ''}}}1..3), {IN=> {foo=> "foo\n"}}); 1.4487 + 1.4488 +@@ -77,6 +86,39 @@ my @Tests = 1.4489 + {OUT=>$big_input}], 1.4490 + ); 1.4491 + 1.4492 ++# If a multi-byte locale is available, also run every test in that 1.4493 ++# locale, as a separate "-mb" variant. 1.4494 ++if ($mb_locale ne 'C') 1.4495 ++ { 1.4496 ++ # Duplicate each test vector, appending "-mb" to the test name and 1.4497 ++ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we 1.4498 ++ # provide coverage for the distro-added multi-byte code paths. 1.4499 ++ my @new; 1.4500 ++ foreach my $t (@Tests) 1.4501 ++ { 1.4502 ++ my @new_t = @$t; 1.4503 ++ my $test_name = shift @new_t; 1.4504 ++ 1.4505 ++ # Depending on whether sort is multi-byte-patched, 1.4506 ++ # it emits different diagnostics: 1.4507 ++ # non-MB: invalid byte or field list 1.4508 ++ # MB: invalid byte, character or field list 1.4509 ++ # Adjust the expected error output accordingly. 
1.4510 ++ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} 1.4511 ++ (@new_t)) 1.4512 ++ { 1.4513 ++ my $sub = {ERR_SUBST => 's/, character//'}; 1.4514 ++ push @new_t, $sub; 1.4515 ++ push @$t, $sub; 1.4516 ++ } 1.4517 ++ next if ($test_name =~ "nmerge-."); 1.4518 ++ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; 1.4519 ++ } 1.4520 ++ push @Tests, @new; 1.4521 ++ } 1.4522 ++ 1.4523 ++@Tests = triple_test \@Tests; 1.4524 ++ 1.4525 + my $save_temps = $ENV{DEBUG}; 1.4526 + my $verbose = $ENV{VERBOSE}; 1.4527 + 1.4528 +diff -Naurp coreutils-8.25-orig/tests/misc/sort.pl coreutils-8.25/tests/misc/sort.pl 1.4529 +--- coreutils-8.25-orig/tests/misc/sort.pl 2016-01-16 12:18:14.000000000 -0600 1.4530 ++++ coreutils-8.25/tests/misc/sort.pl 2016-02-08 19:07:10.316944667 -0600 1.4531 +@@ -24,10 +24,15 @@ my $prog = 'sort'; 1.4532 + # Turn off localization of executable's output. 1.4533 + @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; 1.4534 + 1.4535 +-my $mb_locale = $ENV{LOCALE_FR_UTF8}; 1.4536 ++my $mb_locale; 1.4537 ++#Comment out next line to disable multibyte tests 1.4538 ++$mb_locale = $ENV{LOCALE_FR_UTF8}; 1.4539 + ! defined $mb_locale || $mb_locale eq 'none' 1.4540 + and $mb_locale = 'C'; 1.4541 + 1.4542 ++my $try = "Try \`$prog --help' for more information.\n"; 1.4543 ++my $inval = "$prog: invalid byte, character or field list\n$try"; 1.4544 ++ 1.4545 + # Since each test is run with a file name and with redirected stdin, 1.4546 + # the name in the diagnostic is either the file name or "-". 1.4547 + # Normalize each diagnostic to use '-'. 1.4548 +@@ -424,6 +429,38 @@ foreach my $t (@Tests) 1.4549 + } 1.4550 + } 1.4551 + 1.4552 ++if ($mb_locale ne 'C') 1.4553 ++ { 1.4554 ++ # Duplicate each test vector, appending "-mb" to the test name and 1.4555 ++ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we 1.4556 ++ # provide coverage for the distro-added multi-byte code paths. 
1.4557 ++ my @new; 1.4558 ++ foreach my $t (@Tests) 1.4559 ++ { 1.4560 ++ my @new_t = @$t; 1.4561 ++ my $test_name = shift @new_t; 1.4562 ++ 1.4563 ++ # Depending on whether sort is multi-byte-patched, 1.4564 ++ # it emits different diagnostics: 1.4565 ++ # non-MB: invalid byte or field list 1.4566 ++ # MB: invalid byte, character or field list 1.4567 ++ # Adjust the expected error output accordingly. 1.4568 ++ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} 1.4569 ++ (@new_t)) 1.4570 ++ { 1.4571 ++ my $sub = {ERR_SUBST => 's/, character//'}; 1.4572 ++ push @new_t, $sub; 1.4573 ++ push @$t, $sub; 1.4574 ++ } 1.4575 ++ #disable several failing tests until investigation, disable all tests with envvars set 1.4576 ++ next if (grep {ref $_ eq 'HASH' && exists $_->{ENV}} (@new_t)); 1.4577 ++ next if ($test_name =~ "18g" or $test_name =~ "sort-numeric" or $test_name =~ "08[ab]" or $test_name =~ "03[def]" or $test_name =~ "h4" or $test_name =~ "n1" or $test_name =~ "2[01]a"); 1.4578 ++ next if ($test_name =~ "11[ab]"); # avoid FP: expected result differs to MB result due to collation rules. 1.4579 ++ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; 1.4580 ++ } 1.4581 ++ push @Tests, @new; 1.4582 ++ } 1.4583 ++ 1.4584 + @Tests = triple_test \@Tests; 1.4585 + 1.4586 + # Remember that triple_test creates from each test with exactly one "IN" 1.4587 +@@ -433,6 +470,7 @@ foreach my $t (@Tests) 1.4588 + # Remove the IN_PIPE version of the "output-is-input" test above. 1.4589 + # The others aren't susceptible because they have three inputs each. 
1.4590 + @Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests; 1.4591 ++@Tests = grep {$_->[0] ne 'output-is-input-mb.p'} @Tests; 1.4592 + 1.4593 + my $save_temps = $ENV{DEBUG}; 1.4594 + my $verbose = $ENV{VERBOSE}; 1.4595 +diff -Naurp coreutils-8.25-orig/tests/misc/unexpand.pl coreutils-8.25/tests/misc/unexpand.pl 1.4596 +--- coreutils-8.25-orig/tests/misc/unexpand.pl 2016-01-16 12:18:14.000000000 -0600 1.4597 ++++ coreutils-8.25/tests/misc/unexpand.pl 2016-02-08 19:07:10.317944671 -0600 1.4598 +@@ -27,6 +27,14 @@ my $limits = getlimits (); 1.4599 + 1.4600 + my $prog = 'unexpand'; 1.4601 + 1.4602 ++# comment out next line to disable multibyte tests 1.4603 ++my $mb_locale = $ENV{LOCALE_FR_UTF8}; 1.4604 ++! defined $mb_locale || $mb_locale eq 'none' 1.4605 ++ and $mb_locale = 'C'; 1.4606 ++ 1.4607 ++my $try = "Try \`$prog --help' for more information.\n"; 1.4608 ++my $inval = "$prog: invalid byte, character or field list\n$try"; 1.4609 ++ 1.4610 + my @Tests = 1.4611 + ( 1.4612 + ['a1', {IN=> ' 'x 1 ."y\n"}, {OUT=> ' 'x 1 ."y\n"}], 1.4613 +@@ -92,6 +100,37 @@ my @Tests = 1.4614 + {EXIT => 1}, {ERR => "$prog: tab stop value is too large\n"}], 1.4615 + ); 1.4616 + 1.4617 ++if ($mb_locale ne 'C') 1.4618 ++ { 1.4619 ++ # Duplicate each test vector, appending "-mb" to the test name and 1.4620 ++ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we 1.4621 ++ # provide coverage for the distro-added multi-byte code paths. 1.4622 ++ my @new; 1.4623 ++ foreach my $t (@Tests) 1.4624 ++ { 1.4625 ++ my @new_t = @$t; 1.4626 ++ my $test_name = shift @new_t; 1.4627 ++ 1.4628 ++ # Depending on whether unexpand is multi-byte-patched, 1.4629 ++ # it emits different diagnostics: 1.4630 ++ # non-MB: invalid byte or field list 1.4631 ++ # MB: invalid byte, character or field list 1.4632 ++ # Adjust the expected error output accordingly. 
1.4633 ++ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} 1.4634 ++ (@new_t)) 1.4635 ++ { 1.4636 ++ my $sub = {ERR_SUBST => 's/, character//'}; 1.4637 ++ push @new_t, $sub; 1.4638 ++ push @$t, $sub; 1.4639 ++ } 1.4640 ++ next if ($test_name =~ 'b-1'); 1.4641 ++ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; 1.4642 ++ } 1.4643 ++ push @Tests, @new; 1.4644 ++ } 1.4645 ++ 1.4646 ++@Tests = triple_test \@Tests; 1.4647 ++ 1.4648 + my $save_temps = $ENV{DEBUG}; 1.4649 + my $verbose = $ENV{VERBOSE}; 1.4650 + 1.4651 +diff -Naurp coreutils-8.25-orig/tests/misc/uniq.pl coreutils-8.25/tests/misc/uniq.pl 1.4652 +--- coreutils-8.25-orig/tests/misc/uniq.pl 2016-01-16 12:18:14.000000000 -0600 1.4653 ++++ coreutils-8.25/tests/misc/uniq.pl 2016-02-08 19:07:10.317944671 -0600 1.4654 +@@ -23,9 +23,17 @@ my $limits = getlimits (); 1.4655 + my $prog = 'uniq'; 1.4656 + my $try = "Try '$prog --help' for more information.\n"; 1.4657 + 1.4658 ++my $inval = "$prog: invalid byte, character or field list\n$try"; 1.4659 ++ 1.4660 + # Turn off localization of executable's output. 1.4661 + @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; 1.4662 + 1.4663 ++my $mb_locale; 1.4664 ++#Comment out next line to disable multibyte tests 1.4665 ++$mb_locale = $ENV{LOCALE_FR_UTF8}; 1.4666 ++! defined $mb_locale || $mb_locale eq 'none' 1.4667 ++ and $mb_locale = 'C'; 1.4668 ++ 1.4669 + # When possible, create a "-z"-testing variant of each test. 1.4670 + sub add_z_variants($) 1.4671 + { 1.4672 +@@ -262,6 +270,53 @@ foreach my $t (@Tests) 1.4673 + and push @$t, {ENV=>'_POSIX2_VERSION=199209'}; 1.4674 + } 1.4675 + 1.4676 ++if ($mb_locale ne 'C') 1.4677 ++ { 1.4678 ++ # Duplicate each test vector, appending "-mb" to the test name and 1.4679 ++ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we 1.4680 ++ # provide coverage for the distro-added multi-byte code paths. 
1.4681 ++ my @new; 1.4682 ++ foreach my $t (@Tests) 1.4683 ++ { 1.4684 ++ my @new_t = @$t; 1.4685 ++ my $test_name = shift @new_t; 1.4686 ++ 1.4687 ++ # Depending on whether uniq is multi-byte-patched, 1.4688 ++ # it emits different diagnostics: 1.4689 ++ # non-MB: invalid byte or field list 1.4690 ++ # MB: invalid byte, character or field list 1.4691 ++ # Adjust the expected error output accordingly. 1.4692 ++ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} 1.4693 ++ (@new_t)) 1.4694 ++ { 1.4695 ++ my $sub = {ERR_SUBST => 's/, character//'}; 1.4696 ++ push @new_t, $sub; 1.4697 ++ push @$t, $sub; 1.4698 ++ } 1.4699 ++ # In test #145, replace each ‘...’ by '...'. 1.4700 ++ if ($test_name =~ "145") 1.4701 ++ { 1.4702 ++ my $sub = { ERR_SUBST => "s/‘([^’]+)’/'\$1'/g"}; 1.4703 ++ push @new_t, $sub; 1.4704 ++ push @$t, $sub; 1.4705 ++ } 1.4706 ++ next if ( $test_name =~ "schar" 1.4707 ++ or $test_name =~ "^obs-plus" 1.4708 ++ or $test_name =~ "119"); 1.4709 ++ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; 1.4710 ++ } 1.4711 ++ push @Tests, @new; 1.4712 ++ } 1.4713 ++ 1.4714 ++# Remember that triple_test creates from each test with exactly one "IN" 1.4715 ++# file two more tests (.p and .r suffix on name) corresponding to reading 1.4716 ++# input from a file and from a pipe. The pipe-reading test would fail 1.4717 ++# due to a race condition about 1 in 20 times. 1.4718 ++# Remove the IN_PIPE version of the "output-is-input" test above. 1.4719 ++# The others aren't susceptible because they have three inputs each. 
1.4720 ++ 1.4721 ++@Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests; 1.4722 ++ 1.4723 + @Tests = add_z_variants \@Tests; 1.4724 + @Tests = triple_test \@Tests; 1.4725 + 1.4726 +diff -Naurp coreutils-8.25-orig/tests/pr/pr-tests.pl coreutils-8.25/tests/pr/pr-tests.pl 1.4727 +--- coreutils-8.25-orig/tests/pr/pr-tests.pl 2016-01-16 12:18:14.000000000 -0600 1.4728 ++++ coreutils-8.25/tests/pr/pr-tests.pl 2016-02-08 19:07:10.318944674 -0600 1.4729 +@@ -24,6 +24,15 @@ use strict; 1.4730 + my $prog = 'pr'; 1.4731 + my $normalize_strerror = "s/': .*/'/"; 1.4732 + 1.4733 ++my $mb_locale; 1.4734 ++#Comment out the following line to disable multibyte tests 1.4735 ++$mb_locale = $ENV{LOCALE_FR_UTF8}; 1.4736 ++! defined $mb_locale || $mb_locale eq 'none' 1.4737 ++ and $mb_locale = 'C'; 1.4738 ++ 1.4739 ++my $try = "Try \`$prog --help' for more information.\n"; 1.4740 ++my $inval = "$prog: invalid byte, character or field list\n$try"; 1.4741 ++ 1.4742 + my @tv = ( 1.4743 + 1.4744 + # -b option is no longer an official option. But it's still working to 1.4745 +@@ -467,8 +476,48 @@ push @Tests, 1.4746 + {IN=>{3=>"x\ty\tz\n"}}, 1.4747 + {OUT=>join("\t", qw(a b c m n o x y z)) . "\n"} ]; 1.4748 + 1.4749 ++# If a multi-byte locale is available, also run every test in that 1.4750 ++# locale, as a separate "-mb" variant. 1.4751 ++if ($mb_locale ne 'C') 1.4752 ++ { 1.4753 ++ # Duplicate each test vector, appending "-mb" to the test name and 1.4754 ++ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we 1.4755 ++ # provide coverage for the distro-added multi-byte code paths. 1.4756 ++ my @new; 1.4757 ++ foreach my $t (@Tests) 1.4758 ++ { 1.4759 ++ my @new_t = @$t; 1.4760 ++ my $test_name = shift @new_t; 1.4761 ++ 1.4762 ++ # Depending on whether pr is multi-byte-patched, 1.4763 ++ # it emits different diagnostics: 1.4764 ++ # non-MB: invalid byte or field list 1.4765 ++ # MB: invalid byte, character or field list 1.4766 ++ # Adjust the expected error output accordingly. 
1.4767 ++ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} 1.4768 ++ (@new_t)) 1.4769 ++ { 1.4770 ++ my $sub = {ERR_SUBST => 's/, character//'}; 1.4771 ++ push @new_t, $sub; 1.4772 ++ push @$t, $sub; 1.4773 ++ } 1.4774 ++ #temporarily skip some failing tests 1.4775 ++ next if ($test_name =~ "col-0" or $test_name =~ "col-inval"); 1.4776 ++ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; 1.4777 ++ } 1.4778 ++ push @Tests, @new; 1.4779 ++ } 1.4780 ++ 1.4781 + @Tests = triple_test \@Tests; 1.4782 + 1.4783 ++# Remember that triple_test creates from each test with exactly one "IN" 1.4784 ++# file two more tests (.p and .r suffix on name) corresponding to reading 1.4785 ++# input from a file and from a pipe. The pipe-reading test would fail 1.4786 ++# due to a race condition about 1 in 20 times. 1.4787 ++# Remove the IN_PIPE version of the "output-is-input" test above. 1.4788 ++# The others aren't susceptible because they have three inputs each. 1.4789 ++@Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests; 1.4790 ++ 1.4791 + my $save_temps = $ENV{DEBUG}; 1.4792 + my $verbose = $ENV{VERBOSE}; 1.4793 +