cookutils rev 846

cook: add normalize_mo(); update doc/cookopts.txt; tiny edits.
author Aleksej Bobylev <al.bobylev@gmail.com>
date Mon Dec 12 05:56:11 2016 +0200 (2016-12-12)
parents a3a05fc499d0
children 30c6b6064a50
files cook doc/cookopts.txt
line diff
     1.1 --- a/cook	Sun Dec 11 15:46:38 2016 +0100
     1.2 +++ b/cook	Mon Dec 12 05:56:11 2016 +0200
     1.3 @@ -629,7 +629,7 @@
     1.4  	[ ! -d "$fs/usr/share/locale" ] && return
     1.5  	[ -z "$(find $fs/usr/share/locale -type f -name '*.mo')" ] && return
     1.6  
     1.7 -	action 'Stripping translations files...'
     1.8 +	action 'Thin out translation files...'
     1.9  	local size0=$(find $fs/usr/share/locale -type f -name '*.mo' -exec ls -l \{\} \; | awk '{s+=$5}END{print s}')
    1.10  	local time0=$(date +%s)
    1.11  
    1.12 @@ -660,6 +660,136 @@
    1.13  }
    1.14  
    1.15  
     1.16 +# Normalize all *.mo files: unconditionally convert to UTF-8; remove strings that are not really
     1.17 +# translated (msgid = msgstr)
    1.18 +# Normalization can be disabled with COOKOPTS="!monorm"
    1.19 +
     1.20 +normalize_mo() {
     1.21 +	[ "${COOKOPTS/!monorm/}" != "$COOKOPTS" ] && return
     1.22 +	[ -z "$(find $install -type f -name '*.mo')" ] && return
     1.23 +
     1.24 +	action 'Normalizing mo files...'
     1.25 +	local size0=$(find $install -type f -name '*.mo' -exec ls -l \{\} \; | awk '{s+=$5}END{print s}')
     1.26 +	local time0=$(date +%s)
     1.27 +
     1.28 +	# gettext supplies the tools used below: msgunfmt, msguniq, msgconv, msgfmt
     1.29 +	tazpkg -gi gettext      --quiet
     1.30 +	# glibc-locale: gconv modules (presumably what msgconv needs to convert to UTF-8)
     1.31 +	tazpkg -gi glibc-locale --quiet
     1.32 +
     1.33 +	# Process every *.mo file under $install. NOTE(review): unquoted $(find) splits paths on whitespace
     1.34 +	for mo in $(find "$install" -type f -name '*.mo'); do
     1.35 +		tmpfile="$(mktemp)"
     1.36 +
     1.37 +		msgunfmt "$mo" | msguniq | msgconv -o "$tmpfile" -t 'UTF-8'
     1.38 +		# Append an empty line so the awk script below also flushes the last catalog entry
     1.39 +		echo >> "$tmpfile"
     1.40 +
     1.41 +		# Extract the Plural-Forms line from the header entry (kept in $tmpfile.pf)
     1.42 +		awk '
     1.43 +		BEGIN { skip = ""; }
     1.44 +		{
     1.45 +			if (! skip) {
     1.46 +				s = $0;
     1.47 +				gsub(/^[^\"]*\"/, "", s);
     1.48 +				gsub(/\"$/, "", s);
     1.49 +				printf("%s", s);
     1.50 +			}
     1.51 +			if (! $0) skip = "yes";
     1.52 +		}
     1.53 +		' "$tmpfile" | sed 's|\\n|\n|g' | grep "^Plural-Forms:" > "$tmpfile.pf"
     1.54 +
     1.55 +		if ! grep -q 'msgid_plural' "$tmpfile"; then
     1.56 +			echo > "$tmpfile.pf"
     1.57 +		fi
     1.58 +
     1.59 +		# Rewrite the catalog: minimal header, then only entries where msgid differs from msgstr
     1.60 +		awk -v pf="$(cat "$tmpfile.pf")" '
     1.61 +		function clean() {
     1.62 +			mode = msgctxt = msgid = msgid_plural = msgstr = msgstr0 = msgstr1 = msgstr2 = msgstr3 = msgstr4 = msgstr5 = "";
     1.63 +		}
     1.64 +
     1.65 +		function getstring() {
     1.66 +			# Skip unquoted words at the beginning (msgid, msgstr...) and get string from inside quotes
     1.67 +			s = $0;
     1.68 +			gsub(/^[^\"]*\"/, "", s);
     1.69 +			gsub(/\"$/, "", s);
     1.70 +			return s;
     1.71 +		}
     1.72 +
     1.73 +		BEGIN {
     1.74 +			printf("msgid \"\"\nmsgstr \"\"\n\"Content-Type: text/plain; charset=UTF-8\\n\"\n");
     1.75 +			if (pf)
     1.76 +				printf("\"%s\\n\"\n", pf);
     1.77 +			printf("\n");
     1.78 +			skip = 1;
     1.79 +			clean();
     1.80 +		}
     1.81 +
     1.82 +		{
     1.83 +			# Skip the entire header
     1.84 +			if (!skip) {
     1.85 +				if ($1 == "msgctxt" || $1 == "msgid" || $1 == "msgstr" || $1 == "msgid_plural")
     1.86 +					mode = $1;
     1.87 +				if ($1 == "msgstr[0]") mode = "msgstr0";
     1.88 +				if ($1 == "msgstr[1]") mode = "msgstr1";
     1.89 +				if ($1 == "msgstr[2]") mode = "msgstr2";
     1.90 +				if ($1 == "msgstr[3]") mode = "msgstr3";
     1.91 +				if ($1 == "msgstr[4]") mode = "msgstr4";
     1.92 +				if ($1 == "msgstr[5]") mode = "msgstr5";
     1.93 +
     1.94 +				if (mode == "msgctxt")      msgctxt      = msgctxt      getstring();
     1.95 +				if (mode == "msgid")        msgid        = msgid        getstring();
     1.96 +				if (mode == "msgstr")       msgstr       = msgstr       getstring();
     1.97 +				if (mode == "msgid_plural") msgid_plural = msgid_plural getstring();
     1.98 +				if (mode == "msgstr0")      msgstr0      = msgstr0      getstring();
     1.99 +				if (mode == "msgstr1")      msgstr1      = msgstr1      getstring();
    1.100 +				if (mode == "msgstr2")      msgstr2      = msgstr2      getstring();
    1.101 +				if (mode == "msgstr3")      msgstr3      = msgstr3      getstring();
    1.102 +				if (mode == "msgstr4")      msgstr4      = msgstr4      getstring();
    1.103 +				if (mode == "msgstr5")      msgstr5      = msgstr5      getstring();
    1.104 +
    1.105 +				if (! $0) {
    1.106 +					if (msgid != msgstr) {
    1.107 +						if (msgctxt)      printf("msgctxt \"%s\"\n",      msgctxt);
    1.108 +						                  printf("msgid \"%s\"\n",        msgid);
    1.109 +						if (msgid_plural) printf("msgid_plural \"%s\"\n", msgid_plural);
    1.110 +						if (msgstr)       printf("msgstr \"%s\"\n",       msgstr);
    1.111 +						if (msgstr0)      printf("msgstr[0] \"%s\"\n",    msgstr0);
    1.112 +						if (msgstr1)      printf("msgstr[1] \"%s\"\n",    msgstr1);
    1.113 +						if (msgstr2)      printf("msgstr[2] \"%s\"\n",    msgstr2);
    1.114 +						if (msgstr3)      printf("msgstr[3] \"%s\"\n",    msgstr3);
    1.115 +						if (msgstr4)      printf("msgstr[4] \"%s\"\n",    msgstr4);
    1.116 +						if (msgstr5)      printf("msgstr[5] \"%s\"\n",    msgstr5);
    1.117 +						                  printf("\n");
    1.118 +					}
    1.119 +					clean();
    1.120 +				}
    1.121 +			}
    1.122 +			if ($0 == "") skip = "";
    1.123 +		}
    1.124 +		' "$tmpfile" > "$tmpfile.awk"
    1.125 +
    1.126 +		msgfmt "$tmpfile.awk" -o "$tmpfile.mo"
    1.127 +
    1.128 +		if [ -s "$tmpfile.mo" ]; then
    1.129 +			rm "$mo"; mv "$tmpfile.mo" "$mo"
    1.130 +		else
    1.131 +			_ 'Error processing %s' "$mo"
    1.132 +			[ -e "$tmpfile.mo" ] && rm "$tmpfile.mo"
    1.133 +		fi
    1.134 +
    1.135 +		# Remove the temporary files created for this *.mo file
    1.136 +		rm "$tmpfile" "$tmpfile.pf" "$tmpfile.awk"
    1.137 +	done
    1.138 +
    1.139 +	local size1=$(find $install -type f -name '*.mo' -exec ls -l \{\} \; | awk '{s+=$5}END{print s}')
    1.140 +	local time1=$(date +%s)
    1.141 +	status
    1.142 +	comp_summary "$time0" "$time1" "$size0" "$size1"
    1.143 +}
   1.144 +
   1.145 +
   1.146  # Update installed.cook.diff
   1.147  
   1.148  update_installed_cook_diff() {
   1.149 @@ -740,7 +870,7 @@
   1.150  # Function used after compile_rules() to compress all png images
   1.151  # Compressing can be disabled with COOKOPTS="!pngz"
   1.152  
   1.153 -cook_compress_png() {
   1.154 +compress_png() {
   1.155  	[ "${COOKOPTS/!pngz/}" != "$COOKOPTS" ] && return
   1.156  	case "$ARCH" in
   1.157  		arm*) return;; # While SliTaz-arm miss `pngquant` and `optipng`
   1.158 @@ -777,7 +907,7 @@
   1.159  # Function used after compile_rules() to compress all svg images
   1.160  # Compressing can be disabled with COOKOPTS="!svgz"
   1.161  
   1.162 -cook_compress_svg() {
   1.163 +compress_svg() {
   1.164  	[ "${COOKOPTS/!svgz/}" != "$COOKOPTS" ] && return
   1.165  	case "$ARCH" in
   1.166  		arm*) return;; # While SliTaz-arm miss `svgcleaner`
   1.167 @@ -1053,10 +1183,11 @@
   1.168  	if [ -z "$WANTED" ]; then
   1.169  		footer
   1.170  		compress_manpages
   1.171 -		cook_compress_png
   1.172 -		cook_compress_svg
   1.173 +		compress_png
   1.174 +		compress_svg
   1.175  		compress_ui
   1.176  		fix_desktop_files
   1.177 +		normalize_mo
   1.178  	fi
   1.179  	footer
   1.180  
     2.1 --- a/doc/cookopts.txt	Sun Dec 11 15:46:38 2016 +0100
     2.2 +++ b/doc/cookopts.txt	Mon Dec 12 05:56:11 2016 +0200
     2.3 @@ -37,15 +37,15 @@
     2.4      *.mo files.
     2.5      The presence of this option overrides the default action (all existing *.mo
     2.6      files will remain).
     2.7 -    Please note, you can add all the translations to the package, for example, using
     2.8 -    the command: `cook_copy_files *.mo` (then by default only the supported locales
     2.9 -    will be left).
    2.10 +    Please note, you can add all the translations to the package, for example,
    2.11 +    using the command: `cook_copy_files *.mo` (then by default only the
    2.12 +    supported locales will be left).
    2.13  
    2.14  !extradesktops
    2.15 -    Default action is to remove extra information from the desktop files: entries
    2.16 -    such as 'GenericName', 'X-GNOME-FullName' (all X-* are exposed),
    2.17 -    'Terminal=false', Keywords and other sections. This extra information just isn't
    2.18 -    supported in the current (LXDE) environment.
    2.19 +    Default action is to remove extra information from the desktop files:
    2.20 +    entries such as 'GenericName', 'X-GNOME-FullName' (all X-* are exposed),
    2.21 +    'Terminal=false', Keywords and other sections. This extra information just
    2.22 +    isn't supported in the current (LXDE) environment.
    2.23      The presence of this option overrides the default action (extra information
    2.24      will not be removed from the .desktop files).
    2.25  
    2.26 @@ -60,8 +60,8 @@
    2.27  !manz
    2.28      Default action is to compress all man pages.
    2.29      The presence of this option overrides the default action (all man pages will
    2.30 -    be left "as is"). Please note that these files may be installed in a compressed
    2.31 -    format.
    2.32 +    be left "as is"). Please note that these files may be installed in a
    2.33 +    compressed format.
    2.34  
    2.35  !pngz
    2.36      Default action is to compress all PNG images. Image compression allows you
    2.37 @@ -87,9 +87,9 @@
    2.38  op0 to op8
    2.39      Default action is to use optimization level 2 while processing PNG images by
    2.40      `optipng`. Using this option you can set the desired `optipng` optimization
    2.41 -    level. The higher the level, the slower the compression and the smaller the file
    2.42 -    size. Please note that this option has no exclamation mark, because it means
    2.43 -    "don't do", and here, on the contrary, "to do".
    2.44 +    level. The higher the level, the slower the compression and the smaller the
    2.45 +    file size. Please note that this option has no exclamation mark, because it
    2.46 +    means "don't do", and here, on the contrary, "to do".
    2.47  
    2.48  !svgz
    2.49      Default action is to compress all SVG images.
    2.50 @@ -101,3 +101,11 @@
    2.51      of insignificant spaces and comments.
    2.52      The presence of this option overrides the default action (all *.ui and
    2.53      *.glade files will be left "as is").
    2.54 +
    2.55 +!monorm
     2.56 +    Default action is to normalize *.mo files. Normalization means
     2.57 +    unconditional conversion to UTF-8 and removal of accidental duplicates,
     2.58 +    most of the header lines, and catalog entries that are not actually
     2.59 +    translated (when the string in msgid equals the string in msgstr).
    2.60 +    The presence of this option overrides the default action (all *.mo files
    2.61 +    will be left "as is").