From [[Faidon]]: Joey, Attached is a patch that adds locale support to ikiwiki. A suitable locale is chosen in this order: 1. $config{locale} 2. $ENV{LC_ALL} 3. en_US.UTF-8 4. en_*.UTF-8 5. *.UTF-8 6. en_US 7. en_* 8. * 9. POSIX (where * == the first item found) The patch brings the following functionality: 1. Proper local time, either using a UTF-8 locale or not (by means of a new function, decode_locale), 2. Support for UTF-8 (or ISO-8859-X) filenames in SVN. Before this patch, committing (or even rcs_updating) on repositories with UTF-8 filenames is impossible. This is an RFC because it has some hard-coded parts: 'locale -a' and /usr/share/i18n/SUPPORTED. They obviously work on Debian, but I'm sure they won't work on other distros, let alone on other operating systems. Besides that, it's quite a big change and I could use some comments to make it better :) ---- First comments on this: * Defaulting to en_US anything or even en feels wrong. Defaulting to C is standard. * If ikiwiki uses utf-8, why should it cater to non-utf8 locales? If it only supports locales that are utf-8 or simple ascii then it doesn't need to do messy charset conversion and charset determination via the SUPPORTED file. It can just make sure that incoming data is properly interpreted as utf-8 by perl; based on the patch I guess there are still some issues along those lines in the svn filename code. * I don't see any real need to guess at a locale to use with locale -a. If a user wants a locale they should set one. (_Or_, ikiwiki could record the user's own locale settings at wiki setup time, so that the compiled CGI wrapper contains the locale settings in effect when it was built. However, this is likely to have issues with ikiwiki-mass-rebuild. Maybe some kind of tool to generate a setup file, including a locale setting taken from the user's locale would be a useful avenue..) 
--[[Joey]] ---- Index: IkiWiki/Rcs/svn.pm =================================================================== --- IkiWiki/Rcs/svn.pm (revision 904) +++ IkiWiki/Rcs/svn.pm (working copy) @@ -174,16 +236,16 @@ } my $rev=int(possibly_foolish_untaint($ENV{REV})); - my $user=`svnlook author $config{svnrepo} -r $rev`; + my $user=decode_locale(`svnlook author $config{svnrepo} -r $rev`); chomp $user; - my $message=`svnlook log $config{svnrepo} -r $rev`; + my $message=decode_locale(`svnlook log $config{svnrepo} -r $rev`); if ($message=~/$svn_webcommit/) { $user="$1"; $message=$2; } my @changed_pages; - foreach my $change (`svnlook changed $config{svnrepo} -r $rev`) { + foreach my $change (decode_locale(`svnlook changed $config{svnrepo} -r $rev`)) { chomp $change; if ($change =~ /^[A-Z]+\s+\Q$config{svnpath}\E\/(.*)/) { push @changed_pages, $1; @@ -197,7 +259,7 @@ # subscribers a diff that might contain pages they did not # sign up for. Should separate the diff per page and # reassemble into one mail with just the pages subscribed to. - my $diff=`svnlook diff $config{svnrepo} -r $rev --no-diff-deleted`; + my $diff=decode_locale(`svnlook diff $config{svnrepo} -r $rev --no-diff-deleted`); my $subject="$config{wikiname} update of "; if (@changed_pages > 2) { Index: IkiWiki/Render.pm =================================================================== --- IkiWiki/Render.pm (revision 904) +++ IkiWiki/Render.pm (working copy) @@ -222,7 +222,7 @@ eval q{use POSIX}; # strftime doesn't know about encodings, so make sure # its output is properly treated as utf8 - return decode_utf8(POSIX::strftime( + return decode_locale(POSIX::strftime( $config{timeformat}, localtime($time))); } #}}} Index: IkiWiki.pm =================================================================== --- IkiWiki.pm (revision 904) +++ IkiWiki.pm (working copy) @@ -9,6 +9,7 @@ # Optimisation. 
use Memoize; memoize("abs2rel"); +memoize("get_charset_from_locale"); use vars qw{%config %links %oldlinks %oldpagemtime %pagectime %renderedfiles %pagesources %depends %hooks}; @@ -49,9 +50,15 @@ adminemail => undef, plugin => [qw{mdwn inline htmlscrubber}], timeformat => '%c', + locale => get_preferred_locale(), } #}}} sub checkconfig () { #{{{ + debug("setting LC_ALL to '$config{locale}'"); + eval q{use POSIX}; + $ENV{LC_ALL} = $config{locale}; + POSIX::setlocale(&POSIX::LC_ALL, $config{locale}); + if ($config{w3mmode}) { eval q{use Cwd q{abs_path}}; $config{srcdir}=possibly_foolish_untaint(abs_path($config{srcdir})); @@ -489,4 +496,50 @@ $hooks{$param{type}}{$param{id}}=\%param; } # }}} +sub get_preferred_locale() { + if (my $env = $ENV{LC_ALL}) { + return $env; + } + + my @avail=`locale -a`; + chomp @avail; + + return "POSIX" unless @avail; + + my @ret; + # prefer UTF-8 locales + @avail = map { my $l = $_; $l =~ s/\.utf8/\.UTF-8/; $l; } @avail; + @avail = @ret if @ret = grep(/\.UTF-8$/, @avail); + + # prefer en_US or en_ locales + return $ret[0] if @ret = grep(/^en_US/, @avail); + return $ret[0] if @ret = grep(/^en_/, @avail); + return $ret[0] if @ret = grep(/^[^.@]+$/, @avail); + + # fallback to the first locale found + return $avail[0]; +} # }}} + +sub get_charset_from_locale($) { + my $locale=shift; + my ($l, $c); + + my $supportedlist = "/usr/share/i18n/SUPPORTED"; + if (defined $locale and open(SUPPORTED, "< $supportedlist")) { + while (<SUPPORTED>) { + chomp; + ($l, $c) = split(/\s+/); + last if ($l eq $locale); + } + close(SUPPORTED); + + return $c if ($l eq $locale); + } + return "ISO-8859-1"; +} # }}} + +sub decode_locale($) { + return decode(get_charset_from_locale($config{locale}), shift); +} # }}} + 1