]> git.vanrenterghem.biz Git - git.ikiwiki.info.git/blob - doc/todo/Mercurial_backend_update.mdwn
0fdaf16e597e4cd23da1dec539b892beb30918f2
[git.ikiwiki.info.git] / doc / todo / Mercurial_backend_update.mdwn
1 I submitted some changes that added 5 "Yes"es and 2 "Fast"s to Mercurial at [[/rcs]], but some functionality is still missing compared to e.g. `git.pm`, which it should be able to match.
3 To do this, a more basic rewrite would simplify things. I inline the complete file below with comments. I don't expect anyone to take the time to read it all at once, but I'd be glad if those interested in the Mercurial backend could do some beta testing.
5 * [This specific revision at my hg repo](http://46.239.104.5:81/hg/program/ikiwiki/file/4994ba5e36fa/Plugin/mercurial.pm) ([raw version](http://46.239.104.5:81/hg/program/ikiwiki/raw-file/4994ba5e36fa/Plugin/mercurial.pm)).
7 * [My default branch](http://510x.se/hg/program/ikiwiki/file/default/Plugin/mercurial.pm) (where updates will be made, will mention here if anything happens) ([raw version](http://510x.se/hg/program/ikiwiki/raw-file/default/Plugin/mercurial.pm)).
9 (I've stripped the `hgrc`-generation from the linked versions, so it should work to just drop them on top of the old `mercurial.pm`).
11 I break out my comments from the code to make them more readable. I comment all the changes as compared to current upstream. --[[Daniel Andersson]]
13 ---
15                 #!/usr/bin/perl
16                 package IkiWiki::Plugin::mercurial;
18                 use warnings;
19                 use strict;
20                 use IkiWiki;
21                 use Encode;
22                 use open qw{:utf8 :std};
25 Pattern to validate hg sha1 sums. hg usually truncates the hash to 12
26 characters and prepends a local revision number for output, but internally
27 it keeps a 40 character hash. Will use the long version in this code.
29                 my $sha1_pattern = qr/[0-9a-fA-F]{40}/;
31 Message to skip in recent changes
33                 my $dummy_commit_msg = 'dummy commit';
35 *TODO:* `$hg_dir` not really implemented yet, until a srcdir/repository distinction is
36 made as for e.g. Git. Used in `rcs_receive`, and for attachments in `hg_parse_changes`. See comments in those places, though.
38                 my $hg_dir=undef;
40                 sub import { # Registers this backend's checkconfig, getsetup and rcs hooks through hook().
41                         hook(type => "checkconfig", id => "mercurial", call => \&checkconfig);
42                         hook(type => "getsetup", id => "mercurial", call => \&getsetup);
43                         hook(type => "rcs", id => "rcs_update", call => \&rcs_update); # every rcs hook uses its handler's name as id
44                         hook(type => "rcs", id => "rcs_prepedit", call => \&rcs_prepedit);
45                         hook(type => "rcs", id => "rcs_commit", call => \&rcs_commit);
46                         hook(type => "rcs", id => "rcs_commit_staged", call => \&rcs_commit_staged);
47                         hook(type => "rcs", id => "rcs_add", call => \&rcs_add);
48                         hook(type => "rcs", id => "rcs_remove", call => \&rcs_remove);
49                         hook(type => "rcs", id => "rcs_rename", call => \&rcs_rename);
50                         hook(type => "rcs", id => "rcs_recentchanges", call => \&rcs_recentchanges);
51                         hook(type => "rcs", id => "rcs_diff", call => \&rcs_diff); # NOTE(review): rcs_diff and the handlers below are not defined in this part of the file; presumably they follow later
52                         hook(type => "rcs", id => "rcs_getctime", call => \&rcs_getctime);
53                         hook(type => "rcs", id => "rcs_getmtime", call => \&rcs_getmtime);
54                         hook(type => "rcs", id => "rcs_preprevert", call => \&rcs_preprevert);
55                         hook(type => "rcs", id => "rcs_revert", call => \&rcs_revert);
57 This last hook is "unsanctioned" from [[Auto-setup and maintain Mercurial wrapper hooks]]. Will try to solve its function
58 another way later.
60                         hook(type => "rcs", id => "rcs_wrapper_postcall", call => \&rcs_wrapper_postcall);
61                 }
63                 sub checkconfig () { # Queues a wrapper entry in $config{wrappers} when mercurial_wrapper is configured.
64                         if (exists $config{mercurial_wrapper} && length $config{mercurial_wrapper}) { # an unset or empty setting adds nothing
65                                 push @{$config{wrappers}}, {
66                                         wrapper => $config{mercurial_wrapper},
67                                         wrappermode => (defined $config{mercurial_wrappermode} ? $config{mercurial_wrappermode} : "06755"), # falls back to "06755" when no mode is configured
69 Next line part of [[Auto-setup and maintain Mercurial wrapper hooks]].
71                                         wrapper_postcall => (defined $config{mercurial_wrapper_hgrc_update} ? $config{mercurial_wrapper_hgrc_update} : "1"), # falls back to "1" when the setting is undefined
72                                 };
73                         }
74                 }
76                 sub getsetup () { # Returns a flat key => hashref list describing the plugin and each of its settings.
77                         return
78                                 plugin => {
79                                         safe => 0, # rcs plugin
80                                         rebuild => undef,
81                                         section => "rcs",
82                                 },
83                                 mercurial_wrapper => { # read by checkconfig to decide whether a wrapper entry is queued
84                                         type => "string",
85                                         #example => # FIXME add example
86                                         description => "mercurial post-commit hook to generate",
87                                         safe => 0, # file
88                                         rebuild => 0,
89                                 },
90                                 mercurial_wrappermode => {
91                                         type => "string",
92                                         example => '06755', # same value checkconfig falls back to when the setting is undefined
93                                         description => "mode for mercurial_wrapper (can safely be made suid)",
94                                         safe => 0,
95                                         rebuild => 0,
96                                 },
97                                 mercurial_wrapper_hgrc_update => { # checkconfig passes this on as wrapper_postcall
98                                         type => "string",
99                                         example => "1",
100                                         description => "updates existing hgrc to reflect path changes for mercurial_wrapper",
101                                         safe => 0,
102                                         rebuild => 0,
103                                 },
104                                 historyurl => {
105                                         type => "string",
106                                         example => "http://example.com:8000/log/tip/\[[file]]",
107                                         description => "url to hg serve'd repository, to show file history (\[[file]] substituted)",
108                                         safe => 1,
109                                         rebuild => 1,
110                                 },
111                                 diffurl => { # rcs_recentchanges substitutes the file and r2 placeholders in this value
112                                         type => "string",
113                                         example => "http://localhost:8000/?fd=\[[r2]];file=\[[file]]",
114                                         description => "url to hg serve'd repository, to show diff (\[[file]] and \[[r2]] substituted)",
115                                         safe => 1,
116                                         rebuild => 1,
117                                 },
118                 }
120                 sub safe_hg (&@) {
121                         # Start a child process safely without resorting to /bin/sh, running @cmdline from the working copy.
122                         # Args: $error_handler (coderef or undef) is called with a message when the command fails; $data_handler (coderef or undef) receives each chomped output line and stops the reading by returning false; the rest is the command line.
123                         # Returns command output (in list context; empty when a data handler consumed the lines) or success state (in scalar context).
125                         my ($error_handler, $data_handler, @cmdline) = @_;
127                         my $pid = open my $OUT, "-|";
129                         error("Cannot fork: $!") if !defined $pid;
131                         if (!$pid) {
132                                 # In child.
133                                 # hg commands want to be in wc.
135 This `$hg_dir` logic means nothing and could be stripped until srcdir/repdir distinction is made (it's stripped in upstream `mercurial.pm` right now).
137                                 if (! defined $hg_dir) {
138                                         chdir $config{srcdir}
139                                             or error("cannot chdir to $config{srcdir}: $!");
140                                 }
141                                 else {
142                                         chdir $hg_dir or error("cannot chdir to $hg_dir: $!");
143                                 }
145                                 exec @cmdline or error("Cannot exec '@cmdline': $!"); # NOTE(review): error() runs in the forked child here; confirm it cannot be trapped by a caller's eval (merge_past calls this inside one)
146                         }
147                         # In parent.
149                         my @lines;
150                         while (<$OUT>) {
151                                 chomp;
153                                 if (! defined $data_handler) {
154                                         push @lines, $_;
155                                 }
156                                 else {
157                                         last unless $data_handler->($_);
158                                 }
159                         }
161                         close $OUT; # closing the pipe waits for the child and sets $?
162                         # When the command merely exited non-zero, close() leaves $! at 0, so report the wait status instead of an empty reason.
163                         $error_handler->("'@cmdline' failed: ".(($! + 0) ? "$!" : "wait status $?")) if $? && $error_handler;
165                         return wantarray ? @lines : ($? == 0);
166                 }
167                 # Convenient wrappers. Each passes its arguments to safe_hg as the command line, with no data handler; they differ only in the error handler.
168                 sub run_or_die ($@) { safe_hg(\&error, undef, @_) } # a failing command is reported through error()
169                 sub run_or_cry ($@) { safe_hg(sub { warn @_ }, undef, @_) } # a failing command only produces a warning
170                 sub run_or_non ($@) { safe_hg(undef, undef, @_) } # a failing command is visible only in the return value
173 To handle uncommitted local changes ("ULC"s for short), I use logic similar to the (non-standard) "shelve" extension to Mercurial. By taking a diff before resetting to the last commit, making changes and then applying the diff again, one can do things Mercurial otherwise refuses, which is necessary later.
175 This function creates this diff.
177                 sub hg_local_dirstate_shelve ($) {
178                         # Creates a diff snapshot of the uncommitted changes existing in the srcdir.
179                         # Takes a string (preferably a revision) as input to create a unique and
180                         # identifiable diff name. Returns the path of the written diff, or undef when "hg diff" printed nothing.
181                         my $tempdiffname = "diff_".shift;
182                         my $tempdiffpath;
183                         if (my @tempdiff = run_or_die('hg', 'diff', '-g')) {
184                                 # safe_hg chomps every line, so rejoin with newlines and end with one to write the diff as hg printed it. join() leaves the global $" untouched.
185                                 writefile($tempdiffname, $config{srcdir},
186                                                 join("\n", @tempdiff)."\n");
188                                 $tempdiffpath = $config{srcdir}.'/'.$tempdiffname;
189                         }
190                         return $tempdiffpath;
191                 }
193 This function restores the diff.
195                 sub hg_local_dirstate_unshelve ($) {
196                         # Applies a diff snapshot (its path is the only argument) to revert back
197                         # to the initial dir state. If the import succeeds, the diff is removed.
198                         # Otherwise it stays, so as not to eradicate the local changes if they
199                         # were important. This clutters the directory, though. Better ways to
200                         # handle this are welcome. A true way around this dance is to have a separate repository for local changes
201                         # and push ready commits to the srcdir instead.
202                         if (my $tempdiffpath = shift) { # nothing happens when no (or an empty) path is given
203                                 if (run_or_cry('hg', 'import', '--no-commit', $tempdiffpath)) { # scalar context: true when hg exited with status 0
204                                         unlink($tempdiffpath); # the result of unlink is not checked
205                                         return undef;
206                                 }
207                         }
208                 }
210 This makes online diffing possible. An approach similar to the one in `git.pm`, which is [discussed at some length in a comment there](http://source.ikiwiki.branchable.com/?p=source.git;a=blob;f=IkiWiki/Plugin/git.pm;h=cf7fbe9b7c43ee53180612d0411e6202074fb9e0;hb=refs/heads/master#l211), is taken.
212                 sub merge_past ($$$) {
213                         my ($sha1, $file, $message) = @_;
214                         # Commits the edited $file on top of revision $sha1 and merges the result with the current head. Returns ($conflict, $tempdiffpath): the file content with conflict markers when the merge failed (undef otherwise), and the path of the shelved local changes, if any.
215                         # Undo stack for cleanup in case of an error
216                         my @undo;
217                         # File content with conflict markers
218                         my $conflict;
219                         my $tempdiffpath; # assigned inside the eval below and returned to the caller
221                         eval {
222                                 # Hide local changes from Mercurial by renaming the modified
223                                 # file.  Relative paths must be converted to absolute for
224                                 # renaming.
225                                 my ($target, $hidden) = (
226                                         "$config{srcdir}/${file}",
227                                         "$config{srcdir}/${file}.${sha1}"
228                                 );
229                                 rename($target, $hidden)
230                                         or error("rename '$target' to '$hidden' failed: $!");
231                                 # Ensure to restore the renamed file on error.
232                                 push @undo, sub {
233                                         return if ! -e "$hidden"; # already renamed
234                                         rename($hidden, $target)
235                                             or warn "rename '$hidden' to '$target' failed: $!";
236                                 };
239 Take a snapshot of srcdir to be able to restore uncommitted local changes ("ULCs") afterwards.
241 * This must happen _after_ the merging commit in Mercurial, there is no way around it. By design hg refuses to commit merges if there are other changes to tracked content present, no matter how much you beg.
243 * ULCs to the file being edited are special: they can't be diffed here since `editpage.pm` already has overwritten the file. When the web edit session started though, the ULC version (not the committed
244 version) was read into the form, so in a way, the web user _has already merged_ with the ULC. It is not saved in commit history, but that is the exact consequence of "uncommitted" changes. If an ULC is done between the time the web edit started and was submitted, then it is lost, though.  All in all, one shouldn't be editing the srcdir directly when web edits of the same file are allowed. Clone the repo and push changes instead.
246 Many of these issues disappear, I believe, if one works with a master repo which only is pushed to.
248                                 $tempdiffpath = hg_local_dirstate_shelve($sha1); # no "my" here: a new lexical would shadow the variable returned at the end
250                                 # Ensure uniqueness of bookmarks.
251                                 my $bookmark_upstream_head = "current_head_$sha1";
252                                 my $bookmark_edit_base = "edit_base_$sha1";
254                                 # Git and Mercurial differ in the branch concept. Mercurial's
255                                 # "bookmarks" are closer in function in this regard.
257 Bookmarks aren't standard until Mercurial 1.8 ([2011--02--10](http://selenic.com/hg/rev/d4ab9486e514)), but they've been bundled with Mercurial since ~2008, so they can be enabled by writing a `hgrc`, which is also being worked on.
259                                 # Create a bookmark at current tip.
260                                 push @undo, sub { run_or_cry('hg', 'bookmark', '--delete',
261                                                 $bookmark_upstream_head) };
262                                 run_or_die('hg', 'bookmark', $bookmark_upstream_head);
264                                 # Create a bookmark at the revision from which the edit was
265                                 # started and switch to it, discarding changes (they are stored
266                                 # in $tempdiff and the hidden file at the moment).
267                                 push @undo, sub { run_or_cry('hg', 'bookmark', '--delete',
268                                                 $bookmark_edit_base) };
269                                 run_or_die('hg', 'bookmark', '-r', $sha1, $bookmark_edit_base);
270                                 run_or_die('hg', 'update', '-C', $bookmark_edit_base);
272                                 # Reveal the modified file.
273                                 rename($hidden, $target)
274                                     or error("rename '$hidden' to '$target' failed: $!");
276                                 # Commit at the bookmarked revision, creating a new head.
277                                 run_or_cry('hg', 'commit', '-m', $message);
279                                 # Attempt to merge the newly created head with upstream head.
280                                 # '--tool internal:merge' to avoid spawning a GUI merger.
282 (*Semi-TODO:* How do you make this command quiet? On failed merge, it
283 always writes to STDERR and clutters the web server log.)
285                                 if (!run_or_non('hg', 'merge', '--tool', 'internal:merge',
286                                                 $bookmark_upstream_head)) {
287                                         # ..., otherwise return file with conflict markers.
288                                         $conflict = readfile($target);
290                                         # The hardcore reset approach. Keep your hands inside
291                                         # the cart.
292                                         run_or_die('hg', 'rollback');
293                                         run_or_die('hg', 'update', '-C',
294                                                 $bookmark_upstream_head);
295                                         if ($tempdiffpath) {
296                                                 hg_local_dirstate_unshelve($tempdiffpath);
297                                         }
299 Other approaches tried here:
301 1. Clean up merge attempt,
303         run_or_die('hg', 'update', '-C', $bookmark_upstream_head);
305 2. Redo "merge", using only upstream head versions,
307         run_or_die('hg', 'merge', '--tool', 'internal:local', $bookmark_edit_base);
309 3. dummy commit to close head.
311         run_or_non('hg', 'commit', '-m', $message);
313 This creates a cluttered and erroneous history. We
314 tell Mercurial to merge, even though we in practice
315 discard. This creates problems when trying to revert
316 changes.
318 Other attempt:
320 1. Discard merge attempt and switch to temp head,
322         run_or_die('hg', 'update', '-C', $bookmark_edit_base);
324 2. close the temp head (why do they call the command that in practice closes heads "--close-branch"?),
326         run_or_non('hg', 'commit', '--close-branch', '-m', $message);
328 3. restore working directory to pre-fiddling status.
330         run_or_die('hg', 'update', $bookmark_upstream_head);
332 ...but this requires the same amount of forks as the
333 above method, and confuses other parts of ikiwiki
334 since the upstream head is now the third newest
335 revision. Maybe that particular problem is solvable
336 by setting a global default bookmark that follows the
337 main tip.  It will leave clutter in the revision
338 history, though. Two extra commits that in practice
339 don't hold relevant information will be recorded for
340 each failed merge attempt.
342 To only create one extra commit, one could imagine
343 adding `--close-branch` to the commit that initially
344 created the new head (since there is no problem
345 merging with closed heads), but it's not possible to
346 close and create a head at the same time, apparently.
348                                 }
349                         };
350                         my $failure = $@;
352                         # Process undo stack (in reverse order). By policy, cleanup actions
353                         # should normally print a warning on failure.
354                         while (my $handle = pop @undo) {
355                                 $handle->();
356                         }
358                         error("Mercurial merge failed!\n$failure\n") if $failure;
360                         return ($conflict, $tempdiffpath);
361                 }
363                 sub hg_commit_info ($;$;$) {
364                         # Return an array of commit info hashes of num commits starting from
365                         # the given sha1sum (a single 40-character hash or a "hash:hash" range), optionally limited to one file. List context gives every entry, scalar context only the first; nothing is returned when hg printed no output.
366                         # Each entry holds rev, parents, user, when, message and files, plus nickname and web_commit for "...@web" authors.
367 This could be optimized by using a lookup cache similar to
368 `findtimes()`. By adding `KeyAttr => ['node']` to `XMLin()` options, one
369 could use the revision ID as key and do a single massive history
370 lookup and later just check if the given revision already exists as a
371 key.  Right now I'm at the "don't optimize it yet" stage, though.
373 This uses Mercurial's built-in `--style xml` and parses it with `XML::Simple`. Mercurial's log output is otherwise somewhat cumbersome to get good stuff out of, so this XML solution is quite good, I think. It adds module dependency, but XML::Simple seems fairly standard (but what do I know, I've used 1 Perl installation in my life).
375                         use XML::Simple;
376                         use Date::Parse;
378                         my ($sha1, $num, $file) = @_;
380                         my @opts;
381                         if (defined $sha1) {
382                                 if ($sha1 =~ m/^($sha1_pattern)$/) {
383                                         push @opts, ('-r', $1.':0'); # from the given revision back to revision 0
384                                 }
385                                 elsif ($sha1 =~ m/^($sha1_pattern):($sha1_pattern)$/) {
386                                         push @opts, ('-r', $1.':'.$2);
387                                 }
388                         }
389                         push @opts, ('--limit', $num) if defined $num;
390                         push @opts, ('--', $file) if defined $file;
393                         local $ENV{HGENCODING} = 'utf-8'; # local: the previous value is restored when this sub returns or dies
394                         my @xml = run_or_cry('hg', 'log', '-v', '--style', 'xml', @opts);
397                         # hg returns empty string if file is not in repository.
398                         return if !@xml; # bare return: empty list in list context, undef in scalar context
400 Some places it is clear that I'm coding ad-hoc Perl. I don't know if this is a reasonably efficient way to give input to `XMLin`, but it works.
402                         # Want to preserve linebreaks in multiline comments, so rejoin the chomped lines with newlines.
404                         my $xmllog = XMLin(join("\n", @xml),
405                                 ForceArray => ['logentry', 'parent', 'copy', 'path']);
408                         my @c_infos;
409                         foreach my $rev (@{$xmllog->{logentry}}) {
410                                 my %c_info;
411                                 # In Mercurial, "rev" is technically the strictly local
412                                 # revision number.  What ikiwiki wants is what is called
413                                 # "node": a globally defined SHA1 checksum.
414                                 $c_info{rev} = $rev->{node};
415                                 foreach my $parent (@{$rev->{parent}}) {
416                                         push @{$c_info{parents}}, {rev => $parent->{node}};
417                                 }
418                                 $c_info{user} = $rev->{author}{content};
419                                 # Mercurial itself parses out and stores an email address if
420                                 # present in author name. If not, hg sets email to author name.
421                                 if ( $rev->{author}{content} ne $rev->{author}{email} &&
422                                         $rev->{author}{email} =~ m/^([^\@]+)\@(.*)$/ ) {
423                                         if ($2 eq "web") {
424                                                 $c_info{nickname} = $1;
425                                                 $c_info{web_commit} = "1";
426                                         }
427                                 }
428                                 # Mercurial gives date in ISO 8601, well handled by str2time().
429                                 $c_info{when} = str2time($rev->{date});
430                                 # Mercurial doesn't allow empty commit messages, so there
431                                 # should always be a single defined message.
432                                 $c_info{message} = $rev->{msg}{content};
433                                 # Inside "paths" sits a single array "path" that contains
434                                 # multiple paths. Crystal clear :-)
435                                 foreach my $path (@{$rev->{paths}{path}}) {
436                                         push @{$c_info{files}}, {
437                                                 # Mercurial doesn't track file permissions as
438                                                 # Git do, so that's missing here.
439                                                 'file' => $path->{content},
440                                                 'status' => $path->{action},
441                                         };
442                                 }
443                                 # There also exists an XML branch "copies"->"copy", containing
444                                 # source and dest of files that have been copied with "hg cp".
445                                 # The copy action is also registered in "paths" as a removal of
446                                 # source and addition of dest, so it's not needed here.
447                                 push @c_infos, {%c_info};
448                                 use Data::Dumper; # debugging leftover: nothing in this sub calls it
449                         }
451                         return wantarray ? @c_infos : $c_infos[0];
452                 }
454                 sub hg_sha1 (;$) {
455                         # Return head sha1sum (of given file), or an empty string when no revision is found.
456                         my $file = shift;
457                         my $has_file = defined $file && length $file; # a file named "0" counts as given
458                         # Non-existing file doesn't give error, just empty string. Without a file, pass undef so that hg_commit_info adds no path argument; a "--" placeholder would be looked up as a file name.
459                         my $f_info = hg_commit_info(undef, 1, $has_file ? $file : undef);
460                         my $sha1;
461                         if ($f_info->{rev}) {
462                                 ($sha1) = $f_info->{rev} =~ m/($sha1_pattern)/;
463                         }
464                         else {
465                                 debug("Empty sha1sum for '".($has_file ? $file : q{--})."'.");
466                         }
467                         return defined $sha1 ? $sha1 : q{};
468                 }
470                 sub rcs_update () { # Updates the working copy by running "hg -q update".
471                         run_or_cry('hg', '-q', 'update'); # a failure only produces a warning
472                 }
474                 sub rcs_prepedit ($) {
475                         # Return the commit sha1sum of the file when editing begins.
476                         # This will be later used in rcs_commit if a merge is required.
477                         my ($file) = @_;
479                         return hg_sha1($file); # hg_sha1 returns an empty string when it finds no revision
480                 }
482                 sub rcs_commit (@) {
483                         # Try to commit the page; returns undef on _success_ and
484                         # a version of the page with the rcs's conflict markers on
485                         # failure.
486                         my %params=@_; # "file" and "token" are read here; the full argument list is passed on to rcs_commit_helper
488                         # Check to see if the page has been changed by someone else since
489                         # rcs_prepedit was called.
490                         my $cur    = hg_sha1($params{file});
491                         my ($prev) = $params{token} =~ /^($sha1_pattern)$/; # untaint
493                         if (defined $cur && defined $prev && $cur ne $prev) { # $prev is undef when the token is not exactly one 40-character hash
495 If there was a conflict, the file with conflict markers is returned. Else, the path to the tempdiff, which is to be run to restore previous local state after `rcs_commit_staged`, is returned.
497                                 my ($conflict, $tempdiffpath) =
498                                         merge_past($prev, $params{file}, $dummy_commit_msg);
499                                 return defined $conflict
500                                         ? $conflict
501                                         : rcs_commit_helper(
502                                                 @_,
503                                                 merge => 1,
504                                                 tempdiffpath => $tempdiffpath);
505                         }
507                         return rcs_commit_helper(@_); # no intervening change detected: plain commit
508                 }
510                 sub rcs_commit_helper (@) {
511                         my %params=@_;
512                         # Runs "hg commit". Params read: session (optional, used to build HGUSER), message, file, merge and tempdiffpath. Always returns undef, since the exit status of the commit is ignored.
513                         my %env=%ENV;
514                         $ENV{HGENCODING} = 'utf-8';
516                         my $user="Anonymous";
517                         my $nickname;
518                         if (defined $params{session}) {
519                                 if (defined $params{session}->param("name")) {
520                                         $user = $params{session}->param("name");
521                                 }
522                                 elsif (defined $params{session}->remote_addr()) {
523                                         $user = $params{session}->remote_addr();
524                                 }
525                                 # The sanitized nickname becomes the local part of the "...@web" address.
526                                 if (defined $params{session}->param("nickname")) {
527                                         $nickname=encode_utf8($params{session}->param("nickname"));
528                                         $nickname=~s/\s+/_/g;
529                                         $nickname=~s/[^-_0-9[:alnum:]]+//g;
530                                 }
531                                 $ENV{HGUSER} = encode_utf8($user . ' <' . (defined $nickname ? $nickname : $user) . '@web>'); # without a nickname the user name is used, so the address is never the unmatchable "<@web>"
532                         }
534                         if (! length $params{message}) {
535                                 $params{message} = "no message given";
536                         }
538                         $params{message} = IkiWiki::possibly_foolish_untaint($params{message});
540                         my @opts;
542 Mercurial rejects file arguments when performing a merging commit. It
543 only does "all or nothing" commits by design when merging, so given file arguments must be discarded. It should not pose a problem.
545                         if (exists $params{file} && ! defined $params{merge}) {
546                                 push @opts, '--', $params{file};
547                         }
549                         # hg commit returns non-zero if nothing really changed.
550                         # So we should ignore its exit status (hence run_or_non).
551                         run_or_non('hg', 'commit', '-m', $params{message}, '-q', @opts);
553 If there were uncommitted local changes in srcdir before a merge was done, they are restored here.
555                         if (defined $params{tempdiffpath}) {
556                                 hg_local_dirstate_unshelve($params{tempdiffpath});
557                         }
559                         %ENV=%env; # put back the environment saved at the top
560                         return undef; # success
561                 }
563                 sub rcs_commit_staged (@) {
564                         # Commits all staged changes. Changes can be staged using rcs_add,
565                         # rcs_remove, and rcs_rename.
566                         return rcs_commit_helper(@_); # arguments are forwarded unchanged; the helper always returns undef
567                 }
569                 sub rcs_add ($) {
                            # Schedule $file for addition by running "hg add" through
                            # run_or_cry.
570                         my ($file) = @_;
                            # "--" ends option parsing, so a file name that starts with
                            # "-" cannot be mistaken for an hg option.
572                         run_or_cry('hg', 'add', '--', $file);
573                 }
575                 sub rcs_remove ($) {
576                         # Remove file from archive by running "hg remove -f" through
                            # run_or_cry.
577                         my ($file) = @_;
                            # "--" ends option parsing, so a file name that starts with
                            # "-" cannot be mistaken for an hg option.
579                         run_or_cry('hg', 'remove', '-f', '--', $file);
580                 }
582                 sub rcs_rename ($$) {
                            # Rename $src to $dest by running "hg rename -f" through
                            # run_or_cry.
583                         my ($src, $dest) = @_;
                            # "--" ends option parsing, so file names that start with
                            # "-" cannot be mistaken for hg options.
585                         run_or_cry('hg', 'rename', '-f', '--', $src, $dest);
586                 }
588                 sub rcs_recentchanges ($) {
                            # Turn the commits reported by hg_commit_info(undef, $num, undef)
                            # into the list of change hashes used for recent changes.
                            # Commits that list no files are left out.
589                         my ($num) = @_;
591                         my @changes;
593                         foreach my $commit (hg_commit_info(undef, $num, undef)) {
                                    # diffurl from the setup file, or '' when it is not set.
                                    my $url_template = '';
                                    if (defined $config{diffurl}) {
                                            $url_template = $config{diffurl};
                                    }

594                                 my @touched;
595                                 foreach my $changed (@{$commit->{files}}) {
598                                         # The setup file may use the keywords \[[file]] and
599                                         # \[[r2]] (the latter for backward compatibility);
600                                         # they are replaced with the file name and the
601                                         # revision respectively.
                                            my $url = $url_template;
602                                         $url =~ s/\[\[file\]\]/$changed->{file}/go;
603                                         $url =~ s/\[\[r2\]\]/$commit->{rev}/go;
605                                         # pagename() strips suffixes and returns the
606                                         # path to the file as it is to be represented
607                                         # in the build dir.
                                            my %entry = (
608                                                 page => pagename($changed->{file}),
609                                                 diffurl => $url,
                                            );
604                                         push @touched, \%entry;
611                                 }
613                                 # ikiwiki expects every line of the commit message as an
614                                 # entry of its own, so read the message line by line.
615                                 my @msg_lines;
616                                 open my $msg_fh, '<', \$commit->{message};
617                                 push @msg_lines, { line => $_ } while <$msg_fh>;

                                    next unless @touched;

                                    # Fall back to the user name when no nickname is known.
                                    my $nick = $commit->{user};
                                    if (defined $commit->{nickname}) {
                                            $nick = $commit->{nickname};
                                    }
                                    my $origin = "hg";
                                    $origin = "web" if $commit->{web_commit};

619                                 push @changes, {
620                                         rev        => $commit->{rev},
621                                         user       => $commit->{user},
622                                         nickname   => $nick,
624                                         committype => $origin,
625                                         when       => $commit->{when},
626                                         message    => \@msg_lines,
627                                         pages      => \@touched,
628                                 };
629                         }
631                         return @changes;
632                 }
634                 sub rcs_diff ($;$) {
                            # Collect the output of "hg diff -c $rev -g" through safe_hg.
                            # When $maxlines is defined, at most that many lines are kept.
                            # Returns the lines in list context, or one joined string in
                            # scalar context.
635                         my ($rev, $maxlines) = @_;
637                         my @collected;
638                         my $collector = sub {
639                                 my ($text) = @_;
                                    # Cap reached: return nothing (a false value).
640                                 if (defined $maxlines && @collected == $maxlines) {
                                            return;
                                    }
                                    # Drop everything before the first "diff --git" header;
                                    # once collection has started, keep every line.
642                                 if (@collected || $text =~ /^diff --git/) {
641                                         push @collected, $text."\n";
                                    }
643                                 return 1;
644                         };
645                         safe_hg(undef, $collector, "hg", "diff", "-c", $rev, "-g");
646                         return wantarray ? @collected : join("", @collected);
652                 }
654                 {
655                 my %time_cache;
657 This is an upstream change I did a week ago or so. Perhaps it can be merged in some clever way with the updated `hg_commit_info` to make one shared lookup cache. Don't know how much would be gained.
659                 sub findtimes ($$) {
                            # Return a commit time of $file taken from "hg log":
                            # $id 0 gives the time stored first for the file (mtime),
                            # $id 1 the time stored last (ctime). Returns 0 for files
                            # that do not show up in the log. The whole log is parsed
                            # once and kept in %time_cache for later calls.
660                         my $file=shift;
661                         my $id=shift; # 0 = mtime ; 1 = ctime
663                         if (! keys %time_cache) {
664                                 my $date;
666                                 # It doesn't seem possible to specify the format wanted for the
667                                 # changelog (same format as is generated in git.pm:findtimes(),
668                                 # though the date differs slightly) without using a style
669                                 # _file_. There is a "hg log" switch "--template" to directly
670                                 # control simple output formatting, but in this case, the
671                                 # {file} directive must be redefined, which can only be done
672                                 # with "--style".
673                                 #
674                                 # If {file} is not redefined, all files are output on a single
675                                 # line separated with a space. It is not possible to conclude
676                                 # if the space is part of a filename or just a separator, and
677                                 # thus impossible to use in this case.
678                                 #
679                                 # Some output filters are available in hg, but they are not fit
680                                 # for this cause (and would slow down the process
681                                 # unnecessarily).
682
683                                 eval q{use File::Temp};
684                                 error $@ if $@;
685                                 my ($tmpl_fh, $tmpl_filename) = File::Temp::tempfile(UNLINK => 1);
686
687                                 print $tmpl_fh 'changeset = "{date}\\n{files}\\n"' . "\n";
688                                 print $tmpl_fh 'file = "{file}\\n"' . "\n";
                                    # Make sure the style file is completely written before
                                    # hg reads it; do not rely on implicit flushing.
                                    close($tmpl_fh) || error("failed to write hg style file: $!");
689
690                                 foreach my $line (run_or_die('hg', 'log', '--style', $tmpl_filename)) {
691                                         # {date} gives output on the form
692                                         # 1310694511.0-7200
693                                         # where the first number is UTC Unix timestamp with one
694                                         # decimal (decimal always 0, at least on my system)
695                                         # followed by local timezone offset from UTC in
696                                         # seconds.
                                            # The offset only carries a sign when it is
                                            # negative: UTC is printed as "1310694511.00" and
                                            # positive offsets as e.g. "1310694511.018000", so
                                            # the sign must be optional in the pattern.
697                                         if (! defined $date && $line =~ /^\d+\.\d[+-]?\d*$/) {
698                                                 $line =~ s/^(\d+).*/$1/;
699                                                 $date=$line;
700                                         }
701                                         elsif (! length $line) {
                                                    # An empty line ends the changeset's file list.
702                                                 $date=undef;
703                                         }
704                                         else {
705                                                 my $f=$line;
                                                    # The first time stored for a file stays as its
                                                    # mtime; the ctime slot is overwritten by every
                                                    # later log entry mentioning the file.
707                                                 if (! $time_cache{$f}) {
708                                                         $time_cache{$f}[0]=$date; # mtime
709                                                 }
710                                                 $time_cache{$f}[1]=$date; # ctime
711                                         }
712                                 }
713                         }
715                         return exists $time_cache{$file} ? $time_cache{$file}[$id] : 0;
716                 }
718                 }
720                 sub rcs_getctime ($) {
                            # Creation time of $file: slot 1 of the findtimes lookup.
721                         my ($file) = @_;
                            my $ctime_slot = 1;
723                         return findtimes($file, $ctime_slot);
724                 }
726                 sub rcs_getmtime ($) {
                            # Modification time of $file: slot 0 of the findtimes lookup.
727                         my ($file) = @_;
                            my $mtime_slot = 0;
729                         return findtimes($file, $mtime_slot);
730                 }
732 The comment just below the function declaration below is taken from `git.pm`. Is it true? Should ikiwiki support sharing its repo with other things? Mercurial-wise that sounds like a world of pain.
734                 {
735                 my $ret;
736                 sub hg_find_root {
737                         # The wiki may not be the only thing in the hg repo.
738                         # Determine if it is in a subdirectory by examining the srcdir,
739                         # and its parents, looking for the .hg directory.
                            #
                            # Returns ($subdir, $rootdir): $rootdir is the first directory,
                            # going upwards from $config{srcdir}, that contains ".hg";
                            # $subdir is the path from there down to srcdir, with a
                            # trailing "/" ("" when srcdir itself is the root). The
                            # result is cached in $ret for later calls.
741                         return @$ret if defined $ret;
743                         my $subdir="";
744                         my $dir=$config{srcdir};
745                         while (! -d "$dir/.hg") {
                                    my $parent=IkiWiki::dirname($dir);
                                    # Give up when the path is used up, or when dirname()
                                    # no longer shortens it; otherwise this loop would
                                    # never end.
748                                 if (! length $parent || $parent eq $dir) {
749                                         error("cannot determine root of hg repo");
750                                 }
746                                 $subdir=IkiWiki::basename($dir)."/".$subdir;
747                                 $dir=$parent;
751                         }
753                         $ret=[$subdir, $dir];
754                         return @$ret;
755                 }
757                 }
759                 sub hg_parse_changes (@) {
760                         # Only takes a single info hash as argument in rcs_preprevert, but
761                         # should be able to take several in rcs_receive.
                            #
                            # Converts commit info hashes (with "rev" and a "files" list of
                            # { file, status } entries) into a flat list of hashes with the
                            # keys file, action ("change", "add" or "remove") and path
                            # (temp file, only set for added/changed non-page files).
                            # Calls error() for files outside the wiki's subdirectory and
                            # for unknown status letters.
762                         my @c_infos_raw = @_;
764                         my ($subdir, $rootdir) = hg_find_root();
765                         my @c_infos_ret;
767                         foreach my $c_info_raw (@c_infos_raw) {
768                                 foreach my $path (@{$c_info_raw->{files}}) {
769                                         my ($file, $action, $temppath);
771                                         $file=$path->{file};
773                                         # check that all changed files are in the subdir
                                            # (\Q..\E makes $subdir match literally instead of
                                            # being read as a regex)
774                                         if (length $subdir && ! ($file =~ s/^\Q$subdir\E//)) {
775                                                 error sprintf(gettext("you are not allowed to change %s"), $file);
776                                         }
778                                         if    ($path->{status} eq "M") { $action="change" }
779                                         elsif ($path->{status} eq "A") { $action="add" }
780                                         elsif ($path->{status} eq "R") { $action="remove" }
781                                         else  { error "unknown status ".$path->{status} }
783 I haven't tested the attachment code below. Is it run when there is a non-trusted file upload?
785                                         # extract attachment to temp file
786                                         if (($action eq 'add' || $action eq 'change') &&
787                                                 ! pagetype($file)) {
789                                                 eval q{use File::Temp};
790                                                 die $@ if $@;
792                                                 my $fh;
793                                                 ($fh, $temppath)=File::Temp::tempfile(undef, UNLINK => 1);
                                                    # $hg_dir is set by rcs_receive but not by
                                                    # rcs_preprevert, so it may be undefined here.
                                                    # A bare "cd" would change to $HOME, so fall
                                                    # back to srcdir.
                                                    # NOTE(review): assumes srcdir is the right
                                                    # place to run hg from in that case - confirm.
                                                    my $cmd_dir = defined $hg_dir ? $hg_dir : $config{srcdir};
                                                    # NOTE(review): this writes the diff of the whole
                                                    # changeset to the temp file. If callers expect
                                                    # the file's new content instead, "hg cat" would
                                                    # be needed - confirm.
794                                                 my $cmd = "cd '$cmd_dir' && ".
795                                                         "hg diff -g -c $c_info_raw->{rev} > '$temppath'";
796                                                 if (system($cmd) != 0) {
797                                                         error("failed writing temp file '$temppath'.");
798                                                 }
799                                         }
801                                         push @c_infos_ret, {
802                                                 file => $file,
803                                                 action => $action,
804                                                 path => $temppath,
805                                         };
806                                 }
807                         }
809                         return @c_infos_ret;
810                 }
812 *TODO:* I don't know what's happening here. I've changed the code to adhere to this file's variables and functions, but it refers to a srcdir _and_ a default repo, which currently isn't available in the Mercurial setup.
814 `rcs_receive` is optional and only runs when running a pre-receive hook. Where `$_` comes from and its format are mysteries to me.
816 Also, a comment in `git.pm` mentions that we don't want to chdir to a subdir "and only see changes in it" - but this isn't true for either Git or Mercurial to my knowledge. It only seems to happen in `git.pm` since the `git log` command in `git_commit_info` ends with "`-- .`" - if it didn't do that, one wouldn't have to chdir for this reason, I believe.
818 In this case we need to stay in default repo instead of srcdir though, so `hg_dir="."` _is_ needed, but not for the abovementioned reason :-) (maybe there's more to it, though).
820                 sub rcs_receive () {
                            # Reads lines from <>, each split into old revision, new
                            # revision and ref name, and returns the combined
                            # hg_parse_changes results for all of them. Calls error() for
                            # any ref name other than refs/heads/<hg_default_branch>.
                            # NOTE(review): the input format is taken over from git.pm;
                            # whether Mercurial hooks deliver it is unconfirmed.
821                         my @c_infos_ret;
822                         while (<>) {
823                                 chomp;
                                    # At most three whitespace-separated fields per line.
824                                 my ($oldrev, $newrev, $refname) = split(' ', $_, 3);
826                                 # only allow changes to hg_default_branch
828 *TODO:* What happens here? Some Git voodoo. _If_ `$_` has the exact same format for Mercurial, then the below should work just as well here, I think.
830                                 if ($refname !~ m|^refs/heads/$config{hg_default_branch}$|) {
831                                         error sprintf(gettext("you are not allowed to change %s"), $refname);
832                                 }
834 Comment from `git.pm`:
836                                 # Avoid chdir when running git here, because the changes are in
837                                 # the default git repo, not the srcdir repo.  (Also, if a subdir
838                                 # is involved, we don't want to chdir to it and only see
839                                 # changes in it.) The pre-receive hook already puts us in the
840                                 # right place.
                                    # $hg_dir is set to "." only for the duration of the lookup
                                    # below and reset to undef right afterwards.
841                                 $hg_dir=".";
842                                 push @c_infos_ret,
843                                         hg_parse_changes(hg_commit_info($newrev.":".$oldrev,
844                                                         undef, undef));
845                                 $hg_dir=undef;
846                         }
848                         return @c_infos_ret;
849                 }
851                 sub rcs_preprevert ($) {
                            # Look up the commit $rev (a full 40 character hash) and return
                            # its changes as parsed by hg_parse_changes. Calls error() when
                            # $rev is not a valid hash, when no such commit is found, or
                            # when the commit is a merge.
852                         my $rev=shift;
853                         my ($sha1) = $rev =~ /^($sha1_pattern)$/; # untaint
                            # Without this check an invalid $rev would pass undef on to
                            # hg_commit_info, which rcs_recentchanges uses to mean "no
                            # revision filter", so another commit could be examined.
                            error "unknown commit" if ! defined $sha1;
855 The below 4 lines of code are from `git.pm`, but I can't see what they actually do there. Neither Git nor Mercurial only lists changes in working directory when given a command - they always traverse to repository root by themselves. I keep it here for comments, in case I'm missing something.
857 *UPDATE:* See earlier note about `git log` ending in "`-- .`".
859                         ## Examine changes from root of git repo, not from any subdir,
860                         ## in order to see all changes.
861                         #my ($subdir, $rootdir) = git_find_root();
862                         #$git_dir=$rootdir;
                            # List assignment: take the first commit info hash, so that
                            # this does not depend on what hg_commit_info returns in
                            # scalar context.
864                         my ($c_info)=hg_commit_info($sha1, 1, undef);
                            error "unknown commit" if ! defined $c_info;
866                         # hg revert will fail on merge commits. Add a nice message.
867                         if (exists $c_info->{parents} && $c_info->{parents} > 1) {
868                                 error gettext("you are not allowed to revert a merge");
869                         }
871                         my @c_info_ret=hg_parse_changes($c_info);
873                         ### Probably not needed, if earlier comment is correct.
874                         #$hg_dir=undef;
875                         return @c_info_ret;
876                 }
878                 sub rcs_revert ($) {
879                         # Try to revert the given rev; returns undef on _success_.
                            # On failure the return value is an error message.
880                         my $rev = shift;
881                         my ($sha1) = $rev =~ /^($sha1_pattern)$/; # untaint
                            # Refuse anything that is not a full hash before the working
                            # directory is touched; an undefined $sha1 must never end up
                            # in the shell pipeline below.
                            if (! defined $sha1) {
                                    return sprintf(gettext("Failed to revert commit %s"), $rev);
                            }
883                         # Save uncommitted local changes to diff file. Attempt to restore later.
884                         my $tempdiffpath = hg_local_dirstate_shelve($sha1);
886                         # Clean dir to latest commit.
887                         run_or_die('hg', 'update', '-C');
889 Some voodoo is needed here. `hg backout --tool internal:local -r $sha1` is *almost* good, but if the reversion is done to the directly previous revision, hg automatically commits, which is bad in this case. Instead I generate a reverse diff and pipe it to `import --no-commit`.
891                         my $reverted = run_or_non("hg diff -c $sha1 --reverse | hg import --no-commit -");
                            # Restore the shelved local changes whether or not the revert
                            # worked.
892                         if ($tempdiffpath) { hg_local_dirstate_unshelve($tempdiffpath) }
893                         return undef if $reverted;
897                         return sprintf(gettext("Failed to revert commit %s"), $sha1);
899                 }
901 Below follows code regarding [[Auto-setup and maintain Mercurial wrapper hooks]]. Will try to solve it in another place later, but the code in itself is working.
903 Should perhaps add initialization of the bookmark extension here, to support older Mercurial versions.
905                 sub rcs_wrapper_postcall($) {
906                         # Update hgrc if it exists. Change post-commit/incoming hooks with the
907                         # .ikiwiki suffix to point to the wrapper path given in the setup file.
908                         # Work with a tempfile to not delete hgrc if the loop is interrupted
909                         # midway.
910                         # I believe there is a better way to solve this than creating new hooks
911                         # and callbacks. Will await discussion on ikiwiki.info.
                            #
                            # The sub's argument is not used; only $config{srcdir} and
                            # $config{mercurial_wrapper} are read.
912                         my $hgrc=$config{srcdir}.'/.hg/hgrc';
913                         my $backup_suffix='.ikiwiki.bak';
914                         if (-e $hgrc) {
915                                 use File::Spec;
                                    # A relative wrapper path is resolved against srcdir.
916                                 my $mercurial_wrapper_abspath=File::Spec->rel2abs($config{mercurial_wrapper}, $config{srcdir});
                                    # In-place editing: with $^I set and @ARGV holding hgrc,
                                    # <> reads the old file (kept under the backup suffix)
                                    # and print writes the new hgrc.
917                                 local ($^I, @ARGV)=($backup_suffix, $hgrc);
918                                 while (<>) {
                                            # Replace the value of "post-commit.ikiwiki = ..."
                                            # and "incoming.ikiwiki = ..." lines; every other
                                            # line is printed unchanged.
919                                         s/^(post-commit|incoming)(\.ikiwiki[ \t]*=[ \t]*).*$/$1$2$mercurial_wrapper_abspath/;
920                                         print;
921                                 }
                                    # The rewrite is complete, so the backup can go.
922                                 unlink($hgrc.$backup_suffix);
923                         }
924                 }
926                 1