]> git.vanrenterghem.biz Git - git.ikiwiki.info.git/blob - doc/todo/Mercurial_backend_update.mdwn
717db61b5522ed1b9de4a934d510f97ffa11a8a8
[git.ikiwiki.info.git] / doc / todo / Mercurial_backend_update.mdwn
1 I submitted some changes that added 5 "Yes"es and 2 "Fast"s to Mercurial at [[/rcs]], but some functionality is still missing as compared to e.g. `git.pm`, with which it should be able to be equivalent.
3 To do this, a more basic rewrite would simplify things. I inline the complete file below with comments. I don't expect anyone to take the time to read it all at once, but I'd be glad if those interested in the Mercurial backend could do some beta testing.
5 I break out my comments from the code to make them more readable. I comment all the changes as compared to current upstream.
7 ---
9                 #!/usr/bin/perl
10                 package IkiWiki::Plugin::mercurial;
12                 use warnings;
13                 use strict;
14                 use IkiWiki;
15                 use Encode;
16                 use open qw{:utf8 :std};
19 Pattern to validate hg sha1 sums. hg usually truncates the hash to 12
20 characters and prepends a local revision number for output, but internally
21 it keeps a 40 character hash. Will use the long version in this code.
23                 my $sha1_pattern = qr/[0-9a-fA-F]{40}/;
25 Message to skip in recent changes
27                 my $dummy_commit_msg = 'dummy commit';
29 *TODO:* $hg_dir not really implemented yet, until a srcdir/repository distinction is
30 made as for e.g. Git. Used in `rcs_receive`, and for attachments in `hg_parse_changes`. See comments in those places, though.
32                 my $hg_dir=undef;
34                 sub import {
35                         hook(type => "checkconfig", id => "mercurial", call => \&checkconfig);
36                         hook(type => "getsetup", id => "mercurial", call => \&getsetup);
37                         hook(type => "rcs", id => "rcs_update", call => \&rcs_update);
38                         hook(type => "rcs", id => "rcs_prepedit", call => \&rcs_prepedit);
39                         hook(type => "rcs", id => "rcs_commit", call => \&rcs_commit);
40                         hook(type => "rcs", id => "rcs_commit_staged", call => \&rcs_commit_staged);
41                         hook(type => "rcs", id => "rcs_add", call => \&rcs_add);
42                         hook(type => "rcs", id => "rcs_remove", call => \&rcs_remove);
43                         hook(type => "rcs", id => "rcs_rename", call => \&rcs_rename);
44                         hook(type => "rcs", id => "rcs_recentchanges", call => \&rcs_recentchanges);
45                         hook(type => "rcs", id => "rcs_diff", call => \&rcs_diff);
46                         hook(type => "rcs", id => "rcs_getctime", call => \&rcs_getctime);
47                         hook(type => "rcs", id => "rcs_getmtime", call => \&rcs_getmtime);
48                         hook(type => "rcs", id => "rcs_preprevert", call => \&rcs_preprevert);
49                         hook(type => "rcs", id => "rcs_revert", call => \&rcs_revert);
51 This last hook is "unsanctioned" from [[Auto-setup and maintain Mercurial wrapper hooks]]. Will try to solve its function
52 another way later.
54                         hook(type => "rcs", id => "rcs_wrapper_postcall", call => \&rcs_wrapper_postcall);
55                 }
57                 sub checkconfig () {
58                         if (exists $config{mercurial_wrapper} && length $config{mercurial_wrapper}) {
59                                 push @{$config{wrappers}}, {
60                                         wrapper => $config{mercurial_wrapper},
61                                         wrappermode => (defined $config{mercurial_wrappermode} ? $config{mercurial_wrappermode} : "06755"),
63 Next line part of [[Auto-setup and maintain Mercurial wrapper hooks]].
65                                         wrapper_postcall => (defined $config{mercurial_wrapper_hgrc_update} ? $config{mercurial_wrapper_hgrc_update} : "1"),
66                                 };
67                         }
68                 }
70                 sub getsetup () {
71                         return
72                                 plugin => {
73                                         safe => 0, # rcs plugin
74                                         rebuild => undef,
75                                         section => "rcs",
76                                 },
77                                 mercurial_wrapper => {
78                                         type => "string",
79                                         #example => # FIXME add example
80                                         description => "mercurial post-commit hook to generate",
81                                         safe => 0, # file
82                                         rebuild => 0,
83                                 },
84                                 mercurial_wrappermode => {
85                                         type => "string",
86                                         example => '06755',
87                                         description => "mode for mercurial_wrapper (can safely be made suid)",
88                                         safe => 0,
89                                         rebuild => 0,
90                                 },
91                                 mercurial_wrapper_hgrc_update => {
92                                         type => "string",
93                                         example => "1",
94                                         description => "updates existing hgrc to reflect path changes for mercurial_wrapper",
95                                         safe => 0,
96                                         rebuild => 0,
97                                 },
98                                 historyurl => {
99                                         type => "string",
100                                         example => "http://example.com:8000/log/tip/\[[file]]",
101                                         description => "url to hg serve'd repository, to show file history (\[[file]] substituted)",
102                                         safe => 1,
103                                         rebuild => 1,
104                                 },
105                                 diffurl => {
106                                         type => "string",
107                                         example => "http://localhost:8000/?fd=\[[r2]];file=\[[file]]",
108                                         description => "url to hg serve'd repository, to show diff (\[[file]] and \[[r2]] substituted)",
109                                         safe => 1,
110                                         rebuild => 1,
111                                 },
112                 }
114                 sub safe_hg (&@) {
115                         # Start a child process safely without resorting to /bin/sh.
116                         # Returns command output (in list content) or success state
117                         # (in scalar context), or runs the specified data handler.
119                         my ($error_handler, $data_handler, @cmdline) = @_;
121                         my $pid = open my $OUT, "-|";
123                         error("Cannot fork: $!") if !defined $pid;
125                         if (!$pid) {
126                                 # In child.
127                                 # hg commands want to be in wc.
129 This `$hg_dir` logic means nothing and could be stripped until srcdir/repdir distinction is made (it's stripped in upstream `mercurial.pm` right now).
131                                 if (! defined $hg_dir) {
132                                         chdir $config{srcdir}
133                                             or error("cannot chdir to $config{srcdir}: $!");
134                                 }
135                                 else {
136                                         chdir $hg_dir or error("cannot chdir to $hg_dir: $!");
137                                 }
139                                 exec @cmdline or error("Cannot exec '@cmdline': $!");
140                         }
141                         # In parent.
143                         my @lines;
144                         while (<$OUT>) {
145                                 chomp;
147                                 if (! defined $data_handler) {
148                                         push @lines, $_;
149                                 }
150                                 else {
151                                         last unless $data_handler->($_);
152                                 }
153                         }
155                         close $OUT;
157                         $error_handler->("'@cmdline' failed: $!") if $? && $error_handler;
159                         return wantarray ? @lines : ($? == 0);
160                 }
161                 # Convenient wrappers.
162                 sub run_or_die ($@) { safe_hg(\&error, undef, @_) }
163                 sub run_or_cry ($@) { safe_hg(sub { warn @_ }, undef, @_) }
164                 sub run_or_non ($@) { safe_hg(undef, undef, @_) }
167 To handle uncommitted local changes ("ULC"s for short), I use logic similar to the (non-standard) "shelve" extension to Mercurial. By taking a diff before resetting to the last commit, making changes and then applying the diff again, one can do things Mercurial otherwise refuses, which is necessary later.
169 This function creates this diff.
171                 sub hg_local_dirstate_shelve ($) {
172                         # Creates a diff snapshot of uncommited changes existing the srcdir.
173                         # Takes a string (preferably revision) as input to create a unique and
174                         # identifiable diff name.
175                         my $tempdiffname = "diff_".shift;
176                         my $tempdiffpath;
177                         if (my @tempdiff = run_or_die('hg', 'diff', '-g')) {
178                                 $"="\n";
179                                 writefile($tempdiffname, $config{srcdir},
180                                                 "@tempdiff");
181                                 $"=" ";
182                                 $tempdiffpath = $config{srcdir}.'/'.$tempdiffname;
183                         }
184                         return $tempdiffpath;
185                 }
187 This function restores the diff.
189                 sub hg_local_dirstate_unshelve ($) {
190                         # Applies diff snapshot to revert back to initial dir state. If diff
191                         # revert succeeds, the diff is removed. Otherwise it stays to not
192                         # eradicate the local changes if they were important. This clutters the
193                         # directory though. Better ways to handle this are welcome. A true way
194                         # around this dance is to have a separate repository for local changes
195                         # and push ready commits to the srcdir instead.
196                         if (my $tempdiffpath = shift) {
197                                 if (run_or_cry('hg', 'import', '--no-commit', $tempdiffpath)) {
198                                         unlink($tempdiffpath);
199                                         return undef;
200                                 }
201                         }
202                 }
204 This makes online diffing possible. A similar approach as in `git.pm`, which is [discussed at some length in a comment there](http://source.ikiwiki.branchable.com/?p=source.git;a=blob;f=IkiWiki/Plugin/git.pm;h=cf7fbe9b7c43ee53180612d0411e6202074fb9e0;hb=refs/heads/master#l211), is taken.
206                 sub merge_past ($$$) {
207                         my ($sha1, $file, $message) = @_;
209                         # Undo stack for cleanup in case of an error
210                         my @undo;
211                         # File content with conflict markers
212                         my $conflict;  
213                         my $tempdiffpath; 
215                         eval {
216                                 # Hide local changes from Mercurial by renaming the modified
217                                 # file.  Relative paths must be converted to absolute for
218                                 # renaming.
219                                 my ($target, $hidden) = (
220                                         "$config{srcdir}/${file}",
221                                         "$config{srcdir}/${file}.${sha1}"
222                                 );
223                                 rename($target, $hidden)
224                                         or error("rename '$target' to '$hidden' failed: $!");
225                                 # Ensure to restore the renamed file on error.
226                                 push @undo, sub {
227                                         return if ! -e "$hidden"; # already renamed
228                                         rename($hidden, $target)
229                                             or warn "rename '$hidden' to '$target' failed: $!";
230                                 };
233 Take a snapshot of srcdir to be able to restore uncommited local changes ("ULCs") afterwards.
235 * This must happen _after_ the merging commit in Mercurial, there is no way around it. By design hg refuses to commit merges if there are other changes to tracked content present, no matter how much  you beg.
237 * ULCs to the file being edited are special: they can't be diffed here since `editpage.pm` already has overwritten the file. When the web edit session started though, the ULC version (not the commited
238 version) was read into the form, so in a way, the web user _has already merged_ with the ULC. It is not saved in commit history, but that is the exact consequence of "uncommited" changes. If an ULC is done between the time the web edit started and was submitted, then it is lost, though.  All in all, one shouldn't be editing the srcdir directly when web edits of the same file are allowed. Clone the repo and push changes instead.
240 Much of these issues disappear, I believe, if one works with a master repo which only is pushed to.
242                                 my $tempdiffpath = hg_local_dirstate_shelve($sha1);
244                                 # Ensure uniqueness of bookmarks.
245                                 my $bookmark_upstream_head = "current_head_$sha1";
246                                 my $bookmark_edit_base = "edit_base_$sha1";
248                                 # Git and Mercurial differ in the branch concept. Mercurial's
249                                 # "bookmarks" are closer in function in this regard.
251 Bookmarks aren't standard until Mercurial 1.8 ([2011--02--10](http://selenic.com/hg/rev/d4ab9486e514)), but they've been bundled with Mercurial since ~2008, so they can be enabled by writing a `hgrc`, which is also being worked on.
253                                 # Create a bookmark at current tip.
254                                 push @undo, sub { run_or_cry('hg', 'bookmark', '--delete',
255                                                 $bookmark_upstream_head) };
256                                 run_or_die('hg', 'bookmark', $bookmark_upstream_head);
258                                 # Create a bookmark at the revision from which the edit was
259                                 # started and switch to it, discarding changes (they are stored
260                                 # in $tempdiff and the hidden file at the moment).
261                                 push @undo, sub { run_or_cry('hg', 'bookmark', '--delete',
262                                                 $bookmark_edit_base) };
263                                 run_or_die('hg', 'bookmark', '-r', $sha1, $bookmark_edit_base);
264                                 run_or_die('hg', 'update', ,'-C', $bookmark_edit_base);
266                                 # Reveal the modified file.
267                                 rename($hidden, $target)
268                                     or error("rename '$hidden' to '$target' failed: $!");
270                                 # Commit at the bookmarked revision, creating a new head.
271                                 run_or_cry('hg', 'commit', '-m', $message);
273                                 # Attempt to merge the newly created head with upstream head.
274                                 # '--tool internal:merge' to avoid spawning a GUI merger.
276 (*Semi-TODO:* How do you make this command quiet? On failed merge, it
277 always writes to STDERR and clutters the web server log.)
279                                 if (!run_or_non('hg', 'merge', '--tool', 'internal:merge',
280                                                 $bookmark_upstream_head)) {
281                                         # ..., otherwise return file with conflict markers.
282                                         $conflict = readfile($target);
284                                         # The hardcore reset approach. Keep your hands inside
285                                         # the cart.
286                                         run_or_die('hg', 'rollback');
287                                         run_or_die('hg', 'update', '-C',
288                                                 $bookmark_upstream_head);
289                                         if ($tempdiffpath) {
290                                                 hg_local_dirstate_unshelve($tempdiffpath);
291                                         }
293 Other approaches tried here:
295 1. Clean up merge attempt,
297         run_or_die('hg', 'update', '-C', $bookmark_upstream_head);
299 2. Redo "merge", using only upstream head versions,
301         run_or_die('hg', 'merge', '--tool', 'internal:local', $bookmark_edit_base);
303 3. dummy commit to close head.
305         run_or_non('hg', 'commit', '-m', $message);
307 This creates a cluttered and erroneous history. We
308 tell Mercurial to merge, even though we in practice
309 discard. This creates problems when trying to revert
310 changes.
312 Other attempt:
314 1. Discard merge attempt and switch to temp head,
316         run_or_die('hg', 'update', '-C', $bookmark_edit_base);
318 2. close the temp head (why do they call the command that in practice closes heads "--close-branch"?),
320         run_or_non('hg', 'commit', '--close-branch', '-m', $message);
322 3. restore working directory to pre-fiddling status.
324         run_or_die('hg', 'update', $bookmark_upstream_head);
326 ...but this requires the same amount of forks as the
327 above method, and confuses other parts of ikiwiki
328 since the upstream head is now the third newest
329 revision. Maybe that particular problem is solvable
330 by setting a global default bookmark that follows the
331 main tip.  It will leave clutter in the revision
332 history, though. Two extra commits that in practice
333 don't hold relevant information will be recorded for
334 each failed merge attempt.
336 To only create one extra commit, one could imagine
337 adding `--close-branch` to the commit that initially
338 created the new head (since there is no problem
339 merging with closed heads), but it's not possible to
340 close and create a head at the same time, apparently.
342                                 }
343                         };
344                         my $failure = $@;
346                         # Process undo stack (in reverse order). By policy, cleanup actions
347                         # should normally print a warning on failure.
348                         while (my $handle = pop @undo) {
349                                 $handle->();
350                         }
352                         error("Mercurial merge failed!\n$failure\n") if $failure;
354                         return ($conflict, $tempdiffpath);
355                 }
357                 sub hg_commit_info ($;$;$) {
358                         # Return an array of commit info hashes of num commits starting from
359                         # the given sha1sum.
360                         #
361 This could be optimized by using a lookup cache similar to
362 `findtimes()`. By adding `KeyAttr => ['node']` to `XMLin()` options, one
363 could use the revision ID as key and do a single massive history
364 lookup and later just check if the given revision already exists as a
365 key.  Right now I'm at the "don't optimize it yet" stage, though.
367 This uses Mercurial's built-in `--style xml` and parses it with `XML::Simple`. Mercurial's log output is otherwise somewhat cumbersome to get good stuff out of, so this XML solution is quite good, I think. It adds module dependency, but XML::Simple seems fairly standard (but what do I know, I've used 1 Perl installation in my life).
369                         use XML::Simple;
370                         use Date::Parse;
372                         my ($sha1, $num, $file) = @_;
374                         my @opts;
375                         if (defined $sha1) {
376                                 if ($sha1 =~ m/^($sha1_pattern)$/) {
377                                         push @opts, ('-r'. $1.':0');
378                                 }
379                                 elsif ($sha1 =~ m/^($sha1_pattern):($sha1_pattern)$/) {
380                                         push @opts, ('-r', $1.':'.$2);
381                                 }
382                         }
383                         push @opts, ('--limit', $num) if defined $num;
384                         push @opts, ('--', $file) if defined $file;
386                         my %env=%ENV;
387                         $ENV{HGENCODING} = 'utf-8';
388                         my @xml = run_or_cry('hg', 'log', '-v', '--style', 'xml', @opts);
389                         %ENV=%env;
391                         # hg returns empty string if file is not in repository.
392                         return undef if !@xml;
394 Some places it is clear that I'm coding ad-hoc Perl. I don't know if this is a reasonably efficient way to give input to `XMLin`, but it works.
396                         # Want to preserve linebreaks in multiline comments.
397                         $"="\n";
398                         my $xmllog = XMLin("@xml",
399                                 ForceArray => ['logentry', 'parent', 'copy', 'path']);
400                         $"=" ";
402                         my @c_infos;
403                         foreach my $rev (@{$xmllog->{logentry}}) {
404                                 my %c_info;
405                                 # In Mercurial, "rev" is technically the strictly local
406                                 # revision number.  What ikiwiki wants is what is called
407                                 # "node": a globally defined SHA1 checksum.
408                                 $c_info{rev} = $rev->{node};
409                                 foreach my $parent (@{$rev->{parent}}) {
410                                         push @{$c_info{parents}}, {rev => $parent->{node}};
411                                 }
412                                 $c_info{user} = $rev->{author}{content};
413                                 # Mercurial itself parses out and stores an email address if
414                                 # present in author name. If not, hg sets email to author name.
415                                 if ( $rev->{author}{content} ne $rev->{author}{email} &&
416                                         $rev->{author}{email} =~ m/^([^\@]+)\@(.*)$/ ) {
417                                         if ($2 eq "web") {
418                                                 $c_info{nickname} = $1;
419                                                 $c_info{web_commit} = "1";
420                                         }
421                                 }
422                                 # Mercurial gives date in ISO 8601, well handled by str2time().
423                                 $c_info{when} = str2time($rev->{date});
424                                 # Mercurial doesn't allow empty commit messages, so there
425                                 # should always be a single defined message.
426                                 $c_info{message} = $rev->{msg}{content};
427                                 # Inside "paths" sits a single array "path" that contains
428                                 # multiple paths. Crystal clear :-)
429                                 foreach my $path (@{$rev->{paths}{path}}) {
430                                         push @{$c_info{files}}, {
431                                                 # Mercurial doesn't track file permissions as
432                                                 # Git do, so that's missing here.
433                                                 'file' => $path->{content},
434                                                 'status' => $path->{action},
435                                         };
436                                 }
437                                 # There also exists an XML branch "copies"->"copy", containing
438                                 # source and dest of files that have been copied with "hg cp".
439                                 # The copy action is also registered in "paths" as a removal of
440                                 # source and addition of dest, so it's not needed here.
441                                 push @c_infos, {%c_info};
442                                 use Data::Dumper;
443                         }
445                         return wantarray ? @c_infos : $c_infos[0];
446                 }
448                 sub hg_sha1 (;$) {
449                         # Return head sha1sum (of given file).
450                         my $file = shift || q{--};
452                         # Non-existing file doesn't give error, just empty string.
453                         my $f_info = hg_commit_info(undef, 1, $file);
454                         my $sha1;
455                         if ($f_info->{rev}) {
456                                 ($sha1) = $f_info->{rev} =~ m/($sha1_pattern)/;
457                         }
458                         else {
459                                 debug("Empty sha1sum for '$file'.");
460                         }
461                         return defined $sha1 ? $sha1 : q{};
462                 }
464                 sub rcs_update () {
465                         run_or_cry('hg', '-q', 'update');
466                 }
468                 sub rcs_prepedit ($) {
469                         # Return the commit sha1sum of the file when editing begins.
470                         # This will be later used in rcs_commit if a merge is required.
471                         my ($file) = @_;
473                         return hg_sha1($file);
474                 }
476                 sub rcs_commit (@) {
477                         # Try to commit the page; returns undef on _success_ and
478                         # a version of the page with the rcs's conflict markers on
479                         # failure.
480                         my %params=@_;
482                         # Check to see if the page has been changed by someone else since
483                         # rcs_prepedit was called.
484                         my $cur    = hg_sha1($params{file});
485                         my ($prev) = $params{token} =~ /^($sha1_pattern)$/; # untaint
487                         if (defined $cur && defined $prev && $cur ne $prev) {
489 If there was a conflict, the file with conflict markers is returned. Else, the path to the tempdiff, which is to be run to restore previous local state after `rcs_commit_staged`, is returned.
491                                 my ($conflict, $tempdiffpath) =
492                                         merge_past($prev, $params{file}, $dummy_commit_msg);
493                                 return defined $conflict
494                                         ? $conflict
495                                         : rcs_commit_helper(
496                                                 @_,
497                                                 merge => 1,
498                                                 tempdiffpath => $tempdiffpath);
499                         }
501                         return rcs_commit_helper(@_);
502                 }
504                 sub rcs_commit_helper (@) {
505                         my %params=@_;
507                         my %env=%ENV;
508                         $ENV{HGENCODING} = 'utf-8';
510                         my $user="Anonymous";
511                         my $nickname;
512                         if (defined $params{session}) {
513                                 if (defined $params{session}->param("name")) {
514                                         $user = $params{session}->param("name");
515                                 }
516                                 elsif (defined $params{session}->remote_addr()) {
517                                         $user = $params{session}->remote_addr();
518                                 }
520                                 if (defined $params{session}->param("nickname")) {
521                                         $nickname=encode_utf8($params{session}->param("nickname"));
522                                         $nickname=~s/\s+/_/g;
523                                         $nickname=~s/[^-_0-9[:alnum:]]+//g;
524                                 }
525                                 $ENV{HGUSER} = encode_utf8($user . ' <' . $nickname . '@web>');
526                         }
528                         if (! length $params{message}) {
529                                 $params{message} = "no message given";
530                         }
532                         $params{message} = IkiWiki::possibly_foolish_untaint($params{message});
534                         my @opts;
536 Mercurial rejects file arguments when performing a merging commit. It
537 only does "all or nothing" commits by design when merging, so given file arguments must be discarded. It should not pose a problem.
539                         if (exists $params{file} && ! defined $params{merge}) {
540                                 push @opts, '--', $params{file};
541                         }
543                         # hg commit returns non-zero if nothing really changed.
544                         # So we should ignore its exit status (hence run_or_non).
545                         run_or_non('hg', 'commit', '-m', $params{message}, '-q', @opts);
547 If there were uncommited local changes in srcdir before a merge was done, they are restored here.
549                         if (defined $params{tempdiffpath}) {
550                                 hg_local_dirstate_unshelve($params{tempdiffpath});
551                         }
553                         %ENV=%env;
554                         return undef; # success
555                 }
557                 sub rcs_commit_staged (@) {
558                         # Commits all staged changes. Changes can be staged using rcs_add,
559                         # rcs_remove, and rcs_rename.
560                         return rcs_commit_helper(@_);
561                 }
563                 sub rcs_add ($) {
564                         my ($file) = @_;
566                         run_or_cry('hg', 'add', $file);
567                 }
569                 sub rcs_remove ($) {
570                         # Remove file from archive.
571                         my ($file) = @_;
573                         run_or_cry('hg', 'remove', '-f', $file);
574                 }
576                 sub rcs_rename ($$) {
577                         my ($src, $dest) = @_;
579                         run_or_cry('hg', 'rename', '-f', $src, $dest);
580                 }
582                 sub rcs_recentchanges ($) {
583                         my ($num) = @_;
585                         my @c_infos;
587                         foreach my $c_info (hg_commit_info(undef, $num, undef)) {
588                                 my @pagenames;
589                                 for my $page (@{$c_info->{files}}) {
590                                         my $diffurl=defined $config{diffurl} ?
591                                                 $config{diffurl} : '';
592                                         # These substitutions enable defining keywords \[[file]]
593                                         # and \[[r2]] (backward compatibility) in the setup file
594                                         # that will be exchanged with filename and revision
595                                         # respectively.
596                                         $diffurl =~ s/\[\[file\]\]/$page->{file}/go;
597                                         $diffurl =~ s/\[\[r2\]\]/$c_info->{rev}/go;
598                                         push @pagenames, {
599                                                 # pagename() strips suffixes and returns the
600                                                 # path to the file as it is to be represented
601                                                 # in the build dir.
602                                                 page => pagename($page->{file}),
603                                                 diffurl => $diffurl,
604                                         };
605                                 }
607                                 # It is expected of ikiwiki to get each comment line as a
608                                 # separate entry.
609                                 my @messagelines;
610                                 open my $message, '<', \$c_info->{message};
611                                 while (<$message>) { push @messagelines, { line => $_ } };
613                                 push @c_infos, {
614                                         rev        => $c_info->{rev},
615                                         user       => $c_info->{user},
616                                         nickname   => defined $c_info->{nickname} ?
617                                                         $c_info->{nickname} : $c_info->{user},
618                                         committype => $c_info->{web_commit} ? "web" : "hg",
619                                         when       => $c_info->{when},
620                                         message    => [@messagelines],
621                                         pages      => [@pagenames],
622                                 } if @pagenames;
623                         }
625                         return @c_infos;
626                 }
628                 sub rcs_diff ($;$) {
629                         my $rev=shift;
630                         my $maxlines=shift;
631                         my @lines;
632                         my $addlines=sub {
633                                 my $line=shift;
634                                 return if defined $maxlines && @lines == $maxlines;
635                                 push @lines, $line."\n"
636                                         if (@lines || $line=~/^diff --git/);
637                                 return 1;
638                         };
639                         safe_hg(undef, $addlines, "hg", "diff", "-c", $rev, "-g");
640                         if (wantarray) {
641                                 return @lines;
642                         }
643                         else {
644                                 return join("", @lines);
645                         }
646                 }
648                 {
649                 my %time_cache;
651 This is an upstream change I did a week ago or so. Perhaps it can be merged in some clever way with the updated `hg_commit_info` to make one shared lookup cache. Don't know how much would be gained.
653                 sub findtimes ($$) {
654                         my $file=shift;
655                         my $id=shift; # 0 = mtime ; 1 = ctime
657                         if (! keys %time_cache) {
658                                 my $date;
660                                 # It doesn't seem possible to specify the format wanted for the
661                                 # changelog (same format as is generated in git.pm:findtimes(),
662                                 # though the date differs slightly) without using a style
663                                 # _file_. There is a "hg log" switch "--template" to directly
664                                 # control simple output formatting, but in this case, the
665                                 # {file} directive must be redefined, which can only be done
666                                 # with "--style".
667                                 #
668                                 # If {file} is not redefined, all files are output on a single
669                                 # line separated with a space. It is not possible to conclude
670                                 # if the space is part of a filename or just a separator, and
671                                 # thus impossible to use in this case.
672                                 # 
673                                 # Some output filters are available in hg, but they are not fit
674                                 # for this cause (and would slow down the process
675                                 # unnecessarily).
676                                 
677                                 eval q{use File::Temp};
678                                 error $@ if $@;
679                                 my ($tmpl_fh, $tmpl_filename) = File::Temp::tempfile(UNLINK => 1);
680                                 
681                                 print $tmpl_fh 'changeset = "{date}\\n{files}\\n"' . "\n";
682                                 print $tmpl_fh 'file = "{file}\\n"' . "\n";
683                                 
684                                 foreach my $line (run_or_die('hg', 'log', '--style', $tmpl_filename)) {
685                                         # {date} gives output on the form
686                                         # 1310694511.0-7200
687                                         # where the first number is UTC Unix timestamp with one
688                                         # decimal (decimal always 0, at least on my system)
689                                         # followed by local timezone offset from UTC in
690                                         # seconds.
691                                         if (! defined $date && $line =~ /^\d+\.\d[+-]\d*$/) {
692                                                 $line =~ s/^(\d+).*/$1/;
693                                                 $date=$line;
694                                         }
695                                         elsif (! length $line) {
696                                                 $date=undef;
697                                         }
698                                         else {
699                                                 my $f=$line;
701                                                 if (! $time_cache{$f}) {
702                                                         $time_cache{$f}[0]=$date; # mtime
703                                                 }
704                                                 $time_cache{$f}[1]=$date; # ctime
705                                         }
706                                 }
707                         }
709                         return exists $time_cache{$file} ? $time_cache{$file}[$id] : 0;
710                 }
712                 }
714                 sub rcs_getctime ($) {
715                         my $file = shift;
717                         return findtimes($file, 1);
718                 }
720                 sub rcs_getmtime ($) {
721                         my $file = shift;
723                         return findtimes($file, 0);
724                 }
726 The comment just below the function declaration below is taken from `git.pm`. Is it true? Should ikiwiki support sharing its repo with other things? Mercurial-wise that sounds like a world of pain.
728                 {
729                 my $ret;
730                 sub hg_find_root {
731                         # The wiki may not be the only thing in the git repo.
732                         # Determine if it is in a subdirectory by examining the srcdir,
733                         # and its parents, looking for the .git directory.
735                         return @$ret if defined $ret;
737                         my $subdir="";
738                         my $dir=$config{srcdir};
739                         while (! -d "$dir/.hg") {
740                                 $subdir=IkiWiki::basename($dir)."/".$subdir;
741                                 $dir=IkiWiki::dirname($dir);
742                                 if (! length $dir) {
743                                         error("cannot determine root of hg repo");
744                                 }
745                         }
747                         $ret=[$subdir, $dir];
748                         return @$ret;
749                 }
751                 }
753                 sub hg_parse_changes (@) {
754                         # Only takes a single info hash as argument in rcs_preprevert, but
755                         # should be able to take several in rcs_receive.
756                         my @c_infos_raw = shift;
758                         my ($subdir, $rootdir) = hg_find_root();
759                         my @c_infos_ret;
761                         foreach my $c_info_raw (@c_infos_raw) {
762                                 foreach my $path (@{$c_info_raw->{files}}) {
763                                         my ($file, $action, $temppath);
765                                         $file=$path->{file};
767                                         # check that all changed files are in the subdir
768                                         if (length $subdir && ! ($file =~ s/^$subdir//)) {
769                                                 error sprintf(gettext("you are not allowed to change %s"), $file);
770                                         }
772                                         if    ($path->{status} eq "M") { $action="change" }
773                                         elsif ($path->{status} eq "A") { $action="add" }
774                                         elsif ($path->{status} eq "R") { $action="remove" }
775                                         else  { error "unknown status ".$path->{status} }
777 I haven't tested the attachment code below. Is it run when there is an non-trusted file upload?
779                                         # extract attachment to temp file
780                                         if (($action eq 'add' || $action eq 'change') &&
781                                                 ! pagetype($file)) {
783                                                 eval q{use File::Temp};
784                                                 die $@ if $@;
786                                                 my $fh;
787                                                 ($fh, $temppath)=File::Temp::tempfile(undef, UNLINK => 1);
788                                                 my $cmd = "cd $hg_dir && ".
789                                                         "hg diff -g -c $c_info_raw->{rev} > '$temppath'";
790                                                 if (system($cmd) != 0) {
791                                                         error("failed writing temp file '$temppath'.");
792                                                 }
793                                         }
795                                         push @c_infos_ret, {
796                                                 file => $file,
797                                                 action => $action,
798                                                 path => $temppath,
799                                         };
800                                 }
801                         }
803                         return @c_infos_ret;
804                 }
806 *TODO:* I don't know what's happening here. I've changed the code to adhere to this file's variables and functions, but it refers to a srcdir _and_ a default repo, which currently isn't available in the Mercurial setup.
808 `rcs_receive` is optional and only runs when running a pre-receive hook. Where `$_` comes from and its format are mysteries to me.
810 Also, a comment in `git.pm` mentions that we don't want to chdir to a subdir "and only see changes in it" - but this isn't true for either Git or Mercurial to my knowledge. It only seems to happen in `git.pm` since the `git log` command in `git_commit_info` ends with "`-- .`" - if it didn't do that, one wouldn't have to chdir for this reason, I believe.
812 In this case we need to stay in default repo instead of srcdir though, so `hg_dir="."` _is_ needed, but not for the abovementioned reason :-) (maybe there's more to it, though).
814                 sub rcs_receive () {
815                         my @c_infos_ret;
816                         while (<>) {
817                                 chomp;
818                                 my ($oldrev, $newrev, $refname) = split(' ', $_, 3);
820                                 # only allow changes to hg_default_branch
822 *TODO:* What happens here? Some Git voodoo. _If_ `$_` has the exact same format for Mercurial, then the below should work just as well here, I think.
824                                 if ($refname !~ m|^refs/heads/$config{hg_default_branch}$|) {
825                                         error sprintf(gettext("you are not allowed to change %s"), $refname);
826                                 }
828 Comment from `git.pm`:
830                                 # Avoid chdir when running git here, because the changes are in
831                                 # the default git repo, not the srcdir repo.  (Also, if a subdir
832                                 # is involved, we don't want to chdir to it and only see
833                                 # changes in it.) The pre-receive hook already puts us in the
834                                 # right place.
835                                 $hg_dir=".";
836                                 push @c_infos_ret,
837                                         hg_parse_changes(hg_commit_info($newrev.":".$oldrev,
838                                                         undef, undef));
839                                 $hg_dir=undef;
840                         }
842                         return @c_infos_ret;
843                 }
845                 sub rcs_preprevert ($) {
846                         my $rev=shift;
847                         my ($sha1) = $rev =~ /^($sha1_pattern)$/; # untaint
849 The below 4 lines of code are from `git.pm`, but I can't see what they actually do there. Neither Git nor Mercurial only lists changes in working directory when given a command - they always traverse to repository root by themselves. I keep it here for comments, in case I'm missing something.
851 *UPDATE:* See earlier note about `git log` ending in "`-- .`".
853                         ## Examine changes from root of git repo, not from any subdir,
854                         ## in order to see all changes.
855                         #my ($subdir, $rootdir) = git_find_root();
856                         #$git_dir=$rootdir;
858                         my $c_info=hg_commit_info($sha1, 1, undef) or error "unknown commit";
860                         # hg revert will fail on merge commits. Add a nice message.
861                         if (exists $c_info->{parents} && $c_info->{parents} > 1) {
862                                 error gettext("you are not allowed to revert a merge");
863                         }
865                         my @c_info_ret=hg_parse_changes($c_info);
867                         ### Probably not needed, if earlier comment is correct.
868                         #$hg_dir=undef;
869                         return @c_info_ret;
870                 }
872                 sub rcs_revert ($) {
873                         # Try to revert the given rev; returns undef on _success_.
874                         my $rev = shift;
875                         my ($sha1) = $rev =~ /^($sha1_pattern)$/; # untaint
877                         # Save uncommited local changes to diff file. Attempt to restore later.
878                         my $tempdiffpath = hg_local_dirstate_shelve($sha1);
880                         # Clean dir to latest commit.
881                         run_or_die('hg', 'update', '-C');
883 Some voodoo is needed here. `hg backout --tool internal:local -r $sha1` is *almost* good, but if the reversion is done to the directly previous revision, hg automatically commits, which is bad in this case. Instead I generate a reverse diff and pipe it to `import --no-commit`.
885                         if (run_or_non("hg diff -c $sha1 --reverse | hg import --no-commit -")) {
886                                 if ($tempdiffpath) { hg_local_dirstate_unshelve($tempdiffpath) }
887                                 return undef;
888                         }
889                         else {
890                                 if ($tempdiffpath) { hg_local_dirstate_unshelve($tempdiffpath) }
891                                 return sprintf(gettext("Failed to revert commit %s"), $sha1);
892                         }
893                 }
895 Below follows code regarding [[Auto-setup and maintain Mercurial wrapper hooks]]. Will try to solve it in another place later, but the code in itself is working.
897 Should perhaps add initialization of the bookmark extension here, to support older Mercurial versions.
899                 sub rcs_wrapper_postcall($) {
900                         # Update hgrc if it exists. Change post-commit/incoming hooks with the
901                         # .ikiwiki suffix to point to the wrapper path given in the setup file.
902                         # Work with a tempfile to not delete hgrc if the loop is interrupted
903                         # midway.
904                         # I believe there is a better way to solve this than creating new hooks
905                         # and callbacks. Will await discussion on ikiwiki.info.
906                         my $hgrc=$config{srcdir}.'/.hg/hgrc';
907                         my $backup_suffix='.ikiwiki.bak';
908                         if (-e $hgrc) {
909                                 use File::Spec;
910                                 my $mercurial_wrapper_abspath=File::Spec->rel2abs($config{mercurial_wrapper}, $config{srcdir});
911                                 local ($^I, @ARGV)=($backup_suffix, $hgrc);
912                                 while (<>) {
913                                         s/^(post-commit|incoming)(\.ikiwiki[ \t]*=[ \t]*).*$/$1$2$mercurial_wrapper_abspath/;
914                                         print;
915                                 }
916                                 unlink($hgrc.$backup_suffix);
917                         }
918                 }
920                 1