]> git.vanrenterghem.biz Git - git.ikiwiki.info.git/blob - IkiWiki/Plugin/meta.pm
pubdate not valid for html5
[git.ikiwiki.info.git] / IkiWiki / Plugin / meta.pm
1 #!/usr/bin/perl
2 # Ikiwiki metadata plugin.
3 package IkiWiki::Plugin::meta;
5 use warnings;
6 use strict;
7 use IkiWiki 3.00;
9 my %metaheaders;
# Plugin entry point: registers this plugin's callbacks with IkiWiki,
# all under the hook id "meta".
sub import {
	# One entry per hook: type, callback, then any extra hook options.
	# The preprocess hook is additionally registered with scan => 1.
	my @registrations = (
		[ getsetup     => \&getsetup ],
		[ needsbuild   => \&needsbuild ],
		[ preprocess   => \&preprocess, scan => 1 ],
		[ pagetemplate => \&pagetemplate ],
	);

	foreach my $registration (@registrations) {
		my ($type, $callback, @options) = @$registration;
		hook(type => $type, id => "meta", call => $callback, @options);
	}
}
# getsetup hook: returns the setup description of this plugin as a
# key/value list with a single "plugin" entry.
sub getsetup () {
	my %plugin_info = (
		safe => 1,
		rebuild => undef,
		section => "core",
	);

	return (plugin => \%plugin_info);
}
# needsbuild hook.
#
# Takes a reference to the list of source files that are going to be
# built, and returns that same reference unchanged.
#
# For every page that has stored meta state and whose source file is in
# the list, the stored meta state is dropped; it will be re-added if the
# preprocessor directive is still there during the rebuild.
sub needsbuild (@) {
	my $needsbuild=shift;

	# Index the files once, rather than scanning the whole list again
	# for every page that has meta state.
	my %rebuilding = map { $_ => 1 } @$needsbuild;

	foreach my $page (keys %pagestate) {
		next unless exists $pagestate{$page}{meta};
		next unless exists $pagesources{$page};
		next unless $rebuilding{$pagesources{$page}};

		delete $pagestate{$page}{meta};
	}

	return $needsbuild;
}
# Passes a piece of generated HTML through the htmlscrubber plugin's
# sanitize function when that function is available; otherwise the
# content is returned untouched.
#
# Arguments: content, page, destpage.
sub scrub ($$$) {
	my ($content, $page, $destpage) = @_;

	# No scrubber loaded: nothing to filter with.
	return $content
		unless IkiWiki::Plugin::htmlscrubber->can("sanitize");

	return IkiWiki::Plugin::htmlscrubber::sanitize(
		content => $content,
		page => $page,
		destpage => $destpage,
	);
}
# Checks a url against the htmlscrubber plugin's safe_url_regexp.
#
# When that variable does not exist or is undefined, every url is
# accepted (returns 1). Otherwise the result of matching the url against
# the regexp is returned.
sub safeurl ($) {
	my ($candidate) = @_;

	my $have_regexp =
		exists $IkiWiki::Plugin::htmlscrubber::{safe_url_regexp}
		&& defined $IkiWiki::Plugin::htmlscrubber::safe_url_regexp;
	return 1 unless $have_regexp;

	return $candidate =~ /$IkiWiki::Plugin::htmlscrubber::safe_url_regexp/;
}
# Renders a snippet of wiki text belonging to $page to HTML: the text is
# run through IkiWiki::preprocess, then IkiWiki::linkify, then
# IkiWiki::htmlize using the page type of the page's source file.
#
# Arguments: page, destpage, text.
sub htmlize ($$$) {
	my ($page, $destpage, $text) = @_;

	return IkiWiki::htmlize(
		$page, $destpage,
		pagetype($pagesources{$page}),
		IkiWiki::linkify(
			$page, $destpage,
			IkiWiki::preprocess($page, $destpage, $text)
		)
	);
}
# Parses a date/time string with Date::Parse's str2time and returns the
# resulting time value. Calls error() if Date::Parse cannot be loaded or
# if the string cannot be parsed.
sub _parse_time ($) {
	my $value=shift;

	eval q{use Date::Parse};
	error($@) if $@;

	my $time = str2time($value);
	if (! defined $time) {
		error(sprintf(gettext('cannot parse date/time: %s'), $value));
	}
	return $time;
}

# preprocess hook (registered with scan => 1) for the meta directive.
#
# Called with the directive's parameters as a key/value list. The first
# key names the metadata field and its value is the field's value; the
# "page", "destpage" and "preview" parameters are removed from the
# remaining parameters before they are interpreted.
#
# The first half records metadata in %pagestate (and related state) and
# runs on every call. When called in void context (the scan pass) the
# sub returns right after that. Otherwise the second half queues HTML
# header lines for the page in %metaheaders.
#
# Returns "" (or nothing, in void context). Calls error() on missing
# link targets, unparseable dates, and redirection cycles.
sub preprocess (@) {
	return "" unless @_;
	my %params=@_;
	my $key=shift;
	my $value=$params{$key};
	delete $params{$key};
	my $page=$params{page};
	delete $params{page};
	my $destpage=$params{destpage};
	delete $params{destpage};
	delete $params{preview};

	eval q{use HTML::Entities};
	error($@) if $@;
	# Always decode, even if encoding later, since it might not be
	# fully encoded.
	$value=decode_entities($value);

	# Metadata collection that needs to happen during the scan pass.
	if ($key eq 'title') {
		$pagestate{$page}{meta}{title}=$value;
		if (exists $params{sortas}) {
			$pagestate{$page}{meta}{titlesort}=$params{sortas};
		}
		else {
			delete $pagestate{$page}{meta}{titlesort};
		}
		return "";
	}
	elsif ($key eq 'description') {
		$pagestate{$page}{meta}{description}=$value;
		# fallthrough
	}
	elsif ($key eq 'guid') {
		$pagestate{$page}{meta}{guid}=$value;
		# fallthrough
	}
	elsif ($key eq 'license') {
		push @{$metaheaders{$page}}, '<link rel="license" href="#pagelicense" />';
		$pagestate{$page}{meta}{license}=$value;
		return "";
	}
	elsif ($key eq 'copyright') {
		push @{$metaheaders{$page}}, '<link rel="copyright" href="#pagecopyright" />';
		$pagestate{$page}{meta}{copyright}=$value;
		return "";
	}
	elsif ($key eq 'link' && ! %params) {
		# hidden WikiLink
		add_link($page, $value);
		return "";
	}
	elsif ($key eq 'enclosure') {
		my $link=bestlink($page, $value);
		if (! length $link) {
			error(gettext("enclosure not found"));
		}
		add_depends($page, $link, deptype("presence"));

		$value=urlto($link, $page, 1);
		$pagestate{$page}{meta}{enclosure}=$value;
		$pagestate{$page}{meta}{enclosurefile}=$link;
		# fallthrough
	}
	elsif ($key eq 'author') {
		$pagestate{$page}{meta}{author}=$value;
		if (exists $params{sortas}) {
			$pagestate{$page}{meta}{authorsort}=$params{sortas};
		}
		else {
			delete $pagestate{$page}{meta}{authorsort};
		}
		# fallthrough
	}
	elsif ($key eq 'authorurl') {
		$pagestate{$page}{meta}{authorurl}=$value if safeurl($value);
		# fallthrough
	}
	elsif ($key eq 'permalink') {
		$pagestate{$page}{meta}{permalink}=$value if safeurl($value);
		# fallthrough
	}
	elsif ($key eq 'date') {
		# Parse first, so that nothing is stored when parsing fails.
		my $time=_parse_time($value);
		$IkiWiki::pagectime{$page}=$time;
	}
	elsif ($key eq 'updated') {
		my $time=_parse_time($value);
		$pagestate{$page}{meta}{updated}=$time;
	}

	if (! defined wantarray) {
		# avoid collecting duplicate data during scan pass
		return;
	}

	# Metadata handling that happens only during preprocessing pass.
	if ($key eq 'permalink') {
		if (safeurl($value)) {
			push @{$metaheaders{$page}}, scrub('<link rel="bookmark" href="'.encode_entities($value).'" />', $page, $destpage);
		}
	}
	elsif ($key eq 'stylesheet') {
		my $rel=exists $params{rel} ? $params{rel} : "alternate stylesheet";
		my $title=exists $params{title} ? $params{title} : $value;
		# adding .css to the value prevents using any old web
		# editable page as a stylesheet
		my $stylesheet=bestlink($page, $value.".css");
		if (! length $stylesheet) {
			error(gettext("stylesheet not found"));
		}
		push @{$metaheaders{$page}}, scrub('<link href="'.urlto($stylesheet, $page).
			'" rel="'.encode_entities($rel).
			'" title="'.encode_entities($title).
			"\" type=\"text/css\" />", $page, $destpage);
	}
	elsif ($key eq 'script') {
		my $defer=exists $params{defer} ? ' defer="defer"' : '';
		my $async=exists $params{async} ? ' async="async"' : '';
		# adding .js to the value means only files with that
		# extension can be referenced
		my $js=bestlink($page, $value.".js");
		if (! length $js) {
			error(gettext("script not found"));
		}
		push @{$metaheaders{$page}}, scrub('<script src="'.urlto($js, $page).
			'"' . $defer . $async . ' type="text/javascript"></script>',
			$page, $destpage);
	}
	elsif ($key eq 'openid') {
		# 0: emit both openid and openid2 links; 1: openid only;
		# 2: openid2 only
		my $delegate=0; # both by default
		if (exists $params{delegate}) {
			$delegate = 1 if lc $params{delegate} eq 'openid';
			$delegate = 2 if lc $params{delegate} eq 'openid2';
		}
		if (exists $params{server} && safeurl($params{server})) {
			push @{$metaheaders{$page}}, '<link href="'.encode_entities($params{server}).
				'" rel="openid.server" />' if $delegate != 2;
			push @{$metaheaders{$page}}, '<link href="'.encode_entities($params{server}).
				'" rel="openid2.provider" />' if $delegate != 1;
		}
		if (safeurl($value)) {
			push @{$metaheaders{$page}}, '<link href="'.encode_entities($value).
				'" rel="openid.delegate" />' if $delegate != 2;
			push @{$metaheaders{$page}}, '<link href="'.encode_entities($value).
				'" rel="openid2.local_id" />' if $delegate != 1;
		}
		if (exists $params{"xrds-location"} && safeurl($params{"xrds-location"})) {
			# force url absolute
			eval q{use URI};
			error($@) if $@;
			my $url=URI->new_abs($params{"xrds-location"}, $config{url});
			push @{$metaheaders{$page}}, '<meta http-equiv="X-XRDS-Location" '.
				'content="'.encode_entities($url).'" />';
		}
	}
	elsif ($key eq 'foaf') {
		if (safeurl($value)) {
			push @{$metaheaders{$page}}, '<link rel="meta" '.
				'type="application/rdf+xml" title="FOAF" '.
				'href="'.encode_entities($value).'" />';
		}
	}
	elsif ($key eq 'redir') {
		return "" if $page ne $destpage;
		my $safe=0;
		if ($value !~ /^\w+:\/\//) {
			# Limit the split to two fields so that an anchor
			# containing a further '#' is kept whole.
			my ($redir_page, $redir_anchor) = split /\#/, $value, 2;

			my $link=bestlink($page, $redir_page);
			if (! length $link) {
				error(gettext("redir page not found"));
			}
			add_depends($page, $link, deptype("presence"));

			$value=urlto($link, $page);
			$value.='#'.$redir_anchor if defined $redir_anchor;
			$safe=1;

			# redir cycle detection
			$pagestate{$page}{meta}{redir}=$link;
			my $at=$page;
			my %seen;
			while (exists $pagestate{$at}{meta}{redir}) {
				if ($seen{$at}) {
					error(gettext("redir cycle is not allowed"));
				}
				$seen{$at}=1;
				$at=$pagestate{$at}{meta}{redir};
			}
		}
		else {
			$value=encode_entities($value);
		}
		my $delay=int(exists $params{delay} ? $params{delay} : 0);
		my $redir="<meta http-equiv=\"refresh\" content=\"$delay; URL=$value\" />";
		# Redirects to pages in the wiki are built from urlto() and
		# are not scrubbed; external urls are.
		if (! $safe) {
			$redir=scrub($redir, $page, $destpage);
		}
		push @{$metaheaders{$page}}, $redir;
	}
	elsif ($key eq 'link') {
		if (%params) {
			# Attributes are emitted in sorted order so that the
			# generated header does not depend on hash ordering.
			push @{$metaheaders{$page}}, scrub("<link href=\"".encode_entities($value)."\" ".
				join(" ", map {
					encode_entities($_)."=\"".encode_entities(decode_entities($params{$_}))."\""
				} sort keys %params).
				" />\n", $page, $destpage);
		}
	}
	elsif ($key eq 'robots') {
		push @{$metaheaders{$page}}, '<meta name="robots"'.
			' content="'.encode_entities($value).'" />';
	}
	elsif ($key eq 'description' || $key eq 'author') {
		push @{$metaheaders{$page}}, '<meta name="'.$key.
			'" content="'.encode_entities($value).'" />';
	}
	elsif ($key eq 'name') {
		# Attribute names and values are escaped the same way as in
		# the 'link' case above, and emitted in sorted order.
		push @{$metaheaders{$page}}, scrub('<meta name="'.
			encode_entities($value).
			'" '.
			join(' ', map {
				encode_entities($_)."=\"".encode_entities(decode_entities($params{$_}))."\""
			} sort keys %params).
			' />', $page, $destpage);
	}
	elsif ($key eq 'keywords') {
		# Make sure the keyword string is safe: only allow alphanumeric
		# characters, space and comma and strip the rest.
		$value =~ s/[^[:alnum:], ]+//g;
		push @{$metaheaders{$page}}, '<meta name="keywords"'.
			' content="'.encode_entities($value).'" />';
	}
	else {
		push @{$metaheaders{$page}}, scrub('<meta name="'.
			encode_entities($key).'" content="'.
			encode_entities($value).'" />', $page, $destpage);
	}

	return "";
}
# pagetemplate hook.
#
# Reads the "page", "destpage" and "template" parameters and fills the
# template with the metadata collected for the page: the queued header
# lines ("meta"), the overridden title, the enclosure url, authorurl,
# permalink, description, author, license and copyright.
sub pagetemplate (@) {
	my %params=@_;
	my $page=$params{page};
	my $destpage=$params{destpage};
	my $template=$params{template};

	# Loaded once up front: several branches below call
	# HTML::Entities functions, and the enclosure branch previously
	# relied on the module having been loaded elsewhere.
	eval q{use HTML::Entities};

	if (exists $metaheaders{$page} && $template->query(name => "meta")) {
		# avoid duplicate meta lines
		my %seen;
		$template->param(meta => join("\n", grep { (! $seen{$_}) && ($seen{$_}=1) } @{$metaheaders{$page}}));
	}
	if (exists $pagestate{$page}{meta}{title} && $template->query(name => "title")) {
		$template->param(title => HTML::Entities::encode_numeric($pagestate{$page}{meta}{title}));
		$template->param(title_overridden => 1);
	}

	if (exists $pagestate{$page}{meta}{enclosure}) {
		$template->param(enclosure => HTML::Entities::encode_entities(IkiWiki::urlabs($pagestate{$page}{meta}{enclosure}, $config{url})));
	}

	foreach my $field (qw{authorurl}) {
		$template->param($field => HTML::Entities::encode_entities($pagestate{$page}{meta}{$field}))
			if exists $pagestate{$page}{meta}{$field} && $template->query(name => $field);
	}

	# The permalink is passed through IkiWiki::urlabs with the wiki's
	# configured url before being encoded.
	foreach my $field (qw{permalink}) {
		if (exists $pagestate{$page}{meta}{$field} && $template->query(name => $field)) {
			$template->param($field => HTML::Entities::encode_entities(IkiWiki::urlabs($pagestate{$page}{meta}{$field}, $config{url})));
		}
	}

	foreach my $field (qw{description author}) {
		$template->param($field => HTML::Entities::encode_numeric($pagestate{$page}{meta}{$field}))
			if exists $pagestate{$page}{meta}{$field} && $template->query(name => $field);
	}

	# License and copyright are set when the page is its own
	# destination page, or when the destination page has no such field
	# or a different value for it.
	foreach my $field (qw{license copyright}) {
		if (exists $pagestate{$page}{meta}{$field} && $template->query(name => $field) &&
		    ($page eq $destpage || ! exists $pagestate{$destpage}{meta}{$field} ||
		     $pagestate{$page}{meta}{$field} ne $pagestate{$destpage}{meta}{$field})) {
			$template->param($field => htmlize($page, $destpage, $pagestate{$page}{meta}{$field}));
		}
	}
}
# Returns the value to sort $page by for the metadata field $meta.
#
# Lookup order:
#   1. the page's explicit sort key for the field ("<field>sort");
#   2. the page's stored value for the field;
#   3. a fallback: the page title derived from the page's basename for
#      'title', the page's entry in %IkiWiki::pagectime for 'date', its
#      entry in %IkiWiki::pagemtime for 'updated', and '' otherwise.
sub get_sort_key {
	my $page = shift;
	my $meta = shift;

	# e.g. titlesort (also makes sense for author)
	my $key = $pagestate{$page}{meta}{$meta . "sort"};
	return $key if defined $key;

	# e.g. title
	$key = $pagestate{$page}{meta}{$meta};
	return $key if defined $key;

	# fall back to closer-to-core things
	if ($meta eq 'title') {
		return pagetitle(IkiWiki::basename($page));
	}
	elsif ($meta eq 'date') {
		return $IkiWiki::pagectime{$page};
	}
	elsif ($meta eq 'updated') {
		return $IkiWiki::pagemtime{$page};
	}
	else {
		return '';
	}
}
# Shared implementation of the PageSpec match_* functions for metadata.
#
# Arguments: the metadata field name, the page, and a glob.
#
# The page's stored value for the field (or, for 'title' only, the
# result of pagetitle($page) when nothing is stored) is matched against
# the glob. Returns an IkiWiki::SuccessReason on a match, and an
# IkiWiki::FailReason when the value does not match or the page has no
# value for the field. Each result carries a content dependency on the
# page.
sub match {
	my $field=shift;
	my $page=shift;

	# turn glob into a safe regexp
	my $re=IkiWiki::glob2re(shift);

	my $val;
	if (exists $pagestate{$page}{meta}{$field}) {
		$val=$pagestate{$page}{meta}{$field};
	}
	elsif ($field eq 'title') {
		$val = pagetitle($page);
	}

	if (defined $val) {
		if ($val=~$re) {
			return IkiWiki::SuccessReason->new("$re matches $field of $page", $page => $IkiWiki::DEPEND_CONTENT, "" => 1);
		}
		else {
			return IkiWiki::FailReason->new("$re does not match $field of $page", $page => $IkiWiki::DEPEND_CONTENT, "" => 1);
		}
	}
	else {
		return IkiWiki::FailReason->new("$page does not have a $field", $page => $IkiWiki::DEPEND_CONTENT);
	}
}
package IkiWiki::PageSpec;

# Each function below forwards its arguments to
# IkiWiki::Plugin::meta::match with the name of the metadata field to
# compare, and returns that call's result.

sub match_title ($$;@) {
	IkiWiki::Plugin::meta::match("title", @_);
}

sub match_author ($$;@) {
	IkiWiki::Plugin::meta::match("author", @_);
}

sub match_authorurl ($$;@) {
	IkiWiki::Plugin::meta::match("authorurl", @_);
}

sub match_license ($$;@) {
	IkiWiki::Plugin::meta::match("license", @_);
}

sub match_copyright ($$;@) {
	IkiWiki::Plugin::meta::match("copyright", @_);
}

sub match_guid ($$;@) {
	IkiWiki::Plugin::meta::match("guid", @_);
}

463 package IkiWiki::SortSpec;
# Sort comparison on a metadata field, for sort=meta(field).
#
# Takes the field name as its argument and compares the pages in $a and
# $b by the sort keys IkiWiki::Plugin::meta::get_sort_key returns for
# them. 'updated' and 'date' are compared numerically, every other field
# as a string. Calls error() when no field name is given.
sub cmp_meta {
	my $meta = shift;
	error(gettext("sort=meta requires a parameter")) unless defined $meta;

	if ($meta eq 'updated' || $meta eq 'date') {
		return IkiWiki::Plugin::meta::get_sort_key($a, $meta)
			<=>
			IkiWiki::Plugin::meta::get_sort_key($b, $meta);
	}

	return IkiWiki::Plugin::meta::get_sort_key($a, $meta)
		cmp
		IkiWiki::Plugin::meta::get_sort_key($b, $meta);
}

480 # A prototype of how sort=title could behave in 4.0 or something
481 sub cmp_meta_title {
482         $_[0] = 'title';
483         return cmp_meta(@_);