From: Julien Date: Wed, 7 Aug 2013 14:50:13 +0000 (+0200) Subject: xapian_omega_same_lang_when_indexing_and_searching X-Git-Tag: 3.20130904~58 X-Git-Url: http://git.vanrenterghem.biz/git.ikiwiki.info.git/commitdiff_plain/14a5bc26c085191cbe186fab6bbeb3286e9297f5 xapian_omega_same_lang_when_indexing_and_searching --- diff --git a/doc/todo/xapian_omega_same_lang_when_indexing_and_searching.mdwn b/doc/todo/xapian_omega_same_lang_when_indexing_and_searching.mdwn new file mode 100644 index 000000000..218fef04c --- /dev/null +++ b/doc/todo/xapian_omega_same_lang_when_indexing_and_searching.mdwn @@ -0,0 +1,72 @@ +Hi, by default xapian/omega uses the locale param from blog.setup to set the stemmer language when the wiki is indexed. + +But when you search, we use the omega cgi and do not set a language, so if you index in French but search in English, you get bad results. + +I propose adding a new param omega_stemmer in blog.setup, to use the same language when indexing and searching. If omega_stemmer is not set, we use the LANG env param. + +Below, you can find the patch. + + + + diff --git a/IkiWiki/Plugin/search.pm b/IkiWiki/Plugin/search.pm + index 42d2e0d..08a0a01 100644 + --- a/IkiWiki/Plugin/search.pm + +++ b/IkiWiki/Plugin/search.pm + @@ -33,6 +33,13 @@ sub getsetup () { + safe => 0, # external program + rebuild => 0, + }, + + omega_stemmer => { + + type => "string", + + example => "en", + + description => "language used for indexing and searching", + + safe => 0, # external program + + rebuild => 0, + + }, + } + + sub checkconfig () { + @@ -136,7 +143,7 @@ sub indexhtml (@) { + # Index document and add terms for other metadata. + my $tg = Search::Xapian::TermGenerator->new(); + if (! 
$stemmer) { + - my $langcode=$ENV{LANG} || "en"; + + my $langcode=$config{omega_stemmer} || $ENV{LANG} || "en"; + $langcode=~s/_.*//; + + # This whitelist is here to work around a xapian bug (#486138) + @@ -183,6 +190,18 @@ sub cgi ($) { + IkiWiki::loadindex(); + $ENV{HELPLINK}=htmllink("", "", "ikiwiki/searching", + noimageinline => 1, linktext => "Help"); + + my $langcode=$config{omega_stemmer} || $ENV{LANG} || "en"; + + $langcode=~s/_.*//; + + + + # This whitelist is here to work around a xapian bug (#486138) + + my @whitelist=qw{da de en es fi fr hu it no pt ru ro sv tr}; + + + + if (grep { $_ eq $langcode } @whitelist) { + + $ENV{STEMMER}=$langcode; + + } + + else { + + $ENV{STEMMER}="en"; + + } + exec($config{omega_cgi}) || error("$config{omega_cgi} failed: $!"); + } + } + diff --git a/templates/searchquery.tmpl b/templates/searchquery.tmpl + index 15bc78e..4742460 100644 + --- a/templates/searchquery.tmpl + +++ b/templates/searchquery.tmpl + @@ -1,6 +1,6 @@ + $setmap{prefix,title,S} + $setmap{prefix,link,XLINK} + -$set{thousand,$.}$set{decimal,.}$setmap{BN,,Any Country,uk,England,fr,France} + +$set{thousand,$.}$set{decimal,.}$setmap{BN,,Any Country,uk,England,fr,France}$set{stemmer,$env{STEMMER}} + ${ + $def{PREV, + $if{$ne{$topdoc,0},