diff options
author | Arne Georg Gleditsch <argggh@lxr.linpro.no> | 2010-08-06 23:26:55 +0200 |
---|---|---|
committer | Arne Georg Gleditsch <argggh@lxr.linpro.no> | 2010-08-06 23:26:55 +0200 |
commit | ce8b8143639b5f4f40f789852d6908984f1d866d (patch) | |
tree | cc2cce21900b87eff78a5b6a43d0612f9b056e56 /lib | |
parent | 7cef7c08ebab0374f8dced2f047b5a8bf281c44c (diff) |
Count bytes, not characters, when deciding whether Xapian will accept a token.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/LXRng/Search/Xapian.pm | 5 |
1 file changed, 3 insertions, 2 deletions
```diff
diff --git a/lib/LXRng/Search/Xapian.pm b/lib/LXRng/Search/Xapian.pm
index c77d1ef..100d303 100644
--- a/lib/LXRng/Search/Xapian.pm
+++ b/lib/LXRng/Search/Xapian.pm
@@ -110,6 +110,7 @@ sub add_release {
 sub indexed_term {
     my ($term) = @_;
 
+    use bytes;
     return 0 if length($term) <= 2;
     return 0 if length($term) > 128;
     return 0 if $STOPWORD{$term};
@@ -161,7 +162,7 @@ sub search {
         $query =~ s/([\S_]+_[\S_]*)/"\"$1\""/ge;
         $query =~ s/\b(?![A-Z][^A-Z]*\b)(\S+)/\L$1\E/g;
     }
-    $query =~ s/\b([+]?(\w+))\b/indexed_term($2) ? $1 : ""/ge;
+    $query =~ s/([+]?(\S+))/indexed_term($2) ? "$1" : ""/ge;
     my $parsed = $qp->parse_query($query);
 
     $parsed = Search::Xapian::Query
@@ -180,7 +181,7 @@ sub search {
         # for both variants simultaneously is more work for Xapian
         # than doing it in sequence.
         $query =~ s/_/ /g;
-        $query =~ s/\b([+]?(\w+))\b/indexed_term($2) ? $1 : ""/ge;
+        $query =~ s/([+]?(\S+))/indexed_term($2) ? $1 : ""/ge;
         $parsed = $qp->parse_query($query);
 
         $parsed = Search::Xapian::Query
```