diff options
| author | Arne Georg Gleditsch <argggh@lxr.linpro.no> | 2010-08-06 23:26:55 +0200 | 
|---|---|---|
| committer | Arne Georg Gleditsch <argggh@lxr.linpro.no> | 2010-08-06 23:26:55 +0200 | 
| commit | ce8b8143639b5f4f40f789852d6908984f1d866d (patch) | |
| tree | cc2cce21900b87eff78a5b6a43d0612f9b056e56 | |
| parent | 7cef7c08ebab0374f8dced2f047b5a8bf281c44c (diff) | |
Count bytes when considering if Xapian will accept tokens, not characters.
| -rw-r--r-- | lib/LXRng/Search/Xapian.pm | 5 | 
1 file changed, 3 insertions, 2 deletions
| diff --git a/lib/LXRng/Search/Xapian.pm b/lib/LXRng/Search/Xapian.pm index c77d1ef..100d303 100644 --- a/lib/LXRng/Search/Xapian.pm +++ b/lib/LXRng/Search/Xapian.pm @@ -110,6 +110,7 @@ sub add_release {  sub indexed_term {      my ($term) = @_; +    use bytes;      return 0 if length($term) <= 2;      return 0 if length($term) > 128;      return 0 if $STOPWORD{$term}; @@ -161,7 +162,7 @@ sub search {  	$query =~ s/([\S_]+_[\S_]*)/"\"$1\""/ge;  	$query =~ s/\b(?![A-Z][^A-Z]*\b)(\S+)/\L$1\E/g;      } -    $query =~ s/\b([+]?(\w+))\b/indexed_term($2) ? $1 : ""/ge; +    $query =~ s/([+]?(\S+))/indexed_term($2) ? "$1" : ""/ge;      my $parsed = $qp->parse_query($query);      $parsed = Search::Xapian::Query @@ -180,7 +181,7 @@ sub search {  	# for both variants simultaneously is more work for Xapian  	# than doing it in sequence.  	$query =~ s/_/ /g; -	$query =~ s/\b([+]?(\w+))\b/indexed_term($2) ? $1 : ""/ge; +	$query =~ s/([+]?(\S+))/indexed_term($2) ? $1 : ""/ge;  	$parsed = $qp->parse_query($query);  	$parsed = Search::Xapian::Query | 
