diff options
-rw-r--r-- lib/LXRng/Search/Xapian.pm | 2 ++
-rwxr-xr-x lxr-genxref | 7 +++++++
2 files changed, 9 insertions, 0 deletions
diff --git a/lib/LXRng/Search/Xapian.pm b/lib/LXRng/Search/Xapian.pm index 03db5b8..db8e4d3 100644 --- a/lib/LXRng/Search/Xapian.pm +++ b/lib/LXRng/Search/Xapian.pm @@ -121,9 +121,11 @@ sub search { } else { $query =~ s/([\S_]+_[\S_]*)/\"$1\"/g; + $query =~ s/\"((do|sys|pci|dev|spin)_)+/\"/g; $query =~ s/_/ /g; $query =~ s/\b(?![A-Z][^A-Z]*\b)(\S+)/\L$1\E/g; } + warn "$query"; my $query = $qp->parse_query($query); $query = Search::Xapian::Query diff --git a/lxr-genxref b/lxr-genxref index 3173bdd..1642c2d 100755 --- a/lxr-genxref +++ b/lxr-genxref @@ -240,6 +240,13 @@ sub hash_file($$$) { my $pos = 0; # Latin-1 word characters. foreach my $term (/([0-9a-zA-Z\300-\326\330-\366\370-\377]+)/g) { + # TODO: For foo_bar_zoo_ack, index + # - foo_bar_zoo_ack + # - foo_bar bar_zoo zoo_ack + # - foo bar zoo ack + # This enables subcomponent searches without running + # into the stopword problem that earlier + # reduce-to-phrase approaches suffered from. $term = lc($term); next if length($term) > 128; $doc->add_posting($term, $.*100 + $pos++); |