aboutsummaryrefslogtreecommitdiffstats
path: root/lxr-genxref
diff options
context:
space:
mode:
author Arne Georg Gleditsch <argggh@lxr.linpro.no> 2008-08-21 21:37:54 +0200
committer Arne Georg Gleditsch <argggh@lxr.linpro.no> 2008-08-21 21:37:54 +0200
commit 7b394d2acece3204c956a5c36f1047cbd5da9e6f (patch)
tree 05b7acc6b630aadc41f8dc7e7ff5a16ec403df95 /lxr-genxref
parent 17034adefacae12d6522eecc4a9e1f6ad04430fc (diff)
Searching tests.searching
Diffstat (limited to 'lxr-genxref')
-rwxr-xr-x lxr-genxref 7
1 files changed, 7 insertions, 0 deletions
diff --git a/lxr-genxref b/lxr-genxref
index 3173bdd..1642c2d 100755
--- a/lxr-genxref
+++ b/lxr-genxref
@@ -240,6 +240,13 @@ sub hash_file($$$) {
my $pos = 0;
# Latin-1 word characters.
foreach my $term (/([0-9a-zA-Z\300-\326\330-\366\370-\377]+)/g) {
+ # TODO: For foo_bar_zoo_ack, index
+ # - foo_bar_zoo_ack
+ # - foo_bar bar_zoo zoo_ack
+ # - foo bar zoo ack
+ # This enables subcomponent searches without running
+ # into the stopword problem that earlier
+ # reduce-to-phrase approaches suffered from.
$term = lc($term);
next if length($term) > 128;
$doc->add_posting($term, $.*100 + $pos++);