From 7b394d2acece3204c956a5c36f1047cbd5da9e6f Mon Sep 17 00:00:00 2001
From: Arne Georg Gleditsch
Date: Thu, 21 Aug 2008 21:37:54 +0200
Subject: Searching tests.

---
 lxr-genxref | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'lxr-genxref')

diff --git a/lxr-genxref b/lxr-genxref
index 3173bdd..1642c2d 100755
--- a/lxr-genxref
+++ b/lxr-genxref
@@ -240,6 +240,13 @@ sub hash_file($$$) {
         my $pos = 0;
         # Latin-1 word characters.
         foreach my $term (/([0-9a-zA-Z\300-\326\330-\366\370-\377]+)/g) {
+            # TODO: For foo_bar_zoo_ack, index
+            # - foo_bar_zoo_ack
+            # - foo_bar bar_zoo zoo_ack
+            # - foo bar zoo ack
+            # This enables subcomponent searches without running
+            # into the stopword problem that earlier
+            # reduce-to-phrase approaches suffered from.
             $term = lc($term);
             next if length($term) > 128;
             $doc->add_posting($term, $.*100 + $pos++);
-- 
cgit v1.2.3