diff options
author | Arne Georg Gleditsch <argggh@taniquetil.(none)> | 2009-03-06 21:13:18 +0100 |
---|---|---|
committer | Arne Georg Gleditsch <argggh@taniquetil.(none)> | 2009-03-06 21:13:18 +0100 |
commit | 8d7d1d2595c2c3475ec27d07d8544e345dab5851 (patch) | |
tree | 6337fd7aa489a73c44abb742b2bac22741b00779 /lxr-genxref | |
parent | a25f8442450d7e9fb188a979ab0897cb86e9d84f (diff) |
Added more language modules, threw things about a bit.
Diffstat (limited to 'lxr-genxref')
-rwxr-xr-x | lxr-genxref | 37 |
1 files changed, 22 insertions, 15 deletions
diff --git a/lxr-genxref b/lxr-genxref index 3173bdd..b390b35 100755 --- a/lxr-genxref +++ b/lxr-genxref @@ -37,6 +37,7 @@ use IO::Handle; use Fcntl; use Term::ProgressBar; use Devel::Size qw(size total_size); +use Encode; $SIG{'INT'} = sub { die "\nSIGINT: $$: please wait, flushing caches...\n"; }; $SIG{'QUIT'} = sub { die "\nSIGQUIT: $$: please wait, flushing caches...\n"; }; @@ -51,6 +52,7 @@ my $tree = shift(@ARGV); my @versions = @ARGV; my $context = LXRng::Context->new('tree' => $tree); +die "Usage: $0 <tree-id>\n" unless $context and $context->tree; LXRng::Lang->init($context); my $index = $context->config->{'index'}; @@ -87,7 +89,8 @@ sub make_add_ident($) { $last_func = $symbol; } if ($$info{'kind'} eq 'l') { - $$info{'context'} = $identcache{$last_func}; + $$info{'context'} = $identcache{$last_func} if + defined($last_func); } if (exists $$info{'class'}) { $$info{'context'} = $identcache{$$info{'class'}}; @@ -122,11 +125,12 @@ sub index_file($$) { return 0 unless $index->to_index($fileid); return 1 unless $lang->doindex(); + return 1 unless $lang->ctagslangname(); my $add_ident = make_add_ident($fileid); warn("--- indexing ".$file->name." [".$file->revision."]\n"); - my @extra_flags = ('-IEXPORT_SYMBOL+', '-I__initcall+'); + my $extra_flags = $context->config->{'ctags_flags'} || []; my $ctags; my $pid = open($ctags, '-|'); @@ -134,7 +138,7 @@ sub index_file($$) { if ($pid == 0) { exec('ctags-exuberant', - @extra_flags, + @$extra_flags, '--fields=+aifmknsSz', '--sort=no', '--excmd=number', '-f', '-', '--language-force='.$lang->ctagslangname, @@ -181,12 +185,7 @@ sub reference_file($$$) { warn("--- referencing ".$file->name." [".$file->revision."]\n"); my $reserved = $lang->reserved(); - - my $re = qr( - (?m:^|[^a-zA-Z0-9_]) # Non-symbol chars. - (_*[a-zA-Z][a-zA-Z0-9_]*) # The symbol. - \b - )x; + my $re = $lang->identifier_re(); my %refs; my $line = 1; @@ -199,7 +198,7 @@ sub reference_file($$$) { while ($frag =~ /\G.*?(?:(\n)|$re)/gc) { $line++ && next if defined $1; - my $sym = $2; + my $sym = $lang->mangle_sym($2); next if $$reserved{$sym}; push(@{$refs{$sym} ||= []}, $line); @@ -236,20 +235,28 @@ sub hash_file($$$) { sysopen($handle, $file->phys_path, 0) || die($!); warn("--- hashing ".$file->name." [".$file->revision."]\n"); my $doc = $hash->new_document($file->name); + my $charset = $context->config->{'content_charset'} || []; + $charset = [ref($charset) eq 'ARRAY' ? @$charset : $charset]; + push(@$charset, 'iso-8859-1'); # Fall back + my $add_line = $hash->make_add_text($doc); + while (<$handle>) { my $pos = 0; - # Latin-1 word characters. - foreach my $term (/([0-9a-zA-Z\300-\326\330-\366\370-\377]+)/g) { - $term = lc($term); - next if length($term) > 128; - $doc->add_posting($term, $.*100 + $pos++); + my $text; + while (@$charset) { + $text = eval { decode($$charset[0], $_, Encode::FB_CROAK); }; + last unless $@; + shift(@$charset); } + + $add_line->($.*100, $text); } reference_file($file, $fileid, $doc); $docid = $hash->add_document($doc, [map { $index->release_id($tree, $_) } @$rels]); $index->add_hashed_document($fileid, $docid); + $index->set_rfile_charset($fileid, $$charset[0]); $handle->close(); return 1; } |