diff options
author | Arne Georg Gleditsch <argggh@lxr.linpro.no> | 2007-07-05 00:51:08 +0200 |
---|---|---|
committer | Arne Georg Gleditsch <argggh@lxr.linpro.no> | 2007-07-05 00:51:08 +0200 |
commit | e9fa4c98bb5f084739d3418ade3f0c51e34a0aa1 (patch) | |
tree | fec1d635625e031cde7cba1b0a1d95ee92ac760b /lxr-genxref |
Rebase tree.
Diffstat (limited to 'lxr-genxref')
-rwxr-xr-x | lxr-genxref | 301 |
1 files changed, 301 insertions, 0 deletions
diff --git a/lxr-genxref b/lxr-genxref new file mode 100755 index 0000000..79e98f5 --- /dev/null +++ b/lxr-genxref @@ -0,0 +1,301 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +use FindBin; +use lib "$FindBin::Bin/lib"; + +use LXRng ROOT => $FindBin::Bin; +use LXRng::Context; +use LXRng::Lang qw(C); +use LXRng::Index; +use LXRng::Parse::Simple; + +use Carp; +use Data::Dumper; +use IO::Handle; +use Fcntl; + +$SIG{'INT'} = sub { die "SIGINT: please wait, flushing caches...\n"; }; +$SIG{'QUIT'} = sub { die "SIGQUIT: please wait, flushing caches...\n"; }; +$SIG{'TERM'} = sub { die "SIGTERM: please wait, flushing caches...\n"; }; + +autoflush STDOUT 1; +autoflush STDERR 1; + +my $cols = 0; +sub progress_mark { + my ($mark) = @_; + + if ($cols > 79) { + print("\n"); + $cols = 0; + } + print(STDERR $mark); + $cols++; +} + +sub progress_info { + my ($msg) = @_; + print(STDERR "\n") if $cols > 0; + print(STDERR "$msg\n"); + $cols = 0; +} + +sub make_add_ident { + my ($index, $fileid) = @_; + + my $last_func; + my %identcache; + + my $add_ident; + $add_ident = sub { + my ($symbol, $info) = @_; + + if ($$info{'kind'} eq 'f') { + $last_func = $symbol; + } + if ($$info{'kind'} eq 'l') { + $$info{'context'} = $identcache{$last_func}; + } + if (exists $$info{'class'}) { + $$info{'context'} = $identcache{$$info{'class'}}; + } + if (exists $$info{'struct'}) { + $$info{'context'} = $identcache{$$info{'struct'}}; + } + + $identcache{$symbol} = + $index->add_ident($fileid, $$info{'line'}, + $symbol, $$info{'kind'}, + $$info{'context'}); + + if ($$info{'kind'} eq 'f' and exists $$info{'signature'}) { + # This needs to be more robust. Perhaps ctags ought to do it. + + foreach my $v (split(/,/, $$info{'signature'})) { + next if + $v !~ /([a-zA-Z_0-9]+)[^a-zA-Z_0-9]*$/ or + $1 eq 'void'; + + $add_ident->($1, {'kind' => 'l', 'line' => $$info{'line'}}); + } + } + } +} + +sub index_file { + my ($context, $index, $tree, $rel, $file, $fileid) = @_; + + my $lang = LXRng::Lang->new($file); + + return unless $lang->doindex(); + unless ($index->to_index($fileid)) { + progress_mark("*"); + return; + } + + my $add_ident = make_add_ident($index, $fileid); + + progress_info("indexing ".$file->name."[".$file->revision."] ". + $file->size." bytes ($lang)..."); + + my @extra_flags = ('-IEXPORT_SYMBOL+', '-I__initcall+'); + + my $path = $file->phys_path; + + my $ctags; + my $pid = open($ctags, '-|'); + die $! unless defined $pid; + + if ($pid == 0) { + exec('ctags-exuberant', + @extra_flags, + '--fields=+aifmknsSz', '--sort=no', + '--excmd=number', '-f', '-', + '--language-force='.$lang->ctagslangname, + $lang->ctagsopts, + $path); + + # Still here? + warn $!; + kill(9, $$); + } + + LXRng::Index::transaction { + while (<$ctags>) { + chomp; + my ($symbol, $file, $excmd, @info) = split(/\t/); + my %info = map { split(/:/, $_, 2) } @info; + + $add_ident->($symbol, \%info); + } + } $index; + + $path = undef; +} + +sub reference_file { + my ($context, $index, $tree, $rel, $file, $fileid) = @_; + + my $lang = LXRng::Lang->new($file); + + return unless $lang->doindex(); + + unless ($index->to_reference($fileid)) { + progress_mark("."); + return; + } + +# sysopen(my $handle, $file->phys_path, O_RDONLY) or die($!); +# my $parse = new LXRng::Parse::Simple($handle, 8, @{$lang->parsespec}); + my $parse = new LXRng::Parse::Simple($file->handle, 8, @{$lang->parsespec}); + + progress_info("referencing ".$file->name.", ". + $file->size." bytes ($lang)..."); + + my $res = $lang->reserved(); + + my $re = qr( + (?m:^|[^a-zA-Z0-9_]) # Non-symbol chars. + (_*[a-zA-Z][a-zA-Z0-9_]*) # The symbol. + \b + )x; + + my $line = 1; + while (1) { + my ($btype, $frag) = $parse->nextfrag; + + last unless defined $frag; + $btype ||= 'code'; + if ($btype eq 'code') { + + while ($frag =~ /\G.*?(?:(\n)|$re)/gc) { + $line++ && next if defined $1; + + my $id = $2; + + next if $$res{$id}; + + $index->add_usage($fileid, $line, $id); + } + } + else { + if ($btype eq 'include') { + my @paths = + $lang->resolve_include($context, $file, $frag); + + foreach my $path (@paths) { + $index->add_include($fileid, $path); + } + } + $line += $frag =~ tr/\n/\n/; + } + } +} + +sub hash_file { + my ($context, $index, $hash, $tree, $rel, $file, $fileid) = @_; + + my $docid; + if ($index->to_hash($fileid)) { + my $handle = $file->handle(); + progress_info("hashing ".$file->name."[".$file->revision."] ". + $file->size." bytes..."); + my $doc = $hash->new_document($file->name); + while (<$handle>) { + my $pos = 0; + # Latin-1 word characters. + foreach my $term (/([0-9a-zA-Z\300-\326\330-\366\370-\377]+)/g) { + if ($term =~ /^[A-Z][^A-Z]*$/) { + $term = 'R'.lc($term); + } + else { + $term = lc($term); + } + next if length($term) > 128; + $doc->add_posting($term, $.*100 + $pos++); + } + } + $docid = $hash->add_document($doc, $index->release_id($tree, $rel)); + $index->add_hashed_document($fileid, $docid); + } + else { + $docid = $index->get_hashed_document($fileid); + if ($hash->add_release($docid, $index->release_id($tree, $rel))) { + progress_mark("+"); + } + else { + progress_mark("-"); + } + } + # for all releases this fileid belongs to (that are not is_indexed) + # add_release to $docid. +} + +sub do_index { + my ($context, $index, $hash, $tree, $rel, $iter) = @_; + + my $node; + while (defined($node = $iter->next)) { + next if $node->name =~ /\.o$/; + my $fileid = $index->rfile_id($node, 1); + $index->add_filerelease($tree, $rel, $fileid); + index_file($context, $index, $tree, $rel, $node, $fileid); + hash_file($context, $index, $hash, $tree, $rel, $node, $fileid); + } +} + +sub do_reference { + my ($context, $index, $hash, $tree, $rel, $iter) = @_; + + my $node; + while (defined($node = $iter->next)) { + next if $node->name =~ /\.o$/; + my $fileid = $index->rfile_id($node, 1); + LXRng::Index::transaction { + reference_file($context, $index, $tree, $rel, $node, $fileid); + } $index; + } +} + +my $tree = shift(@ARGV); +my @versions = @ARGV; + +my $context = LXRng::Context->new('tree' => $tree); + +my $index = $context->config->{'index'}; +my $hash = $context->config->{'search'}; +my $rep = $context->config->{'repository'}; + +sub do_genxref { + my ($tree, $version) = @_; + + print("\nindexing release $version...\n"); + my $root = $rep->node('/', $version) or die "bad root"; + $context->release($version); + + LXRng::Index::transaction { + do_index($context, $index, $hash, $tree, $version, $rep->iterator($version)); + do_reference($context, $index, $hash, $tree, $version, $rep->iterator($version)); + } $index; + + $hash->flush(); + # Mark release as is_indexed + progress_info("release $version done"); +} + +if (@versions) { + foreach my $version (@versions) { + do_genxref($tree, $version); + } +} +else { + # Run pass over all un-indexed releases, record all files. + + foreach my $version (reverse @{$context->all_releases}) { + next if $index->_get_release($index->tree_id($tree), $version); + system($0, $tree, $version); + } +} |