#!/usr/bin/perl # # Copyright (C) 2008 Arne Georg Gleditsch and others. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # # The full GNU General Public License is included in this distribution # in the file called COPYING. use strict; use warnings; use FindBin; use lib "$FindBin::Bin/lib"; use LXRng ROOT => $FindBin::Bin; use LXRng::Context; use LXRng::Lang; use LXRng::Index; use LXRng::Parse::Simple; use Carp; use Data::Dumper; use IO::Handle; use Fcntl; use Term::ProgressBar; use Devel::Size qw(size total_size); use Encode; $SIG{'INT'} = sub { die "\nSIGINT: $$: please wait, flushing caches...\n"; }; $SIG{'QUIT'} = sub { die "\nSIGQUIT: $$: please wait, flushing caches...\n"; }; $SIG{'TERM'} = sub { die "\nSIGTERM: $$: please wait, flushing caches...\n"; }; $SIG{'PIPE'} = sub { die "\nSIGTERM: $$: please wait, flushing caches...\n"; }; $SIG{'USR1'} = \&memory_status; autoflush STDOUT 1; autoflush STDERR 1; my $tree = shift(@ARGV); my @versions = @ARGV; my $context = LXRng::Context->new('tree' => $tree); die "Usage: $0 \n" unless $context and $context->tree; LXRng::Lang->init($context); my $index = $context->config->{'index'}; my $usage = $context->config->{'usage'}; my $hash = $context->config->{'search'}; my $rep = $context->config->{'repository'}; my $progress; my $progress_ident; my $progress_count; my $progress_next; my $progress_target; $SIG{'__WARN__'} = sub { $progress ? $progress->message(shift) : warn(@_) }; sub memory_status { open(my $statf, "< /proc/$$/statm"); my %stat; @stat{qw(size resident shared text lib data)} = split(" ", <$statf>); warn '$$$ total size '.$stat{'size'}."\n"; foreach my $k (keys %$index) { warn '$$$ index '. sprintf("%-17s %10d\n", $k, total_size($$index{$k})); } } sub progress_init { my ($ident, $total) = @_; if (-t STDOUT) { $progress = Term::ProgressBar->new({name => $ident, count => $total, ETA => 'linear'}); $progress->max_update_rate(0.25); } else { $progress_ident = $ident; $progress_target = $total; $progress_next = int($progress_target/10); $progress_count = 0; } } sub progress_update { my ($count) = @_; return $progress->update($count) if $progress; if (defined($count)) { $progress_count = $count; } else { $progress_count++; } if ($progress_count > $progress_next or $progress_count >= $progress_target) { print("$progress_ident: $progress_count/$progress_target done...\n"); $progress_next = $progress_count + int($progress_target/10); } } sub progress_target { my ($target) = @_; return $progress->target($target) if $progress; $progress_target = $target; } sub make_add_ident($) { my ($fileid) = @_; my $last_func; my %identcache; my $add_ident; $add_ident = sub { my ($symbol, $info) = @_; if ($$info{'kind'} eq 'f') { $last_func = $symbol; } if ($$info{'kind'} eq 'l') { $$info{'context'} = $identcache{$last_func} if defined($last_func); } if (exists $$info{'class'}) { $$info{'context'} = $identcache{$$info{'class'}}; } if (exists $$info{'struct'}) { $$info{'context'} = $identcache{$$info{'struct'}}; } $identcache{$symbol} = $index->add_ident($fileid, $$info{'line'}, $symbol, $$info{'kind'}, $$info{'context'}); if ($$info{'kind'} eq 'f' and exists $$info{'signature'}) { # This needs to be more robust. Perhaps ctags ought to do it. foreach my $v (split(/,/, $$info{'signature'})) { next if $v !~ /([a-zA-Z_0-9]+)[^a-zA-Z_0-9]*$/ or $1 eq 'void'; $add_ident->($1, {'kind' => 'l', 'line' => $$info{'line'}}); } } } } sub index_file($$) { my ($file, $fileid) = @_; my $lang = LXRng::Lang->new($file); return 0 unless $index->to_index($fileid); return 1 unless $lang->doindex(); warn("--- indexing ".$file->name." [".$file->revision."]\n"); my $add_ident = make_add_ident($fileid); return $lang->index_file($context, $file, $add_ident) if $lang->can('index_file'); return 1 unless $lang->ctagslangname(); my $extra_flags = $context->config->{'ctags_flags'} || []; my $ctags; my $pid = open($ctags, '-|'); die $! unless defined $pid; if ($pid == 0) { exec('ctags-exuberant', @$extra_flags, '--fields=+aifmknsSz', '--sort=no', '--excmd=number', '-f', '-', '--language-force='.$lang->ctagslangname, $lang->ctagsopts, $file->phys_path); # Still here? warn $!; kill(9, $$); } while (<$ctags>) { chomp; my ($symbol, $file, $excmd, @info) = split(/\t/); $symbol = $lang->mangle_sym($symbol); my %info = map { split(/:/, $_, 2) } @info; $add_ident->($symbol, \%info); } return 1; } # We allow $usage to be supplied both by the Search and Index # backends, since it's not quite clear which is better. There's a # certain added complexity because of this, so perhaps this feature # ought to go once a clear best choice emerges. (For instance, # reference_file is called from hash_file with a reference to the # hash indexing's document object, which is only actually used if # $usage is the Search backend.) sub reference_file($$$) { my ($file, $fileid, $doc) = @_; return 0 unless $index->to_reference($fileid); my $lang = LXRng::Lang->new($file); return 1 unless $lang->doindex(); my $handle; sysopen($handle, $file->phys_path, 0) || die($!); my $parse = new LXRng::Parse::Simple($handle, 8, @{$lang->parsespec}); warn("--- referencing ".$file->name." [".$file->revision."]\n"); my $reserved = $lang->reserved(); my $re = $lang->identifier_re(); my %refs; my $line = 1; while (1) { my ($btype, $frag) = $parse->nextfrag; last unless defined $frag; $btype ||= 'code'; if ($btype eq 'code') { while ($frag =~ /\G.*?(?:(\n)|$re)/gc) { $line++ && next if defined $1; my $sym = $lang->mangle_sym($2); next if $$reserved{$sym}; push(@{$refs{$sym} ||= []}, $line); } } else { if ($btype eq 'include') { my @paths = $lang->resolve_include($context, $file, $frag); foreach my $path (@paths) { $index->add_include($fileid, $path); } } $line += $frag =~ tr/\n/\n/; } } close($handle); foreach my $sym (keys %refs) { my $sym_id = $index->symbol_id($sym, 1); $usage->add_usage($doc, $fileid, $sym_id, $refs{$sym}); } undef %refs; return 1; } sub hash_file($$$) { my ($file, $fileid, $rels) = @_; return 0 if defined($context->config->{'search_size_limit'}) and $context->config->{'search_size_limit'} > 0 and $file->size > $context->config->{'search_size_limit'}; my $docid; if ($index->to_hash($fileid)) { my $handle; sysopen($handle, $file->phys_path, 0) || die($!); unless (-T $handle) { $handle->close(); return 0; } warn("--- hashing ".$file->name." [".$file->revision."]\n"); my $doc = $hash->new_document($file->name); my $charset = $context->config->{'content_charset'} || []; $charset = [ref($charset) eq 'ARRAY' ? @$charset : $charset]; push(@$charset, 'iso-8859-1'); # Fall back my $add_line = $hash->make_add_text($doc); while (<$handle>) { my $pos = 0; my $text; while (@$charset) { $text = eval { decode($$charset[0], $_, Encode::FB_CROAK); }; last unless $@; shift(@$charset); } $add_line->($.*100, $text); } reference_file($file, $fileid, $doc); $docid = $hash->add_document($doc, [map { $index->release_id($tree, $_) } @$rels]); $index->add_hashed_document($fileid, $docid); $index->set_rfile_charset($fileid, $$charset[0] || 'ascii'); $handle->close(); return 1; } else { $docid = $index->get_hashed_document($fileid); return 0 unless $docid; my $doc = $hash->get_document($docid); if (reference_file($file, $fileid, $doc)) { $hash->save_document($docid, $doc); } my $changed = $hash->add_release($docid, [map { $index->release_id($tree, $_) } @$rels]); return $changed; } } sub inventory_release($) { my ($version) = @_; warn("--- recording all files for $version\n"); my $iter = $rep->iterator($version); LXRng::Index::transaction { my $root = $rep->node('/', $version) or die "Bad root for release $version"; my $node; while (defined($node = $iter->next)) { next if $node->name =~ /\.o$/; my $fileid = $index->rfile_id($node, 1); $index->add_filerelease($tree, $version, $fileid); } } $index; } sub index_pending() { my $pending = $index->pending_files($tree); my $total = 0+@$pending; my $count = 0; print("\n"); progress_init('Indexing', $total); warn("--- indexing/updating $total files...\n"); foreach my $p (@$pending) { LXRng::Index::transaction { my ($fileid, $path, $rev) = @$p; my $rels = $index->new_releases_by_file($fileid); next unless @$rels; $context->release($$rels[0]); # Needed for include resolution. my $node = $rep->node($path, $$rels[0], $rev); next unless $node; if (hash_file($node, $fileid, $rels) | index_file($node, $fileid)) { $count++; progress_update($count); } else { $total--; my $skip = @$pending - $total; if ($skip % 100 == 0) { warn("--- skipped/refreshed $skip files...\n"); } progress_target($total); } } $index; } progress_update($total); my $done = $index->update_indexed_releases($tree); warn("=== releases: ".join(", ", @$done)."\n") if @$done; print("\n"); } progress_init('Recording', 1); if (@versions) { progress_target(1+@versions); foreach my $version (@versions) { inventory_release($version); progress_update(); } } else { @versions = grep { ! $index->_get_release($index->tree_id($tree), $_); } @{$context->all_releases}; progress_target(1+@versions); LXRng::Index::transaction { foreach my $version (reverse @versions) { # TODO: Breaking during the inventory process renders # version half-recorded. inventory_release($version); progress_update(); } } $index; } progress_update(); LXRng::Index::transaction { index_pending(); } $index; $hash->flush(); undef $progress;