aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArne Georg Gleditsch <argggh@lxr.linpro.no>2007-11-27 01:20:53 +0100
committerArne Georg Gleditsch <argggh@lxr.linpro.no>2007-11-27 01:20:53 +0100
commitfda08219f81aaeb97405ae3b0cbe54c9fefd72be (patch)
tree2030750e6f77872335703e68faf188406cc9f2c3
parent96f381ccf53f4b0f9f17abceb0d7afb63dca6294 (diff)
Make usage indexing changes effective, update progress bar display, ++
-rwxr-xr-xlxr-genxref254
1 files changed, 137 insertions, 117 deletions
diff --git a/lxr-genxref b/lxr-genxref
index 25115ec..e701091 100755
--- a/lxr-genxref
+++ b/lxr-genxref
@@ -16,35 +16,32 @@ use Carp;
use Data::Dumper;
use IO::Handle;
use Fcntl;
+use Term::ProgressBar;
-$SIG{'INT'} = sub { die "SIGINT: please wait, flushing caches...\n"; };
-$SIG{'QUIT'} = sub { die "SIGQUIT: please wait, flushing caches...\n"; };
-$SIG{'TERM'} = sub { die "SIGTERM: please wait, flushing caches...\n"; };
+$SIG{'INT'} = sub { die "\nSIGINT: please wait, flushing caches...\n"; };
+$SIG{'QUIT'} = sub { die "\nSIGQUIT: please wait, flushing caches...\n"; };
+$SIG{'TERM'} = sub { die "\nSIGTERM: please wait, flushing caches...\n"; };
+$SIG{'PIPE'} = sub { die "\nSIGPIPE: please wait, flushing caches...\n"; };
autoflush STDOUT 1;
autoflush STDERR 1;
-my $cols = 0;
-sub progress_mark {
- my ($mark) = @_;
+my $tree = shift(@ARGV);
+my @versions = @ARGV;
- if ($cols > 79) {
- print("\n");
- $cols = 0;
- }
- print(STDERR $mark);
- $cols++;
-}
+my $context = LXRng::Context->new('tree' => $tree);
+LXRng::Lang->init($context);
-sub progress_info {
- my ($msg) = @_;
- print(STDERR "\n") if $cols > 0;
- print(STDERR "$msg\n");
- $cols = 0;
-}
+my $index = $context->config->{'index'};
+my $usage = $context->config->{'usage'};
+my $hash = $context->config->{'search'};
+my $rep = $context->config->{'repository'};
+my $progress;
-sub make_add_ident {
- my ($index, $fileid) = @_;
+$SIG{'__WARN__'} = sub { $progress->message(shift) if $progress };
+
+sub make_add_ident($) {
+ my ($fileid) = @_;
my $last_func;
my %identcache;
@@ -85,26 +82,20 @@ sub make_add_ident {
}
}
-sub index_file {
- my ($context, $index, $tree, $file, $fileid) = @_;
+sub index_file($$) {
+ my ($file, $fileid) = @_;
my $lang = LXRng::Lang->new($file);
- unless ($index->to_index($fileid)) {
- progress_mark("*");
- return;
- }
- return unless $lang->doindex();
+ return 0 unless $index->to_index($fileid);
+ return 1 unless $lang->doindex();
- my $add_ident = make_add_ident($index, $fileid);
+ my $add_ident = make_add_ident($fileid);
- progress_info("indexing ".$file->name."[".$file->revision."] ".
- $file->size." bytes ($lang)...");
-
+ $progress->message("--- indexing ".$file->name.
+ " [".$file->revision."]");
my @extra_flags = ('-IEXPORT_SYMBOL+', '-I__initcall+');
- my $path = $file->phys_path;
-
my $ctags;
my $pid = open($ctags, '-|');
die $! unless defined $pid;
@@ -116,44 +107,49 @@ sub index_file {
'--excmd=number', '-f', '-',
'--language-force='.$lang->ctagslangname,
$lang->ctagsopts,
- $path);
+ $file->phys_path);
# Still here?
warn $!;
kill(9, $$);
}
- LXRng::Index::transaction {
- while (<$ctags>) {
- chomp;
- my ($symbol, $file, $excmd, @info) = split(/\t/);
- my %info = map { split(/:/, $_, 2) } @info;
+ while (<$ctags>) {
+ chomp;
+ my ($symbol, $file, $excmd, @info) = split(/\t/);
+ my %info = map { split(/:/, $_, 2) } @info;
- $add_ident->($symbol, \%info);
- }
- } $index;
-
- $path = undef;
+ $add_ident->($symbol, \%info);
+ }
+ return 1;
}
-sub reference_file {
- my ($context, $index, $tree, $file, $fileid) = @_;
+# We allow $usage to be supplied both by the Search and Index
+# backends, since it's not quite clear which is better. There's a
+# certain added complexity because of this, so perhaps this feature
+# ought to go once a clear best choice emerges. (For instance,
+# reference_file is called from hash_file with a reference to the
+# hash indexing's document object, which is only actually used if
+# $usage is the Search backend.)
+
+sub reference_file($$$) {
+ my ($file, $fileid, $doc) = @_;
my $lang = LXRng::Lang->new($file);
- unless ($index->to_reference($fileid)) {
- progress_mark(".");
- return;
- }
- return unless $lang->doindex();
+ return 0 unless $index->to_reference($fileid);
+ return 1 unless $lang->doindex();
+
+ my $handle;
+ sysopen($handle, $file->phys_path, 0) || die($!);
- my $parse = new LXRng::Parse::Simple($file->handle, 8,
+ my $parse = new LXRng::Parse::Simple($handle, 8,
@{$lang->parsespec});
- progress_info("referencing ".$file->name.", ".
- $file->size." bytes ($lang)...");
+ $progress->message("--- referencing ".$file->name.
+ " [".$file->revision."]");
- my $res = $lang->reserved();
+ my $reserved = $lang->reserved();
my $re = qr(
(?m:^|[^a-zA-Z0-9_]) # Non-symbol chars.
@@ -161,6 +157,7 @@ sub reference_file {
\b
)x;
+ my %refs;
my $line = 1;
while (1) {
my ($btype, $frag) = $parse->nextfrag;
@@ -171,12 +168,10 @@ sub reference_file {
while ($frag =~ /\G.*?(?:(\n)|$re)/gc) {
$line++ && next if defined $1;
+ my $sym = $2;
- my $id = $2;
-
- next if $$res{$id};
-
- $index->add_usage($fileid, $line, $id);
+ next if $$reserved{$sym};
+ push(@{$refs{$sym} ||= []}, $line);
}
}
else {
@@ -191,59 +186,61 @@ sub reference_file {
$line += $frag =~ tr/\n/\n/;
}
}
+ close($handle);
+
+ foreach my $sym (keys %refs) {
+ my $sym_id = $index->symbol_id($sym, 1);
+ $usage->add_usage($doc, $fileid, $sym_id, $refs{$sym});
+ }
+ undef %refs;
+ return 1;
}
-sub hash_file {
- my ($context, $index, $hash, $tree, $file, $fileid, $rels) = @_;
+sub hash_file($$$) {
+ my ($file, $fileid, $rels) = @_;
my $docid;
if ($index->to_hash($fileid)) {
- my $handle = $file->handle();
- progress_info("hashing ".$file->name."[".$file->revision."] ".
- $file->size." bytes...");
+ my $handle;
+ sysopen($handle, $file->phys_path, 0) || die($!);
+ $progress->message("--- hashing ".$file->name.
+ " [".$file->revision."]");
my $doc = $hash->new_document($file->name);
while (<$handle>) {
my $pos = 0;
# Latin-1 word characters.
foreach my $term (/([0-9a-zA-Z\300-\326\330-\366\370-\377]+)/g) {
- if ($term =~ /^[A-Z][^A-Z]*$/) {
- $term = 'R'.lc($term);
- }
- else {
- $term = lc($term);
- }
+ $term = lc($term);
next if length($term) > 128;
$doc->add_posting($term, $.*100 + $pos++);
}
}
+ reference_file($file, $fileid, $doc);
$docid = $hash->add_document($doc, [map {
$index->release_id($tree, $_) } @$rels]);
$index->add_hashed_document($fileid, $docid);
+ $handle->close();
+ return 1;
}
else {
$docid = $index->get_hashed_document($fileid);
+ my $doc = $hash->get_document($docid);
+ if (reference_file($file, $fileid, $doc)) {
+ $hash->save_document($docid, $doc);
+ }
+
my $changed = $hash->add_release($docid, [map {
$index->release_id($tree, $_) } @$rels]);
- progress_mark($changed ? "+" : "-");
+ return $changed;
}
}
-my $tree = shift(@ARGV);
-my @versions = @ARGV;
+sub inventory_release($) {
+ my ($version) = @_;
-my $context = LXRng::Context->new('tree' => $tree);
-LXRng::Lang->init($context);
-
-my $index = $context->config->{'index'};
-my $hash = $context->config->{'search'};
-my $rep = $context->config->{'repository'};
-
-sub inventory_release {
- my ($tree, $version) = @_;
-
- print("\nrecording all files for $version...\n");
+ $progress->message("--- recording all files for $version");
my $iter = $rep->iterator($version);
LXRng::Index::transaction {
@@ -258,57 +255,80 @@ sub inventory_release {
} $index;
}
-sub index_pending {
- my ($tree) = @_;
+sub index_pending() {
my $pending = $index->pending_files($tree);
- print("\nindexing ".(0+@$pending)." outstanding files...\n");
- LXRng::Index::transaction {
- foreach my $p (@$pending) {
+ my $total = 0+@$pending;
+ my $count = 0;
+
+ print("\n");
+ $progress = Term::ProgressBar->new({name => 'Indexing',
+ count => $total,
+ ETA => 'linear'});
+ $progress->max_update_rate(0.25);
+ $progress->message("--- indexing/updating $total files...");
+
+ foreach my $p (@$pending) {
+ LXRng::Index::transaction {
my ($fileid, $path, $rev) = @$p;
my $rels = $index->new_releases_by_file($fileid);
next unless @$rels;
+
+ $context->release($$rels[0]); # Needed for include resolution.
my $node = $rep->node($path, $$rels[0], $rev);
next unless $node;
- hash_file($context, $index, $hash, $tree, $node, $fileid, $rels);
- index_file($context, $index, $tree, $node, $fileid);
- }
- } $index;
-
- print("\nreferencing ".(0+@$pending)." outstanding files...\n");
- LXRng::Index::transaction {
- foreach my $p (@$pending) {
- my ($fileid, $path, $rev) = @$p;
- my $rels = $index->new_releases_by_file($fileid);
- next unless @$rels;
- $context->release($$rels[0]); # Needed for include resolution.
- my $node = $rep->node($path, $$rels[0], $rev);
- next unless $node;
-
- LXRng::Index::transaction {
- reference_file($context, $index, $tree, $node, $fileid, $rels);
- } $index;
- }
- } $index;
+ if (hash_file($node, $fileid, $rels) |
+ index_file($node, $fileid))
+ {
+ $count++;
+ $progress->update($count);
+ }
+ else {
+ $total--;
+ my $skip = @$pending - $total;
+ if ($skip % 100 == 0) {
+ $progress->message("--- skipped/refreshed $skip files...");
+ }
+ $progress->target($total);
+ }
+ } $index;
+ }
+ $progress->update($total);
my $done = $index->update_indexed_releases($tree);
- progress_info("releases ".join(", ", @$done)." done") if
+ $progress->message("=== releases: ".join(", ", @$done)) if
@$done;
+ print("\n");
}
+
+
+$progress = Term::ProgressBar->new({name => 'Recording',
+ count => 1,
+ ETA => 'linear'});
+$progress->max_update_rate(0.25);
+
if (@versions) {
+ $progress->target(1+@versions);
foreach my $version (@versions) {
- inventory_release($tree, $version);
+ inventory_release($version);
+ $progress->update();
}
}
else {
- foreach my $version (reverse @{$context->all_releases}) {
- next if $index->_get_release($index->tree_id($tree), $version);
- inventory_release($tree, $version);
+ @versions = grep { ! $index->_get_release($index->tree_id($tree), $_);
+ } @{$context->all_releases};
+ $progress->target(1+@versions);
+ foreach my $version (reverse @versions) {
+ inventory_release($version);
+ $progress->update();
}
}
+$progress->update();
-index_pending($tree);
+LXRng::Index::transaction {
+ index_pending();
+} $index;
$hash->flush();