From 69c9d9207a658bd048fbf54016a5fa4b824a1531 Mon Sep 17 00:00:00 2001 From: Arne Georg Gleditsch Date: Wed, 12 Aug 2009 22:39:46 +0200 Subject: Make sure purposely skipped files are not marked as pending. --- lxr-genxref | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/lxr-genxref b/lxr-genxref index 782ad9f..599f202 100755 --- a/lxr-genxref +++ b/lxr-genxref @@ -280,16 +280,22 @@ sub reference_file($$$) { sub hash_file($$$) { my ($file, $fileid, $rels) = @_; - return 0 if - defined($context->config->{'search_size_limit'}) and + if (defined($context->config->{'search_size_limit'}) and $context->config->{'search_size_limit'} > 0 and - $file->size > $context->config->{'search_size_limit'}; + $file->size > $context->config->{'search_size_limit'}) + { + $index->to_hash($fileid); + $index->to_reference($fileid); + return 0; + } my $docid; if ($index->to_hash($fileid)) { my $handle; sysopen($handle, $file->phys_path, 0) || die($!); unless (-T $handle) { + # Non-text file. Mark as referenced as well. + $index->to_reference($fileid); $handle->close(); return 0; } @@ -323,7 +329,10 @@ sub hash_file($$$) { } else { $docid = $index->get_hashed_document($fileid); - return 0 unless $docid; + unless ($docid) { + $index->to_reference($fileid); + return 0; + } my $doc = $hash->get_document($docid); if (reference_file($file, $fileid, $doc)) { $hash->save_document($docid, $doc); -- cgit v1.2.3 From 65c53ad4bf0589c890f9c07383858f244c02c35a Mon Sep 17 00:00:00 2001 From: Arne Georg Gleditsch Date: Wed, 12 Aug 2009 22:40:17 +0200 Subject: Don't do textual manipulations on references... --- lib/LXRng/Index/DBI.pm | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/LXRng/Index/DBI.pm b/lib/LXRng/Index/DBI.pm index 8fcb391..eeb6a83 100644 --- a/lib/LXRng/Index/DBI.pm +++ b/lib/LXRng/Index/DBI.pm @@ -471,7 +471,8 @@ sub get_symbol_usage { my %rlines; foreach my $r (@$res) { - $rlines{$$r[0]} = [$$r[1] =~ /(\d+),?/g]; + $rlines{$$r[0]} = ref($$r[1]) eq 'ARRAY' + ? $$r[1] : [$$r[1] =~ /(\d+),?/g]; } return \%rlines; -- cgit v1.2.3 From 40c6f593f54c8023d6ed74e695de6b3a56a74bad Mon Sep 17 00:00:00 2001 From: Arne Georg Gleditsch Date: Wed, 12 Aug 2009 22:40:51 +0200 Subject: Increase flush intervals somewhat. --- lib/LXRng/Index/PgBatch.pm | 2 +- lib/LXRng/Search/Xapian.pm | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/LXRng/Index/PgBatch.pm b/lib/LXRng/Index/PgBatch.pm index c1b30eb..7bd67e6 100644 --- a/lib/LXRng/Index/PgBatch.pm +++ b/lib/LXRng/Index/PgBatch.pm @@ -52,7 +52,7 @@ sub transaction { # Only occasional synchronization if we're inside another # transaction. # TODO: Check fill grade of caches and flush based on that. - if ($self->{'writes'}++ % 491 == 0) { + if ($self->{'writes'}++ % 3259 == 0) { $self->flush(); $self->dbh->commit(); } diff --git a/lib/LXRng/Search/Xapian.pm b/lib/LXRng/Search/Xapian.pm index 014d57a..5230728 100644 --- a/lib/LXRng/Search/Xapian.pm +++ b/lib/LXRng/Search/Xapian.pm @@ -81,7 +81,7 @@ sub add_document { } my $doc_id = $self->wrdb->add_document($doc); $self->{'writes'}++; - $self->flush() if $self->{'writes'} % 499 == 0; + $self->flush() if $self->{'writes'} % 3271 == 0; return $doc_id; } -- cgit v1.2.3