aboutsummaryrefslogtreecommitdiffstats
path: root/lib/LXRng/Index
diff options
context:
space:
mode:
authorArne Georg Gleditsch <argggh@lxr.linpro.no>2007-11-15 21:51:00 +0100
committerArne Georg Gleditsch <argggh@lxr.linpro.no>2007-11-15 21:51:00 +0100
commit8c978d76179b4f573c1eb9b9bb9db966c81330bb (patch)
treeade066d6c36105de19e2a826188d0f1c14818f59 /lib/LXRng/Index
parente9fa4c98bb5f084739d3418ade3f0c51e34a0aa1 (diff)
Too many changes...
Diffstat (limited to 'lib/LXRng/Index')
-rw-r--r--lib/LXRng/Index/DBI.pm117
-rw-r--r--lib/LXRng/Index/Pg.pm3
-rw-r--r--lib/LXRng/Index/PgBatch.pm2
3 files changed, 117 insertions, 5 deletions
diff --git a/lib/LXRng/Index/DBI.pm b/lib/LXRng/Index/DBI.pm
index 602eac8..932202c 100644
--- a/lib/LXRng/Index/DBI.pm
+++ b/lib/LXRng/Index/DBI.pm
@@ -73,6 +73,110 @@ sub _get_tree {
return $id;
}
+# Return the file revisions of a tree that still take part in at least
+# one release that is not yet flagged as indexed.
+#
+#   $tree - tree identifier, resolved to a tree id via _get_tree().
+#
+# Returns a reference to an array of [revision id, file path, revision]
+# rows.  The array is empty when the tree is unknown, when execute()
+# reports failure, or when nothing is pending.
+sub pending_files {
+    my ($self, $tree) = @_;
+
+    my $tree_id = $self->_get_tree($tree);
+    return [] unless $tree_id;
+
+    my $dbh = $self->dbh;
+    my $pre = $self->prefix;
+
+    # Can be made more fine grained by consulting filestatus, but all
+    # hashed documents need to have their termlist updated...  Just
+    # include all files participating in releases not yet fully
+    # indexed.
+    my $sth = $$self{'sth'}{'pending_files'} ||=
+	$dbh->prepare(qq{
+	    select rv.id, f.path, rv.revision
+		from ${pre}revisions rv, ${pre}files f
+		where rv.id_file = f.id
+		and rv.id in (select fr.id_rfile
+			      from ${pre}releases r, ${pre}filereleases fr
+			      where r.id = fr.id_release
+			      and r.id_tree = ?
+			      and r.is_indexed = 'f')});
+
+    # Finer-grained alternative based on filestatus, kept for reference:
+#	$dbh->prepare(qq{
+#	    select rv.id, f.path, rv.revision
+#		from ${pre}files f, ${pre}revisions rv
+#		where rv.id_file = f.id
+#		and not exists(select 1 from ${pre}filestatus fs
+#			       where fs.id_rfile = rv.id
+#			       and fs.indexed = 't'
+#			       and fs.hashed = 't'
+#			       and fs.referenced = 't')
+#		and exists(select 1 from ${pre}filereleases fr, ${pre}releases r
+#			   where fr.id_rfile = rv.id
+#			   and fr.id_release = r.id
+#			   and r.id_tree = ?)});
+
+    # DBI does not promise a row count from execute() on a SELECT, so
+    # only test it for success and let the fetch deliver the (possibly
+    # empty) result set.
+    unless ($sth->execute($tree_id)) {
+	$sth->finish();
+	return [];
+    }
+
+    my $rows = $sth->fetchall_arrayref();
+    $sth->finish();
+    return $rows || [];
+}
+
+# List the tags of the not-yet-indexed releases that contain a given
+# file revision.
+#
+#   $file_id - revision id, matched against filereleases.id_rfile.
+#
+# Returns a reference to an array of release tags; the array is empty
+# when execute() reports failure or no such release exists.
+sub new_releases_by_file {
+    my ($self, $file_id) = @_;
+
+    my $dbh = $self->dbh;
+    my $pre = $self->prefix;
+
+    # Cache the handle under this sub's own name.  The previous key,
+    # 'releases_by_file', would be shared with any other statement
+    # cached under that name, and whichever was prepared first would
+    # then be run by both.
+    my $sth = $$self{'sth'}{'new_releases_by_file'} ||=
+	$dbh->prepare(qq{
+	    select r.release_tag from ${pre}releases r, ${pre}filereleases f
+		where r.id = f.id_release and f.id_rfile = ? and r.is_indexed = 'f'});
+
+    # DBI does not promise a row count from execute() on a SELECT, so
+    # only test it for success.
+    unless ($sth->execute($file_id)) {
+	$sth->finish();
+	return [];
+    }
+
+    my $rows = $sth->fetchall_arrayref() || [];
+    $sth->finish();
+    return [map { $$_[0] } @$rows];
+}
+
+# Flag as indexed every release of a tree whose files have all been
+# indexed, hashed and referenced according to filestatus.
+#
+#   $tree - tree identifier, resolved to a tree id via _get_tree().
+#
+# Returns a reference to an array holding the tags of the releases that
+# were flagged by this call; the array is empty when the tree is
+# unknown, when execute() reports failure, or when no release
+# qualifies.
+sub update_indexed_releases {
+    my ($self, $tree) = @_;
+
+    my $tree_id = $self->_get_tree($tree);
+    return [] unless $tree_id;
+
+    my $dbh = $self->dbh;
+    my $pre = $self->prefix;
+
+    # Restrict the search to the requested tree.  Without the id_tree
+    # condition the releases of every tree would be picked up and
+    # reported back for $tree.
+    my $find = $$self{'sth'}{'update_indexed_releases_find'} ||=
+	$dbh->prepare(qq{
+	    select r.id, r.release_tag
+		from ${pre}releases r
+		where r.id_tree = ?
+		and r.is_indexed = 'f'
+		and not exists (select 1
+				from ${pre}filereleases fr
+				left outer join ${pre}filestatus fs
+				on (fr.id_rfile = fs.id_rfile)
+				where fr.id_release = r.id
+				and (fs.id_rfile is null
+				     or fs.indexed = 'f'
+				     or fs.hashed = 'f'
+				     or fs.referenced = 'f'))});
+
+    # DBI does not promise a row count from execute() on a SELECT, so
+    # only test it for success and count the fetched rows instead.
+    return [] unless $find->execute($tree_id);
+    my $rels = $find->fetchall_arrayref() || [];
+    $find->finish();
+    return [] unless @$rels;
+
+    my $set = $$self{'sth'}{'update_indexed_releases_set'} ||=
+	$dbh->prepare(qq{
+	    update ${pre}releases set is_indexed = 't' where id = ?});
+    foreach my $r (@$rels) {
+	$set->execute($$r[0]);
+    }
+    $set->finish();
+
+    return [map { $$_[1] } @$rels];
+}
+
sub _get_release {
my ($self, $tree_id, $release) = @_;
@@ -345,14 +449,19 @@ sub get_symbol_usage {
my $dbh = $self->dbh;
my $pre = $self->prefix;
- my $sth = $$self{'sth'}{'get_symbol_usage'} ||=
+
+ # Postgres' query optimizer deals badly with placeholders and
+ # prepared statements in this case.
+ return undef unless $symid =~ /^\d+$/s;
+ my $sth =
$dbh->prepare(qq{
select u.id_rfile, u.line
from ${pre}usage u, ${pre}filereleases fr
- where u.id_symbol = ?
- and u.id_rfile = fr.id_rfile and fr.id_release = ?});
+ where u.id_symbol = $symid
+ and u.id_rfile = fr.id_rfile and fr.id_release = ?
+ limit 1000});
- $sth->execute($symid, $rel_id);
+ $sth->execute($rel_id);
my $res = $sth->fetchall_arrayref();
$sth->finish();
diff --git a/lib/LXRng/Index/Pg.pm b/lib/LXRng/Index/Pg.pm
index 05fe3a0..1b905c0 100644
--- a/lib/LXRng/Index/Pg.pm
+++ b/lib/LXRng/Index/Pg.pm
@@ -179,6 +179,8 @@ sub init_db {
or die($dbh->errstr);
$dbh->do(qq{create index ${pre}file_idx1 on ${pre}files using btree (path)})
or die($dbh->errstr);
+ $dbh->do(qq{create index ${pre}filerel_idx1 on ${pre}filereleases using btree (id_release)})
+ or die($dbh->errstr);
$dbh->do(qq{grant select on ${pre}charsets to public}) or die($dbh->errstr);
$dbh->do(qq{grant select on ${pre}trees to public}) or die($dbh->errstr);
@@ -212,6 +214,7 @@ sub drop_db {
$dbh->do(qq{drop index ${pre}usage_idx2});
$dbh->do(qq{drop index ${pre}include_idx1});
$dbh->do(qq{drop index ${pre}file_idx1});
+ $dbh->do(qq{drop index ${pre}filerel_idx1});
$dbh->do(qq{drop table ${pre}usage});
$dbh->do(qq{drop table ${pre}identifiers});
diff --git a/lib/LXRng/Index/PgBatch.pm b/lib/LXRng/Index/PgBatch.pm
index 8f8844c..19c9fa9 100644
--- a/lib/LXRng/Index/PgBatch.pm
+++ b/lib/LXRng/Index/PgBatch.pm
@@ -77,7 +77,7 @@ sub flush {
}
}
$self->dbh->commit() unless $self->dbh->{AutoCommit};
- $self->dbh->do(q(analyze)) if $i > 100000;
+ $self->dbh->do(q(analyze)) if $i > 500000;
$self->dbh->disconnect();
warn "\n*** index: flushed $i rows\n";
kill(9, $$);