diff options
Diffstat (limited to 'lib/LXRng/Index')
-rw-r--r-- | lib/LXRng/Index/DBI.pm | 127 | ||||
-rw-r--r-- | lib/LXRng/Index/Pg.pm | 3 | ||||
-rw-r--r-- | lib/LXRng/Index/PgBatch.pm | 2 |
3 files changed, 127 insertions, 5 deletions
diff --git a/lib/LXRng/Index/DBI.pm b/lib/LXRng/Index/DBI.pm
index 602eac8..932202c 100644
--- a/lib/LXRng/Index/DBI.pm
+++ b/lib/LXRng/Index/DBI.pm
@@ -73,6 +73,120 @@ sub _get_tree {
     return $id;
 }
 
+# Return an arrayref of [revision id, file path, revision] rows for every
+# file revision that takes part in a release of $tree not yet marked as
+# indexed.  Returns an empty arrayref when _get_tree yields no id for
+# $tree, when the query fails, or when nothing is pending.
+sub pending_files {
+    my ($self, $tree) = @_;
+
+    my $tree_id = $self->_get_tree($tree);
+    return [] unless $tree_id;
+
+    my $dbh = $self->dbh;
+    my $pre = $self->prefix;
+
+    # Can be made more fine grained by consulting filestatus, but all
+    # hashed documents need to have their termlist updated...  Just
+    # include all files participating in releases not yet fully
+    # indexed.
+    my $sth = $$self{'sth'}{'pending_files'} ||=
+        $dbh->prepare(qq{
+            select rv.id, f.path, rv.revision
+            from ${pre}revisions rv, ${pre}files f
+            where rv.id_file = f.id
+              and rv.id in (select fr.id_rfile
+                            from ${pre}releases r, ${pre}filereleases fr
+                            where r.id = fr.id_release
+                              and r.id_tree = ?
+                              and r.is_indexed = 'f')});
+
+#       $dbh->prepare(qq{
+#           select rv.id, f.path, rv.revision
+#           from ${pre}files f, ${pre}revisions rv
+#           where rv.id_file = f.id
+#             and not exists(select 1 from ${pre}filestatus fs
+#                            where fs.id_rfile = rv.id
+#                              and fs.indexed = 't'
+#                              and fs.hashed = 't'
+#                              and fs.referenced = 't')
+#             and exists(select 1 from ${pre}filereleases fr, ${pre}releases r
+#                        where fr.id_rfile = rv.id
+#                          and fr.id_release = r.id
+#                          and r.id_tree = ?)});
+
+    # Do not compare the value of execute() against 0: DBI only promises
+    # a true value for a successful SELECT, and the row count it reports
+    # is driver dependent.  Only a false value means failure.
+    $sth->execute($tree_id) or return [];
+    return $sth->fetchall_arrayref();
+}
+
+# Return an arrayref of the release tags of all not-yet-indexed releases
+# whose filereleases rows reference $file_id (matched against id_rfile).
+# Returns an empty arrayref when there are none or the query fails.
+sub new_releases_by_file {
+    my ($self, $file_id) = @_;
+
+    my $dbh = $self->dbh;
+    my $pre = $self->prefix;
+    my $sth = $$self{'sth'}{'releases_by_file'} ||=
+        $dbh->prepare(qq{
+            select r.release_tag
+            from ${pre}releases r, ${pre}filereleases f
+            where r.id = f.id_release and f.id_rfile = ?
+              and r.is_indexed = 'f'});
+
+    # A successful SELECT makes execute() return a true value whose
+    # numeric row count is driver dependent, so only test for failure.
+    $sth->execute($file_id) or return [];
+    return [map { $$_[0] } @{$sth->fetchall_arrayref()}];
+}
+
+# Flag as indexed every release of $tree whose files all have a filestatus
+# row with indexed, hashed and referenced set, and return an arrayref of
+# the release tags that were flagged (empty when none qualify).
+sub update_indexed_releases {
+    my ($self, $tree) = @_;
+
+    my $tree_id = $self->_get_tree($tree);
+    return [] unless $tree_id;
+
+    my $dbh = $self->dbh;
+    my $pre = $self->prefix;
+
+    # Restrict the search to the requested tree; without the id_tree
+    # condition releases of every tree would be flagged and reported.
+    my $find = $$self{'sth'}{'update_indexed_releases_find'} ||=
+        $dbh->prepare(qq{
+            select r.id, r.release_tag
+            from ${pre}releases r
+            where r.id_tree = ?
+              and r.is_indexed = 'f'
+              and not exists (select 1
+                              from ${pre}filereleases fr
+                                  left outer join ${pre}filestatus fs
+                                  on (fr.id_rfile = fs.id_rfile)
+                              where fr.id_release = r.id
+                                and (fs.id_rfile is null
+                                     or fs.indexed = 'f'
+                                     or fs.hashed = 'f'
+                                     or fs.referenced = 'f'))});
+
+    $find->execute($tree_id) or return [];
+    my $rels = $find->fetchall_arrayref();
+    return [] unless @$rels;
+
+    my $set = $$self{'sth'}{'update_indexed_releases_set'} ||=
+        $dbh->prepare(qq{
+            update ${pre}releases set is_indexed = 't' where id = ?});
+    foreach my $r (@$rels) {
+        $set->execute($$r[0]);
+    }
+    $set->finish();
+    return [map { $$_[1] } @$rels];
+}
+
 sub _get_release {
     my ($self, $tree_id, $release) = @_;
 
@@ -345,14 +459,19 @@ sub get_symbol_usage {
 
     my $dbh = $self->dbh;
     my $pre = $self->prefix;
-    my $sth = $$self{'sth'}{'get_symbol_usage'} ||=
+
+    # Postgres' query optimizer deals badly with placeholders and
+    # prepared statements in this case.
+    return undef unless $symid =~ /^\d+$/s;
+    my $sth =
         $dbh->prepare(qq{
             select u.id_rfile, u.line
             from ${pre}usage u, ${pre}filereleases fr
-            where u.id_symbol = ?
-            and u.id_rfile = fr.id_rfile and fr.id_release = ?});
+            where u.id_symbol = $symid
+            and u.id_rfile = fr.id_rfile and fr.id_release = ?
+            limit 1000});
 
-    $sth->execute($symid, $rel_id);
+    $sth->execute($rel_id);
     my $res = $sth->fetchall_arrayref();
     $sth->finish();
 
diff --git a/lib/LXRng/Index/Pg.pm b/lib/LXRng/Index/Pg.pm
index 05fe3a0..1b905c0 100644
--- a/lib/LXRng/Index/Pg.pm
+++ b/lib/LXRng/Index/Pg.pm
@@ -179,6 +179,8 @@ sub init_db {
         or die($dbh->errstr);
     $dbh->do(qq{create index ${pre}file_idx1 on ${pre}files using btree (path)})
         or die($dbh->errstr);
+    $dbh->do(qq{create index ${pre}filerel_idx1 on ${pre}filereleases using btree (id_release)})
+        or die($dbh->errstr);
 
     $dbh->do(qq{grant select on ${pre}charsets to public}) or die($dbh->errstr);
     $dbh->do(qq{grant select on ${pre}trees to public}) or die($dbh->errstr);
@@ -212,6 +214,7 @@ sub drop_db {
     $dbh->do(qq{drop index ${pre}usage_idx2});
     $dbh->do(qq{drop index ${pre}include_idx1});
     $dbh->do(qq{drop index ${pre}file_idx1});
+    $dbh->do(qq{drop index ${pre}filerel_idx1});
 
     $dbh->do(qq{drop table ${pre}usage});
     $dbh->do(qq{drop table ${pre}identifiers});
diff --git a/lib/LXRng/Index/PgBatch.pm b/lib/LXRng/Index/PgBatch.pm
index 8f8844c..19c9fa9 100644
--- a/lib/LXRng/Index/PgBatch.pm
+++ b/lib/LXRng/Index/PgBatch.pm
@@ -77,7 +77,7 @@ sub flush {
         }
     }
     $self->dbh->commit() unless $self->dbh->{AutoCommit};
-    $self->dbh->do(q(analyze)) if $i > 100000;
+    $self->dbh->do(q(analyze)) if $i > 500000;
     $self->dbh->disconnect();
     warn "\n*** index: flushed $i rows\n";
     kill(9, $$);