aboutsummaryrefslogtreecommitdiffstats
path: root/lib/LXRng/Index
diff options
context:
space:
mode:
authorArne Georg Gleditsch <argggh@lxr.linpro.no>2007-07-05 00:51:08 +0200
committerArne Georg Gleditsch <argggh@lxr.linpro.no>2007-07-05 00:51:08 +0200
commite9fa4c98bb5f084739d3418ade3f0c51e34a0aa1 (patch)
treefec1d635625e031cde7cba1b0a1d95ee92ac760b /lib/LXRng/Index
Rebase tree.
Diffstat (limited to 'lib/LXRng/Index')
-rw-r--r--lib/LXRng/Index/DBI.pm430
-rw-r--r--lib/LXRng/Index/Generic.pm172
-rw-r--r--lib/LXRng/Index/Pg.pm417
-rw-r--r--lib/LXRng/Index/PgBatch.pm217
4 files changed, 1236 insertions, 0 deletions
diff --git a/lib/LXRng/Index/DBI.pm b/lib/LXRng/Index/DBI.pm
new file mode 100644
index 0000000..602eac8
--- /dev/null
+++ b/lib/LXRng/Index/DBI.pm
@@ -0,0 +1,430 @@
+package LXRng::Index::DBI;
+
+use strict;
+use DBI;
+
+use base qw(LXRng::Index::Generic);
+
+sub transaction {
+ my ($self, $code) = @_;
+ if ($self->dbh->{AutoCommit}) {
+ $self->dbh->{AutoCommit} = 0;
+ $code->();
+ $self->dbh->{AutoCommit} = 1;
+ }
+ else {
+ # If we're in a transaction already, don't return to
+ # AutoCommit state.
+ $code->();
+ }
+ $self->dbh->commit();
+}
+
+sub _to_task {
+ my ($self, $rfile_id, $task) = @_;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'_to_task_ins'} ||=
+ $dbh->prepare(qq{insert into ${pre}filestatus(id_rfile)
+ select ? where not exists
+ (select 1 from ${pre}filestatus
+ where id_rfile = ?)});
+ $sth->execute($rfile_id, $rfile_id);
+
+ $sth = $$self{'sth'}{'_to_task_upd'}{$task} ||=
+ $dbh->prepare(qq{update ${pre}filestatus set $task = 't'
+ where $task = 'f' and id_rfile = ?});
+ return $sth->execute($rfile_id) > 0;
+}
+
+
+sub to_index {
+ my ($self, $rfile_id) = @_;
+
+ return $self->_to_task($rfile_id, 'indexed');
+}
+
+sub to_reference {
+ my ($self, $rfile_id) = @_;
+
+ return $self->_to_task($rfile_id, 'referenced');
+}
+
+sub to_hash {
+ my ($self, $rfile_id) = @_;
+
+ return $self->_to_task($rfile_id, 'hashed');
+}
+
+sub _get_tree {
+ my ($self, $tree) = @_;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'_get_tree'} ||=
+ $dbh->prepare(qq{select id from ${pre}trees where name = ?});
+ my $id;
+ if ($sth->execute($tree) > 0) {
+ ($id) = $sth->fetchrow_array();
+ }
+ $sth->finish();
+
+ return $id;
+}
+
+sub _get_release {
+ my ($self, $tree_id, $release) = @_;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'_get_release'} ||=
+ $dbh->prepare(qq{select id from ${pre}releases
+ where id_tree = ? and release_tag = ?});
+ my $id;
+ if ($sth->execute($tree_id, $release) > 0) {
+ ($id) = $sth->fetchrow_array();
+ }
+ $sth->finish();
+
+ return $id;
+}
+
+sub _get_file {
+ my ($self, $path) = @_;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'_get_file'} ||=
+ $dbh->prepare(qq{select id from ${pre}files where path = ?});
+ my $id;
+ if ($sth->execute($path) > 0) {
+ ($id) = $sth->fetchrow_array();
+ }
+ $sth->finish();
+
+ return $id;
+}
+
+sub _get_rfile_by_release {
+ my ($self, $rel_id, $path) = @_;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'_get_rfile_by_release'} ||=
+ $dbh->prepare(qq{select r.id
+ from ${pre}filereleases fr, ${pre}files f,
+ ${pre}revisions r
+ where fr.id_rfile = r.id and r.id_file = f.id
+ and fr.id_release = ? and f.path = ?});
+
+ my $id;
+ if ($sth->execute($rel_id, $path) > 0) {
+ ($id) = $sth->fetchrow_array();
+ }
+ $sth->finish();
+
+ return $id;
+}
+
+sub _get_symbol {
+ my ($self, $symbol) = @_;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'_get_symbol'} ||=
+ $dbh->prepare(qq{select id from ${pre}symbols where name = ?});
+ my $id;
+ if ($sth->execute($symbol) > 0) {
+ ($id) = $sth->fetchrow_array();
+ }
+ $sth->finish();
+
+ return $id;
+}
+
+
+sub _add_include {
+ my ($self, $file_id, $inc_id) = @_;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'_add_include'} ||=
+ $dbh->prepare(qq{insert into ${pre}includes(id_rfile, id_include_path)
+ values (?, ?)});
+ my $id;
+ $sth->execute($file_id, $inc_id);
+
+ return 1;
+}
+
+sub _includes_by_id {
+ my ($self, $file_id) = @_;
+
+}
+
+sub _symbol_by_id {
+ my ($self, $id) = @_;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'_symbol_by_id'} ||=
+ $dbh->prepare(qq{select * from ${pre}symbols
+ where id = ?});
+ my @res;
+ if ($sth->execute($id) > 0) {
+ @res = $sth->fetchrow_array();
+ }
+ $sth->finish();
+
+ return @res;
+}
+
+sub _identifiers_by_name {
+ my ($self, $rel_id, $symbol) = @_;
+
+ my $sym_id = $self->_get_symbol($symbol);
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'_identifiers_by_name'} ||=
+ $dbh->prepare(qq{
+ select i.id, i.type, f.path, i.line, s.name, c.type, c.id,
+ i.id_rfile
+ from ${pre}identifiers i
+ left outer join ${pre}identifiers c on i.context = c.id
+ left outer join ${pre}symbols s on c.id_symbol = s.id,
+ ${pre}files f, ${pre}filereleases r, ${pre}revisions v
+ where i.id_rfile = v.id and v.id = r.id_rfile
+ and r.id_release = ? and v.id_file = f.id
+ and i.id_symbol = ?});
+
+ $sth->execute($rel_id, $sym_id);
+ my $res = $sth->fetchall_arrayref();
+
+ use Data::Dumper;
+ foreach my $def (@$res) {
+# warn Dumper($def);
+ $$def[7] = 42;
+# my @files = $self->get_referring_files($rel_id, $$def[7]);
+# warn Dumper(\@files);
+ }
+
+ return $res;
+}
+
+sub _symbols_by_file {
+ my ($self, $rfile_id) = @_;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'_symbols_by_file'} ||=
+ $dbh->prepare(qq{select distinct s.name
+ from ${pre}usage u, ${pre}symbols s
+ where id_rfile = ? and u.id_symbol = s.id});
+ $sth->execute($rfile_id);
+ my %res;
+ while (my ($symname) = $sth->fetchrow_array()) {
+ $res{$symname} = 1;
+ }
+
+ return \%res;
+}
+
+sub _add_usage {
+ my ($self, $file_id, $line, $symbol_id) = @_;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'_add_usage'} ||=
+ $dbh->prepare(qq{insert into ${pre}usage(id_rfile, line, id_symbol)
+ values (?, ?, ?)});
+ $sth->execute($file_id, $line, $symbol_id);
+
+ return 1;
+}
+
+sub _usage_by_file {
+ my ($self, $rfile_id) = @_;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'_usage_by_file'} ||=
+ $dbh->prepare(qq{select s.name, u.line
+ from ${pre}usage u, ${pre}symbols s
+ where id_rfile = ? and u.id_symbol = s.id});
+ $sth->execute($rfile_id);
+
+ die "Unimplemented";
+}
+
+sub _rfile_path_by_id {
+ my ($self, $rfile_id) = @_;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'_rfile_path_by_id'} ||=
+ $dbh->prepare(qq{select f.path from ${pre}files f, ${pre}revisions r
+ where f.id = r.id_file and r.id = ?});
+ my $path;
+ if ($sth->execute($rfile_id) > 0) {
+ ($path) = $sth->fetchrow_array();
+ }
+ $sth->finish();
+
+ return $path;
+}
+
+sub _get_includes_by_file {
+ my ($self, $res, $rel_id, @rfile_ids) = @_;
+
+ my $placeholders = join(', ', ('?') x @rfile_ids);
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $dbh->prepare(qq{select rf.id, f.path
+ from ${pre}revisions rf,
+ ${pre}filereleases v,
+ ${pre}includes i,
+ ${pre}revisions ri,
+ ${pre}files f
+ where rf.id = i.id_rfile
+ and rf.id_file = f.id
+ and rf.id = v.id_rfile
+ and v.id_release = ?
+ and i.id_include_path = ri.id_file
+ and ri.id in ($placeholders)});
+
+
+ $sth->execute($rel_id, @rfile_ids);
+ my $files = $sth->fetchall_arrayref();
+ $sth->finish();
+
+ my @recurse;
+ foreach my $r (@$files) {
+ push(@recurse, $$r[0]) unless exists($$res{$$r[0]});
+
+ $$res{$$r[0]} = $$r[1];
+ }
+
+ $self->_get_includes_by_file($res, $rel_id, @recurse) if @recurse;
+
+ return 1;
+}
+
+sub add_hashed_document {
+ my ($self, $rfile_id, $doc_id) = @_;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'add_hashed_document'} ||=
+ $dbh->prepare(qq{insert into ${pre}hashed_documents(id_rfile, doc_id)
+ values (?, ?)});
+ $sth->execute($rfile_id, $doc_id);
+
+ return 1;
+}
+
+sub get_hashed_document {
+ my ($self, $rfile_id) = @_;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'get_hashed_document'} ||=
+ $dbh->prepare(qq{select doc_id from ${pre}hashed_documents
+ where id_rfile = ?});
+ my $doc_id;
+ if ($sth->execute($rfile_id) > 0) {
+ ($doc_id) = $sth->fetchrow_array();
+ }
+ $sth->finish();
+
+ return $doc_id;
+}
+
+sub get_symbol_usage {
+ my ($self, $rel_id, $symid) = @_;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'get_symbol_usage'} ||=
+ $dbh->prepare(qq{
+ select u.id_rfile, u.line
+ from ${pre}usage u, ${pre}filereleases fr
+ where u.id_symbol = ?
+ and u.id_rfile = fr.id_rfile and fr.id_release = ?});
+
+ $sth->execute($symid, $rel_id);
+ my $res = $sth->fetchall_arrayref();
+ $sth->finish();
+
+ return $res;
+}
+
+sub get_identifier_info {
+ my ($self, $ident, $rel_id) = @_;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'get_identifier_info'} ||=
+ $dbh->prepare(qq{
+ select s.name, s.id,
+ i.id, i.type, f.path, i.line, cs.name, c.type, c.id,
+ i.id_rfile
+ from ${pre}identifiers i
+ left outer join ${pre}identifiers c on i.context = c.id
+ left outer join ${pre}symbols cs on c.id_symbol = cs.id,
+ ${pre}symbols s, ${pre}revisions r, ${pre}files f
+ where i.id = ? and i.id_symbol = s.id
+ and i.id_rfile = r.id and r.id_file = f.id});
+
+# select i.id_rfile, f.path, i.line, i.type, i.context, s.id, s.name
+# from identifiers i, symbols s, revisions r, files f
+# where i.id = ? and i.id_symbol = s.id
+# and i.id_rfile = r.id and r.id_file = f.id});
+
+ unless ($sth->execute($ident) == 1) {
+ return undef;
+ }
+
+ my ($symname, $symid,
+ $iid, $type, $path, $line, $cname, $ctype, $cid, $rfile_id) =
+ $sth->fetchrow_array();
+ $sth->finish();
+
+ my $refs = {$rfile_id => $path};
+ $self->get_referring_files($rel_id, $rfile_id, $refs);
+ my $usage = $self->get_symbol_usage($rel_id, $symid);
+
+ my %reflines;
+ foreach my $u (@$usage) {
+ next unless $$refs{$$u[0]};
+ $reflines{$$refs{$$u[0]}} ||= [];
+ push(@{$reflines{$$refs{$$u[0]}}}, $$u[1]);
+ }
+
+ return ($symname, $symid,
+ [$iid, $type, $path, $line, $cname, $ctype, $cid, $rfile_id],
+ \%reflines);
+}
+
+sub get_rfile_timestamp {
+ my ($self, $rfile_id) = @_;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'get_rfile_timestamp'} ||=
+ $dbh->prepare(qq{
+ select extract(epoch from last_modified_gmt)::integer,
+ last_modified_tz
+ from ${pre}revisions where id = ?});
+
+ unless ($sth->execute($rfile_id) == 1) {
+ return undef;
+ }
+
+ my ($epoch, $tz) = $sth->fetchrow_array();
+ $sth->finish();
+
+ return ($epoch, $tz);
+}
+
+1;
diff --git a/lib/LXRng/Index/Generic.pm b/lib/LXRng/Index/Generic.pm
new file mode 100644
index 0000000..cab2513
--- /dev/null
+++ b/lib/LXRng/Index/Generic.pm
@@ -0,0 +1,172 @@
+package LXRng::Index::Generic;
+
+use strict;
+use Memoize;
+
+
+sub new {
+ my ($class, %args) = @_;
+
+ memoize('tree_id');
+ memoize('release_id');
+ memoize('file_id');
+ memoize('symbol_id');
+
+ return bless(\%args, $class);
+}
+
+sub prefix {
+ my ($self) = @_;
+ if (exists $$self{'table_prefix'}) {
+ return $$self{'table_prefix'}.'_';
+ }
+ else {
+ return '';
+ }
+}
+
+sub tree_id {
+ my ($self, $tree, $update) = @_;
+
+ return $self->_get_tree($tree) ||
+ ($update ? $self->_add_tree($tree) : undef);
+}
+
+sub release_id {
+ my ($self, $tree, $release, $update) = @_;
+
+ my $tree_id = $self->tree_id($tree, $update);
+
+ return $self->_get_release($tree_id, $release) ||
+ ($update ? $self->_add_release($tree_id, $release) : undef);
+}
+
+sub file_id {
+ my ($self, $path, $update) = @_;
+
+ return $self->_get_file($path) ||
+ ($update ? $self->_add_file($path) : undef);
+}
+
+sub rfile_id {
+ my ($self, $node, $update) = @_;
+
+ my $path = $node->name;
+ my $revision = $node->revision;
+
+ my $file_id = $self->file_id($path, $update);
+ return undef unless $file_id;
+
+ my ($id, $old_stamp) = $self->_get_rfile($file_id, $revision);
+ return $id unless $update;
+
+ if ($id) {
+ my ($new_stamp) = $node->time =~ /^(\d+)/;
+ if ($update and $old_stamp > $new_stamp) {
+ $self->_update_rfile_timestamp($id, $node->time);
+ }
+ }
+ else {
+ $id = $self->_add_rfile($file_id, $revision, $node->time);
+ }
+ return $id;
+}
+
+sub symbol_id {
+ my ($self, $symbol, $update) = @_;
+
+ return $self->_get_symbol($symbol) ||
+ ($update ? $self->_add_symbol($symbol) : undef);
+}
+
+sub add_filerelease {
+ my ($self, $tree, $release, $rfile_id) = @_;
+
+ my $rel_id = $self->release_id($tree, $release, 1);
+
+ $self->_add_filerelease($rfile_id, $rel_id);
+}
+
+sub add_include {
+ my ($self, $file_id, $include_path) = @_;
+
+ my $inc_id = $self->_get_file($include_path);
+
+ return 0 unless $inc_id;
+ return $self->_add_include($file_id, $inc_id);
+}
+
+sub includes_by_file {
+ my ($self, $tree, $release, $path) = @_;
+
+ my $file_id = $self->file_id($tree, $release, $path);
+
+ return $self->_includes_by_id($file_id);
+}
+
+#sub add_symbol {
+# my ($self, $file_id, $line, $symbol, $type, $ctx_id) = @_;
+# return $self->_add_symbol($file_id, $line, $symbol, $type, $ctx_id);
+#}
+
+sub add_ident {
+ my ($self, $rfile_id, $line, $symbol, $type, $ctx_id) = @_;
+
+ my $sym_id = $self->symbol_id($symbol, 1);
+
+ return $self->_add_ident($rfile_id, $line, $sym_id, $type, $ctx_id);
+}
+
+sub symbol_by_id {
+ my ($self, $id) = @_;
+
+ return $self->_symbol_by_id($id);
+}
+
+sub symbols_by_name {
+ my ($self, $tree, $release, $symbol) = @_;
+
+ my $rel_id = $self->release_id($tree, $release);
+# return $cache_sym{$rel_id}{$symbol} if
+# exists $cache_sym{$rel_id} and exists $cache_sym{$rel_id}{$symbol};
+ return $self->_identifiers_by_name($rel_id, $symbol);
+# $cache_sym{$rel_id}{$symbol} = $id;
+}
+
+sub symbols_by_file {
+ my ($self, $tree, $release, $path) = @_;
+
+ $path =~ s!^/!!;
+ my $rel_id = $self->_get_release($self->_get_tree($tree), $release);
+ my $rfile_id = $self->_get_rfile_by_release($rel_id, $path);
+
+ return $self->_symbols_by_file($rfile_id);
+}
+
+sub add_usage {
+ my ($self, $file_id, $line, $symbol) = @_;
+
+ my $sym_id = $self->symbol_id($symbol, 1);
+
+ return $self->_add_usage($file_id, $line, $sym_id);
+}
+
+sub usage_by_file {
+ my ($self, $tree, $release, $path) = @_;
+
+ my $rel_id = $self->_get_release($self->_get_tree($tree), $release);
+ my $rfile_id = $self->_get_rfile_by_release($rel_id, $path);
+
+ return $self->_usage_by_file($rfile_id);
+}
+
+sub get_referring_files {
+ my ($self, $rel_id, $rfile_id, $res) = @_;
+
+ $res ||= {};
+ $self->_get_includes_by_file($res, $rel_id, $rfile_id);
+
+ return keys %$res;
+}
+
+1;
diff --git a/lib/LXRng/Index/Pg.pm b/lib/LXRng/Index/Pg.pm
new file mode 100644
index 0000000..05fe3a0
--- /dev/null
+++ b/lib/LXRng/Index/Pg.pm
@@ -0,0 +1,417 @@
+package LXRng::Index::Pg;
+
+use strict;
+use DBI;
+
+use base qw(LXRng::Index::DBI);
+
+sub dbh {
+ my ($self) = @_;
+
+ $$self{'dbh'} ||= DBI->connect('dbi:Pg:'.$$self{'db_spec'},
+ $$self{'db_user'}, $$self{'db_pass'},
+ {AutoCommit => 1,
+ RaiseError => 1})
+ or die($DBI::errstr);
+
+ return $$self{'dbh'};
+}
+
+sub init_db {
+ my ($self) = @_;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ $dbh->{AutoCommit} = 0;
+
+ $dbh->do(qq{create sequence ${pre}treenum}) or die($dbh->errstr);
+ $dbh->do(qq{create sequence ${pre}relnum}) or die($dbh->errstr);
+ $dbh->do(qq{create sequence ${pre}filenum cache 50}) or die($dbh->errstr);
+ $dbh->do(qq{create sequence ${pre}revnum cache 50}) or die($dbh->errstr);
+ $dbh->do(qq{create sequence ${pre}symnum cache 50}) or die($dbh->errstr);
+ $dbh->do(qq{create sequence ${pre}identnum cache 50}) or die($dbh->errstr);
+
+ $dbh->do(qq{
+ create table ${pre}charsets
+ (
+ id serial,
+ name varchar,
+ primary key (id)
+ )
+ }) or die($dbh->errstr);
+
+ $dbh->do(qq{insert into ${pre}charsets(name) values ('ascii')})
+ or die($dbh->errstr);
+ $dbh->do(qq{insert into ${pre}charsets(name) values ('utf-8')})
+ or die($dbh->errstr);
+ $dbh->do(qq{insert into ${pre}charsets(name) values ('iso8859-1')})
+ or die($dbh->errstr);
+
+ $dbh->do(qq{
+ create table ${pre}trees
+ (
+ id int default nextval('${pre}treenum'),
+ name varchar,
+ primary key (id)
+ )
+ }) or die($dbh->errstr);
+
+ $dbh->do(qq{
+ create table ${pre}releases
+ (
+ id int default nextval('${pre}relnum'),
+ id_tree int references ${pre}trees(id),
+ release_tag varchar,
+ is_indexed bool default 'f',
+ primary key (id)
+ )
+ }) or die($dbh->errstr);
+
+ $dbh->do(qq{
+ create table ${pre}files
+ (
+ id int default nextval('${pre}filenum'),
+ path varchar,
+ primary key (id),
+ unique (path)
+ )
+ }) or die($dbh->errstr);
+
+ $dbh->do(qq{
+ create table ${pre}revisions
+ (
+ id int default nextval('${pre}revnum'),
+ id_file int references ${pre}files(id),
+ revision varchar,
+ last_modified_gmt timestamp without time zone, -- GMT
+ last_modified_tz varchar(5), -- Optional TZ
+ body_charset int references ${pre}charsets(id),
+ primary key (id),
+ unique (id_file, revision)
+ )
+ }) or die($dbh->errstr);
+
+ $dbh->do(qq{
+ create table ${pre}filestatus
+ (
+ id_rfile int references ${pre}revisions(id),
+ indexed bool default 'f',
+ referenced bool default 'f',
+ hashed bool default 'f',
+ primary key (id_rfile)
+ )
+ }) or die($dbh->errstr);
+
+ $dbh->do(qq{
+ create table ${pre}hashed_documents
+ (
+ id_rfile int references ${pre}revisions(id),
+ doc_id int not null,
+ primary key (id_rfile)
+ )
+ }) or die($dbh->errstr);
+
+ $dbh->do(qq{
+ create table ${pre}filereleases
+ (
+ id_rfile int references ${pre}revisions(id),
+ id_release int references ${pre}releases(id),
+ primary key (id_rfile, id_release)
+ )
+ }) or die($dbh->errstr);
+
+ $dbh->do(qq{
+ create table ${pre}includes
+ (
+ id_rfile int references ${pre}revisions(id),
+ id_include_path int references ${pre}files(id)
+ )
+ }) or die($dbh->errstr);
+
+ $dbh->do(qq{
+ create table ${pre}symbols
+ (
+ id int default nextval('${pre}symnum'),
+ name varchar,
+ primary key (id),
+ unique (name)
+ )
+ }) or die($dbh->errstr);
+
+ $dbh->do(qq{
+ create table ${pre}identifiers
+ (
+ id int default nextval('${pre}identnum'),
+ id_symbol int references ${pre}symbols(id) deferrable,
+ id_rfile int references ${pre}revisions(id) deferrable,
+ line int,
+ type char(1),
+ context int references ${pre}identifiers(id) deferrable,
+ primary key (id)
+ )
+ }) or die($dbh->errstr);
+
+ $dbh->do(qq{
+ create table ${pre}usage
+ (
+ id_rfile int references ${pre}revisions(id) deferrable,
+ id_symbol int references ${pre}symbols(id) deferrable,
+ line int
+ )
+ }) or die($dbh->errstr);
+
+ $dbh->do(qq{alter table ${pre}usage alter column id_symbol set statistics 250});
+ $dbh->do(qq{alter table ${pre}identifiers alter column id_symbol set statistics 250});
+
+ $dbh->do(qq{create index ${pre}symbol_idx1 on ${pre}symbols using btree (name)})
+ or die($dbh->errstr);
+ $dbh->do(qq{create index ${pre}ident_idx1 on ${pre}identifiers using btree (id_symbol)})
+ or die($dbh->errstr);
+ $dbh->do(qq{create index ${pre}ident_idx2 on ${pre}identifiers using btree (id_rfile)})
+ or die($dbh->errstr);
+ $dbh->do(qq{create index ${pre}ident_idx3 on ${pre}identifiers using btree (id_symbol, id_rfile)})
+ or die($dbh->errstr);
+ $dbh->do(qq{create index ${pre}usage_idx1 on ${pre}usage using btree (id_symbol)})
+ or die($dbh->errstr);
+ $dbh->do(qq{create index ${pre}usage_idx2 on ${pre}usage using btree (id_rfile)})
+ or die($dbh->errstr);
+ $dbh->do(qq{create index ${pre}include_idx1 on ${pre}includes using btree (id_rfile)})
+ or die($dbh->errstr);
+ $dbh->do(qq{create index ${pre}file_idx1 on ${pre}files using btree (path)})
+ or die($dbh->errstr);
+
+ $dbh->do(qq{grant select on ${pre}charsets to public}) or die($dbh->errstr);
+ $dbh->do(qq{grant select on ${pre}trees to public}) or die($dbh->errstr);
+ $dbh->do(qq{grant select on ${pre}releases to public}) or die($dbh->errstr);
+ $dbh->do(qq{grant select on ${pre}files to public}) or die($dbh->errstr);
+ $dbh->do(qq{grant select on ${pre}releases to public}) or die($dbh->errstr);
+ $dbh->do(qq{grant select on ${pre}filereleases to public}) or die($dbh->errstr);
+ $dbh->do(qq{grant select on ${pre}filestatus to public}) or die($dbh->errstr);
+ $dbh->do(qq{grant select on ${pre}hashed_documents to public}) or die($dbh->errstr);
+ $dbh->do(qq{grant select on ${pre}includes to public}) or die($dbh->errstr);
+ $dbh->do(qq{grant select on ${pre}symbols to public}) or die($dbh->errstr);
+ $dbh->do(qq{grant select on ${pre}identifiers to public}) or die($dbh->errstr);
+ $dbh->do(qq{grant select on ${pre}usage to public}) or die($dbh->errstr);
+ $dbh->do(qq{grant select on ${pre}revisions to public}) or die($dbh->errstr);
+
+ $dbh->commit();
+ $dbh->{AutoCommit} = 0;
+}
+
+sub drop_db {
+ my ($self) = @_;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ local($dbh->{RaiseError}) = 0;
+
+ $dbh->do(qq{drop index ${pre}symbol_idx1});
+ $dbh->do(qq{drop index ${pre}ident_idx1});
+ $dbh->do(qq{drop index ${pre}ident_idx2});
+ $dbh->do(qq{drop index ${pre}usage_idx1});
+ $dbh->do(qq{drop index ${pre}usage_idx2});
+ $dbh->do(qq{drop index ${pre}include_idx1});
+ $dbh->do(qq{drop index ${pre}file_idx1});
+
+ $dbh->do(qq{drop table ${pre}usage});
+ $dbh->do(qq{drop table ${pre}identifiers});
+ $dbh->do(qq{drop table ${pre}symbols});
+ $dbh->do(qq{drop table ${pre}includes});
+ $dbh->do(qq{drop table ${pre}filereleases});
+ $dbh->do(qq{drop table ${pre}hashed_documents});
+ $dbh->do(qq{drop table ${pre}filestatus});
+ $dbh->do(qq{drop table ${pre}revisions});
+ $dbh->do(qq{drop table ${pre}files});
+ $dbh->do(qq{drop table ${pre}releases});
+ $dbh->do(qq{drop table ${pre}trees});
+ $dbh->do(qq{drop table ${pre}charsets});
+
+ $dbh->do(qq{drop sequence ${pre}treenum});
+ $dbh->do(qq{drop sequence ${pre}relnum});
+ $dbh->do(qq{drop sequence ${pre}filenum});
+ $dbh->do(qq{drop sequence ${pre}revnum});
+ $dbh->do(qq{drop sequence ${pre}symnum});
+ $dbh->do(qq{drop sequence ${pre}identnum});
+}
+
+sub _add_tree {
+ my ($self, $tree) = @_;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'_add_tree_ins'} ||=
+ $dbh->prepare(qq{insert into ${pre}trees(name) values (?)});
+ $sth->execute($tree);
+
+ $sth = $$self{'sth'}{'_add_tree_insid'} ||=
+ $dbh->prepare(qq{select currval('${pre}treenum')});
+ $sth->execute();
+ my ($id) = $sth->fetchrow_array();
+ $sth->finish();
+
+ return $id;
+}
+
+sub _add_release {
+ my ($self, $tree_id, $release) = @_;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'_add_relase_ins'} ||=
+ $dbh->prepare(qq{insert into ${pre}releases(id_tree, release_tag)
+ values (?, ?)});
+ $sth->execute($tree_id, $release);
+
+ $sth = $$self{'sth'}{'_add_release_insid'} ||=
+ $dbh->prepare(qq{select currval('${pre}relnum')});
+ $sth->execute();
+ my ($id) = $sth->fetchrow_array();
+ $sth->finish();
+
+ return $id;
+}
+
+sub _add_file {
+ my ($self, $path) = @_;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'_add_file_ins'} ||=
+ $dbh->prepare(qq{insert into ${pre}files(path) values (?)});
+ $sth->execute($path);
+
+ $sth = $$self{'sth'}{'_add_file_insid'} ||=
+ $dbh->prepare(qq{select currval('${pre}filenum')});
+ $sth->execute();
+ my ($id) = $sth->fetchrow_array();
+ $sth->finish();
+
+ return $id;
+}
+
+sub _get_rfile {
+ my ($self, $file_id, $revision) = @_;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'_get_rfile'} ||=
+ $dbh->prepare(qq{select id, extract('epoch' from last_modified_gmt
+ at time zone 'UTC')
+ from ${pre}revisions
+ where id_file = ? and revision = ?});
+ my ($id, $gmt);
+ if ($sth->execute($file_id, $revision) > 0) {
+ ($id, $gmt) = $sth->fetchrow_array();
+ }
+ $sth->finish();
+
+ return ($id, $gmt);
+}
+
+sub _add_rfile {
+ my ($self, $file_id, $revision, $time) = @_;
+
+ my ($epoch, $zone) = $time =~ /^(\d+)(?: ([-+]\d\d\d\d)|)$/;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'_add_rfile_ins'} ||=
+ $dbh->prepare(qq{
+ insert into ${pre}revisions(id_file, revision,
+ last_modified_gmt,
+ last_modified_tz)
+ values (?, ?,
+ timestamp 'epoch' + ? * interval '1 second', ?)});
+ $sth->execute($file_id, $revision, $epoch, $zone)
+ or die($dbh->errstr);
+
+ $sth = $$self{'sth'}{'_add_rfile_insid'} ||=
+ $dbh->prepare(qq{select currval('${pre}revnum')});
+ $sth->execute();
+ my ($id) = $sth->fetchrow_array();
+ $sth->finish();
+
+ return $id;
+}
+
+sub _update_rfile_timestamp {
+ my ($self, $rfile_id, $time) = @_;
+
+ my ($epoch, $zone) = $time =~ /^(\d+)(?: ([-+]\d\d\d\d)|)$/;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'_update_rfile_timestamp'} ||=
+ $dbh->prepare(qq{
+ update ${pre}revisions set
+ last_modified_gmt = timestamp 'epoch' + ? * interval '1 second',
+ last_modified_tz = ?
+ where id = ?});
+
+ $sth->execute($epoch, $zone, $rfile_id)
+ or die($dbh->errstr);
+ $sth->finish();
+}
+
+sub _add_filerelease {
+ my ($self, $rfile_id, $rel_id) = @_;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'_add_filerelease'} ||=
+ $dbh->prepare(qq{insert into ${pre}filereleases(id_rfile, id_release)
+ select ?, ? where not exists
+ (select 1 from ${pre}filereleases
+ where id_rfile = ? and id_release = ?)});
+ $sth->execute($rfile_id, $rel_id, $rfile_id, $rel_id);
+}
+
+sub _add_symbol {
+ my ($self, $symbol) = @_;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'_add_symbol_ins'} ||=
+ $dbh->prepare(qq{insert into ${pre}symbols(name) values (?)});
+ $sth->execute($symbol);
+
+ $sth = $$self{'sth'}{'_add_symbol_insid'} ||=
+ $dbh->prepare(qq{select currval('${pre}symnum')});
+ $sth->execute();
+ my ($id) = $sth->fetchrow_array();
+ $sth->finish();
+
+ return $id;
+}
+
+sub _add_ident {
+ my ($self, $rfile_id, $line, $sym_id, $type, $ctx_id) = @_;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'_add_ident_ins'} ||=
+ $dbh->prepare(qq{insert into ${pre}identifiers
+ (id_rfile, line, id_symbol, type, context)
+ values (?, ?, ?, ?, ?)});
+ $sth->execute($rfile_id, $line, $sym_id, $type, $ctx_id);
+
+ $sth = $$self{'sth'}{'_add_ident_insid'} ||=
+ $dbh->prepare(qq{select currval('${pre}identnum')});
+ $sth->execute();
+ my ($id) = $sth->fetchrow_array();
+ $sth->finish();
+
+ return $id;
+}
+
+sub DESTROY {
+ my ($self) = @_;
+
+ if ($$self{'dbh'}) {
+ $$self{'dbh'}->rollback();
+ $$self{'dbh'}->disconnect();
+ delete($$self{'dbh'});
+ }
+}
+
+1;
diff --git a/lib/LXRng/Index/PgBatch.pm b/lib/LXRng/Index/PgBatch.pm
new file mode 100644
index 0000000..8f8844c
--- /dev/null
+++ b/lib/LXRng/Index/PgBatch.pm
@@ -0,0 +1,217 @@
+package LXRng::Index::PgBatch;
+
+# Specialized subclass of LXRng::Index::Pg for doing parallelized
+# batched inserts into database. Higher performance (and higher
+# complexity).
+
+use strict;
+use DBI;
+use POSIX qw(:sys_wait_h);
+
+use base qw(LXRng::Index::Pg);
+
+
+sub transaction {
+ my ($self, $code) = @_;
+
+ if ($self->dbh->{AutoCommit}) {
+ $self->dbh->{AutoCommit} = 0;
+ $self->dbh->do(q(set constraints all deferred));
+ $code->();
+ $self->flush();
+ $self->dbh->commit();
+ $self->dbh->{AutoCommit} = 1;
+
+ # At end of outermost transaction, wait for outstanding flushes
+ $self->_flush_wait();
+ }
+ else {
+ # If we're in a transaction already, don't return to
+ # AutoCommit state.
+ $code->();
+
+ # Only occasional synchronization if we're inside another
+ # transaction.
+ if ($self->{'writes'}++ % 997 == 0) {
+ $self->flush();
+ $self->dbh->commit();
+ }
+ }
+}
+
+sub new {
+ my ($class, @args) = @_;
+
+ my $self = $class->SUPER::new(@args);
+ $$self{'writes'} = 0;
+
+ return $self;
+}
+
+sub flush {
+ my ($self) = @_;
+
+ return unless exists($$self{'cache'});
+
+ $self->_flush_wait();
+
+ my $pre = $self->prefix;
+ $self->dbh->commit() unless $self->dbh->{AutoCommit};
+ my $pid = fork();
+ die("fork failed: $!") unless defined($pid);
+ if ($pid == 0) {
+ $SIG{'INT'} = 'IGNORE';
+ $SIG{'QUIT'} = 'IGNORE';
+ $SIG{'TERM'} = 'IGNORE';
+
+ my $i = 0;
+ $$self{'dbh'} = undef;
+ foreach my $table (qw(symbols identifiers usage)) {
+ if (exists($$self{'cache'}{$table})) {
+ $self->dbh->do(qq{copy $pre$table from stdin});
+ foreach my $l (@{$$self{'cache'}{$table}}) {
+ $i++;
+ $self->dbh->pg_putline($l);
+ }
+ $self->dbh->pg_endcopy;
+ }
+ }
+ $self->dbh->commit() unless $self->dbh->{AutoCommit};
+ $self->dbh->do(q(analyze)) if $i > 100000;
+ $self->dbh->disconnect();
+ warn "\n*** index: flushed $i rows\n";
+ kill(9, $$);
+ }
+ $$self{'flush_pid'} = $pid;
+ delete($$self{'cache'});
+ warn "\n*** index: flushing in background\n";
+}
+
+sub _flush_wait {
+ my ($self) = @_;
+
+ return unless $$self{'flush_pid'};
+ waitpid($$self{'flush_pid'}, WNOHANG); # Reap zombies
+ return unless kill(0, $$self{'flush_pid'});
+
+ warn "\n*** index: waiting for running flush to complete...\n";
+ $self->dbh->commit() unless $self->dbh->{AutoCommit};
+ waitpid($$self{'flush_pid'}, 0);
+}
+
+sub _cache {
+ my ($self, $name) = @_;
+
+ $$self{'cache'}{$name} ||= [];
+ return $$self{'cache'}{$name};
+}
+
+sub _cached_seqno {
+ my ($self, $seqname) = @_;
+
+ unless (exists($$self{'cached_seqno'}{$seqname}) and
+ $$self{'cached_seqno'}{$seqname}{'min'} <=
+ $$self{'cached_seqno'}{$seqname}{'max'})
+ {
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'_cached_seqno'}{$seqname} ||=
+ $dbh->prepare(qq{select setval('$pre$seqname',
+ nextval('$pre$seqname')+1000)});
+ $sth->execute();
+ my ($id) = $sth->fetchrow_array();
+ $sth->finish();
+ $$self{'cached_seqno'}{$seqname}{'min'} = $id-1000;
+ $$self{'cached_seqno'}{$seqname}{'max'} = $id;
+ }
+
+ return $$self{'cached_seqno'}{$seqname}{'min'}++;
+}
+
+sub _prime_symbol_cache {
+ my ($self) = @_;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'_prime_symbol_cache'} ||=
+ $dbh->prepare(qq{select name, id from ${pre}symbols});
+ $sth->execute();
+ my %cache;
+ while (my ($name, $id) = $sth->fetchrow_array()) {
+ $cache{$name} = $id;
+ }
+ $sth->finish;
+
+ $$self{'__symbol_cache'} = \%cache;
+}
+
+sub _add_usage {
+ my ($self, $file_id, $line, $symbol_id) = @_;
+
+ push(@{$self->_cache('usage')}, "$file_id\t$symbol_id\t$line\n");
+
+ return 1;
+}
+
+sub _add_symbol {
+ my ($self, $symbol) = @_;
+
+ my $id = $self->_cached_seqno('symnum');
+ push(@{$self->_cache('symbols')}, "$id\t$symbol\n");
+
+ $self->_prime_symbol_cache()
+ unless exists $$self{'__symbol_cache'};
+
+ $$self{'__symbol_cache'}{$symbol} = $id;
+
+ return $id;
+}
+
+sub _add_ident {
+ my ($self, $rfile_id, $line, $sym_id, $type, $ctx_id) = @_;
+
+ $ctx_id = '\\N' unless defined($ctx_id);
+
+ my $id = $self->_cached_seqno('identnum');
+ push(@{$self->_cache('identifiers')}, join("\t", $id, $sym_id,
+ $rfile_id, $line, $type,
+ $ctx_id)."\n");
+
+ return $id;
+}
+
+my $_get_symbol_usage = 0;
+sub _get_symbol {
+ my ($self, $symbol) = @_;
+
+ unless (exists($$self{'__symbol_cache'})) {
+ # Only prime the cache once it's clear that we're likely to
+ # hit it a significant number of times.
+ return $self->SUPER::_get_symbol($symbol) if
+ $_get_symbol_usage++ < 100;
+
+ $self->_prime_symbol_cache();
+ }
+
+ return $$self{'__symbol_cache'}{$symbol} if
+ exists $$self{'__symbol_cache'}{$symbol};
+
+ return undef;
+}
+
+sub DESTROY {
+ my ($self) = @_;
+
+ if ($$self{'writes'} > 0) {
+ $self->flush();
+ $self->_flush_wait();
+ }
+
+ if ($$self{'dbh'}) {
+ $$self{'dbh'}->rollback() unless $$self{'dbh'}{'AutoCommit'};
+ $$self{'dbh'}->disconnect();
+ delete($$self{'dbh'});
+ }
+}
+
+1;