aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
author Arne Georg Gleditsch <argggh@lxr.linpro.no> 2007-11-15 21:51:00 +0100
committer Arne Georg Gleditsch <argggh@lxr.linpro.no> 2007-11-15 21:51:00 +0100
commit 8c978d76179b4f573c1eb9b9bb9db966c81330bb (patch)
tree ade066d6c36105de19e2a826188d0f1c14818f59 /lib
parent e9fa4c98bb5f084739d3418ade3f0c51e34a0aa1 (diff)
Too many changes...
Diffstat (limited to 'lib')
-rw-r--r-- lib/LXRng/Context.pm 16
-rw-r--r-- lib/LXRng/Index/DBI.pm 117
-rw-r--r-- lib/LXRng/Index/Pg.pm 3
-rw-r--r-- lib/LXRng/Index/PgBatch.pm 2
-rw-r--r-- lib/LXRng/Lang/C.pm 3
-rw-r--r-- lib/LXRng/Markup/File.pm 2
-rw-r--r-- lib/LXRng/Repo/Git.pm 17
-rw-r--r-- lib/LXRng/Search/Xapian.pm 29
8 files changed, 161 insertions, 28 deletions
diff --git a/lib/LXRng/Context.pm b/lib/LXRng/Context.pm
index 46faa21..caaa473 100644
--- a/lib/LXRng/Context.pm
+++ b/lib/LXRng/Context.pm
@@ -9,7 +9,10 @@ sub new {
$self = bless({}, $self);
if ($args{'query'}) {
- $$self{'req_url'} = $args{'query'}->url();
+ # CGI::Simple appears to confuse '' with undef for SCRIPT_NAME.
+ # $$self{'req_url'} = $args{'query'}->url();
+ $$self{'req_url'} =
+ $args{'query'}->url(-base => 1).'/'.$ENV{'SCRIPT_NAME'};
foreach my $p ($args{'query'}->param) {
$$self{'params'}{$p} = [$args{'query'}->param($p)];
@@ -28,7 +31,7 @@ sub new {
}
if ($$self{'tree'} =~ s/[+](.*)$//) {
- $$self{'release'} = $1;
+ $$self{'release'} = $1 if $1 ne '*';
}
if ($$self{'tree'}) {
@@ -137,7 +140,7 @@ sub path_elements {
sub config {
my ($self) = @_;
- return $$self{'config'};
+ return $$self{'config'} || {};
}
sub prefs {
@@ -147,7 +150,7 @@ sub prefs {
}
sub base_url {
- my ($self) = @_;
+ my ($self, $notree) = @_;
my $base = $self->config->{'base_url'};
unless ($base) {
@@ -156,7 +159,10 @@ sub base_url {
}
$base =~ s,/+$,,;
- $base .= '/lxr/'.$self->vtree.'/';
+
+ return $base if $notree;
+
+ $base .= '/'.$self->vtree.'/';
$base =~ s,//+$,/,;
return $base;
diff --git a/lib/LXRng/Index/DBI.pm b/lib/LXRng/Index/DBI.pm
index 602eac8..932202c 100644
--- a/lib/LXRng/Index/DBI.pm
+++ b/lib/LXRng/Index/DBI.pm
@@ -73,6 +73,110 @@ sub _get_tree {
return $id;
}
+# Return the file revisions of $tree that take part in at least one
+# release whose is_indexed flag is still 'f'.
+#
+# Arguments:
+#   $tree - tree name, resolved to an id through _get_tree().
+#
+# Returns a reference to an array of [revision id, file path, revision]
+# rows.  The array is empty when the tree id resolves to a false value,
+# when the query cannot be executed, or when nothing is pending.
+sub pending_files {
+    my ($self, $tree) = @_;
+
+    my $tree_id = $self->_get_tree($tree);
+    return [] unless $tree_id;
+
+    my $dbh = $self->dbh;
+    my $pre = $self->prefix;
+
+    # Can be made more fine grained by consulting filestatus, but all
+    # hashed documents need to have their termlist updated...  Just
+    # include all files participating in releases not yet fully
+    # indexed.
+    my $sth = $$self{'sth'}{'pending_files'} ||=
+        $dbh->prepare(qq{
+            select rv.id, f.path, rv.revision
+            from ${pre}revisions rv, ${pre}files f
+            where rv.id_file = f.id
+              and rv.id in (select fr.id_rfile
+                            from ${pre}releases r, ${pre}filereleases fr
+                            where r.id = fr.id_release
+                              and r.id_tree = ?
+                              and r.is_indexed = 'f')});
+
+#        $dbh->prepare(qq{
+#            select rv.id, f.path, rv.revision
+#            from ${pre}files f, ${pre}revisions rv
+#            where rv.id_file = f.id
+#              and not exists(select 1 from ${pre}filestatus fs
+#                             where fs.id_rfile = rv.id
+#                               and fs.indexed = 't'
+#                               and fs.hashed = 't'
+#                               and fs.referenced = 't')
+#              and exists(select 1 from ${pre}filereleases fr, ${pre}releases r
+#                         where fr.id_rfile = rv.id
+#                           and fr.id_release = r.id
+#                           and r.id_tree = ?)});
+
+    # DBI only promises a true value from execute() on a SELECT; the
+    # row count is driver specific ("0E0" or -1 are both legal).  So
+    # do not branch on the count: fetch whatever is there instead.
+    $sth->execute($tree_id) or return [];
+
+    # fetchall_arrayref() yields undef on an inactive handle.
+    my $rows = $sth->fetchall_arrayref() || [];
+    $sth->finish();
+    return $rows;
+}
+
+# Return the tags of the not yet indexed releases (is_indexed = 'f')
+# that contain the file revision $file_id.
+#
+# Arguments:
+#   $file_id - value matched against filereleases.id_rfile.
+#
+# Returns a reference to an array of release tag strings; empty when
+# the query cannot be executed or no such release exists.
+sub new_releases_by_file {
+    my ($self, $file_id) = @_;
+
+    my $dbh = $self->dbh;
+    my $pre = $self->prefix;
+
+    # The cache key mirrors the sub name so this statement cannot be
+    # mistaken for a differently filtered one cached under another
+    # sub's key.
+    my $sth = $$self{'sth'}{'new_releases_by_file'} ||=
+        $dbh->prepare(qq{
+            select r.release_tag from ${pre}releases r, ${pre}filereleases f
+            where r.id = f.id_release and f.id_rfile = ? and r.is_indexed = 'f'});
+
+    # execute() on a SELECT only guarantees a true value, not a row
+    # count, so fetch unconditionally instead of testing "> 0".
+    $sth->execute($file_id) or return [];
+
+    # fetchall_arrayref() yields undef on an inactive handle.
+    my $rows = $sth->fetchall_arrayref() || [];
+    $sth->finish();
+    return [map { $$_[0] } @$rows];
+}
+
+# Flag the releases of $tree that have become fully indexed.
+#
+# A release qualifies when its is_indexed flag is 'f' and none of its
+# filereleases rows lacks a filestatus row or has indexed, hashed or
+# referenced set to 'f'.  Each qualifying release gets is_indexed set
+# to 't'.
+#
+# Arguments:
+#   $tree - tree name, resolved to an id through _get_tree().
+#
+# Returns a reference to an array holding the tags of the releases
+# that were just flagged; empty when the tree id resolves to a false
+# value, the query cannot be executed, or nothing qualified.
+sub update_indexed_releases {
+    my ($self, $tree) = @_;
+
+    my $tree_id = $self->_get_tree($tree);
+    return [] unless $tree_id;
+
+    my $dbh = $self->dbh;
+    my $pre = $self->prefix;
+
+    # Restrict the search to the requested tree: the caller asked
+    # about $tree, and the returned tags are only meaningful relative
+    # to it.
+    my $find = $$self{'sth'}{'update_indexed_releases_find'} ||=
+        $dbh->prepare(qq{
+            select r.id, r.release_tag
+            from ${pre}releases r
+            where r.id_tree = ?
+              and r.is_indexed = 'f'
+              and not exists (select 1
+                              from ${pre}filereleases fr
+                              left outer join ${pre}filestatus fs
+                                on (fr.id_rfile = fs.id_rfile)
+                              where fr.id_release = r.id
+                                and (fs.id_rfile is null
+                                     or fs.indexed = 'f'
+                                     or fs.hashed = 'f'
+                                     or fs.referenced = 'f'))});
+
+    # execute() on a SELECT only guarantees a true value, not a row
+    # count, so fetch unconditionally instead of testing "> 0".
+    $find->execute($tree_id) or return [];
+
+    # fetchall_arrayref() yields undef on an inactive handle.
+    my $rels = $find->fetchall_arrayref() || [];
+    $find->finish();
+    return [] unless @$rels;
+
+    my $set = $$self{'sth'}{'update_indexed_releases_set'} ||=
+        $dbh->prepare(qq{
+            update ${pre}releases set is_indexed = 't' where id = ?});
+    foreach my $r (@$rels) {
+        $set->execute($$r[0]);
+    }
+    $set->finish();
+
+    return [map { $$_[1] } @$rels];
+}
+
sub _get_release {
my ($self, $tree_id, $release) = @_;
@@ -345,14 +449,19 @@ sub get_symbol_usage {
my $dbh = $self->dbh;
my $pre = $self->prefix;
- my $sth = $$self{'sth'}{'get_symbol_usage'} ||=
+
+ # Postgres' query optimizer deals badly with placeholders and
+ # prepared statements in this case.
+ return undef unless $symid =~ /^\d+$/s;
+ my $sth =
$dbh->prepare(qq{
select u.id_rfile, u.line
from ${pre}usage u, ${pre}filereleases fr
- where u.id_symbol = ?
- and u.id_rfile = fr.id_rfile and fr.id_release = ?});
+ where u.id_symbol = $symid
+ and u.id_rfile = fr.id_rfile and fr.id_release = ?
+ limit 1000});
- $sth->execute($symid, $rel_id);
+ $sth->execute($rel_id);
my $res = $sth->fetchall_arrayref();
$sth->finish();
diff --git a/lib/LXRng/Index/Pg.pm b/lib/LXRng/Index/Pg.pm
index 05fe3a0..1b905c0 100644
--- a/lib/LXRng/Index/Pg.pm
+++ b/lib/LXRng/Index/Pg.pm
@@ -179,6 +179,8 @@ sub init_db {
or die($dbh->errstr);
$dbh->do(qq{create index ${pre}file_idx1 on ${pre}files using btree (path)})
or die($dbh->errstr);
+ $dbh->do(qq{create index ${pre}filerel_idx1 on ${pre}filereleases using btree (id_release)})
+ or die($dbh->errstr);
$dbh->do(qq{grant select on ${pre}charsets to public}) or die($dbh->errstr);
$dbh->do(qq{grant select on ${pre}trees to public}) or die($dbh->errstr);
@@ -212,6 +214,7 @@ sub drop_db {
$dbh->do(qq{drop index ${pre}usage_idx2});
$dbh->do(qq{drop index ${pre}include_idx1});
$dbh->do(qq{drop index ${pre}file_idx1});
+ $dbh->do(qq{drop index ${pre}filerel_idx1});
$dbh->do(qq{drop table ${pre}usage});
$dbh->do(qq{drop table ${pre}identifiers});
diff --git a/lib/LXRng/Index/PgBatch.pm b/lib/LXRng/Index/PgBatch.pm
index 8f8844c..19c9fa9 100644
--- a/lib/LXRng/Index/PgBatch.pm
+++ b/lib/LXRng/Index/PgBatch.pm
@@ -77,7 +77,7 @@ sub flush {
}
}
$self->dbh->commit() unless $self->dbh->{AutoCommit};
- $self->dbh->do(q(analyze)) if $i > 100000;
+ $self->dbh->do(q(analyze)) if $i > 500000;
$self->dbh->disconnect();
warn "\n*** index: flushed $i rows\n";
kill(9, $$);
diff --git a/lib/LXRng/Lang/C.pm b/lib/LXRng/Lang/C.pm
index c88f424..1a826bd 100644
--- a/lib/LXRng/Lang/C.pm
+++ b/lib/LXRng/Lang/C.pm
@@ -51,7 +51,8 @@ sub parsespec {
'comment', '//', "\$",
'string', '"', '"',
'string', "'", "'",
- 'include', '#\s*include', "\$"];
+ 'include', '#\s*include\s+"', '"',
+ 'include', '#\s*include\s+<', '>'];
}
sub typemap {
diff --git a/lib/LXRng/Markup/File.pm b/lib/LXRng/Markup/File.pm
index 406737c..abb763c 100644
--- a/lib/LXRng/Markup/File.pm
+++ b/lib/LXRng/Markup/File.pm
@@ -16,7 +16,7 @@ sub context {
sub safe_html {
my ($str) = @_;
- return encode_entities($str, '^\n\r\t !\#\$\(-;=?-~');
+ return encode_entities($str, '^\n\r\t !\#\$\(-;=?-~\200-\377');
}
sub make_format_newline {
diff --git a/lib/LXRng/Repo/Git.pm b/lib/LXRng/Repo/Git.pm
index 2d6ea33..757da26 100644
--- a/lib/LXRng/Repo/Git.pm
+++ b/lib/LXRng/Repo/Git.pm
@@ -74,7 +74,7 @@ sub allversions {
}
sub node {
- my ($self, $path, $release) = @_;
+ my ($self, $path, $release, $rev) = @_;
$path =~ s,^/+,,;
$path =~ s,/+$,,;
@@ -88,14 +88,21 @@ sub node {
return LXRng::Repo::Git::Directory->new($self, '', $ref);
}
- my $git = $self->_git_cmd('ls-tree', $release, $path);
- my ($mode, $type, $ref, $gitpath) = split(" ", <$git>);
+ my $type;
+ if ($rev) {
+ $type = 'blob';
+ }
+ else {
+ my $git = $self->_git_cmd('ls-tree', $release, $path);
+ my ($mode, $gitpath);
+ ($mode, $type, $rev, $gitpath) = split(" ", <$git>);
+ }
if ($type eq 'tree') {
- return LXRng::Repo::Git::Directory->new($self, $path, $ref, $release);
+ return LXRng::Repo::Git::Directory->new($self, $path, $rev, $release);
}
elsif ($type eq 'blob') {
- return LXRng::Repo::Git::File->new($self, $path, $ref, $release);
+ return LXRng::Repo::Git::File->new($self, $path, $rev, $release);
}
else {
return undef;
diff --git a/lib/LXRng/Search/Xapian.pm b/lib/LXRng/Search/Xapian.pm
index 42c7580..b6e28a0 100644
--- a/lib/LXRng/Search/Xapian.pm
+++ b/lib/LXRng/Search/Xapian.pm
@@ -32,9 +32,11 @@ sub new_document {
}
sub add_document {
- my ($self, $doc, $rel_id) = @_;
+ my ($self, $doc, $rel_ids) = @_;
- $doc->add_term('__@@LXRREL_'.$rel_id);
+ foreach my $r (@$rel_ids) {
+ $doc->add_term('__@@LXRREL_'.$r);
+ }
my $doc_id = $self->wrdb->add_document($doc);
$self->{'writes'}++;
$self->flush() if $self->{'writes'} % 499 == 0;
@@ -42,20 +44,25 @@ sub add_document {
}
sub add_release {
- my ($self, $doc_id, $rel_id) = @_;
+ my ($self, $doc_id, $rel_ids) = @_;
- my $reltag = '__@@LXRREL_'.$rel_id;
my $doc = $self->wrdb->get_document($doc_id);
- my $term = $doc->termlist_begin;
my $termend = $doc->termlist_end;
- $term->skip_to($reltag);
- if ($term ne $termend) {
- return 0 if $term->get_termname eq $reltag;
+ my $changes = 0;
+ foreach my $r (@$rel_ids) {
+ my $reltag = '__@@LXRREL_'.$r;
+ my $term = $doc->termlist_begin;
+ $term->skip_to($reltag);
+ if ($term ne $termend) {
+ next if $term->get_termname eq $reltag;
+ }
+ $doc->add_term($reltag);
+ $changes++;
}
- $doc->add_term($reltag);
- $self->wrdb->replace_document($doc_id, $doc);
- return 1;
+
+ $self->wrdb->replace_document($doc_id, $doc) if $changes;
+ return $changes;
}
sub flush {