diff options
author | Arne Georg Gleditsch <argggh@lxr.linpro.no> | 2007-11-15 21:51:00 +0100 |
---|---|---|
committer | Arne Georg Gleditsch <argggh@lxr.linpro.no> | 2007-11-15 21:51:00 +0100 |
commit | 8c978d76179b4f573c1eb9b9bb9db966c81330bb (patch) | |
tree | ade066d6c36105de19e2a826188d0f1c14818f59 /lib/LXRng | |
parent | e9fa4c98bb5f084739d3418ade3f0c51e34a0aa1 (diff) | |
Too many changes...
Diffstat (limited to 'lib/LXRng')
-rw-r--r-- | lib/LXRng/Context.pm | 16 | ||||
-rw-r--r-- | lib/LXRng/Index/DBI.pm | 117 | ||||
-rw-r--r-- | lib/LXRng/Index/Pg.pm | 3 | ||||
-rw-r--r-- | lib/LXRng/Index/PgBatch.pm | 2 | ||||
-rw-r--r-- | lib/LXRng/Lang/C.pm | 3 | ||||
-rw-r--r-- | lib/LXRng/Markup/File.pm | 2 | ||||
-rw-r--r-- | lib/LXRng/Repo/Git.pm | 17 | ||||
-rw-r--r-- | lib/LXRng/Search/Xapian.pm | 29 |
8 files changed, 161 insertions, 28 deletions
diff --git a/lib/LXRng/Context.pm b/lib/LXRng/Context.pm index 46faa21..caaa473 100644 --- a/lib/LXRng/Context.pm +++ b/lib/LXRng/Context.pm @@ -9,7 +9,10 @@ sub new { $self = bless({}, $self); if ($args{'query'}) { - $$self{'req_url'} = $args{'query'}->url(); + # CGI::Simple appears to confuse '' with undef for SCRIPT_NAME. + # $$self{'req_url'} = $args{'query'}->url(); + $$self{'req_url'} = + $args{'query'}->url(-base => 1).'/'.$ENV{'SCRIPT_NAME'}; foreach my $p ($args{'query'}->param) { $$self{'params'}{$p} = [$args{'query'}->param($p)]; @@ -28,7 +31,7 @@ sub new { } if ($$self{'tree'} =~ s/[+](.*)$//) { - $$self{'release'} = $1; + $$self{'release'} = $1 if $1 ne '*'; } if ($$self{'tree'}) { @@ -137,7 +140,7 @@ sub path_elements { sub config { my ($self) = @_; - return $$self{'config'}; + return $$self{'config'} || {}; } sub prefs { @@ -147,7 +150,7 @@ sub prefs { } sub base_url { - my ($self) = @_; + my ($self, $notree) = @_; my $base = $self->config->{'base_url'}; unless ($base) { @@ -156,7 +159,10 @@ sub base_url { } $base =~ s,/+$,,; - $base .= '/lxr/'.$self->vtree.'/'; + + return $base if $notree; + + $base .= '/'.$self->vtree.'/'; $base =~ s,//+$,/,; return $base; diff --git a/lib/LXRng/Index/DBI.pm b/lib/LXRng/Index/DBI.pm index 602eac8..932202c 100644 --- a/lib/LXRng/Index/DBI.pm +++ b/lib/LXRng/Index/DBI.pm @@ -73,6 +73,110 @@ sub _get_tree { return $id; } +sub pending_files { + my ($self, $tree) = @_; + + my $tree_id = $self->_get_tree($tree); + return [] unless $tree_id; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + + # Can be made more fine grained by consulting filestatus, but all + # hashed documents need to have their termlist updated... Just + # include all files participating in releases not yet fully + # indexed. 
+ my $sth = $$self{'sth'}{'pending_files'} ||= + $dbh->prepare(qq{ + select rv.id, f.path, rv.revision + from ${pre}revisions rv, ${pre}files f + where rv.id_file = f.id + and rv.id in (select fr.id_rfile + from ${pre}releases r, ${pre}filereleases fr + where r.id = fr.id_release + and r.id_tree = ? + and r.is_indexed = 'f')}); + +# $dbh->prepare(qq{ +# select rv.id, f.path, rv.revision +# from ${pre}files f, ${pre}revisions rv +# where rv.id_file = f.id +# and not exists(select 1 from ${pre}filestatus fs +# where fs.id_rfile = rv.id +# and fs.indexed = 't' +# and fs.hashed = 't' +# and fs.referenced = 't') +# and exists(select 1 from ${pre}filereleases fr, ${pre}releases r +# where fr.id_rfile = rv.id +# and fr.id_release = r.id +# and r.id_tree = ?)}); + if ($sth->execute($tree_id) > 0) { + return $sth->fetchall_arrayref(); + } + else { + $sth->finish(); + return []; + } +} + +sub new_releases_by_file { + my ($self, $file_id) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'releases_by_file'} ||= + $dbh->prepare(qq{ + select r.release_tag from ${pre}releases r, ${pre}filereleases f + where r.id = f.id_release and f.id_rfile = ? 
and r.is_indexed = 'f'}); + if ($sth->execute($file_id) > 0) { + return [map { $$_[0] } @{$sth->fetchall_arrayref()}]; + } + else { + $sth->finish(); + return []; + } +} + +sub update_indexed_releases { + my ($self, $tree) = @_; + + my $tree_id = $self->_get_tree($tree); + return [] unless $tree_id; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'update_indexed_releases_find'} ||= + $dbh->prepare(qq{ + select r.id, r.release_tag + from ${pre}releases r + where is_indexed = 'f' + and not exists (select 1 + from ${pre}filereleases fr + left outer join ${pre}filestatus fs + on (fr.id_rfile = fs.id_rfile) + where fr.id_release = r.id + and (fs.id_rfile is null + or fs.indexed = 'f' + or fs.hashed = 'f' + or fs.referenced = 'f'))}); + + if ($sth->execute() > 0) { + my $rels = $sth->fetchall_arrayref(); + $sth->finish(); + $sth = $$self{'sth'}{'update_indexed_releases_set'} ||= + $dbh->prepare(qq{ + update ${pre}releases set is_indexed = 't' where id = ?}); + foreach my $r (@$rels) { + $sth->execute($$r[0]); + } + $sth->finish(); + return [map { $$_[1] } @$rels]; + } + else { + return []; + } +} + sub _get_release { my ($self, $tree_id, $release) = @_; @@ -345,14 +449,19 @@ sub get_symbol_usage { my $dbh = $self->dbh; my $pre = $self->prefix; - my $sth = $$self{'sth'}{'get_symbol_usage'} ||= + + # Postgres' query optimizer deals badly with placeholders and + # prepared statements in this case. + return undef unless $symid =~ /^\d+$/s; + my $sth = $dbh->prepare(qq{ select u.id_rfile, u.line from ${pre}usage u, ${pre}filereleases fr - where u.id_symbol = ? - and u.id_rfile = fr.id_rfile and fr.id_release = ?}); + where u.id_symbol = $symid + and u.id_rfile = fr.id_rfile and fr.id_release = ? 
+ limit 1000}); - $sth->execute($symid, $rel_id); + $sth->execute($rel_id); my $res = $sth->fetchall_arrayref(); $sth->finish(); diff --git a/lib/LXRng/Index/Pg.pm b/lib/LXRng/Index/Pg.pm index 05fe3a0..1b905c0 100644 --- a/lib/LXRng/Index/Pg.pm +++ b/lib/LXRng/Index/Pg.pm @@ -179,6 +179,8 @@ sub init_db { or die($dbh->errstr); $dbh->do(qq{create index ${pre}file_idx1 on ${pre}files using btree (path)}) or die($dbh->errstr); + $dbh->do(qq{create index ${pre}filerel_idx1 on ${pre}filereleases using btree (id_release)}) + or die($dbh->errstr); $dbh->do(qq{grant select on ${pre}charsets to public}) or die($dbh->errstr); $dbh->do(qq{grant select on ${pre}trees to public}) or die($dbh->errstr); @@ -212,6 +214,7 @@ sub drop_db { $dbh->do(qq{drop index ${pre}usage_idx2}); $dbh->do(qq{drop index ${pre}include_idx1}); $dbh->do(qq{drop index ${pre}file_idx1}); + $dbh->do(qq{drop index ${pre}filerel_idx1}); $dbh->do(qq{drop table ${pre}usage}); $dbh->do(qq{drop table ${pre}identifiers}); diff --git a/lib/LXRng/Index/PgBatch.pm b/lib/LXRng/Index/PgBatch.pm index 8f8844c..19c9fa9 100644 --- a/lib/LXRng/Index/PgBatch.pm +++ b/lib/LXRng/Index/PgBatch.pm @@ -77,7 +77,7 @@ sub flush { } } $self->dbh->commit() unless $self->dbh->{AutoCommit}; - $self->dbh->do(q(analyze)) if $i > 100000; + $self->dbh->do(q(analyze)) if $i > 500000; $self->dbh->disconnect(); warn "\n*** index: flushed $i rows\n"; kill(9, $$); diff --git a/lib/LXRng/Lang/C.pm b/lib/LXRng/Lang/C.pm index c88f424..1a826bd 100644 --- a/lib/LXRng/Lang/C.pm +++ b/lib/LXRng/Lang/C.pm @@ -51,7 +51,8 @@ sub parsespec { 'comment', '//', "\$", 'string', '"', '"', 'string', "'", "'", - 'include', '#\s*include', "\$"]; + 'include', '#\s*include\s+"', '"', + 'include', '#\s*include\s+<', '>']; } sub typemap { diff --git a/lib/LXRng/Markup/File.pm b/lib/LXRng/Markup/File.pm index 406737c..abb763c 100644 --- a/lib/LXRng/Markup/File.pm +++ b/lib/LXRng/Markup/File.pm @@ -16,7 +16,7 @@ sub context { sub safe_html { my ($str) = @_; - 
return encode_entities($str, '^\n\r\t !\#\$\(-;=?-~'); + return encode_entities($str, '^\n\r\t !\#\$\(-;=?-~\200-\377'); } sub make_format_newline { diff --git a/lib/LXRng/Repo/Git.pm b/lib/LXRng/Repo/Git.pm index 2d6ea33..757da26 100644 --- a/lib/LXRng/Repo/Git.pm +++ b/lib/LXRng/Repo/Git.pm @@ -74,7 +74,7 @@ sub allversions { } sub node { - my ($self, $path, $release) = @_; + my ($self, $path, $release, $rev) = @_; $path =~ s,^/+,,; $path =~ s,/+$,,; @@ -88,14 +88,21 @@ sub node { return LXRng::Repo::Git::Directory->new($self, '', $ref); } - my $git = $self->_git_cmd('ls-tree', $release, $path); - my ($mode, $type, $ref, $gitpath) = split(" ", <$git>); + my $type; + if ($rev) { + $type = 'blob'; + } + else { + my $git = $self->_git_cmd('ls-tree', $release, $path); + my ($mode, $gitpath); + ($mode, $type, $rev, $gitpath) = split(" ", <$git>); + } if ($type eq 'tree') { - return LXRng::Repo::Git::Directory->new($self, $path, $ref, $release); + return LXRng::Repo::Git::Directory->new($self, $path, $rev, $release); } elsif ($type eq 'blob') { - return LXRng::Repo::Git::File->new($self, $path, $ref, $release); + return LXRng::Repo::Git::File->new($self, $path, $rev, $release); } else { return undef; diff --git a/lib/LXRng/Search/Xapian.pm b/lib/LXRng/Search/Xapian.pm index 42c7580..b6e28a0 100644 --- a/lib/LXRng/Search/Xapian.pm +++ b/lib/LXRng/Search/Xapian.pm @@ -32,9 +32,11 @@ sub new_document { } sub add_document { - my ($self, $doc, $rel_id) = @_; + my ($self, $doc, $rel_ids) = @_; - $doc->add_term('__@@LXRREL_'.$rel_id); + foreach my $r (@$rel_ids) { + $doc->add_term('__@@LXRREL_'.$r); + } my $doc_id = $self->wrdb->add_document($doc); $self->{'writes'}++; $self->flush() if $self->{'writes'} % 499 == 0; @@ -42,20 +44,25 @@ sub add_document { } sub add_release { - my ($self, $doc_id, $rel_id) = @_; + my ($self, $doc_id, $rel_ids) = @_; - my $reltag = '__@@LXRREL_'.$rel_id; my $doc = $self->wrdb->get_document($doc_id); - my $term = $doc->termlist_begin; my $termend 
= $doc->termlist_end; - $term->skip_to($reltag); - if ($term ne $termend) { - return 0 if $term->get_termname eq $reltag; + my $changes = 0; + foreach my $r (@$rel_ids) { + my $reltag = '__@@LXRREL_'.$r; + my $term = $doc->termlist_begin; + $term->skip_to($reltag); + if ($term ne $termend) { + next if $term->get_termname eq $reltag; + } + $doc->add_term($reltag); + $changes++; } - $doc->add_term($reltag); - $self->wrdb->replace_document($doc_id, $doc); - return 1; + + $self->wrdb->replace_document($doc_id, $doc) if $changes; + return $changes; } sub flush { |