diff options
| author | Arne Georg Gleditsch <argggh@lxr.linpro.no> | 2007-11-15 21:51:00 +0100 | 
|---|---|---|
| committer | Arne Georg Gleditsch <argggh@lxr.linpro.no> | 2007-11-15 21:51:00 +0100 | 
| commit | 8c978d76179b4f573c1eb9b9bb9db966c81330bb (patch) | |
| tree | ade066d6c36105de19e2a826188d0f1c14818f59 /lib/LXRng | |
| parent | e9fa4c98bb5f084739d3418ade3f0c51e34a0aa1 (diff) | |
Too many changes...
Diffstat (limited to 'lib/LXRng')
| -rw-r--r-- | lib/LXRng/Context.pm | 16 | ||||
| -rw-r--r-- | lib/LXRng/Index/DBI.pm | 117 | ||||
| -rw-r--r-- | lib/LXRng/Index/Pg.pm | 3 | ||||
| -rw-r--r-- | lib/LXRng/Index/PgBatch.pm | 2 | ||||
| -rw-r--r-- | lib/LXRng/Lang/C.pm | 3 | ||||
| -rw-r--r-- | lib/LXRng/Markup/File.pm | 2 | ||||
| -rw-r--r-- | lib/LXRng/Repo/Git.pm | 17 | ||||
| -rw-r--r-- | lib/LXRng/Search/Xapian.pm | 29 | 
8 files changed, 161 insertions, 28 deletions
| diff --git a/lib/LXRng/Context.pm b/lib/LXRng/Context.pm index 46faa21..caaa473 100644 --- a/lib/LXRng/Context.pm +++ b/lib/LXRng/Context.pm @@ -9,7 +9,10 @@ sub new {      $self = bless({}, $self);      if ($args{'query'}) { -	$$self{'req_url'} = $args{'query'}->url(); +	# CGI::Simple appears to confuse '' with undef for SCRIPT_NAME. +	# $$self{'req_url'} = $args{'query'}->url(); +	$$self{'req_url'} = +	    $args{'query'}->url(-base => 1).'/'.$ENV{'SCRIPT_NAME'};  	foreach my $p ($args{'query'}->param) {  	    $$self{'params'}{$p} = [$args{'query'}->param($p)]; @@ -28,7 +31,7 @@ sub new {      }      if ($$self{'tree'} =~ s/[+](.*)$//) { -	$$self{'release'} = $1; +	$$self{'release'} = $1 if $1 ne '*';      }      if ($$self{'tree'}) { @@ -137,7 +140,7 @@ sub path_elements {  sub config {      my ($self) = @_; -    return $$self{'config'}; +    return $$self{'config'} || {};  }  sub prefs { @@ -147,7 +150,7 @@ sub prefs {  }  sub base_url { -    my ($self) = @_; +    my ($self, $notree) = @_;      my $base = $self->config->{'base_url'};      unless ($base) { @@ -156,7 +159,10 @@ sub base_url {      }      $base =~ s,/+$,,; -    $base .= '/lxr/'.$self->vtree.'/'; + +    return $base if $notree; + +    $base .= '/'.$self->vtree.'/';      $base =~ s,//+$,/,;      return $base; diff --git a/lib/LXRng/Index/DBI.pm b/lib/LXRng/Index/DBI.pm index 602eac8..932202c 100644 --- a/lib/LXRng/Index/DBI.pm +++ b/lib/LXRng/Index/DBI.pm @@ -73,6 +73,110 @@ sub _get_tree {      return $id;  } +sub pending_files { +    my ($self, $tree) = @_; + +    my $tree_id = $self->_get_tree($tree); +    return [] unless $tree_id; + +    my $dbh = $self->dbh; +    my $pre = $self->prefix; + +    # Can be made more fine grained by consulting filestatus, but all +    # hashed documents need to have their termlist updated...  Just +    # include all files participating in releases not yet fully +    # indexed. +    my $sth = $$self{'sth'}{'pending_files'} ||= +	$dbh->prepare(qq{ +	    select rv.id, f.path, rv.revision +		from ${pre}revisions rv, ${pre}files f +		where rv.id_file = f.id +		and rv.id in (select fr.id_rfile +			      from ${pre}releases r, ${pre}filereleases fr +			      where r.id = fr.id_release +			      and r.id_tree = ? +			      and r.is_indexed = 'f')}); + +# 	$dbh->prepare(qq{ +# 	    select rv.id, f.path, rv.revision  +# 		from ${pre}files f, ${pre}revisions rv  +# 		where rv.id_file = f.id +# 		and not exists(select 1 from ${pre}filestatus fs +# 			       where fs.id_rfile = rv.id +# 			       and fs.indexed = 't' +# 			       and fs.hashed = 't' +# 			       and fs.referenced = 't') +# 		and exists(select 1 from ${pre}filereleases fr, ${pre}releases r +# 			   where fr.id_rfile = rv.id +# 			   and fr.id_release = r.id +# 			   and r.id_tree = ?)}); +    if ($sth->execute($tree_id) > 0) { +	return $sth->fetchall_arrayref(); +    } +    else { +	$sth->finish(); +	return []; +    } +} + +sub new_releases_by_file { +    my ($self, $file_id) = @_; + +    my $dbh = $self->dbh; +    my $pre = $self->prefix; +    my $sth = $$self{'sth'}{'releases_by_file'} ||= +	$dbh->prepare(qq{ +	    select r.release_tag from ${pre}releases r, ${pre}filereleases f +		where r.id = f.id_release and f.id_rfile = ? and r.is_indexed = 'f'}); +    if ($sth->execute($file_id) > 0) { +	return [map { $$_[0] } @{$sth->fetchall_arrayref()}]; +    } +    else { +	$sth->finish(); +	return []; +    } +} + +sub update_indexed_releases { +    my ($self, $tree) = @_; + +    my $tree_id = $self->_get_tree($tree); +    return [] unless $tree_id; +     +    my $dbh = $self->dbh; +    my $pre = $self->prefix; +    my $sth = $$self{'sth'}{'update_indexed_releases_find'} ||= +	$dbh->prepare(qq{ +	    select r.id, r.release_tag +		from ${pre}releases r +		where is_indexed = 'f' +		and not exists (select 1 +				from ${pre}filereleases fr +				left outer join ${pre}filestatus fs +				on (fr.id_rfile = fs.id_rfile) +				where fr.id_release = r.id +				and (fs.id_rfile is null +				     or fs.indexed = 'f' +				     or fs.hashed = 'f' +				     or fs.referenced = 'f'))}); +     +    if ($sth->execute() > 0) { +	my $rels = $sth->fetchall_arrayref(); +	$sth->finish(); +	$sth = $$self{'sth'}{'update_indexed_releases_set'} ||= +	    $dbh->prepare(qq{ +		update ${pre}releases set is_indexed = 't' where id = ?}); +	foreach my $r (@$rels) { +	    $sth->execute($$r[0]); +	} +	$sth->finish(); +	return [map { $$_[1] } @$rels]; +    } +    else { +	return []; +    } +} +  sub _get_release {      my ($self, $tree_id, $release) = @_; @@ -345,14 +449,19 @@ sub get_symbol_usage {      my $dbh = $self->dbh;      my $pre = $self->prefix; -    my $sth = $$self{'sth'}{'get_symbol_usage'} ||= + +    # Postgres' query optimizer deals badly with placeholders and +    # prepared statements in this case. +    return undef unless $symid =~ /^\d+$/s; +    my $sth =  	$dbh->prepare(qq{  	    select u.id_rfile, u.line  		from ${pre}usage u, ${pre}filereleases fr -		where u.id_symbol = ?  -		and u.id_rfile = fr.id_rfile and fr.id_release = ?}); +		where u.id_symbol = $symid +		and u.id_rfile = fr.id_rfile and fr.id_release = ? +		limit 1000}); -    $sth->execute($symid, $rel_id); +    $sth->execute($rel_id);      my $res = $sth->fetchall_arrayref();      $sth->finish(); diff --git a/lib/LXRng/Index/Pg.pm b/lib/LXRng/Index/Pg.pm index 05fe3a0..1b905c0 100644 --- a/lib/LXRng/Index/Pg.pm +++ b/lib/LXRng/Index/Pg.pm @@ -179,6 +179,8 @@ sub init_db {  	or die($dbh->errstr);      $dbh->do(qq{create index ${pre}file_idx1 on ${pre}files using btree (path)})  	or die($dbh->errstr); +    $dbh->do(qq{create index ${pre}filerel_idx1 on ${pre}filereleases using btree (id_release)}) +	or die($dbh->errstr);      $dbh->do(qq{grant select on ${pre}charsets to public}) or die($dbh->errstr);      $dbh->do(qq{grant select on ${pre}trees to public}) or die($dbh->errstr); @@ -212,6 +214,7 @@ sub drop_db {      $dbh->do(qq{drop index ${pre}usage_idx2});      $dbh->do(qq{drop index ${pre}include_idx1});      $dbh->do(qq{drop index ${pre}file_idx1}); +    $dbh->do(qq{drop index ${pre}filerel_idx1});      $dbh->do(qq{drop table ${pre}usage});      $dbh->do(qq{drop table ${pre}identifiers}); diff --git a/lib/LXRng/Index/PgBatch.pm b/lib/LXRng/Index/PgBatch.pm index 8f8844c..19c9fa9 100644 --- a/lib/LXRng/Index/PgBatch.pm +++ b/lib/LXRng/Index/PgBatch.pm @@ -77,7 +77,7 @@ sub flush {  	    }  	}  	$self->dbh->commit() unless $self->dbh->{AutoCommit}; -	$self->dbh->do(q(analyze)) if $i > 100000; +	$self->dbh->do(q(analyze)) if $i > 500000;  	$self->dbh->disconnect();  	warn "\n*** index: flushed $i rows\n";  	kill(9, $$); diff --git a/lib/LXRng/Lang/C.pm b/lib/LXRng/Lang/C.pm index c88f424..1a826bd 100644 --- a/lib/LXRng/Lang/C.pm +++ b/lib/LXRng/Lang/C.pm @@ -51,7 +51,8 @@ sub parsespec {  	    'comment',	'//',		"\$",  	    'string',	'"',		'"',  	    'string',	"'",		"'", -	    'include',	'#\s*include',	"\$"]; +	    'include',	'#\s*include\s+"',	'"', +	    'include',	'#\s*include\s+<',	'>'];  }  sub typemap { diff --git a/lib/LXRng/Markup/File.pm b/lib/LXRng/Markup/File.pm index 406737c..abb763c 100644 --- a/lib/LXRng/Markup/File.pm +++ b/lib/LXRng/Markup/File.pm @@ -16,7 +16,7 @@ sub context {  sub safe_html {      my ($str) = @_; -    return encode_entities($str, '^\n\r\t !\#\$\(-;=?-~'); +    return encode_entities($str, '^\n\r\t !\#\$\(-;=?-~\200-\377');  }  sub make_format_newline { diff --git a/lib/LXRng/Repo/Git.pm b/lib/LXRng/Repo/Git.pm index 2d6ea33..757da26 100644 --- a/lib/LXRng/Repo/Git.pm +++ b/lib/LXRng/Repo/Git.pm @@ -74,7 +74,7 @@ sub allversions {  }  sub node { -    my ($self, $path, $release) = @_; +    my ($self, $path, $release, $rev) = @_;      $path =~ s,^/+,,;      $path =~ s,/+$,,; @@ -88,14 +88,21 @@ sub node {  	return LXRng::Repo::Git::Directory->new($self, '', $ref);      } -    my $git = $self->_git_cmd('ls-tree', $release, $path); -    my ($mode, $type, $ref, $gitpath) = split(" ", <$git>); +    my $type; +    if ($rev) { +	$type = 'blob'; +    } +    else { +	my $git = $self->_git_cmd('ls-tree', $release, $path); +	my ($mode, $gitpath); +	($mode, $type, $rev, $gitpath) = split(" ", <$git>); +    }      if ($type eq 'tree') { -	return LXRng::Repo::Git::Directory->new($self, $path, $ref, $release); +	return LXRng::Repo::Git::Directory->new($self, $path, $rev, $release);      }      elsif ($type eq 'blob') { -	return LXRng::Repo::Git::File->new($self, $path, $ref, $release); +	return LXRng::Repo::Git::File->new($self, $path, $rev, $release);      }      else {  	return undef; diff --git a/lib/LXRng/Search/Xapian.pm b/lib/LXRng/Search/Xapian.pm index 42c7580..b6e28a0 100644 --- a/lib/LXRng/Search/Xapian.pm +++ b/lib/LXRng/Search/Xapian.pm @@ -32,9 +32,11 @@ sub new_document {  }  sub add_document { -    my ($self, $doc, $rel_id) = @_; +    my ($self, $doc, $rel_ids) = @_; -    $doc->add_term('__@@LXRREL_'.$rel_id); +    foreach my $r (@$rel_ids) { +	$doc->add_term('__@@LXRREL_'.$r); +    }      my $doc_id = $self->wrdb->add_document($doc);      $self->{'writes'}++;      $self->flush() if $self->{'writes'} % 499 == 0; @@ -42,20 +44,25 @@ sub add_document {  }  sub add_release { -    my ($self, $doc_id, $rel_id) = @_; +    my ($self, $doc_id, $rel_ids) = @_; -    my $reltag = '__@@LXRREL_'.$rel_id;      my $doc = $self->wrdb->get_document($doc_id); -    my $term = $doc->termlist_begin;      my $termend = $doc->termlist_end; -    $term->skip_to($reltag); -    if ($term ne $termend) { -	return 0 if $term->get_termname eq $reltag; +    my $changes = 0; +    foreach my $r (@$rel_ids) { +	my $reltag = '__@@LXRREL_'.$r; +	my $term = $doc->termlist_begin; +	$term->skip_to($reltag); +	if ($term ne $termend) { +	    next if $term->get_termname eq $reltag; +	} +	$doc->add_term($reltag); +	$changes++;      } -    $doc->add_term($reltag); -    $self->wrdb->replace_document($doc_id, $doc); -    return 1; + +    $self->wrdb->replace_document($doc_id, $doc) if $changes; +    return $changes;  }  sub flush { | 
