diff options
author | Arne Georg Gleditsch <argggh@lxr.linpro.no> | 2007-07-05 00:51:08 +0200 |
---|---|---|
committer | Arne Georg Gleditsch <argggh@lxr.linpro.no> | 2007-07-05 00:51:08 +0200 |
commit | e9fa4c98bb5f084739d3418ade3f0c51e34a0aa1 (patch) | |
tree | fec1d635625e031cde7cba1b0a1d95ee92ac760b /lib |
Rebase tree.
Diffstat (limited to 'lib')
29 files changed, 2887 insertions, 0 deletions
diff --git a/lib/LXRng.pm b/lib/LXRng.pm new file mode 100644 index 0000000..11415cc --- /dev/null +++ b/lib/LXRng.pm @@ -0,0 +1,12 @@ +package LXRng; + +use strict; +use vars qw($ROOT); + +sub import { + my ($class, %args) = @_; + + $ROOT = $args{'ROOT'} if exists $args{'ROOT'}; +} + +1; diff --git a/lib/LXRng/Cached.pm b/lib/LXRng/Cached.pm new file mode 100644 index 0000000..f11a749 --- /dev/null +++ b/lib/LXRng/Cached.pm @@ -0,0 +1,63 @@ +package LXRng::Cached; + +use strict; +require Exporter; +use vars qw($memcached @ISA @EXPORT); +@ISA = qw(Exporter); +@EXPORT = qw(cached); + +BEGIN { + eval { require Cache::Memcached; + require Storable; + require Digest::SHA1; + }; + if ($@ eq '') { + $memcached = Cache::Memcached->new({ + 'servers' => ['127.0.0.1:11211']}); + $memcached = undef + unless ($memcached->set(':lxrng_caching' => 1)) + } +} + +# Caches result from block enclosed by cache { ... }. File/linenumber +# of the "cache" keyword is used as the caching key. If additional +# arguments are given after the sub to cache, they are used to further +# specify the caching key. Otherwise, the arguments supplied to the +# function containing the call to cached are used. + +sub cached(&;@); +*cached = \&DB::LXRng_Cached_cached; + +package DB; + +sub LXRng_Cached_cached(&;@) { + my ($func, @args) = @_; + if ($LXRng::Cached::memcached) { + my ($pkg, $file, $line) = caller(0); + my $params; + if (@args > 0) { + $params = Storable::freeze(\@args); + } + else { + my @caller = caller(1); + $params = Storable::freeze(\@DB::args); + } + my $key = ':lxrng:'. + Digest::SHA1::sha1_hex(join("\0", $file, $line, $params)); + my $val = $LXRng::Cached::memcached->get($key); + unless ($val) { + $val = [$func->()]; + $LXRng::Cached::memcached->set($key, $val); + warn "cache miss for $key"; + } + else { + warn "cache hit for $key"; + } + return @$val; + } + else { + return $func->(); + } +} + +1; diff --git a/lib/LXRng/Context.pm b/lib/LXRng/Context.pm new file mode 100644 index 0000000..46faa21 --- /dev/null +++ b/lib/LXRng/Context.pm @@ -0,0 +1,174 @@ +package LXRng::Context; + +use strict; +use LXRng; + +sub new { + my ($self, %args) = @_; + + $self = bless({}, $self); + + if ($args{'query'}) { + $$self{'req_url'} = $args{'query'}->url(); + + foreach my $p ($args{'query'}->param) { + $$self{'params'}{$p} = [$args{'query'}->param($p)]; + } + my @prefs = $args{'query'}->cookie('lxr_prefs'); + if (@prefs) { + $$self{'prefs'} = { + map { /^(.*?)(?:=(.*)|)$/; ($1 => $2) } @prefs }; + } + @$self{'tree', 'path'} = $args{'query'}->path_info =~ m,([^/]+)/*(.*),; + $$self{'tree'} = $args{'query'}->param('tree') + if $args{'query'}->param('tree'); + } + if ($args{'tree'}) { + $$self{'tree'} = $args{'tree'}; + } + + if ($$self{'tree'} =~ s/[+](.*)$//) { + $$self{'release'} = $1; + } + + if ($$self{'tree'}) { + my $tree = $$self{'tree'}; + my @config = $self->read_config(); + die("No config for tree $tree") + unless ref($config[0]) eq 'HASH' and exists($config[0]{$tree}); + + $$self{'config'} = $config[0]{$tree}; + } + + if (exists $$self{'params'}{'v'} and $$self{'params'}{'v'}) { + $$self{'release'} ||= $$self{'params'}{'v'}[0]; + delete($$self{'params'}{'v'}); + } + + if ($$self{'config'}) { + $$self{'release'} ||= $$self{'config'}{'ver_default'}; + } + + return $self; +} + +sub read_config { + my ($self) = @_; + + my $confpath = $LXRng::ROOT.'/lxrng.conf'; + + if (open(my $cfgfile, $confpath)) { + my @config = eval("use strict; use warnings;\n". + "#line 1 \"configuration file\"\n". + join("", <$cfgfile>)); + die($@) if $@; + + return @config; + } + else { + die("Couldn't open configuration file \"$confpath\"."); + } +} + +sub release { + my ($self, $value) = @_; + + $$self{'release'} = $value if @_ == 2; + return $$self{'release'}; +} + +sub default_release { + my ($self, $value) = @_; + + return $$self{'config'}{'ver_default'}; +} + +sub all_releases { + my ($self) = @_; + + return $$self{'config'}{'ver_list'}; +} + +sub param { + my ($self, $key) = @_; + my @res; + + @res = @{$$self{'params'}{$key}} if + exists $$self{'params'}{$key}; + + return wantarray ? @res : $res[0]; +} + +sub path { + my ($self, $value) = @_; + + $$self{'path'} = $value if @_ == 2; + return $$self{'path'}; +} + +sub tree { + my ($self) = @_; + + return $$self{'tree'}; +} + +sub vtree { + my ($self) = @_; + + if ($self->release ne $self->default_release) { + return $self->tree.'+'.$self->release; + } + else { + return $self->tree; + } +} + +sub path_elements { + my ($self) = @_; + + return [] if $self->path =~ /^[ +]/; + + my @path; + return [map { + push(@path, $_); { 'node' => $_, 'path' => join('', @path) } + } $self->path =~ m,([^/]+\/?),g]; +} + +sub config { + my ($self) = @_; + + return $$self{'config'}; +} + +sub prefs { + my ($self) = @_; + + return $$self{'prefs'}; +} + +sub base_url { + my ($self) = @_; + + my $base = $self->config->{'base_url'}; + unless ($base) { + $base = $$self{'req_url'}; + $base =~ s/lxr$//; + } + + $base =~ s,/+$,,; + $base .= '/lxr/'.$self->vtree.'/'; + $base =~ s,//+$,/,; + + return $base; +} + +sub args_url { + my ($self, %args) = @_; + + # Todo: escape + my $args = join(';', map { $_.'='.$args{$_} } keys %args); + $args = '?'.$args if $args; + return $args; +} + +1; diff --git a/lib/LXRng/Index.pm b/lib/LXRng/Index.pm new file mode 100644 index 0000000..e0d5794 --- /dev/null +++ b/lib/LXRng/Index.pm @@ -0,0 +1,11 @@ +package LXRng::Index; + +use strict; + +sub transaction(&@) { + my ($code, $index) = @_; + + $index->transaction($code); +} + +1; diff --git a/lib/LXRng/Index/DBI.pm b/lib/LXRng/Index/DBI.pm new file mode 100644 index 0000000..602eac8 --- /dev/null +++ b/lib/LXRng/Index/DBI.pm @@ -0,0 +1,430 @@ +package LXRng::Index::DBI; + +use strict; +use DBI; + +use base qw(LXRng::Index::Generic); + +sub transaction { + my ($self, $code) = @_; + if ($self->dbh->{AutoCommit}) { + $self->dbh->{AutoCommit} = 0; + $code->(); + $self->dbh->{AutoCommit} = 1; + } + else { + # If we're in a transaction already, don't return to + # AutoCommit state. + $code->(); + } + $self->dbh->commit(); +} + +sub _to_task { + my ($self, $rfile_id, $task) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'_to_task_ins'} ||= + $dbh->prepare(qq{insert into ${pre}filestatus(id_rfile) + select ? where not exists + (select 1 from ${pre}filestatus + where id_rfile = ?)}); + $sth->execute($rfile_id, $rfile_id); + + $sth = $$self{'sth'}{'_to_task_upd'}{$task} ||= + $dbh->prepare(qq{update ${pre}filestatus set $task = 't' + where $task = 'f' and id_rfile = ?}); + return $sth->execute($rfile_id) > 0; +} + + +sub to_index { + my ($self, $rfile_id) = @_; + + return $self->_to_task($rfile_id, 'indexed'); +} + +sub to_reference { + my ($self, $rfile_id) = @_; + + return $self->_to_task($rfile_id, 'referenced'); +} + +sub to_hash { + my ($self, $rfile_id) = @_; + + return $self->_to_task($rfile_id, 'hashed'); +} + +sub _get_tree { + my ($self, $tree) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'_get_tree'} ||= + $dbh->prepare(qq{select id from ${pre}trees where name = ?}); + my $id; + if ($sth->execute($tree) > 0) { + ($id) = $sth->fetchrow_array(); + } + $sth->finish(); + + return $id; +} + +sub _get_release { + my ($self, $tree_id, $release) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'_get_release'} ||= + $dbh->prepare(qq{select id from ${pre}releases + where id_tree = ? and release_tag = ?}); + my $id; + if ($sth->execute($tree_id, $release) > 0) { + ($id) = $sth->fetchrow_array(); + } + $sth->finish(); + + return $id; +} + +sub _get_file { + my ($self, $path) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'_get_file'} ||= + $dbh->prepare(qq{select id from ${pre}files where path = ?}); + my $id; + if ($sth->execute($path) > 0) { + ($id) = $sth->fetchrow_array(); + } + $sth->finish(); + + return $id; +} + +sub _get_rfile_by_release { + my ($self, $rel_id, $path) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'_get_rfile_by_release'} ||= + $dbh->prepare(qq{select r.id + from ${pre}filereleases fr, ${pre}files f, + ${pre}revisions r + where fr.id_rfile = r.id and r.id_file = f.id + and fr.id_release = ? and f.path = ?}); + + my $id; + if ($sth->execute($rel_id, $path) > 0) { + ($id) = $sth->fetchrow_array(); + } + $sth->finish(); + + return $id; +} + +sub _get_symbol { + my ($self, $symbol) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'_get_symbol'} ||= + $dbh->prepare(qq{select id from ${pre}symbols where name = ?}); + my $id; + if ($sth->execute($symbol) > 0) { + ($id) = $sth->fetchrow_array(); + } + $sth->finish(); + + return $id; +} + + +sub _add_include { + my ($self, $file_id, $inc_id) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'_add_include'} ||= + $dbh->prepare(qq{insert into ${pre}includes(id_rfile, id_include_path) + values (?, ?)}); + my $id; + $sth->execute($file_id, $inc_id); + + return 1; +} + +sub _includes_by_id { + my ($self, $file_id) = @_; + +} + +sub _symbol_by_id { + my ($self, $id) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'_symbol_by_id'} ||= + $dbh->prepare(qq{select * from ${pre}symbols + where id = ?}); + my @res; + if ($sth->execute($id) > 0) { + @res = $sth->fetchrow_array(); + } + $sth->finish(); + + return @res; +} + +sub _identifiers_by_name { + my ($self, $rel_id, $symbol) = @_; + + my $sym_id = $self->_get_symbol($symbol); + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'_identifiers_by_name'} ||= + $dbh->prepare(qq{ + select i.id, i.type, f.path, i.line, s.name, c.type, c.id, + i.id_rfile + from ${pre}identifiers i + left outer join ${pre}identifiers c on i.context = c.id + left outer join ${pre}symbols s on c.id_symbol = s.id, + ${pre}files f, ${pre}filereleases r, ${pre}revisions v + where i.id_rfile = v.id and v.id = r.id_rfile + and r.id_release = ? and v.id_file = f.id + and i.id_symbol = ?}); + + $sth->execute($rel_id, $sym_id); + my $res = $sth->fetchall_arrayref(); + + use Data::Dumper; + foreach my $def (@$res) { +# warn Dumper($def); + $$def[7] = 42; +# my @files = $self->get_referring_files($rel_id, $$def[7]); +# warn Dumper(\@files); + } + + return $res; +} + +sub _symbols_by_file { + my ($self, $rfile_id) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'_symbols_by_file'} ||= + $dbh->prepare(qq{select distinct s.name + from ${pre}usage u, ${pre}symbols s + where id_rfile = ? and u.id_symbol = s.id}); + $sth->execute($rfile_id); + my %res; + while (my ($symname) = $sth->fetchrow_array()) { + $res{$symname} = 1; + } + + return \%res; +} + +sub _add_usage { + my ($self, $file_id, $line, $symbol_id) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'_add_usage'} ||= + $dbh->prepare(qq{insert into ${pre}usage(id_rfile, line, id_symbol) + values (?, ?, ?)}); + $sth->execute($file_id, $line, $symbol_id); + + return 1; +} + +sub _usage_by_file { + my ($self, $rfile_id) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'_usage_by_file'} ||= + $dbh->prepare(qq{select s.name, u.line + from ${pre}usage u, ${pre}symbols s + where id_rfile = ? and u.id_symbol = s.id}); + $sth->execute($rfile_id); + + die "Unimplemented"; +} + +sub _rfile_path_by_id { + my ($self, $rfile_id) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'_rfile_path_by_id'} ||= + $dbh->prepare(qq{select f.path from ${pre}files f, ${pre}revisions r + where f.id = r.id_file and r.id = ?}); + my $path; + if ($sth->execute($rfile_id) > 0) { + ($path) = $sth->fetchrow_array(); + } + $sth->finish(); + + return $path; +} + +sub _get_includes_by_file { + my ($self, $res, $rel_id, @rfile_ids) = @_; + + my $placeholders = join(', ', ('?') x @rfile_ids); + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $dbh->prepare(qq{select rf.id, f.path + from ${pre}revisions rf, + ${pre}filereleases v, + ${pre}includes i, + ${pre}revisions ri, + ${pre}files f + where rf.id = i.id_rfile + and rf.id_file = f.id + and rf.id = v.id_rfile + and v.id_release = ? + and i.id_include_path = ri.id_file + and ri.id in ($placeholders)}); + + + $sth->execute($rel_id, @rfile_ids); + my $files = $sth->fetchall_arrayref(); + $sth->finish(); + + my @recurse; + foreach my $r (@$files) { + push(@recurse, $$r[0]) unless exists($$res{$$r[0]}); + + $$res{$$r[0]} = $$r[1]; + } + + $self->_get_includes_by_file($res, $rel_id, @recurse) if @recurse; + + return 1; +} + +sub add_hashed_document { + my ($self, $rfile_id, $doc_id) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'add_hashed_document'} ||= + $dbh->prepare(qq{insert into ${pre}hashed_documents(id_rfile, doc_id) + values (?, ?)}); + $sth->execute($rfile_id, $doc_id); + + return 1; +} + +sub get_hashed_document { + my ($self, $rfile_id) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'get_hashed_document'} ||= + $dbh->prepare(qq{select doc_id from ${pre}hashed_documents + where id_rfile = ?}); + my $doc_id; + if ($sth->execute($rfile_id) > 0) { + ($doc_id) = $sth->fetchrow_array(); + } + $sth->finish(); + + return $doc_id; +} + +sub get_symbol_usage { + my ($self, $rel_id, $symid) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'get_symbol_usage'} ||= + $dbh->prepare(qq{ + select u.id_rfile, u.line + from ${pre}usage u, ${pre}filereleases fr + where u.id_symbol = ? + and u.id_rfile = fr.id_rfile and fr.id_release = ?}); + + $sth->execute($symid, $rel_id); + my $res = $sth->fetchall_arrayref(); + $sth->finish(); + + return $res; +} + +sub get_identifier_info { + my ($self, $ident, $rel_id) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'get_identifier_info'} ||= + $dbh->prepare(qq{ + select s.name, s.id, + i.id, i.type, f.path, i.line, cs.name, c.type, c.id, + i.id_rfile + from ${pre}identifiers i + left outer join ${pre}identifiers c on i.context = c.id + left outer join ${pre}symbols cs on c.id_symbol = cs.id, + ${pre}symbols s, ${pre}revisions r, ${pre}files f + where i.id = ? and i.id_symbol = s.id + and i.id_rfile = r.id and r.id_file = f.id}); + +# select i.id_rfile, f.path, i.line, i.type, i.context, s.id, s.name +# from identifiers i, symbols s, revisions r, files f +# where i.id = ? and i.id_symbol = s.id +# and i.id_rfile = r.id and r.id_file = f.id}); + + unless ($sth->execute($ident) == 1) { + return undef; + } + + my ($symname, $symid, + $iid, $type, $path, $line, $cname, $ctype, $cid, $rfile_id) = + $sth->fetchrow_array(); + $sth->finish(); + + my $refs = {$rfile_id => $path}; + $self->get_referring_files($rel_id, $rfile_id, $refs); + my $usage = $self->get_symbol_usage($rel_id, $symid); + + my %reflines; + foreach my $u (@$usage) { + next unless $$refs{$$u[0]}; + $reflines{$$refs{$$u[0]}} ||= []; + push(@{$reflines{$$refs{$$u[0]}}}, $$u[1]); + } + + return ($symname, $symid, + [$iid, $type, $path, $line, $cname, $ctype, $cid, $rfile_id], + \%reflines); +} + +sub get_rfile_timestamp { + my ($self, $rfile_id) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'get_rfile_timestamp'} ||= + $dbh->prepare(qq{ + select extract(epoch from last_modified_gmt)::integer, + last_modified_tz + from ${pre}revisions where id = ?}); + + unless ($sth->execute($rfile_id) == 1) { + return undef; + } + + my ($epoch, $tz) = $sth->fetchrow_array(); + $sth->finish(); + + return ($epoch, $tz); +} + +1; diff --git a/lib/LXRng/Index/Generic.pm b/lib/LXRng/Index/Generic.pm new file mode 100644 index 0000000..cab2513 --- /dev/null +++ b/lib/LXRng/Index/Generic.pm @@ -0,0 +1,172 @@ +package LXRng::Index::Generic; + +use strict; +use Memoize; + + +sub new { + my ($class, %args) = @_; + + memoize('tree_id'); + memoize('release_id'); + memoize('file_id'); + memoize('symbol_id'); + + return bless(\%args, $class); +} + +sub prefix { + my ($self) = @_; + if (exists $$self{'table_prefix'}) { + return $$self{'table_prefix'}.'_'; + } + else { + return ''; + } +} + +sub tree_id { + my ($self, $tree, $update) = @_; + + return $self->_get_tree($tree) || + ($update ? $self->_add_tree($tree) : undef); +} + +sub release_id { + my ($self, $tree, $release, $update) = @_; + + my $tree_id = $self->tree_id($tree, $update); + + return $self->_get_release($tree_id, $release) || + ($update ? $self->_add_release($tree_id, $release) : undef); +} + +sub file_id { + my ($self, $path, $update) = @_; + + return $self->_get_file($path) || + ($update ? $self->_add_file($path) : undef); +} + +sub rfile_id { + my ($self, $node, $update) = @_; + + my $path = $node->name; + my $revision = $node->revision; + + my $file_id = $self->file_id($path, $update); + return undef unless $file_id; + + my ($id, $old_stamp) = $self->_get_rfile($file_id, $revision); + return $id unless $update; + + if ($id) { + my ($new_stamp) = $node->time =~ /^(\d+)/; + if ($update and $old_stamp > $new_stamp) { + $self->_update_rfile_timestamp($id, $node->time); + } + } + else { + $id = $self->_add_rfile($file_id, $revision, $node->time); + } + return $id; +} + +sub symbol_id { + my ($self, $symbol, $update) = @_; + + return $self->_get_symbol($symbol) || + ($update ? $self->_add_symbol($symbol) : undef); +} + +sub add_filerelease { + my ($self, $tree, $release, $rfile_id) = @_; + + my $rel_id = $self->release_id($tree, $release, 1); + + $self->_add_filerelease($rfile_id, $rel_id); +} + +sub add_include { + my ($self, $file_id, $include_path) = @_; + + my $inc_id = $self->_get_file($include_path); + + return 0 unless $inc_id; + return $self->_add_include($file_id, $inc_id); +} + +sub includes_by_file { + my ($self, $tree, $release, $path) = @_; + + my $file_id = $self->file_id($tree, $release, $path); + + return $self->_includes_by_id($file_id); +} + +#sub add_symbol { +# my ($self, $file_id, $line, $symbol, $type, $ctx_id) = @_; +# return $self->_add_symbol($file_id, $line, $symbol, $type, $ctx_id); +#} + +sub add_ident { + my ($self, $rfile_id, $line, $symbol, $type, $ctx_id) = @_; + + my $sym_id = $self->symbol_id($symbol, 1); + + return $self->_add_ident($rfile_id, $line, $sym_id, $type, $ctx_id); +} + +sub symbol_by_id { + my ($self, $id) = @_; + + return $self->_symbol_by_id($id); +} + +sub symbols_by_name { + my ($self, $tree, $release, $symbol) = @_; + + my $rel_id = $self->release_id($tree, $release); +# return $cache_sym{$rel_id}{$symbol} if +# exists $cache_sym{$rel_id} and exists $cache_sym{$rel_id}{$symbol}; + return $self->_identifiers_by_name($rel_id, $symbol); +# $cache_sym{$rel_id}{$symbol} = $id; +} + +sub symbols_by_file { + my ($self, $tree, $release, $path) = @_; + + $path =~ s!^/!!; + my $rel_id = $self->_get_release($self->_get_tree($tree), $release); + my $rfile_id = $self->_get_rfile_by_release($rel_id, $path); + + return $self->_symbols_by_file($rfile_id); +} + +sub add_usage { + my ($self, $file_id, $line, $symbol) = @_; + + my $sym_id = $self->symbol_id($symbol, 1); + + return $self->_add_usage($file_id, $line, $sym_id); +} + +sub usage_by_file { + my ($self, $tree, $release, $path) = @_; + + my $rel_id = $self->_get_release($self->_get_tree($tree), $release); + my $rfile_id = $self->_get_rfile_by_release($rel_id, $path); + + return $self->_usage_by_file($rfile_id); +} + +sub get_referring_files { + my ($self, $rel_id, $rfile_id, $res) = @_; + + $res ||= {}; + $self->_get_includes_by_file($res, $rel_id, $rfile_id); + + return keys %$res; +} + +1; diff --git a/lib/LXRng/Index/Pg.pm b/lib/LXRng/Index/Pg.pm new file mode 100644 index 0000000..05fe3a0 --- /dev/null +++ b/lib/LXRng/Index/Pg.pm @@ -0,0 +1,417 @@ +package LXRng::Index::Pg; + +use strict; +use DBI; + +use base qw(LXRng::Index::DBI); + +sub dbh { + my ($self) = @_; + + $$self{'dbh'} ||= DBI->connect('dbi:Pg:'.$$self{'db_spec'}, + $$self{'db_user'}, $$self{'db_pass'}, + {AutoCommit => 1, + RaiseError => 1}) + or die($DBI::errstr); + + return $$self{'dbh'}; +} + +sub init_db { + my ($self) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + $dbh->{AutoCommit} = 0; + + $dbh->do(qq{create sequence ${pre}treenum}) or die($dbh->errstr); + $dbh->do(qq{create sequence ${pre}relnum}) or die($dbh->errstr); + $dbh->do(qq{create sequence ${pre}filenum cache 50}) or die($dbh->errstr); + $dbh->do(qq{create sequence ${pre}revnum cache 50}) or die($dbh->errstr); + $dbh->do(qq{create sequence ${pre}symnum cache 50}) or die($dbh->errstr); + $dbh->do(qq{create sequence ${pre}identnum cache 50}) or die($dbh->errstr); + + $dbh->do(qq{ + create table ${pre}charsets + ( + id serial, + name varchar, + primary key (id) + ) + }) or die($dbh->errstr); + + $dbh->do(qq{insert into ${pre}charsets(name) values ('ascii')}) + or die($dbh->errstr); + $dbh->do(qq{insert into ${pre}charsets(name) values ('utf-8')}) + or die($dbh->errstr); + $dbh->do(qq{insert into ${pre}charsets(name) values ('iso8859-1')}) + or die($dbh->errstr); + + $dbh->do(qq{ + create table ${pre}trees + ( + id int default nextval('${pre}treenum'), + name varchar, + primary key (id) + ) + }) or die($dbh->errstr); + + $dbh->do(qq{ + create table ${pre}releases + ( + id int default nextval('${pre}relnum'), + id_tree int references ${pre}trees(id), + release_tag varchar, + is_indexed bool default 'f', + primary key (id) + ) + }) or die($dbh->errstr); + + $dbh->do(qq{ + create table ${pre}files + ( + id int default nextval('${pre}filenum'), + path varchar, + primary key (id), + unique (path) + ) + }) or die($dbh->errstr); + + $dbh->do(qq{ + create table ${pre}revisions + ( + id int default nextval('${pre}revnum'), + id_file int references ${pre}files(id), + revision varchar, + last_modified_gmt timestamp without time zone, -- GMT + last_modified_tz varchar(5), -- Optional TZ + body_charset int references ${pre}charsets(id), + primary key (id), + unique (id_file, revision) + ) + }) or die($dbh->errstr); + + $dbh->do(qq{ + create table ${pre}filestatus + ( + id_rfile int references ${pre}revisions(id), + indexed bool default 'f', + referenced bool default 'f', + hashed bool default 'f', + primary key (id_rfile) + ) + }) or die($dbh->errstr); + + $dbh->do(qq{ + create table ${pre}hashed_documents + ( + id_rfile int references ${pre}revisions(id), + doc_id int not null, + primary key (id_rfile) + ) + }) or die($dbh->errstr); + + $dbh->do(qq{ + create table ${pre}filereleases + ( + id_rfile int references ${pre}revisions(id), + id_release int references ${pre}releases(id), + primary key (id_rfile, id_release) + ) + }) or die($dbh->errstr); + + $dbh->do(qq{ + create table ${pre}includes + ( + id_rfile int references ${pre}revisions(id), + id_include_path int references ${pre}files(id) + ) + }) or die($dbh->errstr); + + $dbh->do(qq{ + create table ${pre}symbols + ( + id int default nextval('${pre}symnum'), + name varchar, + primary key (id), + unique (name) + ) + }) or die($dbh->errstr); + + $dbh->do(qq{ + create table ${pre}identifiers + ( + id int default nextval('${pre}identnum'), + id_symbol int references ${pre}symbols(id) deferrable, + id_rfile int references ${pre}revisions(id) deferrable, + line int, + type char(1), + context int references ${pre}identifiers(id) deferrable, + primary key (id) + ) + }) or die($dbh->errstr); + + $dbh->do(qq{ + create table ${pre}usage + ( + id_rfile int references ${pre}revisions(id) deferrable, + id_symbol int references ${pre}symbols(id) deferrable, + line int + ) + }) or die($dbh->errstr); + + $dbh->do(qq{alter table ${pre}usage alter column id_symbol set statistics 250}); + $dbh->do(qq{alter table ${pre}identifiers alter column id_symbol set statistics 250}); + + $dbh->do(qq{create index ${pre}symbol_idx1 on ${pre}symbols using btree (name)}) + or die($dbh->errstr); + $dbh->do(qq{create index ${pre}ident_idx1 on ${pre}identifiers using btree (id_symbol)}) + or die($dbh->errstr); + $dbh->do(qq{create index ${pre}ident_idx2 on ${pre}identifiers using btree (id_rfile)}) + or die($dbh->errstr); + $dbh->do(qq{create index ${pre}ident_idx3 on ${pre}identifiers using btree (id_symbol, id_rfile)}) + or die($dbh->errstr); + $dbh->do(qq{create index ${pre}usage_idx1 on ${pre}usage using btree (id_symbol)}) + or die($dbh->errstr); + $dbh->do(qq{create index ${pre}usage_idx2 on ${pre}usage using btree (id_rfile)}) + or die($dbh->errstr); + $dbh->do(qq{create index ${pre}include_idx1 on ${pre}includes using btree (id_rfile)}) + or die($dbh->errstr); + $dbh->do(qq{create index ${pre}file_idx1 on ${pre}files using btree (path)}) + or die($dbh->errstr); + + $dbh->do(qq{grant select on ${pre}charsets to public}) or die($dbh->errstr); + $dbh->do(qq{grant select on ${pre}trees to public}) or die($dbh->errstr); + $dbh->do(qq{grant select on ${pre}releases to public}) or die($dbh->errstr); + $dbh->do(qq{grant select on ${pre}files to public}) or die($dbh->errstr); + $dbh->do(qq{grant select on ${pre}releases to public}) or die($dbh->errstr); + $dbh->do(qq{grant select on ${pre}filereleases to public}) or die($dbh->errstr); + $dbh->do(qq{grant select on ${pre}filestatus to public}) or die($dbh->errstr); + $dbh->do(qq{grant select on ${pre}hashed_documents to public}) or die($dbh->errstr); + $dbh->do(qq{grant select on ${pre}includes to public}) or die($dbh->errstr); + $dbh->do(qq{grant select on ${pre}symbols to public}) or die($dbh->errstr); + $dbh->do(qq{grant select on ${pre}identifiers to public}) or die($dbh->errstr); + $dbh->do(qq{grant select on ${pre}usage to public}) or die($dbh->errstr); + $dbh->do(qq{grant select on ${pre}revisions to public}) or die($dbh->errstr); + + $dbh->commit(); + $dbh->{AutoCommit} = 0; +} + +sub drop_db { + my ($self) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + local($dbh->{RaiseError}) = 0; + + $dbh->do(qq{drop index ${pre}symbol_idx1}); + $dbh->do(qq{drop index ${pre}ident_idx1}); + $dbh->do(qq{drop index ${pre}ident_idx2}); + $dbh->do(qq{drop index ${pre}usage_idx1}); + $dbh->do(qq{drop index ${pre}usage_idx2}); + $dbh->do(qq{drop index ${pre}include_idx1}); + $dbh->do(qq{drop index ${pre}file_idx1}); + + $dbh->do(qq{drop table ${pre}usage}); + $dbh->do(qq{drop table ${pre}identifiers}); + $dbh->do(qq{drop table ${pre}symbols}); + $dbh->do(qq{drop table ${pre}includes}); + $dbh->do(qq{drop table ${pre}filereleases}); + $dbh->do(qq{drop table ${pre}hashed_documents}); + $dbh->do(qq{drop table ${pre}filestatus}); + $dbh->do(qq{drop table ${pre}revisions}); + $dbh->do(qq{drop table ${pre}files}); + $dbh->do(qq{drop table ${pre}releases}); + $dbh->do(qq{drop table ${pre}trees}); + $dbh->do(qq{drop table ${pre}charsets}); + + $dbh->do(qq{drop sequence ${pre}treenum}); + $dbh->do(qq{drop sequence ${pre}relnum}); + $dbh->do(qq{drop sequence ${pre}filenum}); + $dbh->do(qq{drop sequence ${pre}revnum}); + $dbh->do(qq{drop sequence ${pre}symnum}); + $dbh->do(qq{drop sequence ${pre}identnum}); +} + +sub _add_tree { + my ($self, $tree) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'_add_tree_ins'} ||= + $dbh->prepare(qq{insert into ${pre}trees(name) values (?)}); + $sth->execute($tree); + + $sth = $$self{'sth'}{'_add_tree_insid'} ||= + $dbh->prepare(qq{select currval('${pre}treenum')}); + $sth->execute(); + my ($id) = $sth->fetchrow_array(); + $sth->finish(); + + return $id; +} + +sub _add_release { + my ($self, $tree_id, $release) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'_add_relase_ins'} ||= + $dbh->prepare(qq{insert into ${pre}releases(id_tree, release_tag) + values (?, ?)}); + $sth->execute($tree_id, $release); + + $sth = $$self{'sth'}{'_add_release_insid'} ||= + $dbh->prepare(qq{select currval('${pre}relnum')}); + $sth->execute(); + my ($id) = $sth->fetchrow_array(); + $sth->finish(); + + return $id; +} + +sub _add_file { + my ($self, $path) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'_add_file_ins'} ||= + $dbh->prepare(qq{insert into ${pre}files(path) values (?)}); + $sth->execute($path); + + $sth = $$self{'sth'}{'_add_file_insid'} ||= + $dbh->prepare(qq{select currval('${pre}filenum')}); + $sth->execute(); + my ($id) = $sth->fetchrow_array(); + $sth->finish(); + + return $id; +} + +sub _get_rfile { + my ($self, $file_id, $revision) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'_get_rfile'} ||= + $dbh->prepare(qq{select id, extract('epoch' from last_modified_gmt + at time zone 'UTC') + from ${pre}revisions + where id_file = ? and revision = ?}); + my ($id, $gmt); + if ($sth->execute($file_id, $revision) > 0) { + ($id, $gmt) = $sth->fetchrow_array(); + } + $sth->finish(); + + return ($id, $gmt); +} + +sub _add_rfile { + my ($self, $file_id, $revision, $time) = @_; + + my ($epoch, $zone) = $time =~ /^(\d+)(?: ([-+]\d\d\d\d)|)$/; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'_add_rfile_ins'} ||= + $dbh->prepare(qq{ + insert into ${pre}revisions(id_file, revision, + last_modified_gmt, + last_modified_tz) + values (?, ?, + timestamp 'epoch' + ? * interval '1 second', ?)}); + $sth->execute($file_id, $revision, $epoch, $zone) + or die($dbh->errstr); + + $sth = $$self{'sth'}{'_add_rfile_insid'} ||= + $dbh->prepare(qq{select currval('${pre}revnum')}); + $sth->execute(); + my ($id) = $sth->fetchrow_array(); + $sth->finish(); + + return $id; +} + +sub _update_rfile_timestamp { + my ($self, $rfile_id, $time) = @_; + + my ($epoch, $zone) = $time =~ /^(\d+)(?: ([-+]\d\d\d\d)|)$/; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'_update_rfile_timestamp'} ||= + $dbh->prepare(qq{ + update ${pre}revisions set + last_modified_gmt = timestamp 'epoch' + ? * interval '1 second', + last_modified_tz = ? + where id = ?}); + + $sth->execute($epoch, $zone, $rfile_id) + or die($dbh->errstr); + $sth->finish(); +} + +sub _add_filerelease { + my ($self, $rfile_id, $rel_id) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'_add_filerelease'} ||= + $dbh->prepare(qq{insert into ${pre}filereleases(id_rfile, id_release) + select ?, ? where not exists + (select 1 from ${pre}filereleases + where id_rfile = ? and id_release = ?)}); + $sth->execute($rfile_id, $rel_id, $rfile_id, $rel_id); +} + +sub _add_symbol { + my ($self, $symbol) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'_add_symbol_ins'} ||= + $dbh->prepare(qq{insert into ${pre}symbols(name) values (?)}); + $sth->execute($symbol); + + $sth = $$self{'sth'}{'_add_symbol_insid'} ||= + $dbh->prepare(qq{select currval('${pre}symnum')}); + $sth->execute(); + my ($id) = $sth->fetchrow_array(); + $sth->finish(); + + return $id; +} + +sub _add_ident { + my ($self, $rfile_id, $line, $sym_id, $type, $ctx_id) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'_add_ident_ins'} ||= + $dbh->prepare(qq{insert into ${pre}identifiers + (id_rfile, line, id_symbol, type, context) + values (?, ?, ?, ?, ?)}); + $sth->execute($rfile_id, $line, $sym_id, $type, $ctx_id); + + $sth = $$self{'sth'}{'_add_ident_insid'} ||= + $dbh->prepare(qq{select currval('${pre}identnum')}); + $sth->execute(); + my ($id) = $sth->fetchrow_array(); + $sth->finish(); + + return $id; +} + +sub DESTROY { + my ($self) = @_; + + if ($$self{'dbh'}) { + $$self{'dbh'}->rollback(); + $$self{'dbh'}->disconnect(); + delete($$self{'dbh'}); + } +} + +1; diff --git a/lib/LXRng/Index/PgBatch.pm b/lib/LXRng/Index/PgBatch.pm new file mode 100644 index 0000000..8f8844c --- /dev/null +++ b/lib/LXRng/Index/PgBatch.pm @@ -0,0 +1,217 @@ +package LXRng::Index::PgBatch; + +# Specialized subclass of LXRng::Index::Pg for doing parallelized +# batched inserts into database. Higher performance (and higher +# complexity). + +use strict; +use DBI; +use POSIX qw(:sys_wait_h); + +use base qw(LXRng::Index::Pg); + + +sub transaction { + my ($self, $code) = @_; + + if ($self->dbh->{AutoCommit}) { + $self->dbh->{AutoCommit} = 0; + $self->dbh->do(q(set constraints all deferred)); + $code->(); + $self->flush(); + $self->dbh->commit(); + $self->dbh->{AutoCommit} = 1; + + # At end of outermost transaction, wait for outstanding flushes + $self->_flush_wait(); + } + else { + # If we're in a transaction already, don't return to + # AutoCommit state. + $code->(); + + # Only occasional synchronization if we're inside another + # transaction. + if ($self->{'writes'}++ % 997 == 0) { + $self->flush(); + $self->dbh->commit(); + } + } +} + +sub new { + my ($class, @args) = @_; + + my $self = $class->SUPER::new(@args); + $$self{'writes'} = 0; + + return $self; +} + +sub flush { + my ($self) = @_; + + return unless exists($$self{'cache'}); + + $self->_flush_wait(); + + my $pre = $self->prefix; + $self->dbh->commit() unless $self->dbh->{AutoCommit}; + my $pid = fork(); + die("fork failed: $!") unless defined($pid); + if ($pid == 0) { + $SIG{'INT'} = 'IGNORE'; + $SIG{'QUIT'} = 'IGNORE'; + $SIG{'TERM'} = 'IGNORE'; + + my $i = 0; + $$self{'dbh'} = undef; + foreach my $table (qw(symbols identifiers usage)) { + if (exists($$self{'cache'}{$table})) { + $self->dbh->do(qq{copy $pre$table from stdin}); + foreach my $l (@{$$self{'cache'}{$table}}) { + $i++; + $self->dbh->pg_putline($l); + } + $self->dbh->pg_endcopy; + } + } + $self->dbh->commit() unless $self->dbh->{AutoCommit}; + $self->dbh->do(q(analyze)) if $i > 100000; + $self->dbh->disconnect(); + warn "\n*** index: flushed $i rows\n"; + kill(9, $$); + } + $$self{'flush_pid'} = $pid; + delete($$self{'cache'}); + warn "\n*** index: flushing in background\n"; +} + +sub _flush_wait { + my ($self) = @_; + + return unless $$self{'flush_pid'}; + waitpid($$self{'flush_pid'}, WNOHANG); # Reap zombies + return unless kill(0, $$self{'flush_pid'}); + + warn "\n*** index: waiting for running flush to complete...\n"; + $self->dbh->commit() unless $self->dbh->{AutoCommit}; + waitpid($$self{'flush_pid'}, 0); +} + +sub _cache { + my ($self, $name) = @_; + + $$self{'cache'}{$name} ||= []; + return $$self{'cache'}{$name}; +} + +sub _cached_seqno { + my ($self, $seqname) = @_; + + unless (exists($$self{'cached_seqno'}{$seqname}) and + $$self{'cached_seqno'}{$seqname}{'min'} <= + $$self{'cached_seqno'}{$seqname}{'max'}) + { + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'_cached_seqno'}{$seqname} ||= + $dbh->prepare(qq{select setval('$pre$seqname', + nextval('$pre$seqname')+1000)}); + $sth->execute(); + my ($id) = $sth->fetchrow_array(); + $sth->finish(); + $$self{'cached_seqno'}{$seqname}{'min'} = $id-1000; + $$self{'cached_seqno'}{$seqname}{'max'} = $id; + } + + return $$self{'cached_seqno'}{$seqname}{'min'}++; +} + +sub _prime_symbol_cache { + my ($self) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'_prime_symbol_cache'} ||= + $dbh->prepare(qq{select name, id from ${pre}symbols}); + $sth->execute(); + my %cache; + while (my ($name, $id) = $sth->fetchrow_array()) { + $cache{$name} = $id; + } + $sth->finish; + + $$self{'__symbol_cache'} = \%cache; +} + +sub _add_usage { + my ($self, $file_id, $line, $symbol_id) = @_; + + push(@{$self->_cache('usage')}, "$file_id\t$symbol_id\t$line\n"); + + return 1; +} + +sub _add_symbol { + my ($self, $symbol) = @_; + + my $id = $self->_cached_seqno('symnum'); + push(@{$self->_cache('symbols')}, "$id\t$symbol\n"); + + $self->_prime_symbol_cache() + unless exists $$self{'__symbol_cache'}; + + $$self{'__symbol_cache'}{$symbol} = $id; + + return $id; +} + +sub _add_ident { + my ($self, $rfile_id, $line, $sym_id, $type, $ctx_id) = @_; + + $ctx_id = '\\N' unless defined($ctx_id); + + my $id = $self->_cached_seqno('identnum'); + push(@{$self->_cache('identifiers')}, join("\t", $id, $sym_id, + $rfile_id, $line, $type, + $ctx_id)."\n"); + + return $id; +} + +my $_get_symbol_usage = 0; +sub _get_symbol { + my ($self, $symbol) = @_; + + unless (exists($$self{'__symbol_cache'})) { + # Only prime the cache once it's clear that we're likely to + # hit it a significant number of times. + return $self->SUPER::_get_symbol($symbol) if + $_get_symbol_usage++ < 100; + + $self->_prime_symbol_cache(); + } + + return $$self{'__symbol_cache'}{$symbol} if + exists $$self{'__symbol_cache'}{$symbol}; + + return undef; +} + +sub DESTROY { + my ($self) = @_; + + if ($$self{'writes'} > 0) { + $self->flush(); + $self->_flush_wait(); + } + + if ($$self{'dbh'}) { + $$self{'dbh'}->rollback() unless $$self{'dbh'}{'AutoCommit'}; + $$self{'dbh'}->disconnect(); + delete($$self{'dbh'}); + } +} + +1; diff --git a/lib/LXRng/Lang.pm b/lib/LXRng/Lang.pm new file mode 100644 index 0000000..7e6c278 --- /dev/null +++ b/lib/LXRng/Lang.pm @@ -0,0 +1,56 @@ +package LXRng::Lang; + +use strict; +use vars qw(@languages %deftypes %defweight); + + +%deftypes = + ( + 'c' => 'class', + 'd' => 'macro (un)definition', + 'e' => 'enumerator', + 'f' => 'function', + 'g' => 'enumeration name', + 'm' => 'class, struct, or union member', + 'n' => 'namespace', + 'p' => 'function prototype or declaration', + 's' => 'structure', + 't' => 'typedef', + 'u' => 'union', + 'v' => 'variable', + 'l' => 'local variable', + 'x' => 'extern or forward variable declaration', + 'i' => 'interface' + ); + +%defweight = do { my $i = 0; + map { $_ => $i++ } + qw(c f i n s t u p x v d e g m l) }; + + +sub import { + my ($self, @langs) = @_; + + push(@langs, 'Undefined'); + foreach my $l (@langs) { + eval "require LXRng::Lang::$l"; + die $@ if $@; + push(@languages, "LXRng::Lang::$l"); + } +} + +sub new { + my ($self, $file) = @_; + + my $pathname = $file->name(); + + foreach my $l (@languages) { + if ($pathname =~ $l->pathexp) { + return $l; + } + } + + die "No language found for $pathname"; +} + +1; diff --git a/lib/LXRng/Lang/C.pm b/lib/LXRng/Lang/C.pm new file mode 100644 index 0000000..c88f424 --- /dev/null +++ b/lib/LXRng/Lang/C.pm @@ -0,0 +1,137 @@ +package LXRng::Lang::C; + +use strict; +use Subst::Complex; + +use base qw(LXRng::Lang::Generic); + + +sub doindex { + return 1; +} + +sub ctagslangname { + return 'c'; +} + +sub ctagsopts { + return ('--c-types=+lpx'); +} + +sub pathexp { + return qr/\.[ch]$/; +} + +my $_identifier_re = qr( + (?m:^|(?<=[^a-zA-Z0-9_\#])) # Non-symbol chars. + (_*[a-zA-Z][a-zA-Z0-9_]*) # The symbol. + \b + )x; + +sub identifier_re { + return $_identifier_re; +} + +my $_reserved ||= { map { $_ => 1 } + qw(asm auto break case char continue default do + double else enum extern float for fortran goto + if int long register return short signed sizeof + static struct switch typedef union unsigned + void volatile while + #define #else #endif #if #ifdef #ifndef #include + #undef)}; + +sub reserved { + return $_reserved; +} + +sub parsespec { + return ['atom', '\\\\.', undef, + 'comment', '/\*', '\*/', + 'comment', '//', "\$", + 'string', '"', '"', + 'string', "'", "'", + 'include', '#\s*include', "\$"]; +} + +sub typemap { + return { + 'c' => 'class', + 'd' => 'macro (un)definition', + 'e' => 'enumerator', + 'f' => 'function definition', + 'g' => 'enumeration name', + 'm' => 'class, struct, or union member', + 'n' => 'namespace', + 'p' => 'function prototype or declaration', + 's' => 'structure name', + 't' => 'typedef', + 'u' => 'union name', + 'v' => 'variable definition', + 'x' => 'extern or forward variable declaration', + 'i' => 'interface'}; +} + +sub markuphandlers { + my ($self, $context, $node, $markup) = @_; + + my $index = $context->config->{'index'}; + my $syms = $index->symbols_by_file($context->tree, $context->release, + $node->name); + my $idre = $self->identifier_re(); + + my %subst; + + my $format_newline = $markup->make_format_newline($node); + $subst{'comment'} = new Subst::Complex + qr/\n/ => $format_newline, + qr/[^\n]+/ => sub { $markup->format_comment(@_) }; + + $subst{'string'} = new Subst::Complex + qr/\n/ => $format_newline, + qr/[^\n\"\']+/ => sub { $markup->format_string(@_) }; + + $subst{'include'} = new Subst::Complex + qr/\n/ => $format_newline, + qr/(include\s*\")(.*?)(\")/ => sub { + $markup->format_include([$self->resolve_include($context, $node, @_)], + @_) }, + + qr/(include\s*\<)(.*?)(\>)/ => sub { + $markup->format_include([$self->resolve_include($context, $node, @_)], + @_) }; + + $subst{'code'} = new Subst::Complex + qr/\n/ => $format_newline, + qr/[^\n]*/ => sub { $markup->format_code($idre, $syms, @_) }; + + $subst{'start'} = new Subst::Complex + qr/^/ => $format_newline; + + return \%subst; +} + +sub resolve_include { + my ($self, $context, $node, $frag) = @_; + + if ($frag =~ /include\s+<(.*?)>/) { + return $self->expand_include($context, $node, $1); + } + elsif ($frag =~ /include\s+\"(.*?)\"/) { + my $incl = $1; + my $bare = $1; + my $name = $node->name(); + if ($name =~ /(.*\/)/) { + $incl = $1.$incl; + 1 while $incl =~ s,/[^/]+/../,/,; + + my $file = $context->config->{'repository'}->node($incl, $context->release); + return $incl if $file; + return $self->expand_include($context, $node, $bare); + } + } + + return (); +} + +1; diff --git a/lib/LXRng/Lang/Generic.pm b/lib/LXRng/Lang/Generic.pm new file mode 100644 index 0000000..29443f4 --- /dev/null +++ b/lib/LXRng/Lang/Generic.pm @@ -0,0 +1,22 @@ +package LXRng::Lang::Generic; + +use strict; + +sub expand_include { + my ($self, $context, $node, $include) = @_; + + return () unless $context->config->{'include_maps'}; + + my $file = $node->name(); + foreach my $map (@{$context->config->{'include_maps'}}) { + my @key = $file =~ /($$map[0])/ or next; + my @val = $include =~ /($$map[1])/ or next; + shift(@key); + shift(@val); + my @paths = $$map[2]->(@key, @val); + + return map { /([^\/].*)/ ? $1 : $_ } @paths; + } +} + +1; diff --git a/lib/LXRng/Lang/Undefined.pm b/lib/LXRng/Lang/Undefined.pm new file mode 100644 index 0000000..c4c3c72 --- /dev/null +++ b/lib/LXRng/Lang/Undefined.pm @@ -0,0 +1,45 @@ +package LXRng::Lang::Undefined; + +use strict; +use Subst::Complex; + +use base qw(LXRng::Lang::Generic); + + +sub doindex { + return 0; +} + +sub pathexp { + return qr/$/; +} + +sub reserved { + return {}; +} + +sub parsespec { + return ['atom', '\\\\.', undef]; +} + +sub typemap { + return {}; +} + +sub markuphandlers { + my ($self, $context, $node, $markup) = @_; + + my $format_newline = $markup->make_format_newline($node); + + my %subst; + $subst{'code'} = new Subst::Complex + qr/\n/ => $format_newline, + qr/[^\n]*/ => sub { $markup->format_raw(@_) }; + + $subst{'start'} = new Subst::Complex + qr/^/ => $format_newline; + + return \%subst; +} + +1; diff --git a/lib/LXRng/Markup/Dir.pm b/lib/LXRng/Markup/Dir.pm new file mode 100644 index 0000000..b7743ee --- /dev/null +++ b/lib/LXRng/Markup/Dir.pm @@ -0,0 +1,64 @@ +package LXRng::Markup::Dir; + +use strict; +use POSIX qw(strftime); +use LXRng::Cached; + +sub new { + my ($class, %args) = @_; + + return bless(\%args, $class); +} + +sub context { + my ($self) = @_; + return $$self{'context'}; +} + +sub _format_time { + my ($secs, $zone) = @_; + + my $offset = 0; + if ($zone and $zone =~ /^([-+])(\d\d)(\d\d)$/) { + $offset = ($2 * 60 + $3) * 60; + $offset = -$offset if $1 eq '-'; + $secs += $offset; + } + else { + $zone = ''; + } + return strftime("%F %T $zone", gmtime($secs)); +} + +sub listing { + my ($self) = @_; + + cached { + my @list; + foreach my $n ($$self{'node'}->contents) { + if ($n->isa('LXRng::Repo::Directory')) { + push(@list, {'name' => $n->name, + 'node' => $n->node, + 'size' => '', + 'time' => '', + 'desc' => ''}); + } + else { + my $rfile_id = $self->context->config->{'index'}->rfile_id($n); + my ($s, $tz) = + $self->context->config->{'index'}->get_rfile_timestamp($rfile_id); + ($s, $tz) = $n->time =~ /^(\d+)(?: ([-+]\d\d\d\d)|)$/ + unless $s; + + push(@list, {'name' => $n->name, + 'node' => $n->node, + 'size' => $n->size, + 'time' => _format_time($s, $tz), + 'desc' => ''}); + } + } + \@list; + } $$self{'node'}; +} + +1; diff --git a/lib/LXRng/Markup/File.pm b/lib/LXRng/Markup/File.pm new file mode 100644 index 0000000..406737c --- /dev/null +++ b/lib/LXRng/Markup/File.pm @@ -0,0 +1,120 @@ +package LXRng::Markup::File; + +use strict; +use HTML::Entities; + +sub new { + my ($class, %args) = @_; + + return bless(\%args, $class); +} + +sub context { + my ($self) = @_; + return $$self{'context'}; +} + +sub safe_html { + my ($str) = @_; + return encode_entities($str, '^\n\r\t !\#\$\(-;=?-~'); +} + +sub make_format_newline { + my ($self, $node) = @_; + my $line = 0; + my $tree = $self->context->vtree(); + my $name = $node->name; + + sub { + my ($nl) = @_; + $line++; + $nl = safe_html($nl); + + # id="<num>" is not valid XHTML 1.0, but it is an extremely + # handy shorthand for generating line numbers that don't + # affect cut-n-paste. + return qq{$nl<a id="L$line" name="L$line"></a>}. + qq{<a href="$name#L$line" id="$line" class="line"></a>}; + } +} + +sub format_comment { + my ($self, $com) = @_; + + $com = safe_html($com); + return qq{<span class="comment">$com</span>}; +} + + +sub format_string { + my ($self, $str) = @_; + + $str = safe_html($str); + return qq{<span class="string">$str</span>} +} + +sub format_include { + my ($self, $paths, $all, $pre, $inc, $suf) = @_; + + my $tree = $self->context->vtree(); + if (@$paths > 1) { + $pre = safe_html($pre); + $inc = safe_html($inc); + $suf = safe_html($suf); + my $alts = join("|", map { $_ } @$paths); + return qq{$pre<a href="+ambig=$alts" class="falt">$inc</a>$suf}; + } + elsif (@$paths > 0) { + $pre = safe_html($pre); + $inc = safe_html($inc); + $suf = safe_html($suf); + return qq{$pre<a href="$$paths[0]" class="fref">$inc</a>$suf}; + } + else { + return safe_html($all); + } +} + +sub format_code { + my ($self, $idre, $syms, $frag) = @_; + + my $tree = $self->context->vtree(); + my $path = $self->context->path(); + Subst::Complex::s($frag, + $idre => sub { + my $sym = $_[1]; + if (exists($$syms{$sym})) { + $sym = safe_html($sym); + return qq{<a href="+code=$sym" class="sref">$sym</a>} + } + else { + return safe_html($sym); + } + }, + qr/(.*?)/ => sub { return safe_html($_[0]) }, + ); +} + +sub format_raw { + my ($self, $str) = @_; + + return safe_html($str); +} + +sub markupfile { + my ($self, $subst, $parse) = @_; + + my ($btype, $frag) = $parse->nextfrag; + + return () unless defined $frag; + + $btype ||= 'code'; + if ($btype and exists $$subst{$btype}) { + return $$subst{$btype}->s($frag); + } + else { + return $frag; + } +} + +1; diff --git a/lib/LXRng/Parse/Simple.pm b/lib/LXRng/Parse/Simple.pm new file mode 100644 index 0000000..53f7e10 --- /dev/null +++ b/lib/LXRng/Parse/Simple.pm @@ -0,0 +1,127 @@ +package LXRng::Parse::Simple; + +use strict; +use integer; +use IO::Handle; + +sub new { + my ($class, $fileh, $tabhint, @blksep) = @_; + + my (@bodyid, @open, @term); + + while (my @a = splice(@blksep,0,3)) { + push(@bodyid, $a[0]); + push(@open, $a[1]); + push(@term, $a[2]); + } + + my $self = { + 'fileh' => $fileh, # File handle + 'tabwidth' => $tabhint||8, # Tab width + 'frags' => [], # Fragments in queue + 'bodyid' => \@bodyid, # Array of body type ids + 'bofseen' => 0, # Beginning-of-file seen? + 'term' => \@term, + # Fragment closing delimiters + 'open' => join('|', map { "($_)" } @open), + # Fragment opening regexp + 'split' => join('|', @open, map { $_ eq '' ? () : $_ } @term), + # Fragmentation regexp + }; + + return bless $self, $class; + +# @frags $fileh $tabwidth $split @term $open $bodyid + +} + +sub untabify { + my $t = $_[1] || 8; + + $_[0] =~ s/^(\t+)/(' ' x ($t * length($1)))/ge; # Optimize for common case. + $_[0] =~ s/([^\t]*)\t/$1.(' ' x ($t - (length($1) % $t)))/ge; + return($_[0]); +} + + +sub nextfrag { + my ($self) = @_; + + my $btype = undef; + my $frag = undef; + my $line = ''; + + while (1) { + # read one more line if we have processed + # all of the previously read line + if (@{$$self{'frags'}} == 0) { + $line = $$self{'fileh'}->getline; + + if ($. <= 2 && + $line =~ /^.*-[*]-.*?[ \t;]tab-width:[ \t]*([0-9]+).*-[*]-/) { + # make sure there really is a non-zero tabwidth + $$self{'tabwidth'} = $1 if $1 > 0; + } + + if(defined($line)) { + untabify($line, $$self{'tabwidth'}); + + # split the line into fragments + $$self{'frags'} = [split(/($$self{'split'})/, $line)]; + } + } + + last if @{$$self{'frags'}} == 0; + + unless ($$self{'bofseen'}) { + # return start marker if file has contents + $$self{'bofseen'} = 1; + return ('start', ''); + } + + # skip empty fragments + if ($$self{'frags'}[0] eq '') { + shift(@{$$self{'frags'}}); + } + + # check if we are inside a fragment + if (defined($frag)) { + if (defined($btype)) { + my $next = shift(@{$$self{'frags'}}); + + # Add to the fragment + $frag .= $next; + # We are done if this was the terminator + last if $next =~ /^$$self{'term'}[$btype]$/; + + } + else { + if ($$self{'frags'}[0] =~ /^$$self{'open'}$/) { + last; + } + $frag .= shift(@{$$self{'frags'}}); + } + } + else { + # Find the blocktype of the current block + $frag = shift(@{$$self{'frags'}}); + if (defined($frag) && (@_ = $frag =~ /^$$self{'open'}$/)) { + # grep in a scalar context returns the number of times + # EXPR evaluates to true, which is this case will be + # the index of the first defined element in @_. + + my $i = 1; + $btype = grep { $i &&= !defined($_) } @_; + if(!defined($$self{'term'}[$btype])) { + # Opening regexp captures entire block. + last; + } + } + } + } + $btype = $$self{'bodyid'}[$btype] if defined($btype); + + return ($btype, $frag); +} + +1; diff --git a/lib/LXRng/Repo/Directory.pm b/lib/LXRng/Repo/Directory.pm new file mode 100644 index 0000000..ad892b2 --- /dev/null +++ b/lib/LXRng/Repo/Directory.pm @@ -0,0 +1,17 @@ +package LXRng::Repo::Directory; + +use strict; + +sub name { + my ($self) = @_; + + return $$self{'name'}; +} + +sub node { + my ($self) = @_; + + $self->name =~ m,([^/]+)/?$, and return $1; +} + +1; diff --git a/lib/LXRng/Repo/File.pm b/lib/LXRng/Repo/File.pm new file mode 100644 index 0000000..7df5a04 --- /dev/null +++ b/lib/LXRng/Repo/File.pm @@ -0,0 +1,17 @@ +package LXRng::Repo::File; + +use strict; + +sub name { + my ($self) = @_; + + return $$self{'name'}; +} + +sub node { + my ($self) = @_; + + $self->name =~ m,([^/]+)$, and return $1; +} + +1; diff --git a/lib/LXRng/Repo/Git.pm b/lib/LXRng/Repo/Git.pm new file mode 100644 index 0000000..2d6ea33 --- /dev/null +++ b/lib/LXRng/Repo/Git.pm @@ -0,0 +1,111 @@ +package LXRng::Repo::Git; + +use strict; +use Memoize; +use LXRng::Cached; +use LXRng::Repo::Git::Iterator; +use LXRng::Repo::Git::File; +use LXRng::Repo::Git::Directory; + +sub _git_cmd { + my ($self, $cmd, @args) = @_; + + my $git; + my $pid = open($git, "-|"); + die $! unless defined $pid; + if ($pid == 0) { + $ENV{'GIT_DIR'} = $$self{'root'}; + exec('git', $cmd, @args); + warn $!; + kill(9, $$); + } + return $git; +} + +sub new { + my ($class, $root, %args) = @_; + + memoize('_release_timestamp'); + + return bless({root => $root, %args}, $class); +} + +sub _release_timestamp { + my ($self, $release) = @_; + + my $cinfo = $self->_git_cmd('cat-file', 'commit', $release); + + my $time; + while (<$cinfo>) { + $time = $1 if /^author .*? (\d+(?: [-+]\d+|))$/ ; + $time ||= $1 if /^committer .*? (\d+(?: [-+]\d+|))$/ ; + } + + return $time; +} + +sub _use_author_timestamp { + my ($self) = @_; + + return $$self{'author_timestamp'}; +} + +sub _sort_key { + my ($v) = @_; + + $v =~ s/(\d+)/sprintf("%05d", $1)/ge; + return $v; +} + +sub allversions { + my ($self) = @_; + + cached { + my @tags; + my $tags = $self->_git_cmd('tag', '-l'); + while (<$tags>) { + chomp; + next if $$self{'release_re'} and $_ !~ $$self{'release_re'}; + push(@tags, $_); + } + + return (sort {_sort_key($b) cmp _sort_key($a) } @tags); + }; +} + +sub node { + my ($self, $path, $release) = @_; + + $path =~ s,^/+,,; + $path =~ s,/+$,,; + + if ($path eq '') { + open(my $tag, '<', $$self{'root'}.'/refs/tags/'.$release) + or return undef; + my $ref = <$tag>; + close($tag); + chomp($ref); + return LXRng::Repo::Git::Directory->new($self, '', $ref); + } + + my $git = $self->_git_cmd('ls-tree', $release, $path); + my ($mode, $type, $ref, $gitpath) = split(" ", <$git>); + + if ($type eq 'tree') { + return LXRng::Repo::Git::Directory->new($self, $path, $ref, $release); + } + elsif ($type eq 'blob') { + return LXRng::Repo::Git::File->new($self, $path, $ref, $release); + } + else { + return undef; + } +} + +sub iterator { + my ($self, $release) = @_; + + return LXRng::Repo::Git::Iterator->new($self, $release); +} + +1; diff --git a/lib/LXRng/Repo/Git/Directory.pm b/lib/LXRng/Repo/Git/Directory.pm new file mode 100644 index 0000000..592e608 --- /dev/null +++ b/lib/LXRng/Repo/Git/Directory.pm @@ -0,0 +1,56 @@ +package LXRng::Repo::Git::Directory; + +use strict; + +use base qw(LXRng::Repo::Directory); + +sub new { + my ($class, $repo, $name, $ref, $rel) = @_; + + $name =~ s,/*$,/,; + return bless({repo => $repo, name => $name, ref => $ref, rel => $rel}, + $class); +} + +sub time { + my ($self) = @_; + + return 0; +# return $$self{'stat'}[9]; +} + +sub size { + my ($self) = @_; + + return ''; +} + +sub contents { + my ($self) = @_; + + my $git = $$self{'repo'}->_git_cmd('ls-tree', $$self{'ref'}); + + my $prefix = $$self{'name'}; + $prefix =~ s,^/+,,; + my (@dirs, @files); + while (<$git>) { + chomp; + my ($mode, $type, $ref, $node) = split(" ", $_); + if ($type eq 'tree') { + push(@dirs, LXRng::Repo::Git::Directory->new($$self{'repo'}, + $prefix.$node, + $ref, + $$self{'rel'})); + } + elsif ($type eq 'blob') { + push(@files, LXRng::Repo::Git::File->new($$self{'repo'}, + $prefix.$node, + $ref, + $$self{'rel'})); + } + } + + return (@dirs, @files); +} + +1; diff --git a/lib/LXRng/Repo/Git/File.pm b/lib/LXRng/Repo/Git/File.pm new file mode 100644 index 0000000..b0bb9a3 --- /dev/null +++ b/lib/LXRng/Repo/Git/File.pm @@ -0,0 +1,80 @@ +package LXRng::Repo::Git::File; + +use strict; + +use base qw(LXRng::Repo::File); +use LXRng::Repo::TmpFile; +use File::Temp qw(tempdir); + +sub new { + my ($class, $repo, $name, $ref, $rel) = @_; + + return bless({repo => $repo, name => $name, ref => $ref, rel => $rel}, + $class); +} + +sub time { + my ($self) = @_; + + if ($$self{'repo'}->_use_author_timestamp) { + # This is painfully slow. It is only performed index-time, + # but that might stil be bad enough that you would want to + # just use the release-timestamp insted. + my $cinfo = $$self{'repo'}->_git_cmd('log', '--pretty=raw', + '--max-count=1', '--all', + '..'.$$self{'rel'}, + '--', $self->name); + + my $time; + while (<$cinfo>) { + $time = $1 if /^author .*? (\d+(?: [-+]\d+|))$/ ; + $time ||= $1 if /^committer .*? (\d+(?: [-+]\d+|))$/ ; + } + + return $time if $time; + } + + return $$self{'repo'}->_release_timestamp($$self{'rel'}); +} + +sub size { + my ($self) = @_; + + my $git = $$self{'repo'}->_git_cmd('cat-file', '-s', $$self{'ref'}); + my $size = <$git>; + close($git); + chomp($size); + return $size; +} + +sub handle { + my ($self) = @_; + + return $$self{'repo'}->_git_cmd('cat-file', 'blob', $$self{'ref'}); +} + +sub revision { + my ($self) = @_; + + return $$self{'ref'}; +} + +sub phys_path { + my ($self) = @_; + + my $tmpdir = tempdir() or die($!); + open(my $phys, ">", $tmpdir.'/'.$self->node) or die($!); + + my $handle = $self->handle(); + my $buf = ''; + while (sysread($handle, $buf, 64*1024) > 0) { + print($phys $buf) or die($!); + } + close($handle); + close($phys) or die($!); + + return LXRng::Repo::TmpFile->new(dir => $tmpdir, + node => $self->node); +} + +1; diff --git a/lib/LXRng/Repo/Git/Iterator.pm b/lib/LXRng/Repo/Git/Iterator.pm new file mode 100644 index 0000000..978e584 --- /dev/null +++ b/lib/LXRng/Repo/Git/Iterator.pm @@ -0,0 +1,33 @@ +package LXRng::Repo::Git::Iterator; + +use strict; +use LXRng::Repo::Git::File; + +sub new { + my ($class, $repo, $release) = @_; + + my @refs; + my $git = $repo->_git_cmd('ls-tree', '-r', $release); + while (<$git>) { + if (/\S+\s+blob\s+(\S+)\s+(\S+)/) { + push(@refs, [$2, $1]); + } + } + close($git); + + return bless({refs => \@refs, repo => $repo, rel => $release}, $class); +} + +sub next { + my ($self) = @_; + + return undef unless @{$$self{'refs'}} > 0; + my $file = shift(@{$$self{'refs'}}); + + return LXRng::Repo::Git::File->new($$self{'repo'}, + $$file[0], + $$file[1], + $$self{'rel'}); +} + +1; diff --git a/lib/LXRng/Repo/Git/TarFile.pm b/lib/LXRng/Repo/Git/TarFile.pm new file mode 100644 index 0000000..33af87d --- /dev/null +++ b/lib/LXRng/Repo/Git/TarFile.pm @@ -0,0 +1,98 @@ +package LXRng::Repo::Git::TarFile; + +use strict; +use File::Temp qw(tempdir); +use Fcntl qw(F_GETFD F_SETFD FD_CLOEXEC); + +use base qw(LXRng::Repo::File); + +sub new { + my ($class, $tar, $ref) = @_; + + return bless({tar => $tar, ref => $ref}, $class); +} + +sub name { + my ($self) = @_; + + return $$self{'tar'}->name(); +} + +sub node { + my ($self) = @_; + + $self->name =~ m,.*/([^/]+), and return $1; +} + +sub time { + my ($self) = @_; + + return $$self{'tar'}->mtime(); +} + +sub size { + my ($self) = @_; + + return $$self{'tar'}->size; +} + +sub phys_path { + my ($self) = @_; + + my $tmpdir = tempdir() or die($!); + open(my $phys, ">", $tmpdir.'/'.$self->node); + + my $data = $$self{'tar'}->get_content_by_ref(); + my $len = $$self{'tar'}->size(); + my $pos = 0; + while ($pos < $len) { + print($phys substr($$data, $pos, 64*1024)); + $pos += 64*1024; + } + close($phys); + + return LXRng::Repo::Git::TarFile::Virtual->new(dir => $tmpdir, + node => $self->node); +} + +sub handle { + my ($self) = @_; + + my $data = $$self{'tar'}->get_content_by_ref(); + open(my $fh, "<", $data); + + return $fh; +} + +sub revision { + my ($self) = @_; + + $$self{'ref'} ||= $self->time.'.'.$self->size; + return $$self{'ref'}; +} + +package LXRng::Repo::Git::TarFile::Virtual; + +use strict; +use overload '""' => \&filename; + +sub new { + my ($class, %args) = @_; + + return bless(\%args, $class); +} + +sub filename { + my ($self) = @_; + + return $$self{'dir'}.'/'.$$self{'node'}; +} + +sub DESTROY { + my ($self) = @_; + unlink($$self{'dir'}.'/'.$$self{'node'}); + rmdir($$self{'dir'}); +# kill(9, $$self{'pid'}); +} + +1; diff --git a/lib/LXRng/Repo/Plain.pm b/lib/LXRng/Repo/Plain.pm new file mode 100644 index 0000000..c30835e --- /dev/null +++ b/lib/LXRng/Repo/Plain.pm @@ -0,0 +1,38 @@ +package LXRng::Repo::Plain; + +use strict; +use LXRng::Repo::Plain::Iterator; +use LXRng::Repo::Plain::File; +use LXRng::Repo::Plain::Directory; + +sub new { + my ($class, $root) = @_; + + return bless({root => $root}, $class); +} + +sub allversions { + my ($self) = @_; + + my @ver = (sort + grep { $_ ne "." and $_ ne ".." } + map { substr($_, length($$self{'root'})) =~ /([^\/]*)/; $1 } + glob($$self{'root'}."*/")); + + return @ver; +} + +sub node { + my ($self, $path, $release) = @_; + + my $realpath = join('/', $$self{'root'}, $release, $path); + return LXRng::Repo::Plain::File->new($path, $realpath); +} + +sub iterator { + my ($self, $release) = @_; + + return LXRng::Repo::Plain::Iterator->new($self->node('', $release)); +} + +1; diff --git a/lib/LXRng/Repo/Plain/Directory.pm b/lib/LXRng/Repo/Plain/Directory.pm new file mode 100644 index 0000000..8d7e701 --- /dev/null +++ b/lib/LXRng/Repo/Plain/Directory.pm @@ -0,0 +1,45 @@ +package LXRng::Repo::Plain::Directory; + +use strict; + +use base qw(LXRng::Repo::Directory); + +sub new { + my ($class, $name, $path, $stat) = @_; + + $name =~ s,(.)/*$,$1/,; + $path =~ s,/*$,/,; + return bless({name => $name, path => $path, stat => $stat}, $class); +} + +sub time { + my ($self) = @_; + + return $$self{'stat'}[9]; +} + +sub size { + my ($self) = @_; + + return ''; +} + +sub contents { + my ($self) = @_; + + my (@dirs, @files); + my ($dir, $node); + opendir($dir, $$self{'path'}) or die("Can't open ".$$self{'path'}.": $!"); + while (defined($node = readdir($dir))) { + next if $node =~ /^\.|~$|\.orig$/; + next if $node eq 'CVS'; + + push(@files, LXRng::Repo::Plain::File->new($$self{'name'}.$node, + $$self{'path'}.$node)); + } + closedir($dir); + + return sort { ref($a) cmp ref($b) || $$a{'name'} cmp $$b{'name'} } @files; +} + +1; diff --git a/lib/LXRng/Repo/Plain/File.pm b/lib/LXRng/Repo/Plain/File.pm new file mode 100644 index 0000000..cf2d6d5 --- /dev/null +++ b/lib/LXRng/Repo/Plain/File.pm @@ -0,0 +1,51 @@ +package LXRng::Repo::Plain::File; + +use strict; + +use base qw(LXRng::Repo::File); +use Fcntl; + +sub new { + my ($class, $name, $path) = @_; + + my @stat = stat($path); + + return undef unless @stat; + + return LXRng::Repo::Plain::Directory->new($name, $path, \@stat) if -d _; + + return bless({name => $name, path => $path, stat => \@stat}, $class); +} + +sub time { + my ($self) = @_; + + return $$self{'stat'}[9]; +} + +sub size { + my ($self) = @_; + + return $$self{'stat'}[7]; +} + +sub phys_path { + my ($self) = @_; + + return $$self{'path'}; +} + +sub revision { + my ($self) = @_; + + return $self->time.'.'.$self->size; +} + +sub handle { + my ($self) = @_; + + sysopen(my $handle, $self->phys_path, O_RDONLY) or die($!); + return $handle; +} + +1; diff --git a/lib/LXRng/Repo/Plain/Iterator.pm b/lib/LXRng/Repo/Plain/Iterator.pm new file mode 100644 index 0000000..b086860 --- /dev/null +++ b/lib/LXRng/Repo/Plain/Iterator.pm @@ -0,0 +1,29 @@ +package LXRng::Repo::Plain::Iterator; + +use strict; +use LXRng::Repo::Plain; + +sub new { + my ($class, $dir) = @_; + + return bless({dir => $dir, stack => [], nodes => [$dir->contents]}, $class); +} + +sub next { + my ($self) = @_; + + while (@{$$self{'nodes'}} == 0) { + return undef unless @{$$self{'stack'}}; + $$self{'nodes'} = pop(@{$$self{'stack'}}); + } + + my $node = shift(@{$$self{'nodes'}}); + if ($node->isa('LXRng::Repo::Directory')) { + push(@{$$self{'stack'}}, $$self{'nodes'}); + $$self{'nodes'} = [$node->contents]; + return $self->next; + } + return $node; +} + +1; diff --git a/lib/LXRng/Repo/TmpFile.pm b/lib/LXRng/Repo/TmpFile.pm new file mode 100644 index 0000000..bc9024a --- /dev/null +++ b/lib/LXRng/Repo/TmpFile.pm @@ -0,0 +1,30 @@ +package LXRng::Repo::TmpFile; + +# This package is used to hold on to a reference to a physical copy of +# a file normally only present inside a repo of some sort. When it +# leaves scopy, the destructor will remove it. (The object acts as +# string containing the path of the physical manifestation of the +# file.) + +use strict; +use overload '""' => \&filename; + +sub new { + my ($class, %args) = @_; + + return bless(\%args, $class); +} + +sub filename { + my ($self) = @_; + + return $$self{'dir'}.'/'.$$self{'node'}; +} + +sub DESTROY { + my ($self) = @_; + unlink($$self{'dir'}.'/'.$$self{'node'}); + rmdir($$self{'dir'}); +} + +1; diff --git a/lib/LXRng/Search/Xapian.pm b/lib/LXRng/Search/Xapian.pm new file mode 100644 index 0000000..42c7580 --- /dev/null +++ b/lib/LXRng/Search/Xapian.pm @@ -0,0 +1,152 @@ +package LXRng::Search::Xapian; + +use strict; +use Search::Xapian qw/:ops :db :qpstem/; +use Search::Xapian::QueryParser; + + +sub new { + my ($class, $db_root) = @_; + + $ENV{'XAPIAN_PREFER_FLINT'} = 1; + my $self = bless({'db_root' => $db_root, + 'writes' => 0}, + $class); + + return $self; +} + +sub wrdb { + my ($self) = @_; + + return $$self{'wrdb'} ||= Search::Xapian::WritableDatabase + ->new($$self{'db_root'}, Search::Xapian::DB_CREATE_OR_OPEN); +} + +sub new_document { + my ($self, $desc) = @_; + + my $doc = Search::Xapian::Document->new(); + $doc->set_data($desc); + return $doc; +} + +sub add_document { + my ($self, $doc, $rel_id) = @_; + + $doc->add_term('__@@LXRREL_'.$rel_id); + my $doc_id = $self->wrdb->add_document($doc); + $self->{'writes'}++; + $self->flush() if $self->{'writes'} % 499 == 0; + return $doc_id; +} + +sub add_release { + my ($self, $doc_id, $rel_id) = @_; + + my $reltag = '__@@LXRREL_'.$rel_id; + my $doc = $self->wrdb->get_document($doc_id); + + my $term = $doc->termlist_begin; + my $termend = $doc->termlist_end; + $term->skip_to($reltag); + if ($term ne $termend) { + return 0 if $term->get_termname eq $reltag; + } + $doc->add_term($reltag); + $self->wrdb->replace_document($doc_id, $doc); + return 1; +} + +sub flush { + my ($self) = @_; + + warn "\n*** hash: flushing\n"; + $self->wrdb->flush(); +} + +sub search { + my ($self, $rel_id, $query) = @_; + + my $db = Search::Xapian::Database->new($$self{'db_root'}); + my $qp = new Search::Xapian::QueryParser($db); + $qp->set_stemming_strategy(STEM_NONE); + $qp->set_default_op(OP_AND); + + if ($query =~ /\"/) { + # Only moderate fixup of advanced queries + $query =~ s/\b([A-Z]+)\b/\L$1\E/g; + } + else { + $query =~ s/([\S_]+_[\S_]*)/\"$1\"/g; + $query =~ s/_/ /g; + $query =~ s/\b(?![A-Z][^A-Z]*\b)(\S+)/\L$1\E/g; + } + + my $query = $qp->parse_query($query); + $query = Search::Xapian::Query + ->new(OP_FILTER, $query, + Search::Xapian::Query->new('__@@LXRREL_'.$rel_id)); + + my $enq = $db->enquire($query); + + my $matches = $enq->get_mset(0, 100); + my $total = $matches->get_matches_estimated(); + my $size = $matches->size(); + + my @res; + + my $match = $matches->begin(); + my $i = 0; + while ($i++ < $size) { + my $term = $enq->get_matching_terms_begin($match); + my $termend = $enq->get_matching_terms_end($match); + my %lines; + my $hits = 0; + while ($term ne $termend) { + if ($term !~ /^__\@\@LXR/) { + my $pos = $db->positionlist_begin($match->get_docid(), $term); + my $posend = $db->positionlist_end($match->get_docid(), $term); + while ($pos ne $posend) { + $lines{int($pos/100)}{$term} = 1; + $hits++; + $pos++; + } + } + $term++; + } + # Sort lines in order of the most matching terms + my %byhits; + foreach my $l (keys %lines) { + $byhits{0+keys(%{$lines{$l}})}{$l} = 1; + } + # Only consider the lines having the max number of terms + my ($max) = sort { $b <=> $a } keys %byhits; + my @lines = sort keys(%{$byhits{$max}}); + + push(@res, [$match->get_percent(), + $match->get_document->get_data(), + $lines[0], + 0+@lines]) + if @lines; + $match++; + } + + return ($total, \@res); +} + +sub reset_db { + my ($self) = @_; + + foreach my $f (glob($$self{'db_root'}.'/*')) { + unlink($f); + } +} + +sub DESTROY { + my ($self) = @_; + + $self->flush() if $self->{'writes'} > 0; +} + +1; diff --git a/lib/Subst/Complex.pm b/lib/Subst/Complex.pm new file mode 100644 index 0000000..788d469 --- /dev/null +++ b/lib/Subst/Complex.pm @@ -0,0 +1,63 @@ +package Subst::Complex; + +use strict; + +sub new { + my ($self, @args) = @_; + + my (@re, @ac); + my $l = 1; + while (@args) { + my ($r, $a) = splice(@args, 0, 2); + "" =~ /|$r/; + + push(@ac, [$l+1, $l+1+$#+, $a]); + $l += 1+$#+; + + push(@re, "($r)"); + } + + return bless {'re' => '((?s:.*?))(?:'.join('|', @re).')', + 'ac' => \@ac}, $self; +} + +sub s { + my $self; + my $str; + + if (ref($_[0])) { + $self = shift; + $str = shift; + } + else { + $str = shift; + $self = __PACKAGE__->new(@_); + } + + my @res; +# $str =~ s{$$self{'re'}}{ +# push(@res, $1) if length($1); +# my ($a) = grep { defined $-[$$_[0]] } @{$$self{'ac'}}; +# my @g = map { substr($str, $-[$_], $+[$_] - $-[$_]); +# } $$a[0]..$$a[1]; +# push(@res, $$a[2]->(@g)); +# ''; +# }ge; + $str =~ s{$$self{'re'}}{ + push(@res, $1) if length($1); + my ($a) = grep { defined $-[$$_[0]] } @{$$self{'ac'}}; + my @g = map { substr($str, $-[$_], $+[$_] - $-[$_]); + } $$a[0]..$$a[1]; + push(@res, [$$a[2], [@g]]); + ''; + }ge; + + push(@res, $str) if length($str); + @res = map { + ref($_) ? $$_[0]->(@{$$_[1]}) : $_ + } @res; + + return @res; +} + +1; |