diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/LXRng/Index/DBI.pm | 16 | ||||
-rw-r--r-- | lib/LXRng/Index/Pg.pm | 2 | ||||
-rw-r--r-- | lib/LXRng/Lang/C.pm | 37 | ||||
-rw-r--r-- | lib/LXRng/Lang/Generic.pm | 5 | ||||
-rw-r--r-- | lib/LXRng/Lang/GnuAsm.pm | 164 | ||||
-rw-r--r-- | lib/LXRng/Lang/Kconfig.pm | 146 | ||||
-rw-r--r-- | lib/LXRng/Lang/Undefined.pm | 4 | ||||
-rw-r--r-- | lib/LXRng/Markup/File.pm | 13 | ||||
-rw-r--r-- | lib/LXRng/Parse/Simple.pm | 114 | ||||
-rw-r--r-- | lib/LXRng/Search/Xapian.pm | 33 | ||||
-rw-r--r-- | lib/LXRng/Web.pm | 2 |
11 files changed, 434 insertions, 102 deletions
diff --git a/lib/LXRng/Index/DBI.pm b/lib/LXRng/Index/DBI.pm index 763f534..55e215d 100644 --- a/lib/LXRng/Index/DBI.pm +++ b/lib/LXRng/Index/DBI.pm @@ -520,6 +520,22 @@ sub get_identifier_info { \%reflines); } +sub set_rfile_charset { + my ($self, $rfile_id, $charset) = @_; + + my $dbh = $self->dbh; + my $pre = $self->prefix; + my $sth = $$self{'sth'}{'set_rfile_charset'} ||= + $dbh->prepare(qq{ + update ${pre}revisions + set body_charset = (select id from ${pre}charsets + where name = ?) + where id = ?}); + + return $sth->execute($charset, $rfile_id); +} + + sub get_rfile_timestamp { my ($self, $rfile_id) = @_; diff --git a/lib/LXRng/Index/Pg.pm b/lib/LXRng/Index/Pg.pm index 3654a72..7387abc 100644 --- a/lib/LXRng/Index/Pg.pm +++ b/lib/LXRng/Index/Pg.pm @@ -67,7 +67,7 @@ sub init_db { or die($dbh->errstr); $dbh->do(qq{insert into ${pre}charsets(name) values ('utf-8')}) or die($dbh->errstr); - $dbh->do(qq{insert into ${pre}charsets(name) values ('iso8859-1')}) + $dbh->do(qq{insert into ${pre}charsets(name) values ('iso-8859-1')}) or die($dbh->errstr); $dbh->do(qq{ diff --git a/lib/LXRng/Lang/C.pm b/lib/LXRng/Lang/C.pm index 60a571b..db3a204 100644 --- a/lib/LXRng/Lang/C.pm +++ b/lib/LXRng/Lang/C.pm @@ -51,13 +51,13 @@ sub identifier_re { return $_identifier_re; } -my $_reserved ||= { map { $_ => 1 } - qw(asm auto break case char const continue default - do double else enum extern float for fortran - goto if int long register return short signed - sizeof static struct switch typedef union - unsigned void volatile while #define #else - #endif #if #ifdef #ifndef #include #undef)}; +my $_reserved = { map { $_ => 1 } + qw(asm auto break case char const continue default + do double else enum extern float for fortran goto + if int long register return short signed sizeof + static struct switch typedef union unsigned void + volatile while #define #else #endif #if #ifdef + #ifndef #include #undef)}; sub reserved { return $_reserved; @@ -73,31 +73,10 @@ sub parsespec { 'include', '#\s*include\s+<', '>']; } -sub typemap { - return { - 'c' => 'class', - 'd' => 'macro (un)definition', - 'e' => 'enumerator', - 'f' => 'function definition', - 'g' => 'enumeration name', - 'm' => 'class, struct, or union member', - 'n' => 'namespace', - 'p' => 'function prototype or declaration', - 's' => 'structure name', - 't' => 'typedef', - 'u' => 'union name', - 'v' => 'variable definition', - 'x' => 'extern or forward variable declaration', - 'i' => 'interface'}; -} - sub markuphandlers { my ($self, $context, $node, $markup) = @_; my $index = $context->config->{'index'}; - my $idre = $self->identifier_re(); - my $res = $self->reserved(); - my %subst; my $format_newline = $markup->make_format_newline($node); @@ -121,7 +100,7 @@ sub markuphandlers { $subst{'code'} = new Subst::Complex qr/\n/ => $format_newline, - qr/[^\n]*/ => sub { $markup->format_code($idre, $res, @_) }; + qr/[^\n]*/ => sub { $markup->format_code($self, @_) }; $subst{'start'} = new Subst::Complex qr/^/ => $format_newline; diff --git a/lib/LXRng/Lang/Generic.pm b/lib/LXRng/Lang/Generic.pm index 82e9b9d..3e1f545 100644 --- a/lib/LXRng/Lang/Generic.pm +++ b/lib/LXRng/Lang/Generic.pm @@ -40,4 +40,9 @@ sub expand_include { return (); } +sub mangle_sym { + my ($self, $sym) = @_; + return $sym; +} + 1; diff --git a/lib/LXRng/Lang/GnuAsm.pm b/lib/LXRng/Lang/GnuAsm.pm new file mode 100644 index 0000000..acdcdef --- /dev/null +++ b/lib/LXRng/Lang/GnuAsm.pm @@ -0,0 +1,164 @@ +# Copyright (C) 2008 Arne Georg Gleditsch <lxr@linux.no>. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# The full GNU General Public License is included in this distribution +# in the file called COPYING. + +package LXRng::Lang::GnuAsm; + +use strict; +use Subst::Complex; + +use base qw(LXRng::Lang::Generic); + + +sub doindex { + return 1; +} + +sub ctagslangname { + return 'asm'; +} + +sub ctagsopts { + return (); +} + +sub pathexp { + return qr/\.[sS]$/; +} + +my $_identifier_re = qr( + (?m:^|(?<=[^a-zA-Z0-9_\#])) # Non-symbol chars. + (_*[a-zA-Z][a-zA-Z0-9_]*) # The symbol. + \b + )x; + +sub identifier_re { + return $_identifier_re; +} + +my $_reserved ||= { map { $_ => 1 } + (qw(aaa aad aam aas adc bound bsf bsr bswap btc + btr call cbw cwde cdqe cwd cdq cqo clc cld + clflush cmc cmps cmpsb cmpsw cmpsd cmpsq + cmpxchg cmpxchg8b cmpxchg16b cpuid daa das + enter ins insb insw insd int into jcxz jecxz + jrcxz jmp lahf lds les lfs lgs lss leave lfence + lock lods lodsb lodsw lodsd lodsq loop loope + loopne loopnz loopz mfence movd movmskpd + movmskps movnti movs movsb movsw movsd movsq + movsx movsxd movzx nop outs outsb outsw outsd + pause popa popad prefetch prefetchw pusha + pushad pushfd pushfq ret sahf sbb scas scasb + scasw scasd scasq sfence stc std stos stosb + stosw stosd stosq xadd xchg xlat xlatb arpl + clgi cli clts hlt int invd invlpg invlpga iret + iretd iretq lar lgdt lidt lldt lmsw lretq lsl + ltr rep rdmsr rdpmc rdtsc rdtscp rsm sgdt sidt + skinit sldt smsw sti stgi str swapgs syscall + sysenter sysexit sysret ud2 verr verw vmload + vmmcall vmrun vmsave wbinvd wrmsr), + + (map { $_, $_.'b', $_.'w', $_.'l', $_.'q' } + qw(add and mov bt bts cmp dec div idiv imul inc + in lea mul neg not or out pop popf push pushf + rcl rcr rol ror sal shl sar shl shr sub test + xor)), + + (map { 'cmov'.$_, 'j'.$_, 'set'.$_ } + qw(o no b c nae nb nc ae z e nz ne be na nbe a s + ns p pe np po l nge nl ge le ng nle g)) + )}; + + +sub reserved { + return $_reserved; +} + +sub parsespec { + return ['atom', '\\\\.', undef, + 'atom', '%[a-z][a-z0-9]+', undef, # Registers + 'atom', '[.][a-z0-9]+', undef, # Directives + 'comment', '/\*', '\*/', + 'comment', '//', "\$", + 'string', '"', '"', + 'string', "'", "'", + 'atom', '#\s*(?:ifn?def|define|else|endif|undef)', undef, + 'include', '#\s*include\s+"', '"', + 'include', '#\s*include\s+<', '>', + 'comment', '#', "\$"]; +} + +sub markuphandlers { + my ($self, $context, $node, $markup) = @_; + + my $index = $context->config->{'index'}; + my %subst; + + my $format_newline = $markup->make_format_newline($node); + $subst{'comment'} = new Subst::Complex + qr/\n/ => $format_newline, + qr/[^\n]+/ => sub { $markup->format_comment(@_) }; + + $subst{'string'} = new Subst::Complex + qr/\n/ => $format_newline, + qr/[^\n\"\']+/ => sub { $markup->format_string(@_) }; + + $subst{'include'} = new Subst::Complex + qr/\n/ => $format_newline, + qr/(include\s*\")(.*?)(\")/ => sub { + $markup->format_include([$self->resolve_include($context, $node, @_)], + @_) }, + + qr/(include\s*\<)(.*?)(\>)/ => sub { + $markup->format_include([$self->resolve_include($context, $node, @_)], + @_) }; + + $subst{'code'} = new Subst::Complex + qr/\n/ => $format_newline, + qr/[^\n]*/ => sub { $markup->format_code($self, @_) }; + + $subst{'start'} = new Subst::Complex + qr/^/ => $format_newline; + + return \%subst; +} + +sub resolve_include { + my ($self, $context, $node, $frag) = @_; + + if ($frag =~ /include\s+<(.*?)>/) { + return $self->expand_include($context, $node, $1); + } + elsif ($frag =~ /include\s+\"(.*?)\"/) { + my $incl = $1; + my $bare = $1; + my $name = $node->name(); + if ($name =~ /(.*\/)/) { + $incl = $1.$incl; + 1 while $incl =~ s,/[^/]+/../,/,; + + my $file = $context->config->{'repository'}->node($incl, $context->release); + return $incl if $file; + return $self->expand_include($context, $node, $bare); + } + } + + return (); +} + +1; diff --git a/lib/LXRng/Lang/Kconfig.pm b/lib/LXRng/Lang/Kconfig.pm new file mode 100644 index 0000000..2239a87 --- /dev/null +++ b/lib/LXRng/Lang/Kconfig.pm @@ -0,0 +1,146 @@ +# Copyright (C) 2008 Arne Georg Gleditsch <lxr@linux.no>. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# The full GNU General Public License is included in this distribution +# in the file called COPYING. + +package LXRng::Lang::Kconfig; + +use strict; +use Subst::Complex; + +use base qw(LXRng::Lang::Generic); + + +sub doindex { + return 1; +} + +sub ctagslangname { + return undef; +} + +sub pathexp { + return qr/Kconfig$/; +} + +my $_identifier_re = qr( + (?m:^|(?<=[^A-Z0-9_\#])) # Non-symbol chars. + (_*[A-Z][A-Z0-9_]*) # The symbol. + \b + )x; + +sub identifier_re { + return $_identifier_re; +} + +my $_reserved = { map { $_ => 1 } + qw(menu source endmenu config bool if default help + tristate depends on y n m)}; + +sub reserved { + return $_reserved; +} + +sub parsespec { + return ['atom', '\\\\.', undef, + 'comment', '#', "\$", + 'string', '"', '"', + 'string', "'", "'", + 'help', 'help', "^(?=[^ \t\n])", + 'include', '^source\s+"', '"']; +} + +sub mangle_sym { + return $_[1] =~ /^[A-Z0-9_]+$/ ? 'CONFIG_'.$_[1] : $_[1]; +} + +sub markuphandlers { + my ($self, $context, $node, $markup) = @_; + + my $index = $context->config->{'index'}; + my %subst; + + my $format_newline = $markup->make_format_newline($node); + $subst{'comment'} = new Subst::Complex + qr/\n/ => $format_newline, + qr/[^\n]+/ => sub { $markup->format_comment(@_) }; + + $subst{'help'} = new Subst::Complex + qr/\n/ => $format_newline, + qr/^[ \t]*help[ \t]*/ => sub { $markup->format_code($self, @_) }, + qr/[^\n\"\']+/ => sub { $markup->format_string(@_) }; + + $subst{'string'} = new Subst::Complex + qr/\n/ => $format_newline, + qr/[^\n\"\']+/ => sub { $markup->format_string(@_) }; + + $subst{'include'} = new Subst::Complex + qr/\n/ => $format_newline, + qr/(include\s*\")(.*?)(\")/ => sub { + $markup->format_include([$self->resolve_include($context, $node, @_)], + @_) }, + + qr/(include\s*\<)(.*?)(\>)/ => sub { + $markup->format_include([$self->resolve_include($context, $node, @_)], + @_) }; + + $subst{'code'} = new Subst::Complex + qr/\n/ => $format_newline, + qr/[^\n]*/ => sub { $markup->format_code($self, @_) }; + + $subst{'start'} = new Subst::Complex + qr/^/ => $format_newline; + + return \%subst; +} + +sub resolve_include { + my ($self, $context, $node, $frag) = @_; + + return (); +} + +sub index_file { + my ($self, $context, $file, $add_ident) = @_; + + my $handle = $file->handle(); + my $parse = LXRng::Parse::Simple->new($handle, 8, + @{$self->parsespec}); + + my $line = 1; + while (1) { + my ($btype, $frag) = $parse->nextfrag; + + return 1 unless defined $frag; + + $btype ||= 'code'; + if ($btype eq 'code') { + while ($frag =~ s/\A(.*)^config (\w+)//) { + my ($pref, $sym) = ($1, $2); + $line += $pref =~ tr/\n/\n/; + $add_ident->($self->mangle_sym($sym), + {'kind' => 'd', + 'line' => $line}); + } + } + $line += $frag =~ tr/\n/\n/; + } +} + + +1; + diff --git a/lib/LXRng/Lang/Undefined.pm b/lib/LXRng/Lang/Undefined.pm index c7d5d09..0989bdb 100644 --- a/lib/LXRng/Lang/Undefined.pm +++ b/lib/LXRng/Lang/Undefined.pm @@ -41,10 +41,6 @@ sub parsespec { return ['atom', '\\\\.', undef]; } -sub typemap { - return {}; -} - sub markuphandlers { my ($self, $context, $node, $markup) = @_; diff --git a/lib/LXRng/Markup/File.pm b/lib/LXRng/Markup/File.pm index 1351cd8..0d4b141 100644 --- a/lib/LXRng/Markup/File.pm +++ b/lib/LXRng/Markup/File.pm @@ -92,18 +92,23 @@ sub format_include { } sub format_code { - my ($self, $idre, $res, $frag) = @_; + my ($self, $lang, $frag) = @_; my $tree = $self->context->vtree(); my $path = $self->context->path(); + my $idre = $lang->identifier_re(); + my $res = $lang->reserved(); $frag =~ s{(.*?)$idre|(.+)}{ if ($2) { unless (exists($$res{$2})) { my $pre = $1; - my $sym = safe_html($2); + my $sym = $2; + my $ref = safe_html($lang->mangle_sym($sym)); + $sym = safe_html($sym); + safe_html($pre). - qq{<a href="+code=$sym" class="sref">$sym</a>}; + qq{<a href="+code=$ref" class="sref">$sym</a>}; } else { safe_html($1.$2); @@ -129,7 +134,7 @@ sub markupfile { my ($self, $subst, $parse) = @_; my ($btype, $frag) = $parse->nextfrag; - + return () unless defined $frag; $btype ||= 'code'; diff --git a/lib/LXRng/Parse/Simple.pm b/lib/LXRng/Parse/Simple.pm index 215ce5b..3cbebf1 100644 --- a/lib/LXRng/Parse/Simple.pm +++ b/lib/LXRng/Parse/Simple.pm @@ -38,6 +38,8 @@ sub new { 'fileh' => $fileh, # File handle 'tabwidth' => $tabhint||8, # Tab width 'frags' => [], # Fragments in queue + 'pref' => '', + 'rest' => '', 'bodyid' => \@bodyid, # Array of body type ids 'bofseen' => 0, # Beginning-of-file seen? 'term' => \@term, @@ -63,81 +65,69 @@ sub untabify { sub nextfrag { my ($self) = @_; - my $btype = undef; - my $frag = undef; - my $line = ''; - + my $btype; + my $pos = 0; while (1) { - # read one more line if we have processed - # all of the previously read line - if (@{$$self{'frags'}} == 0) { - $line = $$self{'fileh'}->getline; - - if ($. <= 2 && - $line =~ /^.*-[*]-.*?[ \t;]tab-width:[ \t]*([0-9]+).*-[*]-/) { - # make sure there really is a non-zero tabwidth - $$self{'tabwidth'} = $1 if $1 > 0; + if (defined $btype) { + if ($$self{'rest'} =~ s/\A((?s:.*?)$$self{'term'}[$btype])//m) { + my $ret = $$self{'pref'}.$1; + $$self{'pref'} = ''; + return ($$self{'bodyid'}[$btype], $ret); } - - if(defined($line)) { - untabify($line, $$self{'tabwidth'}); + else { + $$self{'pref'} .= $$self{'rest'}; + $$self{'rest'} = ''; + } + } + else { + pos($$self{'rest'}) = $pos; + if ($$self{'rest'} =~ s/\G((?s).*?)($$self{'open'})//m) { + my $pref = substr($$self{'rest'}, 0, $pos, '').$1; + my $frag = $2; + + if ($pref ne '') { + $$self{'rest'} = $frag.$$self{'rest'}; + return ('', $pref); + } + + $btype = 3; + $btype++ while $btype < $#- and !defined($-[$btype]); + $btype -= 3; - # split the line into fragments - $$self{'frags'} = [split(/($$self{'split'})/, $line)]; + if (!defined($$self{'term'}[$btype])) { + # Opening regexp captures entire block. + return ($$self{'bodyid'}[$btype], $frag); + } + $$self{'pref'} = $frag; } } - last if @{$$self{'frags'}} == 0; + my $line = $$self{'fileh'}->getline; + unless (defined $line) { + my $ret = $$self{'pref'}.$$self{'rest'}; + $$self{'pref'} = ''; + $$self{'rest'} = ''; + undef($ret) unless length($ret) > 0; + + return (defined($btype) ? $$self{'bodyid'}[$btype] : '', $ret); + } + + if ($. <= 2 && + $line =~ /^.*-[*]-.*?[ \t;]tab-width:[ \t]*([0-9]+).*-[*]-/) { + # make sure there really is a non-zero tabwidth + $$self{'tabwidth'} = $1 if $1 > 0; + } + + untabify($line, $$self{'tabwidth'}); + $pos = length($$self{'rest'}); + $$self{'rest'} .= $line; unless ($$self{'bofseen'}) { # return start marker if file has contents $$self{'bofseen'} = 1; return ('start', ''); } - - # skip empty fragments - if ($$self{'frags'}[0] eq '') { - shift(@{$$self{'frags'}}); - } - - # check if we are inside a fragment - if (defined($frag)) { - if (defined($btype)) { - my $next = shift(@{$$self{'frags'}}); - - # Add to the fragment - $frag .= $next; - # We are done if this was the terminator - last if $next =~ /^$$self{'term'}[$btype]$/; - - } - else { - if ($$self{'frags'}[0] =~ /^$$self{'open'}$/) { - last; - } - $frag .= shift(@{$$self{'frags'}}); - } - } - else { - # Find the blocktype of the current block - $frag = shift(@{$$self{'frags'}}); - if (defined($frag) && (@_ = $frag =~ /^$$self{'open'}$/)) { - # grep in a scalar context returns the number of times - # EXPR evaluates to true, which is this case will be - # the index of the first defined element in @_. - - my $i = 1; - $btype = grep { $i &&= !defined($_) } @_; - if(!defined($$self{'term'}[$btype])) { - # Opening regexp captures entire block. - last; - } - } - } } - $btype = $$self{'bodyid'}[$btype] if defined($btype); - - return ($btype, $frag); } 1; diff --git a/lib/LXRng/Search/Xapian.pm b/lib/LXRng/Search/Xapian.pm index 03db5b8..014d57a 100644 --- a/lib/LXRng/Search/Xapian.pm +++ b/lib/LXRng/Search/Xapian.pm @@ -23,6 +23,13 @@ use strict; use Search::Xapian qw/:ops :db :qpstem/; use Search::Xapian::QueryParser; +our @STOPWORDS = qw(our ours you your yours him his she her hers they + them their theirs what which who whom this that + these those are was were been being have has had + having does did doing would should could the and + but for with all any); +our %STOPWORD = map { $_ => 1 } @STOPWORDS; + sub new { my ($class, $db_root) = @_; @@ -31,7 +38,7 @@ sub new { my $self = bless({'db_root' => $db_root, 'writes' => 0}, $class); - + return $self; } @@ -100,6 +107,30 @@ sub add_release { return $changes; } +sub make_add_text { + my ($index, $doc) = @_; + + return sub { + my ($pos, $text) = @_; + + foreach my $term ($text =~ /(_*\w[\w_]*)/g) { + $term = lc($term); + next if length($term) <= 2; + next if length($term) > 128; + next if $STOPWORD{$term}; + + $doc->add_posting($term, $pos++); + if ($term =~ /_/) { + foreach my $subt ($term =~ /([^_]+)/g) { + next if length($subt) <= 2; + next if $STOPWORD{$subt}; + $doc->add_posting($subt, $pos++); + } + } + }; + } +} + sub flush { my ($self) = @_; diff --git a/lib/LXRng/Web.pm b/lib/LXRng/Web.pm index df2422e..9e9e24a 100644 --- a/lib/LXRng/Web.pm +++ b/lib/LXRng/Web.pm @@ -153,7 +153,7 @@ sub print_markedup_file { if ($line % FRAGMENT_SIZE == 0) { print("<a class=\"line\"></a>\n" x FRAGMENT_SIZE) unless $focus; - if (defined($fline)) { + if (defined($fline) and $cache) { $focus = ($line <= ($fline + 100) and $line > ($fline - FRAGMENT_SIZE)); } |