aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/LXRng/Index/DBI.pm16
-rw-r--r--lib/LXRng/Index/Pg.pm2
-rw-r--r--lib/LXRng/Lang/C.pm37
-rw-r--r--lib/LXRng/Lang/Generic.pm5
-rw-r--r--lib/LXRng/Lang/GnuAsm.pm164
-rw-r--r--lib/LXRng/Lang/Kconfig.pm118
-rw-r--r--lib/LXRng/Lang/Undefined.pm4
-rw-r--r--lib/LXRng/Markup/File.pm13
-rw-r--r--lib/LXRng/Parse/Simple.pm111
-rw-r--r--lib/LXRng/Search/Xapian.pm33
10 files changed, 402 insertions, 101 deletions
diff --git a/lib/LXRng/Index/DBI.pm b/lib/LXRng/Index/DBI.pm
index 763f534..55e215d 100644
--- a/lib/LXRng/Index/DBI.pm
+++ b/lib/LXRng/Index/DBI.pm
@@ -520,6 +520,22 @@ sub get_identifier_info {
\%reflines);
}
+# Record the body character set of one revision row.
+#
+# $rfile_id identifies the row in the revisions table; $charset is a
+# charset name that is resolved to its id through the charsets table.
+# The statement handle is prepared once and cached under
+# $$self{'sth'}{'set_rfile_charset'}.  Returns whatever execute() returns.
+sub set_rfile_charset {
+    my ($self, $rfile_id, $charset) = @_;
+
+    my $dbh = $self->dbh;
+    my $pre = $self->prefix;
+
+    my $cache = ($$self{'sth'} ||= {});
+    unless ($$cache{'set_rfile_charset'}) {
+        $$cache{'set_rfile_charset'} = $dbh->prepare(qq{
+ update ${pre}revisions
+ set body_charset = (select id from ${pre}charsets
+ where name = ?)
+ where id = ?});
+    }
+
+    return $$cache{'set_rfile_charset'}->execute($charset, $rfile_id);
+}
+
+
sub get_rfile_timestamp {
my ($self, $rfile_id) = @_;
diff --git a/lib/LXRng/Index/Pg.pm b/lib/LXRng/Index/Pg.pm
index 3654a72..7387abc 100644
--- a/lib/LXRng/Index/Pg.pm
+++ b/lib/LXRng/Index/Pg.pm
@@ -67,7 +67,7 @@ sub init_db {
or die($dbh->errstr);
$dbh->do(qq{insert into ${pre}charsets(name) values ('utf-8')})
or die($dbh->errstr);
- $dbh->do(qq{insert into ${pre}charsets(name) values ('iso8859-1')})
+ $dbh->do(qq{insert into ${pre}charsets(name) values ('iso-8859-1')})
or die($dbh->errstr);
$dbh->do(qq{
diff --git a/lib/LXRng/Lang/C.pm b/lib/LXRng/Lang/C.pm
index 60a571b..db3a204 100644
--- a/lib/LXRng/Lang/C.pm
+++ b/lib/LXRng/Lang/C.pm
@@ -51,13 +51,13 @@ sub identifier_re {
return $_identifier_re;
}
-my $_reserved ||= { map { $_ => 1 }
- qw(asm auto break case char const continue default
- do double else enum extern float for fortran
- goto if int long register return short signed
- sizeof static struct switch typedef union
- unsigned void volatile while #define #else
- #endif #if #ifdef #ifndef #include #undef)};
+my $_reserved = { map { $_ => 1 }
+ qw(asm auto break case char const continue default
+ do double else enum extern float for fortran goto
+ if int long register return short signed sizeof
+ static struct switch typedef union unsigned void
+ volatile while #define #else #endif #if #ifdef
+ #ifndef #include #undef)};
sub reserved {
return $_reserved;
@@ -73,31 +73,10 @@ sub parsespec {
'include', '#\s*include\s+<', '>'];
}
-sub typemap {
- return {
- 'c' => 'class',
- 'd' => 'macro (un)definition',
- 'e' => 'enumerator',
- 'f' => 'function definition',
- 'g' => 'enumeration name',
- 'm' => 'class, struct, or union member',
- 'n' => 'namespace',
- 'p' => 'function prototype or declaration',
- 's' => 'structure name',
- 't' => 'typedef',
- 'u' => 'union name',
- 'v' => 'variable definition',
- 'x' => 'extern or forward variable declaration',
- 'i' => 'interface'};
-}
-
sub markuphandlers {
my ($self, $context, $node, $markup) = @_;
my $index = $context->config->{'index'};
- my $idre = $self->identifier_re();
- my $res = $self->reserved();
-
my %subst;
my $format_newline = $markup->make_format_newline($node);
@@ -121,7 +100,7 @@ sub markuphandlers {
$subst{'code'} = new Subst::Complex
qr/\n/ => $format_newline,
- qr/[^\n]*/ => sub { $markup->format_code($idre, $res, @_) };
+ qr/[^\n]*/ => sub { $markup->format_code($self, @_) };
$subst{'start'} = new Subst::Complex
qr/^/ => $format_newline;
diff --git a/lib/LXRng/Lang/Generic.pm b/lib/LXRng/Lang/Generic.pm
index 82e9b9d..3e1f545 100644
--- a/lib/LXRng/Lang/Generic.pm
+++ b/lib/LXRng/Lang/Generic.pm
@@ -40,4 +40,9 @@ sub expand_include {
return ();
}
+# Default symbol mangling: hand the symbol back unchanged.
+sub mangle_sym {
+    my (undef, $sym) = @_;
+
+    return $sym;
+}
+
1;
diff --git a/lib/LXRng/Lang/GnuAsm.pm b/lib/LXRng/Lang/GnuAsm.pm
new file mode 100644
index 0000000..acdcdef
--- /dev/null
+++ b/lib/LXRng/Lang/GnuAsm.pm
@@ -0,0 +1,164 @@
+# Copyright (C) 2008 Arne Georg Gleditsch <lxr@linux.no>.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# The full GNU General Public License is included in this distribution
+# in the file called COPYING.
+
+package LXRng::Lang::GnuAsm;
+
+use strict;
+use Subst::Complex;
+
+use base qw(LXRng::Lang::Generic);
+
+
+sub doindex {
+ return 1;
+}
+
+sub ctagslangname {
+ return 'asm';
+}
+
+sub ctagsopts {
+ return ();
+}
+
+sub pathexp {
+ return qr/\.[sS]$/;
+}
+
+my $_identifier_re = qr(
+ (?m:^|(?<=[^a-zA-Z0-9_\#])) # Non-symbol chars.
+ (_*[a-zA-Z][a-zA-Z0-9_]*) # The symbol.
+ \b
+ )x;
+
+sub identifier_re {
+ return $_identifier_re;
+}
+
+# Set of mnemonics handed out by reserved(): every key maps to 1.
+# It is assembled from three groups:
+#   - a literal list of mnemonics,
+#   - base mnemonics expanded with the operand-size suffixes b/w/l/q,
+#   - condition codes expanded into their cmovCC / jCC / setCC forms.
+# Plain assignment is used: "||=" on a freshly declared lexical can never
+# see an earlier value.
+my $_reserved = { map { $_ => 1 }
+                  (qw(aaa aad aam aas adc bound bsf bsr bswap btc
+                      btr call cbw cwde cdqe cwd cdq cqo clc cld
+                      clflush cmc cmps cmpsb cmpsw cmpsd cmpsq
+                      cmpxchg cmpxchg8b cmpxchg16b cpuid daa das
+                      enter ins insb insw insd int into jcxz jecxz
+                      jrcxz jmp lahf lds les lfs lgs lss leave lfence
+                      lock lods lodsb lodsw lodsd lodsq loop loope
+                      loopne loopnz loopz mfence movd movmskpd
+                      movmskps movnti movs movsb movsw movsd movsq
+                      movsx movsxd movzx nop outs outsb outsw outsd
+                      pause popa popad prefetch prefetchw pusha
+                      pushad pushfd pushfq ret sahf sbb scas scasb
+                      scasw scasd scasq sfence stc std stos stosb
+                      stosw stosd stosq xadd xchg xlat xlatb arpl
+                      clgi cli clts hlt invd invlpg invlpga iret
+                      iretd iretq lar lgdt lidt lldt lmsw lretq lsl
+                      ltr rep rdmsr rdpmc rdtsc rdtscp rsm sgdt sidt
+                      skinit sldt smsw sti stgi str swapgs syscall
+                      sysenter sysexit sysret ud2 verr verw vmload
+                      vmmcall vmrun vmsave wbinvd wrmsr),
+
+                   # Each base form plus its b/w/l/q suffixed variants.
+                   (map { $_, $_.'b', $_.'w', $_.'l', $_.'q' }
+                    qw(add and mov bt bts cmp dec div idiv imul inc
+                       in lea mul neg not or out pop popf push pushf
+                       rcl rcr rol ror sal shl sar shr sub test
+                       xor)),
+
+                   # Conditional move / jump / set for every condition code.
+                   (map { 'cmov'.$_, 'j'.$_, 'set'.$_ }
+                    qw(o no b c nae nb nc ae z e nz ne be na nbe a s
+                       ns p pe np po l nge nl ge le ng nle g))
+                  )};
+
+
+sub reserved {
+ return $_reserved;
+}
+
+sub parsespec {
+ return ['atom', '\\\\.', undef,
+ 'atom', '%[a-z][a-z0-9]+', undef, # Registers
+ 'atom', '[.][a-z0-9]+', undef, # Directives
+ 'comment', '/\*', '\*/',
+ 'comment', '//', "\$",
+ 'string', '"', '"',
+ 'string', "'", "'",
+ 'atom', '#\s*(?:ifn?def|define|else|endif|undef)', undef,
+ 'include', '#\s*include\s+"', '"',
+ 'include', '#\s*include\s+<', '>',
+ 'comment', '#', "\$"];
+}
+
+# Build the substitution tables used to mark up one assembler file.
+#
+# Returns a hash reference keyed by fragment type ('comment', 'string',
+# 'include', 'code', 'start').  Each value is a Subst::Complex object
+# pairing regexps with the $markup formatter applied to matching text.
+sub markuphandlers {
+    my ($self, $context, $node, $markup) = @_;
+
+    my $index   = $context->config->{'index'};
+    my $newline = $markup->make_format_newline($node);
+
+    # Both include syntaxes are formatted the same way: resolve the
+    # fragment to candidate paths, then let the markup object render it.
+    my $format_incl = sub {
+        $markup->format_include([$self->resolve_include($context, $node, @_)],
+                                @_);
+    };
+
+    return {
+        'comment' => Subst::Complex->new(
+            qr/\n/     => $newline,
+            qr/[^\n]+/ => sub { $markup->format_comment(@_) }),
+
+        'string'  => Subst::Complex->new(
+            qr/\n/         => $newline,
+            qr/[^\n\"\']+/ => sub { $markup->format_string(@_) }),
+
+        'include' => Subst::Complex->new(
+            qr/\n/                      => $newline,
+            qr/(include\s*\")(.*?)(\")/ => $format_incl,
+            qr/(include\s*\<)(.*?)(\>)/ => $format_incl),
+
+        'code'    => Subst::Complex->new(
+            qr/\n/     => $newline,
+            qr/[^\n]*/ => sub { $markup->format_code($self, @_) }),
+
+        'start'   => Subst::Complex->new(
+            qr/^/ => $newline),
+    };
+}
+
+# Resolve the target of an include fragment to a list of paths.
+#
+# $frag is the matched include text.  An <...> include is delegated to
+# expand_include().  A "..." include is first tried relative to the
+# directory of the including file ($node->name); if the repository has
+# no such node, the bare name is passed to expand_include() instead.
+# Returns an empty list when the fragment is not recognised or when the
+# including file's name has no directory part.
+sub resolve_include {
+    my ($self, $context, $node, $frag) = @_;
+
+    if ($frag =~ /include\s+<(.*?)>/) {
+        return $self->expand_include($context, $node, $1);
+    }
+
+    return () unless $frag =~ /include\s+\"(.*?)\"/;
+    my $bare = $1;
+
+    my $name = $node->name();
+    return () unless $name =~ /(.*\/)/;
+    my $incl = $1.$bare;
+
+    # Collapse "dir/../" pairs.  The dots must be escaped: an unescaped
+    # ".." matches any two characters, which would strip real
+    # two-letter directories (".../arm/mm/" became ".../").  The
+    # lookahead keeps a ".." segment from being treated as the
+    # directory that a following ".." cancels.
+    1 while $incl =~ s,/(?!\.\./)[^/]+/\.\./,/,;
+
+    my $file = $context->config->{'repository'}->node($incl, $context->release);
+    return $incl if $file;
+    return $self->expand_include($context, $node, $bare);
+}
+
+1;
diff --git a/lib/LXRng/Lang/Kconfig.pm b/lib/LXRng/Lang/Kconfig.pm
new file mode 100644
index 0000000..7417236
--- /dev/null
+++ b/lib/LXRng/Lang/Kconfig.pm
@@ -0,0 +1,118 @@
+# Copyright (C) 2008 Arne Georg Gleditsch <lxr@linux.no>.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# The full GNU General Public License is included in this distribution
+# in the file called COPYING.
+
+package LXRng::Lang::Kconfig;
+
+use strict;
+use Subst::Complex;
+
+use base qw(LXRng::Lang::Generic);
+
+
+sub doindex {
+ return 1;
+}
+
+sub ctagslangname {
+ return undef;
+}
+
+sub pathexp {
+ return qr/Kconfig$/;
+}
+
+my $_identifier_re = qr(
+ (?m:^|(?<=[^A-Z0-9_\#])) # Non-symbol chars.
+ (_*[A-Z][A-Z0-9_]*) # The symbol.
+ \b
+ )x;
+
+sub identifier_re {
+ return $_identifier_re;
+}
+
+my $_reserved = { map { $_ => 1 }
+ qw(menu source endmenu config bool if default help
+ tristate depends on y n m)};
+
+sub reserved {
+ return $_reserved;
+}
+
+sub parsespec {
+ return ['atom', '\\\\.', undef,
+ 'comment', '#', "\$",
+ 'string', '"', '"',
+ 'string', "'", "'",
+ 'help', 'help', "^(?=[^ \t\n])",
+ 'include', '^source\s+"', '"'];
+}
+
+# Map a Kconfig symbol to the name it is referenced under: a symbol made
+# up solely of A-Z, 0-9 and '_' gets a 'CONFIG_' prefix, anything else is
+# returned unchanged.
+sub mangle_sym {
+    my ($self, $sym) = @_;
+
+    return 'CONFIG_'.$sym if $sym =~ /^[A-Z0-9_]+$/;
+    return $sym;
+}
+
+# Build the substitution tables used to mark up one Kconfig file.
+#
+# Returns a hash reference keyed by fragment type ('comment', 'help',
+# 'string', 'include', 'code', 'start').  Each value is a Subst::Complex
+# object pairing regexps with the $markup formatter applied to the
+# matching text.
+sub markuphandlers {
+    my ($self, $context, $node, $markup) = @_;
+
+    my $index = $context->config->{'index'};
+    my %subst;
+
+    my $format_newline = $markup->make_format_newline($node);
+    $subst{'comment'} = new Subst::Complex
+        qr/\n/ => $format_newline,
+        qr/[^\n]+/ => sub { $markup->format_comment(@_) };
+
+    # The leading "help" keyword is formatted as code, the help text
+    # that follows it as a string.
+    $subst{'help'} = new Subst::Complex
+        qr/\n/ => $format_newline,
+        qr/^[ \t]*help[ \t]*/ => sub { $markup->format_code($self, @_) },
+        qr/[^\n\"\']+/ => sub { $markup->format_string(@_) };
+
+    $subst{'string'} = new Subst::Complex
+        qr/\n/ => $format_newline,
+        qr/[^\n\"\']+/ => sub { $markup->format_string(@_) };
+
+    # parsespec() opens an include fragment on 'source "', so that is the
+    # keyword to match here; an "include" pattern can never fire, and
+    # there is no <...> form in the parsespec to handle.
+    $subst{'include'} = new Subst::Complex
+        qr/\n/ => $format_newline,
+        qr/(source\s*\")(.*?)(\")/ => sub {
+            $markup->format_include([$self->resolve_include($context, $node, @_)],
+                                    @_) };
+
+    $subst{'code'} = new Subst::Complex
+        qr/\n/ => $format_newline,
+        qr/[^\n]*/ => sub { $markup->format_code($self, @_) };
+
+    $subst{'start'} = new Subst::Complex
+        qr/^/ => $format_newline;
+
+    return \%subst;
+}
+
+sub resolve_include {
+ my ($self, $context, $node, $frag) = @_;
+
+ return ();
+}
+
+1;
+
diff --git a/lib/LXRng/Lang/Undefined.pm b/lib/LXRng/Lang/Undefined.pm
index c7d5d09..0989bdb 100644
--- a/lib/LXRng/Lang/Undefined.pm
+++ b/lib/LXRng/Lang/Undefined.pm
@@ -41,10 +41,6 @@ sub parsespec {
return ['atom', '\\\\.', undef];
}
-sub typemap {
- return {};
-}
-
sub markuphandlers {
my ($self, $context, $node, $markup) = @_;
diff --git a/lib/LXRng/Markup/File.pm b/lib/LXRng/Markup/File.pm
index c3d576c..054463a 100644
--- a/lib/LXRng/Markup/File.pm
+++ b/lib/LXRng/Markup/File.pm
@@ -92,18 +92,23 @@ sub format_include {
}
sub format_code {
- my ($self, $idre, $res, $frag) = @_;
+ my ($self, $lang, $frag) = @_;
my $tree = $self->context->vtree();
my $path = $self->context->path();
+ my $idre = $lang->identifier_re();
+ my $res = $lang->reserved();
$frag =~ s{(.*?)$idre|(.+)}{
if ($2) {
unless (exists($$res{$2})) {
my $pre = $1;
- my $sym = safe_html($2);
+ my $sym = $2;
+ my $ref = safe_html($lang->mangle_sym($sym));
+ $sym = safe_html($sym);
+
safe_html($pre).
- qq{<a href="+code=$sym" class="sref">$sym</a>};
+ qq{<a href="+code=$ref" class="sref">$sym</a>};
}
else {
safe_html($1.$2);
@@ -129,7 +134,7 @@ sub markupfile {
my ($self, $subst, $parse) = @_;
my ($btype, $frag) = $parse->nextfrag;
-
+
return () unless defined $frag;
$btype ||= 'code';
diff --git a/lib/LXRng/Parse/Simple.pm b/lib/LXRng/Parse/Simple.pm
index 215ce5b..d89ea22 100644
--- a/lib/LXRng/Parse/Simple.pm
+++ b/lib/LXRng/Parse/Simple.pm
@@ -38,6 +38,8 @@ sub new {
'fileh' => $fileh, # File handle
'tabwidth' => $tabhint||8, # Tab width
'frags' => [], # Fragments in queue
+ 'pref' => '',
+ 'rest' => '',
'bodyid' => \@bodyid, # Array of body type ids
'bofseen' => 0, # Beginning-of-file seen?
'term' => \@term,
@@ -63,81 +65,66 @@ sub untabify {
sub nextfrag {
my ($self) = @_;
- my $btype = undef;
- my $frag = undef;
- my $line = '';
-
+ my $btype;
while (1) {
- # read one more line if we have processed
- # all of the previously read line
- if (@{$$self{'frags'}} == 0) {
- $line = $$self{'fileh'}->getline;
-
- if ($. <= 2 &&
- $line =~ /^.*-[*]-.*?[ \t;]tab-width:[ \t]*([0-9]+).*-[*]-/) {
- # make sure there really is a non-zero tabwidth
- $$self{'tabwidth'} = $1 if $1 > 0;
+ if (defined $btype) {
+ if ($$self{'rest'} =~ s/\A((?s:.*?)$$self{'term'}[$btype])//m) {
+ my $ret = $$self{'pref'}.$1;
+ $$self{'pref'} = '';
+ return ($$self{'bodyid'}[$btype], $ret);
}
-
- if(defined($line)) {
- untabify($line, $$self{'tabwidth'});
+ else {
+ $$self{'pref'} .= $$self{'rest'};
+ $$self{'rest'} = '';
+ }
+ }
+ else {
+ if ($$self{'rest'} =~ s/\A((?s).*?)($$self{'open'})//m) {
+ my $pref = $1;
+ my $frag = $2;
- # split the line into fragments
- $$self{'frags'} = [split(/($$self{'split'})/, $line)];
+ if ($pref ne '') {
+ $$self{'rest'} = $frag.$$self{'rest'};
+ return ('', $pref);
+ }
+
+ $btype = 3;
+ $btype++ while $btype < $#- and !defined($-[$btype]);
+ $btype -= 3;
+
+ if (!defined($$self{'term'}[$btype])) {
+ # Opening regexp captures entire block.
+ return ($$self{'bodyid'}[$btype], $frag);
+ }
+ $$self{'pref'} = $frag;
}
}
- last if @{$$self{'frags'}} == 0;
+ my $line = $$self{'fileh'}->getline;
+ unless (defined $line) {
+ my $ret = $$self{'pref'}.$$self{'rest'};
+ $$self{'pref'} = '';
+ $$self{'rest'} = '';
+ undef($ret) unless length($ret) > 0;
+
+ return (defined($btype) ? $$self{'bodyid'}[$btype] : '', $ret);
+ }
+
+ if ($. <= 2 &&
+ $line =~ /^.*-[*]-.*?[ \t;]tab-width:[ \t]*([0-9]+).*-[*]-/) {
+ # make sure there really is a non-zero tabwidth
+ $$self{'tabwidth'} = $1 if $1 > 0;
+ }
+
+ untabify($line, $$self{'tabwidth'});
+ $$self{'rest'} .= $line;
unless ($$self{'bofseen'}) {
# return start marker if file has contents
$$self{'bofseen'} = 1;
return ('start', '');
}
-
- # skip empty fragments
- if ($$self{'frags'}[0] eq '') {
- shift(@{$$self{'frags'}});
- }
-
- # check if we are inside a fragment
- if (defined($frag)) {
- if (defined($btype)) {
- my $next = shift(@{$$self{'frags'}});
-
- # Add to the fragment
- $frag .= $next;
- # We are done if this was the terminator
- last if $next =~ /^$$self{'term'}[$btype]$/;
-
- }
- else {
- if ($$self{'frags'}[0] =~ /^$$self{'open'}$/) {
- last;
- }
- $frag .= shift(@{$$self{'frags'}});
- }
- }
- else {
- # Find the blocktype of the current block
- $frag = shift(@{$$self{'frags'}});
- if (defined($frag) && (@_ = $frag =~ /^$$self{'open'}$/)) {
- # grep in a scalar context returns the number of times
- # EXPR evaluates to true, which is this case will be
- # the index of the first defined element in @_.
-
- my $i = 1;
- $btype = grep { $i &&= !defined($_) } @_;
- if(!defined($$self{'term'}[$btype])) {
- # Opening regexp captures entire block.
- last;
- }
- }
- }
}
- $btype = $$self{'bodyid'}[$btype] if defined($btype);
-
- return ($btype, $frag);
}
1;
diff --git a/lib/LXRng/Search/Xapian.pm b/lib/LXRng/Search/Xapian.pm
index 03db5b8..014d57a 100644
--- a/lib/LXRng/Search/Xapian.pm
+++ b/lib/LXRng/Search/Xapian.pm
@@ -23,6 +23,13 @@ use strict;
use Search::Xapian qw/:ops :db :qpstem/;
use Search::Xapian::QueryParser;
+our @STOPWORDS = qw(our ours you your yours him his she her hers they
+ them their theirs what which who whom this that
+ these those are was were been being have has had
+ having does did doing would should could the and
+ but for with all any);
+our %STOPWORD = map { $_ => 1 } @STOPWORDS;
+
sub new {
my ($class, $db_root) = @_;
@@ -31,7 +38,7 @@ sub new {
my $self = bless({'db_root' => $db_root,
'writes' => 0},
$class);
-
+
return $self;
}
@@ -100,6 +107,30 @@ sub add_release {
return $changes;
}
+# Return a closure that adds the words of a text to $doc as postings.
+#
+# The closure takes ($pos, $text).  Each word is lower-cased; words of
+# length <= 2 or > 128 and words in %STOPWORD are skipped.  Every word
+# kept is posted at the next position, and a word containing '_' is
+# additionally posted piece by piece (the same short/stopword filter
+# applies to the pieces).  $index is accepted but not used here.
+sub make_add_text {
+    my ($index, $doc) = @_;
+
+    return sub {
+        my ($pos, $text) = @_;
+
+        foreach my $word ($text =~ /(_*\w[\w_]*)/g) {
+            $word = lc($word);
+
+            my $len = length($word);
+            next if $len <= 2 or $len > 128 or $STOPWORD{$word};
+
+            $doc->add_posting($word, $pos++);
+            next unless $word =~ /_/;
+
+            foreach my $part ($word =~ /([^_]+)/g) {
+                next if length($part) <= 2 or $STOPWORD{$part};
+                $doc->add_posting($part, $pos++);
+            }
+        }
+    };
+}
+
sub flush {
my ($self) = @_;