aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArne Georg Gleditsch <argggh@taniquetil.(none)>2009-03-06 21:13:18 +0100
committerArne Georg Gleditsch <argggh@taniquetil.(none)>2009-03-06 21:13:18 +0100
commit8d7d1d2595c2c3475ec27d07d8544e345dab5851 (patch)
tree6337fd7aa489a73c44abb742b2bac22741b00779
parenta25f8442450d7e9fb188a979ab0897cb86e9d84f (diff)
Added more language modules, threw things about a bit.
-rw-r--r--INSTALL4
-rw-r--r--lib/LXRng/Index/DBI.pm16
-rw-r--r--lib/LXRng/Index/Pg.pm2
-rw-r--r--lib/LXRng/Lang/C.pm37
-rw-r--r--lib/LXRng/Lang/Generic.pm5
-rw-r--r--lib/LXRng/Lang/GnuAsm.pm164
-rw-r--r--lib/LXRng/Lang/Kconfig.pm118
-rw-r--r--lib/LXRng/Lang/Undefined.pm4
-rw-r--r--lib/LXRng/Markup/File.pm13
-rw-r--r--lib/LXRng/Parse/Simple.pm111
-rw-r--r--lib/LXRng/Search/Xapian.pm33
-rwxr-xr-xlxr-genxref37
-rw-r--r--lxrng.conf-dist8
13 files changed, 433 insertions, 119 deletions
diff --git a/INSTALL b/INSTALL
index 7c18f11..f82d3e6 100644
--- a/INSTALL
+++ b/INSTALL
@@ -20,6 +20,10 @@ DEPENDENCIES
- HTML::Entities [libhtml-parser-perl]
- Template [libtemplate-perl]
+* Misc Perl modules
+ - Devel::Size [libdevel-size-perl]
+ - Term::ProgressBar [libterm-progressbar-perl]
+
* (For gzip content transfer compression: PerlIO::gzip)
* "Exuberant ctags", runnable as ctags-exuberant somewhere in the
diff --git a/lib/LXRng/Index/DBI.pm b/lib/LXRng/Index/DBI.pm
index 763f534..55e215d 100644
--- a/lib/LXRng/Index/DBI.pm
+++ b/lib/LXRng/Index/DBI.pm
@@ -520,6 +520,22 @@ sub get_identifier_info {
\%reflines);
}
+sub set_rfile_charset {
+ my ($self, $rfile_id, $charset) = @_;
+
+ my $dbh = $self->dbh;
+ my $pre = $self->prefix;
+ my $sth = $$self{'sth'}{'set_rfile_charset'} ||=
+ $dbh->prepare(qq{
+ update ${pre}revisions
+ set body_charset = (select id from ${pre}charsets
+ where name = ?)
+ where id = ?});
+
+ return $sth->execute($charset, $rfile_id);
+}
+
+
sub get_rfile_timestamp {
my ($self, $rfile_id) = @_;
diff --git a/lib/LXRng/Index/Pg.pm b/lib/LXRng/Index/Pg.pm
index 3654a72..7387abc 100644
--- a/lib/LXRng/Index/Pg.pm
+++ b/lib/LXRng/Index/Pg.pm
@@ -67,7 +67,7 @@ sub init_db {
or die($dbh->errstr);
$dbh->do(qq{insert into ${pre}charsets(name) values ('utf-8')})
or die($dbh->errstr);
- $dbh->do(qq{insert into ${pre}charsets(name) values ('iso8859-1')})
+ $dbh->do(qq{insert into ${pre}charsets(name) values ('iso-8859-1')})
or die($dbh->errstr);
$dbh->do(qq{
diff --git a/lib/LXRng/Lang/C.pm b/lib/LXRng/Lang/C.pm
index 60a571b..db3a204 100644
--- a/lib/LXRng/Lang/C.pm
+++ b/lib/LXRng/Lang/C.pm
@@ -51,13 +51,13 @@ sub identifier_re {
return $_identifier_re;
}
-my $_reserved ||= { map { $_ => 1 }
- qw(asm auto break case char const continue default
- do double else enum extern float for fortran
- goto if int long register return short signed
- sizeof static struct switch typedef union
- unsigned void volatile while #define #else
- #endif #if #ifdef #ifndef #include #undef)};
+my $_reserved = { map { $_ => 1 }
+ qw(asm auto break case char const continue default
+ do double else enum extern float for fortran goto
+ if int long register return short signed sizeof
+ static struct switch typedef union unsigned void
+ volatile while #define #else #endif #if #ifdef
+ #ifndef #include #undef)};
sub reserved {
return $_reserved;
@@ -73,31 +73,10 @@ sub parsespec {
'include', '#\s*include\s+<', '>'];
}
-sub typemap {
- return {
- 'c' => 'class',
- 'd' => 'macro (un)definition',
- 'e' => 'enumerator',
- 'f' => 'function definition',
- 'g' => 'enumeration name',
- 'm' => 'class, struct, or union member',
- 'n' => 'namespace',
- 'p' => 'function prototype or declaration',
- 's' => 'structure name',
- 't' => 'typedef',
- 'u' => 'union name',
- 'v' => 'variable definition',
- 'x' => 'extern or forward variable declaration',
- 'i' => 'interface'};
-}
-
sub markuphandlers {
my ($self, $context, $node, $markup) = @_;
my $index = $context->config->{'index'};
- my $idre = $self->identifier_re();
- my $res = $self->reserved();
-
my %subst;
my $format_newline = $markup->make_format_newline($node);
@@ -121,7 +100,7 @@ sub markuphandlers {
$subst{'code'} = new Subst::Complex
qr/\n/ => $format_newline,
- qr/[^\n]*/ => sub { $markup->format_code($idre, $res, @_) };
+ qr/[^\n]*/ => sub { $markup->format_code($self, @_) };
$subst{'start'} = new Subst::Complex
qr/^/ => $format_newline;
diff --git a/lib/LXRng/Lang/Generic.pm b/lib/LXRng/Lang/Generic.pm
index 82e9b9d..3e1f545 100644
--- a/lib/LXRng/Lang/Generic.pm
+++ b/lib/LXRng/Lang/Generic.pm
@@ -40,4 +40,9 @@ sub expand_include {
return ();
}
+sub mangle_sym {
+ my ($self, $sym) = @_;
+ return $sym;
+}
+
1;
diff --git a/lib/LXRng/Lang/GnuAsm.pm b/lib/LXRng/Lang/GnuAsm.pm
new file mode 100644
index 0000000..acdcdef
--- /dev/null
+++ b/lib/LXRng/Lang/GnuAsm.pm
@@ -0,0 +1,164 @@
+# Copyright (C) 2008 Arne Georg Gleditsch <lxr@linux.no>.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# The full GNU General Public License is included in this distribution
+# in the file called COPYING.
+
+package LXRng::Lang::GnuAsm;
+
+use strict;
+use Subst::Complex;
+
+use base qw(LXRng::Lang::Generic);
+
+
+sub doindex {
+ return 1;
+}
+
+sub ctagslangname {
+ return 'asm';
+}
+
+sub ctagsopts {
+ return ();
+}
+
+sub pathexp {
+ return qr/\.[sS]$/;
+}
+
+my $_identifier_re = qr(
+ (?m:^|(?<=[^a-zA-Z0-9_\#])) # Non-symbol chars.
+ (_*[a-zA-Z][a-zA-Z0-9_]*) # The symbol.
+ \b
+ )x;
+
+sub identifier_re {
+ return $_identifier_re;
+}
+
+my $_reserved ||= { map { $_ => 1 }
+ (qw(aaa aad aam aas adc bound bsf bsr bswap btc
+ btr call cbw cwde cdqe cwd cdq cqo clc cld
+ clflush cmc cmps cmpsb cmpsw cmpsd cmpsq
+ cmpxchg cmpxchg8b cmpxchg16b cpuid daa das
+ enter ins insb insw insd int into jcxz jecxz
+ jrcxz jmp lahf lds les lfs lgs lss leave lfence
+ lock lods lodsb lodsw lodsd lodsq loop loope
+ loopne loopnz loopz mfence movd movmskpd
+ movmskps movnti movs movsb movsw movsd movsq
+ movsx movsxd movzx nop outs outsb outsw outsd
+ pause popa popad prefetch prefetchw pusha
+ pushad pushfd pushfq ret sahf sbb scas scasb
+ scasw scasd scasq sfence stc std stos stosb
+ stosw stosd stosq xadd xchg xlat xlatb arpl
+ clgi cli clts hlt int invd invlpg invlpga iret
+ iretd iretq lar lgdt lidt lldt lmsw lretq lsl
+ ltr rep rdmsr rdpmc rdtsc rdtscp rsm sgdt sidt
+ skinit sldt smsw sti stgi str swapgs syscall
+ sysenter sysexit sysret ud2 verr verw vmload
+ vmmcall vmrun vmsave wbinvd wrmsr),
+
+ (map { $_, $_.'b', $_.'w', $_.'l', $_.'q' }
+ qw(add and mov bt bts cmp dec div idiv imul inc
+ in lea mul neg not or out pop popf push pushf
+ rcl rcr rol ror sal shl sar shl shr sub test
+ xor)),
+
+ (map { 'cmov'.$_, 'j'.$_, 'set'.$_ }
+ qw(o no b c nae nb nc ae z e nz ne be na nbe a s
+ ns p pe np po l nge nl ge le ng nle g))
+ )};
+
+
+sub reserved {
+ return $_reserved;
+}
+
+sub parsespec {
+ return ['atom', '\\\\.', undef,
+ 'atom', '%[a-z][a-z0-9]+', undef, # Registers
+ 'atom', '[.][a-z0-9]+', undef, # Directives
+ 'comment', '/\*', '\*/',
+ 'comment', '//', "\$",
+ 'string', '"', '"',
+ 'string', "'", "'",
+ 'atom', '#\s*(?:ifn?def|define|else|endif|undef)', undef,
+ 'include', '#\s*include\s+"', '"',
+ 'include', '#\s*include\s+<', '>',
+ 'comment', '#', "\$"];
+}
+
+sub markuphandlers {
+ my ($self, $context, $node, $markup) = @_;
+
+ my $index = $context->config->{'index'};
+ my %subst;
+
+ my $format_newline = $markup->make_format_newline($node);
+ $subst{'comment'} = new Subst::Complex
+ qr/\n/ => $format_newline,
+ qr/[^\n]+/ => sub { $markup->format_comment(@_) };
+
+ $subst{'string'} = new Subst::Complex
+ qr/\n/ => $format_newline,
+ qr/[^\n\"\']+/ => sub { $markup->format_string(@_) };
+
+ $subst{'include'} = new Subst::Complex
+ qr/\n/ => $format_newline,
+ qr/(include\s*\")(.*?)(\")/ => sub {
+ $markup->format_include([$self->resolve_include($context, $node, @_)],
+ @_) },
+
+ qr/(include\s*\<)(.*?)(\>)/ => sub {
+ $markup->format_include([$self->resolve_include($context, $node, @_)],
+ @_) };
+
+ $subst{'code'} = new Subst::Complex
+ qr/\n/ => $format_newline,
+ qr/[^\n]*/ => sub { $markup->format_code($self, @_) };
+
+ $subst{'start'} = new Subst::Complex
+ qr/^/ => $format_newline;
+
+ return \%subst;
+}
+
+sub resolve_include {
+ my ($self, $context, $node, $frag) = @_;
+
+ if ($frag =~ /include\s+<(.*?)>/) {
+ return $self->expand_include($context, $node, $1);
+ }
+ elsif ($frag =~ /include\s+\"(.*?)\"/) {
+ my $incl = $1;
+ my $bare = $1;
+ my $name = $node->name();
+ if ($name =~ /(.*\/)/) {
+ $incl = $1.$incl;
+ 1 while $incl =~ s,/[^/]+/../,/,;
+
+ my $file = $context->config->{'repository'}->node($incl, $context->release);
+ return $incl if $file;
+ return $self->expand_include($context, $node, $bare);
+ }
+ }
+
+ return ();
+}
+
+1;
diff --git a/lib/LXRng/Lang/Kconfig.pm b/lib/LXRng/Lang/Kconfig.pm
new file mode 100644
index 0000000..7417236
--- /dev/null
+++ b/lib/LXRng/Lang/Kconfig.pm
@@ -0,0 +1,118 @@
+# Copyright (C) 2008 Arne Georg Gleditsch <lxr@linux.no>.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# The full GNU General Public License is included in this distribution
+# in the file called COPYING.
+
+package LXRng::Lang::Kconfig;
+
+use strict;
+use Subst::Complex;
+
+use base qw(LXRng::Lang::Generic);
+
+
+sub doindex {
+ return 1;
+}
+
+sub ctagslangname {
+ return undef;
+}
+
+sub pathexp {
+ return qr/Kconfig$/;
+}
+
+my $_identifier_re = qr(
+ (?m:^|(?<=[^A-Z0-9_\#])) # Non-symbol chars.
+ (_*[A-Z][A-Z0-9_]*) # The symbol.
+ \b
+ )x;
+
+sub identifier_re {
+ return $_identifier_re;
+}
+
+my $_reserved = { map { $_ => 1 }
+ qw(menu source endmenu config bool if default help
+ tristate depends on y n m)};
+
+sub reserved {
+ return $_reserved;
+}
+
+sub parsespec {
+ return ['atom', '\\\\.', undef,
+ 'comment', '#', "\$",
+ 'string', '"', '"',
+ 'string', "'", "'",
+ 'help', 'help', "^(?=[^ \t\n])",
+ 'include', '^source\s+"', '"'];
+}
+
+sub mangle_sym {
+ return $_[1] =~ /^[A-Z0-9_]+$/ ? 'CONFIG_'.$_[1] : $_[1];
+}
+
+sub markuphandlers {
+ my ($self, $context, $node, $markup) = @_;
+
+ my $index = $context->config->{'index'};
+ my %subst;
+
+ my $format_newline = $markup->make_format_newline($node);
+ $subst{'comment'} = new Subst::Complex
+ qr/\n/ => $format_newline,
+ qr/[^\n]+/ => sub { $markup->format_comment(@_) };
+
+ $subst{'help'} = new Subst::Complex
+ qr/\n/ => $format_newline,
+ qr/^[ \t]*help[ \t]*/ => sub { $markup->format_code($self, @_) },
+ qr/[^\n\"\']+/ => sub { $markup->format_string(@_) };
+
+ $subst{'string'} = new Subst::Complex
+ qr/\n/ => $format_newline,
+ qr/[^\n\"\']+/ => sub { $markup->format_string(@_) };
+
+ $subst{'include'} = new Subst::Complex
+ qr/\n/ => $format_newline,
+ qr/(include\s*\")(.*?)(\")/ => sub {
+ $markup->format_include([$self->resolve_include($context, $node, @_)],
+ @_) },
+
+ qr/(include\s*\<)(.*?)(\>)/ => sub {
+ $markup->format_include([$self->resolve_include($context, $node, @_)],
+ @_) };
+
+ $subst{'code'} = new Subst::Complex
+ qr/\n/ => $format_newline,
+ qr/[^\n]*/ => sub { $markup->format_code($self, @_) };
+
+ $subst{'start'} = new Subst::Complex
+ qr/^/ => $format_newline;
+
+ return \%subst;
+}
+
+sub resolve_include {
+ my ($self, $context, $node, $frag) = @_;
+
+ return ();
+}
+
+1;
+
diff --git a/lib/LXRng/Lang/Undefined.pm b/lib/LXRng/Lang/Undefined.pm
index c7d5d09..0989bdb 100644
--- a/lib/LXRng/Lang/Undefined.pm
+++ b/lib/LXRng/Lang/Undefined.pm
@@ -41,10 +41,6 @@ sub parsespec {
return ['atom', '\\\\.', undef];
}
-sub typemap {
- return {};
-}
-
sub markuphandlers {
my ($self, $context, $node, $markup) = @_;
diff --git a/lib/LXRng/Markup/File.pm b/lib/LXRng/Markup/File.pm
index c3d576c..054463a 100644
--- a/lib/LXRng/Markup/File.pm
+++ b/lib/LXRng/Markup/File.pm
@@ -92,18 +92,23 @@ sub format_include {
}
sub format_code {
- my ($self, $idre, $res, $frag) = @_;
+ my ($self, $lang, $frag) = @_;
my $tree = $self->context->vtree();
my $path = $self->context->path();
+ my $idre = $lang->identifier_re();
+ my $res = $lang->reserved();
$frag =~ s{(.*?)$idre|(.+)}{
if ($2) {
unless (exists($$res{$2})) {
my $pre = $1;
- my $sym = safe_html($2);
+ my $sym = $2;
+ my $ref = safe_html($lang->mangle_sym($sym));
+ $sym = safe_html($sym);
+
safe_html($pre).
- qq{<a href="+code=$sym" class="sref">$sym</a>};
+ qq{<a href="+code=$ref" class="sref">$sym</a>};
}
else {
safe_html($1.$2);
@@ -129,7 +134,7 @@ sub markupfile {
my ($self, $subst, $parse) = @_;
my ($btype, $frag) = $parse->nextfrag;
-
+
return () unless defined $frag;
$btype ||= 'code';
diff --git a/lib/LXRng/Parse/Simple.pm b/lib/LXRng/Parse/Simple.pm
index 215ce5b..d89ea22 100644
--- a/lib/LXRng/Parse/Simple.pm
+++ b/lib/LXRng/Parse/Simple.pm
@@ -38,6 +38,8 @@ sub new {
'fileh' => $fileh, # File handle
'tabwidth' => $tabhint||8, # Tab width
'frags' => [], # Fragments in queue
+ 'pref' => '',
+ 'rest' => '',
'bodyid' => \@bodyid, # Array of body type ids
'bofseen' => 0, # Beginning-of-file seen?
'term' => \@term,
@@ -63,81 +65,66 @@ sub untabify {
sub nextfrag {
my ($self) = @_;
- my $btype = undef;
- my $frag = undef;
- my $line = '';
-
+ my $btype;
while (1) {
- # read one more line if we have processed
- # all of the previously read line
- if (@{$$self{'frags'}} == 0) {
- $line = $$self{'fileh'}->getline;
-
- if ($. <= 2 &&
- $line =~ /^.*-[*]-.*?[ \t;]tab-width:[ \t]*([0-9]+).*-[*]-/) {
- # make sure there really is a non-zero tabwidth
- $$self{'tabwidth'} = $1 if $1 > 0;
+ if (defined $btype) {
+ if ($$self{'rest'} =~ s/\A((?s:.*?)$$self{'term'}[$btype])//m) {
+ my $ret = $$self{'pref'}.$1;
+ $$self{'pref'} = '';
+ return ($$self{'bodyid'}[$btype], $ret);
}
-
- if(defined($line)) {
- untabify($line, $$self{'tabwidth'});
+ else {
+ $$self{'pref'} .= $$self{'rest'};
+ $$self{'rest'} = '';
+ }
+ }
+ else {
+ if ($$self{'rest'} =~ s/\A((?s).*?)($$self{'open'})//m) {
+ my $pref = $1;
+ my $frag = $2;
- # split the line into fragments
- $$self{'frags'} = [split(/($$self{'split'})/, $line)];
+ if ($pref ne '') {
+ $$self{'rest'} = $frag.$$self{'rest'};
+ return ('', $pref);
+ }
+
+ $btype = 3;
+ $btype++ while $btype < $#- and !defined($-[$btype]);
+ $btype -= 3;
+
+ if (!defined($$self{'term'}[$btype])) {
+ # Opening regexp captures entire block.
+ return ($$self{'bodyid'}[$btype], $frag);
+ }
+ $$self{'pref'} = $frag;
}
}
- last if @{$$self{'frags'}} == 0;
+ my $line = $$self{'fileh'}->getline;
+ unless (defined $line) {
+ my $ret = $$self{'pref'}.$$self{'rest'};
+ $$self{'pref'} = '';
+ $$self{'rest'} = '';
+ undef($ret) unless length($ret) > 0;
+
+ return (defined($btype) ? $$self{'bodyid'}[$btype] : '', $ret);
+ }
+
+ if ($. <= 2 &&
+ $line =~ /^.*-[*]-.*?[ \t;]tab-width:[ \t]*([0-9]+).*-[*]-/) {
+ # make sure there really is a non-zero tabwidth
+ $$self{'tabwidth'} = $1 if $1 > 0;
+ }
+
+ untabify($line, $$self{'tabwidth'});
+ $$self{'rest'} .= $line;
unless ($$self{'bofseen'}) {
# return start marker if file has contents
$$self{'bofseen'} = 1;
return ('start', '');
}
-
- # skip empty fragments
- if ($$self{'frags'}[0] eq '') {
- shift(@{$$self{'frags'}});
- }
-
- # check if we are inside a fragment
- if (defined($frag)) {
- if (defined($btype)) {
- my $next = shift(@{$$self{'frags'}});
-
- # Add to the fragment
- $frag .= $next;
- # We are done if this was the terminator
- last if $next =~ /^$$self{'term'}[$btype]$/;
-
- }
- else {
- if ($$self{'frags'}[0] =~ /^$$self{'open'}$/) {
- last;
- }
- $frag .= shift(@{$$self{'frags'}});
- }
- }
- else {
- # Find the blocktype of the current block
- $frag = shift(@{$$self{'frags'}});
- if (defined($frag) && (@_ = $frag =~ /^$$self{'open'}$/)) {
- # grep in a scalar context returns the number of times
- # EXPR evaluates to true, which is this case will be
- # the index of the first defined element in @_.
-
- my $i = 1;
- $btype = grep { $i &&= !defined($_) } @_;
- if(!defined($$self{'term'}[$btype])) {
- # Opening regexp captures entire block.
- last;
- }
- }
- }
}
- $btype = $$self{'bodyid'}[$btype] if defined($btype);
-
- return ($btype, $frag);
}
1;
diff --git a/lib/LXRng/Search/Xapian.pm b/lib/LXRng/Search/Xapian.pm
index 03db5b8..014d57a 100644
--- a/lib/LXRng/Search/Xapian.pm
+++ b/lib/LXRng/Search/Xapian.pm
@@ -23,6 +23,13 @@ use strict;
use Search::Xapian qw/:ops :db :qpstem/;
use Search::Xapian::QueryParser;
+our @STOPWORDS = qw(our ours you your yours him his she her hers they
+ them their theirs what which who whom this that
+ these those are was were been being have has had
+ having does did doing would should could the and
+ but for with all any);
+our %STOPWORD = map { $_ => 1 } @STOPWORDS;
+
sub new {
my ($class, $db_root) = @_;
@@ -31,7 +38,7 @@ sub new {
my $self = bless({'db_root' => $db_root,
'writes' => 0},
$class);
-
+
return $self;
}
@@ -100,6 +107,30 @@ sub add_release {
return $changes;
}
+sub make_add_text {
+ my ($index, $doc) = @_;
+
+ return sub {
+ my ($pos, $text) = @_;
+
+ foreach my $term ($text =~ /(_*\w[\w_]*)/g) {
+ $term = lc($term);
+ next if length($term) <= 2;
+ next if length($term) > 128;
+ next if $STOPWORD{$term};
+
+ $doc->add_posting($term, $pos++);
+ if ($term =~ /_/) {
+ foreach my $subt ($term =~ /([^_]+)/g) {
+ next if length($subt) <= 2;
+ next if $STOPWORD{$subt};
+ $doc->add_posting($subt, $pos++);
+ }
+ }
+ };
+ }
+}
+
sub flush {
my ($self) = @_;
diff --git a/lxr-genxref b/lxr-genxref
index 3173bdd..b390b35 100755
--- a/lxr-genxref
+++ b/lxr-genxref
@@ -37,6 +37,7 @@ use IO::Handle;
use Fcntl;
use Term::ProgressBar;
use Devel::Size qw(size total_size);
+use Encode;
$SIG{'INT'} = sub { die "\nSIGINT: $$: please wait, flushing caches...\n"; };
$SIG{'QUIT'} = sub { die "\nSIGQUIT: $$: please wait, flushing caches...\n"; };
@@ -51,6 +52,7 @@ my $tree = shift(@ARGV);
my @versions = @ARGV;
my $context = LXRng::Context->new('tree' => $tree);
+die "Usage: $0 <tree-id>\n" unless $context and $context->tree;
LXRng::Lang->init($context);
my $index = $context->config->{'index'};
@@ -87,7 +89,8 @@ sub make_add_ident($) {
$last_func = $symbol;
}
if ($$info{'kind'} eq 'l') {
- $$info{'context'} = $identcache{$last_func};
+ $$info{'context'} = $identcache{$last_func} if
+ defined($last_func);
}
if (exists $$info{'class'}) {
$$info{'context'} = $identcache{$$info{'class'}};
@@ -122,11 +125,12 @@ sub index_file($$) {
return 0 unless $index->to_index($fileid);
return 1 unless $lang->doindex();
+ return 1 unless $lang->ctagslangname();
my $add_ident = make_add_ident($fileid);
warn("--- indexing ".$file->name." [".$file->revision."]\n");
- my @extra_flags = ('-IEXPORT_SYMBOL+', '-I__initcall+');
+ my $extra_flags = $context->config->{'ctags_flags'} || [];
my $ctags;
my $pid = open($ctags, '-|');
@@ -134,7 +138,7 @@ sub index_file($$) {
if ($pid == 0) {
exec('ctags-exuberant',
- @extra_flags,
+ @$extra_flags,
'--fields=+aifmknsSz', '--sort=no',
'--excmd=number', '-f', '-',
'--language-force='.$lang->ctagslangname,
@@ -181,12 +185,7 @@ sub reference_file($$$) {
warn("--- referencing ".$file->name." [".$file->revision."]\n");
my $reserved = $lang->reserved();
-
- my $re = qr(
- (?m:^|[^a-zA-Z0-9_]) # Non-symbol chars.
- (_*[a-zA-Z][a-zA-Z0-9_]*) # The symbol.
- \b
- )x;
+ my $re = $lang->identifier_re();
my %refs;
my $line = 1;
@@ -199,7 +198,7 @@ sub reference_file($$$) {
while ($frag =~ /\G.*?(?:(\n)|$re)/gc) {
$line++ && next if defined $1;
- my $sym = $2;
+ my $sym = $lang->mangle_sym($2);
next if $$reserved{$sym};
push(@{$refs{$sym} ||= []}, $line);
@@ -236,20 +235,28 @@ sub hash_file($$$) {
sysopen($handle, $file->phys_path, 0) || die($!);
warn("--- hashing ".$file->name." [".$file->revision."]\n");
my $doc = $hash->new_document($file->name);
+ my $charset = $context->config->{'content_charset'} || [];
+ $charset = [ref($charset) eq 'ARRAY' ? @$charset : $charset];
+ push(@$charset, 'iso-8859-1'); # Fall back
+ my $add_line = $hash->make_add_text($doc);
+
while (<$handle>) {
my $pos = 0;
- # Latin-1 word characters.
- foreach my $term (/([0-9a-zA-Z\300-\326\330-\366\370-\377]+)/g) {
- $term = lc($term);
- next if length($term) > 128;
- $doc->add_posting($term, $.*100 + $pos++);
+ my $text;
+ while (@$charset) {
+ $text = eval { decode($$charset[0], $_, Encode::FB_CROAK); };
+ last unless $@;
+ shift(@$charset);
}
+
+ $add_line->($.*100, $text);
}
reference_file($file, $fileid, $doc);
$docid = $hash->add_document($doc, [map {
$index->release_id($tree, $_) } @$rels]);
$index->add_hashed_document($fileid, $docid);
+ $index->set_rfile_charset($fileid, $$charset[0]);
$handle->close();
return 1;
}
diff --git a/lxrng.conf-dist b/lxrng.conf-dist
index 27abb1b..ab4229a 100644
--- a/lxrng.conf-dist
+++ b/lxrng.conf-dist
@@ -24,14 +24,16 @@ return {
'index' => $index,
'search' => $search,
- 'base_url' => 'http://lxr-test.linpro.no/lxr',
+ 'base_url' => 'http://localhost/lxr',
# Must be writable by httpd user:
'cache' => '/var/lib/lxrng/cache',
'fs_charset' => 'iso-8859-1',
- 'content_charset' => 'iso-8859-1',
+ # Tried successively
+ 'content_charset' => ['utf-8', 'iso-8859-1'],
- 'languages' => ['C'],
+ 'languages' => ['C', 'GnuAsm', 'Kconfig'],
+ 'ctags_flags' => ["-I\@$LXRng::ROOT/lxr-ctags-quirks"],
'ver_list' => [$gitrepo->allversions],
'ver_default' => 'v2.6.20.3',