diff options
Diffstat (limited to 'lib/LXRng')
| -rw-r--r-- | lib/LXRng/Index/DBI.pm | 16 | ||||
| -rw-r--r-- | lib/LXRng/Index/Pg.pm | 2 | ||||
| -rw-r--r-- | lib/LXRng/Lang/C.pm | 37 | ||||
| -rw-r--r-- | lib/LXRng/Lang/Generic.pm | 5 | ||||
| -rw-r--r-- | lib/LXRng/Lang/GnuAsm.pm | 164 | ||||
| -rw-r--r-- | lib/LXRng/Lang/Kconfig.pm | 118 | ||||
| -rw-r--r-- | lib/LXRng/Lang/Undefined.pm | 4 | ||||
| -rw-r--r-- | lib/LXRng/Markup/File.pm | 13 | ||||
| -rw-r--r-- | lib/LXRng/Parse/Simple.pm | 111 | ||||
| -rw-r--r-- | lib/LXRng/Search/Xapian.pm | 33 | 
10 files changed, 402 insertions, 101 deletions
| diff --git a/lib/LXRng/Index/DBI.pm b/lib/LXRng/Index/DBI.pm index 763f534..55e215d 100644 --- a/lib/LXRng/Index/DBI.pm +++ b/lib/LXRng/Index/DBI.pm @@ -520,6 +520,22 @@ sub get_identifier_info {  	    \%reflines);  } +sub set_rfile_charset { +    my ($self, $rfile_id, $charset) = @_; + +    my $dbh = $self->dbh; +    my $pre = $self->prefix; +    my $sth = $$self{'sth'}{'set_rfile_charset'} ||= +	$dbh->prepare(qq{ +	    update ${pre}revisions +		set body_charset = (select id from ${pre}charsets +				    where name = ?)  +		where id = ?}); +     +    return $sth->execute($charset, $rfile_id); +} + +  sub get_rfile_timestamp {      my ($self, $rfile_id) = @_; diff --git a/lib/LXRng/Index/Pg.pm b/lib/LXRng/Index/Pg.pm index 3654a72..7387abc 100644 --- a/lib/LXRng/Index/Pg.pm +++ b/lib/LXRng/Index/Pg.pm @@ -67,7 +67,7 @@ sub init_db {  	or die($dbh->errstr);      $dbh->do(qq{insert into ${pre}charsets(name) values ('utf-8')})  	or die($dbh->errstr); -    $dbh->do(qq{insert into ${pre}charsets(name) values ('iso8859-1')}) +    $dbh->do(qq{insert into ${pre}charsets(name) values ('iso-8859-1')})  	or die($dbh->errstr);      $dbh->do(qq{ diff --git a/lib/LXRng/Lang/C.pm b/lib/LXRng/Lang/C.pm index 60a571b..db3a204 100644 --- a/lib/LXRng/Lang/C.pm +++ b/lib/LXRng/Lang/C.pm @@ -51,13 +51,13 @@ sub identifier_re {      return $_identifier_re;  } -my $_reserved ||= { map { $_ => 1 } -		    qw(asm auto break case char const continue default -		       do double else enum extern float for fortran -		       goto if int long register return short signed -		       sizeof static struct switch typedef union -		       unsigned void volatile while #define #else -		       #endif #if #ifdef #ifndef #include #undef)}; +my $_reserved = { map { $_ => 1 } +		  qw(asm auto break case char const continue default +		     do double else enum extern float for fortran goto +		     if int long register return short signed sizeof +		     static struct switch typedef union unsigned void +		     volatile while #define #else #endif #if #ifdef +		     #ifndef #include #undef)};  sub reserved {      return $_reserved; @@ -73,31 +73,10 @@ sub parsespec {  	    'include',	'#\s*include\s+<',	'>'];  } -sub typemap { -    return { -	'c' => 'class', -	'd' => 'macro (un)definition', -	'e' => 'enumerator', -	'f' => 'function definition', -	'g' => 'enumeration name', -	'm' => 'class, struct, or union member', -	'n' => 'namespace', -	'p' => 'function prototype or declaration', -	's' => 'structure name', -	't' => 'typedef', -	'u' => 'union name', -	'v' => 'variable definition', -	'x' => 'extern or forward variable declaration', -	'i' => 'interface'}; -} -  sub markuphandlers {      my ($self, $context, $node, $markup) = @_;      my $index = $context->config->{'index'}; -    my $idre = $self->identifier_re(); -    my $res  = $self->reserved(); -      my %subst;      my $format_newline = $markup->make_format_newline($node); @@ -121,7 +100,7 @@ sub markuphandlers {      $subst{'code'} = new Subst::Complex  	qr/\n/	   => $format_newline, -	qr/[^\n]*/ => sub { $markup->format_code($idre, $res, @_) }; +	qr/[^\n]*/ => sub { $markup->format_code($self, @_) };      $subst{'start'} = new Subst::Complex  	qr/^/	   => $format_newline; diff --git a/lib/LXRng/Lang/Generic.pm b/lib/LXRng/Lang/Generic.pm index 82e9b9d..3e1f545 100644 --- a/lib/LXRng/Lang/Generic.pm +++ b/lib/LXRng/Lang/Generic.pm @@ -40,4 +40,9 @@ sub expand_include {      return ();  } +sub mangle_sym { +    my ($self, $sym) = @_; +    return $sym; +} +  1; diff --git a/lib/LXRng/Lang/GnuAsm.pm b/lib/LXRng/Lang/GnuAsm.pm new file mode 100644 index 0000000..acdcdef --- /dev/null +++ b/lib/LXRng/Lang/GnuAsm.pm @@ -0,0 +1,164 @@ +# Copyright (C) 2008 Arne Georg Gleditsch <lxr@linux.no>. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# The full GNU General Public License is included in this distribution +# in the file called COPYING. + +package LXRng::Lang::GnuAsm; + +use strict; +use Subst::Complex; + +use base qw(LXRng::Lang::Generic); + + +sub doindex { +    return 1; +} + +sub ctagslangname { +    return 'asm'; +} + +sub ctagsopts { +    return (); +} + +sub pathexp { +    return qr/\.[sS]$/; +} + +my $_identifier_re = qr( +			(?m:^|(?<=[^a-zA-Z0-9_\#]))	# Non-symbol chars. +			(_*[a-zA-Z][a-zA-Z0-9_]*)	# The symbol. +			\b +			)x; + +sub identifier_re { +    return $_identifier_re; +} + +my $_reserved ||= { map { $_ => 1 } +		    (qw(aaa aad aam aas adc bound bsf bsr bswap btc +		       btr call cbw cwde cdqe cwd cdq cqo clc cld +		       clflush cmc cmps cmpsb cmpsw cmpsd cmpsq +		       cmpxchg cmpxchg8b cmpxchg16b cpuid daa das +		       enter ins insb insw insd int into jcxz jecxz +		       jrcxz jmp lahf lds les lfs lgs lss leave lfence +		       lock lods lodsb lodsw lodsd lodsq loop loope +		       loopne loopnz loopz mfence movd movmskpd +		       movmskps movnti movs movsb movsw movsd movsq +		       movsx movsxd movzx nop outs outsb outsw outsd +		       pause popa popad prefetch prefetchw pusha +		       pushad pushfd pushfq ret sahf sbb scas scasb +		       scasw scasd scasq sfence stc std stos stosb +		       stosw stosd stosq xadd xchg xlat xlatb arpl +		       clgi cli clts hlt int invd invlpg invlpga iret +		       iretd iretq lar lgdt lidt lldt lmsw lretq lsl +		       ltr rep rdmsr rdpmc rdtsc rdtscp rsm sgdt sidt +		       skinit sldt smsw sti stgi str swapgs syscall +		       sysenter sysexit sysret ud2 verr verw vmload +		       vmmcall vmrun vmsave wbinvd wrmsr), + +		     (map { $_, $_.'b', $_.'w', $_.'l', $_.'q' } +		      qw(add and mov bt bts cmp dec div idiv imul inc +			 in lea mul neg not or out pop popf push pushf +			 rcl rcr rol ror sal shl sar shl shr sub test +			 xor)), + +		     (map { 'cmov'.$_, 'j'.$_, 'set'.$_ } +		      qw(o no b c nae nb nc ae z e nz ne be na nbe a s +			 ns p pe np po l nge nl ge le ng nle g)) +		     )}; +		      + +sub reserved { +    return $_reserved; +} + +sub parsespec { +    return ['atom',	'\\\\.',	undef, +	    'atom',	'%[a-z][a-z0-9]+', undef, # Registers +	    'atom',	'[.][a-z0-9]+', undef, # Directives +	    'comment',	'/\*',		'\*/', +	    'comment',	'//',		"\$", +	    'string',	'"',		'"', +	    'string',	"'",		"'", +	    'atom',	'#\s*(?:ifn?def|define|else|endif|undef)', undef, +	    'include',	'#\s*include\s+"',	'"', +	    'include',	'#\s*include\s+<',	'>', +	    'comment',	'#',		"\$"]; +} + +sub markuphandlers { +    my ($self, $context, $node, $markup) = @_; + +    my $index = $context->config->{'index'}; +    my %subst; + +    my $format_newline = $markup->make_format_newline($node); +    $subst{'comment'} = new Subst::Complex +	qr/\n/     => $format_newline, +	qr/[^\n]+/ => sub { $markup->format_comment(@_) }; +	 +    $subst{'string'} = new Subst::Complex +	qr/\n/        => $format_newline, +	qr/[^\n\"\']+/ => sub { $markup->format_string(@_) }; + +    $subst{'include'} = new Subst::Complex +	qr/\n/ => $format_newline, +	qr/(include\s*\")(.*?)(\")/ => sub { +	    $markup->format_include([$self->resolve_include($context, $node, @_)], +				    @_) }, +				   +	qr/(include\s*\<)(.*?)(\>)/ => sub { +	    $markup->format_include([$self->resolve_include($context, $node, @_)], +				    @_) }; +	 +    $subst{'code'} = new Subst::Complex +	qr/\n/	   => $format_newline, +	qr/[^\n]*/ => sub { $markup->format_code($self, @_) }; + +    $subst{'start'} = new Subst::Complex +	qr/^/	   => $format_newline; +     +    return \%subst; +} + +sub resolve_include { +    my ($self, $context, $node, $frag) = @_; + +    if ($frag =~ /include\s+<(.*?)>/) { +	return $self->expand_include($context, $node, $1); +    } +    elsif ($frag =~ /include\s+\"(.*?)\"/) { +	my $incl = $1; +	my $bare = $1; +	my $name = $node->name(); +	if ($name =~ /(.*\/)/) { +	    $incl = $1.$incl; +	    1 while $incl =~ s,/[^/]+/../,/,; +	     +	    my $file = $context->config->{'repository'}->node($incl, $context->release); +	    return $incl if $file; +	    return $self->expand_include($context, $node, $bare); +	} +    } + +    return (); +} + +1; diff --git a/lib/LXRng/Lang/Kconfig.pm b/lib/LXRng/Lang/Kconfig.pm new file mode 100644 index 0000000..7417236 --- /dev/null +++ b/lib/LXRng/Lang/Kconfig.pm @@ -0,0 +1,118 @@ +# Copyright (C) 2008 Arne Georg Gleditsch <lxr@linux.no>. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# The full GNU General Public License is included in this distribution +# in the file called COPYING. + +package LXRng::Lang::Kconfig; + +use strict; +use Subst::Complex; + +use base qw(LXRng::Lang::Generic); + + +sub doindex { +    return 1; +} + +sub ctagslangname { +    return undef; +} + +sub pathexp { +    return qr/Kconfig$/; +} + +my $_identifier_re = qr( +			(?m:^|(?<=[^A-Z0-9_\#]))	# Non-symbol chars. +			(_*[A-Z][A-Z0-9_]*)		# The symbol. +			\b +			)x; + +sub identifier_re { +    return $_identifier_re; +} + +my $_reserved = { map { $_ => 1 } +		  qw(menu source endmenu config bool if default help +		     tristate depends on y n m)}; + +sub reserved { +    return $_reserved; +} + +sub parsespec { +    return ['atom',	'\\\\.',	undef, +	    'comment',	'#',		"\$", +	    'string',	'"',		'"', +	    'string',	"'",		"'", +	    'help',     'help', 	"^(?=[^ \t\n])", +	    'include',	'^source\s+"',	'"']; +} + +sub mangle_sym { +    return $_[1] =~ /^[A-Z0-9_]+$/ ? 'CONFIG_'.$_[1] : $_[1]; +} + +sub markuphandlers { +    my ($self, $context, $node, $markup) = @_; + +    my $index = $context->config->{'index'}; +    my %subst; + +    my $format_newline = $markup->make_format_newline($node); +    $subst{'comment'} = new Subst::Complex +	qr/\n/     => $format_newline, +	qr/[^\n]+/ => sub { $markup->format_comment(@_) }; +	 +    $subst{'help'} = new Subst::Complex +	qr/\n/        => $format_newline, +	qr/^[ \t]*help[ \t]*/ => sub { $markup->format_code($self, @_) }, +	qr/[^\n\"\']+/ => sub { $markup->format_string(@_) }; + +    $subst{'string'} = new Subst::Complex +	qr/\n/        => $format_newline, +	qr/[^\n\"\']+/ => sub { $markup->format_string(@_) }; + +    $subst{'include'} = new Subst::Complex +	qr/\n/ => $format_newline, +	qr/(include\s*\")(.*?)(\")/ => sub { +	    $markup->format_include([$self->resolve_include($context, $node, @_)], +				    @_) }, +				   +	qr/(include\s*\<)(.*?)(\>)/ => sub { +	    $markup->format_include([$self->resolve_include($context, $node, @_)], +				    @_) }; +	 +    $subst{'code'} = new Subst::Complex +	qr/\n/	   => $format_newline, +	qr/[^\n]*/ => sub { $markup->format_code($self, @_) }; + +    $subst{'start'} = new Subst::Complex +	qr/^/	   => $format_newline; +     +    return \%subst; +} + +sub resolve_include { +    my ($self, $context, $node, $frag) = @_; + +    return (); +} + +1; + diff --git a/lib/LXRng/Lang/Undefined.pm b/lib/LXRng/Lang/Undefined.pm index c7d5d09..0989bdb 100644 --- a/lib/LXRng/Lang/Undefined.pm +++ b/lib/LXRng/Lang/Undefined.pm @@ -41,10 +41,6 @@ sub parsespec {      return ['atom',	'\\\\.',	undef];  } -sub typemap { -    return {}; -} -  sub markuphandlers {      my ($self, $context, $node, $markup) = @_; diff --git a/lib/LXRng/Markup/File.pm b/lib/LXRng/Markup/File.pm index c3d576c..054463a 100644 --- a/lib/LXRng/Markup/File.pm +++ b/lib/LXRng/Markup/File.pm @@ -92,18 +92,23 @@ sub format_include {  }  sub format_code { -    my ($self, $idre, $res, $frag) = @_; +    my ($self, $lang, $frag) = @_;      my $tree = $self->context->vtree();      my $path = $self->context->path(); +    my $idre = $lang->identifier_re(); +    my $res  = $lang->reserved();      $frag =~ s{(.*?)$idre|(.+)}{  	if ($2) {  	    unless (exists($$res{$2})) {  		my $pre = $1; -		my $sym = safe_html($2); +		my $sym = $2; +		my $ref = safe_html($lang->mangle_sym($sym)); +		$sym = safe_html($sym); +  		safe_html($pre). -		    qq{<a href="+code=$sym" class="sref">$sym</a>}; +		    qq{<a href="+code=$ref" class="sref">$sym</a>};  	    }  	    else {  		safe_html($1.$2); @@ -129,7 +134,7 @@ sub markupfile {      my ($self, $subst, $parse) = @_;      my ($btype, $frag) = $parse->nextfrag; -     +      return () unless defined $frag;      $btype ||= 'code'; diff --git a/lib/LXRng/Parse/Simple.pm b/lib/LXRng/Parse/Simple.pm index 215ce5b..d89ea22 100644 --- a/lib/LXRng/Parse/Simple.pm +++ b/lib/LXRng/Parse/Simple.pm @@ -38,6 +38,8 @@ sub new {  	'fileh'		=> $fileh,	# File handle  	'tabwidth'	=> $tabhint||8,	# Tab width  	'frags'		=> [],		# Fragments in queue +	'pref'		=> '', +	'rest'		=> '',  	'bodyid'	=> \@bodyid,	# Array of body type ids  	'bofseen'	=> 0,		# Beginning-of-file seen?  	'term'		=> \@term, @@ -63,81 +65,66 @@ sub untabify {  sub nextfrag {      my ($self) = @_; -    my $btype = undef; -    my $frag = undef; -    my $line = ''; - +    my $btype;      while (1) { -	# read one more line if we have processed  -	# all of the previously read line -	if (@{$$self{'frags'}} == 0) { -	    $line = $$self{'fileh'}->getline; -	     -	    if ($. <= 2 && -		$line =~ /^.*-[*]-.*?[ \t;]tab-width:[ \t]*([0-9]+).*-[*]-/) { -		# make sure there really is a non-zero tabwidth -		$$self{'tabwidth'} = $1 if $1 > 0; +	if (defined $btype) { +	    if ($$self{'rest'} =~ s/\A((?s:.*?)$$self{'term'}[$btype])//m) { +		my $ret = $$self{'pref'}.$1; +		$$self{'pref'} = ''; +		return ($$self{'bodyid'}[$btype], $ret);  	    } -	     -	    if(defined($line)) { -		untabify($line, $$self{'tabwidth'}); +	    else { +		$$self{'pref'} .= $$self{'rest'}; +		$$self{'rest'} = ''; +	    } +	} +	else { +	    if ($$self{'rest'} =~ s/\A((?s).*?)($$self{'open'})//m) { +		my $pref = $1; +		my $frag = $2; -		# split the line into fragments -		$$self{'frags'} = [split(/($$self{'split'})/, $line)]; +		if ($pref ne '') { +		    $$self{'rest'} = $frag.$$self{'rest'}; +		    return ('', $pref); +		} + +		$btype = 3; +		$btype++ while $btype < $#- and !defined($-[$btype]); +		$btype -= 3; + +		if (!defined($$self{'term'}[$btype])) { +		    # Opening regexp captures entire block. +		    return ($$self{'bodyid'}[$btype], $frag); +		} +		$$self{'pref'} = $frag;  	    }  	} -	last if @{$$self{'frags'}} == 0; +	my $line = $$self{'fileh'}->getline; +	unless (defined $line) { +	    my $ret = $$self{'pref'}.$$self{'rest'}; +	    $$self{'pref'} = ''; +	    $$self{'rest'} = ''; +	    undef($ret) unless length($ret) > 0; +	     +	    return (defined($btype) ? $$self{'bodyid'}[$btype] : '', $ret); +	} + +	if ($. <= 2 && +	    $line =~ /^.*-[*]-.*?[ \t;]tab-width:[ \t]*([0-9]+).*-[*]-/) { +	    # make sure there really is a non-zero tabwidth +	    $$self{'tabwidth'} = $1 if $1 > 0; +	} +	     +	untabify($line, $$self{'tabwidth'}); +	$$self{'rest'} .= $line;  	unless ($$self{'bofseen'}) {  	    # return start marker if file has contents  	    $$self{'bofseen'} = 1;  	    return ('start', '');  	} -	 -	# skip empty fragments -	if ($$self{'frags'}[0] eq '') { -	    shift(@{$$self{'frags'}}); -	} - -	# check if we are inside a fragment -	if (defined($frag)) { -	    if (defined($btype)) { -		my $next = shift(@{$$self{'frags'}}); -		 -		# Add to the fragment -		$frag .= $next; -		# We are done if this was the terminator -		last if $next =~ /^$$self{'term'}[$btype]$/; -		 -	    } -	    else { -		if ($$self{'frags'}[0] =~ /^$$self{'open'}$/) { -		    last; -		} -		$frag .= shift(@{$$self{'frags'}}); -	    } -	} -	else { -	    # Find the blocktype of the current block -	    $frag = shift(@{$$self{'frags'}}); -	    if (defined($frag) && (@_ = $frag =~ /^$$self{'open'}$/)) { -		# grep in a scalar context returns the number of times -		# EXPR evaluates to true, which is this case will be -		# the index of the first defined element in @_. - -		my $i = 1; -		$btype = grep { $i &&= !defined($_) } @_; -		if(!defined($$self{'term'}[$btype])) { -		    # Opening regexp captures entire block. -		    last; -		} -	    } -	}      } -    $btype = $$self{'bodyid'}[$btype] if defined($btype); -     -    return ($btype, $frag);  }  1; diff --git a/lib/LXRng/Search/Xapian.pm b/lib/LXRng/Search/Xapian.pm index 03db5b8..014d57a 100644 --- a/lib/LXRng/Search/Xapian.pm +++ b/lib/LXRng/Search/Xapian.pm @@ -23,6 +23,13 @@ use strict;  use Search::Xapian qw/:ops :db :qpstem/;  use Search::Xapian::QueryParser; +our @STOPWORDS = qw(our ours you your yours him his she her hers they +                    them their theirs what which who whom this that +                    these those are was were been being have has had +                    having does did doing would should could the and +                    but for with all any); +our %STOPWORD = map { $_ => 1 } @STOPWORDS; +  sub new {      my ($class, $db_root) = @_; @@ -31,7 +38,7 @@ sub new {      my $self = bless({'db_root' => $db_root,  		      'writes' => 0},  		     $class); - +          return $self;  } @@ -100,6 +107,30 @@ sub add_release {      return $changes;  } +sub make_add_text { +    my ($index, $doc) = @_; + +    return sub { +	my ($pos, $text) = @_; + +	foreach my $term ($text =~ /(_*\w[\w_]*)/g) { +	    $term = lc($term); +	    next if length($term) <= 2; +	    next if length($term) > 128; +	    next if $STOPWORD{$term}; + +	    $doc->add_posting($term, $pos++); +	    if ($term =~ /_/) { +		foreach my $subt ($term =~ /([^_]+)/g) { +		    next if length($subt) <= 2; +		    next if $STOPWORD{$subt}; +		    $doc->add_posting($subt, $pos++); +		} +	    } +	}; +    } +} +  sub flush {      my ($self) = @_; | 
