diff options
author | Arne Georg Gleditsch <argggh@lxr.linpro.no> | 2007-07-05 00:51:08 +0200 |
---|---|---|
committer | Arne Georg Gleditsch <argggh@lxr.linpro.no> | 2007-07-05 00:51:08 +0200 |
commit | e9fa4c98bb5f084739d3418ade3f0c51e34a0aa1 (patch) | |
tree | fec1d635625e031cde7cba1b0a1d95ee92ac760b /lib/LXRng/Parse |
Rebase tree.
Diffstat (limited to 'lib/LXRng/Parse')
-rw-r--r-- | lib/LXRng/Parse/Simple.pm | 127 |
1 files changed, 127 insertions, 0 deletions
diff --git a/lib/LXRng/Parse/Simple.pm b/lib/LXRng/Parse/Simple.pm new file mode 100644 index 0000000..53f7e10 --- /dev/null +++ b/lib/LXRng/Parse/Simple.pm @@ -0,0 +1,127 @@ +package LXRng::Parse::Simple; + +use strict; +use integer; +use IO::Handle; + +sub new { + my ($class, $fileh, $tabhint, @blksep) = @_; + + my (@bodyid, @open, @term); + + while (my @a = splice(@blksep,0,3)) { + push(@bodyid, $a[0]); + push(@open, $a[1]); + push(@term, $a[2]); + } + + my $self = { + 'fileh' => $fileh, # File handle + 'tabwidth' => $tabhint||8, # Tab width + 'frags' => [], # Fragments in queue + 'bodyid' => \@bodyid, # Array of body type ids + 'bofseen' => 0, # Beginning-of-file seen? + 'term' => \@term, + # Fragment closing delimiters + 'open' => join('|', map { "($_)" } @open), + # Fragment opening regexp + 'split' => join('|', @open, map { $_ eq '' ? () : $_ } @term), + # Fragmentation regexp + }; + + return bless $self, $class; + +# @frags $fileh $tabwidth $split @term $open $bodyid + +} + +sub untabify { + my $t = $_[1] || 8; + + $_[0] =~ s/^(\t+)/(' ' x ($t * length($1)))/ge; # Optimize for common case. + $_[0] =~ s/([^\t]*)\t/$1.(' ' x ($t - (length($1) % $t)))/ge; + return($_[0]); +} + + +sub nextfrag { + my ($self) = @_; + + my $btype = undef; + my $frag = undef; + my $line = ''; + + while (1) { + # read one more line if we have processed + # all of the previously read line + if (@{$$self{'frags'}} == 0) { + $line = $$self{'fileh'}->getline; + + if ($. <= 2 && + $line =~ /^.*-[*]-.*?[ \t;]tab-width:[ \t]*([0-9]+).*-[*]-/) { + # make sure there really is a non-zero tabwidth + $$self{'tabwidth'} = $1 if $1 > 0; + } + + if(defined($line)) { + untabify($line, $$self{'tabwidth'}); + + # split the line into fragments + $$self{'frags'} = [split(/($$self{'split'})/, $line)]; + } + } + + last if @{$$self{'frags'}} == 0; + + unless ($$self{'bofseen'}) { + # return start marker if file has contents + $$self{'bofseen'} = 1; + return ('start', ''); + } + + # skip empty fragments + if ($$self{'frags'}[0] eq '') { + shift(@{$$self{'frags'}}); + } + + # check if we are inside a fragment + if (defined($frag)) { + if (defined($btype)) { + my $next = shift(@{$$self{'frags'}}); + + # Add to the fragment + $frag .= $next; + # We are done if this was the terminator + last if $next =~ /^$$self{'term'}[$btype]$/; + + } + else { + if ($$self{'frags'}[0] =~ /^$$self{'open'}$/) { + last; + } + $frag .= shift(@{$$self{'frags'}}); + } + } + else { + # Find the blocktype of the current block + $frag = shift(@{$$self{'frags'}}); + if (defined($frag) && (@_ = $frag =~ /^$$self{'open'}$/)) { + # grep in a scalar context returns the number of times + # EXPR evaluates to true, which is this case will be + # the index of the first defined element in @_. + + my $i = 1; + $btype = grep { $i &&= !defined($_) } @_; + if(!defined($$self{'term'}[$btype])) { + # Opening regexp captures entire block. + last; + } + } + } + } + $btype = $$self{'bodyid'}[$btype] if defined($btype); + + return ($btype, $frag); +} + +1; |