aboutsummaryrefslogtreecommitdiffstats
path: root/lib/LXRng/Parse/Simple.pm
blob: 215ce5b4069df0b16f3fc9c98a35f0b66f7bd76b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# Copyright (C) 2008 Arne Georg Gleditsch <lxr@linux.no> and others.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# The full GNU General Public License is included in this distribution
# in the file called COPYING.

package LXRng::Parse::Simple;

use strict;
use integer;
use IO::Handle;

sub new {
    my ($class, $fileh, $tabhint, @blksep) = @_;

    my (@bodyid, @open, @term);

    while (my @a = splice(@blksep,0,3)) {
	push(@bodyid, $a[0]);
	push(@open,   $a[1]);
	push(@term,   $a[2]);
    }

    my $self = {
	'fileh'		=> $fileh,	# File handle
	'tabwidth'	=> $tabhint||8,	# Tab width
	'frags'		=> [],		# Fragments in queue
	'bodyid'	=> \@bodyid,	# Array of body type ids
	'bofseen'	=> 0,		# Beginning-of-file seen?
	'term'		=> \@term,
					# Fragment closing delimiters
	'open'		=> join('|', map { "($_)" } @open),
					# Fragment opening regexp
	'split'		=> join('|', @open, map { $_ eq '' ? () : $_ } @term),
					# Fragmentation regexp
    };

    return bless $self, $class;
}

sub untabify {
    my $t = $_[1] || 8;

    $_[0] =~ s/^(\t+)/(' ' x ($t * length($1)))/ge; # Optimize for common case.
    $_[0] =~ s/([^\t]*)\t/$1.(' ' x ($t - (length($1) % $t)))/ge;
    return($_[0]);
}


sub nextfrag {
    my ($self) = @_;

    my $btype = undef;
    my $frag = undef;
    my $line = '';

    while (1) {
	# read one more line if we have processed 
	# all of the previously read line
	if (@{$$self{'frags'}} == 0) {
	    $line = $$self{'fileh'}->getline;
	    
	    if ($. <= 2 &&
		$line =~ /^.*-[*]-.*?[ \t;]tab-width:[ \t]*([0-9]+).*-[*]-/) {
		# make sure there really is a non-zero tabwidth
		$$self{'tabwidth'} = $1 if $1 > 0;
	    }
	    
	    if(defined($line)) {
		untabify($line, $$self{'tabwidth'});

		# split the line into fragments
		$$self{'frags'} = [split(/($$self{'split'})/, $line)];
	    }
	}

	last if @{$$self{'frags'}} == 0;

	unless ($$self{'bofseen'}) {
	    # return start marker if file has contents
	    $$self{'bofseen'} = 1;
	    return ('start', '');
	}
	
	# skip empty fragments
	if ($$self{'frags'}[0] eq '') {
	    shift(@{$$self{'frags'}});
	}

	# check if we are inside a fragment
	if (defined($frag)) {
	    if (defined($btype)) {
		my $next = shift(@{$$self{'frags'}});
		
		# Add to the fragment
		$frag .= $next;
		# We are done if this was the terminator
		last if $next =~ /^$$self{'term'}[$btype]$/;
		
	    }
	    else {
		if ($$self{'frags'}[0] =~ /^$$self{'open'}$/) {
		    last;
		}
		$frag .= shift(@{$$self{'frags'}});
	    }
	}
	else {
	    # Find the blocktype of the current block
	    $frag = shift(@{$$self{'frags'}});
	    if (defined($frag) && (@_ = $frag =~ /^$$self{'open'}$/)) {
		# grep in a scalar context returns the number of times
		# EXPR evaluates to true, which is this case will be
		# the index of the first defined element in @_.

		my $i = 1;
		$btype = grep { $i &&= !defined($_) } @_;
		if(!defined($$self{'term'}[$btype])) {
		    # Opening regexp captures entire block.
		    last;
		}
	    }
	}
    }
    $btype = $$self{'bodyid'}[$btype] if defined($btype);
    
    return ($btype, $frag);
}

1;