1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
package LXRng::Parse::Simple;
use strict;
use integer;
use IO::Handle;
sub new {
my ($class, $fileh, $tabhint, @blksep) = @_;
my (@bodyid, @open, @term);
while (my @a = splice(@blksep,0,3)) {
push(@bodyid, $a[0]);
push(@open, $a[1]);
push(@term, $a[2]);
}
my $self = {
'fileh' => $fileh, # File handle
'tabwidth' => $tabhint||8, # Tab width
'frags' => [], # Fragments in queue
'bodyid' => \@bodyid, # Array of body type ids
'bofseen' => 0, # Beginning-of-file seen?
'term' => \@term,
# Fragment closing delimiters
'open' => join('|', map { "($_)" } @open),
# Fragment opening regexp
'split' => join('|', @open, map { $_ eq '' ? () : $_ } @term),
# Fragmentation regexp
};
return bless $self, $class;
# @frags $fileh $tabwidth $split @term $open $bodyid
}
sub untabify {
my $t = $_[1] || 8;
$_[0] =~ s/^(\t+)/(' ' x ($t * length($1)))/ge; # Optimize for common case.
$_[0] =~ s/([^\t]*)\t/$1.(' ' x ($t - (length($1) % $t)))/ge;
return($_[0]);
}
sub nextfrag {
my ($self) = @_;
my $btype = undef;
my $frag = undef;
my $line = '';
while (1) {
# read one more line if we have processed
# all of the previously read line
if (@{$$self{'frags'}} == 0) {
$line = $$self{'fileh'}->getline;
if ($. <= 2 &&
$line =~ /^.*-[*]-.*?[ \t;]tab-width:[ \t]*([0-9]+).*-[*]-/) {
# make sure there really is a non-zero tabwidth
$$self{'tabwidth'} = $1 if $1 > 0;
}
if(defined($line)) {
untabify($line, $$self{'tabwidth'});
# split the line into fragments
$$self{'frags'} = [split(/($$self{'split'})/, $line)];
}
}
last if @{$$self{'frags'}} == 0;
unless ($$self{'bofseen'}) {
# return start marker if file has contents
$$self{'bofseen'} = 1;
return ('start', '');
}
# skip empty fragments
if ($$self{'frags'}[0] eq '') {
shift(@{$$self{'frags'}});
}
# check if we are inside a fragment
if (defined($frag)) {
if (defined($btype)) {
my $next = shift(@{$$self{'frags'}});
# Add to the fragment
$frag .= $next;
# We are done if this was the terminator
last if $next =~ /^$$self{'term'}[$btype]$/;
}
else {
if ($$self{'frags'}[0] =~ /^$$self{'open'}$/) {
last;
}
$frag .= shift(@{$$self{'frags'}});
}
}
else {
# Find the blocktype of the current block
$frag = shift(@{$$self{'frags'}});
if (defined($frag) && (@_ = $frag =~ /^$$self{'open'}$/)) {
# grep in a scalar context returns the number of times
# EXPR evaluates to true, which is this case will be
# the index of the first defined element in @_.
my $i = 1;
$btype = grep { $i &&= !defined($_) } @_;
if(!defined($$self{'term'}[$btype])) {
# Opening regexp captures entire block.
last;
}
}
}
}
$btype = $$self{'bodyid'}[$btype] if defined($btype);
return ($btype, $frag);
}
1;
|