lib/LXRng/Parse/Simple.pm


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127

package LXRng::Parse::Simple;

use strict;
use integer;
use IO::Handle;

sub new {
    my ($class, $fileh, $tabhint, @blksep) = @_;

    my (@bodyid, @open, @term);

    while (my @a = splice(@blksep,0,3)) {
	push(@bodyid, $a[0]);
	push(@open,   $a[1]);
	push(@term,   $a[2]);
    }

    my $self = {
	'fileh'		=> $fileh,	# File handle
	'tabwidth'	=> $tabhint||8,	# Tab width
	'frags'		=> [],		# Fragments in queue
	'bodyid'	=> \@bodyid,	# Array of body type ids
	'bofseen'	=> 0,		# Beginning-of-file seen?
	'term'		=> \@term,
					# Fragment closing delimiters
	'open'		=> join('|', map { "($_)" } @open),
					# Fragment opening regexp
	'split'		=> join('|', @open, map { $_ eq '' ? () : $_ } @term),
					# Fragmentation regexp
    };

    return bless $self, $class;

# @frags $fileh $tabwidth $split @term $open $bodyid

}

sub untabify {
    my $t = $_[1] || 8;

    $_[0] =~ s/^(\t+)/(' ' x ($t * length($1)))/ge; # Optimize for common case.
    $_[0] =~ s/([^\t]*)\t/$1.(' ' x ($t - (length($1) % $t)))/ge;
    return($_[0]);
}


sub nextfrag {
    my ($self) = @_;

    my $btype = undef;
    my $frag = undef;
    my $line = '';

    while (1) {
	# read one more line if we have processed 
	# all of the previously read line
	if (@{$$self{'frags'}} == 0) {
	    $line = $$self{'fileh'}->getline;
	    
	    if ($. <= 2 &&
		$line =~ /^.*-[*]-.*?[ \t;]tab-width:[ \t]*([0-9]+).*-[*]-/) {
		# make sure there really is a non-zero tabwidth
		$$self{'tabwidth'} = $1 if $1 > 0;
	    }
	    
	    if(defined($line)) {
		untabify($line, $$self{'tabwidth'});

		# split the line into fragments
		$$self{'frags'} = [split(/($$self{'split'})/, $line)];
	    }
	}

	last if @{$$self{'frags'}} == 0;

	unless ($$self{'bofseen'}) {
	    # return start marker if file has contents
	    $$self{'bofseen'} = 1;
	    return ('start', '');
	}
	
	# skip empty fragments
	if ($$self{'frags'}[0] eq '') {
	    shift(@{$$self{'frags'}});
	}

	# check if we are inside a fragment
	if (defined($frag)) {
	    if (defined($btype)) {
		my $next = shift(@{$$self{'frags'}});
		
		# Add to the fragment
		$frag .= $next;
		# We are done if this was the terminator
		last if $next =~ /^$$self{'term'}[$btype]$/;
		
	    }
	    else {
		if ($$self{'frags'}[0] =~ /^$$self{'open'}$/) {
		    last;
		}
		$frag .= shift(@{$$self{'frags'}});
	    }
	}
	else {
	    # Find the blocktype of the current block
	    $frag = shift(@{$$self{'frags'}});
	    if (defined($frag) && (@_ = $frag =~ /^$$self{'open'}$/)) {
		# grep in a scalar context returns the number of times
		# EXPR evaluates to true, which is this case will be
		# the index of the first defined element in @_.

		my $i = 1;
		$btype = grep { $i &&= !defined($_) } @_;
		if(!defined($$self{'term'}[$btype])) {
		    # Opening regexp captures entire block.
		    last;
		}
	    }
	}
    }
    $btype = $$self{'bodyid'}[$btype] if defined($btype);
    
    return ($btype, $frag);
}

1;