#!/usr/bin/perl
#
# Copyright (C) 2008 Arne Georg Gleditsch <lxr@linux.no> and others.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# The full GNU General Public License is included in this distribution
# in the file called COPYING.

use strict;
use warnings;

use FindBin;
use lib "$FindBin::Bin/lib";

use LXRng ROOT => $FindBin::Bin;
use LXRng::Context;
use LXRng::Lang;
use LXRng::Index;
use LXRng::Parse::Simple;

use Carp;
use Data::Dumper;
use IO::Handle;
use Fcntl;
use Term::ProgressBar;
use Devel::Size qw(size total_size);
use Encode;

$SIG{'INT'}  = sub { die "\nSIGINT: $$: please wait, flushing caches...\n"; };
$SIG{'QUIT'} = sub { die "\nSIGQUIT: $$: please wait, flushing caches...\n"; };
$SIG{'TERM'} = sub { die "\nSIGTERM: $$: please wait, flushing caches...\n"; };
$SIG{'PIPE'} = sub { die "\nSIGTERM: $$: please wait, flushing caches...\n"; };
$SIG{'USR1'} = \&memory_status;

autoflush STDOUT 1;
autoflush STDERR 1;

my $tree = shift(@ARGV);
my @versions = @ARGV;

my $context = LXRng::Context->new('tree' => $tree);
die "Usage: $0 <tree-id>\n" unless $context and $context->tree;
LXRng::Lang->init($context);

my $index   = $context->config->{'index'};
my $usage   = $context->config->{'usage'};
my $hash    = $context->config->{'search'};
my $rep     = $context->config->{'repository'};
my $progress;
my $progress_ident;
my $progress_count;
my $progress_next;
my $progress_target;

$SIG{'__WARN__'} = sub { $progress ? $progress->message(shift) : warn(@_) };

sub memory_status {
    open(my $statf, "< /proc/$$/statm");
    my %stat;
    @stat{qw(size resident shared text lib data)} =
	split(" ", <$statf>);
    warn '$$$ total size  '.$stat{'size'}."\n";
    foreach my $k (keys %$index) {
	warn '$$$ index       '.
	    sprintf("%-17s %10d\n", $k, total_size($$index{$k}));
    }
}

sub progress_init {
    my ($ident, $total) = @_;

    if (-t STDOUT) {
	$progress = Term::ProgressBar->new({name => $ident,
					    count => $total,
					    ETA => 'linear'});
	$progress->max_update_rate(0.25);
    }
    else {
	$progress_ident  = $ident;
	$progress_target = $total;
	$progress_next   = int($progress_target/10);
	$progress_count  = 0;
    }
}

sub progress_update {
    my ($count) = @_;

    return $progress->update($count) if $progress;

    if (defined($count)) {
	$progress_count = $count;
    }
    else {
	$progress_count++;
    }
    if ($progress_count > $progress_next or
	$progress_count >= $progress_target)
    {
	print("$progress_ident: $progress_count/$progress_target done...\n");
	$progress_next = $progress_count + int($progress_target/10);
    }
}

sub progress_target {
    my ($target) = @_;

    return $progress->target($target) if $progress;

    $progress_target = $target;
}

sub make_add_ident($) {
    my ($fileid) = @_;

    my $last_func;
    my %identcache;

    my $add_ident;
    $add_ident = sub {
	my ($symbol, $info) = @_;
	
	if ($$info{'kind'} eq 'f') {
	    $last_func = $symbol;
	}
	if ($$info{'kind'} eq 'l') {
	    $$info{'context'} = $identcache{$last_func} if
		defined($last_func);
	}
	if (exists $$info{'class'}) {
	    $$info{'context'} = $identcache{$$info{'class'}};
	}
	if (exists $$info{'struct'}) {
	    $$info{'context'} = $identcache{$$info{'struct'}};
	}

	$identcache{$symbol} = 
	    $index->add_ident($fileid, $$info{'line'},
			      $symbol, $$info{'kind'},
			      $$info{'context'});
    
	if ($$info{'kind'} eq 'f' and exists $$info{'signature'}) {
	    # This needs to be more robust.  Perhaps ctags ought to do it.
	    
	    foreach my $v (split(/,/, $$info{'signature'})) {
		next if 
		    $v !~ /([a-zA-Z_0-9]+)[^a-zA-Z_0-9]*$/ or
		    $1 eq 'void';

		$add_ident->($1, {'kind' => 'l', 'line' => $$info{'line'}});
	    }
	}
    }
}

sub index_file($$) {
    my ($file, $fileid) = @_;

    my $lang = LXRng::Lang->new($file);

    return 0 unless $index->to_index($fileid);
    return 1 unless $lang->doindex();

    warn("--- indexing    ".$file->name." [".$file->revision."]\n");

    my $add_ident = make_add_ident($fileid);
    return $lang->index_file($context, $file, $add_ident)
	if $lang->can('index_file');
    return 1 unless $lang->ctagslangname();

    my $extra_flags = $context->config->{'ctags_flags'} || [];

    my $ctags;
    my $pid = open($ctags, '-|');
    die $! unless defined $pid;
    
    if ($pid == 0) {
	exec('ctags-exuberant',
	     @$extra_flags,
	     '--fields=+aifmknsSz', '--sort=no',
	     '--excmd=number', '-f', '-',
	     '--language-force='.$lang->ctagslangname,
	     $lang->ctagsopts,
	     $file->phys_path);

	# Still here?
	warn $!;
	kill(9, $$);
    }

    while (<$ctags>) {
	chomp;
	my ($symbol, $file, $excmd, @info) = split(/\t/);
	$symbol = $lang->mangle_sym($symbol);
	my %info = map { split(/:/, $_, 2) } @info;
	    
	$add_ident->($symbol, \%info);
    }
    return 1;
}

# We allow $usage to be supplied both by the Search and Index
# backends, since it's not quite clear which is better.  There's a
# certain added complexity because of this, so perhaps this feature
# ought to go once a clear best choice emerges.  (For instance,
# reference_file is called from hash_file with a reference to the
# hash indexing's document object, which is only actually used if
# $usage is the Search backend.)

sub reference_file($$$) {
    my ($file, $fileid, $doc) = @_;

    return 0 unless $index->to_reference($fileid);
    my $lang = LXRng::Lang->new($file);
    return 1 unless $lang->doindex();

    my $handle;
    sysopen($handle, $file->phys_path, 0) || die($!);

    my $parse = new LXRng::Parse::Simple($handle, 8,
					 @{$lang->parsespec});

    warn("--- referencing ".$file->name." [".$file->revision."]\n");

    my $reserved = $lang->reserved();
    my $re = $lang->identifier_re();

    my %refs;
    my $line = 1;
    while (1) {
	my ($btype, $frag) = $parse->nextfrag;
    
	last unless defined $frag;
	$btype ||= 'code';
	if ($btype eq 'code') {
	    
	    while ($frag =~ /\G.*?(?:(\n)|$re)/gc) {
		$line++ && next if defined $1;
		my $sym = $lang->mangle_sym($2);

		next if $$reserved{$sym};
		push(@{$refs{$sym} ||= []}, $line);
	    }
	}
	else {
	    if ($btype eq 'include') {
		my @paths = 
		    $lang->resolve_include($context, $file, $frag);

		foreach my $path (@paths) {
		    $index->add_include($fileid, $path);
		}
	    }
	    $line += $frag =~ tr/\n/\n/;
	}
    }
    close($handle);

    foreach my $sym (keys %refs) {
	my $sym_id = $index->symbol_id($sym, 1);
	$usage->add_usage($doc, $fileid, $sym_id, $refs{$sym});
    }
    undef %refs;
    return 1;
}

sub hash_file($$$) {
    my ($file, $fileid, $rels) = @_;

    return 0 if
	defined($context->config->{'search_size_limit'}) and
	$context->config->{'search_size_limit'} > 0 and
	$file->size > $context->config->{'search_size_limit'};

    my $docid;
    if ($index->to_hash($fileid)) {
	my $handle;
	sysopen($handle, $file->phys_path, 0) || die($!);
	unless (-T $handle) {
	    $handle->close();
	    return 0;
	}

	warn("--- hashing     ".$file->name." [".$file->revision."]\n");
	my $doc = $hash->new_document($file->name);
	my $charset = $context->config->{'content_charset'} || [];
	$charset = [ref($charset) eq 'ARRAY' ? @$charset : $charset];
	push(@$charset, 'iso-8859-1'); # Fall back
	my $add_line = $hash->make_add_text($doc);

	while (<$handle>) {
	    my $pos = 0;
	    my $text;
	    while (@$charset) {
		$text = eval { decode($$charset[0], $_, Encode::FB_CROAK); };
		last unless $@;
		shift(@$charset);
	    }

	    $add_line->($.*100, $text);
	}

	reference_file($file, $fileid, $doc);
	$docid = $hash->add_document($doc, [map {
	    $index->release_id($tree, $_) } @$rels]);
	$index->add_hashed_document($fileid, $docid);
	$index->set_rfile_charset($fileid, $$charset[0] || 'ascii');
	$handle->close();
	return 1;
    }
    else {
	$docid = $index->get_hashed_document($fileid);
	return 0 unless $docid;
	my $doc = $hash->get_document($docid);
	if (reference_file($file, $fileid, $doc)) {
	    $hash->save_document($docid, $doc);
	}

	my $changed = $hash->add_release($docid, [map {
	    $index->release_id($tree, $_) } @$rels]);
	return $changed;
    }
}


sub inventory_release($) {
    my ($version) = @_;

    warn("--- recording all files for $version\n");

    my $iter = $rep->iterator($version);
    LXRng::Index::transaction {
	my $root = $rep->node('/', $version) or die "Bad root for release $version";

	my $node;
	while (defined($node = $iter->next)) {
	    next if $node->name =~ /\.o$/;
	    my $fileid = $index->rfile_id($node, 1);
	    $index->add_filerelease($tree, $version, $fileid);
	}
    } $index;
}

sub index_pending() {
    my $pending = $index->pending_files($tree);

    my $total = 0+@$pending;
    my $count = 0;

    print("\n");
    progress_init('Indexing', $total);
    warn("--- indexing/updating $total files...\n");

    foreach my $p (@$pending) {
	LXRng::Index::transaction {
 	    my ($fileid, $path, $rev) = @$p;
 	    my $rels = $index->new_releases_by_file($fileid);
 	    next unless @$rels;

	    $context->release($$rels[0]); # Needed for include resolution.
 	    my $node = $rep->node($path, $$rels[0], $rev);
 	    next unless $node;

 	    if (hash_file($node, $fileid, $rels) |
		index_file($node, $fileid))
	    {
		$count++;
		progress_update($count);
	    }
	    else {
		$total--;
		my $skip = @$pending - $total; 
		if ($skip % 100 == 0) {
		    warn("--- skipped/refreshed $skip files...\n");
		}
		progress_target($total);
	    }
	} $index;
      }
    progress_update($total);

    my $done = $index->update_indexed_releases($tree);

    warn("=== releases: ".join(", ", @$done)."\n") if
	@$done;
    print("\n");
}


progress_init('Recording', 1);

if (@versions) {
    progress_target(1+@versions);
    foreach my $version (@versions) {
	inventory_release($version);
	progress_update();
    }
}
else {
    @versions = grep { ! $index->_get_release($index->tree_id($tree), $_);
		   } @{$context->all_releases};
    progress_target(1+@versions);
    LXRng::Index::transaction {
	foreach my $version (reverse @versions) {
	    # TODO: Breaking during the inventory process renders
	    # version half-recorded.
	    inventory_release($version);
	    progress_update();
	}
    } $index;
}
progress_update();

LXRng::Index::transaction {
    index_pending();
} $index;
$hash->flush();

undef $progress;