package LXRng::Search::Xapian;

use strict;
use Search::Xapian qw/:ops :db :qpstem/;
use Search::Xapian::QueryParser;


sub new {
    my ($class, $db_root) = @_;

    $ENV{'XAPIAN_PREFER_FLINT'} = 1;
    my $self = bless({'db_root' => $db_root,
		      'writes' => 0},
		     $class);

    return $self;
}

sub wrdb {
    my ($self) = @_;

    return $$self{'wrdb'} ||= Search::Xapian::WritableDatabase
	->new($$self{'db_root'}, Search::Xapian::DB_CREATE_OR_OPEN);
}

sub new_document {
    my ($self, $desc) = @_;

    my $doc = Search::Xapian::Document->new();
    $doc->set_data($desc);
    return $doc;
}

sub add_document {
    my ($self, $doc, $rel_ids) = @_;
   
    foreach my $r (@$rel_ids) {
	$doc->add_term('__@@LXRREL_'.$r);
    }
    my $doc_id = $self->wrdb->add_document($doc);
    $self->{'writes'}++;
    $self->flush() if $self->{'writes'} % 499 == 0;
    return $doc_id;
}

sub add_release {
    my ($self, $doc_id, $rel_ids) = @_;

    my $doc = $self->wrdb->get_document($doc_id);

    my $termend = $doc->termlist_end;
    my $changes = 0;
    foreach my $r (@$rel_ids) {
	my $reltag = '__@@LXRREL_'.$r;
	my $term = $doc->termlist_begin;
	$term->skip_to($reltag);
	if ($term ne $termend) {
	    next if $term->get_termname eq $reltag;
	}
	$doc->add_term($reltag);
	$changes++;
    }

    $self->wrdb->replace_document($doc_id, $doc) if $changes;
    return $changes;
}

sub flush {
    my ($self) = @_;

    warn "\n*** hash: flushing\n";
    $self->wrdb->flush();
}

sub search {
    my ($self, $rel_id, $query) = @_;

    my $db = Search::Xapian::Database->new($$self{'db_root'});
    my $qp = new Search::Xapian::QueryParser($db);
    $qp->set_stemming_strategy(STEM_NONE);
    $qp->set_default_op(OP_AND);

    if ($query =~ /\"/) {
	# Only moderate fixup of advanced queries
	$query =~ s/\b([A-Z]+)\b/\L$1\E/g;
    }
    else {
	$query =~ s/([\S_]+_[\S_]*)/\"$1\"/g;
	$query =~ s/_/ /g;
	$query =~ s/\b(?![A-Z][^A-Z]*\b)(\S+)/\L$1\E/g;
    }

    my $query = $qp->parse_query($query);
    $query = Search::Xapian::Query
	->new(OP_FILTER, $query, 
	      Search::Xapian::Query->new('__@@LXRREL_'.$rel_id));

    my $enq = $db->enquire($query);

    my $matches = $enq->get_mset(0, 100);
    my $total = $matches->get_matches_estimated();
    my $size = $matches->size();

    my @res;

    my $match = $matches->begin();
    my $i = 0;
    while ($i++ < $size) {
	my $term = $enq->get_matching_terms_begin($match);
	my $termend = $enq->get_matching_terms_end($match);
	my %lines;
	my $hits = 0;
	while ($term ne $termend) {
	    if ($term !~ /^__\@\@LXR/) {
		my $pos = $db->positionlist_begin($match->get_docid(), $term);
		my $posend = $db->positionlist_end($match->get_docid(), $term);
		while ($pos ne $posend) {
		    $lines{int($pos/100)}{$term} = 1;
		    $hits++;
		    $pos++;
		}
	    }
	    $term++;
	}
	# Sort lines in order of the most matching terms
	my %byhits;
	foreach my $l (keys %lines) {
	    $byhits{0+keys(%{$lines{$l}})}{$l} = 1;
	}
	# Only consider the lines having the max number of terms
	my ($max) = sort { $b <=> $a } keys %byhits;
	my @lines = sort keys(%{$byhits{$max}});

	push(@res, [$match->get_percent(),
		    $match->get_document->get_data(),
		    $lines[0],
		    0+@lines])
	    if @lines;
	$match++;
    }

    return ($total, \@res);
}

sub reset_db {
    my ($self) = @_;

    foreach my $f (glob($$self{'db_root'}.'/*')) {
	unlink($f);
    }
}

sub DESTROY {
    my ($self) = @_;
   
    $self->flush() if $self->{'writes'} > 0;
}

1;