diff options
author | Petter Reinholdtsen <pere@hungry.com> | 2012-07-13 13:07:48 +0200 |
---|---|---|
committer | Petter Reinholdtsen <pere@hungry.com> | 2012-07-13 13:07:48 +0200 |
commit | 8866e843a0011d7fb77cfaa41eeb3791d993a705 (patch) | |
tree | 49ac0ee1f908485b822df611c8d5de1988a1a4dc | |
parent | b023d46935617dcdefbf80bb751c4dbc9987d53a (diff) |
Web frontend prototype.
-rwxr-xr-x | cgi-bin/felles-postjournal | 329 |
1 files changed, 329 insertions, 0 deletions
diff --git a/cgi-bin/felles-postjournal b/cgi-bin/felles-postjournal new file mode 100755 index 0000000..0fc9470 --- /dev/null +++ b/cgi-bin/felles-postjournal @@ -0,0 +1,329 @@ +#!/usr/bin/perl -w + +use warnings; +use strict; + +use CGI; +use DBI; +use XML::RSS; +use Data::Dumper; + +my $dbdriver = "Pg"; +my $dbhost = "sqldb.nuug.no"; +my $dbtable = "journal"; +my $dbname = "postjournal"; +my $dbuser = "pere"; +my $dbpwd = load_pwd("/home/pere/.nuug-fpj-pwd"); + +sub load_pwd { + my $filename = shift; + open (my $fh, "<", $filename) || die "unable to read $filename"; + my $pwd = <$fh>; + chomp $pwd; + close($fh); + return $pwd; +} + + +sub list_sources { + my $dbh = shift; + my $sql = "SELECT scraper AS source, COUNT(*) AS count, MIN(recorddate) AS firstdate, MAX(recorddate) AS lastdate, MAX(scrapestamputc) as lastscrape FROM journal GROUP BY scraper ORDER BY lastdate, firstdate, count, source"; + my %sources; + print <<EOF; +<table class="sourcelist"> +<tr><th>Source</th><th>Count</th><th>First record date</th><th>Last record date</th><th>Last scraper update</th></tr> +EOF + for my $ref ( @{ $dbh->selectall_arrayref($sql, { Slice => {} }) } ) { +# print Dumper ($ref); + print "<tr>\n"; + for my $field (qw(source count firstdate lastdate lastscrape)) { + my $value = $ref->{$field}; + if ('source' eq $field) { + print "<td><a href='https://scraperwiki.com/scrapers/$value'>$value</a></td>\n"; + } else { + print "<td>$value</td>\n"; + } + } + print "</tr>\n"; + } + print "</table>\n" + + # Average interval between document date and record date for the + # last months outgoing email. Should be positive and close to + # zero. + my $sql = "SELECT agency, ROUND(AVG(recorddate-docdate)) AS avg FROM journal WHERE docdate > CURRENT_DATE - INTERVAL '1 months' AND doctype = 'U' GROUP BY agency ORDER BY avg;"; + +} + +sub agencies { + my $dbh = shift; + my $sql = "SELECT agency, count(*) as entries FROM journal group by agency ORDER BY agency;"; + my %agencies; + for my $ref ( @{ $dbh->selectall_arrayref($sql, { Slice => {} }) } ) { +# print Dumper ($ref); + my $agency = $ref->{'agency'}; + my $entries = $ref->{'entries'}; + $agencies{$agency} = $entries; + } + + return %agencies; +} + +sub list_entries { + my ($dbh, $sqlref, $argsref, $format) = @_; + my %noshow = + ( + 'scrapedurl' => 1, + 'scrapestamputc' => 1, + 'scraper' => 1, + 'caseid' => 1, + 'caseyear' => 1, + 'caseseqnr' => 1, + 'journalyear' => 1, + 'journalseqnr' => 1, + ); + my @htmlfieldorder = ( + 'agency', + 'recorddate', + 'docdate', + 'docdesc', + 'casedesc', + 'sender', + 'recipient', + 'journalid', + 'casedocseq', + 'doctype', + 'exemption', + ); + my $where = ""; + if (@{$sqlref}) { + $where = "WHERE (" . join(") and (", @{$sqlref}) . ")"; + } + my $sql = "SELECT * FROM journal $where ORDER BY recorddate DESC, caseyear DESC, caseseqnr DESC, casedocseq DESC LIMIT 200 ;"; + my $first = 1; +# print "SQL: $sql"; + my $sth = $dbh->prepare($sql) or die $dbh->errstr; + $sth->execute(@{$argsref}) or die $dbh->errstr; + if ("csv" eq $format) { + # Output according to RFC 4180 + sub csv_format { + my $s = shift; + if ($s =~ m/"/) { + $s =~ s/"/""/ + } + if ($s =~ m/,/ or $s =~ m/\n/ or $s =~ m/\r/ or $s =~ m/"/) { + $s = "\"$s\""; + } + return $s + } + while (my $ref = $sth->fetchrow_hashref ) { + if ($first) { + $first = 0; + print join(",", keys %{$ref}), "\r\n"; + } + my @values = map { + defined $ref->{$_} ? csv_format($ref->{$_}) : "" + } keys %{$ref}; + print join(",", @values), "\r\n"; + } + } elsif ("rss" eq $format) { + # create the RSS file (version 1.0) + my $rss = new XML::RSS( version => '1.0', + encoding => 'UTF-8', + # Encoding is a no-op, all our input is UTF-8 + encode_cb => sub { return $_[1]; }); + my $channellink = "http://www.nuug.no/"; + $rss->channel( + title => "Postjournalsøk", + link => $channellink, + description => "Søk i felles postjournal", + ); + + while (my $ref = $sth->fetchrow_hashref ) { + my $docdesc = $ref->{'docdesc'}; + my $casedesc = $ref->{'casedesc'}; + my $agency = $ref->{'agency'}; + my $caseid = $ref->{'caseid'}; + my $casedocseq = $ref->{'casedocseq'}; + my $docdate = $ref->{'docdate'}; + my $recorddate = $ref->{'recorddate'}; + my $sender = $ref->{'sender'}; + my $recipient = $ref->{'recipient'}; + my $exemption = $ref->{'exemption'}; + + my $link = "http://www.nuug.no/cgi-bin/felles-postjournal?agency=$agency&caseid=$caseid&casedocseq=$casedocseq"; + + my $description = "<p>Agency: $agency</p>". + "<p>Docdate: $docdate</p>". + "<p>Docdate: $docdate</p>". + "<p>Casedesc: $casedesc</p>". + "<p>Exemption: $exemption</p>"; + $description .= "<p>Sender: $sender</p>" if $sender; + $description .= "<p>Recipient: $recipient</p>" if $recipient; + + $rss->add_item( + title => "$docdesc ($agency)", + link => $link, + description => $description, + dc => { + date => $recorddate + }, + ); + } + + print $rss->as_string; + } else { + print "<table class='journaltable' border='1'>\n"; + while (my $ref = $sth->fetchrow_hashref ) { + if ($first) { + print "<tr>\n"; + for my $field (@htmlfieldorder) { + next if exists $noshow{$field}; + print "<th>$field</th>\n"; + } + $first = 0; + print "</tr>\n\n"; + } + print "<tr>\n"; + for my $field (@htmlfieldorder) { + next if exists $noshow{$field}; + my $value = $ref->{$field}; + if (defined $value) { + if ("casedesc" eq $field) { + my $caseid = $ref->{'caseid'}; + my $thisagency = $ref->{'agency'}; + my $url="?caseid=$caseid&agency=$thisagency"; + print "<td><a href='$url'>$value</a></td>\n"; + } else { + print "<td>$value</td>\n"; + } + } else { + print "<td></td>\n"; + } + } + print "</tr>\n\n"; + } + print "</table>\n"; + } +} + +my $query = new CGI; + +my $dbh = + DBI->connect("dbi:$dbdriver:dbname=$dbname;host=$dbhost;sslmode=require", + $dbuser, $dbpwd) || + die "Unable to connect to DB: $DBI::errstr"; + +if ($dbdriver eq "Pg") { # How is this done with other databases? + $dbh->do("SET CLIENT_ENCODING TO 'UTF-8'"); +} + +my $agency = $query->param('agency'); + +my @sql = (); +my @args = (); + +my $format = "html"; +my $q = ''; +my $caseid; +my $listsource; + +if ($query->param('listsource')) { + $listsource = $query->param('listsource'); +} + +if ($query->param('q')) { + $q = $query->param('q'); + my $st = "%$q%"; + push(@sql, "docdesc ilike ? or casedesc ilike ? or sender ilike ? or recipient ilike ?"); + push(@args, $st, $st, $st, $st); +} +if ($query->param('caseid')) { + $caseid = $query->param('caseid'); + my ($caseyear, $caseseqnr) = split("/", $caseid); + push(@sql, "caseyear = ? and caseseqnr = ?"); + push(@args, $caseyear, $caseseqnr); +} +if ($agency) { + push(@sql, "agency = ?"); + push(@args, $agency); +} +if ($query->param('format')) { + $format = $query->param('format'); +} + +if ("csv" eq $format) { + print $query->header(-type => 'text/csv', + -attachment => 'postjournal-utdrag.csv', + -charset => 'utf-8'); + list_entries($dbh, \@sql, \@args, "csv"); +} elsif ("rss" eq $format) { + print $query->header(-type => 'application/rss+xml', + -attachment => 'postjournal-utdrag.rss', + -charset => 'utf-8'); + list_entries($dbh, \@sql, \@args, "rss"); +} elsif ("html" eq $format) { + print $query->header(-charset=> 'utf-8'); + print <<EOF; +<html> +<head> +<title>Offentlige postjournaler</title> +</head> +<body> + +<p>Se <a href="https://scraperwiki.com/tags/postjournal">scraperwiki</a> +for kildedata (<a href="?listsource=yes">kildestatus</a>). Dette er et +<a href="http://www.nuug.no/">NUUG</a>-prosjekt.</p> + +<h1>Offentlige postjournaler</h1> +EOF + + print <<EOF; + +<p><form> +EOF + if ($agency) { + print "<input type='hidden' name='agency' value='$agency'>\n"; + print "Søk (kun $agency): <input name='q' value='$q'> <input type='submit'>\n"; + } else { + print "Søk: <input name='q' value='$q'> <input type='submit'>\n"; + } + print <<EOF; +</form></p> + +EOF + + # Only + if (! $q and ! defined $caseid and ! defined $agency and ! defined $listsource) { + my %agencies = agencies($dbh); + print <<EOF; +<p>Journal for: +EOF + for my $aagency (sort keys %agencies) { + print "<b>"; +# print "*" if (defined $agency and $agency eq $aagency); + print "<a href='?agency=$aagency'>$aagency</a></b>"; + print " (", $agencies{$aagency}, ")\n"; + } + } + + if ($listsource) { + list_sources($dbh); + } else { + my $href=""; + $href .= "&agency=$agency" if defined $agency; + $href .= "&q=$q" if defined $q; + $href .= "&caseid=$caseid" if defined $caseid; + print "<p>Last ned som "; + print "<a href='?format=csv$href'>CSV/regneark</a> (<a href=\"http://www.ietf.org/rfc/rfc4180.txt\">RFC 4180</a>)"; + print ", <a href='?format=rss$href'>RSS</a>"; + print "</p>\n\n"; + + list_entries($dbh, \@sql, \@args, "html"); + } + print <<EOF; +</body> +</html> +EOF + +} |