diff options
Diffstat (limited to 'bin')
-rwxr-xr-x | bin/canonicalise-csv | 110 | ||||
-rwxr-xr-x | bin/councils-no-contact | 106 | ||||
-rwxr-xr-x | bin/load-contacts | 48 |
3 files changed, 158 insertions, 106 deletions
diff --git a/bin/canonicalise-csv b/bin/canonicalise-csv
new file mode 100755
index 000000000..72502e2d8
--- /dev/null
+++ b/bin/canonicalise-csv
@@ -0,0 +1,110 @@
+#!/usr/bin/perl -w
+
+# canonicalise-csv:
+# Convert provided CSV file into one with standard names for MaPit
+#
+# Copyright (c) 2006 UK Citizens Online Democracy. All rights reserved.
+# Email: matthew@mysociety.org. WWW: http://www.mysociety.org
+#
+# $Id: canonicalise-csv,v 1.1 2006-10-13 15:37:48 matthew Exp $
+
+use strict;
+require 5.8.0;
+
+# Horrible boilerplate to set up appropriate library paths.
+use FindBin;
+use lib "$FindBin::Bin/../perllib";
+use lib "$FindBin::Bin/../../perllib";
+
+use mySociety::Config;
+use mySociety::MaPit;
+use mySociety::VotingArea;
+
+BEGIN {
+    mySociety::Config::set_file("$FindBin::Bin/../conf/general");
+    mySociety::MaPit::configure();
+}
+
+my %councils;    # canonical council name => email ('' when the CSV gives none)
+open(my $in, '<', "$FindBin::Bin/../data/councils.csv") or die "Cannot read councils.csv: $!";
+while (<$in>) {
+    s/\r?\n//g;
+    my ($name, $email) = split /,/;
+    $email ||= '';
+    next unless defined $name && length $name;    # blank line or no name: nothing to record
+    # Canonicalise: rewrite the CSV's names to the spellings matched against MaPit below
+    next if $name eq 'Londonderry'; # Dupe of Derry
+    next if $name eq 'Kingston upon Hull'; # Dupe of Hull
+    next if $name eq 'London' || $name eq 'Greater London'; # Untrustworthy
+
+    # Different
+    $name = 'Corporation of London' if $name eq 'City of London';
+    $name = "Renfrewsh'r" if $name eq 'Renfrewshire';
+    # Shorter
+    $name = 'Blackburn' if $name eq 'Blackburn with Darwen';
+    $name = 'Dungannon' if $name eq 'Dungannon & South Tyrone';
+    $name = 'Staffordshire' if $name eq 'Staffordshire County';
+    $name = 'Armagh' if $name eq 'Armagh City';
+    # Longer
+    $name = 'Kingston upon Hull' if $name eq 'Hull';
+    $name = "City of $name" if $name =~ /^(Edinburgh|Glasgow|York)$/;
+    $name .= ' Islands' if $name eq 'Shetland';
+    $name .= ' & Chelsea' if $name eq 'Kensington';
+    # Wrong
+    $name =~ s/King's Lynn/Kings Lynn/;
+    $name = 'Surrey Heath' if $name eq 'Surrey Health';
+    $name = 'Barking & Dagenham' if $name eq 'Barking-Dagenham';
+    $name = 'Newtownabbey' if $name eq 'Newtonabbey';
+    $name = 'Isles of Scilly' if $name eq 'Isle of Scilly';
+    # Compass
+    $name =~ s/North East /N. E. /;
+    $name =~ s/^North West /N. W. /;
+    $name =~ s/^North (?!Lincolnshire|Norfolk|Somerset)/N. /;
+    $name =~ s/^South (?!Shropshire|Staffordshire|Somerset)/S. /;
+    $name =~ s/^East (?!Staffordshire)/E. /;
+    $name =~ s/^West(ern)? (?!Berkshire|Wiltshire)/W. /;
+    $name =~ s/ W / W. /;
+    # Various
+    $name =~ s/^Great /Gt. /;
+    $name =~ s/^St /St. /;
+    $name =~ s/ and / & /;
+    $name =~ s/ ?Royal$//;
+    $name =~ s/ Borough$//;
+    $name =~ s/-(upon|on|le)-/ $1 /;
+    $councils{$name} = $email;
+}
+close($in);
+
+my $types = $mySociety::VotingArea::council_parent_types;
+my (%out, @missing);    # area ID => email; area IDs present in the CSV without a usable email
+foreach my $type (@$types) {
+    my $areas = mySociety::MaPit::get_areas_by_type($type);
+    my $areas_info = mySociety::MaPit::get_voting_areas_info($areas);
+    foreach my $id (keys %$areas_info) {
+        my $area_info = $areas_info->{$id};
+        my $name = $area_info->{name};
+        # Both Durham councils would reduce to plain "Durham" below, so name them by hand
+        if ($name eq 'Durham City Council') {
+            $name = 'Durham City';
+        } elsif ($name eq 'Durham County Council') {
+            $name = 'Durham County';
+        } else {
+            $name =~ s/( (Borough|City|District|County))* Council//;
+        }
+        if ($councils{$name} && $councils{$name} =~ /@/) {
+            $out{$id} = $councils{$name};
+        } elsif (exists($councils{$name})) {
+            push @missing, $id;
+        }
+    }
+}
+
+# Output missing IDs to STDOUT
+print join(',', @missing) . "\n";
+
+# Output emails to canonical CSV
+open(my $out_fh, '>', "$FindBin::Bin/../data/councils_canonical.csv") or die "Cannot write councils_canonical.csv: $!";
+foreach (sort keys %out) {
+    print $out_fh "$_," . $out{$_} . "\n";
+}
+close($out_fh) or die "Cannot finish writing councils_canonical.csv: $!";
diff --git a/bin/councils-no-contact b/bin/councils-no-contact
deleted file mode 100755
index 22855aaf0..000000000
--- a/bin/councils-no-contact
+++ /dev/null
@@ -1,106 +0,0 @@
-#!/usr/bin/perl -w
-
-# councils-no-contact:
-# Given list of councils without email, generate list of MaPit area IDs.
-#
-# Copyright (c) 2006 UK Citizens Online Democracy. All rights reserved.
-# Email: matthew@mysociety.org. WWW: http://www.mysociety.org
-#
-# $Id: councils-no-contact,v 1.2 2006-10-07 21:06:31 matthew Exp $
-
-use strict;
-require 5.8.0;
-
-# Horrible boilerplate to set up appropriate library paths.
-use FindBin;
-use lib "$FindBin::Bin/../perllib";
-use lib "$FindBin::Bin/../../perllib";
-
-use mySociety::Config;
-use mySociety::MaPit;
-use mySociety::VotingArea;
-
-BEGIN {
-    mySociety::Config::set_file("$FindBin::Bin/../conf/general");
-    mySociety::MaPit::configure();
-}
-
-my @councils_no_email = (
-'Aberdeenshire',
-'Antrim',
-'Ashford',
-'Blaenau Gwent',
-'Bradford',
-'Breckland',
-'Castlereagh',
-'Charnwood',
-'Cherwell',
-'Chesterfield',
-'Corporation of London',
-'Congleton',
-'Derwentside',
-'Dorset',
-'Durham County',
-'E. Ayrshire',
-'E. Dorset',
-'E. Hampshire',
-'E. Lothian',
-'E. Sussex',
-'Enfield',
-'Essex',
-'Halton',
-'Harborough',
-'Inverclyde',
-'Kingston upon Hull',
-'Magherafelt',
-'Melton',
-'Middlesbrough',
-'Mole Valley',
-'Newham',
-'N. Devon',
-'North Norfolk',
-'N. Tyneside',
-'N. Yorkshire',
-'Northumberland',
-'Nottinghamshire',
-'Oadby & Wigston',
-'Oldham',
-'Purbeck',
-'Reading',
-'Rhondda Cynon Taff',
-'Scottish Borders',
-'S. Bedfordshire',
-'S. Ribble',
-'South Somerset',
-'Southend on Sea',
-'Surrey',
-'Tendring',
-'Thurrock',
-'Tonbridge & Malling',
-'Wakefield',
-'Wansbeck',
-'W. Devon',
-'W. Sussex',
-'Westminster',
-'Weymouth & Portland',
-'Worcestershire'
-);
-
-my $types = $mySociety::VotingArea::council_parent_types;
-my @out;
-foreach my $type (@$types) {
-    my $areas = mySociety::MaPit::get_areas_by_type($type);
-    my $areas_info = mySociety::MaPit::get_voting_areas_info($areas);
-    foreach my $id (keys %$areas_info) {
-        my $area_info = $areas_info->{$id};
-        my $name = $area_info->{name};
-        foreach my $council (@councils_no_email) {
-            if ($name =~ /^$council( ((Borough|City|District|County) )*Council)?$/) {
-                push(@out, $id);
-            }
-        }
-    }
-}
-
-print join(',', @out);
-
diff --git a/bin/load-contacts b/bin/load-contacts
new file mode 100755
index 000000000..bac8497d3
--- /dev/null
+++ b/bin/load-contacts
@@ -0,0 +1,48 @@
+#!/usr/bin/perl -w
+
+# load-contacts:
+# Load the canonical council CSV (area ID, email) into the contacts table
+#
+# Copyright (c) 2006 UK Citizens Online Democracy. All rights reserved.
+# Email: matthew@mysociety.org. WWW: http://www.mysociety.org
+#
+# $Id: load-contacts,v 1.1 2006-10-13 15:37:48 matthew Exp $
+
+use strict;
+require 5.8.0;
+
+# Horrible boilerplate to set up appropriate library paths.
+use FindBin;
+use lib "$FindBin::Bin/../perllib";
+use lib "$FindBin::Bin/../../perllib";
+
+use mySociety::Config;
+use mySociety::DBHandle qw(dbh select_all);
+
+BEGIN {
+    mySociety::Config::set_file("$FindBin::Bin/../conf/general");
+    mySociety::DBHandle::configure(
+        Name => mySociety::Config::get('BCI_DB_NAME'),
+        User => mySociety::Config::get('BCI_DB_USER'),
+        Password => mySociety::Config::get('BCI_DB_PASS'),
+        Host => mySociety::Config::get('BCI_DB_HOST', undef),
+        Port => mySociety::Config::get('BCI_DB_PORT', undef)
+    );
+
+    if (!dbh()->selectrow_array('select secret from secret for update of secret')) {
+        local dbh()->{HandleError};
+        dbh()->do('insert into secret (secret) values (?)', {}, unpack('h*', mySociety::Util::random_bytes(32)));
+    }
+    dbh()->commit();
+}
+
+open(my $in, '<', "$FindBin::Bin/../data/councils_canonical.csv") or die "Cannot read councils_canonical.csv: $!";
+while (<$in>) {
+    s/\r?\n//g;
+    my ($id, $email) = split /,/;
+    dbh()->do("INSERT INTO contacts (area_id, email, editor, whenedited, note)
+        VALUES (?, ?, 'import', ms_current_timestamp(), 'Initial import')",
+        {}, $id, $email);
+}
+dbh()->commit();
+close($in);