diff options
Diffstat (limited to 'bin/emptyhomes/canonicalise-eha')
-rwxr-xr-x | bin/emptyhomes/canonicalise-eha | 80 |
1 files changed, 80 insertions, 0 deletions
diff --git a/bin/emptyhomes/canonicalise-eha b/bin/emptyhomes/canonicalise-eha new file mode 100755 index 000000000..1030982fa --- /dev/null +++ b/bin/emptyhomes/canonicalise-eha @@ -0,0 +1,80 @@ +#!/usr/bin/perl -w + +# canonicalise-eha: +# Convert provided TSV file into one with standard names for MaPit +# +# Copyright (c) 2008 UK Citizens Online Democracy. All rights reserved. +# Email: matthew@mysociety.org. WWW: http://www.mysociety.org +# +# $Id: canonicalise-eha,v 1.1 2009-05-27 11:11:18 matthew Exp $ + +use strict; +require 5.8.0; + +# Horrible boilerplate to set up appropriate library paths. +use FindBin; +use lib "$FindBin::Bin/../perllib"; +use lib "$FindBin::Bin/../commonlib/perllib"; + +use mySociety::Config; +use mySociety::MaPit; +use mySociety::VotingArea; + +BEGIN { + mySociety::Config::set_file("$FindBin::Bin/../conf/general"); + mySociety::MaPit::configure(); +} + +my %councils; +open(FP, "$FindBin::Bin/../data/eha_listN"); +while (<FP>) { + s/\r?\n//g; + my ($name, $email) = split /\s*\t+\s*/; + $email ||= ''; + #$name = 'Blackburn' if $name eq 'Blackburn with Darwen'; + $name =~ s/^St /St. /; + $name =~ s/-(upon|on|le)-/ $1 /; + $name =~ s/^LB //; + $name =~ s/ BC| DC| MDC| MBC| (Metropolitan|Borough|District|County|City) Council| Council//; + print "Already have $name\n" if $councils{$name}; + $councils{$name} = $email; +} +close(FP); + +my @types = grep { $_ !~ /CTY|LGD/ } @$mySociety::VotingArea::council_parent_types; +my (%out); +foreach my $type (@types) { + my $areas = mySociety::MaPit::get_areas_by_type($type); + my $areas_info = mySociety::MaPit::get_voting_areas_info($areas); + foreach my $id (keys %$areas_info) { + my $area_info = $areas_info->{$id}; + next if $area_info->{country} eq 'S'; + my $name = $area_info->{name}; + if ($name eq 'Durham City Council') { + $out{$id} = $councils{'Durham City'}; + delete $councils{'Durham City'}; + next; + #} elsif ($name eq 'Durham County Council') { + # $out{$id} = $councils{'Durham County'}; + # next; + } + $name =~ s/( (Borough|City|District|County))* Council//; + if ($councils{$name} && $councils{$name} =~ /@/) { + $out{$id} = $councils{$name}; + delete $councils{$name}; + } else { + print "Problem: $name $area_info->{country}\n"; + } + } +} + +foreach (sort keys %councils) { + print "Not matched: $_ $councils{$_}\n"; +} + +# Output emails to canonical CSV +open(FP, ">$FindBin::Bin/../data/eha.csv"); +foreach (sort keys %out) { + print FP "$_," . $out{$_} . "\n"; +} +close FP; |