#!/usr/bin/perl -w
#
# canonicalise-eha:
# Convert provided TSV file into one with standard names for MaPit
#
# Reads a tab-separated list of "council name <TAB> email" from
# ../data/eha_listN, normalises the council names, matches them against the
# names MaPit reports for each council-level area, and writes
# "area_id,email" lines to ../data/eha.csv. Diagnostics about duplicate,
# unmatched and problematic names go to standard output.
#
# Copyright (c) 2008 UK Citizens Online Democracy. All rights reserved.
# Email: matthew@mysociety.org. WWW: http://www.mysociety.org
#
# $Id: canonicalise-eha,v 1.1 2009-05-27 11:11:18 matthew Exp $

use strict;
require 5.8.0;

# Horrible boilerplate to set up appropriate library paths.
use FindBin;
use lib "$FindBin::Bin/../perllib";
use lib "$FindBin::Bin/../commonlib/perllib";

use mySociety::Config;
use mySociety::MaPit;
use mySociety::VotingArea;

BEGIN {
    mySociety::Config::set_file("$FindBin::Bin/../conf/general");
    mySociety::MaPit::configure();
}

# Normalised council name => email address (possibly empty) from the input.
my %councils;

my $input_file = "$FindBin::Bin/../data/eha_listN";
open(my $in, '<', $input_file)
    or die "Cannot open $input_file for reading: $!\n";

while (my $line = <$in>) {
    # Strip the line ending, tolerating DOS-style CRLF.
    $line =~ s/\r?\n//g;

    my ($name, $email) = split /\s*\t+\s*/, $line;

    # Blank lines (or lines with no name column) carry no council.
    next unless defined $name && length $name;
    $email ||= '';

    #$name = 'Blackburn' if $name eq 'Blackburn with Darwen';

    # Rewrite the supplied names towards the form the area names are
    # compared against below.
    $name =~ s/^St /St. /;
    $name =~ s/-(upon|on|le)-/ $1 /;
    $name =~ s/^LB //;
    $name =~ s/ BC| DC| MDC| MBC| (Metropolitan|Borough|District|County|City) Council| Council//;

    # Use exists() so a repeated council is reported even when the first
    # occurrence had no email address.
    print "Already have $name\n" if exists $councils{$name};
    $councils{$name} = $email;
}
close($in);

# Council-level area types, excluding any whose code matches CTY or LGD.
my @types = grep { $_ !~ /CTY|LGD/ } @$mySociety::VotingArea::council_parent_types;

# Area id => email address, for every area we managed to match.
my (%out);

foreach my $type (@types) {
    my $areas = mySociety::MaPit::get_areas_by_type($type);
    my $areas_info = mySociety::MaPit::get_voting_areas_info($areas);

    foreach my $id (keys %$areas_info) {
        my $area_info = $areas_info->{$id};

        # Areas whose country code is 'S' are skipped entirely.
        next if $area_info->{country} eq 'S';

        my $name = $area_info->{name};

        # Special case: the generic suffix-stripping below would reduce
        # both "Durham City Council" and "Durham County Council" to
        # "Durham", so the city council is matched explicitly.
        if ($name eq 'Durham City Council') {
            $out{$id} = $councils{'Durham City'};
            delete $councils{'Durham City'};
            next;
        #} elsif ($name eq 'Durham County Council') {
        #    $out{$id} = $councils{'Durham County'};
        #    next;
        }

        $name =~ s/( (Borough|City|District|County))* Council//;

        # Only accept a match that has something resembling an email.
        if ($councils{$name} && $councils{$name} =~ /@/) {
            $out{$id} = $councils{$name};
            # Remove matched entries so the leftovers can be reported.
            delete $councils{$name};
        } else {
            print "Problem: $name $area_info->{country}\n";
        }
    }
}

# Anything still in %councils never matched an area name.
foreach my $name (sort keys %councils) {
    print "Not matched: $name \n$councils{$name}\n";
}

# Output emails to canonical CSV
my $output_file = "$FindBin::Bin/../data/eha.csv";
open(my $out_fh, '>', $output_file)
    or die "Cannot open $output_file for writing: $!\n";
foreach my $id (sort keys %out) {
    print $out_fh "$id," . $out{$id} . "\n";
}
# Buffered write errors only surface at close, so check it.
close($out_fh)
    or die "Error writing $output_file: $!\n";