aboutsummaryrefslogtreecommitdiffstats
path: root/bin/emptyhomes/canonicalise-eha
diff options
context:
space:
mode:
Diffstat (limited to 'bin/emptyhomes/canonicalise-eha')
-rwxr-xr-xbin/emptyhomes/canonicalise-eha80
1 files changed, 80 insertions, 0 deletions
diff --git a/bin/emptyhomes/canonicalise-eha b/bin/emptyhomes/canonicalise-eha
new file mode 100755
index 000000000..1030982fa
--- /dev/null
+++ b/bin/emptyhomes/canonicalise-eha
@@ -0,0 +1,80 @@
+#!/usr/bin/perl -w
+
+# canonicalise-eha:
+# Convert provided TSV file into one with standard names for MaPit
+#
+# Copyright (c) 2008 UK Citizens Online Democracy. All rights reserved.
+# Email: matthew@mysociety.org. WWW: http://www.mysociety.org
+#
+# $Id: canonicalise-eha,v 1.1 2009-05-27 11:11:18 matthew Exp $
+
+use strict;
+require 5.8.0;
+
+# Horrible boilerplate to set up appropriate library paths.
+use FindBin;
+use lib "$FindBin::Bin/../perllib";
+use lib "$FindBin::Bin/../commonlib/perllib";
+
+use mySociety::Config;
+use mySociety::MaPit;
+use mySociety::VotingArea;
+
+BEGIN {
+ mySociety::Config::set_file("$FindBin::Bin/../conf/general");
+ mySociety::MaPit::configure();
+}
+
+my %councils;
+open(FP, "$FindBin::Bin/../data/eha_listN");
+while (<FP>) {
+ s/\r?\n//g;
+ my ($name, $email) = split /\s*\t+\s*/;
+ $email ||= '';
+ #$name = 'Blackburn' if $name eq 'Blackburn with Darwen';
+ $name =~ s/^St /St. /;
+ $name =~ s/-(upon|on|le)-/ $1 /;
+ $name =~ s/^LB //;
+ $name =~ s/ BC| DC| MDC| MBC| (Metropolitan|Borough|District|County|City) Council| Council//;
+ print "Already have $name\n" if $councils{$name};
+ $councils{$name} = $email;
+}
+close(FP);
+
+my @types = grep { $_ !~ /CTY|LGD/ } @$mySociety::VotingArea::council_parent_types;
+my (%out);
+foreach my $type (@types) {
+ my $areas = mySociety::MaPit::get_areas_by_type($type);
+ my $areas_info = mySociety::MaPit::get_voting_areas_info($areas);
+ foreach my $id (keys %$areas_info) {
+ my $area_info = $areas_info->{$id};
+ next if $area_info->{country} eq 'S';
+ my $name = $area_info->{name};
+ if ($name eq 'Durham City Council') {
+ $out{$id} = $councils{'Durham City'};
+ delete $councils{'Durham City'};
+ next;
+ #} elsif ($name eq 'Durham County Council') {
+ # $out{$id} = $councils{'Durham County'};
+ # next;
+ }
+ $name =~ s/( (Borough|City|District|County))* Council//;
+ if ($councils{$name} && $councils{$name} =~ /@/) {
+ $out{$id} = $councils{$name};
+ delete $councils{$name};
+ } else {
+ print "Problem: $name $area_info->{country}\n";
+ }
+ }
+}
+
+foreach (sort keys %councils) {
+ print "Not matched: $_ $councils{$_}\n";
+}
+
+# Output emails to canonical CSV
+open(FP, ">$FindBin::Bin/../data/eha.csv");
+foreach (sort keys %out) {
+ print FP "$_," . $out{$_} . "\n";
+}
+close FP;