diff options
author | Matthew Somerville <matthew@mysociety.org> | 2020-04-03 14:00:41 +0100 |
---|---|---|
committer | Matthew Somerville <matthew@mysociety.org> | 2020-04-03 14:00:41 +0100 |
commit | 9ff04868ea123b8b309f1c5f22493bddff756c3a (patch) | |
tree | 5c3c06733daaccc62e3100bbb5b5bb2a572b3160 | |
parent | 4590344c558a925408499be5a977b1ed94a22b1b (diff) | |
parent | 592d52838f3d1cd1824f6a37e273705112a7011a (diff) |
Merge branch 'improve-fetch-scripts'
-rw-r--r-- | CHANGELOG.md | 1 | ||||
-rwxr-xr-x | bin/fetch | 58 | ||||
-rwxr-xr-x | bin/fetch-comments | 27 | ||||
-rwxr-xr-x | bin/fetch-comments-24hs | 39 | ||||
-rwxr-xr-x | bin/fetch-reports | 27 | ||||
-rwxr-xr-x | bin/fetch-reports-24hs | 37 | ||||
-rwxr-xr-x | bin/open311-update-reports | 2 | ||||
-rw-r--r-- | conf/crontab-example | 6 | ||||
-rw-r--r-- | conf/general.yml-example | 7 | ||||
-rw-r--r-- | cpanfile | 1 | ||||
-rw-r--r-- | cpanfile.snapshot | 18 | ||||
-rw-r--r-- | perllib/FixMyStreet.pm | 1 | ||||
-rw-r--r-- | perllib/Open311/GetServiceRequestUpdates.pm | 86 | ||||
-rw-r--r-- | perllib/Open311/GetServiceRequests.pm | 22 | ||||
-rw-r--r-- | t/open311/getservicerequests.t | 42 | ||||
-rw-r--r-- | t/open311/getservicerequestupdates.t | 22 |
16 files changed, 185 insertions, 211 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index ce54dcab0..ae3b4fcd5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - Refactor Script::Report into an object. - Move summary failures to a separate script. - Add script to export/import body data. + - Add fetch script that does combined job of fetch-comments and fetch-reports. - UK: - Added junction lookup, so you can search for things like "M60, Junction 2" diff --git a/bin/fetch b/bin/fetch new file mode 100755 index 000000000..5608cf195 --- /dev/null +++ b/bin/fetch @@ -0,0 +1,58 @@ +#!/usr/bin/env perl +# +# This script utilises Open311 as described at +# http://wiki.open311.org/GeoReport_v2/#get-service-requests +# and/or the Open311 extension explained at +# https://github.com/mysociety/FixMyStreet/wiki/Open311-FMS---Proposed-differences-to-Open311 +# to fetch service requests or updates on service requests. + +use strict; +use warnings; +use v5.14; + +BEGIN { + use File::Basename qw(dirname); + use File::Spec; + my $d = dirname(File::Spec->rel2abs($0)); + require "$d/../setenv.pl"; +} + +use DateTime; +use Getopt::Long::Descriptive; +use Open311::GetServiceRequests; +use Open311::GetServiceRequestUpdates; + +my ($opts, $usage) = describe_options( + '%c %o', + ['reports', 'fetch reports'], + ['updates', 'fetch updates'], + ['start|s:i', 'start time to use (hours before now), defaults to one (reports) or two (updates)' ], + ['end|e:i', 'end time to use (hours before now), defaults to zero' ], + ['body|b:s', 'body name to only fetch this body' ], + ['verbose|v', 'more verbose output'], + ['help|h', "print usage message and exit" ], +); +$usage->die if $opts->help; + +my %params = ( + verbose => $opts->verbose, + body => $opts->body, +); + +my $dt = DateTime->now(); +if ($opts->start) { + $params{start_date} = $dt->clone->add(hours => -$opts->start); +} +if ($opts->end) { + $params{end_date} = $dt->clone->add(hours => -$opts->end); +} + +if ($opts->reports) { + my $reports = Open311::GetServiceRequests->new(%params); + $reports->fetch; +} + +if ($opts->updates) { + my $updates = Open311::GetServiceRequestUpdates->new(%params); + $updates->fetch; +} diff --git a/bin/fetch-comments b/bin/fetch-comments index 55ba70d85..0fb30f236 100755 --- a/bin/fetch-comments +++ b/bin/fetch-comments @@ -1,25 +1,6 @@ -#!/usr/bin/env perl +#!/bin/bash # -# This script utilises the Open311 extension explained at -# https://github.com/mysociety/FixMyStreet/wiki/Open311-FMS---Proposed-differences-to-Open311 -# to fetch updates on service requests. +# Wrapper to call new script -use strict; -use warnings; -require 5.8.0; - -BEGIN { - use File::Basename qw(dirname); - use File::Spec; - my $d = dirname(File::Spec->rel2abs($0)); - require "$d/../setenv.pl"; -} - -use CronFns; -my ($verbose, $nomail) = CronFns::options(); - -use Open311::GetServiceRequestUpdates; - -my $updates = Open311::GetServiceRequestUpdates->new( verbose => $verbose ); - -$updates->fetch; +DIR="$(cd "$(dirname "$BASH_SOURCE")" && pwd -P)" +$DIR/fetch --updates diff --git a/bin/fetch-comments-24hs b/bin/fetch-comments-24hs index df208439e..86675be4c 100755 --- a/bin/fetch-comments-24hs +++ b/bin/fetch-comments-24hs @@ -1,37 +1,6 @@ -#!/usr/bin/env perl +#!/bin/bash # -# This script utilises the Open311 extension explained at -# https://github.com/mysociety/FixMyStreet/wiki/Open311-FMS---Proposed-differences-to-Open311 -# to fetch updates on service requests from the past 24 hours, to check none -# were missed. +# Wrapper to call new script -use strict; -use warnings; -require 5.8.0; - -BEGIN { - use File::Basename qw(dirname); - use File::Spec; - my $d = dirname(File::Spec->rel2abs($0)); - require "$d/../setenv.pl"; -} - -use DateTime; -use DateTime::Format::W3CDTF; - -use CronFns; -my ($verbose, $nomail) = CronFns::options(); - -use Open311::GetServiceRequestUpdates; - -my $dt = DateTime->now(); -my $dt_24hrs_ago = $dt->clone; -$dt_24hrs_ago->add( hours => -24 ); - -my $updates = Open311::GetServiceRequestUpdates->new( - verbose => 1, - start_date => DateTime::Format::W3CDTF->format_datetime( $dt_24hrs_ago ), - end_date => DateTime::Format::W3CDTF->format_datetime( $dt ) -); - -$updates->fetch; +DIR="$(cd "$(dirname "$BASH_SOURCE")" && pwd -P)" +$DIR/fetch --updates --start 24 diff --git a/bin/fetch-reports b/bin/fetch-reports index 20de10f4b..11bb0c44a 100755 --- a/bin/fetch-reports +++ b/bin/fetch-reports @@ -1,25 +1,6 @@ -#!/usr/bin/env perl +#!/bin/bash # -# This script utilises Open311 as described at -# http://wiki.open311.org/GeoReport_v2/#get-service-requests -# to fetch service requests. +# Wrapper to call new script -use strict; -use warnings; -require 5.8.0; - -BEGIN { - use File::Basename qw(dirname); - use File::Spec; - my $d = dirname(File::Spec->rel2abs($0)); - require "$d/../setenv.pl"; -} - -use CronFns; -my ($verbose, $nomail) = CronFns::options(); - -use Open311::GetServiceRequests; - -my $reports = Open311::GetServiceRequests->new( verbose => $verbose ); - -$reports->fetch; +DIR="$(cd "$(dirname "$BASH_SOURCE")" && pwd -P)" +$DIR/fetch --reports diff --git a/bin/fetch-reports-24hs b/bin/fetch-reports-24hs index ec0eabc2e..ffc1e9e72 100755 --- a/bin/fetch-reports-24hs +++ b/bin/fetch-reports-24hs @@ -1,35 +1,6 @@ -#!/usr/bin/env perl +#!/bin/bash # -# This script utilises Open311 as described at -# http://wiki.open311.org/GeoReport_v2/#get-service-requests -# to fetch service requests. +# Wrapper to call new script -use strict; -use warnings; -require 5.8.0; - -BEGIN { - use File::Basename qw(dirname); - use File::Spec; - my $d = dirname(File::Spec->rel2abs($0)); - require "$d/../setenv.pl"; -} - -use DateTime; - -use CronFns; -my ($verbose, $nomail) = CronFns::options(); - -use Open311::GetServiceRequests; - -my $dt = DateTime->now(); -my $dt_24hrs_ago = $dt->clone; -$dt_24hrs_ago->add( hours => -24 ); - -my $reports = Open311::GetServiceRequests->new( - verbose => 1, - start_date => $dt_24hrs_ago, - end_date => $dt -); - -$reports->fetch; +DIR="$(cd "$(dirname "$BASH_SOURCE")" && pwd -P)" +$DIR/fetch --reports --start 24 diff --git a/bin/open311-update-reports b/bin/open311-update-reports index 378f8e8bc..2d384b813 100755 --- a/bin/open311-update-reports +++ b/bin/open311-update-reports @@ -4,7 +4,7 @@ # (by fetching all reports for a body and looking for updates). If possible, # please use the extension explained at # https://github.com/mysociety/FixMyStreet/wiki/Open311-FMS---Proposed-differences-to-Open311 -# and the fetch-comments/send-comments scripts. +# and the fetch/send-comments scripts. use strict; use warnings; diff --git a/conf/crontab-example b/conf/crontab-example index 2bc51f20a..86fcee80c 100644 --- a/conf/crontab-example +++ b/conf/crontab-example @@ -18,10 +18,10 @@ PATH=/usr/local/bin:/usr/bin:/bin 22,52 * * * * "$FMS/commonlib/bin/run-with-lockfile.sh" -n "$LOCK_DIR/send-questionnaires.lock" "$FMS/bin/send-questionnaires" || echo "stalled?" # If you utilise Open311 and the updates extension, you will need to run these scripts. -# We currently run fetch-comments-24hs once a night to catch anything missed. +# We currently run fetch --updates with a 24hr timespan once a night to catch anything missed. #*/5 * * * * "$FMS/commonlib/bin/run-with-lockfile.sh" -n "$LOCK_DIR/send-comments.lock" "$FMS/bin/send-comments" || echo "stalled?" -#5,10,15,20,25,30,35,40,45,50,55 * * * * "$FMS/commonlib/bin/run-with-lockfile.sh" -n "$LOCK_DIR/fetch-comments.lock" "$FMS/bin/fetch-comments" || echo "stalled?" -#0 1 * * * "$FMS/commonlib/bin/run-with-lockfile.sh" -n "$LOCK_DIR/fetch-comments.lock" "$FMS/bin/fetch-comments-24hs" || echo "stalled?" +#5,10,15,20,25,30,35,40,45,50,55 * * * * "$FMS/commonlib/bin/run-with-lockfile.sh" -n "$LOCK_DIR/fetch-updates.lock" "$FMS/bin/fetch --updates" || echo "stalled?" +#0 1 * * * "$FMS/commonlib/bin/run-with-lockfile.sh" -n "$LOCK_DIR/fetch-updates.lock" "$FMS/bin/fetch --updates --start 24" || echo "stalled?" 47 0-7,9-23 * * * "$FMS/commonlib/bin/run-with-lockfile.sh" -n "$LOCK_DIR/open311-populate-service-list.lock" "$FMS/bin/open311-populate-service-list" || echo "stalled?" 47 8 * * * "$FMS/commonlib/bin/run-with-lockfile.sh" -n "$LOCK_DIR/open311-populate-service-list.lock" "$FMS/bin/open311-populate-service-list --warn" || echo "stalled?" diff --git a/conf/general.yml-example b/conf/general.yml-example index 8e8d0f76a..243d077f0 100644 --- a/conf/general.yml-example +++ b/conf/general.yml-example @@ -261,3 +261,10 @@ LOGIN_REQUIRED: 0 # If you want to stop new users from registering, set this to 1. # NB: This also disables all Facebook/Twitter logins. SIGNUPS_DISABLED: 0 + +# Setting these variable to more than 1 will let fetch-comments run in parallel +# with MIN to MAX children (new children will be added if a child takes longer +# than TIMEOUT on one body). +FETCH_COMMENTS_PROCESSES_MIN: 0 +FETCH_COMMENTS_PROCESSES_MAX: 0 +FETCH_COMMENTS_PROCESS_TIMEOUT: 0 @@ -110,6 +110,7 @@ requires 'Net::OAuth'; requires 'Net::Twitter::Lite::WithAPIv1_1', '0.12008'; requires 'Number::Phone', '3.5000'; requires 'OIDC::Lite'; +requires 'Parallel::ForkManager'; requires 'Path::Class'; requires 'POSIX'; requires 'Readonly'; diff --git a/cpanfile.snapshot b/cpanfile.snapshot index 42c125da6..8c55fec0c 100644 --- a/cpanfile.snapshot +++ b/cpanfile.snapshot @@ -6028,6 +6028,24 @@ DISTRIBUTIONS requirements: ExtUtils::MakeMaker 0 perl 5.008001 + Parallel-ForkManager-2.02 + pathname: Y/YA/YANICK/Parallel-ForkManager-2.02.tar.gz + provides: + Parallel::ForkManager 2.02 + Parallel::ForkManager::Child 2.02 + requirements: + Carp 0 + ExtUtils::MakeMaker 0 + File::Path 0 + File::Spec 0 + File::Temp 0 + Moo 0 + Moo::Role 0 + POSIX 0 + Storable 0 + perl 5.006 + strict 0 + warnings 0 Params-Classify-0.015 pathname: Z/ZE/ZEFRAM/Params-Classify-0.015.tar.gz provides: diff --git a/perllib/FixMyStreet.pm b/perllib/FixMyStreet.pm index f6a69928b..94ba7685f 100644 --- a/perllib/FixMyStreet.pm +++ b/perllib/FixMyStreet.pm @@ -154,6 +154,7 @@ sub dbic_connect_info { my $dbi_args = { AutoCommit => 1, + AutoInactiveDestroy => 1, }; my $local_time_zone = local_time_zone(); my $dbic_args = { diff --git a/perllib/Open311/GetServiceRequestUpdates.pm b/perllib/Open311/GetServiceRequestUpdates.pm index da3461f6e..09b1f6b26 100644 --- a/perllib/Open311/GetServiceRequestUpdates.pm +++ b/perllib/Open311/GetServiceRequestUpdates.pm @@ -2,6 +2,7 @@ package Open311::GetServiceRequestUpdates; use Moo; use Open311; +use Parallel::ForkManager; use FixMyStreet::DB; use FixMyStreet::App::Model::PhotoSet; use DateTime::Format::W3CDTF; @@ -37,7 +38,29 @@ sub fetch { $bodies = $bodies->search( { name => $self->body } ); } + my $procs_min = FixMyStreet->config('FETCH_COMMENTS_PROCESSES_MIN') || 0; + my $procs_max = FixMyStreet->config('FETCH_COMMENTS_PROCESSES_MAX'); + my $procs_timeout = FixMyStreet->config('FETCH_COMMENTS_PROCESS_TIMEOUT'); + + my $pm = Parallel::ForkManager->new(FixMyStreet->test_mode ? 0 : $procs_min); + + if ($procs_max && $procs_timeout) { + my %workers; + $pm->run_on_wait(sub { + while (my ($pid, $started_at) = each %workers) { + next unless time() - $started_at > $procs_timeout; + next if $pm->max_procs == $procs_max; + $pm->set_max_procs($pm->max_procs + 1); + delete $workers{$pid}; # Only want to increase once per long-running thing + } + }, 1); + $pm->run_on_start(sub { my $pid = shift; $workers{$pid} = time(); }); + $pm->run_on_finish(sub { my $pid = shift; delete $workers{$pid}; }); + } + while ( my $body = $bodies->next ) { + $pm->start and next; + $self->current_body( $body ); my %open311_conf = ( @@ -57,34 +80,46 @@ sub fetch { $self->blank_updates_permitted( $body->blank_updates_permitted ); $self->system_user( $body->comment_user ); $self->process_body(); + + $pm->finish; } + + $pm->wait_all_children; } -sub process_body { +sub parse_dates { my $self = shift; - - my $open311 = $self->current_open311; my $body = $self->current_body; my @args = (); - if ( $self->start_date || $self->end_date ) { - return 0 unless $self->start_date && $self->end_date; + my $dt = DateTime->now(); + # Oxfordshire uses local time and not UTC for dates + FixMyStreet->set_time_zone($dt) if $body->areas->{$AREA_ID_OXFORDSHIRE}; - push @args, $self->start_date; - push @args, $self->end_date; # default to asking for last 2 hours worth if not Bromley + if ($self->start_date) { + push @args, DateTime::Format::W3CDTF->format_datetime( $self->start_date ); } elsif ( ! $body->areas->{$AREA_ID_BROMLEY} ) { - my $end_dt = DateTime->now(); - # Oxfordshire uses local time and not UTC for dates - FixMyStreet->set_time_zone($end_dt) if ( $body->areas->{$AREA_ID_OXFORDSHIRE} ); - my $start_dt = $end_dt->clone; - $start_dt->add( hours => -2 ); - + my $start_dt = $dt->clone->add( hours => -2 ); push @args, DateTime::Format::W3CDTF->format_datetime( $start_dt ); - push @args, DateTime::Format::W3CDTF->format_datetime( $end_dt ); } + if ($self->end_date) { + push @args, DateTime::Format::W3CDTF->format_datetime( $self->end_date ); + } elsif ( ! $body->areas->{$AREA_ID_BROMLEY} ) { + push @args, DateTime::Format::W3CDTF->format_datetime( $dt ); + } + + return @args; +} + +sub process_body { + my $self = shift; + + my $open311 = $self->current_open311; + my $body = $self->current_body; + my @args = $self->parse_dates; my $requests = $open311->get_service_request_updates( @args ); unless ( $open311->success ) { @@ -106,16 +141,9 @@ sub process_body { return 1; } -sub find_problem { +sub check_date { my ($self, $request, @args) = @_; - my $body = $self->current_body; - my $request_id = $request->{service_request_id}; - - # If there's no request id then we can't work out - # what problem it belongs to so just skip - return unless $request_id || $request->{fixmystreet_id}; - my $comment_time = eval { DateTime::Format::W3CDTF->parse_datetime( $request->{updated_datetime} || "" ) ->set_time_zone(FixMyStreet->local_time_zone); @@ -124,6 +152,20 @@ sub find_problem { my $updated = DateTime::Format::W3CDTF->format_datetime($comment_time->clone->set_time_zone('UTC')); return if @args && ($updated lt $args[0] || $updated gt $args[1]); $request->{comment_time} = $comment_time; + return 1; +} + +sub find_problem { + my ($self, $request, @args) = @_; + + $self->check_date($request, @args) or return; + + my $body = $self->current_body; + my $request_id = $request->{service_request_id}; + + # If there's no request id then we can't work out + # what problem it belongs to so just skip + return unless $request_id || $request->{fixmystreet_id}; my $problem; my $criteria = { diff --git a/perllib/Open311/GetServiceRequests.pm b/perllib/Open311/GetServiceRequests.pm index a9ec88a70..e5fd6438e 100644 --- a/perllib/Open311/GetServiceRequests.pm +++ b/perllib/Open311/GetServiceRequests.pm @@ -10,6 +10,7 @@ use DateTime::Format::W3CDTF; has system_user => ( is => 'rw' ); has start_date => ( is => 'ro', default => sub { undef } ); has end_date => ( is => 'ro', default => sub { undef } ); +has body => ( is => 'ro', default => sub { undef } ); has fetch_all => ( is => 'rw', default => 0 ); has verbose => ( is => 'ro', default => 0 ); has schema => ( is =>'ro', lazy => 1, default => sub { FixMyStreet::DB->schema->connect } ); @@ -27,6 +28,10 @@ sub fetch { } ); + if ( $self->body ) { + $bodies = $bodies->search( { name => $self->body } ); + } + while ( my $body = $bodies->next ) { my $o = $self->create_open311_object( $body ); @@ -55,18 +60,17 @@ sub create_problems { my $args = {}; - if ( $self->start_date || $self->end_date ) { - return 0 unless $self->start_date && $self->end_date; - + my $dt = DateTime->now(); + if ($self->start_date) { $args->{start_date} = DateTime::Format::W3CDTF->format_datetime( $self->start_date ); - $args->{end_date} = DateTime::Format::W3CDTF->format_datetime( $self->end_date ); } elsif ( !$self->fetch_all ) { - my $end_dt = DateTime->now(); - my $start_dt = $end_dt->clone; - $start_dt->add( hours => -1 ); + $args->{start_date} = DateTime::Format::W3CDTF->format_datetime( $dt->clone->add(hours => -1) ); + } - $args->{start_date} = DateTime::Format::W3CDTF->format_datetime( $start_dt ); - $args->{end_date} = DateTime::Format::W3CDTF->format_datetime( $end_dt ); + if ($self->end_date) { + $args->{end_date} = DateTime::Format::W3CDTF->format_datetime( $self->end_date ); + } elsif ( !$self->fetch_all ) { + $args->{end_date} = DateTime::Format::W3CDTF->format_datetime( $dt ); } my $requests = $open311->get_service_requests( $args ); diff --git a/t/open311/getservicerequests.t b/t/open311/getservicerequests.t index c44be5099..672459f3f 100644 --- a/t/open311/getservicerequests.t +++ b/t/open311/getservicerequests.t @@ -211,48 +211,6 @@ my $date = DateTime->new( for my $test ( { - start_date => '1', - end_date => '', - desc => 'do not process if only a start_date', - subs => {}, - }, - { - start_date => '', - end_date => '1', - desc => 'do not process if only an end_date', - subs => {}, - }, -) { - subtest $test->{desc} => sub { - my $xml = prepare_xml( $test->{subs} ); - my $o = Open311->new( - jurisdiction => 'mysociety', - endpoint => 'http://example.com', - test_mode => 1, - test_get_returns => { 'requests.xml' => $xml} - ); - - my $update = Open311::GetServiceRequests->new( - start_date => $test->{start_date}, - end_date => $test->{end_date}, - system_user => $user, - ); - my $ret = $update->create_problems( $o, $body ); - - is $ret, 0, 'failed correctly' - }; -} - -$date = DateTime->new( - year => 2010, - month => 4, - day => 14, - hour => 6, - minute => 37 -); - -for my $test ( - { start_date => $date->clone->add(hours => -2), end_date => $date->clone->add(hours => -1), desc => 'do not process if update time after end_date', diff --git a/t/open311/getservicerequestupdates.t b/t/open311/getservicerequestupdates.t index 67a74e133..07c3b4cdd 100644 --- a/t/open311/getservicerequestupdates.t +++ b/t/open311/getservicerequestupdates.t @@ -683,31 +683,13 @@ subtest 'using start and end date' => sub { my $local_requests_xml = $requests_xml; my $o = Open311->new( jurisdiction => 'mysociety', endpoint => 'http://example.com', test_mode => 1, test_get_returns => { 'servicerequestupdates.xml' => $local_requests_xml } ); - my $start_dt = DateTime->now(); + my $start_dt = DateTime->now(formatter => DateTime::Format::W3CDTF->new); + my $end_dt = $start_dt->clone; $start_dt->subtract( days => 1 ); - my $end_dt = DateTime->now(); my $update = Open311::GetServiceRequestUpdates->new( system_user => $user, start_date => $start_dt, - current_open311 => $o, - ); - - my $res = $update->process_body; - is $res, 0, 'returns 0 if start but no end date'; - - $update = Open311::GetServiceRequestUpdates->new( - system_user => $user, - end_date => $end_dt, - current_open311 => $o, - ); - - $res = $update->process_body; - is $res, 0, 'returns 0 if end but no start date'; - - $update = Open311::GetServiceRequestUpdates->new( - system_user => $user, - start_date => $start_dt, end_date => $end_dt, current_open311 => $o, current_body => $bodies{2482}, |