aboutsummaryrefslogtreecommitdiffstats
path: root/perllib/Utils.pm
diff options
context:
space:
mode:
authorStruan Donald <struan@exo.org.uk>2011-05-20 10:42:47 +0100
committerStruan Donald <struan@exo.org.uk>2011-05-20 10:42:47 +0100
commitc81d9ee2c13b5430be1beb4210d10e1cbb31c194 (patch)
tree1f10c799bdc4a71ddfb27aca1765b47737fa6b08 /perllib/Utils.pm
parent0aff085b3b0b1323b6fd4d1cdf3f2b682f4b2d94 (diff)
move trim_text and cleanup_text to Utils
Diffstat (limited to 'perllib/Utils.pm')
-rw-r--r--perllib/Utils.pm70
1 files changed, 70 insertions, 0 deletions
diff --git a/perllib/Utils.pm b/perllib/Utils.pm
index c16a02cd4..c267bbea0 100644
--- a/perllib/Utils.pm
+++ b/perllib/Utils.pm
@@ -136,4 +136,74 @@ sub london_categories {
};
}
+=head2 trim_text
+
+    my $text = trim_text( $text_to_trim );
+
+Strip leading and trailing white space from a string. Also reduces every
+run of white space, new lines included, to a single space.
+
+A false value (undef, '' or '0') is returned unchanged.
+
+=cut
+
+sub trim_text {
+    my $text = shift;
+    return $text unless $text;    # nothing to trim in undef, '' or '0'
+
+    $text =~ s{\s+}{ }g;    # all whitespace to single space
+    $text =~ s{^ }{};       # trim leading
+    $text =~ s{ $}{};       # trim trailing
+
+    return $text;
+}
+
+
+=head2 cleanup_text
+
+    my $text = cleanup_text( $text_to_clean, { allow_multiline => 1 } );
+
+Tidy up text: lowercases SHOUTING (text with no lowercase a-z), replaces
+contentious phrases and trademarks, squashes white space and capitalises
+the first letter of each paragraph. Paragraphs are separated by empty
+lines, with either LF or CRLF line endings. Takes an optional HASHREF of
+args as follows.
+
+=over
+
+=item allow_multiline
+
+Keep paragraphs apart (joined by an empty line) rather than flattening
+the text down to a single line.
+
+=back
+
+=cut
+
+sub cleanup_text {
+    my $input = shift || '';
+    my $args  = shift || {};
+
+    # lowercase everything if looks like it might be SHOUTING
+    $input = lc $input if $input !~ /[a-z]/;
+
+    # clean up language and trademarks
+    for ($input) {
+        s{\bdog\s*shit\b}{dog poo}ig;    # dog shit -> dog poo
+        # 'portakabin' to '[portable cabin]' (and variations)
+        s{\b(porta)\s*([ck]abin|loo)\b}{[$1ble $2]}ig;
+        s{kabin\]}{cabin\]}ig;
+    }
+
+    # Split into paragraphs on runs of empty lines. Form submissions commonly
+    # arrive with CRLF line endings, so accept \r\n as well as plain \n.
+    my @lines = grep { m/\S/ } split m/(?:\r?\n){2,}/, $input;
+    for (@lines) {
+        $_ = ucfirst trim_text($_);    # tidy whitespace, start with capital
+    }
+
+    my $join_char = $args->{allow_multiline} ? "\n\n" : " ";
+    return join $join_char, @lines;
+}
+
1;