aboutsummaryrefslogtreecommitdiffstats
path: root/lib/model_patches.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/model_patches.rb')
-rw-r--r--lib/model_patches.rb77
1 files changed, 77 insertions, 0 deletions
diff --git a/lib/model_patches.rb b/lib/model_patches.rb
index 7f3b4de..3ad489e 100644
--- a/lib/model_patches.rb
+++ b/lib/model_patches.rb
@@ -14,4 +14,81 @@ Rails.configuration.to_prepare do
# "If you uncomment this line, this text will appear as default text in every message"
# end
# end
+
+ # Based on alaveteli/app/models/incoming_message.rb, see
+ # https://github.com/mysociety/alaveteli/issues/2662
+ InfoRequest.class_eval do
# Replaces quoted replies, e-mail disclaimers and forwarded/original-message
# tails in a plain-text message body with a placeholder marker.
#
# Based on alaveteli/app/models/incoming_message.rb, see
# https://github.com/mysociety/alaveteli/issues/2662
#
# @param text [String] the message body; it is duplicated first, so the
#   caller's string is never modified.
# @param replacement [String] marker inserted in place of each folded
#   section; it is wrapped in a leading and trailing newline before use.
#   It is handed to gsub!/sub! as a replacement string, so backslash
#   sequences such as \1 inside it are interpreted as back-references.
# @return [String] the processed copy of +text+.
def self.remove_quoted_sections(text, replacement = "FOLDED_QUOTED_SECTION")
  text = text.dup
  replacement = "\n#{replacement}\n"

  # First do this peculiar form of quoting, as the > single line quoting
  # further below messes with it. Note the carriage return where it wraps -
  # this can happen anywhere according to length of the name/email. e.g.
  # >>> D K Elwell <[email address]> 17/03/2008
  # 01:51:50 >>>
  # http://www.whatdotheyknow.com/request/71/response/108
  # http://www.whatdotheyknow.com/request/police_powers_to_inform_car_insu
  # http://www.whatdotheyknow.com/request/secured_convictions_aided_by_cct
  multiline_original_message = '(>>>.* \d\d/\d\d/\d\d\d\d\s+\d\d:\d\d(?::\d\d)?\s*>>>)'
  # With /m the trailing .* also spans newlines, so everything from the
  # marker to the end of the text is folded.
  text.gsub!(/^(#{multiline_original_message}\n.*)$/m, replacement)

  # On Thu, Nov 28, 2013 at 9:08 AM, A User
  # <[1]request-7-skm40s2ls@xxx.xxxx> wrote:
  # (the pattern requires a leading space before "On")
  text.gsub!(/^( On [^\n]+\n\s*\<[^>\n]+\> (wrote|said):\s*\n.*)$/m, replacement)

  # Single line sections
  text.gsub!(/^(>.*\n)/, replacement)
  text.gsub!(/^(On .+ (wrote|said):\n)/, replacement)

  # Disclaimers fenced by a "score" line: eight or more of the same rule
  # character, optionally separated by whitespace. Only the first such
  # disclaimer per rule character is folded (sub!, not gsub!).
  ['-', '_', '*', '#'].each do |scorechar|
    score = /(?:[#{scorechar}]\s*){8,}/
    text.sub!(/(Disclaimer\s+)? # appears just before
      (
        \s*#{score}\n(?:(?!#{score}\n).)*? # top line
        (disclaimer:\n|confidential|received\sthis\semail\sin\serror|virus|intended\s+recipient|monitored\s+centrally|intended\s+(for\s+|only\s+for\s+use\s+by\s+)the\s+addressee|routinely\s+monitored|MessageLabs|unauthorised\s+use)
        .*?(?:#{score}|\z) # bottom line OR end of whole string (for ones with no terminator TODO: risky)
      )
    /imx, replacement)
  end

  # Special paragraphs
  # http://www.whatdotheyknow.com/request/identity_card_scheme_expenditure
  text.gsub!(/^[^\n]+Government\s+Secure\s+Intranet\s+virus\s+scanning
    .*?
    virus\sfree\.
  /imx, replacement)
  text.gsub!(/^Communications\s+via\s+the\s+GSi\s+
    .*?
    legal\spurposes\.
  /imx, replacement)
  # http://www.whatdotheyknow.com/request/net_promoter_value_scores_for_bb
  # The dots of the host name are escaped so that only the literal BBC URL
  # starts this section, not any string with arbitrary characters there.
  text.gsub!(/^http:\/\/www\.bbc\.co\.uk
    .*?
    Further\s+communication\s+will\s+signify\s+your\s+consent\s+to\s+this\.
  /imx, replacement)

  # To end of message sections
  # http://www.whatdotheyknow.com/request/123/response/192
  # http://www.whatdotheyknow.com/request/235/response/513
  # http://www.whatdotheyknow.com/request/445/response/743
  original_message = '(' + [
    '----* This is a copy of the message, including all the headers. ----*',
    '----*\s*Original Message\s*----*',
    '----*\s*Forwarded message.+----*',
    '----*\s*Forwarded by.+----*'
  ].join('|') + ')'
  # Could have a ^ at start here, but see messed up formatting here:
  # http://www.whatdotheyknow.com/request/refuse_and_recycling_collection#incoming-842
  text.gsub!(/(#{original_message}\n.*)$/mi, replacement)

  # Some silly Microsoft XML gets into parts marked as plain text.
  # e.g. http://www.whatdotheyknow.com/request/are_traffic_wardens_paid_commiss#incoming-401
  # Don't replace with "replacement" as it's pretty messy
  text.gsub!(/<\?xml:namespace[^>]*\/>/, " ")

  text
end
+ end
end