about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- app/models/incoming_message.rb 42
-rw-r--r-- todo.txt 54
2 files changed, 62 insertions, 34 deletions
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb
index 2f1184687..df06e35c1 100644
--- a/app/models/incoming_message.rb
+++ b/app/models/incoming_message.rb
@@ -18,7 +18,7 @@
# Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved.
# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
#
-# $Id: incoming_message.rb,v 1.105 2008-05-27 08:56:27 francis Exp $
+# $Id: incoming_message.rb,v 1.106 2008-05-29 20:06:05 francis Exp $
# TODO
# Move some of the (e.g. quoting) functions here into rblib, as they feel
@@ -229,6 +229,14 @@ text = IncomingMessage.mask_string_multicharset(text, 'request-144-a724c835@what
# http://www.whatdotheyknow.com/request/university_investment_in_the_arm
text.gsub!(/^#{name}[^\n]+\nSent by:[^\n]+\n.*/ims, "\n\n" + replacement)
+ # Some other sort of forwarding quoting
+ # http://www.whatdotheyknow.com/request/224/response/326
+ text.gsub!(/^#{name}[^\n]+\n[0-9\/:\s]+\s+To\s+FOI requests at.*/ims, "\n\n" + replacement)
+
+ # http://www.whatdotheyknow.com/request/how_do_the_pct_deal_with_retirin_33#incoming-930
+ # http://www.whatdotheyknow.com/request/229/response/809
+ text.gsub!(/^From: [^\n]+\nSent: [^\n]+\nTo:\s+['"?]#{name}['"]?\nSubject:.*/ims, "\n\n" + replacement)
+
return text
end
@@ -273,10 +281,12 @@ text = IncomingMessage.mask_string_multicharset(text, 'request-144-a724c835@what
# http://www.whatdotheyknow.com/request/47/response/283
# http://www.whatdotheyknow.com/request/30/response/166
# http://www.whatdotheyknow.com/request/52/response/238
- ['-', '_', '*'].each do |score|
+ # http://www.whatdotheyknow.com/request/224/response/328 # example with * * * * *
+ # http://www.whatdotheyknow.com/request/297/response/506
+ ['-', '_', '*', '* ', '#'].each do |score|
text.gsub!(/(Disclaimer\s+)? # appears just before
(\s*[#{score}]{8,}\s*\n.*? # top line ------------
- (disclaimer:\n|confidential|received\sthis\semail\sin\serror|virus|intended\s+recipient|monitored\s+centrally|intended\s+for\s+the\s+addressee)
+ (disclaimer:\n|confidential|received\sthis\semail\sin\serror|virus|intended\s+recipient|monitored\s+centrally|intended\s+(for\s+|only\s+for\s+use\s+by\s+)the\s+addressee|routinely\s+monitored\s|MessageLabs)
.*?[#{score}]{8,}\s*\n) # bottom line -----------
/imx, replacement)
end
@@ -300,13 +310,23 @@ text = IncomingMessage.mask_string_multicharset(text, 'request-144-a724c835@what
# To end of message sections
# http://www.whatdotheyknow.com/request/123/response/192
+ # http://www.whatdotheyknow.com/request/235/response/513
+ # http://www.whatdotheyknow.com/request/445/response/743
original_message =
- '(' + '''------ This is a copy of the message, including all the headers. ------''' +
- '|' + '''-----*\s*Original Message\s*-----*''' +
- '|' + '''-----*\s*Forwarded message.+-----*''' +
- '|' + '''-----*\s*Forwarded by.+-----*''' +
+ '(' + '''----* This is a copy of the message, including all the headers. ----*''' +
+ '|' + '''----*\s*Original Message\s*----*''' +
+ '|' + '''----*\s*Forwarded message.+----*''' +
+ '|' + '''----*\s*Forwarded by.+----*''' +
')'
- text.gsub!(/^(#{original_message}\n.*)$/m, replacement)
+ # Could have a ^ at start here, but see messed up formatting here:
+ # http://www.whatdotheyknow.com/request/refuse_and_recycling_collection#incoming-842
+ text.gsub!(/(#{original_message}\n.*)$/mi, replacement)
+
+
+ # Some silly Microsoft XML gets into parts marked as plain text.
+ # e.g. http://www.whatdotheyknow.com/request/are_traffic_wardens_paid_commiss#incoming-401
+ # Don't replace with "replacement" as it's pretty messy
+ text.gsub!(/<\?xml:namespace[^>]*\/>/, " ")
return text
end
@@ -394,6 +414,10 @@ text = IncomingMessage.mask_string_multicharset(text, 'request-144-a724c835@what
# Charset conversion, turn everything into UTF-8
if not text_charset.nil?
begin
+ # XXX specially convert unicode pound signs, was needed here
+ # http://www.whatdotheyknow.com/request/88/response/352
+ text.gsub!("£", Iconv.conv(text_charset, 'utf-8', '£'))
+ # Try proper conversion
text = Iconv.conv('utf-8', text_charset, text)
rescue Iconv::IllegalSequence, Iconv::InvalidEncoding
# Clearly specified charset was nonsense
@@ -538,7 +562,7 @@ text = IncomingMessage.mask_string_multicharset(text, 'request-144-a724c835@what
text = "[Subject only] " + CGI.escapeHTML(self.mail.subject) + text
end
# and display link for quoted stuff
- text = text.gsub(/FOLDED_QUOTED_SECTION/, "\n\n" + '<span class="unfold_link"><a href="?unfold=1">show quoted sections</a></span>' + "\n")
+ text = text.gsub(/FOLDED_QUOTED_SECTION/, "\n\n" + '<span class="unfold_link"><a href="?unfold=1">show quoted sections</a></span>' + "\n\n")
else
if folded_quoted_text.include?('FOLDED_QUOTED_SECTION')
text = text + "\n\n" + '<span class="unfold_link"><a href="?">hide quoted sections</a></span>'
diff --git a/todo.txt b/todo.txt
index 140045721..5e98439c8 100644
--- a/todo.txt
+++ b/todo.txt
@@ -35,7 +35,7 @@ Next
Advertise WDTK search queries on TWFY
Advertise alerts on end pages with WDTK
-Do up the EIR about text
+Internal review marker
Have a bash at quoted text fixing up
Antispam on contact form
@@ -188,39 +188,43 @@ Remove request email address from PDFs (we already do from docs)
to text/HTML?
Quoting fixing TODO:
+ #Footer onesided:
http://www.whatdotheyknow.com/request/55/response/96
- http://www.whatdotheyknow.com/request/m3_junction_2_eastbound_speed_re
- http://www.whatdotheyknow.com/request/taxi_ranking
+ http://www.whatdotheyknow.com/request/320/response/605 (vcf)
+ http://www.whatdotheyknow.com/request/268/response/929
http://www.whatdotheyknow.com/request/94/response/161
- http://www.whatdotheyknow.com/request/sale_of_public_land_in_worcester
- http://www.whatdotheyknow.com/request/35/response/191
- http://www.whatdotheyknow.com/request/88/response/352 (and charset)
- http://www.whatdotheyknow.com/request/51/response/93
- http://www.whatdotheyknow.com/request/56/response/252
http://www.whatdotheyknow.com/request/298/response/415
- http://www.whatdotheyknow.com/request/119/response/447 # remove double blank lines
- http://www.whatdotheyknow.com/request/council_tax_payments
- http://www.whatdotheyknow.com/request/56/response/94
- http://www.whatdotheyknow.com/request/83/response/212 # broken highlighted link
http://www.whatdotheyknow.com/request/234/response/342
- http://www.whatdotheyknow.com/request/297/response/506
http://www.whatdotheyknow.com/request/297/response/449
- http://www.whatdotheyknow.com/request/289/response/419
- http://www.whatdotheyknow.com/request/235/response/513
+ http://www.whatdotheyknow.com/request/40/response/163 - Disclaimer
+ http://www.whatdotheyknow.com/request/35/response/191
+ http://www.whatdotheyknow.com/request/268/response/405
+
+ http://www.whatdotheyknow.com/request/35/response/191 # Funny disclaimer
+
+ http://www.whatdotheyknow.com/request/m3_junction_2_eastbound_speed_re # cut here
+ http://www.whatdotheyknow.com/request/123/response/184 # nasty nasty quoting of message
+ http://www.whatdotheyknow.com/request/155/response/552
+ http://www.whatdotheyknow.com/request/51/response/93 # tough quoting with <
+ http://www.whatdotheyknow.com/request/265/response/688 - word wrapping of <
+
+ Done:
+ http://www.whatdotheyknow.com/request/229/response/809
+ http://www.whatdotheyknow.com/request/576/response/812
http://www.whatdotheyknow.com/request/299/response/484
- http://www.whatdotheyknow.com/request/320/response/605 (encoding, vcf)
- http://www.whatdotheyknow.com/request/118/response/600 (encoding)
+ http://www.whatdotheyknow.com/request/235/response/513
+ http://www.whatdotheyknow.com/request/88/response/352 (and charset)
http://www.whatdotheyknow.com/request/180/response/482 (charset)
http://www.whatdotheyknow.com/request/305/response/767 (charset)
+ http://www.whatdotheyknow.com/request/118/response/600 (encoding)
+ http://www.whatdotheyknow.com/request/83/response/212 # broken highlighted link
+ http://www.whatdotheyknow.com/request/sale_of_public_land_in_worcester
+ http://www.whatdotheyknow.com/request/council_tax_payments
+ http://www.whatdotheyknow.com/request/297/response/506
+ http://www.whatdotheyknow.com/request/289/response/419
http://www.whatdotheyknow.com/request/235/response/513
- http://www.whatdotheyknow.com/request/40/response/163 - Disclaimer
- http://www.whatdotheyknow.com/request/265/response/688 - word wrapping
- http://www.whatdotheyknow.com/request/576/response/812
- http://www.whatdotheyknow.com/request/35/response/191
- http://www.whatdotheyknow.com/request/229/response/809
- http://www.whatdotheyknow.com/request/268/response/405
- http://www.whatdotheyknow.com/request/268/response/929 - £ signs
-
+ http://www.whatdotheyknow.com/request/sale_of_land_to_fairview_homes_m#incoming-552 # nicer with * * *
+
Sources of public bodies
========================