diff options
-rw-r--r-- | app/models/incoming_message.rb | 42 | ||||
-rw-r--r-- | todo.txt | 54 |
2 files changed, 62 insertions, 34 deletions
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb index 2f1184687..df06e35c1 100644 --- a/app/models/incoming_message.rb +++ b/app/models/incoming_message.rb @@ -18,7 +18,7 @@ # Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved. # Email: francis@mysociety.org; WWW: http://www.mysociety.org/ # -# $Id: incoming_message.rb,v 1.105 2008-05-27 08:56:27 francis Exp $ +# $Id: incoming_message.rb,v 1.106 2008-05-29 20:06:05 francis Exp $ # TODO # Move some of the (e.g. quoting) functions here into rblib, as they feel @@ -229,6 +229,14 @@ text = IncomingMessage.mask_string_multicharset(text, 'request-144-a724c835@what # http://www.whatdotheyknow.com/request/university_investment_in_the_arm text.gsub!(/^#{name}[^\n]+\nSent by:[^\n]+\n.*/ims, "\n\n" + replacement) + # Some other sort of forwarding quoting + # http://www.whatdotheyknow.com/request/224/response/326 + text.gsub!(/^#{name}[^\n]+\n[0-9\/:\s]+\s+To\s+FOI requests at.*/ims, "\n\n" + replacement) + + # http://www.whatdotheyknow.com/request/how_do_the_pct_deal_with_retirin_33#incoming-930 + # http://www.whatdotheyknow.com/request/229/response/809 + text.gsub!(/^From: [^\n]+\nSent: [^\n]+\nTo:\s+['"?]#{name}['"]?\nSubject:.*/ims, "\n\n" + replacement) + return text end @@ -273,10 +281,12 @@ text = IncomingMessage.mask_string_multicharset(text, 'request-144-a724c835@what # http://www.whatdotheyknow.com/request/47/response/283 # http://www.whatdotheyknow.com/request/30/response/166 # http://www.whatdotheyknow.com/request/52/response/238 - ['-', '_', '*'].each do |score| + # http://www.whatdotheyknow.com/request/224/response/328 # example with * * * * * + # http://www.whatdotheyknow.com/request/297/response/506 + ['-', '_', '*', '* ', '#'].each do |score| text.gsub!(/(Disclaimer\s+)? # appears just before (\s*[#{score}]{8,}\s*\n.*? # top line ------------ - (disclaimer:\n|confidential|received\sthis\semail\sin\serror|virus|intended\s+recipient|monitored\s+centrally|intended\s+for\s+the\s+addressee) + (disclaimer:\n|confidential|received\sthis\semail\sin\serror|virus|intended\s+recipient|monitored\s+centrally|intended\s+(for\s+|only\s+for\s+use\s+by\s+)the\s+addressee|routinely\s+monitored\s|MessageLabs) .*?[#{score}]{8,}\s*\n) # bottom line ----------- /imx, replacement) end @@ -300,13 +310,23 @@ text = IncomingMessage.mask_string_multicharset(text, 'request-144-a724c835@what # To end of message sections # http://www.whatdotheyknow.com/request/123/response/192 + # http://www.whatdotheyknow.com/request/235/response/513 + # http://www.whatdotheyknow.com/request/445/response/743 original_message = - '(' + '''------ This is a copy of the message, including all the headers. ------''' + - '|' + '''-----*\s*Original Message\s*-----*''' + - '|' + '''-----*\s*Forwarded message.+-----*''' + - '|' + '''-----*\s*Forwarded by.+-----*''' + + '(' + '''----* This is a copy of the message, including all the headers. ----*''' + + '|' + '''----*\s*Original Message\s*----*''' + + '|' + '''----*\s*Forwarded message.+----*''' + + '|' + '''----*\s*Forwarded by.+----*''' + ')' - text.gsub!(/^(#{original_message}\n.*)$/m, replacement) + # Could have a ^ at start here, but see messed up formatting here: + # http://www.whatdotheyknow.com/request/refuse_and_recycling_collection#incoming-842 + text.gsub!(/(#{original_message}\n.*)$/mi, replacement) + + + # Some silly Microsoft XML gets into parts marked as plain text. + # e.g. http://www.whatdotheyknow.com/request/are_traffic_wardens_paid_commiss#incoming-401 + # Don't replace with "replacement" as it's pretty messy + text.gsub!(/<\?xml:namespace[^>]*\/>/, " ") return text end @@ -394,6 +414,10 @@ text = IncomingMessage.mask_string_multicharset(text, 'request-144-a724c835@what # Charset conversion, turn everything into UTF-8 if not text_charset.nil? begin + # XXX specially convert unicode pound signs, was needed here + # http://www.whatdotheyknow.com/request/88/response/352 + text.gsub!("£", Iconv.conv(text_charset, 'utf-8', '£')) + # Try proper conversion text = Iconv.conv('utf-8', text_charset, text) rescue Iconv::IllegalSequence, Iconv::InvalidEncoding # Clearly specified charset was nonsense @@ -538,7 +562,7 @@ text = IncomingMessage.mask_string_multicharset(text, 'request-144-a724c835@what text = "[Subject only] " + CGI.escapeHTML(self.mail.subject) + text end # and display link for quoted stuff - text = text.gsub(/FOLDED_QUOTED_SECTION/, "\n\n" + '<span class="unfold_link"><a href="?unfold=1">show quoted sections</a></span>' + "\n") + text = text.gsub(/FOLDED_QUOTED_SECTION/, "\n\n" + '<span class="unfold_link"><a href="?unfold=1">show quoted sections</a></span>' + "\n\n") else if folded_quoted_text.include?('FOLDED_QUOTED_SECTION') text = text + "\n\n" + '<span class="unfold_link"><a href="?">hide quoted sections</a></span>' @@ -35,7 +35,7 @@ Next Advertise WDTK search queries on TWFY Advertise alerts on end pages with WDTK -Do up the EIR about text +Internal review marker Have a bash at quoted text fixing up Antispam on contact form @@ -188,39 +188,43 @@ Remove request email address from PDFs (we already do from docs) to text/HTML? Quoting fixing TODO: + #Footer onesided: http://www.whatdotheyknow.com/request/55/response/96 - http://www.whatdotheyknow.com/request/m3_junction_2_eastbound_speed_re - http://www.whatdotheyknow.com/request/taxi_ranking + http://www.whatdotheyknow.com/request/320/response/605 (vcf) + http://www.whatdotheyknow.com/request/268/response/929 http://www.whatdotheyknow.com/request/94/response/161 - http://www.whatdotheyknow.com/request/sale_of_public_land_in_worcester - http://www.whatdotheyknow.com/request/35/response/191 - http://www.whatdotheyknow.com/request/88/response/352 (and charset) - http://www.whatdotheyknow.com/request/51/response/93 - http://www.whatdotheyknow.com/request/56/response/252 http://www.whatdotheyknow.com/request/298/response/415 - http://www.whatdotheyknow.com/request/119/response/447 # remove double blank lines - http://www.whatdotheyknow.com/request/council_tax_payments - http://www.whatdotheyknow.com/request/56/response/94 - http://www.whatdotheyknow.com/request/83/response/212 # broken highlighted link http://www.whatdotheyknow.com/request/234/response/342 - http://www.whatdotheyknow.com/request/297/response/506 http://www.whatdotheyknow.com/request/297/response/449 - http://www.whatdotheyknow.com/request/289/response/419 - http://www.whatdotheyknow.com/request/235/response/513 + http://www.whatdotheyknow.com/request/40/response/163 - Disclaimer + http://www.whatdotheyknow.com/request/35/response/191 + http://www.whatdotheyknow.com/request/268/response/405 + + http://www.whatdotheyknow.com/request/35/response/191 # Funny disclaimer + + http://www.whatdotheyknow.com/request/m3_junction_2_eastbound_speed_re # cut here + http://www.whatdotheyknow.com/request/123/response/184 # nasty nasty quoting of message + http://www.whatdotheyknow.com/request/155/response/552 + http://www.whatdotheyknow.com/request/51/response/93 # tough quoting with < + http://www.whatdotheyknow.com/request/265/response/688 - word wrapping of < + + Done: + http://www.whatdotheyknow.com/request/229/response/809 + http://www.whatdotheyknow.com/request/576/response/812 http://www.whatdotheyknow.com/request/299/response/484 - http://www.whatdotheyknow.com/request/320/response/605 (encoding, vcf) - http://www.whatdotheyknow.com/request/118/response/600 (encoding) + http://www.whatdotheyknow.com/request/235/response/513 + http://www.whatdotheyknow.com/request/88/response/352 (and charset) http://www.whatdotheyknow.com/request/180/response/482 (charset) http://www.whatdotheyknow.com/request/305/response/767 (charset) + http://www.whatdotheyknow.com/request/118/response/600 (encoding) + http://www.whatdotheyknow.com/request/83/response/212 # broken highlighted link + http://www.whatdotheyknow.com/request/sale_of_public_land_in_worcester + http://www.whatdotheyknow.com/request/council_tax_payments + http://www.whatdotheyknow.com/request/297/response/506 + http://www.whatdotheyknow.com/request/289/response/419 http://www.whatdotheyknow.com/request/235/response/513 - http://www.whatdotheyknow.com/request/40/response/163 - Disclaimer - http://www.whatdotheyknow.com/request/265/response/688 - word wrapping - http://www.whatdotheyknow.com/request/576/response/812 - http://www.whatdotheyknow.com/request/35/response/191 - http://www.whatdotheyknow.com/request/229/response/809 - http://www.whatdotheyknow.com/request/268/response/405 - http://www.whatdotheyknow.com/request/268/response/929 - £ signs - + http://www.whatdotheyknow.com/request/sale_of_land_to_fairview_homes_m#incoming-552 # nicer with * * * + Sources of public bodies ======================== |