diff options
Diffstat (limited to 'app/models/incoming_message.rb')
-rw-r--r-- | app/models/incoming_message.rb | 34 |
1 files changed, 24 insertions, 10 deletions
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb index 581c73f8b..16ae38b92 100644 --- a/app/models/incoming_message.rb +++ b/app/models/incoming_message.rb @@ -255,7 +255,8 @@ class FOIAttachment text = CGI.escapeHTML(text) text = MySociety::Format.make_clickable(text) html = text.gsub(/\n/, '<br>') - return "<html><head></head><body>" + html + "</body></html>", wrapper_id + return '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" + "http://www.w3.org/TR/html4/loose.dtd"><html><head><title></title></head><body>' + html + "</body></html>", wrapper_id end # the extractions will also produce image files, which go in the @@ -431,7 +432,7 @@ class IncomingMessage < ActiveRecord::Base text.gsub!(self.info_request.public_body.request_email, "[" + self.info_request.public_body.short_or_long_name + " request email]") end text.gsub!(self.info_request.incoming_email, "[FOI #" + self.info_request.id.to_s + " email]") - text.gsub!(MySociety::Config.get("CONTACT_EMAIL", 'contact@localhost'), "[WhatDoTheyKnow contact email]") + text.gsub!(MySociety::Config.get("CONTACT_EMAIL", 'contact@localhost'), "[#{MySociety::Config.get('SITE_NAME', 'Alaveteli')} contact email]") end # Replaces all email addresses in (possibly binary data) with equal length alternative ones. @@ -461,11 +462,23 @@ class IncomingMessage < ActiveRecord::Base if censored_uncompressed_text != uncompressed_text # then use the altered file (recompressed) recompressed_text = nil - IO.popen("/usr/bin/pdftk - output - compress", "r+") do |child| + if MySociety::Config.get('USE_GHOSTSCRIPT_COMPRESSION') == true + command = "gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/screen -dNOPAUSE -dQUIET -dBATCH -sOutputFile=- -" + else + command = "/usr/bin/pdftk - output - compress" + end + IO.popen(command, "r+") do |child| child.write(censored_uncompressed_text) child.close_write() recompressed_text = child.read() end + if recompressed_text.nil? || recompressed_text.empty? + # buggy versions of pdftk sometimes fail on + # compression, I don't see it's a disaster in + # these cases to save an uncompressed version? + recompressed_text = censored_uncompressed_text + logger.warn "Unable to compress PDF; problem with your pdftk version?" + end if !recompressed_text.nil? && !recompressed_text.empty? text[0..-1] = recompressed_text # [0..-1] makes it change the 'text' string in place end @@ -557,7 +570,8 @@ class IncomingMessage < ActiveRecord::Base text.gsub!(/(Complaints and Corporate Affairs Officer)\s+Westminster Primary Care Trust.+/ms, "\\1") # Remove WhatDoTheyKnow signup links - text.gsub!(/http:\/\/www.whatdotheyknow.com\/c\/[^\s]+/, "[WDTK login link]") + domain = MySociety::Config.get('DOMAIN') + text.gsub!(/http:\/\/#{domain}\/c\/[^\s]+/, "[WDTK login link]") # Remove Home Office survey links # e.g. http://www.whatdotheyknow.com/request/serious_crime_act_2007_section_7#incoming-12650 @@ -850,7 +864,9 @@ class IncomingMessage < ActiveRecord::Base text = Iconv.conv('utf-8', 'windows-1252', text) rescue Iconv::IllegalSequence # Text looks like unlabelled nonsense, strip out anything that isn't UTF-8 - text = Iconv.conv('utf-8//IGNORE', 'utf-8', text) + "\n\n[ WhatDoTheyKnow note: The above text was badly encoded, and has had strange characters removed. ]" + text = Iconv.conv('utf-8//IGNORE', 'utf-8', text) + + _("\n\n[ {{site_name}} note: The above text was badly encoded, and has had strange characters removed. ]", + :site_name => MySociety::Config.get('SITE_NAME', 'Alaveteli')) end end end @@ -972,7 +988,6 @@ class IncomingMessage < ActiveRecord::Base attachment.filename = _get_censored_part_file_name(leaf) if leaf.within_rfc822_attachment attachment.within_rfc822_subject = leaf.within_rfc822_attachment.subject - # Test to see if we are in the first part of the attached # RFC822 message and it is text, if so add headers. # XXX should probably use hunting algorithm to find main text part, rather than @@ -1119,8 +1134,8 @@ class IncomingMessage < ActiveRecord::Base external_command("/usr/bin/catdoc", tempfile.path, :append_to => text) elsif content_type == 'text/html' # lynx wordwraps links in its output, which then don't get formatted properly - # by WhatDoTheyKnow. We use elinks instead, which doesn't do that. - external_command("/usr/bin/elinks", "-dump-charset", "utf-8", "-force-html", "-dump", + # by Alaveteli. We use elinks instead, which doesn't do that. + external_command("/usr/bin/elinks", "-eval", "'set document.codepage.assume = \"utf-8\"'", "-dump-charset", "utf-8", "-force-html", "-dump", tempfile.path, :append_to => text) elsif content_type == 'application/vnd.ms-excel' # Bit crazy using /usr/bin/strings - but xls2csv, xlhtml and @@ -1237,9 +1252,8 @@ class IncomingMessage < ActiveRecord::Base info_request_event.track_things_sent_emails.each { |a| a.destroy } info_request_event.user_info_request_sent_alerts.each { |a| a.destroy } info_request_event.destroy - raw_email = self.raw_email + self.raw_email.destroy_file_representation! self.destroy - self.raw_email.destroy end end |