diff options
Diffstat (limited to 'app/models')
-rw-r--r-- | app/models/about_me_validator.rb | 2 | ||||
-rw-r--r-- | app/models/censor_rule.rb | 8 | ||||
-rw-r--r-- | app/models/change_email_validator.rb | 9 | ||||
-rw-r--r-- | app/models/comment.rb | 7 | ||||
-rw-r--r-- | app/models/exim_log.rb | 6 | ||||
-rw-r--r-- | app/models/foi_attachment.rb | 56 | ||||
-rw-r--r-- | app/models/holiday.rb | 6 | ||||
-rw-r--r-- | app/models/incoming_message.rb | 452 | ||||
-rw-r--r-- | app/models/info_request.rb | 15 | ||||
-rw-r--r-- | app/models/info_request_event.rb | 57 | ||||
-rw-r--r-- | app/models/outgoing_message.rb | 6 | ||||
-rw-r--r-- | app/models/post_redirect.rb | 8 | ||||
-rw-r--r-- | app/models/profile_photo.rb | 6 | ||||
-rw-r--r-- | app/models/public_body.rb | 4 | ||||
-rw-r--r-- | app/models/raw_email.rb | 35 | ||||
-rw-r--r-- | app/models/request_mailer.rb | 13 | ||||
-rw-r--r-- | app/models/track_thing.rb | 27 | ||||
-rw-r--r-- | app/models/track_things_sent_email.rb | 12 | ||||
-rw-r--r-- | app/models/user.rb | 31 | ||||
-rw-r--r-- | app/models/user_info_request_sent_alert.rb | 4 |
20 files changed, 283 insertions, 481 deletions
diff --git a/app/models/about_me_validator.rb b/app/models/about_me_validator.rb index ec2b03201..e24c5512c 100644 --- a/app/models/about_me_validator.rb +++ b/app/models/about_me_validator.rb @@ -21,7 +21,7 @@ class AboutMeValidator < ActiveRecord::BaseWithoutTable def validate if !self.about_me.blank? && self.about_me.size > 500 - errors.add(_("Please keep it shorter than 500 characters")) + errors.add(:about_me, _("Please keep it shorter than 500 characters")) end end diff --git a/app/models/censor_rule.rb b/app/models/censor_rule.rb index e2dc12d6f..201e60746 100644 --- a/app/models/censor_rule.rb +++ b/app/models/censor_rule.rb @@ -1,12 +1,12 @@ # == Schema Information -# Schema version: 95 +# Schema version: 108 # # Table name: censor_rules # # id :integer not null, primary key -# info_request_id :integer -# user_id :integer -# public_body_id :integer +# info_request_id :integer +# user_id :integer +# public_body_id :integer # text :text not null # replacement :text not null # last_edit_editor :string(255) not null diff --git a/app/models/change_email_validator.rb b/app/models/change_email_validator.rb index f7ec6d17e..e3f8fa892 100644 --- a/app/models/change_email_validator.rb +++ b/app/models/change_email_validator.rb @@ -1,11 +1,12 @@ # == Schema Information -# Schema version: 95 +# Schema version: 108 # # Table name: change_email_validators # -# old_email :string -# new_email :string -# password :string +# old_email :string +# new_email :string +# password :string +# user_circumstance :string # # models/changeemail_validator.rb: diff --git a/app/models/comment.rb b/app/models/comment.rb index b7ece9ba9..44a1079cd 100644 --- a/app/models/comment.rb +++ b/app/models/comment.rb @@ -1,16 +1,17 @@ # == Schema Information -# Schema version: 95 +# Schema version: 108 # # Table name: comments # # id :integer not null, primary key # user_id :integer not null # comment_type :string(255) default("internal_error"), not null -# info_request_id :integer +# 
info_request_id :integer # body :text not null -# visible :boolean default(true), not null +# visible :boolean default(TRUE), not null # created_at :datetime not null # updated_at :datetime not null +# locale :text default(""), not null # # models/comments.rb: diff --git a/app/models/exim_log.rb b/app/models/exim_log.rb index 83f031a92..77e5e2d21 100644 --- a/app/models/exim_log.rb +++ b/app/models/exim_log.rb @@ -1,11 +1,11 @@ # == Schema Information -# Schema version: 95 +# Schema version: 108 # # Table name: exim_logs # # id :integer not null, primary key -# exim_log_done_id :integer -# info_request_id :integer +# exim_log_done_id :integer +# info_request_id :integer # order :integer not null # line :text not null # created_at :datetime not null diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb index 057dcdb69..da92d1c2d 100644 --- a/app/models/foi_attachment.rb +++ b/app/models/foi_attachment.rb @@ -1,3 +1,19 @@ +# == Schema Information +# Schema version: 108 +# +# Table name: foi_attachments +# +# id :integer not null, primary key +# content_type :text +# filename :text +# charset :text +# display_size :text +# url_part_number :integer +# within_rfc822_subject :text +# incoming_message_id :integer +# hexdigest :string(32) +# + # encoding: UTF-8 # models/foi_attachment.rb: @@ -18,8 +34,15 @@ class FoiAttachment < ActiveRecord::Base before_validation :ensure_filename!, :only => [:filename] before_destroy :delete_cached_file! + BODY_MAX_TRIES = 3 + BODY_MAX_DELAY = 5 + def directory - base_dir = File.join("cache", "attachments_#{ENV['RAILS_ENV']}") + rails_env = ENV['RAILS_ENV'] + if rails_env.nil? || rails_env.empty? + raise "$RAILS_ENV is not set" + end + base_dir = File.join(File.dirname(__FILE__), "../../cache", "attachments_#{rails_env}") return File.join(base_dir, self.hexdigest[0..2]) end @@ -29,6 +52,7 @@ class FoiAttachment < ActiveRecord::Base def delete_cached_file! 
begin + @cached_body = nil File.delete(self.filepath) rescue end @@ -43,11 +67,29 @@ class FoiAttachment < ActiveRecord::Base file.write d } update_display_size! + @cached_body = d end def body if @cached_body.nil? - @cached_body = File.open(self.filepath, "rb" ).read + tries = 0 + delay = 1 + begin + @cached_body = File.open(self.filepath, "rb" ).read + rescue Errno::ENOENT + # we've lost our cached attachments for some reason. Reparse them. + if tries > BODY_MAX_TRIES + raise + else + sleep delay + end + tries += 1 + delay *= 2 + delay = BODY_MAX_DELAY if delay > BODY_MAX_DELAY + force = true + self.incoming_message.parse_raw_email!(force) + retry + end end return @cached_body end @@ -274,13 +316,9 @@ class FoiAttachment < ActiveRecord::Base tempfile.flush if self.content_type == 'application/pdf' - IO.popen("/usr/bin/pdftohtml -nodrm -zoom 1.0 -stdout -enc UTF-8 -noframes " + tempfile.path + "", "r") do |child| - html = child.read() - end + html = AlaveteliExternalCommand.run("pdftohtml", "-nodrm", "-zoom", "1.0", "-stdout", "-enc", "UTF-8", "-noframes", tempfile.path) elsif self.content_type == 'application/rtf' - IO.popen("/usr/bin/unrtf --html " + tempfile.path + "", "r") do |child| - html = child.read() - end + html = AlaveteliExternalCommand.run("unrtf", "--html", tempfile.path) elsif self.has_google_docs_viewer? html = '' # force error and using Google docs viewer else @@ -302,7 +340,7 @@ class FoiAttachment < ActiveRecord::Base body = $1.to_s body_without_tags = body.gsub(/\s+/,"").gsub(/\<[^\>]*\>/, "") contains_images = html.match(/<img/mi) ? true : false - if !$?.success? || html.size == 0 || (body_without_tags.size == 0 && !contains_images) + if html.size == 0 || !$?.success? || (body_without_tags.size == 0 && !contains_images) ret = "<html><head></head><body>"; if self.has_google_docs_viewer? 
wrapper_id = "wrapper_google_embed" diff --git a/app/models/holiday.rb b/app/models/holiday.rb index 4674d58f1..60b5ff443 100644 --- a/app/models/holiday.rb +++ b/app/models/holiday.rb @@ -1,11 +1,11 @@ # == Schema Information -# Schema version: 95 +# Schema version: 108 # # Table name: holidays # # id :integer not null, primary key -# day :date -# description :text +# day :date +# description :text # # models/holiday.rb: diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb index a4519a17d..131970ba6 100644 --- a/app/models/incoming_message.rb +++ b/app/models/incoming_message.rb @@ -1,7 +1,5 @@ -# encoding: UTF-8 - # == Schema Information -# Schema version: 95 +# Schema version: 108 # # Table name: incoming_messages # @@ -10,11 +8,19 @@ # created_at :datetime not null # updated_at :datetime not null # raw_email_id :integer not null -# cached_attachment_text_clipped :text -# cached_main_body_text_folded :text -# cached_main_body_text_unfolded :text +# cached_attachment_text_clipped :text +# cached_main_body_text_folded :text +# cached_main_body_text_unfolded :text +# sent_at :time +# subject :text +# mail_from_domain :text +# valid_to_reply_to :boolean +# last_parsed :datetime +# mail_from :text # +# encoding: UTF-8 + # models/incoming_message.rb: # An (email) message from really anybody to be logged with a request. e.g. A # response from the public body. 
@@ -44,275 +50,6 @@ module TMail end end -# This is the type which is used to send data about attachments to the view -class FOIAttachment - attr_accessor :body - attr_accessor :content_type - attr_accessor :filename - attr_accessor :url_part_number - attr_accessor :within_rfc822_subject # we use the subject as the filename for email attachments - - # List of DSN codes taken from RFC 3463 - # http://tools.ietf.org/html/rfc3463 - DsnToMessage = { - 'X.1.0' => 'Other address status', - 'X.1.1' => 'Bad destination mailbox address', - 'X.1.2' => 'Bad destination system address', - 'X.1.3' => 'Bad destination mailbox address syntax', - 'X.1.4' => 'Destination mailbox address ambiguous', - 'X.1.5' => 'Destination mailbox address valid', - 'X.1.6' => 'Mailbox has moved', - 'X.1.7' => 'Bad sender\'s mailbox address syntax', - 'X.1.8' => 'Bad sender\'s system address', - 'X.2.0' => 'Other or undefined mailbox status', - 'X.2.1' => 'Mailbox disabled, not accepting messages', - 'X.2.2' => 'Mailbox full', - 'X.2.3' => 'Message length exceeds administrative limit.', - 'X.2.4' => 'Mailing list expansion problem', - 'X.3.0' => 'Other or undefined mail system status', - 'X.3.1' => 'Mail system full', - 'X.3.2' => 'System not accepting network messages', - 'X.3.3' => 'System not capable of selected features', - 'X.3.4' => 'Message too big for system', - 'X.4.0' => 'Other or undefined network or routing status', - 'X.4.1' => 'No answer from host', - 'X.4.2' => 'Bad connection', - 'X.4.3' => 'Routing server failure', - 'X.4.4' => 'Unable to route', - 'X.4.5' => 'Network congestion', - 'X.4.6' => 'Routing loop detected', - 'X.4.7' => 'Delivery time expired', - 'X.5.0' => 'Other or undefined protocol status', - 'X.5.1' => 'Invalid command', - 'X.5.2' => 'Syntax error', - 'X.5.3' => 'Too many recipients', - 'X.5.4' => 'Invalid command arguments', - 'X.5.5' => 'Wrong protocol version', - 'X.6.0' => 'Other or undefined media error', - 'X.6.1' => 'Media not supported', - 'X.6.2' => 
'Conversion required and prohibited', - 'X.6.3' => 'Conversion required but not supported', - 'X.6.4' => 'Conversion with loss performed', - 'X.6.5' => 'Conversion failed', - 'X.7.0' => 'Other or undefined security status', - 'X.7.1' => 'Delivery not authorized, message refused', - 'X.7.2' => 'Mailing list expansion prohibited', - 'X.7.3' => 'Security conversion required but not possible', - 'X.7.4' => 'Security features not supported', - 'X.7.5' => 'Cryptographic failure', - 'X.7.6' => 'Cryptographic algorithm not supported', - 'X.7.7' => 'Message integrity failure' - } - - # Returns HTML, of extra comment to put by attachment - def extra_note - # For delivery status notification attachments, extract the status and - # look up what it means in the DSN table. - if @content_type == 'message/delivery-status' - if !@body.match(/Status:\s+([0-9]+\.([0-9]+\.[0-9]+))\s+/) - return "" - end - dsn = $1 - dsn_part = 'X.' + $2 - - dsn_message = "" - if DsnToMessage.include?(dsn_part) - dsn_message = " (" + DsnToMessage[dsn_part] + ")" - end - - return "<br><em>DSN: " + dsn + dsn_message + "</em>" - end - return "" - end - - # Called by controller so old filenames still work - def old_display_filename - filename = self._internal_display_filename - - # Convert weird spaces (e.g. \n) to normal ones - filename = filename.gsub(/\s/, " ") - # Remove slashes, they mess with URLs - filename = filename.gsub(/\//, "-") - - return filename - end - - # XXX changing this will break existing URLs, so have a care - maybe - # make another old_display_filename see above - def display_filename - filename = self._internal_display_filename - - # Sometimes filenames have e.g. 
%20 in - no point butchering that - # (without unescaping it, this would remove the % and leave 20s in there) - filename = CGI.unescape(filename) - - # Remove weird spaces - filename = filename.gsub(/\s+/, " ") - # Remove non-alphabetic characters - filename = filename.gsub(/[^A-Za-z0-9.]/, " ") - # Remove spaces near dots - filename = filename.gsub(/\s*\.\s*/, ".") - # Compress adjacent spaces down to a single one - filename = filename.gsub(/\s+/, " ") - filename = filename.strip - - return filename - end - - def _internal_display_filename - calc_ext = AlaveteliFileTypes.mimetype_to_extension(@content_type) - - if @filename - # Put right extension on if missing - if !filename.match(/\.#{calc_ext}$/) && calc_ext - filename + "." + calc_ext - else - filename - end - else - if !calc_ext - calc_ext = "bin" - end - if @within_rfc822_subject - @within_rfc822_subject + "." + calc_ext - else - "attachment." + calc_ext - end - end - end - - # Size to show next to the download link for the attachment - def display_size - s = self.body.size - - if s > 1024 * 1024 - return sprintf("%.1f", s.to_f / 1024 / 1024) + 'M' - else - return (s / 1024).to_s + 'K' - end - end - - # Whether this type can be shown in the Google Docs Viewer. - # The full list of supported types can be found at - # https://docs.google.com/support/bin/answer.py?hl=en&answer=1189935 - def has_google_docs_viewer? - return !! { - "application/pdf" => true, # .pdf - "image/tiff" => true, # .tiff - - "application/vnd.ms-word" => true, # .doc - "application/vnd.openxmlformats-officedocument.wordprocessingml.document" => true, # .docx - - "application/vnd.ms-powerpoint" => true, # .ppt - "application/vnd.openxmlformats-officedocument.presentationml.presentation" => true, # .pptx - - "application/vnd.ms-excel" => true, # .xls - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" => true, # .xlsx - - } [self.content_type] - end - - # Whether this type has a "View as HTML" - def has_body_as_html? 
- return ( - !!{ - "text/plain" => true, - "application/rtf" => true, - }[self.content_type] or - self.has_google_docs_viewer? - ) - end - - # Name of type of attachment type - only valid for things that has_body_as_html? - def name_of_content_type - return { - "text/plain" => "Text file", - 'application/rtf' => "RTF file", - - 'application/pdf' => "PDF file", - 'image/tiff' => "TIFF image", - - 'application/vnd.ms-word' => "Word document", - 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' => "Word document", - - 'application/vnd.ms-powerpoint' => "PowerPoint presentation", - 'application/vnd.openxmlformats-officedocument.presentationml.presentation' => "PowerPoint presentation", - - 'application/vnd.ms-excel' => "Excel spreadsheet", - 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' => "Excel spreadsheet", - }[self.content_type] - end - - # For "View as HTML" of attachment - def body_as_html(dir) - html = nil - wrapper_id = "wrapper" - - # simple cases, can never fail - if self.content_type == 'text/plain' - text = self.body.strip - text = CGI.escapeHTML(text) - text = MySociety::Format.make_clickable(text) - html = text.gsub(/\n/, '<br>') - return '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" - "http://www.w3.org/TR/html4/loose.dtd"><html><head><title></title></head><body>' + html + "</body></html>", wrapper_id - end - - # the extractions will also produce image files, which go in the - # current directory, so change to the directory the function caller - # wants everything in - Dir.chdir(dir) do - tempfile = Tempfile.new('foiextract', '.') - tempfile.print self.body - tempfile.flush - - if self.content_type == 'application/pdf' - IO.popen("#{`which pdftohtml`.chomp} -nodrm -zoom 1.0 -stdout -enc UTF-8 -noframes " + tempfile.path + "", "r") do |child| - html = child.read() - end - elsif self.content_type == 'application/rtf' - IO.popen("/usr/bin/unrtf --html " + tempfile.path + "", "r") do |child| - 
html = child.read() - end - elsif self.has_google_docs_viewer? - html = '' # force error and using Google docs viewer - else - raise "No HTML conversion available for type " + self.content_type - end - - tempfile.close - tempfile.delete - end - - # We need to look at: - # a) Any error code - # b) The output size, as pdftohtml does not return an error code upon error. - # c) For cases when there is no text in the body of the HTML, or - # images, so nothing will be rendered. This is to detect some bug in - # pdftohtml, which sometimes makes it return just <hr>s and no other - # content. - html.match(/(\<body[^>]*\>.*)/mi) - body = $1.to_s - body_without_tags = body.gsub(/\s+/,"").gsub(/\<[^\>]*\>/, "") - contains_images = html.match(/<img/mi) ? true : false - if !$?.success? || html.size == 0 || (body_without_tags.size == 0 && !contains_images) - ret = "<html><head></head><body>"; - if self.has_google_docs_viewer? - wrapper_id = "wrapper_google_embed" - ret = ret + "<iframe src='http://docs.google.com/viewer?url=<attachment-url-here>&embedded=true' width='100%' height='100%' style='border: none;'></iframe>"; - else - ret = ret + "<p>Sorry, we were unable to convert this file to HTML. Please use the download link at the top right.</p>" - end - ret = ret + "</body></html>" - return ret, wrapper_id - end - - return html, wrapper_id - end - -end - - class IncomingMessage < ActiveRecord::Base belongs_to :info_request validates_presence_of :info_request @@ -380,7 +117,7 @@ class IncomingMessage < ActiveRecord::Base if !self.mail['return-path'].nil? && self.mail['return-path'].addr == "<>" return false end - if !self.mail['auto-submitted'].nil? && !self.mail['auto-submitted'].keys.empty? + if !self.mail['auto-submitted'].nil? return false end return true @@ -390,22 +127,27 @@ class IncomingMessage < ActiveRecord::Base # The following fields may be absent; we treat them as cached # values in case we want to regenerate them (due to mail # parsing bugs, etc). 
+ if self.raw_email.nil? + raise "Incoming message id=#{id} has no raw_email" + end if (!force.nil? || self.last_parsed.nil?) - self.extract_attachments! - self.sent_at = self.mail.date || self.created_at - self.subject = self.mail.subject - # XXX can probably remove from_name_if_present (which is a - # monkey patch) by just calling .from_addrs[0].name here - # instead? - self.mail_from = self.mail.from_name_if_present - begin - self.mail_from_domain = PublicBody.extract_domain_from_email(self.mail.from_addrs[0].spec) - rescue NoMethodError - self.mail_from_domain = "" + ActiveRecord::Base.transaction do + self.extract_attachments! + self.sent_at = self.mail.date || self.created_at + self.subject = self.mail.subject + # XXX can probably remove from_name_if_present (which is a + # monkey patch) by just calling .from_addrs[0].name here + # instead? + self.mail_from = self.mail.from_name_if_present + begin + self.mail_from_domain = PublicBody.extract_domain_from_email(self.mail.from_addrs[0].spec) + rescue NoMethodError + self.mail_from_domain = "" + end + self.valid_to_reply_to = self._calculate_valid_to_reply_to + self.last_parsed = Time.now + self.save! end - self.valid_to_reply_to = self._calculate_valid_to_reply_to - self.last_parsed = Time.now - self.save! end end @@ -527,11 +269,7 @@ class IncomingMessage < ActiveRecord::Base # Special cases for some content types if content_type == 'application/pdf' uncompressed_text = nil - IO.popen("#{`which pdftk`.chomp} - output - uncompress", "r+") do |child| - child.write(text) - child.close_write() - uncompressed_text = child.read() - end + uncompressed_text = AlaveteliExternalCommand.run("pdftk", "-", "output", "-", "uncompress", :stdin_string => text) # if we managed to uncompress the PDF... if !uncompressed_text.nil? && !uncompressed_text.empty? 
# then censor stuff (making a copy so can compare again in a bit) @@ -542,15 +280,11 @@ class IncomingMessage < ActiveRecord::Base # then use the altered file (recompressed) recompressed_text = nil if MySociety::Config.get('USE_GHOSTSCRIPT_COMPRESSION') == true - command = "gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/screen -dNOPAUSE -dQUIET -dBATCH -sOutputFile=- -" + command = ["gs", "-sDEVICE=pdfwrite", "-dCompatibilityLevel=1.4", "-dPDFSETTINGS=/screen", "-dNOPAUSE", "-dQUIET", "-dBATCH", "-sOutputFile=-", "-"] else - command = "#{`which pdftk`.chomp} - output - compress" - end - IO.popen(command, "r+") do |child| - child.write(censored_uncompressed_text) - child.close_write() - recompressed_text = child.read() + command = ["pdftk", "-", "output", "-", "compress"] end + recompressed_text = AlaveteliExternalCommand.run(*(command + [{:stdin_string=>censored_uncompressed_text}])) if recompressed_text.nil? || recompressed_text.empty? # buggy versions of pdftk sometimes fail on # compression, I don't see it's a disaster in @@ -586,8 +320,8 @@ class IncomingMessage < ActiveRecord::Base emails = ascii_chars.scan(MySociety::Validate.email_find_regexp) # Convert back to UCS-2, making a mask at the same time emails.map! {|email| [ - Iconv.conv('ucs-2', 'ascii', email[0]), - Iconv.conv('ucs-2', 'ascii', email[0].gsub(/[^@.]/, 'x')) + Iconv.conv('ucs-2le', 'ascii', email[0]), + Iconv.conv('ucs-2le', 'ascii', email[0].gsub(/[^@.]/, 'x')) ] } # Now search and replace the UCS-2 email with the UCS-2 mask for email, mask in emails @@ -792,7 +526,7 @@ class IncomingMessage < ActiveRecord::Base # it into conflict with ensure_parts_counted which it has to be # called both before and after. It will fail with cases of # attachments of attachments etc. - + charset = curr_mail.charset # save this, because overwriting content_type also resets charset # Don't allow nil content_types if curr_mail.content_type.nil? 
curr_mail.content_type = 'application/octet-stream' @@ -822,7 +556,6 @@ class IncomingMessage < ActiveRecord::Base curr_mail.content_type = 'application/octet-stream' end end - # If the part is an attachment of email if curr_mail.content_type == 'message/rfc822' || curr_mail.content_type == 'application/vnd.ms-outlook' || curr_mail.content_type == 'application/ms-tnef' ensure_parts_counted # fills in rfc822_attachment variable @@ -832,6 +565,8 @@ class IncomingMessage < ActiveRecord::Base curr_mail.within_rfc822_attachment = within_rfc822_attachment leaves_found += [curr_mail] end + # restore original charset + curr_mail.charset = charset end return leaves_found end @@ -887,64 +622,58 @@ class IncomingMessage < ActiveRecord::Base end # Returns body text from main text part of email, converted to UTF-8 def get_main_body_text_internal + parse_raw_email! main_part = get_main_body_text_part return _convert_part_body_to_text(main_part) end + # Given a main text part, converts it to text def _convert_part_body_to_text(part) if part.nil? text = "[ Email has no body, please see attachments ]" - text_charset = "utf-8" + source_charset = "utf-8" else - text = part.body - text_charset = part.charset + text = part.body # by default, TMail converts to UTF8 in this call + source_charset = part.charset if part.content_type == 'text/html' # e.g. http://www.whatdotheyknow.com/request/35/response/177 - # XXX This is a bit of a hack as it is calling a convert to text routine. - # Could instead call a sanitize HTML one. - text = self.class._get_attachment_text_internal_one_file(part.content_type, text) - end - end - - # Charset conversion, turn everything into UTF-8 - if not text_charset.nil? 
- begin - # XXX specially convert unicode pound signs, was needed here - # http://www.whatdotheyknow.com/request/88/response/352 - text = text.gsub("£", Iconv.conv(text_charset, 'utf-8', '£')) - # Try proper conversion - text = Iconv.conv('utf-8', text_charset, text) - rescue Iconv::IllegalSequence, Iconv::InvalidEncoding - # Clearly specified charset was nonsense - text_charset = nil + # XXX This is a bit of a hack as it is calling a + # convert to text routine. Could instead call a + # sanitize HTML one. + + # If the text isn't UTF8, it means TMail had a problem + # converting it (invalid characters, etc), and we + # should instead tell elinks to respect the source + # charset + use_charset = "utf-8" + begin + text = Iconv.conv('utf-8', 'utf-8', text) + rescue Iconv::IllegalSequence + use_charset = source_charset + end + text = self.class._get_attachment_text_internal_one_file(part.content_type, text, use_charset) end end - if text_charset.nil? - # No specified charset, so guess - - # Could use rchardet here, but it had trouble with - # http://www.whatdotheyknow.com/request/107/response/144 - # So I gave up - most likely in UK we'll only get windows-1252 anyway. + # If TMail can't convert text, it just returns it, so we sanitise it. + begin + # Test if it's good UTF-8 + text = Iconv.conv('utf-8', 'utf-8', text) + rescue Iconv::IllegalSequence + # Text looks like unlabelled nonsense, + # strip out anything that isn't UTF-8 begin - # See if it is good UTF-8 anyway - text = Iconv.conv('utf-8', 'utf-8', text) - rescue Iconv::IllegalSequence - begin - # Or is it good windows-1252, most likely - text = Iconv.conv('utf-8', 'windows-1252', text) - rescue Iconv::IllegalSequence - # Text looks like unlabelled nonsense, strip out anything that isn't UTF-8 - text = Iconv.conv('utf-8//IGNORE', 'utf-8', text) + - _("\n\n[ {{site_name}} note: The above text was badly encoded, and has had strange characters removed. 
]", - :site_name => MySociety::Config.get('SITE_NAME', 'Alaveteli')) + text = Iconv.conv('utf-8//IGNORE', source_charset, text) + + _("\n\n[ {{site_name}} note: The above text was badly encoded, and has had strange characters removed. ]", + :site_name => MySociety::Config.get('SITE_NAME', 'Alaveteli')) + rescue Iconv::InvalidEncoding, Iconv::IllegalSequence + if source_charset != "utf-8" + source_charset = "utf-8" + retry end end end - # An assertion that we have ended up with UTF-8 XXX can remove as this should - # always be fine if code above is - Iconv.conv('utf-8', 'utf-8', text) # Fix DOS style linefeeds to Unix style ones (or other later regexps won't work) # Needed for e.g. http://www.whatdotheyknow.com/request/60/response/98 @@ -1004,9 +733,7 @@ class IncomingMessage < ActiveRecord::Base tempfile = Tempfile.new('foiuu') tempfile.print uu tempfile.flush - IO.popen("/usr/bin/uudecode " + tempfile.path + " -o -", "r") do |child| - content = child.read() - end + content = AlaveteliExternalCommand.run("uudecode", "-o", "/dev/stdout", tempfile.path) tempfile.close # Make attachment type from it, working out filename and mime type filename = uu.match(/^begin\s+[0-9]+\s+(.*)$/)[1] @@ -1192,7 +919,9 @@ class IncomingMessage < ActiveRecord::Base return self.cached_attachment_text_clipped end - def IncomingMessage._get_attachment_text_internal_one_file(content_type, body) + def IncomingMessage._get_attachment_text_internal_one_file(content_type, body, charset = 'utf-8') + # note re. charset: TMail always tries to convert email bodies + # to UTF8 by default, so normally it should already be that. 
text = '' # XXX - tell all these command line tools to return utf-8 if content_type == 'text/plain' @@ -1202,22 +931,23 @@ class IncomingMessage < ActiveRecord::Base tempfile.print body tempfile.flush if content_type == 'application/vnd.ms-word' - AlaveteliExternalCommand.run(`which wvText`.chomp, tempfile.path, tempfile.path + ".txt") + AlaveteliExternalCommand.run("wvText", tempfile.path, tempfile.path + ".txt") # Try catdoc if we get into trouble (e.g. for InfoRequestEvent 2701) if not File.exists?(tempfile.path + ".txt") - AlaveteliExternalCommand.run(`which catdoc`.chomp, tempfile.path, :append_to => text) + AlaveteliExternalCommand.run("catdoc", tempfile.path, :append_to => text) else text += File.read(tempfile.path + ".txt") + "\n\n" File.unlink(tempfile.path + ".txt") end elsif content_type == 'application/rtf' # catdoc on RTF prodcues less comments and extra bumf than --text option to unrtf - AlaveteliExternalCommand.run(`which catdoc`.chomp, tempfile.path, :append_to => text) + AlaveteliExternalCommand.run("catdoc", tempfile.path, :append_to => text) elsif content_type == 'text/html' - # lynx wordwraps links in its output, which then don't get formatted properly - # by Alaveteli. We use elinks instead, which doesn't do that. - AlaveteliExternalCommand.run(`which elinks`.chomp, "-eval", "'set document.codepage.assume = \"utf-8\"'", "-dump-charset", "utf-8", "-force-html", "-dump", - tempfile.path, :append_to => text) + # lynx wordwraps links in its output, which then don't + # get formatted properly by Alaveteli. We use elinks + # instead, which doesn't do that. 
+ AlaveteliExternalCommand.run("elinks", "-eval", "set document.codepage.assume = \"#{charset}\"", "-eval", "set document.codepage.force_assumed = 1", "-dump-charset", "utf-8", "-force-html", "-dump", + tempfile.path, :append_to => text, :env => {"LANG" => "C"}) elsif content_type == 'application/vnd.ms-excel' # Bit crazy using /usr/bin/strings - but xls2csv, xlhtml and # py_xls2txt only extract text from cells, not from floating @@ -1227,9 +957,9 @@ class IncomingMessage < ActiveRecord::Base elsif content_type == 'application/vnd.ms-powerpoint' # ppthtml seems to catch more text, but only outputs HTML when # we want text, so just use catppt for now - AlaveteliExternalCommand.run(`which catppt`.chomp, tempfile.path, :append_to => text) + AlaveteliExternalCommand.run("catppt", tempfile.path, :append_to => text) elsif content_type == 'application/pdf' - AlaveteliExternalCommand.run(`which pdftotext`.chomp, tempfile.path, "-", :append_to => text) + AlaveteliExternalCommand.run("pdftotext", tempfile.path, "-", :append_to => text) elsif content_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' # This is Microsoft's XML office document format. # Just pull out the main XML file, and strip it of text. @@ -1283,7 +1013,7 @@ class IncomingMessage < ActiveRecord::Base text = '' attachments = self.get_attachments_for_display for attachment in attachments - text += IncomingMessage._get_attachment_text_internal_one_file(attachment.content_type, attachment.body) + text += IncomingMessage._get_attachment_text_internal_one_file(attachment.content_type, attachment.body, attachment.charset) end # Remove any bad characters text = Iconv.conv('utf-8//IGNORE', 'utf-8', text) @@ -1376,7 +1106,7 @@ class IncomingMessage < ActiveRecord::Base if !self.mail['return-path'].nil? && self.mail['return-path'].addr == "<>" return false end - if !self.mail['auto-submitted'].nil? && !self.mail['auto-submitted'].keys.empty? + if !self.mail['auto-submitted'].nil? 
return false end return true diff --git a/app/models/info_request.rb b/app/models/info_request.rb index cfef6ebd8..b5a1cd833 100644 --- a/app/models/info_request.rb +++ b/app/models/info_request.rb @@ -1,6 +1,5 @@ - # == Schema Information -# Schema version: 95 +# Schema version: 108 # # Table name: info_requests # @@ -11,23 +10,17 @@ # created_at :datetime not null # updated_at :datetime not null # described_state :string(255) not null -# awaiting_description :boolean default(false), not null +# awaiting_description :boolean default(FALSE), not null # prominence :string(255) default("normal"), not null # url_title :text not null # law_used :string(255) default("foi"), not null # allow_new_responses_from :string(255) default("anybody"), not null # handle_rejected_responses :string(255) default("bounce"), not null +# idhash :string(255) not null # -# models/info_request.rb: -# A Freedom of Information request. -# -# Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved. -# Email: francis@mysociety.org; WWW: http://www.mysociety.org/ -# -# $Id: info_request.rb,v 1.217 2009-10-26 17:52:39 francis Exp $ + require 'digest/sha1' -require File.join(File.dirname(__FILE__),'../../vendor/plugins/acts_as_xapian/lib/acts_as_xapian') class InfoRequest < ActiveRecord::Base strip_attributes! 
diff --git a/app/models/info_request_event.rb b/app/models/info_request_event.rb index 4ea89bf81..99f34cf9e 100644 --- a/app/models/info_request_event.rb +++ b/app/models/info_request_event.rb @@ -1,5 +1,5 @@ # == Schema Information -# Schema version: 95 +# Schema version: 108 # # Table name: info_request_events # @@ -8,12 +8,12 @@ # event_type :text not null # params_yaml :text not null # created_at :datetime not null -# described_state :string(255) -# calculated_state :string(255) -# last_described_at :datetime -# incoming_message_id :integer -# outgoing_message_id :integer -# comment_id :integer +# described_state :string(255) +# calculated_state :string(255) +# last_described_at :datetime +# incoming_message_id :integer +# outgoing_message_id :integer +# comment_id :integer # prominence :string(255) default("normal"), not null # @@ -109,7 +109,7 @@ class InfoRequestEvent < ActiveRecord::Base [ :tags, 'U', "tag" ] ], :if => :indexed_by_search?, - :eager_load => [ :incoming_message, :outgoing_message, :comment, { :info_request => [ :user, :public_body, :censor_rules ] } ] + :eager_load => [ :outgoing_message, :comment, { :info_request => [ :user, :public_body, :censor_rules ] } ] def requested_by self.info_request.user.url_name @@ -147,6 +147,7 @@ class InfoRequestEvent < ActiveRecord::Base return event.calculated_state end end + return end def waiting_classification @@ -175,7 +176,41 @@ class InfoRequestEvent < ActiveRecord::Base # format it here as no datetime support in Xapian's value ranges return self.created_at.strftime("%Y%m%d%H%M%S") end - # clipped = true - means return shorter text. It is used for snippets for + + def incoming_message_selective_columns(fields) + message = IncomingMessage.find(:all, + :select => fields + ", incoming_messages.info_request_id", + :joins => "INNER JOIN info_request_events ON incoming_messages.id = incoming_message_id ", + :conditions => "info_request_events.id = #{self.id}" + ) + message = message[0] + if !message.nil? 
+ message.info_request = InfoRequest.find(message.info_request_id) + end + return message + end + + def get_clipped_response_efficiently + # XXX this ugly code is an attempt to not always load all the + # columns for an incoming message, which can be *very* large + # (due to all the cached text). We care particularly in this + # case because it's called for every search result on a page + # (to show the search snippet). Actually, we should review if we + # need all this data to be cached in the database at all, and + # then we won't need this horrid workaround. + message = self.incoming_message_selective_columns("cached_attachment_text_clipped, cached_main_body_text_folded") + clipped_body = message.cached_main_body_text_folded + clipped_attachment = message.cached_attachment_text_clipped + if clipped_body.nil? || clipped_attachment.nil? + # we're going to have to load it anyway + text = self.incoming_message.get_text_for_indexing_clipped + else + text = clipped_body.gsub("FOLDED_QUOTED_SECTION", " ").strip + "\n\n" + clipped_attachment + end + return text + "\n\n" + end + + # clipped = true - means return shorter text. It is used for snippets for # performance reasons. Xapian will take the full text. def search_text_main(clipped = false) text = '' @@ -185,7 +220,7 @@ class InfoRequestEvent < ActiveRecord::Base text = text + self.outgoing_message.get_text_for_indexing + "\n\n" elsif self.event_type == 'response' if clipped - text = text + self.incoming_message.get_text_for_indexing_clipped + "\n\n" + text = text + self.get_clipped_response_efficiently else text = text + self.incoming_message.get_text_for_indexing_full + "\n\n" end @@ -295,7 +330,7 @@ class InfoRequestEvent < ActiveRecord::Base end - def is_incoming_message?() not self.incoming_message.nil? end + def is_incoming_message?() not self.incoming_message_selective_columns("incoming_messages.id").nil? end def is_outgoing_message?() not self.outgoing_message.nil? end def is_comment?() not self.comment.nil? 
end diff --git a/app/models/outgoing_message.rb b/app/models/outgoing_message.rb index b7e310b1e..cc561b21d 100644 --- a/app/models/outgoing_message.rb +++ b/app/models/outgoing_message.rb @@ -1,5 +1,5 @@ # == Schema Information -# Schema version: 95 +# Schema version: 108 # # Table name: outgoing_messages # @@ -10,8 +10,8 @@ # message_type :string(255) not null # created_at :datetime not null # updated_at :datetime not null -# last_sent_at :datetime -# incoming_message_followup_id :integer +# last_sent_at :datetime +# incoming_message_followup_id :integer # what_doing :string(255) not null # diff --git a/app/models/post_redirect.rb b/app/models/post_redirect.rb index b111d019d..59cc86799 100644 --- a/app/models/post_redirect.rb +++ b/app/models/post_redirect.rb @@ -1,17 +1,17 @@ # == Schema Information -# Schema version: 95 +# Schema version: 108 # # Table name: post_redirects # # id :integer not null, primary key # token :text not null # uri :text not null -# post_params_yaml :text +# post_params_yaml :text # created_at :datetime not null # updated_at :datetime not null # email_token :text not null -# reason_params_yaml :text -# user_id :integer +# reason_params_yaml :text +# user_id :integer # circumstance :text default("normal"), not null # diff --git a/app/models/profile_photo.rb b/app/models/profile_photo.rb index b15e3e4f4..43dbbbf0a 100644 --- a/app/models/profile_photo.rb +++ b/app/models/profile_photo.rb @@ -1,12 +1,12 @@ # == Schema Information -# Schema version: 95 +# Schema version: 108 # # Table name: profile_photos # # id :integer not null, primary key # data :binary not null -# user_id :integer -# draft :boolean default(false), not null +# user_id :integer +# draft :boolean default(FALSE), not null # # models/profile_photo.rb: diff --git a/app/models/public_body.rb b/app/models/public_body.rb index 453e3a6cf..961fa3cbb 100644 --- a/app/models/public_body.rb +++ b/app/models/public_body.rb @@ -275,7 +275,7 @@ class PublicBody < ActiveRecord::Base ret 
= ret + types[-1] return ret else - return "A public authority" + return _("A public authority") end end @@ -395,7 +395,7 @@ class PublicBody < ActiveRecord::Base field_list = ['name', 'short_name', 'request_email', 'notes', 'publication_scheme', 'home_page', 'tag_string'] - if public_body = bodies_by_name[name] # Existing public body + if public_body = bodies_by_name[name] # Existing public body available_locales.each do |locale| PublicBody.with_locale(locale) do changed = ActiveSupport::OrderedHash.new diff --git a/app/models/raw_email.rb b/app/models/raw_email.rb index c6066cbf4..3e12a6feb 100644 --- a/app/models/raw_email.rb +++ b/app/models/raw_email.rb @@ -1,11 +1,10 @@ # == Schema Information -# Schema version: 95 +# Schema version: 108 # # Table name: raw_emails # -# id :integer not null, primary key -# data_text :text -# data_binary :binary +# id :integer not null, primary key +# # models/raw_email.rb: # The fat part of models/incoming_message.rb @@ -28,7 +27,7 @@ class RawEmail < ActiveRecord::Base def directory request_id = self.incoming_message.info_request.id.to_s if ENV["RAILS_ENV"] == "test" - return 'files/raw_email_test' + return File.join(RAILS_ROOT, 'files/raw_email_test') else return File.join(MySociety::Config.get('RAW_EMAILS_LOCATION', 'files/raw_emails'), @@ -44,41 +43,19 @@ class RawEmail < ActiveRecord::Base if !File.exists?(self.directory) FileUtils.mkdir_p self.directory end - File.open(self.filepath, "wb") { |file| + File.atomic_write(self.filepath) { |file| file.write d } end def data - if !File.exists?(self.filepath) - dbdata - else - File.open(self.filepath, "rb" ).read - end + File.open(self.filepath, "rb").read end def destroy_file_representation! File.delete(self.filepath) end - def dbdata=(d) - write_attribute(:data_binary, d) - end - - def dbdata - d = read_attribute(:data_binary) - if !d.nil? - return d - end - - d = read_attribute(:data_text) - if !d.nil? 
- return d - end - - raise "internal error, double nil value in RawEmail" - end - end diff --git a/app/models/request_mailer.rb b/app/models/request_mailer.rb index 272f2ea83..83cce9045 100644 --- a/app/models/request_mailer.rb +++ b/app/models/request_mailer.rb @@ -353,7 +353,18 @@ class RequestMailer < ApplicationMailer # That that patch has not been applied, despite bribes of beer, is # typical of the lack of quality of Rails. - info_requests = InfoRequest.find(:all, :conditions => [ "(select id from info_request_events where event_type = 'comment' and info_request_events.info_request_id = info_requests.id and created_at > ? limit 1) is not null", Time.now() - 1.month ], :include => [ { :info_request_events => :user_info_request_sent_alerts } ], :order => "info_requests.id, info_request_events.created_at" ) + info_requests = InfoRequest.find(:all, + :conditions => [ + "info_requests.id in ( + select info_request_id + from info_request_events + where event_type = 'comment' + and created_at > (now() - '1 month'::interval) + )" + ], + :include => [ { :info_request_events => :user_info_request_sent_alerts } ], + :order => "info_requests.id, info_request_events.created_at" + ) for info_request in info_requests # Count number of new comments to alert on diff --git a/app/models/track_thing.rb b/app/models/track_thing.rb index b74f7dad5..58d70ed86 100644 --- a/app/models/track_thing.rb +++ b/app/models/track_thing.rb @@ -1,18 +1,18 @@ # == Schema Information -# Schema version: 95 +# Schema version: 108 # # Table name: track_things # # id :integer not null, primary key # tracking_user_id :integer not null # track_query :string(255) not null -# info_request_id :integer -# tracked_user_id :integer -# public_body_id :integer +# info_request_id :integer +# tracked_user_id :integer +# public_body_id :integer # track_medium :string(255) not null # track_type :string(255) default("internal_error"), not null -# created_at :datetime -# updated_at :datetime +# created_at :datetime 
+# updated_at :datetime # # models/track_thing.rb: @@ -71,14 +71,13 @@ class TrackThing < ActiveRecord::Base def track_query_description # XXX this is very brittle... we should probably ask users # simply to name their tracks when they make them? - self.track_query = self.track_query.gsub(/([()]|OR)/, "") - filters = self.track_query.scan /\b\S+:\S+\b/ - text = self.track_query + original_text = parsed_text = self.track_query.gsub(/([()]|OR)/, "") + filters = parsed_text.scan /\b\S+:\S+\b/ varieties = Set.new date = "" statuses = Set.new for filter in filters - text = text.sub(filter, "") + parsed_text = parsed_text.sub(filter, "") if filter =~ /variety:user/ varieties << _("users") end @@ -105,7 +104,7 @@ class TrackThing < ActiveRecord::Base end end if filters.empty? - text = self.track_query + parsed_text = original_text end descriptions = [] if varieties.include? _("requests") @@ -116,10 +115,10 @@ class TrackThing < ActiveRecord::Base varieties << _("anything") end descriptions += Array(varieties) - text = text.strip + parsed_text = parsed_text.strip descriptions = descriptions.join(_(" or ")) - if !text.empty? - descriptions += _("{{list_of_things}} matching text '{{search_query}}'", :list_of_things => "", :search_query => text) + if !parsed_text.empty? 
+ descriptions += _("{{list_of_things}} matching text '{{search_query}}'", :list_of_things => "", :search_query => parsed_text) end return descriptions end diff --git a/app/models/track_things_sent_email.rb b/app/models/track_things_sent_email.rb index d83bf05ff..777339d75 100644 --- a/app/models/track_things_sent_email.rb +++ b/app/models/track_things_sent_email.rb @@ -1,15 +1,15 @@ # == Schema Information -# Schema version: 95 +# Schema version: 108 # # Table name: track_things_sent_emails # # id :integer not null, primary key # track_thing_id :integer not null -# info_request_event_id :integer -# user_id :integer -# public_body_id :integer -# created_at :datetime -# updated_at :datetime +# info_request_event_id :integer +# user_id :integer +# public_body_id :integer +# created_at :datetime +# updated_at :datetime # # models/track_things_sent_email.rb: diff --git a/app/models/user.rb b/app/models/user.rb index e98d777b1..8c4b35fe6 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -1,5 +1,5 @@ # == Schema Information -# Schema version: 95 +# Schema version: 108 # # Table name: users # @@ -10,14 +10,16 @@ # salt :string(255) not null # created_at :datetime not null # updated_at :datetime not null -# email_confirmed :boolean default(false), not null +# email_confirmed :boolean default(FALSE), not null # url_name :text not null # last_daily_track_email :datetime default(Sat Jan 01 00:00:00 UTC 2000) # admin_level :string(255) default("none"), not null # ban_text :text default(""), not null # about_me :text default(""), not null -# email_bounced_at :datetime +# locale :string(255) +# email_bounced_at :datetime # email_bounce_message :text default(""), not null +# no_limit :boolean default(FALSE), not null # # models/user.rb: @@ -130,7 +132,7 @@ class User < ActiveRecord::Base name.strip! end if self.public_banned? 
- name = _("{{user_name}} (Banned)", :user_name=>name) + name = _("{{user_name}} (Account suspended)", :user_name=>name) end name end @@ -255,7 +257,7 @@ class User < ActiveRecord::Base end def User.owns_every_request?(user) - !user.nil? && user.owns_every_request? + !user.nil? && user.owns_every_request? end # Can the user see every request, even hidden ones? @@ -273,7 +275,18 @@ class User < ActiveRecord::Base end # Various ways the user can be banned, and text to describe it if failed def can_file_requests? - self.ban_text.empty? + self.ban_text.empty? && !self.exceeded_limit? + end + def exceeded_limit? + # Some users have no limit + return false if self.no_limit + + # Has the user issued as many as MAX_REQUESTS_PER_USER_PER_DAY requests in the past 24 hours? + daily_limit = MySociety::Config.get("MAX_REQUESTS_PER_USER_PER_DAY") + return false if daily_limit.nil? + recent_requests = InfoRequest.count(:conditions => ["user_id = ? and created_at > now() - '1 day'::interval", self.id]) + + return (recent_requests >= daily_limit) end def can_make_followup? self.ban_text.empty? @@ -285,7 +298,11 @@ class User < ActiveRecord::Base self.ban_text.empty? 
end def can_fail_html - text = self.ban_text.strip + if ban_text + text = self.ban_text.strip + else + raise "Unknown reason for ban" + end text = CGI.escapeHTML(text) text = MySociety::Format.make_clickable(text, :contract => 1) text = text.gsub(/\n/, '<br>') diff --git a/app/models/user_info_request_sent_alert.rb b/app/models/user_info_request_sent_alert.rb index d07b4e553..5f23355bf 100644 --- a/app/models/user_info_request_sent_alert.rb +++ b/app/models/user_info_request_sent_alert.rb @@ -1,5 +1,5 @@ # == Schema Information -# Schema version: 95 +# Schema version: 108 # # Table name: user_info_request_sent_alerts # @@ -7,7 +7,7 @@ # user_id :integer not null # info_request_id :integer not null # alert_type :string(255) not null -# info_request_event_id :integer +# info_request_event_id :integer # # models/user_info_request_sent_alert.rb: |