diff options
Diffstat (limited to 'app/models')
-rw-r--r-- | app/models/application_mailer.rb | 9 | ||||
-rw-r--r-- | app/models/censor_rule.rb | 10 | ||||
-rw-r--r-- | app/models/foi_attachment.rb | 68 | ||||
-rw-r--r-- | app/models/incoming_message.rb | 475 | ||||
-rw-r--r-- | app/models/info_request.rb | 62 | ||||
-rw-r--r-- | app/models/outgoing_message.rb | 17 | ||||
-rw-r--r-- | app/models/post_redirect.rb | 26 | ||||
-rw-r--r-- | app/models/profile_photo.rb | 43 | ||||
-rw-r--r-- | app/models/public_body.rb | 51 | ||||
-rw-r--r-- | app/models/user.rb | 49 |
10 files changed, 280 insertions, 530 deletions
diff --git a/app/models/application_mailer.rb b/app/models/application_mailer.rb index cdb279c3c..859e8a0e2 100644 --- a/app/models/application_mailer.rb +++ b/app/models/application_mailer.rb @@ -67,7 +67,8 @@ class ApplicationMailer < ActionMailer::Base return nil end - if ActionMailer::VERSION::MAJOR == 2 + # FIXME: This check was disabled temporarily during the Rails 3 upgrade + #if ActionMailer::VERSION::MAJOR == 2 # This method is a customised version of ActionMailer::Base.create! # modified to allow templates to be selected correctly for multipart @@ -142,9 +143,9 @@ class ApplicationMailer < ActionMailer::Base # build the mail object itself @mail = create_mail end - else - raise "ApplicationMailer.create! is obsolete - find another way to ensure that themes can override mail templates for multipart mails" - end + # else + # raise "ApplicationMailer.create! is obsolete - find another way to ensure that themes can override mail templates for multipart mails" + # end end diff --git a/app/models/censor_rule.rb b/app/models/censor_rule.rb index f40ab6fbb..ec66074b7 100644 --- a/app/models/censor_rule.rb +++ b/app/models/censor_rule.rb @@ -33,13 +33,15 @@ class CensorRule < ActiveRecord::Base validate :require_valid_regexp, :if => proc{ |rule| rule.regexp? == true } validates_presence_of :text - named_scope :global, {:conditions => {:info_request_id => nil, - :user_id => nil, - :public_body_id => nil}} + scope :global, {:conditions => {:info_request_id => nil, + :user_id => nil, + :public_body_id => nil}} def require_user_request_or_public_body if self.info_request.nil? && self.user.nil? && self.public_body.nil? - errors.add("Censor must apply to an info request a user or a body; ") + [:info_request, :user, :public_body].each do |a| + errors.add(a, "Rule must apply to an info request, a user or a body") + end end end diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb index a40898aef..2f8a9ab04 100644 --- a/app/models/foi_attachment.rb +++ b/app/models/foi_attachment.rb @@ -38,7 +38,7 @@ class FoiAttachment < ActiveRecord::Base BODY_MAX_DELAY = 5 def directory - rails_env = ENV['RAILS_ENV'] + rails_env = Rails.env if rails_env.nil? || rails_env.empty? raise "$RAILS_ENV is not set" end @@ -67,9 +67,22 @@ class FoiAttachment < ActiveRecord::Base file.write d } update_display_size! + encode_cached_body! @cached_body = d end + # If the original mail part had a charset, it's some kind of string, so assume that + # it should be handled as a string in the stated charset, not a bytearray, and then + # convert it our default encoding. For ruby 1.8 this is a noop. + def encode_cached_body! + if RUBY_VERSION.to_f >= 1.9 + if charset + @cached_body.force_encoding(charset) + @cached_body = @cached_body.encode(Encoding.default_internal, charset) + end + end + end + def body if @cached_body.nil? tries = 0 @@ -90,6 +103,7 @@ class FoiAttachment < ActiveRecord::Base self.incoming_message.parse_raw_email!(force) retry end + encode_cached_body! end return @cached_body end @@ -310,31 +324,41 @@ class FoiAttachment < ActiveRecord::Base # the extractions will also produce image files, which go in the # current directory, so change to the directory the function caller # wants everything in - Dir.chdir(dir) do - tempfile = Tempfile.new('foiextract', '.') - tempfile.print self.body - tempfile.flush - - html = nil - if self.content_type == 'application/pdf' - # We set a timeout here, because pdftohtml can spiral out of control - # on some PDF files and we don’t want to crash the whole server. - html = AlaveteliExternalCommand.run("pdftohtml", "-nodrm", "-zoom", "1.0", "-stdout", "-enc", "UTF-8", "-noframes", tempfile.path, :timeout => 30) - elsif self.content_type == 'application/rtf' - html = AlaveteliExternalCommand.run("unrtf", "--html", tempfile.path, :timeout => 120) - end - - if html.nil? - if self.has_google_docs_viewer? - html = '' # force error and using Google docs viewer + + html = nil + if ['application/pdf', 'application/rtf'].include?(self.content_type) + text = self.body + Dir.chdir(dir) do + if RUBY_VERSION.to_f >= 1.9 + tempfile = Tempfile.new('foiextract', '.', :encoding => text.encoding) else - raise "No HTML conversion available for type " + self.content_type + tempfile = Tempfile.new('foiextract', '.') end - end + tempfile.print text + tempfile.flush + - tempfile.close - tempfile.delete + if self.content_type == 'application/pdf' + # We set a timeout here, because pdftohtml can spiral out of control + # on some PDF files and we don't want to crash the whole server. + html = AlaveteliExternalCommand.run("pdftohtml", "-nodrm", "-zoom", "1.0", "-stdout", "-enc", "UTF-8", "-noframes", tempfile.path, :timeout => 30) + elsif self.content_type == 'application/rtf' + html = AlaveteliExternalCommand.run("unrtf", "--html", tempfile.path, :timeout => 120) + end + + tempfile.close + tempfile.delete + end end + if html.nil? + if self.has_google_docs_viewer? + html = '' # force error and using Google docs viewer + else + raise "No HTML conversion available for type " + self.content_type + end + end + + # We need to look at: # a) Any error code diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb index 98124b28e..ee8b92f11 100644 --- a/app/models/incoming_message.rb +++ b/app/models/incoming_message.rb @@ -70,41 +70,27 @@ class IncomingMessage < ActiveRecord::Base @mail end - def from_address - self.mail.from_addrs[0].address - end - def empty_from_field? self.mail.from_addrs.nil? || self.mail.from_addrs.size == 0 end def from_email - self.mail.from_addrs[0].spec + MailHandler.get_from_address(self.mail) end def addresses - ((self.mail.to || []) + - (self.mail.cc || []) + - (self.mail.envelope_to || [])).uniq + MailHandler.get_all_addresses(self.mail) end def message_id self.mail.message_id end - # Returns the name of the person the incoming message is from, or nil if - # there isn't one or if there is only an email address. XXX can probably - # remove from_name_if_present (which is a monkey patch) by just calling - # .from_addrs[0].name here instead? - # Return false if for some reason this is a message that we shouldn't let them reply to def _calculate_valid_to_reply_to # check validity of email - if empty_from_field? - return false - end email = self.from_email - if !MySociety::Validate.is_valid_email(email) + if email.nil? || !MySociety::Validate.is_valid_email(email) return false end @@ -114,13 +100,13 @@ class IncomingMessage < ActiveRecord::Base prefix = email prefix =~ /^(.*)@/ prefix = $1 - if !prefix.nil? && prefix.downcase.match(/^(postmaster|mailer-daemon|auto_reply|donotreply|no.reply)$/) + if !prefix.nil? && prefix.downcase.match(/^(postmaster|mailer-daemon|auto_reply|do.?not.?reply|no.reply)$/) return false end - if !self.mail['return-path'].nil? && self.mail['return-path'].addr == "<>" + if MailHandler.empty_return_path?(self.mail) return false end - if !self.mail['auto-submitted'].nil? + if !MailHandler.get_auto_submitted(self.mail).nil? return false end return true @@ -138,13 +124,10 @@ class IncomingMessage < ActiveRecord::Base self.extract_attachments! self.sent_at = self.mail.date || self.created_at self.subject = self.mail.subject - # XXX can probably remove from_name_if_present (which is a - # monkey patch) by just calling .from_addrs[0].name here - # instead? - self.mail_from = self.mail.from_name_if_present - begin + self.mail_from = MailHandler.get_from_name(self.mail) + if self.from_email self.mail_from_domain = PublicBody.extract_domain_from_email(self.from_email) - rescue NoMethodError + else self.mail_from_domain = "" end self.valid_to_reply_to = self._calculate_valid_to_reply_to @@ -190,54 +173,8 @@ class IncomingMessage < ActiveRecord::Base super end - # Number the attachments in depth first tree order, for use in URLs. - # XXX This fills in part.rfc822_attachment and part.url_part_number within - # all the parts of the email (see monkeypatches in lib/mail_handler/tmail_extensions and - # lib/mail_handler/mail_extensions for how these attributes are added). ensure_parts_counted - # must be called before using the attributes. - def ensure_parts_counted - @count_parts_count = 0 - _count_parts_recursive(self.mail) - # we carry on using these numeric ids for attachments uudecoded from within text parts - @count_first_uudecode_count = @count_parts_count - end - def _count_parts_recursive(part) - if part.multipart? - part.parts.each do |p| - _count_parts_recursive(p) - end - else - part_filename = MailHandler.get_part_file_name(part) - begin - if part.content_type == 'message/rfc822' - # An email attached as text - # e.g. http://www.whatdotheyknow.com/request/64/response/102 - part.rfc822_attachment = MailHandler.mail_from_raw_email(part.body, decode=false) - elsif part.content_type == 'application/vnd.ms-outlook' || part_filename && AlaveteliFileTypes.filename_to_mimetype(part_filename) == 'application/vnd.ms-outlook' - # An email attached as an Outlook file - # e.g. http://www.whatdotheyknow.com/request/chinese_names_for_british_politi - msg = Mapi::Msg.open(StringIO.new(part.body)) - part.rfc822_attachment = MailHandler.mail_from_raw_email(msg.to_mime.to_s, decode=false) - elsif part.content_type == 'application/ms-tnef' - # A set of attachments in a TNEF file - part.rfc822_attachment = MailHandler.mail_from_tnef(part.body) - end - rescue - # If attached mail doesn't parse, treat it as text part - part.rfc822_attachment = nil - else - unless part.rfc822_attachment.nil? - _count_parts_recursive(part.rfc822_attachment) - end - end - if part.rfc822_attachment.nil? - @count_parts_count += 1 - part.url_part_number = @count_parts_count - end - end - end # And look up by URL part number to get an attachment - # XXX relies on extract_attachments calling ensure_parts_counted + # XXX relies on extract_attachments calling MailHandler.ensure_parts_counted def self.get_attachment_by_url_part_number(attachments, found_url_part_number) attachments.each do |a| if a.url_part_number == found_url_part_number @@ -458,95 +395,6 @@ class IncomingMessage < ActiveRecord::Base return text end - # (This risks losing info if the unchosen alternative is the only one to contain - # useful info, but let's worry about that another time) - def get_attachment_leaves - force = true - return _get_attachment_leaves_recursive(self.mail(force)) - end - def _get_attachment_leaves_recursive(curr_mail, within_rfc822_attachment = nil) - leaves_found = [] - if curr_mail.multipart? - if curr_mail.parts.size == 0 - raise "no parts on multipart mail" - end - - if curr_mail.sub_type == 'alternative' - # Choose best part from alternatives - best_part = nil - # Take the last text/plain one, or else the first one - curr_mail.parts.each do |m| - if not best_part - best_part = m - elsif m.content_type == 'text/plain' - best_part = m - end - end - # Take an HTML one as even higher priority. (They tend - # to render better than text/plain, e.g. don't wrap links here: - # http://www.whatdotheyknow.com/request/amount_and_cost_of_freedom_of_in#incoming-72238 ) - curr_mail.parts.each do |m| - if m.content_type == 'text/html' - best_part = m - end - end - leaves_found += _get_attachment_leaves_recursive(best_part, within_rfc822_attachment) - else - # Add all parts - curr_mail.parts.each do |m| - leaves_found += _get_attachment_leaves_recursive(m, within_rfc822_attachment) - end - end - else - # XXX Yuck. this section alters various content_type's. That puts - # it into conflict with ensure_parts_counted which it has to be - # called both before and after. It will fail with cases of - # attachments of attachments etc. - charset = curr_mail.charset # save this, because overwriting content_type also resets charset - # Don't allow nil content_types - if curr_mail.content_type.nil? - curr_mail.content_type = 'application/octet-stream' - end - # PDFs often come with this mime type, fix it up for view code - if curr_mail.content_type == 'application/octet-stream' - part_file_name = MailHandler.get_part_file_name(curr_mail) - calc_mime = AlaveteliFileTypes.filename_and_content_to_mimetype(part_file_name, curr_mail.body) - if calc_mime - curr_mail.content_type = calc_mime - end - end - - # Use standard content types for Word documents etc. - curr_mail.content_type = normalise_content_type(curr_mail.content_type) - if curr_mail.content_type == 'message/rfc822' - ensure_parts_counted # fills in rfc822_attachment variable - if curr_mail.rfc822_attachment.nil? - # Attached mail didn't parse, so treat as text - curr_mail.content_type = 'text/plain' - end - end - if curr_mail.content_type == 'application/vnd.ms-outlook' || curr_mail.content_type == 'application/ms-tnef' - ensure_parts_counted # fills in rfc822_attachment variable - if curr_mail.rfc822_attachment.nil? - # Attached mail didn't parse, so treat as binary - curr_mail.content_type = 'application/octet-stream' - end - end - # If the part is an attachment of email - if curr_mail.content_type == 'message/rfc822' || curr_mail.content_type == 'application/vnd.ms-outlook' || curr_mail.content_type == 'application/ms-tnef' - ensure_parts_counted # fills in rfc822_attachment variable - leaves_found += _get_attachment_leaves_recursive(curr_mail.rfc822_attachment, curr_mail.rfc822_attachment) - else - # Store leaf - curr_mail.within_rfc822_attachment = within_rfc822_attachment - leaves_found += [curr_mail] - end - # restore original charset - curr_mail.charset = charset - end - return leaves_found - end - # Removes anything cached about the object in the database, and saves def clear_in_database_caches! self.cached_attachment_text_clipped = nil @@ -609,7 +457,8 @@ class IncomingMessage < ActiveRecord::Base text = "[ Email has no body, please see attachments ]" source_charset = "utf-8" else - text = part.body # by default, TMail converts to UTF8 in this call + # by default, the body (coming from an foi_attachment) should have been converted to utf-8 + text = part.body source_charset = part.charset if part.content_type == 'text/html' # e.g. http://www.whatdotheyknow.com/request/35/response/177 @@ -617,42 +466,31 @@ class IncomingMessage < ActiveRecord::Base # convert to text routine. Could instead call a # sanitize HTML one. - # If the text isn't UTF8, it means TMail had a problem + # If the text isn't UTF8, it means we had a problem # converting it (invalid characters, etc), and we # should instead tell elinks to respect the source # charset use_charset = "utf-8" - begin - text = Iconv.conv('utf-8', 'utf-8', text) - rescue Iconv::IllegalSequence - use_charset = source_charset - end - text = self.class._get_attachment_text_internal_one_file(part.content_type, text, use_charset) - end - end - - # If TMail can't convert text, it just returns it, so we sanitise it. - begin - # Test if it's good UTF-8 - text = Iconv.conv('utf-8', 'utf-8', text) - rescue Iconv::IllegalSequence - # Text looks like unlabelled nonsense, - # strip out anything that isn't UTF-8 - begin - source_charset = 'utf-8' if source_charset.nil? - text = Iconv.conv('utf-8//IGNORE', source_charset, text) + - _("\n\n[ {{site_name}} note: The above text was badly encoded, and has had strange characters removed. ]", - :site_name => Configuration::site_name) - rescue Iconv::InvalidEncoding, Iconv::IllegalSequence - if source_charset != "utf-8" - source_charset = "utf-8" - retry + if RUBY_VERSION.to_f >= 1.9 + begin + text.encode('utf-8') + rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError + use_charset = source_charset + end + else + begin + text = Iconv.conv('utf-8', 'utf-8', text) + rescue Iconv::IllegalSequence + use_charset = source_charset + end end + text = MailHandler.get_attachment_text_one_file(part.content_type, text, use_charset) end end + # If text hasn't been converted, we sanitise it. + text = _sanitize_text(text) # Fix DOS style linefeeds to Unix style ones (or other later regexps won't work) - # Needed for e.g. http://www.whatdotheyknow.com/request/60/response/98 text = text.gsub(/\r\n/, "\n") # Compress extra spaces down to save space, and to stop regular expressions @@ -662,6 +500,51 @@ class IncomingMessage < ActiveRecord::Base return text end + + def _sanitize_text(text) + if RUBY_VERSION.to_f >= 1.9 + begin + # Test if it's good UTF-8 + text.encode('utf-8') + rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError + source_charset = 'utf-8' if source_charset.nil? + # strip out anything that isn't UTF-8 + begin + text = text.encode("utf-8", :invalid => :replace, + :undef => :replace, + :replace => "") + + _("\n\n[ {{site_name}} note: The above text was badly encoded, and has had strange characters removed. ]", + :site_name => MySociety::Config.get('SITE_NAME', 'Alaveteli')) + rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError + if source_charset != "utf-8" + source_charset = "utf-8" + retry + end + end + end + else + begin + # Test if it's good UTF-8 + text = Iconv.conv('utf-8', 'utf-8', text) + rescue Iconv::IllegalSequence + # Text looks like unlabelled nonsense, + # strip out anything that isn't UTF-8 + begin + source_charset = 'utf-8' if source_charset.nil? + text = Iconv.conv('utf-8//IGNORE', source_charset, text) + + _("\n\n[ {{site_name}} note: The above text was badly encoded, and has had strange characters removed. ]", + :site_name => Configuration::site_name) + rescue Iconv::InvalidEncoding, Iconv::IllegalSequence + if source_charset != "utf-8" + source_charset = "utf-8" + retry + end + end + end + end + text + end + # Returns part which contains main body text, or nil if there isn't one def get_main_body_text_part leaves = self.foi_attachments @@ -715,7 +598,7 @@ class IncomingMessage < ActiveRecord::Base filename = uu.match(/^begin\s+[0-9]+\s+(.*)$/)[1] calc_mime = AlaveteliFileTypes.filename_and_content_to_mimetype(filename, content) if calc_mime - calc_mime = normalise_content_type(calc_mime) + calc_mime = MailHandler.normalise_content_type(calc_mime) content_type = calc_mime else content_type = 'application/octet-stream' @@ -744,55 +627,15 @@ class IncomingMessage < ActiveRecord::Base end def extract_attachments! - leaves = get_attachment_leaves # XXX check where else this is called from - # XXX we have to call ensure_parts_counted after get_attachment_leaves - # which is really messy. - ensure_parts_counted + force = true + attachment_attributes = MailHandler.get_attachment_attributes(self.mail(force)) attachments = [] - for leaf in leaves - body = leaf.body - # As leaf.body causes MIME decoding which uses lots of RAM, do garbage collection here - # to prevent excess memory use. XXX not really sure if this helps reduce - # peak RAM use overall. Anyway, maybe there is something better to do than this. - GC.start - if leaf.within_rfc822_attachment - within_rfc822_subject = leaf.within_rfc822_attachment.subject - # Test to see if we are in the first part of the attached - # RFC822 message and it is text, if so add headers. - # XXX should probably use hunting algorithm to find main text part, rather than - # just expect it to be first. This will do for now though. - # Example request that needs this: - # http://www.whatdotheyknow.com/request/2923/response/7013/attach/2/Cycle%20Path%20Bank.txt - if leaf.within_rfc822_attachment == leaf && leaf.content_type == 'text/plain' - headers = "" - for header in [ 'Date', 'Subject', 'From', 'To', 'Cc' ] - if leaf.within_rfc822_attachment.header.include?(header.downcase) - header_value = leaf.within_rfc822_attachment.header[header.downcase] - # Example message which has a blank Date header: - # http://www.whatdotheyknow.com/request/30747/response/80253/attach/html/17/Common%20Purpose%20Advisory%20Group%20Meeting%20Tuesday%202nd%20March.txt.html - if !header_value.blank? - headers = headers + header + ": " + header_value.to_s + "\n" - end - end - end - # XXX call _convert_part_body_to_text here, but need to get charset somehow - # e.g. http://www.whatdotheyknow.com/request/1593/response/3088/attach/4/Freedom%20of%20Information%20request%20-%20car%20oval%20sticker:%20Article%2020,%20Convention%20on%20Road%20Traffic%201949.txt - body = headers + "\n" + body - - # This is quick way of getting all headers, but instead we only add some a) to - # make it more usable, b) as at least one authority accidentally leaked security - # information into a header. - #attachment.body = leaf.within_rfc822_attachment.port.to_s - end - end - hexdigest = Digest::MD5.hexdigest(body) - attachment = self.foi_attachments.find_or_create_by_hexdigest(:hexdigest => hexdigest) - attachment.update_attributes(:url_part_number => leaf.url_part_number, - :content_type => leaf.content_type, - :filename => MailHandler.get_part_file_name(leaf), - :charset => leaf.charset, - :within_rfc822_subject => within_rfc822_subject, - :body => body) + attachment_attributes.each do |attrs| + attachment = self.foi_attachments.find_or_create_by_hexdigest(attrs[:hexdigest]) + body = attrs.delete(:body) + attachment.update_attributes(attrs) + # Set the body separately as its handling can depend on the value of charset + attachment.body = body attachment.save! attachments << attachment.id end @@ -802,7 +645,7 @@ class IncomingMessage < ActiveRecord::Base # e.g. for https://secure.mysociety.org/admin/foi/request/show_raw_email/24550 if !main_part.nil? uudecoded_attachments = _uudecode_and_save_attachments(main_part.body) - c = @count_first_uudecode_count + c = self.mail.count_first_uudecode_count for uudecode_attachment in uudecoded_attachments c += 1 uudecode_attachment.url_part_number = c @@ -894,101 +737,15 @@ class IncomingMessage < ActiveRecord::Base return self.cached_attachment_text_clipped end - def IncomingMessage._get_attachment_text_internal_one_file(content_type, body, charset = 'utf-8') - # note re. charset: TMail always tries to convert email bodies - # to UTF8 by default, so normally it should already be that. - text = '' - # XXX - tell all these command line tools to return utf-8 - if content_type == 'text/plain' - text += body + "\n\n" - else - tempfile = Tempfile.new('foiextract') - tempfile.print body - tempfile.flush - if content_type == 'application/vnd.ms-word' - AlaveteliExternalCommand.run("wvText", tempfile.path, tempfile.path + ".txt") - # Try catdoc if we get into trouble (e.g. for InfoRequestEvent 2701) - if not File.exists?(tempfile.path + ".txt") - AlaveteliExternalCommand.run("catdoc", tempfile.path, :append_to => text) - else - text += File.read(tempfile.path + ".txt") + "\n\n" - File.unlink(tempfile.path + ".txt") - end - elsif content_type == 'application/rtf' - # catdoc on RTF prodcues less comments and extra bumf than --text option to unrtf - AlaveteliExternalCommand.run("catdoc", tempfile.path, :append_to => text) - elsif content_type == 'text/html' - # lynx wordwraps links in its output, which then don't - # get formatted properly by Alaveteli. We use elinks - # instead, which doesn't do that. - AlaveteliExternalCommand.run("elinks", "-eval", "set document.codepage.assume = \"#{charset}\"", "-eval", "set document.codepage.force_assumed = 1", "-dump-charset", "utf-8", "-force-html", "-dump", - tempfile.path, :append_to => text, :env => {"LANG" => "C"}) - elsif content_type == 'application/vnd.ms-excel' - # Bit crazy using /usr/bin/strings - but xls2csv, xlhtml and - # py_xls2txt only extract text from cells, not from floating - # notes. catdoc may be fooled by weird character sets, but will - # probably do for UK FOI requests. - AlaveteliExternalCommand.run("/usr/bin/strings", tempfile.path, :append_to => text) - elsif content_type == 'application/vnd.ms-powerpoint' - # ppthtml seems to catch more text, but only outputs HTML when - # we want text, so just use catppt for now - AlaveteliExternalCommand.run("catppt", tempfile.path, :append_to => text) - elsif content_type == 'application/pdf' - AlaveteliExternalCommand.run("pdftotext", tempfile.path, "-", :append_to => text) - elsif content_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' - # This is Microsoft's XML office document format. - # Just pull out the main XML file, and strip it of text. - xml = AlaveteliExternalCommand.run("/usr/bin/unzip", "-qq", "-c", tempfile.path, "word/document.xml") - if !xml.nil? - doc = REXML::Document.new(xml) - text += doc.each_element( './/text()' ){}.join(" ") - end - elsif content_type == 'application/zip' - # recurse into zip files - begin - zip_file = Zip::ZipFile.open(tempfile.path) - text += IncomingMessage._get_attachment_text_from_zip_file(zip_file) - zip_file.close() - rescue - $stderr.puts("Error processing zip file: #{$!.inspect}") - end - end - tempfile.close - end - - return text - end - def IncomingMessage._get_attachment_text_from_zip_file(zip_file) - text = "" - for entry in zip_file - if entry.file? - filename = entry.to_s - begin - body = entry.get_input_stream.read - rescue - # move to next attachment silently if there were problems - # XXX really should reduce this to specific exceptions? - # e.g. password protected - next - end - calc_mime = AlaveteliFileTypes.filename_to_mimetype(filename) - if calc_mime - content_type = calc_mime - else - content_type = 'application/octet-stream' - end - text += _get_attachment_text_internal_one_file(content_type, body) - end - end - return text - end def _get_attachment_text_internal # Extract text from each attachment text = '' attachments = self.get_attachments_for_display for attachment in attachments - text += IncomingMessage._get_attachment_text_internal_one_file(attachment.content_type, attachment.body, attachment.charset) + text += MailHandler.get_attachment_text_one_file(attachment.content_type, + attachment.body, + attachment.charset) end # Remove any bad characters text = Iconv.conv('utf-8//IGNORE', 'utf-8', text) @@ -1056,65 +813,11 @@ class IncomingMessage < ActiveRecord::Base return AlaveteliFileTypes.all_extensions.join(" ") end - # Return false if for some reason this is a message that we shouldn't let them reply to - def valid_to_reply_to? - # check validity of email - if empty_from_field? - return false - end - email = self.from_email - if !MySociety::Validate.is_valid_email(email) - return false - end - - # reject postmaster - authorities seem to nearly always not respond to - # email to postmaster, and it tends to only happen after delivery failure. - # likewise Mailer-Daemon, Auto_Reply... - prefix = email - prefix =~ /^(.*)@/ - prefix = $1 - if !prefix.nil? && prefix.downcase.match(/^(postmaster|mailer-daemon|auto_reply|do.?not.?reply|no.reply)$/) - return false - end - if !self.mail['return-path'].nil? && self.mail['return-path'].addr == "<>" - return false - end - if !self.mail['auto-submitted'].nil? - return false - end - return true - end - - def normalise_content_type(content_type) - # e.g. http://www.whatdotheyknow.com/request/93/response/250 - if content_type == 'application/excel' or content_type == 'application/msexcel' or content_type == 'application/x-ms-excel' - content_type = 'application/vnd.ms-excel' - end - if content_type == 'application/mspowerpoint' or content_type == 'application/x-ms-powerpoint' - content_type = 'application/vnd.ms-powerpoint' + def for_admin_column + self.class.content_columns.each do |column| + yield(column.human_name, self.send(column.name), column.type.to_s, column.name) end - if content_type == 'application/msword' or content_type == 'application/x-ms-word' - content_type = 'application/vnd.ms-word' - end - if content_type == 'application/x-zip-compressed' - content_type = 'application/zip' - end - - # e.g. http://www.whatdotheyknow.com/request/copy_of_current_swessex_scr_opt#incoming-9928 - if content_type == 'application/acrobat' - content_type = 'application/pdf' - end - - return content_type - end - - def for_admin_column - self.class.content_columns.each do |column| - yield(column.human_name, self.send(column.name), column.type.to_s, column.name) end - end - - private :normalise_content_type end diff --git a/app/models/info_request.rb b/app/models/info_request.rb index 194f8e105..08b331c26 100644 --- a/app/models/info_request.rb +++ b/app/models/info_request.rb @@ -27,7 +27,7 @@ require 'digest/sha1' class InfoRequest < ActiveRecord::Base include ActionView::Helpers::UrlHelper - include ActionController::UrlWriter + include Rails.application.routes.url_helpers strip_attributes! @@ -51,7 +51,7 @@ class InfoRequest < ActiveRecord::Base has_tag_string - named_scope :visible, :conditions => {:prominence => "normal"} + scope :visible, :conditions => {:prominence => "normal"} # user described state (also update in info_request_event, admin_request/edit.rhtml) validate :must_be_valid_state @@ -81,6 +81,11 @@ class InfoRequest < ActiveRecord::Base 'blackhole' # just dump them ] + # only check on create, so existing models with mixed case are allowed + validate :title_formatting, :on => :create + + after_initialize :set_defaults + def self.enumerate_states states = [ 'waiting_response', @@ -138,7 +143,7 @@ class InfoRequest < ActiveRecord::Base if external_user_name.nil? fake_slug = "anonymous" else - fake_slug = external_user_name.parameterize + fake_slug = MySociety::Format.simplify_url_part(external_user_name, 'external_user', 32) end (public_body.url_name || "") + "_" + fake_slug else @@ -156,31 +161,8 @@ class InfoRequest < ActiveRecord::Base rescue MissingSourceFile, NameError end - # only check on create, so existing models with mixed case are allowed - def validate_on_create - if !self.title.nil? && !MySociety::Validate.uses_mixed_capitals(self.title, 10) - errors.add(:title, _('Please write the summary using a mixture of capital and lower case letters. This makes it easier for others to read.')) - end - if !self.title.nil? && title.size > 200 - errors.add(:title, _('Please keep the summary short, like in the subject of an email. You can use a phrase, rather than a full sentence.')) - end - if !self.title.nil? && self.title =~ /^(FOI|Freedom of Information)\s*requests?$/i - errors.add(:title, _('Please describe more what the request is about in the subject. There is no need to say it is an FOI request, we add that on anyway.')) - end - end - OLD_AGE_IN_DAYS = 21.days - def after_initialize - if self.described_state.nil? - self.described_state = 'waiting_response' - end - # FOI or EIR? - if !self.public_body.nil? && self.public_body.eir_only? - self.law_used = 'eir' - end - end - def visible_comments self.comments.find(:all, :conditions => 'visible') end @@ -434,11 +416,11 @@ public elsif self.allow_new_responses_from == 'anybody' allow = true elsif self.allow_new_responses_from == 'authority_only' - if email.from_addrs.nil? || email.from_addrs.size == 0 + sender_email = MailHandler.get_from_address(email) + if sender_email.nil? allow = false reason = _('Only the authority can reply to this request, but there is no "From" address to check against') else - sender_email = email.from_addrs[0].spec sender_domain = PublicBody.extract_domain_from_email(sender_email) reason = _("Only the authority can reply to this request, and I don't recognise the address this reply was sent from") allow = false @@ -1155,5 +1137,29 @@ public yield(column.human_name, self.send(column.name), column.type.to_s, column.name) end end + + private + + def set_defaults + if self.described_state.nil? + self.described_state = 'waiting_response' + end + # FOI or EIR? + if !self.public_body.nil? && self.public_body.eir_only? + self.law_used = 'eir' + end + end + + def title_formatting + if !self.title.nil? && !MySociety::Validate.uses_mixed_capitals(self.title, 10) + errors.add(:title, _('Please write the summary using a mixture of capital and lower case letters. This makes it easier for others to read.')) + end + if !self.title.nil? && title.size > 200 + errors.add(:title, _('Please keep the summary short, like in the subject of an email. You can use a phrase, rather than a full sentence.')) + end + if !self.title.nil? && self.title =~ /^(FOI|Freedom of Information)\s*requests?$/i + errors.add(:title, _('Please describe more what the request is about in the subject. There is no need to say it is an FOI request, we add that on anyway.')) + end + end end diff --git a/app/models/outgoing_message.rb b/app/models/outgoing_message.rb index 441813e5f..23b5c904b 100644 --- a/app/models/outgoing_message.rb +++ b/app/models/outgoing_message.rb @@ -50,6 +50,8 @@ class OutgoingMessage < ActiveRecord::Base end end + after_initialize :set_default_letter + # How the default letter starts and ends def get_salutation ret = "" @@ -129,13 +131,6 @@ class OutgoingMessage < ActiveRecord::Base MySociety::Validate.contains_postcode?(self.body) end - # Set default letter - def after_initialize - if self.body.nil? - self.body = get_default_message - end - end - # Check have edited letter def validate if self.body.empty? || self.body =~ /\A#{get_salutation}\s+#{get_signoff}/ || self.body =~ /#{get_internal_review_insert_here_note}/ @@ -275,6 +270,14 @@ class OutgoingMessage < ActiveRecord::Base yield(column.human_name, self.send(column.name), column.type.to_s, column.name) end end + + private + + def set_default_letter + if self.body.nil? + self.body = get_default_message + end + end end diff --git a/app/models/post_redirect.rb b/app/models/post_redirect.rb index 31f08c21a..dfca936e2 100644 --- a/app/models/post_redirect.rb +++ b/app/models/post_redirect.rb @@ -32,6 +32,8 @@ class PostRedirect < ActiveRecord::Base # Optional, does a login confirm before redirect for use in email links. belongs_to :user + after_initialize :generate_token + # We store YAML version of POST parameters in the database def post_params=(params) self.post_params_yaml = params.to_yaml @@ -62,18 +64,6 @@ class PostRedirect < ActiveRecord::Base MySociety::Util.generate_token end - # Make the token - def after_initialize - # The token is used to return you to what you are doing after the login form. - if not self.token - self.token = PostRedirect.generate_random_token - end - # There is a separate token to use in the URL if we send a confirmation email. - if not self.email_token - self.email_token = PostRedirect.generate_random_token - end - end - # Used by (rspec) test code only def self.get_last_post_redirect # XXX yeuch - no other easy way of getting the token so we can check @@ -89,6 +79,18 @@ class PostRedirect < ActiveRecord::Base PostRedirect.delete_all "updated_at < (now() - interval '2 months')" end + private + + def generate_token + # The token is used to return you to what you are doing after the login form. + if not self.token + self.token = PostRedirect.generate_random_token + end + # There is a separate token to use in the URL if we send a confirmation email. + if not self.email_token + self.email_token = PostRedirect.generate_random_token + end + end end diff --git a/app/models/profile_photo.rb b/app/models/profile_photo.rb index 6e605651d..41cb298b3 100644 --- a/app/models/profile_photo.rb +++ b/app/models/profile_photo.rb @@ -23,29 +23,15 @@ class ProfilePhoto < ActiveRecord::Base belongs_to :user + validate :data_and_draft_checks + # deliberately don't strip_attributes, so keeps raw photo properly attr_accessor :x, :y, :w, :h - # convert binary data blob into ImageMagick image when assigned attr_accessor :image - def after_initialize - if data.nil? - self.image = nil - return - end - image_list = Magick::ImageList.new - begin - image_list.from_blob(data) - rescue Magick::ImageMagickError - self.image = nil - return - end - - self.image = image_list[0] # XXX perhaps take largest image or somesuch if there were multiple in the file? - self.convert_image - end + after_initialize :convert_data_to_image # make image valid format and size def convert_image @@ -81,7 +67,9 @@ class ProfilePhoto < ActiveRecord::Base end end - def validate + private + + def data_and_draft_checks if self.data.nil? errors.add(:data, N_("Please choose a file containing your photo.")) return @@ -108,6 +96,25 @@ class ProfilePhoto < ActiveRecord::Base raise "Internal error, real pictures must have a user" end end + + # Convert binary data blob into ImageMagick image when assigned + def convert_data_to_image + if data.nil? + self.image = nil + return + end + + image_list = Magick::ImageList.new + begin + image_list.from_blob(data) + rescue Magick::ImageMagickError + self.image = nil + return + end + + self.image = image_list[0] # XXX perhaps take largest image or somesuch if there were multiple in the file? + self.convert_image + end end diff --git a/app/models/public_body.rb b/app/models/public_body.rb index 6f6b37ba8..b48c57228 100644 --- a/app/models/public_body.rb +++ b/app/models/public_body.rb @@ -43,7 +43,7 @@ class PublicBody < ActiveRecord::Base before_save :set_api_key, :set_default_publication_scheme # Every public body except for the internal admin one is visible - named_scope :visible, lambda { + scope :visible, lambda { { :conditions => "public_bodies.id <> #{PublicBody.internal_admin_body.id}" } @@ -104,28 +104,25 @@ class PublicBody < ActiveRecord::Base # like find_by_url_name but also search historic url_name if none found def self.find_by_url_name_with_historic(name) - locale = self.locale || I18n.locale - PublicBody.with_locale(locale) do - found = PublicBody.find(:all, - :conditions => ["public_body_translations.url_name=?", name], - :joins => :translations, - :readonly => false) - # If many bodies are found (usually because the url_name is the same across - # locales) return any of them - return found.first if found.size >= 1 - - # If none found, then search the history of short names - old = PublicBody::Version.find_all_by_url_name(name) - # Find unique public bodies in it - old = old.map { |x| x.public_body_id } - old = old.uniq - # Maybe return the first one, so we show something relevant, - # rather than throwing an error? - raise "Two bodies with the same historical URL name: #{name}" if old.size > 1 - return unless old.size == 1 - # does acts_as_versioned provide a method that returns the current version? - return PublicBody.find(old.first) - end + found = PublicBody.find(:all, + :conditions => ["public_body_translations.url_name=?", name], + :joins => :translations, + :readonly => false) + # If many bodies are found (usually because the url_name is the same across + # locales) return any of them + return found.first if found.size >= 1 + + # If none found, then search the history of short names + old = PublicBody::Version.find_all_by_url_name(name) + # Find unique public bodies in it + old = old.map { |x| x.public_body_id } + old = old.uniq + # Maybe return the first one, so we show something relevant, + # rather than throwing an error? + raise "Two bodies with the same historical URL name: #{name}" if old.size > 1 + return unless old.size == 1 + # does acts_as_versioned provide a method that returns the current version? + return PublicBody.find(old.first) end # Set the first letter, which is used for faster queries @@ -336,7 +333,7 @@ class PublicBody < ActiveRecord::Base # The "internal admin" is a special body for internal use. def PublicBody.internal_admin_body - PublicBody.with_locale(I18n.default_locale) do + I18n.with_locale(I18n.default_locale) do pb = PublicBody.find_by_url_name("internal_admin_authority") if pb.nil? pb = PublicBody.new( @@ -374,7 +371,7 @@ class PublicBody < ActiveRecord::Base # of updating them bodies_by_name = {} set_of_existing = Set.new() - PublicBody.with_locale(I18n.default_locale) do + I18n.with_locale(I18n.default_locale) do bodies = (tag.nil? || tag.empty?) ? PublicBody.find(:all) : PublicBody.find_by_tag(tag) for existing_body in bodies # Hide InternalAdminBody from import notes @@ -417,7 +414,7 @@ class PublicBody < ActiveRecord::Base if public_body = bodies_by_name[name] # Existing public body available_locales.each do |locale| - PublicBody.with_locale(locale) do + I18n.with_locale(locale) do changed = ActiveSupport::OrderedHash.new field_list.each do |field_name| localized_field_name = (locale.to_s == I18n.default_locale.to_s) ? field_name : "#{field_name}.#{locale}" @@ -452,7 +449,7 @@ class PublicBody < ActiveRecord::Base else # New public body public_body = PublicBody.new(:name=>"", :short_name=>"", :request_email=>"") available_locales.each do |locale| - PublicBody.with_locale(locale) do + I18n.with_locale(locale) do changed = ActiveSupport::OrderedHash.new field_list.each do |field_name| localized_field_name = (locale.to_s == I18n.default_locale.to_s) ? field_name : "#{field_name}.#{locale}" diff --git a/app/models/user.rb b/app/models/user.rb index 6e1e21481..490587c39 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -50,12 +50,17 @@ class User < ActiveRecord::Base 'super', ], :message => N_('Admin level is not included in list') + validate :email_and_name_are_valid + acts_as_xapian :texts => [ :name, :about_me ], :values => [ [ :created_at_numeric, 1, "created_at", :number ] # for sorting ], :terms => [ [ :variety, 'V', "variety" ] ], :if => :indexed_by_search? + + after_initialize :set_defaults + def created_at_numeric # format it here as no datetime support in Xapian's value ranges return self.created_at.strftime("%Y%m%d%H%M%S") @@ -65,17 +70,6 @@ class User < ActiveRecord::Base "user" end - def after_initialize - if self.admin_level.nil? - self.admin_level = 'none' - end - if self.new_record? - # make alert emails go out at a random time for each new user, so - # overall they are spread out throughout the day. - self.last_daily_track_email = User.random_time_in_last_day - end - end - # requested_by: and commented_by: search queries also need updating after save after_update :reindex_referencing_models def reindex_referencing_models @@ -108,15 +102,6 @@ class User < ActiveRecord::Base self.comments.find(:all, :conditions => 'visible') end - def validate - if self.email != "" && !MySociety::Validate.is_valid_email(self.email) - errors.add(:email, _("Please enter a valid email address")) - end - if MySociety::Validate.is_valid_email(self.name) - errors.add(:name, _("Please enter your name, not your email address, in the name field.")) - end - end - # Don't display any leading/trailing spaces # XXX we have strip_attributes! now, so perhaps this can be removed (might # be still needed for existing cases) @@ -148,14 +133,14 @@ class User < ActiveRecord::Base if user # There is user with email, check password if !user.has_this_password?(params[:password]) - user.errors.add_to_base(auth_fail_message) + user.errors.add(:base, auth_fail_message) end else # No user of same email, make one (that we don't save in the database) # for the forms code to use. user = User.new(params) # deliberately same message as above so as not to leak whether registered - user.errors.add_to_base(auth_fail_message) + user.errors.add(:base, auth_fail_message) end user end @@ -413,6 +398,26 @@ class User < ActiveRecord::Base self.salt = self.object_id.to_s + rand.to_s end + def set_defaults + if self.admin_level.nil? + self.admin_level = 'none' + end + if self.new_record? + # make alert emails go out at a random time for each new user, so + # overall they are spread out throughout the day. + self.last_daily_track_email = User.random_time_in_last_day + end + end + + def email_and_name_are_valid + if self.email != "" && !MySociety::Validate.is_valid_email(self.email) + errors.add(:email, _("Please enter a valid email address")) + end + if MySociety::Validate.is_valid_email(self.name) + errors.add(:name, _("Please enter your name, not your email address, in the name field.")) + end + end + ## Class methods def User.encrypted_password(password, salt) string_to_hash = password + salt # XXX need to add a secret here too? |