diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/alaveteli_external_command.rb | 14 | ||||
-rw-r--r-- | lib/configuration.rb | 11 | ||||
-rw-r--r-- | lib/i18n_fixes.rb | 10 | ||||
-rw-r--r-- | lib/mail_handler/backends/mail_backend.rb | 278 | ||||
-rw-r--r-- | lib/mail_handler/backends/mail_extensions.rb | 60 | ||||
-rw-r--r-- | lib/mail_handler/backends/tmail_backend.rb | 228 | ||||
-rw-r--r-- | lib/mail_handler/backends/tmail_extensions.rb | 2 | ||||
-rw-r--r-- | lib/mail_handler/mail_handler.rb | 131 | ||||
-rw-r--r-- | lib/old_rubygems_patch.rb | 46 | ||||
-rw-r--r-- | lib/tasks/.gitkeep | 0 | ||||
-rw-r--r-- | lib/tasks/rspec.rake | 147 |
11 files changed, 723 insertions, 204 deletions
diff --git a/lib/alaveteli_external_command.rb b/lib/alaveteli_external_command.rb index 3bfc34e3a..24b4b1aa8 100644 --- a/lib/alaveteli_external_command.rb +++ b/lib/alaveteli_external_command.rb @@ -2,6 +2,12 @@ require 'external_command' module AlaveteliExternalCommand class << self + # Final argument can be a hash of options. + # Valid options are: + # :append_to - string to append the output of the process to + # :stdin_string - stdin string to pass to the process + # :binary_output - boolean flag for treating the output as binary or text (only significant + # ruby 1.9 and above) def run(program_name, *args) # Run an external program, and return its output. # Standard error is suppressed unless the program @@ -10,7 +16,7 @@ module AlaveteliExternalCommand if !args.empty? && args[-1].is_a?(Hash) opts = args.pop end - + if program_name =~ %r(^/) program_path = program_name else @@ -24,12 +30,16 @@ module AlaveteliExternalCommand end raise "Could not find #{program_name} in any of #{Configuration::utility_search_path.join(', ')}" if !found end - + xc = ExternalCommand.new(program_path, *args) if opts.has_key? :append_to xc.out = opts[:append_to] end + if opts.has_key? :binary_output + xc.binary_mode = opts[:binary_output] + end xc.run(opts[:stdin_string] || "", opts[:env] || {}) + if xc.status != 0 # Error $stderr.puts("Error from #{program_name} #{args.join(' ')}:") diff --git a/lib/configuration.rb b/lib/configuration.rb index abd0f5cdc..9c369b2e7 100644 --- a/lib/configuration.rb +++ b/lib/configuration.rb @@ -1,3 +1,13 @@ +require File.dirname(__FILE__) + '/../commonlib/rblib/config' + +# Load intial mySociety config +if ENV["RAILS_ENV"] == "test" + MySociety::Config.set_file(File.join(File.dirname(__FILE__), 'test'), true) +else + MySociety::Config.set_file(File.join(File.dirname(__FILE__), 'general'), true) +end +MySociety::Config.load_default + # Configuration values with defaults # TODO: Make this return different values depending on the current rails environment @@ -25,6 +35,7 @@ module Configuration :GA_CODE => '', :GAZE_URL => '', :HTML_TO_PDF_COMMAND => '', + :INCLUDE_DEFAULT_LOCALE_IN_URLS => true, :INCOMING_EMAIL_DOMAIN => 'localhost', :INCOMING_EMAIL_PREFIX => '', :INCOMING_EMAIL_SECRET => 'dummysecret', diff --git a/lib/i18n_fixes.rb b/lib/i18n_fixes.rb index f75b969c4..bb339fc55 100644 --- a/lib/i18n_fixes.rb +++ b/lib/i18n_fixes.rb @@ -6,10 +6,12 @@ # so that we can interpolate our translation strings nicely def _(key, options = {}) - # Assume the result of doing any translation is html_safe. - # In other words, we trust the translators. - translation = FastGettext._(key).html_safe || key - gettext_interpolate(translation, options) + # HACK: We should be going via GettextI18nRails instead of FastGettext below + # so that #translations_are_html_safe is respected but calling it directly + # doesn't work for me. I'm just marking the resulting string as html_safe. + # This whole hacky file should be removed + translation = FastGettext._(key) || key + gettext_interpolate(translation, options).html_safe end INTERPOLATION_RESERVED_KEYS = %w(scope default) diff --git a/lib/mail_handler/backends/mail_backend.rb b/lib/mail_handler/backends/mail_backend.rb index 0e198adf0..b75e6ed63 100644 --- a/lib/mail_handler/backends/mail_backend.rb +++ b/lib/mail_handler/backends/mail_backend.rb @@ -23,19 +23,291 @@ module MailHandler main end + # Returns an outlook message as a Mail object + def mail_from_outlook(content) + msg = Mapi::Msg.open(StringIO.new(content)) + mail = mail_from_raw_email(msg.to_mime.to_s) + mail.ready_to_send! + mail + end + # Return a copy of the file name for the mail part - def get_part_file_name(mail_part) - part_file_name = mail_part.filename + def get_part_file_name(part) + part_file_name = part.filename part_file_name.nil? ? nil : part_file_name.dup end + # Get the body of a mail part + def get_part_body(part) + part.body.decoded + end + + # Return the first from field if any + def first_from(mail) + if mail[:from] + begin + mail[:from].addrs[0] + mail[:from].decoded + return mail[:from].addrs[0] + rescue + return mail[:from].value + end + else + nil + end + end + + # Return the first from address if any + def get_from_address(mail) + first_from = first_from(mail) + if first_from + if first_from.is_a?(String) + return nil + else + return first_from.address + end + else + return nil + end + end + + # Return the first from name if any + def get_from_name(mail) + first_from = first_from(mail) + if first_from + if first_from.is_a?(String) + return nil + else + return first_from.display_name ? eval(%Q{"#{first_from.display_name}"}) : nil + end + else + return nil + end + end + + def get_all_addresses(mail) + envelope_to = mail['envelope-to'] ? [mail['envelope-to'].value] : [] + ((mail.to || []) + + (mail.cc || []) + + (envelope_to || [])).uniq + end + + def empty_return_path?(mail) + return false if mail['return-path'].nil? + return true if mail['return-path'].value.blank? + return false + end + + def get_auto_submitted(mail) + mail['auto-submitted'] ? mail['auto-submitted'].value : nil + end + + def get_content_type(part) + part.content_type ? part.content_type.split(';')[0] : nil + end + + def get_header_string(header, mail) + mail.header[header] ? mail.header[header].to_s : nil + end + + # Detects whether a mail part is an Outlook email + def is_outlook?(part) + filename = get_part_file_name(part) + return true if get_content_type(part) == 'application/vnd.ms-outlook' + if filename && AlaveteliFileTypes.filename_to_mimetype(filename) == 'application/vnd.ms-outlook' + return true + end + return false + end + + # Convert a mail part which is an attached mail in one of + # several formats into a mail object and set it as the + # rfc822_attachment on the part. If the mail part can't be + # converted, the content type on the part is updated to + # 'text/plain' for an RFC822 attachment, and 'application/octet-stream' + # for other types + def decode_attached_part(part, parent_mail) + if get_content_type(part) == 'message/rfc822' + # An email attached as text + part.rfc822_attachment = mail_from_raw_email(part.body) + if part.rfc822_attachment.nil? + # Attached mail didn't parse, so treat as text + part.content_type = 'text/plain' + end + elsif is_outlook?(part) + part.rfc822_attachment = mail_from_outlook(part.body.decoded) + if part.rfc822_attachment.nil? + # Attached mail didn't parse, so treat as binary + part.content_type = 'application/octet-stream' + end + elsif get_content_type(part) == 'application/ms-tnef' + # A set of attachments in a TNEF file + part.rfc822_attachment = mail_from_tnef(part.body.decoded) + if part.rfc822_attachment.nil? + # Attached mail didn't parse, so treat as binary + part.content_type = 'application/octet-stream' + end + end + if part.rfc822_attachment + expand_and_normalize_parts(part.rfc822_attachment, parent_mail) + end + end + + # Expand and normalize a mail part recursively. Decodes attached messages into + # Mail objects wherever possible. Sets a default content type if none is + # set. Tries to set a more specific content type for binary content types. + def expand_and_normalize_parts(part, parent_mail) + if part.multipart? + part.parts.each{ |sub_part| expand_and_normalize_parts(sub_part, parent_mail) } + else + part_filename = get_part_file_name(part) + charset = part.charset # save this, because overwriting content_type also resets charset + + # Don't allow nil content_types + if get_content_type(part).nil? + part.content_type = 'application/octet-stream' + end + + # PDFs often come with this mime type, fix it up for view code + if get_content_type(part) == 'application/octet-stream' + part_body = get_part_body(part) + calc_mime = AlaveteliFileTypes.filename_and_content_to_mimetype(part_filename, + part_body) + if calc_mime + part.content_type = calc_mime + end + end + + # Use standard content types for Word documents etc. + part.content_type = normalise_content_type(get_content_type(part)) + decode_attached_part(part, parent_mail) + part.charset = charset + end + end + + # Count the parts in a mail part recursively, including any attached messages. + # Set the count on the parent mail, and set a url_part_number on the part itself. + # Set the count for the first uudecoded part on the parent mail also. + def count_parts(part, parent_mail) + if part.multipart? + part.parts.each { |p| count_parts(p, parent_mail) } + else + if part.rfc822_attachment + count_parts(part.rfc822_attachment, parent_mail) + else + parent_mail.count_parts_count += 1 + part.url_part_number = parent_mail.count_parts_count + end + end + parent_mail.count_first_uudecode_count = parent_mail.count_parts_count + end + + # Choose the best part from alternatives + def choose_best_alternative(mail) + if mail.html_part + return mail.html_part + elsif mail.text_part + return mail.text_part + else + return mail.parts.first + end + end + + # Expand and normalize the parts of a mail, select the best part + # wherever there is an alternative, and then count the returned + # leaves and assign url_part values to them + def get_attachment_leaves(mail) + expand_and_normalize_parts(mail, mail) + leaves = _get_attachment_leaves_recursive(mail, nil, mail) + mail.count_parts_count = 0 + count_parts(mail, mail) + return leaves + end + + # Recurse through a mail part, selecting the best part wherever there is + # an alternative + def _get_attachment_leaves_recursive(part, within_rfc822_attachment, parent_mail) + leaves_found = [] + if part.multipart? + raise "no parts on multipart mail" if part.parts.size == 0 + if part.sub_type == 'alternative' + best_part = choose_best_alternative(part) + leaves_found += _get_attachment_leaves_recursive(best_part, + within_rfc822_attachment, + parent_mail) + else + # Add all parts + part.parts.each do |sub_part| + leaves_found += _get_attachment_leaves_recursive(sub_part, + within_rfc822_attachment, + parent_mail) + end + end + else + # Add all the parts of a decoded attached message + if part.rfc822_attachment + leaves_found += _get_attachment_leaves_recursive(part.rfc822_attachment, + part.rfc822_attachment, + parent_mail) + else + # Store leaf + part.within_rfc822_attachment = within_rfc822_attachment + leaves_found += [part] + end + end + return leaves_found + end + + # Add selected useful headers from an attached message to its body + def extract_attached_message_headers(leaf) + body = get_part_body(leaf) + # Test to see if we are in the first part of the attached + # RFC822 message and it is text, if so add headers. + if leaf.within_rfc822_attachment == leaf && get_content_type(leaf) == 'text/plain' + headers = "" + [ 'Date', 'Subject', 'From', 'To', 'Cc' ].each do |header| + if header_value = get_header_string(header, leaf.within_rfc822_attachment) + if !header_value.blank? + headers = headers + header + ": " + header_value.to_s + "\n" + end + end + end + # XXX call _convert_part_body_to_text here, but need to get charset somehow + # e.g. http://www.whatdotheyknow.com/request/1593/response/3088/attach/4/Freedom%20of%20Information%20request%20-%20car%20oval%20sticker:%20Article%2020,%20Convention%20on%20Road%20Traffic%201949.txt + body = headers + "\n" + body + end + body + end + + # Generate a hash of the attributes associated with each significant part of a Mail object + def get_attachment_attributes(mail) + leaves = get_attachment_leaves(mail) + attachments = [] + for leaf in leaves + body = get_part_body(leaf) + if leaf.within_rfc822_attachment + within_rfc822_subject = leaf.within_rfc822_attachment.subject + body = extract_attached_message_headers(leaf) + end + leaf_attributes = { :url_part_number => leaf.url_part_number, + :content_type => get_content_type(leaf), + :filename => get_part_file_name(leaf), + :charset => leaf.charset, + :within_rfc822_subject => within_rfc822_subject, + :body => body, + :hexdigest => Digest::MD5.hexdigest(body) } + attachments << leaf_attributes + end + return attachments + end + # Format def address_from_name_and_email(name, email) if !MySociety::Validate.is_valid_email(email) raise "invalid email " + email + " passed to address_from_name_and_email" end if name.nil? - return Mail::Address.new(email) + return Mail::Address.new(email).to_s end address = Mail::Address.new address.display_name = name diff --git a/lib/mail_handler/backends/mail_extensions.rb b/lib/mail_handler/backends/mail_extensions.rb index cbe0491ed..f756abd1a 100644 --- a/lib/mail_handler/backends/mail_extensions.rb +++ b/lib/mail_handler/backends/mail_extensions.rb @@ -1,7 +1,67 @@ +require 'mail/message' +require 'mail/fields/common/parameter_hash' module Mail class Message attr_accessor :url_part_number attr_accessor :rfc822_attachment # when a whole email message is attached as text attr_accessor :within_rfc822_attachment # for parts within a message attached as text (for getting subject mainly) + attr_accessor :count_parts_count + attr_accessor :count_first_uudecode_count + + # A patched version of the message initializer to work around a bug where stripping the original + # input removes meaningful spaces - e.g. in the case of uuencoded bodies. + def initialize(*args, &block) + @body = nil + @body_raw = nil + @separate_parts = false + @text_part = nil + @html_part = nil + @errors = nil + @header = nil + @charset = 'UTF-8' + @defaulted_charset = true + + @perform_deliveries = true + @raise_delivery_errors = true + + @delivery_handler = nil + + @delivery_method = Mail.delivery_method.dup + + @transport_encoding = Mail::Encodings.get_encoding('7bit') + + @mark_for_delete = false + + if args.flatten.first.respond_to?(:each_pair) + init_with_hash(args.flatten.first) + else + # The replacement of this commented out line is the change. + # init_with_string(args.flatten[0].to_s.strip) + init_with_string(args.flatten[0].to_s) + end + + if block_given? + instance_eval(&block) + end + + self + end + end + + # A patched version of the parameter hash that handles nil values without throwing + # an error. + class ParameterHash < IndifferentHash + + def encoded + map.sort { |a,b| a.first.to_s <=> b.first.to_s }.map do |key_name, value| + # The replacement of this commented out line is the change + # unless value.ascii_only? + unless value.nil? || value.ascii_only? + value = Mail::Encodings.param_encode(value) + key_name = "#{key_name}*" + end + %Q{#{key_name}=#{quote_token(value)}} + end.join(";\r\n\s") + end end end
\ No newline at end of file diff --git a/lib/mail_handler/backends/tmail_backend.rb b/lib/mail_handler/backends/tmail_backend.rb index 87aba73d7..02124cdb1 100644 --- a/lib/mail_handler/backends/tmail_backend.rb +++ b/lib/mail_handler/backends/tmail_backend.rb @@ -41,6 +41,234 @@ module MailHandler return part_file_name end + # Get the body of a mail part + def get_part_body(mail_part) + mail_part.body + end + + # Return the first from address if any + def get_from_address(mail) + if mail.from_addrs.nil? || mail.from_addrs.size == 0 + return nil + end + mail.from_addrs[0].spec + end + + # Return the first from name if any + def get_from_name(mail) + mail.from_name_if_present + end + + def get_all_addresses(mail) + ((mail.to || []) + + (mail.cc || []) + + (mail.envelope_to || [])).uniq + end + + def empty_return_path?(mail) + return false if mail['return-path'].nil? + return true if mail['return-path'].addr.to_s == '<>' + return false + end + + def get_auto_submitted(mail) + mail['auto-submitted'] ? mail['auto-submitted'].body : nil + end + + def get_content_type(part) + part.content_type + end + + def get_header_string(header, mail) + mail.header_string(header) + end + + # Number the attachments in depth first tree order, for use in URLs. + # XXX This fills in part.rfc822_attachment and part.url_part_number within + # all the parts of the email (see monkeypatches in lib/mail_handler/tmail_extensions and + # lib/mail_handler/mail_extensions for how these attributes are added). ensure_parts_counted + # must be called before using the attributes. + def ensure_parts_counted(mail) + mail.count_parts_count = 0 + _count_parts_recursive(mail, mail) + # we carry on using these numeric ids for attachments uudecoded from within text parts + mail.count_first_uudecode_count = mail.count_parts_count + end + def _count_parts_recursive(part, mail) + if part.multipart? + part.parts.each do |p| + _count_parts_recursive(p, mail) + end + else + part_filename = get_part_file_name(part) + begin + if part.content_type == 'message/rfc822' + # An email attached as text + # e.g. http://www.whatdotheyknow.com/request/64/response/102 + part.rfc822_attachment = mail_from_raw_email(part.body, decode=false) + elsif part.content_type == 'application/vnd.ms-outlook' || part_filename && AlaveteliFileTypes.filename_to_mimetype(part_filename) == 'application/vnd.ms-outlook' + # An email attached as an Outlook file + # e.g. http://www.whatdotheyknow.com/request/chinese_names_for_british_politi + msg = Mapi::Msg.open(StringIO.new(part.body)) + part.rfc822_attachment = mail_from_raw_email(msg.to_mime.to_s, decode=false) + elsif part.content_type == 'application/ms-tnef' + # A set of attachments in a TNEF file + part.rfc822_attachment = mail_from_tnef(part.body) + end + rescue + # If attached mail doesn't parse, treat it as text part + part.rfc822_attachment = nil + else + unless part.rfc822_attachment.nil? + _count_parts_recursive(part.rfc822_attachment, mail) + end + end + if part.rfc822_attachment.nil? + mail.count_parts_count += 1 + part.url_part_number = mail.count_parts_count + end + end + end + + def get_attachment_attributes(mail) + leaves = get_attachment_leaves(mail) + # XXX we have to call ensure_parts_counted after get_attachment_leaves + # which is really messy. + ensure_parts_counted(mail) + attachment_attributes = [] + for leaf in leaves + body = get_part_body(leaf) + # As leaf.body causes MIME decoding which uses lots of RAM, do garbage collection here + # to prevent excess memory use. XXX not really sure if this helps reduce + # peak RAM use overall. Anyway, maybe there is something better to do than this. + GC.start + if leaf.within_rfc822_attachment + within_rfc822_subject = leaf.within_rfc822_attachment.subject + # Test to see if we are in the first part of the attached + # RFC822 message and it is text, if so add headers. + # XXX should probably use hunting algorithm to find main text part, rather than + # just expect it to be first. This will do for now though. + if leaf.within_rfc822_attachment == leaf && leaf.content_type == 'text/plain' + headers = "" + for header in [ 'Date', 'Subject', 'From', 'To', 'Cc' ] + if leaf.within_rfc822_attachment.header.include?(header.downcase) + header_value = leaf.within_rfc822_attachment.header[header.downcase] + if !header_value.blank? + headers = headers + header + ": " + header_value.to_s + "\n" + end + end + end + # XXX call _convert_part_body_to_text here, but need to get charset somehow + # e.g. http://www.whatdotheyknow.com/request/1593/response/3088/attach/4/Freedom%20of%20Information%20request%20-%20car%20oval%20sticker:%20Article%2020,%20Convention%20on%20Road%20Traffic%201949.txt + body = headers + "\n" + body + + # This is quick way of getting all headers, but instead we only add some a) to + # make it more usable, b) as at least one authority accidentally leaked security + # information into a header. + #attachment.body = leaf.within_rfc822_attachment.port.to_s + end + end + attachment_attributes << {:url_part_number => leaf.url_part_number, + :content_type => get_content_type(leaf), + :filename => get_part_file_name(leaf), + :charset => leaf.charset, + :within_rfc822_subject => within_rfc822_subject, + :body => body, + :hexdigest => Digest::MD5.hexdigest(body) } + end + attachment_attributes + end + + # (This risks losing info if the unchosen alternative is the only one to contain + # useful info, but let's worry about that another time) + def get_attachment_leaves(mail) + return _get_attachment_leaves_recursive(mail, mail) + end + def _get_attachment_leaves_recursive(curr_mail, parent_mail, within_rfc822_attachment = nil) + leaves_found = [] + if curr_mail.multipart? + if curr_mail.parts.size == 0 + raise "no parts on multipart mail" + end + + if curr_mail.sub_type == 'alternative' + # Choose best part from alternatives + best_part = nil + # Take the last text/plain one, or else the first one + curr_mail.parts.each do |m| + if not best_part + best_part = m + elsif m.content_type == 'text/plain' + best_part = m + end + end + # Take an HTML one as even higher priority. (They tend + # to render better than text/plain, e.g. don't wrap links here: + # http://www.whatdotheyknow.com/request/amount_and_cost_of_freedom_of_in#incoming-72238 ) + curr_mail.parts.each do |m| + if m.content_type == 'text/html' + best_part = m + end + end + leaves_found += _get_attachment_leaves_recursive(best_part, parent_mail, within_rfc822_attachment) + else + # Add all parts + curr_mail.parts.each do |m| + leaves_found += _get_attachment_leaves_recursive(m, parent_mail, within_rfc822_attachment) + end + end + else + # XXX Yuck. this section alters various content_types. That puts + # it into conflict with ensure_parts_counted which it has to be + # called both before and after. It will fail with cases of + # attachments of attachments etc. + charset = curr_mail.charset # save this, because overwriting content_type also resets charset + # Don't allow nil content_types + if curr_mail.content_type.nil? + curr_mail.content_type = 'application/octet-stream' + end + # PDFs often come with this mime type, fix it up for view code + if curr_mail.content_type == 'application/octet-stream' + part_file_name = get_part_file_name(curr_mail) + part_body = get_part_body(curr_mail) + calc_mime = AlaveteliFileTypes.filename_and_content_to_mimetype(part_file_name, part_body) + if calc_mime + curr_mail.content_type = calc_mime + end + end + + # Use standard content types for Word documents etc. + curr_mail.content_type = normalise_content_type(curr_mail.content_type) + if curr_mail.content_type == 'message/rfc822' + ensure_parts_counted(parent_mail) # fills in rfc822_attachment variable + if curr_mail.rfc822_attachment.nil? + # Attached mail didn't parse, so treat as text + curr_mail.content_type = 'text/plain' + end + end + if curr_mail.content_type == 'application/vnd.ms-outlook' || curr_mail.content_type == 'application/ms-tnef' + ensure_parts_counted(parent_mail) # fills in rfc822_attachment variable + if curr_mail.rfc822_attachment.nil? + # Attached mail didn't parse, so treat as binary + curr_mail.content_type = 'application/octet-stream' + end + end + # If the part is an attachment of email + if curr_mail.content_type == 'message/rfc822' || curr_mail.content_type == 'application/vnd.ms-outlook' || curr_mail.content_type == 'application/ms-tnef' + ensure_parts_counted(parent_mail) # fills in rfc822_attachment variable + leaves_found += _get_attachment_leaves_recursive(curr_mail.rfc822_attachment, parent_mail, curr_mail.rfc822_attachment) + else + # Store leaf + curr_mail.within_rfc822_attachment = within_rfc822_attachment + leaves_found += [curr_mail] + end + # restore original charset + curr_mail.charset = charset + end + return leaves_found + end + + def address_from_name_and_email(name, email) if !MySociety::Validate.is_valid_email(email) raise "invalid email " + email + " passed to address_from_name_and_email" diff --git a/lib/mail_handler/backends/tmail_extensions.rb b/lib/mail_handler/backends/tmail_extensions.rb index 9359dfeea..3576a8eca 100644 --- a/lib/mail_handler/backends/tmail_extensions.rb +++ b/lib/mail_handler/backends/tmail_extensions.rb @@ -20,6 +20,8 @@ module TMail attr_accessor :url_part_number attr_accessor :rfc822_attachment # when a whole email message is attached as text attr_accessor :within_rfc822_attachment # for parts within a message attached as text (for getting subject mainly) + attr_accessor :count_parts_count + attr_accessor :count_first_uudecode_count # Monkeypatch! (check to see if this becomes a standard function in # TMail::Mail, then use that, whatever it is called) diff --git a/lib/mail_handler/mail_handler.rb b/lib/mail_handler/mail_handler.rb index 24d14b5c8..8b227b9ca 100644 --- a/lib/mail_handler/mail_handler.rb +++ b/lib/mail_handler/mail_handler.rb @@ -4,10 +4,12 @@ require 'tmpdir' module MailHandler if RUBY_VERSION.to_f >= 1.9 + require 'mail' require 'backends/mail_extensions' require 'backends/mail_backend' include Backends::MailBackend else + require 'action_mailer' require 'backends/tmail_extensions' require 'backends/tmail_backend' include Backends::TmailBackend @@ -19,7 +21,7 @@ module MailHandler def tnef_attachments(content) attachments = [] Dir.mktmpdir do |dir| - IO.popen("#{`which tnef`.chomp} -K -C #{dir}", "w") do |f| + IO.popen("#{`which tnef`.chomp} -K -C #{dir}", "wb") do |f| f.write(content) f.close if $?.signaled? @@ -32,7 +34,7 @@ module MailHandler found = 0 Dir.new(dir).sort.each do |file| # sort for deterministic behaviour if file != "." && file != ".." - file_content = File.open("#{dir}/#{file}", "r").read + file_content = File.open("#{dir}/#{file}", "rb").read attachments << { :content => file_content, :filename => file } found += 1 @@ -45,6 +47,131 @@ module MailHandler attachments end + def normalise_content_type(content_type) + # e.g. http://www.whatdotheyknow.com/request/93/response/250 + if content_type == 'application/excel' or content_type == 'application/msexcel' or content_type == 'application/x-ms-excel' + content_type = 'application/vnd.ms-excel' + end + if content_type == 'application/mspowerpoint' or content_type == 'application/x-ms-powerpoint' + content_type = 'application/vnd.ms-powerpoint' + end + if content_type == 'application/msword' or content_type == 'application/x-ms-word' + content_type = 'application/vnd.ms-word' + end + if content_type == 'application/x-zip-compressed' + content_type = 'application/zip' + end + + # e.g. http://www.whatdotheyknow.com/request/copy_of_current_swessex_scr_opt#incoming-9928 + if content_type == 'application/acrobat' + content_type = 'application/pdf' + end + + return content_type + end + + def get_attachment_text_one_file(content_type, body, charset = 'utf-8') + # note re. charset: TMail always tries to convert email bodies + # to UTF8 by default, so normally it should already be that. + text = '' + # XXX - tell all these command line tools to return utf-8 + if content_type == 'text/plain' + text += body + "\n\n" + else + tempfile = Tempfile.new('foiextract') + tempfile.binmode + tempfile.print body + tempfile.flush + default_params = { :append_to => text, :binary_output => false } + if content_type == 'application/vnd.ms-word' + AlaveteliExternalCommand.run("wvText", tempfile.path, tempfile.path + ".txt") + # Try catdoc if we get into trouble (e.g. for InfoRequestEvent 2701) + if not File.exists?(tempfile.path + ".txt") + AlaveteliExternalCommand.run("catdoc", tempfile.path, default_params) + else + text += File.read(tempfile.path + ".txt") + "\n\n" + File.unlink(tempfile.path + ".txt") + end + elsif content_type == 'application/rtf' + # catdoc on RTF prodcues less comments and extra bumf than --text option to unrtf + AlaveteliExternalCommand.run("catdoc", tempfile.path, default_params) + elsif content_type == 'text/html' + # lynx wordwraps links in its output, which then don't + # get formatted properly by Alaveteli. We use elinks + # instead, which doesn't do that. + AlaveteliExternalCommand.run("elinks", "-eval", "set document.codepage.assume = \"#{charset}\"", + "-eval", "set document.codepage.force_assumed = 1", + "-dump-charset", "utf-8", + "-force-html", "-dump", + tempfile.path, + default_params.merge(:env => {"LANG" => "C"})) + elsif content_type == 'application/vnd.ms-excel' + # Bit crazy using /usr/bin/strings - but xls2csv, xlhtml and + # py_xls2txt only extract text from cells, not from floating + # notes. catdoc may be fooled by weird character sets, but will + # probably do for UK FOI requests. + AlaveteliExternalCommand.run("/usr/bin/strings", tempfile.path, default_params) + elsif content_type == 'application/vnd.ms-powerpoint' + # ppthtml seems to catch more text, but only outputs HTML when + # we want text, so just use catppt for now + AlaveteliExternalCommand.run("catppt", tempfile.path, default_params) + elsif content_type == 'application/pdf' + AlaveteliExternalCommand.run("pdftotext", tempfile.path, "-", default_params) + elsif content_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' + # This is Microsoft's XML office document format. + # Just pull out the main XML file, and strip it of text. + xml = AlaveteliExternalCommand.run("/usr/bin/unzip", "-qq", + "-c", + tempfile.path, + "word/document.xml", + {:binary_output => false}) + if !xml.nil? + doc = REXML::Document.new(xml) + text += doc.each_element( './/text()' ){}.join(" ") + end + elsif content_type == 'application/zip' + # recurse into zip files + begin + zip_file = Zip::ZipFile.open(tempfile.path) + text += get_attachment_text_from_zip_file(zip_file) + zip_file.close() + rescue + $stderr.puts("Error processing zip file: #{$!.inspect}") + end + end + tempfile.close + end + + return text + end + def get_attachment_text_from_zip_file(zip_file) + + text = "" + for entry in zip_file + if entry.file? + filename = entry.to_s + begin + body = entry.get_input_stream.read + rescue + # move to next attachment silently if there were problems + # XXX really should reduce this to specific exceptions? + # e.g. password protected + next + end + calc_mime = AlaveteliFileTypes.filename_to_mimetype(filename) + if calc_mime + content_type = calc_mime + else + content_type = 'application/octet-stream' + end + + text += get_attachment_text_one_file(content_type, body) + + end + end + return text + end + # Turn instance methods into class methods extend self diff --git a/lib/old_rubygems_patch.rb b/lib/old_rubygems_patch.rb deleted file mode 100644 index 3001a7381..000000000 --- a/lib/old_rubygems_patch.rb +++ /dev/null @@ -1,46 +0,0 @@ -if File.exist? File.join(File.dirname(__FILE__),'..','vendor','rails','railties','lib','rails','gem_dependency.rb') - require File.join(File.dirname(__FILE__),'..','vendor','rails','railties','lib','rails','gem_dependency.rb') -else - require 'rails/gem_dependency' -end - -module Rails - class GemDependency < Gem::Dependency - - # This definition of the requirement method is a patch - if !method_defined?(:requirement) - def requirement - req = version_requirements - end - end - - def add_load_paths - self.class.add_frozen_gem_path - return if @loaded || @load_paths_added - if framework_gem? - @load_paths_added = @loaded = @frozen = true - return - end - - begin - dep = Gem::Dependency.new(name, requirement) - spec = Gem.source_index.find { |_,s| s.satisfies_requirement?(dep) }.last - spec.activate # a way that exists - rescue - begin - gem self.name, self.requirement # < 1.8 unhappy way - # This second rescue is a patch - fall back to passing Rails::GemDependency to gem - # for older rubygems - rescue ArgumentError - gem self - end - end - - @spec = Gem.loaded_specs[name] - @frozen = @spec.loaded_from.include?(self.class.unpacked_path) if @spec - @load_paths_added = true - rescue Gem::LoadError - end - end - -end diff --git a/lib/tasks/.gitkeep b/lib/tasks/.gitkeep new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/lib/tasks/.gitkeep diff --git a/lib/tasks/rspec.rake b/lib/tasks/rspec.rake deleted file mode 100644 index 1eee74aee..000000000 --- a/lib/tasks/rspec.rake +++ /dev/null @@ -1,147 +0,0 @@ -rspec_gem_dir = nil -Dir["#{Rails.root}/vendor/gems/*"].each do |subdir| - rspec_gem_dir = subdir if subdir.gsub("#{Rails.root}/vendor/gems/","") =~ /^(\w+-)?rspec-(\d+)/ && File.exist?("#{subdir}/lib/spec/rake/spectask.rb") -end -rspec_plugin_dir = File.expand_path(File.dirname(__FILE__) + '/../../vendor/plugins/rspec') - -if rspec_gem_dir && (test ?d, rspec_plugin_dir) - raise "\n#{'*'*50}\nYou have rspec installed in both vendor/gems and vendor/plugins\nPlease pick one and dispose of the other.\n#{'*'*50}\n\n" -end - -if rspec_gem_dir - $LOAD_PATH.unshift("#{rspec_gem_dir}/lib") -elsif File.exist?(rspec_plugin_dir) - $LOAD_PATH.unshift("#{rspec_plugin_dir}/lib") -end - -# Don't load rspec if running "rake gems:*" -unless ARGV.any? {|a| a =~ /^gems/} - -begin - require 'spec/rake/spectask' -rescue MissingSourceFile - module Spec - module Rake - class SpecTask - def initialize(name) - task name do - # if rspec-rails is a configured gem, this will output helpful material and exit ... - require File.expand_path(File.join(File.dirname(__FILE__),"..","..","config","environment")) - - # ... otherwise, do this: - raise <<-MSG - -#{"*" * 80} -* You are trying to run an rspec rake task defined in -* #{__FILE__}, -* but rspec can not be found in vendor/gems, vendor/plugins or system gems. -#{"*" * 80} -MSG - end - end - end - end - end -end - -Rake.application.instance_variable_get('@tasks').delete('default') - -spec_prereq = File.exist?(File.join(Rails.root, 'config', 'database.yml')) ? "db:test:prepare" : :noop -task :noop do -end - -task :default => :spec -task :stats => "spec:statsetup" -# XXX commonlib tests are not Ruby 1.9 compatible -#task :spec => ['spec:commonlib'] -task :test => ['spec'] -task :cruise => ['spec'] - -desc "Run all specs in spec directory (excluding plugin specs)" -Spec::Rake::SpecTask.new(:spec => spec_prereq) do |t| - t.spec_opts = ['--options', "\"#{Rails.root}/spec/spec.opts\""] - t.spec_files = FileList['spec/**/*_spec.rb'] -end - -namespace :spec do - desc "Run all specs in spec directory with RCov (excluding plugin specs)" - Spec::Rake::SpecTask.new(:rcov) do |t| - t.spec_opts = ['--options', "\"#{Rails.root}/spec/spec.opts\""] - t.spec_files = FileList['spec/**/*_spec.rb'] - t.rcov = true - t.rcov_opts = lambda do - IO.readlines("#{Rails.root}/spec/rcov.opts").map {|l| l.chomp.split " "}.flatten - end - end - - desc "Print Specdoc for all specs (excluding plugin specs)" - Spec::Rake::SpecTask.new(:doc) do |t| - t.spec_opts = ["--format", "specdoc", "--dry-run"] - t.spec_files = FileList['spec/**/*_spec.rb'] - end - - desc "Print Specdoc for all plugin examples" - Spec::Rake::SpecTask.new(:plugin_doc) do |t| - t.spec_opts = ["--format", "specdoc", "--dry-run"] - t.spec_files = FileList['vendor/plugins/**/spec/**/*_spec.rb'].exclude('vendor/plugins/rspec/*') - end - - [:models, :controllers, :views, :helpers, :lib, :integration].each do |sub| - desc "Run the code examples in spec/#{sub}" - Spec::Rake::SpecTask.new(sub => spec_prereq) do |t| - t.spec_opts = ['--options', "\"#{Rails.root}/spec/spec.opts\""] - t.spec_files = FileList["spec/#{sub}/**/*_spec.rb"] - end - end - - desc "Run the code examples in vendor/plugins (except RSpec's own)" - Spec::Rake::SpecTask.new(:plugins => spec_prereq) do |t| - t.spec_opts = ['--options', "\"#{Rails.root}/spec/spec.opts\""] - t.spec_files = FileList['vendor/plugins/**/spec/**/*_spec.rb'].exclude('vendor/plugins/rspec/*').exclude("vendor/plugins/rspec-rails/*") - end - - namespace :plugins do - desc "Runs the examples for rspec_on_rails" - Spec::Rake::SpecTask.new(:rspec_on_rails) do |t| - t.spec_opts = ['--options', "\"#{Rails.root}/spec/spec.opts\""] - t.spec_files = FileList['vendor/plugins/rspec-rails/spec/**/*_spec.rb'] - end - end - - # Setup specs for stats - task :statsetup do - require 'code_statistics' - ::STATS_DIRECTORIES << %w(Model\ specs spec/models) if File.exist?('spec/models') - ::STATS_DIRECTORIES << %w(View\ specs spec/views) if File.exist?('spec/views') - ::STATS_DIRECTORIES << %w(Controller\ specs spec/controllers) if File.exist?('spec/controllers') - ::STATS_DIRECTORIES << %w(Helper\ specs spec/helpers) if File.exist?('spec/helpers') - ::STATS_DIRECTORIES << %w(Library\ specs spec/lib) if File.exist?('spec/lib') - ::STATS_DIRECTORIES << %w(Routing\ specs spec/routing) if File.exist?('spec/routing') - ::STATS_DIRECTORIES << %w(Integration\ specs spec/integration) if File.exist?('spec/integration') - ::CodeStatistics::TEST_TYPES << "Model specs" if File.exist?('spec/models') - ::CodeStatistics::TEST_TYPES << "View specs" if File.exist?('spec/views') - ::CodeStatistics::TEST_TYPES << "Controller specs" if File.exist?('spec/controllers') - ::CodeStatistics::TEST_TYPES << "Helper specs" if File.exist?('spec/helpers') - ::CodeStatistics::TEST_TYPES << "Library specs" if File.exist?('spec/lib') - ::CodeStatistics::TEST_TYPES << "Routing specs" if File.exist?('spec/routing') - ::CodeStatistics::TEST_TYPES << "Integration specs" if File.exist?('spec/integration') - end - - namespace :db do - namespace :fixtures do - desc "Load fixtures (from spec/fixtures) into the current environment's database. Load specific fixtures using FIXTURES=x,y. Load from subdirectory in test/fixtures using FIXTURES_DIR=z." - task :load => :environment do - ActiveRecord::Base.establish_connection(Rails.env) - base_dir = File.join(Rails.root, 'spec', 'fixtures') - fixtures_dir = ENV['FIXTURES_DIR'] ? File.join(base_dir, ENV['FIXTURES_DIR']) : base_dir - - require 'active_record/fixtures' - (ENV['FIXTURES'] ? ENV['FIXTURES'].split(/,/).map {|f| File.join(fixtures_dir, f) } : Dir.glob(File.join(fixtures_dir, '*.{yml,csv}'))).each do |fixture_file| - Fixtures.create_fixtures(File.dirname(fixture_file), File.basename(fixture_file, '.*')) - end - end - end - end -end - -end |