diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/alaveteli_text_masker.rb | 127 | ||||
-rw-r--r-- | lib/languages.rb | 4 | ||||
-rw-r--r-- | lib/quiet_opener.rb | 15 | ||||
-rw-r--r-- | lib/tasks/gettext.rake | 25 |
4 files changed, 162 insertions, 9 deletions
diff --git a/lib/alaveteli_text_masker.rb b/lib/alaveteli_text_masker.rb new file mode 100644 index 000000000..68ff0d318 --- /dev/null +++ b/lib/alaveteli_text_masker.rb @@ -0,0 +1,127 @@ +module AlaveteliTextMasker + extend self + DoNotBinaryMask = [ 'image/tiff', + 'image/gif', + 'image/jpeg', + 'image/png', + 'image/bmp', + 'application/zip' ] + + # Replaces all email addresses in (possibly binary) data + # Also applies custom masks and censor items + def apply_masks!(text, content_type, options = {}) + # See if content type is one that we mask - things like zip files and + # images may get broken if we try to. We err on the side of masking too + # much, as many unknown types will really be text. + + # Special cases for some content types + case content_type + when *DoNotBinaryMask + # do nothing + when 'text/html' + apply_text_masks!(text, options) + when 'application/pdf' + apply_pdf_masks!(text, options) + else + apply_binary_masks!(text, options) + end + end + + def apply_pdf_masks!(text, options = {}) + uncompressed_text = nil + uncompressed_text = AlaveteliExternalCommand.run("pdftk", "-", "output", "-", "uncompress", + :stdin_string => text) + # if we managed to uncompress the PDF... + if !uncompressed_text.blank? + # then censor stuff (making a copy so can compare again in a bit) + censored_uncompressed_text = uncompressed_text.dup + apply_binary_masks!(censored_uncompressed_text, options) + # if the censor rule removed something... + if censored_uncompressed_text != uncompressed_text + # then use the altered file (recompressed) + recompressed_text = nil + if AlaveteliConfiguration::use_ghostscript_compression == true + command = ["gs", "-sDEVICE=pdfwrite", "-dCompatibilityLevel=1.4", "-dPDFSETTINGS=/screen", "-dNOPAUSE", "-dQUIET", "-dBATCH", "-sOutputFile=-", "-"] + else + command = ["pdftk", "-", "output", "-", "compress"] + end + recompressed_text = AlaveteliExternalCommand.run(*(command + [{:stdin_string=>censored_uncompressed_text}])) + if recompressed_text.blank? + # buggy versions of pdftk sometimes fail on + # compression, I don't see it's a disaster in + # these cases to save an uncompressed version? + recompressed_text = censored_uncompressed_text + logger.warn "Unable to compress PDF; problem with your pdftk version?" + end + if !recompressed_text.blank? + text.replace recompressed_text + end + end + end + end + + private + + # Replace text in place + def apply_binary_masks!(text, options = {}) + # Keep original size, so can check haven't resized it + orig_size = text.mb_chars.size + + # Replace ASCII email addresses... + text.gsub!(MySociety::Validate.email_find_regexp) do |email| + email.gsub(/[^@.]/, 'x') + end + + # And replace UCS-2 ones (for Microsoft Office documents)... + # Find emails, by finding them in parts of text that have ASCII + # equivalents to the UCS-2 + ascii_chars = text.gsub(/\0/, "") + emails = ascii_chars.scan(MySociety::Validate.email_find_regexp) + + # Convert back to UCS-2, making a mask at the same time + if String.method_defined?(:encode) + emails.map! do |email| + # We want the ASCII representation of UCS-2 + [email[0].encode('UTF-16LE').force_encoding('US-ASCII'), + email[0].gsub(/[^@.]/, 'x').encode('UTF-16LE').force_encoding('US-ASCII')] + end + else + emails.map! {|email| [ + Iconv.conv('ucs-2le', 'ascii', email[0]), + Iconv.conv('ucs-2le', 'ascii', email[0].gsub(/[^@.]/, 'x')) + ] } + end + + # Now search and replace the UCS-2 email with the UCS-2 mask + for email, mask in emails + text.gsub!(email, mask) + end + + # Replace censor items + censor_rules = options[:censor_rules] || [] + censor_rules.each{ |censor_rule| censor_rule.apply_to_binary!(text) } + raise "internal error in apply_binary_masks!" if text.mb_chars.size != orig_size + return text + end + + # Remove any email addresses, login links and mobile phone numbers + def default_text_masks + [{ :to_replace => MySociety::Validate.email_find_regexp, + :replacement => "[#{_("email address")}]" }, + { :to_replace => /(Mobile|Mob)([\s\/]*(Fax|Tel))*\s*:?[\s\d]*\d/, + :replacement => "[#{_("mobile number")}]" }, + { :to_replace => /https?:\/\/#{AlaveteliConfiguration::domain}\/c\/[^\s]+/, + :replacement => "[#{_("{{site_name}} login link", + :site_name => AlaveteliConfiguration::site_name)}]" }] + end + + def apply_text_masks!(text, options = {}) + masks = options[:masks] || [] + masks += default_text_masks + censor_rules = options[:censor_rules] || [] + masks.each{ |mask| text.gsub!(mask[:to_replace], mask[:replacement]) } + censor_rules.each{ |censor_rule| censor_rule.apply_to_text!(text) } + text + end + +end diff --git a/lib/languages.rb b/lib/languages.rb index 42231ef56..85d2d5950 100644 --- a/lib/languages.rb +++ b/lib/languages.rb @@ -111,11 +111,11 @@ class LanguageNames 'mn' => 'монгол', 'na' => 'Ekakairũ Naoero', 'nv' => 'Diné bizaad', - 'nb' => 'Norsk bokmål', + 'nb' => 'Bokmål', 'nd' => 'isiNdebele', 'ne' => 'नेपाली', 'ng' => 'Owambo', - 'nn' => 'Norsk nynorsk', + 'nn' => 'Nynorsk', 'no' => 'Norsk', 'ii' => 'ꆈꌠ꒿ Nuosuhxop', 'nr' => 'isiNdebele', diff --git a/lib/quiet_opener.rb b/lib/quiet_opener.rb index 16ea27b8e..c6e259b93 100644 --- a/lib/quiet_opener.rb +++ b/lib/quiet_opener.rb @@ -7,8 +7,19 @@ end def quietly_try_to_open(url) begin result = open(url).read.strip - rescue OpenURI::HTTPError, SocketError, Errno::ETIMEDOUT, Errno::ECONNREFUSED, Errno::EHOSTUNREACH, Errno::ECONNRESET - Rails.logger.warn("Unable to open third-party URL #{url}") + rescue OpenURI::HTTPError, + SocketError, + Errno::ETIMEDOUT, + Errno::ECONNREFUSED, + Errno::EHOSTUNREACH, + Errno::ECONNRESET, + Timeout::Error => exception + e = Exception.new("Unable to open third-party URL #{url}: #{exception.message}") + e.set_backtrace(exception.backtrace) + if !AlaveteliConfiguration.exception_notifications_from.blank? && !AlaveteliConfiguration.exception_notifications_to.blank? + ExceptionNotifier::Notifier.exception_notification(request.env, e).deliver + end + Rails.logger.warn(e.message) result = "" end return result diff --git a/lib/tasks/gettext.rake b/lib/tasks/gettext.rake index 3f357213f..9bdb6169b 100644 --- a/lib/tasks/gettext.rake +++ b/lib/tasks/gettext.rake @@ -11,11 +11,7 @@ namespace :gettext do desc "Update pot/po files for a theme." task :find_theme => :environment do - theme = ENV['THEME'] - unless theme - puts "Usage: Specify an Alaveteli-theme with THEME=[theme directory name]" - exit(0) - end + theme = find_theme(ENV['THEME']) load_gettext msgmerge = Rails.application.config.gettext_i18n_rails.msgmerge msgmerge ||= %w[--sort-output --no-location --no-wrap] @@ -28,6 +24,25 @@ namespace :gettext do ) end + desc 'Rewrite theme .po files into a consistent msgmerge format' + task :clean_theme do + theme = find_theme(ENV['THEME']) + load_gettext + + Dir.glob("#{ theme_locale_path(theme) }/*/app.po") do |po_file| + GetText::msgmerge(po_file, po_file, 'alaveteli', + :msgmerge => [:sort_output, :no_location, :no_wrap]) + end + end + + def find_theme(theme) + unless theme + puts "Usage: Specify an Alaveteli-theme with THEME=[theme directory name]" + exit(0) + end + theme + end + def theme_files_to_translate(theme) Dir.glob("{lib/themes/#{theme}/lib}/**/*.{rb,erb}") end |