diff options
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/alaveteli_text_masker.rb | 127 | ||||
| -rw-r--r-- | lib/languages.rb | 4 | ||||
| -rw-r--r-- | lib/quiet_opener.rb | 15 | ||||
| -rw-r--r-- | lib/tasks/gettext.rake | 25 | 
4 files changed, 162 insertions, 9 deletions
| diff --git a/lib/alaveteli_text_masker.rb b/lib/alaveteli_text_masker.rb new file mode 100644 index 000000000..68ff0d318 --- /dev/null +++ b/lib/alaveteli_text_masker.rb @@ -0,0 +1,127 @@ +module AlaveteliTextMasker +    extend self +    DoNotBinaryMask = [ 'image/tiff', +                        'image/gif', +                        'image/jpeg', +                        'image/png', +                        'image/bmp', +                        'application/zip' ] + +    # Replaces all email addresses in (possibly binary) data +    # Also applies custom masks and censor items +    def apply_masks!(text, content_type, options = {}) +        # See if content type is one that we mask - things like zip files and +        # images may get broken if we try to. We err on the side of masking too +        # much, as many unknown types will really be text. + +        # Special cases for some content types +        case content_type +            when *DoNotBinaryMask +                # do nothing +            when 'text/html' +                apply_text_masks!(text, options) +            when 'application/pdf' +                apply_pdf_masks!(text, options) +            else +                apply_binary_masks!(text, options) +        end +    end + +    def apply_pdf_masks!(text, options = {}) +        uncompressed_text = nil +        uncompressed_text = AlaveteliExternalCommand.run("pdftk", "-", "output", "-", "uncompress", +                                                         :stdin_string => text) +        # if we managed to uncompress the PDF... +        if !uncompressed_text.blank? +            # then censor stuff (making a copy so can compare again in a bit) +            censored_uncompressed_text = uncompressed_text.dup +            apply_binary_masks!(censored_uncompressed_text, options) +            # if the censor rule removed something... +            if censored_uncompressed_text != uncompressed_text +                # then use the altered file (recompressed) +                recompressed_text = nil +                if AlaveteliConfiguration::use_ghostscript_compression == true +                    command = ["gs", "-sDEVICE=pdfwrite", "-dCompatibilityLevel=1.4", "-dPDFSETTINGS=/screen", "-dNOPAUSE", "-dQUIET", "-dBATCH", "-sOutputFile=-", "-"] +                else +                    command = ["pdftk", "-", "output", "-", "compress"] +                end +                recompressed_text = AlaveteliExternalCommand.run(*(command + [{:stdin_string=>censored_uncompressed_text}])) +                if recompressed_text.blank? +                    # buggy versions of pdftk sometimes fail on +                    # compression, I don't see it's a disaster in +                    # these cases to save an uncompressed version? +                    recompressed_text = censored_uncompressed_text +                    logger.warn "Unable to compress PDF; problem with your pdftk version?" +                end +                if !recompressed_text.blank? +                    text.replace recompressed_text +                end +            end +        end +    end + +    private + +    # Replace text in place +    def apply_binary_masks!(text, options = {}) +        # Keep original size, so can check haven't resized it +        orig_size = text.mb_chars.size + +        # Replace ASCII email addresses... +        text.gsub!(MySociety::Validate.email_find_regexp) do |email| +            email.gsub(/[^@.]/, 'x') +        end + +        # And replace UCS-2 ones (for Microsoft Office documents)... +        # Find emails, by finding them in parts of text that have ASCII +        # equivalents to the UCS-2 +        ascii_chars = text.gsub(/\0/, "") +        emails = ascii_chars.scan(MySociety::Validate.email_find_regexp) + +        # Convert back to UCS-2, making a mask at the same time +        if String.method_defined?(:encode) +            emails.map! do |email| +                # We want the ASCII representation of UCS-2 +                [email[0].encode('UTF-16LE').force_encoding('US-ASCII'), +                 email[0].gsub(/[^@.]/, 'x').encode('UTF-16LE').force_encoding('US-ASCII')] +            end +        else +            emails.map! {|email| [ +                    Iconv.conv('ucs-2le', 'ascii', email[0]), +                    Iconv.conv('ucs-2le', 'ascii', email[0].gsub(/[^@.]/, 'x')) +            ] } +        end + +        # Now search and replace the UCS-2 email with the UCS-2 mask +        for email, mask in emails +            text.gsub!(email, mask) +        end + +        # Replace censor items +        censor_rules = options[:censor_rules] || [] +        censor_rules.each{ |censor_rule| censor_rule.apply_to_binary!(text) } +        raise "internal error in apply_binary_masks!" if text.mb_chars.size != orig_size +        return text +    end + +    # Remove any email addresses, login links and mobile phone numbers +    def default_text_masks +        [{ :to_replace => MySociety::Validate.email_find_regexp, +           :replacement => "[#{_("email address")}]" }, +         { :to_replace => /(Mobile|Mob)([\s\/]*(Fax|Tel))*\s*:?[\s\d]*\d/, +           :replacement => "[#{_("mobile number")}]" }, +         { :to_replace => /https?:\/\/#{AlaveteliConfiguration::domain}\/c\/[^\s]+/, +           :replacement => "[#{_("{{site_name}} login link", +                                 :site_name => AlaveteliConfiguration::site_name)}]" }] +    end + +    def apply_text_masks!(text, options = {}) +        masks = options[:masks] || [] +        masks += default_text_masks +        censor_rules = options[:censor_rules] || [] +        masks.each{ |mask| text.gsub!(mask[:to_replace], mask[:replacement]) } +        censor_rules.each{ |censor_rule| censor_rule.apply_to_text!(text) } +        text +    end + +end diff --git a/lib/languages.rb b/lib/languages.rb index 42231ef56..85d2d5950 100644 --- a/lib/languages.rb +++ b/lib/languages.rb @@ -111,11 +111,11 @@ class LanguageNames              'mn'	=> 'монгол',              'na'	=> 'Ekakairũ Naoero',              'nv'	=> 'Diné bizaad', -            'nb'	=> 'Norsk bokmål', +            'nb'	=> 'Bokmål',              'nd'	=> 'isiNdebele',              'ne'	=> 'नेपाली',              'ng'	=> 'Owambo', -            'nn'	=> 'Norsk nynorsk', +            'nn'	=> 'Nynorsk',              'no'	=> 'Norsk',              'ii'	=> 'ꆈꌠ꒿ Nuosuhxop',              'nr'	=> 'isiNdebele', diff --git a/lib/quiet_opener.rb b/lib/quiet_opener.rb index 16ea27b8e..c6e259b93 100644 --- a/lib/quiet_opener.rb +++ b/lib/quiet_opener.rb @@ -7,8 +7,19 @@ end  def quietly_try_to_open(url)      begin          result = open(url).read.strip -    rescue OpenURI::HTTPError, SocketError, Errno::ETIMEDOUT, Errno::ECONNREFUSED, Errno::EHOSTUNREACH, Errno::ECONNRESET -        Rails.logger.warn("Unable to open third-party URL #{url}") +    rescue OpenURI::HTTPError, +           SocketError, +           Errno::ETIMEDOUT, +           Errno::ECONNREFUSED, +           Errno::EHOSTUNREACH, +           Errno::ECONNRESET, +           Timeout::Error => exception +        e = Exception.new("Unable to open third-party URL #{url}: #{exception.message}") +        e.set_backtrace(exception.backtrace) +        if !AlaveteliConfiguration.exception_notifications_from.blank? && !AlaveteliConfiguration.exception_notifications_to.blank? +            ExceptionNotifier::Notifier.exception_notification(request.env, e).deliver +        end +        Rails.logger.warn(e.message)          result = ""      end      return result diff --git a/lib/tasks/gettext.rake b/lib/tasks/gettext.rake index 3f357213f..9bdb6169b 100644 --- a/lib/tasks/gettext.rake +++ b/lib/tasks/gettext.rake @@ -11,11 +11,7 @@ namespace :gettext do    desc "Update pot/po files for a theme."    task :find_theme => :environment do -    theme = ENV['THEME'] -    unless theme -        puts "Usage: Specify an Alaveteli-theme with THEME=[theme directory name]" -        exit(0) -    end +    theme = find_theme(ENV['THEME'])      load_gettext      msgmerge = Rails.application.config.gettext_i18n_rails.msgmerge      msgmerge ||= %w[--sort-output --no-location --no-wrap] @@ -28,6 +24,25 @@ namespace :gettext do       )     end +    desc 'Rewrite theme .po files into a consistent msgmerge format' +    task :clean_theme do +        theme = find_theme(ENV['THEME']) +        load_gettext + +        Dir.glob("#{ theme_locale_path(theme) }/*/app.po") do |po_file| +            GetText::msgmerge(po_file, po_file, 'alaveteli', +                              :msgmerge => [:sort_output, :no_location, :no_wrap]) +        end +   end + +   def find_theme(theme) +       unless theme +           puts "Usage: Specify an Alaveteli-theme with THEME=[theme directory name]" +           exit(0) +       end +       theme +   end +     def theme_files_to_translate(theme)         Dir.glob("{lib/themes/#{theme}/lib}/**/*.{rb,erb}")     end | 
