aboutsummaryrefslogtreecommitdiffstats
path: root/web/js/front.js
blob: 67486888b85235a9e1ecb26ad2c567d4a6364b5f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
(function(){
    // Give keyboard focus to the element with id "pc", if the page has one.
    // Guarded so that a page without it does not abort the rest of this script.
    var pc = document.getElementById('pc');
    if (pc) {
        pc.focus();
    }

    var i;

    // Prepend a hidden js=1 field to every form whose action contains
    // "around", so the submitted request carries the js parameter.
    var around_forms = document.querySelectorAll('form[action*="around"]');
    for (i = 0; i < around_forms.length; i++) {
        var form = around_forms[i];
        var el = document.createElement('input');
        el.type = 'hidden';
        el.name = 'js';
        el.value = 1;
        form.insertBefore(el, form.firstChild);
    }

    // Append js=1 to every link whose href contains "around", using "&" when
    // the URL already has a query string and "?" otherwise.
    var around_links = document.querySelectorAll('a[href*="around"]');
    for (i = 0; i < around_links.length; i++) {
        var link = around_links[i];
        link.href = link.href + (link.href.indexOf('?') > -1 ? '&js=1' : '?js=1');
    }

    // Scroll back to the top of the page when the "report a problem" button
    // is clicked. querySelector returns null when the button is absent, and
    // addEventListener is feature-tested as in the original code.
    var lk = document.querySelector('span.report-a-problem-btn');
    if (lk && lk.addEventListener) {
        lk.addEventListener('click', function(){
            scrollTo(0,0);
        });
    }
})();
='n48' href='#n48'>48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
# Handles the parsing of email
require 'tmpdir'

module MailHandler

    require 'mail'
    require 'backends/mail_extensions'
    require 'backends/mail_backend'
    include Backends::MailBackend

    # Raised by tnef_attachments when the tnef command exits with a non-zero
    # status or extracts no files from the supplied content.
    class TNEFParsingError < StandardError; end

    # Returns a set of attachments from the given TNEF contents
    # The TNEF contents also contains the message body, but in general this is the
    # same as the message body in the message proper.
    #
    # content - the raw TNEF data, written to the stdin of the tnef command
    #
    # Each returned attachment is a hash with :content (the file's bytes) and
    # :filename (its name inside the extraction directory).
    #
    # Raises IOError if tnef is terminated by a signal, and TNEFParsingError if
    # it exits with a non-zero status or extracts no files.
    def tnef_attachments(content)
        attachments = []
        Dir.mktmpdir do |dir|
            IO.popen("tnef -K -C #{dir} 2> /dev/null", "wb") do |f|
                f.write(content)
                # Closing the pipe waits for tnef to finish and sets $?
                f.close
                if $?.signaled?
                    raise IOError, "tnef exited with signal #{$?.termsig}"
                end
                if $?.exited? && $?.exitstatus != 0
                    raise TNEFParsingError, "tnef exited with status #{$?.exitstatus}"
                end
            end
            # sort for deterministic behaviour
            Dir.entries(dir).sort.each do |file|
                next if file == "." || file == ".."
                # Block form closes the handle as soon as the file is read
                file_content = File.open(File.join(dir, file), "rb") { |io| io.read }
                attachments << { :content => file_content,
                                 :filename => file }
            end
            if attachments.empty?
                raise TNEFParsingError, "tnef produced no attachments"
            end
        end
        attachments
    end

    # Maps known non-standard aliases of a MIME type onto the single content
    # type string used for it elsewhere in this module. Any content type that
    # is not a known alias is returned unchanged.
    def normalise_content_type(content_type)
        case content_type
        # e.g. http://www.whatdotheyknow.com/request/93/response/250
        when 'application/excel', 'application/msexcel', 'application/x-ms-excel'
            'application/vnd.ms-excel'
        when 'application/mspowerpoint', 'application/x-ms-powerpoint'
            'application/vnd.ms-powerpoint'
        when 'application/msword', 'application/x-ms-word'
            'application/vnd.ms-word'
        when 'application/x-zip-compressed'
            'application/zip'
        # e.g. http://www.whatdotheyknow.com/request/copy_of_current_swessex_scr_opt#incoming-9928
        when 'application/acrobat'
            'application/pdf'
        else
            content_type
        end
    end

    # Extracts text from a single attachment.
    #
    # content_type - MIME type string that selects the extraction strategy
    # body         - the attachment content
    # charset      - codepage elinks is told to assume for text/html bodies
    #
    # Returns the extracted text. text/plain bodies are returned with two
    # trailing newlines; content types with no matching branch give ''.
    def get_attachment_text_one_file(content_type, body, charset = 'utf-8')
        # note re. charset: TMail always tries to convert email bodies
        # to UTF8 by default, so normally it should already be that.
        text = ''
        # XXX - tell all these command line tools to return utf-8
        if content_type == 'text/plain'
            text += body + "\n\n"
        else
            tempfile = Tempfile.new('foiextract')
            begin
                tempfile.binmode
                tempfile.print body
                tempfile.flush
                # Commands given these params are passed the text object itself
                # via :append_to
                default_params = { :append_to => text, :binary_output => false }
                if content_type == 'application/vnd.ms-word'
                    converted_path = tempfile.path + ".txt"
                    # memory limit is 512 * 1024 * 1024 bytes
                    AlaveteliExternalCommand.run("wvText", tempfile.path, converted_path,
                                                 { :memory_limit => 536870912 } )
                    # Try catdoc if we get into trouble (e.g. for InfoRequestEvent 2701)
                    if not File.exist?(converted_path)
                        AlaveteliExternalCommand.run("catdoc", tempfile.path, default_params)
                    else
                        text += File.read(converted_path) + "\n\n"
                        File.unlink(converted_path)
                    end
                elsif content_type == 'application/rtf'
                    # catdoc on RTF produces less comments and extra bumf than --text option to unrtf
                    AlaveteliExternalCommand.run("catdoc", tempfile.path, default_params)
                elsif content_type == 'text/html'
                    # lynx wordwraps links in its output, which then don't
                    # get formatted properly by Alaveteli. We use elinks
                    # instead, which doesn't do that.
                    # NOTE(review): charset is interpolated unescaped into the
                    # elinks -eval expression - confirm callers only pass
                    # validated charset names.
                    AlaveteliExternalCommand.run("elinks", "-eval", "set document.codepage.assume = \"#{charset}\"",
                                                           "-eval", "set document.codepage.force_assumed = 1",
                                                           "-dump-charset", "utf-8",
                                                           "-force-html", "-dump",
                                                           tempfile.path,
                                                           default_params.merge(:env => {"LANG" => "C"}))
                elsif content_type == 'application/vnd.ms-excel'
                    # Bit crazy using /usr/bin/strings - but xls2csv, xlhtml and
                    # py_xls2txt only extract text from cells, not from floating
                    # notes. catdoc may be fooled by weird character sets, but will
                    # probably do for UK FOI requests.
                    AlaveteliExternalCommand.run("/usr/bin/strings", tempfile.path, default_params)
                elsif content_type == 'application/vnd.ms-powerpoint'
                    # ppthtml seems to catch more text, but only outputs HTML when
                    # we want text, so just use catppt for now
                    AlaveteliExternalCommand.run("catppt", tempfile.path, default_params)
                elsif content_type == 'application/pdf'
                    AlaveteliExternalCommand.run("pdftotext", tempfile.path, "-", default_params)
                elsif content_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
                    # This is Microsoft's XML office document format.
                    # Just pull out the main XML file, and strip it of text.
                    xml = AlaveteliExternalCommand.run("/usr/bin/unzip", "-qq",
                                                                         "-c",
                                                                         tempfile.path,
                                                                         "word/document.xml",
                                                                         {:binary_output => false})
                    if !xml.nil?
                        doc = REXML::Document.new(xml)
                        text += doc.each_element( './/text()' ){}.join(" ")
                    end
                elsif content_type == 'application/zip'
                    # recurse into zip files; any failure is logged and the
                    # zip contributes no text rather than aborting extraction
                    begin
                        zip_file = Zip::ZipFile.open(tempfile.path)
                        begin
                            text += get_attachment_text_from_zip_file(zip_file)
                        ensure
                            # close the archive even if reading an entry raised
                            zip_file.close()
                        end
                    rescue => e
                        $stderr.puts("Error processing zip file: #{e.inspect}")
                    end
                end
            ensure
                # Close and delete the temporary file even when a command raised
                tempfile.close!
            end
        end

        return text
    end
    # Concatenates the text extracted from every file entry of an opened zip
    # archive. Non-file entries are skipped, as are entries whose content
    # cannot be read. The content type of each entry is looked up from its
    # name, falling back to 'application/octet-stream'.
    def get_attachment_text_from_zip_file(zip_file)
        extracted = ""
        zip_file.each do |entry|
            next unless entry.file?

            name = entry.to_s
            begin
                data = entry.get_input_stream.read
            rescue
                # move to next attachment silently if there were problems
                # XXX really should reduce this to specific exceptions?
                # e.g. password protected
                next
            end

            mime = AlaveteliFileTypes.filename_to_mimetype(name) || 'application/octet-stream'
            extracted += get_attachment_text_one_file(mime, data)
        end
        extracted
    end

    # Turn instance methods into class methods
    extend self

end