12 files changed, 565 insertions, 7 deletions
diff --git a/lib/attachment_to_html/adapters/could_not_convert.rb b/lib/attachment_to_html/adapters/could_not_convert.rb
new file mode 100644
index 000000000..8e4bf39dc
--- /dev/null
+++ b/lib/attachment_to_html/adapters/could_not_convert.rb
@@ -0,0 +1,49 @@
+module AttachmentToHTML
+    module Adapters
+        class CouldNotConvert
+
+            attr_reader :attachment
+
+            # Public: Initialize a PDF converter
+            #
+            # attachment - the FoiAttachment to convert to HTML
+            # opts       - a Hash of options (default: {}):
+            #              No options currently accepted
+            def initialize(attachment, opts = {})
+                @attachment = attachment
+            end
+
+            # Public: The title to use in the <title> tag
+            #
+            # Returns a String
+            def title
+                @title ||= attachment.display_filename
+            end
+
+            # Public: The contents of the extracted html <body> tag
+            #
+            # Returns a String
+            def body
+                @body ||= parse_body
+            end
+
+
+            # Public: Was the document conversion successful?
+            # As this is a fallback option and not doing anything dynamic
+            # we're assuming this is successful whatever the case
+            #
+            # Returns true
+            def success?
+                true
+            end
+
+            private
+
+            def parse_body
+                "<p>Sorry, we were unable to convert this file to HTML. " \
+                "Please use the download link at the top right.</p>"
+            end
+
+        end
+    end
+end
+\ No newline at end of file
diff --git a/lib/attachment_to_html/adapters/google_docs_viewer.rb b/lib/attachment_to_html/adapters/google_docs_viewer.rb
new file mode 100644
index 000000000..991fbb757
--- /dev/null
+++ b/lib/attachment_to_html/adapters/google_docs_viewer.rb
@@ -0,0 +1,56 @@
+module AttachmentToHTML
+    module Adapters
+        # Renders the attachment in a Google Docs Viewer
+        class GoogleDocsViewer
+
+            attr_reader :attachment, :attachment_url
+
+            # Public: Initialize a GoogleDocsViewer converter
+            #
+            # attachment - the FoiAttachment to convert to HTML
+            # opts       - a Hash of options (default: {}):
+            #              :attachment_url - a String url to the attachment for
+            #                                Google to render (default: nil)
+            def initialize(attachment, opts = {})
+                @attachment = attachment
+                @attachment_url = opts.fetch(:attachment_url, nil)
+            end
+
+            # Public: The title to use in the <title> tag
+            #
+            # Returns a String
+            def title
+                @title ||= attachment.display_filename
+            end
+
+            # Public: The contents of the extracted html <body> tag
+            #
+            # Returns a String
+            def body
+                @body ||= parse_body
+            end
+
+            # Public: Was the document conversion successful?
+            # We can't really tell whether the document conversion has been
+            # successful as such; We're assuming that given a correctly
+            # constructed iframe (which is tested) that Google will make this
+            # Just Work.
+            #
+            # Returns true
+            def success?
+                true
+            end
+
+            private
+
+            def parse_body
+                %Q(<iframe src="#{ protocol }://docs.google.com/viewer?url=#{ attachment_url }&amp;embedded=true" width="100%" height="100%" style="border: none;"></iframe>)
+            end
+
+            def protocol
+                AlaveteliConfiguration.force_ssl ? 'https' : 'http'
+            end
+
+        end
+    end
+end
diff --git a/lib/attachment_to_html/adapters/pdf.rb b/lib/attachment_to_html/adapters/pdf.rb
new file mode 100644
index 000000000..b91958c52
--- /dev/null
+++ b/lib/attachment_to_html/adapters/pdf.rb
@@ -0,0 +1,108 @@
+module AttachmentToHTML
+    module Adapters
+        # Convert application/pdf documents in to HTML
+        class PDF
+            TOO_MANY_IMAGES = 51
+
+            attr_reader :attachment, :tmpdir
+
+            # Public: Initialize a PDF converter
+            #
+            # attachment - the FoiAttachment to convert to HTML
+            # opts       - a Hash of options (default: {}):
+            #              :tmpdir  - String name of directory to store the
+            #                         converted document
+            def initialize(attachment, opts = {})
+                @attachment = attachment
+                @tmpdir = opts.fetch(:tmpdir, ::Rails.root.join('tmp'))
+            end
+
+            # Public: The title to use in the <title> tag
+            #
+            # Returns a String
+            def title
+                @title ||= attachment.display_filename
+            end
+
+            # Public: The contents of the extracted html <body> tag
+            #
+            # Returns a String
+            def body
+                @body ||= parse_body
+            end
+
+            # Public: Was the document conversion successful?
+            #
+            # Returns a Boolean
+            def success?
+                return false if contains_too_many_images?
+                has_content? || contains_images?
+            end
+
+            private
+
+            def parse_body
+                match = convert.match(/<body[^>]*>(.*?)<\/body>/mi)
+                match ? match[1] : ''
+            end
+
+            def has_content?
+                !body.gsub(/\s+/,"").gsub(/\<[^\>]*\>/, "").empty?
+            end
+
+            def contains_images?
+                body.match(/<img[^>]*>/mi) ? true : false
+            end
+
+            # Works around https://bugs.freedesktop.org/show_bug.cgi?id=77932 in pdftohtml
+            def contains_too_many_images?
+                number_of_images_in_body >= TOO_MANY_IMAGES
+            end
+
+            def number_of_images_in_body
+                body.scan(/<img[^>]*>/i).size
+            end
+
+            def convert
+                # Get the attachment body outside of the chdir call as getting
+                # the body may require opening files too
+                text = attachment_body
+
+                @converted ||= Dir.chdir(tmpdir) do
+                    tempfile = create_tempfile(text)
+
+                    html = AlaveteliExternalCommand.run("pdftohtml",
+                      "-nodrm", "-zoom", "1.0", "-stdout", "-enc", "UTF-8",
+                      "-noframes", tempfile.path, :timeout => 30
+                    )
+
+                    cleanup_tempfile(tempfile)
+
+                    html
+                end
+            end
+
+            def create_tempfile(text)
+                tempfile = if RUBY_VERSION.to_f >= 1.9
+                               Tempfile.new('foiextract', '.',
+                                            :encoding => text.encoding)
+                           else
+                               Tempfile.new('foiextract', '.')
+                           end
+                tempfile.print(text)
+                tempfile.flush
+                tempfile
+            end
+
+            def cleanup_tempfile(tempfile)
+                tempfile.close
+                tempfile.delete
+            end
+
+            def attachment_body
+                @attachment_body ||= attachment.body
+            end
+
+        end
+    end
+end
diff --git a/lib/attachment_to_html/adapters/rtf.rb b/lib/attachment_to_html/adapters/rtf.rb
new file mode 100644
index 000000000..859c0e541
--- /dev/null
+++ b/lib/attachment_to_html/adapters/rtf.rb
@@ -0,0 +1,107 @@
+module AttachmentToHTML
+    module Adapters
+        # Convert application/rtf documents in to HTML
+        class RTF
+
+            attr_reader :attachment, :tmpdir
+
+            # Public: Initialize a RTF converter
+            #
+            # attachment - the FoiAttachment to convert to HTML
+            # opts       - a Hash of options (default: {}):
+            #              :tmpdir  - String name of directory to store the
+            #                         converted document
+            def initialize(attachment, opts = {})
+                @attachment = attachment
+                @tmpdir = opts.fetch(:tmpdir, ::Rails.root.join('tmp'))
+            end
+
+            # Public: The title to use in the <title> tag
+            #
+            # Returns a String
+            def title
+                @title ||= attachment.display_filename
+            end
+
+            # Public: The contents of the extracted html <body> tag
+            #
+            # Returns a String
+            def body
+                @body ||= parse_body
+            end
+
+            # Public: Was the document conversion successful?
+            #
+            # Returns a Boolean
+            def success?
+                has_content? || contains_images?
+            end
+
+            private
+
+            def parse_body
+                match = convert.match(/<body[^>]*>(.*?)<\/body>/mi)
+                match ? match[1] : ''
+            end
+
+            def has_content?
+                !body.gsub(/\s+/,"").gsub(/\<[^\>]*\>/, "").empty?
+            end
+
+            def contains_images?
+                body.match(/<img[^>]*>/mi) ? true : false
+            end
+
+            def convert
+                # Get the attachment body outside of the chdir call as getting
+                # the body may require opening files too
+                text = attachment_body
+
+                @converted ||= Dir.chdir(tmpdir) do
+                    tempfile = create_tempfile(text)
+
+                    html = AlaveteliExternalCommand.run("unrtf", "--html",
+                      tempfile.path, :timeout => 120
+                    )
+
+                    cleanup_tempfile(tempfile)
+
+                    sanitize_converted(html)
+                end
+
+            end
+
+            # Works around http://savannah.gnu.org/bugs/?42015 in unrtf ~> 0.21
+            def sanitize_converted(html)
+                invalid = %Q(<!DOCTYPE html PUBLIC -//W3C//DTD HTML 4.01 Transitional//EN>)
+                valid   = %Q(<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN>")
+                if html.include?(invalid)
+                   html.sub!(invalid, valid)
+                end
+                html
+            end
+
+            def create_tempfile(text)
+                tempfile = if RUBY_VERSION.to_f >= 1.9
+                               Tempfile.new('foiextract', '.',
+                                            :encoding => text.encoding)
+                           else
+                               Tempfile.new('foiextract', '.')
+                           end
+                tempfile.print(text)
+                tempfile.flush
+                tempfile
+            end
+
+            def cleanup_tempfile(tempfile)
+                tempfile.close
+                tempfile.delete
+            end
+
+            def attachment_body
+                @attachment_body ||= attachment.body
+            end
+
+        end
+    end
+end
diff --git a/lib/attachment_to_html/adapters/text.rb b/lib/attachment_to_html/adapters/text.rb
new file mode 100644
index 000000000..e99183f0e
--- /dev/null
+++ b/lib/attachment_to_html/adapters/text.rb
@@ -0,0 +1,61 @@
+module AttachmentToHTML
+    module Adapters
+        # Convert text/plain documents in to HTML
+        class Text
+
+            attr_reader :attachment
+
+            # Public: Initialize a Text converter
+            #
+            # attachment - the FoiAttachment to convert to HTML
+            # opts       - a Hash of options (default: {}):
+            #              No options currently accepted
+            def initialize(attachment, opts = {})
+                @attachment = attachment
+            end
+
+            # Public: The title to use in the <title> tag
+            #
+            # Returns a String
+            def title
+                @title ||= attachment.display_filename
+            end
+
+            # Public: The contents of the extracted html <body> tag
+            #
+            # Returns a String
+            def body
+                @body ||= parse_body
+            end
+
+            # Public: Was the document conversion successful?
+            #
+            # Returns a Boolean
+            def success?
+                has_content? || contains_images?
+            end
+
+            private
+
+            def convert
+                text = attachment.body.strip
+                text = CGI.escapeHTML(text)
+                text = MySociety::Format.make_clickable(text)
+                text = text.gsub(/\n/, '<br>')
+            end
+
+            def parse_body
+                convert
+            end
+
+            def has_content?
+                !body.gsub(/\s+/,"").gsub(/\<[^\>]*\>/, "").empty?
+            end
+
+            def contains_images?
+                body.match(/<img[^>]*>/mi) ? true : false
+            end
+
+         end
+    end
+end
diff --git a/lib/attachment_to_html/attachment_to_html.rb b/lib/attachment_to_html/attachment_to_html.rb
new file mode 100644
index 000000000..2f7c08264
--- /dev/null
+++ b/lib/attachment_to_html/attachment_to_html.rb
@@ -0,0 +1,46 @@
+require 'view'
+
+Dir[File.dirname(__FILE__) + '/adapters/*.rb'].each do |file|
+    require file
+end
+
+module AttachmentToHTML
+    extend self
+
+    def to_html(attachment, opts = {})
+        adapter = adapter_for(attachment).new(attachment, opts)
+
+        unless adapter.success?
+          adapter = fallback_adapter_for(attachment).new(attachment, opts)
+        end
+
+        view = View.new(adapter)
+        view.wrapper = 'wrapper_google_embed' if adapter.is_a?(Adapters::GoogleDocsViewer)
+
+        view.render do
+           opts.fetch(:content_for, []).each do |k,v|
+              inject_content(k) { v }
+           end
+        end
+    end
+
+    private
+
+    def adapter_for(attachment)
+        case attachment.content_type
+        when 'text/plain' then Adapters::Text
+        when 'application/pdf' then Adapters::PDF
+        when 'application/rtf' then Adapters::RTF
+        else
+            fallback_adapter_for(attachment)
+        end
+    end
+
+    def fallback_adapter_for(attachment)
+        if attachment.has_google_docs_viewer?
+            Adapters::GoogleDocsViewer
+        else
+            Adapters::CouldNotConvert
+        end
+    end
+end
diff --git a/lib/attachment_to_html/template.html.erb b/lib/attachment_to_html/template.html.erb
new file mode 100644
index 000000000..38286a5f9
--- /dev/null
+++ b/lib/attachment_to_html/template.html.erb
@@ -0,0 +1,16 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <title><%= ERB::Util.html_escape(title) %></title><%# title is plain text (a filename), so escape it %>
+  <%= content_for(:head_suffix) %>
+</head>
+<body>
+  <%= content_for(:body_prefix) %>
+  <div id="<%= wrapper %>">
+    <div id="view-html-content">
+      <%= body %>
+    </div>
+  </div>
+  <%= content_for(:body_suffix) %>
+</body>
+</html>
diff --git a/lib/attachment_to_html/view.rb b/lib/attachment_to_html/view.rb
new file mode 100644
index 000000000..e6991d44e
--- /dev/null
+++ b/lib/attachment_to_html/view.rb
@@ -0,0 +1,39 @@
+module AttachmentToHTML
+    class View < ERB
+
+        def self.template
+            @template || "#{ File.dirname(__FILE__) }/template.html.erb"
+        end
+
+        def self.template=(path)
+            @template = path
+        end
+
+        attr_accessor :title, :body, :template, :wrapper
+
+        def initialize(adapter, opts = {})
+            self.title    = adapter.title
+            self.body     = adapter.body
+            self.template = opts.fetch(:template, self.class.template)
+            self.wrapper  = opts.fetch(:wrapper, 'wrapper')
+            super(File.read(template))
+        end
+
+        def render(&block)
+            instance_eval(&block) if block_given?
+            result(binding)
+        end
+
+        def content_for(area)
+            send(area) if respond_to?(area)
+        end
+
+        private
+
+        def inject_content(area, &block)
+          instance_variable_set("@#{ area }".to_sym, block.call)
+          self.class.send(:attr_accessor, area)
+        end
+
+    end
+end
diff --git a/lib/configuration.rb b/lib/configuration.rb
index bd705b777..d525bf712 100644
--- a/lib/configuration.rb
+++ b/lib/configuration.rb
@@ -58,6 +58,7 @@ module AlaveteliConfiguration
             :RECAPTCHA_PUBLIC_KEY => 'x',
             :REPLY_LATE_AFTER_DAYS => 20,
             :REPLY_VERY_LATE_AFTER_DAYS => 40,
+            :RESPONSIVE_STYLING => false,
             :SITE_NAME => 'Alaveteli',
             :SKIP_ADMIN_AUTH => false,
             :SPECIAL_REPLY_VERY_LATE_AFTER_DAYS => 60,
diff --git a/lib/date_quarter.rb b/lib/date_quarter.rb
new file mode 100644
index 000000000..ac159b420
--- /dev/null
+++ b/lib/date_quarter.rb
@@ -0,0 +1,22 @@
+module DateQuarter
+    extend self
+
+    def quarters_between(start_at, finish_at)
+        results = []
+
+        quarter_start = start_at.beginning_of_quarter
+        quarter_end   = start_at.end_of_quarter
+
+        while quarter_end <= finish_at.end_of_quarter do
+          # Collect these
+          results << [quarter_start, quarter_end]
+
+          # Update dates
+          quarter_start = quarter_end + 1.second
+          quarter_end = quarter_start.end_of_quarter
+        end
+
+        results
+    end
+
+end
diff --git a/lib/normalize_string.rb b/lib/normalize_string.rb
index f02b18ee0..3b6116970 100644
--- a/lib/normalize_string.rb
+++ b/lib/normalize_string.rb
@@ -1,4 +1,4 @@
-require 'iconv' unless RUBY_VERSION.to_f >= 1.9
+require 'iconv' unless String.method_defined?(:encode)
 require 'charlock_holmes'
 
 class EncodingNormalizationError < StandardError
@@ -23,17 +23,16 @@ def normalize_string_to_utf8(s, suggested_character_encoding=nil)
     to_try.push guessed_encoding
 
     to_try.each do |from_encoding|
-        if RUBY_VERSION.to_f >= 1.9
+        if String.method_defined?(:encode)
             begin
                 s.force_encoding from_encoding
                 return s.encode('UTF-8') if s.valid_encoding?
-            rescue ArgumentError
+            rescue ArgumentError, Encoding::UndefinedConversionError
                 # We get this is there are invalid bytes when
                 # interpreted as from_encoding at the point of
                 # the encode('UTF-8'); move onto the next one...
             end
         else
-            to_encoding = 'UTF-8'
             begin
                 converted = Iconv.conv 'UTF-8', from_encoding, s
                 return converted
@@ -45,7 +44,6 @@ def normalize_string_to_utf8(s, suggested_character_encoding=nil)
         end
     end
     raise EncodingNormalizationError, "Couldn't find a valid character encoding for the string"
-
 end
 
 def convert_string_to_utf8_or_binary(s, suggested_character_encoding=nil)
@@ -69,13 +67,13 @@ def convert_string_to_utf8_or_binary(s, suggested_character_encoding=nil)
         result = normalize_string_to_utf8 s, suggested_character_encoding
     rescue EncodingNormalizationError
         result = s
-        s.force_encoding 'ASCII-8BIT' if RUBY_VERSION.to_f >= 1.9
+        s.force_encoding 'ASCII-8BIT' if String.method_defined?(:encode)
     end
     result
 end
 
 def log_text_details(message, text)
-    if RUBY_VERSION.to_f >= 1.9
+    if String.method_defined?(:encode)
         STDERR.puts "#{message}, we have text: #{text}, of class #{text.class} and encoding #{text.encoding}"
     else
         STDERR.puts "#{message}, we have text: #{text}, of class #{text.class}"
diff --git a/lib/tasks/stats.rake b/lib/tasks/stats.rake
index 38eb15996..f09594529 100644
--- a/lib/tasks/stats.rake
+++ b/lib/tasks/stats.rake
@@ -97,6 +97,61 @@ namespace :stats do
     end
   end
 
+  desc <<-DESC
+Prints the per-quarter number of created FOI Requests made to each Public Body found by the query.
+Specify the search query as QUERY='london school'
+DESC
+  task :number_of_requests_created => :environment do
+    query = ENV['QUERY']
+    start_at = PublicBody.minimum(:created_at)
+    finish_at = PublicBody.maximum(:created_at)
+    public_bodies = PublicBody.search(query)
+    quarters = DateQuarter.quarters_between(start_at, finish_at)
+
+    # Headers
+    headers = ['Body'] + quarters.map { |date_tuple| date_tuple.join('~') }
+    puts headers.join(",")
+
+    public_bodies.each do |body|
+        stats = quarters.map do |quarter|
+                    conditions = ['created_at >= ? AND created_at < ?', quarter[0], quarter[1]]
+                    count = body.info_requests.count(:conditions => conditions)
+                    count ? count : 0
+                end
+
+      row = [body.name] + stats
+      puts row.join(",")
+    end
+  end
+
+  desc <<-DESC
+Prints the per-quarter number of successful FOI Requests made to each Public Body found by the query.
+Specify the search query as QUERY='london school'
+DESC
+  task :number_of_requests_successful => :environment do
+    query = ENV['QUERY']
+    start_at = PublicBody.minimum(:created_at)
+    finish_at = PublicBody.maximum(:created_at)
+    public_bodies = PublicBody.search(query)
+    quarters = DateQuarter.quarters_between(start_at, finish_at)
+
+    # Headers
+    headers = ['Body'] + quarters.map { |date_tuple| date_tuple.join('~') }
+    puts headers.join(",")
+
+    public_bodies.each do |body|
+      stats = quarters.map do |quarter|
+                  conditions = ['created_at >= ? AND created_at < ? AND described_state = ?',
+                                quarter[0], quarter[1], 'successful']
+                  count = body.info_requests.count(:conditions => conditions)
+                  count ? count : 0
+              end
+
+      row = [body.name] + stats
+      puts row.join(",")
+    end
+  end
+
   desc 'Update statistics in the public_bodies table'
   task :update_public_bodies_stats => :environment do
     verbose = ENV['VERBOSE'] == '1'