aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--app/controllers/request_controller.rb20
-rw-r--r--app/models/foi_attachment.rb81
-rw-r--r--spec/controllers/request_controller_spec.rb3
3 files changed, 17 insertions, 87 deletions
diff --git a/app/controllers/request_controller.rb b/app/controllers/request_controller.rb
index 6445dd685..d4c303e52 100644
--- a/app/controllers/request_controller.rb
+++ b/app/controllers/request_controller.rb
@@ -763,18 +763,22 @@ class RequestController < ApplicationController
key_path = foi_fragment_cache_path(key)
image_dir = File.dirname(key_path)
FileUtils.mkdir_p(image_dir)
- html, wrapper_id = @attachment.body_as_html(image_dir)
+ html = @attachment.body_as_html(image_dir, :attachment_url => CGI.escape(@attachment_url))
- view_html_stylesheet = render_to_string :partial => "request/view_html_stylesheet"
- html.sub!(/<head>/i, "<head>" + view_html_stylesheet)
- html.sub!(/<body[^>]*>/i, '<body><prefix-here><div id="' + wrapper_id + '"><div id="view-html-content">')
- html.sub!(/<\/body[^>]*>/i, '</div></div></body>')
+ view_html_stylesheet = render_to_string(:partial => "request/view_html_stylesheet")
+ view_html_prefix = render_to_string(:partial => "request/view_html_prefix")
- view_html_prefix = render_to_string :partial => "request/view_html_prefix"
- html.sub!("<prefix-here>", view_html_prefix)
- html.sub!("<attachment-url-here>", CGI.escape(@attachment_url))
+ # Parse the generated HTML so we can inject more stuff
+ parsed = Nokogiri::HTML.parse(html)
+ # Insert the stylesheet in the head
+ parsed.css('head').children.after(view_html_stylesheet)
+ # Insert the content prefix
+ parsed.css('body').children.before(view_html_prefix)
+ # Get the raw html to render
+ html = parsed.to_html
@incoming_message.html_mask_stuff!(html)
+
response.content_type = 'text/html'
render :text => html
end
diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb
index ecd4a1872..99603ff69 100644
--- a/app/models/foi_attachment.rb
+++ b/app/models/foi_attachment.rb
@@ -292,83 +292,10 @@ class FoiAttachment < ActiveRecord::Base
end
# For "View as HTML" of attachment
- def body_as_html(dir)
- html = nil
- wrapper_id = "wrapper"
-
- # simple cases, can never fail
- if self.content_type == 'text/plain'
- text = self.body.strip
- text = CGI.escapeHTML(text)
- text = MySociety::Format.make_clickable(text)
- html = text.gsub(/\n/, '<br>')
- return '<!DOCTYPE html><html><head><title></title></head><body>' + html + "</body></html>", wrapper_id
- end
-
- # the extractions will also produce image files, which go in the
- # current directory, so change to the directory the function caller
- # wants everything in
-
- html = nil
- if ['application/pdf', 'application/rtf'].include?(self.content_type)
- text = self.body
- Dir.chdir(dir) do
- if RUBY_VERSION.to_f >= 1.9
- tempfile = Tempfile.new('foiextract', '.', :encoding => text.encoding)
- else
- tempfile = Tempfile.new('foiextract', '.')
- end
- tempfile.print text
- tempfile.flush
-
-
- if self.content_type == 'application/pdf'
- # We set a timeout here, because pdftohtml can spiral out of control
- # on some PDF files and we don't want to crash the whole server.
- html = AlaveteliExternalCommand.run("pdftohtml", "-nodrm", "-zoom", "1.0", "-stdout", "-enc", "UTF-8", "-noframes", tempfile.path, :timeout => 30)
- elsif self.content_type == 'application/rtf'
- html = AlaveteliExternalCommand.run("unrtf", "--html", tempfile.path, :timeout => 120)
- end
-
- tempfile.close
- tempfile.delete
- end
- end
- if html.nil?
- if self.has_google_docs_viewer?
- html = '' # force error and using Google docs viewer
- else
- raise "No HTML conversion available for type " + self.content_type
- end
- end
-
-
-
- # We need to look at:
- # a) Any error code
- # b) The output size, as pdftohtml does not return an error code upon error.
- # c) For cases when there is no text in the body of the HTML, or
- # images, so nothing will be rendered. This is to detect some bug in
- # pdftohtml, which sometimes makes it return just <hr>s and no other
- # content.
- html.match(/(\<body[^>]*\>.*)/mi)
- body = $1.to_s
- body_without_tags = body.gsub(/\s+/,"").gsub(/\<[^\>]*\>/, "")
- contains_images = html.match(/<img/mi) ? true : false
- if html.size == 0 || !$?.success? || (body_without_tags.size == 0 && !contains_images)
- ret = "<html><head></head><body>";
- if self.has_google_docs_viewer?
- wrapper_id = "wrapper_google_embed"
- protocol = AlaveteliConfiguration::force_ssl ? 'https' : 'http'
- ret = ret + "<iframe src='#{protocol}://docs.google.com/viewer?url=<attachment-url-here>&embedded=true' width='100%' height='100%' style='border: none;'></iframe>";
- else
- ret = ret + "<p>Sorry, we were unable to convert this file to HTML. Please use the download link at the top right.</p>"
- end
- ret = ret + "</body></html>"
- return ret, wrapper_id
- end
-
- return html, wrapper_id
+ def body_as_html(dir, opts = {})
+ attachment_url = opts.fetch(:attachment_url, nil)
+ html = AttachmentToHTML.to_html(self, :tmpdir => dir, :attachment_url => attachment_url)
+ html.to_s
end
end
diff --git a/spec/controllers/request_controller_spec.rb b/spec/controllers/request_controller_spec.rb
index 1e7df4536..9353efcb3 100644
--- a/spec/controllers/request_controller_spec.rb
+++ b/spec/controllers/request_controller_spec.rb
@@ -2407,8 +2407,7 @@ describe RequestController, "when caching fragments" do
:html_mask_stuff! => nil,
:user_can_view? => true,
:all_can_view? => true)
- attachment = mock(FoiAttachment, :display_filename => long_name,
- :body_as_html => ['some text', 'wrapper'])
+ attachment = FactoryGirl.build(:body_text, :filename => long_name)
IncomingMessage.stub!(:find).with("44").and_return(incoming_message)
IncomingMessage.stub!(:get_attachment_by_url_part_number_and_filename).and_return(attachment)
InfoRequest.stub!(:find).with("132").and_return(info_request)