diff options
-rw-r--r-- | app/controllers/request_controller.rb | 20 | ||||
-rw-r--r-- | app/models/foi_attachment.rb | 81 | ||||
-rw-r--r-- | spec/controllers/request_controller_spec.rb | 3 |
3 files changed, 17 insertions, 87 deletions
diff --git a/app/controllers/request_controller.rb b/app/controllers/request_controller.rb index 6445dd685..d4c303e52 100644 --- a/app/controllers/request_controller.rb +++ b/app/controllers/request_controller.rb @@ -763,18 +763,22 @@ class RequestController < ApplicationController key_path = foi_fragment_cache_path(key) image_dir = File.dirname(key_path) FileUtils.mkdir_p(image_dir) - html, wrapper_id = @attachment.body_as_html(image_dir) + html = @attachment.body_as_html(image_dir, :attachment_url => CGI.escape(@attachment_url)) - view_html_stylesheet = render_to_string :partial => "request/view_html_stylesheet" - html.sub!(/<head>/i, "<head>" + view_html_stylesheet) - html.sub!(/<body[^>]*>/i, '<body><prefix-here><div id="' + wrapper_id + '"><div id="view-html-content">') - html.sub!(/<\/body[^>]*>/i, '</div></div></body>') + view_html_stylesheet = render_to_string(:partial => "request/view_html_stylesheet") + view_html_prefix = render_to_string(:partial => "request/view_html_prefix") - view_html_prefix = render_to_string :partial => "request/view_html_prefix" - html.sub!("<prefix-here>", view_html_prefix) - html.sub!("<attachment-url-here>", CGI.escape(@attachment_url)) + # Parse the generated HTML so we can inject more stuff + parsed = Nokogiri::HTML.parse(html) + # Insert the stylesheet in the head + parsed.css('head').children.after(view_html_stylesheet) + # Insert the content prefix + parsed.css('body').children.before(view_html_prefix) + # Get the raw html to render + html = parsed.to_html @incoming_message.html_mask_stuff!(html) + response.content_type = 'text/html' render :text => html end diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb index ecd4a1872..99603ff69 100644 --- a/app/models/foi_attachment.rb +++ b/app/models/foi_attachment.rb @@ -292,83 +292,10 @@ class FoiAttachment < ActiveRecord::Base end # For "View as HTML" of attachment - def body_as_html(dir) - html = nil - wrapper_id = "wrapper" - - # simple cases, can never fail - if self.content_type == 'text/plain' - text = self.body.strip - text = CGI.escapeHTML(text) - text = MySociety::Format.make_clickable(text) - html = text.gsub(/\n/, '<br>') - return '<!DOCTYPE html><html><head><title></title></head><body>' + html + "</body></html>", wrapper_id - end - - # the extractions will also produce image files, which go in the - # current directory, so change to the directory the function caller - # wants everything in - - html = nil - if ['application/pdf', 'application/rtf'].include?(self.content_type) - text = self.body - Dir.chdir(dir) do - if RUBY_VERSION.to_f >= 1.9 - tempfile = Tempfile.new('foiextract', '.', :encoding => text.encoding) - else - tempfile = Tempfile.new('foiextract', '.') - end - tempfile.print text - tempfile.flush - - - if self.content_type == 'application/pdf' - # We set a timeout here, because pdftohtml can spiral out of control - # on some PDF files and we don't want to crash the whole server. - html = AlaveteliExternalCommand.run("pdftohtml", "-nodrm", "-zoom", "1.0", "-stdout", "-enc", "UTF-8", "-noframes", tempfile.path, :timeout => 30) - elsif self.content_type == 'application/rtf' - html = AlaveteliExternalCommand.run("unrtf", "--html", tempfile.path, :timeout => 120) - end - - tempfile.close - tempfile.delete - end - end - if html.nil? - if self.has_google_docs_viewer? - html = '' # force error and using Google docs viewer - else - raise "No HTML conversion available for type " + self.content_type - end - end - - - - # We need to look at: - # a) Any error code - # b) The output size, as pdftohtml does not return an error code upon error. - # c) For cases when there is no text in the body of the HTML, or - # images, so nothing will be rendered. This is to detect some bug in - # pdftohtml, which sometimes makes it return just <hr>s and no other - # content. - html.match(/(\<body[^>]*\>.*)/mi) - body = $1.to_s - body_without_tags = body.gsub(/\s+/,"").gsub(/\<[^\>]*\>/, "") - contains_images = html.match(/<img/mi) ? true : false - if html.size == 0 || !$?.success? || (body_without_tags.size == 0 && !contains_images) - ret = "<html><head></head><body>"; - if self.has_google_docs_viewer? - wrapper_id = "wrapper_google_embed" - protocol = AlaveteliConfiguration::force_ssl ? 'https' : 'http' - ret = ret + "<iframe src='#{protocol}://docs.google.com/viewer?url=<attachment-url-here>&embedded=true' width='100%' height='100%' style='border: none;'></iframe>"; - else - ret = ret + "<p>Sorry, we were unable to convert this file to HTML. Please use the download link at the top right.</p>" - end - ret = ret + "</body></html>" - return ret, wrapper_id - end - - return html, wrapper_id + def body_as_html(dir, opts = {}) + attachment_url = opts.fetch(:attachment_url, nil) + html = AttachmentToHTML.to_html(self, :tmpdir => dir, :attachment_url => attachment_url) + html.to_s end end diff --git a/spec/controllers/request_controller_spec.rb b/spec/controllers/request_controller_spec.rb index 1e7df4536..9353efcb3 100644 --- a/spec/controllers/request_controller_spec.rb +++ b/spec/controllers/request_controller_spec.rb @@ -2407,8 +2407,7 @@ describe RequestController, "when caching fragments" do :html_mask_stuff! => nil, :user_can_view? => true, :all_can_view? => true) - attachment = mock(FoiAttachment, :display_filename => long_name, - :body_as_html => ['some text', 'wrapper']) + attachment = FactoryGirl.build(:body_text, :filename => long_name) IncomingMessage.stub!(:find).with("44").and_return(incoming_message) IncomingMessage.stub!(:get_attachment_by_url_part_number_and_filename).and_return(attachment) InfoRequest.stub!(:find).with("132").and_return(info_request) |