diff options
25 files changed, 1027 insertions, 88 deletions
diff --git a/app/controllers/request_controller.rb b/app/controllers/request_controller.rb index 6445dd685..d66c28275 100644 --- a/app/controllers/request_controller.rb +++ b/app/controllers/request_controller.rb @@ -763,18 +763,17 @@ class RequestController < ApplicationController key_path = foi_fragment_cache_path(key) image_dir = File.dirname(key_path) FileUtils.mkdir_p(image_dir) - html, wrapper_id = @attachment.body_as_html(image_dir) - view_html_stylesheet = render_to_string :partial => "request/view_html_stylesheet" - html.sub!(/<head>/i, "<head>" + view_html_stylesheet) - html.sub!(/<body[^>]*>/i, '<body><prefix-here><div id="' + wrapper_id + '"><div id="view-html-content">') - html.sub!(/<\/body[^>]*>/i, '</div></div></body>') - - view_html_prefix = render_to_string :partial => "request/view_html_prefix" - html.sub!("<prefix-here>", view_html_prefix) - html.sub!("<attachment-url-here>", CGI.escape(@attachment_url)) + html = @attachment.body_as_html(image_dir, + :attachment_url => Rack::Utils.escape(@attachment_url), + :content_for => { + :head_suffix => render_to_string(:partial => "request/view_html_stylesheet"), + :body_prefix => render_to_string(:partial => "request/view_html_prefix") + } + ) @incoming_message.html_mask_stuff!(html) + response.content_type = 'text/html' render :text => html end diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb index ecd4a1872..6f198249a 100644 --- a/app/models/foi_attachment.rb +++ b/app/models/foi_attachment.rb @@ -292,83 +292,10 @@ class FoiAttachment < ActiveRecord::Base end # For "View as HTML" of attachment - def body_as_html(dir) - html = nil - wrapper_id = "wrapper" - - # simple cases, can never fail - if self.content_type == 'text/plain' - text = self.body.strip - text = CGI.escapeHTML(text) - text = MySociety::Format.make_clickable(text) - html = text.gsub(/\n/, '<br>') - return '<!DOCTYPE html><html><head><title></title></head><body>' + html + "</body></html>", wrapper_id - end - - # the extractions will also produce image files, which go in the - # current directory, so change to the directory the function caller - # wants everything in - - html = nil - if ['application/pdf', 'application/rtf'].include?(self.content_type) - text = self.body - Dir.chdir(dir) do - if RUBY_VERSION.to_f >= 1.9 - tempfile = Tempfile.new('foiextract', '.', :encoding => text.encoding) - else - tempfile = Tempfile.new('foiextract', '.') - end - tempfile.print text - tempfile.flush - - - if self.content_type == 'application/pdf' - # We set a timeout here, because pdftohtml can spiral out of control - # on some PDF files and we don't want to crash the whole server. - html = AlaveteliExternalCommand.run("pdftohtml", "-nodrm", "-zoom", "1.0", "-stdout", "-enc", "UTF-8", "-noframes", tempfile.path, :timeout => 30) - elsif self.content_type == 'application/rtf' - html = AlaveteliExternalCommand.run("unrtf", "--html", tempfile.path, :timeout => 120) - end - - tempfile.close - tempfile.delete - end - end - if html.nil? - if self.has_google_docs_viewer? - html = '' # force error and using Google docs viewer - else - raise "No HTML conversion available for type " + self.content_type - end - end - - - - # We need to look at: - # a) Any error code - # b) The output size, as pdftohtml does not return an error code upon error. - # c) For cases when there is no text in the body of the HTML, or - # images, so nothing will be rendered. This is to detect some bug in - # pdftohtml, which sometimes makes it return just <hr>s and no other - # content. - html.match(/(\<body[^>]*\>.*)/mi) - body = $1.to_s - body_without_tags = body.gsub(/\s+/,"").gsub(/\<[^\>]*\>/, "") - contains_images = html.match(/<img/mi) ? true : false - if html.size == 0 || !$?.success? || (body_without_tags.size == 0 && !contains_images) - ret = "<html><head></head><body>"; - if self.has_google_docs_viewer? - wrapper_id = "wrapper_google_embed" - protocol = AlaveteliConfiguration::force_ssl ? 'https' : 'http' - ret = ret + "<iframe src='#{protocol}://docs.google.com/viewer?url=<attachment-url-here>&embedded=true' width='100%' height='100%' style='border: none;'></iframe>"; - else - ret = ret + "<p>Sorry, we were unable to convert this file to HTML. Please use the download link at the top right.</p>" - end - ret = ret + "</body></html>" - return ret, wrapper_id - end - - return html, wrapper_id + def body_as_html(dir, opts = {}) + attachment_url = opts.fetch(:attachment_url, nil) + to_html_opts = opts.merge(:tmpdir => dir, :attachment_url => attachment_url) + AttachmentToHTML.to_html(self, to_html_opts) end end diff --git a/config/application.rb b/config/application.rb index a944af928..1cff3a125 100644 --- a/config/application.rb +++ b/config/application.rb @@ -68,6 +68,7 @@ module Alaveteli end config.autoload_paths << "#{Rails.root.to_s}/lib/mail_handler" + config.autoload_paths << "#{Rails.root.to_s}/lib/attachment_to_html" # See Rails::Configuration for more options ENV['RECAPTCHA_PUBLIC_KEY'] = ::AlaveteliConfiguration::recaptcha_public_key diff --git a/lib/attachment_to_html/adapters/could_not_convert.rb b/lib/attachment_to_html/adapters/could_not_convert.rb new file mode 100644 index 000000000..8e4bf39dc --- /dev/null +++ b/lib/attachment_to_html/adapters/could_not_convert.rb @@ -0,0 +1,49 @@ +module AttachmentToHTML + module Adapters + class CouldNotConvert + + attr_reader :attachment + + # Public: Initialize a PDF converter + # + # attachment - the FoiAttachment to convert to HTML + # opts - a Hash of options (default: {}): + # No options currently accepted + def initialize(attachment, opts = {}) + @attachment = attachment + end + + # Public: The title to use in the <title> tag + # + # Returns a String + def title + @title ||= attachment.display_filename + end + + # Public: The contents of the extracted html <body> tag + # + # Returns a String + def body + @body ||= parse_body + end + + + # Public: Was the document conversion successful? + # As this is a fallback option and not doing anything dynamic + # we're assuming this is successful whatever the case + # + # Returns true + def success? + true + end + + private + + def parse_body + "<p>Sorry, we were unable to convert this file to HTML. " \ + "Please use the download link at the top right.</p>" + end + + end + end +end
\ No newline at end of file diff --git a/lib/attachment_to_html/adapters/google_docs_viewer.rb b/lib/attachment_to_html/adapters/google_docs_viewer.rb new file mode 100644 index 000000000..991fbb757 --- /dev/null +++ b/lib/attachment_to_html/adapters/google_docs_viewer.rb @@ -0,0 +1,56 @@ +module AttachmentToHTML + module Adapters + # Renders the attachment in a Google Docs Viewer + class GoogleDocsViewer + + attr_reader :attachment, :attachment_url + + # Public: Initialize a GoogleDocsViewer converter + # + # attachment - the FoiAttachment to convert to HTML + # opts - a Hash of options (default: {}): + # :attachment_url - a String url to the attachment for + # Google to render (default: nil) + def initialize(attachment, opts = {}) + @attachment = attachment + @attachment_url = opts.fetch(:attachment_url, nil) + end + + # Public: The title to use in the <title> tag + # + # Returns a String + def title + @title ||= attachment.display_filename + end + + # Public: The contents of the extracted html <body> tag + # + # Returns a String + def body + @body ||= parse_body + end + + # Public: Was the document conversion successful? + # We can't really tell whether the document conversion has been + # successful as such; We're assuming that given a correctly + # constructed iframe (which is tested) that Google will make this + # Just Work. + # + # Returns true + def success? + true + end + + private + + def parse_body + %Q(<iframe src="#{ protocol }://docs.google.com/viewer?url=#{ attachment_url }&embedded=true" width="100%" height="100%" style="border: none;"></iframe>) + end + + def protocol + AlaveteliConfiguration.force_ssl ? 'https' : 'http' + end + + end + end +end diff --git a/lib/attachment_to_html/adapters/pdf.rb b/lib/attachment_to_html/adapters/pdf.rb new file mode 100644 index 000000000..1fca2f201 --- /dev/null +++ b/lib/attachment_to_html/adapters/pdf.rb @@ -0,0 +1,97 @@ +module AttachmentToHTML + module Adapters + # Convert application/pdf documents in to HTML + class PDF + + attr_reader :attachment, :tmpdir + + # Public: Initialize a PDF converter + # + # attachment - the FoiAttachment to convert to HTML + # opts - a Hash of options (default: {}): + # :tmpdir - String name of directory to store the + # converted document + def initialize(attachment, opts = {}) + @attachment = attachment + @tmpdir = opts.fetch(:tmpdir, ::Rails.root.join('tmp')) + end + + # Public: The title to use in the <title> tag + # + # Returns a String + def title + @title ||= attachment.display_filename + end + + # Public: The contents of the extracted html <body> tag + # + # Returns a String + def body + @body ||= parse_body + end + + # Public: Was the document conversion successful? + # + # Returns a Boolean + def success? + has_content? || contains_images? + end + + private + + def parse_body + match = convert.match(/<body[^>]*>(.*?)<\/body>/mi) + match ? match[1] : '' + end + + def has_content? + !body.gsub(/\s+/,"").gsub(/\<[^\>]*\>/, "").empty? + end + + def contains_images? + body.match(/<img[^>]*>/mi) ? true : false + end + + def convert + # Get the attachment body outside of the chdir call as getting + # the body may require opening files too + text = attachment_body + + @converted ||= Dir.chdir(tmpdir) do + tempfile = create_tempfile(text) + + html = AlaveteliExternalCommand.run("pdftohtml", + "-nodrm", "-zoom", "1.0", "-stdout", "-enc", "UTF-8", + "-noframes", tempfile.path, :timeout => 30 + ) + + cleanup_tempfile(tempfile) + + html + end + end + + def create_tempfile(text) + tempfile = if RUBY_VERSION.to_f >= 1.9 + Tempfile.new('foiextract', '.', + :encoding => text.encoding) + else + Tempfile.new('foiextract', '.') + end + tempfile.print(text) + tempfile.flush + tempfile + end + + def cleanup_tempfile(tempfile) + tempfile.close + tempfile.delete + end + + def attachment_body + @attachment_body ||= attachment.body + end + + end + end +end diff --git a/lib/attachment_to_html/adapters/rtf.rb b/lib/attachment_to_html/adapters/rtf.rb new file mode 100644 index 000000000..859c0e541 --- /dev/null +++ b/lib/attachment_to_html/adapters/rtf.rb @@ -0,0 +1,107 @@ +module AttachmentToHTML + module Adapters + # Convert application/rtf documents in to HTML + class RTF + + attr_reader :attachment, :tmpdir + + # Public: Initialize a RTF converter + # + # attachment - the FoiAttachment to convert to HTML + # opts - a Hash of options (default: {}): + # :tmpdir - String name of directory to store the + # converted document + def initialize(attachment, opts = {}) + @attachment = attachment + @tmpdir = opts.fetch(:tmpdir, ::Rails.root.join('tmp')) + end + + # Public: The title to use in the <title> tag + # + # Returns a String + def title + @title ||= attachment.display_filename + end + + # Public: The contents of the extracted html <body> tag + # + # Returns a String + def body + @body ||= parse_body + end + + # Public: Was the document conversion successful? + # + # Returns a Boolean + def success? + has_content? || contains_images? + end + + private + + def parse_body + match = convert.match(/<body[^>]*>(.*?)<\/body>/mi) + match ? match[1] : '' + end + + def has_content? + !body.gsub(/\s+/,"").gsub(/\<[^\>]*\>/, "").empty? + end + + def contains_images? + body.match(/<img[^>]*>/mi) ? true : false + end + + def convert + # Get the attachment body outside of the chdir call as getting + # the body may require opening files too + text = attachment_body + + @converted ||= Dir.chdir(tmpdir) do + tempfile = create_tempfile(text) + + html = AlaveteliExternalCommand.run("unrtf", "--html", + tempfile.path, :timeout => 120 + ) + + cleanup_tempfile(tempfile) + + sanitize_converted(html) + end + + end + + # Works around http://savannah.gnu.org/bugs/?42015 in unrtf ~> 0.21 + def sanitize_converted(html) + invalid = %Q(<!DOCTYPE html PUBLIC -//W3C//DTD HTML 4.01 Transitional//EN>) + valid = %Q(<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN>") + if html.include?(invalid) + html.sub!(invalid, valid) + end + html + end + + def create_tempfile(text) + tempfile = if RUBY_VERSION.to_f >= 1.9 + Tempfile.new('foiextract', '.', + :encoding => text.encoding) + else + Tempfile.new('foiextract', '.') + end + tempfile.print(text) + tempfile.flush + tempfile + end + + def cleanup_tempfile(tempfile) + tempfile.close + tempfile.delete + end + + def attachment_body + @attachment_body ||= attachment.body + end + + end + end +end diff --git a/lib/attachment_to_html/adapters/text.rb b/lib/attachment_to_html/adapters/text.rb new file mode 100644 index 000000000..b431ada5e --- /dev/null +++ b/lib/attachment_to_html/adapters/text.rb @@ -0,0 +1,63 @@ +require 'nokogiri' + +module AttachmentToHTML + module Adapters + # Convert text/plain documents in to HTML + class Text + + attr_reader :attachment + + # Public: Initialize a Text converter + # + # attachment - the FoiAttachment to convert to HTML + # opts - a Hash of options (default: {}): + # No options currently accepted + def initialize(attachment, opts = {}) + @attachment = attachment + end + + # Public: The title to use in the <title> tag + # + # Returns a String + def title + @title ||= attachment.display_filename + end + + # Public: The contents of the extracted html <body> tag + # + # Returns a String + def body + @body ||= parse_body + end + + # Public: Was the document conversion successful? + # + # Returns a Boolean + def success? + has_content? || contains_images? + end + + private + + def convert + text = attachment.body.strip + text = CGI.escapeHTML(text) + text = MySociety::Format.make_clickable(text) + text = text.gsub(/\n/, '<br>') + end + + def parse_body + convert + end + + def has_content? + !body.gsub(/\s+/,"").gsub(/\<[^\>]*\>/, "").empty? + end + + def contains_images? + body.match(/<img[^>]*>/mi) ? true : false + end + + end + end +end diff --git a/lib/attachment_to_html/attachment_to_html.rb b/lib/attachment_to_html/attachment_to_html.rb new file mode 100644 index 000000000..2f7c08264 --- /dev/null +++ b/lib/attachment_to_html/attachment_to_html.rb @@ -0,0 +1,46 @@ +require 'view' + +Dir[File.dirname(__FILE__) + '/adapters/*.rb'].each do |file| + require file +end + +module AttachmentToHTML + extend self + + def to_html(attachment, opts = {}) + adapter = adapter_for(attachment).new(attachment, opts) + + unless adapter.success? + adapter = fallback_adapter_for(attachment).new(attachment, opts) + end + + view = View.new(adapter) + view.wrapper = 'wrapper_google_embed' if adapter.is_a?(Adapters::GoogleDocsViewer) + + view.render do + opts.fetch(:content_for, []).each do |k,v| + inject_content(k) { v } + end + end + end + + private + + def adapter_for(attachment) + case attachment.content_type + when 'text/plain' then Adapters::Text + when 'application/pdf' then Adapters::PDF + when 'application/rtf' then Adapters::RTF + else + fallback_adapter_for(attachment) + end + end + + def fallback_adapter_for(attachment) + if attachment.has_google_docs_viewer? + Adapters::GoogleDocsViewer + else + Adapters::CouldNotConvert + end + end +end diff --git a/lib/attachment_to_html/template.html.erb b/lib/attachment_to_html/template.html.erb new file mode 100644 index 000000000..38286a5f9 --- /dev/null +++ b/lib/attachment_to_html/template.html.erb @@ -0,0 +1,16 @@ +<!DOCTYPE html> +<html> +<head> + <title><%= title %></title> + <%= content_for(:head_suffix) %> +</head> +<body> + <%= content_for(:body_prefix) %> + <div id="<%= wrapper %>"> + <div id="view-html-content"> + <%= body %> + </div> + </div> + <%= content_for(:body_suffix) %> +</body> +</html> diff --git a/lib/attachment_to_html/view.rb b/lib/attachment_to_html/view.rb new file mode 100644 index 000000000..e6991d44e --- /dev/null +++ b/lib/attachment_to_html/view.rb @@ -0,0 +1,39 @@ +module AttachmentToHTML + class View < ERB + + def self.template + @template || "#{ File.dirname(__FILE__) }/template.html.erb" + end + + def self.template=(path) + @template = path + end + + attr_accessor :title, :body, :template, :wrapper + + def initialize(adapter, opts = {}) + self.title = adapter.title + self.body = adapter.body + self.template = opts.fetch(:template, self.class.template) + self.wrapper = opts.fetch(:wrapper, 'wrapper') + super(File.read(template)) + end + + def render(&block) + instance_eval(&block) if block_given? + result(binding) + end + + def content_for(area) + send(area) if respond_to?(area) + end + + private + + def inject_content(area, &block) + instance_variable_set("@#{ area }".to_sym, block.call) + self.class.send(:attr_accessor, area) + end + + end +end diff --git a/spec/controllers/request_controller_spec.rb b/spec/controllers/request_controller_spec.rb index 1e7df4536..9353efcb3 100644 --- a/spec/controllers/request_controller_spec.rb +++ b/spec/controllers/request_controller_spec.rb @@ -2407,8 +2407,7 @@ describe RequestController, "when caching fragments" do :html_mask_stuff! => nil, :user_can_view? => true, :all_can_view? => true) - attachment = mock(FoiAttachment, :display_filename => long_name, - :body_as_html => ['some text', 'wrapper']) + attachment = FactoryGirl.build(:body_text, :filename => long_name) IncomingMessage.stub!(:find).with("44").and_return(incoming_message) IncomingMessage.stub!(:get_attachment_by_url_part_number_and_filename).and_return(attachment) InfoRequest.stub!(:find).with("132").and_return(info_request) diff --git a/spec/factories/foi_attchments.rb b/spec/factories/foi_attchments.rb index d7a90efb8..4e9875a00 100644 --- a/spec/factories/foi_attchments.rb +++ b/spec/factories/foi_attchments.rb @@ -4,12 +4,18 @@ FactoryGirl.define do factory :body_text do content_type 'text/plain' body { 'hereisthetext' } + filename 'attachment.txt' end factory :pdf_attachment do content_type 'application/pdf' filename 'interesting.pdf' body { load_file_fixture('interesting.pdf') } end + factory :rtf_attachment do + content_type 'application/rtf' + filename 'interesting.rtf' + body { load_file_fixture('interesting.rtf') } + end end end diff --git a/spec/fixtures/files/attachment_to_html/alternative_template.html.erb b/spec/fixtures/files/attachment_to_html/alternative_template.html.erb new file mode 100644 index 000000000..024565d5a --- /dev/null +++ b/spec/fixtures/files/attachment_to_html/alternative_template.html.erb @@ -0,0 +1,2 @@ +<h1><%= @title %></h1> +<div><%= @body %></div>
\ No newline at end of file diff --git a/spec/fixtures/files/empty.rtf b/spec/fixtures/files/empty.rtf new file mode 100644 index 000000000..82dd2964a --- /dev/null +++ b/spec/fixtures/files/empty.rtf @@ -0,0 +1,5 @@ +{\rtf1\ansi\ansicpg1252\cocoartf1265\cocoasubrtf190 +{\fonttbl} +{\colortbl;\red255\green255\blue255;} +\paperw11900\paperh16840\margl1440\margr1440\vieww10800\viewh8400\viewkind0 +}
\ No newline at end of file diff --git a/spec/fixtures/files/empty.txt b/spec/fixtures/files/empty.txt new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/spec/fixtures/files/empty.txt diff --git a/spec/fixtures/files/interesting.rtf b/spec/fixtures/files/interesting.rtf new file mode 100644 index 000000000..fa95b53b5 --- /dev/null +++ b/spec/fixtures/files/interesting.rtf @@ -0,0 +1,7 @@ +{\rtf1\ansi\ansicpg1252\cocoartf1265\cocoasubrtf190 +{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\paperw11900\paperh16840\margl1440\margr1440\vieww10800\viewh8400\viewkind0 +\pard\tx566\tx1133\tx1700\tx2267\tx2834\tx3401\tx3968\tx4535\tx5102\tx5669\tx6236\tx6803\pardirnatural + +\f0\fs24 \cf0 thisisthebody}
\ No newline at end of file diff --git a/spec/lib/attachment_to_html/adapters/could_not_convert_spec.rb b/spec/lib/attachment_to_html/adapters/could_not_convert_spec.rb new file mode 100644 index 000000000..afdc5c552 --- /dev/null +++ b/spec/lib/attachment_to_html/adapters/could_not_convert_spec.rb @@ -0,0 +1,36 @@ +require File.expand_path(File.dirname(__FILE__) + '/../../../spec_helper') + +describe AttachmentToHTML::Adapters::CouldNotConvert do + + let(:attachment) { FactoryGirl.build(:pdf_attachment) } + let(:adapter) do + AttachmentToHTML::Adapters::CouldNotConvert.new(attachment) + end + + describe :title do + + it 'uses the attachment filename for the title' do + adapter.title.should == attachment.display_filename + end + + end + + describe :body do + + it 'contains a message asking the user to download the file directly' do + expected = "<p>Sorry, we were unable to convert this file to HTML. " \ + "Please use the download link at the top right.</p>" + adapter.body.should == expected + end + + end + + describe :success? do + + it 'is always true' do + adapter.success?.should be_true + end + + end + +end diff --git a/spec/lib/attachment_to_html/adapters/google_docs_viewer_spec.rb b/spec/lib/attachment_to_html/adapters/google_docs_viewer_spec.rb new file mode 100644 index 000000000..e7aafb40d --- /dev/null +++ b/spec/lib/attachment_to_html/adapters/google_docs_viewer_spec.rb @@ -0,0 +1,49 @@ +require File.expand_path(File.dirname(__FILE__) + '/../../../spec_helper') + +describe AttachmentToHTML::Adapters::GoogleDocsViewer do + + let(:attachment) { FactoryGirl.build(:pdf_attachment) } + let(:adapter) do + AttachmentToHTML::Adapters::GoogleDocsViewer.new(attachment, :attachment_url => 'http://example.com/test.pdf') + end + + describe :title do + + it 'uses the attachment filename for the title' do + adapter.title.should == attachment.display_filename + end + + end + + describe :body do + + it 'contains the google docs viewer iframe' do + expected = %Q(<iframe src="http://docs.google.com/viewer?url=http://example.com/test.pdf&embedded=true" width="100%" height="100%" style="border: none;"></iframe>) + adapter.body.should == expected + end + + describe 'uses the confugured alaveteli protocol' do + + it 'https if force_ssl is on' do + AlaveteliConfiguration.stub(:force_ssl).and_return(true) + adapter.body.should include('https://docs.google.com') + end + + it 'http if force_ssl is off' do + AlaveteliConfiguration.stub(:force_ssl).and_return(false) + adapter.body.should include('http://docs.google.com') + end + + end + + end + + describe :success? do + + it 'is always true' do + adapter.success?.should be_true + end + + end + +end diff --git a/spec/lib/attachment_to_html/adapters/pdf_spec.rb b/spec/lib/attachment_to_html/adapters/pdf_spec.rb new file mode 100644 index 000000000..c02b157e4 --- /dev/null +++ b/spec/lib/attachment_to_html/adapters/pdf_spec.rb @@ -0,0 +1,63 @@ +require File.expand_path(File.dirname(__FILE__) + '/../../../spec_helper') + +describe AttachmentToHTML::Adapters::PDF do + + let(:attachment) { FactoryGirl.build(:pdf_attachment) } + let(:adapter) { AttachmentToHTML::Adapters::PDF.new(attachment) } + + describe :tmpdir do + + it 'defaults to the rails tmp directory' do + adapter.tmpdir.should == Rails.root.join('tmp') + end + + it 'allows a tmpdir to be specified to store the converted document' do + adapter = AttachmentToHTML::Adapters::PDF.new(attachment, :tmpdir => '/tmp') + adapter.tmpdir.should == '/tmp' + end + + end + + describe :title do + + it 'uses the attachment filename for the title' do + adapter.title.should == attachment.display_filename + end + + end + + describe :body do + + it 'extracts the body from the document' do + adapter.body.should include('thisisthebody') + end + + it 'operates in the context of the supplied tmpdir' do + adapter = AttachmentToHTML::Adapters::PDF.new(attachment, :tmpdir => '/tmp') + Dir.should_receive(:chdir).with('/tmp').and_call_original + adapter.body + end + + end + + + describe :success? do + + it 'is successful if the body has content excluding the tags' do + adapter.stub(:body).and_return('<p>some content</p>') + adapter.success?.should be_true + end + + it 'is successful if the body contains images' do + adapter.stub(:body).and_return(%Q(<img src="logo.png" />)) + adapter.success?.should be_true + end + + it 'is not successful if the body has no content other than tags' do + adapter.stub(:body).and_return('<p></p>') + adapter.success?.should be_false + end + + end + +end diff --git a/spec/lib/attachment_to_html/adapters/rtf_spec.rb b/spec/lib/attachment_to_html/adapters/rtf_spec.rb new file mode 100644 index 000000000..a3bf0e27e --- /dev/null +++ b/spec/lib/attachment_to_html/adapters/rtf_spec.rb @@ -0,0 +1,85 @@ +require File.expand_path(File.dirname(__FILE__) + '/../../../spec_helper') + +describe AttachmentToHTML::Adapters::RTF do + + let(:attachment) { FactoryGirl.build(:rtf_attachment) } + let(:adapter) { AttachmentToHTML::Adapters::RTF.new(attachment) } + + describe :tmpdir do + + it 'defaults to the rails tmp directory' do + adapter.tmpdir.should == Rails.root.join('tmp') + end + + it 'allows a tmpdir to be specified to store the converted document' do + adapter = AttachmentToHTML::Adapters::RTF.new(attachment, :tmpdir => '/tmp') + adapter.tmpdir.should == '/tmp' + end + + end + + describe :title do + + it 'uses the attachment filename for the title' do + adapter.title.should == attachment.display_filename + end + + end + + describe :body do + + it 'extracts the body from the document' do + adapter.body.should include('thisisthebody') + end + + it 'operates in the context of the supplied tmpdir' do + adapter = AttachmentToHTML::Adapters::RTF.new(attachment, :tmpdir => '/tmp') + Dir.should_receive(:chdir).with('/tmp').and_call_original + adapter.body + end + + it 'does not result in incorrect conversion when unrtf returns an invalid doctype' do + # Doctype public identifier is unquoted + # Valid doctype would be: + # <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> + # See bug report http://savannah.gnu.org/bugs/?42015 + invalid = <<-DOC + <!DOCTYPE html PUBLIC -//W3C//DTD HTML 4.01 Transitional//EN> + <html> + <head> + <meta http-equiv="content-type" content="text/html; charset=utf-8"> + <!-- Translation from RTF performed by UnRTF, version 0.21.5 --> + <!--font table contains 0 fonts total--> + <!--invalid font number 0--> + </head> + <body><font size="3"><font color="#000000">thisisthebody</font></font></body> + </html> + DOC + AlaveteliExternalCommand.stub(:run).and_return(invalid) + + adapter.body.should_not include('//W3C//DTD HTML 4.01 Transitional//EN') + end + + end + + + describe :success? do + + it 'is successful if the body has content excluding the tags' do + adapter.stub(:body).and_return('<p>some content</p>') + adapter.success?.should be_true + end + + it 'is successful if the body contains images' do + adapter.stub(:body).and_return(%Q(<img src="logo.png" />)) + adapter.success?.should be_true + end + + it 'is not successful if the body has no content other than tags' do + adapter.stub(:body).and_return('<p></p>') + adapter.success?.should be_false + end + + end + +end diff --git a/spec/lib/attachment_to_html/adapters/text_spec.rb b/spec/lib/attachment_to_html/adapters/text_spec.rb new file mode 100644 index 000000000..b2e8141e0 --- /dev/null +++ b/spec/lib/attachment_to_html/adapters/text_spec.rb @@ -0,0 +1,70 @@ +require File.expand_path(File.dirname(__FILE__) + '/../../../spec_helper') + +describe AttachmentToHTML::Adapters::Text do + + let(:attachment) { FactoryGirl.build(:body_text) } + let(:adapter) { AttachmentToHTML::Adapters::Text.new(attachment) } + + describe :title do + + it 'uses the attachment filename for the title' do + adapter.title.should == attachment.display_filename + end + + end + + describe :body do + + it 'extracts the body from the document' do + adapter.body.should == attachment.body + end + + it 'strips the body of trailing whitespace' do + attachment = FactoryGirl.build(:body_text, :body => ' Hello ') + adapter = AttachmentToHTML::Adapters::Text.new(attachment) + adapter.body.should == 'Hello' + end + + it 'escapes special characters' do + attachment = FactoryGirl.build(:body_text, :body => 'Usage: foo "bar" >baz<') + adapter = AttachmentToHTML::Adapters::Text.new(attachment) + expected = %Q(Usage: foo "bar" >baz<) + adapter.body.should == expected + end + + it 'creates hyperlinks for text that looks like a url' do + attachment = FactoryGirl.build(:body_text, :body => 'http://www.whatdotheyknow.com') + adapter = AttachmentToHTML::Adapters::Text.new(attachment) + expected = %Q(<a href='http://www.whatdotheyknow.com'>http://www.whatdotheyknow.com</a>) + adapter.body.should == expected + end + + it 'substitutes newlines for br tags' do + attachment = FactoryGirl.build(:body_text, :body => "A\nNewline") + adapter = AttachmentToHTML::Adapters::Text.new(attachment) + expected = %Q(A<br>Newline) + adapter.body.should == expected + end + + end + + describe :success? do + + it 'is successful if the body has content excluding the tags' do + adapter.stub(:body).and_return('<p>some content</p>') + adapter.success?.should be_true + end + + it 'is successful if the body contains images' do + adapter.stub(:body).and_return(%Q(<img src="logo.png" />)) + adapter.success?.should be_true + end + + it 'is not successful if the body has no content other than tags' do + adapter.stub(:body).and_return('<p></p>') + adapter.success?.should be_false + end + + end + +end diff --git a/spec/lib/attachment_to_html/attachment_to_html_spec.rb b/spec/lib/attachment_to_html/attachment_to_html_spec.rb new file mode 100644 index 000000000..1cf7debb7 --- /dev/null +++ b/spec/lib/attachment_to_html/attachment_to_html_spec.rb @@ -0,0 +1,71 @@ +require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper') + +describe AttachmentToHTML do + include AttachmentToHTML + + let(:attachment) { FactoryGirl.build(:body_text) } + + describe :to_html do + + it 'sends the attachment to the correct adapter for conversion' do + AttachmentToHTML::Adapters::Text.should_receive(:new).with(attachment, {}).and_call_original + to_html(attachment) + end + + it 'renders the attachment as html' do + adapter = AttachmentToHTML::Adapters::Text.new(attachment) + expected = AttachmentToHTML::View.new(adapter).render + to_html(attachment).should == expected + end + + it 'passes content injections options when rendering the result' do + html = to_html(attachment, :content_for => { :body_prefix => '<p>prefix</p>' }) + html.should include('<p>prefix</p>') + end + + it 'accepts a hash of options to pass to the adapter' do + options = { :wrapper => 'wrap' } + AttachmentToHTML::Adapters::Text.should_receive(:new).with(attachment, options).and_call_original + to_html(attachment, options) + end + + it 'converts an attachment that has an adapter, fails to convert, but has a google viewer' do + attachment = FactoryGirl.build(:pdf_attachment) + AttachmentToHTML::Adapters::PDF.any_instance.stub(:success?).and_return(false) + AttachmentToHTML::Adapters::PDF.should_receive(:new).with(attachment, {}).and_call_original + AttachmentToHTML::Adapters::GoogleDocsViewer.should_receive(:new).with(attachment, {}).and_call_original + to_html(attachment) + end + + it 'converts an attachment that doesnt have an adapter, but has a google viewer' do + attachment = FactoryGirl.build(:body_text, :content_type => 'application/vnd.ms-word') + AttachmentToHTML::Adapters::GoogleDocsViewer.should_receive(:new).with(attachment, {}).and_call_original + to_html(attachment) + end + + it 'converts an attachment that has no adapter or google viewer' do + attachment = FactoryGirl.build(:body_text, :content_type => 'application/json') + AttachmentToHTML::Adapters::CouldNotConvert.should_receive(:new).with(attachment, {}).and_call_original + to_html(attachment) + end + + describe 'when wrapping the content' do + + it 'uses a the default wrapper' do + attachment = FactoryGirl.build(:pdf_attachment) + to_html(attachment).should include(%Q(<div id="wrapper">)) + end + + it 'uses a custom wrapper for GoogleDocsViewer attachments' do + attachment = FactoryGirl.build(:pdf_attachment) + # TODO: Add a document that will always render in a + # GoogleDocsViewer for testing + AttachmentToHTML::Adapters::PDF.any_instance.stub(:success?).and_return(false) + to_html(attachment).should include(%Q(<div id="wrapper_google_embed">)) + end + + end + + end + +end diff --git a/spec/lib/attachment_to_html/view_spec.rb b/spec/lib/attachment_to_html/view_spec.rb new file mode 100644 index 000000000..65eff4cad --- /dev/null +++ b/spec/lib/attachment_to_html/view_spec.rb @@ -0,0 +1,145 @@ +require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper') + +describe AttachmentToHTML::View do + + let(:adapter) do + OpenStruct.new( + :body => '<p>hello</p>', + :title => 'An attachment.txt', + :success? => true) + end + + let(:view) { AttachmentToHTML::View.new(adapter) } + + let(:default_template) do + "#{ Rails.root }/lib/attachment_to_html/template.html.erb" + end + + describe '.template' do + + after(:each) do + AttachmentToHTML::View.template = nil + end + + it 'has a default template location' do + AttachmentToHTML::View.template.should == default_template + end + + end + + describe '.template=' do + + after(:each) do + AttachmentToHTML::View.template = nil + end + + it 'allows a global template to be set' do + template = file_fixture_name('attachment_to_html/alternative_template.html.erb') + AttachmentToHTML::View.template = template + AttachmentToHTML::View.template.should == template + end + + end + + describe :new do + + it 'sets the title on initialization' do + view.title.should == adapter.title + end + + it 'sets the body on initialization' do + view.body.should == adapter.body + end + + it 'sets a default template if none is specified' do + view.template.should == default_template + end + + it 'allows a template to be set through an option' do + template = file_fixture_name('attachment_to_html/alternative_template.html.erb') + opts = { :template => template } + view = AttachmentToHTML::View.new(adapter, opts) + view.template.should == template + end + + end + + describe :title= do + + it 'allows the title to be set' do + view.title = adapter.title + view.title.should == adapter.title + end + + end + + describe :body= do + + it 'allows the body to be set' do + view.body = adapter.body + view.body.should == adapter.body + end + + end + + describe :template= do + + it 'allows the template to be set' do + template = file_fixture_name('attachment_to_html/alternative_template.html.erb') + view.template = template + view.template.should == template + end + + end + + describe :wrapper do + + it 'is set to wrapper by default' do + view.wrapper.should == 'wrapper' + end + + end + + describe :wrapper= do + + it 'allows the wrapper div to be customised' do + view.wrapper = 'wrap' + view.wrapper.should == 'wrap' + end + + end + + # Need to remove all whitespace to assert equal because + # ERB adds additional indentation after ERB tags + describe :render do + + it 'renders the contents in to the template' do + view.wrapper = 'wrap' + expected = <<-HTML +<!DOCTYPE html> +<html> +<head> + <title>An attachment.txt</title> +</head> +<body> + <div id="wrap"> + <div id="view-html-content"> + <p>hello</p> + </div> + </div> +</body> +</html> + HTML + + view.render.gsub(/\s+/, '').should == expected.gsub(/\s+/, '') + end + + it 'allows the dynamic injection of content' do + content = %Q(<meta charset="utf-8">) + result = view.render { inject_content(:head_suffix) { content } } + result.should include(content) + end + + end + +end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index dc5a0d6eb..e391c97d3 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -13,6 +13,7 @@ SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[ SimpleCov.start('rails') do add_filter 'commonlib' add_filter 'vendor/plugins' + add_filter 'lib/attachment_to_html' add_filter 'lib/strip_attributes' add_filter 'lib/has_tag_string' add_filter 'lib/acts_as_xapian' |