diff options
20 files changed, 1073 insertions, 0 deletions
diff --git a/config/application.rb b/config/application.rb index 6b7d1b976..4513e4eb7 100644 --- a/config/application.rb +++ b/config/application.rb @@ -68,6 +68,7 @@ module Alaveteli end config.autoload_paths << "#{Rails.root.to_s}/lib/mail_handler" + config.autoload_paths << "#{Rails.root.to_s}/lib/attachment_to_html" # See Rails::Configuration for more options ENV['RECAPTCHA_PUBLIC_KEY'] = ::AlaveteliConfiguration::recaptcha_public_key diff --git a/lib/attachment_to_html/adapters/could_not_convert.rb b/lib/attachment_to_html/adapters/could_not_convert.rb new file mode 100644 index 000000000..9ce28a848 --- /dev/null +++ b/lib/attachment_to_html/adapters/could_not_convert.rb @@ -0,0 +1,63 @@ +module AttachmentToHTML + module Adapters + class CouldNotConvert + + attr_reader :attachment, :wrapper + + # Public: Initialize a Text converter + # + # attachment - the FoiAttachment to convert to HTML + # opts - a Hash of options (default: {}): + # :wrapper - String id of the div that wraps the + # attachment body + def initialize(attachment, opts = {}) + @attachment = attachment + @wrapper = opts.fetch(:wrapper, 'wrapper') + end + + # Public: Convert the attachment to HTML + # + # Returns a String + def to_html + @html ||= generate_html + end + + # Public: Was the document conversion successful? + # As this is a fallback option and not doing anything dynamic + # we're assuming this is successful whatever the case + # + # Returns true + def success? + true + end + + private + + def generate_html + html = "<!DOCTYPE html>" + html += "<html>" + html += "<head>" + html += "<title>#{ title }</title>" + html += "</head>" + html += "<body>" + html += "<div id=\"#{ wrapper }\">" + html += "<div id=\"view-html-content\">" + html += body + html += "</div>" + html += "</div>" + html += "</body>" + html += "</html>" + end + + def title + @title ||= attachment.display_filename + end + + def body + "<p>Sorry, we were unable to convert this file to HTML. " \ + "Please use the download link at the top right.</p>" + end + + end + end +end
\ No newline at end of file diff --git a/lib/attachment_to_html/adapters/google_docs_viewer.rb b/lib/attachment_to_html/adapters/google_docs_viewer.rb new file mode 100644 index 000000000..86908ad5c --- /dev/null +++ b/lib/attachment_to_html/adapters/google_docs_viewer.rb @@ -0,0 +1,73 @@ +module AttachmentToHTML + module Adapters + # Renders the attachment in a Google Docs Viewer + class GoogleDocsViewer + + attr_reader :attachment, :wrapper, :attachment_url + + # Public: Initialize a PDF converter + # + # attachment - the FoiAttachment to convert to HTML + # opts - a Hash of options (default: {}): + # :wrapper - String id of the div that wraps the + # attachment body + # (default: 'wrapper_google_embed') + # :attachment_url - a String url to the attachment for + # Google to render (default: nil) + def initialize(attachment, opts = {}) + @attachment = attachment + @wrapper = opts.fetch(:wrapper, 'wrapper_google_embed') + @attachment_url = opts.fetch(:attachment_url, nil) + end + + # Public: Convert the attachment to HTML + # + # Returns a String + def to_html + @html ||= generate_html + end + + # Public: Was the document conversion successful? + # We can't really tell whether the document conversion has been + # successful as such; We're assuming that given a correctly + # constructed iframe (which is tested) that Google will make this + # Just Work. + # + # Returns true + def success? + true + end + + private + + def generate_html + html = "<!DOCTYPE html>" + html += "<html>" + html += "<head>" + html += "<title>#{ title }</title>" + html += "</head>" + html += "<body>" + html += "<div id=\"#{ wrapper }\">" + html += "<div id=\"view-html-content\">" + html += body + html += "</div>" + html += "</div>" + html += "</body>" + html += "</html>" + end + + def title + @title ||= attachment.display_filename + end + + def body + %Q(<iframe src="#{ protocol }://docs.google.com/viewer?url=#{ attachment_url }&embedded=true" width="100%" height="100%" style="border: none;"></iframe>) + end + + def protocol + AlaveteliConfiguration.force_ssl ? 'https' : 'http' + end + + end + end +end diff --git a/lib/attachment_to_html/adapters/pdf.rb b/lib/attachment_to_html/adapters/pdf.rb new file mode 100644 index 000000000..8f826b910 --- /dev/null +++ b/lib/attachment_to_html/adapters/pdf.rb @@ -0,0 +1,121 @@ +module AttachmentToHTML + module Adapters + # Convert application/pdf documents in to HTML + class PDF + + attr_reader :attachment, :wrapper, :tmpdir + + # Public: Initialize a PDF converter + # + # attachment - the FoiAttachment to convert to HTML + # opts - a Hash of options (default: {}): + # :wrapper - String id of the div that wraps the + # attachment body + # :tmpdir - String name of directory to store the + # converted document + def initialize(attachment, opts = {}) + @attachment = attachment + @wrapper = opts.fetch(:wrapper, 'wrapper') + @tmpdir = opts.fetch(:tmpdir, ::Rails.root.join('tmp')) + end + + # Public: Convert the attachment to HTML + # + # Returns a String + def to_html + @html ||= generate_html + end + + # Public: Was the document conversion successful? + # + # Returns a Boolean + def success? + has_content? || contains_images? + end + + private + + def generate_html + html = "<!DOCTYPE html>" + html += "<html>" + html += "<head>" + html += "<title>#{ title }</title>" + html += "</head>" + html += "<body>" + html += "<div id=\"#{ wrapper }\">" + html += "<div id=\"view-html-content\">" + html += body + html += "</div>" + html += "</div>" + html += "</body>" + html += "</html>" + end + + def title + @title ||= attachment.display_filename + end + + def body + parsed_body + end + + # Parse the output of the converted attachment so that we can pluck + # the parts we need and insert in to our own sensible template + # + # Returns a Nokogiri::HTML::Document + def parsed + @parsed ||= Nokogiri::HTML.parse(convert) + end + + def parsed_body + parsed.css('body').inner_html + end + + # Does the body element have any content, excluding HTML tags? + # + # Returns a Boolean + def has_content? + !parsed.css('body').inner_text.empty? + end + + def contains_images? + parsed.css('body img').any? + end + + def convert + @converted ||= Dir.chdir(tmpdir) do + tempfile = create_tempfile + write_attachment_body_to_tempfile(tempfile) + + html = AlaveteliExternalCommand.run("pdftohtml", + "-nodrm", "-zoom", "1.0", "-stdout", "-enc", "UTF-8", + "-noframes", tempfile.path, :timeout => 30 + ) + + cleanup_tempfile(tempfile) + + html + end + end + + def create_tempfile + if RUBY_VERSION.to_f >= 1.9 + Tempfile.new('foiextract', '.', :encoding => attachment.body.encoding) + else + Tempfile.new('foiextract', '.') + end + end + + def write_attachment_body_to_tempfile(tempfile) + tempfile.print(attachment.body) + tempfile.flush + end + + def cleanup_tempfile(tempfile) + tempfile.close + tempfile.delete + end + + end + end +end diff --git a/lib/attachment_to_html/adapters/rtf.rb b/lib/attachment_to_html/adapters/rtf.rb new file mode 100644 index 000000000..f38e5e381 --- /dev/null +++ b/lib/attachment_to_html/adapters/rtf.rb @@ -0,0 +1,120 @@ +module AttachmentToHTML + module Adapters + # Convert application/rtf documents in to HTML + class RTF + + attr_reader :attachment, :wrapper, :tmpdir + + # Public: Initialize a RTF converter + # + # attachment - the FoiAttachment to convert to HTML + # opts - a Hash of options (default: {}): + # :wrapper - String id of the div that wraps the + # attachment body + # :tmpdir - String name of directory to store the + # converted document + def initialize(attachment, opts = {}) + @attachment = attachment + @wrapper = opts.fetch(:wrapper, 'wrapper') + @tmpdir = opts.fetch(:tmpdir, ::Rails.root.join('tmp')) + end + + # Public: Convert the attachment to HTML + # + # Returns a String + def to_html + @html ||= generate_html + end + + # Public: Was the document conversion successful? + # + # Returns a Boolean + def success? + has_content? || contains_images? + end + + private + + def generate_html + html = "<!DOCTYPE html>" + html += "<html>" + html += "<head>" + html += "<title>#{ title }</title>" + html += "</head>" + html += "<body>" + html += "<div id=\"#{ wrapper }\">" + html += "<div id=\"view-html-content\">" + html += body + html += "</div>" + html += "</div>" + html += "</body>" + html += "</html>" + end + + def title + @title ||= attachment.display_filename + end + + def body + parsed_body + end + + # Parse the output of the converted attachment so that we can pluck + # the parts we need and insert in to our own sensible template + # + # Returns a Nokogiri::HTML::Document + def parsed + @parsed ||= Nokogiri::HTML.parse(convert) + end + + def parsed_body + parsed.css('body').inner_html + end + + # Does the body element have any content, excluding HTML tags? + # + # Returns a Boolean + def has_content? + !parsed.css('body').inner_text.empty? + end + + def contains_images? + parsed.css('body img').any? + end + + def convert + @converted ||= Dir.chdir(tmpdir) do + tempfile = create_tempfile + write_attachment_body_to_tempfile(tempfile) + + html = AlaveteliExternalCommand.run("unrtf", "--html", + tempfile.path, :timeout => 120 + ) + + cleanup_tempfile(tempfile) + + html + end + end + + def create_tempfile + if RUBY_VERSION.to_f >= 1.9 + Tempfile.new('foiextract', '.', :encoding => attachment.body.encoding) + else + Tempfile.new('foiextract', '.') + end + end + + def write_attachment_body_to_tempfile(tempfile) + tempfile.print(attachment.body) + tempfile.flush + end + + def cleanup_tempfile(tempfile) + tempfile.close + tempfile.delete + end + + end + end +end diff --git a/lib/attachment_to_html/adapters/text.rb b/lib/attachment_to_html/adapters/text.rb new file mode 100644 index 000000000..1ce616cf7 --- /dev/null +++ b/lib/attachment_to_html/adapters/text.rb @@ -0,0 +1,84 @@ +require 'nokogiri' + +module AttachmentToHTML + module Adapters + # Convert text/plain documents in to HTML + class Text + + attr_reader :attachment, :wrapper + + # Public: Initialize a Text converter + # + # attachment - the FoiAttachment to convert to HTML + # opts - a Hash of options (default: {}): + # :wrapper - String id of the div that wraps the + # attachment body + def initialize(attachment, opts = {}) + @attachment = attachment + @wrapper = opts.fetch(:wrapper, 'wrapper') + end + + # Public: Convert the attachment to HTML + # + # Returns a String + def to_html + @html ||= generate_html + end + + # Public: Was the document conversion successful? + # + # Returns a Boolean + def success? + has_content? || contains_images? + end + + private + + def generate_html + html = "<!DOCTYPE html>" + html += "<html>" + html += "<head>" + html += "<title>#{ title }</title>" + html += "</head>" + html += "<body>" + html += "<div id=\"#{ wrapper }\">" + html += "<div id=\"view-html-content\">" + html += body + html += "</div>" + html += "</div>" + html += "</body>" + html += "</html>" + end + + def title + @title ||= attachment.display_filename + end + + def body + text = attachment.body.strip + text = CGI.escapeHTML(text) + text = MySociety::Format.make_clickable(text) + text = text.gsub(/\n/, '<br>') + end + + # Does the body element have any content, excluding HTML tags? + # + # Returns a Boolean + def has_content? + !parsed.css('body').inner_text.empty? + end + + def contains_images? + parsed.css('body img').any? + end + + # Parse the output of to_html to check for success + # + # Returns a Nokogiri::HTML::Document + def parsed + @parsed ||= Nokogiri::HTML.parse(to_html) + end + + end + end +end diff --git a/lib/attachment_to_html/attachment_to_html.rb b/lib/attachment_to_html/attachment_to_html.rb new file mode 100644 index 000000000..5f63661b4 --- /dev/null +++ b/lib/attachment_to_html/attachment_to_html.rb @@ -0,0 +1,41 @@ +require 'html' + +Dir[File.dirname(__FILE__) + '/adapters/*.rb'].each do |file| + require file +end + +module AttachmentToHTML + extend self + + def to_html(attachment, opts = {}) + adapter = adapter_for(attachment).new(attachment, opts) + html = HTML.new(adapter) + + if html.success? + html + else + fallback = fallback_adapter_for(attachment).new(attachment, opts) + HTML.new(fallback) + end + end + + private + + def adapter_for(attachment) + case attachment.content_type + when 'text/plain' then Adapters::Text + when 'application/pdf' then Adapters::PDF + when 'application/rtf' then Adapters::RTF + else + fallback_adapter_for(attachment) + end + end + + def fallback_adapter_for(attachment) + if attachment.has_google_docs_viewer? + Adapters::GoogleDocsViewer + else + Adapters::CouldNotConvert + end + end +end diff --git a/lib/attachment_to_html/html.rb b/lib/attachment_to_html/html.rb new file mode 100644 index 000000000..44d095be8 --- /dev/null +++ b/lib/attachment_to_html/html.rb @@ -0,0 +1,14 @@ +require 'forwardable' +module AttachmentToHTML + class HTML + extend Forwardable + + def_delegator :@adapter, :to_html, :to_s + def_delegator :@adapter, :success? + + def initialize(adapter) + @adapter = adapter + end + + end +end diff --git a/spec/factories/foi_attchments.rb b/spec/factories/foi_attchments.rb index d7a90efb8..4e9875a00 100644 --- a/spec/factories/foi_attchments.rb +++ b/spec/factories/foi_attchments.rb @@ -4,12 +4,18 @@ FactoryGirl.define do factory :body_text do content_type 'text/plain' body { 'hereisthetext' } + filename 'attachment.txt' end factory :pdf_attachment do content_type 'application/pdf' filename 'interesting.pdf' body { load_file_fixture('interesting.pdf') } end + factory :rtf_attachment do + content_type 'application/rtf' + filename 'interesting.rtf' + body { load_file_fixture('interesting.rtf') } + end end end diff --git a/spec/fixtures/files/empty.rtf b/spec/fixtures/files/empty.rtf new file mode 100644 index 000000000..82dd2964a --- /dev/null +++ b/spec/fixtures/files/empty.rtf @@ -0,0 +1,5 @@ +{\rtf1\ansi\ansicpg1252\cocoartf1265\cocoasubrtf190 +{\fonttbl} +{\colortbl;\red255\green255\blue255;} +\paperw11900\paperh16840\margl1440\margr1440\vieww10800\viewh8400\viewkind0 +}
\ No newline at end of file diff --git a/spec/fixtures/files/empty.txt b/spec/fixtures/files/empty.txt new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/spec/fixtures/files/empty.txt diff --git a/spec/fixtures/files/interesting.rtf b/spec/fixtures/files/interesting.rtf new file mode 100644 index 000000000..fa95b53b5 --- /dev/null +++ b/spec/fixtures/files/interesting.rtf @@ -0,0 +1,7 @@ +{\rtf1\ansi\ansicpg1252\cocoartf1265\cocoasubrtf190 +{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +\paperw11900\paperh16840\margl1440\margr1440\vieww10800\viewh8400\viewkind0 +\pard\tx566\tx1133\tx1700\tx2267\tx2834\tx3401\tx3968\tx4535\tx5102\tx5669\tx6236\tx6803\pardirnatural + +\f0\fs24 \cf0 thisisthebody}
\ No newline at end of file diff --git a/spec/lib/attachment_to_html/adapters/could_not_convert_spec.rb b/spec/lib/attachment_to_html/adapters/could_not_convert_spec.rb new file mode 100644 index 000000000..aee68e986 --- /dev/null +++ b/spec/lib/attachment_to_html/adapters/could_not_convert_spec.rb @@ -0,0 +1,67 @@ +require File.expand_path(File.dirname(__FILE__) + '/../../../spec_helper') + +describe AttachmentToHTML::Adapters::CouldNotConvert do + + let(:attachment) { FactoryGirl.build(:pdf_attachment) } + let(:adapter) { AttachmentToHTML::Adapters::CouldNotConvert.new(attachment) } + + describe :wrapper do + + it 'defaults to wrapper' do + adapter.wrapper.should == 'wrapper' + end + + it 'accepts a wrapper option' do + adapter = AttachmentToHTML::Adapters::CouldNotConvert.new(attachment, :wrapper => 'wrap') + adapter.wrapper.should == 'wrap' + end + + end + + describe :to_html do + + it 'should be a valid html document' do + parsed = Nokogiri::HTML.parse(adapter.to_html) do |config| + config.strict + end + parsed.errors.any?.should be_false + end + + it 'contains the attachment filename in the title tag' do + parsed = Nokogiri::HTML.parse(adapter.to_html) do |config| + config.strict + end + parsed.css('title').inner_html.should == attachment.display_filename + end + + it 'contains the wrapper div in the body tag' do + adapter = AttachmentToHTML::Adapters::CouldNotConvert.new(attachment, :wrapper => 'wrap') + parsed = Nokogiri::HTML.parse(adapter.to_html) do |config| + config.strict + end + parsed.css('body div').first.attributes['id'].value.should == 'wrap' + end + + it 'should contain text about the conversion failure' do + adapter = AttachmentToHTML::Adapters::CouldNotConvert.new(attachment, :wrapper => 'wrap') + parsed = Nokogiri::HTML.parse(adapter.to_html) do |config| + config.strict + end + + expected = "<p>Sorry, we were unable to convert this file to HTML. " \ + "Please use the download link at the top right.</p>" + + parsed.css('div#wrap div#view-html-content').inner_html.should == expected + end + + end + + describe :success? do + + it 'is always true' do + adapter.success?.should be_true + end + + end + +end diff --git a/spec/lib/attachment_to_html/adapters/google_docs_viewer_spec.rb b/spec/lib/attachment_to_html/adapters/google_docs_viewer_spec.rb new file mode 100644 index 000000000..166ca3241 --- /dev/null +++ b/spec/lib/attachment_to_html/adapters/google_docs_viewer_spec.rb @@ -0,0 +1,81 @@ +require File.expand_path(File.dirname(__FILE__) + '/../../../spec_helper') + +describe AttachmentToHTML::Adapters::GoogleDocsViewer do + + let(:attachment) { FactoryGirl.build(:pdf_attachment) } + let(:google_adapter) do + AttachmentToHTML::Adapters::GoogleDocsViewer.new(attachment, :attachment_url => 'http://example.com/test.pdf') + end + + describe :wrapper do + + it 'defaults to wrapper_google_embed' do + google_adapter.wrapper.should == 'wrapper_google_embed' + end + + it 'accepts a wrapper option' do + google_adapter = AttachmentToHTML::Adapters::GoogleDocsViewer.new(attachment, :wrapper => 'wrap') + google_adapter.wrapper.should == 'wrap' + end + + end + + describe :to_html do + + it 'should be a valid html document' do + parsed = Nokogiri::HTML.parse(google_adapter.to_html) do |config| + config.strict + end + parsed.errors.any?.should be_false + end + + it 'contains the attachment filename in the title tag' do + parsed = Nokogiri::HTML.parse(google_adapter.to_html) do |config| + config.strict + end + parsed.css('title').inner_html.should == attachment.display_filename + end + + it 'contains the wrapper div in the body tag' do + google_adapter = AttachmentToHTML::Adapters::GoogleDocsViewer.new(attachment, :wrapper => 'wrap') + parsed = Nokogiri::HTML.parse(google_adapter.to_html) do |config| + config.strict + end + parsed.css('body div').first.attributes['id'].value.should == 'wrap' + end + + it 'contains the google docs viewer url in the wrapper div' do + options = { :wrapper => 'wrap', :attachment_url => 'http://example.com/test.pdf' } + google_adapter = AttachmentToHTML::Adapters::GoogleDocsViewer.new(attachment, options) + parsed = Nokogiri::HTML.parse(google_adapter.to_html) do |config| + config.strict + end + expected = %Q(<iframe src="http://docs.google.com/viewer?url=http://example.com/test.pdf&embedded=true" width="100%" height="100%" style="border: none;"></iframe>) + parsed.css('div#wrap div#view-html-content').inner_html.should include(expected) + end + + describe 'uses the confugured alaveteli protocol' do + + it 'https if force_ssl is on' do + AlaveteliConfiguration.stub(:force_ssl).and_return(true) + google_adapter.to_html.should include('https://docs.google.com') + end + + it 'http if force_ssl is off' do + AlaveteliConfiguration.stub(:force_ssl).and_return(false) + google_adapter.to_html.should include('http://docs.google.com') + end + + end + + end + + describe :success? do + + it 'is always true' do + google_adapter.success?.should be_true + end + + end + +end diff --git a/spec/lib/attachment_to_html/adapters/pdf_spec.rb b/spec/lib/attachment_to_html/adapters/pdf_spec.rb new file mode 100644 index 000000000..65c376043 --- /dev/null +++ b/spec/lib/attachment_to_html/adapters/pdf_spec.rb @@ -0,0 +1,98 @@ +require File.expand_path(File.dirname(__FILE__) + '/../../../spec_helper') + +describe AttachmentToHTML::Adapters::PDF do + + let(:attachment) { FactoryGirl.build(:pdf_attachment) } + let(:pdf_adapter) { AttachmentToHTML::Adapters::PDF.new(attachment) } + + describe :wrapper do + + it 'defaults to wrapper' do + pdf_adapter.wrapper.should == 'wrapper' + end + + it 'accepts a wrapper option' do + pdf_adapter = AttachmentToHTML::Adapters::PDF.new(attachment, :wrapper => 'wrap') + pdf_adapter.wrapper.should == 'wrap' + end + + end + + describe :tmpdir do + + it 'defaults to the rails tmp directory' do + pdf_adapter.tmpdir.should == Rails.root.join('tmp') + end + + it 'allows a tmpdir to be specified to store the converted document' do + pdf_adapter = AttachmentToHTML::Adapters::PDF.new(attachment, :tmpdir => '/tmp') + pdf_adapter.tmpdir.should == '/tmp' + end + + end + + describe :to_html do + + it 'should be a valid html document' do + parsed = Nokogiri::HTML.parse(pdf_adapter.to_html) do |config| + config.strict + end + parsed.errors.any?.should be_false + end + + it 'contains the attachment filename in the title tag' do + parsed = Nokogiri::HTML.parse(pdf_adapter.to_html) do |config| + config.strict + end + parsed.css('title').inner_html.should == attachment.display_filename + end + + it 'contains the wrapper div in the body tag' do + pdf_adapter = AttachmentToHTML::Adapters::PDF.new(attachment, :wrapper => 'wrap') + parsed = Nokogiri::HTML.parse(pdf_adapter.to_html) do |config| + config.strict + end + parsed.css('body div').first.attributes['id'].value.should == 'wrap' + end + + it 'contains the attachment body in the wrapper div' do + pdf_adapter = AttachmentToHTML::Adapters::PDF.new(attachment, :wrapper => 'wrap') + parsed = Nokogiri::HTML.parse(pdf_adapter.to_html) do |config| + config.strict + end + parsed.css('div#wrap div#view-html-content').inner_html.should include('thisisthebody') + end + + it 'operates in the context of the supplied tmpdir' do + pdf_adapter = AttachmentToHTML::Adapters::PDF.new(attachment, :tmpdir => '/tmp') + Dir.should_receive(:chdir).with('/tmp').and_call_original + pdf_adapter.to_html + end + + end + + describe :success? do + + it 'is successful if the body has content excluding the tags' do + pdf_adapter.to_html + pdf_adapter.success?.should be_true + end + + it 'is successful if the body contains images' do + mocked_return = %Q(<!DOCTYPE html><html><head></head><body><img src="logo.png" /></body></html>) + pdf_adapter = AttachmentToHTML::Adapters::PDF.new(attachment) + pdf_adapter.stub(:to_html).and_return(mocked_return) + pdf_adapter.success?.should be_true + end + + it 'is not successful if the body has no content other than tags' do + # TODO: Add and use spec/fixtures/files/empty.pdf + attachment = FactoryGirl.build(:body_text, :body => '') + pdf_adapter = AttachmentToHTML::Adapters::PDF.new(attachment) + pdf_adapter.to_html + pdf_adapter.success?.should be_false + end + + end + +end diff --git a/spec/lib/attachment_to_html/adapters/rtf_spec.rb b/spec/lib/attachment_to_html/adapters/rtf_spec.rb new file mode 100644 index 000000000..f84073c51 --- /dev/null +++ b/spec/lib/attachment_to_html/adapters/rtf_spec.rb @@ -0,0 +1,98 @@ +require File.expand_path(File.dirname(__FILE__) + '/../../../spec_helper') + +describe AttachmentToHTML::Adapters::RTF do + + let(:attachment) { FactoryGirl.build(:rtf_attachment) } + let(:rtf_adapter) { AttachmentToHTML::Adapters::RTF.new(attachment) } + + describe :wrapper do + + it 'defaults to wrapper' do + rtf_adapter.wrapper.should == 'wrapper' + end + + it 'accepts a wrapper option' do + rtf_adapter = AttachmentToHTML::Adapters::RTF.new(attachment, :wrapper => 'wrap') + rtf_adapter.wrapper.should == 'wrap' + end + + end + + describe :tmpdir do + + it 'defaults to the rails tmp directory' do + rtf_adapter.tmpdir.should == Rails.root.join('tmp') + end + + it 'allows a tmpdir to be specified to store the converted document' do + rtf_adapter = AttachmentToHTML::Adapters::RTF.new(attachment, :tmpdir => '/tmp') + rtf_adapter.tmpdir.should == '/tmp' + end + + end + + describe :to_html do + + it 'should be a valid html document' do + parsed = Nokogiri::HTML.parse(rtf_adapter.to_html) do |config| + config.strict + end + parsed.errors.any?.should be_false + end + + it 'contains the attachment filename in the title tag' do + parsed = Nokogiri::HTML.parse(rtf_adapter.to_html) do |config| + config.strict + end + parsed.css('head title').inner_html.should == attachment.display_filename + end + + it 'contains the wrapper div in the body tag' do + rtf_adapter = AttachmentToHTML::Adapters::RTF.new(attachment, :wrapper => 'wrap') + parsed = Nokogiri::HTML.parse(rtf_adapter.to_html) do |config| + config.strict + end + parsed.css('body div').first.attributes['id'].value.should == 'wrap' + end + + it 'contains the attachment body in the wrapper div' do + rtf_adapter = AttachmentToHTML::Adapters::RTF.new(attachment, :wrapper => 'wrap') + parsed = Nokogiri::HTML.parse(rtf_adapter.to_html) do |config| + config.strict + end + parsed.css('div#wrap div#view-html-content').inner_text.should include('thisisthebody') + end + + it 'operates in the context of the supplied tmpdir' do + rtf_adapter = AttachmentToHTML::Adapters::RTF.new(attachment, :tmpdir => '/tmp') + Dir.should_receive(:chdir).with('/tmp').and_call_original + rtf_adapter.to_html + end + + end + + describe :success? do + + it 'is successful if the body has content excluding the tags' do + rtf_adapter.to_html + rtf_adapter.success?.should be_true + end + + it 'is successful if the body contains images' do + mocked_return = %Q(<!DOCTYPE html><html><head></head><body><img src="logo.png" /></body></html>) + rtf_adapter = AttachmentToHTML::Adapters::RTF.new(attachment) + rtf_adapter.stub(:to_html).and_return(mocked_return) + rtf_adapter.success?.should be_true + end + + it 'is not successful if the body has no content other than tags' do + empty_rtf = load_file_fixture('empty.rtf') + attachment = FactoryGirl.build(:rtf_attachment, :body => empty_rtf) + rtf_adapter = AttachmentToHTML::Adapters::RTF.new(attachment) + rtf_adapter.to_html + rtf_adapter.success?.should be_false + end + + end + +end diff --git a/spec/lib/attachment_to_html/adapters/text_spec.rb b/spec/lib/attachment_to_html/adapters/text_spec.rb new file mode 100644 index 000000000..599670603 --- /dev/null +++ b/spec/lib/attachment_to_html/adapters/text_spec.rb @@ -0,0 +1,121 @@ +require File.expand_path(File.dirname(__FILE__) + '/../../../spec_helper') + +describe AttachmentToHTML::Adapters::Text do + + let(:attachment) { FactoryGirl.build(:body_text) } + let(:text_adapter) { AttachmentToHTML::Adapters::Text.new(attachment) } + + describe :wrapper do + + it 'defaults to wrapper' do + text_adapter.wrapper.should == 'wrapper' + end + + it 'accepts a wrapper option' do + text_adapter = AttachmentToHTML::Adapters::Text.new(attachment, :wrapper => 'wrap') + text_adapter.wrapper.should == 'wrap' + end + + end + + describe :to_html do + + it 'should be a valid html document' do + parsed = Nokogiri::HTML.parse(text_adapter.to_html) do |config| + config.strict + end + parsed.errors.any?.should be_false + end + + it 'contains the attachment filename in the title tag' do + parsed = Nokogiri::HTML.parse(text_adapter.to_html) do |config| + config.strict + end + parsed.css('title').inner_html.should == attachment.display_filename + end + + it 'contains the wrapper div in the body tag' do + text_adapter = AttachmentToHTML::Adapters::Text.new(attachment, :wrapper => 'wrap') + parsed = Nokogiri::HTML.parse(text_adapter.to_html) do |config| + config.strict + end + parsed.css('body').children.first.attributes['id'].value.should == 'wrap' + end + + it 'contains the attachment body in the wrapper div' do + text_adapter = AttachmentToHTML::Adapters::Text.new(attachment, :wrapper => 'wrap') + parsed = Nokogiri::HTML.parse(text_adapter.to_html) do |config| + config.strict + end + parsed.css('div#wrap div#view-html-content').inner_html.should == attachment.body + end + + it 'strips the body of trailing whitespace' do + attachment = FactoryGirl.build(:body_text, :body => ' Hello ') + text_adapter = AttachmentToHTML::Adapters::Text.new(attachment) + parsed = Nokogiri::HTML.parse(text_adapter.to_html) do |config| + config.strict + end + parsed.css('div#wrapper div#view-html-content').inner_html.should == 'Hello' + end + + # NOTE: Can't parse this spec with Nokogiri at the moment because even + # in strict mode Nokogiri tampers with the HTML returned: + # Failure/Error: parsed.css('div#wrapper div#view-html-content'). + # inner_html.should == expected + # expected: "Usage: foo "bar" >baz<" + # got: "Usage: foo \"bar\" >baz<" (using ==) + it 'escapes special characters' do + attachment = FactoryGirl.build(:body_text, :body => 'Usage: foo "bar" >baz<') + text_adapter = AttachmentToHTML::Adapters::Text.new(attachment) + expected = %Q(Usage: foo "bar" >baz<) + text_adapter.to_html.should include(expected) + end + + it 'creates hyperlinks for text that looks like a url' do + attachment = FactoryGirl.build(:body_text, :body => 'http://www.whatdotheyknow.com') + text_adapter = AttachmentToHTML::Adapters::Text.new(attachment) + parsed = Nokogiri::HTML.parse(text_adapter.to_html) do |config| + config.strict + end + parsed.css('div#wrapper div#view-html-content a').first.text.should == 'http://www.whatdotheyknow.com' + parsed.css('div#wrapper div#view-html-content a').first['href'].should == 'http://www.whatdotheyknow.com' + end + + it 'substitutes newlines for br tags' do + attachment = FactoryGirl.build(:body_text, :body => "A\nNewline") + text_adapter = AttachmentToHTML::Adapters::Text.new(attachment) + parsed = Nokogiri::HTML.parse(text_adapter.to_html) do |config| + config.strict + end + expected = %Q(A<br>Newline) + parsed.css('div#wrapper div#view-html-content').inner_html.should == expected + end + + end + + describe :success? do + + it 'is successful if the body has content excluding the tags' do + text_adapter.to_html + text_adapter.success?.should be_true + end + + it 'is successful if the body contains images' do + mocked_return = %Q(<!DOCTYPE html><html><head></head><body><img src="logo.png" /></body></html>) + text_adapter = AttachmentToHTML::Adapters::Text.new(attachment) + text_adapter.stub(:to_html).and_return(mocked_return) + text_adapter.success?.should be_true + end + + it 'is not successful if the body has no content other than tags' do + empty_txt = load_file_fixture('empty.txt') + attachment = FactoryGirl.build(:body_text, :body => empty_txt) + text_adapter = AttachmentToHTML::Adapters::Text.new(attachment) + text_adapter.to_html + text_adapter.success?.should be_false + end + + end + +end diff --git a/spec/lib/attachment_to_html/attachment_to_html_spec.rb b/spec/lib/attachment_to_html/attachment_to_html_spec.rb new file mode 100644 index 000000000..f7df06f87 --- /dev/null +++ b/spec/lib/attachment_to_html/attachment_to_html_spec.rb @@ -0,0 +1,48 @@ +require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper') + +describe AttachmentToHTML do + include AttachmentToHTML + + let(:attachment) { FactoryGirl.build(:body_text) } + + describe :to_html do + + it 'sends the attachment to the correct adapter for conversion' do + AttachmentToHTML::Adapters::Text.should_receive(:new).with(attachment, {}).and_call_original + to_html(attachment) + end + + it 'returns the results in a HTML class' do + expected = AttachmentToHTML::Adapters::Text.new(attachment).to_html + to_html(attachment).should be_instance_of(AttachmentToHTML::HTML) + end + + it 'accepts a hash of options to pass to the adapter' do + options = { :wrapper => 'wrap' } + AttachmentToHTML::Adapters::Text.should_receive(:new).with(attachment, options).and_call_original + to_html(attachment, options) + end + + it 'converts an attachment that has an adapter, fails to convert, but has a google viewer' do + attachment = FactoryGirl.build(:pdf_attachment) + AttachmentToHTML::HTML.any_instance.stub(:success?).and_return(false) + AttachmentToHTML::Adapters::PDF.should_receive(:new).with(attachment, {}).and_call_original + AttachmentToHTML::Adapters::GoogleDocsViewer.should_receive(:new).with(attachment, {}) + to_html(attachment) + end + + it 'converts an attachment that doesnt have an adapter, but has a google viewer' do + attachment = FactoryGirl.build(:body_text, :content_type => 'application/vnd.ms-word') + AttachmentToHTML::Adapters::GoogleDocsViewer.should_receive(:new).with(attachment, {}).and_call_original + to_html(attachment) + end + + it 'converts an attachment that has no adapter or google viewer' do + attachment = FactoryGirl.build(:body_text, :content_type => 'application/json') + AttachmentToHTML::Adapters::CouldNotConvert.should_receive(:new).with(attachment, {}).and_call_original + to_html(attachment) + end + + end + +end diff --git a/spec/lib/attachment_to_html/html_spec.rb b/spec/lib/attachment_to_html/html_spec.rb new file mode 100644 index 000000000..65b63d383 --- /dev/null +++ b/spec/lib/attachment_to_html/html_spec.rb @@ -0,0 +1,24 @@ +require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper') + +describe AttachmentToHTML::HTML do + + let(:adapter) { OpenStruct.new(:to_html => '<p>hello</p>', :success? => true) } + let(:html) { AttachmentToHTML::HTML.new(adapter) } + + describe :to_s do + + it 'returns the raw html' do + html.to_s.should == '<p>hello</p>' + end + + end + + describe :success? do + + it 'returns whether the conversion succeeded' do + html.success?.should be_true + end + + end + +end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index dc5a0d6eb..e391c97d3 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -13,6 +13,7 @@ SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[ SimpleCov.start('rails') do add_filter 'commonlib' add_filter 'vendor/plugins' + add_filter 'lib/attachment_to_html' add_filter 'lib/strip_attributes' add_filter 'lib/has_tag_string' add_filter 'lib/acts_as_xapian' |