aboutsummaryrefslogtreecommitdiffstats
path: root/spec/lib/attachment_to_html
diff options
context:
space:
mode:
authorGareth Rees <gareth@mysociety.org>2014-03-17 11:15:40 +0000
committerGareth Rees <gareth@mysociety.org>2014-03-28 09:39:04 +0000
commitd57ca2a22579df4c634d554989c0ee9e4ebb5165 (patch)
treee1d11c626cedf57373be95b6b1ec6ce4dc22ea30 /spec/lib/attachment_to_html
parent0adf9399cbef42054809479c8f1b64dad7bbf8ca (diff)
Add AttachmentToHTML library
Extracts the attachment processing from FoiAttachment#body_to_html. AttachmentToHTML contains adapters which convert: - text/plain - application/pdf - application/rtf. Results are returned as an AttachmentToHTML::HTML instance which contains the raw HTML and other metadata about the conversion.
Diffstat (limited to 'spec/lib/attachment_to_html')
-rw-r--r--spec/lib/attachment_to_html/adapters/could_not_convert_spec.rb67
-rw-r--r--spec/lib/attachment_to_html/adapters/google_docs_viewer_spec.rb81
-rw-r--r--spec/lib/attachment_to_html/adapters/pdf_spec.rb98
-rw-r--r--spec/lib/attachment_to_html/adapters/rtf_spec.rb98
-rw-r--r--spec/lib/attachment_to_html/adapters/text_spec.rb121
-rw-r--r--spec/lib/attachment_to_html/attachment_to_html_spec.rb48
-rw-r--r--spec/lib/attachment_to_html/html_spec.rb24
7 files changed, 537 insertions, 0 deletions
diff --git a/spec/lib/attachment_to_html/adapters/could_not_convert_spec.rb b/spec/lib/attachment_to_html/adapters/could_not_convert_spec.rb
new file mode 100644
index 000000000..aee68e986
--- /dev/null
+++ b/spec/lib/attachment_to_html/adapters/could_not_convert_spec.rb
@@ -0,0 +1,67 @@
+require File.expand_path(File.dirname(__FILE__) + '/../../../spec_helper')
+
+describe AttachmentToHTML::Adapters::CouldNotConvert do
+ # Specs for the CouldNotConvert adapter, built here from the :pdf_attachment factory.
+ let(:attachment) { FactoryGirl.build(:pdf_attachment) }
+ let(:adapter) { AttachmentToHTML::Adapters::CouldNotConvert.new(attachment) }
+
+ describe :wrapper do
+ # wrapper is expected to be 'wrapper' unless a :wrapper option is passed to the constructor.
+ it 'defaults to wrapper' do
+ adapter.wrapper.should == 'wrapper'
+ end
+
+ it 'accepts a wrapper option' do
+ adapter = AttachmentToHTML::Adapters::CouldNotConvert.new(attachment, :wrapper => 'wrap')
+ adapter.wrapper.should == 'wrap'
+ end
+
+ end
+
+ describe :to_html do
+ # Every example parses the generated markup with Nokogiri (strict config) before asserting on it.
+ it 'should be a valid html document' do
+ parsed = Nokogiri::HTML.parse(adapter.to_html) do |config|
+ config.strict
+ end
+ parsed.errors.any?.should be_false
+ end
+
+ it 'contains the attachment filename in the title tag' do
+ parsed = Nokogiri::HTML.parse(adapter.to_html) do |config|
+ config.strict
+ end
+ parsed.css('title').inner_html.should == attachment.display_filename
+ end
+
+ it 'contains the wrapper div in the body tag' do
+ adapter = AttachmentToHTML::Adapters::CouldNotConvert.new(attachment, :wrapper => 'wrap')
+ parsed = Nokogiri::HTML.parse(adapter.to_html) do |config|
+ config.strict
+ end
+ parsed.css('body div').first.attributes['id'].value.should == 'wrap'
+ end
+
+ it 'should contain text about the conversion failure' do
+ adapter = AttachmentToHTML::Adapters::CouldNotConvert.new(attachment, :wrapper => 'wrap')
+ parsed = Nokogiri::HTML.parse(adapter.to_html) do |config|
+ config.strict
+ end
+
+ expected = "<p>Sorry, we were unable to convert this file to HTML. " \
+ "Please use the download link at the top right.</p>"
+
+ parsed.css('div#wrap div#view-html-content').inner_html.should == expected
+ end
+
+ end
+
+ describe :success? do
+ # Checked without calling to_html first; the adapter is expected to report success regardless.
+ it 'is always true' do
+ adapter.success?.should be_true
+ end
+
+ end
+
+end
diff --git a/spec/lib/attachment_to_html/adapters/google_docs_viewer_spec.rb b/spec/lib/attachment_to_html/adapters/google_docs_viewer_spec.rb
new file mode 100644
index 000000000..166ca3241
--- /dev/null
+++ b/spec/lib/attachment_to_html/adapters/google_docs_viewer_spec.rb
@@ -0,0 +1,81 @@
+require File.expand_path(File.dirname(__FILE__) + '/../../../spec_helper')
+
+describe AttachmentToHTML::Adapters::GoogleDocsViewer do
+
+ let(:attachment) { FactoryGirl.build(:pdf_attachment) }
+ let(:google_adapter) do
+ AttachmentToHTML::Adapters::GoogleDocsViewer.new(attachment, :attachment_url => 'http://example.com/test.pdf')
+ end
+
+ describe :wrapper do
+
+ it 'defaults to wrapper_google_embed' do
+ google_adapter.wrapper.should == 'wrapper_google_embed'
+ end
+
+ it 'accepts a wrapper option' do
+ google_adapter = AttachmentToHTML::Adapters::GoogleDocsViewer.new(attachment, :wrapper => 'wrap')
+ google_adapter.wrapper.should == 'wrap'
+ end
+
+ end
+
+ describe :to_html do
+
+ it 'should be a valid html document' do
+ parsed = Nokogiri::HTML.parse(google_adapter.to_html) do |config|
+ config.strict
+ end
+ parsed.errors.any?.should be_false
+ end
+
+ it 'contains the attachment filename in the title tag' do
+ parsed = Nokogiri::HTML.parse(google_adapter.to_html) do |config|
+ config.strict
+ end
+ parsed.css('title').inner_html.should == attachment.display_filename
+ end
+
+ it 'contains the wrapper div in the body tag' do
+ google_adapter = AttachmentToHTML::Adapters::GoogleDocsViewer.new(attachment, :wrapper => 'wrap')
+ parsed = Nokogiri::HTML.parse(google_adapter.to_html) do |config|
+ config.strict
+ end
+ parsed.css('body div').first.attributes['id'].value.should == 'wrap'
+ end
+
+ it 'contains the google docs viewer url in the wrapper div' do
+ options = { :wrapper => 'wrap', :attachment_url => 'http://example.com/test.pdf' }
+ google_adapter = AttachmentToHTML::Adapters::GoogleDocsViewer.new(attachment, options)
+ parsed = Nokogiri::HTML.parse(google_adapter.to_html) do |config|
+ config.strict
+ end
+ expected = %Q(<iframe src="http://docs.google.com/viewer?url=http://example.com/test.pdf&amp;embedded=true" width="100%" height="100%" style="border: none;"></iframe>)
+ parsed.css('div#wrap div#view-html-content').inner_html.should include(expected)
+ end
+
+ describe 'uses the confugured alaveteli protocol' do
+
+ it 'https if force_ssl is on' do
+ AlaveteliConfiguration.stub(:force_ssl).and_return(true)
+ google_adapter.to_html.should include('https://docs.google.com')
+ end
+
+ it 'http if force_ssl is off' do
+ AlaveteliConfiguration.stub(:force_ssl).and_return(false)
+ google_adapter.to_html.should include('http://docs.google.com')
+ end
+
+ end
+
+ end
+
+ describe :success? do
+
+ it 'is always true' do
+ google_adapter.success?.should be_true
+ end
+
+ end
+
+end
diff --git a/spec/lib/attachment_to_html/adapters/pdf_spec.rb b/spec/lib/attachment_to_html/adapters/pdf_spec.rb
new file mode 100644
index 000000000..65c376043
--- /dev/null
+++ b/spec/lib/attachment_to_html/adapters/pdf_spec.rb
@@ -0,0 +1,98 @@
+require File.expand_path(File.dirname(__FILE__) + '/../../../spec_helper')
+
+describe AttachmentToHTML::Adapters::PDF do
+ # Specs for the PDF adapter, using the :pdf_attachment factory as the default input.
+ let(:attachment) { FactoryGirl.build(:pdf_attachment) }
+ let(:pdf_adapter) { AttachmentToHTML::Adapters::PDF.new(attachment) }
+
+ describe :wrapper do
+ # wrapper is expected to be 'wrapper' unless a :wrapper option is passed to the constructor.
+ it 'defaults to wrapper' do
+ pdf_adapter.wrapper.should == 'wrapper'
+ end
+
+ it 'accepts a wrapper option' do
+ pdf_adapter = AttachmentToHTML::Adapters::PDF.new(attachment, :wrapper => 'wrap')
+ pdf_adapter.wrapper.should == 'wrap'
+ end
+
+ end
+
+ describe :tmpdir do
+ # tmpdir is expected to be Rails.root.join('tmp') unless a :tmpdir option is supplied.
+ it 'defaults to the rails tmp directory' do
+ pdf_adapter.tmpdir.should == Rails.root.join('tmp')
+ end
+
+ it 'allows a tmpdir to be specified to store the converted document' do
+ pdf_adapter = AttachmentToHTML::Adapters::PDF.new(attachment, :tmpdir => '/tmp')
+ pdf_adapter.tmpdir.should == '/tmp'
+ end
+
+ end
+
+ describe :to_html do
+ # Markup is parsed with Nokogiri (strict config); the final example only checks the Dir.chdir call.
+ it 'should be a valid html document' do
+ parsed = Nokogiri::HTML.parse(pdf_adapter.to_html) do |config|
+ config.strict
+ end
+ parsed.errors.any?.should be_false
+ end
+
+ it 'contains the attachment filename in the title tag' do
+ parsed = Nokogiri::HTML.parse(pdf_adapter.to_html) do |config|
+ config.strict
+ end
+ parsed.css('title').inner_html.should == attachment.display_filename
+ end
+
+ it 'contains the wrapper div in the body tag' do
+ pdf_adapter = AttachmentToHTML::Adapters::PDF.new(attachment, :wrapper => 'wrap')
+ parsed = Nokogiri::HTML.parse(pdf_adapter.to_html) do |config|
+ config.strict
+ end
+ parsed.css('body div').first.attributes['id'].value.should == 'wrap'
+ end
+
+ it 'contains the attachment body in the wrapper div' do
+ pdf_adapter = AttachmentToHTML::Adapters::PDF.new(attachment, :wrapper => 'wrap')
+ parsed = Nokogiri::HTML.parse(pdf_adapter.to_html) do |config|
+ config.strict
+ end
+ parsed.css('div#wrap div#view-html-content').inner_html.should include('thisisthebody')
+ end
+
+ it 'operates in the context of the supplied tmpdir' do
+ pdf_adapter = AttachmentToHTML::Adapters::PDF.new(attachment, :tmpdir => '/tmp')
+ Dir.should_receive(:chdir).with('/tmp').and_call_original
+ pdf_adapter.to_html
+ end
+
+ end
+
+ describe :success? do
+ # success? is checked after a real to_html call, or with to_html stubbed to image-only markup.
+ it 'is successful if the body has content excluding the tags' do
+ pdf_adapter.to_html
+ pdf_adapter.success?.should be_true
+ end
+
+ it 'is successful if the body contains images' do
+ mocked_return = %Q(<!DOCTYPE html><html><head></head><body><img src="logo.png" /></body></html>)
+ pdf_adapter = AttachmentToHTML::Adapters::PDF.new(attachment)
+ pdf_adapter.stub(:to_html).and_return(mocked_return)
+ pdf_adapter.success?.should be_true
+ end
+
+ it 'is not successful if the body has no content other than tags' do
+ # TODO: Add and use spec/fixtures/files/empty.pdf
+ attachment = FactoryGirl.build(:body_text, :body => '')
+ pdf_adapter = AttachmentToHTML::Adapters::PDF.new(attachment)
+ pdf_adapter.to_html
+ pdf_adapter.success?.should be_false
+ end
+
+ end
+
+end
diff --git a/spec/lib/attachment_to_html/adapters/rtf_spec.rb b/spec/lib/attachment_to_html/adapters/rtf_spec.rb
new file mode 100644
index 000000000..f84073c51
--- /dev/null
+++ b/spec/lib/attachment_to_html/adapters/rtf_spec.rb
@@ -0,0 +1,98 @@
+require File.expand_path(File.dirname(__FILE__) + '/../../../spec_helper')
+
+describe AttachmentToHTML::Adapters::RTF do
+ # Specs for the RTF adapter, using the :rtf_attachment factory as the default input.
+ let(:attachment) { FactoryGirl.build(:rtf_attachment) }
+ let(:rtf_adapter) { AttachmentToHTML::Adapters::RTF.new(attachment) }
+
+ describe :wrapper do
+ # wrapper is expected to be 'wrapper' unless a :wrapper option is passed to the constructor.
+ it 'defaults to wrapper' do
+ rtf_adapter.wrapper.should == 'wrapper'
+ end
+
+ it 'accepts a wrapper option' do
+ rtf_adapter = AttachmentToHTML::Adapters::RTF.new(attachment, :wrapper => 'wrap')
+ rtf_adapter.wrapper.should == 'wrap'
+ end
+
+ end
+
+ describe :tmpdir do
+ # tmpdir is expected to be Rails.root.join('tmp') unless a :tmpdir option is supplied.
+ it 'defaults to the rails tmp directory' do
+ rtf_adapter.tmpdir.should == Rails.root.join('tmp')
+ end
+
+ it 'allows a tmpdir to be specified to store the converted document' do
+ rtf_adapter = AttachmentToHTML::Adapters::RTF.new(attachment, :tmpdir => '/tmp')
+ rtf_adapter.tmpdir.should == '/tmp'
+ end
+
+ end
+
+ describe :to_html do
+ # Markup is parsed with Nokogiri (strict config); the final example only checks the Dir.chdir call.
+ it 'should be a valid html document' do
+ parsed = Nokogiri::HTML.parse(rtf_adapter.to_html) do |config|
+ config.strict
+ end
+ parsed.errors.any?.should be_false
+ end
+
+ it 'contains the attachment filename in the title tag' do
+ parsed = Nokogiri::HTML.parse(rtf_adapter.to_html) do |config|
+ config.strict
+ end
+ parsed.css('head title').inner_html.should == attachment.display_filename
+ end
+
+ it 'contains the wrapper div in the body tag' do
+ rtf_adapter = AttachmentToHTML::Adapters::RTF.new(attachment, :wrapper => 'wrap')
+ parsed = Nokogiri::HTML.parse(rtf_adapter.to_html) do |config|
+ config.strict
+ end
+ parsed.css('body div').first.attributes['id'].value.should == 'wrap'
+ end
+
+ it 'contains the attachment body in the wrapper div' do
+ rtf_adapter = AttachmentToHTML::Adapters::RTF.new(attachment, :wrapper => 'wrap')
+ parsed = Nokogiri::HTML.parse(rtf_adapter.to_html) do |config|
+ config.strict
+ end
+ parsed.css('div#wrap div#view-html-content').inner_text.should include('thisisthebody')
+ end
+
+ it 'operates in the context of the supplied tmpdir' do
+ rtf_adapter = AttachmentToHTML::Adapters::RTF.new(attachment, :tmpdir => '/tmp')
+ Dir.should_receive(:chdir).with('/tmp').and_call_original
+ rtf_adapter.to_html
+ end
+
+ end
+
+ describe :success? do
+ # success? is checked after a real to_html call, or with to_html stubbed to image-only markup.
+ it 'is successful if the body has content excluding the tags' do
+ rtf_adapter.to_html
+ rtf_adapter.success?.should be_true
+ end
+
+ it 'is successful if the body contains images' do
+ mocked_return = %Q(<!DOCTYPE html><html><head></head><body><img src="logo.png" /></body></html>)
+ rtf_adapter = AttachmentToHTML::Adapters::RTF.new(attachment)
+ rtf_adapter.stub(:to_html).and_return(mocked_return)
+ rtf_adapter.success?.should be_true
+ end
+
+ it 'is not successful if the body has no content other than tags' do
+ empty_rtf = load_file_fixture('empty.rtf')
+ attachment = FactoryGirl.build(:rtf_attachment, :body => empty_rtf)
+ rtf_adapter = AttachmentToHTML::Adapters::RTF.new(attachment)
+ rtf_adapter.to_html
+ rtf_adapter.success?.should be_false
+ end
+
+ end
+
+end
diff --git a/spec/lib/attachment_to_html/adapters/text_spec.rb b/spec/lib/attachment_to_html/adapters/text_spec.rb
new file mode 100644
index 000000000..599670603
--- /dev/null
+++ b/spec/lib/attachment_to_html/adapters/text_spec.rb
@@ -0,0 +1,121 @@
+require File.expand_path(File.dirname(__FILE__) + '/../../../spec_helper')
+
+describe AttachmentToHTML::Adapters::Text do
+
+ let(:attachment) { FactoryGirl.build(:body_text) }
+ let(:text_adapter) { AttachmentToHTML::Adapters::Text.new(attachment) }
+
+ describe :wrapper do
+
+ it 'defaults to wrapper' do
+ text_adapter.wrapper.should == 'wrapper'
+ end
+
+ it 'accepts a wrapper option' do
+ text_adapter = AttachmentToHTML::Adapters::Text.new(attachment, :wrapper => 'wrap')
+ text_adapter.wrapper.should == 'wrap'
+ end
+
+ end
+
+ describe :to_html do
+
+ it 'should be a valid html document' do
+ parsed = Nokogiri::HTML.parse(text_adapter.to_html) do |config|
+ config.strict
+ end
+ parsed.errors.any?.should be_false
+ end
+
+ it 'contains the attachment filename in the title tag' do
+ parsed = Nokogiri::HTML.parse(text_adapter.to_html) do |config|
+ config.strict
+ end
+ parsed.css('title').inner_html.should == attachment.display_filename
+ end
+
+ it 'contains the wrapper div in the body tag' do
+ text_adapter = AttachmentToHTML::Adapters::Text.new(attachment, :wrapper => 'wrap')
+ parsed = Nokogiri::HTML.parse(text_adapter.to_html) do |config|
+ config.strict
+ end
+ parsed.css('body').children.first.attributes['id'].value.should == 'wrap'
+ end
+
+ it 'contains the attachment body in the wrapper div' do
+ text_adapter = AttachmentToHTML::Adapters::Text.new(attachment, :wrapper => 'wrap')
+ parsed = Nokogiri::HTML.parse(text_adapter.to_html) do |config|
+ config.strict
+ end
+ parsed.css('div#wrap div#view-html-content').inner_html.should == attachment.body
+ end
+
+ it 'strips the body of trailing whitespace' do
+ attachment = FactoryGirl.build(:body_text, :body => ' Hello ')
+ text_adapter = AttachmentToHTML::Adapters::Text.new(attachment)
+ parsed = Nokogiri::HTML.parse(text_adapter.to_html) do |config|
+ config.strict
+ end
+ parsed.css('div#wrapper div#view-html-content').inner_html.should == 'Hello'
+ end
+
+ # NOTE: Can't parse this spec with Nokogiri at the moment because even
+ # in strict mode Nokogiri tampers with the HTML returned:
+ # Failure/Error: parsed.css('div#wrapper div#view-html-content').
+ # inner_html.should == expected
+ # expected: "Usage: foo &quot;bar&quot; &gt;baz&lt;"
+ # got: "Usage: foo \"bar\" &gt;baz&lt;" (using ==)
+ it 'escapes special characters' do
+ attachment = FactoryGirl.build(:body_text, :body => 'Usage: foo "bar" >baz<')
+ text_adapter = AttachmentToHTML::Adapters::Text.new(attachment)
+ expected = %Q(Usage: foo &quot;bar&quot; &gt;baz&lt;)
+ text_adapter.to_html.should include(expected)
+ end
+
+ it 'creates hyperlinks for text that looks like a url' do
+ attachment = FactoryGirl.build(:body_text, :body => 'http://www.whatdotheyknow.com')
+ text_adapter = AttachmentToHTML::Adapters::Text.new(attachment)
+ parsed = Nokogiri::HTML.parse(text_adapter.to_html) do |config|
+ config.strict
+ end
+ parsed.css('div#wrapper div#view-html-content a').first.text.should == 'http://www.whatdotheyknow.com'
+ parsed.css('div#wrapper div#view-html-content a').first['href'].should == 'http://www.whatdotheyknow.com'
+ end
+
+ it 'substitutes newlines for br tags' do
+ attachment = FactoryGirl.build(:body_text, :body => "A\nNewline")
+ text_adapter = AttachmentToHTML::Adapters::Text.new(attachment)
+ parsed = Nokogiri::HTML.parse(text_adapter.to_html) do |config|
+ config.strict
+ end
+ expected = %Q(A<br>Newline)
+ parsed.css('div#wrapper div#view-html-content').inner_html.should == expected
+ end
+
+ end
+
+ describe :success? do
+
+ it 'is successful if the body has content excluding the tags' do
+ text_adapter.to_html
+ text_adapter.success?.should be_true
+ end
+
+ it 'is successful if the body contains images' do
+ mocked_return = %Q(<!DOCTYPE html><html><head></head><body><img src="logo.png" /></body></html>)
+ text_adapter = AttachmentToHTML::Adapters::Text.new(attachment)
+ text_adapter.stub(:to_html).and_return(mocked_return)
+ text_adapter.success?.should be_true
+ end
+
+ it 'is not successful if the body has no content other than tags' do
+ empty_txt = load_file_fixture('empty.txt')
+ attachment = FactoryGirl.build(:body_text, :body => empty_txt)
+ text_adapter = AttachmentToHTML::Adapters::Text.new(attachment)
+ text_adapter.to_html
+ text_adapter.success?.should be_false
+ end
+
+ end
+
+end
diff --git a/spec/lib/attachment_to_html/attachment_to_html_spec.rb b/spec/lib/attachment_to_html/attachment_to_html_spec.rb
new file mode 100644
index 000000000..f7df06f87
--- /dev/null
+++ b/spec/lib/attachment_to_html/attachment_to_html_spec.rb
@@ -0,0 +1,48 @@
+require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
+
+describe AttachmentToHTML do
+ include AttachmentToHTML
+
+ let(:attachment) { FactoryGirl.build(:body_text) }
+
+ describe :to_html do
+
+ it 'sends the attachment to the correct adapter for conversion' do
+ AttachmentToHTML::Adapters::Text.should_receive(:new).with(attachment, {}).and_call_original
+ to_html(attachment)
+ end
+
+ it 'returns the results in a HTML class' do
+ expected = AttachmentToHTML::Adapters::Text.new(attachment).to_html
+ to_html(attachment).should be_instance_of(AttachmentToHTML::HTML)
+ end
+
+ it 'accepts a hash of options to pass to the adapter' do
+ options = { :wrapper => 'wrap' }
+ AttachmentToHTML::Adapters::Text.should_receive(:new).with(attachment, options).and_call_original
+ to_html(attachment, options)
+ end
+
+ it 'converts an attachment that has an adapter, fails to convert, but has a google viewer' do
+ attachment = FactoryGirl.build(:pdf_attachment)
+ AttachmentToHTML::HTML.any_instance.stub(:success?).and_return(false)
+ AttachmentToHTML::Adapters::PDF.should_receive(:new).with(attachment, {}).and_call_original
+ AttachmentToHTML::Adapters::GoogleDocsViewer.should_receive(:new).with(attachment, {})
+ to_html(attachment)
+ end
+
+ it 'converts an attachment that doesnt have an adapter, but has a google viewer' do
+ attachment = FactoryGirl.build(:body_text, :content_type => 'application/vnd.ms-word')
+ AttachmentToHTML::Adapters::GoogleDocsViewer.should_receive(:new).with(attachment, {}).and_call_original
+ to_html(attachment)
+ end
+
+ it 'converts an attachment that has no adapter or google viewer' do
+ attachment = FactoryGirl.build(:body_text, :content_type => 'application/json')
+ AttachmentToHTML::Adapters::CouldNotConvert.should_receive(:new).with(attachment, {}).and_call_original
+ to_html(attachment)
+ end
+
+ end
+
+end
diff --git a/spec/lib/attachment_to_html/html_spec.rb b/spec/lib/attachment_to_html/html_spec.rb
new file mode 100644
index 000000000..65b63d383
--- /dev/null
+++ b/spec/lib/attachment_to_html/html_spec.rb
@@ -0,0 +1,24 @@
+require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
+
+describe AttachmentToHTML::HTML do
+ # Specs for the HTML result object, driven by an OpenStruct standing in for an adapter.
+ let(:adapter) { OpenStruct.new(:to_html => '<p>hello</p>', :success? => true) }
+ let(:html) { AttachmentToHTML::HTML.new(adapter) }
+
+ describe :to_s do
+ # to_s is expected to equal the markup the stand-in adapter exposes via to_html.
+ it 'returns the raw html' do
+ html.to_s.should == '<p>hello</p>'
+ end
+
+ end
+
+ describe :success? do
+ # With the stand-in adapter reporting success? => true, the result should be truthy too.
+ it 'returns whether the conversion succeeded' do
+ html.success?.should be_true
+ end
+
+ end
+
+end