diff options
author | Gareth Rees <gareth@mysociety.org> | 2014-03-17 11:15:40 +0000 |
---|---|---|
committer | Gareth Rees <gareth@mysociety.org> | 2014-03-28 09:39:04 +0000 |
commit | d57ca2a22579df4c634d554989c0ee9e4ebb5165 (patch) | |
tree | e1d11c626cedf57373be95b6b1ec6ce4dc22ea30 /spec/lib/attachment_to_html | |
parent | 0adf9399cbef42054809479c8f1b64dad7bbf8ca (diff) |
Add AttachmentToHTML library
Extracts the attachment processing from FoiAttachment#body_to_html
AttachmentToHTML contains adapters which convert:
- text/plain
- application/pdf
- application/rtf
Results are returned as an AttachmentToHTML::HTML instance which contains
the raw HTML and other metadata about the conversion.
Diffstat (limited to 'spec/lib/attachment_to_html')
-rw-r--r-- | spec/lib/attachment_to_html/adapters/could_not_convert_spec.rb | 67 | ||||
-rw-r--r-- | spec/lib/attachment_to_html/adapters/google_docs_viewer_spec.rb | 81 | ||||
-rw-r--r-- | spec/lib/attachment_to_html/adapters/pdf_spec.rb | 98 | ||||
-rw-r--r-- | spec/lib/attachment_to_html/adapters/rtf_spec.rb | 98 | ||||
-rw-r--r-- | spec/lib/attachment_to_html/adapters/text_spec.rb | 121 | ||||
-rw-r--r-- | spec/lib/attachment_to_html/attachment_to_html_spec.rb | 48 | ||||
-rw-r--r-- | spec/lib/attachment_to_html/html_spec.rb | 24 |
7 files changed, 537 insertions, 0 deletions
diff --git a/spec/lib/attachment_to_html/adapters/could_not_convert_spec.rb b/spec/lib/attachment_to_html/adapters/could_not_convert_spec.rb new file mode 100644 index 000000000..aee68e986 --- /dev/null +++ b/spec/lib/attachment_to_html/adapters/could_not_convert_spec.rb @@ -0,0 +1,67 @@ +require File.expand_path(File.dirname(__FILE__) + '/../../../spec_helper') + +describe AttachmentToHTML::Adapters::CouldNotConvert do + + let(:attachment) { FactoryGirl.build(:pdf_attachment) } + let(:adapter) { AttachmentToHTML::Adapters::CouldNotConvert.new(attachment) } + + describe :wrapper do + + it 'defaults to wrapper' do + adapter.wrapper.should == 'wrapper' + end + + it 'accepts a wrapper option' do + adapter = AttachmentToHTML::Adapters::CouldNotConvert.new(attachment, :wrapper => 'wrap') + adapter.wrapper.should == 'wrap' + end + + end + + describe :to_html do + + it 'should be a valid html document' do + parsed = Nokogiri::HTML.parse(adapter.to_html) do |config| + config.strict + end + parsed.errors.any?.should be_false + end + + it 'contains the attachment filename in the title tag' do + parsed = Nokogiri::HTML.parse(adapter.to_html) do |config| + config.strict + end + parsed.css('title').inner_html.should == attachment.display_filename + end + + it 'contains the wrapper div in the body tag' do + adapter = AttachmentToHTML::Adapters::CouldNotConvert.new(attachment, :wrapper => 'wrap') + parsed = Nokogiri::HTML.parse(adapter.to_html) do |config| + config.strict + end + parsed.css('body div').first.attributes['id'].value.should == 'wrap' + end + + it 'should contain text about the conversion failure' do + adapter = AttachmentToHTML::Adapters::CouldNotConvert.new(attachment, :wrapper => 'wrap') + parsed = Nokogiri::HTML.parse(adapter.to_html) do |config| + config.strict + end + + expected = "<p>Sorry, we were unable to convert this file to HTML. 
" \ + "Please use the download link at the top right.</p>" + + parsed.css('div#wrap div#view-html-content').inner_html.should == expected + end + + end + + describe :success? do + + it 'is always true' do + adapter.success?.should be_true + end + + end + +end diff --git a/spec/lib/attachment_to_html/adapters/google_docs_viewer_spec.rb b/spec/lib/attachment_to_html/adapters/google_docs_viewer_spec.rb new file mode 100644 index 000000000..166ca3241 --- /dev/null +++ b/spec/lib/attachment_to_html/adapters/google_docs_viewer_spec.rb @@ -0,0 +1,81 @@ +require File.expand_path(File.dirname(__FILE__) + '/../../../spec_helper') + +describe AttachmentToHTML::Adapters::GoogleDocsViewer do + + let(:attachment) { FactoryGirl.build(:pdf_attachment) } + let(:google_adapter) do + AttachmentToHTML::Adapters::GoogleDocsViewer.new(attachment, :attachment_url => 'http://example.com/test.pdf') + end + + describe :wrapper do + + it 'defaults to wrapper_google_embed' do + google_adapter.wrapper.should == 'wrapper_google_embed' + end + + it 'accepts a wrapper option' do + google_adapter = AttachmentToHTML::Adapters::GoogleDocsViewer.new(attachment, :wrapper => 'wrap') + google_adapter.wrapper.should == 'wrap' + end + + end + + describe :to_html do + + it 'should be a valid html document' do + parsed = Nokogiri::HTML.parse(google_adapter.to_html) do |config| + config.strict + end + parsed.errors.any?.should be_false + end + + it 'contains the attachment filename in the title tag' do + parsed = Nokogiri::HTML.parse(google_adapter.to_html) do |config| + config.strict + end + parsed.css('title').inner_html.should == attachment.display_filename + end + + it 'contains the wrapper div in the body tag' do + google_adapter = AttachmentToHTML::Adapters::GoogleDocsViewer.new(attachment, :wrapper => 'wrap') + parsed = Nokogiri::HTML.parse(google_adapter.to_html) do |config| + config.strict + end + parsed.css('body div').first.attributes['id'].value.should == 'wrap' + end + + it 'contains the google 
docs viewer url in the wrapper div' do + options = { :wrapper => 'wrap', :attachment_url => 'http://example.com/test.pdf' } + google_adapter = AttachmentToHTML::Adapters::GoogleDocsViewer.new(attachment, options) + parsed = Nokogiri::HTML.parse(google_adapter.to_html) do |config| + config.strict + end + expected = %Q(<iframe src="http://docs.google.com/viewer?url=http://example.com/test.pdf&embedded=true" width="100%" height="100%" style="border: none;"></iframe>) + parsed.css('div#wrap div#view-html-content').inner_html.should include(expected) + end + + describe 'uses the confugured alaveteli protocol' do + + it 'https if force_ssl is on' do + AlaveteliConfiguration.stub(:force_ssl).and_return(true) + google_adapter.to_html.should include('https://docs.google.com') + end + + it 'http if force_ssl is off' do + AlaveteliConfiguration.stub(:force_ssl).and_return(false) + google_adapter.to_html.should include('http://docs.google.com') + end + + end + + end + + describe :success? do + + it 'is always true' do + google_adapter.success?.should be_true + end + + end + +end diff --git a/spec/lib/attachment_to_html/adapters/pdf_spec.rb b/spec/lib/attachment_to_html/adapters/pdf_spec.rb new file mode 100644 index 000000000..65c376043 --- /dev/null +++ b/spec/lib/attachment_to_html/adapters/pdf_spec.rb @@ -0,0 +1,98 @@ +require File.expand_path(File.dirname(__FILE__) + '/../../../spec_helper') + +describe AttachmentToHTML::Adapters::PDF do + + let(:attachment) { FactoryGirl.build(:pdf_attachment) } + let(:pdf_adapter) { AttachmentToHTML::Adapters::PDF.new(attachment) } + + describe :wrapper do + + it 'defaults to wrapper' do + pdf_adapter.wrapper.should == 'wrapper' + end + + it 'accepts a wrapper option' do + pdf_adapter = AttachmentToHTML::Adapters::PDF.new(attachment, :wrapper => 'wrap') + pdf_adapter.wrapper.should == 'wrap' + end + + end + + describe :tmpdir do + + it 'defaults to the rails tmp directory' do + pdf_adapter.tmpdir.should == Rails.root.join('tmp') + end + 
+ it 'allows a tmpdir to be specified to store the converted document' do + pdf_adapter = AttachmentToHTML::Adapters::PDF.new(attachment, :tmpdir => '/tmp') + pdf_adapter.tmpdir.should == '/tmp' + end + + end + + describe :to_html do + + it 'should be a valid html document' do + parsed = Nokogiri::HTML.parse(pdf_adapter.to_html) do |config| + config.strict + end + parsed.errors.any?.should be_false + end + + it 'contains the attachment filename in the title tag' do + parsed = Nokogiri::HTML.parse(pdf_adapter.to_html) do |config| + config.strict + end + parsed.css('title').inner_html.should == attachment.display_filename + end + + it 'contains the wrapper div in the body tag' do + pdf_adapter = AttachmentToHTML::Adapters::PDF.new(attachment, :wrapper => 'wrap') + parsed = Nokogiri::HTML.parse(pdf_adapter.to_html) do |config| + config.strict + end + parsed.css('body div').first.attributes['id'].value.should == 'wrap' + end + + it 'contains the attachment body in the wrapper div' do + pdf_adapter = AttachmentToHTML::Adapters::PDF.new(attachment, :wrapper => 'wrap') + parsed = Nokogiri::HTML.parse(pdf_adapter.to_html) do |config| + config.strict + end + parsed.css('div#wrap div#view-html-content').inner_html.should include('thisisthebody') + end + + it 'operates in the context of the supplied tmpdir' do + pdf_adapter = AttachmentToHTML::Adapters::PDF.new(attachment, :tmpdir => '/tmp') + Dir.should_receive(:chdir).with('/tmp').and_call_original + pdf_adapter.to_html + end + + end + + describe :success? 
do + + it 'is successful if the body has content excluding the tags' do + pdf_adapter.to_html + pdf_adapter.success?.should be_true + end + + it 'is successful if the body contains images' do + mocked_return = %Q(<!DOCTYPE html><html><head></head><body><img src="logo.png" /></body></html>) + pdf_adapter = AttachmentToHTML::Adapters::PDF.new(attachment) + pdf_adapter.stub(:to_html).and_return(mocked_return) + pdf_adapter.success?.should be_true + end + + it 'is not successful if the body has no content other than tags' do + # TODO: Add and use spec/fixtures/files/empty.pdf + attachment = FactoryGirl.build(:body_text, :body => '') + pdf_adapter = AttachmentToHTML::Adapters::PDF.new(attachment) + pdf_adapter.to_html + pdf_adapter.success?.should be_false + end + + end + +end diff --git a/spec/lib/attachment_to_html/adapters/rtf_spec.rb b/spec/lib/attachment_to_html/adapters/rtf_spec.rb new file mode 100644 index 000000000..f84073c51 --- /dev/null +++ b/spec/lib/attachment_to_html/adapters/rtf_spec.rb @@ -0,0 +1,98 @@ +require File.expand_path(File.dirname(__FILE__) + '/../../../spec_helper') + +describe AttachmentToHTML::Adapters::RTF do + + let(:attachment) { FactoryGirl.build(:rtf_attachment) } + let(:rtf_adapter) { AttachmentToHTML::Adapters::RTF.new(attachment) } + + describe :wrapper do + + it 'defaults to wrapper' do + rtf_adapter.wrapper.should == 'wrapper' + end + + it 'accepts a wrapper option' do + rtf_adapter = AttachmentToHTML::Adapters::RTF.new(attachment, :wrapper => 'wrap') + rtf_adapter.wrapper.should == 'wrap' + end + + end + + describe :tmpdir do + + it 'defaults to the rails tmp directory' do + rtf_adapter.tmpdir.should == Rails.root.join('tmp') + end + + it 'allows a tmpdir to be specified to store the converted document' do + rtf_adapter = AttachmentToHTML::Adapters::RTF.new(attachment, :tmpdir => '/tmp') + rtf_adapter.tmpdir.should == '/tmp' + end + + end + + describe :to_html do + + it 'should be a valid html document' do + parsed = 
Nokogiri::HTML.parse(rtf_adapter.to_html) do |config| + config.strict + end + parsed.errors.any?.should be_false + end + + it 'contains the attachment filename in the title tag' do + parsed = Nokogiri::HTML.parse(rtf_adapter.to_html) do |config| + config.strict + end + parsed.css('head title').inner_html.should == attachment.display_filename + end + + it 'contains the wrapper div in the body tag' do + rtf_adapter = AttachmentToHTML::Adapters::RTF.new(attachment, :wrapper => 'wrap') + parsed = Nokogiri::HTML.parse(rtf_adapter.to_html) do |config| + config.strict + end + parsed.css('body div').first.attributes['id'].value.should == 'wrap' + end + + it 'contains the attachment body in the wrapper div' do + rtf_adapter = AttachmentToHTML::Adapters::RTF.new(attachment, :wrapper => 'wrap') + parsed = Nokogiri::HTML.parse(rtf_adapter.to_html) do |config| + config.strict + end + parsed.css('div#wrap div#view-html-content').inner_text.should include('thisisthebody') + end + + it 'operates in the context of the supplied tmpdir' do + rtf_adapter = AttachmentToHTML::Adapters::RTF.new(attachment, :tmpdir => '/tmp') + Dir.should_receive(:chdir).with('/tmp').and_call_original + rtf_adapter.to_html + end + + end + + describe :success? 
do + + it 'is successful if the body has content excluding the tags' do + rtf_adapter.to_html + rtf_adapter.success?.should be_true + end + + it 'is successful if the body contains images' do + mocked_return = %Q(<!DOCTYPE html><html><head></head><body><img src="logo.png" /></body></html>) + rtf_adapter = AttachmentToHTML::Adapters::RTF.new(attachment) + rtf_adapter.stub(:to_html).and_return(mocked_return) + rtf_adapter.success?.should be_true + end + + it 'is not successful if the body has no content other than tags' do + empty_rtf = load_file_fixture('empty.rtf') + attachment = FactoryGirl.build(:rtf_attachment, :body => empty_rtf) + rtf_adapter = AttachmentToHTML::Adapters::RTF.new(attachment) + rtf_adapter.to_html + rtf_adapter.success?.should be_false + end + + end + +end diff --git a/spec/lib/attachment_to_html/adapters/text_spec.rb b/spec/lib/attachment_to_html/adapters/text_spec.rb new file mode 100644 index 000000000..599670603 --- /dev/null +++ b/spec/lib/attachment_to_html/adapters/text_spec.rb @@ -0,0 +1,121 @@ +require File.expand_path(File.dirname(__FILE__) + '/../../../spec_helper') + +describe AttachmentToHTML::Adapters::Text do + + let(:attachment) { FactoryGirl.build(:body_text) } + let(:text_adapter) { AttachmentToHTML::Adapters::Text.new(attachment) } + + describe :wrapper do + + it 'defaults to wrapper' do + text_adapter.wrapper.should == 'wrapper' + end + + it 'accepts a wrapper option' do + text_adapter = AttachmentToHTML::Adapters::Text.new(attachment, :wrapper => 'wrap') + text_adapter.wrapper.should == 'wrap' + end + + end + + describe :to_html do + + it 'should be a valid html document' do + parsed = Nokogiri::HTML.parse(text_adapter.to_html) do |config| + config.strict + end + parsed.errors.any?.should be_false + end + + it 'contains the attachment filename in the title tag' do + parsed = Nokogiri::HTML.parse(text_adapter.to_html) do |config| + config.strict + end + parsed.css('title').inner_html.should == attachment.display_filename + 
end + + it 'contains the wrapper div in the body tag' do + text_adapter = AttachmentToHTML::Adapters::Text.new(attachment, :wrapper => 'wrap') + parsed = Nokogiri::HTML.parse(text_adapter.to_html) do |config| + config.strict + end + parsed.css('body').children.first.attributes['id'].value.should == 'wrap' + end + + it 'contains the attachment body in the wrapper div' do + text_adapter = AttachmentToHTML::Adapters::Text.new(attachment, :wrapper => 'wrap') + parsed = Nokogiri::HTML.parse(text_adapter.to_html) do |config| + config.strict + end + parsed.css('div#wrap div#view-html-content').inner_html.should == attachment.body + end + + it 'strips the body of trailing whitespace' do + attachment = FactoryGirl.build(:body_text, :body => ' Hello ') + text_adapter = AttachmentToHTML::Adapters::Text.new(attachment) + parsed = Nokogiri::HTML.parse(text_adapter.to_html) do |config| + config.strict + end + parsed.css('div#wrapper div#view-html-content').inner_html.should == 'Hello' + end + + # NOTE: Can't parse this spec with Nokogiri at the moment because even + # in strict mode Nokogiri tampers with the HTML returned: + # Failure/Error: parsed.css('div#wrapper div#view-html-content'). 
+ # inner_html.should == expected + # expected: "Usage: foo "bar" >baz<" + # got: "Usage: foo \"bar\" >baz<" (using ==) + it 'escapes special characters' do + attachment = FactoryGirl.build(:body_text, :body => 'Usage: foo "bar" >baz<') + text_adapter = AttachmentToHTML::Adapters::Text.new(attachment) + expected = %Q(Usage: foo "bar" >baz<) + text_adapter.to_html.should include(expected) + end + + it 'creates hyperlinks for text that looks like a url' do + attachment = FactoryGirl.build(:body_text, :body => 'http://www.whatdotheyknow.com') + text_adapter = AttachmentToHTML::Adapters::Text.new(attachment) + parsed = Nokogiri::HTML.parse(text_adapter.to_html) do |config| + config.strict + end + parsed.css('div#wrapper div#view-html-content a').first.text.should == 'http://www.whatdotheyknow.com' + parsed.css('div#wrapper div#view-html-content a').first['href'].should == 'http://www.whatdotheyknow.com' + end + + it 'substitutes newlines for br tags' do + attachment = FactoryGirl.build(:body_text, :body => "A\nNewline") + text_adapter = AttachmentToHTML::Adapters::Text.new(attachment) + parsed = Nokogiri::HTML.parse(text_adapter.to_html) do |config| + config.strict + end + expected = %Q(A<br>Newline) + parsed.css('div#wrapper div#view-html-content').inner_html.should == expected + end + + end + + describe :success? 
do + + it 'is successful if the body has content excluding the tags' do + text_adapter.to_html + text_adapter.success?.should be_true + end + + it 'is successful if the body contains images' do + mocked_return = %Q(<!DOCTYPE html><html><head></head><body><img src="logo.png" /></body></html>) + text_adapter = AttachmentToHTML::Adapters::Text.new(attachment) + text_adapter.stub(:to_html).and_return(mocked_return) + text_adapter.success?.should be_true + end + + it 'is not successful if the body has no content other than tags' do + empty_txt = load_file_fixture('empty.txt') + attachment = FactoryGirl.build(:body_text, :body => empty_txt) + text_adapter = AttachmentToHTML::Adapters::Text.new(attachment) + text_adapter.to_html + text_adapter.success?.should be_false + end + + end + +end diff --git a/spec/lib/attachment_to_html/attachment_to_html_spec.rb b/spec/lib/attachment_to_html/attachment_to_html_spec.rb new file mode 100644 index 000000000..f7df06f87 --- /dev/null +++ b/spec/lib/attachment_to_html/attachment_to_html_spec.rb @@ -0,0 +1,48 @@ +require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper') + +describe AttachmentToHTML do + include AttachmentToHTML + + let(:attachment) { FactoryGirl.build(:body_text) } + + describe :to_html do + + it 'sends the attachment to the correct adapter for conversion' do + AttachmentToHTML::Adapters::Text.should_receive(:new).with(attachment, {}).and_call_original + to_html(attachment) + end + + it 'returns the results in a HTML class' do + expected = AttachmentToHTML::Adapters::Text.new(attachment).to_html + to_html(attachment).should be_instance_of(AttachmentToHTML::HTML) + end + + it 'accepts a hash of options to pass to the adapter' do + options = { :wrapper => 'wrap' } + AttachmentToHTML::Adapters::Text.should_receive(:new).with(attachment, options).and_call_original + to_html(attachment, options) + end + + it 'converts an attachment that has an adapter, fails to convert, but has a google viewer' do + attachment 
= FactoryGirl.build(:pdf_attachment) + AttachmentToHTML::HTML.any_instance.stub(:success?).and_return(false) + AttachmentToHTML::Adapters::PDF.should_receive(:new).with(attachment, {}).and_call_original + AttachmentToHTML::Adapters::GoogleDocsViewer.should_receive(:new).with(attachment, {}) + to_html(attachment) + end + + it 'converts an attachment that doesnt have an adapter, but has a google viewer' do + attachment = FactoryGirl.build(:body_text, :content_type => 'application/vnd.ms-word') + AttachmentToHTML::Adapters::GoogleDocsViewer.should_receive(:new).with(attachment, {}).and_call_original + to_html(attachment) + end + + it 'converts an attachment that has no adapter or google viewer' do + attachment = FactoryGirl.build(:body_text, :content_type => 'application/json') + AttachmentToHTML::Adapters::CouldNotConvert.should_receive(:new).with(attachment, {}).and_call_original + to_html(attachment) + end + + end + +end diff --git a/spec/lib/attachment_to_html/html_spec.rb b/spec/lib/attachment_to_html/html_spec.rb new file mode 100644 index 000000000..65b63d383 --- /dev/null +++ b/spec/lib/attachment_to_html/html_spec.rb @@ -0,0 +1,24 @@ +require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper') + +describe AttachmentToHTML::HTML do + + let(:adapter) { OpenStruct.new(:to_html => '<p>hello</p>', :success? => true) } + let(:html) { AttachmentToHTML::HTML.new(adapter) } + + describe :to_s do + + it 'returns the raw html' do + html.to_s.should == '<p>hello</p>' + end + + end + + describe :success? do + + it 'returns whether the conversion succeeded' do + html.success?.should be_true + end + + end + +end |