diff options
-rw-r--r-- | lib/attachment_to_html/adapters/rtf.rb | 13 | ||||
-rw-r--r-- | spec/lib/attachment_to_html/adapters/rtf_spec.rb | 25 |
2 files changed, 37 insertions, 1 deletions
diff --git a/lib/attachment_to_html/adapters/rtf.rb b/lib/attachment_to_html/adapters/rtf.rb index 24987a975..871ca2c60 100644 --- a/lib/attachment_to_html/adapters/rtf.rb +++ b/lib/attachment_to_html/adapters/rtf.rb @@ -96,8 +96,19 @@ module AttachmentToHTML cleanup_tempfile(tempfile) - html + sanitize_converted(html) end + + end + + # Works around http://savannah.gnu.org/bugs/?42015 in unrtf ~> 0.21 + def sanitize_converted(html) + invalid = %Q(<!DOCTYPE html PUBLIC -//W3C//DTD HTML 4.01 Transitional//EN>) + valid = %Q(<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">) + if html.include?(invalid) + html.sub!(invalid, valid) + end + html end def create_tempfile(text) diff --git a/spec/lib/attachment_to_html/adapters/rtf_spec.rb b/spec/lib/attachment_to_html/adapters/rtf_spec.rb index f84073c51..75fd467f6 100644 --- a/spec/lib/attachment_to_html/adapters/rtf_spec.rb +++ b/spec/lib/attachment_to_html/adapters/rtf_spec.rb @@ -69,6 +69,31 @@ describe AttachmentToHTML::Adapters::RTF do rtf_adapter.to_html end + it 'does not result in incorrect conversion when unrtf returns an invalid doctype' do + # Doctype public identifier is unquoted + # Valid doctype would be: + # <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> + # See bug report http://savannah.gnu.org/bugs/?42015 + invalid = <<-DOC + <!DOCTYPE html PUBLIC -//W3C//DTD HTML 4.01 Transitional//EN> + <html> + <head> + <meta http-equiv="content-type" content="text/html; charset=utf-8"> + <!-- Translation from RTF performed by UnRTF, version 0.21.5 --> + <!--font table contains 0 fonts total--> + <!--invalid font number 0--> + </head> + <body><font size="3"><font color="#000000">thisisthebody</font></font></body> + </html> + DOC + AlaveteliExternalCommand.stub(:run).and_return(invalid) + + parsed = Nokogiri::HTML.parse(rtf_adapter.to_html) do |config| + config.strict + end + parsed.css('body').inner_text.should_not include('//W3C//DTD HTML 4.01 Transitional//EN') + end + end describe 
:success? do |