spec/lib/attachment_to_html/adapters/pdf_spec.rb


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118

require File.expand_path(File.dirname(__FILE__) + '/../../../spec_helper')

describe AttachmentToHTML::Adapters::PDF do

    let(:attachment) { FactoryGirl.build(:pdf_attachment) }
    let(:adapter) { AttachmentToHTML::Adapters::PDF.new(attachment) }

    describe :tmpdir do

        it 'defaults to the rails tmp directory' do
           adapter.tmpdir.should == Rails.root.join('tmp')
        end

        it 'allows a tmpdir to be specified to store the converted document' do
            adapter = AttachmentToHTML::Adapters::PDF.new(attachment, :tmpdir => '/tmp')
            adapter.tmpdir.should == '/tmp'
        end

    end

    describe :title do

        it 'uses the attachment filename for the title' do
            adapter.title.should == attachment.display_filename
        end

        it 'returns the title encoded as UTF-8' do
            if RUBY_VERSION.to_f >= 1.9
                adapter.title.encoding.should == Encoding.find('UTF-8')
            end
        end


    end

    describe :body do

        it 'extracts the body from the document' do
            adapter.body.should include('thisisthebody')
        end

        it 'operates in the context of the supplied tmpdir' do
            adapter = AttachmentToHTML::Adapters::PDF.new(attachment, :tmpdir => '/tmp')
            Dir.should_receive(:chdir).with('/tmp').and_call_original
            adapter.body
        end

        it 'returns the body encoded as UTF-8' do
            if RUBY_VERSION.to_f >= 1.9
                adapter.body.encoding.should == Encoding.find('UTF-8')
            end
        end

    end


    describe :success? do

        it 'is successful if the body has content excluding the tags' do
            adapter.stub(:body).and_return('<p>some content</p>')
            adapter.success?.should be_true
        end

        it 'is successful if the body contains images' do
            adapter.stub(:body).and_return(%Q(<img src="logo.png" />))
            adapter.success?.should be_true
        end

        it 'is not successful if the body has no content other than tags' do
            adapter.stub(:body).and_return('<p></p>')
            adapter.success?.should be_false
        end

        it 'is not successful if convert returns nil' do
            adapter.stub(:convert).and_return(nil)
            adapter.success?.should be_false
        end

        it 'is not successful if the body contains more than 50 images' do
            # Sometimes pdftohtml extracts images incorrectly, resulting
            # in thousands of PNGs being created for one image. This creates
            # a huge request spike when the converted attachment is requested.
            #
            # See bug report https://bugs.freedesktop.org/show_bug.cgi?id=77932

            # Construct mocked HTML output with 51 images
            invalid = <<-DOC
            <!DOCTYPE html>
            <HTML xmlns="http://www.w3.org/1999/xhtml" lang="" xml:lang="">
            <HEAD>
            <TITLE>Microsoft Word - FOI 12-01605 Resp 1.doc</TITLE>
            <META http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
            <META name="generator" content="pdftohtml 0.36"/>
            <META name="author" content="8065"/>
            <META name="date" content="2012-09-24T15:37:06+00:00"/>
            </HEAD>
            <BODY bgcolor="#A0A0A0" vlink="blue" link="blue">
            <A name=1></a><IMG src="FOI 12 01605 Resp 1 PDF-1_1.png"/><br/>
            <IMG src="FOI 12 01605 Resp 1 PDF-1_2.png"/><br/>
            DOC

            (3..51).each { |i| invalid += %Q(<IMG src="FOI 12 01605 Resp 1 PDF-1_#{i}.png"/><br/>) }

            invalid += <<-DOC
            &#160;<br/>
            Some Content<br/>
            <hr>
            </BODY>
            </HTML>
            DOC
            AlaveteliExternalCommand.stub(:run).and_return(invalid)

            adapter.success?.should be_false
        end

    end

end