# coding: utf-8
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
# Examples exercising parsing, header decoding and quote folding of raw
# incoming emails loaded from fixture files.
describe IncomingMessage, " when dealing with incoming mail" do

  before(:each) do
    @im = incoming_messages(:useless_incoming_message)
    load_raw_emails_data
  end

  # Drop any mail these examples caused to be queued for delivery.
  after(:all) do
    ActionMailer::Base.deliveries.clear
  end

  it 'should correctly parse multipart mails with a linebreak in the boundary marker' do
    request = info_requests(:fancy_dog_request)
    receive_incoming_mail('space-boundary.email', request.incoming_email)
    received = request.incoming_messages[1]
    received.mail.parts.size.should == 2
    received.mail.multipart?.should == true
  end

  it "should return the mail Date header date for sent at" do
    @im.parse_raw_email!(true)
    @im.reload
    @im.sent_at.should == @im.mail.date
  end

  it "should correctly fold various types of footer" do
    # Each example file is paired with a "<file>.expected" file holding the
    # text that must survive folding.
    pattern = File.join(Spec::Runner.configuration.fixture_path, "files", "email-folding-example-*.txt")
    Dir.glob(pattern).each do |path|
      folded = IncomingMessage.remove_quoted_sections(File.read(path))
      wanted = File.read("#{path}.expected")
      folded.should include(wanted)
    end
  end

  it "should ensure cached body text has been parsed correctly" do
    request = info_requests(:fancy_dog_request)
    receive_incoming_mail('quoted-subject-iso8859-1.email', request.incoming_email)
    received = request.incoming_messages[1]
    received.get_main_body_text_unfolded.should_not include("Email has no body")
  end

  it "should correctly convert HTML even when there's a meta tag asserting that it is iso-8859-1 which would normally confuse elinks" do
    request = info_requests(:fancy_dog_request)
    receive_incoming_mail('quoted-subject-iso8859-1.email', request.incoming_email)
    received = request.incoming_messages[1]
    received.parse_raw_email!
    received.get_main_body_text_part.charset.should == "iso-8859-1"
    received.get_main_body_text_internal.should include("política")
  end

  it "should unquote RFC 2047 headers" do
    request = info_requests(:fancy_dog_request)
    receive_incoming_mail('quoted-subject-iso8859-1.email', request.incoming_email)
    received = request.incoming_messages[1]
    received.mail_from.should == "Coordenação de Relacionamento, Pesquisa e Informação/CEDI"
    received.subject.should == "Câmara Responde: Banco de ideias"
  end

  it "should not error on display of a message which has no charset set on the body part and\nis not good utf-8" do
    request = info_requests(:fancy_dog_request)
    receive_incoming_mail('no-part-charset-bad-utf8.email', request.incoming_email)
    received = request.incoming_messages[1]
    received.parse_raw_email!
    received.get_main_body_text_internal.should include("The above text was badly encoded")
  end

  it "should fold multiline sections" do
    # Maps raw input text to the text expected after folding.
    folding_cases = {
      "foo\n--------\nconfidential" => "foo\nFOLDED_QUOTED_SECTION\n", # basic test
      "foo\n--------\nbar - confidential" => "foo\nFOLDED_QUOTED_SECTION\n", # allow scorechar inside folded section
      "foo\n--------\nbar\n--------\nconfidential" => "foo\n--------\nbar\nFOLDED_QUOTED_SECTION\n", # don't assume that anything after a score is a folded section
      "foo\n--------\nbar\n--------\nconfidential\n--------\nrest" => "foo\n--------\nbar\nFOLDED_QUOTED_SECTION\nrest", # don't assume that a folded section continues to the end of the message
      "foo\n--------\nbar\n- - - - - - - -\nconfidential\n--------\nrest" => "foo\n--------\nbar\nFOLDED_QUOTED_SECTION\nrest", # allow spaces in the score
    }
    folding_cases.each do |raw, folded|
      IncomingMessage.remove_quoted_sections(raw).should == folded
    end
  end

  it "should load an email with funny MIME settings" do
    ActionMailer::Base.deliveries.clear
    # just send it to the holding pen
    InfoRequest.holding_pen_request.incoming_messages.size.should == 0
    receive_incoming_mail("humberside-police-odd-mime-type.email", 'dummy')
    InfoRequest.holding_pen_request.incoming_messages.size.should == 1

    # clear the notification of new message in holding pen
    sent = ActionMailer::Base.deliveries
    sent.size.should == 1
    sent.clear

    held_message = InfoRequest.holding_pen_request.incoming_messages[0]

    # This will raise an error if the bug in TMail hasn't been fixed
    held_message.get_body_for_html_display()
  end

end
# Checks the HTML to plain text conversion keeps non-ASCII characters intact.
describe IncomingMessage, "when parsing HTML mail" do

  it "should display UTF-8 characters in the plain text version correctly" do
    markup = "foo është"
    converted = IncomingMessage._get_attachment_text_internal_one_file('text/html', markup)
    converted.should match(/është/)
  end

end
# Checks that text extraction from attachments tolerates broken archives.
describe IncomingMessage, "when getting the attachment text" do

  it "should not raise an error if the expansion of a zip file raises an error" do
    # A zip entry whose input stream blows up when it is read.
    mock_entry = mock('ZipFile entry', :file? => true)
    mock_entry.stub!(:get_input_stream).and_raise("invalid distance too far back")
    Zip::ZipFile.stub!(:open).and_return([mock_entry])

    # State the expectation explicitly so that the example asserts what its
    # name promises rather than relying on the absence of an exception.
    lambda {
      IncomingMessage._get_attachment_text_internal_one_file('application/zip', "some string")
    }.should_not raise_error
  end

end
# Checks that attachment display filenames have "/" replaced by a space.
describe IncomingMessage, " display attachments" do

  it "should not show slashes in filenames" do
    attachment = FoiAttachment.new()
    # http://www.whatdotheyknow.com/request/post_commercial_manager_librarie#incoming-17233
    attachment.filename = "FOI/09/066 RESPONSE TO FOI REQUEST RECEIVED 21st JANUARY 2009.txt"
    without_slashes = attachment.filename.tr('/', ' ')
    attachment.display_filename.should == without_slashes
  end

  it "should not show slashes in subject generated filenames" do
    attachment = FoiAttachment.new()
    # http://www.whatdotheyknow.com/request/post_commercial_manager_librarie#incoming-17233
    attachment.within_rfc822_subject = "FOI/09/066 RESPONSE TO FOI REQUEST RECEIVED 21st JANUARY 2009"
    attachment.content_type = 'text/plain'
    attachment.ensure_filename!
    without_slashes = attachment.within_rfc822_subject.tr('/', ' ') + ".txt"
    attachment.display_filename.should == without_slashes
  end

end
# Checks that quoted or forwarded sections of a message body are replaced by
# the FOLDED_QUOTED_SECTION marker.
describe IncomingMessage, " folding quoted parts of emails" do

  it 'should fold an example lotus notes quoted part converted from HTML correctly' do
    request = info_requests(:fancy_dog_request)
    receive_incoming_mail('lotus-notes-quoting.email', request.incoming_email)
    received = request.incoming_messages[1]
    received.get_main_body_text_folded.should match(/FOLDED_QUOTED_SECTION/)
  end

  it 'should fold a plain text lotus notes quoted part correctly' do
    quoted = "FOI Team\n\n\nInfo Requester =20\nSent by: Info Requester \n06/03/08 10:00\nPlease respond to\nInfo Requester "
    incoming = IncomingMessage.new()
    incoming.stub_chain(:info_request, :user_name).and_return("Info Requester")
    incoming.remove_lotus_quoting(quoted).should match(/FOLDED_QUOTED_SECTION/)
  end

  it "cope with [ in user names properly" do
    incoming = IncomingMessage.new()
    incoming.stub_chain(:info_request, :user_name).and_return("Sir [ Bobble")
    # this gives a warning if [ is in the name
    folded = incoming.remove_lotus_quoting("Sir [ Bobble \nSent by: \n")
    folded.should == "\n\nFOLDED_QUOTED_SECTION"
  end

  it 'should fold an example of another kind of forward quoting' do
    request = info_requests(:fancy_dog_request)
    receive_incoming_mail('forward-quoting-example.email', request.incoming_email)
    received = request.incoming_messages[1]
    received.get_main_body_text_folded.should match(/FOLDED_QUOTED_SECTION/)
  end

  it 'should fold a further example of forward quoting' do
    request = info_requests(:fancy_dog_request)
    receive_incoming_mail('forward-quoting-example-2.email', request.incoming_email)
    received = request.incoming_messages[1]
    folded = received.get_main_body_text_folded
    folded.should match(/FOLDED_QUOTED_SECTION/)
    # check that the quoted section incorporates both quoted messages
    folded.should_not match('Subject: RE: Freedom of Information request')
  end

end
# Checks _calculate_valid_to_reply_to against stubbed MailHandler results.
describe IncomingMessage, " checking validity to reply to" do

  # Stubs the MailHandler lookups for a mock mail and asserts the computed
  # reply validity.
  #
  # result            - the expected value of _calculate_valid_to_reply_to
  # email             - value returned by MailHandler.get_from_address
  # empty_return_path - value returned by MailHandler.empty_return_path?
  # autosubmitted     - value returned by MailHandler.get_auto_submitted
  def test_email(result, email, empty_return_path, autosubmitted = nil)
    @mail = mock('mail')
    MailHandler.stub!(:get_from_address).and_return(email)
    MailHandler.stub!(:empty_return_path?).with(@mail).and_return(empty_return_path)
    MailHandler.stub!(:get_auto_submitted).with(@mail).and_return(autosubmitted)

    @incoming_message = IncomingMessage.new()
    @incoming_message.stub!(:mail).and_return(@mail)
    @incoming_message._calculate_valid_to_reply_to.should == result
  end

  # Each row: description, expected result, from address, empty return path
  # flag and (optionally) the auto-submitted value. A missing last column
  # yields nil, which equals the helper's default.
  [
    ["says a valid email is fine",                     true,  "team@mysociety.org",          false],
    ["says postmaster email is bad",                   false, "postmaster@mysociety.org",    false],
    ["says Mailer-Daemon email is bad",                false, "Mailer-Daemon@mysociety.org", false],
    ["says case mangled MaIler-DaemOn email is bad",   false, "MaIler-DaemOn@mysociety.org", false],
    ["says Auto_Reply email is bad",                   false, "Auto_Reply@mysociety.org",    false],
    ["says DoNotReply email is bad",                   false, "DoNotReply@tube.tfl.gov.uk",  false],
    ["says a filled-out return-path is fine",          true,  "team@mysociety.org",          false],
    ["says an empty return-path is bad",               false, "team@mysociety.org",          true],
    ["says an auto-submitted keyword is bad",          false, "team@mysociety.org",          false, "auto-replied"],
  ].each do |description, expected, address, return_path_empty, auto_submitted|
    it description do
      test_email(expected, address, return_path_empty, auto_submitted)
    end
  end

end
# Checks valid_to_reply_to? for messages received from fixture email files.
describe IncomingMessage, " checking validity to reply to with real emails" do

  # Drop any mail these examples caused to be queued for delivery.
  after(:all) do
    ActionMailer::Base.deliveries.clear
  end

  it "should allow a reply to plain emails" do
    request = info_requests(:fancy_dog_request)
    receive_incoming_mail('incoming-request-plain.email', request.incoming_email)
    received = request.incoming_messages[1]
    received.valid_to_reply_to?.should == true
  end

  it "should not allow a reply to emails with empty return-paths" do
    request = info_requests(:fancy_dog_request)
    receive_incoming_mail('empty-return-path.email', request.incoming_email)
    received = request.incoming_messages[1]
    received.valid_to_reply_to?.should == false
  end

  it "should not allow a reply to emails with autoresponse headers" do
    request = info_requests(:fancy_dog_request)
    receive_incoming_mail('autoresponse-header.email', request.incoming_email)
    received = request.incoming_messages[1]
    received.valid_to_reply_to?.should == false
  end

end
# Checks that censor rules attached to an info request, plus the built-in
# masking, are applied to binary attachments, HTML and From: names.
describe IncomingMessage, " when censoring data" do

  # Builds an unsaved CensorRule with the editor metadata these examples use.
  #
  # text        - the text (or regexp source) to censor
  # replacement - the text to substitute
  # regexp      - when non-nil, assigned to the rule's regexp attribute
  def build_censor_rule(text, replacement, regexp = nil)
    rule = CensorRule.new()
    rule.text = text
    rule.regexp = regexp unless regexp.nil?
    rule.replacement = replacement
    rule.last_edit_editor = "unknown"
    rule.last_edit_comment = "none"
    rule
  end

  before(:each) do
    @test_data = "There was a mouse called Stilton, he wished that he was blue."

    @im = incoming_messages(:useless_incoming_message)

    @censor_rule_1 = build_censor_rule("Stilton", "Jarlsberg")
    @im.info_request.censor_rules << @censor_rule_1

    @censor_rule_2 = build_censor_rule("blue", "yellow")
    @im.info_request.censor_rules << @censor_rule_2

    @regex_censor_rule = build_censor_rule('m[a-z][a-z][a-z]e', 'cat', true)
    @im.info_request.censor_rules << @regex_censor_rule

    load_raw_emails_data
  end

  it "should do nothing to a JPEG" do
    data = @test_data.dup
    @im.binary_mask_stuff!(data, "image/jpeg")
    data.should == @test_data
  end

  it "should replace censor text in Word documents" do
    data = @test_data.dup
    @im.binary_mask_stuff!(data, "application/vnd.ms-word")
    data.should == "There was a xxxxx called xxxxxxx, he wished that he was xxxx."
  end

  it "should replace ASCII email addresses in Word documents" do
    orig_data = "His email was foo@bar.com"
    data = orig_data.dup
    @im.binary_mask_stuff!(data, "application/vnd.ms-word")
    data.should == "His email was xxx@xxx.xxx"
  end

  it "should replace UCS-2 addresses in Word documents" do
    orig_data = "His email was f\000o\000o\000@\000b\000a\000r\000.\000c\000o\000m\000, indeed"
    data = orig_data.dup
    @im.binary_mask_stuff!(data, "application/vnd.ms-word")
    data.should == "His email was x\000x\000x\000@\000x\000x\000x\000.\000x\000x\000x\000, indeed"
  end

  # Masks the tfl.pdf fixture and checks the email address in its extracted
  # text has been replaced.
  #
  # use_ghostscript_compression - value to set for the
  #   USE_GHOSTSCRIPT_COMPRESSION config entry while the check runs
  def pdf_replacement_test(use_ghostscript_compression)
    config = MySociety::Config.load_default()
    previous = config['USE_GHOSTSCRIPT_COMPRESSION']
    config['USE_GHOSTSCRIPT_COMPRESSION'] = use_ghostscript_compression
    begin
      orig_pdf = load_file_fixture('tfl.pdf')
      pdf = orig_pdf.dup

      orig_text = IncomingMessage._get_attachment_text_internal_one_file('application/pdf', pdf)
      orig_text.should match(/foi@tfl.gov.uk/)

      @im.binary_mask_stuff!(pdf, "application/pdf")

      masked_text = IncomingMessage._get_attachment_text_internal_one_file('application/pdf', pdf)
      masked_text.should_not match(/foi@tfl.gov.uk/)
      masked_text.should match(/xxx@xxx.xxx.xx/)
    ensure
      # Restore the setting even when an expectation above fails, so that a
      # failure here cannot change the configuration seen by later examples.
      config['USE_GHOSTSCRIPT_COMPRESSION'] = previous
    end
  end

  it "should replace everything in PDF files using pdftk" do
    pdf_replacement_test(false)
  end

  it "should replace everything in PDF files using ghostscript" do
    pdf_replacement_test(true)
  end

  it "should not produce zero length output if pdftk silently fails" do
    orig_pdf = load_file_fixture('psni.pdf')
    pdf = orig_pdf.dup
    @im.binary_mask_stuff!(pdf, "application/pdf")
    pdf.should_not == ""
  end

  it "should apply censor rules to HTML files" do
    data = @test_data.dup
    @im.html_mask_stuff!(data)
    data.should == "There was a cat called Jarlsberg, he wished that he was yellow."
  end

  it "should apply hard-coded privacy rules to HTML files" do
    data = "http://#{Configuration::domain}/c/cheese"
    @im.html_mask_stuff!(data)
    data.should == "[WDTK login link]"
  end

  it "should apply censor rules to From: addresses" do
    @im.stub!(:mail_from).and_return("Stilton Mouse")
    @im.stub!(:last_parsed).and_return(Time.now)
    safe_mail_from = @im.safe_mail_from
    safe_mail_from.should == "Jarlsberg Mouse"
  end

end
# Checks that a censor rule attached to the requesting user (rather than to
# the request) is applied when masking.
describe IncomingMessage, " when censoring whole users" do

  before(:each) do
    @test_data = "There was a mouse called Stilton, he wished that he was blue."

    @im = incoming_messages(:useless_incoming_message)

    @censor_rule_1 = CensorRule.new()
    @censor_rule_1.text = "Stilton"
    @censor_rule_1.replacement = "Gorgonzola"
    @censor_rule_1.last_edit_editor = "unknown"
    @censor_rule_1.last_edit_comment = "none"
    @im.info_request.user.censor_rules << @censor_rule_1

    load_raw_emails_data
  end

  it "should apply censor rules to HTML files" do
    masked = @test_data.dup
    @im.html_mask_stuff!(masked)
    masked.should == "There was a mouse called Gorgonzola, he wished that he was blue."
  end

  it "should replace censor text to Word documents" do
    masked = @test_data.dup
    @im.binary_mask_stuff!(masked, "application/vnd.ms-word")
    masked.should == "There was a mouse called xxxxxxx, he wished that he was blue."
  end

end
# Checks attachment extraction for a fixture email with bad uuencoding.
describe IncomingMessage, " when uudecoding bad messages" do

  it "should be able to do it at all" do
    fixture_mail = get_fixture_mail('incoming-request-bad-uuencoding.email')
    incoming = incoming_messages(:useless_incoming_message)
    incoming.stub!(:mail).and_return(fixture_mail)
    incoming.extract_attachments!

    extracted = incoming.foi_attachments
    extracted.size.should == 2
    extracted[1].filename.should == 'moo.txt'
    incoming.get_attachments_for_display.size.should == 1
  end

  it "should apply censor rules" do
    fixture_mail = get_fixture_mail('incoming-request-bad-uuencoding.email')
    incoming = incoming_messages(:useless_incoming_message)
    incoming.stub!(:mail).and_return(fixture_mail)

    request = info_requests(:fancy_dog_request)

    rule = CensorRule.new()
    rule.text = "moo"
    rule.replacement = "bah"
    rule.last_edit_editor = "unknown"
    rule.last_edit_comment = "none"
    request.censor_rules << rule

    incoming.extract_attachments!

    shown_names = incoming.get_attachments_for_display.map(&:display_filename)
    shown_names.should == ['bah.txt']
  end

end
# Checks how emails attached to other emails are exposed as display
# attachments.
describe IncomingMessage, "when messages are attached to messages" do

  it 'should expand an RFC822 attachment' do
    raw = load_file_fixture('rfc822-attachment.email')
    parsed = MailHandler.mail_from_raw_email(raw)
    incoming = incoming_messages(:useless_incoming_message)
    incoming.stub!(:mail).and_return(parsed)

    shown = incoming.get_attachments_for_display
    shown.size.should == 1

    only = shown.first
    only.content_type.should == 'text/plain'
    only.filename.should == "Freedom of Information request.txt"
    only.charset.should == "utf-8"
    only.within_rfc822_subject.should == "Freedom of Information request"
    only.hexdigest.should == 'f10fe56e4f2287685a58b71329f09639'
  end

  it "should flatten all the attachments out" do
    fixture_mail = get_fixture_mail('incoming-request-attach-attachments.email')
    incoming = incoming_messages(:useless_incoming_message)
    incoming.stub!(:mail).and_return(fixture_mail)
    incoming.extract_attachments!

    shown = incoming.get_attachments_for_display
    shown.map(&:display_filename).should == [
      'Same attachment twice.txt',
      'hello.txt',
      'hello.txt',
    ]
  end

  it 'should add headers to attached plain text message bodies' do
    raw = load_file_fixture('incoming-request-attachment-headers.email')
    parsed = MailHandler.mail_from_raw_email(raw)
    incoming = incoming_messages(:useless_incoming_message)
    incoming.stub!(:mail).and_return(parsed)

    shown = incoming.get_attachments_for_display
    shown.size.should == 2
    shown[0].body.should match('Date: Fri, 23 May 2008')
  end

end
# Checks the display attachments produced for a fixture email carrying
# Outlook (.oft) attachments.
describe IncomingMessage, "when Outlook messages are attached to messages" do

  it "should flatten all the attachments out" do
    fixture_mail = get_fixture_mail('incoming-request-oft-attachments.email')
    incoming = incoming_messages(:useless_incoming_message)
    incoming.stub!(:mail).and_return(fixture_mail)
    incoming.extract_attachments!

    shown_names = incoming.get_attachments_for_display.map(&:display_filename)
    shown_names.should == [
      'test.html', # picks HTML rather than text by default, as likely to render better
      'attach.txt',
    ]
  end

end
# Checks the display attachments produced for a fixture email carrying
# TNEF attachments.
describe IncomingMessage, "when TNEF attachments are attached to messages" do

  it "should flatten all the attachments out" do
    fixture_mail = get_fixture_mail('incoming-request-tnef-attachments.email')
    incoming = incoming_messages(:useless_incoming_message)
    incoming.stub!(:mail).and_return(fixture_mail)
    incoming.extract_attachments!

    shown_names = incoming.get_attachments_for_display.map(&:display_filename)
    shown_names.should == [
      'FOI 09 02976i.doc',
      'FOI 09 02976iii.doc',
    ]
  end

end