aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--app/models/incoming_message.rb16
-rw-r--r--spec/fixtures/files/email-folding-example-1.txt32
-rw-r--r--spec/fixtures/files/email-folding-example-1.txt.expected10
-rw-r--r--spec/fixtures/files/email-folding-example-10.txt52
-rw-r--r--spec/fixtures/files/email-folding-example-10.txt.expected25
-rw-r--r--spec/fixtures/files/email-folding-example-2.txt7
-rw-r--r--spec/fixtures/files/email-folding-example-2.txt.expected4
-rw-r--r--spec/fixtures/files/email-folding-example-3.txt18
-rw-r--r--spec/fixtures/files/email-folding-example-3.txt.expected5
-rw-r--r--spec/fixtures/files/email-folding-example-4.txt37
-rw-r--r--spec/fixtures/files/email-folding-example-4.txt.expected15
-rw-r--r--spec/fixtures/files/email-folding-example-5.txt35
-rw-r--r--spec/fixtures/files/email-folding-example-5.txt.expected24
-rw-r--r--spec/fixtures/files/email-folding-example-6.txt30
-rw-r--r--spec/fixtures/files/email-folding-example-6.txt.expected15
-rw-r--r--spec/fixtures/files/email-folding-example-7.txt30
-rw-r--r--spec/fixtures/files/email-folding-example-7.txt.expected16
-rw-r--r--spec/fixtures/files/email-folding-example-8.txt18
-rw-r--r--spec/fixtures/files/email-folding-example-8.txt.expected6
-rw-r--r--spec/fixtures/files/email-folding-example-9.txt29
-rw-r--r--spec/fixtures/files/email-folding-example-9.txt.expected9
-rw-r--r--spec/models/incoming_message_spec.rb9
22 files changed, 430 insertions, 12 deletions
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb
index 2b795ddf5..97e4a6c09 100644
--- a/app/models/incoming_message.rb
+++ b/app/models/incoming_message.rb
@@ -606,21 +606,13 @@ class IncomingMessage < ActiveRecord::Base
text.gsub!(/^(>.*\n)/, replacement)
text.gsub!(/^(On .+ (wrote|said):\n)/, replacement)
- # Multiple line sections
- # http://www.whatdotheyknow.com/request/identity_card_scheme_expenditure
- # http://www.whatdotheyknow.com/request/parliament_protest_actions
- # http://www.whatdotheyknow.com/request/64/response/102
- # http://www.whatdotheyknow.com/request/47/response/283
- # http://www.whatdotheyknow.com/request/30/response/166
- # http://www.whatdotheyknow.com/request/52/response/238
- # http://www.whatdotheyknow.com/request/224/response/328 # example with * * * * *
- # http://www.whatdotheyknow.com/request/297/response/506
- ['-', '_', '*', '#'].each do |score|
+ ['-', '_', '*', '#'].each do |scorechar|
+ score = /(?:[#{scorechar}]\s*){8,}/
text.sub!(/(Disclaimer\s+)? # appears just before
(
- \s*(?:[#{score}]\s*){8,}\s*\n.*? # top line
+ \s*#{score}\n(?:(?!#{score}\n).)*? # top line
(disclaimer:\n|confidential|received\sthis\semail\sin\serror|virus|intended\s+recipient|monitored\s+centrally|intended\s+(for\s+|only\s+for\s+use\s+by\s+)the\s+addressee|routinely\s+monitored|MessageLabs|unauthorised\s+use)
- .*?((?:[#{score}]\s*){8,}\s*\n|\z) # bottom line OR end of whole string (for ones with no terminator XXX risky)
+ .*?(?:#{score}|\z) # bottom line OR end of whole string (for ones with no terminator XXX risky)
)
/imx, replacement)
end
diff --git a/spec/fixtures/files/email-folding-example-1.txt b/spec/fixtures/files/email-folding-example-1.txt
new file mode 100644
index 000000000..9d0810a36
--- /dev/null
+++ b/spec/fixtures/files/email-folding-example-1.txt
@@ -0,0 +1,32 @@
+Dear Mr Pollard,
+
+Thank you for your email of 26 February. Please find a response attached.
+
+Yours faithfully,
+
+On behalf of James Hall, Chief Executive
+Identity and Passport Service
+
+<<9032 C Pollard final response.doc>>
+
+**********************************************************************
+
+This email and any files transmitted with it are confidential and
+
+intended solely for the use of the individual or entity to whom they
+
+are addressed. If you have received this email in error please notify
+
+the system manager.
+
+This footnote also confirms that this email message has been swept for the
+presence of computer viruses.
+
+**********************************************************************
+
+The original of this email was scanned for viruses by the Government
+Secure Intranet virus scanning service supplied by Cable&Wireless in
+partnership with MessageLabs. (CCTM Certificate Number 2007/11/0032.) On
+leaving the GSi this email was certified virus free.
+Communications via the GSi may be automatically logged, monitored and/or
+recorded for legal purposes.
diff --git a/spec/fixtures/files/email-folding-example-1.txt.expected b/spec/fixtures/files/email-folding-example-1.txt.expected
new file mode 100644
index 000000000..801542288
--- /dev/null
+++ b/spec/fixtures/files/email-folding-example-1.txt.expected
@@ -0,0 +1,10 @@
+Dear Mr Pollard,
+
+Thank you for your email of 26 February. Please find a response attached.
+
+Yours faithfully,
+
+On behalf of James Hall, Chief Executive
+Identity and Passport Service
+
+<<9032 C Pollard final response.doc>>
diff --git a/spec/fixtures/files/email-folding-example-10.txt b/spec/fixtures/files/email-folding-example-10.txt
new file mode 100644
index 000000000..0fabb7f9c
--- /dev/null
+++ b/spec/fixtures/files/email-folding-example-10.txt
@@ -0,0 +1,52 @@
+Please note: it is not possible to reply to this email. To contact the
+Department of Health, please visit the 'Contact us' page on the
+Department’s website.
+
+-----------------------------------------------------------------------------------------
+
+ Apologies that you were not able to read our previous response of 4
+ October. Please find the text of that email below.
+
+Our ref: DE00000642471
+
+Dear Ms Peters Rock,
+
+You requested your correspondence to be treated under the Freedom of
+Information Act.  However, as your correspondence asked for general
+information, rather than requesting recorded information or documentation,
+I should advise you that on this occasion we have not considered your
+correspondence under the provisions of the Act.
+
+I am sorry I cannot be more helpful.
+
+Yours sincerely,
+Simon Dove
+Customer Service Centre
+Department of Health
+
+
+
+
+-------------------------------------------------------------------------------------------------------------------------
+
+
+Please do not reply to this email. To contact the Department of Health,
+please visit the 'Contact us' page on the Department’s website, where you
+can also view our performance against quarterly service targets.
+
+
+- - Disclaimer - -
+This e-mail and any files transmitted with it are confidential. If you are
+not the intended recipient, any reading, printing, storage, disclosure,
+copying or any other action taken in respect of this e-mail is prohibited
+and may be unlawful. If you are not the intended recipient, please notify
+the sender immediately by using the reply function and then permanently
+delete what you have received.
+
+Incoming and outgoing e-mail messages are routinely monitored for
+compliance with the Department of Health's policy on the use of electronic
+communications. For more information on the Department of Health's e-mail
+policy click here http://www.dh.gov.uk/terms
+
+The original of this email was scanned for viruses by the Government Secure Intranet virus scanning service supplied by Cable&Wireless Worldwide in partnership with MessageLabs. (CCTM Certificate Number 2009/09/0052.) On leaving the GSi this email was certified virus free.
+Communications via the GSi may be automatically logged, monitored and/or recorded for legal purposes.
diff --git a/spec/fixtures/files/email-folding-example-10.txt.expected b/spec/fixtures/files/email-folding-example-10.txt.expected
new file mode 100644
index 000000000..e4f704c0e
--- /dev/null
+++ b/spec/fixtures/files/email-folding-example-10.txt.expected
@@ -0,0 +1,25 @@
+Please note: it is not possible to reply to this email. To contact the
+Department of Health, please visit the 'Contact us' page on the
+Department’s website.
+
+-----------------------------------------------------------------------------------------
+
+ Apologies that you were not able to read our previous response of 4
+ October. Please find the text of that email below.
+
+Our ref: DE00000642471
+
+Dear Ms Peters Rock,
+
+You requested your correspondence to be treated under the Freedom of
+Information Act.  However, as your correspondence asked for general
+information, rather than requesting recorded information or documentation,
+I should advise you that on this occasion we have not considered your
+correspondence under the provisions of the Act.
+
+I am sorry I cannot be more helpful.
+
+Yours sincerely,
+Simon Dove
+Customer Service Centre
+Department of Health
diff --git a/spec/fixtures/files/email-folding-example-2.txt b/spec/fixtures/files/email-folding-example-2.txt
new file mode 100644
index 000000000..13dd39a69
--- /dev/null
+++ b/spec/fixtures/files/email-folding-example-2.txt
@@ -0,0 +1,7 @@
+Preface to the message which we are not interested in
+
+-----------------------------------------------------------------------------------------
+Important message about cheese
+-----------------------------------------------------------------------------------------
+
+Actual footer that contains the word confidential
diff --git a/spec/fixtures/files/email-folding-example-2.txt.expected b/spec/fixtures/files/email-folding-example-2.txt.expected
new file mode 100644
index 000000000..e52fbe443
--- /dev/null
+++ b/spec/fixtures/files/email-folding-example-2.txt.expected
@@ -0,0 +1,4 @@
+Preface to the message which we are not interested in
+
+-----------------------------------------------------------------------------------------
+Important message about cheese
diff --git a/spec/fixtures/files/email-folding-example-3.txt b/spec/fixtures/files/email-folding-example-3.txt
new file mode 100644
index 000000000..28a3861f6
--- /dev/null
+++ b/spec/fixtures/files/email-folding-example-3.txt
@@ -0,0 +1,18 @@
+Reference : T3241/8
+
+Thank you for your e-mail enquiry of 12th February.
+
+A reply is attached.
+
+**********************************************************************
+This email and any files transmitted with it are private and intended
+solely for the use of the individual or entity to whom they are addressed.
+If you have received this email in error please return it to the address
+it came from telling them it is not for you and then delete it from your system.
+
+This email message has been swept for computer viruses.
+
+**********************************************************************
+
+The original of this email was scanned for viruses by the Government Secure Intranet virus scanning service supplied by Cable&Wireless in partnership with MessageLabs. (CCTM Certificate Number 2007/11/0032.) On leaving the GSi this email was certified virus free.
+Communications via the GSi may be automatically logged, monitored and/or recorded for legal purposes.
diff --git a/spec/fixtures/files/email-folding-example-3.txt.expected b/spec/fixtures/files/email-folding-example-3.txt.expected
new file mode 100644
index 000000000..e2cca4933
--- /dev/null
+++ b/spec/fixtures/files/email-folding-example-3.txt.expected
@@ -0,0 +1,5 @@
+Reference : T3241/8
+
+Thank you for your e-mail enquiry of 12th February.
+
+A reply is attached.
diff --git a/spec/fixtures/files/email-folding-example-4.txt b/spec/fixtures/files/email-folding-example-4.txt
new file mode 100644
index 000000000..63b94a35c
--- /dev/null
+++ b/spec/fixtures/files/email-folding-example-4.txt
@@ -0,0 +1,37 @@
+<<Freedom of Information request - Contracts or options with Kimberley
+Developments or Waitrose>>
+
+Arthur Pritchard
+Property & Assets Manager
+Tel: 01625 504234
+Fax: 01625 504268
+e-mail: [1][email address]
+
+***********************************************************************************
+The information in this Email and any attachments is personal to the
+sender and the views of the author may not necessarily reflect those
+of Macclesfield Borough Council. The information is strictly confidential
+and is intended only for the named person or organisation to whom it is
+addressed as it may contain privileged and confidential information. If
+you are not the intended recipient do not copy, distribute or use this
+Email, and please notify the sender. Please note that we cannot
+guarantee that this message or any attachment is virus free or has not
+been intercepted and amended.
+***********************************************************************************
+
+Disclaimer
+
+--------------------------------------------------------------------------
+
+This email message has been scanned for viruses by Mimecast.
+Mimecast delivers a complete managed email solution from a single web
+based platform.
+For more information please visit [2]http://www.mimecast.com
+
+--------------------------------------------------------------------------
+
+References
+
+Visible links
+1. mailto:[email address]
+2. http://www.mimecast.com/
diff --git a/spec/fixtures/files/email-folding-example-4.txt.expected b/spec/fixtures/files/email-folding-example-4.txt.expected
new file mode 100644
index 000000000..42334a290
--- /dev/null
+++ b/spec/fixtures/files/email-folding-example-4.txt.expected
@@ -0,0 +1,15 @@
+<<Freedom of Information request - Contracts or options with Kimberley
+Developments or Waitrose>>
+
+Arthur Pritchard
+Property & Assets Manager
+Tel: 01625 504234
+Fax: 01625 504268
+e-mail: [1][email address]
+FOLDED_QUOTED_SECTION
+FOLDED_QUOTED_SECTION
+References
+
+Visible links
+1. mailto:[email address]
+2. http://www.mimecast.com/
diff --git a/spec/fixtures/files/email-folding-example-5.txt b/spec/fixtures/files/email-folding-example-5.txt
new file mode 100644
index 000000000..3d0964722
--- /dev/null
+++ b/spec/fixtures/files/email-folding-example-5.txt
@@ -0,0 +1,35 @@
+Hi Simon
+
+My apologies for timescale of response. The data forwarded is a public
+register, and is updated on a frequent and regular basis; your request
+unfortunately coincided with annual leave and a monthly update of the
+spreadsheet. As the definition of an HMO under the Housing Act 2004
+differs to that under planning legislation, I have forwarded this and
+your original request on to Andy England, Development Control Manager to
+respond independantly.
+
+If I can be of further assistance please contact me
+
+Barry Turnbull
+Environmental Health Co-ordinator (Housing)
+Housing, Health and Community Safety
+
+--
+
+****************************************************************
+Any opinions expressed are not necessarily those of Penwith
+District Council. This e-mail and any attachments, replies
+and forwarded copies are confidential and are strictly for
+the use of named recipient(s) only. If you have received
+it in error you may not make use of it. Please e-mail us,
+including a copy of the message to, [email address].
+Then delete the e-mail and any copies.
+****************************************************************
+
+**********************************************************************
+This footnote confirms that this message, and any
+attachments, have been screened by McAffee
+Webshield for the presence of virus code.
+
+Penwith District Council
+**********************************************************************
diff --git a/spec/fixtures/files/email-folding-example-5.txt.expected b/spec/fixtures/files/email-folding-example-5.txt.expected
new file mode 100644
index 000000000..fbb0f0f50
--- /dev/null
+++ b/spec/fixtures/files/email-folding-example-5.txt.expected
@@ -0,0 +1,24 @@
+Hi Simon
+
+My apologies for timescale of response. The data forwarded is a public
+register, and is updated on a frequent and regular basis; your request
+unfortunately coincided with annual leave and a monthly update of the
+spreadsheet. As the definition of an HMO under the Housing Act 2004
+differs to that under planning legislation, I have forwarded this and
+your original request on to Andy England, Development Control Manager to
+respond independantly.
+
+If I can be of further assistance please contact me
+
+Barry Turnbull
+Environmental Health Co-ordinator (Housing)
+Housing, Health and Community Safety
+
+--
+FOLDED_QUOTED_SECTION
+This footnote confirms that this message, and any
+attachments, have been screened by McAffee
+Webshield for the presence of virus code.
+
+Penwith District Council
+**********************************************************************
diff --git a/spec/fixtures/files/email-folding-example-6.txt b/spec/fixtures/files/email-folding-example-6.txt
new file mode 100644
index 000000000..272d6c9da
--- /dev/null
+++ b/spec/fixtures/files/email-folding-example-6.txt
@@ -0,0 +1,30 @@
+Dear Mr. Brown,
+
+Please find attached a reply to your FOI request.
+
+Yours ever,
+
+Adetokunbo Ighodaro
+
+<<FOI 0169-08 final.doc>>
+
+***********************************************************************************
+Visit [1]http://www.fco.gov.uk for British foreign policy news and travel
+advice; and [2]http://www.i-uk.com - the essential guide to the UK.
+
+We keep and use information in line with the Data Protection Act 1998. We
+may release this personal information to other UK government departments
+and public authorities.
+
+Please note that all messages sent and received by members of the Foreign
+& Commonwealth Office and its
+missions overseas may be monitored centrally. This is done to ensure the
+integrity of the system.
+
+***********************************************************************************
+
+References
+
+Visible links
+1. http://www.fco.gov.uk/
+2. http://www.i-uk.com/
diff --git a/spec/fixtures/files/email-folding-example-6.txt.expected b/spec/fixtures/files/email-folding-example-6.txt.expected
new file mode 100644
index 000000000..58021ce12
--- /dev/null
+++ b/spec/fixtures/files/email-folding-example-6.txt.expected
@@ -0,0 +1,15 @@
+Dear Mr. Brown,
+
+Please find attached a reply to your FOI request.
+
+Yours ever,
+
+Adetokunbo Ighodaro
+
+<<FOI 0169-08 final.doc>>
+FOLDED_QUOTED_SECTION
+References
+
+Visible links
+1. http://www.fco.gov.uk/
+2. http://www.i-uk.com/ \ No newline at end of file
diff --git a/spec/fixtures/files/email-folding-example-7.txt b/spec/fixtures/files/email-folding-example-7.txt
new file mode 100644
index 000000000..e10fe4657
--- /dev/null
+++ b/spec/fixtures/files/email-folding-example-7.txt
@@ -0,0 +1,30 @@
+Mr Hearne,
+Please see attached our response to your request dated 06 March 2008.
+Kind Regards,
+Linda Dempsey
+
+Information Assistant DP/FOI
+Data Protection/Information Security
+Professional Standards
+Leicestershire Constabulary
+http://www.leics.police.uk
+mailto [Leicestershire Constabulary request email]
+Telephone +44 (0) 116 2222222
+Extn 5221 VM No. 8035
+Fax + 44 (0) 116 2485217
+
+<<0001_00035908_Resp_12RESPONSE LETTER_20080408_112311_01.TIF>>
+
+**********
+
+Internet email is not to be treated as a secure means of communication.
+
+Leicestershire Constabulary monitors all internet email activity and content.
+
+This communication is intended for the addressee(s) only. Please notify the sender if received in error. Unauthorised use or disclosure of the content may be unlawful. Opinions expressed in this document may not be official policy.
+
+Thank you for your co-operation.
+
+© Leicestershire Constabulary
+
+**********
diff --git a/spec/fixtures/files/email-folding-example-7.txt.expected b/spec/fixtures/files/email-folding-example-7.txt.expected
new file mode 100644
index 000000000..0ef8fd82b
--- /dev/null
+++ b/spec/fixtures/files/email-folding-example-7.txt.expected
@@ -0,0 +1,16 @@
+Mr Hearne,
+Please see attached our response to your request dated 06 March 2008.
+Kind Regards,
+Linda Dempsey
+
+Information Assistant DP/FOI
+Data Protection/Information Security
+Professional Standards
+Leicestershire Constabulary
+http://www.leics.police.uk
+mailto [Leicestershire Constabulary request email]
+Telephone +44 (0) 116 2222222
+Extn 5221 VM No. 8035
+Fax + 44 (0) 116 2485217
+
+<<0001_00035908_Resp_12RESPONSE LETTER_20080408_112311_01.TIF>> \ No newline at end of file
diff --git a/spec/fixtures/files/email-folding-example-8.txt b/spec/fixtures/files/email-folding-example-8.txt
new file mode 100644
index 000000000..c1899e7c8
--- /dev/null
+++ b/spec/fixtures/files/email-folding-example-8.txt
@@ -0,0 +1,18 @@
+I will be out of the office starting 11/04/2008 and will not return until
+22/04/2008.
+
+I will respond to your message when I return. If you have any urgent
+queries please ring 02085419088 for Legal Business Support queries or
+contact Eileen Perren for FOI or DP queries
+
+* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+This email and any attachments with it are intended for the addressee only.
+It may be confidential and may be the subject of legal and/or professional privilege.
+If you have received this email in error please notify the sender or [email address]
+The content may be personal or contain personal opinions and cannot be taken as an expression of the County Council's position.
+Surrey County Council reserves the right to monitor all incoming and outgoing mail.
+Whilst every care has been taken to check this outgoing e-mail for viruses, it is your responsibility to carry out any checks upon receipt.
+
+Visit the Surrey County Council website - http://www.surreycc.gov.uk
+
+* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
diff --git a/spec/fixtures/files/email-folding-example-8.txt.expected b/spec/fixtures/files/email-folding-example-8.txt.expected
new file mode 100644
index 000000000..b5dc10c0d
--- /dev/null
+++ b/spec/fixtures/files/email-folding-example-8.txt.expected
@@ -0,0 +1,6 @@
+I will be out of the office starting 11/04/2008 and will not return until
+22/04/2008.
+
+I will respond to your message when I return. If you have any urgent
+queries please ring 02085419088 for Legal Business Support queries or
+contact Eileen Perren for FOI or DP queries \ No newline at end of file
diff --git a/spec/fixtures/files/email-folding-example-9.txt b/spec/fixtures/files/email-folding-example-9.txt
new file mode 100644
index 000000000..1f3d4c34a
--- /dev/null
+++ b/spec/fixtures/files/email-folding-example-9.txt
@@ -0,0 +1,29 @@
+Dear Mr Cross
+
+Freedom of Information Request Reference No: 2008040590
+
+Yours sincerely
+
+MICHAEL HEGARTY
+
+FOI Officer
+
+**********************************************************************************************
+Please Note: Incoming and Outgoing E-mail messages are routinely monitored
+for compliance with our policy on the use of electronic communications.
+
+Interested in Occupational Health & Safety information?
+Please visit the HSE website at the following address to keep yourself up
+to date
+
+www.hse.gov.uk
+
+Or contact HSE Infoline on 0845 345 0055 or email [HSE request email]
+
+**********************************************************************************************
+The original of this email was scanned for viruses by the Government
+Secure Intranet virus scanning service supplied by Cable&Wireless in
+partnership with MessageLabs. (CCTM Certificate Number 2007/11/0032.) On
+leaving the GSi this email was certified virus free.
+Communications via the GSi may be automatically logged, monitored and/or
+recorded for legal purposes.
diff --git a/spec/fixtures/files/email-folding-example-9.txt.expected b/spec/fixtures/files/email-folding-example-9.txt.expected
new file mode 100644
index 000000000..2d2381a34
--- /dev/null
+++ b/spec/fixtures/files/email-folding-example-9.txt.expected
@@ -0,0 +1,9 @@
+Dear Mr Cross
+
+Freedom of Information Request Reference No: 2008040590
+
+Yours sincerely
+
+MICHAEL HEGARTY
+
+FOI Officer
diff --git a/spec/models/incoming_message_spec.rb b/spec/models/incoming_message_spec.rb
index d6923da21..662b4bf9a 100644
--- a/spec/models/incoming_message_spec.rb
+++ b/spec/models/incoming_message_spec.rb
@@ -17,6 +17,15 @@ describe IncomingMessage, " when dealing with incoming mail" do
TMail::Address.parse(em)
end
+ it "should correctly fold various types of footer" do
+ Dir.glob(File.join(Spec::Runner.configuration.fixture_path, "files", "email-folding-example-*.txt")).each do |file|
+ message = File.read(file)
+ parsed = IncomingMessage.remove_quoted_sections(message)
+ expected = File.read("#{file}.expected")
+ parsed.should include(expected)
+ end
+ end
+
end
describe IncomingMessage, "when parsing HTML mail" do