From c6f3630c4723194c74721300156fa464009285a6 Mon Sep 17 00:00:00 2001
From: Matthew Landauer <matthew@openaustralia.org>
Date: Fri, 25 Jan 2013 13:49:05 +1100
Subject: read_attribute does timezone conversion in rails 3. So using
 attributes_before_type_cast instead

---
 spec/lib/timezone_fixes_spec.rb | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

(limited to 'spec/lib')

diff --git a/spec/lib/timezone_fixes_spec.rb b/spec/lib/timezone_fixes_spec.rb
index 525bd7561..9d6ade526 100644
--- a/spec/lib/timezone_fixes_spec.rb
+++ b/spec/lib/timezone_fixes_spec.rb
@@ -11,14 +11,13 @@ describe "when doing things with timezones" do
       with_active_record_default_timezone :utc do
         time = Time.local(2000)
         mail_server_log_done = MailServerLogDone.create('last_stat' => time, 'filename' => 'dummy')
-        raw_saved_time = MailServerLogDone.find(mail_server_log_done.id).read_attribute(:last_stat)
+        raw_saved_time = MailServerLogDone.find(mail_server_log_done.id).attributes_before_type_cast["last_stat"]
         saved_time = MailServerLogDone.find(mail_server_log_done.id).last_stat
         assert_equal time, saved_time
-        assert_equal saved_time, raw_saved_time
         # Time is created in EST by local method (using ENV['TZ'])
         assert_equal [0, 0, 0, 1, 1, 2000, 6, 1, false, "EST"], time.to_a
         # Due to :utc active_record_default_timezone, everything saved as UTC
-        assert_equal [0, 0, 5, 1, 1, 2000, 6, 1, false, "UTC"], raw_saved_time.to_a
+        assert_equal "2000-01-01 05:00:00", raw_saved_time
         # As config.time_zone is UTC (from config default), times returned in UTC
         assert_equal [0, 0, 5, 1, 1, 2000, 6, 1, false, "UTC"], saved_time.to_a
       end
@@ -33,14 +32,12 @@ describe "when doing things with timezones" do
         Time.use_zone 'Central Time (US & Canada)' do
           time = Time.zone.local(2000)
           mail_server_log_done = MailServerLogDone.create('last_stat' => time, 'filename' => 'dummy')
-          raw_saved_time = MailServerLogDone.find(mail_server_log_done.id).read_attribute(:last_stat)
+          raw_saved_time = MailServerLogDone.find(mail_server_log_done.id).attributes_before_type_cast["last_stat"]
           saved_time = MailServerLogDone.find(mail_server_log_done.id).last_stat
-          assert_equal time, saved_time
-          assert_equal saved_time, raw_saved_time
           # Time is created in CST by Time.local (as Time.zone has been set)
           assert_equal [0, 0, 0, 1, 1, 2000, 6, 1, false, "CST"], time.to_a
           # Due to :utc active_record_default_timezone, everything saved as UTC
-          assert_equal [0, 0, 6, 1, 1, 2000, 6, 1, false, "UTC"], raw_saved_time.to_a
+          assert_equal "2000-01-01 06:00:00", raw_saved_time
           # Times returned in CST due to Time.use_zone and ActiveRecord::time_zone_aware_attributes
           # being true
           assert_equal [0, 0, 0, 1, 1, 2000, 6, 1, false, "CST"], saved_time.to_a
@@ -55,14 +52,13 @@ describe "when doing things with timezones" do
      with_active_record_default_timezone :local do
        time = Time.utc(2000)
        mail_server_log_done = MailServerLogDone.create('last_stat' => time, 'filename' => 'dummy')
-       raw_saved_time = MailServerLogDone.find(mail_server_log_done.id).read_attribute(:last_stat)
+       raw_saved_time = MailServerLogDone.find(mail_server_log_done.id).attributes_before_type_cast["last_stat"]
        saved_time = MailServerLogDone.find(mail_server_log_done.id).last_stat
        assert_equal time, saved_time
-       assert_equal saved_time, raw_saved_time
        # Time is created in UTC by Time.utc method
        assert_equal [0, 0, 0, 1, 1, 2000, 6, 1, false, "UTC"], time.to_a
        # Due to :local active_record_default_timezone, saved as EST
-       assert_equal [0, 0, 19, 31, 12, 1999, 5, 365, false, "EST"], raw_saved_time.to_a
+       assert_equal "1999-12-31 19:00:00", raw_saved_time
        # As config.time_zone is UTC (from config default), times returned in UTC
        assert_equal [0, 0, 0, 1, 1, 2000, 6, 1, false, "UTC"], saved_time.to_a
      end
@@ -76,14 +72,13 @@ describe "when doing things with timezones" do
         Time.use_zone 'Central Time (US & Canada)' do
           time = Time.zone.local(2000)
           mail_server_log_done = MailServerLogDone.create('last_stat' => time, 'filename' => 'dummy')
-          raw_saved_time = MailServerLogDone.find(mail_server_log_done.id).read_attribute(:last_stat)
+          raw_saved_time = MailServerLogDone.find(mail_server_log_done.id).attributes_before_type_cast["last_stat"]
           saved_time = MailServerLogDone.find(mail_server_log_done.id).last_stat
           assert_equal time, saved_time
-          assert_equal saved_time, raw_saved_time
           # Time is created in CST by Time.zone.local
           assert_equal [0, 0, 0, 1, 1, 2000, 6, 1, false, "CST"], time.to_a
           # Due to :local active_record_default_timezone, saved as EST
-          assert_equal [0, 0, 1, 1, 1, 2000, 6, 1, false, "EST"], raw_saved_time.to_a
+          assert_equal "2000-01-01 01:00:00", raw_saved_time
           # Due to Time.use_zone, and ActiveRecord::time_zone_aware_attributes
           # being true, time returned in CST
           assert_equal [0, 0, 0, 1, 1, 2000, 6, 1, false, "CST"], saved_time.to_a
-- 
cgit v1.2.3


From eef9d83ff28b9d3fdea11dabfab465adefbdf683 Mon Sep 17 00:00:00 2001
From: Matthew Landauer <matthew@openaustralia.org>
Date: Fri, 25 Jan 2013 13:53:20 +1100
Subject: Remove timezone monkeypatch fixes because they're not necessary in
 Rails 3 anymore

---
 spec/lib/timezone_fixes_spec.rb | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'spec/lib')

diff --git a/spec/lib/timezone_fixes_spec.rb b/spec/lib/timezone_fixes_spec.rb
index 9d6ade526..8a9a3bf31 100644
--- a/spec/lib/timezone_fixes_spec.rb
+++ b/spec/lib/timezone_fixes_spec.rb
@@ -3,6 +3,11 @@
 # We use MailServerLogDone here just as a totally random model that has a datetime type.
 require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
 
+# In Rails 3 the monkeypatch that these tests are testing is not necessary. So,
+# since these tests are testing the Rails internals you could argue that they shouldn't
+# be here. Well, you're right. But let's leave them in for the time being until the upgrade is finished.
+# Then, we should probably delete this whole file
+
 describe "when doing things with timezones" do
 
   it "should preserve time objects with local time conversion to default timezone UTC
-- 
cgit v1.2.3


From 1ed216502e3b2ef22a1765f985aebf0c5920153e Mon Sep 17 00:00:00 2001
From: Matthew Landauer <matthew@openaustralia.org>
Date: Fri, 25 Jan 2013 14:25:25 +1100
Subject: Rename mailer method to avoid naming conflict in Rails 3

---
 spec/lib/sendmail_return_path_spec.rb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'spec/lib')

diff --git a/spec/lib/sendmail_return_path_spec.rb b/spec/lib/sendmail_return_path_spec.rb
index 7708edb35..137869b6e 100644
--- a/spec/lib/sendmail_return_path_spec.rb
+++ b/spec/lib/sendmail_return_path_spec.rb
@@ -28,7 +28,7 @@ describe "when sending email with an altered return path" do
         Net::SMTP.stub!(:new).and_return(mock_smtp)
 
         with_delivery_method :smtp do
-            ContactMailer.deliver_message(
+            ContactMailer.deliver_to_admin_message(
                 "Mr. Test", "test@localhost", "Test script spec/lib/sendmail_return_path_spec.rb",
                 "This is just a test for a test script", nil, nil, nil
             )
@@ -42,7 +42,7 @@ describe "when sending email with an altered return path" do
         with_stub_popen do
             IO.should_receive(:popen).once.with('/usr/sbin/sendmail -i -t -f "test@localhost"', "w+")
             with_delivery_method :sendmail do
-                ContactMailer.deliver_message(
+                ContactMailer.deliver_to_admin_message(
                     "Mr. Test", "test@localhost", "Test script spec/lib/sendmail_return_path_spec.rb",
                     "This is just a test for a test script", nil, nil, nil
                 )
-- 
cgit v1.2.3


From 65680320bee44812394041492c8492e95b1a3d78 Mon Sep 17 00:00:00 2001
From: Matthew Landauer <matthew@openaustralia.org>
Date: Fri, 25 Jan 2013 14:43:40 +1100
Subject: In rails 3 sendmail is passed the destination of the email on the
 commandline. Also, monkeypatch not needed anymore

---
 spec/lib/sendmail_return_path_spec.rb | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'spec/lib')

diff --git a/spec/lib/sendmail_return_path_spec.rb b/spec/lib/sendmail_return_path_spec.rb
index 137869b6e..b4bdda12c 100644
--- a/spec/lib/sendmail_return_path_spec.rb
+++ b/spec/lib/sendmail_return_path_spec.rb
@@ -1,5 +1,10 @@
 # This is a test of the monkey patches in sendmail_return_path.rb
 
+# In Rails 3 the monkeypatches are not needed anymore because sendmail now has the "-f" flag
+# set correctly. So, strictly these tests are testing the Rails internals. So, that means we really
+# should delete them. Let's do that later when things have settled down. For the time being leave
+# them in
+
 require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
 
 describe "when sending email with an altered return path" do
@@ -40,7 +45,7 @@ describe "when sending email with an altered return path" do
 
     it "should set the return path when sending email using sendmail" do
         with_stub_popen do
-            IO.should_receive(:popen).once.with('/usr/sbin/sendmail -i -t -f "test@localhost"', "w+")
+            IO.should_receive(:popen).once.with('/usr/sbin/sendmail -i -t -f "test@localhost" postmaster@localhost', "w+")
             with_delivery_method :sendmail do
                 ContactMailer.deliver_to_admin_message(
                     "Mr. Test", "test@localhost", "Test script spec/lib/sendmail_return_path_spec.rb",
-- 
cgit v1.2.3


From c0360fa374b096964b67ff5195e86ef83936c62f Mon Sep 17 00:00:00 2001
From: Matthew Landauer <matthew@openaustralia.org>
Date: Tue, 29 Jan 2013 10:40:07 +1100
Subject: Replace use of have_text with contain from webrat or match

---
 spec/lib/mail_handler/mail_handler_spec.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'spec/lib')

diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb
index 48c32e2bc..79b779687 100644
--- a/spec/lib/mail_handler/mail_handler_spec.rb
+++ b/spec/lib/mail_handler/mail_handler_spec.rb
@@ -22,7 +22,7 @@ describe 'when creating a mail object from raw data' do
 
     it 'should convert an iso8859 email to utf8' do
         mail = get_fixture_mail('iso8859_2_raw_email.email')
-        mail.subject.should have_text(/gjatë/u)
+        mail.subject.should match /gjatë/u
         MailHandler.get_part_body(mail).is_utf8?.should == true
     end
 
-- 
cgit v1.2.3


From 532a21ba4941197a13524ac3f1ce9267a46bec1d Mon Sep 17 00:00:00 2001
From: Henare Degan <henare.degan@gmail.com>
Date: Mon, 25 Feb 2013 17:25:59 +1100
Subject: Update to new mail sending API

---
 spec/lib/sendmail_return_path_spec.rb | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'spec/lib')

diff --git a/spec/lib/sendmail_return_path_spec.rb b/spec/lib/sendmail_return_path_spec.rb
index b4bdda12c..83436c2bd 100644
--- a/spec/lib/sendmail_return_path_spec.rb
+++ b/spec/lib/sendmail_return_path_spec.rb
@@ -33,10 +33,10 @@ describe "when sending email with an altered return path" do
         Net::SMTP.stub!(:new).and_return(mock_smtp)
 
         with_delivery_method :smtp do
-            ContactMailer.deliver_to_admin_message(
+            ContactMailer.to_admin_message(
                 "Mr. Test", "test@localhost", "Test script spec/lib/sendmail_return_path_spec.rb",
                 "This is just a test for a test script", nil, nil, nil
-            )
+            ).deliver
         end
 
         deliveries = ActionMailer::Base.deliveries
@@ -47,10 +47,10 @@ describe "when sending email with an altered return path" do
         with_stub_popen do
             IO.should_receive(:popen).once.with('/usr/sbin/sendmail -i -t -f "test@localhost" postmaster@localhost', "w+")
             with_delivery_method :sendmail do
-                ContactMailer.deliver_to_admin_message(
+                ContactMailer.to_admin_message(
                     "Mr. Test", "test@localhost", "Test script spec/lib/sendmail_return_path_spec.rb",
                     "This is just a test for a test script", nil, nil, nil
-                )
+                ).deliver
             end
         end
 
-- 
cgit v1.2.3


From ad0af8f9deb3da28c3630e369b74932d465b349f Mon Sep 17 00:00:00 2001
From: Mark Longair <mhl@pobox.com>
Date: Mon, 29 Apr 2013 17:06:01 +0100
Subject: Add tests for TNEF attachments that should be handled

These two cases were ignored previously, and we need to make sure
that they still are under the switch from TMail to Mail.

One TNEF attachment is a heavily truncated one from a real example
from Alaveteli that has no personal data in it.  The other is
an example from the tests in the distribution of the tnef
package for Ubuntu 1.4.9-1 - it's an HTML version of the US
constitution.
---
 spec/lib/mail_handler/mail_handler_spec.rb | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'spec/lib')

diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb
index 79b779687..487f3bf0d 100644
--- a/spec/lib/mail_handler/mail_handler_spec.rb
+++ b/spec/lib/mail_handler/mail_handler_spec.rb
@@ -304,6 +304,30 @@ describe 'when getting attachment attributes' do
         attributes = MailHandler.get_attachment_attributes(mail)
     end
 
+    it 'should ignore truncated TNEF attachment' do
+        mail = get_fixture_mail('tnef-attachment-truncated.email')
+        attributes = MailHandler.get_attachment_attributes(mail)
+        attributes.length.should == 2
+    end
+
+    it 'should ignore a TNEF attachment with no usable contents' do
+        # FIXME: "no usable contents" is slightly misleading.  The
+        # attachment in this example email does have usable content in
+        # the body of the TNEF attachment, but the invocation of tnef
+        # historically used to unpack these attachments doesn't add
+        # the --save-body parameter, so that they have been ignored so
+        # far.  We probably should include the body from such
+        # attachments, but, at the moment, with the pending upgrade to
+        # Rails 3, we just want to check that the behaviour is the
+        # same as before.
+        mail = get_fixture_mail('tnef-attachment-empty.email')
+        attributes = MailHandler.get_attachment_attributes(mail)
+        attributes.length.should == 2
+        # This is the size of the TNEF-encoded attachment; currently,
+        # we expect the code just to return this without decoding:
+        attributes[1][:body].length.should == 7769
+    end
+
     it 'should produce a consistent set of url_part_numbers, content_types, within_rfc822_subjects
         and filenames from an example mail with lots of attachments' do
         mail = get_fixture_mail('many-attachments-date-header.email')
-- 
cgit v1.2.3


From f93caeb5fad489943615453304b8b41062e67aa5 Mon Sep 17 00:00:00 2001
From: Mark Longair <mhl@pobox.com>
Date: Tue, 30 Apr 2013 14:50:25 +0100
Subject: Add a test to check that anything in the MIME epilogue is ignored

There is currently a difference in behaviour in the parsing
of nested MIME multipart attachments between the Mail and
TMail based backends.  This commit adds a test that will
pass if the behaviour is the same as the old (TMail-based)
version, which I believe is correct according to RFC 1521.
The example email has a PNG attachment after the final MIME
boundary, and the RFC says that anything after the final
boundary ("the epilogue") should be ignored.
---
 spec/lib/mail_handler/mail_handler_spec.rb | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'spec/lib')

diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb
index 487f3bf0d..eca5e2dd4 100644
--- a/spec/lib/mail_handler/mail_handler_spec.rb
+++ b/spec/lib/mail_handler/mail_handler_spec.rb
@@ -310,6 +310,17 @@ describe 'when getting attachment attributes' do
         attributes.length.should == 2
     end
 
+    it 'should ignore anything beyond the final MIME boundary' do
+        # This example raw email has a premature closing boundary for
+        # the outer multipart/mixed - my reading of RFC 1521 is that
+        # the "epilogue" beyond that should be ignored.
+        # See https://github.com/mysociety/alaveteli/issues/922 for
+        # more discussion.
+        mail = get_fixture_mail('nested-attachments-premature-end.email')
+        attributes = MailHandler.get_attachment_attributes(mail)
+        attributes.length.should == 3
+    end
+
     it 'should ignore a TNEF attachment with no usable contents' do
         # FIXME: "no usable contents" is slightly misleading.  The
         # attachment in this example email does have usable content in
-- 
cgit v1.2.3


From d51afddb19c2520542d0ad92c8afa2085fae300d Mon Sep 17 00:00:00 2001
From: Mark Longair <mhl@pobox.com>
Date: Tue, 30 Apr 2013 18:07:50 +0100
Subject: Add a test for a missing final MIME boundary

If there is a missing final MIME boundary, the behaviour
of Alaveteli with the TMail backend was to still parse
the attachment, but with the new code it currently throws
an exception.  This commit adds a test that asserts that
the attachment should be parsed despite the email being
malformed in this way.
---
 spec/lib/mail_handler/mail_handler_spec.rb | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'spec/lib')

diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb
index eca5e2dd4..6b01326ed 100644
--- a/spec/lib/mail_handler/mail_handler_spec.rb
+++ b/spec/lib/mail_handler/mail_handler_spec.rb
@@ -321,6 +321,15 @@ describe 'when getting attachment attributes' do
         attributes.length.should == 3
     end
 
+    it 'should cope with a missing final MIME boundary' do
+        mail = get_fixture_mail('multipart-no-final-boundary.email')
+        attributes = MailHandler.get_attachment_attributes(mail)
+        attributes.length.should == 1
+        attributes[0][:body].should match(/This is an acknowledgement of your email/)
+        attributes[0][:content_type].should == "text/html"
+        attributes[0][:url_part_number].should == 1
+    end
+
     it 'should ignore a TNEF attachment with no usable contents' do
         # FIXME: "no usable contents" is slightly misleading.  The
         # attachment in this example email does have usable content in
-- 
cgit v1.2.3


From ec414d4dcb0c027be6c59ce873127dc10037dc50 Mon Sep 17 00:00:00 2001
From: Mark Longair <mhl@pobox.com>
Date: Wed, 1 May 2013 13:48:51 +0100
Subject: Add a test for parsing a malformed email

This example email indicates the wrong charset and includes
a top bit set character despite Content-Transfer-Encoding: 7bit
- nonetheless, we should be able to convert it to UTF-8 and
interpret the character correctly.
---
 spec/lib/mail_handler/mail_handler_spec.rb | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'spec/lib')

diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb
index 6b01326ed..3f3be1f20 100644
--- a/spec/lib/mail_handler/mail_handler_spec.rb
+++ b/spec/lib/mail_handler/mail_handler_spec.rb
@@ -26,6 +26,21 @@ describe 'when creating a mail object from raw data' do
         MailHandler.get_part_body(mail).is_utf8?.should == true
     end
 
+    it 'should convert a Windows-1252 body mislabelled as ISO-8859-1 to UTF-8' do
+        mail = get_fixture_mail('mislabelled-as-iso-8859-1.email')
+        body = MailHandler.get_part_body(mail)
+        body.is_utf8?.should == true
+        # This email is broken in at least these two ways:
+        #  1. It contains a top bit set character (0x96) despite the
+        #     "Content-Transfer-Encoding: 7bit"
+        #  2. The charset in the Content-Type header is "iso-8859-1"
+        #     but 0x96 is actually a Windows-1252 en dash, which would
+        #     be Unicode codepoint 2013.  It should be possible to
+        #     spot the mislabelling, since 0x96 isn't a valid
+        #     ISO-8859-1 character.
+        body.should match / \xe2\x80\x93 /
+    end
+
 end
 
 describe 'when asked for the from name' do
-- 
cgit v1.2.3


From 55fd2004f75b2c77c6f875aa5a392bb375f82657 Mon Sep 17 00:00:00 2001
From: Mark Longair <mhl@pobox.com>
Date: Mon, 13 May 2013 17:51:59 +0100
Subject: Fix a syntax error

---
 spec/lib/mail_handler/mail_handler_spec.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'spec/lib')

diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb
index 3f3be1f20..2a083d65c 100644
--- a/spec/lib/mail_handler/mail_handler_spec.rb
+++ b/spec/lib/mail_handler/mail_handler_spec.rb
@@ -38,7 +38,7 @@ describe 'when creating a mail object from raw data' do
         #     be Unicode codepoint 2013.  It should be possible to
         #     spot the mislabelling, since 0x96 isn't a valid
         #     ISO-8859-1 character.
-        body.should match / \xe2\x80\x93 /
+        body.should match(/ \xe2\x80\x93 /)
     end
 
 end
-- 
cgit v1.2.3


From 95cf55aad1f0985d28c28beb61e122dc7465b039 Mon Sep 17 00:00:00 2001
From: Mark Longair <mhl@pobox.com>
Date: Wed, 15 May 2013 14:52:07 +0100
Subject: Add functions for converting from arbitrary text data to UTF-8

Throughout the codebase it is simplest and most consistent
if we could assume that all text/* attachments are represented
by UTF-8 strings, and this was largely true with the TMail
backend which ensured that all returned text parts were in
UTF-8.  We have to change the replacement Mail-based backend to
similarly attempt to convert text parts to UTF-8.  This commit
introduces two functions which are useful for this.

The normalize_string_to_utf8 function will try various
encodings, either suggested or guessed (with charlock_holmes)
to convert the passed string to UTF-8, and if it can't find a
suitable encoding will throw an exception.

Unfortunately, the current behaviour of the site is that
uninterpretable text/* attachments are still passed around and
mangled to UTF-8 just before display.  To mimic this it's also
useful to have the convert_string_to_utf8_or_binary function,
which tries to convert the string to UTF-8 with
normalize_string_to_utf8, but if that's not possible just
returns the original string.  (In Ruby 1.9, encoding will be
set to UTF-8 or ASCII-8BIT appropriately.)
---
 spec/lib/basic_encoding_tests.rb | 157 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 157 insertions(+)
 create mode 100644 spec/lib/basic_encoding_tests.rb

(limited to 'spec/lib')

diff --git a/spec/lib/basic_encoding_tests.rb b/spec/lib/basic_encoding_tests.rb
new file mode 100644
index 000000000..35d35fd4a
--- /dev/null
+++ b/spec/lib/basic_encoding_tests.rb
@@ -0,0 +1,157 @@
+# -*- coding: utf-8 -*-
+require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
+
+# Packs integer byte values into a String tagged with claimed_encoding
+# (ASCII-8BIT when none is given); String#force_encoding has no bang form.
+def bytes_to_binary_string( bytes, claimed_encoding = nil )
+    claimed_encoding ||= 'ASCII-8BIT'
+    bytes_string = bytes.pack('c*')
+    bytes_string.force_encoding claimed_encoding if RUBY_VERSION.to_f >= 1.9
+    bytes_string
+end
+
+random_string = bytes_to_binary_string [ 0x0f, 0x58, 0x1c, 0x8f, 0xa4, 0xcf,
+                                         0xf6, 0x8c, 0x9d, 0xa7, 0x06, 0xd9,
+                                         0xf7, 0x90, 0x6c, 0x6f]
+
+windows_1252_string = bytes_to_binary_string [ 0x44, 0x41, 0x53, 0x48, 0x20,
+                                               0x96, 0x20, 0x44, 0x41, 0x53,
+                                               0x48 ]
+
+# It's a shame this example is so long, but if we don't take enough it
+# gets misinterpreted as Shift_JIS
+
+gb_18030_bytes = [ 0xb9, 0xf3, 0xb9, 0xab, 0xcb, 0xbe, 0xb8, 0xba, 0xd4, 0xf0,
+                   0xc8, 0xcb, 0x28, 0xbe, 0xad, 0xc0, 0xed, 0x2f, 0xb2, 0xc6,
+                   0xce, 0xf1, 0x29, 0xc4, 0xfa, 0xba, 0xc3, 0xa3, 0xba, 0x0d,
+                   0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+                   0x20, 0x20, 0x20, 0xb1, 0xbe, 0xb9, 0xab, 0xcb, 0xbe, 0xd4,
+                   0xda, 0x31, 0x39, 0x39, 0x37, 0xc4, 0xea, 0xb3, 0xc9, 0xc1,
+                   0xa2, 0xb9, 0xfa, 0xbc, 0xd2, 0xb9, 0xa4, 0xc9, 0xcc, 0xd7,
+                   0xa2, 0xb2, 0xe1, 0x2e, 0xca, 0xb5, 0xc1, 0xa6, 0xd0, 0xdb,
+                   0xba, 0xf1, 0xa1, 0xa3, 0xd3, 0xd0, 0xb6, 0xc0, 0xc1, 0xa2,
+                   0xcb, 0xb0, 0xce, 0xf1, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20,
+                   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xd7, 0xa8, 0xd2, 0xb5,
+                   0xc8, 0xcb, 0xd4, 0xb1, 0x3b, 0xd4, 0xda, 0xc8, 0xab, 0xb9,
+                   0xfa, 0xb8, 0xf7, 0xb3, 0xc7, 0xca, 0xd0, 0xc9, 0xe8, 0xc1,
+                   0xa2, 0xb7, 0xd6, 0xb9, 0xab, 0xcb, 0xbe, 0xa3, 0xa8, 0xd5,
+                   0xe3, 0xbd, 0xad, 0xa1, 0xa2, 0xc9, 0xcf, 0xba, 0xa3, 0xa1,
+                   0xa2, 0xb9, 0xe3, 0xd6, 0xdd, 0xa1, 0xa2, 0xbd, 0xad, 0xcb,
+                   0xd5, 0xb5, 0xc8, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+                   0x20, 0x20, 0x20, 0x20, 0x20, 0xb5, 0xd8, 0xb7, 0xbd, 0xa3,
+                   0xa9, 0xd2, 0xf2, 0xbd, 0xf8, 0xcf, 0xee, 0xbd, 0xcf, 0xb6,
+                   0xe0, 0xcf, 0xd6, 0xcd, 0xea, 0xb3, 0xc9, 0xb2, 0xbb, 0xc1,
+                   0xcb, 0xc3, 0xbf, 0xd4, 0xc2, 0xcf, 0xfa, 0xca, 0xdb, 0xb6,
+                   0xee, 0xb6, 0xc8, 0xa1, 0xa3, 0xc3, 0xbf, 0xd4, 0xc2, 0xd3,
+                   0xd0, 0xd2, 0xbb, 0xb2, 0xbf, 0xb7, 0xd6, 0x0d, 0x0a, 0x20,
+                   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xd4,
+                   0xf6, 0xd6, 0xb5, 0xb6, 0x90, 0xa3, 0xa8, 0x36, 0x2d, 0x37,
+                   0x25, 0xd7, 0xf3, 0xd3, 0xd2, 0x29, 0xba, 0xcd, 0xc6, 0xd5,
+                   0xc6, 0xb1, 0xa3, 0xa8, 0x30, 0x2e, 0x35, 0x25, 0x2d, 0x32,
+                   0x25, 0x20, 0xd7, 0xf3, 0xd3, 0xd2, 0xa3, 0xa9, 0xd3, 0xc5,
+                   0xbb, 0xdd, 0xb4, 0xfa, 0xbf, 0xaa, 0xbb, 0xf2, 0xba, 0xcf,
+                   0xd7, 0xf7, 0xa3, 0xac, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20,
+                   0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xb5, 0xe3, 0xca, 0xfd,
+                   0xbd, 0xcf, 0xb5, 0xcd, 0xa1, 0xa3, 0xb4, 0xfa, 0xc0, 0xed,
+                   0xb7, 0xb6, 0xce, 0xa7, 0xc8, 0xe7, 0xcf, 0xc2, 0xa3, 0xba,
+                   0x0d, 0x0a ]
+
+gb_18030_spam_string = bytes_to_binary_string gb_18030_bytes
+
+describe "normalize_string_to_utf8" do
+
+    describe "when passed uniterpretable character data" do
+
+        it "should reject it as invalid" do
+
+            expect {
+                normalize_string_to_utf8 random_string
+            }.to raise_error(EncodingNormalizationError)
+
+            expect {
+                normalize_string_to_utf8 random_string, 'UTF-8'
+            }.to raise_error(EncodingNormalizationError)
+
+        end
+    end
+
+    describe "when passed unlabelled Windows 1252 data" do
+
+        it "should correctly convert it to UTF-8" do
+
+            normalized = normalize_string_to_utf8 windows_1252_string
+
+            normalized.should ==  "DASH – DASH"
+
+        end
+
+    end
+
+    describe "when passed GB 18030 data" do
+
+        it "should correctly convert it to UTF-8 if unlabelled" do
+
+            normalized = normalize_string_to_utf8 gb_18030_spam_string
+
+            normalized.should start_with("贵公司负责人")
+
+        end
+
+    end
+
+end
+
+describe "convert_string_to_utf8_or_binary" do
+    # Expectation: unconvertible input comes back unchanged as binary, anything else as UTF-8.
+    describe "when passed uniterpretable character data" do
+
+        it "should return it as a binary string" do
+            # An Encoding object never equals a String, so encoding names are compared via to_s
+            converted = convert_string_to_utf8_or_binary random_string
+            converted.should == random_string
+
+            if RUBY_VERSION.to_f >= 1.9
+                converted.encoding.to_s.should == 'ASCII-8BIT'
+            end
+
+            converted = convert_string_to_utf8_or_binary random_string,'UTF-8'
+            converted.should == random_string
+
+            if RUBY_VERSION.to_f >= 1.9
+                converted.encoding.to_s.should == 'ASCII-8BIT'
+            end
+
+        end
+    end
+
+    describe "when passed unlabelled Windows 1252 data" do
+
+        it "should correctly convert it to UTF-8" do
+
+            converted = convert_string_to_utf8_or_binary windows_1252_string
+
+            converted.should ==  "DASH – DASH"
+
+            if RUBY_VERSION.to_f >= 1.9
+                converted.encoding.to_s.should == 'UTF-8'
+            end
+        end
+
+    end
+
+    describe "when passed GB 18030 data" do
+
+        it "should correctly convert it to UTF-8 if unlabelled" do
+
+            converted = convert_string_to_utf8_or_binary gb_18030_spam_string
+
+            converted.should start_with("贵公司负责人")
+
+            if RUBY_VERSION.to_f >= 1.9
+                converted.encoding.to_s.should == 'UTF-8'
+            end
+        end
+
+    end
+
+end
-- 
cgit v1.2.3


From ad56713504b3bb59d32e4f61d30c41fcab89db86 Mon Sep 17 00:00:00 2001
From: Mark Longair <mhl@pobox.com>
Date: Mon, 13 May 2013 17:51:44 +0100
Subject: Add another mail parsing test

At one point in development this email was misparsed, so I've
added this as a test to check for regressions.
---
 spec/lib/mail_handler/mail_handler_spec.rb | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'spec/lib')

diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb
index 2a083d65c..048bc3eaf 100644
--- a/spec/lib/mail_handler/mail_handler_spec.rb
+++ b/spec/lib/mail_handler/mail_handler_spec.rb
@@ -297,6 +297,13 @@ describe 'when getting attachment attributes' do
         attributes.size.should == 2
     end
 
+    it 'should get one attachment from a multipart mail with text and HTML alternatives, which should be UTF-8' do
+        mail = get_fixture_mail('iso8859_2_raw_email.email')
+        attributes = MailHandler.get_attachment_attributes(mail)
+        attributes.length.should == 1
+        attributes[0][:body].is_utf8?.should == true
+    end
+
     it 'should expand a mail attached as text' do
         # Note that this spec will only pass using Tmail in the timezone set as datetime headers
         # are rendered out in the local time - using the Mail gem this is not necessary
-- 
cgit v1.2.3


From 527669bdb09a5d3add9270983a93320555e8bf7c Mon Sep 17 00:00:00 2001
From: Mark Longair <mhl@pobox.com>
Date: Thu, 16 May 2013 08:34:55 +0100
Subject: Mark as "pending" two tests relating to odd MIME boundary cases

These cases are rare, and probably need to be resolved by
reporting issues against the Mail gem (although it's debatable
what the more correct or pragmatic behaviour should be in both
cases).
---
 spec/lib/mail_handler/mail_handler_spec.rb | 32 +++++++++++++++++-------------
 1 file changed, 18 insertions(+), 14 deletions(-)

(limited to 'spec/lib')

diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb
index 048bc3eaf..5e00fda16 100644
--- a/spec/lib/mail_handler/mail_handler_spec.rb
+++ b/spec/lib/mail_handler/mail_handler_spec.rb
@@ -333,23 +333,27 @@ describe 'when getting attachment attributes' do
     end
 
     it 'should ignore anything beyond the final MIME boundary' do
-        # This example raw email has a premature closing boundary for
-        # the outer multipart/mixed - my reading of RFC 1521 is that
-        # the "epilogue" beyond that should be ignored.
-        # See https://github.com/mysociety/alaveteli/issues/922 for
-        # more discussion.
-        mail = get_fixture_mail('nested-attachments-premature-end.email')
-        attributes = MailHandler.get_attachment_attributes(mail)
-        attributes.length.should == 3
+        pending do
+            # This example raw email has a premature closing boundary for
+            # the outer multipart/mixed - my reading of RFC 1521 is that
+            # the "epilogue" beyond that should be ignored.
+            # See https://github.com/mysociety/alaveteli/issues/922 for
+            # more discussion.
+            mail = get_fixture_mail('nested-attachments-premature-end.email')
+            attributes = MailHandler.get_attachment_attributes(mail)
+            attributes.length.should == 3
+        end
     end
 
     it 'should cope with a missing final MIME boundary' do
-        mail = get_fixture_mail('multipart-no-final-boundary.email')
-        attributes = MailHandler.get_attachment_attributes(mail)
-        attributes.length.should == 1
-        attributes[0][:body].should match(/This is an acknowledgement of your email/)
-        attributes[0][:content_type].should == "text/html"
-        attributes[0][:url_part_number].should == 1
+        pending do
+            mail = get_fixture_mail('multipart-no-final-boundary.email')
+            attributes = MailHandler.get_attachment_attributes(mail)
+            attributes.length.should == 1
+            attributes[0][:body].should match(/This is an acknowledgement of your email/)
+            attributes[0][:content_type].should == "text/html"
+            attributes[0][:url_part_number].should == 1
+        end
     end
 
     it 'should ignore a TNEF attachment with no usable contents' do
-- 
cgit v1.2.3


From 6530624e51a5a7c6c6cc870cf1e9114c7da6396d Mon Sep 17 00:00:00 2001
From: Louise Crow <louise.crow@gmail.com>
Date: Thu, 16 May 2013 16:04:16 +0100
Subject: Add failing test for case where a mail part has a content-type header
 with no charset field.

---
 spec/lib/mail_handler/mail_handler_spec.rb | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'spec/lib')

diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb
index 5e00fda16..c49e2ea07 100644
--- a/spec/lib/mail_handler/mail_handler_spec.rb
+++ b/spec/lib/mail_handler/mail_handler_spec.rb
@@ -290,6 +290,12 @@ end
 
 describe 'when getting attachment attributes' do
 
+    it 'should handle a mail with a non-multipart part with no charset in the Content-Type header' do
+        mail = get_fixture_mail('part-without-charset-in-content-type.email')
+        attributes = MailHandler.get_attachment_attributes(mail)
+        attributes.size.should == 2
+    end
+
     it 'should get two attachment parts from a multipart mail with text and html alternatives
     and an image' do
         mail = get_fixture_mail('quoted-subject-iso8859-1.email')
-- 
cgit v1.2.3


From 46e7df935929793fafb6069fbd272f5a35752e89 Mon Sep 17 00:00:00 2001
From: Mark Longair <mhl@pobox.com>
Date: Fri, 17 May 2013 11:48:14 +0100
Subject: Cope with emails with a missing final MIME boundary

The Mail gem deals with multipart messages that look as if
they should have 1 part but are missing the final MIME boundary,
by making the parts list empty and setting part.body to the
text of the email.  Rather than throwing an exception in this
case, we just pretend that part is text/plain and return it, so
that the page doesn't error and we still have a chance of some
useful text being displayed.

Note that we haven't investigated yet the case of emails that
have more than one start boundary, but no final boundary.

Fixes #921
---
 spec/lib/mail_handler/mail_handler_spec.rb | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'spec/lib')

diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb
index c49e2ea07..d4f5737bb 100644
--- a/spec/lib/mail_handler/mail_handler_spec.rb
+++ b/spec/lib/mail_handler/mail_handler_spec.rb
@@ -352,14 +352,12 @@ describe 'when getting attachment attributes' do
     end
 
     it 'should cope with a missing final MIME boundary' do
-        pending do
-            mail = get_fixture_mail('multipart-no-final-boundary.email')
-            attributes = MailHandler.get_attachment_attributes(mail)
-            attributes.length.should == 1
-            attributes[0][:body].should match(/This is an acknowledgement of your email/)
-            attributes[0][:content_type].should == "text/html"
-            attributes[0][:url_part_number].should == 1
-        end
+        mail = get_fixture_mail('multipart-no-final-boundary.email')
+        attributes = MailHandler.get_attachment_attributes(mail)
+        attributes.length.should == 1
+        attributes[0][:body].should match(/This is an acknowledgement of your email/)
+        attributes[0][:content_type].should == "text/plain"
+        attributes[0][:url_part_number].should == 1
     end
 
     it 'should ignore a TNEF attachment with no usable contents' do
-- 
cgit v1.2.3


From 6e64eb8fd3a346c24990553f294fb9d1f0ae6bbc Mon Sep 17 00:00:00 2001
From: Mark Longair <mhl@pobox.com>
Date: Tue, 21 May 2013 17:03:08 +0100
Subject: Retain old handling of malformed addresses in To and Cc lines

The behaviour of the TMail backend's 'to' and 'cc' methods
where there was a malformed To: or Cc: line was to return
nil, whereas Mail returns a version of the string anyway.
We'd have to change quite a lot of code to deal with an
extra possible class of returned objects, so it's simplest
for the moment to monkey-patch Mail::Message's 'to' and 'cc'
methods to restore the old behaviour.
---
 spec/lib/mail_handler/mail_handler_spec.rb | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'spec/lib')

diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb
index d4f5737bb..01bf179f8 100644
--- a/spec/lib/mail_handler/mail_handler_spec.rb
+++ b/spec/lib/mail_handler/mail_handler_spec.rb
@@ -20,6 +20,12 @@ describe 'when creating a mail object from raw data' do
         mail.to.should == ["request-66666-caa77777@whatdotheyknow.com", "foi@example.com"]
     end
 
+    it 'should return nil for malformed To: and Cc: lines' do
+        mail = get_fixture_mail('malformed-to-and-cc.email')
+        mail.to.should == nil
+        mail.cc.should == nil
+    end
+
     it 'should convert an iso8859 email to utf8' do
         mail = get_fixture_mail('iso8859_2_raw_email.email')
         mail.subject.should match /gjatë/u
-- 
cgit v1.2.3


From d5725cac044cc46245edc209e7c61c717e0d23db Mon Sep 17 00:00:00 2001
From: Mark Longair <mhl@pobox.com>
Date: Mon, 3 Jun 2013 15:11:05 +0100
Subject: Fix for subject lines with invalid UTF-8 as the last character

This seems to be the bug mentioned here:

  http://po-ru.com/diary/fixing-invalid-utf-8-in-ruby-revisited/

That explains that some versions of Iconv don't ignore
invalid characters when converting to UTF-8 even with
//IGNORE if that invalid character happens to be at the end
of the string.  In fact, as Matthew Somerville pointed out,
with some versions of iconv (e.g. 1.14 on Mac OS, apparently)
it's necessary to add and remove more than one space at the end,
in case the first character of the byte sequence indicates a
long sequence.  We add and remove 4 spaces to be on the safe side.
---
 spec/lib/mail_handler/mail_handler_spec.rb | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'spec/lib')

diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb
index 01bf179f8..fde21b0a7 100644
--- a/spec/lib/mail_handler/mail_handler_spec.rb
+++ b/spec/lib/mail_handler/mail_handler_spec.rb
@@ -32,6 +32,19 @@ describe 'when creating a mail object from raw data' do
         MailHandler.get_part_body(mail).is_utf8?.should == true
     end
 
+    it 'should not be confused by subject lines with malformed UTF-8 at the end' do
+        # The base64 subject line was generated with:
+        #   printf "hello\360" | base64
+        # ... and wrapping the result in '=?UTF-8?B?' and '?='
+        mail = get_fixture_mail('subject-bad-utf-8-trailing-base64.email')
+        mail.subject.should == 'hello'
+        # The quoted printable subject line was generated with:
+        #   printf "hello\360" | qprint -b -e
+        # ... and wrapping the result in '=?UTF-8?Q?' and '?='
+        mail = get_fixture_mail('subject-bad-utf-8-trailing-quoted-printable.email')
+        mail.subject.should == 'hello'
+    end
+
     it 'should convert a Windows-1252 body mislabelled as ISO-8859-1 to UTF-8' do
         mail = get_fixture_mail('mislabelled-as-iso-8859-1.email')
         body = MailHandler.get_part_body(mail)
-- 
cgit v1.2.3


From e503bf89c973dad5bdbffb3e2ec4d15cf063bf91 Mon Sep 17 00:00:00 2001
From: Louise Crow <louise.crow@gmail.com>
Date: Mon, 3 Jun 2013 13:10:46 +0100
Subject: Parse the 'to' address as if on a real mail to trigger quoted string
 encoding.

---
 spec/lib/mail_handler/mail_handler_spec.rb | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'spec/lib')

diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb
index fde21b0a7..272b56d0b 100644
--- a/spec/lib/mail_handler/mail_handler_spec.rb
+++ b/spec/lib/mail_handler/mail_handler_spec.rb
@@ -478,3 +478,11 @@ describe 'when getting attachment attributes' do
         end
     end
 end
+
+describe 'when getting the address part from an address string' do
+
+    it 'should handle non-ascii characters in the name input' do
+        address = "\"Someone’s name\" <test@example.com>"
+        MailHandler.address_from_string(address).should == 'test@example.com'
+    end
+end
-- 
cgit v1.2.3