about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- app/models/raw_email.rb 19
-rw-r--r-- app/views/admin_raw_email/show.html.erb 5
-rw-r--r-- config/initializers/alaveteli.rb 2
-rw-r--r-- lib/acts_as_xapian/acts_as_xapian.rb 5
-rw-r--r-- spec/factories/raw_emails.rb 2
-rw-r--r-- spec/lib/acts_as_xapian_spec.rb 110
-rw-r--r-- spec/models/raw_email_spec.rb 70
-rw-r--r-- spec/models/xapian_spec.rb 71
8 files changed, 178 insertions, 106 deletions
diff --git a/app/models/raw_email.rb b/app/models/raw_email.rb
index 2a52921f0..58ae29a3b 100644
--- a/app/models/raw_email.rb
+++ b/app/models/raw_email.rb
@@ -40,11 +40,26 @@ class RawEmail < ActiveRecord::Base
def data=(d)
FileUtils.mkdir_p(directory) unless File.exists?(directory)
- File.atomic_write(filepath) { |file| file.write(d) }
+ File.atomic_write(filepath) do |file|
+ file.binmode
+ file.write(d)
+ end
end
def data
- File.open(filepath, "r").read
+ File.open(filepath, "rb").read
+ end
+
+ def data_as_text
+ text = data
+ if text.respond_to?(:encoding)
+ text = text.encode("UTF-8", :invalid => :replace,
+ :undef => :replace,
+ :replace => "")
+ else
+ text = Iconv.conv('UTF-8//IGNORE', 'UTF-8', text)
+ end
+ text
end
def destroy_file_representation!
diff --git a/app/views/admin_raw_email/show.html.erb b/app/views/admin_raw_email/show.html.erb
index f88b00ef0..1de719544 100644
--- a/app/views/admin_raw_email/show.html.erb
+++ b/app/views/admin_raw_email/show.html.erb
@@ -59,5 +59,8 @@
<p><%= link_to "Download", admin_raw_email_path(@raw_email, :format => 'txt') %></p>
-<pre><%=h(@raw_email.data).gsub(/\n/, '<br>').html_safe %></pre>
+<h2>Preview</h2>
+
+For an exact rendering of this email, use the "Download" link.
+<pre><%=h(@raw_email.data_as_text).gsub(/\n/, '<br>').html_safe %></pre>
diff --git a/config/initializers/alaveteli.rb b/config/initializers/alaveteli.rb
index cda163a9b..7c3b76b43 100644
--- a/config/initializers/alaveteli.rb
+++ b/config/initializers/alaveteli.rb
@@ -11,7 +11,7 @@ load "debug_helpers.rb"
load "util.rb"
# Application version
-ALAVETELI_VERSION = '0.21.0.32'
+ALAVETELI_VERSION = '0.21.0.33'
# Add new inflection rules using the following format
# (all these examples are active by default):
diff --git a/lib/acts_as_xapian/acts_as_xapian.rb b/lib/acts_as_xapian/acts_as_xapian.rb
index 565212904..e0c7c6ae7 100644
--- a/lib/acts_as_xapian/acts_as_xapian.rb
+++ b/lib/acts_as_xapian/acts_as_xapian.rb
@@ -379,7 +379,10 @@ module ActsAsXapian
if correction.empty?
return nil
end
- return correction
+ if correction.respond_to?(:force_encoding)
+ correction = correction.force_encoding('UTF-8')
+ end
+ correction
end
# Return array of models found
diff --git a/spec/factories/raw_emails.rb b/spec/factories/raw_emails.rb
index a2b6496e8..a6e3c21ac 100644
--- a/spec/factories/raw_emails.rb
+++ b/spec/factories/raw_emails.rb
@@ -1,6 +1,4 @@
# -*- encoding : utf-8 -*-
FactoryGirl.define do
-
factory :raw_email
-
end
diff --git a/spec/lib/acts_as_xapian_spec.rb b/spec/lib/acts_as_xapian_spec.rb
new file mode 100644
index 000000000..1d9256441
--- /dev/null
+++ b/spec/lib/acts_as_xapian_spec.rb
@@ -0,0 +1,110 @@
+# -*- encoding : utf-8 -*-
+require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
+
+describe ActsAsXapian::Search do
+
+ describe "#words_to_highlight" do
+
+ before :all do
+ # make sure an index exists
+ @alice = FactoryGirl.create(:public_body, :name => 'alice')
+ ActsAsXapian.update_index
+ end
+
+ after :all do
+ @alice.destroy
+ ActsAsXapian.update_index
+ end
+
+ it "should return a list of words used in the search" do
+ s = ActsAsXapian::Search.new([PublicBody], "albatross words", :limit => 100)
+ s.words_to_highlight.should == ["albatross", "word"]
+ end
+
+ it "should remove any operators" do
+ s = ActsAsXapian::Search.new([PublicBody], "albatross words tag:mice", :limit => 100)
+ s.words_to_highlight.should == ["albatross", "word"]
+ end
+
+ it "should separate punctuation" do
+ s = ActsAsXapian::Search.new([PublicBody], "The doctor's patient", :limit => 100)
+ s.words_to_highlight.should == ["the", "doctor", "patient"].sort
+ end
+
+ it "should handle non-ascii characters" do
+ s = ActsAsXapian::Search.new([PublicBody], "adatigénylés words tag:mice", :limit => 100)
+ s.words_to_highlight.should == ["adatigénylé", "word"]
+ end
+
+ it "should ignore stopwords" do
+ s = ActsAsXapian::Search.new([PublicBody], "department of humpadinking", :limit => 100)
+ s.words_to_highlight.should_not include('of')
+ end
+
+ it "uses stemming" do
+ s = ActsAsXapian::Search.new([PublicBody], 'department of humpadinking', :limit => 100)
+ s.words_to_highlight.should == ["depart", "humpadink"]
+ end
+
+ it "doesn't stem proper nouns" do
+ s = ActsAsXapian::Search.new([PublicBody], 'department of Humpadinking', :limit => 1)
+ s.words_to_highlight.should == ["depart", "humpadinking"]
+ end
+
+ it "includes the original search terms if requested" do
+ s = ActsAsXapian::Search.new([PublicBody], 'boring', :limit => 1)
+ s.words_to_highlight(:include_original => true).should == ['bore', 'boring']
+ end
+
+ it "does not return duplicate terms" do
+ s = ActsAsXapian::Search.new([PublicBody], 'boring boring', :limit => 1)
+ s.words_to_highlight.should == ['bore']
+ end
+
+ context 'the :regex option' do
+
+ it 'wraps each words in a regex that matches the full word' do
+ expected = [/\b(albatross)\b/iu]
+ s = ActsAsXapian::Search.new([PublicBody], 'Albatross', :limit => 1)
+ s.words_to_highlight(:regex => true).should == expected
+ end
+
+ it 'wraps each stem in a regex' do
+ expected = [/\b(depart)\w*\b/iu]
+ s = ActsAsXapian::Search.new([PublicBody], 'department', :limit => 1)
+ s.words_to_highlight(:regex => true).should == expected
+ end
+
+ end
+ end
+
+ describe :spelling_correction do
+
+ before :all do
+ @alice = FactoryGirl.create(:public_body, :name => 'alice')
+ @bob = FactoryGirl.create(:public_body, :name => 'bôbby')
+ ActsAsXapian.update_index
+ end
+
+ after :all do
+ @alice.destroy
+ @bob.destroy
+ ActsAsXapian.update_index
+ end
+
+ it 'returns a UTF-8 encoded string' do
+ s = ActsAsXapian::Search.new([PublicBody], "alece", :limit => 100)
+ s.spelling_correction.should == "alice"
+ if s.spelling_correction.respond_to? :encoding
+ s.spelling_correction.encoding.to_s.should == 'UTF-8'
+ end
+ end
+
+ it 'handles non-ASCII characters' do
+ s = ActsAsXapian::Search.new([PublicBody], "bobby", :limit => 100)
+ s.spelling_correction.should == "bôbby"
+ end
+
+ end
+
+end
\ No newline at end of file
diff --git a/spec/models/raw_email_spec.rb b/spec/models/raw_email_spec.rb
index 044c89d3f..8e0d3b457 100644
--- a/spec/models/raw_email_spec.rb
+++ b/spec/models/raw_email_spec.rb
@@ -8,37 +8,51 @@
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
-describe User, "manipulating a raw email" do
- before do
- @raw_email = RawEmail.new
- incoming_message = mock_model(IncomingMessage)
- info_request = mock_model(InfoRequest)
- incoming_message.stub!(:info_request).and_return(info_request)
- @raw_email.stub!(:incoming_message).and_return(incoming_message)
+describe RawEmail do
+
+ def roundtrip_data(raw_email, data)
+ raw_email.data = data
+ raw_email.save!
+ raw_email.reload
+ raw_email.data
end
- it 'putting data in comes back out' do
- @raw_email.data = "Hello, world!"
- @raw_email.save!
- @raw_email.reload
- @raw_email.data.should == "Hello, world!"
+ describe :data do
+
+ it 'roundtrips data unchanged' do
+ raw_email = FactoryGirl.create(:incoming_message).raw_email
+ data = roundtrip_data(raw_email, "Hello, world!")
+ data.should == "Hello, world!"
+ end
+
+ it 'returns an unchanged binary string with a valid encoding if the data is non-ascii and non-utf-8' do
+ raw_email = FactoryGirl.create(:incoming_message).raw_email
+ data = roundtrip_data(raw_email, "\xA0")
+
+ if data.respond_to?(:encoding)
+ data.encoding.to_s.should == 'ASCII-8BIT'
+ data.valid_encoding?.should be_true
+ data = data.force_encoding('UTF-8')
+ end
+ data.should == "\xA0"
+ end
+
end
- # TODO: this test fails, hopefully will be fixed in later Rails.
- # Doesn't matter too much for us for storing raw_emails, it would seem,
- # but keep an eye out.
-
- # This is testing a bug in Rails PostgreSQL code
- # http://blog.aradine.com/2009/09/rubys-marshal-and-activerecord-and.html
- # https://rails.lighthouseapp.com/projects/8994/tickets/1063-binary-data-broken-with-postgresql-adapter
-# it 'putting data in comes back out even if it has a backslash in it' do
-# @raw_email.data = "This \\ that"
-# @raw_email.save!
-# @raw_email.reload
-# $stderr.puts @raw_email.data
-# $stderr.puts "This \\ that"
-# @raw_email.data.should == "This \\ that"
-# end
+ describe :data_as_text do
+
+ it 'returns a utf-8 string with a valid encoding if the data is non-ascii and non-utf8' do
+ raw_email = FactoryGirl.create(:incoming_message).raw_email
+ roundtrip_data(raw_email, "\xA0ccc")
+ data_as_text = raw_email.data_as_text
+ data_as_text.should == "ccc"
+ if data_as_text.respond_to?(:encoding)
+ data_as_text.encoding.to_s.should == 'UTF-8'
+ data_as_text.valid_encoding?.should be_true
+ end
+ end
+
+ end
end
-
+
diff --git a/spec/models/xapian_spec.rb b/spec/models/xapian_spec.rb
index b3f2e2b3c..212a1cc7e 100644
--- a/spec/models/xapian_spec.rb
+++ b/spec/models/xapian_spec.rb
@@ -370,77 +370,6 @@ describe PublicBody, " when only indexing selected things on a rebuild" do
end
end
-# I would expect ActsAsXapian to have some tests under lib/acts_as_xapian, but
-# it looks like this is not the case. Putting a test here instead.
-describe ActsAsXapian::Search, "#words_to_highlight" do
- before(:each) do
- load_raw_emails_data
- get_fixtures_xapian_index
- end
-
- it "should return a list of words used in the search" do
- s = ActsAsXapian::Search.new([PublicBody], "albatross words", :limit => 100)
- s.words_to_highlight.should == ["albatross", "word"]
- end
-
- it "should remove any operators" do
- s = ActsAsXapian::Search.new([PublicBody], "albatross words tag:mice", :limit => 100)
- s.words_to_highlight.should == ["albatross", "word"]
- end
-
- it "should separate punctuation" do
- s = ActsAsXapian::Search.new([PublicBody], "The doctor's patient", :limit => 100)
- s.words_to_highlight.should == ["the", "doctor", "patient"].sort
- end
-
- it "should handle non-ascii characters" do
- s = ActsAsXapian::Search.new([PublicBody], "adatigénylés words tag:mice", :limit => 100)
- s.words_to_highlight.should == ["adatigénylé", "word"]
- end
-
- it "should ignore stopwords" do
- s = ActsAsXapian::Search.new([PublicBody], "department of humpadinking", :limit => 100)
- s.words_to_highlight.should_not include('of')
- end
-
- it "uses stemming" do
- s = ActsAsXapian::Search.new([PublicBody], 'department of humpadinking', :limit => 100)
- s.words_to_highlight.should == ["depart", "humpadink"]
- end
-
- it "doesn't stem proper nouns" do
- s = ActsAsXapian::Search.new([PublicBody], 'department of Humpadinking', :limit => 1)
- s.words_to_highlight.should == ["depart", "humpadinking"]
- end
-
- it "includes the original search terms if requested" do
- s = ActsAsXapian::Search.new([PublicBody], 'boring', :limit => 1)
- s.words_to_highlight(:include_original => true).should == ['bore', 'boring']
- end
-
- it "does not return duplicate terms" do
- s = ActsAsXapian::Search.new([PublicBody], 'boring boring', :limit => 1)
- s.words_to_highlight.should == ['bore']
- end
-
- context 'the :regex option' do
-
- it 'wraps each words in a regex that matches the full word' do
- expected = [/\b(albatross)\b/iu]
- s = ActsAsXapian::Search.new([PublicBody], 'Albatross', :limit => 1)
- s.words_to_highlight(:regex => true).should == expected
- end
-
- it 'wraps each stem in a regex' do
- expected = [/\b(depart)\w*\b/iu]
- s = ActsAsXapian::Search.new([PublicBody], 'department', :limit => 1)
- s.words_to_highlight(:regex => true).should == expected
- end
-
- end
-
-end
-
describe InfoRequestEvent, " when faced with a race condition during xapian_mark_needs_index" do
before(:each) do