diff options
author | Seb Bacon <seb.bacon@gmail.com> | 2011-08-12 16:06:28 +0100 |
---|---|---|
committer | Seb Bacon <seb.bacon@gmail.com> | 2011-08-12 16:11:20 +0100 |
commit | dbac4121cae0d620c7c11c6731fe5344ff1c677e (patch) | |
tree | ca893bc09c9e09e93e1ed400495ba9c73333419a | |
parent | 7705f6754184d471fb5da38c5d8671a7737bcdd6 (diff) |
Force elinks to assume the UTF-8 character set for its input (used when making plain text versions of HTML email).
-rw-r--r-- | app/models/incoming_message.rb | 2 | ||||
-rw-r--r-- | spec/models/incoming_message_spec.rb | 7 |
2 files changed, 8 insertions, 1 deletion
```diff
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb
index 3d128d8cd..b0b02fdda 100644
--- a/app/models/incoming_message.rb
+++ b/app/models/incoming_message.rb
@@ -1135,7 +1135,7 @@ class IncomingMessage < ActiveRecord::Base
 elsif content_type == 'text/html'
     # lynx wordwraps links in its output, which then don't get formatted properly
     # by Alaveteli. We use elinks instead, which doesn't do that.
-    external_command("/usr/bin/elinks", "-dump-charset", "utf-8", "-force-html", "-dump",
+    external_command("/usr/bin/elinks", "-eval", "'set document.codepage.assume = \"utf-8\"'", "-dump-charset", "utf-8", "-force-html", "-dump",
         tempfile.path, :append_to => text)
 elsif content_type == 'application/vnd.ms-excel'
     # Bit crazy using /usr/bin/strings - but xls2csv, xlhtml and
diff --git a/spec/models/incoming_message_spec.rb b/spec/models/incoming_message_spec.rb
index ad7aa8d43..183a258af 100644
--- a/spec/models/incoming_message_spec.rb
+++ b/spec/models/incoming_message_spec.rb
@@ -19,6 +19,13 @@ describe IncomingMessage, " when dealing with incoming mail" do
 end
+describe IncomingMessage, "when parsing HTML mail" do
+    it "should display UTF-8 characters in the plain text version correctly" do
+        html = "<html><b>foo</b> është"
+        plain_text = IncomingMessage._get_attachment_text_internal_one_file('text/html', html)
+        plain_text.should match(/është/)
+    end
+end
 describe IncomingMessage, "when getting the attachment text" do
```