about | summary | refs | log | tree | commit | diff | stats
path: root/vendor/ruby-msg/lib
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/ruby-msg/lib')
-rw-r--r-- vendor/ruby-msg/lib/mapi.rb 109
-rw-r--r-- vendor/ruby-msg/lib/mapi/convert.rb 61
-rw-r--r-- vendor/ruby-msg/lib/mapi/convert/contact.rb 142
-rw-r--r-- vendor/ruby-msg/lib/mapi/convert/note-mime.rb 274
-rw-r--r-- vendor/ruby-msg/lib/mapi/convert/note-tmail.rb 287
-rw-r--r-- vendor/ruby-msg/lib/mapi/msg.rb 440
-rw-r--r-- vendor/ruby-msg/lib/mapi/property_set.rb 269
-rw-r--r-- vendor/ruby-msg/lib/mapi/pst.rb 1806
-rw-r--r-- vendor/ruby-msg/lib/mapi/rtf.rb 169
-rw-r--r-- vendor/ruby-msg/lib/mapi/types.rb 51
-rw-r--r-- vendor/ruby-msg/lib/mime.rb 165
-rw-r--r-- vendor/ruby-msg/lib/orderedhash.rb 218
-rwxr-xr-x vendor/ruby-msg/lib/rtf.rb 109
13 files changed, 0 insertions, 4100 deletions
diff --git a/vendor/ruby-msg/lib/mapi.rb b/vendor/ruby-msg/lib/mapi.rb
deleted file mode 100644
index b9d3413f7..000000000
--- a/vendor/ruby-msg/lib/mapi.rb
+++ /dev/null
@@ -1,109 +0,0 @@
-require 'mapi/types'
-require 'mapi/property_set'
-
-module Mapi
- VERSION = '1.4.0'
-
- #
- # Mapi::Item is the base class used for all mapi objects, and is purely a
- # property set container
- #
- class Item
- attr_reader :properties
- alias props properties
-
- # +properties+ should be a PropertySet instance.
- def initialize properties
- @properties = properties
- end
- end
-
- # a general attachment class. is subclassed by Msg and Pst attachment classes
- class Attachment < Item
- def filename
- props.attach_long_filename || props.attach_filename
- end
-
- def data
- @embedded_msg || @embedded_ole || props.attach_data
- end
-
- # with new stream work, its possible to not have the whole thing in memory at one time,
- # just to save an attachment
- #
- # a = msg.attachments.first
- # a.save open(File.basename(a.filename || 'attachment'), 'wb')
- def save io
- raise "can only save binary data blobs, not ole dirs" if @embedded_ole
- data.each_read { |chunk| io << chunk }
- end
-
- def inspect
- "#<#{self.class.to_s[/\w+$/]}" +
- (filename ? " filename=#{filename.inspect}" : '') +
- (@embedded_ole ? " embedded_type=#{@embedded_ole.embedded_type.inspect}" : '') + ">"
- end
- end
-
- class Recipient < Item
- # some kind of best effort guess for converting to standard mime style format.
- # there are some rules for encoding non 7bit stuff in mail headers. should obey
- # that here, as these strings could be unicode
- # email_address will be an EX:/ address (X.400?), unless external recipient. the
- # other two we try first.
- # consider using entry id for this too.
- def name
- name = props.transmittable_display_name || props.display_name
- # dequote
- name[/^'(.*)'/, 1] or name rescue nil
- end
-
- def email
- props.smtp_address || props.org_email_addr || props.email_address
- end
-
- RECIPIENT_TYPES = { 0 => :orig, 1 => :to, 2 => :cc, 3 => :bcc }
- def type
- RECIPIENT_TYPES[props.recipient_type]
- end
-
- def to_s
- if name = self.name and !name.empty? and email && name != email
- %{"#{name}" <#{email}>}
- else
- email || name
- end
- end
-
- def inspect
- "#<#{self.class.to_s[/\w+$/]}:#{self.to_s.inspect}>"
- end
- end
-
- # i refer to it as a message (as does mapi), although perhaps Item is better, as its a more general
- # concept than a message, as used in Pst files. though maybe i'll switch to using
- # Mapi::Object as the base class there.
- #
- # IMessage essentially, but there's also stuff like IMAPIFolder etc. so, for this to form
- # basis for PST Item, it'd need to be more general.
- class Message < Item
- # these 2 collections should be provided by our subclasses
- def attachments
- raise NotImplementedError
- end
-
- def recipients
- raise NotImplementedError
- end
-
- def inspect
- str = %w[message_class from to subject].map do |key|
- " #{key}=#{props.send(key).inspect}"
- end.compact.join
- str << " recipients=#{recipients.inspect}"
- str << " attachments=#{attachments.inspect}"
- "#<#{self.class.to_s[/\w+$/]}#{str}>"
- end
- end
-end
-
diff --git a/vendor/ruby-msg/lib/mapi/convert.rb b/vendor/ruby-msg/lib/mapi/convert.rb
deleted file mode 100644
index 4c7a0d298..000000000
--- a/vendor/ruby-msg/lib/mapi/convert.rb
+++ /dev/null
@@ -1,61 +0,0 @@
-# we have two different "backends" for note conversion. we're sticking with
-# the current (home grown) mime one until the tmail version is suitably
-# polished.
-require 'mapi/convert/note-mime'
-require 'mapi/convert/contact'
-
-module Mapi
- class Message
- CONVERSION_MAP = {
- 'text/x-vcard' => [:to_vcard, 'vcf'],
- 'message/rfc822' => [:to_mime, 'eml'],
- 'text/plain' => [:to_post, 'txt']
- # ...
- }
-
- # get the mime type of the message.
- def mime_type
- case props.message_class #.downcase <- have a feeling i saw other cased versions
- when 'IPM.Contact'
- # apparently "text/directory; profile=vcard" is what you're supposed to use
- 'text/x-vcard'
- when 'IPM.Note'
- 'message/rfc822'
- when 'IPM.Post'
- 'text/plain'
- when 'IPM.StickyNote'
- 'text/plain' # hmmm....
- else
- Mapi::Log.warn 'unknown message_class - %p' % props.message_class
- nil
- end
- end
-
- def convert
- type = mime_type
- unless pair = CONVERSION_MAP[type]
- raise 'unable to convert message with mime type - %p' % type
- end
- send pair.first
- end
-
- # should probably be moved to mapi/convert/post
- class Post
- # not really sure what the pertinent properties are. we just do nothing for now...
- def initialize message
- @message = message
- end
-
- def to_s
- # should maybe handle other types, like html body. need a better format for post
- # probably anyway, cause a lot of meta data is getting chucked.
- @message.props.body
- end
- end
-
- def to_post
- Post.new self
- end
- end
-end
-
diff --git a/vendor/ruby-msg/lib/mapi/convert/contact.rb b/vendor/ruby-msg/lib/mapi/convert/contact.rb
deleted file mode 100644
index 838ae6498..000000000
--- a/vendor/ruby-msg/lib/mapi/convert/contact.rb
+++ /dev/null
@@ -1,142 +0,0 @@
-require 'rubygems'
-require 'vpim/vcard'
-
-# patch Vpim. TODO - fix upstream, or verify old behaviour was ok
-def Vpim.encode_text v
- # think the regexp was wrong
- v.to_str.gsub(/(.)/m) do
- case $1
- when "\n"
- "\\n"
- when "\\", ",", ";"
- "\\#{$1}"
- else
- $1
- end
- end
-end
-
-module Mapi
- class Message
- class VcardConverter
- include Vpim
-
- # a very incomplete mapping, but its a start...
- # can't find where to set a lot of stuff, like zipcode, jobtitle etc
- VCARD_MAP = {
- # these are all standard mapi properties
- :name => [
- {
- :given => :given_name,
- :family => :surname,
- :fullname => :subject
- }
- ],
- # outlook seems to eschew the mapi properties this time,
- # like postal_address, street_address, home_address_city
- # so we use the named properties
- :addr => [
- {
- :location => 'work',
- :street => :business_address_street,
- :locality => proc do |props|
- [props.business_address_city, props.business_address_state].compact * ', '
- end
- }
- ],
-
- # right type? maybe date
- :birthday => :birthday,
- :nickname => :nickname
-
- # photo available?
- # FIXME finish, emails, telephones etc
- }
-
- attr_reader :msg
- def initialize msg
- @msg = msg
- end
-
- def field name, *args
- DirectoryInfo::Field.create name, Vpim.encode_text_list(args)
- end
-
- def get_property key
- if String === key
- return key
- elsif key.respond_to? :call
- value = key.call msg.props
- else
- value = msg.props[key]
- end
- if String === value and value.empty?
- nil
- else
- value
- end
- end
-
- def get_properties hash
- constants = {}
- others = {}
- hash.each do |to, from|
- if String === from
- constants[to] = from
- else
- value = get_property from
- others[to] = value if value
- end
- end
- return nil if others.empty?
- others.merge constants
- end
-
- def convert
- Vpim::Vcard::Maker.make2 do |m|
- # handle name
- [:name, :addr].each do |type|
- VCARD_MAP[type].each do |hash|
- next unless props = get_properties(hash)
- m.send "add_#{type}" do |n|
- props.each { |key, value| n.send "#{key}=", value }
- end
- end
- end
-
- (VCARD_MAP.keys - [:name, :addr]).each do |key|
- value = get_property VCARD_MAP[key]
- m.send "#{key}=", value if value
- end
-
- # the rest of the stuff is custom
-
- url = get_property(:webpage) || get_property(:business_home_page)
- m.add_field field('URL', url) if url
- m.add_field field('X-EVOLUTION-FILE-AS', get_property(:file_under)) if get_property(:file_under)
-
- addr = get_property(:email_email_address) || get_property(:email_original_display_name)
- if addr
- m.add_email addr do |e|
- e.format ='x400' unless msg.props.email_addr_type == 'SMTP'
- end
- end
-
- if org = get_property(:company_name)
- m.add_field field('ORG', get_property(:company_name))
- end
-
- # TODO: imaddress
- end
- end
- end
-
- def to_vcard
- #p props.raw.reject { |key, value| key.guid.inspect !~ /00062004-0000-0000-c000-000000000046/ }.
- # map { |key, value| [key.to_sym, value] }.reject { |a, b| b.respond_to? :read }
- #y props.to_h.reject { |a, b| b.respond_to? :read }
- VcardConverter.new(self).convert
- end
- end
-end
-
diff --git a/vendor/ruby-msg/lib/mapi/convert/note-mime.rb b/vendor/ruby-msg/lib/mapi/convert/note-mime.rb
deleted file mode 100644
index deb035f2c..000000000
--- a/vendor/ruby-msg/lib/mapi/convert/note-mime.rb
+++ /dev/null
@@ -1,274 +0,0 @@
-require 'base64'
-require 'mime'
-require 'time'
-
-# there is still some Msg specific stuff in here.
-
-module Mapi
- class Message
- def mime
- return @mime if @mime
- # if these headers exist at all, they can be helpful. we may however get a
- # application/ms-tnef mime root, which means there will be little other than
- # headers. we may get nothing.
- # and other times, when received from external, we get the full cigar, boundaries
- # etc and all.
- # sometimes its multipart, with no boundaries. that throws an error. so we'll be more
- # forgiving here
- @mime = SimpleMime.new props.transport_message_headers.to_s, true
- populate_headers
- @mime
- end
-
- def headers
- mime.headers
- end
-
- # copy data from msg properties storage to standard mime. headers
- # i've now seen it where the existing headers had heaps on stuff, and the msg#props had
- # practically nothing. think it was because it was a tnef - msg conversion done by exchange.
- def populate_headers
- # construct a From value
- # should this kind of thing only be done when headers don't exist already? maybe not. if its
- # sent, then modified and saved, the headers could be wrong?
- # hmmm. i just had an example where a mail is sent, from an internal user, but it has transport
- # headers, i think because one recipient was external. the only place the senders email address
- # exists is in the transport headers. so its maybe not good to overwrite from.
- # recipients however usually have smtp address available.
- # maybe we'll do it for all addresses that are smtp? (is that equivalent to
- # sender_email_address !~ /^\//
- name, email = props.sender_name, props.sender_email_address
- if props.sender_addrtype == 'SMTP'
- headers['From'] = if name and email and name != email
- [%{"#{name}" <#{email}>}]
- else
- [email || name]
- end
- elsif !headers.has_key?('From')
- # some messages were never sent, so that sender stuff isn't filled out. need to find another
- # way to get something
- # what about marking whether we thing the email was sent or not? or draft?
- # for partition into an eventual Inbox, Sent, Draft mbox set?
- # i've now seen cases where this stuff is missing, but exists in transport message headers,
- # so maybe i should inhibit this in that case.
- if email
- # disabling this warning for now
- #Log.warn "* no smtp sender email address available (only X.400). creating fake one"
- # this is crap. though i've specially picked the logic so that it generates the correct
- # email addresses in my case (for my organisation).
- # this user stuff will give valid email i think, based on alias.
- user = name ? name.sub(/(.*), (.*)/, "\\2.\\1") : email[/\w+$/].downcase
- domain = (email[%r{^/O=([^/]+)}i, 1].downcase + '.com' rescue email)
- headers['From'] = [name ? %{"#{name}" <#{user}@#{domain}>} : "<#{user}@#{domain}>" ]
- elsif name
- # we only have a name? thats screwed up.
- # disabling this warning for now
- #Log.warn "* no smtp sender email address available (only name). creating fake one"
- headers['From'] = [%{"#{name}"}]
- else
- # disabling this warning for now
- #Log.warn "* no sender email address available at all. FIXME"
- end
- # else we leave the transport message header version
- end
-
- # for all of this stuff, i'm assigning in utf8 strings.
- # thats ok i suppose, maybe i can say its the job of the mime class to handle that.
- # but a lot of the headers are overloaded in different ways. plain string, many strings
- # other stuff. what happens to a person who has a " in their name etc etc. encoded words
- # i suppose. but that then happens before assignment. and can't be automatically undone
- # until the header is decomposed into recipients.
- recips_by_type = recipients.group_by { |r| r.type }
- # i want to the the types in a specific order.
- [:to, :cc, :bcc].each do |type|
- # don't know why i bother, but if we can, we try to sort recipients by the numerical part
- # of the ole name, or just leave it if we can't
- recips = recips_by_type[type]
- recips = (recips.sort_by { |r| r.obj.name[/\d{8}$/].hex } rescue recips)
- # switched to using , for separation, not ;. see issue #4
- # recips.empty? is strange. i wouldn't have thought it possible, but it was right?
- headers[type.to_s.sub(/^(.)/) { $1.upcase }] = [recips.join(', ')] if recips and !recips.empty?
- end
- headers['Subject'] = [props.subject] if props.subject
-
- # fill in a date value. by default, we won't mess with existing value hear
- if !headers.has_key?('Date')
- # we want to get a received date, as i understand it.
- # use this preference order, or pull the most recent?
- keys = %w[message_delivery_time client_submit_time last_modification_time creation_time]
- time = keys.each { |key| break time if time = props.send(key) }
- time = nil unless Date === time
-
- # now convert and store
- # this is a little funky. not sure about time zone stuff either?
- # actually seems ok. maybe its always UTC and interpreted anyway. or can be timezoneless.
- # i have no timezone info anyway.
- # in gmail, i see stuff like 15 Jan 2007 00:48:19 -0000, and it displays as 11:48.
- # can also add .localtime here if desired. but that feels wrong.
- headers['Date'] = [Time.iso8601(time.to_s).rfc2822] if time
- end
-
- # some very simplistic mapping between internet message headers and the
- # mapi properties
- # any of these could be causing duplicates due to case issues. the hack in #to_mime
- # just stops re-duplication at that point. need to move some smarts into the mime
- # code to handle it.
- mapi_header_map = [
- [:internet_message_id, 'Message-ID'],
- [:in_reply_to_id, 'In-Reply-To'],
- # don't set these values if they're equal to the defaults anyway
- [:importance, 'Importance', proc { |val| val.to_s == '1' ? nil : val }],
- [:priority, 'Priority', proc { |val| val.to_s == '1' ? nil : val }],
- [:sensitivity, 'Sensitivity', proc { |val| val.to_s == '0' ? nil : val }],
- # yeah?
- [:conversation_topic, 'Thread-Topic'],
- # not sure of the distinction here
- # :originator_delivery_report_requested ??
- [:read_receipt_requested, 'Disposition-Notification-To', proc { |val| from }]
- ]
- mapi_header_map.each do |mapi, mime, *f|
- next unless q = val = props.send(mapi) or headers.has_key?(mime)
- next if f[0] and !(val = f[0].call(val))
- headers[mime] = [val.to_s]
- end
- end
-
- # redundant?
- def type
- props.message_class[/IPM\.(.*)/, 1].downcase rescue nil
- end
-
- # shortcuts to some things from the headers
- %w[From To Cc Bcc Subject].each do |key|
- define_method(key.downcase) { headers[key].join(' ') if headers.has_key?(key) }
- end
-
- def body_to_mime
- # to create the body
- # should have some options about serializing rtf. and possibly options to check the rtf
- # for rtf2html conversion, stripping those html tags or other similar stuff. maybe want to
- # ignore it in the cases where it is generated from incoming html. but keep it if it was the
- # source for html and plaintext.
- if props.body_rtf or props.body_html
- # should plain come first?
- mime = SimpleMime.new "Content-Type: multipart/alternative\r\n\r\n"
- # its actually possible for plain body to be empty, but the others not.
- # if i can get an html version, then maybe a callout to lynx can be made...
- mime.parts << SimpleMime.new("Content-Type: text/plain\r\n\r\n" + props.body) if props.body
- # this may be automatically unwrapped from the rtf if the rtf includes the html
- mime.parts << SimpleMime.new("Content-Type: text/html\r\n\r\n" + props.body_html) if props.body_html
- # temporarily disabled the rtf. its just showing up as an attachment anyway.
- #mime.parts << SimpleMime.new("Content-Type: text/rtf\r\n\r\n" + props.body_rtf) if props.body_rtf
- # its thus currently possible to get no body at all if the only body is rtf. that is not
- # really acceptable FIXME
- mime
- else
- # check no header case. content type? etc?. not sure if my SimpleMime class will accept
- Log.debug "taking that other path"
- # body can be nil, hence the to_s
- SimpleMime.new "Content-Type: text/plain\r\n\r\n" + props.body.to_s
- end
- end
-
- def to_mime
- # intended to be used for IPM.note, which is the email type. can use it for others if desired,
- # YMMV
- Log.warn "to_mime used on a #{props.message_class}" unless props.message_class == 'IPM.Note'
- # we always have a body
- mime = body = body_to_mime
-
- # If we have attachments, we take the current mime root (body), and make it the first child
- # of a new tree that will contain body and attachments.
- unless attachments.empty?
- mime = SimpleMime.new "Content-Type: multipart/mixed\r\n\r\n"
- mime.parts << body
- # i don't know any better way to do this. need multipart/related for inline images
- # referenced by cid: urls to work, but don't want to use it otherwise...
- related = false
- attachments.each do |attach|
- part = attach.to_mime
- related = true if part.headers.has_key?('Content-ID') or part.headers.has_key?('Content-Location')
- mime.parts << part
- end
- mime.headers['Content-Type'] = ['multipart/related'] if related
- end
-
- # at this point, mime is either
- # - a single text/plain, consisting of the body ('taking that other path' above. rare)
- # - a multipart/alternative, consiting of a few bodies (plain and html body. common)
- # - a multipart/mixed, consisting of 1 of the above 2 types of bodies, and attachments.
- # we add this standard preamble if its multipart
- # FIXME preamble.replace, and body.replace both suck.
- # preamble= is doable. body= wasn't being done because body will get rewritten from parts
- # if multipart, and is only there readonly. can do that, or do a reparse...
- # The way i do this means that only the first preamble will say it, not preambles of nested
- # multipart chunks.
- mime.preamble.replace "This is a multi-part message in MIME format.\r\n" if mime.multipart?
-
- # now that we have a root, we can mix in all our headers
- headers.each do |key, vals|
- # don't overwrite the content-type, encoding style stuff
- next if mime.headers.has_key? key
- # some new temporary hacks
- next if key =~ /content-type/i and vals[0] =~ /base64/
- next if mime.headers.keys.map(&:downcase).include? key.downcase
- mime.headers[key] += vals
- end
- # just a stupid hack to make the content-type header last, when using OrderedHash
- mime.headers['Content-Type'] = mime.headers.delete 'Content-Type'
-
- mime
- end
- end
-
- class Attachment
- def to_mime
- # TODO: smarter mime typing.
- mimetype = props.attach_mime_tag || 'application/octet-stream'
- mime = SimpleMime.new "Content-Type: #{mimetype}\r\n\r\n"
- mime.headers['Content-Disposition'] = [%{attachment; filename="#{filename}"}]
- mime.headers['Content-Transfer-Encoding'] = ['base64']
- mime.headers['Content-Location'] = [props.attach_content_location] if props.attach_content_location
- mime.headers['Content-ID'] = [props.attach_content_id] if props.attach_content_id
- # data.to_s for now. data was nil for some reason.
- # perhaps it was a data object not correctly handled?
- # hmmm, have to use read here. that assumes that the data isa stream.
- # but if the attachment data is a string, then it won't work. possible?
- data_str = if @embedded_msg
- mime.headers['Content-Type'] = 'message/rfc822'
- # lets try making it not base64 for now
- mime.headers.delete 'Content-Transfer-Encoding'
- # not filename. rather name, or something else right?
- # maybe it should be inline?? i forget attach_method / access meaning
- mime.headers['Content-Disposition'] = [%{attachment; filename="#{@embedded_msg.subject}"}]
- @embedded_msg.to_mime.to_s
- elsif @embedded_ole
- # kind of hacky
- io = StringIO.new
- Ole::Storage.new io do |ole|
- ole.root.type = :dir
- Ole::Storage::Dirent.copy @embedded_ole, ole.root
- end
- io.string
- else
- # FIXME: shouldn't be required
- data.read.to_s rescue ''
- end
- mime.body.replace @embedded_msg ? data_str : Base64.encode64(data_str).gsub(/\n/, "\r\n")
- mime
- end
- end
-
- class Msg < Message
- def populate_headers
- super
- if !headers.has_key?('Date')
- # can employ other methods for getting a time. heres one in a similar vein to msgconvert.pl,
- # ie taking the time from an ole object
- time = @root.ole.dirents.map { |dirent| dirent.modify_time || dirent.create_time }.compact.sort.last
- headers['Date'] = [Time.iso8601(time.to_s).rfc2822] if time
- end
- end
- end
-end
-
diff --git a/vendor/ruby-msg/lib/mapi/convert/note-tmail.rb b/vendor/ruby-msg/lib/mapi/convert/note-tmail.rb
deleted file mode 100644
index 9ccc9e0b3..000000000
--- a/vendor/ruby-msg/lib/mapi/convert/note-tmail.rb
+++ /dev/null
@@ -1,287 +0,0 @@
-require 'rubygems'
-require 'tmail'
-
-# these will be removed later
-require 'time'
-require 'mime'
-
-# there is some Msg specific stuff in here.
-
-class TMail::Mail
- def quoted_body= str
- body_port.wopen { |f| f.write str }
- str
- end
-end
-
-module Mapi
- class Message
- def mime
- return @mime if @mime
- # if these headers exist at all, they can be helpful. we may however get a
- # application/ms-tnef mime root, which means there will be little other than
- # headers. we may get nothing.
- # and other times, when received from external, we get the full cigar, boundaries
- # etc and all.
- # sometimes its multipart, with no boundaries. that throws an error. so we'll be more
- # forgiving here
- @mime = Mime.new props.transport_message_headers.to_s, true
- populate_headers
- @mime
- end
-
- def headers
- mime.headers
- end
-
- # copy data from msg properties storage to standard mime. headers
- # i've now seen it where the existing headers had heaps on stuff, and the msg#props had
- # practically nothing. think it was because it was a tnef - msg conversion done by exchange.
- def populate_headers
- # construct a From value
- # should this kind of thing only be done when headers don't exist already? maybe not. if its
- # sent, then modified and saved, the headers could be wrong?
- # hmmm. i just had an example where a mail is sent, from an internal user, but it has transport
- # headers, i think because one recipient was external. the only place the senders email address
- # exists is in the transport headers. so its maybe not good to overwrite from.
- # recipients however usually have smtp address available.
- # maybe we'll do it for all addresses that are smtp? (is that equivalent to
- # sender_email_address !~ /^\//
- name, email = props.sender_name, props.sender_email_address
- if props.sender_addrtype == 'SMTP'
- headers['From'] = if name and email and name != email
- [%{"#{name}" <#{email}>}]
- else
- [email || name]
- end
- elsif !headers.has_key?('From')
- # some messages were never sent, so that sender stuff isn't filled out. need to find another
- # way to get something
- # what about marking whether we thing the email was sent or not? or draft?
- # for partition into an eventual Inbox, Sent, Draft mbox set?
- # i've now seen cases where this stuff is missing, but exists in transport message headers,
- # so maybe i should inhibit this in that case.
- if email
- # disabling this warning for now
- #Log.warn "* no smtp sender email address available (only X.400). creating fake one"
- # this is crap. though i've specially picked the logic so that it generates the correct
- # email addresses in my case (for my organisation).
- # this user stuff will give valid email i think, based on alias.
- user = name ? name.sub(/(.*), (.*)/, "\\2.\\1") : email[/\w+$/].downcase
- domain = (email[%r{^/O=([^/]+)}i, 1].downcase + '.com' rescue email)
- headers['From'] = [name ? %{"#{name}" <#{user}@#{domain}>} : "<#{user}@#{domain}>" ]
- elsif name
- # we only have a name? thats screwed up.
- # disabling this warning for now
- #Log.warn "* no smtp sender email address available (only name). creating fake one"
- headers['From'] = [%{"#{name}"}]
- else
- # disabling this warning for now
- #Log.warn "* no sender email address available at all. FIXME"
- end
- # else we leave the transport message header version
- end
-
- # for all of this stuff, i'm assigning in utf8 strings.
- # thats ok i suppose, maybe i can say its the job of the mime class to handle that.
- # but a lot of the headers are overloaded in different ways. plain string, many strings
- # other stuff. what happens to a person who has a " in their name etc etc. encoded words
- # i suppose. but that then happens before assignment. and can't be automatically undone
- # until the header is decomposed into recipients.
- recips_by_type = recipients.group_by { |r| r.type }
- # i want to the the types in a specific order.
- [:to, :cc, :bcc].each do |type|
- # don't know why i bother, but if we can, we try to sort recipients by the numerical part
- # of the ole name, or just leave it if we can't
- recips = recips_by_type[type]
- recips = (recips.sort_by { |r| r.obj.name[/\d{8}$/].hex } rescue recips)
- # switched to using , for separation, not ;. see issue #4
- # recips.empty? is strange. i wouldn't have thought it possible, but it was right?
- headers[type.to_s.sub(/^(.)/) { $1.upcase }] = [recips.join(', ')] unless recips.empty?
- end
- headers['Subject'] = [props.subject] if props.subject
-
- # fill in a date value. by default, we won't mess with existing value hear
- if !headers.has_key?('Date')
- # we want to get a received date, as i understand it.
- # use this preference order, or pull the most recent?
- keys = %w[message_delivery_time client_submit_time last_modification_time creation_time]
- time = keys.each { |key| break time if time = props.send(key) }
- time = nil unless Date === time
-
- # now convert and store
- # this is a little funky. not sure about time zone stuff either?
- # actually seems ok. maybe its always UTC and interpreted anyway. or can be timezoneless.
- # i have no timezone info anyway.
- # in gmail, i see stuff like 15 Jan 2007 00:48:19 -0000, and it displays as 11:48.
- # can also add .localtime here if desired. but that feels wrong.
- headers['Date'] = [Time.iso8601(time.to_s).rfc2822] if time
- end
-
- # some very simplistic mapping between internet message headers and the
- # mapi properties
- # any of these could be causing duplicates due to case issues. the hack in #to_mime
- # just stops re-duplication at that point. need to move some smarts into the mime
- # code to handle it.
- mapi_header_map = [
- [:internet_message_id, 'Message-ID'],
- [:in_reply_to_id, 'In-Reply-To'],
- # don't set these values if they're equal to the defaults anyway
- [:importance, 'Importance', proc { |val| val.to_s == '1' ? nil : val }],
- [:priority, 'Priority', proc { |val| val.to_s == '1' ? nil : val }],
- [:sensitivity, 'Sensitivity', proc { |val| val.to_s == '0' ? nil : val }],
- # yeah?
- [:conversation_topic, 'Thread-Topic'],
- # not sure of the distinction here
- # :originator_delivery_report_requested ??
- [:read_receipt_requested, 'Disposition-Notification-To', proc { |val| from }]
- ]
- mapi_header_map.each do |mapi, mime, *f|
- next unless q = val = props.send(mapi) or headers.has_key?(mime)
- next if f[0] and !(val = f[0].call(val))
- headers[mime] = [val.to_s]
- end
- end
-
- # redundant?
- def type
- props.message_class[/IPM\.(.*)/, 1].downcase rescue nil
- end
-
- # shortcuts to some things from the headers
- %w[From To Cc Bcc Subject].each do |key|
- define_method(key.downcase) { headers[key].join(' ') if headers.has_key?(key) }
- end
-
- def body_to_tmail
- # to create the body
- # should have some options about serializing rtf. and possibly options to check the rtf
- # for rtf2html conversion, stripping those html tags or other similar stuff. maybe want to
- # ignore it in the cases where it is generated from incoming html. but keep it if it was the
- # source for html and plaintext.
- if props.body_rtf or props.body_html
- # should plain come first?
- part = TMail::Mail.new
- # its actually possible for plain body to be empty, but the others not.
- # if i can get an html version, then maybe a callout to lynx can be made...
- part.parts << TMail::Mail.parse("Content-Type: text/plain\r\n\r\n" + props.body) if props.body
- # this may be automatically unwrapped from the rtf if the rtf includes the html
- part.parts << TMail::Mail.parse("Content-Type: text/html\r\n\r\n" + props.body_html) if props.body_html
- # temporarily disabled the rtf. its just showing up as an attachment anyway.
- #mime.parts << Mime.new("Content-Type: text/rtf\r\n\r\n" + props.body_rtf) if props.body_rtf
- # its thus currently possible to get no body at all if the only body is rtf. that is not
- # really acceptable FIXME
- part['Content-Type'] = 'multipart/alternative'
- part
- else
- # check no header case. content type? etc?. not sure if my Mime class will accept
- Log.debug "taking that other path"
- # body can be nil, hence the to_s
- TMail::Mail.parse "Content-Type: text/plain\r\n\r\n" + props.body.to_s
- end
- end
-
- def to_tmail
- # intended to be used for IPM.note, which is the email type. can use it for others if desired,
- # YMMV
- Log.warn "to_mime used on a #{props.message_class}" unless props.message_class == 'IPM.Note'
- # we always have a body
- mail = body = body_to_tmail
-
- # If we have attachments, we take the current mime root (body), and make it the first child
- # of a new tree that will contain body and attachments.
- unless attachments.empty?
- raise NotImplementedError
- mime = Mime.new "Content-Type: multipart/mixed\r\n\r\n"
- mime.parts << body
- # i don't know any better way to do this. need multipart/related for inline images
- # referenced by cid: urls to work, but don't want to use it otherwise...
- related = false
- attachments.each do |attach|
- part = attach.to_mime
- related = true if part.headers.has_key?('Content-ID') or part.headers.has_key?('Content-Location')
- mime.parts << part
- end
- mime.headers['Content-Type'] = ['multipart/related'] if related
- end
-
- # at this point, mime is either
- # - a single text/plain, consisting of the body ('taking that other path' above. rare)
- # - a multipart/alternative, consiting of a few bodies (plain and html body. common)
- # - a multipart/mixed, consisting of 1 of the above 2 types of bodies, and attachments.
- # we add this standard preamble if its multipart
- # FIXME preamble.replace, and body.replace both suck.
- # preamble= is doable. body= wasn't being done because body will get rewritten from parts
- # if multipart, and is only there readonly. can do that, or do a reparse...
- # The way i do this means that only the first preamble will say it, not preambles of nested
- # multipart chunks.
- mail.quoted_body = "This is a multi-part message in MIME format.\r\n" if mail.multipart?
-
- # now that we have a root, we can mix in all our headers
- headers.each do |key, vals|
- # don't overwrite the content-type, encoding style stuff
- next if mail[key]
- # some new temporary hacks
- next if key =~ /content-type/i and vals[0] =~ /base64/
- #next if mime.headers.keys.map(&:downcase).include? key.downcase
- mail[key] = vals.first
- end
- # just a stupid hack to make the content-type header last, when using OrderedHash
- #mime.headers['Content-Type'] = mime.headers.delete 'Content-Type'
-
- mail
- end
- end
-
- class Attachment
- def to_tmail
- # TODO: smarter mime typing.
- mimetype = props.attach_mime_tag || 'application/octet-stream'
- part = TMail::Mail.parse "Content-Type: #{mimetype}\r\n\r\n"
- part['Content-Disposition'] = %{attachment; filename="#{filename}"}
- part['Content-Transfer-Encoding'] = 'base64'
- part['Content-Location'] = props.attach_content_location if props.attach_content_location
- part['Content-ID'] = props.attach_content_id if props.attach_content_id
- # data.to_s for now. data was nil for some reason.
- # perhaps it was a data object not correctly handled?
- # hmmm, have to use read here. that assumes that the data isa stream.
- # but if the attachment data is a string, then it won't work. possible?
- data_str = if @embedded_msg
- raise NotImplementedError
- mime.headers['Content-Type'] = 'message/rfc822'
- # lets try making it not base64 for now
- mime.headers.delete 'Content-Transfer-Encoding'
- # not filename. rather name, or something else right?
- # maybe it should be inline?? i forget attach_method / access meaning
- mime.headers['Content-Disposition'] = [%{attachment; filename="#{@embedded_msg.subject}"}]
- @embedded_msg.to_mime.to_s
- elsif @embedded_ole
- raise NotImplementedError
- # kind of hacky
- io = StringIO.new
- Ole::Storage.new io do |ole|
- ole.root.type = :dir
- Ole::Storage::Dirent.copy @embedded_ole, ole.root
- end
- io.string
- else
- data.read.to_s
- end
- part.body = @embedded_msg ? data_str : Base64.encode64(data_str).gsub(/\n/, "\r\n")
- part
- end
- end
-
- class Msg < Message
- def populate_headers
- super
- if !headers.has_key?('Date')
- # can employ other methods for getting a time. heres one in a similar vein to msgconvert.pl,
- # ie taking the time from an ole object
- time = @root.ole.dirents.map { |dirent| dirent.modify_time || dirent.create_time }.compact.sort.last
- headers['Date'] = [Time.iso8601(time.to_s).rfc2822] if time
- end
- end
- end
-end
-
diff --git a/vendor/ruby-msg/lib/mapi/msg.rb b/vendor/ruby-msg/lib/mapi/msg.rb
deleted file mode 100644
index fc30a9170..000000000
--- a/vendor/ruby-msg/lib/mapi/msg.rb
+++ /dev/null
@@ -1,440 +0,0 @@
-require 'rubygems'
-require 'ole/storage'
-require 'mapi'
-require 'mapi/rtf'
-
-module Mapi
- #
- # = Introduction
- #
- # Primary class interface to the vagaries of .msg files.
- #
- # The core of the work is done by the <tt>Msg::PropertyStore</tt> class.
- #
- class Msg < Message
- #
- # = Introduction
- #
- # A big component of +Msg+ files is the property store, which holds
- # all the key/value pairs of properties. The message itself, and all
- # its <tt>Attachment</tt>s and <tt>Recipient</tt>s have an instance of
- # this class.
- #
- # = Storage model
- #
- # Property keys (tags?) can be either simple hex numbers, in the
- # range 0x0000 - 0xffff, or they can be named properties. In fact,
- # properties in the range 0x0000 to 0x7fff are supposed to be the non-
- # named properties, and can be considered to be in the +PS_MAPI+
- # namespace. (correct?)
- #
- # Named properties are serialized in the 0x8000 to 0xffff range,
- # and are referenced as a guid and long/string pair.
- #
- # There are key ranges, which can be used to imply things generally
- # about keys.
- #
- # Further, we can give symbolic names to most keys, coming from
- # constants in various places. Eg:
- #
- # 0x0037 => subject
- # {00062002-0000-0000-C000-000000000046}/0x8218 => response_status
- # # displayed as categories in outlook
- # {00020329-0000-0000-C000-000000000046}/"Keywords" => categories
- #
- # Further, there are completely different names, coming from other
- # object models that get mapped to these things (CDO's model,
- # Outlook's model etc). Eg "urn:schemas:httpmail:subject"
- # I think these can be ignored though, as they aren't defined clearly
- # in terms of mapi properties, and i'm really just trying to make
- # a mapi property store. (It should also be relatively easy to
- # support them later.)
- #
- # = Usage
- #
- # The api is driven by a desire to have the simple stuff "just work", ie
- #
- # properties.subject
- # properties.display_name
- #
- # There also needs to be a way to look up properties more specifically:
- #
- # properties[0x0037] # => gets the subject
- # properties[0x0037, PS_MAPI] # => still gets the subject
- # properties['Keywords', PS_PUBLIC_STRINGS] # => gets outlook's categories array
- #
- # The abbreviated versions work by "resolving" the symbols to full keys:
- #
- # # the guid here is just PS_PUBLIC_STRINGS
- # properties.resolve :keywords # => #<Key {00020329-0000-0000-c000-000000000046}/"Keywords">
- # # the result here is actually also a key
- # k = properties.resolve :subject # => 0x0037
- # # it has a guid
- # k.guid == Msg::Properties::PS_MAPI # => true
- #
- # = Parsing
- #
- # There are three objects that need to be parsed to load a +Msg+ property store:
- #
- # 1. The +nameid+ directory (<tt>Properties.parse_nameid</tt>)
- # 2. The many +substg+ objects, whose names should match <tt>Properties::SUBSTG_RX</tt>
- # (<tt>Properties#parse_substg</tt>)
- # 3. The +properties+ file (<tt>Properties#parse_properties</tt>)
- #
- # Understanding of the formats is by no means perfect.
- #
- # = TODO
- #
- # * While the key objects are sufficient, the value objects are just plain
- # ruby types. It currently isn't possible to write to the values, or to know
- # which encoding the value had.
- # * Update this doc.
- # * Perhaps change from eager loading, to be load-on-demand.
- #
- class PropertyStore
- include PropertySet::Constants
- Key = PropertySet::Key
-
- # note that binary and default both use obj.open. not the block form. this means we should
- # #close it later, which we don't. as we're only reading though, it shouldn't matter right?
- # not really good though FIXME
- # change these to use mapi symbolic const names
- ENCODINGS = {
- 0x000d => proc { |obj| obj }, # seems to be used when its going to be a directory instead of a file. eg nested ole. 3701 usually. in which case we shouldn't get here right?
- 0x001f => proc { |obj| Ole::Types::FROM_UTF16.iconv obj.read }, # unicode
- # ascii
- # FIXME hack did a[0..-2] before, seems right sometimes, but for some others it chopped the text. chomp
- 0x001e => proc { |obj| obj.read.chomp 0.chr },
- 0x0102 => proc { |obj| obj.open }, # binary?
- :default => proc { |obj| obj.open }
- }
-
- SUBSTG_RX = /^__substg1\.0_([0-9A-F]{4})([0-9A-F]{4})(?:-([0-9A-F]{8}))?$/
- PROPERTIES_RX = /^__properties_version1\.0$/
- NAMEID_RX = /^__nameid_version1\.0$/
- VALID_RX = /#{SUBSTG_RX}|#{PROPERTIES_RX}|#{NAMEID_RX}/
-
- attr_reader :nameid
-
- def initialize
- @nameid = nil
- # not exactly a cache currently
- @cache = {}
- end
-
- #--
- # The parsing methods
- #++
-
- def self.load obj
- prop = new
- prop.load obj
- prop
- end
-
- # Parse properties from the +Dirent+ obj
- # Populates this store from the children of +obj+: first the nameid directory (if
- # one is present), then every file child named like the properties file or a substg.
- def load obj
- # we need to do the nameid first, as it provides the map for later user defined properties
- if nameid_obj = obj.children.find { |child| child.name =~ NAMEID_RX }
- @nameid = PropertyStore.parse_nameid nameid_obj
- # hack to make it available to all msg files from the same ole storage object
- # FIXME - come up with a neater way
- # (adds a msg_nameid accessor to the singleton class of this one ole object)
- class << obj.ole
- attr_accessor :msg_nameid
- end
- obj.ole.msg_nameid = @nameid
- elsif obj.ole
- # no nameid directory here: reuse one stashed on the ole object earlier, or nil
- # if the accessor was never defined (the error is swallowed by the rescue)
- @nameid = obj.ole.msg_nameid rescue nil
- end
- # now parse the actual properties. i think dirs that match the substg should be decoded
- # as properties to. 0x000d is just another encoding, the dir encoding. it should match
- # whether the object is file / dir. currently only example is embedded msgs anyway
- obj.children.each do |child|
- next unless child.file?
- case child.name
- when PROPERTIES_RX
- parse_properties child
- when SUBSTG_RX
- # $~ is the SUBSTG_RX match from the when clause: the captures are key, encoding and
- # the optional multivalue offset, all hex. a missing offset capture becomes nil.
- parse_substg(*($~[1..-1].map { |num| num.hex rescue nil } + [child]))
- end
- end
- end
-
- # Read nameid from the +Dirent+ obj, which is used for mapping of named properties keys to
- # proxy keys in the 0x8000 - 0xffff range.
- # Returns a hash of integer -> Key.
- def self.parse_nameid obj
- remaining = obj.children.dup
- guids_obj, props_obj, names_obj =
- %w[__substg1.0_00020102 __substg1.0_00030102 __substg1.0_00040102].map do |name|
- remaining.delete obj/name
- end
-
- # parse guids
- # this is the guids for named properities (other than builtin ones)
- # i think PS_PUBLIC_STRINGS, and PS_MAPI are builtin.
- guids = [PS_PUBLIC_STRINGS] + guids_obj.read.scan(/.{16}/mn).map do |str|
- Ole::Types.load_guid str
- end
-
- # parse names.
- # the string ids for named properties
- # they are no longer parsed, as they're referred to by offset not
- # index. they are simply sequentially packed, as a long, giving
- # the string length, then padding to 4 byte multiple, and repeat.
- names_data = names_obj.read
-
- # parse actual props.
- # not sure about any of this stuff really.
- # should flip a few bits in the real msg, to get a better understanding of how this works.
- props = props_obj.read.scan(/.{8}/mn).map do |str|
- flags, offset = str[4..-1].unpack 'v2'
- # the property will be serialised as this pseudo property, mapping it to this named property
- pseudo_prop = 0x8000 + offset
- named = flags & 1 == 1
- prop = if named
- str_off = *str.unpack('V')
- len = *names_data[str_off, 4].unpack('V')
- Ole::Types::FROM_UTF16.iconv names_data[str_off + 4, len]
- else
- a, b = str.unpack('v2')
- Log.debug "b not 0" if b != 0
- a
- end
- # a bit sus
- guid_off = flags >> 1
- # missing a few builtin PS_*
- Log.debug "guid off < 2 (#{guid_off})" if guid_off < 2
- guid = guids[guid_off - 2]
- [pseudo_prop, Key.new(prop, guid)]
- end
-
- #Log.warn "* ignoring #{remaining.length} objects in nameid" unless remaining.empty?
- # this leaves a bunch of other unknown chunks of data with completely unknown meaning.
- # pp [:unknown, child.name, child.data.unpack('H*')[0].scan(/.{16}/m)]
- Hash[*props.flatten]
- end
-
- # Parse an +Dirent+, as per <tt>msgconvert.pl</tt>. This is how larger properties, such
- # as strings, binary blobs, and other ole sub-directories (eg nested Msg) are stored.
- def parse_substg key, encoding, offset, obj
- if (encoding & 0x1000) != 0
- if !offset
- # there is typically one with no offset first, whose data is a series of numbers
- # equal to the lengths of all the sub parts. gives an implied array size i suppose.
- # maybe you can initialize the array at this time. the sizes are the same as all the
- # ole object sizes anyway, its to pre-allocate i suppose.
- #p obj.data.unpack('V*')
- # ignore this one
- return
- else
- # remove multivalue flag for individual pieces
- encoding &= ~0x1000
- end
- else
- Log.warn "offset specified for non-multivalue encoding #{obj.name}" if offset
- offset = nil
- end
- # offset is for multivalue encodings.
- unless encoder = ENCODINGS[encoding]
- Log.warn "unknown encoding #{encoding}"
- #encoder = proc { |obj| obj.io } #.read }. maybe not a good idea
- encoder = ENCODINGS[:default]
- end
- add_property key, encoder[obj], offset
- end
-
- # For parsing the +properties+ file. Smaller properties are serialized in one chunk,
- # such as longs, bools, times etc. The parsing has problems.
- # Reads all of +obj+ and walks it in 16 byte records, adding a property for each
- # record whose encoding is long (0003), boolean (000b) or systime (0040).
- def parse_properties obj
- data = obj.read
- # don't really understand this that well...
- # whatever does not fit a whole number of 16 byte records is treated as leading
- # padding, expected to be 0 or 8 zero bytes
- pad = data.length % 16
- unless (pad == 0 || pad == 8) and data[0...pad] == "\000" * pad
- Log.warn "padding was not as expected #{pad} (#{data.length}) -> #{data[0...pad].inspect}"
- end
- # NOTE(review): the block parameter reuses the name +data+ of the outer local
- data[pad..-1].scan(/.{16}/mn).each do |data|
- # the first little endian long, shown as 8 hex digits, splits into the property
- # id (upper 4 digits) and the encoding (lower 4 digits)
- property, encoding = ('%08x' % data.unpack('V')).scan /.{4}/
- key = property.hex
- # doesn't make any sense to me. probably because its a serialization of some internal
- # outlook structure...
- next if property == '0000'
- case encoding
- when '0102', '001e', '001f', '101e', '101f', '000d'
- # ignore on purpose. not sure what its for
- # multivalue versions ignored also
- when '0003' # long
- # don't know what all the other data is for
- add_property key, *data[8, 4].unpack('V')
- when '000b' # boolean
- # again, heaps more data than needed. and its not always 0 or 1.
- # they are in fact quite big numbers. this is wrong.
-# p [property, data[4..-1].unpack('H*')[0]]
- add_property key, data[8, 4].unpack('V')[0] != 0
- when '0040' # systime
- # seems to work:
- add_property key, Ole::Types.load_time(data[8..-1])
- else
- # any other encoding is silently skipped
- #Log.warn "ignoring data in __properties section, encoding: #{encoding}"
- #Log << data.unpack('H*').inspect + "\n"
- end
- end
- end
-
- def add_property key, value, pos=nil
- # map keys in the named property range through nameid
- if Integer === key and key >= 0x8000
- if !@nameid
- Log.warn "no nameid section yet named properties used"
- key = Key.new key
- elsif real_key = @nameid[key]
- key = real_key
- else
- # i think i hit these when i have a named property, in the PS_MAPI
- # guid
- Log.warn "property in named range not in nameid #{key.inspect}"
- key = Key.new key
- end
- else
- key = Key.new key
- end
- if pos
- @cache[key] ||= []
- Log.warn "duplicate property" unless Array === @cache[key]
- # ^ this is actually a trickier problem. the issue is more that they must all be of
- # the same type.
- @cache[key][pos] = value
- else
- # take the last.
- Log.warn "duplicate property #{key.inspect}" if @cache[key]
- @cache[key] = value
- end
- end
-
- # delegate to cache
- def method_missing name, *args, &block
- @cache.send name, *args, &block
- end
- end
-
- # these 2 will actually be of the form
- # 1\.0_#([0-9A-Z]{8}), where $1 is the 0 based index number in hex
- # should i parse that and use it as an index, or just return in
- # file order? probably should use it later...
- ATTACH_RX = /^__attach_version1\.0_.*/
- RECIP_RX = /^__recip_version1\.0_.*/
- VALID_RX = /#{PropertyStore::VALID_RX}|#{ATTACH_RX}|#{RECIP_RX}/
-
- attr_reader :root
- attr_accessor :close_parent
-
- # Alternate constructor, to create an +Msg+ directly from +arg+ and +mode+, passed
- # directly to Ole::Storage (ie either filename or seekable IO object).
- def self.open arg, mode=nil
- msg = new Ole::Storage.open(arg, mode).root
- # we will close the ole when we are #closed
- msg.close_parent = true
- if block_given?
- begin yield msg
- ensure; msg.close
- end
- else msg
- end
- end
-
- # Create an Msg from +root+, an <tt>Ole::Storage::Dirent</tt> object
- def initialize root
- @root = root
- @close_parent = false
- super PropertySet.new(PropertyStore.load(@root))
- Msg.warn_unknown @root
- end
-
- def self.warn_unknown obj
- # bit of validation. not important if there is extra stuff, though would be
- # interested to know what it is. doesn't check dir/file stuff.
- unknown = obj.children.reject { |child| child.name =~ VALID_RX }
- Log.warn "skipped #{unknown.length} unknown msg object(s)" unless unknown.empty?
- end
-
- def close
- @root.ole.close if @close_parent
- end
-
- def attachments
- @attachments ||= @root.children.
- select { |child| child.dir? and child.name =~ ATTACH_RX }.
- map { |child| Attachment.new child }.
- select { |attach| attach.valid? }
- end
-
- def recipients
- @recipients ||= @root.children.
- select { |child| child.dir? and child.name =~ RECIP_RX }.
- map { |child| Recipient.new child }
- end
-
- # An attachment backed by one attach directory of the msg. Properties are loaded from
- # that directory; a child directory named as substg 3701 with encoding 000d is kept as
- # an embedded ole object, and additionally wrapped in a Msg when it looks like one.
- class Attachment < Mapi::Attachment
- attr_reader :obj, :properties
- alias props :properties
-
- # +obj+ is the dirent of the attach directory.
- def initialize obj
- @obj = obj
- @embedded_ole = nil
- @embedded_msg = nil
-
- super PropertySet.new(PropertyStore.load(@obj))
- Msg.warn_unknown @obj
-
- @obj.children.each do |child|
- # temp hack. PropertyStore doesn't do directory properties atm - FIXME
- # $1 and $2 are the key and encoding captures of the SUBSTG_RX match just made
- if child.dir? and child.name =~ PropertyStore::SUBSTG_RX and
- $1 == '3701' and $2.downcase == '000d'
- @embedded_ole = child
- # the helpers below are defined on this one dirent only (singleton methods)
- class << @embedded_ole
- # the run of non-NUL bytes following the first 32 bytes of the "\001CompObj"
- # stream, or nil when there is no such stream
- def compobj
- return nil unless compobj = self["\001CompObj"]
- compobj.read[/^.{32}([^\x00]+)/m, 1]
- end
-
- # the compobj string when available, otherwise a guess: more than two children
- # with msg-like names means an outlook message. nil when nothing matches.
- def embedded_type
- temp = compobj and return temp
- # try to guess more
- if children.select { |child| child.name =~ /__(substg|properties|recip|attach|nameid)/ }.length > 2
- return 'Microsoft Office Outlook Message'
- end
- nil
- end
- end
- if @embedded_ole.embedded_type == 'Microsoft Office Outlook Message'
- @embedded_msg = Msg.new @embedded_ole
- end
- end
- end
- end
-
- # false when the attachment has no raw properties at all
- def valid?
- # something i started to notice when handling embedded ole object attachments is
- # the particularly strange case where there are empty attachments
- not props.raw.keys.empty?
- end
- end
-
- #
- # +Recipient+ serves as a container for the +recip+ directories in the .msg.
- # It has things like office_location, business_telephone_number, but I don't
- # think enough to make a vCard out of?
- #
- class Recipient < Mapi::Recipient
- attr_reader :obj, :properties
- alias props :properties
-
- def initialize obj
- @obj = obj
- super PropertySet.new(PropertyStore.load(@obj))
- Msg.warn_unknown @obj
- end
- end
- end
-end
-
diff --git a/vendor/ruby-msg/lib/mapi/property_set.rb b/vendor/ruby-msg/lib/mapi/property_set.rb
deleted file mode 100644
index 199bca525..000000000
--- a/vendor/ruby-msg/lib/mapi/property_set.rb
+++ /dev/null
@@ -1,269 +0,0 @@
-require 'yaml'
-require 'mapi/types'
-require 'mapi/rtf'
-require 'rtf'
-
-module Mapi
- #
- # The Mapi::PropertySet class is used to wrap the lower level Msg or Pst property stores,
- # and provide a consistent and more friendly interface. It allows you to just say:
- #
- # properties.subject
- #
- # instead of:
- #
- # properties.raw[0x0037, PS_MAPI]
- #
- # The underlying store can be just a hash, or lazily loading directly from the file. A good
- # compromise is to cache all the available keys, and just return the values on demand, rather
- # than load up many possibly unwanted values.
- #
- class PropertySet
- # the property set guid constants
- # these guids are all defined with the macro DEFINE_OLEGUID in mapiguid.h.
- # see http://doc.ddart.net/msdn/header/include/mapiguid.h.html
- oleguid = proc do |prefix|
- Ole::Types::Clsid.parse "{#{prefix}-0000-0000-c000-000000000046}"
- end
-
- NAMES = {
- oleguid['00020328'] => 'PS_MAPI',
- oleguid['00020329'] => 'PS_PUBLIC_STRINGS',
- oleguid['00020380'] => 'PS_ROUTING_EMAIL_ADDRESSES',
- oleguid['00020381'] => 'PS_ROUTING_ADDRTYPE',
- oleguid['00020382'] => 'PS_ROUTING_DISPLAY_NAME',
- oleguid['00020383'] => 'PS_ROUTING_ENTRYID',
- oleguid['00020384'] => 'PS_ROUTING_SEARCH_KEY',
- # string properties in this namespace automatically get added to the internet headers
- oleguid['00020386'] => 'PS_INTERNET_HEADERS',
- # theres are bunch of outlook ones i think
- # http://blogs.msdn.com/stephen_griffin/archive/2006/05/10/outlook-2007-beta-documentation-notification-based-indexing-support.aspx
- # IPM.Appointment
- oleguid['00062002'] => 'PSETID_Appointment',
- # IPM.Task
- oleguid['00062003'] => 'PSETID_Task',
- # used for IPM.Contact
- oleguid['00062004'] => 'PSETID_Address',
- oleguid['00062008'] => 'PSETID_Common',
- # didn't find a source for this name. it is for IPM.StickyNote
- oleguid['0006200e'] => 'PSETID_Note',
- # for IPM.Activity. also called the journal?
- oleguid['0006200a'] => 'PSETID_Log',
- }
-
- module Constants
- NAMES.each { |guid, name| const_set name, guid }
- end
-
- include Constants
-
- # +Properties+ are accessed by <tt>Key</tt>s, which are coerced to this class.
- # Includes a bunch of methods (hash, ==, eql?) to allow it to work as a key in
- # a +Hash+.
- #
- # Also contains the code that maps keys to symbolic names.
- class Key
- include Constants
-
- attr_reader :code, :guid
- def initialize code, guid=PS_MAPI
- @code, @guid = code, guid
- end
-
- def to_sym
- # hmmm, for some stuff, like, eg, the message class specific range, sym-ification
- # of the key depends on knowing our message class. i don't want to store anything else
- # here though, so if that kind of thing is needed, it can be passed to this function.
- # worry about that when some examples arise.
- case code
- when Integer
- if guid == PS_MAPI # and < 0x8000 ?
- # the hash should be updated now that i've changed the process
- TAGS['%04x' % code].first[/_(.*)/, 1].downcase.to_sym rescue code
- else
- # handle other guids here, like mapping names to outlook properties, based on the
- # outlook object model.
- NAMED_MAP[self].to_sym rescue code
- end
- when String
- # return something like
- # note that named properties don't go through the map at the moment. so #categories
- # doesn't work yet
- code.downcase.to_sym
- end
- end
-
- def to_s
- to_sym.to_s
- end
-
- # FIXME implement these
- def transmittable?
- # etc, can go here too
- end
-
- # this stuff is to allow it to be a useful key
- def hash
- [code, guid].hash
- end
-
- def == other
- hash == other.hash
- end
-
- alias eql? :==
-
- def inspect
- # maybe the way to do this, would be to be able to register guids
- # in a global lookup, which are used by Clsid#inspect itself, to
- # provide symbolic names...
- guid_str = NAMES[guid] || "{#{guid.format}}" rescue "nil"
- if Integer === code
- hex = '0x%04x' % code
- if guid == PS_MAPI
- # just display as plain hex number
- hex
- else
- "#<Key #{guid_str}/#{hex}>"
- end
- else
- # display full guid and code
- "#<Key #{guid_str}/#{code.inspect}>"
- end
- end
- end
-
- # duplicated here for now
- SUPPORT_DIR = File.dirname(__FILE__) + '/../..'
-
- # data files that provide for the code to symbolic name mapping
- # guids in named_map are really constant references to the above
- TAGS = YAML.load_file "#{SUPPORT_DIR}/data/mapitags.yaml"
- NAMED_MAP = YAML.load_file("#{SUPPORT_DIR}/data/named_map.yaml").inject({}) do |hash, (key, value)|
- hash.update Key.new(key[0], const_get(key[1])) => value
- end
-
- attr_reader :raw
-
- # +raw+ should be an hash-like object that maps <tt>Key</tt>s to values. Should respond_to?
- # [], keys, values, each, and optionally []=, and delete.
- def initialize raw
- @raw = raw
- end
-
- # resolve +arg+ (could be key, code, string, or symbol), and possible +guid+ to a key.
- # returns nil on failure
- def resolve arg, guid=nil
- if guid; Key.new arg, guid
- else
- case arg
- when Key; arg
- when Integer; Key.new arg
- else sym_to_key[arg.to_sym]
- end
- end
- end
-
- # this is the function that creates a symbol to key mapping. currently this works by making a
- # pass through the raw properties, but conceivably you could map symbols to keys using the
- # mapitags directly. problem with that would be that named properties wouldn't map automatically,
- # but maybe thats not too important.
- def sym_to_key
- return @sym_to_key if @sym_to_key
- @sym_to_key = {}
- raw.keys.each do |key|
- sym = key.to_sym
- unless Symbol === sym
- Log.debug "couldn't find symbolic name for key #{key.inspect}"
- next
- end
- if @sym_to_key[sym]
- Log.warn "duplicate key #{key.inspect}"
- # we give preference to PS_MAPI keys
- @sym_to_key[sym] = key if key.guid == PS_MAPI
- else
- # just assign
- @sym_to_key[sym] = key
- end
- end
- @sym_to_key
- end
-
- def keys
- sym_to_key.keys
- end
-
- def values
- sym_to_key.values.map { |key| raw[key] }
- end
-
- def [] arg, guid=nil
- raw[resolve(arg, guid)]
- end
-
- def []= arg, *args
- args.unshift nil if args.length == 1
- guid, value = args
- # FIXME this won't really work properly. it would need to go
- # to TAGS to resolve, as it often won't be there already...
- raw[resolve(arg, guid)] = value
- end
-
- def method_missing name, *args
- if name.to_s !~ /\=$/ and args.empty?
- self[name]
- elsif name.to_s =~ /(.*)\=$/ and args.length == 1
- self[$1] = args[0]
- else
- super
- end
- end
-
- def to_h
- sym_to_key.inject({}) { |hash, (sym, key)| hash.update sym => raw[key] }
- end
-
- def inspect
- "#<#{self.class} " + to_h.sort_by { |k, v| k.to_s }.map do |k, v|
- v = v.inspect
- "#{k}=#{v.length > 32 ? v[0..29] + '..."' : v}"
- end.join(' ') + '>'
- end
-
- # -----
-
- # temporary pseudo tags
-
- # for providing rtf to plain text conversion. later, html to text too.
- # The result is memoized in @body, including a nil result.
- def body
- return @body if defined?(@body)
- # any error raised by the lookup is swallowed and treated as "no body"
- @body = (self[:body] rescue nil)
- # last resort
- if !@body or @body.strip.empty?
- Log.warn 'creating text body from rtf'
- # a failed conversion (eg when there is no rtf either) leaves @body as nil
- @body = (::RTF::Converter.rtf2text body_rtf rescue nil)
- end
- @body
- end
-
- # for providing rtf decompression
- # The result is memoized in @body_rtf, including a nil result.
- def body_rtf
- return @body_rtf if defined?(@body_rtf)
- # rtf_compressed is not defined here, so it is resolved via method_missing as a
- # property lookup. any error, such as a missing property, gives nil.
- @body_rtf = (RTF.rtfdecompr rtf_compressed.read rescue nil)
- end
-
- # for providing rtf to html conversion
- # Tries three sources in order: the body_html property, RTF.rtf2html on the rtf
- # body, and finally ::RTF::Converter.rtf2text in :html mode. Each attempt swallows
- # its errors; the result is memoized in @body_html, including a nil result.
- def body_html
- return @body_html if defined?(@body_html)
- @body_html = (self[:body_html].read rescue nil)
- @body_html = (RTF.rtf2html body_rtf rescue nil) if !@body_html or @body_html.strip.empty?
- # last resort
- if !@body_html or @body_html.strip.empty?
- Log.warn 'creating html body from rtf'
- @body_html = (::RTF::Converter.rtf2text body_rtf, :html rescue nil)
- end
- @body_html
- end
- end
-end
-
diff --git a/vendor/ruby-msg/lib/mapi/pst.rb b/vendor/ruby-msg/lib/mapi/pst.rb
deleted file mode 100644
index 9ac64b097..000000000
--- a/vendor/ruby-msg/lib/mapi/pst.rb
+++ /dev/null
@@ -1,1806 +0,0 @@
-#
-# = Introduction
-#
-# This file is mostly an attempt to port libpst to ruby, and simplify it in the process. It
-# will leverage much of the existing MAPI => MIME conversion developed for Msg files, and as
-# such is purely concerned with the file structure details.
-#
-# = TODO
-#
-# 1. solve recipient table problem (test4).
-# this is done. turns out it was due to id2 clashes. find better solution
-# 2. check parse consistency. an initial conversion of a 30M file to pst, shows
-# a number of messages converting badly. compare with libpst too.
-# 3. xattribs
-# 4. generalise the Mapi stuff better
-# 5. refactor index load
-# 6. msg serialization?
-#
-
-=begin
-
-quick plan for cleanup.
-
-have working tests for 97 and 03 file formats, so safe.
-
-want to fix up:
-
-64 bit unpacks scattered around. its ugly. not sure how best to handle it, but am slightly tempted
-to override String#unpack to support a 64 bit little endian unpack (like L vs N/V, for Q). one way or
-another need to fix it. Could really slow everything else down if its parsing the unpack strings twice,
-once in ruby, for every single unpack i do :/
-
-the index loading process, and the lack of shared code between normal vs 64 bit variants, and Index vs Desc.
-should be able to reduce code by factor of 4. also think I should move load code into the class too. then
-maybe have something like:
-
-class Header
- def index_class
- version_2003 ? Index64 : Index
- end
-end
-
-def load_idx
- header.index_class.load_index
-end
-
-OR
-
-def initialize
- @header = ...
- extend @header.index_class::Load
- load_idx
-end
-
-need to think about the role of the mapi code, and Pst::Item etc, but that layer can come later.
-
-=end
-
-require 'mapi'
-require 'enumerator'
-require 'ostruct'
-require 'ole/ranges_io'
-
-module Mapi
-class Pst
- # raised when the data does not look like a pst file this code can handle
- class FormatError < StandardError; end
-
- # unfortunately there is no Q analogue which is little endian only.
- # this translates T as an unsigned quad word, little endian byte order, to
- # not pollute the rest of the code.
- #
- # didn't want to override String#unpack, cause its too hacky, and incomplete.
- # Same result as str.unpack(unpack_spec), except that each T directive yields one
- # integer assembled from two consecutive little endian 32 bit values.
- def self.unpack str, unpack_spec
- # specs without a T are handed straight to String#unpack
- return str.unpack(unpack_spec) unless unpack_spec['T']
- # the translation of a spec is computed once and remembered per spec string
- @unpack_cache ||= {}
- t_offsets, new_spec = @unpack_cache[unpack_spec]
- unless t_offsets
- # t_offsets collects the positions in the final result array that hold a T value
- t_offsets = []
- offset = 0
- new_spec = ''
- unpack_spec.scan(/([^\d])_?(\*|\d+)?/o) do
- # $1 is the directive letter and $2 its count. a/A directives always count as
- # one result element, whatever their count says.
- num_elems = $1.downcase == 'a' ? 1 : ($2 || 1).to_i
- if $1 == 'T'
- num_elems.times { |i| t_offsets << offset + i }
- # every T is read as two V values and recombined below
- new_spec << "V#{num_elems * 2}"
- else
- new_spec << $~[0]
- end
- offset += num_elems
- end
- @unpack_cache[unpack_spec] = [t_offsets, new_spec]
- end
- a = str.unpack(new_spec)
- # each step swaps a low/high pair for a single element, so the array shrinks by
- # one and the later offsets (which refer to final positions) line up again
- t_offsets.each do |offset|
- low, high = a[offset, 2]
- a[offset, 2] = low && high ? low + (high << 32) : nil
- end
- a
- end
-
- #
- # this is the header and encryption encapsulation code
- # ----------------------------------------------------------------------------
- #
-
- # class which encapsulates the pst header
- class Header
- SIZE = 512
- MAGIC = 0x2142444e
-
- # these are the constants defined in libpst.c, that
- # are referenced in pst_open()
- INDEX_TYPE_OFFSET = 0x0A
- FILE_SIZE_POINTER = 0xA8
- FILE_SIZE_POINTER_64 = 0xB8
- SECOND_POINTER = 0xBC
- INDEX_POINTER = 0xC4
- SECOND_POINTER_64 = 0xE0
- INDEX_POINTER_64 = 0xF0
- ENC_OFFSET = 0x1CD
-
- attr_reader :magic, :index_type, :encrypt_type, :size
- attr_reader :index1_count, :index1, :index2_count, :index2
- attr_reader :version
- def initialize data
- @magic = data.unpack('N')[0]
- @index_type = data[INDEX_TYPE_OFFSET]
- @version = {0x0e => 1997, 0x17 => 2003}[@index_type]
-
- if version_2003?
- # don't know?
- # >> data1.unpack('V*').zip(data2.unpack('V*')).enum_with_index.select { |(c, d), i| c != d and not [46, 56, 60].include?(i) }.select { |(a, b), i| b == 0 }.map { |(a, b), i| [a / 256, i] }
- # [8, 76], [32768, 84], [128, 89]
- # >> data1.unpack('C*').zip(data2.unpack('C*')).enum_with_index.select { |(c, d), i| c != d and not [184..187, 224..227, 240..243].any? { |r| r === i } }.select { |(a, b), i| b == 0 and ((Math.log(a) / Math.log(2)) % 1) < 0.0001 }
- # [[[2, 0], 61], [[2, 0], 76], [[2, 0], 195], [[2, 0], 257], [[8, 0], 305], [[128, 0], 338], [[128, 0], 357]]
- # i have only 2 psts to base this guess on, so i can't really come up with anything that looks reasonable yet. not sure what the offset is. unfortunately there is so much in the header
- # that isn't understood...
- @encrypt_type = 1
-
- @index2_count, @index2 = data[SECOND_POINTER_64 - 4, 8].unpack('V2')
- @index1_count, @index1 = data[INDEX_POINTER_64 - 4, 8].unpack('V2')
-
- @size = data[FILE_SIZE_POINTER_64, 4].unpack('V')[0]
- else
- @encrypt_type = data[ENC_OFFSET]
-
- @index2_count, @index2 = data[SECOND_POINTER - 4, 8].unpack('V2')
- @index1_count, @index1 = data[INDEX_POINTER - 4, 8].unpack('V2')
-
- @size = data[FILE_SIZE_POINTER, 4].unpack('V')[0]
- end
-
- validate!
- end
-
- def version_2003?
- version == 2003
- end
-
- def encrypted?
- encrypt_type != 0
- end
-
- def validate!
- raise FormatError, "bad signature on pst file (#{'0x%x' % magic})" unless magic == MAGIC
- raise FormatError, "only index types 0x0e and 0x17 are handled (#{'0x%x' % index_type})" unless [0x0e, 0x17].include?(index_type)
- raise FormatError, "only encrytion types 0 and 1 are handled (#{encrypt_type.inspect})" unless [0, 1].include?(encrypt_type)
- end
- end
-
- # compressible encryption! :D
- #
- # simple substitution. see libpst.c
- # maybe test switch to using a String#tr!
- class CompressibleEncryption
- DECRYPT_TABLE = [
- 0x47, 0xf1, 0xb4, 0xe6, 0x0b, 0x6a, 0x72, 0x48,
- 0x85, 0x4e, 0x9e, 0xeb, 0xe2, 0xf8, 0x94, 0x53, # 0x0f
- 0xe0, 0xbb, 0xa0, 0x02, 0xe8, 0x5a, 0x09, 0xab,
- 0xdb, 0xe3, 0xba, 0xc6, 0x7c, 0xc3, 0x10, 0xdd, # 0x1f
- 0x39, 0x05, 0x96, 0x30, 0xf5, 0x37, 0x60, 0x82,
- 0x8c, 0xc9, 0x13, 0x4a, 0x6b, 0x1d, 0xf3, 0xfb, # 0x2f
- 0x8f, 0x26, 0x97, 0xca, 0x91, 0x17, 0x01, 0xc4,
- 0x32, 0x2d, 0x6e, 0x31, 0x95, 0xff, 0xd9, 0x23, # 0x3f
- 0xd1, 0x00, 0x5e, 0x79, 0xdc, 0x44, 0x3b, 0x1a,
- 0x28, 0xc5, 0x61, 0x57, 0x20, 0x90, 0x3d, 0x83, # 0x4f
- 0xb9, 0x43, 0xbe, 0x67, 0xd2, 0x46, 0x42, 0x76,
- 0xc0, 0x6d, 0x5b, 0x7e, 0xb2, 0x0f, 0x16, 0x29, # 0x5f
- 0x3c, 0xa9, 0x03, 0x54, 0x0d, 0xda, 0x5d, 0xdf,
- 0xf6, 0xb7, 0xc7, 0x62, 0xcd, 0x8d, 0x06, 0xd3, # 0x6f
- 0x69, 0x5c, 0x86, 0xd6, 0x14, 0xf7, 0xa5, 0x66,
- 0x75, 0xac, 0xb1, 0xe9, 0x45, 0x21, 0x70, 0x0c, # 0x7f
- 0x87, 0x9f, 0x74, 0xa4, 0x22, 0x4c, 0x6f, 0xbf,
- 0x1f, 0x56, 0xaa, 0x2e, 0xb3, 0x78, 0x33, 0x50, # 0x8f
- 0xb0, 0xa3, 0x92, 0xbc, 0xcf, 0x19, 0x1c, 0xa7,
- 0x63, 0xcb, 0x1e, 0x4d, 0x3e, 0x4b, 0x1b, 0x9b, # 0x9f
- 0x4f, 0xe7, 0xf0, 0xee, 0xad, 0x3a, 0xb5, 0x59,
- 0x04, 0xea, 0x40, 0x55, 0x25, 0x51, 0xe5, 0x7a, # 0xaf
- 0x89, 0x38, 0x68, 0x52, 0x7b, 0xfc, 0x27, 0xae,
- 0xd7, 0xbd, 0xfa, 0x07, 0xf4, 0xcc, 0x8e, 0x5f, # 0xbf
- 0xef, 0x35, 0x9c, 0x84, 0x2b, 0x15, 0xd5, 0x77,
- 0x34, 0x49, 0xb6, 0x12, 0x0a, 0x7f, 0x71, 0x88, # 0xcf
- 0xfd, 0x9d, 0x18, 0x41, 0x7d, 0x93, 0xd8, 0x58,
- 0x2c, 0xce, 0xfe, 0x24, 0xaf, 0xde, 0xb8, 0x36, # 0xdf
- 0xc8, 0xa1, 0x80, 0xa6, 0x99, 0x98, 0xa8, 0x2f,
- 0x0e, 0x81, 0x65, 0x73, 0xe4, 0xc2, 0xa2, 0x8a, # 0xef
- 0xd4, 0xe1, 0x11, 0xd0, 0x08, 0x8b, 0x2a, 0xf2,
- 0xed, 0x9a, 0x64, 0x3f, 0xc1, 0x6c, 0xf9, 0xec # 0xff
- ]
-
- ENCRYPT_TABLE = [nil] * 256
- DECRYPT_TABLE.each_with_index { |i, j| ENCRYPT_TABLE[i] = j }
-
- def self.decrypt_alt encrypted
- decrypted = ''
- encrypted.length.times { |i| decrypted << DECRYPT_TABLE[encrypted[i]] }
- decrypted
- end
-
- def self.encrypt_alt decrypted
- encrypted = ''
- decrypted.length.times { |i| encrypted << ENCRYPT_TABLE[decrypted[i]] }
- encrypted
- end
-
- # an alternate implementation that is possibly faster....
- # TODO - bench
- DECRYPT_STR, ENCRYPT_STR = [DECRYPT_TABLE, (0...256)].map do |values|
- values.map { |i| i.chr }.join.gsub(/([\^\-\\])/, "\\\\\\1")
- end
-
- def self.decrypt encrypted
- encrypted.tr ENCRYPT_STR, DECRYPT_STR
- end
-
- def self.encrypt decrypted
- decrypted.tr DECRYPT_STR, ENCRYPT_STR
- end
- end
-
- class RangesIOEncryptable < RangesIO
- def initialize io, mode='r', params={}
- mode, params = 'r', mode if Hash === mode
- @decrypt = !!params[:decrypt]
- super
- end
-
- def encrypted?
- @decrypt
- end
-
- def read limit=nil
- buf = super
- buf = CompressibleEncryption.decrypt(buf) if encrypted?
- buf
- end
- end
-
- attr_reader :io, :header, :idx, :desc, :special_folder_ids
-
- # corresponds to
- # * pst_open
- # * pst_load_index
- def initialize io
- @io = io
- io.pos = 0
- @header = Header.new io.read(Header::SIZE)
-
- # would prefer this to be in Header#validate, but it doesn't have the io size.
- # should perhaps downgrade this to just be a warning...
- raise FormatError, "header size field invalid (#{header.size} != #{io.size}}" unless header.size == io.size
-
- load_idx
- load_desc
- load_xattrib
-
- @special_folder_ids = {}
- end
-
- def encrypted?
- @header.encrypted?
- end
-
- # until i properly fix logging...
- def warn s
- Mapi::Log.warn s
- end
-
- #
- # this is the index and desc record loading code
- # ----------------------------------------------------------------------------
- #
-
- ToTree = Module.new
-
- # NOTE(review): this looks like an unfinished sketch of a restructured index loader. the
- # method bodies and struct member lists are only '#...' placeholders.
- module Index2
- BLOCK_SIZE = 512
- # placeholder for the recursive block loading code
- module RecursiveLoad
- def load_chain
- #...
- end
- end
-
- # placeholder for the behaviour common to both versions
- module Base
- def read
- #...
- end
- end
-
- class Version1997 < Struct.new(:a)#...)
- SIZE = 12
-
- include RecursiveLoad
- include Base
- end
-
- class Version2003 < Struct.new(:a)#...)
- SIZE = 24
-
- include RecursiveLoad
- include Base
- end
- end
-
- # NOTE(review): as for Index2, this looks like an unfinished sketch. the method body and
- # struct member lists are only '#...' placeholders, and the RecursiveLoad includes are
- # commented out.
- module Desc2
- module Base
- def desc
- #...
- end
- end
-
- class Version1997 < Struct.new(:a)#...)
- #include Index::RecursiveLoad
- include Base
- end
-
- class Version2003 < Struct.new(:a)#...)
- #include Index::RecursiveLoad
- include Base
- end
- end
-
- # more constants from libpst.c
- # these relate to the index block
- ITEM_COUNT_OFFSET = 0x1f0 # count byte
- LEVEL_INDICATOR_OFFSET = 0x1f3 # node or leaf
- BACKLINK_OFFSET = 0x1f8 # backlink u1 value
-
- # these 3 classes are used to hold various file records
-
- # pst_index
- class Index < Struct.new(:id, :offset, :size, :u1)
- UNPACK_STR = 'VVvv'
- SIZE = 12
- BLOCK_SIZE = 512 # index blocks was 516 but bogus
- COUNT_MAX = 41 # max active items (ITEM_COUNT_OFFSET / Index::SIZE = 41)
-
- attr_accessor :pst
- def initialize data
- data = Pst.unpack data, UNPACK_STR if String === data
- super(*data)
- end
-
- def type
- @type ||= begin
- if id & 0x2 == 0
- :data
- else
- first_byte, second_byte = read.unpack('CC')
- if first_byte == 1
- raise second_byte unless second_byte == 1
- :data_chain_header
- elsif first_byte == 2
- raise second_byte unless second_byte == 0
- :id2_assoc
- else
- raise FormatError, 'unknown first byte for block - %p' % first_byte
- end
- end
- end
- end
-
- def data?
- (id & 0x2) == 0
- end
-
- def read decrypt=true
- # only data blocks are every encrypted
- decrypt = false unless data?
- pst.pst_read_block_size offset, size, decrypt
- end
-
- # show all numbers in hex
- def inspect
- super.gsub(/=(\d+)/) { '=0x%x' % $1.to_i }.sub(/Index /, "Index type=#{type.inspect}, ")
- end
- end
-
- # mostly guesses.
- ITEM_COUNT_OFFSET_64 = 0x1e8
- LEVEL_INDICATOR_OFFSET_64 = 0x1eb # diff of 3 between these 2 as above...
-
- # inherits from Index, in order to get the same #type function.
- class Index64 < Index
- UNPACK_STR = 'TTvvV'
- SIZE = 24
- BLOCK_SIZE = 512
- COUNT_MAX = 20 # bit of a guess really. 512 / 24 = 21, but doesn't leave enough header room
-
- # this is the extra item on the end of the UNPACK_STR above
- attr_accessor :u2
-
- def initialize data
- data = Pst.unpack data, UNPACK_STR if String === data
- @u2 = data.pop
- super data
- end
-
- def inspect
- super.sub(/>$/, ', u2=%p>' % u2)
- end
-
- def self.load_chain io, header
- load_idx_rec io, header.index1, 0, 0
- end
-
- # almost identical to load code for Index, just different offsets and unpack strings.
- # can probably merge them, or write a generic load_tree function or something.
- def self.load_idx_rec io, offset, linku1, start_val
- io.seek offset
- buf = io.read BLOCK_SIZE
- idxs = []
-
- item_count = buf[ITEM_COUNT_OFFSET_64]
- raise "have too many active items in index (#{item_count})" if item_count > COUNT_MAX
-
- #idx = Index.new buf[BACKLINK_OFFSET, Index::SIZE]
- #raise 'blah 1' unless idx.id == linku1
-
- if buf[LEVEL_INDICATOR_OFFSET_64] == 0
- # leaf pointers
- # split the data into item_count index objects
- buf[0, SIZE * item_count].scan(/.{#{SIZE}}/mo).each_with_index do |data, i|
- idx = new data
- # first entry
- raise 'blah 3' if i == 0 and start_val != 0 and idx.id != start_val
- #idx.pst = self
- break if idx.id == 0
- idxs << idx
- end
- else
- # node pointers
- # split the data into item_count table pointers
- buf[0, SIZE * item_count].scan(/.{#{SIZE}}/mo).each_with_index do |data, i|
- start, u1, offset = Pst.unpack data, 'T3'
- # for the first value, we expect the start to be equal
- raise 'blah 3' if i == 0 and start_val != 0 and start != start_val
- break if start == 0
- idxs += load_idx_rec io, offset, u1, start
- end
- end
-
- idxs
- end
- end
-
- # pst_desc
- class Desc64 < Struct.new(:desc_id, :idx_id, :idx2_id, :parent_desc_id, :u2)
- UNPACK_STR = 'T3VV'
- SIZE = 32
- BLOCK_SIZE = 512 # descriptor blocks was 520 but bogus
- COUNT_MAX = 15 # guess as per Index64
-
- include RecursivelyEnumerable
-
- attr_accessor :pst
- attr_reader :children
- def initialize data
- super(*Pst.unpack(data, UNPACK_STR))
- @children = []
- end
-
- def desc
- pst.idx_from_id idx_id
- end
-
- def list_index
- pst.idx_from_id idx2_id
- end
-
- def self.load_chain io, header
- load_desc_rec io, header.index2, 0, 0x21
- end
-
- def self.load_desc_rec io, offset, linku1, start_val
- io.seek offset
- buf = io.read BLOCK_SIZE
- descs = []
- item_count = buf[ITEM_COUNT_OFFSET_64]
-
- # not real desc
- #desc = Desc.new buf[BACKLINK_OFFSET, 4]
- #raise 'blah 1' unless desc.desc_id == linku1
-
- if buf[LEVEL_INDICATOR_OFFSET_64] == 0
- # leaf pointers
- raise "have too many active items in index (#{item_count})" if item_count > COUNT_MAX
- # split the data into item_count desc objects
- buf[0, SIZE * item_count].scan(/.{#{SIZE}}/mo).each_with_index do |data, i|
- desc = new data
- # first entry
- raise 'blah 3' if i == 0 and start_val != 0 and desc.desc_id != start_val
- break if desc.desc_id == 0
- descs << desc
- end
- else
- # node pointers
- raise "have too many active items in index (#{item_count})" if item_count > Index64::COUNT_MAX
- # split the data into item_count table pointers
- buf[0, Index64::SIZE * item_count].scan(/.{#{Index64::SIZE}}/mo).each_with_index do |data, i|
- start, u1, offset = Pst.unpack data, 'T3'
- # for the first value, we expect the start to be equal note that ids -1, so even for the
- # first we expect it to be equal. thats the 0x21 (dec 33) desc record. this means we assert
- # that the first desc record is always 33...
- # thats because 0x21 is the pst root itself...
- raise 'blah 3' if i == 0 and start_val != -1 and start != start_val
- # this shouldn't really happen i'd imagine
- break if start == 0
- descs += load_desc_rec io, offset, u1, start
- end
- end
-
- descs
- end
-
- def each_child(&block)
- @children.each(&block)
- end
- end
-
- # _pst_table_ptr_struct
- class TablePtr < Struct.new(:start, :u1, :offset)
- UNPACK_STR = 'V3'
- SIZE = 12
-
- def initialize data
- data = data.unpack(UNPACK_STR) if String === data
- super(*data)
- end
- end
-
- # pst_desc
- # idx_id is a pointer to an idx record which gets the primary data stream for the Desc record.
- # idx2_id gets you an idx record, that when read gives you an ID2 association list, which just maps
- # another set of ids to index values
- class Desc < Struct.new(:desc_id, :idx_id, :idx2_id, :parent_desc_id)
- UNPACK_STR = 'V4'
- SIZE = 16
- BLOCK_SIZE = 512 # descriptor blocks was 520 but bogus
- COUNT_MAX = 31 # max active desc records (ITEM_COUNT_OFFSET / Desc::SIZE = 31)
-
- include ToTree
-
- attr_accessor :pst
- attr_reader :children
- def initialize data
- super(*data.unpack(UNPACK_STR))
- @children = []
- end
-
- def desc
- pst.idx_from_id idx_id
- end
-
- def list_index
- pst.idx_from_id idx2_id
- end
-
- # show all numbers in hex
- def inspect
- super.gsub(/=(\d+)/) { '=0x%x' % $1.to_i }
- end
- end
-
- # corresponds to
- # * _pst_build_id_ptr
- def load_idx
- @idx = []
- @idx_offsets = []
- if header.version_2003?
- @idx = Index64.load_chain io, header
- @idx.each { |idx| idx.pst = self }
- else
- load_idx_rec header.index1, header.index1_count, 0
- end
-
- # we'll typically be accessing by id, so create a hash as a lookup cache
- @idx_from_id = {}
- @idx.each do |idx|
- warn "there are duplicate idx records with id #{idx.id}" if @idx_from_id[idx.id]
- @idx_from_id[idx.id] = idx
- end
- end
-
- # load the flat idx table, which maps ids to file ranges. this is the recursive helper
- #
- # corresponds to
- # * _pst_build_id_ptr
- def load_idx_rec offset, linku1, start_val
- @idx_offsets << offset
-
- #_pst_read_block_size(pf, offset, BLOCK_SIZE, &buf, 0, 0) < BLOCK_SIZE)
- buf = pst_read_block_size offset, Index::BLOCK_SIZE, false
-
- item_count = buf[ITEM_COUNT_OFFSET]
- raise "have too many active items in index (#{item_count})" if item_count > Index::COUNT_MAX
-
- idx = Index.new buf[BACKLINK_OFFSET, Index::SIZE]
- raise 'blah 1' unless idx.id == linku1
-
- if buf[LEVEL_INDICATOR_OFFSET] == 0
- # leaf pointers
- # split the data into item_count index objects
- buf[0, Index::SIZE * item_count].scan(/.{#{Index::SIZE}}/mo).each_with_index do |data, i|
- idx = Index.new data
- # first entry
- raise 'blah 3' if i == 0 and start_val != 0 and idx.id != start_val
- idx.pst = self
- # this shouldn't really happen i'd imagine
- break if idx.id == 0
- @idx << idx
- end
- else
- # node pointers
- # split the data into item_count table pointers
- buf[0, TablePtr::SIZE * item_count].scan(/.{#{TablePtr::SIZE}}/mo).each_with_index do |data, i|
- table = TablePtr.new data
- # for the first value, we expect the start to be equal
- raise 'blah 3' if i == 0 and start_val != 0 and table.start != start_val
- # this shouldn't really happen i'd imagine
- break if table.start == 0
- load_idx_rec table.offset, table.u1, table.start
- end
- end
- end
-
- # most access to idx objects will use this function
- #
- # corresponds to
- # * _pst_getID
- def idx_from_id id
- @idx_from_id[id]
- end
-
- # corresponds to
- # * _pst_build_desc_ptr
- # * record_descriptor
- def load_desc
- @desc = []
- @desc_offsets = []
- if header.version_2003?
- @desc = Desc64.load_chain io, header
- @desc.each { |desc| desc.pst = self }
- else
- load_desc_rec header.index2, header.index2_count, 0x21
- end
-
- # first create a lookup cache
- @desc_from_id = {}
- @desc.each do |desc|
- desc.pst = self
- warn "there are duplicate desc records with id #{desc.desc_id}" if @desc_from_id[desc.desc_id]
- @desc_from_id[desc.desc_id] = desc
- end
-
- # now turn the flat list of loaded desc records into a tree
-
- # well, they have no parent, so they're more like, the toplevel descs.
- @orphans = []
- # now assign each node to the parents child array, putting the orphans in the above
- @desc.each do |desc|
- parent = @desc_from_id[desc.parent_desc_id]
- # note, besides this, its possible to create other circular structures.
- if parent == desc
- # this actually happens usually, for the root_item it appears.
- #warn "desc record's parent is itself (#{desc.inspect})"
- # maybe add some more checks in here for circular structures
- elsif parent
- parent.children << desc
- next
- end
- @orphans << desc
- end
-
- # maybe change this to some sort of sane-ness check. orphans are expected
-# warn "have #{@orphans.length} orphan desc record(s)." unless @orphans.empty?
- end
-
- # load the flat list of desc records recursively
- #
- # corresponds to
- # * _pst_build_desc_ptr
- # * record_descriptor
- def load_desc_rec offset, linku1, start_val
- @desc_offsets << offset
-
- buf = pst_read_block_size offset, Desc::BLOCK_SIZE, false
- item_count = buf[ITEM_COUNT_OFFSET]
-
- # not real desc
- desc = Desc.new buf[BACKLINK_OFFSET, 4]
- raise 'blah 1' unless desc.desc_id == linku1
-
- if buf[LEVEL_INDICATOR_OFFSET] == 0
- # leaf pointers
- raise "have too many active items in index (#{item_count})" if item_count > Desc::COUNT_MAX
- # split the data into item_count desc objects
- buf[0, Desc::SIZE * item_count].scan(/.{#{Desc::SIZE}}/mo).each_with_index do |data, i|
- desc = Desc.new data
- # first entry
- raise 'blah 3' if i == 0 and start_val != 0 and desc.desc_id != start_val
- # this shouldn't really happen i'd imagine
- break if desc.desc_id == 0
- @desc << desc
- end
- else
- # node pointers
- raise "have too many active items in index (#{item_count})" if item_count > Index::COUNT_MAX
- # split the data into item_count table pointers
- buf[0, TablePtr::SIZE * item_count].scan(/.{#{TablePtr::SIZE}}/mo).each_with_index do |data, i|
- table = TablePtr.new data
- # for the first value, we expect the start to be equal note that ids -1, so even for the
- # first we expect it to be equal. thats the 0x21 (dec 33) desc record. this means we assert
- # that the first desc record is always 33...
- raise 'blah 3' if i == 0 and start_val != -1 and table.start != start_val
- # this shouldn't really happen i'd imagine
- break if table.start == 0
- load_desc_rec table.offset, table.u1, table.start
- end
- end
- end
-
- # as for idx
- #
- # corresponds to:
- # * _pst_getDptr
- def desc_from_id id
- @desc_from_id[id]
- end
-
- # corresponds to
- # * pst_load_extended_attributes
- def load_xattrib
- unless desc = desc_from_id(0x61)
- warn "no extended attributes desc record found"
- return
- end
- unless desc.desc
- warn "no desc idx for extended attributes"
- return
- end
- if desc.list_index
- end
- #warn "skipping loading xattribs"
- # FIXME implement loading xattribs
- end
-
- # corresponds to:
- # * _pst_read_block_size
- # * _pst_read_block ??
- # * _pst_ff_getIDblock_dec ??
- # * _pst_ff_getIDblock ??
- def pst_read_block_size offset, size, decrypt=true
- io.seek offset
- buf = io.read size
- warn "tried to read #{size} bytes but only got #{buf.length}" if buf.length != size
- encrypted? && decrypt ? CompressibleEncryption.decrypt(buf) : buf
- end
-
- #
- # id2
- # ----------------------------------------------------------------------------
- #
-
- class ID2Assoc < Struct.new(:id2, :id, :table2)
- UNPACK_STR = 'V3'
- SIZE = 12
-
- def initialize data
- data = data.unpack(UNPACK_STR) if String === data
- super(*data)
- end
- end
-
- class ID2Assoc64 < Struct.new(:id2, :u1, :id, :table2)
- UNPACK_STR = 'VVT2'
- SIZE = 24
-
- def initialize data
- if String === data
- data = Pst.unpack data, UNPACK_STR
- end
- super(*data)
- end
-
- def self.load_chain idx
- buf = idx.read
- type, count = buf.unpack 'v2'
- unless type == 0x0002
- raise 'unknown id2 type 0x%04x' % type
- #return
- end
- id2 = []
- count.times do |i|
- assoc = new buf[8 + SIZE * i, SIZE]
- id2 << assoc
- if assoc.table2 != 0
- id2 += load_chain idx.pst.idx_from_id(assoc.table2)
- end
- end
- id2
- end
- end
-
- # maps id2 values to idx records, using a list of id2 assoc records
- class ID2Mapping
- attr_reader :list
- # +list+ is an array of objects responding to #id2 and #id
- def initialize pst, list
- @pst = pst
- @list = list
- # create a lookup.
- @id_from_id2 = {}
- @list.each do |id2|
- # NOTE we keep the first value seen if there are duplicates (later ones are skipped
- # by the next below). this "fixes" test4-o1997.pst for the time being.
- warn "there are duplicate id2 records with id #{id2.id2}" if @id_from_id2[id2.id2]
- next if @id_from_id2[id2.id2]
- @id_from_id2[id2.id2] = id2.id
- end
- end
-
- # TODO: fix logging
- def warn s
- Mapi::Log.warn s
- end
-
- # corresponds to:
- # * _pst_getID2
- #
- # returns the idx record for the id2 value +id+, or nil if the id2 value is unknown
- def [] id
- #id2 = @list.find { |x| x.id2 == id }
- id = @id_from_id2[id]
- id and @pst.idx_from_id(id)
- end
- end
-
- def load_idx2 idx
- if header.version_2003?
- id2 = ID2Assoc64.load_chain idx
- else
- id2 = load_idx2_rec idx
- end
- ID2Mapping.new self, id2
- end
-
- # corresponds to
- # * _pst_build_id2
- def load_idx2_rec idx
- # i should perhaps use a idx chain style read here?
- buf = pst_read_block_size idx.offset, idx.size, false
- type, count = buf.unpack 'v2'
- unless type == 0x0002
- raise 'unknown id2 type 0x%04x' % type
- #return
- end
- id2 = []
- count.times do |i|
- assoc = ID2Assoc.new buf[4 + ID2Assoc::SIZE * i, ID2Assoc::SIZE]
- id2 << assoc
- if assoc.table2 != 0
- id2 += load_idx2_rec idx_from_id(assoc.table2)
- end
- end
- id2
- end
-
- class RangesIOIdxChain < RangesIOEncryptable
- def initialize pst, idx_head
- @idxs = pst.id2_block_idx_chain idx_head
- # whether or not a given idx needs encrypting
- decrypts = @idxs.map do |idx|
- decrypt = (idx.id & 2) != 0 ? false : pst.encrypted?
- end.uniq
- raise NotImplementedError, 'partial encryption in RangesIOID2' if decrypts.length > 1
- decrypt = decrypts.first
- # convert idxs to ranges
- ranges = @idxs.map { |idx| [idx.offset, idx.size] }
- super pst.io, :ranges => ranges, :decrypt => decrypt
- end
- end
-
- class RangesIOID2 < RangesIOIdxChain
- def self.new pst, id2, idx2
- RangesIOIdxChain.new pst, idx2[id2]
- end
- end
-
- # corresponds to:
- # * _pst_ff_getID2block
- # * _pst_ff_getID2data
- # * _pst_ff_compile_ID
- # returns the array of data idx records that +idx+ stands for. thats just [idx] for a
- # plain data block. otherwise the block is read as a list of ids, which are looked up
- # directly when the depth byte is 1, and expanded recursively when it isn't.
- def id2_block_idx_chain idx
-   return [idx] if (idx.id & 0x2) == 0
-
-   buf = idx.read
-   type, fdepth, count = buf[0, 4].unpack 'CCv'
-   unless type == 1 # libpst.c:3958
-     warn 'Error in idx_chain - %p, %p, %p - attempting to ignore' % [type, fdepth, count]
-     return [idx]
-   end
-   # there are 4 unaccounted for bytes here, 4...8
-   if header.version_2003?
-     # NOTE(review): this used String#unpack, but every other 'T' unpack string in this
-     # file goes through Pst.unpack - confirm that it accepts an arbitrary count.
-     ids = Pst.unpack buf[8, count * 8], "T#{count}"
-   else
-     ids = buf[8, count * 4].unpack('V*')
-   end
-   if fdepth == 1
-     ids.map { |id| idx_from_id id }
-   else
-     ids.map { |id| id2_block_idx_chain idx_from_id(id) }.flatten
-   end
- end
-
- #
- # main block parsing code. gets raw properties
- # ----------------------------------------------------------------------------
- #
-
- # the job of this class, is to take a desc record, and be able to enumerate through the
- # mapi properties of the associated thing.
- #
- # corresponds to
- # * _pst_parse_block
- # * _pst_process (in some ways. although perhaps thats more the Item::Properties#add_property)
- class BlockParser
- include Mapi::Types::Constants
-
- # maps the 16 bit block signature read by load_header to the block type number
- TYPES = {
- 0xbcec => 1,
- 0x7cec => 2,
- # type 3 is removed. an artifact of not handling the indirect blocks properly in libpst.
- }
-
- # numeric property keys, looked up by name in PropertySet::TAGS (whose keys are hex strings)
- PR_SUBJECT = PropertySet::TAGS.find { |num, (name, type)| name == 'PR_SUBJECT' }.first.hex
- PR_BODY_HTML = PropertySet::TAGS.find { |num, (name, type)| name == 'PR_BODY_HTML' }.first.hex
-
- # this stuff could maybe be moved to Ole::Types? or leverage it somehow?
- # whether or not a type is immediate is more a property of the pst encoding though i expect.
- # what i probably can add is a generic concept of whether a type is of variadic length or not.
-
- # these lists are very incomplete. think they are largely copied from libpst
-
- # types that handle_indirect_values leaves as they are (PT_BOOLEAN is turned into true / false)
- IMMEDIATE_TYPES = [
- PT_SHORT, PT_LONG, PT_BOOLEAN
- ]
-
- # types for which handle_indirect_values treats a non-string value as a pointer, to be
- # resolved through get_data_indirect_io
- INDIRECT_TYPES = [
- PT_DOUBLE, PT_OBJECT,
- 0x0014, # whats this? probably something like PT_LONGLONG, given the correspondence with the
- # ole variant types. (= VT_I8)
- PT_STRING8, PT_UNICODE, # unicode isn't in libpst, but added here for outlook 2003 down the track
- PT_SYSTIME,
- 0x0048, # another unknown
- 0x0102, # this is PT_BINARY vs PT_CLSID
- #0x1003, # these are vector types, but they're commented out for now because i'd expect that
- #0x1014, # there's extra decoding needed that i'm not doing. (probably just need a simple
- # # PT_* => unpack string mapping for the immediate types, and just do unpack('V*') etc
- #0x101e,
- #0x1102
- ]
-
- # the attachment and recipient arrays appear to be always stored with these fixed
- # id2 values. seems strange. are there other extra streams? can find out by making higher
- # level IO wrapper, which has the id2 value, and doing the diff of available id2 values versus
- # used id2 values in properties of an item.
- ID2_ATTACHMENTS = 0x671
- ID2_RECIPIENTS = 0x692
-
- attr_reader :desc, :data, :data_chunks, :offset_tables
- # +desc+ must respond to #desc and #pst (and #list_index, if idx2 is ever needed).
- # reads all the data chunks belonging to desc.desc, parses the header of the first
- # one, and then builds an offset table of [from, to] pairs for each chunk.
- #
- # raises FormatError if desc.desc is missing, if a chunk has an index offset of 0, or if
- # an offset table isn't in ascending order.
- def initialize desc
- raise FormatError, "unable to get associated index record for #{desc.inspect}" unless desc.desc
- @desc = desc
- #@data = desc.desc.read
- if Pst::Index === desc.desc
- #@data = RangesIOIdxChain.new(desc.pst, desc.desc).read
- idxs = desc.pst.id2_block_idx_chain desc.desc
- # this gets me the plain index chain.
- else
- # fake desc
- #@data = desc.desc.read
- idxs = [desc.desc]
- end
-
- @data_chunks = idxs.map { |idx| idx.read }
- @data = @data_chunks.first
-
- # sets @index_offset, @type and @offset1 from the first chunk
- load_header
-
- # the index offset of the first chunk comes from the header just loaded. for the later
- # chunks, it is the 16 bit value they start with.
- @index_offsets = [@index_offset] + @data_chunks[1..-1].map { |chunk| chunk.unpack('v')[0] }
- @offset_tables = []
- @ignored = []
- @data_chunks.zip(@index_offsets).each do |chunk, offset|
- # 16 bit value at the index offset. ignore + 2 of the values following it are used below.
- ignore = chunk[offset, 2].unpack('v')[0]
- @ignored << ignore
-# p ignore
- @offset_tables.push offset_table = []
- # maybe its ok if there aren't to be any values ?
- raise FormatError if offset == 0
- offsets = chunk[offset + 2..-1].unpack('v*')
- #p offsets
- # each consecutive pair of values is the [from, to] range of one item
- offsets[0, ignore + 2].each_cons 2 do |from, to|
- #next if to == 0
- raise FormatError, [from, to].inspect if from > to
- offset_table << [from, to]
- end
- end
-
- @offset_table = @offset_tables.first
- @idxs = idxs
-
- # now, we may have multiple different blocks
- end
-
- # a given desc record may or may not have associated idx2 data. we lazily load it here, so it will never
- # actually be requested unless get_data_indirect actually needs to use it.
- def idx2
- return @idx2 if @idx2
- raise FormatError, 'idx2 requested but no idx2 available' unless desc.list_index
- # should check this can't return nil
- @idx2 = desc.pst.load_idx2 desc.list_index
- end
-
- def load_header
- @index_offset, type, @offset1 = data.unpack 'vvV'
- raise FormatError, 'unknown block type signature 0x%04x' % type unless TYPES[type]
- @type = TYPES[type]
- end
-
- # based on the value of offset, return either some data from buf, or some data from the
- # id2 chain id2, where offset is some key into a lookup table that is stored as the id2
- # chain. i think i may need to create a BlockParser class that wraps up all this mess.
- #
- # corresponds to:
- # * _pst_getBlockOffsetPointer
- # * _pst_getBlockOffset
- def get_data_indirect offset
- return get_data_indirect_io(offset).read
-
- if offset == 0
- nil
- elsif (offset & 0xf) == 0xf
- RangesIOID2.new(desc.pst, offset, idx2).read
- else
- low, high = offset & 0xf, offset >> 4
- raise FormatError if low != 0 or (high & 0x1) != 0 or (high / 2) > @offset_table.length
- from, to = @offset_table[high / 2]
- data[from...to]
- end
- end
-
- def get_data_indirect_io offset
- if offset == 0
- nil
- elsif (offset & 0xf) == 0xf
- if idx2[offset]
- RangesIOID2.new desc.pst, offset, idx2
- else
- warn "tried to get idx2 record for #{offset} but failed"
- return StringIO.new('')
- end
- else
- low, high = offset & 0xf, offset >> 4
- if low != 0 or (high & 0x1) != 0
-# raise FormatError,
- warn "bad - #{low} #{high} (1)"
- return StringIO.new('')
- end
- # lets see which block it should come from.
- block_idx, i = high.divmod 4096
- unless block_idx < @data_chunks.length
- warn "bad - block_idx to high (not #{block_idx} < #{@data_chunks.length})"
- return StringIO.new('')
- end
- data_chunk, offset_table = @data_chunks[block_idx], @offset_tables[block_idx]
- if i / 2 >= offset_table.length
- warn "bad - #{low} #{high} - #{i / 2} >= #{offset_table.length} (2)"
- return StringIO.new('')
- end
- #warn "ok - #{low} #{high} #{offset_table.length}"
- from, to = offset_table[i / 2]
- StringIO.new data_chunk[from...to]
- end
- end
-
- # post-processes one raw property tuple, and returns it as [key, type, value].
- #
- # * PT_BOOLEAN values become true / false
- # * the other IMMEDIATE_TYPES are passed through untouched
- # * INDIRECT_TYPES have their value resolved to an io, or to a string for PT_STRING8 and
- #   PT_UNICODE. PR_BODY_HTML, PR_SUBJECT and PT_OBJECT get extra handling on top of that.
- # * 0x101e and 0x1102 values are split into an array of strings or StringIOs respectively
- #
- # raises NotImplementedError for any other type.
- def handle_indirect_values key, type, value
- case type
- when PT_BOOLEAN
- value = value != 0
- when *IMMEDIATE_TYPES # not including PT_BOOLEAN which we just did above
- # no processing current applied (needed?).
- when *INDIRECT_TYPES
- # the value is a pointer
- if String === value # ie, value size > 4 above
- value = StringIO.new value
- else
- value = get_data_indirect_io(value)
- end
- # keep strings as immediate values for now, for compatability with how i set up
- # Msg::Properties::ENCODINGS
- if value
- if type == PT_STRING8
- value = value.read
- elsif type == PT_UNICODE
- value = Ole::Types::FROM_UTF16.iconv value.read
- end
- end
- # special body_html and subject handling
- if key == PR_BODY_HTML and value
- # to keep the msg code happy, which thinks body_html will be an io
- # although, in 2003 version, they are 0102 already
- value = StringIO.new value unless value.respond_to?(:read)
- end
- if key == PR_SUBJECT and value
- # the first two bytes are a prefix, which is stripped off here. offset is
- # computed from the second one, but isn't used yet (see below).
- ignore, offset = value.unpack 'C2'
- offset = (offset == 1 ? nil : offset - 3)
- value = value[2..-1]
-=begin
- index = value =~ /^[A-Z]*:/ ? $~[0].length - 1 : nil
- unless ignore == 1 and offset == index
- warn 'something wrong with subject hack'
- $x = [ignore, offset, value]
- require 'irb'
- IRB.start
- exit
- end
-=end
-=begin
-new idea:
-
-making sense of the \001\00[156] i've seen prefixing subject. i think its to do with the placement
-of the ':', or the ' '. And perhaps an optimization to do with thread topic, and ignoring the prefixes
-added by mailers. thread topic is equal to subject with all that crap removed.
-
-can test by creating some mails with bizarre subjects.
-
-subject="\001\005RE: blah blah"
-subject="\001\001blah blah"
-subject="\001\032Out of Office AutoReply: blah blah"
-subject="\001\020Undeliverable: blah blah"
-
-looks like it
-
-=end
-
- # now what i think, is that perhaps, value[offset..-1] ...
- # or something like that should be stored as a special tag. ie, do a double yield
- # for this case. probably PR_CONVERSATION_TOPIC, in which case i'd write instead:
- # yield [PR_SUBJECT, ref_type, value]
- # yield [PR_CONVERSATION_TOPIC, ref_type, value[offset..-1]
- # next # to skip the yield.
- end
-
- # special handling for embedded objects
- # used for attach_data for attached messages. in which case attach_method should == 5,
- # for embedded object.
- if type == PT_OBJECT and value
- value = value.read if value.respond_to?(:read)
- id2, unknown = value.unpack 'V2'
- io = RangesIOID2.new desc.pst, id2, idx2
-
- # hacky
- # NOTE(review): desc2 is only used by the commented out code further down
- desc2 = OpenStruct.new(:desc => io, :pst => desc.pst, :list_index => desc.list_index, :children => [])
- # put nil instead of desc.list_index, otherwise the attachment is attached to itself ad infinitum.
- # should try and fix that FIXME
- # this shouldn't be done always. for an attached message, yes, but for an attached
- # meta file, for example, it shouldn't. difference between embedded_ole vs embedded_msg
- # really.
- # note that in the case where its a embedded ole, you actually get a regular serialized ole
- # object, so i need to create an ole storage object on a rangesioidxchain!
- # eg:
-=begin
-att.props.display_name # => "Picture (Metafile)"
-io = att.props.attach_data
-io.read(32).unpack('H*') # => ["d0cf11e0a1b11ae100000.... note the docfile signature.
-# plug some missing rangesio holes:
-def io.rewind; seek 0; end
-def io.flush; raise IOError; end
-ole = Ole::Storage.open io
-puts ole.root.to_tree
-
-- #<Dirent:"Root Entry">
- |- #<Dirent:"\001Ole" size=20 data="\001\000\000\002\000...">
- |- #<Dirent:"CONTENTS" size=65696 data="\327\315\306\232\000...">
- \- #<Dirent:"\003MailStream" size=12 data="\001\000\000\000[...">
-=end
- # until properly fixed, i have disabled this code here, so this will break
- # nested messages temporarily.
- #value = Item.new desc2, RawPropertyStore.new(desc2).to_a
- #desc2.list_index = nil
- value = io
- end
- # this is PT_MV_STRING8, i guess.
- # should probably have the 0x1000 flag, and do the or-ring.
- # example of 0x1102 is PR_OUTLOOK_2003_ENTRYIDS. less sure about that one.
- when 0x101e, 0x1102
- # example data:
- # 0x802b "\003\000\000\000\020\000\000\000\030\000\000\000#\000\000\000BusinessCompetitionFavorites"
- # this 0x802b would be an extended attribute for categories / keywords.
- value = get_data_indirect_io(value).read unless String === value
- # a 32 bit count, followed by that many 32 bit start offsets. each item runs up to
- # the start of the next one, and the last up to the end of the data.
- num = value.unpack('V')[0]
- offsets = value[4, 4 * num].unpack("V#{num}")
- value = (offsets + [value.length]).to_enum(:each_cons, 2).map { |from, to| value[from...to] }
- value.map! { |str| StringIO.new str } if type == 0x1102
- else
- name = Mapi::Types::DATA[type].first rescue nil
- warn '0x%04x %p' % [key, get_data_indirect_io(value).read]
- raise NotImplementedError, 'unsupported mapi property type - 0x%04x (%p)' % [type, name]
- end
- [key, type, value]
- end
- end
-
-=begin
-* recipients:
-
- affects: ["0x200764", "0x2011c4", "0x201b24", "0x201b44", "0x201ba4", "0x201c24", "0x201cc4", "0x202504"]
-
-after adding the rawpropertystoretable fix, all except the second parse properly, and satisfy:
-
- item.props.display_to == item.recipients.map { |r| r.props.display_name if r.props.recipient_type == 1 }.compact * '; '
-
-only the second still has a problem
-
-#[#<struct Pst::Desc desc_id=0x2011c4, idx_id=0x397c, idx2_id=0x398a, parent_desc_id=0x8082>]
-
-think this is related to a multi block #data3. ie, when you use @x * rec_size, and it
-goes > 8190, or there abouts, then it stuffs up. probably there is header gunk, or something,
-similar to when #data is multi block.
-
-same problem affects the attachment table in test4.
-
-fixed that issue. round data3 ranges to rec_size.
-
-fix other issue with attached objects.
-
-all recipients and attachments in test2 are fine.
-
-only remaining issue is test4 recipients of 200044. strange.
-
-=end
-
- # RawPropertyStore is used to iterate through the properties of an item, or the auxiliary
- # data for an attachment. its just a parser for the way the properties are serialized, when the
- # properties don't have to conform to a column structure.
- #
- # structure of this chunk of data is often
- # header, property keys, data values, and then indexes.
- # the property keys has value in it. value can be the actual value if its a short type,
- # otherwise you lookup the value in the indicies, where you get the offsets to use in the
- # main data body. due to the indirect thing though, any of these parts could actually come
- # from a separate stream.
- class RawPropertyStore < BlockParser
- include Enumerable
-
- attr_reader :length
- def initialize desc
- super
- raise FormatError, "expected type 1 - got #{@type}" unless @type == 1
-
- # the way that offset works, data1 may be a subset of buf, or something from id2. if its from buf,
- # it will be offset based on index_offset and offset. so it could be some random chunk of data anywhere
- # in the thing.
- header_data = get_data_indirect @offset1
- raise FormatError if header_data.length < 8
- signature, offset2 = header_data.unpack 'V2'
- #p [@type, signature]
- raise FormatError, 'unhandled block signature 0x%08x' % @type if signature != 0x000602b5
- # this is actually a big chunk of tag tuples.
- @index_data = get_data_indirect offset2
- @length = @index_data.length / 8
- end
-
- # iterate through the property tuples
- def each
- length.times do |i|
- key, type, value = handle_indirect_values(*@index_data[8 * i, 8].unpack('vvV'))
- yield key, type, value
- end
- end
- end
-
- # RawPropertyStoreTable is kind of like a database table.
- # it has a fixed set of columns.
- # #[] is kind of like getting a row from the table.
- # those rows are currently encapsulated by Row, which has #each like
- # RawPropertyStore.
- # only used for the recipients array, and the attachments array. completely lazy, doesn't
- # load any of the properties upon creation.
- class RawPropertyStoreTable < BlockParser
- class Column < Struct.new(:ref_type, :type, :ind2_off, :size, :slot)
- def initialize data
- super(*data.unpack('v3CC'))
- end
-
- def nice_type_name
- Mapi::Types::DATA[ref_type].first[/_(.*)/, 1].downcase rescue '0x%04x' % ref_type
- end
-
- def nice_prop_name
- Mapi::PropertyStore::TAGS['%04x' % type].first[/_(.*)/, 1].downcase rescue '0x%04x' % type
- end
-
- def inspect
- "#<#{self.class} name=#{nice_prop_name.inspect}, type=#{nice_type_name.inspect}>"
- end
- end
-
- include Enumerable
-
- attr_reader :length, :index_data, :data2, :data3, :rec_size
- # parses the table header of +desc+, and reads all the row data into @data3. the
- # number of rows (@length) is the size of the row data over the record size.
- #
- # raises FormatError if the block isn't of type 2, if the first header doesn't start
- # with 0x7c or its column count disagrees with the schema, or if the second header is
- # too short or has an unexpected signature.
- def initialize desc
- super
- raise FormatError, "expected type 2 - got #{@type}" unless @type == 2
-
- header_data = get_data_indirect @offset1
- # seven_c_blk
- # often: u1 == u2 and u3 == u2 + 2, then rec_size == u3 + 4. wtf
- seven_c, @num_list, u1, u2, u3, @rec_size, b_five_offset,
- ind2_offset, u7, u8 = header_data[0, 22].unpack('CCv4V2v2')
- # the rest of the header holds the 8 byte column definitions (see #schema)
- @index_data = header_data[22..-1]
-
- raise FormatError if @num_list != schema.length or seven_c != 0x7c
- # another check
- min_size = schema.inject(0) { |total, col| total + col.size }
- # seem to have at max, 8 padding bytes on the end of the record. not sure if it means
- # anything. maybe its just space that hasn't been reclaimed due to columns being
- # removed or something. probably should just check lower bound.
- range = (min_size..min_size + 8)
- warn "rec_size seems wrong (#{range} !=== #{rec_size})" unless range === rec_size
-
- header_data2 = get_data_indirect b_five_offset
- raise FormatError if header_data2.length < 8
- signature, offset2 = header_data2.unpack 'V2'
- # ??? seems a bit iffy
- # there's probably more to the differences than this, and the data2 difference below
- expect = desc.pst.header.version_2003? ? 0x000404b5 : 0x000204b5
- raise FormatError, 'unhandled block signature 0x%08x' % signature if signature != expect
-
- # this holds all the row data
- # handle multiple block issue.
- @data3_io = get_data_indirect_io ind2_offset
- if RangesIOIdxChain === @data3_io
- # NOTE(review): the assignment below has no right hand side of its own, so it carries
- # on to the next statement, and @data3_idxs ends up holding the same array as ranges.
- # looks like a leftover - confirm that nothing reads @data3_idxs.
- @data3_idxs =
- # modify ranges
- # round the size of each range down to a whole number of records
- ranges = @data3_io.ranges.map { |offset, size| [offset, size / @rec_size * @rec_size] }
- @data3_io.instance_variable_set :@ranges, ranges
- end
- @data3 = @data3_io.read
-
- # there must be something to the data in data2. i think data2 is the array of objects essentially.
- # currently its only used to imply a length
- # actually, at size 6, its just some auxiliary data. i'm thinking either Vv/vV, for 97, and something
- # wider for 03. the second value is just the index (0...length), and the first value is
- # some kind of offset i expect. actually, they were all id2 values, in another case.
- # so maybe they're get_data_indirect values too?
- # actually, it turned out they were identical to the PR_ATTACHMENT_ID2 values...
- # id2_values = ie, data2.unpack('v*').to_enum(:each_slice, 3).transpose[0]
- # table[i].assoc(PR_ATTACHMENT_ID2).last == id2_values[i], for all i.
- # any error while fetching data2 leaves it nil
- @data2 = get_data_indirect(offset2) rescue nil
- #if data2
- # @length = (data2.length / 6.0).ceil
- #else
- # the above / 6, may have been ok for 97 files, but the new 0x0004 style block must have
- # different size records... just use this instead:
- # hmmm, actually, we can still figure it out:
- @length = @data3.length / @rec_size
- #end
-
- # lets try and at least use data2 for a warning for now
- if data2
- data2_rec_size = desc.pst.header.version_2003? ? 8 : 6
- warn 'somthing seems wrong with data3' unless @length == (data2.length / data2_rec_size)
- end
- end
-
- def schema
- @schema ||= index_data.scan(/.{8}/m).map { |data| Column.new data }
- end
-
- def [] idx
- # handle funky rounding
- Row.new self, idx * @rec_size
- end
-
- def each
- length.times { |i| yield self[i] }
- end
-
- class Row
- include Enumerable
-
- def initialize array_parser, x
- @array_parser, @x = array_parser, x
- end
-
- # iterate through the property tuples
- def each
- (@array_parser.index_data.length / 8).times do |i|
- ref_type, type, ind2_off, size, slot = @array_parser.index_data[8 * i, 8].unpack 'v3CC'
- # check this rescue too
- value = @array_parser.data3[@x + ind2_off, size]
-# if INDIRECT_TYPES.include? ref_type
- if size <= 4
- value = value.unpack('V')[0]
- end
- #p ['0x%04x' % ref_type, '0x%04x' % type, (Msg::Properties::MAPITAGS['%04x' % type].first[/^.._(.*)/, 1].downcase rescue nil),
- # value_orig, value, (get_data_indirect(value_orig.unpack('V')[0]) rescue nil), size, ind2_off, slot]
- key, type, value = @array_parser.handle_indirect_values type, ref_type, value
- yield key, type, value
- end
- end
- end
- end
-
- class AttachmentTable < BlockParser
- # a "fake" MAPI property name for this constant. if you get a mapi property with
- # this value, it is the id2 value to use to get attachment data.
- PR_ATTACHMENT_ID2 = 0x67f2
-
- attr_reader :desc, :table
- def initialize desc
- @desc = desc
- # no super, we only actually want BlockParser2#idx2
- @table = nil
- return unless desc.list_index
- return unless idx = idx2[ID2_ATTACHMENTS]
- # FIXME make a fake desc.
- @desc2 = OpenStruct.new :desc => idx, :pst => desc.pst, :list_index => desc.list_index
- @table = RawPropertyStoreTable.new @desc2
- end
-
- def to_a
- return [] if !table
- table.map do |attachment|
- attachment = attachment.to_a
- #p attachment
- # potentially merge with yet more properties
- # this still seems pretty broken - especially the property overlap
- if attachment_id2 = attachment.assoc(PR_ATTACHMENT_ID2)
- #p attachment_id2.last
- #p idx2[attachment_id2.last]
- @desc2.desc = idx2[attachment_id2.last]
- RawPropertyStore.new(@desc2).each do |a, b, c|
- record = attachment.assoc a
- attachment << record = [] unless record
- record.replace [a, b, c]
- end
- end
- attachment
- end
- end
- end
-
- # there is no equivalent to this in libpst. ID2_RECIPIENTS was just guessed given the above
- # AttachmentTable.
- class RecipientTable < BlockParser
- attr_reader :desc, :table
- def initialize desc
- @desc = desc
- # no super, we only actually want BlockParser2#idx2
- @table = nil
- return unless desc.list_index
- return unless idx = idx2[ID2_RECIPIENTS]
- # FIXME make a fake desc.
- desc2 = OpenStruct.new :desc => idx, :pst => desc.pst, :list_index => desc.list_index
- @table = RawPropertyStoreTable.new desc2
- end
-
- def to_a
- return [] if !table
- table.map { |x| x.to_a }
- end
- end
-
- #
- # higher level item code. wraps up the raw properties above, and gives nice
- # objects to work with. handles item relationships too.
- # ----------------------------------------------------------------------------
- #
-
- def self.make_property_set property_list
- hash = property_list.inject({}) do |hash, (key, type, value)|
- hash.update PropertySet::Key.new(key) => value
- end
- PropertySet.new hash
- end
-
- class Attachment < Mapi::Attachment
- def initialize list
- super Pst.make_property_set(list)
-
- @embedded_msg = props.attach_data if Item === props.attach_data
- end
- end
-
- class Recipient < Mapi::Recipient
- def initialize list
- super Pst.make_property_set(list)
- end
- end
-
- class Item < Mapi::Message
- class EntryID < Struct.new(:u1, :entry_id, :id)
- UNPACK_STR = 'VA16V'
-
- def initialize data
- data = data.unpack(UNPACK_STR) if String === data
- super(*data)
- end
- end
-
- include RecursivelyEnumerable
-
- attr_accessor :type, :parent
-
- def initialize desc, list, type=nil
- @desc = desc
- super Pst.make_property_set(list)
-
- # this is kind of weird, but the ids of the special folders are stored in a hash
- # when the root item is loaded
- if ipm_wastebasket_entryid
- desc.pst.special_folder_ids[ipm_wastebasket_entryid] = :wastebasket
- end
-
- if finder_entryid
- desc.pst.special_folder_ids[finder_entryid] = :finder
- end
-
- # and then here, those are used, along with a crappy heuristic to determine if we are an
- # item
-=begin
-i think the low bits of the desc_id can give some info on the type.
-
-it seems that 0x4 is for regular messages (and maybe contacts etc)
-0x2 is for folders, and 0x8 is for special things like rules etc, that aren't visible.
-=end
- unless type
- type = props.valid_folder_mask || ipm_subtree_entryid || props.content_count || props.subfolders ? :folder : :message
- if type == :folder
- type = desc.pst.special_folder_ids[desc.desc_id] || type
- end
- end
-
- @type = type
- end
-
- def each_child
- id = ipm_subtree_entryid
- if id
- root = @desc.pst.desc_from_id id
- raise "couldn't find root" unless root
- raise 'both kinds of children' unless @desc.children.empty?
- children = root.children
- # lets look up the other ids we have.
- # typically the wastebasket one "deleted items" is in the children already, but
- # the search folder isn't.
- extras = [ipm_wastebasket_entryid, finder_entryid].compact.map do |id|
- root = @desc.pst.desc_from_id id
- warn "couldn't find root for id #{id}" unless root
- root
- end.compact
- # i do this instead of union, so as not to mess with the order of the
- # existing children.
- children += (extras - children)
- children
- else
- @desc.children
- end.each do |desc|
- item = @desc.pst.pst_parse_item(desc)
- item.parent = self
- yield item
- end
- end
-
- def path
- parents, item = [], self
- parents.unshift item while item = item.parent
- # remove root
- parents.shift
- parents.map { |item| item.props.display_name or raise 'unable to construct path' } * '/'
- end
-
- def children
- to_enum(:each_child).to_a
- end
-
- # these are still around because they do different stuff
-
- # Top of Personal Folder Record
- def ipm_subtree_entryid
- @ipm_subtree_entryid ||= EntryID.new(props.ipm_subtree_entryid.read).id rescue nil
- end
-
- # Deleted Items Folder Record
- def ipm_wastebasket_entryid
- @ipm_wastebasket_entryid ||= EntryID.new(props.ipm_wastebasket_entryid.read).id rescue nil
- end
-
- # Search Root Record
- def finder_entryid
- @finder_entryid ||= EntryID.new(props.finder_entryid.read).id rescue nil
- end
-
- # all these have been replaced with the method_missing below
-=begin
- # States which folders are valid for this message store
- #def valid_folder_mask
- # props[0x35df]
- #end
-
- # Number of emails stored in a folder
- def content_count
- props[0x3602]
- end
-
- # Has children
- def subfolders
- props[0x360a]
- end
-=end
-
- # i think i will change these, so they can inherit the lazyness from RawPropertyStoreTable.
- # so if you want the last attachment, you can get it without creating the others perhaps.
- # it just has to handle the no table at all case a bit more gracefully.
-
- def attachments
- @attachments ||= AttachmentTable.new(@desc).to_a.map { |list| Attachment.new list }
- end
-
- def recipients
- #[]
- @recipients ||= RecipientTable.new(@desc).to_a.map { |list| Recipient.new list }
- end
-
- def each_recursive(&block)
- #p :self => self
- children.each do |child|
- #p :child => child
- block[child]
- child.each_recursive(&block)
- end
- end
-
- def inspect
- attrs = %w[display_name subject sender_name subfolders]
-# attrs = %w[display_name valid_folder_mask ipm_wastebasket_entryid finder_entryid content_count subfolders]
- str = attrs.map { |a| b = props.send a; " #{a}=#{b.inspect}" if b }.compact * ','
-
- type_s = type == :message ? 'Message' : type == :folder ? 'Folder' : type.to_s.capitalize + 'Folder'
- str2 = 'desc_id=0x%x' % @desc.desc_id
-
- !str.empty? ? "#<Pst::#{type_s} #{str2}#{str}>" : "#<Pst::#{type_s} #{str2} props=#{props.inspect}>" #\n" + props.transport_message_headers + ">"
- end
- end
-
- # corresponds to
- # * _pst_parse_item
- def pst_parse_item desc
- Item.new desc, RawPropertyStore.new(desc).to_a
- end
-
- #
- # other random code
- # ----------------------------------------------------------------------------
- #
-
- def dump_debug_info
- puts "* pst header"
- p header
-
-=begin
-Looking at the output of this, for blank-o1997.pst, i see this part:
-...
-- (26624,516) desc block data (overlap of 4 bytes)
-- (27136,516) desc block data (gap of 508 bytes)
-- (28160,516) desc block data (gap of 2620 bytes)
-...
-
-which confirms my belief that the block size for idx and desc is more likely 512
-=end
- if 0 + 0 == 0
- puts '* file range usage'
- file_ranges =
- # these 3 things, should account for most of the data in the file.
- [[0, Header::SIZE, 'pst file header']] +
- @idx_offsets.map { |offset| [offset, Index::BLOCK_SIZE, 'idx block data'] } +
- @desc_offsets.map { |offset| [offset, Desc::BLOCK_SIZE, 'desc block data'] } +
- @idx.map { |idx| [idx.offset, idx.size, 'idx id=0x%x (%s)' % [idx.id, idx.type]] }
- (file_ranges.sort_by { |idx| idx.first } + [nil]).to_enum(:each_cons, 2).each do |(offset, size, name), next_record|
- # i think there is a padding of the size out to 64 bytes
- # which is equivalent to padding out the final offset, because i think the offset is
- # similarly oriented
- pad_amount = 64
- warn 'i am wrong about the offset padding' if offset % pad_amount != 0
- # so, assuming i'm not wrong about that, then we can calculate how much padding is needed.
- pad = pad_amount - (size % pad_amount)
- pad = 0 if pad == pad_amount
- gap = next_record ? next_record.first - (offset + size + pad) : 0
- extra = case gap <=> 0
- when -1; ["overlap of #{gap.abs} bytes)"]
- when 0; []
- when +1; ["gap of #{gap} bytes"]
- end
- # how about we check that padding
- @io.pos = offset + size
- pad_bytes = @io.read(pad)
- extra += ["padding not all zero"] unless pad_bytes == 0.chr * pad
- puts "- #{offset}:#{size}+#{pad} #{name.inspect}" + (extra.empty? ? '' : ' [' + extra * ', ' + ']')
- end
- end
-
- # i think the idea of the idx, and indeed the idx2, is just to be able to
- # refer to data indirectly, which means it can get moved around, and you just update
- # the idx table. it is simply a list of file offsets and sizes.
- # not sure i get how id2 plays into it though....
- # the sizes seem to be all even. is that a co-incidence? and the ids are all even. that
- # seems to be related to something else (see the (id & 2) == 1 stuff)
- puts '* idx entries'
- @idx.each { |idx| puts "- #{idx.inspect}" }
-
- # if you look at the desc tree, you notice a few things:
- # 1. there is a desc that seems to be the parent of all the folders, messages etc.
- # it is the one whose parent is itself.
- # one of its children is referenced as the subtree_entryid of the first desc item,
- # the root.
- # 2. typically only 2 types of desc records have idx2_id != 0. messages themselves,
- # and the desc with id = 0x61 - the xattrib container. everything else uses the
- # regular ids to find its data. i think it should be reframed as small blocks and
- # big blocks, but i'll look into it more.
- #
- # idx_id and idx2_id are for getting to the data. desc_id and parent_desc_id just define
- # the parent <-> child relationship, and the desc_ids are how the items are referred to in
- # entryids.
- # note that these aren't unique! eg for 0, 4 etc. i expect these'd never change, as the ids
- # are stored in entryids. whereas the idx and idx2 could be a bit more volatile.
- puts '* desc tree'
- # make a dummy root hold everything just for convenience
- root = Desc.new ''
- def root.inspect; "#<Pst::Root>"; end
- root.children.replace @orphans
- # this still loads the whole thing as a string for gsub. should use directo output io
- # version.
- puts root.to_tree.gsub(/, (parent_desc_id|idx2_id)=0x0(?!\d)/, '')
-
- # this is fairly easy to understand, its just an attempt to display the pst items in a tree form
- # which resembles what you'd see in outlook.
- puts '* item tree'
- # now streams directly
- root_item.to_tree STDOUT
- end
-
- def root_desc
- @desc.first
- end
-
- def root_item
- item = pst_parse_item root_desc
- item.type = :root
- item
- end
-
- def root
- root_item
- end
-
- # depth first search of all items
- include Enumerable
-
- def each(&block)
- root = self.root
- block[root]
- root.each_recursive(&block)
- end
-
- def name
- @name ||= root_item.props.display_name
- end
-
- def inspect
- "#<Pst name=#{name.inspect} io=#{io.inspect}>"
- end
-end
-end
-
diff --git a/vendor/ruby-msg/lib/mapi/rtf.rb b/vendor/ruby-msg/lib/mapi/rtf.rb
deleted file mode 100644
index 9fa133fac..000000000
--- a/vendor/ruby-msg/lib/mapi/rtf.rb
+++ /dev/null
@@ -1,169 +0,0 @@
-require 'stringio'
-require 'strscan'
-require 'rtf'
-
-module Mapi
- #
- # = Introduction
- #
- # The +RTF+ module contains a few helper functions for dealing with rtf
- # in mapi messages: +rtfdecompr+, and <tt>rtf2html</tt>.
- #
- # Both were ported from their original C versions for simplicity's sake.
- #
- module RTF
- RTF_PREBUF =
- "{\\rtf1\\ansi\\mac\\deff0\\deftab720{\\fonttbl;}" \
- "{\\f0\\fnil \\froman \\fswiss \\fmodern \\fscript " \
- "\\fdecor MS Sans SerifSymbolArialTimes New RomanCourier" \
- "{\\colortbl\\red0\\green0\\blue0\n\r\\par " \
- "\\pard\\plain\\f0\\fs20\\b\\i\\u\\tab\\tx"
-
- # Decompresses compressed rtf +data+, as found in the mapi property
- # +PR_RTF_COMPRESSED+. Code converted from my C version, which in turn
- # I wrote from a Java source, in JTNEF I believe.
- #
- # C version was modified to use circular buffer for back references,
- # instead of the optimization of the Java version to index directly into
- # output buffer. This was in preparation to support streaming in a
- # read/write neutral fashion.
- def rtfdecompr data
- io = StringIO.new data
- buf = RTF_PREBUF + "\x00" * (4096 - RTF_PREBUF.length)
- wp = RTF_PREBUF.length
- rtf = ''
-
- # get header fields (as defined in RTFLIB.H)
- compr_size, uncompr_size, magic, crc32 = io.read(16).unpack 'V*'
- #warn "compressed-RTF data size mismatch" unless io.size == data.compr_size + 4
-
- # process the data
- case magic
- when 0x414c454d # "MELA" magic number that identifies the stream as a uncompressed stream
- rtf = io.read uncompr_size
- when 0x75465a4c # "LZFu" magic number that identifies the stream as a compressed stream
- flag_count = -1
- flags = nil
- while rtf.length < uncompr_size and !io.eof?
- # each flag byte flags 8 literals/references, 1 per bit
- flags = ((flag_count += 1) % 8 == 0) ? io.getc : flags >> 1
- if 1 == (flags & 1) # each flag bit is 1 for reference, 0 for literal
- rp, l = io.getc, io.getc
- # offset is a 12 byte number. 2^12 is 4096, so thats fine
- rp = (rp << 4) | (l >> 4) # the offset relative to block start
- l = (l & 0xf) + 2 # the number of bytes to copy
- l.times do
- rtf << buf[wp] = buf[rp]
- wp = (wp + 1) % 4096
- rp = (rp + 1) % 4096
- end
- else
- rtf << buf[wp] = io.getc
- wp = (wp + 1) % 4096
- end
- end
- else # unknown magic number
- raise "Unknown compression type (magic number 0x%08x)" % magic
- end
-
- # not sure if its due to a bug in the above code. doesn't seem to be
- # in my tests, but sometimes there's a trailing null. we chomp it here,
- # which actually makes the resultant rtf smaller than its advertised
- # size (+uncompr_size+).
- rtf.chomp! 0.chr
- rtf
- end
-
- # Note, this is a conversion of the original C code. Not great - needs tests and
- # some refactoring, and an attempt to correct some inaccuracies. Hacky but works.
- #
- # Returns +nil+ if it doesn't look like an rtf encapsulated rtf.
- #
- # Some cases that the original didn't deal with have been patched up, eg from
- # this chunk, where there are tags outside of the htmlrtf ignore block.
- #
- # "{\\*\\htmltag116 <br />}\\htmlrtf \\line \\htmlrtf0 \\line {\\*\\htmltag84 <a href..."
- #
- # We take the approach of ignoring all rtf tags not explicitly handled. A proper
- # parse tree would be nicer to work with. will need to look for ruby rtf library
- #
- # Some of the original comment to the c code is excerpted here:
- #
- # Sometimes in MAPI, the PR_BODY_HTML property contains the HTML of a message.
- # But more usually, the HTML is encoded inside the RTF body (which you get in the
- # PR_RTF_COMPRESSED property). These routines concern the decoding of the HTML
- # from this RTF body.
- #
- # An encoded htmlrtf file is a valid RTF document, but which contains additional
- # html markup information in its comments, and sometimes contains the equivalent
- # rtf markup outside the comments. Therefore, when it is displayed by a plain
- # simple RTF reader, the html comments are ignored and only the rtf markup has
- # effect. Typically, this rtf markup is not as rich as the html markup would have been.
- # But for an html-aware reader (such as the code below), we can ignore all the
- # rtf markup, and extract the html markup out of the comments, and get a valid
- # html document.
- #
- # There are actually two kinds of html markup in comments. Most of them are
- # prefixed by "\*\htmltagNNN", for some number NNN. But sometimes there's one
- # prefixed by "\*\mhtmltagNNN" followed by "\*\htmltagNNN". In this case,
- # the two are equivalent, but the m-tag is for a MIME Multipart/Mixed Message
- # and contains tags that refer to content-ids (e.g. img src="cid:072344a7")
- # while the normal tag just refers to a name (e.g. img src="fred.jpg")
- # The code below keeps the m-tag and discards the normal tag.
- # If there are any m-tags like this, then the message also contains an
- # attachment with a PR_CONTENT_ID property e.g. "072344a7". Actually,
- # sometimes the m-tag is e.g. img src="http://outlook/welcome.html" and the
- # attachment has a PR_CONTENT_LOCATION "http://outlook/welcome.html" instead
- # of a PR_CONTENT_ID.
- #
- def rtf2html rtf
- scan = StringScanner.new rtf
- # require \fromhtml. is this worth keeping? apparently you see \\fromtext if it
- # was converted from plain text.
- return nil unless rtf["\\fromhtml"]
- html = ''
- ignore_tag = nil
- # skip up to the first htmltag. return nil if we don't ever find one
- return nil unless scan.scan_until /(?=\{\\\*\\htmltag)/
- until scan.empty?
- if scan.scan /\{/
- elsif scan.scan /\}/
- elsif scan.scan /\\\*\\htmltag(\d+) ?/
- #p scan[1]
- if ignore_tag == scan[1]
- scan.scan_until /\}/
- ignore_tag = nil
- end
- elsif scan.scan /\\\*\\mhtmltag(\d+) ?/
- ignore_tag = scan[1]
- elsif scan.scan /\\par ?/
- html << "\r\n"
- elsif scan.scan /\\tab ?/
- html << "\t"
- elsif scan.scan /\\'([0-9A-Za-z]{2})/
- html << scan[1].hex.chr
- elsif scan.scan /\\pntext/
- scan.scan_until /\}/
- elsif scan.scan /\\htmlrtf/
- scan.scan_until /\\htmlrtf0 ?/
- # a generic throw away unknown tags thing.
- # the above 2 however, are handled specially
- elsif scan.scan /\\[a-z-]+(\d+)? ?/
- #elsif scan.scan /\\li(\d+) ?/
- #elsif scan.scan /\\fi-(\d+) ?/
- elsif scan.scan /[\r\n]/
- elsif scan.scan /\\([{}\\])/
- html << scan[1]
- elsif scan.scan /(.)/
- html << scan[1]
- else
- p :wtf
- end
- end
- html.strip.empty? ? nil : html
- end
-
- module_function :rtf2html, :rtfdecompr
- end
-end
-
diff --git a/vendor/ruby-msg/lib/mapi/types.rb b/vendor/ruby-msg/lib/mapi/types.rb
deleted file mode 100644
index 71416afd5..000000000
--- a/vendor/ruby-msg/lib/mapi/types.rb
+++ /dev/null
@@ -1,51 +0,0 @@
-require 'rubygems'
-require 'ole/types'
-
-module Mapi
- Log = Logger.new_with_callstack
-
- module Types
- #
- # Mapi property types, taken from http://msdn2.microsoft.com/en-us/library/bb147591.aspx.
- #
- # The fields are [mapi name, variant name, description]. Maybe I should just make it a
- # struct.
- #
- # seen some synonyms here, like PT_I8 vs PT_LONG. seen stuff like PT_SRESTRICTION, not
- # sure what that is. look at `grep ' PT_' data/mapitags.yaml | sort -u`
- # also, it has stuff like PT_MV_BINARY, where _MV_ probably means multi value, and is
- # likely just defined to | in 0x1000.
- #
- # Note that the last 2 are the only ones where the Mapi value differs from the Variant value
- # for the corresponding variant type. Odd. Also, the last 2 are currently commented out here
- # because of the clash.
- #
- # Note 2 - the strings here say VT_BSTR, but I don't have that defined in Ole::Types. Should
- # maybe change them to match. I've also seen reference to PT_TSTRING, which is defined as some
- # sort of get unicode first, and fallback to ansii or something.
- #
- DATA = {
- 0x0001 => ['PT_NULL', 'VT_NULL', 'Null (no valid data)'],
- 0x0002 => ['PT_SHORT', 'VT_I2', '2-byte integer (signed)'],
- 0x0003 => ['PT_LONG', 'VT_I4', '4-byte integer (signed)'],
- 0x0004 => ['PT_FLOAT', 'VT_R4', '4-byte real (floating point)'],
- 0x0005 => ['PT_DOUBLE', 'VT_R8', '8-byte real (floating point)'],
- 0x0006 => ['PT_CURRENCY', 'VT_CY', '8-byte integer (scaled by 10,000)'],
- 0x000a => ['PT_ERROR', 'VT_ERROR', 'SCODE value; 32-bit unsigned integer'],
- 0x000b => ['PT_BOOLEAN', 'VT_BOOL', 'Boolean'],
- 0x000d => ['PT_OBJECT', 'VT_UNKNOWN', 'Data object'],
- 0x001e => ['PT_STRING8', 'VT_BSTR', 'String'],
- 0x001f => ['PT_UNICODE', 'VT_BSTR', 'String'],
- 0x0040 => ['PT_SYSTIME', 'VT_DATE', '8-byte real (date in integer, time in fraction)'],
- #0x0102 => ['PT_BINARY', 'VT_BLOB', 'Binary (unknown format)'],
- #0x0102 => ['PT_CLSID', 'VT_CLSID', 'OLE GUID']
- }
-
- module Constants
- DATA.each { |num, (mapi_name, variant_name, desc)| const_set mapi_name, num }
- end
-
- include Constants
- end
-end
-
diff --git a/vendor/ruby-msg/lib/mime.rb b/vendor/ruby-msg/lib/mime.rb
deleted file mode 100644
index 4340e4901..000000000
--- a/vendor/ruby-msg/lib/mime.rb
+++ /dev/null
@@ -1,165 +0,0 @@
-#
-# = Introduction
-#
-# A *basic* mime class for _really_ _basic_ and probably non-standard parsing
-# and construction of MIME messages.
-#
-# Intended for two main purposes in this project:
-# 1. As the container that is used to build up the message for eventual
-# serialization as an eml.
-# 2. For assistance in parsing the +transport_message_headers+ provided in .msg files,
-# which are then kept through to the final eml.
-#
-# = TODO
-#
-# * Better streaming support, rather than an all-in-string approach.
-# * Add +OrderedHash+ optionally, to not lose ordering in headers.
-# * A fair bit remains to be done for this class, its fairly immature. But generally I'd like
-# to see it be more generally useful.
-# * All sorts of correctness issues, encoding particular.
-# * Duplication of work in net/http.rb's +HTTPHeader+? Don't know if the overlap is sufficient.
-# I don't want to lower case things, just for starters.
-# * Mime was the original place I wrote #to_tree, intended as a quick debug hack.
-#
-class SimpleMime
- Hash = begin
- require 'orderedhash'
- OrderedHash
- rescue LoadError
- Hash
- end
-
- attr_reader :headers, :body, :parts, :content_type, :preamble, :epilogue
-
- # Create a SimpleMime object using +str+ as an initial serialization, which must contain headers
- # and a body (even if empty). Needs work.
- def initialize str, ignore_body=false
- headers, @body = $~[1..-1] if str[/(.*?\r?\n)(?:\r?\n(.*))?\Z/m]
-
- @headers = Hash.new { |hash, key| hash[key] = [] }
- @body ||= ''
- headers.to_s.scan(/^\S+:\s*.*(?:\n\t.*)*/).each do |header|
- @headers[header[/(\S+):/, 1]] << header[/\S+:\s*(.*)/m, 1].gsub(/\s+/m, ' ').strip # this is kind of wrong
- end
-
- # don't have to have content type i suppose
- @content_type, attrs = nil, {}
- if content_type = @headers['Content-Type'][0]
- @content_type, attrs = SimpleMime.split_header content_type
- end
-
- return if ignore_body
-
- if multipart?
- if body.empty?
- @preamble = ''
- @epilogue = ''
- @parts = []
- else
- # we need to split the message at the boundary
- boundary = attrs['boundary'] or raise "no boundary for multipart message"
-
- # splitting the body:
- parts = body.split(/--#{Regexp.quote boundary}/m)
- unless parts[-1] =~ /^--/; warn "bad multipart boundary (missing trailing --)"
- else parts[-1][0..1] = ''
- end
- parts.each_with_index do |part, i|
- part =~ /^(\r?\n)?(.*?)(\r?\n)?\Z/m
- part.replace $2
- warn "bad multipart boundary" if (1...parts.length-1) === i and !($1 && $3)
- end
- @preamble = parts.shift
- @epilogue = parts.pop
- @parts = parts.map { |part| SimpleMime.new part }
- end
- end
- end
-
- def multipart?
- @content_type && @content_type =~ /^multipart/ ? true : false
- end
-
- def inspect
- # add some extra here.
- "#<SimpleMime content_type=#{@content_type.inspect}>"
- end
-
- def to_tree
- if multipart?
- str = "- #{inspect}\n"
- parts.each_with_index do |part, i|
- last = i == parts.length - 1
- part.to_tree.split(/\n/).each_with_index do |line, j|
- str << " #{last ? (j == 0 ? "\\" : ' ') : '|'}" + line + "\n"
- end
- end
- str
- else
- "- #{inspect}\n"
- end
- end
-
- def to_s opts={}
- opts = {:boundary_counter => 0}.merge opts
- if multipart?
- boundary = SimpleMime.make_boundary opts[:boundary_counter] += 1, self
- @body = [preamble, parts.map { |part| "\r\n" + part.to_s(opts) + "\r\n" }, "--\r\n" + epilogue].
- flatten.join("\r\n--" + boundary)
- content_type, attrs = SimpleMime.split_header @headers['Content-Type'][0]
- attrs['boundary'] = boundary
- @headers['Content-Type'] = [([content_type] + attrs.map { |key, val| %{#{key}="#{val}"} }).join('; ')]
- end
-
- str = ''
- @headers.each do |key, vals|
- vals.each { |val| str << "#{key}: #{val}\r\n" }
- end
- str << "\r\n" + @body
- end
-
- def self.split_header header
- # FIXME: haven't read standard. not sure what its supposed to do with " in the name, or if other
- # escapes are allowed. can't test on windows as " isn't allowed anyway. can be fixed with more
- # accurate parser later.
- # maybe move to some sort of Header class. but not all headers should be of it i suppose.
- # at least add a join_header then, taking name and {}. for use in SimpleMime#to_s (for boundary
- # rewrite), and Attachment#to_mime, among others...
- attrs = {}
- header.scan(/;\s*([^\s=]+)\s*=\s*("[^"]*"|[^\s;]*)\s*/m).each do |key, value|
- if attrs[key]; warn "ignoring duplicate header attribute #{key.inspect}"
- else attrs[key] = value[/^"/] ? value[1..-2] : value
- end
- end
-
- [header[/^[^;]+/].strip, attrs]
- end
-
- # +i+ is some value that should be unique for all multipart boundaries for a given message
- def self.make_boundary i, extra_obj = SimpleMime
- "----_=_NextPart_#{'%03d' % i}_#{'%08x' % extra_obj.object_id}.#{'%08x' % Time.now}"
- end
-end
-
-=begin
-things to consider for header work.
-encoded words:
-Subject: =?iso-8859-1?q?p=F6stal?=
-
-and other mime funkyness:
-Content-Disposition: attachment;
- filename*0*=UTF-8''09%20%D7%90%D7%A5;
- filename*1*=%20%D7%A1%D7%91-;
- filename*2*=%D7%A7%95%A5.wma
-Content-Transfer-Encoding: base64
-
-and another, doing a test with an embedded newline in an attachment name, I
-get this output from evolution. I get the feeling that this is probably a bug
-with their implementation though, they weren't expecting new lines in filenames.
-Content-Disposition: attachment; filename="asdf'b\"c
-d efgh=i: ;\\j"
-d efgh=i: ;\\j"; charset=us-ascii
-Content-Type: text/plain; name="asdf'b\"c"; charset=us-ascii
-
-=end
-
diff --git a/vendor/ruby-msg/lib/orderedhash.rb b/vendor/ruby-msg/lib/orderedhash.rb
deleted file mode 100644
index 16a4f5860..000000000
--- a/vendor/ruby-msg/lib/orderedhash.rb
+++ /dev/null
@@ -1,218 +0,0 @@
-# = OrderedHash
-#
-# == Version
-# 1.2006.07.13 (change of the first number means Big Change)
-#
-# == Description
-# Hash which preserves order of added items (like PHP array).
-#
-# == Usage
-#
-# (see examples directory under the ruby gems root directory)
-#
-# require 'rubygems'
-# require 'ordered_hash'
-#
-# hsh = OrderedHash.new
-# hsh['z'] = 1
-# hsh['a'] = 2
-# hsh['c'] = 3
-# p hsh.keys # ['z','a','c']
-#
-# == Source
-# http://simplypowerful.1984.cz/goodlibs/1.2006.07.13
-#
-# == Author
-# jan molic (/mig/at_sign/1984/dot/cz/)
-#
-# == Thanks to
-# Andrew Johnson for his suggestions and fixes of Hash[], merge, to_a, inspect and shift
-# Desmond Dsouza for == fixes
-#
-# == Licence
-# You can redistribute it and/or modify it under the same terms of Ruby's license;
-# either the dual license version in 2003, or any later version.
-#
-
-class OrderedHash < Hash
-
- attr_accessor :order
-
- class << self
-
- def [] *args
- hsh = OrderedHash.new
- if Hash === args[0]
- hsh.replace args[0]
- elsif (args.size % 2) != 0
- raise ArgumentError, "odd number of elements for Hash"
- else
- hsh[args.shift] = args.shift while args.size > 0
- end
- hsh
- end
-
- end
-
- def initialize(*a, &b)
- super
- @order = []
- end
-
- def store_only a,b
- store a,b
- end
-
- alias orig_store store
-
- def store a,b
- @order.push a unless has_key? a
- super a,b
- end
-
- alias []= store
-
- def == hsh2
- return hsh2==self if !hsh2.is_a?(OrderedHash)
- return false if @order != hsh2.order
- super hsh2
- end
-
- def clear
- @order = []
- super
- end
-
- def delete key
- @order.delete key
- super
- end
-
- def each_key
- @order.each { |k| yield k }
- self
- end
-
- def each_value
- @order.each { |k| yield self[k] }
- self
- end
-
- def each
- @order.each { |k| yield k,self[k] }
- self
- end
-
- alias each_pair each
-
- def delete_if
- @order.clone.each { |k|
- delete k if yield
- }
- self
- end
-
- def values
- ary = []
- @order.each { |k| ary.push self[k] }
- ary
- end
-
- def keys
- @order
- end
-
- def invert
- hsh2 = Hash.new
- @order.each { |k| hsh2[self[k]] = k }
- hsh2
- end
-
- def reject &block
- self.dup.delete_if( &block )
- end
-
- def reject! &block
- hsh2 = reject( &block )
- self == hsh2 ? nil : hsh2
- end
-
- def replace hsh2
- @order = hsh2.keys
- super hsh2
- end
-
- def shift
- key = @order.first
- key ? [key,delete(key)] : super
- end
-
- def unshift k,v
- unless self.include? k
- @order.unshift k
- orig_store(k,v)
- true
- else
- false
- end
- end
-
- def push k,v
- unless self.include? k
- @order.push k
- orig_store(k,v)
- true
- else
- false
- end
- end
-
- def pop
- key = @order.last
- key ? [key,delete(key)] : nil
- end
-
- def first
- self[@order.first]
- end
-
- def last
- self[@order.last]
- end
-
- def to_a
- ary = []
- each { |k,v| ary << [k,v] }
- ary
- end
-
- def to_s
- self.to_a.to_s
- end
-
- def inspect
- ary = []
- each {|k,v| ary << k.inspect + "=>" + v.inspect}
- '{' + ary.join(", ") + '}'
- end
-
- def update hsh2
- hsh2.each { |k,v| self[k] = v }
- self
- end
-
- alias :merge! update
-
- def merge hsh2
- self.dup update(hsh2)
- end
-
- def select
- ary = []
- each { |k,v| ary << [k,v] if yield k,v }
- ary
- end
-
-end
-
-#=end
diff --git a/vendor/ruby-msg/lib/rtf.rb b/vendor/ruby-msg/lib/rtf.rb
deleted file mode 100755
index 3afac68a8..000000000
--- a/vendor/ruby-msg/lib/rtf.rb
+++ /dev/null
@@ -1,109 +0,0 @@
-require 'stringio'
-
-# this file is pretty crap, its just to ensure there is always something readable if
-# there is an rtf only body, with no html encapsulation.
-
-module RTF
- class Tokenizer
- def self.process io
- while true do
- case c = io.getc
- when ?{; yield :open_group
- when ?}; yield :close_group
- when ?\\
- case c = io.getc
- when ?{, ?}, ?\\; yield :text, c.chr
- when ?'; yield :text, [io.read(2)].pack('H*')
- when ?a..?z, ?A..?Z
- # read control word
- str = c.chr
- str << c while c = io.read(1) and c =~ /[a-zA-Z]/
- neg = 1
- neg = -1 and c = io.read(1) if c == '-'
- num = if c =~ /[0-9]/
- num = c
- num << c while c = io.read(1) and c =~ /[0-9]/
- num.to_i * neg
- end
- raise "invalid rtf stream" if neg == -1 and !num # ???? \blahblah- some text
- io.seek(-1, IO::SEEK_CUR) if c != ' '
- yield :control_word, str, num
- when nil
- raise "invalid rtf stream" # \EOF
- else
- # other kind of control symbol
- yield :control_symbol, c.chr
- end
- when nil
- return
- when ?\r, ?\n
- # ignore
- else yield :text, c.chr
- end
- end
- end
- end
-
- class Converter
- # crappy
- def self.rtf2text str, format=:text
- group = 0
- text = ''
- text << "<html>\n<body>" if format == :html
- group_type = []
- group_tags = []
- RTF::Tokenizer.process(StringIO.new(str)) do |a, b, c|
- add_text = ''
- case a
- when :open_group; group += 1; group_type[group] = nil; group_tags[group] = []
- when :close_group; group_tags[group].reverse.each { |t| text << "</#{t}>" }; group -= 1;
- when :control_word; # ignore
- group_type[group] ||= b
- # maybe change this to use utf8 where possible
- add_text = if b == 'par' || b == 'line' || b == 'page'; "\n"
- elsif b == 'tab' || b == 'cell'; "\t"
- elsif b == 'endash' || b == 'emdash'; "-"
- elsif b == 'emspace' || b == 'enspace' || b == 'qmspace'; " "
- elsif b == 'ldblquote'; '"'
- else ''
- end
- if b == 'b' || b == 'i' and format == :html
- close = c == 0 ? '/' : ''
- text << "<#{close}#{b}>"
- if c == 0
- group_tags[group].delete b
- else
- group_tags[group] << b
- end
- end
- # lot of other ones belong in here.\
-=begin
-\bullet Bullet character.
-\lquote Left single quotation mark.
-\rquote Right single quotation mark.
-\ldblquote Left double quotation mark.
-\rdblquote
-=end
- when :control_symbol; # ignore
- group_type[group] ||= b
- add_text = ' ' if b == '~' # non-breakable space
- add_text = '-' if b == '_' # non-breakable hypen
- when :text
- add_text = b if group <= 1 or group_type[group] == 'rtlch' && !group_type[0...group].include?('*')
- end
- if format == :html
- text << add_text.gsub(/([<>&"'])/) do
- ent = { '<' => 'lt', '>' => 'gt', '&' => 'amp', '"' => 'quot', "'" => 'apos' }[$1]
- "&#{ent};"
- end
- text << '<br>' if add_text == "\n"
- else
- text << add_text
- end
- end
- text << "</body>\n</html>\n" if format == :html
- text
- end
- end
-end
-