Diffstat (limited to 'vendor/ruby-msg/lib')
-rw-r--r-- | vendor/ruby-msg/lib/mapi.rb                    |  109
-rw-r--r-- | vendor/ruby-msg/lib/mapi/convert.rb            |   61
-rw-r--r-- | vendor/ruby-msg/lib/mapi/convert/contact.rb    |  142
-rw-r--r-- | vendor/ruby-msg/lib/mapi/convert/note-mime.rb  |  274
-rw-r--r-- | vendor/ruby-msg/lib/mapi/convert/note-tmail.rb |  287
-rw-r--r-- | vendor/ruby-msg/lib/mapi/msg.rb                |  440
-rw-r--r-- | vendor/ruby-msg/lib/mapi/property_set.rb       |  269
-rw-r--r-- | vendor/ruby-msg/lib/mapi/pst.rb                | 1806
-rw-r--r-- | vendor/ruby-msg/lib/mapi/rtf.rb                |  169
-rw-r--r-- | vendor/ruby-msg/lib/mapi/types.rb              |   51
-rw-r--r-- | vendor/ruby-msg/lib/mime.rb                    |  165
-rw-r--r-- | vendor/ruby-msg/lib/orderedhash.rb             |  218
-rwxr-xr-x | vendor/ruby-msg/lib/rtf.rb                     |  109
13 files changed, 0 insertions, 4100 deletions
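For context before the file-by-file deletions, the following is a minimal usage sketch of the vendored library being removed, reconstructed from the deleted sources shown below. It is not taken from this repository's actual call sites: the input file name 'example.msg' and the attachment-saving loop are hypothetical, and Attachment#save raises for embedded OLE directories rather than plain data blobs.

# Minimal sketch: open a .msg, read a property, convert to MIME, save attachments.
require 'mapi/msg'
require 'mapi/convert'

Mapi::Msg.open('example.msg') do |msg|
  puts msg.props.subject                 # symbolic property lookup via PropertySet
  puts msg.to_mime.to_s                  # RFC 2822 text via the note-mime backend
  msg.attachments.each do |attachment|
    name = File.basename(attachment.filename || 'attachment')
    File.open(name, 'wb') { |io| attachment.save io }
  end
end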
diff --git a/vendor/ruby-msg/lib/mapi.rb b/vendor/ruby-msg/lib/mapi.rb deleted file mode 100644 index b9d3413f7..000000000 --- a/vendor/ruby-msg/lib/mapi.rb +++ /dev/null @@ -1,109 +0,0 @@ -require 'mapi/types' -require 'mapi/property_set' - -module Mapi - VERSION = '1.4.0' - - # - # Mapi::Item is the base class used for all mapi objects, and is purely a - # property set container - # - class Item - attr_reader :properties - alias props properties - - # +properties+ should be a PropertySet instance. - def initialize properties - @properties = properties - end - end - - # a general attachment class. is subclassed by Msg and Pst attachment classes - class Attachment < Item - def filename - props.attach_long_filename || props.attach_filename - end - - def data - @embedded_msg || @embedded_ole || props.attach_data - end - - # with new stream work, its possible to not have the whole thing in memory at one time, - # just to save an attachment - # - # a = msg.attachments.first - # a.save open(File.basename(a.filename || 'attachment'), 'wb') - def save io - raise "can only save binary data blobs, not ole dirs" if @embedded_ole - data.each_read { |chunk| io << chunk } - end - - def inspect - "#<#{self.class.to_s[/\w+$/]}" + - (filename ? " filename=#{filename.inspect}" : '') + - (@embedded_ole ? " embedded_type=#{@embedded_ole.embedded_type.inspect}" : '') + ">" - end - end - - class Recipient < Item - # some kind of best effort guess for converting to standard mime style format. - # there are some rules for encoding non 7bit stuff in mail headers. should obey - # that here, as these strings could be unicode - # email_address will be an EX:/ address (X.400?), unless external recipient. the - # other two we try first. - # consider using entry id for this too. - def name - name = props.transmittable_display_name || props.display_name - # dequote - name[/^'(.*)'/, 1] or name rescue nil - end - - def email - props.smtp_address || props.org_email_addr || props.email_address - end - - RECIPIENT_TYPES = { 0 => :orig, 1 => :to, 2 => :cc, 3 => :bcc } - def type - RECIPIENT_TYPES[props.recipient_type] - end - - def to_s - if name = self.name and !name.empty? and email && name != email - %{"#{name}" <#{email}>} - else - email || name - end - end - - def inspect - "#<#{self.class.to_s[/\w+$/]}:#{self.to_s.inspect}>" - end - end - - # i refer to it as a message (as does mapi), although perhaps Item is better, as its a more general - # concept than a message, as used in Pst files. though maybe i'll switch to using - # Mapi::Object as the base class there. - # - # IMessage essentially, but there's also stuff like IMAPIFolder etc. so, for this to form - # basis for PST Item, it'd need to be more general. - class Message < Item - # these 2 collections should be provided by our subclasses - def attachments - raise NotImplementedError - end - - def recipients - raise NotImplementedError - end - - def inspect - str = %w[message_class from to subject].map do |key| - " #{key}=#{props.send(key).inspect}" - end.compact.join - str << " recipients=#{recipients.inspect}" - str << " attachments=#{attachments.inspect}" - "#<#{self.class.to_s[/\w+$/]}#{str}>" - end - end -end - diff --git a/vendor/ruby-msg/lib/mapi/convert.rb b/vendor/ruby-msg/lib/mapi/convert.rb deleted file mode 100644 index 4c7a0d298..000000000 --- a/vendor/ruby-msg/lib/mapi/convert.rb +++ /dev/null @@ -1,61 +0,0 @@ -# we have two different "backends" for note conversion. 
we're sticking with -# the current (home grown) mime one until the tmail version is suitably -# polished. -require 'mapi/convert/note-mime' -require 'mapi/convert/contact' - -module Mapi - class Message - CONVERSION_MAP = { - 'text/x-vcard' => [:to_vcard, 'vcf'], - 'message/rfc822' => [:to_mime, 'eml'], - 'text/plain' => [:to_post, 'txt'] - # ... - } - - # get the mime type of the message. - def mime_type - case props.message_class #.downcase <- have a feeling i saw other cased versions - when 'IPM.Contact' - # apparently "text/directory; profile=vcard" is what you're supposed to use - 'text/x-vcard' - when 'IPM.Note' - 'message/rfc822' - when 'IPM.Post' - 'text/plain' - when 'IPM.StickyNote' - 'text/plain' # hmmm.... - else - Mapi::Log.warn 'unknown message_class - %p' % props.message_class - nil - end - end - - def convert - type = mime_type - unless pair = CONVERSION_MAP[type] - raise 'unable to convert message with mime type - %p' % type - end - send pair.first - end - - # should probably be moved to mapi/convert/post - class Post - # not really sure what the pertinent properties are. we just do nothing for now... - def initialize message - @message = message - end - - def to_s - # should maybe handle other types, like html body. need a better format for post - # probably anyway, cause a lot of meta data is getting chucked. - @message.props.body - end - end - - def to_post - Post.new self - end - end -end - diff --git a/vendor/ruby-msg/lib/mapi/convert/contact.rb b/vendor/ruby-msg/lib/mapi/convert/contact.rb deleted file mode 100644 index 838ae6498..000000000 --- a/vendor/ruby-msg/lib/mapi/convert/contact.rb +++ /dev/null @@ -1,142 +0,0 @@ -require 'rubygems' -require 'vpim/vcard' - -# patch Vpim. TODO - fix upstream, or verify old behaviour was ok -def Vpim.encode_text v - # think the regexp was wrong - v.to_str.gsub(/(.)/m) do - case $1 - when "\n" - "\\n" - when "\\", ",", ";" - "\\#{$1}" - else - $1 - end - end -end - -module Mapi - class Message - class VcardConverter - include Vpim - - # a very incomplete mapping, but its a start... - # can't find where to set a lot of stuff, like zipcode, jobtitle etc - VCARD_MAP = { - # these are all standard mapi properties - :name => [ - { - :given => :given_name, - :family => :surname, - :fullname => :subject - } - ], - # outlook seems to eschew the mapi properties this time, - # like postal_address, street_address, home_address_city - # so we use the named properties - :addr => [ - { - :location => 'work', - :street => :business_address_street, - :locality => proc do |props| - [props.business_address_city, props.business_address_state].compact * ', ' - end - } - ], - - # right type? maybe date - :birthday => :birthday, - :nickname => :nickname - - # photo available? - # FIXME finish, emails, telephones etc - } - - attr_reader :msg - def initialize msg - @msg = msg - end - - def field name, *args - DirectoryInfo::Field.create name, Vpim.encode_text_list(args) - end - - def get_property key - if String === key - return key - elsif key.respond_to? :call - value = key.call msg.props - else - value = msg.props[key] - end - if String === value and value.empty? - nil - else - value - end - end - - def get_properties hash - constants = {} - others = {} - hash.each do |to, from| - if String === from - constants[to] = from - else - value = get_property from - others[to] = value if value - end - end - return nil if others.empty? 
- others.merge constants - end - - def convert - Vpim::Vcard::Maker.make2 do |m| - # handle name - [:name, :addr].each do |type| - VCARD_MAP[type].each do |hash| - next unless props = get_properties(hash) - m.send "add_#{type}" do |n| - props.each { |key, value| n.send "#{key}=", value } - end - end - end - - (VCARD_MAP.keys - [:name, :addr]).each do |key| - value = get_property VCARD_MAP[key] - m.send "#{key}=", value if value - end - - # the rest of the stuff is custom - - url = get_property(:webpage) || get_property(:business_home_page) - m.add_field field('URL', url) if url - m.add_field field('X-EVOLUTION-FILE-AS', get_property(:file_under)) if get_property(:file_under) - - addr = get_property(:email_email_address) || get_property(:email_original_display_name) - if addr - m.add_email addr do |e| - e.format ='x400' unless msg.props.email_addr_type == 'SMTP' - end - end - - if org = get_property(:company_name) - m.add_field field('ORG', get_property(:company_name)) - end - - # TODO: imaddress - end - end - end - - def to_vcard - #p props.raw.reject { |key, value| key.guid.inspect !~ /00062004-0000-0000-c000-000000000046/ }. - # map { |key, value| [key.to_sym, value] }.reject { |a, b| b.respond_to? :read } - #y props.to_h.reject { |a, b| b.respond_to? :read } - VcardConverter.new(self).convert - end - end -end - diff --git a/vendor/ruby-msg/lib/mapi/convert/note-mime.rb b/vendor/ruby-msg/lib/mapi/convert/note-mime.rb deleted file mode 100644 index deb035f2c..000000000 --- a/vendor/ruby-msg/lib/mapi/convert/note-mime.rb +++ /dev/null @@ -1,274 +0,0 @@ -require 'base64' -require 'mime' -require 'time' - -# there is still some Msg specific stuff in here. - -module Mapi - class Message - def mime - return @mime if @mime - # if these headers exist at all, they can be helpful. we may however get a - # application/ms-tnef mime root, which means there will be little other than - # headers. we may get nothing. - # and other times, when received from external, we get the full cigar, boundaries - # etc and all. - # sometimes its multipart, with no boundaries. that throws an error. so we'll be more - # forgiving here - @mime = SimpleMime.new props.transport_message_headers.to_s, true - populate_headers - @mime - end - - def headers - mime.headers - end - - # copy data from msg properties storage to standard mime. headers - # i've now seen it where the existing headers had heaps on stuff, and the msg#props had - # practically nothing. think it was because it was a tnef - msg conversion done by exchange. - def populate_headers - # construct a From value - # should this kind of thing only be done when headers don't exist already? maybe not. if its - # sent, then modified and saved, the headers could be wrong? - # hmmm. i just had an example where a mail is sent, from an internal user, but it has transport - # headers, i think because one recipient was external. the only place the senders email address - # exists is in the transport headers. so its maybe not good to overwrite from. - # recipients however usually have smtp address available. - # maybe we'll do it for all addresses that are smtp? (is that equivalent to - # sender_email_address !~ /^\// - name, email = props.sender_name, props.sender_email_address - if props.sender_addrtype == 'SMTP' - headers['From'] = if name and email and name != email - [%{"#{name}" <#{email}>}] - else - [email || name] - end - elsif !headers.has_key?('From') - # some messages were never sent, so that sender stuff isn't filled out. 
need to find another - # way to get something - # what about marking whether we thing the email was sent or not? or draft? - # for partition into an eventual Inbox, Sent, Draft mbox set? - # i've now seen cases where this stuff is missing, but exists in transport message headers, - # so maybe i should inhibit this in that case. - if email - # disabling this warning for now - #Log.warn "* no smtp sender email address available (only X.400). creating fake one" - # this is crap. though i've specially picked the logic so that it generates the correct - # email addresses in my case (for my organisation). - # this user stuff will give valid email i think, based on alias. - user = name ? name.sub(/(.*), (.*)/, "\\2.\\1") : email[/\w+$/].downcase - domain = (email[%r{^/O=([^/]+)}i, 1].downcase + '.com' rescue email) - headers['From'] = [name ? %{"#{name}" <#{user}@#{domain}>} : "<#{user}@#{domain}>" ] - elsif name - # we only have a name? thats screwed up. - # disabling this warning for now - #Log.warn "* no smtp sender email address available (only name). creating fake one" - headers['From'] = [%{"#{name}"}] - else - # disabling this warning for now - #Log.warn "* no sender email address available at all. FIXME" - end - # else we leave the transport message header version - end - - # for all of this stuff, i'm assigning in utf8 strings. - # thats ok i suppose, maybe i can say its the job of the mime class to handle that. - # but a lot of the headers are overloaded in different ways. plain string, many strings - # other stuff. what happens to a person who has a " in their name etc etc. encoded words - # i suppose. but that then happens before assignment. and can't be automatically undone - # until the header is decomposed into recipients. - recips_by_type = recipients.group_by { |r| r.type } - # i want to the the types in a specific order. - [:to, :cc, :bcc].each do |type| - # don't know why i bother, but if we can, we try to sort recipients by the numerical part - # of the ole name, or just leave it if we can't - recips = recips_by_type[type] - recips = (recips.sort_by { |r| r.obj.name[/\d{8}$/].hex } rescue recips) - # switched to using , for separation, not ;. see issue #4 - # recips.empty? is strange. i wouldn't have thought it possible, but it was right? - headers[type.to_s.sub(/^(.)/) { $1.upcase }] = [recips.join(', ')] if recips and !recips.empty? - end - headers['Subject'] = [props.subject] if props.subject - - # fill in a date value. by default, we won't mess with existing value hear - if !headers.has_key?('Date') - # we want to get a received date, as i understand it. - # use this preference order, or pull the most recent? - keys = %w[message_delivery_time client_submit_time last_modification_time creation_time] - time = keys.each { |key| break time if time = props.send(key) } - time = nil unless Date === time - - # now convert and store - # this is a little funky. not sure about time zone stuff either? - # actually seems ok. maybe its always UTC and interpreted anyway. or can be timezoneless. - # i have no timezone info anyway. - # in gmail, i see stuff like 15 Jan 2007 00:48:19 -0000, and it displays as 11:48. - # can also add .localtime here if desired. but that feels wrong. - headers['Date'] = [Time.iso8601(time.to_s).rfc2822] if time - end - - # some very simplistic mapping between internet message headers and the - # mapi properties - # any of these could be causing duplicates due to case issues. the hack in #to_mime - # just stops re-duplication at that point. 
need to move some smarts into the mime - # code to handle it. - mapi_header_map = [ - [:internet_message_id, 'Message-ID'], - [:in_reply_to_id, 'In-Reply-To'], - # don't set these values if they're equal to the defaults anyway - [:importance, 'Importance', proc { |val| val.to_s == '1' ? nil : val }], - [:priority, 'Priority', proc { |val| val.to_s == '1' ? nil : val }], - [:sensitivity, 'Sensitivity', proc { |val| val.to_s == '0' ? nil : val }], - # yeah? - [:conversation_topic, 'Thread-Topic'], - # not sure of the distinction here - # :originator_delivery_report_requested ?? - [:read_receipt_requested, 'Disposition-Notification-To', proc { |val| from }] - ] - mapi_header_map.each do |mapi, mime, *f| - next unless q = val = props.send(mapi) or headers.has_key?(mime) - next if f[0] and !(val = f[0].call(val)) - headers[mime] = [val.to_s] - end - end - - # redundant? - def type - props.message_class[/IPM\.(.*)/, 1].downcase rescue nil - end - - # shortcuts to some things from the headers - %w[From To Cc Bcc Subject].each do |key| - define_method(key.downcase) { headers[key].join(' ') if headers.has_key?(key) } - end - - def body_to_mime - # to create the body - # should have some options about serializing rtf. and possibly options to check the rtf - # for rtf2html conversion, stripping those html tags or other similar stuff. maybe want to - # ignore it in the cases where it is generated from incoming html. but keep it if it was the - # source for html and plaintext. - if props.body_rtf or props.body_html - # should plain come first? - mime = SimpleMime.new "Content-Type: multipart/alternative\r\n\r\n" - # its actually possible for plain body to be empty, but the others not. - # if i can get an html version, then maybe a callout to lynx can be made... - mime.parts << SimpleMime.new("Content-Type: text/plain\r\n\r\n" + props.body) if props.body - # this may be automatically unwrapped from the rtf if the rtf includes the html - mime.parts << SimpleMime.new("Content-Type: text/html\r\n\r\n" + props.body_html) if props.body_html - # temporarily disabled the rtf. its just showing up as an attachment anyway. - #mime.parts << SimpleMime.new("Content-Type: text/rtf\r\n\r\n" + props.body_rtf) if props.body_rtf - # its thus currently possible to get no body at all if the only body is rtf. that is not - # really acceptable FIXME - mime - else - # check no header case. content type? etc?. not sure if my SimpleMime class will accept - Log.debug "taking that other path" - # body can be nil, hence the to_s - SimpleMime.new "Content-Type: text/plain\r\n\r\n" + props.body.to_s - end - end - - def to_mime - # intended to be used for IPM.note, which is the email type. can use it for others if desired, - # YMMV - Log.warn "to_mime used on a #{props.message_class}" unless props.message_class == 'IPM.Note' - # we always have a body - mime = body = body_to_mime - - # If we have attachments, we take the current mime root (body), and make it the first child - # of a new tree that will contain body and attachments. - unless attachments.empty? - mime = SimpleMime.new "Content-Type: multipart/mixed\r\n\r\n" - mime.parts << body - # i don't know any better way to do this. need multipart/related for inline images - # referenced by cid: urls to work, but don't want to use it otherwise... 
- related = false - attachments.each do |attach| - part = attach.to_mime - related = true if part.headers.has_key?('Content-ID') or part.headers.has_key?('Content-Location') - mime.parts << part - end - mime.headers['Content-Type'] = ['multipart/related'] if related - end - - # at this point, mime is either - # - a single text/plain, consisting of the body ('taking that other path' above. rare) - # - a multipart/alternative, consiting of a few bodies (plain and html body. common) - # - a multipart/mixed, consisting of 1 of the above 2 types of bodies, and attachments. - # we add this standard preamble if its multipart - # FIXME preamble.replace, and body.replace both suck. - # preamble= is doable. body= wasn't being done because body will get rewritten from parts - # if multipart, and is only there readonly. can do that, or do a reparse... - # The way i do this means that only the first preamble will say it, not preambles of nested - # multipart chunks. - mime.preamble.replace "This is a multi-part message in MIME format.\r\n" if mime.multipart? - - # now that we have a root, we can mix in all our headers - headers.each do |key, vals| - # don't overwrite the content-type, encoding style stuff - next if mime.headers.has_key? key - # some new temporary hacks - next if key =~ /content-type/i and vals[0] =~ /base64/ - next if mime.headers.keys.map(&:downcase).include? key.downcase - mime.headers[key] += vals - end - # just a stupid hack to make the content-type header last, when using OrderedHash - mime.headers['Content-Type'] = mime.headers.delete 'Content-Type' - - mime - end - end - - class Attachment - def to_mime - # TODO: smarter mime typing. - mimetype = props.attach_mime_tag || 'application/octet-stream' - mime = SimpleMime.new "Content-Type: #{mimetype}\r\n\r\n" - mime.headers['Content-Disposition'] = [%{attachment; filename="#{filename}"}] - mime.headers['Content-Transfer-Encoding'] = ['base64'] - mime.headers['Content-Location'] = [props.attach_content_location] if props.attach_content_location - mime.headers['Content-ID'] = [props.attach_content_id] if props.attach_content_id - # data.to_s for now. data was nil for some reason. - # perhaps it was a data object not correctly handled? - # hmmm, have to use read here. that assumes that the data isa stream. - # but if the attachment data is a string, then it won't work. possible? - data_str = if @embedded_msg - mime.headers['Content-Type'] = 'message/rfc822' - # lets try making it not base64 for now - mime.headers.delete 'Content-Transfer-Encoding' - # not filename. rather name, or something else right? - # maybe it should be inline?? i forget attach_method / access meaning - mime.headers['Content-Disposition'] = [%{attachment; filename="#{@embedded_msg.subject}"}] - @embedded_msg.to_mime.to_s - elsif @embedded_ole - # kind of hacky - io = StringIO.new - Ole::Storage.new io do |ole| - ole.root.type = :dir - Ole::Storage::Dirent.copy @embedded_ole, ole.root - end - io.string - else - # FIXME: shouldn't be required - data.read.to_s rescue '' - end - mime.body.replace @embedded_msg ? data_str : Base64.encode64(data_str).gsub(/\n/, "\r\n") - mime - end - end - - class Msg < Message - def populate_headers - super - if !headers.has_key?('Date') - # can employ other methods for getting a time. 
heres one in a similar vein to msgconvert.pl, - # ie taking the time from an ole object - time = @root.ole.dirents.map { |dirent| dirent.modify_time || dirent.create_time }.compact.sort.last - headers['Date'] = [Time.iso8601(time.to_s).rfc2822] if time - end - end - end -end - diff --git a/vendor/ruby-msg/lib/mapi/convert/note-tmail.rb b/vendor/ruby-msg/lib/mapi/convert/note-tmail.rb deleted file mode 100644 index 9ccc9e0b3..000000000 --- a/vendor/ruby-msg/lib/mapi/convert/note-tmail.rb +++ /dev/null @@ -1,287 +0,0 @@ -require 'rubygems' -require 'tmail' - -# these will be removed later -require 'time' -require 'mime' - -# there is some Msg specific stuff in here. - -class TMail::Mail - def quoted_body= str - body_port.wopen { |f| f.write str } - str - end -end - -module Mapi - class Message - def mime - return @mime if @mime - # if these headers exist at all, they can be helpful. we may however get a - # application/ms-tnef mime root, which means there will be little other than - # headers. we may get nothing. - # and other times, when received from external, we get the full cigar, boundaries - # etc and all. - # sometimes its multipart, with no boundaries. that throws an error. so we'll be more - # forgiving here - @mime = Mime.new props.transport_message_headers.to_s, true - populate_headers - @mime - end - - def headers - mime.headers - end - - # copy data from msg properties storage to standard mime. headers - # i've now seen it where the existing headers had heaps on stuff, and the msg#props had - # practically nothing. think it was because it was a tnef - msg conversion done by exchange. - def populate_headers - # construct a From value - # should this kind of thing only be done when headers don't exist already? maybe not. if its - # sent, then modified and saved, the headers could be wrong? - # hmmm. i just had an example where a mail is sent, from an internal user, but it has transport - # headers, i think because one recipient was external. the only place the senders email address - # exists is in the transport headers. so its maybe not good to overwrite from. - # recipients however usually have smtp address available. - # maybe we'll do it for all addresses that are smtp? (is that equivalent to - # sender_email_address !~ /^\// - name, email = props.sender_name, props.sender_email_address - if props.sender_addrtype == 'SMTP' - headers['From'] = if name and email and name != email - [%{"#{name}" <#{email}>}] - else - [email || name] - end - elsif !headers.has_key?('From') - # some messages were never sent, so that sender stuff isn't filled out. need to find another - # way to get something - # what about marking whether we thing the email was sent or not? or draft? - # for partition into an eventual Inbox, Sent, Draft mbox set? - # i've now seen cases where this stuff is missing, but exists in transport message headers, - # so maybe i should inhibit this in that case. - if email - # disabling this warning for now - #Log.warn "* no smtp sender email address available (only X.400). creating fake one" - # this is crap. though i've specially picked the logic so that it generates the correct - # email addresses in my case (for my organisation). - # this user stuff will give valid email i think, based on alias. - user = name ? name.sub(/(.*), (.*)/, "\\2.\\1") : email[/\w+$/].downcase - domain = (email[%r{^/O=([^/]+)}i, 1].downcase + '.com' rescue email) - headers['From'] = [name ? %{"#{name}" <#{user}@#{domain}>} : "<#{user}@#{domain}>" ] - elsif name - # we only have a name? 
thats screwed up. - # disabling this warning for now - #Log.warn "* no smtp sender email address available (only name). creating fake one" - headers['From'] = [%{"#{name}"}] - else - # disabling this warning for now - #Log.warn "* no sender email address available at all. FIXME" - end - # else we leave the transport message header version - end - - # for all of this stuff, i'm assigning in utf8 strings. - # thats ok i suppose, maybe i can say its the job of the mime class to handle that. - # but a lot of the headers are overloaded in different ways. plain string, many strings - # other stuff. what happens to a person who has a " in their name etc etc. encoded words - # i suppose. but that then happens before assignment. and can't be automatically undone - # until the header is decomposed into recipients. - recips_by_type = recipients.group_by { |r| r.type } - # i want to the the types in a specific order. - [:to, :cc, :bcc].each do |type| - # don't know why i bother, but if we can, we try to sort recipients by the numerical part - # of the ole name, or just leave it if we can't - recips = recips_by_type[type] - recips = (recips.sort_by { |r| r.obj.name[/\d{8}$/].hex } rescue recips) - # switched to using , for separation, not ;. see issue #4 - # recips.empty? is strange. i wouldn't have thought it possible, but it was right? - headers[type.to_s.sub(/^(.)/) { $1.upcase }] = [recips.join(', ')] unless recips.empty? - end - headers['Subject'] = [props.subject] if props.subject - - # fill in a date value. by default, we won't mess with existing value hear - if !headers.has_key?('Date') - # we want to get a received date, as i understand it. - # use this preference order, or pull the most recent? - keys = %w[message_delivery_time client_submit_time last_modification_time creation_time] - time = keys.each { |key| break time if time = props.send(key) } - time = nil unless Date === time - - # now convert and store - # this is a little funky. not sure about time zone stuff either? - # actually seems ok. maybe its always UTC and interpreted anyway. or can be timezoneless. - # i have no timezone info anyway. - # in gmail, i see stuff like 15 Jan 2007 00:48:19 -0000, and it displays as 11:48. - # can also add .localtime here if desired. but that feels wrong. - headers['Date'] = [Time.iso8601(time.to_s).rfc2822] if time - end - - # some very simplistic mapping between internet message headers and the - # mapi properties - # any of these could be causing duplicates due to case issues. the hack in #to_mime - # just stops re-duplication at that point. need to move some smarts into the mime - # code to handle it. - mapi_header_map = [ - [:internet_message_id, 'Message-ID'], - [:in_reply_to_id, 'In-Reply-To'], - # don't set these values if they're equal to the defaults anyway - [:importance, 'Importance', proc { |val| val.to_s == '1' ? nil : val }], - [:priority, 'Priority', proc { |val| val.to_s == '1' ? nil : val }], - [:sensitivity, 'Sensitivity', proc { |val| val.to_s == '0' ? nil : val }], - # yeah? - [:conversation_topic, 'Thread-Topic'], - # not sure of the distinction here - # :originator_delivery_report_requested ?? - [:read_receipt_requested, 'Disposition-Notification-To', proc { |val| from }] - ] - mapi_header_map.each do |mapi, mime, *f| - next unless q = val = props.send(mapi) or headers.has_key?(mime) - next if f[0] and !(val = f[0].call(val)) - headers[mime] = [val.to_s] - end - end - - # redundant? 
- def type - props.message_class[/IPM\.(.*)/, 1].downcase rescue nil - end - - # shortcuts to some things from the headers - %w[From To Cc Bcc Subject].each do |key| - define_method(key.downcase) { headers[key].join(' ') if headers.has_key?(key) } - end - - def body_to_tmail - # to create the body - # should have some options about serializing rtf. and possibly options to check the rtf - # for rtf2html conversion, stripping those html tags or other similar stuff. maybe want to - # ignore it in the cases where it is generated from incoming html. but keep it if it was the - # source for html and plaintext. - if props.body_rtf or props.body_html - # should plain come first? - part = TMail::Mail.new - # its actually possible for plain body to be empty, but the others not. - # if i can get an html version, then maybe a callout to lynx can be made... - part.parts << TMail::Mail.parse("Content-Type: text/plain\r\n\r\n" + props.body) if props.body - # this may be automatically unwrapped from the rtf if the rtf includes the html - part.parts << TMail::Mail.parse("Content-Type: text/html\r\n\r\n" + props.body_html) if props.body_html - # temporarily disabled the rtf. its just showing up as an attachment anyway. - #mime.parts << Mime.new("Content-Type: text/rtf\r\n\r\n" + props.body_rtf) if props.body_rtf - # its thus currently possible to get no body at all if the only body is rtf. that is not - # really acceptable FIXME - part['Content-Type'] = 'multipart/alternative' - part - else - # check no header case. content type? etc?. not sure if my Mime class will accept - Log.debug "taking that other path" - # body can be nil, hence the to_s - TMail::Mail.parse "Content-Type: text/plain\r\n\r\n" + props.body.to_s - end - end - - def to_tmail - # intended to be used for IPM.note, which is the email type. can use it for others if desired, - # YMMV - Log.warn "to_mime used on a #{props.message_class}" unless props.message_class == 'IPM.Note' - # we always have a body - mail = body = body_to_tmail - - # If we have attachments, we take the current mime root (body), and make it the first child - # of a new tree that will contain body and attachments. - unless attachments.empty? - raise NotImplementedError - mime = Mime.new "Content-Type: multipart/mixed\r\n\r\n" - mime.parts << body - # i don't know any better way to do this. need multipart/related for inline images - # referenced by cid: urls to work, but don't want to use it otherwise... - related = false - attachments.each do |attach| - part = attach.to_mime - related = true if part.headers.has_key?('Content-ID') or part.headers.has_key?('Content-Location') - mime.parts << part - end - mime.headers['Content-Type'] = ['multipart/related'] if related - end - - # at this point, mime is either - # - a single text/plain, consisting of the body ('taking that other path' above. rare) - # - a multipart/alternative, consiting of a few bodies (plain and html body. common) - # - a multipart/mixed, consisting of 1 of the above 2 types of bodies, and attachments. - # we add this standard preamble if its multipart - # FIXME preamble.replace, and body.replace both suck. - # preamble= is doable. body= wasn't being done because body will get rewritten from parts - # if multipart, and is only there readonly. can do that, or do a reparse... - # The way i do this means that only the first preamble will say it, not preambles of nested - # multipart chunks. - mail.quoted_body = "This is a multi-part message in MIME format.\r\n" if mail.multipart? 
- - # now that we have a root, we can mix in all our headers - headers.each do |key, vals| - # don't overwrite the content-type, encoding style stuff - next if mail[key] - # some new temporary hacks - next if key =~ /content-type/i and vals[0] =~ /base64/ - #next if mime.headers.keys.map(&:downcase).include? key.downcase - mail[key] = vals.first - end - # just a stupid hack to make the content-type header last, when using OrderedHash - #mime.headers['Content-Type'] = mime.headers.delete 'Content-Type' - - mail - end - end - - class Attachment - def to_tmail - # TODO: smarter mime typing. - mimetype = props.attach_mime_tag || 'application/octet-stream' - part = TMail::Mail.parse "Content-Type: #{mimetype}\r\n\r\n" - part['Content-Disposition'] = %{attachment; filename="#{filename}"} - part['Content-Transfer-Encoding'] = 'base64' - part['Content-Location'] = props.attach_content_location if props.attach_content_location - part['Content-ID'] = props.attach_content_id if props.attach_content_id - # data.to_s for now. data was nil for some reason. - # perhaps it was a data object not correctly handled? - # hmmm, have to use read here. that assumes that the data isa stream. - # but if the attachment data is a string, then it won't work. possible? - data_str = if @embedded_msg - raise NotImplementedError - mime.headers['Content-Type'] = 'message/rfc822' - # lets try making it not base64 for now - mime.headers.delete 'Content-Transfer-Encoding' - # not filename. rather name, or something else right? - # maybe it should be inline?? i forget attach_method / access meaning - mime.headers['Content-Disposition'] = [%{attachment; filename="#{@embedded_msg.subject}"}] - @embedded_msg.to_mime.to_s - elsif @embedded_ole - raise NotImplementedError - # kind of hacky - io = StringIO.new - Ole::Storage.new io do |ole| - ole.root.type = :dir - Ole::Storage::Dirent.copy @embedded_ole, ole.root - end - io.string - else - data.read.to_s - end - part.body = @embedded_msg ? data_str : Base64.encode64(data_str).gsub(/\n/, "\r\n") - part - end - end - - class Msg < Message - def populate_headers - super - if !headers.has_key?('Date') - # can employ other methods for getting a time. heres one in a similar vein to msgconvert.pl, - # ie taking the time from an ole object - time = @root.ole.dirents.map { |dirent| dirent.modify_time || dirent.create_time }.compact.sort.last - headers['Date'] = [Time.iso8601(time.to_s).rfc2822] if time - end - end - end -end - diff --git a/vendor/ruby-msg/lib/mapi/msg.rb b/vendor/ruby-msg/lib/mapi/msg.rb deleted file mode 100644 index fc30a9170..000000000 --- a/vendor/ruby-msg/lib/mapi/msg.rb +++ /dev/null @@ -1,440 +0,0 @@ -require 'rubygems' -require 'ole/storage' -require 'mapi' -require 'mapi/rtf' - -module Mapi - # - # = Introduction - # - # Primary class interface to the vagaries of .msg files. - # - # The core of the work is done by the <tt>Msg::PropertyStore</tt> class. - # - class Msg < Message - # - # = Introduction - # - # A big compononent of +Msg+ files is the property store, which holds - # all the key/value pairs of properties. The message itself, and all - # its <tt>Attachment</tt>s and <tt>Recipient</tt>s have an instance of - # this class. - # - # = Storage model - # - # Property keys (tags?) can be either simple hex numbers, in the - # range 0x0000 - 0xffff, or they can be named properties. In fact, - # properties in the range 0x0000 to 0x7fff are supposed to be the non- - # named properties, and can be considered to be in the +PS_MAPI+ - # namespace. (correct?) 
- # - # Named properties are serialized in the 0x8000 to 0xffff range, - # and are referenced as a guid and long/string pair. - # - # There are key ranges, which can be used to imply things generally - # about keys. - # - # Further, we can give symbolic names to most keys, coming from - # constants in various places. Eg: - # - # 0x0037 => subject - # {00062002-0000-0000-C000-000000000046}/0x8218 => response_status - # # displayed as categories in outlook - # {00020329-0000-0000-C000-000000000046}/"Keywords" => categories - # - # Futher, there are completely different names, coming from other - # object models that get mapped to these things (CDO's model, - # Outlook's model etc). Eg "urn:schemas:httpmail:subject" - # I think these can be ignored though, as they aren't defined clearly - # in terms of mapi properties, and i'm really just trying to make - # a mapi property store. (It should also be relatively easy to - # support them later.) - # - # = Usage - # - # The api is driven by a desire to have the simple stuff "just work", ie - # - # properties.subject - # properties.display_name - # - # There also needs to be a way to look up properties more specifically: - # - # properties[0x0037] # => gets the subject - # properties[0x0037, PS_MAPI] # => still gets the subject - # properties['Keywords', PS_PUBLIC_STRINGS] # => gets outlook's categories array - # - # The abbreviated versions work by "resolving" the symbols to full keys: - # - # # the guid here is just PS_PUBLIC_STRINGS - # properties.resolve :keywords # => #<Key {00020329-0000-0000-c000-000000000046}/"Keywords"> - # # the result here is actually also a key - # k = properties.resolve :subject # => 0x0037 - # # it has a guid - # k.guid == Msg::Properties::PS_MAPI # => true - # - # = Parsing - # - # There are three objects that need to be parsed to load a +Msg+ property store: - # - # 1. The +nameid+ directory (<tt>Properties.parse_nameid</tt>) - # 2. The many +substg+ objects, whose names should match <tt>Properties::SUBSTG_RX</tt> - # (<tt>Properties#parse_substg</tt>) - # 3. The +properties+ file (<tt>Properties#parse_properties</tt>) - # - # Understanding of the formats is by no means perfect. - # - # = TODO - # - # * While the key objects are sufficient, the value objects are just plain - # ruby types. It currently isn't possible to write to the values, or to know - # which encoding the value had. - # * Update this doc. - # * Perhaps change from eager loading, to be load-on-demand. - # - class PropertyStore - include PropertySet::Constants - Key = PropertySet::Key - - # note that binary and default both use obj.open. not the block form. this means we should - # #close it later, which we don't. as we're only reading though, it shouldn't matter right? - # not really good though FIXME - # change these to use mapi symbolic const names - ENCODINGS = { - 0x000d => proc { |obj| obj }, # seems to be used when its going to be a directory instead of a file. eg nested ole. 3701 usually. in which case we shouldn't get here right? - 0x001f => proc { |obj| Ole::Types::FROM_UTF16.iconv obj.read }, # unicode - # ascii - # FIXME hack did a[0..-2] before, seems right sometimes, but for some others it chopped the text. chomp - 0x001e => proc { |obj| obj.read.chomp 0.chr }, - 0x0102 => proc { |obj| obj.open }, # binary? 
- :default => proc { |obj| obj.open } - } - - SUBSTG_RX = /^__substg1\.0_([0-9A-F]{4})([0-9A-F]{4})(?:-([0-9A-F]{8}))?$/ - PROPERTIES_RX = /^__properties_version1\.0$/ - NAMEID_RX = /^__nameid_version1\.0$/ - VALID_RX = /#{SUBSTG_RX}|#{PROPERTIES_RX}|#{NAMEID_RX}/ - - attr_reader :nameid - - def initialize - @nameid = nil - # not exactly a cache currently - @cache = {} - end - - #-- - # The parsing methods - #++ - - def self.load obj - prop = new - prop.load obj - prop - end - - # Parse properties from the +Dirent+ obj - def load obj - # we need to do the nameid first, as it provides the map for later user defined properties - if nameid_obj = obj.children.find { |child| child.name =~ NAMEID_RX } - @nameid = PropertyStore.parse_nameid nameid_obj - # hack to make it available to all msg files from the same ole storage object - # FIXME - come up with a neater way - class << obj.ole - attr_accessor :msg_nameid - end - obj.ole.msg_nameid = @nameid - elsif obj.ole - @nameid = obj.ole.msg_nameid rescue nil - end - # now parse the actual properties. i think dirs that match the substg should be decoded - # as properties to. 0x000d is just another encoding, the dir encoding. it should match - # whether the object is file / dir. currently only example is embedded msgs anyway - obj.children.each do |child| - next unless child.file? - case child.name - when PROPERTIES_RX - parse_properties child - when SUBSTG_RX - parse_substg(*($~[1..-1].map { |num| num.hex rescue nil } + [child])) - end - end - end - - # Read nameid from the +Dirent+ obj, which is used for mapping of named properties keys to - # proxy keys in the 0x8000 - 0xffff range. - # Returns a hash of integer -> Key. - def self.parse_nameid obj - remaining = obj.children.dup - guids_obj, props_obj, names_obj = - %w[__substg1.0_00020102 __substg1.0_00030102 __substg1.0_00040102].map do |name| - remaining.delete obj/name - end - - # parse guids - # this is the guids for named properities (other than builtin ones) - # i think PS_PUBLIC_STRINGS, and PS_MAPI are builtin. - guids = [PS_PUBLIC_STRINGS] + guids_obj.read.scan(/.{16}/mn).map do |str| - Ole::Types.load_guid str - end - - # parse names. - # the string ids for named properties - # they are no longer parsed, as they're referred to by offset not - # index. they are simply sequentially packed, as a long, giving - # the string length, then padding to 4 byte multiple, and repeat. - names_data = names_obj.read - - # parse actual props. - # not sure about any of this stuff really. - # should flip a few bits in the real msg, to get a better understanding of how this works. - props = props_obj.read.scan(/.{8}/mn).map do |str| - flags, offset = str[4..-1].unpack 'v2' - # the property will be serialised as this pseudo property, mapping it to this named property - pseudo_prop = 0x8000 + offset - named = flags & 1 == 1 - prop = if named - str_off = *str.unpack('V') - len = *names_data[str_off, 4].unpack('V') - Ole::Types::FROM_UTF16.iconv names_data[str_off + 4, len] - else - a, b = str.unpack('v2') - Log.debug "b not 0" if b != 0 - a - end - # a bit sus - guid_off = flags >> 1 - # missing a few builtin PS_* - Log.debug "guid off < 2 (#{guid_off})" if guid_off < 2 - guid = guids[guid_off - 2] - [pseudo_prop, Key.new(prop, guid)] - end - - #Log.warn "* ignoring #{remaining.length} objects in nameid" unless remaining.empty? - # this leaves a bunch of other unknown chunks of data with completely unknown meaning. 
- # pp [:unknown, child.name, child.data.unpack('H*')[0].scan(/.{16}/m)] - Hash[*props.flatten] - end - - # Parse an +Dirent+, as per <tt>msgconvert.pl</tt>. This is how larger properties, such - # as strings, binary blobs, and other ole sub-directories (eg nested Msg) are stored. - def parse_substg key, encoding, offset, obj - if (encoding & 0x1000) != 0 - if !offset - # there is typically one with no offset first, whose data is a series of numbers - # equal to the lengths of all the sub parts. gives an implied array size i suppose. - # maybe you can initialize the array at this time. the sizes are the same as all the - # ole object sizes anyway, its to pre-allocate i suppose. - #p obj.data.unpack('V*') - # ignore this one - return - else - # remove multivalue flag for individual pieces - encoding &= ~0x1000 - end - else - Log.warn "offset specified for non-multivalue encoding #{obj.name}" if offset - offset = nil - end - # offset is for multivalue encodings. - unless encoder = ENCODINGS[encoding] - Log.warn "unknown encoding #{encoding}" - #encoder = proc { |obj| obj.io } #.read }. maybe not a good idea - encoder = ENCODINGS[:default] - end - add_property key, encoder[obj], offset - end - - # For parsing the +properties+ file. Smaller properties are serialized in one chunk, - # such as longs, bools, times etc. The parsing has problems. - def parse_properties obj - data = obj.read - # don't really understand this that well... - pad = data.length % 16 - unless (pad == 0 || pad == 8) and data[0...pad] == "\000" * pad - Log.warn "padding was not as expected #{pad} (#{data.length}) -> #{data[0...pad].inspect}" - end - data[pad..-1].scan(/.{16}/mn).each do |data| - property, encoding = ('%08x' % data.unpack('V')).scan /.{4}/ - key = property.hex - # doesn't make any sense to me. probably because its a serialization of some internal - # outlook structure... - next if property == '0000' - case encoding - when '0102', '001e', '001f', '101e', '101f', '000d' - # ignore on purpose. not sure what its for - # multivalue versions ignored also - when '0003' # long - # don't know what all the other data is for - add_property key, *data[8, 4].unpack('V') - when '000b' # boolean - # again, heaps more data than needed. and its not always 0 or 1. - # they are in fact quite big numbers. this is wrong. -# p [property, data[4..-1].unpack('H*')[0]] - add_property key, data[8, 4].unpack('V')[0] != 0 - when '0040' # systime - # seems to work: - add_property key, Ole::Types.load_time(data[8..-1]) - else - #Log.warn "ignoring data in __properties section, encoding: #{encoding}" - #Log << data.unpack('H*').inspect + "\n" - end - end - end - - def add_property key, value, pos=nil - # map keys in the named property range through nameid - if Integer === key and key >= 0x8000 - if !@nameid - Log.warn "no nameid section yet named properties used" - key = Key.new key - elsif real_key = @nameid[key] - key = real_key - else - # i think i hit these when i have a named property, in the PS_MAPI - # guid - Log.warn "property in named range not in nameid #{key.inspect}" - key = Key.new key - end - else - key = Key.new key - end - if pos - @cache[key] ||= [] - Log.warn "duplicate property" unless Array === @cache[key] - # ^ this is actually a trickier problem. the issue is more that they must all be of - # the same type. - @cache[key][pos] = value - else - # take the last. 
- Log.warn "duplicate property #{key.inspect}" if @cache[key] - @cache[key] = value - end - end - - # delegate to cache - def method_missing name, *args, &block - @cache.send name, *args, &block - end - end - - # these 2 will actually be of the form - # 1\.0_#([0-9A-Z]{8}), where $1 is the 0 based index number in hex - # should i parse that and use it as an index, or just return in - # file order? probably should use it later... - ATTACH_RX = /^__attach_version1\.0_.*/ - RECIP_RX = /^__recip_version1\.0_.*/ - VALID_RX = /#{PropertyStore::VALID_RX}|#{ATTACH_RX}|#{RECIP_RX}/ - - attr_reader :root - attr_accessor :close_parent - - # Alternate constructor, to create an +Msg+ directly from +arg+ and +mode+, passed - # directly to Ole::Storage (ie either filename or seekable IO object). - def self.open arg, mode=nil - msg = new Ole::Storage.open(arg, mode).root - # we will close the ole when we are #closed - msg.close_parent = true - if block_given? - begin yield msg - ensure; msg.close - end - else msg - end - end - - # Create an Msg from +root+, an <tt>Ole::Storage::Dirent</tt> object - def initialize root - @root = root - @close_parent = false - super PropertySet.new(PropertyStore.load(@root)) - Msg.warn_unknown @root - end - - def self.warn_unknown obj - # bit of validation. not important if there is extra stuff, though would be - # interested to know what it is. doesn't check dir/file stuff. - unknown = obj.children.reject { |child| child.name =~ VALID_RX } - Log.warn "skipped #{unknown.length} unknown msg object(s)" unless unknown.empty? - end - - def close - @root.ole.close if @close_parent - end - - def attachments - @attachments ||= @root.children. - select { |child| child.dir? and child.name =~ ATTACH_RX }. - map { |child| Attachment.new child }. - select { |attach| attach.valid? } - end - - def recipients - @recipients ||= @root.children. - select { |child| child.dir? and child.name =~ RECIP_RX }. - map { |child| Recipient.new child } - end - - class Attachment < Mapi::Attachment - attr_reader :obj, :properties - alias props :properties - - def initialize obj - @obj = obj - @embedded_ole = nil - @embedded_msg = nil - - super PropertySet.new(PropertyStore.load(@obj)) - Msg.warn_unknown @obj - - @obj.children.each do |child| - # temp hack. PropertyStore doesn't do directory properties atm - FIXME - if child.dir? and child.name =~ PropertyStore::SUBSTG_RX and - $1 == '3701' and $2.downcase == '000d' - @embedded_ole = child - class << @embedded_ole - def compobj - return nil unless compobj = self["\001CompObj"] - compobj.read[/^.{32}([^\x00]+)/m, 1] - end - - def embedded_type - temp = compobj and return temp - # try to guess more - if children.select { |child| child.name =~ /__(substg|properties|recip|attach|nameid)/ }.length > 2 - return 'Microsoft Office Outlook Message' - end - nil - end - end - if @embedded_ole.embedded_type == 'Microsoft Office Outlook Message' - @embedded_msg = Msg.new @embedded_ole - end - end - end - end - - def valid? - # something i started to notice when handling embedded ole object attachments is - # the particularly strange case where there are empty attachments - not props.raw.keys.empty? - end - end - - # - # +Recipient+ serves as a container for the +recip+ directories in the .msg. - # It has things like office_location, business_telephone_number, but I don't - # think enough to make a vCard out of? 
- # - class Recipient < Mapi::Recipient - attr_reader :obj, :properties - alias props :properties - - def initialize obj - @obj = obj - super PropertySet.new(PropertyStore.load(@obj)) - Msg.warn_unknown @obj - end - end - end -end - diff --git a/vendor/ruby-msg/lib/mapi/property_set.rb b/vendor/ruby-msg/lib/mapi/property_set.rb deleted file mode 100644 index 199bca525..000000000 --- a/vendor/ruby-msg/lib/mapi/property_set.rb +++ /dev/null @@ -1,269 +0,0 @@ -require 'yaml' -require 'mapi/types' -require 'mapi/rtf' -require 'rtf' - -module Mapi - # - # The Mapi::PropertySet class is used to wrap the lower level Msg or Pst property stores, - # and provide a consistent and more friendly interface. It allows you to just say: - # - # properties.subject - # - # instead of: - # - # properites.raw[0x0037, PS_MAPI] - # - # The underlying store can be just a hash, or lazily loading directly from the file. A good - # compromise is to cache all the available keys, and just return the values on demand, rather - # than load up many possibly unwanted values. - # - class PropertySet - # the property set guid constants - # these guids are all defined with the macro DEFINE_OLEGUID in mapiguid.h. - # see http://doc.ddart.net/msdn/header/include/mapiguid.h.html - oleguid = proc do |prefix| - Ole::Types::Clsid.parse "{#{prefix}-0000-0000-c000-000000000046}" - end - - NAMES = { - oleguid['00020328'] => 'PS_MAPI', - oleguid['00020329'] => 'PS_PUBLIC_STRINGS', - oleguid['00020380'] => 'PS_ROUTING_EMAIL_ADDRESSES', - oleguid['00020381'] => 'PS_ROUTING_ADDRTYPE', - oleguid['00020382'] => 'PS_ROUTING_DISPLAY_NAME', - oleguid['00020383'] => 'PS_ROUTING_ENTRYID', - oleguid['00020384'] => 'PS_ROUTING_SEARCH_KEY', - # string properties in this namespace automatically get added to the internet headers - oleguid['00020386'] => 'PS_INTERNET_HEADERS', - # theres are bunch of outlook ones i think - # http://blogs.msdn.com/stephen_griffin/archive/2006/05/10/outlook-2007-beta-documentation-notification-based-indexing-support.aspx - # IPM.Appointment - oleguid['00062002'] => 'PSETID_Appointment', - # IPM.Task - oleguid['00062003'] => 'PSETID_Task', - # used for IPM.Contact - oleguid['00062004'] => 'PSETID_Address', - oleguid['00062008'] => 'PSETID_Common', - # didn't find a source for this name. it is for IPM.StickyNote - oleguid['0006200e'] => 'PSETID_Note', - # for IPM.Activity. also called the journal? - oleguid['0006200a'] => 'PSETID_Log', - } - - module Constants - NAMES.each { |guid, name| const_set name, guid } - end - - include Constants - - # +Properties+ are accessed by <tt>Key</tt>s, which are coerced to this class. - # Includes a bunch of methods (hash, ==, eql?) to allow it to work as a key in - # a +Hash+. - # - # Also contains the code that maps keys to symbolic names. - class Key - include Constants - - attr_reader :code, :guid - def initialize code, guid=PS_MAPI - @code, @guid = code, guid - end - - def to_sym - # hmmm, for some stuff, like, eg, the message class specific range, sym-ification - # of the key depends on knowing our message class. i don't want to store anything else - # here though, so if that kind of thing is needed, it can be passed to this function. - # worry about that when some examples arise. - case code - when Integer - if guid == PS_MAPI # and < 0x8000 ? 
- # the hash should be updated now that i've changed the process - TAGS['%04x' % code].first[/_(.*)/, 1].downcase.to_sym rescue code - else - # handle other guids here, like mapping names to outlook properties, based on the - # outlook object model. - NAMED_MAP[self].to_sym rescue code - end - when String - # return something like - # note that named properties don't go through the map at the moment. so #categories - # doesn't work yet - code.downcase.to_sym - end - end - - def to_s - to_sym.to_s - end - - # FIXME implement these - def transmittable? - # etc, can go here too - end - - # this stuff is to allow it to be a useful key - def hash - [code, guid].hash - end - - def == other - hash == other.hash - end - - alias eql? :== - - def inspect - # maybe the way to do this, would be to be able to register guids - # in a global lookup, which are used by Clsid#inspect itself, to - # provide symbolic names... - guid_str = NAMES[guid] || "{#{guid.format}}" rescue "nil" - if Integer === code - hex = '0x%04x' % code - if guid == PS_MAPI - # just display as plain hex number - hex - else - "#<Key #{guid_str}/#{hex}>" - end - else - # display full guid and code - "#<Key #{guid_str}/#{code.inspect}>" - end - end - end - - # duplicated here for now - SUPPORT_DIR = File.dirname(__FILE__) + '/../..' - - # data files that provide for the code to symbolic name mapping - # guids in named_map are really constant references to the above - TAGS = YAML.load_file "#{SUPPORT_DIR}/data/mapitags.yaml" - NAMED_MAP = YAML.load_file("#{SUPPORT_DIR}/data/named_map.yaml").inject({}) do |hash, (key, value)| - hash.update Key.new(key[0], const_get(key[1])) => value - end - - attr_reader :raw - - # +raw+ should be an hash-like object that maps <tt>Key</tt>s to values. Should respond_to? - # [], keys, values, each, and optionally []=, and delete. - def initialize raw - @raw = raw - end - - # resolve +arg+ (could be key, code, string, or symbol), and possible +guid+ to a key. - # returns nil on failure - def resolve arg, guid=nil - if guid; Key.new arg, guid - else - case arg - when Key; arg - when Integer; Key.new arg - else sym_to_key[arg.to_sym] - end - end - end - - # this is the function that creates a symbol to key mapping. currently this works by making a - # pass through the raw properties, but conceivably you could map symbols to keys using the - # mapitags directly. problem with that would be that named properties wouldn't map automatically, - # but maybe thats not too important. - def sym_to_key - return @sym_to_key if @sym_to_key - @sym_to_key = {} - raw.keys.each do |key| - sym = key.to_sym - unless Symbol === sym - Log.debug "couldn't find symbolic name for key #{key.inspect}" - next - end - if @sym_to_key[sym] - Log.warn "duplicate key #{key.inspect}" - # we give preference to PS_MAPI keys - @sym_to_key[sym] = key if key.guid == PS_MAPI - else - # just assign - @sym_to_key[sym] = key - end - end - @sym_to_key - end - - def keys - sym_to_key.keys - end - - def values - sym_to_key.values.map { |key| raw[key] } - end - - def [] arg, guid=nil - raw[resolve(arg, guid)] - end - - def []= arg, *args - args.unshift nil if args.length == 1 - guid, value = args - # FIXME this won't really work properly. it would need to go - # to TAGS to resolve, as it often won't be there already... - raw[resolve(arg, guid)] = value - end - - def method_missing name, *args - if name.to_s !~ /\=$/ and args.empty? 
- self[name] - elsif name.to_s =~ /(.*)\=$/ and args.length == 1 - self[$1] = args[0] - else - super - end - end - - def to_h - sym_to_key.inject({}) { |hash, (sym, key)| hash.update sym => raw[key] } - end - - def inspect - "#<#{self.class} " + to_h.sort_by { |k, v| k.to_s }.map do |k, v| - v = v.inspect - "#{k}=#{v.length > 32 ? v[0..29] + '..."' : v}" - end.join(' ') + '>' - end - - # ----- - - # temporary pseudo tags - - # for providing rtf to plain text conversion. later, html to text too. - def body - return @body if defined?(@body) - @body = (self[:body] rescue nil) - # last resort - if !@body or @body.strip.empty? - Log.warn 'creating text body from rtf' - @body = (::RTF::Converter.rtf2text body_rtf rescue nil) - end - @body - end - - # for providing rtf decompression - def body_rtf - return @body_rtf if defined?(@body_rtf) - @body_rtf = (RTF.rtfdecompr rtf_compressed.read rescue nil) - end - - # for providing rtf to html conversion - def body_html - return @body_html if defined?(@body_html) - @body_html = (self[:body_html].read rescue nil) - @body_html = (RTF.rtf2html body_rtf rescue nil) if !@body_html or @body_html.strip.empty? - # last resort - if !@body_html or @body_html.strip.empty? - Log.warn 'creating html body from rtf' - @body_html = (::RTF::Converter.rtf2text body_rtf, :html rescue nil) - end - @body_html - end - end -end - diff --git a/vendor/ruby-msg/lib/mapi/pst.rb b/vendor/ruby-msg/lib/mapi/pst.rb deleted file mode 100644 index 9ac64b097..000000000 --- a/vendor/ruby-msg/lib/mapi/pst.rb +++ /dev/null @@ -1,1806 +0,0 @@ -# -# = Introduction -# -# This file is mostly an attempt to port libpst to ruby, and simplify it in the process. It -# will leverage much of the existing MAPI => MIME conversion developed for Msg files, and as -# such is purely concerned with the file structure details. -# -# = TODO -# -# 1. solve recipient table problem (test4). -# this is done. turns out it was due to id2 clashes. find better solution -# 2. check parse consistency. an initial conversion of a 30M file to pst, shows -# a number of messages conveting badly. compare with libpst too. -# 3. xattribs -# 4. generalise the Mapi stuff better -# 5. refactor index load -# 6. msg serialization? -# - -=begin - -quick plan for cleanup. - -have working tests for 97 and 03 file formats, so safe. - -want to fix up: - -64 bit unpacks scattered around. its ugly. not sure how best to handle it, but am slightly tempted -to override String#unpack to support a 64 bit little endian unpack (like L vs N/V, for Q). one way or -another need to fix it. Could really slow everything else down if its parsing the unpack strings twice, -once in ruby, for every single unpack i do :/ - -the index loading process, and the lack of shared code between normal vs 64 bit variants, and Index vs Desc. -should be able to reduce code by factor of 4. also think I should move load code into the class too. then -maybe have something like: - -class Header - def index_class - version_2003 ? Index64 : Index - end -end - -def load_idx - header.index_class.load_index -end - -OR - -def initialize - @header = ... - extend @header.index_class::Load - load_idx -end - -need to think about the role of the mapi code, and Pst::Item etc, but that layer can come later. - -=end - -require 'mapi' -require 'enumerator' -require 'ostruct' -require 'ole/ranges_io' - -module Mapi -class Pst - class FormatError < StandardError - end - - # unfortunately there is no Q analogue which is little endian only. 
- # this translates T as an unsigned quad word, little endian byte order, to - # not pollute the rest of the code. - # - # didn't want to override String#unpack, cause its too hacky, and incomplete. - def self.unpack str, unpack_spec - return str.unpack(unpack_spec) unless unpack_spec['T'] - @unpack_cache ||= {} - t_offsets, new_spec = @unpack_cache[unpack_spec] - unless t_offsets - t_offsets = [] - offset = 0 - new_spec = '' - unpack_spec.scan(/([^\d])_?(\*|\d+)?/o) do - num_elems = $1.downcase == 'a' ? 1 : ($2 || 1).to_i - if $1 == 'T' - num_elems.times { |i| t_offsets << offset + i } - new_spec << "V#{num_elems * 2}" - else - new_spec << $~[0] - end - offset += num_elems - end - @unpack_cache[unpack_spec] = [t_offsets, new_spec] - end - a = str.unpack(new_spec) - t_offsets.each do |offset| - low, high = a[offset, 2] - a[offset, 2] = low && high ? low + (high << 32) : nil - end - a - end - - # - # this is the header and encryption encapsulation code - # ---------------------------------------------------------------------------- - # - - # class which encapsulates the pst header - class Header - SIZE = 512 - MAGIC = 0x2142444e - - # these are the constants defined in libpst.c, that - # are referenced in pst_open() - INDEX_TYPE_OFFSET = 0x0A - FILE_SIZE_POINTER = 0xA8 - FILE_SIZE_POINTER_64 = 0xB8 - SECOND_POINTER = 0xBC - INDEX_POINTER = 0xC4 - SECOND_POINTER_64 = 0xE0 - INDEX_POINTER_64 = 0xF0 - ENC_OFFSET = 0x1CD - - attr_reader :magic, :index_type, :encrypt_type, :size - attr_reader :index1_count, :index1, :index2_count, :index2 - attr_reader :version - def initialize data - @magic = data.unpack('N')[0] - @index_type = data[INDEX_TYPE_OFFSET] - @version = {0x0e => 1997, 0x17 => 2003}[@index_type] - - if version_2003? - # don't know? - # >> data1.unpack('V*').zip(data2.unpack('V*')).enum_with_index.select { |(c, d), i| c != d and not [46, 56, 60].include?(i) }.select { |(a, b), i| b == 0 }.map { |(a, b), i| [a / 256, i] } - # [8, 76], [32768, 84], [128, 89] - # >> data1.unpack('C*').zip(data2.unpack('C*')).enum_with_index.select { |(c, d), i| c != d and not [184..187, 224..227, 240..243].any? { |r| r === i } }.select { |(a, b), i| b == 0 and ((Math.log(a) / Math.log(2)) % 1) < 0.0001 } - # [[[2, 0], 61], [[2, 0], 76], [[2, 0], 195], [[2, 0], 257], [[8, 0], 305], [[128, 0], 338], [[128, 0], 357]] - # i have only 2 psts to base this guess on, so i can't really come up with anything that looks reasonable yet. not sure what the offset is. unfortunately there is so much in the header - # that isn't understood... - @encrypt_type = 1 - - @index2_count, @index2 = data[SECOND_POINTER_64 - 4, 8].unpack('V2') - @index1_count, @index1 = data[INDEX_POINTER_64 - 4, 8].unpack('V2') - - @size = data[FILE_SIZE_POINTER_64, 4].unpack('V')[0] - else - @encrypt_type = data[ENC_OFFSET] - - @index2_count, @index2 = data[SECOND_POINTER - 4, 8].unpack('V2') - @index1_count, @index1 = data[INDEX_POINTER - 4, 8].unpack('V2') - - @size = data[FILE_SIZE_POINTER, 4].unpack('V')[0] - end - - validate! - end - - def version_2003? - version == 2003 - end - - def encrypted? - encrypt_type != 0 - end - - def validate! - raise FormatError, "bad signature on pst file (#{'0x%x' % magic})" unless magic == MAGIC - raise FormatError, "only index types 0x0e and 0x17 are handled (#{'0x%x' % index_type})" unless [0x0e, 0x17].include?(index_type) - raise FormatError, "only encrytion types 0 and 1 are handled (#{encrypt_type.inspect})" unless [0, 1].include?(encrypt_type) - end - end - - # compressible encryption! 
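For orientation, the substitution cipher defined in the class below is a pair of inverse byte permutations, so decrypting what was encrypted restores the original data exactly. A minimal round-trip sketch (editor's illustration, not part of the original file; assumes the Ruby 1.8-era byte/string semantics this library uses throughout):

    plain  = "compressible encryption test"
    cipher = Mapi::Pst::CompressibleEncryption.encrypt plain
    Mapi::Pst::CompressibleEncryption.decrypt(cipher) == plain  # => true
    # reading a pst only needs the decrypt direction - see pst_read_block_size
    # and RangesIOEncryptable further down.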
:D - # - # simple substitution. see libpst.c - # maybe test switch to using a String#tr! - class CompressibleEncryption - DECRYPT_TABLE = [ - 0x47, 0xf1, 0xb4, 0xe6, 0x0b, 0x6a, 0x72, 0x48, - 0x85, 0x4e, 0x9e, 0xeb, 0xe2, 0xf8, 0x94, 0x53, # 0x0f - 0xe0, 0xbb, 0xa0, 0x02, 0xe8, 0x5a, 0x09, 0xab, - 0xdb, 0xe3, 0xba, 0xc6, 0x7c, 0xc3, 0x10, 0xdd, # 0x1f - 0x39, 0x05, 0x96, 0x30, 0xf5, 0x37, 0x60, 0x82, - 0x8c, 0xc9, 0x13, 0x4a, 0x6b, 0x1d, 0xf3, 0xfb, # 0x2f - 0x8f, 0x26, 0x97, 0xca, 0x91, 0x17, 0x01, 0xc4, - 0x32, 0x2d, 0x6e, 0x31, 0x95, 0xff, 0xd9, 0x23, # 0x3f - 0xd1, 0x00, 0x5e, 0x79, 0xdc, 0x44, 0x3b, 0x1a, - 0x28, 0xc5, 0x61, 0x57, 0x20, 0x90, 0x3d, 0x83, # 0x4f - 0xb9, 0x43, 0xbe, 0x67, 0xd2, 0x46, 0x42, 0x76, - 0xc0, 0x6d, 0x5b, 0x7e, 0xb2, 0x0f, 0x16, 0x29, # 0x5f - 0x3c, 0xa9, 0x03, 0x54, 0x0d, 0xda, 0x5d, 0xdf, - 0xf6, 0xb7, 0xc7, 0x62, 0xcd, 0x8d, 0x06, 0xd3, # 0x6f - 0x69, 0x5c, 0x86, 0xd6, 0x14, 0xf7, 0xa5, 0x66, - 0x75, 0xac, 0xb1, 0xe9, 0x45, 0x21, 0x70, 0x0c, # 0x7f - 0x87, 0x9f, 0x74, 0xa4, 0x22, 0x4c, 0x6f, 0xbf, - 0x1f, 0x56, 0xaa, 0x2e, 0xb3, 0x78, 0x33, 0x50, # 0x8f - 0xb0, 0xa3, 0x92, 0xbc, 0xcf, 0x19, 0x1c, 0xa7, - 0x63, 0xcb, 0x1e, 0x4d, 0x3e, 0x4b, 0x1b, 0x9b, # 0x9f - 0x4f, 0xe7, 0xf0, 0xee, 0xad, 0x3a, 0xb5, 0x59, - 0x04, 0xea, 0x40, 0x55, 0x25, 0x51, 0xe5, 0x7a, # 0xaf - 0x89, 0x38, 0x68, 0x52, 0x7b, 0xfc, 0x27, 0xae, - 0xd7, 0xbd, 0xfa, 0x07, 0xf4, 0xcc, 0x8e, 0x5f, # 0xbf - 0xef, 0x35, 0x9c, 0x84, 0x2b, 0x15, 0xd5, 0x77, - 0x34, 0x49, 0xb6, 0x12, 0x0a, 0x7f, 0x71, 0x88, # 0xcf - 0xfd, 0x9d, 0x18, 0x41, 0x7d, 0x93, 0xd8, 0x58, - 0x2c, 0xce, 0xfe, 0x24, 0xaf, 0xde, 0xb8, 0x36, # 0xdf - 0xc8, 0xa1, 0x80, 0xa6, 0x99, 0x98, 0xa8, 0x2f, - 0x0e, 0x81, 0x65, 0x73, 0xe4, 0xc2, 0xa2, 0x8a, # 0xef - 0xd4, 0xe1, 0x11, 0xd0, 0x08, 0x8b, 0x2a, 0xf2, - 0xed, 0x9a, 0x64, 0x3f, 0xc1, 0x6c, 0xf9, 0xec # 0xff - ] - - ENCRYPT_TABLE = [nil] * 256 - DECRYPT_TABLE.each_with_index { |i, j| ENCRYPT_TABLE[i] = j } - - def self.decrypt_alt encrypted - decrypted = '' - encrypted.length.times { |i| decrypted << DECRYPT_TABLE[encrypted[i]] } - decrypted - end - - def self.encrypt_alt decrypted - encrypted = '' - decrypted.length.times { |i| encrypted << ENCRYPT_TABLE[decrypted[i]] } - encrypted - end - - # an alternate implementation that is possibly faster.... - # TODO - bench - DECRYPT_STR, ENCRYPT_STR = [DECRYPT_TABLE, (0...256)].map do |values| - values.map { |i| i.chr }.join.gsub(/([\^\-\\])/, "\\\\\\1") - end - - def self.decrypt encrypted - encrypted.tr ENCRYPT_STR, DECRYPT_STR - end - - def self.encrypt decrypted - decrypted.tr DECRYPT_STR, ENCRYPT_STR - end - end - - class RangesIOEncryptable < RangesIO - def initialize io, mode='r', params={} - mode, params = 'r', mode if Hash === mode - @decrypt = !!params[:decrypt] - super - end - - def encrypted? - @decrypt - end - - def read limit=nil - buf = super - buf = CompressibleEncryption.decrypt(buf) if encrypted? - buf - end - end - - attr_reader :io, :header, :idx, :desc, :special_folder_ids - - # corresponds to - # * pst_open - # * pst_load_index - def initialize io - @io = io - io.pos = 0 - @header = Header.new io.read(Header::SIZE) - - # would prefer this to be in Header#validate, but it doesn't have the io size. - # should perhaps downgrade this to just be a warning... - raise FormatError, "header size field invalid (#{header.size} != #{io.size}}" unless header.size == io.size - - load_idx - load_desc - load_xattrib - - @special_folder_ids = {} - end - - def encrypted? - @header.encrypted? 
- end - - # until i properly fix logging... - def warn s - Mapi::Log.warn s - end - - # - # this is the index and desc record loading code - # ---------------------------------------------------------------------------- - # - - ToTree = Module.new - - module Index2 - BLOCK_SIZE = 512 - module RecursiveLoad - def load_chain - #... - end - end - - module Base - def read - #... - end - end - - class Version1997 < Struct.new(:a)#...) - SIZE = 12 - - include RecursiveLoad - include Base - end - - class Version2003 < Struct.new(:a)#...) - SIZE = 24 - - include RecursiveLoad - include Base - end - end - - module Desc2 - module Base - def desc - #... - end - end - - class Version1997 < Struct.new(:a)#...) - #include Index::RecursiveLoad - include Base - end - - class Version2003 < Struct.new(:a)#...) - #include Index::RecursiveLoad - include Base - end - end - - # more constants from libpst.c - # these relate to the index block - ITEM_COUNT_OFFSET = 0x1f0 # count byte - LEVEL_INDICATOR_OFFSET = 0x1f3 # node or leaf - BACKLINK_OFFSET = 0x1f8 # backlink u1 value - - # these 3 classes are used to hold various file records - - # pst_index - class Index < Struct.new(:id, :offset, :size, :u1) - UNPACK_STR = 'VVvv' - SIZE = 12 - BLOCK_SIZE = 512 # index blocks was 516 but bogus - COUNT_MAX = 41 # max active items (ITEM_COUNT_OFFSET / Index::SIZE = 41) - - attr_accessor :pst - def initialize data - data = Pst.unpack data, UNPACK_STR if String === data - super(*data) - end - - def type - @type ||= begin - if id & 0x2 == 0 - :data - else - first_byte, second_byte = read.unpack('CC') - if first_byte == 1 - raise second_byte unless second_byte == 1 - :data_chain_header - elsif first_byte == 2 - raise second_byte unless second_byte == 0 - :id2_assoc - else - raise FormatError, 'unknown first byte for block - %p' % first_byte - end - end - end - end - - def data? - (id & 0x2) == 0 - end - - def read decrypt=true - # only data blocks are every encrypted - decrypt = false unless data? - pst.pst_read_block_size offset, size, decrypt - end - - # show all numbers in hex - def inspect - super.gsub(/=(\d+)/) { '=0x%x' % $1.to_i }.sub(/Index /, "Index type=#{type.inspect}, ") - end - end - - # mostly guesses. - ITEM_COUNT_OFFSET_64 = 0x1e8 - LEVEL_INDICATOR_OFFSET_64 = 0x1eb # diff of 3 between these 2 as above... - - # will maybe inherit from Index64, in order to get the same #type function. - class Index64 < Index - UNPACK_STR = 'TTvvV' - SIZE = 24 - BLOCK_SIZE = 512 - COUNT_MAX = 20 # bit of a guess really. 512 / 24 = 21, but doesn't leave enough header room - - # this is the extra item on the end of the UNPACK_STR above - attr_accessor :u2 - - def initialize data - data = Pst.unpack data, UNPACK_STR if String === data - @u2 = data.pop - super data - end - - def inspect - super.sub(/>$/, ', u2=%p>' % u2) - end - - def self.load_chain io, header - load_idx_rec io, header.index1, 0, 0 - end - - # almost identical to load code for Index, just different offsets and unpack strings. - # can probably merge them, or write a generic load_tree function or something. 
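As a concrete take on the merge suggested above, the 1997 and 2003 loaders could share one recursive walk parameterised on each record class's constants. This is only an editor's sketch, not code from the original file: the per-class ITEM_COUNT_OFFSET, LEVEL_INDICATOR_OFFSET and NODE_UNPACK_STR constants are hypothetical, and the start-value/zero-id assertions of the real loaders are omitted for brevity.

    # editor's sketch of a shared B-tree walk (hypothetical, see note above)
    def self.load_tree io, offset, start_val
      io.seek offset
      buf = io.read BLOCK_SIZE
      count = buf[self::ITEM_COUNT_OFFSET]
      raise "have too many active items in index (#{count})" if count > self::COUNT_MAX
      records = buf[0, self::SIZE * count].scan(/.{#{self::SIZE}}/m)
      if buf[self::LEVEL_INDICATOR_OFFSET] == 0
        # leaf block: each record is an index entry
        records.map { |data| new data }
      else
        # node block: each record points at a child block to recurse into
        records.inject([]) do |all, data|
          start, u1, child_offset = Pst.unpack(data, self::NODE_UNPACK_STR)
          all + load_tree(io, child_offset, start)
        end
      end
    end

A real merge would need a little more than this - the Desc records, for instance, use a different size for leaf records than for their node pointers - but the shape of the recursion is the same in all four variants.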
- def self.load_idx_rec io, offset, linku1, start_val - io.seek offset - buf = io.read BLOCK_SIZE - idxs = [] - - item_count = buf[ITEM_COUNT_OFFSET_64] - raise "have too many active items in index (#{item_count})" if item_count > COUNT_MAX - - #idx = Index.new buf[BACKLINK_OFFSET, Index::SIZE] - #raise 'blah 1' unless idx.id == linku1 - - if buf[LEVEL_INDICATOR_OFFSET_64] == 0 - # leaf pointers - # split the data into item_count index objects - buf[0, SIZE * item_count].scan(/.{#{SIZE}}/mo).each_with_index do |data, i| - idx = new data - # first entry - raise 'blah 3' if i == 0 and start_val != 0 and idx.id != start_val - #idx.pst = self - break if idx.id == 0 - idxs << idx - end - else - # node pointers - # split the data into item_count table pointers - buf[0, SIZE * item_count].scan(/.{#{SIZE}}/mo).each_with_index do |data, i| - start, u1, offset = Pst.unpack data, 'T3' - # for the first value, we expect the start to be equal - raise 'blah 3' if i == 0 and start_val != 0 and start != start_val - break if start == 0 - idxs += load_idx_rec io, offset, u1, start - end - end - - idxs - end - end - - # pst_desc - class Desc64 < Struct.new(:desc_id, :idx_id, :idx2_id, :parent_desc_id, :u2) - UNPACK_STR = 'T3VV' - SIZE = 32 - BLOCK_SIZE = 512 # descriptor blocks was 520 but bogus - COUNT_MAX = 15 # guess as per Index64 - - include RecursivelyEnumerable - - attr_accessor :pst - attr_reader :children - def initialize data - super(*Pst.unpack(data, UNPACK_STR)) - @children = [] - end - - def desc - pst.idx_from_id idx_id - end - - def list_index - pst.idx_from_id idx2_id - end - - def self.load_chain io, header - load_desc_rec io, header.index2, 0, 0x21 - end - - def self.load_desc_rec io, offset, linku1, start_val - io.seek offset - buf = io.read BLOCK_SIZE - descs = [] - item_count = buf[ITEM_COUNT_OFFSET_64] - - # not real desc - #desc = Desc.new buf[BACKLINK_OFFSET, 4] - #raise 'blah 1' unless desc.desc_id == linku1 - - if buf[LEVEL_INDICATOR_OFFSET_64] == 0 - # leaf pointers - raise "have too many active items in index (#{item_count})" if item_count > COUNT_MAX - # split the data into item_count desc objects - buf[0, SIZE * item_count].scan(/.{#{SIZE}}/mo).each_with_index do |data, i| - desc = new data - # first entry - raise 'blah 3' if i == 0 and start_val != 0 and desc.desc_id != start_val - break if desc.desc_id == 0 - descs << desc - end - else - # node pointers - raise "have too many active items in index (#{item_count})" if item_count > Index64::COUNT_MAX - # split the data into item_count table pointers - buf[0, Index64::SIZE * item_count].scan(/.{#{Index64::SIZE}}/mo).each_with_index do |data, i| - start, u1, offset = Pst.unpack data, 'T3' - # for the first value, we expect the start to be equal note that ids -1, so even for the - # first we expect it to be equal. thats the 0x21 (dec 33) desc record. this means we assert - # that the first desc record is always 33... - # thats because 0x21 is the pst root itself... 
- raise 'blah 3' if i == 0 and start_val != -1 and start != start_val - # this shouldn't really happen i'd imagine - break if start == 0 - descs += load_desc_rec io, offset, u1, start - end - end - - descs - end - - def each_child(&block) - @children.each(&block) - end - end - - # _pst_table_ptr_struct - class TablePtr < Struct.new(:start, :u1, :offset) - UNPACK_STR = 'V3' - SIZE = 12 - - def initialize data - data = data.unpack(UNPACK_STR) if String === data - super(*data) - end - end - - # pst_desc - # idx_id is a pointer to an idx record which gets the primary data stream for the Desc record. - # idx2_id gets you an idx record, that when read gives you an ID2 association list, which just maps - # another set of ids to index values - class Desc < Struct.new(:desc_id, :idx_id, :idx2_id, :parent_desc_id) - UNPACK_STR = 'V4' - SIZE = 16 - BLOCK_SIZE = 512 # descriptor blocks was 520 but bogus - COUNT_MAX = 31 # max active desc records (ITEM_COUNT_OFFSET / Desc::SIZE = 31) - - include ToTree - - attr_accessor :pst - attr_reader :children - def initialize data - super(*data.unpack(UNPACK_STR)) - @children = [] - end - - def desc - pst.idx_from_id idx_id - end - - def list_index - pst.idx_from_id idx2_id - end - - # show all numbers in hex - def inspect - super.gsub(/=(\d+)/) { '=0x%x' % $1.to_i } - end - end - - # corresponds to - # * _pst_build_id_ptr - def load_idx - @idx = [] - @idx_offsets = [] - if header.version_2003? - @idx = Index64.load_chain io, header - @idx.each { |idx| idx.pst = self } - else - load_idx_rec header.index1, header.index1_count, 0 - end - - # we'll typically be accessing by id, so create a hash as a lookup cache - @idx_from_id = {} - @idx.each do |idx| - warn "there are duplicate idx records with id #{idx.id}" if @idx_from_id[idx.id] - @idx_from_id[idx.id] = idx - end - end - - # load the flat idx table, which maps ids to file ranges. 
this is the recursive helper - # - # corresponds to - # * _pst_build_id_ptr - def load_idx_rec offset, linku1, start_val - @idx_offsets << offset - - #_pst_read_block_size(pf, offset, BLOCK_SIZE, &buf, 0, 0) < BLOCK_SIZE) - buf = pst_read_block_size offset, Index::BLOCK_SIZE, false - - item_count = buf[ITEM_COUNT_OFFSET] - raise "have too many active items in index (#{item_count})" if item_count > Index::COUNT_MAX - - idx = Index.new buf[BACKLINK_OFFSET, Index::SIZE] - raise 'blah 1' unless idx.id == linku1 - - if buf[LEVEL_INDICATOR_OFFSET] == 0 - # leaf pointers - # split the data into item_count index objects - buf[0, Index::SIZE * item_count].scan(/.{#{Index::SIZE}}/mo).each_with_index do |data, i| - idx = Index.new data - # first entry - raise 'blah 3' if i == 0 and start_val != 0 and idx.id != start_val - idx.pst = self - # this shouldn't really happen i'd imagine - break if idx.id == 0 - @idx << idx - end - else - # node pointers - # split the data into item_count table pointers - buf[0, TablePtr::SIZE * item_count].scan(/.{#{TablePtr::SIZE}}/mo).each_with_index do |data, i| - table = TablePtr.new data - # for the first value, we expect the start to be equal - raise 'blah 3' if i == 0 and start_val != 0 and table.start != start_val - # this shouldn't really happen i'd imagine - break if table.start == 0 - load_idx_rec table.offset, table.u1, table.start - end - end - end - - # most access to idx objects will use this function - # - # corresponds to - # * _pst_getID - def idx_from_id id - @idx_from_id[id] - end - - # corresponds to - # * _pst_build_desc_ptr - # * record_descriptor - def load_desc - @desc = [] - @desc_offsets = [] - if header.version_2003? - @desc = Desc64.load_chain io, header - @desc.each { |desc| desc.pst = self } - else - load_desc_rec header.index2, header.index2_count, 0x21 - end - - # first create a lookup cache - @desc_from_id = {} - @desc.each do |desc| - desc.pst = self - warn "there are duplicate desc records with id #{desc.desc_id}" if @desc_from_id[desc.desc_id] - @desc_from_id[desc.desc_id] = desc - end - - # now turn the flat list of loaded desc records into a tree - - # well, they have no parent, so they're more like, the toplevel descs. - @orphans = [] - # now assign each node to the parents child array, putting the orphans in the above - @desc.each do |desc| - parent = @desc_from_id[desc.parent_desc_id] - # note, besides this, its possible to create other circular structures. - if parent == desc - # this actually happens usually, for the root_item it appears. - #warn "desc record's parent is itself (#{desc.inspect})" - # maybe add some more checks in here for circular structures - elsif parent - parent.children << desc - next - end - @orphans << desc - end - - # maybe change this to some sort of sane-ness check. orphans are expected -# warn "have #{@orphans.length} orphan desc record(s)." unless @orphans.empty? 
- end - - # load the flat list of desc records recursively - # - # corresponds to - # * _pst_build_desc_ptr - # * record_descriptor - def load_desc_rec offset, linku1, start_val - @desc_offsets << offset - - buf = pst_read_block_size offset, Desc::BLOCK_SIZE, false - item_count = buf[ITEM_COUNT_OFFSET] - - # not real desc - desc = Desc.new buf[BACKLINK_OFFSET, 4] - raise 'blah 1' unless desc.desc_id == linku1 - - if buf[LEVEL_INDICATOR_OFFSET] == 0 - # leaf pointers - raise "have too many active items in index (#{item_count})" if item_count > Desc::COUNT_MAX - # split the data into item_count desc objects - buf[0, Desc::SIZE * item_count].scan(/.{#{Desc::SIZE}}/mo).each_with_index do |data, i| - desc = Desc.new data - # first entry - raise 'blah 3' if i == 0 and start_val != 0 and desc.desc_id != start_val - # this shouldn't really happen i'd imagine - break if desc.desc_id == 0 - @desc << desc - end - else - # node pointers - raise "have too many active items in index (#{item_count})" if item_count > Index::COUNT_MAX - # split the data into item_count table pointers - buf[0, TablePtr::SIZE * item_count].scan(/.{#{TablePtr::SIZE}}/mo).each_with_index do |data, i| - table = TablePtr.new data - # for the first value, we expect the start to be equal note that ids -1, so even for the - # first we expect it to be equal. thats the 0x21 (dec 33) desc record. this means we assert - # that the first desc record is always 33... - raise 'blah 3' if i == 0 and start_val != -1 and table.start != start_val - # this shouldn't really happen i'd imagine - break if table.start == 0 - load_desc_rec table.offset, table.u1, table.start - end - end - end - - # as for idx - # - # corresponds to: - # * _pst_getDptr - def desc_from_id id - @desc_from_id[id] - end - - # corresponds to - # * pst_load_extended_attributes - def load_xattrib - unless desc = desc_from_id(0x61) - warn "no extended attributes desc record found" - return - end - unless desc.desc - warn "no desc idx for extended attributes" - return - end - if desc.list_index - end - #warn "skipping loading xattribs" - # FIXME implement loading xattribs - end - - # corresponds to: - # * _pst_read_block_size - # * _pst_read_block ?? - # * _pst_ff_getIDblock_dec ?? - # * _pst_ff_getIDblock ?? - def pst_read_block_size offset, size, decrypt=true - io.seek offset - buf = io.read size - warn "tried to read #{size} bytes but only got #{buf.length}" if buf.length != size - encrypted? && decrypt ? CompressibleEncryption.decrypt(buf) : buf - end - - # - # id2 - # ---------------------------------------------------------------------------- - # - - class ID2Assoc < Struct.new(:id2, :id, :table2) - UNPACK_STR = 'V3' - SIZE = 12 - - def initialize data - data = data.unpack(UNPACK_STR) if String === data - super(*data) - end - end - - class ID2Assoc64 < Struct.new(:id2, :u1, :id, :table2) - UNPACK_STR = 'VVT2' - SIZE = 24 - - def initialize data - if String === data - data = Pst.unpack data, UNPACK_STR - end - super(*data) - end - - def self.load_chain idx - buf = idx.read - type, count = buf.unpack 'v2' - unless type == 0x0002 - raise 'unknown id2 type 0x%04x' % type - #return - end - id2 = [] - count.times do |i| - assoc = new buf[8 + SIZE * i, SIZE] - id2 << assoc - if assoc.table2 != 0 - id2 += load_chain idx.pst.idx_from_id(assoc.table2) - end - end - id2 - end - end - - class ID2Mapping - attr_reader :list - def initialize pst, list - @pst = pst - @list = list - # create a lookup. 
- @id_from_id2 = {} - @list.each do |id2| - # NOTE we take the last value seen value if there are duplicates. this "fixes" - # test4-o1997.pst for the time being. - warn "there are duplicate id2 records with id #{id2.id2}" if @id_from_id2[id2.id2] - next if @id_from_id2[id2.id2] - @id_from_id2[id2.id2] = id2.id - end - end - - # TODO: fix logging - def warn s - Mapi::Log.warn s - end - - # corresponds to: - # * _pst_getID2 - def [] id - #id2 = @list.find { |x| x.id2 == id } - id = @id_from_id2[id] - id and @pst.idx_from_id(id) - end - end - - def load_idx2 idx - if header.version_2003? - id2 = ID2Assoc64.load_chain idx - else - id2 = load_idx2_rec idx - end - ID2Mapping.new self, id2 - end - - # corresponds to - # * _pst_build_id2 - def load_idx2_rec idx - # i should perhaps use a idx chain style read here? - buf = pst_read_block_size idx.offset, idx.size, false - type, count = buf.unpack 'v2' - unless type == 0x0002 - raise 'unknown id2 type 0x%04x' % type - #return - end - id2 = [] - count.times do |i| - assoc = ID2Assoc.new buf[4 + ID2Assoc::SIZE * i, ID2Assoc::SIZE] - id2 << assoc - if assoc.table2 != 0 - id2 += load_idx2_rec idx_from_id(assoc.table2) - end - end - id2 - end - - class RangesIOIdxChain < RangesIOEncryptable - def initialize pst, idx_head - @idxs = pst.id2_block_idx_chain idx_head - # whether or not a given idx needs encrypting - decrypts = @idxs.map do |idx| - decrypt = (idx.id & 2) != 0 ? false : pst.encrypted? - end.uniq - raise NotImplementedError, 'partial encryption in RangesIOID2' if decrypts.length > 1 - decrypt = decrypts.first - # convert idxs to ranges - ranges = @idxs.map { |idx| [idx.offset, idx.size] } - super pst.io, :ranges => ranges, :decrypt => decrypt - end - end - - class RangesIOID2 < RangesIOIdxChain - def self.new pst, id2, idx2 - RangesIOIdxChain.new pst, idx2[id2] - end - end - - # corresponds to: - # * _pst_ff_getID2block - # * _pst_ff_getID2data - # * _pst_ff_compile_ID - def id2_block_idx_chain idx - if (idx.id & 0x2) == 0 - [idx] - else - buf = idx.read - type, fdepth, count = buf[0, 4].unpack 'CCv' - unless type == 1 # libpst.c:3958 - warn 'Error in idx_chain - %p, %p, %p - attempting to ignore' % [type, fdepth, count] - return [idx] - end - # there are 4 unaccounted for bytes here, 4...8 - if header.version_2003? - ids = buf[8, count * 8].unpack("T#{count}") - else - ids = buf[8, count * 4].unpack('V*') - end - if fdepth == 1 - ids.map { |id| idx_from_id id } - else - ids.map { |id| id2_block_idx_chain idx_from_id(id) }.flatten - end - end - end - - # - # main block parsing code. gets raw properties - # ---------------------------------------------------------------------------- - # - - # the job of this class, is to take a desc record, and be able to enumerate through the - # mapi properties of the associated thing. - # - # corresponds to - # * _pst_parse_block - # * _pst_process (in some ways. although perhaps thats more the Item::Properties#add_property) - class BlockParser - include Mapi::Types::Constants - - TYPES = { - 0xbcec => 1, - 0x7cec => 2, - # type 3 is removed. an artifact of not handling the indirect blocks properly in libpst. - } - - PR_SUBJECT = PropertySet::TAGS.find { |num, (name, type)| name == 'PR_SUBJECT' }.first.hex - PR_BODY_HTML = PropertySet::TAGS.find { |num, (name, type)| name == 'PR_BODY_HTML' }.first.hex - - # this stuff could maybe be moved to Ole::Types? or leverage it somehow? - # whether or not a type is immeidate is more a property of the pst encoding though i expect. 
- # what i probably can add is a generic concept of whether a type is of variadic length or not. - - # these lists are very incomplete. think they are largely copied from libpst - - IMMEDIATE_TYPES = [ - PT_SHORT, PT_LONG, PT_BOOLEAN - ] - - INDIRECT_TYPES = [ - PT_DOUBLE, PT_OBJECT, - 0x0014, # whats this? probably something like PT_LONGLONG, given the correspondence with the - # ole variant types. (= VT_I8) - PT_STRING8, PT_UNICODE, # unicode isn't in libpst, but added here for outlook 2003 down the track - PT_SYSTIME, - 0x0048, # another unknown - 0x0102, # this is PT_BINARY vs PT_CLSID - #0x1003, # these are vector types, but they're commented out for now because i'd expect that - #0x1014, # there's extra decoding needed that i'm not doing. (probably just need a simple - # # PT_* => unpack string mapping for the immediate types, and just do unpack('V*') etc - #0x101e, - #0x1102 - ] - - # the attachment and recipient arrays appear to be always stored with these fixed - # id2 values. seems strange. are there other extra streams? can find out by making higher - # level IO wrapper, which has the id2 value, and doing the diff of available id2 values versus - # used id2 values in properties of an item. - ID2_ATTACHMENTS = 0x671 - ID2_RECIPIENTS = 0x692 - - attr_reader :desc, :data, :data_chunks, :offset_tables - def initialize desc - raise FormatError, "unable to get associated index record for #{desc.inspect}" unless desc.desc - @desc = desc - #@data = desc.desc.read - if Pst::Index === desc.desc - #@data = RangesIOIdxChain.new(desc.pst, desc.desc).read - idxs = desc.pst.id2_block_idx_chain desc.desc - # this gets me the plain index chain. - else - # fake desc - #@data = desc.desc.read - idxs = [desc.desc] - end - - @data_chunks = idxs.map { |idx| idx.read } - @data = @data_chunks.first - - load_header - - @index_offsets = [@index_offset] + @data_chunks[1..-1].map { |chunk| chunk.unpack('v')[0] } - @offset_tables = [] - @ignored = [] - @data_chunks.zip(@index_offsets).each do |chunk, offset| - ignore = chunk[offset, 2].unpack('v')[0] - @ignored << ignore -# p ignore - @offset_tables.push offset_table = [] - # maybe its ok if there aren't to be any values ? - raise FormatError if offset == 0 - offsets = chunk[offset + 2..-1].unpack('v*') - #p offsets - offsets[0, ignore + 2].each_cons 2 do |from, to| - #next if to == 0 - raise FormatError, [from, to].inspect if from > to - offset_table << [from, to] - end - end - - @offset_table = @offset_tables.first - @idxs = idxs - - # now, we may have multiple different blocks - end - - # a given desc record may or may not have associated idx2 data. we lazily load it here, so it will never - # actually be requested unless get_data_indirect actually needs to use it. - def idx2 - return @idx2 if @idx2 - raise FormatError, 'idx2 requested but no idx2 available' unless desc.list_index - # should check this can't return nil - @idx2 = desc.pst.load_idx2 desc.list_index - end - - def load_header - @index_offset, type, @offset1 = data.unpack 'vvV' - raise FormatError, 'unknown block type signature 0x%04x' % type unless TYPES[type] - @type = TYPES[type] - end - - # based on the value of offset, return either some data from buf, or some data from the - # id2 chain id2, where offset is some key into a lookup table that is stored as the id2 - # chain. i think i may need to create a BlockParser class that wraps up all this mess. 
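 # (editor's illustration, not in the original: a worked decode of the offset
 # values the two methods below handle; the example numbers are made up.)
 #   offset == 0              -> no value at all
 #   offset & 0xf == 0xf      -> the offset is really a key into the item's
 #                               idx2 (id2) association list
 #   otherwise                -> high = offset >> 4 must be even, then
 #                                 block, i   = high.divmod 4096
 #                                 from, to   = @offset_tables[block][i / 2]
 #                               e.g. 0x0020 -> chunk 0, slot 1 of its table,
 #                                    0x0040 -> chunk 0, slot 2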
- # - # corresponds to: - # * _pst_getBlockOffsetPointer - # * _pst_getBlockOffset - def get_data_indirect offset - return get_data_indirect_io(offset).read - - if offset == 0 - nil - elsif (offset & 0xf) == 0xf - RangesIOID2.new(desc.pst, offset, idx2).read - else - low, high = offset & 0xf, offset >> 4 - raise FormatError if low != 0 or (high & 0x1) != 0 or (high / 2) > @offset_table.length - from, to = @offset_table[high / 2] - data[from...to] - end - end - - def get_data_indirect_io offset - if offset == 0 - nil - elsif (offset & 0xf) == 0xf - if idx2[offset] - RangesIOID2.new desc.pst, offset, idx2 - else - warn "tried to get idx2 record for #{offset} but failed" - return StringIO.new('') - end - else - low, high = offset & 0xf, offset >> 4 - if low != 0 or (high & 0x1) != 0 -# raise FormatError, - warn "bad - #{low} #{high} (1)" - return StringIO.new('') - end - # lets see which block it should come from. - block_idx, i = high.divmod 4096 - unless block_idx < @data_chunks.length - warn "bad - block_idx to high (not #{block_idx} < #{@data_chunks.length})" - return StringIO.new('') - end - data_chunk, offset_table = @data_chunks[block_idx], @offset_tables[block_idx] - if i / 2 >= offset_table.length - warn "bad - #{low} #{high} - #{i / 2} >= #{offset_table.length} (2)" - return StringIO.new('') - end - #warn "ok - #{low} #{high} #{offset_table.length}" - from, to = offset_table[i / 2] - StringIO.new data_chunk[from...to] - end - end - - def handle_indirect_values key, type, value - case type - when PT_BOOLEAN - value = value != 0 - when *IMMEDIATE_TYPES # not including PT_BOOLEAN which we just did above - # no processing current applied (needed?). - when *INDIRECT_TYPES - # the value is a pointer - if String === value # ie, value size > 4 above - value = StringIO.new value - else - value = get_data_indirect_io(value) - end - # keep strings as immediate values for now, for compatability with how i set up - # Msg::Properties::ENCODINGS - if value - if type == PT_STRING8 - value = value.read - elsif type == PT_UNICODE - value = Ole::Types::FROM_UTF16.iconv value.read - end - end - # special subject handling - if key == PR_BODY_HTML and value - # to keep the msg code happy, which thinks body_html will be an io - # although, in 2003 version, they are 0102 already - value = StringIO.new value unless value.respond_to?(:read) - end - if key == PR_SUBJECT and value - ignore, offset = value.unpack 'C2' - offset = (offset == 1 ? nil : offset - 3) - value = value[2..-1] -=begin - index = value =~ /^[A-Z]*:/ ? $~[0].length - 1 : nil - unless ignore == 1 and offset == index - warn 'something wrong with subject hack' - $x = [ignore, offset, value] - require 'irb' - IRB.start - exit - end -=end -=begin -new idea: - -making sense of the \001\00[156] i've seen prefixing subject. i think its to do with the placement -of the ':', or the ' '. And perhaps an optimization to do with thread topic, and ignoring the prefixes -added by mailers. thread topic is equal to subject with all that crap removed. - -can test by creating some mails with bizarre subjects. - -subject="\001\005RE: blah blah" -subject="\001\001blah blah" -subject="\001\032Out of Office AutoReply: blah blah" -subject="\001\020Undeliverable: blah blah" - -looks like it - -=end - - # now what i think, is that perhaps, value[offset..-1] ... - # or something like that should be stored as a special tag. ie, do a double yield - # for this case. 
probably PR_CONVERSATION_TOPIC, in which case i'd write instead: - # yield [PR_SUBJECT, ref_type, value] - # yield [PR_CONVERSATION_TOPIC, ref_type, value[offset..-1] - # next # to skip the yield. - end - - # special handling for embedded objects - # used for attach_data for attached messages. in which case attach_method should == 5, - # for embedded object. - if type == PT_OBJECT and value - value = value.read if value.respond_to?(:read) - id2, unknown = value.unpack 'V2' - io = RangesIOID2.new desc.pst, id2, idx2 - - # hacky - desc2 = OpenStruct.new(:desc => io, :pst => desc.pst, :list_index => desc.list_index, :children => []) - # put nil instead of desc.list_index, otherwise the attachment is attached to itself ad infinitum. - # should try and fix that FIXME - # this shouldn't be done always. for an attached message, yes, but for an attached - # meta file, for example, it shouldn't. difference between embedded_ole vs embedded_msg - # really. - # note that in the case where its a embedded ole, you actually get a regular serialized ole - # object, so i need to create an ole storage object on a rangesioidxchain! - # eg: -=begin -att.props.display_name # => "Picture (Metafile)" -io = att.props.attach_data -io.read(32).unpack('H*') # => ["d0cf11e0a1b11ae100000.... note the docfile signature. -# plug some missing rangesio holes: -def io.rewind; seek 0; end -def io.flush; raise IOError; end -ole = Ole::Storage.open io -puts ole.root.to_tree - -- #<Dirent:"Root Entry"> - |- #<Dirent:"\001Ole" size=20 data="\001\000\000\002\000..."> - |- #<Dirent:"CONTENTS" size=65696 data="\327\315\306\232\000..."> - \- #<Dirent:"\003MailStream" size=12 data="\001\000\000\000[..."> -=end - # until properly fixed, i have disabled this code here, so this will break - # nested messages temporarily. - #value = Item.new desc2, RawPropertyStore.new(desc2).to_a - #desc2.list_index = nil - value = io - end - # this is PT_MV_STRING8, i guess. - # should probably have the 0x1000 flag, and do the or-ring. - # example of 0x1102 is PR_OUTLOOK_2003_ENTRYIDS. less sure about that one. - when 0x101e, 0x1102 - # example data: - # 0x802b "\003\000\000\000\020\000\000\000\030\000\000\000#\000\000\000BusinessCompetitionFavorites" - # this 0x802b would be an extended attribute for categories / keywords. - value = get_data_indirect_io(value).read unless String === value - num = value.unpack('V')[0] - offsets = value[4, 4 * num].unpack("V#{num}") - value = (offsets + [value.length]).to_enum(:each_cons, 2).map { |from, to| value[from...to] } - value.map! { |str| StringIO.new str } if type == 0x1102 - else - name = Mapi::Types::DATA[type].first rescue nil - warn '0x%04x %p' % [key, get_data_indirect_io(value).read] - raise NotImplementedError, 'unsupported mapi property type - 0x%04x (%p)' % [type, name] - end - [key, type, value] - end - end - -=begin -* recipients: - - affects: ["0x200764", "0x2011c4", "0x201b24", "0x201b44", "0x201ba4", "0x201c24", "0x201cc4", "0x202504"] - -after adding the rawpropertystoretable fix, all except the second parse properly, and satisfy: - - item.props.display_to == item.recipients.map { |r| r.props.display_name if r.props.recipient_type == 1 }.compact * '; ' - -only the second still has a problem - -#[#<struct Pst::Desc desc_id=0x2011c4, idx_id=0x397c, idx2_id=0x398a, parent_desc_id=0x8082>] - -think this is related to a multi block #data3. ie, when you use @x * rec_size, and it -goes > 8190, or there abouts, then it stuffs up. 
probably there is header gunk, or something, -similar to when #data is multi block. - -same problem affects the attachment table in test4. - -fixed that issue. round data3 ranges to rec_size. - -fix other issue with attached objects. - -all recipients and attachments in test2 are fine. - -only remaining issue is test4 recipients of 200044. strange. - -=end - - # RawPropertyStore is used to iterate through the properties of an item, or the auxiliary - # data for an attachment. its just a parser for the way the properties are serialized, when the - # properties don't have to conform to a column structure. - # - # structure of this chunk of data is often - # header, property keys, data values, and then indexes. - # the property keys has value in it. value can be the actual value if its a short type, - # otherwise you lookup the value in the indicies, where you get the offsets to use in the - # main data body. due to the indirect thing though, any of these parts could actually come - # from a separate stream. - class RawPropertyStore < BlockParser - include Enumerable - - attr_reader :length - def initialize desc - super - raise FormatError, "expected type 1 - got #{@type}" unless @type == 1 - - # the way that offset works, data1 may be a subset of buf, or something from id2. if its from buf, - # it will be offset based on index_offset and offset. so it could be some random chunk of data anywhere - # in the thing. - header_data = get_data_indirect @offset1 - raise FormatError if header_data.length < 8 - signature, offset2 = header_data.unpack 'V2' - #p [@type, signature] - raise FormatError, 'unhandled block signature 0x%08x' % @type if signature != 0x000602b5 - # this is actually a big chunk of tag tuples. - @index_data = get_data_indirect offset2 - @length = @index_data.length / 8 - end - - # iterate through the property tuples - def each - length.times do |i| - key, type, value = handle_indirect_values(*@index_data[8 * i, 8].unpack('vvV')) - yield key, type, value - end - end - end - - # RawPropertyStoreTable is kind of like a database table. - # it has a fixed set of columns. - # #[] is kind of like getting a row from the table. - # those rows are currently encapsulated by Row, which has #each like - # RawPropertyStore. - # only used for the recipients array, and the attachments array. completely lazy, doesn't - # load any of the properties upon creation. - class RawPropertyStoreTable < BlockParser - class Column < Struct.new(:ref_type, :type, :ind2_off, :size, :slot) - def initialize data - super(*data.unpack('v3CC')) - end - - def nice_type_name - Mapi::Types::DATA[ref_type].first[/_(.*)/, 1].downcase rescue '0x%04x' % ref_type - end - - def nice_prop_name - Mapi::PropertyStore::TAGS['%04x' % type].first[/_(.*)/, 1].downcase rescue '0x%04x' % type - end - - def inspect - "#<#{self.class} name=#{nice_prop_name.inspect}, type=#{nice_type_name.inspect}>" - end - end - - include Enumerable - - attr_reader :length, :index_data, :data2, :data3, :rec_size - def initialize desc - super - raise FormatError, "expected type 2 - got #{@type}" unless @type == 2 - - header_data = get_data_indirect @offset1 - # seven_c_blk - # often: u1 == u2 and u3 == u2 + 2, then rec_size == u3 + 4. 
wtf - seven_c, @num_list, u1, u2, u3, @rec_size, b_five_offset, - ind2_offset, u7, u8 = header_data[0, 22].unpack('CCv4V2v2') - @index_data = header_data[22..-1] - - raise FormatError if @num_list != schema.length or seven_c != 0x7c - # another check - min_size = schema.inject(0) { |total, col| total + col.size } - # seem to have at max, 8 padding bytes on the end of the record. not sure if it means - # anything. maybe its just space that hasn't been reclaimed due to columns being - # removed or something. probably should just check lower bound. - range = (min_size..min_size + 8) - warn "rec_size seems wrong (#{range} !=== #{rec_size})" unless range === rec_size - - header_data2 = get_data_indirect b_five_offset - raise FormatError if header_data2.length < 8 - signature, offset2 = header_data2.unpack 'V2' - # ??? seems a bit iffy - # there's probably more to the differences than this, and the data2 difference below - expect = desc.pst.header.version_2003? ? 0x000404b5 : 0x000204b5 - raise FormatError, 'unhandled block signature 0x%08x' % signature if signature != expect - - # this holds all the row data - # handle multiple block issue. - @data3_io = get_data_indirect_io ind2_offset - if RangesIOIdxChain === @data3_io - @data3_idxs = - # modify ranges - ranges = @data3_io.ranges.map { |offset, size| [offset, size / @rec_size * @rec_size] } - @data3_io.instance_variable_set :@ranges, ranges - end - @data3 = @data3_io.read - - # there must be something to the data in data2. i think data2 is the array of objects essentially. - # currently its only used to imply a length - # actually, at size 6, its just some auxiliary data. i'm thinking either Vv/vV, for 97, and something - # wider for 03. the second value is just the index (0...length), and the first value is - # some kind of offset i expect. actually, they were all id2 values, in another case. - # so maybe they're get_data_indirect values too? - # actually, it turned out they were identical to the PR_ATTACHMENT_ID2 values... - # id2_values = ie, data2.unpack('v*').to_enum(:each_slice, 3).transpose[0] - # table[i].assoc(PR_ATTACHMENT_ID2).last == id2_values[i], for all i. - @data2 = get_data_indirect(offset2) rescue nil - #if data2 - # @length = (data2.length / 6.0).ceil - #else - # the above / 6, may have been ok for 97 files, but the new 0x0004 style block must have - # different size records... just use this instead: - # hmmm, actually, we can still figure it out: - @length = @data3.length / @rec_size - #end - - # lets try and at least use data2 for a warning for now - if data2 - data2_rec_size = desc.pst.header.version_2003? ? 8 : 6 - warn 'somthing seems wrong with data3' unless @length == (data2.length / data2_rec_size) - end - end - - def schema - @schema ||= index_data.scan(/.{8}/m).map { |data| Column.new data } - end - - def [] idx - # handle funky rounding - Row.new self, idx * @rec_size - end - - def each - length.times { |i| yield self[i] } - end - - class Row - include Enumerable - - def initialize array_parser, x - @array_parser, @x = array_parser, x - end - - # iterate through the property tuples - def each - (@array_parser.index_data.length / 8).times do |i| - ref_type, type, ind2_off, size, slot = @array_parser.index_data[8 * i, 8].unpack 'v3CC' - # check this rescue too - value = @array_parser.data3[@x + ind2_off, size] -# if INDIRECT_TYPES.include? 
ref_type - if size <= 4 - value = value.unpack('V')[0] - end - #p ['0x%04x' % ref_type, '0x%04x' % type, (Msg::Properties::MAPITAGS['%04x' % type].first[/^.._(.*)/, 1].downcase rescue nil), - # value_orig, value, (get_data_indirect(value_orig.unpack('V')[0]) rescue nil), size, ind2_off, slot] - key, type, value = @array_parser.handle_indirect_values type, ref_type, value - yield key, type, value - end - end - end - end - - class AttachmentTable < BlockParser - # a "fake" MAPI property name for this constant. if you get a mapi property with - # this value, it is the id2 value to use to get attachment data. - PR_ATTACHMENT_ID2 = 0x67f2 - - attr_reader :desc, :table - def initialize desc - @desc = desc - # no super, we only actually want BlockParser2#idx2 - @table = nil - return unless desc.list_index - return unless idx = idx2[ID2_ATTACHMENTS] - # FIXME make a fake desc. - @desc2 = OpenStruct.new :desc => idx, :pst => desc.pst, :list_index => desc.list_index - @table = RawPropertyStoreTable.new @desc2 - end - - def to_a - return [] if !table - table.map do |attachment| - attachment = attachment.to_a - #p attachment - # potentially merge with yet more properties - # this still seems pretty broken - especially the property overlap - if attachment_id2 = attachment.assoc(PR_ATTACHMENT_ID2) - #p attachment_id2.last - #p idx2[attachment_id2.last] - @desc2.desc = idx2[attachment_id2.last] - RawPropertyStore.new(@desc2).each do |a, b, c| - record = attachment.assoc a - attachment << record = [] unless record - record.replace [a, b, c] - end - end - attachment - end - end - end - - # there is no equivalent to this in libpst. ID2_RECIPIENTS was just guessed given the above - # AttachmentTable. - class RecipientTable < BlockParser - attr_reader :desc, :table - def initialize desc - @desc = desc - # no super, we only actually want BlockParser2#idx2 - @table = nil - return unless desc.list_index - return unless idx = idx2[ID2_RECIPIENTS] - # FIXME make a fake desc. - desc2 = OpenStruct.new :desc => idx, :pst => desc.pst, :list_index => desc.list_index - @table = RawPropertyStoreTable.new desc2 - end - - def to_a - return [] if !table - table.map { |x| x.to_a } - end - end - - # - # higher level item code. wraps up the raw properties above, and gives nice - # objects to work with. handles item relationships too. 
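The classes below wrap the raw property lists into folder, message, recipient and attachment objects; a usage sketch of the resulting API (editor's illustration only, the file name is hypothetical):

    pst = Mapi::Pst.new open('example.pst', 'rb')
    pst.name                        # display name of the root folder
    pst.each do |item|              # depth-first walk of folders and messages
      next unless item.type == :message
      puts "#{item.path}: #{item.props.subject.inspect}"
      puts "  #{item.recipients.length} recipient(s), #{item.attachments.length} attachment(s)"
    end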
- # ---------------------------------------------------------------------------- - # - - def self.make_property_set property_list - hash = property_list.inject({}) do |hash, (key, type, value)| - hash.update PropertySet::Key.new(key) => value - end - PropertySet.new hash - end - - class Attachment < Mapi::Attachment - def initialize list - super Pst.make_property_set(list) - - @embedded_msg = props.attach_data if Item === props.attach_data - end - end - - class Recipient < Mapi::Recipient - def initialize list - super Pst.make_property_set(list) - end - end - - class Item < Mapi::Message - class EntryID < Struct.new(:u1, :entry_id, :id) - UNPACK_STR = 'VA16V' - - def initialize data - data = data.unpack(UNPACK_STR) if String === data - super(*data) - end - end - - include RecursivelyEnumerable - - attr_accessor :type, :parent - - def initialize desc, list, type=nil - @desc = desc - super Pst.make_property_set(list) - - # this is kind of weird, but the ids of the special folders are stored in a hash - # when the root item is loaded - if ipm_wastebasket_entryid - desc.pst.special_folder_ids[ipm_wastebasket_entryid] = :wastebasket - end - - if finder_entryid - desc.pst.special_folder_ids[finder_entryid] = :finder - end - - # and then here, those are used, along with a crappy heuristic to determine if we are an - # item -=begin -i think the low bits of the desc_id can give some info on the type. - -it seems that 0x4 is for regular messages (and maybe contacts etc) -0x2 is for folders, and 0x8 is for special things like rules etc, that aren't visible. -=end - unless type - type = props.valid_folder_mask || ipm_subtree_entryid || props.content_count || props.subfolders ? :folder : :message - if type == :folder - type = desc.pst.special_folder_ids[desc.desc_id] || type - end - end - - @type = type - end - - def each_child - id = ipm_subtree_entryid - if id - root = @desc.pst.desc_from_id id - raise "couldn't find root" unless root - raise 'both kinds of children' unless @desc.children.empty? - children = root.children - # lets look up the other ids we have. - # typically the wastebasket one "deleted items" is in the children already, but - # the search folder isn't. - extras = [ipm_wastebasket_entryid, finder_entryid].compact.map do |id| - root = @desc.pst.desc_from_id id - warn "couldn't find root for id #{id}" unless root - root - end.compact - # i do this instead of union, so as not to mess with the order of the - # existing children. 
- children += (extras - children) - children - else - @desc.children - end.each do |desc| - item = @desc.pst.pst_parse_item(desc) - item.parent = self - yield item - end - end - - def path - parents, item = [], self - parents.unshift item while item = item.parent - # remove root - parents.shift - parents.map { |item| item.props.display_name or raise 'unable to construct path' } * '/' - end - - def children - to_enum(:each_child).to_a - end - - # these are still around because they do different stuff - - # Top of Personal Folder Record - def ipm_subtree_entryid - @ipm_subtree_entryid ||= EntryID.new(props.ipm_subtree_entryid.read).id rescue nil - end - - # Deleted Items Folder Record - def ipm_wastebasket_entryid - @ipm_wastebasket_entryid ||= EntryID.new(props.ipm_wastebasket_entryid.read).id rescue nil - end - - # Search Root Record - def finder_entryid - @finder_entryid ||= EntryID.new(props.finder_entryid.read).id rescue nil - end - - # all these have been replaced with the method_missing below -=begin - # States which folders are valid for this message store - #def valid_folder_mask - # props[0x35df] - #end - - # Number of emails stored in a folder - def content_count - props[0x3602] - end - - # Has children - def subfolders - props[0x360a] - end -=end - - # i think i will change these, so they can inherit the lazyness from RawPropertyStoreTable. - # so if you want the last attachment, you can get it without creating the others perhaps. - # it just has to handle the no table at all case a bit more gracefully. - - def attachments - @attachments ||= AttachmentTable.new(@desc).to_a.map { |list| Attachment.new list } - end - - def recipients - #[] - @recipients ||= RecipientTable.new(@desc).to_a.map { |list| Recipient.new list } - end - - def each_recursive(&block) - #p :self => self - children.each do |child| - #p :child => child - block[child] - child.each_recursive(&block) - end - end - - def inspect - attrs = %w[display_name subject sender_name subfolders] -# attrs = %w[display_name valid_folder_mask ipm_wastebasket_entryid finder_entryid content_count subfolders] - str = attrs.map { |a| b = props.send a; " #{a}=#{b.inspect}" if b }.compact * ',' - - type_s = type == :message ? 'Message' : type == :folder ? 'Folder' : type.to_s.capitalize + 'Folder' - str2 = 'desc_id=0x%x' % @desc.desc_id - - !str.empty? ? "#<Pst::#{type_s} #{str2}#{str}>" : "#<Pst::#{type_s} #{str2} props=#{props.inspect}>" #\n" + props.transport_message_headers + ">" - end - end - - # corresponds to - # * _pst_parse_item - def pst_parse_item desc - Item.new desc, RawPropertyStore.new(desc).to_a - end - - # - # other random code - # ---------------------------------------------------------------------------- - # - - def dump_debug_info - puts "* pst header" - p header - -=begin -Looking at the output of this, for blank-o1997.pst, i see this part: -... -- (26624,516) desc block data (overlap of 4 bytes) -- (27136,516) desc block data (gap of 508 bytes) -- (28160,516) desc block data (gap of 2620 bytes) -... - -which confirms my belief that the block size for idx and desc is more likely 512 -=end - if 0 + 0 == 0 - puts '* file range usage' - file_ranges = - # these 3 things, should account for most of the data in the file. 
- [[0, Header::SIZE, 'pst file header']] + - @idx_offsets.map { |offset| [offset, Index::BLOCK_SIZE, 'idx block data'] } + - @desc_offsets.map { |offset| [offset, Desc::BLOCK_SIZE, 'desc block data'] } + - @idx.map { |idx| [idx.offset, idx.size, 'idx id=0x%x (%s)' % [idx.id, idx.type]] } - (file_ranges.sort_by { |idx| idx.first } + [nil]).to_enum(:each_cons, 2).each do |(offset, size, name), next_record| - # i think there is a padding of the size out to 64 bytes - # which is equivalent to padding out the final offset, because i think the offset is - # similarly oriented - pad_amount = 64 - warn 'i am wrong about the offset padding' if offset % pad_amount != 0 - # so, assuming i'm not wrong about that, then we can calculate how much padding is needed. - pad = pad_amount - (size % pad_amount) - pad = 0 if pad == pad_amount - gap = next_record ? next_record.first - (offset + size + pad) : 0 - extra = case gap <=> 0 - when -1; ["overlap of #{gap.abs} bytes)"] - when 0; [] - when +1; ["gap of #{gap} bytes"] - end - # how about we check that padding - @io.pos = offset + size - pad_bytes = @io.read(pad) - extra += ["padding not all zero"] unless pad_bytes == 0.chr * pad - puts "- #{offset}:#{size}+#{pad} #{name.inspect}" + (extra.empty? ? '' : ' [' + extra * ', ' + ']') - end - end - - # i think the idea of the idx, and indeed the idx2, is just to be able to - # refer to data indirectly, which means it can get moved around, and you just update - # the idx table. it is simply a list of file offsets and sizes. - # not sure i get how id2 plays into it though.... - # the sizes seem to be all even. is that a co-incidence? and the ids are all even. that - # seems to be related to something else (see the (id & 2) == 1 stuff) - puts '* idx entries' - @idx.each { |idx| puts "- #{idx.inspect}" } - - # if you look at the desc tree, you notice a few things: - # 1. there is a desc that seems to be the parent of all the folders, messages etc. - # it is the one whose parent is itself. - # one of its children is referenced as the subtree_entryid of the first desc item, - # the root. - # 2. typically only 2 types of desc records have idx2_id != 0. messages themselves, - # and the desc with id = 0x61 - the xattrib container. everything else uses the - # regular ids to find its data. i think it should be reframed as small blocks and - # big blocks, but i'll look into it more. - # - # idx_id and idx2_id are for getting to the data. desc_id and parent_desc_id just define - # the parent <-> child relationship, and the desc_ids are how the items are referred to in - # entryids. - # note that these aren't unique! eg for 0, 4 etc. i expect these'd never change, as the ids - # are stored in entryids. whereas the idx and idx2 could be a bit more volatile. - puts '* desc tree' - # make a dummy root hold everything just for convenience - root = Desc.new '' - def root.inspect; "#<Pst::Root>"; end - root.children.replace @orphans - # this still loads the whole thing as a string for gsub. should use directo output io - # version. - puts root.to_tree.gsub(/, (parent_desc_id|idx2_id)=0x0(?!\d)/, '') - - # this is fairly easy to understand, its just an attempt to display the pst items in a tree form - # which resembles what you'd see in outlook. 
- puts '* item tree' - # now streams directly - root_item.to_tree STDOUT - end - - def root_desc - @desc.first - end - - def root_item - item = pst_parse_item root_desc - item.type = :root - item - end - - def root - root_item - end - - # depth first search of all items - include Enumerable - - def each(&block) - root = self.root - block[root] - root.each_recursive(&block) - end - - def name - @name ||= root_item.props.display_name - end - - def inspect - "#<Pst name=#{name.inspect} io=#{io.inspect}>" - end -end -end - diff --git a/vendor/ruby-msg/lib/mapi/rtf.rb b/vendor/ruby-msg/lib/mapi/rtf.rb deleted file mode 100644 index 9fa133fac..000000000 --- a/vendor/ruby-msg/lib/mapi/rtf.rb +++ /dev/null @@ -1,169 +0,0 @@ -require 'stringio' -require 'strscan' -require 'rtf' - -module Mapi - # - # = Introduction - # - # The +RTF+ module contains a few helper functions for dealing with rtf - # in mapi messages: +rtfdecompr+, and <tt>rtf2html</tt>. - # - # Both were ported from their original C versions for simplicity's sake. - # - module RTF - RTF_PREBUF = - "{\\rtf1\\ansi\\mac\\deff0\\deftab720{\\fonttbl;}" \ - "{\\f0\\fnil \\froman \\fswiss \\fmodern \\fscript " \ - "\\fdecor MS Sans SerifSymbolArialTimes New RomanCourier" \ - "{\\colortbl\\red0\\green0\\blue0\n\r\\par " \ - "\\pard\\plain\\f0\\fs20\\b\\i\\u\\tab\\tx" - - # Decompresses compressed rtf +data+, as found in the mapi property - # +PR_RTF_COMPRESSED+. Code converted from my C version, which in turn - # I wrote from a Java source, in JTNEF I believe. - # - # C version was modified to use circular buffer for back references, - # instead of the optimization of the Java version to index directly into - # output buffer. This was in preparation to support streaming in a - # read/write neutral fashion. - def rtfdecompr data - io = StringIO.new data - buf = RTF_PREBUF + "\x00" * (4096 - RTF_PREBUF.length) - wp = RTF_PREBUF.length - rtf = '' - - # get header fields (as defined in RTFLIB.H) - compr_size, uncompr_size, magic, crc32 = io.read(16).unpack 'V*' - #warn "compressed-RTF data size mismatch" unless io.size == data.compr_size + 4 - - # process the data - case magic - when 0x414c454d # "MELA" magic number that identifies the stream as a uncompressed stream - rtf = io.read uncompr_size - when 0x75465a4c # "LZFu" magic number that identifies the stream as a compressed stream - flag_count = -1 - flags = nil - while rtf.length < uncompr_size and !io.eof? - # each flag byte flags 8 literals/references, 1 per bit - flags = ((flag_count += 1) % 8 == 0) ? io.getc : flags >> 1 - if 1 == (flags & 1) # each flag bit is 1 for reference, 0 for literal - rp, l = io.getc, io.getc - # offset is a 12 byte number. 2^12 is 4096, so thats fine - rp = (rp << 4) | (l >> 4) # the offset relative to block start - l = (l & 0xf) + 2 # the number of bytes to copy - l.times do - rtf << buf[wp] = buf[rp] - wp = (wp + 1) % 4096 - rp = (rp + 1) % 4096 - end - else - rtf << buf[wp] = io.getc - wp = (wp + 1) % 4096 - end - end - else # unknown magic number - raise "Unknown compression type (magic number 0x%08x)" % magic - end - - # not sure if its due to a bug in the above code. doesn't seem to be - # in my tests, but sometimes there's a trailing null. we chomp it here, - # which actually makes the resultant rtf smaller than its advertised - # size (+uncompr_size+). - rtf.chomp! 0.chr - rtf - end - - # Note, this is a conversion of the original C code. Not great - needs tests and - # some refactoring, and an attempt to correct some inaccuracies. Hacky but works. 
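 # (Editor's note, not part of the original comment: within this library the
 #  two helpers are normally chained - PropertySet#body_rtf and #body_html
 #  earlier in this changeset do roughly
 #    rtf  = RTF.rtfdecompr compressed_rtf   # the PR_RTF_COMPRESSED value
 #    html = RTF.rtf2html rtf                # nil unless the rtf embeds html
 #  so a nil return from this method just means there is no html body to
 #  recover.)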
- # - # Returns +nil+ if it doesn't look like an rtf encapsulated rtf. - # - # Some cases that the original didn't deal with have been patched up, eg from - # this chunk, where there are tags outside of the htmlrtf ignore block. - # - # "{\\*\\htmltag116 <br />}\\htmlrtf \\line \\htmlrtf0 \\line {\\*\\htmltag84 <a href..." - # - # We take the approach of ignoring all rtf tags not explicitly handled. A proper - # parse tree would be nicer to work with. will need to look for ruby rtf library - # - # Some of the original comment to the c code is excerpted here: - # - # Sometimes in MAPI, the PR_BODY_HTML property contains the HTML of a message. - # But more usually, the HTML is encoded inside the RTF body (which you get in the - # PR_RTF_COMPRESSED property). These routines concern the decoding of the HTML - # from this RTF body. - # - # An encoded htmlrtf file is a valid RTF document, but which contains additional - # html markup information in its comments, and sometimes contains the equivalent - # rtf markup outside the comments. Therefore, when it is displayed by a plain - # simple RTF reader, the html comments are ignored and only the rtf markup has - # effect. Typically, this rtf markup is not as rich as the html markup would have been. - # But for an html-aware reader (such as the code below), we can ignore all the - # rtf markup, and extract the html markup out of the comments, and get a valid - # html document. - # - # There are actually two kinds of html markup in comments. Most of them are - # prefixed by "\*\htmltagNNN", for some number NNN. But sometimes there's one - # prefixed by "\*\mhtmltagNNN" followed by "\*\htmltagNNN". In this case, - # the two are equivalent, but the m-tag is for a MIME Multipart/Mixed Message - # and contains tags that refer to content-ids (e.g. img src="cid:072344a7") - # while the normal tag just refers to a name (e.g. img src="fred.jpg") - # The code below keeps the m-tag and discards the normal tag. - # If there are any m-tags like this, then the message also contains an - # attachment with a PR_CONTENT_ID property e.g. "072344a7". Actually, - # sometimes the m-tag is e.g. img src="http://outlook/welcome.html" and the - # attachment has a PR_CONTENT_LOCATION "http://outlook/welcome.html" instead - # of a PR_CONTENT_ID. - # - def rtf2html rtf - scan = StringScanner.new rtf - # require \fromhtml. is this worth keeping? apparently you see \\fromtext if it - # was converted from plain text. - return nil unless rtf["\\fromhtml"] - html = '' - ignore_tag = nil - # skip up to the first htmltag. return nil if we don't ever find one - return nil unless scan.scan_until /(?=\{\\\*\\htmltag)/ - until scan.empty? - if scan.scan /\{/ - elsif scan.scan /\}/ - elsif scan.scan /\\\*\\htmltag(\d+) ?/ - #p scan[1] - if ignore_tag == scan[1] - scan.scan_until /\}/ - ignore_tag = nil - end - elsif scan.scan /\\\*\\mhtmltag(\d+) ?/ - ignore_tag = scan[1] - elsif scan.scan /\\par ?/ - html << "\r\n" - elsif scan.scan /\\tab ?/ - html << "\t" - elsif scan.scan /\\'([0-9A-Za-z]{2})/ - html << scan[1].hex.chr - elsif scan.scan /\\pntext/ - scan.scan_until /\}/ - elsif scan.scan /\\htmlrtf/ - scan.scan_until /\\htmlrtf0 ?/ - # a generic throw away unknown tags thing. - # the above 2 however, are handled specially - elsif scan.scan /\\[a-z-]+(\d+)? 
?/ - #elsif scan.scan /\\li(\d+) ?/ - #elsif scan.scan /\\fi-(\d+) ?/ - elsif scan.scan /[\r\n]/ - elsif scan.scan /\\([{}\\])/ - html << scan[1] - elsif scan.scan /(.)/ - html << scan[1] - else - p :wtf - end - end - html.strip.empty? ? nil : html - end - - module_function :rtf2html, :rtfdecompr - end -end - diff --git a/vendor/ruby-msg/lib/mapi/types.rb b/vendor/ruby-msg/lib/mapi/types.rb deleted file mode 100644 index 71416afd5..000000000 --- a/vendor/ruby-msg/lib/mapi/types.rb +++ /dev/null @@ -1,51 +0,0 @@ -require 'rubygems' -require 'ole/types' - -module Mapi - Log = Logger.new_with_callstack - - module Types - # - # Mapi property types, taken from http://msdn2.microsoft.com/en-us/library/bb147591.aspx. - # - # The fields are [mapi name, variant name, description]. Maybe I should just make it a - # struct. - # - # seen some synonyms here, like PT_I8 vs PT_LONG. seen stuff like PT_SRESTRICTION, not - # sure what that is. look at `grep ' PT_' data/mapitags.yaml | sort -u` - # also, it has stuff like PT_MV_BINARY, where _MV_ probably means multi value, and is - # likely just defined to | in 0x1000. - # - # Note that the last 2 are the only ones where the Mapi value differs from the Variant value - # for the corresponding variant type. Odd. Also, the last 2 are currently commented out here - # because of the clash. - # - # Note 2 - the strings here say VT_BSTR, but I don't have that defined in Ole::Types. Should - # maybe change them to match. I've also seen reference to PT_TSTRING, which is defined as some - # sort of get unicode first, and fallback to ansii or something. - # - DATA = { - 0x0001 => ['PT_NULL', 'VT_NULL', 'Null (no valid data)'], - 0x0002 => ['PT_SHORT', 'VT_I2', '2-byte integer (signed)'], - 0x0003 => ['PT_LONG', 'VT_I4', '4-byte integer (signed)'], - 0x0004 => ['PT_FLOAT', 'VT_R4', '4-byte real (floating point)'], - 0x0005 => ['PT_DOUBLE', 'VT_R8', '8-byte real (floating point)'], - 0x0006 => ['PT_CURRENCY', 'VT_CY', '8-byte integer (scaled by 10,000)'], - 0x000a => ['PT_ERROR', 'VT_ERROR', 'SCODE value; 32-bit unsigned integer'], - 0x000b => ['PT_BOOLEAN', 'VT_BOOL', 'Boolean'], - 0x000d => ['PT_OBJECT', 'VT_UNKNOWN', 'Data object'], - 0x001e => ['PT_STRING8', 'VT_BSTR', 'String'], - 0x001f => ['PT_UNICODE', 'VT_BSTR', 'String'], - 0x0040 => ['PT_SYSTIME', 'VT_DATE', '8-byte real (date in integer, time in fraction)'], - #0x0102 => ['PT_BINARY', 'VT_BLOB', 'Binary (unknown format)'], - #0x0102 => ['PT_CLSID', 'VT_CLSID', 'OLE GUID'] - } - - module Constants - DATA.each { |num, (mapi_name, variant_name, desc)| const_set mapi_name, num } - end - - include Constants - end -end - diff --git a/vendor/ruby-msg/lib/mime.rb b/vendor/ruby-msg/lib/mime.rb deleted file mode 100644 index 4340e4901..000000000 --- a/vendor/ruby-msg/lib/mime.rb +++ /dev/null @@ -1,165 +0,0 @@ -# -# = Introduction -# -# A *basic* mime class for _really_ _basic_ and probably non-standard parsing -# and construction of MIME messages. -# -# Intended for two main purposes in this project: -# 1. As the container that is used to build up the message for eventual -# serialization as an eml. -# 2. For assistance in parsing the +transport_message_headers+ provided in .msg files, -# which are then kept through to the final eml. -# -# = TODO -# -# * Better streaming support, rather than an all-in-string approach. -# * Add +OrderedHash+ optionally, to not lose ordering in headers. -# * A fair bit remains to be done for this class, its fairly immature. But generally I'd like -# to see it be more generally useful. 
-# * All sorts of correctness issues, encoding particular. -# * Duplication of work in net/http.rb's +HTTPHeader+? Don't know if the overlap is sufficient. -# I don't want to lower case things, just for starters. -# * Mime was the original place I wrote #to_tree, intended as a quick debug hack. -# -class SimpleMime - Hash = begin - require 'orderedhash' - OrderedHash - rescue LoadError - Hash - end - - attr_reader :headers, :body, :parts, :content_type, :preamble, :epilogue - - # Create a SimpleMime object using +str+ as an initial serialization, which must contain headers - # and a body (even if empty). Needs work. - def initialize str, ignore_body=false - headers, @body = $~[1..-1] if str[/(.*?\r?\n)(?:\r?\n(.*))?\Z/m] - - @headers = Hash.new { |hash, key| hash[key] = [] } - @body ||= '' - headers.to_s.scan(/^\S+:\s*.*(?:\n\t.*)*/).each do |header| - @headers[header[/(\S+):/, 1]] << header[/\S+:\s*(.*)/m, 1].gsub(/\s+/m, ' ').strip # this is kind of wrong - end - - # don't have to have content type i suppose - @content_type, attrs = nil, {} - if content_type = @headers['Content-Type'][0] - @content_type, attrs = SimpleMime.split_header content_type - end - - return if ignore_body - - if multipart? - if body.empty? - @preamble = '' - @epilogue = '' - @parts = [] - else - # we need to split the message at the boundary - boundary = attrs['boundary'] or raise "no boundary for multipart message" - - # splitting the body: - parts = body.split(/--#{Regexp.quote boundary}/m) - unless parts[-1] =~ /^--/; warn "bad multipart boundary (missing trailing --)" - else parts[-1][0..1] = '' - end - parts.each_with_index do |part, i| - part =~ /^(\r?\n)?(.*?)(\r?\n)?\Z/m - part.replace $2 - warn "bad multipart boundary" if (1...parts.length-1) === i and !($1 && $3) - end - @preamble = parts.shift - @epilogue = parts.pop - @parts = parts.map { |part| SimpleMime.new part } - end - end - end - - def multipart? - @content_type && @content_type =~ /^multipart/ ? true : false - end - - def inspect - # add some extra here. - "#<SimpleMime content_type=#{@content_type.inspect}>" - end - - def to_tree - if multipart? - str = "- #{inspect}\n" - parts.each_with_index do |part, i| - last = i == parts.length - 1 - part.to_tree.split(/\n/).each_with_index do |line, j| - str << " #{last ? (j == 0 ? "\\" : ' ') : '|'}" + line + "\n" - end - end - str - else - "- #{inspect}\n" - end - end - - def to_s opts={} - opts = {:boundary_counter => 0}.merge opts - if multipart? - boundary = SimpleMime.make_boundary opts[:boundary_counter] += 1, self - @body = [preamble, parts.map { |part| "\r\n" + part.to_s(opts) + "\r\n" }, "--\r\n" + epilogue]. - flatten.join("\r\n--" + boundary) - content_type, attrs = SimpleMime.split_header @headers['Content-Type'][0] - attrs['boundary'] = boundary - @headers['Content-Type'] = [([content_type] + attrs.map { |key, val| %{#{key}="#{val}"} }).join('; ')] - end - - str = '' - @headers.each do |key, vals| - vals.each { |val| str << "#{key}: #{val}\r\n" } - end - str << "\r\n" + @body - end - - def self.split_header header - # FIXME: haven't read standard. not sure what its supposed to do with " in the name, or if other - # escapes are allowed. can't test on windows as " isn't allowed anyway. can be fixed with more - # accurate parser later. - # maybe move to some sort of Header class. but not all headers should be of it i suppose. - # at least add a join_header then, taking name and {}. for use in SimpleMime#to_s (for boundary - # rewrite), and Attachment#to_mime, among others... 
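	# A hedged illustration of the intended behaviour (the sample header and the
	# expected result are assumptions read off the regexp below, not from any spec):
	#
	#   SimpleMime.split_header 'text/plain; charset="us-ascii"; format=flowed'
	#   # => ["text/plain", {"charset"=>"us-ascii", "format"=>"flowed"}]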
- attrs = {} - header.scan(/;\s*([^\s=]+)\s*=\s*("[^"]*"|[^\s;]*)\s*/m).each do |key, value| - if attrs[key]; warn "ignoring duplicate header attribute #{key.inspect}" - else attrs[key] = value[/^"/] ? value[1..-2] : value - end - end - - [header[/^[^;]+/].strip, attrs] - end - - # +i+ is some value that should be unique for all multipart boundaries for a given message - def self.make_boundary i, extra_obj = SimpleMime - "----_=_NextPart_#{'%03d' % i}_#{'%08x' % extra_obj.object_id}.#{'%08x' % Time.now}" - end -end - -=begin -things to consider for header work. -encoded words: -Subject: =?iso-8859-1?q?p=F6stal?= - -and other mime funkyness: -Content-Disposition: attachment; - filename*0*=UTF-8''09%20%D7%90%D7%A5; - filename*1*=%20%D7%A1%D7%91-; - filename*2*=%D7%A7%95%A5.wma -Content-Transfer-Encoding: base64 - -and another, doing a test with an embedded newline in an attachment name, I -get this output from evolution. I get the feeling that this is probably a bug -with their implementation though, they weren't expecting new lines in filenames. -Content-Disposition: attachment; filename="asdf'b\"c -d efgh=i: ;\\j" -d efgh=i: ;\\j"; charset=us-ascii -Content-Type: text/plain; name="asdf'b\"c"; charset=us-ascii - -=end - diff --git a/vendor/ruby-msg/lib/orderedhash.rb b/vendor/ruby-msg/lib/orderedhash.rb deleted file mode 100644 index 16a4f5860..000000000 --- a/vendor/ruby-msg/lib/orderedhash.rb +++ /dev/null @@ -1,218 +0,0 @@ -# = OrderedHash -# -# == Version -# 1.2006.07.13 (change of the first number means Big Change) -# -# == Description -# Hash which preserves order of added items (like PHP array). -# -# == Usage -# -# (see examples directory under the ruby gems root directory) -# -# require 'rubygems' -# require 'ordered_hash' -# -# hsh = OrderedHash.new -# hsh['z'] = 1 -# hsh['a'] = 2 -# hsh['c'] = 3 -# p hsh.keys # ['z','a','c'] -# -# == Source -# http://simplypowerful.1984.cz/goodlibs/1.2006.07.13 -# -# == Author -# jan molic (/mig/at_sign/1984/dot/cz/) -# -# == Thanks to -# Andrew Johnson for his suggestions and fixes of Hash[], merge, to_a, inspect and shift -# Desmond Dsouza for == fixes -# -# == Licence -# You can redistribute it and/or modify it under the same terms of Ruby's license; -# either the dual license version in 2003, or any later version. -# - -class OrderedHash < Hash - - attr_accessor :order - - class << self - - def [] *args - hsh = OrderedHash.new - if Hash === args[0] - hsh.replace args[0] - elsif (args.size % 2) != 0 - raise ArgumentError, "odd number of elements for Hash" - else - hsh[args.shift] = args.shift while args.size > 0 - end - hsh - end - - end - - def initialize(*a, &b) - super - @order = [] - end - - def store_only a,b - store a,b - end - - alias orig_store store - - def store a,b - @order.push a unless has_key? 
a - super a,b - end - - alias []= store - - def == hsh2 - return hsh2==self if !hsh2.is_a?(OrderedHash) - return false if @order != hsh2.order - super hsh2 - end - - def clear - @order = [] - super - end - - def delete key - @order.delete key - super - end - - def each_key - @order.each { |k| yield k } - self - end - - def each_value - @order.each { |k| yield self[k] } - self - end - - def each - @order.each { |k| yield k,self[k] } - self - end - - alias each_pair each - - def delete_if - @order.clone.each { |k| - delete k if yield - } - self - end - - def values - ary = [] - @order.each { |k| ary.push self[k] } - ary - end - - def keys - @order - end - - def invert - hsh2 = Hash.new - @order.each { |k| hsh2[self[k]] = k } - hsh2 - end - - def reject &block - self.dup.delete_if( &block ) - end - - def reject! &block - hsh2 = reject( &block ) - self == hsh2 ? nil : hsh2 - end - - def replace hsh2 - @order = hsh2.keys - super hsh2 - end - - def shift - key = @order.first - key ? [key,delete(key)] : super - end - - def unshift k,v - unless self.include? k - @order.unshift k - orig_store(k,v) - true - else - false - end - end - - def push k,v - unless self.include? k - @order.push k - orig_store(k,v) - true - else - false - end - end - - def pop - key = @order.last - key ? [key,delete(key)] : nil - end - - def first - self[@order.first] - end - - def last - self[@order.last] - end - - def to_a - ary = [] - each { |k,v| ary << [k,v] } - ary - end - - def to_s - self.to_a.to_s - end - - def inspect - ary = [] - each {|k,v| ary << k.inspect + "=>" + v.inspect} - '{' + ary.join(", ") + '}' - end - - def update hsh2 - hsh2.each { |k,v| self[k] = v } - self - end - - alias :merge! update - - def merge hsh2 - self.dup update(hsh2) - end - - def select - ary = [] - each { |k,v| ary << [k,v] if yield k,v } - ary - end - -end - -#=end diff --git a/vendor/ruby-msg/lib/rtf.rb b/vendor/ruby-msg/lib/rtf.rb deleted file mode 100755 index 3afac68a8..000000000 --- a/vendor/ruby-msg/lib/rtf.rb +++ /dev/null @@ -1,109 +0,0 @@ -require 'stringio' - -# this file is pretty crap, its just to ensure there is always something readable if -# there is an rtf only body, with no html encapsulation. - -module RTF - class Tokenizer - def self.process io - while true do - case c = io.getc - when ?{; yield :open_group - when ?}; yield :close_group - when ?\\ - case c = io.getc - when ?{, ?}, ?\\; yield :text, c.chr - when ?'; yield :text, [io.read(2)].pack('H*') - when ?a..?z, ?A..?Z - # read control word - str = c.chr - str << c while c = io.read(1) and c =~ /[a-zA-Z]/ - neg = 1 - neg = -1 and c = io.read(1) if c == '-' - num = if c =~ /[0-9]/ - num = c - num << c while c = io.read(1) and c =~ /[0-9]/ - num.to_i * neg - end - raise "invalid rtf stream" if neg == -1 and !num # ???? 
\blahblah- some text - io.seek(-1, IO::SEEK_CUR) if c != ' ' - yield :control_word, str, num - when nil - raise "invalid rtf stream" # \EOF - else - # other kind of control symbol - yield :control_symbol, c.chr - end - when nil - return - when ?\r, ?\n - # ignore - else yield :text, c.chr - end - end - end - end - - class Converter - # crappy - def self.rtf2text str, format=:text - group = 0 - text = '' - text << "<html>\n<body>" if format == :html - group_type = [] - group_tags = [] - RTF::Tokenizer.process(StringIO.new(str)) do |a, b, c| - add_text = '' - case a - when :open_group; group += 1; group_type[group] = nil; group_tags[group] = [] - when :close_group; group_tags[group].reverse.each { |t| text << "</#{t}>" }; group -= 1; - when :control_word; # ignore - group_type[group] ||= b - # maybe change this to use utf8 where possible - add_text = if b == 'par' || b == 'line' || b == 'page'; "\n" - elsif b == 'tab' || b == 'cell'; "\t" - elsif b == 'endash' || b == 'emdash'; "-" - elsif b == 'emspace' || b == 'enspace' || b == 'qmspace'; " " - elsif b == 'ldblquote'; '"' - else '' - end - if b == 'b' || b == 'i' and format == :html - close = c == 0 ? '/' : '' - text << "<#{close}#{b}>" - if c == 0 - group_tags[group].delete b - else - group_tags[group] << b - end - end - # lot of other ones belong in here.\ -=begin -\bullet Bullet character. -\lquote Left single quotation mark. -\rquote Right single quotation mark. -\ldblquote Left double quotation mark. -\rdblquote -=end - when :control_symbol; # ignore - group_type[group] ||= b - add_text = ' ' if b == '~' # non-breakable space - add_text = '-' if b == '_' # non-breakable hypen - when :text - add_text = b if group <= 1 or group_type[group] == 'rtlch' && !group_type[0...group].include?('*') - end - if format == :html - text << add_text.gsub(/([<>&"'])/) do - ent = { '<' => 'lt', '>' => 'gt', '&' => 'amp', '"' => 'quot', "'" => 'apos' }[$1] - "&#{ent};" - end - text << '<br>' if add_text == "\n" - else - text << add_text - end - end - text << "</body>\n</html>\n" if format == :html - text - end - end -end - |
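For orientation, a short hedged sketch of driving the fallback converter and the two small support classes removed above. The class and method names come from the deleted sources; the sample inputs, expected results and load-path setup are assumptions.

    # assumes the vendored lib directory from this diff is still available locally
    $:.unshift 'vendor/ruby-msg/lib'
    require 'rtf'           # the fallback plain-text converter (lib/rtf.rb above)
    require 'mime'          # SimpleMime
    require 'orderedhash'   # OrderedHash

    # fallback RTF -> text conversion, for bodies with no html encapsulation
    RTF::Converter.rtf2text "{\\rtf1\\ansi Hello\\par World}"
    # => "Hello\nWorld"

    # really basic header/body split, as documented at the top of mime.rb
    mime = SimpleMime.new "Subject: test\r\nContent-Type: text/plain\r\n\r\nhello"
    mime.headers['Subject']   # => ["test"]
    mime.multipart?           # => false
    mime.body                 # => "hello"

    # OrderedHash preserves insertion order, e.g. for the headers above
    h = OrderedHash['z', 1, 'a', 2]
    h['c'] = 3
    h.keys   # => ["z", "a", "c"]
    h.pop    # => ["c", 3]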