aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/ruby-msg/lib/mapi/rtf.rb
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/ruby-msg/lib/mapi/rtf.rb')
-rw-r--r--vendor/ruby-msg/lib/mapi/rtf.rb169
1 files changed, 0 insertions, 169 deletions
diff --git a/vendor/ruby-msg/lib/mapi/rtf.rb b/vendor/ruby-msg/lib/mapi/rtf.rb
deleted file mode 100644
index 9fa133fac..000000000
--- a/vendor/ruby-msg/lib/mapi/rtf.rb
+++ /dev/null
@@ -1,169 +0,0 @@
-require 'stringio'
-require 'strscan'
-require 'rtf'
-
-module Mapi
- #
- # = Introduction
- #
- # The +RTF+ module contains a few helper functions for dealing with rtf
- # in mapi messages: +rtfdecompr+, and <tt>rtf2html</tt>.
- #
- # Both were ported from their original C versions for simplicity's sake.
- #
- module RTF
- RTF_PREBUF =
- "{\\rtf1\\ansi\\mac\\deff0\\deftab720{\\fonttbl;}" \
- "{\\f0\\fnil \\froman \\fswiss \\fmodern \\fscript " \
- "\\fdecor MS Sans SerifSymbolArialTimes New RomanCourier" \
- "{\\colortbl\\red0\\green0\\blue0\n\r\\par " \
- "\\pard\\plain\\f0\\fs20\\b\\i\\u\\tab\\tx"
-
- # Decompresses compressed rtf +data+, as found in the mapi property
- # +PR_RTF_COMPRESSED+. Code converted from my C version, which in turn
- # I wrote from a Java source, in JTNEF I believe.
- #
- # C version was modified to use circular buffer for back references,
- # instead of the optimization of the Java version to index directly into
- # output buffer. This was in preparation to support streaming in a
- # read/write neutral fashion.
- def rtfdecompr data
- io = StringIO.new data
- buf = RTF_PREBUF + "\x00" * (4096 - RTF_PREBUF.length)
- wp = RTF_PREBUF.length
- rtf = ''
-
- # get header fields (as defined in RTFLIB.H)
- compr_size, uncompr_size, magic, crc32 = io.read(16).unpack 'V*'
- #warn "compressed-RTF data size mismatch" unless io.size == data.compr_size + 4
-
- # process the data
- case magic
- when 0x414c454d # "MELA" magic number that identifies the stream as a uncompressed stream
- rtf = io.read uncompr_size
- when 0x75465a4c # "LZFu" magic number that identifies the stream as a compressed stream
- flag_count = -1
- flags = nil
- while rtf.length < uncompr_size and !io.eof?
- # each flag byte flags 8 literals/references, 1 per bit
- flags = ((flag_count += 1) % 8 == 0) ? io.getc : flags >> 1
- if 1 == (flags & 1) # each flag bit is 1 for reference, 0 for literal
- rp, l = io.getc, io.getc
- # offset is a 12 byte number. 2^12 is 4096, so thats fine
- rp = (rp << 4) | (l >> 4) # the offset relative to block start
- l = (l & 0xf) + 2 # the number of bytes to copy
- l.times do
- rtf << buf[wp] = buf[rp]
- wp = (wp + 1) % 4096
- rp = (rp + 1) % 4096
- end
- else
- rtf << buf[wp] = io.getc
- wp = (wp + 1) % 4096
- end
- end
- else # unknown magic number
- raise "Unknown compression type (magic number 0x%08x)" % magic
- end
-
- # not sure if its due to a bug in the above code. doesn't seem to be
- # in my tests, but sometimes there's a trailing null. we chomp it here,
- # which actually makes the resultant rtf smaller than its advertised
- # size (+uncompr_size+).
- rtf.chomp! 0.chr
- rtf
- end
-
- # Note, this is a conversion of the original C code. Not great - needs tests and
- # some refactoring, and an attempt to correct some inaccuracies. Hacky but works.
- #
- # Returns +nil+ if it doesn't look like an rtf encapsulated rtf.
- #
- # Some cases that the original didn't deal with have been patched up, eg from
- # this chunk, where there are tags outside of the htmlrtf ignore block.
- #
- # "{\\*\\htmltag116 <br />}\\htmlrtf \\line \\htmlrtf0 \\line {\\*\\htmltag84 <a href..."
- #
- # We take the approach of ignoring all rtf tags not explicitly handled. A proper
- # parse tree would be nicer to work with. will need to look for ruby rtf library
- #
- # Some of the original comment to the c code is excerpted here:
- #
- # Sometimes in MAPI, the PR_BODY_HTML property contains the HTML of a message.
- # But more usually, the HTML is encoded inside the RTF body (which you get in the
- # PR_RTF_COMPRESSED property). These routines concern the decoding of the HTML
- # from this RTF body.
- #
- # An encoded htmlrtf file is a valid RTF document, but which contains additional
- # html markup information in its comments, and sometimes contains the equivalent
- # rtf markup outside the comments. Therefore, when it is displayed by a plain
- # simple RTF reader, the html comments are ignored and only the rtf markup has
- # effect. Typically, this rtf markup is not as rich as the html markup would have been.
- # But for an html-aware reader (such as the code below), we can ignore all the
- # rtf markup, and extract the html markup out of the comments, and get a valid
- # html document.
- #
- # There are actually two kinds of html markup in comments. Most of them are
- # prefixed by "\*\htmltagNNN", for some number NNN. But sometimes there's one
- # prefixed by "\*\mhtmltagNNN" followed by "\*\htmltagNNN". In this case,
- # the two are equivalent, but the m-tag is for a MIME Multipart/Mixed Message
- # and contains tags that refer to content-ids (e.g. img src="cid:072344a7")
- # while the normal tag just refers to a name (e.g. img src="fred.jpg")
- # The code below keeps the m-tag and discards the normal tag.
- # If there are any m-tags like this, then the message also contains an
- # attachment with a PR_CONTENT_ID property e.g. "072344a7". Actually,
- # sometimes the m-tag is e.g. img src="http://outlook/welcome.html" and the
- # attachment has a PR_CONTENT_LOCATION "http://outlook/welcome.html" instead
- # of a PR_CONTENT_ID.
- #
- def rtf2html rtf
- scan = StringScanner.new rtf
- # require \fromhtml. is this worth keeping? apparently you see \\fromtext if it
- # was converted from plain text.
- return nil unless rtf["\\fromhtml"]
- html = ''
- ignore_tag = nil
- # skip up to the first htmltag. return nil if we don't ever find one
- return nil unless scan.scan_until /(?=\{\\\*\\htmltag)/
- until scan.empty?
- if scan.scan /\{/
- elsif scan.scan /\}/
- elsif scan.scan /\\\*\\htmltag(\d+) ?/
- #p scan[1]
- if ignore_tag == scan[1]
- scan.scan_until /\}/
- ignore_tag = nil
- end
- elsif scan.scan /\\\*\\mhtmltag(\d+) ?/
- ignore_tag = scan[1]
- elsif scan.scan /\\par ?/
- html << "\r\n"
- elsif scan.scan /\\tab ?/
- html << "\t"
- elsif scan.scan /\\'([0-9A-Za-z]{2})/
- html << scan[1].hex.chr
- elsif scan.scan /\\pntext/
- scan.scan_until /\}/
- elsif scan.scan /\\htmlrtf/
- scan.scan_until /\\htmlrtf0 ?/
- # a generic throw away unknown tags thing.
- # the above 2 however, are handled specially
- elsif scan.scan /\\[a-z-]+(\d+)? ?/
- #elsif scan.scan /\\li(\d+) ?/
- #elsif scan.scan /\\fi-(\d+) ?/
- elsif scan.scan /[\r\n]/
- elsif scan.scan /\\([{}\\])/
- html << scan[1]
- elsif scan.scan /(.)/
- html << scan[1]
- else
- p :wtf
- end
- end
- html.strip.empty? ? nil : html
- end
-
- module_function :rtf2html, :rtfdecompr
- end
-end
-