diff options
Diffstat (limited to 'vendor/ruby-ole')
-rw-r--r-- | vendor/ruby-ole/ChangeLog | 62 | ||||
-rw-r--r-- | vendor/ruby-ole/README | 115 | ||||
-rw-r--r-- | vendor/ruby-ole/Rakefile | 209 | ||||
-rwxr-xr-x | vendor/ruby-ole/bin/oletool | 41 | ||||
-rw-r--r-- | vendor/ruby-ole/data/propids.yaml | 56 | ||||
-rw-r--r-- | vendor/ruby-ole/lib/ole/base.rb | 7 | ||||
-rw-r--r-- | vendor/ruby-ole/lib/ole/file_system.rb | 2 | ||||
-rw-r--r-- | vendor/ruby-ole/lib/ole/ranges_io.rb | 231 | ||||
-rw-r--r-- | vendor/ruby-ole/lib/ole/storage.rb | 3 | ||||
-rwxr-xr-x | vendor/ruby-ole/lib/ole/storage/base.rb | 916 | ||||
-rw-r--r-- | vendor/ruby-ole/lib/ole/storage/file_system.rb | 423 | ||||
-rw-r--r-- | vendor/ruby-ole/lib/ole/storage/meta_data.rb | 148 | ||||
-rw-r--r-- | vendor/ruby-ole/lib/ole/support.rb | 256 | ||||
-rw-r--r-- | vendor/ruby-ole/lib/ole/types.rb | 2 | ||||
-rw-r--r-- | vendor/ruby-ole/lib/ole/types/base.rb | 251 | ||||
-rw-r--r-- | vendor/ruby-ole/lib/ole/types/property_set.rb | 165 |
16 files changed, 2887 insertions, 0 deletions
diff --git a/vendor/ruby-ole/ChangeLog b/vendor/ruby-ole/ChangeLog new file mode 100644 index 000000000..1e7c80b59 --- /dev/null +++ b/vendor/ruby-ole/ChangeLog @@ -0,0 +1,62 @@ +== 1.2.8.2 / 2009-01-01 + +- Update code to support ruby 1.9.1 + +== 1.2.8.1 / 2008-10-22 + +- Fix a couple of breakages when using $KCODE = 'UTF8' + +== 1.2.8 / 2008-10-08 + +- Check in the new fixes to the mbat support. +- Update README to be a bit more useful. + +== 1.2.7 / 2008-08-12 + +- Prepare Ole::Types::PropertySet for write support. +- Introduce Ole::Storage#meta_data as an easy interface to meta data stored + within various property sets. +- Add new --metadata action to oletool to dump said metadata. +- Add new --mimetype action to oletool, and corresponding Ole::Storage#mime_type + function to try to guess mime type of a file based on some simple heuristics. +- Restructure project files a bit, and pull in file_system & meta_data support + by default. +- More tests - now have 100% coverage. + +== 1.2.6 / 2008-07-21 + +- Fix FileClass#expand_path to work properly on darwin (issue #2) +- Guard against Enumerable#sum clash with active support (issue #3) + +== 1.2.5 / 2008-02-16 + +- Make all tests pass on ruby 1.9. + +== 1.2.4 / 2008-01-09 + +- Make all tests pass on windows (issue #1). +- Make all tests pass on a power pc (running ubuntu). +- Property set convenience access functions. + +== 1.2.3 / 2007-12-28 + +- MBAT write support re-implmented. Can now write files over ~8mb again. +- Minor fixes (truncation in #flush, file modification timestamps) +- More test coverage +- Initial (read-only) property set support. +- Complete filesystem api, to pass most of the rubyzip tests. +- Add a ChangeLog :). + +== 1.2.2 / 2007-11-05 + +- Lots of test updates, 90% coverage. +- Fix +to_tree+ method to be more efficient, and stream output. +- Optimizations from benchmarks and profiling, mostly for writes. 
Fixed + AllocationTable#resize_chain, RangesIOResizable#truncate and + AllocationTable#free_block. +- Add in filesystem test file from rubyzip, and start working on a + filesystem api. + +== 1.2.1 / 2007-08-20 + +- Separate out from ruby-msg as new project. diff --git a/vendor/ruby-ole/README b/vendor/ruby-ole/README new file mode 100644 index 000000000..0208c5abd --- /dev/null +++ b/vendor/ruby-ole/README @@ -0,0 +1,115 @@ += Introduction + +The ruby-ole library provides a variety of functions primarily for +working with OLE2 structured storage files, such as those produced by +Microsoft Office - eg *.doc, *.msg etc. + += Example Usage + +Here are some examples of how to use the library functionality, +categorised roughly by purpose. + +1. Reading and writing files within an OLE container + + The recommended way to manipulate the contents is via the + "file_system" API, whereby you use Ole::Storage instance methods + similar to the regular File and Dir class methods. + + ole = Ole::Storage.open('oleWithDirs.ole', 'rb+') + p ole.dir.entries('.') # => [".", "..", "dir1", "dir2", "file1"] + p ole.file.read('file1')[0, 25] # => "this is the entry 'file1'" + ole.dir.mkdir('newdir') + +2. Accessing OLE meta data + + Some convenience functions are provided for (currently read only) + access to OLE property sets and other sources of meta data. + + ole = Ole::Storage.open('test_word_95.doc') + p ole.meta_data.file_format # => "MSWordDoc" + p ole.meta_data.mime_type # => "application/msword" + p ole.meta_data.doc_author.split.first # => "Charles" + +3. Raw access to underlying OLE internals + + This is probably of little interest to most developers using the + library, but for some use cases you may need to drop down to the + lower level API on which the "file_system" API is constructed, + which exposes more of the format details. 
+ + <tt>Ole::Storage</tt> files can have multiple files with the same name, + or with a slash in the name, and other things that are probably + strictly invalid. This API is the only way to access those files. + + You can access the header object directly: + + p ole.header.num_sbat # => 1 + p ole.header.magic.unpack('H*') # => ["d0cf11e0a1b11ae1"] + + You can directly access the array of all Dirent objects, + including the root: + + p ole.dirents.length # => 5 + puts ole.root.to_tree + # => + - #<Dirent:"Root Entry"> + |- #<Dirent:"\001Ole" size=20 data="\001\000\000\002\000..."> + |- #<Dirent:"\001CompObj" size=98 data="\001\000\376\377\003..."> + |- #<Dirent:"WordDocument" size=2574 data="\334\245e\000-..."> + \- #<Dirent:"\005SummaryInformation" size=54788 data="\376\377\000\000\001..."> + + You can access (through RangesIO methods, or by using the + relevant Dirent and AllocationTable methods) information like where within + the container a stream is located (these are offset/length pairs): + + p ole.root["\001CompObj"].open { |io| io.ranges } # => [[0, 64], [64, 34]] + +See the documentation for each class for more details. + += Thanks + +* The code contained in this project was initially based on chicago's libole + (source available at http://prdownloads.sf.net/chicago/ole.tgz). + +* It was later augmented with some corrections by inspecting pole, and (purely + for header definitions) gsf. + +* The property set parsing code came from the apache java project POIFS. + +* The excellent idea for using a pseudo file system style interface by providing + #file and #dir methods which mimic File and Dir, was borrowed (along with almost + unchanged tests!) from Thomas Sondergaard's rubyzip. + += TODO + +== 1.2.9 + +* add buffering to rangesio so that performance for small reads and writes + isn't so awful. maybe try and remove the bottlenecks of unbuffered first + with more profiling, then implement the buffering on top of that. 
+* fix mode strings - like truncate when using 'w+', supporting append + 'a+' modes etc. done? +* make ranges io obey readable vs writeable modes. +* more RangesIO completion. ie, doesn't support #<< at the moment. +* maybe some oletool doc. +* make sure `rake test' runs tests both with $KCODE='UTF8', and without, + and maybe ensure i don't regress on 1.9 and jruby either now that they're + fixed. + +== 1.3.1 + +* fix property sets a bit more. see TODO in Ole::Storage::MetaData +* ability to zero out padding and unused blocks +* case insensitive mode for ole/file_system? +* better tests for mbat support. +* further doc cleanup +* add in place testing for jruby and ruby1.9 + +== Longer term + +* more benchmarking, profiling, and speed fixes. was thinking vs other + ruby filesystems (eg, vs File/Dir itself, and vs rubyzip), and vs other + ole implementations (maybe perl's, and poifs) just to check its in the + ballpark, with no remaining silly bottlenecks. +* supposedly vba does something weird to ole files. test that. 
+ diff --git a/vendor/ruby-ole/Rakefile b/vendor/ruby-ole/Rakefile new file mode 100644 index 000000000..1153bb39a --- /dev/null +++ b/vendor/ruby-ole/Rakefile @@ -0,0 +1,209 @@ +require 'rake/rdoctask' +require 'rake/testtask' +require 'rake/packagetask' +require 'rake/gempackagetask' + +require 'rbconfig' +require 'fileutils' + +$:.unshift 'lib' + +require 'ole/storage' + +PKG_NAME = 'ruby-ole' +PKG_VERSION = Ole::Storage::VERSION + +task :default => [:test] + +Rake::TestTask.new do |t| + t.test_files = FileList["test/test_*.rb"] + t.warning = true + t.verbose = true +end + +begin + require 'rcov/rcovtask' + # NOTE: this will not do anything until you add some tests + desc "Create a cross-referenced code coverage report" + Rcov::RcovTask.new do |t| + t.test_files = FileList['test/test*.rb'] + t.ruby_opts << "-Ilib" # in order to use this rcov + t.rcov_opts << "--xrefs" # comment to disable cross-references + t.verbose = true + end +rescue LoadError + # Rcov not available +end + +Rake::RDocTask.new do |t| + t.rdoc_dir = 'doc' + t.rdoc_files.include 'lib/**/*.rb' + t.rdoc_files.include 'README', 'ChangeLog' + t.title = "#{PKG_NAME} documentation" + t.options += %w[--line-numbers --inline-source --tab-width 2] + t.main = 'README' +end + +spec = Gem::Specification.new do |s| + s.name = PKG_NAME + s.version = PKG_VERSION + s.summary = %q{Ruby OLE library.} + s.description = %q{A library for easy read/write access to OLE compound documents for Ruby.} + s.authors = ['Charles Lowe'] + s.email = %q{aquasync@gmail.com} + s.homepage = %q{http://code.google.com/p/ruby-ole} + s.rubyforge_project = %q{ruby-ole} + + s.executables = ['oletool'] + s.files = ['README', 'Rakefile', 'ChangeLog', 'data/propids.yaml'] + s.files += FileList['lib/**/*.rb'] + s.files += FileList['test/test_*.rb', 'test/*.doc'] + s.files += FileList['test/oleWithDirs.ole', 'test/test_SummaryInformation'] + s.files += FileList['bin/*'] + s.test_files = FileList['test/test_*.rb'] + + s.has_rdoc = true + 
s.extra_rdoc_files = ['README', 'ChangeLog'] + s.rdoc_options += [ + '--main', 'README', + '--title', "#{PKG_NAME} documentation", + '--tab-width', '2' + ] +end + +Rake::GemPackageTask.new(spec) do |t| + t.gem_spec = spec + t.need_tar = true + t.need_zip = false + t.package_dir = 'build' +end + +desc 'Run various benchmarks' +task :benchmark do + require 'benchmark' + require 'tempfile' + require 'ole/file_system' + + # should probably add some read benchmarks too + def write_benchmark opts={} + files, size = opts[:files], opts[:size] + block_size = opts[:block_size] || 100_000 + block = 0.chr * block_size + blocks, remaining = size.divmod block_size + remaining = 0.chr * remaining + Tempfile.open 'ole_storage_benchmark' do |temp| + Ole::Storage.open temp do |ole| + files.times do |i| + ole.file.open "file_#{i}", 'w' do |f| + blocks.times { f.write block } + f.write remaining + end + end + end + end + end + + Benchmark.bm do |bm| + bm.report 'write_1mb_1x5' do + 5.times { write_benchmark :files => 1, :size => 1_000_000 } + end + + bm.report 'write_1mb_2x5' do + 5.times { write_benchmark :files => 1_000, :size => 1_000 } + end + end +end + +=begin + +1.2.1: + + user system total real +write_1mb_1x5 73.920000 8.400000 82.320000 ( 91.893138) + +revision 17 (speed up AllocationTable#free_block by using +@sparse attribute, and using Array#index otherwise): + + user system total real +write_1mb_1x5 57.910000 6.190000 64.100000 ( 66.207993) +write_1mb_2x5266.310000 31.750000 298.060000 (305.877203) + +add in extra resize_chain fix (return blocks to avoid calling +AllocationTable#chain twice): + + user system total real +write_1mb_1x5 43.140000 5.480000 48.620000 ( 51.835942) + +add in RangesIOResizeable fix (cache @blocks, to avoid calling +AllocationTable#chain at all when resizing now, just pass it +to AllocationTable#resize_chain): + + user system total real +write_1mb_1x5 29.770000 5.180000 34.950000 ( 39.916747) + +40 seconds is still a really long time to write out 
5 megs. +of course, this is all with a 1_000 byte block size, which is +a very small wite. upping this to 100_000 bytes: + + user system total real +write_1mb_1x5 0.540000 0.130000 0.670000 ( 1.051862) + +so it seems that that makes a massive difference. so i really +need buffering in RangesIO if I don't want it to really hurt +for small writes, as all the resize code is kind of expensive. + +one of the costly things at the moment, is RangesIO#offset_and_size, +which is called for each write, and re-finds which range we are in. +that should obviously be changed, to a fixed one that is invalidated +on seeks. buffering would hide that problem to some extent, but i +should fix it anyway. + +re-running the original 1.2.1 with 100_000 byte block size: + + user system total real +write_1mb_1x5 15.590000 2.230000 17.820000 ( 18.704910) + +so there the really badly non-linear AllocationTable#resize_chain is +being felt. + +back to current working copy, running full benchmark: + + user system total real +write_1mb_1x5 0.530000 0.150000 0.680000 ( 0.708919) +write_1mb_2x5227.940000 31.260000 259.200000 (270.200960) + +not surprisingly, the second case hasn't been helped much by the fixes +so far, as they only really help multiple resizes and writes for a file. +this could be pain in the new file system code - potentially searching +through Dirent#children at creation time. + +to test, i'll profile creating 1_000 files, without writing anything: + + user system total real +write_1mb_2x5 16.990000 1.830000 18.820000 ( 19.900568) + +hmmm, so thats not all of it. maybe its the initial chain calls, etc? +writing 1 byte: + + user system total real +write_1mb_1x5 0.520000 0.120000 0.640000 ( 0.660638) +write_1mb_2x5 19.810000 2.280000 22.090000 ( 22.696214) + +weird. 
+ +100 bytes: + + user system total real +write_1mb_1x5 0.560000 0.140000 0.700000 ( 1.424974) +write_1mb_2x5 22.940000 2.840000 25.780000 ( 26.556346) + +500 bytes: + + user system total real +write_1mb_1x5 0.530000 0.150000 0.680000 ( 1.139738) +write_1mb_2x5 77.260000 10.130000 87.390000 ( 91.671086) + +what happens there? very strange. + +=end + diff --git a/vendor/ruby-ole/bin/oletool b/vendor/ruby-ole/bin/oletool new file mode 100755 index 000000000..d81afab5a --- /dev/null +++ b/vendor/ruby-ole/bin/oletool @@ -0,0 +1,41 @@ +#! /usr/bin/ruby + +require 'optparse' +require 'rubygems' +require 'ole/storage' + +def oletool + opts = {:verbose => false, :action => :tree} + op = OptionParser.new do |op| + op.banner = "Usage: oletool [options] [files]" + op.separator '' + op.on('-t', '--tree', 'Dump ole trees for files (default)') { opts[:action] = :tree } + op.on('-r', '--repack', 'Repack the ole files in canonical form') { opts[:action] = :repack } + op.on('-m', '--mimetype', 'Print the guessed mime types') { opts[:action] = :mimetype } + op.on('-y', '--metadata', 'Dump the internal meta data as YAML') { opts[:action] = :metadata } + op.separator '' + op.on('-v', '--[no-]verbose', 'Run verbosely') { |v| opts[:verbose] = v } + op.on_tail('-h', '--help', 'Show this message') { puts op; exit } + end + files = op.parse ARGV + if files.empty? + puts 'Must specify 1 or more msg files.' + puts op + exit 1 + end + Ole::Log.level = opts[:verbose] ? 
Logger::WARN : Logger::FATAL + files.each do |file| + case opts[:action] + when :tree + Ole::Storage.open(file) { |ole| puts ole.root.to_tree } + when :repack + Ole::Storage.open file, 'rb+', &:repack + when :metadata + Ole::Storage.open(file) { |ole| y ole.meta_data.to_h } + when :mimetype + puts Ole::Storage.open(file) { |ole| ole.meta_data.mime_type } + end + end +end + +oletool diff --git a/vendor/ruby-ole/data/propids.yaml b/vendor/ruby-ole/data/propids.yaml new file mode 100644 index 000000000..9ac43ffe1 --- /dev/null +++ b/vendor/ruby-ole/data/propids.yaml @@ -0,0 +1,56 @@ +"{f29f85e0-4ff9-1068-ab91-08002b27b3d9}": + - FMTID_SummaryInformation + - 2: doc_title + 3: doc_subject + 4: doc_author + 5: doc_keywords + 6: doc_comments + 7: doc_template + 8: doc_last_author + 9: doc_rev_number + 10: doc_edit_time + 11: doc_last_printed + 12: doc_created_time + 13: doc_last_saved_time + 14: doc_page_count + 15: doc_word_count + 16: doc_char_count + 18: doc_app_name + 19: security + +"{d5cdd502-2e9c-101b-9397-08002b2cf9ae}": + - FMTID_DocSummaryInfo + - 2: doc_category + 3: doc_presentation_target + 4: doc_byte_count + 5: doc_line_count + 6: doc_para_count + 7: doc_slide_count + 8: doc_note_count + 9: doc_hidden_count + 10: mmclips + 11: scale_crop + 12: heading_pairs + 13: doc_part_titles + 14: doc_manager + 15: doc_company + 16: links_up_to_date + +"{d5cdd505-2e9c-101b-9397-08002b2cf9ae}": + - FMTID_UserDefinedProperties + - {} + +# just dumped these all here. if i can confirm any of these +# better, i can update this file so they're recognized. +#0b63e343-9ccc-11d0-bcdb-00805fccce04 +#0b63e350-9ccc-11d0-bcdb-00805fccce04 NetLibrary propset? +#31f400a0-fd07-11cf-b9bd-00aa003db18e ScriptInfo propset? +#49691c90-7e17-101a-a91c-08002b2ecda9 Query propset? +#560c36c0-503a-11cf-baa1-00004c752a9a +#70eb7a10-55d9-11cf-b75b-00aa0051fe20 HTMLInfo propset +#85ac0960-1819-11d1-896f-00805f053bab message propset? 
+#aa568eec-e0e5-11cf-8fda-00aa00a14f93 NNTP SummaryInformation propset? +#b725f130-47ef-101a-a5f1-02608c9eebac Storage propset +#c82bf596-b831-11d0-b733-00aa00a1ebd2 NetLibraryInfo propset +#c82bf597-b831-11d0-b733-00aa00a1ebd2 LinkInformation propset? +#d1b5d3f0-c0b3-11cf-9a92-00a0c908dbf1 LinkInformation propset? diff --git a/vendor/ruby-ole/lib/ole/base.rb b/vendor/ruby-ole/lib/ole/base.rb new file mode 100644 index 000000000..ee1bc0431 --- /dev/null +++ b/vendor/ruby-ole/lib/ole/base.rb @@ -0,0 +1,7 @@ + +require 'ole/support' + +module Ole # :nodoc: + Log = Logger.new_with_callstack +end + diff --git a/vendor/ruby-ole/lib/ole/file_system.rb b/vendor/ruby-ole/lib/ole/file_system.rb new file mode 100644 index 000000000..24d330a92 --- /dev/null +++ b/vendor/ruby-ole/lib/ole/file_system.rb @@ -0,0 +1,2 @@ +# keeping this file around for now, but will delete later on... +require 'ole/storage/file_system' diff --git a/vendor/ruby-ole/lib/ole/ranges_io.rb b/vendor/ruby-ole/lib/ole/ranges_io.rb new file mode 100644 index 000000000..bfca4fe09 --- /dev/null +++ b/vendor/ruby-ole/lib/ole/ranges_io.rb @@ -0,0 +1,231 @@ +# need IO::Mode +require 'ole/support' + +# +# = Introduction +# +# +RangesIO+ is a basic class for wrapping another IO object allowing you to arbitrarily reorder +# slices of the input file by providing a list of ranges. Intended as an initial measure to curb +# inefficiencies in the Dirent#data method just reading all of a file's data in one hit, with +# no method to stream it. +# +# This class will encapuslate the ranges (corresponding to big or small blocks) of any ole file +# and thus allow reading/writing directly to the source bytes, in a streamed fashion (so just +# getting 16 bytes doesn't read the whole thing). +# +# In the simplest case it can be used with a single range to provide a limited io to a section of +# a file. +# +# = Limitations +# +# * No buffering. by design at the moment. 
Intended for large reads +# +# = TODO +# +# On further reflection, this class is something of a joining/optimization of +# two separate IO classes. a SubfileIO, for providing access to a range within +# a File as a separate IO object, and a ConcatIO, allowing the presentation of +# a bunch of io objects as a single unified whole. +# +# I will need such a ConcatIO if I'm to provide Mime#to_io, a method that will +# convert a whole mime message into an IO stream, that can be read from. +# It will just be the concatenation of a series of IO objects, corresponding to +# headers and boundaries, as StringIO's, and SubfileIO objects, coming from the +# original message proper, or RangesIO as provided by the Attachment#data, that +# will then get wrapped by Mime in a Base64IO or similar, to get encoded on-the- +# fly. Thus the attachment, in its plain or encoded form, and the message as a +# whole never exists as a single string in memory, as it does now. This is a +# fair bit of work to achieve, but generally useful I believe. +# +# This class isn't ole specific, maybe move it to my general ruby stream project. +# +class RangesIO + attr_reader :io, :mode, :ranges, :size, :pos + # +io+:: the parent io object that we are wrapping. + # +mode+:: the mode to use + # +params+:: hash of params. + # * :ranges - byte offsets, either: + # 1. an array of ranges [1..2, 4..5, 6..8] or + # 2. an array of arrays, where the second is length [[1, 1], [4, 1], [6, 2]] for the above + # (think the way String indexing works) + # * :close_parent - boolean to close parent when this object is closed + # + # NOTE: the +ranges+ can overlap. + def initialize io, mode='r', params={} + mode, params = 'r', mode if Hash === mode + ranges = params[:ranges] + @params = {:close_parent => false}.merge params + @mode = IO::Mode.new mode + @io = io + # convert ranges to arrays. check for negative ranges? + ranges ||= [0, io.size] + @ranges = ranges.map { |r| Range === r ? 
[r.begin, r.end - r.begin] : r } + # calculate size + @size = @ranges.inject(0) { |total, (pos, len)| total + len } + # initial position in the file + @pos = 0 + + # handle some mode flags + truncate 0 if @mode.truncate? + seek size if @mode.append? + end + +#IOError: closed stream +# get this for reading, writing, everything... +#IOError: not opened for writing + + # add block form. TODO add test for this + def self.open(*args, &block) + ranges_io = new(*args) + if block_given? + begin; yield ranges_io + ensure; ranges_io.close + end + else + ranges_io + end + end + + def pos= pos, whence=IO::SEEK_SET + case whence + when IO::SEEK_SET + when IO::SEEK_CUR + pos += @pos + when IO::SEEK_END + pos = @size + pos + else raise Errno::EINVAL + end + raise Errno::EINVAL unless (0...@size) === pos + @pos = pos + end + + alias seek :pos= + alias tell :pos + + def close + @io.close if @params[:close_parent] + end + + # returns the [+offset+, +size+], pair inorder to read/write at +pos+ + # (like a partial range), and its index. + def offset_and_size pos + total = 0 + ranges.each_with_index do |(offset, size), i| + if pos <= total + size + diff = pos - total + return [offset + diff, size - diff], i + end + total += size + end + # should be impossible for any valid pos, (0...size) === pos + raise ArgumentError, "no range for pos #{pos.inspect}" + end + + def eof? + @pos == @size + end + + # read bytes from file, to a maximum of +limit+, or all available if unspecified. + def read limit=nil + data = '' + return data if eof? + limit ||= size + partial_range, i = offset_and_size @pos + # this may be conceptually nice (create sub-range starting where we are), but + # for a large range array its pretty wasteful. even the previous way was. but + # i'm not trying to optimize this atm. it may even go to c later if necessary. + ([partial_range] + ranges[i+1..-1]).each do |pos, len| + @io.seek pos + if limit < len + # convoluted, to handle read errors. 
s may be nil + s = @io.read limit + @pos += s.length if s + break data << s + end + # convoluted, to handle ranges beyond the size of the file + s = @io.read len + @pos += s.length if s + data << s + break if s.length != len + limit -= len + end + data + end + + # you may override this call to update @ranges and @size, if applicable. + def truncate size + raise NotImplementedError, 'truncate not supported' + end + + # using explicit forward instead of an alias now for overriding. + # should override truncate. + def size= size + truncate size + end + + def write data + # short cut. needed because truncate 0 may return no ranges, instead of empty range, + # thus offset_and_size fails. + return 0 if data.empty? + data_pos = 0 + # if we don't have room, we can use the truncate hook to make more space. + if data.length > @size - @pos + begin + truncate @pos + data.length + rescue NotImplementedError + raise IOError, "unable to grow #{inspect} to write #{data.length} bytes" + end + end + partial_range, i = offset_and_size @pos + ([partial_range] + ranges[i+1..-1]).each do |pos, len| + @io.seek pos + if data_pos + len > data.length + chunk = data[data_pos..-1] + @io.write chunk + @pos += chunk.length + data_pos = data.length + break + end + @io.write data[data_pos, len] + @pos += len + data_pos += len + end + data_pos + end + + alias << write + + # i can wrap it in a buffered io stream that + # provides gets, and appropriately handle pos, + # truncate. mostly added just to past the tests. + # FIXME + def gets + s = read 1024 + i = s.index "\n" + @pos -= s.length - (i+1) + s[0..i] + end + alias readline :gets + + def inspect + # the rescue is for empty files + pos, len = (@ranges[offset_and_size(@pos).last] rescue [nil, nil]) + range_str = pos ? "#{pos}..#{pos+len}" : 'nil' + "#<#{self.class} io=#{io.inspect}, size=#@size, pos=#@pos, "\ + "range=#{range_str}>" + end +end + +# this subclass of ranges io explicitly ignores the truncate part of 'w' modes. 
+# only really needed for the allocation table writes etc. maybe just use explicit modes +# for those +# better yet write a test that breaks before I fix it. added nodoc for the +# time being. +class RangesIONonResizeable < RangesIO # :nodoc: + def initialize io, mode='r', params={} + mode, params = 'r', mode if Hash === mode + flags = IO::Mode.new(mode).flags & ~IO::TRUNC + super io, flags, params + end +end + diff --git a/vendor/ruby-ole/lib/ole/storage.rb b/vendor/ruby-ole/lib/ole/storage.rb new file mode 100644 index 000000000..02e851df7 --- /dev/null +++ b/vendor/ruby-ole/lib/ole/storage.rb @@ -0,0 +1,3 @@ +require 'ole/storage/base' +require 'ole/storage/file_system' +require 'ole/storage/meta_data' diff --git a/vendor/ruby-ole/lib/ole/storage/base.rb b/vendor/ruby-ole/lib/ole/storage/base.rb new file mode 100755 index 000000000..3c41b21a2 --- /dev/null +++ b/vendor/ruby-ole/lib/ole/storage/base.rb @@ -0,0 +1,916 @@ +require 'tempfile' + +require 'ole/base' +require 'ole/types' +require 'ole/ranges_io' + +module Ole # :nodoc: + # + # This class is the primary way the user interacts with an OLE storage file. + # + # = TODO + # + # * the custom header cruft for Header and Dirent needs some love. + # * i have a number of classes doing load/save combos: Header, AllocationTable, Dirent, + # and, in a manner of speaking, but arguably different, Storage itself. + # they have differing api's which would be nice to rethink. + # AllocationTable::Big must be created aot now, as it is used for all subsequent reads. + # + class Storage + # thrown for any bogus OLE file errors. + class FormatError < StandardError # :nodoc: + end + + VERSION = '1.2.8.2' + + # options used at creation time + attr_reader :params + # The top of the ole tree structure + attr_reader :root + # The tree structure in its original flattened form. only valid after #load, or #flush. 
+ attr_reader :dirents + # The underlying io object to/from which the ole object is serialized, whether we + # should close it, and whether it is writeable + attr_reader :io, :close_parent, :writeable + # Low level internals, you probably shouldn't need to mess with these + attr_reader :header, :bbat, :sbat, :sb_file + + # +arg+ should be either a filename, or an +IO+ object, and needs to be seekable. + # +mode+ is optional, and should be a regular mode string. + def initialize arg, mode=nil, params={} + params, mode = mode, nil if Hash === mode + params = {:update_timestamps => true}.merge(params) + @params = params + + # get the io object + @close_parent, @io = if String === arg + mode ||= 'rb' + [true, open(arg, mode)] + else + raise ArgumentError, 'unable to specify mode string with io object' if mode + [false, arg] + end + # do we have this file opened for writing? don't know of a better way to tell + # (unless we parse the mode string in the open case) + # hmmm, note that in ruby 1.9 this doesn't work anymore. which is all the more + # reason to use mode string parsing when available, and fall back to something like + # io.writeable? otherwise. + @writeable = begin + if mode + IO::Mode.new(mode).writeable? + else + @io.flush + # this is for the benefit of ruby-1.9 + @io.syswrite('') if @io.respond_to?(:syswrite) + true + end + rescue IOError + false + end + # silence undefined warning in clear + @sb_file = nil + # if the io object has data, we should load it, otherwise start afresh + # this should be based on the mode string rather. + @io.size > 0 ? load : clear + end + + # somewhat similar to File.open, the open class method allows a block form where + # the Ole::Storage object is automatically closed on completion of the block. + def self.open arg, mode=nil, params={} + ole = new arg, mode, params + if block_given? + begin yield ole + ensure; ole.close + end + else ole + end + end + + # load document from file. 
+ # + # TODO: implement various allocationtable checks, maybe as a AllocationTable#fsck function :) + # + # 1. reterminate any chain not ending in EOC. + # compare file size with actually allocated blocks per file. + # 2. pass through all chain heads looking for collisions, and making sure nothing points to them + # (ie they are really heads). in both sbat and mbat + # 3. we know the locations of the bbat data, and mbat data. ensure that there are placeholder blocks + # in the bat for them. + # 4. maybe a check of excess data. if there is data outside the bbat.truncate.length + 1 * block_size, + # (eg what is used for truncate in #flush), then maybe add some sort of message about that. it + # will be automatically thrown away at close time. + def load + # we always read 512 for the header block. if the block size ends up being different, + # what happens to the 109 fat entries. are there more/less entries? + @io.rewind + header_block = @io.read 512 + @header = Header.new header_block + + # create an empty bbat. + @bbat = AllocationTable::Big.new self + bbat_chain = header_block[Header::SIZE..-1].unpack 'V*' + mbat_block = @header.mbat_start + @header.num_mbat.times do + blocks = @bbat.read([mbat_block]).unpack 'V*' + mbat_block = blocks.pop + bbat_chain += blocks + end + # am i using num_bat in the right way? + @bbat.load @bbat.read(bbat_chain[0, @header.num_bat]) + + # get block chain for directories, read it, then split it into chunks and load the + # directory entries. semantics changed - used to cut at first dir where dir.type == 0 + @dirents = @bbat.read(@header.dirent_start).to_enum(:each_chunk, Dirent::SIZE). + map { |str| Dirent.new self, str }.reject { |d| d.type_id == 0 } + + # now reorder from flat into a tree + # links are stored in some kind of balanced binary tree + # check that everything is visited at least, and at most once + # similarly with the blocks of the file. + # was thinking of moving this to Dirent.to_tree instead. 
+ class << @dirents + def to_tree idx=0 + return [] if idx == Dirent::EOT + d = self[idx] + d.children = to_tree d.child + raise FormatError, "directory #{d.inspect} used twice" if d.idx + d.idx = idx + to_tree(d.prev) + [d] + to_tree(d.next) + end + end + + @root = @dirents.to_tree.first + Log.warn "root name was #{@root.name.inspect}" unless @root.name == 'Root Entry' + unused = @dirents.reject(&:idx).length + Log.warn "#{unused} unused directories" if unused > 0 + + # FIXME i don't currently use @header.num_sbat which i should + # hmm. nor do i write it. it means what exactly again? + # which mode to use here? + @sb_file = RangesIOResizeable.new @bbat, :first_block => @root.first_block, :size => @root.size + @sbat = AllocationTable::Small.new self + @sbat.load @bbat.read(@header.sbat_start) + end + + def close + @sb_file.close + flush if @writeable + @io.close if @close_parent + end + + # the flush method is the main "save" method. all file contents are always + # written directly to the file by the RangesIO objects, all this method does + # is write out all the file meta data - dirents, allocation tables, file header + # etc. + # + # maybe add an option to zero the padding, and any remaining avail blocks in the + # allocation table. + # + # TODO: long and overly complex. simplify and test better. eg, perhaps move serialization + # of bbat to AllocationTable::Big. + def flush + # update root dirent, and flatten dirent tree + @root.name = 'Root Entry' + @root.first_block = @sb_file.first_block + @root.size = @sb_file.size + @dirents = @root.flatten + + # serialize the dirents using the bbat + RangesIOResizeable.open @bbat, 'w', :first_block => @header.dirent_start do |io| + @dirents.each { |dirent| io.write dirent.to_s } + padding = (io.size / @bbat.block_size.to_f).ceil * @bbat.block_size - io.size + io.write 0.chr * padding + @header.dirent_start = io.first_block + end + + # serialize the sbat + # perhaps the blocks used by the sbat should be marked with BAT? 
+ RangesIOResizeable.open @bbat, 'w', :first_block => @header.sbat_start do |io| + io.write @sbat.to_s + @header.sbat_start = io.first_block + @header.num_sbat = @bbat.chain(@header.sbat_start).length + end + + # create RangesIOResizeable hooked up to the bbat. use that to claim bbat blocks using + # truncate. then when its time to write, convert that chain and some chunk of blocks at + # the end, into META_BAT blocks. write out the chain, and those meta bat blocks, and its + # done. + # this is perhaps not good, as we reclaim all bat blocks here, which + # may include the sbat we just wrote. FIXME + @bbat.map! do |b| + b == AllocationTable::BAT || b == AllocationTable::META_BAT ? AllocationTable::AVAIL : b + end + + # currently we use a loop. this could be better, but basically, + # the act of writing out the bat, itself requires blocks which get + # recorded in the bat. + # + # i'm sure that there'd be some simpler closed form solution to this. solve + # recursive func: + # + # num_mbat_blocks = ceil(max((mbat_len - 109) * 4 / block_size, 0)) + # bbat_len = initial_bbat_len + num_mbat_blocks + # mbat_len = ceil(bbat_len * 4 / block_size) + # + # the actual bbat allocation table is itself stored throughout the file, and that chain + # is stored in the initial blocks, and the mbat blocks. + num_mbat_blocks = 0 + io = RangesIOResizeable.new @bbat, 'w', :first_block => AllocationTable::EOC + # truncate now, so that we can simplify size calcs - the mbat blocks will be appended in a + # contiguous chunk at the end. + # hmmm, i think this truncate should be matched with a truncate of the underlying io. if you + # delete a lot of stuff, and free up trailing blocks, the file size never shrinks. this can + # be fixed easily, add an io truncate + @bbat.truncate! + before = @io.size + @io.truncate @bbat.block_size * (@bbat.length + 1) + while true + # get total bbat size. equivalent to @bbat.to_s.length, but for the factoring in of + # the mbat blocks. 
we can't just add the mbat blocks directly to the bbat, as this iteration
+			# progresses, more blocks may be needed for the bat itself (if there are no more gaps), and the
+			# mbat must remain contiguous.
+			bbat_data_len = ((@bbat.length + num_mbat_blocks) * 4 / @bbat.block_size.to_f).ceil * @bbat.block_size
+			# now storing the excess mbat blocks also increases the size of the bbat:
+			new_num_mbat_blocks = ([bbat_data_len / @bbat.block_size - 109, 0].max * 4 / (@bbat.block_size.to_f - 4)).ceil
+			if new_num_mbat_blocks != num_mbat_blocks
+				# need more space for the mbat.
+				num_mbat_blocks = new_num_mbat_blocks
+			elsif io.size != bbat_data_len
+				# need more space for the bat
+				# this may grow the bbat, depending on existing available blocks
+				io.truncate bbat_data_len
+			else
+				break
+			end
+		end
+
+		# now extract the info we want:
+		ranges = io.ranges
+		bbat_chain = @bbat.chain io.first_block
+		io.close
+		bbat_chain.each { |b| @bbat[b] = AllocationTable::BAT }
+		# tack on the mbat stuff
+		@header.num_bat = bbat_chain.length
+		mbat_blocks = (0...num_mbat_blocks).map do
+			block = @bbat.free_block
+			@bbat[block] = AllocationTable::META_BAT
+			block
+		end
+		@header.mbat_start = mbat_blocks.first || AllocationTable::EOC
+
+		# now finally write the bbat, using a not resizable io.
+		# the mode here will be 'r', which allows write atm.
+		RangesIO.open(@io, :ranges => ranges) { |f| f.write @bbat.to_s }
+
+		# this is the mbat. pad it out.
+		bbat_chain += [AllocationTable::AVAIL] * [109 - bbat_chain.length, 0].max
+		@header.num_mbat = num_mbat_blocks
+		if num_mbat_blocks != 0
+			# write out the mbat blocks now. first of all, where are they going to be?
+			mbat_data = bbat_chain[109..-1]
+			# expand the mbat_data to include the linked list forward pointers.
+			mbat_data = mbat_data.to_enum(:each_slice, @bbat.block_size / 4 - 1).to_a.
+				zip(mbat_blocks[1..-1] + [nil]).map { |a, b| b ? a + [b] : a }
+			# pad out the last one.
+ mbat_data.last.push(*([AllocationTable::AVAIL] * (@bbat.block_size / 4 - mbat_data.last.length))) + RangesIO.open @io, :ranges => @bbat.ranges(mbat_blocks) do |f| + f.write mbat_data.flatten.pack('V*') + end + end + + # now seek back and write the header out + @io.seek 0 + @io.write @header.to_s + bbat_chain[0, 109].pack('V*') + @io.flush + end + + def clear + # initialize to equivalent of loading an empty ole document. + Log.warn 'creating new ole storage object on non-writable io' unless @writeable + @header = Header.new + @bbat = AllocationTable::Big.new self + @root = Dirent.new self, :type => :root, :name => 'Root Entry' + @dirents = [@root] + @root.idx = 0 + @sb_file.close if @sb_file + @sb_file = RangesIOResizeable.new @bbat, :first_block => AllocationTable::EOC + @sbat = AllocationTable::Small.new self + # throw everything else the hell away + @io.truncate 0 + end + + # could be useful with mis-behaving ole documents. or to just clean them up. + def repack temp=:file + case temp + when :file + Tempfile.open 'ole-repack' do |io| + io.binmode + repack_using_io io + end + when :mem; StringIO.open('', &method(:repack_using_io)) + else raise ArgumentError, "unknown temp backing #{temp.inspect}" + end + end + + def repack_using_io temp_io + @io.rewind + IO.copy @io, temp_io + clear + Storage.open temp_io, nil, @params do |temp_ole| + #temp_ole.root.type = :dir + Dirent.copy temp_ole.root, root + end + end + + def bat_for_size size + # note >=, not > previously. + size >= @header.threshold ? @bbat : @sbat + end + + def inspect + "#<#{self.class} io=#{@io.inspect} root=#{@root.inspect}>" + end + + # + # A class which wraps the ole header + # + # Header.new can be both used to load from a string, or to create from + # defaults. Serialization is accomplished with the #to_s method. 
+ # + class Header < Struct.new( + :magic, :clsid, :minor_ver, :major_ver, :byte_order, :b_shift, :s_shift, + :reserved, :csectdir, :num_bat, :dirent_start, :transacting_signature, :threshold, + :sbat_start, :num_sbat, :mbat_start, :num_mbat + ) + PACK = 'a8 a16 v2 a2 v2 a6 V3 a4 V5' + SIZE = 0x4c + # i have seen it pointed out that the first 4 bytes of hex, + # 0xd0cf11e0, is supposed to spell out docfile. hmmm :) + MAGIC = "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" # expected value of Header#magic + # what you get if creating new header from scratch. + # AllocationTable::EOC isn't available yet. meh. + EOC = 0xfffffffe + DEFAULT = [ + MAGIC, 0.chr * 16, 59, 3, "\xfe\xff", 9, 6, + 0.chr * 6, 0, 1, EOC, 0.chr * 4, + 4096, EOC, 0, EOC, 0 + ] + + def initialize values=DEFAULT + values = values.unpack(PACK) if String === values + super(*values) + validate! + end + + def to_s + to_a.pack PACK + end + + def validate! + raise FormatError, "OLE2 signature is invalid" unless magic == MAGIC + if num_bat == 0 or # is that valid for a completely empty file? + # not sure about this one. basically to do max possible bat given size of mbat + num_bat > 109 && num_bat > 109 + num_mbat * (1 << b_shift - 2) or + # shouldn't need to use the mbat as there is enough space in the header block + num_bat < 109 && num_mbat != 0 or + # given the size of the header is 76, if b_shift <= 6, blocks address the header. + s_shift > b_shift or b_shift <= 6 or b_shift >= 31 or + # we only handle little endian + byte_order != "\xfe\xff" + raise FormatError, "not valid OLE2 structured storage file" + end + # relaxed this, due to test-msg/qwerty_[1-3]*.msg they all had + # 3 for this value. + # transacting_signature != "\x00" * 4 or + if threshold != 4096 or + num_mbat == 0 && mbat_start != AllocationTable::EOC or + reserved != "\x00" * 6 + Log.warn "may not be a valid OLE2 structured storage file" + end + true + end + end + + # + # +AllocationTable+'s hold the chains corresponding to files. 
Given + # an initial index, <tt>AllocationTable#chain</tt> follows the chain, returning + # the blocks that make up that file. + # + # There are 2 allocation tables, the bbat, and sbat, for big and small + # blocks respectively. The block chain should be loaded using either + # <tt>Storage#read_big_blocks</tt> or <tt>Storage#read_small_blocks</tt> + # as appropriate. + # + # Whether or not big or small blocks are used for a file depends on + # whether its size is over the <tt>Header#threshold</tt> level. + # + # An <tt>Ole::Storage</tt> document is serialized as a series of directory objects, + # which are stored in blocks throughout the file. The blocks are either + # big or small, and are accessed using the <tt>AllocationTable</tt>. + # + # The bbat allocation table's data is stored in the spare room in the header + # block, and in extra blocks throughout the file as referenced by the meta + # bat. That chain is linear, as there is no higher level table. + # + # AllocationTable.new is used to create an empty table. It can parse a string + # with the #load method. Serialization is accomplished with the #to_s method. + # + class AllocationTable < Array + # a free block (I don't currently leave any blocks free), although I do pad out + # the allocation table with AVAIL to the block size. + AVAIL = 0xffffffff + EOC = 0xfffffffe # end of a chain + # these blocks are used for storing the allocation table chains + BAT = 0xfffffffd + META_BAT = 0xfffffffc + + attr_reader :ole, :io, :block_size + def initialize ole + @ole = ole + @sparse = true + super() + end + + def load data + replace data.unpack('V*') + end + + def truncate + # this strips trailing AVAILs. come to think of it, this has the potential to break + # bogus ole. if you terminate using AVAIL instead of EOC, like I did before. but that is + # very broken. however, if a chain ends with AVAIL, it should probably be fixed to EOC + # at load time. 
+ temp = reverse + not_avail = temp.find { |b| b != AVAIL } and temp = temp[temp.index(not_avail)..-1] + temp.reverse + end + + def truncate! + replace truncate + end + + def to_s + table = truncate + # pad it out some + num = @ole.bbat.block_size / 4 + # do you really use AVAIL? they probably extend past end of file, and may shortly + # be used for the bat. not really good. + table += [AVAIL] * (num - (table.length % num)) if (table.length % num) != 0 + table.pack 'V*' + end + + # rewrote this to be non-recursive as it broke on a large attachment + # chain with a stack error + def chain idx + a = [] + until idx >= META_BAT + raise FormatError, "broken allocationtable chain" if idx < 0 || idx > length + a << idx + idx = self[idx] + end + Log.warn "invalid chain terminator #{idx}" unless idx == EOC + a + end + + # Turn a chain (an array given by +chain+) of blocks (optionally + # truncated to +size+) into an array of arrays describing the stretches of + # bytes in the file that it belongs to. + # + # The blocks are Big or Small blocks depending on the table type. + def blocks_to_ranges chain, size=nil + # truncate the chain if required + chain = chain[0...(size.to_f / block_size).ceil] if size + # convert chain to ranges of the block size + ranges = chain.map { |i| [block_size * i, block_size] } + # truncate final range if required + ranges.last[1] -= (ranges.length * block_size - size) if ranges.last and size + ranges + end + + def ranges chain, size=nil + chain = self.chain(chain) unless Array === chain + blocks_to_ranges chain, size + end + + # quick shortcut. chain can be either a head (in which case the table is used to + # turn it into a chain), or a chain. it is converted to ranges, then to rangesio. 
+ def open chain, size=nil, &block + RangesIO.open @io, :ranges => ranges(chain, size), &block + end + + def read chain, size=nil + open chain, size, &:read + end + + # catch any method that may add an AVAIL somewhere in the middle, thus invalidating + # the @sparse speedup for free_block. annoying using eval, but define_method won't + # work for this. + # FIXME + [:map!, :collect!].each do |name| + eval <<-END + def #{name}(*args, &block) + @sparse = true + super + end + END + end + + def []= idx, val + @sparse = true if val == AVAIL + super + end + + def free_block + if @sparse + i = index(AVAIL) and return i + end + @sparse = false + push AVAIL + length - 1 + end + + # must return first_block. modifies +blocks+ in place + def resize_chain blocks, size + new_num_blocks = (size / block_size.to_f).ceil + old_num_blocks = blocks.length + if new_num_blocks < old_num_blocks + # de-allocate some of our old blocks. TODO maybe zero them out in the file??? + (new_num_blocks...old_num_blocks).each { |i| self[blocks[i]] = AVAIL } + self[blocks[new_num_blocks-1]] = EOC if new_num_blocks > 0 + blocks.slice! new_num_blocks..-1 + elsif new_num_blocks > old_num_blocks + # need some more blocks. + last_block = blocks.last + (new_num_blocks - old_num_blocks).times do + block = free_block + # connect the chain. handle corner case of blocks being [] initially + self[last_block] = block if last_block + blocks << block + last_block = block + self[last_block] = EOC + end + end + # update ranges, and return that also now + blocks + end + + class Big < AllocationTable + def initialize(*args) + super + @block_size = 1 << @ole.header.b_shift + @io = @ole.io + end + + # Big blocks are kind of -1 based, in order to not clash with the header. 
+ def blocks_to_ranges blocks, size + super blocks.map { |b| b + 1 }, size + end + end + + class Small < AllocationTable + def initialize(*args) + super + @block_size = 1 << @ole.header.s_shift + @io = @ole.sb_file + end + end + end + + # like normal RangesIO, but Ole::Storage specific. the ranges are backed by an + # AllocationTable, and can be resized. used for read/write to 2 streams: + # 1. serialized dirent data + # 2. sbat table data + # 3. all dirents but through RangesIOMigrateable below + # + # Note that all internal access to first_block is through accessors, as it is sometimes + # useful to redirect it. + class RangesIOResizeable < RangesIO + attr_reader :bat + attr_accessor :first_block + def initialize bat, mode='r', params={} + mode, params = 'r', mode if Hash === mode + first_block, size = params.values_at :first_block, :size + raise ArgumentError, 'must specify first_block' unless first_block + @bat = bat + self.first_block = first_block + # we now cache the blocks chain, for faster resizing. + @blocks = @bat.chain first_block + super @bat.io, mode, :ranges => @bat.ranges(@blocks, size) + end + + def truncate size + # note that old_blocks is != @ranges.length necessarily. i'm planning to write a + # merge_ranges function that merges sequential ranges into one as an optimization. + @bat.resize_chain @blocks, size + @ranges = @bat.ranges @blocks, size + @pos = @size if @pos > size + self.first_block = @blocks.empty? ? AllocationTable::EOC : @blocks.first + + # don't know if this is required, but we explicitly request our @io to grow if necessary + # we never shrink it though. maybe this belongs in allocationtable, where smarter decisions + # can be made. + # maybe its ok to just seek out there later?? + max = @ranges.map { |pos, len| pos + len }.max || 0 + @io.truncate max if max > @io.size + + @size = size + end + end + + # like RangesIOResizeable, but Ole::Storage::Dirent specific. 
provides for migration + # between bats based on size, and updating the dirent. + class RangesIOMigrateable < RangesIOResizeable + attr_reader :dirent + def initialize dirent, mode='r' + @dirent = dirent + super @dirent.ole.bat_for_size(@dirent.size), mode, + :first_block => @dirent.first_block, :size => @dirent.size + end + + def truncate size + bat = @dirent.ole.bat_for_size size + if bat.class != @bat.class + # bat migration needed! we need to backup some data. the amount of data + # should be <= @ole.header.threshold, so we can just hold it all in one buffer. + # backup this + pos = @pos + @pos = 0 + keep = read [@size, size].min + # this does a normal truncate to 0, removing our presence from the old bat, and + # rewrite the dirent's first_block + super 0 + @bat = bat + # just change the underlying io from right under everyone :) + @io = bat.io + # important to do this now, before the write. as the below write will always + # migrate us back to sbat! this will now allocate us +size+ in the new bat. + super + @pos = 0 + write keep + @pos = pos + else + super + end + # now just update the file + @dirent.size = size + end + + # forward this to the dirent + def first_block + @dirent.first_block + end + + def first_block= val + @dirent.first_block = val + end + end + + # + # A class which wraps an ole directory entry. Can be either a directory + # (<tt>Dirent#dir?</tt>) or a file (<tt>Dirent#file?</tt>) + # + # Most interaction with <tt>Ole::Storage</tt> is through this class. + # The 2 most important functions are <tt>Dirent#children</tt>, and + # <tt>Dirent#data</tt>. + # + # was considering separate classes for dirs and files. some methods/attrs only + # applicable to one or the other. + # + # As with the other classes, #to_s performs the serialization. 
+ # + class Dirent < Struct.new( + :name_utf16, :name_len, :type_id, :colour, :prev, :next, :child, + :clsid, :flags, # dirs only + :create_time_str, :modify_time_str, # files only + :first_block, :size, :reserved + ) + include RecursivelyEnumerable + + PACK = 'a64 v C C V3 a16 V a8 a8 V2 a4' + SIZE = 128 + TYPE_MAP = { + # this is temporary + 0 => :empty, + 1 => :dir, + 2 => :file, + 5 => :root + } + # something to do with the fact that the tree is supposed to be red-black + COLOUR_MAP = { + 0 => :red, + 1 => :black + } + # used in the next / prev / child stuff to show that the tree ends here. + # also used for first_block for directory. + EOT = 0xffffffff + DEFAULT = [ + 0.chr * 2, 2, 0, # will get overwritten + 1, EOT, EOT, EOT, + 0.chr * 16, 0, nil, nil, + AllocationTable::EOC, 0, 0.chr * 4 + ] + + # i think its just used by the tree building + attr_accessor :idx + # This returns all the children of this +Dirent+. It is filled in + # when the tree structure is recreated. + attr_accessor :children + attr_accessor :name + attr_reader :ole, :type, :create_time, :modify_time + def initialize ole, values=DEFAULT, params={} + @ole = ole + values, params = DEFAULT, values if Hash === values + values = values.unpack(PACK) if String === values + super(*values) + + # extra parsing from the actual struct values + @name = params[:name] || Types::Variant.load(Types::VT_LPWSTR, name_utf16[0...name_len]) + @type = if params[:type] + unless TYPE_MAP.values.include?(params[:type]) + raise ArgumentError, "unknown type #{params[:type].inspect}" + end + params[:type] + else + TYPE_MAP[type_id] or raise FormatError, "unknown type_id #{type_id.inspect}" + end + + # further extra type specific stuff + if file? + default_time = @ole.params[:update_timestamps] ? 
Time.now : nil + @create_time ||= default_time + @modify_time ||= default_time + @create_time = Types::Variant.load(Types::VT_FILETIME, create_time_str) if create_time_str + @modify_time = Types::Variant.load(Types::VT_FILETIME, create_time_str) if modify_time_str + @children = nil + else + @create_time = nil + @modify_time = nil + self.size = 0 unless @type == :root + @children = [] + end + + # to silence warnings. used for tree building at load time + # only. + @idx = nil + end + + def open mode='r' + raise Errno::EISDIR unless file? + io = RangesIOMigrateable.new self, mode + # TODO work on the mode string stuff a bit more. + # maybe let the io object know about the mode, so it can refuse + # to work for read/write appropriately. maybe redefine all unusable + # methods using singleton class to throw errors. + # for now, i just want to implement truncation on use of 'w'. later, + # i need to do 'a' etc. + case mode + when 'r', 'r+' + # as i don't enforce reading/writing, nothing changes here. kind of + # need to enforce tt if i want modify times to work better. + @modify_time = Time.now if mode == 'r+' + when 'w' + @modify_time = Time.now + # io.truncate 0 + #else + # raise NotImplementedError, "unsupported mode - #{mode.inspect}" + end + if block_given? + begin yield io + ensure; io.close + end + else io + end + end + + def read limit=nil + open { |io| io.read limit } + end + + def file? + type == :file + end + + def dir? + # to count root as a dir. + !file? + end + + # maybe need some options regarding case sensitivity. + def / name + children.find { |child| name === child.name } + end + + def [] idx + if String === idx + #warn 'String form of Dirent#[] is deprecated' + self / idx + else + super + end + end + + # move to ruby-msg. and remove from here + def time + #warn 'Dirent#time is deprecated' + create_time || modify_time + end + + def each_child(&block) + @children.each(&block) + end + + # flattens the tree starting from here into +dirents+. 
note it modifies its argument. + def flatten dirents=[] + @idx = dirents.length + dirents << self + if file? + self.prev = self.next = self.child = EOT + else + children.each { |child| child.flatten dirents } + self.child = Dirent.flatten_helper children + end + dirents + end + + # i think making the tree structure optimized is actually more complex than this, and + # requires some intelligent ordering of the children based on names, but as long as + # it is valid its ok. + # actually, i think its ok. gsf for example only outputs a singly-linked-list, where + # prev is always EOT. + def self.flatten_helper children + return EOT if children.empty? + i = children.length / 2 + this = children[i] + this.prev, this.next = [(0...i), (i+1..-1)].map { |r| flatten_helper children[r] } + this.idx + end + + def to_s + tmp = Types::Variant.dump(Types::VT_LPWSTR, name) + tmp = tmp[0, 62] if tmp.length > 62 + tmp += 0.chr * 2 + self.name_len = tmp.length + self.name_utf16 = tmp + 0.chr * (64 - tmp.length) + # type_id can perhaps be set in the initializer, as its read only now. + self.type_id = TYPE_MAP.to_a.find { |id, name| @type == name }.first + # for the case of files, it is assumed that that was handled already + # note not dir?, so as not to override root's first_block + self.first_block = Dirent::EOT if type == :dir + if file? + # this is messed up. it changes the time stamps regardless of whether the file + # was actually touched. instead, any open call with a writeable mode, should update + # the modify time. create time would be set in new. + if @ole.params[:update_timestamps] + self.create_time_str = Types::Variant.dump Types::VT_FILETIME, @create_time + self.modify_time_str = Types::Variant.dump Types::VT_FILETIME, @modify_time + end + else + self.create_time_str = 0.chr * 8 + self.modify_time_str = 0.chr * 8 + end + to_a.pack PACK + end + + def inspect + str = "#<Dirent:#{name.inspect}" + # perhaps i should remove the data snippet. its not that useful anymore. 
+ # there is also some dir specific stuff. like clsid, flags, that i should + # probably include + if file? + tmp = read 9 + data = tmp.length == 9 ? tmp[0, 5] + '...' : tmp + str << " size=#{size}" + + "#{modify_time ? ' modify_time=' + modify_time.to_s.inspect : nil}" + + " data=#{data.inspect}" + end + str + '>' + end + + def delete child + # remove from our child array, so that on reflatten and re-creation of @dirents, it will be gone + raise ArgumentError, "#{child.inspect} not a child of #{self.inspect}" unless @children.delete child + # free our blocks + child.open { |io| io.truncate 0 } + end + + def self.copy src, dst + # copies the contents of src to dst. must be the same type. this will throw an + # error on copying to root. maybe this will recurse too much for big documents?? + raise ArgumentError, 'differing types' if src.file? and !dst.file? + dst.name = src.name + if src.dir? + src.children.each do |src_child| + dst_child = Dirent.new dst.ole, :type => src_child.type + dst.children << dst_child + Dirent.copy src_child, dst_child + end + else + src.open do |src_io| + dst.open { |dst_io| IO.copy src_io, dst_io } + end + end + end + end + end +end + diff --git a/vendor/ruby-ole/lib/ole/storage/file_system.rb b/vendor/ruby-ole/lib/ole/storage/file_system.rb new file mode 100644 index 000000000..531f1ba11 --- /dev/null +++ b/vendor/ruby-ole/lib/ole/storage/file_system.rb @@ -0,0 +1,423 @@ +# +# = Introduction +# +# This file intends to provide file system-like api support, a la <tt>zip/zipfilesystem</tt>. +# +# = TODO +# +# - need to implement some more IO functions on RangesIO, like #puts, #print +# etc, like AbstractOutputStream from zipfile. +# +# - check Dir.mkdir, and File.open, and File.rename, to add in filename +# length checks (max 32 / 31 or something). +# do the automatic truncation, and add in any necessary warnings. 
+# +# - File.split('a/') == File.split('a') == ['.', 'a'] +# the implication of this, is that things that try to force directory +# don't work. like, File.rename('a', 'b'), should work if a is a file +# or directory, but File.rename('a/', 'b') should only work if a is +# a directory. tricky, need to clean things up a bit more. +# i think a general path name => dirent method would work, with flags +# about what should raise an error. +# +# - Need to look at streamlining things after getting all the tests passing, +# as this file's getting pretty long - almost half the real implementation. +# and is probably more inefficient than necessary. +# too many exceptions in the expected path of certain functions. +# +# - should look at profiles before and after switching ruby-msg to use +# the filesystem api. +# + +require 'ole/storage' + +module Ole # :nodoc: + class Storage + def file + @file ||= FileClass.new self + end + + def dir + @dir ||= DirClass.new self + end + + # tries to get a dirent for path. return nil if it doesn't exist + # (change it) + def dirent_from_path path + dirent = @root + path = file.expand_path path + path = path.sub(/^\/*/, '').sub(/\/*$/, '').split(/\/+/) + until path.empty? + return nil if dirent.file? + return nil unless dirent = dirent/path.shift + end + dirent + end + + class FileClass + class Stat + attr_reader :ftype, :size, :blocks, :blksize + attr_reader :nlink, :uid, :gid, :dev, :rdev, :ino + def initialize dirent + @dirent = dirent + @size = dirent.size + if file? + @ftype = 'file' + bat = dirent.ole.bat_for_size(dirent.size) + @blocks = bat.chain(dirent.first_block).length + @blksize = bat.block_size + else + @ftype = 'directory' + @blocks = 0 + @blksize = 0 + end + # a lot of these are bogus. ole file format has no analogs + @nlink = 1 + @uid, @gid = 0, 0 + @dev, @rdev = 0, 0 + @ino = 0 + # need to add times - atime, mtime, ctime. + end + + alias rdev_major :rdev + alias rdev_minor :rdev + + def file? + @dirent.file? 
+ end + + def directory? + @dirent.dir? + end + + def size? + size if file? + end + + def inspect + pairs = (instance_variables - ['@dirent']).map do |n| + "#{n[1..-1]}=#{instance_variable_get n}" + end + "#<#{self.class} #{pairs * ', '}>" + end + end + + def initialize ole + @ole = ole + end + + def expand_path path + # get the raw stored pwd value (its blank for root) + pwd = @ole.dir.instance_variable_get :@pwd + # its only absolute if it starts with a '/' + path = "#{pwd}/#{path}" unless path =~ /^\// + # at this point its already absolute. we use File.expand_path + # just for the .. and . handling + # No longer use RUBY_PLATFORM =~ /win/ as it matches darwin. better way? + File.expand_path(path)[File::ALT_SEPARATOR == "\\" ? (2..-1) : (0..-1)] + end + + # +orig_path+ is just so that we can use the requested path + # in the error messages even if it has been already modified + def dirent_from_path path, orig_path=nil + orig_path ||= path + dirent = @ole.dirent_from_path path + raise Errno::ENOENT, orig_path unless dirent + raise Errno::EISDIR, orig_path if dirent.dir? + dirent + end + private :dirent_from_path + + def exists? path + !!@ole.dirent_from_path(path) + end + alias exist? :exists? + + def file? path + dirent = @ole.dirent_from_path path + dirent and dirent.file? + end + + def directory? path + dirent = @ole.dirent_from_path path + dirent and dirent.dir? + end + + def open path, mode='r', &block + if IO::Mode.new(mode).create? + begin + dirent = dirent_from_path path + rescue Errno::ENOENT + # maybe instead of repeating this everywhere, i should have + # a get_parent_dirent function. 
+ parent_path, basename = File.split expand_path(path) + parent = @ole.dir.send :dirent_from_path, parent_path, path + parent.children << dirent = Dirent.new(@ole, :type => :file, :name => basename) + end + else + dirent = dirent_from_path path + end + dirent.open mode, &block + end + + # explicit wrapper instead of alias to inhibit block + def new path, mode='r' + open path, mode + end + + def size path + dirent_from_path(path).size + rescue Errno::EISDIR + # kind of arbitrary. I'm getting 4096 from ::File, but + # the zip tests want 0. + 0 + end + + def size? path + dirent_from_path(path).size + # any other exceptions i need to rescue? + rescue Errno::ENOENT, Errno::EISDIR + nil + end + + def stat path + # we do this to allow dirs. + dirent = @ole.dirent_from_path path + raise Errno::ENOENT, path unless dirent + Stat.new dirent + end + + def read path + open path, &:read + end + + # most of the work this function does is moving the dirent between + # 2 parents. the actual name changing is quite simple. + # File.rename can move a file into another folder, which is why i've + # done it too, though i think its not always possible... + # + # FIXME File.rename can be used for directories too.... + def rename from_path, to_path + # check what we want to rename from exists. do it this + # way to allow directories. + dirent = @ole.dirent_from_path from_path + raise Errno::ENOENT, from_path unless dirent + # delete what we want to rename to if necessary + begin + unlink to_path + rescue Errno::ENOENT + # we actually get here, but rcov doesn't think so. add 1 + 1 to + # keep rcov happy for now... 
:) + 1 + 1 + end + # reparent the dirent + from_parent_path, from_basename = File.split expand_path(from_path) + to_parent_path, to_basename = File.split expand_path(to_path) + from_parent = @ole.dir.send :dirent_from_path, from_parent_path, from_path + to_parent = @ole.dir.send :dirent_from_path, to_parent_path, to_path + from_parent.children.delete dirent + # and also change its name + dirent.name = to_basename + to_parent.children << dirent + 0 + end + + # crappy copy from Dir. + def unlink(*paths) + paths.each do |path| + dirent = @ole.dirent_from_path path + # i think we should free all of our blocks from the + # allocation table. + # i think if you run repack, all free blocks should get zeroed, + # but currently the original data is there unmodified. + open(path) { |f| f.truncate 0 } + # remove ourself from our parent, so we won't be part of the dir + # tree at save time. + parent_path, basename = File.split expand_path(path) + parent = @ole.dir.send :dirent_from_path, parent_path, path + parent.children.delete dirent + end + paths.length # hmmm. as per ::File ? + end + alias delete :unlink + end + + # + # an *instance* of this class is supposed to provide similar methods + # to the class methods of Dir itself. + # + # pretty complete. like zip/zipfilesystem's implementation, i provide + # everything except chroot and glob. glob could be done with a glob + # to regex regex, and then simply match in the entries array... although + # recursive glob complicates that somewhat. + # + # Dir.chroot, Dir.glob, Dir.[], and Dir.tmpdir is the complete list. + class DirClass + def initialize ole + @ole = ole + @pwd = '' + end + + # +orig_path+ is just so that we can use the requested path + # in the error messages even if it has been already modified + def dirent_from_path path, orig_path=nil + orig_path ||= path + dirent = @ole.dirent_from_path path + raise Errno::ENOENT, orig_path unless dirent + raise Errno::ENOTDIR, orig_path unless dirent.dir? 
+ dirent + end + private :dirent_from_path + + def open path + dir = Dir.new path, entries(path) + if block_given? + yield dir + else + dir + end + end + + # as for file, explicit alias to inhibit block + def new path + open path + end + + # pwd is always stored without the trailing slash. we handle + # the root case here + def pwd + if @pwd.empty? + '/' + else + @pwd + end + end + alias getwd :pwd + + def chdir orig_path + # make path absolute, squeeze slashes, and remove trailing slash + path = @ole.file.expand_path(orig_path).gsub(/\/+/, '/').sub(/\/$/, '') + # this is just for the side effects of the exceptions if invalid + dirent_from_path path, orig_path + if block_given? + old_pwd = @pwd + begin + @pwd = path + yield + ensure + @pwd = old_pwd + end + else + @pwd = path + 0 + end + end + + def entries path + dirent = dirent_from_path path + # Not sure about adding on the dots... + entries = %w[. ..] + dirent.children.map(&:name) + # do some checks about un-reachable files + seen = {} + entries.each do |n| + Log.warn "inaccessible file (filename contains slash) - #{n.inspect}" if n['/'] + Log.warn "inaccessible file (duplicate filename) - #{n.inspect}" if seen[n] + seen[n] = true + end + entries + end + + def foreach path, &block + entries(path).each(&block) + end + + # there are some other important ones, like: + # chroot (!), glob etc etc. for now, i think + def mkdir path + # as for rmdir below: + parent_path, basename = File.split @ole.file.expand_path(path) + # note that we will complain about the full path despite accessing + # the parent path. this is consistent with ::Dir + parent = dirent_from_path parent_path, path + # now, we first should ensure that it doesn't already exist + # either as a file or a directory. + raise Errno::EEXIST, path if parent/basename + parent.children << Dirent.new(@ole, :type => :dir, :name => basename) + 0 + end + + def rmdir path + dirent = dirent_from_path path + raise Errno::ENOTEMPTY, path unless dirent.children.empty? 
+ + # now delete it, how to do that? the canonical representation that is + # maintained is the root tree, and the children array. we must remove it + # from the children array. + # we need the parent then. this sucks but anyway: + # we need to split the path. but before we can do that, we need + # to expand it first. eg. say we need the parent to unlink + # a/b/../c. the parent should be a, not a/b/.., or a/b. + parent_path, basename = File.split @ole.file.expand_path(path) + # this shouldn't be able to fail if the above didn't + parent = dirent_from_path parent_path + # note that the way this currently works, on save and repack time this will get + # reflected. to work properly, ie to make a difference now it would have to re-write + # the dirent. i think that Ole::Storage#close will handle that. and maybe include a + # #repack. + parent.children.delete dirent + 0 # hmmm. as per ::Dir ? + end + alias delete :rmdir + alias unlink :rmdir + + # note that there is nothing remotely ole specific about + # this class. it simply provides the dir like sequential access + # methods on top of an array. + # hmm, doesn't throw the IOError's on use of a closed directory... 
+ class Dir + include Enumerable + + attr_reader :path + def initialize path, entries + @path, @entries, @pos = path, entries, 0 + @closed = false + end + + def pos + raise IOError if @closed + @pos + end + + def each(&block) + raise IOError if @closed + @entries.each(&block) + end + + def close + @closed = true + end + + def read + raise IOError if @closed + @entries[pos] + ensure + @pos += 1 if pos < @entries.length + end + + def pos= pos + raise IOError if @closed + @pos = [[0, pos].max, @entries.length].min + end + + def rewind + raise IOError if @closed + @pos = 0 + end + + alias tell :pos + alias seek :pos= + end + end + end +end + diff --git a/vendor/ruby-ole/lib/ole/storage/meta_data.rb b/vendor/ruby-ole/lib/ole/storage/meta_data.rb new file mode 100644 index 000000000..be84037df --- /dev/null +++ b/vendor/ruby-ole/lib/ole/storage/meta_data.rb @@ -0,0 +1,148 @@ +require 'ole/types/property_set' + +module Ole + class Storage + # + # The MetaData class is designed to be high level interface to all the + # underlying meta data stored within different sections, themselves within + # different property set streams. + # + # With this class, you can simply get properties using their names, without + # needing to know about the underlying guids, property ids etc. + # + # Example: + # + # Ole::Storage.open('test.doc') { |ole| p ole.meta_data.doc_author } + # + # TODO: + # + # * add write support + # * fix some of the missing type coercion (eg FileTime) + # * maybe add back the ability to access individual property sets as a unit + # directly. ie <tt>ole.summary_information</tt>. Is this useful? + # * full key support, for unknown keys, like + # <tt>ole.meta_data[myguid, myid]</tt>. probably needed for user-defined + # properties too. 
+ # + class MetaData + include Enumerable + + FILE_MAP = { + Types::PropertySet::FMTID_SummaryInformation => "\005SummaryInformation", + Types::PropertySet::FMTID_DocSummaryInfo => "\005DocumentSummaryInformation" + } + + FORMAT_MAP = { + 'MSWordDoc' => :doc + } + + CLSID_EXCEL97 = Types::Clsid.parse "{00020820-0000-0000-c000-000000000046}" + CLSID_EXCEL95 = Types::Clsid.parse "{00020810-0000-0000-c000-000000000046}" + CLSID_WORD97 = Types::Clsid.parse "{00020906-0000-0000-c000-000000000046}" + CLSID_WORD95 = Types::Clsid.parse "{00020900-0000-0000-c000-000000000046}" + + CLSID_MAP = { + CLSID_EXCEL97 => :xls, + CLSID_EXCEL95 => :xls, + CLSID_WORD97 => :doc, + CLSID_WORD95 => :doc + } + + MIME_TYPES = { + :xls => 'application/vnd.ms-excel', + :doc => 'application/msword', + :ppt => 'application/vnd.ms-powerpoint', + # not registered at IANA, but seems most common usage + :msg => 'application/vnd.ms-outlook', + # this is my default fallback option. also not registered at IANA. + # file(1)'s default is application/msword, which is useless... 
+ nil => 'application/x-ole-storage' + } + + def initialize ole + @ole = ole + end + + # i'm thinking of making file_format and mime_type available through + # #[], #each, and #to_h also, as calculated meta data (not assignable) + + def comp_obj + return {} unless dirent = @ole.root["\001CompObj"] + data = dirent.read + # see - https://gnunet.org/svn/Extractor/doc/StarWrite_File_Format.html + # compobj_version: 0x0001 + # byte_order: 0xffe + # windows_version: 0x00000a03 (win31 apparently) + # marker: 0xffffffff + compobj_version, byte_order, windows_version, marker, clsid = + data.unpack("vvVVa#{Types::Clsid::SIZE}") + strings = [] + i = 28 + while i < data.length + len = data[i, 4].unpack('V').first + i += 4 + strings << data[i, len - 1] + i += len + end + # in the unknown chunk, you usually see something like 'Word.Document.6' + {:username => strings[0], :file_format => strings[1], :unknown => strings[2..-1]} + end + private :comp_obj + + def file_format + comp_obj[:file_format] + end + + def mime_type + # based on the CompObj stream contents + type = FORMAT_MAP[file_format] + return MIME_TYPES[type] if type + + # based on the root clsid + type = CLSID_MAP[Types::Clsid.load(@ole.root.clsid)] + return MIME_TYPES[type] if type + + # fallback to heuristics + has_file = Hash[*@ole.root.children.map { |d| [d.name.downcase, true] }.flatten] + return MIME_TYPES[:msg] if has_file['__nameid_version1.0'] or has_file['__properties_version1.0'] + return MIME_TYPES[:doc] if has_file['worddocument'] or has_file['document'] + return MIME_TYPES[:xls] if has_file['workbook'] or has_file['book'] + + MIME_TYPES[nil] + end + + def [] key + pair = Types::PropertySet::PROPERTY_MAP[key.to_s] or return nil + file = FILE_MAP[pair.first] or return nil + dirent = @ole.root[file] or return nil + dirent.open { |io| return Types::PropertySet.new(io)[key] } + end + + def []= key, value + raise NotImplementedError, 'meta data writes not implemented' + end + + def each(&block) + 
FILE_MAP.values.each do |file| + dirent = @ole.root[file] or next + dirent.open { |io| Types::PropertySet.new(io).each(&block) } + end + end + + def to_h + inject({}) { |hash, (name, value)| hash.update name.to_sym => value } + end + + def method_missing name, *args, &block + return super unless args.empty? + pair = Types::PropertySet::PROPERTY_MAP[name.to_s] or return super + self[name] + end + end + + def meta_data + @meta_data ||= MetaData.new(self) + end + end +end + diff --git a/vendor/ruby-ole/lib/ole/support.rb b/vendor/ruby-ole/lib/ole/support.rb new file mode 100644 index 000000000..bbb0bbe68 --- /dev/null +++ b/vendor/ruby-ole/lib/ole/support.rb @@ -0,0 +1,256 @@ +# +# A file with general support functions used by most files in the project. +# +# These are the only methods added to other classes. +# + +require 'logger' +require 'stringio' +require 'enumerator' + +class String # :nodoc: + # plural of String#index. returns all offsets of +string+. rename to indices? + # + # note that it doesn't check for overlapping values. + def indexes string + # in some ways i'm surprised that $~ works properly in this case... + to_enum(:scan, /#{Regexp.quote string}/m).map { $~.begin 0 } + end + + def each_chunk size + (length / size.to_f).ceil.times { |i| yield self[i * size, size] } + end +end + +class File # :nodoc: + # for interface consistency with StringIO etc (rather than adding #stat + # to them). used by RangesIO. + def size + stat.size + end +end + +class Symbol # :nodoc: + unless :x.respond_to? :to_proc + def to_proc + proc { |a| a.send self } + end + end +end + +module Enumerable # :nodoc: + unless [].respond_to? :group_by + # 1.9 backport + def group_by + hash = Hash.new { |h, key| h[key] = [] } + each { |item| hash[yield(item)] << item } + hash + end + end + + unless [].respond_to? :sum + def sum initial=0 + inject(initial) { |a, b| a + b } + end + end +end + +# move to support? 
+class IO # :nodoc: + # Copy data from IO-like object +src+, to +dst+ + def self.copy src, dst + until src.eof? + buf = src.read(4096) + dst.write buf + end + end +end + +class Logger # :nodoc: + # A helper method for creating a +Logger+ which produce call stack + # in their output + def self.new_with_callstack logdev=STDERR + log = Logger.new logdev + log.level = WARN + log.formatter = proc do |severity, time, progname, msg| + # find where we were called from, in our code + callstack = caller.dup + callstack.shift while callstack.first =~ /\/logger\.rb:\d+:in/ + from = callstack.first.sub(/:in `(.*?)'/, ":\\1") + "[%s %s]\n%-7s%s\n" % [time.strftime('%H:%M:%S'), from, severity, msg.to_s] + end + log + end +end + +# Include this module into a class that defines #each_child. It should +# maybe use #each instead, but its easier to be more specific, and use +# an alias. +# +# I don't want to force the class to cache children (eg where children +# are loaded on request in pst), because that forces the whole tree to +# be loaded. So, the methods should only call #each_child once, and +# breadth first iteration holds its own copy of the children around. +# +# Main methods are #recursive, and #to_tree +module RecursivelyEnumerable # :nodoc: + def each_recursive_depth_first(&block) + each_child do |child| + yield child + if child.respond_to? :each_recursive_depth_first + child.each_recursive_depth_first(&block) + end + end + end + + # don't think this is actually a proper breadth first recursion. only first + # level is breadth first. + def each_recursive_breadth_first(&block) + children = [] + each_child do |child| + children << child if child.respond_to? 
:each_recursive_breadth_first + yield child + end + children.each { |child| child.each_recursive_breadth_first(&block) } + end + + def each_recursive mode=:depth_first, &block + # we always actually yield ourself (the tree root) before recursing + yield self + send "each_recursive_#{mode}", &block + end + + # the idea of this function, is to allow use of regular Enumerable methods + # in a recursive fashion. eg: + # + # # just looks at top level children + # root.find { |child| child.some_condition? } + # # recurse into all children getting non-folders, breadth first + # root.recursive(:breadth_first).select { |child| !child.folder? } + # # just get everything + # items = root.recursive.to_a + # + def recursive mode=:depth_first + to_enum(:each_recursive, mode) + end + + # streams a "tree" form of the recursively enumerable structure to +io+, or + # return a string form instead if +io+ is not specified. + # + # mostly a debugging aid. can specify a different block which will be called + # to provide the string form for each node. 
+ def to_tree io='', &inspect + inspect ||= :inspect.to_proc + io << "- #{inspect[self]}\n" + recurse = proc do |node, prefix| + child = nil + node.each_child do |next_child| + if child + io << "#{prefix}|- #{inspect[child]}\n" + recurse.call child, prefix + '| ' + end + child = next_child + end if node.respond_to?(:each_child) + if child + io << "#{prefix}\\- #{inspect[child]}\n" + recurse.call child, prefix + ' ' + end + end + recurse.call self, ' ' + io + end +end + +# can include File::Constants +class IO + # this is for jruby + include File::Constants unless defined?(RDONLY) + + # nabbed from rubinius, and modified + def self.parse_mode mode + ret = 0 + + case mode[0, 1] + when 'r'; ret |= RDONLY + when 'w'; ret |= WRONLY | CREAT | TRUNC + when 'a'; ret |= WRONLY | CREAT | APPEND + else raise ArgumentError, "illegal access mode #{mode}" + end + + (1...mode.length).each do |i| + case mode[i, 1] + when '+'; ret = (ret & ~(RDONLY | WRONLY)) | RDWR + when 'b'; ret |= Mode::BINARY + else raise ArgumentError, "illegal access mode #{mode}" + end + end + + ret + end + + class Mode + # ruby 1.9 defines binary as 0, which isn't very helpful. + # its 4 in rubinius. no longer using + # + # BINARY = 0x4 unless defined?(BINARY) + # + # for that reason, have my own constants module here + module Constants + include File::Constants + BINARY = 0x4 + end + + include Constants + NAMES = %w[rdonly wronly rdwr creat trunc append binary] + + attr_reader :flags + def initialize flags + flags = IO.parse_mode flags.to_str if flags.respond_to? :to_str + raise ArgumentError, "invalid flags - #{flags.inspect}" unless Fixnum === flags + @flags = flags + end + + def writeable? + #(@flags & RDONLY) == 0 + (@flags & 0x3) != RDONLY + end + + def readable? + (@flags & WRONLY) == 0 + end + + def truncate? + (@flags & TRUNC) != 0 + end + + def append? + (@flags & APPEND) != 0 + end + + def create? + (@flags & CREAT) != 0 + end + + def binary? 
+ (@flags & BINARY) != 0 + end + +=begin + # revisit this + def apply io + if truncate? + io.truncate 0 + elsif append? + io.seek IO::SEEK_END, 0 + end + end +=end + + def inspect + names = NAMES.map { |name| name if (flags & Mode.const_get(name.upcase)) != 0 } + names.unshift 'rdonly' if (flags & 0x3) == 0 + "#<#{self.class} #{names.compact * '|'}>" + end + end +end + diff --git a/vendor/ruby-ole/lib/ole/types.rb b/vendor/ruby-ole/lib/ole/types.rb new file mode 100644 index 000000000..95616927a --- /dev/null +++ b/vendor/ruby-ole/lib/ole/types.rb @@ -0,0 +1,2 @@ +require 'ole/types/base' +require 'ole/types/property_set' diff --git a/vendor/ruby-ole/lib/ole/types/base.rb b/vendor/ruby-ole/lib/ole/types/base.rb new file mode 100644 index 000000000..31e7b24e9 --- /dev/null +++ b/vendor/ruby-ole/lib/ole/types/base.rb @@ -0,0 +1,251 @@ +require 'iconv' +require 'date' + +require 'ole/base' + +module Ole # :nodoc: + # + # The Types module contains all the serialization and deserialization code for standard ole + # types. + # + # It also defines all the variant type constants, and symbolic names. + # + module Types + # for anything that we don't have serialization code for + class Data < String + def self.load str + new str + end + + def self.dump str + str.to_s + end + end + + class Lpstr < String + def self.load str + # not sure if its always there, but there is often a trailing + # null byte. + new str.chomp(0.chr) + end + + def self.dump str + # do i need to append the null byte? + str.to_s + end + end + + # for VT_LPWSTR + class Lpwstr < String + FROM_UTF16 = Iconv.new 'utf-8', 'utf-16le' + TO_UTF16 = Iconv.new 'utf-16le', 'utf-8' + + def self.load str + new FROM_UTF16.iconv(str).chomp(0.chr) + end + + def self.dump str + # need to append nulls? + data = TO_UTF16.iconv str + # not sure if this is the recommended way to do it, but I want to treat + # the resulting utf16 data as regular bytes, not characters. 
+ data.force_encoding Encoding::US_ASCII if data.respond_to? :encoding + data + end + end + + # for VT_FILETIME + class FileTime < DateTime + SIZE = 8 + EPOCH = new 1601, 1, 1 + + # Create a +DateTime+ object from a struct +FILETIME+ + # (http://msdn2.microsoft.com/en-us/library/ms724284.aspx). + # + # Converts +str+ to two 32 bit time values, comprising the high and low 32 bits of + # the 100's of nanoseconds since 1st january 1601 (Epoch). + def self.load str + low, high = str.to_s.unpack 'V2' + # we ignore these, without even warning about it + return nil if low == 0 and high == 0 + # switched to rational, and fixed the off by 1 second error i sometimes got. + # time = EPOCH + (high * (1 << 32) + low) / 1e7 / 86400 rescue return + # use const_get to ensure we can return anything which subclasses this (VT_DATE?) + const_get('EPOCH') + Rational(high * (1 << 32) + low, 1e7.to_i * 86400) rescue return + # extra sanity check... + #unless (1800...2100) === time.year + # Log.warn "ignoring unlikely time value #{time.to_s}" + # return nil + #end + #time + end + + # +time+ should be able to be either a Time, Date, or DateTime. + def self.dump time + # i think i'll convert whatever i get to be a datetime, because of + # the covered range. + return 0.chr * SIZE unless time + time = time.send(:to_datetime) if Time === time + # don't bother to use const_get here + bignum = (time - EPOCH) * 86400 * 1e7.to_i + high, low = bignum.divmod 1 << 32 + [low, high].pack 'V2' + end + + def inspect + "#<#{self.class} #{to_s}>" + end + end + + # for VT_CLSID + # Unlike most of the other conversions, the Guid's are serialized/deserialized by actually + # doing nothing! (eg, _load & _dump are null ops) + # Rather, its just a string with a different inspect string, and it includes a + # helper method for creating a Guid from that readable form (#format). 
+ class Clsid < String + SIZE = 16 + PACK = 'V v v CC C6' + + def self.load str + new str.to_s + end + + def self.dump guid + return 0.chr * SIZE unless guid + # allow use of plain strings in place of guids. + guid['-'] ? parse(guid) : guid + end + + def self.parse str + vals = str.scan(/[a-f\d]+/i).map(&:hex) + if vals.length == 5 + # this is pretty ugly + vals[3] = ('%04x' % vals[3]).scan(/../).map(&:hex) + vals[4] = ('%012x' % vals[4]).scan(/../).map(&:hex) + guid = new vals.flatten.pack(PACK) + return guid if guid.format.delete('{}') == str.downcase.delete('{}') + end + raise ArgumentError, 'invalid guid - %p' % str + end + + def format + "%08x-%04x-%04x-%02x%02x-#{'%02x' * 6}" % unpack(PACK) + end + + def inspect + "#<#{self.class}:{#{format}}>" + end + end + + # + # The OLE variant types, extracted from + # http://www.marin.clara.net/COM/variant_type_definitions.htm. + # + # A subset is also in WIN32OLE::VARIANT, but its not cross platform (obviously). + # + # Use like: + # + # p Ole::Types::Variant::NAMES[0x001f] => 'VT_LPWSTR' + # p Ole::Types::VT_DATE # => 7 + # + # The serialization / deserialization functions should be fixed to make it easier + # to work with. like + # + # Ole::Types.from_str(VT_DATE, data) # and + # Ole::Types.to_str(VT_DATE, data) + # + # Or similar, rather than having to do VT_* <=> ad hoc class name etc as it is + # currently. 
+ # + module Variant + NAMES = { + 0x0000 => 'VT_EMPTY', + 0x0001 => 'VT_NULL', + 0x0002 => 'VT_I2', + 0x0003 => 'VT_I4', + 0x0004 => 'VT_R4', + 0x0005 => 'VT_R8', + 0x0006 => 'VT_CY', + 0x0007 => 'VT_DATE', + 0x0008 => 'VT_BSTR', + 0x0009 => 'VT_DISPATCH', + 0x000a => 'VT_ERROR', + 0x000b => 'VT_BOOL', + 0x000c => 'VT_VARIANT', + 0x000d => 'VT_UNKNOWN', + 0x000e => 'VT_DECIMAL', + 0x0010 => 'VT_I1', + 0x0011 => 'VT_UI1', + 0x0012 => 'VT_UI2', + 0x0013 => 'VT_UI4', + 0x0014 => 'VT_I8', + 0x0015 => 'VT_UI8', + 0x0016 => 'VT_INT', + 0x0017 => 'VT_UINT', + 0x0018 => 'VT_VOID', + 0x0019 => 'VT_HRESULT', + 0x001a => 'VT_PTR', + 0x001b => 'VT_SAFEARRAY', + 0x001c => 'VT_CARRAY', + 0x001d => 'VT_USERDEFINED', + 0x001e => 'VT_LPSTR', + 0x001f => 'VT_LPWSTR', + 0x0040 => 'VT_FILETIME', + 0x0041 => 'VT_BLOB', + 0x0042 => 'VT_STREAM', + 0x0043 => 'VT_STORAGE', + 0x0044 => 'VT_STREAMED_OBJECT', + 0x0045 => 'VT_STORED_OBJECT', + 0x0046 => 'VT_BLOB_OBJECT', + 0x0047 => 'VT_CF', + 0x0048 => 'VT_CLSID', + 0x0fff => 'VT_ILLEGALMASKED', + 0x0fff => 'VT_TYPEMASK', + 0x1000 => 'VT_VECTOR', + 0x2000 => 'VT_ARRAY', + 0x4000 => 'VT_BYREF', + 0x8000 => 'VT_RESERVED', + 0xffff => 'VT_ILLEGAL' + } + + CLASS_MAP = { + # haven't seen one of these. wonder if its same as FILETIME? + #'VT_DATE' => ?, + 'VT_LPSTR' => Lpstr, + 'VT_LPWSTR' => Lpwstr, + 'VT_FILETIME' => FileTime, + 'VT_CLSID' => Clsid + } + + module Constants + NAMES.each { |num, name| const_set name, num } + end + + def self.load type, str + type = NAMES[type] or raise ArgumentError, 'unknown ole type - 0x%04x' % type + (CLASS_MAP[type] || Data).load str + end + + def self.dump type, variant + type = NAMES[type] or raise ArgumentError, 'unknown ole type - 0x%04x' % type + (CLASS_MAP[type] || Data).dump variant + end + end + + include Variant::Constants + + # deprecated aliases, kept mostly for the benefit of ruby-msg, until + # i release a new version. 
+ def self.load_guid str + Variant.load VT_CLSID, str + end + + def self.load_time str + Variant.load VT_FILETIME, str + end + + FROM_UTF16 = Lpwstr::FROM_UTF16 + TO_UTF16 = Lpwstr::TO_UTF16 + end +end + diff --git a/vendor/ruby-ole/lib/ole/types/property_set.rb b/vendor/ruby-ole/lib/ole/types/property_set.rb new file mode 100644 index 000000000..b8d85acba --- /dev/null +++ b/vendor/ruby-ole/lib/ole/types/property_set.rb @@ -0,0 +1,165 @@ +require 'ole/types' +require 'yaml' + +module Ole + module Types + # + # The PropertySet class currently supports readonly access to the properties + # serialized in "property set" streams, such as the file "\005SummaryInformation", + # in OLE files. + # + # Think it has its roots in MFC property set serialization. + # + # See http://poi.apache.org/hpsf/internals.html for details + # + class PropertySet + HEADER_SIZE = 28 + HEADER_PACK = "vvVa#{Clsid::SIZE}V" + OS_MAP = { + 0 => :win16, + 1 => :mac, + 2 => :win32, + 0x20001 => :ooffice, # open office on linux... + } + + # define a smattering of the property set guids. + DATA = YAML.load_file(File.dirname(__FILE__) + '/../../../data/propids.yaml'). 
+ inject({}) { |hash, (key, value)| hash.update Clsid.parse(key) => value } + + # create an inverted map of names to guid/key pairs + PROPERTY_MAP = DATA.inject({}) do |h1, (guid, data)| + data[1].inject(h1) { |h2, (id, name)| h2.update name => [guid, id] } + end + + module Constants + DATA.each { |guid, (name, map)| const_set name, guid } + end + + include Constants + include Enumerable + + class Section + include Variant::Constants + include Enumerable + + SIZE = Clsid::SIZE + 4 + PACK = "a#{Clsid::SIZE}v" + + attr_accessor :guid, :offset + attr_reader :length + + def initialize str, property_set + @property_set = property_set + @guid, @offset = str.unpack PACK + self.guid = Clsid.load guid + load_header + end + + def io + @property_set.io + end + + def load_header + io.seek offset + @byte_size, @length = io.read(8).unpack 'V2' + end + + def [] key + each_raw do |id, property_offset| + return read_property(property_offset).last if key == id + end + nil + end + + def []= key, value + raise NotImplementedError, 'section writes not yet implemented' + end + + def each + each_raw do |id, property_offset| + yield id, read_property(property_offset).last + end + end + + private + + def each_raw + io.seek offset + 8 + io.read(length * 8).each_chunk(8) { |str| yield(*str.unpack('V2')) } + end + + def read_property property_offset + io.seek offset + property_offset + type, value = io.read(8).unpack('V2') + # is the method of serialization here custom? + case type + when VT_LPSTR, VT_LPWSTR + value = Variant.load type, io.read(value) + # .... + end + [type, value] + end + end + + attr_reader :io, :signature, :unknown, :os, :guid, :sections + + def initialize io + @io = io + load_header io.read(HEADER_SIZE) + load_section_list io.read(@num_sections * Section::SIZE) + # expect no gap between last section and start of data. 
+ #Log.warn "gap between section list and property data" unless io.pos == @sections.map(&:offset).min + end + + def load_header str + @signature, @unknown, @os_id, @guid, @num_sections = str.unpack HEADER_PACK + # should i check that unknown == 0? it usually is. so is the guid actually + @guid = Clsid.load @guid + @os = OS_MAP[@os_id] || Log.warn("unknown operating system id #{@os_id}") + end + + def load_section_list str + @sections = str.to_enum(:each_chunk, Section::SIZE).map { |s| Section.new s, self } + end + + def [] key + pair = PROPERTY_MAP[key.to_s] or return nil + section = @sections.find { |s| s.guid == pair.first } or return nil + section[pair.last] + end + + def []= key, value + pair = PROPERTY_MAP[key.to_s] or return nil + section = @sections.find { |s| s.guid == pair.first } or return nil + section[pair.last] = value + end + + def method_missing name, *args, &block + if name.to_s =~ /(.*)=$/ + return super unless args.length == 1 + return super unless PROPERTY_MAP[$1] + self[$1] = args.first + else + return super unless args.length == 0 + return super unless PROPERTY_MAP[name.to_s] + self[name] + end + end + + def each + @sections.each do |section| + next unless pair = DATA[section.guid] + map = pair.last + section.each do |id, value| + name = map[id] or next + yield name, value + end + end + end + + def to_h + inject({}) { |hash, (name, value)| hash.update name.to_sym => value } + end + end + end +end |